Initial support for integration with the NVCC/CUDART API
This commit is contained in:
@@ -26,7 +26,11 @@ ISPC_BC=$(ISPC_SRC:%.ispc=%_ispc_nvptx64.bc)
|
|||||||
PTXSRC=$(ISPC_SRC:%.ispc=%_ispc_nvptx64.ptx)
|
PTXSRC=$(ISPC_SRC:%.ispc=%_ispc_nvptx64.ptx)
|
||||||
CXX_OBJ=$(CXX_SRC:%.cpp=%.o)
|
CXX_OBJ=$(CXX_SRC:%.cpp=%.o)
|
||||||
|
|
||||||
all: $(PROG)
|
all: $(ISPC_BC) $(PROG)
|
||||||
|
|
||||||
|
# CUDART: build the CUDA-runtime flavour of the example (mandel_cu_nvcc).
#   1. Build ./_cuobj/libmandel_cudart.a through the sub-make in _cuobj.
#   2. Compile and link the host sources against it with -D_CUDART_ defined.
# CUDATK is expected to be defined elsewhere in this Makefile / environment.
# The target produces no file called "CUDART", so it is declared phony.
.PHONY: CUDART
CUDART:
	# $(MAKE) -C forwards command-line flags and the jobserver to the sub-make.
	$(MAKE) -C _cuobj
	# Libraries are listed after the sources that reference them so that
	# linkers defaulting to --as-needed do not discard -lcuda.
	g++ -D_CUDART_ -I$(CUDATK)/include -o mandel_cu_nvcc mandel_cu.cpp mandelbrot_tasks_serial.cpp -L./_cuobj -L$(CUDATK)/lib64 -lmandel_cudart -lcudadevrt -lcudart -lcuda
|
||||||
|
|
||||||
|
|
||||||
$(CXX_OBJ) : kernel.ptx
|
$(CXX_OBJ) : kernel.ptx
|
||||||
|
|||||||
15
examples_cuda/mandelbrot_tasks3d/_cuobj/Makefile
Normal file
15
examples_cuda/mandelbrot_tasks3d/_cuobj/Makefile
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
# Builds lib$(FILE)_cudart.a from $(FILE).cu.
#
# The .cu file is not compiled to device code in the usual way.  nvcc is asked
# (-dryrun) to print the commands it would run; the line whose first word is
# "cicc" is replaced by a copy of ../__kernels.ptx onto that command's last
# argument, and the resulting script is executed.  The object produced by
# run.sh is then device-linked and archived.
FILE=mandel
# GPU architecture passed to nvcc; override with `make ARCH=sm_xx`.
ARCH?=sm_35

LIB=lib$(FILE)_cudart.a

.PHONY: all clean

all: $(LIB)

$(LIB) : $(FILE).cu
	# `$$` yields a literal `$` for the shell/awk; `$$0` is awk's whole record.
	nvcc -dc $(FILE).cu -arch=$(ARCH) -dryrun 2>&1 | sed 's/\#\$$//g'|awk '{ if ($$1 == "cicc") print "cp ../__kernels.ptx " $$NF; else print $$0 }' > run.sh
	sh run.sh
	nvcc -dlink -o $(FILE)_dlink.o $(FILE).o -lcudadevrt -arch=$(ARCH)
	nvcc $(FILE).o $(FILE)_dlink.o --lib -o $@

clean:
	/bin/rm -f *.o *.a run.sh
|
||||||
|
|
||||||
22
examples_cuda/mandelbrot_tasks3d/_cuobj/mandel.cu
Normal file
22
examples_cuda/mandelbrot_tasks3d/_cuobj/mandel.cu
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
// Placeholder for a device-side helper; the body is intentionally empty.
// The Makefile in this directory substitutes ../__kernels.ptx for the PTX
// that would be generated from this file, so presumably the real
// implementation comes from that PTX -- TODO confirm.
// NOTE(review): declared to return int but contains no return statement;
// this looks acceptable only because the stub body is never meant to run.
extern "C" static inline __device__ int mandel___vyfvyfvyi_(float c_re,
                                                            float c_im,
                                                            int count)
{
}
|
||||||
|
// Empty placeholder kernel: it only declares the C-linkage symbol and its
// parameter list.  The Makefile in this directory substitutes
// ../__kernels.ptx for the PTX generated from this file, so presumably the
// real kernel body comes from that PTX -- TODO confirm.
extern "C" __global__ void
mandelbrot_scanline___unfunfunfunfuniuniuniuniuniun_3C_uni_3E_(
    float x0, float dx,
    float y0, float dy,
    int width, int height,
    int xspan, int yspan,
    int maxIterations, int output[])
{
}
|
||||||
|
// Empty placeholder kernel: it only declares the C-linkage symbol and its
// parameter list so that the host launcher in this file can reference it.
// The Makefile in this directory substitutes ../__kernels.ptx for the PTX
// generated from this file, so presumably the real kernel body comes from
// that PTX -- TODO confirm.
extern "C" __global__ void
mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E_(
    float x0, float y0,
    float x1, float y1,
    int width, int height,
    int maxIterations, int output[])
{
}
|
||||||
|
|
||||||
|
// Host-callable, C-linkage launcher for the mandelbrot_ispc kernel.
//
// Forwards all arguments unchanged to
// mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E_, launched with a single
// block of 32 threads, then waits for the device to finish before returning.
// `output` is handed straight to the kernel, so it presumably has to be a
// device-accessible pointer -- TODO confirm against the caller.
extern "C" void mandelbrot_ispc(float x0, float y0,
                                float x1, float y1,
                                int width, int height,
                                int maxIterations, int output[])
{
    // Same launch configuration as <<<1,32>>>, spelled out as dim3 values.
    const dim3 gridShape(1);
    const dim3 blockShape(32);

    mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E_<<<gridShape, blockShape>>>(
        x0, y0, x1, y1, width, height, maxIterations, output);

    // Block the host until the kernel has completed.
    // NOTE(review): neither the launch status (cudaGetLastError) nor the
    // result of cudaDeviceSynchronize is inspected, so failures go unreported.
    cudaDeviceSynchronize();
}
|
||||||
|
|
||||||
@@ -44,6 +44,13 @@
|
|||||||
#include "../timing.h"
|
#include "../timing.h"
|
||||||
|
|
||||||
#include "../cuda_ispc.h"
|
#include "../cuda_ispc.h"
|
||||||
|
#ifdef _CUDART_
// Prototype of the C-linkage mandelbrot_ispc launcher; it is only declared
// when the translation unit is compiled with _CUDART_ defined.  The
// definition is not in this file and must be supplied at link time.
extern "C" void mandelbrot_ispc(float x0, float y0, float x1, float y1,
                                int width, int height,
                                int maxIterations, int output[]);
#endif
|
||||||
|
|
||||||
|
|
||||||
extern void mandelbrot_serial(float x0, float y0, float x1, float y1,
|
extern void mandelbrot_serial(float x0, float y0, float x1, float y1,
|
||||||
@@ -125,10 +132,10 @@ int main(int argc, char *argv[]) {
|
|||||||
for (unsigned int i = 0; i < width * height; ++i)
|
for (unsigned int i = 0; i < width * height; ++i)
|
||||||
buf[i] = 0;
|
buf[i] = 0;
|
||||||
reset_and_start_timer();
|
reset_and_start_timer();
|
||||||
#if 0
|
#ifdef _CUDART_
|
||||||
const double t0 = rtc();
|
const double t0 = rtc();
|
||||||
mandelbrot_ispc(x0, y0, x1, y1, width, height, maxIterations, (int*)d_buf);
|
mandelbrot_ispc(x0, y0, x1, y1, width, height, maxIterations, (int*)d_buf);
|
||||||
double dt = rtc() - t0; //get_elapsed_mcycles();
|
double dt = 1e3*(rtc() - t0); //get_elapsed_mcycles();
|
||||||
#else
|
#else
|
||||||
const char * func_name = "mandelbrot_ispc";
|
const char * func_name = "mandelbrot_ispc";
|
||||||
void *func_args[] = {&x0, &y0, &x1, &y1, &width, &height, &maxIterations, &d_buf};
|
void *func_args[] = {&x0, &y0, &x1, &y1, &width, &height, &maxIterations, &d_buf};
|
||||||
|
|||||||
10
func.cpp
10
func.cpp
@@ -568,6 +568,16 @@ Function::GenerateIR() {
|
|||||||
Assert(type != NULL);
|
Assert(type != NULL);
|
||||||
if (type->isExported) { // && g->target->getISA() != Target::VPTX64) {
|
if (type->isExported) { // && g->target->getISA() != Target::VPTX64) {
|
||||||
if (!type->isTask) {
|
if (!type->isTask) {
|
||||||
|
if (g->target->isPTX() && g->target->getISA() == Target::NVPTX64)
|
||||||
|
{
|
||||||
|
llvm::NamedMDNode* annotations =
|
||||||
|
m->module->getOrInsertNamedMetadata("nvvm.annotations");
|
||||||
|
llvm::SmallVector<llvm::Value*, 3> av;
|
||||||
|
av.push_back(function);
|
||||||
|
av.push_back(llvm::MDString::get(*g->ctx, "kernel"));
|
||||||
|
av.push_back(llvm::ConstantInt::get(llvm::IntegerType::get(*g->ctx,32), 1));
|
||||||
|
annotations->addOperand(llvm::MDNode::get(*g->ctx, av));
|
||||||
|
}
|
||||||
llvm::FunctionType *ftype = type->LLVMFunctionType(g->ctx, true);
|
llvm::FunctionType *ftype = type->LLVMFunctionType(g->ctx, true);
|
||||||
llvm::GlobalValue::LinkageTypes linkage = llvm::GlobalValue::ExternalLinkage;
|
llvm::GlobalValue::LinkageTypes linkage = llvm::GlobalValue::ExternalLinkage;
|
||||||
std::string functionName = sym->name;
|
std::string functionName = sym->name;
|
||||||
|
|||||||
Reference in New Issue
Block a user