Patch summary (text before the first "diff --git" line is ignored by patch tools):

  examples_cuda/mandelbrot_tasks3d/Makefile_gpu
    - "all" now lists $(ISPC_BC) ahead of $(PROG).
    - New "CUDART" target: runs the _cuobj sub-make, then links
      mandel_cu_nvcc with -D_CUDART_ against libmandel_cudart.a, cudart
      and cudadevrt.

  examples_cuda/mandelbrot_tasks3d/_cuobj/Makefile (new)
    - Builds lib$(FILE)_cudart.a from $(FILE).cu.
    - "nvcc -dc ... -dryrun" output is turned into run.sh: sed removes the
      "#$" markers, and awk replaces every command whose first word is
      "cicc" with "cp ../__kernels.ptx <last argument of that command>".
      All other commands are passed through unchanged.
    - run.sh is executed, then the object is device-linked and archived.

  examples_cuda/mandelbrot_tasks3d/_cuobj/mandel.cu (new)
    - Empty-bodied definitions carrying the mangled ispc symbol names.
    - Host wrapper mandelbrot_ispc(): launches the mangled
      mandelbrot_ispc___... kernel with <<<1,32>>> and then calls
      cudaDeviceSynchronize().

  examples_cuda/mandelbrot_tasks3d/mandel_cu.cpp
    - Declares the extern "C" wrapper when _CUDART_ is defined.
    - The former "#if 0" branch is now selected by _CUDART_; dt is scaled
      by 1e3.

  func.cpp
    - For exported, non-task functions on a PTX target whose ISA is
      NVPTX64, appends {function, "kernel", i32 1} to the module's
      "nvvm.annotations" named metadata.

Reviewer changes relative to the patch as received:
    - func.cpp: llvm::SmallVector had no template arguments; restored as
      <llvm::Value*, 3>.
      NOTE(review): element type assumed from MDNode::get() usage --
      confirm against the LLVM version in use.
    - _cuobj/Makefile: awk "$0" written as "$$0" so make passes a literal
      "$0" to awk instead of expanding an empty make variable.
    - Makefile_gpu: sub-make invoked through $(MAKE).
    - mandel.cu: the int-returning stub now returns 0 instead of falling
      off the end of a non-void function.

NOTE(review): the patch arrived with its line structure collapsed. Leading
whitespace and the position of blank context lines were reconstructed from
the hunk line counts and may need adjusting (or --ignore-whitespace) to
apply. The post-image blob ids on the "index" lines predate the reviewer
changes above.

diff --git a/examples_cuda/mandelbrot_tasks3d/Makefile_gpu b/examples_cuda/mandelbrot_tasks3d/Makefile_gpu
index e5f8e001..0df687d7 100644
--- a/examples_cuda/mandelbrot_tasks3d/Makefile_gpu
+++ b/examples_cuda/mandelbrot_tasks3d/Makefile_gpu
@@ -26,7 +26,11 @@ ISPC_BC=$(ISPC_SRC:%.ispc=%_ispc_nvptx64.bc)
 PTXSRC=$(ISPC_SRC:%.ispc=%_ispc_nvptx64.ptx)
 CXX_OBJ=$(CXX_SRC:%.cpp=%.o)
 
-all: $(PROG)
+all: $(ISPC_BC) $(PROG)
+
+CUDART:
+	cd _cuobj && $(MAKE)
+	g++ -o mandel_cu_nvcc mandel_cu.cpp -I$(CUDATK)/include -lcuda mandelbrot_tasks_serial.cpp -L./_cuobj -lmandel_cudart -lcudart -L$(CUDATK)/lib64 -D_CUDART_ -lcudadevrt
 
 
 $(CXX_OBJ) : kernel.ptx
diff --git a/examples_cuda/mandelbrot_tasks3d/_cuobj/Makefile b/examples_cuda/mandelbrot_tasks3d/_cuobj/Makefile
new file mode 100644
index 00000000..645577a5
--- /dev/null
+++ b/examples_cuda/mandelbrot_tasks3d/_cuobj/Makefile
@@ -0,0 +1,15 @@
+FILE=mandel
+
+LIB=lib$(FILE)_cudart.a
+all: $(LIB)
+
+
+$(LIB) : $(FILE).cu
+	nvcc -dc $(FILE).cu -arch=sm_35 -dryrun 2>&1 | sed 's/\#\$$//g'|awk '{ if ($$1 == "cicc") print "cp ../__kernels.ptx " $$NF; else print $$0 }' > run.sh
+	sh run.sh
+	nvcc -dlink -o $(FILE)_dlink.o $(FILE).o -lcudadevrt -arch=sm_35
+	nvcc $(FILE).o $(FILE)_dlink.o --lib -o lib$(FILE)_cudart.a
+
+clean:
+	/bin/rm -f *.o *.a run.sh
+
diff --git a/examples_cuda/mandelbrot_tasks3d/_cuobj/mandel.cu b/examples_cuda/mandelbrot_tasks3d/_cuobj/mandel.cu
new file mode 100644
index 00000000..12565670
--- /dev/null
+++ b/examples_cuda/mandelbrot_tasks3d/_cuobj/mandel.cu
@@ -0,0 +1,22 @@
+extern "C" static inline int __device__ mandel___vyfvyfvyi_(float c_re, float c_im, int count) { return 0; }
+extern "C" void __global__ mandelbrot_scanline___unfunfunfunfuniuniuniuniuniun_3C_uni_3E_( float x0, float dx,
+                        float y0, float dy,
+                        int width, int height,
+                        int xspan, int yspan,
+                        int maxIterations, int output[]) {}
+extern "C" void __global__ mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E_( float x0, float y0,
+                          float x1, float y1,
+                          int width, int height,
+                          int maxIterations, int output[]) { }
+
+extern "C"
+void mandelbrot_ispc(float x0, float y0,
+                          float x1, float y1,
+                          int width, int height,
+                          int maxIterations, int output[])
+{
+  mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E_<<<1,32>>>
+    (x0,y0,x1,y1,width,height,maxIterations,output);
+  cudaDeviceSynchronize();
+}
+
diff --git a/examples_cuda/mandelbrot_tasks3d/mandel_cu.cpp b/examples_cuda/mandelbrot_tasks3d/mandel_cu.cpp
index f33b3138..73d3aa0f 100644
--- a/examples_cuda/mandelbrot_tasks3d/mandel_cu.cpp
+++ b/examples_cuda/mandelbrot_tasks3d/mandel_cu.cpp
@@ -44,6 +44,13 @@
 
 #include "../timing.h"
 #include "../cuda_ispc.h"
+#ifdef _CUDART_
+extern "C"
+void mandelbrot_ispc(float x0, float y0,
+                     float x1, float y1,
+                     int width, int height,
+                     int maxIterations, int output[]);
+#endif
 
 extern void mandelbrot_serial(float x0, float y0,
                               float x1, float y1,
@@ -125,10 +132,10 @@ int main(int argc, char *argv[]) {
         for (unsigned int i = 0; i < width * height; ++i)
             buf[i] = 0;
         reset_and_start_timer();
-#if 0
+#ifdef _CUDART_
         const double t0 = rtc();
         mandelbrot_ispc(x0, y0, x1, y1, width, height, maxIterations, (int*)d_buf);
-        double dt = rtc() - t0; //get_elapsed_mcycles();
+        double dt = 1e3*(rtc() - t0); //get_elapsed_mcycles();
 #else
         const char * func_name = "mandelbrot_ispc";
         void *func_args[] = {&x0, &y0, &x1, &y1, &width, &height, &maxIterations, &d_buf};
diff --git a/func.cpp b/func.cpp
index c60c1f2c..bb2fd785 100644
--- a/func.cpp
+++ b/func.cpp
@@ -568,6 +568,16 @@ Function::GenerateIR() {
         Assert(type != NULL);
         if (type->isExported) { // && g->target->getISA() != Target::VPTX64) {
             if (!type->isTask) {
+                if (g->target->isPTX() && g->target->getISA() == Target::NVPTX64)
+                {
+                    llvm::NamedMDNode* annotations =
+                        m->module->getOrInsertNamedMetadata("nvvm.annotations");
+                    llvm::SmallVector<llvm::Value*, 3> av;
+                    av.push_back(function);
+                    av.push_back(llvm::MDString::get(*g->ctx, "kernel"));
+                    av.push_back(llvm::ConstantInt::get(llvm::IntegerType::get(*g->ctx,32), 1));
+                    annotations->addOperand(llvm::MDNode::get(*g->ctx, av));
+                }
                 llvm::FunctionType *ftype = type->LLVMFunctionType(g->ctx, true);
                 llvm::GlobalValue::LinkageTypes linkage = llvm::GlobalValue::ExternalLinkage;
                 std::string functionName = sym->name;