added Makefile
This commit is contained in:
53
examples_cuda/aobench/Makefile_gpu
Normal file
53
examples_cuda/aobench/Makefile_gpu
Normal file
@@ -0,0 +1,53 @@
|
||||
PROG=aob_cu
|
||||
ISPC_SRC=ao1.ispc
|
||||
CXX_SRC=ao_cu.cpp
|
||||
|
||||
CXX=g++
|
||||
# Host compiler flags. CUDATK is not assigned anywhere in this Makefile, so it
# must be supplied by the environment or on the make command line
# (presumably the CUDA toolkit root that contains include/ — confirm).
CXXFLAGS=-O3 -I$(CUDATK)/include
|
||||
LD=g++
|
||||
LDFLAGS=-lcuda
|
||||
|
||||
ISPC=ispc
|
||||
# Flags passed to every ispc invocation. Two targets are requested at once
# (nvptx64 and avx).
# NOTE(review): the rules below expect a *_ispc_nvptx64.bc file, so ispc
# presumably appends the target name to the -o file name when several targets
# are given — confirm against the ispc build in use.
ISPCFLAGS=-O3 --math-lib=default --target=nvptx64,avx
|
||||
|
||||
LLVM32 = $(HOME)/usr/local/llvm/bin-3.2
|
||||
LLVM = $(HOME)/usr/local/llvm/bin-3.3
|
||||
PTXGEN = $(HOME)/ptxgen
|
||||
|
||||
LLVM32DIS=$(LLVM32)/bin/llvm-dis
|
||||
|
||||
# Registers extra suffixes for old-style suffix rules.
# NOTE(review): every rule in this file is a pattern rule (%), which does not
# consult .SUFFIXES, and "_ispc_nvptx64.bc" has no leading dot — confirm
# whether this line is still needed.
.SUFFIXES: .bc .o .ptx .cu _ispc_nvptx64.bc
|
||||
|
||||
|
||||
ISPC_OBJ=$(ISPC_SRC:%.ispc=%_ispc.o)
|
||||
ISPC_BC=$(ISPC_SRC:%.ispc=%_ispc_nvptx64.bc)
|
||||
PTXSRC=$(ISPC_SRC:%.ispc=%_ispc_nvptx64.ptx)
|
||||
CXX_OBJ=$(CXX_SRC:%.cpp=%.o)
|
||||
|
||||
# "all" and "clean" name actions, not files. Declaring them phony keeps make
# from skipping them when a file called "all" or "clean" exists in this
# directory.
.PHONY: all clean

# Default goal: build the host executable.
all: $(PROG)
|
||||
|
||||
|
||||
# Make the host objects depend on kernel.ptx: the PTX (and everything the ispc
# rule generates on the way to it) is built before any .cpp file is compiled,
# and the objects are rebuilt whenever kernel.ptx changes.
$(CXX_OBJ) : kernel.ptx
|
||||
# Link the host executable. The recipe first copies kernel.ptx to
# __kernels.ptx, then links the host objects against $(LDFLAGS).
# NOTE(review): __kernels.ptx is presumably the file name the host program
# opens at run time — confirm in the host source.
$(PROG): $(CXX_OBJ) kernel.ptx
|
||||
/bin/cp kernel.ptx __kernels.ptx
|
||||
$(LD) -o $@ $(CXX_OBJ) $(LDFLAGS)
|
||||
|
||||
%.o: %.cpp
|
||||
$(CXX) $(CXXFLAGS) -o $@ -c $<
|
||||
|
||||
|
||||
%_ispc_nvptx64.bc: %.ispc
|
||||
# Compile the .ispc source to LLVM bitcode and emit the matching C/C++ header.
# --emit-llvm is passed once only (it used to be repeated at the end of the
# command line).
# NOTE(review): -o names <stem>_ispc.bc while the rule target is
# <stem>_ispc_nvptx64.bc; this relies on ispc appending the target name to the
# output file for multi-target builds — confirm.
$(ISPC) $(ISPCFLAGS) --emit-llvm -o `basename $< .ispc`_ispc.bc -h `basename $< .ispc`_ispc.h $<
|
||||
|
||||
%.ptx: %.bc
|
||||
$(LLVM32DIS) $<
|
||||
$(PTXGEN) `basename $< .bc`.ll > $@
|
||||
|
||||
kernel.ptx: $(PTXSRC)
|
||||
# Concatenate every per-source PTX file ($^) into the rule's target ($@).
cat $^ > $@
|
||||
|
||||
clean:
|
||||
# Remove generated PTX, bitcode and disassembly, the host object files and the
# linked program. The host objects were previously left behind by "clean".
/bin/rm -rf *.ptx *.bc *.ll $(CXX_OBJ) $(PROG)
|
||||
|
||||
|
||||
|
||||
@@ -51,8 +51,8 @@
|
||||
#include <algorithm>
|
||||
#include <sys/types.h>
|
||||
|
||||
#include "ao_ispc.h"
|
||||
using namespace ispc;
|
||||
//#include "ao1_ispc.h"
|
||||
//using namespace ispc;
|
||||
|
||||
#include "../timing.h"
|
||||
|
||||
@@ -191,7 +191,7 @@ CUmodule loadModule(const char * module)
|
||||
optionVals[5] = (void*) 1;
|
||||
// Max # of registers/pthread
|
||||
options[6] = CU_JIT_MAX_REGISTERS;
|
||||
int jitRegCount = 64;
|
||||
int jitRegCount = 48;
|
||||
optionVals[6] = (void *)(size_t)jitRegCount;
|
||||
|
||||
// Create a pending linker invocation
|
||||
@@ -321,7 +321,7 @@ extern "C"
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
void CUDALaunch(
|
||||
double CUDALaunch(
|
||||
void **handlePtr,
|
||||
const char * func_name,
|
||||
void **func_args)
|
||||
@@ -330,8 +330,12 @@ extern "C"
|
||||
const char * module = &module_str[0];
|
||||
CUmodule cudaModule = loadModule(module);
|
||||
CUfunction cudaFunction = getFunction(cudaModule, func_name);
|
||||
const double t0 = rtc();
|
||||
deviceLaunch(cudaFunction, func_args);
|
||||
checkCudaErrors(cuStreamSynchronize(0));
|
||||
const double dt = rtc() - t0;
|
||||
unloadModule(cudaModule);
|
||||
return dt;
|
||||
}
|
||||
void CUDASync(void *handle)
|
||||
{
|
||||
@@ -452,22 +456,22 @@ int main(int argc, char **argv)
|
||||
memcpyH2D(d_fimg, fimg, width*height*3*sizeof(float));
|
||||
|
||||
reset_and_start_timer();
|
||||
const double t0 = rtc();
|
||||
#if 0
|
||||
const double t0 = rtc();
|
||||
ao_ispc_tasks(
|
||||
width,
|
||||
height,
|
||||
NSUBSAMPLES,
|
||||
(float*)d_fimg);
|
||||
// double t = (rtc() - t0); //get_elapsed_mcycles();
|
||||
#else
|
||||
const char * func_name = "ao_ispc_tasks";
|
||||
int arg_1 = width;
|
||||
int arg_2 = height;
|
||||
int arg_3 = NSUBSAMPLES;
|
||||
void *func_args[] = {&arg_1, &arg_2, &arg_3, (float*)&d_fimg};
|
||||
CUDALaunch(NULL, func_name, func_args);
|
||||
const double t = CUDALaunch(NULL, func_name, func_args);
|
||||
#endif
|
||||
double t = (rtc() - t0); //get_elapsed_mcycles();
|
||||
minTimeISPCTasks = std::min(minTimeISPCTasks, t);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user