added Makefile

This commit is contained in:
Evghenii
2013-11-13 13:45:24 +01:00
parent 2dd7128db5
commit d3ade0654e
2 changed files with 64 additions and 7 deletions

View File

@@ -0,0 +1,53 @@
# Build script for the aob_cu host program and its ISPC-generated PTX kernels.
#
# Pipeline, as wired by the rules below:
#   <name>.ispc --ispc--> <name>_ispc_nvptx64.bc --llvm-dis--> .ll --ptxgen--> .ptx
#   all per-source .ptx files are concatenated into kernel.ptx, which is then
#   copied to __kernels.ptx when the host program is linked.
#
# CUDATK is not defined here; it must come from the environment or the make
# command line and point at the CUDA toolkit root (its include/ dir is used).

# Remove a target whose recipe failed. Without this, a failing "tool > $@"
# leaves a truncated file behind that later runs would treat as up to date.
.DELETE_ON_ERROR:

# 'all' and 'clean' are commands, not files.
.PHONY: all clean

# ---- Inputs and output ------------------------------------------------------
PROG=aob_cu
ISPC_SRC=ao1.ispc
CXX_SRC=ao_cu.cpp

# ---- Host toolchain ---------------------------------------------------------
CXX=g++
CXXFLAGS=-O3 -I$(CUDATK)/include
LD=g++
LDFLAGS=-lcuda

# ---- ISPC / LLVM / PTX toolchain --------------------------------------------
ISPC=ispc
ISPCFLAGS=-O3 --math-lib=default --target=nvptx64,avx
LLVM32 = $(HOME)/usr/local/llvm/bin-3.2
# NOTE(review): LLVM is not referenced by any rule in this file.
LLVM = $(HOME)/usr/local/llvm/bin-3.3
PTXGEN = $(HOME)/ptxgen
LLVM32DIS=$(LLVM32)/bin/llvm-dis

.SUFFIXES: .bc .o .ptx .cu _ispc_nvptx64.bc

# ---- Derived file lists -----------------------------------------------------
# NOTE(review): ISPC_OBJ and ISPC_BC are not referenced by any rule in this file.
ISPC_OBJ=$(ISPC_SRC:%.ispc=%_ispc.o)
ISPC_BC=$(ISPC_SRC:%.ispc=%_ispc_nvptx64.bc)
PTXSRC=$(ISPC_SRC:%.ispc=%_ispc_nvptx64.ptx)
CXX_OBJ=$(CXX_SRC:%.cpp=%.o)

# Default goal.
all: $(PROG)

# Host objects are rebuilt whenever kernel.ptx changes.
# NOTE(review): presumably this also forces the ISPC step (which emits the
# *_ispc.h header) to run before the C++ compile -- confirm.
$(CXX_OBJ) : kernel.ptx

# Link the host program; kernel.ptx is first copied to __kernels.ptx.
# NOTE(review): presumably the executable loads __kernels.ptx at run time -- confirm.
$(PROG): $(CXX_OBJ) kernel.ptx
	/bin/cp kernel.ptx __kernels.ptx
	$(LD) -o $@ $(CXX_OBJ) $(LDFLAGS)

%.o: %.cpp
	$(CXX) $(CXXFLAGS) -o $@ -c $<

# Compile an ISPC source to LLVM bitcode plus a C/C++ header.
# The name given to -o is <stem>_ispc.bc while the target is
# <stem>_ispc_nvptx64.bc.
# NOTE(review): presumably ispc inserts the target name into the output file
# name when several --target values are given -- confirm against the ispc
# version in use.
%_ispc_nvptx64.bc: %.ispc
	$(ISPC) $(ISPCFLAGS) --emit-llvm -o $*_ispc.bc -h $*_ispc.h $<

# Disassemble bitcode to textual IR, then translate the IR to PTX.
%.ptx: %.bc
	$(LLVM32DIS) $< -o $*.ll
	$(PTXGEN) $*.ll > $@

# Concatenate every per-source PTX file into the single kernel module.
kernel.ptx: $(PTXSRC)
	cat $^ > $@

# Remove generated kernels, intermediates, host objects and the executable.
clean:
	/bin/rm -rf *.ptx *.bc *.ll $(CXX_OBJ) $(PROG)

View File

@@ -51,8 +51,8 @@
#include <algorithm>
#include <sys/types.h>
#include "ao_ispc.h"
using namespace ispc;
//#include "ao1_ispc.h"
//using namespace ispc;
#include "../timing.h"
@@ -191,7 +191,7 @@ CUmodule loadModule(const char * module)
optionVals[5] = (void*) 1;
// Max # of registers/pthread
options[6] = CU_JIT_MAX_REGISTERS;
int jitRegCount = 64;
int jitRegCount = 48;
optionVals[6] = (void *)(size_t)jitRegCount;
// Create a pending linker invocation
@@ -321,7 +321,7 @@ extern "C"
{
return NULL;
}
void CUDALaunch(
double CUDALaunch(
void **handlePtr,
const char * func_name,
void **func_args)
@@ -330,8 +330,12 @@ extern "C"
const char * module = &module_str[0];
CUmodule cudaModule = loadModule(module);
CUfunction cudaFunction = getFunction(cudaModule, func_name);
const double t0 = rtc();
deviceLaunch(cudaFunction, func_args);
checkCudaErrors(cuStreamSynchronize(0));
const double dt = rtc() - t0;
unloadModule(cudaModule);
return dt;
}
void CUDASync(void *handle)
{
@@ -452,22 +456,22 @@ int main(int argc, char **argv)
memcpyH2D(d_fimg, fimg, width*height*3*sizeof(float));
reset_and_start_timer();
const double t0 = rtc();
#if 0
const double t0 = rtc();
ao_ispc_tasks(
width,
height,
NSUBSAMPLES,
(float*)d_fimg);
// double t = (rtc() - t0); //get_elapsed_mcycles();
#else
const char * func_name = "ao_ispc_tasks";
int arg_1 = width;
int arg_2 = height;
int arg_3 = NSUBSAMPLES;
void *func_args[] = {&arg_1, &arg_2, &arg_3, (float*)&d_fimg};
CUDALaunch(NULL, func_name, func_args);
const double t = CUDALaunch(NULL, func_name, func_args);
#endif
double t = (rtc() - t0); //get_elapsed_mcycles();
minTimeISPCTasks = std::min(minTimeISPCTasks, t);
}