some tuning, adding cuda kernels
This commit is contained in:
@@ -10,7 +10,7 @@ CCFLAGS+=-Iobjs/ -O2
|
||||
|
||||
LIBS=-lm $(TASK_LIB) -lstdc++
|
||||
ISPC=ispc
|
||||
ISPC_FLAGS+=-O2
|
||||
ISPC_FLAGS+=-O2 --opt=fast-math --math-lib=fast
|
||||
ISPC_HEADER=objs/$(ISPC_SRC:.ispc=_ispc.h)
|
||||
|
||||
ARCH:=$(shell uname -m | sed -e s/x86_64/x86/ -e s/i686/x86/ -e s/arm.*/arm/ -e s/sa110/arm/)
|
||||
|
||||
@@ -8,7 +8,7 @@ LD=g++
|
||||
LDFLAGS=-lcuda
|
||||
|
||||
ISPC=ispc
|
||||
ISPCFLAGS=-O3 --math-lib=default --target=nvptx64
|
||||
ISPCFLAGS=-O3 --math-lib=default --target=nvptx64 --opt=fast-math
|
||||
|
||||
LLVM32 = $(HOME)/usr/local/llvm/bin-3.2
|
||||
LLVM = $(HOME)/usr/local/llvm/bin-3.3
|
||||
|
||||
@@ -8,7 +8,7 @@ LD=g++
|
||||
LDFLAGS=-lcuda
|
||||
|
||||
ISPC=ispc
|
||||
ISPCFLAGS=-O3 --math-lib=default --target=nvptx64
|
||||
ISPCFLAGS=-O3 --math-lib=default --target=nvptx64 --opt=fast-math
|
||||
|
||||
LLVM32 = $(HOME)/usr/local/llvm/bin-3.2
|
||||
LLVM = $(HOME)/usr/local/llvm/bin-3.3
|
||||
|
||||
@@ -8,7 +8,7 @@ LD=g++
|
||||
LDFLAGS=-lcuda
|
||||
|
||||
ISPC=ispc
|
||||
ISPCFLAGS=-O3 --math-lib=default --target=nvptx64,avx
|
||||
ISPCFLAGS=-O3 --math-lib=default --target=nvptx64 --opt=fast-math
|
||||
|
||||
LLVM32 = $(HOME)/usr/local/llvm/bin-3.2
|
||||
LLVM = $(HOME)/usr/local/llvm/bin-3.3
|
||||
@@ -37,7 +37,7 @@ $(PROG): $(CXX_OBJ) kernel.ptx
|
||||
|
||||
|
||||
%_ispc_nvptx64.bc: %.ispc
|
||||
$(ISPC) $(ISPCFLAGS) --emit-llvm -o `basename $< .ispc`_ispc.bc -h `basename $< .ispc`_ispc.h $< --emit-llvm
|
||||
$(ISPC) $(ISPCFLAGS) --emit-llvm -o `basename $< .ispc`_ispc_nvptx64.bc -h `basename $< .ispc`_ispc.h $< --emit-llvm
|
||||
|
||||
%.ptx: %.bc
|
||||
$(LLVM32DIS) $<
|
||||
|
||||
@@ -362,7 +362,6 @@ volume_tile(int x0, int y0, int x1,
|
||||
}
|
||||
|
||||
|
||||
extern "C"
|
||||
__global__ void
|
||||
volume_task(float density[], int _nVoxels[3],
|
||||
const float _raster2camera[4][4],
|
||||
@@ -432,3 +431,16 @@ volume_task(float density[], int _nVoxels[3],
|
||||
}
|
||||
|
||||
|
||||
/*
 * volume_ispc_tasks -- kernel entry point that splits the image into
 * (dx,dy)-sized tiles and launches volume_task over them.
 *
 * All arguments (density, nVoxels, raster2camera, camera2world, width,
 * height, image) are forwarded unchanged to volume_task; this function
 * itself only reads width and height.
 *
 * The child launch is issued from inside this __global__ function, i.e.
 * from device code, and only by the thread(s) for which programIndex == 0.
 * NOTE(review): programIndex is not defined in the visible part of the
 * file -- presumably a macro for the lane index within a warp; confirm.
 * NOTE(review): no explicit synchronization follows the launch in this
 * function; confirm callers do not need the image before the child grid
 * has finished.
 */
extern "C"
|
||||
__global__ void
|
||||
volume_ispc_tasks( float density[], int nVoxels[3],
|
||||
const float raster2camera[4][4],
|
||||
|
||||
const float camera2world[4][4],
|
||||
int width, int height, float image[]) {
|
||||
// Launch tasks to work on (dx,dy)-sized tiles of the image
|
||||
// Tile dimensions are fixed at 8x8 here.
int dx = 8, dy = 8;
|
||||
// Tiles per row times tiles per column; each factor rounds up so partially
// covered tiles at the right/bottom edge are counted.
int nTasks = ((width+(dx-1))/dx) * ((height+(dy-1))/dy);
|
||||
// Restrict the launch to programIndex == 0 so it is not repeated by every
// thread executing this kernel.
if (programIndex == 0)
|
||||
// Grid size is nTasks/4 rounded up, with 128 threads per block.
// NOTE(review): presumably one task per 32-thread warp (4 per block) --
// confirm against volume_task's indexing.
volume_task<<<(nTasks-1)/4+1, 128>>>(density, nVoxels, raster2camera, camera2world,
|
||||
width, height, image);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user