some tuning, adding cuda kernels

This commit is contained in:
Evghenii
2013-11-14 22:33:58 +01:00
parent f12826bac5
commit 294fb039fe
5 changed files with 18 additions and 6 deletions

View File

@@ -10,7 +10,7 @@ CCFLAGS+=-Iobjs/ -O2
LIBS=-lm $(TASK_LIB) -lstdc++
ISPC=ispc
ISPC_FLAGS+=-O2
ISPC_FLAGS+=-O2 --opt=fast-math --math-lib=fast
ISPC_HEADER=objs/$(ISPC_SRC:.ispc=_ispc.h)
ARCH:=$(shell uname -m | sed -e s/x86_64/x86/ -e s/i686/x86/ -e s/arm.*/arm/ -e s/sa110/arm/)

View File

@@ -8,7 +8,7 @@ LD=g++
LDFLAGS=-lcuda
ISPC=ispc
ISPCFLAGS=-O3 --math-lib=default --target=nvptx64
ISPCFLAGS=-O3 --math-lib=default --target=nvptx64 --opt=fast-math
LLVM32 = $(HOME)/usr/local/llvm/bin-3.2
LLVM = $(HOME)/usr/local/llvm/bin-3.3

View File

@@ -8,7 +8,7 @@ LD=g++
LDFLAGS=-lcuda
ISPC=ispc
ISPCFLAGS=-O3 --math-lib=default --target=nvptx64
ISPCFLAGS=-O3 --math-lib=default --target=nvptx64 --opt=fast-math
LLVM32 = $(HOME)/usr/local/llvm/bin-3.2
LLVM = $(HOME)/usr/local/llvm/bin-3.3

View File

@@ -8,7 +8,7 @@ LD=g++
LDFLAGS=-lcuda
ISPC=ispc
ISPCFLAGS=-O3 --math-lib=default --target=nvptx64,avx
ISPCFLAGS=-O3 --math-lib=default --target=nvptx64 --opt=fast-math
LLVM32 = $(HOME)/usr/local/llvm/bin-3.2
LLVM = $(HOME)/usr/local/llvm/bin-3.3
@@ -37,7 +37,7 @@ $(PROG): $(CXX_OBJ) kernel.ptx
%_ispc_nvptx64.bc: %.ispc
$(ISPC) $(ISPCFLAGS) --emit-llvm -o `basename $< .ispc`_ispc.bc -h `basename $< .ispc`_ispc.h $< --emit-llvm
$(ISPC) $(ISPCFLAGS) --emit-llvm -o `basename $< .ispc`_ispc_nvptx64.bc -h `basename $< .ispc`_ispc.h $< --emit-llvm
%.ptx: %.bc
$(LLVM32DIS) $<

View File

@@ -362,7 +362,6 @@ volume_tile(int x0, int y0, int x1,
}
extern "C"
__global__ void
volume_task(float density[], int _nVoxels[3],
const float _raster2camera[4][4],
@@ -432,3 +431,16 @@ volume_task(float density[], int _nVoxels[3],
}
// Entry kernel: splits the width x height image into fixed-size tiles and
// launches volume_task from device code to process them.
//
// All arguments are forwarded unchanged to volume_task. The child launch is
// issued only by the lane(s) for which programIndex evaluates to 0.
// NOTE(review): programIndex is defined outside this block -- presumably a
// per-warp lane index; confirm against its definition.
extern "C"
__global__ void
volume_ispc_tasks(float density[], int nVoxels[3],
                  const float raster2camera[4][4],
                  const float camera2world[4][4],
                  int width, int height, float image[]) {
    // Every other lane has nothing to do here.
    if (programIndex != 0)
        return;

    // Tile dimensions in pixels; partial tiles at the right/bottom edges
    // still count as tasks, hence the rounding-up divisions.
    const int tileW = 8;
    const int tileH = 8;
    const int tilesAcross = (width + (tileW - 1)) / tileW;
    const int tilesDown = (height + (tileH - 1)) / tileH;
    const int nTasks = tilesAcross * tilesDown;

    // One 128-thread block per four tasks, rounded up. Written as
    // (nTasks-1)/4+1 so that nTasks == 0 still yields a single block.
    const int numBlocks = (nTasks - 1) / 4 + 1;
    volume_task<<<numBlocks, 128>>>(density, nVoxels, raster2camera, camera2world,
                                    width, height, image);
}