Added some PTX options (PTXGEN: -opt=3 -ftz=1 -prec-div=0 -prec-sqrt=0 -fma=1)

This commit is contained in:
Evghenii
2013-11-15 17:23:22 +01:00
parent 6b65f6d9f4
commit 3454f51d2c
9 changed files with 17 additions and 3 deletions

View File

@@ -68,6 +68,7 @@ static void createContext(
// Create driver context // Create driver context
checkCudaErrors(cuCtxCreate(&context, 0, device)); checkCudaErrors(cuCtxCreate(&context, 0, device));
#if 0
size_t limit; size_t limit;
checkCudaErrors(cuCtxGetLimit(&limit, CU_LIMIT_STACK_SIZE)); checkCudaErrors(cuCtxGetLimit(&limit, CU_LIMIT_STACK_SIZE));
fprintf(stderr, " stack_limit= %llu KB\n", limit/1024); fprintf(stderr, " stack_limit= %llu KB\n", limit/1024);
@@ -75,6 +76,7 @@ static void createContext(
fprintf(stderr, " heap_limit= %llu KB\n", limit/1024); fprintf(stderr, " heap_limit= %llu KB\n", limit/1024);
checkCudaErrors(cuCtxSetLimit(CU_LIMIT_STACK_SIZE,stackLimit)); checkCudaErrors(cuCtxSetLimit(CU_LIMIT_STACK_SIZE,stackLimit));
checkCudaErrors(cuCtxSetLimit(CU_LIMIT_MALLOC_HEAP_SIZE,heapLimit)); checkCudaErrors(cuCtxSetLimit(CU_LIMIT_MALLOC_HEAP_SIZE,heapLimit));
#endif
} }
static void destroyContext() static void destroyContext()
{ {

View File

@@ -13,6 +13,8 @@ ISPCFLAGS=-O3 --math-lib=default --target=nvptx64 --opt=fast-math
LLVM32 = $(HOME)/usr/local/llvm/bin-3.2 LLVM32 = $(HOME)/usr/local/llvm/bin-3.2
LLVM = $(HOME)/usr/local/llvm/bin-3.3 LLVM = $(HOME)/usr/local/llvm/bin-3.3
PTXGEN = $(HOME)/ptxgen PTXGEN = $(HOME)/ptxgen
PTXGEN += -opt=3
PTXGEN += -ftz=1 -prec-div=0 -prec-sqrt=0 -fma=1
LLVM32DIS=$(LLVM32)/bin/llvm-dis LLVM32DIS=$(LLVM32)/bin/llvm-dis

View File

@@ -171,7 +171,7 @@ struct Uniform
shptr[chunk][elem] = value; shptr[chunk][elem] = value;
} }
}; };
#elif 1 #elif 0
template<typename T, int N> template<typename T, int N>
struct Uniform struct Uniform
{ {
@@ -184,7 +184,7 @@ struct Uniform
__device__ inline Uniform() __device__ inline Uniform()
{ {
#if 0 #if 1
if (programIndex == 0) if (programIndex == 0)
data = new T[N]; data = new T[N];
ptr[0] = __shfl(ptr[0], 0); ptr[0] = __shfl(ptr[0], 0);
@@ -200,7 +200,7 @@ struct Uniform
} }
__device__ inline ~Uniform() __device__ inline ~Uniform()
{ {
#if 0 #if 1
if (programIndex == 0) if (programIndex == 0)
delete data; delete data;
#else #else
@@ -821,4 +821,5 @@ RenderStatic(InputHeader inputHeaderPtr[],
inputHeaderPtr, inputDataPtr, visualizeLightCount, inputHeaderPtr, inputDataPtr, visualizeLightCount,
framebuffer_r, framebuffer_g, framebuffer_b); framebuffer_r, framebuffer_g, framebuffer_b);
cudaDeviceSynchronize(); cudaDeviceSynchronize();
cudaDeviceSynchronize();
} }

View File

@@ -549,6 +549,7 @@ RenderStatic(uniform InputHeader inputHeaderPtr[],
launch[num_groups] RenderTile(num_groups_x, num_groups_y, launch[num_groups] RenderTile(num_groups_x, num_groups_y,
inputHeaderPtr, inputDataPtr, visualizeLightCount, inputHeaderPtr, inputDataPtr, visualizeLightCount,
framebuffer_r, framebuffer_g, framebuffer_b); framebuffer_r, framebuffer_g, framebuffer_b);
sync;
} }

View File

@@ -13,6 +13,8 @@ ISPCFLAGS=-O3 --math-lib=default --target=nvptx64 --opt=fast-math
LLVM32 = $(HOME)/usr/local/llvm/bin-3.2 LLVM32 = $(HOME)/usr/local/llvm/bin-3.2
LLVM = $(HOME)/usr/local/llvm/bin-3.3 LLVM = $(HOME)/usr/local/llvm/bin-3.3
PTXGEN = $(HOME)/ptxgen PTXGEN = $(HOME)/ptxgen
PTXGEN += -opt=3
PTXGEN += -ftz=1 -prec-div=0 -prec-sqrt=0 -fma=1
LLVM32DIS=$(LLVM32)/bin/llvm-dis LLVM32DIS=$(LLVM32)/bin/llvm-dis

View File

@@ -119,5 +119,6 @@ mandelbrot_ispc(uniform float x0, uniform float y0,
#endif #endif
mandelbrot_scanline(x0, dx, y0, dy, width, height, xspan, yspan, mandelbrot_scanline(x0, dx, y0, dy, width, height, xspan, yspan,
maxIterations, output); maxIterations, output);
sync;
} }
#endif #endif

View File

@@ -13,6 +13,8 @@ ISPCFLAGS=-O3 --math-lib=default --target=nvptx64 --opt=fast-math
LLVM32 = $(HOME)/usr/local/llvm/bin-3.2 LLVM32 = $(HOME)/usr/local/llvm/bin-3.2
LLVM = $(HOME)/usr/local/llvm/bin-3.3 LLVM = $(HOME)/usr/local/llvm/bin-3.3
PTXGEN = $(HOME)/ptxgen PTXGEN = $(HOME)/ptxgen
PTXGEN += -opt=3
PTXGEN += -ftz=1 -prec-div=0 -prec-sqrt=0 -fma=1
LLVM32DIS=$(LLVM32)/bin/llvm-dis LLVM32DIS=$(LLVM32)/bin/llvm-dis

View File

@@ -13,6 +13,8 @@ ISPCFLAGS=-O3 --math-lib=default --target=nvptx64 --opt=fast-math
LLVM32 = $(HOME)/usr/local/llvm/bin-3.2 LLVM32 = $(HOME)/usr/local/llvm/bin-3.2
LLVM = $(HOME)/usr/local/llvm/bin-3.3 LLVM = $(HOME)/usr/local/llvm/bin-3.3
PTXGEN = $(HOME)/ptxgen PTXGEN = $(HOME)/ptxgen
PTXGEN += -opt=3
PTXGEN += -ftz=1 -prec-div=0 -prec-sqrt=0 -fma=1
LLVM32DIS=$(LLVM32)/bin/llvm-dis LLVM32DIS=$(LLVM32)/bin/llvm-dis

View File

@@ -419,4 +419,5 @@ volume_ispc_tasks(uniform float density[], uniform int nVoxels[3],
uniform int nTasks = ((width+(dx-1))/dx) * ((height+(dy-1))/dy); uniform int nTasks = ((width+(dx-1))/dx) * ((height+(dy-1))/dy);
launch[nTasks] volume_task(density, nVoxels, raster2camera, camera2world, launch[nTasks] volume_task(density, nVoxels, raster2camera, camera2world,
width, height, image); width, height, image);
sync;
} }