Added some PTX options (ptxgen: -opt=3 -ftz=1 -prec-div=0 -prec-sqrt=0 -fma=1)

This commit is contained in:
Evghenii
2013-11-15 17:23:22 +01:00
parent 6b65f6d9f4
commit 3454f51d2c
9 changed files with 17 additions and 3 deletions

View File

@@ -68,6 +68,7 @@ static void createContext(
// Create driver context
checkCudaErrors(cuCtxCreate(&context, 0, device));
#if 0
size_t limit;
checkCudaErrors(cuCtxGetLimit(&limit, CU_LIMIT_STACK_SIZE));
fprintf(stderr, " stack_limit= %llu KB\n", limit/1024);
@@ -75,6 +76,7 @@ static void createContext(
fprintf(stderr, " heap_limit= %llu KB\n", limit/1024);
checkCudaErrors(cuCtxSetLimit(CU_LIMIT_STACK_SIZE,stackLimit));
checkCudaErrors(cuCtxSetLimit(CU_LIMIT_MALLOC_HEAP_SIZE,heapLimit));
#endif
}
static void destroyContext()
{

View File

@@ -13,6 +13,8 @@ ISPCFLAGS=-O3 --math-lib=default --target=nvptx64 --opt=fast-math
LLVM32 = $(HOME)/usr/local/llvm/bin-3.2
LLVM = $(HOME)/usr/local/llvm/bin-3.3
PTXGEN = $(HOME)/ptxgen
PTXGEN += -opt=3
PTXGEN += -ftz=1 -prec-div=0 -prec-sqrt=0 -fma=1
LLVM32DIS=$(LLVM32)/bin/llvm-dis

View File

@@ -171,7 +171,7 @@ struct Uniform
shptr[chunk][elem] = value;
}
};
#elif 1
#elif 0
template<typename T, int N>
struct Uniform
{
@@ -184,7 +184,7 @@ struct Uniform
__device__ inline Uniform()
{
#if 0
#if 1
if (programIndex == 0)
data = new T[N];
ptr[0] = __shfl(ptr[0], 0);
@@ -200,7 +200,7 @@ struct Uniform
}
__device__ inline ~Uniform()
{
#if 0
#if 1
if (programIndex == 0)
delete data;
#else
@@ -821,4 +821,5 @@ RenderStatic(InputHeader inputHeaderPtr[],
inputHeaderPtr, inputDataPtr, visualizeLightCount,
framebuffer_r, framebuffer_g, framebuffer_b);
cudaDeviceSynchronize();
cudaDeviceSynchronize();
}

View File

@@ -549,6 +549,7 @@ RenderStatic(uniform InputHeader inputHeaderPtr[],
launch[num_groups] RenderTile(num_groups_x, num_groups_y,
inputHeaderPtr, inputDataPtr, visualizeLightCount,
framebuffer_r, framebuffer_g, framebuffer_b);
sync;
}

View File

@@ -13,6 +13,8 @@ ISPCFLAGS=-O3 --math-lib=default --target=nvptx64 --opt=fast-math
LLVM32 = $(HOME)/usr/local/llvm/bin-3.2
LLVM = $(HOME)/usr/local/llvm/bin-3.3
PTXGEN = $(HOME)/ptxgen
PTXGEN += -opt=3
PTXGEN += -ftz=1 -prec-div=0 -prec-sqrt=0 -fma=1
LLVM32DIS=$(LLVM32)/bin/llvm-dis

View File

@@ -119,5 +119,6 @@ mandelbrot_ispc(uniform float x0, uniform float y0,
#endif
mandelbrot_scanline(x0, dx, y0, dy, width, height, xspan, yspan,
maxIterations, output);
sync;
}
#endif

View File

@@ -13,6 +13,8 @@ ISPCFLAGS=-O3 --math-lib=default --target=nvptx64 --opt=fast-math
LLVM32 = $(HOME)/usr/local/llvm/bin-3.2
LLVM = $(HOME)/usr/local/llvm/bin-3.3
PTXGEN = $(HOME)/ptxgen
PTXGEN += -opt=3
PTXGEN += -ftz=1 -prec-div=0 -prec-sqrt=0 -fma=1
LLVM32DIS=$(LLVM32)/bin/llvm-dis

View File

@@ -13,6 +13,8 @@ ISPCFLAGS=-O3 --math-lib=default --target=nvptx64 --opt=fast-math
LLVM32 = $(HOME)/usr/local/llvm/bin-3.2
LLVM = $(HOME)/usr/local/llvm/bin-3.3
PTXGEN = $(HOME)/ptxgen
PTXGEN += -opt=3
PTXGEN += -ftz=1 -prec-div=0 -prec-sqrt=0 -fma=1
LLVM32DIS=$(LLVM32)/bin/llvm-dis

View File

@@ -419,4 +419,5 @@ volume_ispc_tasks(uniform float density[], uniform int nVoxels[3],
uniform int nTasks = ((width+(dx-1))/dx) * ((height+(dy-1))/dy);
launch[nTasks] volume_task(density, nVoxels, raster2camera, camera2world,
width, height, image);
sync;
}