Added some PTX options
This commit is contained in:
@@ -68,6 +68,7 @@ static void createContext(
|
||||
|
||||
// Create driver context
|
||||
checkCudaErrors(cuCtxCreate(&context, 0, device));
|
||||
#if 0
|
||||
size_t limit;
|
||||
checkCudaErrors(cuCtxGetLimit(&limit, CU_LIMIT_STACK_SIZE));
|
||||
fprintf(stderr, " stack_limit= %llu KB\n", limit/1024);
|
||||
@@ -75,6 +76,7 @@ static void createContext(
|
||||
fprintf(stderr, " heap_limit= %llu KB\n", limit/1024);
|
||||
checkCudaErrors(cuCtxSetLimit(CU_LIMIT_STACK_SIZE,stackLimit));
|
||||
checkCudaErrors(cuCtxSetLimit(CU_LIMIT_MALLOC_HEAP_SIZE,heapLimit));
|
||||
#endif
|
||||
}
|
||||
static void destroyContext()
|
||||
{
|
||||
|
||||
@@ -13,6 +13,8 @@ ISPCFLAGS=-O3 --math-lib=default --target=nvptx64 --opt=fast-math
|
||||
LLVM32 = $(HOME)/usr/local/llvm/bin-3.2
|
||||
LLVM = $(HOME)/usr/local/llvm/bin-3.3
|
||||
PTXGEN = $(HOME)/ptxgen
|
||||
PTXGEN += -opt=3
|
||||
PTXGEN += -ftz=1 -prec-div=0 -prec-sqrt=0 -fma=1
|
||||
|
||||
LLVM32DIS=$(LLVM32)/bin/llvm-dis
|
||||
|
||||
|
||||
@@ -171,7 +171,7 @@ struct Uniform
|
||||
shptr[chunk][elem] = value;
|
||||
}
|
||||
};
|
||||
#elif 1
|
||||
#elif 0
|
||||
template<typename T, int N>
|
||||
struct Uniform
|
||||
{
|
||||
@@ -184,7 +184,7 @@ struct Uniform
|
||||
|
||||
__device__ inline Uniform()
|
||||
{
|
||||
#if 0
|
||||
#if 1
|
||||
if (programIndex == 0)
|
||||
data = new T[N];
|
||||
ptr[0] = __shfl(ptr[0], 0);
|
||||
@@ -200,7 +200,7 @@ struct Uniform
|
||||
}
|
||||
__device__ inline ~Uniform()
|
||||
{
|
||||
#if 0
|
||||
#if 1
|
||||
if (programIndex == 0)
|
||||
delete data;
|
||||
#else
|
||||
@@ -821,4 +821,5 @@ RenderStatic(InputHeader inputHeaderPtr[],
|
||||
inputHeaderPtr, inputDataPtr, visualizeLightCount,
|
||||
framebuffer_r, framebuffer_g, framebuffer_b);
|
||||
cudaDeviceSynchronize();
|
||||
cudaDeviceSynchronize();
|
||||
}
|
||||
|
||||
@@ -549,6 +549,7 @@ RenderStatic(uniform InputHeader inputHeaderPtr[],
|
||||
launch[num_groups] RenderTile(num_groups_x, num_groups_y,
|
||||
inputHeaderPtr, inputDataPtr, visualizeLightCount,
|
||||
framebuffer_r, framebuffer_g, framebuffer_b);
|
||||
sync;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -13,6 +13,8 @@ ISPCFLAGS=-O3 --math-lib=default --target=nvptx64 --opt=fast-math
|
||||
LLVM32 = $(HOME)/usr/local/llvm/bin-3.2
|
||||
LLVM = $(HOME)/usr/local/llvm/bin-3.3
|
||||
PTXGEN = $(HOME)/ptxgen
|
||||
PTXGEN += -opt=3
|
||||
PTXGEN += -ftz=1 -prec-div=0 -prec-sqrt=0 -fma=1
|
||||
|
||||
LLVM32DIS=$(LLVM32)/bin/llvm-dis
|
||||
|
||||
|
||||
@@ -119,5 +119,6 @@ mandelbrot_ispc(uniform float x0, uniform float y0,
|
||||
#endif
|
||||
mandelbrot_scanline(x0, dx, y0, dy, width, height, xspan, yspan,
|
||||
maxIterations, output);
|
||||
sync;
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -13,6 +13,8 @@ ISPCFLAGS=-O3 --math-lib=default --target=nvptx64 --opt=fast-math
|
||||
LLVM32 = $(HOME)/usr/local/llvm/bin-3.2
|
||||
LLVM = $(HOME)/usr/local/llvm/bin-3.3
|
||||
PTXGEN = $(HOME)/ptxgen
|
||||
PTXGEN += -opt=3
|
||||
PTXGEN += -ftz=1 -prec-div=0 -prec-sqrt=0 -fma=1
|
||||
|
||||
LLVM32DIS=$(LLVM32)/bin/llvm-dis
|
||||
|
||||
|
||||
@@ -13,6 +13,8 @@ ISPCFLAGS=-O3 --math-lib=default --target=nvptx64 --opt=fast-math
|
||||
LLVM32 = $(HOME)/usr/local/llvm/bin-3.2
|
||||
LLVM = $(HOME)/usr/local/llvm/bin-3.3
|
||||
PTXGEN = $(HOME)/ptxgen
|
||||
PTXGEN += -opt=3
|
||||
PTXGEN += -ftz=1 -prec-div=0 -prec-sqrt=0 -fma=1
|
||||
|
||||
LLVM32DIS=$(LLVM32)/bin/llvm-dis
|
||||
|
||||
|
||||
@@ -419,4 +419,5 @@ volume_ispc_tasks(uniform float density[], uniform int nVoxels[3],
|
||||
uniform int nTasks = ((width+(dx-1))/dx) * ((height+(dy-1))/dy);
|
||||
launch[nTasks] volume_task(density, nVoxels, raster2camera, camera2world,
|
||||
width, height, image);
|
||||
sync;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user