added some ptx options
This commit is contained in:
@@ -68,6 +68,7 @@ static void createContext(
|
|||||||
|
|
||||||
// Create driver context
|
// Create driver context
|
||||||
checkCudaErrors(cuCtxCreate(&context, 0, device));
|
checkCudaErrors(cuCtxCreate(&context, 0, device));
|
||||||
|
#if 0
|
||||||
size_t limit;
|
size_t limit;
|
||||||
checkCudaErrors(cuCtxGetLimit(&limit, CU_LIMIT_STACK_SIZE));
|
checkCudaErrors(cuCtxGetLimit(&limit, CU_LIMIT_STACK_SIZE));
|
||||||
fprintf(stderr, " stack_limit= %llu KB\n", limit/1024);
|
fprintf(stderr, " stack_limit= %llu KB\n", limit/1024);
|
||||||
@@ -75,6 +76,7 @@ static void createContext(
|
|||||||
fprintf(stderr, " heap_limit= %llu KB\n", limit/1024);
|
fprintf(stderr, " heap_limit= %llu KB\n", limit/1024);
|
||||||
checkCudaErrors(cuCtxSetLimit(CU_LIMIT_STACK_SIZE,stackLimit));
|
checkCudaErrors(cuCtxSetLimit(CU_LIMIT_STACK_SIZE,stackLimit));
|
||||||
checkCudaErrors(cuCtxSetLimit(CU_LIMIT_MALLOC_HEAP_SIZE,heapLimit));
|
checkCudaErrors(cuCtxSetLimit(CU_LIMIT_MALLOC_HEAP_SIZE,heapLimit));
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
static void destroyContext()
|
static void destroyContext()
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -13,6 +13,8 @@ ISPCFLAGS=-O3 --math-lib=default --target=nvptx64 --opt=fast-math
|
|||||||
LLVM32 = $(HOME)/usr/local/llvm/bin-3.2
|
LLVM32 = $(HOME)/usr/local/llvm/bin-3.2
|
||||||
LLVM = $(HOME)/usr/local/llvm/bin-3.3
|
LLVM = $(HOME)/usr/local/llvm/bin-3.3
|
||||||
PTXGEN = $(HOME)/ptxgen
|
PTXGEN = $(HOME)/ptxgen
|
||||||
|
PTXGEN += -opt=3
|
||||||
|
PTXGEN += -ftz=1 -prec-div=0 -prec-sqrt=0 -fma=1
|
||||||
|
|
||||||
LLVM32DIS=$(LLVM32)/bin/llvm-dis
|
LLVM32DIS=$(LLVM32)/bin/llvm-dis
|
||||||
|
|
||||||
|
|||||||
@@ -171,7 +171,7 @@ struct Uniform
|
|||||||
shptr[chunk][elem] = value;
|
shptr[chunk][elem] = value;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
#elif 1
|
#elif 0
|
||||||
template<typename T, int N>
|
template<typename T, int N>
|
||||||
struct Uniform
|
struct Uniform
|
||||||
{
|
{
|
||||||
@@ -184,7 +184,7 @@ struct Uniform
|
|||||||
|
|
||||||
__device__ inline Uniform()
|
__device__ inline Uniform()
|
||||||
{
|
{
|
||||||
#if 0
|
#if 1
|
||||||
if (programIndex == 0)
|
if (programIndex == 0)
|
||||||
data = new T[N];
|
data = new T[N];
|
||||||
ptr[0] = __shfl(ptr[0], 0);
|
ptr[0] = __shfl(ptr[0], 0);
|
||||||
@@ -200,7 +200,7 @@ struct Uniform
|
|||||||
}
|
}
|
||||||
__device__ inline ~Uniform()
|
__device__ inline ~Uniform()
|
||||||
{
|
{
|
||||||
#if 0
|
#if 1
|
||||||
if (programIndex == 0)
|
if (programIndex == 0)
|
||||||
delete data;
|
delete data;
|
||||||
#else
|
#else
|
||||||
@@ -821,4 +821,5 @@ RenderStatic(InputHeader inputHeaderPtr[],
|
|||||||
inputHeaderPtr, inputDataPtr, visualizeLightCount,
|
inputHeaderPtr, inputDataPtr, visualizeLightCount,
|
||||||
framebuffer_r, framebuffer_g, framebuffer_b);
|
framebuffer_r, framebuffer_g, framebuffer_b);
|
||||||
cudaDeviceSynchronize();
|
cudaDeviceSynchronize();
|
||||||
|
cudaDeviceSynchronize();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -549,6 +549,7 @@ RenderStatic(uniform InputHeader inputHeaderPtr[],
|
|||||||
launch[num_groups] RenderTile(num_groups_x, num_groups_y,
|
launch[num_groups] RenderTile(num_groups_x, num_groups_y,
|
||||||
inputHeaderPtr, inputDataPtr, visualizeLightCount,
|
inputHeaderPtr, inputDataPtr, visualizeLightCount,
|
||||||
framebuffer_r, framebuffer_g, framebuffer_b);
|
framebuffer_r, framebuffer_g, framebuffer_b);
|
||||||
|
sync;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -13,6 +13,8 @@ ISPCFLAGS=-O3 --math-lib=default --target=nvptx64 --opt=fast-math
|
|||||||
LLVM32 = $(HOME)/usr/local/llvm/bin-3.2
|
LLVM32 = $(HOME)/usr/local/llvm/bin-3.2
|
||||||
LLVM = $(HOME)/usr/local/llvm/bin-3.3
|
LLVM = $(HOME)/usr/local/llvm/bin-3.3
|
||||||
PTXGEN = $(HOME)/ptxgen
|
PTXGEN = $(HOME)/ptxgen
|
||||||
|
PTXGEN += -opt=3
|
||||||
|
PTXGEN += -ftz=1 -prec-div=0 -prec-sqrt=0 -fma=1
|
||||||
|
|
||||||
LLVM32DIS=$(LLVM32)/bin/llvm-dis
|
LLVM32DIS=$(LLVM32)/bin/llvm-dis
|
||||||
|
|
||||||
|
|||||||
@@ -119,5 +119,6 @@ mandelbrot_ispc(uniform float x0, uniform float y0,
|
|||||||
#endif
|
#endif
|
||||||
mandelbrot_scanline(x0, dx, y0, dy, width, height, xspan, yspan,
|
mandelbrot_scanline(x0, dx, y0, dy, width, height, xspan, yspan,
|
||||||
maxIterations, output);
|
maxIterations, output);
|
||||||
|
sync;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -13,6 +13,8 @@ ISPCFLAGS=-O3 --math-lib=default --target=nvptx64 --opt=fast-math
|
|||||||
LLVM32 = $(HOME)/usr/local/llvm/bin-3.2
|
LLVM32 = $(HOME)/usr/local/llvm/bin-3.2
|
||||||
LLVM = $(HOME)/usr/local/llvm/bin-3.3
|
LLVM = $(HOME)/usr/local/llvm/bin-3.3
|
||||||
PTXGEN = $(HOME)/ptxgen
|
PTXGEN = $(HOME)/ptxgen
|
||||||
|
PTXGEN += -opt=3
|
||||||
|
PTXGEN += -ftz=1 -prec-div=0 -prec-sqrt=0 -fma=1
|
||||||
|
|
||||||
LLVM32DIS=$(LLVM32)/bin/llvm-dis
|
LLVM32DIS=$(LLVM32)/bin/llvm-dis
|
||||||
|
|
||||||
|
|||||||
@@ -13,6 +13,8 @@ ISPCFLAGS=-O3 --math-lib=default --target=nvptx64 --opt=fast-math
|
|||||||
LLVM32 = $(HOME)/usr/local/llvm/bin-3.2
|
LLVM32 = $(HOME)/usr/local/llvm/bin-3.2
|
||||||
LLVM = $(HOME)/usr/local/llvm/bin-3.3
|
LLVM = $(HOME)/usr/local/llvm/bin-3.3
|
||||||
PTXGEN = $(HOME)/ptxgen
|
PTXGEN = $(HOME)/ptxgen
|
||||||
|
PTXGEN += -opt=3
|
||||||
|
PTXGEN += -ftz=1 -prec-div=0 -prec-sqrt=0 -fma=1
|
||||||
|
|
||||||
LLVM32DIS=$(LLVM32)/bin/llvm-dis
|
LLVM32DIS=$(LLVM32)/bin/llvm-dis
|
||||||
|
|
||||||
|
|||||||
@@ -419,4 +419,5 @@ volume_ispc_tasks(uniform float density[], uniform int nVoxels[3],
|
|||||||
uniform int nTasks = ((width+(dx-1))/dx) * ((height+(dy-1))/dy);
|
uniform int nTasks = ((width+(dx-1))/dx) * ((height+(dy-1))/dy);
|
||||||
launch[nTasks] volume_task(density, nVoxels, raster2camera, camera2world,
|
launch[nTasks] volume_task(density, nVoxels, raster2camera, camera2world,
|
||||||
width, height, image);
|
width, height, image);
|
||||||
|
sync;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user