From 66faf8b4e4f2700ef05c201cb77d7dbab1617033 Mon Sep 17 00:00:00 2001 From: Evghenii Date: Mon, 13 Jan 2014 09:52:34 +0100 Subject: [PATCH] some examples tuning --- examples_ptx/aobench/Makefile_gpu | 2 +- examples_ptx/deferred/kernels.cu | 2 +- examples_ptx/mandelbrot_tasks/mandelbrot_tasks.cu | 6 +++--- examples_ptx/mandelbrot_tasks/mandelbrot_tasks.ispc | 6 +++--- examples_ptx/options/Makefile_gpu | 2 +- examples_ptx/options/options.cu | 2 ++ 6 files changed, 11 insertions(+), 9 deletions(-) diff --git a/examples_ptx/aobench/Makefile_gpu b/examples_ptx/aobench/Makefile_gpu index 09f62c55..5d21a06f 100644 --- a/examples_ptx/aobench/Makefile_gpu +++ b/examples_ptx/aobench/Makefile_gpu @@ -4,7 +4,7 @@ CU_SRC=ao.cu CXX_SRC=ao.cpp ao_serial.cpp PTXCC_REGMAX=64 -# LLVM_GPU=1 +LLVM_GPU=1 NVVM_GPU=1 include ../common_gpu.mk diff --git a/examples_ptx/deferred/kernels.cu b/examples_ptx/deferred/kernels.cu index 3b353acf..cd693ae3 100644 --- a/examples_ptx/deferred/kernels.cu +++ b/examples_ptx/deferred/kernels.cu @@ -133,7 +133,7 @@ struct Uniform data[chunkIdx] = shdata[programIndex]; } }; -#elif 0 +#elif 1 template struct Uniform { diff --git a/examples_ptx/mandelbrot_tasks/mandelbrot_tasks.cu b/examples_ptx/mandelbrot_tasks/mandelbrot_tasks.cu index babec5a0..b9dba8fb 100644 --- a/examples_ptx/mandelbrot_tasks/mandelbrot_tasks.cu +++ b/examples_ptx/mandelbrot_tasks/mandelbrot_tasks.cu @@ -35,10 +35,10 @@ __device__ static inline int -mandel(float c_re, float c_im, int count) { +mandel(float c_re, float c_im, float count) { float z_re = c_re, z_im = c_im; - int i; - for (i = 0; i < count; ++i) { + float i = 0.0f; + for (; i < count; ++i) { if (z_re * z_re + z_im * z_im > 4.0f) break; diff --git a/examples_ptx/mandelbrot_tasks/mandelbrot_tasks.ispc b/examples_ptx/mandelbrot_tasks/mandelbrot_tasks.ispc index 9defbc5b..1173ede7 100644 --- a/examples_ptx/mandelbrot_tasks/mandelbrot_tasks.ispc +++ b/examples_ptx/mandelbrot_tasks/mandelbrot_tasks.ispc @@ -32,10 +32,10 @@ */ static inline int -mandel(float c_re, float c_im, int count) { +mandel(float c_re, float c_im, float count) { float z_re = c_re, z_im = c_im; - int i; - for (i = 0; i < count; ++i) { + float i = 0.0f; + for (;i < count; ++i) { if (z_re * z_re + z_im * z_im > 4.0f) break; diff --git a/examples_ptx/options/Makefile_gpu b/examples_ptx/options/Makefile_gpu index 6edfefba..5edba9cf 100644 --- a/examples_ptx/options/Makefile_gpu +++ b/examples_ptx/options/Makefile_gpu @@ -2,7 +2,7 @@ PROG=options ISPC_SRC=options.ispc CU_SRC=options.cu CXX_SRC=options.cpp options_serial.cpp -PTXCC_REGMAX=32 +PTXCC_REGMAX=128 LLVM_GPU=1 diff --git a/examples_ptx/options/options.cu b/examples_ptx/options/options.cu index 9209a8ea..d588096e 100644 --- a/examples_ptx/options/options.cu +++ b/examples_ptx/options/options.cu @@ -327,6 +327,8 @@ __host__ void binomial_put_ispc_tasks( float Sa[], float Xa[], float Ta[], float ra[], float va[], float result[], int count) { + + cudaDeviceSetCacheConfig (cudaFuncCachePreferL1); binomial_put_ispc_tasks___export<<<1,32>>>(Sa,Xa,Ta,ra,va,result,count); cudaDeviceSynchronize(); }