diff --git a/examples_cuda/cuda_ispc.h b/examples_cuda/cuda_ispc.h index e143d90d..3060693f 100644 --- a/examples_cuda/cuda_ispc.h +++ b/examples_cuda/cuda_ispc.h @@ -213,7 +213,7 @@ static void memcpyH2D(CUdeviceptr d_buf, void * h_buf, const size_t size) checkCudaErrors(cuMemcpyHtoD(d_buf, h_buf, size)); } #define deviceLaunch(func,params) \ - checkCudaErrors(cuFuncSetCacheConfig((func), CU_FUNC_CACHE_PREFER_L1)); \ + checkCudaErrors(cuFuncSetCacheConfig((func), CU_FUNC_CACHE_PREFER_SHARED)); \ checkCudaErrors( \ cuLaunchKernel( \ (func), \ diff --git a/examples_cuda/options/options.cu b/examples_cuda/options/options.cu index 2b8be792..f02ab904 100644 --- a/examples_cuda/options/options.cu +++ b/examples_cuda/options/options.cu @@ -97,7 +97,12 @@ black_scholes_ispc_tasks( float Sa[], float Xa[], float Ta[], __device__ static inline float binomial_put(float S, float X, float T, float r, float v) { +#if 0 float V[BINOMIAL_NUM]; +#else + __shared__ float VSH[BINOMIAL_NUM*4]; + float *V = VSH + warpIdx*BINOMIAL_NUM; +#endif float dt = T / BINOMIAL_NUM; float u = exp(v * sqrt(dt));