diff --git a/examples_cuda/aobench/ao.cu b/examples_cuda/aobench/ao.cu index 2ebcb011..071bec16 100644 --- a/examples_cuda/aobench/ao.cu +++ b/examples_cuda/aobench/ao.cu @@ -417,6 +417,11 @@ void ao_ispc_tasks( const int nby = ntiley; const int nbz = 1; const dim3 blocks (nbx, nby, nbz); - ao_task<<>>(w,h,nsubsamples,image); + if (threadIdx.x == 0) + { + printf(" --- using CDP -- \n"); + ao_task<<>>(w,h,nsubsamples,image); + } + cudaDeviceSynchronize(); } #endif diff --git a/examples_cuda/aobench/ao_cu.cpp b/examples_cuda/aobench/ao_cu.cpp index b0aa607d..dc53a7b5 100755 --- a/examples_cuda/aobench/ao_cu.cpp +++ b/examples_cuda/aobench/ao_cu.cpp @@ -340,8 +340,10 @@ extern "C" const char * module = &module_str[0]; #endif CUmodule cudaModule = loadModule(module); - CUfunction cudaFunction = getFunction(cudaModule, func_name); - deviceLaunch(cudaFunction, countx, county, countz, func_args); +// CUfunction cudaFunction = getFunction(cudaModule, func_name); +// deviceLaunch(cudaFunction, countx, county, countz, func_args); + CUfunction cudaFunction = getFunction(cudaModule, "ao_ispc_tasks"); + deviceLaunch(cudaFunction, 1, 1, 1, func_args); unloadModule(cudaModule); } void CUDASync(void *handle)