ao1.ispc is not functional just yet :S
This commit is contained in:
@@ -418,10 +418,7 @@ void ao_ispc_tasks(
|
|||||||
const int nbz = 1;
|
const int nbz = 1;
|
||||||
const dim3 blocks (nbx, nby, nbz);
|
const dim3 blocks (nbx, nby, nbz);
|
||||||
if (threadIdx.x == 0)
|
if (threadIdx.x == 0)
|
||||||
{
|
|
||||||
printf(" --- using CDP -- \n");
|
|
||||||
ao_task<<<blocks, 128>>>(w,h,nsubsamples,image);
|
ao_task<<<blocks, 128>>>(w,h,nsubsamples,image);
|
||||||
}
|
|
||||||
cudaDeviceSynchronize();
|
cudaDeviceSynchronize();
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -268,13 +268,13 @@ void memcpyH2D(CUdeviceptr d_buf, void * h_buf, const size_t size)
|
|||||||
{
|
{
|
||||||
checkCudaErrors(cuMemcpyHtoD(d_buf, h_buf, size));
|
checkCudaErrors(cuMemcpyHtoD(d_buf, h_buf, size));
|
||||||
}
|
}
|
||||||
#define deviceLaunch(func,nbx,nby,nbz,params) \
|
#define deviceLaunch(func,params) \
|
||||||
checkCudaErrors(cuFuncSetCacheConfig((func), CU_FUNC_CACHE_PREFER_EQUAL)); \
|
checkCudaErrors(cuFuncSetCacheConfig((func), CU_FUNC_CACHE_PREFER_EQUAL)); \
|
||||||
checkCudaErrors( \
|
checkCudaErrors( \
|
||||||
cuLaunchKernel( \
|
cuLaunchKernel( \
|
||||||
(func), \
|
(func), \
|
||||||
((nbx-1)/(128/32)+1), (nby), (nbz), \
|
1,1,1, \
|
||||||
128, 1, 1, \
|
32, 1, 1, \
|
||||||
0, NULL, (params), NULL \
|
0, NULL, (params), NULL \
|
||||||
));
|
));
|
||||||
|
|
||||||
@@ -323,27 +323,14 @@ extern "C"
|
|||||||
}
|
}
|
||||||
void CUDALaunch(
|
void CUDALaunch(
|
||||||
void **handlePtr,
|
void **handlePtr,
|
||||||
const char * module_name,
|
|
||||||
const char * module_1,
|
|
||||||
const char * func_name,
|
const char * func_name,
|
||||||
void **func_args,
|
void **func_args)
|
||||||
int countx, int county, int countz)
|
|
||||||
{
|
{
|
||||||
assert(module_name != NULL);
|
|
||||||
assert(module_1 != NULL);
|
|
||||||
assert(func_name != NULL);
|
|
||||||
assert(func_args != NULL);
|
|
||||||
#if 0
|
|
||||||
const char * module = module_1;
|
|
||||||
#else
|
|
||||||
const std::vector<char> module_str = readBinary("kernel.ptx");
|
const std::vector<char> module_str = readBinary("kernel.ptx");
|
||||||
const char * module = &module_str[0];
|
const char * module = &module_str[0];
|
||||||
#endif
|
|
||||||
CUmodule cudaModule = loadModule(module);
|
CUmodule cudaModule = loadModule(module);
|
||||||
// CUfunction cudaFunction = getFunction(cudaModule, func_name);
|
CUfunction cudaFunction = getFunction(cudaModule, func_name);
|
||||||
// deviceLaunch(cudaFunction, countx, county, countz, func_args);
|
deviceLaunch(cudaFunction, func_args);
|
||||||
CUfunction cudaFunction = getFunction(cudaModule, "ao_ispc_tasks");
|
|
||||||
deviceLaunch(cudaFunction, 1, 1, 1, func_args);
|
|
||||||
unloadModule(cudaModule);
|
unloadModule(cudaModule);
|
||||||
}
|
}
|
||||||
void CUDASync(void *handle)
|
void CUDASync(void *handle)
|
||||||
@@ -466,11 +453,20 @@ int main(int argc, char **argv)
|
|||||||
|
|
||||||
reset_and_start_timer();
|
reset_and_start_timer();
|
||||||
const double t0 = rtc();
|
const double t0 = rtc();
|
||||||
|
#if 0
|
||||||
ao_ispc_tasks(
|
ao_ispc_tasks(
|
||||||
width,
|
width,
|
||||||
height,
|
height,
|
||||||
NSUBSAMPLES,
|
NSUBSAMPLES,
|
||||||
(float*)d_fimg);
|
(float*)d_fimg);
|
||||||
|
#else
|
||||||
|
const char * func_name = "ao_ispc_tasks";
|
||||||
|
int arg_1 = width;
|
||||||
|
int arg_2 = height;
|
||||||
|
int arg_3 = NSUBSAMPLES;
|
||||||
|
void *func_args[] = {&arg_1, &arg_2, &arg_3, (float*)&d_fimg};
|
||||||
|
CUDALaunch(NULL, func_name, func_args);
|
||||||
|
#endif
|
||||||
double t = (rtc() - t0); //get_elapsed_mcycles();
|
double t = (rtc() - t0); //get_elapsed_mcycles();
|
||||||
minTimeISPCTasks = std::min(minTimeISPCTasks, t);
|
minTimeISPCTasks = std::min(minTimeISPCTasks, t);
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user