Fixed problem with new/delete and added Mel/sec counter.
This commit is contained in:
@@ -108,7 +108,7 @@ int main (int argc, char *argv[])
|
||||
progressbar (i, m);
|
||||
}
|
||||
|
||||
printf("[sort ispc + task]:\t[%.3f] million cycles\n", tISPC2);
|
||||
printf("[sort ispc + task]:\t[%.3f] million cycles :: rate= %g Mel/sec\n", tISPC2, 1.0e-6*n*m/tISPC2);
|
||||
|
||||
for (int i = 0; i < n-1; i++)
|
||||
{
|
||||
@@ -133,7 +133,7 @@ int main (int argc, char *argv[])
|
||||
progressbar (i, m);
|
||||
}
|
||||
|
||||
printf("[sort ispc]:\t[%.3f] million cycles\n", tISPC1);
|
||||
printf("[sort ispc]:\t[%.3f] million cycles :: rate= %g Mel/sec\n", tISPC1, 1.0e-6*n*m/tISPC1);
|
||||
|
||||
|
||||
srand (0);
|
||||
@@ -153,7 +153,7 @@ int main (int argc, char *argv[])
|
||||
progressbar (i, m);
|
||||
}
|
||||
|
||||
printf("[sort serial]:\t\t[%.3f] million cycles\n", tSerial);
|
||||
printf("[sort serial]:\t[%.3f] million cycles :: rate= %g Mel/sec\n", tSerial, 1.0e-6*n*m/tSerial);
|
||||
|
||||
printf("\t\t\t\t(%.2fx speedup from ISPC, %.2fx speedup from ISPC + tasks)\n", tSerial/tISPC1, tSerial/tISPC2);
|
||||
|
||||
|
||||
@@ -219,12 +219,16 @@ export void sort_ispc (uniform int n,
|
||||
{
|
||||
uniform int num = ntasks;
|
||||
uniform int span = n / num;
|
||||
#if 0
|
||||
#if 1
|
||||
/* If this fails, change to 0; there are some issues with stack size/heap inside the CUDA context.
|
||||
* grep for cuCtxSetLimit in sort_cu.cpp
|
||||
*/
|
||||
uniform int hsize = 256*programCount*num;
|
||||
uniform int * uniform hist = uniform new uniform int [hsize];
|
||||
uniform int64 * uniform pair = uniform new uniform int64 [n];
|
||||
uniform int64 * uniform temp = uniform new uniform int64 [n];
|
||||
uniform int * uniform g = uniform new uniform int [num+1];
|
||||
#define ALLOCATED
|
||||
#else
|
||||
uniform int * uniform hist = _hist;
|
||||
uniform int64 * uniform pair = _pair;
|
||||
@@ -253,7 +257,7 @@ export void sort_ispc (uniform int n,
|
||||
launch[num] unpack (span, n, pair, code, order);
|
||||
sync;
|
||||
|
||||
#if ALLOCATED
|
||||
#ifdef ALLOCATED
|
||||
delete g;
|
||||
delete hist;
|
||||
delete pair;
|
||||
|
||||
@@ -99,7 +99,13 @@ void createContext(const int deviceId = 0)
|
||||
|
||||
// Create driver context
|
||||
checkCudaErrors(cuCtxCreate(&context, 0, device));
|
||||
size_t limit;
|
||||
checkCudaErrors(cuCtxGetLimit(&limit, CU_LIMIT_STACK_SIZE));
|
||||
fprintf(stderr, " stack_limit= %llu KB\n", limit/1024);
|
||||
checkCudaErrors(cuCtxGetLimit(&limit, CU_LIMIT_MALLOC_HEAP_SIZE));
|
||||
fprintf(stderr, " heap= %llu KB\n", limit/1024);
|
||||
checkCudaErrors(cuCtxSetLimit(CU_LIMIT_MALLOC_HEAP_SIZE,1024*1024*1024));
|
||||
checkCudaErrors(cuCtxSetLimit(CU_LIMIT_STACK_SIZE,1024*4));
|
||||
}
|
||||
void destroyContext()
|
||||
{
|
||||
@@ -386,7 +392,7 @@ int main (int argc, char *argv[])
|
||||
progressbar (i, m);
|
||||
}
|
||||
|
||||
printf("[sort cuda]:\t[%.3f] million cycles\n", tISPC2);
|
||||
printf("[sort cuda]:\t[%.3f] million cycles :: rate= %g Mel/sec\n", tISPC2, 1.0e-6*n*m/tISPC2);
|
||||
memcpyD2H(code, d_code, n*sizeof(int));
|
||||
memcpyD2H(order, d_order, n*sizeof(int));
|
||||
for (int i = 0; i < n-1; i++)
|
||||
|
||||
Reference in New Issue
Block a user