Fixed problem with new/delete and added Mel/sec (million elements per second) counter

This commit is contained in:
Evghenii
2013-11-13 20:34:01 +01:00
parent 6f9cea5b58
commit a0f6f264f6
3 changed files with 16 additions and 6 deletions

View File

@@ -108,7 +108,7 @@ int main (int argc, char *argv[])
progressbar (i, m); progressbar (i, m);
} }
printf("[sort ispc + task]:\t[%.3f] million cycles\n", tISPC2); printf("[sort ispc + task]:\t[%.3f] million cycles :: rate= %g Mel/sec\n", tISPC2, 1.0e-6*n*m/tISPC2);
for (int i = 0; i < n-1; i++) for (int i = 0; i < n-1; i++)
{ {
@@ -133,7 +133,7 @@ int main (int argc, char *argv[])
progressbar (i, m); progressbar (i, m);
} }
printf("[sort ispc]:\t[%.3f] million cycles\n", tISPC1); printf("[sort ispc]:\t[%.3f] million cycles :: rate= %g Mel/sec\n", tISPC1, 1.0e-6*n*m/tISPC1);
srand (0); srand (0);
@@ -153,7 +153,7 @@ int main (int argc, char *argv[])
progressbar (i, m); progressbar (i, m);
} }
printf("[sort serial]:\t\t[%.3f] million cycles\n", tSerial); printf("[sort serial]:\t[%.3f] million cycles :: rate= %g Mel/sec\n", tSerial, 1.0e-6*n*m/tSerial);
printf("\t\t\t\t(%.2fx speedup from ISPC, %.2fx speedup from ISPC + tasks)\n", tSerial/tISPC1, tSerial/tISPC2); printf("\t\t\t\t(%.2fx speedup from ISPC, %.2fx speedup from ISPC + tasks)\n", tSerial/tISPC1, tSerial/tISPC2);

View File

@@ -219,12 +219,16 @@ export void sort_ispc (uniform int n,
{ {
uniform int num = ntasks; uniform int num = ntasks;
uniform int span = n / num; uniform int span = n / num;
#if 0 #if 1
/* If this fails, change to 0: there are some issues with stack/heap size inside the CUDA context.
 * grep for cuCtxSetLimit in sort_cu.cpp
 */
uniform int hsize = 256*programCount*num; uniform int hsize = 256*programCount*num;
uniform int * uniform hist = uniform new uniform int [hsize]; uniform int * uniform hist = uniform new uniform int [hsize];
uniform int64 * uniform pair = uniform new uniform int64 [n]; uniform int64 * uniform pair = uniform new uniform int64 [n];
uniform int64 * uniform temp = uniform new uniform int64 [n]; uniform int64 * uniform temp = uniform new uniform int64 [n];
uniform int * uniform g = uniform new uniform int [num+1]; uniform int * uniform g = uniform new uniform int [num+1];
#define ALLOCATED
#else #else
uniform int * uniform hist = _hist; uniform int * uniform hist = _hist;
uniform int64 * uniform pair = _pair; uniform int64 * uniform pair = _pair;
@@ -253,7 +257,7 @@ export void sort_ispc (uniform int n,
launch[num] unpack (span, n, pair, code, order); launch[num] unpack (span, n, pair, code, order);
sync; sync;
#if ALLOCATED #ifdef ALLOCATED
delete g; delete g;
delete hist; delete hist;
delete pair; delete pair;

View File

@@ -99,7 +99,13 @@ void createContext(const int deviceId = 0)
// Create driver context // Create driver context
checkCudaErrors(cuCtxCreate(&context, 0, device)); checkCudaErrors(cuCtxCreate(&context, 0, device));
size_t limit;
checkCudaErrors(cuCtxGetLimit(&limit, CU_LIMIT_STACK_SIZE));
fprintf(stderr, " stack_limit= %llu KB\n", limit/1024);
checkCudaErrors(cuCtxGetLimit(&limit, CU_LIMIT_MALLOC_HEAP_SIZE));
fprintf(stderr, " heap= %llu KB\n", limit/1024);
checkCudaErrors(cuCtxSetLimit(CU_LIMIT_MALLOC_HEAP_SIZE,1024*1024*1024)); checkCudaErrors(cuCtxSetLimit(CU_LIMIT_MALLOC_HEAP_SIZE,1024*1024*1024));
checkCudaErrors(cuCtxSetLimit(CU_LIMIT_STACK_SIZE,1024*4));
} }
void destroyContext() void destroyContext()
{ {
@@ -386,7 +392,7 @@ int main (int argc, char *argv[])
progressbar (i, m); progressbar (i, m);
} }
printf("[sort cuda]:\t[%.3f] million cycles\n", tISPC2); printf("[sort cuda]:\t[%.3f] million cycles :: rate= %g Mel/sec\n", tISPC2, 1.0e-6*n*m/tISPC2);
memcpyD2H(code, d_code, n*sizeof(int)); memcpyD2H(code, d_code, n*sizeof(int));
memcpyD2H(order, d_order, n*sizeof(int)); memcpyD2H(order, d_order, n*sizeof(int));
for (int i = 0; i < n-1; i++) for (int i = 0; i < n-1; i++)