Fixed a problem with new/delete and added a Mel/sec (million elements per second) rate counter

This commit is contained in:
Evghenii
2013-11-13 20:34:01 +01:00
parent 6f9cea5b58
commit a0f6f264f6
3 changed files with 16 additions and 6 deletions

View File

@@ -108,7 +108,7 @@ int main (int argc, char *argv[])
progressbar (i, m);
}
printf("[sort ispc + task]:\t[%.3f] million cycles\n", tISPC2);
printf("[sort ispc + task]:\t[%.3f] million cycles :: rate= %g Mel/sec\n", tISPC2, 1.0e-6*n*m/tISPC2);
for (int i = 0; i < n-1; i++)
{
@@ -133,7 +133,7 @@ int main (int argc, char *argv[])
progressbar (i, m);
}
printf("[sort ispc]:\t[%.3f] million cycles\n", tISPC1);
printf("[sort ispc]:\t[%.3f] million cycles :: rate= %g Mel/sec\n", tISPC1, 1.0e-6*n*m/tISPC1);
srand (0);
@@ -153,7 +153,7 @@ int main (int argc, char *argv[])
progressbar (i, m);
}
printf("[sort serial]:\t\t[%.3f] million cycles\n", tSerial);
printf("[sort serial]:\t[%.3f] million cycles :: rate= %g Mel/sec\n", tSerial, 1.0e-6*n*m/tSerial);
printf("\t\t\t\t(%.2fx speedup from ISPC, %.2fx speedup from ISPC + tasks)\n", tSerial/tISPC1, tSerial/tISPC2);

View File

@@ -219,12 +219,16 @@ export void sort_ispc (uniform int n,
{
uniform int num = ntasks;
uniform int span = n / num;
#if 0
#if 1
/* If this fails, change to 0: there are some issues with stack/heap size inside the CUDA context;
* grep for cuCtxSetLimit in sort_cu.cpp.
*/
uniform int hsize = 256*programCount*num;
uniform int * uniform hist = uniform new uniform int [hsize];
uniform int64 * uniform pair = uniform new uniform int64 [n];
uniform int64 * uniform temp = uniform new uniform int64 [n];
uniform int * uniform g = uniform new uniform int [num+1];
#define ALLOCATED
#else
uniform int * uniform hist = _hist;
uniform int64 * uniform pair = _pair;
@@ -253,7 +257,7 @@ export void sort_ispc (uniform int n,
launch[num] unpack (span, n, pair, code, order);
sync;
#if ALLOCATED
#ifdef ALLOCATED
delete g;
delete hist;
delete pair;

View File

@@ -99,7 +99,13 @@ void createContext(const int deviceId = 0)
// Create driver context
checkCudaErrors(cuCtxCreate(&context, 0, device));
size_t limit;
checkCudaErrors(cuCtxGetLimit(&limit, CU_LIMIT_STACK_SIZE));
fprintf(stderr, " stack_limit= %llu KB\n", limit/1024);
checkCudaErrors(cuCtxGetLimit(&limit, CU_LIMIT_MALLOC_HEAP_SIZE));
fprintf(stderr, " heap= %llu KB\n", limit/1024);
checkCudaErrors(cuCtxSetLimit(CU_LIMIT_MALLOC_HEAP_SIZE,1024*1024*1024));
checkCudaErrors(cuCtxSetLimit(CU_LIMIT_STACK_SIZE,1024*4));
}
void destroyContext()
{
@@ -386,7 +392,7 @@ int main (int argc, char *argv[])
progressbar (i, m);
}
printf("[sort cuda]:\t[%.3f] million cycles\n", tISPC2);
printf("[sort cuda]:\t[%.3f] million cycles :: rate= %g Mel/sec\n", tISPC2, 1.0e-6*n*m/tISPC2);
memcpyD2H(code, d_code, n*sizeof(int));
memcpyD2H(order, d_order, n*sizeof(int));
for (int i = 0; i < n-1; i++)