From 8b0f871c06ca20a970b880e7de67a3ee048f92e9 Mon Sep 17 00:00:00 2001 From: Evghenii Date: Wed, 13 Nov 2013 17:23:23 +0100 Subject: [PATCH] +1 --- examples_cuda/sort/sort1.cu | 76 +++++++++++++++++++++++++--------- examples_cuda/sort/sort_cu.cpp | 2 + 2 files changed, 59 insertions(+), 19 deletions(-) diff --git a/examples_cuda/sort/sort1.cu b/examples_cuda/sort/sort1.cu index b35ba506..8b2c8fc5 100644 --- a/examples_cuda/sort/sort1.cu +++ b/examples_cuda/sort/sort1.cu @@ -46,6 +46,28 @@ __device__ inline int nbx(const int n) { return (n - 1) / 4 + 1; } +template +__device__ inline T* __new(const int n) +{ + union + { + T* ptr; + int v[2]; + } val; + if (programIndex == 0) + val.ptr = new T[n]; + val.v[0] = __shfl(val.v[0],0); + val.v[1] = __shfl(val.v[1],0); + return val.ptr; +}; + +template +__device__ inline void __delete(T* ptr) +{ + if (programIndex == 0) + delete ptr; +}; + __global__ void histogram ( int span, int n, int64 code[], int pass, int hist[]) { if (taskIndex >= taskCount) return; @@ -194,9 +216,9 @@ __global__ void bumpup ( int h[], int g[]) } __device__ -static void prefix_sum ( int num, int h[]) +static void prefix_sum ( int num, int h[], int * g) { - int * g = new int [num+1]; +// int * g = new int [num+1]; int i; // launch[num] addup (h, g+1); @@ -204,25 +226,34 @@ static void prefix_sum ( int num, int h[]) addup<<>>(h,g+1); sync; - for (g[0] = 0, i = 1; i < num; i ++) g[i] += g[i-1]; + if (programIndex == 0) + for (g[0] = 0, i = 1; i < num; i ++) g[i] += g[i-1]; // launch[num] bumpup (h, g); if(programIndex == 0) bumpup<<>>(h,g); sync; - delete g; + +// delete g; } extern "C" __global__ void sort_ispc ( int n, unsigned int code[], int order[], int ntasks) { - int num = ntasks < 1 ? 13*4*8 : ntasks; + int num = ntasks < 1 ? 13*4 : ntasks; int span = n / num; int hsize = 256*programCount*num; +#if 0 int * hist = new int [hsize]; int64 * pair = new int64 [n]; int64 * temp = new int64 [n]; +#else + int * hist = __new< int>(hsize); + int64 * pair = __new< int64>(n); + int64 * temp = __new< int64>(n); + int * g = __new(num+1); +#endif int pass, i; @@ -231,35 +262,42 @@ void sort_ispc ( int n, unsigned int code[], int order[], int ntasks) pack<<>>(span, n, code, pair); sync; -#if 0 +#if 1 for (pass = 0; pass < 4; pass ++) { -// launch[num] histogram (span, n, pair, pass, hist); - if(programIndex == 0) - histogram<<>>(span, n, pair, pass, hist); + // launch[num] histogram (span, n, pair, pass, hist); + if(programIndex == 0) + histogram<<>>(span, n, pair, pass, hist); sync; - prefix_sum (num, hist); + prefix_sum (num, hist,g); -// launch[num] permutation (span, n, pair, pass, hist, temp); - if(programIndex == 0) - permutation<<>> (span, n, pair, pass, hist, temp); + // launch[num] permutation (span, n, pair, pass, hist, temp); + if(programIndex == 0) + permutation<<>> (span, n, pair, pass, hist, temp); sync; -/// launch[num] copy (span, n, temp, pair); - if(programIndex == 0) - copy<<>> (span, n, temp, pair); + /// launch[num] copy (span, n, temp, pair); + if(programIndex == 0) + copy<<>> (span, n, temp, pair); sync; } -/// launch[num] unpack (span, n, pair, code, order); - if(programIndex == 0) - unpack<<>> (span, n, pair, code, order); + /// launch[num] unpack (span, n, pair, code, order); + if(programIndex == 0) + unpack<<>> (span, n, pair, code, order); sync; #endif +#if 0 delete hist; delete pair; delete temp; +#else + __delete(g); + __delete(hist); + __delete(pair); + __delete(temp); +#endif } diff --git a/examples_cuda/sort/sort_cu.cpp b/examples_cuda/sort/sort_cu.cpp index 616be83f..f749d343 100644 --- a/examples_cuda/sort/sort_cu.cpp +++ b/examples_cuda/sort/sort_cu.cpp @@ -99,6 +99,7 @@ void createContext(const int deviceId = 0) // Create driver context checkCudaErrors(cuCtxCreate(&context, 0, device)); + checkCudaErrors(cuCtxSetLimit(CU_LIMIT_MALLOC_HEAP_SIZE,1024*1024*1024)); } void destroyContext() { @@ -370,6 +371,7 @@ int main (int argc, char *argv[]) int ntask = 0; void *func_args[] = {&n, &d_code, &d_order, &ntask}; const double dt = CUDALaunch(NULL, func_name, func_args); + tISPC2 += dt; #endif if (argc != 3)