From d4b46b1295c0bd7e1544f713b6a90d1a4bc8d907 Mon Sep 17 00:00:00 2001 From: Evghenii Date: Wed, 29 Jan 2014 13:47:39 +0100 Subject: [PATCH] +checkpoint --- examples_ptx/radixSort/radixSort.cu | 6 +++--- examples_ptx/radixSort/radixSort.ispc | 2 -- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/examples_ptx/radixSort/radixSort.cu b/examples_ptx/radixSort/radixSort.cu index be5537b9..9b9587e3 100644 --- a/examples_ptx/radixSort/radixSort.cu +++ b/examples_ptx/radixSort/radixSort.cu @@ -93,9 +93,9 @@ void sortPass( const int mask = (1 << NUMBITS) - 1; /* copy digit offset from Gmem to Lmem */ -#if 0 +#if 1 __shared__ int digitOffsets_sh[NUMDIGITS*4]; - int *digitOffsets = digitOffsets_sh + warpIdx*NUMDIGITS; + volatile int *digitOffsets = digitOffsets_sh + warpIdx*NUMDIGITS; for (int digit = programIndex; digit < NUMDIGITS; digit += programCount) digitOffsets[digit] = digitOffsetsAll[blkIdx*NUMDIGITS + digit]; #else @@ -103,11 +103,11 @@ void sortPass( #endif + int scatter; for (int i = programIndex; i < nloc; i += programCount) if (i < nloc) { const int key = mask & ((unsigned int)keys[i] >> bit); - int scatter; /* not a vector friendly loop */ #pragma unroll 1 /* needed, otherwise compiler unroll and optimizes the result :S */ for (int iv = 0; iv < programCount; iv++) diff --git a/examples_ptx/radixSort/radixSort.ispc b/examples_ptx/radixSort/radixSort.ispc index 8e97bc5b..6e160610 100644 --- a/examples_ptx/radixSort/radixSort.ispc +++ b/examples_ptx/radixSort/radixSort.ispc @@ -284,7 +284,6 @@ export void radixSort( /* computing offsets for each digit */ radixExclusiveScan(numBlocks, excScan, counts, partialSum, prefixSum); -#if 1 /* sorting */ launch [numBlocks] sortPass( @@ -294,7 +293,6 @@ export void radixSort( numElements, excScan); sync; -#endif } }