diff --git a/examples_ptx/radixSort/radixSort.cu b/examples_ptx/radixSort/radixSort.cu index be5537b9..9b9587e3 100644 --- a/examples_ptx/radixSort/radixSort.cu +++ b/examples_ptx/radixSort/radixSort.cu @@ -93,9 +93,9 @@ void sortPass( const int mask = (1 << NUMBITS) - 1; /* copy digit offset from Gmem to Lmem */ -#if 0 +#if 1 __shared__ int digitOffsets_sh[NUMDIGITS*4]; - int *digitOffsets = digitOffsets_sh + warpIdx*NUMDIGITS; + volatile int *digitOffsets = digitOffsets_sh + warpIdx*NUMDIGITS; for (int digit = programIndex; digit < NUMDIGITS; digit += programCount) digitOffsets[digit] = digitOffsetsAll[blkIdx*NUMDIGITS + digit]; #else @@ -103,11 +103,11 @@ void sortPass( #endif + int scatter; for (int i = programIndex; i < nloc; i += programCount) if (i < nloc) { const int key = mask & ((unsigned int)keys[i] >> bit); - int scatter; /* not a vector friendly loop */ #pragma unroll 1 /* needed, otherwise compiler unroll and optimizes the result :S */ for (int iv = 0; iv < programCount; iv++) diff --git a/examples_ptx/radixSort/radixSort.ispc b/examples_ptx/radixSort/radixSort.ispc index 8e97bc5b..6e160610 100644 --- a/examples_ptx/radixSort/radixSort.ispc +++ b/examples_ptx/radixSort/radixSort.ispc @@ -284,7 +284,6 @@ export void radixSort( /* computing offsets for each digit */ radixExclusiveScan(numBlocks, excScan, counts, partialSum, prefixSum); -#if 1 /* sorting */ launch [numBlocks] sortPass( @@ -294,7 +293,6 @@ export void radixSort( numElements, excScan); sync; -#endif } }