+checkpoint
This commit is contained in:
@@ -93,9 +93,9 @@ void sortPass(
|
|||||||
const int mask = (1 << NUMBITS) - 1;
|
const int mask = (1 << NUMBITS) - 1;
|
||||||
|
|
||||||
/* copy digit offset from Gmem to Lmem */
|
/* copy digit offset from Gmem to Lmem */
|
||||||
#if 0
|
#if 1
|
||||||
__shared__ int digitOffsets_sh[NUMDIGITS*4];
|
__shared__ int digitOffsets_sh[NUMDIGITS*4];
|
||||||
int *digitOffsets = digitOffsets_sh + warpIdx*NUMDIGITS;
|
volatile int *digitOffsets = digitOffsets_sh + warpIdx*NUMDIGITS;
|
||||||
for (int digit = programIndex; digit < NUMDIGITS; digit += programCount)
|
for (int digit = programIndex; digit < NUMDIGITS; digit += programCount)
|
||||||
digitOffsets[digit] = digitOffsetsAll[blkIdx*NUMDIGITS + digit];
|
digitOffsets[digit] = digitOffsetsAll[blkIdx*NUMDIGITS + digit];
|
||||||
#else
|
#else
|
||||||
@@ -103,11 +103,11 @@ void sortPass(
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
int scatter;
|
||||||
for (int i = programIndex; i < nloc; i += programCount)
|
for (int i = programIndex; i < nloc; i += programCount)
|
||||||
if (i < nloc)
|
if (i < nloc)
|
||||||
{
|
{
|
||||||
const int key = mask & ((unsigned int)keys[i] >> bit);
|
const int key = mask & ((unsigned int)keys[i] >> bit);
|
||||||
int scatter;
|
|
||||||
/* not a vector friendly loop */
|
/* not a vector friendly loop */
|
||||||
#pragma unroll 1 /* needed, otherwise compiler unroll and optimizes the result :S */
|
#pragma unroll 1 /* needed, otherwise compiler unroll and optimizes the result :S */
|
||||||
for (int iv = 0; iv < programCount; iv++)
|
for (int iv = 0; iv < programCount; iv++)
|
||||||
|
|||||||
@@ -284,7 +284,6 @@ export void radixSort(
|
|||||||
/* computing offsets for each digit */
|
/* computing offsets for each digit */
|
||||||
radixExclusiveScan(numBlocks, excScan, counts, partialSum, prefixSum);
|
radixExclusiveScan(numBlocks, excScan, counts, partialSum, prefixSum);
|
||||||
|
|
||||||
#if 1
|
|
||||||
/* sorting */
|
/* sorting */
|
||||||
launch [numBlocks]
|
launch [numBlocks]
|
||||||
sortPass(
|
sortPass(
|
||||||
@@ -294,7 +293,6 @@ export void radixSort(
|
|||||||
numElements,
|
numElements,
|
||||||
excScan);
|
excScan);
|
||||||
sync;
|
sync;
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user