diff --git a/examples_ptx/radixSort/Makefile_gpu b/examples_ptx/radixSort/Makefile_gpu index 97a51f26..5dcaee1b 100644 --- a/examples_ptx/radixSort/Makefile_gpu +++ b/examples_ptx/radixSort/Makefile_gpu @@ -1,8 +1,8 @@ -PROG=sort -ISPC_SRC=sort.ispc -CU_SRC=sort.cu -CXX_SRC=sort.cpp sort_serial.cpp -PTXCC_REGMAX=32 +PROG=radixSort +ISPC_SRC=radixSort.ispc +#CU_SRC=radixSort.cu +CXX_SRC=radixSort.cpp radixSort.cpp +PTXCC_REGMAX=64 LLVM_GPU=1 NVVM_GPU=1 diff --git a/examples_ptx/radixSort/radixSort.ispc b/examples_ptx/radixSort/radixSort.ispc index 8a1c5c51..a62ceb02 100644 --- a/examples_ptx/radixSort/radixSort.ispc +++ b/examples_ptx/radixSort/radixSort.ispc @@ -61,10 +61,14 @@ void sortPass( foreach (i = 0 ... nloc) { const int key = mask & ((unsigned int)keys[i] >> bit); - const int rel = localCounts[key]; + int rel; + foreach_active(iv) + { + rel = localCounts[key]; + localCounts[key]++; + } const int scatter = rel + digitOffsets[key]; sorted [scatter] = keys[i]; - localCounts[key] = 1 + rel; } } @@ -74,8 +78,8 @@ void partialScanLocal( uniform int countsAll[], uniform int partialSumAll[]) { - const uniform int numBlocks = taskCount; const uniform int blockIdx = taskIndex; + const uniform int numBlocks = taskCount; const uniform int blockDim = (numBlocks+numBlocks-1)/numBlocks; const uniform int bbeg = blockIdx * blockDim; @@ -190,8 +194,8 @@ export void radixSort( uniform int * uniform sharedCounts = mem_pool; uniform int * uniform countsGlobal = sharedCounts + nSharedCounts; uniform int * uniform excScan = countsGlobal + nCountsGlobal; - uniform int * uniform countsBlock = excScan + nExcScan; - uniform int * uniform partialSum = countsBlock + nCountsBlock; + uniform int * uniform counts = excScan + nExcScan; + uniform int * uniform partialSum = counts + nCountsBlock; uniform int * uniform prefixSum = partialSum + nPartialSum; for (uniform int bit = 0; bit < 32; bit += NUMBITS) @@ -201,7 +205,7 @@ export void radixSort( countsGlobal[digit] = 0; /* compute histogram for each digit */ - launch [numBlocks] countPass(keys, bit, numElements, countsBlock, countsGlobal); + launch [numBlocks] countPass(keys, bit, numElements, counts, countsGlobal); sync; /* exclusive scan on global histogram */ @@ -216,7 +220,7 @@ export void radixSort( } /* computing offsets for each digit */ - radixExclusiveScan(numBlocks, excScan, countsBlock, partialSum, prefixSum); + radixExclusiveScan(numBlocks, excScan, counts, partialSum, prefixSum); /* sorting */ launch [numBlocks]