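Tune radixSort: add a uniform-key fast path to countPass and launch the scan tasks with numBlocks/4 tasks

countPass now checks reduce_equal() on the extracted digit and, when every active program instance has the same digit, updates the local counter once with reduce_add(1) instead of issuing per-lane atomic_add_local calls. partialScanLocal and completeScanGlobal take numBlocks as an explicit parameter (rather than deriving it from taskCount), so radixExclusiveScan can launch them with numBlocks/scale tasks (scale = 4).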

This commit is contained in:
Evghenii
2014-01-28 15:00:43 +01:00
parent d4dd945828
commit 1b993e167f
2 changed files with 17 additions and 9 deletions

View File

@@ -4,7 +4,7 @@ ISPC_SRC=radixSort.ispc
CXX_SRC=radixSort.cpp radixSort.cpp CXX_SRC=radixSort.cpp radixSort.cpp
PTXCC_REGMAX=64 PTXCC_REGMAX=64
LLVM_GPU=1 # LLVM_GPU=1
NVVM_GPU=1 NVVM_GPU=1
include ../common_gpu.mk include ../common_gpu.mk

View File

@@ -22,11 +22,18 @@ void countPass(
foreach (digit = 0 ... NUMDIGITS) foreach (digit = 0 ... NUMDIGITS)
counts[digit] = 0; counts[digit] = 0;
#if 1
foreach (i = 0 ... nloc) foreach (i = 0 ... nloc)
{ {
const int key = mask & ((unsigned int)keys[i] >> bit); const int key = mask & ((unsigned int)keys[i] >> bit);
uniform int skey;
if (reduce_equal(key, &skey) == true)
counts[skey] += reduce_add(1);
else
atomic_add_local(&counts[key], 1); atomic_add_local(&counts[key], 1);
} }
#else
#endif
foreach (digit = 0 ... NUMDIGITS) foreach (digit = 0 ... NUMDIGITS)
atomic_add_global(&countsGlobal[digit], counts[digit]); atomic_add_global(&countsGlobal[digit], counts[digit]);
@@ -74,14 +81,14 @@ void sortPass(
task task
void partialScanLocal( void partialScanLocal(
uniform int numBlocks,
uniform int excScanAll[], uniform int excScanAll[],
uniform int countsAll[], uniform int countsAll[],
uniform int partialSumAll[]) uniform int partialSumAll[])
{ {
const uniform int blockIdx = taskIndex; const uniform int blockIdx = taskIndex;
const uniform int numBlocks = taskCount;
const uniform int blockDim = (numBlocks+numBlocks-1)/numBlocks; const uniform int blockDim = (numBlocks+taskCount-1)/taskCount;
const uniform int bbeg = blockIdx * blockDim; const uniform int bbeg = blockIdx * blockDim;
const uniform int bend = min(bbeg + blockDim, numBlocks); const uniform int bend = min(bbeg + blockDim, numBlocks);
@@ -123,12 +130,12 @@ void partialScanGlobal(
task task
void completeScanGlobal( void completeScanGlobal(
uniform int numBlocks,
uniform int excScanAll[], uniform int excScanAll[],
uniform int carryValueAll[]) uniform int carryValueAll[])
{ {
const uniform int numBlocks = taskCount;
const uniform int blockIdx = taskIndex; const uniform int blockIdx = taskIndex;
const uniform int blockDim = (numBlocks+numBlocks-1)/numBlocks; const uniform int blockDim = (numBlocks+taskCount-1)/taskCount;
const uniform int bbeg = blockIdx * blockDim; const uniform int bbeg = blockIdx * blockDim;
const uniform int bend = min(bbeg + blockDim, numBlocks); const uniform int bend = min(bbeg + blockDim, numBlocks);
@@ -151,13 +158,14 @@ inline void radixExclusiveScan(
uniform int partialSum[], uniform int partialSum[],
uniform int prefixSum[]) uniform int prefixSum[])
{ {
launch [numBlocks] partialScanLocal(excScanPtr, countsPtr, partialSum); const uniform int scale = 4;
launch [numBlocks/scale] partialScanLocal(numBlocks, excScanPtr, countsPtr, partialSum);
sync; sync;
launch [NUMDIGITS] partialScanGlobal(numBlocks, partialSum, prefixSum); launch [NUMDIGITS] partialScanGlobal(numBlocks/scale, partialSum, prefixSum);
sync; sync;
launch [numBlocks] completeScanGlobal(excScanPtr, prefixSum); launch [numBlocks/scale] completeScanGlobal(numBlocks, excScanPtr, prefixSum);
sync; sync;
} }