runs
This commit is contained in:
@@ -1,8 +1,8 @@
|
|||||||
PROG=sort
|
PROG=radixSort
|
||||||
ISPC_SRC=sort.ispc
|
ISPC_SRC=radixSort.ispc
|
||||||
CU_SRC=sort.cu
|
#CU_SRC=radixSort.cu
|
||||||
CXX_SRC=sort.cpp sort_serial.cpp
|
CXX_SRC=radixSort.cpp radixSort.cpp
|
||||||
PTXCC_REGMAX=32
|
PTXCC_REGMAX=64
|
||||||
|
|
||||||
LLVM_GPU=1
|
LLVM_GPU=1
|
||||||
NVVM_GPU=1
|
NVVM_GPU=1
|
||||||
|
|||||||
@@ -61,10 +61,14 @@ void sortPass(
|
|||||||
foreach (i = 0 ... nloc)
|
foreach (i = 0 ... nloc)
|
||||||
{
|
{
|
||||||
const int key = mask & ((unsigned int)keys[i] >> bit);
|
const int key = mask & ((unsigned int)keys[i] >> bit);
|
||||||
const int rel = localCounts[key];
|
int rel;
|
||||||
|
foreach_active(iv)
|
||||||
|
{
|
||||||
|
rel = localCounts[key];
|
||||||
|
localCounts[key]++;
|
||||||
|
}
|
||||||
const int scatter = rel + digitOffsets[key];
|
const int scatter = rel + digitOffsets[key];
|
||||||
sorted [scatter] = keys[i];
|
sorted [scatter] = keys[i];
|
||||||
localCounts[key] = 1 + rel;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -74,8 +78,8 @@ void partialScanLocal(
|
|||||||
uniform int countsAll[],
|
uniform int countsAll[],
|
||||||
uniform int partialSumAll[])
|
uniform int partialSumAll[])
|
||||||
{
|
{
|
||||||
const uniform int numBlocks = taskCount;
|
|
||||||
const uniform int blockIdx = taskIndex;
|
const uniform int blockIdx = taskIndex;
|
||||||
|
const uniform int numBlocks = taskCount;
|
||||||
|
|
||||||
const uniform int blockDim = (numBlocks+numBlocks-1)/numBlocks;
|
const uniform int blockDim = (numBlocks+numBlocks-1)/numBlocks;
|
||||||
const uniform int bbeg = blockIdx * blockDim;
|
const uniform int bbeg = blockIdx * blockDim;
|
||||||
@@ -190,8 +194,8 @@ export void radixSort(
|
|||||||
uniform int * uniform sharedCounts = mem_pool;
|
uniform int * uniform sharedCounts = mem_pool;
|
||||||
uniform int * uniform countsGlobal = sharedCounts + nSharedCounts;
|
uniform int * uniform countsGlobal = sharedCounts + nSharedCounts;
|
||||||
uniform int * uniform excScan = countsGlobal + nCountsGlobal;
|
uniform int * uniform excScan = countsGlobal + nCountsGlobal;
|
||||||
uniform int * uniform countsBlock = excScan + nExcScan;
|
uniform int * uniform counts = excScan + nExcScan;
|
||||||
uniform int * uniform partialSum = countsBlock + nCountsBlock;
|
uniform int * uniform partialSum = counts + nCountsBlock;
|
||||||
uniform int * uniform prefixSum = partialSum + nPartialSum;
|
uniform int * uniform prefixSum = partialSum + nPartialSum;
|
||||||
|
|
||||||
for (uniform int bit = 0; bit < 32; bit += NUMBITS)
|
for (uniform int bit = 0; bit < 32; bit += NUMBITS)
|
||||||
@@ -201,7 +205,7 @@ export void radixSort(
|
|||||||
countsGlobal[digit] = 0;
|
countsGlobal[digit] = 0;
|
||||||
|
|
||||||
/* compute histogram for each digit */
|
/* compute histogram for each digit */
|
||||||
launch [numBlocks] countPass(keys, bit, numElements, countsBlock, countsGlobal);
|
launch [numBlocks] countPass(keys, bit, numElements, counts, countsGlobal);
|
||||||
sync;
|
sync;
|
||||||
|
|
||||||
/* exclusive scan on global histogram */
|
/* exclusive scan on global histogram */
|
||||||
@@ -216,7 +220,7 @@ export void radixSort(
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* computing offsets for each digit */
|
/* computing offsets for each digit */
|
||||||
radixExclusiveScan(numBlocks, excScan, countsBlock, partialSum, prefixSum);
|
radixExclusiveScan(numBlocks, excScan, counts, partialSum, prefixSum);
|
||||||
|
|
||||||
/* sorting */
|
/* sorting */
|
||||||
launch [numBlocks]
|
launch [numBlocks]
|
||||||
|
|||||||
Reference in New Issue
Block a user