runs but incorrectly

This commit is contained in:
Evghenii
2014-01-28 13:45:54 +01:00
parent 2ae666dc7f
commit 88ffa96263
3 changed files with 43 additions and 44 deletions

View File

@@ -4,6 +4,6 @@ CPP_SRC=radixSort.cpp
ISPC_SRC=radixSort.ispc ISPC_SRC=radixSort.ispc
ISPC_IA_TARGETS=avx1-i32x8 ISPC_IA_TARGETS=avx1-i32x8
ISPC_ARM_TARGETS=neon ISPC_ARM_TARGETS=neon
#ISPC_FLAGS=-DDEBUG #ISPC_FLAGS=-DDEBUG -g
include ../common.mk include ../common.mk

View File

@@ -38,14 +38,16 @@ int main (int argc, char *argv[])
unsigned int *tmpv = new unsigned int [n]; unsigned int *tmpv = new unsigned int [n];
unsigned int *keys_orig = new unsigned int [n]; unsigned int *keys_orig = new unsigned int [n];
srand48(rtc()*65536); // srand48(rtc()*65536);
srand48(1234);
#pragma omp parallel for #pragma omp parallel for
for (int i = 0; i < n; i++) for (int i = 0; i < n; i++)
{ {
keys[i] = drand48() * (1<<30); keys[i] = 4*n-3*i; //drand48() * (1<<30);
tmpv[i] = 0; tmpv[i] = keys[i];
} }
std::random_shuffle(keys, keys + n); std::random_shuffle(keys, keys + n);
#pragma omp parallel for #pragma omp parallel for
@@ -67,6 +69,7 @@ int main (int argc, char *argv[])
printf("[sort ispc + tasks]:\t[%.3f] msec [%.3f Mpair/s]\n", tISPC2, 1.0e-3*n*m/tISPC2); printf("[sort ispc + tasks]:\t[%.3f] msec [%.3f Mpair/s]\n", tISPC2, 1.0e-3*n*m/tISPC2);
std::sort(keys_orig, keys_orig + n); std::sort(keys_orig, keys_orig + n);
std::sort(keys, keys+ n);
for (int i = 0; i < n; i++) for (int i = 0; i < n; i++)
assert(keys[i] == keys_orig[i]); assert(keys[i] == keys_orig[i]);
@@ -96,7 +99,7 @@ int main (int argc, char *argv[])
#endif #endif
delete keys; delete keys;
delete keys; delete keys_orig;
delete tmpv; delete tmpv;
return 0; return 0;
} }

View File

@@ -2,7 +2,7 @@
#define NUMDIGITS (1<<NUMBITS) #define NUMDIGITS (1<<NUMBITS)
task task
void computeHistogram( void countPass(
const uniform int keysAll[], const uniform int keysAll[],
const uniform int bit, const uniform int bit,
const uniform int numElements, const uniform int numElements,
@@ -46,7 +46,6 @@ void sortPass(
const uniform int blockDim = (numElements + numBlocks - 1) / numBlocks; const uniform int blockDim = (numElements + numBlocks - 1) / numBlocks;
const uniform int mask = (1 << NUMBITS) - 1;
uniform int * uniform localCounts = sharedCounts + blockIdx*NUMDIGITS; uniform int * uniform localCounts = sharedCounts + blockIdx*NUMDIGITS;
@@ -55,6 +54,7 @@ void sortPass(
uniform int * uniform digitOffsets = digitOffsetsAll + blockIdx*NUMDIGITS; uniform int * uniform digitOffsets = digitOffsetsAll + blockIdx*NUMDIGITS;
const uniform int nloc = min(numElements - keyIndex, blockDim); const uniform int nloc = min(numElements - keyIndex, blockDim);
const uniform int mask = (1 << NUMBITS) - 1;
foreach (i = 0 ... NUMDIGITS) foreach (i = 0 ... NUMDIGITS)
localCounts[i] = 0; localCounts[i] = 0;
@@ -63,82 +63,79 @@ void sortPass(
const int key = mask & ((unsigned int)keys[i] >> bit); const int key = mask & ((unsigned int)keys[i] >> bit);
const int rel = localCounts[key]; const int rel = localCounts[key];
const int scatter = rel + digitOffsets[key]; const int scatter = rel + digitOffsets[key];
sorted [scatter] = keys[i]; sorted [scatter] = keys[i];
localCounts[key] = 1 + rel; localCounts[key] = 1 + rel;
} }
} }
task task
void partialScanLocal( void partialScanLocal(
uniform int excScanPtr[], uniform int excScanAll[],
uniform int countsPtr[], uniform int countsAll[],
uniform int partialSum[]) uniform int partialSumAll[])
{ {
const uniform int numBlocks = taskCount; const uniform int numBlocks = taskCount;
const uniform int blockIdx = taskIndex; const uniform int blockIdx = taskIndex;
const uniform int blockDim = (numBlocks+taskCount-1)/taskCount; const uniform int blockDim = (numBlocks+numBlocks-1)/numBlocks;
const uniform int bbeg = blockIdx * blockDim; const uniform int bbeg = blockIdx * blockDim;
const uniform int bend = min(bbeg + blockDim, numBlocks); const uniform int bend = min(bbeg + blockDim, numBlocks);
uniform int (* uniform countsBlock)[NUMDIGITS] = (uniform int (*)[NUMDIGITS])countsAll;
uniform int (* uniform excScanBlock)[NUMDIGITS] = (uniform int (*)[NUMDIGITS])excScanAll;
uniform int (* uniform partialSum)[NUMDIGITS] = (uniform int (*)[NUMDIGITS])partialSumAll;
foreach (digit = 0 ... NUMDIGITS) foreach (digit = 0 ... NUMDIGITS)
{ {
uniform int * uniform excScanBlock = excScanPtr + bbeg*NUMDIGITS; int prev = bbeg == 0 ? excScanBlock[0][digit] : 0;
uniform int * uniform countsBlock = countsPtr + bbeg*NUMDIGITS;
int prev = bbeg == 0 ? excScanBlock[digit] : 0;
for (uniform int block = bbeg; block < bend; block++) for (uniform int block = bbeg; block < bend; block++)
{ {
const int y = countsBlock[digit]; const int y = countsBlock[block][digit];
excScanBlock[digit] = prev; excScanBlock[block][digit] = prev;
prev += y; prev += y;
excScanBlock += NUMDIGITS;
countsBlock += NUMDIGITS;
} }
partialSum[blockIdx][digit] = excScanBlock[bend-1][digit] + countsBlock[bend-1][digit];
excScanBlock -= NUMDIGITS;
countsBlock -= NUMDIGITS;
partialSum[blockIdx*NUMDIGITS + digit] = excScanBlock[digit] + countsBlock[digit];
} }
} }
task task
void partialScanGlobal( void partialScanGlobal(
const uniform int numBlocks, const uniform int numBlocks,
uniform int partialSum[], uniform int partialSumAll[],
uniform int prefixSum[]) uniform int prefixSumAll[])
{ {
const int digit = taskIndex; uniform int (* uniform partialSum)[NUMDIGITS] = (uniform int (*)[NUMDIGITS])partialSumAll;
uniform int (* uniform prefixSum)[NUMDIGITS] = (uniform int (*)[NUMDIGITS]) prefixSumAll;
const uniform int digit = taskIndex;
int carry = 0; int carry = 0;
foreach (block = 0 ... numBlocks) foreach (block = 0 ... numBlocks)
{ {
const int value = partialSum[block*NUMDIGITS + digit]; const int value = partialSum[block][digit];
const int scan = exclusive_scan_add(value); const int scan = exclusive_scan_add(value);
prefixSum[block*NUMDIGITS + digit] = value + carry; prefixSum[block][digit] = scan + carry;
carry = broadcast(scan+value, programCount-1); carry += broadcast(scan+value, programCount-1);
} }
} }
task task
void completeScanGlobal( void completeScanGlobal(
uniform int excScanAll[], uniform int excScanAll[],
uniform int carryValue[]) uniform int carryValueAll[])
{ {
const uniform int numBlocks = taskCount; const uniform int numBlocks = taskCount;
const uniform int blockIdx = taskIndex; const uniform int blockIdx = taskIndex;
const uniform int blockDim = (numBlocks+taskCount-1)/taskCount; const uniform int blockDim = (numBlocks+numBlocks-1)/numBlocks;
const uniform int bbeg = blockIdx * blockDim; const uniform int bbeg = blockIdx * blockDim;
const uniform int bend = min(bbeg + blockDim, numBlocks); const uniform int bend = min(bbeg + blockDim, numBlocks);
uniform int (* uniform excScanBlock)[NUMDIGITS] = (uniform int (*)[NUMDIGITS])excScanAll;
uniform int (* uniform carryValue)[NUMDIGITS] = (uniform int (*)[NUMDIGITS])carryValueAll;
carryValue += blockIdx*NUMDIGITS;
foreach (digit = 0 ... NUMDIGITS) foreach (digit = 0 ... NUMDIGITS)
{ {
const int carry = carryValue[digit]; const int carry = carryValue[blockIdx][digit];
uniform int * uniform excScanBlock = excScanAll + bbeg*NUMDIGITS; for (uniform int block = bbeg; block < bend; block++)
for (uniform int block = bbeg; block < bend; block++, excScanBlock += NUMDIGITS) excScanBlock[block][digit] += carry;
excScanBlock[digit] += carry;
} }
} }
@@ -165,7 +162,7 @@ export void radixSort(
uniform int keys[], uniform int keys[],
uniform int sorted[]) uniform int sorted[])
{ {
const uniform int numBlocks = num_cores()*2; const uniform int numBlocks = num_cores()*4;
#ifdef __NVPTX__ #ifdef __NVPTX__
assert((numBlocks & 3) == 0); /* task granularity on Kepler is 4 */ assert((numBlocks & 3) == 0); /* task granularity on Kepler is 4 */
@@ -204,7 +201,7 @@ export void radixSort(
countsGlobal[digit] = 0; countsGlobal[digit] = 0;
/* compute histogram for each digit */ /* compute histogram for each digit */
launch [numBlocks] computeHistogram(keys, bit, numElements, countsBlock, countsGlobal); launch [numBlocks] countPass(keys, bit, numElements, countsBlock, countsGlobal);
sync; sync;
/* exclusive scan on global histogram */ /* exclusive scan on global histogram */
@@ -214,10 +211,9 @@ export void radixSort(
{ {
const int value = countsGlobal[digit]; const int value = countsGlobal[digit];
const int scan = exclusive_scan_add(value); const int scan = exclusive_scan_add(value);
excScan[digit] = value + carry; excScan[digit] = scan + carry;
carry += broadcast(scan+value, programCount-1); carry += broadcast(scan+value, programCount-1);
} }
/* computing offsets for each digit */ /* computing offsets for each digit */
radixExclusiveScan(numBlocks, excScan, countsBlock, partialSum, prefixSum); radixExclusiveScan(numBlocks, excScan, countsBlock, partialSum, prefixSum);