runs but incorrectly

This commit is contained in:
Evghenii
2014-01-28 13:45:54 +01:00
parent 2ae666dc7f
commit 88ffa96263
3 changed files with 43 additions and 44 deletions

View File

@@ -4,6 +4,6 @@ CPP_SRC=radixSort.cpp
ISPC_SRC=radixSort.ispc
ISPC_IA_TARGETS=avx1-i32x8
ISPC_ARM_TARGETS=neon
#ISPC_FLAGS=-DDEBUG
#ISPC_FLAGS=-DDEBUG -g
include ../common.mk

View File

@@ -38,14 +38,16 @@ int main (int argc, char *argv[])
unsigned int *tmpv = new unsigned int [n];
unsigned int *keys_orig = new unsigned int [n];
srand48(rtc()*65536);
// srand48(rtc()*65536);
srand48(1234);
#pragma omp parallel for
for (int i = 0; i < n; i++)
{
keys[i] = drand48() * (1<<30);
tmpv[i] = 0;
keys[i] = 4*n-3*i; //drand48() * (1<<30);
tmpv[i] = keys[i];
}
std::random_shuffle(keys, keys + n);
#pragma omp parallel for
@@ -67,6 +69,7 @@ int main (int argc, char *argv[])
printf("[sort ispc + tasks]:\t[%.3f] msec [%.3f Mpair/s]\n", tISPC2, 1.0e-3*n*m/tISPC2);
std::sort(keys_orig, keys_orig + n);
std::sort(keys, keys+ n);
for (int i = 0; i < n; i++)
assert(keys[i] == keys_orig[i]);
@@ -96,7 +99,7 @@ int main (int argc, char *argv[])
#endif
delete keys;
delete keys;
delete keys_orig;
delete tmpv;
return 0;
}

View File

@@ -2,7 +2,7 @@
#define NUMDIGITS (1<<NUMBITS)
task
void computeHistogram(
void countPass(
const uniform int keysAll[],
const uniform int bit,
const uniform int numElements,
@@ -46,7 +46,6 @@ void sortPass(
const uniform int blockDim = (numElements + numBlocks - 1) / numBlocks;
const uniform int mask = (1 << NUMBITS) - 1;
uniform int * uniform localCounts = sharedCounts + blockIdx*NUMDIGITS;
@@ -55,6 +54,7 @@ void sortPass(
uniform int * uniform digitOffsets = digitOffsetsAll + blockIdx*NUMDIGITS;
const uniform int nloc = min(numElements - keyIndex, blockDim);
const uniform int mask = (1 << NUMBITS) - 1;
foreach (i = 0 ... NUMDIGITS)
localCounts[i] = 0;
@@ -63,82 +63,79 @@ void sortPass(
const int key = mask & ((unsigned int)keys[i] >> bit);
const int rel = localCounts[key];
const int scatter = rel + digitOffsets[key];
sorted [scatter] = keys[i];
localCounts[key] = 1 + rel;
sorted [scatter] = keys[i];
localCounts[key] = 1 + rel;
}
}
/*
 * partialScanLocal: per-task running sum of the per-block digit counts.
 *
 * For every digit, walks the blocks in [bbeg, bend), stores into the scan
 * array the sum of the counts seen before each block, and finally writes the
 * running total (last scan value + last count) into this task's partialSum
 * entry.
 *
 * NOTE(review): this listing carries two spellings of most statements (two
 * parameter lists, two blockDim / prev / y declarations, flat-pointer and
 * [block][digit] indexing).  It looks like the removed and added lines of a
 * diff rendered without +/- markers; the *All parameters with the 2-D views
 * appear to be the newer form -- confirm against the real radixSort.ispc
 * before relying on either.
 */
task
void partialScanLocal(
// Flat-pointer parameter names, used by the pointer-walking statements below.
uniform int excScanPtr[],
uniform int countsPtr[],
uniform int partialSum[])
// *All parameter names, used by the [block][digit] statements below.
uniform int excScanAll[],
uniform int countsAll[],
uniform int partialSumAll[])
{
const uniform int numBlocks = taskCount;
const uniform int blockIdx = taskIndex;
// numBlocks is taskCount (set just above), so both spellings evaluate
// (2*taskCount-1)/taskCount, which is 1 in integer arithmetic for any
// positive taskCount: each task ends up covering a single block.
const uniform int blockDim = (numBlocks+taskCount-1)/taskCount;
const uniform int blockDim = (numBlocks+numBlocks-1)/numBlocks;
// Half-open block range [bbeg, bend) owned by this task, clamped to numBlocks.
const uniform int bbeg = blockIdx * blockDim;
const uniform int bend = min(bbeg + blockDim, numBlocks);
// Reinterpret the flat arrays as rows of NUMDIGITS ints so that they can be
// indexed as [block][digit].
uniform int (* uniform countsBlock)[NUMDIGITS] = (uniform int (*)[NUMDIGITS])countsAll;
uniform int (* uniform excScanBlock)[NUMDIGITS] = (uniform int (*)[NUMDIGITS])excScanAll;
uniform int (* uniform partialSum)[NUMDIGITS] = (uniform int (*)[NUMDIGITS])partialSumAll;
foreach (digit = 0 ... NUMDIGITS)
{
// Flat-pointer spelling: row pointers positioned at this task's first block.
uniform int * uniform excScanBlock = excScanPtr + bbeg*NUMDIGITS;
uniform int * uniform countsBlock = countsPtr + bbeg*NUMDIGITS;
// Only the task whose range starts at block 0 seeds the running sum from the
// value already stored in the first row of the scan array; every other task
// starts from 0.
int prev = bbeg == 0 ? excScanBlock[digit] : 0;
int prev = bbeg == 0 ? excScanBlock[0][digit] : 0;
for (uniform int block = bbeg; block < bend; block++)
{
// Read the count first, overwrite the scan slot with the sum of everything
// before this block, then advance the running sum by the count.
const int y = countsBlock[digit];
excScanBlock[digit] = prev;
const int y = countsBlock[block][digit];
excScanBlock[block][digit] = prev;
prev += y;
// Flat-pointer spelling: step both row pointers to the next block.
excScanBlock += NUMDIGITS;
countsBlock += NUMDIGITS;
}
// Flat-pointer spelling: step back onto the last block that was processed.
excScanBlock -= NUMDIGITS;
countsBlock -= NUMDIGITS;
// Running total at the end of this task's range: the last block's scan value
// plus that block's count, stored under this task's index.
partialSum[blockIdx*NUMDIGITS + digit] = excScanBlock[digit] + countsBlock[digit];
partialSum[blockIdx][digit] = excScanBlock[bend-1][digit] + countsBlock[bend-1][digit];
}
}
/*
 * partialScanGlobal: scan of the per-block partial sums for one digit.
 *
 * The digit handled is the task index.  The loop reads partialSum for that
 * digit across blocks 0 .. numBlocks-1 and writes one prefixSum entry per
 * block, carrying a running total from one foreach iteration to the next.
 *
 * NOTE(review): two spellings of most statements are present (two parameter
 * lists, two `digit` and `value` declarations, two prefixSum stores and two
 * carry updates).  This looks like removed and added diff lines shown without
 * +/- markers; the [block][digit] form appears to be the newer one -- confirm
 * against the real radixSort.ispc.
 */
task
void partialScanGlobal(
const uniform int numBlocks,
// Flat-array parameter names, used by the block*NUMDIGITS + digit indexing.
uniform int partialSum[],
uniform int prefixSum[])
// *All parameter names, used by the [block][digit] indexing.
uniform int partialSumAll[],
uniform int prefixSumAll[])
{
// The digit processed is the task index (declared once varying, once uniform).
const int digit = taskIndex;
// Reinterpret the flat arrays as rows of NUMDIGITS ints ([block][digit]).
uniform int (* uniform partialSum)[NUMDIGITS] = (uniform int (*)[NUMDIGITS])partialSumAll;
uniform int (* uniform prefixSum)[NUMDIGITS] = (uniform int (*)[NUMDIGITS]) prefixSumAll;
const uniform int digit = taskIndex;
// Total carried over from the blocks handled in earlier foreach iterations.
int carry = 0;
foreach (block = 0 ... numBlocks)
{
const int value = partialSum[block*NUMDIGITS + digit];
const int value = partialSum[block][digit];
// Per the ISPC standard library, exclusive_scan_add gives each program
// instance the sum of `value` over the lower-numbered instances.
const int scan = exclusive_scan_add(value);
// First spelling: stores value + carry (the in-gang scan result is not used)
// and overwrites carry with the value broadcast from the last instance.
prefixSum[block*NUMDIGITS + digit] = value + carry;
carry = broadcast(scan+value, programCount-1);
// Second spelling: stores the in-gang exclusive sum plus the carry, then
// adds the last instance's scan+value (the inclusive sum over the gang)
// to the carry.
// NOTE(review): carry is not read after the loop, so whatever is broadcast
// in the final iteration goes unused.
prefixSum[block][digit] = scan + carry;
carry += broadcast(scan+value, programCount-1);
}
}
/*
 * completeScanGlobal: adds a per-task, per-digit carry to the scan array.
 *
 * For every digit, reads one carry value for this task and adds it to the
 * scan entry of each block in [bbeg, bend).
 *
 * NOTE(review): two spellings of most statements are present (two second
 * parameters, two blockDim and carry declarations, a flat-pointer loop and a
 * [block][digit] loop).  This looks like removed and added diff lines shown
 * without +/- markers; the [block][digit] form appears to be the newer one --
 * confirm against the real radixSort.ispc.
 */
task
void completeScanGlobal(
uniform int excScanAll[],
// Two names for the carry array: the flat one and the *All one that is cast
// to a 2-D view below.
uniform int carryValue[])
uniform int carryValueAll[])
{
const uniform int numBlocks = taskCount;
const uniform int blockIdx = taskIndex;
// numBlocks is taskCount (set just above), so both spellings evaluate
// (2*taskCount-1)/taskCount, which is 1 in integer arithmetic for any
// positive taskCount: each task ends up covering a single block.
const uniform int blockDim = (numBlocks+taskCount-1)/taskCount;
const uniform int blockDim = (numBlocks+numBlocks-1)/numBlocks;
// Half-open block range [bbeg, bend) owned by this task, clamped to numBlocks.
const uniform int bbeg = blockIdx * blockDim;
const uniform int bend = min(bbeg + blockDim, numBlocks);
// Reinterpret the flat arrays as rows of NUMDIGITS ints ([block][digit]).
uniform int (* uniform excScanBlock)[NUMDIGITS] = (uniform int (*)[NUMDIGITS])excScanAll;
uniform int (* uniform carryValue)[NUMDIGITS] = (uniform int (*)[NUMDIGITS])carryValueAll;
// Flat-pointer spelling: move carryValue to this task's group of NUMDIGITS
// entries so that it can be indexed by digit alone.
carryValue += blockIdx*NUMDIGITS;
foreach (digit = 0 ... NUMDIGITS)
{
// Flat-pointer spelling: read the carry once, then walk a row pointer over
// the blocks of this task, adding the carry to each block's entry.
const int carry = carryValue[digit];
uniform int * uniform excScanBlock = excScanAll + bbeg*NUMDIGITS;
for (uniform int block = bbeg; block < bend; block++, excScanBlock += NUMDIGITS)
excScanBlock[digit] += carry;
// [block][digit] spelling of the same update: the carry comes from row
// blockIdx and is added to every block in [bbeg, bend).
const int carry = carryValue[blockIdx][digit];
for (uniform int block = bbeg; block < bend; block++)
excScanBlock[block][digit] += carry;
}
}
@@ -165,7 +162,7 @@ export void radixSort(
uniform int keys[],
uniform int sorted[])
{
const uniform int numBlocks = num_cores()*2;
const uniform int numBlocks = num_cores()*4;
#ifdef __NVPTX__
assert((numBlocks & 3) == 0); /* task granularity on Kepler is 4 */
@@ -204,7 +201,7 @@ export void radixSort(
countsGlobal[digit] = 0;
/* compute histogram for each digit */
launch [numBlocks] computeHistogram(keys, bit, numElements, countsBlock, countsGlobal);
launch [numBlocks] countPass(keys, bit, numElements, countsBlock, countsGlobal);
sync;
/* exclusive scan on global histogram */
@@ -214,10 +211,9 @@ export void radixSort(
{
const int value = countsGlobal[digit];
const int scan = exclusive_scan_add(value);
excScan[digit] = value + carry;
excScan[digit] = scan + carry;
carry += broadcast(scan+value, programCount-1);
}
/* computing offsets for each digit */
radixExclusiveScan(numBlocks, excScan, countsBlock, partialSum, prefixSum);