This commit is contained in:
Evghenii
2014-01-28 19:44:23 +01:00
parent 1e5476e573
commit 29bb129c9b
2 changed files with 37 additions and 21 deletions

View File

@@ -30,24 +30,36 @@ static inline void progressbar (unsigned int x, unsigned int n, unsigned int w =
cout << "]\r" << flush; cout << "]\r" << flush;
} }
// One element of the arrays sorted by this benchmark.
// main() passes the array to ispc::radixSort reinterpreted as int64_t*,
// so this struct must stay exactly two packed 32-bit fields (8 bytes).
struct Key
{
  int32_t key;  // value the sorted output is verified against in main()
  int32_t val;  // payload; main() does not assign it in this revision
};
int main (int argc, char *argv[]) int main (int argc, char *argv[])
{ {
int i, j, n = argc == 1 ? 1000000 : atoi(argv[1]), m = n < 100 ? 1 : 50, l = n < 100 ? n : RAND_MAX; int i, j, n = argc == 1 ? 1000000 : atoi(argv[1]), m = n < 100 ? 1 : 50, l = n < 100 ? n : RAND_MAX;
double tISPC1 = 0.0, tISPC2 = 0.0, tSerial = 0.0; double tISPC1 = 0.0, tISPC2 = 0.0, tSerial = 0.0;
unsigned int *keys = new unsigned int [n]; Key *keys = new Key [n];
unsigned int *keys_orig = new unsigned int [n]; Key *keys_orig = new Key [n];
unsigned int *keys_gold = new unsigned int [n];
srand48(rtc()*65536); srand48(rtc()*65536);
#pragma omp parallel for #pragma omp parallel for
for (int i = 0; i < n; i++) for (int i = 0; i < n; i++)
keys[i] = 10*i; //drand48() * (1<<30); {
keys[i].key = drand48() * (1<<30);
// keys[i].val = i;
}
std::random_shuffle(keys, keys + n); std::random_shuffle(keys, keys + n);
#pragma omp parallel for #pragma omp parallel for
for (int i = 0; i < n; i++) for (int i = 0; i < n; i++)
{
keys_gold[i] = keys[i].key;
keys_orig[i] = keys[i]; keys_orig[i] = keys[i];
}
ispcSetMallocHeapLimit(1024*1024*1024); ispcSetMallocHeapLimit(1024*1024*1024);
@@ -55,9 +67,9 @@ int main (int argc, char *argv[])
for (i = 0; i < m; i ++) for (i = 0; i < m; i ++)
{ {
ispcMemcpy(keys, keys_orig, n*sizeof(unsigned int)); ispcMemcpy(keys, keys_orig, n*sizeof(Key));
reset_and_start_timer(); reset_and_start_timer();
ispc::radixSort(n, (int*)keys); ispc::radixSort(n, (int64_t*)keys);
tISPC2 += get_elapsed_msec(); tISPC2 += get_elapsed_msec();
if (argc != 3) if (argc != 3)
progressbar (i, m); progressbar (i, m);
@@ -67,12 +79,9 @@ int main (int argc, char *argv[])
printf("[sort ispc + tasks]:\t[%.3f] msec [%.3f Mpair/s]\n", tISPC2, 1.0e-3*n*m/tISPC2); printf("[sort ispc + tasks]:\t[%.3f] msec [%.3f Mpair/s]\n", tISPC2, 1.0e-3*n*m/tISPC2);
std::sort(keys_orig, keys_orig + n); std::sort(keys_gold, keys_gold + n);
#if 0
std::sort(keys, keys + n);
#endif
for (int i = 0; i < n; i++) for (int i = 0; i < n; i++)
assert(keys[i] == keys_orig[i]); assert(keys[i].key == keys_gold[i]);
#if 0 #if 0
@@ -101,5 +110,6 @@ int main (int argc, char *argv[])
delete keys; delete keys;
delete keys_orig; delete keys_orig;
delete keys_gold;
return 0; return 0;
} }

View File

@@ -1,9 +1,11 @@
#define NUMBITS 8 #define NUMBITS 8
#define NUMDIGITS (1<<NUMBITS) #define NUMDIGITS (1<<NUMBITS)
// Element type handled by the sort passes. The host driver passes an array of
// {int32_t key, val} structs cast to int64_t*, so one Key here is one such pair.
typedef int64 Key;
task task
void countPass( void countPass(
const uniform int keysAll[], const uniform Key keysAll[],
const uniform int bit, const uniform int bit,
const uniform int numElements, const uniform int numElements,
uniform int countsAll[], uniform int countsAll[],
@@ -15,7 +17,7 @@ void countPass(
const uniform int mask = (1 << NUMBITS) - 1; const uniform int mask = (1 << NUMBITS) - 1;
const uniform int * uniform keys = keysAll + blockIdx*blockDim; const uniform Key * uniform keys = keysAll + blockIdx*blockDim;
uniform int * uniform counts = countsAll + blockIdx*NUMDIGITS; uniform int * uniform counts = countsAll + blockIdx*NUMDIGITS;
const uniform int nloc = min(numElements - blockIdx*blockDim, blockDim); const uniform int nloc = min(numElements - blockIdx*blockDim, blockDim);
@@ -47,8 +49,8 @@ void countPass(
task task
void sortPass( void sortPass(
uniform int keysAll[], uniform Key keysAll[],
uniform int sorted[], uniform Key sorted[],
uniform int bit, uniform int bit,
uniform int numElements, uniform int numElements,
uniform int digitOffsetsAll[], uniform int digitOffsetsAll[],
@@ -67,7 +69,7 @@ void sortPass(
#endif #endif
const uniform int keyIndex = blockIdx * blockDim; const uniform int keyIndex = blockIdx * blockDim;
uniform int * uniform keys = keysAll + keyIndex; uniform Key * uniform keys = keysAll + keyIndex;
uniform int * uniform digitOffsets = digitOffsetsAll + blockIdx*NUMDIGITS; uniform int * uniform digitOffsets = digitOffsetsAll + blockIdx*NUMDIGITS;
const uniform int nloc = min(numElements - keyIndex, blockDim); const uniform int nloc = min(numElements - keyIndex, blockDim);
@@ -96,8 +98,12 @@ void sortPass(
} }
else else
{ {
#if 1 //def __NVPTX__ #ifdef __NVPTX__
rel = atomic_add_global(&localCounts[key],1); /* buf fix, somehow atomic w/o branching fails */
if (programIndex < 16)
rel = atomic_add_global(&localCounts[key],1);
else
rel = atomic_add_global(&localCounts[key],1);
#else #else
rel = atomic_add_local(&localCounts[key],1); rel = atomic_add_local(&localCounts[key],1);
#endif #endif
@@ -215,7 +221,7 @@ static uniform int * uniform partialSum;
static uniform int * uniform prefixSum; static uniform int * uniform prefixSum;
static uniform int numElementsBuf = 0; static uniform int numElementsBuf = 0;
static uniform int * uniform bufKeys; static uniform Key * uniform bufKeys;
export void radixSort_alloc(const uniform int n) export void radixSort_alloc(const uniform int n)
{ {
@@ -268,7 +274,7 @@ export void radixSort_free()
export void radixSort( export void radixSort(
const uniform int numElements, const uniform int numElements,
uniform int keys[]) uniform Key keys[])
{ {
#ifdef __NVPTX__ #ifdef __NVPTX__
assert((numBlocks & 3) == 0); /* task granularity on Kepler is 4 */ assert((numBlocks & 3) == 0); /* task granularity on Kepler is 4 */
@@ -279,7 +285,7 @@ export void radixSort(
if (numElementsBuf == 0) if (numElementsBuf == 0)
{ {
numElementsBuf = numElements; numElementsBuf = numElements;
bufKeys = uniform new uniform int[numElementsBuf]; bufKeys = uniform new uniform Key[numElementsBuf];
} }
const uniform int blockDim = (numElements + numBlocks - 1) / numBlocks; const uniform int blockDim = (numElements + numBlocks - 1) / numBlocks;
@@ -319,7 +325,7 @@ export void radixSort(
sharedCounts); sharedCounts);
sync; sync;
uniform int * uniform tmp = keys; uniform Key * uniform tmp = keys;
keys = bufKeys; keys = bufKeys;
bufKeys = tmp; bufKeys = tmp;
} }