+1
This commit is contained in:
@@ -30,24 +30,36 @@ static inline void progressbar (unsigned int x, unsigned int n, unsigned int w =
|
||||
cout << "]\r" << flush;
|
||||
}
|
||||
|
||||
// Element type of the arrays being sorted: two 32-bit signed integers, `key` and `val`.
// main() passes the Key array to ispc::radixSort cast to int64_t*, so each
// element is handed to the sort as a single 64-bit value.
// NOTE(review): which half of that 64-bit value holds `key` depends on host
// byte order — confirm against the bit range the ISPC passes actually sort.
struct Key
|
||||
{
|
||||
// `key` is filled with a random value and checked against keys_gold after the sort;
// `val` is not assigned in the visible part of main() (its assignment is commented out).
int32_t key,val;
|
||||
};
|
||||
|
||||
int main (int argc, char *argv[])
|
||||
{
|
||||
int i, j, n = argc == 1 ? 1000000 : atoi(argv[1]), m = n < 100 ? 1 : 50, l = n < 100 ? n : RAND_MAX;
|
||||
double tISPC1 = 0.0, tISPC2 = 0.0, tSerial = 0.0;
|
||||
unsigned int *keys = new unsigned int [n];
|
||||
unsigned int *keys_orig = new unsigned int [n];
|
||||
Key *keys = new Key [n];
|
||||
Key *keys_orig = new Key [n];
|
||||
unsigned int *keys_gold = new unsigned int [n];
|
||||
|
||||
srand48(rtc()*65536);
|
||||
|
||||
#pragma omp parallel for
|
||||
for (int i = 0; i < n; i++)
|
||||
keys[i] = 10*i; //drand48() * (1<<30);
|
||||
{
|
||||
keys[i].key = drand48() * (1<<30);
|
||||
// keys[i].val = i;
|
||||
}
|
||||
|
||||
std::random_shuffle(keys, keys + n);
|
||||
|
||||
#pragma omp parallel for
|
||||
for (int i = 0; i < n; i++)
|
||||
{
|
||||
keys_gold[i] = keys[i].key;
|
||||
keys_orig[i] = keys[i];
|
||||
}
|
||||
|
||||
ispcSetMallocHeapLimit(1024*1024*1024);
|
||||
|
||||
@@ -55,9 +67,9 @@ int main (int argc, char *argv[])
|
||||
|
||||
for (i = 0; i < m; i ++)
|
||||
{
|
||||
ispcMemcpy(keys, keys_orig, n*sizeof(unsigned int));
|
||||
ispcMemcpy(keys, keys_orig, n*sizeof(Key));
|
||||
reset_and_start_timer();
|
||||
ispc::radixSort(n, (int*)keys);
|
||||
ispc::radixSort(n, (int64_t*)keys);
|
||||
tISPC2 += get_elapsed_msec();
|
||||
if (argc != 3)
|
||||
progressbar (i, m);
|
||||
@@ -67,12 +79,9 @@ int main (int argc, char *argv[])
|
||||
|
||||
printf("[sort ispc + tasks]:\t[%.3f] msec [%.3f Mpair/s]\n", tISPC2, 1.0e-3*n*m/tISPC2);
|
||||
|
||||
std::sort(keys_orig, keys_orig + n);
|
||||
#if 0
|
||||
std::sort(keys, keys + n);
|
||||
#endif
|
||||
std::sort(keys_gold, keys_gold + n);
|
||||
for (int i = 0; i < n; i++)
|
||||
assert(keys[i] == keys_orig[i]);
|
||||
assert(keys[i].key == keys_gold[i]);
|
||||
|
||||
|
||||
#if 0
|
||||
@@ -101,5 +110,6 @@ int main (int argc, char *argv[])
|
||||
|
||||
delete keys;
|
||||
delete keys_orig;
|
||||
delete keys_gold;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
#define NUMBITS 8
|
||||
#define NUMDIGITS (1<<NUMBITS)
|
||||
|
||||
// Element type used by countPass, sortPass and radixSort in this file:
// each entry is one 64-bit integer.
typedef int64 Key;
|
||||
|
||||
task
|
||||
void countPass(
|
||||
const uniform int keysAll[],
|
||||
const uniform Key keysAll[],
|
||||
const uniform int bit,
|
||||
const uniform int numElements,
|
||||
uniform int countsAll[],
|
||||
@@ -15,7 +17,7 @@ void countPass(
|
||||
|
||||
const uniform int mask = (1 << NUMBITS) - 1;
|
||||
|
||||
const uniform int * uniform keys = keysAll + blockIdx*blockDim;
|
||||
const uniform Key * uniform keys = keysAll + blockIdx*blockDim;
|
||||
uniform int * uniform counts = countsAll + blockIdx*NUMDIGITS;
|
||||
const uniform int nloc = min(numElements - blockIdx*blockDim, blockDim);
|
||||
|
||||
@@ -47,8 +49,8 @@ void countPass(
|
||||
|
||||
task
|
||||
void sortPass(
|
||||
uniform int keysAll[],
|
||||
uniform int sorted[],
|
||||
uniform Key keysAll[],
|
||||
uniform Key sorted[],
|
||||
uniform int bit,
|
||||
uniform int numElements,
|
||||
uniform int digitOffsetsAll[],
|
||||
@@ -67,7 +69,7 @@ void sortPass(
|
||||
#endif
|
||||
|
||||
const uniform int keyIndex = blockIdx * blockDim;
|
||||
uniform int * uniform keys = keysAll + keyIndex;
|
||||
uniform Key * uniform keys = keysAll + keyIndex;
|
||||
uniform int * uniform digitOffsets = digitOffsetsAll + blockIdx*NUMDIGITS;
|
||||
const uniform int nloc = min(numElements - keyIndex, blockDim);
|
||||
|
||||
@@ -96,7 +98,11 @@ void sortPass(
|
||||
}
|
||||
else
|
||||
{
|
||||
#if 1 //def __NVPTX__
|
||||
#ifdef __NVPTX__
|
||||
/* bug fix: somehow the atomic fails without branching */
|
||||
if (programIndex < 16)
|
||||
rel = atomic_add_global(&localCounts[key],1);
|
||||
else
|
||||
rel = atomic_add_global(&localCounts[key],1);
|
||||
#else
|
||||
rel = atomic_add_local(&localCounts[key],1);
|
||||
@@ -215,7 +221,7 @@ static uniform int * uniform partialSum;
|
||||
static uniform int * uniform prefixSum;
|
||||
|
||||
static uniform int numElementsBuf = 0;
|
||||
static uniform int * uniform bufKeys;
|
||||
static uniform Key * uniform bufKeys;
|
||||
|
||||
export void radixSort_alloc(const uniform int n)
|
||||
{
|
||||
@@ -268,7 +274,7 @@ export void radixSort_free()
|
||||
|
||||
export void radixSort(
|
||||
const uniform int numElements,
|
||||
uniform int keys[])
|
||||
uniform Key keys[])
|
||||
{
|
||||
#ifdef __NVPTX__
|
||||
assert((numBlocks & 3) == 0); /* task granularity on Kepler is 4 */
|
||||
@@ -279,7 +285,7 @@ export void radixSort(
|
||||
if (numElementsBuf == 0)
|
||||
{
|
||||
numElementsBuf = numElements;
|
||||
bufKeys = uniform new uniform int[numElementsBuf];
|
||||
bufKeys = uniform new uniform Key[numElementsBuf];
|
||||
}
|
||||
|
||||
const uniform int blockDim = (numElements + numBlocks - 1) / numBlocks;
|
||||
@@ -319,7 +325,7 @@ export void radixSort(
|
||||
sharedCounts);
|
||||
sync;
|
||||
|
||||
uniform int * uniform tmp = keys;
|
||||
uniform Key * uniform tmp = keys;
|
||||
keys = bufKeys;
|
||||
bufKeys = tmp;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user