+1
This commit is contained in:
@@ -30,24 +30,36 @@ static inline void progressbar (unsigned int x, unsigned int n, unsigned int w =
|
|||||||
cout << "]\r" << flush;
|
cout << "]\r" << flush;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Record sorted by the benchmark: a 32-bit key paired with a 32-bit value.
// main() passes the Key array to ispc::radixSort through an int64_t* cast,
// so the sort sees each record as one 64-bit element.
struct Key
|
||||||
|
{
|
||||||
|
int32_t key,val;  // in the visible part of main() only .key is assigned; the .val assignment is commented out
|
||||||
|
};
|
||||||
|
|
||||||
int main (int argc, char *argv[])
|
int main (int argc, char *argv[])
|
||||||
{
|
{
|
||||||
int i, j, n = argc == 1 ? 1000000 : atoi(argv[1]), m = n < 100 ? 1 : 50, l = n < 100 ? n : RAND_MAX;
|
int i, j, n = argc == 1 ? 1000000 : atoi(argv[1]), m = n < 100 ? 1 : 50, l = n < 100 ? n : RAND_MAX;
|
||||||
double tISPC1 = 0.0, tISPC2 = 0.0, tSerial = 0.0;
|
double tISPC1 = 0.0, tISPC2 = 0.0, tSerial = 0.0;
|
||||||
unsigned int *keys = new unsigned int [n];
|
Key *keys = new Key [n];
|
||||||
unsigned int *keys_orig = new unsigned int [n];
|
Key *keys_orig = new Key [n];
|
||||||
|
unsigned int *keys_gold = new unsigned int [n];
|
||||||
|
|
||||||
srand48(rtc()*65536);
|
srand48(rtc()*65536);
|
||||||
|
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for
|
||||||
for (int i = 0; i < n; i++)
|
for (int i = 0; i < n; i++)
|
||||||
keys[i] = 10*i; //drand48() * (1<<30);
|
{
|
||||||
|
keys[i].key = drand48() * (1<<30);
|
||||||
|
// keys[i].val = i;
|
||||||
|
}
|
||||||
|
|
||||||
std::random_shuffle(keys, keys + n);
|
std::random_shuffle(keys, keys + n);
|
||||||
|
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for
|
||||||
for (int i = 0; i < n; i++)
|
for (int i = 0; i < n; i++)
|
||||||
|
{
|
||||||
|
keys_gold[i] = keys[i].key;
|
||||||
keys_orig[i] = keys[i];
|
keys_orig[i] = keys[i];
|
||||||
|
}
|
||||||
|
|
||||||
ispcSetMallocHeapLimit(1024*1024*1024);
|
ispcSetMallocHeapLimit(1024*1024*1024);
|
||||||
|
|
||||||
@@ -55,9 +67,9 @@ int main (int argc, char *argv[])
|
|||||||
|
|
||||||
for (i = 0; i < m; i ++)
|
for (i = 0; i < m; i ++)
|
||||||
{
|
{
|
||||||
ispcMemcpy(keys, keys_orig, n*sizeof(unsigned int));
|
ispcMemcpy(keys, keys_orig, n*sizeof(Key));
|
||||||
reset_and_start_timer();
|
reset_and_start_timer();
|
||||||
ispc::radixSort(n, (int*)keys);
|
ispc::radixSort(n, (int64_t*)keys);
|
||||||
tISPC2 += get_elapsed_msec();
|
tISPC2 += get_elapsed_msec();
|
||||||
if (argc != 3)
|
if (argc != 3)
|
||||||
progressbar (i, m);
|
progressbar (i, m);
|
||||||
@@ -67,12 +79,9 @@ int main (int argc, char *argv[])
|
|||||||
|
|
||||||
printf("[sort ispc + tasks]:\t[%.3f] msec [%.3f Mpair/s]\n", tISPC2, 1.0e-3*n*m/tISPC2);
|
printf("[sort ispc + tasks]:\t[%.3f] msec [%.3f Mpair/s]\n", tISPC2, 1.0e-3*n*m/tISPC2);
|
||||||
|
|
||||||
std::sort(keys_orig, keys_orig + n);
|
std::sort(keys_gold, keys_gold + n);
|
||||||
#if 0
|
|
||||||
std::sort(keys, keys + n);
|
|
||||||
#endif
|
|
||||||
for (int i = 0; i < n; i++)
|
for (int i = 0; i < n; i++)
|
||||||
assert(keys[i] == keys_orig[i]);
|
assert(keys[i].key == keys_gold[i]);
|
||||||
|
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
@@ -101,5 +110,6 @@ int main (int argc, char *argv[])
|
|||||||
|
|
||||||
delete keys;
|
delete keys;
|
||||||
delete keys_orig;
|
delete keys_orig;
|
||||||
|
delete keys_gold;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,9 +1,11 @@
|
|||||||
#define NUMBITS 8
|
#define NUMBITS 8
|
||||||
#define NUMDIGITS (1<<NUMBITS)
|
#define NUMDIGITS (1<<NUMBITS)
|
||||||
|
|
||||||
|
typedef int64 Key;
|
||||||
|
|
||||||
task
|
task
|
||||||
void countPass(
|
void countPass(
|
||||||
const uniform int keysAll[],
|
const uniform Key keysAll[],
|
||||||
const uniform int bit,
|
const uniform int bit,
|
||||||
const uniform int numElements,
|
const uniform int numElements,
|
||||||
uniform int countsAll[],
|
uniform int countsAll[],
|
||||||
@@ -15,7 +17,7 @@ void countPass(
|
|||||||
|
|
||||||
const uniform int mask = (1 << NUMBITS) - 1;
|
const uniform int mask = (1 << NUMBITS) - 1;
|
||||||
|
|
||||||
const uniform int * uniform keys = keysAll + blockIdx*blockDim;
|
const uniform Key * uniform keys = keysAll + blockIdx*blockDim;
|
||||||
uniform int * uniform counts = countsAll + blockIdx*NUMDIGITS;
|
uniform int * uniform counts = countsAll + blockIdx*NUMDIGITS;
|
||||||
const uniform int nloc = min(numElements - blockIdx*blockDim, blockDim);
|
const uniform int nloc = min(numElements - blockIdx*blockDim, blockDim);
|
||||||
|
|
||||||
@@ -47,8 +49,8 @@ void countPass(
|
|||||||
|
|
||||||
task
|
task
|
||||||
void sortPass(
|
void sortPass(
|
||||||
uniform int keysAll[],
|
uniform Key keysAll[],
|
||||||
uniform int sorted[],
|
uniform Key sorted[],
|
||||||
uniform int bit,
|
uniform int bit,
|
||||||
uniform int numElements,
|
uniform int numElements,
|
||||||
uniform int digitOffsetsAll[],
|
uniform int digitOffsetsAll[],
|
||||||
@@ -67,7 +69,7 @@ void sortPass(
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
const uniform int keyIndex = blockIdx * blockDim;
|
const uniform int keyIndex = blockIdx * blockDim;
|
||||||
uniform int * uniform keys = keysAll + keyIndex;
|
uniform Key * uniform keys = keysAll + keyIndex;
|
||||||
uniform int * uniform digitOffsets = digitOffsetsAll + blockIdx*NUMDIGITS;
|
uniform int * uniform digitOffsets = digitOffsetsAll + blockIdx*NUMDIGITS;
|
||||||
const uniform int nloc = min(numElements - keyIndex, blockDim);
|
const uniform int nloc = min(numElements - keyIndex, blockDim);
|
||||||
|
|
||||||
@@ -96,7 +98,11 @@ void sortPass(
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
#if 1 //def __NVPTX__
|
#ifdef __NVPTX__
|
||||||
|
/* bug fix: somehow the atomic fails without branching */
|
||||||
|
if (programIndex < 16)
|
||||||
|
rel = atomic_add_global(&localCounts[key],1);
|
||||||
|
else
|
||||||
rel = atomic_add_global(&localCounts[key],1);
|
rel = atomic_add_global(&localCounts[key],1);
|
||||||
#else
|
#else
|
||||||
rel = atomic_add_local(&localCounts[key],1);
|
rel = atomic_add_local(&localCounts[key],1);
|
||||||
@@ -215,7 +221,7 @@ static uniform int * uniform partialSum;
|
|||||||
static uniform int * uniform prefixSum;
|
static uniform int * uniform prefixSum;
|
||||||
|
|
||||||
static uniform int numElementsBuf = 0;
|
static uniform int numElementsBuf = 0;
|
||||||
static uniform int * uniform bufKeys;
|
static uniform Key * uniform bufKeys;
|
||||||
|
|
||||||
export void radixSort_alloc(const uniform int n)
|
export void radixSort_alloc(const uniform int n)
|
||||||
{
|
{
|
||||||
@@ -268,7 +274,7 @@ export void radixSort_free()
|
|||||||
|
|
||||||
export void radixSort(
|
export void radixSort(
|
||||||
const uniform int numElements,
|
const uniform int numElements,
|
||||||
uniform int keys[])
|
uniform Key keys[])
|
||||||
{
|
{
|
||||||
#ifdef __NVPTX__
|
#ifdef __NVPTX__
|
||||||
assert((numBlocks & 3) == 0); /* task granularity on Kepler is 4 */
|
assert((numBlocks & 3) == 0); /* task granularity on Kepler is 4 */
|
||||||
@@ -279,7 +285,7 @@ export void radixSort(
|
|||||||
if (numElementsBuf == 0)
|
if (numElementsBuf == 0)
|
||||||
{
|
{
|
||||||
numElementsBuf = numElements;
|
numElementsBuf = numElements;
|
||||||
bufKeys = uniform new uniform int[numElementsBuf];
|
bufKeys = uniform new uniform Key[numElementsBuf];
|
||||||
}
|
}
|
||||||
|
|
||||||
const uniform int blockDim = (numElements + numBlocks - 1) / numBlocks;
|
const uniform int blockDim = (numElements + numBlocks - 1) / numBlocks;
|
||||||
@@ -319,7 +325,7 @@ export void radixSort(
|
|||||||
sharedCounts);
|
sharedCounts);
|
||||||
sync;
|
sync;
|
||||||
|
|
||||||
uniform int * uniform tmp = keys;
|
uniform Key * uniform tmp = keys;
|
||||||
keys = bufKeys;
|
keys = bufKeys;
|
||||||
bufKeys = tmp;
|
bufKeys = tmp;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user