small improvement
This commit is contained in:
@@ -103,11 +103,11 @@ void sortPass(
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
int scatter;
|
|
||||||
for (int i = programIndex; i < nloc; i += programCount)
|
for (int i = programIndex; i < nloc; i += programCount)
|
||||||
if (i < nloc)
|
if (i < nloc)
|
||||||
{
|
{
|
||||||
const int key = mask & ((unsigned int)keys[i] >> bit);
|
const int key = mask & ((unsigned int)keys[i] >> bit);
|
||||||
|
int scatter;
|
||||||
/* not a vector friendly loop */
|
/* not a vector friendly loop */
|
||||||
#pragma unroll 1 /* needed, otherwise compiler unroll and optimizes the result :S */
|
#pragma unroll 1 /* needed, otherwise compiler unroll and optimizes the result :S */
|
||||||
for (int iv = 0; iv < programCount; iv++)
|
for (int iv = 0; iv < programCount; iv++)
|
||||||
|
|||||||
@@ -196,6 +196,9 @@ export void radixSort_alloc(const uniform int n)
|
|||||||
{
|
{
|
||||||
assert(memoryPool == NULL);
|
assert(memoryPool == NULL);
|
||||||
numBlocks = num_cores()*4;
|
numBlocks = num_cores()*4;
|
||||||
|
#ifdef __NVPTX__
|
||||||
|
numBlocks = 13*32*4; //num_cores()*4;
|
||||||
|
#endif
|
||||||
nSharedCounts = NUMDIGITS*numBlocks;
|
nSharedCounts = NUMDIGITS*numBlocks;
|
||||||
nCountsGlobal = NUMDIGITS;
|
nCountsGlobal = NUMDIGITS;
|
||||||
nExcScan = NUMDIGITS*numBlocks;
|
nExcScan = NUMDIGITS*numBlocks;
|
||||||
|
|||||||
Reference in New Issue
Block a user