diff --git a/examples_ptx/radixSort/radixSort.cu b/examples_ptx/radixSort/radixSort.cu index 9b9587e3..2bddd0fb 100644 --- a/examples_ptx/radixSort/radixSort.cu +++ b/examples_ptx/radixSort/radixSort.cu @@ -103,11 +103,11 @@ void sortPass( #endif - int scatter; for (int i = programIndex; i < nloc; i += programCount) if (i < nloc) { const int key = mask & ((unsigned int)keys[i] >> bit); + int scatter; /* not a vector friendly loop */ #pragma unroll 1 /* needed, otherwise compiler unroll and optimizes the result :S */ for (int iv = 0; iv < programCount; iv++) diff --git a/examples_ptx/radixSort/radixSort.ispc b/examples_ptx/radixSort/radixSort.ispc index 6e160610..d2dce2ac 100644 --- a/examples_ptx/radixSort/radixSort.ispc +++ b/examples_ptx/radixSort/radixSort.ispc @@ -196,6 +196,9 @@ export void radixSort_alloc(const uniform int n) { assert(memoryPool == NULL); numBlocks = num_cores()*4; +#ifdef __NVPTX__ + numBlocks = 13*32*4; //num_cores()*4; +#endif nSharedCounts = NUMDIGITS*numBlocks; nCountsGlobal = NUMDIGITS; nExcScan = NUMDIGITS*numBlocks;