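Small improvement: narrow the scope of `scatter` in sortPass (radixSort.cu) and override numBlocks with a fixed value under __NVPTX__ in radixSort_alloc (radixSort.ispc)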

2014-01-29 13:49:35 +01:00
parent d4b46b1295
commit 6099492579
2 changed files with 4 additions and 1 deletions
--- a/examples_ptx/radixSort/radixSort.cu
+++ b/examples_ptx/radixSort/radixSort.cu
@@ -103,11 +103,11 @@ void sortPass(
 #endif


-  int scatter;
  for (int i = programIndex; i < nloc; i += programCount)
    if (i < nloc)
    {
      const int key = mask & ((unsigned int)keys[i] >> bit);
+      int scatter;
      /* not a vector friendly loop */
 #pragma unroll 1  /* needed, otherwise compiler unroll and optimizes the result :S */
      for (int iv = 0; iv < programCount; iv++)
--- a/examples_ptx/radixSort/radixSort.ispc
+++ b/examples_ptx/radixSort/radixSort.ispc
@@ -196,6 +196,9 @@ export void radixSort_alloc(const uniform int n)
 {
  assert(memoryPool == NULL);
  numBlocks     = num_cores()*4;
+#ifdef __NVPTX__
+  numBlocks     = 13*32*4; //num_cores()*4;
+#endif
  nSharedCounts = NUMDIGITS*numBlocks;
  nCountsGlobal = NUMDIGITS;
  nExcScan      = NUMDIGITS*numBlocks;