From 6099492579b2fcebcca84ce31e415a7a9d7fbe34 Mon Sep 17 00:00:00 2001 From: Evghenii Date: Wed, 29 Jan 2014 13:49:35 +0100 Subject: [PATCH] small improvement --- examples_ptx/radixSort/radixSort.cu | 2 +- examples_ptx/radixSort/radixSort.ispc | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/examples_ptx/radixSort/radixSort.cu b/examples_ptx/radixSort/radixSort.cu index 9b9587e3..2bddd0fb 100644 --- a/examples_ptx/radixSort/radixSort.cu +++ b/examples_ptx/radixSort/radixSort.cu @@ -103,11 +103,11 @@ void sortPass( #endif - int scatter; for (int i = programIndex; i < nloc; i += programCount) if (i < nloc) { const int key = mask & ((unsigned int)keys[i] >> bit); + int scatter; /* not a vector friendly loop */ #pragma unroll 1 /* needed, otherwise compiler unroll and optimizes the result :S */ for (int iv = 0; iv < programCount; iv++) diff --git a/examples_ptx/radixSort/radixSort.ispc b/examples_ptx/radixSort/radixSort.ispc index 6e160610..d2dce2ac 100644 --- a/examples_ptx/radixSort/radixSort.ispc +++ b/examples_ptx/radixSort/radixSort.ispc @@ -196,6 +196,9 @@ export void radixSort_alloc(const uniform int n) { assert(memoryPool == NULL); numBlocks = num_cores()*4; +#ifdef __NVPTX__ + numBlocks = 13*32*4; //num_cores()*4; +#endif nSharedCounts = NUMDIGITS*numBlocks; nCountsGlobal = NUMDIGITS; nExcScan = NUMDIGITS*numBlocks;