diff --git a/examples_ptx/radixSort/Makefile_gpu b/examples_ptx/radixSort/Makefile_gpu index ad48a011..2e08ab98 100644 --- a/examples_ptx/radixSort/Makefile_gpu +++ b/examples_ptx/radixSort/Makefile_gpu @@ -2,7 +2,7 @@ PROG=radixSort ISPC_SRC=radixSort.ispc CU_SRC=radixSort.cu -NVCC_FLAGS=-Xptxas=-O1 +# NVCC_FLAGS=-Xptxas=-O1 CXX_SRC=radixSort.cpp radixSort.cpp PTXCC_REGMAX=64 diff --git a/examples_ptx/radixSort/radixSort.cu b/examples_ptx/radixSort/radixSort.cu index 5a56602a..c2fae9ab 100644 --- a/examples_ptx/radixSort/radixSort.cu +++ b/examples_ptx/radixSort/radixSort.cu @@ -109,6 +109,7 @@ void sortPass( const int key = mask & ((unsigned int)keys[i] >> bit); int scatter; /* not a vector friendly loop */ +#pragma unroll 1 /* needed, otherwise the compiler unrolls this loop and optimizes the result :S */ for (int iv = 0; iv < programCount; iv++) if (programIndex == iv) scatter = digitOffsets[key]++;