From d4b46b1295c0bd7e1544f713b6a90d1a4bc8d907 Mon Sep 17 00:00:00 2001
From: Evghenii <egaburov@dds.nl>
Date: Wed, 29 Jan 2014 13:47:39 +0100
Subject: [PATCH] +checkpoint

---
 examples_ptx/radixSort/radixSort.cu   | 6 +++---
 examples_ptx/radixSort/radixSort.ispc | 2 --
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/examples_ptx/radixSort/radixSort.cu b/examples_ptx/radixSort/radixSort.cu
index be5537b9..9b9587e3 100644
--- a/examples_ptx/radixSort/radixSort.cu
+++ b/examples_ptx/radixSort/radixSort.cu
@@ -93,9 +93,9 @@ void sortPass(
   const  int mask = (1 << NUMBITS) - 1;
 
   /* copy digit offset from Gmem to Lmem */ 
-#if 0
+#if 1
   __shared__ int digitOffsets_sh[NUMDIGITS*4];
-  int *digitOffsets = digitOffsets_sh + warpIdx*NUMDIGITS;
+  volatile int *digitOffsets = digitOffsets_sh + warpIdx*NUMDIGITS;
   for (int digit = programIndex; digit < NUMDIGITS; digit += programCount)
     digitOffsets[digit] = digitOffsetsAll[blkIdx*NUMDIGITS + digit];
 #else
@@ -103,11 +103,11 @@ void sortPass(
 #endif
 
 
+  int scatter;
   for (int i = programIndex; i < nloc; i += programCount)
     if (i < nloc)
     {
       const int key = mask & ((unsigned int)keys[i] >> bit);
-      int scatter;
       /* not a vector friendly loop */
 #pragma unroll 1  /* needed, otherwise compiler unroll and optimizes the result :S */
       for (int iv = 0; iv < programCount; iv++)
diff --git a/examples_ptx/radixSort/radixSort.ispc b/examples_ptx/radixSort/radixSort.ispc
index 8e97bc5b..6e160610 100644
--- a/examples_ptx/radixSort/radixSort.ispc
+++ b/examples_ptx/radixSort/radixSort.ispc
@@ -284,7 +284,6 @@ export void radixSort(
     /* computing offsets for each digit */
     radixExclusiveScan(numBlocks, excScan, counts, partialSum, prefixSum);
 
-#if 1
     /* sorting */
     launch [numBlocks] 
       sortPass(
@@ -294,7 +293,6 @@ export void radixSort(
           numElements,
           excScan);
     sync;
-#endif
   }
 
 }