diff --git a/examples_cuda/sort/sort1.ispc b/examples_cuda/sort/sort1.ispc index e3acb4f0..855aa194 100644 --- a/examples_cuda/sort/sort1.ispc +++ b/examples_cuda/sort/sort1.ispc @@ -139,17 +139,15 @@ task void copy (uniform int span, uniform int n, uniform int64 from[], uniform i task void pack (uniform int span, uniform int n, uniform unsigned int code[], uniform int64 pair[]) { -#if 0 if (taskIndex >= taskCount) return; uniform int start = taskIndex*span; uniform int end = taskIndex == taskCount-1 ? n : start+span; for (int i = programIndex + start; i < end; i += programCount) if (i < end) - { - pair[i] = ((int64)i<<32)+code[i]; - } -#endif + { + pair[i] = ((int64)i<<32)+code[i]; + } } task void unpack (uniform int span, uniform int n, uniform int64 pair[], uniform int unsigned code[], uniform int order[]) diff --git a/examples_cuda/sort/sort_cu.cpp b/examples_cuda/sort/sort_cu.cpp index b8e9edf5..e636d525 100644 --- a/examples_cuda/sort/sort_cu.cpp +++ b/examples_cuda/sort/sort_cu.cpp @@ -351,7 +351,7 @@ int main (int argc, char *argv[]) createContext(); /*******************/ - int ntask = 13*4; + int ntask = 13*4*2; devicePtr d_code = deviceMalloc(n*sizeof(int)); devicePtr d_order = deviceMalloc(n*sizeof(int)); devicePtr d_pair = deviceMalloc(n*2*sizeof(int));