diff --git a/examples_ptx/mergeSort/mergeSort.ispc b/examples_ptx/mergeSort/mergeSort.ispc index cdbd27b4..eb464e4e 100644 --- a/examples_ptx/mergeSort/mergeSort.ispc +++ b/examples_ptx/mergeSort/mergeSort.ispc @@ -505,6 +505,12 @@ void mergeElementaryIntervals( limitsB, stride, N); + if (lastSegmentElements <= stride) + foreach (i = 0 ... lastSegmentElements) + { + dstKey[N-lastSegmentElements+i] = srcKey[N-lastSegmentElements+i]; + dstVal[N-lastSegmentElements+i] = srcVal[N-lastSegmentElements+i]; + } sync; } @@ -580,8 +586,6 @@ void mergeSort( #if 1 for (uniform int stride = 2*programCount; stride < N; stride <<= 1) { - const uniform int lastSegmentElements = N % (2 * stride); - // cpu: 30 gpu: 112 M/s { #if 1 @@ -602,15 +606,6 @@ void mergeSort( mergeElementaryIntervals(oKey, oVal, iKey, iVal, limitsA, limitsB, stride, N); } - if (lastSegmentElements <= stride) - foreach (i = 0 ... lastSegmentElements) - { - oKey[N-lastSegmentElements+i] = iKey[N-lastSegmentElements+i]; - oVal[N-lastSegmentElements+i] = iVal[N-lastSegmentElements+i]; - } - memory_barrier(); - - { uniform Key_t * uniform tmpKey = iKey; iKey = oKey; diff --git a/examples_ptx/tasksys.cpp b/examples_ptx/tasksys.cpp index 1aae61f4..c60c1af2 100644 --- a/examples_ptx/tasksys.cpp +++ b/examples_ptx/tasksys.cpp @@ -962,7 +962,7 @@ InitTaskSystem() { inline void TaskGroup::Launch(int baseIndex, int count) { -#pragma omp parallel for +#pragma omp parallel for schedule(guided) for(int i = 0; i < count; i++) { TaskInfo *ti = GetTaskInfo(baseIndex + i);