added performance data

This commit is contained in:
Evghenii
2014-01-30 11:45:23 +01:00
parent e93c2b88ba
commit 97971bef0c
2 changed files with 86 additions and 54 deletions

View File

@@ -583,40 +583,56 @@ void mergeSort___export(
assert(N <= SAMPLE_STRIDE * MAX_SAMPLE_COUNT); assert(N <= SAMPLE_STRIDE * MAX_SAMPLE_COUNT);
assert(N % (programCount*2) == 0); assert(N % (programCount*2) == 0);
mergeSortGang(iKey, iVal, srcKey, srcVal, N/(2*programCount));
// k20m: 140 M/s
{
// k20m: 2367 M/s
mergeSortGang(iKey, iVal, srcKey, srcVal, N/(2*programCount));
#if 1 #if 1
for (uniform int stride = 2*programCount; stride < N; stride <<= 1) for (uniform int stride = 2*programCount; stride < N; stride <<= 1)
{ {
const uniform int lastSegmentElements = N % (2 * stride); const uniform int lastSegmentElements = N % (2 * stride);
//Find sample ranks and prepare for limiters merge // k20m: 271 M/s
generateSampleRanks(ranksA, ranksB, iKey, stride, N); {
#if 1
//Merge ranks and indices // k20m: 944 M/s
mergeRanksAndIndices(limitsA, limitsB, ranksA, ranksB, stride, N);
//Merge elementary intervals
mergeElementaryIntervals(nTasks, oKey, oVal, iKey, iVal, limitsA, limitsB, stride, N);
if (lastSegmentElements <= stride)
for (int i = programIndex; i < lastSegmentElements; i += programCount)
if (i < lastSegmentElements)
{ {
oKey[N-lastSegmentElements+i] = iKey[N-lastSegmentElements+i]; // k20m: 1396 M/s
oVal[N-lastSegmentElements+i] = iVal[N-lastSegmentElements+i]; //Find sample ranks and prepare for limiters merge
generateSampleRanks(ranksA, ranksB, iKey, stride, N);
// k20m: 2379 M/s
//Merge ranks and indices
mergeRanksAndIndices(limitsA, limitsB, ranksA, ranksB, stride, N);
} }
#endif
// k20m: 371 M/s
//Merge elementary intervals
mergeElementaryIntervals(nTasks, oKey, oVal, iKey, iVal, limitsA, limitsB, stride, N);
}
if (lastSegmentElements <= stride)
for (int i = programIndex; i < lastSegmentElements; i += programCount)
if (i < lastSegmentElements)
{
oKey[N-lastSegmentElements+i] = iKey[N-lastSegmentElements+i];
oVal[N-lastSegmentElements+i] = iVal[N-lastSegmentElements+i];
}
{ {
uniform Key_t * uniform tmpKey = iKey; uniform Key_t * uniform tmpKey = iKey;
iKey = oKey; iKey = oKey;
oKey = tmpKey; oKey = tmpKey;
} }
{ {
uniform Val_t * uniform tmpVal = iVal; uniform Val_t * uniform tmpVal = iVal;
iVal = oVal; iVal = oVal;
oVal = tmpVal; oVal = tmpVal;
}
} }
} }
#endif #endif

View File

@@ -687,40 +687,56 @@ void mergeSort(
assert(N <= SAMPLE_STRIDE * MAX_SAMPLE_COUNT); assert(N <= SAMPLE_STRIDE * MAX_SAMPLE_COUNT);
assert(N % (programCount*2) == 0); assert(N % (programCount*2) == 0);
mergeSortGang(iKey, iVal, srcKey, srcVal, N/(2*programCount));
// cpu: 28 gpu: 74 M/s
{
// cpu: 356 gpu: 534 M/s
mergeSortGang(iKey, iVal, srcKey, srcVal, N/(2*programCount));
#if 1 #if 1
for (uniform int stride = 2*programCount; stride < N; stride <<= 1) for (uniform int stride = 2*programCount; stride < N; stride <<= 1)
{ {
const uniform int lastSegmentElements = N % (2 * stride); const uniform int lastSegmentElements = N % (2 * stride);
//Find sample ranks and prepare for limiters merge // cpu: 30 gpu: 112 M/s
generateSampleRanks(ranksA, ranksB, iKey, stride, N);
//Merge ranks and indices
mergeRanksAndIndices(limitsA, limitsB, ranksA, ranksB, stride, N);
//Merge elementary intervals
mergeElementaryIntervals(nTasks, oKey, oVal, iKey, iVal, limitsA, limitsB, stride, N);
if (lastSegmentElements <= stride)
foreach (i = 0 ... lastSegmentElements)
{ {
oKey[N-lastSegmentElements+i] = iKey[N-lastSegmentElements+i]; #if 1
oVal[N-lastSegmentElements+i] = iVal[N-lastSegmentElements+i]; // cpu: 121 gpu: 460 M/s
{
// cpu: 190 gpu: 600 M/s
//Find sample ranks and prepare for limiters merge
generateSampleRanks(ranksA, ranksB, iKey, stride, N);
// cpu: 120 gpu: 457 M/s
//Merge ranks and indices
mergeRanksAndIndices(limitsA, limitsB, ranksA, ranksB, stride, N);
}
#endif
// cpu: 287 gpu: 194 M/s
//Merge elementary intervals
mergeElementaryIntervals(nTasks, oKey, oVal, iKey, iVal, limitsA, limitsB, stride, N);
} }
memory_barrier();
if (lastSegmentElements <= stride)
foreach (i = 0 ... lastSegmentElements)
{
oKey[N-lastSegmentElements+i] = iKey[N-lastSegmentElements+i];
oVal[N-lastSegmentElements+i] = iVal[N-lastSegmentElements+i];
}
memory_barrier();
{ {
uniform Key_t * uniform tmpKey = iKey; uniform Key_t * uniform tmpKey = iKey;
iKey = oKey; iKey = oKey;
oKey = tmpKey; oKey = tmpKey;
} }
{ {
uniform Val_t * uniform tmpVal = iVal; uniform Val_t * uniform tmpVal = iVal;
iVal = oVal; iVal = oVal;
oVal = tmpVal; oVal = tmpVal;
}
} }
} }
#endif #endif