some fixes
This commit is contained in:
@@ -13,3 +13,5 @@
|
|||||||
#define warpIdx (threadIdx.x >> 5)
|
#define warpIdx (threadIdx.x >> 5)
|
||||||
#define launch(ntx,nty,ntz,func) if (programIndex==0) func<<<dim3(((ntx)+4-1)/4,nty,ntz),128>>>
|
#define launch(ntx,nty,ntz,func) if (programIndex==0) func<<<dim3(((ntx)+4-1)/4,nty,ntz),128>>>
|
||||||
#define sync cudaDeviceSynchronize()
|
#define sync cudaDeviceSynchronize()
|
||||||
|
#define cif if
|
||||||
|
#define shuffle(x,y) __shfl(x,y)
|
||||||
|
|||||||
@@ -1,9 +1,10 @@
|
|||||||
PROG=mergeSort
|
PROG=mergeSort
|
||||||
ISPC_SRC=mergeSort.ispc
|
ISPC_SRC=mergeSort.ispc
|
||||||
#CU_SRC=mergeSort.cu
|
CU_SRC=mergeSort.cu
|
||||||
CXX_SRC=mergeSort.cpp mergeSort.cpp
|
CXX_SRC=mergeSort.cpp mergeSort.cpp
|
||||||
PTXCC_REGMAX=64
|
PTXCC_REGMAX=64
|
||||||
#PTXCC_FLAGS= -Xptxas=-O3
|
#PTXCC_FLAGS= -Xptxas=-O3
|
||||||
|
#NVCC_FLAGS=-Xptxas=-O0
|
||||||
|
|
||||||
# LLVM_GPU=1
|
# LLVM_GPU=1
|
||||||
NVVM_GPU=1
|
NVVM_GPU=1
|
||||||
|
|||||||
@@ -94,6 +94,29 @@ int main (int argc, char *argv[])
|
|||||||
|
|
||||||
printf("[sort ispc + tasks]:\t[%.3f] msec [%.3f Mpair/s]\n", tISPC2, 1.0e-3*n/tISPC2);
|
printf("[sort ispc + tasks]:\t[%.3f] msec [%.3f Mpair/s]\n", tISPC2, 1.0e-3*n/tISPC2);
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
printf("\n---\n");
|
||||||
|
for (int i = 0; i < 128; i++)
|
||||||
|
{
|
||||||
|
if ((i%32) == 0) printf("\n");
|
||||||
|
printf("%d ", (int)keysSrc[i]);
|
||||||
|
}
|
||||||
|
printf("\n---\n");
|
||||||
|
for (int i = 0; i < 128; i++)
|
||||||
|
{
|
||||||
|
if ((i%32) == 0) printf("\n");
|
||||||
|
printf("%d ", (int)keysBuf[i]);
|
||||||
|
}
|
||||||
|
printf("\n---\n");
|
||||||
|
for (int i = 0; i < 128; i++)
|
||||||
|
{
|
||||||
|
if ((i%32) == 0) printf("\n");
|
||||||
|
printf("%d ", (int)keysDst[i]);
|
||||||
|
}
|
||||||
|
printf("\n---\n");
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
std::sort(keysGld, keysGld + n);
|
std::sort(keysGld, keysGld + n);
|
||||||
for (int i = 0; i < n; i++)
|
for (int i = 0; i < n; i++)
|
||||||
|
|||||||
@@ -387,6 +387,7 @@ void mergeRanksAndIndices(
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#if 0
|
||||||
static inline
|
static inline
|
||||||
void merge(
|
void merge(
|
||||||
int &dstPosA,
|
int &dstPosA,
|
||||||
@@ -404,6 +405,7 @@ void merge(
|
|||||||
if (programIndex < lenB)
|
if (programIndex < lenB)
|
||||||
dstPosB = binarySearchInclusive1(keyB, keyA, lenA, nPowTwoLenA) + programIndex;
|
dstPosB = binarySearchInclusive1(keyB, keyA, lenA, nPowTwoLenA) + programIndex;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
@@ -548,12 +550,9 @@ void mergeElementaryIntervalsKernel(
|
|||||||
valB = srcVal[segmentBase + stride + startSrcB + programIndex];
|
valB = srcVal[segmentBase + stride + startSrcB + programIndex];
|
||||||
}
|
}
|
||||||
|
|
||||||
int dstPosA, dstPosB;
|
|
||||||
// Compute destination addresses for merge data
|
// Compute destination addresses for merge data
|
||||||
if (programIndex < lenSrcA)
|
int dstPosA = binarySearchExclusive1(keyA, keyB, lenSrcB, SAMPLE_STRIDE) + programIndex;
|
||||||
dstPosA = binarySearchExclusive1(keyA, keyB, lenSrcB, SAMPLE_STRIDE) + programIndex;
|
int dstPosB = binarySearchInclusive1(keyB, keyA, lenSrcA, SAMPLE_STRIDE) + programIndex;
|
||||||
if (programIndex < lenSrcB)
|
|
||||||
dstPosB = binarySearchInclusive1(keyB, keyA, lenSrcA, SAMPLE_STRIDE) + programIndex;
|
|
||||||
|
|
||||||
|
|
||||||
int dstA = -1, dstB = -1;
|
int dstA = -1, dstB = -1;
|
||||||
|
|||||||
Reference in New Issue
Block a user