From e93c2b88ba12b72540e95d7b41e06ca997957d4f Mon Sep 17 00:00:00 2001 From: Evghenii Date: Thu, 30 Jan 2014 11:32:27 +0100 Subject: [PATCH] some fixes --- examples_ptx/cuda_helpers.cuh | 2 ++ examples_ptx/mergeSort/Makefile_gpu | 3 ++- examples_ptx/mergeSort/mergeSort.cpp | 23 +++++++++++++++++++++++ examples_ptx/mergeSort/mergeSort.ispc | 9 ++++----- 4 files changed, 31 insertions(+), 6 deletions(-) diff --git a/examples_ptx/cuda_helpers.cuh b/examples_ptx/cuda_helpers.cuh index 0ba3f95d..a28b5a51 100644 --- a/examples_ptx/cuda_helpers.cuh +++ b/examples_ptx/cuda_helpers.cuh @@ -13,3 +13,5 @@ #define warpIdx (threadIdx.x >> 5) #define launch(ntx,nty,ntz,func) if (programIndex==0) func<<>> #define sync cudaDeviceSynchronize() +#define cif if +#define shuffle(x,y) __shfl(x,y) diff --git a/examples_ptx/mergeSort/Makefile_gpu b/examples_ptx/mergeSort/Makefile_gpu index 79914c2c..9ed753a4 100644 --- a/examples_ptx/mergeSort/Makefile_gpu +++ b/examples_ptx/mergeSort/Makefile_gpu @@ -1,9 +1,10 @@ PROG=mergeSort ISPC_SRC=mergeSort.ispc -#CU_SRC=mergeSort.cu +CU_SRC=mergeSort.cu CXX_SRC=mergeSort.cpp mergeSort.cpp PTXCC_REGMAX=64 #PTXCC_FLAGS= -Xptxas=-O3 +#NVCC_FLAGS=-Xptxas=-O0 # LLVM_GPU=1 NVVM_GPU=1 diff --git a/examples_ptx/mergeSort/mergeSort.cpp b/examples_ptx/mergeSort/mergeSort.cpp index d21560ef..0abb3d1a 100644 --- a/examples_ptx/mergeSort/mergeSort.cpp +++ b/examples_ptx/mergeSort/mergeSort.cpp @@ -94,6 +94,29 @@ int main (int argc, char *argv[]) printf("[sort ispc + tasks]:\t[%.3f] msec [%.3f Mpair/s]\n", tISPC2, 1.0e-3*n/tISPC2); +#if 0 + printf("\n---\n"); + for (int i = 0; i < 128; i++) + { + if ((i%32) == 0) printf("\n"); + printf("%d ", (int)keysSrc[i]); + } + printf("\n---\n"); + for (int i = 0; i < 128; i++) + { + if ((i%32) == 0) printf("\n"); + printf("%d ", (int)keysBuf[i]); + } + printf("\n---\n"); + for (int i = 0; i < 128; i++) + { + if ((i%32) == 0) printf("\n"); + printf("%d ", (int)keysDst[i]); + } + printf("\n---\n"); +#endif + + std::sort(keysGld, keysGld + n); for (int i = 0; i < n; i++) diff --git a/examples_ptx/mergeSort/mergeSort.ispc b/examples_ptx/mergeSort/mergeSort.ispc index 52a94085..a85c5cc8 100644 --- a/examples_ptx/mergeSort/mergeSort.ispc +++ b/examples_ptx/mergeSort/mergeSort.ispc @@ -387,6 +387,7 @@ void mergeRanksAndIndices( } +#if 0 static inline void merge( int &dstPosA, @@ -404,6 +405,7 @@ void merge( if (programIndex < lenB) dstPosB = binarySearchInclusive1(keyB, keyA, lenA, nPowTwoLenA) + programIndex; } +#endif #if 0 @@ -548,12 +550,9 @@ void mergeElementaryIntervalsKernel( valB = srcVal[segmentBase + stride + startSrcB + programIndex]; } - int dstPosA, dstPosB; // Compute destination addresses for merge data - if (programIndex < lenSrcA) - dstPosA = binarySearchExclusive1(keyA, keyB, lenSrcB, SAMPLE_STRIDE) + programIndex; - if (programIndex < lenSrcB) - dstPosB = binarySearchInclusive1(keyB, keyA, lenSrcA, SAMPLE_STRIDE) + programIndex; + int dstPosA = binarySearchExclusive1(keyA, keyB, lenSrcB, SAMPLE_STRIDE) + programIndex; + int dstPosB = binarySearchInclusive1(keyB, keyA, lenSrcA, SAMPLE_STRIDE) + programIndex; int dstA = -1, dstB = -1;