From 774e907ecfc5885f7aa005c2e44c4e321baab362 Mon Sep 17 00:00:00 2001 From: Evghenii Date: Mon, 6 Jan 2014 11:50:18 +0100 Subject: [PATCH] +1 --- examples_cuda/sort/sort.ispc | 10 +- examples_cuda/sort/sort_cu.cpp | 2 +- examples_ptx/cuda_helpers.cuh | 1 + examples_ptx/ispc_malloc.cpp | 19 +++ examples_ptx/ispc_malloc.h | 5 + examples_ptx/nvcc_helpers.cu | 25 +++ examples_ptx/sort/Makefile_cpu | 9 + examples_ptx/sort/Makefile_gpu | 13 ++ examples_ptx/sort/sort.cpp | 157 +++++++++++++++++ examples_ptx/sort/sort.cu | 272 ++++++++++++++++++++++++++++++ examples_ptx/sort/sort.ispc | 249 +++++++++++++++++++++++++++ examples_ptx/sort/sort.vcxproj | 34 ++++ examples_ptx/sort/sort_serial.cpp | 60 +++++++ 13 files changed, 850 insertions(+), 6 deletions(-) create mode 100644 examples_ptx/sort/Makefile_cpu create mode 100644 examples_ptx/sort/Makefile_gpu create mode 100644 examples_ptx/sort/sort.cpp create mode 100644 examples_ptx/sort/sort.cu create mode 100644 examples_ptx/sort/sort.ispc create mode 100644 examples_ptx/sort/sort.vcxproj create mode 100644 examples_ptx/sort/sort_serial.cpp diff --git a/examples_cuda/sort/sort.ispc b/examples_cuda/sort/sort.ispc index a713f1ac..45213610 100644 --- a/examples_cuda/sort/sort.ispc +++ b/examples_cuda/sort/sort.ispc @@ -170,9 +170,8 @@ task void bumpup (uniform int h[], uniform int g[]) } } -static void prefix_sum (uniform int num, uniform int h[]) +static void prefix_sum (uniform int num, uniform int h[], uniform int g[]) { - uniform int * uniform g = uniform new uniform int [num+1]; uniform int i; launch[num] addup (h, g+1); @@ -183,10 +182,9 @@ static void prefix_sum (uniform int num, uniform int h[]) launch[num] bumpup (h, g); sync; - delete g; } -export void sort_ispc (uniform int n, uniform unsigned int code[], uniform int order[], uniform int ntasks) +export void sort_ispc(uniform int n, uniform unsigned int code[], uniform int order[], uniform int ntasks) { uniform int num = ntasks ; //< 1 ? num_cores () : ntasks; uniform int span = n / num; @@ -194,6 +192,7 @@ export void sort_ispc (uniform int n, uniform unsigned int code[], uniform int o uniform int * uniform hist = uniform new uniform int [hsize]; uniform int64 * uniform pair = uniform new uniform int64 [n]; uniform int64 * uniform temp = uniform new uniform int64 [n]; + uniform int * uniform g = uniform new uniform int [num+1]; uniform int pass, i; #if DEBUG @@ -213,7 +212,7 @@ export void sort_ispc (uniform int n, uniform unsigned int code[], uniform int o launch[num] histogram (span, n, pair, pass, hist); sync; - prefix_sum (num, hist); + prefix_sum (num, hist,g); launch[num] permutation (span, n, pair, pass, hist, temp); sync; @@ -246,4 +245,5 @@ export void sort_ispc (uniform int n, uniform unsigned int code[], uniform int o delete hist; delete pair; delete temp; + delete g; } diff --git a/examples_cuda/sort/sort_cu.cpp b/examples_cuda/sort/sort_cu.cpp index f2f8b4be..a4ce5756 100644 --- a/examples_cuda/sort/sort_cu.cpp +++ b/examples_cuda/sort/sort_cu.cpp @@ -128,7 +128,7 @@ int main (int argc, char *argv[]) tISPC2 += (rtc() - t0); // get_elapsed_mcycles(); #else - const char * func_name = "sort_ispc"; + const char * func_name = "sort_ispc___export"; #if 0 void *func_args[] = {&n, &d_code, &d_order, &ntask}; #else diff --git a/examples_ptx/cuda_helpers.cuh b/examples_ptx/cuda_helpers.cuh index ae30ed34..0ba3f95d 100644 --- a/examples_ptx/cuda_helpers.cuh +++ b/examples_ptx/cuda_helpers.cuh @@ -12,3 +12,4 @@ #define taskCount (taskCount0*taskCount1*taskCount2) #define warpIdx (threadIdx.x >> 5) #define launch(ntx,nty,ntz,func) if (programIndex==0) func<<>> +#define sync cudaDeviceSynchronize() diff --git a/examples_ptx/ispc_malloc.cpp b/examples_ptx/ispc_malloc.cpp index 4b83751f..dcbe5d48 100644 --- a/examples_ptx/ispc_malloc.cpp +++ b/examples_ptx/ispc_malloc.cpp @@ -31,5 +31,24 @@ void ispc_memset(void *ptr, int value, size_t size) { memset(ptr, value, size); } +void ispcSetMallocHeapLimit(size_t value) +{ +} +void ispcSetStackLimit(size_t value) +{ +} +unsigned long long ispcGetMallocHeapLimit() +{ + return -1; +} +unsigned long long ispcGetStackLimit() +{ + return -1; +} +void * ispcMemcpy(void *dest, void *src, size_t num) +{ + memcpy(dest, src, num); + return dest; +} #endif diff --git a/examples_ptx/ispc_malloc.h b/examples_ptx/ispc_malloc.h index c42ac56b..1d63f602 100644 --- a/examples_ptx/ispc_malloc.h +++ b/examples_ptx/ispc_malloc.h @@ -3,3 +3,8 @@ extern void ispc_malloc(void **ptr, const size_t size); extern void ispc_free(void *ptr); extern void ispc_memset(void *ptr, int value, size_t size); +extern void ispcSetMallocHeapLimit(size_t value); +extern void ispcSetStackLimit(size_t value); +extern unsigned long long ispcGetMallocHeapLimit(); +extern unsigned long long ispcGetStackLimit(); +extern void * ispcMemcpy(void *dest, void *src, size_t num); diff --git a/examples_ptx/nvcc_helpers.cu b/examples_ptx/nvcc_helpers.cu index 3def93a1..e6faea96 100644 --- a/examples_ptx/nvcc_helpers.cu +++ b/examples_ptx/nvcc_helpers.cu @@ -14,5 +14,30 @@ void ispc_memset(void *ptr, int value, size_t size) { cudaMemset(ptr, value, size); } +void ispcSetMallocHeapLimit(size_t value) +{ + cudaDeviceSetLimit(cudaLimitMallocHeapSize,value); +} +void ispcSetStackLimit(size_t value) +{ + cudaDeviceSetLimit(cudaLimitStackSize,value); +} +unsigned long long ispcGetMallocHeapLimit() +{ + size_t value; + cudaDeviceGetLimit(&value, cudaLimitMallocHeapSize); + return value; +} +unsigned long long ispcGetStackLimit() +{ + size_t value; + cudaDeviceGetLimit(&value, cudaLimitStackSize); + return value; +} +void * ispcMemcpy(void *dest, void *src, size_t num) +{ + cudaMemcpy(dest, src, num, cudaMemcpyDefault); + return dest; +} diff --git a/examples_ptx/sort/Makefile_cpu b/examples_ptx/sort/Makefile_cpu new file mode 100644 index 00000000..a70ba750 --- /dev/null +++ b/examples_ptx/sort/Makefile_cpu @@ -0,0 +1,9 @@ + +EXAMPLE=sort +CPP_SRC=sort.cpp sort_serial.cpp +ISPC_SRC=sort.ispc +ISPC_IA_TARGETS=avx1-i32x8 +ISPC_ARM_TARGETS=neon +#ISPC_FLAGS=-DDEBUG + +include ../common.mk diff --git a/examples_ptx/sort/Makefile_gpu b/examples_ptx/sort/Makefile_gpu new file mode 100644 index 00000000..97a51f26 --- /dev/null +++ b/examples_ptx/sort/Makefile_gpu @@ -0,0 +1,13 @@ +PROG=sort +ISPC_SRC=sort.ispc +CU_SRC=sort.cu +CXX_SRC=sort.cpp sort_serial.cpp +PTXCC_REGMAX=32 + +LLVM_GPU=1 +NVVM_GPU=1 + +include ../common_gpu.mk + + + diff --git a/examples_ptx/sort/sort.cpp b/examples_ptx/sort/sort.cpp new file mode 100644 index 00000000..21778d18 --- /dev/null +++ b/examples_ptx/sort/sort.cpp @@ -0,0 +1,157 @@ +/* + Copyright (c) 2013, Durham University + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of Durham University nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/* Author: Tomasz Koziara */ + +#include +#include +#include +#include +#include +#include +#include "../timing.h" +#include "../ispc_malloc.h" +#include "sort_ispc.h" + +using namespace ispc; + +extern void sort_serial (int n, unsigned int code[], int order[]); + +/* progress bar by Ross Hemsley; + * http://www.rosshemsley.co.uk/2011/02/creating-a-progress-bar-in-c-or-any-other-console-app/ */ +static inline void progressbar (unsigned int x, unsigned int n, unsigned int w = 50) +{ + if (n < 100) + { + x *= 100/n; + n = 100; + } + + if ((x != n) && (x % (n/100) != 0)) return; + + using namespace std; + float ratio = x/(float)n; + int c = ratio * w; + + cout << setw(3) << (int)(ratio*100) << "% ["; + for (int x=0; x +__device__ inline T* __new(const int n) +{ + union + { + T* ptr; + int v[2]; + } val; + if (programIndex == 0) + val.ptr = new T[n]; + val.v[0] = __shfl(val.v[0],0); + val.v[1] = __shfl(val.v[1],0); + return val.ptr; +}; + +template +__device__ inline void __delete(T* ptr) +{ + if (programIndex == 0) + delete ptr; +}; + +__global__ void histogram ( int span, int n, int64 code[], int pass, int hist[]) +{ + if (taskIndex >= taskCount) return; + int start = taskIndex*span; + int end = taskIndex == taskCount-1 ? n : start+span; + int strip = (end-start)/programCount; + int tail = (end-start)%programCount; + int i = programCount*taskIndex + programIndex; + int g [256]; + + cfor (int j = 0; j < 256; j ++) + { + g[j] = 0; + } + + cfor (int k = start+programIndex*strip; k < start+(programIndex+1)*strip; k ++) + { + unsigned int8 *c = (unsigned int8*) &code[k]; + + g[c[pass]] ++; + } + + if (programIndex == programCount-1) /* remainder is processed by the last lane */ + { + for (int k = start+programCount*strip; k < start+programCount*strip+tail; k ++) + { + unsigned int8 *c = (unsigned int8*) &code[k]; + + g[c[pass]] ++; + } + } + + cfor (int j = 0; j < 256; j ++) + { + hist[j*programCount*taskCount+i] = g[j]; + } +} + +__global__ void permutation ( int span, int n, int64 code[], int pass, int hist[], int64 perm[]) +{ + if (taskIndex >= taskCount) return; + int start = taskIndex*span; + int end = taskIndex == taskCount-1 ? n : start+span; + int strip = (end-start)/programCount; + int tail = (end-start)%programCount; + int i = programCount*taskIndex + programIndex; + int g [256]; + + cfor (int j = 0; j < 256; j ++) + { + g[j] = hist[j*programCount*taskCount+i]; + } + + cfor (int k = start+programIndex*strip; k < start+(programIndex+1)*strip; k ++) + { + unsigned int8 *c = (unsigned int8*) &code[k]; + + int l = g[c[pass]]; + + perm[l] = code[k]; + + g[c[pass]] = l+1; + } + + if (programIndex == programCount-1) /* remainder is processed by the last lane */ + { + for (int k = start+programCount*strip; k < start+programCount*strip+tail; k ++) + { + unsigned int8 *c = (unsigned int8*) &code[k]; + + int l = g[c[pass]]; + + perm[l] = code[k]; + + g[c[pass]] = l+1; + } + } +} + +__global__ void copy ( int span, int n, int64 from[], int64 to[]) +{ + if (taskIndex >= taskCount) return; + int start = taskIndex*span; + int end = taskIndex == taskCount-1 ? n : start+span; + + for (int i = programIndex + start; i < end; i += programCount) + if (i < end) + { + to[i] = from[i]; + } +} + +__global__ void pack ( int span, int n, unsigned int code[], int64 pair[]) +{ + if (taskIndex >= taskCount) return; + int start = taskIndex*span; + int end = taskIndex == taskCount-1 ? n : start+span; + + for (int i = programIndex + start; i < end; i += programCount) + if (i < end) + { + pair[i] = ((int64)i<<32)+code[i]; + } +} + +__global__ void unpack ( int span, int n, int64 pair[], int unsigned code[], int order[]) +{ + if (taskIndex >= taskCount) return; + int start = taskIndex*span; + int end = taskIndex == taskCount-1 ? n : start+span; + + for (int i = programIndex + start; i < end; i += programCount) + if (i < end) + { + code[i] = pair[i]; + order[i] = pair[i]>>32; + } +} + +__global__ void addup ( int h[], int g[]) +{ + if (taskIndex >= taskCount) return; + int * u = &h[256*programCount*taskIndex]; + int i, x, y = 0; + + for (i = 0; i < 256*programCount; i ++) + { + x = u[i]; + u[i] = y; + y += x; + } + + g[taskIndex] = y; +} + +__global__ void bumpup ( int h[], int g[]) +{ + if (taskIndex >= taskCount) return; + int * u = &h[256*programCount*taskIndex]; + int z = g[taskIndex]; + + for (int i = programIndex; i < 256*programCount; i += programCount) + { + u[i] += z; + } +} + +inline __device__ +static void prefix_sum ( int num, int h[], int * g) +{ + int i; + + launch(num,1,1,addup)(h,g+1); + sync; + + if (programIndex == 0) + for (g[0] = 0, i = 1; i < num; i ++) g[i] += g[i-1]; + + launch(num,1,1,bumpup)(h,g); + sync; +} + +extern "C" __global__ +void sort_ispc___export ( int n, unsigned int code[], int order[], int ntasks) +{ + int num = ntasks; + int span = n / num; + int hsize = 256*programCount*num; + int * hist = __new< int>(hsize); + int64 * pair = __new< int64>(n); + int64 * temp = __new< int64>(n); + int * g = __new(num+1); + int pass; + + + launch(num,1,1,pack)(span, n, code, pair); + sync; + + for (pass = 0; pass < 4; pass ++) + { + launch(num,1,1,histogram)(span, n, pair, pass, hist); + sync; + + prefix_sum (num, hist,g); + + launch(num,1,1,permutation)(span, n, pair, pass, hist, temp); + sync; + + launch(num,1,1,copy)(span, n, temp, pair); + sync; + } + + launch(num,1,1,unpack)(span, n, pair, code, order); + sync; + + __delete(g); + __delete(hist); + __delete(pair); + __delete(temp); +} + + extern "C" __host__ +void sort_ispc( int n, unsigned int code[], int order[], int ntasks) +{ + sort_ispc___export<<<1,32>>>(n,code,order,ntasks); + sync; +} diff --git a/examples_ptx/sort/sort.ispc b/examples_ptx/sort/sort.ispc new file mode 100644 index 00000000..0587b4be --- /dev/null +++ b/examples_ptx/sort/sort.ispc @@ -0,0 +1,249 @@ +/* + Copyright (c) 2013, Durham University + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of Durham University nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/* Author: Tomasz Koziara */ + +task void histogram (uniform int span, uniform int n, uniform int64 code[], uniform int pass, uniform int hist[]) +{ + uniform int start = taskIndex*span; + uniform int end = taskIndex == taskCount-1 ? n : start+span; + uniform int strip = (end-start)/programCount; + uniform int tail = (end-start)%programCount; + int i = programCount*taskIndex + programIndex; + int g [256]; + + cfor (int j = 0; j < 256; j ++) + { + g[j] = 0; + } + + cfor (int k = start+programIndex*strip; k < start+(programIndex+1)*strip; k ++) + { + unsigned int8 *c = (unsigned int8*) &code[k]; + + g[c[pass]] ++; + } + + if (programIndex == programCount-1) /* remainder is processed by the last lane */ + { + for (int k = start+programCount*strip; k < start+programCount*strip+tail; k ++) + { + unsigned int8 *c = (unsigned int8*) &code[k]; + + g[c[pass]] ++; + } + } + + cfor (int j = 0; j < 256; j ++) + { + hist[j*programCount*taskCount+i] = g[j]; + } +} + +task void permutation (uniform int span, uniform int n, uniform int64 code[], uniform int pass, uniform int hist[], uniform int64 perm[]) +{ + uniform int start = taskIndex*span; + uniform int end = taskIndex == taskCount-1 ? n : start+span; + uniform int strip = (end-start)/programCount; + uniform int tail = (end-start)%programCount; + int i = programCount*taskIndex + programIndex; + int g [256]; + + cfor (int j = 0; j < 256; j ++) + { + g[j] = hist[j*programCount*taskCount+i]; + } + + cfor (int k = start+programIndex*strip; k < start+(programIndex+1)*strip; k ++) + { + unsigned int8 *c = (unsigned int8*) &code[k]; + + int l = g[c[pass]]; + + perm[l] = code[k]; + + g[c[pass]] = l+1; + } + + if (programIndex == programCount-1) /* remainder is processed by the last lane */ + { + for (int k = start+programCount*strip; k < start+programCount*strip+tail; k ++) + { + unsigned int8 *c = (unsigned int8*) &code[k]; + + int l = g[c[pass]]; + + perm[l] = code[k]; + + g[c[pass]] = l+1; + } + } +} + +task void copy (uniform int span, uniform int n, uniform int64 from[], uniform int64 to[]) +{ + uniform int start = taskIndex*span; + uniform int end = taskIndex == taskCount-1 ? n : start+span; + + foreach (i = start ... end) + { + to[i] = from[i]; + } +} + +task void pack (uniform int span, uniform int n, uniform unsigned int code[], uniform int64 pair[]) +{ + uniform int start = taskIndex*span; + uniform int end = taskIndex == taskCount-1 ? n : start+span; + + foreach (i = start ... end) + { + pair[i] = ((int64)i<<32)+code[i]; + } +} + +task void unpack (uniform int span, uniform int n, uniform int64 pair[], uniform int unsigned code[], uniform int order[]) +{ + uniform int start = taskIndex*span; + uniform int end = taskIndex == taskCount-1 ? n : start+span; + + foreach (i = start ... end) + { + code[i] = pair[i]; + order[i] = pair[i]>>32; + } +} + +task void addup (uniform int h[], uniform int g[]) +{ + uniform int * uniform u = &h[256*programCount*taskIndex]; + uniform int i, x, y = 0; + + for (i = 0; i < 256*programCount; i ++) + { + x = u[i]; + u[i] = y; + y += x; + } + + g[taskIndex] = y; +} + +task void bumpup (uniform int h[], uniform int g[]) +{ + uniform int * uniform u = &h[256*programCount*taskIndex]; + uniform int z = g[taskIndex]; + + foreach (i = 0 ... 256*programCount) + { + u[i] += z; + } +} + +static void prefix_sum (uniform int num, uniform int h[], uniform int g[]) +{ + uniform int i; + + launch[num] addup (h, g+1); + sync; + + for (g[0] = 0, i = 1; i < num; i ++) g[i] += g[i-1]; + + launch[num] bumpup (h, g); + sync; + +} + +export void sort_ispc (uniform int n, uniform unsigned int code[], uniform int order[], uniform int ntasks) +{ + uniform int num = ntasks; // < 1 ? num_cores () : ntasks; + uniform int span = n / num; + uniform int hsize = 256*programCount*num; + uniform int * uniform hist = uniform new uniform int [hsize]; + uniform int64 * uniform pair = uniform new uniform int64 [n]; + uniform int64 * uniform temp = uniform new uniform int64 [n]; + uniform int * uniform g = uniform new uniform int [num+1]; + uniform int pass, i; + +#if DEBUG + if (n < 100) + { + print ("input: "); + for (i = 0; i < n; i ++) print ("%, ", code[i]); + print ("\n"); + } +#endif + + launch[num] pack (span, n, code, pair); + sync; + + for (pass = 0; pass < 4; pass ++) + { + launch[num] histogram (span, n, pair, pass, hist); + sync; + + prefix_sum (num, hist, g); + + launch[num] permutation (span, n, pair, pass, hist, temp); + sync; + + launch[num] copy (span, n, temp, pair); + sync; + } + + launch[num] unpack (span, n, pair, code, order); + sync; + +#if DEBUG + for (i = 0; i < n; i ++) + { + if (i > 0 && code[i-1] > code[i]) + print ("ERR at % => % > %; ", i, code[i-1], code[i]); + } + + if (n < 100) + { + print ("output: "); + for (i = 0; i < n; i ++) print ("%, ", code[i]); + print ("\n"); + print ("order: "); + for (i = 0; i < n; i ++) print ("%, ", order[i]); + print ("\n"); + } +#endif + + delete hist; + delete pair; + delete temp; + delete g; +} diff --git a/examples_ptx/sort/sort.vcxproj b/examples_ptx/sort/sort.vcxproj new file mode 100644 index 00000000..b0bdc63d --- /dev/null +++ b/examples_ptx/sort/sort.vcxproj @@ -0,0 +1,34 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Release + Win32 + + + Release + x64 + + + + {6D3EF8C5-AE26-407B-9ECE-C27CB988D9C2} + Win32Proj + sort + sort + sse2,sse4-x2,avx1-x2 + + + + + + + + diff --git a/examples_ptx/sort/sort_serial.cpp b/examples_ptx/sort/sort_serial.cpp new file mode 100644 index 00000000..38bbdda6 --- /dev/null +++ b/examples_ptx/sort/sort_serial.cpp @@ -0,0 +1,60 @@ +/* + Copyright (c) 2013, Durham University + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of Durham University nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/* Author: Tomasz Koziara */ + +#include +#include +#include + +typedef std::pair pair; + +struct cmp +{ + bool operator() (const pair& a, const pair& b) { return a.first < b.first; } +}; + +void sort_serial (int n, unsigned int code[], int order[]) +{ + std::vector pairs; + + pairs.reserve (n); + + for (int i = 0; i < n; i++) pairs.push_back (pair(code[i], i)); + + std::sort (pairs.begin(), pairs.end(), cmp()); + + int *o = order; + + for (std::vector::const_iterator p = pairs.begin(); p != pairs.end(); ++p, ++o) *o = p->second; +}