diff --git a/examples_cuda/sort/sort b/examples_cuda/sort/sort index d148e465..64c32dd0 100755 Binary files a/examples_cuda/sort/sort and b/examples_cuda/sort/sort differ diff --git a/examples_cuda/sort/sort.cpp b/examples_cuda/sort/sort.cpp index 37a5c289..8e3081e5 100644 --- a/examples_cuda/sort/sort.cpp +++ b/examples_cuda/sort/sort.cpp @@ -40,6 +40,7 @@ #include #include "../timing.h" #include "sort_ispc.h" +#include #include static inline double rtc(void) @@ -129,6 +130,12 @@ int main (int argc, char *argv[]) printf("[sort ispc + tasks]:\t[%.3f] million cycles\n", tISPC2); + for (int i = 0; i < n-1; i++) + { + assert(code[i+1] >= code[i]); + } + + srand (0); for (i = 0; i < m; i ++) diff --git a/examples_cuda/sort/sort_cu.cpp b/examples_cuda/sort/sort_cu.cpp index ec295434..b8e9edf5 100644 --- a/examples_cuda/sort/sort_cu.cpp +++ b/examples_cuda/sort/sort_cu.cpp @@ -351,7 +351,7 @@ int main (int argc, char *argv[]) createContext(); /*******************/ - int ntask = 13*4*8; + int ntask = 13*4; devicePtr d_code = deviceMalloc(n*sizeof(int)); devicePtr d_order = deviceMalloc(n*sizeof(int)); devicePtr d_pair = deviceMalloc(n*2*sizeof(int)); @@ -386,7 +386,13 @@ int main (int argc, char *argv[]) progressbar (i, m); } - printf("[sort ispc + tasks]:\t[%.3f] million cycles\n", tISPC2); + printf("[sort cuda]:\t[%.3f] million cycles\n", tISPC2); + memcpyD2H(code, d_code, n*sizeof(int)); + memcpyD2H(order, d_order, n*sizeof(int)); + for (int i = 0; i < n-1; i++) + { + assert(code[i+1] >= code[i]); + } srand (0);