diff --git a/examples/portable/inplaceTraspose/Makefile_cpu b/examples/portable/inplaceTraspose/Makefile_cpu deleted file mode 100644 index f50b2d45..00000000 --- a/examples/portable/inplaceTraspose/Makefile_cpu +++ /dev/null @@ -1,9 +0,0 @@ - -EXAMPLE=inplaceTranspose -CPP_SRC=inplaceTranspose.cpp -ISPC_SRC=inplaceTranspose.ispc -ISPC_IA_TARGETS=avx1-i32x8 -ISPC_ARM_TARGETS=neon -#ISPC_FLAGS=-DDEBUG -g - -include ../common_cpu.mk diff --git a/examples/portable/inplaceTraspose/Makefile_knc b/examples/portable/inplaceTraspose/Makefile_knc deleted file mode 100644 index 1204364f..00000000 --- a/examples/portable/inplaceTraspose/Makefile_knc +++ /dev/null @@ -1,7 +0,0 @@ -EXAMPLE=radixSort -CXX_SRC=radixSort.cpp -ISPC_SRC=radixSort.ispc -ISPC_INTRINSICS=../../intrinsics/knc-i1x16.h -ISPC_TARGET=generic-16 - -include ../common_knc.mk diff --git a/examples/portable/inplaceTraspose/Makefile_ptx b/examples/portable/inplaceTraspose/Makefile_ptx deleted file mode 100644 index 3f3d0044..00000000 --- a/examples/portable/inplaceTraspose/Makefile_ptx +++ /dev/null @@ -1,15 +0,0 @@ -PROG=inplaceTranspose -ISPC_SRC=inplaceTranspose.ispc - -#CU_SRC=inplaceTranspose.cu -# NVCC_FLAGS=-Xptxas=-O1 -CXX_SRC=inplaceTranspose.cpp -PTXCC_REGMAX=32 - -# LLVM_GPU=1 -NVVM_GPU=1 - -include ../common_ptx.mk - - - diff --git a/examples/portable/inplaceTraspose/inplaceTranspose.cpp b/examples/portable/inplaceTraspose/inplaceTranspose.cpp deleted file mode 100644 index cfb612e0..00000000 --- a/examples/portable/inplaceTraspose/inplaceTranspose.cpp +++ /dev/null @@ -1,153 +0,0 @@ -/* - Copyright (c) 2010-2014, Intel Corporation - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS - IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#include -#include -#include -#include -#include -#include -#include -#include "timing.h" -#include "ispc_malloc.h" -#include "inplaceTranspose_ispc.h" -#include "typeT.h" - -/* progress bar by Ross Hemsley; - * http://www.rosshemsley.co.uk/2011/02/creating-a-progress-bar-in-c-or-any-other-console-app/ */ -static inline void progressbar (unsigned int x, unsigned int n, unsigned int w = 50) -{ - if (n < 100) - { - x *= 100/n; - n = 100; - } - - if ((x != n) && (x % (n/100) != 0)) return; - - using namespace std; - float ratio = x/(float)n; - int c = ratio * w; - - cerr << setw(3) << (int)(ratio*100) << "% ["; - for (int x=0; x 1 ? atoi(argv[1]) : 8; - int n = argc > 2 ? atoi(argv[2]) : 12; - bool verbose = argc > 3; - - - fprintf(stderr, " m= %d n= %d :: storage= %g MB\n", m, n, - m*n*sizeof(int)*2/1e6); - - - std::pair *A = new std::pair[m*n]; - std::pair *Acopy = new std::pair[m*n]; - - for (int j = 0; j < n; j++) - for (int i = 0; i < m; i++) - A[j*m+i] = std::make_pair(i,j); - - if (verbose) - { - fprintf(stderr, "Original: \n"); - for (int j = 0; j < n; j++) - { - for (int i = 0; i < m; i++) - { - fprintf(stderr, "(%2d,%2d) ", A[j*m+i].first, A[j*m+i].second); - } - fprintf(stderr, "\n"); - } - fprintf(stderr, "\n"); - for (int i = 0; i < m*n; i++) - fprintf(stderr, "(%2d,%2d) ", A[i].first, A[i].second); - fprintf(stderr, "\n"); - fprintf(stderr, "\n"); - } - - - for (int j = 0; j < n; j++) - for (int i = 0; i < m; i++) - assert(A[j*m+i].first == i && A[j*m+i].second == j); - - ispcSetMallocHeapLimit(1024ull*1024*1024*8); - ispcMemcpy(&Acopy[0], &A[0], sizeof(T)*m*n); - - int nrep = 10; - double dt = 1e10; - for (int r = 0; r < nrep; r++) - { - ispcMemcpy(&A[0], &Acopy[0], sizeof(T)*m*n); - reset_and_start_timer(); - ispc::transpose((T*)&A[0], n, m); - const double t1 = rtc(); - dt = std::min(dt, get_elapsed_msec()); - progressbar (r, nrep); - } - progressbar (nrep, nrep); - fprintf(stderr, "\n"); - - if (verbose) - { - fprintf(stderr, "Transposed: \n"); - for (int j = 0; j < m; j++) - { - for (int i = 0; i < n; i++) - { - fprintf(stderr, "(%2d,%2d) ", A[j*n+i].first, A[j*n+i].second); - } - fprintf(stderr, "\n"); - } - fprintf(stderr, "\n"); - for (int i = 0; i < m*n; i++) - fprintf(stderr, "(%2d,%2d) ", A[i].first, A[i].second); - fprintf(stderr, "\n"); - fprintf(stderr, "\n"); - } - - for (int j = 0; j < m; j++) - for (int i = 0; i < n; i++) - assert(A[j*n+i].first == j && A[j*n+i].second == i); - - fprintf(stderr, " tranpose done in %g msec :: BW= %g GB/s\n", - dt , 2*m*n*sizeof(int)*2/dt*1e3/1e9); - - - return 0; -} diff --git a/examples/portable/inplaceTraspose/inplaceTranspose.ispc b/examples/portable/inplaceTraspose/inplaceTranspose.ispc deleted file mode 100644 index d28be4b8..00000000 --- a/examples/portable/inplaceTraspose/inplaceTranspose.ispc +++ /dev/null @@ -1,234 +0,0 @@ -/* - Copyright (c) 2014, Evghenii Gaburov - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS - IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#include "typeT.h" - -static inline -uniform int gcd(uniform int a, uniform int b) -{ - while ( a != 0 ) - { - uniform int c = a; - a = b%a; - b = c; - } - return b; -} - - static inline -int __rj(const int i, const uniform int joverb, const uniform int m, const uniform int b) -{ - return (i + joverb) % m; -} - - static inline -int __di(const int i, const uniform int j, const uniform int m, const uniform int n, const uniform int joverb) -{ - return (((i+joverb) % m) + j*m) % n; -} - - static inline -int __sj(const int i, const uniform int j, const uniform int m, const uniform int n, const int iovera) -{ - return (j + i*n - iovera) % m; -} - - -#if 0 -static inline -void transpose_serial(uniform T A[], const uniform int m, const uniform int n) -{ - const uniform int tmpSize = max(m,n) * programCount; - uniform T * uniform tmp = uniform new uniform T [tmpSize]; - uniform int * uniform joverb = uniform new uniform int[n]; - uniform int * uniform iovera = uniform new uniform int[m]; - - uniform T (*uniform tmp2D)[programCount] = (uniform T (*uniform)[programCount])tmp; - - const uniform int c = gcd(m,n); - const uniform int a = m/c; - const uniform int b = n/c; - foreach (j = 0 ... n) - joverb[j] = j/b; - foreach (i = 0 ... m) - iovera[i] = i/a; - - if (c > 1) - { - for (uniform int j = 0; j < n; j++) - { - const uniform int base = j*m; - const uniform int __joverb = joverb[j]; - foreach (i = 0 ... m) - tmp[i] = A[base + __rj(i,__joverb,m,b)]; - foreach (i = 0 ... m) - A[base + i] = tmp[i]; - } - } - - foreach (i = 0 ... m) - { - for (uniform int j = 0; j < n; j++) - tmp2D[__di(i,j,m,n,joverb[j])][programIndex] = A[j*m + i]; - for (uniform int j = 0; j < n; j++) - A[j*m + i] = tmp2D[j][programIndex]; - } - - for (uniform int j = 0; j < n; j++) - { - const uniform int base = j*m; - foreach (i = 0 ... m) - tmp[i] = A[base + __sj(i,j,m,n,iovera[i])]; - foreach (i = 0 ... m) - A[base + i] = tmp[i]; - } - - delete iovera; - delete joverb; - delete tmp; -} -#endif - -static uniform int * uniform joverb = NULL; -static uniform int * uniform iovera = NULL; -static uniform int a,b,c; - -static -void transpose_init(const uniform int m, const uniform int n, const uniform int nTask) -{ - joverb = uniform new uniform int[n]; - iovera = uniform new uniform int[m]; - - c = gcd(m,n); - a = m/c; - b = n/c; - foreach (j = 0 ... n) - joverb[j] = j/b; - foreach (i = 0 ... m) - iovera[i] = i/a; -} - -static -void transpose_finalize() -{ - delete iovera; - delete joverb; -} - -task -void transpose_step1(uniform T A[], const uniform int m, const uniform int n) -{ - const uniform int n_per_task = (n + taskCount - 1)/taskCount; - const uniform int nibeg = taskIndex * n_per_task; - const uniform int niend = min(nibeg + n_per_task, n); - - uniform T * uniform tmp = uniform new uniform T[m]; - - for (uniform int j = nibeg; j < niend; j++) - { - const uniform int base = j*m; - const uniform int __joverb = joverb[j]; - foreach (i = 0 ... m) - tmp[i] = A[base + __rj(i,__joverb,m,b)]; - foreach (i = 0 ... m) - A[base + i] = tmp[i]; - } - - delete tmp; -} - -task -void transpose_step2(uniform T A[], const uniform int m, const uniform int n) -{ - const uniform int m_per_task = (m + taskCount - 1)/taskCount; - const uniform int mibeg = taskIndex * m_per_task; - const uniform int miend = min(mibeg + m_per_task, m); - - uniform T * uniform tmp = uniform new uniform T[n*programCount]; - - uniform T (*uniform tmp2D)[programCount] = (uniform T (*uniform)[programCount])tmp; - foreach (i = mibeg ... miend) - { - for (uniform int j = 0; j < n; j++) - tmp2D[__di(i,j,m,n,joverb[j])][programIndex] = A[j*m + i]; - for (uniform int j = 0; j < n; j++) - A[j*m + i] = tmp2D[j][programIndex]; - } - - delete tmp; -} - -task -void transpose_step3(uniform T A[], const uniform int m, const uniform int n) -{ - const uniform int n_per_task = (n + taskCount - 1)/taskCount; - const uniform int nibeg = taskIndex * n_per_task; - const uniform int niend = min(nibeg + n_per_task, n); - - uniform T * uniform tmp = uniform new uniform T[m]; - - for (uniform int j = nibeg; j < niend; j++) - { - const uniform int base = j*m; - foreach (i = 0 ... m) - tmp[i] = A[base + __sj(i,j,m,n,iovera[i])]; - foreach (i = 0 ... m) - A[base + i] = tmp[i]; - } - - delete tmp; -} - -export -void transpose(uniform T A[], const uniform int m, const uniform int n) -{ -#if 0 - transpose_serial(A, m, n); -#else - const uniform int nTask = 32*8; //num_cores()*4; - transpose_init(m,n,nTask); - - launch [nTask] transpose_step1(A, m, n); - sync; - - launch [nTask] transpose_step2(A, m, n); - sync; - - launch [nTask] transpose_step3(A, m, n); - sync; - - transpose_finalize(); -#endif - sync; -} - diff --git a/examples/portable/inplaceTraspose/typeT.h b/examples/portable/inplaceTraspose/typeT.h deleted file mode 100644 index a0aa630a..00000000 --- a/examples/portable/inplaceTraspose/typeT.h +++ /dev/null @@ -1,2 +0,0 @@ -#pragma once -typedef double T; diff --git a/examples/portable/mergeSort/mergeSort.cpp b/examples/portable/mergeSort/mergeSort.cpp index 18a7ef59..d3188544 100644 --- a/examples/portable/mergeSort/mergeSort.cpp +++ b/examples/portable/mergeSort/mergeSort.cpp @@ -42,26 +42,28 @@ #include "ispc_malloc.h" #include "mergeSort_ispc.h" -/* progress bar by Ross Hemsley; - * http://www.rosshemsley.co.uk/2011/02/creating-a-progress-bar-in-c-or-any-other-console-app/ */ -static inline void progressbar (unsigned int x, unsigned int n, unsigned int w = 50) +static void progressBar(const int x, const int n, const int width = 50) { - if (n < 100) - { - x *= 100/n; - n = 100; - } + assert(n > 1); + assert(x >= 0 && x < n); + assert(width > 10); + const float f = static_cast(x)/(n-1); + const int w = static_cast(f * width); - if ((x != n) && (x % (n/100) != 0)) return; + // print bar + std::string bstr("["); + for (int i = 0; i < width; i++) + bstr += i < w ? '=' : ' '; + bstr += "]"; - using namespace std; - float ratio = x/(float)n; - int c = ratio * w; + // print percentage + char pstr0[32]; + sprintf(pstr0, " %2d %c ", static_cast(f*100.0),'%'); + const std::string pstr(pstr0); + std::copy(pstr.begin(), pstr.end(), bstr.begin() + (width/2-2)); - cout << setw(3) << (int)(ratio*100) << "% ["; - for (int x=0; x 1); + assert(x >= 0 && x < n); + assert(width > 10); + const float f = static_cast(x)/(n-1); + const int w = static_cast(f * width); - if ((x != n) && (x % (n/100) != 0)) return; + // print bar + std::string bstr("["); + for (int i = 0; i < width; i++) + bstr += i < w ? '=' : ' '; + bstr += "]"; - using namespace std; - float ratio = x/(float)n; - int c = ratio * w; + // print percentage + char pstr0[32]; + sprintf(pstr0, " %2d %c ", static_cast(f*100.0),'%'); + const std::string pstr(pstr0); + std::copy(pstr.begin(), pstr.end(), bstr.begin() + (width/2-2)); - cout << setw(3) << (int)(ratio*100) << "% ["; - for (int x=0; x #include #include +#include #include #include "../timing.h" #include "sort_ispc.h" @@ -45,26 +46,28 @@ using namespace ispc; extern void sort_serial (int n, unsigned int code[], int order[]); -/* progress bar by Ross Hemsley; - * http://www.rosshemsley.co.uk/2011/02/creating-a-progress-bar-in-c-or-any-other-console-app/ */ -static inline void progressbar (unsigned int x, unsigned int n, unsigned int w = 50) +static void progressBar(const int x, const int n, const int width = 50) { - if (n < 100) - { - x *= 100/n; - n = 100; - } + assert(n > 1); + assert(x >= 0 && x < n); + assert(width > 10); + const float f = static_cast(x)/(n-1); + const int w = static_cast(f * width); - if ((x != n) && (x % (n/100) != 0)) return; + // print bar + std::string bstr("["); + for (int i = 0; i < width; i++) + bstr += i < w ? '=' : ' '; + bstr += "]"; - using namespace std; - float ratio = x/(float)n; - int c = ratio * w; + // print percentage + char pstr0[32]; + sprintf(pstr0, " %2d %c ", static_cast(f*100.0),'%'); + const std::string pstr(pstr0); + std::copy(pstr.begin(), pstr.end(), bstr.begin() + (width/2-2)); - cout << setw(3) << (int)(ratio*100) << "% ["; - for (int x=0; x