first commit
This commit is contained in:
9
examples/portable/inplaceTraspose/Makefile_cpu
Normal file
9
examples/portable/inplaceTraspose/Makefile_cpu
Normal file
@@ -0,0 +1,9 @@
|
||||
|
||||
# CPU build settings for the inplaceTranspose example.
# All variables are set before the include below, so ../common_cpu.mk is
# presumably what consumes them -- confirm against that file.

# Sources: host driver and ISPC kernel.
ISPC_SRC=inplaceTranspose.ispc
CPP_SRC=inplaceTranspose.cpp

# Name of the example being built.
EXAMPLE=inplaceTranspose

# ISPC code-generation targets for ARM and x86 hosts.
ISPC_ARM_TARGETS=neon
ISPC_IA_TARGETS=avx1-i32x8

# Uncomment for a debug build of the ISPC kernel.
#ISPC_FLAGS=-DDEBUG -g

include ../common_cpu.mk
|
||||
7
examples/portable/inplaceTraspose/Makefile_knc
Normal file
7
examples/portable/inplaceTraspose/Makefile_knc
Normal file
@@ -0,0 +1,7 @@
|
||||
# KNC build settings for the inplaceTranspose example.
# The names below previously pointed at the radixSort example (copy-paste
# leftover); this directory only contains the inplaceTranspose sources.
EXAMPLE=inplaceTranspose
CXX_SRC=inplaceTranspose.cpp
ISPC_SRC=inplaceTranspose.ispc
ISPC_INTRINSICS=../../intrinsics/knc-i1x16.h
ISPC_TARGET=generic-16

include ../common_knc.mk
|
||||
15
examples/portable/inplaceTraspose/Makefile_ptx
Normal file
15
examples/portable/inplaceTraspose/Makefile_ptx
Normal file
@@ -0,0 +1,15 @@
|
||||
# PTX (GPU) build settings for the inplaceTranspose example.
# The names below previously pointed at the radixSort example (copy-paste
# leftover), and CXX_SRC listed the same file twice.
PROG=inplaceTranspose
ISPC_SRC=inplaceTranspose.ispc

# NOTE(review): no inplaceTranspose.cu is visible alongside this Makefile;
# confirm whether ../common_ptx.mk requires CU_SRC to exist.
CU_SRC=inplaceTranspose.cu
# NVCC_FLAGS=-Xptxas=-O1
CXX_SRC=inplaceTranspose.cpp
PTXCC_REGMAX=64

LLVM_GPU=1
NVVM_GPU=1

include ../common_ptx.mk
|
||||
|
||||
|
||||
|
||||
114
examples/portable/inplaceTraspose/inplaceTranspose.cpp
Normal file
114
examples/portable/inplaceTraspose/inplaceTranspose.cpp
Normal file
@@ -0,0 +1,114 @@
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
#include <cassert>
|
||||
#include <iomanip>
|
||||
#include <vector>
|
||||
#include "timing.h"
|
||||
#include "ispc_malloc.h"
|
||||
#include "inplaceTranspose_ispc.h"
|
||||
#include "typeT.h"
|
||||
|
||||
/* progress bar by Ross Hemsley;
|
||||
* http://www.rosshemsley.co.uk/2011/02/creating-a-progress-bar-in-c-or-any-other-console-app/ */
|
||||
/**
 * Draw a one-line textual progress bar on stderr.
 *
 * The line has the form "NNN% [=====     ]" and ends with a carriage return
 * (no newline), so successive calls overwrite each other on a terminal.
 *
 * @param x  number of work items finished so far
 * @param n  total number of work items; nothing is drawn when n == 0
 * @param w  width of the bar between the brackets, in characters
 *
 * To limit output, the bar is redrawn only when x falls on a whole-percent
 * boundary of n, or when x == n.
 */
static inline void progressbar (unsigned int x, unsigned int n, unsigned int w = 50)
{
  // An empty job has no meaningful progress, and the rescaling below would
  // otherwise divide by zero.
  if (n == 0) return;

  if (n < 100)
  {
    // Rescale to a 0..100 range. Multiply before dividing: the former
    // "x *= 100/n" truncated the factor first, so e.g. n == 30 could never
    // reach 100%. The 64-bit intermediate keeps x*100 from wrapping.
    x = static_cast<unsigned int>(static_cast<unsigned long long>(x) * 100u / n);
    n = 100;
  }

  // Redraw only on whole-percent steps, but always draw the final state.
  if ((x != n) && (x % (n/100) != 0)) return;

  const float ratio = x / static_cast<float>(n);

  // Clamp the filled part so the bar never overruns its brackets when x > n.
  const float shown = ratio < 1.0f ? ratio : 1.0f;
  unsigned int filled = static_cast<unsigned int>(shown * w);
  if (filled > w) filled = w;

  std::cerr << std::setw(3) << static_cast<int>(ratio*100) << "% [";
  for (unsigned int k = 0; k < filled; k++) std::cerr << "=";
  for (unsigned int k = filled; k < w; k++) std::cerr << " ";
  std::cerr << "]\r" << std::flush;
}
|
||||
|
||||
int main(int argc, char * argv[])
|
||||
{
|
||||
int m = argc > 1 ? atoi(argv[1]) : 8;
|
||||
int n = argc > 2 ? atoi(argv[2]) : 12;
|
||||
bool verbose = argc > 3;
|
||||
|
||||
|
||||
fprintf(stderr, " m= %d n= %d :: storage= %g MB\n", m, n,
|
||||
m*n*sizeof(int)*2/1e6);
|
||||
|
||||
|
||||
std::vector< std::pair<int,int> > A(m*n);
|
||||
|
||||
int nrep = 10;
|
||||
double dt = 1e10;
|
||||
for (int r = 0; r < nrep; r++)
|
||||
{
|
||||
for (int j = 0; j < n; j++)
|
||||
for (int i = 0; i < m; i++)
|
||||
A[j*m+i] = std::make_pair(i,j);
|
||||
|
||||
if (r == 0 && verbose)
|
||||
{
|
||||
fprintf(stderr, "Original: \n");
|
||||
for (int j = 0; j < n; j++)
|
||||
{
|
||||
for (int i = 0; i < m; i++)
|
||||
{
|
||||
fprintf(stderr, "(%2d,%2d) ", A[j*m+i].first, A[j*m+i].second);
|
||||
}
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
fprintf(stderr, "\n");
|
||||
for (int i = 0; i < m*n; i++)
|
||||
fprintf(stderr, "(%2d,%2d) ", A[i].first, A[i].second);
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
|
||||
for (int j = 0; j < n; j++)
|
||||
for (int i = 0; i < m; i++)
|
||||
assert(A[j*m+i].first == i && A[j*m+i].second == j);
|
||||
|
||||
reset_and_start_timer();
|
||||
ispc::transpose((T*)&A[0], n, m);
|
||||
const double t1 = rtc();
|
||||
dt = std::min(dt, get_elapsed_msec());
|
||||
progressbar (r, nrep);
|
||||
}
|
||||
progressbar (nrep, nrep);
|
||||
fprintf(stderr, "\n");
|
||||
|
||||
if (verbose)
|
||||
{
|
||||
fprintf(stderr, "Transposed: \n");
|
||||
for (int j = 0; j < m; j++)
|
||||
{
|
||||
for (int i = 0; i < n; i++)
|
||||
{
|
||||
fprintf(stderr, "(%2d,%2d) ", A[j*n+i].first, A[j*n+i].second);
|
||||
}
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
fprintf(stderr, "\n");
|
||||
for (int i = 0; i < m*n; i++)
|
||||
fprintf(stderr, "(%2d,%2d) ", A[i].first, A[i].second);
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
|
||||
for (int j = 0; j < m; j++)
|
||||
for (int i = 0; i < n; i++)
|
||||
assert(A[j*n+i].first == j && A[j*n+i].second == i);
|
||||
|
||||
fprintf(stderr, " tranpose done in %g msec :: BW= % GB/s\n",
|
||||
dt , 2*m*n*sizeof(int)*2/dt*1e3/1e9);
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
||||
91
examples/portable/inplaceTraspose/inplaceTranspose.ispc
Normal file
91
examples/portable/inplaceTraspose/inplaceTranspose.ispc
Normal file
@@ -0,0 +1,91 @@
|
||||
#include "typeT.h"
|
||||
|
||||
// Greatest common divisor by repeated remainder (Euclid's algorithm).
// Returns b unchanged when a is 0, so gcd(0, 0) is 0.
static inline
uniform int gcd(uniform int a, uniform int b)
{
  uniform int divisor = a;
  uniform int value = b;
  while (divisor != 0)
  {
    const uniform int rest = value % divisor;
    value = divisor;
    divisor = rest;
  }
  return value;
}
|
||||
|
||||
// Source index inside a length-m row for the rotation step:
// position i reads from (i + joverb) modulo m.
// The parameter b is accepted but not used.
static inline
int __rj(const int i, const uniform int joverb, const uniform int m, const uniform int b)
{
  const int shifted = i + joverb;
  return shifted % m;
}
|
||||
|
||||
// Destination index along j for the element at (i, j):
// ((i + joverb) mod m + j*m) mod n.
static inline
int __di(const int i, const uniform int j, const uniform int m, const uniform int n, const uniform int joverb)
{
  const int rotated = (i + joverb) % m;
  const int offset = rotated + j*m;
  return offset % n;
}
|
||||
|
||||
// Source index inside a length-m row for the final gather step:
// (j + i*n - iovera) mod m.
static inline
int __sj(const int i, const uniform int j, const uniform int m, const uniform int n, const int iovera)
{
  const int linear = j + i*n;
  return (linear - iovera) % m;
}
|
||||
|
||||
// In-place permutation of the m*n elements of A, addressed as n rows of m
// contiguous elements (A[j*m + i], 0 <= j < n, 0 <= i < m).
//
// The work is done in up to three passes, each of which permutes along a
// single index through scratch storage:
//   1. (only when gcd(m,n) > 1) every row j is rotated: position i takes the
//      value from position __rj(i, j/b, ...) of the same row;
//   2. for every i, the elements A[j*m + i] are scattered along j to the
//      position given by __di(...);
//   3. every row j is gathered: position i takes the value from position
//      __sj(i, j, ...) of the same row.
//
// NOTE(review): the pass structure and the helper names resemble the
// decomposition of Catanzaro, Keller and Garland for in-place transposition;
// confirm against that paper before relying on it.
//
// Scratch memory of max(m,n)*programCount elements of T plus m+n ints is
// allocated on entry and released before returning.
static inline
void transpose_serial(uniform T A[], const uniform int m, const uniform int n)
{
  // Nothing to permute for an empty shape. Without this guard m == n == 0
  // makes gcd() return 0 and the divisions below divide by zero, and a
  // negative extent would be passed to the allocations.
  if (m <= 0 || n <= 0)
    return;

  const uniform int tmpSize = max(m,n) * programCount;
  uniform T * uniform tmp = uniform new uniform T [tmpSize];
  uniform int * uniform joverb = uniform new uniform int[n];
  uniform int * uniform iovera = uniform new uniform int[m];

  // Second view of the same scratch buffer: rows of programCount elements,
  // so each program instance owns one column during pass 2.
  uniform T (*uniform tmp2D)[programCount] = (uniform T (*uniform)[programCount])tmp;

  const uniform int c = gcd(m,n);
  const uniform int a = m/c;
  const uniform int b = n/c;

  // Precompute the quotients used by the index helpers.
  foreach (j = 0 ... n)
    joverb[j] = j/b;
  foreach (i = 0 ... m)
    iovera[i] = i/a;

  // Pass 1: rotate each row; skipped when m and n are coprime.
  if (c > 1)
  {
    for (uniform int j = 0; j < n; j++)
    {
      const uniform int base = j*m;
      const uniform int __joverb = joverb[j];
      foreach (i = 0 ... m)
        tmp[i] = A[base + __rj(i,__joverb,m,b)];
      foreach (i = 0 ... m)
        A[base + i] = tmp[i];
    }
  }

  // Pass 2: permute along j for each i, staging through this instance's
  // column of tmp2D.
  foreach (i = 0 ... m)
  {
    for (uniform int j = 0; j < n; j++)
      tmp2D[__di(i,j,m,n,joverb[j])][programIndex] = A[j*m + i];
    for (uniform int j = 0; j < n; j++)
      A[j*m + i] = tmp2D[j][programIndex];
  }

  // Pass 3: gather within each row.
  for (uniform int j = 0; j < n; j++)
  {
    const uniform int base = j*m;
    foreach (i = 0 ... m)
      tmp[i] = A[base + __sj(i,j,m,n,iovera[i])];
    foreach (i = 0 ... m)
      A[base + i] = tmp[i];
  }

  delete iovera;
  delete joverb;
  delete tmp;
}
|
||||
|
||||
// Exported entry point callable from the host program; forwards its
// arguments unchanged to transpose_serial().
export void transpose(uniform T A[], const uniform int m, const uniform int n)
{
  transpose_serial(A, m, n);
}
|
||||
|
||||
2
examples/portable/inplaceTraspose/typeT.h
Normal file
2
examples/portable/inplaceTraspose/typeT.h
Normal file
@@ -0,0 +1,2 @@
|
||||
#ifndef INPLACE_TRANSPOSE_TYPET_H
#define INPLACE_TRANSPOSE_TYPET_H

/* Element type moved by the transpose kernel. This header is included by both
 * the C++ driver and the ISPC kernel, so it must stay valid in both. */
typedef double T;

#endif /* INPLACE_TRANSPOSE_TYPET_H */
|
||||
Reference in New Issue
Block a user