From a918bda679e16342502a6fbb0639a60af1c9f806 Mon Sep 17 00:00:00 2001 From: Evghenii Date: Thu, 30 Jan 2014 14:04:21 +0100 Subject: [PATCH] first commit --- examples_ptx/nbody/nbody.ispc | 160 ++++++++++++++++++++++++++++++++++ 1 file changed, 160 insertions(+) create mode 100644 examples_ptx/nbody/nbody.ispc diff --git a/examples_ptx/nbody/nbody.ispc b/examples_ptx/nbody/nbody.ispc new file mode 100644 index 00000000..693130d3 --- /dev/null +++ b/examples_ptx/nbody/nbody.ispc @@ -0,0 +1,160 @@ +typedef double real; + +typedef real<3> real3; +typedef real<4> real4; + +static uniform real * uniform accx; +static uniform real * uniform accy; +static uniform real * uniform accz; +static uniform real * uniform gpotList; + +static inline +real4 ppForce(real3 ipos, real3 jpos, real jmass) +{ + const real3 dr = jpos - ipos; + const real r2 = dr.x*dr.x + dr.y*dr.y + dr.z*dr.z; + const real rinv = r2 > 0 ? rsqrt(r2) : 0; + const real mrinv = jmass * rinv; + const real mrinv3 = mrinv * rinv*rinv; + + real4 acc; + acc.x = -mrinv3 * dr.x; + acc.y = -mrinv3 * dr.y; + acc.z = -mrinv3 * dr.z; + acc.w = -mrinv; + + return acc; +} + +task +void computeForces( + uniform int nbodies, + uniform real posx[], + uniform real posy[], + uniform real posz[], + uniform real mass[]) +{ + const uniform int blockIdx = taskIndex; + const uniform int blockDim = (nbodies + taskCount - 1)/taskCount; + const uniform int blockBeg = blockIdx * blockDim; + const uniform int blockEnd = min(blockBeg + blockDim, nbodies); + +#if 0 + real gpotLoc = 0; + for (uniform int i = blockBeg; i < blockEnd; i++) + { + const real3 ipos = {posx[i], posy[i], posz[i]}; + real4 iacc = 0; + foreach (j = 0 ... nbodies) + { + const real3 jpos = {posx[j], posy[j], posz[j]}; + const real jmass = mass[j]; + iacc += ppForce(ipos, jpos, jmass); + } + accx[i] = reduce_add(iacc.x); + accy[i] = reduce_add(iacc.y); + accz[i] = reduce_add(iacc.z); + gpotLoc += reduce_add(iacc.w); + } + atomic_add_global(&gpot, gpotLoc); +#else + real gpotLoc = 0; + foreach (i = blockBeg ... blockEnd) + { + const real3 ipos = {posx[i], posy[i], posz[i]}; + real4 iacc = 0; + for (uniform int j = 0; j < nbodies; j++) + { + const real3 jpos = {posx[j], posy[j], posz[j]}; + const real jmass = mass[j]; + iacc += ppForce(ipos, jpos, jmass); + } + accx[i] = iacc.x; + accy[i] = iacc.y; + accz[i] = iacc.z; + gpotLoc += iacc.w; + } + gpotList[taskIndex] = reduce_add(gpotLoc); +#endif +} + +task +void updatePositions( + uniform int nbodies, + uniform real posx[], + uniform real posy[], + uniform real posz[], + uniform real velx[], + uniform real vely[], + uniform real velz[], + uniform real dt) +{ + const uniform int blockIdx = taskIndex; + const uniform int blockDim = (nbodies + taskCount - 1)/taskCount; + const uniform int blockBeg = blockIdx * blockDim; + const uniform int blockEnd = min(blockBeg + blockDim, nbodies); + + foreach (i = blockBeg ... blockEnd) + { + posx[i] += dt*velx[i]; + posy[i] += dt*vely[i]; + posz[i] += dt*velz[i]; + } +} + +task +void updateVelocities( + uniform int nbodies, + uniform real velx[], + uniform real vely[], + uniform real velz[], + uniform real dt) +{ + const uniform int blockIdx = taskIndex; + const uniform int blockDim = (nbodies + taskCount - 1)/taskCount; + const uniform int blockBeg = blockIdx * blockDim; + const uniform int blockEnd = min(blockBeg + blockDim, nbodies); + + foreach (i = blockBeg ... blockEnd) + { + velx[i] += dt*accx[i]; + vely[i] += dt*accy[i]; + velz[i] += dt*accz[i]; + } +} + +export +void nbodyIntegrate( + uniform int nSteps, + uniform int nbodies, + uniform real dt, + uniform real posx[], + uniform real posy[], + uniform real posz[], + uniform real mass[], + uniform real velx[], + uniform real vely[], + uniform real velz[], + uniform real energies[]) +{ + + uniform int nTasks = num_cores()*4; +#ifdef __NVPTX__ + nTasks = nbodies/(4*programCount); +#endif + assert((nbodies % nTasks) == 0); + + for (uniform int step = 0; step < nSteps; step++) + { + launch [nTasks] updatePositions(nbodies, posx, posy, posz, velx, vely, velz,dt); + sync; + launch [nTasks] computeForces(nbodies, posx, posy, posz, mass); + sync; + launch [nTasks] updateVelocities(nbodies, posx, posy, posz, dt); + sync; + } + + //energies[0] = gpot; +} + +