From cedf4bccf4be31b6831008d57325cecefddfd14b Mon Sep 17 00:00:00 2001 From: Evghenii Date: Thu, 30 Jan 2014 14:49:38 +0100 Subject: [PATCH] +1 --- examples_ptx/nbody/Makefile_gpu | 2 +- examples_ptx/nbody/nbody.ispc | 37 ++++++++++++++++++--------------- 2 files changed, 21 insertions(+), 18 deletions(-) diff --git a/examples_ptx/nbody/Makefile_gpu b/examples_ptx/nbody/Makefile_gpu index 78ee0aaf..3d059ec4 100644 --- a/examples_ptx/nbody/Makefile_gpu +++ b/examples_ptx/nbody/Makefile_gpu @@ -2,7 +2,7 @@ PROG=nbody ISPC_SRC=nbody.ispc #CU_SRC=nbody.cu CXX_SRC=nbody.cpp -PTXCC_REGMAX=128 +PTXCC_REGMAX=64 LLVM_GPU=1 NVVM_GPU=1 diff --git a/examples_ptx/nbody/nbody.ispc b/examples_ptx/nbody/nbody.ispc index cbe82eee..38899916 100644 --- a/examples_ptx/nbody/nbody.ispc +++ b/examples_ptx/nbody/nbody.ispc @@ -87,24 +87,27 @@ void computeForces( real iaccy = 0; real iaccz = 0; real igpot = 0; - for (uniform int j = 0; j < nbodies; j++) + for (uniform int j = 0; j < nbodies; j += 1) { - const real jposx = posx[j]; - const real jposy = posy[j]; - const real jposz = posz[j]; - const real jmass = mass[j]; - const real dx = jposx - iposx; - const real dy = jposy - iposy; - const real dz = jposz - iposz; - const real r2 = dx*dx + dy*dy + dz*dz; - const real rinv = r2 > 0.0d ? rsqrt((float)r2) : 0; - const real mrinv = -jmass * rinv; - const real mrinv3 = mrinv * rinv*rinv; - - iaccx += mrinv3 * dx; - iaccy += mrinv3 * dy; - iaccz += mrinv3 * dz; - igpot += mrinv; +#define STEP(jk) {\ + const real jposx = posx[j+jk]; \ + const real jposy = posy[j+jk]; \ + const real jposz = posz[j+jk]; \ + const real jmass = mass[j+jk]; \ + const real dx = jposx - iposx; \ + const real dy = jposy - iposy; \ + const real dz = jposz - iposz; \ + const real r2 = dx*dx + dy*dy + dz*dz; \ + const real rinv = r2 > 0.0d ? rsqrt((float)r2) : 0; \ + const real mrinv = -jmass * rinv; \ + const real mrinv3 = mrinv * rinv*rinv; \ + \ + iaccx += mrinv3 * dx; \ + iaccy += mrinv3 * dy; \ + iaccz += mrinv3 * dz; \ + igpot += mrinv; \ +} + STEP(0) } accx[i] = iaccx; accy[i] = iaccy;