From 6d034596d31b7c9810dfc2cc8f7ffcf0ea1f9e8d Mon Sep 17 00:00:00 2001 From: Evghenii Date: Sun, 2 Feb 2014 19:01:10 +0100 Subject: [PATCH] +1 --- examples_ptx/nbody/nbody.cu | 14 ++++++++++++-- examples_ptx/nbody/nbody.ispc | 18 ++++++++++++++++-- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/examples_ptx/nbody/nbody.cu b/examples_ptx/nbody/nbody.cu index cf93f788..4a711882 100644 --- a/examples_ptx/nbody/nbody.cu +++ b/examples_ptx/nbody/nbody.cu @@ -39,6 +39,16 @@ void closeNbody() closeNbody___export<<<1,1>>>(); } +static inline __device__ +real rsqrt_real(real r2) +{ +#if 1 + return r2> (real)0.0 ? (real)1.0/sqrt(r2) : 0; +#else + return r2> (real)0.0 ? rsqrt((float)r2) : 0; +#endif +} + __global__ @@ -75,7 +85,7 @@ void computeForces( const real dy = jposy - iposy; const real dz = jposz - iposz; const real r2 = dx*dx + dy*dy + dz*dz; - const real rinv = r2; // > 0.0 ? rsqrt((float)r2) : 0; + const real rinv = rsqrt_real(r2); const real mrinv = -jmass * rinv; const real mrinv3 = mrinv * rinv*rinv; iaccx += mrinv3 * dx; @@ -118,7 +128,7 @@ void computeForces( const real dy = jposy - iposy; const real dz = jposz - iposz; const real r2 = dx*dx + dy*dy + dz*dz; - const real rinv = r2 > 0.0 ? 1.0/sqrt(r2) : 0; + const real rinv = rsqrt_real(r2); const real mrinv = -jmass * rinv; const real mrinv3 = mrinv * rinv*rinv; iaccx += mrinv3 * dx; diff --git a/examples_ptx/nbody/nbody.ispc b/examples_ptx/nbody/nbody.ispc index 4b98650c..f1d4b22f 100644 --- a/examples_ptx/nbody/nbody.ispc +++ b/examples_ptx/nbody/nbody.ispc @@ -26,6 +26,16 @@ void closeNbody() delete gpotList; } + static inline +real rsqrt_real(real r2) +{ +#if 1 + return r2> (real)0.0 ? (real)1.0d0/sqrt(r2) : 0; +#else + return r2> (real)0.0 ? rsqrt((float)r2) : 0; +#endif +} + uniform int nn = programCount; @@ -64,7 +74,7 @@ void computeForces( const real dy = jposy - iposy; const real dz = jposz - iposz; const real r2 = dx*dx + dy*dy + dz*dz; - const real rinv = r2> 0.0d ? 1.0/sqrt(r2) : 0; + const real rinv = rsqrt_real(r2); const real mrinv = -jmass * rinv; const real mrinv3 = mrinv * rinv*rinv; @@ -104,7 +114,11 @@ void computeForces( const real dy = jposy - iposy; const real dz = jposz - iposz; const real r2 = dx*dx + dy*dy + dz*dz; - const real rinv = r2 > 0.0d ? 1.0/sqrt(r2) : 0; +#if 0 /* faster */ + const real rinv = r2> (real)0.0 ? (real)1.0d0/sqrt(r2) : 0; +#else + const real rinv = rsqrt_real(r2); +#endif const real mrinv = -jmass * rinv; const real mrinv3 = mrinv * rinv*rinv;