diff --git a/examples/portable/nbody_hermite4/hermite4.cpp b/examples/portable/nbody_hermite4/hermite4.cpp index ed7ff3b1..e6f92110 100644 --- a/examples/portable/nbody_hermite4/hermite4.cpp +++ b/examples/portable/nbody_hermite4/hermite4.cpp @@ -20,6 +20,7 @@ struct Hermite4 enum {PP_FLOP=44}; const int n; const real eta; + real eps2; real *g_mass, *g_gpot; real *g_posx, *g_posy, *g_posz; real *g_velx, *g_vely, *g_velz; @@ -31,6 +32,8 @@ struct Hermite4 Hermite4(const int _n = 8192, const real _eta = 0.1) : n(_n), eta(_eta) { + eps2 = 4.0/n; /* eps = 4/n to give Ebin = 1 KT */ + eps2 *= eps2; g_mass = new real[n]; g_gpot = new real[n]; g_posx = new real[n]; @@ -289,7 +292,8 @@ void Hermite4::forces() g_jrkx, g_jrky, g_jrkz, - g_gpot); + g_gpot, + eps2); } void run(const int nbodies, const real eta, const int nstep) @@ -312,6 +316,7 @@ int main(int argc, char *argv[]) if (argc > 3) eta = atof(argv[3]); + printf("nbodies= %d\n", nbodies); printf("nstep= %d\n", nstep); printf(" eta= %g \n", eta); diff --git a/examples/portable/nbody_hermite4/hermite4.ispc b/examples/portable/nbody_hermite4/hermite4.ispc index 95992e05..5b9449cf 100644 --- a/examples/portable/nbody_hermite4/hermite4.ispc +++ b/examples/portable/nbody_hermite4/hermite4.ispc @@ -17,15 +17,20 @@ void body_body_force( Force &fi, const Predictor &pi, const Predictor &pj, - const real mj) + const real mj, + const real eps2) { const real dx = pj.pos.x - pi.pos.x; const real dy = pj.pos.y - pi.pos.y; const real dz = pj.pos.z - pi.pos.z; - const real ds2 = dx*dx + dy*dy + dz*dz; + const real ds2 = dx*dx + dy*dy + dz*dz + eps2; - const real inv_ds = ds2 > 1.0d-10 ? rsqrt((float)ds2) : 0.0; +#if 1 + const real inv_ds = rsqrt((float)ds2); +#else + const real inv_ds = rsqrt(ds2); +#endif const real inv_ds2 = inv_ds*inv_ds; const real minv_ds = inv_ds * mj; const real minv_ds3 = inv_ds2 * minv_ds; @@ -64,7 +69,8 @@ task void compute_forces_task( uniform real jrkx[], uniform real jrky[], uniform real jrkz[], - uniform real gpot[]) + uniform real gpot[], + const uniform real eps2) { const uniform int nibeg = taskIndex * nPerTask; const uniform int niend = min(n, nibeg + nPerTask); @@ -112,7 +118,7 @@ task void compute_forces_task( pj.vel.y = shdata[5][j]; pj.vel.z = shdata[6][j]; const real jmass = shdata[3][j]; - body_body_force(fi,pi,pj,jmass); + body_body_force(fi,pi,pj,jmass,eps2); } } @@ -141,7 +147,8 @@ export void compute_forces( uniform real jrkx[], uniform real jrky[], uniform real jrkz[], - uniform real gpot[]) + uniform real gpot[], + const uniform real eps2) { const uniform int nPerTask = min(128,programCount*8); const uniform int nTask = (n+nPerTask-1)/nPerTask; @@ -153,5 +160,5 @@ export void compute_forces( velx,vely,velz, accx,accy,accz, jrkx,jrky,jrkz, - gpot); + gpot,eps2); }