Files
ispc/examples/portable/nbody_hermite4/hermite4.ispc
2014-02-21 14:18:29 +01:00

165 lines
3.9 KiB
Plaintext

#include "typeReal.h"
typedef real<3> vec3;
struct Force
{
vec3 acc, jrk;
real pot, null;
};
struct Predictor
{
vec3 pos, vel;
};
static inline
void body_body_force(
Force &fi,
const Predictor &pi,
const Predictor &pj,
const real mj,
const real eps2)
{
const real dx = pj.pos.x - pi.pos.x;
const real dy = pj.pos.y - pi.pos.y;
const real dz = pj.pos.z - pi.pos.z;
const real ds2 = dx*dx + dy*dy + dz*dz + eps2;
#if 1
const real inv_ds = rsqrt((float)ds2);
#else
const real inv_ds = rsqrt(ds2);
#endif
const real inv_ds2 = inv_ds*inv_ds;
const real minv_ds = inv_ds * mj;
const real minv_ds3 = inv_ds2 * minv_ds;
fi.acc.x += minv_ds3 * dx;
fi.acc.y += minv_ds3 * dy;
fi.acc.z += minv_ds3 * dz;
fi.pot -= minv_ds;
const real dvx = pj.vel.x - pi.vel.x;
const real dvy = pj.vel.y - pi.vel.y;
const real dvz = pj.vel.z - pi.vel.z;
const real rv = dx*dvx + dy*dvy + dz*dvz;
const real Jij = (real)(-3.0) * (rv * inv_ds2 * minv_ds3);
fi.jrk.x += minv_ds3*dvx + Jij*dx;
fi.jrk.y += minv_ds3*dvy + Jij*dy;
fi.jrk.z += minv_ds3*dvz + Jij*dz;
}
task void compute_forces_task(
uniform const int n,
uniform const int nPerTask,
uniform const real mass[],
uniform const real posx[],
uniform const real posy[],
uniform const real posz[],
uniform const real velx[],
uniform const real vely[],
uniform const real velz[],
uniform real accx[],
uniform real accy[],
uniform real accz[],
uniform real jrkx[],
uniform real jrky[],
uniform real jrkz[],
uniform real gpot[],
const uniform real eps2)
{
const uniform int nibeg = taskIndex * nPerTask;
const uniform int niend = min(n, nibeg + nPerTask);
if (nibeg >= n)
return;
uniform real shdata[7][programCount];
assert((n%programCount) == 0);
foreach (i = nibeg ... niend)
{
Force fi;
fi.acc = (real)0.0;
fi.jrk = (real)0.0;
fi.pot = (real)0.0;
Predictor pi;
pi.pos.x = posx[i];
pi.pos.y = posy[i];
pi.pos.z = posz[i];
pi.vel.x = velx[i];
pi.vel.y = vely[i];
pi.vel.z = velz[i];
for (uniform int jb = 0; jb < n; jb += programCount)
{
const int jp = jb + programIndex;
shdata[0][programIndex] = posx[jp];
shdata[1][programIndex] = posy[jp];
shdata[2][programIndex] = posz[jp];
shdata[3][programIndex] = mass[jp];
shdata[4][programIndex] = velx[jp];
shdata[5][programIndex] = vely[jp];
shdata[6][programIndex] = velz[jp];
for (uniform int j = 0; j < programCount; j++)
{
Predictor pj;
pj.pos.x = shdata[0][j];
pj.pos.y = shdata[1][j];
pj.pos.z = shdata[2][j];
pj.vel.x = shdata[4][j];
pj.vel.y = shdata[5][j];
pj.vel.z = shdata[6][j];
const real jmass = shdata[3][j];
body_body_force(fi,pi,pj,jmass,eps2);
}
}
accx[i] = fi.acc.x;
accy[i] = fi.acc.y;
accz[i] = fi.acc.z;
jrkx[i] = fi.jrk.x;
jrky[i] = fi.jrk.y;
jrkz[i] = fi.jrk.z;
gpot[i] = fi.pot;
}
}
export void compute_forces(
uniform const int n,
uniform const real mass[],
uniform const real posx[],
uniform const real posy[],
uniform const real posz[],
uniform const real velx[],
uniform const real vely[],
uniform const real velz[],
uniform real accx[],
uniform real accy[],
uniform real accz[],
uniform real jrkx[],
uniform real jrky[],
uniform real jrkz[],
uniform real gpot[],
const uniform real eps2)
{
const uniform int nPerTask = min(128,programCount*8);
const uniform int nTask = (n+nPerTask-1)/nPerTask;
launch [nTask] compute_forces_task(
n, nPerTask,
mass,
posx,posy,posz,
velx,vely,velz,
accx,accy,accz,
jrkx,jrky,jrkz,
gpot,eps2);
}