This commit is contained in:
Evghenii
2014-01-30 17:59:21 +01:00
parent 50c505b845
commit adef91d82d
2 changed files with 139 additions and 98 deletions

View File

@@ -1,6 +1,6 @@
PROG=nbody PROG=nbody
ISPC_SRC=nbody.ispc ISPC_SRC=nbody.ispc
#CU_SRC=nbody.cu CU_SRC=nbody.cu
CXX_SRC=nbody.cpp CXX_SRC=nbody.cpp
PTXCC_REGMAX=64 PTXCC_REGMAX=64

View File

@@ -1,13 +1,16 @@
typedef double real; typedef double real;
#include "cuda_helpers.cuh"
#include <cassert>
#define uniform
static uniform real * uniform accx = NULL; __device__ static uniform real * uniform accx = NULL;
static uniform real * uniform accy; __device__ static uniform real * uniform accy;
static uniform real * uniform accz; __device__ static uniform real * uniform accz;
static uniform real * uniform gpotList; __device__ static uniform real * uniform gpotList;
export __global__
void openNbody(const uniform int n) void openNbody___export(const uniform int n)
{ {
assert(accx == NULL); assert(accx == NULL);
accx = uniform new uniform real[n]; accx = uniform new uniform real[n];
@@ -15,9 +18,14 @@ void openNbody(const uniform int n)
accz = uniform new uniform real[n]; accz = uniform new uniform real[n];
gpotList = uniform new uniform real[n]; gpotList = uniform new uniform real[n];
} }
extern "C"
void openNbody(int n)
{
openNbody___export<<<1,1>>>(n);
}
export __global__
void closeNbody() void closeNbody___export()
{ {
assert(accx != NULL); assert(accx != NULL);
delete accx; delete accx;
@@ -25,9 +33,15 @@ void closeNbody()
delete accz; delete accz;
delete gpotList; delete gpotList;
} }
extern "C"
void closeNbody()
{
closeNbody___export<<<1,1>>>();
}
task
__global__
void computeForces( void computeForces(
uniform int nbodies, uniform int nbodies,
uniform real posx[], uniform real posx[],
@@ -35,14 +49,14 @@ void computeForces(
uniform real posz[], uniform real posz[],
uniform real mass[]) uniform real mass[])
{ {
const uniform int blockIdx = taskIndex; const uniform int blkIdx = taskIndex;
const uniform int blockDim = (nbodies + taskCount - 1)/taskCount; const uniform int blkDim = (nbodies + taskCount - 1)/taskCount;
const uniform int blockBeg = blockIdx * blockDim; const uniform int blkBeg = blkIdx * blkDim;
const uniform int blockEnd = min(blockBeg + blockDim, nbodies); const uniform int blkEnd = min(blkBeg + blkDim, nbodies);
#if 0 #if 0
uniform real gpotLoc = 0; uniform real gpotLoc = 0;
for (uniform int i = blockBeg; i < blockEnd; i++) for (uniform int i = blkBeg; i < blkEnd; i++)
{ {
const real iposx = posx[i]; const real iposx = posx[i];
const real iposy = posy[i]; const real iposy = posy[i];
@@ -78,47 +92,44 @@ void computeForces(
gpotList[taskIndex] = gpotLoc; gpotList[taskIndex] = gpotLoc;
#else #else
real gpotLoc = 0; real gpotLoc = 0;
foreach (i = blockBeg ... blockEnd) for (int i = programIndex + blkBeg; i < blkEnd; i += programCount)
{ if (i < blkEnd)
const real iposx = posx[i];
const real iposy = posy[i];
const real iposz = posz[i];
real iaccx = 0;
real iaccy = 0;
real iaccz = 0;
real igpot = 0;
for (uniform int j = 0; j < nbodies; j += 1)
{ {
#define STEP(jk) {\ const real iposx = posx[i];
const real jposx = posx[j+jk]; \ const real iposy = posy[i];
const real jposy = posy[j+jk]; \ const real iposz = posz[i];
const real jposz = posz[j+jk]; \ real iaccx = 0;
const real jmass = mass[j+jk]; \ real iaccy = 0;
const real dx = jposx - iposx; \ real iaccz = 0;
const real dy = jposy - iposy; \ real igpot = 0;
const real dz = jposz - iposz; \ for (uniform int j = 0; j < nbodies; j++)
const real r2 = dx*dx + dy*dy + dz*dz; \ {
const real rinv = r2 > 0.0d ? rsqrt((float)r2) : 0; \ const real jposx = posx[j];
const real mrinv = -jmass * rinv; \ const real jposy = posy[j];
const real mrinv3 = mrinv * rinv*rinv; \ const real jposz = posz[j];
\ const real jmass = mass[j];
iaccx += mrinv3 * dx; \ const real dx = jposx - iposx;
iaccy += mrinv3 * dy; \ const real dy = jposy - iposy;
iaccz += mrinv3 * dz; \ const real dz = jposz - iposz;
igpot += mrinv; \ const real r2 = dx*dx + dy*dy + dz*dz;
} const real rinv = r2 > 0.0 ? rsqrt((float)r2) : 0;
STEP(0) const real mrinv = -jmass * rinv;
const real mrinv3 = mrinv * rinv*rinv;
iaccx += mrinv3 * dx;
iaccy += mrinv3 * dy;
iaccz += mrinv3 * dz;
igpot += mrinv;
}
accx[i] = iaccx;
accy[i] = iaccy;
accz[i] = iaccz;
gpotLoc += igpot;
} }
accx[i] = iaccx; // gpotList[taskIndex] = reduce_add(gpotLoc);
accy[i] = iaccy;
accz[i] = iaccz;
gpotLoc += igpot;
}
gpotList[taskIndex] = reduce_add(gpotLoc);
#endif #endif
} }
task __global__
void updatePositions( void updatePositions(
uniform int nbodies, uniform int nbodies,
uniform real posx[], uniform real posx[],
@@ -129,20 +140,21 @@ void updatePositions(
uniform real velz[], uniform real velz[],
uniform real dt) uniform real dt)
{ {
const uniform int blockIdx = taskIndex; const uniform int blkIdx = taskIndex;
const uniform int blockDim = (nbodies + taskCount - 1)/taskCount; const uniform int blkDim = (nbodies + taskCount - 1)/taskCount;
const uniform int blockBeg = blockIdx * blockDim; const uniform int blkBeg = blkIdx * blkDim;
const uniform int blockEnd = min(blockBeg + blockDim, nbodies); const uniform int blkEnd = min(blkBeg + blkDim, nbodies);
foreach (i = blockBeg ... blockEnd) for (int i = programIndex + blkBeg; i < blkEnd; i += programCount)
{ if (i < blkEnd)
posx[i] += dt*velx[i]; {
posy[i] += dt*vely[i]; posx[i] += dt*velx[i];
posz[i] += dt*velz[i]; posy[i] += dt*vely[i];
} posz[i] += dt*velz[i];
}
} }
task __global__
void updateVelocities( void updateVelocities(
uniform int nbodies, uniform int nbodies,
uniform real velx[], uniform real velx[],
@@ -150,20 +162,61 @@ void updateVelocities(
uniform real velz[], uniform real velz[],
uniform real dt) uniform real dt)
{ {
const uniform int blockIdx = taskIndex; const uniform int blkIdx = taskIndex;
const uniform int blockDim = (nbodies + taskCount - 1)/taskCount; const uniform int blkDim = (nbodies + taskCount - 1)/taskCount;
const uniform int blockBeg = blockIdx * blockDim; const uniform int blkBeg = blkIdx * blkDim;
const uniform int blockEnd = min(blockBeg + blockDim, nbodies); const uniform int blkEnd = min(blkBeg + blkDim, nbodies);
foreach (i = blockBeg ... blockEnd) for (int i = programIndex + blkBeg; i < blkEnd; i += programCount)
{ if (i < blkEnd)
velx[i] += dt*accx[i]; {
vely[i] += dt*accy[i]; velx[i] += dt*accx[i];
velz[i] += dt*accz[i]; vely[i] += dt*accy[i];
} velz[i] += dt*accz[i];
}
} }
export __global__
void nbodyIntegrate___export(
uniform int nSteps,
uniform int nbodies,
uniform real dt,
uniform real posx[],
uniform real posy[],
uniform real posz[],
uniform real mass[],
uniform real velx[],
uniform real vely[],
uniform real velz[],
uniform real energies[])
{
uniform int nTasks ;
nTasks = nbodies/(4*programCount);
assert((nbodies % nTasks) == 0);
for (uniform int step = 0; step < nSteps; step++)
{
launch (nTasks,1,1, updatePositions)(nbodies, posx, posy, posz, velx, vely, velz,dt);
sync;
launch (nTasks,1,1, computeForces)(nbodies, posx, posy, posz, mass);
sync;
launch (nTasks,1,1, updateVelocities)(nbodies, posx, posy, posz, dt);
sync;
}
#if 0
if (energies != NULL)
{
real gpotLoc = 0;
foreach (i = 0 ... nTasks)
gpotLoc += gpotList[i];
energies[0] = reduce_add(gpotLoc);
}
#endif
}
extern "C"
void nbodyIntegrate( void nbodyIntegrate(
uniform int nSteps, uniform int nSteps,
uniform int nbodies, uniform int nbodies,
@@ -177,29 +230,17 @@ void nbodyIntegrate(
uniform real velz[], uniform real velz[],
uniform real energies[]) uniform real energies[])
{ {
uniform int nTasks = num_cores()*4; nbodyIntegrate___export<<<1,32>>>(
#ifdef __NVPTX__ nSteps,
nTasks = nbodies/(4*programCount); nbodies,
#endif dt,
assert((nbodies % nTasks) == 0); posx,
posy,
for (uniform int step = 0; step < nSteps; step++) posz,
{ mass,
launch [nTasks] updatePositions(nbodies, posx, posy, posz, velx, vely, velz,dt); velx,
sync; vely,
launch [nTasks] computeForces(nbodies, posx, posy, posz, mass); velz,
sync; energies);
launch [nTasks] updateVelocities(nbodies, posx, posy, posz, dt); sync;
sync;
}
if (energies != NULL)
{
real gpotLoc = 0;
foreach (i = 0 ... nTasks)
gpotLoc += gpotList[i];
energies[0] = reduce_add(gpotLoc);
}
} }