+1
This commit is contained in:
@@ -1,6 +1,6 @@
|
|||||||
PROG=nbody
|
PROG=nbody
|
||||||
ISPC_SRC=nbody.ispc
|
ISPC_SRC=nbody.ispc
|
||||||
#CU_SRC=nbody.cu
|
CU_SRC=nbody.cu
|
||||||
CXX_SRC=nbody.cpp
|
CXX_SRC=nbody.cpp
|
||||||
PTXCC_REGMAX=64
|
PTXCC_REGMAX=64
|
||||||
|
|
||||||
|
|||||||
@@ -1,13 +1,16 @@
|
|||||||
typedef double real;
|
typedef double real;
|
||||||
|
#include "cuda_helpers.cuh"
|
||||||
|
#include <cassert>
|
||||||
|
|
||||||
|
#define uniform
|
||||||
|
|
||||||
static uniform real * uniform accx = NULL;
|
__device__ static uniform real * uniform accx = NULL;
|
||||||
static uniform real * uniform accy;
|
__device__ static uniform real * uniform accy;
|
||||||
static uniform real * uniform accz;
|
__device__ static uniform real * uniform accz;
|
||||||
static uniform real * uniform gpotList;
|
__device__ static uniform real * uniform gpotList;
|
||||||
|
|
||||||
export
|
__global__
|
||||||
void openNbody(const uniform int n)
|
void openNbody___export(const uniform int n)
|
||||||
{
|
{
|
||||||
assert(accx == NULL);
|
assert(accx == NULL);
|
||||||
accx = uniform new uniform real[n];
|
accx = uniform new uniform real[n];
|
||||||
@@ -15,9 +18,14 @@ void openNbody(const uniform int n)
|
|||||||
accz = uniform new uniform real[n];
|
accz = uniform new uniform real[n];
|
||||||
gpotList = uniform new uniform real[n];
|
gpotList = uniform new uniform real[n];
|
||||||
}
|
}
|
||||||
|
extern "C"
|
||||||
|
void openNbody(int n)
|
||||||
|
{
|
||||||
|
openNbody___export<<<1,1>>>(n);
|
||||||
|
}
|
||||||
|
|
||||||
export
|
__global__
|
||||||
void closeNbody()
|
void closeNbody___export()
|
||||||
{
|
{
|
||||||
assert(accx != NULL);
|
assert(accx != NULL);
|
||||||
delete accx;
|
delete accx;
|
||||||
@@ -25,9 +33,15 @@ void closeNbody()
|
|||||||
delete accz;
|
delete accz;
|
||||||
delete gpotList;
|
delete gpotList;
|
||||||
}
|
}
|
||||||
|
extern "C"
|
||||||
|
void closeNbody()
|
||||||
|
{
|
||||||
|
closeNbody___export<<<1,1>>>();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
task
|
|
||||||
|
__global__
|
||||||
void computeForces(
|
void computeForces(
|
||||||
uniform int nbodies,
|
uniform int nbodies,
|
||||||
uniform real posx[],
|
uniform real posx[],
|
||||||
@@ -35,14 +49,14 @@ void computeForces(
|
|||||||
uniform real posz[],
|
uniform real posz[],
|
||||||
uniform real mass[])
|
uniform real mass[])
|
||||||
{
|
{
|
||||||
const uniform int blockIdx = taskIndex;
|
const uniform int blkIdx = taskIndex;
|
||||||
const uniform int blockDim = (nbodies + taskCount - 1)/taskCount;
|
const uniform int blkDim = (nbodies + taskCount - 1)/taskCount;
|
||||||
const uniform int blockBeg = blockIdx * blockDim;
|
const uniform int blkBeg = blkIdx * blkDim;
|
||||||
const uniform int blockEnd = min(blockBeg + blockDim, nbodies);
|
const uniform int blkEnd = min(blkBeg + blkDim, nbodies);
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
uniform real gpotLoc = 0;
|
uniform real gpotLoc = 0;
|
||||||
for (uniform int i = blockBeg; i < blockEnd; i++)
|
for (uniform int i = blkBeg; i < blkEnd; i++)
|
||||||
{
|
{
|
||||||
const real iposx = posx[i];
|
const real iposx = posx[i];
|
||||||
const real iposy = posy[i];
|
const real iposy = posy[i];
|
||||||
@@ -78,47 +92,44 @@ void computeForces(
|
|||||||
gpotList[taskIndex] = gpotLoc;
|
gpotList[taskIndex] = gpotLoc;
|
||||||
#else
|
#else
|
||||||
real gpotLoc = 0;
|
real gpotLoc = 0;
|
||||||
foreach (i = blockBeg ... blockEnd)
|
for (int i = programIndex + blkBeg; i < blkEnd; i += programCount)
|
||||||
{
|
if (i < blkEnd)
|
||||||
const real iposx = posx[i];
|
|
||||||
const real iposy = posy[i];
|
|
||||||
const real iposz = posz[i];
|
|
||||||
real iaccx = 0;
|
|
||||||
real iaccy = 0;
|
|
||||||
real iaccz = 0;
|
|
||||||
real igpot = 0;
|
|
||||||
for (uniform int j = 0; j < nbodies; j += 1)
|
|
||||||
{
|
{
|
||||||
#define STEP(jk) {\
|
const real iposx = posx[i];
|
||||||
const real jposx = posx[j+jk]; \
|
const real iposy = posy[i];
|
||||||
const real jposy = posy[j+jk]; \
|
const real iposz = posz[i];
|
||||||
const real jposz = posz[j+jk]; \
|
real iaccx = 0;
|
||||||
const real jmass = mass[j+jk]; \
|
real iaccy = 0;
|
||||||
const real dx = jposx - iposx; \
|
real iaccz = 0;
|
||||||
const real dy = jposy - iposy; \
|
real igpot = 0;
|
||||||
const real dz = jposz - iposz; \
|
for (uniform int j = 0; j < nbodies; j++)
|
||||||
const real r2 = dx*dx + dy*dy + dz*dz; \
|
{
|
||||||
const real rinv = r2 > 0.0d ? rsqrt((float)r2) : 0; \
|
const real jposx = posx[j];
|
||||||
const real mrinv = -jmass * rinv; \
|
const real jposy = posy[j];
|
||||||
const real mrinv3 = mrinv * rinv*rinv; \
|
const real jposz = posz[j];
|
||||||
\
|
const real jmass = mass[j];
|
||||||
iaccx += mrinv3 * dx; \
|
const real dx = jposx - iposx;
|
||||||
iaccy += mrinv3 * dy; \
|
const real dy = jposy - iposy;
|
||||||
iaccz += mrinv3 * dz; \
|
const real dz = jposz - iposz;
|
||||||
igpot += mrinv; \
|
const real r2 = dx*dx + dy*dy + dz*dz;
|
||||||
}
|
const real rinv = r2 > 0.0 ? rsqrt((float)r2) : 0;
|
||||||
STEP(0)
|
const real mrinv = -jmass * rinv;
|
||||||
|
const real mrinv3 = mrinv * rinv*rinv;
|
||||||
|
iaccx += mrinv3 * dx;
|
||||||
|
iaccy += mrinv3 * dy;
|
||||||
|
iaccz += mrinv3 * dz;
|
||||||
|
igpot += mrinv;
|
||||||
|
}
|
||||||
|
accx[i] = iaccx;
|
||||||
|
accy[i] = iaccy;
|
||||||
|
accz[i] = iaccz;
|
||||||
|
gpotLoc += igpot;
|
||||||
}
|
}
|
||||||
accx[i] = iaccx;
|
// gpotList[taskIndex] = reduce_add(gpotLoc);
|
||||||
accy[i] = iaccy;
|
|
||||||
accz[i] = iaccz;
|
|
||||||
gpotLoc += igpot;
|
|
||||||
}
|
|
||||||
gpotList[taskIndex] = reduce_add(gpotLoc);
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
task
|
__global__
|
||||||
void updatePositions(
|
void updatePositions(
|
||||||
uniform int nbodies,
|
uniform int nbodies,
|
||||||
uniform real posx[],
|
uniform real posx[],
|
||||||
@@ -129,20 +140,21 @@ void updatePositions(
|
|||||||
uniform real velz[],
|
uniform real velz[],
|
||||||
uniform real dt)
|
uniform real dt)
|
||||||
{
|
{
|
||||||
const uniform int blockIdx = taskIndex;
|
const uniform int blkIdx = taskIndex;
|
||||||
const uniform int blockDim = (nbodies + taskCount - 1)/taskCount;
|
const uniform int blkDim = (nbodies + taskCount - 1)/taskCount;
|
||||||
const uniform int blockBeg = blockIdx * blockDim;
|
const uniform int blkBeg = blkIdx * blkDim;
|
||||||
const uniform int blockEnd = min(blockBeg + blockDim, nbodies);
|
const uniform int blkEnd = min(blkBeg + blkDim, nbodies);
|
||||||
|
|
||||||
foreach (i = blockBeg ... blockEnd)
|
for (int i = programIndex + blkBeg; i < blkEnd; i += programCount)
|
||||||
{
|
if (i < blkEnd)
|
||||||
posx[i] += dt*velx[i];
|
{
|
||||||
posy[i] += dt*vely[i];
|
posx[i] += dt*velx[i];
|
||||||
posz[i] += dt*velz[i];
|
posy[i] += dt*vely[i];
|
||||||
}
|
posz[i] += dt*velz[i];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
task
|
__global__
|
||||||
void updateVelocities(
|
void updateVelocities(
|
||||||
uniform int nbodies,
|
uniform int nbodies,
|
||||||
uniform real velx[],
|
uniform real velx[],
|
||||||
@@ -150,20 +162,61 @@ void updateVelocities(
|
|||||||
uniform real velz[],
|
uniform real velz[],
|
||||||
uniform real dt)
|
uniform real dt)
|
||||||
{
|
{
|
||||||
const uniform int blockIdx = taskIndex;
|
const uniform int blkIdx = taskIndex;
|
||||||
const uniform int blockDim = (nbodies + taskCount - 1)/taskCount;
|
const uniform int blkDim = (nbodies + taskCount - 1)/taskCount;
|
||||||
const uniform int blockBeg = blockIdx * blockDim;
|
const uniform int blkBeg = blkIdx * blkDim;
|
||||||
const uniform int blockEnd = min(blockBeg + blockDim, nbodies);
|
const uniform int blkEnd = min(blkBeg + blkDim, nbodies);
|
||||||
|
|
||||||
foreach (i = blockBeg ... blockEnd)
|
for (int i = programIndex + blkBeg; i < blkEnd; i += programCount)
|
||||||
{
|
if (i < blkEnd)
|
||||||
velx[i] += dt*accx[i];
|
{
|
||||||
vely[i] += dt*accy[i];
|
velx[i] += dt*accx[i];
|
||||||
velz[i] += dt*accz[i];
|
vely[i] += dt*accy[i];
|
||||||
}
|
velz[i] += dt*accz[i];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
export
|
__global__
|
||||||
|
void nbodyIntegrate___export(
|
||||||
|
uniform int nSteps,
|
||||||
|
uniform int nbodies,
|
||||||
|
uniform real dt,
|
||||||
|
uniform real posx[],
|
||||||
|
uniform real posy[],
|
||||||
|
uniform real posz[],
|
||||||
|
uniform real mass[],
|
||||||
|
uniform real velx[],
|
||||||
|
uniform real vely[],
|
||||||
|
uniform real velz[],
|
||||||
|
uniform real energies[])
|
||||||
|
{
|
||||||
|
uniform int nTasks ;
|
||||||
|
nTasks = nbodies/(4*programCount);
|
||||||
|
assert((nbodies % nTasks) == 0);
|
||||||
|
|
||||||
|
for (uniform int step = 0; step < nSteps; step++)
|
||||||
|
{
|
||||||
|
launch (nTasks,1,1, updatePositions)(nbodies, posx, posy, posz, velx, vely, velz,dt);
|
||||||
|
sync;
|
||||||
|
launch (nTasks,1,1, computeForces)(nbodies, posx, posy, posz, mass);
|
||||||
|
sync;
|
||||||
|
launch (nTasks,1,1, updateVelocities)(nbodies, posx, posy, posz, dt);
|
||||||
|
sync;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
if (energies != NULL)
|
||||||
|
{
|
||||||
|
real gpotLoc = 0;
|
||||||
|
foreach (i = 0 ... nTasks)
|
||||||
|
gpotLoc += gpotList[i];
|
||||||
|
energies[0] = reduce_add(gpotLoc);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
extern "C"
|
||||||
void nbodyIntegrate(
|
void nbodyIntegrate(
|
||||||
uniform int nSteps,
|
uniform int nSteps,
|
||||||
uniform int nbodies,
|
uniform int nbodies,
|
||||||
@@ -177,29 +230,17 @@ void nbodyIntegrate(
|
|||||||
uniform real velz[],
|
uniform real velz[],
|
||||||
uniform real energies[])
|
uniform real energies[])
|
||||||
{
|
{
|
||||||
uniform int nTasks = num_cores()*4;
|
nbodyIntegrate___export<<<1,32>>>(
|
||||||
#ifdef __NVPTX__
|
nSteps,
|
||||||
nTasks = nbodies/(4*programCount);
|
nbodies,
|
||||||
#endif
|
dt,
|
||||||
assert((nbodies % nTasks) == 0);
|
posx,
|
||||||
|
posy,
|
||||||
for (uniform int step = 0; step < nSteps; step++)
|
posz,
|
||||||
{
|
mass,
|
||||||
launch [nTasks] updatePositions(nbodies, posx, posy, posz, velx, vely, velz,dt);
|
velx,
|
||||||
sync;
|
vely,
|
||||||
launch [nTasks] computeForces(nbodies, posx, posy, posz, mass);
|
velz,
|
||||||
sync;
|
energies);
|
||||||
launch [nTasks] updateVelocities(nbodies, posx, posy, posz, dt);
|
sync;
|
||||||
sync;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (energies != NULL)
|
|
||||||
{
|
|
||||||
real gpotLoc = 0;
|
|
||||||
foreach (i = 0 ... nTasks)
|
|
||||||
gpotLoc += gpotList[i];
|
|
||||||
energies[0] = reduce_add(gpotLoc);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user