typedef double real; static uniform real * uniform accx = NULL; static uniform real * uniform accy; static uniform real * uniform accz; static uniform real * uniform gpotList; export void openNbody(const uniform int n) { assert(accx == NULL); accx = uniform new uniform real[n]; accy = uniform new uniform real[n]; accz = uniform new uniform real[n]; gpotList = uniform new uniform real[n]; } export void closeNbody() { assert(accx != NULL); delete accx; delete accy; delete accz; delete gpotList; } uniform int nn = programCount; task unmasked void computeForces( uniform int nbodies, uniform real posx[], uniform real posy[], uniform real posz[], uniform real mass[]) { const uniform int blockIdx = taskIndex; const uniform int blockDim = (nbodies + taskCount - 1)/taskCount; const uniform int blockBeg = blockIdx * blockDim; const uniform int blockEnd = min(blockBeg + blockDim, nbodies); uniform real shmem[4*programCount]; //real gpotLoc = 0; foreach (i = blockBeg ... blockEnd) { const real iposx = posx[i]; const real iposy = posy[i]; const real iposz = posz[i]; real iaccx = 0; real iaccy = 0; real iaccz = 0; real igpot = 0; #if 0 for (uniform int j = 0; j < nbodies; j++) { const real jposx = posx[j]; const real jposy = posy[j]; const real jposz = posz[j]; const real jmass = mass[j]; const real dx = jposx - iposx; const real dy = jposy - iposy; const real dz = jposz - iposz; const real r2 = dx*dx + dy*dy + dz*dz; const real rinv = r2 > 0.0d ? rsqrt((float)r2) : 0; const real mrinv = -jmass * rinv; const real mrinv3 = mrinv * rinv*rinv; iaccx += mrinv3 * dx; iaccy += mrinv3 * dy; iaccz += mrinv3 * dz; igpot += mrinv; } #else for (uniform int j = 0; j < nbodies; j += programCount) { shmem[0*programCount + programIndex] = posx[j+programIndex]; shmem[1*programCount + programIndex] = posy[j+programIndex]; shmem[2*programCount + programIndex] = posz[j+programIndex]; shmem[3*programCount + programIndex] = mass[j+programIndex]; for (uniform int jb = 0; jb < programCount; jb++) { const real jposx = shmem[0*programCount + jb]; const real jposy = shmem[1*programCount + jb]; const real jposz = shmem[2*programCount + jb]; const real jmass = shmem[3*programCount + jb]; const real dx = jposx - iposx; const real dy = jposy - iposy; const real dz = jposz - iposz; const real r2 = dx*dx + dy*dy + dz*dz; const real rinv = r2; // > 0.0d ? rsqrt((float)r2) : 0; const real mrinv = -jmass * rinv; const real mrinv3 = mrinv * rinv*rinv; iaccx += mrinv3 * dx; iaccy += mrinv3 * dy; iaccz += mrinv3 * dz; igpot += mrinv; } } #endif accx[i] = iaccx; accy[i] = iaccy; accz[i] = iaccz; // gpotLoc += igpot; } // gpotList[taskIndex] = reduce_add(gpotLoc); } task void updatePositions( uniform int nbodies, uniform real posx[], uniform real posy[], uniform real posz[], uniform real velx[], uniform real vely[], uniform real velz[], uniform real dt) { const uniform int blockIdx = taskIndex; const uniform int blockDim = (nbodies + taskCount - 1)/taskCount; const uniform int blockBeg = blockIdx * blockDim; const uniform int blockEnd = min(blockBeg + blockDim, nbodies); foreach (i = blockBeg ... blockEnd) { posx[i] += dt*velx[i]; posy[i] += dt*vely[i]; posz[i] += dt*velz[i]; } } task void updateVelocities( uniform int nbodies, uniform real velx[], uniform real vely[], uniform real velz[], uniform real dt) { const uniform int blockIdx = taskIndex; const uniform int blockDim = (nbodies + taskCount - 1)/taskCount; const uniform int blockBeg = blockIdx * blockDim; const uniform int blockEnd = min(blockBeg + blockDim, nbodies); foreach (i = blockBeg ... blockEnd) { velx[i] += dt*accx[i]; vely[i] += dt*accy[i]; velz[i] += dt*accz[i]; } } export void nbodyIntegrate( uniform int nSteps, uniform int nbodies, uniform real dt, uniform real posx[], uniform real posy[], uniform real posz[], uniform real mass[], uniform real velx[], uniform real vely[], uniform real velz[], uniform real energies[]) { uniform int nTasks = num_cores()*4; #ifdef __NVPTX__ nTasks = (nbodies + 4*programCount - 1)/(4*programCount); #endif assert((nbodies % nTasks) == 0); for (uniform int step = 0; step < nSteps; step++) { // launch [nTasks] updatePositions(nbodies, posx, posy, posz, velx, vely, velz,dt); // sync; launch [nTasks] computeForces(nbodies, posx, posy, posz, mass); sync; // launch [nTasks] updateVelocities(nbodies, posx, posy, posz, dt); // sync; } #if 0 if (energies != NULL) { real gpotLoc = 0; foreach (i = 0 ... nTasks) gpotLoc += gpotList[i]; energies[0] = reduce_add(gpotLoc); } #endif }