Update examples: bulk task launch in stencil/mandelbrot, use foreach more.

This commit is contained in:
Matt Pharr
2011-12-10 11:11:05 -08:00
parent 0b2febcec0
commit 034507a35b
2 changed files with 37 additions and 57 deletions

View File

@@ -41,27 +41,23 @@ stencil_step(uniform int x0, uniform int x1,
uniform const float Ain[], uniform float Aout[]) {
const uniform int Nxy = Nx * Ny;
for (uniform int z = z0; z < z1; ++z) {
for (uniform int y = y0; y < y1; ++y) {
foreach (x = x0 ... x1) {
int index = (z * Nxy) + (y * Nx) + x;
foreach (z = z0 ... z1, y = y0 ... y1, x = x0 ... x1) {
int index = (z * Nxy) + (y * Nx) + x;
#define A_cur(x, y, z) Ain[index + (x) + ((y) * Nx) + ((z) * Nxy)]
#define A_next(x, y, z) Aout[index + (x) + ((y) * Nx) + ((z) * Nxy)]
float div = coef[0] * A_cur(0, 0, 0) +
coef[1] * (A_cur(+1, 0, 0) + A_cur(-1, 0, 0) +
A_cur(0, +1, 0) + A_cur(0, -1, 0) +
A_cur(0, 0, +1) + A_cur(0, 0, -1)) +
coef[2] * (A_cur(+2, 0, 0) + A_cur(-2, 0, 0) +
A_cur(0, +2, 0) + A_cur(0, -2, 0) +
A_cur(0, 0, +2) + A_cur(0, 0, -2)) +
coef[3] * (A_cur(+3, 0, 0) + A_cur(-3, 0, 0) +
A_cur(0, +3, 0) + A_cur(0, -3, 0) +
A_cur(0, 0, +3) + A_cur(0, 0, -3));
float div = coef[0] * A_cur(0, 0, 0) +
coef[1] * (A_cur(+1, 0, 0) + A_cur(-1, 0, 0) +
A_cur(0, +1, 0) + A_cur(0, -1, 0) +
A_cur(0, 0, +1) + A_cur(0, 0, -1)) +
coef[2] * (A_cur(+2, 0, 0) + A_cur(-2, 0, 0) +
A_cur(0, +2, 0) + A_cur(0, -2, 0) +
A_cur(0, 0, +2) + A_cur(0, 0, -2)) +
coef[3] * (A_cur(+3, 0, 0) + A_cur(-3, 0, 0) +
A_cur(0, +3, 0) + A_cur(0, -3, 0) +
A_cur(0, 0, +3) + A_cur(0, 0, -3));
A_next(0, 0, 0) = 2 * A_cur(0, 0, 0) - A_next(0, 0, 0) +
vsq[index] * div;
}
}
A_next(0, 0, 0) = 2 * A_cur(0, 0, 0) - A_next(0, 0, 0) +
vsq[index] * div;
}
}
@@ -69,11 +65,12 @@ stencil_step(uniform int x0, uniform int x1,
static task void
stencil_step_task(uniform int x0, uniform int x1,
uniform int y0, uniform int y1,
uniform int z0, uniform int z1,
uniform int z0,
uniform int Nx, uniform int Ny, uniform int Nz,
uniform const float coef[4], uniform const float vsq[],
uniform const float Ain[], uniform float Aout[]) {
stencil_step(x0, x1, y0, y1, z0, z1, Nx, Ny, Nz, coef, vsq, Ain, Aout);
stencil_step(x0, x1, y0, y1, z0+taskIndex, z0+taskIndex+1,
Nx, Ny, Nz, coef, vsq, Ain, Aout);
}
@@ -89,17 +86,14 @@ loop_stencil_ispc_tasks(uniform int t0, uniform int t1,
{
for (uniform int t = t0; t < t1; ++t) {
// Parallelize across cores as well: each task will work on a slice
// of "dz" in the z extent of the volume. (dz=1 seems to work
// better than any larger values.)
uniform int dz = 1;
for (uniform int z = z0; z < z1; z += dz) {
if ((t & 1) == 0)
launch < stencil_step_task(x0, x1, y0, y1, z, z+dz, Nx, Ny, Nz,
coef, vsq, Aeven, Aodd) >;
else
launch < stencil_step_task(x0, x1, y0, y1, z, z+dz, Nx, Ny, Nz,
coef, vsq, Aodd, Aeven) >;
}
// of 1 in the z extent of the volume.
if ((t & 1) == 0)
launch[z1-z0] < stencil_step_task(x0, x1, y0, y1, z0, Nx, Ny, Nz,
coef, vsq, Aeven, Aodd) >;
else
launch[z1-z0] < stencil_step_task(x0, x1, y0, y1, z0, Nx, Ny, Nz,
coef, vsq, Aodd, Aeven) >;
// We need to wait for all of the launched tasks to finish before
// starting the next iteration.
sync;