Update examples: bulk task launch in stencil/mandelbrot, use foreach more.

This commit is contained in:
Matt Pharr
2011-12-10 11:11:05 -08:00
parent 0b2febcec0
commit 034507a35b
2 changed files with 37 additions and 57 deletions

View File

@@ -49,19 +49,18 @@ mandel(float c_re, float c_im, int count) {
}
/* Task to compute the Mandelbrot iterations for a span of scanlines from
[ystart,yend).
/* Task to compute the Mandelbrot iterations for a single scanline.
*/
task void
mandelbrot_scanlines(uniform int ybase, uniform int span,
uniform float x0, uniform float dx,
uniform float y0, uniform float dy,
uniform int width, uniform int maxIterations,
uniform int output[]) {
uniform int ystart = ybase + taskIndex * span;
uniform int yend = ystart + span;
mandelbrot_scanline(uniform float x0, uniform float dx,
uniform float y0, uniform float dy,
uniform int width, uniform int height,
uniform int span,
uniform int maxIterations, uniform int output[]) {
uniform int y0 = taskIndex * span;
uniform int y1 = min((taskIndex+1) * span, height);
foreach (yi = ystart ... yend, xi = 0 ... width) {
foreach (yi = y0 ... y1, xi = 0 ... width) {
float x = x0 + xi * dx;
float y = y0 + yi * dy;
@@ -71,20 +70,6 @@ mandelbrot_scanlines(uniform int ybase, uniform int span,
}
task void
mandelbrot_chunk(uniform float x0, uniform float dx,
uniform float y0, uniform float dy,
uniform int width, uniform int height,
uniform int maxIterations, uniform int output[]) {
uniform int ystart = taskIndex * (height/taskCount);
uniform int yend = (taskIndex+1) * (height/taskCount);
uniform int span = 1;
launch[(yend-ystart)/span] < mandelbrot_scanlines(ystart, span, x0, dx, y0, dy,
width, maxIterations, output) >;
}
export void
mandelbrot_ispc(uniform float x0, uniform float y0,
uniform float x1, uniform float y1,
@@ -92,7 +77,8 @@ mandelbrot_ispc(uniform float x0, uniform float y0,
uniform int maxIterations, uniform int output[]) {
uniform float dx = (x1 - x0) / width;
uniform float dy = (y1 - y0) / height;
uniform int span = 4;
launch[32] < mandelbrot_chunk(x0, dx, y0, dy, width, height,
maxIterations, output) >;
launch[height/span] < mandelbrot_scanline(x0, dx, y0, dy, width, height, span,
maxIterations, output) >;
}

View File

@@ -41,27 +41,23 @@ stencil_step(uniform int x0, uniform int x1,
uniform const float Ain[], uniform float Aout[]) {
const uniform int Nxy = Nx * Ny;
for (uniform int z = z0; z < z1; ++z) {
for (uniform int y = y0; y < y1; ++y) {
foreach (x = x0 ... x1) {
int index = (z * Nxy) + (y * Nx) + x;
foreach (z = z0 ... z1, y = y0 ... y1, x = x0 ... x1) {
int index = (z * Nxy) + (y * Nx) + x;
#define A_cur(x, y, z) Ain[index + (x) + ((y) * Nx) + ((z) * Nxy)]
#define A_next(x, y, z) Aout[index + (x) + ((y) * Nx) + ((z) * Nxy)]
float div = coef[0] * A_cur(0, 0, 0) +
coef[1] * (A_cur(+1, 0, 0) + A_cur(-1, 0, 0) +
A_cur(0, +1, 0) + A_cur(0, -1, 0) +
A_cur(0, 0, +1) + A_cur(0, 0, -1)) +
coef[2] * (A_cur(+2, 0, 0) + A_cur(-2, 0, 0) +
A_cur(0, +2, 0) + A_cur(0, -2, 0) +
A_cur(0, 0, +2) + A_cur(0, 0, -2)) +
coef[3] * (A_cur(+3, 0, 0) + A_cur(-3, 0, 0) +
A_cur(0, +3, 0) + A_cur(0, -3, 0) +
A_cur(0, 0, +3) + A_cur(0, 0, -3));
float div = coef[0] * A_cur(0, 0, 0) +
coef[1] * (A_cur(+1, 0, 0) + A_cur(-1, 0, 0) +
A_cur(0, +1, 0) + A_cur(0, -1, 0) +
A_cur(0, 0, +1) + A_cur(0, 0, -1)) +
coef[2] * (A_cur(+2, 0, 0) + A_cur(-2, 0, 0) +
A_cur(0, +2, 0) + A_cur(0, -2, 0) +
A_cur(0, 0, +2) + A_cur(0, 0, -2)) +
coef[3] * (A_cur(+3, 0, 0) + A_cur(-3, 0, 0) +
A_cur(0, +3, 0) + A_cur(0, -3, 0) +
A_cur(0, 0, +3) + A_cur(0, 0, -3));
A_next(0, 0, 0) = 2 * A_cur(0, 0, 0) - A_next(0, 0, 0) +
vsq[index] * div;
}
}
A_next(0, 0, 0) = 2 * A_cur(0, 0, 0) - A_next(0, 0, 0) +
vsq[index] * div;
}
}
@@ -69,11 +65,12 @@ stencil_step(uniform int x0, uniform int x1,
static task void
stencil_step_task(uniform int x0, uniform int x1,
uniform int y0, uniform int y1,
uniform int z0, uniform int z1,
uniform int z0,
uniform int Nx, uniform int Ny, uniform int Nz,
uniform const float coef[4], uniform const float vsq[],
uniform const float Ain[], uniform float Aout[]) {
stencil_step(x0, x1, y0, y1, z0, z1, Nx, Ny, Nz, coef, vsq, Ain, Aout);
stencil_step(x0, x1, y0, y1, z0+taskIndex, z0+taskIndex+1,
Nx, Ny, Nz, coef, vsq, Ain, Aout);
}
@@ -89,17 +86,14 @@ loop_stencil_ispc_tasks(uniform int t0, uniform int t1,
{
for (uniform int t = t0; t < t1; ++t) {
// Parallelize across cores as well: each task will work on a slice
// of "dz" in the z extent of the volume. (dz=1 seems to work
// better than any larger values.)
uniform int dz = 1;
for (uniform int z = z0; z < z1; z += dz) {
if ((t & 1) == 0)
launch < stencil_step_task(x0, x1, y0, y1, z, z+dz, Nx, Ny, Nz,
coef, vsq, Aeven, Aodd) >;
else
launch < stencil_step_task(x0, x1, y0, y1, z, z+dz, Nx, Ny, Nz,
coef, vsq, Aodd, Aeven) >;
}
// of 1 in the z extent of the volume.
if ((t & 1) == 0)
launch[z1-z0] < stencil_step_task(x0, x1, y0, y1, z0, Nx, Ny, Nz,
coef, vsq, Aeven, Aodd) >;
else
launch[z1-z0] < stencil_step_task(x0, x1, y0, y1, z0, Nx, Ny, Nz,
coef, vsq, Aodd, Aeven) >;
// We need to wait for all of the launched tasks to finish before
// starting the next iteration.
sync;