Fix mapping to 8, 16 program instances in AO bench example.

With this, we now compute a correct image with AVX.
This commit is contained in:
Matt Pharr
2011-09-07 11:34:24 -07:00
parent c86128e8ee
commit 2ea6d249d5

View File

@@ -232,6 +232,9 @@ static void ao_scanlines(uniform int y0, uniform int y1, uniform int w,
// direction we do per iteration and ny the number in y. // direction we do per iteration and ny the number in y.
uniform int nx = 1, ny = 1; uniform int nx = 1, ny = 1;
// FIXME: We actually need ny to be 1 regardless of the decomposition,
// since the task decomposition is one scanline high.
if (programCount == 8) { if (programCount == 8) {
// Do two pixels at once in the x direction // Do two pixels at once in the x direction
nx = 2; nx = 2;
@@ -240,19 +243,21 @@ static void ao_scanlines(uniform int y0, uniform int y1, uniform int w,
++du; ++du;
} }
else if (programCount == 16) { else if (programCount == 16) {
// Two at once in both x and y nx = 4;
nx = ny = 2; ny = 1;
if ((programIndex >= 4 && programIndex < 8) || programIndex >= 12) if (programIndex >= 4 && programIndex < 8)
++du; ++du;
if (programIndex >= 8) if (programIndex >= 8 && programIndex < 12)
++dv; du += 2;
if (programIndex >= 12)
du += 3;
} }
// Now loop over all of the pixels, stepping in x and y as calculated // Now loop over all of the pixels, stepping in x and y as calculated
// above. (Assumes that ny divides y and nx divides x...) // above. (Assumes that ny divides y and nx divides x...)
for (uniform int y = y0; y < y1; y += ny) { for (uniform int y = y0; y < y1; y += ny) {
for (uniform int x = 0; x < w; x += nx) { for (uniform int x = 0; x < w; x += nx) {
// Figur out x,y pixel in NDC // Figure out x,y pixel in NDC
float px = (x + du - (w / 2.0f)) / (w / 2.0f); float px = (x + du - (w / 2.0f)) / (w / 2.0f);
float py = -(y + dv - (h / 2.0f)) / (h / 2.0f); float py = -(y + dv - (h / 2.0f)) / (h / 2.0f);
float ret = 0.f; float ret = 0.f;
@@ -294,7 +299,7 @@ static void ao_scanlines(uniform int y0, uniform int y1, uniform int w,
// offset to the first pixel in the image // offset to the first pixel in the image
uniform int offset = 3 * (y * w + x); uniform int offset = 3 * (y * w + x);
for (uniform int p = 0; p < programCount; p += 4, ++offset) { for (uniform int p = 0; p < programCount; p += 4, offset += 3) {
// Get the four sample values for this pixel // Get the four sample values for this pixel
uniform float sumret = retArray[p] + retArray[p+1] + retArray[p+2] + uniform float sumret = retArray[p] + retArray[p+1] + retArray[p+2] +
retArray[p+3]; retArray[p+3];