foreach[_tiled] seems to work now

This commit is contained in:
Evghenii
2013-11-14 16:29:40 +01:00
parent 48644813d4
commit af75afeb7a
5 changed files with 164 additions and 95 deletions

View File

@@ -228,57 +228,54 @@ static inline void ao_tile(
seed_rng(&rngstate, programIndex + (y0 << (programIndex & 31)));
float invSamples = 1.f / nsubsamples;
for (uniform int y = y0; y < y1; y++)
for (uniform int xb = x0; xb < x1; xb += programCount)
{
const int x = xb + programIndex;
const int offset = 3 * (y * w + x);
float res = 0.0f;
foreach_tiled (y = y0 ... y1, x = x0 ... x1)
{
const int offset = 3 * (y * w + x);
float res = 0.0f;
for (uniform int u = 0; u < nsubsamples; u++)
for (uniform int v = 0; v < nsubsamples; v++)
{
float du = (float)u * invSamples, dv = (float)v * invSamples;
// Figure out x,y pixel in NDC
float px = (x + du - (w / 2.0f)) / (w / 2.0f);
float py = -(y + dv - (h / 2.0f)) / (h / 2.0f);
float ret = 0.f;
Ray ray;
Isect isect;
ray.org = 0.f;
// Poor man's perspective projection
ray.dir.x = px;
ray.dir.y = py;
ray.dir.z = -1.0;
vnormalize(ray.dir);
isect.t = 1.0e+17;
isect.hit = 0;
for (uniform int snum = 0; snum < 3; ++snum)
ray_sphere_intersect(isect, ray, spheres[snum]);
ray_plane_intersect(isect, ray, plane);
// Note use of 'coherent' if statement; the set of rays we
// trace will often all hit or all miss the scene
if (isect.hit) {
ret = ambient_occlusion(isect, plane, spheres, rngstate);
ret *= invSamples * invSamples;
res += ret;
}
}
if (xb < x1)
for (uniform int u = 0; u < nsubsamples; u++)
for (uniform int v = 0; v < nsubsamples; v++)
{
image[offset ] = res;
image[offset+1] = res;
image[offset+2] = res;
float du = (float)u * invSamples, dv = (float)v * invSamples;
// Figure out x,y pixel in NDC
float px = (x + du - (w / 2.0f)) / (w / 2.0f);
float py = -(y + dv - (h / 2.0f)) / (h / 2.0f);
float ret = 0.f;
Ray ray;
Isect isect;
ray.org = 0.f;
// Poor man's perspective projection
ray.dir.x = px;
ray.dir.y = py;
ray.dir.z = -1.0;
vnormalize(ray.dir);
isect.t = 1.0e+17;
isect.hit = 0;
for (uniform int snum = 0; snum < 3; ++snum)
ray_sphere_intersect(isect, ray, spheres[snum]);
ray_plane_intersect(isect, ray, plane);
// Note use of 'coherent' if statement; the set of rays we
// trace will often all hit or all miss the scene
if (isect.hit) {
ret = ambient_occlusion(isect, plane, spheres, rngstate);
ret *= invSamples * invSamples;
res += ret;
}
}
//if (x < x1)
{
image[offset ] = res;
image[offset+1] = res;
image[offset+2] = res;
}
}
}

View File

@@ -142,7 +142,7 @@ int main(int argc, char *argv[]) {
deviceFree(d_buf);
printf("[mandelbrot ispc+tasks]:\t[%.3f] million cycles\n", minISPC);
writePPM(buf, width, height, "mandelbrot-ispc.ppm");
writePPM(buf, width, height, "mandelbrot-cuda.ppm");
//

View File

@@ -73,7 +73,8 @@ mandelbrot_scanline(uniform float x0, uniform float dx,
const uniform int ystart = taskIndex1 * yspan;
const uniform int yend = min(ystart + yspan, height);
#if 0
for (uniform int yi = ystart; yi < yend; yi++)
for (uniform int xi = xstart; xi < xend; xi += programCount)
{
@@ -85,6 +86,17 @@ mandelbrot_scanline(uniform float x0, uniform float dx,
if (xi + programIndex < xend)
output[index] = res;
}
#else
foreach (yi = ystart ... yend, xi = xstart ... xend)
{
const float x = x0 + xi * dx;
const float y = y0 + yi * dy;
const int res = mandel(x,y,maxIterations);
const int index = yi * width + xi;
output[index] = res;
}
#endif
}

View File

@@ -278,44 +278,54 @@ volume_tile(uniform int x0, uniform int y0, uniform int x1,
const uniform float raster2camera[4][4],
const uniform float camera2world[4][4],
uniform int width, uniform int height, uniform float image[]) {
// Work on 4x4=16-pixel tiles of the image. This function thus
// implicitly assumes that both (x1-x0) and (y1-y0) are evenly divisible
// by 4.
for (uniform int y = y0; y < y1; y += 8) {
for (uniform int x = x0; x < x1; x += 8) {
// foreach (o = 0 ... 16) {
for (uniform int ob = 0; ob < 64; ob += programCount)
{
const int o = ob + programIndex;
// These two arrays encode the mapping from [0,15] to
// offsets within the 4x4 pixel block so that we render
// each pixel inside the block
const uniform int xoffsets[16] = { 0, 1, 0, 1, 2, 3, 2, 3,
0, 1, 0, 1, 2, 3, 2, 3 };
const uniform int yoffsets[16] = { 0, 0, 1, 1, 0, 0, 1, 1,
2, 2, 3, 3, 2, 2, 3, 3 };
// Work on 4x4=16-pixel tiles of the image. This function thus
// implicitly assumes that both (x1-x0) and (y1-y0) are evenly divisible
// by 4.
#if 0
for (uniform int y = y0; y < y1; y += 8)
for (uniform int x = x0; x < x1; x += 8)
foreach (o = 0 ... 64)
{
// These two arrays encode the mapping from [0,15] to
// offsets within the 4x4 pixel block so that we render
// each pixel inside the block
const uniform int xoffsets[16] = { 0, 1, 0, 1, 2, 3, 2, 3,
0, 1, 0, 1, 2, 3, 2, 3 };
const uniform int yoffsets[16] = { 0, 0, 1, 1, 0, 0, 1, 1,
2, 2, 3, 3, 2, 2, 3, 3 };
const uniform int xblock[4] = {0, 4, 0, 4};
const uniform int yblock[4] = {0, 0, 4, 4};
const uniform int xblock[4] = {0, 4, 0, 4};
const uniform int yblock[4] = {0, 0, 4, 4};
// Figure out the pixel to render for this program instance
const int xo = x + xblock[o/16] + xoffsets[o&15];
const int yo = y + yblock[o/16] + yoffsets[o&15];
// Figure out the pixel to render for this program instance
const int xo = x + xblock[o/16] + xoffsets[o&15];
const int yo = y + yblock[o/16] + yoffsets[o&15];
// Use viewing parameters to compute the corresponding ray
// for the pixel
Ray ray;
generateRay(raster2camera, camera2world, xo, yo, ray);
// Use viewing parameters to compute the corresponding ray
// for the pixel
Ray ray;
generateRay(raster2camera, camera2world, xo, yo, ray);
// And raymarch through the volume to compute the pixel's
// value
int offset = yo * width + xo;
if (xo < x1 && yo < y1)
image[offset] = raymarch(density, nVoxels, ray);
}
}
}
// And raymarch through the volume to compute the pixel's
// value
int offset = yo * width + xo;
if (xo < x1 && yo < y1)
image[offset] = raymarch(density, nVoxels, ray);
}
#else
foreach_tiled (y = y0 ... y1, x = x0 ... x1)
{
// Use viewing parameters to compute the corresponding ray
// for the pixel
Ray ray;
generateRay(raster2camera, camera2world, x, y, ray);
// And raymarch through the volume to compute the pixel's
// value
int offset = y * width + x;
image[offset] = raymarch(density, nVoxels, ray);
}
#endif
}