From 0933a77c1b14b32ac0fd0a8f7dff42057630edf4 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Tue, 4 Oct 2011 09:33:59 -0700 Subject: [PATCH] Improve task decomposition in ray tracing example. Specifically, launch all of the tasks in one statement, rather than still looping over spans in y and launching a collection of tasks across x for each span. This seems to give a few percent better performance. --- examples/rt/rt.ispc | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/examples/rt/rt.ispc b/examples/rt/rt.ispc index 0cc1caef..9f57ec14 100644 --- a/examples/rt/rt.ispc +++ b/examples/rt/rt.ispc @@ -283,19 +283,19 @@ export void raytrace_ispc(uniform int width, uniform int height, } -task void raytrace_tile_task(uniform int y0, uniform int y1, - uniform int width, uniform int height, +task void raytrace_tile_task(uniform int width, uniform int height, uniform int baseWidth, uniform int baseHeight, const uniform float raster2camera[4][4], const uniform float camera2world[4][4], uniform float image[], uniform int id[], const LinearBVHNode nodes[], const Triangle triangles[]) { - uniform int dx = 16; // must match dx below - uniform int xTasks = (width + (dx-1)) / dx; - uniform int x0 = (taskIndex % xTasks) * dx; - uniform int x1 = x0 + dx; - x1 = min(x1, width); + uniform int dx = 16, dy = 16; // must match dx, dy below + uniform int xBuckets = (width + (dx-1)) / dx; + uniform int x0 = (taskIndex % xBuckets) * dx; + uniform int x1 = min(x0 + dx, width); + uniform int y0 = (taskIndex / xBuckets) * dy; + uniform int y1 = min(y0 + dy, height); raytrace_tile(x0, x1, y0, y1, width, height, baseWidth, baseHeight, raster2camera, camera2world, image, @@ -311,11 +311,11 @@ export void raytrace_ispc_tasks(uniform int width, uniform int height, const LinearBVHNode nodes[], const Triangle triangles[]) { uniform int dx = 16, dy = 16; - uniform int nTasks = (width + (dx-1)) / dx; - for (uniform int y = 0; y < height; y += dy) { - uniform int y1 = min(y + dy, height); - launch[nTasks] < raytrace_tile_task(y, y1, width, height, baseWidth, - baseHeight, raster2camera, camera2world, - image, id, nodes, triangles) >; - } + uniform int xBuckets = (width + (dx-1)) / dx; + uniform int yBuckets = (height + (dy-1)) / dy; + uniform int nTasks = xBuckets * yBuckets; + launch[nTasks] < raytrace_tile_task(width, height, baseWidth, baseHeight, + raster2camera, camera2world, + image, id, nodes, triangles) >; } +