Use foreach in the deferred shading example

2011-12-01 17:00:30 -08:00
parent 3bb6bff15d
commit 24ef9dac8f
1 changed files with 29 additions and 45 deletions
--- a/examples/deferred/kernels.ispc
+++ b/examples/deferred/kernels.ispc
@@ -97,7 +97,6 @@ Float32ToUnorm8(float f) {
 }


-// tile width must be a multiple of programCount (SIMD size)
 static void
 ComputeZBounds(
    uniform int32 tileStartX, uniform int32 tileEndX,
@@ -117,9 +116,9 @@ ComputeZBounds(
    float laneMinZ = cameraFar;
    float laneMaxZ = cameraNear;
    for (uniform int32 y = tileStartY; y < tileEndY; ++y) {
-        for (uniform int32 x = tileStartX; x < tileEndX; x += programCount) {
+        foreach (x = tileStartX ... tileEndX) {
            // Unproject depth buffer Z value into view space
-            float z = zBuffer[(y * gBufferWidth + x) + programIndex];
+            float z = zBuffer[y * gBufferWidth + x];
            float viewSpaceZ = cameraProj_43 / (z - cameraProj_33);

            // Work out Z bounds for our samples
@@ -135,8 +134,6 @@ ComputeZBounds(
 }


-// tile width must be a multiple of programCount (SIMD size)
-// numLights must currently be a multiple of programCount (SIMD size)
 export uniform int32
 IntersectLightsWithTileMinMax(
    uniform int32 tileStartX, uniform int32 tileEndX,
@@ -197,9 +194,7 @@ IntersectLightsWithTileMinMax(

    uniform int32 tileNumLights = 0;

-    for (uniform int32 baseLightIndex = 0; baseLightIndex < numLights; 
-         baseLightIndex += programCount) {
-        int32 lightIndex = baseLightIndex + programIndex;
+    foreach (lightIndex = 0 ... numLights) {
        float light_positionView_z = light_positionView_z_array[lightIndex];
        float light_attenuationEnd = light_attenuationEnd_array[lightIndex];
        float light_attenuationEndNeg = -light_attenuationEnd;
@@ -214,9 +209,7 @@ IntersectLightsWithTileMinMax(
        // don't actually need to mask the rest of this function - this is
        // just a greedy early-out.  Could also structure all of this as
        // nested if() statements, but this a bit easier to read
-        if (!any(inFrustum)) 
-            continue;
-
+        if (any(inFrustum)) {
            float light_positionView_x = light_positionView_x_array[lightIndex];
            float light_positionView_y = light_positionView_y_array[lightIndex];

@@ -242,13 +235,12 @@ IntersectLightsWithTileMinMax(
                                                     lightIndex);
            }
        }
+    }

    return tileNumLights;
 }


-// tile width must be a multiple of programCount (SIMD size)
-// numLights must currently be a multiple of programCount (SIMD size)
 static uniform int32
 IntersectLightsWithTile(
    uniform int32 tileStartX, uniform int32 tileEndX,
@@ -286,7 +278,6 @@ IntersectLightsWithTile(
 }


-// tile width must be a multiple of programCount (SIMD size)
 export void
 ShadeTile(
    uniform int32 tileStartX, uniform int32 tileEndX,
@@ -310,8 +301,8 @@ ShadeTile(
    if (tileNumLights == 0 || visualizeLightCount) {
        uniform unsigned int8 c = (unsigned int8)(min(tileNumLights << 2, 255));
        for (uniform int32 y = tileStartY; y < tileEndY; ++y) {
-            for (uniform int32 x = tileStartX; x < tileEndX; x += programCount) {
-                int32 framebufferIndex = (y * gBufferWidth + x) + programIndex;
+            foreach (x = tileStartX ... tileEndX) {
+                int32 framebufferIndex = (y * gBufferWidth + x);
                framebuffer_r[framebufferIndex] = c;
                framebuffer_g[framebufferIndex] = c;
                framebuffer_b[framebufferIndex] = c;
@@ -324,9 +315,8 @@ ShadeTile(
        for (uniform int32 y = tileStartY; y < tileEndY; ++y) {
            uniform float positionScreen_y = -(((0.5f + y) * twoOverGBufferHeight) - 1.f);

-            for (uniform int32 x = tileStartX; x < tileEndX; x += programCount) {
-                uniform int32 gBufferOffsetBase = y * gBufferWidth + x;
-                int32 gBufferOffset = gBufferOffsetBase + programIndex;
+            foreach (x = tileStartX ... tileEndX) {
+                int32 gBufferOffset = y * gBufferWidth + x;
                
                // Reconstruct position and (negative) view vector from G-buffer
                float surface_positionView_x, surface_positionView_y, surface_positionView_z;
@@ -336,7 +326,7 @@ ShadeTile(

                // Compute screen/clip-space position
                // NOTE: Mind DX11 viewport transform and pixel center!
-                float positionScreen_x = (0.5f + (float)(x + programIndex)) * 
+                float positionScreen_x = (0.5f + (float)(x)) * 
                    twoOverGBufferWidth - 1.0f;

                // Unproject depth buffer Z value into view space
@@ -550,7 +540,6 @@ RenderStatic(uniform InputHeader &inputHeader,
 // Routines for dynamic decomposition path

 // This computes the z min/max range for a whole row worth of tiles.
-// The tile width must be a multiple of programCount (SIMD size)
 export void
 ComputeZBoundsRow(
    uniform int32 tileY,
@@ -644,12 +633,7 @@ SplitTileMinMax(
    subtileLightOffset[2] = 2 * subtileIndicesPitch;
    subtileLightOffset[3] = 3 * subtileIndicesPitch;

-    for (int32 i = programIndex; i < numLights; i += programCount) {
-        // TODO: ISPC says gather required here when it actually
-        // isn't... this could be fixed this by nesting an if() within a
-        // uniform loop, but I'm not totally sure if that's a win
-        // overall. For now we'll just eat the perf cost for cleanliness
-        // since the below are real gathers anyways.
+    foreach (i = 0 ... numLights) {
        int32 lightIndex = lightIndices[i];

        float light_positionView_x = light_positionView_x_array[lightIndex];