some kernel tuning

This commit is contained in:
Evghenii
2013-11-11 14:24:13 +01:00
parent f2c66dc4c3
commit 6a1fb8ea31
2 changed files with 19 additions and 29 deletions

View File

@@ -398,8 +398,8 @@ IntersectLightsWithTileMinMax(
// don't actually need to mask the rest of this function - this is // don't actually need to mask the rest of this function - this is
// just a greedy early-out. Could also structure all of this as // just a greedy early-out. Could also structure all of this as
// nested if() statements, but this is a bit easier to read // nested if() statements, but this is a bit easier to read
int active = 0; if (__ballot(inFrustum) > 0)
if ((inFrustum)) { {
float light_positionView_x = light_positionView_x_array[lightIndex]; float light_positionView_x = light_positionView_x_array[lightIndex];
float light_positionView_y = light_positionView_y_array[lightIndex]; float light_positionView_y = light_positionView_y_array[lightIndex];
@@ -420,29 +420,21 @@ IntersectLightsWithTileMinMax(
inFrustum = inFrustum && (d >= light_attenuationEndNeg); inFrustum = inFrustum && (d >= light_attenuationEndNeg);
// Pack and store intersecting lights // Pack and store intersecting lights
const bool active = inFrustum && lightIndex < numLights;
#if 0 #if 0
if (inFrustum) { if (__ballot(active) > 0)
tileNumLights += packed_store_active(&tileLightIndices[tileNumLights], tileNumLights += packed_store_active(active, tileLightIndices.get_ptr(tileNumLights), lightIndex);
lightIndex);
}
#else #else
if (inFrustum) if (__ballot(active) > 0)
{ {
active = 1; const int2 res = warpBinExclusiveScan(active);
const int idx = tileNumLights + res.y;
const int nactive = res.x;
tileLightIndices.set(active, idx, lightIndex);
tileNumLights += nactive;
} }
#endif #endif
} }
#if 0
const int2 res = warpBinExclusiveScan(active);
const int idx = tileNumLights + res.x;
const int nactive = res.y;
tileLightIndices.set(active, idx,lightIndex);
tileNumLights += nactive;
#else
tileNumLights += packed_store_active(active, tileLightIndices.get_ptr(tileNumLights),
lightIndex);
#endif
} }
return tileNumLights; return tileNumLights;
@@ -590,13 +582,13 @@ ShadeTile(
// Gather light data relevant to initial culling // Gather light data relevant to initial culling
float light_positionView_x = float light_positionView_x =
inputData.lightPositionView_x[lightIndex]; __ldg(&inputData.lightPositionView_x[lightIndex]);
float light_positionView_y = float light_positionView_y =
inputData.lightPositionView_y[lightIndex]; __ldg(&inputData.lightPositionView_y[lightIndex]);
float light_positionView_z = float light_positionView_z =
inputData.lightPositionView_z[lightIndex]; __ldg(&inputData.lightPositionView_z[lightIndex]);
float light_attenuationEnd = float light_attenuationEnd =
inputData.lightAttenuationEnd[lightIndex]; __ldg(&inputData.lightAttenuationEnd[lightIndex]);
// Compute light vector // Compute light vector
float L_x = light_positionView_x - surface_positionView_x; float L_x = light_positionView_x - surface_positionView_x;
@@ -713,13 +705,8 @@ RenderTile( int num_groups_x, int num_groups_y,
float cameraProj_32 = inputHeader.cameraProj[3][2]; float cameraProj_32 = inputHeader.cameraProj[3][2];
// Light intersection: figure out which lights illuminate this tile. // Light intersection: figure out which lights illuminate this tile.
#if 1
// int tileLightIndices[MAX_LIGHTS]; // Light list for the tile
Uniform<int,MAX_LIGHTS> tileLightIndices; // Light list for the tile Uniform<int,MAX_LIGHTS> tileLightIndices; // Light list for the tile
#else #if 1
__shared__ int tileLightIndicesFull[4*MAX_LIGHTS]; // Light list for the tile
int *tileLightIndices = &tileLightIndicesFull[warpIdx*MAX_LIGHTS];
#endif
int numTileLights = int numTileLights =
IntersectLightsWithTile(tile_start_x, tile_end_x, IntersectLightsWithTile(tile_start_x, tile_end_x,
tile_start_y, tile_end_y, tile_start_y, tile_end_y,
@@ -741,6 +728,7 @@ RenderTile( int num_groups_x, int num_groups_y,
cameraProj_00, cameraProj_11, cameraProj_22, cameraProj_32, cameraProj_00, cameraProj_11, cameraProj_22, cameraProj_32,
tileLightIndices, numTileLights, visualizeLightCount, tileLightIndices, numTileLights, visualizeLightCount,
framebuffer_r, framebuffer_g, framebuffer_b); framebuffer_r, framebuffer_g, framebuffer_b);
#endif
} }

View File

@@ -511,6 +511,7 @@ RenderTile(uniform int num_groups_x, uniform int num_groups_y,
#else #else
uniform int tileLightIndices[MAX_LIGHTS]; // Light list for the tile uniform int tileLightIndices[MAX_LIGHTS]; // Light list for the tile
#endif #endif
#if 1
uniform int numTileLights = uniform int numTileLights =
IntersectLightsWithTile(tile_start_x, tile_end_x, IntersectLightsWithTile(tile_start_x, tile_end_x,
tile_start_y, tile_end_y, tile_start_y, tile_end_y,
@@ -532,6 +533,7 @@ RenderTile(uniform int num_groups_x, uniform int num_groups_y,
cameraProj_00, cameraProj_11, cameraProj_22, cameraProj_32, cameraProj_00, cameraProj_11, cameraProj_22, cameraProj_32,
tileLightIndices, numTileLights, visualizeLightCount, tileLightIndices, numTileLights, visualizeLightCount,
framebuffer_r, framebuffer_g, framebuffer_b); framebuffer_r, framebuffer_g, framebuffer_b);
#endif
#if 1 #if 1
delete tileLightIndices; delete tileLightIndices;
#endif #endif