some kernel tuning
This commit is contained in:
@@ -398,8 +398,8 @@ IntersectLightsWithTileMinMax(
|
|||||||
// don't actually need to mask the rest of this function - this is
|
// don't actually need to mask the rest of this function - this is
|
||||||
// just a greedy early-out. Could also structure all of this as
|
// just a greedy early-out. Could also structure all of this as
|
||||||
// nested if() statements, but this a bit easier to read
|
// nested if() statements, but this a bit easier to read
|
||||||
int active = 0;
|
if (__ballot(inFrustum) > 0)
|
||||||
if ((inFrustum)) {
|
{
|
||||||
float light_positionView_x = light_positionView_x_array[lightIndex];
|
float light_positionView_x = light_positionView_x_array[lightIndex];
|
||||||
float light_positionView_y = light_positionView_y_array[lightIndex];
|
float light_positionView_y = light_positionView_y_array[lightIndex];
|
||||||
|
|
||||||
@@ -420,29 +420,21 @@ IntersectLightsWithTileMinMax(
|
|||||||
inFrustum = inFrustum && (d >= light_attenuationEndNeg);
|
inFrustum = inFrustum && (d >= light_attenuationEndNeg);
|
||||||
|
|
||||||
// Pack and store intersecting lights
|
// Pack and store intersecting lights
|
||||||
|
const bool active = inFrustum && lightIndex < numLights;
|
||||||
#if 0
|
#if 0
|
||||||
if (inFrustum) {
|
if (__ballot(active) > 0)
|
||||||
tileNumLights += packed_store_active(&tileLightIndices[tileNumLights],
|
tileNumLights += packed_store_active(active, tileLightIndices.get_ptr(tileNumLights), lightIndex);
|
||||||
lightIndex);
|
|
||||||
}
|
|
||||||
#else
|
#else
|
||||||
if (inFrustum)
|
if (__ballot(active) > 0)
|
||||||
{
|
{
|
||||||
active = 1;
|
const int2 res = warpBinExclusiveScan(active);
|
||||||
|
const int idx = tileNumLights + res.y;
|
||||||
|
const int nactive = res.x;
|
||||||
|
tileLightIndices.set(active, idx, lightIndex);
|
||||||
|
tileNumLights += nactive;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
#if 0
|
|
||||||
const int2 res = warpBinExclusiveScan(active);
|
|
||||||
const int idx = tileNumLights + res.x;
|
|
||||||
const int nactive = res.y;
|
|
||||||
tileLightIndices.set(active, idx,lightIndex);
|
|
||||||
tileNumLights += nactive;
|
|
||||||
#else
|
|
||||||
tileNumLights += packed_store_active(active, tileLightIndices.get_ptr(tileNumLights),
|
|
||||||
lightIndex);
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return tileNumLights;
|
return tileNumLights;
|
||||||
@@ -590,13 +582,13 @@ ShadeTile(
|
|||||||
|
|
||||||
// Gather light data relevant to initial culling
|
// Gather light data relevant to initial culling
|
||||||
float light_positionView_x =
|
float light_positionView_x =
|
||||||
inputData.lightPositionView_x[lightIndex];
|
__ldg(&inputData.lightPositionView_x[lightIndex]);
|
||||||
float light_positionView_y =
|
float light_positionView_y =
|
||||||
inputData.lightPositionView_y[lightIndex];
|
__ldg(&inputData.lightPositionView_y[lightIndex]);
|
||||||
float light_positionView_z =
|
float light_positionView_z =
|
||||||
inputData.lightPositionView_z[lightIndex];
|
__ldg(&inputData.lightPositionView_z[lightIndex]);
|
||||||
float light_attenuationEnd =
|
float light_attenuationEnd =
|
||||||
inputData.lightAttenuationEnd[lightIndex];
|
__ldg(&inputData.lightAttenuationEnd[lightIndex]);
|
||||||
|
|
||||||
// Compute light vector
|
// Compute light vector
|
||||||
float L_x = light_positionView_x - surface_positionView_x;
|
float L_x = light_positionView_x - surface_positionView_x;
|
||||||
@@ -713,13 +705,8 @@ RenderTile( int num_groups_x, int num_groups_y,
|
|||||||
float cameraProj_32 = inputHeader.cameraProj[3][2];
|
float cameraProj_32 = inputHeader.cameraProj[3][2];
|
||||||
|
|
||||||
// Light intersection: figure out which lights illuminate this tile.
|
// Light intersection: figure out which lights illuminate this tile.
|
||||||
#if 1
|
|
||||||
// int tileLightIndices[MAX_LIGHTS]; // Light list for the tile
|
|
||||||
Uniform<int,MAX_LIGHTS> tileLightIndices; // Light list for the tile
|
Uniform<int,MAX_LIGHTS> tileLightIndices; // Light list for the tile
|
||||||
#else
|
#if 1
|
||||||
__shared__ int tileLightIndicesFull[4*MAX_LIGHTS]; // Light list for the tile
|
|
||||||
int *tileLightIndices = &tileLightIndicesFull[warpIdx*MAX_LIGHTS];
|
|
||||||
#endif
|
|
||||||
int numTileLights =
|
int numTileLights =
|
||||||
IntersectLightsWithTile(tile_start_x, tile_end_x,
|
IntersectLightsWithTile(tile_start_x, tile_end_x,
|
||||||
tile_start_y, tile_end_y,
|
tile_start_y, tile_end_y,
|
||||||
@@ -741,6 +728,7 @@ RenderTile( int num_groups_x, int num_groups_y,
|
|||||||
cameraProj_00, cameraProj_11, cameraProj_22, cameraProj_32,
|
cameraProj_00, cameraProj_11, cameraProj_22, cameraProj_32,
|
||||||
tileLightIndices, numTileLights, visualizeLightCount,
|
tileLightIndices, numTileLights, visualizeLightCount,
|
||||||
framebuffer_r, framebuffer_g, framebuffer_b);
|
framebuffer_r, framebuffer_g, framebuffer_b);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -511,6 +511,7 @@ RenderTile(uniform int num_groups_x, uniform int num_groups_y,
|
|||||||
#else
|
#else
|
||||||
uniform int tileLightIndices[MAX_LIGHTS]; // Light list for the tile
|
uniform int tileLightIndices[MAX_LIGHTS]; // Light list for the tile
|
||||||
#endif
|
#endif
|
||||||
|
#if 1
|
||||||
uniform int numTileLights =
|
uniform int numTileLights =
|
||||||
IntersectLightsWithTile(tile_start_x, tile_end_x,
|
IntersectLightsWithTile(tile_start_x, tile_end_x,
|
||||||
tile_start_y, tile_end_y,
|
tile_start_y, tile_end_y,
|
||||||
@@ -532,6 +533,7 @@ RenderTile(uniform int num_groups_x, uniform int num_groups_y,
|
|||||||
cameraProj_00, cameraProj_11, cameraProj_22, cameraProj_32,
|
cameraProj_00, cameraProj_11, cameraProj_22, cameraProj_32,
|
||||||
tileLightIndices, numTileLights, visualizeLightCount,
|
tileLightIndices, numTileLights, visualizeLightCount,
|
||||||
framebuffer_r, framebuffer_g, framebuffer_b);
|
framebuffer_r, framebuffer_g, framebuffer_b);
|
||||||
|
#endif
|
||||||
#if 1
|
#if 1
|
||||||
delete tileLightIndices;
|
delete tileLightIndices;
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
Reference in New Issue
Block a user