+1
This commit is contained in:
@@ -95,7 +95,7 @@ static CUmodule loadModule(
|
||||
|
||||
CUlinkState CUState;
|
||||
CUlinkState *lState = &CUState;
|
||||
const int nOptions = 7;
|
||||
const int nOptions = 8;
|
||||
CUjit_option options[nOptions];
|
||||
void* optionVals[nOptions];
|
||||
float walltime;
|
||||
@@ -129,6 +129,10 @@ static CUmodule loadModule(
|
||||
options[6] = CU_JIT_MAX_REGISTERS;
|
||||
int jitRegCount = maxrregcount;
|
||||
optionVals[6] = (void *)(size_t)jitRegCount;
|
||||
// Caching
|
||||
options[7] = CU_JIT_CACHE_MODE;
|
||||
optionVals[7] = (void *)CU_JIT_CACHE_OPTION_CA;
|
||||
// Create a pending linker invocation
|
||||
|
||||
// Create a pending linker invocation
|
||||
checkCudaErrors(cuLinkCreate(nOptions,options, optionVals, lState));
|
||||
|
||||
@@ -127,12 +127,8 @@ ComputeZBounds(
|
||||
// Find Z bounds
|
||||
float laneMinZ = cameraFar;
|
||||
float laneMaxZ = cameraNear;
|
||||
for (uniform int32 y = tileStartY; y < tileEndY; ++y) {
|
||||
// foreach (x = tileStartX ... tileEndX) {
|
||||
for (uniform int xb = tileStartX; xb < tileEndX; xb += programCount)
|
||||
foreach_tiled (y = tileStartY ... tileEndY, x = tileStartX ... tileEndX)
|
||||
{
|
||||
const int x = xb + programIndex;
|
||||
if (x >= tileEndX) break;
|
||||
// Unproject depth buffer Z value into view space
|
||||
float z = zBuffer[y * gBufferWidth + x];
|
||||
float viewSpaceZ = cameraProj_43 / (z - cameraProj_33);
|
||||
@@ -144,7 +140,6 @@ ComputeZBounds(
|
||||
laneMaxZ = max(laneMaxZ, viewSpaceZ);
|
||||
}
|
||||
}
|
||||
}
|
||||
minZ = reduce_min(laneMinZ);
|
||||
maxZ = reduce_max(laneMaxZ);
|
||||
}
|
||||
@@ -194,10 +189,8 @@ IntersectLightsWithTileMinMax(
|
||||
|
||||
uniform int32 tileNumLights = 0;
|
||||
|
||||
// foreach (lightIndex = 0 ... numLights) {
|
||||
for (uniform int lightIndexB = 0; lightIndexB < numLights; lightIndexB += programCount)
|
||||
foreach (lightIndex = 0 ... numLights)
|
||||
{
|
||||
const int lightIndex = lightIndexB + programIndex;
|
||||
|
||||
float light_positionView_z = light_positionView_z_array[lightIndex];
|
||||
float light_attenuationEnd = light_attenuationEnd_array[lightIndex];
|
||||
@@ -305,29 +298,20 @@ ShadeTile(
|
||||
{
|
||||
if (tileNumLights == 0 || visualizeLightCount) {
|
||||
uniform unsigned int8 c = (unsigned int8)(min(tileNumLights << 2, 255));
|
||||
for (uniform int32 y = tileStartY; y < tileEndY; ++y) {
|
||||
// foreach (x = tileStartX ... tileEndX)
|
||||
for (uniform int xb = tileStartX ; xb < tileEndX; xb += programCount)
|
||||
foreach_tiled (y = tileStartY ... tileEndY, x = tileStartX ... tileEndX)
|
||||
{
|
||||
const int x = xb + programIndex;
|
||||
if (x >= tileEndX) continue;
|
||||
int32 framebufferIndex = (y * gBufferWidth + x);
|
||||
framebuffer_r[framebufferIndex] = c;
|
||||
framebuffer_g[framebufferIndex] = c;
|
||||
framebuffer_b[framebufferIndex] = c;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
uniform float twoOverGBufferWidth = 2.0f / gBufferWidth;
|
||||
uniform float twoOverGBufferHeight = 2.0f / gBufferHeight;
|
||||
|
||||
for (uniform int32 y = tileStartY; y < tileEndY; ++y) {
|
||||
uniform float positionScreen_y = -(((0.5f + y) * twoOverGBufferHeight) - 1.f);
|
||||
|
||||
// foreach (x = tileStartX ... tileEndX) {
|
||||
for (uniform int xb = tileStartX ; xb < tileEndX; xb += programCount)
|
||||
foreach_tiled(y = tileStartY ... tileEndY, x = tileStartX ... tileEndX)
|
||||
{
|
||||
const int x = xb + programIndex;
|
||||
float positionScreen_y = -(((0.5f + y) * twoOverGBufferHeight) - 1.f);
|
||||
int32 gBufferOffset = y * gBufferWidth + x;
|
||||
|
||||
// Reconstruct position and (negative) view vector from G-buffer
|
||||
@@ -470,7 +454,6 @@ ShadeTile(
|
||||
framebuffer_b[gBufferOffset] = Float32ToUnorm8(lit_z);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user