+1
This commit is contained in:
@@ -95,7 +95,7 @@ static CUmodule loadModule(
|
|||||||
|
|
||||||
CUlinkState CUState;
|
CUlinkState CUState;
|
||||||
CUlinkState *lState = &CUState;
|
CUlinkState *lState = &CUState;
|
||||||
const int nOptions = 7;
|
const int nOptions = 8;
|
||||||
CUjit_option options[nOptions];
|
CUjit_option options[nOptions];
|
||||||
void* optionVals[nOptions];
|
void* optionVals[nOptions];
|
||||||
float walltime;
|
float walltime;
|
||||||
@@ -129,6 +129,10 @@ static CUmodule loadModule(
|
|||||||
options[6] = CU_JIT_MAX_REGISTERS;
|
options[6] = CU_JIT_MAX_REGISTERS;
|
||||||
int jitRegCount = maxrregcount;
|
int jitRegCount = maxrregcount;
|
||||||
optionVals[6] = (void *)(size_t)jitRegCount;
|
optionVals[6] = (void *)(size_t)jitRegCount;
|
||||||
|
// Caching
|
||||||
|
options[7] = CU_JIT_CACHE_MODE;
|
||||||
|
optionVals[7] = (void *)CU_JIT_CACHE_OPTION_CA;
|
||||||
|
// Create a pending linker invocation
|
||||||
|
|
||||||
// Create a pending linker invocation
|
// Create a pending linker invocation
|
||||||
checkCudaErrors(cuLinkCreate(nOptions,options, optionVals, lState));
|
checkCudaErrors(cuLinkCreate(nOptions,options, optionVals, lState));
|
||||||
|
|||||||
@@ -127,12 +127,8 @@ ComputeZBounds(
|
|||||||
// Find Z bounds
|
// Find Z bounds
|
||||||
float laneMinZ = cameraFar;
|
float laneMinZ = cameraFar;
|
||||||
float laneMaxZ = cameraNear;
|
float laneMaxZ = cameraNear;
|
||||||
for (uniform int32 y = tileStartY; y < tileEndY; ++y) {
|
foreach_tiled (y = tileStartY ... tileEndY, x = tileStartX ... tileEndX)
|
||||||
// foreach (x = tileStartX ... tileEndX) {
|
|
||||||
for (uniform int xb = tileStartX; xb < tileEndX; xb += programCount)
|
|
||||||
{
|
{
|
||||||
const int x = xb + programIndex;
|
|
||||||
if (x >= tileEndX) break;
|
|
||||||
// Unproject depth buffer Z value into view space
|
// Unproject depth buffer Z value into view space
|
||||||
float z = zBuffer[y * gBufferWidth + x];
|
float z = zBuffer[y * gBufferWidth + x];
|
||||||
float viewSpaceZ = cameraProj_43 / (z - cameraProj_33);
|
float viewSpaceZ = cameraProj_43 / (z - cameraProj_33);
|
||||||
@@ -144,7 +140,6 @@ ComputeZBounds(
|
|||||||
laneMaxZ = max(laneMaxZ, viewSpaceZ);
|
laneMaxZ = max(laneMaxZ, viewSpaceZ);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
minZ = reduce_min(laneMinZ);
|
minZ = reduce_min(laneMinZ);
|
||||||
maxZ = reduce_max(laneMaxZ);
|
maxZ = reduce_max(laneMaxZ);
|
||||||
}
|
}
|
||||||
@@ -194,10 +189,8 @@ IntersectLightsWithTileMinMax(
|
|||||||
|
|
||||||
uniform int32 tileNumLights = 0;
|
uniform int32 tileNumLights = 0;
|
||||||
|
|
||||||
// foreach (lightIndex = 0 ... numLights) {
|
foreach (lightIndex = 0 ... numLights)
|
||||||
for (uniform int lightIndexB = 0; lightIndexB < numLights; lightIndexB += programCount)
|
|
||||||
{
|
{
|
||||||
const int lightIndex = lightIndexB + programIndex;
|
|
||||||
|
|
||||||
float light_positionView_z = light_positionView_z_array[lightIndex];
|
float light_positionView_z = light_positionView_z_array[lightIndex];
|
||||||
float light_attenuationEnd = light_attenuationEnd_array[lightIndex];
|
float light_attenuationEnd = light_attenuationEnd_array[lightIndex];
|
||||||
@@ -305,29 +298,20 @@ ShadeTile(
|
|||||||
{
|
{
|
||||||
if (tileNumLights == 0 || visualizeLightCount) {
|
if (tileNumLights == 0 || visualizeLightCount) {
|
||||||
uniform unsigned int8 c = (unsigned int8)(min(tileNumLights << 2, 255));
|
uniform unsigned int8 c = (unsigned int8)(min(tileNumLights << 2, 255));
|
||||||
for (uniform int32 y = tileStartY; y < tileEndY; ++y) {
|
foreach_tiled (y = tileStartY ... tileEndY, x = tileStartX ... tileEndX)
|
||||||
// foreach (x = tileStartX ... tileEndX)
|
|
||||||
for (uniform int xb = tileStartX ; xb < tileEndX; xb += programCount)
|
|
||||||
{
|
{
|
||||||
const int x = xb + programIndex;
|
|
||||||
if (x >= tileEndX) continue;
|
|
||||||
int32 framebufferIndex = (y * gBufferWidth + x);
|
int32 framebufferIndex = (y * gBufferWidth + x);
|
||||||
framebuffer_r[framebufferIndex] = c;
|
framebuffer_r[framebufferIndex] = c;
|
||||||
framebuffer_g[framebufferIndex] = c;
|
framebuffer_g[framebufferIndex] = c;
|
||||||
framebuffer_b[framebufferIndex] = c;
|
framebuffer_b[framebufferIndex] = c;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
uniform float twoOverGBufferWidth = 2.0f / gBufferWidth;
|
uniform float twoOverGBufferWidth = 2.0f / gBufferWidth;
|
||||||
uniform float twoOverGBufferHeight = 2.0f / gBufferHeight;
|
uniform float twoOverGBufferHeight = 2.0f / gBufferHeight;
|
||||||
|
|
||||||
for (uniform int32 y = tileStartY; y < tileEndY; ++y) {
|
foreach_tiled(y = tileStartY ... tileEndY, x = tileStartX ... tileEndX)
|
||||||
uniform float positionScreen_y = -(((0.5f + y) * twoOverGBufferHeight) - 1.f);
|
|
||||||
|
|
||||||
// foreach (x = tileStartX ... tileEndX) {
|
|
||||||
for (uniform int xb = tileStartX ; xb < tileEndX; xb += programCount)
|
|
||||||
{
|
{
|
||||||
const int x = xb + programIndex;
|
float positionScreen_y = -(((0.5f + y) * twoOverGBufferHeight) - 1.f);
|
||||||
int32 gBufferOffset = y * gBufferWidth + x;
|
int32 gBufferOffset = y * gBufferWidth + x;
|
||||||
|
|
||||||
// Reconstruct position and (negative) view vector from G-buffer
|
// Reconstruct position and (negative) view vector from G-buffer
|
||||||
@@ -471,7 +455,6 @@ ShadeTile(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
|
|||||||
Reference in New Issue
Block a user