deffered_shading probilem identified. need solution
This commit is contained in:
@@ -64,7 +64,8 @@ define i32 @__nctaid_z() nounwind readnone alwaysinline
|
|||||||
;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
|
||||||
include(`util.m4')
|
|
||||||
|
include(`util_ptx.m4')
|
||||||
|
|
||||||
stdlib_core()
|
stdlib_core()
|
||||||
packed_load_and_store()
|
packed_load_and_store()
|
||||||
|
|||||||
4506
builtins/util_ptx.m4
Normal file
4506
builtins/util_ptx.m4
Normal file
File diff suppressed because it is too large
Load Diff
@@ -96,18 +96,101 @@ dot3(float x, float y, float z, float a, float b, float c) {
|
|||||||
|
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
|
static __shared__ int shdata_full[128];
|
||||||
template<typename T, int N>
|
template<typename T, int N>
|
||||||
struct Uniform
|
struct Uniform
|
||||||
{
|
{
|
||||||
T data[(N-1)/programCount+1];
|
T data[(N+programCount-1)/programCount];
|
||||||
|
volatile T *shdata;
|
||||||
|
|
||||||
__device__ inline const T& operator[](const int i) const
|
__device__ inline Uniform()
|
||||||
{
|
{
|
||||||
const int laneIdx = i & (programCount-1);
|
shdata = ((T*)shdata_full) + warpIdx*32;
|
||||||
const int chunkIdx = i >> 5;
|
|
||||||
return __shfl(data[chunkIdx], laneIdx);
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
__device__ inline int2 get_chunk(const int i) const
|
||||||
|
{
|
||||||
|
const int elem = i & (programCount - 1);
|
||||||
|
const int chunk = i >> 5;
|
||||||
|
shdata[programIndex] = chunk;
|
||||||
|
shdata[ elem] = chunk;
|
||||||
|
return make_int2(shdata[programIndex], elem);
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ inline const T get(const int i) const
|
||||||
|
{
|
||||||
|
const int2 idx = get_chunk(i);
|
||||||
|
return __shfl(data[idx.x], idx.y);
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ inline void set(const bool active, const int i, T value)
|
||||||
|
{
|
||||||
|
const int2 idx = get_chunk(i);
|
||||||
|
const int chunkIdx = idx.x;
|
||||||
|
const int elemIdx = idx.y;
|
||||||
|
shdata[programIndex] = data[chunkIdx];
|
||||||
|
if (active) shdata[elemIdx] = value;
|
||||||
|
data[chunkIdx] = shdata[programIndex];
|
||||||
|
}
|
||||||
|
};
|
||||||
|
#elif 1
|
||||||
|
template<typename T, int N>
|
||||||
|
struct Uniform
|
||||||
|
{
|
||||||
|
union
|
||||||
|
{
|
||||||
|
T *data;
|
||||||
|
int32_t ptr[2];
|
||||||
|
};
|
||||||
|
|
||||||
|
__device__ inline Uniform()
|
||||||
|
{
|
||||||
|
if (programIndex == 0)
|
||||||
|
data = (T*)malloc(N*sizeof(T));
|
||||||
|
ptr[0] = __shfl(ptr[0], 0);
|
||||||
|
ptr[1] = __shfl(ptr[1], 0);
|
||||||
|
}
|
||||||
|
__device__ inline ~Uniform()
|
||||||
|
{
|
||||||
|
if (programIndex == 0)
|
||||||
|
free(data);
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ inline const T get(const int i) const
|
||||||
|
{
|
||||||
|
return data[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ inline void set(const bool active, const int i, T value)
|
||||||
|
{
|
||||||
|
if (active)
|
||||||
|
data[i] = value;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
#else
|
||||||
|
__shared__ int shdata_full[4*MAX_LIGHTS];
|
||||||
|
template<typename T, int N>
|
||||||
|
struct Uniform
|
||||||
|
{
|
||||||
|
volatile T *shdata;
|
||||||
|
|
||||||
|
__device__ Uniform()
|
||||||
|
{
|
||||||
|
shdata = (T*)&shdata_full[warpIdx*MAX_LIGHTS];
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ inline const T get(const int i) const
|
||||||
|
{
|
||||||
|
return shdata[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ inline void set(const bool active, const int i, T value)
|
||||||
|
{
|
||||||
|
if (active)
|
||||||
|
shdata[i] = value;
|
||||||
|
}
|
||||||
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
@@ -256,7 +339,7 @@ IntersectLightsWithTileMinMax(
|
|||||||
float light_positionView_z_array[],
|
float light_positionView_z_array[],
|
||||||
float light_attenuationEnd_array[],
|
float light_attenuationEnd_array[],
|
||||||
// Output
|
// Output
|
||||||
volatile int32 tileLightIndices[]
|
Uniform<int,MAX_LIGHTS> &tileLightIndices
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
float gBufferScale_x = 0.5f * (float)gBufferWidth;
|
float gBufferScale_x = 0.5f * (float)gBufferWidth;
|
||||||
@@ -346,8 +429,8 @@ IntersectLightsWithTileMinMax(
|
|||||||
const int idx = tileNumLights + res.x;
|
const int idx = tileNumLights + res.x;
|
||||||
const int nactive = res.y;
|
const int nactive = res.y;
|
||||||
#endif
|
#endif
|
||||||
if (active)
|
// if (active)
|
||||||
tileLightIndices[idx] = lightIndex;
|
tileLightIndices.set(active, idx,lightIndex);
|
||||||
tileNumLights += nactive;
|
tileNumLights += nactive;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
@@ -375,7 +458,7 @@ IntersectLightsWithTile(
|
|||||||
float light_positionView_z_array[],
|
float light_positionView_z_array[],
|
||||||
float light_attenuationEnd_array[],
|
float light_attenuationEnd_array[],
|
||||||
// Output
|
// Output
|
||||||
int32 tileLightIndices[]
|
Uniform<int,MAX_LIGHTS> &tileLightIndices
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
float minZ, maxZ;
|
float minZ, maxZ;
|
||||||
@@ -406,7 +489,7 @@ ShadeTile(
|
|||||||
float cameraProj_11, float cameraProj_22,
|
float cameraProj_11, float cameraProj_22,
|
||||||
float cameraProj_33, float cameraProj_43,
|
float cameraProj_33, float cameraProj_43,
|
||||||
// Light list
|
// Light list
|
||||||
volatile int32 tileLightIndices[],
|
Uniform<int,MAX_LIGHTS> &tileLightIndices,
|
||||||
int32 tileNumLights,
|
int32 tileNumLights,
|
||||||
// UI
|
// UI
|
||||||
bool visualizeLightCount,
|
bool visualizeLightCount,
|
||||||
@@ -491,7 +574,7 @@ ShadeTile(
|
|||||||
float lit_z = 0.0f;
|
float lit_z = 0.0f;
|
||||||
for ( int32 tileLightIndex = 0; tileLightIndex < tileNumLights;
|
for ( int32 tileLightIndex = 0; tileLightIndex < tileNumLights;
|
||||||
++tileLightIndex) {
|
++tileLightIndex) {
|
||||||
int32 lightIndex = tileLightIndices[tileLightIndex];
|
int32 lightIndex = tileLightIndices.get(tileLightIndex);
|
||||||
|
|
||||||
// Gather light data relevant to initial culling
|
// Gather light data relevant to initial culling
|
||||||
float light_positionView_x =
|
float light_positionView_x =
|
||||||
@@ -618,8 +701,9 @@ RenderTile( int num_groups_x, int num_groups_y,
|
|||||||
float cameraProj_32 = inputHeader.cameraProj[3][2];
|
float cameraProj_32 = inputHeader.cameraProj[3][2];
|
||||||
|
|
||||||
// Light intersection: figure out which lights illuminate this tile.
|
// Light intersection: figure out which lights illuminate this tile.
|
||||||
#if 0
|
#if 1
|
||||||
int tileLightIndices[MAX_LIGHTS]; // Light list for the tile
|
// int tileLightIndices[MAX_LIGHTS]; // Light list for the tile
|
||||||
|
Uniform<int,MAX_LIGHTS> tileLightIndices; // Light list for the tile
|
||||||
#else
|
#else
|
||||||
__shared__ int tileLightIndicesFull[4*MAX_LIGHTS]; // Light list for the tile
|
__shared__ int tileLightIndicesFull[4*MAX_LIGHTS]; // Light list for the tile
|
||||||
int *tileLightIndices = &tileLightIndicesFull[warpIdx*MAX_LIGHTS];
|
int *tileLightIndices = &tileLightIndicesFull[warpIdx*MAX_LIGHTS];
|
||||||
|
|||||||
@@ -505,7 +505,11 @@ RenderTile(uniform int num_groups_x, uniform int num_groups_y,
|
|||||||
uniform float cameraProj_32 = inputHeader.cameraProj[3][2];
|
uniform float cameraProj_32 = inputHeader.cameraProj[3][2];
|
||||||
|
|
||||||
// Light intersection: figure out which lights illuminate this tile.
|
// Light intersection: figure out which lights illuminate this tile.
|
||||||
|
#if 1
|
||||||
|
uniform int * uniform tileLightIndices = uniform new uniform int [MAX_LIGHTS];
|
||||||
|
#else
|
||||||
uniform int tileLightIndices[MAX_LIGHTS]; // Light list for the tile
|
uniform int tileLightIndices[MAX_LIGHTS]; // Light list for the tile
|
||||||
|
#endif
|
||||||
uniform int numTileLights =
|
uniform int numTileLights =
|
||||||
IntersectLightsWithTile(tile_start_x, tile_end_x,
|
IntersectLightsWithTile(tile_start_x, tile_end_x,
|
||||||
tile_start_y, tile_end_y,
|
tile_start_y, tile_end_y,
|
||||||
@@ -527,6 +531,9 @@ RenderTile(uniform int num_groups_x, uniform int num_groups_y,
|
|||||||
cameraProj_00, cameraProj_11, cameraProj_22, cameraProj_32,
|
cameraProj_00, cameraProj_11, cameraProj_22, cameraProj_32,
|
||||||
tileLightIndices, numTileLights, visualizeLightCount,
|
tileLightIndices, numTileLights, visualizeLightCount,
|
||||||
framebuffer_r, framebuffer_g, framebuffer_b);
|
framebuffer_r, framebuffer_g, framebuffer_b);
|
||||||
|
#if 1
|
||||||
|
delete tileLightIndices;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -379,7 +379,7 @@ int main(int argc, char** argv) {
|
|||||||
(uint8_t*)d_r,
|
(uint8_t*)d_r,
|
||||||
(uint8_t*)d_g,
|
(uint8_t*)d_g,
|
||||||
(uint8_t*)d_b);
|
(uint8_t*)d_b);
|
||||||
double mcycles = (rtc() - t0) / nframes;
|
double mcycles = 1000*(rtc() - t0) / nframes;
|
||||||
ispcCycles = std::min(ispcCycles, mcycles);
|
ispcCycles = std::min(ispcCycles, mcycles);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user