Merge branch 'sm35_foreach' of github.com:egaburov/ispc into sm35_foreach
This commit is contained in:
@@ -133,45 +133,7 @@ struct Uniform
|
||||
data[chunkIdx] = shdata[programIndex];
|
||||
}
|
||||
};
|
||||
#elif 0
|
||||
static __shared__ void* shptr_full[128];
|
||||
template<typename T, int N>
|
||||
struct Uniform
|
||||
{
|
||||
T data[(N+programCount-1)/programCount];
|
||||
T* *shptr;
|
||||
|
||||
__device__ inline Uniform()
|
||||
{
|
||||
shptr = (T**)shptr_full;
|
||||
shptr[threadIdx.x] = data;
|
||||
__syncthreads();
|
||||
}
|
||||
|
||||
__device__ inline int2 get_chunk(const int i) const
|
||||
{
|
||||
const int elem = i & (programCount - 1);
|
||||
const int chunk = i >> 5;
|
||||
return make_int2(chunk, elem);
|
||||
}
|
||||
|
||||
__device__ inline const T get(const int i) const
|
||||
{
|
||||
const int2 idx = get_chunk(i);
|
||||
const int chunk = idx.x;
|
||||
const int elem = idx.y;
|
||||
return shptr[chunk][elem];
|
||||
}
|
||||
|
||||
__device__ inline void set(const bool active, const int i, T value)
|
||||
{
|
||||
const int2 idx = get_chunk(i);
|
||||
const int chunk = idx.x;
|
||||
const int elem = idx.y;
|
||||
shptr[chunk][elem] = value;
|
||||
}
|
||||
};
|
||||
#elif 0
|
||||
#elif 1
|
||||
template<typename T, int N>
|
||||
struct Uniform
|
||||
{
|
||||
@@ -181,32 +143,17 @@ struct Uniform
|
||||
int32_t ptr[2];
|
||||
};
|
||||
|
||||
|
||||
__device__ inline Uniform()
|
||||
{
|
||||
#if 1
|
||||
if (programIndex == 0)
|
||||
data = new T[N];
|
||||
data = (T*)malloc(N*sizeof(T));
|
||||
ptr[0] = __shfl(ptr[0], 0);
|
||||
ptr[1] = __shfl(ptr[1], 0);
|
||||
#else
|
||||
__shared__ T *ptr;
|
||||
if (threadIdx.x == 0)
|
||||
ptr = new T[4*N];
|
||||
__syncthreads();
|
||||
data = ptr;
|
||||
data += warpIdx*N;
|
||||
#endif
|
||||
}
|
||||
__device__ inline ~Uniform()
|
||||
{
|
||||
#if 1
|
||||
if (programIndex == 0)
|
||||
delete data;
|
||||
#else
|
||||
if (threadIdx.x == 0)
|
||||
delete data;
|
||||
#endif
|
||||
free(data);
|
||||
}
|
||||
|
||||
__device__ inline const T get(const int i) const
|
||||
@@ -717,7 +664,6 @@ ShadeTile(
|
||||
lit_y = pow(clamp(lit_y, 0.0f, 1.0f), gamma);
|
||||
lit_z = pow(clamp(lit_z, 0.0f, 1.0f), gamma);
|
||||
|
||||
if (x >= tileEndX) break;
|
||||
framebuffer_r[gBufferOffset] = Float32ToUnorm8(lit_x);
|
||||
framebuffer_g[gBufferOffset] = Float32ToUnorm8(lit_y);
|
||||
framebuffer_b[gBufferOffset] = Float32ToUnorm8(lit_z);
|
||||
@@ -730,21 +676,19 @@ ShadeTile(
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// Static decomposition
|
||||
|
||||
|
||||
__global__ void
|
||||
RenderTile( int num_groups_x, int num_groups_y,
|
||||
const InputHeader inputHeaderPtr[],
|
||||
const InputDataArrays inputDataPtr[],
|
||||
const InputHeader *inputHeaderPtr,
|
||||
const InputDataArrays *inputDataPtr,
|
||||
int visualizeLightCount,
|
||||
// Output
|
||||
unsigned int8 framebuffer_r[],
|
||||
unsigned int8 framebuffer_g[],
|
||||
unsigned int8 framebuffer_b[]) {
|
||||
if (taskIndex >= taskCount) return;
|
||||
const InputHeader &inputHeader = *inputHeaderPtr;
|
||||
const InputDataArrays &inputData = *inputDataPtr;
|
||||
|
||||
#if 1
|
||||
const InputHeader inputHeader = *inputHeaderPtr;
|
||||
const InputDataArrays inputData = *inputDataPtr;
|
||||
int32 group_y = taskIndex / num_groups_x;
|
||||
int32 group_x = taskIndex % num_groups_x;
|
||||
|
||||
@@ -759,16 +703,9 @@ RenderTile( int num_groups_x, int num_groups_y,
|
||||
float cameraProj_11 = inputHeader.cameraProj[1][1];
|
||||
float cameraProj_22 = inputHeader.cameraProj[2][2];
|
||||
float cameraProj_32 = inputHeader.cameraProj[3][2];
|
||||
#endif
|
||||
|
||||
// Light intersection: figure out which lights illuminate this tile.
|
||||
Uniform<int,MAX_LIGHTS> tileLightIndices; // Light list for the tile
|
||||
|
||||
#if 0
|
||||
tileLightIndices.set(threadIdx.x&1, threadIdx.x, framebuffer_g[blockIdx.x]);
|
||||
framebuffer_r[threadIdx.x] = tileLightIndices.get(threadIdx.x);
|
||||
#endif
|
||||
|
||||
#if 1
|
||||
int numTileLights =
|
||||
IntersectLightsWithTile(tile_start_x, tile_end_x,
|
||||
@@ -795,31 +732,30 @@ RenderTile( int num_groups_x, int num_groups_y,
|
||||
}
|
||||
|
||||
|
||||
extern "C"
|
||||
__global__ void
|
||||
RenderStatic(InputHeader inputHeaderPtr[],
|
||||
InputDataArrays inputDataPtr[],
|
||||
int visualizeLightCount,
|
||||
extern "C" __global__ void
|
||||
RenderStatic( InputHeader inputHeaderPtr[],
|
||||
InputDataArrays inputDataPtr[],
|
||||
int visualizeLightCount,
|
||||
// Output
|
||||
unsigned int8 framebuffer_r[],
|
||||
unsigned int8 framebuffer_g[],
|
||||
unsigned int8 framebuffer_b[]) {
|
||||
unsigned int8 framebuffer_r[],
|
||||
unsigned int8 framebuffer_g[],
|
||||
unsigned int8 framebuffer_b[]) {
|
||||
|
||||
const InputHeader inputHeader = *inputHeaderPtr;
|
||||
const InputDataArrays inputData = *inputDataPtr;
|
||||
const InputHeader inputHeader = *inputHeaderPtr;
|
||||
const InputDataArrays inputData = *inputDataPtr;
|
||||
|
||||
int num_groups_x = (inputHeader.framebufferWidth +
|
||||
|
||||
int num_groups_x = (inputHeader.framebufferWidth +
|
||||
MIN_TILE_WIDTH - 1) / MIN_TILE_WIDTH;
|
||||
int num_groups_y = (inputHeader.framebufferHeight +
|
||||
int num_groups_y = (inputHeader.framebufferHeight +
|
||||
MIN_TILE_HEIGHT - 1) / MIN_TILE_HEIGHT;
|
||||
int num_groups = num_groups_x * num_groups_y;
|
||||
int num_groups = num_groups_x * num_groups_y;
|
||||
|
||||
// Launch a task to render each tile, each of which is MIN_TILE_WIDTH
|
||||
// by MIN_TILE_HEIGHT pixels.
|
||||
if (programIndex == 0)
|
||||
RenderTile<<<num_groups, 128>>>(num_groups_x, num_groups_y,
|
||||
inputHeaderPtr, inputDataPtr, visualizeLightCount,
|
||||
framebuffer_r, framebuffer_g, framebuffer_b);
|
||||
cudaDeviceSynchronize();
|
||||
cudaDeviceSynchronize();
|
||||
if (programIndex == 0)
|
||||
RenderTile<<<(num_groups+4-1)/4,128>>>(num_groups_x, num_groups_y,
|
||||
inputHeaderPtr, inputDataPtr, visualizeLightCount,
|
||||
framebuffer_r, framebuffer_g, framebuffer_b);
|
||||
cudaDeviceSynchronize();
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user