foreach[_tiled] seems to work now

This commit is contained in:
Evghenii
2013-11-14 16:29:40 +01:00
parent 48644813d4
commit af75afeb7a
5 changed files with 164 additions and 95 deletions

View File

@@ -228,10 +228,8 @@ static inline void ao_tile(
seed_rng(&rngstate, programIndex + (y0 << (programIndex & 31)));
float invSamples = 1.f / nsubsamples;
for (uniform int y = y0; y < y1; y++)
for (uniform int xb = x0; xb < x1; xb += programCount)
foreach_tiled (y = y0 ... y1, x = x0 ... x1)
{
const int x = xb + programIndex;
const int offset = 3 * (y * w + x);
float res = 0.0f;
@@ -271,13 +269,12 @@ static inline void ao_tile(
}
}
if (xb < x1)
//if (x < x1)
{
image[offset ] = res;
image[offset+1] = res;
image[offset+2] = res;
}
}
}

View File

@@ -142,7 +142,7 @@ int main(int argc, char *argv[]) {
deviceFree(d_buf);
printf("[mandelbrot ispc+tasks]:\t[%.3f] million cycles\n", minISPC);
writePPM(buf, width, height, "mandelbrot-ispc.ppm");
writePPM(buf, width, height, "mandelbrot-cuda.ppm");
//

View File

@@ -74,6 +74,7 @@ mandelbrot_scanline(uniform float x0, uniform float dx,
const uniform int ystart = taskIndex1 * yspan;
const uniform int yend = min(ystart + yspan, height);
#if 0
for (uniform int yi = ystart; yi < yend; yi++)
for (uniform int xi = xstart; xi < xend; xi += programCount)
{
@@ -85,6 +86,17 @@ mandelbrot_scanline(uniform float x0, uniform float dx,
if (xi + programIndex < xend)
output[index] = res;
}
#else
foreach (yi = ystart ... yend, xi = xstart ... xend)
{
const float x = x0 + xi * dx;
const float y = y0 + yi * dy;
const int res = mandel(x,y,maxIterations);
const int index = yi * width + xi;
output[index] = res;
}
#endif
}

View File

@@ -281,13 +281,11 @@ volume_tile(uniform int x0, uniform int y0, uniform int x1,
// Work on 4x4=16 pixel big tiles of the image. This function thus
// implicitly assumes that both (x1-x0) and (y1-y0) are evenly divisible
// by 4.
for (uniform int y = y0; y < y1; y += 8) {
for (uniform int x = x0; x < x1; x += 8) {
// foreach (o = 0 ... 16) {
for (uniform int ob = 0; ob < 64; ob += programCount)
#if 0
for (uniform int y = y0; y < y1; y += 8)
for (uniform int x = x0; x < x1; x += 8)
foreach (o = 0 ... 64)
{
const int o = ob + programIndex;
// These two arrays encode the mapping from [0,15] to
// offsets within the 4x4 pixel block so that we render
// each pixel inside the block
@@ -314,8 +312,20 @@ volume_tile(uniform int x0, uniform int y0, uniform int x1,
if (xo < x1 && yo < y1)
image[offset] = raymarch(density, nVoxels, ray);
}
#else
foreach_tiled (y = y0 ... y1, x = x0 ... x1)
{
// Use viewing parameters to compute the corresponding ray
// for the pixel
Ray ray;
generateRay(raster2camera, camera2world, x, y, ray);
// And raymarch through the volume to compute the pixel's
// value
int offset = y * width + x;
image[offset] = raymarch(density, nVoxels, ray);
}
}
#endif
}

View File

@@ -1325,7 +1325,9 @@ lUpdateVaryingCounter(int dim, int nDims, FunctionEmitContext *ctx,
// (0,0,0,0,1,1,1,1).
int32_t delta[ISPC_MAX_NVEC];
const int vecWidth = 32;
for (int i = 0; i < vecWidth; ++i) {
std::vector<llvm::Constant*> constDeltaList;
for (int i = 0; i < vecWidth; ++i)
{
int d = i;
// First, account for the effect of any dimensions at deeper
// nesting levels than the current one.
@@ -1336,17 +1338,63 @@ lUpdateVaryingCounter(int dim, int nDims, FunctionEmitContext *ctx,
// And now with what's left, figure out our own offset
delta[i] = d % spans[dim];
constDeltaList.push_back(LLVMInt8(delta[i]));
}
llvm::VectorType *LLVMTypes::Int32VectorSIMT = llvm::VectorType::get(LLVMTypes::Int32Type, 32);
llvm::ArrayType* ArrayDelta = llvm::ArrayType::get(LLVMTypes::Int32Type, 32);
llvm::ArrayType* ArrayDelta = llvm::ArrayType::get(LLVMTypes::Int8Type, 32);
// llvm::PointerType::get(ArrayDelta, 4); /* constant memory */
llvm::GlobalVariable* globalDelta = new llvm::GlobalVariable(
/*Module=*/*m->module,
/*Type=*/ArrayDelta,
/*isConstant=*/true,
/*Linkage=*/llvm::GlobalValue::PrivateLinkage,
/*Initializer=*/0, // has initializer, specified below
/*Name=*/"constDeltaForeach");
#if 0
/*ThreadLocalMode=*/llvm::GlobalVariable::NotThreadLocal,
/*unsigned AddressSpace=*/4 /*constant*/);
#endif
llvm::Constant* constDelta = llvm::ConstantArray::get(ArrayDelta, constDeltaList);
globalDelta->setInitializer(constDelta);
llvm::Function *func_tid_x = m->module->getFunction("__tid_x");
std::vector<llvm::Value *> allocArgs;
llvm::Value *__tid_x = ctx->CallInst(func_tid_x, NULL, allocArgs, "laneIdxForEach");
llvm::Value *laneIdx = ctx->BinaryOperator(llvm::Instruction::And, __tid_x, LLVMInt32(31), "__laneidx");
std::vector<llvm::Value*> ptr_arrayidx_indices;
ptr_arrayidx_indices.push_back(LLVMInt32(0));
ptr_arrayidx_indices.push_back(laneIdx);
#if 1
llvm::Instruction* ptr_arrayidx = llvm::GetElementPtrInst::Create(globalDelta, ptr_arrayidx_indices, "arrayidx", ctx->GetCurrentBasicBlock());
llvm::LoadInst* int8_39 = new llvm::LoadInst(ptr_arrayidx, "", false, ctx->GetCurrentBasicBlock());
llvm::Value * int32_39 = ctx->ZExtInst(int8_39, LLVMTypes::Int32Type);
llvm::VectorType* VectorTy_2 = llvm::VectorType::get(llvm::IntegerType::get(*g->ctx, 32), 1);
llvm::UndefValue* const_packed_41 = llvm::UndefValue::get(VectorTy_2);
llvm::InsertElementInst* packed_43 = llvm::InsertElementInst::Create(
// llvm::UndefValue(LLVMInt32Vector),
const_packed_41,
int32_39, LLVMInt32(0), "", ctx->GetCurrentBasicBlock());
#endif
// Add the deltas to compute the varying counter values; store the
// result to memory and then return it directly as well.
#if 0
llvm::Value *varyingCounter =
ctx->BinaryOperator(llvm::Instruction::Add, smearCounter,
LLVMInt32Vector(delta), "iter_val");
#else
llvm::Value *varyingCounter =
ctx->BinaryOperator(llvm::Instruction::Add, smearCounter,
packed_43, "iter_val");
#endif
ctx->StoreInst(varyingCounter, varyingCounterPtr);
return varyingCounter;
}
@@ -1895,12 +1943,14 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
std::vector<int> span(nDims, 0);
const int vectorWidth = 32;
lGetSpans(nDims-1, nDims, vectorWidth, isTiled, &span[0]);
#if 0
for (int i = 0; i < nDims; i++)
{
fprintf(stderr, " i= %d [ %d ] : %d \n",
i, nDims, span[i]);
}
fprintf(stderr, " --- \n");
#endif
for (int i = 0; i < nDims; ++i) {
// Basic blocks that we'll fill in later with the looping logic for