diff --git a/ctx.cpp b/ctx.cpp index 29495319..e0ae38ce 100644 --- a/ctx.cpp +++ b/ctx.cpp @@ -68,12 +68,19 @@ struct CFInfo { llvm::Value *savedContinueLanesPtr, llvm::Value *savedMask, llvm::Value *savedLoopMask); + static CFInfo *GetForeach(llvm::BasicBlock *breakTarget, + llvm::BasicBlock *continueTarget, + llvm::Value *savedBreakLanesPtr, + llvm::Value *savedContinueLanesPtr, + llvm::Value *savedMask, llvm::Value *savedLoopMask); + bool IsIf() { return type == If; } bool IsLoop() { return type == Loop; } + bool IsForeach() { return type == Foreach; } bool IsVaryingType() { return !isUniform; } bool IsUniform() { return isUniform; } - enum CFType { If, Loop }; + enum CFType { If, Loop, Foreach }; CFType type; bool isUniform; llvm::BasicBlock *savedBreakTarget, *savedContinueTarget; @@ -102,6 +109,19 @@ private: savedMask = sm; savedLoopMask = lm; } + CFInfo(CFType t, llvm::BasicBlock *bt, llvm::BasicBlock *ct, + llvm::Value *sb, llvm::Value *sc, llvm::Value *sm, + llvm::Value *lm) { + assert(t == Foreach); + type = t; + isUniform = false; + savedBreakTarget = bt; + savedContinueTarget = ct; + savedBreakLanesPtr = sb; + savedContinueLanesPtr = sc; + savedMask = sm; + savedLoopMask = lm; + } }; @@ -122,6 +142,18 @@ CFInfo::GetLoop(bool isUniform, llvm::BasicBlock *breakTarget, savedMask, savedLoopMask); } + +CFInfo * +CFInfo::GetForeach(llvm::BasicBlock *breakTarget, + llvm::BasicBlock *continueTarget, + llvm::Value *savedBreakLanesPtr, + llvm::Value *savedContinueLanesPtr, + llvm::Value *savedMask, llvm::Value *savedForeachMask) { + return new CFInfo(Foreach, breakTarget, continueTarget, + savedBreakLanesPtr, savedContinueLanesPtr, + savedMask, savedForeachMask); +} + /////////////////////////////////////////////////////////////////////////// FunctionEmitContext::FunctionEmitContext(Function *func, Symbol *funSym, @@ -422,7 +454,7 @@ FunctionEmitContext::StartLoop(llvm::BasicBlock *bt, llvm::BasicBlock *ct, void FunctionEmitContext::EndLoop() { - 
assert(controlFlowInfo.size() && !controlFlowInfo.back()->IsIf()); + assert(controlFlowInfo.size() && controlFlowInfo.back()->IsLoop()); CFInfo *ci = controlFlowInfo.back(); controlFlowInfo.pop_back(); @@ -444,6 +476,36 @@ FunctionEmitContext::EndLoop() { } +void +FunctionEmitContext::StartForeach() { + // Store the current values of various loop-related state so that we + // can restore it when we exit this loop. + llvm::Value *oldMask = GetInternalMask(); + controlFlowInfo.push_back(CFInfo::GetForeach(breakTarget, continueTarget, breakLanesPtr, + continueLanesPtr, oldMask, loopMask)); + continueLanesPtr = breakLanesPtr = NULL; + breakTarget = NULL; + continueTarget = NULL; + loopMask = NULL; +} + + +void +FunctionEmitContext::EndForeach() { + assert(controlFlowInfo.size() && controlFlowInfo.back()->IsForeach()); + CFInfo *ci = controlFlowInfo.back(); + controlFlowInfo.pop_back(); + + // Restore the break/continue state information to what it was before + // we went into this loop. + breakTarget = ci->savedBreakTarget; + continueTarget = ci->savedContinueTarget; + breakLanesPtr = ci->savedBreakLanesPtr; + continueLanesPtr = ci->savedContinueLanesPtr; + loopMask = ci->savedLoopMask; +} + + void FunctionEmitContext::restoreMaskGivenReturns(llvm::Value *oldMask) { if (!bblock) @@ -638,6 +700,15 @@ FunctionEmitContext::VaryingCFDepth() const { } +bool +FunctionEmitContext::InForeachLoop() const { + for (unsigned int i = 0; i < controlFlowInfo.size(); ++i) + if (controlFlowInfo[i]->IsForeach()) + return true; + return false; +} + + void FunctionEmitContext::CurrentLanesReturned(Expr *expr, bool doCoherenceCheck) { const Type *returnType = function->GetReturnType(); diff --git a/ctx.h b/ctx.h index 298167a5..69d3037b 100644 --- a/ctx.h +++ b/ctx.h @@ -159,6 +159,10 @@ public: finished. */ void EndLoop(); + /** Called when code generation for a 'foreach' loop begins and ends. StartForeach() records the current break/continue targets, break/continue lane pointers, internal mask and loop mask on the control-flow stack and resets the targets, lane pointers and loop mask to NULL for the loop body; EndForeach() pops that record and restores the targets, lane pointers and loop mask (it does not restore the mask itself). */ + void StartForeach(); + void EndForeach(); + /** Emit code for a 'break' statement in a loop. 
If doCoherenceCheck is true, then if we're in a 'varying' loop, code will be emitted to see if all of the lanes want to break, in which case a jump to the @@ -183,6 +187,8 @@ public: flow */ int VaryingCFDepth() const; + bool InForeachLoop() const; + /** Called to generate code for 'return' statement; value is the expression in the return statement (if non-NULL), and doCoherenceCheck indicates whether instructions should be generated diff --git a/examples/mandelbrot/mandelbrot.ispc b/examples/mandelbrot/mandelbrot.ispc index 9243b52a..8b471139 100644 --- a/examples/mandelbrot/mandelbrot.ispc +++ b/examples/mandelbrot/mandelbrot.ispc @@ -60,16 +60,16 @@ export void mandelbrot_ispc(uniform float x0, uniform float y0, // Note that we'll be doing programCount computations in parallel, // so increment i by that much. This assumes that width evenly // divides programCount. - for (uniform int i = 0; i < width; i += programCount) { + foreach (i = 0 ... width) { // Figure out the position on the complex plane to compute the // number of iterations at. Note that the x values are // different across different program instances, since its // initializer incorporates the value of the programIndex // variable. 
- float x = x0 + (programIndex + i) * dx; + float x = x0 + i * dx; float y = y0 + j * dy; - int index = j * width + i + programIndex; + int index = j * width + i; output[index] = mandel(x, y, maxIterations); } } diff --git a/examples/mandelbrot_tasks/mandelbrot.ispc b/examples/mandelbrot_tasks/mandelbrot.ispc index d4ffeff5..d8ba195b 100644 --- a/examples/mandelbrot_tasks/mandelbrot.ispc +++ b/examples/mandelbrot_tasks/mandelbrot.ispc @@ -61,14 +61,12 @@ mandelbrot_scanlines(uniform int ybase, uniform int span, uniform int ystart = ybase + taskIndex * span; uniform int yend = ystart + span; - for (uniform int j = ystart; j < yend; ++j) { - for (uniform int i = 0; i < width; i += programCount) { - float x = x0 + (programIndex + i) * dx; - float y = y0 + j * dy; + foreach (yi = ystart ... yend, xi = 0 ... width) { + float x = x0 + xi * dx; + float y = y0 + yi * dy; - int index = j * width + i + programIndex; - output[index] = mandel(x, y, maxIterations); - } + int index = yi * width + xi; + output[index] = mandel(x, y, maxIterations); } } diff --git a/examples/options/options.ispc b/examples/options/options.ispc index 89e53634..f1ea3678 100644 --- a/examples/options/options.ispc +++ b/examples/options/options.ispc @@ -59,15 +59,13 @@ export void black_scholes_ispc(uniform float Sa[], uniform float Xa[], uniform float Ta[], uniform float ra[], uniform float va[], uniform float result[], uniform int count) { - for (uniform int i = 0; i < count; i += programCount) { - float S = Sa[i + programIndex], X = Xa[i + programIndex]; - float T = Ta[i + programIndex], r = ra[i + programIndex]; - float v = va[i + programIndex]; + foreach (i = 0 ... 
count) { + float S = Sa[i], X = Xa[i], T = Ta[i], r = ra[i], v = va[i]; float d1 = (log(S/X) + (r + v * v * .5f) * T) / (v * sqrt(T)); float d2 = d1 - v * sqrt(T); - result[i + programIndex] = S * CND(d1) - X * exp(-r * T) * CND(d2); + result[i] = S * CND(d1) - X * exp(-r * T) * CND(d2); } } @@ -78,10 +76,8 @@ binomial_put_ispc(uniform float Sa[], uniform float Xa[], uniform float Ta[], uniform float result[], uniform int count) { float V[BINOMIAL_NUM]; - for (uniform int i = 0; i < count; i += programCount) { - float S = Sa[i + programIndex], X = Xa[i + programIndex]; - float T = Ta[i + programIndex], r = ra[i + programIndex]; - float v = va[i + programIndex]; + foreach (i = 0 ... count) { + float S = Sa[i], X = Xa[i], T = Ta[i], r = ra[i], v = va[i]; float dt = T / BINOMIAL_NUM; float u = exp(v * sqrt(dt)); @@ -98,6 +94,6 @@ binomial_put_ispc(uniform float Sa[], uniform float Xa[], uniform float Ta[], for (uniform int k = 0; k < j; ++k) V[k] = ((1 - Pu) * V[k] + Pu * V[k + 1]) / disc; - result[i + programIndex] = V[0]; + result[i] = V[0]; } } diff --git a/examples/rt/rt.cpp b/examples/rt/rt.cpp index 4bc07c5e..2a0722bc 100644 --- a/examples/rt/rt.cpp +++ b/examples/rt/rt.cpp @@ -199,10 +199,8 @@ int main(int argc, char *argv[]) { } fclose(f); - // round image resolution up to multiple of 16 to make things easy for - // the code that assigns pixels to ispc program instances - int height = (int(baseHeight * scale) + 0xf) & ~0xf; - int width = (int(baseWidth * scale) + 0xf) & ~0xf; + int height = int(baseHeight * scale); + int width = int(baseWidth * scale); // allocate images; one to hold hit object ids, one to hold depth to // the first interseciton diff --git a/examples/rt/rt.ispc b/examples/rt/rt.ispc index 47abee80..015d66d7 100644 --- a/examples/rt/rt.ispc +++ b/examples/rt/rt.ispc @@ -244,34 +244,15 @@ static void raytrace_tile(uniform int x0, uniform int x1, uniform float widthScale = (float)(baseWidth) / (float)(width); uniform float heightScale = 
(float)(baseHeight) / (float)(height); - static const uniform float udx[16] = { 0, 1, 0, 1, 2, 3, 2, 3, - 0, 1, 0, 1, 2, 3, 2, 3 }; - static const uniform float udy[16] = { 0, 0, 1, 1, 0, 0, 1, 1, - 2, 2, 3, 3, 2, 2, 3, 3 }; + foreach_tiled (y = y0 ... y1, x = x0 ... x1) { + Ray ray; + generateRay(raster2camera, camera2world, x*widthScale, + y*heightScale, ray); + BVHIntersect(nodes, triangles, ray); - // The outer loops are always over blocks of 4x4 pixels - for (uniform int y = y0; y < y1; y += 4) { - for (uniform int x = x0; x < x1; x += 4) { - // Now we have a block of 4x4=16 pixels to process; it will - // take 16/programCount iterations of this loop to process - // them. - for (uniform int o = 0; o < 16 / programCount; ++o) { - // Map program instances to samples in the udx/udy arrays - // to figure out which pixel each program instance is - // responsible for - const float dx = udx[o * programCount + programIndex]; - const float dy = udy[o * programCount + programIndex]; - - Ray ray; - generateRay(raster2camera, camera2world, (x+dx)*widthScale, - (y+dy)*heightScale, ray); - BVHIntersect(nodes, triangles, ray); - - int offset = (y + (int)dy) * width + (x + (int)dx); - image[offset] = ray.maxt; - id[offset] = ray.hitId; - } - } + int offset = y * width + x; + image[offset] = ray.maxt; + id[offset] = ray.hitId; } } diff --git a/examples/stencil/stencil.ispc b/examples/stencil/stencil.ispc index d707640c..10b7b6a3 100644 --- a/examples/stencil/stencil.ispc +++ b/examples/stencil/stencil.ispc @@ -43,9 +43,8 @@ stencil_step(uniform int x0, uniform int x1, for (uniform int z = z0; z < z1; ++z) { for (uniform int y = y0; y < y1; ++y) { - // Assumes that (x1-x0) % programCount == 0 - for (uniform int x = x0; x < x1; x += programCount) { - int index = (z * Nxy) + (y * Nx) + x + programIndex; + foreach (x = x0 ... 
x1) { + int index = (z * Nxy) + (y * Nx) + x; #define A_cur(x, y, z) Ain[index + (x) + ((y) * Nx) + ((z) * Nxy)] #define A_next(x, y, z) Aout[index + (x) + ((y) * Nx) + ((z) * Nxy)] float div = coef[0] * A_cur(0, 0, 0) + diff --git a/examples/volume_rendering/volume.ispc b/examples/volume_rendering/volume.ispc index c4bc0c1a..229f510b 100644 --- a/examples/volume_rendering/volume.ispc +++ b/examples/volume_rendering/volume.ispc @@ -310,11 +310,7 @@ volume_tile(uniform int x0, uniform int y0, uniform int x1, // by 4. for (uniform int y = y0; y < y1; y += 4) { for (uniform int x = x0; x < x1; x += 4) { - // For each such tile, process programCount pixels at a time, - // until we've done all 16 of them. Thus, we're also assuming - // that programCount <= 16 and that 16 is evenly dividible by - // programCount. - for (uniform int o = 0; o < 16; o += programCount) { + foreach (o = 0 ... 16) { // These two arrays encode the mapping from [0,15] to // offsets within the 4x4 pixel block so that we render // each pixel inside the block @@ -324,8 +320,7 @@ volume_tile(uniform int x0, uniform int y0, uniform int x1, 2, 2, 3, 3, 2, 2, 3, 3 }; // Figure out the pixel to render for this program instance - int xo = x + xoffsets[o + programIndex]; - int yo = y + yoffsets[o + programIndex]; + int xo = x + xoffsets[o], yo = y + yoffsets[o]; // Use viewing parameters to compute the corresponding ray // for the pixel diff --git a/lex.ll b/lex.ll index 4ee811b3..ce3c4af9 100644 --- a/lex.ll +++ b/lex.ll @@ -101,6 +101,8 @@ extern { return TOKEN_EXTERN; } false { return TOKEN_FALSE; } float { return TOKEN_FLOAT; } for { return TOKEN_FOR; } +foreach { return TOKEN_FOREACH; } +foreach_tiled { return TOKEN_FOREACH_TILED; } goto { return TOKEN_GOTO; } if { return TOKEN_IF; } inline { return TOKEN_INLINE; } @@ -132,6 +134,7 @@ varying { return TOKEN_VARYING; } void { return TOKEN_VOID; } while { return TOKEN_WHILE; } \"C\" { return TOKEN_STRING_C_LITERAL; } +\.\.\. 
{ return TOKEN_DOTDOTDOT; } L?\"(\\.|[^\\"])*\" { lStringConst(yylval, yylloc); return TOKEN_STRING_LITERAL; } diff --git a/parse.yy b/parse.yy index 540437af..1b44f996 100644 --- a/parse.yy +++ b/parse.yy @@ -62,8 +62,12 @@ (Current).name = NULL; /* new */ \ } \ while (0) + +struct ForeachDimension; + } + %{ #include "ispc.h" @@ -102,11 +106,11 @@ static void lFinalizeEnumeratorSymbols(std::vector &enums, const EnumType *enumType); static const char *lBuiltinTokens[] = { - "bool", "break", "case", "cbreak", "ccontinue", "cdo", "cfor", + "assert", "bool", "break", "case", "cbreak", "ccontinue", "cdo", "cfor", "cif", "cwhile", "const", "continue", "creturn", "default", "do", "double", - "else", "enum", "export", "extern", "false", "float", "for", "goto", "if", - "inline", "int", "int8", "int16", "int32", "int64", "launch", "NULL", - "print", "return", "signed", "sizeof", + "else", "enum", "export", "extern", "false", "float", "for", "foreach", + "foreach_tiled", "goto", "if", "inline", "int", "int8", "int16", + "int32", "int64", "launch", "NULL", "print", "return", "signed", "sizeof", "static", "struct", "switch", "sync", "task", "true", "typedef", "uniform", "unsigned", "varying", "void", "while", NULL }; @@ -116,10 +120,26 @@ static const char *lParamListTokens[] = { "int8", "int16", "int32", "int64", "signed", "struct", "true", "uniform", "unsigned", "varying", "void", NULL }; - + +struct ForeachDimension { + ForeachDimension(Symbol *s = NULL, Expr *b = NULL, Expr *e = NULL) { + sym = s; + beginExpr = b; + endExpr = e; + } + Symbol *sym; + Expr *beginExpr, *endExpr; +}; + %} %union { + int32_t int32Val; + double floatVal; + int64_t int64Val; + std::string *stringVal; + const char *constCharPtr; + Expr *expr; ExprList *exprList; const Type *type; @@ -136,13 +156,10 @@ static const char *lParamListTokens[] = { StructDeclaration *structDeclaration; std::vector *structDeclarationList; const EnumType *enumType; - Symbol *enumerator; - std::vector *enumeratorList; - 
int32_t int32Val; - double floatVal; - int64_t int64Val; - std::string *stringVal; - const char *constCharPtr; + Symbol *symbol; + std::vector *symbolList; + ForeachDimension *foreachDimension; + std::vector *foreachDimensionList; } @@ -163,7 +180,7 @@ static const char *lParamListTokens[] = { %token TOKEN_ENUM TOKEN_STRUCT TOKEN_TRUE TOKEN_FALSE %token TOKEN_CASE TOKEN_DEFAULT TOKEN_IF TOKEN_ELSE TOKEN_SWITCH -%token TOKEN_WHILE TOKEN_DO TOKEN_LAUNCH +%token TOKEN_WHILE TOKEN_DO TOKEN_LAUNCH TOKEN_FOREACH TOKEN_FOREACH_TILED TOKEN_DOTDOTDOT %token TOKEN_FOR TOKEN_GOTO TOKEN_CONTINUE TOKEN_BREAK TOKEN_RETURN %token TOKEN_CIF TOKEN_CDO TOKEN_CFOR TOKEN_CWHILE TOKEN_CBREAK %token TOKEN_CCONTINUE TOKEN_CRETURN TOKEN_SYNC TOKEN_PRINT TOKEN_ASSERT @@ -194,8 +211,8 @@ static const char *lParamListTokens[] = { %type struct_declaration %type struct_declaration_list -%type enumerator_list -%type enumerator +%type enumerator_list +%type enumerator foreach_identifier %type enum_specifier %type specifier_qualifier_list struct_or_union_specifier @@ -211,6 +228,9 @@ static const char *lParamListTokens[] = { %type struct_or_union_name enum_identifier %type int_constant soa_width_specifier +%type foreach_dimension_specifier +%type foreach_dimension_list + %start translation_unit %% @@ -1295,6 +1315,40 @@ cfor_scope : TOKEN_CFOR { m->symbolTable->PushScope(); } ; +foreach_scope + : TOKEN_FOREACH { m->symbolTable->PushScope(); } + ; + +foreach_tiled_scope + : TOKEN_FOREACH_TILED { m->symbolTable->PushScope(); } + ; + +foreach_identifier + : TOKEN_IDENTIFIER + { + $$ = new Symbol(yytext, @1, AtomicType::VaryingConstInt32); + } + ; + +foreach_dimension_specifier + : foreach_identifier '=' assignment_expression TOKEN_DOTDOTDOT assignment_expression + { + $$ = new ForeachDimension($1, $3, $5); + } + ; + +foreach_dimension_list + : foreach_dimension_specifier + { + $$ = new std::vector; + $$->push_back($1); + } + | foreach_dimension_list ',' foreach_dimension_specifier + { + 
$$->push_back($3); + } + ; + iteration_statement : TOKEN_WHILE '(' expression ')' statement { $$ = new ForStmt(NULL, $3, NULL, $5, false, @1); } @@ -1320,6 +1374,44 @@ iteration_statement { $$ = new ForStmt($3, $4, new ExprStmt($5, @5), $7, true, @1); m->symbolTable->PopScope(); } + | foreach_scope '(' foreach_dimension_list ')' + { + std::vector &dims = *$3; + for (unsigned int i = 0; i < dims.size(); ++i) + m->symbolTable->AddVariable(dims[i]->sym); + } + statement + { + std::vector &dims = *$3; + std::vector syms; + std::vector begins, ends; + for (unsigned int i = 0; i < dims.size(); ++i) { + syms.push_back(dims[i]->sym); + begins.push_back(dims[i]->beginExpr); + ends.push_back(dims[i]->endExpr); + } + $$ = new ForeachStmt(syms, begins, ends, $6, false, @1); + m->symbolTable->PopScope(); + } + | foreach_tiled_scope '(' foreach_dimension_list ')' + { + std::vector &dims = *$3; + for (unsigned int i = 0; i < dims.size(); ++i) + m->symbolTable->AddVariable(dims[i]->sym); + } + statement + { + std::vector &dims = *$3; + std::vector syms; + std::vector begins, ends; + for (unsigned int i = 0; i < dims.size(); ++i) { + syms.push_back(dims[i]->sym); + begins.push_back(dims[i]->beginExpr); + ends.push_back(dims[i]->endExpr); + } + $$ = new ForeachStmt(syms, begins, ends, $6, true, @1); + m->symbolTable->PopScope(); + } ; jump_statement diff --git a/stmt.cpp b/stmt.cpp index 93bc7569..cc275d85 100644 --- a/stmt.cpp +++ b/stmt.cpp @@ -819,6 +819,17 @@ lSafeToRunWithAllLanesOff(Stmt *stmt) { lSafeToRunWithAllLanesOff(fs->step) && lSafeToRunWithAllLanesOff(fs->stmts)); + ForeachStmt *fes; + if ((fes = dynamic_cast(stmt)) != NULL) { + for (unsigned int i = 0; i < fes->startExprs.size(); ++i) + if (!lSafeToRunWithAllLanesOff(fes->startExprs[i])) + return false; + for (unsigned int i = 0; i < fes->endExprs.size(); ++i) + if (!lSafeToRunWithAllLanesOff(fes->endExprs[i])) + return false; + return lSafeToRunWithAllLanesOff(fes->stmts); + } + if (dynamic_cast(stmt) != NULL || 
dynamic_cast(stmt) != NULL) return true; @@ -1592,6 +1603,463 @@ ContinueStmt::Print(int indent) const { } +/////////////////////////////////////////////////////////////////////////// +// ForeachStmt + +ForeachStmt::ForeachStmt(const std::vector &lvs, + const std::vector &se, + const std::vector &ee, + Stmt *s, bool t, SourcePos pos) + : Stmt(pos), dimVariables(lvs), startExprs(se), endExprs(ee), isTiled(t), + stmts(s) { +} + + +/* Given a uniform counter value in the memory location pointed to by + uniformCounterPtr, compute the corresponding set of varying counter + values for use within the loop body. + */ +static llvm::Value * +lUpdateVaryingCounter(int dim, int nDims, FunctionEmitContext *ctx, + llvm::Value *uniformCounterPtr, + llvm::Value *varyingCounterPtr, + const std::vector &spans) { + // Smear the uniform counter value out to be varying + llvm::Value *counter = ctx->LoadInst(uniformCounterPtr); + llvm::Value *smearCounter = + llvm::UndefValue::get(LLVMTypes::Int32VectorType); + for (int i = 0; i < g->target.vectorWidth; ++i) + smearCounter = + ctx->InsertInst(smearCounter, counter, i, "smear_counter"); + + // Figure out the offsets; this is a little bit tricky. As an example, + // consider a 2D tiled foreach loop, where we're running 8-wide and + // where the inner dimension has a stride of 4 and the outer dimension + // has a stride of 2. For the inner dimension, we want the offsets + // (0,1,2,3,0,1,2,3), and for the outer dimension we want + // (0,0,0,0,1,1,1,1). + int32_t delta[ISPC_MAX_NVEC]; + for (int i = 0; i < g->target.vectorWidth; ++i) { + int d = i; + // First, account for the effect of any dimensions at deeper + // nesting levels than the current one. 
+ int prevDimSpanCount = 1; + for (int j = dim; j < nDims-1; ++j) + prevDimSpanCount *= spans[j+1]; + d /= prevDimSpanCount; + + // And now with what's left, figure out our own offset + delta[i] = d % spans[dim]; + } + + // Add the deltas to compute the varying counter values; store the + // result to memory and then return it directly as well. + llvm::Value *varyingCounter = + ctx->BinaryOperator(llvm::Instruction::Add, smearCounter, + LLVMInt32Vector(delta), "iter_val"); + ctx->StoreInst(varyingCounter, varyingCounterPtr); + return varyingCounter; +} + + +/** Returns the integer log2 of the given integer. */ +static int +lLog2(int i) { + int ret = 0; + while (i != 0) { + ++ret; + i >>= 1; + } + return ret-1; +} + + +/* Figure out how many elements to process in each dimension for each time + through a foreach loop. The untiled case is easy; all of the outer + dimensions up until the innermost one have a span of 1, and the + innermost one takes the entire vector width. For the tiled case, we + give wider spans to the innermost dimensions while also trying to + generate relatively square domains. + + This code works recursively from outer dimensions to inner dimensions. + */ +static void +lGetSpans(int dimsLeft, int nDims, int itemsLeft, bool isTiled, int *a) { + if (dimsLeft == 0) { + // Nothing left to do but give all of the remaining work to the + // innermost domain. + *a = itemsLeft; + return; + } + + if (isTiled == false || (dimsLeft >= lLog2(itemsLeft))) + // If we're not tiled, or if there are enough dimensions left that + // giving this one any more than a span of one would mean that a + // later dimension would have to have a span of one, give this one + // a span of one to save the available items for later. + *a = 1; + else if (itemsLeft >= 16 && (dimsLeft == 1)) + // Special case to have 4x4 domains for the 2D case when running + // 16-wide. + *a = 4; + else + // Otherwise give this dimension a span of two. 
+ *a = 2; + + lGetSpans(dimsLeft-1, nDims, itemsLeft / *a, isTiled, a+1); +} + + +/* Emit code for a foreach statement. We effectively emit code to run the + set of n-dimensional nested loops corresponding to the dimensionality of + the foreach statement along with the extra logic to deal with mismatches + between the vector width we're compiling to and the number of elements + to process. + */ +void +ForeachStmt::EmitCode(FunctionEmitContext *ctx) const { + if (ctx->GetCurrentBasicBlock() == NULL || stmts == NULL) + return; + + llvm::BasicBlock *bbCheckExtras = ctx->CreateBasicBlock("foreach_check_extras"); + llvm::BasicBlock *bbDoExtras = ctx->CreateBasicBlock("foreach_do_extras"); + llvm::BasicBlock *bbBody = ctx->CreateBasicBlock("foreach_body"); + llvm::BasicBlock *bbExit = ctx->CreateBasicBlock("foreach_exit"); + + llvm::Value *oldMask = ctx->GetInternalMask(); + + ctx->StartForeach(); + ctx->SetDebugPos(pos); + ctx->StartScope(); + + // This should be caught during typechecking + assert(startExprs.size() == dimVariables.size() && + endExprs.size() == dimVariables.size()); + int nDims = (int)dimVariables.size(); + + /////////////////////////////////////////////////////////////////////// + // Setup: compute the number of items we have to work on in each + // dimension and a number of derived values. + std::vector bbReset, bbStep, bbTest; + std::vector startVals, endVals, uniformCounterPtrs; + std::vector nItems, nExtras, alignedEnd; + std::vector extrasMaskPtrs; + + std::vector span(nDims, 0); + lGetSpans(nDims-1, nDims, g->target.vectorWidth, isTiled, &span[0]); + + for (int i = 0; i < nDims; ++i) { + // Basic blocks that we'll fill in later with the looping logic for + // this dimension. 
+ bbReset.push_back(ctx->CreateBasicBlock("foreach_reset")); + bbStep.push_back(ctx->CreateBasicBlock("foreach_step")); + bbTest.push_back(ctx->CreateBasicBlock("foreach_test")); + + // Start and end value for this loop dimension + llvm::Value *sv = startExprs[i]->GetValue(ctx); + llvm::Value *ev = endExprs[i]->GetValue(ctx); + if (sv == NULL || ev == NULL) + return; + startVals.push_back(sv); + endVals.push_back(ev); + + // nItems = endVal - startVal + nItems.push_back(ctx->BinaryOperator(llvm::Instruction::Sub, ev, sv, + "nitems")); + + // nExtras = nItems % (span for this dimension) + // This gives us the number of extra elements we need to deal with + // at the end of the loop for this dimension that don't fit cleanly + // into a vector width. + nExtras.push_back(ctx->BinaryOperator(llvm::Instruction::SRem, nItems[i], + LLVMInt32(span[i]), "nextras")); + + // alignedEnd = endVal - nExtras + alignedEnd.push_back(ctx->BinaryOperator(llvm::Instruction::Sub, ev, + nExtras[i], "aligned_end")); + + /////////////////////////////////////////////////////////////////////// + // Each dimension has a loop counter that is a uniform value that + // goes from startVal to endVal, in steps of the span for this + // dimension. Its value is only used internally here for looping + // logic and isn't directly available in the user's program code. + uniformCounterPtrs.push_back(ctx->AllocaInst(LLVMTypes::Int32Type, + "counter")); + ctx->StoreInst(startVals[i], uniformCounterPtrs[i]); + + // There is also a varying variable that holds the set of index + // values for each dimension in the current loop iteration; this is + // the value that is program-visible. 
+ dimVariables[i]->storagePtr = ctx->AllocaInst(LLVMTypes::Int32VectorType, + dimVariables[i]->name.c_str()); + dimVariables[i]->parentFunction = ctx->GetFunction(); + ctx->EmitVariableDebugInfo(dimVariables[i]); + + // Each dimension also maintains a mask that represents which of + // the varying elements in the current iteration should be + // processed. (i.e. this is used to disable the lanes that have + // out-of-bounds offsets.) + extrasMaskPtrs.push_back(ctx->AllocaInst(LLVMTypes::MaskType, "extras mask")); + ctx->StoreInst(LLVMMaskAllOn, extrasMaskPtrs[i]); + } + + // On to the outermost loop's test + ctx->BranchInst(bbTest[0]); + + /////////////////////////////////////////////////////////////////////////// + // foreach_reset: this code runs when we need to reset the counter for + // a given dimension in preparation for running through its loop again, + // after the enclosing level advances its counter. + for (int i = 0; i < nDims; ++i) { + ctx->SetCurrentBasicBlock(bbReset[i]); + if (i == 0) + ctx->BranchInst(bbExit); + else { + ctx->StoreInst(LLVMMaskAllOn, extrasMaskPtrs[i]); + ctx->StoreInst(startVals[i], uniformCounterPtrs[i]); + ctx->BranchInst(bbStep[i-1]); + } + } + + /////////////////////////////////////////////////////////////////////////// + // foreach_test + std::vector inExtras; + for (int i = 0; i < nDims; ++i) { + ctx->SetCurrentBasicBlock(bbTest[i]); + + llvm::Value *haveExtras = + ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SGT, + endVals[i], alignedEnd[i], "have_extras"); + + llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[i], "counter"); + llvm::Value *atAlignedEnd = + ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, + counter, alignedEnd[i], "at_aligned_end"); + llvm::Value *inEx = + ctx->BinaryOperator(llvm::Instruction::And, haveExtras, + atAlignedEnd, "in_extras"); + + if (i == 0) + inExtras.push_back(inEx); + else + inExtras.push_back(ctx->BinaryOperator(llvm::Instruction::Or, inEx, + inExtras[i-1], 
"in_extras_all")); + + llvm::Value *varyingCounter = + lUpdateVaryingCounter(i, nDims, ctx, uniformCounterPtrs[i], + dimVariables[i]->storagePtr, span); + + llvm::Value *smearEnd = llvm::UndefValue::get(LLVMTypes::Int32VectorType); + for (int j = 0; j < g->target.vectorWidth; ++j) + smearEnd = ctx->InsertInst(smearEnd, endVals[i], j, "smear_end"); + // Do a vector compare of its value to the end value to generate a + // mask for this last bit of work. + llvm::Value *emask = + ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT, + varyingCounter, smearEnd); + emask = ctx->I1VecToBoolVec(emask); + + if (i == 0) + ctx->StoreInst(emask, extrasMaskPtrs[i]); + else { + // FIXME: at least specialize the innermost loop to not do all + // this mask stuff each time through the test... + llvm::Value *oldMask = ctx->LoadInst(extrasMaskPtrs[i-1]); + llvm::Value *newMask = + ctx->BinaryOperator(llvm::Instruction::And, oldMask, emask, + "extras_mask"); + ctx->StoreInst(newMask, extrasMaskPtrs[i]); + } + + llvm::Value *notAtEnd = + ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT, + counter, endVals[i]); + if (i != nDims-1) + ctx->BranchInst(bbTest[i+1], bbReset[i], notAtEnd); + else + ctx->BranchInst(bbCheckExtras, bbReset[i], notAtEnd); + } + + /////////////////////////////////////////////////////////////////////////// + // foreach_step: increment the uniform counter by the vector width. + // Note that we don't increment the varying counter here as well but + // just generate its value when we need it in the loop body. 
+ for (int i = 0; i < nDims; ++i) { + ctx->SetCurrentBasicBlock(bbStep[i]); + llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[i]); + llvm::Value *newCounter = + ctx->BinaryOperator(llvm::Instruction::Add, counter, + LLVMInt32(span[i]), "new_counter"); + ctx->StoreInst(newCounter, uniformCounterPtrs[i]); + ctx->BranchInst(bbTest[i]); + } + + /////////////////////////////////////////////////////////////////////////// + // foreach_check_extras: see if we need to deal with any partial + // vector's worth of work that's left. + ctx->SetCurrentBasicBlock(bbCheckExtras); + ctx->AddInstrumentationPoint("foreach loop check extras"); + ctx->BranchInst(bbDoExtras, bbBody, inExtras[nDims-1]); + + /////////////////////////////////////////////////////////////////////////// + // foreach_body: do a full vector's worth of work. We know that all + // lanes will be running here, so we explicitly set the mask to be 'all + // on'. This ends up being relatively straightforward: just update the + // value of the varying loop counter and have the statements in the + // loop body emit their code. + ctx->SetCurrentBasicBlock(bbBody); + ctx->SetInternalMask(LLVMMaskAllOn); + ctx->AddInstrumentationPoint("foreach loop body"); + stmts->EmitCode(ctx); + assert(ctx->GetCurrentBasicBlock() != NULL); + ctx->BranchInst(bbStep[nDims-1]); + + /////////////////////////////////////////////////////////////////////////// + // foreach_doextras: set the mask and have the statements emit their + // code again. Note that it's generally worthwhile having two copies + // of the statements' code, since the code above is emitted with the + // mask known to be all-on, which in turn leads to more efficient code + // for that case. 
+ ctx->SetCurrentBasicBlock(bbDoExtras); + llvm::Value *mask = ctx->LoadInst(extrasMaskPtrs[nDims-1]); + ctx->SetInternalMask(mask); + stmts->EmitCode(ctx); + ctx->BranchInst(bbStep[nDims-1]); + + /////////////////////////////////////////////////////////////////////////// + // foreach_exit: All done. Restore the old mask and clean up + ctx->SetCurrentBasicBlock(bbExit); + ctx->SetInternalMask(oldMask); + + ctx->EndForeach(); + ctx->EndScope(); +} + + +Stmt * +ForeachStmt::Optimize() { + bool anyErrors = false; + for (unsigned int i = 0; i < startExprs.size(); ++i) { + if (startExprs[i] != NULL) + startExprs[i]->Optimize(); + anyErrors |= (startExprs[i] == NULL); + } + for (unsigned int i = 0; i < endExprs.size(); ++i) { + if (endExprs[i] != NULL) + endExprs[i]->Optimize(); + anyErrors |= (endExprs[i] == NULL); + } + + if (stmts != NULL) + stmts = stmts->Optimize(); /* recurse into the loop body with the optimization pass, not a second type-check */ + anyErrors |= (stmts == NULL); + + return anyErrors ? NULL : this; +} + + +Stmt * +ForeachStmt::TypeCheck() { + bool anyErrors = false; + for (unsigned int i = 0; i < startExprs.size(); ++i) { + if (startExprs[i] != NULL) + startExprs[i] = TypeConvertExpr(startExprs[i], + AtomicType::UniformInt32, + "foreach starting value"); + if (startExprs[i] != NULL) + startExprs[i]->TypeCheck(); + anyErrors |= (startExprs[i] == NULL); + } + for (unsigned int i = 0; i < endExprs.size(); ++i) { + if (endExprs[i] != NULL) + endExprs[i] = TypeConvertExpr(endExprs[i], AtomicType::UniformInt32, + "foreach ending value"); + if (endExprs[i] != NULL) + endExprs[i]->TypeCheck(); + anyErrors |= (endExprs[i] == NULL); + } + + if (stmts != NULL) + stmts = stmts->TypeCheck(); + anyErrors |= (stmts == NULL); + + if (startExprs.size() < dimVariables.size()) { + Error(pos, "Not enough initial values provided for \"foreach\" loop; " + "got %d, expected %d\n", (int)startExprs.size(), (int)dimVariables.size()); + anyErrors = true; + } + else if (startExprs.size() > dimVariables.size()) { + Error(pos, "Too many initial values provided 
for \"foreach\" loop; " + "got %d, expected %d\n", (int)startExprs.size(), (int)dimVariables.size()); + anyErrors = true; + } + + if (endExprs.size() < dimVariables.size()) { + Error(pos, "Not enough initial values provided for \"foreach\" loop; " + "got %d, expected %d\n", (int)endExprs.size(), (int)dimVariables.size()); + anyErrors = true; + } + else if (endExprs.size() > dimVariables.size()) { + Error(pos, "Too many initial values provided for \"foreach\" loop; " + "got %d, expected %d\n", (int)endExprs.size(), (int)dimVariables.size()); + anyErrors = true; + } + + return anyErrors ? NULL : this; +} + + +int +ForeachStmt::EstimateCost() const { + return dimVariables.size() * (COST_UNIFORM_LOOP + COST_SIMPLE_ARITH_LOGIC_OP) + + (stmts ? stmts->EstimateCost() : 0); +} + + +void +ForeachStmt::Print(int indent) const { + printf("%*cForeach Stmt", indent, ' '); + pos.Print(); + printf("\n"); + + for (unsigned int i = 0; i < dimVariables.size(); ++i) + if (dimVariables[i] != NULL) + printf("%*cVar %d: %s\n", indent+4, ' ', i, + dimVariables[i]->name.c_str()); + else + printf("%*cVar %d: NULL\n", indent+4, ' ', i); + + printf("Start values:\n"); + for (unsigned int i = 0; i < startExprs.size(); ++i) { + if (startExprs[i] != NULL) + startExprs[i]->Print(); + else + printf("NULL"); + if (i != startExprs.size()-1) + printf(", "); + else + printf("\n"); + } + + printf("End values:\n"); + for (unsigned int i = 0; i < endExprs.size(); ++i) { + if (endExprs[i] != NULL) + endExprs[i]->Print(); + else + printf("NULL"); + if (i != endExprs.size()-1) + printf(", "); + else + printf("\n"); + } + + if (stmts != NULL) { + printf("%*cStmts:\n", indent+4, ' '); + stmts->Print(indent+8); + } +} + + /////////////////////////////////////////////////////////////////////////// // ReturnStmt @@ -1606,6 +2074,11 @@ ReturnStmt::EmitCode(FunctionEmitContext *ctx) const { if (!ctx->GetCurrentBasicBlock()) return; + if (ctx->InForeachLoop()) { + Error(pos, "\"return\" statement is illegal inside 
a \"foreach\" loop."); + return; + } + ctx->SetDebugPos(pos); ctx->CurrentLanesReturned(val, doCoherenceCheck); } diff --git a/stmt.h b/stmt.h index bacb8efb..928a5e7a 100644 --- a/stmt.h +++ b/stmt.h @@ -241,6 +241,31 @@ private: }; +/** @brief Statement implementation for parallel 'foreach' loops. + */ +class ForeachStmt : public Stmt { +public: + ForeachStmt(const std::vector &loopVars, + const std::vector &startExprs, + const std::vector &endExprs, + Stmt *bodyStatements, bool tiled, SourcePos pos); + + void EmitCode(FunctionEmitContext *ctx) const; + void Print(int indent) const; + + Stmt *Optimize(); + Stmt *TypeCheck(); + int EstimateCost() const; + + std::vector dimVariables; + std::vector startExprs; + std::vector endExprs; + bool isTiled; + Stmt *stmts; +}; + + + /** @brief Statement implementation for a 'return' or 'coherent' return statement in the program. */ class ReturnStmt : public Stmt { diff --git a/tests/foreach-1.ispc b/tests/foreach-1.ispc new file mode 100644 index 00000000..fa3bcbbf --- /dev/null +++ b/tests/foreach-1.ispc @@ -0,0 +1,22 @@ + +export uniform int width() { return programCount; } + + +export void f_f(uniform float RET[], uniform float aFOO[]) { + uniform float val[programCount]; + for (uniform int i = 0; i < programCount; ++i) + val[i] = 0; + + foreach (i = 0 ... 
programCount) + val[i] += aFOO[i] - 1; + + uniform float sum = 0; + for (uniform int i = 0; i < programCount; ++i) + sum += val[i]; + + RET[programIndex] = sum; +} + +export void result(uniform float RET[]) { + RET[programIndex] = reduce_add(programIndex); +} diff --git a/tests/foreach-10.ispc b/tests/foreach-10.ispc new file mode 100644 index 00000000..19de3b3a --- /dev/null +++ b/tests/foreach-10.ispc @@ -0,0 +1,33 @@ + +export uniform int width() { return programCount; } + +export void f_f(uniform float RET[], uniform float aFOO[]) { +#define NA 4 +#define NB 8 +#define NC 7 + uniform int a[NA][NB][NC]; + + for (uniform int i = 0; i < NA; ++i) + for (uniform int j = 0; j < NB; ++j) + for (uniform int k = 0; j < NC; ++j) + a[i][j][k] = 0; + + foreach_tiled (i = 0 ... NA, j = 0 ... NB, k = 0 ... NC) { + a[i][j][k] += 1; + } + + uniform int errs = 0; + for (uniform int i = 0; i < NA; ++i) + for (uniform int j = 0; j < NB; ++j) + for (uniform int k = 0; j < NC; ++j) + if (a[i][j][k] != 1) { +//CO print("% % % = %\n", i, j, k, a[i][j][k]); + ++errs; + } + + RET[programIndex] = errs; +} + +export void result(uniform float RET[]) { + RET[programIndex] = 0; +} diff --git a/tests/foreach-11.ispc b/tests/foreach-11.ispc new file mode 100644 index 00000000..47c607b3 --- /dev/null +++ b/tests/foreach-11.ispc @@ -0,0 +1,22 @@ + +export uniform int width() { return programCount; } + + +export void f_f(uniform float RET[], uniform float aFOO[]) { + uniform float val[programCount]; + for (uniform int i = 0; i < programCount; ++i) + val[i] = 0; + + foreach_tiled (i = 0 ... 
programCount) + val[i] += aFOO[i] - 1; + + uniform float sum = 0; + for (uniform int i = 0; i < programCount; ++i) + sum += val[i]; + + RET[programIndex] = sum; +} + +export void result(uniform float RET[]) { + RET[programIndex] = reduce_add(programIndex); +} diff --git a/tests/foreach-12.ispc b/tests/foreach-12.ispc new file mode 100644 index 00000000..2a3b9cda --- /dev/null +++ b/tests/foreach-12.ispc @@ -0,0 +1,26 @@ + +export uniform int width() { return programCount; } + + +uniform int foo(int i); + +export void f_f(uniform float RET[], uniform float aFOO[]) { + uniform float val[programCount]; + for (uniform int i = 0; i < programCount; ++i) + val[i] = 0; + + foreach_tiled (i = 2 ... programCount) + val[i] += i; + + uniform float sum = 0; + for (uniform int i = 0; i < programCount; ++i) { + sum += val[i]; + } + + RET[programIndex] = sum; +} + +export void result(uniform float RET[]) { + int pi = (programIndex >= 2) ? programIndex : 0; + RET[programIndex] = reduce_add(pi); +} diff --git a/tests/foreach-13.ispc b/tests/foreach-13.ispc new file mode 100644 index 00000000..88c55578 --- /dev/null +++ b/tests/foreach-13.ispc @@ -0,0 +1,19 @@ + +export uniform int width() { return programCount; } + + +uniform int foo(int i); + +export void f_f(uniform float RET[], uniform float aFOO[]) { + for (uniform int i = 0; i < programCount; ++i) + RET[i] = 0; + + foreach_tiled (i = 0 ... 2) + RET[i] = i+1; +} + +export void result(uniform float RET[]) { + RET[programIndex] = 0; + RET[0] = 1; + RET[1] = 2; +} diff --git a/tests/foreach-14.ispc b/tests/foreach-14.ispc new file mode 100644 index 00000000..ed4d5fe5 --- /dev/null +++ b/tests/foreach-14.ispc @@ -0,0 +1,17 @@ + +export uniform int width() { return programCount; } + + +uniform int foo(int i); + +export void f_f(uniform float RET[], uniform float aFOO[]) { + for (uniform int i = 0; i < programCount; ++i) + RET[i] = 0; + + foreach_tiled (i = 2 ... 
0) + RET[i] += 1234; +} + +export void result(uniform float RET[]) { + RET[programIndex] = 0; +} diff --git a/tests/foreach-15.ispc b/tests/foreach-15.ispc new file mode 100644 index 00000000..811ee769 --- /dev/null +++ b/tests/foreach-15.ispc @@ -0,0 +1,17 @@ + +export uniform int width() { return programCount; } + + +uniform int foo(int i); + +export void f_f(uniform float RET[], uniform float aFOO[]) { + for (uniform int i = 0; i < programCount; ++i) + RET[i] = 0; + + foreach_tiled (i = 1 ... 1) + RET[i] = 1234; +} + +export void result(uniform float RET[]) { + RET[programIndex] = 0; +} diff --git a/tests/foreach-16.ispc b/tests/foreach-16.ispc new file mode 100644 index 00000000..6465a3d3 --- /dev/null +++ b/tests/foreach-16.ispc @@ -0,0 +1,17 @@ + +export uniform int width() { return programCount; } + + +uniform int foo(int i); + +export void f_f(uniform float RET[], uniform float aFOO[]) { + for (uniform int i = 0; i < programCount; ++i) + RET[i] = 0; + + foreach_tiled (i = -2 ... programCount-2) + RET[i+2] = 1234; +} + +export void result(uniform float RET[]) { + RET[programIndex] = 1234; +} diff --git a/tests/foreach-17.ispc b/tests/foreach-17.ispc new file mode 100644 index 00000000..eb3294ea --- /dev/null +++ b/tests/foreach-17.ispc @@ -0,0 +1,13 @@ + +export uniform int width() { return programCount; } + +export void f_f(uniform float RET[], uniform float aFOO[]) { + float sum = 0; + foreach_tiled (i = 0 ... 
6) + sum += aFOO[i]; + RET[programIndex] = reduce_add(sum); +} + +export void result(uniform float RET[]) { + RET[programIndex] = 21; +} diff --git a/tests/foreach-18.ispc b/tests/foreach-18.ispc new file mode 100644 index 00000000..23cc9fff --- /dev/null +++ b/tests/foreach-18.ispc @@ -0,0 +1,29 @@ + +export uniform int width() { return programCount; } + +export void f_f(uniform float RET[], uniform float aFOO[]) { +#define NA 3 +#define NB 8 + uniform int a[NA][NB]; + + for (uniform int i = 0; i < NA; ++i) + for (uniform int j = 0; j < NB; ++j) + a[i][j] = 0; + + foreach_tiled (i = 0 ... NA, j = 0 ... NB) { + a[i][j] += 1; + } + + uniform int errs = 0; + for (uniform int i = 0; i < NA; ++i) + for (uniform int j = 0; j < NB; ++j) + if (a[i][j] != 1) { + ++errs; + } + + RET[programIndex] = errs; +} + +export void result(uniform float RET[]) { + RET[programIndex] = 0; +} diff --git a/tests/foreach-19.ispc b/tests/foreach-19.ispc new file mode 100644 index 00000000..aea51c4a --- /dev/null +++ b/tests/foreach-19.ispc @@ -0,0 +1,29 @@ + +export uniform int width() { return programCount; } + +export void f_f(uniform float RET[], uniform float aFOO[]) { +#define NA 3 +#define NB 4 + uniform int a[NA][NB]; + + for (uniform int i = 0; i < NA; ++i) + for (uniform int j = 0; j < NB; ++j) + a[i][j] = 0; + + foreach_tiled (i = 0 ... NA, j = 0 ... 
NB) { + a[i][j] += 1; + } + + uniform int errs = 0; + for (uniform int i = 0; i < NA; ++i) + for (uniform int j = 0; j < NB; ++j) + if (a[i][j] != 1) { + ++errs; + } + + RET[programIndex] = errs; +} + +export void result(uniform float RET[]) { + RET[programIndex] = 0; +} diff --git a/tests/foreach-2.ispc b/tests/foreach-2.ispc new file mode 100644 index 00000000..a76dc15b --- /dev/null +++ b/tests/foreach-2.ispc @@ -0,0 +1,26 @@ + +export uniform int width() { return programCount; } + + +uniform int foo(int i); + +export void f_f(uniform float RET[], uniform float aFOO[]) { + uniform float val[programCount]; + for (uniform int i = 0; i < programCount; ++i) + val[i] = 0; + + foreach (i = 2 ... programCount) + val[i] += i; + + uniform float sum = 0; + for (uniform int i = 0; i < programCount; ++i) { + sum += val[i]; + } + + RET[programIndex] = sum; +} + +export void result(uniform float RET[]) { + int pi = (programIndex >= 2) ? programIndex : 0; + RET[programIndex] = reduce_add(pi); +} diff --git a/tests/foreach-3.ispc b/tests/foreach-3.ispc new file mode 100644 index 00000000..65d28b1d --- /dev/null +++ b/tests/foreach-3.ispc @@ -0,0 +1,19 @@ + +export uniform int width() { return programCount; } + + +uniform int foo(int i); + +export void f_f(uniform float RET[], uniform float aFOO[]) { + for (uniform int i = 0; i < programCount; ++i) + RET[i] = 0; + + foreach (i = 0 ... 2) + RET[i] = i+1; +} + +export void result(uniform float RET[]) { + RET[programIndex] = 0; + RET[0] = 1; + RET[1] = 2; +} diff --git a/tests/foreach-4.ispc b/tests/foreach-4.ispc new file mode 100644 index 00000000..6e71c56f --- /dev/null +++ b/tests/foreach-4.ispc @@ -0,0 +1,17 @@ + +export uniform int width() { return programCount; } + + +uniform int foo(int i); + +export void f_f(uniform float RET[], uniform float aFOO[]) { + for (uniform int i = 0; i < programCount; ++i) + RET[i] = 0; + + foreach (i = 2 ... 
0) + RET[i] -= 1234; +} + +export void result(uniform float RET[]) { + RET[programIndex] = 0; +} diff --git a/tests/foreach-5.ispc b/tests/foreach-5.ispc new file mode 100644 index 00000000..0043f770 --- /dev/null +++ b/tests/foreach-5.ispc @@ -0,0 +1,17 @@ + +export uniform int width() { return programCount; } + + +uniform int foo(int i); + +export void f_f(uniform float RET[], uniform float aFOO[]) { + for (uniform int i = 0; i < programCount; ++i) + RET[i] = 0; + + foreach (i = 1 ... 1) + RET[i] = 1234; +} + +export void result(uniform float RET[]) { + RET[programIndex] = 0; +} diff --git a/tests/foreach-6.ispc b/tests/foreach-6.ispc new file mode 100644 index 00000000..158253fd --- /dev/null +++ b/tests/foreach-6.ispc @@ -0,0 +1,17 @@ + +export uniform int width() { return programCount; } + + +uniform int foo(int i); + +export void f_f(uniform float RET[], uniform float aFOO[]) { + for (uniform int i = 0; i < programCount; ++i) + RET[i] = 0; + + foreach (i = -2 ... programCount-2) + RET[i+2] += 1234; +} + +export void result(uniform float RET[]) { + RET[programIndex] = 1234; +} diff --git a/tests/foreach-7.ispc b/tests/foreach-7.ispc new file mode 100644 index 00000000..0506eaf4 --- /dev/null +++ b/tests/foreach-7.ispc @@ -0,0 +1,13 @@ + +export uniform int width() { return programCount; } + +export void f_f(uniform float RET[], uniform float aFOO[]) { + float sum = 0; + foreach (i = 0 ... 6) + sum += aFOO[i]; + RET[programIndex] = reduce_add(sum); +} + +export void result(uniform float RET[]) { + RET[programIndex] = 21; +} diff --git a/tests/foreach-8.ispc b/tests/foreach-8.ispc new file mode 100644 index 00000000..43c76b9f --- /dev/null +++ b/tests/foreach-8.ispc @@ -0,0 +1,23 @@ + +export uniform int width() { return programCount; } + +export void f_f(uniform float RET[], uniform float aFOO[]) { + float sum1 = 0, sum2 = 0; + foreach (x = 0 ... 10, i = 0 ... 
6) { + sum1 += aFOO[i]; + } + + for (uniform int x = 0; x < 10; ++x) { + for (uniform int i = 0; i < 6; i += programCount) { + int index = i + programIndex; + if (index < 6) + sum2 += aFOO[index]; + } + } + + RET[programIndex] = sum1 - sum2; +} + +export void result(uniform float RET[]) { + RET[programIndex] = 0; +} diff --git a/tests/foreach-9.ispc b/tests/foreach-9.ispc new file mode 100644 index 00000000..50689d3e --- /dev/null +++ b/tests/foreach-9.ispc @@ -0,0 +1,29 @@ + +export uniform int width() { return programCount; } + +#define NA 1 +#define NB 3 + +export void f_f(uniform float RET[], uniform float aFOO[]) { + uniform int a[NA][NB]; + + for (uniform int i = 0; i < NA; ++i) + for (uniform int j = 0; j < NB; ++j) + a[i][j] = 0; + + foreach (i = 0 ... NA, j = 0 ... NB) { + a[i][j] += 1; + } + + uniform int errs = 0; + for (uniform int i = 0; i < NA; ++i) + for (uniform int j = 0; j < NB; ++j) + if (a[i][j] != 1) + ++errs; + + RET[programIndex] = errs; +} + +export void result(uniform float RET[]) { + RET[programIndex] = 0; +}