Add foreach and foreach_tiled looping constructs

These make it easier to iterate over arbitrary amounts of data
elements; specifically, they automatically handle the "ragged
extra bits" that come up when the number of elements to be
processed isn't evenly divided by programCount.

TODO: documentation
This commit is contained in:
Matt Pharr
2011-11-30 13:17:31 -08:00
parent b48775a549
commit 8bc7367109
32 changed files with 1120 additions and 78 deletions

75
ctx.cpp
View File

@@ -68,12 +68,19 @@ struct CFInfo {
llvm::Value *savedContinueLanesPtr,
llvm::Value *savedMask, llvm::Value *savedLoopMask);
static CFInfo *GetForeach(llvm::BasicBlock *breakTarget,
llvm::BasicBlock *continueTarget,
llvm::Value *savedBreakLanesPtr,
llvm::Value *savedContinueLanesPtr,
llvm::Value *savedMask, llvm::Value *savedLoopMask);
bool IsIf() { return type == If; }
bool IsLoop() { return type == Loop; }
bool IsForeach() { return type == Foreach; }
bool IsVaryingType() { return !isUniform; }
bool IsUniform() { return isUniform; }
enum CFType { If, Loop };
enum CFType { If, Loop, Foreach };
CFType type;
bool isUniform;
llvm::BasicBlock *savedBreakTarget, *savedContinueTarget;
@@ -102,6 +109,19 @@ private:
savedMask = sm;
savedLoopMask = lm;
}
/** Constructor used for 'foreach' loops.  The only legal value for the
    type is Foreach, and the resulting record is always marked as
    non-uniform.  The remaining arguments are the break/continue targets,
    break/continue lane pointers and masks that were in effect before the
    loop was entered. */
CFInfo(CFType t, llvm::BasicBlock *bt, llvm::BasicBlock *ct,
       llvm::Value *sb, llvm::Value *sc, llvm::Value *sm,
       llvm::Value *lm)
    : type(t), isUniform(false),
      savedBreakTarget(bt), savedContinueTarget(ct),
      savedBreakLanesPtr(sb), savedContinueLanesPtr(sc),
      savedMask(sm), savedLoopMask(lm) {
    assert(t == Foreach);
}
};
@@ -122,6 +142,18 @@ CFInfo::GetLoop(bool isUniform, llvm::BasicBlock *breakTarget,
savedMask, savedLoopMask);
}
/** Allocates a CFInfo record of type Foreach that remembers the given
    break/continue targets, break/continue lane pointers, mask and loop
    mask.  The caller receives the newly allocated record. */
CFInfo *
CFInfo::GetForeach(llvm::BasicBlock *breakTarget,
                   llvm::BasicBlock *continueTarget,
                   llvm::Value *savedBreakLanesPtr,
                   llvm::Value *savedContinueLanesPtr,
                   llvm::Value *savedMask, llvm::Value *savedForeachMask) {
    CFInfo *foreachInfo =
        new CFInfo(Foreach, breakTarget, continueTarget,
                   savedBreakLanesPtr, savedContinueLanesPtr,
                   savedMask, savedForeachMask);
    return foreachInfo;
}
///////////////////////////////////////////////////////////////////////////
FunctionEmitContext::FunctionEmitContext(Function *func, Symbol *funSym,
@@ -422,7 +454,7 @@ FunctionEmitContext::StartLoop(llvm::BasicBlock *bt, llvm::BasicBlock *ct,
void
FunctionEmitContext::EndLoop() {
assert(controlFlowInfo.size() && !controlFlowInfo.back()->IsIf());
assert(controlFlowInfo.size() && controlFlowInfo.back()->IsLoop());
CFInfo *ci = controlFlowInfo.back();
controlFlowInfo.pop_back();
@@ -444,6 +476,36 @@ FunctionEmitContext::EndLoop() {
}
void
FunctionEmitContext::StartForeach() {
// Store the current values of various loop-related state so that we
// can restore it when we exit this loop.
llvm::Value *oldMask = GetInternalMask();
controlFlowInfo.push_back(CFInfo::GetForeach(breakTarget, continueTarget, breakLanesPtr,
continueLanesPtr, oldMask, loopMask));
continueLanesPtr = breakLanesPtr = NULL;
breakTarget = NULL;
continueTarget = NULL;
loopMask = NULL;
}
void
FunctionEmitContext::EndForeach() {
assert(controlFlowInfo.size() && controlFlowInfo.back()->IsForeach());
CFInfo *ci = controlFlowInfo.back();
controlFlowInfo.pop_back();
// Restore the break/continue state information to what it was before
// we went into this loop.
breakTarget = ci->savedBreakTarget;
continueTarget = ci->savedContinueTarget;
breakLanesPtr = ci->savedBreakLanesPtr;
continueLanesPtr = ci->savedContinueLanesPtr;
loopMask = ci->savedLoopMask;
}
void
FunctionEmitContext::restoreMaskGivenReturns(llvm::Value *oldMask) {
if (!bblock)
@@ -638,6 +700,15 @@ FunctionEmitContext::VaryingCFDepth() const {
}
/** Returns true if any entry on the control-flow stack is a Foreach
    record, i.e. if code is currently being emitted somewhere inside a
    foreach loop, at any nesting depth. */
bool
FunctionEmitContext::InForeachLoop() const {
    // Walk the stack from the innermost construct outwards.
    unsigned int level = controlFlowInfo.size();
    while (level > 0) {
        --level;
        if (controlFlowInfo[level]->IsForeach())
            return true;
    }
    return false;
}
void
FunctionEmitContext::CurrentLanesReturned(Expr *expr, bool doCoherenceCheck) {
const Type *returnType = function->GetReturnType();

6
ctx.h
View File

@@ -159,6 +159,10 @@ public:
finished. */
void EndLoop();
/** Notifies the FunctionEmitContext that code for a 'foreach' loop is
    about to be emitted.  The current break/continue targets,
    break/continue lane pointers, mask and loop mask are saved on the
    control-flow stack and the loop-related state is then reset to
    NULL. */
void StartForeach();
/** Notifies the FunctionEmitContext that the 'foreach' loop started by
    the matching StartForeach() call is finished; the saved
    break/continue targets, lane pointers and loop mask are restored. */
void EndForeach();
/** Emit code for a 'break' statement in a loop. If doCoherenceCheck
is true, then if we're in a 'varying' loop, code will be emitted to
see if all of the lanes want to break, in which case a jump to the
@@ -183,6 +187,8 @@ public:
flow */
int VaryingCFDepth() const;
/** Returns true if code is currently being emitted inside a 'foreach'
    loop, at any level of control-flow nesting. */
bool InForeachLoop() const;
/** Called to generate code for 'return' statement; value is the
expression in the return statement (if non-NULL), and
doCoherenceCheck indicates whether instructions should be generated

View File

@@ -60,16 +60,16 @@ export void mandelbrot_ispc(uniform float x0, uniform float y0,
// Note that we'll be doing programCount computations in parallel,
// so increment i by that much. This assumes that width evenly
// divides programCount.
for (uniform int i = 0; i < width; i += programCount) {
foreach (i = 0 ... width) {
// Figure out the position on the complex plane to compute the
// number of iterations at. Note that the x values are
// different across different program instances, since its
// initializer incorporates the value of the programIndex
// variable.
float x = x0 + (programIndex + i) * dx;
float x = x0 + i * dx;
float y = y0 + j * dy;
int index = j * width + i + programIndex;
int index = j * width + i;
output[index] = mandel(x, y, maxIterations);
}
}

View File

@@ -61,14 +61,12 @@ mandelbrot_scanlines(uniform int ybase, uniform int span,
uniform int ystart = ybase + taskIndex * span;
uniform int yend = ystart + span;
for (uniform int j = ystart; j < yend; ++j) {
for (uniform int i = 0; i < width; i += programCount) {
float x = x0 + (programIndex + i) * dx;
float y = y0 + j * dy;
foreach (yi = ystart ... yend, xi = 0 ... width) {
float x = x0 + xi * dx;
float y = y0 + yi * dy;
int index = j * width + i + programIndex;
output[index] = mandel(x, y, maxIterations);
}
int index = yi * width + xi;
output[index] = mandel(x, y, maxIterations);
}
}

View File

@@ -59,15 +59,13 @@ export void
black_scholes_ispc(uniform float Sa[], uniform float Xa[], uniform float Ta[],
uniform float ra[], uniform float va[],
uniform float result[], uniform int count) {
for (uniform int i = 0; i < count; i += programCount) {
float S = Sa[i + programIndex], X = Xa[i + programIndex];
float T = Ta[i + programIndex], r = ra[i + programIndex];
float v = va[i + programIndex];
foreach (i = 0 ... count) {
float S = Sa[i], X = Xa[i], T = Ta[i], r = ra[i], v = va[i];
float d1 = (log(S/X) + (r + v * v * .5f) * T) / (v * sqrt(T));
float d2 = d1 - v * sqrt(T);
result[i + programIndex] = S * CND(d1) - X * exp(-r * T) * CND(d2);
result[i] = S * CND(d1) - X * exp(-r * T) * CND(d2);
}
}
@@ -78,10 +76,8 @@ binomial_put_ispc(uniform float Sa[], uniform float Xa[], uniform float Ta[],
uniform float result[], uniform int count) {
float V[BINOMIAL_NUM];
for (uniform int i = 0; i < count; i += programCount) {
float S = Sa[i + programIndex], X = Xa[i + programIndex];
float T = Ta[i + programIndex], r = ra[i + programIndex];
float v = va[i + programIndex];
foreach (i = 0 ... count) {
float S = Sa[i], X = Xa[i], T = Ta[i], r = ra[i], v = va[i];
float dt = T / BINOMIAL_NUM;
float u = exp(v * sqrt(dt));
@@ -98,6 +94,6 @@ binomial_put_ispc(uniform float Sa[], uniform float Xa[], uniform float Ta[],
for (uniform int k = 0; k < j; ++k)
V[k] = ((1 - Pu) * V[k] + Pu * V[k + 1]) / disc;
result[i + programIndex] = V[0];
result[i] = V[0];
}
}

View File

@@ -199,10 +199,8 @@ int main(int argc, char *argv[]) {
}
fclose(f);
// round image resolution up to multiple of 16 to make things easy for
// the code that assigns pixels to ispc program instances
int height = (int(baseHeight * scale) + 0xf) & ~0xf;
int width = (int(baseWidth * scale) + 0xf) & ~0xf;
int height = int(baseHeight * scale);
int width = int(baseWidth * scale);
// allocate images; one to hold hit object ids, one to hold depth to
// the first intersection

View File

@@ -244,34 +244,15 @@ static void raytrace_tile(uniform int x0, uniform int x1,
uniform float widthScale = (float)(baseWidth) / (float)(width);
uniform float heightScale = (float)(baseHeight) / (float)(height);
static const uniform float udx[16] = { 0, 1, 0, 1, 2, 3, 2, 3,
0, 1, 0, 1, 2, 3, 2, 3 };
static const uniform float udy[16] = { 0, 0, 1, 1, 0, 0, 1, 1,
2, 2, 3, 3, 2, 2, 3, 3 };
foreach_tiled (y = y0 ... y1, x = x0 ... x1) {
Ray ray;
generateRay(raster2camera, camera2world, x*widthScale,
y*heightScale, ray);
BVHIntersect(nodes, triangles, ray);
// The outer loops are always over blocks of 4x4 pixels
for (uniform int y = y0; y < y1; y += 4) {
for (uniform int x = x0; x < x1; x += 4) {
// Now we have a block of 4x4=16 pixels to process; it will
// take 16/programCount iterations of this loop to process
// them.
for (uniform int o = 0; o < 16 / programCount; ++o) {
// Map program instances to samples in the udx/udy arrays
// to figure out which pixel each program instance is
// responsible for
const float dx = udx[o * programCount + programIndex];
const float dy = udy[o * programCount + programIndex];
Ray ray;
generateRay(raster2camera, camera2world, (x+dx)*widthScale,
(y+dy)*heightScale, ray);
BVHIntersect(nodes, triangles, ray);
int offset = (y + (int)dy) * width + (x + (int)dx);
image[offset] = ray.maxt;
id[offset] = ray.hitId;
}
}
int offset = y * width + x;
image[offset] = ray.maxt;
id[offset] = ray.hitId;
}
}

View File

@@ -43,9 +43,8 @@ stencil_step(uniform int x0, uniform int x1,
for (uniform int z = z0; z < z1; ++z) {
for (uniform int y = y0; y < y1; ++y) {
// Assumes that (x1-x0) % programCount == 0
for (uniform int x = x0; x < x1; x += programCount) {
int index = (z * Nxy) + (y * Nx) + x + programIndex;
foreach (x = x0 ... x1) {
int index = (z * Nxy) + (y * Nx) + x;
#define A_cur(x, y, z) Ain[index + (x) + ((y) * Nx) + ((z) * Nxy)]
#define A_next(x, y, z) Aout[index + (x) + ((y) * Nx) + ((z) * Nxy)]
float div = coef[0] * A_cur(0, 0, 0) +

View File

@@ -310,11 +310,7 @@ volume_tile(uniform int x0, uniform int y0, uniform int x1,
// by 4.
for (uniform int y = y0; y < y1; y += 4) {
for (uniform int x = x0; x < x1; x += 4) {
// For each such tile, process programCount pixels at a time,
// until we've done all 16 of them. Thus, we're also assuming
// that programCount <= 16 and that 16 is evenly dividible by
// programCount.
for (uniform int o = 0; o < 16; o += programCount) {
foreach (o = 0 ... 16) {
// These two arrays encode the mapping from [0,15] to
// offsets within the 4x4 pixel block so that we render
// each pixel inside the block
@@ -324,8 +320,7 @@ volume_tile(uniform int x0, uniform int y0, uniform int x1,
2, 2, 3, 3, 2, 2, 3, 3 };
// Figure out the pixel to render for this program instance
int xo = x + xoffsets[o + programIndex];
int yo = y + yoffsets[o + programIndex];
int xo = x + xoffsets[o], yo = y + yoffsets[o];
// Use viewing parameters to compute the corresponding ray
// for the pixel

3
lex.ll
View File

@@ -101,6 +101,8 @@ extern { return TOKEN_EXTERN; }
false { return TOKEN_FALSE; }
float { return TOKEN_FLOAT; }
for { return TOKEN_FOR; }
foreach { return TOKEN_FOREACH; }
foreach_tiled { return TOKEN_FOREACH_TILED; }
goto { return TOKEN_GOTO; }
if { return TOKEN_IF; }
inline { return TOKEN_INLINE; }
@@ -132,6 +134,7 @@ varying { return TOKEN_VARYING; }
void { return TOKEN_VOID; }
while { return TOKEN_WHILE; }
\"C\" { return TOKEN_STRING_C_LITERAL; }
\.\.\. { return TOKEN_DOTDOTDOT; }
L?\"(\\.|[^\\"])*\" { lStringConst(yylval, yylloc); return TOKEN_STRING_LITERAL; }

122
parse.yy
View File

@@ -62,8 +62,12 @@
(Current).name = NULL; /* new */ \
} \
while (0)
struct ForeachDimension;
}
%{
#include "ispc.h"
@@ -102,11 +106,11 @@ static void lFinalizeEnumeratorSymbols(std::vector<Symbol *> &enums,
const EnumType *enumType);
static const char *lBuiltinTokens[] = {
"bool", "break", "case", "cbreak", "ccontinue", "cdo", "cfor",
"assert", "bool", "break", "case", "cbreak", "ccontinue", "cdo", "cfor",
"cif", "cwhile", "const", "continue", "creturn", "default", "do", "double",
"else", "enum", "export", "extern", "false", "float", "for", "goto", "if",
"inline", "int", "int8", "int16", "int32", "int64", "launch", "NULL",
"print", "return", "signed", "sizeof",
"else", "enum", "export", "extern", "false", "float", "for", "foreach",
"foreach_tiled", "goto", "if", "inline", "int", "int8", "int16",
"int32", "int64", "launch", "NULL", "print", "return", "signed", "sizeof",
"static", "struct", "switch", "sync", "task", "true", "typedef", "uniform",
"unsigned", "varying", "void", "while", NULL
};
@@ -116,10 +120,26 @@ static const char *lParamListTokens[] = {
"int8", "int16", "int32", "int64", "signed", "struct", "true",
"uniform", "unsigned", "varying", "void", NULL
};
/** Parser-side record of a single foreach dimension: the symbol for the
    dimension's iteration variable plus the expressions on either side of
    the "..." in "sym = begin ... end". */
struct ForeachDimension {
    ForeachDimension(Symbol *s = NULL, Expr *b = NULL, Expr *e = NULL)
        : sym(s), beginExpr(b), endExpr(e) { }

    /** Iteration variable for this dimension. */
    Symbol *sym;
    /** Expressions giving the start and end of the range. */
    Expr *beginExpr, *endExpr;
};
%}
%union {
int32_t int32Val;
double floatVal;
int64_t int64Val;
std::string *stringVal;
const char *constCharPtr;
Expr *expr;
ExprList *exprList;
const Type *type;
@@ -136,13 +156,10 @@ static const char *lParamListTokens[] = {
StructDeclaration *structDeclaration;
std::vector<StructDeclaration *> *structDeclarationList;
const EnumType *enumType;
Symbol *enumerator;
std::vector<Symbol *> *enumeratorList;
int32_t int32Val;
double floatVal;
int64_t int64Val;
std::string *stringVal;
const char *constCharPtr;
Symbol *symbol;
std::vector<Symbol *> *symbolList;
ForeachDimension *foreachDimension;
std::vector<ForeachDimension *> *foreachDimensionList;
}
@@ -163,7 +180,7 @@ static const char *lParamListTokens[] = {
%token TOKEN_ENUM TOKEN_STRUCT TOKEN_TRUE TOKEN_FALSE
%token TOKEN_CASE TOKEN_DEFAULT TOKEN_IF TOKEN_ELSE TOKEN_SWITCH
%token TOKEN_WHILE TOKEN_DO TOKEN_LAUNCH
%token TOKEN_WHILE TOKEN_DO TOKEN_LAUNCH TOKEN_FOREACH TOKEN_FOREACH_TILED TOKEN_DOTDOTDOT
%token TOKEN_FOR TOKEN_GOTO TOKEN_CONTINUE TOKEN_BREAK TOKEN_RETURN
%token TOKEN_CIF TOKEN_CDO TOKEN_CFOR TOKEN_CWHILE TOKEN_CBREAK
%token TOKEN_CCONTINUE TOKEN_CRETURN TOKEN_SYNC TOKEN_PRINT TOKEN_ASSERT
@@ -194,8 +211,8 @@ static const char *lParamListTokens[] = {
%type <structDeclaration> struct_declaration
%type <structDeclarationList> struct_declaration_list
%type <enumeratorList> enumerator_list
%type <enumerator> enumerator
%type <symbolList> enumerator_list
%type <symbol> enumerator foreach_identifier
%type <enumType> enum_specifier
%type <type> specifier_qualifier_list struct_or_union_specifier
@@ -211,6 +228,9 @@ static const char *lParamListTokens[] = {
%type <constCharPtr> struct_or_union_name enum_identifier
%type <int32Val> int_constant soa_width_specifier
%type <foreachDimension> foreach_dimension_specifier
%type <foreachDimensionList> foreach_dimension_list
%start translation_unit
%%
@@ -1295,6 +1315,40 @@ cfor_scope
: TOKEN_CFOR { m->symbolTable->PushScope(); }
;
/* Matches the "foreach" keyword and immediately opens a new symbol table
   scope, so that the loop's dimension variables can later be added to
   it.  The iteration_statement action that builds the ForeachStmt pops
   the scope again. */
foreach_scope
: TOKEN_FOREACH { m->symbolTable->PushScope(); }
;
/* Same as foreach_scope, but for the "foreach_tiled" keyword: opens a
   new symbol table scope that the corresponding iteration_statement
   action pops. */
foreach_tiled_scope
: TOKEN_FOREACH_TILED { m->symbolTable->PushScope(); }
;
/* The name of a foreach dimension variable.  A new Symbol of type
   AtomicType::VaryingConstInt32 is created from the identifier's text;
   it is not added to the symbol table here (the iteration_statement
   actions do that once the whole dimension list has been parsed). */
foreach_identifier
: TOKEN_IDENTIFIER
{
$$ = new Symbol(yytext, @1, AtomicType::VaryingConstInt32);
}
;
/* One dimension of a foreach loop, written "var = begin ... end".  The
   variable's Symbol and the two range expressions are bundled into a
   newly allocated ForeachDimension. */
foreach_dimension_specifier
: foreach_identifier '=' assignment_expression TOKEN_DOTDOTDOT assignment_expression
{
$$ = new ForeachDimension($1, $3, $5);
}
;
/* Comma-separated list of one or more foreach dimensions, collected in
   source order into a newly allocated std::vector<ForeachDimension *>. */
foreach_dimension_list
    : foreach_dimension_specifier
      {
          $$ = new std::vector<ForeachDimension *>;
          $$->push_back($1);
      }
    | foreach_dimension_list ',' foreach_dimension_specifier
      {
          /* The implicit "$$ = $1" default only applies to rules without
             an action, so pass the list along explicitly rather than
             depending on what the parser skeleton leaves in $$. */
          $$ = $1;
          $$->push_back($3);
      }
    ;
iteration_statement
: TOKEN_WHILE '(' expression ')' statement
{ $$ = new ForStmt(NULL, $3, NULL, $5, false, @1); }
@@ -1320,6 +1374,44 @@ iteration_statement
{ $$ = new ForStmt($3, $4, new ExprStmt($5, @5), $7, true, @1);
m->symbolTable->PopScope();
}
| foreach_scope '(' foreach_dimension_list ')'
{
std::vector<ForeachDimension *> &dims = *$3;
for (unsigned int i = 0; i < dims.size(); ++i)
m->symbolTable->AddVariable(dims[i]->sym);
}
statement
{
std::vector<ForeachDimension *> &dims = *$3;
std::vector<Symbol *> syms;
std::vector<Expr *> begins, ends;
for (unsigned int i = 0; i < dims.size(); ++i) {
syms.push_back(dims[i]->sym);
begins.push_back(dims[i]->beginExpr);
ends.push_back(dims[i]->endExpr);
}
$$ = new ForeachStmt(syms, begins, ends, $6, false, @1);
m->symbolTable->PopScope();
}
| foreach_tiled_scope '(' foreach_dimension_list ')'
{
std::vector<ForeachDimension *> &dims = *$3;
for (unsigned int i = 0; i < dims.size(); ++i)
m->symbolTable->AddVariable(dims[i]->sym);
}
statement
{
std::vector<ForeachDimension *> &dims = *$3;
std::vector<Symbol *> syms;
std::vector<Expr *> begins, ends;
for (unsigned int i = 0; i < dims.size(); ++i) {
syms.push_back(dims[i]->sym);
begins.push_back(dims[i]->beginExpr);
ends.push_back(dims[i]->endExpr);
}
$$ = new ForeachStmt(syms, begins, ends, $6, true, @1);
m->symbolTable->PopScope();
}
;
jump_statement

473
stmt.cpp
View File

@@ -819,6 +819,17 @@ lSafeToRunWithAllLanesOff(Stmt *stmt) {
lSafeToRunWithAllLanesOff(fs->step) &&
lSafeToRunWithAllLanesOff(fs->stmts));
ForeachStmt *fes;
if ((fes = dynamic_cast<ForeachStmt *>(stmt)) != NULL) {
for (unsigned int i = 0; i < fes->startExprs.size(); ++i)
if (!lSafeToRunWithAllLanesOff(fes->startExprs[i]))
return false;
for (unsigned int i = 0; i < fes->endExprs.size(); ++i)
if (!lSafeToRunWithAllLanesOff(fes->endExprs[i]))
return false;
return lSafeToRunWithAllLanesOff(fes->stmts);
}
if (dynamic_cast<BreakStmt *>(stmt) != NULL ||
dynamic_cast<ContinueStmt *>(stmt) != NULL)
return true;
@@ -1592,6 +1603,463 @@ ContinueStmt::Print(int indent) const {
}
///////////////////////////////////////////////////////////////////////////
// ForeachStmt
/** Builds a foreach statement.
    @param lvs  symbols for the per-dimension iteration variables
    @param se   start expression for each dimension
    @param ee   end expression for each dimension
    @param s    statements making up the loop body
    @param t    true for 'foreach_tiled', false for plain 'foreach'
    @param pos  source position of the statement */
ForeachStmt::ForeachStmt(const std::vector<Symbol *> &lvs,
                         const std::vector<Expr *> &se,
                         const std::vector<Expr *> &ee,
                         Stmt *s, bool t, SourcePos pos)
    : Stmt(pos),
      dimVariables(lvs),
      startExprs(se),
      endExprs(ee),
      isTiled(t),
      stmts(s)
{
}
/* Loads the uniform counter for dimension 'dim' from uniformCounterPtr,
   broadcasts it across all vector lanes, adds a compile-time per-lane
   offset derived from 'spans', stores the resulting varying value to
   varyingCounterPtr and also returns it.
 */
static llvm::Value *
lUpdateVaryingCounter(int dim, int nDims, FunctionEmitContext *ctx,
                      llvm::Value *uniformCounterPtr,
                      llvm::Value *varyingCounterPtr,
                      const std::vector<int> &spans) {
    const int width = g->target.vectorWidth;

    // Broadcast the uniform counter into every lane of a vector.
    llvm::Value *counter = ctx->LoadInst(uniformCounterPtr);
    llvm::Value *smearCounter =
        llvm::UndefValue::get(LLVMTypes::Int32VectorType);
    for (int lane = 0; lane < width; ++lane)
        smearCounter =
            ctx->InsertInst(smearCounter, counter, lane, "smear_counter");

    // Per-lane offsets.  For example, for a 2D tiled foreach running
    // 8-wide with an inner span of 4 and an outer span of 2, the inner
    // dimension gets offsets (0,1,2,3,0,1,2,3) and the outer dimension
    // gets (0,0,0,0,1,1,1,1).
    //
    // A lane's index is first divided by the product of the spans of all
    // dimensions nested more deeply than this one; that product is the
    // same for every lane.
    int innerSpanProduct = 1;
    for (int inner = dim + 1; inner < nDims; ++inner)
        innerSpanProduct *= spans[inner];

    int32_t delta[ISPC_MAX_NVEC];
    for (int lane = 0; lane < width; ++lane) {
        // What is left after the division, wrapped to this dimension's
        // own span, is the lane's offset.
        delta[lane] = (lane / innerSpanProduct) % spans[dim];
    }

    // counter + offsets is the program-visible value; write it to memory
    // and hand it back to the caller as well.
    llvm::Value *varyingCounter =
        ctx->BinaryOperator(llvm::Instruction::Add, smearCounter,
                            LLVMInt32Vector(delta), "iter_val");
    ctx->StoreInst(varyingCounter, varyingCounterPtr);
    return varyingCounter;
}
/** Returns floor(log2(i)) for positive i, i.e. the index of the highest
    set bit.  Returns -1 when i is zero. */
static int
lLog2(int i) {
    int highBit = -1;
    for (; i != 0; i >>= 1)
        ++highBit;
    return highBit;
}
/* Decide how many elements each dimension of a foreach loop covers per
   trip through the loop, writing one span per dimension to a[0], a[1],
   ... (outermost dimension first).

   Untiled loops give every dimension but the innermost a span of 1 and
   hand the innermost whatever is left of the vector width.  Tiled loops
   favour wider spans for the inner dimensions while trying to keep the
   domain roughly square.

   dimsLeft is the number of dimensions still to be handled after this
   one, and itemsLeft is the number of vector lanes not yet assigned.
   The function recurses from the outer dimensions towards the inner
   ones.
 */
static void
lGetSpans(int dimsLeft, int nDims, int itemsLeft, bool isTiled, int *a) {
    if (dimsLeft == 0) {
        // Innermost dimension: it takes everything that remains.
        *a = itemsLeft;
        return;
    }

    int span;
    if (!isTiled || dimsLeft >= lLog2(itemsLeft)) {
        // Either tiling is off, or there are so many dimensions still to
        // come that widening this one would force a later one down to a
        // span of one; keep the remaining items for those.
        span = 1;
    }
    else if (dimsLeft == 1 && itemsLeft >= 16) {
        // 2D special case: use a 4x4 domain when running 16-wide.
        span = 4;
    }
    else {
        span = 2;
    }

    *a = span;
    lGetSpans(dimsLeft - 1, nDims, itemsLeft / span, isTiled, a + 1);
}
/* Emit code for a foreach statement. We effectively emit code to run the
set of n-dimensional nested loops corresponding to the dimensionality of
the foreach statement along with the extra logic to deal with mismatches
between the vector width we're compiling to and the number of elements
to process.

Overview of the basic blocks created per dimension i:
  foreach_test[i]  - updates the varying counter and the extras mask for
                     dimension i, then either moves on to the next inner
                     dimension's test (or to foreach_check_extras for
                     the innermost one) or, once the counter reaches the
                     end value, goes to foreach_reset[i].
  foreach_step[i]  - adds the span for dimension i to its uniform
                     counter and jumps back to foreach_test[i].
  foreach_reset[i] - for the outermost dimension, leaves the loop; for
                     the others, resets the mask and counter and jumps
                     to the enclosing dimension's foreach_step.
The loop body is emitted twice: once in foreach_body with the mask set
to all-on, and once in foreach_do_extras with the computed extras mask.
*/
void
ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
// Nothing to do without an insertion point or without a loop body.
if (ctx->GetCurrentBasicBlock() == NULL || stmts == NULL)
return;
llvm::BasicBlock *bbCheckExtras = ctx->CreateBasicBlock("foreach_check_extras");
llvm::BasicBlock *bbDoExtras = ctx->CreateBasicBlock("foreach_do_extras");
llvm::BasicBlock *bbBody = ctx->CreateBasicBlock("foreach_body");
llvm::BasicBlock *bbExit = ctx->CreateBasicBlock("foreach_exit");
// Remember the mask on entry; it is restored in foreach_exit below.
llvm::Value *oldMask = ctx->GetInternalMask();
ctx->StartForeach();
ctx->SetDebugPos(pos);
ctx->StartScope();
// This should be caught during typechecking
assert(startExprs.size() == dimVariables.size() &&
endExprs.size() == dimVariables.size());
int nDims = (int)dimVariables.size();
///////////////////////////////////////////////////////////////////////
// Setup: compute the number of items we have to work on in each
// dimension and a number of derived values.
std::vector<llvm::BasicBlock *> bbReset, bbStep, bbTest;
std::vector<llvm::Value *> startVals, endVals, uniformCounterPtrs;
std::vector<llvm::Value *> nItems, nExtras, alignedEnd;
std::vector<llvm::Value *> extrasMaskPtrs;
// span[i] is the number of elements of dimension i processed per loop
// iteration; see lGetSpans().
// NOTE(review): &span[0] assumes nDims >= 1 -- confirm that the parser
// never produces a foreach with an empty dimension list.
std::vector<int> span(nDims, 0);
lGetSpans(nDims-1, nDims, g->target.vectorWidth, isTiled, &span[0]);
for (int i = 0; i < nDims; ++i) {
// Basic blocks that we'll fill in later with the looping logic for
// this dimension.
bbReset.push_back(ctx->CreateBasicBlock("foreach_reset"));
bbStep.push_back(ctx->CreateBasicBlock("foreach_step"));
bbTest.push_back(ctx->CreateBasicBlock("foreach_test"));
// Start and end value for this loop dimension
llvm::Value *sv = startExprs[i]->GetValue(ctx);
llvm::Value *ev = endExprs[i]->GetValue(ctx);
// NOTE(review): returning here skips the EndForeach()/EndScope() calls
// made at foreach_exit, even though StartForeach()/StartScope() have
// already run above -- confirm that this imbalance is acceptable on
// the error path.
if (sv == NULL || ev == NULL)
return;
startVals.push_back(sv);
endVals.push_back(ev);
// nItems = endVal - startVal
nItems.push_back(ctx->BinaryOperator(llvm::Instruction::Sub, ev, sv,
"nitems"));
// nExtras = nItems % (span for this dimension)
// This gives us the number of extra elements we need to deal with
// at the end of the loop for this dimension that don't fit cleanly
// into a vector width.
nExtras.push_back(ctx->BinaryOperator(llvm::Instruction::SRem, nItems[i],
LLVMInt32(span[i]), "nextras"));
// alignedEnd = endVal - nExtras
alignedEnd.push_back(ctx->BinaryOperator(llvm::Instruction::Sub, ev,
nExtras[i], "aligned_end"));
///////////////////////////////////////////////////////////////////////
// Each dimension has a loop counter that is a uniform value that
// goes from startVal to endVal, in steps of the span for this
// dimension. Its value is only used internally here for looping
// logic and isn't directly available in the user's program code.
uniformCounterPtrs.push_back(ctx->AllocaInst(LLVMTypes::Int32Type,
"counter"));
ctx->StoreInst(startVals[i], uniformCounterPtrs[i]);
// There is also a varying variable that holds the set of index
// values for each dimension in the current loop iteration; this is
// the value that is program-visible.
dimVariables[i]->storagePtr = ctx->AllocaInst(LLVMTypes::Int32VectorType,
dimVariables[i]->name.c_str());
dimVariables[i]->parentFunction = ctx->GetFunction();
ctx->EmitVariableDebugInfo(dimVariables[i]);
// Each dimension also maintains a mask that represents which of
// the varying elements in the current iteration should be
// processed. (i.e. this is used to disable the lanes that have
// out-of-bounds offsets.)
extrasMaskPtrs.push_back(ctx->AllocaInst(LLVMTypes::MaskType, "extras mask"));
ctx->StoreInst(LLVMMaskAllOn, extrasMaskPtrs[i]);
}
// On to the outermost loop's test
ctx->BranchInst(bbTest[0]);
///////////////////////////////////////////////////////////////////////////
// foreach_reset: this code runs when we need to reset the counter for
// a given dimension in preparation for running through its loop again,
// after the enclosing level advances its counter.
for (int i = 0; i < nDims; ++i) {
ctx->SetCurrentBasicBlock(bbReset[i]);
// The outermost dimension has no enclosing level: when it runs out,
// the whole loop is finished.
if (i == 0)
ctx->BranchInst(bbExit);
else {
ctx->StoreInst(LLVMMaskAllOn, extrasMaskPtrs[i]);
ctx->StoreInst(startVals[i], uniformCounterPtrs[i]);
ctx->BranchInst(bbStep[i-1]);
}
}
///////////////////////////////////////////////////////////////////////////
// foreach_test
// inExtras[i] is true when dimension i, or any dimension enclosing it,
// is currently on its final, partial set of elements.
std::vector<llvm::Value *> inExtras;
for (int i = 0; i < nDims; ++i) {
ctx->SetCurrentBasicBlock(bbTest[i]);
// endVal > alignedEnd exactly when nExtras is positive for this
// dimension.
llvm::Value *haveExtras =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SGT,
endVals[i], alignedEnd[i], "have_extras");
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[i], "counter");
llvm::Value *atAlignedEnd =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
counter, alignedEnd[i], "at_aligned_end");
llvm::Value *inEx =
ctx->BinaryOperator(llvm::Instruction::And, haveExtras,
atAlignedEnd, "in_extras");
if (i == 0)
inExtras.push_back(inEx);
else
inExtras.push_back(ctx->BinaryOperator(llvm::Instruction::Or, inEx,
inExtras[i-1], "in_extras_all"));
// Refresh the program-visible varying counter for this dimension from
// its uniform counter.
llvm::Value *varyingCounter =
lUpdateVaryingCounter(i, nDims, ctx, uniformCounterPtrs[i],
dimVariables[i]->storagePtr, span);
// Broadcast the end value across all lanes for the compare below.
llvm::Value *smearEnd = llvm::UndefValue::get(LLVMTypes::Int32VectorType);
for (int j = 0; j < g->target.vectorWidth; ++j)
smearEnd = ctx->InsertInst(smearEnd, endVals[i], j, "smear_end");
// Do a vector compare of its value to the end value to generate a
// mask for this last bit of work.
llvm::Value *emask =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
varyingCounter, smearEnd);
emask = ctx->I1VecToBoolVec(emask);
if (i == 0)
ctx->StoreInst(emask, extrasMaskPtrs[i]);
else {
// FIXME: at least specialize the innermost loop to not do all
// this mask stuff each time through the test...
// The mask for this dimension is the enclosing dimension's mask ANDed
// with this dimension's own in-bounds mask.  Note that this local
// shadows the function-level oldMask declared near the top.
llvm::Value *oldMask = ctx->LoadInst(extrasMaskPtrs[i-1]);
llvm::Value *newMask =
ctx->BinaryOperator(llvm::Instruction::And, oldMask, emask,
"extras_mask");
ctx->StoreInst(newMask, extrasMaskPtrs[i]);
}
// While the uniform counter is still below the end value, continue
// with the next inner dimension's test (or with the extras check for
// the innermost dimension); otherwise reset this dimension.
llvm::Value *notAtEnd =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
counter, endVals[i]);
if (i != nDims-1)
ctx->BranchInst(bbTest[i+1], bbReset[i], notAtEnd);
else
ctx->BranchInst(bbCheckExtras, bbReset[i], notAtEnd);
}
///////////////////////////////////////////////////////////////////////////
// foreach_step: increment the uniform counter by the span for this
// dimension. Note that we don't increment the varying counter here as
// well but just generate its value in foreach_test, before the loop
// body runs.
for (int i = 0; i < nDims; ++i) {
ctx->SetCurrentBasicBlock(bbStep[i]);
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[i]);
llvm::Value *newCounter =
ctx->BinaryOperator(llvm::Instruction::Add, counter,
LLVMInt32(span[i]), "new_counter");
ctx->StoreInst(newCounter, uniformCounterPtrs[i]);
ctx->BranchInst(bbTest[i]);
}
///////////////////////////////////////////////////////////////////////////
// foreach_check_extras: see if we need to deal with any partial
// vector's worth of work that's left.
ctx->SetCurrentBasicBlock(bbCheckExtras);
ctx->AddInstrumentationPoint("foreach loop check extras");
ctx->BranchInst(bbDoExtras, bbBody, inExtras[nDims-1]);
///////////////////////////////////////////////////////////////////////////
// foreach_body: do a full vector's worth of work. We know that all
// lanes will be running here, so we explicitly set the mask to be 'all
// on'. This ends up being relatively straightforward: the varying loop
// counters have already been updated in foreach_test, so we just have
// the statements in the loop body emit their code.
ctx->SetCurrentBasicBlock(bbBody);
ctx->SetInternalMask(LLVMMaskAllOn);
ctx->AddInstrumentationPoint("foreach loop body");
stmts->EmitCode(ctx);
assert(ctx->GetCurrentBasicBlock() != NULL);
ctx->BranchInst(bbStep[nDims-1]);
///////////////////////////////////////////////////////////////////////////
// foreach_doextras: set the mask and have the statements emit their
// code again. Note that it's generally worthwhile having two copies
// of the statements' code, since the code above is emitted with the
// mask known to be all-on, which in turn leads to more efficient code
// for that case.
ctx->SetCurrentBasicBlock(bbDoExtras);
llvm::Value *mask = ctx->LoadInst(extrasMaskPtrs[nDims-1]);
ctx->SetInternalMask(mask);
stmts->EmitCode(ctx);
// NOTE(review): unlike the full-vector path above, there is no check
// here that the current basic block is still non-NULL after the body
// has been emitted.
ctx->BranchInst(bbStep[nDims-1]);
///////////////////////////////////////////////////////////////////////////
// foreach_exit: All done. Restore the old mask and clean up
ctx->SetCurrentBasicBlock(bbExit);
ctx->SetInternalMask(oldMask);
ctx->EndForeach();
ctx->EndScope();
}
/** Runs the optimization pass over the start and end expressions of
    every dimension and over the loop body, storing whatever each
    Optimize() call returns back into the statement.

    @return this statement, or NULL if any start expression, end
            expression or the body is NULL after optimization. */
Stmt *
ForeachStmt::Optimize() {
    bool anyErrors = false;

    for (unsigned int i = 0; i < startExprs.size(); ++i) {
        // Keep the returned expression: optimization may hand back a
        // different node, or NULL on error.
        if (startExprs[i] != NULL)
            startExprs[i] = startExprs[i]->Optimize();
        anyErrors |= (startExprs[i] == NULL);
    }

    for (unsigned int i = 0; i < endExprs.size(); ++i) {
        if (endExprs[i] != NULL)
            endExprs[i] = endExprs[i]->Optimize();
        anyErrors |= (endExprs[i] == NULL);
    }

    // This is the optimization pass, so the body must be optimized here
    // rather than type checked a second time.
    if (stmts != NULL)
        stmts = stmts->Optimize();
    anyErrors |= (stmts == NULL);

    return anyErrors ? NULL : this;
}
/** Type checks the foreach statement.  Every start and end expression is
    converted to AtomicType::UniformInt32 and then type checked, the body
    is type checked, and the number of start and end expressions is
    compared against the number of dimension variables.

    @return this statement, or NULL if any expression or the body failed
            to type check or the counts do not match (an error is
            reported for each count mismatch). */
Stmt *
ForeachStmt::TypeCheck() {
    bool anyErrors = false;

    for (unsigned int i = 0; i < startExprs.size(); ++i) {
        if (startExprs[i] != NULL)
            startExprs[i] = TypeConvertExpr(startExprs[i],
                                            AtomicType::UniformInt32,
                                            "foreach starting value");
        // Keep the result of type checking so that a failure (NULL) is
        // actually noticed below.
        if (startExprs[i] != NULL)
            startExprs[i] = startExprs[i]->TypeCheck();
        anyErrors |= (startExprs[i] == NULL);
    }

    for (unsigned int i = 0; i < endExprs.size(); ++i) {
        if (endExprs[i] != NULL)
            endExprs[i] = TypeConvertExpr(endExprs[i],
                                          AtomicType::UniformInt32,
                                          "foreach ending value");
        if (endExprs[i] != NULL)
            endExprs[i] = endExprs[i]->TypeCheck();
        anyErrors |= (endExprs[i] == NULL);
    }

    if (stmts != NULL)
        stmts = stmts->TypeCheck();
    anyErrors |= (stmts == NULL);

    // There must be exactly one start and one end expression for each
    // dimension variable.
    if (startExprs.size() < dimVariables.size()) {
        Error(pos, "Not enough initial values provided for \"foreach\" loop; "
              "got %d, expected %d\n", (int)startExprs.size(),
              (int)dimVariables.size());
        anyErrors = true;
    }
    else if (startExprs.size() > dimVariables.size()) {
        Error(pos, "Too many initial values provided for \"foreach\" loop; "
              "got %d, expected %d\n", (int)startExprs.size(),
              (int)dimVariables.size());
        anyErrors = true;
    }

    if (endExprs.size() < dimVariables.size()) {
        Error(pos, "Not enough ending values provided for \"foreach\" loop; "
              "got %d, expected %d\n", (int)endExprs.size(),
              (int)dimVariables.size());
        anyErrors = true;
    }
    else if (endExprs.size() > dimVariables.size()) {
        Error(pos, "Too many ending values provided for \"foreach\" loop; "
              "got %d, expected %d\n", (int)endExprs.size(),
              (int)dimVariables.size());
        anyErrors = true;
    }

    return anyErrors ? NULL : this;
}
/** Cost estimate for the loop: one uniform loop plus one simple
    arithmetic/logic op per dimension, plus the estimated cost of the
    body if there is one. */
int
ForeachStmt::EstimateCost() const {
    int bodyCost = 0;
    if (stmts)
        bodyCost = stmts->EstimateCost();
    return dimVariables.size() * (COST_UNIFORM_LOOP + COST_SIMPLE_ARITH_LOGIC_OP) +
        bodyCost;
}
/** Prints a description of the statement to stdout: a header with the
    source position, one line per dimension variable, the start and end
    expressions as comma-separated lists, and finally the loop body (if
    any) printed with a larger indent. */
void
ForeachStmt::Print(int indent) const {
    printf("%*cForeach Stmt", indent, ' ');
    pos.Print();
    printf("\n");

    // Dimension variables, one per line.
    for (unsigned int i = 0; i < dimVariables.size(); ++i) {
        if (dimVariables[i] == NULL)
            printf("%*cVar %d: NULL\n", indent+4, ' ', i);
        else
            printf("%*cVar %d: %s\n", indent+4, ' ', i,
                   dimVariables[i]->name.c_str());
    }

    // Start expressions: ", " between entries, newline after the last.
    printf("Start values:\n");
    for (unsigned int i = 0; i < startExprs.size(); ++i) {
        if (startExprs[i] == NULL)
            printf("NULL");
        else
            startExprs[i]->Print();
        printf("%s", (i + 1 == startExprs.size()) ? "\n" : ", ");
    }

    // End expressions, formatted the same way.
    printf("End values:\n");
    for (unsigned int i = 0; i < endExprs.size(); ++i) {
        if (endExprs[i] == NULL)
            printf("NULL");
        else
            endExprs[i]->Print();
        printf("%s", (i + 1 == endExprs.size()) ? "\n" : ", ");
    }

    if (stmts == NULL)
        return;
    printf("%*cStmts:\n", indent+4, ' ');
    stmts->Print(indent+8);
}
///////////////////////////////////////////////////////////////////////////
// ReturnStmt
@@ -1606,6 +2074,11 @@ ReturnStmt::EmitCode(FunctionEmitContext *ctx) const {
if (!ctx->GetCurrentBasicBlock())
return;
if (ctx->InForeachLoop()) {
Error(pos, "\"return\" statement is illegal inside a \"foreach\" loop.");
return;
}
ctx->SetDebugPos(pos);
ctx->CurrentLanesReturned(val, doCoherenceCheck);
}

25
stmt.h
View File

@@ -241,6 +241,31 @@ private:
};
/** @brief Statement implementation for parallel 'foreach' loops.

    Holds one iteration variable per loop dimension together with the
    expressions giving each dimension's start and end values, plus the
    loop body.  TypeCheck() reports an error unless startExprs and
    endExprs each contain exactly as many entries as dimVariables.
*/
class ForeachStmt : public Stmt {
public:
/** NOTE(review): the constructor is defined elsewhere; presumably each
    argument is stored in the corresponding member below ('tiled' in
    isTiled, 'bodyStatements' in stmts) -- confirm against stmt.cpp. */
ForeachStmt(const std::vector<Symbol *> &loopVars,
const std::vector<Expr *> &startExprs,
const std::vector<Expr *> &endExprs,
Stmt *bodyStatements, bool tiled, SourcePos pos);
void EmitCode(FunctionEmitContext *ctx) const;
/** Dumps the statement to stdout for debugging. */
void Print(int indent) const;
Stmt *Optimize();
/** Returns NULL if any error was found, otherwise this statement. */
Stmt *TypeCheck();
/** Per-dimension loop overhead plus the cost of the body, if any. */
int EstimateCost() const;
/** Iteration variable for each loop dimension.  Print() tolerates NULL
    entries. */
std::vector<Symbol *> dimVariables;
/** Start-value expression for each dimension. */
std::vector<Expr *> startExprs;
/** End-value expression for each dimension. */
std::vector<Expr *> endExprs;
/** NOTE(review): presumably true for 'foreach_tiled' and false for
    plain 'foreach' -- confirm against the parser. */
bool isTiled;
/** Loop body; may be NULL (EstimateCost() and Print() check for it). */
Stmt *stmts;
};
/** @brief Statement implementation for a 'return' or 'coherent' return
statement in the program. */
class ReturnStmt : public Stmt {

22
tests/foreach-1.ispc Normal file
View File

@@ -0,0 +1,22 @@
// Test: one-dimensional "foreach" over exactly programCount elements.
// NOTE(review): presumably the test harness compares the RET values
// written by f_f() with those written by result() -- confirm.
export uniform int width() { return programCount; }
// Clears val[], uses foreach to add (aFOO[i] - 1) to each val[i], then
// stores the sum of all val[] elements in every program instance's RET slot.
export void f_f(uniform float RET[], uniform float aFOO[]) {
uniform float val[programCount];
for (uniform int i = 0; i < programCount; ++i)
val[i] = 0;
foreach (i = 0 ... programCount)
val[i] += aFOO[i] - 1;
uniform float sum = 0;
for (uniform int i = 0; i < programCount; ++i)
sum += val[i];
RET[programIndex] = sum;
}
// Expected value: reduce_add(programIndex).
// NOTE(review): matches f_f() only if aFOO[i] == i + 1 -- confirm the
// harness input.
export void result(uniform float RET[]) {
RET[programIndex] = reduce_add(programIndex);
}

33
tests/foreach-10.ispc Normal file
View File

@@ -0,0 +1,33 @@
// Test: three-dimensional "foreach_tiled" over an NA x NB x NC array.
// Every element must be visited exactly once.
// NOTE(review): presumably the test harness compares the RET values
// written by f_f() with those written by result() -- confirm.
export uniform int width() { return programCount; }

// Clears the array, increments every element inside a foreach_tiled
// loop, then counts the elements that are not exactly 1.  The count is
// written to RET and is expected to be zero.
export void f_f(uniform float RET[], uniform float aFOO[]) {
#define NA 4
#define NB 8
#define NC 7
    uniform int a[NA][NB][NC];

    // The innermost loop must test and advance k (it previously used j),
    // so that the whole array is initialized.
    for (uniform int i = 0; i < NA; ++i)
        for (uniform int j = 0; j < NB; ++j)
            for (uniform int k = 0; k < NC; ++k)
                a[i][j][k] = 0;

    foreach_tiled (i = 0 ... NA, j = 0 ... NB, k = 0 ... NC) {
        a[i][j][k] += 1;
    }

    // Verify every element; again the innermost loop runs over k.
    uniform int errs = 0;
    for (uniform int i = 0; i < NA; ++i)
        for (uniform int j = 0; j < NB; ++j)
            for (uniform int k = 0; k < NC; ++k)
                if (a[i][j][k] != 1) {
//CO                    print("% % % = %\n", i, j, k, a[i][j][k]);
                    ++errs;
                }

    RET[programIndex] = errs;
}

// Expected value: no mismatching elements.
export void result(uniform float RET[]) {
    RET[programIndex] = 0;
}

22
tests/foreach-11.ispc Normal file
View File

@@ -0,0 +1,22 @@
// Test: one-dimensional "foreach_tiled" over exactly programCount elements.
// NOTE(review): presumably the test harness compares the RET values
// written by f_f() with those written by result() -- confirm.
export uniform int width() { return programCount; }
// Clears val[], uses foreach_tiled to add (aFOO[i] - 1) to each val[i],
// then stores the sum of all val[] elements in every RET slot.
export void f_f(uniform float RET[], uniform float aFOO[]) {
uniform float val[programCount];
for (uniform int i = 0; i < programCount; ++i)
val[i] = 0;
foreach_tiled (i = 0 ... programCount)
val[i] += aFOO[i] - 1;
uniform float sum = 0;
for (uniform int i = 0; i < programCount; ++i)
sum += val[i];
RET[programIndex] = sum;
}
// Expected value: reduce_add(programIndex).
// NOTE(review): matches f_f() only if aFOO[i] == i + 1 -- confirm the
// harness input.
export void result(uniform float RET[]) {
RET[programIndex] = reduce_add(programIndex);
}

26
tests/foreach-12.ispc Normal file
View File

@@ -0,0 +1,26 @@
// Test: "foreach_tiled" whose range starts at a non-zero value (2).
// NOTE(review): presumably the test harness compares the RET values
// written by f_f() with those written by result() -- confirm.
export uniform int width() { return programCount; }
// Declared but never called in this file.
uniform int foo(int i);
// Clears val[], adds i to val[i] for i in [2, programCount), then stores
// the sum of all val[] elements in every RET slot.
export void f_f(uniform float RET[], uniform float aFOO[]) {
uniform float val[programCount];
for (uniform int i = 0; i < programCount; ++i)
val[i] = 0;
foreach_tiled (i = 2 ... programCount)
val[i] += i;
uniform float sum = 0;
for (uniform int i = 0; i < programCount; ++i) {
sum += val[i];
}
RET[programIndex] = sum;
}
// Expected value: the sum of programIndex over the instances with
// programIndex >= 2 (the others contribute 0).
export void result(uniform float RET[]) {
int pi = (programIndex >= 2) ? programIndex : 0;
RET[programIndex] = reduce_add(pi);
}

19
tests/foreach-13.ispc Normal file
View File

@@ -0,0 +1,19 @@
// Test: "foreach_tiled" over a fixed two-element range (0 ... 2).
// NOTE(review): presumably the test harness compares the RET values
// written by f_f() with those written by result() -- confirm.
export uniform int width() { return programCount; }
// Declared but never called in this file.
uniform int foo(int i);
// Clears RET[], then writes i+1 to RET[i] inside the loop.
export void f_f(uniform float RET[], uniform float aFOO[]) {
for (uniform int i = 0; i < programCount; ++i)
RET[i] = 0;
foreach_tiled (i = 0 ... 2)
RET[i] = i+1;
}
// Expected values: only RET[0] and RET[1] are written by the loop, i.e.
// the end value 2 is treated as exclusive.
export void result(uniform float RET[]) {
RET[programIndex] = 0;
RET[0] = 1;
RET[1] = 2;
}

17
tests/foreach-14.ispc Normal file
View File

@@ -0,0 +1,17 @@
// Test: "foreach_tiled" whose start value (2) is greater than its end
// value (0).
// NOTE(review): presumably the test harness compares the RET values
// written by f_f() with those written by result() -- confirm.
export uniform int width() { return programCount; }
// Declared but never called in this file.
uniform int foo(int i);
// Clears RET[]; the loop body would change RET if it ever ran.
export void f_f(uniform float RET[], uniform float aFOO[]) {
for (uniform int i = 0; i < programCount; ++i)
RET[i] = 0;
foreach_tiled (i = 2 ... 0)
RET[i] += 1234;
}
// Expected values: RET stays all zero, i.e. the loop body never executes.
export void result(uniform float RET[]) {
RET[programIndex] = 0;
}

17
tests/foreach-15.ispc Normal file
View File

@@ -0,0 +1,17 @@
// Test: "foreach_tiled" over an empty range (start == end == 1).
// NOTE(review): presumably the test harness compares the RET values
// written by f_f() with those written by result() -- confirm.
export uniform int width() { return programCount; }
// Declared but never called in this file.
uniform int foo(int i);
// Clears RET[]; the loop body would change RET if it ever ran.
export void f_f(uniform float RET[], uniform float aFOO[]) {
for (uniform int i = 0; i < programCount; ++i)
RET[i] = 0;
foreach_tiled (i = 1 ... 1)
RET[i] = 1234;
}
// Expected values: RET stays all zero, i.e. the loop body never executes.
export void result(uniform float RET[]) {
RET[programIndex] = 0;
}

17
tests/foreach-16.ispc Normal file
View File

@@ -0,0 +1,17 @@
// Test: "foreach_tiled" with a negative start value.
// NOTE(review): presumably the test harness compares the RET values
// written by f_f() with those written by result() -- confirm.
export uniform int width() { return programCount; }
// Declared but never called in this file.
uniform int foo(int i);
// Clears RET[], then iterates i over [-2, programCount-2) and writes
// RET[i+2], so the written indices are 0 .. programCount-1.
export void f_f(uniform float RET[], uniform float aFOO[]) {
for (uniform int i = 0; i < programCount; ++i)
RET[i] = 0;
foreach_tiled (i = -2 ... programCount-2)
RET[i+2] = 1234;
}
// Expected values: every RET slot was written by the loop.
export void result(uniform float RET[]) {
RET[programIndex] = 1234;
}

13
tests/foreach-17.ispc Normal file
View File

@@ -0,0 +1,13 @@
// Test: "foreach_tiled" accumulating into a varying variable over a
// fixed six-element range.
// NOTE(review): presumably the test harness compares the RET values
// written by f_f() with those written by result() -- confirm.
export uniform int width() { return programCount; }
// Accumulates aFOO[0..5] into the varying 'sum' and writes
// reduce_add(sum) to RET.
export void f_f(uniform float RET[], uniform float aFOO[]) {
float sum = 0;
foreach_tiled (i = 0 ... 6)
sum += aFOO[i];
RET[programIndex] = reduce_add(sum);
}
// Expected value: 21.
// NOTE(review): 21 == 1+2+...+6, so this presumably relies on
// aFOO[i] == i + 1 -- confirm the harness input.
export void result(uniform float RET[]) {
RET[programIndex] = 21;
}

29
tests/foreach-18.ispc Normal file
View File

@@ -0,0 +1,29 @@
// Test: two-dimensional "foreach_tiled" over a 3 x 8 array.  Every
// element must be incremented exactly once.
// NOTE(review): presumably the test harness compares the RET values
// written by f_f() with those written by result() -- confirm.
export uniform int width() { return programCount; }
// Clears the array, increments each element in the foreach_tiled loop,
// then writes the number of elements that are not exactly 1 to RET.
export void f_f(uniform float RET[], uniform float aFOO[]) {
#define NA 3
#define NB 8
uniform int a[NA][NB];
for (uniform int i = 0; i < NA; ++i)
for (uniform int j = 0; j < NB; ++j)
a[i][j] = 0;
foreach_tiled (i = 0 ... NA, j = 0 ... NB) {
a[i][j] += 1;
}
uniform int errs = 0;
for (uniform int i = 0; i < NA; ++i)
for (uniform int j = 0; j < NB; ++j)
if (a[i][j] != 1) {
++errs;
}
RET[programIndex] = errs;
}
// Expected value: no mismatching elements.
export void result(uniform float RET[]) {
RET[programIndex] = 0;
}

29
tests/foreach-19.ispc Normal file
View File

@@ -0,0 +1,29 @@
// Test: two-dimensional "foreach_tiled" over a 3 x 4 array.  Every
// element must be incremented exactly once.
// NOTE(review): presumably the test harness compares the RET values
// written by f_f() with those written by result() -- confirm.
export uniform int width() { return programCount; }
// Clears the array, increments each element in the foreach_tiled loop,
// then writes the number of elements that are not exactly 1 to RET.
export void f_f(uniform float RET[], uniform float aFOO[]) {
#define NA 3
#define NB 4
uniform int a[NA][NB];
for (uniform int i = 0; i < NA; ++i)
for (uniform int j = 0; j < NB; ++j)
a[i][j] = 0;
foreach_tiled (i = 0 ... NA, j = 0 ... NB) {
a[i][j] += 1;
}
uniform int errs = 0;
for (uniform int i = 0; i < NA; ++i)
for (uniform int j = 0; j < NB; ++j)
if (a[i][j] != 1) {
++errs;
}
RET[programIndex] = errs;
}
// Expected value: no mismatching elements.
export void result(uniform float RET[]) {
RET[programIndex] = 0;
}

26
tests/foreach-2.ispc Normal file
View File

@@ -0,0 +1,26 @@
// Test: "foreach" whose range starts at a non-zero value (2).
// NOTE(review): presumably the test harness compares the RET values
// written by f_f() with those written by result() -- confirm.
export uniform int width() { return programCount; }
// Declared but never called in this file.
uniform int foo(int i);
// Clears val[], adds i to val[i] for i in [2, programCount), then stores
// the sum of all val[] elements in every RET slot.
export void f_f(uniform float RET[], uniform float aFOO[]) {
uniform float val[programCount];
for (uniform int i = 0; i < programCount; ++i)
val[i] = 0;
foreach (i = 2 ... programCount)
val[i] += i;
uniform float sum = 0;
for (uniform int i = 0; i < programCount; ++i) {
sum += val[i];
}
RET[programIndex] = sum;
}
// Expected value: the sum of programIndex over the instances with
// programIndex >= 2 (the others contribute 0).
export void result(uniform float RET[]) {
int pi = (programIndex >= 2) ? programIndex : 0;
RET[programIndex] = reduce_add(pi);
}

19
tests/foreach-3.ispc Normal file
View File

@@ -0,0 +1,19 @@
// Test: "foreach" over a fixed two-element range (0 ... 2).
// NOTE(review): presumably the test harness compares the RET values
// written by f_f() with those written by result() -- confirm.
export uniform int width() { return programCount; }
// Declared but never called in this file.
uniform int foo(int i);
// Clears RET[], then writes i+1 to RET[i] inside the loop.
export void f_f(uniform float RET[], uniform float aFOO[]) {
for (uniform int i = 0; i < programCount; ++i)
RET[i] = 0;
foreach (i = 0 ... 2)
RET[i] = i+1;
}
// Expected values: only RET[0] and RET[1] are written by the loop, i.e.
// the end value 2 is treated as exclusive.
export void result(uniform float RET[]) {
RET[programIndex] = 0;
RET[0] = 1;
RET[1] = 2;
}

17
tests/foreach-4.ispc Normal file
View File

@@ -0,0 +1,17 @@
// Test: "foreach" whose start value (2) is greater than its end value (0).
// NOTE(review): presumably the test harness compares the RET values
// written by f_f() with those written by result() -- confirm.
export uniform int width() { return programCount; }
// Declared but never called in this file.
uniform int foo(int i);
// Clears RET[]; the loop body would change RET if it ever ran.
export void f_f(uniform float RET[], uniform float aFOO[]) {
for (uniform int i = 0; i < programCount; ++i)
RET[i] = 0;
foreach (i = 2 ... 0)
RET[i] -= 1234;
}
// Expected values: RET stays all zero, i.e. the loop body never executes.
export void result(uniform float RET[]) {
RET[programIndex] = 0;
}

17
tests/foreach-5.ispc Normal file
View File

@@ -0,0 +1,17 @@
// Test: "foreach" over an empty range (start == end == 1).
// NOTE(review): presumably the test harness compares the RET values
// written by f_f() with those written by result() -- confirm.
export uniform int width() { return programCount; }
// Declared but never called in this file.
uniform int foo(int i);
// Clears RET[]; the loop body would change RET if it ever ran.
export void f_f(uniform float RET[], uniform float aFOO[]) {
for (uniform int i = 0; i < programCount; ++i)
RET[i] = 0;
foreach (i = 1 ... 1)
RET[i] = 1234;
}
// Expected values: RET stays all zero, i.e. the loop body never executes.
export void result(uniform float RET[]) {
RET[programIndex] = 0;
}

17
tests/foreach-6.ispc Normal file
View File

@@ -0,0 +1,17 @@
// Test: "foreach" with a negative start value.
// NOTE(review): presumably the test harness compares the RET values
// written by f_f() with those written by result() -- confirm.
export uniform int width() { return programCount; }
// Declared but never called in this file.
uniform int foo(int i);
// Clears RET[], then iterates i over [-2, programCount-2) and adds 1234
// to RET[i+2], so the touched indices are 0 .. programCount-1.
export void f_f(uniform float RET[], uniform float aFOO[]) {
for (uniform int i = 0; i < programCount; ++i)
RET[i] = 0;
foreach (i = -2 ... programCount-2)
RET[i+2] += 1234;
}
// Expected values: every RET slot was updated exactly once.
export void result(uniform float RET[]) {
RET[programIndex] = 1234;
}

13
tests/foreach-7.ispc Normal file
View File

@@ -0,0 +1,13 @@
// Test: "foreach" accumulating into a varying variable over a fixed
// six-element range.
// NOTE(review): presumably the test harness compares the RET values
// written by f_f() with those written by result() -- confirm.
export uniform int width() { return programCount; }
// Accumulates aFOO[0..5] into the varying 'sum' and writes
// reduce_add(sum) to RET.
export void f_f(uniform float RET[], uniform float aFOO[]) {
float sum = 0;
foreach (i = 0 ... 6)
sum += aFOO[i];
RET[programIndex] = reduce_add(sum);
}
// Expected value: 21.
// NOTE(review): 21 == 1+2+...+6, so this presumably relies on
// aFOO[i] == i + 1 -- confirm the harness input.
export void result(uniform float RET[]) {
RET[programIndex] = 21;
}

23
tests/foreach-8.ispc Normal file
View File

@@ -0,0 +1,23 @@
// Test: two-dimensional "foreach" compared against an equivalent pair of
// hand-written loops with explicit per-lane bounds checking.
// NOTE(review): presumably the test harness compares the RET values
// written by f_f() with those written by result() -- confirm.
export uniform int width() { return programCount; }
// sum1 is accumulated by the foreach loop (the x dimension only repeats
// the work; the body does not use x).  sum2 is accumulated by the manual
// loops, which step i by programCount and mask off indices >= 6.  The
// per-lane difference sum1 - sum2 is written to RET.
export void f_f(uniform float RET[], uniform float aFOO[]) {
float sum1 = 0, sum2 = 0;
foreach (x = 0 ... 10, i = 0 ... 6) {
sum1 += aFOO[i];
}
for (uniform int x = 0; x < 10; ++x) {
for (uniform int i = 0; i < 6; i += programCount) {
int index = i + programIndex;
if (index < 6)
sum2 += aFOO[index];
}
}
RET[programIndex] = sum1 - sum2;
}
// Expected value: both accumulations agree in every program instance.
export void result(uniform float RET[]) {
RET[programIndex] = 0;
}

29
tests/foreach-9.ispc Normal file
View File

@@ -0,0 +1,29 @@
// Test: two-dimensional "foreach" over a small 1 x 3 array.  Every
// element must be incremented exactly once.
// NOTE(review): presumably the test harness compares the RET values
// written by f_f() with those written by result() -- confirm.
export uniform int width() { return programCount; }
#define NA 1
#define NB 3
// Clears the array, increments each element in the foreach loop, then
// writes the number of elements that are not exactly 1 to RET.
export void f_f(uniform float RET[], uniform float aFOO[]) {
uniform int a[NA][NB];
for (uniform int i = 0; i < NA; ++i)
for (uniform int j = 0; j < NB; ++j)
a[i][j] = 0;
foreach (i = 0 ... NA, j = 0 ... NB) {
a[i][j] += 1;
}
uniform int errs = 0;
for (uniform int i = 0; i < NA; ++i)
for (uniform int j = 0; j < NB; ++j)
if (a[i][j] != 1)
++errs;
RET[programIndex] = errs;
}
// Expected value: no mismatching elements.
export void result(uniform float RET[]) {
RET[programIndex] = 0;
}