Add foreach and foreach_tiled looping constructs
These make it easier to iterate over arbitrary amounts of data elements; specifically, they automatically handle the "ragged extra bits" that come up when the number of elements to be processed isn't evenly divided by programCount. TODO: documentation
This commit is contained in:
75
ctx.cpp
75
ctx.cpp
@@ -68,12 +68,19 @@ struct CFInfo {
|
||||
llvm::Value *savedContinueLanesPtr,
|
||||
llvm::Value *savedMask, llvm::Value *savedLoopMask);
|
||||
|
||||
static CFInfo *GetForeach(llvm::BasicBlock *breakTarget,
|
||||
llvm::BasicBlock *continueTarget,
|
||||
llvm::Value *savedBreakLanesPtr,
|
||||
llvm::Value *savedContinueLanesPtr,
|
||||
llvm::Value *savedMask, llvm::Value *savedLoopMask);
|
||||
|
||||
bool IsIf() { return type == If; }
|
||||
bool IsLoop() { return type == Loop; }
|
||||
bool IsForeach() { return type == Foreach; }
|
||||
bool IsVaryingType() { return !isUniform; }
|
||||
bool IsUniform() { return isUniform; }
|
||||
|
||||
enum CFType { If, Loop };
|
||||
enum CFType { If, Loop, Foreach };
|
||||
CFType type;
|
||||
bool isUniform;
|
||||
llvm::BasicBlock *savedBreakTarget, *savedContinueTarget;
|
||||
@@ -102,6 +109,19 @@ private:
|
||||
savedMask = sm;
|
||||
savedLoopMask = lm;
|
||||
}
|
||||
/** Constructor for 'foreach' control-flow records (see GetForeach()).
    Only the Foreach type is accepted, and the record is always marked
    as non-uniform. */
CFInfo(CFType t, llvm::BasicBlock *bt, llvm::BasicBlock *ct,
       llvm::Value *sb, llvm::Value *sc, llvm::Value *sm,
       llvm::Value *lm) {
    assert(t == Foreach);

    // Kind of construct this record describes.
    type = t;
    isUniform = false;

    // Break/continue state of the enclosing code.
    savedBreakTarget = bt;
    savedBreakLanesPtr = sb;
    savedContinueTarget = ct;
    savedContinueLanesPtr = sc;

    // Masks that were in effect on entry.
    savedMask = sm;
    savedLoopMask = lm;
}
|
||||
};
|
||||
|
||||
|
||||
@@ -122,6 +142,18 @@ CFInfo::GetLoop(bool isUniform, llvm::BasicBlock *breakTarget,
|
||||
savedMask, savedLoopMask);
|
||||
}
|
||||
|
||||
|
||||
/** Allocates (with new) the CFInfo record for a 'foreach' loop.  The
    arguments are the break/continue targets, break/continue lane
    pointers and masks to remember while the loop is being emitted. */
CFInfo *
CFInfo::GetForeach(llvm::BasicBlock *breakTarget,
                   llvm::BasicBlock *continueTarget,
                   llvm::Value *savedBreakLanesPtr,
                   llvm::Value *savedContinueLanesPtr,
                   llvm::Value *savedMask, llvm::Value *savedForeachMask) {
    CFInfo *info = new CFInfo(Foreach, breakTarget, continueTarget,
                              savedBreakLanesPtr, savedContinueLanesPtr,
                              savedMask, savedForeachMask);
    return info;
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
FunctionEmitContext::FunctionEmitContext(Function *func, Symbol *funSym,
|
||||
@@ -422,7 +454,7 @@ FunctionEmitContext::StartLoop(llvm::BasicBlock *bt, llvm::BasicBlock *ct,
|
||||
|
||||
void
|
||||
FunctionEmitContext::EndLoop() {
|
||||
assert(controlFlowInfo.size() && !controlFlowInfo.back()->IsIf());
|
||||
assert(controlFlowInfo.size() && controlFlowInfo.back()->IsLoop());
|
||||
CFInfo *ci = controlFlowInfo.back();
|
||||
controlFlowInfo.pop_back();
|
||||
|
||||
@@ -444,6 +476,36 @@ FunctionEmitContext::EndLoop() {
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
FunctionEmitContext::StartForeach() {
|
||||
// Store the current values of various loop-related state so that we
|
||||
// can restore it when we exit this loop.
|
||||
llvm::Value *oldMask = GetInternalMask();
|
||||
controlFlowInfo.push_back(CFInfo::GetForeach(breakTarget, continueTarget, breakLanesPtr,
|
||||
continueLanesPtr, oldMask, loopMask));
|
||||
continueLanesPtr = breakLanesPtr = NULL;
|
||||
breakTarget = NULL;
|
||||
continueTarget = NULL;
|
||||
loopMask = NULL;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
FunctionEmitContext::EndForeach() {
|
||||
assert(controlFlowInfo.size() && controlFlowInfo.back()->IsForeach());
|
||||
CFInfo *ci = controlFlowInfo.back();
|
||||
controlFlowInfo.pop_back();
|
||||
|
||||
// Restore the break/continue state information to what it was before
|
||||
// we went into this loop.
|
||||
breakTarget = ci->savedBreakTarget;
|
||||
continueTarget = ci->savedContinueTarget;
|
||||
breakLanesPtr = ci->savedBreakLanesPtr;
|
||||
continueLanesPtr = ci->savedContinueLanesPtr;
|
||||
loopMask = ci->savedLoopMask;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
FunctionEmitContext::restoreMaskGivenReturns(llvm::Value *oldMask) {
|
||||
if (!bblock)
|
||||
@@ -638,6 +700,15 @@ FunctionEmitContext::VaryingCFDepth() const {
|
||||
}
|
||||
|
||||
|
||||
bool
|
||||
FunctionEmitContext::InForeachLoop() const {
|
||||
for (unsigned int i = 0; i < controlFlowInfo.size(); ++i)
|
||||
if (controlFlowInfo[i]->IsForeach())
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
FunctionEmitContext::CurrentLanesReturned(Expr *expr, bool doCoherenceCheck) {
|
||||
const Type *returnType = function->GetReturnType();
|
||||
|
||||
6
ctx.h
6
ctx.h
@@ -159,6 +159,10 @@ public:
|
||||
finished. */
|
||||
void EndLoop();
|
||||
|
||||
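/** StartForeach() saves the current break/continue targets,
    break/continue lane pointers, internal mask and loop mask on the
    control-flow stack and clears the live copies for the duration of
    a 'foreach' loop.  EndForeach() pops that record and restores the
    targets, lane pointers and loop mask. */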
|
||||
void StartForeach();
|
||||
void EndForeach();
|
||||
|
||||
/** Emit code for a 'break' statement in a loop. If doCoherenceCheck
|
||||
is true, then if we're in a 'varying' loop, code will be emitted to
|
||||
see if all of the lanes want to break, in which case a jump to the
|
||||
@@ -183,6 +187,8 @@ public:
|
||||
flow */
|
||||
int VaryingCFDepth() const;
|
||||
|
||||
bool InForeachLoop() const;
|
||||
|
||||
/** Called to generate code for 'return' statement; value is the
|
||||
expression in the return statement (if non-NULL), and
|
||||
doCoherenceCheck indicates whether instructions should be generated
|
||||
|
||||
@@ -60,16 +60,16 @@ export void mandelbrot_ispc(uniform float x0, uniform float y0,
|
||||
// Note that we'll be doing programCount computations in parallel,
|
||||
// so increment i by that much.  This assumes that programCount
|
||||
// evenly divides width.
|
||||
for (uniform int i = 0; i < width; i += programCount) {
|
||||
foreach (i = 0 ... width) {
|
||||
// Figure out the position on the complex plane to compute the
|
||||
// number of iterations at. Note that the x values are
|
||||
// different across different program instances, since its
|
||||
// initializer incorporates the value of the programIndex
|
||||
// variable.
|
||||
float x = x0 + (programIndex + i) * dx;
|
||||
float x = x0 + i * dx;
|
||||
float y = y0 + j * dy;
|
||||
|
||||
int index = j * width + i + programIndex;
|
||||
int index = j * width + i;
|
||||
output[index] = mandel(x, y, maxIterations);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -61,14 +61,12 @@ mandelbrot_scanlines(uniform int ybase, uniform int span,
|
||||
uniform int ystart = ybase + taskIndex * span;
|
||||
uniform int yend = ystart + span;
|
||||
|
||||
for (uniform int j = ystart; j < yend; ++j) {
|
||||
for (uniform int i = 0; i < width; i += programCount) {
|
||||
float x = x0 + (programIndex + i) * dx;
|
||||
float y = y0 + j * dy;
|
||||
foreach (yi = ystart ... yend, xi = 0 ... width) {
|
||||
float x = x0 + xi * dx;
|
||||
float y = y0 + yi * dy;
|
||||
|
||||
int index = j * width + i + programIndex;
|
||||
output[index] = mandel(x, y, maxIterations);
|
||||
}
|
||||
int index = yi * width + xi;
|
||||
output[index] = mandel(x, y, maxIterations);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -59,15 +59,13 @@ export void
|
||||
black_scholes_ispc(uniform float Sa[], uniform float Xa[], uniform float Ta[],
|
||||
uniform float ra[], uniform float va[],
|
||||
uniform float result[], uniform int count) {
|
||||
for (uniform int i = 0; i < count; i += programCount) {
|
||||
float S = Sa[i + programIndex], X = Xa[i + programIndex];
|
||||
float T = Ta[i + programIndex], r = ra[i + programIndex];
|
||||
float v = va[i + programIndex];
|
||||
foreach (i = 0 ... count) {
|
||||
float S = Sa[i], X = Xa[i], T = Ta[i], r = ra[i], v = va[i];
|
||||
|
||||
float d1 = (log(S/X) + (r + v * v * .5f) * T) / (v * sqrt(T));
|
||||
float d2 = d1 - v * sqrt(T);
|
||||
|
||||
result[i + programIndex] = S * CND(d1) - X * exp(-r * T) * CND(d2);
|
||||
result[i] = S * CND(d1) - X * exp(-r * T) * CND(d2);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -78,10 +76,8 @@ binomial_put_ispc(uniform float Sa[], uniform float Xa[], uniform float Ta[],
|
||||
uniform float result[], uniform int count) {
|
||||
float V[BINOMIAL_NUM];
|
||||
|
||||
for (uniform int i = 0; i < count; i += programCount) {
|
||||
float S = Sa[i + programIndex], X = Xa[i + programIndex];
|
||||
float T = Ta[i + programIndex], r = ra[i + programIndex];
|
||||
float v = va[i + programIndex];
|
||||
foreach (i = 0 ... count) {
|
||||
float S = Sa[i], X = Xa[i], T = Ta[i], r = ra[i], v = va[i];
|
||||
|
||||
float dt = T / BINOMIAL_NUM;
|
||||
float u = exp(v * sqrt(dt));
|
||||
@@ -98,6 +94,6 @@ binomial_put_ispc(uniform float Sa[], uniform float Xa[], uniform float Ta[],
|
||||
for (uniform int k = 0; k < j; ++k)
|
||||
V[k] = ((1 - Pu) * V[k] + Pu * V[k + 1]) / disc;
|
||||
|
||||
result[i + programIndex] = V[0];
|
||||
result[i] = V[0];
|
||||
}
|
||||
}
|
||||
|
||||
@@ -199,10 +199,8 @@ int main(int argc, char *argv[]) {
|
||||
}
|
||||
fclose(f);
|
||||
|
||||
// round image resolution up to multiple of 16 to make things easy for
|
||||
// the code that assigns pixels to ispc program instances
|
||||
int height = (int(baseHeight * scale) + 0xf) & ~0xf;
|
||||
int width = (int(baseWidth * scale) + 0xf) & ~0xf;
|
||||
int height = int(baseHeight * scale);
|
||||
int width = int(baseWidth * scale);
|
||||
|
||||
// allocate images; one to hold hit object ids, one to hold depth to
|
||||
// the first intersection
|
||||
|
||||
@@ -244,34 +244,15 @@ static void raytrace_tile(uniform int x0, uniform int x1,
|
||||
uniform float widthScale = (float)(baseWidth) / (float)(width);
|
||||
uniform float heightScale = (float)(baseHeight) / (float)(height);
|
||||
|
||||
static const uniform float udx[16] = { 0, 1, 0, 1, 2, 3, 2, 3,
|
||||
0, 1, 0, 1, 2, 3, 2, 3 };
|
||||
static const uniform float udy[16] = { 0, 0, 1, 1, 0, 0, 1, 1,
|
||||
2, 2, 3, 3, 2, 2, 3, 3 };
|
||||
foreach_tiled (y = y0 ... y1, x = x0 ... x1) {
|
||||
Ray ray;
|
||||
generateRay(raster2camera, camera2world, x*widthScale,
|
||||
y*heightScale, ray);
|
||||
BVHIntersect(nodes, triangles, ray);
|
||||
|
||||
// The outer loops are always over blocks of 4x4 pixels
|
||||
for (uniform int y = y0; y < y1; y += 4) {
|
||||
for (uniform int x = x0; x < x1; x += 4) {
|
||||
// Now we have a block of 4x4=16 pixels to process; it will
|
||||
// take 16/programCount iterations of this loop to process
|
||||
// them.
|
||||
for (uniform int o = 0; o < 16 / programCount; ++o) {
|
||||
// Map program instances to samples in the udx/udy arrays
|
||||
// to figure out which pixel each program instance is
|
||||
// responsible for
|
||||
const float dx = udx[o * programCount + programIndex];
|
||||
const float dy = udy[o * programCount + programIndex];
|
||||
|
||||
Ray ray;
|
||||
generateRay(raster2camera, camera2world, (x+dx)*widthScale,
|
||||
(y+dy)*heightScale, ray);
|
||||
BVHIntersect(nodes, triangles, ray);
|
||||
|
||||
int offset = (y + (int)dy) * width + (x + (int)dx);
|
||||
image[offset] = ray.maxt;
|
||||
id[offset] = ray.hitId;
|
||||
}
|
||||
}
|
||||
int offset = y * width + x;
|
||||
image[offset] = ray.maxt;
|
||||
id[offset] = ray.hitId;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -43,9 +43,8 @@ stencil_step(uniform int x0, uniform int x1,
|
||||
|
||||
for (uniform int z = z0; z < z1; ++z) {
|
||||
for (uniform int y = y0; y < y1; ++y) {
|
||||
// Assumes that (x1-x0) % programCount == 0
|
||||
for (uniform int x = x0; x < x1; x += programCount) {
|
||||
int index = (z * Nxy) + (y * Nx) + x + programIndex;
|
||||
foreach (x = x0 ... x1) {
|
||||
int index = (z * Nxy) + (y * Nx) + x;
|
||||
#define A_cur(x, y, z) Ain[index + (x) + ((y) * Nx) + ((z) * Nxy)]
|
||||
#define A_next(x, y, z) Aout[index + (x) + ((y) * Nx) + ((z) * Nxy)]
|
||||
float div = coef[0] * A_cur(0, 0, 0) +
|
||||
|
||||
@@ -310,11 +310,7 @@ volume_tile(uniform int x0, uniform int y0, uniform int x1,
|
||||
// by 4.
|
||||
for (uniform int y = y0; y < y1; y += 4) {
|
||||
for (uniform int x = x0; x < x1; x += 4) {
|
||||
// For each such tile, process programCount pixels at a time,
|
||||
// until we've done all 16 of them. Thus, we're also assuming
|
||||
// that programCount <= 16 and that 16 is evenly divisible by
|
||||
// programCount.
|
||||
for (uniform int o = 0; o < 16; o += programCount) {
|
||||
foreach (o = 0 ... 16) {
|
||||
// These two arrays encode the mapping from [0,15] to
|
||||
// offsets within the 4x4 pixel block so that we render
|
||||
// each pixel inside the block
|
||||
@@ -324,8 +320,7 @@ volume_tile(uniform int x0, uniform int y0, uniform int x1,
|
||||
2, 2, 3, 3, 2, 2, 3, 3 };
|
||||
|
||||
// Figure out the pixel to render for this program instance
|
||||
int xo = x + xoffsets[o + programIndex];
|
||||
int yo = y + yoffsets[o + programIndex];
|
||||
int xo = x + xoffsets[o], yo = y + yoffsets[o];
|
||||
|
||||
// Use viewing parameters to compute the corresponding ray
|
||||
// for the pixel
|
||||
|
||||
3
lex.ll
3
lex.ll
@@ -101,6 +101,8 @@ extern { return TOKEN_EXTERN; }
|
||||
false { return TOKEN_FALSE; }
|
||||
float { return TOKEN_FLOAT; }
|
||||
for { return TOKEN_FOR; }
|
||||
foreach { return TOKEN_FOREACH; }
|
||||
foreach_tiled { return TOKEN_FOREACH_TILED; }
|
||||
goto { return TOKEN_GOTO; }
|
||||
if { return TOKEN_IF; }
|
||||
inline { return TOKEN_INLINE; }
|
||||
@@ -132,6 +134,7 @@ varying { return TOKEN_VARYING; }
|
||||
void { return TOKEN_VOID; }
|
||||
while { return TOKEN_WHILE; }
|
||||
\"C\" { return TOKEN_STRING_C_LITERAL; }
|
||||
\.\.\. { return TOKEN_DOTDOTDOT; }
|
||||
|
||||
L?\"(\\.|[^\\"])*\" { lStringConst(yylval, yylloc); return TOKEN_STRING_LITERAL; }
|
||||
|
||||
|
||||
122
parse.yy
122
parse.yy
@@ -62,8 +62,12 @@
|
||||
(Current).name = NULL; /* new */ \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
struct ForeachDimension;
|
||||
|
||||
}
|
||||
|
||||
|
||||
%{
|
||||
|
||||
#include "ispc.h"
|
||||
@@ -102,11 +106,11 @@ static void lFinalizeEnumeratorSymbols(std::vector<Symbol *> &enums,
|
||||
const EnumType *enumType);
|
||||
|
||||
static const char *lBuiltinTokens[] = {
|
||||
"bool", "break", "case", "cbreak", "ccontinue", "cdo", "cfor",
|
||||
"assert", "bool", "break", "case", "cbreak", "ccontinue", "cdo", "cfor",
|
||||
"cif", "cwhile", "const", "continue", "creturn", "default", "do", "double",
|
||||
"else", "enum", "export", "extern", "false", "float", "for", "goto", "if",
|
||||
"inline", "int", "int8", "int16", "int32", "int64", "launch", "NULL",
|
||||
"print", "return", "signed", "sizeof",
|
||||
"else", "enum", "export", "extern", "false", "float", "for", "foreach",
|
||||
"foreach_tiled", "goto", "if", "inline", "int", "int8", "int16",
|
||||
"int32", "int64", "launch", "NULL", "print", "return", "signed", "sizeof",
|
||||
"static", "struct", "switch", "sync", "task", "true", "typedef", "uniform",
|
||||
"unsigned", "varying", "void", "while", NULL
|
||||
};
|
||||
@@ -116,10 +120,26 @@ static const char *lParamListTokens[] = {
|
||||
"int8", "int16", "int32", "int64", "signed", "struct", "true",
|
||||
"uniform", "unsigned", "varying", "void", NULL
|
||||
};
|
||||
|
||||
|
||||
struct ForeachDimension {
|
||||
ForeachDimension(Symbol *s = NULL, Expr *b = NULL, Expr *e = NULL) {
|
||||
sym = s;
|
||||
beginExpr = b;
|
||||
endExpr = e;
|
||||
}
|
||||
Symbol *sym;
|
||||
Expr *beginExpr, *endExpr;
|
||||
};
|
||||
|
||||
%}
|
||||
|
||||
%union {
|
||||
int32_t int32Val;
|
||||
double floatVal;
|
||||
int64_t int64Val;
|
||||
std::string *stringVal;
|
||||
const char *constCharPtr;
|
||||
|
||||
Expr *expr;
|
||||
ExprList *exprList;
|
||||
const Type *type;
|
||||
@@ -136,13 +156,10 @@ static const char *lParamListTokens[] = {
|
||||
StructDeclaration *structDeclaration;
|
||||
std::vector<StructDeclaration *> *structDeclarationList;
|
||||
const EnumType *enumType;
|
||||
Symbol *enumerator;
|
||||
std::vector<Symbol *> *enumeratorList;
|
||||
int32_t int32Val;
|
||||
double floatVal;
|
||||
int64_t int64Val;
|
||||
std::string *stringVal;
|
||||
const char *constCharPtr;
|
||||
Symbol *symbol;
|
||||
std::vector<Symbol *> *symbolList;
|
||||
ForeachDimension *foreachDimension;
|
||||
std::vector<ForeachDimension *> *foreachDimensionList;
|
||||
}
|
||||
|
||||
|
||||
@@ -163,7 +180,7 @@ static const char *lParamListTokens[] = {
|
||||
%token TOKEN_ENUM TOKEN_STRUCT TOKEN_TRUE TOKEN_FALSE
|
||||
|
||||
%token TOKEN_CASE TOKEN_DEFAULT TOKEN_IF TOKEN_ELSE TOKEN_SWITCH
|
||||
%token TOKEN_WHILE TOKEN_DO TOKEN_LAUNCH
|
||||
%token TOKEN_WHILE TOKEN_DO TOKEN_LAUNCH TOKEN_FOREACH TOKEN_FOREACH_TILED TOKEN_DOTDOTDOT
|
||||
%token TOKEN_FOR TOKEN_GOTO TOKEN_CONTINUE TOKEN_BREAK TOKEN_RETURN
|
||||
%token TOKEN_CIF TOKEN_CDO TOKEN_CFOR TOKEN_CWHILE TOKEN_CBREAK
|
||||
%token TOKEN_CCONTINUE TOKEN_CRETURN TOKEN_SYNC TOKEN_PRINT TOKEN_ASSERT
|
||||
@@ -194,8 +211,8 @@ static const char *lParamListTokens[] = {
|
||||
%type <structDeclaration> struct_declaration
|
||||
%type <structDeclarationList> struct_declaration_list
|
||||
|
||||
%type <enumeratorList> enumerator_list
|
||||
%type <enumerator> enumerator
|
||||
%type <symbolList> enumerator_list
|
||||
%type <symbol> enumerator foreach_identifier
|
||||
%type <enumType> enum_specifier
|
||||
|
||||
%type <type> specifier_qualifier_list struct_or_union_specifier
|
||||
@@ -211,6 +228,9 @@ static const char *lParamListTokens[] = {
|
||||
%type <constCharPtr> struct_or_union_name enum_identifier
|
||||
%type <int32Val> int_constant soa_width_specifier
|
||||
|
||||
%type <foreachDimension> foreach_dimension_specifier
|
||||
%type <foreachDimensionList> foreach_dimension_list
|
||||
|
||||
%start translation_unit
|
||||
%%
|
||||
|
||||
@@ -1295,6 +1315,40 @@ cfor_scope
|
||||
: TOKEN_CFOR { m->symbolTable->PushScope(); }
|
||||
;
|
||||
|
||||
foreach_scope
|
||||
: TOKEN_FOREACH { m->symbolTable->PushScope(); }
|
||||
;
|
||||
|
||||
foreach_tiled_scope
|
||||
: TOKEN_FOREACH_TILED { m->symbolTable->PushScope(); }
|
||||
;
|
||||
|
||||
foreach_identifier
|
||||
: TOKEN_IDENTIFIER
|
||||
{
|
||||
$$ = new Symbol(yytext, @1, AtomicType::VaryingConstInt32);
|
||||
}
|
||||
;
|
||||
|
||||
foreach_dimension_specifier
|
||||
: foreach_identifier '=' assignment_expression TOKEN_DOTDOTDOT assignment_expression
|
||||
{
|
||||
$$ = new ForeachDimension($1, $3, $5);
|
||||
}
|
||||
;
|
||||
|
||||
/* Comma-separated list of foreach dimensions, accumulated into a
   heap-allocated vector in the order they appear in the source. */
foreach_dimension_list
    : foreach_dimension_specifier
      {
          $$ = new std::vector<ForeachDimension *>;
          $$->push_back($1);
      }
    | foreach_dimension_list ',' foreach_dimension_specifier
      {
          /* Set $$ explicitly instead of relying on the parser having
             pre-loaded it with $1 before the action runs. */
          $$ = $1;
          $$->push_back($3);
      }
    ;
|
||||
|
||||
iteration_statement
|
||||
: TOKEN_WHILE '(' expression ')' statement
|
||||
{ $$ = new ForStmt(NULL, $3, NULL, $5, false, @1); }
|
||||
@@ -1320,6 +1374,44 @@ iteration_statement
|
||||
{ $$ = new ForStmt($3, $4, new ExprStmt($5, @5), $7, true, @1);
|
||||
m->symbolTable->PopScope();
|
||||
}
|
||||
| foreach_scope '(' foreach_dimension_list ')'
|
||||
{
|
||||
std::vector<ForeachDimension *> &dims = *$3;
|
||||
for (unsigned int i = 0; i < dims.size(); ++i)
|
||||
m->symbolTable->AddVariable(dims[i]->sym);
|
||||
}
|
||||
statement
|
||||
{
|
||||
std::vector<ForeachDimension *> &dims = *$3;
|
||||
std::vector<Symbol *> syms;
|
||||
std::vector<Expr *> begins, ends;
|
||||
for (unsigned int i = 0; i < dims.size(); ++i) {
|
||||
syms.push_back(dims[i]->sym);
|
||||
begins.push_back(dims[i]->beginExpr);
|
||||
ends.push_back(dims[i]->endExpr);
|
||||
}
|
||||
$$ = new ForeachStmt(syms, begins, ends, $6, false, @1);
|
||||
m->symbolTable->PopScope();
|
||||
}
|
||||
| foreach_tiled_scope '(' foreach_dimension_list ')'
|
||||
{
|
||||
std::vector<ForeachDimension *> &dims = *$3;
|
||||
for (unsigned int i = 0; i < dims.size(); ++i)
|
||||
m->symbolTable->AddVariable(dims[i]->sym);
|
||||
}
|
||||
statement
|
||||
{
|
||||
std::vector<ForeachDimension *> &dims = *$3;
|
||||
std::vector<Symbol *> syms;
|
||||
std::vector<Expr *> begins, ends;
|
||||
for (unsigned int i = 0; i < dims.size(); ++i) {
|
||||
syms.push_back(dims[i]->sym);
|
||||
begins.push_back(dims[i]->beginExpr);
|
||||
ends.push_back(dims[i]->endExpr);
|
||||
}
|
||||
$$ = new ForeachStmt(syms, begins, ends, $6, true, @1);
|
||||
m->symbolTable->PopScope();
|
||||
}
|
||||
;
|
||||
|
||||
jump_statement
|
||||
|
||||
473
stmt.cpp
473
stmt.cpp
@@ -819,6 +819,17 @@ lSafeToRunWithAllLanesOff(Stmt *stmt) {
|
||||
lSafeToRunWithAllLanesOff(fs->step) &&
|
||||
lSafeToRunWithAllLanesOff(fs->stmts));
|
||||
|
||||
ForeachStmt *fes;
|
||||
if ((fes = dynamic_cast<ForeachStmt *>(stmt)) != NULL) {
|
||||
for (unsigned int i = 0; i < fes->startExprs.size(); ++i)
|
||||
if (!lSafeToRunWithAllLanesOff(fes->startExprs[i]))
|
||||
return false;
|
||||
for (unsigned int i = 0; i < fes->endExprs.size(); ++i)
|
||||
if (!lSafeToRunWithAllLanesOff(fes->endExprs[i]))
|
||||
return false;
|
||||
return lSafeToRunWithAllLanesOff(fes->stmts);
|
||||
}
|
||||
|
||||
if (dynamic_cast<BreakStmt *>(stmt) != NULL ||
|
||||
dynamic_cast<ContinueStmt *>(stmt) != NULL)
|
||||
return true;
|
||||
@@ -1592,6 +1603,463 @@ ContinueStmt::Print(int indent) const {
|
||||
}
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// ForeachStmt
|
||||
|
||||
/** Records the per-dimension loop variables, their start and end
    expressions, the loop body, and whether this is the tiled variant
    of the statement. */
ForeachStmt::ForeachStmt(const std::vector<Symbol *> &dimVars,
                         const std::vector<Expr *> &starts,
                         const std::vector<Expr *> &ends,
                         Stmt *body, bool tiled, SourcePos pos)
    : Stmt(pos), dimVariables(dimVars), startExprs(starts), endExprs(ends),
      isTiled(tiled), stmts(body) {
}
|
||||
|
||||
|
||||
/* Given a uniform counter value in the memory location pointed to by
|
||||
uniformCounterPtr, compute the corresponding set of varying counter
|
||||
values for use within the loop body.
|
||||
*/
|
||||
static llvm::Value *
|
||||
lUpdateVaryingCounter(int dim, int nDims, FunctionEmitContext *ctx,
|
||||
llvm::Value *uniformCounterPtr,
|
||||
llvm::Value *varyingCounterPtr,
|
||||
const std::vector<int> &spans) {
|
||||
// Smear the uniform counter value out to be varying
|
||||
llvm::Value *counter = ctx->LoadInst(uniformCounterPtr);
|
||||
llvm::Value *smearCounter =
|
||||
llvm::UndefValue::get(LLVMTypes::Int32VectorType);
|
||||
for (int i = 0; i < g->target.vectorWidth; ++i)
|
||||
smearCounter =
|
||||
ctx->InsertInst(smearCounter, counter, i, "smear_counter");
|
||||
|
||||
// Figure out the offsets; this is a little bit tricky. As an example,
|
||||
// consider a 2D tiled foreach loop, where we're running 8-wide and
|
||||
// where the inner dimension has a stride of 4 and the outer dimension
|
||||
// has a stride of 2. For the inner dimension, we want the offsets
|
||||
// (0,1,2,3,0,1,2,3), and for the outer dimension we want
|
||||
// (0,0,0,0,1,1,1,1).
|
||||
int32_t delta[ISPC_MAX_NVEC];
|
||||
for (int i = 0; i < g->target.vectorWidth; ++i) {
|
||||
int d = i;
|
||||
// First, account for the effect of any dimensions at deeper
|
||||
// nesting levels than the current one.
|
||||
int prevDimSpanCount = 1;
|
||||
for (int j = dim; j < nDims-1; ++j)
|
||||
prevDimSpanCount *= spans[j+1];
|
||||
d /= prevDimSpanCount;
|
||||
|
||||
// And now with what's left, figure out our own offset
|
||||
delta[i] = d % spans[dim];
|
||||
}
|
||||
|
||||
// Add the deltas to compute the varying counter values; store the
|
||||
// result to memory and then return it directly as well.
|
||||
llvm::Value *varyingCounter =
|
||||
ctx->BinaryOperator(llvm::Instruction::Add, smearCounter,
|
||||
LLVMInt32Vector(delta), "iter_val");
|
||||
ctx->StoreInst(varyingCounter, varyingCounterPtr);
|
||||
return varyingCounter;
|
||||
}
|
||||
|
||||
|
||||
/** Returns the integer log2 (rounded down) of the given integer, or -1
    if the argument is zero or negative. */
static int
lLog2(int i) {
    // Reject non-positive values up front: right-shifting a negative
    // value typically never reaches zero, so the shift loop below would
    // not terminate for them.
    if (i <= 0)
        return -1;

    int ret = 0;
    while (i > 1) {
        i >>= 1;
        ++ret;
    }
    return ret;
}
|
||||
|
||||
|
||||
/* Figure out how many elements to process in each dimension for each time
|
||||
through a foreach loop. The untiled case is easy; all of the outer
|
||||
dimensions up until the innermost one have a span of 1, and the
|
||||
innermost one takes the entire vector width. For the tiled case, we
|
||||
give wider spans to the innermost dimensions while also trying to
|
||||
generate relatively square domains.
|
||||
|
||||
This code works recursively from outer dimensions to inner dimensions.
|
||||
*/
|
||||
static void
|
||||
lGetSpans(int dimsLeft, int nDims, int itemsLeft, bool isTiled, int *a) {
|
||||
if (dimsLeft == 0) {
|
||||
// Nothing left to do but give all of the remaining work to the
|
||||
// innermost domain.
|
||||
*a = itemsLeft;
|
||||
return;
|
||||
}
|
||||
|
||||
if (isTiled == false || (dimsLeft >= lLog2(itemsLeft)))
|
||||
// If we're not tiled, or if there are enough dimensions left that
|
||||
// giving this one any more than a span of one would mean that a
|
||||
// later dimension would have to have a span of one, give this one
|
||||
// a span of one to save the available items for later.
|
||||
*a = 1;
|
||||
else if (itemsLeft >= 16 && (dimsLeft == 1))
|
||||
// Special case to have 4x4 domains for the 2D case when running
|
||||
// 16-wide.
|
||||
*a = 4;
|
||||
else
|
||||
// Otherwise give this dimension a span of two.
|
||||
*a = 2;
|
||||
|
||||
lGetSpans(dimsLeft-1, nDims, itemsLeft / *a, isTiled, a+1);
|
||||
}
|
||||
|
||||
|
||||
/* Emit code for a foreach statement. We effectively emit code to run the
|
||||
set of n-dimensional nested loops corresponding to the dimensionality of
|
||||
the foreach statement along with the extra logic to deal with mismatches
|
||||
between the vector width we're compiling to and the number of elements
|
||||
to process.
|
||||
*/
|
||||
void
|
||||
ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
if (ctx->GetCurrentBasicBlock() == NULL || stmts == NULL)
|
||||
return;
|
||||
|
||||
llvm::BasicBlock *bbCheckExtras = ctx->CreateBasicBlock("foreach_check_extras");
|
||||
llvm::BasicBlock *bbDoExtras = ctx->CreateBasicBlock("foreach_do_extras");
|
||||
llvm::BasicBlock *bbBody = ctx->CreateBasicBlock("foreach_body");
|
||||
llvm::BasicBlock *bbExit = ctx->CreateBasicBlock("foreach_exit");
|
||||
|
||||
llvm::Value *oldMask = ctx->GetInternalMask();
|
||||
|
||||
ctx->StartForeach();
|
||||
ctx->SetDebugPos(pos);
|
||||
ctx->StartScope();
|
||||
|
||||
// This should be caught during typechecking
|
||||
assert(startExprs.size() == dimVariables.size() &&
|
||||
endExprs.size() == dimVariables.size());
|
||||
int nDims = (int)dimVariables.size();
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// Setup: compute the number of items we have to work on in each
|
||||
// dimension and a number of derived values.
|
||||
std::vector<llvm::BasicBlock *> bbReset, bbStep, bbTest;
|
||||
std::vector<llvm::Value *> startVals, endVals, uniformCounterPtrs;
|
||||
std::vector<llvm::Value *> nItems, nExtras, alignedEnd;
|
||||
std::vector<llvm::Value *> extrasMaskPtrs;
|
||||
|
||||
std::vector<int> span(nDims, 0);
|
||||
lGetSpans(nDims-1, nDims, g->target.vectorWidth, isTiled, &span[0]);
|
||||
|
||||
for (int i = 0; i < nDims; ++i) {
|
||||
// Basic blocks that we'll fill in later with the looping logic for
|
||||
// this dimension.
|
||||
bbReset.push_back(ctx->CreateBasicBlock("foreach_reset"));
|
||||
bbStep.push_back(ctx->CreateBasicBlock("foreach_step"));
|
||||
bbTest.push_back(ctx->CreateBasicBlock("foreach_test"));
|
||||
|
||||
// Start and end value for this loop dimension
|
||||
llvm::Value *sv = startExprs[i]->GetValue(ctx);
|
||||
llvm::Value *ev = endExprs[i]->GetValue(ctx);
|
||||
if (sv == NULL || ev == NULL)
|
||||
return;
|
||||
startVals.push_back(sv);
|
||||
endVals.push_back(ev);
|
||||
|
||||
// nItems = endVal - startVal
|
||||
nItems.push_back(ctx->BinaryOperator(llvm::Instruction::Sub, ev, sv,
|
||||
"nitems"));
|
||||
|
||||
// nExtras = nItems % (span for this dimension)
|
||||
// This gives us the number of extra elements we need to deal with
|
||||
// at the end of the loop for this dimension that don't fit cleanly
|
||||
// into a vector width.
|
||||
nExtras.push_back(ctx->BinaryOperator(llvm::Instruction::SRem, nItems[i],
|
||||
LLVMInt32(span[i]), "nextras"));
|
||||
|
||||
// alignedEnd = endVal - nExtras
|
||||
alignedEnd.push_back(ctx->BinaryOperator(llvm::Instruction::Sub, ev,
|
||||
nExtras[i], "aligned_end"));
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// Each dimension has a loop counter that is a uniform value that
|
||||
// goes from startVal to endVal, in steps of the span for this
|
||||
// dimension. Its value is only used internally here for looping
|
||||
// logic and isn't directly available in the user's program code.
|
||||
uniformCounterPtrs.push_back(ctx->AllocaInst(LLVMTypes::Int32Type,
|
||||
"counter"));
|
||||
ctx->StoreInst(startVals[i], uniformCounterPtrs[i]);
|
||||
|
||||
// There is also a varying variable that holds the set of index
|
||||
// values for each dimension in the current loop iteration; this is
|
||||
// the value that is program-visible.
|
||||
dimVariables[i]->storagePtr = ctx->AllocaInst(LLVMTypes::Int32VectorType,
|
||||
dimVariables[i]->name.c_str());
|
||||
dimVariables[i]->parentFunction = ctx->GetFunction();
|
||||
ctx->EmitVariableDebugInfo(dimVariables[i]);
|
||||
|
||||
// Each dimension also maintains a mask that represents which of
|
||||
// the varying elements in the current iteration should be
|
||||
// processed. (i.e. this is used to disable the lanes that have
|
||||
// out-of-bounds offsets.)
|
||||
extrasMaskPtrs.push_back(ctx->AllocaInst(LLVMTypes::MaskType, "extras mask"));
|
||||
ctx->StoreInst(LLVMMaskAllOn, extrasMaskPtrs[i]);
|
||||
}
|
||||
|
||||
// On to the outermost loop's test
|
||||
ctx->BranchInst(bbTest[0]);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// foreach_reset: this code runs when we need to reset the counter for
|
||||
// a given dimension in preparation for running through its loop again,
|
||||
// after the enclosing level advances its counter.
|
||||
for (int i = 0; i < nDims; ++i) {
|
||||
ctx->SetCurrentBasicBlock(bbReset[i]);
|
||||
if (i == 0)
|
||||
ctx->BranchInst(bbExit);
|
||||
else {
|
||||
ctx->StoreInst(LLVMMaskAllOn, extrasMaskPtrs[i]);
|
||||
ctx->StoreInst(startVals[i], uniformCounterPtrs[i]);
|
||||
ctx->BranchInst(bbStep[i-1]);
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// foreach_test
|
||||
std::vector<llvm::Value *> inExtras;
|
||||
for (int i = 0; i < nDims; ++i) {
|
||||
ctx->SetCurrentBasicBlock(bbTest[i]);
|
||||
|
||||
llvm::Value *haveExtras =
|
||||
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SGT,
|
||||
endVals[i], alignedEnd[i], "have_extras");
|
||||
|
||||
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[i], "counter");
|
||||
llvm::Value *atAlignedEnd =
|
||||
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
|
||||
counter, alignedEnd[i], "at_aligned_end");
|
||||
llvm::Value *inEx =
|
||||
ctx->BinaryOperator(llvm::Instruction::And, haveExtras,
|
||||
atAlignedEnd, "in_extras");
|
||||
|
||||
if (i == 0)
|
||||
inExtras.push_back(inEx);
|
||||
else
|
||||
inExtras.push_back(ctx->BinaryOperator(llvm::Instruction::Or, inEx,
|
||||
inExtras[i-1], "in_extras_all"));
|
||||
|
||||
llvm::Value *varyingCounter =
|
||||
lUpdateVaryingCounter(i, nDims, ctx, uniformCounterPtrs[i],
|
||||
dimVariables[i]->storagePtr, span);
|
||||
|
||||
llvm::Value *smearEnd = llvm::UndefValue::get(LLVMTypes::Int32VectorType);
|
||||
for (int j = 0; j < g->target.vectorWidth; ++j)
|
||||
smearEnd = ctx->InsertInst(smearEnd, endVals[i], j, "smear_end");
|
||||
// Do a vector compare of its value to the end value to generate a
|
||||
// mask for this last bit of work.
|
||||
llvm::Value *emask =
|
||||
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
|
||||
varyingCounter, smearEnd);
|
||||
emask = ctx->I1VecToBoolVec(emask);
|
||||
|
||||
if (i == 0)
|
||||
ctx->StoreInst(emask, extrasMaskPtrs[i]);
|
||||
else {
|
||||
// FIXME: at least specialize the innermost loop to not do all
|
||||
// this mask stuff each time through the test...
|
||||
llvm::Value *oldMask = ctx->LoadInst(extrasMaskPtrs[i-1]);
|
||||
llvm::Value *newMask =
|
||||
ctx->BinaryOperator(llvm::Instruction::And, oldMask, emask,
|
||||
"extras_mask");
|
||||
ctx->StoreInst(newMask, extrasMaskPtrs[i]);
|
||||
}
|
||||
|
||||
llvm::Value *notAtEnd =
|
||||
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
|
||||
counter, endVals[i]);
|
||||
if (i != nDims-1)
|
||||
ctx->BranchInst(bbTest[i+1], bbReset[i], notAtEnd);
|
||||
else
|
||||
ctx->BranchInst(bbCheckExtras, bbReset[i], notAtEnd);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// foreach_step: increment the uniform counter by the vector width.
|
||||
// Note that we don't increment the varying counter here as well but
|
||||
// just generate its value when we need it in the loop body.
|
||||
for (int i = 0; i < nDims; ++i) {
|
||||
ctx->SetCurrentBasicBlock(bbStep[i]);
|
||||
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[i]);
|
||||
llvm::Value *newCounter =
|
||||
ctx->BinaryOperator(llvm::Instruction::Add, counter,
|
||||
LLVMInt32(span[i]), "new_counter");
|
||||
ctx->StoreInst(newCounter, uniformCounterPtrs[i]);
|
||||
ctx->BranchInst(bbTest[i]);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// foreach_check_extras: see if we need to deal with any partial
|
||||
// vector's worth of work that's left.
|
||||
ctx->SetCurrentBasicBlock(bbCheckExtras);
|
||||
ctx->AddInstrumentationPoint("foreach loop check extras");
|
||||
ctx->BranchInst(bbDoExtras, bbBody, inExtras[nDims-1]);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// foreach_body: do a full vector's worth of work. We know that all
|
||||
// lanes will be running here, so we explicitly set the mask to be 'all
|
||||
// on'. This ends up being relatively straightforward: just update the
|
||||
// value of the varying loop counter and have the statements in the
|
||||
// loop body emit their code.
|
||||
ctx->SetCurrentBasicBlock(bbBody);
|
||||
ctx->SetInternalMask(LLVMMaskAllOn);
|
||||
ctx->AddInstrumentationPoint("foreach loop body");
|
||||
stmts->EmitCode(ctx);
|
||||
assert(ctx->GetCurrentBasicBlock() != NULL);
|
||||
ctx->BranchInst(bbStep[nDims-1]);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// foreach_doextras: set the mask and have the statements emit their
|
||||
// code again. Note that it's generally worthwhile having two copies
|
||||
// of the statements' code, since the code above is emitted with the
|
||||
// mask known to be all-on, which in turn leads to more efficient code
|
||||
// for that case.
|
||||
ctx->SetCurrentBasicBlock(bbDoExtras);
|
||||
llvm::Value *mask = ctx->LoadInst(extrasMaskPtrs[nDims-1]);
|
||||
ctx->SetInternalMask(mask);
|
||||
stmts->EmitCode(ctx);
|
||||
ctx->BranchInst(bbStep[nDims-1]);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// foreach_exit: All done. Restore the old mask and clean up
|
||||
ctx->SetCurrentBasicBlock(bbExit);
|
||||
ctx->SetInternalMask(oldMask);
|
||||
|
||||
ctx->EndForeach();
|
||||
ctx->EndScope();
|
||||
}
|
||||
|
||||
|
||||
/** Runs the optimization pass over the foreach statement's children.

    Each start expression, each end expression and the loop body is
    optimized in place; the value returned by each child's Optimize() call
    replaces the stored pointer.

    @return this statement, or NULL if any start/end expression or the
            loop body is NULL after optimization.
 */
Stmt *
ForeachStmt::Optimize() {
    bool anyErrors = false;
    for (unsigned int i = 0; i < startExprs.size(); ++i) {
        // Store the result so that the NULL check below actually observes
        // the outcome of the optimization.
        if (startExprs[i] != NULL)
            startExprs[i] = startExprs[i]->Optimize();
        anyErrors |= (startExprs[i] == NULL);
    }
    for (unsigned int i = 0; i < endExprs.size(); ++i) {
        if (endExprs[i] != NULL)
            endExprs[i] = endExprs[i]->Optimize();
        anyErrors |= (endExprs[i] == NULL);
    }

    // The body must be optimized here, not type-checked a second time.
    if (stmts != NULL)
        stmts = stmts->Optimize();
    anyErrors |= (stmts == NULL);

    return anyErrors ? NULL : this;
}
|
||||
|
||||
|
||||
/** Type-checks the foreach statement.

    Every start and end expression is converted to a uniform int32 with
    TypeConvertExpr() and then type-checked, and the loop body is
    type-checked as well.  Finally the number of start and end expressions
    is compared against the number of dimension variables, and an error is
    reported for any mismatch.

    @return this statement, or NULL if any child is NULL after checking or
            if a count mismatch was reported.
 */
Stmt *
ForeachStmt::TypeCheck() {
    bool anyErrors = false;
    for (unsigned int i = 0; i < startExprs.size(); ++i) {
        if (startExprs[i] != NULL)
            startExprs[i] = TypeConvertExpr(startExprs[i],
                                            AtomicType::UniformInt32,
                                            "foreach starting value");
        // Store the result so that the NULL check below actually observes
        // the outcome of the type check.
        if (startExprs[i] != NULL)
            startExprs[i] = startExprs[i]->TypeCheck();
        anyErrors |= (startExprs[i] == NULL);
    }
    for (unsigned int i = 0; i < endExprs.size(); ++i) {
        if (endExprs[i] != NULL)
            endExprs[i] = TypeConvertExpr(endExprs[i], AtomicType::UniformInt32,
                                          "foreach ending value");
        if (endExprs[i] != NULL)
            endExprs[i] = endExprs[i]->TypeCheck();
        anyErrors |= (endExprs[i] == NULL);
    }

    if (stmts != NULL)
        stmts = stmts->TypeCheck();
    anyErrors |= (stmts == NULL);

    // One start value is required per dimension variable.
    if (startExprs.size() < dimVariables.size()) {
        Error(pos, "Not enough initial values provided for \"foreach\" loop; "
              "got %d, expected %d\n", (int)startExprs.size(), (int)dimVariables.size());
        anyErrors = true;
    }
    else if (startExprs.size() > dimVariables.size()) {
        Error(pos, "Too many initial values provided for \"foreach\" loop; "
              "got %d, expected %d\n", (int)startExprs.size(), (int)dimVariables.size());
        anyErrors = true;
    }

    // Likewise one end value per dimension variable; these messages name
    // the ending values so they can be told apart from the ones above.
    if (endExprs.size() < dimVariables.size()) {
        Error(pos, "Not enough ending values provided for \"foreach\" loop; "
              "got %d, expected %d\n", (int)endExprs.size(), (int)dimVariables.size());
        anyErrors = true;
    }
    else if (endExprs.size() > dimVariables.size()) {
        Error(pos, "Too many ending values provided for \"foreach\" loop; "
              "got %d, expected %d\n", (int)endExprs.size(), (int)dimVariables.size());
        anyErrors = true;
    }

    return anyErrors ? NULL : this;
}
|
||||
|
||||
|
||||
int
|
||||
ForeachStmt::EstimateCost() const {
|
||||
return dimVariables.size() * (COST_UNIFORM_LOOP + COST_SIMPLE_ARITH_LOGIC_OP) +
|
||||
(stmts ? stmts->EstimateCost() : 0);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
ForeachStmt::Print(int indent) const {
|
||||
printf("%*cForeach Stmt", indent, ' ');
|
||||
pos.Print();
|
||||
printf("\n");
|
||||
|
||||
for (unsigned int i = 0; i < dimVariables.size(); ++i)
|
||||
if (dimVariables[i] != NULL)
|
||||
printf("%*cVar %d: %s\n", indent+4, ' ', i,
|
||||
dimVariables[i]->name.c_str());
|
||||
else
|
||||
printf("%*cVar %d: NULL\n", indent+4, ' ', i);
|
||||
|
||||
printf("Start values:\n");
|
||||
for (unsigned int i = 0; i < startExprs.size(); ++i) {
|
||||
if (startExprs[i] != NULL)
|
||||
startExprs[i]->Print();
|
||||
else
|
||||
printf("NULL");
|
||||
if (i != startExprs.size()-1)
|
||||
printf(", ");
|
||||
else
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
printf("End values:\n");
|
||||
for (unsigned int i = 0; i < endExprs.size(); ++i) {
|
||||
if (endExprs[i] != NULL)
|
||||
endExprs[i]->Print();
|
||||
else
|
||||
printf("NULL");
|
||||
if (i != endExprs.size()-1)
|
||||
printf(", ");
|
||||
else
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
if (stmts != NULL) {
|
||||
printf("%*cStmts:\n", indent+4, ' ');
|
||||
stmts->Print(indent+8);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// ReturnStmt
|
||||
|
||||
@@ -1606,6 +2074,11 @@ ReturnStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
if (!ctx->GetCurrentBasicBlock())
|
||||
return;
|
||||
|
||||
if (ctx->InForeachLoop()) {
|
||||
Error(pos, "\"return\" statement is illegal inside a \"foreach\" loop.");
|
||||
return;
|
||||
}
|
||||
|
||||
ctx->SetDebugPos(pos);
|
||||
ctx->CurrentLanesReturned(val, doCoherenceCheck);
|
||||
}
|
||||
|
||||
25
stmt.h
25
stmt.h
@@ -241,6 +241,31 @@ private:
|
||||
};
|
||||
|
||||
|
||||
/** @brief Statement implementation for parallel 'foreach' loops.
|
||||
*/
|
||||
class ForeachStmt : public Stmt {
|
||||
public:
|
||||
ForeachStmt(const std::vector<Symbol *> &loopVars,
|
||||
const std::vector<Expr *> &startExprs,
|
||||
const std::vector<Expr *> &endExprs,
|
||||
Stmt *bodyStatements, bool tiled, SourcePos pos);
|
||||
|
||||
void EmitCode(FunctionEmitContext *ctx) const;
|
||||
void Print(int indent) const;
|
||||
|
||||
Stmt *Optimize();
|
||||
Stmt *TypeCheck();
|
||||
int EstimateCost() const;
|
||||
|
||||
std::vector<Symbol *> dimVariables;
|
||||
std::vector<Expr *> startExprs;
|
||||
std::vector<Expr *> endExprs;
|
||||
bool isTiled;
|
||||
Stmt *stmts;
|
||||
};
|
||||
|
||||
|
||||
|
||||
/** @brief Statement implementation for a 'return' or 'coherent' return
|
||||
statement in the program. */
|
||||
class ReturnStmt : public Stmt {
|
||||
|
||||
22
tests/foreach-1.ispc
Normal file
22
tests/foreach-1.ispc
Normal file
@@ -0,0 +1,22 @@
|
||||
|
||||
// Returns programCount.
export uniform int width() {
    return programCount;
}
|
||||
|
||||
|
||||
// Zeroes a programCount-element scratch array, adds aFOO[k] - 1 to each
// element with a foreach over [0, programCount), and stores the total of the
// scratch array in RET[programIndex].
export void f_f(uniform float RET[], uniform float aFOO[]) {
    uniform float scratch[programCount];
    for (uniform int n = 0; n < programCount; n++) {
        scratch[n] = 0;
    }

    foreach (k = 0 ... programCount) {
        scratch[k] = scratch[k] + (aFOO[k] - 1);
    }

    uniform float total = 0;
    for (uniform int n = 0; n < programCount; n++) {
        total = total + scratch[n];
    }

    RET[programIndex] = total;
}
|
||||
|
||||
// Expected output: RET[programIndex] holds reduce_add(programIndex).
export void result(uniform float RET[]) {
    uniform float expected = reduce_add(programIndex);
    RET[programIndex] = expected;
}
|
||||
33
tests/foreach-10.ispc
Normal file
33
tests/foreach-10.ispc
Normal file
@@ -0,0 +1,33 @@
|
||||
|
||||
// Returns programCount.
export uniform int width() {
    return programCount;
}
|
||||
|
||||
// Zeroes an NA x NB x NC array, increments every element once with a
// three-dimensional foreach_tiled, and stores in RET[programIndex] the number
// of elements whose value is not exactly 1.
export void f_f(uniform float RET[], uniform float aFOO[]) {
#define NA 4
#define NB 8
#define NC 7
    uniform int a[NA][NB][NC];

    // The innermost loop must test and advance k.  It previously tested and
    // incremented j, which left most of the array uninitialized.
    for (uniform int i = 0; i < NA; ++i)
        for (uniform int j = 0; j < NB; ++j)
            for (uniform int k = 0; k < NC; ++k)
                a[i][j][k] = 0;

    foreach_tiled (i = 0 ... NA, j = 0 ... NB, k = 0 ... NC) {
        a[i][j][k] += 1;
    }

    // Same fix for the verification pass, so that every element is checked.
    uniform int errs = 0;
    for (uniform int i = 0; i < NA; ++i)
        for (uniform int j = 0; j < NB; ++j)
            for (uniform int k = 0; k < NC; ++k)
                if (a[i][j][k] != 1) {
//CO                    print("% % % = %\n", i, j, k, a[i][j][k]);
                    ++errs;
                }

    RET[programIndex] = errs;
}
|
||||
|
||||
// Expected output: RET[programIndex] is 0.
export void result(uniform float RET[]) {
    uniform float expected = 0;
    RET[programIndex] = expected;
}
|
||||
22
tests/foreach-11.ispc
Normal file
22
tests/foreach-11.ispc
Normal file
@@ -0,0 +1,22 @@
|
||||
|
||||
// Returns programCount.
export uniform int width() {
    return programCount;
}
|
||||
|
||||
|
||||
// Zeroes a programCount-element scratch array, adds aFOO[k] - 1 to each
// element with a foreach_tiled over [0, programCount), and stores the total
// of the scratch array in RET[programIndex].
export void f_f(uniform float RET[], uniform float aFOO[]) {
    uniform float scratch[programCount];
    for (uniform int n = 0; n < programCount; n++) {
        scratch[n] = 0;
    }

    foreach_tiled (k = 0 ... programCount) {
        scratch[k] = scratch[k] + (aFOO[k] - 1);
    }

    uniform float total = 0;
    for (uniform int n = 0; n < programCount; n++) {
        total = total + scratch[n];
    }

    RET[programIndex] = total;
}
|
||||
|
||||
// Expected output: RET[programIndex] holds reduce_add(programIndex).
export void result(uniform float RET[]) {
    uniform float expected = reduce_add(programIndex);
    RET[programIndex] = expected;
}
|
||||
26
tests/foreach-12.ispc
Normal file
26
tests/foreach-12.ispc
Normal file
@@ -0,0 +1,26 @@
|
||||
|
||||
// Returns programCount.
export uniform int width() {
    return programCount;
}
|
||||
|
||||
|
||||
uniform int foo(int i);
|
||||
|
||||
// Zeroes a programCount-element scratch array, adds k to element k with a
// foreach_tiled over [2, programCount), and stores the total of the scratch
// array in RET[programIndex].
export void f_f(uniform float RET[], uniform float aFOO[]) {
    uniform float scratch[programCount];
    for (uniform int n = 0; n < programCount; n++) {
        scratch[n] = 0;
    }

    foreach_tiled (k = 2 ... programCount) {
        scratch[k] = scratch[k] + k;
    }

    uniform float total = 0;
    for (uniform int n = 0; n < programCount; n++)
        total = total + scratch[n];

    RET[programIndex] = total;
}
|
||||
|
||||
// Expected output: reduce_add over a per-lane value that is programIndex for
// lanes with programIndex >= 2 and 0 for the others.
export void result(uniform float RET[]) {
    int contribution = 0;
    if (programIndex >= 2)
        contribution = programIndex;
    uniform float expected = reduce_add(contribution);
    RET[programIndex] = expected;
}
|
||||
19
tests/foreach-13.ispc
Normal file
19
tests/foreach-13.ispc
Normal file
@@ -0,0 +1,19 @@
|
||||
|
||||
// Returns programCount.
export uniform int width() {
    return programCount;
}
|
||||
|
||||
|
||||
uniform int foo(int i);
|
||||
|
||||
// Clears the first programCount entries of RET, then uses a foreach_tiled
// over [0, 2) to store k + 1 in RET[k].
export void f_f(uniform float RET[], uniform float aFOO[]) {
    for (uniform int n = 0; n < programCount; n++) {
        RET[n] = 0;
    }

    foreach_tiled (k = 0 ... 2) {
        RET[k] = k + 1;
    }
}
|
||||
|
||||
// Expected output: 0 in every lane's slot, then RET[0] and RET[1] are
// overwritten with 1 and 2.
export void result(uniform float RET[]) {
    uniform float cleared = 0;
    RET[programIndex] = cleared;

    RET[0] = 1;
    RET[1] = 2;
}
|
||||
17
tests/foreach-14.ispc
Normal file
17
tests/foreach-14.ispc
Normal file
@@ -0,0 +1,17 @@
|
||||
|
||||
// Returns programCount.
export uniform int width() {
    return programCount;
}
|
||||
|
||||
|
||||
uniform int foo(int i);
|
||||
|
||||
// Clears the first programCount entries of RET, then runs a foreach_tiled
// whose start value (2) is greater than its end value (0); the body adds
// 1234 to RET[k].
export void f_f(uniform float RET[], uniform float aFOO[]) {
    for (uniform int n = 0; n < programCount; n++) {
        RET[n] = 0;
    }

    foreach_tiled (k = 2 ... 0) {
        RET[k] = RET[k] + 1234;
    }
}
|
||||
|
||||
// Expected output: RET[programIndex] is 0.
export void result(uniform float RET[]) {
    uniform float expected = 0;
    RET[programIndex] = expected;
}
|
||||
17
tests/foreach-15.ispc
Normal file
17
tests/foreach-15.ispc
Normal file
@@ -0,0 +1,17 @@
|
||||
|
||||
// Returns programCount.
export uniform int width() {
    return programCount;
}
|
||||
|
||||
|
||||
uniform int foo(int i);
|
||||
|
||||
// Clears the first programCount entries of RET, then runs a foreach_tiled
// whose start and end values are both 1; the body stores 1234 in RET[k].
export void f_f(uniform float RET[], uniform float aFOO[]) {
    for (uniform int n = 0; n < programCount; n++) {
        RET[n] = 0;
    }

    foreach_tiled (k = 1 ... 1) {
        RET[k] = 1234;
    }
}
|
||||
|
||||
// Expected output: RET[programIndex] is 0.
export void result(uniform float RET[]) {
    uniform float expected = 0;
    RET[programIndex] = expected;
}
|
||||
17
tests/foreach-16.ispc
Normal file
17
tests/foreach-16.ispc
Normal file
@@ -0,0 +1,17 @@
|
||||
|
||||
// Returns programCount.
export uniform int width() {
    return programCount;
}
|
||||
|
||||
|
||||
uniform int foo(int i);
|
||||
|
||||
// Clears the first programCount entries of RET, then uses a foreach_tiled
// with a negative start value, over [-2, programCount-2), to store 1234 in
// RET[k + 2].
export void f_f(uniform float RET[], uniform float aFOO[]) {
    for (uniform int n = 0; n < programCount; n++) {
        RET[n] = 0;
    }

    foreach_tiled (k = -2 ... programCount-2) {
        RET[k + 2] = 1234;
    }
}
|
||||
|
||||
// Expected output: RET[programIndex] is 1234.
export void result(uniform float RET[]) {
    uniform float expected = 1234;
    RET[programIndex] = expected;
}
|
||||
13
tests/foreach-17.ispc
Normal file
13
tests/foreach-17.ispc
Normal file
@@ -0,0 +1,13 @@
|
||||
|
||||
// Returns programCount.
export uniform int width() {
    return programCount;
}
|
||||
|
||||
// Accumulates aFOO[k] for k in [0, 6) into a varying partial sum using
// foreach_tiled, then stores the reduce_add of that partial sum in
// RET[programIndex].
export void f_f(uniform float RET[], uniform float aFOO[]) {
    float partial = 0;
    foreach_tiled (k = 0 ... 6) {
        partial = partial + aFOO[k];
    }

    uniform float total = reduce_add(partial);
    RET[programIndex] = total;
}
|
||||
|
||||
// Expected output: RET[programIndex] is 21.
export void result(uniform float RET[]) {
    uniform float expected = 21;
    RET[programIndex] = expected;
}
|
||||
29
tests/foreach-18.ispc
Normal file
29
tests/foreach-18.ispc
Normal file
@@ -0,0 +1,29 @@
|
||||
|
||||
// Returns programCount.
export uniform int width() {
    return programCount;
}
|
||||
|
||||
// Zeroes an NA x NB array, increments every element once with a
// two-dimensional foreach_tiled, and stores in RET[programIndex] the number
// of elements whose value is not exactly 1.
export void f_f(uniform float RET[], uniform float aFOO[]) {
#define NA 3
#define NB 8
    uniform int a[NA][NB];

    for (uniform int r = 0; r < NA; r++) {
        for (uniform int c = 0; c < NB; c++) {
            a[r][c] = 0;
        }
    }

    foreach_tiled (i = 0 ... NA, j = 0 ... NB) {
        a[i][j] = a[i][j] + 1;
    }

    uniform int mismatches = 0;
    for (uniform int r = 0; r < NA; r++) {
        for (uniform int c = 0; c < NB; c++) {
            if (a[r][c] != 1)
                mismatches = mismatches + 1;
        }
    }

    RET[programIndex] = mismatches;
}
|
||||
|
||||
// Expected output: RET[programIndex] is 0.
export void result(uniform float RET[]) {
    uniform float expected = 0;
    RET[programIndex] = expected;
}
|
||||
29
tests/foreach-19.ispc
Normal file
29
tests/foreach-19.ispc
Normal file
@@ -0,0 +1,29 @@
|
||||
|
||||
// Returns programCount.
export uniform int width() {
    return programCount;
}
|
||||
|
||||
// Zeroes an NA x NB array, increments every element once with a
// two-dimensional foreach_tiled, and stores in RET[programIndex] the number
// of elements whose value is not exactly 1.
export void f_f(uniform float RET[], uniform float aFOO[]) {
#define NA 3
#define NB 4
    uniform int a[NA][NB];

    for (uniform int r = 0; r < NA; r++) {
        for (uniform int c = 0; c < NB; c++) {
            a[r][c] = 0;
        }
    }

    foreach_tiled (i = 0 ... NA, j = 0 ... NB) {
        a[i][j] = a[i][j] + 1;
    }

    uniform int mismatches = 0;
    for (uniform int r = 0; r < NA; r++) {
        for (uniform int c = 0; c < NB; c++) {
            if (a[r][c] != 1)
                mismatches = mismatches + 1;
        }
    }

    RET[programIndex] = mismatches;
}
|
||||
|
||||
// Expected output: RET[programIndex] is 0.
export void result(uniform float RET[]) {
    uniform float expected = 0;
    RET[programIndex] = expected;
}
|
||||
26
tests/foreach-2.ispc
Normal file
26
tests/foreach-2.ispc
Normal file
@@ -0,0 +1,26 @@
|
||||
|
||||
// Returns programCount.
export uniform int width() {
    return programCount;
}
|
||||
|
||||
|
||||
uniform int foo(int i);
|
||||
|
||||
// Zeroes a programCount-element scratch array, adds k to element k with a
// foreach over [2, programCount), and stores the total of the scratch array
// in RET[programIndex].
export void f_f(uniform float RET[], uniform float aFOO[]) {
    uniform float scratch[programCount];
    for (uniform int n = 0; n < programCount; n++) {
        scratch[n] = 0;
    }

    foreach (k = 2 ... programCount) {
        scratch[k] = scratch[k] + k;
    }

    uniform float total = 0;
    for (uniform int n = 0; n < programCount; n++)
        total = total + scratch[n];

    RET[programIndex] = total;
}
|
||||
|
||||
// Expected output: reduce_add over a per-lane value that is programIndex for
// lanes with programIndex >= 2 and 0 for the others.
export void result(uniform float RET[]) {
    int contribution = 0;
    if (programIndex >= 2)
        contribution = programIndex;
    uniform float expected = reduce_add(contribution);
    RET[programIndex] = expected;
}
|
||||
19
tests/foreach-3.ispc
Normal file
19
tests/foreach-3.ispc
Normal file
@@ -0,0 +1,19 @@
|
||||
|
||||
// Returns programCount.
export uniform int width() {
    return programCount;
}
|
||||
|
||||
|
||||
uniform int foo(int i);
|
||||
|
||||
// Clears the first programCount entries of RET, then uses a foreach over
// [0, 2) to store k + 1 in RET[k].
export void f_f(uniform float RET[], uniform float aFOO[]) {
    for (uniform int n = 0; n < programCount; n++) {
        RET[n] = 0;
    }

    foreach (k = 0 ... 2) {
        RET[k] = k + 1;
    }
}
|
||||
|
||||
// Expected output: 0 in every lane's slot, then RET[0] and RET[1] are
// overwritten with 1 and 2.
export void result(uniform float RET[]) {
    uniform float cleared = 0;
    RET[programIndex] = cleared;

    RET[0] = 1;
    RET[1] = 2;
}
|
||||
17
tests/foreach-4.ispc
Normal file
17
tests/foreach-4.ispc
Normal file
@@ -0,0 +1,17 @@
|
||||
|
||||
// Returns programCount.
export uniform int width() {
    return programCount;
}
|
||||
|
||||
|
||||
uniform int foo(int i);
|
||||
|
||||
// Clears the first programCount entries of RET, then runs a foreach whose
// start value (2) is greater than its end value (0); the body subtracts
// 1234 from RET[k].
export void f_f(uniform float RET[], uniform float aFOO[]) {
    for (uniform int n = 0; n < programCount; n++) {
        RET[n] = 0;
    }

    foreach (k = 2 ... 0) {
        RET[k] = RET[k] - 1234;
    }
}
|
||||
|
||||
// Expected output: RET[programIndex] is 0.
export void result(uniform float RET[]) {
    uniform float expected = 0;
    RET[programIndex] = expected;
}
|
||||
17
tests/foreach-5.ispc
Normal file
17
tests/foreach-5.ispc
Normal file
@@ -0,0 +1,17 @@
|
||||
|
||||
// Returns programCount.
export uniform int width() {
    return programCount;
}
|
||||
|
||||
|
||||
uniform int foo(int i);
|
||||
|
||||
// Clears the first programCount entries of RET, then runs a foreach whose
// start and end values are both 1; the body stores 1234 in RET[k].
export void f_f(uniform float RET[], uniform float aFOO[]) {
    for (uniform int n = 0; n < programCount; n++) {
        RET[n] = 0;
    }

    foreach (k = 1 ... 1) {
        RET[k] = 1234;
    }
}
|
||||
|
||||
// Expected output: RET[programIndex] is 0.
export void result(uniform float RET[]) {
    uniform float expected = 0;
    RET[programIndex] = expected;
}
|
||||
17
tests/foreach-6.ispc
Normal file
17
tests/foreach-6.ispc
Normal file
@@ -0,0 +1,17 @@
|
||||
|
||||
// Returns programCount.
export uniform int width() {
    return programCount;
}
|
||||
|
||||
|
||||
uniform int foo(int i);
|
||||
|
||||
// Clears the first programCount entries of RET, then uses a foreach with a
// negative start value, over [-2, programCount-2), to add 1234 to
// RET[k + 2].
export void f_f(uniform float RET[], uniform float aFOO[]) {
    for (uniform int n = 0; n < programCount; n++) {
        RET[n] = 0;
    }

    foreach (k = -2 ... programCount-2) {
        RET[k + 2] = RET[k + 2] + 1234;
    }
}
|
||||
|
||||
// Expected output: RET[programIndex] is 1234.
export void result(uniform float RET[]) {
    uniform float expected = 1234;
    RET[programIndex] = expected;
}
|
||||
13
tests/foreach-7.ispc
Normal file
13
tests/foreach-7.ispc
Normal file
@@ -0,0 +1,13 @@
|
||||
|
||||
// Returns programCount.
export uniform int width() {
    return programCount;
}
|
||||
|
||||
// Accumulates aFOO[k] for k in [0, 6) into a varying partial sum using
// foreach, then stores the reduce_add of that partial sum in
// RET[programIndex].
export void f_f(uniform float RET[], uniform float aFOO[]) {
    float partial = 0;
    foreach (k = 0 ... 6) {
        partial = partial + aFOO[k];
    }

    uniform float total = reduce_add(partial);
    RET[programIndex] = total;
}
|
||||
|
||||
// Expected output: RET[programIndex] is 21.
export void result(uniform float RET[]) {
    uniform float expected = 21;
    RET[programIndex] = expected;
}
|
||||
23
tests/foreach-8.ispc
Normal file
23
tests/foreach-8.ispc
Normal file
@@ -0,0 +1,23 @@
|
||||
|
||||
// Returns programCount.
export uniform int width() {
    return programCount;
}
|
||||
|
||||
// Sums aFOO over [0, 6) ten times in two ways: once with a two-dimensional
// foreach, and once with explicit uniform loops that build a per-lane index
// and skip indices of 6 or more.  The difference of the two per-lane sums is
// stored in RET[programIndex].
export void f_f(uniform float RET[], uniform float aFOO[]) {
    float viaForeach = 0;
    float viaLoops = 0;

    foreach (rep = 0 ... 10, k = 0 ... 6) {
        viaForeach = viaForeach + aFOO[k];
    }

    for (uniform int rep = 0; rep < 10; rep++) {
        for (uniform int base = 0; base < 6; base = base + programCount) {
            int elt = base + programIndex;
            if (elt < 6) {
                viaLoops = viaLoops + aFOO[elt];
            }
        }
    }

    RET[programIndex] = viaForeach - viaLoops;
}
|
||||
|
||||
// Expected output: RET[programIndex] is 0.
export void result(uniform float RET[]) {
    uniform float expected = 0;
    RET[programIndex] = expected;
}
|
||||
29
tests/foreach-9.ispc
Normal file
29
tests/foreach-9.ispc
Normal file
@@ -0,0 +1,29 @@
|
||||
|
||||
// Returns programCount.
export uniform int width() {
    return programCount;
}
|
||||
|
||||
#define NA 1
|
||||
#define NB 3
|
||||
|
||||
// Zeroes an NA x NB array, increments every element once with a
// two-dimensional foreach, and stores in RET[programIndex] the number of
// elements whose value is not exactly 1.
export void f_f(uniform float RET[], uniform float aFOO[]) {
    uniform int a[NA][NB];

    for (uniform int r = 0; r < NA; r++) {
        for (uniform int c = 0; c < NB; c++) {
            a[r][c] = 0;
        }
    }

    foreach (i = 0 ... NA, j = 0 ... NB) {
        a[i][j] = a[i][j] + 1;
    }

    uniform int mismatches = 0;
    for (uniform int r = 0; r < NA; r++) {
        for (uniform int c = 0; c < NB; c++) {
            if (a[r][c] != 1) {
                mismatches = mismatches + 1;
            }
        }
    }

    RET[programIndex] = mismatches;
}
|
||||
|
||||
// Expected output: RET[programIndex] is 0.
export void result(uniform float RET[]) {
    uniform float expected = 0;
    RET[programIndex] = expected;
}
|
||||
Reference in New Issue
Block a user