Add foreach and foreach_tiled looping constructs
These make it easier to iterate over arbitrary amounts of data elements; specifically, they automatically handle the "ragged extra bits" that come up when the number of elements to be processed isn't evenly divided by programCount. TODO: documentation
This commit is contained in:
75
ctx.cpp
75
ctx.cpp
@@ -68,12 +68,19 @@ struct CFInfo {
|
|||||||
llvm::Value *savedContinueLanesPtr,
|
llvm::Value *savedContinueLanesPtr,
|
||||||
llvm::Value *savedMask, llvm::Value *savedLoopMask);
|
llvm::Value *savedMask, llvm::Value *savedLoopMask);
|
||||||
|
|
||||||
|
static CFInfo *GetForeach(llvm::BasicBlock *breakTarget,
|
||||||
|
llvm::BasicBlock *continueTarget,
|
||||||
|
llvm::Value *savedBreakLanesPtr,
|
||||||
|
llvm::Value *savedContinueLanesPtr,
|
||||||
|
llvm::Value *savedMask, llvm::Value *savedLoopMask);
|
||||||
|
|
||||||
bool IsIf() { return type == If; }
|
bool IsIf() { return type == If; }
|
||||||
bool IsLoop() { return type == Loop; }
|
bool IsLoop() { return type == Loop; }
|
||||||
|
bool IsForeach() { return type == Foreach; }
|
||||||
bool IsVaryingType() { return !isUniform; }
|
bool IsVaryingType() { return !isUniform; }
|
||||||
bool IsUniform() { return isUniform; }
|
bool IsUniform() { return isUniform; }
|
||||||
|
|
||||||
enum CFType { If, Loop };
|
enum CFType { If, Loop, Foreach };
|
||||||
CFType type;
|
CFType type;
|
||||||
bool isUniform;
|
bool isUniform;
|
||||||
llvm::BasicBlock *savedBreakTarget, *savedContinueTarget;
|
llvm::BasicBlock *savedBreakTarget, *savedContinueTarget;
|
||||||
@@ -102,6 +109,19 @@ private:
|
|||||||
savedMask = sm;
|
savedMask = sm;
|
||||||
savedLoopMask = lm;
|
savedLoopMask = lm;
|
||||||
}
|
}
|
||||||
|
/** Constructor used only for 'foreach' control-flow records: it
    captures the enclosing break/continue targets, the break/continue
    lane-mask pointers and the mask/loop-mask values that were current
    when the foreach statement was entered.  The record is always
    marked as non-uniform. */
CFInfo(CFType t, llvm::BasicBlock *bt, llvm::BasicBlock *ct,
       llvm::Value *sb, llvm::Value *sc, llvm::Value *sm,
       llvm::Value *lm) {
    // This overload must only be reached via CFInfo::GetForeach().
    assert(t == Foreach);

    // Mask state on entry to the loop.
    savedLoopMask = lm;
    savedMask = sm;

    // Per-lane break/continue bookkeeping of the enclosing construct.
    savedContinueLanesPtr = sc;
    savedBreakLanesPtr = sb;

    // Jump targets of the enclosing construct.
    savedContinueTarget = ct;
    savedBreakTarget = bt;

    isUniform = false;
    type = t;
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@@ -122,6 +142,18 @@ CFInfo::GetLoop(bool isUniform, llvm::BasicBlock *breakTarget,
|
|||||||
savedMask, savedLoopMask);
|
savedMask, savedLoopMask);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Factory for the control-flow record that describes a 'foreach'
    statement.  All arguments are the pieces of enclosing state that
    should be put back when the foreach statement is finished. */
CFInfo *
CFInfo::GetForeach(llvm::BasicBlock *breakTarget,
                   llvm::BasicBlock *continueTarget,
                   llvm::Value *savedBreakLanesPtr,
                   llvm::Value *savedContinueLanesPtr,
                   llvm::Value *savedMask, llvm::Value *savedForeachMask) {
    CFInfo *info = new CFInfo(Foreach,
                              breakTarget, continueTarget,
                              savedBreakLanesPtr, savedContinueLanesPtr,
                              savedMask, savedForeachMask);
    return info;
}
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
FunctionEmitContext::FunctionEmitContext(Function *func, Symbol *funSym,
|
FunctionEmitContext::FunctionEmitContext(Function *func, Symbol *funSym,
|
||||||
@@ -422,7 +454,7 @@ FunctionEmitContext::StartLoop(llvm::BasicBlock *bt, llvm::BasicBlock *ct,
|
|||||||
|
|
||||||
void
|
void
|
||||||
FunctionEmitContext::EndLoop() {
|
FunctionEmitContext::EndLoop() {
|
||||||
assert(controlFlowInfo.size() && !controlFlowInfo.back()->IsIf());
|
assert(controlFlowInfo.size() && controlFlowInfo.back()->IsLoop());
|
||||||
CFInfo *ci = controlFlowInfo.back();
|
CFInfo *ci = controlFlowInfo.back();
|
||||||
controlFlowInfo.pop_back();
|
controlFlowInfo.pop_back();
|
||||||
|
|
||||||
@@ -444,6 +476,36 @@ FunctionEmitContext::EndLoop() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void
|
||||||
|
FunctionEmitContext::StartForeach() {
|
||||||
|
// Store the current values of various loop-related state so that we
|
||||||
|
// can restore it when we exit this loop.
|
||||||
|
llvm::Value *oldMask = GetInternalMask();
|
||||||
|
controlFlowInfo.push_back(CFInfo::GetForeach(breakTarget, continueTarget, breakLanesPtr,
|
||||||
|
continueLanesPtr, oldMask, loopMask));
|
||||||
|
continueLanesPtr = breakLanesPtr = NULL;
|
||||||
|
breakTarget = NULL;
|
||||||
|
continueTarget = NULL;
|
||||||
|
loopMask = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void
|
||||||
|
FunctionEmitContext::EndForeach() {
|
||||||
|
assert(controlFlowInfo.size() && controlFlowInfo.back()->IsForeach());
|
||||||
|
CFInfo *ci = controlFlowInfo.back();
|
||||||
|
controlFlowInfo.pop_back();
|
||||||
|
|
||||||
|
// Restore the break/continue state information to what it was before
|
||||||
|
// we went into this loop.
|
||||||
|
breakTarget = ci->savedBreakTarget;
|
||||||
|
continueTarget = ci->savedContinueTarget;
|
||||||
|
breakLanesPtr = ci->savedBreakLanesPtr;
|
||||||
|
continueLanesPtr = ci->savedContinueLanesPtr;
|
||||||
|
loopMask = ci->savedLoopMask;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
FunctionEmitContext::restoreMaskGivenReturns(llvm::Value *oldMask) {
|
FunctionEmitContext::restoreMaskGivenReturns(llvm::Value *oldMask) {
|
||||||
if (!bblock)
|
if (!bblock)
|
||||||
@@ -638,6 +700,15 @@ FunctionEmitContext::VaryingCFDepth() const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool
|
||||||
|
FunctionEmitContext::InForeachLoop() const {
|
||||||
|
for (unsigned int i = 0; i < controlFlowInfo.size(); ++i)
|
||||||
|
if (controlFlowInfo[i]->IsForeach())
|
||||||
|
return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
FunctionEmitContext::CurrentLanesReturned(Expr *expr, bool doCoherenceCheck) {
|
FunctionEmitContext::CurrentLanesReturned(Expr *expr, bool doCoherenceCheck) {
|
||||||
const Type *returnType = function->GetReturnType();
|
const Type *returnType = function->GetReturnType();
|
||||||
|
|||||||
6
ctx.h
6
ctx.h
@@ -159,6 +159,10 @@ public:
|
|||||||
finished. */
|
finished. */
|
||||||
void EndLoop();
|
void EndLoop();
|
||||||
|
|
||||||
|
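/** Notifies the FunctionEmitContext that code for a 'foreach' statement
    is about to be emitted (StartForeach) or has been completely emitted
    (EndForeach).  StartForeach saves the current break/continue targets,
    break/continue lane pointers, mask and loop mask and then clears the
    loop-related state; EndForeach restores the saved break/continue
    state and loop mask. */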
|
||||||
|
void StartForeach();
|
||||||
|
void EndForeach();
|
||||||
|
|
||||||
/** Emit code for a 'break' statement in a loop. If doCoherenceCheck
|
/** Emit code for a 'break' statement in a loop. If doCoherenceCheck
|
||||||
is true, then if we're in a 'varying' loop, code will be emitted to
|
is true, then if we're in a 'varying' loop, code will be emitted to
|
||||||
see if all of the lanes want to break, in which case a jump to the
|
see if all of the lanes want to break, in which case a jump to the
|
||||||
@@ -183,6 +187,8 @@ public:
|
|||||||
flow */
|
flow */
|
||||||
int VaryingCFDepth() const;
|
int VaryingCFDepth() const;
|
||||||
|
|
||||||
|
bool InForeachLoop() const;
|
||||||
|
|
||||||
/** Called to generate code for 'return' statement; value is the
|
/** Called to generate code for 'return' statement; value is the
|
||||||
expression in the return statement (if non-NULL), and
|
expression in the return statement (if non-NULL), and
|
||||||
doCoherenceCheck indicates whether instructions should be generated
|
doCoherenceCheck indicates whether instructions should be generated
|
||||||
|
|||||||
@@ -60,16 +60,16 @@ export void mandelbrot_ispc(uniform float x0, uniform float y0,
|
|||||||
// Note that we'll be doing programCount computations in parallel,
|
// Note that foreach processes up to programCount values of i in
|
||||||
// so increment i by that much. This assumes that width evenly
|
// parallel and takes care of any leftover elements itself, so width
|
||||||
// divides programCount.
|
// does not need to be a multiple of programCount.
|
||||||
for (uniform int i = 0; i < width; i += programCount) {
|
foreach (i = 0 ... width) {
|
||||||
// Figure out the position on the complex plane to compute the
|
// Figure out the position on the complex plane to compute the
|
||||||
// number of iterations at. Note that the x values are
|
// number of iterations at. Note that the x values are
|
||||||
// different across different program instances, since its
|
// different across different program instances, since its
|
||||||
// initializer incorporates the value of the programIndex
|
// initializer incorporates the value of the programIndex
|
||||||
// variable.
|
// variable.
|
||||||
float x = x0 + (programIndex + i) * dx;
|
float x = x0 + i * dx;
|
||||||
float y = y0 + j * dy;
|
float y = y0 + j * dy;
|
||||||
|
|
||||||
int index = j * width + i + programIndex;
|
int index = j * width + i;
|
||||||
output[index] = mandel(x, y, maxIterations);
|
output[index] = mandel(x, y, maxIterations);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -61,14 +61,12 @@ mandelbrot_scanlines(uniform int ybase, uniform int span,
|
|||||||
uniform int ystart = ybase + taskIndex * span;
|
uniform int ystart = ybase + taskIndex * span;
|
||||||
uniform int yend = ystart + span;
|
uniform int yend = ystart + span;
|
||||||
|
|
||||||
for (uniform int j = ystart; j < yend; ++j) {
|
foreach (yi = ystart ... yend, xi = 0 ... width) {
|
||||||
for (uniform int i = 0; i < width; i += programCount) {
|
float x = x0 + xi * dx;
|
||||||
float x = x0 + (programIndex + i) * dx;
|
float y = y0 + yi * dy;
|
||||||
float y = y0 + j * dy;
|
|
||||||
|
|
||||||
int index = j * width + i + programIndex;
|
int index = yi * width + xi;
|
||||||
output[index] = mandel(x, y, maxIterations);
|
output[index] = mandel(x, y, maxIterations);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -59,15 +59,13 @@ export void
|
|||||||
black_scholes_ispc(uniform float Sa[], uniform float Xa[], uniform float Ta[],
|
black_scholes_ispc(uniform float Sa[], uniform float Xa[], uniform float Ta[],
|
||||||
uniform float ra[], uniform float va[],
|
uniform float ra[], uniform float va[],
|
||||||
uniform float result[], uniform int count) {
|
uniform float result[], uniform int count) {
|
||||||
for (uniform int i = 0; i < count; i += programCount) {
|
foreach (i = 0 ... count) {
|
||||||
float S = Sa[i + programIndex], X = Xa[i + programIndex];
|
float S = Sa[i], X = Xa[i], T = Ta[i], r = ra[i], v = va[i];
|
||||||
float T = Ta[i + programIndex], r = ra[i + programIndex];
|
|
||||||
float v = va[i + programIndex];
|
|
||||||
|
|
||||||
float d1 = (log(S/X) + (r + v * v * .5f) * T) / (v * sqrt(T));
|
float d1 = (log(S/X) + (r + v * v * .5f) * T) / (v * sqrt(T));
|
||||||
float d2 = d1 - v * sqrt(T);
|
float d2 = d1 - v * sqrt(T);
|
||||||
|
|
||||||
result[i + programIndex] = S * CND(d1) - X * exp(-r * T) * CND(d2);
|
result[i] = S * CND(d1) - X * exp(-r * T) * CND(d2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -78,10 +76,8 @@ binomial_put_ispc(uniform float Sa[], uniform float Xa[], uniform float Ta[],
|
|||||||
uniform float result[], uniform int count) {
|
uniform float result[], uniform int count) {
|
||||||
float V[BINOMIAL_NUM];
|
float V[BINOMIAL_NUM];
|
||||||
|
|
||||||
for (uniform int i = 0; i < count; i += programCount) {
|
foreach (i = 0 ... count) {
|
||||||
float S = Sa[i + programIndex], X = Xa[i + programIndex];
|
float S = Sa[i], X = Xa[i], T = Ta[i], r = ra[i], v = va[i];
|
||||||
float T = Ta[i + programIndex], r = ra[i + programIndex];
|
|
||||||
float v = va[i + programIndex];
|
|
||||||
|
|
||||||
float dt = T / BINOMIAL_NUM;
|
float dt = T / BINOMIAL_NUM;
|
||||||
float u = exp(v * sqrt(dt));
|
float u = exp(v * sqrt(dt));
|
||||||
@@ -98,6 +94,6 @@ binomial_put_ispc(uniform float Sa[], uniform float Xa[], uniform float Ta[],
|
|||||||
for (uniform int k = 0; k < j; ++k)
|
for (uniform int k = 0; k < j; ++k)
|
||||||
V[k] = ((1 - Pu) * V[k] + Pu * V[k + 1]) / disc;
|
V[k] = ((1 - Pu) * V[k] + Pu * V[k + 1]) / disc;
|
||||||
|
|
||||||
result[i + programIndex] = V[0];
|
result[i] = V[0];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -199,10 +199,8 @@ int main(int argc, char *argv[]) {
|
|||||||
}
|
}
|
||||||
fclose(f);
|
fclose(f);
|
||||||
|
|
||||||
// round image resolution up to multiple of 16 to make things easy for
|
int height = int(baseHeight * scale);
|
||||||
// the code that assigns pixels to ispc program instances
|
int width = int(baseWidth * scale);
|
||||||
int height = (int(baseHeight * scale) + 0xf) & ~0xf;
|
|
||||||
int width = (int(baseWidth * scale) + 0xf) & ~0xf;
|
|
||||||
|
|
||||||
// allocate images; one to hold hit object ids, one to hold depth to
|
// allocate images; one to hold hit object ids, one to hold depth to
|
||||||
// the first intersection
|
// the first intersection
|
||||||
|
|||||||
@@ -244,34 +244,15 @@ static void raytrace_tile(uniform int x0, uniform int x1,
|
|||||||
uniform float widthScale = (float)(baseWidth) / (float)(width);
|
uniform float widthScale = (float)(baseWidth) / (float)(width);
|
||||||
uniform float heightScale = (float)(baseHeight) / (float)(height);
|
uniform float heightScale = (float)(baseHeight) / (float)(height);
|
||||||
|
|
||||||
static const uniform float udx[16] = { 0, 1, 0, 1, 2, 3, 2, 3,
|
foreach_tiled (y = y0 ... y1, x = x0 ... x1) {
|
||||||
0, 1, 0, 1, 2, 3, 2, 3 };
|
Ray ray;
|
||||||
static const uniform float udy[16] = { 0, 0, 1, 1, 0, 0, 1, 1,
|
generateRay(raster2camera, camera2world, x*widthScale,
|
||||||
2, 2, 3, 3, 2, 2, 3, 3 };
|
y*heightScale, ray);
|
||||||
|
BVHIntersect(nodes, triangles, ray);
|
||||||
|
|
||||||
// The outer loops are always over blocks of 4x4 pixels
|
int offset = y * width + x;
|
||||||
for (uniform int y = y0; y < y1; y += 4) {
|
image[offset] = ray.maxt;
|
||||||
for (uniform int x = x0; x < x1; x += 4) {
|
id[offset] = ray.hitId;
|
||||||
// Now we have a block of 4x4=16 pixels to process; it will
|
|
||||||
// take 16/programCount iterations of this loop to process
|
|
||||||
// them.
|
|
||||||
for (uniform int o = 0; o < 16 / programCount; ++o) {
|
|
||||||
// Map program instances to samples in the udx/udy arrays
|
|
||||||
// to figure out which pixel each program instance is
|
|
||||||
// responsible for
|
|
||||||
const float dx = udx[o * programCount + programIndex];
|
|
||||||
const float dy = udy[o * programCount + programIndex];
|
|
||||||
|
|
||||||
Ray ray;
|
|
||||||
generateRay(raster2camera, camera2world, (x+dx)*widthScale,
|
|
||||||
(y+dy)*heightScale, ray);
|
|
||||||
BVHIntersect(nodes, triangles, ray);
|
|
||||||
|
|
||||||
int offset = (y + (int)dy) * width + (x + (int)dx);
|
|
||||||
image[offset] = ray.maxt;
|
|
||||||
id[offset] = ray.hitId;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -43,9 +43,8 @@ stencil_step(uniform int x0, uniform int x1,
|
|||||||
|
|
||||||
for (uniform int z = z0; z < z1; ++z) {
|
for (uniform int z = z0; z < z1; ++z) {
|
||||||
for (uniform int y = y0; y < y1; ++y) {
|
for (uniform int y = y0; y < y1; ++y) {
|
||||||
// Assumes that (x1-x0) % programCount == 0
|
foreach (x = x0 ... x1) {
|
||||||
for (uniform int x = x0; x < x1; x += programCount) {
|
int index = (z * Nxy) + (y * Nx) + x;
|
||||||
int index = (z * Nxy) + (y * Nx) + x + programIndex;
|
|
||||||
#define A_cur(x, y, z) Ain[index + (x) + ((y) * Nx) + ((z) * Nxy)]
|
#define A_cur(x, y, z) Ain[index + (x) + ((y) * Nx) + ((z) * Nxy)]
|
||||||
#define A_next(x, y, z) Aout[index + (x) + ((y) * Nx) + ((z) * Nxy)]
|
#define A_next(x, y, z) Aout[index + (x) + ((y) * Nx) + ((z) * Nxy)]
|
||||||
float div = coef[0] * A_cur(0, 0, 0) +
|
float div = coef[0] * A_cur(0, 0, 0) +
|
||||||
|
|||||||
@@ -310,11 +310,7 @@ volume_tile(uniform int x0, uniform int y0, uniform int x1,
|
|||||||
// by 4.
|
// by 4.
|
||||||
for (uniform int y = y0; y < y1; y += 4) {
|
for (uniform int y = y0; y < y1; y += 4) {
|
||||||
for (uniform int x = x0; x < x1; x += 4) {
|
for (uniform int x = x0; x < x1; x += 4) {
|
||||||
// For each such tile, process programCount pixels at a time,
|
foreach (o = 0 ... 16) {
|
||||||
// until we've done all 16 of them. Thus, we're also assuming
|
|
||||||
// that programCount <= 16 and that 16 is evenly dividible by
|
|
||||||
// programCount.
|
|
||||||
for (uniform int o = 0; o < 16; o += programCount) {
|
|
||||||
// These two arrays encode the mapping from [0,15] to
|
// These two arrays encode the mapping from [0,15] to
|
||||||
// offsets within the 4x4 pixel block so that we render
|
// offsets within the 4x4 pixel block so that we render
|
||||||
// each pixel inside the block
|
// each pixel inside the block
|
||||||
@@ -324,8 +320,7 @@ volume_tile(uniform int x0, uniform int y0, uniform int x1,
|
|||||||
2, 2, 3, 3, 2, 2, 3, 3 };
|
2, 2, 3, 3, 2, 2, 3, 3 };
|
||||||
|
|
||||||
// Figure out the pixel to render for this program instance
|
// Figure out the pixel to render for this program instance
|
||||||
int xo = x + xoffsets[o + programIndex];
|
int xo = x + xoffsets[o], yo = y + yoffsets[o];
|
||||||
int yo = y + yoffsets[o + programIndex];
|
|
||||||
|
|
||||||
// Use viewing parameters to compute the corresponding ray
|
// Use viewing parameters to compute the corresponding ray
|
||||||
// for the pixel
|
// for the pixel
|
||||||
|
|||||||
3
lex.ll
3
lex.ll
@@ -101,6 +101,8 @@ extern { return TOKEN_EXTERN; }
|
|||||||
false { return TOKEN_FALSE; }
|
false { return TOKEN_FALSE; }
|
||||||
float { return TOKEN_FLOAT; }
|
float { return TOKEN_FLOAT; }
|
||||||
for { return TOKEN_FOR; }
|
for { return TOKEN_FOR; }
|
||||||
|
foreach { return TOKEN_FOREACH; }
|
||||||
|
foreach_tiled { return TOKEN_FOREACH_TILED; }
|
||||||
goto { return TOKEN_GOTO; }
|
goto { return TOKEN_GOTO; }
|
||||||
if { return TOKEN_IF; }
|
if { return TOKEN_IF; }
|
||||||
inline { return TOKEN_INLINE; }
|
inline { return TOKEN_INLINE; }
|
||||||
@@ -132,6 +134,7 @@ varying { return TOKEN_VARYING; }
|
|||||||
void { return TOKEN_VOID; }
|
void { return TOKEN_VOID; }
|
||||||
while { return TOKEN_WHILE; }
|
while { return TOKEN_WHILE; }
|
||||||
\"C\" { return TOKEN_STRING_C_LITERAL; }
|
\"C\" { return TOKEN_STRING_C_LITERAL; }
|
||||||
|
\.\.\. { return TOKEN_DOTDOTDOT; }
|
||||||
|
|
||||||
L?\"(\\.|[^\\"])*\" { lStringConst(yylval, yylloc); return TOKEN_STRING_LITERAL; }
|
L?\"(\\.|[^\\"])*\" { lStringConst(yylval, yylloc); return TOKEN_STRING_LITERAL; }
|
||||||
|
|
||||||
|
|||||||
122
parse.yy
122
parse.yy
@@ -62,8 +62,12 @@
|
|||||||
(Current).name = NULL; /* new */ \
|
(Current).name = NULL; /* new */ \
|
||||||
} \
|
} \
|
||||||
while (0)
|
while (0)
|
||||||
|
|
||||||
|
struct ForeachDimension;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
%{
|
%{
|
||||||
|
|
||||||
#include "ispc.h"
|
#include "ispc.h"
|
||||||
@@ -102,11 +106,11 @@ static void lFinalizeEnumeratorSymbols(std::vector<Symbol *> &enums,
|
|||||||
const EnumType *enumType);
|
const EnumType *enumType);
|
||||||
|
|
||||||
static const char *lBuiltinTokens[] = {
|
static const char *lBuiltinTokens[] = {
|
||||||
"bool", "break", "case", "cbreak", "ccontinue", "cdo", "cfor",
|
"assert", "bool", "break", "case", "cbreak", "ccontinue", "cdo", "cfor",
|
||||||
"cif", "cwhile", "const", "continue", "creturn", "default", "do", "double",
|
"cif", "cwhile", "const", "continue", "creturn", "default", "do", "double",
|
||||||
"else", "enum", "export", "extern", "false", "float", "for", "goto", "if",
|
"else", "enum", "export", "extern", "false", "float", "for", "foreach",
|
||||||
"inline", "int", "int8", "int16", "int32", "int64", "launch", "NULL",
|
"foreach_tiled", "goto", "if", "inline", "int", "int8", "int16",
|
||||||
"print", "return", "signed", "sizeof",
|
"int32", "int64", "launch", "NULL", "print", "return", "signed", "sizeof",
|
||||||
"static", "struct", "switch", "sync", "task", "true", "typedef", "uniform",
|
"static", "struct", "switch", "sync", "task", "true", "typedef", "uniform",
|
||||||
"unsigned", "varying", "void", "while", NULL
|
"unsigned", "varying", "void", "while", NULL
|
||||||
};
|
};
|
||||||
@@ -116,10 +120,26 @@ static const char *lParamListTokens[] = {
|
|||||||
"int8", "int16", "int32", "int64", "signed", "struct", "true",
|
"int8", "int16", "int32", "int64", "signed", "struct", "true",
|
||||||
"uniform", "unsigned", "varying", "void", NULL
|
"uniform", "unsigned", "varying", "void", NULL
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct ForeachDimension {
|
||||||
|
ForeachDimension(Symbol *s = NULL, Expr *b = NULL, Expr *e = NULL) {
|
||||||
|
sym = s;
|
||||||
|
beginExpr = b;
|
||||||
|
endExpr = e;
|
||||||
|
}
|
||||||
|
Symbol *sym;
|
||||||
|
Expr *beginExpr, *endExpr;
|
||||||
|
};
|
||||||
|
|
||||||
%}
|
%}
|
||||||
|
|
||||||
%union {
|
%union {
|
||||||
|
int32_t int32Val;
|
||||||
|
double floatVal;
|
||||||
|
int64_t int64Val;
|
||||||
|
std::string *stringVal;
|
||||||
|
const char *constCharPtr;
|
||||||
|
|
||||||
Expr *expr;
|
Expr *expr;
|
||||||
ExprList *exprList;
|
ExprList *exprList;
|
||||||
const Type *type;
|
const Type *type;
|
||||||
@@ -136,13 +156,10 @@ static const char *lParamListTokens[] = {
|
|||||||
StructDeclaration *structDeclaration;
|
StructDeclaration *structDeclaration;
|
||||||
std::vector<StructDeclaration *> *structDeclarationList;
|
std::vector<StructDeclaration *> *structDeclarationList;
|
||||||
const EnumType *enumType;
|
const EnumType *enumType;
|
||||||
Symbol *enumerator;
|
Symbol *symbol;
|
||||||
std::vector<Symbol *> *enumeratorList;
|
std::vector<Symbol *> *symbolList;
|
||||||
int32_t int32Val;
|
ForeachDimension *foreachDimension;
|
||||||
double floatVal;
|
std::vector<ForeachDimension *> *foreachDimensionList;
|
||||||
int64_t int64Val;
|
|
||||||
std::string *stringVal;
|
|
||||||
const char *constCharPtr;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -163,7 +180,7 @@ static const char *lParamListTokens[] = {
|
|||||||
%token TOKEN_ENUM TOKEN_STRUCT TOKEN_TRUE TOKEN_FALSE
|
%token TOKEN_ENUM TOKEN_STRUCT TOKEN_TRUE TOKEN_FALSE
|
||||||
|
|
||||||
%token TOKEN_CASE TOKEN_DEFAULT TOKEN_IF TOKEN_ELSE TOKEN_SWITCH
|
%token TOKEN_CASE TOKEN_DEFAULT TOKEN_IF TOKEN_ELSE TOKEN_SWITCH
|
||||||
%token TOKEN_WHILE TOKEN_DO TOKEN_LAUNCH
|
%token TOKEN_WHILE TOKEN_DO TOKEN_LAUNCH TOKEN_FOREACH TOKEN_FOREACH_TILED TOKEN_DOTDOTDOT
|
||||||
%token TOKEN_FOR TOKEN_GOTO TOKEN_CONTINUE TOKEN_BREAK TOKEN_RETURN
|
%token TOKEN_FOR TOKEN_GOTO TOKEN_CONTINUE TOKEN_BREAK TOKEN_RETURN
|
||||||
%token TOKEN_CIF TOKEN_CDO TOKEN_CFOR TOKEN_CWHILE TOKEN_CBREAK
|
%token TOKEN_CIF TOKEN_CDO TOKEN_CFOR TOKEN_CWHILE TOKEN_CBREAK
|
||||||
%token TOKEN_CCONTINUE TOKEN_CRETURN TOKEN_SYNC TOKEN_PRINT TOKEN_ASSERT
|
%token TOKEN_CCONTINUE TOKEN_CRETURN TOKEN_SYNC TOKEN_PRINT TOKEN_ASSERT
|
||||||
@@ -194,8 +211,8 @@ static const char *lParamListTokens[] = {
|
|||||||
%type <structDeclaration> struct_declaration
|
%type <structDeclaration> struct_declaration
|
||||||
%type <structDeclarationList> struct_declaration_list
|
%type <structDeclarationList> struct_declaration_list
|
||||||
|
|
||||||
%type <enumeratorList> enumerator_list
|
%type <symbolList> enumerator_list
|
||||||
%type <enumerator> enumerator
|
%type <symbol> enumerator foreach_identifier
|
||||||
%type <enumType> enum_specifier
|
%type <enumType> enum_specifier
|
||||||
|
|
||||||
%type <type> specifier_qualifier_list struct_or_union_specifier
|
%type <type> specifier_qualifier_list struct_or_union_specifier
|
||||||
@@ -211,6 +228,9 @@ static const char *lParamListTokens[] = {
|
|||||||
%type <constCharPtr> struct_or_union_name enum_identifier
|
%type <constCharPtr> struct_or_union_name enum_identifier
|
||||||
%type <int32Val> int_constant soa_width_specifier
|
%type <int32Val> int_constant soa_width_specifier
|
||||||
|
|
||||||
|
%type <foreachDimension> foreach_dimension_specifier
|
||||||
|
%type <foreachDimensionList> foreach_dimension_list
|
||||||
|
|
||||||
%start translation_unit
|
%start translation_unit
|
||||||
%%
|
%%
|
||||||
|
|
||||||
@@ -1295,6 +1315,40 @@ cfor_scope
|
|||||||
: TOKEN_CFOR { m->symbolTable->PushScope(); }
|
: TOKEN_CFOR { m->symbolTable->PushScope(); }
|
||||||
;
|
;
|
||||||
|
|
||||||
|
/* Both foreach keywords open a new symbol-table scope as soon as the
   keyword is seen, so that the dimension variables declared in the
   loop header are local to the statement.  The matching PopScope() is
   done by the iteration_statement rule. */
foreach_scope
    : TOKEN_FOREACH { m->symbolTable->PushScope(); }
    ;

foreach_tiled_scope
    : TOKEN_FOREACH_TILED { m->symbolTable->PushScope(); }
    ;

/* Name of a foreach dimension variable; it is given the type
   AtomicType::VaryingConstInt32. */
foreach_identifier
    : TOKEN_IDENTIFIER
    {
        $$ = new Symbol(yytext, @1, AtomicType::VaryingConstInt32);
    }
    ;

/* One "identifier = begin ... end" clause. */
foreach_dimension_specifier
    : foreach_identifier '=' assignment_expression TOKEN_DOTDOTDOT assignment_expression
    {
        $$ = new ForeachDimension($1, $3, $5);
    }
    ;

/* Comma-separated list of dimension clauses, outermost first. */
foreach_dimension_list
    : foreach_dimension_specifier
    {
        $$ = new std::vector<ForeachDimension *>;
        $$->push_back($1);
    }
    | foreach_dimension_list ',' foreach_dimension_specifier
    {
        /* Set $$ explicitly: a rule with its own action does not get
           the documented default "$$ = $1". */
        $$ = $1;
        $$->push_back($3);
    }
    ;
|
||||||
|
|
||||||
iteration_statement
|
iteration_statement
|
||||||
: TOKEN_WHILE '(' expression ')' statement
|
: TOKEN_WHILE '(' expression ')' statement
|
||||||
{ $$ = new ForStmt(NULL, $3, NULL, $5, false, @1); }
|
{ $$ = new ForStmt(NULL, $3, NULL, $5, false, @1); }
|
||||||
@@ -1320,6 +1374,44 @@ iteration_statement
|
|||||||
{ $$ = new ForStmt($3, $4, new ExprStmt($5, @5), $7, true, @1);
|
{ $$ = new ForStmt($3, $4, new ExprStmt($5, @5), $7, true, @1);
|
||||||
m->symbolTable->PopScope();
|
m->symbolTable->PopScope();
|
||||||
}
|
}
|
||||||
|
| foreach_scope '(' foreach_dimension_list ')'
    {
        /* Mid-rule action: declare the dimension variables before the
           loop body is parsed so that the body can refer to them. */
        std::vector<ForeachDimension *> &dims = *$3;
        for (unsigned int i = 0; i < dims.size(); ++i)
            m->symbolTable->AddVariable(dims[i]->sym);
    }
    statement
    {
        /* $5 is the mid-rule action above, so the body is $6. */
        std::vector<ForeachDimension *> &dims = *$3;
        std::vector<Symbol *> syms;
        std::vector<Expr *> begins, ends;
        for (unsigned int i = 0; i < dims.size(); ++i) {
            syms.push_back(dims[i]->sym);
            begins.push_back(dims[i]->beginExpr);
            ends.push_back(dims[i]->endExpr);
            /* The record only carried the three pointers to this
               point; it is no longer needed once they are copied. */
            delete dims[i];
        }
        $$ = new ForeachStmt(syms, begins, ends, $6, false, @1);
        /* Release the temporary list built by foreach_dimension_list. */
        delete $3;
        m->symbolTable->PopScope();
    }
    | foreach_tiled_scope '(' foreach_dimension_list ')'
    {
        /* Mid-rule action: declare the dimension variables before the
           loop body is parsed so that the body can refer to them. */
        std::vector<ForeachDimension *> &dims = *$3;
        for (unsigned int i = 0; i < dims.size(); ++i)
            m->symbolTable->AddVariable(dims[i]->sym);
    }
    statement
    {
        /* Same as the untiled case, but the statement is marked tiled. */
        std::vector<ForeachDimension *> &dims = *$3;
        std::vector<Symbol *> syms;
        std::vector<Expr *> begins, ends;
        for (unsigned int i = 0; i < dims.size(); ++i) {
            syms.push_back(dims[i]->sym);
            begins.push_back(dims[i]->beginExpr);
            ends.push_back(dims[i]->endExpr);
            delete dims[i];
        }
        $$ = new ForeachStmt(syms, begins, ends, $6, true, @1);
        delete $3;
        m->symbolTable->PopScope();
    }
|
||||||
;
|
;
|
||||||
|
|
||||||
jump_statement
|
jump_statement
|
||||||
|
|||||||
473
stmt.cpp
473
stmt.cpp
@@ -819,6 +819,17 @@ lSafeToRunWithAllLanesOff(Stmt *stmt) {
|
|||||||
lSafeToRunWithAllLanesOff(fs->step) &&
|
lSafeToRunWithAllLanesOff(fs->step) &&
|
||||||
lSafeToRunWithAllLanesOff(fs->stmts));
|
lSafeToRunWithAllLanesOff(fs->stmts));
|
||||||
|
|
||||||
|
ForeachStmt *fes;
|
||||||
|
if ((fes = dynamic_cast<ForeachStmt *>(stmt)) != NULL) {
|
||||||
|
for (unsigned int i = 0; i < fes->startExprs.size(); ++i)
|
||||||
|
if (!lSafeToRunWithAllLanesOff(fes->startExprs[i]))
|
||||||
|
return false;
|
||||||
|
for (unsigned int i = 0; i < fes->endExprs.size(); ++i)
|
||||||
|
if (!lSafeToRunWithAllLanesOff(fes->endExprs[i]))
|
||||||
|
return false;
|
||||||
|
return lSafeToRunWithAllLanesOff(fes->stmts);
|
||||||
|
}
|
||||||
|
|
||||||
if (dynamic_cast<BreakStmt *>(stmt) != NULL ||
|
if (dynamic_cast<BreakStmt *>(stmt) != NULL ||
|
||||||
dynamic_cast<ContinueStmt *>(stmt) != NULL)
|
dynamic_cast<ContinueStmt *>(stmt) != NULL)
|
||||||
return true;
|
return true;
|
||||||
@@ -1592,6 +1603,463 @@ ContinueStmt::Print(int indent) const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////
|
||||||
|
// ForeachStmt
|
||||||
|
|
||||||
|
/** Builds a foreach statement node.  The three vectors are copied into
    the node: the per-dimension loop variables, the expressions giving
    the start of each dimension's range and the expressions giving its
    end.  's' is the loop body and 't' records whether this is the
    tiled variant of the statement. */
ForeachStmt::ForeachStmt(const std::vector<Symbol *> &lvs,
                         const std::vector<Expr *> &se,
                         const std::vector<Expr *> &ee,
                         Stmt *s, bool t, SourcePos pos)
    : Stmt(pos),
      dimVariables(lvs),
      startExprs(se),
      endExprs(ee),
      isTiled(t),
      stmts(s)
{
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Given a uniform counter value in the memory location pointed to by
|
||||||
|
uniformCounterPtr, compute the corresponding set of varying counter
|
||||||
|
values for use within the loop body.
|
||||||
|
*/
|
||||||
|
static llvm::Value *
|
||||||
|
lUpdateVaryingCounter(int dim, int nDims, FunctionEmitContext *ctx,
|
||||||
|
llvm::Value *uniformCounterPtr,
|
||||||
|
llvm::Value *varyingCounterPtr,
|
||||||
|
const std::vector<int> &spans) {
|
||||||
|
// Smear the uniform counter value out to be varying
|
||||||
|
llvm::Value *counter = ctx->LoadInst(uniformCounterPtr);
|
||||||
|
llvm::Value *smearCounter =
|
||||||
|
llvm::UndefValue::get(LLVMTypes::Int32VectorType);
|
||||||
|
for (int i = 0; i < g->target.vectorWidth; ++i)
|
||||||
|
smearCounter =
|
||||||
|
ctx->InsertInst(smearCounter, counter, i, "smear_counter");
|
||||||
|
|
||||||
|
// Figure out the offsets; this is a little bit tricky. As an example,
|
||||||
|
// consider a 2D tiled foreach loop, where we're running 8-wide and
|
||||||
|
// where the inner dimension has a stride of 4 and the outer dimension
|
||||||
|
// has a stride of 2. For the inner dimension, we want the offsets
|
||||||
|
// (0,1,2,3,0,1,2,3), and for the outer dimension we want
|
||||||
|
// (0,0,0,0,1,1,1,1).
|
||||||
|
int32_t delta[ISPC_MAX_NVEC];
|
||||||
|
for (int i = 0; i < g->target.vectorWidth; ++i) {
|
||||||
|
int d = i;
|
||||||
|
// First, account for the effect of any dimensions at deeper
|
||||||
|
// nesting levels than the current one.
|
||||||
|
int prevDimSpanCount = 1;
|
||||||
|
for (int j = dim; j < nDims-1; ++j)
|
||||||
|
prevDimSpanCount *= spans[j+1];
|
||||||
|
d /= prevDimSpanCount;
|
||||||
|
|
||||||
|
// And now with what's left, figure out our own offset
|
||||||
|
delta[i] = d % spans[dim];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add the deltas to compute the varying counter values; store the
|
||||||
|
// result to memory and then return it directly as well.
|
||||||
|
llvm::Value *varyingCounter =
|
||||||
|
ctx->BinaryOperator(llvm::Instruction::Add, smearCounter,
|
||||||
|
LLVMInt32Vector(delta), "iter_val");
|
||||||
|
ctx->StoreInst(varyingCounter, varyingCounterPtr);
|
||||||
|
return varyingCounter;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Returns the integer log2 of the given integer, rounded down: the
    index of the highest set bit.  Returns -1 for zero and for negative
    values; right-shifting a negative value never reaches zero, so
    negatives are rejected up front instead of being shifted. */
static int
lLog2(int i) {
    if (i <= 0)
        return -1;

    int ret = 0;
    // Count how many times the value can be halved before it hits zero.
    while ((i >>= 1) != 0)
        ++ret;
    return ret;
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Figure out how many elements to process in each dimension for each time
|
||||||
|
through a foreach loop. The untiled case is easy; all of the outer
|
||||||
|
dimensions up until the innermost one have a span of 1, and the
|
||||||
|
innermost one takes the entire vector width. For the tiled case, we
|
||||||
|
give wider spans to the innermost dimensions while also trying to
|
||||||
|
generate relatively square domains.
|
||||||
|
|
||||||
|
This code works recursively from outer dimensions to inner dimensions.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
lGetSpans(int dimsLeft, int nDims, int itemsLeft, bool isTiled, int *a) {
|
||||||
|
if (dimsLeft == 0) {
|
||||||
|
// Nothing left to do but give all of the remaining work to the
|
||||||
|
// innermost domain.
|
||||||
|
*a = itemsLeft;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (isTiled == false || (dimsLeft >= lLog2(itemsLeft)))
|
||||||
|
// If we're not tiled, or if there are enough dimensions left that
|
||||||
|
// giving this one any more than a span of one would mean that a
|
||||||
|
// later dimension would have to have a span of one, give this one
|
||||||
|
// a span of one to save the available items for later.
|
||||||
|
*a = 1;
|
||||||
|
else if (itemsLeft >= 16 && (dimsLeft == 1))
|
||||||
|
// Special case to have 4x4 domains for the 2D case when running
|
||||||
|
// 16-wide.
|
||||||
|
*a = 4;
|
||||||
|
else
|
||||||
|
// Otherwise give this dimension a span of two.
|
||||||
|
*a = 2;
|
||||||
|
|
||||||
|
lGetSpans(dimsLeft-1, nDims, itemsLeft / *a, isTiled, a+1);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Emit code for a foreach statement.  We effectively emit code to run the
   set of n-dimensional nested loops corresponding to the dimensionality of
   the foreach statement along with the extra logic to deal with mismatches
   between the vector width we're compiling to and the number of elements
   to process.

   Shape of the generated control flow, per dimension i (0 is outermost):

     foreach_test[i]   if counter < end: go to foreach_test[i+1] (or to
                       foreach_check_extras for the innermost dimension);
                       otherwise go to foreach_reset[i]
     foreach_step[i]   counter += span[i]; go to foreach_test[i]
     foreach_reset[i]  dimension 0: go to foreach_exit; otherwise restore
                       this dimension's mask and counter and go to
                       foreach_step[i-1]

   foreach_check_extras picks between two copies of the loop body:
   foreach_body (mask set to all-on) and foreach_do_extras (mask loaded from
   the innermost dimension's extras mask).  Both continue at the innermost
   foreach_step block.

   Nothing is emitted if there is no current basic block or no loop body.
 */
void
ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
    if (ctx->GetCurrentBasicBlock() == NULL || stmts == NULL)
        return;

    llvm::BasicBlock *bbCheckExtras = ctx->CreateBasicBlock("foreach_check_extras");
    llvm::BasicBlock *bbDoExtras = ctx->CreateBasicBlock("foreach_do_extras");
    llvm::BasicBlock *bbBody = ctx->CreateBasicBlock("foreach_body");
    llvm::BasicBlock *bbExit = ctx->CreateBasicBlock("foreach_exit");

    // Remember the mask in effect on entry so it can be put back at
    // foreach_exit.
    llvm::Value *oldMask = ctx->GetInternalMask();

    ctx->StartForeach();
    ctx->SetDebugPos(pos);
    ctx->StartScope();

    // This should be caught during typechecking
    assert(startExprs.size() == dimVariables.size() &&
           endExprs.size() == dimVariables.size());
    int nDims = (int)dimVariables.size();

    ///////////////////////////////////////////////////////////////////////
    // Setup: compute the number of items we have to work on in each
    // dimension and a number of derived values.
    std::vector<llvm::BasicBlock *> bbReset, bbStep, bbTest;
    std::vector<llvm::Value *> startVals, endVals, uniformCounterPtrs;
    std::vector<llvm::Value *> nItems, nExtras, alignedEnd;
    std::vector<llvm::Value *> extrasMaskPtrs;

    // span[i] is how many elements dimension i advances by on each step.
    std::vector<int> span(nDims, 0);
    lGetSpans(nDims-1, nDims, g->target.vectorWidth, isTiled, &span[0]);

    for (int i = 0; i < nDims; ++i) {
        // Basic blocks that we'll fill in later with the looping logic for
        // this dimension.
        bbReset.push_back(ctx->CreateBasicBlock("foreach_reset"));
        bbStep.push_back(ctx->CreateBasicBlock("foreach_step"));
        bbTest.push_back(ctx->CreateBasicBlock("foreach_test"));

        // Start and end value for this loop dimension
        llvm::Value *sv = startExprs[i]->GetValue(ctx);
        llvm::Value *ev = endExprs[i]->GetValue(ctx);
        // NOTE(review): this early return happens after StartForeach() and
        // StartScope() above but without the EndForeach()/EndScope() calls
        // made at the bottom of the function -- confirm whether leaving
        // them unbalanced is acceptable on this error path.
        if (sv == NULL || ev == NULL)
            return;
        startVals.push_back(sv);
        endVals.push_back(ev);

        // nItems = endVal - startVal
        nItems.push_back(ctx->BinaryOperator(llvm::Instruction::Sub, ev, sv,
                                             "nitems"));

        // nExtras = nItems % (span for this dimension)
        // This gives us the number of extra elements we need to deal with
        // at the end of the loop for this dimension that don't fit cleanly
        // into a vector width.
        nExtras.push_back(ctx->BinaryOperator(llvm::Instruction::SRem, nItems[i],
                                              LLVMInt32(span[i]), "nextras"));

        // alignedEnd = endVal - nExtras
        alignedEnd.push_back(ctx->BinaryOperator(llvm::Instruction::Sub, ev,
                                                 nExtras[i], "aligned_end"));

        ///////////////////////////////////////////////////////////////////////
        // Each dimension has a loop counter that is a uniform value that
        // goes from startVal to endVal, in steps of the span for this
        // dimension.  Its value is only used internally here for looping
        // logic and isn't directly available in the user's program code.
        uniformCounterPtrs.push_back(ctx->AllocaInst(LLVMTypes::Int32Type,
                                                     "counter"));
        ctx->StoreInst(startVals[i], uniformCounterPtrs[i]);

        // There is also a varying variable that holds the set of index
        // values for each dimension in the current loop iteration; this is
        // the value that is program-visible.
        dimVariables[i]->storagePtr = ctx->AllocaInst(LLVMTypes::Int32VectorType,
                                                      dimVariables[i]->name.c_str());
        dimVariables[i]->parentFunction = ctx->GetFunction();
        ctx->EmitVariableDebugInfo(dimVariables[i]);

        // Each dimension also maintains a mask that represents which of
        // the varying elements in the current iteration should be
        // processed.  (i.e. this is used to disable the lanes that have
        // out-of-bounds offsets.)
        extrasMaskPtrs.push_back(ctx->AllocaInst(LLVMTypes::MaskType, "extras mask"));
        ctx->StoreInst(LLVMMaskAllOn, extrasMaskPtrs[i]);
    }

    // On to the outermost loop's test
    ctx->BranchInst(bbTest[0]);

    ///////////////////////////////////////////////////////////////////////////
    // foreach_reset: this code runs when we need to reset the counter for
    // a given dimension in preparation for running through its loop again,
    // after the enclosing level advances its counter.
    for (int i = 0; i < nDims; ++i) {
        ctx->SetCurrentBasicBlock(bbReset[i]);
        if (i == 0)
            // The outermost dimension has run out: the whole loop is done.
            ctx->BranchInst(bbExit);
        else {
            ctx->StoreInst(LLVMMaskAllOn, extrasMaskPtrs[i]);
            ctx->StoreInst(startVals[i], uniformCounterPtrs[i]);
            ctx->BranchInst(bbStep[i-1]);
        }
    }

    ///////////////////////////////////////////////////////////////////////////
    // foreach_test: for each dimension, work out whether it is in its
    // final partial step, refresh the program-visible varying counter and
    // the mask of in-bounds lanes, and then either descend to the next
    // dimension or leave through this dimension's reset block.
    std::vector<llvm::Value *> inExtras;
    for (int i = 0; i < nDims; ++i) {
        ctx->SetCurrentBasicBlock(bbTest[i]);

        // This dimension is "in extras" when it has leftover elements and
        // its counter has reached the aligned end.
        llvm::Value *haveExtras =
            ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SGT,
                         endVals[i], alignedEnd[i], "have_extras");

        llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[i], "counter");
        llvm::Value *atAlignedEnd =
            ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
                         counter, alignedEnd[i], "at_aligned_end");
        llvm::Value *inEx =
            ctx->BinaryOperator(llvm::Instruction::And, haveExtras,
                                atAlignedEnd, "in_extras");

        // inExtras[i] is the OR of the flags for dimensions 0..i, so the
        // innermost entry says whether any dimension is in its extras.
        if (i == 0)
            inExtras.push_back(inEx);
        else
            inExtras.push_back(ctx->BinaryOperator(llvm::Instruction::Or, inEx,
                                                   inExtras[i-1], "in_extras_all"));

        llvm::Value *varyingCounter =
            lUpdateVaryingCounter(i, nDims, ctx, uniformCounterPtrs[i],
                                  dimVariables[i]->storagePtr, span);

        // Broadcast the uniform end value across a vector, one lane at a
        // time.
        llvm::Value *smearEnd = llvm::UndefValue::get(LLVMTypes::Int32VectorType);
        for (int j = 0; j < g->target.vectorWidth; ++j)
            smearEnd = ctx->InsertInst(smearEnd, endVals[i], j, "smear_end");
        // Do a vector compare of its value to the end value to generate a
        // mask for this last bit of work.
        llvm::Value *emask =
            ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
                         varyingCounter, smearEnd);
        emask = ctx->I1VecToBoolVec(emask);

        if (i == 0)
            ctx->StoreInst(emask, extrasMaskPtrs[i]);
        else {
            // FIXME: at least specialize the innermost loop to not do all
            // this mask stuff each time through the test...
            // Combine with the enclosing dimension's mask so that a lane is
            // enabled only if it is in bounds in every dimension so far.
            // (This local deliberately-or-not shadows the function-level
            // oldMask; the outer one is untouched.)
            llvm::Value *oldMask = ctx->LoadInst(extrasMaskPtrs[i-1]);
            llvm::Value *newMask =
                ctx->BinaryOperator(llvm::Instruction::And, oldMask, emask,
                                    "extras_mask");
            ctx->StoreInst(newMask, extrasMaskPtrs[i]);
        }

        llvm::Value *notAtEnd =
            ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
                         counter, endVals[i]);
        if (i != nDims-1)
            ctx->BranchInst(bbTest[i+1], bbReset[i], notAtEnd);
        else
            ctx->BranchInst(bbCheckExtras, bbReset[i], notAtEnd);
    }

    ///////////////////////////////////////////////////////////////////////////
    // foreach_step: increment the uniform counter by this dimension's
    // span.  Note that we don't increment the varying counter here as well
    // but just generate its value when we need it in the loop body.
    for (int i = 0; i < nDims; ++i) {
        ctx->SetCurrentBasicBlock(bbStep[i]);
        llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[i]);
        llvm::Value *newCounter =
            ctx->BinaryOperator(llvm::Instruction::Add, counter,
                                LLVMInt32(span[i]), "new_counter");
        ctx->StoreInst(newCounter, uniformCounterPtrs[i]);
        ctx->BranchInst(bbTest[i]);
    }

    ///////////////////////////////////////////////////////////////////////////
    // foreach_check_extras: see if we need to deal with any partial
    // vector's worth of work that's left.
    ctx->SetCurrentBasicBlock(bbCheckExtras);
    ctx->AddInstrumentationPoint("foreach loop check extras");
    ctx->BranchInst(bbDoExtras, bbBody, inExtras[nDims-1]);

    ///////////////////////////////////////////////////////////////////////////
    // foreach_body: do a full vector's worth of work.  We know that all
    // lanes will be running here, so we explicitly set the mask to be 'all
    // on'.  This ends up being relatively straightforward: just update the
    // value of the varying loop counter and have the statements in the
    // loop body emit their code.
    ctx->SetCurrentBasicBlock(bbBody);
    ctx->SetInternalMask(LLVMMaskAllOn);
    ctx->AddInstrumentationPoint("foreach loop body");
    stmts->EmitCode(ctx);
    assert(ctx->GetCurrentBasicBlock() != NULL);
    ctx->BranchInst(bbStep[nDims-1]);

    ///////////////////////////////////////////////////////////////////////////
    // foreach_doextras: set the mask and have the statements emit their
    // code again.  Note that it's generally worthwhile having two copies
    // of the statements' code, since the code above is emitted with the
    // mask known to be all-on, which in turn leads to more efficient code
    // for that case.
    ctx->SetCurrentBasicBlock(bbDoExtras);
    llvm::Value *mask = ctx->LoadInst(extrasMaskPtrs[nDims-1]);
    ctx->SetInternalMask(mask);
    stmts->EmitCode(ctx);
    ctx->BranchInst(bbStep[nDims-1]);

    ///////////////////////////////////////////////////////////////////////////
    // foreach_exit: All done.  Restore the old mask and clean up
    ctx->SetCurrentBasicBlock(bbExit);
    ctx->SetInternalMask(oldMask);

    ctx->EndForeach();
    ctx->EndScope();
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Runs the optimization pass over the loop's start expressions, end
    expressions and body.

    Each child is replaced by whatever its own Optimize() call hands back,
    so a child that rewrites itself (or fails and yields NULL) is reflected
    here rather than being silently dropped.  The loop body is optimized
    with Stmt::Optimize(); it is not type-checked a second time.

    @return this statement, or NULL if any start expression, end expression
            or the body is NULL after optimization.
 */
Stmt *
ForeachStmt::Optimize() {
    bool anyErrors = false;
    for (unsigned int i = 0; i < startExprs.size(); ++i) {
        if (startExprs[i] != NULL)
            startExprs[i] = startExprs[i]->Optimize();
        anyErrors |= (startExprs[i] == NULL);
    }
    for (unsigned int i = 0; i < endExprs.size(); ++i) {
        if (endExprs[i] != NULL)
            endExprs[i] = endExprs[i]->Optimize();
        anyErrors |= (endExprs[i] == NULL);
    }

    if (stmts != NULL)
        stmts = stmts->Optimize();
    anyErrors |= (stmts == NULL);

    return anyErrors ? NULL : this;
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Type-checks the foreach statement.

    Every start and end expression is converted to a uniform int32 and then
    type-checked; the expression stored afterwards is the one returned by
    those calls, so a failed check (NULL) is recorded as an error.  The loop
    body is type-checked as well.  Finally, the number of start values and
    the number of end values must each match the number of loop dimension
    variables; a mismatch is reported with Error().

    @return this statement, or NULL if any of the checks above failed.
 */
Stmt *
ForeachStmt::TypeCheck() {
    bool anyErrors = false;
    for (unsigned int i = 0; i < startExprs.size(); ++i) {
        if (startExprs[i] != NULL)
            startExprs[i] = TypeConvertExpr(startExprs[i],
                                            AtomicType::UniformInt32,
                                            "foreach starting value");
        if (startExprs[i] != NULL)
            startExprs[i] = startExprs[i]->TypeCheck();
        anyErrors |= (startExprs[i] == NULL);
    }
    for (unsigned int i = 0; i < endExprs.size(); ++i) {
        if (endExprs[i] != NULL)
            endExprs[i] = TypeConvertExpr(endExprs[i], AtomicType::UniformInt32,
                                          "foreach ending value");
        if (endExprs[i] != NULL)
            endExprs[i] = endExprs[i]->TypeCheck();
        anyErrors |= (endExprs[i] == NULL);
    }

    if (stmts != NULL)
        stmts = stmts->TypeCheck();
    anyErrors |= (stmts == NULL);

    // One start value is required per loop dimension.
    if (startExprs.size() < dimVariables.size()) {
        Error(pos, "Not enough initial values provided for \"foreach\" loop; "
              "got %d, expected %d\n", (int)startExprs.size(), (int)dimVariables.size());
        anyErrors = true;
    }
    else if (startExprs.size() > dimVariables.size()) {
        Error(pos, "Too many initial values provided for \"foreach\" loop; "
              "got %d, expected %d\n", (int)startExprs.size(), (int)dimVariables.size());
        anyErrors = true;
    }

    // Likewise one end value per loop dimension.  These messages name the
    // ending values so they can be told apart from the start-value errors.
    if (endExprs.size() < dimVariables.size()) {
        Error(pos, "Not enough ending values provided for \"foreach\" loop; "
              "got %d, expected %d\n", (int)endExprs.size(), (int)dimVariables.size());
        anyErrors = true;
    }
    else if (endExprs.size() > dimVariables.size()) {
        Error(pos, "Too many ending values provided for \"foreach\" loop; "
              "got %d, expected %d\n", (int)endExprs.size(), (int)dimVariables.size());
        anyErrors = true;
    }

    return anyErrors ? NULL : this;
}
|
||||||
|
|
||||||
|
|
||||||
|
int
|
||||||
|
ForeachStmt::EstimateCost() const {
|
||||||
|
return dimVariables.size() * (COST_UNIFORM_LOOP + COST_SIMPLE_ARITH_LOGIC_OP) +
|
||||||
|
(stmts ? stmts->EstimateCost() : 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void
|
||||||
|
ForeachStmt::Print(int indent) const {
|
||||||
|
printf("%*cForeach Stmt", indent, ' ');
|
||||||
|
pos.Print();
|
||||||
|
printf("\n");
|
||||||
|
|
||||||
|
for (unsigned int i = 0; i < dimVariables.size(); ++i)
|
||||||
|
if (dimVariables[i] != NULL)
|
||||||
|
printf("%*cVar %d: %s\n", indent+4, ' ', i,
|
||||||
|
dimVariables[i]->name.c_str());
|
||||||
|
else
|
||||||
|
printf("%*cVar %d: NULL\n", indent+4, ' ', i);
|
||||||
|
|
||||||
|
printf("Start values:\n");
|
||||||
|
for (unsigned int i = 0; i < startExprs.size(); ++i) {
|
||||||
|
if (startExprs[i] != NULL)
|
||||||
|
startExprs[i]->Print();
|
||||||
|
else
|
||||||
|
printf("NULL");
|
||||||
|
if (i != startExprs.size()-1)
|
||||||
|
printf(", ");
|
||||||
|
else
|
||||||
|
printf("\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("End values:\n");
|
||||||
|
for (unsigned int i = 0; i < endExprs.size(); ++i) {
|
||||||
|
if (endExprs[i] != NULL)
|
||||||
|
endExprs[i]->Print();
|
||||||
|
else
|
||||||
|
printf("NULL");
|
||||||
|
if (i != endExprs.size()-1)
|
||||||
|
printf(", ");
|
||||||
|
else
|
||||||
|
printf("\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (stmts != NULL) {
|
||||||
|
printf("%*cStmts:\n", indent+4, ' ');
|
||||||
|
stmts->Print(indent+8);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
// ReturnStmt
|
// ReturnStmt
|
||||||
|
|
||||||
@@ -1606,6 +2074,11 @@ ReturnStmt::EmitCode(FunctionEmitContext *ctx) const {
|
|||||||
if (!ctx->GetCurrentBasicBlock())
|
if (!ctx->GetCurrentBasicBlock())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
if (ctx->InForeachLoop()) {
|
||||||
|
Error(pos, "\"return\" statement is illegal inside a \"foreach\" loop.");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
ctx->SetDebugPos(pos);
|
ctx->SetDebugPos(pos);
|
||||||
ctx->CurrentLanesReturned(val, doCoherenceCheck);
|
ctx->CurrentLanesReturned(val, doCoherenceCheck);
|
||||||
}
|
}
|
||||||
|
|||||||
25
stmt.h
25
stmt.h
@@ -241,6 +241,31 @@ private:
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/** @brief Statement implementation for parallel 'foreach' loops.

    A foreach loop iterates over an n-dimensional range; each dimension has
    a loop variable, a start expression and an end expression.  The tiled
    variant differs only in how the vector lanes are divided among the
    dimensions on each trip through the loop body.
 */
class ForeachStmt : public Stmt {
public:
    /** NOTE(review): the constructor body is not in view here; presumably
        the arguments initialize the same-purpose members below (loopVars ->
        dimVariables, tiled -> isTiled, bodyStatements -> stmts) -- confirm
        against the definition. */
    ForeachStmt(const std::vector<Symbol *> &loopVars,
                const std::vector<Expr *> &startExprs,
                const std::vector<Expr *> &endExprs,
                Stmt *bodyStatements, bool tiled, SourcePos pos);

    void EmitCode(FunctionEmitContext *ctx) const;
    void Print(int indent) const;

    Stmt *Optimize();
    Stmt *TypeCheck();
    int EstimateCost() const;

    /** One symbol per loop dimension; these are the program-visible loop
        variables. */
    std::vector<Symbol *> dimVariables;
    /** Start value for each dimension; type checking requires as many of
        these as there are dimension variables. */
    std::vector<Expr *> startExprs;
    /** End value for each dimension; same count requirement as
        startExprs. */
    std::vector<Expr *> endExprs;
    /** Selects tiled span computation when code is emitted. */
    bool isTiled;
    /** The loop body; may be NULL. */
    Stmt *stmts;
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/** @brief Statement implementation for a 'return' or 'coherent' return
|
/** @brief Statement implementation for a 'return' or 'coherent' return
|
||||||
statement in the program. */
|
statement in the program. */
|
||||||
class ReturnStmt : public Stmt {
|
class ReturnStmt : public Stmt {
|
||||||
|
|||||||
22
tests/foreach-1.ispc
Normal file
22
tests/foreach-1.ispc
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
|
||||||
|
// Test: a 1D foreach over [0, programCount) updates every element of a
// uniform array.

// Reports the gang size this test was compiled for.
export uniform int width() { return programCount; }


// Zeroes val[], adds aFOO[i] - 1 to each element from inside a foreach
// loop, then sums val[] with a uniform loop and writes that sum to every
// lane of RET.
export void f_f(uniform float RET[], uniform float aFOO[]) {
    uniform float val[programCount];
    for (uniform int i = 0; i < programCount; ++i)
        val[i] = 0;

    foreach (i = 0 ... programCount)
        val[i] += aFOO[i] - 1;

    uniform float sum = 0;
    for (uniform int i = 0; i < programCount; ++i)
        sum += val[i];

    RET[programIndex] = sum;
}

// Reference values: 0 + 1 + ... + (programCount-1) in every lane.
// NOTE(review): this matches f_f only if aFOO[i] == i + 1, presumably what
// the test harness supplies -- confirm.
export void result(uniform float RET[]) {
    RET[programIndex] = reduce_add(programIndex);
}
|
||||||
33
tests/foreach-10.ispc
Normal file
33
tests/foreach-10.ispc
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
|
||||||
|
// Test: a 3D foreach_tiled loop must visit every element of an
// NA x NB x NC array exactly once, including an innermost extent (7) that
// need not be a multiple of the tile span.

// Reports the gang size this test was compiled for.
export uniform int width() { return programCount; }

// Zeroes a[][][], increments every element once from inside the
// foreach_tiled loop, then counts the elements that are not exactly 1.
// The count is written to every lane of RET.
export void f_f(uniform float RET[], uniform float aFOO[]) {
#define NA 4
#define NB 8
#define NC 7
    uniform int a[NA][NB][NC];

    // The innermost loop must test and advance k (not j) so that the whole
    // array is initialized.
    for (uniform int i = 0; i < NA; ++i)
        for (uniform int j = 0; j < NB; ++j)
            for (uniform int k = 0; k < NC; ++k)
                a[i][j][k] = 0;

    foreach_tiled (i = 0 ... NA, j = 0 ... NB, k = 0 ... NC) {
        a[i][j][k] += 1;
    }

    // Likewise walk k here so that every element is actually verified.
    uniform int errs = 0;
    for (uniform int i = 0; i < NA; ++i)
        for (uniform int j = 0; j < NB; ++j)
            for (uniform int k = 0; k < NC; ++k)
                if (a[i][j][k] != 1) {
//CO                    print("% % % = %\n", i, j, k, a[i][j][k]);
                    ++errs;
                }

    RET[programIndex] = errs;
}

// Reference values: no mismatches.
export void result(uniform float RET[]) {
    RET[programIndex] = 0;
}
|
||||||
22
tests/foreach-11.ispc
Normal file
22
tests/foreach-11.ispc
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
|
||||||
|
// Test: a 1D foreach_tiled over [0, programCount) updates every element of
// a uniform array (tiled counterpart of the plain foreach version).

// Reports the gang size this test was compiled for.
export uniform int width() { return programCount; }


// Zeroes val[], adds aFOO[i] - 1 to each element from inside a
// foreach_tiled loop, then sums val[] with a uniform loop and writes that
// sum to every lane of RET.
export void f_f(uniform float RET[], uniform float aFOO[]) {
    uniform float val[programCount];
    for (uniform int i = 0; i < programCount; ++i)
        val[i] = 0;

    foreach_tiled (i = 0 ... programCount)
        val[i] += aFOO[i] - 1;

    uniform float sum = 0;
    for (uniform int i = 0; i < programCount; ++i)
        sum += val[i];

    RET[programIndex] = sum;
}

// Reference values: 0 + 1 + ... + (programCount-1) in every lane.
// NOTE(review): this matches f_f only if aFOO[i] == i + 1, presumably what
// the test harness supplies -- confirm.
export void result(uniform float RET[]) {
    RET[programIndex] = reduce_add(programIndex);
}
|
||||||
26
tests/foreach-12.ispc
Normal file
26
tests/foreach-12.ispc
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
|
||||||
|
// Test: a 1D foreach_tiled loop whose range starts at a non-zero value
// (2 ... programCount) must leave the elements below the start untouched.

// Reports the gang size this test was compiled for.
export uniform int width() { return programCount; }


// Declared but not called anywhere in this file.
uniform int foo(int i);

// Zeroes val[], adds i to val[i] for i in [2, programCount) inside the
// foreach_tiled loop, then writes the sum of val[] to every lane of RET.
export void f_f(uniform float RET[], uniform float aFOO[]) {
    uniform float val[programCount];
    for (uniform int i = 0; i < programCount; ++i)
        val[i] = 0;

    foreach_tiled (i = 2 ... programCount)
        val[i] += i;

    uniform float sum = 0;
    for (uniform int i = 0; i < programCount; ++i) {
        sum += val[i];
    }

    RET[programIndex] = sum;
}

// Reference values: the sum of the lane indices that are >= 2.
export void result(uniform float RET[]) {
    int pi = (programIndex >= 2) ? programIndex : 0;
    RET[programIndex] = reduce_add(pi);
}
|
||||||
19
tests/foreach-13.ispc
Normal file
19
tests/foreach-13.ispc
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
|
||||||
|
// Test: a foreach_tiled loop over a fixed two-element range (0 ... 2) must
// write only elements 0 and 1.

// Reports the gang size this test was compiled for.
export uniform int width() { return programCount; }


// Declared but not called anywhere in this file.
uniform int foo(int i);

// Zeroes RET[0 .. programCount-1], then stores i+1 into RET[i] for
// i in [0, 2).
export void f_f(uniform float RET[], uniform float aFOO[]) {
    for (uniform int i = 0; i < programCount; ++i)
        RET[i] = 0;

    foreach_tiled (i = 0 ... 2)
        RET[i] = i+1;
}

// Reference values: 1 and 2 in the first two elements, 0 elsewhere.
export void result(uniform float RET[]) {
    RET[programIndex] = 0;
    RET[0] = 1;
    RET[1] = 2;
}
|
||||||
17
tests/foreach-14.ispc
Normal file
17
tests/foreach-14.ispc
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
|
||||||
|
// Test: a foreach_tiled loop whose end is below its start (2 ... 0) must
// not execute its body at all.

// Reports the gang size this test was compiled for.
export uniform int width() { return programCount; }


// Declared but not called anywhere in this file.
uniform int foo(int i);

// Zeroes RET, then runs a foreach_tiled over the reversed range; any
// iteration that did run would add 1234 to an element.
export void f_f(uniform float RET[], uniform float aFOO[]) {
    for (uniform int i = 0; i < programCount; ++i)
        RET[i] = 0;

    foreach_tiled (i = 2 ... 0)
        RET[i] += 1234;
}

// Reference values: everything still zero.
export void result(uniform float RET[]) {
    RET[programIndex] = 0;
}
|
||||||
17
tests/foreach-15.ispc
Normal file
17
tests/foreach-15.ispc
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
|
||||||
|
// Test: a foreach_tiled loop over an empty range (1 ... 1) must not
// execute its body.

// Reports the gang size this test was compiled for.
export uniform int width() { return programCount; }


// Declared but not called anywhere in this file.
uniform int foo(int i);

// Zeroes RET, then runs a foreach_tiled over the empty range; any
// iteration that did run would store 1234.
export void f_f(uniform float RET[], uniform float aFOO[]) {
    for (uniform int i = 0; i < programCount; ++i)
        RET[i] = 0;

    foreach_tiled (i = 1 ... 1)
        RET[i] = 1234;
}

// Reference values: everything still zero.
export void result(uniform float RET[]) {
    RET[programIndex] = 0;
}
|
||||||
17
tests/foreach-16.ispc
Normal file
17
tests/foreach-16.ispc
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
|
||||||
|
// Test: a foreach_tiled loop with a negative start value
// (-2 ... programCount-2) must still cover programCount elements.

// Reports the gang size this test was compiled for.
export uniform int width() { return programCount; }


// Declared but not called anywhere in this file.
uniform int foo(int i);

// Zeroes RET, then stores 1234 into RET[i+2] for every i in the range, so
// that indices 0 .. programCount-1 are all written.
export void f_f(uniform float RET[], uniform float aFOO[]) {
    for (uniform int i = 0; i < programCount; ++i)
        RET[i] = 0;

    foreach_tiled (i = -2 ... programCount-2)
        RET[i+2] = 1234;
}

// Reference values: 1234 in every lane.
export void result(uniform float RET[]) {
    RET[programIndex] = 1234;
}
|
||||||
13
tests/foreach-17.ispc
Normal file
13
tests/foreach-17.ispc
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
|
||||||
|
// Test: a foreach_tiled loop over a fixed six-element range accumulating
// into a varying sum, with the range length independent of programCount.

// Reports the gang size this test was compiled for.
export uniform int width() { return programCount; }

// Accumulates aFOO[0..5] into a varying sum inside the foreach_tiled loop
// and writes the cross-lane total to every lane of RET.
export void f_f(uniform float RET[], uniform float aFOO[]) {
    float sum = 0;
    foreach_tiled (i = 0 ... 6)
        sum += aFOO[i];
    RET[programIndex] = reduce_add(sum);
}

// Reference value: 21 in every lane.
// NOTE(review): 21 == 1+2+...+6, i.e. this assumes aFOO[i] == i + 1 --
// presumably what the test harness supplies; confirm.
export void result(uniform float RET[]) {
    RET[programIndex] = 21;
}
|
||||||
29
tests/foreach-18.ispc
Normal file
29
tests/foreach-18.ispc
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
|
||||||
|
// Test: a 2D foreach_tiled loop over a 3 x 8 array must touch every
// element exactly once.

// Reports the gang size this test was compiled for.
export uniform int width() { return programCount; }

// Zeroes a[][], increments every element once from inside the
// foreach_tiled loop, then counts the elements that are not exactly 1 and
// writes that count to every lane of RET.
export void f_f(uniform float RET[], uniform float aFOO[]) {
#define NA 3
#define NB 8
    uniform int a[NA][NB];

    for (uniform int i = 0; i < NA; ++i)
        for (uniform int j = 0; j < NB; ++j)
            a[i][j] = 0;

    foreach_tiled (i = 0 ... NA, j = 0 ... NB) {
        a[i][j] += 1;
    }

    uniform int errs = 0;
    for (uniform int i = 0; i < NA; ++i)
        for (uniform int j = 0; j < NB; ++j)
            if (a[i][j] != 1) {
                ++errs;
            }

    RET[programIndex] = errs;
}

// Reference values: no mismatches.
export void result(uniform float RET[]) {
    RET[programIndex] = 0;
}
|
||||||
29
tests/foreach-19.ispc
Normal file
29
tests/foreach-19.ispc
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
|
||||||
|
// Test: a 2D foreach_tiled loop over a 3 x 4 array must touch every
// element exactly once.

// Reports the gang size this test was compiled for.
export uniform int width() { return programCount; }

// Zeroes a[][], increments every element once from inside the
// foreach_tiled loop, then counts the elements that are not exactly 1 and
// writes that count to every lane of RET.
export void f_f(uniform float RET[], uniform float aFOO[]) {
#define NA 3
#define NB 4
    uniform int a[NA][NB];

    for (uniform int i = 0; i < NA; ++i)
        for (uniform int j = 0; j < NB; ++j)
            a[i][j] = 0;

    foreach_tiled (i = 0 ... NA, j = 0 ... NB) {
        a[i][j] += 1;
    }

    uniform int errs = 0;
    for (uniform int i = 0; i < NA; ++i)
        for (uniform int j = 0; j < NB; ++j)
            if (a[i][j] != 1) {
                ++errs;
            }

    RET[programIndex] = errs;
}

// Reference values: no mismatches.
export void result(uniform float RET[]) {
    RET[programIndex] = 0;
}
|
||||||
26
tests/foreach-2.ispc
Normal file
26
tests/foreach-2.ispc
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
|
||||||
|
// Test: a 1D foreach loop whose range starts at a non-zero value
// (2 ... programCount) must leave the elements below the start untouched.

// Reports the gang size this test was compiled for.
export uniform int width() { return programCount; }


// Declared but not called anywhere in this file.
uniform int foo(int i);

// Zeroes val[], adds i to val[i] for i in [2, programCount) inside the
// foreach loop, then writes the sum of val[] to every lane of RET.
export void f_f(uniform float RET[], uniform float aFOO[]) {
    uniform float val[programCount];
    for (uniform int i = 0; i < programCount; ++i)
        val[i] = 0;

    foreach (i = 2 ... programCount)
        val[i] += i;

    uniform float sum = 0;
    for (uniform int i = 0; i < programCount; ++i) {
        sum += val[i];
    }

    RET[programIndex] = sum;
}

// Reference values: the sum of the lane indices that are >= 2.
export void result(uniform float RET[]) {
    int pi = (programIndex >= 2) ? programIndex : 0;
    RET[programIndex] = reduce_add(pi);
}
|
||||||
19
tests/foreach-3.ispc
Normal file
19
tests/foreach-3.ispc
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
|
||||||
|
// Test: a foreach loop over a fixed two-element range (0 ... 2) must write
// only elements 0 and 1.

// Reports the gang size this test was compiled for.
export uniform int width() { return programCount; }


// Declared but not called anywhere in this file.
uniform int foo(int i);

// Zeroes RET[0 .. programCount-1], then stores i+1 into RET[i] for
// i in [0, 2).
export void f_f(uniform float RET[], uniform float aFOO[]) {
    for (uniform int i = 0; i < programCount; ++i)
        RET[i] = 0;

    foreach (i = 0 ... 2)
        RET[i] = i+1;
}

// Reference values: 1 and 2 in the first two elements, 0 elsewhere.
export void result(uniform float RET[]) {
    RET[programIndex] = 0;
    RET[0] = 1;
    RET[1] = 2;
}
|
||||||
17
tests/foreach-4.ispc
Normal file
17
tests/foreach-4.ispc
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
|
||||||
|
// Test: a foreach loop whose end is below its start (2 ... 0) must not
// execute its body at all.

// Reports the gang size this test was compiled for.
export uniform int width() { return programCount; }


// Declared but not called anywhere in this file.
uniform int foo(int i);

// Zeroes RET, then runs a foreach over the reversed range; any iteration
// that did run would subtract 1234 from an element.
export void f_f(uniform float RET[], uniform float aFOO[]) {
    for (uniform int i = 0; i < programCount; ++i)
        RET[i] = 0;

    foreach (i = 2 ... 0)
        RET[i] -= 1234;
}

// Reference values: everything still zero.
export void result(uniform float RET[]) {
    RET[programIndex] = 0;
}
|
||||||
17
tests/foreach-5.ispc
Normal file
17
tests/foreach-5.ispc
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
|
||||||
|
// Test: a foreach loop over an empty range (1 ... 1) must not execute its
// body.

// Reports the gang size this test was compiled for.
export uniform int width() { return programCount; }


// Declared but not called anywhere in this file.
uniform int foo(int i);

// Zeroes RET, then runs a foreach over the empty range; any iteration that
// did run would store 1234.
export void f_f(uniform float RET[], uniform float aFOO[]) {
    for (uniform int i = 0; i < programCount; ++i)
        RET[i] = 0;

    foreach (i = 1 ... 1)
        RET[i] = 1234;
}

// Reference values: everything still zero.
export void result(uniform float RET[]) {
    RET[programIndex] = 0;
}
|
||||||
17
tests/foreach-6.ispc
Normal file
17
tests/foreach-6.ispc
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
|
||||||
|
// Test: a foreach loop with a negative start value (-2 ... programCount-2)
// must still cover programCount elements, each exactly once.

// Reports the gang size this test was compiled for.
export uniform int width() { return programCount; }


// Declared but not called anywhere in this file.
uniform int foo(int i);

// Zeroes RET, then adds 1234 to RET[i+2] for every i in the range, so that
// indices 0 .. programCount-1 are each incremented.
export void f_f(uniform float RET[], uniform float aFOO[]) {
    for (uniform int i = 0; i < programCount; ++i)
        RET[i] = 0;

    foreach (i = -2 ... programCount-2)
        RET[i+2] += 1234;
}

// Reference values: 1234 in every lane (an element visited twice or not at
// all would differ).
export void result(uniform float RET[]) {
    RET[programIndex] = 1234;
}
|
||||||
13
tests/foreach-7.ispc
Normal file
13
tests/foreach-7.ispc
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
|
||||||
|
// Test: a foreach loop over a fixed six-element range accumulating into a
// varying sum, with the range length independent of programCount.

// Reports the gang size this test was compiled for.
export uniform int width() { return programCount; }

// Accumulates aFOO[0..5] into a varying sum inside the foreach loop and
// writes the cross-lane total to every lane of RET.
export void f_f(uniform float RET[], uniform float aFOO[]) {
    float sum = 0;
    foreach (i = 0 ... 6)
        sum += aFOO[i];
    RET[programIndex] = reduce_add(sum);
}

// Reference value: 21 in every lane.
// NOTE(review): 21 == 1+2+...+6, i.e. this assumes aFOO[i] == i + 1 --
// presumably what the test harness supplies; confirm.
export void result(uniform float RET[]) {
    RET[programIndex] = 21;
}
|
||||||
23
tests/foreach-8.ispc
Normal file
23
tests/foreach-8.ispc
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
|
||||||
|
// Test: a 2D foreach (10 x 6) must produce the same per-lane sums as the
// equivalent hand-written nested loops with an explicit bounds check.

// Reports the gang size this test was compiled for.
export uniform int width() { return programCount; }

// sum1 is accumulated by the 2D foreach; sum2 by a uniform outer loop plus
// an inner loop that steps by programCount and masks out indices >= 6.
// The per-lane difference between the two is written to RET.
export void f_f(uniform float RET[], uniform float aFOO[]) {
    float sum1 = 0, sum2 = 0;
    foreach (x = 0 ... 10, i = 0 ... 6) {
        sum1 += aFOO[i];
    }

    for (uniform int x = 0; x < 10; ++x) {
        for (uniform int i = 0; i < 6; i += programCount) {
            int index = i + programIndex;
            if (index < 6)
                sum2 += aFOO[index];
        }
    }

    RET[programIndex] = sum1 - sum2;
}

// Reference values: no difference in any lane.
export void result(uniform float RET[]) {
    RET[programIndex] = 0;
}
|
||||||
29
tests/foreach-9.ispc
Normal file
29
tests/foreach-9.ispc
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
|
||||||
|
// Test: a 2D foreach over a 1 x 3 array must touch every element exactly
// once.

// Reports the gang size this test was compiled for.
export uniform int width() { return programCount; }

#define NA 1
#define NB 3

// Zeroes a[][], increments every element once from inside the foreach
// loop, then counts the elements that are not exactly 1 and writes that
// count to every lane of RET.
export void f_f(uniform float RET[], uniform float aFOO[]) {
    uniform int a[NA][NB];

    for (uniform int i = 0; i < NA; ++i)
        for (uniform int j = 0; j < NB; ++j)
            a[i][j] = 0;

    foreach (i = 0 ... NA, j = 0 ... NB) {
        a[i][j] += 1;
    }

    uniform int errs = 0;
    for (uniform int i = 0; i < NA; ++i)
        for (uniform int j = 0; j < NB; ++j)
            if (a[i][j] != 1)
                ++errs;

    RET[programIndex] = errs;
}

// Reference values: no mismatches.
export void result(uniform float RET[]) {
    RET[programIndex] = 0;
}
|
||||||
Reference in New Issue
Block a user