Predicated 'if' statement performance improvements.
Go back to running both sides of 'if' statements with masking and without branching if we can determine that the code is relatively simple (as per the simple cost model), and is safe to run even if the mask is 'all off'. This gives a bit of a performance improvement for some of the examples (most notably, the ray tracer), and is the code that one wants generated in this case anyhow.
This commit is contained in:
1
expr.cpp
1
expr.cpp
@@ -1715,6 +1715,7 @@ int
|
||||
AssignExpr::EstimateCost() const {
|
||||
int cost = ((lvalue ? lvalue->EstimateCost() : 0) +
|
||||
(rvalue ? rvalue->EstimateCost() : 0));
|
||||
cost += COST_ASSIGN;
|
||||
if (op == Assign)
|
||||
return cost;
|
||||
if (op == DivAssign || op == ModAssign)
|
||||
|
||||
11
expr.h
11
expr.h
@@ -123,7 +123,6 @@ public:
|
||||
Expr *TypeCheck();
|
||||
int EstimateCost() const;
|
||||
|
||||
private:
|
||||
const Op op;
|
||||
Expr *expr;
|
||||
};
|
||||
@@ -167,7 +166,6 @@ public:
|
||||
Expr *TypeCheck();
|
||||
int EstimateCost() const;
|
||||
|
||||
private:
|
||||
const Op op;
|
||||
Expr *arg0, *arg1;
|
||||
};
|
||||
@@ -200,7 +198,6 @@ public:
|
||||
Expr *TypeCheck();
|
||||
int EstimateCost() const;
|
||||
|
||||
private:
|
||||
const Op op;
|
||||
Expr *lvalue, *rvalue;
|
||||
};
|
||||
@@ -222,7 +219,6 @@ public:
|
||||
Expr *TypeCheck();
|
||||
int EstimateCost() const;
|
||||
|
||||
private:
|
||||
Expr *test, *expr1, *expr2;
|
||||
};
|
||||
|
||||
@@ -264,11 +260,11 @@ public:
|
||||
Expr *TypeCheck();
|
||||
int EstimateCost() const;
|
||||
|
||||
private:
|
||||
Expr *func;
|
||||
ExprList *args;
|
||||
bool isLaunch;
|
||||
|
||||
private:
|
||||
void resolveFunctionOverloads();
|
||||
bool tryResolve(bool (*matchFunc)(Expr *, const Type *));
|
||||
};
|
||||
@@ -293,7 +289,6 @@ public:
|
||||
Expr *TypeCheck();
|
||||
int EstimateCost() const;
|
||||
|
||||
private:
|
||||
Expr *arrayOrVector, *index;
|
||||
};
|
||||
|
||||
@@ -321,7 +316,6 @@ public:
|
||||
|
||||
virtual int getElementNumber() const;
|
||||
|
||||
protected:
|
||||
std::string getCandidateNearMatches() const;
|
||||
|
||||
Expr *expr;
|
||||
@@ -507,7 +501,6 @@ public:
|
||||
Expr *Optimize();
|
||||
int EstimateCost() const;
|
||||
|
||||
private:
|
||||
const Type *type;
|
||||
Expr *expr;
|
||||
};
|
||||
@@ -527,7 +520,6 @@ public:
|
||||
Expr *Optimize();
|
||||
int EstimateCost() const;
|
||||
|
||||
private:
|
||||
Expr *expr;
|
||||
};
|
||||
|
||||
@@ -547,7 +539,6 @@ public:
|
||||
Expr *Optimize();
|
||||
int EstimateCost() const;
|
||||
|
||||
private:
|
||||
Expr *expr;
|
||||
};
|
||||
|
||||
|
||||
28
ispc.h
28
ispc.h
@@ -368,22 +368,26 @@ struct Globals {
|
||||
};
|
||||
|
||||
enum {
|
||||
COST_FUNCALL = 4,
|
||||
COST_TASK_LAUNCH = 16,
|
||||
COST_SELECT = 4,
|
||||
COST_RETURN = 4,
|
||||
COST_SIMPLE_ARITH_LOGIC_OP = 1,
|
||||
COST_COMPLEX_ARITH_OP = 4,
|
||||
COST_ASSIGN = 1,
|
||||
COST_COHERENT_BREAK_CONTINE = 4,
|
||||
COST_COMPLEX_ARITH_OP = 4,
|
||||
COST_DEREF = 4,
|
||||
COST_FUNCALL = 4,
|
||||
COST_GATHER = 8,
|
||||
COST_LOAD = 2,
|
||||
COST_REGULAR_BREAK_CONTINUE = 2,
|
||||
COST_RETURN = 4,
|
||||
COST_SELECT = 4,
|
||||
COST_SIMPLE_ARITH_LOGIC_OP = 1,
|
||||
COST_SYNC = 32,
|
||||
COST_TASK_LAUNCH = 16,
|
||||
COST_TYPECAST_COMPLEX = 4,
|
||||
COST_TYPECAST_SIMPLE = 1,
|
||||
COST_UNIFORM_LOOP = 4,
|
||||
COST_VARYING_LOOP = 6,
|
||||
COST_SYNC = 32,
|
||||
COST_LOAD = 2,
|
||||
COST_DEREF = 4,
|
||||
COST_TYPECAST_SIMPLE = 1,
|
||||
COST_TYPECAST_COMPLEX = 4,
|
||||
COST_GATHER = 8
|
||||
|
||||
CHECK_MASK_AT_FUNCTION_START_COST = 16,
|
||||
PREDICATE_SAFE_IF_STATEMENT_COST = 6,
|
||||
};
|
||||
|
||||
extern Globals *g;
|
||||
|
||||
@@ -705,7 +705,7 @@ lEmitFunctionCode(FunctionEmitContext *ctx, llvm::Function *function,
|
||||
int costEstimate = code->EstimateCost();
|
||||
bool checkMask = (ft->isTask == true) ||
|
||||
((function->hasFnAttr(llvm::Attribute::AlwaysInline) == false) &&
|
||||
costEstimate > 16);
|
||||
costEstimate > CHECK_MASK_AT_FUNCTION_START_COST);
|
||||
Debug(code->pos, "Estimated cost for function \"%s\" = %d\n",
|
||||
funSym->name.c_str(), costEstimate);
|
||||
// If the body of the function is non-trivial, then we wrap the
|
||||
|
||||
213
stmt.cpp
213
stmt.cpp
@@ -585,6 +585,180 @@ IfStmt::emitMaskedTrueAndFalse(FunctionEmitContext *ctx, llvm::Value *oldMask,
|
||||
}
|
||||
|
||||
|
||||
/** Similar to the Stmt variant of this function, this conservatively
|
||||
checks to see if it's safe to run the code for the given Expr even if
|
||||
the mask is 'all off'.
|
||||
*/
|
||||
static bool
|
||||
lSafeToRunWithAllLanesOff(Expr *expr) {
|
||||
if (expr == NULL)
|
||||
return false;
|
||||
|
||||
UnaryExpr *ue;
|
||||
if ((ue = dynamic_cast<UnaryExpr *>(expr)) != NULL)
|
||||
return lSafeToRunWithAllLanesOff(ue->expr);
|
||||
|
||||
BinaryExpr *be;
|
||||
if ((be = dynamic_cast<BinaryExpr *>(expr)) != NULL)
|
||||
return (lSafeToRunWithAllLanesOff(be->arg0) &&
|
||||
lSafeToRunWithAllLanesOff(be->arg1));
|
||||
|
||||
AssignExpr *ae;
|
||||
if ((ae = dynamic_cast<AssignExpr *>(expr)) != NULL)
|
||||
return (lSafeToRunWithAllLanesOff(ae->lvalue) &&
|
||||
lSafeToRunWithAllLanesOff(ae->rvalue));
|
||||
|
||||
SelectExpr *se;
|
||||
if ((se = dynamic_cast<SelectExpr *>(expr)) != NULL)
|
||||
return (lSafeToRunWithAllLanesOff(se->test) &&
|
||||
lSafeToRunWithAllLanesOff(se->expr1) &&
|
||||
lSafeToRunWithAllLanesOff(se->expr2));
|
||||
|
||||
ExprList *el;
|
||||
if ((el = dynamic_cast<ExprList *>(expr)) != NULL) {
|
||||
for (unsigned int i = 0; i < el->exprs.size(); ++i)
|
||||
if (!lSafeToRunWithAllLanesOff(el->exprs[i]))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
FunctionCallExpr *fce;
|
||||
if ((fce = dynamic_cast<FunctionCallExpr *>(expr)) != NULL)
|
||||
// FIXME: If we could somehow determine that the function being
|
||||
// called was safe (and all of the args Exprs were safe, then it'd
|
||||
// be nice to be able to return true here. (Consider a call to
|
||||
// e.g. floatbits() in the stdlib.) Unfortunately for now we just
|
||||
// have to be conservative.
|
||||
return false;
|
||||
|
||||
IndexExpr *ie;
|
||||
if ((ie = dynamic_cast<IndexExpr *>(expr)) != NULL) {
|
||||
// If we can determine at compile time the size of the array/vector
|
||||
// and if the indices are compile-time constants, then we may be
|
||||
// able to safely run this under a predicated if statement..
|
||||
if (ie->arrayOrVector == NULL)
|
||||
return false;
|
||||
|
||||
const Type *type = ie->arrayOrVector->GetType();
|
||||
ConstExpr *ce = dynamic_cast<ConstExpr *>(ie->index);
|
||||
if (type == NULL || ce == NULL)
|
||||
return false;
|
||||
if (dynamic_cast<const ReferenceType *>(type) != NULL)
|
||||
type = type->GetReferenceTarget();
|
||||
|
||||
const SequentialType *seqType =
|
||||
dynamic_cast<const SequentialType *>(type);
|
||||
assert(seqType != NULL);
|
||||
int nElements = seqType->GetElementCount();
|
||||
if (nElements == 0)
|
||||
// Unsized array, so we can't be sure
|
||||
return false;
|
||||
|
||||
int32_t indices[ISPC_MAX_NVEC];
|
||||
int count = ce->AsInt32(indices);
|
||||
for (int i = 0; i < count; ++i)
|
||||
if (indices[i] < 0 || indices[i] >= nElements)
|
||||
return false;
|
||||
|
||||
// All indices are in-bounds
|
||||
return true;
|
||||
}
|
||||
|
||||
MemberExpr *me;
|
||||
if ((me = dynamic_cast<MemberExpr *>(expr)) != NULL)
|
||||
return lSafeToRunWithAllLanesOff(me->expr);
|
||||
|
||||
if (dynamic_cast<ConstExpr *>(expr) != NULL)
|
||||
return true;
|
||||
|
||||
TypeCastExpr *tce;
|
||||
if ((tce = dynamic_cast<TypeCastExpr *>(expr)) != NULL)
|
||||
return lSafeToRunWithAllLanesOff(tce->expr);
|
||||
|
||||
ReferenceExpr *re;
|
||||
if ((re = dynamic_cast<ReferenceExpr *>(expr)) != NULL)
|
||||
return lSafeToRunWithAllLanesOff(re->expr);
|
||||
|
||||
DereferenceExpr *dre;
|
||||
if ((dre = dynamic_cast<DereferenceExpr *>(expr)) != NULL)
|
||||
return lSafeToRunWithAllLanesOff(dre->expr);
|
||||
|
||||
if (dynamic_cast<SymbolExpr *>(expr) != NULL ||
|
||||
dynamic_cast<FunctionSymbolExpr *>(expr) != NULL ||
|
||||
dynamic_cast<SyncExpr *>(expr) != NULL)
|
||||
return true;
|
||||
|
||||
FATAL("Unknown Expr type in lSafeToRunWithAllLanesOff()");
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/** Given an arbitrary statement, this function conservatively tests to see
|
||||
if it's safe to run the code for the statement even if the mask is all
|
||||
off. Here we just need to determine which kind of statement we have
|
||||
and recursively traverse it and/or the expressions inside of it.
|
||||
*/
|
||||
static bool
|
||||
lSafeToRunWithAllLanesOff(Stmt *stmt) {
|
||||
if (stmt == NULL)
|
||||
return true;
|
||||
|
||||
ExprStmt *es;
|
||||
if ((es = dynamic_cast<ExprStmt *>(stmt)) != NULL)
|
||||
return lSafeToRunWithAllLanesOff(es->expr);
|
||||
|
||||
DeclStmt *ds;
|
||||
if ((ds = dynamic_cast<DeclStmt *>(stmt)) != NULL) {
|
||||
for (unsigned int i = 0; i < ds->declaration->declarators.size(); ++i)
|
||||
if (!lSafeToRunWithAllLanesOff(ds->declaration->declarators[i]->initExpr))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
IfStmt *is;
|
||||
if ((is = dynamic_cast<IfStmt *>(stmt)) != NULL)
|
||||
return (lSafeToRunWithAllLanesOff(is->test) &&
|
||||
lSafeToRunWithAllLanesOff(is->trueStmts) &&
|
||||
lSafeToRunWithAllLanesOff(is->falseStmts));
|
||||
|
||||
DoStmt *dos;
|
||||
if ((dos = dynamic_cast<DoStmt *>(stmt)) != NULL)
|
||||
return (lSafeToRunWithAllLanesOff(dos->testExpr) &&
|
||||
lSafeToRunWithAllLanesOff(dos->bodyStmts));
|
||||
|
||||
ForStmt *fs;
|
||||
if ((fs = dynamic_cast<ForStmt *>(stmt)) != NULL)
|
||||
return (lSafeToRunWithAllLanesOff(fs->init) &&
|
||||
lSafeToRunWithAllLanesOff(fs->test) &&
|
||||
lSafeToRunWithAllLanesOff(fs->step) &&
|
||||
lSafeToRunWithAllLanesOff(fs->stmts));
|
||||
|
||||
if (dynamic_cast<BreakStmt *>(stmt) != NULL ||
|
||||
dynamic_cast<ContinueStmt *>(stmt) != NULL)
|
||||
return true;
|
||||
|
||||
ReturnStmt *rs;
|
||||
if ((rs = dynamic_cast<ReturnStmt *>(stmt)) != NULL)
|
||||
return lSafeToRunWithAllLanesOff(rs->val);
|
||||
|
||||
StmtList *sl;
|
||||
if ((sl = dynamic_cast<StmtList *>(stmt)) != NULL) {
|
||||
const std::vector<Stmt *> &sls = sl->GetStatements();
|
||||
for (unsigned int i = 0; i < sls.size(); ++i)
|
||||
if (!lSafeToRunWithAllLanesOff(sls[i]))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
PrintStmt *ps;
|
||||
if ((ps = dynamic_cast<PrintStmt *>(stmt)) != NULL)
|
||||
return lSafeToRunWithAllLanesOff(ps->values);
|
||||
|
||||
FATAL("Unexpected stmt type in lSafeToRunWithAllLanesOff()");
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/** Emit code for an if test that checks the mask and the test values and
|
||||
tries to be smart about jumping over code that doesn't need to be run.
|
||||
*/
|
||||
@@ -631,10 +805,41 @@ IfStmt::emitVaryingIf(FunctionEmitContext *ctx, llvm::Value *ltest) const {
|
||||
ctx->SetCurrentBasicBlock(bDone);
|
||||
}
|
||||
else if (trueStmts != NULL || falseStmts != NULL) {
|
||||
assert(doAnyCheck);
|
||||
llvm::BasicBlock *bDone = ctx->CreateBasicBlock("if_done");
|
||||
emitMaskMixed(ctx, oldMask, ltest, bDone);
|
||||
ctx->SetCurrentBasicBlock(bDone);
|
||||
// If there is nothing that is potentially unsafe to run with all
|
||||
// lanes off in the true and false statements and if the total
|
||||
// complexity of those two is relatively simple, then we'll go
|
||||
// ahead and emit straightline code that runs both sides, updating
|
||||
// the mask accordingly. This is useful for efficiently compiling
|
||||
// things like:
|
||||
//
|
||||
// if (foo) x = 0;
|
||||
// else ++x;
|
||||
//
|
||||
// Where the overhead of checking if any of the program instances wants
|
||||
// to run one side or the other is more than the actual computation.
|
||||
// The lSafeToRunWithAllLanesOff() checks to make sure that we don't do this
|
||||
// for potentially dangerous code like:
|
||||
//
|
||||
// if (index < count) array[index] = 0;
|
||||
//
|
||||
// where our use of blend for conditional assignments doesn't check
|
||||
// for the 'all lanes off' case.
|
||||
if (lSafeToRunWithAllLanesOff(trueStmts) &&
|
||||
lSafeToRunWithAllLanesOff(falseStmts) &&
|
||||
(((trueStmts ? trueStmts->EstimateCost() : 0) +
|
||||
(falseStmts ? falseStmts->EstimateCost() : 0)) <
|
||||
PREDICATE_SAFE_IF_STATEMENT_COST)) {
|
||||
ctx->StartVaryingIf(oldMask);
|
||||
emitMaskedTrueAndFalse(ctx, oldMask, ltest);
|
||||
assert(ctx->GetCurrentBasicBlock());
|
||||
ctx->EndIf();
|
||||
}
|
||||
else {
|
||||
assert(doAnyCheck);
|
||||
llvm::BasicBlock *bDone = ctx->CreateBasicBlock("if_done");
|
||||
emitMaskMixed(ctx, oldMask, ltest, bDone);
|
||||
ctx->SetCurrentBasicBlock(bDone);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
6
stmt.h
6
stmt.h
@@ -77,7 +77,6 @@ public:
|
||||
Stmt *TypeCheck();
|
||||
int EstimateCost() const;
|
||||
|
||||
private:
|
||||
Expr *expr;
|
||||
};
|
||||
|
||||
@@ -95,7 +94,6 @@ public:
|
||||
Stmt *TypeCheck();
|
||||
int EstimateCost() const;
|
||||
|
||||
private:
|
||||
Declaration *declaration;
|
||||
};
|
||||
|
||||
@@ -156,7 +154,6 @@ public:
|
||||
Stmt *TypeCheck();
|
||||
int EstimateCost() const;
|
||||
|
||||
private:
|
||||
Expr *testExpr;
|
||||
Stmt *bodyStmts;
|
||||
const bool doCoherentCheck;
|
||||
@@ -178,7 +175,6 @@ public:
|
||||
Stmt *TypeCheck();
|
||||
int EstimateCost() const;
|
||||
|
||||
private:
|
||||
/** 'for' statement initializer; may be NULL, indicating no initializer */
|
||||
Stmt *init;
|
||||
/** expression that returns a value indicating whether the loop should
|
||||
@@ -250,7 +246,6 @@ public:
|
||||
Stmt *TypeCheck();
|
||||
int EstimateCost() const;
|
||||
|
||||
private:
|
||||
Expr *val;
|
||||
/** This indicates whether the generated code will check to see if no
|
||||
more program instances are currently running after the return, in
|
||||
@@ -301,7 +296,6 @@ public:
|
||||
Stmt *TypeCheck();
|
||||
int EstimateCost() const;
|
||||
|
||||
private:
|
||||
/** Format string for the print() statement. */
|
||||
const std::string format;
|
||||
/** This holds the arguments passed to the print() statement. If more
|
||||
|
||||
Reference in New Issue
Block a user