From 9921b8e530780a766eadc171e5858c171e1bcca0 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Mon, 19 Sep 2011 09:54:09 -0700 Subject: [PATCH] Predicated 'if' statement performance improvements. Go back to running both sides of 'if' statements with masking and without branching if we can determine that the code is relatively simple (as per the simple cost model), and is safe to run even if the mask is 'all off'. This gives a bit of a performance improvement for some of the examples (most notably, the ray tracer), and is the code that one wants generated in this case anyhow. --- expr.cpp | 1 + expr.h | 11 +-- ispc.h | 28 ++++--- module.cpp | 2 +- stmt.cpp | 213 ++++++++++++++++++++++++++++++++++++++++++++++++++++- stmt.h | 6 -- 6 files changed, 228 insertions(+), 33 deletions(-) diff --git a/expr.cpp b/expr.cpp index 0322ef27..6697236a 100644 --- a/expr.cpp +++ b/expr.cpp @@ -1715,6 +1715,7 @@ int AssignExpr::EstimateCost() const { int cost = ((lvalue ? lvalue->EstimateCost() : 0) + (rvalue ? 
rvalue->EstimateCost() : 0)); + cost += COST_ASSIGN; if (op == Assign) return cost; if (op == DivAssign || op == ModAssign) diff --git a/expr.h b/expr.h index 3764b19c..73e09647 100644 --- a/expr.h +++ b/expr.h @@ -123,7 +123,6 @@ public: Expr *TypeCheck(); int EstimateCost() const; -private: const Op op; Expr *expr; }; @@ -167,7 +166,6 @@ public: Expr *TypeCheck(); int EstimateCost() const; -private: const Op op; Expr *arg0, *arg1; }; @@ -200,7 +198,6 @@ public: Expr *TypeCheck(); int EstimateCost() const; -private: const Op op; Expr *lvalue, *rvalue; }; @@ -222,7 +219,6 @@ public: Expr *TypeCheck(); int EstimateCost() const; -private: Expr *test, *expr1, *expr2; }; @@ -264,11 +260,11 @@ public: Expr *TypeCheck(); int EstimateCost() const; -private: Expr *func; ExprList *args; bool isLaunch; +private: void resolveFunctionOverloads(); bool tryResolve(bool (*matchFunc)(Expr *, const Type *)); }; @@ -293,7 +289,6 @@ public: Expr *TypeCheck(); int EstimateCost() const; -private: Expr *arrayOrVector, *index; }; @@ -321,7 +316,6 @@ public: virtual int getElementNumber() const; -protected: std::string getCandidateNearMatches() const; Expr *expr; @@ -507,7 +501,6 @@ public: Expr *Optimize(); int EstimateCost() const; -private: const Type *type; Expr *expr; }; @@ -527,7 +520,6 @@ public: Expr *Optimize(); int EstimateCost() const; -private: Expr *expr; }; @@ -547,7 +539,6 @@ public: Expr *Optimize(); int EstimateCost() const; -private: Expr *expr; }; diff --git a/ispc.h b/ispc.h index b53134c6..836ff0f1 100644 --- a/ispc.h +++ b/ispc.h @@ -368,22 +368,26 @@ struct Globals { }; enum { - COST_FUNCALL = 4, - COST_TASK_LAUNCH = 16, - COST_SELECT = 4, - COST_RETURN = 4, - COST_SIMPLE_ARITH_LOGIC_OP = 1, - COST_COMPLEX_ARITH_OP = 4, + COST_ASSIGN = 1, COST_COHERENT_BREAK_CONTINE = 4, + COST_COMPLEX_ARITH_OP = 4, + COST_DEREF = 4, + COST_FUNCALL = 4, + COST_GATHER = 8, + COST_LOAD = 2, COST_REGULAR_BREAK_CONTINUE = 2, + COST_RETURN = 4, + COST_SELECT = 4, + 
COST_SIMPLE_ARITH_LOGIC_OP = 1, + COST_SYNC = 32, + COST_TASK_LAUNCH = 16, + COST_TYPECAST_COMPLEX = 4, + COST_TYPECAST_SIMPLE = 1, COST_UNIFORM_LOOP = 4, COST_VARYING_LOOP = 6, - COST_SYNC = 32, - COST_LOAD = 2, - COST_DEREF = 4, - COST_TYPECAST_SIMPLE = 1, - COST_TYPECAST_COMPLEX = 4, - COST_GATHER = 8 + + CHECK_MASK_AT_FUNCTION_START_COST = 16, + PREDICATE_SAFE_IF_STATEMENT_COST = 6, }; extern Globals *g; diff --git a/module.cpp b/module.cpp index c2a4703b..689594e0 100644 --- a/module.cpp +++ b/module.cpp @@ -705,7 +705,7 @@ lEmitFunctionCode(FunctionEmitContext *ctx, llvm::Function *function, int costEstimate = code->EstimateCost(); bool checkMask = (ft->isTask == true) || ((function->hasFnAttr(llvm::Attribute::AlwaysInline) == false) && - costEstimate > 16); + costEstimate > CHECK_MASK_AT_FUNCTION_START_COST); Debug(code->pos, "Estimated cost for function \"%s\" = %d\n", funSym->name.c_str(), costEstimate); // If the body of the function is non-trivial, then we wrap the diff --git a/stmt.cpp b/stmt.cpp index 928d50df..6be10752 100644 --- a/stmt.cpp +++ b/stmt.cpp @@ -585,6 +585,180 @@ IfStmt::emitMaskedTrueAndFalse(FunctionEmitContext *ctx, llvm::Value *oldMask, } +/** Similar to the Stmt variant of this function, this conservatively + checks to see if it's safe to run the code for the given Expr even if + the mask is 'all off'. 
+
+    A NULL expression is reported as unsafe.  Function calls are always
+    reported as unsafe.  An index expression is only accepted when its
+    index is a compile-time constant that is in range for a sized
+    array/vector and the expression being indexed is itself safe.
+
+    @param expr  Expression to examine; may be NULL.
+    @return      true only if every sub-expression was recognized as safe.
+ */
+static bool
+lSafeToRunWithAllLanesOff(Expr *expr) {
+    if (expr == NULL)
+        return false;
+
+    UnaryExpr *ue;
+    if ((ue = dynamic_cast<UnaryExpr *>(expr)) != NULL)
+        return lSafeToRunWithAllLanesOff(ue->expr);
+
+    BinaryExpr *be;
+    if ((be = dynamic_cast<BinaryExpr *>(expr)) != NULL)
+        return (lSafeToRunWithAllLanesOff(be->arg0) &&
+                lSafeToRunWithAllLanesOff(be->arg1));
+
+    AssignExpr *ae;
+    if ((ae = dynamic_cast<AssignExpr *>(expr)) != NULL)
+        return (lSafeToRunWithAllLanesOff(ae->lvalue) &&
+                lSafeToRunWithAllLanesOff(ae->rvalue));
+
+    SelectExpr *se;
+    if ((se = dynamic_cast<SelectExpr *>(expr)) != NULL)
+        return (lSafeToRunWithAllLanesOff(se->test) &&
+                lSafeToRunWithAllLanesOff(se->expr1) &&
+                lSafeToRunWithAllLanesOff(se->expr2));
+
+    ExprList *el;
+    if ((el = dynamic_cast<ExprList *>(expr)) != NULL) {
+        for (unsigned int i = 0; i < el->exprs.size(); ++i)
+            if (!lSafeToRunWithAllLanesOff(el->exprs[i]))
+                return false;
+        return true;
+    }
+
+    FunctionCallExpr *fce;
+    if ((fce = dynamic_cast<FunctionCallExpr *>(expr)) != NULL)
+        // FIXME: If we could somehow determine that the function being
+        // called was safe (and all of the args Exprs were safe), then it'd
+        // be nice to be able to return true here.  (Consider a call to
+        // e.g. floatbits() in the stdlib.)  Unfortunately for now we just
+        // have to be conservative.
+        return false;
+
+    IndexExpr *ie;
+    if ((ie = dynamic_cast<IndexExpr *>(expr)) != NULL) {
+        // If we can determine at compile time the size of the array/vector
+        // and if the indices are compile-time constants, then we may be
+        // able to safely run this under a predicated if statement..
+        if (ie->arrayOrVector == NULL)
+            return false;
+
+        const Type *type = ie->arrayOrVector->GetType();
+        ConstExpr *ce = dynamic_cast<ConstExpr *>(ie->index);
+        if (type == NULL || ce == NULL)
+            return false;
+        // Index through a reference: look at the type it refers to.
+        if (dynamic_cast<const ReferenceType *>(type) != NULL)
+            type = type->GetReferenceTarget();
+
+        const SequentialType *seqType =
+            dynamic_cast<const SequentialType *>(type);
+        assert(seqType != NULL);
+        int nElements = seqType->GetElementCount();
+        if (nElements == 0)
+            // Unsized array, so we can't be sure
+            return false;
+
+        int32_t indices[ISPC_MAX_NVEC];
+        int count = ce->AsInt32(indices);
+        for (int i = 0; i < count; ++i)
+            if (indices[i] < 0 || indices[i] >= nElements)
+                return false;
+
+        // All indices are in-bounds, so this access itself is fine.  The
+        // expression being indexed still has to be safe on its own,
+        // though: for something like a[i][0] or f()[0] the constant outer
+        // index says nothing about the inner index or the call.
+        return lSafeToRunWithAllLanesOff(ie->arrayOrVector);
+    }
+
+    MemberExpr *me;
+    if ((me = dynamic_cast<MemberExpr *>(expr)) != NULL)
+        return lSafeToRunWithAllLanesOff(me->expr);
+
+    if (dynamic_cast<ConstExpr *>(expr) != NULL)
+        return true;
+
+    TypeCastExpr *tce;
+    if ((tce = dynamic_cast<TypeCastExpr *>(expr)) != NULL)
+        return lSafeToRunWithAllLanesOff(tce->expr);
+
+    ReferenceExpr *re;
+    if ((re = dynamic_cast<ReferenceExpr *>(expr)) != NULL)
+        return lSafeToRunWithAllLanesOff(re->expr);
+
+    DereferenceExpr *dre;
+    if ((dre = dynamic_cast<DereferenceExpr *>(expr)) != NULL)
+        return lSafeToRunWithAllLanesOff(dre->expr);
+
+    // Leaf expressions with no sub-expressions to examine.
+    if (dynamic_cast<SymbolExpr *>(expr) != NULL ||
+        dynamic_cast<FunctionSymbolExpr *>(expr) != NULL ||
+        dynamic_cast<SyncExpr *>(expr) != NULL)
+        return true;
+
+    // Every Expr subclass is expected to be handled above.
+    FATAL("Unknown Expr type in lSafeToRunWithAllLanesOff()");
+    return false;
+}
+
+
+/** Given an arbitrary statement, this function conservatively tests to see
+    if it's safe to run the code for the statement even if the mask is all
+    off.  Here we just need to determine which kind of statement we have
+    and recursively traverse it and/or the expressions inside of it.
+
+    @param stmt  Statement to examine; NULL is treated as safe.
+    @return      true only if every nested statement and expression was
+                 recognized as safe.
+ */
+static bool
+lSafeToRunWithAllLanesOff(Stmt *stmt) {
+    if (stmt == NULL)
+        return true;
+
+    ExprStmt *es;
+    if ((es = dynamic_cast<ExprStmt *>(stmt)) != NULL)
+        return lSafeToRunWithAllLanesOff(es->expr);
+
+    DeclStmt *ds;
+    if ((ds = dynamic_cast<DeclStmt *>(stmt)) != NULL) {
+        // NOTE(review): a NULL initExpr goes to the Expr overload, which
+        // returns false for NULL -- so a declarator with no initializer
+        // presumably makes the whole statement "unsafe"; confirm that
+        // this much conservatism is intended.
+        for (unsigned int i = 0; i < ds->declaration->declarators.size(); ++i)
+            if (!lSafeToRunWithAllLanesOff(ds->declaration->declarators[i]->initExpr))
+                return false;
+        return true;
+    }
+
+    IfStmt *is;
+    if ((is = dynamic_cast<IfStmt *>(stmt)) != NULL)
+        return (lSafeToRunWithAllLanesOff(is->test) &&
+                lSafeToRunWithAllLanesOff(is->trueStmts) &&
+                lSafeToRunWithAllLanesOff(is->falseStmts));
+
+    DoStmt *dos;
+    if ((dos = dynamic_cast<DoStmt *>(stmt)) != NULL)
+        return (lSafeToRunWithAllLanesOff(dos->testExpr) &&
+                lSafeToRunWithAllLanesOff(dos->bodyStmts));
+
+    ForStmt *fs;
+    if ((fs = dynamic_cast<ForStmt *>(stmt)) != NULL)
+        return (lSafeToRunWithAllLanesOff(fs->init) &&
+                lSafeToRunWithAllLanesOff(fs->test) &&
+                lSafeToRunWithAllLanesOff(fs->step) &&
+                lSafeToRunWithAllLanesOff(fs->stmts));
+
+    // Statements with nothing nested inside them to examine.
+    if (dynamic_cast<BreakStmt *>(stmt) != NULL ||
+        dynamic_cast<ContinueStmt *>(stmt) != NULL)
+        return true;
+
+    // NOTE(review): as with declarations, a NULL rs->val is reported as
+    // unsafe by the Expr overload -- confirm.
+    ReturnStmt *rs;
+    if ((rs = dynamic_cast<ReturnStmt *>(stmt)) != NULL)
+        return lSafeToRunWithAllLanesOff(rs->val);
+
+    StmtList *sl;
+    if ((sl = dynamic_cast<StmtList *>(stmt)) != NULL) {
+        const std::vector<Stmt *> &sls = sl->GetStatements();
+        for (unsigned int i = 0; i < sls.size(); ++i)
+            if (!lSafeToRunWithAllLanesOff(sls[i]))
+                return false;
+        return true;
+    }
+
+    PrintStmt *ps;
+    if ((ps = dynamic_cast<PrintStmt *>(stmt)) != NULL)
+        return lSafeToRunWithAllLanesOff(ps->values);
+
+    // Every Stmt subclass is expected to be handled above.
+    FATAL("Unexpected stmt type in lSafeToRunWithAllLanesOff()");
+    return false;
+}
+
+
 /** Emit code for an if test that checks the mask and the test values and
     tries to be smart about jumping over code that doesn't need to be run.
*/ @@ -631,10 +805,41 @@ IfStmt::emitVaryingIf(FunctionEmitContext *ctx, llvm::Value *ltest) const { ctx->SetCurrentBasicBlock(bDone); } else if (trueStmts != NULL || falseStmts != NULL) { - assert(doAnyCheck); - llvm::BasicBlock *bDone = ctx->CreateBasicBlock("if_done"); - emitMaskMixed(ctx, oldMask, ltest, bDone); - ctx->SetCurrentBasicBlock(bDone); + // If there is nothing that is potentially unsafe to run with all + // lanes off in the true and false statements and if the total + // complexity of those two is relatively simple, then we'll go + // ahead and emit straightline code that runs both sides, updating + // the mask accordingly. This is useful for efficiently compiling + // things like: + // + // if (foo) x = 0; + // else ++x; + // + // Where the overhead of checking if any of the program instances wants + // to run one side or the other is more than the actual computation. + // The lSafeToRunWithAllLanesOff() checks to make sure that we don't do this + // for potentially dangerous code like: + // + // if (index < count) array[index] = 0; + // + // where our use of blend for conditional assignments doesn't check + // for the 'all lanes' off case. + if (lSafeToRunWithAllLanesOff(trueStmts) && + lSafeToRunWithAllLanesOff(falseStmts) && + (((trueStmts ? trueStmts->EstimateCost() : 0) + + (falseStmts ? 
falseStmts->EstimateCost() : 0)) < + PREDICATE_SAFE_IF_STATEMENT_COST)) { + ctx->StartVaryingIf(oldMask); + emitMaskedTrueAndFalse(ctx, oldMask, ltest); + assert(ctx->GetCurrentBasicBlock()); + ctx->EndIf(); + } + else { + assert(doAnyCheck); + llvm::BasicBlock *bDone = ctx->CreateBasicBlock("if_done"); + emitMaskMixed(ctx, oldMask, ltest, bDone); + ctx->SetCurrentBasicBlock(bDone); + } } } diff --git a/stmt.h b/stmt.h index 3dac745a..b918554c 100644 --- a/stmt.h +++ b/stmt.h @@ -77,7 +77,6 @@ public: Stmt *TypeCheck(); int EstimateCost() const; -private: Expr *expr; }; @@ -95,7 +94,6 @@ public: Stmt *TypeCheck(); int EstimateCost() const; - private: Declaration *declaration; }; @@ -156,7 +154,6 @@ public: Stmt *TypeCheck(); int EstimateCost() const; -private: Expr *testExpr; Stmt *bodyStmts; const bool doCoherentCheck; @@ -178,7 +175,6 @@ public: Stmt *TypeCheck(); int EstimateCost() const; -private: /** 'for' statment initializer; may be NULL, indicating no intitializer */ Stmt *init; /** expression that returns a value indicating whether the loop should @@ -250,7 +246,6 @@ public: Stmt *TypeCheck(); int EstimateCost() const; -private: Expr *val; /** This indicates whether the generated code will check to see if no more program instances are currently running after the return, in @@ -301,7 +296,6 @@ public: Stmt *TypeCheck(); int EstimateCost() const; -private: /** Format string for the print() statement. */ const std::string format; /** This holds the arguments passed to the print() statement. If more