Short-circuit evaluation of && and || operators.

We now follow C's approach of evaluating these: we don't evaluate
the second expression of the operator if the value of the first one
determines the overall result.  Thus, these operators can now be used
idiomatically, as in (index < limit && array[index] > 0).

For varying expressions, the mask is set appropriately when evaluating
the second expression.

(For expressions that can be determined to be both simple and safe to
evaluate with the mask all off, we still evaluate both sides and compute
the logical op result directly, which saves a number of branches and tests.
However, the effect of this should never be visible to the programmer.)
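
(Standalone illustration, not part of the commit: the fast path is
invisible because, for side-effect-free bool operands, per-element
&& and || agree with bitwise & and |.  Lane count and values below
are made up.)

    #include <cassert>

    int main() {
        // For side-effect-free bool lanes, evaluating both sides and
        // combining with & gives the same answer as a true
        // short-circuit &&, just without the branches and tests.
        const int laneCount = 4;
        bool a[laneCount] = { true, false, true,  false };
        bool b[laneCount] = { true, true,  false, false };
        for (int i = 0; i < laneCount; ++i)
            assert((a[i] && b[i]) == (a[i] & b[i]));
        return 0;
    }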

Issue #4.
Matt Pharr committed 2012-01-30 05:58:41 -08:00
parent 0575b1f38d, commit e19f4931d1
16 changed files with 658 additions and 136 deletions

ast.cpp

@@ -315,3 +315,116 @@ EstimateCost(ASTNode *root) {
    return cost;
}


/** Given an AST node, check to see if it's safe if we happen to run the
    code for that node with the execution mask all off.
 */
static bool
lCheckAllOffSafety(ASTNode *node, void *data) {
    bool *okPtr = (bool *)data;

    if (dynamic_cast<FunctionCallExpr *>(node) != NULL) {
        // FIXME: If we could somehow determine that the function being
        // called was safe (and all of the args Exprs were safe), then it'd
        // be nice to be able to return true here.  (Consider a call to
        // e.g. floatbits() in the stdlib.)  Unfortunately for now we just
        // have to be conservative.
        *okPtr = false;
        return false;
    }

    if (dynamic_cast<AssertStmt *>(node) != NULL) {
        // While it's fine to run the assert for varying tests, it's not
        // desirable to check an assert on a uniform variable if all of the
        // lanes are off.
        *okPtr = false;
        return false;
    }

    if (dynamic_cast<NewExpr *>(node) != NULL ||
        dynamic_cast<DeleteStmt *>(node) != NULL) {
        // We definitely don't want to run the uniform variants of these if
        // the mask is all off.  It's also worth skipping the overhead of
        // executing the varying versions of them in the all-off mask case.
        *okPtr = false;
        return false;
    }

    if (g->target.allOffMaskIsSafe == true)
        // Don't worry about memory accesses if we have a target that can
        // safely run them with the mask all off
        return true;

    IndexExpr *ie;
    if ((ie = dynamic_cast<IndexExpr *>(node)) != NULL && ie->baseExpr != NULL) {
        const Type *type = ie->baseExpr->GetType();
        if (type == NULL)
            return true;
        if (dynamic_cast<const ReferenceType *>(type) != NULL)
            type = type->GetReferenceTarget();

        ConstExpr *ce = dynamic_cast<ConstExpr *>(ie->index);
        if (ce == NULL) {
            // indexing with a variable... -> not safe
            *okPtr = false;
            return false;
        }

        const PointerType *pointerType =
            dynamic_cast<const PointerType *>(type);
        if (pointerType != NULL) {
            // pointer[index] -> can't be sure -> not safe
            *okPtr = false;
            return false;
        }

        const SequentialType *seqType =
            dynamic_cast<const SequentialType *>(type);
        Assert(seqType != NULL);
        int nElements = seqType->GetElementCount();
        if (nElements == 0) {
            // Unsized array, so we can't be sure -> not safe
            *okPtr = false;
            return false;
        }

        int32_t indices[ISPC_MAX_NVEC];
        int count = ce->AsInt32(indices);
        for (int i = 0; i < count; ++i) {
            if (indices[i] < 0 || indices[i] >= nElements) {
                // Index is out of bounds -> not safe
                *okPtr = false;
                return false;
            }
        }

        // All indices are in-bounds
        return true;
    }

    MemberExpr *me;
    if ((me = dynamic_cast<MemberExpr *>(node)) != NULL &&
        me->dereferenceExpr) {
        *okPtr = false;
        return false;
    }

    DereferenceExpr *de;
    if ((de = dynamic_cast<DereferenceExpr *>(node)) != NULL) {
        const Type *exprType = de->expr->GetType();
        if (dynamic_cast<const PointerType *>(exprType) != NULL) {
            *okPtr = false;
            return false;
        }
    }

    return true;
}


bool
SafeToRunWithMaskAllOff(ASTNode *root) {
    bool safe = true;
    WalkAST(root, lCheckAllOffSafety, NULL, &safe);
    return safe;
}
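
Callers are expected to pair this predicate with EstimateCost():
predication is only a win when the code is both cheap and tolerant of
an all-off mask.  A condensed sketch of that caller-side pattern
(paraphrasing the IfStmt changes in stmt.cpp below; trueFalseCost
stands for the summed cost of the two branches, and this is not a
standalone program):

    // Sketch: decide between predicating both sides of an 'if' and
    // emitting real control flow.  Mirrors IfStmt::emitVaryingIf()
    // later in this diff.
    bool costIsAcceptable = (trueFalseCost < PREDICATE_SAFE_IF_STATEMENT_COST);
    bool safeToRunWithAllLanesOff = (SafeToRunWithMaskAllOff(trueStmts) &&
                                     SafeToRunWithMaskAllOff(falseStmts));
    bool usePredication = safeToRunWithAllLanesOff && costIsAcceptable;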

ast.h

@@ -144,4 +144,8 @@ extern Stmt *TypeCheck(Stmt *);
    the given root. */
extern int EstimateCost(ASTNode *root);

/** Returns true if it would be safe to run the given code with an "all
    off" mask. */
extern bool SafeToRunWithMaskAllOff(ASTNode *root);

#endif // ISPC_AST_H

(user documentation)

@@ -1184,7 +1184,6 @@ C++:
There are a number of features of C89 that are not supported in ``ispc``
but are likely to be supported in future releases:

-  * Short circuiting of logical operations

  * There are no types named ``char``, ``short``, or ``long`` (or ``long
    double``).  However, there are built-in ``int8``, ``int16``, and
    ``int64`` types
@@ -1969,6 +1968,18 @@ operator also work as expected.
    (*fp).a = 0;
    fp->b = 1;

As in C and C++, evaluation of the ``||`` and ``&&`` logical operators is
"short-circuited"; the right-hand side won't be evaluated if the value from
the left-hand side determines the logical operator's value.  For example,
in the following code, ``array[index]`` won't be evaluated for values of
``index`` that are greater than or equal to ``NUM_ITEMS``.

::

    if (index < NUM_ITEMS && array[index] > 0) {
        // ...
    }
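
(Aside, not part of the documentation diff: the per-lane behavior can
be modeled with a plain scalar loop.  A rough, runnable C++ analogy of
the example above, with an invented lane count and array contents:)

    #include <cstdio>

    int main() {
        const int NUM_ITEMS = 4;
        int array[NUM_ITEMS] = { 3, 0, 7, -2 };      // invented contents
        int index[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };   // one entry per "lane"
        for (int lane = 0; lane < 8; ++lane) {
            bool result = false;
            // Lanes whose index fails the bounds test never read array[].
            if (index[lane] < NUM_ITEMS)
                result = (array[index[lane]] > 0);
            printf("lane %d: %s\n", lane, result ? "true" : "false");
        }
        return 0;
    }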
Dynamic Memory Allocation
-------------------------

expr.cpp

@@ -1405,13 +1405,274 @@ BinaryExpr::BinaryExpr(Op o, Expr *a, Expr *b, SourcePos p)
}


/** Emit code for a && or || logical operator.  In particular, the code
    here handles "short-circuit" evaluation, where the second expression
    isn't evaluated if the value of the first one determines the value of
    the result.
 */
llvm::Value *
lEmitLogicalOp(BinaryExpr::Op op, Expr *arg0, Expr *arg1,
               FunctionEmitContext *ctx, SourcePos pos) {

    const Type *type0 = arg0->GetType(), *type1 = arg1->GetType();
    if (type0 == NULL || type1 == NULL) {
        Assert(m->errorCount > 0);
        return NULL;
    }

    // There is overhead (branches, etc.) to short-circuiting, so if the
    // right side of the expression is a) relatively simple, and b) can be
    // safely executed with an all-off execution mask, then we just
    // evaluate both sides and then the logical operator in that case.
    // FIXME: not sure what we should do about vector types here...
    bool shortCircuit = (EstimateCost(arg1) > PREDICATE_SAFE_IF_STATEMENT_COST ||
                         SafeToRunWithMaskAllOff(arg1) == false ||
                         dynamic_cast<const VectorType *>(type0) != NULL ||
                         dynamic_cast<const VectorType *>(type1) != NULL);
    if (shortCircuit == false) {
        // If one of the operands is uniform but the other is varying,
        // promote the uniform one to varying
        if (type0->IsUniformType() && type1->IsVaryingType()) {
            arg0 = TypeConvertExpr(arg0, AtomicType::VaryingBool, lOpString(op));
            Assert(arg0 != NULL);
        }
        if (type1->IsUniformType() && type0->IsVaryingType()) {
            arg1 = TypeConvertExpr(arg1, AtomicType::VaryingBool, lOpString(op));
            Assert(arg1 != NULL);
        }

        llvm::Value *value0 = arg0->GetValue(ctx);
        llvm::Value *value1 = arg1->GetValue(ctx);
        if (value0 == NULL || value1 == NULL) {
            Assert(m->errorCount > 0);
            return NULL;
        }

        if (op == BinaryExpr::LogicalAnd)
            return ctx->BinaryOperator(llvm::Instruction::And, value0, value1,
                                       "logical_and");
        else {
            Assert(op == BinaryExpr::LogicalOr);
            return ctx->BinaryOperator(llvm::Instruction::Or, value0, value1,
                                       "logical_or");
        }
    }

    // Allocate temporary storage for the return value
    const Type *retType = Type::MoreGeneralType(type0, type1, pos, lOpString(op));
    LLVM_TYPE_CONST llvm::Type *llvmRetType = retType->LLVMType(g->ctx);
    llvm::Value *retPtr = ctx->AllocaInst(llvmRetType, "logical_op_mem");

    llvm::BasicBlock *bbSkipEvalValue1 = ctx->CreateBasicBlock("skip_eval_1");
    llvm::BasicBlock *bbEvalValue1 = ctx->CreateBasicBlock("eval_1");
    llvm::BasicBlock *bbLogicalDone = ctx->CreateBasicBlock("logical_op_done");

    // Evaluate the first operand
    llvm::Value *value0 = arg0->GetValue(ctx);
    if (value0 == NULL) {
        Assert(m->errorCount > 0);
        return NULL;
    }

    if (type0->IsUniformType()) {
        // Check to see if the value of the first operand is true or false
        llvm::Value *value0True =
            ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
                         value0, LLVMTrue);

        if (op == BinaryExpr::LogicalOr) {
            // For ||, if value0 is true, then we skip evaluating value1
            // entirely.
            ctx->BranchInst(bbSkipEvalValue1, bbEvalValue1, value0True);

            // If value0 is true, the complete result is true (either
            // uniform or varying)
            ctx->SetCurrentBasicBlock(bbSkipEvalValue1);
            llvm::Value *trueValue = retType->IsUniformType() ? LLVMTrue :
                LLVMMaskAllOn;
            ctx->StoreInst(trueValue, retPtr);
            ctx->BranchInst(bbLogicalDone);
        }
        else {
            Assert(op == BinaryExpr::LogicalAnd);

            // Conversely, for &&, if value0 is false, we skip evaluating
            // value1.
            ctx->BranchInst(bbEvalValue1, bbSkipEvalValue1, value0True);

            // In this case, the complete result is false (again, either a
            // uniform or varying false).
            ctx->SetCurrentBasicBlock(bbSkipEvalValue1);
            llvm::Value *falseValue = retType->IsUniformType() ? LLVMFalse :
                LLVMMaskAllOff;
            ctx->StoreInst(falseValue, retPtr);
            ctx->BranchInst(bbLogicalDone);
        }

        // Both || and && are in the same situation if the first operand's
        // value didn't resolve the final result: they need to evaluate the
        // value of the second operand, which in turn gives the value for
        // the full expression.
        ctx->SetCurrentBasicBlock(bbEvalValue1);
        if (type1->IsUniformType() && retType->IsVaryingType()) {
            arg1 = TypeConvertExpr(arg1, AtomicType::VaryingBool, "logical op");
            Assert(arg1 != NULL);
        }

        llvm::Value *value1 = arg1->GetValue(ctx);
        if (value1 == NULL) {
            Assert(m->errorCount > 0);
            return NULL;
        }
        ctx->StoreInst(value1, retPtr);
        ctx->BranchInst(bbLogicalDone);

        // In all cases, we end up at the bbLogicalDone basic block;
        // loading the value stored in retPtr in turn gives the overall
        // result.
        ctx->SetCurrentBasicBlock(bbLogicalDone);
        return ctx->LoadInst(retPtr);
    }
    else {
        // Otherwise, the first operand is varying...  Save the current
        // value of the mask so that we can restore it at the end.
        llvm::Value *oldMask = ctx->GetInternalMask();

        // Convert the second operand to be varying as well, so that we can
        // perform logical vector ops with its value.
        if (type1->IsUniformType()) {
            arg1 = TypeConvertExpr(arg1, AtomicType::VaryingBool, "logical op");
            Assert(arg1 != NULL);
            type1 = arg1->GetType();
        }

        if (op == BinaryExpr::LogicalOr) {
            // See if value0 is true for all currently executing
            // lanes--i.e. if (value0 & mask) == mask.  If so, we don't
            // need to evaluate the second operand of the expression.
            llvm::Value *value0AndMask =
                ctx->BinaryOperator(llvm::Instruction::And, value0, oldMask,
                                    "op&mask");
            llvm::Value *equalsMask =
                ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
                             value0AndMask, oldMask, "value0&mask==mask");
            equalsMask = ctx->I1VecToBoolVec(equalsMask);
            llvm::Value *allMatch = ctx->All(equalsMask);
            ctx->BranchInst(bbSkipEvalValue1, bbEvalValue1, allMatch);

            // value0 is true for all running lanes, so it can be used for
            // the final result
            ctx->SetCurrentBasicBlock(bbSkipEvalValue1);
            ctx->StoreInst(value0, retPtr);
            ctx->BranchInst(bbLogicalDone);

            // Otherwise, we need to evaluate arg1.  However, first we need
            // to set the execution mask to be (oldMask & ~value0); in other
            // words, only execute the instances where value0 is false.
            // For the instances where value0 was true, we need to inhibit
            // execution.
            ctx->SetCurrentBasicBlock(bbEvalValue1);
            llvm::Value *not0 = ctx->NotOperator(value0);
            ctx->SetInternalMaskAnd(oldMask, not0);

            llvm::Value *value1 = arg1->GetValue(ctx);
            if (value1 == NULL) {
                Assert(m->errorCount > 0);
                return NULL;
            }

            // We need to compute the result carefully, since vector
            // elements that were computed when the corresponding lane was
            // disabled have undefined values:
            // result = (value0 & old_mask) | (value1 & current_mask)
            llvm::Value *value1AndMask =
                ctx->BinaryOperator(llvm::Instruction::And, value1,
                                    ctx->GetInternalMask(), "op&mask");
            llvm::Value *result =
                ctx->BinaryOperator(llvm::Instruction::Or, value0AndMask,
                                    value1AndMask, "or_result");
            ctx->StoreInst(result, retPtr);
            ctx->BranchInst(bbLogicalDone);
        }
        else {
            Assert(op == BinaryExpr::LogicalAnd);

            // If value0 is false for all currently running lanes, the
            // overall result must be false: this corresponds to checking
            // if (mask & ~value0) == mask.
            llvm::Value *notValue0 = ctx->NotOperator(value0, "not_value0");
            llvm::Value *notValue0AndMask =
                ctx->BinaryOperator(llvm::Instruction::And, notValue0, oldMask,
                                    "not_value0&mask");
            llvm::Value *equalsMask =
                ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
                             notValue0AndMask, oldMask, "not_value0&mask==mask");
            equalsMask = ctx->I1VecToBoolVec(equalsMask);
            llvm::Value *allMatch = ctx->All(equalsMask);
            ctx->BranchInst(bbSkipEvalValue1, bbEvalValue1, allMatch);

            // value0 was false for all running lanes, so use its value as
            // the overall result.
            ctx->SetCurrentBasicBlock(bbSkipEvalValue1);
            ctx->StoreInst(value0, retPtr);
            ctx->BranchInst(bbLogicalDone);

            // Otherwise we need to evaluate value1, but again with the
            // mask set to only be on for the lanes where value0 was true.
            // For the lanes where value0 was false, execution needs to be
            // disabled: mask = (mask & value0).
            ctx->SetCurrentBasicBlock(bbEvalValue1);
            ctx->SetInternalMaskAnd(oldMask, value0);

            llvm::Value *value1 = arg1->GetValue(ctx);
            if (value1 == NULL) {
                Assert(m->errorCount > 0);
                return NULL;
            }

            // And as in the || case, we have to mask out the lanes that
            // weren't running before we AND the two values together:
            // result = (value0 & old_mask) & (value1 & current_mask)
            llvm::Value *value0AndMask =
                ctx->BinaryOperator(llvm::Instruction::And, value0, oldMask,
                                    "op&mask");
            llvm::Value *value1AndMask =
                ctx->BinaryOperator(llvm::Instruction::And, value1,
                                    ctx->GetInternalMask(), "value1&mask");
            llvm::Value *result =
                ctx->BinaryOperator(llvm::Instruction::And, value0AndMask,
                                    value1AndMask, "and_result");
            ctx->StoreInst(result, retPtr);
            ctx->BranchInst(bbLogicalDone);
        }

        // And finally we always end up in bbLogicalDone, where we restore
        // the old mask and return the computed result
        ctx->SetCurrentBasicBlock(bbLogicalDone);
        ctx->SetInternalMask(oldMask);
        return ctx->LoadInst(retPtr);
    }
}
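
The mask algebra in the varying paths can be checked in isolation.  A
small, self-contained C++ brute-force test of the || identity used
above, result = (value0 & oldMask) | (value1 & (oldMask & ~value0)),
over all 4-bit lane masks (the && path is symmetric, with
result = (value0 & oldMask) & (value1 & (oldMask & value0))); this is
an illustration, not compiler code:

    #include <cassert>
    #include <cstdint>

    int main() {
        // Treat bit i of each word as lane i's bool value / mask bit.
        for (uint32_t mask = 0; mask < 16; ++mask)
            for (uint32_t v0 = 0; v0 < 16; ++v0)
                for (uint32_t v1 = 0; v1 < 16; ++v1) {
                    // v1's bits for lanes outside (mask & ~v0) stand in for
                    // the undefined values; the formula must mask them away.
                    uint32_t result = (v0 & mask) | (v1 & (mask & ~v0));
                    for (int lane = 0; lane < 4; ++lane) {
                        if (!(mask & (1u << lane)))
                            continue;  // lanes that were off have no defined result
                        bool expect = (v0 & (1u << lane)) || (v1 & (1u << lane));
                        assert(((result >> lane) & 1u) == (expect ? 1u : 0u));
                    }
                }
        return 0;
    }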

llvm::Value *
BinaryExpr::GetValue(FunctionEmitContext *ctx) const {
    if (!arg0 || !arg1) {
        Assert(m->errorCount > 0);
        return NULL;
    }

    // Handle these specially, since we want to short-circuit their
    // evaluation...
    if (op == LogicalAnd || op == LogicalOr)
        return lEmitLogicalOp(op, arg0, arg1, ctx, pos);

    llvm::Value *value0 = arg0->GetValue(ctx);
    llvm::Value *value1 = arg1->GetValue(ctx);
    if (value0 == NULL || value1 == NULL) {
        Assert(m->errorCount > 0);
        return NULL;
    }

    ctx->SetDebugPos(pos);
    switch (op) {
@@ -1441,12 +1702,6 @@ BinaryExpr::GetValue(FunctionEmitContext *ctx) const {
        return lEmitBinaryBitOp(op, value0, value1,
                                arg0->GetType()->IsUnsignedType(), ctx);
    }
-   case LogicalAnd:
-       return ctx->BinaryOperator(llvm::Instruction::And, value0, value1,
-                                  "logical_and");
-   case LogicalOr:
-       return ctx->BinaryOperator(llvm::Instruction::Or, value0, value1,
-                                  "logical_or");
    case Comma:
        return value1;
    default:
@@ -2017,12 +2272,15 @@ BinaryExpr::TypeCheck() {
    }
    case LogicalAnd:
    case LogicalOr: {
-       // We need to type convert to a boolean type of the more general
-       // shape of the two types
-       bool isUniform = (type0->IsUniformType() && type1->IsUniformType());
-       const AtomicType *boolType = isUniform ? AtomicType::UniformBool :
-           AtomicType::VaryingBool;
-       const Type *destType = NULL;
+       // For now, we just type convert to boolean types, of the same
+       // variability as the original types.  (When generating code, it's
+       // useful to have preserved the uniform/varying distinction.)
+       const AtomicType *boolType0 = type0->IsUniformType() ?
+           AtomicType::UniformBool : AtomicType::VaryingBool;
+       const AtomicType *boolType1 = type1->IsUniformType() ?
+           AtomicType::UniformBool : AtomicType::VaryingBool;
+       const Type *destType0 = NULL, *destType1 = NULL;
        const VectorType *vtype0 = dynamic_cast<const VectorType *>(type0);
        const VectorType *vtype1 = dynamic_cast<const VectorType *>(type1);
        if (vtype0 && vtype1) {
@@ -2032,17 +2290,24 @@ BinaryExpr::TypeCheck() {
                     "different sizes (%d vs. %d).", lOpString(op), sz0, sz1);
                return NULL;
            }
-           destType = new VectorType(boolType, sz0);
+           destType0 = new VectorType(boolType0, sz0);
+           destType1 = new VectorType(boolType1, sz1);
        }
-       else if (vtype0)
-           destType = new VectorType(boolType, vtype0->GetElementCount());
-       else if (vtype1)
-           destType = new VectorType(boolType, vtype1->GetElementCount());
-       else
-           destType = boolType;
+       else if (vtype0 != NULL) {
+           destType0 = new VectorType(boolType0, vtype0->GetElementCount());
+           destType1 = new VectorType(boolType1, vtype0->GetElementCount());
+       }
+       else if (vtype1 != NULL) {
+           destType0 = new VectorType(boolType0, vtype1->GetElementCount());
+           destType1 = new VectorType(boolType1, vtype1->GetElementCount());
+       }
+       else {
+           destType0 = boolType0;
+           destType1 = boolType1;
+       }

-       arg0 = TypeConvertExpr(arg0, destType, lOpString(op));
-       arg1 = TypeConvertExpr(arg1, destType, lOpString(op));
+       arg0 = TypeConvertExpr(arg0, destType0, lOpString(op));
+       arg1 = TypeConvertExpr(arg1, destType1, lOpString(op));
        if (arg0 == NULL || arg1 == NULL)
            return NULL;
        return this;

stmt.cpp

@@ -473,112 +473,6 @@ IfStmt::emitMaskedTrueAndFalse(FunctionEmitContext *ctx, llvm::Value *oldMask,
    }

(The lCheckAllOffSafety() function is deleted here; it moved, verbatim, to
ast.cpp above, where it now backs the exported SafeToRunWithMaskAllOff().)
/** Emit code for an if test that checks the mask and the test values and
    tries to be smart about jumping over code that doesn't need to be run.
 */
@@ -632,7 +526,7 @@ IfStmt::emitVaryingIf(FunctionEmitContext *ctx, llvm::Value *ltest) const {
    //
    // Where the overhead of checking if any of the program instances wants
    // to run one side or the other is more than the actual computation.
-   // The lSafeToRunWithAllLanesOff() checks to make sure that we don't do this
+   // SafeToRunWithMaskAllOff() checks to make sure that we don't do this
    // for potentially dangerous code like:
    //
    //     if (index < count) array[index] = 0;
@@ -644,9 +538,8 @@ IfStmt::emitVaryingIf(FunctionEmitContext *ctx, llvm::Value *ltest) const {
    bool costIsAcceptable = (trueFalseCost <
                             PREDICATE_SAFE_IF_STATEMENT_COST);

-   bool safeToRunWithAllLanesOff = true;
-   WalkAST(trueStmts, lCheckAllOffSafety, NULL, &safeToRunWithAllLanesOff);
-   WalkAST(falseStmts, lCheckAllOffSafety, NULL, &safeToRunWithAllLanesOff);
+   bool safeToRunWithAllLanesOff = (SafeToRunWithMaskAllOff(trueStmts) &&
+                                    SafeToRunWithMaskAllOff(falseStmts));

    if (safeToRunWithAllLanesOff &&
        (costIsAcceptable || g->opt.disableCoherentControlFlow)) {
@@ -1984,9 +1877,7 @@ lCheckMask(Stmt *stmts) {
        return false;

    int cost = EstimateCost(stmts);
-   bool safeToRunWithAllLanesOff = true;
-   WalkAST(stmts, lCheckAllOffSafety, NULL, &safeToRunWithAllLanesOff);
+   bool safeToRunWithAllLanesOff = SafeToRunWithMaskAllOff(stmts);

    // The mask should be checked if the code following the
    // 'case'/'default' is relatively complex, or if it would be unsafe to
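
Condensing what lCheckMask() computes from these two inputs (a
paraphrase of the code the hunk truncates, not an exact quote):

    // The mask must be tested before running the 'case'/'default'
    // statements when they are expensive, or when they can't tolerate
    // executing with all lanes off.
    return (cost > PREDICATE_SAFE_IF_STATEMENT_COST) ||
           (safeToRunWithAllLanesOff == false);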


@@ -0,0 +1,21 @@
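// The global 'ptr' is zero-initialized, so crash() dereferences NULL.
// Since the expected result is RET == 1 in every lane, a0 < a1 must hold
// for the harness's inputs, and the uniform || has to short-circuit past
// crash() for this test to pass.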
export uniform int width() { return programCount; }

uniform int * uniform ptr;

uniform bool crash() {
    return *ptr > 0;
}

export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    float a = aFOO[programIndex];
    uniform float a0 = aFOO[0], a1 = aFOO[1];
    if (a0 < a1 || crash())
        RET[programIndex] = 1;
    else
        RET[programIndex] = 0;
}

export void result(uniform float RET[]) {
    RET[programIndex] = 1;
}


@@ -0,0 +1,24 @@
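// Even lanes satisfy the left side of the ||, so they must not evaluate
// crashEven(); odd lanes do call it, but each takes the early 'return
// true'.  The branch that dereferences the NULL 'ptr' is only reachable
// by even lanes, which are masked off when the right side runs.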
export uniform int width() { return programCount; }

uniform int * uniform ptr;

bool crashEven() {
    if (programIndex & 1)
        return true;
    else
        return (*ptr > 0);
}

export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    float a = aFOO[programIndex];
    float a0 = aFOO[0], a1 = aFOO[1];
    if (((programIndex & 1) == 0) || crashEven())
        RET[programIndex] = 1;
    else
        RET[programIndex] = 0;
}

export void result(uniform float RET[]) {
    RET[programIndex] = 1;
}


@@ -0,0 +1,25 @@
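// Only odd lanes pass the left side of the &&, so crashEven() runs with
// just the odd lanes active; each of them returns true without touching
// '*ptr'.  Expected: odd lanes store 1, even lanes store 2.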
export uniform int width() { return programCount; }

uniform int * uniform ptr;

bool crashEven() {
//CO    return (programIndex & 1) ? true : (*ptr > 0);
    if (programIndex & 1)
        return true;
    else
        return (*ptr > 0);
}

export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    float a = aFOO[programIndex];
    float a0 = aFOO[0], a1 = aFOO[1];
    if (((programIndex & 1) == 1) && crashEven())
        RET[programIndex] = 1;
    else
        RET[programIndex] = 2;
}

export void result(uniform float RET[]) {
    RET[programIndex] = (programIndex & 1) ? 1 : 2;
}


@@ -0,0 +1,21 @@
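// Uniform &&: the expected all-ones result means a0 > a1 is false for
// the harness's inputs, so short-circuiting must skip crash() and its
// NULL dereference.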
export uniform int width() { return programCount; }

uniform int * uniform ptr;

uniform bool crash() {
    return *ptr > 0;
}

export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    float a = aFOO[programIndex];
    uniform float a0 = aFOO[0], a1 = aFOO[1];
    if (a0 > a1 && crash())
        RET[programIndex] = 0;
    else
        RET[programIndex] = 1;
}

export void result(uniform float RET[]) {
    RET[programIndex] = 1;
}


@@ -0,0 +1,21 @@
export uniform int width() { return programCount; }

uniform int * uniform ptr;

bool crash() {
    return *ptr > 0;
}

export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    float a = aFOO[programIndex];
    uniform float a0 = aFOO[0], a1 = aFOO[1];
    if (a0 < a1 || crash())
        RET[programIndex] = 1;
    else
        RET[programIndex] = 0;
}

export void result(uniform float RET[]) {
    RET[programIndex] = 1;
}


@@ -0,0 +1,21 @@
export uniform int width() { return programCount; }

uniform int * uniform ptr;

bool crash() {
    return *ptr > 0;
}

export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    float a = aFOO[programIndex];
    uniform float a0 = aFOO[0], a1 = aFOO[1];
    if (a0 > a1 && crash())
        RET[programIndex] = 0;
    else
        RET[programIndex] = 1;
}

export void result(uniform float RET[]) {
    RET[programIndex] = 1;
}


@@ -0,0 +1,21 @@
export uniform int width() { return programCount; }

uniform int * uniform ptr;

bool crash() {
    return *ptr > 0;
}

export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    float a = aFOO[programIndex];
    float a0 = aFOO[0], a1 = aFOO[1];
    if (a0 < a1 || crash())
        RET[programIndex] = 1;
    else
        RET[programIndex] = 0;
}

export void result(uniform float RET[]) {
    RET[programIndex] = 1;
}


@@ -0,0 +1,21 @@
export uniform int width() { return programCount; }

uniform int * uniform ptr;

bool crash() {
    return *ptr > 0;
}

export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    float a = aFOO[programIndex];
    float a0 = aFOO[0], a1 = aFOO[1];
    if (a0 > a1 && crash())
        RET[programIndex] = 0;
    else
        RET[programIndex] = 1;
}

export void result(uniform float RET[]) {
    RET[programIndex] = 1;
}


@@ -0,0 +1,21 @@
export uniform int width() { return programCount; }

uniform int * uniform ptr;

uniform bool crash() {
    return *ptr > 0;
}

export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    float a = aFOO[programIndex];
    float a0 = aFOO[0], a1 = aFOO[1];
    if (a0 < a1 || crash())
        RET[programIndex] = 1;
    else
        RET[programIndex] = 0;
}

export void result(uniform float RET[]) {
    RET[programIndex] = 1;
}


@@ -0,0 +1,21 @@
export uniform int width() { return programCount; }

uniform int * uniform ptr;

uniform bool crash() {
    return *ptr > 0;
}

export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    float a = aFOO[programIndex];
    float a0 = aFOO[0], a1 = aFOO[1];
    if (a0 > a1 && crash())
        RET[programIndex] = 0;
    else
        RET[programIndex] = 1;
}

export void result(uniform float RET[]) {
    RET[programIndex] = 1;
}


@@ -0,0 +1,21 @@
export uniform int width() { return programCount; }

uniform int * uniform ptr;

bool crash() {
    return *ptr > 0;
}

export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    float a = aFOO[programIndex];
    float a0 = aFOO[0], a1 = aFOO[1];
    if (a0 > a1 && crash())
        RET[programIndex] = 0;
    else
        RET[programIndex] = 1;
}

export void result(uniform float RET[]) {
    RET[programIndex] = 1;
}