From fdb4eaf437872566371b70cc74a2334ad5620736 Mon Sep 17 00:00:00 2001
From: Matt Pharr <matt.pharr@intel.com>
Date: Wed, 1 Feb 2012 08:17:25 -0800
Subject: [PATCH] Fix bug in &&/|| short-circuiting.

Use full mask, not internal mask when checking "any lanes running"
before evaluating expressions.

Added some more tests to try to cover this case.
---
 expr.cpp                    | 17 +++++++++--------
 tests/short-circuit-12.ispc | 25 +++++++++++++++++++++++++
 tests/short-circuit-13.ispc | 25 +++++++++++++++++++++++++
 tests/short-circuit-14.ispc | 29 +++++++++++++++++++++++++++++
 tests/short-circuit-15.ispc | 29 +++++++++++++++++++++++++++++
 5 files changed, 117 insertions(+), 8 deletions(-)
 create mode 100644 tests/short-circuit-12.ispc
 create mode 100644 tests/short-circuit-13.ispc
 create mode 100644 tests/short-circuit-14.ispc
 create mode 100644 tests/short-circuit-15.ispc

diff --git a/expr.cpp b/expr.cpp
index 1f810998..14415beb 100644
--- a/expr.cpp
+++ b/expr.cpp
@@ -1537,6 +1537,7 @@ lEmitLogicalOp(BinaryExpr::Op op, Expr *arg0, Expr *arg1,
         // Otherwise, the first operand is varying...  Save the current
         // value of the mask so that we can restore it at the end.
         llvm::Value *oldMask = ctx->GetInternalMask();
+        llvm::Value *oldFullMask = ctx->GetFullMask();
 
         // Convert the second operand to be varying as well, so that we can
         // perform logical vector ops with its value.
@@ -1551,11 +1552,11 @@ lEmitLogicalOp(BinaryExpr::Op op, Expr *arg0, Expr *arg1,
             // lanes--i.e. if (value0 & mask) == mask.  If so, we don't
             // need to evaluate the second operand of the expression.
             llvm::Value *value0AndMask = 
-                ctx->BinaryOperator(llvm::Instruction::And, value0, oldMask, 
-                                    "op&mask");
+                ctx->BinaryOperator(llvm::Instruction::And, value0, 
+                                    oldFullMask, "op&mask");
             llvm::Value *equalsMask =
                 ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
-                             value0AndMask, oldMask, "value0&mask==mask");
+                             value0AndMask, oldFullMask, "value0&mask==mask");
             equalsMask = ctx->I1VecToBoolVec(equalsMask);
             llvm::Value *allMatch = ctx->All(equalsMask);
             ctx->BranchInst(bbSkipEvalValue1, bbEvalValue1, allMatch);
@@ -1602,11 +1603,11 @@ lEmitLogicalOp(BinaryExpr::Op op, Expr *arg0, Expr *arg1,
             // if (mask & ~value0) == mask.
             llvm::Value *notValue0 = ctx->NotOperator(value0, "not_value0");
             llvm::Value *notValue0AndMask = 
-                ctx->BinaryOperator(llvm::Instruction::And, notValue0, oldMask, 
-                                    "not_value0&mask");
+                ctx->BinaryOperator(llvm::Instruction::And, notValue0, 
+                                    oldFullMask, "not_value0&mask");
             llvm::Value *equalsMask =
                 ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
-                             notValue0AndMask, oldMask, "not_value0&mask==mask");
+                             notValue0AndMask, oldFullMask, "not_value0&mask==mask");
             equalsMask = ctx->I1VecToBoolVec(equalsMask);
             llvm::Value *allMatch = ctx->All(equalsMask);
             ctx->BranchInst(bbSkipEvalValue1, bbEvalValue1, allMatch);
@@ -1634,8 +1635,8 @@ lEmitLogicalOp(BinaryExpr::Op op, Expr *arg0, Expr *arg1,
             // masking off the valid lanes before we AND them together:
             // result = (value0 & old_mask) & (value1 & current_mask)
             llvm::Value *value0AndMask = 
-                ctx->BinaryOperator(llvm::Instruction::And, value0, oldMask,
-                                    "op&mask");
+                ctx->BinaryOperator(llvm::Instruction::And, value0, 
+                                    oldFullMask, "op&mask");
             llvm::Value *value1AndMask =
                 ctx->BinaryOperator(llvm::Instruction::And, value1,
                                     ctx->GetInternalMask(), "value1&mask");
diff --git a/tests/short-circuit-12.ispc b/tests/short-circuit-12.ispc
new file mode 100644
index 00000000..fb0a94a2
--- /dev/null
+++ b/tests/short-circuit-12.ispc
@@ -0,0 +1,25 @@
+
+export uniform int width() { return programCount; }
+
+uniform int * uniform ptr;
+
+float foo(uniform float a[]) {
+    int index = (programIndex & 1) * 10000;
+    if (a[programIndex] < 10000 && a[index] == 1)
+        return 1;
+    else
+        return 1234;
+}
+
+export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
+    float a = aFOO[programIndex]; 
+    float a0 = aFOO[0], a1 = aFOO[1];
+    if ((programIndex & 1) == 0)
+        RET[programIndex] = foo(aFOO);
+    else
+        RET[programIndex] = 2;
+}
+
+export void result(uniform float RET[]) {
+    RET[programIndex] = (programIndex & 1) ? 2 : 1;
+}
diff --git a/tests/short-circuit-13.ispc b/tests/short-circuit-13.ispc
new file mode 100644
index 00000000..fb0a94a2
--- /dev/null
+++ b/tests/short-circuit-13.ispc
@@ -0,0 +1,25 @@
+
+export uniform int width() { return programCount; }
+
+uniform int * uniform ptr;
+
+float foo(uniform float a[]) {
+    int index = (programIndex & 1) * 10000;
+    if (a[programIndex] < 10000 && a[index] == 1)
+        return 1;
+    else
+        return 1234;
+}
+
+export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
+    float a = aFOO[programIndex]; 
+    float a0 = aFOO[0], a1 = aFOO[1];
+    if ((programIndex & 1) == 0)
+        RET[programIndex] = foo(aFOO);
+    else
+        RET[programIndex] = 2;
+}
+
+export void result(uniform float RET[]) {
+    RET[programIndex] = (programIndex & 1) ? 2 : 1;
+}
diff --git a/tests/short-circuit-14.ispc b/tests/short-circuit-14.ispc
new file mode 100644
index 00000000..846a8ec3
--- /dev/null
+++ b/tests/short-circuit-14.ispc
@@ -0,0 +1,29 @@
+
+export uniform int width() { return programCount; }
+
+ int * uniform ptr;
+
+int crash() {
+    return *ptr;
+}
+
+float foo(uniform float a[]) {
+    int index = (programIndex & 1);
+    if (a[index] == 2 && crash())
+        return 1234;
+    else
+        return 1;
+}
+
+export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
+    float a = aFOO[programIndex]; 
+    float a0 = aFOO[0], a1 = aFOO[1];
+    if ((programIndex & 1) == 0)
+        RET[programIndex] = foo(aFOO);
+    else
+        RET[programIndex] = 2;
+}
+
+export void result(uniform float RET[]) {
+    RET[programIndex] = (programIndex & 1) ? 2 : 1;
+}
diff --git a/tests/short-circuit-15.ispc b/tests/short-circuit-15.ispc
new file mode 100644
index 00000000..4e6f7e32
--- /dev/null
+++ b/tests/short-circuit-15.ispc
@@ -0,0 +1,29 @@
+
+export uniform int width() { return programCount; }
+
+uniform int * uniform ptr;
+
+uniform int crash() {
+    return *ptr;
+}
+
+float foo(uniform float a[]) {
+    int index = (programIndex & 1);
+    if (a[index] == 2 && crash())
+        return 1234;
+    else
+        return 1;
+}
+
+export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
+    float a = aFOO[programIndex]; 
+    float a0 = aFOO[0], a1 = aFOO[1];
+    if ((programIndex & 1) == 0)
+        RET[programIndex] = foo(aFOO);
+    else
+        RET[programIndex] = 2;
+}
+
+export void result(uniform float RET[]) {
+    RET[programIndex] = (programIndex & 1) ? 2 : 1;
+}