Merge pull request #670 from dbabokin/sext_patch

LLVM patches for sse4-i16x8 and sse4-i8x16 targets
2013-11-28 01:56:09 -08:00
parent 9b19f0aaba eaa483d6e4
commit c751e44c6c
3 changed files with 114 additions and 6 deletions
--- a/fail_db.txt
+++ b/fail_db.txt
@@ -277,13 +277,7 @@
 .\tests\reduce-min-uint64.ispc runfail     x86     avx1-i64x4 Windows LLVM 3.4         cl -O2 *
 ./tests/atomics-13.ispc compfail     x86     sse4-i16x8   Linux LLVM 3.3 clang++3.3 -O2 *
 ./tests/atomics-13.ispc compfail  x86-64     sse4-i16x8   Linux LLVM 3.3 clang++3.3 -O2 *
-./tests/funcptr-null-4.ispc runfail     x86     sse4-i8x16   Linux LLVM 3.3 clang++3.3 -O2 *
-./tests/funcptr-null-5.ispc runfail     x86     sse4-i8x16   Linux LLVM 3.3 clang++3.3 -O2 *
-./tests/funcptr-null-6.ispc runfail     x86     sse4-i8x16   Linux LLVM 3.3 clang++3.3 -O2 *
 ./tests/atomics-13.ispc compfail     x86     sse4-i8x16   Linux LLVM 3.3 clang++3.3 -O2 *
-./tests/funcptr-null-4.ispc runfail  x86-64     sse4-i8x16   Linux LLVM 3.3 clang++3.3 -O2 *
-./tests/funcptr-null-5.ispc runfail  x86-64     sse4-i8x16   Linux LLVM 3.3 clang++3.3 -O2 *
-./tests/funcptr-null-6.ispc runfail  x86-64     sse4-i8x16   Linux LLVM 3.3 clang++3.3 -O2 *
 ./tests/atomics-13.ispc compfail  x86-64     sse4-i8x16   Linux LLVM 3.3 clang++3.3 -O2 *
 ./tests/ptr-assign-lhs-math-1.ispc compfail  x86-64      generic-4   Linux LLVM 3.3 clang++3.3 -O2 *
 ./tests/short-vec-8.ispc compfail  x86-64      generic-4   Linux LLVM 3.3 clang++3.3 -O2 *
--- a/llvm_patches/3_3_r195476_r195779_i16_sext.patch
+++ b/llvm_patches/3_3_r195476_r195779_i16_sext.patch
@@ -0,0 +1,57 @@
+Two stability patches affecting sse4-i16x8 and sse4-i8x16 targets. See PR18014 and PR18054 for more details.
+
+Index: lib/Target/X86/X86ISelLowering.cpp
+===================================================================
+--- lib/Target/X86/X86ISelLowering.cpp	(revision 195862)
+++ lib/Target/X86/X86ISelLowering.cpp	(working copy)
+@@ -12099,19 +12099,27 @@
+       // fall through
+     case MVT::v4i32:
+     case MVT::v8i16: {
+-      // (sext (vzext x)) -> (vsext x)
+       SDValue Op0 = Op.getOperand(0);
+       SDValue Op00 = Op0.getOperand(0);
+       SDValue Tmp1;
+       // Hopefully, this VECTOR_SHUFFLE is just a VZEXT.
+       if (Op0.getOpcode() == ISD::BITCAST &&
+-          Op00.getOpcode() == ISD::VECTOR_SHUFFLE)
++          Op00.getOpcode() == ISD::VECTOR_SHUFFLE) {
++        // (sext (vzext x)) -> (vsext x)
+         Tmp1 = LowerVectorIntExtend(Op00, DAG);
+-      if (Tmp1.getNode()) {
+-        SDValue Tmp1Op0 = Tmp1.getOperand(0);
+-        assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT &&
+-               "This optimization is invalid without a VZEXT.");
+-        return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0));
++        if (Tmp1.getNode()) {
++          EVT ExtraEltVT = ExtraVT.getVectorElementType();
++          // This folding is only valid when the in-reg type is a vector of i8,
++          // i16, or i32.
++          if (ExtraEltVT == MVT::i8 || ExtraEltVT == MVT::i16 ||
++              ExtraEltVT == MVT::i32) {
++            SDValue Tmp1Op0 = Tmp1.getOperand(0);
++            assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT &&
++                   "This optimization is invalid without a VZEXT.");
++            return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0));
++          }
++          Op0 = Tmp1;
++        }
+       }
+ 
+       // If the above didn't work, then just use Shift-Left + Shift-Right.
+@@ -15826,6 +15834,15 @@
+     if (BitWidth == 1)
+       return SDValue();
+ 
++    // Check all uses of that condition operand to check whether it will be
++    // consumed by non-BLEND instructions, which may depend on all bits are set
++    // properly.
++    for (SDNode::use_iterator I = Cond->use_begin(),
++                              E = Cond->use_end(); I != E; ++I)
++      if (I->getOpcode() != ISD::VSELECT)
++        // TODO: Add other opcodes eventually lowered into BLEND.
++        return SDValue();
++
+     assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size");
+     APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 1);
+ 
--- a/llvm_patches/3_4_r195476_r195779_i16_sext.patch
+++ b/llvm_patches/3_4_r195476_r195779_i16_sext.patch
@@ -0,0 +1,57 @@
+Two stability patches affecting sse4-i16x8 and sse4-i8x16 targets. See PR18014 and PR18054 for more details.
+
+Index: lib/Target/X86/X86ISelLowering.cpp
+===================================================================
+--- lib/Target/X86/X86ISelLowering.cpp	(revision 195863)
+++ lib/Target/X86/X86ISelLowering.cpp	(working copy)
+@@ -13120,19 +13120,27 @@
+       // fall through
+     case MVT::v4i32:
+     case MVT::v8i16: {
+-      // (sext (vzext x)) -> (vsext x)
+       SDValue Op0 = Op.getOperand(0);
+       SDValue Op00 = Op0.getOperand(0);
+       SDValue Tmp1;
+       // Hopefully, this VECTOR_SHUFFLE is just a VZEXT.
+       if (Op0.getOpcode() == ISD::BITCAST &&
+-          Op00.getOpcode() == ISD::VECTOR_SHUFFLE)
++          Op00.getOpcode() == ISD::VECTOR_SHUFFLE) {
++        // (sext (vzext x)) -> (vsext x)
+         Tmp1 = LowerVectorIntExtend(Op00, Subtarget, DAG);
+-      if (Tmp1.getNode()) {
+-        SDValue Tmp1Op0 = Tmp1.getOperand(0);
+-        assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT &&
+-               "This optimization is invalid without a VZEXT.");
+-        return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0));
++        if (Tmp1.getNode()) {
++          EVT ExtraEltVT = ExtraVT.getVectorElementType();
++          // This folding is only valid when the in-reg type is a vector of i8,
++          // i16, or i32.
++          if (ExtraEltVT == MVT::i8 || ExtraEltVT == MVT::i16 ||
++              ExtraEltVT == MVT::i32) {
++            SDValue Tmp1Op0 = Tmp1.getOperand(0);
++            assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT &&
++                   "This optimization is invalid without a VZEXT.");
++            return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0));
++          }
++          Op0 = Tmp1;
++        }
+       }
+ 
+       // If the above didn't work, then just use Shift-Left + Shift-Right.
+@@ -17007,6 +17015,15 @@
+     if (BitWidth == 1)
+       return SDValue();
+ 
++    // Check all uses of that condition operand to check whether it will be
++    // consumed by non-BLEND instructions, which may depend on all bits are set
++    // properly.
++    for (SDNode::use_iterator I = Cond->use_begin(),
++                              E = Cond->use_end(); I != E; ++I)
++      if (I->getOpcode() != ISD::VSELECT)
++        // TODO: Add other opcodes eventually lowered into BLEND.
++        return SDValue();
++
+     assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size");
+     APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 1);
+