From 672d43a6cfa135eeeb0b9640cf691526e7eac72d Mon Sep 17 00:00:00 2001
From: Dmitry Babokin <babokin@gmail.com>
Date: Wed, 27 Nov 2013 23:22:50 +0400
Subject: [PATCH] Adding patch for sse4-i16x8 and sse4-i8x16 targets

---
 .../3_3_r195476_r195779_i16_sext.patch        | 57 +++++++++++++++++++
 .../3_4_r195476_r195779_i16_sext.patch        | 57 +++++++++++++++++++
 2 files changed, 114 insertions(+)
 create mode 100644 llvm_patches/3_3_r195476_r195779_i16_sext.patch
 create mode 100644 llvm_patches/3_4_r195476_r195779_i16_sext.patch

diff --git a/llvm_patches/3_3_r195476_r195779_i16_sext.patch b/llvm_patches/3_3_r195476_r195779_i16_sext.patch
new file mode 100644
index 00000000..a49325c9
--- /dev/null
+++ b/llvm_patches/3_3_r195476_r195779_i16_sext.patch
@@ -0,0 +1,57 @@
+Two stability patches affecting sse4-i16x8 and sse4-i8x16 targets. See PR18014 and PR18054 for more details.
+
+Index: lib/Target/X86/X86ISelLowering.cpp
+===================================================================
+--- lib/Target/X86/X86ISelLowering.cpp	(revision 195862)
++++ lib/Target/X86/X86ISelLowering.cpp	(working copy)
+@@ -12099,19 +12099,27 @@
+       // fall through
+     case MVT::v4i32:
+     case MVT::v8i16: {
+-      // (sext (vzext x)) -> (vsext x)
+       SDValue Op0 = Op.getOperand(0);
+       SDValue Op00 = Op0.getOperand(0);
+       SDValue Tmp1;
+       // Hopefully, this VECTOR_SHUFFLE is just a VZEXT.
+       if (Op0.getOpcode() == ISD::BITCAST &&
+-          Op00.getOpcode() == ISD::VECTOR_SHUFFLE)
++          Op00.getOpcode() == ISD::VECTOR_SHUFFLE) {
++        // (sext (vzext x)) -> (vsext x)
+         Tmp1 = LowerVectorIntExtend(Op00, DAG);
+-      if (Tmp1.getNode()) {
+-        SDValue Tmp1Op0 = Tmp1.getOperand(0);
+-        assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT &&
+-               "This optimization is invalid without a VZEXT.");
+-        return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0));
++        if (Tmp1.getNode()) {
++          EVT ExtraEltVT = ExtraVT.getVectorElementType();
++          // This folding is only valid when the in-reg type is a vector of i8,
++          // i16, or i32.
++          if (ExtraEltVT == MVT::i8 || ExtraEltVT == MVT::i16 ||
++              ExtraEltVT == MVT::i32) {
++            SDValue Tmp1Op0 = Tmp1.getOperand(0);
++            assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT &&
++                   "This optimization is invalid without a VZEXT.");
++            return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0));
++          }
++          Op0 = Tmp1;
++        }
+       }
+ 
+       // If the above didn't work, then just use Shift-Left + Shift-Right.
+@@ -15826,6 +15834,15 @@
+     if (BitWidth == 1)
+       return SDValue();
+ 
++    // Check all uses of the condition operand to see whether it will be
++    // consumed by non-BLEND instructions, which may depend on all bits being
++    // set properly.
++    for (SDNode::use_iterator I = Cond->use_begin(),
++                              E = Cond->use_end(); I != E; ++I)
++      if (I->getOpcode() != ISD::VSELECT)
++        // TODO: Add other opcodes eventually lowered into BLEND.
++        return SDValue();
++
+     assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size");
+     APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 1);
+ 
diff --git a/llvm_patches/3_4_r195476_r195779_i16_sext.patch b/llvm_patches/3_4_r195476_r195779_i16_sext.patch
new file mode 100644
index 00000000..4e2c0f6b
--- /dev/null
+++ b/llvm_patches/3_4_r195476_r195779_i16_sext.patch
@@ -0,0 +1,57 @@
+Two stability patches affecting sse4-i16x8 and sse4-i8x16 targets. See PR18014 and PR18054 for more details.
+
+Index: lib/Target/X86/X86ISelLowering.cpp
+===================================================================
+--- lib/Target/X86/X86ISelLowering.cpp	(revision 195863)
++++ lib/Target/X86/X86ISelLowering.cpp	(working copy)
+@@ -13120,19 +13120,27 @@
+       // fall through
+     case MVT::v4i32:
+     case MVT::v8i16: {
+-      // (sext (vzext x)) -> (vsext x)
+       SDValue Op0 = Op.getOperand(0);
+       SDValue Op00 = Op0.getOperand(0);
+       SDValue Tmp1;
+       // Hopefully, this VECTOR_SHUFFLE is just a VZEXT.
+       if (Op0.getOpcode() == ISD::BITCAST &&
+-          Op00.getOpcode() == ISD::VECTOR_SHUFFLE)
++          Op00.getOpcode() == ISD::VECTOR_SHUFFLE) {
++        // (sext (vzext x)) -> (vsext x)
+         Tmp1 = LowerVectorIntExtend(Op00, Subtarget, DAG);
+-      if (Tmp1.getNode()) {
+-        SDValue Tmp1Op0 = Tmp1.getOperand(0);
+-        assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT &&
+-               "This optimization is invalid without a VZEXT.");
+-        return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0));
++        if (Tmp1.getNode()) {
++          EVT ExtraEltVT = ExtraVT.getVectorElementType();
++          // This folding is only valid when the in-reg type is a vector of i8,
++          // i16, or i32.
++          if (ExtraEltVT == MVT::i8 || ExtraEltVT == MVT::i16 ||
++              ExtraEltVT == MVT::i32) {
++            SDValue Tmp1Op0 = Tmp1.getOperand(0);
++            assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT &&
++                   "This optimization is invalid without a VZEXT.");
++            return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0));
++          }
++          Op0 = Tmp1;
++        }
+       }
+ 
+       // If the above didn't work, then just use Shift-Left + Shift-Right.
+@@ -17007,6 +17015,15 @@
+     if (BitWidth == 1)
+       return SDValue();
+ 
++    // Check all uses of the condition operand to see whether it will be
++    // consumed by non-BLEND instructions, which may depend on all bits being
++    // set properly.
++    for (SDNode::use_iterator I = Cond->use_begin(),
++                              E = Cond->use_end(); I != E; ++I)
++      if (I->getOpcode() != ISD::VSELECT)
++        // TODO: Add other opcodes eventually lowered into BLEND.
++        return SDValue();
++
+     assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size");
+     APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 1);
+