From 672d43a6cfa135eeeb0b9640cf691526e7eac72d Mon Sep 17 00:00:00 2001 From: Dmitry Babokin Date: Wed, 27 Nov 2013 23:22:50 +0400 Subject: [PATCH] Adding patch for sse4-i16x8 and sse4-i8x16 targets --- .../3_3_r195476_r195779_i16_sext.patch | 57 +++++++++++++++++++ .../3_4_r195476_r195779_i16_sext.patch | 57 +++++++++++++++++++ 2 files changed, 114 insertions(+) create mode 100644 llvm_patches/3_3_r195476_r195779_i16_sext.patch create mode 100644 llvm_patches/3_4_r195476_r195779_i16_sext.patch diff --git a/llvm_patches/3_3_r195476_r195779_i16_sext.patch b/llvm_patches/3_3_r195476_r195779_i16_sext.patch new file mode 100644 index 00000000..a49325c9 --- /dev/null +++ b/llvm_patches/3_3_r195476_r195779_i16_sext.patch @@ -0,0 +1,57 @@ +Two stability patches affecting sse4-i16x8 and sse4-i8x16 targets. See PR18014 and PR18054 for more details. + +Index: lib/Target/X86/X86ISelLowering.cpp +=================================================================== +--- lib/Target/X86/X86ISelLowering.cpp (revision 195862) ++++ lib/Target/X86/X86ISelLowering.cpp (working copy) +@@ -12099,19 +12099,27 @@ + // fall through + case MVT::v4i32: + case MVT::v8i16: { +- // (sext (vzext x)) -> (vsext x) + SDValue Op0 = Op.getOperand(0); + SDValue Op00 = Op0.getOperand(0); + SDValue Tmp1; + // Hopefully, this VECTOR_SHUFFLE is just a VZEXT. + if (Op0.getOpcode() == ISD::BITCAST && +- Op00.getOpcode() == ISD::VECTOR_SHUFFLE) ++ Op00.getOpcode() == ISD::VECTOR_SHUFFLE) { ++ // (sext (vzext x)) -> (vsext x) + Tmp1 = LowerVectorIntExtend(Op00, DAG); +- if (Tmp1.getNode()) { +- SDValue Tmp1Op0 = Tmp1.getOperand(0); +- assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT && +- "This optimization is invalid without a VZEXT."); +- return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0)); ++ if (Tmp1.getNode()) { ++ EVT ExtraEltVT = ExtraVT.getVectorElementType(); ++ // This folding is only valid when the in-reg type is a vector of i8, ++ // i16, or i32. ++ if (ExtraEltVT == MVT::i8 || ExtraEltVT == MVT::i16 || ++ ExtraEltVT == MVT::i32) { ++ SDValue Tmp1Op0 = Tmp1.getOperand(0); ++ assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT && ++ "This optimization is invalid without a VZEXT."); ++ return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0)); ++ } ++ Op0 = Tmp1; ++ } + } + + // If the above didn't work, then just use Shift-Left + Shift-Right. +@@ -15826,6 +15834,15 @@ + if (BitWidth == 1) + return SDValue(); + ++ // Check all uses of that condition operand to check whether it will be ++ // consumed by non-BLEND instructions, which may depend on all bits are set ++ // properly. ++ for (SDNode::use_iterator I = Cond->use_begin(), ++ E = Cond->use_end(); I != E; ++I) ++ if (I->getOpcode() != ISD::VSELECT) ++ // TODO: Add other opcodes eventually lowered into BLEND. ++ return SDValue(); ++ + assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size"); + APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 1); + diff --git a/llvm_patches/3_4_r195476_r195779_i16_sext.patch b/llvm_patches/3_4_r195476_r195779_i16_sext.patch new file mode 100644 index 00000000..4e2c0f6b --- /dev/null +++ b/llvm_patches/3_4_r195476_r195779_i16_sext.patch @@ -0,0 +1,57 @@ +Two stability patches affecting sse4-i16x8 and sse4-i8x16 targets. See PR18014 and PR18054 for more details. + +Index: lib/Target/X86/X86ISelLowering.cpp +=================================================================== +--- lib/Target/X86/X86ISelLowering.cpp (revision 195863) ++++ lib/Target/X86/X86ISelLowering.cpp (working copy) +@@ -13120,19 +13120,27 @@ + // fall through + case MVT::v4i32: + case MVT::v8i16: { +- // (sext (vzext x)) -> (vsext x) + SDValue Op0 = Op.getOperand(0); + SDValue Op00 = Op0.getOperand(0); + SDValue Tmp1; + // Hopefully, this VECTOR_SHUFFLE is just a VZEXT. + if (Op0.getOpcode() == ISD::BITCAST && +- Op00.getOpcode() == ISD::VECTOR_SHUFFLE) ++ Op00.getOpcode() == ISD::VECTOR_SHUFFLE) { ++ // (sext (vzext x)) -> (vsext x) + Tmp1 = LowerVectorIntExtend(Op00, Subtarget, DAG); +- if (Tmp1.getNode()) { +- SDValue Tmp1Op0 = Tmp1.getOperand(0); +- assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT && +- "This optimization is invalid without a VZEXT."); +- return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0)); ++ if (Tmp1.getNode()) { ++ EVT ExtraEltVT = ExtraVT.getVectorElementType(); ++ // This folding is only valid when the in-reg type is a vector of i8, ++ // i16, or i32. ++ if (ExtraEltVT == MVT::i8 || ExtraEltVT == MVT::i16 || ++ ExtraEltVT == MVT::i32) { ++ SDValue Tmp1Op0 = Tmp1.getOperand(0); ++ assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT && ++ "This optimization is invalid without a VZEXT."); ++ return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0)); ++ } ++ Op0 = Tmp1; ++ } + } + + // If the above didn't work, then just use Shift-Left + Shift-Right. +@@ -17007,6 +17015,15 @@ + if (BitWidth == 1) + return SDValue(); + ++ // Check all uses of that condition operand to check whether it will be ++ // consumed by non-BLEND instructions, which may depend on all bits are set ++ // properly. ++ for (SDNode::use_iterator I = Cond->use_begin(), ++ E = Cond->use_end(); I != E; ++I) ++ if (I->getOpcode() != ISD::VSELECT) ++ // TODO: Add other opcodes eventually lowered into BLEND. ++ return SDValue(); ++ + assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size"); + APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 1); +