Two stability patches affecting sse4-i16x8 and sse4-i8x16 targets. See PR18014 and PR18054 for more details. Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp (revision 195862) +++ lib/Target/X86/X86ISelLowering.cpp (working copy) @@ -12099,19 +12099,27 @@ // fall through case MVT::v4i32: case MVT::v8i16: { - // (sext (vzext x)) -> (vsext x) SDValue Op0 = Op.getOperand(0); SDValue Op00 = Op0.getOperand(0); SDValue Tmp1; // Hopefully, this VECTOR_SHUFFLE is just a VZEXT. if (Op0.getOpcode() == ISD::BITCAST && - Op00.getOpcode() == ISD::VECTOR_SHUFFLE) + Op00.getOpcode() == ISD::VECTOR_SHUFFLE) { + // (sext (vzext x)) -> (vsext x) Tmp1 = LowerVectorIntExtend(Op00, DAG); - if (Tmp1.getNode()) { - SDValue Tmp1Op0 = Tmp1.getOperand(0); - assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT && - "This optimization is invalid without a VZEXT."); - return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0)); + if (Tmp1.getNode()) { + EVT ExtraEltVT = ExtraVT.getVectorElementType(); + // This folding is only valid when the in-reg type is a vector of i8, + // i16, or i32. + if (ExtraEltVT == MVT::i8 || ExtraEltVT == MVT::i16 || + ExtraEltVT == MVT::i32) { + SDValue Tmp1Op0 = Tmp1.getOperand(0); + assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT && + "This optimization is invalid without a VZEXT."); + return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0)); + } + Op0 = Tmp1; + } } // If the above didn't work, then just use Shift-Left + Shift-Right. @@ -15826,6 +15834,15 @@ if (BitWidth == 1) return SDValue(); + // Check all uses of that condition operand to check whether it will be + // consumed by non-BLEND instructions, which may depend on all bits are set + // properly. + for (SDNode::use_iterator I = Cond->use_begin(), + E = Cond->use_end(); I != E; ++I) + if (I->getOpcode() != ISD::VSELECT) + // TODO: Add other opcodes eventually lowered into BLEND. + return SDValue(); + assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size"); APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 1);