58 lines
2.4 KiB
Diff
58 lines
2.4 KiB
Diff
Two stability patches affecting sse4-i16x8 and sse4-i8x16 targets. See PR18014 and PR18054 for more details.
|
|
|
|
Index: lib/Target/X86/X86ISelLowering.cpp
|
|
===================================================================
|
|
--- lib/Target/X86/X86ISelLowering.cpp (revision 195862)
|
|
+++ lib/Target/X86/X86ISelLowering.cpp (working copy)
|
|
@@ -12099,19 +12099,27 @@
|
|
// fall through
|
|
case MVT::v4i32:
|
|
case MVT::v8i16: {
|
|
- // (sext (vzext x)) -> (vsext x)
|
|
SDValue Op0 = Op.getOperand(0);
|
|
SDValue Op00 = Op0.getOperand(0);
|
|
SDValue Tmp1;
|
|
// Hopefully, this VECTOR_SHUFFLE is just a VZEXT.
|
|
if (Op0.getOpcode() == ISD::BITCAST &&
|
|
- Op00.getOpcode() == ISD::VECTOR_SHUFFLE)
|
|
+ Op00.getOpcode() == ISD::VECTOR_SHUFFLE) {
|
|
+ // (sext (vzext x)) -> (vsext x)
|
|
Tmp1 = LowerVectorIntExtend(Op00, DAG);
|
|
- if (Tmp1.getNode()) {
|
|
- SDValue Tmp1Op0 = Tmp1.getOperand(0);
|
|
- assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT &&
|
|
- "This optimization is invalid without a VZEXT.");
|
|
- return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0));
|
|
+ if (Tmp1.getNode()) {
|
|
+ EVT ExtraEltVT = ExtraVT.getVectorElementType();
|
|
+ // This folding is only valid when the in-reg type is a vector of i8,
|
|
+ // i16, or i32.
|
|
+ if (ExtraEltVT == MVT::i8 || ExtraEltVT == MVT::i16 ||
|
|
+ ExtraEltVT == MVT::i32) {
|
|
+ SDValue Tmp1Op0 = Tmp1.getOperand(0);
|
|
+ assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT &&
|
|
+ "This optimization is invalid without a VZEXT.");
|
|
+ return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0));
|
|
+ }
|
|
+ Op0 = Tmp1;
|
|
+ }
|
|
}
|
|
|
|
// If the above didn't work, then just use Shift-Left + Shift-Right.
|
|
@@ -15826,6 +15834,15 @@
|
|
if (BitWidth == 1)
|
|
return SDValue();
|
|
|
|
+ // Check all uses of the condition operand to see whether it will be
|
|
+ // consumed by non-BLEND instructions, which may depend on all bits being set
|
|
+ // properly.
|
|
+ for (SDNode::use_iterator I = Cond->use_begin(),
|
|
+ E = Cond->use_end(); I != E; ++I)
|
|
+ if (I->getOpcode() != ISD::VSELECT)
|
|
+ // TODO: Add other opcodes eventually lowered into BLEND.
|
|
+ return SDValue();
|
|
+
|
|
assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size");
|
|
APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 1);
|
|
|