Adding patch for sse4-i16x8 and sse4-i8x16 targets
This commit is contained in:
57
llvm_patches/3_3_r195476_r195779_i16_sext.patch
Normal file
57
llvm_patches/3_3_r195476_r195779_i16_sext.patch
Normal file
@@ -0,0 +1,57 @@
|
||||
Two stability patches affecting sse4-i16x8 and sse4-i8x16 targets. See PR18014 and PR18054 for more details.
|
||||
|
||||
Index: lib/Target/X86/X86ISelLowering.cpp
|
||||
===================================================================
|
||||
--- lib/Target/X86/X86ISelLowering.cpp (revision 195862)
|
||||
+++ lib/Target/X86/X86ISelLowering.cpp (working copy)
|
||||
@@ -12099,19 +12099,27 @@
|
||||
// fall through
|
||||
case MVT::v4i32:
|
||||
case MVT::v8i16: {
|
||||
- // (sext (vzext x)) -> (vsext x)
|
||||
SDValue Op0 = Op.getOperand(0);
|
||||
SDValue Op00 = Op0.getOperand(0);
|
||||
SDValue Tmp1;
|
||||
// Hopefully, this VECTOR_SHUFFLE is just a VZEXT.
|
||||
if (Op0.getOpcode() == ISD::BITCAST &&
|
||||
- Op00.getOpcode() == ISD::VECTOR_SHUFFLE)
|
||||
+ Op00.getOpcode() == ISD::VECTOR_SHUFFLE) {
|
||||
+ // (sext (vzext x)) -> (vsext x)
|
||||
Tmp1 = LowerVectorIntExtend(Op00, DAG);
|
||||
- if (Tmp1.getNode()) {
|
||||
- SDValue Tmp1Op0 = Tmp1.getOperand(0);
|
||||
- assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT &&
|
||||
- "This optimization is invalid without a VZEXT.");
|
||||
- return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0));
|
||||
+ if (Tmp1.getNode()) {
|
||||
+ EVT ExtraEltVT = ExtraVT.getVectorElementType();
|
||||
+ // This folding is only valid when the in-reg type is a vector of i8,
|
||||
+ // i16, or i32.
|
||||
+ if (ExtraEltVT == MVT::i8 || ExtraEltVT == MVT::i16 ||
|
||||
+ ExtraEltVT == MVT::i32) {
|
||||
+ SDValue Tmp1Op0 = Tmp1.getOperand(0);
|
||||
+ assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT &&
|
||||
+ "This optimization is invalid without a VZEXT.");
|
||||
+ return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0));
|
||||
+ }
|
||||
+ Op0 = Tmp1;
|
||||
+ }
|
||||
}
|
||||
|
||||
// If the above didn't work, then just use Shift-Left + Shift-Right.
|
||||
@@ -15826,6 +15834,15 @@
|
||||
if (BitWidth == 1)
|
||||
return SDValue();
|
||||
|
||||
+ // Check all uses of that condition operand to check whether it will be
|
||||
+ // consumed by non-BLEND instructions, which may depend on all bits being set
|
||||
+ // properly.
|
||||
+ for (SDNode::use_iterator I = Cond->use_begin(),
|
||||
+ E = Cond->use_end(); I != E; ++I)
|
||||
+ if (I->getOpcode() != ISD::VSELECT)
|
||||
+ // TODO: Add other opcodes eventually lowered into BLEND.
|
||||
+ return SDValue();
|
||||
+
|
||||
assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size");
|
||||
APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 1);
|
||||
|
||||
57
llvm_patches/3_4_r195476_r195779_i16_sext.patch
Normal file
57
llvm_patches/3_4_r195476_r195779_i16_sext.patch
Normal file
@@ -0,0 +1,57 @@
|
||||
Two stability patches affecting sse4-i16x8 and sse4-i8x16 targets. See PR18014 and PR18054 for more details.
|
||||
|
||||
Index: lib/Target/X86/X86ISelLowering.cpp
|
||||
===================================================================
|
||||
--- lib/Target/X86/X86ISelLowering.cpp (revision 195863)
|
||||
+++ lib/Target/X86/X86ISelLowering.cpp (working copy)
|
||||
@@ -13120,19 +13120,27 @@
|
||||
// fall through
|
||||
case MVT::v4i32:
|
||||
case MVT::v8i16: {
|
||||
- // (sext (vzext x)) -> (vsext x)
|
||||
SDValue Op0 = Op.getOperand(0);
|
||||
SDValue Op00 = Op0.getOperand(0);
|
||||
SDValue Tmp1;
|
||||
// Hopefully, this VECTOR_SHUFFLE is just a VZEXT.
|
||||
if (Op0.getOpcode() == ISD::BITCAST &&
|
||||
- Op00.getOpcode() == ISD::VECTOR_SHUFFLE)
|
||||
+ Op00.getOpcode() == ISD::VECTOR_SHUFFLE) {
|
||||
+ // (sext (vzext x)) -> (vsext x)
|
||||
Tmp1 = LowerVectorIntExtend(Op00, Subtarget, DAG);
|
||||
- if (Tmp1.getNode()) {
|
||||
- SDValue Tmp1Op0 = Tmp1.getOperand(0);
|
||||
- assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT &&
|
||||
- "This optimization is invalid without a VZEXT.");
|
||||
- return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0));
|
||||
+ if (Tmp1.getNode()) {
|
||||
+ EVT ExtraEltVT = ExtraVT.getVectorElementType();
|
||||
+ // This folding is only valid when the in-reg type is a vector of i8,
|
||||
+ // i16, or i32.
|
||||
+ if (ExtraEltVT == MVT::i8 || ExtraEltVT == MVT::i16 ||
|
||||
+ ExtraEltVT == MVT::i32) {
|
||||
+ SDValue Tmp1Op0 = Tmp1.getOperand(0);
|
||||
+ assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT &&
|
||||
+ "This optimization is invalid without a VZEXT.");
|
||||
+ return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0));
|
||||
+ }
|
||||
+ Op0 = Tmp1;
|
||||
+ }
|
||||
}
|
||||
|
||||
// If the above didn't work, then just use Shift-Left + Shift-Right.
|
||||
@@ -17007,6 +17015,15 @@
|
||||
if (BitWidth == 1)
|
||||
return SDValue();
|
||||
|
||||
+ // Check all uses of that condition operand to check whether it will be
|
||||
+ // consumed by non-BLEND instructions, which may depend on all bits being set
|
||||
+ // properly.
|
||||
+ for (SDNode::use_iterator I = Cond->use_begin(),
|
||||
+ E = Cond->use_end(); I != E; ++I)
|
||||
+ if (I->getOpcode() != ISD::VSELECT)
|
||||
+ // TODO: Add other opcodes eventually lowered into BLEND.
|
||||
+ return SDValue();
|
||||
+
|
||||
assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size");
|
||||
APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 1);
|
||||
|
||||
Reference in New Issue
Block a user