Merge pull request #670 from dbabokin/sext_patch

LLVM patches for sse4-i16x8 and sse4-i8x16 targets
This commit is contained in:
Dmitry Babokin
2013-11-28 01:56:09 -08:00
3 changed files with 114 additions and 6 deletions

View File

@@ -277,13 +277,7 @@
.\tests\reduce-min-uint64.ispc runfail x86 avx1-i64x4 Windows LLVM 3.4 cl -O2 *
./tests/atomics-13.ispc compfail x86 sse4-i16x8 Linux LLVM 3.3 clang++3.3 -O2 *
./tests/atomics-13.ispc compfail x86-64 sse4-i16x8 Linux LLVM 3.3 clang++3.3 -O2 *
./tests/funcptr-null-4.ispc runfail x86 sse4-i8x16 Linux LLVM 3.3 clang++3.3 -O2 *
./tests/funcptr-null-5.ispc runfail x86 sse4-i8x16 Linux LLVM 3.3 clang++3.3 -O2 *
./tests/funcptr-null-6.ispc runfail x86 sse4-i8x16 Linux LLVM 3.3 clang++3.3 -O2 *
./tests/atomics-13.ispc compfail x86 sse4-i8x16 Linux LLVM 3.3 clang++3.3 -O2 *
./tests/funcptr-null-4.ispc runfail x86-64 sse4-i8x16 Linux LLVM 3.3 clang++3.3 -O2 *
./tests/funcptr-null-5.ispc runfail x86-64 sse4-i8x16 Linux LLVM 3.3 clang++3.3 -O2 *
./tests/funcptr-null-6.ispc runfail x86-64 sse4-i8x16 Linux LLVM 3.3 clang++3.3 -O2 *
./tests/atomics-13.ispc compfail x86-64 sse4-i8x16 Linux LLVM 3.3 clang++3.3 -O2 *
./tests/ptr-assign-lhs-math-1.ispc compfail x86-64 generic-4 Linux LLVM 3.3 clang++3.3 -O2 *
./tests/short-vec-8.ispc compfail x86-64 generic-4 Linux LLVM 3.3 clang++3.3 -O2 *

View File

@@ -0,0 +1,57 @@
Two stability patches affecting sse4-i16x8 and sse4-i8x16 targets. See PR18014 and PR18054 for more details.
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp (revision 195862)
+++ lib/Target/X86/X86ISelLowering.cpp (working copy)
@@ -12099,19 +12099,27 @@
// fall through
case MVT::v4i32:
case MVT::v8i16: {
- // (sext (vzext x)) -> (vsext x)
SDValue Op0 = Op.getOperand(0);
SDValue Op00 = Op0.getOperand(0);
SDValue Tmp1;
// Hopefully, this VECTOR_SHUFFLE is just a VZEXT.
if (Op0.getOpcode() == ISD::BITCAST &&
- Op00.getOpcode() == ISD::VECTOR_SHUFFLE)
+ Op00.getOpcode() == ISD::VECTOR_SHUFFLE) {
+ // (sext (vzext x)) -> (vsext x)
Tmp1 = LowerVectorIntExtend(Op00, DAG);
- if (Tmp1.getNode()) {
- SDValue Tmp1Op0 = Tmp1.getOperand(0);
- assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT &&
- "This optimization is invalid without a VZEXT.");
- return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0));
+ if (Tmp1.getNode()) {
+ EVT ExtraEltVT = ExtraVT.getVectorElementType();
+ // This folding is only valid when the in-reg type is a vector of i8,
+ // i16, or i32.
+ if (ExtraEltVT == MVT::i8 || ExtraEltVT == MVT::i16 ||
+ ExtraEltVT == MVT::i32) {
+ SDValue Tmp1Op0 = Tmp1.getOperand(0);
+ assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT &&
+ "This optimization is invalid without a VZEXT.");
+ return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0));
+ }
+ Op0 = Tmp1;
+ }
}
// If the above didn't work, then just use Shift-Left + Shift-Right.
@@ -15826,6 +15834,15 @@
if (BitWidth == 1)
return SDValue();
+ // Check all uses of that condition operand to check whether it will be
+ // consumed by non-BLEND instructions, which may depend on all bits are set
+ // properly.
+ for (SDNode::use_iterator I = Cond->use_begin(),
+ E = Cond->use_end(); I != E; ++I)
+ if (I->getOpcode() != ISD::VSELECT)
+ // TODO: Add other opcodes eventually lowered into BLEND.
+ return SDValue();
+
assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size");
APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 1);

View File

@@ -0,0 +1,57 @@
Two stability patches affecting sse4-i16x8 and sse4-i8x16 targets. See PR18014 and PR18054 for more details.
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp (revision 195863)
+++ lib/Target/X86/X86ISelLowering.cpp (working copy)
@@ -13120,19 +13120,27 @@
// fall through
case MVT::v4i32:
case MVT::v8i16: {
- // (sext (vzext x)) -> (vsext x)
SDValue Op0 = Op.getOperand(0);
SDValue Op00 = Op0.getOperand(0);
SDValue Tmp1;
// Hopefully, this VECTOR_SHUFFLE is just a VZEXT.
if (Op0.getOpcode() == ISD::BITCAST &&
- Op00.getOpcode() == ISD::VECTOR_SHUFFLE)
+ Op00.getOpcode() == ISD::VECTOR_SHUFFLE) {
+ // (sext (vzext x)) -> (vsext x)
Tmp1 = LowerVectorIntExtend(Op00, Subtarget, DAG);
- if (Tmp1.getNode()) {
- SDValue Tmp1Op0 = Tmp1.getOperand(0);
- assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT &&
- "This optimization is invalid without a VZEXT.");
- return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0));
+ if (Tmp1.getNode()) {
+ EVT ExtraEltVT = ExtraVT.getVectorElementType();
+ // This folding is only valid when the in-reg type is a vector of i8,
+ // i16, or i32.
+ if (ExtraEltVT == MVT::i8 || ExtraEltVT == MVT::i16 ||
+ ExtraEltVT == MVT::i32) {
+ SDValue Tmp1Op0 = Tmp1.getOperand(0);
+ assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT &&
+ "This optimization is invalid without a VZEXT.");
+ return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0));
+ }
+ Op0 = Tmp1;
+ }
}
// If the above didn't work, then just use Shift-Left + Shift-Right.
@@ -17007,6 +17015,15 @@
if (BitWidth == 1)
return SDValue();
+ // Check all uses of that condition operand to check whether it will be
+ // consumed by non-BLEND instructions, which may depend on all bits are set
+ // properly.
+ for (SDNode::use_iterator I = Cond->use_begin(),
+ E = Cond->use_end(); I != E; ++I)
+ if (I->getOpcode() != ISD::VSELECT)
+ // TODO: Add other opcodes eventually lowered into BLEND.
+ return SDValue();
+
assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size");
APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 1);