55 lines
2.2 KiB
Diff
55 lines
2.2 KiB
Diff
This patch needs to be applied to LLVM 3.2/3.3 to fix a number of failures on AVX2 targets.
|
|
LLVM 3.4 already contains this fix (r183327), so the patch is not needed there.
|
|
|
|
Index: lib/Target/X86/X86ISelDAGToDAG.cpp
|
|
===================================================================
|
|
--- lib/Target/X86/X86ISelDAGToDAG.cpp (revision 183626)
|
|
+++ lib/Target/X86/X86ISelDAGToDAG.cpp (working copy)
|
|
@@ -2013,6 +2013,8 @@
|
|
case Intrinsic::x86_avx2_gather_d_d_256:
|
|
case Intrinsic::x86_avx2_gather_q_d:
|
|
case Intrinsic::x86_avx2_gather_q_d_256: {
|
|
+ if (!Subtarget->hasAVX2())
|
|
+ break;
|
|
unsigned Opc;
|
|
switch (IntNo) {
|
|
default: llvm_unreachable("Impossible intrinsic");
|
|
Index: lib/Target/X86/X86InstrSSE.td
|
|
===================================================================
|
|
--- lib/Target/X86/X86InstrSSE.td (revision 183626)
|
|
+++ lib/Target/X86/X86InstrSSE.td (working copy)
|
|
@@ -8367,7 +8367,9 @@
|
|
[]>, VEX_4VOp3, VEX_L;
|
|
}
|
|
|
|
-let mayLoad = 1, Constraints = "$src1 = $dst, $mask = $mask_wb" in {
|
|
+let mayLoad = 1, Constraints
|
|
+ = "@earlyclobber $dst,@earlyclobber $mask_wb, $src1 = $dst, $mask = $mask_wb"
|
|
+ in {
|
|
defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd", VR256, vx64mem, vx64mem>, VEX_W;
|
|
defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd", VR256, vx64mem, vy64mem>, VEX_W;
|
|
defm VGATHERDPS : avx2_gather<0x92, "vgatherdps", VR256, vx32mem, vy32mem>;
|
|
Index: test/CodeGen/X86/avx2-gather.ll
|
|
===================================================================
|
|
--- test/CodeGen/X86/avx2-gather.ll (revision 0)
|
|
+++ test/CodeGen/X86/avx2-gather.ll (working copy)
|
|
@@ -0,0 +1,18 @@
|
|
+; RUN: not llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx
|
|
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 | FileCheck %s
|
|
+
|
|
+declare <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float>, i8*,
|
|
+ <4 x i32>, <4 x float>, i8) nounwind readonly
|
|
+
|
|
+define <4 x float> @test_x86_avx2_gather_d_ps(i8* %a1,
|
|
+ <4 x i32> %idx, <4 x float> %mask) {
|
|
+ %res = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> undef,
|
|
+ i8* %a1, <4 x i32> %idx, <4 x float> %mask, i8 2) ;
|
|
+ ret <4 x float> %res
|
|
+}
|
|
+
|
|
+; CHECK: test_x86_avx2_gather_d_ps
|
|
+; CHECK: vgatherdps
|
|
+; CHECK-NOT: [[DST]]
|
|
+; CHECK: [[DST:%xmm[0-9]+]]{{$}}
|
|
+; CHECK: ret
|