Files
ispc/llvm_patches/3_3_r183327-AVX2-GATHER.patch
Ilia Filippov 00cd90c6b0 test system
2013-09-19 12:26:57 +04:00

55 lines
2.2 KiB
Diff

This patch needs to be applied to LLVM 3.2/3.3 to fix a bunch of failures on the AVX2 target.
LLVM 3.4 contains this fix (r183327).
Index: lib/Target/X86/X86ISelDAGToDAG.cpp
===================================================================
--- lib/Target/X86/X86ISelDAGToDAG.cpp (revision 183626)
+++ lib/Target/X86/X86ISelDAGToDAG.cpp (working copy)
@@ -2013,6 +2013,8 @@
case Intrinsic::x86_avx2_gather_d_d_256:
case Intrinsic::x86_avx2_gather_q_d:
case Intrinsic::x86_avx2_gather_q_d_256: {
+ if (!Subtarget->hasAVX2())
+ break;
unsigned Opc;
switch (IntNo) {
default: llvm_unreachable("Impossible intrinsic");
Index: lib/Target/X86/X86InstrSSE.td
===================================================================
--- lib/Target/X86/X86InstrSSE.td (revision 183626)
+++ lib/Target/X86/X86InstrSSE.td (working copy)
@@ -8367,7 +8367,9 @@
[]>, VEX_4VOp3, VEX_L;
}
-let mayLoad = 1, Constraints = "$src1 = $dst, $mask = $mask_wb" in {
+let mayLoad = 1, Constraints
+ = "@earlyclobber $dst,@earlyclobber $mask_wb, $src1 = $dst, $mask = $mask_wb"
+ in {
defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd", VR256, vx64mem, vx64mem>, VEX_W;
defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd", VR256, vx64mem, vy64mem>, VEX_W;
defm VGATHERDPS : avx2_gather<0x92, "vgatherdps", VR256, vx32mem, vy32mem>;
Index: test/CodeGen/X86/avx2-gather.ll
===================================================================
--- test/CodeGen/X86/avx2-gather.ll (revision 0)
+++ test/CodeGen/X86/avx2-gather.ll (working copy)
@@ -0,0 +1,18 @@
+; RUN: not llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 | FileCheck %s
+
+declare <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float>, i8*,
+ <4 x i32>, <4 x float>, i8) nounwind readonly
+
+define <4 x float> @test_x86_avx2_gather_d_ps(i8* %a1,
+ <4 x i32> %idx, <4 x float> %mask) {
+ %res = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> undef,
+ i8* %a1, <4 x i32> %idx, <4 x float> %mask, i8 2) ;
+ ret <4 x float> %res
+}
+
+; CHECK: test_x86_avx2_gather_d_ps
+; CHECK: vgatherdps
+; CHECK-NOT: [[DST]]
+; CHECK: [[DST:%xmm[0-9]+]]{{$}}
+; CHECK: ret