diff --git a/llvm_patches/3_7_r251275-AVX-512-not-materializable-instructions.patch b/llvm_patches/3_7_r251275-AVX-512-not-materializable-instructions.patch
new file mode 100644
index 00000000..903f7533
--- /dev/null
+++ b/llvm_patches/3_7_r251275-AVX-512-not-materializable-instructions.patch
@@ -0,0 +1,80 @@
+Index: test/CodeGen/X86/avx512-bugfix-25270.ll
+===================================================================
+--- test/CodeGen/X86/avx512-bugfix-25270.ll	(revision 0)
++++ test/CodeGen/X86/avx512-bugfix-25270.ll	(revision 251275)
+@@ -0,0 +1,34 @@
++; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
++
++declare void @Print__512(<16 x i32>) #0
++
++define void @bar__512(<16 x i32>* %var) #0 {
++; CHECK-LABEL: bar__512:
++; CHECK:       ## BB#0: ## %allocas
++; CHECK-NEXT:    pushq %rbx
++; CHECK-NEXT:    subq $112, %rsp
++; CHECK-NEXT:    movq %rdi, %rbx
++; CHECK-NEXT:    vmovdqu32 (%rbx), %zmm0
++; CHECK-NEXT:    vmovups %zmm0, (%rsp) ## 64-byte Spill
++; CHECK-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm1
++; CHECK-NEXT:    vmovdqa32 %zmm1, (%rbx)
++; CHECK-NEXT:    callq _Print__512
++; CHECK-NEXT:    vmovups (%rsp), %zmm0 ## 64-byte Reload
++; CHECK-NEXT:    callq _Print__512
++; CHECK-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0
++; CHECK-NEXT:    vmovdqa32 %zmm0, (%rbx)
++; CHECK-NEXT:    addq $112, %rsp
++; CHECK-NEXT:    popq %rbx
++; CHECK-NEXT:    retq
++allocas:
++  %var_load_load = load <16 x i32>, <16 x i32>* %var, align 1
++  store <16 x i32> , <16 x i32>* %var, align 64
++  call void @Print__512(<16 x i32> %var_load_load)
++  ; %var_load_load value should be reloaded
++  call void @Print__512(<16 x i32> %var_load_load)
++  store <16 x i32> , <16 x i32>* %var, align 64
++  ret void
++}
++
++
++attributes #0 = { nounwind }
+Index: lib/Target/X86/X86InstrInfo.cpp
+===================================================================
+--- lib/Target/X86/X86InstrInfo.cpp	(revision 251274)
++++ lib/Target/X86/X86InstrInfo.cpp	(revision 251275)
+@@ -2287,7 +2287,35 @@
+   case X86::FsVMOVAPSrm:
+   case X86::FsVMOVAPDrm:
+   case X86::FsMOVAPSrm:
+-  case X86::FsMOVAPDrm: {
++  case X86::FsMOVAPDrm:
++  // AVX-512
++  case X86::VMOVAPDZ128rm:
++  case X86::VMOVAPDZ256rm:
++  case X86::VMOVAPDZrm:
++  case X86::VMOVAPSZ128rm:
++  case X86::VMOVAPSZ256rm:
++  case X86::VMOVAPSZrm:
++  case X86::VMOVDQA32Z128rm:
++  case X86::VMOVDQA32Z256rm:
++  case X86::VMOVDQA32Zrm:
++  case X86::VMOVDQA64Z128rm:
++  case X86::VMOVDQA64Z256rm:
++  case X86::VMOVDQA64Zrm:
++  case X86::VMOVDQU16Z128rm:
++  case X86::VMOVDQU16Z256rm:
++  case X86::VMOVDQU16Zrm:
++  case X86::VMOVDQU32Z128rm:
++  case X86::VMOVDQU32Z256rm:
++  case X86::VMOVDQU32Zrm:
++  case X86::VMOVDQU64Z128rm:
++  case X86::VMOVDQU64Z256rm:
++  case X86::VMOVDQU64Zrm:
++  case X86::VMOVDQU8Z128rm:
++  case X86::VMOVDQU8Z256rm:
++  case X86::VMOVDQU8Zrm:
++  case X86::VMOVUPSZ128rm:
++  case X86::VMOVUPSZ256rm:
++  case X86::VMOVUPSZrm: {
+     // Loads from constant pools are trivially rematerializable.
+     if (MI->getOperand(1+X86::AddrBaseReg).isReg() &&
+         MI->getOperand(1+X86::AddrScaleAmt).isImm() &&