Files
ispc/llvm_patches/3_3_r193261_bug17631_196261_win_vzeroupper.patch
2013-12-05 00:50:55 +04:00

116 lines
3.7 KiB
Diff

From b9b016cda57d8afc26a150de7ee329b54a994c85 Mon Sep 17 00:00:00 2001
From: Michael Liao <michael.hliao@gmail.com>
Date: Mon, 21 Oct 2013 17:47:58 -0700
Subject: [PATCH] Fix PR17631
- Skip instructions added in prolog. For specific targets, prolog may
insert helper function calls (e.g. _chkstk will be called when
there're more than 4K bytes allocated on stack). However, these
helpers don't use/def YMM/XMM registers.
It also include second fix for the problem: r196261+r196391.
diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp
index 477f75a..0d37a7d 100644
--- lib/Target/X86/X86VZeroUpper.cpp
+++ lib/Target/X86/X86VZeroUpper.cpp
@@ -121,7 +121,7 @@
}
static bool clobbersAllYmmRegs(const MachineOperand &MO) {
- for (unsigned reg = X86::YMM0; reg < X86::YMM15; ++reg) {
+ for (unsigned reg = X86::YMM0; reg <= X86::YMM15; ++reg) {
if (!MO.clobbersPhysReg(reg))
return false;
}
@@ -143,6 +143,21 @@
return false;
}
+/// clobbersAnyYmmReg() - Check if any YMM register will be clobbered by this
+/// instruction.
+static bool clobbersAnyYmmReg(MachineInstr *MI) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isRegMask())
+ continue;
+ for (unsigned reg = X86::YMM0; reg <= X86::YMM15; ++reg) {
+ if (MO.clobbersPhysReg(reg))
+ return true;
+ }
+ }
+ return false;
+}
+
/// runOnMachineFunction - Loop over all of the basic blocks, inserting
/// vzero upper instructions before function calls.
bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
@@ -226,8 +241,9 @@
bool BBHasCall = false;
for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
+ DebugLoc dl = I->getDebugLoc();
MachineInstr *MI = I;
- DebugLoc dl = I->getDebugLoc();
+
bool isControlFlow = MI->isCall() || MI->isReturn();
// Shortcut: don't need to check regular instructions in dirty state.
@@ -246,6 +262,14 @@
if (!isControlFlow)
continue;
+ // If the call won't clobber any YMM register, skip it as well. It usually
+ // happens on helper function calls (such as '_chkstk', '_ftol2') where
+ // standard calling convention is not used (RegMask is not used to mark
+ // register clobbered and register usage (def/imp-def/use) is well-dfined
+ // and explicitly specified.
+ if (MI->isCall() && !clobbersAnyYmmReg(MI))
+ continue;
+
BBHasCall = true;
// The VZEROUPPER instruction resets the upper 128 bits of all Intel AVX
diff --git a/test/CodeGen/X86/pr17631.ll b/test/CodeGen/X86/pr17631.ll
new file mode 100644
index 0000000..a572ff2
--- /dev/null
+++ test/CodeGen/X86/pr17631.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -mcpu=core-avx-i -mtriple=i386-pc-win32 | FileCheck %s
+
+%struct_type = type { [64 x <8 x float>], <8 x float> }
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>)
+
+; Function Attrs: nounwind
+define i32 @equal(<8 x i32> %A) {
+allocas:
+ %first_alloc = alloca [64 x <8 x i32>]
+ %second_alloc = alloca %struct_type
+
+ %A1 = bitcast <8 x i32> %A to <8 x float>
+ %A2 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %A1)
+ ret i32 %A2
+}
+
+; CHECK: equal
+; CHECK-NOT: vzeroupper
+; CHECK: _chkstk
+; CHECK: ret
+
+define <8 x float> @foo(<8 x float> %y, i64* %p, double %x) {
+ %i = fptoui double %x to i64
+ store i64 %i, i64* %p
+ %ret = fadd <8 x float> %y, %y
+ ret <8 x float> %ret
+}
+
+; CHECK: foo
+; CHECK-NOT: vzeroupper
+; CHECK: _ftol2
+; CHECK: ret
--
1.8.1.2