From b9b016cda57d8afc26a150de7ee329b54a994c85 Mon Sep 17 00:00:00 2001 From: Michael Liao Date: Mon, 21 Oct 2013 17:47:58 -0700 Subject: [PATCH] Fix PR17631 - Skip instructions added in prolog. For specific targets, prolog may insert helper function calls (e.g. _chkstk will be called when there're more than 4K bytes allocated on stack). However, these helpers don't use/def YMM/XMM registers. It also includes a second fix for the problem: r196261+r196391. diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp index 477f75a..0d37a7d 100644 --- lib/Target/X86/X86VZeroUpper.cpp +++ lib/Target/X86/X86VZeroUpper.cpp @@ -121,7 +121,7 @@ } static bool clobbersAllYmmRegs(const MachineOperand &MO) { - for (unsigned reg = X86::YMM0; reg < X86::YMM15; ++reg) { + for (unsigned reg = X86::YMM0; reg <= X86::YMM15; ++reg) { if (!MO.clobbersPhysReg(reg)) return false; } @@ -143,6 +143,21 @@ return false; } +/// clobbersAnyYmmReg() - Check if any YMM register will be clobbered by this +/// instruction. +static bool clobbersAnyYmmReg(MachineInstr *MI) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isRegMask()) + continue; + for (unsigned reg = X86::YMM0; reg <= X86::YMM15; ++reg) { + if (MO.clobbersPhysReg(reg)) + return true; + } + } + return false; +} + /// runOnMachineFunction - Loop over all of the basic blocks, inserting /// vzero upper instructions before function calls. bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) { @@ -226,8 +241,9 @@ bool BBHasCall = false; for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) { + DebugLoc dl = I->getDebugLoc(); MachineInstr *MI = I; - DebugLoc dl = I->getDebugLoc(); + bool isControlFlow = MI->isCall() || MI->isReturn(); // Shortcut: don't need to check regular instructions in dirty state. @@ -246,6 +262,14 @@ if (!isControlFlow) continue; + // If the call won't clobber any YMM register, skip it as well. 
It usually + // happens on helper function calls (such as '_chkstk', '_ftol2') where + // standard calling convention is not used (RegMask is not used to mark + // register clobbered and register usage (def/imp-def/use) is well-defined + // and explicitly specified). + if (MI->isCall() && !clobbersAnyYmmReg(MI)) + continue; + BBHasCall = true; // The VZEROUPPER instruction resets the upper 128 bits of all Intel AVX diff --git a/test/CodeGen/X86/pr17631.ll b/test/CodeGen/X86/pr17631.ll new file mode 100644 index 0000000..a572ff2 --- /dev/null +++ test/CodeGen/X86/pr17631.ll @@ -0,0 +1,34 @@ +; RUN: llc < %s -mcpu=core-avx-i -mtriple=i386-pc-win32 | FileCheck %s + +%struct_type = type { [64 x <8 x float>], <8 x float> } + +; Function Attrs: nounwind readnone +declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) + +; Function Attrs: nounwind +define i32 @equal(<8 x i32> %A) { +allocas: + %first_alloc = alloca [64 x <8 x i32>] + %second_alloc = alloca %struct_type + + %A1 = bitcast <8 x i32> %A to <8 x float> + %A2 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %A1) + ret i32 %A2 +} + +; CHECK: equal +; CHECK-NOT: vzeroupper +; CHECK: _chkstk +; CHECK: ret + +define <8 x float> @foo(<8 x float> %y, i64* %p, double %x) { + %i = fptoui double %x to i64 + store i64 %i, i64* %p + %ret = fadd <8 x float> %y, %y + ret <8 x float> %ret +} + +; CHECK: foo +; CHECK-NOT: vzeroupper +; CHECK: _ftol2 +; CHECK: ret -- 1.8.1.2