Merge pull request #643 from ifilippov/testing
patch and regression test for problem with vzeroupper
This commit is contained in:
0
llvm_patches/3_3_0001-Fix-PR16807.patch
Executable file → Normal file
0
llvm_patches/3_3_0001-Fix-PR16807.patch
Executable file → Normal file
69
llvm_patches/3_3_r193261_bug17631_win_vzeroupper.patch
Normal file
69
llvm_patches/3_3_r193261_bug17631_win_vzeroupper.patch
Normal file
@@ -0,0 +1,69 @@
|
|||||||
|
From b9b016cda57d8afc26a150de7ee329b54a994c85 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Michael Liao <michael.hliao@gmail.com>
|
||||||
|
Date: Mon, 21 Oct 2013 17:47:58 -0700
|
||||||
|
Subject: [PATCH] Fix PR17631
|
||||||
|
|
||||||
|
- Skip instructions added in prolog. For specific targets, prolog may
|
||||||
|
insert helper function calls (e.g. _chkstk will be called when
|
||||||
|
there're more than 4K bytes allocated on stack). However, these
|
||||||
|
helpers don't use/def YMM/XMM registers.
|
||||||
|
---
|
||||||
|
lib/Target/X86/X86VZeroUpper.cpp | 11 ++++++++++-
|
||||||
|
test/CodeGen/X86/pr17631.ll | 22 ++++++++++++++++++++++
|
||||||
|
2 files changed, 32 insertions(+), 1 deletion(-)
|
||||||
|
create mode 100644 test/CodeGen/X86/pr17631.ll
|
||||||
|
|
||||||
|
diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp
|
||||||
|
index 477f75a..0d37a7d 100644
|
||||||
|
--- lib/Target/X86/X86VZeroUpper.cpp
|
||||||
|
+++ lib/Target/X86/X86VZeroUpper.cpp
|
||||||
|
@@ -231,8 +231,17 @@ bool VZeroUpperInserter::processBasicBlock(MachineFunction &MF,
|
||||||
|
bool BBHasCall = false;
|
||||||
|
|
||||||
|
for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
|
||||||
|
- MachineInstr *MI = I;
|
||||||
|
DebugLoc dl = I->getDebugLoc();
|
||||||
|
+ MachineInstr *MI = I;
|
||||||
|
+
|
||||||
|
+ // Don't need to check instructions added in prolog.
|
||||||
|
+ // In prolog, special function calls may be added for specific targets
|
||||||
|
+ // (e.g. on Windows, a prolog helper '_chkstk' is called when the local
|
||||||
|
+ // variables exceed 4K bytes on stack.) These helpers won't use/def YMM/XMM
|
||||||
|
+ // registers.
|
||||||
|
+ if (MI->getFlag(MachineInstr::FrameSetup))
|
||||||
|
+ continue;
|
||||||
|
+
|
||||||
|
bool isControlFlow = MI->isCall() || MI->isReturn();
|
||||||
|
|
||||||
|
// Shortcut: don't need to check regular instructions in dirty state.
|
||||||
|
diff --git a/test/CodeGen/X86/pr17631.ll b/test/CodeGen/X86/pr17631.ll
|
||||||
|
new file mode 100644
|
||||||
|
index 0000000..a572ff2
|
||||||
|
--- /dev/null
|
||||||
|
+++ test/CodeGen/X86/pr17631.ll
|
||||||
|
@@ -0,0 +1,22 @@
|
||||||
|
+; RUN: llc < %s -mcpu=core-avx-i -mtriple=i386-pc-win32 | FileCheck %s
|
||||||
|
+
|
||||||
|
+%struct_type = type { [64 x <8 x float>], <8 x float> }
|
||||||
|
+
|
||||||
|
+; Function Attrs: nounwind readnone
|
||||||
|
+declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>)
|
||||||
|
+
|
||||||
|
+; Function Attrs: nounwind
|
||||||
|
+define i32 @equal(<8 x i32> %A) {
|
||||||
|
+allocas:
|
||||||
|
+ %first_alloc = alloca [64 x <8 x i32>]
|
||||||
|
+ %second_alloc = alloca %struct_type
|
||||||
|
+
|
||||||
|
+ %A1 = bitcast <8 x i32> %A to <8 x float>
|
||||||
|
+ %A2 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %A1)
|
||||||
|
+ ret i32 %A2
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+; CHECK: equal
|
||||||
|
+; CHECK-NOT: vzeroupper
|
||||||
|
+; CHECK: _chkstk
|
||||||
|
+; CHECK: ret
|
||||||
|
--
|
||||||
|
1.8.1.2
|
||||||
|
|
||||||
49
tests/chkstk.ispc
Normal file
49
tests/chkstk.ispc
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
// Test for LLVM bug 17631.
|
||||||
|
|
||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
struct s_temp
|
||||||
|
{
|
||||||
|
float temp[64];
|
||||||
|
};
|
||||||
|
|
||||||
|
int CompressBlockBC7(int A, uniform float b)
|
||||||
|
{
|
||||||
|
// This declaration caused a problem because LLVM inserted
|
||||||
|
// _chkstk after declaration and vzeroupper before its call.
|
||||||
|
// A will be in ymm on AVX, so we lose half of it.
|
||||||
|
s_temp _state;
|
||||||
|
// These two loops are here to prevent elimination of the declaration
|
||||||
|
for (int i=0; i<64; i++) {
|
||||||
|
float ii = i;
|
||||||
|
_state.temp[i] = b + sin(ii);
|
||||||
|
}
|
||||||
|
float r = 0;
|
||||||
|
for (int j=0; j<64; j+=9) {
|
||||||
|
r += _state.temp[j] + j;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Here upper bits of A in ymm can be zeros. This will crash the test.
|
||||||
|
int B;
|
||||||
|
if (A!=0) {
|
||||||
|
B = 20;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
B = 30;
|
||||||
|
}
|
||||||
|
if(A == 1) {
|
||||||
|
B = r;
|
||||||
|
}
|
||||||
|
return B;
|
||||||
|
}
|
||||||
|
|
||||||
|
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||||
|
int A = programIndex;
|
||||||
|
RET[programIndex] = CompressBlockBC7(A, b);
|
||||||
|
}
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
RET[programIndex] = 20;
|
||||||
|
RET[0] = 30;
|
||||||
|
RET[1] = 292;
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user