diff --git a/llvm_patches/3_3_0001-Fix-PR16807.patch b/llvm_patches/3_3_0001-Fix-PR16807.patch old mode 100755 new mode 100644 diff --git a/llvm_patches/3_3_r193261_bug17631_win_vzeroupper.patch b/llvm_patches/3_3_r193261_bug17631_win_vzeroupper.patch new file mode 100644 index 00000000..b6abb1d3 --- /dev/null +++ b/llvm_patches/3_3_r193261_bug17631_win_vzeroupper.patch @@ -0,0 +1,69 @@ +From b9b016cda57d8afc26a150de7ee329b54a994c85 Mon Sep 17 00:00:00 2001 +From: Michael Liao +Date: Mon, 21 Oct 2013 17:47:58 -0700 +Subject: [PATCH] Fix PR17631 + +- Skip instructions added in prolog. For specific targets, prolog may + insert helper function calls (e.g. _chkstk will be called when + there're more than 4K bytes allocated on stack). However, these + helpers don't use/def YMM/XMM registers. +--- + lib/Target/X86/X86VZeroUpper.cpp | 11 ++++++++++- + test/CodeGen/X86/pr17631.ll | 22 ++++++++++++++++++++++ + 2 files changed, 32 insertions(+), 1 deletion(-) + create mode 100644 test/CodeGen/X86/pr17631.ll + +diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp +index 477f75a..0d37a7d 100644 +--- lib/Target/X86/X86VZeroUpper.cpp ++++ lib/Target/X86/X86VZeroUpper.cpp +@@ -231,8 +231,17 @@ bool VZeroUpperInserter::processBasicBlock(MachineFunction &MF, + bool BBHasCall = false; + + for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) { +- MachineInstr *MI = I; + DebugLoc dl = I->getDebugLoc(); ++ MachineInstr *MI = I; ++ ++ // Don't need to check instructions added in prolog. ++ // In prolog, special function calls may be added for specific targets ++ // (e.g. on Windows, a prolog helper '_chkstk' is called when the local ++ // variables exceed 4K bytes on stack.) These helpers won't use/def YMM/XMM ++ // registers. ++ if (MI->getFlag(MachineInstr::FrameSetup)) ++ continue; ++ + bool isControlFlow = MI->isCall() || MI->isReturn(); + + // Shortcut: don't need to check regular instructions in dirty state. 
+diff --git a/test/CodeGen/X86/pr17631.ll b/test/CodeGen/X86/pr17631.ll +new file mode 100644 +index 0000000..a572ff2 +--- /dev/null ++++ test/CodeGen/X86/pr17631.ll +@@ -0,0 +1,22 @@ ++; RUN: llc < %s -mcpu=core-avx-i -mtriple=i386-pc-win32 | FileCheck %s ++ ++%struct_type = type { [64 x <8 x float>], <8 x float> } ++ ++; Function Attrs: nounwind readnone ++declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) ++ ++; Function Attrs: nounwind ++define i32 @equal(<8 x i32> %A) { ++allocas: ++ %first_alloc = alloca [64 x <8 x i32>] ++ %second_alloc = alloca %struct_type ++ ++ %A1 = bitcast <8 x i32> %A to <8 x float> ++ %A2 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %A1) ++ ret i32 %A2 ++} ++ ++; CHECK: equal ++; CHECK-NOT: vzeroupper ++; CHECK: _chkstk ++; CHECK: ret +-- +1.8.1.2 + diff --git a/tests/chkstk.ispc b/tests/chkstk.ispc new file mode 100644 index 00000000..bd0a8299 --- /dev/null +++ b/tests/chkstk.ispc @@ -0,0 +1,49 @@ +// Test for LLVM bug PR17631. + +export uniform int width() { return programCount; } + +struct s_temp +{ + float temp[64]; +}; + +int CompressBlockBC7(int A, uniform float b) +{ + // This declaration caused a problem because LLVM inserted + // _chkstk after the declaration and vzeroupper before its call. + // A will be in ymm on AVX, so we lose half of it. + s_temp _state; + // These two loops are here to prevent elimination of the declaration + for (int i=0; i<64; i++) { + float ii = i; + _state.temp[i] = b + sin(ii); + } + float r = 0; + for (int j=0; j<64; j+=9) { + r += _state.temp[j] + j; + } + + // Here the upper bits of A in ymm can be zeros. This will crash the test. + int B; + if (A!=0) { + B = 20; + } + else { + B = 30; + } + if(A == 1) { + B = r; + } + return B; +} + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + int A = programIndex; + RET[programIndex] = CompressBlockBC7(A, b); +} + +export void result(uniform float RET[]) { + RET[programIndex] = 20; + RET[0] = 30; + RET[1] = 292; +}