diff --git a/llvm_patches/3_3_PR17764_reverse_operands_avx2.patch b/llvm_patches/3_3_PR17764_reverse_operands_avx2.patch new file mode 100644 index 00000000..2719633a --- /dev/null +++ b/llvm_patches/3_3_PR17764_reverse_operands_avx2.patch @@ -0,0 +1,51 @@ +From 13c33dd2931ae9d9c5c9f142677f025281fbefca Mon Sep 17 00:00:00 2001 +From: Michael Liao +Date: Fri, 1 Nov 2013 11:08:08 -0700 +Subject: [PATCH] Fix PR17764 + +- %ret = select %mask, %v1, %v2 is equivalent to + + %ret = %mask ? %v1 : %v2 + + but VPBLENDVB %mask, %v1, %v2, %ret (operands are in Intel assembly + order) is equivalent to + + %ret = %mask ? %v2 : %v1 +--- + lib/Target/X86/X86InstrSSE.td | 2 +- + test/CodeGen/X86/pr17764.ll | 10 ++++++++++ + 2 files changed, 11 insertions(+), 1 deletion(-) + create mode 100644 test/CodeGen/X86/pr17764.ll + +diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td +index 7cae485..bac88f9 100644 +--- lib/Target/X86/X86InstrSSE.td ++++ lib/Target/X86/X86InstrSSE.td +@@ -6965,7 +6965,7 @@ let Predicates = [HasAVX] in { + let Predicates = [HasAVX2] in { + def : Pat<(v32i8 (vselect (v32i8 VR256:$mask), (v32i8 VR256:$src1), + (v32i8 VR256:$src2))), +- (VPBLENDVBYrr VR256:$src1, VR256:$src2, VR256:$mask)>; ++ (VPBLENDVBYrr VR256:$src2, VR256:$src1, VR256:$mask)>; + def : Pat<(v16i16 (X86Blendi (v16i16 VR256:$src1), (v16i16 VR256:$src2), + (imm:$mask))), + (VPBLENDWYrri VR256:$src1, VR256:$src2, imm:$mask)>; +diff --git a/test/CodeGen/X86/pr17764.ll b/test/CodeGen/X86/pr17764.ll +new file mode 100644 +index 0000000..7a3fd6d +--- /dev/null ++++ test/CodeGen/X86/pr17764.ll +@@ -0,0 +1,10 @@ ++; RUN: llc < %s -mtriple=x86_64-linux -mcpu=core-avx2 | FileCheck %s ++ ++define <16 x i16> @foo(<16 x i1> %mask, <16 x i16> %x, <16 x i16> %y) { ++ %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %y ++ ret <16 x i16> %ret ++} ++ ++; CHECK: foo ++; CHECK: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0 ++; CHECK: ret +-- +1.8.1.2 + diff --git a/tests/test-141.ispc b/tests/test-141.ispc index a533b605..b69be1fa 100644 --- a/tests/test-141.ispc +++ b/tests/test-141.ispc @@ -3,8 +3,9 @@ export uniform int width() { return programCount; } export void f_f(uniform float RET[], uniform float aFOO[]) { - float a = aFOO[programIndex]; - RET[programIndex] = (exp(-log(1/a)) - a) < 1e-7 ? 1 : 0; + float a = aFOO[programIndex]; + // calculation error 1e-6 is the same as in icc + RET[programIndex] = (exp(-log(1/a)) - a) < 1e-6 ? 1 : 0; } export void result(uniform float RET[4]) {