patch for LLVM 3.3 and test correction at avx2

This commit is contained in:
Ilia Filippov
2013-11-18 13:44:59 +04:00
parent 4977933d81
commit 4579d339ea
2 changed files with 54 additions and 2 deletions

View File

@@ -0,0 +1,51 @@
From 13c33dd2931ae9d9c5c9f142677f025281fbefca Mon Sep 17 00:00:00 2001
From: Michael Liao <michael.hliao@gmail.com>
Date: Fri, 1 Nov 2013 11:08:08 -0700
Subject: [PATCH] Fix PR17764
- %ret = select %mask, %v1, %v2 is equivalent to
%ret = %mask ? %v1 : %v2
but VPBLENDVB %mask, %v1, %v2, %ret (operands are in Intel assembly
order) is equivalent to
%ret = %mask ? %v2 : %v1
---
lib/Target/X86/X86InstrSSE.td | 2 +-
test/CodeGen/X86/pr17764.ll | 10 ++++++++++
2 files changed, 11 insertions(+), 1 deletion(-)
create mode 100644 test/CodeGen/X86/pr17764.ll
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 7cae485..bac88f9 100644
--- lib/Target/X86/X86InstrSSE.td
+++ lib/Target/X86/X86InstrSSE.td
@@ -6965,7 +6965,7 @@ let Predicates = [HasAVX] in {
let Predicates = [HasAVX2] in {
def : Pat<(v32i8 (vselect (v32i8 VR256:$mask), (v32i8 VR256:$src1),
(v32i8 VR256:$src2))),
- (VPBLENDVBYrr VR256:$src1, VR256:$src2, VR256:$mask)>;
+ (VPBLENDVBYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
def : Pat<(v16i16 (X86Blendi (v16i16 VR256:$src1), (v16i16 VR256:$src2),
(imm:$mask))),
(VPBLENDWYrri VR256:$src1, VR256:$src2, imm:$mask)>;
diff --git a/test/CodeGen/X86/pr17764.ll b/test/CodeGen/X86/pr17764.ll
new file mode 100644
index 0000000..7a3fd6d
--- /dev/null
+++ test/CodeGen/X86/pr17764.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -mtriple=x86_64-linux -mcpu=core-avx2 | FileCheck %s
+
+define <16 x i16> @foo(<16 x i1> %mask, <16 x i16> %x, <16 x i16> %y) {
+ %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %y
+ ret <16 x i16> %ret
+}
+
+; CHECK: foo
+; CHECK: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
+; CHECK: ret
--
1.8.1.2

View File

@@ -3,8 +3,9 @@ export uniform int width() { return programCount; }
export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex];
RET[programIndex] = (exp(-log(1/a)) - a) < 1e-7 ? 1 : 0;
float a = aFOO[programIndex];
// calculation error 1e-6 is the same as in icc
RET[programIndex] = (exp(-log(1/a)) - a) < 1e-6 ? 1 : 0;
}
export void result(uniform float RET[4]) {