Release 1.4.2, 11 June 2013

This commit is contained in:
Dmitry Babokin
2013-06-11 17:18:54 +04:00
parent 7589ae0de5
commit cf9ceb6bf9
5 changed files with 84 additions and 3 deletions

View File

@@ -1,3 +1,19 @@
=== v1.4.2 === (11 June 2013)
A minor version update with a few important changes:
* Stability fix for AVX2 target (Haswell) - the fix for a problem with gather
instructions is included in LLVM 3.4; if you build with LLVM 3.2 or 3.3, the
patch is available in our repository (llvm_patches/r183327-AVX2-GATHER.patch)
and needs to be applied manually.
* Stability fix for widespread issue on Win32 platform (#503).
* Performance improvements for Xeon Phi related to mask representation.
Also, LLVM 3.3 has been released and is now the recommended version for building ISPC.
Precompiled binaries are also built with LLVM 3.3.
=== v1.4.1 === (28 May 2013)
A major new version of ispc has been released with stability and performance
@@ -20,7 +36,7 @@ Important bug fixes/changes:
* FMA instructions are enabled for AVX2 instruction set.
* Support of RDRAND instruction when availible via library function rdrand (Ivy Bridge).
* Support of RDRAND instruction when available via library function rdrand (Ivy Bridge).
Release also contains numerous bug fixes and minor improvements.

View File

@@ -2,6 +2,17 @@
ispc News
=========
ispc 1.4.2 is Released
----------------------
A minor update of ``ispc`` has been released with a stability fix for AVX2
(Haswell), a fix for the Win32 platform, and performance improvements on Xeon Phi.
As usual, it's available on all supported platforms (Windows, Linux and MacOS).
This version supports LLVM 3.1, 3.2, 3.3 and 3.4, but we now recommend
avoiding 3.1, as it's known to contain a number of stability problems and we are
planning to drop support for it soon.
The released binaries are built with 3.3.
ispc 1.4.1 is Released
----------------------

View File

@@ -31,7 +31,7 @@ PROJECT_NAME = "Intel SPMD Program Compiler"
# This could be handy for archiving the generated documentation or
# if some version control system is used.
PROJECT_NUMBER = 1.4.1
PROJECT_NUMBER = 1.4.2
# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
# base path where the generated documentation will be put.

2
ispc.h
View File

@@ -38,7 +38,7 @@
#ifndef ISPC_H
#define ISPC_H
#define ISPC_VERSION "1.4.1dev"
#define ISPC_VERSION "1.4.2"
#if !defined(LLVM_3_1) && !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4)
#error "Only LLVM 3.1, 3.2, 3.3 and the 3.4 development branch are supported"

View File

@@ -0,0 +1,54 @@
This patch needs to be applied to LLVM 3.2/3.3 to fix a number of failures on the AVX2 target.
LLVM 3.4 contains this fix (r183327).
Index: lib/Target/X86/X86ISelDAGToDAG.cpp
===================================================================
--- lib/Target/X86/X86ISelDAGToDAG.cpp (revision 183626)
+++ lib/Target/X86/X86ISelDAGToDAG.cpp (working copy)
@@ -2013,6 +2013,8 @@
case Intrinsic::x86_avx2_gather_d_d_256:
case Intrinsic::x86_avx2_gather_q_d:
case Intrinsic::x86_avx2_gather_q_d_256: {
+ if (!Subtarget->hasAVX2())
+ break;
unsigned Opc;
switch (IntNo) {
default: llvm_unreachable("Impossible intrinsic");
Index: lib/Target/X86/X86InstrSSE.td
===================================================================
--- lib/Target/X86/X86InstrSSE.td (revision 183626)
+++ lib/Target/X86/X86InstrSSE.td (working copy)
@@ -8367,7 +8367,9 @@
[]>, VEX_4VOp3, VEX_L;
}
-let mayLoad = 1, Constraints = "$src1 = $dst, $mask = $mask_wb" in {
+let mayLoad = 1, Constraints
+ = "@earlyclobber $dst,@earlyclobber $mask_wb, $src1 = $dst, $mask = $mask_wb"
+ in {
defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd", VR256, vx64mem, vx64mem>, VEX_W;
defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd", VR256, vx64mem, vy64mem>, VEX_W;
defm VGATHERDPS : avx2_gather<0x92, "vgatherdps", VR256, vx32mem, vy32mem>;
Index: test/CodeGen/X86/avx2-gather.ll
===================================================================
--- test/CodeGen/X86/avx2-gather.ll (revision 0)
+++ test/CodeGen/X86/avx2-gather.ll (working copy)
@@ -0,0 +1,18 @@
+; RUN: not llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 | FileCheck %s
+
+declare <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float>, i8*,
+ <4 x i32>, <4 x float>, i8) nounwind readonly
+
+define <4 x float> @test_x86_avx2_gather_d_ps(i8* %a1,
+ <4 x i32> %idx, <4 x float> %mask) {
+ %res = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> undef,
+ i8* %a1, <4 x i32> %idx, <4 x float> %mask, i8 2) ;
+ ret <4 x float> %res
+}
+
+; CHECK: test_x86_avx2_gather_d_ps
+; CHECK: vgatherdps
+; CHECK-NOT: [[DST]]
+; CHECK: [[DST:%xmm[0-9]+]]{{$}}
+; CHECK: ret