Release 1.4.2, 11 June 2013
This commit is contained in:
@@ -1,3 +1,19 @@
|
||||
=== v1.4.2 === (11 June 2013)
|
||||
|
||||
A minor version update with a few important changes:
|
||||
|
||||
* Stability fix for AVX2 target (Haswell) - the fix for a problem with gather instructions was
|
||||
released in LLVM 3.4; if you build with LLVM 3.2 or 3.3, the fix is available in our
|
||||
repository (llvm_patches/r183327-AVX2-GATHER.patch) and needs to be applied
|
||||
manually.
|
||||
|
||||
* Stability fix for a widespread issue on the Win32 platform (#503).
|
||||
|
||||
* Performance improvements for Xeon Phi related to mask representation.
|
||||
|
||||
Also, LLVM 3.3 has been released and is now the recommended version for building ISPC.
|
||||
Precompiled binaries are also built with LLVM 3.3.
|
||||
|
||||
=== v1.4.1 === (28 May 2013)
|
||||
|
||||
A major new version of ispc has been released with stability and performance
|
||||
@@ -20,7 +36,7 @@ Important bug fixes/changes:
|
||||
|
||||
* FMA instructions are enabled for AVX2 instruction set.
|
||||
|
||||
* Support of RDRAND instruction when availible via library function rdrand (Ivy Bridge).
|
||||
* Support of RDRAND instruction when available via library function rdrand (Ivy Bridge).
|
||||
|
||||
Release also contains numerous bug fixes and minor improvements.
|
||||
|
||||
|
||||
@@ -2,6 +2,17 @@
|
||||
ispc News
|
||||
=========
|
||||
|
||||
ispc 1.4.2 is Released
|
||||
----------------------
|
||||
|
||||
A minor update of ``ispc`` has been released with a stability fix for AVX2
|
||||
(Haswell), a fix for the Win32 platform and performance improvements on Xeon Phi.
|
||||
As usual, it's available on all supported platforms (Windows, Linux and MacOS).
|
||||
This version supports LLVM 3.1, 3.2, 3.3 and 3.4, but we now recommend
|
||||
avoiding 3.1, as it's known to contain a number of stability problems and we are
|
||||
planning to deprecate its support soon.
|
||||
The released binaries are built with 3.3.
|
||||
|
||||
ispc 1.4.1 is Released
|
||||
----------------------
|
||||
|
||||
|
||||
@@ -31,7 +31,7 @@ PROJECT_NAME = "Intel SPMD Program Compiler"
|
||||
# This could be handy for archiving the generated documentation or
|
||||
# if some version control system is used.
|
||||
|
||||
PROJECT_NUMBER = 1.4.1
|
||||
PROJECT_NUMBER = 1.4.2
|
||||
|
||||
# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
|
||||
# base path where the generated documentation will be put.
|
||||
|
||||
2
ispc.h
2
ispc.h
@@ -38,7 +38,7 @@
|
||||
#ifndef ISPC_H
|
||||
#define ISPC_H
|
||||
|
||||
#define ISPC_VERSION "1.4.1dev"
|
||||
#define ISPC_VERSION "1.4.2"
|
||||
|
||||
#if !defined(LLVM_3_1) && !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4)
|
||||
#error "Only LLVM 3.1, 3.2, 3.3 and the 3.4 development branch are supported"
|
||||
|
||||
54
llvm_patches/r183327-AVX2-GATHER.patch
Executable file
54
llvm_patches/r183327-AVX2-GATHER.patch
Executable file
@@ -0,0 +1,54 @@
|
||||
This patch needs to be applied to LLVM 3.2/3.3 to fix a number of failures on the AVX2 target.
|
||||
LLVM 3.4 contains this fix (r183327).
|
||||
|
||||
Index: lib/Target/X86/X86ISelDAGToDAG.cpp
|
||||
===================================================================
|
||||
--- lib/Target/X86/X86ISelDAGToDAG.cpp (revision 183626)
|
||||
+++ lib/Target/X86/X86ISelDAGToDAG.cpp (working copy)
|
||||
@@ -2013,6 +2013,8 @@
|
||||
case Intrinsic::x86_avx2_gather_d_d_256:
|
||||
case Intrinsic::x86_avx2_gather_q_d:
|
||||
case Intrinsic::x86_avx2_gather_q_d_256: {
|
||||
+ if (!Subtarget->hasAVX2())
|
||||
+ break;
|
||||
unsigned Opc;
|
||||
switch (IntNo) {
|
||||
default: llvm_unreachable("Impossible intrinsic");
|
||||
Index: lib/Target/X86/X86InstrSSE.td
|
||||
===================================================================
|
||||
--- lib/Target/X86/X86InstrSSE.td (revision 183626)
|
||||
+++ lib/Target/X86/X86InstrSSE.td (working copy)
|
||||
@@ -8367,7 +8367,9 @@
|
||||
[]>, VEX_4VOp3, VEX_L;
|
||||
}
|
||||
|
||||
-let mayLoad = 1, Constraints = "$src1 = $dst, $mask = $mask_wb" in {
|
||||
+let mayLoad = 1, Constraints
|
||||
+ = "@earlyclobber $dst,@earlyclobber $mask_wb, $src1 = $dst, $mask = $mask_wb"
|
||||
+ in {
|
||||
defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd", VR256, vx64mem, vx64mem>, VEX_W;
|
||||
defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd", VR256, vx64mem, vy64mem>, VEX_W;
|
||||
defm VGATHERDPS : avx2_gather<0x92, "vgatherdps", VR256, vx32mem, vy32mem>;
|
||||
Index: test/CodeGen/X86/avx2-gather.ll
|
||||
===================================================================
|
||||
--- test/CodeGen/X86/avx2-gather.ll (revision 0)
|
||||
+++ test/CodeGen/X86/avx2-gather.ll (working copy)
|
||||
@@ -0,0 +1,18 @@
|
||||
+; RUN: not llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx
|
||||
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 | FileCheck %s
|
||||
+
|
||||
+declare <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float>, i8*,
|
||||
+ <4 x i32>, <4 x float>, i8) nounwind readonly
|
||||
+
|
||||
+define <4 x float> @test_x86_avx2_gather_d_ps(i8* %a1,
|
||||
+ <4 x i32> %idx, <4 x float> %mask) {
|
||||
+ %res = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> undef,
|
||||
+ i8* %a1, <4 x i32> %idx, <4 x float> %mask, i8 2) ;
|
||||
+ ret <4 x float> %res
|
||||
+}
|
||||
+
|
||||
+; CHECK: test_x86_avx2_gather_d_ps
|
||||
+; CHECK: vgatherdps
|
||||
+; CHECK-NOT: [[DST]]
|
||||
+; CHECK: [[DST:%xmm[0-9]+]]{{$}}
|
||||
+; CHECK: ret
|
||||
Reference in New Issue
Block a user