diff --git a/docs/ReleaseNotes.txt b/docs/ReleaseNotes.txt index c39fb149..bfd92cc5 100644 --- a/docs/ReleaseNotes.txt +++ b/docs/ReleaseNotes.txt @@ -1,3 +1,19 @@ +=== v1.4.2 === (11 June 2013) + +A minor version update with a few important changes: + +* Stability fix for AVX2 target (Haswell) - the fix for a gather instruction problem + was released in LLVM 3.4; if you build with LLVM 3.2 or 3.3, it's available in our + repository (llvm_patches/r183327-AVX2-GATHER.patch) and needs to be applied + manually. + +* Stability fix for a widespread issue on the Win32 platform (#503). + +* Performance improvements for Xeon Phi related to mask representation. + +Also LLVM 3.3 has been released and now it's the recommended version for building ISPC. +Precompiled binaries are also built with LLVM 3.3. + === v1.4.1 === (28 May 2013) A major new version of ispc has been released with stability and performance @@ -20,7 +36,7 @@ Important bug fixes/changes: * FMA instructions are enabled for AVX2 instruction set. -* Support of RDRAND instruction when availible via library function rdrand (Ivy Bridge). +* Support of RDRAND instruction when available via library function rdrand (Ivy Bridge). Release also contains numerous bug fixes and minor improvements. diff --git a/docs/news.rst b/docs/news.rst index 1228aac3..a8b65daa 100644 --- a/docs/news.rst +++ b/docs/news.rst @@ -2,6 +2,17 @@ ispc News ========= +ispc 1.4.2 is Released +---------------------- + +A minor update of ``ispc`` has been released with a stability fix for AVX2 +(Haswell), a fix for the Win32 platform and performance improvements on Xeon Phi. +As usual, it's available on all supported platforms (Windows, Linux and MacOS). +This version supports LLVM 3.1, 3.2, 3.3 and 3.4, but we now recommend +avoiding 3.1, as it's known to contain a number of stability problems and we are +planning to deprecate its support soon. +The released binaries are built with 3.3. 
+ ispc 1.4.1 is Released ---------------------- diff --git a/doxygen.cfg b/doxygen.cfg index 5fc715b2..1cf3d57f 100644 --- a/doxygen.cfg +++ b/doxygen.cfg @@ -31,7 +31,7 @@ PROJECT_NAME = "Intel SPMD Program Compiler" # This could be handy for archiving the generated documentation or # if some version control system is used. -PROJECT_NUMBER = 1.4.1 +PROJECT_NUMBER = 1.4.2 # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) # base path where the generated documentation will be put. diff --git a/ispc.h b/ispc.h index 46320213..a70e797c 100644 --- a/ispc.h +++ b/ispc.h @@ -38,7 +38,7 @@ #ifndef ISPC_H #define ISPC_H -#define ISPC_VERSION "1.4.1dev" +#define ISPC_VERSION "1.4.2" #if !defined(LLVM_3_1) && !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4) #error "Only LLVM 3.1, 3.2, 3.3 and the 3.4 development branch are supported" diff --git a/llvm_patches/r183327-AVX2-GATHER.patch b/llvm_patches/r183327-AVX2-GATHER.patch new file mode 100755 index 00000000..7524e9a4 --- /dev/null +++ b/llvm_patches/r183327-AVX2-GATHER.patch @@ -0,0 +1,54 @@ +This patch needs to be applied to LLVM 3.2/3.3 to fix a bunch of failures on the AVX2 target. +LLVM 3.4 contains this fix (r183327). 
+ +Index: lib/Target/X86/X86ISelDAGToDAG.cpp +=================================================================== +--- lib/Target/X86/X86ISelDAGToDAG.cpp (revision 183626) ++++ lib/Target/X86/X86ISelDAGToDAG.cpp (working copy) +@@ -2013,6 +2013,8 @@ + case Intrinsic::x86_avx2_gather_d_d_256: + case Intrinsic::x86_avx2_gather_q_d: + case Intrinsic::x86_avx2_gather_q_d_256: { ++ if (!Subtarget->hasAVX2()) ++ break; + unsigned Opc; + switch (IntNo) { + default: llvm_unreachable("Impossible intrinsic"); +Index: lib/Target/X86/X86InstrSSE.td +=================================================================== +--- lib/Target/X86/X86InstrSSE.td (revision 183626) ++++ lib/Target/X86/X86InstrSSE.td (working copy) +@@ -8367,7 +8367,9 @@ + []>, VEX_4VOp3, VEX_L; + } + +-let mayLoad = 1, Constraints = "$src1 = $dst, $mask = $mask_wb" in { ++let mayLoad = 1, Constraints ++ = "@earlyclobber $dst,@earlyclobber $mask_wb, $src1 = $dst, $mask = $mask_wb" ++ in { + defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd", VR256, vx64mem, vx64mem>, VEX_W; + defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd", VR256, vx64mem, vy64mem>, VEX_W; + defm VGATHERDPS : avx2_gather<0x92, "vgatherdps", VR256, vx32mem, vy32mem>; +Index: test/CodeGen/X86/avx2-gather.ll +=================================================================== +--- test/CodeGen/X86/avx2-gather.ll (revision 0) ++++ test/CodeGen/X86/avx2-gather.ll (working copy) +@@ -0,0 +1,18 @@ ++; RUN: not llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx ++; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 | FileCheck %s ++ ++declare <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float>, i8*, ++ <4 x i32>, <4 x float>, i8) nounwind readonly ++ ++define <4 x float> @test_x86_avx2_gather_d_ps(i8* %a1, ++ <4 x i32> %idx, <4 x float> %mask) { ++ %res = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> undef, ++ i8* %a1, <4 x i32> %idx, <4 x float> %mask, i8 2) ; ++ ret <4 x float> %res ++} ++ ++; CHECK: test_x86_avx2_gather_d_ps 
++; CHECK: vgatherdps ++; CHECK-NOT: [[DST]] ++; CHECK: [[DST:%xmm[0-9]+]]{{$}} ++; CHECK: ret