From 6c7df4cb6b9c9e69fe91a090862bcf0f48d12786 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Fri, 8 Jun 2012 15:55:00 -0700 Subject: [PATCH] Add initial support for "avx1.1" targets for Ivy Bridge. So far, only the use of the float/half conversion instructions distinguishes this from the "avx1" target. Partial work on issue #263. --- Makefile | 4 +- builtins.cpp | 22 ++++++++++ builtins/dispatch.ll | 67 ++++++++++++++++------------ builtins/target-avx1-x2.ll | 2 + builtins/target-avx1.ll | 2 + builtins/target-avx11-x2.ll | 87 +++++++++++++++++++++++++++++++++++++ builtins/target-avx11.ll | 71 ++++++++++++++++++++++++++++++ ispc.cpp | 48 ++++++++++++++++---- ispc.h | 5 ++- ispc.vcxproj | 28 ++++++++++++ 10 files changed, 296 insertions(+), 40 deletions(-) create mode 100644 builtins/target-avx11-x2.ll create mode 100644 builtins/target-avx11.ll diff --git a/Makefile b/Makefile index 0d61c611..d4a8e3d3 100644 --- a/Makefile +++ b/Makefile @@ -84,8 +84,8 @@ CXX_SRC=ast.cpp builtins.cpp cbackend.cpp ctx.cpp decl.cpp expr.cpp func.cpp \ type.cpp util.cpp HEADERS=ast.h builtins.h ctx.h decl.h expr.h func.h ispc.h llvmutil.h module.h \ opt.h stmt.h sym.h type.h util.h -TARGETS=avx1 avx1-x2 avx2 avx2-x2 sse2 sse2-x2 sse4 sse4-x2 generic-4 generic-8 \ - generic-16 generic-32 generic-64 generic-1 +TARGETS=avx1 avx1-x2 avx11 avx11-x2 avx2 avx2-x2 sse2 sse2-x2 sse4 sse4-x2 \ + generic-4 generic-8 generic-16 generic-32 generic-64 generic-1 BUILTINS_SRC=$(addprefix builtins/target-, $(addsuffix .ll, $(TARGETS))) \ builtins/dispatch.ll BUILTINS_OBJS=$(addprefix builtins-, $(notdir $(BUILTINS_SRC:.ll=.o))) \ diff --git a/builtins.cpp b/builtins.cpp index db55758a..714390d7 100644 --- a/builtins.cpp +++ b/builtins.cpp @@ -804,6 +804,26 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod FATAL("logic error in DefineStdlib"); } break; + case Target::AVX11: + switch (g->target.vectorWidth) { + case 8: + extern unsigned char builtins_bitcode_avx11[]; + 
extern int builtins_bitcode_avx11_length; + AddBitcodeToModule(builtins_bitcode_avx11, + builtins_bitcode_avx11_length, + module, symbolTable); + break; + case 16: + extern unsigned char builtins_bitcode_avx11_x2[]; + extern int builtins_bitcode_avx11_x2_length; + AddBitcodeToModule(builtins_bitcode_avx11_x2, + builtins_bitcode_avx11_x2_length, + module, symbolTable); + break; + default: + FATAL("logic error in DefineStdlib"); + } + break; case Target::AVX2: switch (g->target.vectorWidth) { case 8: @@ -898,6 +918,8 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod lDefineConstantInt("__have_native_half", g->target.hasHalf, module, symbolTable); + lDefineConstantInt("__have_native_rand", g->target.hasRand, module, + symbolTable); lDefineConstantInt("__have_native_transcendentals", g->target.hasTranscendentals, module, symbolTable); diff --git a/builtins/dispatch.ll b/builtins/dispatch.ll index e61292aa..b9db3543 100644 --- a/builtins/dispatch.ll +++ b/builtins/dispatch.ll @@ -48,8 +48,8 @@ declare void @abort() noreturn ;; corresponding to one of the Target::ISA enumerant values that gives the ;; most capable ISA that the curremt system can run. ;; -;; Note: clang from LLVM 2.9 should be used if this is updated, for maximum -;; backwards compatibility for anyone building ispc with LLVM 2.9. 
+;; Note: clang from LLVM 3.0 should be used if this is updated, for maximum +;; backwards compatibility for anyone building ispc with LLVM 3.0 ;; ;; #include ;; #include @@ -80,9 +80,14 @@ declare void @abort() noreturn ;; // Call cpuid with eax=7, ecx=0 ;; __cpuid_count(info, 7, 0); ;; if ((info[1] & (1 << 5)) != 0) -;; return 3; // AVX2 -;; else -;; return 2; // AVX1 +;; return 4; // AVX2 +;; else { +;; if ((info[2] & (1 << 29)) != 0 && // F16C +;; (info[2] & (1 << 30)) != 0) // RDRAND +;; return 3; // AVX1 on IVB +;; else +;; return 2; // AVX1 +;; } ;; } ;; else if ((info[2] & (1 << 19)) != 0) ;; return 1; // SSE4 @@ -92,41 +97,47 @@ declare void @abort() noreturn ;; abort(); ;; } -%0 = type { i32, i32, i32, i32 } - -define i32 @__get_system_isa() nounwind ssp { +define i32 @__get_system_isa() nounwind uwtable ssp { entry: - %0 = tail call %0 asm sideeffect "cpuid", "={ax},={bx},={cx},={dx},0,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind - %asmresult9.i = extractvalue %0 %0, 2 - %asmresult10.i = extractvalue %0 %0, 3 - %and = and i32 %asmresult9.i, 268435456 + %0 = tail call { i32, i32, i32, i32 } asm sideeffect "cpuid", "={ax},={bx},={cx},={dx},0,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind + %asmresult5.i = extractvalue { i32, i32, i32, i32 } %0, 2 + %asmresult6.i = extractvalue { i32, i32, i32, i32 } %0, 3 + %and = and i32 %asmresult5.i, 268435456 %cmp = icmp eq i32 %and, 0 - br i1 %cmp, label %if.else7, label %if.then + br i1 %cmp, label %if.else14, label %if.then if.then: ; preds = %entry - %1 = tail call %0 asm sideeffect "xchg$(l$)\09$(%$)ebx, $1\0A\09cpuid\0A\09xchg$(l$)\09$(%$)ebx, $1\0A\09", "={ax},=r,={cx},={dx},0,2,~{dirflag},~{fpsr},~{flags}"(i32 7, i32 0) nounwind - %asmresult9.i24 = extractvalue %0 %1, 1 - %and4 = lshr i32 %asmresult9.i24, 5 - %2 = and i32 %and4, 1 - %3 = or i32 %2, 2 + %1 = tail call { i32, i32, i32, i32 } asm sideeffect "xchg$(l$)\09$(%$)ebx, $1\0A\09cpuid\0A\09xchg$(l$)\09$(%$)ebx, $1\0A\09", 
"={ax},=r,={cx},={dx},0,2,~{dirflag},~{fpsr},~{flags}"(i32 7, i32 0) nounwind + %asmresult4.i29 = extractvalue { i32, i32, i32, i32 } %1, 1 + %and3 = and i32 %asmresult4.i29, 32 + %cmp4 = icmp eq i32 %and3, 0 + br i1 %cmp4, label %if.else, label %return + +if.else: ; preds = %if.then + %asmresult5.i30 = extractvalue { i32, i32, i32, i32 } %1, 2 + %2 = and i32 %asmresult5.i30, 1610612736 + %3 = icmp eq i32 %2, 1610612736 + br i1 %3, label %return, label %if.else13 + +if.else13: ; preds = %if.else br label %return -if.else7: ; preds = %entry - %and10 = and i32 %asmresult9.i, 524288 - %cmp11 = icmp eq i32 %and10, 0 - br i1 %cmp11, label %if.else13, label %return - -if.else13: ; preds = %if.else7 - %and16 = and i32 %asmresult10.i, 67108864 +if.else14: ; preds = %entry + %and16 = and i32 %asmresult5.i, 524288 %cmp17 = icmp eq i32 %and16, 0 br i1 %cmp17, label %if.else19, label %return -if.else19: ; preds = %if.else13 +if.else19: ; preds = %if.else14 + %and21 = and i32 %asmresult6.i, 67108864 + %cmp22 = icmp eq i32 %and21, 0 + br i1 %cmp22, label %if.else24, label %return + +if.else24: ; preds = %if.else19 tail call void @abort() noreturn nounwind unreachable -return: ; preds = %if.else13, %if.else7, %if.then - %retval.0 = phi i32 [ %3, %if.then ], [ 1, %if.else7 ], [ 0, %if.else13 ] +return: ; preds = %if.else19, %if.else14, %if.else13, %if.else, %if.then + %retval.0 = phi i32 [ 2, %if.else13 ], [ 4, %if.then ], [ 3, %if.else ], [ 1, %if.else14 ], [ 0, %if.else19 ] ret i32 %retval.0 } diff --git a/builtins/target-avx1-x2.ll b/builtins/target-avx1-x2.ll index e79b9f2b..efde5d10 100644 --- a/builtins/target-avx1-x2.ll +++ b/builtins/target-avx1-x2.ll @@ -61,10 +61,12 @@ define <16 x i32> @__max_varying_uint32(<16 x i32>, <16 x i32>) nounwind readonl ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; half conversion routines +ifelse(NO_HALF_DECLARES, `1', `', ` declare float @__half_to_float_uniform(i16 %v) nounwind readnone declare 
@__half_to_float_varying( %v) nounwind readnone declare i16 @__float_to_half_uniform(float %v) nounwind readnone declare @__float_to_half_varying( %v) nounwind readnone +') ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; gather diff --git a/builtins/target-avx1.ll b/builtins/target-avx1.ll index fc6bdcf1..64f8ad33 100644 --- a/builtins/target-avx1.ll +++ b/builtins/target-avx1.ll @@ -61,10 +61,12 @@ define <8 x i32> @__max_varying_uint32(<8 x i32>, <8 x i32>) nounwind readonly a ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; half conversion routines +ifelse(NO_HALF_DECLARES, `1', `', ` declare float @__half_to_float_uniform(i16 %v) nounwind readnone declare @__half_to_float_varying( %v) nounwind readnone declare i16 @__float_to_half_uniform(float %v) nounwind readnone declare @__float_to_half_varying( %v) nounwind readnone +') ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; gather diff --git a/builtins/target-avx11-x2.ll b/builtins/target-avx11-x2.ll new file mode 100644 index 00000000..e2bc599d --- /dev/null +++ b/builtins/target-avx11-x2.ll @@ -0,0 +1,87 @@ +;; Copyright (c) 2012, Intel Corporation +;; All rights reserved. +;; +;; Redistribution and use in source and binary forms, with or without +;; modification, are permitted provided that the following conditions are +;; met: +;; +;; * Redistributions of source code must retain the above copyright +;; notice, this list of conditions and the following disclaimer. +;; +;; * Redistributions in binary form must reproduce the above copyright +;; notice, this list of conditions and the following disclaimer in the +;; documentation and/or other materials provided with the distribution. +;; +;; * Neither the name of Intel Corporation nor the names of its +;; contributors may be used to endorse or promote products derived from +;; this software without specific prior written permission. 
+;; +;; +;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER +;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include(`target-avx1-x2.ll') + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; float/half conversions + +declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>) nounwind readnone +; 0 is round nearest even +declare <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float>, i32) nounwind readnone + +define <16 x float> @__half_to_float_varying(<16 x i16> %v) nounwind readnone { + %r_0 = shufflevector <16 x i16> %v, <16 x i16> undef, + <8 x i32> + %vr_0 = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %r_0) + %r_1 = shufflevector <16 x i16> %v, <16 x i16> undef, + <8 x i32> + %vr_1 = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %r_1) + %r = shufflevector <8 x float> %vr_0, <8 x float> %vr_1, + <16 x i32> + ret <16 x float> %r +} + +define <16 x i16> @__float_to_half_varying(<16 x float> %v) nounwind readnone { + %r_0 = shufflevector <16 x float> %v, <16 x float> undef, + <8 x i32> + %vr_0 = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %r_0, i32 0) + %r_1 = shufflevector <16 x float> %v, <16 x float> undef, + <8 x i32> + %vr_1 = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %r_1, i32 0) + %r = shufflevector <8 x i16> %vr_0, <8 x i16> 
%vr_1, + <16 x i32> + ret <16 x i16> %r +} + +define float @__half_to_float_uniform(i16 %v) nounwind readnone { + %v1 = bitcast i16 %v to <1 x i16> + %vv = shufflevector <1 x i16> %v1, <1 x i16> undef, + <8 x i32> + %rv = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %vv) + %r = extractelement <8 x float> %rv, i32 0 + ret float %r +} + +define i16 @__float_to_half_uniform(float %v) nounwind readnone { + %v1 = bitcast float %v to <1 x float> + %vv = shufflevector <1 x float> %v1, <1 x float> undef, + <8 x i32> + ; round to nearest even + %rv = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %vv, i32 0) + %r = extractelement <8 x i16> %rv, i32 0 + ret i16 %r +} + diff --git a/builtins/target-avx11.ll b/builtins/target-avx11.ll new file mode 100644 index 00000000..54aa35af --- /dev/null +++ b/builtins/target-avx11.ll @@ -0,0 +1,71 @@ +;; Copyright (c) 2012, Intel Corporation +;; All rights reserved. +;; +;; Redistribution and use in source and binary forms, with or without +;; modification, are permitted provided that the following conditions are +;; met: +;; +;; * Redistributions of source code must retain the above copyright +;; notice, this list of conditions and the following disclaimer. +;; +;; * Redistributions in binary form must reproduce the above copyright +;; notice, this list of conditions and the following disclaimer in the +;; documentation and/or other materials provided with the distribution. +;; +;; * Neither the name of Intel Corporation nor the names of its +;; contributors may be used to endorse or promote products derived from +;; this software without specific prior written permission. +;; +;; +;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +;; PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER +;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include(`target-avx1.ll') + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; float/half conversions + +declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>) nounwind readnone +; 0 is round nearest even +declare <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float>, i32) nounwind readnone + +define <8 x float> @__half_to_float_varying(<8 x i16> %v) nounwind readnone { + %r = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %v) + ret <8 x float> %r +} + +define <8 x i16> @__float_to_half_varying(<8 x float> %v) nounwind readnone { + %r = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %v, i32 0) + ret <8 x i16> %r +} + +define float @__half_to_float_uniform(i16 %v) nounwind readnone { + %v1 = bitcast i16 %v to <1 x i16> + %vv = shufflevector <1 x i16> %v1, <1 x i16> undef, + <8 x i32> + %rv = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %vv) + %r = extractelement <8 x float> %rv, i32 0 + ret float %r +} + +define i16 @__float_to_half_uniform(float %v) nounwind readnone { + %v1 = bitcast float %v to <1 x float> + %vv = shufflevector <1 x float> %v1, <1 x float> undef, + <8 x i32> + ; round to nearest even + %rv = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %vv, i32 0) + %r = extractelement <8 x i16> %rv, i32 0 + ret i16 %r +} + diff --git a/ispc.cpp b/ispc.cpp index 38b9ec70..f520aa75 100644 --- a/ispc.cpp +++ b/ispc.cpp @@ -93,8 +93,14 @@ lGetSystemISA() { __cpuidex(info, 7, 0); if 
((info[1] & (1 << 5)) != 0) return "avx2"; - else - return "avx"; + else { + // ivybridge? + if ((info[2] & (1 << 29)) != 0 && // F16C + (info[2] & (1 << 30)) != 0) // RDRAND + return "avx1.1"; + else + return "avx"; + } } else if ((info[2] & (1 << 19)) != 0) return "sse4"; @@ -198,7 +204,7 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa, } // This is the case for most of them - t->hasHalf = t->hasTranscendentals = false; + t->hasHalf = t->hasRand = t->hasTranscendentals = false; if (!strcasecmp(isa, "sse2")) { t->isa = Target::SSE2; @@ -284,7 +290,7 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa, t->maskingIsFree = false; t->maskBitCount = 32; } - else if (!strcasecmp(isa, "avx")) { + else if (!strcasecmp(isa, "avx") || !strcasecmp(isa, "avx1")) { t->isa = Target::AVX; t->nativeVectorWidth = 8; t->vectorWidth = 8; @@ -292,32 +298,54 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa, t->maskingIsFree = false; t->maskBitCount = 32; } - else if (!strcasecmp(isa, "avx-x2") || !strcasecmp(isa, "avx1-x2")) { - t->isa = Target::AVX; + else if (!strcasecmp(isa, "avx-x2") || !strcasecmp(isa, "avx1-x2")) { + t->isa = Target::AVX; t->nativeVectorWidth = 8; t->vectorWidth = 16; t->attributes = "+avx,+popcnt,+cmov"; t->maskingIsFree = false; t->maskBitCount = 32; } + else if (!strcasecmp(isa, "avx1.1")) { + t->isa = Target::AVX11; + t->nativeVectorWidth = 8; + t->vectorWidth = 8; + t->attributes = "+avx,+popcnt,+cmov,+f16c,+rdrand"; + t->maskingIsFree = false; + t->maskBitCount = 32; + t->hasHalf = true; + t->hasRand = true; + } + else if (!strcasecmp(isa, "avx1.1-x2")) { + t->isa = Target::AVX11; + t->nativeVectorWidth = 8; + t->vectorWidth = 16; + t->attributes = "+avx,+popcnt,+cmov,+f16c,+rdrand"; + t->maskingIsFree = false; + t->maskBitCount = 32; + t->hasHalf = true; + t->hasRand = true; + } #ifndef LLVM_3_0 else if (!strcasecmp(isa, "avx2")) { t->isa = Target::AVX2; t->nativeVectorWidth = 8; t->vectorWidth = 8; - t->attributes = 
"+avx2,+popcnt,+cmov,+f16c"; + t->attributes = "+avx2,+popcnt,+cmov,+f16c,+rdrand"; t->maskingIsFree = false; t->maskBitCount = 32; t->hasHalf = true; + t->hasRand = true; } else if (!strcasecmp(isa, "avx2-x2")) { t->isa = Target::AVX2; t->nativeVectorWidth = 16; t->vectorWidth = 16; - t->attributes = "+avx2,+popcnt,+cmov,+f16c"; + t->attributes = "+avx2,+popcnt,+cmov,+f16c,+rdrand"; t->maskingIsFree = false; t->maskBitCount = 32; t->hasHalf = true; + t->hasRand = true; } #endif // !LLVM_3_0 else { @@ -360,7 +388,7 @@ const char * Target::SupportedTargetISAs() { return "sse2, sse2-x2, sse4, sse4-x2, avx, avx-x2" #ifndef LLVM_3_0 - ", avx2, avx2-x2" + ", avx1.1, avx1.1-x2, avx2, avx2-x2" #endif // !LLVM_3_0 ", generic-1, generic-4, generic-8, generic-16, generic-32"; } @@ -426,6 +454,8 @@ Target::GetISAString() const { return "sse4"; case Target::AVX: return "avx"; + case Target::AVX11: + return "avx1.1"; case Target::AVX2: return "avx2"; case Target::GENERIC: diff --git a/ispc.h b/ispc.h index 26e592ff..807faf78 100644 --- a/ispc.h +++ b/ispc.h @@ -207,7 +207,7 @@ struct Target { flexible/performant of them will apear last in the enumerant. Note also that __best_available_isa() needs to be updated if ISAs are added or the enumerant values are reordered. */ - enum ISA { SSE2, SSE4, AVX, AVX2, GENERIC, NUM_ISAS }; + enum ISA { SSE2, SSE4, AVX, AVX11, AVX2, GENERIC, NUM_ISAS }; /** Instruction set being compiled to. */ ISA isa; @@ -252,6 +252,9 @@ struct Target { conversions. */ bool hasHalf; + /** Indicates whether there is an ISA random number instruction. */ + bool hasRand; + /** Indicates whether the target has support for transcendentals (beyond sqrt, which we assume that all of them handle). 
*/ bool hasTranscendentals; diff --git a/ispc.vcxproj b/ispc.vcxproj index 6478df4e..cef62d21 100755 --- a/ispc.vcxproj +++ b/ispc.vcxproj @@ -20,6 +20,8 @@ + + @@ -188,6 +190,32 @@ Building gen-bitcode-avx1-x2.cpp + + + Document + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx11.ll | python bitcode2cpp.py builtins\target-avx11.ll > gen-bitcode-avx11.cpp + gen-bitcode-avx11.cpp + builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx.ll + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx11.ll | python bitcode2cpp.py builtins\target-avx11.ll > gen-bitcode-avx11.cpp + gen-bitcode-avx11.cpp + builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx.ll + Building gen-bitcode-avx11.cpp + Building gen-bitcode-avx11.cpp + + + + + Document + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx11-x2.ll | python bitcode2cpp.py builtins\target-avx11-x2.ll > gen-bitcode-avx11-x2.cpp + gen-bitcode-avx11-x2.cpp + builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx-x2.ll + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx11-x2.ll | python bitcode2cpp.py builtins\target-avx11-x2.ll > gen-bitcode-avx11-x2.cpp + gen-bitcode-avx11-x2.cpp + builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx-x2.ll + Building gen-bitcode-avx11-x2.cpp + Building gen-bitcode-avx11-x2.cpp + + Document