Add initial support for "avx1.1" targets for Ivy Bridge.
So far, only the use of the float/half conversion instructions distinguishes this from the "avx1" target. Partial work on issue #263.
This commit is contained in:
4
Makefile
4
Makefile
@@ -84,8 +84,8 @@ CXX_SRC=ast.cpp builtins.cpp cbackend.cpp ctx.cpp decl.cpp expr.cpp func.cpp \
|
|||||||
type.cpp util.cpp
|
type.cpp util.cpp
|
||||||
HEADERS=ast.h builtins.h ctx.h decl.h expr.h func.h ispc.h llvmutil.h module.h \
|
HEADERS=ast.h builtins.h ctx.h decl.h expr.h func.h ispc.h llvmutil.h module.h \
|
||||||
opt.h stmt.h sym.h type.h util.h
|
opt.h stmt.h sym.h type.h util.h
|
||||||
TARGETS=avx1 avx1-x2 avx2 avx2-x2 sse2 sse2-x2 sse4 sse4-x2 generic-4 generic-8 \
|
TARGETS=avx1 avx1-x2 avx11 avx11-x2 avx2 avx2-x2 sse2 sse2-x2 sse4 sse4-x2 \
|
||||||
generic-16 generic-32 generic-64 generic-1
|
generic-4 generic-8 generic-16 generic-32 generic-64 generic-1
|
||||||
BUILTINS_SRC=$(addprefix builtins/target-, $(addsuffix .ll, $(TARGETS))) \
|
BUILTINS_SRC=$(addprefix builtins/target-, $(addsuffix .ll, $(TARGETS))) \
|
||||||
builtins/dispatch.ll
|
builtins/dispatch.ll
|
||||||
BUILTINS_OBJS=$(addprefix builtins-, $(notdir $(BUILTINS_SRC:.ll=.o))) \
|
BUILTINS_OBJS=$(addprefix builtins-, $(notdir $(BUILTINS_SRC:.ll=.o))) \
|
||||||
|
|||||||
22
builtins.cpp
22
builtins.cpp
@@ -804,6 +804,26 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
|
|||||||
FATAL("logic error in DefineStdlib");
|
FATAL("logic error in DefineStdlib");
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
case Target::AVX11:
|
||||||
|
switch (g->target.vectorWidth) {
|
||||||
|
case 8:
|
||||||
|
extern unsigned char builtins_bitcode_avx11[];
|
||||||
|
extern int builtins_bitcode_avx11_length;
|
||||||
|
AddBitcodeToModule(builtins_bitcode_avx11,
|
||||||
|
builtins_bitcode_avx11_length,
|
||||||
|
module, symbolTable);
|
||||||
|
break;
|
||||||
|
case 16:
|
||||||
|
extern unsigned char builtins_bitcode_avx11_x2[];
|
||||||
|
extern int builtins_bitcode_avx11_x2_length;
|
||||||
|
AddBitcodeToModule(builtins_bitcode_avx11_x2,
|
||||||
|
builtins_bitcode_avx11_x2_length,
|
||||||
|
module, symbolTable);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
FATAL("logic error in DefineStdlib");
|
||||||
|
}
|
||||||
|
break;
|
||||||
case Target::AVX2:
|
case Target::AVX2:
|
||||||
switch (g->target.vectorWidth) {
|
switch (g->target.vectorWidth) {
|
||||||
case 8:
|
case 8:
|
||||||
@@ -898,6 +918,8 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
|
|||||||
|
|
||||||
lDefineConstantInt("__have_native_half", g->target.hasHalf, module,
|
lDefineConstantInt("__have_native_half", g->target.hasHalf, module,
|
||||||
symbolTable);
|
symbolTable);
|
||||||
|
lDefineConstantInt("__have_native_rand", g->target.hasRand, module,
|
||||||
|
symbolTable);
|
||||||
lDefineConstantInt("__have_native_transcendentals", g->target.hasTranscendentals,
|
lDefineConstantInt("__have_native_transcendentals", g->target.hasTranscendentals,
|
||||||
module, symbolTable);
|
module, symbolTable);
|
||||||
|
|
||||||
|
|||||||
@@ -48,8 +48,8 @@ declare void @abort() noreturn
|
|||||||
;; corresponding to one of the Target::ISA enumerant values that gives the
|
;; corresponding to one of the Target::ISA enumerant values that gives the
|
||||||
;; most capable ISA that the current system can run.
|
;; most capable ISA that the current system can run.
|
||||||
;;
|
;;
|
||||||
;; Note: clang from LLVM 2.9 should be used if this is updated, for maximum
|
;; Note: clang from LLVM 3.0 should be used if this is updated, for maximum
|
||||||
;; backwards compatibility for anyone building ispc with LLVM 2.9.
|
;; backwards compatibility for anyone building ispc with LLVM 3.0
|
||||||
;;
|
;;
|
||||||
;; #include <stdint.h>
|
;; #include <stdint.h>
|
||||||
;; #include <stdlib.h>
|
;; #include <stdlib.h>
|
||||||
@@ -80,9 +80,14 @@ declare void @abort() noreturn
|
|||||||
;; // Call cpuid with eax=7, ecx=0
|
;; // Call cpuid with eax=7, ecx=0
|
||||||
;; __cpuid_count(info, 7, 0);
|
;; __cpuid_count(info, 7, 0);
|
||||||
;; if ((info[1] & (1 << 5)) != 0)
|
;; if ((info[1] & (1 << 5)) != 0)
|
||||||
;; return 3; // AVX2
|
;; return 4; // AVX2
|
||||||
;; else
|
;; else {
|
||||||
;; return 2; // AVX1
|
;; if ((info[2] & (1 << 29)) != 0 && // F16C
|
||||||
|
;; (info[2] & (1 << 30)) != 0) // RDRAND
|
||||||
|
;; return 3; // AVX1 on IVB
|
||||||
|
;; else
|
||||||
|
;; return 2; // AVX1
|
||||||
|
;; }
|
||||||
;; }
|
;; }
|
||||||
;; else if ((info[2] & (1 << 19)) != 0)
|
;; else if ((info[2] & (1 << 19)) != 0)
|
||||||
;; return 1; // SSE4
|
;; return 1; // SSE4
|
||||||
@@ -92,41 +97,47 @@ declare void @abort() noreturn
|
|||||||
;; abort();
|
;; abort();
|
||||||
;; }
|
;; }
|
||||||
|
|
||||||
%0 = type { i32, i32, i32, i32 }
|
define i32 @__get_system_isa() nounwind uwtable ssp {
|
||||||
|
|
||||||
define i32 @__get_system_isa() nounwind ssp {
|
|
||||||
entry:
|
entry:
|
||||||
%0 = tail call %0 asm sideeffect "cpuid", "={ax},={bx},={cx},={dx},0,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
|
%0 = tail call { i32, i32, i32, i32 } asm sideeffect "cpuid", "={ax},={bx},={cx},={dx},0,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
|
||||||
%asmresult9.i = extractvalue %0 %0, 2
|
%asmresult5.i = extractvalue { i32, i32, i32, i32 } %0, 2
|
||||||
%asmresult10.i = extractvalue %0 %0, 3
|
%asmresult6.i = extractvalue { i32, i32, i32, i32 } %0, 3
|
||||||
%and = and i32 %asmresult9.i, 268435456
|
%and = and i32 %asmresult5.i, 268435456
|
||||||
%cmp = icmp eq i32 %and, 0
|
%cmp = icmp eq i32 %and, 0
|
||||||
br i1 %cmp, label %if.else7, label %if.then
|
br i1 %cmp, label %if.else14, label %if.then
|
||||||
|
|
||||||
if.then: ; preds = %entry
|
if.then: ; preds = %entry
|
||||||
%1 = tail call %0 asm sideeffect "xchg$(l$)\09$(%$)ebx, $1\0A\09cpuid\0A\09xchg$(l$)\09$(%$)ebx, $1\0A\09", "={ax},=r,={cx},={dx},0,2,~{dirflag},~{fpsr},~{flags}"(i32 7, i32 0) nounwind
|
%1 = tail call { i32, i32, i32, i32 } asm sideeffect "xchg$(l$)\09$(%$)ebx, $1\0A\09cpuid\0A\09xchg$(l$)\09$(%$)ebx, $1\0A\09", "={ax},=r,={cx},={dx},0,2,~{dirflag},~{fpsr},~{flags}"(i32 7, i32 0) nounwind
|
||||||
%asmresult9.i24 = extractvalue %0 %1, 1
|
%asmresult4.i29 = extractvalue { i32, i32, i32, i32 } %1, 1
|
||||||
%and4 = lshr i32 %asmresult9.i24, 5
|
%and3 = and i32 %asmresult4.i29, 32
|
||||||
%2 = and i32 %and4, 1
|
%cmp4 = icmp eq i32 %and3, 0
|
||||||
%3 = or i32 %2, 2
|
br i1 %cmp4, label %if.else, label %return
|
||||||
|
|
||||||
|
if.else: ; preds = %if.then
|
||||||
|
%asmresult5.i30 = extractvalue { i32, i32, i32, i32 } %0, 2 ; ECX of cpuid leaf 1 (%0), not leaf 7 (%1): F16C is bit 29 and RDRAND is bit 30 of CPUID.1:ECX
|
||||||
|
%2 = and i32 %asmresult5.i30, 1610612736
|
||||||
|
%3 = icmp eq i32 %2, 1610612736
|
||||||
|
br i1 %3, label %return, label %if.else13
|
||||||
|
|
||||||
|
if.else13: ; preds = %if.else
|
||||||
br label %return
|
br label %return
|
||||||
|
|
||||||
if.else7: ; preds = %entry
|
if.else14: ; preds = %entry
|
||||||
%and10 = and i32 %asmresult9.i, 524288
|
%and16 = and i32 %asmresult5.i, 524288
|
||||||
%cmp11 = icmp eq i32 %and10, 0
|
|
||||||
br i1 %cmp11, label %if.else13, label %return
|
|
||||||
|
|
||||||
if.else13: ; preds = %if.else7
|
|
||||||
%and16 = and i32 %asmresult10.i, 67108864
|
|
||||||
%cmp17 = icmp eq i32 %and16, 0
|
%cmp17 = icmp eq i32 %and16, 0
|
||||||
br i1 %cmp17, label %if.else19, label %return
|
br i1 %cmp17, label %if.else19, label %return
|
||||||
|
|
||||||
if.else19: ; preds = %if.else13
|
if.else19: ; preds = %if.else14
|
||||||
|
%and21 = and i32 %asmresult6.i, 67108864
|
||||||
|
%cmp22 = icmp eq i32 %and21, 0
|
||||||
|
br i1 %cmp22, label %if.else24, label %return
|
||||||
|
|
||||||
|
if.else24: ; preds = %if.else19
|
||||||
tail call void @abort() noreturn nounwind
|
tail call void @abort() noreturn nounwind
|
||||||
unreachable
|
unreachable
|
||||||
|
|
||||||
return: ; preds = %if.else13, %if.else7, %if.then
|
return: ; preds = %if.else19, %if.else14, %if.else13, %if.else, %if.then
|
||||||
%retval.0 = phi i32 [ %3, %if.then ], [ 1, %if.else7 ], [ 0, %if.else13 ]
|
%retval.0 = phi i32 [ 2, %if.else13 ], [ 4, %if.then ], [ 3, %if.else ], [ 1, %if.else14 ], [ 0, %if.else19 ]
|
||||||
ret i32 %retval.0
|
ret i32 %retval.0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -61,10 +61,12 @@ define <16 x i32> @__max_varying_uint32(<16 x i32>, <16 x i32>) nounwind readonl
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; half conversion routines
|
;; half conversion routines
|
||||||
|
|
||||||
|
ifelse(NO_HALF_DECLARES, `1', `', `
|
||||||
declare float @__half_to_float_uniform(i16 %v) nounwind readnone
|
declare float @__half_to_float_uniform(i16 %v) nounwind readnone
|
||||||
declare <WIDTH x float> @__half_to_float_varying(<WIDTH x i16> %v) nounwind readnone
|
declare <WIDTH x float> @__half_to_float_varying(<WIDTH x i16> %v) nounwind readnone
|
||||||
declare i16 @__float_to_half_uniform(float %v) nounwind readnone
|
declare i16 @__float_to_half_uniform(float %v) nounwind readnone
|
||||||
declare <WIDTH x i16> @__float_to_half_varying(<WIDTH x float> %v) nounwind readnone
|
declare <WIDTH x i16> @__float_to_half_varying(<WIDTH x float> %v) nounwind readnone
|
||||||
|
')
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; gather
|
;; gather
|
||||||
|
|||||||
@@ -61,10 +61,12 @@ define <8 x i32> @__max_varying_uint32(<8 x i32>, <8 x i32>) nounwind readonly a
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; half conversion routines
|
;; half conversion routines
|
||||||
|
|
||||||
|
ifelse(NO_HALF_DECLARES, `1', `', `
|
||||||
declare float @__half_to_float_uniform(i16 %v) nounwind readnone
|
declare float @__half_to_float_uniform(i16 %v) nounwind readnone
|
||||||
declare <WIDTH x float> @__half_to_float_varying(<WIDTH x i16> %v) nounwind readnone
|
declare <WIDTH x float> @__half_to_float_varying(<WIDTH x i16> %v) nounwind readnone
|
||||||
declare i16 @__float_to_half_uniform(float %v) nounwind readnone
|
declare i16 @__float_to_half_uniform(float %v) nounwind readnone
|
||||||
declare <WIDTH x i16> @__float_to_half_varying(<WIDTH x float> %v) nounwind readnone
|
declare <WIDTH x i16> @__float_to_half_varying(<WIDTH x float> %v) nounwind readnone
|
||||||
|
')
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; gather
|
;; gather
|
||||||
|
|||||||
87
builtins/target-avx11-x2.ll
Normal file
87
builtins/target-avx11-x2.ll
Normal file
@@ -0,0 +1,87 @@
|
|||||||
|
;; Copyright (c) 2012, Intel Corporation
|
||||||
|
;; All rights reserved.
|
||||||
|
;;
|
||||||
|
;; Redistribution and use in source and binary forms, with or without
|
||||||
|
;; modification, are permitted provided that the following conditions are
|
||||||
|
;; met:
|
||||||
|
;;
|
||||||
|
;; * Redistributions of source code must retain the above copyright
|
||||||
|
;; notice, this list of conditions and the following disclaimer.
|
||||||
|
;;
|
||||||
|
;; * Redistributions in binary form must reproduce the above copyright
|
||||||
|
;; notice, this list of conditions and the following disclaimer in the
|
||||||
|
;; documentation and/or other materials provided with the distribution.
|
||||||
|
;;
|
||||||
|
;; * Neither the name of Intel Corporation nor the names of its
|
||||||
|
;; contributors may be used to endorse or promote products derived from
|
||||||
|
;; this software without specific prior written permission.
|
||||||
|
;;
|
||||||
|
;;
|
||||||
|
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||||
|
;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||||
|
;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||||
|
;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
include(`target-avx1-x2.ll')
|
||||||
|
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;; float/half conversions
|
||||||
|
|
||||||
|
declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>) nounwind readnone
|
||||||
|
; 0 is round nearest even
|
||||||
|
declare <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float>, i32) nounwind readnone
|
||||||
|
|
||||||
|
define <16 x float> @__half_to_float_varying(<16 x i16> %v) nounwind readnone {
|
||||||
|
%r_0 = shufflevector <16 x i16> %v, <16 x i16> undef,
|
||||||
|
<8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||||
|
%vr_0 = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %r_0)
|
||||||
|
%r_1 = shufflevector <16 x i16> %v, <16 x i16> undef,
|
||||||
|
<8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||||
|
%vr_1 = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %r_1)
|
||||||
|
%r = shufflevector <8 x float> %vr_0, <8 x float> %vr_1,
|
||||||
|
<16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
|
||||||
|
i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||||
|
ret <16 x float> %r
|
||||||
|
}
|
||||||
|
|
||||||
|
define <16 x i16> @__float_to_half_varying(<16 x float> %v) nounwind readnone {
|
||||||
|
%r_0 = shufflevector <16 x float> %v, <16 x float> undef,
|
||||||
|
<8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||||
|
%vr_0 = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %r_0, i32 0)
|
||||||
|
%r_1 = shufflevector <16 x float> %v, <16 x float> undef,
|
||||||
|
<8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||||
|
%vr_1 = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %r_1, i32 0)
|
||||||
|
%r = shufflevector <8 x i16> %vr_0, <8 x i16> %vr_1,
|
||||||
|
<16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
|
||||||
|
i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||||
|
ret <16 x i16> %r
|
||||||
|
}
|
||||||
|
|
||||||
|
define float @__half_to_float_uniform(i16 %v) nounwind readnone {
|
||||||
|
%v1 = bitcast i16 %v to <1 x i16>
|
||||||
|
%vv = shufflevector <1 x i16> %v1, <1 x i16> undef,
|
||||||
|
<8 x i32> <i32 0, i32 undef, i32 undef, i32 undef,
|
||||||
|
i32 undef, i32 undef, i32 undef, i32 undef>
|
||||||
|
%rv = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %vv)
|
||||||
|
%r = extractelement <8 x float> %rv, i32 0
|
||||||
|
ret float %r
|
||||||
|
}
|
||||||
|
|
||||||
|
define i16 @__float_to_half_uniform(float %v) nounwind readnone {
|
||||||
|
%v1 = bitcast float %v to <1 x float>
|
||||||
|
%vv = shufflevector <1 x float> %v1, <1 x float> undef,
|
||||||
|
<8 x i32> <i32 0, i32 undef, i32 undef, i32 undef,
|
||||||
|
i32 undef, i32 undef, i32 undef, i32 undef>
|
||||||
|
; round to nearest even
|
||||||
|
%rv = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %vv, i32 0)
|
||||||
|
%r = extractelement <8 x i16> %rv, i32 0
|
||||||
|
ret i16 %r
|
||||||
|
}
|
||||||
|
|
||||||
71
builtins/target-avx11.ll
Normal file
71
builtins/target-avx11.ll
Normal file
@@ -0,0 +1,71 @@
|
|||||||
|
;; Copyright (c) 2012, Intel Corporation
|
||||||
|
;; All rights reserved.
|
||||||
|
;;
|
||||||
|
;; Redistribution and use in source and binary forms, with or without
|
||||||
|
;; modification, are permitted provided that the following conditions are
|
||||||
|
;; met:
|
||||||
|
;;
|
||||||
|
;; * Redistributions of source code must retain the above copyright
|
||||||
|
;; notice, this list of conditions and the following disclaimer.
|
||||||
|
;;
|
||||||
|
;; * Redistributions in binary form must reproduce the above copyright
|
||||||
|
;; notice, this list of conditions and the following disclaimer in the
|
||||||
|
;; documentation and/or other materials provided with the distribution.
|
||||||
|
;;
|
||||||
|
;; * Neither the name of Intel Corporation nor the names of its
|
||||||
|
;; contributors may be used to endorse or promote products derived from
|
||||||
|
;; this software without specific prior written permission.
|
||||||
|
;;
|
||||||
|
;;
|
||||||
|
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||||
|
;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||||
|
;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||||
|
;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
include(`target-avx1.ll')
|
||||||
|
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;; float/half conversions
|
||||||
|
|
||||||
|
declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>) nounwind readnone
|
||||||
|
; 0 is round nearest even
|
||||||
|
declare <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float>, i32) nounwind readnone
|
||||||
|
|
||||||
|
define <8 x float> @__half_to_float_varying(<8 x i16> %v) nounwind readnone {
|
||||||
|
%r = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %v)
|
||||||
|
ret <8 x float> %r
|
||||||
|
}
|
||||||
|
|
||||||
|
define <8 x i16> @__float_to_half_varying(<8 x float> %v) nounwind readnone {
|
||||||
|
%r = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %v, i32 0)
|
||||||
|
ret <8 x i16> %r
|
||||||
|
}
|
||||||
|
|
||||||
|
define float @__half_to_float_uniform(i16 %v) nounwind readnone {
|
||||||
|
%v1 = bitcast i16 %v to <1 x i16>
|
||||||
|
%vv = shufflevector <1 x i16> %v1, <1 x i16> undef,
|
||||||
|
<8 x i32> <i32 0, i32 undef, i32 undef, i32 undef,
|
||||||
|
i32 undef, i32 undef, i32 undef, i32 undef>
|
||||||
|
%rv = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %vv)
|
||||||
|
%r = extractelement <8 x float> %rv, i32 0
|
||||||
|
ret float %r
|
||||||
|
}
|
||||||
|
|
||||||
|
define i16 @__float_to_half_uniform(float %v) nounwind readnone {
|
||||||
|
%v1 = bitcast float %v to <1 x float>
|
||||||
|
%vv = shufflevector <1 x float> %v1, <1 x float> undef,
|
||||||
|
<8 x i32> <i32 0, i32 undef, i32 undef, i32 undef,
|
||||||
|
i32 undef, i32 undef, i32 undef, i32 undef>
|
||||||
|
; round to nearest even
|
||||||
|
%rv = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %vv, i32 0)
|
||||||
|
%r = extractelement <8 x i16> %rv, i32 0
|
||||||
|
ret i16 %r
|
||||||
|
}
|
||||||
|
|
||||||
48
ispc.cpp
48
ispc.cpp
@@ -93,8 +93,14 @@ lGetSystemISA() {
|
|||||||
__cpuidex(info, 7, 0);
|
int info2[4]; __cpuidex(info2, 7, 0);  // leaf 7 goes into its own array so info[] keeps the leaf-1 bits (F16C/RDRAND are CPUID.1:ECX) tested below; NOTE(review): assumes info is int[4] - confirm
|
||||||
if ((info[1] & (1 << 5)) != 0)
|
if ((info2[1] & (1 << 5)) != 0)  // AVX2 is CPUID.(EAX=7,ECX=0):EBX bit 5
|
||||||
return "avx2";
|
return "avx2";
|
||||||
else
|
else {
|
||||||
return "avx";
|
// ivybridge?
|
||||||
|
if ((info[2] & (1 << 29)) != 0 && // F16C
|
||||||
|
(info[2] & (1 << 30)) != 0) // RDRAND
|
||||||
|
return "avx1.1";
|
||||||
|
else
|
||||||
|
return "avx";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else if ((info[2] & (1 << 19)) != 0)
|
else if ((info[2] & (1 << 19)) != 0)
|
||||||
return "sse4";
|
return "sse4";
|
||||||
@@ -198,7 +204,7 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// This is the case for most of them
|
// This is the case for most of them
|
||||||
t->hasHalf = t->hasTranscendentals = false;
|
t->hasHalf = t->hasRand = t->hasTranscendentals = false;
|
||||||
|
|
||||||
if (!strcasecmp(isa, "sse2")) {
|
if (!strcasecmp(isa, "sse2")) {
|
||||||
t->isa = Target::SSE2;
|
t->isa = Target::SSE2;
|
||||||
@@ -284,7 +290,7 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
|
|||||||
t->maskingIsFree = false;
|
t->maskingIsFree = false;
|
||||||
t->maskBitCount = 32;
|
t->maskBitCount = 32;
|
||||||
}
|
}
|
||||||
else if (!strcasecmp(isa, "avx")) {
|
else if (!strcasecmp(isa, "avx") || !strcasecmp(isa, "avx1")) {
|
||||||
t->isa = Target::AVX;
|
t->isa = Target::AVX;
|
||||||
t->nativeVectorWidth = 8;
|
t->nativeVectorWidth = 8;
|
||||||
t->vectorWidth = 8;
|
t->vectorWidth = 8;
|
||||||
@@ -292,32 +298,54 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
|
|||||||
t->maskingIsFree = false;
|
t->maskingIsFree = false;
|
||||||
t->maskBitCount = 32;
|
t->maskBitCount = 32;
|
||||||
}
|
}
|
||||||
else if (!strcasecmp(isa, "avx-x2")) {
|
else if (!strcasecmp(isa, "avx-x2") || !strcasecmp(isa, "avx1-x2")) {
|
||||||
t->isa = Target::AVX;
|
t->isa = Target::AVX;  // plain AVX: this branch enables neither +f16c nor +rdrand, so it must not be tagged AVX11
|
||||||
t->nativeVectorWidth = 8;
|
t->nativeVectorWidth = 8;
|
||||||
t->vectorWidth = 16;
|
t->vectorWidth = 16;
|
||||||
t->attributes = "+avx,+popcnt,+cmov";
|
t->attributes = "+avx,+popcnt,+cmov";
|
||||||
t->maskingIsFree = false;
|
t->maskingIsFree = false;
|
||||||
t->maskBitCount = 32;
|
t->maskBitCount = 32;
|
||||||
}
|
}
|
||||||
|
else if (!strcasecmp(isa, "avx1.1")) {
|
||||||
|
t->isa = Target::AVX11;
|
||||||
|
t->nativeVectorWidth = 8;
|
||||||
|
t->vectorWidth = 8;
|
||||||
|
t->attributes = "+avx,+popcnt,+cmov,+f16c,+rdrand";
|
||||||
|
t->maskingIsFree = false;
|
||||||
|
t->maskBitCount = 32;
|
||||||
|
t->hasHalf = true;
|
||||||
|
t->hasRand = true;
|
||||||
|
}
|
||||||
|
else if (!strcasecmp(isa, "avx1.1-x2")) {
|
||||||
|
t->isa = Target::AVX11;
|
||||||
|
t->nativeVectorWidth = 8;
|
||||||
|
t->vectorWidth = 16;
|
||||||
|
t->attributes = "+avx,+popcnt,+cmov,+f16c,+rdrand";
|
||||||
|
t->maskingIsFree = false;
|
||||||
|
t->maskBitCount = 32;
|
||||||
|
t->hasHalf = true;
|
||||||
|
t->hasRand = true;
|
||||||
|
}
|
||||||
#ifndef LLVM_3_0
|
#ifndef LLVM_3_0
|
||||||
else if (!strcasecmp(isa, "avx2")) {
|
else if (!strcasecmp(isa, "avx2")) {
|
||||||
t->isa = Target::AVX2;
|
t->isa = Target::AVX2;
|
||||||
t->nativeVectorWidth = 8;
|
t->nativeVectorWidth = 8;
|
||||||
t->vectorWidth = 8;
|
t->vectorWidth = 8;
|
||||||
t->attributes = "+avx2,+popcnt,+cmov,+f16c";
|
t->attributes = "+avx2,+popcnt,+cmov,+f16c,+rdrand";
|
||||||
t->maskingIsFree = false;
|
t->maskingIsFree = false;
|
||||||
t->maskBitCount = 32;
|
t->maskBitCount = 32;
|
||||||
t->hasHalf = true;
|
t->hasHalf = true;
|
||||||
|
t->hasRand = true;
|
||||||
}
|
}
|
||||||
else if (!strcasecmp(isa, "avx2-x2")) {
|
else if (!strcasecmp(isa, "avx2-x2")) {
|
||||||
t->isa = Target::AVX2;
|
t->isa = Target::AVX2;
|
||||||
t->nativeVectorWidth = 16;
|
t->nativeVectorWidth = 16;
|
||||||
t->vectorWidth = 16;
|
t->vectorWidth = 16;
|
||||||
t->attributes = "+avx2,+popcnt,+cmov,+f16c";
|
t->attributes = "+avx2,+popcnt,+cmov,+f16c,+rdrand";
|
||||||
t->maskingIsFree = false;
|
t->maskingIsFree = false;
|
||||||
t->maskBitCount = 32;
|
t->maskBitCount = 32;
|
||||||
t->hasHalf = true;
|
t->hasHalf = true;
|
||||||
|
t->hasRand = true;
|
||||||
}
|
}
|
||||||
#endif // !LLVM_3_0
|
#endif // !LLVM_3_0
|
||||||
else {
|
else {
|
||||||
@@ -360,7 +388,7 @@ const char *
|
|||||||
Target::SupportedTargetISAs() {
|
Target::SupportedTargetISAs() {
|
||||||
return "sse2, sse2-x2, sse4, sse4-x2, avx, avx-x2"
|
return "sse2, sse2-x2, sse4, sse4-x2, avx, avx-x2"
|
||||||
#ifndef LLVM_3_0
|
#ifndef LLVM_3_0
|
||||||
", avx2, avx2-x2"
|
", avx1.1, avx1.1-x2, avx2, avx2-x2"
|
||||||
#endif // !LLVM_3_0
|
#endif // !LLVM_3_0
|
||||||
", generic-1, generic-4, generic-8, generic-16, generic-32";
|
", generic-1, generic-4, generic-8, generic-16, generic-32";
|
||||||
}
|
}
|
||||||
@@ -426,6 +454,8 @@ Target::GetISAString() const {
|
|||||||
return "sse4";
|
return "sse4";
|
||||||
case Target::AVX:
|
case Target::AVX:
|
||||||
return "avx";
|
return "avx";
|
||||||
|
case Target::AVX11:
|
||||||
|
return "avx11";
|
||||||
case Target::AVX2:
|
case Target::AVX2:
|
||||||
return "avx2";
|
return "avx2";
|
||||||
case Target::GENERIC:
|
case Target::GENERIC:
|
||||||
|
|||||||
5
ispc.h
5
ispc.h
@@ -207,7 +207,7 @@ struct Target {
|
|||||||
flexible/performant of them will appear last in the enumerant. Note
|
flexible/performant of them will appear last in the enumerant. Note
|
||||||
also that __best_available_isa() needs to be updated if ISAs are
|
also that __best_available_isa() needs to be updated if ISAs are
|
||||||
added or the enumerant values are reordered. */
|
added or the enumerant values are reordered. */
|
||||||
enum ISA { SSE2, SSE4, AVX, AVX2, GENERIC, NUM_ISAS };
|
enum ISA { SSE2, SSE4, AVX, AVX11, AVX2, GENERIC, NUM_ISAS };
|
||||||
|
|
||||||
/** Instruction set being compiled to. */
|
/** Instruction set being compiled to. */
|
||||||
ISA isa;
|
ISA isa;
|
||||||
@@ -252,6 +252,9 @@ struct Target {
|
|||||||
conversions. */
|
conversions. */
|
||||||
bool hasHalf;
|
bool hasHalf;
|
||||||
|
|
||||||
|
/** Indicates whether there is an ISA random number instruction. */
|
||||||
|
bool hasRand;
|
||||||
|
|
||||||
/** Indicates whether the target has support for transcendentals (beyond
|
/** Indicates whether the target has support for transcendentals (beyond
|
||||||
sqrt, which we assume that all of them handle). */
|
sqrt, which we assume that all of them handle). */
|
||||||
bool hasTranscendentals;
|
bool hasTranscendentals;
|
||||||
|
|||||||
28
ispc.vcxproj
28
ispc.vcxproj
@@ -20,6 +20,8 @@
|
|||||||
<ClCompile Include="func.cpp" />
|
<ClCompile Include="func.cpp" />
|
||||||
<ClCompile Include="gen-bitcode-avx1.cpp" />
|
<ClCompile Include="gen-bitcode-avx1.cpp" />
|
||||||
<ClCompile Include="gen-bitcode-avx1-x2.cpp" />
|
<ClCompile Include="gen-bitcode-avx1-x2.cpp" />
|
||||||
|
<ClCompile Include="gen-bitcode-avx11.cpp" />
|
||||||
|
<ClCompile Include="gen-bitcode-avx11-x2.cpp" />
|
||||||
<ClCompile Include="gen-bitcode-avx2.cpp" />
|
<ClCompile Include="gen-bitcode-avx2.cpp" />
|
||||||
<ClCompile Include="gen-bitcode-avx2-x2.cpp" />
|
<ClCompile Include="gen-bitcode-avx2-x2.cpp" />
|
||||||
<ClCompile Include="gen-bitcode-c-32.cpp" />
|
<ClCompile Include="gen-bitcode-c-32.cpp" />
|
||||||
@@ -188,6 +190,32 @@
|
|||||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-avx1-x2.cpp</Message>
|
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-avx1-x2.cpp</Message>
|
||||||
</CustomBuild>
|
</CustomBuild>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
<ItemGroup>
|
||||||
|
<CustomBuild Include="builtins\target-avx11.ll">
|
||||||
|
<FileType>Document</FileType>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx11.ll | python bitcode2cpp.py builtins\target-avx11.ll > gen-bitcode-avx11.cpp</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-avx11.cpp</Outputs>
|
||||||
|
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx.ll</AdditionalInputs>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx11.ll | python bitcode2cpp.py builtins\target-avx11.ll > gen-bitcode-avx11.cpp</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-avx11.cpp</Outputs>
|
||||||
|
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx.ll</AdditionalInputs>
|
||||||
|
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-avx11.cpp</Message>
|
||||||
|
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-avx11.cpp</Message>
|
||||||
|
</CustomBuild>
|
||||||
|
</ItemGroup>
|
||||||
|
<ItemGroup>
|
||||||
|
<CustomBuild Include="builtins\target-avx11-x2.ll">
|
||||||
|
<FileType>Document</FileType>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx11-x2.ll | python bitcode2cpp.py builtins\target-avx11-x2.ll > gen-bitcode-avx11-x2.cpp</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-avx11-x2.cpp</Outputs>
|
||||||
|
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx-x2.ll</AdditionalInputs>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx11-x2.ll | python bitcode2cpp.py builtins\target-avx11-x2.ll > gen-bitcode-avx11-x2.cpp</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-avx11-x2.cpp</Outputs>
|
||||||
|
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx-x2.ll</AdditionalInputs>
|
||||||
|
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-avx11-x2.cpp</Message>
|
||||||
|
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-avx11-x2.cpp</Message>
|
||||||
|
</CustomBuild>
|
||||||
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<CustomBuild Include="builtins\target-avx2.ll">
|
<CustomBuild Include="builtins\target-avx2.ll">
|
||||||
<FileType>Document</FileType>
|
<FileType>Document</FileType>
|
||||||
|
|||||||
Reference in New Issue
Block a user