Add initial support for "avx1.1" targets for Ivy Bridge.
So far, only the use of the float/half conversion instructions distinguishes this from the "avx1" target. Partial work on issue #263.
This commit is contained in:
4
Makefile
4
Makefile
@@ -84,8 +84,8 @@ CXX_SRC=ast.cpp builtins.cpp cbackend.cpp ctx.cpp decl.cpp expr.cpp func.cpp \
|
||||
type.cpp util.cpp
|
||||
HEADERS=ast.h builtins.h ctx.h decl.h expr.h func.h ispc.h llvmutil.h module.h \
|
||||
opt.h stmt.h sym.h type.h util.h
|
||||
TARGETS=avx1 avx1-x2 avx2 avx2-x2 sse2 sse2-x2 sse4 sse4-x2 generic-4 generic-8 \
|
||||
generic-16 generic-32 generic-64 generic-1
|
||||
TARGETS=avx1 avx1-x2 avx11 avx11-x2 avx2 avx2-x2 sse2 sse2-x2 sse4 sse4-x2 \
|
||||
generic-4 generic-8 generic-16 generic-32 generic-64 generic-1
|
||||
BUILTINS_SRC=$(addprefix builtins/target-, $(addsuffix .ll, $(TARGETS))) \
|
||||
builtins/dispatch.ll
|
||||
BUILTINS_OBJS=$(addprefix builtins-, $(notdir $(BUILTINS_SRC:.ll=.o))) \
|
||||
|
||||
22
builtins.cpp
22
builtins.cpp
@@ -804,6 +804,26 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
|
||||
FATAL("logic error in DefineStdlib");
|
||||
}
|
||||
break;
|
||||
case Target::AVX11:
|
||||
switch (g->target.vectorWidth) {
|
||||
case 8:
|
||||
extern unsigned char builtins_bitcode_avx11[];
|
||||
extern int builtins_bitcode_avx11_length;
|
||||
AddBitcodeToModule(builtins_bitcode_avx11,
|
||||
builtins_bitcode_avx11_length,
|
||||
module, symbolTable);
|
||||
break;
|
||||
case 16:
|
||||
extern unsigned char builtins_bitcode_avx11_x2[];
|
||||
extern int builtins_bitcode_avx11_x2_length;
|
||||
AddBitcodeToModule(builtins_bitcode_avx11_x2,
|
||||
builtins_bitcode_avx11_x2_length,
|
||||
module, symbolTable);
|
||||
break;
|
||||
default:
|
||||
FATAL("logic error in DefineStdlib");
|
||||
}
|
||||
break;
|
||||
case Target::AVX2:
|
||||
switch (g->target.vectorWidth) {
|
||||
case 8:
|
||||
@@ -898,6 +918,8 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
|
||||
|
||||
lDefineConstantInt("__have_native_half", g->target.hasHalf, module,
|
||||
symbolTable);
|
||||
lDefineConstantInt("__have_native_rand", g->target.hasRand, module,
|
||||
symbolTable);
|
||||
lDefineConstantInt("__have_native_transcendentals", g->target.hasTranscendentals,
|
||||
module, symbolTable);
|
||||
|
||||
|
||||
@@ -48,8 +48,8 @@ declare void @abort() noreturn
|
||||
;; corresponding to one of the Target::ISA enumerant values that gives the
|
||||
;; most capable ISA that the current system can run.
|
||||
;;
|
||||
;; Note: clang from LLVM 2.9 should be used if this is updated, for maximum
|
||||
;; backwards compatibility for anyone building ispc with LLVM 2.9.
|
||||
;; Note: clang from LLVM 3.0 should be used if this is updated, for maximum
|
||||
;; backwards compatibility for anyone building ispc with LLVM 3.0.
|
||||
;;
|
||||
;; #include <stdint.h>
|
||||
;; #include <stdlib.h>
|
||||
@@ -80,9 +80,14 @@ declare void @abort() noreturn
|
||||
;; // Call cpuid with eax=7, ecx=0
|
||||
;; __cpuid_count(info, 7, 0);
|
||||
;; if ((info[1] & (1 << 5)) != 0)
|
||||
;; return 3; // AVX2
|
||||
;; else
|
||||
;; return 2; // AVX1
|
||||
;; return 4; // AVX2
|
||||
;; else {
|
||||
;; if ((info[2] & (1 << 29)) != 0 && // F16C
|
||||
;; (info[2] & (1 << 30)) != 0) // RDRAND
|
||||
;; return 3; // AVX1.1 (AVX1 plus F16C and RDRAND, i.e. Ivy Bridge)
|
||||
;; else
|
||||
;; return 2; // AVX1
|
||||
;; }
|
||||
;; }
|
||||
;; else if ((info[2] & (1 << 19)) != 0)
|
||||
;; return 1; // SSE4
|
||||
@@ -92,41 +97,47 @@ declare void @abort() noreturn
|
||||
;; abort();
|
||||
;; }
|
||||
|
||||
%0 = type { i32, i32, i32, i32 }
|
||||
|
||||
define i32 @__get_system_isa() nounwind ssp {
|
||||
define i32 @__get_system_isa() nounwind uwtable ssp {
|
||||
entry:
|
||||
%0 = tail call %0 asm sideeffect "cpuid", "={ax},={bx},={cx},={dx},0,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
|
||||
%asmresult9.i = extractvalue %0 %0, 2
|
||||
%asmresult10.i = extractvalue %0 %0, 3
|
||||
%and = and i32 %asmresult9.i, 268435456
|
||||
%0 = tail call { i32, i32, i32, i32 } asm sideeffect "cpuid", "={ax},={bx},={cx},={dx},0,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
|
||||
%asmresult5.i = extractvalue { i32, i32, i32, i32 } %0, 2
|
||||
%asmresult6.i = extractvalue { i32, i32, i32, i32 } %0, 3
|
||||
%and = and i32 %asmresult5.i, 268435456
|
||||
%cmp = icmp eq i32 %and, 0
|
||||
br i1 %cmp, label %if.else7, label %if.then
|
||||
br i1 %cmp, label %if.else14, label %if.then
|
||||
|
||||
if.then: ; preds = %entry
|
||||
%1 = tail call %0 asm sideeffect "xchg$(l$)\09$(%$)ebx, $1\0A\09cpuid\0A\09xchg$(l$)\09$(%$)ebx, $1\0A\09", "={ax},=r,={cx},={dx},0,2,~{dirflag},~{fpsr},~{flags}"(i32 7, i32 0) nounwind
|
||||
%asmresult9.i24 = extractvalue %0 %1, 1
|
||||
%and4 = lshr i32 %asmresult9.i24, 5
|
||||
%2 = and i32 %and4, 1
|
||||
%3 = or i32 %2, 2
|
||||
%1 = tail call { i32, i32, i32, i32 } asm sideeffect "xchg$(l$)\09$(%$)ebx, $1\0A\09cpuid\0A\09xchg$(l$)\09$(%$)ebx, $1\0A\09", "={ax},=r,={cx},={dx},0,2,~{dirflag},~{fpsr},~{flags}"(i32 7, i32 0) nounwind
|
||||
%asmresult4.i29 = extractvalue { i32, i32, i32, i32 } %1, 1
|
||||
%and3 = and i32 %asmresult4.i29, 32
|
||||
%cmp4 = icmp eq i32 %and3, 0
|
||||
br i1 %cmp4, label %if.else, label %return
|
||||
|
||||
if.else: ; preds = %if.then
|
||||
%asmresult5.i30 = extractvalue { i32, i32, i32, i32 } %1, 2
|
||||
%2 = and i32 %asmresult5.i30, 1610612736
|
||||
%3 = icmp eq i32 %2, 1610612736
|
||||
br i1 %3, label %return, label %if.else13
|
||||
|
||||
if.else13: ; preds = %if.else
|
||||
br label %return
|
||||
|
||||
if.else7: ; preds = %entry
|
||||
%and10 = and i32 %asmresult9.i, 524288
|
||||
%cmp11 = icmp eq i32 %and10, 0
|
||||
br i1 %cmp11, label %if.else13, label %return
|
||||
|
||||
if.else13: ; preds = %if.else7
|
||||
%and16 = and i32 %asmresult10.i, 67108864
|
||||
if.else14: ; preds = %entry
|
||||
%and16 = and i32 %asmresult5.i, 524288
|
||||
%cmp17 = icmp eq i32 %and16, 0
|
||||
br i1 %cmp17, label %if.else19, label %return
|
||||
|
||||
if.else19: ; preds = %if.else13
|
||||
if.else19: ; preds = %if.else14
|
||||
%and21 = and i32 %asmresult6.i, 67108864
|
||||
%cmp22 = icmp eq i32 %and21, 0
|
||||
br i1 %cmp22, label %if.else24, label %return
|
||||
|
||||
if.else24: ; preds = %if.else19
|
||||
tail call void @abort() noreturn nounwind
|
||||
unreachable
|
||||
|
||||
return: ; preds = %if.else13, %if.else7, %if.then
|
||||
%retval.0 = phi i32 [ %3, %if.then ], [ 1, %if.else7 ], [ 0, %if.else13 ]
|
||||
return: ; preds = %if.else19, %if.else14, %if.else13, %if.else, %if.then
|
||||
%retval.0 = phi i32 [ 2, %if.else13 ], [ 4, %if.then ], [ 3, %if.else ], [ 1, %if.else14 ], [ 0, %if.else19 ]
|
||||
ret i32 %retval.0
|
||||
}
|
||||
|
||||
|
||||
@@ -61,10 +61,12 @@ define <16 x i32> @__max_varying_uint32(<16 x i32>, <16 x i32>) nounwind readonl
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; half conversion routines
|
||||
|
||||
ifelse(NO_HALF_DECLARES, `1', `', `
|
||||
declare float @__half_to_float_uniform(i16 %v) nounwind readnone
|
||||
declare <WIDTH x float> @__half_to_float_varying(<WIDTH x i16> %v) nounwind readnone
|
||||
declare i16 @__float_to_half_uniform(float %v) nounwind readnone
|
||||
declare <WIDTH x i16> @__float_to_half_varying(<WIDTH x float> %v) nounwind readnone
|
||||
')
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; gather
|
||||
|
||||
@@ -61,10 +61,12 @@ define <8 x i32> @__max_varying_uint32(<8 x i32>, <8 x i32>) nounwind readonly a
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; half conversion routines
|
||||
|
||||
ifelse(NO_HALF_DECLARES, `1', `', `
|
||||
declare float @__half_to_float_uniform(i16 %v) nounwind readnone
|
||||
declare <WIDTH x float> @__half_to_float_varying(<WIDTH x i16> %v) nounwind readnone
|
||||
declare i16 @__float_to_half_uniform(float %v) nounwind readnone
|
||||
declare <WIDTH x i16> @__float_to_half_varying(<WIDTH x float> %v) nounwind readnone
|
||||
')
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; gather
|
||||
|
||||
87
builtins/target-avx11-x2.ll
Normal file
87
builtins/target-avx11-x2.ll
Normal file
@@ -0,0 +1,87 @@
|
||||
;; Copyright (c) 2012, Intel Corporation
|
||||
;; All rights reserved.
|
||||
;;
|
||||
;; Redistribution and use in source and binary forms, with or without
|
||||
;; modification, are permitted provided that the following conditions are
|
||||
;; met:
|
||||
;;
|
||||
;; * Redistributions of source code must retain the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer.
|
||||
;;
|
||||
;; * Redistributions in binary form must reproduce the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer in the
|
||||
;; documentation and/or other materials provided with the distribution.
|
||||
;;
|
||||
;; * Neither the name of Intel Corporation nor the names of its
|
||||
;; contributors may be used to endorse or promote products derived from
|
||||
;; this software without specific prior written permission.
|
||||
;;
|
||||
;;
|
||||
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
include(`target-avx1-x2.ll')
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; float/half conversions
|
||||
|
||||
declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>) nounwind readnone
|
||||
; 0 is round nearest even
|
||||
declare <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float>, i32) nounwind readnone
|
||||
|
||||
define <16 x float> @__half_to_float_varying(<16 x i16> %v) nounwind readnone {
|
||||
%r_0 = shufflevector <16 x i16> %v, <16 x i16> undef,
|
||||
<8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
%vr_0 = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %r_0)
|
||||
%r_1 = shufflevector <16 x i16> %v, <16 x i16> undef,
|
||||
<8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
%vr_1 = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %r_1)
|
||||
%r = shufflevector <8 x float> %vr_0, <8 x float> %vr_1,
|
||||
<16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
|
||||
i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
ret <16 x float> %r
|
||||
}
|
||||
|
||||
define <16 x i16> @__float_to_half_varying(<16 x float> %v) nounwind readnone {
|
||||
%r_0 = shufflevector <16 x float> %v, <16 x float> undef,
|
||||
<8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
%vr_0 = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %r_0, i32 0)
|
||||
%r_1 = shufflevector <16 x float> %v, <16 x float> undef,
|
||||
<8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
%vr_1 = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %r_1, i32 0)
|
||||
%r = shufflevector <8 x i16> %vr_0, <8 x i16> %vr_1,
|
||||
<16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
|
||||
i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
ret <16 x i16> %r
|
||||
}
|
||||
|
||||
define float @__half_to_float_uniform(i16 %v) nounwind readnone {
|
||||
%v1 = bitcast i16 %v to <1 x i16>
|
||||
%vv = shufflevector <1 x i16> %v1, <1 x i16> undef,
|
||||
<8 x i32> <i32 0, i32 undef, i32 undef, i32 undef,
|
||||
i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
%rv = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %vv)
|
||||
%r = extractelement <8 x float> %rv, i32 0
|
||||
ret float %r
|
||||
}
|
||||
|
||||
define i16 @__float_to_half_uniform(float %v) nounwind readnone {
|
||||
%v1 = bitcast float %v to <1 x float>
|
||||
%vv = shufflevector <1 x float> %v1, <1 x float> undef,
|
||||
<8 x i32> <i32 0, i32 undef, i32 undef, i32 undef,
|
||||
i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; round to nearest even
|
||||
%rv = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %vv, i32 0)
|
||||
%r = extractelement <8 x i16> %rv, i32 0
|
||||
ret i16 %r
|
||||
}
|
||||
|
||||
71
builtins/target-avx11.ll
Normal file
71
builtins/target-avx11.ll
Normal file
@@ -0,0 +1,71 @@
|
||||
;; Copyright (c) 2012, Intel Corporation
|
||||
;; All rights reserved.
|
||||
;;
|
||||
;; Redistribution and use in source and binary forms, with or without
|
||||
;; modification, are permitted provided that the following conditions are
|
||||
;; met:
|
||||
;;
|
||||
;; * Redistributions of source code must retain the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer.
|
||||
;;
|
||||
;; * Redistributions in binary form must reproduce the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer in the
|
||||
;; documentation and/or other materials provided with the distribution.
|
||||
;;
|
||||
;; * Neither the name of Intel Corporation nor the names of its
|
||||
;; contributors may be used to endorse or promote products derived from
|
||||
;; this software without specific prior written permission.
|
||||
;;
|
||||
;;
|
||||
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
include(`target-avx1.ll')
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; float/half conversions
|
||||
|
||||
declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>) nounwind readnone
|
||||
; 0 is round nearest even
|
||||
declare <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float>, i32) nounwind readnone
|
||||
|
||||
define <8 x float> @__half_to_float_varying(<8 x i16> %v) nounwind readnone {
|
||||
%r = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %v)
|
||||
ret <8 x float> %r
|
||||
}
|
||||
|
||||
define <8 x i16> @__float_to_half_varying(<8 x float> %v) nounwind readnone {
|
||||
%r = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %v, i32 0)
|
||||
ret <8 x i16> %r
|
||||
}
|
||||
|
||||
define float @__half_to_float_uniform(i16 %v) nounwind readnone {
|
||||
%v1 = bitcast i16 %v to <1 x i16>
|
||||
%vv = shufflevector <1 x i16> %v1, <1 x i16> undef,
|
||||
<8 x i32> <i32 0, i32 undef, i32 undef, i32 undef,
|
||||
i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
%rv = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %vv)
|
||||
%r = extractelement <8 x float> %rv, i32 0
|
||||
ret float %r
|
||||
}
|
||||
|
||||
define i16 @__float_to_half_uniform(float %v) nounwind readnone {
|
||||
%v1 = bitcast float %v to <1 x float>
|
||||
%vv = shufflevector <1 x float> %v1, <1 x float> undef,
|
||||
<8 x i32> <i32 0, i32 undef, i32 undef, i32 undef,
|
||||
i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; round to nearest even
|
||||
%rv = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %vv, i32 0)
|
||||
%r = extractelement <8 x i16> %rv, i32 0
|
||||
ret i16 %r
|
||||
}
|
||||
|
||||
48
ispc.cpp
48
ispc.cpp
@@ -93,8 +93,14 @@ lGetSystemISA() {
|
||||
__cpuidex(info, 7, 0);
|
||||
if ((info[1] & (1 << 5)) != 0)
|
||||
return "avx2";
|
||||
else
|
||||
return "avx";
|
||||
else {
|
||||
// ivybridge?
|
||||
if ((info[2] & (1 << 29)) != 0 && // F16C
|
||||
(info[2] & (1 << 30)) != 0) // RDRAND
|
||||
return "avx1.1";
|
||||
else
|
||||
return "avx";
|
||||
}
|
||||
}
|
||||
else if ((info[2] & (1 << 19)) != 0)
|
||||
return "sse4";
|
||||
@@ -198,7 +204,7 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
|
||||
}
|
||||
|
||||
// This is the case for most of them
|
||||
t->hasHalf = t->hasTranscendentals = false;
|
||||
t->hasHalf = t->hasRand = t->hasTranscendentals = false;
|
||||
|
||||
if (!strcasecmp(isa, "sse2")) {
|
||||
t->isa = Target::SSE2;
|
||||
@@ -284,7 +290,7 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
|
||||
t->maskingIsFree = false;
|
||||
t->maskBitCount = 32;
|
||||
}
|
||||
else if (!strcasecmp(isa, "avx")) {
|
||||
else if (!strcasecmp(isa, "avx") || !strcasecmp(isa, "avx1")) {
|
||||
t->isa = Target::AVX;
|
||||
t->nativeVectorWidth = 8;
|
||||
t->vectorWidth = 8;
|
||||
@@ -292,32 +298,54 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
|
||||
t->maskingIsFree = false;
|
||||
t->maskBitCount = 32;
|
||||
}
|
||||
else if (!strcasecmp(isa, "avx-x2")) {
|
||||
t->isa = Target::AVX;
|
||||
else if (!strcasecmp(isa, "avx-x2") || !strcasecmp(isa, "avx1-x2")) {
|
||||
t->isa = Target::AVX11;
|
||||
t->nativeVectorWidth = 8;
|
||||
t->vectorWidth = 16;
|
||||
t->attributes = "+avx,+popcnt,+cmov";
|
||||
t->maskingIsFree = false;
|
||||
t->maskBitCount = 32;
|
||||
}
|
||||
else if (!strcasecmp(isa, "avx1.1")) {
|
||||
t->isa = Target::AVX11;
|
||||
t->nativeVectorWidth = 8;
|
||||
t->vectorWidth = 8;
|
||||
t->attributes = "+avx,+popcnt,+cmov,+f16c,+rdrand";
|
||||
t->maskingIsFree = false;
|
||||
t->maskBitCount = 32;
|
||||
t->hasHalf = true;
|
||||
t->hasRand = true;
|
||||
}
|
||||
else if (!strcasecmp(isa, "avx1.1-x2")) {
|
||||
t->isa = Target::AVX11;
|
||||
t->nativeVectorWidth = 8;
|
||||
t->vectorWidth = 16;
|
||||
t->attributes = "+avx,+popcnt,+cmov,+f16c,+rdrand";
|
||||
t->maskingIsFree = false;
|
||||
t->maskBitCount = 32;
|
||||
t->hasHalf = true;
|
||||
t->hasRand = true;
|
||||
}
|
||||
#ifndef LLVM_3_0
|
||||
else if (!strcasecmp(isa, "avx2")) {
|
||||
t->isa = Target::AVX2;
|
||||
t->nativeVectorWidth = 8;
|
||||
t->vectorWidth = 8;
|
||||
t->attributes = "+avx2,+popcnt,+cmov,+f16c";
|
||||
t->attributes = "+avx2,+popcnt,+cmov,+f16c,+rdrand";
|
||||
t->maskingIsFree = false;
|
||||
t->maskBitCount = 32;
|
||||
t->hasHalf = true;
|
||||
t->hasRand = true;
|
||||
}
|
||||
else if (!strcasecmp(isa, "avx2-x2")) {
|
||||
t->isa = Target::AVX2;
|
||||
t->nativeVectorWidth = 16;
|
||||
t->vectorWidth = 16;
|
||||
t->attributes = "+avx2,+popcnt,+cmov,+f16c";
|
||||
t->attributes = "+avx2,+popcnt,+cmov,+f16c,+rdrand";
|
||||
t->maskingIsFree = false;
|
||||
t->maskBitCount = 32;
|
||||
t->hasHalf = true;
|
||||
t->hasRand = true;
|
||||
}
|
||||
#endif // !LLVM_3_0
|
||||
else {
|
||||
@@ -360,7 +388,7 @@ const char *
|
||||
Target::SupportedTargetISAs() {
|
||||
return "sse2, sse2-x2, sse4, sse4-x2, avx, avx-x2"
|
||||
#ifndef LLVM_3_0
|
||||
", avx2, avx2-x2"
|
||||
", avx1.1, avx1.1-x2, avx2, avx2-x2"
|
||||
#endif // !LLVM_3_0
|
||||
", generic-1, generic-4, generic-8, generic-16, generic-32";
|
||||
}
|
||||
@@ -426,6 +454,8 @@ Target::GetISAString() const {
|
||||
return "sse4";
|
||||
case Target::AVX:
|
||||
return "avx";
|
||||
case Target::AVX11:
|
||||
return "avx11";
|
||||
case Target::AVX2:
|
||||
return "avx2";
|
||||
case Target::GENERIC:
|
||||
|
||||
5
ispc.h
5
ispc.h
@@ -207,7 +207,7 @@ struct Target {
|
||||
flexible/performant of them will appear last in the enumerant. Note
|
||||
also that __best_available_isa() needs to be updated if ISAs are
|
||||
added or the enumerant values are reordered. */
|
||||
enum ISA { SSE2, SSE4, AVX, AVX2, GENERIC, NUM_ISAS };
|
||||
enum ISA { SSE2, SSE4, AVX, AVX11, AVX2, GENERIC, NUM_ISAS };
|
||||
|
||||
/** Instruction set being compiled to. */
|
||||
ISA isa;
|
||||
@@ -252,6 +252,9 @@ struct Target {
|
||||
conversions. */
|
||||
bool hasHalf;
|
||||
|
||||
/** Indicates whether there is an ISA random number instruction. */
|
||||
bool hasRand;
|
||||
|
||||
/** Indicates whether the target has support for transcendentals (beyond
|
||||
sqrt, which we assume that all of them handle). */
|
||||
bool hasTranscendentals;
|
||||
|
||||
28
ispc.vcxproj
28
ispc.vcxproj
@@ -20,6 +20,8 @@
|
||||
<ClCompile Include="func.cpp" />
|
||||
<ClCompile Include="gen-bitcode-avx1.cpp" />
|
||||
<ClCompile Include="gen-bitcode-avx1-x2.cpp" />
|
||||
<ClCompile Include="gen-bitcode-avx11.cpp" />
|
||||
<ClCompile Include="gen-bitcode-avx11-x2.cpp" />
|
||||
<ClCompile Include="gen-bitcode-avx2.cpp" />
|
||||
<ClCompile Include="gen-bitcode-avx2-x2.cpp" />
|
||||
<ClCompile Include="gen-bitcode-c-32.cpp" />
|
||||
@@ -188,6 +190,32 @@
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-avx1-x2.cpp</Message>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="builtins\target-avx11.ll">
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx11.ll | python bitcode2cpp.py builtins\target-avx11.ll > gen-bitcode-avx11.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-avx11.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx.ll</AdditionalInputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx11.ll | python bitcode2cpp.py builtins\target-avx11.ll > gen-bitcode-avx11.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-avx11.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx.ll</AdditionalInputs>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-avx11.cpp</Message>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-avx11.cpp</Message>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="builtins\target-avx11-x2.ll">
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx11-x2.ll | python bitcode2cpp.py builtins\target-avx11-x2.ll > gen-bitcode-avx11-x2.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-avx11-x2.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx-x2.ll</AdditionalInputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx11-x2.ll | python bitcode2cpp.py builtins\target-avx11-x2.ll > gen-bitcode-avx11-x2.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-avx11-x2.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4;builtins\target-avx-common.ll;builtins\targets-avx-x2.ll</AdditionalInputs>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-avx11-x2.cpp</Message>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-avx11-x2.cpp</Message>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="builtins\target-avx2.ll">
|
||||
<FileType>Document</FileType>
|
||||
|
||||
Reference in New Issue
Block a user