added SKX target definition
This commit is contained in:
2
Makefile
2
Makefile
@@ -202,7 +202,7 @@ HEADERS=ast.h builtins.h ctx.h decl.h expr.h func.h ispc.h llvmutil.h module.h \
|
|||||||
opt.h stmt.h sym.h type.h util.h
|
opt.h stmt.h sym.h type.h util.h
|
||||||
TARGETS=avx2-i64x4 avx11-i64x4 avx1-i64x4 avx1 avx1-x2 avx11 avx11-x2 avx2 avx2-x2 \
|
TARGETS=avx2-i64x4 avx11-i64x4 avx1-i64x4 avx1 avx1-x2 avx11 avx11-x2 avx2 avx2-x2 \
|
||||||
sse2 sse2-x2 sse4-8 sse4-16 sse4 sse4-x2 \
|
sse2 sse2-x2 sse4-8 sse4-16 sse4 sse4-x2 \
|
||||||
generic-4 generic-8 generic-16 generic-32 generic-64 generic-1 knl
|
generic-4 generic-8 generic-16 generic-32 generic-64 generic-1 knl skx
|
||||||
ifneq ($(ARM_ENABLED), 0)
|
ifneq ($(ARM_ENABLED), 0)
|
||||||
TARGETS+=neon-32 neon-16 neon-8
|
TARGETS+=neon-32 neon-16 neon-8
|
||||||
endif
|
endif
|
||||||
|
|||||||
16
alloy.py
16
alloy.py
@@ -343,12 +343,12 @@ def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra,
|
|||||||
|
|
||||||
|
|
||||||
def unsupported_llvm_targets(LLVM_VERSION):
|
def unsupported_llvm_targets(LLVM_VERSION):
|
||||||
prohibited_list = {"3.2":["avx512knl-i32x16"],
|
prohibited_list = {"3.2":["avx512knl-i32x16", "avx512skx-i32x16"],
|
||||||
"3.3":["avx512knl-i32x16"],
|
"3.3":["avx512knl-i32x16", "avx512skx-i32x16"],
|
||||||
"3.4":["avx512knl-i32x16"],
|
"3.4":["avx512knl-i32x16", "avx512skx-i32x16"],
|
||||||
"3.5":["avx512knl-i32x16"],
|
"3.5":["avx512knl-i32x16", "avx512skx-i32x16"],
|
||||||
"3.6":["avx512knl-i32x16"],
|
"3.6":["avx512knl-i32x16", "avx512skx-i32x16"],
|
||||||
"3.7":[],
|
"3.7":["avx512skx-i32x16"],
|
||||||
"3.8":[],
|
"3.8":[],
|
||||||
"3.9":[],
|
"3.9":[],
|
||||||
"trunk":[]}
|
"trunk":[]}
|
||||||
@@ -379,7 +379,7 @@ def check_targets():
|
|||||||
KNL = ["knl-generic", "avx512knl-i32x16"]
|
KNL = ["knl-generic", "avx512knl-i32x16"]
|
||||||
|
|
||||||
targets = [["AVX2", AVX2, False], ["AVX1.1", AVX11, False], ["AVX", AVX, False], ["SSE4", SSE4, False],
|
targets = [["AVX2", AVX2, False], ["AVX1.1", AVX11, False], ["AVX", AVX, False], ["SSE4", SSE4, False],
|
||||||
["SSE2", SSE2, False], ["KNL", KNL, False]]
|
["SSE2", SSE2, False], ["KNL", KNL, False], ["SKX", SKX, False]]
|
||||||
f_lines = take_lines("check_isa.exe", "first")
|
f_lines = take_lines("check_isa.exe", "first")
|
||||||
for i in range(0,5):
|
for i in range(0,5):
|
||||||
if targets[i][0] in f_lines:
|
if targets[i][0] in f_lines:
|
||||||
@@ -403,6 +403,8 @@ def check_targets():
|
|||||||
# here we have SDE
|
# here we have SDE
|
||||||
f_lines = take_lines(sde_exists + " -help", "all")
|
f_lines = take_lines(sde_exists + " -help", "all")
|
||||||
for i in range(0,len(f_lines)):
|
for i in range(0,len(f_lines)):
|
||||||
|
# SDE advertises Skylake-X emulation and SKX is not natively available
# (targets[6] is the SKX entry): queue the AVX-512 SKX target for SDE runs.
# Every answer_sde element is a [sde_flag, ispc_target] pair, exactly like the
# KNL entries appended right below, so the pair has to be wrapped in an outer
# list; concatenating the flat list would add two bare strings instead.
if targets[6][2] == False and "skx" in f_lines[i]:
    answer_sde = answer_sde + [["-skx", "avx512skx-i32x16"]]
|
||||||
if targets[5][2] == False and "knl" in f_lines[i]:
|
if targets[5][2] == False and "knl" in f_lines[i]:
|
||||||
answer_sde = answer_sde + [["-knl", "knl-generic"], ["-knl", "avx512knl-i32x16"]]
|
answer_sde = answer_sde + [["-knl", "knl-generic"], ["-knl", "avx512knl-i32x16"]]
|
||||||
if targets[3][2] == False and "wsm" in f_lines[i]:
|
if targets[3][2] == False and "wsm" in f_lines[i]:
|
||||||
|
|||||||
17
builtins.cpp
17
builtins.cpp
@@ -1377,6 +1377,23 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
|
||||||
|
case Target::SKX_AVX512: {
|
||||||
|
switch (g->target->getVectorWidth()) {
|
||||||
|
case 16:
|
||||||
|
if (runtime32) {
|
||||||
|
EXPORT_MODULE(builtins_bitcode_skx_32bit);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
EXPORT_MODULE(builtins_bitcode_skx_64bit);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
FATAL("logic error in DefineStdlib");
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
case Target::GENERIC: {
|
case Target::GENERIC: {
|
||||||
switch (g->target->getVectorWidth()) {
|
switch (g->target->getVectorWidth()) {
|
||||||
|
|||||||
@@ -511,13 +511,6 @@ define float @__rsqrt_uniform_float(float) nounwind readonly alwaysinline {
|
|||||||
ret float %half_scale
|
ret float %half_scale
|
||||||
}
|
}
|
||||||
|
|
||||||
declare <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone
|
|
||||||
|
|
||||||
define <16 x float> @__rsqrt_varying_float(<16 x float> %v) nounwind readonly alwaysinline {
|
|
||||||
%res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %v, <16 x float> undef, i16 -1, i32 8)
|
|
||||||
ret <16 x float> %res
|
|
||||||
}
|
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; rcp
|
;; rcp
|
||||||
|
|
||||||
@@ -538,13 +531,6 @@ define float @__rcp_uniform_float(float) nounwind readonly alwaysinline {
|
|||||||
ret float %iv_mul
|
ret float %iv_mul
|
||||||
}
|
}
|
||||||
|
|
||||||
declare <16 x float> @llvm.x86.avx512.rcp28.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone
|
|
||||||
|
|
||||||
define <16 x float> @__rcp_varying_float(<16 x float>) nounwind readonly alwaysinline {
|
|
||||||
%res = call <16 x float> @llvm.x86.avx512.rcp28.ps(<16 x float> %0, <16 x float> undef, i16 -1, i32 8)
|
|
||||||
ret <16 x float> %res
|
|
||||||
}
|
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; sqrt
|
;; sqrt
|
||||||
|
|
||||||
|
|||||||
@@ -40,5 +40,24 @@ ifelse(LLVM_VERSION, LLVM_3_7,
|
|||||||
`include(`target-avx512-common.ll')'
|
`include(`target-avx512-common.ll')'
|
||||||
)
|
)
|
||||||
|
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;; rcp
|
||||||
|
|
||||||
|
declare <16 x float> @llvm.x86.avx512.rcp28.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone
|
||||||
|
|
||||||
|
define <16 x float> @__rcp_varying_float(<16 x float> %v) nounwind readonly alwaysinline {
  ; The rcp28 intrinsic result is returned as is; no refinement step is applied here.
  ; Operands after the input: undef pass-through, i16 -1, i32 8.
  %recip = call <16 x float> @llvm.x86.avx512.rcp28.ps(<16 x float> %v, <16 x float> undef, i16 -1, i32 8)
  ret <16 x float> %recip
}
|
||||||
|
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;; rsqrt
|
||||||
|
|
||||||
|
declare <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone
|
||||||
|
|
||||||
|
define <16 x float> @__rsqrt_varying_float(<16 x float> %v) nounwind readonly alwaysinline {
  ; The rsqrt28 intrinsic result is returned as is; no refinement step is applied here.
  ; Operands after the input: undef pass-through, i16 -1, i32 8.
  %inv_root = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %v, <16 x float> undef, i16 -1, i32 8)
  ret <16 x float> %inv_root
}
|
||||||
|
|
||||||
;;saturation_arithmetic_novec()
|
;;saturation_arithmetic_novec()
|
||||||
|
|||||||
85
builtins/target-skx.ll
Normal file
85
builtins/target-skx.ll
Normal file
@@ -0,0 +1,85 @@
|
|||||||
|
;; Copyright (c) 2016, Intel Corporation
|
||||||
|
;; All rights reserved.
|
||||||
|
;;
|
||||||
|
;; Redistribution and use in source and binary forms, with or without
|
||||||
|
;; modification, are permitted provided that the following conditions are
|
||||||
|
;; met:
|
||||||
|
;;
|
||||||
|
;; * Redistributions of source code must retain the above copyright
|
||||||
|
;; notice, this list of conditions and the following disclaimer.
|
||||||
|
;;
|
||||||
|
;; * Redistributions in binary form must reproduce the above copyright
|
||||||
|
;; notice, this list of conditions and the following disclaimer in the
|
||||||
|
;; documentation and/or other materials provided with the distribution.
|
||||||
|
;;
|
||||||
|
;; * Neither the name of Intel Corporation nor the names of its
|
||||||
|
;; contributors may be used to endorse or promote products derived from
|
||||||
|
;; this software without specific prior written permission.
|
||||||
|
;;
|
||||||
|
;;
|
||||||
|
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||||
|
;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||||
|
;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||||
|
;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
define(`WIDTH',`16')
|
||||||
|
|
||||||
|
|
||||||
|
ifelse(LLVM_VERSION, LLVM_3_8,
|
||||||
|
`include(`target-avx512-common.ll')',
|
||||||
|
LLVM_VERSION, LLVM_3_9,
|
||||||
|
`include(`target-avx512-common.ll')'
|
||||||
|
)
|
||||||
|
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;; rcp
|
||||||
|
|
||||||
|
|
||||||
|
declare <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone
|
||||||
|
|
||||||
|
define <16 x float> @__rcp_varying_float(<16 x float> %v) nounwind readonly alwaysinline {
  ; Initial reciprocal estimate from the rcp14 intrinsic
  ; (undef pass-through operand, i16 -1 as the third operand).
  %approx = call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> %v, <16 x float> undef, i16 -1)
  ; One Newton-Raphson step to improve precision:
  ;   refined = approx * (2.0 - v * approx)
  %v_approx = fmul <16 x float> %v, %approx
  %correction = fsub <16 x float> <float 2., float 2., float 2., float 2.,
                                   float 2., float 2., float 2., float 2.,
                                   float 2., float 2., float 2., float 2.,
                                   float 2., float 2., float 2., float 2.>, %v_approx
  %refined = fmul <16 x float> %approx, %correction
  ret <16 x float> %refined
}
|
||||||
|
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;; rsqrt
|
||||||
|
|
||||||
|
declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone
|
||||||
|
|
||||||
|
define <16 x float> @__rsqrt_varying_float(<16 x float> %v) nounwind readonly alwaysinline {
  ; Initial reciprocal square root estimate from the rsqrt14 intrinsic
  ; (undef pass-through operand, i16 -1 as the third operand).
  %approx = call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> %v, <16 x float> undef, i16 -1)
  ; One Newton-Raphson step to improve precision:
  ;   refined = 0.5 * (approx * (3.0 - (v * approx) * approx))
  %v_approx = fmul <16 x float> %v, %approx
  %v_approx_sq = fmul <16 x float> %v_approx, %approx
  %correction = fsub <16 x float> <float 3., float 3., float 3., float 3.,
                                   float 3., float 3., float 3., float 3.,
                                   float 3., float 3., float 3., float 3.,
                                   float 3., float 3., float 3., float 3.>, %v_approx_sq
  %scaled = fmul <16 x float> %approx, %correction
  %refined = fmul <16 x float> <float 0.5, float 0.5, float 0.5, float 0.5,
                                float 0.5, float 0.5, float 0.5, float 0.5,
                                float 0.5, float 0.5, float 0.5, float 0.5,
                                float 0.5, float 0.5, float 0.5, float 0.5>, %scaled
  ret <16 x float> %refined
}
|
||||||
|
|
||||||
|
;;saturation_arithmetic_novec()
|
||||||
69
ispc.cpp
69
ispc.cpp
@@ -165,7 +165,7 @@ lGetSystemISA() {
|
|||||||
(info2[1] & (1 << 28)) != 0 && // AVX512 CDI
|
(info2[1] & (1 << 28)) != 0 && // AVX512 CDI
|
||||||
(info2[1] & (1 << 30)) != 0 && // AVX512 BW
|
(info2[1] & (1 << 30)) != 0 && // AVX512 BW
|
||||||
(info2[1] & (1 << 31)) != 0) { // AVX512 VL
|
(info2[1] & (1 << 31)) != 0) { // AVX512 VL
|
||||||
return "skx";
|
return "avx512skx-i32x16";
|
||||||
}
|
}
|
||||||
else if ((info2[1] & (1 << 26)) != 0 && // AVX512 PF
|
else if ((info2[1] & (1 << 26)) != 0 && // AVX512 PF
|
||||||
(info2[1] & (1 << 27)) != 0 && // AVX512 ER
|
(info2[1] & (1 << 27)) != 0 && // AVX512 ER
|
||||||
@@ -239,10 +239,24 @@ typedef enum {
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
|
#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
|
||||||
// KNL. Supports AVX512.
|
// Knights Landing - Xeon Phi.
|
||||||
|
// Supports AVX-512F: All the key AVX-512 features: masking, broadcast... ;
|
||||||
|
// AVX-512CDI: Conflict Detection;
|
||||||
|
// AVX-512ERI & PFI: 28-bit precision RCP, RSQRT and EXP transcendentals,
|
||||||
|
// new prefetch instructions.
|
||||||
CPU_KNL,
|
CPU_KNL,
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
|
||||||
|
// Skylake Xeon.
|
||||||
|
// Supports AVX-512F: All the key AVX-512 features: masking, broadcast... ;
|
||||||
|
// AVX-512CDI: Conflict Detection;
|
||||||
|
// AVX-512VL: Vector Length Orthogonality;
|
||||||
|
// AVX-512DQ: New HPC ISA (vs AVX512F);
|
||||||
|
// AVX-512BW: Byte and Word Support.
|
||||||
|
CPU_SKX,
|
||||||
|
#endif
|
||||||
|
|
||||||
#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_4 // LLVM 3.4+
|
#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_4 // LLVM 3.4+
|
||||||
// Late Atom-like design. Supports SSE 4.2 + POPCNT/LZCNT.
|
// Late Atom-like design. Supports SSE 4.2 + POPCNT/LZCNT.
|
||||||
CPU_Silvermont,
|
CPU_Silvermont,
|
||||||
@@ -327,6 +341,10 @@ public:
|
|||||||
names[CPU_KNL].push_back("knl");
|
names[CPU_KNL].push_back("knl");
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
|
||||||
|
names[CPU_SKX].push_back("skx");
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef ISPC_ARM_ENABLED
|
#ifdef ISPC_ARM_ENABLED
|
||||||
names[CPU_CortexA15].push_back("cortex-a15");
|
names[CPU_CortexA15].push_back("cortex-a15");
|
||||||
|
|
||||||
@@ -353,6 +371,13 @@ public:
|
|||||||
CPU_Haswell, CPU_Broadwell, CPU_None);
|
CPU_Haswell, CPU_Broadwell, CPU_None);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
|
||||||
|
compat[CPU_SKX] = Set(CPU_SKX, CPU_Bonnell, CPU_Penryn,
|
||||||
|
CPU_Core2, CPU_Nehalem, CPU_Silvermont,
|
||||||
|
CPU_SandyBridge, CPU_IvyBridge,
|
||||||
|
CPU_Haswell, CPU_Broadwell, CPU_None);
|
||||||
|
#endif
|
||||||
|
|
||||||
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_5 // LLVM 3.2, 3.3, 3.4 or 3.5
|
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_5 // LLVM 3.2, 3.3, 3.4 or 3.5
|
||||||
#define CPU_Broadwell CPU_Haswell
|
#define CPU_Broadwell CPU_Haswell
|
||||||
#else /* LLVM 3.6+ */
|
#else /* LLVM 3.6+ */
|
||||||
@@ -513,6 +538,12 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic, boo
|
|||||||
break;
|
break;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
|
||||||
|
case CPU_SKX:
|
||||||
|
isa = "avx512skx-i32x16";
|
||||||
|
break;
|
||||||
|
#endif
|
||||||
|
|
||||||
#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_6
|
#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_6
|
||||||
case CPU_Broadwell:
|
case CPU_Broadwell:
|
||||||
#endif
|
#endif
|
||||||
@@ -915,7 +946,26 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic, boo
|
|||||||
CPUfromISA = CPU_KNL;
|
CPUfromISA = CPU_KNL;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
|
||||||
|
else if (!strcasecmp(isa, "avx512skx-i32x16")) {
|
||||||
|
this->m_isa = Target::SKX_AVX512;
|
||||||
|
this->m_nativeVectorWidth = 16;
|
||||||
|
this->m_nativeVectorAlignment = 64;
|
||||||
|
// ?? this->m_dataTypeWidth = 32;
|
||||||
|
this->m_vectorWidth = 16;
|
||||||
|
this->m_maskingIsFree = true;
|
||||||
|
this->m_maskBitCount = 1;
|
||||||
|
this->m_hasHalf = true;
|
||||||
|
this->m_hasRand = true;
|
||||||
|
this->m_hasGather = this->m_hasScatter = true;
|
||||||
|
this->m_hasTranscendentals = false;
|
||||||
|
// For MIC it is set to true due to performance reasons. The option should be tested.
|
||||||
|
this->m_hasTrigonometry = false;
|
||||||
|
this->m_hasRsqrtd = this->m_hasRcpd = false;
|
||||||
|
this->m_hasVecPrefetch = false;
|
||||||
|
CPUfromISA = CPU_SKX;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#ifdef ISPC_ARM_ENABLED
|
#ifdef ISPC_ARM_ENABLED
|
||||||
else if (!strcasecmp(isa, "neon-i8x16")) {
|
else if (!strcasecmp(isa, "neon-i8x16")) {
|
||||||
this->m_isa = Target::NEON8;
|
this->m_isa = Target::NEON8;
|
||||||
@@ -1144,6 +1194,9 @@ Target::SupportedTargets() {
|
|||||||
"avx2-i32x8, avx2-i32x16, avx2-i64x4, "
|
"avx2-i32x8, avx2-i32x16, avx2-i64x4, "
|
||||||
#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
|
#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
|
||||||
"avx512knl-i32x16, "
|
"avx512knl-i32x16, "
|
||||||
|
#endif
|
||||||
|
#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
|
||||||
|
"avx512skx-i32x16, "
|
||||||
#endif
|
#endif
|
||||||
"generic-x1, generic-x4, generic-x8, generic-x16, "
|
"generic-x1, generic-x4, generic-x8, generic-x16, "
|
||||||
"generic-x32, generic-x64, *-generic-x16, "
|
"generic-x32, generic-x64, *-generic-x16, "
|
||||||
@@ -1219,8 +1272,8 @@ Target::ISAToString(ISA isa) {
|
|||||||
case Target::KNL_AVX512:
|
case Target::KNL_AVX512:
|
||||||
return "avx512knl";
|
return "avx512knl";
|
||||||
#endif
|
#endif
|
||||||
case Target::SKX:
|
case Target::SKX_AVX512:
|
||||||
return "skx";
|
return "avx512skx";
|
||||||
case Target::GENERIC:
|
case Target::GENERIC:
|
||||||
return "generic";
|
return "generic";
|
||||||
#ifdef ISPC_NVPTX_ENABLED
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
@@ -1267,8 +1320,10 @@ Target::ISAToTargetString(ISA isa) {
|
|||||||
case Target::KNL_AVX512:
|
case Target::KNL_AVX512:
|
||||||
return "avx512knl-i32x16";
|
return "avx512knl-i32x16";
|
||||||
#endif
|
#endif
|
||||||
case Target::SKX:
|
#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
|
||||||
return "avx2";
|
case Target::SKX_AVX512:
|
||||||
|
return "avx512skx-i32x16";
|
||||||
|
#endif
|
||||||
case Target::GENERIC:
|
case Target::GENERIC:
|
||||||
return "generic-4";
|
return "generic-4";
|
||||||
#ifdef ISPC_NVPTX_ENABLED
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
|
|||||||
2
ispc.h
2
ispc.h
@@ -193,7 +193,7 @@ public:
|
|||||||
AVX11 = 3,
|
AVX11 = 3,
|
||||||
AVX2 = 4,
|
AVX2 = 4,
|
||||||
KNL_AVX512 = 5,
|
KNL_AVX512 = 5,
|
||||||
SKX = 6,
|
SKX_AVX512 = 6,
|
||||||
GENERIC = 7,
|
GENERIC = 7,
|
||||||
#ifdef ISPC_NVPTX_ENABLED
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
NVPTX,
|
NVPTX,
|
||||||
|
|||||||
@@ -2809,10 +2809,8 @@ lCreateDispatchFunction(llvm::Module *module, llvm::Function *setISAFunc,
|
|||||||
!g->target->getTreatGenericAsSmth().empty()) {
|
!g->target->getTreatGenericAsSmth().empty()) {
|
||||||
if (g->target->getTreatGenericAsSmth() == "knl_generic")
|
if (g->target->getTreatGenericAsSmth() == "knl_generic")
|
||||||
dispatchNum = Target::KNL_AVX512;
|
dispatchNum = Target::KNL_AVX512;
|
||||||
else if (g->target->getTreatGenericAsSmth() == "skx_generic")
|
|
||||||
dispatchNum = Target::SKX;
|
|
||||||
else {
|
else {
|
||||||
Error(SourcePos(), "*-generic target can be called only with knl or skx");
|
Error(SourcePos(), "*-generic target can be called only with knl");
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user