added SKX target definition
This commit is contained in:
2
Makefile
2
Makefile
@@ -202,7 +202,7 @@ HEADERS=ast.h builtins.h ctx.h decl.h expr.h func.h ispc.h llvmutil.h module.h \
|
||||
opt.h stmt.h sym.h type.h util.h
|
||||
TARGETS=avx2-i64x4 avx11-i64x4 avx1-i64x4 avx1 avx1-x2 avx11 avx11-x2 avx2 avx2-x2 \
|
||||
sse2 sse2-x2 sse4-8 sse4-16 sse4 sse4-x2 \
|
||||
generic-4 generic-8 generic-16 generic-32 generic-64 generic-1 knl
|
||||
generic-4 generic-8 generic-16 generic-32 generic-64 generic-1 knl skx
|
||||
ifneq ($(ARM_ENABLED), 0)
|
||||
TARGETS+=neon-32 neon-16 neon-8
|
||||
endif
|
||||
|
||||
16
alloy.py
16
alloy.py
@@ -343,12 +343,12 @@ def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra,
|
||||
|
||||
|
||||
def unsupported_llvm_targets(LLVM_VERSION):
|
||||
prohibited_list = {"3.2":["avx512knl-i32x16"],
|
||||
"3.3":["avx512knl-i32x16"],
|
||||
"3.4":["avx512knl-i32x16"],
|
||||
"3.5":["avx512knl-i32x16"],
|
||||
"3.6":["avx512knl-i32x16"],
|
||||
"3.7":[],
|
||||
prohibited_list = {"3.2":["avx512knl-i32x16", "avx512skx-i32x16"],
|
||||
"3.3":["avx512knl-i32x16", "avx512skx-i32x16"],
|
||||
"3.4":["avx512knl-i32x16", "avx512skx-i32x16"],
|
||||
"3.5":["avx512knl-i32x16", "avx512skx-i32x16"],
|
||||
"3.6":["avx512knl-i32x16", "avx512skx-i32x16"],
|
||||
"3.7":["avx512skx-i32x16"],
|
||||
"3.8":[],
|
||||
"3.9":[],
|
||||
"trunk":[]}
|
||||
@@ -379,7 +379,7 @@ def check_targets():
|
||||
KNL = ["knl-generic", "avx512knl-i32x16"]
|
||||
|
||||
targets = [["AVX2", AVX2, False], ["AVX1.1", AVX11, False], ["AVX", AVX, False], ["SSE4", SSE4, False],
|
||||
["SSE2", SSE2, False], ["KNL", KNL, False]]
|
||||
["SSE2", SSE2, False], ["KNL", KNL, False], ["SKX", SKX, False]]
|
||||
f_lines = take_lines("check_isa.exe", "first")
|
||||
for i in range(0,5):
|
||||
if targets[i][0] in f_lines:
|
||||
@@ -403,6 +403,8 @@ def check_targets():
|
||||
# here we have SDE
|
||||
f_lines = take_lines(sde_exists + " -help", "all")
|
||||
for i in range(0,len(f_lines)):
|
||||
if targets[6][2] == False and "skx" in f_lines[i]:
|
||||
# Append exactly one [sde_flag, ispc_target] pair. The KNL branch below adds
# entries in this nested shape; concatenating the two bare strings would
# instead add two separate string elements to answer_sde.
answer_sde = answer_sde + [["-skx", "avx512skx-i32x16"]]
|
||||
if targets[5][2] == False and "knl" in f_lines[i]:
|
||||
answer_sde = answer_sde + [["-knl", "knl-generic"], ["-knl", "avx512knl-i32x16"]]
|
||||
if targets[3][2] == False and "wsm" in f_lines[i]:
|
||||
|
||||
17
builtins.cpp
17
builtins.cpp
@@ -1377,6 +1377,23 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
|
||||
}
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
|
||||
case Target::SKX_AVX512: {
|
||||
switch (g->target->getVectorWidth()) {
|
||||
case 16:
|
||||
if (runtime32) {
|
||||
EXPORT_MODULE(builtins_bitcode_skx_32bit);
|
||||
}
|
||||
else {
|
||||
EXPORT_MODULE(builtins_bitcode_skx_64bit);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
FATAL("logic error in DefineStdlib");
|
||||
}
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
case Target::GENERIC: {
|
||||
switch (g->target->getVectorWidth()) {
|
||||
|
||||
@@ -511,13 +511,6 @@ define float @__rsqrt_uniform_float(float) nounwind readonly alwaysinline {
|
||||
ret float %half_scale
|
||||
}
|
||||
|
||||
declare <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone
|
||||
|
||||
;; 16-wide reciprocal square root: forwards %v straight to the rsqrt28
;; intrinsic and returns its result with no refinement step.
define <16 x float> @__rsqrt_varying_float(<16 x float> %v) nounwind readonly alwaysinline {
  ; second operand is undef, i16 -1 has every bit set, last operand is the
  ; constant 8 (presumably the rounding/exception control - TODO confirm)
  %approx = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %v, <16 x float> undef, i16 -1, i32 8)
  ret <16 x float> %approx
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; rcp
|
||||
|
||||
@@ -538,13 +531,6 @@ define float @__rcp_uniform_float(float) nounwind readonly alwaysinline {
|
||||
ret float %iv_mul
|
||||
}
|
||||
|
||||
declare <16 x float> @llvm.x86.avx512.rcp28.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone
|
||||
|
||||
;; 16-wide reciprocal: forwards the argument straight to the rcp28
;; intrinsic and returns its result with no refinement step.
define <16 x float> @__rcp_varying_float(<16 x float>) nounwind readonly alwaysinline {
  ; second operand is undef, i16 -1 has every bit set, last operand is the
  ; constant 8 (presumably the rounding/exception control - TODO confirm)
  %approx = call <16 x float> @llvm.x86.avx512.rcp28.ps(<16 x float> %0, <16 x float> undef, i16 -1, i32 8)
  ret <16 x float> %approx
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; sqrt
|
||||
|
||||
|
||||
@@ -40,5 +40,24 @@ ifelse(LLVM_VERSION, LLVM_3_7,
|
||||
`include(`target-avx512-common.ll')'
|
||||
)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; rcp
|
||||
|
||||
declare <16 x float> @llvm.x86.avx512.rcp28.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone
|
||||
|
||||
;; 16-wide reciprocal: a single call to the rcp28 intrinsic, returned as is
;; (no Newton-Raphson refinement is applied in this version).
define <16 x float> @__rcp_varying_float(<16 x float>) nounwind readonly alwaysinline {
  ; second operand is undef, i16 -1 has every bit set, last operand is the
  ; constant 8 (presumably the rounding/exception control - TODO confirm)
  %recip = call <16 x float> @llvm.x86.avx512.rcp28.ps(<16 x float> %0, <16 x float> undef, i16 -1, i32 8)
  ret <16 x float> %recip
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; rsqrt
|
||||
|
||||
declare <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone
|
||||
|
||||
;; 16-wide reciprocal square root: a single call to the rsqrt28 intrinsic,
;; returned as is (no Newton-Raphson refinement is applied in this version).
define <16 x float> @__rsqrt_varying_float(<16 x float> %v) nounwind readonly alwaysinline {
  ; second operand is undef, i16 -1 has every bit set, last operand is the
  ; constant 8 (presumably the rounding/exception control - TODO confirm)
  %inv_root = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %v, <16 x float> undef, i16 -1, i32 8)
  ret <16 x float> %inv_root
}
|
||||
|
||||
;;saturation_arithmetic_novec()
|
||||
|
||||
85
builtins/target-skx.ll
Normal file
85
builtins/target-skx.ll
Normal file
@@ -0,0 +1,85 @@
|
||||
;; Copyright (c) 2016, Intel Corporation
|
||||
;; All rights reserved.
|
||||
;;
|
||||
;; Redistribution and use in source and binary forms, with or without
|
||||
;; modification, are permitted provided that the following conditions are
|
||||
;; met:
|
||||
;;
|
||||
;; * Redistributions of source code must retain the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer.
|
||||
;;
|
||||
;; * Redistributions in binary form must reproduce the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer in the
|
||||
;; documentation and/or other materials provided with the distribution.
|
||||
;;
|
||||
;; * Neither the name of Intel Corporation nor the names of its
|
||||
;; contributors may be used to endorse or promote products derived from
|
||||
;; this software without specific prior written permission.
|
||||
;;
|
||||
;;
|
||||
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
define(`WIDTH',`16')
|
||||
|
||||
|
||||
ifelse(LLVM_VERSION, LLVM_3_8,
|
||||
`include(`target-avx512-common.ll')',
|
||||
LLVM_VERSION, LLVM_3_9,
|
||||
`include(`target-avx512-common.ll')'
|
||||
)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; rcp
|
||||
|
||||
|
||||
declare <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone
|
||||
|
||||
;; 16-wide reciprocal built on the rcp14 intrinsic, followed by one
;; Newton-Raphson step to improve precision:
;;   est    = rcp14(x)
;;   result = est * (2. - x * est)
define <16 x float> @__rcp_varying_float(<16 x float>) nounwind readonly alwaysinline {
  ; starting estimate; second operand is undef and i16 -1 has every bit set
  %est = call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> %0, <16 x float> undef, i16 -1)
  ; x * est
  %x_est = fmul <16 x float> %0, %est
  ; 2. - x * est
  %correction = fsub <16 x float> <float 2., float 2., float 2., float 2., float 2., float 2., float 2., float 2.,
                                   float 2., float 2., float 2., float 2., float 2., float 2., float 2., float 2.>, %x_est
  ; est * (2. - x * est)
  %refined = fmul <16 x float> %est, %correction
  ret <16 x float> %refined
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; rsqrt
|
||||
|
||||
declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone
|
||||
|
||||
;; 16-wide reciprocal square root built on the rsqrt14 intrinsic, followed
;; by one Newton-Raphson step to improve precision:
;;   est    = rsqrt14(v)
;;   result = 0.5 * (est * (3. - (v * est) * est))
define <16 x float> @__rsqrt_varying_float(<16 x float> %v) nounwind readonly alwaysinline {
  ; starting estimate; second operand is undef and i16 -1 has every bit set
  %est = call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> %v, <16 x float> undef, i16 -1)
  ; (v * est) * est
  %v_est = fmul <16 x float> %v, %est
  %v_est_est = fmul <16 x float> %v_est, %est
  ; 3. - (v * est) * est
  %three_minus = fsub <16 x float> <float 3., float 3., float 3., float 3., float 3., float 3., float 3., float 3.,
                                    float 3., float 3., float 3., float 3., float 3., float 3., float 3., float 3.>, %v_est_est
  ; est * (3. - (v * est) * est)
  %est_scaled = fmul <16 x float> %est, %three_minus
  ; final scale by 0.5
  %refined = fmul <16 x float> <float 0.5, float 0.5, float 0.5, float 0.5, float 0.5, float 0.5, float 0.5, float 0.5,
                                float 0.5, float 0.5, float 0.5, float 0.5, float 0.5, float 0.5, float 0.5, float 0.5>, %est_scaled
  ret <16 x float> %refined
}
|
||||
|
||||
;;saturation_arithmetic_novec()
|
||||
69
ispc.cpp
69
ispc.cpp
@@ -165,7 +165,7 @@ lGetSystemISA() {
|
||||
(info2[1] & (1 << 28)) != 0 && // AVX512 CDI
|
||||
(info2[1] & (1 << 30)) != 0 && // AVX512 BW
|
||||
(info2[1] & (1 << 31)) != 0) { // AVX512 VL
|
||||
return "skx";
|
||||
return "avx512skx-i32x16";
|
||||
}
|
||||
else if ((info2[1] & (1 << 26)) != 0 && // AVX512 PF
|
||||
(info2[1] & (1 << 27)) != 0 && // AVX512 ER
|
||||
@@ -239,10 +239,24 @@ typedef enum {
|
||||
#endif
|
||||
|
||||
#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
|
||||
// KNL. Supports AVX512.
|
||||
// Knights Landing - Xeon Phi.
|
||||
// Supports AVX-512F: All the key AVX-512 features: masking, broadcast... ;
|
||||
// AVX-512CDI: Conflict Detection;
|
||||
// AVX-512ERI & PFI: 28-bit precision RCP, RSQRT and EXP transcendentals,
|
||||
// new prefetch instructions.
|
||||
CPU_KNL,
|
||||
#endif
|
||||
|
||||
#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
|
||||
// Skylake Xeon.
|
||||
// Supports AVX-512F: All the key AVX-512 features: masking, broadcast... ;
|
||||
// AVX-512CDI: Conflict Detection;
|
||||
// AVX-512VL: Vector Length Orthogonality;
|
||||
// AVX-512DQ: New HPC ISA (vs AVX512F);
|
||||
// AVX-512BW: Byte and Word Support.
|
||||
CPU_SKX,
|
||||
#endif
|
||||
|
||||
#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_4 // LLVM 3.4+
|
||||
// Late Atom-like design. Supports SSE 4.2 + POPCNT/LZCNT.
|
||||
CPU_Silvermont,
|
||||
@@ -327,6 +341,10 @@ public:
|
||||
names[CPU_KNL].push_back("knl");
|
||||
#endif
|
||||
|
||||
#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
|
||||
names[CPU_SKX].push_back("skx");
|
||||
#endif
|
||||
|
||||
#ifdef ISPC_ARM_ENABLED
|
||||
names[CPU_CortexA15].push_back("cortex-a15");
|
||||
|
||||
@@ -353,6 +371,13 @@ public:
|
||||
CPU_Haswell, CPU_Broadwell, CPU_None);
|
||||
#endif
|
||||
|
||||
#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
|
||||
compat[CPU_SKX] = Set(CPU_SKX, CPU_Bonnell, CPU_Penryn,
|
||||
CPU_Core2, CPU_Nehalem, CPU_Silvermont,
|
||||
CPU_SandyBridge, CPU_IvyBridge,
|
||||
CPU_Haswell, CPU_Broadwell, CPU_None);
|
||||
#endif
|
||||
|
||||
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_5 // LLVM 3.2, 3.3, 3.4 or 3.5
|
||||
#define CPU_Broadwell CPU_Haswell
|
||||
#else /* LLVM 3.6+ */
|
||||
@@ -513,6 +538,12 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic, boo
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
|
||||
case CPU_SKX:
|
||||
isa = "avx512skx-i32x16";
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_6
|
||||
case CPU_Broadwell:
|
||||
#endif
|
||||
@@ -915,7 +946,26 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic, boo
|
||||
CPUfromISA = CPU_KNL;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
|
||||
else if (!strcasecmp(isa, "avx512skx-i32x16")) {
|
||||
this->m_isa = Target::SKX_AVX512;
|
||||
this->m_nativeVectorWidth = 16;
|
||||
this->m_nativeVectorAlignment = 64;
|
||||
// ?? this->m_dataTypeWidth = 32;
|
||||
this->m_vectorWidth = 16;
|
||||
this->m_maskingIsFree = true;
|
||||
this->m_maskBitCount = 1;
|
||||
this->m_hasHalf = true;
|
||||
this->m_hasRand = true;
|
||||
this->m_hasGather = this->m_hasScatter = true;
|
||||
this->m_hasTranscendentals = false;
|
||||
// For MIC it is set to true due to performance reasons. The option should be tested.
|
||||
this->m_hasTrigonometry = false;
|
||||
this->m_hasRsqrtd = this->m_hasRcpd = false;
|
||||
this->m_hasVecPrefetch = false;
|
||||
CPUfromISA = CPU_SKX;
|
||||
}
|
||||
#endif
|
||||
#ifdef ISPC_ARM_ENABLED
|
||||
else if (!strcasecmp(isa, "neon-i8x16")) {
|
||||
this->m_isa = Target::NEON8;
|
||||
@@ -1144,6 +1194,9 @@ Target::SupportedTargets() {
|
||||
"avx2-i32x8, avx2-i32x16, avx2-i64x4, "
|
||||
#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
|
||||
"avx512knl-i32x16, "
|
||||
#endif
|
||||
#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
|
||||
"avx512skx-i32x16, "
|
||||
#endif
|
||||
"generic-x1, generic-x4, generic-x8, generic-x16, "
|
||||
"generic-x32, generic-x64, *-generic-x16, "
|
||||
@@ -1219,8 +1272,8 @@ Target::ISAToString(ISA isa) {
|
||||
case Target::KNL_AVX512:
|
||||
return "avx512knl";
|
||||
#endif
|
||||
case Target::SKX:
|
||||
return "skx";
|
||||
case Target::SKX_AVX512:
|
||||
return "avx512skx";
|
||||
case Target::GENERIC:
|
||||
return "generic";
|
||||
#ifdef ISPC_NVPTX_ENABLED
|
||||
@@ -1267,8 +1320,10 @@ Target::ISAToTargetString(ISA isa) {
|
||||
case Target::KNL_AVX512:
|
||||
return "avx512knl-i32x16";
|
||||
#endif
|
||||
case Target::SKX:
|
||||
return "avx2";
|
||||
#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
|
||||
case Target::SKX_AVX512:
|
||||
return "avx512skx-i32x16";
|
||||
#endif
|
||||
case Target::GENERIC:
|
||||
return "generic-4";
|
||||
#ifdef ISPC_NVPTX_ENABLED
|
||||
|
||||
2
ispc.h
2
ispc.h
@@ -193,7 +193,7 @@ public:
|
||||
AVX11 = 3,
|
||||
AVX2 = 4,
|
||||
KNL_AVX512 = 5,
|
||||
SKX = 6,
|
||||
SKX_AVX512 = 6,
|
||||
GENERIC = 7,
|
||||
#ifdef ISPC_NVPTX_ENABLED
|
||||
NVPTX,
|
||||
|
||||
@@ -2809,10 +2809,8 @@ lCreateDispatchFunction(llvm::Module *module, llvm::Function *setISAFunc,
|
||||
!g->target->getTreatGenericAsSmth().empty()) {
|
||||
if (g->target->getTreatGenericAsSmth() == "knl_generic")
|
||||
dispatchNum = Target::KNL_AVX512;
|
||||
else if (g->target->getTreatGenericAsSmth() == "skx_generic")
|
||||
dispatchNum = Target::SKX;
|
||||
else {
|
||||
Error(SourcePos(), "*-generic target can be called only with knl or skx");
|
||||
Error(SourcePos(), "*-generic target can be called only with knl");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user