added SKX target definition

This commit is contained in:
Andrey Shishpanov
2016-02-12 18:22:48 +03:00
parent 28227f1a8b
commit 1324e6cdd5
9 changed files with 195 additions and 33 deletions

View File

@@ -202,7 +202,7 @@ HEADERS=ast.h builtins.h ctx.h decl.h expr.h func.h ispc.h llvmutil.h module.h \
opt.h stmt.h sym.h type.h util.h
TARGETS=avx2-i64x4 avx11-i64x4 avx1-i64x4 avx1 avx1-x2 avx11 avx11-x2 avx2 avx2-x2 \
sse2 sse2-x2 sse4-8 sse4-16 sse4 sse4-x2 \
generic-4 generic-8 generic-16 generic-32 generic-64 generic-1 knl
generic-4 generic-8 generic-16 generic-32 generic-64 generic-1 knl skx
ifneq ($(ARM_ENABLED), 0)
TARGETS+=neon-32 neon-16 neon-8
endif

View File

@@ -343,12 +343,12 @@ def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra,
def unsupported_llvm_targets(LLVM_VERSION):
prohibited_list = {"3.2":["avx512knl-i32x16"],
"3.3":["avx512knl-i32x16"],
"3.4":["avx512knl-i32x16"],
"3.5":["avx512knl-i32x16"],
"3.6":["avx512knl-i32x16"],
"3.7":[],
prohibited_list = {"3.2":["avx512knl-i32x16", "avx512skx-i32x16"],
"3.3":["avx512knl-i32x16", "avx512skx-i32x16"],
"3.4":["avx512knl-i32x16", "avx512skx-i32x16"],
"3.5":["avx512knl-i32x16", "avx512skx-i32x16"],
"3.6":["avx512knl-i32x16", "avx512skx-i32x16"],
"3.7":["avx512skx-i32x16"],
"3.8":[],
"3.9":[],
"trunk":[]}
@@ -379,7 +379,7 @@ def check_targets():
KNL = ["knl-generic", "avx512knl-i32x16"]
targets = [["AVX2", AVX2, False], ["AVX1.1", AVX11, False], ["AVX", AVX, False], ["SSE4", SSE4, False],
["SSE2", SSE2, False], ["KNL", KNL, False]]
["SSE2", SSE2, False], ["KNL", KNL, False], ["SKX", SKX, False]]
f_lines = take_lines("check_isa.exe", "first")
for i in range(0,5):
if targets[i][0] in f_lines:
@@ -403,6 +403,8 @@ def check_targets():
# here we have SDE
f_lines = take_lines(sde_exists + " -help", "all")
for i in range(0,len(f_lines)):
# The SDE target list holds [sde_flag, ispc_target] pairs (see the KNL
# entries appended just below), so SKX must be appended as one pair rather
# than as two loose strings.
if targets[6][2] == False and "skx" in f_lines[i]:
    answer_sde = answer_sde + [["-skx", "avx512skx-i32x16"]]
if targets[5][2] == False and "knl" in f_lines[i]:
answer_sde = answer_sde + [["-knl", "knl-generic"], ["-knl", "avx512knl-i32x16"]]
if targets[3][2] == False and "wsm" in f_lines[i]:

View File

@@ -1377,6 +1377,23 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
}
break;
}
#endif
#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
case Target::SKX_AVX512: {
switch (g->target->getVectorWidth()) {
case 16:
if (runtime32) {
EXPORT_MODULE(builtins_bitcode_skx_32bit);
}
else {
EXPORT_MODULE(builtins_bitcode_skx_64bit);
}
break;
default:
FATAL("logic error in DefineStdlib");
}
break;
}
#endif
case Target::GENERIC: {
switch (g->target->getVectorWidth()) {

View File

@@ -511,13 +511,6 @@ define float @__rsqrt_uniform_float(float) nounwind readonly alwaysinline {
ret float %half_scale
}
;; External rsqrt28 intrinsic on 16 floats; declared here so the wrapper can call it.
declare <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone
;; Varying reciprocal square root: forwards %v to the rsqrt28 intrinsic and
;; returns the result as-is, with no extra refinement step.
define <16 x float> @__rsqrt_varying_float(<16 x float> %v) nounwind readonly alwaysinline {
; Second operand is undef and the i16 operand is -1, i.e. all 16 bits set.
; NOTE(review): i32 8 is presumably the rounding/exception control - confirm.
%res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %v, <16 x float> undef, i16 -1, i32 8)
ret <16 x float> %res
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; rcp
@@ -538,13 +531,6 @@ define float @__rcp_uniform_float(float) nounwind readonly alwaysinline {
ret float %iv_mul
}
;; External rcp28 intrinsic on 16 floats; declared here so the wrapper can call it.
declare <16 x float> @llvm.x86.avx512.rcp28.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone
;; Varying reciprocal: forwards the unnamed argument %0 to the rcp28 intrinsic
;; and returns the result as-is, with no extra refinement step.
define <16 x float> @__rcp_varying_float(<16 x float>) nounwind readonly alwaysinline {
; Second operand is undef and the i16 operand is -1, i.e. all 16 bits set.
; NOTE(review): i32 8 is presumably the rounding/exception control - confirm.
%res = call <16 x float> @llvm.x86.avx512.rcp28.ps(<16 x float> %0, <16 x float> undef, i16 -1, i32 8)
ret <16 x float> %res
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sqrt

View File

@@ -40,5 +40,24 @@ ifelse(LLVM_VERSION, LLVM_3_7,
`include(`target-avx512-common.ll')'
)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; rcp
;; External rcp28 intrinsic on 16 floats; declared here so the wrapper can call it.
declare <16 x float> @llvm.x86.avx512.rcp28.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone
;; Varying reciprocal for this target: forwards the unnamed argument %0 to the
;; rcp28 intrinsic and returns the result as-is, with no extra refinement step.
define <16 x float> @__rcp_varying_float(<16 x float>) nounwind readonly alwaysinline {
; Second operand is undef and the i16 operand is -1, i.e. all 16 bits set.
; NOTE(review): i32 8 is presumably the rounding/exception control - confirm.
%res = call <16 x float> @llvm.x86.avx512.rcp28.ps(<16 x float> %0, <16 x float> undef, i16 -1, i32 8)
ret <16 x float> %res
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; rsqrt
;; External rsqrt28 intrinsic on 16 floats; declared here so the wrapper can call it.
declare <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone
;; Varying reciprocal square root for this target: forwards %v to the rsqrt28
;; intrinsic and returns the result as-is, with no extra refinement step.
define <16 x float> @__rsqrt_varying_float(<16 x float> %v) nounwind readonly alwaysinline {
; Second operand is undef and the i16 operand is -1, i.e. all 16 bits set.
; NOTE(review): i32 8 is presumably the rounding/exception control - confirm.
%res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %v, <16 x float> undef, i16 -1, i32 8)
ret <16 x float> %res
}
;;saturation_arithmetic_novec()

85
builtins/target-skx.ll Normal file
View File

@@ -0,0 +1,85 @@
;; Copyright (c) 2016, Intel Corporation
;; All rights reserved.
;;
;; Redistribution and use in source and binary forms, with or without
;; modification, are permitted provided that the following conditions are
;; met:
;;
;; * Redistributions of source code must retain the above copyright
;; notice, this list of conditions and the following disclaimer.
;;
;; * Redistributions in binary form must reproduce the above copyright
;; notice, this list of conditions and the following disclaimer in the
;; documentation and/or other materials provided with the distribution.
;;
;; * Neither the name of Intel Corporation nor the names of its
;; contributors may be used to endorse or promote products derived from
;; this software without specific prior written permission.
;;
;;
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
define(`WIDTH',`16')
ifelse(LLVM_VERSION, LLVM_3_8,
`include(`target-avx512-common.ll')',
LLVM_VERSION, LLVM_3_9,
`include(`target-avx512-common.ll')'
)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; rcp
;; External rcp14 intrinsic on 16 floats; supplies the starting estimate below.
declare <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone

;; Varying reciprocal: take the rcp14 estimate r0 of x and sharpen it with a
;; single Newton-Raphson step,  r1 = r0 * (2 - x * r0).
define <16 x float> @__rcp_varying_float(<16 x float> %x) nounwind readonly alwaysinline {
  ; r0: starting estimate; second operand is undef, i16 -1 has all 16 bits set
  %approx = call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> %x, <16 x float> undef, i16 -1)
  ; x * r0
  %x_approx = fmul <16 x float> %x, %approx
  ; 2 - x * r0
  %correction = fsub <16 x float> <float 2., float 2., float 2., float 2., float 2., float 2., float 2., float 2.,
                                   float 2., float 2., float 2., float 2., float 2., float 2., float 2., float 2.>,
                                  %x_approx
  ; r0 * (2 - x * r0)
  %refined = fmul <16 x float> %approx, %correction
  ret <16 x float> %refined
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; rsqrt
;; External rsqrt14 intrinsic on 16 floats; supplies the starting estimate below.
declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone

;; Varying reciprocal square root: take the rsqrt14 estimate y0 of v and sharpen
;; it with a single Newton-Raphson step,  y1 = 0.5 * (y0 * (3 - (v * y0) * y0)).
define <16 x float> @__rsqrt_varying_float(<16 x float> %v) nounwind readonly alwaysinline {
  ; y0: starting estimate; second operand is undef, i16 -1 has all 16 bits set
  %approx = call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> %v, <16 x float> undef, i16 -1)
  ; v * y0
  %v_approx = fmul <16 x float> %v, %approx
  ; (v * y0) * y0
  %v_approx_sq = fmul <16 x float> %v_approx, %approx
  ; 3 - (v * y0) * y0
  %residual = fsub <16 x float> <float 3., float 3., float 3., float 3., float 3., float 3., float 3., float 3.,
                                 float 3., float 3., float 3., float 3., float 3., float 3., float 3., float 3.>,
                                %v_approx_sq
  ; y0 * (3 - (v * y0) * y0)
  %scaled = fmul <16 x float> %approx, %residual
  ; 0.5 * the product above
  %refined = fmul <16 x float> <float 0.5, float 0.5, float 0.5, float 0.5, float 0.5, float 0.5, float 0.5, float 0.5,
                                float 0.5, float 0.5, float 0.5, float 0.5, float 0.5, float 0.5, float 0.5, float 0.5>,
                               %scaled
  ret <16 x float> %refined
}
;;saturation_arithmetic_novec()

View File

@@ -165,7 +165,7 @@ lGetSystemISA() {
(info2[1] & (1 << 28)) != 0 && // AVX512 CDI
(info2[1] & (1 << 30)) != 0 && // AVX512 BW
(info2[1] & (1 << 31)) != 0) { // AVX512 VL
return "skx";
return "avx512skx-i32x16";
}
else if ((info2[1] & (1 << 26)) != 0 && // AVX512 PF
(info2[1] & (1 << 27)) != 0 && // AVX512 ER
@@ -239,10 +239,24 @@ typedef enum {
#endif
#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
// KNL. Supports AVX512.
// Knights Landing - Xeon Phi.
// Supports AVX-512F: All the key AVX-512 features: masking, broadcast... ;
// AVX-512CDI: Conflict Detection;
// AVX-512ERI & PFI: 28-bit precision RCP, RSQRT and EXP transcendentals,
// new prefetch instructions.
CPU_KNL,
#endif
#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
// Skylake Xeon.
// Supports AVX-512F: All the key AVX-512 features: masking, broadcast... ;
// AVX-512CDI: Conflict Detection;
// AVX-512VL: Vector Length Orthogonality;
// AVX-512DQ: New HPC ISA (vs AVX512F);
// AVX-512BW: Byte and Word Support.
CPU_SKX,
#endif
#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_4 // LLVM 3.4+
// Late Atom-like design. Supports SSE 4.2 + POPCNT/LZCNT.
CPU_Silvermont,
@@ -327,6 +341,10 @@ public:
names[CPU_KNL].push_back("knl");
#endif
#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
names[CPU_SKX].push_back("skx");
#endif
#ifdef ISPC_ARM_ENABLED
names[CPU_CortexA15].push_back("cortex-a15");
@@ -353,6 +371,13 @@ public:
CPU_Haswell, CPU_Broadwell, CPU_None);
#endif
#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
compat[CPU_SKX] = Set(CPU_SKX, CPU_Bonnell, CPU_Penryn,
CPU_Core2, CPU_Nehalem, CPU_Silvermont,
CPU_SandyBridge, CPU_IvyBridge,
CPU_Haswell, CPU_Broadwell, CPU_None);
#endif
#if ISPC_LLVM_VERSION <= ISPC_LLVM_3_5 // LLVM 3.2, 3.3, 3.4 or 3.5
#define CPU_Broadwell CPU_Haswell
#else /* LLVM 3.6+ */
@@ -513,6 +538,12 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic, boo
break;
#endif
#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
case CPU_SKX:
isa = "avx512skx-i32x16";
break;
#endif
#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_6
case CPU_Broadwell:
#endif
@@ -915,7 +946,26 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic, boo
CPUfromISA = CPU_KNL;
}
#endif
#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
else if (!strcasecmp(isa, "avx512skx-i32x16")) {
this->m_isa = Target::SKX_AVX512;
this->m_nativeVectorWidth = 16;
this->m_nativeVectorAlignment = 64;
// ?? this->m_dataTypeWidth = 32;
this->m_vectorWidth = 16;
this->m_maskingIsFree = true;
this->m_maskBitCount = 1;
this->m_hasHalf = true;
this->m_hasRand = true;
this->m_hasGather = this->m_hasScatter = true;
this->m_hasTranscendentals = false;
// For MIC it is set to true due to performance reasons. The option should be tested.
this->m_hasTrigonometry = false;
this->m_hasRsqrtd = this->m_hasRcpd = false;
this->m_hasVecPrefetch = false;
CPUfromISA = CPU_SKX;
}
#endif
#ifdef ISPC_ARM_ENABLED
else if (!strcasecmp(isa, "neon-i8x16")) {
this->m_isa = Target::NEON8;
@@ -1144,6 +1194,9 @@ Target::SupportedTargets() {
"avx2-i32x8, avx2-i32x16, avx2-i64x4, "
#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+
"avx512knl-i32x16, "
#endif
#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
"avx512skx-i32x16, "
#endif
"generic-x1, generic-x4, generic-x8, generic-x16, "
"generic-x32, generic-x64, *-generic-x16, "
@@ -1219,8 +1272,8 @@ Target::ISAToString(ISA isa) {
case Target::KNL_AVX512:
return "avx512knl";
#endif
case Target::SKX:
return "skx";
case Target::SKX_AVX512:
return "avx512skx";
case Target::GENERIC:
return "generic";
#ifdef ISPC_NVPTX_ENABLED
@@ -1267,8 +1320,10 @@ Target::ISAToTargetString(ISA isa) {
case Target::KNL_AVX512:
return "avx512knl-i32x16";
#endif
case Target::SKX:
return "avx2";
#if ISPC_LLVM_VERSION >= ISPC_LLVM_3_8 // LLVM 3.8+
case Target::SKX_AVX512:
return "avx512skx-i32x16";
#endif
case Target::GENERIC:
return "generic-4";
#ifdef ISPC_NVPTX_ENABLED

2
ispc.h
View File

@@ -193,7 +193,7 @@ public:
AVX11 = 3,
AVX2 = 4,
KNL_AVX512 = 5,
SKX = 6,
SKX_AVX512 = 6,
GENERIC = 7,
#ifdef ISPC_NVPTX_ENABLED
NVPTX,

View File

@@ -2809,10 +2809,8 @@ lCreateDispatchFunction(llvm::Module *module, llvm::Function *setISAFunc,
!g->target->getTreatGenericAsSmth().empty()) {
if (g->target->getTreatGenericAsSmth() == "knl_generic")
dispatchNum = Target::KNL_AVX512;
else if (g->target->getTreatGenericAsSmth() == "skx_generic")
dispatchNum = Target::SKX;
else {
Error(SourcePos(), "*-generic target can be called only with knl or skx");
Error(SourcePos(), "*-generic target can be called only with knl");
exit(1);
}
}