[AVX512]: knl target was added
This commit is contained in:
committed by
Anton Mitrokhin
parent
f5c90dbd43
commit
35222694e5
@@ -112,7 +112,7 @@
|
|||||||
;; else if ((info2[1] & (1 << 26)) != 0 && // AVX512 PF
|
;; else if ((info2[1] & (1 << 26)) != 0 && // AVX512 PF
|
||||||
;; (info2[1] & (1 << 27)) != 0 && // AVX512 ER
|
;; (info2[1] & (1 << 27)) != 0 && // AVX512 ER
|
||||||
;; (info2[1] & (1 << 28)) != 0) { // AVX512 CDI
|
;; (info2[1] & (1 << 28)) != 0) { // AVX512 CDI
|
||||||
;; return 5; // KNL
|
;; return 5; // KNL_AVX512
|
||||||
;; }
|
;; }
|
||||||
;; // If it's unknown AVX512 target, fall through and use AVX2
|
;; // If it's unknown AVX512 target, fall through and use AVX2
|
||||||
;; // or whatever is available in the machine.
|
;; // or whatever is available in the machine.
|
||||||
|
|||||||
@@ -126,7 +126,7 @@ lGetSystemISA() {
|
|||||||
else if ((info2[1] & (1 << 26)) != 0 && // AVX512 PF
|
else if ((info2[1] & (1 << 26)) != 0 && // AVX512 PF
|
||||||
(info2[1] & (1 << 27)) != 0 && // AVX512 ER
|
(info2[1] & (1 << 27)) != 0 && // AVX512 ER
|
||||||
(info2[1] & (1 << 28)) != 0) { // AVX512 CDI
|
(info2[1] & (1 << 28)) != 0) { // AVX512 CDI
|
||||||
return "KNL";
|
return "KNL_AVX512";
|
||||||
}
|
}
|
||||||
// If it's unknown AVX512 target, fall through and use AVX2
|
// If it's unknown AVX512 target, fall through and use AVX2
|
||||||
// or whatever is available in the machine.
|
// or whatever is available in the machine.
|
||||||
|
|||||||
54
ispc.cpp
54
ispc.cpp
@@ -170,7 +170,7 @@ lGetSystemISA() {
|
|||||||
else if ((info2[1] & (1 << 26)) != 0 && // AVX512 PF
|
else if ((info2[1] & (1 << 26)) != 0 && // AVX512 PF
|
||||||
(info2[1] & (1 << 27)) != 0 && // AVX512 ER
|
(info2[1] & (1 << 27)) != 0 && // AVX512 ER
|
||||||
(info2[1] & (1 << 28)) != 0) { // AVX512 CDI
|
(info2[1] & (1 << 28)) != 0) { // AVX512 CDI
|
||||||
return "knl";
|
return "knl-avx512";
|
||||||
}
|
}
|
||||||
// If it's unknown AVX512 target, fall through and use AVX2
|
// If it's unknown AVX512 target, fall through and use AVX2
|
||||||
// or whatever is available in the machine.
|
// or whatever is available in the machine.
|
||||||
@@ -238,6 +238,11 @@ typedef enum {
|
|||||||
CPU_Broadwell,
|
CPU_Broadwell,
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4) && !defined(LLVM_3_5) && !defined(LLVM_3_6)// LLVM 3.7+
|
||||||
|
// KNL. Supports AVX512.
|
||||||
|
CPU_KNL,
|
||||||
|
#endif
|
||||||
|
|
||||||
#if !defined(LLVM_3_2) && !defined(LLVM_3_3) // LLVM 3.4+
|
#if !defined(LLVM_3_2) && !defined(LLVM_3_3) // LLVM 3.4+
|
||||||
// Late Atom-like design. Supports SSE 4.2 + POPCNT/LZCNT.
|
// Late Atom-like design. Supports SSE 4.2 + POPCNT/LZCNT.
|
||||||
CPU_Silvermont,
|
CPU_Silvermont,
|
||||||
@@ -318,6 +323,10 @@ public:
|
|||||||
names[CPU_Broadwell].push_back("broadwell");
|
names[CPU_Broadwell].push_back("broadwell");
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4) && !defined(LLVM_3_5) && !defined(LLVM_3_6)// LLVM 3.7+
|
||||||
|
names[CPU_KNL].push_back("knl");
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef ISPC_ARM_ENABLED
|
#ifdef ISPC_ARM_ENABLED
|
||||||
names[CPU_CortexA15].push_back("cortex-a15");
|
names[CPU_CortexA15].push_back("cortex-a15");
|
||||||
|
|
||||||
@@ -336,6 +345,14 @@ public:
|
|||||||
CPU_Core2, CPU_Nehalem, CPU_Silvermont,
|
CPU_Core2, CPU_Nehalem, CPU_Silvermont,
|
||||||
CPU_None);
|
CPU_None);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4) && !defined(LLVM_3_5) && !defined(LLVM_3_6)// LLVM 3.7+
|
||||||
|
compat[CPU_KNL] = Set(CPU_Generic, CPU_Bonnell, CPU_Penryn,
|
||||||
|
CPU_Core2, CPU_Nehalem, CPU_Silvermont,
|
||||||
|
CPU_SandyBridge, CPU_IvyBridge,
|
||||||
|
CPU_Haswell, CPU_Broadwell, CPU_None);
|
||||||
|
#endif
|
||||||
|
|
||||||
#if defined(LLVM_3_2) || defined(LLVM_3_3) || defined(LLVM_3_4) || defined(LLVM_3_5) // LLVM 3.6+
|
#if defined(LLVM_3_2) || defined(LLVM_3_3) || defined(LLVM_3_4) || defined(LLVM_3_5) // LLVM 3.6+
|
||||||
#define CPU_Broadwell CPU_Haswell
|
#define CPU_Broadwell CPU_Haswell
|
||||||
#else
|
#else
|
||||||
@@ -490,6 +507,12 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic, boo
|
|||||||
break;
|
break;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4) && !defined(LLVM_3_5) && !defined(LLVM_3_6)// LLVM 3.7+
|
||||||
|
case CPU_KNL:
|
||||||
|
isa = "knl-avx512";
|
||||||
|
break;
|
||||||
|
#endif
|
||||||
|
|
||||||
#if !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4) && !defined(LLVM_3_5)
|
#if !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4) && !defined(LLVM_3_5)
|
||||||
case CPU_Broadwell:
|
case CPU_Broadwell:
|
||||||
#endif
|
#endif
|
||||||
@@ -822,7 +845,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic, boo
|
|||||||
// TODO: enable knl and skx support
|
// TODO: enable knl and skx support
|
||||||
// They are downconverted to avx2 for code generation.
|
// They are downconverted to avx2 for code generation.
|
||||||
!strcasecmp(isa, "skx") ||
|
!strcasecmp(isa, "skx") ||
|
||||||
!strcasecmp(isa, "knl")) {
|
!strcasecmp(isa, "knl-avx512")) {
|
||||||
this->m_isa = Target::AVX2;
|
this->m_isa = Target::AVX2;
|
||||||
this->m_nativeVectorWidth = 8;
|
this->m_nativeVectorWidth = 8;
|
||||||
this->m_nativeVectorAlignment = 32;
|
this->m_nativeVectorAlignment = 32;
|
||||||
@@ -862,6 +885,27 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic, boo
|
|||||||
this->m_hasGather = true;
|
this->m_hasGather = true;
|
||||||
CPUfromISA = CPU_Haswell;
|
CPUfromISA = CPU_Haswell;
|
||||||
}
|
}
|
||||||
|
#if !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4) && !defined(LLVM_3_5) && !defined(LLVM_3_6)// LLVM 3.7+
|
||||||
|
else if (!strcasecmp(isa, "knl-avx512")) {
|
||||||
|
this->m_isa = Target::KNL_AVX512;
|
||||||
|
this->m_nativeVectorWidth = 16;
|
||||||
|
this->m_nativeVectorAlignment = 64;
|
||||||
|
// TODO: decide whether this->m_dataTypeWidth = 32 should be set here.
|
||||||
|
this->m_vectorWidth = 16;
|
||||||
|
this->m_maskingIsFree = true;
|
||||||
|
this->m_maskBitCount = 1;
|
||||||
|
this->m_hasHalf = true;
|
||||||
|
this->m_hasRand = true;
|
||||||
|
this->m_hasGather = this->m_hasScatter = true;
|
||||||
|
this->m_hasTranscendentals = true;
|
||||||
|
// This is set to true for MIC for performance reasons; the option still needs to be tested.
|
||||||
|
this->m_hasTrigonometry = true;
|
||||||
|
this->m_hasRsqrtd = this->m_hasRcpd = true;
|
||||||
|
this->m_hasVecPrefetch = true;
|
||||||
|
CPUfromISA = CPU_KNL;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef ISPC_ARM_ENABLED
|
#ifdef ISPC_ARM_ENABLED
|
||||||
else if (!strcasecmp(isa, "neon-i8x16")) {
|
else if (!strcasecmp(isa, "neon-i8x16")) {
|
||||||
this->m_isa = Target::NEON8;
|
this->m_isa = Target::NEON8;
|
||||||
@@ -1155,8 +1199,8 @@ Target::ISAToString(ISA isa) {
|
|||||||
return "avx11";
|
return "avx11";
|
||||||
case Target::AVX2:
|
case Target::AVX2:
|
||||||
return "avx2";
|
return "avx2";
|
||||||
case Target::KNL:
|
case Target::KNL_AVX512:
|
||||||
return "knl";
|
return "knl-avx512";
|
||||||
case Target::SKX:
|
case Target::SKX:
|
||||||
return "skx";
|
return "skx";
|
||||||
case Target::GENERIC:
|
case Target::GENERIC:
|
||||||
@@ -1203,7 +1247,7 @@ Target::ISAToTargetString(ISA isa) {
|
|||||||
return "avx2-i32x8";
|
return "avx2-i32x8";
|
||||||
// TODO: enable knl and skx support.
|
// TODO: enable knl and skx support.
|
||||||
// They are downconverted to avx2 for code generation.
|
// They are downconverted to avx2 for code generation.
|
||||||
case Target::KNL:
|
case Target::KNL_AVX512:
|
||||||
return "avx2";
|
return "avx2";
|
||||||
case Target::SKX:
|
case Target::SKX:
|
||||||
return "avx2";
|
return "avx2";
|
||||||
|
|||||||
16
ispc.h
16
ispc.h
@@ -187,14 +187,14 @@ public:
|
|||||||
also that __best_available_isa() needs to be updated if ISAs are
|
also that __best_available_isa() needs to be updated if ISAs are
|
||||||
added or the enumerant values are reordered. */
|
added or the enumerant values are reordered. */
|
||||||
enum ISA {
|
enum ISA {
|
||||||
SSE2 = 0,
|
SSE2 = 0,
|
||||||
SSE4 = 1,
|
SSE4 = 1,
|
||||||
AVX = 2,
|
AVX = 2,
|
||||||
AVX11 = 3,
|
AVX11 = 3,
|
||||||
AVX2 = 4,
|
AVX2 = 4,
|
||||||
KNL = 5,
|
KNL_AVX512 = 5,
|
||||||
SKX = 6,
|
SKX = 6,
|
||||||
GENERIC = 7,
|
GENERIC = 7,
|
||||||
#ifdef ISPC_NVPTX_ENABLED
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
NVPTX,
|
NVPTX,
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
Reference in New Issue
Block a user