[AVX512]: knl target was added

This commit is contained in:
Vsevolod Livinskiy
2015-04-24 09:44:23 +03:00
committed by Anton Mitrokhin
parent f5c90dbd43
commit 35222694e5
4 changed files with 59 additions and 15 deletions

View File

@@ -112,7 +112,7 @@
;; else if ((info2[1] & (1 << 26)) != 0 && // AVX512 PF ;; else if ((info2[1] & (1 << 26)) != 0 && // AVX512 PF
;; (info2[1] & (1 << 27)) != 0 && // AVX512 ER ;; (info2[1] & (1 << 27)) != 0 && // AVX512 ER
;; (info2[1] & (1 << 28)) != 0) { // AVX512 CDI ;; (info2[1] & (1 << 28)) != 0) { // AVX512 CDI
;; return 5; // KNL ;; return 5; // KNL_AVX512
;; } ;; }
;; // If it's unknown AVX512 target, fall through and use AVX2 ;; // If it's unknown AVX512 target, fall through and use AVX2
;; // or whatever is available in the machine. ;; // or whatever is available in the machine.

View File

@@ -126,7 +126,7 @@ lGetSystemISA() {
else if ((info2[1] & (1 << 26)) != 0 && // AVX512 PF else if ((info2[1] & (1 << 26)) != 0 && // AVX512 PF
(info2[1] & (1 << 27)) != 0 && // AVX512 ER (info2[1] & (1 << 27)) != 0 && // AVX512 ER
(info2[1] & (1 << 28)) != 0) { // AVX512 CDI (info2[1] & (1 << 28)) != 0) { // AVX512 CDI
return "KNL"; return "KNL_AVX512";
} }
// If it's unknown AVX512 target, fall through and use AVX2 // If it's unknown AVX512 target, fall through and use AVX2
// or whatever is available in the machine. // or whatever is available in the machine.

View File

@@ -170,7 +170,7 @@ lGetSystemISA() {
else if ((info2[1] & (1 << 26)) != 0 && // AVX512 PF else if ((info2[1] & (1 << 26)) != 0 && // AVX512 PF
(info2[1] & (1 << 27)) != 0 && // AVX512 ER (info2[1] & (1 << 27)) != 0 && // AVX512 ER
(info2[1] & (1 << 28)) != 0) { // AVX512 CDI (info2[1] & (1 << 28)) != 0) { // AVX512 CDI
return "knl"; return "knl-avx512";
} }
// If it's unknown AVX512 target, fall through and use AVX2 // If it's unknown AVX512 target, fall through and use AVX2
// or whatever is available in the machine. // or whatever is available in the machine.
@@ -238,6 +238,11 @@ typedef enum {
CPU_Broadwell, CPU_Broadwell,
#endif #endif
#if !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4) && !defined(LLVM_3_5) && !defined(LLVM_3_6)// LLVM 3.7+
// KNL. Supports AVX512.
CPU_KNL,
#endif
#if !defined(LLVM_3_2) && !defined(LLVM_3_3) // LLVM 3.4+ #if !defined(LLVM_3_2) && !defined(LLVM_3_3) // LLVM 3.4+
// Late Atom-like design. Supports SSE 4.2 + POPCNT/LZCNT. // Late Atom-like design. Supports SSE 4.2 + POPCNT/LZCNT.
CPU_Silvermont, CPU_Silvermont,
@@ -318,6 +323,10 @@ public:
names[CPU_Broadwell].push_back("broadwell"); names[CPU_Broadwell].push_back("broadwell");
#endif #endif
#if !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4) && !defined(LLVM_3_5) && !defined(LLVM_3_6)// LLVM 3.7+
names[CPU_KNL].push_back("knl");
#endif
#ifdef ISPC_ARM_ENABLED #ifdef ISPC_ARM_ENABLED
names[CPU_CortexA15].push_back("cortex-a15"); names[CPU_CortexA15].push_back("cortex-a15");
@@ -336,6 +345,14 @@ public:
CPU_Core2, CPU_Nehalem, CPU_Silvermont, CPU_Core2, CPU_Nehalem, CPU_Silvermont,
CPU_None); CPU_None);
#endif #endif
#if !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4) && !defined(LLVM_3_5) && !defined(LLVM_3_6)// LLVM 3.7+
compat[CPU_KNL] = Set(CPU_Generic, CPU_Bonnell, CPU_Penryn,
CPU_Core2, CPU_Nehalem, CPU_Silvermont,
CPU_SandyBridge, CPU_IvyBridge,
CPU_Haswell, CPU_Broadwell, CPU_None);
#endif
#if defined(LLVM_3_2) || defined(LLVM_3_3) || defined(LLVM_3_4) || defined(LLVM_3_5) // LLVM 3.6+ #if defined(LLVM_3_2) || defined(LLVM_3_3) || defined(LLVM_3_4) || defined(LLVM_3_5) // LLVM 3.6+
#define CPU_Broadwell CPU_Haswell #define CPU_Broadwell CPU_Haswell
#else #else
@@ -490,6 +507,12 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic, boo
break; break;
#endif #endif
#if !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4) && !defined(LLVM_3_5) && !defined(LLVM_3_6)// LLVM 3.7+
case CPU_KNL:
isa = "knl-avx512";
break;
#endif
#if !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4) && !defined(LLVM_3_5) #if !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4) && !defined(LLVM_3_5)
case CPU_Broadwell: case CPU_Broadwell:
#endif #endif
@@ -822,7 +845,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic, boo
// TODO: enable knl and skx support // TODO: enable knl and skx support
// They are downconverted to avx2 for code generation. // They are downconverted to avx2 for code generation.
!strcasecmp(isa, "skx") || !strcasecmp(isa, "skx") ||
!strcasecmp(isa, "knl")) { !strcasecmp(isa, "knl-avx512")) {
this->m_isa = Target::AVX2; this->m_isa = Target::AVX2;
this->m_nativeVectorWidth = 8; this->m_nativeVectorWidth = 8;
this->m_nativeVectorAlignment = 32; this->m_nativeVectorAlignment = 32;
@@ -862,6 +885,27 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic, boo
this->m_hasGather = true; this->m_hasGather = true;
CPUfromISA = CPU_Haswell; CPUfromISA = CPU_Haswell;
} }
#if !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4) && !defined(LLVM_3_5) && !defined(LLVM_3_6)// LLVM 3.7+
else if (!strcasecmp(isa, "knl-avx512")) {
this->m_isa = Target::KNL_AVX512;
this->m_nativeVectorWidth = 16;
this->m_nativeVectorAlignment = 64;
// TODO: decide whether this->m_dataTypeWidth = 32; needs to be set here.
this->m_vectorWidth = 16;
this->m_maskingIsFree = true;
this->m_maskBitCount = 1;
this->m_hasHalf = true;
this->m_hasRand = true;
this->m_hasGather = this->m_hasScatter = true;
this->m_hasTranscendentals = true;
// For MIC this is set to true for performance reasons. The option still needs to be tested.
this->m_hasTrigonometry = true;
this->m_hasRsqrtd = this->m_hasRcpd = true;
this->m_hasVecPrefetch = true;
CPUfromISA = CPU_KNL;
}
#endif
#ifdef ISPC_ARM_ENABLED #ifdef ISPC_ARM_ENABLED
else if (!strcasecmp(isa, "neon-i8x16")) { else if (!strcasecmp(isa, "neon-i8x16")) {
this->m_isa = Target::NEON8; this->m_isa = Target::NEON8;
@@ -1155,8 +1199,8 @@ Target::ISAToString(ISA isa) {
return "avx11"; return "avx11";
case Target::AVX2: case Target::AVX2:
return "avx2"; return "avx2";
case Target::KNL: case Target::KNL_AVX512:
return "knl"; return "knl-avx512";
case Target::SKX: case Target::SKX:
return "skx"; return "skx";
case Target::GENERIC: case Target::GENERIC:
@@ -1203,7 +1247,7 @@ Target::ISAToTargetString(ISA isa) {
return "avx2-i32x8"; return "avx2-i32x8";
// TODO: enable knl and skx support. // TODO: enable knl and skx support.
// They are downconverted to avx2 for code generation. // They are downconverted to avx2 for code generation.
case Target::KNL: case Target::KNL_AVX512:
return "avx2"; return "avx2";
case Target::SKX: case Target::SKX:
return "avx2"; return "avx2";

16
ispc.h
View File

@@ -187,14 +187,14 @@ public:
also that __best_available_isa() needs to be updated if ISAs are also that __best_available_isa() needs to be updated if ISAs are
added or the enumerant values are reordered. */ added or the enumerant values are reordered. */
enum ISA { enum ISA {
SSE2 = 0, SSE2 = 0,
SSE4 = 1, SSE4 = 1,
AVX = 2, AVX = 2,
AVX11 = 3, AVX11 = 3,
AVX2 = 4, AVX2 = 4,
KNL = 5, KNL_AVX512 = 5,
SKX = 6, SKX = 6,
GENERIC = 7, GENERIC = 7,
#ifdef ISPC_NVPTX_ENABLED #ifdef ISPC_NVPTX_ENABLED
NVPTX, NVPTX,
#endif #endif