From 35222694e5b81b5b199ba60621ad58cea01bad92 Mon Sep 17 00:00:00 2001 From: Vsevolod Livinskiy Date: Fri, 24 Apr 2015 09:44:23 +0300 Subject: [PATCH] [AVX512]: knl target was added --- builtins/dispatch.ll | 2 +- check_isa.cpp | 2 +- ispc.cpp | 54 ++++++++++++++++++++++++++++++++++++++++---- ispc.h | 16 ++++++------- 4 files changed, 59 insertions(+), 15 deletions(-) diff --git a/builtins/dispatch.ll b/builtins/dispatch.ll index 055ce705..e0bab120 100644 --- a/builtins/dispatch.ll +++ b/builtins/dispatch.ll @@ -112,7 +112,7 @@ ;; else if ((info2[1] & (1 << 26)) != 0 && // AVX512 PF ;; (info2[1] & (1 << 27)) != 0 && // AVX512 ER ;; (info2[1] & (1 << 28)) != 0) { // AVX512 CDI -;; return 5; // KNL +;; return 5; // KNL_AVX512 ;; } ;; // If it's unknown AVX512 target, fall through and use AVX2 ;; // or whatever is available in the machine. diff --git a/check_isa.cpp b/check_isa.cpp index 6aff860b..8ef3499c 100644 --- a/check_isa.cpp +++ b/check_isa.cpp @@ -126,7 +126,7 @@ lGetSystemISA() { else if ((info2[1] & (1 << 26)) != 0 && // AVX512 PF (info2[1] & (1 << 27)) != 0 && // AVX512 ER (info2[1] & (1 << 28)) != 0) { // AVX512 CDI - return "KNL"; + return "KNL_AVX512"; } // If it's unknown AVX512 target, fall through and use AVX2 // or whatever is available in the machine. diff --git a/ispc.cpp b/ispc.cpp index 8e62191a..9e89b6cf 100644 --- a/ispc.cpp +++ b/ispc.cpp @@ -170,7 +170,7 @@ lGetSystemISA() { else if ((info2[1] & (1 << 26)) != 0 && // AVX512 PF (info2[1] & (1 << 27)) != 0 && // AVX512 ER (info2[1] & (1 << 28)) != 0) { // AVX512 CDI - return "knl"; + return "knl-avx512"; } // If it's unknown AVX512 target, fall through and use AVX2 // or whatever is available in the machine. @@ -238,6 +238,11 @@ typedef enum { CPU_Broadwell, #endif +#if !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4) && !defined(LLVM_3_5) && !defined(LLVM_3_6)// LLVM 3.7+ + // KNL. Supports AVX512. + CPU_KNL, +#endif + #if !defined(LLVM_3_2) && !defined(LLVM_3_3) // LLVM 3.4+ // Late Atom-like design. Supports SSE 4.2 + POPCNT/LZCNT. CPU_Silvermont, @@ -318,6 +323,10 @@ public: names[CPU_Broadwell].push_back("broadwell"); #endif +#if !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4) && !defined(LLVM_3_5) && !defined(LLVM_3_6)// LLVM 3.7+ + names[CPU_KNL].push_back("knl"); +#endif + #ifdef ISPC_ARM_ENABLED names[CPU_CortexA15].push_back("cortex-a15"); @@ -336,6 +345,14 @@ public: CPU_Core2, CPU_Nehalem, CPU_Silvermont, CPU_None); #endif + +#if !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4) && !defined(LLVM_3_5) && !defined(LLVM_3_6)// LLVM 3.7+ + compat[CPU_KNL] = Set(CPU_Generic, CPU_Bonnell, CPU_Penryn, + CPU_Core2, CPU_Nehalem, CPU_Silvermont, + CPU_SandyBridge, CPU_IvyBridge, + CPU_Haswell, CPU_Broadwell, CPU_None); +#endif + #if defined(LLVM_3_2) || defined(LLVM_3_3) || defined(LLVM_3_4) || defined(LLVM_3_5) // LLVM 3.6+ #define CPU_Broadwell CPU_Haswell #else @@ -490,6 +507,12 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic, boo break; #endif +#if !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4) && !defined(LLVM_3_5) && !defined(LLVM_3_6)// LLVM 3.7+ + case CPU_KNL: + isa = "knl-avx512"; + break; +#endif + #if !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4) && !defined(LLVM_3_5) case CPU_Broadwell: #endif @@ -822,7 +845,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic, boo // TODO: enable knl and skx support // They are downconverted to avx2 for code generation. !strcasecmp(isa, "skx") || - !strcasecmp(isa, "knl")) { + !strcasecmp(isa, "knl-avx512")) { this->m_isa = Target::AVX2; this->m_nativeVectorWidth = 8; this->m_nativeVectorAlignment = 32; @@ -862,6 +885,27 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic, boo this->m_hasGather = true; CPUfromISA = CPU_Haswell; } +#if !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4) && !defined(LLVM_3_5) && !defined(LLVM_3_6)// LLVM 3.7+ + else if (!strcasecmp(isa, "knl-avx512")) { + this->m_isa = Target::KNL_AVX512; + this->m_nativeVectorWidth = 16; + this->m_nativeVectorAlignment = 64; + // ?? this->m_dataTypeWidth = 32; + this->m_vectorWidth = 16; + this->m_maskingIsFree = true; + this->m_maskBitCount = 1; + this->m_hasHalf = true; + this->m_hasRand = true; + this->m_hasGather = this->m_hasScatter = true; + this->m_hasTranscendentals = true; + // For MIC it is set to true due to performance reasons. The option should be tested. + this->m_hasTrigonometry = true; + this->m_hasRsqrtd = this->m_hasRcpd = true; + this->m_hasVecPrefetch = true; + CPUfromISA = CPU_KNL; + } +#endif + #ifdef ISPC_ARM_ENABLED else if (!strcasecmp(isa, "neon-i8x16")) { this->m_isa = Target::NEON8; @@ -1155,8 +1199,8 @@ Target::ISAToString(ISA isa) { return "avx11"; case Target::AVX2: return "avx2"; - case Target::KNL: - return "knl"; + case Target::KNL_AVX512: + return "knl-avx512"; case Target::SKX: return "skx"; case Target::GENERIC: @@ -1203,7 +1247,7 @@ Target::ISAToTargetString(ISA isa) { return "avx2-i32x8"; // TODO: enable knl and skx support. // They are downconverted to avx2 for code generation. - case Target::KNL: + case Target::KNL_AVX512: return "avx2"; case Target::SKX: return "avx2"; diff --git a/ispc.h b/ispc.h index cec6e4d6..99fba1b3 100644 --- a/ispc.h +++ b/ispc.h @@ -187,14 +187,14 @@ public: also that __best_available_isa() needs to be updated if ISAs are added or the enumerant values are reordered. */ enum ISA { - SSE2 = 0, - SSE4 = 1, - AVX = 2, - AVX11 = 3, - AVX2 = 4, - KNL = 5, - SKX = 6, - GENERIC = 7, + SSE2 = 0, + SSE4 = 1, + AVX = 2, + AVX11 = 3, + AVX2 = 4, + KNL_AVX512 = 5, + SKX = 6, + GENERIC = 7, #ifdef ISPC_NVPTX_ENABLED NVPTX, #endif