Merge pull request #1042 from ncos/sg_native_knl

'Native' KNL support (AVX512 target)
This commit is contained in:
Dmitry Babokin
2015-05-21 15:44:06 +03:00
10 changed files with 990 additions and 28 deletions

View File

@@ -170,7 +170,7 @@ lGetSystemISA() {
else if ((info2[1] & (1 << 26)) != 0 && // AVX512 PF
(info2[1] & (1 << 27)) != 0 && // AVX512 ER
(info2[1] & (1 << 28)) != 0) { // AVX512 CDI
return "knl";
return "knl-avx512";
}
// If it's unknown AVX512 target, fall through and use AVX2
// or whatever is available in the machine.
@@ -238,6 +238,11 @@ typedef enum {
CPU_Broadwell,
#endif
#if !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4) && !defined(LLVM_3_5) && !defined(LLVM_3_6)// LLVM 3.7+
// KNL. Supports AVX512.
CPU_KNL,
#endif
#if !defined(LLVM_3_2) && !defined(LLVM_3_3) // LLVM 3.4+
// Late Atom-like design. Supports SSE 4.2 + POPCNT/LZCNT.
CPU_Silvermont,
@@ -318,6 +323,10 @@ public:
names[CPU_Broadwell].push_back("broadwell");
#endif
#if !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4) && !defined(LLVM_3_5) && !defined(LLVM_3_6)// LLVM 3.7+
names[CPU_KNL].push_back("knl");
#endif
#ifdef ISPC_ARM_ENABLED
names[CPU_CortexA15].push_back("cortex-a15");
@@ -336,6 +345,14 @@ public:
CPU_Core2, CPU_Nehalem, CPU_Silvermont,
CPU_None);
#endif
#if !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4) && !defined(LLVM_3_5) && !defined(LLVM_3_6)// LLVM 3.7+
compat[CPU_KNL] = Set(CPU_KNL, CPU_Generic, CPU_Bonnell, CPU_Penryn,
CPU_Core2, CPU_Nehalem, CPU_Silvermont,
CPU_SandyBridge, CPU_IvyBridge,
CPU_Haswell, CPU_Broadwell, CPU_None);
#endif
#if defined(LLVM_3_2) || defined(LLVM_3_3) || defined(LLVM_3_4) || defined(LLVM_3_5) // LLVM 3.6+
#define CPU_Broadwell CPU_Haswell
#else
@@ -490,6 +507,12 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic, boo
break;
#endif
#if !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4) && !defined(LLVM_3_5) && !defined(LLVM_3_6)// LLVM 3.7+
case CPU_KNL:
isa = "knl-avx512";
break;
#endif
#if !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4) && !defined(LLVM_3_5)
case CPU_Broadwell:
#endif
@@ -828,11 +851,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic, boo
CPUfromISA = CPU_IvyBridge;
}
else if (!strcasecmp(isa, "avx2") ||
!strcasecmp(isa, "avx2-i32x8") ||
// TODO: enable knl and skx support
// They are downconverted to avx2 for code generation.
!strcasecmp(isa, "skx") ||
!strcasecmp(isa, "knl")) {
!strcasecmp(isa, "avx2-i32x8")) {
this->m_isa = Target::AVX2;
this->m_nativeVectorWidth = 8;
this->m_nativeVectorAlignment = 32;
@@ -872,6 +891,27 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic, boo
this->m_hasGather = true;
CPUfromISA = CPU_Haswell;
}
#if !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4) && !defined(LLVM_3_5) && !defined(LLVM_3_6)// LLVM 3.7+
else if (!strcasecmp(isa, "knl-avx512")) {
this->m_isa = Target::KNL_AVX512;
this->m_nativeVectorWidth = 16;
this->m_nativeVectorAlignment = 64;
// ?? this->m_dataTypeWidth = 32;
this->m_vectorWidth = 16;
this->m_maskingIsFree = true;
this->m_maskBitCount = 1;
this->m_hasHalf = true;
this->m_hasRand = true;
this->m_hasGather = this->m_hasScatter = true;
this->m_hasTranscendentals = false;
// For MIC it is set to true due to performance reasons. The option should be tested.
this->m_hasTrigonometry = false;
this->m_hasRsqrtd = this->m_hasRcpd = false;
this->m_hasVecPrefetch = false;
CPUfromISA = CPU_KNL;
}
#endif
#ifdef ISPC_ARM_ENABLED
else if (!strcasecmp(isa, "neon-i8x16")) {
this->m_isa = Target::NEON8;
@@ -909,8 +949,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic, boo
}
#endif
#ifdef ISPC_NVPTX_ENABLED
else if (!strcasecmp(isa, "nvptx"))
{
else if (!strcasecmp(isa, "nvptx")) {
this->m_isa = Target::NVPTX;
this->m_cpu = "sm_35";
this->m_nativeVectorWidth = 32;
@@ -1096,7 +1135,10 @@ Target::SupportedTargets() {
"avx1.1-i32x8, avx1.1-i32x16, avx1.1-i64x4 "
"avx2-i32x8, avx2-i32x16, avx2-i64x4, "
"generic-x1, generic-x4, generic-x8, generic-x16, "
"generic-x32, generic-x64, *-generic-x16"
"generic-x32, generic-x64, *-generic-x16, "
#if !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4) && !defined(LLVM_3_5) && !defined(LLVM_3_6)// LLVM 3.7+
"knl-avx512"
#endif
#ifdef ISPC_ARM_ENABLED
", neon-i8x16, neon-i16x8, neon-i32x4"
#endif
@@ -1165,8 +1207,10 @@ Target::ISAToString(ISA isa) {
return "avx11";
case Target::AVX2:
return "avx2";
case Target::KNL:
return "knl";
#if !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4) && !defined(LLVM_3_5) && !defined(LLVM_3_6)// LLVM 3.7+
case Target::KNL_AVX512:
return "knl-avx512";
#endif
case Target::SKX:
return "skx";
case Target::GENERIC:
@@ -1211,10 +1255,10 @@ Target::ISAToTargetString(ISA isa) {
return "avx1.1-i32x8";
case Target::AVX2:
return "avx2-i32x8";
// TODO: enable knl and skx support.
// They are downconverted to avx2 for code generation.
case Target::KNL:
return "avx2";
#if !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4) && !defined(LLVM_3_5) && !defined(LLVM_3_6)// LLVM 3.7+
case Target::KNL_AVX512:
return "knl-avx512";
#endif
case Target::SKX:
return "avx2";
case Target::GENERIC: