Merge pull request #953 from aguskov/master

New CPU naming model
This commit is contained in:
Dmitry Babokin
2015-02-03 15:46:25 +03:00

390
ispc.cpp
View File

@@ -40,6 +40,7 @@
#include "util.h" #include "util.h"
#include "llvmutil.h" #include "llvmutil.h"
#include <stdio.h> #include <stdio.h>
#include <sstream>
#ifdef ISPC_IS_WINDOWS #ifdef ISPC_IS_WINDOWS
#include <windows.h> #include <windows.h>
#include <direct.h> #include <direct.h>
@@ -161,20 +162,219 @@ lGetSystemISA() {
} }
static const char *supportedCPUs[] = { typedef enum {
#ifdef ISPC_ARM_ENABLED // Special value, indicates that no CPU is present.
CPU_None = 0,
// 'Generic' CPU without any hardware SIMD capabilities.
CPU_Generic = 1,
// Early Atom CPU. Supports SSSE3.
CPU_Bonnell,
// Generic Core2-like. Supports SSSE3. Isn't quite compatible with Bonnell,
// but for ISPC the difference is negligible; ISPC doesn't make use of it.
CPU_Core2,
// Core2 Solo/Duo/Quad/Extreme. Supports SSE 4.1 (but not 4.2).
CPU_Penryn,
// Late Core2-like. Supports SSE 4.2 + POPCNT/LZCNT.
CPU_Nehalem,
// Sandy Bridge. Supports AVX 1.
CPU_SandyBridge,
// Ivy Bridge. Supports AVX 1 + RDRAND.
CPU_IvyBridge,
// Haswell. Supports AVX 2.
CPU_Haswell,
#if !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4) && !defined(LLVM_3_5) // LLVM 3.6+
// Broadwell. Supports AVX 2 + ADX/RDSEED/SMAP.
CPU_Broadwell,
#endif
#if !defined(LLVM_3_2) && !defined(LLVM_3_3) // LLVM 3.4+
// Late Atom-like design. Supports SSE 4.2 + POPCNT/LZCNT.
CPU_Silvermont,
#endif
// FIXME: LLVM supports a ton of different ARM CPU variants--not just // FIXME: LLVM supports a ton of different ARM CPU variants--not just
// cortex-a9 and a15. We should be able to handle any of them that also // cortex-a9 and a15. We should be able to handle any of them that also
// have NEON support. // have NEON support.
"cortex-a9", "cortex-a15", #ifdef ISPC_ARM_ENABLED
// ARM Cortex A15. Supports NEON VFPv4.
CPU_CortexA15,
// ARM Cortex A9. Supports NEON VFPv3.
CPU_CortexA9,
#endif #endif
"atom", "penryn", "core2", "corei7", "corei7-avx"
, "core-avx-i", "core-avx2" #ifdef ISPC_NVPTX_ENABLED
#if !defined(LLVM_3_2) && !defined(LLVM_3_3) // NVidia CUDA-compatible SM-35 architecture.
, "slm" CPU_SM35,
#endif // LLVM 3.4+ #endif
sizeofCPUtype
} CPUtype;
class AllCPUs {
private:
std::vector<std::vector<std::string>> names;
std::vector<std::set<CPUtype>> compat;
std::set<CPUtype> Set(CPUtype type, ...) {
std::set<CPUtype> retn;
va_list args;
retn.insert(type);
va_start(args, type);
while ((type = (CPUtype)va_arg(args, int)) != CPU_None)
retn.insert(type);
va_end(args);
return retn;
}
public:
// Populates the two lookup tables, both indexed by CPUtype:
//  - names[t]:  every accepted spelling of CPU type t.  names[t][0] is the
//               name GetDefaultNameFromType() returns; the remaining entries
//               are printed as synonyms by HumanReadableListOfNames().
//  - compat[t]: the CPU types `with` for which BackwardCompatible(t, with)
//               answers true.
AllCPUs() {
// One (initially empty) slot per enumerator, including the CPU_None slot.
names = std::vector<std::vector<std::string>>(sizeofCPUtype);
compat = std::vector<std::set<CPUtype>>(sizeofCPUtype);
// x86 names.  The first push_back for a type is its default name.
names[CPU_Generic].push_back("generic");
names[CPU_Bonnell].push_back("atom");
names[CPU_Bonnell].push_back("bonnell");
names[CPU_Core2].push_back("core2");
names[CPU_Penryn].push_back("penryn");
#if !defined(LLVM_3_2) && !defined(LLVM_3_3) // LLVM 3.4+
names[CPU_Silvermont].push_back("slm");
names[CPU_Silvermont].push_back("silvermont");
#endif
names[CPU_Nehalem].push_back("corei7");
names[CPU_Nehalem].push_back("nehalem");
names[CPU_SandyBridge].push_back("corei7-avx");
names[CPU_SandyBridge].push_back("sandybridge");
names[CPU_IvyBridge].push_back("core-avx-i");
names[CPU_IvyBridge].push_back("ivybridge");
names[CPU_Haswell].push_back("core-avx2");
names[CPU_Haswell].push_back("haswell");
#if !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4) && !defined(LLVM_3_5) // LLVM 3.6+
names[CPU_Broadwell].push_back("broadwell");
#endif
#ifdef ISPC_ARM_ENABLED
names[CPU_CortexA15].push_back("cortex-a15");
names[CPU_CortexA9].push_back("cortex-a9");
#endif
#ifdef ISPC_NVPTX_ENABLED
names[CPU_SM35].push_back("sm_35");
#endif
// Compatibility sets.  Every Set(...) argument list ends with the CPU_None
// terminator.
//
// When the CPU_Silvermont enumerator is compiled out, the name is aliased to
// CPU_Nehalem so that the Set(...) lists below still compile.
// NOTE(review): there is no #undef in this function, so the macro remains
// defined for the code that follows it -- confirm that this is intended.
#if defined(LLVM_3_2) || defined(LLVM_3_3) // LLVM older than 3.4
#define CPU_Silvermont CPU_Nehalem
#else
compat[CPU_Silvermont] = Set(CPU_Generic, CPU_Bonnell, CPU_Penryn,
CPU_Core2, CPU_Nehalem, CPU_Silvermont,
CPU_None);
#endif
// Same aliasing trick for CPU_Broadwell, which is mapped onto CPU_Haswell
// when the enumerator is compiled out.
#if defined(LLVM_3_2) || defined(LLVM_3_3) || defined(LLVM_3_4) || defined(LLVM_3_5) // LLVM older than 3.6
#define CPU_Broadwell CPU_Haswell
#else
compat[CPU_Broadwell] = Set(CPU_Generic, CPU_Bonnell, CPU_Penryn,
CPU_Core2, CPU_Nehalem, CPU_Silvermont,
CPU_SandyBridge, CPU_IvyBridge,
CPU_Haswell, CPU_Broadwell, CPU_None);
#endif
compat[CPU_Haswell] = Set(CPU_Generic, CPU_Bonnell, CPU_Penryn,
CPU_Core2, CPU_Nehalem, CPU_Silvermont,
CPU_SandyBridge, CPU_IvyBridge,
CPU_Haswell, CPU_Broadwell, CPU_None);
compat[CPU_IvyBridge] = Set(CPU_Generic, CPU_Bonnell, CPU_Penryn,
CPU_Core2, CPU_Nehalem, CPU_Silvermont,
CPU_SandyBridge, CPU_IvyBridge,
CPU_None);
compat[CPU_SandyBridge] = Set(CPU_Generic, CPU_Bonnell, CPU_Penryn,
CPU_Core2, CPU_Nehalem, CPU_Silvermont,
CPU_SandyBridge, CPU_None);
compat[CPU_Nehalem] = Set(CPU_Generic, CPU_Bonnell, CPU_Penryn,
CPU_Core2, CPU_Nehalem, CPU_Silvermont,
CPU_None);
compat[CPU_Penryn] = Set(CPU_Generic, CPU_Bonnell, CPU_Penryn,
CPU_Core2, CPU_Nehalem, CPU_Silvermont,
CPU_None);
compat[CPU_Core2] = Set(CPU_Generic, CPU_Bonnell, CPU_Core2,
CPU_None);
compat[CPU_Bonnell] = Set(CPU_Generic, CPU_Bonnell, CPU_Core2,
CPU_None);
compat[CPU_Generic] = Set(CPU_Generic, CPU_None);
#ifdef ISPC_ARM_ENABLED
compat[CPU_CortexA15] = Set(CPU_Generic, CPU_CortexA9, CPU_CortexA15,
CPU_None);
compat[CPU_CortexA9] = Set(CPU_Generic, CPU_CortexA9, CPU_None);
#endif
#ifdef ISPC_NVPTX_ENABLED
compat[CPU_SM35] = Set(CPU_Generic, CPU_SM35, CPU_None);
#endif
}
// Produces a single comma-separated line describing every known CPU type,
// starting at CPU_Generic.  Each entry is the type's default name, followed
// by " (synonyms: a, b, ...)" when further names are registered for it.
std::string HumanReadableListOfNames() {
    std::stringstream listing;
    const char *entrySeparator = "";

    for (int cpu = CPU_Generic; cpu < sizeofCPUtype; ++cpu) {
        const std::vector<std::string> &aliases = names[cpu];

        // Emitting the separator before every entry but the first gives the
        // same text as emitting it after every entry but the last.
        listing << entrySeparator << aliases[0];
        entrySeparator = ", ";

        if (aliases.size() > 1) {
            listing << " (synonyms: ";
            for (size_t k = 1; k < aliases.size(); ++k) {
                if (k > 1)
                    listing << ", ";
                listing << aliases[k];
            }
            listing << ")";
        }
    }

    return listing.str();
}
// Returns the default name of the given CPU type, i.e. the first name that
// was registered for it.
//
// The result is a reference to the string stored in this object, not a
// copy.  Callers in this file write
//     cpu = a.GetDefaultNameFromType(x).c_str();
// and read `cpu` in a later statement; with a by-value return that pointer
// would refer to a temporary that is destroyed at the end of the statement.
// The reference, and any c_str() taken from it, stays valid for as long as
// this AllCPUs object is alive and its name table is not modified.
//
// `type` must be a real CPU type, strictly between CPU_None and
// sizeofCPUtype; this is enforced with Assert.
const std::string &GetDefaultNameFromType(CPUtype type) {
    Assert((type > CPU_None) && (type < sizeofCPUtype));
    return names[type][0];
}
// Looks up a CPU type by any of its registered names (default name or
// synonym).  The comparison is exact and case-sensitive.  Returns CPU_None
// when no CPU type carries that name.
CPUtype GetTypeFromName(std::string name) {
    // CPU_Generic is the first real entry (value 1); CPU_None is skipped.
    for (int cpu = CPU_Generic; cpu < sizeofCPUtype; ++cpu) {
        const std::vector<std::string> &aliases = names[cpu];
        for (size_t k = 0; k < aliases.size(); ++k) {
            if (name == aliases[k])
                return (CPUtype)cpu;
        }
    }
    return CPU_None;
}
// Reports whether `with` is a member of the compatibility set recorded for
// `what`.  Both arguments must be real CPU types, strictly between CPU_None
// and sizeofCPUtype; this is enforced with Assert.
bool BackwardCompatible(CPUtype what, CPUtype with) {
    Assert((what > CPU_None) && (what < sizeofCPUtype));
    Assert((with > CPU_None) && (with < sizeofCPUtype));

    const std::set<CPUtype> &accepted = compat[what];
    return accepted.count(with) != 0;
}
}; };
Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
m_target(NULL), m_target(NULL),
m_targetMachine(NULL), m_targetMachine(NULL),
@@ -205,39 +405,79 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
m_hasRcpd(false), m_hasRcpd(false),
m_hasVecPrefetch(false) m_hasVecPrefetch(false)
{ {
CPUtype CPUID = CPU_None, CPUfromISA = CPU_None;
AllCPUs a;
if (cpu) {
CPUID = a.GetTypeFromName(cpu);
if (CPUID == CPU_None) {
Error(SourcePos(), "Error: CPU type \"%s\" unknown. Supported"
" CPUs: %s.", cpu, a.HumanReadableListOfNames().c_str());
return;
}
}
if (isa == NULL) { if (isa == NULL) {
if (cpu != NULL) { // If a CPU was specified explicitly, try to pick the best
// If a CPU was specified explicitly, try to pick the best // possible ISA based on that.
// possible ISA based on that. switch (CPUID) {
if (!strcmp(cpu, "core-avx2")) case CPU_None:
isa = "avx2-i32x8"; // No CPU and no ISA, so use system info to figure out
#ifdef ISPC_ARM_ENABLED // what this CPU supports.
else if (!strcmp(cpu, "cortex-a9") || isa = lGetSystemISA();
!strcmp(cpu, "cortex-a15")) Warning(SourcePos(), "No --target specified on command-line."
isa = "neon-i32x4"; " Using default system target \"%s\".", isa);
break;
case CPU_Generic:
isa = "generic-1";
break;
#ifdef ISPC_NVPTX_ENABLED
case CPU_SM35:
isa = "nvptx";
break;
#endif #endif
else if (!strcmp(cpu, "core-avx-i"))
#ifdef ISPC_ARM_ENABLED
case CPU_CortexA9:
case CPU_CortexA15:
isa = "neon-i32x4";
break;
#endif
#if !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4) && !defined(LLVM_3_5)
case CPU_Broadwell:
#endif
case CPU_Haswell:
isa = "avx2-i32x8";
break;
case CPU_IvyBridge:
isa = "avx1.1-i32x8"; isa = "avx1.1-i32x8";
else if (!strcmp(cpu, "sandybridge") || break;
!strcmp(cpu, "corei7-avx"))
case CPU_SandyBridge:
isa = "avx1-i32x8"; isa = "avx1-i32x8";
else if (!strcmp(cpu, "corei7") || break;
!strcmp(cpu, "penryn") ||
!strcmp(cpu, "slm")) // Penryn is here because ISPC does not use SSE 4.2
case CPU_Penryn:
case CPU_Nehalem:
#if !defined(LLVM_3_2) && !defined(LLVM_3_3)
case CPU_Silvermont:
#endif
isa = "sse4-i32x4"; isa = "sse4-i32x4";
else break;
default:
isa = "sse2-i32x4"; isa = "sse2-i32x4";
Warning(SourcePos(), "No --target specified on command-line. " break;
"Using ISA \"%s\" based on specified CPU \"%s\".", isa,
cpu);
}
else {
// No CPU and no ISA, so use CPUID to figure out what this CPU
// supports.
isa = lGetSystemISA();
Warning(SourcePos(), "No --target specified on command-line. "
"Using default system target \"%s\".", isa);
} }
if (CPUID != CPU_None)
Warning(SourcePos(), "No --target specified on command-line."
" Using ISA \"%s\" based on specified CPU \"%s\".",
isa, cpu);
} }
if (arch == NULL) { if (arch == NULL) {
@@ -278,8 +518,6 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_arch = arch; this->m_arch = arch;
} }
const char * cpuFromIsa = "";
// Check default LLVM generated targets // Check default LLVM generated targets
if (!strcasecmp(isa, "sse2") || if (!strcasecmp(isa, "sse2") ||
!strcasecmp(isa, "sse2-i32x4")) { !strcasecmp(isa, "sse2-i32x4")) {
@@ -290,7 +528,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_vectorWidth = 4; this->m_vectorWidth = 4;
this->m_maskingIsFree = false; this->m_maskingIsFree = false;
this->m_maskBitCount = 32; this->m_maskBitCount = 32;
cpuFromIsa = "core2"; CPUfromISA = CPU_Core2;
} }
else if (!strcasecmp(isa, "sse2-x2") || else if (!strcasecmp(isa, "sse2-x2") ||
!strcasecmp(isa, "sse2-i32x8")) { !strcasecmp(isa, "sse2-i32x8")) {
@@ -301,7 +539,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_vectorWidth = 8; this->m_vectorWidth = 8;
this->m_maskingIsFree = false; this->m_maskingIsFree = false;
this->m_maskBitCount = 32; this->m_maskBitCount = 32;
cpuFromIsa = "core2"; CPUfromISA = CPU_Core2;
} }
else if (!strcasecmp(isa, "sse4") || else if (!strcasecmp(isa, "sse4") ||
!strcasecmp(isa, "sse4-i32x4")) { !strcasecmp(isa, "sse4-i32x4")) {
@@ -312,7 +550,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_vectorWidth = 4; this->m_vectorWidth = 4;
this->m_maskingIsFree = false; this->m_maskingIsFree = false;
this->m_maskBitCount = 32; this->m_maskBitCount = 32;
cpuFromIsa = "corei7"; CPUfromISA = CPU_Nehalem;
} }
else if (!strcasecmp(isa, "sse4x2") || else if (!strcasecmp(isa, "sse4x2") ||
!strcasecmp(isa, "sse4-x2") || !strcasecmp(isa, "sse4-x2") ||
@@ -324,7 +562,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_vectorWidth = 8; this->m_vectorWidth = 8;
this->m_maskingIsFree = false; this->m_maskingIsFree = false;
this->m_maskBitCount = 32; this->m_maskBitCount = 32;
cpuFromIsa = "corei7"; CPUfromISA = CPU_Nehalem;
} }
else if (!strcasecmp(isa, "sse4-i8x16")) { else if (!strcasecmp(isa, "sse4-i8x16")) {
this->m_isa = Target::SSE4; this->m_isa = Target::SSE4;
@@ -334,7 +572,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_vectorWidth = 16; this->m_vectorWidth = 16;
this->m_maskingIsFree = false; this->m_maskingIsFree = false;
this->m_maskBitCount = 8; this->m_maskBitCount = 8;
cpuFromIsa = "corei7"; CPUfromISA = CPU_Nehalem;
} }
else if (!strcasecmp(isa, "sse4-i16x8")) { else if (!strcasecmp(isa, "sse4-i16x8")) {
this->m_isa = Target::SSE4; this->m_isa = Target::SSE4;
@@ -344,7 +582,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_vectorWidth = 8; this->m_vectorWidth = 8;
this->m_maskingIsFree = false; this->m_maskingIsFree = false;
this->m_maskBitCount = 16; this->m_maskBitCount = 16;
cpuFromIsa = "corei7"; CPUfromISA = CPU_Nehalem;
} }
else if (!strcasecmp(isa, "generic-4") || else if (!strcasecmp(isa, "generic-4") ||
!strcasecmp(isa, "generic-x4")) { !strcasecmp(isa, "generic-x4")) {
@@ -359,6 +597,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_hasTrigonometry = true; this->m_hasTrigonometry = true;
this->m_hasGather = this->m_hasScatter = true; this->m_hasGather = this->m_hasScatter = true;
this->m_hasRsqrtd = this->m_hasRcpd = true; this->m_hasRsqrtd = this->m_hasRcpd = true;
CPUfromISA = CPU_Generic;
} }
else if (!strcasecmp(isa, "generic-8") || else if (!strcasecmp(isa, "generic-8") ||
!strcasecmp(isa, "generic-x8")) { !strcasecmp(isa, "generic-x8")) {
@@ -373,6 +612,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_hasTrigonometry = true; this->m_hasTrigonometry = true;
this->m_hasGather = this->m_hasScatter = true; this->m_hasGather = this->m_hasScatter = true;
this->m_hasRsqrtd = this->m_hasRcpd = true; this->m_hasRsqrtd = this->m_hasRcpd = true;
CPUfromISA = CPU_Generic;
} }
else if (!strcasecmp(isa, "generic-16") || else if (!strcasecmp(isa, "generic-16") ||
!strcasecmp(isa, "generic-x16")) { !strcasecmp(isa, "generic-x16")) {
@@ -392,6 +632,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_hasRsqrtd = this->m_hasRcpd = true; this->m_hasRsqrtd = this->m_hasRcpd = true;
// It's set to true, because MIC has hardware vector prefetch instruction // It's set to true, because MIC has hardware vector prefetch instruction
this->m_hasVecPrefetch = true; this->m_hasVecPrefetch = true;
CPUfromISA = CPU_Generic;
} }
else if (!strcasecmp(isa, "generic-32") || else if (!strcasecmp(isa, "generic-32") ||
!strcasecmp(isa, "generic-x32")) { !strcasecmp(isa, "generic-x32")) {
@@ -406,6 +647,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_hasTrigonometry = true; this->m_hasTrigonometry = true;
this->m_hasGather = this->m_hasScatter = true; this->m_hasGather = this->m_hasScatter = true;
this->m_hasRsqrtd = this->m_hasRcpd = true; this->m_hasRsqrtd = this->m_hasRcpd = true;
CPUfromISA = CPU_Generic;
} }
else if (!strcasecmp(isa, "generic-64") || else if (!strcasecmp(isa, "generic-64") ||
!strcasecmp(isa, "generic-x64")) { !strcasecmp(isa, "generic-x64")) {
@@ -420,6 +662,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_hasTrigonometry = true; this->m_hasTrigonometry = true;
this->m_hasGather = this->m_hasScatter = true; this->m_hasGather = this->m_hasScatter = true;
this->m_hasRsqrtd = this->m_hasRcpd = true; this->m_hasRsqrtd = this->m_hasRcpd = true;
CPUfromISA = CPU_Generic;
} }
else if (!strcasecmp(isa, "generic-1") || else if (!strcasecmp(isa, "generic-1") ||
!strcasecmp(isa, "generic-x1")) { !strcasecmp(isa, "generic-x1")) {
@@ -429,6 +672,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_vectorWidth = 1; this->m_vectorWidth = 1;
this->m_maskingIsFree = false; this->m_maskingIsFree = false;
this->m_maskBitCount = 32; this->m_maskBitCount = 32;
CPUfromISA = CPU_Generic;
} }
else if (!strcasecmp(isa, "avx1-i32x4")) { else if (!strcasecmp(isa, "avx1-i32x4")) {
this->m_isa = Target::AVX; this->m_isa = Target::AVX;
@@ -438,7 +682,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_vectorWidth = 4; this->m_vectorWidth = 4;
this->m_maskingIsFree = false; this->m_maskingIsFree = false;
this->m_maskBitCount = 32; this->m_maskBitCount = 32;
cpuFromIsa = "corei7-avx"; CPUfromISA = CPU_SandyBridge;
} }
else if (!strcasecmp(isa, "avx") || else if (!strcasecmp(isa, "avx") ||
!strcasecmp(isa, "avx1") || !strcasecmp(isa, "avx1") ||
@@ -450,7 +694,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_vectorWidth = 8; this->m_vectorWidth = 8;
this->m_maskingIsFree = false; this->m_maskingIsFree = false;
this->m_maskBitCount = 32; this->m_maskBitCount = 32;
cpuFromIsa = "corei7-avx"; CPUfromISA = CPU_SandyBridge;
} }
else if (!strcasecmp(isa, "avx-i64x4") || else if (!strcasecmp(isa, "avx-i64x4") ||
!strcasecmp(isa, "avx1-i64x4")) { !strcasecmp(isa, "avx1-i64x4")) {
@@ -461,7 +705,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_vectorWidth = 4; this->m_vectorWidth = 4;
this->m_maskingIsFree = false; this->m_maskingIsFree = false;
this->m_maskBitCount = 64; this->m_maskBitCount = 64;
cpuFromIsa = "corei7-avx"; CPUfromISA = CPU_SandyBridge;
} }
else if (!strcasecmp(isa, "avx-x2") || else if (!strcasecmp(isa, "avx-x2") ||
!strcasecmp(isa, "avx1-x2") || !strcasecmp(isa, "avx1-x2") ||
@@ -473,7 +717,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_vectorWidth = 16; this->m_vectorWidth = 16;
this->m_maskingIsFree = false; this->m_maskingIsFree = false;
this->m_maskBitCount = 32; this->m_maskBitCount = 32;
cpuFromIsa = "corei7-avx"; CPUfromISA = CPU_SandyBridge;
} }
else if (!strcasecmp(isa, "avx1.1") || else if (!strcasecmp(isa, "avx1.1") ||
!strcasecmp(isa, "avx1.1-i32x8")) { !strcasecmp(isa, "avx1.1-i32x8")) {
@@ -486,7 +730,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_maskBitCount = 32; this->m_maskBitCount = 32;
this->m_hasHalf = true; this->m_hasHalf = true;
this->m_hasRand = true; this->m_hasRand = true;
cpuFromIsa = "core-avx-i"; CPUfromISA = CPU_IvyBridge;
} }
else if (!strcasecmp(isa, "avx1.1-x2") || else if (!strcasecmp(isa, "avx1.1-x2") ||
!strcasecmp(isa, "avx1.1-i32x16")) { !strcasecmp(isa, "avx1.1-i32x16")) {
@@ -499,7 +743,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_maskBitCount = 32; this->m_maskBitCount = 32;
this->m_hasHalf = true; this->m_hasHalf = true;
this->m_hasRand = true; this->m_hasRand = true;
cpuFromIsa = "core-avx-i"; CPUfromISA = CPU_IvyBridge;
} }
else if (!strcasecmp(isa, "avx1.1-i64x4")) { else if (!strcasecmp(isa, "avx1.1-i64x4")) {
this->m_isa = Target::AVX11; this->m_isa = Target::AVX11;
@@ -511,7 +755,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_maskBitCount = 64; this->m_maskBitCount = 64;
this->m_hasHalf = true; this->m_hasHalf = true;
this->m_hasRand = true; this->m_hasRand = true;
cpuFromIsa = "core-avx-i"; CPUfromISA = CPU_IvyBridge;
} }
else if (!strcasecmp(isa, "avx2") || else if (!strcasecmp(isa, "avx2") ||
!strcasecmp(isa, "avx2-i32x8")) { !strcasecmp(isa, "avx2-i32x8")) {
@@ -525,7 +769,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_hasHalf = true; this->m_hasHalf = true;
this->m_hasRand = true; this->m_hasRand = true;
this->m_hasGather = true; this->m_hasGather = true;
cpuFromIsa = "core-avx2"; CPUfromISA = CPU_Haswell;
} }
else if (!strcasecmp(isa, "avx2-x2") || else if (!strcasecmp(isa, "avx2-x2") ||
!strcasecmp(isa, "avx2-i32x16")) { !strcasecmp(isa, "avx2-i32x16")) {
@@ -539,7 +783,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_hasHalf = true; this->m_hasHalf = true;
this->m_hasRand = true; this->m_hasRand = true;
this->m_hasGather = true; this->m_hasGather = true;
cpuFromIsa = "core-avx2"; CPUfromISA = CPU_Haswell;
} }
else if (!strcasecmp(isa, "avx2-i64x4")) { else if (!strcasecmp(isa, "avx2-i64x4")) {
this->m_isa = Target::AVX2; this->m_isa = Target::AVX2;
@@ -552,7 +796,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_hasHalf = true; this->m_hasHalf = true;
this->m_hasRand = true; this->m_hasRand = true;
this->m_hasGather = true; this->m_hasGather = true;
cpuFromIsa = "core-avx2"; CPUfromISA = CPU_Haswell;
} }
#ifdef ISPC_ARM_ENABLED #ifdef ISPC_ARM_ENABLED
else if (!strcasecmp(isa, "neon-i8x16")) { else if (!strcasecmp(isa, "neon-i8x16")) {
@@ -604,22 +848,21 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_hasTranscendentals = true; this->m_hasTranscendentals = true;
this->m_hasTrigonometry = true; this->m_hasTrigonometry = true;
this->m_hasGather = this->m_hasScatter = false; this->m_hasGather = this->m_hasScatter = false;
cpuFromIsa = "sm_35"; CPUfromISA = CPU_SM35;
} }
#endif /* ISPC_NVPTX_ENABLED */ #endif /* ISPC_NVPTX_ENABLED */
else { else {
Error(SourcePos(), "Target \"%s\" is unknown. Choices are: %s.", Error(SourcePos(), "Target \"%s\" is unknown. Choices are: %s.",
isa, SupportedTargets()); isa, SupportedTargets());
error = true; error = true;
} }
#if defined(ISPC_ARM_ENABLED) && !defined(__arm__) #if defined(ISPC_ARM_ENABLED) && !defined(__arm__)
if (cpu == NULL && !strncmp(isa, "neon", 4)) if ((CPUID == CPU_None) && !strncmp(isa, "neon", 4))
CPUID = CPU_CortexA9;
cpu = "cortex-a9";
#endif #endif
if (cpu == NULL) { if (CPUID == CPU_None) {
#ifndef ISPC_ARM_ENABLED #ifndef ISPC_ARM_ENABLED
if (isa == NULL) { if (isa == NULL) {
#endif #endif
@@ -628,29 +871,24 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
cpu = strdup(hostCPU.c_str()); cpu = strdup(hostCPU.c_str());
else { else {
Warning(SourcePos(), "Unable to determine host CPU!\n"); Warning(SourcePos(), "Unable to determine host CPU!\n");
cpu = "generic"; cpu = a.GetDefaultNameFromType(CPU_Generic).c_str();
} }
#ifndef ISPC_ARM_ENABLED #ifndef ISPC_ARM_ENABLED
} }
else { else {
cpu = cpuFromIsa; cpu = a.GetDefaultNameFromType(CPUfromISA).c_str();
} }
#endif #endif
} }
else { else {
bool foundCPU = false; if ((CPUfromISA != CPU_None) &&
for (int i = 0; i < int(sizeof(supportedCPUs) / sizeof(supportedCPUs[0])); !a.BackwardCompatible(CPUID, CPUfromISA)) {
++i) { Error(SourcePos(), "The requested CPU is incompatible"
if (!strcmp(cpu, supportedCPUs[i])) { " with the CPU %s needs: %s vs. %s!\n",
foundCPU = true; isa, cpu, a.GetDefaultNameFromType(CPUfromISA).c_str());
break;
}
}
if (foundCPU == false) {
Error(SourcePos(), "Error: CPU type \"%s\" unknown. Supported CPUs: "
"%s.", cpu, SupportedCPUs().c_str());
return; return;
} }
cpu = a.GetDefaultNameFromType(CPUID).c_str();
} }
this->m_cpu = cpu; this->m_cpu = cpu;
@@ -752,14 +990,8 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
std::string std::string
Target::SupportedCPUs() { Target::SupportedCPUs() {
std::string ret; AllCPUs a;
int count = sizeof(supportedCPUs) / sizeof(supportedCPUs[0]); return a.HumanReadableListOfNames();
for (int i = 0; i < count; ++i) {
ret += supportedCPUs[i];
if (i != count - 1)
ret += ", ";
}
return ret;
} }