Merge pull request #953 from aguskov/master

New CPU naming model
This commit is contained in:
Dmitry Babokin
2015-02-03 15:46:25 +03:00

390
ispc.cpp
View File

@@ -40,6 +40,7 @@
#include "util.h"
#include "llvmutil.h"
#include <stdio.h>
#include <sstream>
#ifdef ISPC_IS_WINDOWS
#include <windows.h>
#include <direct.h>
@@ -161,20 +162,219 @@ lGetSystemISA() {
}
static const char *supportedCPUs[] = {
#ifdef ISPC_ARM_ENABLED
typedef enum {
// Special value, indicates that no CPU is present.
CPU_None = 0,
// 'Generic' CPU without any hardware SIMD capabilities.
CPU_Generic = 1,
// Early Atom CPU. Supports SSSE3.
CPU_Bonnell,
// Generic Core2-like. Supports SSSE3. Isn`t quite compatible with Bonnell,
// but for ISPC the difference is negligible; ISPC doesn`t make use of it.
CPU_Core2,
// Core2 Solo/Duo/Quad/Extreme. Supports SSE 4.1 (but not 4.2).
CPU_Penryn,
// Late Core2-like. Supports SSE 4.2 + POPCNT/LZCNT.
CPU_Nehalem,
// Sandy Bridge. Supports AVX 1.
CPU_SandyBridge,
// Ivy Bridge. Supports AVX 1 + RDRAND.
CPU_IvyBridge,
// Haswell. Supports AVX 2.
CPU_Haswell,
#if !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4) && !defined(LLVM_3_5) // LLVM 3.6+
// Broadwell. Supports AVX 2 + ADX/RDSEED/SMAP.
CPU_Broadwell,
#endif
#if !defined(LLVM_3_2) && !defined(LLVM_3_3) // LLVM 3.4+
// Late Atom-like design. Supports SSE 4.2 + POPCNT/LZCNT.
CPU_Silvermont,
#endif
// FIXME: LLVM supports a ton of different ARM CPU variants--not just
// cortex-a9 and a15. We should be able to handle any of them that also
// have NEON support.
"cortex-a9", "cortex-a15",
#ifdef ISPC_ARM_ENABLED
// ARM Cortex A15. Supports NEON VFPv4.
CPU_CortexA15,
// ARM Cortex A9. Supports NEON VFPv3.
CPU_CortexA9,
#endif
"atom", "penryn", "core2", "corei7", "corei7-avx"
, "core-avx-i", "core-avx2"
#if !defined(LLVM_3_2) && !defined(LLVM_3_3)
, "slm"
#endif // LLVM 3.4+
#ifdef ISPC_NVPTX_ENABLED
// NVidia CUDA-compatible SM-35 architecture.
CPU_SM35,
#endif
sizeofCPUtype
} CPUtype;
class AllCPUs {
private:
std::vector<std::vector<std::string>> names;
std::vector<std::set<CPUtype>> compat;
std::set<CPUtype> Set(CPUtype type, ...) {
std::set<CPUtype> retn;
va_list args;
retn.insert(type);
va_start(args, type);
while ((type = (CPUtype)va_arg(args, int)) != CPU_None)
retn.insert(type);
va_end(args);
return retn;
}
public:
AllCPUs() {
names = std::vector<std::vector<std::string>>(sizeofCPUtype);
compat = std::vector<std::set<CPUtype>>(sizeofCPUtype);
names[CPU_Generic].push_back("generic");
names[CPU_Bonnell].push_back("atom");
names[CPU_Bonnell].push_back("bonnell");
names[CPU_Core2].push_back("core2");
names[CPU_Penryn].push_back("penryn");
#if !defined(LLVM_3_2) && !defined(LLVM_3_3) // LLVM 3.4+
names[CPU_Silvermont].push_back("slm");
names[CPU_Silvermont].push_back("silvermont");
#endif
names[CPU_Nehalem].push_back("corei7");
names[CPU_Nehalem].push_back("nehalem");
names[CPU_SandyBridge].push_back("corei7-avx");
names[CPU_SandyBridge].push_back("sandybridge");
names[CPU_IvyBridge].push_back("core-avx-i");
names[CPU_IvyBridge].push_back("ivybridge");
names[CPU_Haswell].push_back("core-avx2");
names[CPU_Haswell].push_back("haswell");
#if !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4) && !defined(LLVM_3_5) // LLVM 3.6+
names[CPU_Broadwell].push_back("broadwell");
#endif
#ifdef ISPC_ARM_ENABLED
names[CPU_CortexA15].push_back("cortex-a15");
names[CPU_CortexA9].push_back("cortex-a9");
#endif
#ifdef ISPC_NVPTX_ENABLED
names[CPU_SM35].push_back("sm_35");
#endif
#if defined(LLVM_3_2) || defined(LLVM_3_3) // LLVM 3.4+
#define CPU_Silvermont CPU_Nehalem
#else
compat[CPU_Silvermont] = Set(CPU_Generic, CPU_Bonnell, CPU_Penryn,
CPU_Core2, CPU_Nehalem, CPU_Silvermont,
CPU_None);
#endif
#if defined(LLVM_3_2) || defined(LLVM_3_3) || defined(LLVM_3_4) || defined(LLVM_3_5) // LLVM 3.6+
#define CPU_Broadwell CPU_Haswell
#else
compat[CPU_Broadwell] = Set(CPU_Generic, CPU_Bonnell, CPU_Penryn,
CPU_Core2, CPU_Nehalem, CPU_Silvermont,
CPU_SandyBridge, CPU_IvyBridge,
CPU_Haswell, CPU_Broadwell, CPU_None);
#endif
compat[CPU_Haswell] = Set(CPU_Generic, CPU_Bonnell, CPU_Penryn,
CPU_Core2, CPU_Nehalem, CPU_Silvermont,
CPU_SandyBridge, CPU_IvyBridge,
CPU_Haswell, CPU_Broadwell, CPU_None);
compat[CPU_IvyBridge] = Set(CPU_Generic, CPU_Bonnell, CPU_Penryn,
CPU_Core2, CPU_Nehalem, CPU_Silvermont,
CPU_SandyBridge, CPU_IvyBridge,
CPU_None);
compat[CPU_SandyBridge] = Set(CPU_Generic, CPU_Bonnell, CPU_Penryn,
CPU_Core2, CPU_Nehalem, CPU_Silvermont,
CPU_SandyBridge, CPU_None);
compat[CPU_Nehalem] = Set(CPU_Generic, CPU_Bonnell, CPU_Penryn,
CPU_Core2, CPU_Nehalem, CPU_Silvermont,
CPU_None);
compat[CPU_Penryn] = Set(CPU_Generic, CPU_Bonnell, CPU_Penryn,
CPU_Core2, CPU_Nehalem, CPU_Silvermont,
CPU_None);
compat[CPU_Core2] = Set(CPU_Generic, CPU_Bonnell, CPU_Core2,
CPU_None);
compat[CPU_Bonnell] = Set(CPU_Generic, CPU_Bonnell, CPU_Core2,
CPU_None);
compat[CPU_Generic] = Set(CPU_Generic, CPU_None);
#ifdef ISPC_ARM_ENABLED
compat[CPU_CortexA15] = Set(CPU_Generic, CPU_CortexA9, CPU_CortexA15,
CPU_None);
compat[CPU_CortexA9] = Set(CPU_Generic, CPU_CortexA9, CPU_None);
#endif
#ifdef ISPC_NVPTX_ENABLED
compat[CPU_SM35] = Set(CPU_Generic, CPU_SM35, CPU_None);
#endif
}
std::string HumanReadableListOfNames() {
std::stringstream CPUs;
for (int i = CPU_Generic; i < sizeofCPUtype; i++) {
CPUs << names[i][0];
if (names[i].size() > 1) {
CPUs << " (synonyms: " << names[i][1];
for (int j = 2, je = names[i].size(); j < je; j++)
CPUs << ", " << names[i][j];
CPUs << ")";
}
if (i < sizeofCPUtype - 1)
CPUs << ", ";
}
return CPUs.str();
}
std::string GetDefaultNameFromType(CPUtype type) {
Assert((type > CPU_None) && (type < sizeofCPUtype));
return names[type][0];
}
CPUtype GetTypeFromName(std::string name) {
CPUtype retn = CPU_None;
for (int i = 1; (retn == CPU_None) && (i < sizeofCPUtype); i++)
for (int j = 0, je = names[i].size();
(retn == CPU_None) && (j < je); j++)
if (!name.compare(names[i][j]))
retn = (CPUtype)i;
return retn;
}
bool BackwardCompatible(CPUtype what, CPUtype with) {
Assert((what > CPU_None) && (what < sizeofCPUtype));
Assert((with > CPU_None) && (with < sizeofCPUtype));
return compat[what].find(with) != compat[what].end();
}
};
Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
m_target(NULL),
m_targetMachine(NULL),
@@ -205,39 +405,79 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
m_hasRcpd(false),
m_hasVecPrefetch(false)
{
CPUtype CPUID = CPU_None, CPUfromISA = CPU_None;
AllCPUs a;
if (cpu) {
CPUID = a.GetTypeFromName(cpu);
if (CPUID == CPU_None) {
Error(SourcePos(), "Error: CPU type \"%s\" unknown. Supported"
" CPUs: %s.", cpu, a.HumanReadableListOfNames().c_str());
return;
}
}
if (isa == NULL) {
if (cpu != NULL) {
// If a CPU was specified explicitly, try to pick the best
// possible ISA based on that.
if (!strcmp(cpu, "core-avx2"))
isa = "avx2-i32x8";
#ifdef ISPC_ARM_ENABLED
else if (!strcmp(cpu, "cortex-a9") ||
!strcmp(cpu, "cortex-a15"))
isa = "neon-i32x4";
// If a CPU was specified explicitly, try to pick the best
// possible ISA based on that.
switch (CPUID) {
case CPU_None:
// No CPU and no ISA, so use system info to figure out
// what this CPU supports.
isa = lGetSystemISA();
Warning(SourcePos(), "No --target specified on command-line."
" Using default system target \"%s\".", isa);
break;
case CPU_Generic:
isa = "generic-1";
break;
#ifdef ISPC_NVPTX_ENABLED
case CPU_SM35:
isa = "nvptx";
break;
#endif
else if (!strcmp(cpu, "core-avx-i"))
#ifdef ISPC_ARM_ENABLED
case CPU_CortexA9:
case CPU_CortexA15:
isa = "neon-i32x4";
break;
#endif
#if !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4) && !defined(LLVM_3_5)
case CPU_Broadwell:
#endif
case CPU_Haswell:
isa = "avx2-i32x8";
break;
case CPU_IvyBridge:
isa = "avx1.1-i32x8";
else if (!strcmp(cpu, "sandybridge") ||
!strcmp(cpu, "corei7-avx"))
break;
case CPU_SandyBridge:
isa = "avx1-i32x8";
else if (!strcmp(cpu, "corei7") ||
!strcmp(cpu, "penryn") ||
!strcmp(cpu, "slm"))
break;
// Penryn is here because ISPC does not use SSE 4.2
case CPU_Penryn:
case CPU_Nehalem:
#if !defined(LLVM_3_2) && !defined(LLVM_3_3)
case CPU_Silvermont:
#endif
isa = "sse4-i32x4";
else
break;
default:
isa = "sse2-i32x4";
Warning(SourcePos(), "No --target specified on command-line. "
"Using ISA \"%s\" based on specified CPU \"%s\".", isa,
cpu);
}
else {
// No CPU and no ISA, so use CPUID to figure out what this CPU
// supports.
isa = lGetSystemISA();
Warning(SourcePos(), "No --target specified on command-line. "
"Using default system target \"%s\".", isa);
break;
}
if (CPUID != CPU_None)
Warning(SourcePos(), "No --target specified on command-line."
" Using ISA \"%s\" based on specified CPU \"%s\".",
isa, cpu);
}
if (arch == NULL) {
@@ -278,8 +518,6 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_arch = arch;
}
const char * cpuFromIsa = "";
// Check default LLVM generated targets
if (!strcasecmp(isa, "sse2") ||
!strcasecmp(isa, "sse2-i32x4")) {
@@ -290,7 +528,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_vectorWidth = 4;
this->m_maskingIsFree = false;
this->m_maskBitCount = 32;
cpuFromIsa = "core2";
CPUfromISA = CPU_Core2;
}
else if (!strcasecmp(isa, "sse2-x2") ||
!strcasecmp(isa, "sse2-i32x8")) {
@@ -301,7 +539,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_vectorWidth = 8;
this->m_maskingIsFree = false;
this->m_maskBitCount = 32;
cpuFromIsa = "core2";
CPUfromISA = CPU_Core2;
}
else if (!strcasecmp(isa, "sse4") ||
!strcasecmp(isa, "sse4-i32x4")) {
@@ -312,7 +550,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_vectorWidth = 4;
this->m_maskingIsFree = false;
this->m_maskBitCount = 32;
cpuFromIsa = "corei7";
CPUfromISA = CPU_Nehalem;
}
else if (!strcasecmp(isa, "sse4x2") ||
!strcasecmp(isa, "sse4-x2") ||
@@ -324,7 +562,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_vectorWidth = 8;
this->m_maskingIsFree = false;
this->m_maskBitCount = 32;
cpuFromIsa = "corei7";
CPUfromISA = CPU_Nehalem;
}
else if (!strcasecmp(isa, "sse4-i8x16")) {
this->m_isa = Target::SSE4;
@@ -334,7 +572,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_vectorWidth = 16;
this->m_maskingIsFree = false;
this->m_maskBitCount = 8;
cpuFromIsa = "corei7";
CPUfromISA = CPU_Nehalem;
}
else if (!strcasecmp(isa, "sse4-i16x8")) {
this->m_isa = Target::SSE4;
@@ -344,7 +582,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_vectorWidth = 8;
this->m_maskingIsFree = false;
this->m_maskBitCount = 16;
cpuFromIsa = "corei7";
CPUfromISA = CPU_Nehalem;
}
else if (!strcasecmp(isa, "generic-4") ||
!strcasecmp(isa, "generic-x4")) {
@@ -359,6 +597,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_hasTrigonometry = true;
this->m_hasGather = this->m_hasScatter = true;
this->m_hasRsqrtd = this->m_hasRcpd = true;
CPUfromISA = CPU_Generic;
}
else if (!strcasecmp(isa, "generic-8") ||
!strcasecmp(isa, "generic-x8")) {
@@ -373,6 +612,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_hasTrigonometry = true;
this->m_hasGather = this->m_hasScatter = true;
this->m_hasRsqrtd = this->m_hasRcpd = true;
CPUfromISA = CPU_Generic;
}
else if (!strcasecmp(isa, "generic-16") ||
!strcasecmp(isa, "generic-x16")) {
@@ -392,6 +632,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_hasRsqrtd = this->m_hasRcpd = true;
// It's set to true, because MIC has hardware vector prefetch instruction
this->m_hasVecPrefetch = true;
CPUfromISA = CPU_Generic;
}
else if (!strcasecmp(isa, "generic-32") ||
!strcasecmp(isa, "generic-x32")) {
@@ -406,6 +647,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_hasTrigonometry = true;
this->m_hasGather = this->m_hasScatter = true;
this->m_hasRsqrtd = this->m_hasRcpd = true;
CPUfromISA = CPU_Generic;
}
else if (!strcasecmp(isa, "generic-64") ||
!strcasecmp(isa, "generic-x64")) {
@@ -420,6 +662,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_hasTrigonometry = true;
this->m_hasGather = this->m_hasScatter = true;
this->m_hasRsqrtd = this->m_hasRcpd = true;
CPUfromISA = CPU_Generic;
}
else if (!strcasecmp(isa, "generic-1") ||
!strcasecmp(isa, "generic-x1")) {
@@ -429,6 +672,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_vectorWidth = 1;
this->m_maskingIsFree = false;
this->m_maskBitCount = 32;
CPUfromISA = CPU_Generic;
}
else if (!strcasecmp(isa, "avx1-i32x4")) {
this->m_isa = Target::AVX;
@@ -438,7 +682,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_vectorWidth = 4;
this->m_maskingIsFree = false;
this->m_maskBitCount = 32;
cpuFromIsa = "corei7-avx";
CPUfromISA = CPU_SandyBridge;
}
else if (!strcasecmp(isa, "avx") ||
!strcasecmp(isa, "avx1") ||
@@ -450,7 +694,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_vectorWidth = 8;
this->m_maskingIsFree = false;
this->m_maskBitCount = 32;
cpuFromIsa = "corei7-avx";
CPUfromISA = CPU_SandyBridge;
}
else if (!strcasecmp(isa, "avx-i64x4") ||
!strcasecmp(isa, "avx1-i64x4")) {
@@ -461,7 +705,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_vectorWidth = 4;
this->m_maskingIsFree = false;
this->m_maskBitCount = 64;
cpuFromIsa = "corei7-avx";
CPUfromISA = CPU_SandyBridge;
}
else if (!strcasecmp(isa, "avx-x2") ||
!strcasecmp(isa, "avx1-x2") ||
@@ -473,7 +717,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_vectorWidth = 16;
this->m_maskingIsFree = false;
this->m_maskBitCount = 32;
cpuFromIsa = "corei7-avx";
CPUfromISA = CPU_SandyBridge;
}
else if (!strcasecmp(isa, "avx1.1") ||
!strcasecmp(isa, "avx1.1-i32x8")) {
@@ -486,7 +730,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_maskBitCount = 32;
this->m_hasHalf = true;
this->m_hasRand = true;
cpuFromIsa = "core-avx-i";
CPUfromISA = CPU_IvyBridge;
}
else if (!strcasecmp(isa, "avx1.1-x2") ||
!strcasecmp(isa, "avx1.1-i32x16")) {
@@ -499,7 +743,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_maskBitCount = 32;
this->m_hasHalf = true;
this->m_hasRand = true;
cpuFromIsa = "core-avx-i";
CPUfromISA = CPU_IvyBridge;
}
else if (!strcasecmp(isa, "avx1.1-i64x4")) {
this->m_isa = Target::AVX11;
@@ -511,7 +755,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_maskBitCount = 64;
this->m_hasHalf = true;
this->m_hasRand = true;
cpuFromIsa = "core-avx-i";
CPUfromISA = CPU_IvyBridge;
}
else if (!strcasecmp(isa, "avx2") ||
!strcasecmp(isa, "avx2-i32x8")) {
@@ -525,7 +769,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_hasHalf = true;
this->m_hasRand = true;
this->m_hasGather = true;
cpuFromIsa = "core-avx2";
CPUfromISA = CPU_Haswell;
}
else if (!strcasecmp(isa, "avx2-x2") ||
!strcasecmp(isa, "avx2-i32x16")) {
@@ -539,7 +783,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_hasHalf = true;
this->m_hasRand = true;
this->m_hasGather = true;
cpuFromIsa = "core-avx2";
CPUfromISA = CPU_Haswell;
}
else if (!strcasecmp(isa, "avx2-i64x4")) {
this->m_isa = Target::AVX2;
@@ -552,7 +796,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_hasHalf = true;
this->m_hasRand = true;
this->m_hasGather = true;
cpuFromIsa = "core-avx2";
CPUfromISA = CPU_Haswell;
}
#ifdef ISPC_ARM_ENABLED
else if (!strcasecmp(isa, "neon-i8x16")) {
@@ -604,22 +848,21 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_hasTranscendentals = true;
this->m_hasTrigonometry = true;
this->m_hasGather = this->m_hasScatter = false;
cpuFromIsa = "sm_35";
CPUfromISA = CPU_SM35;
}
#endif /* ISPC_NVPTX_ENABLED */
else {
Error(SourcePos(), "Target \"%s\" is unknown. Choices are: %s.",
isa, SupportedTargets());
isa, SupportedTargets());
error = true;
}
#if defined(ISPC_ARM_ENABLED) && !defined(__arm__)
if (cpu == NULL && !strncmp(isa, "neon", 4))
cpu = "cortex-a9";
if ((CPUID == CPU_None) && !strncmp(isa, "neon", 4))
CPUID = CPU_CortexA9;
#endif
if (cpu == NULL) {
if (CPUID == CPU_None) {
#ifndef ISPC_ARM_ENABLED
if (isa == NULL) {
#endif
@@ -628,29 +871,24 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
cpu = strdup(hostCPU.c_str());
else {
Warning(SourcePos(), "Unable to determine host CPU!\n");
cpu = "generic";
cpu = a.GetDefaultNameFromType(CPU_Generic).c_str();
}
#ifndef ISPC_ARM_ENABLED
}
else {
cpu = cpuFromIsa;
cpu = a.GetDefaultNameFromType(CPUfromISA).c_str();
}
#endif
}
else {
bool foundCPU = false;
for (int i = 0; i < int(sizeof(supportedCPUs) / sizeof(supportedCPUs[0]));
++i) {
if (!strcmp(cpu, supportedCPUs[i])) {
foundCPU = true;
break;
}
}
if (foundCPU == false) {
Error(SourcePos(), "Error: CPU type \"%s\" unknown. Supported CPUs: "
"%s.", cpu, SupportedCPUs().c_str());
if ((CPUfromISA != CPU_None) &&
!a.BackwardCompatible(CPUID, CPUfromISA)) {
Error(SourcePos(), "The requested CPU is incompatible"
" with the CPU %s needs: %s vs. %s!\n",
isa, cpu, a.GetDefaultNameFromType(CPUfromISA).c_str());
return;
}
cpu = a.GetDefaultNameFromType(CPUID).c_str();
}
this->m_cpu = cpu;
@@ -752,14 +990,8 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
std::string
Target::SupportedCPUs() {
std::string ret;
int count = sizeof(supportedCPUs) / sizeof(supportedCPUs[0]);
for (int i = 0; i < count; ++i) {
ret += supportedCPUs[i];
if (i != count - 1)
ret += ", ";
}
return ret;
AllCPUs a;
return a.HumanReadableListOfNames();
}