diff --git a/ispc.cpp b/ispc.cpp index 1e5c5e2d..59c8a826 100644 --- a/ispc.cpp +++ b/ispc.cpp @@ -40,6 +40,7 @@ #include "util.h" #include "llvmutil.h" #include +#include #ifdef ISPC_IS_WINDOWS #include #include @@ -161,20 +162,219 @@ lGetSystemISA() { } -static const char *supportedCPUs[] = { -#ifdef ISPC_ARM_ENABLED +typedef enum { + // Special value, indicates that no CPU is present. + CPU_None = 0, + + // 'Generic' CPU without any hardware SIMD capabilities. + CPU_Generic = 1, + + // Early Atom CPU. Supports SSSE3. + CPU_Bonnell, + + // Generic Core2-like. Supports SSSE3. Isn't quite compatible with Bonnell, + // but for ISPC the difference is negligible; ISPC doesn't make use of it. + CPU_Core2, + + // Core2 Solo/Duo/Quad/Extreme. Supports SSE 4.1 (but not 4.2). + CPU_Penryn, + + // Late Core2-like. Supports SSE 4.2 + POPCNT/LZCNT. + CPU_Nehalem, + + // Sandy Bridge. Supports AVX 1. + CPU_SandyBridge, + + // Ivy Bridge. Supports AVX 1 + RDRAND. + CPU_IvyBridge, + + // Haswell. Supports AVX 2. + CPU_Haswell, + +#if !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4) && !defined(LLVM_3_5) // LLVM 3.6+ + // Broadwell. Supports AVX 2 + ADX/RDSEED/SMAP. + CPU_Broadwell, +#endif + +#if !defined(LLVM_3_2) && !defined(LLVM_3_3) // LLVM 3.4+ + // Late Atom-like design. Supports SSE 4.2 + POPCNT/LZCNT. + CPU_Silvermont, +#endif + // FIXME: LLVM supports a ton of different ARM CPU variants--not just // cortex-a9 and a15. We should be able to handle any of them that also // have NEON support. - "cortex-a9", "cortex-a15", +#ifdef ISPC_ARM_ENABLED + // ARM Cortex A15. Supports NEON VFPv4. + CPU_CortexA15, + + // ARM Cortex A9. Supports NEON VFPv3. + CPU_CortexA9, #endif - "atom", "penryn", "core2", "corei7", "corei7-avx" - , "core-avx-i", "core-avx2" -#if !defined(LLVM_3_2) && !defined(LLVM_3_3) - , "slm" -#endif // LLVM 3.4+ + +#ifdef ISPC_NVPTX_ENABLED + // NVidia CUDA-compatible SM-35 architecture. 
+ CPU_SM35, +#endif + + sizeofCPUtype +} CPUtype; + + +class AllCPUs { +private: + std::vector> names; + std::vector> compat; + + std::set Set(CPUtype type, ...) { + std::set retn; + va_list args; + + retn.insert(type); + va_start(args, type); + while ((type = (CPUtype)va_arg(args, int)) != CPU_None) + retn.insert(type); + va_end(args); + + return retn; + } + +public: + AllCPUs() { + names = std::vector>(sizeofCPUtype); + compat = std::vector>(sizeofCPUtype); + + + names[CPU_Generic].push_back("generic"); + + names[CPU_Bonnell].push_back("atom"); + names[CPU_Bonnell].push_back("bonnell"); + + names[CPU_Core2].push_back("core2"); + + names[CPU_Penryn].push_back("penryn"); + +#if !defined(LLVM_3_2) && !defined(LLVM_3_3) // LLVM 3.4+ + names[CPU_Silvermont].push_back("slm"); + names[CPU_Silvermont].push_back("silvermont"); +#endif + + names[CPU_Nehalem].push_back("corei7"); + names[CPU_Nehalem].push_back("nehalem"); + + names[CPU_SandyBridge].push_back("corei7-avx"); + names[CPU_SandyBridge].push_back("sandybridge"); + + names[CPU_IvyBridge].push_back("core-avx-i"); + names[CPU_IvyBridge].push_back("ivybridge"); + + names[CPU_Haswell].push_back("core-avx2"); + names[CPU_Haswell].push_back("haswell"); + +#if !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4) && !defined(LLVM_3_5) // LLVM 3.6+ + names[CPU_Broadwell].push_back("broadwell"); +#endif + +#ifdef ISPC_ARM_ENABLED + names[CPU_CortexA15].push_back("cortex-a15"); + + names[CPU_CortexA9].push_back("cortex-a9"); +#endif + +#ifdef ISPC_NVPTX_ENABLED + names[CPU_SM35].push_back("sm_35"); +#endif + + +#if defined(LLVM_3_2) || defined(LLVM_3_3) // LLVM 3.2/3.3 only (pre-3.4) + #define CPU_Silvermont CPU_Nehalem +#else + compat[CPU_Silvermont] = Set(CPU_Generic, CPU_Bonnell, CPU_Penryn, + CPU_Core2, CPU_Nehalem, CPU_Silvermont, + CPU_None); +#endif +#if defined(LLVM_3_2) || defined(LLVM_3_3) || defined(LLVM_3_4) || defined(LLVM_3_5) // LLVM 3.2-3.5 only (pre-3.6) + #define CPU_Broadwell CPU_Haswell +#else + compat[CPU_Broadwell] = 
Set(CPU_Generic, CPU_Bonnell, CPU_Penryn, + CPU_Core2, CPU_Nehalem, CPU_Silvermont, + CPU_SandyBridge, CPU_IvyBridge, + CPU_Haswell, CPU_Broadwell, CPU_None); +#endif + compat[CPU_Haswell] = Set(CPU_Generic, CPU_Bonnell, CPU_Penryn, + CPU_Core2, CPU_Nehalem, CPU_Silvermont, + CPU_SandyBridge, CPU_IvyBridge, + CPU_Haswell, CPU_Broadwell, CPU_None); + compat[CPU_IvyBridge] = Set(CPU_Generic, CPU_Bonnell, CPU_Penryn, + CPU_Core2, CPU_Nehalem, CPU_Silvermont, + CPU_SandyBridge, CPU_IvyBridge, + CPU_None); + compat[CPU_SandyBridge] = Set(CPU_Generic, CPU_Bonnell, CPU_Penryn, + CPU_Core2, CPU_Nehalem, CPU_Silvermont, + CPU_SandyBridge, CPU_None); + compat[CPU_Nehalem] = Set(CPU_Generic, CPU_Bonnell, CPU_Penryn, + CPU_Core2, CPU_Nehalem, CPU_Silvermont, + CPU_None); + compat[CPU_Penryn] = Set(CPU_Generic, CPU_Bonnell, CPU_Penryn, + CPU_Core2, CPU_Nehalem, CPU_Silvermont, + CPU_None); + compat[CPU_Core2] = Set(CPU_Generic, CPU_Bonnell, CPU_Core2, + CPU_None); + compat[CPU_Bonnell] = Set(CPU_Generic, CPU_Bonnell, CPU_Core2, + CPU_None); + compat[CPU_Generic] = Set(CPU_Generic, CPU_None); + +#ifdef ISPC_ARM_ENABLED + compat[CPU_CortexA15] = Set(CPU_Generic, CPU_CortexA9, CPU_CortexA15, + CPU_None); + compat[CPU_CortexA9] = Set(CPU_Generic, CPU_CortexA9, CPU_None); +#endif + +#ifdef ISPC_NVPTX_ENABLED + compat[CPU_SM35] = Set(CPU_Generic, CPU_SM35, CPU_None); +#endif + } + + std::string HumanReadableListOfNames() { + std::stringstream CPUs; + for (int i = CPU_Generic; i < sizeofCPUtype; i++) { + CPUs << names[i][0]; + if (names[i].size() > 1) { + CPUs << " (synonyms: " << names[i][1]; + for (int j = 2, je = names[i].size(); j < je; j++) + CPUs << ", " << names[i][j]; + CPUs << ")"; + } + if (i < sizeofCPUtype - 1) + CPUs << ", "; + } + return CPUs.str(); + } + + std::string GetDefaultNameFromType(CPUtype type) { + Assert((type > CPU_None) && (type < sizeofCPUtype)); + return names[type][0]; + } + + CPUtype GetTypeFromName(std::string name) { + CPUtype retn = CPU_None; + + for 
(int i = 1; (retn == CPU_None) && (i < sizeofCPUtype); i++) + for (int j = 0, je = names[i].size(); + (retn == CPU_None) && (j < je); j++) + if (!name.compare(names[i][j])) + retn = (CPUtype)i; + return retn; + } + + bool BackwardCompatible(CPUtype what, CPUtype with) { + Assert((what > CPU_None) && (what < sizeofCPUtype)); + Assert((with > CPU_None) && (with < sizeofCPUtype)); + return compat[what].find(with) != compat[what].end(); + } }; + Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : m_target(NULL), m_targetMachine(NULL), @@ -205,39 +405,79 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : m_hasRcpd(false), m_hasVecPrefetch(false) { + CPUtype CPUID = CPU_None, CPUfromISA = CPU_None; + AllCPUs a; + + if (cpu) { + CPUID = a.GetTypeFromName(cpu); + if (CPUID == CPU_None) { + Error(SourcePos(), "Error: CPU type \"%s\" unknown. Supported" + " CPUs: %s.", cpu, a.HumanReadableListOfNames().c_str()); + return; + } + } + if (isa == NULL) { - if (cpu != NULL) { - // If a CPU was specified explicitly, try to pick the best - // possible ISA based on that. - if (!strcmp(cpu, "core-avx2")) - isa = "avx2-i32x8"; -#ifdef ISPC_ARM_ENABLED - else if (!strcmp(cpu, "cortex-a9") || - !strcmp(cpu, "cortex-a15")) - isa = "neon-i32x4"; + // If a CPU was specified explicitly, try to pick the best + // possible ISA based on that. + switch (CPUID) { + case CPU_None: + // No CPU and no ISA, so use system info to figure out + // what this CPU supports. + isa = lGetSystemISA(); + Warning(SourcePos(), "No --target specified on command-line." 
+ " Using default system target \"%s\".", isa); + break; + + case CPU_Generic: + isa = "generic-1"; + break; + +#ifdef ISPC_NVPTX_ENABLED + case CPU_SM35: + isa = "nvptx"; + break; #endif - else if (!strcmp(cpu, "core-avx-i")) + +#ifdef ISPC_ARM_ENABLED + case CPU_CortexA9: + case CPU_CortexA15: + isa = "neon-i32x4"; + break; +#endif + +#if !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4) && !defined(LLVM_3_5) + case CPU_Broadwell: +#endif + case CPU_Haswell: + isa = "avx2-i32x8"; + break; + + case CPU_IvyBridge: isa = "avx1.1-i32x8"; - else if (!strcmp(cpu, "sandybridge") || - !strcmp(cpu, "corei7-avx")) + break; + + case CPU_SandyBridge: isa = "avx1-i32x8"; - else if (!strcmp(cpu, "corei7") || - !strcmp(cpu, "penryn") || - !strcmp(cpu, "slm")) + break; + + // Penryn is here because ISPC does not use SSE 4.2 + case CPU_Penryn: + case CPU_Nehalem: +#if !defined(LLVM_3_2) && !defined(LLVM_3_3) + case CPU_Silvermont: +#endif isa = "sse4-i32x4"; - else + break; + + default: isa = "sse2-i32x4"; - Warning(SourcePos(), "No --target specified on command-line. " - "Using ISA \"%s\" based on specified CPU \"%s\".", isa, - cpu); - } - else { - // No CPU and no ISA, so use CPUID to figure out what this CPU - // supports. - isa = lGetSystemISA(); - Warning(SourcePos(), "No --target specified on command-line. " - "Using default system target \"%s\".", isa); + break; } + if (CPUID != CPU_None) + Warning(SourcePos(), "No --target specified on command-line." 
+ " Using ISA \"%s\" based on specified CPU \"%s\".", + isa, cpu); } if (arch == NULL) { @@ -278,8 +518,6 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : this->m_arch = arch; } - const char * cpuFromIsa = ""; - // Check default LLVM generated targets if (!strcasecmp(isa, "sse2") || !strcasecmp(isa, "sse2-i32x4")) { @@ -290,7 +528,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : this->m_vectorWidth = 4; this->m_maskingIsFree = false; this->m_maskBitCount = 32; - cpuFromIsa = "core2"; + CPUfromISA = CPU_Core2; } else if (!strcasecmp(isa, "sse2-x2") || !strcasecmp(isa, "sse2-i32x8")) { @@ -301,7 +539,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : this->m_vectorWidth = 8; this->m_maskingIsFree = false; this->m_maskBitCount = 32; - cpuFromIsa = "core2"; + CPUfromISA = CPU_Core2; } else if (!strcasecmp(isa, "sse4") || !strcasecmp(isa, "sse4-i32x4")) { @@ -312,7 +550,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : this->m_vectorWidth = 4; this->m_maskingIsFree = false; this->m_maskBitCount = 32; - cpuFromIsa = "corei7"; + CPUfromISA = CPU_Nehalem; } else if (!strcasecmp(isa, "sse4x2") || !strcasecmp(isa, "sse4-x2") || @@ -324,7 +562,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : this->m_vectorWidth = 8; this->m_maskingIsFree = false; this->m_maskBitCount = 32; - cpuFromIsa = "corei7"; + CPUfromISA = CPU_Nehalem; } else if (!strcasecmp(isa, "sse4-i8x16")) { this->m_isa = Target::SSE4; @@ -334,7 +572,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : this->m_vectorWidth = 16; this->m_maskingIsFree = false; this->m_maskBitCount = 8; - cpuFromIsa = "corei7"; + CPUfromISA = CPU_Nehalem; } else if (!strcasecmp(isa, "sse4-i16x8")) { this->m_isa = Target::SSE4; @@ -344,7 +582,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : 
this->m_vectorWidth = 8; this->m_maskingIsFree = false; this->m_maskBitCount = 16; - cpuFromIsa = "corei7"; + CPUfromISA = CPU_Nehalem; } else if (!strcasecmp(isa, "generic-4") || !strcasecmp(isa, "generic-x4")) { @@ -359,6 +597,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : this->m_hasTrigonometry = true; this->m_hasGather = this->m_hasScatter = true; this->m_hasRsqrtd = this->m_hasRcpd = true; + CPUfromISA = CPU_Generic; } else if (!strcasecmp(isa, "generic-8") || !strcasecmp(isa, "generic-x8")) { @@ -373,6 +612,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : this->m_hasTrigonometry = true; this->m_hasGather = this->m_hasScatter = true; this->m_hasRsqrtd = this->m_hasRcpd = true; + CPUfromISA = CPU_Generic; } else if (!strcasecmp(isa, "generic-16") || !strcasecmp(isa, "generic-x16")) { @@ -392,6 +632,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : this->m_hasRsqrtd = this->m_hasRcpd = true; // It's set to true, because MIC has hardware vector prefetch instruction this->m_hasVecPrefetch = true; + CPUfromISA = CPU_Generic; } else if (!strcasecmp(isa, "generic-32") || !strcasecmp(isa, "generic-x32")) { @@ -406,6 +647,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : this->m_hasTrigonometry = true; this->m_hasGather = this->m_hasScatter = true; this->m_hasRsqrtd = this->m_hasRcpd = true; + CPUfromISA = CPU_Generic; } else if (!strcasecmp(isa, "generic-64") || !strcasecmp(isa, "generic-x64")) { @@ -420,6 +662,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : this->m_hasTrigonometry = true; this->m_hasGather = this->m_hasScatter = true; this->m_hasRsqrtd = this->m_hasRcpd = true; + CPUfromISA = CPU_Generic; } else if (!strcasecmp(isa, "generic-1") || !strcasecmp(isa, "generic-x1")) { @@ -429,6 +672,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : 
this->m_vectorWidth = 1; this->m_maskingIsFree = false; this->m_maskBitCount = 32; + CPUfromISA = CPU_Generic; } else if (!strcasecmp(isa, "avx1-i32x4")) { this->m_isa = Target::AVX; @@ -438,7 +682,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : this->m_vectorWidth = 4; this->m_maskingIsFree = false; this->m_maskBitCount = 32; - cpuFromIsa = "corei7-avx"; + CPUfromISA = CPU_SandyBridge; } else if (!strcasecmp(isa, "avx") || !strcasecmp(isa, "avx1") || @@ -450,7 +694,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : this->m_vectorWidth = 8; this->m_maskingIsFree = false; this->m_maskBitCount = 32; - cpuFromIsa = "corei7-avx"; + CPUfromISA = CPU_SandyBridge; } else if (!strcasecmp(isa, "avx-i64x4") || !strcasecmp(isa, "avx1-i64x4")) { @@ -461,7 +705,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : this->m_vectorWidth = 4; this->m_maskingIsFree = false; this->m_maskBitCount = 64; - cpuFromIsa = "corei7-avx"; + CPUfromISA = CPU_SandyBridge; } else if (!strcasecmp(isa, "avx-x2") || !strcasecmp(isa, "avx1-x2") || @@ -473,7 +717,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : this->m_vectorWidth = 16; this->m_maskingIsFree = false; this->m_maskBitCount = 32; - cpuFromIsa = "corei7-avx"; + CPUfromISA = CPU_SandyBridge; } else if (!strcasecmp(isa, "avx1.1") || !strcasecmp(isa, "avx1.1-i32x8")) { @@ -486,7 +730,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : this->m_maskBitCount = 32; this->m_hasHalf = true; this->m_hasRand = true; - cpuFromIsa = "core-avx-i"; + CPUfromISA = CPU_IvyBridge; } else if (!strcasecmp(isa, "avx1.1-x2") || !strcasecmp(isa, "avx1.1-i32x16")) { @@ -499,7 +743,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : this->m_maskBitCount = 32; this->m_hasHalf = true; this->m_hasRand = true; - cpuFromIsa = "core-avx-i"; + CPUfromISA = CPU_IvyBridge; } 
else if (!strcasecmp(isa, "avx1.1-i64x4")) { this->m_isa = Target::AVX11; @@ -511,7 +755,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : this->m_maskBitCount = 64; this->m_hasHalf = true; this->m_hasRand = true; - cpuFromIsa = "core-avx-i"; + CPUfromISA = CPU_IvyBridge; } else if (!strcasecmp(isa, "avx2") || !strcasecmp(isa, "avx2-i32x8")) { @@ -525,7 +769,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : this->m_hasHalf = true; this->m_hasRand = true; this->m_hasGather = true; - cpuFromIsa = "core-avx2"; + CPUfromISA = CPU_Haswell; } else if (!strcasecmp(isa, "avx2-x2") || !strcasecmp(isa, "avx2-i32x16")) { @@ -539,7 +783,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : this->m_hasHalf = true; this->m_hasRand = true; this->m_hasGather = true; - cpuFromIsa = "core-avx2"; + CPUfromISA = CPU_Haswell; } else if (!strcasecmp(isa, "avx2-i64x4")) { this->m_isa = Target::AVX2; @@ -552,7 +796,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : this->m_hasHalf = true; this->m_hasRand = true; this->m_hasGather = true; - cpuFromIsa = "core-avx2"; + CPUfromISA = CPU_Haswell; } #ifdef ISPC_ARM_ENABLED else if (!strcasecmp(isa, "neon-i8x16")) { @@ -604,22 +848,21 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : this->m_hasTranscendentals = true; this->m_hasTrigonometry = true; this->m_hasGather = this->m_hasScatter = false; - cpuFromIsa = "sm_35"; + CPUfromISA = CPU_SM35; } #endif /* ISPC_NVPTX_ENABLED */ else { Error(SourcePos(), "Target \"%s\" is unknown. 
Choices are: %s.", - isa, SupportedTargets()); + isa, SupportedTargets()); error = true; } #if defined(ISPC_ARM_ENABLED) && !defined(__arm__) - if (cpu == NULL && !strncmp(isa, "neon", 4)) - - cpu = "cortex-a9"; + if ((CPUID == CPU_None) && !strncmp(isa, "neon", 4)) + CPUID = CPU_CortexA9; #endif - if (cpu == NULL) { + if (CPUID == CPU_None) { #ifndef ISPC_ARM_ENABLED if (isa == NULL) { #endif @@ -628,29 +871,24 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : cpu = strdup(hostCPU.c_str()); else { Warning(SourcePos(), "Unable to determine host CPU!\n"); - cpu = "generic"; + cpu = a.GetDefaultNameFromType(CPU_Generic).c_str(); } #ifndef ISPC_ARM_ENABLED } else { - cpu = cpuFromIsa; + cpu = a.GetDefaultNameFromType(CPUfromISA).c_str(); } #endif } else { - bool foundCPU = false; - for (int i = 0; i < int(sizeof(supportedCPUs) / sizeof(supportedCPUs[0])); - ++i) { - if (!strcmp(cpu, supportedCPUs[i])) { - foundCPU = true; - break; - } - } - if (foundCPU == false) { - Error(SourcePos(), "Error: CPU type \"%s\" unknown. Supported CPUs: " - "%s.", cpu, SupportedCPUs().c_str()); + if ((CPUfromISA != CPU_None) && + !a.BackwardCompatible(CPUID, CPUfromISA)) { + Error(SourcePos(), "The requested CPU is incompatible" + " with the CPU %s needs: %s vs. %s!\n", + isa, cpu, a.GetDefaultNameFromType(CPUfromISA).c_str()); return; } + cpu = a.GetDefaultNameFromType(CPUID).c_str(); } this->m_cpu = cpu; @@ -752,14 +990,8 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : std::string Target::SupportedCPUs() { - std::string ret; - int count = sizeof(supportedCPUs) / sizeof(supportedCPUs[0]); - for (int i = 0; i < count; ++i) { - ret += supportedCPUs[i]; - if (i != count - 1) - ret += ", "; - } - return ret; + AllCPUs a; + return a.HumanReadableListOfNames(); }