390
ispc.cpp
390
ispc.cpp
@@ -40,6 +40,7 @@
|
||||
#include "util.h"
|
||||
#include "llvmutil.h"
|
||||
#include <stdio.h>
|
||||
#include <sstream>
|
||||
#ifdef ISPC_IS_WINDOWS
|
||||
#include <windows.h>
|
||||
#include <direct.h>
|
||||
@@ -161,20 +162,219 @@ lGetSystemISA() {
|
||||
}
|
||||
|
||||
|
||||
static const char *supportedCPUs[] = {
|
||||
#ifdef ISPC_ARM_ENABLED
|
||||
typedef enum {
|
||||
// Special value, indicates that no CPU is present.
|
||||
CPU_None = 0,
|
||||
|
||||
// 'Generic' CPU without any hardware SIMD capabilities.
|
||||
CPU_Generic = 1,
|
||||
|
||||
// Early Atom CPU. Supports SSSE3.
|
||||
CPU_Bonnell,
|
||||
|
||||
// Generic Core2-like. Supports SSSE3. Isn`t quite compatible with Bonnell,
|
||||
// but for ISPC the difference is negligible; ISPC doesn`t make use of it.
|
||||
CPU_Core2,
|
||||
|
||||
// Core2 Solo/Duo/Quad/Extreme. Supports SSE 4.1 (but not 4.2).
|
||||
CPU_Penryn,
|
||||
|
||||
// Late Core2-like. Supports SSE 4.2 + POPCNT/LZCNT.
|
||||
CPU_Nehalem,
|
||||
|
||||
// Sandy Bridge. Supports AVX 1.
|
||||
CPU_SandyBridge,
|
||||
|
||||
// Ivy Bridge. Supports AVX 1 + RDRAND.
|
||||
CPU_IvyBridge,
|
||||
|
||||
// Haswell. Supports AVX 2.
|
||||
CPU_Haswell,
|
||||
|
||||
#if !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4) && !defined(LLVM_3_5) // LLVM 3.6+
|
||||
// Broadwell. Supports AVX 2 + ADX/RDSEED/SMAP.
|
||||
CPU_Broadwell,
|
||||
#endif
|
||||
|
||||
#if !defined(LLVM_3_2) && !defined(LLVM_3_3) // LLVM 3.4+
|
||||
// Late Atom-like design. Supports SSE 4.2 + POPCNT/LZCNT.
|
||||
CPU_Silvermont,
|
||||
#endif
|
||||
|
||||
// FIXME: LLVM supports a ton of different ARM CPU variants--not just
|
||||
// cortex-a9 and a15. We should be able to handle any of them that also
|
||||
// have NEON support.
|
||||
"cortex-a9", "cortex-a15",
|
||||
#ifdef ISPC_ARM_ENABLED
|
||||
// ARM Cortex A15. Supports NEON VFPv4.
|
||||
CPU_CortexA15,
|
||||
|
||||
// ARM Cortex A9. Supports NEON VFPv3.
|
||||
CPU_CortexA9,
|
||||
#endif
|
||||
"atom", "penryn", "core2", "corei7", "corei7-avx"
|
||||
, "core-avx-i", "core-avx2"
|
||||
#if !defined(LLVM_3_2) && !defined(LLVM_3_3)
|
||||
, "slm"
|
||||
#endif // LLVM 3.4+
|
||||
|
||||
#ifdef ISPC_NVPTX_ENABLED
|
||||
// NVidia CUDA-compatible SM-35 architecture.
|
||||
CPU_SM35,
|
||||
#endif
|
||||
|
||||
sizeofCPUtype
|
||||
} CPUtype;
|
||||
|
||||
|
||||
class AllCPUs {
|
||||
private:
|
||||
std::vector<std::vector<std::string>> names;
|
||||
std::vector<std::set<CPUtype>> compat;
|
||||
|
||||
std::set<CPUtype> Set(CPUtype type, ...) {
|
||||
std::set<CPUtype> retn;
|
||||
va_list args;
|
||||
|
||||
retn.insert(type);
|
||||
va_start(args, type);
|
||||
while ((type = (CPUtype)va_arg(args, int)) != CPU_None)
|
||||
retn.insert(type);
|
||||
va_end(args);
|
||||
|
||||
return retn;
|
||||
}
|
||||
|
||||
public:
|
||||
AllCPUs() {
|
||||
names = std::vector<std::vector<std::string>>(sizeofCPUtype);
|
||||
compat = std::vector<std::set<CPUtype>>(sizeofCPUtype);
|
||||
|
||||
|
||||
names[CPU_Generic].push_back("generic");
|
||||
|
||||
names[CPU_Bonnell].push_back("atom");
|
||||
names[CPU_Bonnell].push_back("bonnell");
|
||||
|
||||
names[CPU_Core2].push_back("core2");
|
||||
|
||||
names[CPU_Penryn].push_back("penryn");
|
||||
|
||||
#if !defined(LLVM_3_2) && !defined(LLVM_3_3) // LLVM 3.4+
|
||||
names[CPU_Silvermont].push_back("slm");
|
||||
names[CPU_Silvermont].push_back("silvermont");
|
||||
#endif
|
||||
|
||||
names[CPU_Nehalem].push_back("corei7");
|
||||
names[CPU_Nehalem].push_back("nehalem");
|
||||
|
||||
names[CPU_SandyBridge].push_back("corei7-avx");
|
||||
names[CPU_SandyBridge].push_back("sandybridge");
|
||||
|
||||
names[CPU_IvyBridge].push_back("core-avx-i");
|
||||
names[CPU_IvyBridge].push_back("ivybridge");
|
||||
|
||||
names[CPU_Haswell].push_back("core-avx2");
|
||||
names[CPU_Haswell].push_back("haswell");
|
||||
|
||||
#if !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4) && !defined(LLVM_3_5) // LLVM 3.6+
|
||||
names[CPU_Broadwell].push_back("broadwell");
|
||||
#endif
|
||||
|
||||
#ifdef ISPC_ARM_ENABLED
|
||||
names[CPU_CortexA15].push_back("cortex-a15");
|
||||
|
||||
names[CPU_CortexA9].push_back("cortex-a9");
|
||||
#endif
|
||||
|
||||
#ifdef ISPC_NVPTX_ENABLED
|
||||
names[CPU_SM35].push_back("sm_35");
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(LLVM_3_2) || defined(LLVM_3_3) // LLVM 3.4+
|
||||
#define CPU_Silvermont CPU_Nehalem
|
||||
#else
|
||||
compat[CPU_Silvermont] = Set(CPU_Generic, CPU_Bonnell, CPU_Penryn,
|
||||
CPU_Core2, CPU_Nehalem, CPU_Silvermont,
|
||||
CPU_None);
|
||||
#endif
|
||||
#if defined(LLVM_3_2) || defined(LLVM_3_3) || defined(LLVM_3_4) || defined(LLVM_3_5) // LLVM 3.6+
|
||||
#define CPU_Broadwell CPU_Haswell
|
||||
#else
|
||||
compat[CPU_Broadwell] = Set(CPU_Generic, CPU_Bonnell, CPU_Penryn,
|
||||
CPU_Core2, CPU_Nehalem, CPU_Silvermont,
|
||||
CPU_SandyBridge, CPU_IvyBridge,
|
||||
CPU_Haswell, CPU_Broadwell, CPU_None);
|
||||
#endif
|
||||
compat[CPU_Haswell] = Set(CPU_Generic, CPU_Bonnell, CPU_Penryn,
|
||||
CPU_Core2, CPU_Nehalem, CPU_Silvermont,
|
||||
CPU_SandyBridge, CPU_IvyBridge,
|
||||
CPU_Haswell, CPU_Broadwell, CPU_None);
|
||||
compat[CPU_IvyBridge] = Set(CPU_Generic, CPU_Bonnell, CPU_Penryn,
|
||||
CPU_Core2, CPU_Nehalem, CPU_Silvermont,
|
||||
CPU_SandyBridge, CPU_IvyBridge,
|
||||
CPU_None);
|
||||
compat[CPU_SandyBridge] = Set(CPU_Generic, CPU_Bonnell, CPU_Penryn,
|
||||
CPU_Core2, CPU_Nehalem, CPU_Silvermont,
|
||||
CPU_SandyBridge, CPU_None);
|
||||
compat[CPU_Nehalem] = Set(CPU_Generic, CPU_Bonnell, CPU_Penryn,
|
||||
CPU_Core2, CPU_Nehalem, CPU_Silvermont,
|
||||
CPU_None);
|
||||
compat[CPU_Penryn] = Set(CPU_Generic, CPU_Bonnell, CPU_Penryn,
|
||||
CPU_Core2, CPU_Nehalem, CPU_Silvermont,
|
||||
CPU_None);
|
||||
compat[CPU_Core2] = Set(CPU_Generic, CPU_Bonnell, CPU_Core2,
|
||||
CPU_None);
|
||||
compat[CPU_Bonnell] = Set(CPU_Generic, CPU_Bonnell, CPU_Core2,
|
||||
CPU_None);
|
||||
compat[CPU_Generic] = Set(CPU_Generic, CPU_None);
|
||||
|
||||
#ifdef ISPC_ARM_ENABLED
|
||||
compat[CPU_CortexA15] = Set(CPU_Generic, CPU_CortexA9, CPU_CortexA15,
|
||||
CPU_None);
|
||||
compat[CPU_CortexA9] = Set(CPU_Generic, CPU_CortexA9, CPU_None);
|
||||
#endif
|
||||
|
||||
#ifdef ISPC_NVPTX_ENABLED
|
||||
compat[CPU_SM35] = Set(CPU_Generic, CPU_SM35, CPU_None);
|
||||
#endif
|
||||
}
|
||||
|
||||
std::string HumanReadableListOfNames() {
|
||||
std::stringstream CPUs;
|
||||
for (int i = CPU_Generic; i < sizeofCPUtype; i++) {
|
||||
CPUs << names[i][0];
|
||||
if (names[i].size() > 1) {
|
||||
CPUs << " (synonyms: " << names[i][1];
|
||||
for (int j = 2, je = names[i].size(); j < je; j++)
|
||||
CPUs << ", " << names[i][j];
|
||||
CPUs << ")";
|
||||
}
|
||||
if (i < sizeofCPUtype - 1)
|
||||
CPUs << ", ";
|
||||
}
|
||||
return CPUs.str();
|
||||
}
|
||||
|
||||
std::string GetDefaultNameFromType(CPUtype type) {
|
||||
Assert((type > CPU_None) && (type < sizeofCPUtype));
|
||||
return names[type][0];
|
||||
}
|
||||
|
||||
CPUtype GetTypeFromName(std::string name) {
|
||||
CPUtype retn = CPU_None;
|
||||
|
||||
for (int i = 1; (retn == CPU_None) && (i < sizeofCPUtype); i++)
|
||||
for (int j = 0, je = names[i].size();
|
||||
(retn == CPU_None) && (j < je); j++)
|
||||
if (!name.compare(names[i][j]))
|
||||
retn = (CPUtype)i;
|
||||
return retn;
|
||||
}
|
||||
|
||||
bool BackwardCompatible(CPUtype what, CPUtype with) {
|
||||
Assert((what > CPU_None) && (what < sizeofCPUtype));
|
||||
Assert((with > CPU_None) && (with < sizeofCPUtype));
|
||||
return compat[what].find(with) != compat[what].end();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
m_target(NULL),
|
||||
m_targetMachine(NULL),
|
||||
@@ -205,39 +405,79 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
m_hasRcpd(false),
|
||||
m_hasVecPrefetch(false)
|
||||
{
|
||||
CPUtype CPUID = CPU_None, CPUfromISA = CPU_None;
|
||||
AllCPUs a;
|
||||
|
||||
if (cpu) {
|
||||
CPUID = a.GetTypeFromName(cpu);
|
||||
if (CPUID == CPU_None) {
|
||||
Error(SourcePos(), "Error: CPU type \"%s\" unknown. Supported"
|
||||
" CPUs: %s.", cpu, a.HumanReadableListOfNames().c_str());
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (isa == NULL) {
|
||||
if (cpu != NULL) {
|
||||
// If a CPU was specified explicitly, try to pick the best
|
||||
// possible ISA based on that.
|
||||
if (!strcmp(cpu, "core-avx2"))
|
||||
isa = "avx2-i32x8";
|
||||
#ifdef ISPC_ARM_ENABLED
|
||||
else if (!strcmp(cpu, "cortex-a9") ||
|
||||
!strcmp(cpu, "cortex-a15"))
|
||||
isa = "neon-i32x4";
|
||||
// If a CPU was specified explicitly, try to pick the best
|
||||
// possible ISA based on that.
|
||||
switch (CPUID) {
|
||||
case CPU_None:
|
||||
// No CPU and no ISA, so use system info to figure out
|
||||
// what this CPU supports.
|
||||
isa = lGetSystemISA();
|
||||
Warning(SourcePos(), "No --target specified on command-line."
|
||||
" Using default system target \"%s\".", isa);
|
||||
break;
|
||||
|
||||
case CPU_Generic:
|
||||
isa = "generic-1";
|
||||
break;
|
||||
|
||||
#ifdef ISPC_NVPTX_ENABLED
|
||||
case CPU_SM35:
|
||||
isa = "nvptx";
|
||||
break;
|
||||
#endif
|
||||
else if (!strcmp(cpu, "core-avx-i"))
|
||||
|
||||
#ifdef ISPC_ARM_ENABLED
|
||||
case CPU_CortexA9:
|
||||
case CPU_CortexA15:
|
||||
isa = "neon-i32x4";
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4) && !defined(LLVM_3_5)
|
||||
case CPU_Broadwell:
|
||||
#endif
|
||||
case CPU_Haswell:
|
||||
isa = "avx2-i32x8";
|
||||
break;
|
||||
|
||||
case CPU_IvyBridge:
|
||||
isa = "avx1.1-i32x8";
|
||||
else if (!strcmp(cpu, "sandybridge") ||
|
||||
!strcmp(cpu, "corei7-avx"))
|
||||
break;
|
||||
|
||||
case CPU_SandyBridge:
|
||||
isa = "avx1-i32x8";
|
||||
else if (!strcmp(cpu, "corei7") ||
|
||||
!strcmp(cpu, "penryn") ||
|
||||
!strcmp(cpu, "slm"))
|
||||
break;
|
||||
|
||||
// Penryn is here because ISPC does not use SSE 4.2
|
||||
case CPU_Penryn:
|
||||
case CPU_Nehalem:
|
||||
#if !defined(LLVM_3_2) && !defined(LLVM_3_3)
|
||||
case CPU_Silvermont:
|
||||
#endif
|
||||
isa = "sse4-i32x4";
|
||||
else
|
||||
break;
|
||||
|
||||
default:
|
||||
isa = "sse2-i32x4";
|
||||
Warning(SourcePos(), "No --target specified on command-line. "
|
||||
"Using ISA \"%s\" based on specified CPU \"%s\".", isa,
|
||||
cpu);
|
||||
}
|
||||
else {
|
||||
// No CPU and no ISA, so use CPUID to figure out what this CPU
|
||||
// supports.
|
||||
isa = lGetSystemISA();
|
||||
Warning(SourcePos(), "No --target specified on command-line. "
|
||||
"Using default system target \"%s\".", isa);
|
||||
break;
|
||||
}
|
||||
if (CPUID != CPU_None)
|
||||
Warning(SourcePos(), "No --target specified on command-line."
|
||||
" Using ISA \"%s\" based on specified CPU \"%s\".",
|
||||
isa, cpu);
|
||||
}
|
||||
|
||||
if (arch == NULL) {
|
||||
@@ -278,8 +518,6 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
this->m_arch = arch;
|
||||
}
|
||||
|
||||
const char * cpuFromIsa = "";
|
||||
|
||||
// Check default LLVM generated targets
|
||||
if (!strcasecmp(isa, "sse2") ||
|
||||
!strcasecmp(isa, "sse2-i32x4")) {
|
||||
@@ -290,7 +528,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
this->m_vectorWidth = 4;
|
||||
this->m_maskingIsFree = false;
|
||||
this->m_maskBitCount = 32;
|
||||
cpuFromIsa = "core2";
|
||||
CPUfromISA = CPU_Core2;
|
||||
}
|
||||
else if (!strcasecmp(isa, "sse2-x2") ||
|
||||
!strcasecmp(isa, "sse2-i32x8")) {
|
||||
@@ -301,7 +539,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
this->m_vectorWidth = 8;
|
||||
this->m_maskingIsFree = false;
|
||||
this->m_maskBitCount = 32;
|
||||
cpuFromIsa = "core2";
|
||||
CPUfromISA = CPU_Core2;
|
||||
}
|
||||
else if (!strcasecmp(isa, "sse4") ||
|
||||
!strcasecmp(isa, "sse4-i32x4")) {
|
||||
@@ -312,7 +550,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
this->m_vectorWidth = 4;
|
||||
this->m_maskingIsFree = false;
|
||||
this->m_maskBitCount = 32;
|
||||
cpuFromIsa = "corei7";
|
||||
CPUfromISA = CPU_Nehalem;
|
||||
}
|
||||
else if (!strcasecmp(isa, "sse4x2") ||
|
||||
!strcasecmp(isa, "sse4-x2") ||
|
||||
@@ -324,7 +562,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
this->m_vectorWidth = 8;
|
||||
this->m_maskingIsFree = false;
|
||||
this->m_maskBitCount = 32;
|
||||
cpuFromIsa = "corei7";
|
||||
CPUfromISA = CPU_Nehalem;
|
||||
}
|
||||
else if (!strcasecmp(isa, "sse4-i8x16")) {
|
||||
this->m_isa = Target::SSE4;
|
||||
@@ -334,7 +572,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
this->m_vectorWidth = 16;
|
||||
this->m_maskingIsFree = false;
|
||||
this->m_maskBitCount = 8;
|
||||
cpuFromIsa = "corei7";
|
||||
CPUfromISA = CPU_Nehalem;
|
||||
}
|
||||
else if (!strcasecmp(isa, "sse4-i16x8")) {
|
||||
this->m_isa = Target::SSE4;
|
||||
@@ -344,7 +582,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
this->m_vectorWidth = 8;
|
||||
this->m_maskingIsFree = false;
|
||||
this->m_maskBitCount = 16;
|
||||
cpuFromIsa = "corei7";
|
||||
CPUfromISA = CPU_Nehalem;
|
||||
}
|
||||
else if (!strcasecmp(isa, "generic-4") ||
|
||||
!strcasecmp(isa, "generic-x4")) {
|
||||
@@ -359,6 +597,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
this->m_hasTrigonometry = true;
|
||||
this->m_hasGather = this->m_hasScatter = true;
|
||||
this->m_hasRsqrtd = this->m_hasRcpd = true;
|
||||
CPUfromISA = CPU_Generic;
|
||||
}
|
||||
else if (!strcasecmp(isa, "generic-8") ||
|
||||
!strcasecmp(isa, "generic-x8")) {
|
||||
@@ -373,6 +612,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
this->m_hasTrigonometry = true;
|
||||
this->m_hasGather = this->m_hasScatter = true;
|
||||
this->m_hasRsqrtd = this->m_hasRcpd = true;
|
||||
CPUfromISA = CPU_Generic;
|
||||
}
|
||||
else if (!strcasecmp(isa, "generic-16") ||
|
||||
!strcasecmp(isa, "generic-x16")) {
|
||||
@@ -392,6 +632,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
this->m_hasRsqrtd = this->m_hasRcpd = true;
|
||||
// It's set to true, because MIC has hardware vector prefetch instruction
|
||||
this->m_hasVecPrefetch = true;
|
||||
CPUfromISA = CPU_Generic;
|
||||
}
|
||||
else if (!strcasecmp(isa, "generic-32") ||
|
||||
!strcasecmp(isa, "generic-x32")) {
|
||||
@@ -406,6 +647,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
this->m_hasTrigonometry = true;
|
||||
this->m_hasGather = this->m_hasScatter = true;
|
||||
this->m_hasRsqrtd = this->m_hasRcpd = true;
|
||||
CPUfromISA = CPU_Generic;
|
||||
}
|
||||
else if (!strcasecmp(isa, "generic-64") ||
|
||||
!strcasecmp(isa, "generic-x64")) {
|
||||
@@ -420,6 +662,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
this->m_hasTrigonometry = true;
|
||||
this->m_hasGather = this->m_hasScatter = true;
|
||||
this->m_hasRsqrtd = this->m_hasRcpd = true;
|
||||
CPUfromISA = CPU_Generic;
|
||||
}
|
||||
else if (!strcasecmp(isa, "generic-1") ||
|
||||
!strcasecmp(isa, "generic-x1")) {
|
||||
@@ -429,6 +672,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
this->m_vectorWidth = 1;
|
||||
this->m_maskingIsFree = false;
|
||||
this->m_maskBitCount = 32;
|
||||
CPUfromISA = CPU_Generic;
|
||||
}
|
||||
else if (!strcasecmp(isa, "avx1-i32x4")) {
|
||||
this->m_isa = Target::AVX;
|
||||
@@ -438,7 +682,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
this->m_vectorWidth = 4;
|
||||
this->m_maskingIsFree = false;
|
||||
this->m_maskBitCount = 32;
|
||||
cpuFromIsa = "corei7-avx";
|
||||
CPUfromISA = CPU_SandyBridge;
|
||||
}
|
||||
else if (!strcasecmp(isa, "avx") ||
|
||||
!strcasecmp(isa, "avx1") ||
|
||||
@@ -450,7 +694,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
this->m_vectorWidth = 8;
|
||||
this->m_maskingIsFree = false;
|
||||
this->m_maskBitCount = 32;
|
||||
cpuFromIsa = "corei7-avx";
|
||||
CPUfromISA = CPU_SandyBridge;
|
||||
}
|
||||
else if (!strcasecmp(isa, "avx-i64x4") ||
|
||||
!strcasecmp(isa, "avx1-i64x4")) {
|
||||
@@ -461,7 +705,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
this->m_vectorWidth = 4;
|
||||
this->m_maskingIsFree = false;
|
||||
this->m_maskBitCount = 64;
|
||||
cpuFromIsa = "corei7-avx";
|
||||
CPUfromISA = CPU_SandyBridge;
|
||||
}
|
||||
else if (!strcasecmp(isa, "avx-x2") ||
|
||||
!strcasecmp(isa, "avx1-x2") ||
|
||||
@@ -473,7 +717,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
this->m_vectorWidth = 16;
|
||||
this->m_maskingIsFree = false;
|
||||
this->m_maskBitCount = 32;
|
||||
cpuFromIsa = "corei7-avx";
|
||||
CPUfromISA = CPU_SandyBridge;
|
||||
}
|
||||
else if (!strcasecmp(isa, "avx1.1") ||
|
||||
!strcasecmp(isa, "avx1.1-i32x8")) {
|
||||
@@ -486,7 +730,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
this->m_maskBitCount = 32;
|
||||
this->m_hasHalf = true;
|
||||
this->m_hasRand = true;
|
||||
cpuFromIsa = "core-avx-i";
|
||||
CPUfromISA = CPU_IvyBridge;
|
||||
}
|
||||
else if (!strcasecmp(isa, "avx1.1-x2") ||
|
||||
!strcasecmp(isa, "avx1.1-i32x16")) {
|
||||
@@ -499,7 +743,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
this->m_maskBitCount = 32;
|
||||
this->m_hasHalf = true;
|
||||
this->m_hasRand = true;
|
||||
cpuFromIsa = "core-avx-i";
|
||||
CPUfromISA = CPU_IvyBridge;
|
||||
}
|
||||
else if (!strcasecmp(isa, "avx1.1-i64x4")) {
|
||||
this->m_isa = Target::AVX11;
|
||||
@@ -511,7 +755,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
this->m_maskBitCount = 64;
|
||||
this->m_hasHalf = true;
|
||||
this->m_hasRand = true;
|
||||
cpuFromIsa = "core-avx-i";
|
||||
CPUfromISA = CPU_IvyBridge;
|
||||
}
|
||||
else if (!strcasecmp(isa, "avx2") ||
|
||||
!strcasecmp(isa, "avx2-i32x8")) {
|
||||
@@ -525,7 +769,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
this->m_hasHalf = true;
|
||||
this->m_hasRand = true;
|
||||
this->m_hasGather = true;
|
||||
cpuFromIsa = "core-avx2";
|
||||
CPUfromISA = CPU_Haswell;
|
||||
}
|
||||
else if (!strcasecmp(isa, "avx2-x2") ||
|
||||
!strcasecmp(isa, "avx2-i32x16")) {
|
||||
@@ -539,7 +783,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
this->m_hasHalf = true;
|
||||
this->m_hasRand = true;
|
||||
this->m_hasGather = true;
|
||||
cpuFromIsa = "core-avx2";
|
||||
CPUfromISA = CPU_Haswell;
|
||||
}
|
||||
else if (!strcasecmp(isa, "avx2-i64x4")) {
|
||||
this->m_isa = Target::AVX2;
|
||||
@@ -552,7 +796,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
this->m_hasHalf = true;
|
||||
this->m_hasRand = true;
|
||||
this->m_hasGather = true;
|
||||
cpuFromIsa = "core-avx2";
|
||||
CPUfromISA = CPU_Haswell;
|
||||
}
|
||||
#ifdef ISPC_ARM_ENABLED
|
||||
else if (!strcasecmp(isa, "neon-i8x16")) {
|
||||
@@ -604,22 +848,21 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
this->m_hasTranscendentals = true;
|
||||
this->m_hasTrigonometry = true;
|
||||
this->m_hasGather = this->m_hasScatter = false;
|
||||
cpuFromIsa = "sm_35";
|
||||
CPUfromISA = CPU_SM35;
|
||||
}
|
||||
#endif /* ISPC_NVPTX_ENABLED */
|
||||
else {
|
||||
Error(SourcePos(), "Target \"%s\" is unknown. Choices are: %s.",
|
||||
isa, SupportedTargets());
|
||||
isa, SupportedTargets());
|
||||
error = true;
|
||||
}
|
||||
|
||||
#if defined(ISPC_ARM_ENABLED) && !defined(__arm__)
|
||||
if (cpu == NULL && !strncmp(isa, "neon", 4))
|
||||
|
||||
cpu = "cortex-a9";
|
||||
if ((CPUID == CPU_None) && !strncmp(isa, "neon", 4))
|
||||
CPUID = CPU_CortexA9;
|
||||
#endif
|
||||
|
||||
if (cpu == NULL) {
|
||||
if (CPUID == CPU_None) {
|
||||
#ifndef ISPC_ARM_ENABLED
|
||||
if (isa == NULL) {
|
||||
#endif
|
||||
@@ -628,29 +871,24 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
cpu = strdup(hostCPU.c_str());
|
||||
else {
|
||||
Warning(SourcePos(), "Unable to determine host CPU!\n");
|
||||
cpu = "generic";
|
||||
cpu = a.GetDefaultNameFromType(CPU_Generic).c_str();
|
||||
}
|
||||
#ifndef ISPC_ARM_ENABLED
|
||||
}
|
||||
else {
|
||||
cpu = cpuFromIsa;
|
||||
cpu = a.GetDefaultNameFromType(CPUfromISA).c_str();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
else {
|
||||
bool foundCPU = false;
|
||||
for (int i = 0; i < int(sizeof(supportedCPUs) / sizeof(supportedCPUs[0]));
|
||||
++i) {
|
||||
if (!strcmp(cpu, supportedCPUs[i])) {
|
||||
foundCPU = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (foundCPU == false) {
|
||||
Error(SourcePos(), "Error: CPU type \"%s\" unknown. Supported CPUs: "
|
||||
"%s.", cpu, SupportedCPUs().c_str());
|
||||
if ((CPUfromISA != CPU_None) &&
|
||||
!a.BackwardCompatible(CPUID, CPUfromISA)) {
|
||||
Error(SourcePos(), "The requested CPU is incompatible"
|
||||
" with the CPU %s needs: %s vs. %s!\n",
|
||||
isa, cpu, a.GetDefaultNameFromType(CPUfromISA).c_str());
|
||||
return;
|
||||
}
|
||||
cpu = a.GetDefaultNameFromType(CPUID).c_str();
|
||||
}
|
||||
this->m_cpu = cpu;
|
||||
|
||||
@@ -752,14 +990,8 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||
|
||||
std::string
|
||||
Target::SupportedCPUs() {
|
||||
std::string ret;
|
||||
int count = sizeof(supportedCPUs) / sizeof(supportedCPUs[0]);
|
||||
for (int i = 0; i < count; ++i) {
|
||||
ret += supportedCPUs[i];
|
||||
if (i != count - 1)
|
||||
ret += ", ";
|
||||
}
|
||||
return ret;
|
||||
AllCPUs a;
|
||||
return a.HumanReadableListOfNames();
|
||||
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user