Big rewrite / improvement of target handling.
If no CPU is specified, use the host CPU type, not just a default of "nehalem".
Provide better feature strings to the LLVM target machinery.
-> Thus ensuring that LLVM doesn't generate SSE>2 instructions for the SSE2
target (Fixes issue #82).
-> Slightly better generated code, which can now use cmov instructions
Use the LLVM popcount intrinsic (llvm.ctpop) for the SSE2 target (it no longer
generates code that uses the POPCNT instruction, because we now properly tell
LLVM which instructions are and aren't available for SSE2.)
This commit is contained in:
11
Makefile
11
Makefile
@@ -10,7 +10,12 @@ CLANG_LIBS = -lclangFrontend -lclangDriver \
|
|||||||
-lclangSerialization -lclangParse -lclangSema \
|
-lclangSerialization -lclangParse -lclangSema \
|
||||||
-lclangAnalysis -lclangAST -lclangLex -lclangBasic
|
-lclangAnalysis -lclangAST -lclangLex -lclangBasic
|
||||||
|
|
||||||
LLVM_LIBS=$(shell llvm-config --ldflags --libs) -lpthread -ldl
|
ISPC_LIBS=$(CLANG_LIBS) \
|
||||||
|
$(shell llvm-config --ldflags --libs backend bitreader bitwriter codegen engine mcjit scalaropts native analysis core instcombine ipa ipo linker instrumentation) \
|
||||||
|
-lpthread -ldl
|
||||||
|
ISPC_TEST_LIBS=$(shell llvm-config --ldflags --libs bitreader backend interpreter engine jit mcjit) \
|
||||||
|
-lpthread -ldl
|
||||||
|
|
||||||
LLVM_CXXFLAGS=$(shell llvm-config --cppflags)
|
LLVM_CXXFLAGS=$(shell llvm-config --cppflags)
|
||||||
LLVM_VERSION=$(shell llvm-config --version | sed s/\\./_/)
|
LLVM_VERSION=$(shell llvm-config --version | sed s/\\./_/)
|
||||||
LLVM_VERSION_DEF=-DLLVM_$(LLVM_VERSION)
|
LLVM_VERSION_DEF=-DLLVM_$(LLVM_VERSION)
|
||||||
@@ -80,11 +85,11 @@ doxygen:
|
|||||||
|
|
||||||
ispc: print_llvm_src dirs $(OBJS)
|
ispc: print_llvm_src dirs $(OBJS)
|
||||||
@echo Creating ispc executable
|
@echo Creating ispc executable
|
||||||
@$(CXX) $(LDFLAGS) -o $@ $(OBJS) $(CLANG_LIBS) $(LLVM_LIBS)
|
@$(CXX) $(LDFLAGS) -o $@ $(OBJS) $(ISPC_LIBS)
|
||||||
|
|
||||||
ispc_test: dirs ispc_test.cpp
|
ispc_test: dirs ispc_test.cpp
|
||||||
@echo Creating ispc_test executable
|
@echo Creating ispc_test executable
|
||||||
@$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $@ ispc_test.cpp $(LLVM_LIBS)
|
@$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $@ ispc_test.cpp $(ISPC_TEST_LIBS)
|
||||||
|
|
||||||
objs/%.o: %.cpp
|
objs/%.o: %.cpp
|
||||||
@echo Compiling $<
|
@echo Compiling $<
|
||||||
|
|||||||
@@ -277,41 +277,18 @@ define internal i32 @__max_uniform_uint32(i32, i32) nounwind readonly alwaysinli
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
; horizontal ops / reductions
|
; horizontal ops / reductions
|
||||||
|
|
||||||
; FIXME: this is very inefficient, loops over all 32 bits...
|
declare i32 @llvm.ctpop.i32(i32)
|
||||||
|
declare i64 @llvm.ctpop.i64(i64)
|
||||||
; we could use the LLVM intrinsic declare i32 @llvm.ctpop.i32(i32),
|
|
||||||
; although that currently ends up generating a POPCNT instruction even
|
|
||||||
; if we give --target=sse2 on the command line. We probably need to
|
|
||||||
; pipe through the 'sse2' request to LLVM via the 'features' string
|
|
||||||
; at codegen time... (If e.g. --cpu=penryn is also passed along, then
|
|
||||||
; it does generate non-POPCNT code and in particular better code than
|
|
||||||
; the below does.)
|
|
||||||
|
|
||||||
define internal i32 @__popcnt_int32(i32) nounwind readonly alwaysinline {
|
define internal i32 @__popcnt_int32(i32) nounwind readonly alwaysinline {
|
||||||
entry:
|
%val = call i32 @llvm.ctpop.i32(i32 %0)
|
||||||
br label %loop
|
ret i32 %val
|
||||||
|
|
||||||
loop:
|
|
||||||
%count = phi i32 [ 0, %entry ], [ %newcount, %loop ]
|
|
||||||
%val = phi i32 [ %0, %entry ], [ %newval, %loop ]
|
|
||||||
%delta = and i32 %val, 1
|
|
||||||
%newcount = add i32 %count, %delta
|
|
||||||
%newval = lshr i32 %val, 1
|
|
||||||
%done = icmp eq i32 %newval, 0
|
|
||||||
br i1 %done, label %exit, label %loop
|
|
||||||
|
|
||||||
exit:
|
|
||||||
ret i32 %newcount
|
|
||||||
}
|
}
|
||||||
|
|
||||||
define internal i32 @__popcnt_int64(i64) nounwind readnone alwaysinline {
|
define internal i32 @__popcnt_int64(i64) nounwind readnone alwaysinline {
|
||||||
%vec = bitcast i64 %0 to <2 x i32>
|
%val = call i64 @llvm.ctpop.i64(i64 %0)
|
||||||
%v0 = extractelement <2 x i32> %vec, i32 0
|
%val32 = trunc i64 %val to i32
|
||||||
%v1 = extractelement <2 x i32> %vec, i32 1
|
ret i32 %val32
|
||||||
%c0 = call i32 @__popcnt_int32(i32 %v0)
|
|
||||||
%c1 = call i32 @__popcnt_int32(i32 %v1)
|
|
||||||
%sum = add i32 %c0, %c1
|
|
||||||
ret i32 %sum
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
195
ispc.cpp
195
ispc.cpp
@@ -50,6 +50,18 @@
|
|||||||
#endif
|
#endif
|
||||||
#include <llvm/Analysis/DebugInfo.h>
|
#include <llvm/Analysis/DebugInfo.h>
|
||||||
#include <llvm/Support/Dwarf.h>
|
#include <llvm/Support/Dwarf.h>
|
||||||
|
#include <llvm/Target/TargetMachine.h>
|
||||||
|
#include <llvm/Target/TargetOptions.h>
|
||||||
|
#include <llvm/Target/TargetData.h>
|
||||||
|
#if defined(LLVM_3_0) || defined(LLVM_3_0svn)
|
||||||
|
#include <llvm/Support/TargetRegistry.h>
|
||||||
|
#include <llvm/Support/TargetSelect.h>
|
||||||
|
#else
|
||||||
|
#include <llvm/Target/TargetRegistry.h>
|
||||||
|
#include <llvm/Target/TargetSelect.h>
|
||||||
|
#include <llvm/Target/SubtargetFeature.h>
|
||||||
|
#endif
|
||||||
|
#include <llvm/Support/Host.h>
|
||||||
|
|
||||||
Globals *g;
|
Globals *g;
|
||||||
Module *m;
|
Module *m;
|
||||||
@@ -57,15 +69,184 @@ Module *m;
|
|||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
// Target
|
// Target
|
||||||
|
|
||||||
Target::Target() {
|
bool
|
||||||
arch = "x86-64";
|
Target::GetTarget(const char *arch, const char *cpu, const char *isa,
|
||||||
cpu = "nehalem";
|
Target *t) {
|
||||||
is32bit = false;
|
// initialize available targets
|
||||||
isa = SSE4;
|
LLVMInitializeX86Target();
|
||||||
nativeVectorWidth = 4;
|
LLVMInitializeX86TargetInfo();
|
||||||
vectorWidth = 4;
|
|
||||||
|
if (cpu == NULL) {
|
||||||
|
std::string hostCPU = llvm::sys::getHostCPUName();
|
||||||
|
if (hostCPU.size() > 0)
|
||||||
|
cpu = hostCPU.c_str();
|
||||||
|
else {
|
||||||
|
fprintf(stderr, "Warning: unable to determine host CPU!\n");
|
||||||
|
cpu = "generic";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
t->cpu = cpu;
|
||||||
|
|
||||||
|
if (isa == NULL) {
|
||||||
|
if (!strcasecmp(cpu, "atom"))
|
||||||
|
isa = "sse2";
|
||||||
|
#if defined(LLVM_3_0) || defined(LLVM_3_0_svn)
|
||||||
|
else if (!strcasecmp(cpu, "sandybridge") ||
|
||||||
|
!strcasecmp(cpu, "corei7-avx"))
|
||||||
|
isa = "avx";
|
||||||
|
#endif // LLVM_3_0
|
||||||
|
else
|
||||||
|
isa = "sse4";
|
||||||
|
}
|
||||||
|
if (arch == NULL)
|
||||||
|
arch = "x86-64";
|
||||||
|
|
||||||
|
bool error = false;
|
||||||
|
|
||||||
|
// Make sure the target architecture is a known one; print an error
|
||||||
|
// with the valid ones otherwise.
|
||||||
|
t->target = NULL;
|
||||||
|
for (llvm::TargetRegistry::iterator iter = llvm::TargetRegistry::begin();
|
||||||
|
iter != llvm::TargetRegistry::end(); ++iter) {
|
||||||
|
if (std::string(arch) == iter->getName()) {
|
||||||
|
t->target = &*iter;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (t->target == NULL) {
|
||||||
|
fprintf(stderr, "Invalid architecture \"%s\"\nOptions: ", arch);
|
||||||
|
llvm::TargetRegistry::iterator iter;
|
||||||
|
for (iter = llvm::TargetRegistry::begin();
|
||||||
|
iter != llvm::TargetRegistry::end(); ++iter)
|
||||||
|
fprintf(stderr, "%s ", iter->getName());
|
||||||
|
fprintf(stderr, "\n");
|
||||||
|
error = true;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
t->arch = arch;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!strcasecmp(isa, "sse2")) {
|
||||||
|
t->isa = Target::SSE2;
|
||||||
|
t->nativeVectorWidth = 4;
|
||||||
|
t->vectorWidth = 4;
|
||||||
|
t->attributes = "-sse2,-sse41,-sse42,-sse4a,-ssse3,-popcnt";
|
||||||
|
}
|
||||||
|
else if (!strcasecmp(isa, "sse4")) {
|
||||||
|
t->isa = Target::SSE4;
|
||||||
|
t->nativeVectorWidth = 4;
|
||||||
|
t->vectorWidth = 4;
|
||||||
|
t->attributes = "+sse,+sse2,+sse3,+sse41,+sse42,+sse4a,+ssse3,+popcnt,+cmov";
|
||||||
|
}
|
||||||
|
else if (!strcasecmp(isa, "sse4x2")) {
|
||||||
|
t->isa = Target::SSE4;
|
||||||
|
t->nativeVectorWidth = 4;
|
||||||
|
t->vectorWidth = 8;
|
||||||
|
t->attributes = "+sse,+sse2,+sse3,+sse41,+sse42,+sse4a,+ssse3,+popcnt,+cmov";
|
||||||
|
}
|
||||||
|
#if defined(LLVM_3_0) || defined(LLVM_3_0svn)
|
||||||
|
else if (!strcasecmp(isa, "avx")) {
|
||||||
|
t->isa = Target::AVX;
|
||||||
|
t->nativeVectorWidth = 8;
|
||||||
|
t->vectorWidth = 8;
|
||||||
|
t->attributes = "+avx,+popcnt,+cmov";
|
||||||
|
}
|
||||||
|
else if (!strcasecmp(isa, "avx-x2")) {
|
||||||
|
t->isa = Target::AVX;
|
||||||
|
t->nativeVectorWidth = 8;
|
||||||
|
t->vectorWidth = 16;
|
||||||
|
t->attributes = "+avx,+popcnt,+cmov";
|
||||||
|
}
|
||||||
|
#endif // LLVM 3.0
|
||||||
|
else {
|
||||||
|
fprintf(stderr, "Target ISA \"%s\" is unknown. Choices are: %s\n",
|
||||||
|
isa, SupportedTargetISAs());
|
||||||
|
error = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!error) {
|
||||||
|
llvm::TargetMachine *targetMachine = t->GetTargetMachine();
|
||||||
|
const llvm::TargetData *targetData = targetMachine->getTargetData();
|
||||||
|
t->is32bit = (targetData->getPointerSize() == 4);
|
||||||
|
}
|
||||||
|
|
||||||
|
return !error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
const char *
|
||||||
|
Target::SupportedTargetCPUs() {
|
||||||
|
return "atom, barcelona, core2, corei7, "
|
||||||
|
#if defined(LLVM_3_0) || defined(LLVM_3_0_svn)
|
||||||
|
"corei7-avx, "
|
||||||
|
#endif
|
||||||
|
"istanbul, nocona, penryn, "
|
||||||
|
#ifdef LLVM_2_9
|
||||||
|
"sandybridge, "
|
||||||
|
#endif
|
||||||
|
"westmere";
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
const char *
|
||||||
|
Target::SupportedTargetArchs() {
|
||||||
|
return "x86, x86-64";
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
const char *
|
||||||
|
Target::SupportedTargetISAs() {
|
||||||
|
return "sse2, sse4, sse4x2"
|
||||||
|
#if defined(LLVM_3_0) || defined(LLVM_3_0_svn)
|
||||||
|
", avx, avx-x2"
|
||||||
|
#endif
|
||||||
|
;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
std::string
|
||||||
|
Target::GetTripleString() const {
|
||||||
|
llvm::Triple triple;
|
||||||
|
// Start with the host triple as the default
|
||||||
|
triple.setTriple(llvm::sys::getHostTriple());
|
||||||
|
|
||||||
|
// And override the arch in the host triple based on what the user
|
||||||
|
// specified. Here we need to deal with the fact that LLVM uses one
|
||||||
|
// naming convention for targets TargetRegistry, but wants some
|
||||||
|
// slightly different ones for the triple. TODO: is there a way to
|
||||||
|
// have it do this remapping, which would presumably be a bit less
|
||||||
|
// error prone?
|
||||||
|
if (arch == "x86")
|
||||||
|
triple.setArchName("i386");
|
||||||
|
else if (arch == "x86-64")
|
||||||
|
triple.setArchName("x86_64");
|
||||||
|
else
|
||||||
|
triple.setArchName(arch);
|
||||||
|
|
||||||
|
return triple.str();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
llvm::TargetMachine *
|
||||||
|
Target::GetTargetMachine() const {
|
||||||
|
std::string triple = GetTripleString();
|
||||||
|
|
||||||
|
#if defined(LLVM_3_0svn) || defined(LLVM_3_0)
|
||||||
|
std::string featuresString = attributes;
|
||||||
|
llvm::TargetMachine *targetMachine =
|
||||||
|
target->createTargetMachine(triple, cpu, featuresString);
|
||||||
|
#else
|
||||||
|
std::string featuresString = cpu + std::string(",") + attributes;
|
||||||
|
llvm::TargetMachine *targetMachine =
|
||||||
|
target->createTargetMachine(triple, featuresString);
|
||||||
|
#endif
|
||||||
|
assert(targetMachine != NULL);
|
||||||
|
|
||||||
|
targetMachine->setAsmVerbosityDefault(true);
|
||||||
|
return targetMachine;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
// Opt
|
// Opt
|
||||||
|
|
||||||
|
|||||||
34
ispc.h
34
ispc.h
@@ -69,6 +69,8 @@ namespace llvm {
|
|||||||
class FunctionType;
|
class FunctionType;
|
||||||
class LLVMContext;
|
class LLVMContext;
|
||||||
class Module;
|
class Module;
|
||||||
|
class Target;
|
||||||
|
class TargetMachine;
|
||||||
class Type;
|
class Type;
|
||||||
class Value;
|
class Value;
|
||||||
}
|
}
|
||||||
@@ -156,7 +158,34 @@ public:
|
|||||||
This structure defines a compilation target for the ispc compiler.
|
This structure defines a compilation target for the ispc compiler.
|
||||||
*/
|
*/
|
||||||
struct Target {
|
struct Target {
|
||||||
Target();
|
/** Initializes the given Target pointer for a target of the given
|
||||||
|
name, if the name is a known target. Returns true if the
|
||||||
|
target was initialized and false if the name is unknown. */
|
||||||
|
static bool GetTarget(const char *arch, const char *cpu, const char *isa,
|
||||||
|
Target *);
|
||||||
|
|
||||||
|
/** Returns a comma-delimited string giving the names of the currently
|
||||||
|
supported target ISAs. */
|
||||||
|
static const char *SupportedTargetISAs();
|
||||||
|
|
||||||
|
/** Returns a comma-delimited string giving the names of the currently
|
||||||
|
supported target CPUs. */
|
||||||
|
static const char *SupportedTargetCPUs();
|
||||||
|
|
||||||
|
/** Returns a comma-delimited string giving the names of the currently
|
||||||
|
supported target architectures. */
|
||||||
|
static const char *SupportedTargetArchs();
|
||||||
|
|
||||||
|
/** Returns a triple string specifying the target architecture, vendor,
|
||||||
|
and environment. */
|
||||||
|
std::string GetTripleString() const;
|
||||||
|
|
||||||
|
/** Returns the LLVM TargetMachine object corresponding to this
|
||||||
|
target. */
|
||||||
|
llvm::TargetMachine *GetTargetMachine() const;
|
||||||
|
|
||||||
|
/** llvm Target object representing this target. */
|
||||||
|
const llvm::Target *target;
|
||||||
|
|
||||||
/** Enumerator giving the instruction sets that the compiler can
|
/** Enumerator giving the instruction sets that the compiler can
|
||||||
target. */
|
target. */
|
||||||
@@ -174,6 +203,9 @@ struct Target {
|
|||||||
/** Target CPU. (e.g. "corei7", "corei7-avx", ..) */
|
/** Target CPU. (e.g. "corei7", "corei7-avx", ..) */
|
||||||
std::string cpu;
|
std::string cpu;
|
||||||
|
|
||||||
|
/** Target-specific attributes to pass along to the LLVM backend */
|
||||||
|
std::string attributes;
|
||||||
|
|
||||||
/** Native vector width of the vector instruction set. Note that this
|
/** Native vector width of the vector instruction set. Note that this
|
||||||
value is directly derived from the ISA Being used (e.g. it's 4 for
|
value is directly derived from the ISA Being used (e.g. it's 4 for
|
||||||
SSE, 8 for AVX, etc.) */
|
SSE, 8 for AVX, etc.) */
|
||||||
|
|||||||
@@ -171,7 +171,21 @@ static bool lRunTest(const char *fn) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::string eeError;
|
std::string eeError;
|
||||||
|
#if defined(LLVM_3_0) || defined(LLVM_3_0svn)
|
||||||
|
llvm::EngineBuilder engineBuilder(module);
|
||||||
|
engineBuilder.setErrorStr(&eeError);
|
||||||
|
engineBuilder.setEngineKind(llvm::EngineKind::JIT);
|
||||||
|
#if 0
|
||||||
|
std::vector<std::string> attributes;
|
||||||
|
if (target != NULL && !strcmp(target, "avx"))
|
||||||
|
attributes.push_back("+avx");
|
||||||
|
engineBuilder.setMAttrs(attributes);
|
||||||
|
engineBuilder.setUseMCJIT(true);
|
||||||
|
#endif
|
||||||
|
llvm::ExecutionEngine *ee = engineBuilder.create();
|
||||||
|
#else
|
||||||
llvm::ExecutionEngine *ee = llvm::ExecutionEngine::createJIT(module, &eeError);
|
llvm::ExecutionEngine *ee = llvm::ExecutionEngine::createJIT(module, &eeError);
|
||||||
|
#endif
|
||||||
if (!ee) {
|
if (!ee) {
|
||||||
fprintf(stderr, "Unable to create ExecutionEngine: %s\n", eeError.c_str());
|
fprintf(stderr, "Unable to create ExecutionEngine: %s\n", eeError.c_str());
|
||||||
return false;
|
return false;
|
||||||
@@ -338,6 +352,11 @@ static bool lRunTest(const char *fn) {
|
|||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
int main(int argc, char *argv[]) {
|
||||||
llvm::InitializeNativeTarget();
|
llvm::InitializeNativeTarget();
|
||||||
|
#if defined(LLVM_3_0) || defined(LLVM_3_0svn)
|
||||||
|
llvm::InitializeAllTargetMCs();
|
||||||
|
LLVMLinkInMCJIT();
|
||||||
|
LLVMLinkInJIT();
|
||||||
|
#endif
|
||||||
|
|
||||||
std::vector<const char *> files;
|
std::vector<const char *> files;
|
||||||
for (int i = 1; i < argc; ++i) {
|
for (int i = 1; i < argc; ++i) {
|
||||||
|
|||||||
79
main.cpp
79
main.cpp
@@ -53,12 +53,13 @@
|
|||||||
#endif // ISPC_IS_WINDOWS
|
#endif // ISPC_IS_WINDOWS
|
||||||
|
|
||||||
static void usage(int ret) {
|
static void usage(int ret) {
|
||||||
printf("This is the Intel(r) SPMD Program Compiler (ispc), build %s (%s)\n\n", BUILD_DATE, BUILD_VERSION);
|
printf("This is the Intel(r) SPMD Program Compiler (ispc), build %s (%s)\n\n",
|
||||||
|
BUILD_DATE, BUILD_VERSION);
|
||||||
printf("usage: ispc\n");
|
printf("usage: ispc\n");
|
||||||
printf(" [--arch={x86,x86-64}]\t\tSelect target architecture\n");
|
printf(" [--arch={%s}]\t\tSelect target architecture\n",
|
||||||
|
Target::SupportedTargetArchs());
|
||||||
printf(" [--cpu=<cpu>]\t\t\tSelect target CPU type\n");
|
printf(" [--cpu=<cpu>]\t\t\tSelect target CPU type\n");
|
||||||
printf(" (atom, barcelona, core2, corei7, corei7-avx, istanbul, nocona,\n");
|
printf(" (%s)\n", Target::SupportedTargetCPUs());
|
||||||
printf(" penryn, westmere)\n");
|
|
||||||
#ifndef ISPC_IS_WINDOWS
|
#ifndef ISPC_IS_WINDOWS
|
||||||
printf(" [-D<foo>]\t\t\t\t#define value when running preprocessor\n");
|
printf(" [-D<foo>]\t\t\t\t#define value when running preprocessor\n");
|
||||||
#endif
|
#endif
|
||||||
@@ -91,11 +92,7 @@ static void usage(int ret) {
|
|||||||
printf(" disable-gather-scatter-flattening\tDisable flattening when all lanes are on\n");
|
printf(" disable-gather-scatter-flattening\tDisable flattening when all lanes are on\n");
|
||||||
printf(" disable-uniform-memory-optimizations\tDisable uniform-based coherent memory access\n");
|
printf(" disable-uniform-memory-optimizations\tDisable uniform-based coherent memory access\n");
|
||||||
printf(" disable-masked-store-optimizations\tDisable lowering to regular stores when possible\n");
|
printf(" disable-masked-store-optimizations\tDisable lowering to regular stores when possible\n");
|
||||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn)
|
printf(" [--target=<isa>]\t\t\tSelect target ISA. (%s)\n", Target::SupportedTargetISAs());
|
||||||
printf(" [--target={sse2,sse4,sse4x2,avx,avx-x2}] Select target ISA (SSE4 is default unless compiling for atom; then SSE2 is.)\n");
|
|
||||||
#else
|
|
||||||
printf(" [--target={sse2,sse4,sse4x2}] Select target ISA (SSE4 is default unless compiling for atom; then SSE2 is.)\n");
|
|
||||||
#endif // LLVM 3.0
|
|
||||||
printf(" [--version]\t\t\t\tPrint ispc version\n");
|
printf(" [--version]\t\t\t\tPrint ispc version\n");
|
||||||
printf(" [--woff]\t\t\t\tDisable warnings\n");
|
printf(" [--woff]\t\t\t\tDisable warnings\n");
|
||||||
printf(" [--wno-perf]\t\t\tDon't issue warnings related to performance-related issues\n");
|
printf(" [--wno-perf]\t\t\tDon't issue warnings related to performance-related issues\n");
|
||||||
@@ -103,40 +100,6 @@ static void usage(int ret) {
|
|||||||
exit(ret);
|
exit(ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Given a target name string, set initialize the global g->target
|
|
||||||
structure appropriately.
|
|
||||||
*/
|
|
||||||
static void lDoTarget(const char *target) {
|
|
||||||
if (!strcasecmp(target, "sse2")) {
|
|
||||||
g->target.isa = Target::SSE2;
|
|
||||||
g->target.nativeVectorWidth = 4;
|
|
||||||
g->target.vectorWidth = 4;
|
|
||||||
}
|
|
||||||
else if (!strcasecmp(target, "sse4")) {
|
|
||||||
g->target.isa = Target::SSE4;
|
|
||||||
g->target.nativeVectorWidth = 4;
|
|
||||||
g->target.vectorWidth = 4;
|
|
||||||
}
|
|
||||||
else if (!strcasecmp(target, "sse4x2")) {
|
|
||||||
g->target.isa = Target::SSE4;
|
|
||||||
g->target.nativeVectorWidth = 4;
|
|
||||||
g->target.vectorWidth = 8;
|
|
||||||
}
|
|
||||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn)
|
|
||||||
else if (!strcasecmp(target, "avx")) {
|
|
||||||
g->target.isa = Target::AVX;
|
|
||||||
g->target.nativeVectorWidth = 8;
|
|
||||||
g->target.vectorWidth = 8;
|
|
||||||
}
|
|
||||||
else if (!strcasecmp(target, "avx-x2")) {
|
|
||||||
g->target.isa = Target::AVX;
|
|
||||||
g->target.nativeVectorWidth = 8;
|
|
||||||
g->target.vectorWidth = 16;
|
|
||||||
}
|
|
||||||
#endif // LLVM 3.0
|
|
||||||
else
|
|
||||||
usage(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/** We take arguments from both the command line as well as from the
|
/** We take arguments from both the command line as well as from the
|
||||||
@@ -203,9 +166,10 @@ int main(int Argc, char *Argv[]) {
|
|||||||
// as we're parsing below
|
// as we're parsing below
|
||||||
g = new Globals;
|
g = new Globals;
|
||||||
|
|
||||||
bool debugSet = false, optSet = false, targetSet = false;
|
bool debugSet = false, optSet = false;
|
||||||
Module::OutputType ot = Module::Object;
|
Module::OutputType ot = Module::Object;
|
||||||
|
|
||||||
|
const char *arch = NULL, *cpu = NULL, *target = NULL;
|
||||||
for (int i = 1; i < argc; ++i) {
|
for (int i = 1; i < argc; ++i) {
|
||||||
if (!strcmp(argv[i], "--help"))
|
if (!strcmp(argv[i], "--help"))
|
||||||
usage(0);
|
usage(0);
|
||||||
@@ -214,15 +178,10 @@ int main(int Argc, char *Argv[]) {
|
|||||||
g->cppArgs.push_back(argv[i]);
|
g->cppArgs.push_back(argv[i]);
|
||||||
}
|
}
|
||||||
#endif // !ISPC_IS_WINDOWS
|
#endif // !ISPC_IS_WINDOWS
|
||||||
else if (!strncmp(argv[i], "--arch=", 7)) {
|
else if (!strncmp(argv[i], "--arch=", 7))
|
||||||
g->target.arch = argv[i] + 7;
|
arch = argv[i] + 7;
|
||||||
if (g->target.arch == "x86")
|
|
||||||
g->target.is32bit = true;
|
|
||||||
else if (g->target.arch == "x86-64")
|
|
||||||
g->target.is32bit = false;
|
|
||||||
}
|
|
||||||
else if (!strncmp(argv[i], "--cpu=", 6))
|
else if (!strncmp(argv[i], "--cpu=", 6))
|
||||||
g->target.cpu = argv[i] + 6;
|
cpu = argv[i] + 6;
|
||||||
else if (!strcmp(argv[i], "--fast-math"))
|
else if (!strcmp(argv[i], "--fast-math"))
|
||||||
g->opt.fastMath = true;
|
g->opt.fastMath = true;
|
||||||
else if (!strcmp(argv[i], "--debug"))
|
else if (!strcmp(argv[i], "--debug"))
|
||||||
@@ -240,14 +199,12 @@ int main(int Argc, char *Argv[]) {
|
|||||||
else if (!strcmp(argv[i], "--emit-obj"))
|
else if (!strcmp(argv[i], "--emit-obj"))
|
||||||
ot = Module::Object;
|
ot = Module::Object;
|
||||||
else if (!strcmp(argv[i], "--target")) {
|
else if (!strcmp(argv[i], "--target")) {
|
||||||
|
// FIXME: should remove this way of specifying the target...
|
||||||
if (++i == argc) usage(1);
|
if (++i == argc) usage(1);
|
||||||
lDoTarget(argv[i]);
|
target = argv[i];
|
||||||
targetSet = true;
|
|
||||||
}
|
|
||||||
else if (!strncmp(argv[i], "--target=", 9)) {
|
|
||||||
const char *target = argv[i] + 9;
|
|
||||||
lDoTarget(target);
|
|
||||||
}
|
}
|
||||||
|
else if (!strncmp(argv[i], "--target=", 9))
|
||||||
|
target = argv[i] + 9;
|
||||||
else if (!strncmp(argv[i], "--math-lib=", 11)) {
|
else if (!strncmp(argv[i], "--math-lib=", 11)) {
|
||||||
const char *lib = argv[i] + 11;
|
const char *lib = argv[i] + 11;
|
||||||
if (!strcmp(lib, "default"))
|
if (!strcmp(lib, "default"))
|
||||||
@@ -332,10 +289,8 @@ int main(int Argc, char *Argv[]) {
|
|||||||
if (debugSet && !optSet)
|
if (debugSet && !optSet)
|
||||||
g->opt.level = 0;
|
g->opt.level = 0;
|
||||||
|
|
||||||
// Make SSE2 the default target on atom unless the target has been set
|
if (!Target::GetTarget(arch, cpu, target, &g->target))
|
||||||
// explicitly.
|
usage(1);
|
||||||
if (!targetSet && (g->target.cpu == "atom"))
|
|
||||||
lDoTarget("sse2");
|
|
||||||
|
|
||||||
m = new Module(file);
|
m = new Module(file);
|
||||||
if (m->CompileFile() == 0) {
|
if (m->CompileFile() == 0) {
|
||||||
|
|||||||
95
module.cpp
95
module.cpp
@@ -72,18 +72,16 @@
|
|||||||
#include <llvm/Support/FormattedStream.h>
|
#include <llvm/Support/FormattedStream.h>
|
||||||
#include <llvm/Support/FileUtilities.h>
|
#include <llvm/Support/FileUtilities.h>
|
||||||
#include <llvm/Target/TargetMachine.h>
|
#include <llvm/Target/TargetMachine.h>
|
||||||
|
#include <llvm/Target/TargetOptions.h>
|
||||||
|
#include <llvm/Target/TargetData.h>
|
||||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn)
|
#if defined(LLVM_3_0) || defined(LLVM_3_0svn)
|
||||||
#include <llvm/Support/TargetRegistry.h>
|
#include <llvm/Support/TargetRegistry.h>
|
||||||
#include <llvm/Support/TargetSelect.h>
|
#include <llvm/Support/TargetSelect.h>
|
||||||
#else
|
#else
|
||||||
#include <llvm/Target/TargetRegistry.h>
|
#include <llvm/Target/TargetRegistry.h>
|
||||||
#include <llvm/Target/TargetSelect.h>
|
#include <llvm/Target/TargetSelect.h>
|
||||||
#endif
|
|
||||||
#include <llvm/Target/TargetOptions.h>
|
|
||||||
#include <llvm/Target/TargetData.h>
|
|
||||||
#if !defined(LLVM_3_0) && !defined(LLVM_3_0svn)
|
|
||||||
#include <llvm/Target/SubtargetFeature.h>
|
#include <llvm/Target/SubtargetFeature.h>
|
||||||
#endif // !LLVM_3_0
|
#endif
|
||||||
#include <llvm/PassManager.h>
|
#include <llvm/PassManager.h>
|
||||||
#include <llvm/Analysis/Verifier.h>
|
#include <llvm/Analysis/Verifier.h>
|
||||||
#include <llvm/Support/CFG.h>
|
#include <llvm/Support/CFG.h>
|
||||||
@@ -91,10 +89,10 @@
|
|||||||
#include <clang/Frontend/Utils.h>
|
#include <clang/Frontend/Utils.h>
|
||||||
#include <clang/Basic/TargetInfo.h>
|
#include <clang/Basic/TargetInfo.h>
|
||||||
#ifndef LLVM_2_8
|
#ifndef LLVM_2_8
|
||||||
#include <llvm/Support/ToolOutputFile.h>
|
#include <llvm/Support/ToolOutputFile.h>
|
||||||
#include <llvm/Support/Host.h>
|
#include <llvm/Support/Host.h>
|
||||||
#else // !LLVM_2_8
|
#else // !LLVM_2_8
|
||||||
#include <llvm/System/Host.h>
|
#include <llvm/System/Host.h>
|
||||||
#endif // LLVM_2_8
|
#endif // LLVM_2_8
|
||||||
#include <llvm/Assembly/PrintModulePass.h>
|
#include <llvm/Assembly/PrintModulePass.h>
|
||||||
#include <llvm/Support/raw_ostream.h>
|
#include <llvm/Support/raw_ostream.h>
|
||||||
@@ -114,42 +112,7 @@ Module::Module(const char *fn) {
|
|||||||
symbolTable = new SymbolTable;
|
symbolTable = new SymbolTable;
|
||||||
module = new llvm::Module(filename ? filename : "<stdin>", *g->ctx);
|
module = new llvm::Module(filename ? filename : "<stdin>", *g->ctx);
|
||||||
|
|
||||||
// initialize target in module
|
module->setTargetTriple(g->target.GetTripleString());
|
||||||
llvm::InitializeAllTargets();
|
|
||||||
|
|
||||||
llvm::Triple triple;
|
|
||||||
// Start with the host triple as the default
|
|
||||||
triple.setTriple(llvm::sys::getHostTriple());
|
|
||||||
if (g->target.arch != "") {
|
|
||||||
// If the user specified a target architecture, see if it's a known
|
|
||||||
// one; print an error with the valid ones otherwise.
|
|
||||||
const llvm::Target *target = NULL;
|
|
||||||
for (llvm::TargetRegistry::iterator iter = llvm::TargetRegistry::begin();
|
|
||||||
iter != llvm::TargetRegistry::end(); ++iter) {
|
|
||||||
if (g->target.arch == iter->getName()) {
|
|
||||||
target = &*iter;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (!target) {
|
|
||||||
fprintf(stderr, "Invalid target \"%s\"\nOptions: ",
|
|
||||||
g->target.arch.c_str());
|
|
||||||
llvm::TargetRegistry::iterator iter;
|
|
||||||
for (iter = llvm::TargetRegistry::begin();
|
|
||||||
iter != llvm::TargetRegistry::end(); ++iter)
|
|
||||||
fprintf(stderr, "%s ", iter->getName());
|
|
||||||
fprintf(stderr, "\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
// And override the arch in the host triple
|
|
||||||
llvm::Triple::ArchType archType =
|
|
||||||
llvm::Triple::getArchTypeForLLVMName(g->target.arch);
|
|
||||||
if (archType != llvm::Triple::UnknownArch)
|
|
||||||
triple.setArch(archType);
|
|
||||||
}
|
|
||||||
module->setTargetTriple(triple.str());
|
|
||||||
|
|
||||||
|
|
||||||
#ifndef LLVM_2_8
|
#ifndef LLVM_2_8
|
||||||
if (g->generateDebuggingSymbols)
|
if (g->generateDebuggingSymbols)
|
||||||
@@ -973,42 +936,12 @@ Module::WriteOutput(OutputType outputType, const char *outFileName) {
|
|||||||
bool
|
bool
|
||||||
Module::writeObjectFileOrAssembly(OutputType outputType, const char *outFileName) {
|
Module::writeObjectFileOrAssembly(OutputType outputType, const char *outFileName) {
|
||||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn)
|
#if defined(LLVM_3_0) || defined(LLVM_3_0svn)
|
||||||
llvm::InitializeAllTargetMCs();
|
LLVMInitializeX86TargetMC();
|
||||||
#endif
|
#endif
|
||||||
llvm::InitializeAllAsmPrinters();
|
LLVMInitializeX86AsmPrinter();
|
||||||
llvm::InitializeAllAsmParsers();
|
LLVMInitializeX86AsmParser();
|
||||||
|
|
||||||
llvm::Triple triple(module->getTargetTriple());
|
llvm::TargetMachine *targetMachine = g->target.GetTargetMachine();
|
||||||
assert(triple.getTriple().empty() == false);
|
|
||||||
|
|
||||||
const llvm::Target *target = NULL;
|
|
||||||
std::string error;
|
|
||||||
target = llvm::TargetRegistry::lookupTarget(triple.getTriple(), error);
|
|
||||||
assert(target != NULL);
|
|
||||||
|
|
||||||
std::string featuresString;
|
|
||||||
llvm::TargetMachine *targetMachine = NULL;
|
|
||||||
#if defined LLVM_3_0svn || defined LLVM_3_0
|
|
||||||
if (g->target.isa == Target::AVX)
|
|
||||||
featuresString = "+avx";
|
|
||||||
targetMachine = target->createTargetMachine(triple.getTriple(), g->target.cpu,
|
|
||||||
featuresString);
|
|
||||||
#else
|
|
||||||
if (g->target.cpu.size()) {
|
|
||||||
llvm::SubtargetFeatures features;
|
|
||||||
features.setCPU(g->target.cpu);
|
|
||||||
featuresString = features.getString();
|
|
||||||
}
|
|
||||||
|
|
||||||
targetMachine = target->createTargetMachine(triple.getTriple(),
|
|
||||||
featuresString);
|
|
||||||
#endif
|
|
||||||
if (targetMachine == NULL) {
|
|
||||||
fprintf(stderr, "Unable to create target machine for target \"%s\"!",
|
|
||||||
triple.str().c_str());
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
targetMachine->setAsmVerbosityDefault(true);
|
|
||||||
|
|
||||||
// Figure out if we're generating object file or assembly output, and
|
// Figure out if we're generating object file or assembly output, and
|
||||||
// set binary output for object files
|
// set binary output for object files
|
||||||
@@ -1017,6 +950,7 @@ Module::writeObjectFileOrAssembly(OutputType outputType, const char *outFileName
|
|||||||
bool binary = (fileType == llvm::TargetMachine::CGFT_ObjectFile);
|
bool binary = (fileType == llvm::TargetMachine::CGFT_ObjectFile);
|
||||||
unsigned int flags = binary ? llvm::raw_fd_ostream::F_Binary : 0;
|
unsigned int flags = binary ? llvm::raw_fd_ostream::F_Binary : 0;
|
||||||
|
|
||||||
|
std::string error;
|
||||||
llvm::tool_output_file *of = new llvm::tool_output_file(outFileName, error, flags);
|
llvm::tool_output_file *of = new llvm::tool_output_file(outFileName, error, flags);
|
||||||
if (error.size()) {
|
if (error.size()) {
|
||||||
fprintf(stderr, "Error opening output file \"%s\".\n", outFileName);
|
fprintf(stderr, "Error opening output file \"%s\".\n", outFileName);
|
||||||
@@ -1034,9 +968,8 @@ Module::writeObjectFileOrAssembly(OutputType outputType, const char *outFileName
|
|||||||
(g->opt.level > 0) ? llvm::CodeGenOpt::Aggressive : llvm::CodeGenOpt::None;
|
(g->opt.level > 0) ? llvm::CodeGenOpt::Aggressive : llvm::CodeGenOpt::None;
|
||||||
|
|
||||||
if (targetMachine->addPassesToEmitFile(pm, fos, fileType, optLevel)) {
|
if (targetMachine->addPassesToEmitFile(pm, fos, fileType, optLevel)) {
|
||||||
fprintf(stderr, "Fatal error adding passes to emit object file for "
|
fprintf(stderr, "Fatal error adding passes to emit object file!");
|
||||||
"target %s!\n", triple.str().c_str());
|
exit(1);
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Finally, run the passes to emit the object file/assembly
|
// Finally, run the passes to emit the object file/assembly
|
||||||
|
|||||||
Reference in New Issue
Block a user