Merge pull request #448 from dbabokin/target_class

Redesign of Target class with several fixes
This commit is contained in:
jbrodman
2013-03-25 10:21:00 -07:00
14 changed files with 659 additions and 568 deletions

View File

@@ -1,5 +1,5 @@
/*
Copyright (c) 2010-2012, Intel Corporation
Copyright (c) 2010-2013, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -210,7 +210,7 @@ lCreateISPCSymbol(llvm::Function *func, SymbolTable *symbolTable) {
// symbol creation code below assumes that any LLVM vector of i32s is a
// varying int32. Here, we need that to be interpreted as a varying
// bool, so just have a one-off override for that one...
if (g->target.maskBitCount != 1 && name == "__sext_varying_bool") {
if (g->target->getMaskBitCount() != 1 && name == "__sext_varying_bool") {
const Type *returnType = AtomicType::VaryingInt32;
llvm::SmallVector<const Type *, 8> argTypes;
argTypes.push_back(AtomicType::VaryingBool);
@@ -599,11 +599,7 @@ lSetInternalFunctions(llvm::Module *module) {
llvm::Function *f = module->getFunction(names[i]);
if (f != NULL && f->empty() == false) {
f->setLinkage(llvm::GlobalValue::InternalLinkage);
#if !defined(LLVM_3_1) && !defined(LLVM_3_2)
f->addAttributes(
llvm::AttributeSet::FunctionIndex,
*g->target.tf_attributes);
#endif
g->target->markFuncWithTargetAttr(f);
}
}
}
@@ -650,7 +646,7 @@ AddBitcodeToModule(const unsigned char *bitcode, int length,
// targets having a layout with 16-bit alignment for 16xi1 vectors.
// As long as builtins-c.c doesn't have any 16xi1 vector types
// (which it shouldn't!), then this override is safe.
if (g->target.isa == Target::GENERIC)
if (g->target->getISA() == Target::GENERIC)
bcModule->setDataLayout(module->getDataLayout());
std::string(linkError);
@@ -737,7 +733,7 @@ lDefineProgramIndex(llvm::Module *module, SymbolTable *symbolTable) {
AtomicType::VaryingInt32->GetAsConstType(), SC_STATIC);
int pi[ISPC_MAX_NVEC];
for (int i = 0; i < g->target.vectorWidth; ++i)
for (int i = 0; i < g->target->getVectorWidth(); ++i)
pi[i] = i;
sym->constValue = new ConstExpr(sym->type, pi, SourcePos());
@@ -770,7 +766,7 @@ void
DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *module,
bool includeStdlibISPC) {
// Add the definitions from the compiled builtins-c.c file
if (g->target.is32Bit) {
if (g->target->is32Bit()) {
extern unsigned char builtins_bitcode_c_32[];
extern int builtins_bitcode_c_32_length;
AddBitcodeToModule(builtins_bitcode_c_32, builtins_bitcode_c_32_length,
@@ -785,13 +781,13 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
// Next, add the target's custom implementations of the various needed
// builtin functions (e.g. __masked_store_32(), etc).
switch (g->target.isa) {
switch (g->target->getISA()) {
case Target::SSE2:
extern unsigned char builtins_bitcode_sse2[];
extern int builtins_bitcode_sse2_length;
extern unsigned char builtins_bitcode_sse2_x2[];
extern int builtins_bitcode_sse2_x2_length;
switch (g->target.vectorWidth) {
switch (g->target->getVectorWidth()) {
case 4:
AddBitcodeToModule(builtins_bitcode_sse2, builtins_bitcode_sse2_length,
module, symbolTable);
@@ -809,7 +805,7 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
extern int builtins_bitcode_sse4_length;
extern unsigned char builtins_bitcode_sse4_x2[];
extern int builtins_bitcode_sse4_x2_length;
switch (g->target.vectorWidth) {
switch (g->target->getVectorWidth()) {
case 4:
AddBitcodeToModule(builtins_bitcode_sse4,
builtins_bitcode_sse4_length,
@@ -825,7 +821,7 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
}
break;
case Target::AVX:
switch (g->target.vectorWidth) {
switch (g->target->getVectorWidth()) {
case 8:
extern unsigned char builtins_bitcode_avx1[];
extern int builtins_bitcode_avx1_length;
@@ -845,7 +841,7 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
}
break;
case Target::AVX11:
switch (g->target.vectorWidth) {
switch (g->target->getVectorWidth()) {
case 8:
extern unsigned char builtins_bitcode_avx11[];
extern int builtins_bitcode_avx11_length;
@@ -865,7 +861,7 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
}
break;
case Target::AVX2:
switch (g->target.vectorWidth) {
switch (g->target->getVectorWidth()) {
case 8:
extern unsigned char builtins_bitcode_avx2[];
extern int builtins_bitcode_avx2_length;
@@ -885,7 +881,7 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
}
break;
case Target::GENERIC:
switch (g->target.vectorWidth) {
switch (g->target->getVectorWidth()) {
case 4:
extern unsigned char builtins_bitcode_generic_4[];
extern int builtins_bitcode_generic_4_length;
@@ -937,7 +933,7 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
}
// define the 'programCount' builtin variable
lDefineConstantInt("programCount", g->target.vectorWidth, module, symbolTable);
lDefineConstantInt("programCount", g->target->getVectorWidth(), module, symbolTable);
// define the 'programIndex' builtin
lDefineProgramIndex(module, symbolTable);
@@ -956,18 +952,18 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
lDefineConstantIntFunc("__fast_masked_vload", (int)g->opt.fastMaskedVload,
module, symbolTable);
lDefineConstantInt("__have_native_half", g->target.hasHalf, module,
lDefineConstantInt("__have_native_half", g->target->hasHalf(), module,
symbolTable);
lDefineConstantInt("__have_native_rand", g->target.hasRand, module,
lDefineConstantInt("__have_native_rand", g->target->hasRand(), module,
symbolTable);
lDefineConstantInt("__have_native_transcendentals", g->target.hasTranscendentals,
lDefineConstantInt("__have_native_transcendentals", g->target->hasTranscendentals(),
module, symbolTable);
if (includeStdlibISPC) {
// If the user wants the standard library to be included, parse the
// serialized version of the stdlib.ispc file to get its
// definitions added.
if (g->target.isa == Target::GENERIC&&g->target.vectorWidth!=1) { // 1 wide uses x86 stdlib
if (g->target->getISA() == Target::GENERIC&&g->target->getVectorWidth()!=1) { // 1 wide uses x86 stdlib
extern char stdlib_generic_code[];
yy_scan_string(stdlib_generic_code);
yyparse();

90
ctx.cpp
View File

@@ -1,5 +1,5 @@
/*
Copyright (c) 2010-2012, Intel Corporation
Copyright (c) 2010-2013, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -308,7 +308,7 @@ FunctionEmitContext::FunctionEmitContext(Function *func, Symbol *funSym,
LLVMMaskAllOn, "__all_on_mask");
char buf[256];
sprintf(buf, "__off_all_on_mask_%s", g->target.GetISAString());
sprintf(buf, "__off_all_on_mask_%s", g->target->GetISAString());
llvm::Constant *offFunc =
m->module->getOrInsertFunction(buf, LLVMTypes::VoidType,
NULL);
@@ -1295,7 +1295,7 @@ FunctionEmitContext::Any(llvm::Value *mask) {
// Call the target-dependent any function to test that the mask is non-zero
std::vector<Symbol *> mm;
m->symbolTable->LookupFunction("__any", &mm);
if (g->target.maskBitCount == 1)
if (g->target->getMaskBitCount() == 1)
AssertPos(currentPos, mm.size() == 1);
else
// There should be one with signed int signature, one unsigned int.
@@ -1313,7 +1313,7 @@ FunctionEmitContext::All(llvm::Value *mask) {
// into an i64 value
std::vector<Symbol *> mm;
m->symbolTable->LookupFunction("__all", &mm);
if (g->target.maskBitCount == 1)
if (g->target->getMaskBitCount() == 1)
AssertPos(currentPos, mm.size() == 1);
else
// There should be one with signed int signature, one unsigned int.
@@ -1331,7 +1331,7 @@ FunctionEmitContext::None(llvm::Value *mask) {
// into an i64 value
std::vector<Symbol *> mm;
m->symbolTable->LookupFunction("__none", &mm);
if (g->target.maskBitCount == 1)
if (g->target->getMaskBitCount() == 1)
AssertPos(currentPos, mm.size() == 1);
else
// There should be one with signed int signature, one unsigned int.
@@ -1349,7 +1349,7 @@ FunctionEmitContext::LaneMask(llvm::Value *v) {
// into an i64 value
std::vector<Symbol *> mm;
m->symbolTable->LookupFunction("__movmsk", &mm);
if (g->target.maskBitCount == 1)
if (g->target->getMaskBitCount() == 1)
AssertPos(currentPos, mm.size() == 1);
else
// There should be one with signed int signature, one unsigned int.
@@ -1405,7 +1405,7 @@ FunctionEmitContext::I1VecToBoolVec(llvm::Value *b) {
return NULL;
}
if (g->target.maskBitCount == 1)
if (g->target->getMaskBitCount() == 1)
return b;
llvm::ArrayType *at =
@@ -1594,7 +1594,7 @@ lArrayVectorWidth(llvm::Type *t) {
llvm::VectorType *vectorElementType =
llvm::dyn_cast<llvm::VectorType>(arrayType->getElementType());
Assert((vectorElementType != NULL &&
(int)vectorElementType->getNumElements() == g->target.vectorWidth));
(int)vectorElementType->getNumElements() == g->target->getVectorWidth()));
return (int)arrayType->getNumElements();
}
@@ -1678,10 +1678,10 @@ lGetMatchingBoolVectorType(llvm::Type *type) {
llvm::VectorType *vectorElementType =
llvm::dyn_cast<llvm::VectorType>(arrayType->getElementType());
Assert(vectorElementType != NULL);
Assert((int)vectorElementType->getNumElements() == g->target.vectorWidth);
Assert((int)vectorElementType->getNumElements() == g->target->getVectorWidth());
llvm::Type *base =
llvm::VectorType::get(LLVMTypes::BoolType, g->target.vectorWidth);
llvm::VectorType::get(LLVMTypes::BoolType, g->target->getVectorWidth());
return llvm::ArrayType::get(base, arrayType->getNumElements());
}
@@ -1741,9 +1741,9 @@ FunctionEmitContext::SmearUniform(llvm::Value *value, const char *name) {
// All other varying types are represented as vectors of the
// underlying type.
ret = llvm::UndefValue::get(llvm::VectorType::get(eltType,
g->target.vectorWidth));
g->target->getVectorWidth()));
for (int i = 0; i < g->target.vectorWidth; ++i) {
for (int i = 0; i < g->target->getVectorWidth(); ++i) {
llvm::Twine n = llvm::Twine("smear.") + llvm::Twine(name ? name : "") +
llvm::Twine(i);
ret = InsertInst(ret, value, i, n.str().c_str());
@@ -1963,7 +1963,7 @@ FunctionEmitContext::applyVaryingGEP(llvm::Value *basePtr, llvm::Value *index,
// Find the scale factor for the index (i.e. the size of the object
// that the pointer(s) point(s) to).
const Type *scaleType = ptrType->GetBaseType();
llvm::Value *scale = g->target.SizeOf(scaleType->LLVMType(g->ctx), bblock);
llvm::Value *scale = g->target->SizeOf(scaleType->LLVMType(g->ctx), bblock);
bool indexIsVarying =
llvm::isa<llvm::VectorType>(index->getType());
@@ -1971,10 +1971,10 @@ FunctionEmitContext::applyVaryingGEP(llvm::Value *basePtr, llvm::Value *index,
if (indexIsVarying == false) {
// Truncate or sign extend the index as appropriate to a 32 or
// 64-bit type.
if ((g->target.is32Bit || g->opt.force32BitAddressing) &&
if ((g->target->is32Bit() || g->opt.force32BitAddressing) &&
index->getType() == LLVMTypes::Int64Type)
index = TruncInst(index, LLVMTypes::Int32Type);
else if ((!g->target.is32Bit && !g->opt.force32BitAddressing) &&
else if ((!g->target->is32Bit() && !g->opt.force32BitAddressing) &&
index->getType() == LLVMTypes::Int32Type)
index = SExtInst(index, LLVMTypes::Int64Type);
@@ -1988,10 +1988,10 @@ FunctionEmitContext::applyVaryingGEP(llvm::Value *basePtr, llvm::Value *index,
else {
// Similarly, truncate or sign extend the index to be a 32 or 64
// bit vector type
if ((g->target.is32Bit || g->opt.force32BitAddressing) &&
if ((g->target->is32Bit() || g->opt.force32BitAddressing) &&
index->getType() == LLVMTypes::Int64VectorType)
index = TruncInst(index, LLVMTypes::Int32VectorType);
else if ((!g->target.is32Bit && !g->opt.force32BitAddressing) &&
else if ((!g->target->is32Bit() && !g->opt.force32BitAddressing) &&
index->getType() == LLVMTypes::Int32VectorType)
index = SExtInst(index, LLVMTypes::Int64VectorType);
@@ -2005,7 +2005,7 @@ FunctionEmitContext::applyVaryingGEP(llvm::Value *basePtr, llvm::Value *index,
// For 64-bit targets, if we've been doing our offset calculations in
// 32 bits, we still have to convert to a 64-bit value before we
// actually add the offset to the pointer.
if (g->target.is32Bit == false && g->opt.force32BitAddressing == true)
if (g->target->is32Bit() == false && g->opt.force32BitAddressing == true)
offset = SExtInst(offset, LLVMTypes::Int64VectorType,
LLVMGetName(offset, "_to_64"));
@@ -2343,7 +2343,7 @@ FunctionEmitContext::AddElementOffset(llvm::Value *fullBasePtr, int elementNum,
if (st != NULL)
// If the pointer is to a structure, Target::StructOffset() gives
// us the offset in bytes to the given element of the structure
offset = g->target.StructOffset(st->LLVMType(g->ctx), elementNum,
offset = g->target->StructOffset(st->LLVMType(g->ctx), elementNum,
bblock);
else {
// Otherwise we should have a vector or array here and the offset
@@ -2353,15 +2353,15 @@ FunctionEmitContext::AddElementOffset(llvm::Value *fullBasePtr, int elementNum,
CastType<SequentialType>(ptrType->GetBaseType());
AssertPos(currentPos, st != NULL);
llvm::Value *size =
g->target.SizeOf(st->GetElementType()->LLVMType(g->ctx), bblock);
llvm::Value *scale = (g->target.is32Bit || g->opt.force32BitAddressing) ?
g->target->SizeOf(st->GetElementType()->LLVMType(g->ctx), bblock);
llvm::Value *scale = (g->target->is32Bit() || g->opt.force32BitAddressing) ?
LLVMInt32(elementNum) : LLVMInt64(elementNum);
offset = BinaryOperator(llvm::Instruction::Mul, size, scale);
}
offset = SmearUniform(offset, "offset_smear");
if (g->target.is32Bit == false && g->opt.force32BitAddressing == true)
if (g->target->is32Bit() == false && g->opt.force32BitAddressing == true)
// If we're doing 32 bit addressing with a 64 bit target, although
// we did the math above in 32 bit, we need to go to 64 bit before
// we add the offset to the varying pointers.
@@ -2583,26 +2583,26 @@ FunctionEmitContext::gather(llvm::Value *ptr, const PointerType *ptrType,
const PointerType *pt = CastType<PointerType>(returnType);
const char *funcName = NULL;
if (pt != NULL)
funcName = g->target.is32Bit ? "__pseudo_gather32_i32" :
funcName = g->target->is32Bit() ? "__pseudo_gather32_i32" :
"__pseudo_gather64_i64";
else if (llvmReturnType == LLVMTypes::DoubleVectorType)
funcName = g->target.is32Bit ? "__pseudo_gather32_double" :
funcName = g->target->is32Bit() ? "__pseudo_gather32_double" :
"__pseudo_gather64_double";
else if (llvmReturnType == LLVMTypes::Int64VectorType)
funcName = g->target.is32Bit ? "__pseudo_gather32_i64" :
funcName = g->target->is32Bit() ? "__pseudo_gather32_i64" :
"__pseudo_gather64_i64";
else if (llvmReturnType == LLVMTypes::FloatVectorType)
funcName = g->target.is32Bit ? "__pseudo_gather32_float" :
funcName = g->target->is32Bit() ? "__pseudo_gather32_float" :
"__pseudo_gather64_float";
else if (llvmReturnType == LLVMTypes::Int32VectorType)
funcName = g->target.is32Bit ? "__pseudo_gather32_i32" :
funcName = g->target->is32Bit() ? "__pseudo_gather32_i32" :
"__pseudo_gather64_i32";
else if (llvmReturnType == LLVMTypes::Int16VectorType)
funcName = g->target.is32Bit ? "__pseudo_gather32_i16" :
funcName = g->target->is32Bit() ? "__pseudo_gather32_i16" :
"__pseudo_gather64_i16";
else {
AssertPos(currentPos, llvmReturnType == LLVMTypes::Int8VectorType);
funcName = g->target.is32Bit ? "__pseudo_gather32_i8" :
funcName = g->target->is32Bit() ? "__pseudo_gather32_i8" :
"__pseudo_gather64_i8";
}
@@ -2684,7 +2684,7 @@ FunctionEmitContext::AllocaInst(llvm::Type *llvmType,
llvm::dyn_cast<llvm::ArrayType>(llvmType);
if (align == 0 && arrayType != NULL &&
!llvm::isa<llvm::VectorType>(arrayType->getElementType()))
align = 4 * g->target.nativeVectorWidth;
align = 4 * g->target->getNativeVectorWidth();
if (align != 0)
inst->setAlignment(align);
@@ -2761,13 +2761,13 @@ FunctionEmitContext::maskedStore(llvm::Value *value, llvm::Value *ptr,
return;
}
if (g->target.is32Bit)
if (g->target->is32Bit())
maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i32");
else
maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i64");
}
else if (Type::Equal(valueType, AtomicType::VaryingBool) &&
g->target.maskBitCount == 1) {
g->target->getMaskBitCount() == 1) {
llvm::Value *notMask = BinaryOperator(llvm::Instruction::Xor, mask,
LLVMMaskAllOn, "~mask");
llvm::Value *old = LoadInst(ptr);
@@ -2894,31 +2894,31 @@ FunctionEmitContext::scatter(llvm::Value *value, llvm::Value *ptr,
llvm::Type *type = value->getType();
const char *funcName = NULL;
if (pt != NULL) {
funcName = g->target.is32Bit ? "__pseudo_scatter32_i32" :
funcName = g->target->is32Bit() ? "__pseudo_scatter32_i32" :
"__pseudo_scatter64_i64";
}
else if (type == LLVMTypes::DoubleVectorType) {
funcName = g->target.is32Bit ? "__pseudo_scatter32_double" :
funcName = g->target->is32Bit() ? "__pseudo_scatter32_double" :
"__pseudo_scatter64_double";
}
else if (type == LLVMTypes::Int64VectorType) {
funcName = g->target.is32Bit ? "__pseudo_scatter32_i64" :
funcName = g->target->is32Bit() ? "__pseudo_scatter32_i64" :
"__pseudo_scatter64_i64";
}
else if (type == LLVMTypes::FloatVectorType) {
funcName = g->target.is32Bit ? "__pseudo_scatter32_float" :
funcName = g->target->is32Bit() ? "__pseudo_scatter32_float" :
"__pseudo_scatter64_float";
}
else if (type == LLVMTypes::Int32VectorType) {
funcName = g->target.is32Bit ? "__pseudo_scatter32_i32" :
funcName = g->target->is32Bit() ? "__pseudo_scatter32_i32" :
"__pseudo_scatter64_i32";
}
else if (type == LLVMTypes::Int16VectorType) {
funcName = g->target.is32Bit ? "__pseudo_scatter32_i16" :
funcName = g->target->is32Bit() ? "__pseudo_scatter32_i16" :
"__pseudo_scatter64_i16";
}
else if (type == LLVMTypes::Int8VectorType) {
funcName = g->target.is32Bit ? "__pseudo_scatter32_i8" :
funcName = g->target->is32Bit() ? "__pseudo_scatter32_i8" :
"__pseudo_scatter64_i8";
}
@@ -3408,13 +3408,13 @@ FunctionEmitContext::LaunchInst(llvm::Value *callee,
llvm::Function *falloc = m->module->getFunction("ISPCAlloc");
AssertPos(currentPos, falloc != NULL);
llvm::Value *structSize = g->target.SizeOf(argStructType, bblock);
llvm::Value *structSize = g->target->SizeOf(argStructType, bblock);
if (structSize->getType() != LLVMTypes::Int64Type)
// ISPCAlloc expects the size as a uint64_t, but on 32-bit
// targets, SizeOf returns a 32-bit value
structSize = ZExtInst(structSize, LLVMTypes::Int64Type,
"struct_size_to_64");
int align = 4 * RoundUpPow2(g->target.nativeVectorWidth);
int align = 4 * RoundUpPow2(g->target->getNativeVectorWidth());
std::vector<llvm::Value *> allocArgs;
allocArgs.push_back(launchGroupHandlePtr);
@@ -3505,20 +3505,20 @@ FunctionEmitContext::addVaryingOffsetsIfNeeded(llvm::Value *ptr,
// Find the size of a uniform element of the varying type
llvm::Type *llvmBaseUniformType =
baseType->GetAsUniformType()->LLVMType(g->ctx);
llvm::Value *unifSize = g->target.SizeOf(llvmBaseUniformType, bblock);
llvm::Value *unifSize = g->target->SizeOf(llvmBaseUniformType, bblock);
unifSize = SmearUniform(unifSize);
// Compute offset = <0, 1, .. > * unifSize
llvm::Value *varyingOffsets = llvm::UndefValue::get(unifSize->getType());
for (int i = 0; i < g->target.vectorWidth; ++i) {
llvm::Value *iValue = (g->target.is32Bit || g->opt.force32BitAddressing) ?
for (int i = 0; i < g->target->getVectorWidth(); ++i) {
llvm::Value *iValue = (g->target->is32Bit() || g->opt.force32BitAddressing) ?
LLVMInt32(i) : LLVMInt64(i);
varyingOffsets = InsertInst(varyingOffsets, iValue, i, "varying_delta");
}
llvm::Value *offset = BinaryOperator(llvm::Instruction::Mul, unifSize,
varyingOffsets);
if (g->opt.force32BitAddressing == true && g->target.is32Bit == false)
if (g->opt.force32BitAddressing == true && g->target->is32Bit() == false)
// On 64-bit targets where we're doing 32-bit addressing
// calculations, we need to convert to an i64 vector before adding
// to the pointer

View File

@@ -1,5 +1,5 @@
/*
Copyright (c) 2010-2012, Intel Corporation
Copyright (c) 2010-2013, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -175,10 +175,10 @@ DeclSpecs::GetBaseType(SourcePos pos) const {
else
retType = st->GetAsSOAType(soaWidth);
if (soaWidth < g->target.vectorWidth)
if (soaWidth < g->target->getVectorWidth())
PerformanceWarning(pos, "soa<%d> width smaller than gang size %d "
"currently leads to inefficient code to access "
"soa types.", soaWidth, g->target.vectorWidth);
"soa types.", soaWidth, g->target->getVectorWidth());
}
return retType;

View File

@@ -1,5 +1,5 @@
/*
Copyright (c) 2010-2012, Intel Corporation
Copyright (c) 2010-2013, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -662,7 +662,7 @@ InitSymbol(llvm::Value *ptr, const Type *symType, Expr *initExpr,
new llvm::GlobalVariable(*m->module, llvmType, true /* const */,
llvm::GlobalValue::InternalLinkage,
constValue, "const_initializer");
llvm::Value *size = g->target.SizeOf(llvmType,
llvm::Value *size = g->target->SizeOf(llvmType,
ctx->GetCurrentBasicBlock());
ctx->MemcpyInst(ptr, constPtr, size);
}
@@ -1462,12 +1462,12 @@ lEmitBinaryPointerArith(BinaryExpr::Op op, llvm::Value *value0,
// points to in order to return the difference in elements.
llvm::Type *llvmElementType =
ptrType->GetBaseType()->LLVMType(g->ctx);
llvm::Value *size = g->target.SizeOf(llvmElementType,
llvm::Value *size = g->target->SizeOf(llvmElementType,
ctx->GetCurrentBasicBlock());
if (ptrType->IsVaryingType())
size = ctx->SmearUniform(size);
if (g->target.is32Bit == false &&
if (g->target->is32Bit() == false &&
g->opt.force32BitAddressing == true) {
// If we're doing 32-bit addressing math on a 64-bit
// target, then trunc the delta down to a 32-bit value.
@@ -1961,7 +1961,7 @@ BinaryExpr::GetType() const {
else if (op == Sub) {
if (CastType<PointerType>(type1) != NULL) {
// ptr - ptr -> ~ptrdiff_t
const Type *diffType = (g->target.is32Bit ||
const Type *diffType = (g->target->is32Bit() ||
g->opt.force32BitAddressing) ?
AtomicType::UniformInt32 : AtomicType::UniformInt64;
if (type0->IsVaryingType() || type1->IsVaryingType())
@@ -2381,7 +2381,7 @@ BinaryExpr::TypeCheck() {
return NULL;
}
const Type *offsetType = g->target.is32Bit ?
const Type *offsetType = g->target->is32Bit() ?
AtomicType::UniformInt32 : AtomicType::UniformInt64;
if (pt0->IsVaryingType())
offsetType = offsetType->GetAsVaryingType();
@@ -2866,7 +2866,7 @@ AssignExpr::TypeCheck() {
return NULL;
}
const Type *deltaType = g->target.is32Bit ? AtomicType::UniformInt32 :
const Type *deltaType = g->target->is32Bit() ? AtomicType::UniformInt32 :
AtomicType::UniformInt64;
if (lhsType->IsVaryingType())
deltaType = deltaType->GetAsVaryingType();
@@ -3811,7 +3811,7 @@ ExprList::GetConstant(const Type *type) const {
// Uniform short vectors are stored as vectors of length
// rounded up to the native vector width. So we add additional
// undef values here until we get the right size.
int vectorWidth = g->target.nativeVectorWidth;
int vectorWidth = g->target->getNativeVectorWidth();
const VectorType *vt = CastType<VectorType>(type);
const AtomicType *bt = vt->GetElementType();
@@ -3907,7 +3907,7 @@ lAddVaryingOffsetsIfNeeded(FunctionEmitContext *ctx, llvm::Value *ptr,
// Onward: compute the per lane offsets.
llvm::Value *varyingOffsets =
llvm::UndefValue::get(LLVMTypes::Int32VectorType);
for (int i = 0; i < g->target.vectorWidth; ++i)
for (int i = 0; i < g->target->getVectorWidth(); ++i)
varyingOffsets = ctx->InsertInst(varyingOffsets, LLVMInt32(i), i,
"varying_delta");
@@ -4350,7 +4350,7 @@ IndexExpr::TypeCheck() {
// The range of varying index is limited to [0,2^31) as a result.
if (Type::EqualIgnoringConst(indexType->GetAsUniformType(),
AtomicType::UniformInt64) == false ||
g->target.is32Bit ||
g->target->is32Bit() ||
g->opt.force32BitAddressing) {
const Type *indexType = AtomicType::VaryingInt32;
index = TypeConvertExpr(index, indexType, "array index");
@@ -4367,7 +4367,7 @@ IndexExpr::TypeCheck() {
//
// However, the index can be still truncated to signed int32 if
// the index type is 64 bit and --addressing=32.
bool force_32bit = g->target.is32Bit ||
bool force_32bit = g->target->is32Bit() ||
(g->opt.force32BitAddressing &&
Type::EqualIgnoringConst(indexType->GetAsUniformType(),
AtomicType::UniformInt64));
@@ -5492,7 +5492,7 @@ lConvert(const From *from, To *to, int count, bool forceVarying) {
lConvertElement(from[i], &to[i]);
if (forceVarying && count == 1)
for (int i = 1; i < g->target.vectorWidth; ++i)
for (int i = 1; i < g->target->getVectorWidth(); ++i)
to[i] = to[0];
}
@@ -5730,7 +5730,7 @@ ConstExpr::AsUInt32(uint32_t *up, bool forceVarying) const {
int
ConstExpr::Count() const {
return GetType()->IsVaryingType() ? g->target.vectorWidth : 1;
return GetType()->IsVaryingType() ? g->target->getVectorWidth() : 1;
}
@@ -6001,7 +6001,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
case AtomicType::TYPE_UINT16:
case AtomicType::TYPE_UINT32:
case AtomicType::TYPE_UINT64:
if (fromType->IsVaryingType() && g->target.isa != Target::GENERIC)
if (fromType->IsVaryingType() && g->target->getISA() != Target::GENERIC)
PerformanceWarning(pos, "Conversion from unsigned int to float is slow. "
"Use \"int\" if possible");
cast = ctx->CastInst(llvm::Instruction::UIToFP, // unsigned int to float
@@ -6117,14 +6117,14 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
cast = ctx->TruncInst(exprVal, targetType, cOpName);
break;
case AtomicType::TYPE_FLOAT:
if (fromType->IsVaryingType() && g->target.isa != Target::GENERIC)
if (fromType->IsVaryingType() && g->target->getISA() != Target::GENERIC)
PerformanceWarning(pos, "Conversion from float to unsigned int is slow. "
"Use \"int\" if possible");
cast = ctx->CastInst(llvm::Instruction::FPToUI, // unsigned int
exprVal, targetType, cOpName);
break;
case AtomicType::TYPE_DOUBLE:
if (fromType->IsVaryingType() && g->target.isa != Target::GENERIC)
if (fromType->IsVaryingType() && g->target->getISA() != Target::GENERIC)
PerformanceWarning(pos, "Conversion from double to unsigned int is slow. "
"Use \"int\" if possible");
cast = ctx->CastInst(llvm::Instruction::FPToUI, // unsigned int
@@ -6197,7 +6197,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
cast = exprVal;
break;
case AtomicType::TYPE_FLOAT:
if (fromType->IsVaryingType() && g->target.isa != Target::GENERIC)
if (fromType->IsVaryingType() && g->target->getISA() != Target::GENERIC)
PerformanceWarning(pos, "Conversion from float to unsigned int is slow. "
"Use \"int\" if possible");
cast = ctx->CastInst(llvm::Instruction::FPToUI, // unsigned int
@@ -6210,7 +6210,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
cast = ctx->TruncInst(exprVal, targetType, cOpName);
break;
case AtomicType::TYPE_DOUBLE:
if (fromType->IsVaryingType() && g->target.isa != Target::GENERIC)
if (fromType->IsVaryingType() && g->target->getISA() != Target::GENERIC)
PerformanceWarning(pos, "Conversion from double to unsigned int is slow. "
"Use \"int\" if possible");
cast = ctx->CastInst(llvm::Instruction::FPToUI, // unsigned int
@@ -6285,7 +6285,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
cast = exprVal;
break;
case AtomicType::TYPE_FLOAT:
if (fromType->IsVaryingType() && g->target.isa != Target::GENERIC)
if (fromType->IsVaryingType() && g->target->getISA() != Target::GENERIC)
PerformanceWarning(pos, "Conversion from float to unsigned int is slow. "
"Use \"int\" if possible");
cast = ctx->CastInst(llvm::Instruction::FPToUI, // unsigned int
@@ -6296,7 +6296,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
cast = ctx->TruncInst(exprVal, targetType, cOpName);
break;
case AtomicType::TYPE_DOUBLE:
if (fromType->IsVaryingType() && g->target.isa != Target::GENERIC)
if (fromType->IsVaryingType() && g->target->getISA() != Target::GENERIC)
PerformanceWarning(pos, "Conversion from double to unsigned int is slow. "
"Use \"int\" if possible");
cast = ctx->CastInst(llvm::Instruction::FPToUI, // unsigned int
@@ -6367,7 +6367,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
cast = ctx->ZExtInst(exprVal, targetType, cOpName);
break;
case AtomicType::TYPE_FLOAT:
if (fromType->IsVaryingType() && g->target.isa != Target::GENERIC)
if (fromType->IsVaryingType() && g->target->getISA() != Target::GENERIC)
PerformanceWarning(pos, "Conversion from float to unsigned int64 is slow. "
"Use \"int64\" if possible");
cast = ctx->CastInst(llvm::Instruction::FPToUI, // signed int
@@ -6378,7 +6378,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
cast = exprVal;
break;
case AtomicType::TYPE_DOUBLE:
if (fromType->IsVaryingType() && g->target.isa != Target::GENERIC)
if (fromType->IsVaryingType() && g->target->getISA() != Target::GENERIC)
PerformanceWarning(pos, "Conversion from double to unsigned int64 is slow. "
"Use \"int64\" if possible");
cast = ctx->CastInst(llvm::Instruction::FPToUI, // signed int
@@ -6861,7 +6861,7 @@ TypeCastExpr::TypeCheck() {
if (fromPtr != NULL && toAtomic != NULL && toAtomic->IsIntType()) {
bool safeCast = (toAtomic->basicType == AtomicType::TYPE_INT64 ||
toAtomic->basicType == AtomicType::TYPE_UINT64);
if (g->target.is32Bit)
if (g->target->is32Bit())
safeCast |= (toAtomic->basicType == AtomicType::TYPE_INT32 ||
toAtomic->basicType == AtomicType::TYPE_UINT32);
if (safeCast == false)
@@ -7007,7 +7007,7 @@ lConvertPointerConstant(llvm::Constant *c, const Type *constType) {
llvm::Constant *intPtr =
llvm::ConstantExpr::getPtrToInt(c, LLVMTypes::PointerIntType);
Assert(constType->IsVaryingType() || constType->IsSOAType());
int count = constType->IsVaryingType() ? g->target.vectorWidth :
int count = constType->IsVaryingType() ? g->target->getVectorWidth() :
constType->GetSOAWidth();
std::vector<llvm::Constant *> smear;
@@ -7498,13 +7498,13 @@ SizeOfExpr::GetValue(FunctionEmitContext *ctx) const {
if (llvmType == NULL)
return NULL;
return g->target.SizeOf(llvmType, ctx->GetCurrentBasicBlock());
return g->target->SizeOf(llvmType, ctx->GetCurrentBasicBlock());
}
const Type *
SizeOfExpr::GetType() const {
return (g->target.is32Bit || g->opt.force32BitAddressing) ?
return (g->target->is32Bit() || g->opt.force32BitAddressing) ?
AtomicType::UniformUInt32 : AtomicType::UniformUInt64;
}
@@ -8182,7 +8182,7 @@ NewExpr::NewExpr(int typeQual, const Type *t, Expr *init, Expr *count,
llvm::Value *
NewExpr::GetValue(FunctionEmitContext *ctx) const {
bool do32Bit = (g->target.is32Bit || g->opt.force32BitAddressing);
bool do32Bit = (g->target->is32Bit() || g->opt.force32BitAddressing);
// Determine how many elements we need to allocate. Note that this
// will be a varying value if this is a varying new.
@@ -8208,7 +8208,7 @@ NewExpr::GetValue(FunctionEmitContext *ctx) const {
// Compute the total amount of memory to allocate, allocSize, as the
// product of the number of elements to allocate and the size of a
// single element.
llvm::Value *eltSize = g->target.SizeOf(allocType->LLVMType(g->ctx),
llvm::Value *eltSize = g->target->SizeOf(allocType->LLVMType(g->ctx),
ctx->GetCurrentBasicBlock());
if (isVarying)
eltSize = ctx->SmearUniform(eltSize, "smear_size");
@@ -8240,7 +8240,7 @@ NewExpr::GetValue(FunctionEmitContext *ctx) const {
if (retType == NULL)
return NULL;
if (isVarying) {
if (g->target.is32Bit)
if (g->target->is32Bit())
// Convert i64 vector values to i32 if we are compiling to a
// 32-bit target.
ptrValue = ctx->TruncInst(ptrValue, LLVMTypes::VoidPointerVectorType,
@@ -8254,11 +8254,11 @@ NewExpr::GetValue(FunctionEmitContext *ctx) const {
// implemented to return NULL for program instances that aren't
// executing; more generally, we should be using the current
// execution mask for this...
for (int i = 0; i < g->target.vectorWidth; ++i) {
for (int i = 0; i < g->target->getVectorWidth(); ++i) {
llvm::BasicBlock *bbInit = ctx->CreateBasicBlock("init_ptr");
llvm::BasicBlock *bbSkip = ctx->CreateBasicBlock("skip_init");
llvm::Value *p = ctx->ExtractInst(ptrValue, i);
llvm::Value *nullValue = g->target.is32Bit ? LLVMInt32(0) :
llvm::Value *nullValue = g->target->is32Bit() ? LLVMInt32(0) :
LLVMInt64(0);
// Is the pointer for the current lane non-zero?
llvm::Value *nonNull = ctx->CmpInst(llvm::Instruction::ICmp,
@@ -8337,7 +8337,7 @@ NewExpr::TypeCheck() {
}
// Figure out the type that the allocation count should be
const Type *t = (g->target.is32Bit || g->opt.force32BitAddressing) ?
const Type *t = (g->target->is32Bit() || g->opt.force32BitAddressing) ?
AtomicType::UniformUInt32 : AtomicType::UniformUInt64;
if (isVarying)
t = t->GetAsVaryingType();

View File

@@ -1,5 +1,5 @@
/*
Copyright (c) 2011-2012, Intel Corporation
Copyright (c) 2011-2013, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -320,7 +320,7 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
&&
costEstimate > CHECK_MASK_AT_FUNCTION_START_COST);
checkMask &= (type->isUnmasked == false);
checkMask &= (g->target.maskingIsFree == false);
checkMask &= (g->target->getMaskingIsFree() == false);
checkMask &= (g->opt.disableCoherentControlFlow == false);
if (checkMask) {
@@ -450,7 +450,7 @@ Function::GenerateIR() {
llvm::GlobalValue::LinkageTypes linkage = llvm::GlobalValue::ExternalLinkage;
std::string functionName = sym->name;
if (g->mangleFunctionsWithTarget)
functionName += std::string("_") + g->target.GetISAString();
functionName += std::string("_") + g->target->GetISAString();
llvm::Function *appFunction =
llvm::Function::Create(ftype, linkage, functionName.c_str(), m->module);
#if defined(LLVM_3_1)

426
ispc.cpp
View File

@@ -1,5 +1,5 @@
/*
Copyright (c) 2010-2012, Intel Corporation
Copyright (c) 2010-2013, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -143,10 +143,34 @@ static const char *supportedCPUs[] = {
#endif // LLVM_3_2 or LLVM_3_3
};
bool
Target::GetTarget(const char *arch, const char *cpu, const char *isa,
bool pic, Target *t) {
Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
m_target(NULL),
m_targetMachine(NULL),
#if defined(LLVM_3_1)
m_targetData(NULL),
#else
m_dataLayout(NULL),
#endif
m_valid(false),
m_isa(SSE2),
m_arch(""),
m_is32Bit(true),
m_cpu(""),
m_attributes(""),
#if !defined(LLVM_3_1) && !defined(LLVM_3_2)
m_tf_attributes(NULL),
#endif
m_nativeVectorWidth(-1),
m_vectorWidth(-1),
m_generatePIC(pic),
m_maskingIsFree(false),
m_maskBitCount(-1),
m_hasHalf(false),
m_hasRand(false),
m_hasGather(false),
m_hasScatter(false),
m_hasTranscendentals(false)
{
if (isa == NULL) {
if (cpu != NULL) {
// If a CPU was specified explicitly, try to pick the best
@@ -197,30 +221,27 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
if (foundCPU == false) {
fprintf(stderr, "Error: CPU type \"%s\" unknown. Supported CPUs: "
"%s.\n", cpu, SupportedTargetCPUs().c_str());
return false;
return;
}
}
t->cpu = cpu;
this->m_cpu = cpu;
if (arch == NULL)
arch = "x86-64";
bool error = false;
t->generatePIC = pic;
// Make sure the target architecture is a known one; print an error
// with the valid ones otherwise.
t->target = NULL;
for (llvm::TargetRegistry::iterator iter = llvm::TargetRegistry::begin();
iter != llvm::TargetRegistry::end(); ++iter) {
if (std::string(arch) == iter->getName()) {
t->target = &*iter;
this->m_target = &*iter;
break;
}
}
if (t->target == NULL) {
if (this->m_target == NULL) {
fprintf(stderr, "Invalid architecture \"%s\"\nOptions: ", arch);
llvm::TargetRegistry::iterator iter;
for (iter = llvm::TargetRegistry::begin();
@@ -230,178 +251,176 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
error = true;
}
else {
t->arch = arch;
this->m_arch = arch;
}
// This is the case for most of them
t->hasHalf = t->hasRand = t->hasTranscendentals = false;
t->hasGather = t->hasScatter = false;
// Check default LLVM generated targets
if (!strcasecmp(isa, "sse2")) {
t->isa = Target::SSE2;
t->nativeVectorWidth = 4;
t->vectorWidth = 4;
t->attributes = "+sse,+sse2,-sse3,-sse41,-sse42,-sse4a,-ssse3,-popcnt";
t->maskingIsFree = false;
t->maskBitCount = 32;
this->m_isa = Target::SSE2;
this->m_nativeVectorWidth = 4;
this->m_vectorWidth = 4;
this->m_attributes = "+sse,+sse2,-sse3,-sse41,-sse42,-sse4a,-ssse3,-popcnt";
this->m_maskingIsFree = false;
this->m_maskBitCount = 32;
}
else if (!strcasecmp(isa, "sse2-x2")) {
t->isa = Target::SSE2;
t->nativeVectorWidth = 4;
t->vectorWidth = 8;
t->attributes = "+sse,+sse2,-sse3,-sse41,-sse42,-sse4a,-ssse3,-popcnt";
t->maskingIsFree = false;
t->maskBitCount = 32;
this->m_isa = Target::SSE2;
this->m_nativeVectorWidth = 4;
this->m_vectorWidth = 8;
this->m_attributes = "+sse,+sse2,-sse3,-sse41,-sse42,-sse4a,-ssse3,-popcnt";
this->m_maskingIsFree = false;
this->m_maskBitCount = 32;
}
else if (!strcasecmp(isa, "sse4")) {
t->isa = Target::SSE4;
t->nativeVectorWidth = 4;
t->vectorWidth = 4;
t->attributes = "+sse,+sse2,+sse3,+sse41,-sse42,-sse4a,+ssse3,-popcnt,+cmov";
t->maskingIsFree = false;
t->maskBitCount = 32;
this->m_isa = Target::SSE4;
this->m_nativeVectorWidth = 4;
this->m_vectorWidth = 4;
// TODO: why not sse42 and popcnt?
this->m_attributes = "+sse,+sse2,+sse3,+sse41,-sse42,-sse4a,+ssse3,-popcnt,+cmov";
this->m_maskingIsFree = false;
this->m_maskBitCount = 32;
}
else if (!strcasecmp(isa, "sse4x2") || !strcasecmp(isa, "sse4-x2")) {
t->isa = Target::SSE4;
t->nativeVectorWidth = 4;
t->vectorWidth = 8;
t->attributes = "+sse,+sse2,+sse3,+sse41,-sse42,-sse4a,+ssse3,-popcnt,+cmov";
t->maskingIsFree = false;
t->maskBitCount = 32;
this->m_isa = Target::SSE4;
this->m_nativeVectorWidth = 4;
this->m_vectorWidth = 8;
this->m_attributes = "+sse,+sse2,+sse3,+sse41,-sse42,-sse4a,+ssse3,-popcnt,+cmov";
this->m_maskingIsFree = false;
this->m_maskBitCount = 32;
}
else if (!strcasecmp(isa, "generic-4")) {
t->isa = Target::GENERIC;
t->nativeVectorWidth = 4;
t->vectorWidth = 4;
t->maskingIsFree = true;
t->maskBitCount = 1;
t->hasHalf = true;
t->hasTranscendentals = true;
t->hasGather = t->hasScatter = true;
this->m_isa = Target::GENERIC;
this->m_nativeVectorWidth = 4;
this->m_vectorWidth = 4;
this->m_maskingIsFree = true;
this->m_maskBitCount = 1;
this->m_hasHalf = true;
this->m_hasTranscendentals = true;
this->m_hasGather = this->m_hasScatter = true;
}
else if (!strcasecmp(isa, "generic-8")) {
t->isa = Target::GENERIC;
t->nativeVectorWidth = 8;
t->vectorWidth = 8;
t->maskingIsFree = true;
t->maskBitCount = 1;
t->hasHalf = true;
t->hasTranscendentals = true;
t->hasGather = t->hasScatter = true;
this->m_isa = Target::GENERIC;
this->m_nativeVectorWidth = 8;
this->m_vectorWidth = 8;
this->m_maskingIsFree = true;
this->m_maskBitCount = 1;
this->m_hasHalf = true;
this->m_hasTranscendentals = true;
this->m_hasGather = this->m_hasScatter = true;
}
else if (!strcasecmp(isa, "generic-16")) {
t->isa = Target::GENERIC;
t->nativeVectorWidth = 16;
t->vectorWidth = 16;
t->maskingIsFree = true;
t->maskBitCount = 1;
t->hasHalf = true;
t->hasTranscendentals = true;
t->hasGather = t->hasScatter = true;
this->m_isa = Target::GENERIC;
this->m_nativeVectorWidth = 16;
this->m_vectorWidth = 16;
this->m_maskingIsFree = true;
this->m_maskBitCount = 1;
this->m_hasHalf = true;
this->m_hasTranscendentals = true;
this->m_hasGather = this->m_hasScatter = true;
}
else if (!strcasecmp(isa, "generic-32")) {
t->isa = Target::GENERIC;
t->nativeVectorWidth = 32;
t->vectorWidth = 32;
t->maskingIsFree = true;
t->maskBitCount = 1;
t->hasHalf = true;
t->hasTranscendentals = true;
t->hasGather = t->hasScatter = true;
this->m_isa = Target::GENERIC;
this->m_nativeVectorWidth = 32;
this->m_vectorWidth = 32;
this->m_maskingIsFree = true;
this->m_maskBitCount = 1;
this->m_hasHalf = true;
this->m_hasTranscendentals = true;
this->m_hasGather = this->m_hasScatter = true;
}
else if (!strcasecmp(isa, "generic-64")) {
t->isa = Target::GENERIC;
t->nativeVectorWidth = 64;
t->vectorWidth = 64;
t->maskingIsFree = true;
t->maskBitCount = 1;
t->hasHalf = true;
t->hasTranscendentals = true;
t->hasGather = t->hasScatter = true;
this->m_isa = Target::GENERIC;
this->m_nativeVectorWidth = 64;
this->m_vectorWidth = 64;
this->m_maskingIsFree = true;
this->m_maskBitCount = 1;
this->m_hasHalf = true;
this->m_hasTranscendentals = true;
this->m_hasGather = this->m_hasScatter = true;
}
else if (!strcasecmp(isa, "generic-1")) {
t->isa = Target::GENERIC;
t->nativeVectorWidth = 1;
t->vectorWidth = 1;
t->maskingIsFree = false;
t->maskBitCount = 32;
this->m_isa = Target::GENERIC;
this->m_nativeVectorWidth = 1;
this->m_vectorWidth = 1;
this->m_maskingIsFree = false;
this->m_maskBitCount = 32;
}
else if (!strcasecmp(isa, "avx") || !strcasecmp(isa, "avx1")) {
t->isa = Target::AVX;
t->nativeVectorWidth = 8;
t->vectorWidth = 8;
t->attributes = "+avx,+popcnt,+cmov";
t->maskingIsFree = false;
t->maskBitCount = 32;
this->m_isa = Target::AVX;
this->m_nativeVectorWidth = 8;
this->m_vectorWidth = 8;
this->m_attributes = "+avx,+popcnt,+cmov";
this->m_maskingIsFree = false;
this->m_maskBitCount = 32;
}
else if (!strcasecmp(isa, "avx-x2") || !strcasecmp(isa, "avx1-x2")) {
t->isa = Target::AVX;
t->nativeVectorWidth = 8;
t->vectorWidth = 16;
t->attributes = "+avx,+popcnt,+cmov";
t->maskingIsFree = false;
t->maskBitCount = 32;
this->m_isa = Target::AVX;
this->m_nativeVectorWidth = 8;
this->m_vectorWidth = 16;
this->m_attributes = "+avx,+popcnt,+cmov";
this->m_maskingIsFree = false;
this->m_maskBitCount = 32;
}
else if (!strcasecmp(isa, "avx1.1")) {
t->isa = Target::AVX11;
t->nativeVectorWidth = 8;
t->vectorWidth = 8;
t->attributes = "+avx,+popcnt,+cmov,+f16c,+rdrand";
t->maskingIsFree = false;
t->maskBitCount = 32;
t->hasHalf = true;
this->m_isa = Target::AVX11;
this->m_nativeVectorWidth = 8;
this->m_vectorWidth = 8;
this->m_attributes = "+avx,+popcnt,+cmov,+f16c,+rdrand";
this->m_maskingIsFree = false;
this->m_maskBitCount = 32;
this->m_hasHalf = true;
#if !defined(LLVM_3_1)
// LLVM 3.2+ only
t->hasRand = true;
this->m_hasRand = true;
#endif
}
else if (!strcasecmp(isa, "avx1.1-x2")) {
t->isa = Target::AVX11;
t->nativeVectorWidth = 8;
t->vectorWidth = 16;
t->attributes = "+avx,+popcnt,+cmov,+f16c,+rdrand";
t->maskingIsFree = false;
t->maskBitCount = 32;
t->hasHalf = true;
this->m_isa = Target::AVX11;
this->m_nativeVectorWidth = 8;
this->m_vectorWidth = 16;
this->m_attributes = "+avx,+popcnt,+cmov,+f16c,+rdrand";
this->m_maskingIsFree = false;
this->m_maskBitCount = 32;
this->m_hasHalf = true;
#if !defined(LLVM_3_1)
// LLVM 3.2+ only
t->hasRand = true;
this->m_hasRand = true;
#endif
}
else if (!strcasecmp(isa, "avx2")) {
t->isa = Target::AVX2;
t->nativeVectorWidth = 8;
t->vectorWidth = 8;
t->attributes = "+avx2,+popcnt,+cmov,+f16c,+rdrand"
this->m_isa = Target::AVX2;
this->m_nativeVectorWidth = 8;
this->m_vectorWidth = 8;
this->m_attributes = "+avx2,+popcnt,+cmov,+f16c,+rdrand"
#ifndef LLVM_3_1
",+fma"
#endif // !LLVM_3_1
;
t->maskingIsFree = false;
t->maskBitCount = 32;
t->hasHalf = true;
this->m_maskingIsFree = false;
this->m_maskBitCount = 32;
this->m_hasHalf = true;
#if !defined(LLVM_3_1)
// LLVM 3.2+ only
t->hasRand = true;
t->hasGather = true;
this->m_hasRand = true;
this->m_hasGather = true;
#endif
}
else if (!strcasecmp(isa, "avx2-x2")) {
t->isa = Target::AVX2;
t->nativeVectorWidth = 16;
t->vectorWidth = 16;
t->attributes = "+avx2,+popcnt,+cmov,+f16c,+rdrand"
this->m_isa = Target::AVX2;
this->m_nativeVectorWidth = 16;
this->m_vectorWidth = 16;
this->m_attributes = "+avx2,+popcnt,+cmov,+f16c,+rdrand"
#ifndef LLVM_3_1
",+fma"
#endif // !LLVM_3_1
;
t->maskingIsFree = false;
t->maskBitCount = 32;
t->hasHalf = true;
this->m_maskingIsFree = false;
this->m_maskBitCount = 32;
this->m_hasHalf = true;
#if !defined(LLVM_3_1)
// LLVM 3.2+ only
t->hasRand = true;
t->hasGather = true;
this->m_hasRand = true;
this->m_hasGather = true;
#endif
}
else {
@@ -411,32 +430,75 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
}
if (!error) {
llvm::TargetMachine *targetMachine = t->GetTargetMachine();
// Create TargetMachine
std::string triple = GetTripleString();
llvm::Reloc::Model relocModel = m_generatePIC ? llvm::Reloc::PIC_ :
llvm::Reloc::Default;
std::string featuresString = m_attributes;
llvm::TargetOptions options;
#if !defined(LLVM_3_1)
if (g->opt.disableFMA == false)
options.AllowFPOpFusion = llvm::FPOpFusion::Fast;
#endif // !LLVM_3_1
m_targetMachine =
m_target->createTargetMachine(triple, m_cpu, featuresString, options,
relocModel);
Assert(m_targetMachine != NULL);
m_targetMachine->setAsmVerbosityDefault(true);
// Initialize TargetData/DataLayout in 3 steps.
// 1. Get default data layout first
std::string dl_string;
#if defined(LLVM_3_1)
const llvm::TargetData *targetData = targetMachine->getTargetData();
t->is32Bit = (targetData->getPointerSize() == 4);
dl_string = m_targetMachine->getTargetData()->getStringRepresentation();
#else
int addressSpace = 0;
const llvm::DataLayout *dataLayout = targetMachine->getDataLayout();
t->is32Bit = (dataLayout->getPointerSize(addressSpace) == 4);
dl_string = m_targetMachine->getDataLayout()->getStringRepresentation();
#endif
// 2. Adjust for generic
if (m_isa == Target::GENERIC) {
// <16 x i1> vectors only need 16 bit / 2 byte alignment, so add
// that to the regular datalayout string for IA..
// For generic-4 target we need to treat <4 x i1> as 128 bit value
// in terms of required memory storage and alignment, as this is
// translated to __m128 type.
dl_string = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-"
"i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-"
"f80:128:128-n8:16:32:64-S128-v16:16:16-v32:32:32-v4:128:128";
}
// 3. Finally set member data
#if defined(LLVM_3_1)
m_targetData = new llvm::TargetData(dl_string);
#else
m_dataLayout = new llvm::DataLayout(dl_string);
#endif
// Set is32Bit
this->m_is32Bit = (getDataLayout()->getPointerSize() == 4);
#if !defined(LLVM_3_1) && !defined(LLVM_3_2)
// This is an LLVM 3.3+ feature.
// Initialize the target-specific "target-features" attribute.
llvm::AttrBuilder attrBuilder;
attrBuilder.addAttribute("target-features", t->attributes);
t->tf_attributes = new llvm::AttributeSet(
llvm::AttributeSet::get(
*g->ctx,
llvm::AttributeSet::FunctionIndex,
attrBuilder));
if (!m_attributes.empty()) {
llvm::AttrBuilder attrBuilder;
attrBuilder.addAttribute("target-features", this->m_attributes);
this->m_tf_attributes = new llvm::AttributeSet(
llvm::AttributeSet::get(
*g->ctx,
llvm::AttributeSet::FunctionIndex,
attrBuilder));
}
#endif
Assert(t->vectorWidth <= ISPC_MAX_NVEC);
Assert(this->m_vectorWidth <= ISPC_MAX_NVEC);
}
return !error;
m_valid = !error;
return;
}
@@ -479,42 +541,20 @@ Target::GetTripleString() const {
// slightly different ones for the triple. TODO: is there a way to
// have it do this remapping, which would presumably be a bit less
// error prone?
if (arch == "x86")
if (m_arch == "x86")
triple.setArchName("i386");
else if (arch == "x86-64")
else if (m_arch == "x86-64")
triple.setArchName("x86_64");
else
triple.setArchName(arch);
triple.setArchName(m_arch);
return triple.str();
}
/** Builds and returns a new LLVM TargetMachine for this target.

    A fresh machine is created on every call from the target triple, the
    CPU name and the feature-attribute string.  Position-independent
    relocation is selected when generatePIC is set.  Unless built against
    LLVM 3.1, fast floating-point op fusion is enabled when FMA has not been
    disabled through g->opt.disableFMA.  The returned machine has verbose
    assembly output enabled by default. */
llvm::TargetMachine *
Target::GetTargetMachine() const {
    llvm::TargetOptions options;
#if !defined(LLVM_3_1)
    if (g->opt.disableFMA == false)
        options.AllowFPOpFusion = llvm::FPOpFusion::Fast;
#endif // !LLVM_3_1

    llvm::Reloc::Model relocModel = llvm::Reloc::Default;
    if (generatePIC)
        relocModel = llvm::Reloc::PIC_;

    std::string triple = GetTripleString();
    std::string featuresString = attributes;

    llvm::TargetMachine *machine =
        target->createTargetMachine(triple, cpu, featuresString, options,
                                    relocModel);
    // Creation is not expected to fail for a target that was looked up
    // successfully in the registry.
    Assert(machine != NULL);
    machine->setAsmVerbosityDefault(true);
    return machine;
}
const char *
Target::GetISAString() const {
switch (isa) {
switch (m_isa) {
case Target::SSE2:
return "sse2";
case Target::SSE4:
@@ -571,7 +611,7 @@ lGenericTypeLayoutIndeterminate(llvm::Type *type) {
llvm::Value *
Target::SizeOf(llvm::Type *type,
llvm::BasicBlock *insertAtEnd) {
if (isa == Target::GENERIC &&
if (m_isa == Target::GENERIC &&
lGenericTypeLayoutIndeterminate(type)) {
llvm::Value *index[1] = { LLVMInt32(1) };
llvm::PointerType *ptrType = llvm::PointerType::get(type, 0);
@@ -581,7 +621,7 @@ Target::SizeOf(llvm::Type *type,
llvm::GetElementPtrInst::Create(voidPtr, arrayRef, "sizeof_gep",
insertAtEnd);
if (is32Bit || g->opt.force32BitAddressing)
if (m_is32Bit || g->opt.force32BitAddressing)
return new llvm::PtrToIntInst(gep, LLVMTypes::Int32Type,
"sizeof_int", insertAtEnd);
else
@@ -589,19 +629,11 @@ Target::SizeOf(llvm::Type *type,
"sizeof_int", insertAtEnd);
}
#if defined(LLVM_3_1)
const llvm::TargetData *td = GetTargetMachine()->getTargetData();
Assert(td != NULL);
uint64_t bitSize = td->getTypeSizeInBits(type);
#else
const llvm::DataLayout *dl = GetTargetMachine()->getDataLayout();
Assert(dl != NULL);
uint64_t bitSize = dl->getTypeSizeInBits(type);
#endif
uint64_t bitSize = getDataLayout()->getTypeSizeInBits(type);
Assert((bitSize % 8) == 0);
uint64_t byteSize = bitSize / 8;
if (is32Bit || g->opt.force32BitAddressing)
if (m_is32Bit || g->opt.force32BitAddressing)
return LLVMInt32((int32_t)byteSize);
else
return LLVMInt64(byteSize);
@@ -611,7 +643,7 @@ Target::SizeOf(llvm::Type *type,
llvm::Value *
Target::StructOffset(llvm::Type *type, int element,
llvm::BasicBlock *insertAtEnd) {
if (isa == Target::GENERIC &&
if (m_isa == Target::GENERIC &&
lGenericTypeLayoutIndeterminate(type) == true) {
llvm::Value *indices[2] = { LLVMInt32(0), LLVMInt32(element) };
llvm::PointerType *ptrType = llvm::PointerType::get(type, 0);
@@ -621,7 +653,7 @@ Target::StructOffset(llvm::Type *type, int element,
llvm::GetElementPtrInst::Create(voidPtr, arrayRef, "offset_gep",
insertAtEnd);
if (is32Bit || g->opt.force32BitAddressing)
if (m_is32Bit || g->opt.force32BitAddressing)
return new llvm::PtrToIntInst(gep, LLVMTypes::Int32Type,
"offset_int", insertAtEnd);
else
@@ -636,24 +668,24 @@ Target::StructOffset(llvm::Type *type, int element,
return NULL;
}
#if defined(LLVM_3_1)
const llvm::TargetData *td = GetTargetMachine()->getTargetData();
Assert(td != NULL);
const llvm::StructLayout *sl = td->getStructLayout(structType);
#else
const llvm::DataLayout *dl = GetTargetMachine()->getDataLayout();
Assert(dl != NULL);
const llvm::StructLayout *sl = dl->getStructLayout(structType);
#endif
const llvm::StructLayout *sl = getDataLayout()->getStructLayout(structType);
Assert(sl != NULL);
uint64_t offset = sl->getElementOffset(element);
if (is32Bit || g->opt.force32BitAddressing)
if (m_is32Bit || g->opt.force32BitAddressing)
return LLVMInt32((int32_t)offset);
else
return LLVMInt64(offset);
}
/** Attaches the target-specific attribute set held in m_tf_attributes to
    the given LLVM function.

    Nothing is added when no attribute set has been created
    (m_tf_attributes is NULL).  The whole body is compiled out when
    building against LLVM 3.1 or 3.2. */
void Target::markFuncWithTargetAttr(llvm::Function* func) {
#if !defined(LLVM_3_1) && !defined(LLVM_3_2)
    if (m_tf_attributes == NULL)
        return;
    func->addAttributes(llvm::AttributeSet::FunctionIndex, *m_tf_attributes);
#endif
}
///////////////////////////////////////////////////////////////////////////
// Opt

121
ispc.h
View File

@@ -1,5 +1,5 @@
/*
Copyright (c) 2010-2012, Intel Corporation
Copyright (c) 2010-2013, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -32,7 +32,7 @@
*/
/** @file ispc.h
@brief Main ispc.header file
@brief Main ispc.header file. Defines Target, Globals and Opt classes.
*/
#ifndef ISPC_H
@@ -72,6 +72,11 @@ namespace llvm {
class BasicBlock;
class Constant;
class ConstantValue;
#if defined(LLVM_3_1)
class TargetData;
#else
class DataLayout;
#endif
class DIBuilder;
class DIDescriptor;
class DIFile;
@@ -162,12 +167,12 @@ extern void DoAssertPos(SourcePos pos, const char *file, int line, const char *e
This structure defines a compilation target for the ispc compiler.
*/
struct Target {
class Target {
public:
/** Initializes a Target for the given architecture, CPU and ISA names,
if those names are known. GetTarget() reports success through its
boolean return value; after using the constructor, call isValid() to
check whether the Target was initialized successfully. */
static bool GetTarget(const char *arch, const char *cpu, const char *isa,
bool pic, Target *);
Target(const char *arch, const char *cpu, const char *isa, bool pic);
/** Returns a comma-delimited string giving the names of the currently
supported target ISAs. */
@@ -187,7 +192,7 @@ struct Target {
/** Returns the LLVM TargetMachine object corresponding to this
target. */
llvm::TargetMachine *GetTargetMachine() const;
llvm::TargetMachine *GetTargetMachine() const {return m_targetMachine;}
/** Returns a string like "avx" encoding the target. */
const char *GetISAString() const;
@@ -202,8 +207,8 @@ struct Target {
llvm::Value *StructOffset(llvm::Type *type,
int element, llvm::BasicBlock *insertAtEnd);
/** llvm Target object representing this target. */
const llvm::Target *target;
/** Mark LLVM function with target specific attribute, if required. */
void markFuncWithTargetAttr(llvm::Function* func);
/** Enumerator giving the instruction sets that the compiler can
target. These should be ordered from "worse" to "better" in that
@@ -213,68 +218,134 @@ struct Target {
added or the enumerant values are reordered. */
enum ISA { SSE2, SSE4, AVX, AVX11, AVX2, GENERIC, NUM_ISAS };
const llvm::Target *getTarget() const {return m_target;}
// Note that the method has the same name for LLVM 3.1 and 3.2+; this
// reduces the number of #ifdefs needed on the client side.
#if defined(LLVM_3_1)
llvm::TargetData *getDataLayout() const {return m_targetData;}
#else
llvm::DataLayout *getDataLayout() const {return m_dataLayout;}
#endif
/** Reports if Target object has valid state. */
bool isValid() const {return m_valid;}
ISA getISA() const {return m_isa;}
std::string getArch() const {return m_arch;}
bool is32Bit() const {return m_is32Bit;}
std::string getCPU() const {return m_cpu;}
int getNativeVectorWidth() const {return m_nativeVectorWidth;}
int getVectorWidth() const {return m_vectorWidth;}
bool getGeneratePIC() const {return m_generatePIC;}
bool getMaskingIsFree() const {return m_maskingIsFree;}
int getMaskBitCount() const {return m_maskBitCount;}
bool hasHalf() const {return m_hasHalf;}
bool hasRand() const {return m_hasRand;}
bool hasGather() const {return m_hasGather;}
bool hasScatter() const {return m_hasScatter;}
bool hasTranscendentals() const {return m_hasTranscendentals;}
private:
/** llvm Target object representing this target. */
const llvm::Target *m_target;
/** llvm TargetMachine.
Note that it's not destroyed during Target destruction, as
Module::CompileAndOutput() uses TargetMachines after Target is destroyed.
This needs to be changed.
It's also worth noting that the DataLayout of a TargetMachine cannot be
modified, and for generic targets it's not what we really need, so it
must not be used.
*/
llvm::TargetMachine *m_targetMachine;
#if defined(LLVM_3_1)
llvm::TargetData *m_targetData;
#else
llvm::DataLayout *m_dataLayout;
#endif
/** flag to report invalid state after construction
(due to bad parameters passed to constructor). */
bool m_valid;
/** Instruction set being compiled to. */
ISA isa;
ISA m_isa;
/** Target system architecture. (e.g. "x86-64", "x86"). */
std::string arch;
std::string m_arch;
/** Is the target architecture 32 or 64 bit */
bool is32Bit;
bool m_is32Bit;
/** Target CPU. (e.g. "corei7", "corei7-avx", ..) */
std::string cpu;
std::string m_cpu;
/** Target-specific attribute string to pass along to the LLVM backend */
std::string attributes;
std::string m_attributes;
#if !defined(LLVM_3_1) && !defined(LLVM_3_2)
/** Target-specific LLVM attribute, which has to be attached to every
function to ensure that it is generated for correct target architecture.
This requirement was introduced in LLVM 3.3. */
llvm::AttributeSet* tf_attributes;
llvm::AttributeSet* m_tf_attributes;
#endif
/** Native vector width of the vector instruction set. Note that this
value is directly derived from the ISA Being used (e.g. it's 4 for
SSE, 8 for AVX, etc.) */
int nativeVectorWidth;
int m_nativeVectorWidth;
/** Actual vector width currently being compiled to. This may be an
integer multiple of the native vector width, for example if we're
"doubling up" and compiling 8-wide on a 4-wide SSE system. */
int vectorWidth;
int m_vectorWidth;
/** Indicates whether position independent code should be generated. */
bool generatePIC;
bool m_generatePIC;
/** Is there overhead associated with masking on the target
architecture; e.g. there is on SSE, due to extra blends and the
like, but there isn't with an ISA that supports masking
natively. */
bool maskingIsFree;
bool m_maskingIsFree;
/** How many bits are used to store each element of the mask: e.g. this
is 32 on SSE/AVX, since that matches the HW better, but it's 1 for
the generic target. */
int maskBitCount;
int m_maskBitCount;
/** Indicates whether the target has native support for float/half
conversions. */
bool hasHalf;
bool m_hasHalf;
/** Indicates whether there is an ISA random number instruction. */
bool hasRand;
bool m_hasRand;
/** Indicates whether the target has a native gather instruction */
bool hasGather;
bool m_hasGather;
/** Indicates whether the target has a native scatter instruction */
bool hasScatter;
bool m_hasScatter;
/** Indicates whether the target has support for transcendentals (beyond
sqrt, which we assume that all of them handle). */
bool hasTranscendentals;
bool m_hasTranscendentals;
};
@@ -401,7 +472,7 @@ struct Globals {
/** Optimization option settings */
Opt opt;
/** Compilation target information */
Target target;
Target* target;
/** There are a number of math libraries that can be used for
transcendentals and the like during program compilation. */

View File

@@ -1,5 +1,5 @@
/*
Copyright (c) 2010-2012, Intel Corporation
Copyright (c) 2010-2013, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -94,10 +94,10 @@ llvm::Constant *LLVMMaskAllOff = NULL;
void
InitLLVMUtil(llvm::LLVMContext *ctx, Target target) {
InitLLVMUtil(llvm::LLVMContext *ctx, Target& target) {
LLVMTypes::VoidType = llvm::Type::getVoidTy(*ctx);
LLVMTypes::VoidPointerType = llvm::PointerType::get(llvm::Type::getInt8Ty(*ctx), 0);
LLVMTypes::PointerIntType = target.is32Bit ? llvm::Type::getInt32Ty(*ctx) :
LLVMTypes::PointerIntType = target.is32Bit() ? llvm::Type::getInt32Ty(*ctx) :
llvm::Type::getInt64Ty(*ctx);
LLVMTypes::BoolType = llvm::Type::getInt1Ty(*ctx);
@@ -115,29 +115,29 @@ InitLLVMUtil(llvm::LLVMContext *ctx, Target target) {
LLVMTypes::FloatPointerType = llvm::PointerType::get(LLVMTypes::FloatType, 0);
LLVMTypes::DoublePointerType = llvm::PointerType::get(LLVMTypes::DoubleType, 0);
if (target.maskBitCount == 1)
if (target.getMaskBitCount() == 1)
LLVMTypes::MaskType = LLVMTypes::BoolVectorType =
llvm::VectorType::get(llvm::Type::getInt1Ty(*ctx), target.vectorWidth);
llvm::VectorType::get(llvm::Type::getInt1Ty(*ctx), target.getVectorWidth());
else {
Assert(target.maskBitCount == 32);
Assert(target.getMaskBitCount() == 32);
LLVMTypes::MaskType = LLVMTypes::BoolVectorType =
llvm::VectorType::get(llvm::Type::getInt32Ty(*ctx), target.vectorWidth);
llvm::VectorType::get(llvm::Type::getInt32Ty(*ctx), target.getVectorWidth());
}
LLVMTypes::Int1VectorType =
llvm::VectorType::get(llvm::Type::getInt1Ty(*ctx), target.vectorWidth);
llvm::VectorType::get(llvm::Type::getInt1Ty(*ctx), target.getVectorWidth());
LLVMTypes::Int8VectorType =
llvm::VectorType::get(LLVMTypes::Int8Type, target.vectorWidth);
llvm::VectorType::get(LLVMTypes::Int8Type, target.getVectorWidth());
LLVMTypes::Int16VectorType =
llvm::VectorType::get(LLVMTypes::Int16Type, target.vectorWidth);
llvm::VectorType::get(LLVMTypes::Int16Type, target.getVectorWidth());
LLVMTypes::Int32VectorType =
llvm::VectorType::get(LLVMTypes::Int32Type, target.vectorWidth);
llvm::VectorType::get(LLVMTypes::Int32Type, target.getVectorWidth());
LLVMTypes::Int64VectorType =
llvm::VectorType::get(LLVMTypes::Int64Type, target.vectorWidth);
llvm::VectorType::get(LLVMTypes::Int64Type, target.getVectorWidth());
LLVMTypes::FloatVectorType =
llvm::VectorType::get(LLVMTypes::FloatType, target.vectorWidth);
llvm::VectorType::get(LLVMTypes::FloatType, target.getVectorWidth());
LLVMTypes::DoubleVectorType =
llvm::VectorType::get(LLVMTypes::DoubleType, target.vectorWidth);
llvm::VectorType::get(LLVMTypes::DoubleType, target.getVectorWidth());
LLVMTypes::Int8VectorPointerType = llvm::PointerType::get(LLVMTypes::Int8VectorType, 0);
LLVMTypes::Int16VectorPointerType = llvm::PointerType::get(LLVMTypes::Int16VectorType, 0);
@@ -146,7 +146,7 @@ InitLLVMUtil(llvm::LLVMContext *ctx, Target target) {
LLVMTypes::FloatVectorPointerType = llvm::PointerType::get(LLVMTypes::FloatVectorType, 0);
LLVMTypes::DoubleVectorPointerType = llvm::PointerType::get(LLVMTypes::DoubleVectorType, 0);
LLVMTypes::VoidPointerVectorType = g->target.is32Bit ? LLVMTypes::Int32VectorType :
LLVMTypes::VoidPointerVectorType = g->target->is32Bit() ? LLVMTypes::Int32VectorType :
LLVMTypes::Int64VectorType;
LLVMTrue = llvm::ConstantInt::getTrue(*ctx);
@@ -154,27 +154,27 @@ InitLLVMUtil(llvm::LLVMContext *ctx, Target target) {
std::vector<llvm::Constant *> maskOnes;
llvm::Constant *onMask = NULL;
if (target.maskBitCount == 1)
if (target.getMaskBitCount() == 1)
onMask = llvm::ConstantInt::get(llvm::Type::getInt1Ty(*ctx), 1,
false /*unsigned*/); // 0x1
else
onMask = llvm::ConstantInt::get(llvm::Type::getInt32Ty(*ctx), -1,
true /*signed*/); // 0xffffffff
for (int i = 0; i < target.vectorWidth; ++i)
for (int i = 0; i < target.getVectorWidth(); ++i)
maskOnes.push_back(onMask);
LLVMMaskAllOn = llvm::ConstantVector::get(maskOnes);
std::vector<llvm::Constant *> maskZeros;
llvm::Constant *offMask = NULL;
if (target.maskBitCount == 1)
if (target.getMaskBitCount() == 1)
offMask = llvm::ConstantInt::get(llvm::Type::getInt1Ty(*ctx), 0,
true /*signed*/);
else
offMask = llvm::ConstantInt::get(llvm::Type::getInt32Ty(*ctx), 0,
true /*signed*/);
for (int i = 0; i < target.vectorWidth; ++i)
for (int i = 0; i < target.getVectorWidth(); ++i)
maskZeros.push_back(offMask);
LLVMMaskAllOff = llvm::ConstantVector::get(maskZeros);
}
@@ -252,7 +252,7 @@ llvm::Constant *
LLVMInt8Vector(int8_t ival) {
llvm::Constant *v = LLVMInt8(ival);
std::vector<llvm::Constant *> vals;
for (int i = 0; i < g->target.vectorWidth; ++i)
for (int i = 0; i < g->target->getVectorWidth(); ++i)
vals.push_back(v);
return llvm::ConstantVector::get(vals);
}
@@ -261,7 +261,7 @@ LLVMInt8Vector(int8_t ival) {
/** Builds an LLVM constant vector from the int8_t values in ivec.
    One LLVMInt8 constant is appended for each index i below the target's
    vector width, so ivec[i] is read for every such i.  The collected
    constants are then handed to llvm::ConstantVector::get().
    NOTE(review): the two consecutive loop headers below look like the
    before/after forms of one line in this change (g->target.vectorWidth
    vs. g->target->getVectorWidth()) -- confirm against the actual file. */
llvm::Constant *
LLVMInt8Vector(const int8_t *ivec) {
std::vector<llvm::Constant *> vals;
for (int i = 0; i < g->target.vectorWidth; ++i)
for (int i = 0; i < g->target->getVectorWidth(); ++i)
vals.push_back(LLVMInt8(ivec[i]));
return llvm::ConstantVector::get(vals);
}
@@ -271,7 +271,7 @@ llvm::Constant *
LLVMUInt8Vector(uint8_t ival) {
llvm::Constant *v = LLVMUInt8(ival);
std::vector<llvm::Constant *> vals;
for (int i = 0; i < g->target.vectorWidth; ++i)
for (int i = 0; i < g->target->getVectorWidth(); ++i)
vals.push_back(v);
return llvm::ConstantVector::get(vals);
}
@@ -280,7 +280,7 @@ LLVMUInt8Vector(uint8_t ival) {
/** Builds an LLVM constant vector from the uint8_t values in ivec.
    One LLVMUInt8 constant is appended for each index i below the target's
    vector width, so ivec[i] is read for every such i.  The collected
    constants are then handed to llvm::ConstantVector::get().
    NOTE(review): the two consecutive loop headers below look like the
    before/after forms of one line in this change (g->target.vectorWidth
    vs. g->target->getVectorWidth()) -- confirm against the actual file. */
llvm::Constant *
LLVMUInt8Vector(const uint8_t *ivec) {
std::vector<llvm::Constant *> vals;
for (int i = 0; i < g->target.vectorWidth; ++i)
for (int i = 0; i < g->target->getVectorWidth(); ++i)
vals.push_back(LLVMUInt8(ivec[i]));
return llvm::ConstantVector::get(vals);
}
@@ -290,7 +290,7 @@ llvm::Constant *
LLVMInt16Vector(int16_t ival) {
llvm::Constant *v = LLVMInt16(ival);
std::vector<llvm::Constant *> vals;
for (int i = 0; i < g->target.vectorWidth; ++i)
for (int i = 0; i < g->target->getVectorWidth(); ++i)
vals.push_back(v);
return llvm::ConstantVector::get(vals);
}
@@ -299,7 +299,7 @@ LLVMInt16Vector(int16_t ival) {
/** Builds an LLVM constant vector from the int16_t values in ivec.
    One LLVMInt16 constant is appended for each index i below the target's
    vector width, so ivec[i] is read for every such i.  The collected
    constants are then handed to llvm::ConstantVector::get().
    NOTE(review): the two consecutive loop headers below look like the
    before/after forms of one line in this change (g->target.vectorWidth
    vs. g->target->getVectorWidth()) -- confirm against the actual file. */
llvm::Constant *
LLVMInt16Vector(const int16_t *ivec) {
std::vector<llvm::Constant *> vals;
for (int i = 0; i < g->target.vectorWidth; ++i)
for (int i = 0; i < g->target->getVectorWidth(); ++i)
vals.push_back(LLVMInt16(ivec[i]));
return llvm::ConstantVector::get(vals);
}
@@ -309,7 +309,7 @@ llvm::Constant *
LLVMUInt16Vector(uint16_t ival) {
llvm::Constant *v = LLVMUInt16(ival);
std::vector<llvm::Constant *> vals;
for (int i = 0; i < g->target.vectorWidth; ++i)
for (int i = 0; i < g->target->getVectorWidth(); ++i)
vals.push_back(v);
return llvm::ConstantVector::get(vals);
}
@@ -318,7 +318,7 @@ LLVMUInt16Vector(uint16_t ival) {
/** Builds an LLVM constant vector from an array of uint16_t values.

    Element i of the result is LLVMUInt16(ivec[i]).  The number of elements
    is the current target's vector width (g->target->getVectorWidth()), so
    ivec must provide at least that many entries.

    @param ivec  Source values, read at indices [0, vector width).
    @return      The result of llvm::ConstantVector::get() on those constants.
 */
llvm::Constant *
LLVMUInt16Vector(const uint16_t *ivec) {
    // g->target is a pointer; the width is exposed through an accessor.
    // Query it once and run a single loop over the lanes.
    const int width = g->target->getVectorWidth();
    std::vector<llvm::Constant *> vals;
    for (int i = 0; i < width; ++i)
        vals.push_back(LLVMUInt16(ivec[i]));
    return llvm::ConstantVector::get(vals);
}
@@ -328,7 +328,7 @@ llvm::Constant *
LLVMInt32Vector(int32_t ival) {
llvm::Constant *v = LLVMInt32(ival);
std::vector<llvm::Constant *> vals;
for (int i = 0; i < g->target.vectorWidth; ++i)
for (int i = 0; i < g->target->getVectorWidth(); ++i)
vals.push_back(v);
return llvm::ConstantVector::get(vals);
}
@@ -337,7 +337,7 @@ LLVMInt32Vector(int32_t ival) {
/** Builds an LLVM constant vector from an array of int32_t values.

    Element i of the result is LLVMInt32(ivec[i]).  The number of elements
    is the current target's vector width (g->target->getVectorWidth()), so
    ivec must provide at least that many entries.

    @param ivec  Source values, read at indices [0, vector width).
    @return      The result of llvm::ConstantVector::get() on those constants.
 */
llvm::Constant *
LLVMInt32Vector(const int32_t *ivec) {
    // g->target is a pointer; the width is exposed through an accessor.
    // Query it once and run a single loop over the lanes.
    const int width = g->target->getVectorWidth();
    std::vector<llvm::Constant *> vals;
    for (int i = 0; i < width; ++i)
        vals.push_back(LLVMInt32(ivec[i]));
    return llvm::ConstantVector::get(vals);
}
@@ -347,7 +347,7 @@ llvm::Constant *
LLVMUInt32Vector(uint32_t ival) {
llvm::Constant *v = LLVMUInt32(ival);
std::vector<llvm::Constant *> vals;
for (int i = 0; i < g->target.vectorWidth; ++i)
for (int i = 0; i < g->target->getVectorWidth(); ++i)
vals.push_back(v);
return llvm::ConstantVector::get(vals);
}
@@ -356,7 +356,7 @@ LLVMUInt32Vector(uint32_t ival) {
/** Builds an LLVM constant vector from an array of uint32_t values.

    Element i of the result is LLVMUInt32(ivec[i]).  The number of elements
    is the current target's vector width (g->target->getVectorWidth()), so
    ivec must provide at least that many entries.

    @param ivec  Source values, read at indices [0, vector width).
    @return      The result of llvm::ConstantVector::get() on those constants.
 */
llvm::Constant *
LLVMUInt32Vector(const uint32_t *ivec) {
    // g->target is a pointer; the width is exposed through an accessor.
    // Query it once and run a single loop over the lanes.
    const int width = g->target->getVectorWidth();
    std::vector<llvm::Constant *> vals;
    for (int i = 0; i < width; ++i)
        vals.push_back(LLVMUInt32(ivec[i]));
    return llvm::ConstantVector::get(vals);
}
@@ -366,7 +366,7 @@ llvm::Constant *
LLVMFloatVector(float fval) {
llvm::Constant *v = LLVMFloat(fval);
std::vector<llvm::Constant *> vals;
for (int i = 0; i < g->target.vectorWidth; ++i)
for (int i = 0; i < g->target->getVectorWidth(); ++i)
vals.push_back(v);
return llvm::ConstantVector::get(vals);
}
@@ -375,7 +375,7 @@ LLVMFloatVector(float fval) {
/** Builds an LLVM constant vector from an array of float values.

    Element i of the result is LLVMFloat(fvec[i]).  The number of elements
    is the current target's vector width (g->target->getVectorWidth()), so
    fvec must provide at least that many entries.

    @param fvec  Source values, read at indices [0, vector width).
    @return      The result of llvm::ConstantVector::get() on those constants.
 */
llvm::Constant *
LLVMFloatVector(const float *fvec) {
    // g->target is a pointer; the width is exposed through an accessor.
    // Query it once and run a single loop over the lanes.
    const int width = g->target->getVectorWidth();
    std::vector<llvm::Constant *> vals;
    for (int i = 0; i < width; ++i)
        vals.push_back(LLVMFloat(fvec[i]));
    return llvm::ConstantVector::get(vals);
}
@@ -385,7 +385,7 @@ llvm::Constant *
LLVMDoubleVector(double dval) {
llvm::Constant *v = LLVMDouble(dval);
std::vector<llvm::Constant *> vals;
for (int i = 0; i < g->target.vectorWidth; ++i)
for (int i = 0; i < g->target->getVectorWidth(); ++i)
vals.push_back(v);
return llvm::ConstantVector::get(vals);
}
@@ -394,7 +394,7 @@ LLVMDoubleVector(double dval) {
/** Builds an LLVM constant vector from an array of double values.

    Element i of the result is LLVMDouble(dvec[i]).  The number of elements
    is the current target's vector width (g->target->getVectorWidth()), so
    dvec must provide at least that many entries.

    @param dvec  Source values, read at indices [0, vector width).
    @return      The result of llvm::ConstantVector::get() on those constants.
 */
llvm::Constant *
LLVMDoubleVector(const double *dvec) {
    // g->target is a pointer; the width is exposed through an accessor.
    // Query it once and run a single loop over the lanes.
    const int width = g->target->getVectorWidth();
    std::vector<llvm::Constant *> vals;
    for (int i = 0; i < width; ++i)
        vals.push_back(LLVMDouble(dvec[i]));
    return llvm::ConstantVector::get(vals);
}
@@ -404,7 +404,7 @@ llvm::Constant *
LLVMInt64Vector(int64_t ival) {
llvm::Constant *v = LLVMInt64(ival);
std::vector<llvm::Constant *> vals;
for (int i = 0; i < g->target.vectorWidth; ++i)
for (int i = 0; i < g->target->getVectorWidth(); ++i)
vals.push_back(v);
return llvm::ConstantVector::get(vals);
}
@@ -413,7 +413,7 @@ LLVMInt64Vector(int64_t ival) {
/** Builds an LLVM constant vector from an array of int64_t values.

    Element i of the result is LLVMInt64(ivec[i]).  The number of elements
    is the current target's vector width (g->target->getVectorWidth()), so
    ivec must provide at least that many entries.

    @param ivec  Source values, read at indices [0, vector width).
    @return      The result of llvm::ConstantVector::get() on those constants.
 */
llvm::Constant *
LLVMInt64Vector(const int64_t *ivec) {
    // g->target is a pointer; the width is exposed through an accessor.
    // Query it once and run a single loop over the lanes.
    const int width = g->target->getVectorWidth();
    std::vector<llvm::Constant *> vals;
    for (int i = 0; i < width; ++i)
        vals.push_back(LLVMInt64(ivec[i]));
    return llvm::ConstantVector::get(vals);
}
@@ -423,7 +423,7 @@ llvm::Constant *
LLVMUInt64Vector(uint64_t ival) {
llvm::Constant *v = LLVMUInt64(ival);
std::vector<llvm::Constant *> vals;
for (int i = 0; i < g->target.vectorWidth; ++i)
for (int i = 0; i < g->target->getVectorWidth(); ++i)
vals.push_back(v);
return llvm::ConstantVector::get(vals);
}
@@ -432,7 +432,7 @@ LLVMUInt64Vector(uint64_t ival) {
/** Builds an LLVM constant vector from an array of uint64_t values.

    Element i of the result is LLVMUInt64(ivec[i]).  The number of elements
    is the current target's vector width (g->target->getVectorWidth()), so
    ivec must provide at least that many entries.

    @param ivec  Source values, read at indices [0, vector width).
    @return      The result of llvm::ConstantVector::get() on those constants.
 */
llvm::Constant *
LLVMUInt64Vector(const uint64_t *ivec) {
    // g->target is a pointer; the width is exposed through an accessor.
    // Query it once and run a single loop over the lanes.
    const int width = g->target->getVectorWidth();
    std::vector<llvm::Constant *> vals;
    for (int i = 0; i < width; ++i)
        vals.push_back(LLVMUInt64(ivec[i]));
    return llvm::ConstantVector::get(vals);
}
@@ -451,7 +451,7 @@ LLVMBoolVector(bool b) {
}
std::vector<llvm::Constant *> vals;
for (int i = 0; i < g->target.vectorWidth; ++i)
for (int i = 0; i < g->target->getVectorWidth(); ++i)
vals.push_back(v);
return llvm::ConstantVector::get(vals);
}
@@ -460,7 +460,7 @@ LLVMBoolVector(bool b) {
llvm::Constant *
LLVMBoolVector(const bool *bvec) {
std::vector<llvm::Constant *> vals;
for (int i = 0; i < g->target.vectorWidth; ++i) {
for (int i = 0; i < g->target->getVectorWidth(); ++i) {
llvm::Constant *v;
if (LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType)
v = llvm::ConstantInt::get(LLVMTypes::Int32Type, bvec[i] ? 0xffffffff : 0,
@@ -697,7 +697,7 @@ lIsExactMultiple(llvm::Value *val, int baseValue, int vectorLength,
llvm::InsertElementInst *ie = llvm::dyn_cast<llvm::InsertElementInst>(val);
if (ie != NULL) {
llvm::Value *elts[ISPC_MAX_NVEC];
LLVMFlattenInsertChain(ie, g->target.vectorWidth, elts);
LLVMFlattenInsertChain(ie, g->target->getVectorWidth(), elts);
// We just need to check the scalar first value, since we know that
// all elements are equal
return lIsExactMultiple(elts[0], baseValue, vectorLength,

View File

@@ -1,5 +1,5 @@
/*
Copyright (c) 2010-2012, Intel Corporation
Copyright (c) 2010-2013, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -110,8 +110,8 @@ extern llvm::Constant *LLVMTrue, *LLVMFalse;
of LLVMTypes and the LLVMTrue/LLVMFalse constants. However, it can't
be called until the compilation target is known.
*/
class Target;  // forward declaration only; the target is passed by reference below
// Single prototype: declaring a by-value overload next to this by-reference
// one would make calls that pass a Target lvalue ambiguous.
extern void InitLLVMUtil(llvm::LLVMContext *ctx, Target& target);
/** Returns an LLVM i8 constant of the given value */
extern llvm::ConstantInt *LLVMInt8(int8_t i);

View File

@@ -1,5 +1,5 @@
/*
Copyright (c) 2010-2012, Intel Corporation
Copyright (c) 2010-2013, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -123,7 +123,7 @@ void RegisterDependency(const std::string &fileName)
static void
lDeclareSizeAndPtrIntTypes(SymbolTable *symbolTable) {
const Type *ptrIntType = (g->target.is32Bit) ? AtomicType::VaryingInt32 :
const Type *ptrIntType = (g->target->is32Bit()) ? AtomicType::VaryingInt32 :
AtomicType::VaryingInt64;
ptrIntType = ptrIntType->GetAsUnboundVariabilityType();
@@ -132,7 +132,7 @@ lDeclareSizeAndPtrIntTypes(SymbolTable *symbolTable) {
SourcePos());
symbolTable->AddType("ptrdiff_t", ptrIntType, SourcePos());
const Type *sizeType = (g->target.is32Bit || g->opt.force32BitAddressing) ?
const Type *sizeType = (g->target->is32Bit() || g->opt.force32BitAddressing) ?
AtomicType::VaryingUInt32 : AtomicType::VaryingUInt64;
sizeType = sizeType->GetAsUnboundVariabilityType();
symbolTable->AddType("size_t", sizeType, SourcePos());
@@ -245,7 +245,7 @@ Module::Module(const char *fn) {
// information has been set (so e.g. the vector width is known...) In
// particular, if we're compiling to multiple targets with different
// vector widths, this needs to be redone each time through.
InitLLVMUtil(g->ctx, g->target);
InitLLVMUtil(g->ctx, *g->target);
filename = fn;
errorCount = 0;
@@ -255,16 +255,10 @@ Module::Module(const char *fn) {
lDeclareSizeAndPtrIntTypes(symbolTable);
module = new llvm::Module(filename ? filename : "<stdin>", *g->ctx);
module->setTargetTriple(g->target.GetTripleString());
module->setTargetTriple(g->target->GetTripleString());
if (g->target.isa == Target::GENERIC) {
// <16 x i1> vectors only need 16 bit / 2 byte alignment, so add
// that to the regular datalayout string for IA..
std::string datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-"
"i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-"
"f80:128:128-n8:16:32:64-S128-v16:16:16-v32:32:32";
module->setDataLayout(datalayout);
}
// DataLayout information is supposed to be managed in a single place: the Target class.
module->setDataLayout(g->target->getDataLayout()->getStringRepresentation());
if (g->generateDebuggingSymbols) {
diBuilder = new llvm::DIBuilder(*module);
@@ -761,7 +755,7 @@ Module::AddFunctionDeclaration(const std::string &name,
if (storageClass != SC_EXTERN_C) {
functionName += functionType->Mangle();
if (g->mangleFunctionsWithTarget)
functionName += g->target.GetISAString();
functionName += g->target->GetISAString();
}
llvm::Function *function =
llvm::Function::Create(llvmFunctionType, linkage, functionName.c_str(),
@@ -785,9 +779,7 @@ Module::AddFunctionDeclaration(const std::string &name,
function->setDoesNotAlias(1);
#endif
#if !defined(LLVM_3_1) && !defined(LLVM_3_2)
function->addAttributes(llvm::AttributeSet::FunctionIndex, *g->target.tf_attributes);
#endif
g->target->markFuncWithTargetAttr(function);
// Make sure that the return type isn't 'varying' or vector typed if
// the function is 'export'ed.
@@ -841,7 +833,7 @@ Module::AddFunctionDeclaration(const std::string &name,
#endif
#if 0
int align = 4 * RoundUpPow2(g->target.nativeVectorWidth);
int align = 4 * RoundUpPow2(g->target->nativeVectorWidth);
function->addAttribute(i+1, llvm::Attribute::constructAlignmentFromInt(align));
#endif
}
@@ -991,14 +983,14 @@ Module::writeOutput(OutputType outputType, const char *outFileName,
else if (outputType == Bitcode)
return writeBitcode(module, outFileName);
else if (outputType == CXX) {
if (g->target.isa != Target::GENERIC) {
if (g->target->getISA() != Target::GENERIC) {
Error(SourcePos(), "Only \"generic-*\" targets can be used with "
"C++ emission.");
return false;
}
extern bool WriteCXXFile(llvm::Module *module, const char *fn,
int vectorWidth, const char *includeName);
return WriteCXXFile(module, outFileName, g->target.vectorWidth,
return WriteCXXFile(module, outFileName, g->target->getVectorWidth(),
includeFileName);
}
else
@@ -1036,7 +1028,7 @@ Module::writeBitcode(llvm::Module *module, const char *outFileName) {
/** Convenience overload that writes this module using the target machine
    of the current compilation target.

    Obtains the llvm::TargetMachine from g->target and forwards to the
    overload that takes an explicit target machine and llvm::Module.

    @param outputType   Kind of output to write; passed through unchanged.
    @param outFileName  Output file name; passed through unchanged.
    @return             Whatever the forwarded-to overload returns.
 */
bool
Module::writeObjectFileOrAssembly(OutputType outputType, const char *outFileName) {
    // g->target is a pointer, so the target machine is fetched with '->'.
    llvm::TargetMachine *targetMachine = g->target->GetTargetMachine();
    return writeObjectFileOrAssembly(targetMachine, module, outputType,
                                     outFileName);
}
@@ -1062,15 +1054,9 @@ Module::writeObjectFileOrAssembly(llvm::TargetMachine *targetMachine,
llvm::PassManager pm;
#if defined(LLVM_3_1)
if (const llvm::TargetData *td = targetMachine->getTargetData())
pm.add(new llvm::TargetData(*td));
else
pm.add(new llvm::TargetData(module));
pm.add(new llvm::TargetData(*g->target->getDataLayout()));
#else
if (const llvm::DataLayout *dl = targetMachine->getDataLayout())
pm.add(new llvm::DataLayout(*dl));
else
pm.add(new llvm::DataLayout(module));
pm.add(new llvm::DataLayout(*g->target->getDataLayout()));
#endif
llvm::formatted_raw_ostream fos(of->os());
@@ -1213,7 +1199,7 @@ lEmitVectorTypedefs(const std::vector<const VectorType *> &types, FILE *file) {
fprintf(file, "// Vector types with external visibility from ispc code\n");
fprintf(file, "///////////////////////////////////////////////////////////////////////////\n\n");
int align = g->target.nativeVectorWidth * 4;
int align = g->target->getNativeVectorWidth() * 4;
for (unsigned int i = 0; i < types.size(); ++i) {
std::string baseDecl;
@@ -1858,7 +1844,7 @@ Module::execPreprocessor(const char *infilename, llvm::raw_string_ostream *ostre
// Add #define for current compilation target
char targetMacro[128];
sprintf(targetMacro, "ISPC_TARGET_%s", g->target.GetISAString());
sprintf(targetMacro, "ISPC_TARGET_%s", g->target->GetISAString());
char *p = targetMacro;
while (*p) {
*p = toupper(*p);
@@ -1866,16 +1852,16 @@ Module::execPreprocessor(const char *infilename, llvm::raw_string_ostream *ostre
}
opts.addMacroDef(targetMacro);
if (g->target.is32Bit)
if (g->target->is32Bit())
opts.addMacroDef("ISPC_POINTER_SIZE=32");
else
opts.addMacroDef("ISPC_POINTER_SIZE=64");
if (g->target.hasHalf)
if (g->target->hasHalf())
opts.addMacroDef("ISPC_TARGET_HAS_HALF");
if (g->target.hasRand)
if (g->target->hasRand())
opts.addMacroDef("ISPC_TARGET_HAS_RAND");
if (g->target.hasTranscendentals)
if (g->target->hasTranscendentals())
opts.addMacroDef("ISPC_TARGET_HAS_TRANSCENDENTALS");
if (g->opt.forceAlignedMemory)
opts.addMacroDef("ISPC_FORCE_ALIGNED_MEMORY");
@@ -1992,7 +1978,7 @@ lGetExportedFunctions(SymbolTable *symbolTable,
symbolTable->GetMatchingFunctions(lSymbolIsExported, &syms);
for (unsigned int i = 0; i < syms.size(); ++i) {
FunctionTargetVariants &ftv = functions[syms[i]->name];
ftv.func[g->target.isa] = syms[i]->exportedFunction;
ftv.func[g->target->getISA()] = syms[i]->exportedFunction;
}
}
@@ -2287,7 +2273,8 @@ Module::CompileAndOutput(const char *srcFile,
{
if (target == NULL || strchr(target, ',') == NULL) {
// We're only compiling to a single target
if (!Target::GetTarget(arch, cpu, target, generatePIC, &g->target))
g->target = new Target(arch, cpu, target, generatePIC);
if (!g->target->isValid())
return 1;
m = new Module(srcFile);
@@ -2331,6 +2318,9 @@ Module::CompileAndOutput(const char *srcFile,
delete m;
m = NULL;
delete g->target;
g->target = NULL;
return errorCount > 0;
}
else {
@@ -2368,19 +2358,19 @@ Module::CompileAndOutput(const char *srcFile,
std::vector<RewriteGlobalInfo> globals[Target::NUM_ISAS];
int errorCount = 0;
for (unsigned int i = 0; i < targets.size(); ++i) {
if (!Target::GetTarget(arch, cpu, targets[i].c_str(), generatePIC,
&g->target))
g->target = new Target(arch, cpu, targets[i].c_str(), generatePIC);
if (!g->target->isValid())
return 1;
// Issue an error if we've already compiled to a variant of
// this target ISA. (It doesn't make sense to compile to both
// avx and avx-x2, for example.)
if (targetMachines[g->target.isa] != NULL) {
if (targetMachines[g->target->getISA()] != NULL) {
Error(SourcePos(), "Can't compile to multiple variants of %s "
"target!\n", g->target.GetISAString());
"target!\n", g->target->GetISAString());
return 1;
}
targetMachines[g->target.isa] = g->target.GetTargetMachine();
targetMachines[g->target->getISA()] = g->target->GetTargetMachine();
m = new Module(srcFile);
if (m->CompileFile() == 0) {
@@ -2392,7 +2382,7 @@ Module::CompileAndOutput(const char *srcFile,
lExtractAndRewriteGlobals(m->module, &globals[i]);
if (outFileName != NULL) {
const char *isaName = g->target.GetISAString();
const char *isaName = g->target->GetISAString();
std::string targetOutFileName =
lGetTargetFileName(outFileName, isaName);
if (!m->writeOutput(outputType, targetOutFileName.c_str()))
@@ -2407,6 +2397,9 @@ Module::CompileAndOutput(const char *srcFile,
if (!m->writeOutput(Module::Header, headerFileName))
return 1;
delete g->target;
g->target = NULL;
// Important: Don't delete the llvm::Module *m here; we need to
// keep it around so the llvm::Functions *s stay valid for when
// we generate the dispatch module's functions...

255
opt.cpp
View File

@@ -1,5 +1,5 @@
/*
Copyright (c) 2010-2012, Intel Corporation
Copyright (c) 2010-2013, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -353,7 +353,7 @@ lGetMask(llvm::Value *factor, uint64_t *mask) {
#if 0
llvm::ConstantExpr *ce = llvm::dyn_cast<llvm::ConstantExpr>(factor);
if (ce != NULL) {
llvm::TargetMachine *targetMachine = g->target.GetTargetMachine();
llvm::TargetMachine *targetMachine = g->target->GetTargetMachine();
const llvm::TargetData *td = targetMachine->getTargetData();
llvm::Constant *c = llvm::ConstantFoldConstantExpression(ce, td);
c->dump();
@@ -382,7 +382,7 @@ lGetMaskStatus(llvm::Value *mask, int vecWidth = -1) {
return ALL_OFF;
if (vecWidth == -1)
vecWidth = g->target.vectorWidth;
vecWidth = g->target->getVectorWidth();
Assert(vecWidth <= 64);
for (int i = 0; i < vecWidth; ++i) {
@@ -414,14 +414,13 @@ Optimize(llvm::Module *module, int optLevel) {
new llvm::TargetLibraryInfo(llvm::Triple(module->getTargetTriple()));
optPM.add(targetLibraryInfo);
#if defined(LLVM_3_1)
optPM.add(new llvm::TargetData(module));
optPM.add(new llvm::TargetData(*g->target->getDataLayout()));
#else
llvm::TargetMachine *targetMachine = g->target.GetTargetMachine();
if (const llvm::DataLayout *dl = targetMachine->getDataLayout())
optPM.add(new llvm::DataLayout(*dl));
else
optPM.add(new llvm::DataLayout(module));
optPM.add(new llvm::DataLayout(*g->target->getDataLayout()));
llvm::TargetMachine *targetMachine = g->target->GetTargetMachine();
#ifdef LLVM_3_2
optPM.add(new llvm::TargetTransformInfo(targetMachine->getScalarTargetTransformInfo(),
targetMachine->getVectorTargetTransformInfo()));
@@ -471,7 +470,7 @@ Optimize(llvm::Module *module, int optLevel) {
optPM.add(llvm::createCFGSimplificationPass());
if (g->opt.disableGatherScatterOptimizations == false &&
g->target.vectorWidth > 1) {
g->target->getVectorWidth() > 1) {
optPM.add(llvm::createInstructionCombiningPass());
optPM.add(CreateImproveMemoryOpsPass());
}
@@ -485,7 +484,7 @@ Optimize(llvm::Module *module, int optLevel) {
// 1) 4 fields (r,g,b,w)
// 2) field size: vectorWidth * sizeof(float)
const int field_limit = 4;
int sr_threshold = g->target.vectorWidth * sizeof(float) * field_limit;
int sr_threshold = g->target->getVectorWidth() * sizeof(float) * field_limit;
// On to more serious optimizations
optPM.add(llvm::createScalarReplAggregatesPass(sr_threshold));
@@ -520,12 +519,12 @@ Optimize(llvm::Module *module, int optLevel) {
}
if (g->opt.disableGatherScatterOptimizations == false &&
g->target.vectorWidth > 1) {
g->target->getVectorWidth() > 1) {
optPM.add(llvm::createInstructionCombiningPass());
optPM.add(CreateImproveMemoryOpsPass());
if (g->opt.disableCoalescing == false &&
g->target.isa != Target::GENERIC) {
g->target->getISA() != Target::GENERIC) {
// It is important to run this here to make it easier to
// find matching gathers we can coalesce.
optPM.add(llvm::createEarlyCSEPass());
@@ -539,7 +538,7 @@ Optimize(llvm::Module *module, int optLevel) {
optPM.add(CreateVSelMovmskOptPass());
if (g->opt.disableGatherScatterOptimizations == false &&
g->target.vectorWidth > 1) {
g->target->getVectorWidth() > 1) {
optPM.add(llvm::createInstructionCombiningPass());
optPM.add(CreateImproveMemoryOpsPass());
}
@@ -1062,18 +1061,18 @@ lGetBasePointer(llvm::Value *v) {
llvm::InsertElementInst *ie = llvm::dyn_cast<llvm::InsertElementInst>(v);
if (ie != NULL) {
llvm::Value *elements[ISPC_MAX_NVEC];
LLVMFlattenInsertChain(ie, g->target.vectorWidth, elements);
LLVMFlattenInsertChain(ie, g->target->getVectorWidth(), elements);
// Make sure none of the elements is undefined.
// TODO: it's probably ok to allow undefined elements and return
// the base pointer if all of the other elements have the same
// value.
for (int i = 0; i < g->target.vectorWidth; ++i)
for (int i = 0; i < g->target->getVectorWidth(); ++i)
if (elements[i] == NULL)
return NULL;
// Do all of the elements have the same value?
for (int i = 0; i < g->target.vectorWidth-1; ++i)
for (int i = 0; i < g->target->getVectorWidth()-1; ++i)
if (elements[i] != elements[i+1])
return NULL;
@@ -1141,7 +1140,7 @@ lGetBasePtrAndOffsets(llvm::Value *ptrs, llvm::Value **offsets,
if (base != NULL) {
// We have a straight up varying pointer with no indexing that's
// actually all the same value.
if (g->target.is32Bit)
if (g->target->is32Bit())
*offsets = LLVMInt32Vector(0);
else
*offsets = LLVMInt64Vector((int64_t)0);
@@ -1197,7 +1196,7 @@ lGetBasePtrAndOffsets(llvm::Value *ptrs, llvm::Value **offsets,
// If the element is just a ptr to int instruction, treat
// it as having an offset of zero
elementBase = ce;
delta[i] = g->target.is32Bit ? LLVMInt32(0) : LLVMInt64(0);
delta[i] = g->target->is32Bit() ? LLVMInt32(0) : LLVMInt64(0);
}
else if (ce->getOpcode() == llvm::Instruction::Add) {
// Try both orderings of the operands to see if we can get
@@ -1748,150 +1747,150 @@ lGSToGSBaseOffsets(llvm::CallInst *callInst) {
GSInfo gsFuncs[] = {
GSInfo("__pseudo_gather32_i8",
g->target.hasGather ? "__pseudo_gather_base_offsets32_i8" :
g->target->hasGather() ? "__pseudo_gather_base_offsets32_i8" :
"__pseudo_gather_factored_base_offsets32_i8",
g->target.hasGather ? "__pseudo_gather_base_offsets32_i8" :
g->target->hasGather() ? "__pseudo_gather_base_offsets32_i8" :
"__pseudo_gather_factored_base_offsets32_i8",
true),
GSInfo("__pseudo_gather32_i16",
g->target.hasGather ? "__pseudo_gather_base_offsets32_i16" :
g->target->hasGather() ? "__pseudo_gather_base_offsets32_i16" :
"__pseudo_gather_factored_base_offsets32_i16",
g->target.hasGather ? "__pseudo_gather_base_offsets32_i16" :
g->target->hasGather() ? "__pseudo_gather_base_offsets32_i16" :
"__pseudo_gather_factored_base_offsets32_i16",
true),
GSInfo("__pseudo_gather32_i32",
g->target.hasGather ? "__pseudo_gather_base_offsets32_i32" :
g->target->hasGather() ? "__pseudo_gather_base_offsets32_i32" :
"__pseudo_gather_factored_base_offsets32_i32",
g->target.hasGather ? "__pseudo_gather_base_offsets32_i32" :
g->target->hasGather() ? "__pseudo_gather_base_offsets32_i32" :
"__pseudo_gather_factored_base_offsets32_i32",
true),
GSInfo("__pseudo_gather32_float",
g->target.hasGather ? "__pseudo_gather_base_offsets32_float" :
g->target->hasGather() ? "__pseudo_gather_base_offsets32_float" :
"__pseudo_gather_factored_base_offsets32_float",
g->target.hasGather ? "__pseudo_gather_base_offsets32_float" :
g->target->hasGather() ? "__pseudo_gather_base_offsets32_float" :
"__pseudo_gather_factored_base_offsets32_float",
true),
GSInfo("__pseudo_gather32_i64",
g->target.hasGather ? "__pseudo_gather_base_offsets32_i64" :
g->target->hasGather() ? "__pseudo_gather_base_offsets32_i64" :
"__pseudo_gather_factored_base_offsets32_i64",
g->target.hasGather ? "__pseudo_gather_base_offsets32_i64" :
g->target->hasGather() ? "__pseudo_gather_base_offsets32_i64" :
"__pseudo_gather_factored_base_offsets32_i64",
true),
GSInfo("__pseudo_gather32_double",
g->target.hasGather ? "__pseudo_gather_base_offsets32_double" :
g->target->hasGather() ? "__pseudo_gather_base_offsets32_double" :
"__pseudo_gather_factored_base_offsets32_double",
g->target.hasGather ? "__pseudo_gather_base_offsets32_double" :
g->target->hasGather() ? "__pseudo_gather_base_offsets32_double" :
"__pseudo_gather_factored_base_offsets32_double",
true),
GSInfo("__pseudo_scatter32_i8",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i8" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i8" :
"__pseudo_scatter_factored_base_offsets32_i8",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i8" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i8" :
"__pseudo_scatter_factored_base_offsets32_i8",
false),
GSInfo("__pseudo_scatter32_i16",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i16" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i16" :
"__pseudo_scatter_factored_base_offsets32_i16",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i16" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i16" :
"__pseudo_scatter_factored_base_offsets32_i16",
false),
GSInfo("__pseudo_scatter32_i32",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i32" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i32" :
"__pseudo_scatter_factored_base_offsets32_i32",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i32" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i32" :
"__pseudo_scatter_factored_base_offsets32_i32",
false),
GSInfo("__pseudo_scatter32_float",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_float" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_float" :
"__pseudo_scatter_factored_base_offsets32_float",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_float" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_float" :
"__pseudo_scatter_factored_base_offsets32_float",
false),
GSInfo("__pseudo_scatter32_i64",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i64" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i64" :
"__pseudo_scatter_factored_base_offsets32_i64",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i64" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i64" :
"__pseudo_scatter_factored_base_offsets32_i64",
false),
GSInfo("__pseudo_scatter32_double",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_double" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_double" :
"__pseudo_scatter_factored_base_offsets32_double",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_double" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_double" :
"__pseudo_scatter_factored_base_offsets32_double",
false),
GSInfo("__pseudo_gather64_i8",
g->target.hasGather ? "__pseudo_gather_base_offsets64_i8" :
g->target->hasGather() ? "__pseudo_gather_base_offsets64_i8" :
"__pseudo_gather_factored_base_offsets64_i8",
g->target.hasGather ? "__pseudo_gather_base_offsets32_i8" :
g->target->hasGather() ? "__pseudo_gather_base_offsets32_i8" :
"__pseudo_gather_factored_base_offsets32_i8",
true),
GSInfo("__pseudo_gather64_i16",
g->target.hasGather ? "__pseudo_gather_base_offsets64_i16" :
g->target->hasGather() ? "__pseudo_gather_base_offsets64_i16" :
"__pseudo_gather_factored_base_offsets64_i16",
g->target.hasGather ? "__pseudo_gather_base_offsets32_i16" :
g->target->hasGather() ? "__pseudo_gather_base_offsets32_i16" :
"__pseudo_gather_factored_base_offsets32_i16",
true),
GSInfo("__pseudo_gather64_i32",
g->target.hasGather ? "__pseudo_gather_base_offsets64_i32" :
g->target->hasGather() ? "__pseudo_gather_base_offsets64_i32" :
"__pseudo_gather_factored_base_offsets64_i32",
g->target.hasGather ? "__pseudo_gather_base_offsets32_i32" :
g->target->hasGather() ? "__pseudo_gather_base_offsets32_i32" :
"__pseudo_gather_factored_base_offsets32_i32",
true),
GSInfo("__pseudo_gather64_float",
g->target.hasGather ? "__pseudo_gather_base_offsets64_float" :
g->target->hasGather() ? "__pseudo_gather_base_offsets64_float" :
"__pseudo_gather_factored_base_offsets64_float",
g->target.hasGather ? "__pseudo_gather_base_offsets32_float" :
g->target->hasGather() ? "__pseudo_gather_base_offsets32_float" :
"__pseudo_gather_factored_base_offsets32_float",
true),
GSInfo("__pseudo_gather64_i64",
g->target.hasGather ? "__pseudo_gather_base_offsets64_i64" :
g->target->hasGather() ? "__pseudo_gather_base_offsets64_i64" :
"__pseudo_gather_factored_base_offsets64_i64",
g->target.hasGather ? "__pseudo_gather_base_offsets32_i64" :
g->target->hasGather() ? "__pseudo_gather_base_offsets32_i64" :
"__pseudo_gather_factored_base_offsets32_i64",
true),
GSInfo("__pseudo_gather64_double",
g->target.hasGather ? "__pseudo_gather_base_offsets64_double" :
g->target->hasGather() ? "__pseudo_gather_base_offsets64_double" :
"__pseudo_gather_factored_base_offsets64_double",
g->target.hasGather ? "__pseudo_gather_base_offsets32_double" :
g->target->hasGather() ? "__pseudo_gather_base_offsets32_double" :
"__pseudo_gather_factored_base_offsets32_double",
true),
GSInfo("__pseudo_scatter64_i8",
g->target.hasScatter ? "__pseudo_scatter_base_offsets64_i8" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets64_i8" :
"__pseudo_scatter_factored_base_offsets64_i8",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i8" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i8" :
"__pseudo_scatter_factored_base_offsets32_i8",
false),
GSInfo("__pseudo_scatter64_i16",
g->target.hasScatter ? "__pseudo_scatter_base_offsets64_i16" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets64_i16" :
"__pseudo_scatter_factored_base_offsets64_i16",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i16" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i16" :
"__pseudo_scatter_factored_base_offsets32_i16",
false),
GSInfo("__pseudo_scatter64_i32",
g->target.hasScatter ? "__pseudo_scatter_base_offsets64_i32" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets64_i32" :
"__pseudo_scatter_factored_base_offsets64_i32",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i32" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i32" :
"__pseudo_scatter_factored_base_offsets32_i32",
false),
GSInfo("__pseudo_scatter64_float",
g->target.hasScatter ? "__pseudo_scatter_base_offsets64_float" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets64_float" :
"__pseudo_scatter_factored_base_offsets64_float",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_float" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_float" :
"__pseudo_scatter_factored_base_offsets32_float",
false),
GSInfo("__pseudo_scatter64_i64",
g->target.hasScatter ? "__pseudo_scatter_base_offsets64_i64" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets64_i64" :
"__pseudo_scatter_factored_base_offsets64_i64",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i64" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i64" :
"__pseudo_scatter_factored_base_offsets32_i64",
false),
GSInfo("__pseudo_scatter64_double",
g->target.hasScatter ? "__pseudo_scatter_base_offsets64_double" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets64_double" :
"__pseudo_scatter_factored_base_offsets64_double",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_double" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_double" :
"__pseudo_scatter_factored_base_offsets32_double",
false),
};
@@ -1933,8 +1932,8 @@ lGSToGSBaseOffsets(llvm::CallInst *callInst) {
llvm::Function *gatherScatterFunc = info->baseOffsetsFunc;
if ((info->isGather == true && g->target.hasGather) ||
(info->isGather == false && g->target.hasScatter)) {
if ((info->isGather == true && g->target->hasGather()) ||
(info->isGather == false && g->target->hasScatter())) {
// See if the offsets are scaled by 2, 4, or 8. If so,
// extract that scale factor and rewrite the offsets to remove
// it.
@@ -2057,65 +2056,65 @@ lGSBaseOffsetsGetMoreConst(llvm::CallInst *callInst) {
};
GSBOInfo gsFuncs[] = {
GSBOInfo(g->target.hasGather ? "__pseudo_gather_base_offsets32_i8" :
GSBOInfo(g->target->hasGather() ? "__pseudo_gather_base_offsets32_i8" :
"__pseudo_gather_factored_base_offsets32_i8",
g->target.hasGather ? "__pseudo_gather_base_offsets32_i8" :
g->target->hasGather() ? "__pseudo_gather_base_offsets32_i8" :
"__pseudo_gather_factored_base_offsets32_i8",
true),
GSBOInfo(g->target.hasGather ? "__pseudo_gather_base_offsets32_i16" :
GSBOInfo(g->target->hasGather() ? "__pseudo_gather_base_offsets32_i16" :
"__pseudo_gather_factored_base_offsets32_i16",
g->target.hasGather ? "__pseudo_gather_base_offsets32_i16" :
g->target->hasGather() ? "__pseudo_gather_base_offsets32_i16" :
"__pseudo_gather_factored_base_offsets32_i16",
true),
GSBOInfo(g->target.hasGather ? "__pseudo_gather_base_offsets32_i32" :
GSBOInfo(g->target->hasGather() ? "__pseudo_gather_base_offsets32_i32" :
"__pseudo_gather_factored_base_offsets32_i32",
g->target.hasGather ? "__pseudo_gather_base_offsets32_i32" :
g->target->hasGather() ? "__pseudo_gather_base_offsets32_i32" :
"__pseudo_gather_factored_base_offsets32_i32",
true),
GSBOInfo(g->target.hasGather ? "__pseudo_gather_base_offsets32_float" :
GSBOInfo(g->target->hasGather() ? "__pseudo_gather_base_offsets32_float" :
"__pseudo_gather_factored_base_offsets32_float",
g->target.hasGather ? "__pseudo_gather_base_offsets32_float" :
g->target->hasGather() ? "__pseudo_gather_base_offsets32_float" :
"__pseudo_gather_factored_base_offsets32_float",
true),
GSBOInfo(g->target.hasGather ? "__pseudo_gather_base_offsets32_i64" :
GSBOInfo(g->target->hasGather() ? "__pseudo_gather_base_offsets32_i64" :
"__pseudo_gather_factored_base_offsets32_i64",
g->target.hasGather ? "__pseudo_gather_base_offsets32_i64" :
g->target->hasGather() ? "__pseudo_gather_base_offsets32_i64" :
"__pseudo_gather_factored_base_offsets32_i64",
true),
GSBOInfo(g->target.hasGather ? "__pseudo_gather_base_offsets32_double" :
GSBOInfo(g->target->hasGather() ? "__pseudo_gather_base_offsets32_double" :
"__pseudo_gather_factored_base_offsets32_double",
g->target.hasGather ? "__pseudo_gather_base_offsets32_double" :
g->target->hasGather() ? "__pseudo_gather_base_offsets32_double" :
"__pseudo_gather_factored_base_offsets32_double",
true),
GSBOInfo( g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i8" :
GSBOInfo( g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i8" :
"__pseudo_scatter_factored_base_offsets32_i8",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i8" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i8" :
"__pseudo_scatter_factored_base_offsets32_i8",
false),
GSBOInfo(g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i16" :
GSBOInfo(g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i16" :
"__pseudo_scatter_factored_base_offsets32_i16",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i16" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i16" :
"__pseudo_scatter_factored_base_offsets32_i16",
false),
GSBOInfo(g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i32" :
GSBOInfo(g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i32" :
"__pseudo_scatter_factored_base_offsets32_i32",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i32" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i32" :
"__pseudo_scatter_factored_base_offsets32_i32",
false),
GSBOInfo(g->target.hasScatter ? "__pseudo_scatter_base_offsets32_float" :
GSBOInfo(g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_float" :
"__pseudo_scatter_factored_base_offsets32_float",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_float" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_float" :
"__pseudo_scatter_factored_base_offsets32_float",
false),
GSBOInfo(g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i64" :
GSBOInfo(g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i64" :
"__pseudo_scatter_factored_base_offsets32_i64",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i64" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i64" :
"__pseudo_scatter_factored_base_offsets32_i64",
false),
GSBOInfo(g->target.hasScatter ? "__pseudo_scatter_base_offsets32_double" :
GSBOInfo(g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_double" :
"__pseudo_scatter_factored_base_offsets32_double",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_double" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_double" :
"__pseudo_scatter_factored_base_offsets32_double",
false),
};
@@ -2208,7 +2207,7 @@ lGetOffsetScaleVec(llvm::Value *offsetScale, llvm::Type *vecType) {
uint64_t scaleValue = offsetScaleInt->getZExtValue();
std::vector<llvm::Constant *> scales;
for (int i = 0; i < g->target.vectorWidth; ++i) {
for (int i = 0; i < g->target->getVectorWidth(); ++i) {
if (vecType == LLVMTypes::Int64VectorType)
scales.push_back(LLVMInt64(scaleValue));
else {
@@ -2240,7 +2239,7 @@ lGSToLoadStore(llvm::CallInst *callInst) {
struct GatherImpInfo {
GatherImpInfo(const char *pName, const char *lmName, llvm::Type *st,
int a)
: align(a), isFactored(!g->target.hasGather) {
: align(a), isFactored(!g->target->hasGather()) {
pseudoFunc = m->module->getFunction(pName);
loadMaskedFunc = m->module->getFunction(lmName);
Assert(pseudoFunc != NULL && loadMaskedFunc != NULL);
@@ -2255,40 +2254,40 @@ lGSToLoadStore(llvm::CallInst *callInst) {
};
GatherImpInfo gInfo[] = {
GatherImpInfo(g->target.hasGather ? "__pseudo_gather_base_offsets32_i8" :
GatherImpInfo(g->target->hasGather() ? "__pseudo_gather_base_offsets32_i8" :
"__pseudo_gather_factored_base_offsets32_i8",
"__masked_load_i8", LLVMTypes::Int8Type, 1),
GatherImpInfo(g->target.hasGather ? "__pseudo_gather_base_offsets32_i16" :
GatherImpInfo(g->target->hasGather() ? "__pseudo_gather_base_offsets32_i16" :
"__pseudo_gather_factored_base_offsets32_i16",
"__masked_load_i16", LLVMTypes::Int16Type, 2),
GatherImpInfo(g->target.hasGather ? "__pseudo_gather_base_offsets32_i32" :
GatherImpInfo(g->target->hasGather() ? "__pseudo_gather_base_offsets32_i32" :
"__pseudo_gather_factored_base_offsets32_i32",
"__masked_load_i32", LLVMTypes::Int32Type, 4),
GatherImpInfo(g->target.hasGather ? "__pseudo_gather_base_offsets32_float" :
GatherImpInfo(g->target->hasGather() ? "__pseudo_gather_base_offsets32_float" :
"__pseudo_gather_factored_base_offsets32_float",
"__masked_load_float", LLVMTypes::FloatType, 4),
GatherImpInfo(g->target.hasGather ? "__pseudo_gather_base_offsets32_i64" :
GatherImpInfo(g->target->hasGather() ? "__pseudo_gather_base_offsets32_i64" :
"__pseudo_gather_factored_base_offsets32_i64",
"__masked_load_i64", LLVMTypes::Int64Type, 8),
GatherImpInfo(g->target.hasGather ? "__pseudo_gather_base_offsets32_double" :
GatherImpInfo(g->target->hasGather() ? "__pseudo_gather_base_offsets32_double" :
"__pseudo_gather_factored_base_offsets32_double",
"__masked_load_double", LLVMTypes::DoubleType, 8),
GatherImpInfo(g->target.hasGather ? "__pseudo_gather_base_offsets64_i8" :
GatherImpInfo(g->target->hasGather() ? "__pseudo_gather_base_offsets64_i8" :
"__pseudo_gather_factored_base_offsets64_i8",
"__masked_load_i8", LLVMTypes::Int8Type, 1),
GatherImpInfo(g->target.hasGather ? "__pseudo_gather_base_offsets64_i16" :
GatherImpInfo(g->target->hasGather() ? "__pseudo_gather_base_offsets64_i16" :
"__pseudo_gather_factored_base_offsets64_i16",
"__masked_load_i16", LLVMTypes::Int16Type, 2),
GatherImpInfo(g->target.hasGather ? "__pseudo_gather_base_offsets64_i32" :
GatherImpInfo(g->target->hasGather() ? "__pseudo_gather_base_offsets64_i32" :
"__pseudo_gather_factored_base_offsets64_i32",
"__masked_load_i32", LLVMTypes::Int32Type, 4),
GatherImpInfo(g->target.hasGather ? "__pseudo_gather_base_offsets64_float" :
GatherImpInfo(g->target->hasGather() ? "__pseudo_gather_base_offsets64_float" :
"__pseudo_gather_factored_base_offsets64_float",
"__masked_load_float", LLVMTypes::FloatType, 4),
GatherImpInfo(g->target.hasGather ? "__pseudo_gather_base_offsets64_i64" :
GatherImpInfo(g->target->hasGather() ? "__pseudo_gather_base_offsets64_i64" :
"__pseudo_gather_factored_base_offsets64_i64",
"__masked_load_i64", LLVMTypes::Int64Type, 8),
GatherImpInfo(g->target.hasGather ? "__pseudo_gather_base_offsets64_double" :
GatherImpInfo(g->target->hasGather() ? "__pseudo_gather_base_offsets64_double" :
"__pseudo_gather_factored_base_offsets64_double",
"__masked_load_double", LLVMTypes::DoubleType, 8),
};
@@ -2296,7 +2295,7 @@ lGSToLoadStore(llvm::CallInst *callInst) {
struct ScatterImpInfo {
ScatterImpInfo(const char *pName, const char *msName,
llvm::Type *vpt, int a)
: align(a), isFactored(!g->target.hasScatter) {
: align(a), isFactored(!g->target->hasScatter()) {
pseudoFunc = m->module->getFunction(pName);
maskedStoreFunc = m->module->getFunction(msName);
vecPtrType = vpt;
@@ -2310,40 +2309,40 @@ lGSToLoadStore(llvm::CallInst *callInst) {
};
ScatterImpInfo sInfo[] = {
ScatterImpInfo(g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i8" :
ScatterImpInfo(g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i8" :
"__pseudo_scatter_factored_base_offsets32_i8",
"__pseudo_masked_store_i8", LLVMTypes::Int8VectorPointerType, 1),
ScatterImpInfo(g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i16" :
ScatterImpInfo(g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i16" :
"__pseudo_scatter_factored_base_offsets32_i16",
"__pseudo_masked_store_i16", LLVMTypes::Int16VectorPointerType, 2),
ScatterImpInfo(g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i32" :
ScatterImpInfo(g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i32" :
"__pseudo_scatter_factored_base_offsets32_i32",
"__pseudo_masked_store_i32", LLVMTypes::Int32VectorPointerType, 4),
ScatterImpInfo(g->target.hasScatter ? "__pseudo_scatter_base_offsets32_float" :
ScatterImpInfo(g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_float" :
"__pseudo_scatter_factored_base_offsets32_float",
"__pseudo_masked_store_float", LLVMTypes::FloatVectorPointerType, 4),
ScatterImpInfo(g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i64" :
ScatterImpInfo(g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i64" :
"__pseudo_scatter_factored_base_offsets32_i64",
"__pseudo_masked_store_i64", LLVMTypes::Int64VectorPointerType, 8),
ScatterImpInfo(g->target.hasScatter ? "__pseudo_scatter_base_offsets32_double" :
ScatterImpInfo(g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_double" :
"__pseudo_scatter_factored_base_offsets32_double",
"__pseudo_masked_store_double", LLVMTypes::DoubleVectorPointerType, 8),
ScatterImpInfo(g->target.hasScatter ? "__pseudo_scatter_base_offsets64_i8" :
ScatterImpInfo(g->target->hasScatter() ? "__pseudo_scatter_base_offsets64_i8" :
"__pseudo_scatter_factored_base_offsets64_i8",
"__pseudo_masked_store_i8", LLVMTypes::Int8VectorPointerType, 1),
ScatterImpInfo(g->target.hasScatter ? "__pseudo_scatter_base_offsets64_i16" :
ScatterImpInfo(g->target->hasScatter() ? "__pseudo_scatter_base_offsets64_i16" :
"__pseudo_scatter_factored_base_offsets64_i16",
"__pseudo_masked_store_i16", LLVMTypes::Int16VectorPointerType, 2),
ScatterImpInfo(g->target.hasScatter ? "__pseudo_scatter_base_offsets64_i32" :
ScatterImpInfo(g->target->hasScatter() ? "__pseudo_scatter_base_offsets64_i32" :
"__pseudo_scatter_factored_base_offsets64_i32",
"__pseudo_masked_store_i32", LLVMTypes::Int32VectorPointerType, 4),
ScatterImpInfo(g->target.hasScatter ? "__pseudo_scatter_base_offsets64_float" :
ScatterImpInfo(g->target->hasScatter() ? "__pseudo_scatter_base_offsets64_float" :
"__pseudo_scatter_factored_base_offsets64_float",
"__pseudo_masked_store_float", LLVMTypes::FloatVectorPointerType, 4),
ScatterImpInfo(g->target.hasScatter ? "__pseudo_scatter_base_offsets64_i64" :
ScatterImpInfo(g->target->hasScatter() ? "__pseudo_scatter_base_offsets64_i64" :
"__pseudo_scatter_factored_base_offsets64_i64",
"__pseudo_masked_store_i64", LLVMTypes::Int64VectorPointerType, 8),
ScatterImpInfo(g->target.hasScatter ? "__pseudo_scatter_base_offsets64_double" :
ScatterImpInfo(g->target->hasScatter() ? "__pseudo_scatter_base_offsets64_double" :
"__pseudo_scatter_factored_base_offsets64_double",
"__pseudo_masked_store_double", LLVMTypes::DoubleVectorPointerType, 8),
};
@@ -2432,8 +2431,8 @@ lGSToLoadStore(llvm::CallInst *callInst) {
ptr->getName(), callInst);
llvm::Value *scalarValue = new llvm::LoadInst(ptr, callInst->getName(), callInst);
llvm::Value *vecValue = llvm::UndefValue::get(callInst->getType());
for (int i = 0; i < g->target.vectorWidth; ++i) {
if (i < g->target.vectorWidth - 1)
for (int i = 0; i < g->target->getVectorWidth(); ++i) {
if (i < g->target->getVectorWidth() - 1)
vecValue = llvm::InsertElementInst::Create(vecValue, scalarValue, LLVMInt32(i),
callInst->getName(), callInst);
else
@@ -2449,7 +2448,7 @@ lGSToLoadStore(llvm::CallInst *callInst) {
// A scatter with everyone going to the same location is
// undefined (if there's more than one program instance in
// the gang). Issue a warning.
if (g->target.vectorWidth > 1)
if (g->target->getVectorWidth() > 1)
Warning(pos, "Undefined behavior: all program instances are "
"writing to the same location!");
@@ -3422,10 +3421,10 @@ lAssembleResultVectors(const std::vector<CoalescedLoadOp> &loadOps,
// And now concatenate 1, 2, or 4 of the 4-wide vectors computed above
// into 4, 8, or 16-wide final result vectors.
int numGathers = constOffsets.size() / g->target.vectorWidth;
int numGathers = constOffsets.size() / g->target->getVectorWidth();
for (int i = 0; i < numGathers; ++i) {
llvm::Value *result = NULL;
switch (g->target.vectorWidth) {
switch (g->target->getVectorWidth()) {
case 4:
result = vec4s[i];
break;
@@ -3486,7 +3485,7 @@ lComputeBasePtr(llvm::CallInst *gatherInst, llvm::Instruction *insertBefore) {
static void
lExtractConstOffsets(const std::vector<llvm::CallInst *> &coalesceGroup,
int elementSize, std::vector<int64_t> *constOffsets) {
int width = g->target.vectorWidth;
int width = g->target->getVectorWidth();
*constOffsets = std::vector<int64_t>(coalesceGroup.size() * width, 0);
int64_t *endPtr = &((*constOffsets)[0]);
@@ -3814,7 +3813,7 @@ lIsSafeToBlend(llvm::Value *lvalue) {
llvm::VectorType *vt =
llvm::dyn_cast<llvm::VectorType>(type);
return (vt != NULL &&
(int)vt->getNumElements() == g->target.vectorWidth);
(int)vt->getNumElements() == g->target->getVectorWidth());
}
else {
llvm::GetElementPtrInst *gep =
@@ -4060,7 +4059,7 @@ lReplacePseudoGS(llvm::CallInst *callInst) {
bool gotPosition = lGetSourcePosFromMetadata(callInst, &pos);
callInst->setCalledFunction(info->actualFunc);
if (gotPosition && g->target.vectorWidth > 1) {
if (gotPosition && g->target->getVectorWidth() > 1) {
if (info->isGather)
PerformanceWarning(pos, "Gather required to load value.");
else

View File

@@ -1,5 +1,5 @@
/*
Copyright (c) 2010-2012, Intel Corporation
Copyright (c) 2010-2013, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -2148,7 +2148,7 @@ lAddFunctionParams(Declarator *decl) {
/** Add a symbol for the built-in mask variable to the symbol table */
static void lAddMaskToSymbolTable(SourcePos pos) {
const Type *t = g->target.maskBitCount == 1 ?
const Type *t = g->target->getMaskBitCount() == 1 ?
AtomicType::VaryingBool : AtomicType::VaryingUInt32;
t = t->GetAsConstType();
Symbol *maskSymbol = new Symbol("__mask", pos, t);

View File

@@ -1,5 +1,5 @@
/*
Copyright (c) 2010-2012, Intel Corporation
Copyright (c) 2010-2013, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -1274,7 +1274,7 @@ lUpdateVaryingCounter(int dim, int nDims, FunctionEmitContext *ctx,
llvm::Value *counter = ctx->LoadInst(uniformCounterPtr);
llvm::Value *smearCounter =
llvm::UndefValue::get(LLVMTypes::Int32VectorType);
for (int i = 0; i < g->target.vectorWidth; ++i)
for (int i = 0; i < g->target->getVectorWidth(); ++i)
smearCounter =
ctx->InsertInst(smearCounter, counter, i, "smear_counter");
@@ -1285,7 +1285,7 @@ lUpdateVaryingCounter(int dim, int nDims, FunctionEmitContext *ctx,
// (0,1,2,3,0,1,2,3), and for the outer dimension we want
// (0,0,0,0,1,1,1,1).
int32_t delta[ISPC_MAX_NVEC];
for (int i = 0; i < g->target.vectorWidth; ++i) {
for (int i = 0; i < g->target->getVectorWidth(); ++i) {
int d = i;
// First, account for the effect of any dimensions at deeper
// nesting levels than the current one.
@@ -1393,7 +1393,7 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
std::vector<llvm::Value *> nExtras, alignedEnd, extrasMaskPtrs;
std::vector<int> span(nDims, 0);
lGetSpans(nDims-1, nDims, g->target.vectorWidth, isTiled, &span[0]);
lGetSpans(nDims-1, nDims, g->target->getVectorWidth(), isTiled, &span[0]);
for (int i = 0; i < nDims; ++i) {
// Basic blocks that we'll fill in later with the looping logic for
@@ -1518,7 +1518,7 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
dimVariables[i]->storagePtr, span);
llvm::Value *smearEnd = llvm::UndefValue::get(LLVMTypes::Int32VectorType);
for (int j = 0; j < g->target.vectorWidth; ++j)
for (int j = 0; j < g->target->getVectorWidth(); ++j)
smearEnd = ctx->InsertInst(smearEnd, endVals[i], j, "smear_end");
// Do a vector compare of its value to the end value to generate a
// mask for this last bit of work.
@@ -1663,7 +1663,7 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
llvm::Value *varyingCounter =
ctx->LoadInst(dimVariables[nDims-1]->storagePtr);
llvm::Value *smearEnd = llvm::UndefValue::get(LLVMTypes::Int32VectorType);
for (int j = 0; j < g->target.vectorWidth; ++j)
for (int j = 0; j < g->target->getVectorWidth(); ++j)
smearEnd = ctx->InsertInst(smearEnd, endVals[nDims-1], j, "smear_end");
llvm::Value *emask =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
@@ -1759,7 +1759,7 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
lUpdateVaryingCounter(nDims-1, nDims, ctx, uniformCounterPtrs[nDims-1],
dimVariables[nDims-1]->storagePtr, span);
llvm::Value *smearEnd = llvm::UndefValue::get(LLVMTypes::Int32VectorType);
for (int j = 0; j < g->target.vectorWidth; ++j)
for (int j = 0; j < g->target->getVectorWidth(); ++j)
smearEnd = ctx->InsertInst(smearEnd, endVals[nDims-1], j, "smear_end");
llvm::Value *emask =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
@@ -1995,7 +1995,7 @@ ForeachActiveStmt::EmitCode(FunctionEmitContext *ctx) const {
// Get the "program index" vector value
llvm::Value *programIndex =
llvm::UndefValue::get(LLVMTypes::Int32VectorType);
for (int i = 0; i < g->target.vectorWidth; ++i)
for (int i = 0; i < g->target->getVectorWidth(); ++i)
programIndex = ctx->InsertInst(programIndex, LLVMInt32(i), i,
"prog_index");
@@ -3103,7 +3103,7 @@ PrintStmt::EmitCode(FunctionEmitContext *ctx) const {
// Set up the rest of the parameters to it
args[0] = ctx->GetStringPtr(format);
args[1] = ctx->GetStringPtr(argTypes);
args[2] = LLVMInt32(g->target.vectorWidth);
args[2] = LLVMInt32(g->target->getVectorWidth());
args[3] = ctx->LaneMask(mask);
std::vector<llvm::Value *> argVec(&args[0], &args[5]);
ctx->CallInst(printFunc, NULL, argVec, "");
@@ -3254,7 +3254,7 @@ DeleteStmt::EmitCode(FunctionEmitContext *ctx) const {
// calling it.
llvm::Function *func = m->module->getFunction("__delete_varying");
AssertPos(pos, func != NULL);
if (g->target.is32Bit)
if (g->target->is32Bit())
exprValue = ctx->ZExtInst(exprValue, LLVMTypes::Int64VectorType,
"ptr_to_64");
ctx->CallInst(func, NULL, exprValue, "");

View File

@@ -1,5 +1,5 @@
/*
Copyright (c) 2010-2012, Intel Corporation
Copyright (c) 2010-2013, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -566,10 +566,10 @@ AtomicType::GetDIType(llvm::DIDescriptor scope) const {
}
else if (variability == Variability::Varying) {
llvm::DIType unifType = GetAsUniformType()->GetDIType(scope);
llvm::Value *sub = m->diBuilder->getOrCreateSubrange(0, g->target.vectorWidth-1);
llvm::Value *sub = m->diBuilder->getOrCreateSubrange(0, g->target->getVectorWidth()-1);
llvm::DIArray subArray = m->diBuilder->getOrCreateArray(sub);
uint64_t size = unifType.getSizeInBits() * g->target.vectorWidth;
uint64_t align = unifType.getAlignInBits() * g->target.vectorWidth;
uint64_t size = unifType.getSizeInBits() * g->target->getVectorWidth();
uint64_t align = unifType.getAlignInBits() * g->target->getVectorWidth();
return m->diBuilder->createVectorType(size, align, unifType, subArray);
}
else {
@@ -830,10 +830,10 @@ EnumType::GetDIType(llvm::DIDescriptor scope) const {
case Variability::Uniform:
return diType;
case Variability::Varying: {
llvm::Value *sub = m->diBuilder->getOrCreateSubrange(0, g->target.vectorWidth-1);
llvm::Value *sub = m->diBuilder->getOrCreateSubrange(0, g->target->getVectorWidth()-1);
llvm::DIArray subArray = m->diBuilder->getOrCreateArray(sub);
uint64_t size = diType.getSizeInBits() * g->target.vectorWidth;
uint64_t align = diType.getAlignInBits() * g->target.vectorWidth;
uint64_t size = diType.getSizeInBits() * g->target->getVectorWidth();
uint64_t align = diType.getAlignInBits() * g->target->getVectorWidth();
return m->diBuilder->createVectorType(size, align, diType, subArray);
}
case Variability::SOA: {
@@ -1173,7 +1173,7 @@ PointerType::GetDIType(llvm::DIDescriptor scope) const {
}
llvm::DIType diTargetType = baseType->GetDIType(scope);
int bitsSize = g->target.is32Bit ? 32 : 64;
int bitsSize = g->target->is32Bit() ? 32 : 64;
int ptrAlignBits = bitsSize;
switch (variability.type) {
case Variability::Uniform:
@@ -1183,7 +1183,7 @@ PointerType::GetDIType(llvm::DIDescriptor scope) const {
// emit them as an array of pointers
llvm::DIType eltType = m->diBuilder->createPointerType(diTargetType,
bitsSize, ptrAlignBits);
return lCreateDIArray(eltType, g->target.vectorWidth);
return lCreateDIArray(eltType, g->target->getVectorWidth());
}
case Variability::SOA: {
ArrayType at(GetAsUniformType(), variability.soaWidth);
@@ -1712,7 +1712,7 @@ VectorType::GetDIType(llvm::DIDescriptor scope) const {
// explicitly aligned to the machines natural vector alignment.
uint64_t align = eltType.getAlignInBits();
if (IsUniformType())
align = 4 * g->target.nativeVectorWidth;
align = 4 * g->target->getNativeVectorWidth();
if (IsUniformType() || IsVaryingType())
return m->diBuilder->createVectorType(sizeBits, align, eltType, subArray);
@@ -1732,11 +1732,11 @@ VectorType::getVectorMemoryCount() const {
if (base->IsVaryingType())
return numElements;
else if (base->IsUniformType()) {
int nativeWidth = g->target.nativeVectorWidth;
int nativeWidth = g->target->getNativeVectorWidth();
if (Type::Equal(base->GetAsUniformType(), AtomicType::UniformInt64) ||
Type::Equal(base->GetAsUniformType(), AtomicType::UniformUInt64) ||
Type::Equal(base->GetAsUniformType(), AtomicType::UniformDouble))
// target.nativeVectorWidth should be in terms of 32-bit
// target.getNativeVectorWidth() should be in terms of 32-bit
// values, so for the 64-bit guys, it takes half as many of
// them to fill the native width
nativeWidth /= 2;
@@ -1778,7 +1778,7 @@ lMangleStructName(const std::string &name, Variability variability) {
std::string n;
// Encode vector width
sprintf(buf, "v%d", g->target.vectorWidth);
sprintf(buf, "v%d", g->target->getVectorWidth());
n += buf;
// Variability