Target class redesign: data members moved to private. Also, an empty target-feature attribute is no longer added (generic targets).

Dmitry Babokin
2013-03-23 14:28:05 +04:00
parent 95d0c5e67b
commit 0f86255279
14 changed files with 575 additions and 507 deletions
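In practice the change has two visible parts in the hunks below: call sites stop reading public fields on the by-value global target (g->target.vectorWidth, g->target.is32Bit, ...) and instead call accessors on a Target pointer (g->target->getVectorWidth(), g->target->is32Bit(), ...), and the per-function "target-features" attribute is applied through a single helper that does nothing when the target has no feature string (the generic targets). A minimal sketch of that helper, condensed from the ispc.cpp hunk further down:

// Centralized attribute handling; the former #if'd addAttributes()
// blocks at call sites collapse into this one call.
void Target::markFuncWithTargetAttr(llvm::Function *func) {
#if !defined(LLVM_3_1) && !defined(LLVM_3_2)
    // m_tf_attributes is only allocated when m_attributes is non-empty,
    // so generic targets (empty feature string) get no attribute at all.
    if (m_tf_attributes)
        func->addAttributes(llvm::AttributeSet::FunctionIndex, *m_tf_attributes);
#endif
}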


@@ -1,5 +1,5 @@
/*
Copyright (c) 2010-2012, Intel Corporation
Copyright (c) 2010-2013, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -210,7 +210,7 @@ lCreateISPCSymbol(llvm::Function *func, SymbolTable *symbolTable) {
// symbol creation code below assumes that any LLVM vector of i32s is a
// varying int32. Here, we need that to be interpreted as a varying
// bool, so just have a one-off override for that one...
if (g->target.maskBitCount != 1 && name == "__sext_varying_bool") {
if (g->target->getMaskBitCount() != 1 && name == "__sext_varying_bool") {
const Type *returnType = AtomicType::VaryingInt32;
llvm::SmallVector<const Type *, 8> argTypes;
argTypes.push_back(AtomicType::VaryingBool);
@@ -599,11 +599,7 @@ lSetInternalFunctions(llvm::Module *module) {
llvm::Function *f = module->getFunction(names[i]);
if (f != NULL && f->empty() == false) {
f->setLinkage(llvm::GlobalValue::InternalLinkage);
#if !defined(LLVM_3_1) && !defined(LLVM_3_2)
f->addAttributes(
llvm::AttributeSet::FunctionIndex,
*g->target.tf_attributes);
#endif
g->target->markFuncWithTargetAttr(f);
}
}
}
@@ -650,7 +646,7 @@ AddBitcodeToModule(const unsigned char *bitcode, int length,
// targets having a layout with 16-bit alignment for 16xi1 vectors.
// As long as builtins-c.c doesn't have any 16xi1 vector types
// (which it shouldn't!), then this override is safe.
if (g->target.isa == Target::GENERIC)
if (g->target->getISA() == Target::GENERIC)
bcModule->setDataLayout(module->getDataLayout());
std::string(linkError);
@@ -737,7 +733,7 @@ lDefineProgramIndex(llvm::Module *module, SymbolTable *symbolTable) {
AtomicType::VaryingInt32->GetAsConstType(), SC_STATIC);
int pi[ISPC_MAX_NVEC];
for (int i = 0; i < g->target.vectorWidth; ++i)
for (int i = 0; i < g->target->getVectorWidth(); ++i)
pi[i] = i;
sym->constValue = new ConstExpr(sym->type, pi, SourcePos());
@@ -770,7 +766,7 @@ void
DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *module,
bool includeStdlibISPC) {
// Add the definitions from the compiled builtins-c.c file
if (g->target.is32Bit) {
if (g->target->is32Bit()) {
extern unsigned char builtins_bitcode_c_32[];
extern int builtins_bitcode_c_32_length;
AddBitcodeToModule(builtins_bitcode_c_32, builtins_bitcode_c_32_length,
@@ -785,13 +781,13 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
// Next, add the target's custom implementations of the various needed
// builtin functions (e.g. __masked_store_32(), etc).
switch (g->target.isa) {
switch (g->target->getISA()) {
case Target::SSE2:
extern unsigned char builtins_bitcode_sse2[];
extern int builtins_bitcode_sse2_length;
extern unsigned char builtins_bitcode_sse2_x2[];
extern int builtins_bitcode_sse2_x2_length;
switch (g->target.vectorWidth) {
switch (g->target->getVectorWidth()) {
case 4:
AddBitcodeToModule(builtins_bitcode_sse2, builtins_bitcode_sse2_length,
module, symbolTable);
@@ -809,7 +805,7 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
extern int builtins_bitcode_sse4_length;
extern unsigned char builtins_bitcode_sse4_x2[];
extern int builtins_bitcode_sse4_x2_length;
switch (g->target.vectorWidth) {
switch (g->target->getVectorWidth()) {
case 4:
AddBitcodeToModule(builtins_bitcode_sse4,
builtins_bitcode_sse4_length,
@@ -825,7 +821,7 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
}
break;
case Target::AVX:
switch (g->target.vectorWidth) {
switch (g->target->getVectorWidth()) {
case 8:
extern unsigned char builtins_bitcode_avx1[];
extern int builtins_bitcode_avx1_length;
@@ -845,7 +841,7 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
}
break;
case Target::AVX11:
switch (g->target.vectorWidth) {
switch (g->target->getVectorWidth()) {
case 8:
extern unsigned char builtins_bitcode_avx11[];
extern int builtins_bitcode_avx11_length;
@@ -865,7 +861,7 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
}
break;
case Target::AVX2:
switch (g->target.vectorWidth) {
switch (g->target->getVectorWidth()) {
case 8:
extern unsigned char builtins_bitcode_avx2[];
extern int builtins_bitcode_avx2_length;
@@ -885,7 +881,7 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
}
break;
case Target::GENERIC:
switch (g->target.vectorWidth) {
switch (g->target->getVectorWidth()) {
case 4:
extern unsigned char builtins_bitcode_generic_4[];
extern int builtins_bitcode_generic_4_length;
@@ -937,7 +933,7 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
}
// define the 'programCount' builtin variable
lDefineConstantInt("programCount", g->target.vectorWidth, module, symbolTable);
lDefineConstantInt("programCount", g->target->getVectorWidth(), module, symbolTable);
// define the 'programIndex' builtin
lDefineProgramIndex(module, symbolTable);
@@ -956,18 +952,18 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
lDefineConstantIntFunc("__fast_masked_vload", (int)g->opt.fastMaskedVload,
module, symbolTable);
lDefineConstantInt("__have_native_half", g->target.hasHalf, module,
lDefineConstantInt("__have_native_half", g->target->hasHalf(), module,
symbolTable);
lDefineConstantInt("__have_native_rand", g->target.hasRand, module,
lDefineConstantInt("__have_native_rand", g->target->hasRand(), module,
symbolTable);
lDefineConstantInt("__have_native_transcendentals", g->target.hasTranscendentals,
lDefineConstantInt("__have_native_transcendentals", g->target->hasTranscendentals(),
module, symbolTable);
if (includeStdlibISPC) {
// If the user wants the standard library to be included, parse the
// serialized version of the stdlib.ispc file to get its
// definitions added.
if (g->target.isa == Target::GENERIC&&g->target.vectorWidth!=1) { // 1 wide uses x86 stdlib
if (g->target->getISA() == Target::GENERIC&&g->target->getVectorWidth()!=1) { // 1 wide uses x86 stdlib
extern char stdlib_generic_code[];
yy_scan_string(stdlib_generic_code);
yyparse();

ctx.cpp (90 changed lines)

@@ -1,5 +1,5 @@
/*
Copyright (c) 2010-2012, Intel Corporation
Copyright (c) 2010-2013, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -308,7 +308,7 @@ FunctionEmitContext::FunctionEmitContext(Function *func, Symbol *funSym,
LLVMMaskAllOn, "__all_on_mask");
char buf[256];
sprintf(buf, "__off_all_on_mask_%s", g->target.GetISAString());
sprintf(buf, "__off_all_on_mask_%s", g->target->GetISAString());
llvm::Constant *offFunc =
m->module->getOrInsertFunction(buf, LLVMTypes::VoidType,
NULL);
@@ -1295,7 +1295,7 @@ FunctionEmitContext::Any(llvm::Value *mask) {
// Call the target-dependent any function to test that the mask is non-zero
std::vector<Symbol *> mm;
m->symbolTable->LookupFunction("__any", &mm);
if (g->target.maskBitCount == 1)
if (g->target->getMaskBitCount() == 1)
AssertPos(currentPos, mm.size() == 1);
else
// There should be one with signed int signature, one unsigned int.
@@ -1313,7 +1313,7 @@ FunctionEmitContext::All(llvm::Value *mask) {
// into an i64 value
std::vector<Symbol *> mm;
m->symbolTable->LookupFunction("__all", &mm);
if (g->target.maskBitCount == 1)
if (g->target->getMaskBitCount() == 1)
AssertPos(currentPos, mm.size() == 1);
else
// There should be one with signed int signature, one unsigned int.
@@ -1331,7 +1331,7 @@ FunctionEmitContext::None(llvm::Value *mask) {
// into an i64 value
std::vector<Symbol *> mm;
m->symbolTable->LookupFunction("__none", &mm);
if (g->target.maskBitCount == 1)
if (g->target->getMaskBitCount() == 1)
AssertPos(currentPos, mm.size() == 1);
else
// There should be one with signed int signature, one unsigned int.
@@ -1349,7 +1349,7 @@ FunctionEmitContext::LaneMask(llvm::Value *v) {
// into an i64 value
std::vector<Symbol *> mm;
m->symbolTable->LookupFunction("__movmsk", &mm);
if (g->target.maskBitCount == 1)
if (g->target->getMaskBitCount() == 1)
AssertPos(currentPos, mm.size() == 1);
else
// There should be one with signed int signature, one unsigned int.
@@ -1405,7 +1405,7 @@ FunctionEmitContext::I1VecToBoolVec(llvm::Value *b) {
return NULL;
}
if (g->target.maskBitCount == 1)
if (g->target->getMaskBitCount() == 1)
return b;
llvm::ArrayType *at =
@@ -1594,7 +1594,7 @@ lArrayVectorWidth(llvm::Type *t) {
llvm::VectorType *vectorElementType =
llvm::dyn_cast<llvm::VectorType>(arrayType->getElementType());
Assert((vectorElementType != NULL &&
(int)vectorElementType->getNumElements() == g->target.vectorWidth));
(int)vectorElementType->getNumElements() == g->target->getVectorWidth()));
return (int)arrayType->getNumElements();
}
@@ -1678,10 +1678,10 @@ lGetMatchingBoolVectorType(llvm::Type *type) {
llvm::VectorType *vectorElementType =
llvm::dyn_cast<llvm::VectorType>(arrayType->getElementType());
Assert(vectorElementType != NULL);
Assert((int)vectorElementType->getNumElements() == g->target.vectorWidth);
Assert((int)vectorElementType->getNumElements() == g->target->getVectorWidth());
llvm::Type *base =
llvm::VectorType::get(LLVMTypes::BoolType, g->target.vectorWidth);
llvm::VectorType::get(LLVMTypes::BoolType, g->target->getVectorWidth());
return llvm::ArrayType::get(base, arrayType->getNumElements());
}
@@ -1741,9 +1741,9 @@ FunctionEmitContext::SmearUniform(llvm::Value *value, const char *name) {
// All other varying types are represented as vectors of the
// underlying type.
ret = llvm::UndefValue::get(llvm::VectorType::get(eltType,
g->target.vectorWidth));
g->target->getVectorWidth()));
for (int i = 0; i < g->target.vectorWidth; ++i) {
for (int i = 0; i < g->target->getVectorWidth(); ++i) {
llvm::Twine n = llvm::Twine("smear.") + llvm::Twine(name ? name : "") +
llvm::Twine(i);
ret = InsertInst(ret, value, i, n.str().c_str());
@@ -1963,7 +1963,7 @@ FunctionEmitContext::applyVaryingGEP(llvm::Value *basePtr, llvm::Value *index,
// Find the scale factor for the index (i.e. the size of the object
// that the pointer(s) point(s) to.
const Type *scaleType = ptrType->GetBaseType();
llvm::Value *scale = g->target.SizeOf(scaleType->LLVMType(g->ctx), bblock);
llvm::Value *scale = g->target->SizeOf(scaleType->LLVMType(g->ctx), bblock);
bool indexIsVarying =
llvm::isa<llvm::VectorType>(index->getType());
@@ -1971,10 +1971,10 @@ FunctionEmitContext::applyVaryingGEP(llvm::Value *basePtr, llvm::Value *index,
if (indexIsVarying == false) {
// Truncate or sign extend the index as appropriate to a 32 or
// 64-bit type.
if ((g->target.is32Bit || g->opt.force32BitAddressing) &&
if ((g->target->is32Bit() || g->opt.force32BitAddressing) &&
index->getType() == LLVMTypes::Int64Type)
index = TruncInst(index, LLVMTypes::Int32Type);
else if ((!g->target.is32Bit && !g->opt.force32BitAddressing) &&
else if ((!g->target->is32Bit() && !g->opt.force32BitAddressing) &&
index->getType() == LLVMTypes::Int32Type)
index = SExtInst(index, LLVMTypes::Int64Type);
@@ -1988,10 +1988,10 @@ FunctionEmitContext::applyVaryingGEP(llvm::Value *basePtr, llvm::Value *index,
else {
// Similarly, truncate or sign extend the index to be a 32 or 64
// bit vector type
if ((g->target.is32Bit || g->opt.force32BitAddressing) &&
if ((g->target->is32Bit() || g->opt.force32BitAddressing) &&
index->getType() == LLVMTypes::Int64VectorType)
index = TruncInst(index, LLVMTypes::Int32VectorType);
else if ((!g->target.is32Bit && !g->opt.force32BitAddressing) &&
else if ((!g->target->is32Bit() && !g->opt.force32BitAddressing) &&
index->getType() == LLVMTypes::Int32VectorType)
index = SExtInst(index, LLVMTypes::Int64VectorType);
@@ -2005,7 +2005,7 @@ FunctionEmitContext::applyVaryingGEP(llvm::Value *basePtr, llvm::Value *index,
// For 64-bit targets, if we've been doing our offset calculations in
// 32 bits, we still have to convert to a 64-bit value before we
// actually add the offset to the pointer.
if (g->target.is32Bit == false && g->opt.force32BitAddressing == true)
if (g->target->is32Bit() == false && g->opt.force32BitAddressing == true)
offset = SExtInst(offset, LLVMTypes::Int64VectorType,
LLVMGetName(offset, "_to_64"));
@@ -2343,7 +2343,7 @@ FunctionEmitContext::AddElementOffset(llvm::Value *fullBasePtr, int elementNum,
if (st != NULL)
// If the pointer is to a structure, Target::StructOffset() gives
// us the offset in bytes to the given element of the structure
offset = g->target.StructOffset(st->LLVMType(g->ctx), elementNum,
offset = g->target->StructOffset(st->LLVMType(g->ctx), elementNum,
bblock);
else {
// Otherwise we should have a vector or array here and the offset
@@ -2353,15 +2353,15 @@ FunctionEmitContext::AddElementOffset(llvm::Value *fullBasePtr, int elementNum,
CastType<SequentialType>(ptrType->GetBaseType());
AssertPos(currentPos, st != NULL);
llvm::Value *size =
g->target.SizeOf(st->GetElementType()->LLVMType(g->ctx), bblock);
llvm::Value *scale = (g->target.is32Bit || g->opt.force32BitAddressing) ?
g->target->SizeOf(st->GetElementType()->LLVMType(g->ctx), bblock);
llvm::Value *scale = (g->target->is32Bit() || g->opt.force32BitAddressing) ?
LLVMInt32(elementNum) : LLVMInt64(elementNum);
offset = BinaryOperator(llvm::Instruction::Mul, size, scale);
}
offset = SmearUniform(offset, "offset_smear");
if (g->target.is32Bit == false && g->opt.force32BitAddressing == true)
if (g->target->is32Bit() == false && g->opt.force32BitAddressing == true)
// If we're doing 32 bit addressing with a 64 bit target, although
// we did the math above in 32 bit, we need to go to 64 bit before
// we add the offset to the varying pointers.
@@ -2583,26 +2583,26 @@ FunctionEmitContext::gather(llvm::Value *ptr, const PointerType *ptrType,
const PointerType *pt = CastType<PointerType>(returnType);
const char *funcName = NULL;
if (pt != NULL)
funcName = g->target.is32Bit ? "__pseudo_gather32_i32" :
funcName = g->target->is32Bit() ? "__pseudo_gather32_i32" :
"__pseudo_gather64_i64";
else if (llvmReturnType == LLVMTypes::DoubleVectorType)
funcName = g->target.is32Bit ? "__pseudo_gather32_double" :
funcName = g->target->is32Bit() ? "__pseudo_gather32_double" :
"__pseudo_gather64_double";
else if (llvmReturnType == LLVMTypes::Int64VectorType)
funcName = g->target.is32Bit ? "__pseudo_gather32_i64" :
funcName = g->target->is32Bit() ? "__pseudo_gather32_i64" :
"__pseudo_gather64_i64";
else if (llvmReturnType == LLVMTypes::FloatVectorType)
funcName = g->target.is32Bit ? "__pseudo_gather32_float" :
funcName = g->target->is32Bit() ? "__pseudo_gather32_float" :
"__pseudo_gather64_float";
else if (llvmReturnType == LLVMTypes::Int32VectorType)
funcName = g->target.is32Bit ? "__pseudo_gather32_i32" :
funcName = g->target->is32Bit() ? "__pseudo_gather32_i32" :
"__pseudo_gather64_i32";
else if (llvmReturnType == LLVMTypes::Int16VectorType)
funcName = g->target.is32Bit ? "__pseudo_gather32_i16" :
funcName = g->target->is32Bit() ? "__pseudo_gather32_i16" :
"__pseudo_gather64_i16";
else {
AssertPos(currentPos, llvmReturnType == LLVMTypes::Int8VectorType);
funcName = g->target.is32Bit ? "__pseudo_gather32_i8" :
funcName = g->target->is32Bit() ? "__pseudo_gather32_i8" :
"__pseudo_gather64_i8";
}
@@ -2684,7 +2684,7 @@ FunctionEmitContext::AllocaInst(llvm::Type *llvmType,
llvm::dyn_cast<llvm::ArrayType>(llvmType);
if (align == 0 && arrayType != NULL &&
!llvm::isa<llvm::VectorType>(arrayType->getElementType()))
align = 4 * g->target.nativeVectorWidth;
align = 4 * g->target->getNativeVectorWidth();
if (align != 0)
inst->setAlignment(align);
@@ -2761,13 +2761,13 @@ FunctionEmitContext::maskedStore(llvm::Value *value, llvm::Value *ptr,
return;
}
if (g->target.is32Bit)
if (g->target->is32Bit())
maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i32");
else
maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_i64");
}
else if (Type::Equal(valueType, AtomicType::VaryingBool) &&
g->target.maskBitCount == 1) {
g->target->getMaskBitCount() == 1) {
llvm::Value *notMask = BinaryOperator(llvm::Instruction::Xor, mask,
LLVMMaskAllOn, "~mask");
llvm::Value *old = LoadInst(ptr);
@@ -2894,31 +2894,31 @@ FunctionEmitContext::scatter(llvm::Value *value, llvm::Value *ptr,
llvm::Type *type = value->getType();
const char *funcName = NULL;
if (pt != NULL) {
funcName = g->target.is32Bit ? "__pseudo_scatter32_i32" :
funcName = g->target->is32Bit() ? "__pseudo_scatter32_i32" :
"__pseudo_scatter64_i64";
}
else if (type == LLVMTypes::DoubleVectorType) {
funcName = g->target.is32Bit ? "__pseudo_scatter32_double" :
funcName = g->target->is32Bit() ? "__pseudo_scatter32_double" :
"__pseudo_scatter64_double";
}
else if (type == LLVMTypes::Int64VectorType) {
funcName = g->target.is32Bit ? "__pseudo_scatter32_i64" :
funcName = g->target->is32Bit() ? "__pseudo_scatter32_i64" :
"__pseudo_scatter64_i64";
}
else if (type == LLVMTypes::FloatVectorType) {
funcName = g->target.is32Bit ? "__pseudo_scatter32_float" :
funcName = g->target->is32Bit() ? "__pseudo_scatter32_float" :
"__pseudo_scatter64_float";
}
else if (type == LLVMTypes::Int32VectorType) {
funcName = g->target.is32Bit ? "__pseudo_scatter32_i32" :
funcName = g->target->is32Bit() ? "__pseudo_scatter32_i32" :
"__pseudo_scatter64_i32";
}
else if (type == LLVMTypes::Int16VectorType) {
funcName = g->target.is32Bit ? "__pseudo_scatter32_i16" :
funcName = g->target->is32Bit() ? "__pseudo_scatter32_i16" :
"__pseudo_scatter64_i16";
}
else if (type == LLVMTypes::Int8VectorType) {
funcName = g->target.is32Bit ? "__pseudo_scatter32_i8" :
funcName = g->target->is32Bit() ? "__pseudo_scatter32_i8" :
"__pseudo_scatter64_i8";
}
@@ -3408,13 +3408,13 @@ FunctionEmitContext::LaunchInst(llvm::Value *callee,
llvm::Function *falloc = m->module->getFunction("ISPCAlloc");
AssertPos(currentPos, falloc != NULL);
llvm::Value *structSize = g->target.SizeOf(argStructType, bblock);
llvm::Value *structSize = g->target->SizeOf(argStructType, bblock);
if (structSize->getType() != LLVMTypes::Int64Type)
// ISPCAlloc expects the size as an uint64_t, but on 32-bit
// targets, SizeOf returns a 32-bit value
structSize = ZExtInst(structSize, LLVMTypes::Int64Type,
"struct_size_to_64");
int align = 4 * RoundUpPow2(g->target.nativeVectorWidth);
int align = 4 * RoundUpPow2(g->target->getNativeVectorWidth());
std::vector<llvm::Value *> allocArgs;
allocArgs.push_back(launchGroupHandlePtr);
@@ -3505,20 +3505,20 @@ FunctionEmitContext::addVaryingOffsetsIfNeeded(llvm::Value *ptr,
// Find the size of a uniform element of the varying type
llvm::Type *llvmBaseUniformType =
baseType->GetAsUniformType()->LLVMType(g->ctx);
llvm::Value *unifSize = g->target.SizeOf(llvmBaseUniformType, bblock);
llvm::Value *unifSize = g->target->SizeOf(llvmBaseUniformType, bblock);
unifSize = SmearUniform(unifSize);
// Compute offset = <0, 1, .. > * unifSize
llvm::Value *varyingOffsets = llvm::UndefValue::get(unifSize->getType());
for (int i = 0; i < g->target.vectorWidth; ++i) {
llvm::Value *iValue = (g->target.is32Bit || g->opt.force32BitAddressing) ?
for (int i = 0; i < g->target->getVectorWidth(); ++i) {
llvm::Value *iValue = (g->target->is32Bit() || g->opt.force32BitAddressing) ?
LLVMInt32(i) : LLVMInt64(i);
varyingOffsets = InsertInst(varyingOffsets, iValue, i, "varying_delta");
}
llvm::Value *offset = BinaryOperator(llvm::Instruction::Mul, unifSize,
varyingOffsets);
if (g->opt.force32BitAddressing == true && g->target.is32Bit == false)
if (g->opt.force32BitAddressing == true && g->target->is32Bit() == false)
// On 64-bit targets where we're doing 32-bit addressing
// calculations, we need to convert to an i64 vector before adding
// to the pointer


@@ -1,5 +1,5 @@
/*
Copyright (c) 2010-2012, Intel Corporation
Copyright (c) 2010-2013, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -175,10 +175,10 @@ DeclSpecs::GetBaseType(SourcePos pos) const {
else
retType = st->GetAsSOAType(soaWidth);
if (soaWidth < g->target.vectorWidth)
if (soaWidth < g->target->getVectorWidth())
PerformanceWarning(pos, "soa<%d> width smaller than gang size %d "
"currently leads to inefficient code to access "
"soa types.", soaWidth, g->target.vectorWidth);
"soa types.", soaWidth, g->target->getVectorWidth());
}
return retType;


@@ -1,5 +1,5 @@
/*
Copyright (c) 2010-2012, Intel Corporation
Copyright (c) 2010-2013, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -662,7 +662,7 @@ InitSymbol(llvm::Value *ptr, const Type *symType, Expr *initExpr,
new llvm::GlobalVariable(*m->module, llvmType, true /* const */,
llvm::GlobalValue::InternalLinkage,
constValue, "const_initializer");
llvm::Value *size = g->target.SizeOf(llvmType,
llvm::Value *size = g->target->SizeOf(llvmType,
ctx->GetCurrentBasicBlock());
ctx->MemcpyInst(ptr, constPtr, size);
}
@@ -1462,12 +1462,12 @@ lEmitBinaryPointerArith(BinaryExpr::Op op, llvm::Value *value0,
// points to in order to return the difference in elements.
llvm::Type *llvmElementType =
ptrType->GetBaseType()->LLVMType(g->ctx);
llvm::Value *size = g->target.SizeOf(llvmElementType,
llvm::Value *size = g->target->SizeOf(llvmElementType,
ctx->GetCurrentBasicBlock());
if (ptrType->IsVaryingType())
size = ctx->SmearUniform(size);
if (g->target.is32Bit == false &&
if (g->target->is32Bit() == false &&
g->opt.force32BitAddressing == true) {
// If we're doing 32-bit addressing math on a 64-bit
// target, then trunc the delta down to a 32-bit value.
@@ -1961,7 +1961,7 @@ BinaryExpr::GetType() const {
else if (op == Sub) {
if (CastType<PointerType>(type1) != NULL) {
// ptr - ptr -> ~ptrdiff_t
const Type *diffType = (g->target.is32Bit ||
const Type *diffType = (g->target->is32Bit() ||
g->opt.force32BitAddressing) ?
AtomicType::UniformInt32 : AtomicType::UniformInt64;
if (type0->IsVaryingType() || type1->IsVaryingType())
@@ -2381,7 +2381,7 @@ BinaryExpr::TypeCheck() {
return NULL;
}
const Type *offsetType = g->target.is32Bit ?
const Type *offsetType = g->target->is32Bit() ?
AtomicType::UniformInt32 : AtomicType::UniformInt64;
if (pt0->IsVaryingType())
offsetType = offsetType->GetAsVaryingType();
@@ -2866,7 +2866,7 @@ AssignExpr::TypeCheck() {
return NULL;
}
const Type *deltaType = g->target.is32Bit ? AtomicType::UniformInt32 :
const Type *deltaType = g->target->is32Bit() ? AtomicType::UniformInt32 :
AtomicType::UniformInt64;
if (lhsType->IsVaryingType())
deltaType = deltaType->GetAsVaryingType();
@@ -3811,7 +3811,7 @@ ExprList::GetConstant(const Type *type) const {
// Uniform short vectors are stored as vectors of length
// rounded up to the native vector width. So we add additional
// undef values here until we get the right size.
int vectorWidth = g->target.nativeVectorWidth;
int vectorWidth = g->target->getNativeVectorWidth();
const VectorType *vt = CastType<VectorType>(type);
const AtomicType *bt = vt->GetElementType();
@@ -3907,7 +3907,7 @@ lAddVaryingOffsetsIfNeeded(FunctionEmitContext *ctx, llvm::Value *ptr,
// Onward: compute the per lane offsets.
llvm::Value *varyingOffsets =
llvm::UndefValue::get(LLVMTypes::Int32VectorType);
for (int i = 0; i < g->target.vectorWidth; ++i)
for (int i = 0; i < g->target->getVectorWidth(); ++i)
varyingOffsets = ctx->InsertInst(varyingOffsets, LLVMInt32(i), i,
"varying_delta");
@@ -4350,7 +4350,7 @@ IndexExpr::TypeCheck() {
// The range of varying index is limited to [0,2^31) as a result.
if (Type::EqualIgnoringConst(indexType->GetAsUniformType(),
AtomicType::UniformInt64) == false ||
g->target.is32Bit ||
g->target->is32Bit() ||
g->opt.force32BitAddressing) {
const Type *indexType = AtomicType::VaryingInt32;
index = TypeConvertExpr(index, indexType, "array index");
@@ -4367,7 +4367,7 @@ IndexExpr::TypeCheck() {
//
// However, the index can be still truncated to signed int32 if
// the index type is 64 bit and --addressing=32.
bool force_32bit = g->target.is32Bit ||
bool force_32bit = g->target->is32Bit() ||
(g->opt.force32BitAddressing &&
Type::EqualIgnoringConst(indexType->GetAsUniformType(),
AtomicType::UniformInt64));
@@ -5492,7 +5492,7 @@ lConvert(const From *from, To *to, int count, bool forceVarying) {
lConvertElement(from[i], &to[i]);
if (forceVarying && count == 1)
for (int i = 1; i < g->target.vectorWidth; ++i)
for (int i = 1; i < g->target->getVectorWidth(); ++i)
to[i] = to[0];
}
@@ -5730,7 +5730,7 @@ ConstExpr::AsUInt32(uint32_t *up, bool forceVarying) const {
int
ConstExpr::Count() const {
return GetType()->IsVaryingType() ? g->target.vectorWidth : 1;
return GetType()->IsVaryingType() ? g->target->getVectorWidth() : 1;
}
@@ -6001,7 +6001,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
case AtomicType::TYPE_UINT16:
case AtomicType::TYPE_UINT32:
case AtomicType::TYPE_UINT64:
if (fromType->IsVaryingType() && g->target.isa != Target::GENERIC)
if (fromType->IsVaryingType() && g->target->getISA() != Target::GENERIC)
PerformanceWarning(pos, "Conversion from unsigned int to float is slow. "
"Use \"int\" if possible");
cast = ctx->CastInst(llvm::Instruction::UIToFP, // unsigned int to float
@@ -6117,14 +6117,14 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
cast = ctx->TruncInst(exprVal, targetType, cOpName);
break;
case AtomicType::TYPE_FLOAT:
if (fromType->IsVaryingType() && g->target.isa != Target::GENERIC)
if (fromType->IsVaryingType() && g->target->getISA() != Target::GENERIC)
PerformanceWarning(pos, "Conversion from float to unsigned int is slow. "
"Use \"int\" if possible");
cast = ctx->CastInst(llvm::Instruction::FPToUI, // unsigned int
exprVal, targetType, cOpName);
break;
case AtomicType::TYPE_DOUBLE:
if (fromType->IsVaryingType() && g->target.isa != Target::GENERIC)
if (fromType->IsVaryingType() && g->target->getISA() != Target::GENERIC)
PerformanceWarning(pos, "Conversion from double to unsigned int is slow. "
"Use \"int\" if possible");
cast = ctx->CastInst(llvm::Instruction::FPToUI, // unsigned int
@@ -6197,7 +6197,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
cast = exprVal;
break;
case AtomicType::TYPE_FLOAT:
if (fromType->IsVaryingType() && g->target.isa != Target::GENERIC)
if (fromType->IsVaryingType() && g->target->getISA() != Target::GENERIC)
PerformanceWarning(pos, "Conversion from float to unsigned int is slow. "
"Use \"int\" if possible");
cast = ctx->CastInst(llvm::Instruction::FPToUI, // unsigned int
@@ -6210,7 +6210,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
cast = ctx->TruncInst(exprVal, targetType, cOpName);
break;
case AtomicType::TYPE_DOUBLE:
if (fromType->IsVaryingType() && g->target.isa != Target::GENERIC)
if (fromType->IsVaryingType() && g->target->getISA() != Target::GENERIC)
PerformanceWarning(pos, "Conversion from double to unsigned int is slow. "
"Use \"int\" if possible");
cast = ctx->CastInst(llvm::Instruction::FPToUI, // unsigned int
@@ -6285,7 +6285,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
cast = exprVal;
break;
case AtomicType::TYPE_FLOAT:
if (fromType->IsVaryingType() && g->target.isa != Target::GENERIC)
if (fromType->IsVaryingType() && g->target->getISA() != Target::GENERIC)
PerformanceWarning(pos, "Conversion from float to unsigned int is slow. "
"Use \"int\" if possible");
cast = ctx->CastInst(llvm::Instruction::FPToUI, // unsigned int
@@ -6296,7 +6296,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
cast = ctx->TruncInst(exprVal, targetType, cOpName);
break;
case AtomicType::TYPE_DOUBLE:
if (fromType->IsVaryingType() && g->target.isa != Target::GENERIC)
if (fromType->IsVaryingType() && g->target->getISA() != Target::GENERIC)
PerformanceWarning(pos, "Conversion from double to unsigned int is slow. "
"Use \"int\" if possible");
cast = ctx->CastInst(llvm::Instruction::FPToUI, // unsigned int
@@ -6367,7 +6367,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
cast = ctx->ZExtInst(exprVal, targetType, cOpName);
break;
case AtomicType::TYPE_FLOAT:
if (fromType->IsVaryingType() && g->target.isa != Target::GENERIC)
if (fromType->IsVaryingType() && g->target->getISA() != Target::GENERIC)
PerformanceWarning(pos, "Conversion from float to unsigned int64 is slow. "
"Use \"int64\" if possible");
cast = ctx->CastInst(llvm::Instruction::FPToUI, // signed int
@@ -6378,7 +6378,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
cast = exprVal;
break;
case AtomicType::TYPE_DOUBLE:
if (fromType->IsVaryingType() && g->target.isa != Target::GENERIC)
if (fromType->IsVaryingType() && g->target->getISA() != Target::GENERIC)
PerformanceWarning(pos, "Conversion from double to unsigned int64 is slow. "
"Use \"int64\" if possible");
cast = ctx->CastInst(llvm::Instruction::FPToUI, // signed int
@@ -6861,7 +6861,7 @@ TypeCastExpr::TypeCheck() {
if (fromPtr != NULL && toAtomic != NULL && toAtomic->IsIntType()) {
bool safeCast = (toAtomic->basicType == AtomicType::TYPE_INT64 ||
toAtomic->basicType == AtomicType::TYPE_UINT64);
if (g->target.is32Bit)
if (g->target->is32Bit())
safeCast |= (toAtomic->basicType == AtomicType::TYPE_INT32 ||
toAtomic->basicType == AtomicType::TYPE_UINT32);
if (safeCast == false)
@@ -7007,7 +7007,7 @@ lConvertPointerConstant(llvm::Constant *c, const Type *constType) {
llvm::Constant *intPtr =
llvm::ConstantExpr::getPtrToInt(c, LLVMTypes::PointerIntType);
Assert(constType->IsVaryingType() || constType->IsSOAType());
int count = constType->IsVaryingType() ? g->target.vectorWidth :
int count = constType->IsVaryingType() ? g->target->getVectorWidth() :
constType->GetSOAWidth();
std::vector<llvm::Constant *> smear;
@@ -7498,13 +7498,13 @@ SizeOfExpr::GetValue(FunctionEmitContext *ctx) const {
if (llvmType == NULL)
return NULL;
return g->target.SizeOf(llvmType, ctx->GetCurrentBasicBlock());
return g->target->SizeOf(llvmType, ctx->GetCurrentBasicBlock());
}
const Type *
SizeOfExpr::GetType() const {
return (g->target.is32Bit || g->opt.force32BitAddressing) ?
return (g->target->is32Bit() || g->opt.force32BitAddressing) ?
AtomicType::UniformUInt32 : AtomicType::UniformUInt64;
}
@@ -8182,7 +8182,7 @@ NewExpr::NewExpr(int typeQual, const Type *t, Expr *init, Expr *count,
llvm::Value *
NewExpr::GetValue(FunctionEmitContext *ctx) const {
bool do32Bit = (g->target.is32Bit || g->opt.force32BitAddressing);
bool do32Bit = (g->target->is32Bit() || g->opt.force32BitAddressing);
// Determine how many elements we need to allocate. Note that this
// will be a varying value if this is a varying new.
@@ -8208,7 +8208,7 @@ NewExpr::GetValue(FunctionEmitContext *ctx) const {
// Compute the total amount of memory to allocate, allocSize, as the
// product of the number of elements to allocate and the size of a
// single element.
llvm::Value *eltSize = g->target.SizeOf(allocType->LLVMType(g->ctx),
llvm::Value *eltSize = g->target->SizeOf(allocType->LLVMType(g->ctx),
ctx->GetCurrentBasicBlock());
if (isVarying)
eltSize = ctx->SmearUniform(eltSize, "smear_size");
@@ -8240,7 +8240,7 @@ NewExpr::GetValue(FunctionEmitContext *ctx) const {
if (retType == NULL)
return NULL;
if (isVarying) {
if (g->target.is32Bit)
if (g->target->is32Bit())
// Convert i64 vector values to i32 if we are compiling to a
// 32-bit target.
ptrValue = ctx->TruncInst(ptrValue, LLVMTypes::VoidPointerVectorType,
@@ -8254,11 +8254,11 @@ NewExpr::GetValue(FunctionEmitContext *ctx) const {
// implemented to return NULL for program instances that aren't
// executing; more generally, we should be using the current
// execution mask for this...
for (int i = 0; i < g->target.vectorWidth; ++i) {
for (int i = 0; i < g->target->getVectorWidth(); ++i) {
llvm::BasicBlock *bbInit = ctx->CreateBasicBlock("init_ptr");
llvm::BasicBlock *bbSkip = ctx->CreateBasicBlock("skip_init");
llvm::Value *p = ctx->ExtractInst(ptrValue, i);
llvm::Value *nullValue = g->target.is32Bit ? LLVMInt32(0) :
llvm::Value *nullValue = g->target->is32Bit() ? LLVMInt32(0) :
LLVMInt64(0);
// Is the pointer for the current lane non-zero?
llvm::Value *nonNull = ctx->CmpInst(llvm::Instruction::ICmp,
@@ -8337,7 +8337,7 @@ NewExpr::TypeCheck() {
}
// Figure out the type that the allocation count should be
const Type *t = (g->target.is32Bit || g->opt.force32BitAddressing) ?
const Type *t = (g->target->is32Bit() || g->opt.force32BitAddressing) ?
AtomicType::UniformUInt32 : AtomicType::UniformUInt64;
if (isVarying)
t = t->GetAsVaryingType();


@@ -1,5 +1,5 @@
/*
Copyright (c) 2011-2012, Intel Corporation
Copyright (c) 2011-2013, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -320,7 +320,7 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
&&
costEstimate > CHECK_MASK_AT_FUNCTION_START_COST);
checkMask &= (type->isUnmasked == false);
checkMask &= (g->target.maskingIsFree == false);
checkMask &= (g->target->getMaskingIsFree() == false);
checkMask &= (g->opt.disableCoherentControlFlow == false);
if (checkMask) {
@@ -450,7 +450,7 @@ Function::GenerateIR() {
llvm::GlobalValue::LinkageTypes linkage = llvm::GlobalValue::ExternalLinkage;
std::string functionName = sym->name;
if (g->mangleFunctionsWithTarget)
functionName += std::string("_") + g->target.GetISAString();
functionName += std::string("_") + g->target->GetISAString();
llvm::Function *appFunction =
llvm::Function::Create(ftype, linkage, functionName.c_str(), m->module);
#if defined(LLVM_3_1)

ispc.cpp (339 changed lines)

@@ -1,5 +1,5 @@
/*
Copyright (c) 2010-2012, Intel Corporation
Copyright (c) 2010-2013, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -143,10 +143,28 @@ static const char *supportedCPUs[] = {
#endif // LLVM_3_2 or LLVM_3_3
};
bool
Target::GetTarget(const char *arch, const char *cpu, const char *isa,
bool pic, Target *t) {
Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
m_target(NULL),
m_valid(false),
m_isa(SSE2),
m_arch(""),
m_is32Bit(true),
m_cpu(""),
m_attributes(""),
#if !defined(LLVM_3_1) && !defined(LLVM_3_2)
m_tf_attributes(NULL),
#endif
m_nativeVectorWidth(-1),
m_vectorWidth(-1),
m_generatePIC(pic),
m_maskingIsFree(false),
m_maskBitCount(-1),
m_hasHalf(false),
m_hasRand(false),
m_hasGather(false),
m_hasScatter(false),
m_hasTranscendentals(false)
{
if (isa == NULL) {
if (cpu != NULL) {
// If a CPU was specified explicitly, try to pick the best
@@ -197,30 +215,27 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
if (foundCPU == false) {
fprintf(stderr, "Error: CPU type \"%s\" unknown. Supported CPUs: "
"%s.\n", cpu, SupportedTargetCPUs().c_str());
return false;
return;
}
}
t->cpu = cpu;
this->m_cpu = cpu;
if (arch == NULL)
arch = "x86-64";
bool error = false;
t->generatePIC = pic;
// Make sure the target architecture is a known one; print an error
// with the valid ones otherwise.
t->target = NULL;
for (llvm::TargetRegistry::iterator iter = llvm::TargetRegistry::begin();
iter != llvm::TargetRegistry::end(); ++iter) {
if (std::string(arch) == iter->getName()) {
t->target = &*iter;
this->m_target = &*iter;
break;
}
}
if (t->target == NULL) {
if (this->m_target == NULL) {
fprintf(stderr, "Invalid architecture \"%s\"\nOptions: ", arch);
llvm::TargetRegistry::iterator iter;
for (iter = llvm::TargetRegistry::begin();
@@ -230,178 +245,176 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
error = true;
}
else {
t->arch = arch;
this->m_arch = arch;
}
// This is the case for most of them
t->hasHalf = t->hasRand = t->hasTranscendentals = false;
t->hasGather = t->hasScatter = false;
// Check default LLVM generated targets
if (!strcasecmp(isa, "sse2")) {
t->isa = Target::SSE2;
t->nativeVectorWidth = 4;
t->vectorWidth = 4;
t->attributes = "+sse,+sse2,-sse3,-sse41,-sse42,-sse4a,-ssse3,-popcnt";
t->maskingIsFree = false;
t->maskBitCount = 32;
this->m_isa = Target::SSE2;
this->m_nativeVectorWidth = 4;
this->m_vectorWidth = 4;
this->m_attributes = "+sse,+sse2,-sse3,-sse41,-sse42,-sse4a,-ssse3,-popcnt";
this->m_maskingIsFree = false;
this->m_maskBitCount = 32;
}
else if (!strcasecmp(isa, "sse2-x2")) {
t->isa = Target::SSE2;
t->nativeVectorWidth = 4;
t->vectorWidth = 8;
t->attributes = "+sse,+sse2,-sse3,-sse41,-sse42,-sse4a,-ssse3,-popcnt";
t->maskingIsFree = false;
t->maskBitCount = 32;
this->m_isa = Target::SSE2;
this->m_nativeVectorWidth = 4;
this->m_vectorWidth = 8;
this->m_attributes = "+sse,+sse2,-sse3,-sse41,-sse42,-sse4a,-ssse3,-popcnt";
this->m_maskingIsFree = false;
this->m_maskBitCount = 32;
}
else if (!strcasecmp(isa, "sse4")) {
t->isa = Target::SSE4;
t->nativeVectorWidth = 4;
t->vectorWidth = 4;
t->attributes = "+sse,+sse2,+sse3,+sse41,-sse42,-sse4a,+ssse3,-popcnt,+cmov";
t->maskingIsFree = false;
t->maskBitCount = 32;
this->m_isa = Target::SSE4;
this->m_nativeVectorWidth = 4;
this->m_vectorWidth = 4;
// TODO: why not sse42 and popcnt?
this->m_attributes = "+sse,+sse2,+sse3,+sse41,-sse42,-sse4a,+ssse3,-popcnt,+cmov";
this->m_maskingIsFree = false;
this->m_maskBitCount = 32;
}
else if (!strcasecmp(isa, "sse4x2") || !strcasecmp(isa, "sse4-x2")) {
t->isa = Target::SSE4;
t->nativeVectorWidth = 4;
t->vectorWidth = 8;
t->attributes = "+sse,+sse2,+sse3,+sse41,-sse42,-sse4a,+ssse3,-popcnt,+cmov";
t->maskingIsFree = false;
t->maskBitCount = 32;
this->m_isa = Target::SSE4;
this->m_nativeVectorWidth = 4;
this->m_vectorWidth = 8;
this->m_attributes = "+sse,+sse2,+sse3,+sse41,-sse42,-sse4a,+ssse3,-popcnt,+cmov";
this->m_maskingIsFree = false;
this->m_maskBitCount = 32;
}
else if (!strcasecmp(isa, "generic-4")) {
t->isa = Target::GENERIC;
t->nativeVectorWidth = 4;
t->vectorWidth = 4;
t->maskingIsFree = true;
t->maskBitCount = 1;
t->hasHalf = true;
t->hasTranscendentals = true;
t->hasGather = t->hasScatter = true;
this->m_isa = Target::GENERIC;
this->m_nativeVectorWidth = 4;
this->m_vectorWidth = 4;
this->m_maskingIsFree = true;
this->m_maskBitCount = 1;
this->m_hasHalf = true;
this->m_hasTranscendentals = true;
this->m_hasGather = this->m_hasScatter = true;
}
else if (!strcasecmp(isa, "generic-8")) {
t->isa = Target::GENERIC;
t->nativeVectorWidth = 8;
t->vectorWidth = 8;
t->maskingIsFree = true;
t->maskBitCount = 1;
t->hasHalf = true;
t->hasTranscendentals = true;
t->hasGather = t->hasScatter = true;
this->m_isa = Target::GENERIC;
this->m_nativeVectorWidth = 8;
this->m_vectorWidth = 8;
this->m_maskingIsFree = true;
this->m_maskBitCount = 1;
this->m_hasHalf = true;
this->m_hasTranscendentals = true;
this->m_hasGather = this->m_hasScatter = true;
}
else if (!strcasecmp(isa, "generic-16")) {
t->isa = Target::GENERIC;
t->nativeVectorWidth = 16;
t->vectorWidth = 16;
t->maskingIsFree = true;
t->maskBitCount = 1;
t->hasHalf = true;
t->hasTranscendentals = true;
t->hasGather = t->hasScatter = true;
this->m_isa = Target::GENERIC;
this->m_nativeVectorWidth = 16;
this->m_vectorWidth = 16;
this->m_maskingIsFree = true;
this->m_maskBitCount = 1;
this->m_hasHalf = true;
this->m_hasTranscendentals = true;
this->m_hasGather = this->m_hasScatter = true;
}
else if (!strcasecmp(isa, "generic-32")) {
t->isa = Target::GENERIC;
t->nativeVectorWidth = 32;
t->vectorWidth = 32;
t->maskingIsFree = true;
t->maskBitCount = 1;
t->hasHalf = true;
t->hasTranscendentals = true;
t->hasGather = t->hasScatter = true;
this->m_isa = Target::GENERIC;
this->m_nativeVectorWidth = 32;
this->m_vectorWidth = 32;
this->m_maskingIsFree = true;
this->m_maskBitCount = 1;
this->m_hasHalf = true;
this->m_hasTranscendentals = true;
this->m_hasGather = this->m_hasScatter = true;
}
else if (!strcasecmp(isa, "generic-64")) {
t->isa = Target::GENERIC;
t->nativeVectorWidth = 64;
t->vectorWidth = 64;
t->maskingIsFree = true;
t->maskBitCount = 1;
t->hasHalf = true;
t->hasTranscendentals = true;
t->hasGather = t->hasScatter = true;
this->m_isa = Target::GENERIC;
this->m_nativeVectorWidth = 64;
this->m_vectorWidth = 64;
this->m_maskingIsFree = true;
this->m_maskBitCount = 1;
this->m_hasHalf = true;
this->m_hasTranscendentals = true;
this->m_hasGather = this->m_hasScatter = true;
}
else if (!strcasecmp(isa, "generic-1")) {
t->isa = Target::GENERIC;
t->nativeVectorWidth = 1;
t->vectorWidth = 1;
t->maskingIsFree = false;
t->maskBitCount = 32;
this->m_isa = Target::GENERIC;
this->m_nativeVectorWidth = 1;
this->m_vectorWidth = 1;
this->m_maskingIsFree = false;
this->m_maskBitCount = 32;
}
else if (!strcasecmp(isa, "avx") || !strcasecmp(isa, "avx1")) {
t->isa = Target::AVX;
t->nativeVectorWidth = 8;
t->vectorWidth = 8;
t->attributes = "+avx,+popcnt,+cmov";
t->maskingIsFree = false;
t->maskBitCount = 32;
this->m_isa = Target::AVX;
this->m_nativeVectorWidth = 8;
this->m_vectorWidth = 8;
this->m_attributes = "+avx,+popcnt,+cmov";
this->m_maskingIsFree = false;
this->m_maskBitCount = 32;
}
else if (!strcasecmp(isa, "avx-x2") || !strcasecmp(isa, "avx1-x2")) {
t->isa = Target::AVX;
t->nativeVectorWidth = 8;
t->vectorWidth = 16;
t->attributes = "+avx,+popcnt,+cmov";
t->maskingIsFree = false;
t->maskBitCount = 32;
this->m_isa = Target::AVX;
this->m_nativeVectorWidth = 8;
this->m_vectorWidth = 16;
this->m_attributes = "+avx,+popcnt,+cmov";
this->m_maskingIsFree = false;
this->m_maskBitCount = 32;
}
else if (!strcasecmp(isa, "avx1.1")) {
t->isa = Target::AVX11;
t->nativeVectorWidth = 8;
t->vectorWidth = 8;
t->attributes = "+avx,+popcnt,+cmov,+f16c,+rdrand";
t->maskingIsFree = false;
t->maskBitCount = 32;
t->hasHalf = true;
this->m_isa = Target::AVX11;
this->m_nativeVectorWidth = 8;
this->m_vectorWidth = 8;
this->m_attributes = "+avx,+popcnt,+cmov,+f16c,+rdrand";
this->m_maskingIsFree = false;
this->m_maskBitCount = 32;
this->m_hasHalf = true;
#if !defined(LLVM_3_1)
// LLVM 3.2+ only
t->hasRand = true;
this->m_hasRand = true;
#endif
}
else if (!strcasecmp(isa, "avx1.1-x2")) {
t->isa = Target::AVX11;
t->nativeVectorWidth = 8;
t->vectorWidth = 16;
t->attributes = "+avx,+popcnt,+cmov,+f16c,+rdrand";
t->maskingIsFree = false;
t->maskBitCount = 32;
t->hasHalf = true;
this->m_isa = Target::AVX11;
this->m_nativeVectorWidth = 8;
this->m_vectorWidth = 16;
this->m_attributes = "+avx,+popcnt,+cmov,+f16c,+rdrand";
this->m_maskingIsFree = false;
this->m_maskBitCount = 32;
this->m_hasHalf = true;
#if !defined(LLVM_3_1)
// LLVM 3.2+ only
t->hasRand = true;
this->m_hasRand = true;
#endif
}
else if (!strcasecmp(isa, "avx2")) {
t->isa = Target::AVX2;
t->nativeVectorWidth = 8;
t->vectorWidth = 8;
t->attributes = "+avx2,+popcnt,+cmov,+f16c,+rdrand"
this->m_isa = Target::AVX2;
this->m_nativeVectorWidth = 8;
this->m_vectorWidth = 8;
this->m_attributes = "+avx2,+popcnt,+cmov,+f16c,+rdrand"
#ifndef LLVM_3_1
",+fma"
#endif // !LLVM_3_1
;
t->maskingIsFree = false;
t->maskBitCount = 32;
t->hasHalf = true;
this->m_maskingIsFree = false;
this->m_maskBitCount = 32;
this->m_hasHalf = true;
#if !defined(LLVM_3_1)
// LLVM 3.2+ only
t->hasRand = true;
t->hasGather = true;
this->m_hasRand = true;
this->m_hasGather = true;
#endif
}
else if (!strcasecmp(isa, "avx2-x2")) {
t->isa = Target::AVX2;
t->nativeVectorWidth = 16;
t->vectorWidth = 16;
t->attributes = "+avx2,+popcnt,+cmov,+f16c,+rdrand"
this->m_isa = Target::AVX2;
this->m_nativeVectorWidth = 16;
this->m_vectorWidth = 16;
this->m_attributes = "+avx2,+popcnt,+cmov,+f16c,+rdrand"
#ifndef LLVM_3_1
",+fma"
#endif // !LLVM_3_1
;
t->maskingIsFree = false;
t->maskBitCount = 32;
t->hasHalf = true;
this->m_maskingIsFree = false;
this->m_maskBitCount = 32;
this->m_hasHalf = true;
#if !defined(LLVM_3_1)
// LLVM 3.2+ only
t->hasRand = true;
t->hasGather = true;
this->m_hasRand = true;
this->m_hasGather = true;
#endif
}
else {
@@ -411,32 +424,36 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
}
if (!error) {
llvm::TargetMachine *targetMachine = t->GetTargetMachine();
llvm::TargetMachine *targetMachine = this->GetTargetMachine();
#if defined(LLVM_3_1)
const llvm::TargetData *targetData = targetMachine->getTargetData();
t->is32Bit = (targetData->getPointerSize() == 4);
this->m_is32Bit = (targetData->getPointerSize() == 4);
#else
int addressSpace = 0;
const llvm::DataLayout *dataLayout = targetMachine->getDataLayout();
t->is32Bit = (dataLayout->getPointerSize(addressSpace) == 4);
this->m_is32Bit = (dataLayout->getPointerSize(addressSpace) == 4);
#endif
#if !defined(LLVM_3_1) && !defined(LLVM_3_2)
// This is LLVM 3.3+ feature.
// Initialize target-specific "target-feature" attribute.
llvm::AttrBuilder attrBuilder;
attrBuilder.addAttribute("target-features", t->attributes);
t->tf_attributes = new llvm::AttributeSet(
llvm::AttributeSet::get(
*g->ctx,
llvm::AttributeSet::FunctionIndex,
attrBuilder));
if (!m_attributes.empty()) {
llvm::AttrBuilder attrBuilder;
attrBuilder.addAttribute("target-features", this->m_attributes);
this->m_tf_attributes = new llvm::AttributeSet(
llvm::AttributeSet::get(
*g->ctx,
llvm::AttributeSet::FunctionIndex,
attrBuilder));
}
#endif
Assert(t->vectorWidth <= ISPC_MAX_NVEC);
Assert(this->m_vectorWidth <= ISPC_MAX_NVEC);
}
return !error;
m_valid = !error;
return;
}
@@ -479,12 +496,12 @@ Target::GetTripleString() const {
// slightly different ones for the triple. TODO: is there a way to
// have it do this remapping, which would presumably be a bit less
// error prone?
if (arch == "x86")
if (m_arch == "x86")
triple.setArchName("i386");
else if (arch == "x86-64")
else if (m_arch == "x86-64")
triple.setArchName("x86_64");
else
triple.setArchName(arch);
triple.setArchName(m_arch);
return triple.str();
}
@@ -494,16 +511,16 @@ llvm::TargetMachine *
Target::GetTargetMachine() const {
std::string triple = GetTripleString();
llvm::Reloc::Model relocModel = generatePIC ? llvm::Reloc::PIC_ :
llvm::Reloc::Model relocModel = m_generatePIC ? llvm::Reloc::PIC_ :
llvm::Reloc::Default;
std::string featuresString = attributes;
std::string featuresString = m_attributes;
llvm::TargetOptions options;
#if !defined(LLVM_3_1)
if (g->opt.disableFMA == false)
options.AllowFPOpFusion = llvm::FPOpFusion::Fast;
#endif // !LLVM_3_1
llvm::TargetMachine *targetMachine =
target->createTargetMachine(triple, cpu, featuresString, options,
m_target->createTargetMachine(triple, m_cpu, featuresString, options,
relocModel);
Assert(targetMachine != NULL);
@@ -514,7 +531,7 @@ Target::GetTargetMachine() const {
const char *
Target::GetISAString() const {
switch (isa) {
switch (m_isa) {
case Target::SSE2:
return "sse2";
case Target::SSE4:
@@ -571,7 +588,7 @@ lGenericTypeLayoutIndeterminate(llvm::Type *type) {
llvm::Value *
Target::SizeOf(llvm::Type *type,
llvm::BasicBlock *insertAtEnd) {
if (isa == Target::GENERIC &&
if (m_isa == Target::GENERIC &&
lGenericTypeLayoutIndeterminate(type)) {
llvm::Value *index[1] = { LLVMInt32(1) };
llvm::PointerType *ptrType = llvm::PointerType::get(type, 0);
@@ -581,7 +598,7 @@ Target::SizeOf(llvm::Type *type,
llvm::GetElementPtrInst::Create(voidPtr, arrayRef, "sizeof_gep",
insertAtEnd);
if (is32Bit || g->opt.force32BitAddressing)
if (m_is32Bit || g->opt.force32BitAddressing)
return new llvm::PtrToIntInst(gep, LLVMTypes::Int32Type,
"sizeof_int", insertAtEnd);
else
@@ -601,7 +618,7 @@ Target::SizeOf(llvm::Type *type,
Assert((bitSize % 8) == 0);
uint64_t byteSize = bitSize / 8;
if (is32Bit || g->opt.force32BitAddressing)
if (m_is32Bit || g->opt.force32BitAddressing)
return LLVMInt32((int32_t)byteSize);
else
return LLVMInt64(byteSize);
@@ -611,7 +628,7 @@ Target::SizeOf(llvm::Type *type,
llvm::Value *
Target::StructOffset(llvm::Type *type, int element,
llvm::BasicBlock *insertAtEnd) {
if (isa == Target::GENERIC &&
if (m_isa == Target::GENERIC &&
lGenericTypeLayoutIndeterminate(type) == true) {
llvm::Value *indices[2] = { LLVMInt32(0), LLVMInt32(element) };
llvm::PointerType *ptrType = llvm::PointerType::get(type, 0);
@@ -621,7 +638,7 @@ Target::StructOffset(llvm::Type *type, int element,
llvm::GetElementPtrInst::Create(voidPtr, arrayRef, "offset_gep",
insertAtEnd);
if (is32Bit || g->opt.force32BitAddressing)
if (m_is32Bit || g->opt.force32BitAddressing)
return new llvm::PtrToIntInst(gep, LLVMTypes::Int32Type,
"offset_int", insertAtEnd);
else
@@ -648,12 +665,20 @@ Target::StructOffset(llvm::Type *type, int element,
Assert(sl != NULL);
uint64_t offset = sl->getElementOffset(element);
if (is32Bit || g->opt.force32BitAddressing)
if (m_is32Bit || g->opt.force32BitAddressing)
return LLVMInt32((int32_t)offset);
else
return LLVMInt64(offset);
}
void Target::markFuncWithTargetAttr(llvm::Function* func) {
#if !defined(LLVM_3_1) && !defined(LLVM_3_2)
if (m_tf_attributes) {
func->addAttributes(llvm::AttributeSet::FunctionIndex, *m_tf_attributes);
}
#endif
}
///////////////////////////////////////////////////////////////////////////
// Opt

ispc.h (90 changed lines)

@@ -1,5 +1,5 @@
/*
Copyright (c) 2010-2012, Intel Corporation
Copyright (c) 2010-2013, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -32,7 +32,7 @@
*/
/** @file ispc.h
@brief Main ispc.header file
@brief Main ispc.header file. Defines Target, Globals and Opt classes.
*/
#ifndef ISPC_H
@@ -162,12 +162,12 @@ extern void DoAssertPos(SourcePos pos, const char *file, int line, const char *e
This structure defines a compilation target for the ispc compiler.
*/
struct Target {
class Target {
public:
/** Initializes the given Target pointer for a target of the given
name, if the name is a known target. Returns true if the
target was initialized and false if the name is unknown. */
static bool GetTarget(const char *arch, const char *cpu, const char *isa,
bool pic, Target *);
Target(const char *arch, const char *cpu, const char *isa, bool pic);
/** Returns a comma-delimited string giving the names of the currently
supported target ISAs. */
@@ -202,8 +202,8 @@ struct Target {
llvm::Value *StructOffset(llvm::Type *type,
int element, llvm::BasicBlock *insertAtEnd);
/** llvm Target object representing this target. */
const llvm::Target *target;
/** Mark LLVM function with target specific attribute, if required. */
void markFuncWithTargetAttr(llvm::Function* func);
/** Enumerator giving the instruction sets that the compiler can
target. These should be ordered from "worse" to "better" in that
@@ -213,68 +213,110 @@ struct Target {
added or the enumerant values are reordered. */
enum ISA { SSE2, SSE4, AVX, AVX11, AVX2, GENERIC, NUM_ISAS };
const llvm::Target *getTarget() const {return m_target;}
/** Reports if Target object has valid state. */
bool isValid() const {return m_valid;}
ISA getISA() const {return m_isa;}
std::string getArch() const {return m_arch;}
bool is32Bit() const {return m_is32Bit;}
std::string getCPU() const {return m_cpu;}
int getNativeVectorWidth() const {return m_nativeVectorWidth;}
int getVectorWidth() const {return m_vectorWidth;}
bool getGeneratePIC() const {return m_generatePIC;}
bool getMaskingIsFree() const {return m_maskingIsFree;}
int getMaskBitCount() const {return m_maskBitCount;}
bool hasHalf() const {return m_hasHalf;}
bool hasRand() const {return m_hasRand;}
bool hasGather() const {return m_hasGather;}
bool hasScatter() const {return m_hasScatter;}
bool hasTranscendentals() const {return m_hasTranscendentals;}
private:
/** llvm Target object representing this target. */
const llvm::Target *m_target;
/** flag to report invalid state after construction
(due to bad parameters passed to constructor). */
bool m_valid;
/** Instruction set being compiled to. */
ISA isa;
ISA m_isa;
/** Target system architecture. (e.g. "x86-64", "x86"). */
std::string arch;
std::string m_arch;
/** Is the target architecture 32 or 64 bit */
bool is32Bit;
bool m_is32Bit;
/** Target CPU. (e.g. "corei7", "corei7-avx", ..) */
std::string cpu;
std::string m_cpu;
/** Target-specific attribute string to pass along to the LLVM backend */
std::string attributes;
std::string m_attributes;
#if !defined(LLVM_3_1) && !defined(LLVM_3_2)
/** Target-specific LLVM attribute, which has to be attached to every
function to ensure that it is generated for correct target architecture.
This requirement was introduced in LLVM 3.3 */
llvm::AttributeSet* tf_attributes;
llvm::AttributeSet* m_tf_attributes;
#endif
/** Native vector width of the vector instruction set. Note that this
value is directly derived from the ISA Being used (e.g. it's 4 for
SSE, 8 for AVX, etc.) */
int nativeVectorWidth;
int m_nativeVectorWidth;
/** Actual vector width currently being compiled to. This may be an
integer multiple of the native vector width, for example if we're
"doubling up" and compiling 8-wide on a 4-wide SSE system. */
int vectorWidth;
int m_vectorWidth;
/** Indicates whether position independent code should be generated. */
bool generatePIC;
bool m_generatePIC;
/** Is there overhead associated with masking on the target
architecture; e.g. there is on SSE, due to extra blends and the
like, but there isn't with an ISA that supports masking
natively. */
bool maskingIsFree;
bool m_maskingIsFree;
/** How many bits are used to store each element of the mask: e.g. this
is 32 on SSE/AVX, since that matches the HW better, but it's 1 for
the generic target. */
int maskBitCount;
int m_maskBitCount;
/** Indicates whether the target has native support for float/half
conversions. */
bool hasHalf;
bool m_hasHalf;
/** Indicates whether there is an ISA random number instruction. */
bool hasRand;
bool m_hasRand;
/** Indicates whether the target has a native gather instruction */
bool hasGather;
bool m_hasGather;
/** Indicates whether the target has a native scatter instruction */
bool hasScatter;
bool m_hasScatter;
/** Indicates whether the target has support for transcendentals (beyond
sqrt, which we assume that all of them handle). */
bool hasTranscendentals;
bool m_hasTranscendentals;
};
@@ -401,7 +443,7 @@ struct Globals {
/** Optimization option settings */
Opt opt;
/** Compilation target information */
Target target;
Target* target;
/** There are a number of math libraries that can be used for
transcendentals and the like during program compilation. */


@@ -1,5 +1,5 @@
/*
Copyright (c) 2010-2012, Intel Corporation
Copyright (c) 2010-2013, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -94,10 +94,10 @@ llvm::Constant *LLVMMaskAllOff = NULL;
void
InitLLVMUtil(llvm::LLVMContext *ctx, Target target) {
InitLLVMUtil(llvm::LLVMContext *ctx, Target& target) {
LLVMTypes::VoidType = llvm::Type::getVoidTy(*ctx);
LLVMTypes::VoidPointerType = llvm::PointerType::get(llvm::Type::getInt8Ty(*ctx), 0);
LLVMTypes::PointerIntType = target.is32Bit ? llvm::Type::getInt32Ty(*ctx) :
LLVMTypes::PointerIntType = target.is32Bit() ? llvm::Type::getInt32Ty(*ctx) :
llvm::Type::getInt64Ty(*ctx);
LLVMTypes::BoolType = llvm::Type::getInt1Ty(*ctx);
@@ -115,29 +115,29 @@ InitLLVMUtil(llvm::LLVMContext *ctx, Target target) {
LLVMTypes::FloatPointerType = llvm::PointerType::get(LLVMTypes::FloatType, 0);
LLVMTypes::DoublePointerType = llvm::PointerType::get(LLVMTypes::DoubleType, 0);
if (target.maskBitCount == 1)
if (target.getMaskBitCount() == 1)
LLVMTypes::MaskType = LLVMTypes::BoolVectorType =
llvm::VectorType::get(llvm::Type::getInt1Ty(*ctx), target.vectorWidth);
llvm::VectorType::get(llvm::Type::getInt1Ty(*ctx), target.getVectorWidth());
else {
Assert(target.maskBitCount == 32);
Assert(target.getMaskBitCount() == 32);
LLVMTypes::MaskType = LLVMTypes::BoolVectorType =
llvm::VectorType::get(llvm::Type::getInt32Ty(*ctx), target.vectorWidth);
llvm::VectorType::get(llvm::Type::getInt32Ty(*ctx), target.getVectorWidth());
}
LLVMTypes::Int1VectorType =
llvm::VectorType::get(llvm::Type::getInt1Ty(*ctx), target.vectorWidth);
llvm::VectorType::get(llvm::Type::getInt1Ty(*ctx), target.getVectorWidth());
LLVMTypes::Int8VectorType =
llvm::VectorType::get(LLVMTypes::Int8Type, target.vectorWidth);
llvm::VectorType::get(LLVMTypes::Int8Type, target.getVectorWidth());
LLVMTypes::Int16VectorType =
llvm::VectorType::get(LLVMTypes::Int16Type, target.vectorWidth);
llvm::VectorType::get(LLVMTypes::Int16Type, target.getVectorWidth());
LLVMTypes::Int32VectorType =
llvm::VectorType::get(LLVMTypes::Int32Type, target.vectorWidth);
llvm::VectorType::get(LLVMTypes::Int32Type, target.getVectorWidth());
LLVMTypes::Int64VectorType =
llvm::VectorType::get(LLVMTypes::Int64Type, target.vectorWidth);
llvm::VectorType::get(LLVMTypes::Int64Type, target.getVectorWidth());
LLVMTypes::FloatVectorType =
llvm::VectorType::get(LLVMTypes::FloatType, target.vectorWidth);
llvm::VectorType::get(LLVMTypes::FloatType, target.getVectorWidth());
LLVMTypes::DoubleVectorType =
llvm::VectorType::get(LLVMTypes::DoubleType, target.vectorWidth);
llvm::VectorType::get(LLVMTypes::DoubleType, target.getVectorWidth());
LLVMTypes::Int8VectorPointerType = llvm::PointerType::get(LLVMTypes::Int8VectorType, 0);
LLVMTypes::Int16VectorPointerType = llvm::PointerType::get(LLVMTypes::Int16VectorType, 0);
@@ -146,7 +146,7 @@ InitLLVMUtil(llvm::LLVMContext *ctx, Target target) {
LLVMTypes::FloatVectorPointerType = llvm::PointerType::get(LLVMTypes::FloatVectorType, 0);
LLVMTypes::DoubleVectorPointerType = llvm::PointerType::get(LLVMTypes::DoubleVectorType, 0);
LLVMTypes::VoidPointerVectorType = g->target.is32Bit ? LLVMTypes::Int32VectorType :
LLVMTypes::VoidPointerVectorType = g->target->is32Bit() ? LLVMTypes::Int32VectorType :
LLVMTypes::Int64VectorType;
LLVMTrue = llvm::ConstantInt::getTrue(*ctx);
@@ -154,27 +154,27 @@ InitLLVMUtil(llvm::LLVMContext *ctx, Target target) {
std::vector<llvm::Constant *> maskOnes;
llvm::Constant *onMask = NULL;
if (target.maskBitCount == 1)
if (target.getMaskBitCount() == 1)
onMask = llvm::ConstantInt::get(llvm::Type::getInt1Ty(*ctx), 1,
false /*unsigned*/); // 0x1
else
onMask = llvm::ConstantInt::get(llvm::Type::getInt32Ty(*ctx), -1,
true /*signed*/); // 0xffffffff
for (int i = 0; i < target.vectorWidth; ++i)
for (int i = 0; i < target.getVectorWidth(); ++i)
maskOnes.push_back(onMask);
LLVMMaskAllOn = llvm::ConstantVector::get(maskOnes);
std::vector<llvm::Constant *> maskZeros;
llvm::Constant *offMask = NULL;
if (target.maskBitCount == 1)
if (target.getMaskBitCount() == 1)
offMask = llvm::ConstantInt::get(llvm::Type::getInt1Ty(*ctx), 0,
true /*signed*/);
else
offMask = llvm::ConstantInt::get(llvm::Type::getInt32Ty(*ctx), 0,
true /*signed*/);
for (int i = 0; i < target.vectorWidth; ++i)
for (int i = 0; i < target.getVectorWidth(); ++i)
maskZeros.push_back(offMask);
LLVMMaskAllOff = llvm::ConstantVector::get(maskZeros);
}
@@ -252,7 +252,7 @@ llvm::Constant *
LLVMInt8Vector(int8_t ival) {
llvm::Constant *v = LLVMInt8(ival);
std::vector<llvm::Constant *> vals;
for (int i = 0; i < g->target.vectorWidth; ++i)
for (int i = 0; i < g->target->getVectorWidth(); ++i)
vals.push_back(v);
return llvm::ConstantVector::get(vals);
}
@@ -261,7 +261,7 @@ LLVMInt8Vector(int8_t ival) {
llvm::Constant *
LLVMInt8Vector(const int8_t *ivec) {
std::vector<llvm::Constant *> vals;
for (int i = 0; i < g->target.vectorWidth; ++i)
for (int i = 0; i < g->target->getVectorWidth(); ++i)
vals.push_back(LLVMInt8(ivec[i]));
return llvm::ConstantVector::get(vals);
}
@@ -271,7 +271,7 @@ llvm::Constant *
LLVMUInt8Vector(uint8_t ival) {
llvm::Constant *v = LLVMUInt8(ival);
std::vector<llvm::Constant *> vals;
for (int i = 0; i < g->target.vectorWidth; ++i)
for (int i = 0; i < g->target->getVectorWidth(); ++i)
vals.push_back(v);
return llvm::ConstantVector::get(vals);
}
@@ -280,7 +280,7 @@ LLVMUInt8Vector(uint8_t ival) {
llvm::Constant *
LLVMUInt8Vector(const uint8_t *ivec) {
std::vector<llvm::Constant *> vals;
for (int i = 0; i < g->target.vectorWidth; ++i)
for (int i = 0; i < g->target->getVectorWidth(); ++i)
vals.push_back(LLVMUInt8(ivec[i]));
return llvm::ConstantVector::get(vals);
}
@@ -290,7 +290,7 @@ llvm::Constant *
LLVMInt16Vector(int16_t ival) {
llvm::Constant *v = LLVMInt16(ival);
std::vector<llvm::Constant *> vals;
for (int i = 0; i < g->target.vectorWidth; ++i)
for (int i = 0; i < g->target->getVectorWidth(); ++i)
vals.push_back(v);
return llvm::ConstantVector::get(vals);
}
@@ -299,7 +299,7 @@ LLVMInt16Vector(int16_t ival) {
llvm::Constant *
LLVMInt16Vector(const int16_t *ivec) {
std::vector<llvm::Constant *> vals;
for (int i = 0; i < g->target.vectorWidth; ++i)
for (int i = 0; i < g->target->getVectorWidth(); ++i)
vals.push_back(LLVMInt16(ivec[i]));
return llvm::ConstantVector::get(vals);
}
@@ -309,7 +309,7 @@ llvm::Constant *
LLVMUInt16Vector(uint16_t ival) {
llvm::Constant *v = LLVMUInt16(ival);
std::vector<llvm::Constant *> vals;
for (int i = 0; i < g->target.vectorWidth; ++i)
for (int i = 0; i < g->target->getVectorWidth(); ++i)
vals.push_back(v);
return llvm::ConstantVector::get(vals);
}
@@ -318,7 +318,7 @@ LLVMUInt16Vector(uint16_t ival) {
llvm::Constant *
LLVMUInt16Vector(const uint16_t *ivec) {
std::vector<llvm::Constant *> vals;
for (int i = 0; i < g->target.vectorWidth; ++i)
for (int i = 0; i < g->target->getVectorWidth(); ++i)
vals.push_back(LLVMUInt16(ivec[i]));
return llvm::ConstantVector::get(vals);
}
@@ -328,7 +328,7 @@ llvm::Constant *
LLVMInt32Vector(int32_t ival) {
llvm::Constant *v = LLVMInt32(ival);
std::vector<llvm::Constant *> vals;
for (int i = 0; i < g->target.vectorWidth; ++i)
for (int i = 0; i < g->target->getVectorWidth(); ++i)
vals.push_back(v);
return llvm::ConstantVector::get(vals);
}
@@ -337,7 +337,7 @@ LLVMInt32Vector(int32_t ival) {
llvm::Constant *
LLVMInt32Vector(const int32_t *ivec) {
std::vector<llvm::Constant *> vals;
for (int i = 0; i < g->target.vectorWidth; ++i)
for (int i = 0; i < g->target->getVectorWidth(); ++i)
vals.push_back(LLVMInt32(ivec[i]));
return llvm::ConstantVector::get(vals);
}
@@ -347,7 +347,7 @@ llvm::Constant *
LLVMUInt32Vector(uint32_t ival) {
llvm::Constant *v = LLVMUInt32(ival);
std::vector<llvm::Constant *> vals;
for (int i = 0; i < g->target.vectorWidth; ++i)
for (int i = 0; i < g->target->getVectorWidth(); ++i)
vals.push_back(v);
return llvm::ConstantVector::get(vals);
}
@@ -356,7 +356,7 @@ LLVMUInt32Vector(uint32_t ival) {
llvm::Constant *
LLVMUInt32Vector(const uint32_t *ivec) {
std::vector<llvm::Constant *> vals;
for (int i = 0; i < g->target.vectorWidth; ++i)
for (int i = 0; i < g->target->getVectorWidth(); ++i)
vals.push_back(LLVMUInt32(ivec[i]));
return llvm::ConstantVector::get(vals);
}
@@ -366,7 +366,7 @@ llvm::Constant *
LLVMFloatVector(float fval) {
llvm::Constant *v = LLVMFloat(fval);
std::vector<llvm::Constant *> vals;
for (int i = 0; i < g->target.vectorWidth; ++i)
for (int i = 0; i < g->target->getVectorWidth(); ++i)
vals.push_back(v);
return llvm::ConstantVector::get(vals);
}
@@ -375,7 +375,7 @@ LLVMFloatVector(float fval) {
llvm::Constant *
LLVMFloatVector(const float *fvec) {
std::vector<llvm::Constant *> vals;
for (int i = 0; i < g->target.vectorWidth; ++i)
for (int i = 0; i < g->target->getVectorWidth(); ++i)
vals.push_back(LLVMFloat(fvec[i]));
return llvm::ConstantVector::get(vals);
}
@@ -385,7 +385,7 @@ llvm::Constant *
LLVMDoubleVector(double dval) {
llvm::Constant *v = LLVMDouble(dval);
std::vector<llvm::Constant *> vals;
for (int i = 0; i < g->target.vectorWidth; ++i)
for (int i = 0; i < g->target->getVectorWidth(); ++i)
vals.push_back(v);
return llvm::ConstantVector::get(vals);
}
@@ -394,7 +394,7 @@ LLVMDoubleVector(double dval) {
llvm::Constant *
LLVMDoubleVector(const double *dvec) {
std::vector<llvm::Constant *> vals;
for (int i = 0; i < g->target.vectorWidth; ++i)
for (int i = 0; i < g->target->getVectorWidth(); ++i)
vals.push_back(LLVMDouble(dvec[i]));
return llvm::ConstantVector::get(vals);
}
@@ -404,7 +404,7 @@ llvm::Constant *
LLVMInt64Vector(int64_t ival) {
llvm::Constant *v = LLVMInt64(ival);
std::vector<llvm::Constant *> vals;
for (int i = 0; i < g->target.vectorWidth; ++i)
for (int i = 0; i < g->target->getVectorWidth(); ++i)
vals.push_back(v);
return llvm::ConstantVector::get(vals);
}
@@ -413,7 +413,7 @@ LLVMInt64Vector(int64_t ival) {
llvm::Constant *
LLVMInt64Vector(const int64_t *ivec) {
std::vector<llvm::Constant *> vals;
for (int i = 0; i < g->target.vectorWidth; ++i)
for (int i = 0; i < g->target->getVectorWidth(); ++i)
vals.push_back(LLVMInt64(ivec[i]));
return llvm::ConstantVector::get(vals);
}
@@ -423,7 +423,7 @@ llvm::Constant *
LLVMUInt64Vector(uint64_t ival) {
llvm::Constant *v = LLVMUInt64(ival);
std::vector<llvm::Constant *> vals;
for (int i = 0; i < g->target.vectorWidth; ++i)
for (int i = 0; i < g->target->getVectorWidth(); ++i)
vals.push_back(v);
return llvm::ConstantVector::get(vals);
}
@@ -432,7 +432,7 @@ LLVMUInt64Vector(uint64_t ival) {
llvm::Constant *
LLVMUInt64Vector(const uint64_t *ivec) {
std::vector<llvm::Constant *> vals;
for (int i = 0; i < g->target.vectorWidth; ++i)
for (int i = 0; i < g->target->getVectorWidth(); ++i)
vals.push_back(LLVMUInt64(ivec[i]));
return llvm::ConstantVector::get(vals);
}
@@ -451,7 +451,7 @@ LLVMBoolVector(bool b) {
}
std::vector<llvm::Constant *> vals;
for (int i = 0; i < g->target.vectorWidth; ++i)
for (int i = 0; i < g->target->getVectorWidth(); ++i)
vals.push_back(v);
return llvm::ConstantVector::get(vals);
}
@@ -460,7 +460,7 @@ LLVMBoolVector(bool b) {
llvm::Constant *
LLVMBoolVector(const bool *bvec) {
std::vector<llvm::Constant *> vals;
for (int i = 0; i < g->target.vectorWidth; ++i) {
for (int i = 0; i < g->target->getVectorWidth(); ++i) {
llvm::Constant *v;
if (LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType)
v = llvm::ConstantInt::get(LLVMTypes::Int32Type, bvec[i] ? 0xffffffff : 0,
@@ -697,7 +697,7 @@ lIsExactMultiple(llvm::Value *val, int baseValue, int vectorLength,
llvm::InsertElementInst *ie = llvm::dyn_cast<llvm::InsertElementInst>(val);
if (ie != NULL) {
llvm::Value *elts[ISPC_MAX_NVEC];
LLVMFlattenInsertChain(ie, g->target.vectorWidth, elts);
LLVMFlattenInsertChain(ie, g->target->getVectorWidth(), elts);
// We just need to check the first element's scalar value, since we
// know that all elements are equal
return lIsExactMultiple(elts[0], baseValue, vectorLength,

View File

@@ -1,5 +1,5 @@
/*
Copyright (c) 2010-2012, Intel Corporation
Copyright (c) 2010-2013, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -110,8 +110,8 @@ extern llvm::Constant *LLVMTrue, *LLVMFalse;
of LLVMTypes and the LLVMTrue/LLVMFalse constants. However, it can't
be called until the compilation target is known.
*/
struct Target;
extern void InitLLVMUtil(llvm::LLVMContext *ctx, Target target);
class Target;
extern void InitLLVMUtil(llvm::LLVMContext *ctx, Target& target);
/** Returns an LLVM i8 constant of the given value */
extern llvm::ConstantInt *LLVMInt8(int8_t i);

View File

@@ -1,5 +1,5 @@
/*
Copyright (c) 2010-2012, Intel Corporation
Copyright (c) 2010-2013, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -123,7 +123,7 @@ void RegisterDependency(const std::string &fileName)
static void
lDeclareSizeAndPtrIntTypes(SymbolTable *symbolTable) {
const Type *ptrIntType = (g->target.is32Bit) ? AtomicType::VaryingInt32 :
const Type *ptrIntType = (g->target->is32Bit()) ? AtomicType::VaryingInt32 :
AtomicType::VaryingInt64;
ptrIntType = ptrIntType->GetAsUnboundVariabilityType();
@@ -132,7 +132,7 @@ lDeclareSizeAndPtrIntTypes(SymbolTable *symbolTable) {
SourcePos());
symbolTable->AddType("ptrdiff_t", ptrIntType, SourcePos());
const Type *sizeType = (g->target.is32Bit || g->opt.force32BitAddressing) ?
const Type *sizeType = (g->target->is32Bit() || g->opt.force32BitAddressing) ?
AtomicType::VaryingUInt32 : AtomicType::VaryingUInt64;
sizeType = sizeType->GetAsUnboundVariabilityType();
symbolTable->AddType("size_t", sizeType, SourcePos());
@@ -245,7 +245,7 @@ Module::Module(const char *fn) {
// information has been set (so e.g. the vector width is known...) In
// particular, if we're compiling to multiple targets with different
// vector widths, this needs to be redone each time through.
InitLLVMUtil(g->ctx, g->target);
InitLLVMUtil(g->ctx, *g->target);
filename = fn;
errorCount = 0;
@@ -255,9 +255,9 @@ Module::Module(const char *fn) {
lDeclareSizeAndPtrIntTypes(symbolTable);
module = new llvm::Module(filename ? filename : "<stdin>", *g->ctx);
module->setTargetTriple(g->target.GetTripleString());
module->setTargetTriple(g->target->GetTripleString());
if (g->target.isa == Target::GENERIC) {
if (g->target->getISA() == Target::GENERIC) {
// <16 x i1> vectors only need 16 bit / 2 byte alignment, so add
// that to the regular datalayout string for IA..
std::string datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-"
@@ -761,7 +761,7 @@ Module::AddFunctionDeclaration(const std::string &name,
if (storageClass != SC_EXTERN_C) {
functionName += functionType->Mangle();
if (g->mangleFunctionsWithTarget)
functionName += g->target.GetISAString();
functionName += g->target->GetISAString();
}
llvm::Function *function =
llvm::Function::Create(llvmFunctionType, linkage, functionName.c_str(),
@@ -785,9 +785,7 @@ Module::AddFunctionDeclaration(const std::string &name,
function->setDoesNotAlias(1);
#endif
#if !defined(LLVM_3_1) && !defined(LLVM_3_2)
function->addAttributes(llvm::AttributeSet::FunctionIndex, *g->target.tf_attributes);
#endif
g->target->markFuncWithTargetAttr(function);
// Make sure that the return type isn't 'varying' or vector typed if
// the function is 'export'ed.
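The markFuncWithTargetAttr() call that replaces the #if-guarded addAttributes() block above is a Target method, so the LLVM-version guard and the attribute set now live in one place. A plausible sketch of its body, inferred only from the code it replaces; the private member name m_tf_attributes and the exact guard placement are assumptions:

#if !defined(LLVM_3_1) && !defined(LLVM_3_2)
void Target::markFuncWithTargetAttr(llvm::Function *func) {
    // Attach the per-target function attributes only when the target
    // actually has any, instead of unconditionally adding an attribute set.
    if (m_tf_attributes != NULL)
        func->addAttributes(llvm::AttributeSet::FunctionIndex, *m_tf_attributes);
}
#else
void Target::markFuncWithTargetAttr(llvm::Function *func) { /* no-op on older LLVM */ }
#endif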
@@ -841,7 +839,7 @@ Module::AddFunctionDeclaration(const std::string &name,
#endif
#if 0
int align = 4 * RoundUpPow2(g->target.nativeVectorWidth);
int align = 4 * RoundUpPow2(g->target->nativeVectorWidth);
function->addAttribute(i+1, llvm::Attribute::constructAlignmentFromInt(align));
#endif
}
@@ -991,14 +989,14 @@ Module::writeOutput(OutputType outputType, const char *outFileName,
else if (outputType == Bitcode)
return writeBitcode(module, outFileName);
else if (outputType == CXX) {
if (g->target.isa != Target::GENERIC) {
if (g->target->getISA() != Target::GENERIC) {
Error(SourcePos(), "Only \"generic-*\" targets can be used with "
"C++ emission.");
return false;
}
extern bool WriteCXXFile(llvm::Module *module, const char *fn,
int vectorWidth, const char *includeName);
return WriteCXXFile(module, outFileName, g->target.vectorWidth,
return WriteCXXFile(module, outFileName, g->target->getVectorWidth(),
includeFileName);
}
else
@@ -1036,7 +1034,7 @@ Module::writeBitcode(llvm::Module *module, const char *outFileName) {
bool
Module::writeObjectFileOrAssembly(OutputType outputType, const char *outFileName) {
llvm::TargetMachine *targetMachine = g->target.GetTargetMachine();
llvm::TargetMachine *targetMachine = g->target->GetTargetMachine();
return writeObjectFileOrAssembly(targetMachine, module, outputType,
outFileName);
}
@@ -1213,7 +1211,7 @@ lEmitVectorTypedefs(const std::vector<const VectorType *> &types, FILE *file) {
fprintf(file, "// Vector types with external visibility from ispc code\n");
fprintf(file, "///////////////////////////////////////////////////////////////////////////\n\n");
int align = g->target.nativeVectorWidth * 4;
int align = g->target->getNativeVectorWidth() * 4;
for (unsigned int i = 0; i < types.size(); ++i) {
std::string baseDecl;
@@ -1858,7 +1856,7 @@ Module::execPreprocessor(const char *infilename, llvm::raw_string_ostream *ostre
// Add #define for current compilation target
char targetMacro[128];
sprintf(targetMacro, "ISPC_TARGET_%s", g->target.GetISAString());
sprintf(targetMacro, "ISPC_TARGET_%s", g->target->GetISAString());
char *p = targetMacro;
while (*p) {
*p = toupper(*p);
@@ -1866,16 +1864,16 @@ Module::execPreprocessor(const char *infilename, llvm::raw_string_ostream *ostre
}
opts.addMacroDef(targetMacro);
if (g->target.is32Bit)
if (g->target->is32Bit())
opts.addMacroDef("ISPC_POINTER_SIZE=32");
else
opts.addMacroDef("ISPC_POINTER_SIZE=64");
if (g->target.hasHalf)
if (g->target->hasHalf())
opts.addMacroDef("ISPC_TARGET_HAS_HALF");
if (g->target.hasRand)
if (g->target->hasRand())
opts.addMacroDef("ISPC_TARGET_HAS_RAND");
if (g->target.hasTranscendentals)
if (g->target->hasTranscendentals())
opts.addMacroDef("ISPC_TARGET_HAS_TRANSCENDENTALS");
if (g->opt.forceAlignedMemory)
opts.addMacroDef("ISPC_FORCE_ALIGNED_MEMORY");
@@ -1992,7 +1990,7 @@ lGetExportedFunctions(SymbolTable *symbolTable,
symbolTable->GetMatchingFunctions(lSymbolIsExported, &syms);
for (unsigned int i = 0; i < syms.size(); ++i) {
FunctionTargetVariants &ftv = functions[syms[i]->name];
ftv.func[g->target.isa] = syms[i]->exportedFunction;
ftv.func[g->target->getISA()] = syms[i]->exportedFunction;
}
}
@@ -2287,7 +2285,8 @@ Module::CompileAndOutput(const char *srcFile,
{
if (target == NULL || strchr(target, ',') == NULL) {
// We're only compiling to a single target
if (!Target::GetTarget(arch, cpu, target, generatePIC, &g->target))
g->target = new Target(arch, cpu, target, generatePIC);
if (!g->target->isValid())
return 1;
m = new Module(srcFile);
@@ -2331,6 +2330,9 @@ Module::CompileAndOutput(const char *srcFile,
delete m;
m = NULL;
delete g->target;
g->target = NULL;
return errorCount > 0;
}
else {
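With Globals::target now a pointer, each compile (including, below, each entry of a multi-target compile) follows the lifecycle the single-target path above shows: construct, validate, use through the accessors, then delete and null the pointer. Condensed from the hunks above and below:

g->target = new Target(arch, cpu, target, generatePIC);
if (!g->target->isValid())
    return 1;
// ... compile using g->target->getVectorWidth(), g->target->getISA(), etc. ...
delete g->target;
g->target = NULL;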
@@ -2368,19 +2370,19 @@ Module::CompileAndOutput(const char *srcFile,
std::vector<RewriteGlobalInfo> globals[Target::NUM_ISAS];
int errorCount = 0;
for (unsigned int i = 0; i < targets.size(); ++i) {
if (!Target::GetTarget(arch, cpu, targets[i].c_str(), generatePIC,
&g->target))
g->target = new Target(arch, cpu, targets[i].c_str(), generatePIC);
if (!g->target->isValid())
return 1;
// Issue an error if we've already compiled to a variant of
// this target ISA. (It doesn't make sense to compile to both
// avx and avx-x2, for example.)
if (targetMachines[g->target.isa] != NULL) {
if (targetMachines[g->target->getISA()] != NULL) {
Error(SourcePos(), "Can't compile to multiple variants of %s "
"target!\n", g->target.GetISAString());
"target!\n", g->target->GetISAString());
return 1;
}
targetMachines[g->target.isa] = g->target.GetTargetMachine();
targetMachines[g->target->getISA()] = g->target->GetTargetMachine();
m = new Module(srcFile);
if (m->CompileFile() == 0) {
@@ -2392,7 +2394,7 @@ Module::CompileAndOutput(const char *srcFile,
lExtractAndRewriteGlobals(m->module, &globals[i]);
if (outFileName != NULL) {
const char *isaName = g->target.GetISAString();
const char *isaName = g->target->GetISAString();
std::string targetOutFileName =
lGetTargetFileName(outFileName, isaName);
if (!m->writeOutput(outputType, targetOutFileName.c_str()))
@@ -2407,6 +2409,9 @@ Module::CompileAndOutput(const char *srcFile,
if (!m->writeOutput(Module::Header, headerFileName))
return 1;
delete g->target;
g->target = NULL;
// Important: Don't delete the llvm::Module *m here; we need to
// keep it around so the llvm::Functions *s stay valid for when
// we generate the dispatch module's functions...

246
opt.cpp
View File

@@ -1,5 +1,5 @@
/*
Copyright (c) 2010-2012, Intel Corporation
Copyright (c) 2010-2013, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -353,7 +353,7 @@ lGetMask(llvm::Value *factor, uint64_t *mask) {
#if 0
llvm::ConstantExpr *ce = llvm::dyn_cast<llvm::ConstantExpr>(factor);
if (ce != NULL) {
llvm::TargetMachine *targetMachine = g->target.GetTargetMachine();
llvm::TargetMachine *targetMachine = g->target->GetTargetMachine();
const llvm::TargetData *td = targetMachine->getTargetData();
llvm::Constant *c = llvm::ConstantFoldConstantExpression(ce, td);
c->dump();
@@ -382,7 +382,7 @@ lGetMaskStatus(llvm::Value *mask, int vecWidth = -1) {
return ALL_OFF;
if (vecWidth == -1)
vecWidth = g->target.vectorWidth;
vecWidth = g->target->getVectorWidth();
Assert(vecWidth <= 64);
for (int i = 0; i < vecWidth; ++i) {
@@ -417,7 +417,7 @@ Optimize(llvm::Module *module, int optLevel) {
#if defined(LLVM_3_1)
optPM.add(new llvm::TargetData(module));
#else
llvm::TargetMachine *targetMachine = g->target.GetTargetMachine();
llvm::TargetMachine *targetMachine = g->target->GetTargetMachine();
if (const llvm::DataLayout *dl = targetMachine->getDataLayout())
optPM.add(new llvm::DataLayout(*dl));
else
@@ -471,7 +471,7 @@ Optimize(llvm::Module *module, int optLevel) {
optPM.add(llvm::createCFGSimplificationPass());
if (g->opt.disableGatherScatterOptimizations == false &&
g->target.vectorWidth > 1) {
g->target->getVectorWidth() > 1) {
optPM.add(llvm::createInstructionCombiningPass());
optPM.add(CreateImproveMemoryOpsPass());
}
@@ -485,7 +485,7 @@ Optimize(llvm::Module *module, int optLevel) {
// 1) 4 fields (r,g,b,w)
// 2) field size: vectorWidth * sizeof(float)
const int field_limit = 4;
int sr_threshold = g->target.vectorWidth * sizeof(float) * field_limit;
int sr_threshold = g->target->getVectorWidth() * sizeof(float) * field_limit;
// On to more serious optimizations
optPM.add(llvm::createScalarReplAggregatesPass(sr_threshold));
@@ -520,12 +520,12 @@ Optimize(llvm::Module *module, int optLevel) {
}
if (g->opt.disableGatherScatterOptimizations == false &&
g->target.vectorWidth > 1) {
g->target->getVectorWidth() > 1) {
optPM.add(llvm::createInstructionCombiningPass());
optPM.add(CreateImproveMemoryOpsPass());
if (g->opt.disableCoalescing == false &&
g->target.isa != Target::GENERIC) {
g->target->getISA() != Target::GENERIC) {
// It is important to run this here to make it easier to
// find matching gathers we can coalesce...
optPM.add(llvm::createEarlyCSEPass());
@@ -539,7 +539,7 @@ Optimize(llvm::Module *module, int optLevel) {
optPM.add(CreateVSelMovmskOptPass());
if (g->opt.disableGatherScatterOptimizations == false &&
g->target.vectorWidth > 1) {
g->target->getVectorWidth() > 1) {
optPM.add(llvm::createInstructionCombiningPass());
optPM.add(CreateImproveMemoryOpsPass());
}
@@ -1062,18 +1062,18 @@ lGetBasePointer(llvm::Value *v) {
llvm::InsertElementInst *ie = llvm::dyn_cast<llvm::InsertElementInst>(v);
if (ie != NULL) {
llvm::Value *elements[ISPC_MAX_NVEC];
LLVMFlattenInsertChain(ie, g->target.vectorWidth, elements);
LLVMFlattenInsertChain(ie, g->target->getVectorWidth(), elements);
// Make sure none of the elements is undefined.
// TODO: it's probably ok to allow undefined elements and return
// the base pointer if all of the other elements have the same
// value.
for (int i = 0; i < g->target.vectorWidth; ++i)
for (int i = 0; i < g->target->getVectorWidth(); ++i)
if (elements[i] == NULL)
return NULL;
// Do all of the elements have the same value?
for (int i = 0; i < g->target.vectorWidth-1; ++i)
for (int i = 0; i < g->target->getVectorWidth()-1; ++i)
if (elements[i] != elements[i+1])
return NULL;
@@ -1141,7 +1141,7 @@ lGetBasePtrAndOffsets(llvm::Value *ptrs, llvm::Value **offsets,
if (base != NULL) {
// We have a straight up varying pointer with no indexing that's
// actually all the same value.
if (g->target.is32Bit)
if (g->target->is32Bit())
*offsets = LLVMInt32Vector(0);
else
*offsets = LLVMInt64Vector((int64_t)0);
@@ -1197,7 +1197,7 @@ lGetBasePtrAndOffsets(llvm::Value *ptrs, llvm::Value **offsets,
// If the element is just a ptr to int instruction, treat
// it as having an offset of zero
elementBase = ce;
delta[i] = g->target.is32Bit ? LLVMInt32(0) : LLVMInt64(0);
delta[i] = g->target->is32Bit() ? LLVMInt32(0) : LLVMInt64(0);
}
else if (ce->getOpcode() == llvm::Instruction::Add) {
// Try both orderings of the operands to see if we can get
@@ -1748,150 +1748,150 @@ lGSToGSBaseOffsets(llvm::CallInst *callInst) {
GSInfo gsFuncs[] = {
GSInfo("__pseudo_gather32_i8",
g->target.hasGather ? "__pseudo_gather_base_offsets32_i8" :
g->target->hasGather() ? "__pseudo_gather_base_offsets32_i8" :
"__pseudo_gather_factored_base_offsets32_i8",
g->target.hasGather ? "__pseudo_gather_base_offsets32_i8" :
g->target->hasGather() ? "__pseudo_gather_base_offsets32_i8" :
"__pseudo_gather_factored_base_offsets32_i8",
true),
GSInfo("__pseudo_gather32_i16",
g->target.hasGather ? "__pseudo_gather_base_offsets32_i16" :
g->target->hasGather() ? "__pseudo_gather_base_offsets32_i16" :
"__pseudo_gather_factored_base_offsets32_i16",
g->target.hasGather ? "__pseudo_gather_base_offsets32_i16" :
g->target->hasGather() ? "__pseudo_gather_base_offsets32_i16" :
"__pseudo_gather_factored_base_offsets32_i16",
true),
GSInfo("__pseudo_gather32_i32",
g->target.hasGather ? "__pseudo_gather_base_offsets32_i32" :
g->target->hasGather() ? "__pseudo_gather_base_offsets32_i32" :
"__pseudo_gather_factored_base_offsets32_i32",
g->target.hasGather ? "__pseudo_gather_base_offsets32_i32" :
g->target->hasGather() ? "__pseudo_gather_base_offsets32_i32" :
"__pseudo_gather_factored_base_offsets32_i32",
true),
GSInfo("__pseudo_gather32_float",
g->target.hasGather ? "__pseudo_gather_base_offsets32_float" :
g->target->hasGather() ? "__pseudo_gather_base_offsets32_float" :
"__pseudo_gather_factored_base_offsets32_float",
g->target.hasGather ? "__pseudo_gather_base_offsets32_float" :
g->target->hasGather() ? "__pseudo_gather_base_offsets32_float" :
"__pseudo_gather_factored_base_offsets32_float",
true),
GSInfo("__pseudo_gather32_i64",
g->target.hasGather ? "__pseudo_gather_base_offsets32_i64" :
g->target->hasGather() ? "__pseudo_gather_base_offsets32_i64" :
"__pseudo_gather_factored_base_offsets32_i64",
g->target.hasGather ? "__pseudo_gather_base_offsets32_i64" :
g->target->hasGather() ? "__pseudo_gather_base_offsets32_i64" :
"__pseudo_gather_factored_base_offsets32_i64",
true),
GSInfo("__pseudo_gather32_double",
g->target.hasGather ? "__pseudo_gather_base_offsets32_double" :
g->target->hasGather() ? "__pseudo_gather_base_offsets32_double" :
"__pseudo_gather_factored_base_offsets32_double",
g->target.hasGather ? "__pseudo_gather_base_offsets32_double" :
g->target->hasGather() ? "__pseudo_gather_base_offsets32_double" :
"__pseudo_gather_factored_base_offsets32_double",
true),
GSInfo("__pseudo_scatter32_i8",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i8" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i8" :
"__pseudo_scatter_factored_base_offsets32_i8",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i8" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i8" :
"__pseudo_scatter_factored_base_offsets32_i8",
false),
GSInfo("__pseudo_scatter32_i16",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i16" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i16" :
"__pseudo_scatter_factored_base_offsets32_i16",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i16" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i16" :
"__pseudo_scatter_factored_base_offsets32_i16",
false),
GSInfo("__pseudo_scatter32_i32",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i32" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i32" :
"__pseudo_scatter_factored_base_offsets32_i32",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i32" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i32" :
"__pseudo_scatter_factored_base_offsets32_i32",
false),
GSInfo("__pseudo_scatter32_float",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_float" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_float" :
"__pseudo_scatter_factored_base_offsets32_float",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_float" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_float" :
"__pseudo_scatter_factored_base_offsets32_float",
false),
GSInfo("__pseudo_scatter32_i64",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i64" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i64" :
"__pseudo_scatter_factored_base_offsets32_i64",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i64" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i64" :
"__pseudo_scatter_factored_base_offsets32_i64",
false),
GSInfo("__pseudo_scatter32_double",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_double" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_double" :
"__pseudo_scatter_factored_base_offsets32_double",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_double" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_double" :
"__pseudo_scatter_factored_base_offsets32_double",
false),
GSInfo("__pseudo_gather64_i8",
g->target.hasGather ? "__pseudo_gather_base_offsets64_i8" :
g->target->hasGather() ? "__pseudo_gather_base_offsets64_i8" :
"__pseudo_gather_factored_base_offsets64_i8",
g->target.hasGather ? "__pseudo_gather_base_offsets32_i8" :
g->target->hasGather() ? "__pseudo_gather_base_offsets32_i8" :
"__pseudo_gather_factored_base_offsets32_i8",
true),
GSInfo("__pseudo_gather64_i16",
g->target.hasGather ? "__pseudo_gather_base_offsets64_i16" :
g->target->hasGather() ? "__pseudo_gather_base_offsets64_i16" :
"__pseudo_gather_factored_base_offsets64_i16",
g->target.hasGather ? "__pseudo_gather_base_offsets32_i16" :
g->target->hasGather() ? "__pseudo_gather_base_offsets32_i16" :
"__pseudo_gather_factored_base_offsets32_i16",
true),
GSInfo("__pseudo_gather64_i32",
g->target.hasGather ? "__pseudo_gather_base_offsets64_i32" :
g->target->hasGather() ? "__pseudo_gather_base_offsets64_i32" :
"__pseudo_gather_factored_base_offsets64_i32",
g->target.hasGather ? "__pseudo_gather_base_offsets32_i32" :
g->target->hasGather() ? "__pseudo_gather_base_offsets32_i32" :
"__pseudo_gather_factored_base_offsets32_i32",
true),
GSInfo("__pseudo_gather64_float",
g->target.hasGather ? "__pseudo_gather_base_offsets64_float" :
g->target->hasGather() ? "__pseudo_gather_base_offsets64_float" :
"__pseudo_gather_factored_base_offsets64_float",
g->target.hasGather ? "__pseudo_gather_base_offsets32_float" :
g->target->hasGather() ? "__pseudo_gather_base_offsets32_float" :
"__pseudo_gather_factored_base_offsets32_float",
true),
GSInfo("__pseudo_gather64_i64",
g->target.hasGather ? "__pseudo_gather_base_offsets64_i64" :
g->target->hasGather() ? "__pseudo_gather_base_offsets64_i64" :
"__pseudo_gather_factored_base_offsets64_i64",
g->target.hasGather ? "__pseudo_gather_base_offsets32_i64" :
g->target->hasGather() ? "__pseudo_gather_base_offsets32_i64" :
"__pseudo_gather_factored_base_offsets32_i64",
true),
GSInfo("__pseudo_gather64_double",
g->target.hasGather ? "__pseudo_gather_base_offsets64_double" :
g->target->hasGather() ? "__pseudo_gather_base_offsets64_double" :
"__pseudo_gather_factored_base_offsets64_double",
g->target.hasGather ? "__pseudo_gather_base_offsets32_double" :
g->target->hasGather() ? "__pseudo_gather_base_offsets32_double" :
"__pseudo_gather_factored_base_offsets32_double",
true),
GSInfo("__pseudo_scatter64_i8",
g->target.hasScatter ? "__pseudo_scatter_base_offsets64_i8" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets64_i8" :
"__pseudo_scatter_factored_base_offsets64_i8",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i8" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i8" :
"__pseudo_scatter_factored_base_offsets32_i8",
false),
GSInfo("__pseudo_scatter64_i16",
g->target.hasScatter ? "__pseudo_scatter_base_offsets64_i16" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets64_i16" :
"__pseudo_scatter_factored_base_offsets64_i16",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i16" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i16" :
"__pseudo_scatter_factored_base_offsets32_i16",
false),
GSInfo("__pseudo_scatter64_i32",
g->target.hasScatter ? "__pseudo_scatter_base_offsets64_i32" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets64_i32" :
"__pseudo_scatter_factored_base_offsets64_i32",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i32" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i32" :
"__pseudo_scatter_factored_base_offsets32_i32",
false),
GSInfo("__pseudo_scatter64_float",
g->target.hasScatter ? "__pseudo_scatter_base_offsets64_float" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets64_float" :
"__pseudo_scatter_factored_base_offsets64_float",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_float" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_float" :
"__pseudo_scatter_factored_base_offsets32_float",
false),
GSInfo("__pseudo_scatter64_i64",
g->target.hasScatter ? "__pseudo_scatter_base_offsets64_i64" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets64_i64" :
"__pseudo_scatter_factored_base_offsets64_i64",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i64" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i64" :
"__pseudo_scatter_factored_base_offsets32_i64",
false),
GSInfo("__pseudo_scatter64_double",
g->target.hasScatter ? "__pseudo_scatter_base_offsets64_double" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets64_double" :
"__pseudo_scatter_factored_base_offsets64_double",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_double" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_double" :
"__pseudo_scatter_factored_base_offsets32_double",
false),
};
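Every entry in this table follows one rule: if the target has a native gather (or scatter), the pseudo call is lowered to the corresponding "__pseudo_*_base_offsets*" function, otherwise to the "__pseudo_*_factored_base_offsets*" variant. A sketch of that selection with a hypothetical helper name, purely for illustration; the real code builds the table above instead:

// Hypothetical helper illustrating the selection rule used in gsFuncs[].
static const char *
lSelectBaseOffsetsVariant(bool isGather, const char *nativeName,
                          const char *factoredName) {
    bool hasNative = isGather ? g->target->hasGather()
                              : g->target->hasScatter();
    return hasNative ? nativeName : factoredName;
}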
@@ -1933,8 +1933,8 @@ lGSToGSBaseOffsets(llvm::CallInst *callInst) {
llvm::Function *gatherScatterFunc = info->baseOffsetsFunc;
if ((info->isGather == true && g->target.hasGather) ||
(info->isGather == false && g->target.hasScatter)) {
if ((info->isGather == true && g->target->hasGather()) ||
(info->isGather == false && g->target->hasScatter())) {
// See if the offsets are scaled by 2, 4, or 8. If so,
// extract that scale factor and rewrite the offsets to remove
// it.
@@ -2057,65 +2057,65 @@ lGSBaseOffsetsGetMoreConst(llvm::CallInst *callInst) {
};
GSBOInfo gsFuncs[] = {
GSBOInfo(g->target.hasGather ? "__pseudo_gather_base_offsets32_i8" :
GSBOInfo(g->target->hasGather() ? "__pseudo_gather_base_offsets32_i8" :
"__pseudo_gather_factored_base_offsets32_i8",
g->target.hasGather ? "__pseudo_gather_base_offsets32_i8" :
g->target->hasGather() ? "__pseudo_gather_base_offsets32_i8" :
"__pseudo_gather_factored_base_offsets32_i8",
true),
GSBOInfo(g->target.hasGather ? "__pseudo_gather_base_offsets32_i16" :
GSBOInfo(g->target->hasGather() ? "__pseudo_gather_base_offsets32_i16" :
"__pseudo_gather_factored_base_offsets32_i16",
g->target.hasGather ? "__pseudo_gather_base_offsets32_i16" :
g->target->hasGather() ? "__pseudo_gather_base_offsets32_i16" :
"__pseudo_gather_factored_base_offsets32_i16",
true),
GSBOInfo(g->target.hasGather ? "__pseudo_gather_base_offsets32_i32" :
GSBOInfo(g->target->hasGather() ? "__pseudo_gather_base_offsets32_i32" :
"__pseudo_gather_factored_base_offsets32_i32",
g->target.hasGather ? "__pseudo_gather_base_offsets32_i32" :
g->target->hasGather() ? "__pseudo_gather_base_offsets32_i32" :
"__pseudo_gather_factored_base_offsets32_i32",
true),
GSBOInfo(g->target.hasGather ? "__pseudo_gather_base_offsets32_float" :
GSBOInfo(g->target->hasGather() ? "__pseudo_gather_base_offsets32_float" :
"__pseudo_gather_factored_base_offsets32_float",
g->target.hasGather ? "__pseudo_gather_base_offsets32_float" :
g->target->hasGather() ? "__pseudo_gather_base_offsets32_float" :
"__pseudo_gather_factored_base_offsets32_float",
true),
GSBOInfo(g->target.hasGather ? "__pseudo_gather_base_offsets32_i64" :
GSBOInfo(g->target->hasGather() ? "__pseudo_gather_base_offsets32_i64" :
"__pseudo_gather_factored_base_offsets32_i64",
g->target.hasGather ? "__pseudo_gather_base_offsets32_i64" :
g->target->hasGather() ? "__pseudo_gather_base_offsets32_i64" :
"__pseudo_gather_factored_base_offsets32_i64",
true),
GSBOInfo(g->target.hasGather ? "__pseudo_gather_base_offsets32_double" :
GSBOInfo(g->target->hasGather() ? "__pseudo_gather_base_offsets32_double" :
"__pseudo_gather_factored_base_offsets32_double",
g->target.hasGather ? "__pseudo_gather_base_offsets32_double" :
g->target->hasGather() ? "__pseudo_gather_base_offsets32_double" :
"__pseudo_gather_factored_base_offsets32_double",
true),
GSBOInfo( g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i8" :
GSBOInfo( g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i8" :
"__pseudo_scatter_factored_base_offsets32_i8",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i8" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i8" :
"__pseudo_scatter_factored_base_offsets32_i8",
false),
GSBOInfo(g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i16" :
GSBOInfo(g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i16" :
"__pseudo_scatter_factored_base_offsets32_i16",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i16" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i16" :
"__pseudo_scatter_factored_base_offsets32_i16",
false),
GSBOInfo(g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i32" :
GSBOInfo(g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i32" :
"__pseudo_scatter_factored_base_offsets32_i32",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i32" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i32" :
"__pseudo_scatter_factored_base_offsets32_i32",
false),
GSBOInfo(g->target.hasScatter ? "__pseudo_scatter_base_offsets32_float" :
GSBOInfo(g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_float" :
"__pseudo_scatter_factored_base_offsets32_float",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_float" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_float" :
"__pseudo_scatter_factored_base_offsets32_float",
false),
GSBOInfo(g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i64" :
GSBOInfo(g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i64" :
"__pseudo_scatter_factored_base_offsets32_i64",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i64" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i64" :
"__pseudo_scatter_factored_base_offsets32_i64",
false),
GSBOInfo(g->target.hasScatter ? "__pseudo_scatter_base_offsets32_double" :
GSBOInfo(g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_double" :
"__pseudo_scatter_factored_base_offsets32_double",
g->target.hasScatter ? "__pseudo_scatter_base_offsets32_double" :
g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_double" :
"__pseudo_scatter_factored_base_offsets32_double",
false),
};
@@ -2208,7 +2208,7 @@ lGetOffsetScaleVec(llvm::Value *offsetScale, llvm::Type *vecType) {
uint64_t scaleValue = offsetScaleInt->getZExtValue();
std::vector<llvm::Constant *> scales;
for (int i = 0; i < g->target.vectorWidth; ++i) {
for (int i = 0; i < g->target->getVectorWidth(); ++i) {
if (vecType == LLVMTypes::Int64VectorType)
scales.push_back(LLVMInt64(scaleValue));
else {
@@ -2240,7 +2240,7 @@ lGSToLoadStore(llvm::CallInst *callInst) {
struct GatherImpInfo {
GatherImpInfo(const char *pName, const char *lmName, llvm::Type *st,
int a)
: align(a), isFactored(!g->target.hasGather) {
: align(a), isFactored(!g->target->hasGather()) {
pseudoFunc = m->module->getFunction(pName);
loadMaskedFunc = m->module->getFunction(lmName);
Assert(pseudoFunc != NULL && loadMaskedFunc != NULL);
@@ -2255,40 +2255,40 @@ lGSToLoadStore(llvm::CallInst *callInst) {
};
GatherImpInfo gInfo[] = {
GatherImpInfo(g->target.hasGather ? "__pseudo_gather_base_offsets32_i8" :
GatherImpInfo(g->target->hasGather() ? "__pseudo_gather_base_offsets32_i8" :
"__pseudo_gather_factored_base_offsets32_i8",
"__masked_load_i8", LLVMTypes::Int8Type, 1),
GatherImpInfo(g->target.hasGather ? "__pseudo_gather_base_offsets32_i16" :
GatherImpInfo(g->target->hasGather() ? "__pseudo_gather_base_offsets32_i16" :
"__pseudo_gather_factored_base_offsets32_i16",
"__masked_load_i16", LLVMTypes::Int16Type, 2),
GatherImpInfo(g->target.hasGather ? "__pseudo_gather_base_offsets32_i32" :
GatherImpInfo(g->target->hasGather() ? "__pseudo_gather_base_offsets32_i32" :
"__pseudo_gather_factored_base_offsets32_i32",
"__masked_load_i32", LLVMTypes::Int32Type, 4),
GatherImpInfo(g->target.hasGather ? "__pseudo_gather_base_offsets32_float" :
GatherImpInfo(g->target->hasGather() ? "__pseudo_gather_base_offsets32_float" :
"__pseudo_gather_factored_base_offsets32_float",
"__masked_load_float", LLVMTypes::FloatType, 4),
GatherImpInfo(g->target.hasGather ? "__pseudo_gather_base_offsets32_i64" :
GatherImpInfo(g->target->hasGather() ? "__pseudo_gather_base_offsets32_i64" :
"__pseudo_gather_factored_base_offsets32_i64",
"__masked_load_i64", LLVMTypes::Int64Type, 8),
GatherImpInfo(g->target.hasGather ? "__pseudo_gather_base_offsets32_double" :
GatherImpInfo(g->target->hasGather() ? "__pseudo_gather_base_offsets32_double" :
"__pseudo_gather_factored_base_offsets32_double",
"__masked_load_double", LLVMTypes::DoubleType, 8),
GatherImpInfo(g->target.hasGather ? "__pseudo_gather_base_offsets64_i8" :
GatherImpInfo(g->target->hasGather() ? "__pseudo_gather_base_offsets64_i8" :
"__pseudo_gather_factored_base_offsets64_i8",
"__masked_load_i8", LLVMTypes::Int8Type, 1),
GatherImpInfo(g->target.hasGather ? "__pseudo_gather_base_offsets64_i16" :
GatherImpInfo(g->target->hasGather() ? "__pseudo_gather_base_offsets64_i16" :
"__pseudo_gather_factored_base_offsets64_i16",
"__masked_load_i16", LLVMTypes::Int16Type, 2),
GatherImpInfo(g->target.hasGather ? "__pseudo_gather_base_offsets64_i32" :
GatherImpInfo(g->target->hasGather() ? "__pseudo_gather_base_offsets64_i32" :
"__pseudo_gather_factored_base_offsets64_i32",
"__masked_load_i32", LLVMTypes::Int32Type, 4),
GatherImpInfo(g->target.hasGather ? "__pseudo_gather_base_offsets64_float" :
GatherImpInfo(g->target->hasGather() ? "__pseudo_gather_base_offsets64_float" :
"__pseudo_gather_factored_base_offsets64_float",
"__masked_load_float", LLVMTypes::FloatType, 4),
GatherImpInfo(g->target.hasGather ? "__pseudo_gather_base_offsets64_i64" :
GatherImpInfo(g->target->hasGather() ? "__pseudo_gather_base_offsets64_i64" :
"__pseudo_gather_factored_base_offsets64_i64",
"__masked_load_i64", LLVMTypes::Int64Type, 8),
GatherImpInfo(g->target.hasGather ? "__pseudo_gather_base_offsets64_double" :
GatherImpInfo(g->target->hasGather() ? "__pseudo_gather_base_offsets64_double" :
"__pseudo_gather_factored_base_offsets64_double",
"__masked_load_double", LLVMTypes::DoubleType, 8),
};
@@ -2296,7 +2296,7 @@ lGSToLoadStore(llvm::CallInst *callInst) {
struct ScatterImpInfo {
ScatterImpInfo(const char *pName, const char *msName,
llvm::Type *vpt, int a)
: align(a), isFactored(!g->target.hasScatter) {
: align(a), isFactored(!g->target->hasScatter()) {
pseudoFunc = m->module->getFunction(pName);
maskedStoreFunc = m->module->getFunction(msName);
vecPtrType = vpt;
@@ -2310,40 +2310,40 @@ lGSToLoadStore(llvm::CallInst *callInst) {
};
ScatterImpInfo sInfo[] = {
ScatterImpInfo(g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i8" :
ScatterImpInfo(g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i8" :
"__pseudo_scatter_factored_base_offsets32_i8",
"__pseudo_masked_store_i8", LLVMTypes::Int8VectorPointerType, 1),
ScatterImpInfo(g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i16" :
ScatterImpInfo(g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i16" :
"__pseudo_scatter_factored_base_offsets32_i16",
"__pseudo_masked_store_i16", LLVMTypes::Int16VectorPointerType, 2),
ScatterImpInfo(g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i32" :
ScatterImpInfo(g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i32" :
"__pseudo_scatter_factored_base_offsets32_i32",
"__pseudo_masked_store_i32", LLVMTypes::Int32VectorPointerType, 4),
ScatterImpInfo(g->target.hasScatter ? "__pseudo_scatter_base_offsets32_float" :
ScatterImpInfo(g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_float" :
"__pseudo_scatter_factored_base_offsets32_float",
"__pseudo_masked_store_float", LLVMTypes::FloatVectorPointerType, 4),
ScatterImpInfo(g->target.hasScatter ? "__pseudo_scatter_base_offsets32_i64" :
ScatterImpInfo(g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_i64" :
"__pseudo_scatter_factored_base_offsets32_i64",
"__pseudo_masked_store_i64", LLVMTypes::Int64VectorPointerType, 8),
ScatterImpInfo(g->target.hasScatter ? "__pseudo_scatter_base_offsets32_double" :
ScatterImpInfo(g->target->hasScatter() ? "__pseudo_scatter_base_offsets32_double" :
"__pseudo_scatter_factored_base_offsets32_double",
"__pseudo_masked_store_double", LLVMTypes::DoubleVectorPointerType, 8),
ScatterImpInfo(g->target.hasScatter ? "__pseudo_scatter_base_offsets64_i8" :
ScatterImpInfo(g->target->hasScatter() ? "__pseudo_scatter_base_offsets64_i8" :
"__pseudo_scatter_factored_base_offsets64_i8",
"__pseudo_masked_store_i8", LLVMTypes::Int8VectorPointerType, 1),
ScatterImpInfo(g->target.hasScatter ? "__pseudo_scatter_base_offsets64_i16" :
ScatterImpInfo(g->target->hasScatter() ? "__pseudo_scatter_base_offsets64_i16" :
"__pseudo_scatter_factored_base_offsets64_i16",
"__pseudo_masked_store_i16", LLVMTypes::Int16VectorPointerType, 2),
ScatterImpInfo(g->target.hasScatter ? "__pseudo_scatter_base_offsets64_i32" :
ScatterImpInfo(g->target->hasScatter() ? "__pseudo_scatter_base_offsets64_i32" :
"__pseudo_scatter_factored_base_offsets64_i32",
"__pseudo_masked_store_i32", LLVMTypes::Int32VectorPointerType, 4),
ScatterImpInfo(g->target.hasScatter ? "__pseudo_scatter_base_offsets64_float" :
ScatterImpInfo(g->target->hasScatter() ? "__pseudo_scatter_base_offsets64_float" :
"__pseudo_scatter_factored_base_offsets64_float",
"__pseudo_masked_store_float", LLVMTypes::FloatVectorPointerType, 4),
ScatterImpInfo(g->target.hasScatter ? "__pseudo_scatter_base_offsets64_i64" :
ScatterImpInfo(g->target->hasScatter() ? "__pseudo_scatter_base_offsets64_i64" :
"__pseudo_scatter_factored_base_offsets64_i64",
"__pseudo_masked_store_i64", LLVMTypes::Int64VectorPointerType, 8),
ScatterImpInfo(g->target.hasScatter ? "__pseudo_scatter_base_offsets64_double" :
ScatterImpInfo(g->target->hasScatter() ? "__pseudo_scatter_base_offsets64_double" :
"__pseudo_scatter_factored_base_offsets64_double",
"__pseudo_masked_store_double", LLVMTypes::DoubleVectorPointerType, 8),
};
@@ -2432,8 +2432,8 @@ lGSToLoadStore(llvm::CallInst *callInst) {
ptr->getName(), callInst);
llvm::Value *scalarValue = new llvm::LoadInst(ptr, callInst->getName(), callInst);
llvm::Value *vecValue = llvm::UndefValue::get(callInst->getType());
for (int i = 0; i < g->target.vectorWidth; ++i) {
if (i < g->target.vectorWidth - 1)
for (int i = 0; i < g->target->getVectorWidth(); ++i) {
if (i < g->target->getVectorWidth() - 1)
vecValue = llvm::InsertElementInst::Create(vecValue, scalarValue, LLVMInt32(i),
callInst->getName(), callInst);
else
@@ -2449,7 +2449,7 @@ lGSToLoadStore(llvm::CallInst *callInst) {
// A scatter with everyone going to the same location is
// undefined (if there's more than one program instance in
// the gang). Issue a warning.
if (g->target.vectorWidth > 1)
if (g->target->getVectorWidth() > 1)
Warning(pos, "Undefined behavior: all program instances are "
"writing to the same location!");
@@ -3422,10 +3422,10 @@ lAssembleResultVectors(const std::vector<CoalescedLoadOp> &loadOps,
// And now concatenate 1, 2, or 4 of the 4-wide vectors computed above
// into 4, 8, or 16-wide final result vectors.
int numGathers = constOffsets.size() / g->target.vectorWidth;
int numGathers = constOffsets.size() / g->target->getVectorWidth();
for (int i = 0; i < numGathers; ++i) {
llvm::Value *result = NULL;
switch (g->target.vectorWidth) {
switch (g->target->getVectorWidth()) {
case 4:
result = vec4s[i];
break;
@@ -3486,7 +3486,7 @@ lComputeBasePtr(llvm::CallInst *gatherInst, llvm::Instruction *insertBefore) {
static void
lExtractConstOffsets(const std::vector<llvm::CallInst *> &coalesceGroup,
int elementSize, std::vector<int64_t> *constOffsets) {
int width = g->target.vectorWidth;
int width = g->target->getVectorWidth();
*constOffsets = std::vector<int64_t>(coalesceGroup.size() * width, 0);
int64_t *endPtr = &((*constOffsets)[0]);
@@ -3814,7 +3814,7 @@ lIsSafeToBlend(llvm::Value *lvalue) {
llvm::VectorType *vt =
llvm::dyn_cast<llvm::VectorType>(type);
return (vt != NULL &&
(int)vt->getNumElements() == g->target.vectorWidth);
(int)vt->getNumElements() == g->target->getVectorWidth());
}
else {
llvm::GetElementPtrInst *gep =
@@ -4060,7 +4060,7 @@ lReplacePseudoGS(llvm::CallInst *callInst) {
bool gotPosition = lGetSourcePosFromMetadata(callInst, &pos);
callInst->setCalledFunction(info->actualFunc);
if (gotPosition && g->target.vectorWidth > 1) {
if (gotPosition && g->target->getVectorWidth() > 1) {
if (info->isGather)
PerformanceWarning(pos, "Gather required to load value.");
else

View File

@@ -1,5 +1,5 @@
/*
Copyright (c) 2010-2012, Intel Corporation
Copyright (c) 2010-2013, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -2148,7 +2148,7 @@ lAddFunctionParams(Declarator *decl) {
/** Add a symbol for the built-in mask variable to the symbol table */
static void lAddMaskToSymbolTable(SourcePos pos) {
const Type *t = g->target.maskBitCount == 1 ?
const Type *t = g->target->getMaskBitCount() == 1 ?
AtomicType::VaryingBool : AtomicType::VaryingUInt32;
t = t->GetAsConstType();
Symbol *maskSymbol = new Symbol("__mask", pos, t);

View File

@@ -1,5 +1,5 @@
/*
Copyright (c) 2010-2012, Intel Corporation
Copyright (c) 2010-2013, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -1274,7 +1274,7 @@ lUpdateVaryingCounter(int dim, int nDims, FunctionEmitContext *ctx,
llvm::Value *counter = ctx->LoadInst(uniformCounterPtr);
llvm::Value *smearCounter =
llvm::UndefValue::get(LLVMTypes::Int32VectorType);
for (int i = 0; i < g->target.vectorWidth; ++i)
for (int i = 0; i < g->target->getVectorWidth(); ++i)
smearCounter =
ctx->InsertInst(smearCounter, counter, i, "smear_counter");
@@ -1285,7 +1285,7 @@ lUpdateVaryingCounter(int dim, int nDims, FunctionEmitContext *ctx,
// (0,1,2,3,0,1,2,3), and for the outer dimension we want
// (0,0,0,0,1,1,1,1).
int32_t delta[ISPC_MAX_NVEC];
for (int i = 0; i < g->target.vectorWidth; ++i) {
for (int i = 0; i < g->target->getVectorWidth(); ++i) {
int d = i;
// First, account for the effect of any dimensions at deeper
// nesting levels than the current one.
@@ -1393,7 +1393,7 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
std::vector<llvm::Value *> nExtras, alignedEnd, extrasMaskPtrs;
std::vector<int> span(nDims, 0);
lGetSpans(nDims-1, nDims, g->target.vectorWidth, isTiled, &span[0]);
lGetSpans(nDims-1, nDims, g->target->getVectorWidth(), isTiled, &span[0]);
for (int i = 0; i < nDims; ++i) {
// Basic blocks that we'll fill in later with the looping logic for
@@ -1518,7 +1518,7 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
dimVariables[i]->storagePtr, span);
llvm::Value *smearEnd = llvm::UndefValue::get(LLVMTypes::Int32VectorType);
for (int j = 0; j < g->target.vectorWidth; ++j)
for (int j = 0; j < g->target->getVectorWidth(); ++j)
smearEnd = ctx->InsertInst(smearEnd, endVals[i], j, "smear_end");
// Do a vector compare of its value to the end value to generate a
// mask for this last bit of work.
@@ -1663,7 +1663,7 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
llvm::Value *varyingCounter =
ctx->LoadInst(dimVariables[nDims-1]->storagePtr);
llvm::Value *smearEnd = llvm::UndefValue::get(LLVMTypes::Int32VectorType);
for (int j = 0; j < g->target.vectorWidth; ++j)
for (int j = 0; j < g->target->getVectorWidth(); ++j)
smearEnd = ctx->InsertInst(smearEnd, endVals[nDims-1], j, "smear_end");
llvm::Value *emask =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
@@ -1759,7 +1759,7 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
lUpdateVaryingCounter(nDims-1, nDims, ctx, uniformCounterPtrs[nDims-1],
dimVariables[nDims-1]->storagePtr, span);
llvm::Value *smearEnd = llvm::UndefValue::get(LLVMTypes::Int32VectorType);
for (int j = 0; j < g->target.vectorWidth; ++j)
for (int j = 0; j < g->target->getVectorWidth(); ++j)
smearEnd = ctx->InsertInst(smearEnd, endVals[nDims-1], j, "smear_end");
llvm::Value *emask =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
@@ -1995,7 +1995,7 @@ ForeachActiveStmt::EmitCode(FunctionEmitContext *ctx) const {
// Get the "program index" vector value
llvm::Value *programIndex =
llvm::UndefValue::get(LLVMTypes::Int32VectorType);
for (int i = 0; i < g->target.vectorWidth; ++i)
for (int i = 0; i < g->target->getVectorWidth(); ++i)
programIndex = ctx->InsertInst(programIndex, LLVMInt32(i), i,
"prog_index");
@@ -3103,7 +3103,7 @@ PrintStmt::EmitCode(FunctionEmitContext *ctx) const {
// Set up the rest of the parameters to it
args[0] = ctx->GetStringPtr(format);
args[1] = ctx->GetStringPtr(argTypes);
args[2] = LLVMInt32(g->target.vectorWidth);
args[2] = LLVMInt32(g->target->getVectorWidth());
args[3] = ctx->LaneMask(mask);
std::vector<llvm::Value *> argVec(&args[0], &args[5]);
ctx->CallInst(printFunc, NULL, argVec, "");
@@ -3254,7 +3254,7 @@ DeleteStmt::EmitCode(FunctionEmitContext *ctx) const {
// calling it.
llvm::Function *func = m->module->getFunction("__delete_varying");
AssertPos(pos, func != NULL);
if (g->target.is32Bit)
if (g->target->is32Bit())
exprValue = ctx->ZExtInst(exprValue, LLVMTypes::Int64VectorType,
"ptr_to_64");
ctx->CallInst(func, NULL, exprValue, "");

View File

@@ -1,5 +1,5 @@
/*
Copyright (c) 2010-2012, Intel Corporation
Copyright (c) 2010-2013, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -566,10 +566,10 @@ AtomicType::GetDIType(llvm::DIDescriptor scope) const {
}
else if (variability == Variability::Varying) {
llvm::DIType unifType = GetAsUniformType()->GetDIType(scope);
llvm::Value *sub = m->diBuilder->getOrCreateSubrange(0, g->target.vectorWidth-1);
llvm::Value *sub = m->diBuilder->getOrCreateSubrange(0, g->target->getVectorWidth()-1);
llvm::DIArray subArray = m->diBuilder->getOrCreateArray(sub);
uint64_t size = unifType.getSizeInBits() * g->target.vectorWidth;
uint64_t align = unifType.getAlignInBits() * g->target.vectorWidth;
uint64_t size = unifType.getSizeInBits() * g->target->getVectorWidth();
uint64_t align = unifType.getAlignInBits() * g->target->getVectorWidth();
return m->diBuilder->createVectorType(size, align, unifType, subArray);
}
else {
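As a concrete check of the size/alignment arithmetic above (assuming an 8-wide target and a uniform float element of 32 bits): size = 32 * 8 = 256 bits and align = 32 * 8 = 256 bits, so the varying float is described to the debugger as a 256-bit vector of eight 32-bit elements.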
@@ -830,10 +830,10 @@ EnumType::GetDIType(llvm::DIDescriptor scope) const {
case Variability::Uniform:
return diType;
case Variability::Varying: {
llvm::Value *sub = m->diBuilder->getOrCreateSubrange(0, g->target.vectorWidth-1);
llvm::Value *sub = m->diBuilder->getOrCreateSubrange(0, g->target->getVectorWidth()-1);
llvm::DIArray subArray = m->diBuilder->getOrCreateArray(sub);
uint64_t size = diType.getSizeInBits() * g->target.vectorWidth;
uint64_t align = diType.getAlignInBits() * g->target.vectorWidth;
uint64_t size = diType.getSizeInBits() * g->target->getVectorWidth();
uint64_t align = diType.getAlignInBits() * g->target->getVectorWidth();
return m->diBuilder->createVectorType(size, align, diType, subArray);
}
case Variability::SOA: {
@@ -1173,7 +1173,7 @@ PointerType::GetDIType(llvm::DIDescriptor scope) const {
}
llvm::DIType diTargetType = baseType->GetDIType(scope);
int bitsSize = g->target.is32Bit ? 32 : 64;
int bitsSize = g->target->is32Bit() ? 32 : 64;
int ptrAlignBits = bitsSize;
switch (variability.type) {
case Variability::Uniform:
@@ -1183,7 +1183,7 @@ PointerType::GetDIType(llvm::DIDescriptor scope) const {
// emit them as an array of pointers
llvm::DIType eltType = m->diBuilder->createPointerType(diTargetType,
bitsSize, ptrAlignBits);
return lCreateDIArray(eltType, g->target.vectorWidth);
return lCreateDIArray(eltType, g->target->getVectorWidth());
}
case Variability::SOA: {
ArrayType at(GetAsUniformType(), variability.soaWidth);
@@ -1712,7 +1712,7 @@ VectorType::GetDIType(llvm::DIDescriptor scope) const {
// explicitly aligned to the machine's natural vector alignment.
uint64_t align = eltType.getAlignInBits();
if (IsUniformType())
align = 4 * g->target.nativeVectorWidth;
align = 4 * g->target->getNativeVectorWidth();
if (IsUniformType() || IsVaryingType())
return m->diBuilder->createVectorType(sizeBits, align, eltType, subArray);
@@ -1732,11 +1732,11 @@ VectorType::getVectorMemoryCount() const {
if (base->IsVaryingType())
return numElements;
else if (base->IsUniformType()) {
int nativeWidth = g->target.nativeVectorWidth;
int nativeWidth = g->target->getNativeVectorWidth();
if (Type::Equal(base->GetAsUniformType(), AtomicType::UniformInt64) ||
Type::Equal(base->GetAsUniformType(), AtomicType::UniformUInt64) ||
Type::Equal(base->GetAsUniformType(), AtomicType::UniformDouble))
// target.nativeVectorWidth should be in terms of 32-bit
// target.getNativeVectorWidth() should be in terms of 32-bit
// values, so for the 64-bit guys, it takes half as many of
// them to fill the native width
nativeWidth /= 2;
@@ -1778,7 +1778,7 @@ lMangleStructName(const std::string &name, Variability variability) {
std::string n;
// Encode vector width
sprintf(buf, "v%d", g->target.vectorWidth);
sprintf(buf, "v%d", g->target->getVectorWidth());
n += buf;
// Variability