Makefile
@@ -15,7 +15,7 @@ LLVM_CXXFLAGS=$(shell llvm-config --cppflags)
LLVM_VERSION_DEF=-DLLVM_$(shell llvm-config --version | sed s/\\./_/)

BUILD_DATE=$(shell date +%Y%m%d)
BUILD_VERSION=$(shell git log | head -1)
BUILD_VERSION=$(shell git log --abbrev-commit --abbrev=16 | head -1)

CXX=g++
CPP=cpp
builtins.cpp
@@ -78,8 +78,14 @@ static const Type *
lLLVMTypeToISPCType(const llvm::Type *t, bool intAsUnsigned) {
    if (t == LLVMTypes::VoidType)
        return AtomicType::Void;

    // uniform
    else if (t == LLVMTypes::BoolType)
        return AtomicType::UniformBool;
    else if (t == LLVMTypes::Int8Type)
        return intAsUnsigned ? AtomicType::UniformUInt8 : AtomicType::UniformInt8;
    else if (t == LLVMTypes::Int16Type)
        return intAsUnsigned ? AtomicType::UniformUInt16 : AtomicType::UniformInt16;
    else if (t == LLVMTypes::Int32Type)
        return intAsUnsigned ? AtomicType::UniformUInt32 : AtomicType::UniformInt32;
    else if (t == LLVMTypes::FloatType)
@@ -88,6 +94,12 @@ lLLVMTypeToISPCType(const llvm::Type *t, bool intAsUnsigned) {
        return AtomicType::UniformDouble;
    else if (t == LLVMTypes::Int64Type)
        return intAsUnsigned ? AtomicType::UniformUInt64 : AtomicType::UniformInt64;

    // varying
    else if (t == LLVMTypes::Int8VectorType)
        return intAsUnsigned ? AtomicType::VaryingUInt8 : AtomicType::VaryingInt8;
    else if (t == LLVMTypes::Int16VectorType)
        return intAsUnsigned ? AtomicType::VaryingUInt16 : AtomicType::VaryingInt16;
    else if (t == LLVMTypes::Int32VectorType)
        return intAsUnsigned ? AtomicType::VaryingUInt32 : AtomicType::VaryingInt32;
    else if (t == LLVMTypes::FloatVectorType)
@@ -96,6 +108,14 @@ lLLVMTypeToISPCType(const llvm::Type *t, bool intAsUnsigned) {
        return AtomicType::VaryingDouble;
    else if (t == LLVMTypes::Int64VectorType)
        return intAsUnsigned ? AtomicType::VaryingUInt64 : AtomicType::VaryingInt64;

    // pointers to uniform
    else if (t == LLVMTypes::Int8PointerType)
        return new ReferenceType(intAsUnsigned ? AtomicType::UniformUInt8 :
                                 AtomicType::UniformInt8, false);
    else if (t == LLVMTypes::Int16PointerType)
        return new ReferenceType(intAsUnsigned ? AtomicType::UniformUInt16 :
                                 AtomicType::UniformInt16, false);
    else if (t == LLVMTypes::Int32PointerType)
        return new ReferenceType(intAsUnsigned ? AtomicType::UniformUInt32 :
                                 AtomicType::UniformInt32, false);
@@ -106,6 +126,14 @@ lLLVMTypeToISPCType(const llvm::Type *t, bool intAsUnsigned) {
        return new ReferenceType(AtomicType::UniformFloat, false);
    else if (t == LLVMTypes::DoublePointerType)
        return new ReferenceType(AtomicType::UniformDouble, false);

    // pointers to varying
    else if (t == LLVMTypes::Int8VectorPointerType)
        return new ReferenceType(intAsUnsigned ? AtomicType::VaryingUInt8 :
                                 AtomicType::VaryingInt8, false);
    else if (t == LLVMTypes::Int16VectorPointerType)
        return new ReferenceType(intAsUnsigned ? AtomicType::VaryingUInt16 :
                                 AtomicType::VaryingInt16, false);
    else if (t == LLVMTypes::Int32VectorPointerType)
        return new ReferenceType(intAsUnsigned ? AtomicType::VaryingUInt32 :
                                 AtomicType::VaryingInt32, false);
@@ -116,6 +144,8 @@ lLLVMTypeToISPCType(const llvm::Type *t, bool intAsUnsigned) {
        return new ReferenceType(AtomicType::VaryingFloat, false);
    else if (t == LLVMTypes::DoubleVectorPointerType)
        return new ReferenceType(AtomicType::VaryingDouble, false);

    // arrays
    else if (llvm::isa<const llvm::PointerType>(t)) {
        const llvm::PointerType *pt = llvm::dyn_cast<const llvm::PointerType>(t);
@@ -239,10 +269,49 @@ lAddModuleSymbols(llvm::Module *module, SymbolTable *symbolTable) {
        }
    }


static void
lDeclarePG(llvm::Module *module, LLVM_TYPE_CONST llvm::Type *vecType,
           const char *name) {
    SourcePos noPos;
    noPos.name = "__stdlib";

    std::vector<LLVM_TYPE_CONST llvm::Type *> argTypes;
    argTypes.push_back(LLVMTypes::VoidPointerVectorType);
    argTypes.push_back(LLVMTypes::MaskType);

    llvm::FunctionType *fType = llvm::FunctionType::get(vecType, argTypes, false);
    llvm::Function *func =
        llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage,
                               name, module);
    func->setOnlyReadsMemory(true);
    func->setDoesNotThrow(true);
}


static void
lDeclarePGBO(llvm::Module *module, LLVM_TYPE_CONST llvm::Type *vecType,
             const char *name) {
    std::vector<LLVM_TYPE_CONST llvm::Type *> argTypes;
    argTypes.push_back(LLVMTypes::VoidPointerType);
    argTypes.push_back(LLVMTypes::Int32VectorType);
    argTypes.push_back(LLVMTypes::MaskType);

    llvm::FunctionType *fType = llvm::FunctionType::get(vecType, argTypes, false);
    llvm::Function *func =
        llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage,
                               name, module);
    func->setOnlyReadsMemory(true);
    func->setDoesNotThrow(true);
}


/** Declare the 'pseudo-gather' functions.  When the ispc front-end needs
    to perform a gather, it generates a call to one of these functions,
    which have signatures:

    varying int8 __pseudo_gather(varying int8 *, mask)
    varying int16 __pseudo_gather(varying int16 *, mask)
    varying int32 __pseudo_gather(varying int32 *, mask)
    varying int64 __pseudo_gather(varying int64 *, mask)

@@ -253,6 +322,10 @@ lAddModuleSymbols(llvm::Module *module, SymbolTable *symbolTable) {
    front-end to be relatively simple in how it emits address calculation
    for gathers.

    varying int8 __pseudo_gather_base_offsets_8(uniform int8 *base,
                                                int32 offsets, mask)
    varying int16 __pseudo_gather_base_offsets_16(uniform int16 *base,
                                                  int32 offsets, mask)
    varying int32 __pseudo_gather_base_offsets_32(uniform int32 *base,
                                                  int32 offsets, mask)
    varying int64 __pseudo_gather_base_offsets_64(uniform int64 *base,
@@ -264,49 +337,54 @@ lAddModuleSymbols(llvm::Module *module, SymbolTable *symbolTable) {
 */
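A scalar model of the per-lane gather semantics described above (an illustrative sketch, not part of the commit; the gang width, bitmask layout, and helper name are assumptions):

    #include <cstdint>

    enum { GANG = 8 };  // assumed gang width, for illustration only

    // Semantics of "varying int32 __pseudo_gather(varying int32 *, mask)":
    // each active lane dereferences its own pointer; inactive lanes keep
    // their previous value.
    void emulateGather32(const int32_t *ptrs[GANG], uint32_t mask,
                         int32_t result[GANG]) {
        for (int lane = 0; lane < GANG; ++lane)
            if (mask & (1u << lane))
                result[lane] = *ptrs[lane];
    }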

static void
lDeclarePseudoGathers(llvm::Module *module) {
    SourcePos noPos;
    noPos.name = "__stdlib";
    lDeclarePG(module, LLVMTypes::Int8VectorType, "__pseudo_gather_8");
    lDeclarePG(module, LLVMTypes::Int16VectorType, "__pseudo_gather_16");
    lDeclarePG(module, LLVMTypes::Int32VectorType, "__pseudo_gather_32");
    lDeclarePG(module, LLVMTypes::Int64VectorType, "__pseudo_gather_64");

    {
        std::vector<LLVM_TYPE_CONST llvm::Type *> argTypes;
        argTypes.push_back(LLVMTypes::VoidPointerVectorType);
        argTypes.push_back(LLVMTypes::MaskType);
    lDeclarePGBO(module, LLVMTypes::Int8VectorType,
                 "__pseudo_gather_base_offsets_8");
    lDeclarePGBO(module, LLVMTypes::Int16VectorType,
                 "__pseudo_gather_base_offsets_16");
    lDeclarePGBO(module, LLVMTypes::Int32VectorType,
                 "__pseudo_gather_base_offsets_32");
    lDeclarePGBO(module, LLVMTypes::Int64VectorType,
                 "__pseudo_gather_base_offsets_64");
    }

    llvm::FunctionType *fType =
        llvm::FunctionType::get(LLVMTypes::Int32VectorType, argTypes, false);
    llvm::Function *func =
        llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage,
                               "__pseudo_gather_32", module);
    func->setOnlyReadsMemory(true);
    func->setDoesNotThrow(true);

    fType = llvm::FunctionType::get(LLVMTypes::Int64VectorType, argTypes, false);
    func = llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage,
                                  "__pseudo_gather_64", module);
    func->setOnlyReadsMemory(true);
    func->setDoesNotThrow(true);
}
static void
lDeclarePS(llvm::Module *module, LLVM_TYPE_CONST llvm::Type *vecType,
           const char *name) {
    std::vector<LLVM_TYPE_CONST llvm::Type *> argTypes;
    argTypes.push_back(LLVMTypes::VoidPointerVectorType);
    argTypes.push_back(vecType);
    argTypes.push_back(LLVMTypes::MaskType);

    {
        std::vector<LLVM_TYPE_CONST llvm::Type *> argTypes;
        argTypes.push_back(LLVMTypes::VoidPointerType);
        argTypes.push_back(LLVMTypes::Int32VectorType);
        argTypes.push_back(LLVMTypes::MaskType);
    llvm::FunctionType *fType =
        llvm::FunctionType::get(LLVMTypes::VoidType, argTypes, false);
    llvm::Function *func =
        llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage,
                               name, module);
    func->setDoesNotThrow(true);
    }

    llvm::FunctionType *fType =
        llvm::FunctionType::get(LLVMTypes::Int32VectorType, argTypes, false);
    llvm::Function *func =
        llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage,
                               "__pseudo_gather_base_offsets_32", module);
    func->setOnlyReadsMemory(true);
    func->setDoesNotThrow(true);

    fType = llvm::FunctionType::get(LLVMTypes::Int64VectorType, argTypes, false);
    func = llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage,
                                  "__pseudo_gather_base_offsets_64", module);
    func->setOnlyReadsMemory(true);
    func->setDoesNotThrow(true);
}
static void
lDeclarePSBO(llvm::Module *module, LLVM_TYPE_CONST llvm::Type *vecType,
             const char *name) {
    std::vector<LLVM_TYPE_CONST llvm::Type *> argTypes;
    argTypes.push_back(LLVMTypes::VoidPointerType);
    argTypes.push_back(LLVMTypes::Int32VectorType);
    argTypes.push_back(vecType);
    argTypes.push_back(LLVMTypes::MaskType);

    llvm::FunctionType *fType =
        llvm::FunctionType::get(LLVMTypes::VoidType, argTypes, false);
    llvm::Function *func =
        llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage,
                               name, module);
    func->setDoesNotThrow(true);
}
@@ -314,16 +392,22 @@ lDeclarePseudoGathers(llvm::Module *module) {
    we also declare (but never define) pseudo-scatter instructions with
    signatures:

    void __pseudo_scatter_8 (varying int8 *, varying int8 values, mask)
    void __pseudo_scatter_16(varying int16 *, varying int16 values, mask)
    void __pseudo_scatter_32(varying int32 *, varying int32 values, mask)
    void __pseudo_scatter_64(varying int64 *, varying int64 values, mask)

    The GatherScatterFlattenOpt optimization pass also finds these and
    transforms them to scatters like:

    void __pseudo_scatter_base_offsets_8(uniform int8 *base,
        varying int32 offsets, varying int8 values, mask)
    void __pseudo_scatter_base_offsets_16(uniform int16 *base,
        varying int32 offsets, varying int16 values, mask)
    void __pseudo_scatter_base_offsets_32(uniform int32 *base,
        varying int32 offsets, varying int32 values, mask)
    void __pseudo_scatter_base_offsets_64(uniform int64 *base,
        varying int62 offsets, varying int64 values, mask)
        varying int32 offsets, varying int64 values, mask)

    And the GSImprovementsPass in turn converts these to actual native
    scatters or masked stores.
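The base-plus-offsets form factors each lane's address into one shared base pointer plus a per-lane 32-bit offset. A scalar model of the scatter semantics (an illustrative sketch, not part of the commit; the gang width and mask layout are assumptions, and element-sized rather than byte-sized offsets are assumed for clarity):

    #include <cstdint>

    enum { GANG = 8 };  // assumed gang width

    // Semantics of "__pseudo_scatter_base_offsets_32(uniform int32 *base,
    // varying int32 offsets, varying int32 values, mask)": each active
    // lane writes its value at its own offset from the common base.
    void emulateScatterBaseOffsets32(int32_t *base, const int32_t offsets[GANG],
                                     const int32_t values[GANG], uint32_t mask) {
        for (int lane = 0; lane < GANG; ++lane)
            if (mask & (1u << lane))
                base[offsets[lane]] = values[lane];
    }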
@@ -333,67 +417,49 @@ lDeclarePseudoScatters(llvm::Module *module) {
    SourcePos noPos;
    noPos.name = "__stdlib";

    {
        std::vector<LLVM_TYPE_CONST llvm::Type *> argTypes;
        argTypes.push_back(LLVMTypes::VoidPointerVectorType);
        argTypes.push_back(LLVMTypes::Int32VectorType);
        argTypes.push_back(LLVMTypes::MaskType);
    lDeclarePS(module, LLVMTypes::Int8VectorType, "__pseudo_scatter_8");
    lDeclarePS(module, LLVMTypes::Int16VectorType, "__pseudo_scatter_16");
    lDeclarePS(module, LLVMTypes::Int32VectorType, "__pseudo_scatter_32");
    lDeclarePS(module, LLVMTypes::Int64VectorType, "__pseudo_scatter_64");

    llvm::FunctionType *fType =
        llvm::FunctionType::get(LLVMTypes::VoidType, argTypes, false);
    llvm::Function *func =
        llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage,
                               "__pseudo_scatter_32", module);
    func->setDoesNotThrow(true);
    }
    {
        std::vector<LLVM_TYPE_CONST llvm::Type *> argTypes;
        argTypes.push_back(LLVMTypes::VoidPointerVectorType);
        argTypes.push_back(LLVMTypes::Int64VectorType);
        argTypes.push_back(LLVMTypes::MaskType);
    lDeclarePSBO(module, LLVMTypes::Int8VectorType,
                 "__pseudo_scatter_base_offsets_8");
    lDeclarePSBO(module, LLVMTypes::Int16VectorType,
                 "__pseudo_scatter_base_offsets_16");
    lDeclarePSBO(module, LLVMTypes::Int32VectorType,
                 "__pseudo_scatter_base_offsets_32");
    lDeclarePSBO(module, LLVMTypes::Int64VectorType,
                 "__pseudo_scatter_base_offsets_64");
    }

    llvm::FunctionType *fType =
        llvm::FunctionType::get(LLVMTypes::VoidType, argTypes, false);
    llvm::Function *func =
        llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage,
                               "__pseudo_scatter_64", module);
    func->setDoesNotThrow(true);
    }

    {
        std::vector<LLVM_TYPE_CONST llvm::Type *> argTypes;
        argTypes.push_back(LLVMTypes::VoidPointerType);
        argTypes.push_back(LLVMTypes::Int32VectorType);
        argTypes.push_back(LLVMTypes::Int32VectorType);
        argTypes.push_back(LLVMTypes::MaskType);
static void
lDeclarePMS(llvm::Module *module, LLVM_TYPE_CONST llvm::Type *lvalueType,
            LLVM_TYPE_CONST llvm::Type *rvalueType, const char *name) {
    SourcePos noPos;
    noPos.name = "__stdlib";

    llvm::FunctionType *fType =
        llvm::FunctionType::get(LLVMTypes::VoidType, argTypes, false);
    llvm::Function *func =
        llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage,
                               "__pseudo_scatter_base_offsets_32", module);
    func->setDoesNotThrow(true);
    }
    {
        std::vector<LLVM_TYPE_CONST llvm::Type *> argTypes;
        argTypes.push_back(LLVMTypes::VoidPointerType);
        argTypes.push_back(LLVMTypes::Int32VectorType);
        argTypes.push_back(LLVMTypes::Int64VectorType);
        argTypes.push_back(LLVMTypes::MaskType);
    std::vector<LLVM_TYPE_CONST llvm::Type *> argTypes;
    argTypes.push_back(lvalueType);
    argTypes.push_back(rvalueType);
    argTypes.push_back(LLVMTypes::MaskType);

    llvm::FunctionType *fType =
        llvm::FunctionType::get(LLVMTypes::VoidType, argTypes, false);
    llvm::Function *func =
        llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage,
                               "__pseudo_scatter_base_offsets_64", module);
    func->setDoesNotThrow(true);
    }
    llvm::FunctionType *fType =
        llvm::FunctionType::get(LLVMTypes::VoidType, argTypes, false);
    llvm::Function *func =
        llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage,
                               name, module);
    func->setDoesNotThrow(true);
    func->addFnAttr(llvm::Attribute::AlwaysInline);
    func->setDoesNotCapture(1, true);
}

/** This function declares placeholder masked store functions for the
    front-end to use.

    void __pseudo_masked_store_8 (uniform int8 *ptr, varying int8 values, mask)
    void __pseudo_masked_store_16(uniform int16 *ptr, varying int16 values, mask)
    void __pseudo_masked_store_32(uniform int32 *ptr, varying int32 values, mask)
    void __pseudo_masked_store_64(uniform int64 *ptr, varying int64 values, mask)

@@ -403,40 +469,14 @@ lDeclarePseudoScatters(llvm::Module *module) {
 */
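A scalar model of the masked-store semantics (an illustrative sketch only; gang width and mask layout are assumptions):

    #include <cstdint>

    enum { GANG = 8 };  // assumed gang width

    // Semantics of "__pseudo_masked_store_32(uniform int32 *ptr,
    // varying int32 values, mask)": a contiguous store in which only the
    // slots of active lanes are written; memory for inactive lanes is
    // left untouched.
    void emulateMaskedStore32(int32_t *ptr, const int32_t values[GANG],
                              uint32_t mask) {
        for (int lane = 0; lane < GANG; ++lane)
            if (mask & (1u << lane))
                ptr[lane] = values[lane];
    }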
static void
lDeclarePseudoMaskedStore(llvm::Module *module) {
    SourcePos noPos;
    noPos.name = "__stdlib";

    {
        std::vector<LLVM_TYPE_CONST llvm::Type *> argTypes;
        argTypes.push_back(LLVMTypes::Int32VectorPointerType);
        argTypes.push_back(LLVMTypes::Int32VectorType);
        argTypes.push_back(LLVMTypes::MaskType);

        llvm::FunctionType *fType =
            llvm::FunctionType::get(LLVMTypes::VoidType, argTypes, false);
        llvm::Function *func =
            llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage,
                                   "__pseudo_masked_store_32", module);
        func->setDoesNotThrow(true);
        func->addFnAttr(llvm::Attribute::AlwaysInline);
        func->setDoesNotCapture(1, true);
    }

    {
        std::vector<LLVM_TYPE_CONST llvm::Type *> argTypes;
        argTypes.push_back(LLVMTypes::Int64VectorPointerType);
        argTypes.push_back(LLVMTypes::Int64VectorType);
        argTypes.push_back(LLVMTypes::MaskType);

        llvm::FunctionType *fType =
            llvm::FunctionType::get(LLVMTypes::VoidType, argTypes, false);
        llvm::Function *func =
            llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage,
                                   "__pseudo_masked_store_64", module);
        func->setDoesNotThrow(true);
        func->addFnAttr(llvm::Attribute::AlwaysInline);
        func->setDoesNotCapture(1, true);
    }
    lDeclarePMS(module, LLVMTypes::Int8VectorPointerType,
                LLVMTypes::Int8VectorType, "__pseudo_masked_store_8");
    lDeclarePMS(module, LLVMTypes::Int16VectorPointerType,
                LLVMTypes::Int16VectorType, "__pseudo_masked_store_16");
    lDeclarePMS(module, LLVMTypes::Int32VectorPointerType,
                LLVMTypes::Int32VectorType, "__pseudo_masked_store_32");
    lDeclarePMS(module, LLVMTypes::Int64VectorPointerType,
                LLVMTypes::Int64VectorType, "__pseudo_masked_store_64");
}


@@ -609,8 +649,8 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
    // needed by the compiled program.
    {
        std::vector<LLVM_TYPE_CONST llvm::Type *> argTypes;
        argTypes.push_back(llvm::PointerType::get(llvm::Type::getInt8Ty(*g->ctx), 0));
        argTypes.push_back(llvm::PointerType::get(llvm::Type::getInt8Ty(*g->ctx), 0));
        argTypes.push_back(LLVMTypes::VoidPointerType);
        argTypes.push_back(LLVMTypes::VoidPointerType);
        argTypes.push_back(LLVMTypes::Int32Type);
        argTypes.push_back(LLVMTypes::Int32Type);
        llvm::FunctionType *ftype = llvm::FunctionType::get(LLVMTypes::VoidType,
ctx.cpp
@@ -1448,17 +1448,20 @@ FunctionEmitContext::gather(llvm::Value *lvalue, const Type *type,
    llvm::Value *mask = GetMask();
    llvm::Function *gather = NULL;
    // Figure out which gather function to call based on the size of
    // the elements; will need to generalize this for 8 and 16-bit
    // types.
    // the elements.
    if (retType == LLVMTypes::DoubleVectorType ||
        retType == LLVMTypes::Int64VectorType)
        gather = m->module->getFunction("__pseudo_gather_64");
    else {
        assert(retType == LLVMTypes::FloatVectorType ||
               retType == LLVMTypes::Int32VectorType);
    else if (retType == LLVMTypes::FloatVectorType ||
             retType == LLVMTypes::Int32VectorType)
        gather = m->module->getFunction("__pseudo_gather_32");
    else if (retType == LLVMTypes::Int16VectorType)
        gather = m->module->getFunction("__pseudo_gather_16");
    else {
        assert(retType == LLVMTypes::Int8VectorType);
        gather = m->module->getFunction("__pseudo_gather_8");
    }
    assert(gather);
    assert(gather != NULL);

    llvm::Value *voidlvalue = BitCastInst(lvalue, LLVMTypes::VoidPointerType);
    llvm::Instruction *call = CallInst(gather, voidlvalue, mask, name);
@@ -1578,9 +1581,7 @@ FunctionEmitContext::maskedStore(llvm::Value *rvalue, llvm::Value *lvalue,
    rvalueType = rvalueType->GetAsNonConstType();

    llvm::Function *maskedStoreFunc = NULL;
    // Figure out if we need a 32-bit or 64-bit masked store.  This
    // will need to be generalized when/if 8 and 16-bit data types are
    // added.
    // Figure out if we need an 8, 16, 32 or 64-bit masked store.
    if (rvalueType == AtomicType::VaryingDouble ||
        rvalueType == AtomicType::VaryingInt64 ||
        rvalueType == AtomicType::VaryingUInt64) {
@@ -1590,13 +1591,11 @@ FunctionEmitContext::maskedStore(llvm::Value *rvalue, llvm::Value *lvalue,
        rvalue = BitCastInst(rvalue, LLVMTypes::Int64VectorType,
                             "rvalue_to_int64");
    }
    else {
        assert(rvalueType == AtomicType::VaryingFloat ||
               rvalueType == AtomicType::VaryingBool ||
               rvalueType == AtomicType::VaryingInt32 ||
               rvalueType == AtomicType::VaryingUInt32 ||
               dynamic_cast<const EnumType *>(rvalueType) != NULL);

    else if (rvalueType == AtomicType::VaryingFloat ||
             rvalueType == AtomicType::VaryingBool ||
             rvalueType == AtomicType::VaryingInt32 ||
             rvalueType == AtomicType::VaryingUInt32 ||
             dynamic_cast<const EnumType *>(rvalueType) != NULL) {
        maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_32");
        lvalue = BitCastInst(lvalue, LLVMTypes::Int32VectorPointerType,
                             "lvalue_to_int32vecptr");
@@ -1604,6 +1603,18 @@ FunctionEmitContext::maskedStore(llvm::Value *rvalue, llvm::Value *lvalue,
        rvalue = BitCastInst(rvalue, LLVMTypes::Int32VectorType,
                             "rvalue_to_int32");
    }
    else if (rvalueType == AtomicType::VaryingInt16 ||
             rvalueType == AtomicType::VaryingUInt16) {
        maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_16");
        lvalue = BitCastInst(lvalue, LLVMTypes::Int16VectorPointerType,
                             "lvalue_to_int16vecptr");
    }
    else if (rvalueType == AtomicType::VaryingInt8 ||
             rvalueType == AtomicType::VaryingUInt8) {
        maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_8");
        lvalue = BitCastInst(lvalue, LLVMTypes::Int8VectorPointerType,
                             "lvalue_to_int8vecptr");
    }

    std::vector<llvm::Value *> args;
    args.push_back(lvalue);
@@ -1668,14 +1679,15 @@ FunctionEmitContext::scatter(llvm::Value *rvalue, llvm::Value *lvalue,
        func = m->module->getFunction("__pseudo_scatter_64");
        rvalue = BitCastInst(rvalue, LLVMTypes::Int64VectorType, "rvalue2int");
    }
    else {
        // FIXME: if this hits, presumably it's due to needing int8 and/or
        // int16 versions of scatter...
        assert(type == LLVMTypes::FloatVectorType ||
               type == LLVMTypes::Int32VectorType);
    else if (type == LLVMTypes::FloatVectorType ||
             type == LLVMTypes::Int32VectorType) {
        func = m->module->getFunction("__pseudo_scatter_32");
        rvalue = BitCastInst(rvalue, LLVMTypes::Int32VectorType, "rvalue2int");
    }
    else if (type == LLVMTypes::Int16VectorType)
        func = m->module->getFunction("__pseudo_scatter_16");
    else if (type == LLVMTypes::Int8VectorType)
        func = m->module->getFunction("__pseudo_scatter_8");
    assert(func != NULL);

    AddInstrumentationPoint("scatter");
@@ -427,7 +427,8 @@ The following identifiers are reserved as language keywords: ``bool``,
``char``, ``cif``, ``cwhile``, ``const``, ``continue``, ``creturn``,
``default``, ``do``, ``double``, ``else``, ``enum``, ``export``,
``extern``, ``false``, ``float``, ``for``, ``goto``, ``if``, ``inline``, ``int``,
``int32``, ``int64``, ``launch``, ``print``, ``reference``, ``return``,
``int8``, ``int16``, ``int32``, ``int64``, ``launch``, ``print``,
``reference``, ``return``,
``signed``, ``sizeof``, ``soa``, ``static``, ``struct``, ``switch``,
``sync``, ``task``, ``true``, ``typedef``, ``uniform``, ``union``,
``unsigned``, ``varying``, ``void``, ``volatile``, ``while``.
@@ -481,6 +482,10 @@ types.
* ``void``: "empty" type representing no value.
* ``bool``: boolean value; may be assigned ``true``, ``false``, or the
  value of a boolean expression.
* ``int8``: 8-bit signed integer.
* ``unsigned int8``: 8-bit unsigned integer.
* ``int16``: 16-bit signed integer.
* ``unsigned int16``: 16-bit unsigned integer.
* ``int``: 32-bit signed integer; may also be specified as ``int32``.
* ``unsigned int``: 32-bit unsigned integer; may also be specified as
  ``unsigned int32``.
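On the C/C++ side of an application these types correspond to the ``<cstdint>`` fixed-width integers, as the example header changes later in this diff illustrate. A sketch of the mapping (the struct name here is hypothetical)::

    #include <cstdint>

    // C++ mirror of a hypothetical ispc struct using the new integer
    // types; field widths must match the ispc declaration one-for-one.
    struct Example {
        int8_t   a;  // ispc: int8
        uint8_t  b;  // ispc: unsigned int8
        int16_t  c;  // ispc: int16
        uint16_t d;  // ispc: unsigned int16
        int32_t  e;  // ispc: int / int32
        uint32_t f;  // ispc: unsigned int / unsigned int32
    };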
@@ -497,7 +502,8 @@ general" of the two types, with the following precedence:

::

    double > uint64 > int64 > float > uint32 > int32 > bool
    double > uint64 > int64 > float > uint32 > int32 >
        uint16 > int16 > uint8 > int8 > bool

In other words, adding an ``int64`` to a ``double`` causes the ``int64`` to
be converted to a ``double``, the addition to be performed, and a
@@ -1709,10 +1715,12 @@ the running program instances.

::

    float broadcast(float value, uniform int index)
    int8 broadcast(int8 value, uniform int index)
    int16 broadcast(int16 value, uniform int index)
    int32 broadcast(int32 value, uniform int index)
    double broadcast(double value, uniform int index)
    int64 broadcast(int64 value, uniform int index)
    float broadcast(float value, uniform int index)
    double broadcast(double value, uniform int index)

The ``rotate()`` function allows each program instance to find the value of
the given value that their neighbor ``offset`` steps away has.  For
@@ -1725,10 +1733,12 @@ provided offset value can be positive or negative, and may be greater than

::

    float rotate(float value, uniform int offset)
    int8 rotate(int8 value, uniform int offset)
    int16 rotate(int16 value, uniform int offset)
    int32 rotate(int32 value, uniform int offset)
    double rotate(double value, uniform int offset)
    int64 rotate(int64 value, uniform int offset)
    float rotate(float value, uniform int offset)
    double rotate(double value, uniform int offset)
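A scalar model of ``rotate()``'s semantics (an illustrative sketch, not ispc's implementation; the gang width is an assumption)::

    #include <cstdint>

    enum { GANG = 8 };  // assumed gang width

    // Lane i of the result receives the value held by lane
    // (i + offset) mod GANG; offset may be negative or larger than GANG.
    void emulateRotate32(const int32_t value[GANG], int offset,
                         int32_t result[GANG]) {
        for (int lane = 0; lane < GANG; ++lane) {
            int src = ((lane + offset) % GANG + GANG) % GANG;
            result[lane] = value[src];
        }
    }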

Finally, the ``shuffle()`` functions allow two variants of fully general
@@ -1739,10 +1749,12 @@ from which to get the value of ``value``.  The provided values for

::

    float shuffle(float value, int permutation)
    int8 shuffle(int8 value, int permutation)
    int16 shuffle(int16 value, int permutation)
    int32 shuffle(int32 value, int permutation)
    double shuffle(double value, int permutation)
    int64 shuffle(int64 value, int permutation)
    float shuffle(float value, int permutation)
    double shuffle(double value, int permutation)


The second variant of ``shuffle()`` permutes over the extended vector that
@@ -1753,10 +1765,12 @@ of ``value1``, etc.)

::

    float shuffle(float value0, float value1, int permutation)
    int8 shuffle(int8 value0, int8 value1, int permutation)
    int16 shuffle(int16 value0, int16 value1, int permutation)
    int32 shuffle(int32 value0, int32 value1, int permutation)
    double shuffle(double value0, double value1, int permutation)
    int64 shuffle(int64 value0, int64 value1, int permutation)
    float shuffle(float value0, float value1, int permutation)
    double shuffle(double value0, double value1, int permutation)

The various variants of ``popcnt()`` return the population count--the
number of bits set in the given value.
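One classic way to compute a population count, shown for 32-bit values (a sketch of the operation's meaning, not ispc's implementation)::

    #include <cstdint>

    // Kernighan's trick: each iteration clears the lowest set bit,
    // so the loop runs once per set bit.
    int popcount32(uint32_t v) {
        int n = 0;
        for (; v != 0; v &= v - 1)
            ++n;
        return n;
    }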
@@ -1861,10 +1875,19 @@ where the ``i`` th element of ``x`` has been replaced with the value ``v``

::

    uniform int8 extract(int8 x, uniform int i)
    uniform int16 extract(int16 x, uniform int i)
    uniform int32 extract(int32 x, uniform int i)
    uniform int64 extract(int64 x, uniform int i)
    uniform float extract(float x, uniform int i)
    uniform int extract(int x, uniform int i)

::

    int8 insert(int8 x, uniform int i, uniform int8 v)
    int16 insert(int16 x, uniform int i, uniform int16 v)
    int32 insert(int32 x, uniform int i, uniform int32 v)
    int64 insert(int64 x, uniform int i, uniform int64 v)
    float insert(float x, uniform int i, uniform float v)
    int insert(int x, uniform int i, uniform int v)

Atomic Operations and Memory Fences
@@ -1948,41 +1971,6 @@ value ``true`` (rather than just having the value one).  The

    int sign_extend(bool value)
    uniform int sign_extend(uniform bool value)

``ispc`` provides a number of bit/memory-level utility routines in its
standard library as well.  It has routines that load from and store
to 8-bit and 16-bit integer values stored in memory, converting to and from
32-bit integers for use in computation in ``ispc`` code.  (These functions
and this conversion step are necessary because ``ispc`` doesn't have native
8-bit or 16-bit types in the language.)

::

    int load_from_int8(uniform int a[], uniform int offset)
    unsigned int load_from_int8(uniform unsigned int a[],
                                uniform int offset)
    void store_to_int8(uniform int a[], uniform int offset,
                       int val)
    void store_to_int8(uniform unsigned int a[], uniform int offset,
                       unsigned int val)
    int load_from_int16(uniform int a[],
                        uniform int offset)
    unsigned int load_from_int16(uniform unsigned int a[],
                                 uniform int offset)
    void store_to_int16(uniform int a[], uniform int offset,
                        int val)
    void store_to_int16(uniform unsigned int a[], uniform int offset,
                        unsigned int val)

There are three things to note in these functions.  First, note that these
functions take either ``int`` or ``unsigned int`` arrays as parameters; you
need to cast the ``int8_t`` and ``int16_t`` pointers from the C/C++ side
to ``int`` or ``unsigned int`` when passing them to ``ispc`` code.  Second,
although the arrays are passed as 32-bit integers, in the array indexing
calculation, with the ``offset`` parameter, they are treated as if they
were ``int8`` or ``int16`` types (i.e. the offset is treated as being in terms
of the number of 8 or 16-bit elements).  Third, note that the value of
``programIndex`` is implicitly added to offset.
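A scalar model of one program instance's view of ``load_from_int8()`` (an illustrative sketch following the description above; the helper name is hypothetical)::

    #include <cstdint>

    // The array is passed as int but indexed as 8-bit elements; the
    // instance's programIndex is added to offset, and the loaded byte
    // is sign-extended to 32 bits for use in computation.
    int32_t emulateLoadFromInt8(const int32_t a[], int offset,
                                int programIndex) {
        const int8_t *bytes = reinterpret_cast<const int8_t *>(a);
        return static_cast<int32_t>(bytes[offset + programIndex]);
    }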

The ``intbits()`` and ``floatbits()`` functions can be used to implement
low-level floating-point bit twiddling.  For example, ``intbits()`` returns
an ``unsigned int`` that is a bit-for-bit copy of the given ``float``
@@ -190,7 +190,9 @@ int main(int argc, char *argv[]) {
        nodes[i].bounds[1].v[1] = b[4];
        nodes[i].bounds[1].v[2] = b[5];
        READ(nodes[i].offset, 1);
        READ(nodes[i].primsAxis, 1);
        READ(nodes[i].nPrimitives, 1);
        READ(nodes[i].splitAxis, 1);
        READ(nodes[i].pad, 1);
    }

    // And then read the triangles

@@ -50,21 +50,11 @@ struct Triangle {
struct LinearBVHNode {
    uniform float3 bounds[2];
    uniform unsigned int offset;    // num primitives for leaf, second child for interior
    uniform unsigned int primsAxis; // 0:7 nPrimitives, 8:15 split axis, 16:31 padding
    uniform unsigned int8 nPrimitives;
    uniform unsigned int8 splitAxis;
    uniform unsigned int16 pad;
};

static inline uniform int nPrims(const reference LinearBVHNode node) {
    return (node.primsAxis & 0xff);
}

static inline uniform int axis(const reference LinearBVHNode node) {
    return ((node.primsAxis >> 8) & 0xff);
}

static inline uniform bool isInterior(const reference LinearBVHNode node) {
    return nPrims(node) == 0;
}
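The removed accessors above unpack fields that the old layout packed into one 32-bit ``primsAxis`` word; with native 8/16-bit types the fields are now stored directly. The equivalence, sketched in C++:

    #include <cstdint>

    // Old packed encoding: bits 0:7 nPrimitives, bits 8:15 split axis,
    // bits 16:31 padding.
    uint32_t packPrimsAxis(uint8_t nPrimitives, uint8_t splitAxis) {
        return uint32_t(nPrimitives) | (uint32_t(splitAxis) << 8);
    }
    uint8_t nPrimsOf(uint32_t primsAxis) { return primsAxis & 0xff; }
    uint8_t axisOf(uint32_t primsAxis)   { return (primsAxis >> 8) & 0xff; }

    // New layout: plain member reads (node.nPrimitives, node.splitAxis)
    // replace the bit extraction entirely.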

static inline float3 Cross(const float3 v1, const float3 v2) {
    float v1x = v1.x, v1y = v1.y, v1z = v1.z;
    float v2x = v2.x, v2y = v2.y, v2z = v2.z;
@@ -199,7 +189,7 @@ bool BVHIntersect(const LinearBVHNode nodes[], const Triangle tris[],
        // Check ray against BVH node
        LinearBVHNode node = nodes[nodeNum];
        if (any(BBoxIntersect(node.bounds, ray))) {
            uniform unsigned int nPrimitives = nPrims(node);
            uniform unsigned int nPrimitives = node.nPrimitives;
            if (nPrimitives > 0) {
                // Intersect ray with primitives in leaf BVH node
                uniform unsigned int primitivesOffset = node.offset;
@@ -213,7 +203,7 @@ bool BVHIntersect(const LinearBVHNode nodes[], const Triangle tris[],
        }
        else {
            // Put far BVH node on _todo_ stack, advance to near node
            if (r.dirIsNeg[axis(node)]) {
            if (r.dirIsNeg[node.splitAxis]) {
                todo[todoOffset++] = nodeNum + 1;
                nodeNum = node.offset;
            }

@@ -75,30 +75,20 @@ struct Ray {
namespace ispc {
struct Triangle {
    float3 p[3];
    int id;
    int32_t id;
};

struct LinearBVHNode {
    float3 bounds[2];
    unsigned int offset;    // primitives for leaf, second child for interior
    unsigned int primsAxis; // 0:7 nPrimitives, 8:15 split axis, 16:31 padding
    int32_t offset;  // primitives for leaf, second child for interior
    uint8_t nPrimitives;
    uint8_t splitAxis;
    uint16_t pad;
};
}

using namespace ispc;

inline int nPrims(const LinearBVHNode &node) {
    return (node.primsAxis & 0xff);
}

inline int axis(const LinearBVHNode &node) {
    return ((node.primsAxis >> 8) & 0xff);
}

inline bool isInterior(const LinearBVHNode &node) {
    return nPrims(node) == 0;
}

inline float3 Cross(const float3 &v1, const float3 &v2) {
    float v1x = v1.x, v1y = v1.y, v1z = v1.z;
    float v2x = v2.x, v2y = v2.y, v2z = v2.z;
@@ -230,7 +220,7 @@ bool BVHIntersect(const LinearBVHNode nodes[], const Triangle tris[],
        // Check ray against BVH node
        const LinearBVHNode &node = nodes[nodeNum];
        if (BBoxIntersect(node.bounds, ray)) {
            unsigned int nPrimitives = nPrims(node);
            unsigned int nPrimitives = node.nPrimitives;
            if (nPrimitives > 0) {
                // Intersect ray with primitives in leaf BVH node
                unsigned int primitivesOffset = node.offset;
@@ -244,7 +234,7 @@ bool BVHIntersect(const LinearBVHNode nodes[], const Triangle tris[],
        }
        else {
            // Put far BVH node on _todo_ stack, advance to near node
            if (r.dirIsNeg[axis(node)]) {
            if (r.dirIsNeg[node.splitAxis]) {
                todo[todoOffset++] = nodeNum + 1;
                nodeNum = node.offset;
            }
expr.cpp
@@ -93,6 +93,10 @@ lMaybeIssuePrecisionWarning(const AtomicType *toAtomicType,
                            SourcePos pos, const char *errorMsgBase) {
    switch (toAtomicType->basicType) {
    case AtomicType::TYPE_BOOL:
    case AtomicType::TYPE_INT8:
    case AtomicType::TYPE_UINT8:
    case AtomicType::TYPE_INT16:
    case AtomicType::TYPE_UINT16:
    case AtomicType::TYPE_INT32:
    case AtomicType::TYPE_UINT32:
    case AtomicType::TYPE_FLOAT:
@@ -101,6 +105,10 @@ lMaybeIssuePrecisionWarning(const AtomicType *toAtomicType,
    case AtomicType::TYPE_DOUBLE:
        if ((int)toAtomicType->basicType < (int)fromAtomicType->basicType &&
            toAtomicType->basicType != AtomicType::TYPE_BOOL &&
            !(toAtomicType->basicType == AtomicType::TYPE_INT8 &&
              fromAtomicType->basicType == AtomicType::TYPE_UINT8) &&
            !(toAtomicType->basicType == AtomicType::TYPE_INT16 &&
              fromAtomicType->basicType == AtomicType::TYPE_UINT16) &&
            !(toAtomicType->basicType == AtomicType::TYPE_INT32 &&
              fromAtomicType->basicType == AtomicType::TYPE_UINT32) &&
            !(toAtomicType->basicType == AtomicType::TYPE_INT64 &&
@@ -363,15 +371,33 @@ lLLVMConstantValue(const Type *type, llvm::LLVMContext *ctx, double value) {
            return (value != 0.) ? LLVMTrue : LLVMFalse;
        else
            return LLVMBoolVector(value != 0.);
    case AtomicType::TYPE_UINT32: {
    case AtomicType::TYPE_INT8: {
        int i = (int)value;
        assert((double)i == value);
        return isUniform ? LLVMInt8(i) : LLVMInt8Vector(i);
    }
    case AtomicType::TYPE_UINT8: {
        unsigned int i = (unsigned int)value;
        return isUniform ? LLVMUInt32(i) : LLVMUInt32Vector(i);
        return isUniform ? LLVMUInt8(i) : LLVMUInt8Vector(i);
    }
    case AtomicType::TYPE_INT16: {
        int i = (int)value;
        assert((double)i == value);
        return isUniform ? LLVMInt16(i) : LLVMInt16Vector(i);
    }
    case AtomicType::TYPE_UINT16: {
        unsigned int i = (unsigned int)value;
        return isUniform ? LLVMUInt16(i) : LLVMUInt16Vector(i);
    }
    case AtomicType::TYPE_INT32: {
        int i = (int)value;
        assert((double)i == value);
        return isUniform ? LLVMInt32(i) : LLVMInt32Vector(i);
    }
    case AtomicType::TYPE_UINT32: {
        unsigned int i = (unsigned int)value;
        return isUniform ? LLVMUInt32(i) : LLVMUInt32Vector(i);
    }
    case AtomicType::TYPE_FLOAT:
        return isUniform ? LLVMFloat((float)value) :
                           LLVMFloatVector((float)value);
@@ -590,14 +616,13 @@ UnaryExpr::Optimize() {
    const Type *type = constExpr->GetType();
    bool isEnumType = dynamic_cast<const EnumType *>(type) != NULL;

    if (type == AtomicType::UniformInt64 ||
        type == AtomicType::VaryingInt64 ||
        type == AtomicType::UniformUInt64 ||
        type == AtomicType::VaryingUInt64 ||
        type == AtomicType::UniformConstInt64 ||
        type == AtomicType::VaryingConstInt64 ||
        type == AtomicType::UniformConstUInt64 ||
        type == AtomicType::VaryingConstUInt64)
    const Type *baseType = type->GetAsNonConstType()->GetAsUniformType();
    if (baseType == AtomicType::UniformInt8 ||
        baseType == AtomicType::UniformUInt8 ||
        baseType == AtomicType::UniformInt16 ||
        baseType == AtomicType::UniformUInt16 ||
        baseType == AtomicType::UniformInt64 ||
        baseType == AtomicType::UniformUInt64)
        // FIXME: should handle these at some point; for now we only do
        // constant folding for bool, int32 and float types...
        return this;
@@ -3058,6 +3083,86 @@ MemberExpr::getCandidateNearMatches() const {
///////////////////////////////////////////////////////////////////////////
// ConstExpr

ConstExpr::ConstExpr(const Type *t, int8_t i, SourcePos p)
    : Expr(p) {
    type = t;
    type = type->GetAsConstType();
    assert(type == AtomicType::UniformConstInt8);
    int8Val[0] = i;
}


ConstExpr::ConstExpr(const Type *t, int8_t *i, SourcePos p)
    : Expr(p) {
    type = t;
    type = type->GetAsConstType();
    assert(type == AtomicType::UniformConstInt8 ||
           type == AtomicType::VaryingConstInt8);
    for (int j = 0; j < Count(); ++j)
        int8Val[j] = i[j];
}


ConstExpr::ConstExpr(const Type *t, uint8_t u, SourcePos p)
    : Expr(p) {
    type = t;
    type = type->GetAsConstType();
    assert(type == AtomicType::UniformUInt8);
    uint8Val[0] = u;
}


ConstExpr::ConstExpr(const Type *t, uint8_t *u, SourcePos p)
    : Expr(p) {
    type = t;
    type = type->GetAsConstType();
    assert(type == AtomicType::UniformConstUInt8 ||
           type == AtomicType::VaryingConstUInt8);
    for (int j = 0; j < Count(); ++j)
        uint8Val[j] = u[j];
}


ConstExpr::ConstExpr(const Type *t, int16_t i, SourcePos p)
    : Expr(p) {
    type = t;
    type = type->GetAsConstType();
    assert(type == AtomicType::UniformConstInt16);
    int16Val[0] = i;
}


ConstExpr::ConstExpr(const Type *t, int16_t *i, SourcePos p)
    : Expr(p) {
    type = t;
    type = type->GetAsConstType();
    assert(type == AtomicType::UniformConstInt16 ||
           type == AtomicType::VaryingConstInt16);
    for (int j = 0; j < Count(); ++j)
        int16Val[j] = i[j];
}


ConstExpr::ConstExpr(const Type *t, uint16_t u, SourcePos p)
    : Expr(p) {
    type = t;
    type = type->GetAsConstType();
    assert(type == AtomicType::UniformUInt16);
    uint16Val[0] = u;
}


ConstExpr::ConstExpr(const Type *t, uint16_t *u, SourcePos p)
    : Expr(p) {
    type = t;
    type = type->GetAsConstType();
    assert(type == AtomicType::UniformConstUInt16 ||
           type == AtomicType::VaryingConstUInt16);
    for (int j = 0; j < Count(); ++j)
        uint16Val[j] = u[j];
}


ConstExpr::ConstExpr(const Type *t, int32_t i, SourcePos p)
    : Expr(p) {
    type = t;
@@ -3212,6 +3317,22 @@ ConstExpr::ConstExpr(ConstExpr *old, double *v)
        for (int i = 0; i < Count(); ++i)
            boolVal[i] = (v[i] != 0.);
        break;
    case AtomicType::TYPE_INT8:
        for (int i = 0; i < Count(); ++i)
            int8Val[i] = (int)v[i];
        break;
    case AtomicType::TYPE_UINT8:
        for (int i = 0; i < Count(); ++i)
            uint8Val[i] = (unsigned int)v[i];
        break;
    case AtomicType::TYPE_INT16:
        for (int i = 0; i < Count(); ++i)
            int16Val[i] = (int)v[i];
        break;
    case AtomicType::TYPE_UINT16:
        for (int i = 0; i < Count(); ++i)
            uint16Val[i] = (unsigned int)v[i];
        break;
    case AtomicType::TYPE_INT32:
        for (int i = 0; i < Count(); ++i)
            int32Val[i] = (int)v[i];
@@ -3270,6 +3391,18 @@ ConstExpr::GetValue(FunctionEmitContext *ctx) const {
            return LLVMBoolVector(boolVal);
        else
            return boolVal[0] ? LLVMTrue : LLVMFalse;
    case AtomicType::TYPE_INT8:
        return isVarying ? LLVMInt8Vector(int8Val) :
                           LLVMInt8(int8Val[0]);
    case AtomicType::TYPE_UINT8:
        return isVarying ? LLVMUInt8Vector(uint8Val) :
                           LLVMUInt8(uint8Val[0]);
    case AtomicType::TYPE_INT16:
        return isVarying ? LLVMInt16Vector(int16Val) :
                           LLVMInt16(int16Val[0]);
    case AtomicType::TYPE_UINT16:
        return isVarying ? LLVMUInt16Vector(uint16Val) :
                           LLVMUInt16(uint16Val[0]);
    case AtomicType::TYPE_INT32:
        return isVarying ? LLVMInt32Vector(int32Val) :
                           LLVMInt32(int32Val[0]);
@@ -3351,6 +3484,10 @@ int
ConstExpr::AsInt64(int64_t *ip, bool forceVarying) const {
    switch (getBasicType()) {
    case AtomicType::TYPE_BOOL: lConvert(boolVal, ip, Count(), forceVarying); break;
    case AtomicType::TYPE_INT8: lConvert(int8Val, ip, Count(), forceVarying); break;
    case AtomicType::TYPE_UINT8: lConvert(uint8Val, ip, Count(), forceVarying); break;
    case AtomicType::TYPE_INT16: lConvert(int16Val, ip, Count(), forceVarying); break;
    case AtomicType::TYPE_UINT16: lConvert(uint16Val, ip, Count(), forceVarying); break;
    case AtomicType::TYPE_INT32: lConvert(int32Val, ip, Count(), forceVarying); break;
    case AtomicType::TYPE_UINT32: lConvert(uint32Val, ip, Count(), forceVarying); break;
    case AtomicType::TYPE_FLOAT: lConvert(floatVal, ip, Count(), forceVarying); break;
@@ -3368,6 +3505,10 @@ int
ConstExpr::AsUInt64(uint64_t *up, bool forceVarying) const {
    switch (getBasicType()) {
    case AtomicType::TYPE_BOOL: lConvert(boolVal, up, Count(), forceVarying); break;
    case AtomicType::TYPE_INT8: lConvert(int8Val, up, Count(), forceVarying); break;
    case AtomicType::TYPE_UINT8: lConvert(uint8Val, up, Count(), forceVarying); break;
    case AtomicType::TYPE_INT16: lConvert(int16Val, up, Count(), forceVarying); break;
    case AtomicType::TYPE_UINT16: lConvert(uint16Val, up, Count(), forceVarying); break;
    case AtomicType::TYPE_INT32: lConvert(int32Val, up, Count(), forceVarying); break;
    case AtomicType::TYPE_UINT32: lConvert(uint32Val, up, Count(), forceVarying); break;
    case AtomicType::TYPE_FLOAT: lConvert(floatVal, up, Count(), forceVarying); break;
@@ -3385,6 +3526,10 @@ int
ConstExpr::AsDouble(double *d, bool forceVarying) const {
    switch (getBasicType()) {
    case AtomicType::TYPE_BOOL: lConvert(boolVal, d, Count(), forceVarying); break;
    case AtomicType::TYPE_INT8: lConvert(int8Val, d, Count(), forceVarying); break;
    case AtomicType::TYPE_UINT8: lConvert(uint8Val, d, Count(), forceVarying); break;
    case AtomicType::TYPE_INT16: lConvert(int16Val, d, Count(), forceVarying); break;
    case AtomicType::TYPE_UINT16: lConvert(uint16Val, d, Count(), forceVarying); break;
    case AtomicType::TYPE_INT32: lConvert(int32Val, d, Count(), forceVarying); break;
    case AtomicType::TYPE_UINT32: lConvert(uint32Val, d, Count(), forceVarying); break;
    case AtomicType::TYPE_FLOAT: lConvert(floatVal, d, Count(), forceVarying); break;
@@ -3402,6 +3547,10 @@ int
ConstExpr::AsFloat(float *fp, bool forceVarying) const {
    switch (getBasicType()) {
    case AtomicType::TYPE_BOOL: lConvert(boolVal, fp, Count(), forceVarying); break;
    case AtomicType::TYPE_INT8: lConvert(int8Val, fp, Count(), forceVarying); break;
    case AtomicType::TYPE_UINT8: lConvert(uint8Val, fp, Count(), forceVarying); break;
    case AtomicType::TYPE_INT16: lConvert(int16Val, fp, Count(), forceVarying); break;
    case AtomicType::TYPE_UINT16: lConvert(uint16Val, fp, Count(), forceVarying); break;
    case AtomicType::TYPE_INT32: lConvert(int32Val, fp, Count(), forceVarying); break;
    case AtomicType::TYPE_UINT32: lConvert(uint32Val, fp, Count(), forceVarying); break;
    case AtomicType::TYPE_FLOAT: lConvert(floatVal, fp, Count(), forceVarying); break;
@@ -3419,6 +3568,10 @@ int
ConstExpr::AsBool(bool *b, bool forceVarying) const {
    switch (getBasicType()) {
    case AtomicType::TYPE_BOOL: lConvert(boolVal, b, Count(), forceVarying); break;
    case AtomicType::TYPE_INT8: lConvert(int8Val, b, Count(), forceVarying); break;
    case AtomicType::TYPE_UINT8: lConvert(uint8Val, b, Count(), forceVarying); break;
    case AtomicType::TYPE_INT16: lConvert(int16Val, b, Count(), forceVarying); break;
    case AtomicType::TYPE_UINT16: lConvert(uint16Val, b, Count(), forceVarying); break;
    case AtomicType::TYPE_INT32: lConvert(int32Val, b, Count(), forceVarying); break;
    case AtomicType::TYPE_UINT32: lConvert(uint32Val, b, Count(), forceVarying); break;
    case AtomicType::TYPE_FLOAT: lConvert(floatVal, b, Count(), forceVarying); break;
@@ -3432,10 +3585,98 @@ ConstExpr::AsBool(bool *b, bool forceVarying) const {
}
int
ConstExpr::AsInt8(int8_t *ip, bool forceVarying) const {
    switch (getBasicType()) {
    case AtomicType::TYPE_BOOL: lConvert(boolVal, ip, Count(), forceVarying); break;
    case AtomicType::TYPE_INT8: lConvert(int8Val, ip, Count(), forceVarying); break;
    case AtomicType::TYPE_UINT8: lConvert(uint8Val, ip, Count(), forceVarying); break;
    case AtomicType::TYPE_INT16: lConvert(int16Val, ip, Count(), forceVarying); break;
    case AtomicType::TYPE_UINT16: lConvert(uint16Val, ip, Count(), forceVarying); break;
    case AtomicType::TYPE_INT32: lConvert(int32Val, ip, Count(), forceVarying); break;
    case AtomicType::TYPE_UINT32: lConvert(uint32Val, ip, Count(), forceVarying); break;
    case AtomicType::TYPE_FLOAT: lConvert(floatVal, ip, Count(), forceVarying); break;
    case AtomicType::TYPE_DOUBLE: lConvert(doubleVal, ip, Count(), forceVarying); break;
    case AtomicType::TYPE_INT64: lConvert(int64Val, ip, Count(), forceVarying); break;
    case AtomicType::TYPE_UINT64: lConvert(uint64Val, ip, Count(), forceVarying); break;
    default:
        FATAL("unimplemented const type");
    }
    return Count();
}


int
ConstExpr::AsUInt8(uint8_t *up, bool forceVarying) const {
    switch (getBasicType()) {
    case AtomicType::TYPE_BOOL: lConvert(boolVal, up, Count(), forceVarying); break;
    case AtomicType::TYPE_INT8: lConvert(int8Val, up, Count(), forceVarying); break;
    case AtomicType::TYPE_UINT8: lConvert(uint8Val, up, Count(), forceVarying); break;
    case AtomicType::TYPE_INT16: lConvert(int16Val, up, Count(), forceVarying); break;
    case AtomicType::TYPE_UINT16: lConvert(uint16Val, up, Count(), forceVarying); break;
    case AtomicType::TYPE_INT32: lConvert(int32Val, up, Count(), forceVarying); break;
    case AtomicType::TYPE_UINT32: lConvert(uint32Val, up, Count(), forceVarying); break;
    case AtomicType::TYPE_FLOAT: lConvert(floatVal, up, Count(), forceVarying); break;
    case AtomicType::TYPE_DOUBLE: lConvert(doubleVal, up, Count(), forceVarying); break;
    case AtomicType::TYPE_INT64: lConvert(int64Val, up, Count(), forceVarying); break;
    case AtomicType::TYPE_UINT64: lConvert(uint64Val, up, Count(), forceVarying); break;
    default:
        FATAL("unimplemented const type");
    }
    return Count();
}


int
ConstExpr::AsInt16(int16_t *ip, bool forceVarying) const {
    switch (getBasicType()) {
    case AtomicType::TYPE_BOOL: lConvert(boolVal, ip, Count(), forceVarying); break;
    case AtomicType::TYPE_INT8: lConvert(int8Val, ip, Count(), forceVarying); break;
    case AtomicType::TYPE_UINT8: lConvert(uint8Val, ip, Count(), forceVarying); break;
    case AtomicType::TYPE_INT16: lConvert(int16Val, ip, Count(), forceVarying); break;
    case AtomicType::TYPE_UINT16: lConvert(uint16Val, ip, Count(), forceVarying); break;
    case AtomicType::TYPE_INT32: lConvert(int32Val, ip, Count(), forceVarying); break;
    case AtomicType::TYPE_UINT32: lConvert(uint32Val, ip, Count(), forceVarying); break;
    case AtomicType::TYPE_FLOAT: lConvert(floatVal, ip, Count(), forceVarying); break;
    case AtomicType::TYPE_DOUBLE: lConvert(doubleVal, ip, Count(), forceVarying); break;
    case AtomicType::TYPE_INT64: lConvert(int64Val, ip, Count(), forceVarying); break;
    case AtomicType::TYPE_UINT64: lConvert(uint64Val, ip, Count(), forceVarying); break;
    default:
        FATAL("unimplemented const type");
    }
    return Count();
}


int
ConstExpr::AsUInt16(uint16_t *up, bool forceVarying) const {
    switch (getBasicType()) {
    case AtomicType::TYPE_BOOL: lConvert(boolVal, up, Count(), forceVarying); break;
    case AtomicType::TYPE_INT8: lConvert(int8Val, up, Count(), forceVarying); break;
    case AtomicType::TYPE_UINT8: lConvert(uint8Val, up, Count(), forceVarying); break;
    case AtomicType::TYPE_INT16: lConvert(int16Val, up, Count(), forceVarying); break;
    case AtomicType::TYPE_UINT16: lConvert(uint16Val, up, Count(), forceVarying); break;
    case AtomicType::TYPE_INT32: lConvert(int32Val, up, Count(), forceVarying); break;
    case AtomicType::TYPE_UINT32: lConvert(uint32Val, up, Count(), forceVarying); break;
    case AtomicType::TYPE_FLOAT: lConvert(floatVal, up, Count(), forceVarying); break;
    case AtomicType::TYPE_DOUBLE: lConvert(doubleVal, up, Count(), forceVarying); break;
    case AtomicType::TYPE_INT64: lConvert(int64Val, up, Count(), forceVarying); break;
    case AtomicType::TYPE_UINT64: lConvert(uint64Val, up, Count(), forceVarying); break;
    default:
        FATAL("unimplemented const type");
    }
    return Count();
}


int
ConstExpr::AsInt32(int32_t *ip, bool forceVarying) const {
    switch (getBasicType()) {
    case AtomicType::TYPE_BOOL: lConvert(boolVal, ip, Count(), forceVarying); break;
    case AtomicType::TYPE_INT8: lConvert(int8Val, ip, Count(), forceVarying); break;
    case AtomicType::TYPE_UINT8: lConvert(uint8Val, ip, Count(), forceVarying); break;
    case AtomicType::TYPE_INT16: lConvert(int16Val, ip, Count(), forceVarying); break;
    case AtomicType::TYPE_UINT16: lConvert(uint16Val, ip, Count(), forceVarying); break;
    case AtomicType::TYPE_INT32: lConvert(int32Val, ip, Count(), forceVarying); break;
    case AtomicType::TYPE_UINT32: lConvert(uint32Val, ip, Count(), forceVarying); break;
    case AtomicType::TYPE_FLOAT: lConvert(floatVal, ip, Count(), forceVarying); break;
@@ -3453,6 +3694,10 @@ int
|
||||
ConstExpr::AsUInt32(uint32_t *up, bool forceVarying) const {
|
||||
switch (getBasicType()) {
|
||||
case AtomicType::TYPE_BOOL: lConvert(boolVal, up, Count(), forceVarying); break;
|
||||
case AtomicType::TYPE_INT8: lConvert(int8Val, up, Count(), forceVarying); break;
|
||||
case AtomicType::TYPE_UINT8: lConvert(uint8Val, up, Count(), forceVarying); break;
|
||||
case AtomicType::TYPE_INT16: lConvert(int16Val, up, Count(), forceVarying); break;
|
||||
case AtomicType::TYPE_UINT16: lConvert(uint16Val, up, Count(), forceVarying); break;
|
||||
case AtomicType::TYPE_INT32: lConvert(int32Val, up, Count(), forceVarying); break;
|
||||
case AtomicType::TYPE_UINT32: lConvert(uint32Val, up, Count(), forceVarying); break;
|
||||
case AtomicType::TYPE_FLOAT: lConvert(floatVal, up, Count(), forceVarying); break;
|
||||
@@ -3488,6 +3733,40 @@ ConstExpr::GetConstant(const Type *type) const {
|
||||
else
|
||||
return LLVMBoolVector(bv);
|
||||
}
|
||||
else if (type == AtomicType::UniformInt8 || type == AtomicType::VaryingInt8) {
|
||||
int8_t iv[ISPC_MAX_NVEC];
|
||||
AsInt8(iv, type->IsVaryingType());
|
||||
if (type->IsUniformType())
|
||||
return LLVMInt8(iv[0]);
|
||||
else
|
||||
return LLVMInt8Vector(iv);
|
||||
}
|
||||
else if (type == AtomicType::UniformUInt8 || type == AtomicType::VaryingUInt8 ||
|
||||
dynamic_cast<const EnumType *>(type) != NULL) {
|
||||
uint8_t uiv[ISPC_MAX_NVEC];
|
||||
AsUInt8(uiv, type->IsVaryingType());
|
||||
if (type->IsUniformType())
|
||||
return LLVMUInt8(uiv[0]);
|
||||
else
|
||||
return LLVMUInt8Vector(uiv);
|
||||
}
|
||||
else if (type == AtomicType::UniformInt16 || type == AtomicType::VaryingInt16) {
|
||||
int16_t iv[ISPC_MAX_NVEC];
|
||||
AsInt16(iv, type->IsVaryingType());
|
||||
if (type->IsUniformType())
|
||||
return LLVMInt16(iv[0]);
|
||||
else
|
||||
return LLVMInt16Vector(iv);
|
||||
}
|
||||
else if (type == AtomicType::UniformUInt16 || type == AtomicType::VaryingUInt16 ||
|
||||
dynamic_cast<const EnumType *>(type) != NULL) {
|
||||
uint16_t uiv[ISPC_MAX_NVEC];
|
||||
AsUInt16(uiv, type->IsVaryingType());
|
||||
if (type->IsUniformType())
|
||||
return LLVMUInt16(uiv[0]);
|
||||
else
|
||||
return LLVMUInt16Vector(uiv);
|
||||
}
|
||||
else if (type == AtomicType::UniformInt32 || type == AtomicType::VaryingInt32) {
|
||||
int32_t iv[ISPC_MAX_NVEC];
|
||||
AsInt32(iv, type->IsVaryingType());
|
||||
@@ -3564,6 +3843,18 @@ ConstExpr::Print() const {
|
||||
case AtomicType::TYPE_BOOL:
|
||||
printf("%s", boolVal[i] ? "true" : "false");
|
||||
break;
|
||||
case AtomicType::TYPE_INT8:
|
||||
printf("%d", (int)int8Val[i]);
|
||||
break;
|
||||
case AtomicType::TYPE_UINT8:
|
||||
printf("%u", (int)uint8Val[i]);
|
||||
break;
|
||||
case AtomicType::TYPE_INT16:
|
||||
printf("%d", (int)int16Val[i]);
|
||||
break;
|
||||
case AtomicType::TYPE_UINT16:
|
||||
printf("%u", (int)uint16Val[i]);
|
||||
break;
|
||||
case AtomicType::TYPE_INT32:
|
||||
printf("%d", int32Val[i]);
|
||||
break;
|
||||
@@ -3637,11 +3928,15 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,

            cast = ctx->CastInst(llvm::Instruction::UIToFP, // unsigned int
                                 exprVal, targetType, "bool2float");
            break;
        case AtomicType::TYPE_INT8:
        case AtomicType::TYPE_INT16:
        case AtomicType::TYPE_INT32:
        case AtomicType::TYPE_INT64:
            cast = ctx->CastInst(llvm::Instruction::SIToFP, // signed int to float
                                 exprVal, targetType, "int2float");
            break;
        case AtomicType::TYPE_UINT8:
        case AtomicType::TYPE_UINT16:
        case AtomicType::TYPE_UINT32:
        case AtomicType::TYPE_UINT64:
            if (fromType->IsVaryingType())

@@ -3675,11 +3970,15 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,

            cast = ctx->CastInst(llvm::Instruction::UIToFP, // unsigned int to double
                                 exprVal, targetType, "bool2double");
            break;
        case AtomicType::TYPE_INT8:
        case AtomicType::TYPE_INT16:
        case AtomicType::TYPE_INT32:
        case AtomicType::TYPE_INT64:
            cast = ctx->CastInst(llvm::Instruction::SIToFP, // signed int
                                 exprVal, targetType, "int2double");
            break;
        case AtomicType::TYPE_UINT8:
        case AtomicType::TYPE_UINT16:
        case AtomicType::TYPE_UINT32:
        case AtomicType::TYPE_UINT64:
            if (fromType->IsVaryingType())

@@ -3699,6 +3998,170 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,

        }
        break;
    }
    case AtomicType::TYPE_INT8: {
        LLVM_TYPE_CONST llvm::Type *targetType =
            fromType->IsUniformType() ? LLVMTypes::Int8Type :
                                        LLVMTypes::Int8VectorType;
        switch (fromType->basicType) {
        case AtomicType::TYPE_BOOL:
            if (fromType->IsVaryingType() &&
                LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType)
                exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, "bool_to_i1");
            cast = ctx->ZExtInst(exprVal, targetType, "bool2int");
            break;
        case AtomicType::TYPE_INT8:
        case AtomicType::TYPE_UINT8:
            cast = exprVal;
            break;
        case AtomicType::TYPE_INT16:
        case AtomicType::TYPE_UINT16:
        case AtomicType::TYPE_INT32:
        case AtomicType::TYPE_UINT32:
        case AtomicType::TYPE_INT64:
        case AtomicType::TYPE_UINT64:
            cast = ctx->TruncInst(exprVal, targetType, "int64_to_int8");
            break;
        case AtomicType::TYPE_FLOAT:
            cast = ctx->CastInst(llvm::Instruction::FPToSI, // signed int
                                 exprVal, targetType, "float2int");
            break;
        case AtomicType::TYPE_DOUBLE:
            cast = ctx->CastInst(llvm::Instruction::FPToSI, // signed int
                                 exprVal, targetType, "double2int");
            break;
        default:
            FATAL("unimplemented");
        }
        break;
    }
    case AtomicType::TYPE_UINT8: {
        LLVM_TYPE_CONST llvm::Type *targetType =
            fromType->IsUniformType() ? LLVMTypes::Int8Type :
                                        LLVMTypes::Int8VectorType;
        switch (fromType->basicType) {
        case AtomicType::TYPE_BOOL:
            if (fromType->IsVaryingType() &&
                LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType)
                exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, "bool_to_i1");
            cast = ctx->ZExtInst(exprVal, targetType, "bool2uint");
            break;
        case AtomicType::TYPE_INT8:
        case AtomicType::TYPE_UINT8:
            cast = exprVal;
            break;
        case AtomicType::TYPE_INT16:
        case AtomicType::TYPE_UINT16:
        case AtomicType::TYPE_INT32:
        case AtomicType::TYPE_UINT32:
        case AtomicType::TYPE_INT64:
        case AtomicType::TYPE_UINT64:
            cast = ctx->TruncInst(exprVal, targetType, "int64_to_uint8");
            break;
        case AtomicType::TYPE_FLOAT:
            if (fromType->IsVaryingType())
                PerformanceWarning(pos, "Conversion from float to unsigned int is slow. "
                                   "Use \"int\" if possible");
            cast = ctx->CastInst(llvm::Instruction::FPToUI, // unsigned int
                                 exprVal, targetType, "float2uint");
            break;
        case AtomicType::TYPE_DOUBLE:
            if (fromType->IsVaryingType())
                PerformanceWarning(pos, "Conversion from double to unsigned int is slow. "
                                   "Use \"int\" if possible");
            cast = ctx->CastInst(llvm::Instruction::FPToUI, // unsigned int
                                 exprVal, targetType, "double2uint");
            break;
        default:
            FATAL("unimplemented");
        }
        break;
    }
    case AtomicType::TYPE_INT16: {
        LLVM_TYPE_CONST llvm::Type *targetType =
            fromType->IsUniformType() ? LLVMTypes::Int16Type :
                                        LLVMTypes::Int16VectorType;
        switch (fromType->basicType) {
        case AtomicType::TYPE_BOOL:
            if (fromType->IsVaryingType() &&
                LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType)
                exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, "bool_to_i1");
            cast = ctx->ZExtInst(exprVal, targetType, "bool2int");
            break;
        case AtomicType::TYPE_INT8:
            cast = ctx->SExtInst(exprVal, targetType, "int2int16");
            break;
        case AtomicType::TYPE_UINT8:
            cast = ctx->ZExtInst(exprVal, targetType, "uint2uint16");
            break;
        case AtomicType::TYPE_INT16:
        case AtomicType::TYPE_UINT16:
            cast = exprVal;
            break;
        case AtomicType::TYPE_FLOAT:
            cast = ctx->CastInst(llvm::Instruction::FPToSI, // signed int
                                 exprVal, targetType, "float2int");
            break;
        case AtomicType::TYPE_INT32:
        case AtomicType::TYPE_UINT32:
        case AtomicType::TYPE_INT64:
        case AtomicType::TYPE_UINT64:
            cast = ctx->TruncInst(exprVal, targetType, "int64_to_int16");
            break;
        case AtomicType::TYPE_DOUBLE:
            cast = ctx->CastInst(llvm::Instruction::FPToSI, // signed int
                                 exprVal, targetType, "double2int");
            break;
        default:
            FATAL("unimplemented");
        }
        break;
    }
    case AtomicType::TYPE_UINT16: {
        LLVM_TYPE_CONST llvm::Type *targetType =
            fromType->IsUniformType() ? LLVMTypes::Int16Type :
                                        LLVMTypes::Int16VectorType;
        switch (fromType->basicType) {
        case AtomicType::TYPE_BOOL:
            if (fromType->IsVaryingType() &&
                LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType)
                exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, "bool_to_i1");
            cast = ctx->ZExtInst(exprVal, targetType, "bool2uint16");
            break;
        case AtomicType::TYPE_INT8:
            cast = ctx->SExtInst(exprVal, targetType, "int2uint16");
            break;
        case AtomicType::TYPE_UINT8:
            cast = ctx->ZExtInst(exprVal, targetType, "uint2uint16");
            break;
        case AtomicType::TYPE_INT16:
        case AtomicType::TYPE_UINT16:
            cast = exprVal;
            break;
        case AtomicType::TYPE_FLOAT:
            if (fromType->IsVaryingType())
                PerformanceWarning(pos, "Conversion from float to unsigned int is slow. "
                                   "Use \"int\" if possible");
            cast = ctx->CastInst(llvm::Instruction::FPToUI, // unsigned int
                                 exprVal, targetType, "float2uint");
            break;
        case AtomicType::TYPE_INT32:
        case AtomicType::TYPE_UINT32:
        case AtomicType::TYPE_INT64:
        case AtomicType::TYPE_UINT64:
            cast = ctx->TruncInst(exprVal, targetType, "int64_to_uint16");
            break;
        case AtomicType::TYPE_DOUBLE:
            if (fromType->IsVaryingType())
                PerformanceWarning(pos, "Conversion from double to unsigned int is slow. "
                                   "Use \"int\" if possible");
            cast = ctx->CastInst(llvm::Instruction::FPToUI, // unsigned int
                                 exprVal, targetType, "double2uint");
            break;
        default:
            FATAL("unimplemented");
        }
        break;
    }
    case AtomicType::TYPE_INT32: {
        LLVM_TYPE_CONST llvm::Type *targetType =
            fromType->IsUniformType() ? LLVMTypes::Int32Type :

@@ -3710,6 +4173,14 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
                exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, "bool_to_i1");
            cast = ctx->ZExtInst(exprVal, targetType, "bool2int");
            break;
        case AtomicType::TYPE_INT8:
        case AtomicType::TYPE_INT16:
            cast = ctx->SExtInst(exprVal, targetType, "int2int32");
            break;
        case AtomicType::TYPE_UINT8:
        case AtomicType::TYPE_UINT16:
            cast = ctx->ZExtInst(exprVal, targetType, "uint2uint32");
            break;
        case AtomicType::TYPE_INT32:
        case AtomicType::TYPE_UINT32:
            cast = exprVal;

@@ -3742,6 +4213,14 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
                exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, "bool_to_i1");
            cast = ctx->ZExtInst(exprVal, targetType, "bool2uint");
            break;
        case AtomicType::TYPE_INT8:
        case AtomicType::TYPE_INT16:
            cast = ctx->SExtInst(exprVal, targetType, "int2uint");
            break;
        case AtomicType::TYPE_UINT8:
        case AtomicType::TYPE_UINT16:
            cast = ctx->ZExtInst(exprVal, targetType, "uint2uint");
            break;
        case AtomicType::TYPE_INT32:
        case AtomicType::TYPE_UINT32:
            cast = exprVal;

@@ -3780,11 +4259,15 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
                exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, "bool_to_i1");
            cast = ctx->ZExtInst(exprVal, targetType, "bool2int64");
            break;
        case AtomicType::TYPE_INT8:
        case AtomicType::TYPE_INT16:
        case AtomicType::TYPE_INT32:
            cast = ctx->SExtInst(exprVal, targetType, "int32_to_int64");
            cast = ctx->SExtInst(exprVal, targetType, "int_to_int64");
            break;
        case AtomicType::TYPE_UINT8:
        case AtomicType::TYPE_UINT16:
        case AtomicType::TYPE_UINT32:
            cast = ctx->ZExtInst(exprVal, targetType, "uint32_to_int64");
            cast = ctx->ZExtInst(exprVal, targetType, "uint_to_int64");
            break;
        case AtomicType::TYPE_FLOAT:
            cast = ctx->CastInst(llvm::Instruction::FPToSI, // signed int

@@ -3796,7 +4279,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,

            break;
        case AtomicType::TYPE_DOUBLE:
            cast = ctx->CastInst(llvm::Instruction::FPToSI, // signed int
                                 exprVal, targetType, "double2int");
                                 exprVal, targetType, "double2int64");
            break;
        default:
            FATAL("unimplemented");

@@ -3814,11 +4297,15 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
                exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, "bool_to_i1");
            cast = ctx->ZExtInst(exprVal, targetType, "bool2uint");
            break;
        case AtomicType::TYPE_INT8:
        case AtomicType::TYPE_INT16:
        case AtomicType::TYPE_INT32:
            cast = ctx->SExtInst(exprVal, targetType, "int32_to_uint64");
            cast = ctx->SExtInst(exprVal, targetType, "int_to_uint64");
            break;
        case AtomicType::TYPE_UINT8:
        case AtomicType::TYPE_UINT16:
        case AtomicType::TYPE_UINT32:
            cast = ctx->ZExtInst(exprVal, targetType, "uint32_to_uint64");
            cast = ctx->ZExtInst(exprVal, targetType, "uint_to_uint64");
            break;
        case AtomicType::TYPE_FLOAT:
            if (fromType->IsVaryingType())

@@ -3848,6 +4335,22 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
        case AtomicType::TYPE_BOOL:
            cast = exprVal;
            break;
        case AtomicType::TYPE_INT8:
        case AtomicType::TYPE_UINT8: {
            llvm::Value *zero = fromType->IsUniformType() ? (llvm::Value *)LLVMInt8(0) :
                (llvm::Value *)LLVMInt8Vector((int8_t)0);
            cast = ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_NE,
                                exprVal, zero, "cmpi0");
            break;
        }
        case AtomicType::TYPE_INT16:
        case AtomicType::TYPE_UINT16: {
            llvm::Value *zero = fromType->IsUniformType() ? (llvm::Value *)LLVMInt16(0) :
                (llvm::Value *)LLVMInt16Vector((int16_t)0);
            cast = ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_NE,
                                exprVal, zero, "cmpi0");
            break;
        }
        case AtomicType::TYPE_INT32:
        case AtomicType::TYPE_UINT32: {
            llvm::Value *zero = fromType->IsUniformType() ? (llvm::Value *)LLVMInt32(0) :

@@ -4195,6 +4698,26 @@ TypeCastExpr::Optimize() {
        constExpr->AsBool(bv, forceVarying);
        return new ConstExpr(toType, bv, pos);
    }
    case AtomicType::TYPE_INT8: {
        int8_t iv[ISPC_MAX_NVEC];
        constExpr->AsInt8(iv, forceVarying);
        return new ConstExpr(toType, iv, pos);
    }
    case AtomicType::TYPE_UINT8: {
        uint8_t uv[ISPC_MAX_NVEC];
        constExpr->AsUInt8(uv, forceVarying);
        return new ConstExpr(toType, uv, pos);
    }
    case AtomicType::TYPE_INT16: {
        int16_t iv[ISPC_MAX_NVEC];
        constExpr->AsInt16(iv, forceVarying);
        return new ConstExpr(toType, iv, pos);
    }
    case AtomicType::TYPE_UINT16: {
        uint16_t uv[ISPC_MAX_NVEC];
        constExpr->AsUInt16(uv, forceVarying);
        return new ConstExpr(toType, uv, pos);
    }
    case AtomicType::TYPE_INT32: {
        int32_t iv[ISPC_MAX_NVEC];
        constExpr->AsInt32(iv, forceVarying);
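Taken together, the lTypeConvAtomic cases above implement one small rule table: same-width integer conversions reuse the value, narrowing truncates, widening sign- or zero-extends by source signedness (bool zero-extends), and float/double sources go through FPToSI/FPToUI. A compact standalone restatement of that table (illustrative enums, not llvm::Instruction):

#include <cstdlib>

enum class From { Bool, SignedInt, UnsignedInt, FloatingPoint };
enum class Cast { None, Trunc, SExt, ZExt, FPToSI, FPToUI };

// Illustrative restatement of the integer-target rules used above.
Cast pickCast(From from, int fromBits, int toBits, bool targetSigned) {
    switch (from) {
    case From::FloatingPoint:
        return targetSigned ? Cast::FPToSI : Cast::FPToUI; // FPToUI is slow when varying
    case From::Bool:
        return Cast::ZExt;
    case From::SignedInt:
    case From::UnsignedInt:
        if (fromBits == toBits) return Cast::None;   // e.g. int8 <-> uint8 reuse the value
        if (fromBits > toBits)  return Cast::Trunc;  // any narrowing truncates
        return from == From::SignedInt ? Cast::SExt : Cast::ZExt;
    }
    abort(); // unreachable
}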
51
expr.h
@@ -325,6 +325,24 @@ private:

 */
class ConstExpr : public Expr {
public:
    /** Create a ConstExpr from a uniform int8 value */
    ConstExpr(const Type *t, int8_t i, SourcePos p);
    /** Create a ConstExpr from a varying int8 value */
    ConstExpr(const Type *t, int8_t *i, SourcePos p);
    /** Create a ConstExpr from a uniform uint8 value */
    ConstExpr(const Type *t, uint8_t u, SourcePos p);
    /** Create a ConstExpr from a varying uint8 value */
    ConstExpr(const Type *t, uint8_t *u, SourcePos p);

    /** Create a ConstExpr from a uniform int16 value */
    ConstExpr(const Type *t, int16_t i, SourcePos p);
    /** Create a ConstExpr from a varying int16 value */
    ConstExpr(const Type *t, int16_t *i, SourcePos p);
    /** Create a ConstExpr from a uniform uint16 value */
    ConstExpr(const Type *t, uint16_t u, SourcePos p);
    /** Create a ConstExpr from a varying uint16 value */
    ConstExpr(const Type *t, uint16_t *u, SourcePos p);

    /** Create a ConstExpr from a uniform int32 value */
    ConstExpr(const Type *t, int32_t i, SourcePos p);
    /** Create a ConstExpr from a varying int32 value */

@@ -333,14 +351,17 @@ public:
    ConstExpr(const Type *t, uint32_t u, SourcePos p);
    /** Create a ConstExpr from a varying uint32 value */
    ConstExpr(const Type *t, uint32_t *u, SourcePos p);

    /** Create a ConstExpr from a uniform float value */
    ConstExpr(const Type *t, float f, SourcePos p);
    /** Create a ConstExpr from a varying float value */
    ConstExpr(const Type *t, float *f, SourcePos p);

    /** Create a ConstExpr from a uniform double value */
    ConstExpr(const Type *t, double d, SourcePos p);
    /** Create a ConstExpr from a varying double value */
    ConstExpr(const Type *t, double *d, SourcePos p);

    /** Create a ConstExpr from a uniform int64 value */
    ConstExpr(const Type *t, int64_t i, SourcePos p);
    /** Create a ConstExpr from a varying int64 value */

@@ -349,10 +370,12 @@ public:

    ConstExpr(const Type *t, uint64_t i, SourcePos p);
    /** Create a ConstExpr from a varying uint64 value */
    ConstExpr(const Type *t, uint64_t *i, SourcePos p);

    /** Create a ConstExpr from a uniform bool value */
    ConstExpr(const Type *t, bool b, SourcePos p);
    /** Create a ConstExpr from a varying bool value */
    ConstExpr(const Type *t, bool *b, SourcePos p);

    /** Create a ConstExpr of the same type as the given old ConstExpr,
        with values given by the "values" parameter. */
    ConstExpr(ConstExpr *old, double *values);
@@ -371,6 +394,30 @@ public:

        equal to the target vector width into the given pointer. */
    int AsBool(bool *, bool forceVarying = false) const;

    /** Return the ConstExpr's values as int8s, doing type conversion
        from the actual type if needed. If forceVarying is true, then type
        convert to 'varying' so as to always return a number of values
        equal to the target vector width into the given pointer. */
    int AsInt8(int8_t *, bool forceVarying = false) const;

    /** Return the ConstExpr's values as uint8s, doing type conversion
        from the actual type if needed. If forceVarying is true, then type
        convert to 'varying' so as to always return a number of values
        equal to the target vector width into the given pointer. */
    int AsUInt8(uint8_t *, bool forceVarying = false) const;

    /** Return the ConstExpr's values as int16s, doing type conversion
        from the actual type if needed. If forceVarying is true, then type
        convert to 'varying' so as to always return a number of values
        equal to the target vector width into the given pointer. */
    int AsInt16(int16_t *, bool forceVarying = false) const;

    /** Return the ConstExpr's values as uint16s, doing type conversion
        from the actual type if needed. If forceVarying is true, then type
        convert to 'varying' so as to always return a number of values
        equal to the target vector width into the given pointer. */
    int AsUInt16(uint16_t *, bool forceVarying = false) const;

    /** Return the ConstExpr's values as int32s, doing type conversion
        from the actual type if needed. If forceVarying is true, then type
        convert to 'varying' so as to always return a number of values

@@ -417,6 +464,10 @@ private:

    const Type *type;
    union {
        int8_t int8Val[ISPC_MAX_NVEC];
        uint8_t uint8Val[ISPC_MAX_NVEC];
        int16_t int16Val[ISPC_MAX_NVEC];
        uint16_t uint16Val[ISPC_MAX_NVEC];
        int32_t int32Val[ISPC_MAX_NVEC];
        uint32_t uint32Val[ISPC_MAX_NVEC];
        bool boolVal[ISPC_MAX_NVEC];
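A short usage sketch of the accessors declared above (not from the tree; assumes a valid ConstExpr *ce in compiler context, with <cstdio> available):

// Sketch: read a ConstExpr's contents as int8 lanes via the AsInt8()
// contract documented above. With forceVarying = true the call always
// fills target-vector-width lanes, even for a uniform source value.
int8_t lanes[ISPC_MAX_NVEC];
int count = ce->AsInt8(lanes, /* forceVarying */ true);
for (int i = 0; i < count; ++i)
    printf("lane %d = %d\n", i, (int)lanes[i]);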
16
failing_tests/shuffle2-10.ispc
Normal file
@@ -0,0 +1,16 @@

/* failing due to llvm bug http://llvm.org/bugs/show_bug.cgi?id=10421 */

export uniform int width() { return programCount; }

export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    int8 aa = aFOO[programIndex];
    int8 bb = aa + programCount;
    int8 shuf = shuffle(aa, bb, 2*programIndex+(int)b-5);
    //CO print("%\n%\n%\n%\n", aa, bb, 2*programIndex+(int)b-5, shuf);
    RET[programIndex] = shuf;
}

export void result(uniform float RET[]) {
    RET[programIndex] = 1 + 2*programIndex;
}
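For reading the expected values: a standalone scalar C++ model of what the shuffle in f_fu should produce, assuming the usual harness inputs aFOO[i] = i + 1 and b = 5 (the assumption that makes result()'s 1 + 2*programIndex the right answer):

#include <cstdio>
#include <vector>

int main() {
    const int programCount = 8;            // stand-in for the target width
    const int b = 5;
    std::vector<int> aa(programCount), bb(programCount);
    for (int i = 0; i < programCount; ++i) {
        aa[i] = i + 1;                     // models aFOO[programIndex]
        bb[i] = aa[i] + programCount;
    }
    for (int i = 0; i < programCount; ++i) {
        int idx = 2 * i + b - 5;           // 2*programIndex+(int)b-5
        // shuffle(aa, bb, idx) selects from the concatenation [aa, bb]
        int shuf = idx < programCount ? aa[idx] : bb[idx - programCount];
        printf("%d\n", shuf);              // expected: 1 + 2*i
    }
    return 0;
}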
@@ -158,38 +158,40 @@ static bool lRunTest(const char *fn) {

    }

    llvm::Function *func;
    if ((func = module->getFunction("ISPCLaunch")) != NULL)
        ee->addGlobalMapping(func, (void *)ISPCLaunch);
    if ((func = module->getFunction("ISPCSync")) != NULL)
        ee->addGlobalMapping(func, (void *)ISPCSync);
#define DO_FUNC(FUNC, FUNCNAME) \
    if ((func = module->getFunction(FUNCNAME)) != NULL) \
        ee->addGlobalMapping(func, (void *)FUNC)
    DO_FUNC(ISPCLaunch, "ISPCLaunch");
    DO_FUNC(ISPCSync, "ISPCSync");
#ifdef ISPC_IS_WINDOWS
    if ((func = module->getFunction("ISPCMalloc")) != NULL)
        ee->addGlobalMapping(func, (void *)ISPCMalloc);
    if ((func = module->getFunction("ISPCFree")) != NULL)
        ee->addGlobalMapping(func, (void *)ISPCFree);
    DO_FUNC(ISPCMalloc, "ISPCMalloc");
    DO_FUNC(ISPCFree, "ISPCFree");
#endif // ISPC_IS_WINDOWS
    if ((func = module->getFunction("putchar")) != NULL)
        ee->addGlobalMapping(func, (void *)putchar);
    if ((func = module->getFunction("printf")) != NULL)
        ee->addGlobalMapping(func, (void *)printf);
    if ((func = module->getFunction("fflush")) != NULL)
        ee->addGlobalMapping(func, (void *)fflush);
    if ((func = module->getFunction("sinf")) != NULL)
        ee->addGlobalMapping(func, (void *)sinf);
    if ((func = module->getFunction("cosf")) != NULL)
        ee->addGlobalMapping(func, (void *)cosf);
    if ((func = module->getFunction("tanf")) != NULL)
        ee->addGlobalMapping(func, (void *)tanf);
    if ((func = module->getFunction("atanf")) != NULL)
        ee->addGlobalMapping(func, (void *)atanf);
    if ((func = module->getFunction("atan2f")) != NULL)
        ee->addGlobalMapping(func, (void *)atan2f);
    if ((func = module->getFunction("powf")) != NULL)
        ee->addGlobalMapping(func, (void *)powf);
    if ((func = module->getFunction("expf")) != NULL)
        ee->addGlobalMapping(func, (void *)expf);
    if ((func = module->getFunction("logf")) != NULL)
        ee->addGlobalMapping(func, (void *)logf);
    DO_FUNC(putchar, "putchar");
    DO_FUNC(printf, "printf");
    DO_FUNC(fflush, "fflush");
    DO_FUNC(sinf, "sinf");
    DO_FUNC(cosf, "cosf");
    DO_FUNC(tanf, "tanf");
    DO_FUNC(atanf, "atanf");
    DO_FUNC(atan2f, "atan2f");
    DO_FUNC(powf, "powf");
    DO_FUNC(expf, "expf");
    DO_FUNC(logf, "logf");
    DO_FUNC(sin, "sin");
    DO_FUNC(cos, "cos");
    DO_FUNC(tan, "tan");
    DO_FUNC(atan, "atan");
    DO_FUNC(atan2, "atan2");
    DO_FUNC(pow, "pow");
    DO_FUNC(exp, "exp");
    DO_FUNC(log, "log");
    DO_FUNC(memset, "memset");
#ifdef ISPC_IS_APPLE
    DO_FUNC(memset_pattern4, "memset_pattern4");
    DO_FUNC(memset_pattern8, "memset_pattern8");
    DO_FUNC(memset_pattern16, "memset_pattern16");
#endif

#ifdef ISPC_HAVE_SVML
#define DO_SVML(FUNC, FUNCNAME) \
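DO_FUNC is purely mechanical; each invocation expands to exactly the lookup-and-map pair it replaces. For example, DO_FUNC(sinf, "sinf"); becomes:

// Expansion of DO_FUNC(sinf, "sinf"); per the #define above:
if ((func = module->getFunction("sinf")) != NULL)
    ee->addGlobalMapping(func, (void *)sinf);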
2
lex.ll
@@ -104,6 +104,8 @@ goto { return TOKEN_GOTO; }

if { return TOKEN_IF; }
inline { return TOKEN_INLINE; }
int { return TOKEN_INT; }
int8 { return TOKEN_INT8; }
int16 { return TOKEN_INT16; }
int32 { return TOKEN_INT; }
int64 { return TOKEN_INT64; }
launch { return TOKEN_LAUNCH; }
154
llvmutil.cpp
@@ -41,28 +41,39 @@

LLVM_TYPE_CONST llvm::Type *LLVMTypes::VoidType = NULL;
LLVM_TYPE_CONST llvm::PointerType *LLVMTypes::VoidPointerType = NULL;
LLVM_TYPE_CONST llvm::Type *LLVMTypes::BoolType = NULL;

LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int8Type = NULL;
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int16Type = NULL;
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int32Type = NULL;
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int32PointerType = NULL;
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int64Type = NULL;
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int64PointerType = NULL;
LLVM_TYPE_CONST llvm::Type *LLVMTypes::FloatType = NULL;
LLVM_TYPE_CONST llvm::Type *LLVMTypes::FloatPointerType = NULL;
LLVM_TYPE_CONST llvm::Type *LLVMTypes::DoubleType = NULL;

LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int8PointerType = NULL;
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int16PointerType = NULL;
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int32PointerType = NULL;
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int64PointerType = NULL;
LLVM_TYPE_CONST llvm::Type *LLVMTypes::FloatPointerType = NULL;
LLVM_TYPE_CONST llvm::Type *LLVMTypes::DoublePointerType = NULL;

LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::MaskType = NULL;
LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::BoolVectorType = NULL;

LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::Int1VectorType = NULL;
LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::Int8VectorType = NULL;
LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::Int16VectorType = NULL;
LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::Int32VectorType = NULL;
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int32VectorPointerType = NULL;
LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::Int64VectorType = NULL;
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int64VectorPointerType = NULL;
LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::FloatVectorType = NULL;
LLVM_TYPE_CONST llvm::Type *LLVMTypes::FloatVectorPointerType = NULL;
LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::DoubleVectorType = NULL;

LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int8VectorPointerType = NULL;
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int16VectorPointerType = NULL;
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int32VectorPointerType = NULL;
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int64VectorPointerType = NULL;
LLVM_TYPE_CONST llvm::Type *LLVMTypes::FloatVectorPointerType = NULL;
LLVM_TYPE_CONST llvm::Type *LLVMTypes::DoubleVectorPointerType = NULL;

LLVM_TYPE_CONST llvm::ArrayType *LLVMTypes::VoidPointerVectorType = NULL;

llvm::Constant *LLVMTrue = NULL;
@@ -75,16 +86,20 @@ void

InitLLVMUtil(llvm::LLVMContext *ctx, Target target) {
    LLVMTypes::VoidType = llvm::Type::getVoidTy(*ctx);
    LLVMTypes::VoidPointerType = llvm::PointerType::get(llvm::Type::getInt8Ty(*ctx), 0);

    LLVMTypes::BoolType = llvm::Type::getInt1Ty(*ctx);
    LLVMTypes::Int8Type = llvm::Type::getInt8Ty(*ctx);
    LLVMTypes::Int16Type = llvm::Type::getInt16Ty(*ctx);
    LLVMTypes::Int32Type = llvm::Type::getInt32Ty(*ctx);
    LLVMTypes::Int32PointerType = llvm::PointerType::get(LLVMTypes::Int32Type, 0);
    LLVMTypes::Int64Type = llvm::Type::getInt64Ty(*ctx);
    LLVMTypes::Int64PointerType = llvm::PointerType::get(LLVMTypes::Int64Type, 0);
    LLVMTypes::FloatType = llvm::Type::getFloatTy(*ctx);
    LLVMTypes::FloatPointerType = llvm::PointerType::get(LLVMTypes::FloatType, 0);
    LLVMTypes::DoubleType = llvm::Type::getDoubleTy(*ctx);

    LLVMTypes::Int8PointerType = llvm::PointerType::get(LLVMTypes::Int8Type, 0);
    LLVMTypes::Int16PointerType = llvm::PointerType::get(LLVMTypes::Int16Type, 0);
    LLVMTypes::Int32PointerType = llvm::PointerType::get(LLVMTypes::Int32Type, 0);
    LLVMTypes::Int64PointerType = llvm::PointerType::get(LLVMTypes::Int64Type, 0);
    LLVMTypes::FloatPointerType = llvm::PointerType::get(LLVMTypes::FloatType, 0);
    LLVMTypes::DoublePointerType = llvm::PointerType::get(LLVMTypes::DoubleType, 0);

    // Note that both the mask and bool vectors are vector of int32s

@@ -95,18 +110,26 @@ InitLLVMUtil(llvm::LLVMContext *ctx, Target target) {

    LLVMTypes::Int1VectorType =
        llvm::VectorType::get(llvm::Type::getInt1Ty(*ctx), target.vectorWidth);
    LLVMTypes::Int8VectorType =
        llvm::VectorType::get(LLVMTypes::Int8Type, target.vectorWidth);
    LLVMTypes::Int16VectorType =
        llvm::VectorType::get(LLVMTypes::Int16Type, target.vectorWidth);
    LLVMTypes::Int32VectorType =
        llvm::VectorType::get(LLVMTypes::Int32Type, target.vectorWidth);
    LLVMTypes::Int32VectorPointerType = llvm::PointerType::get(LLVMTypes::Int32VectorType, 0);
    LLVMTypes::Int64VectorType =
        llvm::VectorType::get(LLVMTypes::Int64Type, target.vectorWidth);
    LLVMTypes::Int64VectorPointerType = llvm::PointerType::get(LLVMTypes::Int64VectorType, 0);
    LLVMTypes::FloatVectorType =
        llvm::VectorType::get(LLVMTypes::FloatType, target.vectorWidth);
    LLVMTypes::FloatVectorPointerType = llvm::PointerType::get(LLVMTypes::FloatVectorType, 0);
    LLVMTypes::DoubleVectorType =
        llvm::VectorType::get(LLVMTypes::DoubleType, target.vectorWidth);

    LLVMTypes::Int8VectorPointerType = llvm::PointerType::get(LLVMTypes::Int8VectorType, 0);
    LLVMTypes::Int16VectorPointerType = llvm::PointerType::get(LLVMTypes::Int16VectorType, 0);
    LLVMTypes::Int32VectorPointerType = llvm::PointerType::get(LLVMTypes::Int32VectorType, 0);
    LLVMTypes::Int64VectorPointerType = llvm::PointerType::get(LLVMTypes::Int64VectorType, 0);
    LLVMTypes::FloatVectorPointerType = llvm::PointerType::get(LLVMTypes::FloatVectorType, 0);
    LLVMTypes::DoubleVectorPointerType = llvm::PointerType::get(LLVMTypes::DoubleVectorType, 0);

    LLVMTypes::VoidPointerVectorType =
        llvm::ArrayType::get(LLVMTypes::VoidPointerType, target.vectorWidth);

@@ -133,7 +156,36 @@ InitLLVMUtil(llvm::LLVMContext *ctx, Target target) {
}


llvm::ConstantInt *LLVMInt32(int32_t ival) {
llvm::ConstantInt *
LLVMInt8(int8_t ival) {
    return llvm::ConstantInt::get(llvm::Type::getInt8Ty(*g->ctx), ival,
                                  true /*signed*/);
}


llvm::ConstantInt *
LLVMUInt8(uint8_t ival) {
    return llvm::ConstantInt::get(llvm::Type::getInt8Ty(*g->ctx), ival,
                                  false /*unsigned*/);
}


llvm::ConstantInt *
LLVMInt16(int16_t ival) {
    return llvm::ConstantInt::get(llvm::Type::getInt16Ty(*g->ctx), ival,
                                  true /*signed*/);
}


llvm::ConstantInt *
LLVMUInt16(uint16_t ival) {
    return llvm::ConstantInt::get(llvm::Type::getInt16Ty(*g->ctx), ival,
                                  false /*unsigned*/);
}


llvm::ConstantInt *
LLVMInt32(int32_t ival) {
    return llvm::ConstantInt::get(llvm::Type::getInt32Ty(*g->ctx), ival,
                                  true /*signed*/);
}
@@ -172,6 +224,82 @@ LLVMDouble(double dval) {

}


llvm::Constant *
LLVMInt8Vector(int8_t ival) {
    llvm::Constant *v = LLVMInt8(ival);
    std::vector<llvm::Constant *> vals;
    for (int i = 0; i < g->target.vectorWidth; ++i)
        vals.push_back(v);
    return llvm::ConstantVector::get(vals);
}


llvm::Constant *
LLVMInt8Vector(const int8_t *ivec) {
    std::vector<llvm::Constant *> vals;
    for (int i = 0; i < g->target.vectorWidth; ++i)
        vals.push_back(LLVMInt8(ivec[i]));
    return llvm::ConstantVector::get(vals);
}


llvm::Constant *
LLVMUInt8Vector(uint8_t ival) {
    llvm::Constant *v = LLVMUInt8(ival);
    std::vector<llvm::Constant *> vals;
    for (int i = 0; i < g->target.vectorWidth; ++i)
        vals.push_back(v);
    return llvm::ConstantVector::get(vals);
}


llvm::Constant *
LLVMUInt8Vector(const uint8_t *ivec) {
    std::vector<llvm::Constant *> vals;
    for (int i = 0; i < g->target.vectorWidth; ++i)
        vals.push_back(LLVMUInt8(ivec[i]));
    return llvm::ConstantVector::get(vals);
}


llvm::Constant *
LLVMInt16Vector(int16_t ival) {
    llvm::Constant *v = LLVMInt16(ival);
    std::vector<llvm::Constant *> vals;
    for (int i = 0; i < g->target.vectorWidth; ++i)
        vals.push_back(v);
    return llvm::ConstantVector::get(vals);
}


llvm::Constant *
LLVMInt16Vector(const int16_t *ivec) {
    std::vector<llvm::Constant *> vals;
    for (int i = 0; i < g->target.vectorWidth; ++i)
        vals.push_back(LLVMInt16(ivec[i]));
    return llvm::ConstantVector::get(vals);
}


llvm::Constant *
LLVMUInt16Vector(uint16_t ival) {
    llvm::Constant *v = LLVMUInt16(ival);
    std::vector<llvm::Constant *> vals;
    for (int i = 0; i < g->target.vectorWidth; ++i)
        vals.push_back(v);
    return llvm::ConstantVector::get(vals);
}


llvm::Constant *
LLVMUInt16Vector(const uint16_t *ivec) {
    std::vector<llvm::Constant *> vals;
    for (int i = 0; i < g->target.vectorWidth; ++i)
        vals.push_back(LLVMUInt16(ivec[i]));
    return llvm::ConstantVector::get(vals);
}


llvm::Constant *
LLVMInt32Vector(int32_t ival) {
    llvm::Constant *v = LLVMInt32(ival);
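A usage sketch of the two overload families above (fragment, not from the tree; assumes InitLLVMUtil has run so g->target.vectorWidth is valid):

// Smear form: one value replicated into every lane.
llvm::Constant *allFives = LLVMInt8Vector((int8_t)5);

// Array form: independent per-lane values (here a 0, 1, 2, ... ramp).
int8_t lanes[ISPC_MAX_NVEC];
for (int i = 0; i < g->target.vectorWidth; ++i)
    lanes[i] = (int8_t)i;
llvm::Constant *ramp = LLVMInt8Vector(lanes);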
65
llvmutil.h
@@ -53,28 +53,39 @@ struct LLVMTypes {

    static LLVM_TYPE_CONST llvm::Type *VoidType;
    static LLVM_TYPE_CONST llvm::PointerType *VoidPointerType;
    static LLVM_TYPE_CONST llvm::Type *BoolType;

    static LLVM_TYPE_CONST llvm::Type *Int8Type;
    static LLVM_TYPE_CONST llvm::Type *Int16Type;
    static LLVM_TYPE_CONST llvm::Type *Int32Type;
    static LLVM_TYPE_CONST llvm::Type *Int32PointerType;
    static LLVM_TYPE_CONST llvm::Type *Int64Type;
    static LLVM_TYPE_CONST llvm::Type *Int64PointerType;
    static LLVM_TYPE_CONST llvm::Type *FloatType;
    static LLVM_TYPE_CONST llvm::Type *FloatPointerType;
    static LLVM_TYPE_CONST llvm::Type *DoubleType;

    static LLVM_TYPE_CONST llvm::Type *Int8PointerType;
    static LLVM_TYPE_CONST llvm::Type *Int16PointerType;
    static LLVM_TYPE_CONST llvm::Type *Int32PointerType;
    static LLVM_TYPE_CONST llvm::Type *Int64PointerType;
    static LLVM_TYPE_CONST llvm::Type *FloatPointerType;
    static LLVM_TYPE_CONST llvm::Type *DoublePointerType;

    static LLVM_TYPE_CONST llvm::VectorType *MaskType;

    static LLVM_TYPE_CONST llvm::VectorType *BoolVectorType;
    static LLVM_TYPE_CONST llvm::VectorType *Int1VectorType;
    static LLVM_TYPE_CONST llvm::VectorType *Int8VectorType;
    static LLVM_TYPE_CONST llvm::VectorType *Int16VectorType;
    static LLVM_TYPE_CONST llvm::VectorType *Int32VectorType;
    static LLVM_TYPE_CONST llvm::Type *Int32VectorPointerType;
    static LLVM_TYPE_CONST llvm::VectorType *Int64VectorType;
    static LLVM_TYPE_CONST llvm::Type *Int64VectorPointerType;
    static LLVM_TYPE_CONST llvm::VectorType *FloatVectorType;
    static LLVM_TYPE_CONST llvm::Type *FloatVectorPointerType;
    static LLVM_TYPE_CONST llvm::VectorType *DoubleVectorType;

    static LLVM_TYPE_CONST llvm::Type *Int8VectorPointerType;
    static LLVM_TYPE_CONST llvm::Type *Int16VectorPointerType;
    static LLVM_TYPE_CONST llvm::Type *Int32VectorPointerType;
    static LLVM_TYPE_CONST llvm::Type *Int64VectorPointerType;
    static LLVM_TYPE_CONST llvm::Type *FloatVectorPointerType;
    static LLVM_TYPE_CONST llvm::Type *DoubleVectorPointerType;

    static LLVM_TYPE_CONST llvm::ArrayType *VoidPointerVectorType;
};
@@ -89,6 +100,14 @@ extern llvm::Constant *LLVMTrue, *LLVMFalse;

 */
extern void InitLLVMUtil(llvm::LLVMContext *ctx, Target target);

/** Returns an LLVM i8 constant of the given value */
extern llvm::ConstantInt *LLVMInt8(int8_t i);
/** Returns an LLVM i8 constant of the given value */
extern llvm::ConstantInt *LLVMUInt8(uint8_t i);
/** Returns an LLVM i16 constant of the given value */
extern llvm::ConstantInt *LLVMInt16(int16_t i);
/** Returns an LLVM i16 constant of the given value */
extern llvm::ConstantInt *LLVMUInt16(uint16_t i);
/** Returns an LLVM i32 constant of the given value */
extern llvm::ConstantInt *LLVMInt32(int32_t i);
/** Returns an LLVM i32 constant of the given value */
@@ -105,18 +124,35 @@ extern llvm::Constant *LLVMDouble(double f);

/** Returns an LLVM boolean vector constant of the given value smeared
    across all elements */
extern llvm::Constant *LLVMBoolVector(bool v);

/** Returns an LLVM i8 vector constant of the given value smeared
    across all elements */
extern llvm::Constant *LLVMInt8Vector(int8_t i);
/** Returns an LLVM i8 vector constant of the given value smeared
    across all elements */
extern llvm::Constant *LLVMUInt8Vector(uint8_t i);

/** Returns an LLVM i16 vector constant of the given value smeared
    across all elements */
extern llvm::Constant *LLVMInt16Vector(int16_t i);
/** Returns an LLVM i16 vector constant of the given value smeared
    across all elements */
extern llvm::Constant *LLVMUInt16Vector(uint16_t i);

/** Returns an LLVM i32 vector constant of the given value smeared
    across all elements */
extern llvm::Constant *LLVMInt32Vector(int32_t i);
/** Returns an LLVM i32 vector constant of the given value smeared
    across all elements */
extern llvm::Constant *LLVMUInt32Vector(uint32_t i);

/** Returns an LLVM i64 vector constant of the given value smeared
    across all elements */
extern llvm::Constant *LLVMInt64Vector(int64_t i);
/** Returns an LLVM i64 vector constant of the given value smeared
    across all elements */
extern llvm::Constant *LLVMUInt64Vector(uint64_t i);

/** Returns an LLVM float vector constant of the given value smeared
    across all elements */
extern llvm::Constant *LLVMFloatVector(float f);
@@ -127,18 +163,35 @@ extern llvm::Constant *LLVMDoubleVector(double f);

/** Returns an LLVM boolean vector based on the given array of values.
    The array should have g->target.vectorWidth elements. */
extern llvm::Constant *LLVMBoolVector(const bool *v);

/** Returns an LLVM i8 vector based on the given array of values.
    The array should have g->target.vectorWidth elements. */
extern llvm::Constant *LLVMInt8Vector(const int8_t *i);
/** Returns an LLVM i8 vector based on the given array of values.
    The array should have g->target.vectorWidth elements. */
extern llvm::Constant *LLVMUInt8Vector(const uint8_t *i);

/** Returns an LLVM i16 vector based on the given array of values.
    The array should have g->target.vectorWidth elements. */
extern llvm::Constant *LLVMInt16Vector(const int16_t *i);
/** Returns an LLVM i16 vector based on the given array of values.
    The array should have g->target.vectorWidth elements. */
extern llvm::Constant *LLVMUInt16Vector(const uint16_t *i);

/** Returns an LLVM i32 vector based on the given array of values.
    The array should have g->target.vectorWidth elements. */
extern llvm::Constant *LLVMInt32Vector(const int32_t *i);
/** Returns an LLVM i32 vector based on the given array of values.
    The array should have g->target.vectorWidth elements. */
extern llvm::Constant *LLVMUInt32Vector(const uint32_t *i);

/** Returns an LLVM i64 vector based on the given array of values.
    The array should have g->target.vectorWidth elements. */
extern llvm::Constant *LLVMInt64Vector(const int64_t *i);
/** Returns an LLVM i64 vector based on the given array of values.
    The array should have g->target.vectorWidth elements. */
extern llvm::Constant *LLVMUInt64Vector(const uint64_t *i);

/** Returns an LLVM float vector based on the given array of values.
    The array should have g->target.vectorWidth elements. */
extern llvm::Constant *LLVMFloatVector(const float *f);
487
opt.cpp
@@ -409,7 +409,6 @@ IntrinsicsOpt::IntrinsicsOpt()

        llvm::Intrinsic::getDeclaration(m->module, llvm::Intrinsic::x86_sse_movmsk_ps);
    maskInstructions.push_back(sseMovmsk);
    maskInstructions.push_back(m->module->getFunction("llvm.x86.avx.movmsk.ps"));
    maskInstructions.push_back(m->module->getFunction("llvm.x86.mic.mask16.to.int"));
    maskInstructions.push_back(m->module->getFunction("__movmsk"));

    // And all of the blend instructions

@@ -418,8 +417,6 @@ IntrinsicsOpt::IntrinsicsOpt()

                                       0xf, 0, 1, 2));
    blendInstructions.push_back(BlendInstruction(
        m->module->getFunction("llvm.x86.avx.blendvps"), 0xff, 0, 1, 2));
    blendInstructions.push_back(BlendInstruction(
        m->module->getFunction("llvm.x86.mic.blend.ps"), 0xffff, 1, 2, 0));
}


@@ -499,8 +496,8 @@ bool

IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) {
    bool modifiedAny = false;
 restart:
    for (llvm::BasicBlock::iterator i = bb.begin(), e = bb.end(); i != e; ++i) {
        llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*i);
    for (llvm::BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e; ++iter) {
        llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*iter);
        if (!callInst)
            continue;

@@ -512,7 +509,8 @@ IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) {

            // If the values are the same, then no need to blend..
            if (v[0] == v[1]) {
                llvm::ReplaceInstWithValue(i->getParent()->getInstList(), i, v[0]);
                llvm::ReplaceInstWithValue(iter->getParent()->getInstList(),
                                           iter, v[0]);
                modifiedAny = true;
                goto restart;
            }

@@ -524,12 +522,14 @@ IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) {

            // otherwise the result is undefined and any value is fine,
            // ergo the defined one is an acceptable result.)
            if (lIsUndef(v[0])) {
                llvm::ReplaceInstWithValue(i->getParent()->getInstList(), i, v[1]);
                llvm::ReplaceInstWithValue(iter->getParent()->getInstList(),
                                           iter, v[1]);
                modifiedAny = true;
                goto restart;
            }
            if (lIsUndef(v[1])) {
                llvm::ReplaceInstWithValue(i->getParent()->getInstList(), i, v[0]);
                llvm::ReplaceInstWithValue(iter->getParent()->getInstList(),
                                           iter, v[0]);
                modifiedAny = true;
                goto restart;
            }

@@ -544,7 +544,8 @@ IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) {

                value = v[1];

            if (value != NULL) {
                llvm::ReplaceInstWithValue(i->getParent()->getInstList(), i, value);
                llvm::ReplaceInstWithValue(iter->getParent()->getInstList(),
                                           iter, value);
                modifiedAny = true;
                goto restart;
            }

@@ -557,7 +558,8 @@ IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) {

            // with the corresponding integer mask from its elements
            // high bits.
            llvm::Value *value = LLVMInt32(mask);
            llvm::ReplaceInstWithValue(i->getParent()->getInstList(), i, value);
            llvm::ReplaceInstWithValue(iter->getParent()->getInstList(),
                                       iter, value);
            modifiedAny = true;
            goto restart;
        }
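The i to iter rename above is cosmetic, but the surrounding goto restart idiom is load-bearing: ReplaceInstWithValue mutates the block mid-walk, which invalidates the iterator, so the scan starts over after every rewrite. The skeleton in isolation (tryToSimplify is a hypothetical stand-in for the pass's rewrite step):

// Scan-rewrite-restart skeleton shared by these passes: restart the
// walk after each mutation because the iterator is no longer valid.
static bool runRewrites(llvm::BasicBlock &bb) {
    bool modifiedAny = false;
 restart:
    for (llvm::BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e; ++iter) {
        if (tryToSimplify(&*iter)) {   // hypothetical rewrite; edits the block
            modifiedAny = true;
            goto restart;
        }
    }
    return modifiedAny;
}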
@@ -653,10 +655,18 @@ lSizeOfIfKnown(const llvm::Type *type, uint64_t *size) {

        *size = 1;
        return true;
    }
    if (type == LLVMTypes::Int8VectorType) {
        *size = g->target.vectorWidth * 1;
        return true;
    }
    else if (type == LLVMTypes::Int16Type) {
        *size = 2;
        return true;
    }
    if (type == LLVMTypes::Int16VectorType) {
        *size = g->target.vectorWidth * 2;
        return true;
    }
    else if (type == LLVMTypes::FloatType || type == LLVMTypes::Int32Type) {
        *size = 4;
        return true;
@@ -978,33 +988,53 @@ lGetPtrAndOffsets(llvm::Value *ptrs, llvm::Value **basePtr,

}


struct GSInfo {
    GSInfo(const char *pgFuncName, const char *pgboFuncName, bool ig, int es)
        : isGather(ig), elementSize(es) {
        func = m->module->getFunction(pgFuncName);
        baseOffsetsFunc = m->module->getFunction(pgboFuncName);
    }
    llvm::Function *func;
    llvm::Function *baseOffsetsFunc;
    const bool isGather;
    const int elementSize;
};


bool
GatherScatterFlattenOpt::runOnBasicBlock(llvm::BasicBlock &bb) {
    llvm::Function *gather32Func = m->module->getFunction("__pseudo_gather_32");
    llvm::Function *gather64Func = m->module->getFunction("__pseudo_gather_64");
    llvm::Function *scatter32Func = m->module->getFunction("__pseudo_scatter_32");
    llvm::Function *scatter64Func = m->module->getFunction("__pseudo_scatter_64");
    assert(gather32Func && gather64Func && scatter32Func && scatter64Func);
    GSInfo gsFuncs[] = {
        GSInfo("__pseudo_gather_8", "__pseudo_gather_base_offsets_8", true, 1),
        GSInfo("__pseudo_gather_16", "__pseudo_gather_base_offsets_16", true, 2),
        GSInfo("__pseudo_gather_32", "__pseudo_gather_base_offsets_32", true, 4),
        GSInfo("__pseudo_gather_64", "__pseudo_gather_base_offsets_64", true, 8),
        GSInfo("__pseudo_scatter_8", "__pseudo_scatter_base_offsets_8", false, 1),
        GSInfo("__pseudo_scatter_16", "__pseudo_scatter_base_offsets_16", false, 2),
        GSInfo("__pseudo_scatter_32", "__pseudo_scatter_base_offsets_32", false, 4),
        GSInfo("__pseudo_scatter_64", "__pseudo_scatter_base_offsets_64", false, 8),
    };
    int numGSFuncs = sizeof(gsFuncs) / sizeof(gsFuncs[0]);
    for (int i = 0; i < numGSFuncs; ++i)
        assert(gsFuncs[i].func != NULL && gsFuncs[i].baseOffsetsFunc != NULL);

    bool modifiedAny = false;
 restart:
    // Iterate through all of the instructions in the basic block.
    for (llvm::BasicBlock::iterator i = bb.begin(), e = bb.end(); i != e; ++i) {
        llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*i);
    for (llvm::BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e; ++iter) {
        llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*iter);
        // If we don't have a call to one of the
        // __pseudo_{gather,scatter}_* functions, then just go on to the
        // next instruction.
        if (!callInst ||
            (callInst->getCalledFunction() != gather32Func &&
             callInst->getCalledFunction() != gather64Func &&
             callInst->getCalledFunction() != scatter32Func &&
             callInst->getCalledFunction() != scatter64Func))
        if (callInst == NULL)
            continue;
        GSInfo *info = NULL;
        for (int i = 0; i < numGSFuncs; ++i)
            if (callInst->getCalledFunction() == gsFuncs[i].func) {
                info = &gsFuncs[i];
                break;
            }
        if (info == NULL)
            continue;

        bool isGather = (callInst->getCalledFunction() == gather32Func ||
                         callInst->getCalledFunction() == gather64Func);
        bool is32 = (callInst->getCalledFunction() == gather32Func ||
                     callInst->getCalledFunction() == scatter32Func);

        // Transform the array of pointers to a single base pointer and an
        // array of int32 offsets. (All the hard work is done by

@@ -1012,19 +1042,15 @@ GatherScatterFlattenOpt::runOnBasicBlock(llvm::BasicBlock &bb) {

        llvm::Value *ptrs = callInst->getArgOperand(0);
        llvm::Value *basePtr = NULL;
        llvm::Value *offsetVector = lGetPtrAndOffsets(ptrs, &basePtr, callInst,
                                                      is32 ? 4 : 8);
                                                      info->elementSize);
        // Cast the base pointer to a void *, since that's what the
        // __pseudo_*_base_offsets_* functions want.
        basePtr = new llvm::BitCastInst(basePtr, LLVMTypes::VoidPointerType, "base2void",
                                        callInst);
        basePtr = new llvm::BitCastInst(basePtr, LLVMTypes::VoidPointerType,
                                        "base2void", callInst);
        lCopyMetadata(basePtr, callInst);

        if (isGather) {
        if (info->isGather) {
            llvm::Value *mask = callInst->getArgOperand(1);
            llvm::Function *gFunc =
                m->module->getFunction(is32 ? "__pseudo_gather_base_offsets_32" :
                                              "__pseudo_gather_base_offsets_64");
            assert(gFunc != NULL);

            // Generate a new function call to the next pseudo gather
            // base+offsets instruction. Note that we're passing a NULL

@@ -1035,11 +1061,12 @@ GatherScatterFlattenOpt::runOnBasicBlock(llvm::BasicBlock &bb) {

#if defined(LLVM_3_0) || defined(LLVM_3_0svn)
            llvm::ArrayRef<llvm::Value *> newArgArray(&newArgs[0], &newArgs[3]);
            llvm::Instruction *newCall =
                llvm::CallInst::Create(gFunc, newArgArray, "newgather",
                                       (llvm::Instruction *)NULL);
                llvm::CallInst::Create(info->baseOffsetsFunc, newArgArray,
                                       "newgather", (llvm::Instruction *)NULL);
#else
            llvm::Instruction *newCall =
                llvm::CallInst::Create(gFunc, &newArgs[0], &newArgs[3], "newgather");
                llvm::CallInst::Create(info->baseOffsetsFunc, &newArgs[0], &newArgs[3],
                                       "newgather");
#endif
            lCopyMetadata(newCall, callInst);
            llvm::ReplaceInstWithInst(callInst, newCall);

@@ -1047,10 +1074,6 @@ GatherScatterFlattenOpt::runOnBasicBlock(llvm::BasicBlock &bb) {

        else {
            llvm::Value *mask = callInst->getArgOperand(2);
            llvm::Value *rvalue = callInst->getArgOperand(1);
            llvm::Function *gFunc =
                m->module->getFunction(is32 ? "__pseudo_scatter_base_offsets_32" :
                                              "__pseudo_scatter_base_offsets_64");
            assert(gFunc);

            // Generate a new function call to the next pseudo scatter
            // base+offsets instruction. See above for why passing NULL

@@ -1059,11 +1082,12 @@ GatherScatterFlattenOpt::runOnBasicBlock(llvm::BasicBlock &bb) {

#if defined(LLVM_3_0) || defined(LLVM_3_0svn)
            llvm::ArrayRef<llvm::Value *> newArgArray(&newArgs[0], &newArgs[4]);
            llvm::Instruction *newCall =
                llvm::CallInst::Create(gFunc, newArgArray, "",
                llvm::CallInst::Create(info->baseOffsetsFunc, newArgArray, "",
                                       (llvm::Instruction *)NULL);
#else
            llvm::Instruction *newCall =
                llvm::CallInst::Create(gFunc, &newArgs[0], &newArgs[4]);
                llvm::CallInst::Create(info->baseOffsetsFunc, &newArgs[0],
                                       &newArgs[4]);
#endif
            lCopyMetadata(newCall, callInst);
            llvm::ReplaceInstWithInst(callInst, newCall);
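The GSInfo table is what lets the pass above cover the 8- and 16-bit variants without growing the per-width if-chain: matching a call becomes a scan over one table keyed by function identity. The same pattern in standalone form (PseudoInfo and lookup are illustrative names, not the pass itself):

#include <cstring>

// Standalone model of the GSInfo table lookup: one entry per pseudo
// function, found by scanning rather than by per-width if-chains.
struct PseudoInfo {
    const char *name;
    bool isGather;
    int elementSize;   // in bytes: 1, 2, 4, or 8
};

static const PseudoInfo table[] = {
    { "__pseudo_gather_8",   true,  1 }, { "__pseudo_gather_16",  true,  2 },
    { "__pseudo_gather_32",  true,  4 }, { "__pseudo_gather_64",  true,  8 },
    { "__pseudo_scatter_8",  false, 1 }, { "__pseudo_scatter_16", false, 2 },
    { "__pseudo_scatter_32", false, 4 }, { "__pseudo_scatter_64", false, 8 },
};

const PseudoInfo *lookup(const char *calledName) {
    for (const PseudoInfo &info : table)
        if (strcmp(info.name, calledName) == 0)
            return &info;
    return nullptr;   // not one of ours; skip the instruction
}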
@@ -1105,28 +1129,53 @@ char MaskedStoreOptPass::ID = 0;

llvm::RegisterPass<MaskedStoreOptPass> mss("masked-store-scalarize",
                                           "Masked Store Scalarize Pass");

struct MSInfo {
    MSInfo(const char *name, const int a)
        : align(a) {
        func = m->module->getFunction(name);
        assert(func != NULL);
    }
    llvm::Function *func;
    const int align;
};


bool
MaskedStoreOptPass::runOnBasicBlock(llvm::BasicBlock &bb) {
    llvm::Function *pms32Func = m->module->getFunction("__pseudo_masked_store_32");
    llvm::Function *pms64Func = m->module->getFunction("__pseudo_masked_store_64");
    llvm::Function *msb32Func = m->module->getFunction("__masked_store_blend_32");
    llvm::Function *msb64Func = m->module->getFunction("__masked_store_blend_64");
    llvm::Function *ms32Func = m->module->getFunction("__masked_store_32");
    llvm::Function *ms64Func = m->module->getFunction("__masked_store_64");
    MSInfo msInfo[] = {
        MSInfo("__pseudo_masked_store_8", 1),
        MSInfo("__pseudo_masked_store_16", 2),
        MSInfo("__pseudo_masked_store_32", 4),
        MSInfo("__pseudo_masked_store_64", 8),
        MSInfo("__masked_store_blend_8", 1),
        MSInfo("__masked_store_blend_16", 2),
        MSInfo("__masked_store_blend_32", 4),
        MSInfo("__masked_store_blend_64", 8),
        MSInfo("__masked_store_8", 1),
        MSInfo("__masked_store_16", 2),
        MSInfo("__masked_store_32", 4),
        MSInfo("__masked_store_64", 8)
    };

    bool modifiedAny = false;
 restart:
    // Iterate over all of the instructions to look for one of the various
    // masked store functions
    for (llvm::BasicBlock::iterator i = bb.begin(), e = bb.end(); i != e; ++i) {
        llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*i);
    for (llvm::BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e; ++iter) {
        llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*iter);
        if (!callInst)
            continue;

        llvm::Function *called = callInst->getCalledFunction();
        if (called != pms32Func && called != pms64Func &&
            called != msb32Func && called != msb64Func &&
            called != ms32Func && called != ms64Func)
        int nMSFuncs = sizeof(msInfo) / sizeof(msInfo[0]);
        MSInfo *info = NULL;
        for (int i = 0; i < nMSFuncs; ++i) {
            if (called == msInfo[i].func) {
                info = &msInfo[i];
                break;
            }
        }
        if (info == NULL)
            continue;

        // Got one; grab the operands

@@ -1150,15 +1199,12 @@ MaskedStoreOptPass::runOnBasicBlock(llvm::BasicBlock &bb) {

        LLVM_TYPE_CONST llvm::Type *rvalueType = rvalue->getType();
        LLVM_TYPE_CONST llvm::Type *ptrType =
            llvm::PointerType::get(rvalueType, 0);
        // Need to update this when int8/int16 are added
        int align = (called == pms32Func || called == pms64Func ||
                     called == msb32Func) ? 4 : 8;

        lvalue = new llvm::BitCastInst(lvalue, ptrType, "lvalue_to_ptr_type", callInst);
        lCopyMetadata(lvalue, callInst);
        llvm::Instruction *store =
            new llvm::StoreInst(rvalue, lvalue, false /* not volatile */,
                                align);
                                info->align);
        lCopyMetadata(store, callInst);
        llvm::ReplaceInstWithInst(callInst, store);
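In scalar terms, what MaskedStoreOptPass does with the table above: a compile-time all-off mask deletes the store, an all-on mask collapses the pseudo call into one ordinary store at the entry's alignment, and mixed masks are left for the lowering pass. A minimal standalone model (hypothetical names):

// Minimal model of the masked-store specialization above: a store that
// is fully masked off is dropped, a fully-on mask degenerates to a
// plain store, and anything else keeps the masked form.
enum class MaskState { AllOff, AllOn, Mixed };

template <typename T>
bool trySpecializeMaskedStore(T *dst, const T *src, int width, MaskState mask) {
    if (mask == MaskState::AllOff)
        return true;                   // store removed entirely
    if (mask == MaskState::AllOn) {
        for (int i = 0; i < width; ++i)
            dst[i] = src[i];           // one unconditional vector store
        return true;
    }
    return false;                      // mixed mask: leave for LowerMaskedStorePass
}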
@@ -1180,9 +1226,9 @@

// LowerMaskedStorePass

/** When the front-end needs to do a masked store, it emits a
    __pseudo_masked_store_{32,64} call as a placeholder. This pass lowers
    these calls to either __masked_store_{32,64} or
    __masked_store_blend_{32,64} calls.
    __pseudo_masked_store_{8,16,32,64} call as a placeholder. This pass
    lowers these calls to either __masked_store_{8,16,32,64} or
    __masked_store_blend_{8,16,32,64} calls.
 */
class LowerMaskedStorePass : public llvm::BasicBlockPass {
public:

@@ -1227,45 +1273,51 @@ lIsStackVariablePointer(llvm::Value *lvalue) {

}

/** Utility routine to figure out which masked store function to use. The
    blend parameter indicates if we want the blending version, is32
    indicates if the element size is 32 bits.
 */
static const char *
lMaskedStoreName(bool blend, bool is32) {
    if (blend) {
        if (is32)
            return "__masked_store_blend_32";
        else
            return "__masked_store_blend_64";
struct LMSInfo {
    LMSInfo(const char *pname, const char *bname, const char *msname) {
        pseudoFunc = m->module->getFunction(pname);
        blendFunc = m->module->getFunction(bname);
        maskedStoreFunc = m->module->getFunction(msname);
        assert(pseudoFunc != NULL && blendFunc != NULL &&
               maskedStoreFunc != NULL);
    }
    else {
        if (is32)
            return "__masked_store_32";
        else
            return "__masked_store_64";
    }
}
    llvm::Function *pseudoFunc;
    llvm::Function *blendFunc;
    llvm::Function *maskedStoreFunc;
};

bool
LowerMaskedStorePass::runOnBasicBlock(llvm::BasicBlock &bb) {
    llvm::Function *maskedStore32Func = m->module->getFunction("__pseudo_masked_store_32");
    llvm::Function *maskedStore64Func = m->module->getFunction("__pseudo_masked_store_64");
    assert(maskedStore32Func && maskedStore64Func);
    LMSInfo msInfo[] = {
        LMSInfo("__pseudo_masked_store_8", "__masked_store_blend_8",
                "__masked_store_8"),
        LMSInfo("__pseudo_masked_store_16", "__masked_store_blend_16",
                "__masked_store_16"),
        LMSInfo("__pseudo_masked_store_32", "__masked_store_blend_32",
                "__masked_store_32"),
        LMSInfo("__pseudo_masked_store_64", "__masked_store_blend_64",
                "__masked_store_64")
    };

    bool modifiedAny = false;
 restart:
    for (llvm::BasicBlock::iterator i = bb.begin(), e = bb.end(); i != e; ++i) {
    for (llvm::BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e; ++iter) {
        // Iterate through all of the instructions and look for
        // __pseudo_masked_store_* calls.
        llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*i);
        if (!callInst ||
            (callInst->getCalledFunction() != maskedStore32Func &&
             callInst->getCalledFunction() != maskedStore64Func))
        llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*iter);
        if (callInst == NULL)
            continue;
        LMSInfo *info = NULL;
        for (unsigned int i = 0; i < sizeof(msInfo) / sizeof(msInfo[0]); ++i) {
            if (callInst->getCalledFunction() == msInfo[i].pseudoFunc) {
                info = &msInfo[i];
                break;
            }
        }
        if (info == NULL)
            continue;

        bool is32 = (callInst->getCalledFunction() == maskedStore32Func);
        llvm::Value *lvalue = callInst->getArgOperand(0);
        llvm::Value *rvalue = callInst->getArgOperand(1);
        llvm::Value *mask = callInst->getArgOperand(2);

@@ -1282,8 +1334,7 @@ LowerMaskedStorePass::runOnBasicBlock(llvm::BasicBlock &bb) {

        // Generate the call to the appropriate masked store function and
        // replace the __pseudo_* one with it.
        llvm::Function *fms = m->module->getFunction(lMaskedStoreName(doBlend, is32));
        assert(fms);
        llvm::Function *fms = doBlend ? info->blendFunc : info->maskedStoreFunc;
        llvm::Value *args[3] = { lvalue, rvalue, mask };
#if defined(LLVM_3_0) || defined(LLVM_3_0svn)
        llvm::ArrayRef<llvm::Value *> newArgArray(&args[0], &args[3]);
||||
}
|
||||
|
||||
|
||||
struct GatherImpInfo {
|
||||
GatherImpInfo(const char *pName, const char *lbName, const char *lmName,
|
||||
int a)
|
||||
: align(a) {
|
||||
pseudoFunc = m->module->getFunction(pName);
|
||||
loadBroadcastFunc = m->module->getFunction(lbName);
|
||||
loadMaskedFunc = m->module->getFunction(lmName);
|
||||
|
||||
assert(pseudoFunc != NULL && loadBroadcastFunc != NULL &&
|
||||
loadMaskedFunc != NULL);
|
||||
}
|
||||
llvm::Function *pseudoFunc;
|
||||
llvm::Function *loadBroadcastFunc;
|
||||
llvm::Function *loadMaskedFunc;
|
||||
const int align;
|
||||
};
|
||||
|
||||
|
||||
struct ScatterImpInfo {
|
||||
ScatterImpInfo(const char *pName, const char *msName,
|
||||
LLVM_TYPE_CONST llvm::Type *vpt, int a)
|
||||
: align(a) {
|
||||
pseudoFunc = m->module->getFunction(pName);
|
||||
maskedStoreFunc = m->module->getFunction(msName);
|
||||
vecPtrType = vpt;
|
||||
assert(pseudoFunc != NULL && maskedStoreFunc != NULL);
|
||||
}
|
||||
llvm::Function *pseudoFunc;
|
||||
llvm::Function *maskedStoreFunc;
|
||||
LLVM_TYPE_CONST llvm::Type *vecPtrType;
|
||||
const int align;
|
||||
};
|
||||
|
||||
|
||||
bool
|
||||
GSImprovementsPass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
||||
llvm::Function *gather32Func = m->module->getFunction("__pseudo_gather_base_offsets_32");
|
||||
llvm::Function *gather64Func = m->module->getFunction("__pseudo_gather_base_offsets_64");
|
||||
llvm::Function *scatter32Func = m->module->getFunction("__pseudo_scatter_base_offsets_32");
|
||||
llvm::Function *scatter64Func = m->module->getFunction("__pseudo_scatter_base_offsets_64");
|
||||
assert(gather32Func && gather64Func && scatter32Func && scatter64Func);
|
||||
GatherImpInfo gInfo[] = {
|
||||
GatherImpInfo("__pseudo_gather_base_offsets_8", "__load_and_broadcast_8",
|
||||
"__load_masked_8", 1),
|
||||
GatherImpInfo("__pseudo_gather_base_offsets_16", "__load_and_broadcast_16",
|
||||
"__load_masked_16", 2),
|
||||
GatherImpInfo("__pseudo_gather_base_offsets_32", "__load_and_broadcast_32",
|
||||
"__load_masked_32", 4),
|
||||
GatherImpInfo("__pseudo_gather_base_offsets_64", "__load_and_broadcast_64",
|
||||
"__load_masked_64", 8)
|
||||
};
|
||||
ScatterImpInfo sInfo[] = {
|
||||
ScatterImpInfo("__pseudo_scatter_base_offsets_8", "__pseudo_masked_store_8",
|
||||
LLVMTypes::Int8VectorPointerType, 1),
|
||||
ScatterImpInfo("__pseudo_scatter_base_offsets_16", "__pseudo_masked_store_16",
|
||||
LLVMTypes::Int16VectorPointerType, 2),
|
||||
ScatterImpInfo("__pseudo_scatter_base_offsets_32", "__pseudo_masked_store_32",
|
||||
LLVMTypes::Int32VectorPointerType, 4),
|
||||
ScatterImpInfo("__pseudo_scatter_base_offsets_64", "__pseudo_masked_store_64",
|
||||
LLVMTypes::Int64VectorPointerType, 8)
|
||||
};
|
||||
|
||||
bool modifiedAny = false;
|
||||
|
||||
restart:
|
||||
for (llvm::BasicBlock::iterator i = bb.begin(), e = bb.end(); i != e; ++i) {
|
||||
for (llvm::BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e; ++iter) {
|
||||
// Iterate over all of the instructions and look for calls to
|
||||
// __pseudo_*_base_offsets_* calls.
|
||||
llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*i);
|
||||
if (!callInst ||
|
||||
(callInst->getCalledFunction() != gather32Func &&
|
||||
callInst->getCalledFunction() != gather64Func &&
|
||||
callInst->getCalledFunction() != scatter32Func &&
|
||||
callInst->getCalledFunction() != scatter64Func))
|
||||
llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*iter);
|
||||
if (callInst == NULL)
|
||||
continue;
|
||||
llvm::Function *calledFunc = callInst->getCalledFunction();
|
||||
GatherImpInfo *gatherInfo = NULL;
|
||||
ScatterImpInfo *scatterInfo = NULL;
|
||||
for (unsigned int i = 0; i < sizeof(gInfo) / sizeof(gInfo[0]); ++i) {
|
||||
if (calledFunc == gInfo[i].pseudoFunc) {
|
||||
gatherInfo = &gInfo[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
for (unsigned int i = 0; i < sizeof(sInfo) / sizeof(sInfo[0]); ++i) {
|
||||
if (calledFunc == sInfo[i].pseudoFunc) {
|
||||
scatterInfo = &sInfo[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (gatherInfo == NULL && scatterInfo == NULL)
|
||||
continue;
|
||||
|
||||
SourcePos pos;
|
||||
bool ok = lGetSourcePosFromMetadata(callInst, &pos);
|
||||
assert(ok);
|
||||
|
||||
bool isGather = (callInst->getCalledFunction() == gather32Func ||
|
||||
callInst->getCalledFunction() == gather64Func);
|
||||
bool is32 = (callInst->getCalledFunction() == gather32Func ||
|
||||
callInst->getCalledFunction() == scatter32Func);
|
||||
|
||||
// Get the actual base pointer; note that it comes into the gather
|
||||
// or scatter function bitcast to an i8 *, so we need to work back
|
||||
// to get the pointer as the original type.
|
||||
@@ -1921,7 +2029,7 @@ GSImprovementsPass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
||||
if (!lScalarizeVector(callInst->getArgOperand(1), offsetElements))
|
||||
continue;
|
||||
|
||||
llvm::Value *mask = callInst->getArgOperand(isGather ? 2 : 3);
|
||||
llvm::Value *mask = callInst->getArgOperand((gatherInfo != NULL) ? 2 : 3);
|
||||
|
||||
if (lVectorValuesAllEqual(offsetElements)) {
|
||||
// If all the offsets are equal, then compute the single
|
||||
@@ -1929,14 +2037,15 @@ GSImprovementsPass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
||||
// (arbitrarily).
|
||||
llvm::Value *indices[1] = { offsetElements[0] };
|
||||
llvm::Value *basei8 =
|
||||
new llvm::BitCastInst(base, LLVMTypes::VoidPointerType, "base2i8", callInst);
|
||||
new llvm::BitCastInst(base, LLVMTypes::VoidPointerType,
|
||||
"base2i8", callInst);
|
||||
lCopyMetadata(basei8, callInst);
|
||||
llvm::Value *ptr =
|
||||
llvm::GetElementPtrInst::Create(basei8, &indices[0], &indices[1],
|
||||
"ptr", callInst);
|
||||
lCopyMetadata(ptr, callInst);
|
||||
|
||||
if (isGather) {
|
||||
if (gatherInfo != NULL) {
|
||||
// A gather with everyone going to the same location is
|
||||
// handled as a scalar load and broadcast across the lanes.
|
||||
// Note that we do still have to pass the mask to the
|
||||
@@ -1944,20 +2053,16 @@ GSImprovementsPass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
||||
// access memory if the mask is all off (the location may
|
||||
// be invalid in that case).
|
||||
Debug(pos, "Transformed gather to scalar load and broadcast!");
|
||||
llvm::Function *loadBroadcast =
|
||||
m->module->getFunction(is32 ? "__load_and_broadcast_32" :
|
||||
"__load_and_broadcast_64");
|
||||
assert(loadBroadcast);
|
||||
llvm::Value *args[2] = { ptr, mask };
|
||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn)
|
||||
llvm::ArrayRef<llvm::Value *> newArgArray(&args[0], &args[2]);
|
||||
llvm::Instruction *newCall =
|
||||
llvm::CallInst::Create(loadBroadcast, newArgArray,
|
||||
llvm::CallInst::Create(gatherInfo->loadBroadcastFunc, newArgArray,
|
||||
"load_broadcast", (llvm::Instruction *)NULL);
|
||||
#else
|
||||
llvm::Instruction *newCall =
|
||||
llvm::CallInst::Create(loadBroadcast, &args[0], &args[2],
|
||||
"load_broadcast");
|
||||
llvm::CallInst::Create(gatherInfo->loadBroadcastFunc, &args[0],
|
||||
&args[2], "load_broadcast");
|
||||
#endif
|
||||
lCopyMetadata(newCall, callInst);
|
||||
llvm::ReplaceInstWithInst(callInst, newCall);
|
||||
@@ -1977,8 +2082,8 @@ GSImprovementsPass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
||||
ptr = new llvm::BitCastInst(ptr, llvm::PointerType::get(first->getType(), 0),
|
||||
"ptr2rvalue_type", callInst);
|
||||
lCopyMetadata(ptr, callInst);
|
||||
llvm::Instruction *sinst =
|
||||
new llvm::StoreInst(first, ptr, false, is32 ? 4 : 8 /* align */);
|
||||
llvm::Instruction *sinst = new llvm::StoreInst(first, ptr, false,
|
||||
scatterInfo->align);
|
||||
lCopyMetadata(sinst, callInst);
|
||||
llvm::ReplaceInstWithInst(callInst, sinst);
|
||||
}
|
||||
@@ -1987,7 +2092,8 @@ GSImprovementsPass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
||||
goto restart;
|
||||
}
|
||||
|
||||
if (lVectorIsLinear(offsetElements, is32 ? 4 : 8)) {
|
||||
int step = gatherInfo ? gatherInfo->align : scatterInfo->align;
|
||||
if (lVectorIsLinear(offsetElements, step)) {
|
||||
// We have a linear sequence of memory locations being accessed
|
||||
// starting with the location given by the offset from
|
||||
// offsetElements[0], with stride of 4 or 8 bytes (for 32 bit
|
||||
@@ -2003,53 +2109,38 @@ GSImprovementsPass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
||||
"ptr", callInst);
|
||||
lCopyMetadata(ptr, callInst);
|
||||
|
||||
if (isGather) {
|
||||
if (gatherInfo != NULL) {
|
||||
Debug(pos, "Transformed gather to unaligned vector load!");
|
||||
// FIXME: make this an aligned load when possible..
|
||||
// FIXME: are there lurking potential bugs when e.g. the
|
||||
// last few entries of the mask are off and the load ends
|
||||
// up straddling a page boundary?
|
||||
llvm::Function *loadMasked =
|
||||
m->module->getFunction(is32 ? "__load_masked_32" : "__load_masked_64");
|
||||
assert(loadMasked);
|
||||
|
||||
llvm::Value *args[2] = { ptr, mask };
|
||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn)
|
||||
llvm::ArrayRef<llvm::Value *> argArray(&args[0], &args[2]);
|
||||
llvm::Instruction *newCall =
|
||||
llvm::CallInst::Create(loadMasked, argArray, "load_masked",
|
||||
(llvm::Instruction *)NULL);
|
||||
llvm::CallInst::Create(gatherInfo->loadMaskedFunc, argArray,
|
||||
"load_masked", (llvm::Instruction *)NULL);
|
||||
#else
|
||||
llvm::Instruction *newCall =
|
||||
llvm::CallInst::Create(loadMasked, &args[0], &args[2], "load_masked");
|
||||
llvm::CallInst::Create(gatherInfo->loadMaskedFunc, &args[0],
|
||||
&args[2], "load_masked");
|
||||
#endif
|
||||
lCopyMetadata(newCall, callInst);
|
||||
llvm::ReplaceInstWithInst(callInst, newCall);
|
||||
}
|
||||
else {
|
||||
Debug(pos, "Transformed scatter to unaligned vector store!");
|
||||
// FIXME: make this an aligned store when possible. Need
|
||||
// to work through the messiness of issuing a pseudo store
|
||||
// here.
|
||||
llvm::Value *rvalue = callInst->getArgOperand(2);
|
||||
|
||||
llvm::Function *storeMasked =
|
||||
m->module->getFunction(is32 ? "__pseudo_masked_store_32" :
|
||||
"__pseudo_masked_store_64");
|
||||
assert(storeMasked);
|
||||
LLVM_TYPE_CONST llvm::Type *vecPtrType = is32 ?
|
||||
LLVMTypes::Int32VectorPointerType : LLVMTypes::Int64VectorPointerType;
|
||||
ptr = new llvm::BitCastInst(ptr, vecPtrType, "ptrcast", callInst);
|
||||
ptr = new llvm::BitCastInst(ptr, scatterInfo->vecPtrType, "ptrcast",
|
||||
callInst);
|
||||
|
||||
llvm::Value *args[3] = { ptr, rvalue, mask };
|
||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn)
|
||||
llvm::ArrayRef<llvm::Value *> argArray(&args[0], &args[3]);
|
||||
llvm::Instruction *newCall =
|
||||
llvm::CallInst::Create(storeMasked, argArray, "",
|
||||
(llvm::Instruction *)NULL);
|
||||
llvm::CallInst::Create(scatterInfo->maskedStoreFunc, argArray,
|
||||
"", (llvm::Instruction *)NULL);
|
||||
#else
|
||||
llvm::Instruction *newCall =
|
||||
llvm::CallInst::Create(storeMasked, &args[0], &args[3], "");
|
||||
llvm::CallInst::Create(scatterInfo->maskedStoreFunc,
|
||||
&args[0], &args[3], "");
|
||||
#endif
|
||||
lCopyMetadata(newCall, callInst);
|
||||
llvm::ReplaceInstWithInst(callInst, newCall);
|
||||
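The two fast paths in this pass hinge on lVectorValuesAllEqual and lVectorIsLinear, and the per-size align field now supplies the expected stride instead of the old is32 ? 4 : 8 guess. A rough sketch of the kind of stride check involved, under the assumption that the offset elements have already been scalarized to llvm::ConstantInt values (the real routine also handles non-constant patterns such as programIndex arithmetic):

    // Returns true if v[0..n-1] are constants forming base, base+stride,
    // base+2*stride, ... -- i.e. a linear sequence with the given stride.
    static bool lIsLinearConstSketch(llvm::Value *v[], int n, int stride) {
        llvm::ConstantInt *c0 = llvm::dyn_cast<llvm::ConstantInt>(v[0]);
        if (c0 == NULL)
            return false;
        int64_t base = c0->getSExtValue();
        for (int i = 1; i < n; ++i) {
            llvm::ConstantInt *ci = llvm::dyn_cast<llvm::ConstantInt>(v[i]);
            if (ci == NULL || ci->getSExtValue() != base + (int64_t)i * stride)
                return false;
        }
        return true;
    }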
@@ -2097,31 +2188,50 @@ char LowerGSPass::ID = 0;
llvm::RegisterPass<LowerGSPass> lgs("lower-gs",
"Lower Gather/Scatter Pass");

struct LowerGSInfo {
LowerGSInfo(const char *pName, const char *aName, bool ig)
: isGather(ig) {
pseudoFunc = m->module->getFunction(pName);
actualFunc = m->module->getFunction(aName);
assert(pseudoFunc != NULL && actualFunc != NULL);
}
llvm::Function *pseudoFunc;
llvm::Function *actualFunc;
const bool isGather;
};

bool
LowerGSPass::runOnBasicBlock(llvm::BasicBlock &bb) {
llvm::Function *gather32Func = m->module->getFunction("__pseudo_gather_base_offsets_32");
llvm::Function *gather64Func = m->module->getFunction("__pseudo_gather_base_offsets_64");
llvm::Function *scatter32Func = m->module->getFunction("__pseudo_scatter_base_offsets_32");
llvm::Function *scatter64Func = m->module->getFunction("__pseudo_scatter_base_offsets_64");
assert(gather32Func && gather64Func && scatter32Func && scatter64Func);
LowerGSInfo lgsInfo[] = {
LowerGSInfo("__pseudo_gather_base_offsets_8", "__gather_base_offsets_i8", true),
LowerGSInfo("__pseudo_gather_base_offsets_16", "__gather_base_offsets_i16", true),
LowerGSInfo("__pseudo_gather_base_offsets_32", "__gather_base_offsets_i32", true),
LowerGSInfo("__pseudo_gather_base_offsets_64", "__gather_base_offsets_i64", true),
LowerGSInfo("__pseudo_scatter_base_offsets_8", "__scatter_base_offsets_i8", false),
LowerGSInfo("__pseudo_scatter_base_offsets_16", "__scatter_base_offsets_i16", false),
LowerGSInfo("__pseudo_scatter_base_offsets_32", "__scatter_base_offsets_i32", false),
LowerGSInfo("__pseudo_scatter_base_offsets_64", "__scatter_base_offsets_i64", false)
};

bool modifiedAny = false;
restart:
for (llvm::BasicBlock::iterator i = bb.begin(), e = bb.end(); i != e; ++i) {
for (llvm::BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e; ++iter) {
// Loop over the instructions and find calls to the
// __pseudo_*_base_offsets_* functions.
llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*i);
if (!callInst ||
(callInst->getCalledFunction() != gather32Func &&
callInst->getCalledFunction() != gather64Func &&
callInst->getCalledFunction() != scatter32Func &&
callInst->getCalledFunction() != scatter64Func))
llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*iter);
if (callInst == NULL)
continue;
llvm::Function *calledFunc = callInst->getCalledFunction();
LowerGSInfo *info = NULL;
for (unsigned int i = 0; i < sizeof(lgsInfo) / sizeof(lgsInfo[0]); ++i) {
if (calledFunc == lgsInfo[i].pseudoFunc) {
info = &lgsInfo[i];
break;
}
}
if (info == NULL)
continue;

bool isGather = (callInst->getCalledFunction() == gather32Func ||
callInst->getCalledFunction() == gather64Func);
bool is32 = (callInst->getCalledFunction() == gather32Func ||
callInst->getCalledFunction() == scatter32Func);

// Get the source position from the metadata attached to the call
// instruction so that we can issue PerformanceWarning()s below.
@@ -2129,20 +2239,11 @@ LowerGSPass::runOnBasicBlock(llvm::BasicBlock &bb) {
bool ok = lGetSourcePosFromMetadata(callInst, &pos);
assert(ok);

if (isGather) {
llvm::Function *gFunc = m->module->getFunction(is32 ? "__gather_base_offsets_i32" :
"__gather_base_offsets_i64");
assert(gFunc);
callInst->setCalledFunction(gFunc);
callInst->setCalledFunction(info->actualFunc);
if (info->isGather)
PerformanceWarning(pos, "Gather required to compute value in expression.");
}
else {
llvm::Function *sFunc = m->module->getFunction(is32 ? "__scatter_base_offsets_i32" :
"__scatter_base_offsets_i64");
assert(sFunc);
callInst->setCalledFunction(sFunc);
else
PerformanceWarning(pos, "Scatter required for storing value.");
}
modifiedAny = true;
goto restart;
}
@@ -2286,25 +2387,41 @@ char MakeInternalFuncsStaticPass::ID = 0;
llvm::RegisterPass<MakeInternalFuncsStaticPass>
mifsp("make-internal-funcs-static", "Make Internal Funcs Static Pass");

bool
MakeInternalFuncsStaticPass::runOnModule(llvm::Module &module) {
const char *names[] = {
"__do_print", "__gather_base_offsets_i32", "__gather_base_offsets_i64",
"__gather_elt_32", "__gather_elt_64", "__load_and_broadcast_32",
"__load_and_broadcast_64", "__load_masked_32", "__load_masked_64",
"__masked_store_32", "__masked_store_64", "__masked_store_blend_32",
"__masked_store_blend_64", "__packed_load_active", "__packed_store_active",
"__scatter_base_offsets_i32", "__scatter_base_offsets_i64", "__scatter_elt_32",
"__scatter_elt_64", };
"__do_print",
"__gather_base_offsets_i8", "__gather_base_offsets_i16",
"__gather_base_offsets_i32", "__gather_base_offsets_i64",
"__gather_elt_8", "__gather_elt_16",
"__gather_elt_32", "__gather_elt_64",
"__load_and_broadcast_8", "__load_and_broadcast_16",
"__load_and_broadcast_32", "__load_and_broadcast_64",
"__load_masked_8", "__load_masked_16",
"__load_masked_32", "__load_masked_64",
"__masked_store_8", "__masked_store_16",
"__masked_store_32", "__masked_store_64",
"__masked_store_blend_8", "__masked_store_blend_16",
"__masked_store_blend_32", "__masked_store_blend_64",
"__packed_load_active", "__packed_store_active",
"__scatter_base_offsets_i8", "__scatter_base_offsets_i16",
"__scatter_base_offsets_i32", "__scatter_base_offsets_i64",
"__scatter_elt_8", "__scatter_elt_16",
"__scatter_elt_32", "__scatter_elt_64",
};

bool modifiedAny = false;
int count = sizeof(names) / sizeof(names[0]);
for (int i = 0; i < count; ++i) {
llvm::Function *f = m->module->getFunction(names[i]);
if (f != NULL)
if (f != NULL) {
f->setLinkage(llvm::GlobalValue::PrivateLinkage);
modifiedAny = true;
}
}

return true;
return modifiedAny;
}
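The return-value change in MakeInternalFuncsStaticPass is worth calling out: an LLVM pass's run method should report whether it actually mutated the IR, and the old unconditional return true overstated that. A minimal sketch of the corrected contract (MyStaticizePass and shouldBeStatic are hypothetical stand-ins, not names from the patch):

    // Report modification only when a linkage was actually changed.
    bool MyStaticizePass::runOnModule(llvm::Module &module) {
        bool modifiedAny = false;
        for (llvm::Module::iterator f = module.begin(); f != module.end(); ++f)
            if (shouldBeStatic(&*f)) {  // hypothetical predicate
                f->setLinkage(llvm::GlobalValue::PrivateLinkage);
                modifiedAny = true;
            }
        return modifiedAny;
    }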
12
parse.yy
@@ -102,15 +102,16 @@ static const char *lBuiltinTokens[] = {
"bool", "break", "case", "cbreak", "ccontinue", "cdo", "cfor", "char",
"cif", "cwhile", "const", "continue", "creturn", "default", "do", "double",
"else", "enum", "export", "extern", "false", "float", "for", "goto", "if",
"inline", "int", "int32", "int64", "launch", "print", "reference", "return",
"inline", "int", "int8", "int16", "int32", "int64", "launch", "print",
"reference", "return",
"static", "struct", "switch", "sync", "task", "true", "typedef", "uniform",
"unsigned", "varying", "void", "while", NULL
};

static const char *lParamListTokens[] = {
"bool", "char", "const", "double", "enum", "false", "float", "int",
"int32", "int64", "reference", "struct", "true", "uniform", "unsigned",
"varying", "void", NULL
"int8", "int16", "int32", "int64", "reference", "struct", "true",
"uniform", "unsigned", "varying", "void", NULL
};

%}
@@ -154,7 +155,7 @@ static const char *lParamListTokens[] = {
%token TOKEN_EXTERN TOKEN_EXPORT TOKEN_STATIC TOKEN_INLINE TOKEN_TASK
%token TOKEN_UNIFORM TOKEN_VARYING TOKEN_TYPEDEF TOKEN_SOA
%token TOKEN_CHAR TOKEN_INT TOKEN_UNSIGNED TOKEN_FLOAT TOKEN_DOUBLE
%token TOKEN_INT64 TOKEN_CONST TOKEN_VOID TOKEN_BOOL
%token TOKEN_INT8 TOKEN_INT16 TOKEN_INT64 TOKEN_CONST TOKEN_VOID TOKEN_BOOL
%token TOKEN_ENUM TOKEN_STRUCT TOKEN_TRUE TOKEN_FALSE TOKEN_REFERENCE

%token TOKEN_CASE TOKEN_DEFAULT TOKEN_IF TOKEN_ELSE TOKEN_SWITCH
@@ -587,7 +588,8 @@ type_specifier
atomic_var_type_specifier
: TOKEN_VOID { $$ = AtomicType::Void; }
| TOKEN_BOOL { $$ = AtomicType::VaryingBool; }
/* | TOKEN_CHAR { UNIMPLEMENTED; } */
| TOKEN_INT8 { $$ = AtomicType::VaryingInt8; }
| TOKEN_INT16 { $$ = AtomicType::VaryingInt16; }
| TOKEN_INT { $$ = AtomicType::VaryingInt32; }
| TOKEN_FLOAT { $$ = AtomicType::VaryingFloat; }
| TOKEN_DOUBLE { $$ = AtomicType::VaryingDouble; }
@@ -41,7 +41,6 @@

stdlib_core(8)
packed_load_and_store(8)
int8_16(8)
int64minmax(8)

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -539,55 +538,14 @@ define internal i64 @__reduce_max_uint64(<8 x i64>) nounwind readnone alwaysinli
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; unaligned loads/loads+broadcasts

define <8 x i32> @__load_and_broadcast_32(i8 *, <8 x i32> %mask) nounwind alwaysinline {
%mm = call i32 @__movmsk(<8 x i32> %mask)
%any_on = icmp ne i32 %mm, 0
br i1 %any_on, label %load, label %skip

load:
; TODO: make sure this becomes a vbroadcast...
%ptr = bitcast i8 * %0 to i32 *
%val = load i32 * %ptr

%ret0 = insertelement <8 x i32> undef, i32 %val, i32 0
%ret1 = insertelement <8 x i32> %ret0, i32 %val, i32 1
%ret2 = insertelement <8 x i32> %ret1, i32 %val, i32 2
%ret3 = insertelement <8 x i32> %ret2, i32 %val, i32 3
%ret4 = insertelement <8 x i32> %ret3, i32 %val, i32 4
%ret5 = insertelement <8 x i32> %ret4, i32 %val, i32 5
%ret6 = insertelement <8 x i32> %ret5, i32 %val, i32 6
%ret7 = insertelement <8 x i32> %ret6, i32 %val, i32 7
ret <8 x i32> %ret7

skip:
ret <8 x i32> undef
}

define <8 x i64> @__load_and_broadcast_64(i8 *, <8 x i32> %mask) nounwind alwaysinline {
%mm = call i32 @__movmsk(<8 x i32> %mask)
%any_on = icmp ne i32 %mm, 0
br i1 %any_on, label %load, label %skip

load:
; TODO: make sure this becomes a vbroadcast...
%ptr = bitcast i8 * %0 to i64 *
%val = load i64 * %ptr

%ret0 = insertelement <8 x i64> undef, i64 %val, i32 0
%ret1 = insertelement <8 x i64> %ret0, i64 %val, i32 1
%ret2 = insertelement <8 x i64> %ret1, i64 %val, i32 2
%ret3 = insertelement <8 x i64> %ret2, i64 %val, i32 3
%ret4 = insertelement <8 x i64> %ret3, i64 %val, i32 4
%ret5 = insertelement <8 x i64> %ret4, i64 %val, i32 5
%ret6 = insertelement <8 x i64> %ret5, i64 %val, i32 6
%ret7 = insertelement <8 x i64> %ret6, i64 %val, i32 7
ret <8 x i64> %ret3

skip:
ret <8 x i64> undef
}
load_and_broadcast(8, i8, 8)
load_and_broadcast(8, i16, 16)
load_and_broadcast(8, i32, 32)
load_and_broadcast(8, i64, 64)

; no masked load instruction for i8 and i16 types??
load_masked(8, i8, 8, 1)
load_masked(8, i16, 16, 2)

declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8 *, <8 x float> %mask)
declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8 *, <4 x double> %mask)
@@ -623,6 +581,12 @@ define <8 x i64> @__load_masked_64(i8 *, <8 x i32> %mask) nounwind alwaysinline
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; masked store

; FIXME: there is no AVX instruction for these, but we could be clever
; by packing the bits down and setting the last 3/4 or half, respectively,
; of the mask to zero... Not sure if this would be a win in the end
gen_masked_store(8, i8, 8)
gen_masked_store(8, i16, 16)

; note that mask is the 2nd parameter, not the 3rd one!!
declare void @llvm.x86.avx.maskstore.ps.256(i8 *, <8 x float>, <8 x float>)
declare void @llvm.x86.avx.maskstore.pd.256(i8 *, <4 x double>, <4 x double>)
@@ -660,13 +624,14 @@ define void @__masked_store_64(<8 x i64>* nocapture, <8 x i64>,
ret void
}

masked_store_blend_8_16_by_8()

declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>,
<8 x float>) nounwind readnone

define void @__masked_store_blend_32(<8 x i32>* nocapture, <8 x i32>,
<8 x i32>) nounwind alwaysinline {
<8 x i32>) nounwind alwaysinline {
%mask_as_float = bitcast <8 x i32> %2 to <8 x float>
%oldValue = load <8 x i32>* %0, align 4
%oldAsFloat = bitcast <8 x i32> %oldValue to <8 x float>

@@ -36,7 +36,6 @@

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

int8_16(4)
int64minmax(4)

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -380,29 +379,23 @@ define internal i64 @__reduce_max_uint64(<4 x i64>) nounwind readnone {
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; masked store

define void @__masked_store_32(<4 x i32>* nocapture, <4 x i32>, <4 x i32>) nounwind alwaysinline {
per_lane(4, <4 x i32> %2, `
; compute address for this one
%ptr_ID = getelementptr <4 x i32> * %0, i32 0, i32 LANE
%storeval_ID = extractelement <4 x i32> %1, i32 LANE
store i32 %storeval_ID, i32 * %ptr_ID')
ret void
}

define void @__masked_store_64(<4 x i64>* nocapture, <4 x i64>, <4 x i32>) nounwind alwaysinline {
per_lane(4, <4 x i32> %2, `
%ptr_ID = getelementptr <4 x i64> * %0, i32 0, i32 LANE
%storeval_ID = extractelement <4 x i64> %1, i32 LANE
store i64 %storeval_ID, i64 * %ptr_ID')
ret void
}
masked_store_blend_8_16_by_4()

gen_masked_store(4, i8, 8)
gen_masked_store(4, i16, 16)
gen_masked_store(4, i32, 32)
gen_masked_store(4, i64, 64)

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; unaligned loads/loads+broadcasts

load_and_broadcast(4, i8, 8)
load_and_broadcast(4, i16, 16)
load_and_broadcast(4, i32, 32)
load_and_broadcast(4, i64, 64)

load_masked(4, i8, 8, 1)
load_masked(4, i16, 16, 2)
load_masked(4, i32, 32, 4)
load_masked(4, i64, 64, 8)

@@ -411,7 +404,12 @@ load_masked(4, i64, 64, 8)

; define these with the macros from stdlib.m4

gen_gather(4, i8)
gen_gather(4, i16)
gen_gather(4, i32)
gen_gather(4, i64)

gen_scatter(4, i8)
gen_scatter(4, i16)
gen_scatter(4, i32)
gen_scatter(4, i64)

@@ -38,7 +38,6 @@

stdlib_core(8)
packed_load_and_store(8)
int8_16(8)
int64minmax(8)

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -435,44 +434,29 @@ define internal i64 @__reduce_max_uint64(<8 x i64>) nounwind readnone {
reduce8(i64, @__max_varying_uint64, @__max_uniform_uint64)
}

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; masked store

define void @__masked_store_32(<8 x i32>* nocapture, <8 x i32>,
<8 x i32>) nounwind alwaysinline {
per_lane(8, <8 x i32> %2, `
; compute address for this one
%ptr_ID = getelementptr <8 x i32> * %0, i32 0, i32 LANE
%storeval_ID = extractelement <8 x i32> %1, i32 LANE
store i32 %storeval_ID, i32 * %ptr_ID')
ret void
}

define void @__masked_store_64(<8 x i64>* nocapture, <8 x i64>,
<8 x i32>) nounwind alwaysinline {
per_lane(8, <8 x i32> %2, `
; compute address for this one
%ptr_ID = getelementptr <8 x i64> * %0, i32 0, i32 LANE
%storeval_ID = extractelement <8 x i64> %1, i32 LANE
store i64 %storeval_ID, i64 * %ptr_ID')
ret void
}

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; unaligned loads/loads+broadcasts

load_and_broadcast(8, i8, 8)
load_and_broadcast(8, i16, 16)
load_and_broadcast(8, i32, 32)
load_and_broadcast(8, i64, 64)

load_masked(8, i8, 8, 1)
load_masked(8, i16, 16, 2)
load_masked(8, i32, 32, 4)
load_masked(8, i64, 64, 8)

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; gather/scatter

gen_gather(8, i8)
gen_gather(8, i16)
gen_gather(8, i32)
gen_gather(8, i64)

gen_scatter(8, i8)
gen_scatter(8, i16)
gen_scatter(8, i32)
gen_scatter(8, i64)

@@ -619,6 +603,13 @@ define internal float @__reduce_add_float(<8 x float>) nounwind readonly alwaysi
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; masked store

gen_masked_store(8, i8, 8)
gen_masked_store(8, i16, 16)
gen_masked_store(8, i32, 32)
gen_masked_store(8, i64, 64)

masked_store_blend_8_16_by_8()

declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>,
<4 x float>) nounwind readnone
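Across these target files, the gen_gather/gen_scatter macros expand to per-lane scalar memory operations guarded by the execution mask. A rough C++ analogue of the generated gather, assuming an N-lane mask in which a lane is active when its mask word is nonzero (the macro-generated IR is the authoritative form; this is only a sketch of its semantics):

    // Per-lane gather: inactive lanes are skipped entirely, since their
    // computed addresses may not be valid to dereference.
    template <typename T, int N>
    static void gatherPerLane(T result[N], const T *base,
                              const int offsets[N], const int mask[N]) {
        for (int lane = 0; lane < N; ++lane)
            if (mask[lane] != 0)
                result[lane] = base[offsets[lane]];
    }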
301
stdlib.ispc
@@ -85,6 +85,14 @@ static inline float broadcast(float v, uniform int i) {
return __broadcast_float(v, i);
}

static inline int8 broadcast(int8 v, uniform int i) {
return __broadcast_int8(v, i);
}

static inline int16 broadcast(int16 v, uniform int i) {
return __broadcast_int16(v, i);
}

static inline int32 broadcast(int32 v, uniform int i) {
return __broadcast_int32(v, i);
}
@@ -101,6 +109,14 @@ static inline float rotate(float v, uniform int i) {
return __rotate_float(v, i);
}

static inline int8 rotate(int8 v, uniform int i) {
return __rotate_int8(v, i);
}

static inline int16 rotate(int16 v, uniform int i) {
return __rotate_int16(v, i);
}

static inline int32 rotate(int32 v, uniform int i) {
return __rotate_int32(v, i);
}
@@ -117,6 +133,14 @@ static inline float shuffle(float v, int i) {
return __shuffle_float(v, i);
}

static inline int8 shuffle(int8 v, int i) {
return __shuffle_int8(v, i);
}

static inline int16 shuffle(int16 v, int i) {
return __shuffle_int16(v, i);
}

static inline int32 shuffle(int32 v, int i) {
return __shuffle_int32(v, i);
}
@@ -133,6 +157,14 @@ static inline float shuffle(float v0, float v1, int i) {
return __shuffle2_float(v0, v1, i);
}

static inline int8 shuffle(int8 v0, int8 v1, int i) {
return __shuffle2_int8(v0, v1, i);
}

static inline int16 shuffle(int16 v0, int16 v1, int i) {
return __shuffle2_int16(v0, v1, i);
}

static inline int32 shuffle(int32 v0, int32 v1, int i) {
return __shuffle2_int32(v0, v1, i);
}
@@ -150,11 +182,27 @@ static inline uniform float extract(float x, uniform int i) {
return floatbits(__extract_int32((int)intbits(x), i));
}

static inline uniform int extract(int x, uniform int i) {
static inline uniform int8 extract(int8 x, uniform int i) {
return __extract_int8(x, i);
}

static inline uniform unsigned int8 extract(unsigned int8 x, uniform int i) {
return __extract_int8(x, (unsigned int)i);
}

static inline uniform int16 extract(int16 x, uniform int i) {
return __extract_int16(x, i);
}

static inline uniform unsigned int16 extract(unsigned int16 x, uniform int i) {
return __extract_int16(x, (unsigned int)i);
}

static inline uniform int32 extract(int32 x, uniform int i) {
return __extract_int32(x, i);
}

static inline uniform unsigned int extract(unsigned int x, uniform int i) {
static inline uniform unsigned int32 extract(unsigned int32 x, uniform int i) {
return __extract_int32(x, (unsigned int)i);
}

@@ -175,12 +223,30 @@ static inline float insert(float x, uniform int i, uniform float v) {
return floatbits(__insert_int32((int)intbits(x), i, (int)intbits(v)));
}

static inline int insert(int x, uniform int i, uniform int v) {
static inline int8 insert(int8 x, uniform int i, uniform int8 v) {
return __insert_int8(x, i, v);
}

static inline unsigned int8 insert(unsigned int8 x, uniform int i,
uniform unsigned int8 v) {
return __insert_int8(x, (unsigned int)i, v);
}

static inline int16 insert(int16 x, uniform int i, uniform int16 v) {
return __insert_int16(x, i, v);
}

static inline unsigned int16 insert(unsigned int16 x, uniform int i,
uniform unsigned int16 v) {
return __insert_int16(x, (unsigned int)i, v);
}

static inline int32 insert(int32 x, uniform int i, uniform int32 v) {
return __insert_int32(x, i, v);
}

static inline unsigned int insert(unsigned int x, uniform int i,
uniform unsigned int v) {
static inline unsigned int32 insert(unsigned int32 x, uniform int i,
uniform unsigned int32 v) {
return __insert_int32(x, (unsigned int)i, v);
}

@@ -218,7 +284,7 @@ static inline uniform bool all(bool v) {
return __movmsk(match) == (1 << programCount) - 1;
}

static inline uniform int popcnt(uniform int v) {
static inline uniform int32 popcnt(uniform int32 v) {
return __popcnt_int32(v);
}

@@ -473,52 +539,7 @@ ATOMIC_DECL_CMPXCHG(unsigned int64, int64)
ATOMIC_DECL_CMPXCHG(double, double)

///////////////////////////////////////////////////////////////////////////
// Load/store from/to 8/16-bit types

static inline int load_from_int8(uniform int a[], uniform int offset) {
return __load_int8(a, offset, __mask);
}

static inline unsigned int load_from_uint8(uniform unsigned int a[],
uniform int offset) {
return __load_uint8(a, offset, __mask);
}

static inline void store_to_int8(uniform int a[], uniform int offset,
unsigned int val) {
__store_int8(a, offset, val, __mask);
}

static inline void store_to_uint8(uniform unsigned int a[], uniform int offset,
unsigned int val) {
// Can use __store_int8 for unsigned stuff, since it truncates bits in
// either case.
__store_int8(a, offset, val, __mask);
}

static inline int load_from_int16(uniform int a[], uniform int offset) {
return __load_int16(a, offset, __mask);
}

static inline unsigned int load_from_int16(uniform unsigned int a[],
uniform int offset) {
return __load_uint16(a, offset, __mask);
}

static inline void store_to_int16(uniform int a[], uniform int offset,
int val) {
__store_int16(a, offset, val, __mask);
}

static inline void store_to_uint16(uniform unsigned int a[], uniform int offset,
unsigned int val) {
// Can use __store_int16 for unsigned stuff, since it truncates bits in
// either case.
__store_int16(a, offset, val, __mask);
}

///////////////////////////////////////////////////////////////////////////
// Math
// Floating-Point Math

static inline float abs(float a) {
// Floating-point hack: zeroing the high bit clears the sign
@@ -622,6 +643,11 @@ static inline uniform float rcp(uniform float v) {
return __rcp_uniform_float(v);
}

///////////////////////////////////////////////////////////////////////////
// min/max

// float

static inline float min(float a, float b) {
return __min_varying_float(a, b);
}
@@ -630,14 +656,6 @@ static inline uniform float min(uniform float a, uniform float b) {
return __min_uniform_float(a, b);
}

static inline double min(double a, double b) {
return __min_varying_double(a, b);
}

static inline uniform double min(uniform double a, uniform double b) {
return __min_uniform_double(a, b);
}

static inline float max(float a, float b) {
return __max_varying_float(a, b);
}
@@ -646,6 +664,17 @@ static inline uniform float max(uniform float a, uniform float b) {
return __max_uniform_float(a, b);
}

// double

static inline double min(double a, double b) {
return __min_varying_double(a, b);
}

static inline uniform double min(uniform double a, uniform double b) {
return __min_uniform_double(a, b);
}

static inline double max(double a, double b) {
return __max_varying_double(a, b);
}
@@ -654,6 +683,80 @@ static inline uniform double max(uniform double a, uniform double b) {
return __max_uniform_double(a, b);
}

// int8

static inline uniform unsigned int8 min(uniform unsigned int8 a,
uniform unsigned int8 b) {
return (a < b) ? a : b;
}

static inline uniform unsigned int8 max(uniform unsigned int8 a,
uniform unsigned int8 b) {
return (a > b) ? a : b;
}

static inline uniform int8 min(uniform int8 a, uniform int8 b) {
return (a < b) ? a : b;
}

static inline uniform int8 max(uniform int8 a, uniform int8 b) {
return (a > b) ? a : b;
}

static inline unsigned int8 min(unsigned int8 a, unsigned int8 b) {
return (a < b) ? a : b;
}

static inline unsigned int8 max(unsigned int8 a, unsigned int8 b) {
return (a > b) ? a : b;
}

static inline int8 min(int8 a, int8 b) {
return (a < b) ? a : b;
}

static inline int8 max(int8 a, int8 b) {
return (a > b) ? a : b;
}

// int16

static inline uniform unsigned int16 min(uniform unsigned int16 a,
uniform unsigned int16 b) {
return (a < b) ? a : b;
}

static inline uniform unsigned int16 max(uniform unsigned int16 a,
uniform unsigned int16 b) {
return (a > b) ? a : b;
}

static inline uniform int16 min(uniform int16 a, uniform int16 b) {
return (a < b) ? a : b;
}

static inline uniform int16 max(uniform int16 a, uniform int16 b) {
return (a > b) ? a : b;
}

static inline unsigned int16 min(unsigned int16 a, unsigned int16 b) {
return (a < b) ? a : b;
}

static inline unsigned int16 max(unsigned int16 a, unsigned int16 b) {
return (a > b) ? a : b;
}

static inline int16 min(int16 a, int16 b) {
return (a < b) ? a : b;
}

static inline int16 max(int16 a, int16 b) {
return (a > b) ? a : b;
}

// int32

static inline unsigned int min(unsigned int a, unsigned int b) {
return __min_varying_uint32(a, b);
}
@@ -686,6 +789,8 @@ static inline uniform int max(uniform int a, uniform int b) {
return __max_uniform_int32(a, b);
}

// int64

static inline unsigned int64 min(unsigned int64 a, unsigned int64 b) {
return __min_varying_uint64(a, b);
}
@@ -718,6 +823,11 @@ static inline uniform int64 max(uniform int64 a, uniform int64 b) {
return __max_uniform_int64(a, b);
}

///////////////////////////////////////////////////////////////////////////
// clamps

// float

static inline float clamp(float v, float low, float high) {
return min(max(v, low), high);
}
@@ -726,6 +836,52 @@ static inline uniform float clamp(uniform float v, uniform float low, uniform fl
return min(max(v, low), high);
}

// int8

static inline unsigned int8 clamp(unsigned int8 v, unsigned int8 low,
unsigned int8 high) {
return min(max(v, low), high);
}

static inline uniform unsigned int8 clamp(uniform unsigned int8 v,
uniform unsigned int8 low,
uniform unsigned int8 high) {
return min(max(v, low), high);
}

static inline int8 clamp(int8 v, int8 low, int8 high) {
return min(max(v, low), high);
}

static inline uniform int8 clamp(uniform int8 v, uniform int8 low,
uniform int8 high) {
return min(max(v, low), high);
}

// int16

static inline unsigned int16 clamp(unsigned int16 v, unsigned int16 low,
unsigned int16 high) {
return min(max(v, low), high);
}

static inline uniform unsigned int16 clamp(uniform unsigned int16 v,
uniform unsigned int16 low,
uniform unsigned int16 high) {
return min(max(v, low), high);
}

static inline int16 clamp(int16 v, int16 low, int16 high) {
return min(max(v, low), high);
}

static inline uniform int16 clamp(uniform int16 v, uniform int16 low,
uniform int16 high) {
return min(max(v, low), high);
}

// int32

static inline unsigned int clamp(unsigned int v, unsigned int low, unsigned int high) {
return min(max(v, low), high);
}
@@ -735,15 +891,6 @@ static inline uniform unsigned int clamp(uniform unsigned int v, uniform unsigne
return min(max(v, low), high);
}

static inline unsigned int64 clamp(unsigned int64 v, unsigned int64 low, unsigned int64 high) {
return min(max(v, low), high);
}

static inline uniform unsigned int64 clamp(uniform unsigned int64 v, uniform unsigned int64 low,
uniform unsigned int64 high) {
return min(max(v, low), high);
}

static inline int clamp(int v, int low, int high) {
return min(max(v, low), high);
}
@@ -752,11 +899,25 @@ static inline uniform int clamp(uniform int v, uniform int low, uniform int high
return min(max(v, low), high);
}

// int64

static inline unsigned int64 clamp(unsigned int64 v, unsigned int64 low,
unsigned int64 high) {
return min(max(v, low), high);
}

static inline uniform unsigned int64 clamp(uniform unsigned int64 v,
uniform unsigned int64 low,
uniform unsigned int64 high) {
return min(max(v, low), high);
}

static inline int64 clamp(int64 v, int64 low, int64 high) {
return min(max(v, low), high);
}

static inline uniform int64 clamp(uniform int64 v, uniform int64 low, uniform int64 high) {
static inline uniform int64 clamp(uniform int64 v, uniform int64 low,
uniform int64 high) {
return min(max(v, low), high);
}
288
stdlib.m4
@@ -566,6 +566,28 @@ declare i1 @__is_compile_time_constant_varying_int32(<$1 x i32>)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; vector ops

define internal i8 @__extract_int8(<$1 x i8>, i32) nounwind readnone alwaysinline {
%extract = extractelement <$1 x i8> %0, i32 %1
ret i8 %extract
}

define internal <$1 x i8> @__insert_int8(<$1 x i8>, i32,
i8) nounwind readnone alwaysinline {
%insert = insertelement <$1 x i8> %0, i8 %2, i32 %1
ret <$1 x i8> %insert
}

define internal i16 @__extract_int16(<$1 x i16>, i32) nounwind readnone alwaysinline {
%extract = extractelement <$1 x i16> %0, i32 %1
ret i16 %extract
}

define internal <$1 x i16> @__insert_int16(<$1 x i16>, i32,
i16) nounwind readnone alwaysinline {
%insert = insertelement <$1 x i16> %0, i16 %2, i32 %1
ret <$1 x i16> %insert
}

define internal i32 @__extract_int32(<$1 x i32>, i32) nounwind readnone alwaysinline {
%extract = extractelement <$1 x i32> %0, i32 %1
ret i32 %extract
@@ -588,6 +610,8 @@ define internal <$1 x i64> @__insert_int64(<$1 x i64>, i32,
ret <$1 x i64> %insert
}

shuffles($1, i8, int8, 1)
shuffles($1, i16, int16, 2)
shuffles($1, float, float, 4)
shuffles($1, i32, int32, 4)
shuffles($1, double, double, 8)
@@ -901,171 +925,6 @@ i64minmax($1,min,uint64,ult)
i64minmax($1,max,uint64,ugt)
')

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Definitions of 8 and 16-bit load and store functions
;;
;; The `int8_16' macro defines functions related to loading and storing 8 and
;; 16-bit values in memory, converting to and from i32. (This is a workaround
;; to be able to use in-memory values of these types in ispc programs, since the
;; compiler doesn't yet support 8 and 16-bit datatypes...)
;;
;; Arguments to pass to `int8_16':
;; $1: vector width of the target
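In other words, the int8_16 functions load 8- or 16-bit lanes and widen them to i32, with signed vs. unsigned selecting sign- vs. zero-extension; the IR does all lanes at once by loading a single 8*$1- or 16*$1-bit integer and bitcasting it to a vector. A scalar C++ sketch of the per-lane semantics only (not the vectorized form the macro emits):

    #include <stdint.h>

    // Matches @__load_int8: signed 8-bit lanes, sign-extended to i32.
    static void loadInt8Lanes(const int8_t *src, int32_t *dst, int n) {
        for (int i = 0; i < n; ++i)
            dst[i] = (int32_t)src[i];
    }

    // Matches @__load_uint8: unsigned 8-bit lanes, zero-extended to i32.
    static void loadUint8Lanes(const uint8_t *src, int32_t *dst, int n) {
        for (int i = 0; i < n; ++i)
            dst[i] = (int32_t)src[i];
    }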
define(`int8_16', `
define internal <$1 x i32> @__load_uint8([0 x i32] *, i32 %offset,
<$1 x i32> %mask) nounwind alwaysinline {
%mm = call i32 @__movmsk(<$1 x i32> %mask)
%any = icmp ne i32 %mm, 0
br i1 %any, label %doload, label %skip

doload:
%ptr8 = bitcast [0 x i32] *%0 to i8 *
%ptr = getelementptr i8 * %ptr8, i32 %offset
%ptr64 = bitcast i8 * %ptr to i`'eval(8*$1) *
%val = load i`'eval(8*$1) * %ptr64, align 1

%vval = bitcast i`'eval(8*$1) %val to <$1 x i8>
; unsigned, so zero-extend to i32...
%ret = zext <$1 x i8> %vval to <$1 x i32>
ret <$1 x i32> %ret

skip:
ret <$1 x i32> undef
}

define internal <$1 x i32> @__load_int8([0 x i32] *, i32 %offset,
<$1 x i32> %mask) nounwind alwaysinline {
%mm = call i32 @__movmsk(<$1 x i32> %mask)
%any = icmp ne i32 %mm, 0
br i1 %any, label %doload, label %skip

doload:
%ptr8 = bitcast [0 x i32] *%0 to i8 *
%ptr = getelementptr i8 * %ptr8, i32 %offset
%ptr64 = bitcast i8 * %ptr to i`'eval(8*$1) *
%val = load i`'eval(8*$1) * %ptr64, align 1

%vval = bitcast i`'eval(8*$1) %val to <$1 x i8>
; signed, so sign-extend to i32...
%ret = sext <$1 x i8> %vval to <$1 x i32>
ret <$1 x i32> %ret

skip:
ret <$1 x i32> undef
}

define internal <$1 x i32> @__load_uint16([0 x i32] *, i32 %offset,
<$1 x i32> %mask) nounwind alwaysinline {
%mm = call i32 @__movmsk(<$1 x i32> %mask)
%any = icmp ne i32 %mm, 0
br i1 %any, label %doload, label %skip

doload:
%ptr16 = bitcast [0 x i32] *%0 to i16 *
%ptr = getelementptr i16 * %ptr16, i32 %offset
%ptr64 = bitcast i16 * %ptr to i`'eval(16*$1) *
%val = load i`'eval(16*$1) * %ptr64, align 2

%vval = bitcast i`'eval(16*$1) %val to <$1 x i16>
; unsigned, so use zero-extend...
%ret = zext <$1 x i16> %vval to <$1 x i32>
ret <$1 x i32> %ret

skip:
ret <$1 x i32> undef
}

define internal <$1 x i32> @__load_int16([0 x i32] *, i32 %offset,
<$1 x i32> %mask) nounwind alwaysinline {
%mm = call i32 @__movmsk(<$1 x i32> %mask)
%any = icmp ne i32 %mm, 0
br i1 %any, label %doload, label %skip

doload:
%ptr16 = bitcast [0 x i32] *%0 to i16 *
%ptr = getelementptr i16 * %ptr16, i32 %offset
%ptr64 = bitcast i16 * %ptr to i`'eval(16*$1) *
%val = load i`'eval(16*$1) * %ptr64, align 2

%vval = bitcast i`'eval(16*$1) %val to <$1 x i16>
; signed, so use sign-extend...
%ret = sext <$1 x i16> %vval to <$1 x i32>
ret <$1 x i32> %ret

skip:
ret <$1 x i32> undef
}

define internal void @__store_int8([0 x i32] *, i32 %offset, <$1 x i32> %val32,
<$1 x i32> %mask) nounwind alwaysinline {
%mm = call i32 @__movmsk(<$1 x i32> %mask)
%any = icmp ne i32 %mm, 0
br i1 %any, label %dostore, label %skip

dostore:
%val = trunc <$1 x i32> %val32 to <$1 x i8>
%val64 = bitcast <$1 x i8> %val to i`'eval(8*$1)

%mask8 = trunc <$1 x i32> %mask to <$1 x i8>
%mask64 = bitcast <$1 x i8> %mask8 to i`'eval(8*$1)
%notmask = xor i`'eval(8*$1) %mask64, -1

%ptr8 = bitcast [0 x i32] *%0 to i8 *
%ptr = getelementptr i8 * %ptr8, i32 %offset
%ptr64 = bitcast i8 * %ptr to i`'eval(8*$1) *

;; load the old value, use logical ops to blend based on the mask, then
;; store the result back
%old = load i`'eval(8*$1) * %ptr64, align 1
%oldmasked = and i`'eval(8*$1) %old, %notmask
%newmasked = and i`'eval(8*$1) %val64, %mask64
%final = or i`'eval(8*$1) %oldmasked, %newmasked
store i`'eval(8*$1) %final, i`'eval(8*$1) * %ptr64, align 1

ret void

skip:
ret void
}

define internal void @__store_int16([0 x i32] *, i32 %offset, <$1 x i32> %val32,
<$1 x i32> %mask) nounwind alwaysinline {
%mm = call i32 @__movmsk(<$1 x i32> %mask)
%any = icmp ne i32 %mm, 0
br i1 %any, label %dostore, label %skip

dostore:
%val = trunc <$1 x i32> %val32 to <$1 x i16>
%val64 = bitcast <$1 x i16> %val to i`'eval(16*$1)

%mask8 = trunc <$1 x i32> %mask to <$1 x i16>
%mask64 = bitcast <$1 x i16> %mask8 to i`'eval(16*$1)
%notmask = xor i`'eval(16*$1) %mask64, -1

%ptr16 = bitcast [0 x i32] *%0 to i16 *
%ptr = getelementptr i16 * %ptr16, i32 %offset
%ptr64 = bitcast i16 * %ptr to i`'eval(16*$1) *

;; as above, use mask to do blending with logical ops...
%old = load i`'eval(16*$1) * %ptr64, align 2
%oldmasked = and i`'eval(16*$1) %old, %notmask
%newmasked = and i`'eval(16*$1) %val64, %mask64
%final = or i`'eval(16*$1) %oldmasked, %newmasked
store i`'eval(16*$1) %final, i`'eval(16*$1) * %ptr64, align 2

ret void

skip:
ret void
}
'
)

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Emit code to safely load a scalar value and broadcast it across the
;; elements of a vector. Parameters:
@@ -1150,6 +1009,105 @@ return:
}
')

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; masked store
;; emit code to do masked store as a set of per-lane scalar stores
;; parameters:
;; $1: target vector width
;; $2: llvm type of elements
;; $3: suffix for function name

define(`gen_masked_store', `
define void @__masked_store_$3(<$1 x $2>* nocapture, <$1 x $2>, <$1 x i32>) nounwind alwaysinline {
per_lane($1, <$1 x i32> %2, `
%ptr_ID = getelementptr <$1 x $2> * %0, i32 0, i32 LANE
%storeval_ID = extractelement <$1 x $2> %1, i32 LANE
store $2 %storeval_ID, $2 * %ptr_ID')
ret void
}
')

define(`masked_store_blend_8_16_by_4', `
define void @__masked_store_blend_8(<4 x i8>* nocapture, <4 x i8>,
<4 x i32>) nounwind alwaysinline {
%old = load <4 x i8> * %0
%old32 = bitcast <4 x i8> %old to i32
%new32 = bitcast <4 x i8> %1 to i32

%mask8 = trunc <4 x i32> %2 to <4 x i8>
%mask32 = bitcast <4 x i8> %mask8 to i32
%notmask32 = xor i32 %mask32, -1

%newmasked = and i32 %new32, %mask32
%oldmasked = and i32 %old32, %notmask32
%result = or i32 %newmasked, %oldmasked

%resultvec = bitcast i32 %result to <4 x i8>
store <4 x i8> %resultvec, <4 x i8> * %0
ret void
}

define void @__masked_store_blend_16(<4 x i16>* nocapture, <4 x i16>,
<4 x i32>) nounwind alwaysinline {
%old = load <4 x i16> * %0
%old64 = bitcast <4 x i16> %old to i64
%new64 = bitcast <4 x i16> %1 to i64

%mask16 = trunc <4 x i32> %2 to <4 x i16>
%mask64 = bitcast <4 x i16> %mask16 to i64
%notmask64 = xor i64 %mask64, -1

%newmasked = and i64 %new64, %mask64
%oldmasked = and i64 %old64, %notmask64
%result = or i64 %newmasked, %oldmasked

%resultvec = bitcast i64 %result to <4 x i16>
store <4 x i16> %resultvec, <4 x i16> * %0
ret void
}
')

define(`masked_store_blend_8_16_by_8', `
define void @__masked_store_blend_8(<8 x i8>* nocapture, <8 x i8>,
<8 x i32>) nounwind alwaysinline {
%old = load <8 x i8> * %0
%old64 = bitcast <8 x i8> %old to i64
%new64 = bitcast <8 x i8> %1 to i64

%mask8 = trunc <8 x i32> %2 to <8 x i8>
%mask64 = bitcast <8 x i8> %mask8 to i64
%notmask64 = xor i64 %mask64, -1

%newmasked = and i64 %new64, %mask64
%oldmasked = and i64 %old64, %notmask64
%result = or i64 %newmasked, %oldmasked

%resultvec = bitcast i64 %result to <8 x i8>
store <8 x i8> %resultvec, <8 x i8> * %0
ret void
}

define void @__masked_store_blend_16(<8 x i16>* nocapture, <8 x i16>,
<8 x i32>) nounwind alwaysinline {
%old = load <8 x i16> * %0
%old128 = bitcast <8 x i16> %old to i128
%new128 = bitcast <8 x i16> %1 to i128

%mask16 = trunc <8 x i32> %2 to <8 x i16>
%mask128 = bitcast <8 x i16> %mask16 to i128
%notmask128 = xor i128 %mask128, -1

%newmasked = and i128 %new128, %mask128
%oldmasked = and i128 %old128, %notmask128
%result = or i128 %newmasked, %oldmasked

%resultvec = bitcast i128 %result to <8 x i16>
store <8 x i16> %resultvec, <8 x i16> * %0
ret void
}
')
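All of the blend stores above (and the __store_int8/__store_int16 bodies earlier) use the same bit trick: after truncating the i32 mask to the element width, each lane of the mask is all-ones or all-zeros, so one wide integer blend keeps the old bits where the mask is off and takes the new bits where it is on. A one-line C++ sketch of that word-level operation:

    #include <stdint.h>

    // Blend one wide word: old bits where mask is 0, new bits where mask is 1.
    static inline uint64_t blendStoreWord(uint64_t oldv, uint64_t newv,
                                          uint64_t mask) {
        return (oldv & ~mask) | (newv & mask);
    }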

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; packed load and store functions
;;

12
stmt.cpp
@@ -1405,6 +1405,18 @@ lProcessPrintArg(Expr *expr, FunctionEmitContext *ctx, std::string &argTypes) {
return NULL;
}

// Promote int8 and int16 types to int32s...
const Type *baseType = type->GetAsNonConstType()->GetAsUniformType();
if (baseType == AtomicType::UniformInt8 ||
baseType == AtomicType::UniformUInt8 ||
baseType == AtomicType::UniformInt16 ||
baseType == AtomicType::UniformUInt16) {
expr = new TypeCastExpr(type->IsUniformType() ? AtomicType::UniformInt32 :
AtomicType::VaryingInt32,
expr, expr->pos);
type = expr->GetType();
}

char t = lEncodeType(type->GetAsNonConstType());
if (t == '\0') {
Error(expr->pos, "Only atomic types are allowed in print statements; "

@@ -8,7 +8,7 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
uniform float x[47][47];
for (uniform int i = 0; i < 47; ++i)
for (uniform int j = 0; j < 47; ++j)
x[i][j] = 2;
x[i][j] = 2+b-5;

// all are 2 except (3,4) = 0, (1,4) = 1, (2,4) = 1, (4,4) = 1
if (a == 3.)

@@ -7,7 +7,7 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
uniform float x[47][47];
for (uniform int i = 0; i < 47; ++i)
for (uniform int j = 0; j < 47; ++j)
x[i][j] = 2;
x[i][j] = 2+b-5;

// all are 2 except (4,2) = 0, (4,...) = 1, (4,programCount-1)=2
if (a == 3.)

@@ -8,7 +8,7 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
uniform float x[47][47];
for (uniform int i = 0; i < 47; ++i)
for (uniform int j = 0; j < 47; ++j)
x[i][j] = 2;
x[i][j] = 2+b-5;

x[a][b-1] = 0;
RET[programIndex] = x[2][a];

12
tests/broadcast-2.ispc
Normal file
@@ -0,0 +1,12 @@

export uniform int width() { return programCount; }

export void f_f(uniform float RET[], uniform float aFOO[]) {
int16 a = aFOO[programIndex];
int16 b = broadcast(a, 2);
RET[programIndex] = b;
}

export void result(uniform float RET[]) {
RET[programIndex] = 3;
}
12
tests/broadcast-3.ispc
Normal file
12
tests/broadcast-3.ispc
Normal file
@@ -0,0 +1,12 @@

export uniform int width() { return programCount; }

export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    int8 a = aFOO[programIndex];
    int8 br = broadcast(a, (uniform int)b-2);
    RET[programIndex] = br;
}

export void result(uniform float RET[]) {
    RET[programIndex] = 4;
}
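Both broadcast tests pin down the same semantics: broadcast(v, i) hands lane i's value to every lane (aFOO is assumed to hold 1, 2, 3, ... as usual in this test suite, so lane 2 carries 3). A scalar C++ model (a sketch, not ispc's implementation):

    #include <vector>

    // Every output lane receives the value the input held in lane `index`.
    static std::vector<float> broadcastModel(const std::vector<float> &v,
                                             int index) {
        return std::vector<float>(v.size(), v[index]);
    }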
19
tests/gather-int16-1.ispc
Normal file
19
tests/gather-int16-1.ispc
Normal file
@@ -0,0 +1,19 @@
export uniform int width() { return programCount; }

export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    uniform int16 x[programCount];
    x[programIndex] = programIndex;
    int a = aFOO[programIndex]-1;
    unsigned int16 v;
    if (programIndex < 2)
        v = x[a];
    else
        v = 2;
    RET[programIndex] = v;
}

export void result(uniform float RET[]) {
    RET[programIndex] = 2;
    RET[0] = 0;
    RET[1] = 1;
}
13
tests/gather-int16.ispc
Normal file
13
tests/gather-int16.ispc
Normal file
@@ -0,0 +1,13 @@
export uniform int width() { return programCount; }

export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    uniform int16 x[programCount];
    x[programIndex] = programIndex;
    int a = aFOO[programIndex]-1;
    unsigned int16 v = x[a];
    RET[programIndex] = v;
}

export void result(uniform float RET[]) {
    RET[programIndex] = programIndex;
}
19
tests/gather-int8-1.ispc
Normal file
19
tests/gather-int8-1.ispc
Normal file
@@ -0,0 +1,19 @@
export uniform int width() { return programCount; }

export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    uniform int8 x[programCount];
    x[programIndex] = programIndex;
    int a = aFOO[programIndex]-1;
    unsigned int8 v;
    if (programIndex < 2)
        v = x[a];
    else
        v = 2;
    RET[programIndex] = v;
}

export void result(uniform float RET[]) {
    RET[programIndex] = 2;
    RET[0] = 0;
    RET[1] = 1;
}
13
tests/gather-int8.ispc
Normal file
13
tests/gather-int8.ispc
Normal file
@@ -0,0 +1,13 @@
export uniform int width() { return programCount; }

export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    uniform int8 x[programCount];
    x[programIndex] = programIndex;
    int a = aFOO[programIndex]-1;
    unsigned int8 v = x[a];
    RET[programIndex] = v;
}

export void result(uniform float RET[]) {
    RET[programIndex] = programIndex;
}
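The four gather tests exercise the new int8/int16 varying loads: indexing a uniform array with a varying index is a gather, one independent load per lane, with per-lane masking in the *-1 variants. Modeled per lane in C++ (a sketch):

    #include <cstdint>

    // Each active lane loads through its own index; inactive lanes
    // keep whatever value they already had.
    static void gatherModel(const int8_t *x, const int *idx, int8_t *v,
                            const bool *mask, int programCount) {
        for (int lane = 0; lane < programCount; ++lane)
            if (mask[lane])
                v[lane] = x[idx[lane]];
    }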
12
tests/int16-wrap.ispc
Normal file
12
tests/int16-wrap.ispc
Normal file
@@ -0,0 +1,12 @@

export uniform int width() { return programCount; }


export void f_fu(uniform float RET[], uniform float aFOO[], uniform float bb) {
    unsigned int16 a = aFOO[programIndex], b = bb;
    RET[programIndex] = ((unsigned int16)4000*a)+b;
}

export void result(uniform float RET[]) {
    RET[programIndex] = (((4000*(programIndex+1))&0xffff)+5)&0xffff;
}
12
tests/int8-wrap.ispc
Normal file
12
tests/int8-wrap.ispc
Normal file
@@ -0,0 +1,12 @@

export uniform int width() { return programCount; }


export void f_fu(uniform float RET[], uniform float aFOO[], uniform float bb) {
    unsigned int8 a = aFOO[programIndex], b = bb;
    RET[programIndex] = ((unsigned int8)100*a)+b;
}

export void result(uniform float RET[]) {
    RET[programIndex] = (((100*(programIndex+1))&0xff)+5)&0xff;
}
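The two wrap tests check that narrow unsigned arithmetic truncates modulo 2^8 and 2^16, which is exactly what the &0xff / &0xffff masking in the reference results spells out. The same expected values in C++ fixed-width types (a sketch; it assumes the harness convention, visible in the reference formulas, that aFOO[i] holds i+1 and bb holds 5):

    #include <cstdint>

    // uint8_t/uint16_t arithmetic wraps automatically, matching the
    // explicit (...&0xff) and (...&0xffff) in the .ispc references.
    static uint8_t  expected8 (int lane) { return (uint8_t) (100  * (lane + 1)) + 5; }
    static uint16_t expected16(int lane) { return (uint16_t)(4000 * (lane + 1)) + 5; }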
@@ -1,13 +1,17 @@
export uniform int width() { return programCount; }

export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    uniform int x[9] = { 0x00020001, 0x00040003, 0x00060005, 0x00080007,
                         0x000a0009, 0x000c000b, 0x000e000d, 0x0010000f,
                         0x00120011 };
    unsigned int v = load_from_int16(x, 1);
    uniform int16 x[programCount];
    x[programIndex] = aFOO[programIndex];
    unsigned int16 v = 0;
    if (programIndex & 1)
        v = x[programIndex];
    RET[programIndex] = v;
}

export void result(uniform float RET[]) {
    RET[programIndex] = 2+programIndex;
    if (programIndex & 1)
        RET[programIndex] = 1+programIndex;
    else
        RET[programIndex] = 0;
}

@@ -1,9 +1,9 @@
export uniform int width() { return programCount; }

export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    uniform int x[8] = { 0x00020001, 0x00040003, 0x00060005, 0x00080007,
                         0x000a0009, 0x000c000b, 0x000e000d, 0x0010000f };
    unsigned int v = load_from_int16(x, 0);
    uniform int16 x[programCount];
    x[programIndex] = aFOO[programIndex];
    unsigned int16 v = x[programIndex];
    RET[programIndex] = v;
}

@@ -1,12 +1,17 @@
export uniform int width() { return programCount; }

export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    uniform int x[5] = { 0x04030201, 0x08070605, 0x0c0b0a09, 0x100f0e0d,
                         0x14131211 };
    unsigned int v = load_from_int8(x, 2);
    uniform int8 x[programCount];
    x[programIndex] = aFOO[programIndex];
    unsigned int8 v = 0;
    if (programIndex & 1)
        v = x[programIndex];
    RET[programIndex] = v;
}

export void result(uniform float RET[]) {
    RET[programIndex] = 3+programIndex;
    if (programIndex & 1)
        RET[programIndex] = 1+programIndex;
    else
        RET[programIndex] = 0;
}

@@ -1,8 +1,9 @@
export uniform int width() { return programCount; }

export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    uniform int x[4] = { 0x04030201, 0x08070605, 0x0c0b0a09, 0x100f0e0d };
    unsigned int v = load_from_int8(x, 0);
    uniform int8 x[programCount];
    x[programIndex] = aFOO[programIndex];
    unsigned int8 v = x[programIndex];
    RET[programIndex] = v;
}

@@ -16,7 +16,7 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    uniform Bar bar;
    for (uniform int i = 0; i < 6; ++i)
        for (uniform int j = 0; j < 18; ++j)
            bar.foo[i].f[j] = 2.;
            bar.foo[i].f[j] = 2.+b-5;

    bar.foo[5].f[a] = a;
    RET[programIndex] = bar.foo[b].f[a];
@@ -1,8 +1,6 @@

export uniform int width() { return programCount; }


struct Foo {
    float f[6];
};
@@ -16,7 +14,7 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    uniform Bar bar;
    for (uniform int i = 0; i < 6; ++i)
        for (uniform int j = 0; j < 6; ++j)
            bar.foo[i].f[j] = 2.;
            bar.foo[i].f[j] = 2.+b-5;
    RET[programIndex] = bar.foo[b].f[b];
}

@@ -4,7 +4,7 @@ export uniform int width() { return programCount; }
export void f_fu(uniform float ret[], uniform float aa[], uniform float b) {
    uniform float foo[16];
    for (uniform int i = 0; i < 16; ++i)
        foo[i] = 1;
        foo[i] = i;

    uniform int i = 0;
    foo[i++] += 1;
@@ -6,10 +6,10 @@ void inc(reference float v) { ++v; }
export void f_fu(uniform float ret[], uniform float aa[], uniform float b) {
    uniform float foo[32];
    for (uniform int i = 0; i < 32; ++i)
        foo[i] = 10;
        foo[i] = 10+i;
    int a = (int)aa[programIndex];
    inc(foo[a]);
    ret[programIndex] = foo[programIndex];
    ret[programIndex] = foo[programIndex]-programIndex;
}

export void result(uniform float ret[]) {
12
tests/rotate-5.ispc
Normal file
12
tests/rotate-5.ispc
Normal file
@@ -0,0 +1,12 @@

export uniform int width() { return programCount; }

export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    int8 a = aFOO[programIndex];
    int8 rot = rotate(a, 2);
    RET[programIndex] = rot;
}

export void result(uniform float RET[]) {
    RET[programIndex] = 1 + (programIndex + 2) % programCount;
}
12
tests/rotate-6.ispc
Normal file
12
tests/rotate-6.ispc
Normal file
@@ -0,0 +1,12 @@

export uniform int width() { return programCount; }

export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    int16 a = aFOO[programIndex];
    int16 rot = rotate(a, -1);
    RET[programIndex] = rot;
}

export void result(uniform float RET[]) {
    RET[programIndex] = 1 + (programIndex + programCount - 1) % programCount;
}
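The two reference results fix rotate()'s indexing: lane i receives the value that lane (i + offset) mod programCount held, and a negative offset wraps the other way. A scalar C++ model (a sketch, not ispc's codegen):

    #include <vector>

    // Lane i of the result takes the input value from lane
    // (i + offset) mod n; the double-mod keeps negative offsets in range.
    static std::vector<int> rotateModel(const std::vector<int> &v, int offset) {
        const int n = (int)v.size();
        std::vector<int> out(n);
        for (int i = 0; i < n; ++i)
            out[i] = v[(((i + offset) % n) + n) % n];
        return out;
    }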
17
tests/scatter-int16-1.ispc
Normal file
17
tests/scatter-int16-1.ispc
Normal file
@@ -0,0 +1,17 @@
export uniform int width() { return programCount; }

export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    uniform int16 x[programCount];
    x[programIndex] = -1;
    int a = aFOO[programIndex]-1;
    if (programIndex < 3)
        x[a] = programIndex;
    RET[programIndex] = x[programIndex];
}

export void result(uniform float RET[]) {
    RET[programIndex] = -1;
    RET[0] = 0;
    RET[1] = 1;
    RET[2] = 2;
}
13
tests/scatter-int16.ispc
Normal file
13
tests/scatter-int16.ispc
Normal file
@@ -0,0 +1,13 @@
export uniform int width() { return programCount; }

export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    uniform int16 x[programCount];
    x[programIndex] = 0;
    int a = aFOO[programIndex]-1;
    x[a] = programIndex;
    RET[programIndex] = x[programIndex];
}

export void result(uniform float RET[]) {
    RET[programIndex] = programIndex;
}
17
tests/scatter-int8-1.ispc
Normal file
17
tests/scatter-int8-1.ispc
Normal file
@@ -0,0 +1,17 @@
export uniform int width() { return programCount; }

export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    uniform int8 x[programCount];
    x[programIndex] = -1;
    int a = aFOO[programIndex]-1;
    if (programIndex < 3)
        x[a] = programIndex;
    RET[programIndex] = x[programIndex];
}

export void result(uniform float RET[]) {
    RET[programIndex] = -1;
    RET[0] = 0;
    RET[1] = 1;
    RET[2] = 2;
}
13
tests/scatter-int8.ispc
Normal file
13
tests/scatter-int8.ispc
Normal file
@@ -0,0 +1,13 @@
export uniform int width() { return programCount; }

export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    uniform int8 x[programCount];
    x[programIndex] = 0;
    int a = aFOO[programIndex]-1;
    x[a] = programIndex;
    RET[programIndex] = x[programIndex];
}

export void result(uniform float RET[]) {
    RET[programIndex] = programIndex;
}
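Scatters are the mirror image of the gathers above: each active lane stores through its own index, and the *-1 variants confirm that masked-off lanes write nothing (their slots keep the -1 fill). A per-lane C++ model (a sketch):

    #include <cstdint>

    // Each lane whose mask bit is set writes its value through its own
    // index; masked-off lanes leave memory untouched.
    static void scatterModel(int8_t *x, const int *idx, const int8_t *val,
                             const bool *mask, int programCount) {
        for (int lane = 0; lane < programCount; ++lane)
            if (mask[lane])
                x[idx[lane]] = val[lane];
    }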
12
tests/shuffle-3.ispc
Normal file
12
tests/shuffle-3.ispc
Normal file
@@ -0,0 +1,12 @@

export uniform int width() { return programCount; }

export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    int8 a = aFOO[programIndex];
    int8 shuf = shuffle(a, 1);
    RET[programIndex] = shuf;
}

export void result(uniform float RET[]) {
    RET[programIndex] = 2;
}
13
tests/shuffle-4.ispc
Normal file
13
tests/shuffle-4.ispc
Normal file
@@ -0,0 +1,13 @@

export uniform int width() { return programCount; }

export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    int16 a = aFOO[programIndex];
    int reverse = programCount - 1 - programIndex;
    int16 shuf = shuffle(a, reverse);
    RET[programIndex] = shuf;
}

export void result(uniform float RET[]) {
    RET[programIndex] = programCount - programIndex;
}
13
tests/shuffle-5.ispc
Normal file
13
tests/shuffle-5.ispc
Normal file
@@ -0,0 +1,13 @@

export uniform int width() { return programCount; }

export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    int8 a = aFOO[programIndex];
    int reverse = programCount - 1 - programIndex + (int)b - 5;
    int8 shuf = shuffle(a, reverse);
    RET[programIndex] = shuf;
}

export void result(uniform float RET[]) {
    RET[programIndex] = programCount - programIndex;
}
13
tests/shuffle2-11.ispc
Normal file
13
tests/shuffle2-11.ispc
Normal file
@@ -0,0 +1,13 @@

export uniform int width() { return programCount; }

export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    int16 aa = aFOO[programIndex];
    int16 bb = aa + programCount;
    int16 shuf = shuffle(aa, bb, 2*programIndex);
    RET[programIndex] = shuf;
}

export void result(uniform float RET[]) {
    RET[programIndex] = 1 + 2*programIndex;
}
13
tests/shuffle2-6.ispc
Normal file
13
tests/shuffle2-6.ispc
Normal file
@@ -0,0 +1,13 @@

export uniform int width() { return programCount; }

export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    int8 aa = aFOO[programIndex];
    int8 bb = aa + programCount;
    int8 shuf = shuffle(aa, bb, 1);
    RET[programIndex] = shuf;
}

export void result(uniform float RET[]) {
    RET[programIndex] = 2;
}
13
tests/shuffle2-7.ispc
Normal file
13
tests/shuffle2-7.ispc
Normal file
@@ -0,0 +1,13 @@

export uniform int width() { return programCount; }

export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    int16 aa = aFOO[programIndex];
    int16 bb = aa + programCount;
    int16 shuf = shuffle(aa, bb, programCount + 1);
    RET[programIndex] = shuf;
}

export void result(uniform float RET[]) {
    RET[programIndex] = 2 + programCount;
}
13
tests/shuffle2-8.ispc
Normal file
13
tests/shuffle2-8.ispc
Normal file
@@ -0,0 +1,13 @@

export uniform int width() { return programCount; }

export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    int8 aa = aFOO[programIndex];
    int8 bb = aa + programCount;
    int8 shuf = shuffle(aa, bb, programIndex + 2);
    RET[programIndex] = shuf;
}

export void result(uniform float RET[]) {
    RET[programIndex] = 3 + programIndex;
}
13
tests/shuffle2-9.ispc
Normal file
13
tests/shuffle2-9.ispc
Normal file
@@ -0,0 +1,13 @@

export uniform int width() { return programCount; }

export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    int16 aa = aFOO[programIndex];
    int16 bb = aa + programCount;
    int16 shuf = shuffle(aa, bb, programIndex + 2 + (int)b - 5);
    RET[programIndex] = shuf;
}

export void result(uniform float RET[]) {
    RET[programIndex] = 3 + programIndex;
}
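Taken together, the shuffle tests fix the indexing rules for both forms: shuffle(a, perm) gives lane i the value a[perm[i]], and shuffle(a, b, perm) indexes the 2*programCount-element concatenation of a and b. Scalar C++ models (sketches, names hypothetical):

    #include <vector>

    // One-vector form: result lane i takes a[perm[i]].
    static std::vector<int> shuffle1(const std::vector<int> &a,
                                     const std::vector<int> &perm) {
        std::vector<int> out(a.size());
        for (size_t i = 0; i < a.size(); ++i)
            out[i] = a[perm[i]];
        return out;
    }

    // Two-vector form: indices 0..n-1 pick from a, n..2n-1 from b.
    static std::vector<int> shuffle2(const std::vector<int> &a,
                                     const std::vector<int> &b,
                                     const std::vector<int> &perm) {
        const int n = (int)a.size();
        std::vector<int> out(n);
        for (int i = 0; i < n; ++i)
            out[i] = (perm[i] < n) ? a[perm[i]] : b[perm[i] - n];
        return out;
    }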
@@ -1,16 +1,15 @@
export uniform int width() { return programCount; }

export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    uniform int x[16];
    for (uniform int i = 0; i < 16; ++i)
        x[i] = 0xffffffff;
    unsigned int val = aFOO[programIndex];
    store_to_int16(x, 5, val);
    unsigned int v = load_from_int16(x, 6);
    RET[programIndex] = v;
    uniform unsigned int16 x[2*programCount];
    for (uniform int i = 0; i < 2*programCount; ++i)
        x[i] = 0xffff;
    unsigned int16 val = aFOO[programIndex];
    x[2+programIndex] = val;
    RET[programIndex] = x[1+programIndex];
}

export void result(uniform float RET[]) {
    RET[programIndex] = 2+programIndex;
    RET[programCount-1] = (unsigned int)0xffffffff;
    RET[programIndex] = programIndex;
    RET[0] = 65535;
}

19
tests/store-int16-2.ispc
Normal file
19
tests/store-int16-2.ispc
Normal file
@@ -0,0 +1,19 @@
export uniform int width() { return programCount; }

export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    uniform unsigned int16 x[2*programCount];
    for (uniform int i = 0; i < 2*programCount; ++i)
        x[i] = 0xffff;
    unsigned int16 val = aFOO[programIndex];
    if (programIndex & 1)
        x[2+programIndex] = val;
    RET[programIndex] = x[1+programIndex];
}

export void result(uniform float RET[]) {
    if (programIndex & 1)
        RET[programIndex] = 65535;
    else
        RET[programIndex] = programIndex;
    RET[0] = 65535;
}
@@ -1,16 +1,15 @@
export uniform int width() { return programCount; }

export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    uniform int x[16];
    for (uniform int i = 0; i < 16; ++i)
        x[i] = 0xffffffff;
    unsigned int val = aFOO[programIndex];
    store_to_int16(x, 5, val);
    int v = load_from_int16(x, 6);
    RET[programIndex] = v;
    uniform int16 x[2*programCount];
    for (uniform int i = 0; i < 2*programCount; ++i)
        x[i] = 0xffff;
    unsigned int8 val = aFOO[programIndex];
    x[2+programIndex] = val;
    RET[programIndex] = x[1+programIndex];
}

export void result(uniform float RET[]) {
    RET[programIndex] = 2+programIndex;
    RET[programCount-1] = -1;
    RET[programIndex] = programIndex;
    RET[0] = -1.;
}

@@ -1,16 +1,15 @@
export uniform int width() { return programCount; }

export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    uniform unsigned int x[8];
    for (uniform int i = 0; i < 8; ++i)
        x[i] = 0xffffffff;
    unsigned int val = aFOO[programIndex];
    store_to_uint8(x, 2, val);
    unsigned int v = load_from_uint8(x, 1);
    RET[programIndex] = v;
    uniform unsigned int8 x[2*programCount];
    for (uniform int i = 0; i < 2*programCount; ++i)
        x[i] = 0xff;
    unsigned int8 val = aFOO[programIndex];
    x[2+programIndex] = val;
    RET[programIndex] = x[1+programIndex];
}

export void result(uniform float RET[]) {
    RET[programIndex] = programIndex;
    RET[0] = (unsigned int)0xff;
    RET[0] = 255;
}

19
tests/store-int8-2.ispc
Normal file
19
tests/store-int8-2.ispc
Normal file
@@ -0,0 +1,19 @@
export uniform int width() { return programCount; }

export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    uniform unsigned int8 x[2*programCount];
    for (uniform int i = 0; i < 2*programCount; ++i)
        x[i] = 0xff;
    unsigned int8 val = aFOO[programIndex];
    if (programIndex & 1)
        x[2+programIndex] = val;
    RET[programIndex] = x[1+programIndex];
}

export void result(uniform float RET[]) {
    if (programIndex & 1)
        RET[programIndex] = 255;
    else
        RET[programIndex] = programIndex;
    RET[0] = 255;
}
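The *-2 store tests drive the masked path: only odd lanes execute the store, so even lanes' memory must come through unchanged, which is precisely what the __masked_store_blend_* functions earlier in this change guarantee via their load/blend/store sequence. A per-lane C++ model (a sketch):

    #include <cstdint>

    // A masked store is a read-modify-write: load the old contents,
    // keep them wherever the execution mask is off, write everything back.
    static void maskedStoreModel(uint8_t *dst, const uint8_t *newVals,
                                 const bool *mask, int n) {
        for (int i = 0; i < n; ++i)
            dst[i] = mask[i] ? newVals[i] : dst[i];
    }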
@@ -1,13 +1,12 @@
export uniform int width() { return programCount; }

export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    uniform int x[8];
    for (uniform int i = 0; i < 8; ++i)
        x[i] = 0xffffffff;
    unsigned int val = aFOO[programIndex];
    store_to_int8(x, 2, val);
    int v = load_from_int8(x, 1);
    RET[programIndex] = v;
    uniform int8 x[2*programCount];
    for (uniform int i = 0; i < 2*programCount; ++i)
        x[i] = 0xff;
    unsigned int8 val = aFOO[programIndex];
    x[2+programIndex] = val;
    RET[programIndex] = x[1+programIndex];
}

export void result(uniform float RET[]) {
@@ -4,12 +4,12 @@ export uniform int width() { return programCount; }
export void f_fu(uniform float ret[], uniform float aa[], uniform float b) {
    uniform int foo[10];
    for (uniform int i = 0; i < 10; ++i)
        foo[i] = 10;
        foo[i] = 10+i;
    int bb = b;
    foo[bb] = 0;
    ret[programIndex] = foo[4] + foo[5];
}

export void result(uniform float ret[]) {
    ret[programIndex] = 10;
    ret[programIndex] = 14;
}

114
type.cpp
114
type.cpp
@@ -74,6 +74,14 @@ lShouldPrintName(const std::string &name) {

const AtomicType *AtomicType::UniformBool = new AtomicType(TYPE_BOOL, true, false);
const AtomicType *AtomicType::VaryingBool = new AtomicType(TYPE_BOOL, false, false);
const AtomicType *AtomicType::UniformInt8 = new AtomicType(TYPE_INT8, true, false);
const AtomicType *AtomicType::VaryingInt8 = new AtomicType(TYPE_INT8, false, false);
const AtomicType *AtomicType::UniformUInt8 = new AtomicType(TYPE_UINT8, true, false);
const AtomicType *AtomicType::VaryingUInt8 = new AtomicType(TYPE_UINT8, false, false);
const AtomicType *AtomicType::UniformInt16 = new AtomicType(TYPE_INT16, true, false);
const AtomicType *AtomicType::VaryingInt16 = new AtomicType(TYPE_INT16, false, false);
const AtomicType *AtomicType::UniformUInt16 = new AtomicType(TYPE_UINT16, true, false);
const AtomicType *AtomicType::VaryingUInt16 = new AtomicType(TYPE_UINT16, false, false);
const AtomicType *AtomicType::UniformInt32 = new AtomicType(TYPE_INT32, true, false);
const AtomicType *AtomicType::VaryingInt32 = new AtomicType(TYPE_INT32, false, false);
const AtomicType *AtomicType::UniformUInt32 = new AtomicType(TYPE_UINT32, true, false);
@@ -89,6 +97,14 @@ const AtomicType *AtomicType::VaryingDouble = new AtomicType(TYPE_DOUBLE, false,

const AtomicType *AtomicType::UniformConstBool = new AtomicType(TYPE_BOOL, true, true);
const AtomicType *AtomicType::VaryingConstBool = new AtomicType(TYPE_BOOL, false, true);
const AtomicType *AtomicType::UniformConstInt8 = new AtomicType(TYPE_INT8, true, true);
const AtomicType *AtomicType::VaryingConstInt8 = new AtomicType(TYPE_INT8, false, true);
const AtomicType *AtomicType::UniformConstUInt8 = new AtomicType(TYPE_UINT8, true, true);
const AtomicType *AtomicType::VaryingConstUInt8 = new AtomicType(TYPE_UINT8, false, true);
const AtomicType *AtomicType::UniformConstInt16 = new AtomicType(TYPE_INT16, true, true);
const AtomicType *AtomicType::VaryingConstInt16 = new AtomicType(TYPE_INT16, false, true);
const AtomicType *AtomicType::UniformConstUInt16 = new AtomicType(TYPE_UINT16, true, true);
const AtomicType *AtomicType::VaryingConstUInt16 = new AtomicType(TYPE_UINT16, false, true);
const AtomicType *AtomicType::UniformConstInt32 = new AtomicType(TYPE_INT32, true, true);
const AtomicType *AtomicType::VaryingConstInt32 = new AtomicType(TYPE_INT32, false, true);
const AtomicType *AtomicType::UniformConstUInt32 = new AtomicType(TYPE_UINT32, true, true);
@@ -101,6 +117,7 @@ const AtomicType *AtomicType::UniformConstUInt64 = new AtomicType(TYPE_UINT64, t
const AtomicType *AtomicType::VaryingConstUInt64 = new AtomicType(TYPE_UINT64, false, true);
const AtomicType *AtomicType::UniformConstDouble = new AtomicType(TYPE_DOUBLE, true, true);
const AtomicType *AtomicType::VaryingConstDouble = new AtomicType(TYPE_DOUBLE, false, true);

const AtomicType *AtomicType::Void = new AtomicType(TYPE_VOID, true, false);

@@ -123,14 +140,17 @@ AtomicType::IsFloatType() const {

bool
AtomicType::IsIntType() const {
    return (basicType == TYPE_INT32 || basicType == TYPE_UINT32 ||
    return (basicType == TYPE_INT8 || basicType == TYPE_UINT8 ||
            basicType == TYPE_INT16 || basicType == TYPE_UINT16 ||
            basicType == TYPE_INT32 || basicType == TYPE_UINT32 ||
            basicType == TYPE_INT64 || basicType == TYPE_UINT64);
}


bool
AtomicType::IsUnsignedType() const {
    return (basicType == TYPE_UINT32 || basicType == TYPE_UINT64);
    return (basicType == TYPE_UINT8 || basicType == TYPE_UINT16 ||
            basicType == TYPE_UINT32 || basicType == TYPE_UINT64);
}

@@ -151,10 +171,18 @@ AtomicType::GetAsUnsignedType() const {
    if (IsUnsignedType())
        return this;

    if (this == AtomicType::UniformInt32) return AtomicType::UniformUInt32;
    if (this == AtomicType::UniformInt8) return AtomicType::UniformUInt8;
    else if (this == AtomicType::VaryingInt8) return AtomicType::VaryingUInt8;
    else if (this == AtomicType::UniformInt16) return AtomicType::UniformUInt16;
    else if (this == AtomicType::VaryingInt16) return AtomicType::VaryingUInt16;
    else if (this == AtomicType::UniformInt32) return AtomicType::UniformUInt32;
    else if (this == AtomicType::VaryingInt32) return AtomicType::VaryingUInt32;
    else if (this == AtomicType::UniformInt64) return AtomicType::UniformUInt64;
    else if (this == AtomicType::VaryingInt64) return AtomicType::VaryingUInt64;
    else if (this == AtomicType::UniformConstInt8) return AtomicType::UniformConstUInt8;
    else if (this == AtomicType::VaryingConstInt8) return AtomicType::VaryingConstUInt8;
    else if (this == AtomicType::UniformConstInt16) return AtomicType::UniformConstUInt16;
    else if (this == AtomicType::VaryingConstInt16) return AtomicType::VaryingConstUInt16;
    else if (this == AtomicType::UniformConstInt32) return AtomicType::UniformConstUInt32;
    else if (this == AtomicType::VaryingConstInt32) return AtomicType::VaryingConstUInt32;
    else if (this == AtomicType::UniformConstInt64) return AtomicType::UniformConstUInt64;
@@ -170,6 +198,10 @@ AtomicType::GetAsConstType() const {

    switch (basicType) {
    case TYPE_BOOL: return isUniform ? UniformConstBool : VaryingConstBool;
    case TYPE_INT8: return isUniform ? UniformConstInt8 : VaryingConstInt8;
    case TYPE_UINT8: return isUniform ? UniformConstUInt8 : VaryingConstUInt8;
    case TYPE_INT16: return isUniform ? UniformConstInt16 : VaryingConstInt16;
    case TYPE_UINT16: return isUniform ? UniformConstUInt16 : VaryingConstUInt16;
    case TYPE_INT32: return isUniform ? UniformConstInt32 : VaryingConstInt32;
    case TYPE_UINT32: return isUniform ? UniformConstUInt32 : VaryingConstUInt32;
    case TYPE_FLOAT: return isUniform ? UniformConstFloat : VaryingConstFloat;
@@ -190,6 +222,10 @@ AtomicType::GetAsNonConstType() const {

    switch (basicType) {
    case TYPE_BOOL: return isUniform ? UniformBool : VaryingBool;
    case TYPE_INT8: return isUniform ? UniformInt8 : VaryingInt8;
    case TYPE_UINT8: return isUniform ? UniformUInt8 : VaryingUInt8;
    case TYPE_INT16: return isUniform ? UniformInt16 : VaryingInt16;
    case TYPE_UINT16: return isUniform ? UniformUInt16 : VaryingUInt16;
    case TYPE_INT32: return isUniform ? UniformInt32 : VaryingInt32;
    case TYPE_UINT32: return isUniform ? UniformUInt32 : VaryingUInt32;
    case TYPE_FLOAT: return isUniform ? UniformFloat : VaryingFloat;
@@ -216,13 +252,17 @@ AtomicType::GetAsVaryingType() const {

    switch (basicType) {
    case TYPE_VOID: return this;
    case TYPE_BOOL: return isConst ? AtomicType::VaryingConstBool : AtomicType::VaryingBool;
    case TYPE_INT32: return isConst ? AtomicType::VaryingConstInt32 : AtomicType::VaryingInt32;
    case TYPE_UINT32: return isConst ? AtomicType::VaryingConstUInt32 : AtomicType::VaryingUInt32;
    case TYPE_FLOAT: return isConst ? AtomicType::VaryingConstFloat : AtomicType::VaryingFloat;
    case TYPE_INT64: return isConst ? AtomicType::VaryingConstInt64 : AtomicType::VaryingInt64;
    case TYPE_UINT64: return isConst ? AtomicType::VaryingConstUInt64 : AtomicType::VaryingUInt64;
    case TYPE_DOUBLE: return isConst ? AtomicType::VaryingConstDouble : AtomicType::VaryingDouble;
    case TYPE_BOOL: return isConst ? VaryingConstBool : VaryingBool;
    case TYPE_INT8: return isConst ? VaryingConstInt8 : VaryingInt8;
    case TYPE_UINT8: return isConst ? VaryingConstUInt8 : VaryingUInt8;
    case TYPE_INT16: return isConst ? VaryingConstInt16 : VaryingInt16;
    case TYPE_UINT16: return isConst ? VaryingConstUInt16 : VaryingUInt16;
    case TYPE_INT32: return isConst ? VaryingConstInt32 : VaryingInt32;
    case TYPE_UINT32: return isConst ? VaryingConstUInt32 : VaryingUInt32;
    case TYPE_FLOAT: return isConst ? VaryingConstFloat : VaryingFloat;
    case TYPE_INT64: return isConst ? VaryingConstInt64 : VaryingInt64;
    case TYPE_UINT64: return isConst ? VaryingConstUInt64 : VaryingUInt64;
    case TYPE_DOUBLE: return isConst ? VaryingConstDouble : VaryingDouble;
    default: FATAL("Logic error in AtomicType::GetAsVaryingType()");
    }
    return NULL;
@@ -236,13 +276,17 @@ AtomicType::GetAsUniformType() const {

    switch (basicType) {
    case TYPE_VOID: return this;
    case TYPE_BOOL: return isConst ? AtomicType::UniformConstBool : AtomicType::UniformBool;
    case TYPE_INT32: return isConst ? AtomicType::UniformConstInt32 : AtomicType::UniformInt32;
    case TYPE_UINT32: return isConst ? AtomicType::UniformConstUInt32 : AtomicType::UniformUInt32;
    case TYPE_FLOAT: return isConst ? AtomicType::UniformConstFloat : AtomicType::UniformFloat;
    case TYPE_INT64: return isConst ? AtomicType::UniformConstInt64 : AtomicType::UniformInt64;
    case TYPE_UINT64: return isConst ? AtomicType::UniformConstUInt64 : AtomicType::UniformUInt64;
    case TYPE_DOUBLE: return isConst ? AtomicType::UniformConstDouble : AtomicType::UniformDouble;
    case TYPE_BOOL: return isConst ? UniformConstBool : UniformBool;
    case TYPE_INT8: return isConst ? UniformConstInt8 : UniformInt8;
    case TYPE_UINT8: return isConst ? UniformConstUInt8 : UniformUInt8;
    case TYPE_INT16: return isConst ? UniformConstInt16 : UniformInt16;
    case TYPE_UINT16: return isConst ? UniformConstUInt16 : UniformUInt16;
    case TYPE_INT32: return isConst ? UniformConstInt32 : UniformInt32;
    case TYPE_UINT32: return isConst ? UniformConstUInt32 : UniformUInt32;
    case TYPE_FLOAT: return isConst ? UniformConstFloat : UniformFloat;
    case TYPE_INT64: return isConst ? UniformConstInt64 : UniformInt64;
    case TYPE_UINT64: return isConst ? UniformConstUInt64 : UniformUInt64;
    case TYPE_DOUBLE: return isConst ? UniformConstDouble : UniformDouble;
    default: FATAL("Logic error in AtomicType::GetAsUniformType()");
    }
    return NULL;
@@ -267,6 +311,10 @@ AtomicType::GetString() const {
    switch (basicType) {
    case TYPE_VOID: ret += "void"; break;
    case TYPE_BOOL: ret += "bool"; break;
    case TYPE_INT8: ret += "int8"; break;
    case TYPE_UINT8: ret += "unsigned int8"; break;
    case TYPE_INT16: ret += "int16"; break;
    case TYPE_UINT16: ret += "unsigned int16"; break;
    case TYPE_INT32: ret += "int32"; break;
    case TYPE_UINT32: ret += "unsigned int32"; break;
    case TYPE_FLOAT: ret += "float"; break;
@@ -288,6 +336,10 @@ AtomicType::Mangle() const {
    switch (basicType) {
    case TYPE_VOID: ret += "v"; break;
    case TYPE_BOOL: ret += "b"; break;
    case TYPE_INT8: ret += "t"; break;
    case TYPE_UINT8: ret += "T"; break;
    case TYPE_INT16: ret += "s"; break;
    case TYPE_UINT16: ret += "S"; break;
    case TYPE_INT32: ret += "i"; break;
    case TYPE_UINT32: ret += "u"; break;
    case TYPE_FLOAT: ret += "f"; break;
@@ -309,12 +361,16 @@ AtomicType::GetCDeclaration(const std::string &name) const {
    switch (basicType) {
    case TYPE_VOID: ret += "void"; break;
    case TYPE_BOOL: ret += "bool"; break;
    case TYPE_INT8: ret += "int8_t"; break;
    case TYPE_UINT8: ret += "uint8_t"; break;
    case TYPE_INT16: ret += "int16_t"; break;
    case TYPE_UINT16: ret += "uint16_t"; break;
    case TYPE_INT32: ret += "int32_t"; break;
    case TYPE_UINT32: ret += "uint32_t"; break;
    case TYPE_FLOAT: ret += "float"; break;
    case TYPE_DOUBLE: ret += "double"; break;
    case TYPE_INT64: ret += "int64_t"; break;
    case TYPE_UINT64: ret += "uint64_t"; break;
    case TYPE_DOUBLE: ret += "double"; break;
    default: FATAL("Logic error in AtomicType::GetCDeclaration()");
    }

@@ -333,6 +389,12 @@ AtomicType::LLVMType(llvm::LLVMContext *ctx) const {
        return llvm::Type::getVoidTy(*ctx);
    case TYPE_BOOL:
        return isUniform ? LLVMTypes::BoolType : LLVMTypes::BoolVectorType;
    case TYPE_INT8:
    case TYPE_UINT8:
        return isUniform ? LLVMTypes::Int8Type : LLVMTypes::Int8VectorType;
    case TYPE_INT16:
    case TYPE_UINT16:
        return isUniform ? LLVMTypes::Int16Type : LLVMTypes::Int16VectorType;
    case TYPE_INT32:
    case TYPE_UINT32:
        return isUniform ? LLVMTypes::Int32Type : LLVMTypes::Int32VectorType;
@@ -364,6 +426,22 @@ AtomicType::GetDIType(llvm::DIDescriptor scope) const {
        return m->diBuilder->createBasicType("bool", 32 /* size */, 32 /* align */,
                                             llvm::dwarf::DW_ATE_unsigned);
        break;
    case TYPE_INT8:
        return m->diBuilder->createBasicType("int8", 8 /* size */, 8 /* align */,
                                             llvm::dwarf::DW_ATE_signed);
        break;
    case TYPE_UINT8:
        return m->diBuilder->createBasicType("uint8", 8 /* size */, 8 /* align */,
                                             llvm::dwarf::DW_ATE_unsigned);
        break;
    case TYPE_INT16:
        return m->diBuilder->createBasicType("int16", 16 /* size */, 16 /* align */,
                                             llvm::dwarf::DW_ATE_signed);
        break;
    case TYPE_UINT16:
        return m->diBuilder->createBasicType("uint16", 16 /* size */, 16 /* align */,
                                             llvm::dwarf::DW_ATE_unsigned);
        break;
    case TYPE_INT32:
        return m->diBuilder->createBasicType("int32", 32 /* size */, 32 /* align */,
                                             llvm::dwarf::DW_ATE_signed);

12
type.h
12
type.h
@@ -210,6 +210,10 @@ public:
    enum BasicType {
        TYPE_VOID,
        TYPE_BOOL,
        TYPE_INT8,
        TYPE_UINT8,
        TYPE_INT16,
        TYPE_UINT16,
        TYPE_INT32,
        TYPE_UINT32,
        TYPE_FLOAT,
@@ -221,14 +225,22 @@ public:
    const BasicType basicType;

    static const AtomicType *UniformBool, *VaryingBool;
    static const AtomicType *UniformInt8, *VaryingInt8;
    static const AtomicType *UniformInt16, *VaryingInt16;
    static const AtomicType *UniformInt32, *VaryingInt32;
    static const AtomicType *UniformUInt8, *VaryingUInt8;
    static const AtomicType *UniformUInt16, *VaryingUInt16;
    static const AtomicType *UniformUInt32, *VaryingUInt32;
    static const AtomicType *UniformFloat, *VaryingFloat;
    static const AtomicType *UniformInt64, *VaryingInt64;
    static const AtomicType *UniformUInt64, *VaryingUInt64;
    static const AtomicType *UniformDouble, *VaryingDouble;
    static const AtomicType *UniformConstBool, *VaryingConstBool;
    static const AtomicType *UniformConstInt8, *VaryingConstInt8;
    static const AtomicType *UniformConstInt16, *VaryingConstInt16;
    static const AtomicType *UniformConstInt32, *VaryingConstInt32;
    static const AtomicType *UniformConstUInt8, *VaryingConstUInt8;
    static const AtomicType *UniformConstUInt16, *VaryingConstUInt16;
    static const AtomicType *UniformConstUInt32, *VaryingConstUInt32;
    static const AtomicType *UniformConstFloat, *VaryingConstFloat;
    static const AtomicType *UniformConstInt64, *VaryingConstInt64;