2
Makefile
2
Makefile
@@ -15,7 +15,7 @@ LLVM_CXXFLAGS=$(shell llvm-config --cppflags)
|
|||||||
LLVM_VERSION_DEF=-DLLVM_$(shell llvm-config --version | sed s/\\./_/)
|
LLVM_VERSION_DEF=-DLLVM_$(shell llvm-config --version | sed s/\\./_/)
|
||||||
|
|
||||||
BUILD_DATE=$(shell date +%Y%m%d)
|
BUILD_DATE=$(shell date +%Y%m%d)
|
||||||
BUILD_VERSION=$(shell git log | head -1)
|
BUILD_VERSION=$(shell git log --abbrev-commit --abbrev=16 | head -1)
|
||||||
|
|
||||||
CXX=g++
|
CXX=g++
|
||||||
CPP=cpp
|
CPP=cpp
|
||||||
|
|||||||
288
builtins.cpp
288
builtins.cpp
@@ -78,8 +78,14 @@ static const Type *
|
|||||||
lLLVMTypeToISPCType(const llvm::Type *t, bool intAsUnsigned) {
|
lLLVMTypeToISPCType(const llvm::Type *t, bool intAsUnsigned) {
|
||||||
if (t == LLVMTypes::VoidType)
|
if (t == LLVMTypes::VoidType)
|
||||||
return AtomicType::Void;
|
return AtomicType::Void;
|
||||||
|
|
||||||
|
// uniform
|
||||||
else if (t == LLVMTypes::BoolType)
|
else if (t == LLVMTypes::BoolType)
|
||||||
return AtomicType::UniformBool;
|
return AtomicType::UniformBool;
|
||||||
|
else if (t == LLVMTypes::Int8Type)
|
||||||
|
return intAsUnsigned ? AtomicType::UniformUInt8 : AtomicType::UniformInt8;
|
||||||
|
else if (t == LLVMTypes::Int16Type)
|
||||||
|
return intAsUnsigned ? AtomicType::UniformUInt16 : AtomicType::UniformInt16;
|
||||||
else if (t == LLVMTypes::Int32Type)
|
else if (t == LLVMTypes::Int32Type)
|
||||||
return intAsUnsigned ? AtomicType::UniformUInt32 : AtomicType::UniformInt32;
|
return intAsUnsigned ? AtomicType::UniformUInt32 : AtomicType::UniformInt32;
|
||||||
else if (t == LLVMTypes::FloatType)
|
else if (t == LLVMTypes::FloatType)
|
||||||
@@ -88,6 +94,12 @@ lLLVMTypeToISPCType(const llvm::Type *t, bool intAsUnsigned) {
|
|||||||
return AtomicType::UniformDouble;
|
return AtomicType::UniformDouble;
|
||||||
else if (t == LLVMTypes::Int64Type)
|
else if (t == LLVMTypes::Int64Type)
|
||||||
return intAsUnsigned ? AtomicType::UniformUInt64 : AtomicType::UniformInt64;
|
return intAsUnsigned ? AtomicType::UniformUInt64 : AtomicType::UniformInt64;
|
||||||
|
|
||||||
|
// varying
|
||||||
|
else if (t == LLVMTypes::Int8VectorType)
|
||||||
|
return intAsUnsigned ? AtomicType::VaryingUInt8 : AtomicType::VaryingInt8;
|
||||||
|
else if (t == LLVMTypes::Int16VectorType)
|
||||||
|
return intAsUnsigned ? AtomicType::VaryingUInt16 : AtomicType::VaryingInt16;
|
||||||
else if (t == LLVMTypes::Int32VectorType)
|
else if (t == LLVMTypes::Int32VectorType)
|
||||||
return intAsUnsigned ? AtomicType::VaryingUInt32 : AtomicType::VaryingInt32;
|
return intAsUnsigned ? AtomicType::VaryingUInt32 : AtomicType::VaryingInt32;
|
||||||
else if (t == LLVMTypes::FloatVectorType)
|
else if (t == LLVMTypes::FloatVectorType)
|
||||||
@@ -96,6 +108,14 @@ lLLVMTypeToISPCType(const llvm::Type *t, bool intAsUnsigned) {
|
|||||||
return AtomicType::VaryingDouble;
|
return AtomicType::VaryingDouble;
|
||||||
else if (t == LLVMTypes::Int64VectorType)
|
else if (t == LLVMTypes::Int64VectorType)
|
||||||
return intAsUnsigned ? AtomicType::VaryingUInt64 : AtomicType::VaryingInt64;
|
return intAsUnsigned ? AtomicType::VaryingUInt64 : AtomicType::VaryingInt64;
|
||||||
|
|
||||||
|
// pointers to uniform
|
||||||
|
else if (t == LLVMTypes::Int8PointerType)
|
||||||
|
return new ReferenceType(intAsUnsigned ? AtomicType::UniformUInt8 :
|
||||||
|
AtomicType::UniformInt8, false);
|
||||||
|
else if (t == LLVMTypes::Int16PointerType)
|
||||||
|
return new ReferenceType(intAsUnsigned ? AtomicType::UniformUInt16 :
|
||||||
|
AtomicType::UniformInt16, false);
|
||||||
else if (t == LLVMTypes::Int32PointerType)
|
else if (t == LLVMTypes::Int32PointerType)
|
||||||
return new ReferenceType(intAsUnsigned ? AtomicType::UniformUInt32 :
|
return new ReferenceType(intAsUnsigned ? AtomicType::UniformUInt32 :
|
||||||
AtomicType::UniformInt32, false);
|
AtomicType::UniformInt32, false);
|
||||||
@@ -106,6 +126,14 @@ lLLVMTypeToISPCType(const llvm::Type *t, bool intAsUnsigned) {
|
|||||||
return new ReferenceType(AtomicType::UniformFloat, false);
|
return new ReferenceType(AtomicType::UniformFloat, false);
|
||||||
else if (t == LLVMTypes::DoublePointerType)
|
else if (t == LLVMTypes::DoublePointerType)
|
||||||
return new ReferenceType(AtomicType::UniformDouble, false);
|
return new ReferenceType(AtomicType::UniformDouble, false);
|
||||||
|
|
||||||
|
// pointers to varying
|
||||||
|
else if (t == LLVMTypes::Int8VectorPointerType)
|
||||||
|
return new ReferenceType(intAsUnsigned ? AtomicType::VaryingUInt8 :
|
||||||
|
AtomicType::VaryingInt8, false);
|
||||||
|
else if (t == LLVMTypes::Int16VectorPointerType)
|
||||||
|
return new ReferenceType(intAsUnsigned ? AtomicType::VaryingUInt16 :
|
||||||
|
AtomicType::VaryingInt16, false);
|
||||||
else if (t == LLVMTypes::Int32VectorPointerType)
|
else if (t == LLVMTypes::Int32VectorPointerType)
|
||||||
return new ReferenceType(intAsUnsigned ? AtomicType::VaryingUInt32 :
|
return new ReferenceType(intAsUnsigned ? AtomicType::VaryingUInt32 :
|
||||||
AtomicType::VaryingInt32, false);
|
AtomicType::VaryingInt32, false);
|
||||||
@@ -116,6 +144,8 @@ lLLVMTypeToISPCType(const llvm::Type *t, bool intAsUnsigned) {
|
|||||||
return new ReferenceType(AtomicType::VaryingFloat, false);
|
return new ReferenceType(AtomicType::VaryingFloat, false);
|
||||||
else if (t == LLVMTypes::DoubleVectorPointerType)
|
else if (t == LLVMTypes::DoubleVectorPointerType)
|
||||||
return new ReferenceType(AtomicType::VaryingDouble, false);
|
return new ReferenceType(AtomicType::VaryingDouble, false);
|
||||||
|
|
||||||
|
// arrays
|
||||||
else if (llvm::isa<const llvm::PointerType>(t)) {
|
else if (llvm::isa<const llvm::PointerType>(t)) {
|
||||||
const llvm::PointerType *pt = llvm::dyn_cast<const llvm::PointerType>(t);
|
const llvm::PointerType *pt = llvm::dyn_cast<const llvm::PointerType>(t);
|
||||||
|
|
||||||
@@ -239,10 +269,49 @@ lAddModuleSymbols(llvm::Module *module, SymbolTable *symbolTable) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static void
|
||||||
|
lDeclarePG(llvm::Module *module, LLVM_TYPE_CONST llvm::Type *vecType,
|
||||||
|
const char *name) {
|
||||||
|
SourcePos noPos;
|
||||||
|
noPos.name = "__stdlib";
|
||||||
|
|
||||||
|
std::vector<LLVM_TYPE_CONST llvm::Type *> argTypes;
|
||||||
|
argTypes.push_back(LLVMTypes::VoidPointerVectorType);
|
||||||
|
argTypes.push_back(LLVMTypes::MaskType);
|
||||||
|
|
||||||
|
llvm::FunctionType *fType = llvm::FunctionType::get(vecType, argTypes, false);
|
||||||
|
llvm::Function *func =
|
||||||
|
llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage,
|
||||||
|
name, module);
|
||||||
|
func->setOnlyReadsMemory(true);
|
||||||
|
func->setDoesNotThrow(true);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static void
|
||||||
|
lDeclarePGBO(llvm::Module *module, LLVM_TYPE_CONST llvm::Type *vecType,
|
||||||
|
const char *name) {
|
||||||
|
std::vector<LLVM_TYPE_CONST llvm::Type *> argTypes;
|
||||||
|
argTypes.push_back(LLVMTypes::VoidPointerType);
|
||||||
|
argTypes.push_back(LLVMTypes::Int32VectorType);
|
||||||
|
argTypes.push_back(LLVMTypes::MaskType);
|
||||||
|
|
||||||
|
llvm::FunctionType *fType = llvm::FunctionType::get(vecType, argTypes, false);
|
||||||
|
llvm::Function *func =
|
||||||
|
llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage,
|
||||||
|
name, module);
|
||||||
|
func->setOnlyReadsMemory(true);
|
||||||
|
func->setDoesNotThrow(true);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/** Declare the 'pseudo-gather' functions. When the ispc front-end needs
|
/** Declare the 'pseudo-gather' functions. When the ispc front-end needs
|
||||||
to perform a gather, it generates a call to one of these functions,
|
to perform a gather, it generates a call to one of these functions,
|
||||||
which have signatures:
|
which have signatures:
|
||||||
|
|
||||||
|
varying int8 __pseudo_gather(varying int8 *, mask)
|
||||||
|
varying int16 __pseudo_gather(varying int16 *, mask)
|
||||||
varying int32 __pseudo_gather(varying int32 *, mask)
|
varying int32 __pseudo_gather(varying int32 *, mask)
|
||||||
varying int64 __pseudo_gather(varying int64 *, mask)
|
varying int64 __pseudo_gather(varying int64 *, mask)
|
||||||
|
|
||||||
@@ -253,6 +322,10 @@ lAddModuleSymbols(llvm::Module *module, SymbolTable *symbolTable) {
|
|||||||
front-end to be relatively simple in how it emits address calculation
|
front-end to be relatively simple in how it emits address calculation
|
||||||
for gathers.
|
for gathers.
|
||||||
|
|
||||||
|
varying int8 __pseudo_gather_base_offsets_8(uniform int8 *base,
|
||||||
|
int32 offsets, mask)
|
||||||
|
varying int16 __pseudo_gather_base_offsets_16(uniform int16 *base,
|
||||||
|
int32 offsets, mask)
|
||||||
varying int32 __pseudo_gather_base_offsets_32(uniform int32 *base,
|
varying int32 __pseudo_gather_base_offsets_32(uniform int32 *base,
|
||||||
int32 offsets, mask)
|
int32 offsets, mask)
|
||||||
varying int64 __pseudo_gather_base_offsets_64(uniform int64 *base,
|
varying int64 __pseudo_gather_base_offsets_64(uniform int64 *base,
|
||||||
@@ -264,49 +337,54 @@ lAddModuleSymbols(llvm::Module *module, SymbolTable *symbolTable) {
|
|||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
lDeclarePseudoGathers(llvm::Module *module) {
|
lDeclarePseudoGathers(llvm::Module *module) {
|
||||||
SourcePos noPos;
|
lDeclarePG(module, LLVMTypes::Int8VectorType, "__pseudo_gather_8");
|
||||||
noPos.name = "__stdlib";
|
lDeclarePG(module, LLVMTypes::Int16VectorType, "__pseudo_gather_16");
|
||||||
|
lDeclarePG(module, LLVMTypes::Int32VectorType, "__pseudo_gather_32");
|
||||||
|
lDeclarePG(module, LLVMTypes::Int64VectorType, "__pseudo_gather_64");
|
||||||
|
|
||||||
{
|
lDeclarePGBO(module, LLVMTypes::Int8VectorType,
|
||||||
std::vector<LLVM_TYPE_CONST llvm::Type *> argTypes;
|
"__pseudo_gather_base_offsets_8");
|
||||||
argTypes.push_back(LLVMTypes::VoidPointerVectorType);
|
lDeclarePGBO(module, LLVMTypes::Int16VectorType,
|
||||||
argTypes.push_back(LLVMTypes::MaskType);
|
"__pseudo_gather_base_offsets_16");
|
||||||
|
lDeclarePGBO(module, LLVMTypes::Int32VectorType,
|
||||||
|
"__pseudo_gather_base_offsets_32");
|
||||||
|
lDeclarePGBO(module, LLVMTypes::Int64VectorType,
|
||||||
|
"__pseudo_gather_base_offsets_64");
|
||||||
|
}
|
||||||
|
|
||||||
llvm::FunctionType *fType =
|
|
||||||
llvm::FunctionType::get(LLVMTypes::Int32VectorType, argTypes, false);
|
|
||||||
llvm::Function *func =
|
|
||||||
llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage,
|
|
||||||
"__pseudo_gather_32", module);
|
|
||||||
func->setOnlyReadsMemory(true);
|
|
||||||
func->setDoesNotThrow(true);
|
|
||||||
|
|
||||||
fType = llvm::FunctionType::get(LLVMTypes::Int64VectorType, argTypes, false);
|
static void
|
||||||
func = llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage,
|
lDeclarePS(llvm::Module *module, LLVM_TYPE_CONST llvm::Type *vecType,
|
||||||
"__pseudo_gather_64", module);
|
const char *name) {
|
||||||
func->setOnlyReadsMemory(true);
|
std::vector<LLVM_TYPE_CONST llvm::Type *> argTypes;
|
||||||
func->setDoesNotThrow(true);
|
argTypes.push_back(LLVMTypes::VoidPointerVectorType);
|
||||||
}
|
argTypes.push_back(vecType);
|
||||||
|
argTypes.push_back(LLVMTypes::MaskType);
|
||||||
|
|
||||||
{
|
llvm::FunctionType *fType =
|
||||||
std::vector<LLVM_TYPE_CONST llvm::Type *> argTypes;
|
llvm::FunctionType::get(LLVMTypes::VoidType, argTypes, false);
|
||||||
argTypes.push_back(LLVMTypes::VoidPointerType);
|
llvm::Function *func =
|
||||||
argTypes.push_back(LLVMTypes::Int32VectorType);
|
llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage,
|
||||||
argTypes.push_back(LLVMTypes::MaskType);
|
name, module);
|
||||||
|
func->setDoesNotThrow(true);
|
||||||
|
}
|
||||||
|
|
||||||
llvm::FunctionType *fType =
|
|
||||||
llvm::FunctionType::get(LLVMTypes::Int32VectorType, argTypes, false);
|
|
||||||
llvm::Function *func =
|
|
||||||
llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage,
|
|
||||||
"__pseudo_gather_base_offsets_32", module);
|
|
||||||
func->setOnlyReadsMemory(true);
|
|
||||||
func->setDoesNotThrow(true);
|
|
||||||
|
|
||||||
fType = llvm::FunctionType::get(LLVMTypes::Int64VectorType, argTypes, false);
|
static void
|
||||||
func = llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage,
|
lDeclarePSBO(llvm::Module *module, LLVM_TYPE_CONST llvm::Type *vecType,
|
||||||
"__pseudo_gather_base_offsets_64", module);
|
const char *name) {
|
||||||
func->setOnlyReadsMemory(true);
|
std::vector<LLVM_TYPE_CONST llvm::Type *> argTypes;
|
||||||
func->setDoesNotThrow(true);
|
argTypes.push_back(LLVMTypes::VoidPointerType);
|
||||||
}
|
argTypes.push_back(LLVMTypes::Int32VectorType);
|
||||||
|
argTypes.push_back(vecType);
|
||||||
|
argTypes.push_back(LLVMTypes::MaskType);
|
||||||
|
|
||||||
|
llvm::FunctionType *fType =
|
||||||
|
llvm::FunctionType::get(LLVMTypes::VoidType, argTypes, false);
|
||||||
|
llvm::Function *func =
|
||||||
|
llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage,
|
||||||
|
name, module);
|
||||||
|
func->setDoesNotThrow(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -314,16 +392,22 @@ lDeclarePseudoGathers(llvm::Module *module) {
|
|||||||
we also declare (but never define) pseudo-scatter instructions with
|
we also declare (but never define) pseudo-scatter instructions with
|
||||||
signatures:
|
signatures:
|
||||||
|
|
||||||
|
void __pseudo_scatter_8 (varying int8 *, varying int8 values, mask)
|
||||||
|
void __pseudo_scatter_16(varying int16 *, varying int16 values, mask)
|
||||||
void __pseudo_scatter_32(varying int32 *, varying int32 values, mask)
|
void __pseudo_scatter_32(varying int32 *, varying int32 values, mask)
|
||||||
void __pseudo_scatter_64(varying int64 *, varying int64 values, mask)
|
void __pseudo_scatter_64(varying int64 *, varying int64 values, mask)
|
||||||
|
|
||||||
The GatherScatterFlattenOpt optimization pass also finds these and
|
The GatherScatterFlattenOpt optimization pass also finds these and
|
||||||
transforms them to scatters like:
|
transforms them to scatters like:
|
||||||
|
|
||||||
|
void __pseudo_scatter_base_offsets_8(uniform int8 *base,
|
||||||
|
varying int32 offsets, varying int8 values, mask)
|
||||||
|
void __pseudo_scatter_base_offsets_16(uniform int16 *base,
|
||||||
|
varying int32 offsets, varying int16 values, mask)
|
||||||
void __pseudo_scatter_base_offsets_32(uniform int32 *base,
|
void __pseudo_scatter_base_offsets_32(uniform int32 *base,
|
||||||
varying int32 offsets, varying int32 values, mask)
|
varying int32 offsets, varying int32 values, mask)
|
||||||
void __pseudo_scatter_base_offsets_64(uniform int64 *base,
|
void __pseudo_scatter_base_offsets_64(uniform int64 *base,
|
||||||
varying int62 offsets, varying int64 values, mask)
|
varying int32 offsets, varying int64 values, mask)
|
||||||
|
|
||||||
And the GSImprovementsPass in turn converts these to actual native
|
And the GSImprovementsPass in turn converts these to actual native
|
||||||
scatters or masked stores.
|
scatters or masked stores.
|
||||||
@@ -333,67 +417,49 @@ lDeclarePseudoScatters(llvm::Module *module) {
|
|||||||
SourcePos noPos;
|
SourcePos noPos;
|
||||||
noPos.name = "__stdlib";
|
noPos.name = "__stdlib";
|
||||||
|
|
||||||
{
|
lDeclarePS(module, LLVMTypes::Int8VectorType, "__pseudo_scatter_8");
|
||||||
std::vector<LLVM_TYPE_CONST llvm::Type *> argTypes;
|
lDeclarePS(module, LLVMTypes::Int16VectorType, "__pseudo_scatter_16");
|
||||||
argTypes.push_back(LLVMTypes::VoidPointerVectorType);
|
lDeclarePS(module, LLVMTypes::Int32VectorType, "__pseudo_scatter_32");
|
||||||
argTypes.push_back(LLVMTypes::Int32VectorType);
|
lDeclarePS(module, LLVMTypes::Int64VectorType, "__pseudo_scatter_64");
|
||||||
argTypes.push_back(LLVMTypes::MaskType);
|
|
||||||
|
|
||||||
llvm::FunctionType *fType =
|
lDeclarePSBO(module, LLVMTypes::Int8VectorType,
|
||||||
llvm::FunctionType::get(LLVMTypes::VoidType, argTypes, false);
|
"__pseudo_scatter_base_offsets_8");
|
||||||
llvm::Function *func =
|
lDeclarePSBO(module, LLVMTypes::Int16VectorType,
|
||||||
llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage,
|
"__pseudo_scatter_base_offsets_16");
|
||||||
"__pseudo_scatter_32", module);
|
lDeclarePSBO(module, LLVMTypes::Int32VectorType,
|
||||||
func->setDoesNotThrow(true);
|
"__pseudo_scatter_base_offsets_32");
|
||||||
}
|
lDeclarePSBO(module, LLVMTypes::Int64VectorType,
|
||||||
{
|
"__pseudo_scatter_base_offsets_64");
|
||||||
std::vector<LLVM_TYPE_CONST llvm::Type *> argTypes;
|
}
|
||||||
argTypes.push_back(LLVMTypes::VoidPointerVectorType);
|
|
||||||
argTypes.push_back(LLVMTypes::Int64VectorType);
|
|
||||||
argTypes.push_back(LLVMTypes::MaskType);
|
|
||||||
|
|
||||||
llvm::FunctionType *fType =
|
|
||||||
llvm::FunctionType::get(LLVMTypes::VoidType, argTypes, false);
|
|
||||||
llvm::Function *func =
|
|
||||||
llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage,
|
|
||||||
"__pseudo_scatter_64", module);
|
|
||||||
func->setDoesNotThrow(true);
|
|
||||||
}
|
|
||||||
|
|
||||||
{
|
static void
|
||||||
std::vector<LLVM_TYPE_CONST llvm::Type *> argTypes;
|
lDeclarePMS(llvm::Module *module, LLVM_TYPE_CONST llvm::Type *lvalueType,
|
||||||
argTypes.push_back(LLVMTypes::VoidPointerType);
|
LLVM_TYPE_CONST llvm::Type *rvalueType, const char *name) {
|
||||||
argTypes.push_back(LLVMTypes::Int32VectorType);
|
SourcePos noPos;
|
||||||
argTypes.push_back(LLVMTypes::Int32VectorType);
|
noPos.name = "__stdlib";
|
||||||
argTypes.push_back(LLVMTypes::MaskType);
|
|
||||||
|
|
||||||
llvm::FunctionType *fType =
|
std::vector<LLVM_TYPE_CONST llvm::Type *> argTypes;
|
||||||
llvm::FunctionType::get(LLVMTypes::VoidType, argTypes, false);
|
argTypes.push_back(lvalueType);
|
||||||
llvm::Function *func =
|
argTypes.push_back(rvalueType);
|
||||||
llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage,
|
argTypes.push_back(LLVMTypes::MaskType);
|
||||||
"__pseudo_scatter_base_offsets_32", module);
|
|
||||||
func->setDoesNotThrow(true);
|
|
||||||
}
|
|
||||||
{
|
|
||||||
std::vector<LLVM_TYPE_CONST llvm::Type *> argTypes;
|
|
||||||
argTypes.push_back(LLVMTypes::VoidPointerType);
|
|
||||||
argTypes.push_back(LLVMTypes::Int32VectorType);
|
|
||||||
argTypes.push_back(LLVMTypes::Int64VectorType);
|
|
||||||
argTypes.push_back(LLVMTypes::MaskType);
|
|
||||||
|
|
||||||
llvm::FunctionType *fType =
|
llvm::FunctionType *fType =
|
||||||
llvm::FunctionType::get(LLVMTypes::VoidType, argTypes, false);
|
llvm::FunctionType::get(LLVMTypes::VoidType, argTypes, false);
|
||||||
llvm::Function *func =
|
llvm::Function *func =
|
||||||
llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage,
|
llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage,
|
||||||
"__pseudo_scatter_base_offsets_64", module);
|
name, module);
|
||||||
func->setDoesNotThrow(true);
|
func->setDoesNotThrow(true);
|
||||||
}
|
func->addFnAttr(llvm::Attribute::AlwaysInline);
|
||||||
|
func->setDoesNotCapture(1, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/** This function declares placeholder masked store functions for the
|
/** This function declares placeholder masked store functions for the
|
||||||
front-end to use.
|
front-end to use.
|
||||||
|
|
||||||
|
void __pseudo_masked_store_8 (uniform int8 *ptr, varying int8 values, mask)
|
||||||
|
void __pseudo_masked_store_16(uniform int16 *ptr, varying int16 values, mask)
|
||||||
void __pseudo_masked_store_32(uniform int32 *ptr, varying int32 values, mask)
|
void __pseudo_masked_store_32(uniform int32 *ptr, varying int32 values, mask)
|
||||||
void __pseudo_masked_store_64(uniform int64 *ptr, varying int64 values, mask)
|
void __pseudo_masked_store_64(uniform int64 *ptr, varying int64 values, mask)
|
||||||
|
|
||||||
@@ -403,40 +469,14 @@ lDeclarePseudoScatters(llvm::Module *module) {
|
|||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
lDeclarePseudoMaskedStore(llvm::Module *module) {
|
lDeclarePseudoMaskedStore(llvm::Module *module) {
|
||||||
SourcePos noPos;
|
lDeclarePMS(module, LLVMTypes::Int8VectorPointerType,
|
||||||
noPos.name = "__stdlib";
|
LLVMTypes::Int8VectorType, "__pseudo_masked_store_8");
|
||||||
|
lDeclarePMS(module, LLVMTypes::Int16VectorPointerType,
|
||||||
{
|
LLVMTypes::Int16VectorType, "__pseudo_masked_store_16");
|
||||||
std::vector<LLVM_TYPE_CONST llvm::Type *> argTypes;
|
lDeclarePMS(module, LLVMTypes::Int32VectorPointerType,
|
||||||
argTypes.push_back(LLVMTypes::Int32VectorPointerType);
|
LLVMTypes::Int32VectorType, "__pseudo_masked_store_32");
|
||||||
argTypes.push_back(LLVMTypes::Int32VectorType);
|
lDeclarePMS(module, LLVMTypes::Int64VectorPointerType,
|
||||||
argTypes.push_back(LLVMTypes::MaskType);
|
LLVMTypes::Int64VectorType, "__pseudo_masked_store_64");
|
||||||
|
|
||||||
llvm::FunctionType *fType =
|
|
||||||
llvm::FunctionType::get(LLVMTypes::VoidType, argTypes, false);
|
|
||||||
llvm::Function *func =
|
|
||||||
llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage,
|
|
||||||
"__pseudo_masked_store_32", module);
|
|
||||||
func->setDoesNotThrow(true);
|
|
||||||
func->addFnAttr(llvm::Attribute::AlwaysInline);
|
|
||||||
func->setDoesNotCapture(1, true);
|
|
||||||
}
|
|
||||||
|
|
||||||
{
|
|
||||||
std::vector<LLVM_TYPE_CONST llvm::Type *> argTypes;
|
|
||||||
argTypes.push_back(LLVMTypes::Int64VectorPointerType);
|
|
||||||
argTypes.push_back(LLVMTypes::Int64VectorType);
|
|
||||||
argTypes.push_back(LLVMTypes::MaskType);
|
|
||||||
|
|
||||||
llvm::FunctionType *fType =
|
|
||||||
llvm::FunctionType::get(LLVMTypes::VoidType, argTypes, false);
|
|
||||||
llvm::Function *func =
|
|
||||||
llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage,
|
|
||||||
"__pseudo_masked_store_64", module);
|
|
||||||
func->setDoesNotThrow(true);
|
|
||||||
func->addFnAttr(llvm::Attribute::AlwaysInline);
|
|
||||||
func->setDoesNotCapture(1, true);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -609,8 +649,8 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
|
|||||||
// needed by the compiled program.
|
// needed by the compiled program.
|
||||||
{
|
{
|
||||||
std::vector<LLVM_TYPE_CONST llvm::Type *> argTypes;
|
std::vector<LLVM_TYPE_CONST llvm::Type *> argTypes;
|
||||||
argTypes.push_back(llvm::PointerType::get(llvm::Type::getInt8Ty(*g->ctx), 0));
|
argTypes.push_back(LLVMTypes::VoidPointerType);
|
||||||
argTypes.push_back(llvm::PointerType::get(llvm::Type::getInt8Ty(*g->ctx), 0));
|
argTypes.push_back(LLVMTypes::VoidPointerType);
|
||||||
argTypes.push_back(LLVMTypes::Int32Type);
|
argTypes.push_back(LLVMTypes::Int32Type);
|
||||||
argTypes.push_back(LLVMTypes::Int32Type);
|
argTypes.push_back(LLVMTypes::Int32Type);
|
||||||
llvm::FunctionType *ftype = llvm::FunctionType::get(LLVMTypes::VoidType,
|
llvm::FunctionType *ftype = llvm::FunctionType::get(LLVMTypes::VoidType,
|
||||||
|
|||||||
54
ctx.cpp
54
ctx.cpp
@@ -1448,17 +1448,20 @@ FunctionEmitContext::gather(llvm::Value *lvalue, const Type *type,
|
|||||||
llvm::Value *mask = GetMask();
|
llvm::Value *mask = GetMask();
|
||||||
llvm::Function *gather = NULL;
|
llvm::Function *gather = NULL;
|
||||||
// Figure out which gather function to call based on the size of
|
// Figure out which gather function to call based on the size of
|
||||||
// the elements; will need to generalize this for 8 and 16-bit
|
// the elements.
|
||||||
// types.
|
|
||||||
if (retType == LLVMTypes::DoubleVectorType ||
|
if (retType == LLVMTypes::DoubleVectorType ||
|
||||||
retType == LLVMTypes::Int64VectorType)
|
retType == LLVMTypes::Int64VectorType)
|
||||||
gather = m->module->getFunction("__pseudo_gather_64");
|
gather = m->module->getFunction("__pseudo_gather_64");
|
||||||
else {
|
else if (retType == LLVMTypes::FloatVectorType ||
|
||||||
assert(retType == LLVMTypes::FloatVectorType ||
|
retType == LLVMTypes::Int32VectorType)
|
||||||
retType == LLVMTypes::Int32VectorType);
|
|
||||||
gather = m->module->getFunction("__pseudo_gather_32");
|
gather = m->module->getFunction("__pseudo_gather_32");
|
||||||
|
else if (retType == LLVMTypes::Int16VectorType)
|
||||||
|
gather = m->module->getFunction("__pseudo_gather_16");
|
||||||
|
else {
|
||||||
|
assert(retType == LLVMTypes::Int8VectorType);
|
||||||
|
gather = m->module->getFunction("__pseudo_gather_8");
|
||||||
}
|
}
|
||||||
assert(gather);
|
assert(gather != NULL);
|
||||||
|
|
||||||
llvm::Value *voidlvalue = BitCastInst(lvalue, LLVMTypes::VoidPointerType);
|
llvm::Value *voidlvalue = BitCastInst(lvalue, LLVMTypes::VoidPointerType);
|
||||||
llvm::Instruction *call = CallInst(gather, voidlvalue, mask, name);
|
llvm::Instruction *call = CallInst(gather, voidlvalue, mask, name);
|
||||||
@@ -1578,9 +1581,7 @@ FunctionEmitContext::maskedStore(llvm::Value *rvalue, llvm::Value *lvalue,
|
|||||||
rvalueType = rvalueType->GetAsNonConstType();
|
rvalueType = rvalueType->GetAsNonConstType();
|
||||||
|
|
||||||
llvm::Function *maskedStoreFunc = NULL;
|
llvm::Function *maskedStoreFunc = NULL;
|
||||||
// Figure out if we need a 32-bit or 64-bit masked store. This
|
// Figure out if we need a 8, 16, 32 or 64-bit masked store.
|
||||||
// will need to be generalized when/if 8 and 16-bit data types are
|
|
||||||
// added.
|
|
||||||
if (rvalueType == AtomicType::VaryingDouble ||
|
if (rvalueType == AtomicType::VaryingDouble ||
|
||||||
rvalueType == AtomicType::VaryingInt64 ||
|
rvalueType == AtomicType::VaryingInt64 ||
|
||||||
rvalueType == AtomicType::VaryingUInt64) {
|
rvalueType == AtomicType::VaryingUInt64) {
|
||||||
@@ -1590,13 +1591,11 @@ FunctionEmitContext::maskedStore(llvm::Value *rvalue, llvm::Value *lvalue,
|
|||||||
rvalue = BitCastInst(rvalue, LLVMTypes::Int64VectorType,
|
rvalue = BitCastInst(rvalue, LLVMTypes::Int64VectorType,
|
||||||
"rvalue_to_int64");
|
"rvalue_to_int64");
|
||||||
}
|
}
|
||||||
else {
|
else if (rvalueType == AtomicType::VaryingFloat ||
|
||||||
assert(rvalueType == AtomicType::VaryingFloat ||
|
rvalueType == AtomicType::VaryingBool ||
|
||||||
rvalueType == AtomicType::VaryingBool ||
|
rvalueType == AtomicType::VaryingInt32 ||
|
||||||
rvalueType == AtomicType::VaryingInt32 ||
|
rvalueType == AtomicType::VaryingUInt32 ||
|
||||||
rvalueType == AtomicType::VaryingUInt32 ||
|
dynamic_cast<const EnumType *>(rvalueType) != NULL) {
|
||||||
dynamic_cast<const EnumType *>(rvalueType) != NULL);
|
|
||||||
|
|
||||||
maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_32");
|
maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_32");
|
||||||
lvalue = BitCastInst(lvalue, LLVMTypes::Int32VectorPointerType,
|
lvalue = BitCastInst(lvalue, LLVMTypes::Int32VectorPointerType,
|
||||||
"lvalue_to_int32vecptr");
|
"lvalue_to_int32vecptr");
|
||||||
@@ -1604,6 +1603,18 @@ FunctionEmitContext::maskedStore(llvm::Value *rvalue, llvm::Value *lvalue,
|
|||||||
rvalue = BitCastInst(rvalue, LLVMTypes::Int32VectorType,
|
rvalue = BitCastInst(rvalue, LLVMTypes::Int32VectorType,
|
||||||
"rvalue_to_int32");
|
"rvalue_to_int32");
|
||||||
}
|
}
|
||||||
|
else if (rvalueType == AtomicType::VaryingInt16 ||
|
||||||
|
rvalueType == AtomicType::VaryingUInt16) {
|
||||||
|
maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_16");
|
||||||
|
lvalue = BitCastInst(lvalue, LLVMTypes::Int16VectorPointerType,
|
||||||
|
"lvalue_to_int16vecptr");
|
||||||
|
}
|
||||||
|
else if (rvalueType == AtomicType::VaryingInt8 ||
|
||||||
|
rvalueType == AtomicType::VaryingUInt8) {
|
||||||
|
maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_8");
|
||||||
|
lvalue = BitCastInst(lvalue, LLVMTypes::Int8VectorPointerType,
|
||||||
|
"lvalue_to_int8vecptr");
|
||||||
|
}
|
||||||
|
|
||||||
std::vector<llvm::Value *> args;
|
std::vector<llvm::Value *> args;
|
||||||
args.push_back(lvalue);
|
args.push_back(lvalue);
|
||||||
@@ -1668,14 +1679,15 @@ FunctionEmitContext::scatter(llvm::Value *rvalue, llvm::Value *lvalue,
|
|||||||
func = m->module->getFunction("__pseudo_scatter_64");
|
func = m->module->getFunction("__pseudo_scatter_64");
|
||||||
rvalue = BitCastInst(rvalue, LLVMTypes::Int64VectorType, "rvalue2int");
|
rvalue = BitCastInst(rvalue, LLVMTypes::Int64VectorType, "rvalue2int");
|
||||||
}
|
}
|
||||||
else {
|
else if (type == LLVMTypes::FloatVectorType ||
|
||||||
// FIXME: if this hits, presumably it's due to needing int8 and/or
|
type == LLVMTypes::Int32VectorType) {
|
||||||
// int16 versions of scatter...
|
|
||||||
assert(type == LLVMTypes::FloatVectorType ||
|
|
||||||
type == LLVMTypes::Int32VectorType);
|
|
||||||
func = m->module->getFunction("__pseudo_scatter_32");
|
func = m->module->getFunction("__pseudo_scatter_32");
|
||||||
rvalue = BitCastInst(rvalue, LLVMTypes::Int32VectorType, "rvalue2int");
|
rvalue = BitCastInst(rvalue, LLVMTypes::Int32VectorType, "rvalue2int");
|
||||||
}
|
}
|
||||||
|
else if (type == LLVMTypes::Int16VectorType)
|
||||||
|
func = m->module->getFunction("__pseudo_scatter_16");
|
||||||
|
else if (type == LLVMTypes::Int8VectorType)
|
||||||
|
func = m->module->getFunction("__pseudo_scatter_8");
|
||||||
assert(func != NULL);
|
assert(func != NULL);
|
||||||
|
|
||||||
AddInstrumentationPoint("scatter");
|
AddInstrumentationPoint("scatter");
|
||||||
|
|||||||
@@ -427,7 +427,8 @@ The following identifiers are reserved as language keywords: ``bool``,
|
|||||||
``char``, ``cif``, ``cwhile``, ``const``, ``continue``, ``creturn``,
|
``char``, ``cif``, ``cwhile``, ``const``, ``continue``, ``creturn``,
|
||||||
``default``, ``do``, ``double``, ``else``, ``enum``, ``export``,
|
``default``, ``do``, ``double``, ``else``, ``enum``, ``export``,
|
||||||
``extern``, ``false``, ``float``, ``for``, ``goto``, ``if``, ``inline``, ``int``,
|
``extern``, ``false``, ``float``, ``for``, ``goto``, ``if``, ``inline``, ``int``,
|
||||||
``int32``, ``int64``, ``launch``, ``print``, ``reference``, ``return``,
|
``int8``, ``int16``, ``int32``, ``int64``, ``launch``, ``print``,
|
||||||
|
``reference``, ``return``,
|
||||||
``signed``, ``sizeof``, ``soa``, ``static``, ``struct``, ``switch``,
|
``signed``, ``sizeof``, ``soa``, ``static``, ``struct``, ``switch``,
|
||||||
``sync``, ``task``, ``true``, ``typedef``, ``uniform``, ``union``,
|
``sync``, ``task``, ``true``, ``typedef``, ``uniform``, ``union``,
|
||||||
``unsigned``, ``varying``, ``void``, ``volatile``, ``while``.
|
``unsigned``, ``varying``, ``void``, ``volatile``, ``while``.
|
||||||
@@ -481,6 +482,10 @@ types.
|
|||||||
* ``void``: "empty" type representing no value.
|
* ``void``: "empty" type representing no value.
|
||||||
* ``bool``: boolean value; may be assigned ``true``, ``false``, or the
|
* ``bool``: boolean value; may be assigned ``true``, ``false``, or the
|
||||||
value of a boolean expression.
|
value of a boolean expression.
|
||||||
|
* ``int8``: 8-bit signed integer.
|
||||||
|
* ``unsigned int8``: 8-bit unsigned integer.
|
||||||
|
* ``int16``: 16-bit signed integer.
|
||||||
|
* ``unsigned int16``: 16-bit unsigned integer.
|
||||||
* ``int``: 32-bit signed integer; may also be specified as ``int32``.
|
* ``int``: 32-bit signed integer; may also be specified as ``int32``.
|
||||||
* ``unsigned int``: 32-bit unsigned integer; may also be specified as
|
* ``unsigned int``: 32-bit unsigned integer; may also be specified as
|
||||||
``unsigned int32``.
|
``unsigned int32``.
|
||||||
@@ -497,7 +502,8 @@ general" of the two types, with the following precedence:
|
|||||||
|
|
||||||
::
|
::
|
||||||
|
|
||||||
double > uint64 > int64 > float > uint32 > int32 > bool
|
double > uint64 > int64 > float > uint32 > int32 >
|
||||||
|
uint16 > int16 > uint8 > int8 > bool
|
||||||
|
|
||||||
In other words, adding an ``int64`` to a ``double`` causes the ``int64`` to
|
In other words, adding an ``int64`` to a ``double`` causes the ``int64`` to
|
||||||
be converted to a ``double``, the addition to be performed, and a
|
be converted to a ``double``, the addition to be performed, and a
|
||||||
@@ -1709,10 +1715,12 @@ the running program instances.
|
|||||||
|
|
||||||
::
|
::
|
||||||
|
|
||||||
float broadcast(float value, uniform int index)
|
int8 broadcast(int8 value, uniform int index)
|
||||||
|
int16 broadcast(int16 value, uniform int index)
|
||||||
int32 broadcast(int32 value, uniform int index)
|
int32 broadcast(int32 value, uniform int index)
|
||||||
double broadcast(double value, uniform int index)
|
|
||||||
int64 broadcast(int64 value, uniform int index)
|
int64 broadcast(int64 value, uniform int index)
|
||||||
|
float broadcast(float value, uniform int index)
|
||||||
|
double broadcast(double value, uniform int index)
|
||||||
|
|
||||||
The ``rotate()`` function allows each program instance to find the value of
|
The ``rotate()`` function allows each program instance to find the value of
|
||||||
the given value that their neighbor ``offset`` steps away has. For
|
the given value that their neighbor ``offset`` steps away has. For
|
||||||
@@ -1725,10 +1733,12 @@ provided offset value can be positive or negative, and may be greater than
|
|||||||
|
|
||||||
::
|
::
|
||||||
|
|
||||||
float rotate(float value, uniform int offset)
|
int8 rotate(int8 value, uniform int offset)
|
||||||
|
int16 rotate(int16 value, uniform int offset)
|
||||||
int32 rotate(int32 value, uniform int offset)
|
int32 rotate(int32 value, uniform int offset)
|
||||||
double rotate(double value, uniform int offset)
|
|
||||||
int64 rotate(int64 value, uniform int offset)
|
int64 rotate(int64 value, uniform int offset)
|
||||||
|
float rotate(float value, uniform int offset)
|
||||||
|
double rotate(double value, uniform int offset)
|
||||||
|
|
||||||
|
|
||||||
Finally, the ``shuffle()`` functions allow two variants of fully general
|
Finally, the ``shuffle()`` functions allow two variants of fully general
|
||||||
@@ -1739,10 +1749,12 @@ from which to get the value of ``value``. The provided values for
|
|||||||
|
|
||||||
::
|
::
|
||||||
|
|
||||||
float shuffle(float value, int permutation)
|
int8 shuffle(int8 value, int permutation)
|
||||||
|
int16 shuffle(int16 value, int permutation)
|
||||||
int32 shuffle(int32 value, int permutation)
|
int32 shuffle(int32 value, int permutation)
|
||||||
double shuffle(double value, int permutation)
|
|
||||||
int64 shuffle(int64 value, int permutation)
|
int64 shuffle(int64 value, int permutation)
|
||||||
|
float shuffle(float value, int permutation)
|
||||||
|
double shuffle(double value, int permutation)
|
||||||
|
|
||||||
|
|
||||||
The second variant of ``shuffle()`` permutes over the extended vector that
|
The second variant of ``shuffle()`` permutes over the extended vector that
|
||||||
@@ -1753,10 +1765,12 @@ of ``value1``, etc.)
|
|||||||
|
|
||||||
::
|
::
|
||||||
|
|
||||||
float shuffle(float value0, float value1, int permutation)
|
int8 shuffle(int8 value0, int8 value1, int permutation)
|
||||||
|
int16 shuffle(int16 value0, int16 value1, int permutation)
|
||||||
int32 shuffle(int32 value0, int32 value1, int permutation)
|
int32 shuffle(int32 value0, int32 value1, int permutation)
|
||||||
double shuffle(double value0, double value1, int permutation)
|
|
||||||
int64 shuffle(int64 value0, int64 value1, int permutation)
|
int64 shuffle(int64 value0, int64 value1, int permutation)
|
||||||
|
float shuffle(float value0, float value1, int permutation)
|
||||||
|
double shuffle(double value0, double value1, int permutation)
|
||||||
|
|
||||||
The various variants of ``popcnt()`` return the population count--the
|
The various variants of ``popcnt()`` return the population count--the
|
||||||
number of bits set in the given value.
|
number of bits set in the given value.
|
||||||
@@ -1861,10 +1875,19 @@ where the ``i`` th element of ``x`` has been replaced with the value ``v``
|
|||||||
|
|
||||||
::
|
::
|
||||||
|
|
||||||
|
uniform int8 extract(int8 x, uniform int i)
|
||||||
|
uniform int16 extract(int16 x, uniform int i)
|
||||||
|
uniform int32 extract(int32 x, uniform int i)
|
||||||
|
uniform int64 extract(int64 x, uniform int i)
|
||||||
uniform float extract(float x, uniform int i)
|
uniform float extract(float x, uniform int i)
|
||||||
uniform int extract(int x, uniform int i)
|
|
||||||
|
::
|
||||||
|
|
||||||
|
int8 insert(int8 x, uniform int i, uniform int8 v)
|
||||||
|
int16 insert(int16 x, uniform int i, uniform int16 v)
|
||||||
|
int32 insert(int32 x, uniform int i, uniform int32 v)
|
||||||
|
int64 insert(int64 x, uniform int i, uniform int64 v)
|
||||||
float insert(float x, uniform int i, uniform float v)
|
float insert(float x, uniform int i, uniform float v)
|
||||||
int insert(int x, uniform int i, uniform int v)
|
|
||||||
|
|
||||||
|
|
||||||
Atomic Operations and Memory Fences
|
Atomic Operations and Memory Fences
|
||||||
@@ -1948,41 +1971,6 @@ value ``true`` (rather than just having the value one). The
|
|||||||
int sign_extend(bool value)
|
int sign_extend(bool value)
|
||||||
uniform int sign_extend(uniform bool value)
|
uniform int sign_extend(uniform bool value)
|
||||||
|
|
||||||
``ispc`` provides a number of bit/memory-level utility routines in its
|
|
||||||
standard library as well. It has routines that load from and store
|
|
||||||
to 8-bit and 16-bit integer values stored in memory, converting to and from
|
|
||||||
32-bit integers for use in computation in ``ispc`` code. (These functions
|
|
||||||
and this conversion step are necessary because ``ispc`` doesn't have native
|
|
||||||
8-bit or 16-bit types in the language.)
|
|
||||||
|
|
||||||
::
|
|
||||||
|
|
||||||
int load_from_int8(uniform int a[], uniform int offset)
|
|
||||||
unsigned int load_from_int8(uniform unsigned int a[],
|
|
||||||
uniform int offset)
|
|
||||||
void store_to_int8(uniform int a[], uniform int offset,
|
|
||||||
int val)
|
|
||||||
void store_to_int8(uniform unsigned int a[], uniform int offset,
|
|
||||||
unsigned int val)
|
|
||||||
unsigned int load_from_int16(uniform int a[],
|
|
||||||
uniform int offset)
|
|
||||||
unsigned unsigned int load_from_int16(uniform unsigned int a[],
|
|
||||||
uniform int offset)
|
|
||||||
void store_to_int16(uniform int a[], uniform int offset,
|
|
||||||
int val)
|
|
||||||
void store_to_int16(uniform unsigned int a[], uniform int offset,
|
|
||||||
unsigned int val)
|
|
||||||
|
|
||||||
There are three things to note in these functions. First, note that these
|
|
||||||
functions take either ``int`` or ``unsigned int`` arrays as parameters; you
|
|
||||||
need to cast `the ``int8_t`` and ``int16_t`` pointers from the C/C++ side
|
|
||||||
to ``int`` or ``unsigned int`` when passing them to ``ispc`` code. Second,
|
|
||||||
although the arrays are passed as 32-bit integers, in the array indexing
|
|
||||||
calculation, with the ``offset`` parameter, they are treated as if they
|
|
||||||
were ``int8`` or ``int16`` types (i.e. the offset treated as being in terms
|
|
||||||
of number of 8 or 16-bit elements). Third, note that the value of
|
|
||||||
``programIndex`` is implicitly added to offset.
|
|
||||||
|
|
||||||
The ``intbits()`` and ``floatbits()`` functions can be used to implement
|
The ``intbits()`` and ``floatbits()`` functions can be used to implement
|
||||||
low-level floating-point bit twiddling. For example, ``intbits()`` returns
|
low-level floating-point bit twiddling. For example, ``intbits()`` returns
|
||||||
an ``unsigned int`` that is a bit-for-bit copy of the given ``float``
|
an ``unsigned int`` that is a bit-for-bit copy of the given ``float``
|
||||||
|
|||||||
@@ -190,7 +190,9 @@ int main(int argc, char *argv[]) {
|
|||||||
nodes[i].bounds[1].v[1] = b[4];
|
nodes[i].bounds[1].v[1] = b[4];
|
||||||
nodes[i].bounds[1].v[2] = b[5];
|
nodes[i].bounds[1].v[2] = b[5];
|
||||||
READ(nodes[i].offset, 1);
|
READ(nodes[i].offset, 1);
|
||||||
READ(nodes[i].primsAxis, 1);
|
READ(nodes[i].nPrimitives, 1);
|
||||||
|
READ(nodes[i].splitAxis, 1);
|
||||||
|
READ(nodes[i].pad, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
// And then read the triangles
|
// And then read the triangles
|
||||||
|
|||||||
@@ -50,21 +50,11 @@ struct Triangle {
|
|||||||
struct LinearBVHNode {
|
struct LinearBVHNode {
|
||||||
uniform float3 bounds[2];
|
uniform float3 bounds[2];
|
||||||
uniform unsigned int offset; // num primitives for leaf, second child for interior
|
uniform unsigned int offset; // num primitives for leaf, second child for interior
|
||||||
uniform unsigned int primsAxis; // 0:7 nPrimitives, 8:15 split axis, 16:31 padding
|
uniform unsigned int8 nPrimitives;
|
||||||
|
uniform unsigned int8 splitAxis;
|
||||||
|
uniform unsigned int16 pad;
|
||||||
};
|
};
|
||||||
|
|
||||||
static inline uniform int nPrims(const reference LinearBVHNode node) {
|
|
||||||
return (node.primsAxis & 0xff);
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline uniform int axis(const reference LinearBVHNode node) {
|
|
||||||
return ((node.primsAxis >> 8) & 0xff);
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline uniform bool isInterior(const reference LinearBVHNode node) {
|
|
||||||
return nPrims(node) == 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline float3 Cross(const float3 v1, const float3 v2) {
|
static inline float3 Cross(const float3 v1, const float3 v2) {
|
||||||
float v1x = v1.x, v1y = v1.y, v1z = v1.z;
|
float v1x = v1.x, v1y = v1.y, v1z = v1.z;
|
||||||
float v2x = v2.x, v2y = v2.y, v2z = v2.z;
|
float v2x = v2.x, v2y = v2.y, v2z = v2.z;
|
||||||
@@ -199,7 +189,7 @@ bool BVHIntersect(const LinearBVHNode nodes[], const Triangle tris[],
|
|||||||
// Check ray against BVH node
|
// Check ray against BVH node
|
||||||
LinearBVHNode node = nodes[nodeNum];
|
LinearBVHNode node = nodes[nodeNum];
|
||||||
if (any(BBoxIntersect(node.bounds, ray))) {
|
if (any(BBoxIntersect(node.bounds, ray))) {
|
||||||
uniform unsigned int nPrimitives = nPrims(node);
|
uniform unsigned int nPrimitives = node.nPrimitives;
|
||||||
if (nPrimitives > 0) {
|
if (nPrimitives > 0) {
|
||||||
// Intersect ray with primitives in leaf BVH node
|
// Intersect ray with primitives in leaf BVH node
|
||||||
uniform unsigned int primitivesOffset = node.offset;
|
uniform unsigned int primitivesOffset = node.offset;
|
||||||
@@ -213,7 +203,7 @@ bool BVHIntersect(const LinearBVHNode nodes[], const Triangle tris[],
|
|||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
// Put far BVH node on _todo_ stack, advance to near node
|
// Put far BVH node on _todo_ stack, advance to near node
|
||||||
if (r.dirIsNeg[axis(node)]) {
|
if (r.dirIsNeg[node.splitAxis]) {
|
||||||
todo[todoOffset++] = nodeNum + 1;
|
todo[todoOffset++] = nodeNum + 1;
|
||||||
nodeNum = node.offset;
|
nodeNum = node.offset;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -75,30 +75,20 @@ struct Ray {
|
|||||||
namespace ispc {
|
namespace ispc {
|
||||||
struct Triangle {
|
struct Triangle {
|
||||||
float3 p[3];
|
float3 p[3];
|
||||||
int id;
|
int32_t id;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct LinearBVHNode {
|
struct LinearBVHNode {
|
||||||
float3 bounds[2];
|
float3 bounds[2];
|
||||||
unsigned int offset; // primitives for leaf, second child for interior
|
int32_t offset; // primitives for leaf, second child for interior
|
||||||
unsigned int primsAxis; // 0:7 nPrimitives, 8:15 split axis, 16:31 padding
|
uint8_t nPrimitives;
|
||||||
|
uint8_t splitAxis;
|
||||||
|
uint16_t pad;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
using namespace ispc;
|
using namespace ispc;
|
||||||
|
|
||||||
inline int nPrims(const LinearBVHNode &node) {
|
|
||||||
return (node.primsAxis & 0xff);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline int axis(const LinearBVHNode &node) {
|
|
||||||
return ((node.primsAxis >> 8) & 0xff);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline bool isInterior(const LinearBVHNode &node) {
|
|
||||||
return nPrims(node) == 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline float3 Cross(const float3 &v1, const float3 &v2) {
|
inline float3 Cross(const float3 &v1, const float3 &v2) {
|
||||||
float v1x = v1.x, v1y = v1.y, v1z = v1.z;
|
float v1x = v1.x, v1y = v1.y, v1z = v1.z;
|
||||||
float v2x = v2.x, v2y = v2.y, v2z = v2.z;
|
float v2x = v2.x, v2y = v2.y, v2z = v2.z;
|
||||||
@@ -230,7 +220,7 @@ bool BVHIntersect(const LinearBVHNode nodes[], const Triangle tris[],
|
|||||||
// Check ray against BVH node
|
// Check ray against BVH node
|
||||||
const LinearBVHNode &node = nodes[nodeNum];
|
const LinearBVHNode &node = nodes[nodeNum];
|
||||||
if (BBoxIntersect(node.bounds, ray)) {
|
if (BBoxIntersect(node.bounds, ray)) {
|
||||||
unsigned int nPrimitives = nPrims(node);
|
unsigned int nPrimitives = node.nPrimitives;
|
||||||
if (nPrimitives > 0) {
|
if (nPrimitives > 0) {
|
||||||
// Intersect ray with primitives in leaf BVH node
|
// Intersect ray with primitives in leaf BVH node
|
||||||
unsigned int primitivesOffset = node.offset;
|
unsigned int primitivesOffset = node.offset;
|
||||||
@@ -244,7 +234,7 @@ bool BVHIntersect(const LinearBVHNode nodes[], const Triangle tris[],
|
|||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
// Put far BVH node on _todo_ stack, advance to near node
|
// Put far BVH node on _todo_ stack, advance to near node
|
||||||
if (r.dirIsNeg[axis(node)]) {
|
if (r.dirIsNeg[node.splitAxis]) {
|
||||||
todo[todoOffset++] = nodeNum + 1;
|
todo[todoOffset++] = nodeNum + 1;
|
||||||
nodeNum = node.offset;
|
nodeNum = node.offset;
|
||||||
}
|
}
|
||||||
|
|||||||
553
expr.cpp
553
expr.cpp
@@ -93,6 +93,10 @@ lMaybeIssuePrecisionWarning(const AtomicType *toAtomicType,
|
|||||||
SourcePos pos, const char *errorMsgBase) {
|
SourcePos pos, const char *errorMsgBase) {
|
||||||
switch (toAtomicType->basicType) {
|
switch (toAtomicType->basicType) {
|
||||||
case AtomicType::TYPE_BOOL:
|
case AtomicType::TYPE_BOOL:
|
||||||
|
case AtomicType::TYPE_INT8:
|
||||||
|
case AtomicType::TYPE_UINT8:
|
||||||
|
case AtomicType::TYPE_INT16:
|
||||||
|
case AtomicType::TYPE_UINT16:
|
||||||
case AtomicType::TYPE_INT32:
|
case AtomicType::TYPE_INT32:
|
||||||
case AtomicType::TYPE_UINT32:
|
case AtomicType::TYPE_UINT32:
|
||||||
case AtomicType::TYPE_FLOAT:
|
case AtomicType::TYPE_FLOAT:
|
||||||
@@ -101,6 +105,10 @@ lMaybeIssuePrecisionWarning(const AtomicType *toAtomicType,
|
|||||||
case AtomicType::TYPE_DOUBLE:
|
case AtomicType::TYPE_DOUBLE:
|
||||||
if ((int)toAtomicType->basicType < (int)fromAtomicType->basicType &&
|
if ((int)toAtomicType->basicType < (int)fromAtomicType->basicType &&
|
||||||
toAtomicType->basicType != AtomicType::TYPE_BOOL &&
|
toAtomicType->basicType != AtomicType::TYPE_BOOL &&
|
||||||
|
!(toAtomicType->basicType == AtomicType::TYPE_INT8 &&
|
||||||
|
fromAtomicType->basicType == AtomicType::TYPE_UINT8) &&
|
||||||
|
!(toAtomicType->basicType == AtomicType::TYPE_INT16 &&
|
||||||
|
fromAtomicType->basicType == AtomicType::TYPE_UINT16) &&
|
||||||
!(toAtomicType->basicType == AtomicType::TYPE_INT32 &&
|
!(toAtomicType->basicType == AtomicType::TYPE_INT32 &&
|
||||||
fromAtomicType->basicType == AtomicType::TYPE_UINT32) &&
|
fromAtomicType->basicType == AtomicType::TYPE_UINT32) &&
|
||||||
!(toAtomicType->basicType == AtomicType::TYPE_INT64 &&
|
!(toAtomicType->basicType == AtomicType::TYPE_INT64 &&
|
||||||
@@ -363,15 +371,33 @@ lLLVMConstantValue(const Type *type, llvm::LLVMContext *ctx, double value) {
|
|||||||
return (value != 0.) ? LLVMTrue : LLVMFalse;
|
return (value != 0.) ? LLVMTrue : LLVMFalse;
|
||||||
else
|
else
|
||||||
return LLVMBoolVector(value != 0.);
|
return LLVMBoolVector(value != 0.);
|
||||||
case AtomicType::TYPE_UINT32: {
|
case AtomicType::TYPE_INT8: {
|
||||||
|
int i = (int)value;
|
||||||
|
assert((double)i == value);
|
||||||
|
return isUniform ? LLVMInt8(i) : LLVMInt8Vector(i);
|
||||||
|
}
|
||||||
|
case AtomicType::TYPE_UINT8: {
|
||||||
unsigned int i = (unsigned int)value;
|
unsigned int i = (unsigned int)value;
|
||||||
return isUniform ? LLVMUInt32(i) : LLVMUInt32Vector(i);
|
return isUniform ? LLVMUInt8(i) : LLVMUInt8Vector(i);
|
||||||
|
}
|
||||||
|
case AtomicType::TYPE_INT16: {
|
||||||
|
int i = (int)value;
|
||||||
|
assert((double)i == value);
|
||||||
|
return isUniform ? LLVMInt16(i) : LLVMInt16Vector(i);
|
||||||
|
}
|
||||||
|
case AtomicType::TYPE_UINT16: {
|
||||||
|
unsigned int i = (unsigned int)value;
|
||||||
|
return isUniform ? LLVMUInt16(i) : LLVMUInt16Vector(i);
|
||||||
}
|
}
|
||||||
case AtomicType::TYPE_INT32: {
|
case AtomicType::TYPE_INT32: {
|
||||||
int i = (int)value;
|
int i = (int)value;
|
||||||
assert((double)i == value);
|
assert((double)i == value);
|
||||||
return isUniform ? LLVMInt32(i) : LLVMInt32Vector(i);
|
return isUniform ? LLVMInt32(i) : LLVMInt32Vector(i);
|
||||||
}
|
}
|
||||||
|
case AtomicType::TYPE_UINT32: {
|
||||||
|
unsigned int i = (unsigned int)value;
|
||||||
|
return isUniform ? LLVMUInt32(i) : LLVMUInt32Vector(i);
|
||||||
|
}
|
||||||
case AtomicType::TYPE_FLOAT:
|
case AtomicType::TYPE_FLOAT:
|
||||||
return isUniform ? LLVMFloat((float)value) :
|
return isUniform ? LLVMFloat((float)value) :
|
||||||
LLVMFloatVector((float)value);
|
LLVMFloatVector((float)value);
|
||||||
@@ -590,14 +616,13 @@ UnaryExpr::Optimize() {
|
|||||||
const Type *type = constExpr->GetType();
|
const Type *type = constExpr->GetType();
|
||||||
bool isEnumType = dynamic_cast<const EnumType *>(type) != NULL;
|
bool isEnumType = dynamic_cast<const EnumType *>(type) != NULL;
|
||||||
|
|
||||||
if (type == AtomicType::UniformInt64 ||
|
const Type *baseType = type->GetAsNonConstType()->GetAsUniformType();
|
||||||
type == AtomicType::VaryingInt64 ||
|
if (baseType == AtomicType::UniformInt8 ||
|
||||||
type == AtomicType::UniformUInt64 ||
|
baseType == AtomicType::UniformUInt8 ||
|
||||||
type == AtomicType::VaryingUInt64 ||
|
baseType == AtomicType::UniformInt16 ||
|
||||||
type == AtomicType::UniformConstInt64 ||
|
baseType == AtomicType::UniformUInt16 ||
|
||||||
type == AtomicType::VaryingConstInt64 ||
|
baseType == AtomicType::UniformInt64 ||
|
||||||
type == AtomicType::UniformConstUInt64 ||
|
baseType == AtomicType::UniformUInt64)
|
||||||
type == AtomicType::VaryingConstUInt64)
|
|
||||||
// FIXME: should handle these at some point; for now we only do
|
// FIXME: should handle these at some point; for now we only do
|
||||||
// constant folding for bool, int32 and float types...
|
// constant folding for bool, int32 and float types...
|
||||||
return this;
|
return this;
|
||||||
@@ -3058,6 +3083,86 @@ MemberExpr::getCandidateNearMatches() const {
|
|||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
// ConstExpr
|
// ConstExpr
|
||||||
|
|
||||||
|
/** Construct a constant expression from a single int8_t value.

    @param t  Type of the constant; after conversion to its const variant
              it must be AtomicType::UniformConstInt8 (checked by assert).
    @param i  Value stored in the first int8 slot.
    @param p  Source position forwarded to the Expr base class.
 */
ConstExpr::ConstExpr(const Type *t, int8_t i, SourcePos p)
    : Expr(p) {
    // Constants are always stored with the const-qualified form of the type.
    type = t->GetAsConstType();
    // The scalar overload only accepts the uniform int8 type.
    assert(type == AtomicType::UniformConstInt8);
    int8Val[0] = i;
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Construct a constant expression from an array of int8_t values.

    @param t  Type of the constant; after conversion to its const variant
              it must be the uniform or varying const int8 type (checked
              by assert).
    @param i  Source array; the first Count() elements are copied.
    @param p  Source position forwarded to the Expr base class.
 */
ConstExpr::ConstExpr(const Type *t, int8_t *i, SourcePos p)
    : Expr(p) {
    // Constants are always stored with the const-qualified form of the type.
    type = t->GetAsConstType();
    assert(type == AtomicType::UniformConstInt8 ||
           type == AtomicType::VaryingConstInt8);
    // Copy one value per element reported by Count().
    for (int lane = 0; lane < Count(); ++lane)
        int8Val[lane] = i[lane];
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Construct a constant expression from a single uint8_t value.

    @param t  Type of the constant; after conversion to its const variant
              it must be AtomicType::UniformConstUInt8 (checked by assert).
    @param u  Value stored in the first uint8 slot.
    @param p  Source position forwarded to the Expr base class.
 */
ConstExpr::ConstExpr(const Type *t, uint8_t u, SourcePos p)
    : Expr(p) {
    type = t;
    // Constants are always stored with the const-qualified form of the type.
    type = type->GetAsConstType();
    // Compare against the *const* uniform type: 'type' has just been
    // converted with GetAsConstType(), and the sibling constructors
    // (int8, int16, and the array overloads) all check UniformConst* /
    // VaryingConst* types.  Checking the non-const UniformUInt8 here
    // did not match that convention.
    assert(type == AtomicType::UniformConstUInt8);
    uint8Val[0] = u;
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Construct a constant expression from an array of uint8_t values.

    @param t  Type of the constant; after conversion to its const variant
              it must be the uniform or varying const uint8 type (checked
              by assert).
    @param u  Source array; the first Count() elements are copied.
    @param p  Source position forwarded to the Expr base class.
 */
ConstExpr::ConstExpr(const Type *t, uint8_t *u, SourcePos p)
    : Expr(p) {
    // Constants are always stored with the const-qualified form of the type.
    type = t->GetAsConstType();
    assert(type == AtomicType::UniformConstUInt8 ||
           type == AtomicType::VaryingConstUInt8);
    // Copy one value per element reported by Count().
    for (int lane = 0; lane < Count(); ++lane)
        uint8Val[lane] = u[lane];
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Construct a constant expression from a single int16_t value.

    @param t  Type of the constant; after conversion to its const variant
              it must be AtomicType::UniformConstInt16 (checked by assert).
    @param i  Value stored in the first int16 slot.
    @param p  Source position forwarded to the Expr base class.
 */
ConstExpr::ConstExpr(const Type *t, int16_t i, SourcePos p)
    : Expr(p) {
    // Constants are always stored with the const-qualified form of the type.
    type = t->GetAsConstType();
    // The scalar overload only accepts the uniform int16 type.
    assert(type == AtomicType::UniformConstInt16);
    int16Val[0] = i;
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Construct a constant expression from an array of int16_t values.

    @param t  Type of the constant; after conversion to its const variant
              it must be the uniform or varying const int16 type (checked
              by assert).
    @param i  Source array; the first Count() elements are copied.
    @param p  Source position forwarded to the Expr base class.
 */
ConstExpr::ConstExpr(const Type *t, int16_t *i, SourcePos p)
    : Expr(p) {
    // Constants are always stored with the const-qualified form of the type.
    type = t->GetAsConstType();
    assert(type == AtomicType::UniformConstInt16 ||
           type == AtomicType::VaryingConstInt16);
    // Copy one value per element reported by Count().
    for (int lane = 0; lane < Count(); ++lane)
        int16Val[lane] = i[lane];
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Construct a constant expression from a single uint16_t value.

    @param t  Type of the constant; after conversion to its const variant
              it must be AtomicType::UniformConstUInt16 (checked by assert).
    @param u  Value stored in the first uint16 slot.
    @param p  Source position forwarded to the Expr base class.
 */
ConstExpr::ConstExpr(const Type *t, uint16_t u, SourcePos p)
    : Expr(p) {
    type = t;
    // Constants are always stored with the const-qualified form of the type.
    type = type->GetAsConstType();
    // Compare against the *const* uniform type: 'type' has just been
    // converted with GetAsConstType(), and the sibling constructors
    // (int8, int16, and the array overloads) all check UniformConst* /
    // VaryingConst* types.  Checking the non-const UniformUInt16 here
    // did not match that convention.
    assert(type == AtomicType::UniformConstUInt16);
    uint16Val[0] = u;
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Construct a constant expression from an array of uint16_t values.

    @param t  Type of the constant; after conversion to its const variant
              it must be the uniform or varying const uint16 type (checked
              by assert).
    @param u  Source array; the first Count() elements are copied.
    @param p  Source position forwarded to the Expr base class.
 */
ConstExpr::ConstExpr(const Type *t, uint16_t *u, SourcePos p)
    : Expr(p) {
    // Constants are always stored with the const-qualified form of the type.
    type = t->GetAsConstType();
    assert(type == AtomicType::UniformConstUInt16 ||
           type == AtomicType::VaryingConstUInt16);
    // Copy one value per element reported by Count().
    for (int lane = 0; lane < Count(); ++lane)
        uint16Val[lane] = u[lane];
}
|
||||||
|
|
||||||
|
|
||||||
ConstExpr::ConstExpr(const Type *t, int32_t i, SourcePos p)
|
ConstExpr::ConstExpr(const Type *t, int32_t i, SourcePos p)
|
||||||
: Expr(p) {
|
: Expr(p) {
|
||||||
type = t;
|
type = t;
|
||||||
@@ -3212,6 +3317,22 @@ ConstExpr::ConstExpr(ConstExpr *old, double *v)
|
|||||||
for (int i = 0; i < Count(); ++i)
|
for (int i = 0; i < Count(); ++i)
|
||||||
boolVal[i] = (v[i] != 0.);
|
boolVal[i] = (v[i] != 0.);
|
||||||
break;
|
break;
|
||||||
|
case AtomicType::TYPE_INT8:
|
||||||
|
for (int i = 0; i < Count(); ++i)
|
||||||
|
int8Val[i] = (int)v[i];
|
||||||
|
break;
|
||||||
|
case AtomicType::TYPE_UINT8:
|
||||||
|
for (int i = 0; i < Count(); ++i)
|
||||||
|
uint8Val[i] = (unsigned int)v[i];
|
||||||
|
break;
|
||||||
|
case AtomicType::TYPE_INT16:
|
||||||
|
for (int i = 0; i < Count(); ++i)
|
||||||
|
int16Val[i] = (int)v[i];
|
||||||
|
break;
|
||||||
|
case AtomicType::TYPE_UINT16:
|
||||||
|
for (int i = 0; i < Count(); ++i)
|
||||||
|
uint16Val[i] = (unsigned int)v[i];
|
||||||
|
break;
|
||||||
case AtomicType::TYPE_INT32:
|
case AtomicType::TYPE_INT32:
|
||||||
for (int i = 0; i < Count(); ++i)
|
for (int i = 0; i < Count(); ++i)
|
||||||
int32Val[i] = (int)v[i];
|
int32Val[i] = (int)v[i];
|
||||||
@@ -3270,6 +3391,18 @@ ConstExpr::GetValue(FunctionEmitContext *ctx) const {
|
|||||||
return LLVMBoolVector(boolVal);
|
return LLVMBoolVector(boolVal);
|
||||||
else
|
else
|
||||||
return boolVal[0] ? LLVMTrue : LLVMFalse;
|
return boolVal[0] ? LLVMTrue : LLVMFalse;
|
||||||
|
case AtomicType::TYPE_INT8:
|
||||||
|
return isVarying ? LLVMInt8Vector(int8Val) :
|
||||||
|
LLVMInt8(int8Val[0]);
|
||||||
|
case AtomicType::TYPE_UINT8:
|
||||||
|
return isVarying ? LLVMUInt8Vector(uint8Val) :
|
||||||
|
LLVMUInt8(uint8Val[0]);
|
||||||
|
case AtomicType::TYPE_INT16:
|
||||||
|
return isVarying ? LLVMInt16Vector(int16Val) :
|
||||||
|
LLVMInt16(int16Val[0]);
|
||||||
|
case AtomicType::TYPE_UINT16:
|
||||||
|
return isVarying ? LLVMUInt16Vector(uint16Val) :
|
||||||
|
LLVMUInt16(uint16Val[0]);
|
||||||
case AtomicType::TYPE_INT32:
|
case AtomicType::TYPE_INT32:
|
||||||
return isVarying ? LLVMInt32Vector(int32Val) :
|
return isVarying ? LLVMInt32Vector(int32Val) :
|
||||||
LLVMInt32(int32Val[0]);
|
LLVMInt32(int32Val[0]);
|
||||||
@@ -3351,6 +3484,10 @@ int
|
|||||||
ConstExpr::AsInt64(int64_t *ip, bool forceVarying) const {
|
ConstExpr::AsInt64(int64_t *ip, bool forceVarying) const {
|
||||||
switch (getBasicType()) {
|
switch (getBasicType()) {
|
||||||
case AtomicType::TYPE_BOOL: lConvert(boolVal, ip, Count(), forceVarying); break;
|
case AtomicType::TYPE_BOOL: lConvert(boolVal, ip, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_INT8: lConvert(int8Val, ip, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_UINT8: lConvert(uint8Val, ip, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_INT16: lConvert(int16Val, ip, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_UINT16: lConvert(uint16Val, ip, Count(), forceVarying); break;
|
||||||
case AtomicType::TYPE_INT32: lConvert(int32Val, ip, Count(), forceVarying); break;
|
case AtomicType::TYPE_INT32: lConvert(int32Val, ip, Count(), forceVarying); break;
|
||||||
case AtomicType::TYPE_UINT32: lConvert(uint32Val, ip, Count(), forceVarying); break;
|
case AtomicType::TYPE_UINT32: lConvert(uint32Val, ip, Count(), forceVarying); break;
|
||||||
case AtomicType::TYPE_FLOAT: lConvert(floatVal, ip, Count(), forceVarying); break;
|
case AtomicType::TYPE_FLOAT: lConvert(floatVal, ip, Count(), forceVarying); break;
|
||||||
@@ -3368,6 +3505,10 @@ int
|
|||||||
ConstExpr::AsUInt64(uint64_t *up, bool forceVarying) const {
|
ConstExpr::AsUInt64(uint64_t *up, bool forceVarying) const {
|
||||||
switch (getBasicType()) {
|
switch (getBasicType()) {
|
||||||
case AtomicType::TYPE_BOOL: lConvert(boolVal, up, Count(), forceVarying); break;
|
case AtomicType::TYPE_BOOL: lConvert(boolVal, up, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_INT8: lConvert(int8Val, up, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_UINT8: lConvert(uint8Val, up, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_INT16: lConvert(int16Val, up, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_UINT16: lConvert(uint16Val, up, Count(), forceVarying); break;
|
||||||
case AtomicType::TYPE_INT32: lConvert(int32Val, up, Count(), forceVarying); break;
|
case AtomicType::TYPE_INT32: lConvert(int32Val, up, Count(), forceVarying); break;
|
||||||
case AtomicType::TYPE_UINT32: lConvert(uint32Val, up, Count(), forceVarying); break;
|
case AtomicType::TYPE_UINT32: lConvert(uint32Val, up, Count(), forceVarying); break;
|
||||||
case AtomicType::TYPE_FLOAT: lConvert(floatVal, up, Count(), forceVarying); break;
|
case AtomicType::TYPE_FLOAT: lConvert(floatVal, up, Count(), forceVarying); break;
|
||||||
@@ -3385,6 +3526,10 @@ int
|
|||||||
ConstExpr::AsDouble(double *d, bool forceVarying) const {
|
ConstExpr::AsDouble(double *d, bool forceVarying) const {
|
||||||
switch (getBasicType()) {
|
switch (getBasicType()) {
|
||||||
case AtomicType::TYPE_BOOL: lConvert(boolVal, d, Count(), forceVarying); break;
|
case AtomicType::TYPE_BOOL: lConvert(boolVal, d, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_INT8: lConvert(int8Val, d, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_UINT8: lConvert(uint8Val, d, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_INT16: lConvert(int16Val, d, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_UINT16: lConvert(uint16Val, d, Count(), forceVarying); break;
|
||||||
case AtomicType::TYPE_INT32: lConvert(int32Val, d, Count(), forceVarying); break;
|
case AtomicType::TYPE_INT32: lConvert(int32Val, d, Count(), forceVarying); break;
|
||||||
case AtomicType::TYPE_UINT32: lConvert(uint32Val, d, Count(), forceVarying); break;
|
case AtomicType::TYPE_UINT32: lConvert(uint32Val, d, Count(), forceVarying); break;
|
||||||
case AtomicType::TYPE_FLOAT: lConvert(floatVal, d, Count(), forceVarying); break;
|
case AtomicType::TYPE_FLOAT: lConvert(floatVal, d, Count(), forceVarying); break;
|
||||||
@@ -3402,6 +3547,10 @@ int
|
|||||||
ConstExpr::AsFloat(float *fp, bool forceVarying) const {
|
ConstExpr::AsFloat(float *fp, bool forceVarying) const {
|
||||||
switch (getBasicType()) {
|
switch (getBasicType()) {
|
||||||
case AtomicType::TYPE_BOOL: lConvert(boolVal, fp, Count(), forceVarying); break;
|
case AtomicType::TYPE_BOOL: lConvert(boolVal, fp, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_INT8: lConvert(int8Val, fp, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_UINT8: lConvert(uint8Val, fp, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_INT16: lConvert(int16Val, fp, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_UINT16: lConvert(uint16Val, fp, Count(), forceVarying); break;
|
||||||
case AtomicType::TYPE_INT32: lConvert(int32Val, fp, Count(), forceVarying); break;
|
case AtomicType::TYPE_INT32: lConvert(int32Val, fp, Count(), forceVarying); break;
|
||||||
case AtomicType::TYPE_UINT32: lConvert(uint32Val, fp, Count(), forceVarying); break;
|
case AtomicType::TYPE_UINT32: lConvert(uint32Val, fp, Count(), forceVarying); break;
|
||||||
case AtomicType::TYPE_FLOAT: lConvert(floatVal, fp, Count(), forceVarying); break;
|
case AtomicType::TYPE_FLOAT: lConvert(floatVal, fp, Count(), forceVarying); break;
|
||||||
@@ -3419,6 +3568,10 @@ int
|
|||||||
ConstExpr::AsBool(bool *b, bool forceVarying) const {
|
ConstExpr::AsBool(bool *b, bool forceVarying) const {
|
||||||
switch (getBasicType()) {
|
switch (getBasicType()) {
|
||||||
case AtomicType::TYPE_BOOL: lConvert(boolVal, b, Count(), forceVarying); break;
|
case AtomicType::TYPE_BOOL: lConvert(boolVal, b, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_INT8: lConvert(int8Val, b, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_UINT8: lConvert(uint8Val, b, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_INT16: lConvert(int16Val, b, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_UINT16: lConvert(uint16Val, b, Count(), forceVarying); break;
|
||||||
case AtomicType::TYPE_INT32: lConvert(int32Val, b, Count(), forceVarying); break;
|
case AtomicType::TYPE_INT32: lConvert(int32Val, b, Count(), forceVarying); break;
|
||||||
case AtomicType::TYPE_UINT32: lConvert(uint32Val, b, Count(), forceVarying); break;
|
case AtomicType::TYPE_UINT32: lConvert(uint32Val, b, Count(), forceVarying); break;
|
||||||
case AtomicType::TYPE_FLOAT: lConvert(floatVal, b, Count(), forceVarying); break;
|
case AtomicType::TYPE_FLOAT: lConvert(floatVal, b, Count(), forceVarying); break;
|
||||||
@@ -3432,10 +3585,98 @@ ConstExpr::AsBool(bool *b, bool forceVarying) const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Writes the values of this constant into the caller-supplied buffer as
    int8_t, converting from whichever basic type the constant is stored as.

    The basic type reported by getBasicType() selects the member array
    (boolVal, int8Val, ..., uint64Val) that is handed to lConvert() together
    with the destination pointer, Count() and forceVarying.  The callers
    visible in this file pass a destination array of ISPC_MAX_NVEC elements.

    forceVarying is forwarded unchanged to lConvert(); according to the
    declaration comment in expr.h, when it is true the values are converted
    to varying so that a full target vector width of values is written.

    Returns Count().

    NOTE(review): lConvert() is defined outside this view.  Presumably it
    converts element by element, in which case sources wider than 8 bits are
    narrowed -- confirm how out-of-range values are meant to behave. */
int
|
||||||
|
ConstExpr::AsInt8(int8_t *ip, bool forceVarying) const {
|
||||||
|
// One case per atomic basic type; each picks the matching storage array.
switch (getBasicType()) {
|
||||||
|
case AtomicType::TYPE_BOOL: lConvert(boolVal, ip, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_INT8: lConvert(int8Val, ip, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_UINT8: lConvert(uint8Val, ip, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_INT16: lConvert(int16Val, ip, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_UINT16: lConvert(uint16Val, ip, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_INT32: lConvert(int32Val, ip, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_UINT32: lConvert(uint32Val, ip, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_FLOAT: lConvert(floatVal, ip, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_DOUBLE: lConvert(doubleVal, ip, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_INT64: lConvert(int64Val, ip, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_UINT64: lConvert(uint64Val, ip, Count(), forceVarying); break;
|
||||||
|
// Any basic type not listed above is reported through FATAL.
// NOTE(review): if FATAL returns, control still reaches the return below
// with the destination untouched -- confirm that FATAL aborts.
default:
|
||||||
|
FATAL("unimplemented const type");
|
||||||
|
}
|
||||||
|
return Count();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Writes the values of this constant into the caller-supplied buffer as
    uint8_t, converting from whichever basic type the constant is stored as.

    The basic type reported by getBasicType() selects the member array
    (boolVal, int8Val, ..., uint64Val) that is handed to lConvert() together
    with the destination pointer, Count() and forceVarying.  The callers
    visible in this file pass a destination array of ISPC_MAX_NVEC elements.

    forceVarying is forwarded unchanged to lConvert(); according to the
    declaration comment in expr.h, when it is true the values are converted
    to varying so that a full target vector width of values is written.

    Returns Count().

    NOTE(review): lConvert() is defined outside this view.  Presumably it
    converts element by element, so negative or wider-than-8-bit sources
    would be wrapped or narrowed -- confirm the intended behavior. */
int
|
||||||
|
ConstExpr::AsUInt8(uint8_t *up, bool forceVarying) const {
|
||||||
|
// One case per atomic basic type; each picks the matching storage array.
switch (getBasicType()) {
|
||||||
|
case AtomicType::TYPE_BOOL: lConvert(boolVal, up, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_INT8: lConvert(int8Val, up, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_UINT8: lConvert(uint8Val, up, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_INT16: lConvert(int16Val, up, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_UINT16: lConvert(uint16Val, up, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_INT32: lConvert(int32Val, up, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_UINT32: lConvert(uint32Val, up, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_FLOAT: lConvert(floatVal, up, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_DOUBLE: lConvert(doubleVal, up, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_INT64: lConvert(int64Val, up, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_UINT64: lConvert(uint64Val, up, Count(), forceVarying); break;
|
||||||
|
// Any basic type not listed above is reported through FATAL.
// NOTE(review): if FATAL returns, control still reaches the return below
// with the destination untouched -- confirm that FATAL aborts.
default:
|
||||||
|
FATAL("unimplemented const type");
|
||||||
|
}
|
||||||
|
return Count();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Writes the values of this constant into the caller-supplied buffer as
    int16_t, converting from whichever basic type the constant is stored as.

    The basic type reported by getBasicType() selects the member array
    (boolVal, int8Val, ..., uint64Val) that is handed to lConvert() together
    with the destination pointer, Count() and forceVarying.  The callers
    visible in this file pass a destination array of ISPC_MAX_NVEC elements.

    forceVarying is forwarded unchanged to lConvert(); according to the
    declaration comment in expr.h, when it is true the values are converted
    to varying so that a full target vector width of values is written.

    Returns Count().

    NOTE(review): lConvert() is defined outside this view.  Presumably it
    converts element by element, in which case sources wider than 16 bits
    are narrowed -- confirm how out-of-range values are meant to behave. */
int
|
||||||
|
ConstExpr::AsInt16(int16_t *ip, bool forceVarying) const {
|
||||||
|
// One case per atomic basic type; each picks the matching storage array.
switch (getBasicType()) {
|
||||||
|
case AtomicType::TYPE_BOOL: lConvert(boolVal, ip, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_INT8: lConvert(int8Val, ip, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_UINT8: lConvert(uint8Val, ip, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_INT16: lConvert(int16Val, ip, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_UINT16: lConvert(uint16Val, ip, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_INT32: lConvert(int32Val, ip, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_UINT32: lConvert(uint32Val, ip, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_FLOAT: lConvert(floatVal, ip, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_DOUBLE: lConvert(doubleVal, ip, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_INT64: lConvert(int64Val, ip, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_UINT64: lConvert(uint64Val, ip, Count(), forceVarying); break;
|
||||||
|
// Any basic type not listed above is reported through FATAL.
// NOTE(review): if FATAL returns, control still reaches the return below
// with the destination untouched -- confirm that FATAL aborts.
default:
|
||||||
|
FATAL("unimplemented const type");
|
||||||
|
}
|
||||||
|
return Count();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Writes the values of this constant into the caller-supplied buffer as
    uint16_t, converting from whichever basic type the constant is stored as.

    The basic type reported by getBasicType() selects the member array
    (boolVal, int8Val, ..., uint64Val) that is handed to lConvert() together
    with the destination pointer, Count() and forceVarying.  The callers
    visible in this file pass a destination array of ISPC_MAX_NVEC elements.

    forceVarying is forwarded unchanged to lConvert(); according to the
    declaration comment in expr.h, when it is true the values are converted
    to varying so that a full target vector width of values is written.

    Returns Count().

    NOTE(review): lConvert() is defined outside this view.  Presumably it
    converts element by element, so negative or wider-than-16-bit sources
    would be wrapped or narrowed -- confirm the intended behavior. */
int
|
||||||
|
ConstExpr::AsUInt16(uint16_t *up, bool forceVarying) const {
|
||||||
|
// One case per atomic basic type; each picks the matching storage array.
switch (getBasicType()) {
|
||||||
|
case AtomicType::TYPE_BOOL: lConvert(boolVal, up, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_INT8: lConvert(int8Val, up, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_UINT8: lConvert(uint8Val, up, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_INT16: lConvert(int16Val, up, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_UINT16: lConvert(uint16Val, up, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_INT32: lConvert(int32Val, up, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_UINT32: lConvert(uint32Val, up, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_FLOAT: lConvert(floatVal, up, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_DOUBLE: lConvert(doubleVal, up, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_INT64: lConvert(int64Val, up, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_UINT64: lConvert(uint64Val, up, Count(), forceVarying); break;
|
||||||
|
// Any basic type not listed above is reported through FATAL.
// NOTE(review): if FATAL returns, control still reaches the return below
// with the destination untouched -- confirm that FATAL aborts.
default:
|
||||||
|
FATAL("unimplemented const type");
|
||||||
|
}
|
||||||
|
return Count();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
int
|
int
|
||||||
ConstExpr::AsInt32(int32_t *ip, bool forceVarying) const {
|
ConstExpr::AsInt32(int32_t *ip, bool forceVarying) const {
|
||||||
switch (getBasicType()) {
|
switch (getBasicType()) {
|
||||||
case AtomicType::TYPE_BOOL: lConvert(boolVal, ip, Count(), forceVarying); break;
|
case AtomicType::TYPE_BOOL: lConvert(boolVal, ip, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_INT8: lConvert(int8Val, ip, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_UINT8: lConvert(uint8Val, ip, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_INT16: lConvert(int16Val, ip, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_UINT16: lConvert(uint16Val, ip, Count(), forceVarying); break;
|
||||||
case AtomicType::TYPE_INT32: lConvert(int32Val, ip, Count(), forceVarying); break;
|
case AtomicType::TYPE_INT32: lConvert(int32Val, ip, Count(), forceVarying); break;
|
||||||
case AtomicType::TYPE_UINT32: lConvert(uint32Val, ip, Count(), forceVarying); break;
|
case AtomicType::TYPE_UINT32: lConvert(uint32Val, ip, Count(), forceVarying); break;
|
||||||
case AtomicType::TYPE_FLOAT: lConvert(floatVal, ip, Count(), forceVarying); break;
|
case AtomicType::TYPE_FLOAT: lConvert(floatVal, ip, Count(), forceVarying); break;
|
||||||
@@ -3453,6 +3694,10 @@ int
|
|||||||
ConstExpr::AsUInt32(uint32_t *up, bool forceVarying) const {
|
ConstExpr::AsUInt32(uint32_t *up, bool forceVarying) const {
|
||||||
switch (getBasicType()) {
|
switch (getBasicType()) {
|
||||||
case AtomicType::TYPE_BOOL: lConvert(boolVal, up, Count(), forceVarying); break;
|
case AtomicType::TYPE_BOOL: lConvert(boolVal, up, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_INT8: lConvert(int8Val, up, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_UINT8: lConvert(uint8Val, up, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_INT16: lConvert(int16Val, up, Count(), forceVarying); break;
|
||||||
|
case AtomicType::TYPE_UINT16: lConvert(uint16Val, up, Count(), forceVarying); break;
|
||||||
case AtomicType::TYPE_INT32: lConvert(int32Val, up, Count(), forceVarying); break;
|
case AtomicType::TYPE_INT32: lConvert(int32Val, up, Count(), forceVarying); break;
|
||||||
case AtomicType::TYPE_UINT32: lConvert(uint32Val, up, Count(), forceVarying); break;
|
case AtomicType::TYPE_UINT32: lConvert(uint32Val, up, Count(), forceVarying); break;
|
||||||
case AtomicType::TYPE_FLOAT: lConvert(floatVal, up, Count(), forceVarying); break;
|
case AtomicType::TYPE_FLOAT: lConvert(floatVal, up, Count(), forceVarying); break;
|
||||||
@@ -3488,6 +3733,40 @@ ConstExpr::GetConstant(const Type *type) const {
|
|||||||
else
|
else
|
||||||
return LLVMBoolVector(bv);
|
return LLVMBoolVector(bv);
|
||||||
}
|
}
|
||||||
|
else if (type == AtomicType::UniformInt8 || type == AtomicType::VaryingInt8) {
|
||||||
|
int8_t iv[ISPC_MAX_NVEC];
|
||||||
|
AsInt8(iv, type->IsVaryingType());
|
||||||
|
if (type->IsUniformType())
|
||||||
|
return LLVMInt8(iv[0]);
|
||||||
|
else
|
||||||
|
return LLVMInt8Vector(iv);
|
||||||
|
}
|
||||||
|
else if (type == AtomicType::UniformUInt8 || type == AtomicType::VaryingUInt8 ||
|
||||||
|
dynamic_cast<const EnumType *>(type) != NULL) {
|
||||||
|
uint8_t uiv[ISPC_MAX_NVEC];
|
||||||
|
AsUInt8(uiv, type->IsVaryingType());
|
||||||
|
if (type->IsUniformType())
|
||||||
|
return LLVMUInt8(uiv[0]);
|
||||||
|
else
|
||||||
|
return LLVMUInt8Vector(uiv);
|
||||||
|
}
|
||||||
|
else if (type == AtomicType::UniformInt16 || type == AtomicType::VaryingInt16) {
|
||||||
|
int16_t iv[ISPC_MAX_NVEC];
|
||||||
|
AsInt16(iv, type->IsVaryingType());
|
||||||
|
if (type->IsUniformType())
|
||||||
|
return LLVMInt16(iv[0]);
|
||||||
|
else
|
||||||
|
return LLVMInt16Vector(iv);
|
||||||
|
}
|
||||||
|
else if (type == AtomicType::UniformUInt16 || type == AtomicType::VaryingUInt16 ||
|
||||||
|
dynamic_cast<const EnumType *>(type) != NULL) {
|
||||||
|
uint16_t uiv[ISPC_MAX_NVEC];
|
||||||
|
AsUInt16(uiv, type->IsVaryingType());
|
||||||
|
if (type->IsUniformType())
|
||||||
|
return LLVMUInt16(uiv[0]);
|
||||||
|
else
|
||||||
|
return LLVMUInt16Vector(uiv);
|
||||||
|
}
|
||||||
else if (type == AtomicType::UniformInt32 || type == AtomicType::VaryingInt32) {
|
else if (type == AtomicType::UniformInt32 || type == AtomicType::VaryingInt32) {
|
||||||
int32_t iv[ISPC_MAX_NVEC];
|
int32_t iv[ISPC_MAX_NVEC];
|
||||||
AsInt32(iv, type->IsVaryingType());
|
AsInt32(iv, type->IsVaryingType());
|
||||||
@@ -3564,6 +3843,18 @@ ConstExpr::Print() const {
|
|||||||
case AtomicType::TYPE_BOOL:
|
case AtomicType::TYPE_BOOL:
|
||||||
printf("%s", boolVal[i] ? "true" : "false");
|
printf("%s", boolVal[i] ? "true" : "false");
|
||||||
break;
|
break;
|
||||||
|
case AtomicType::TYPE_INT8:
|
||||||
|
printf("%d", (int)int8Val[i]);
|
||||||
|
break;
|
||||||
|
case AtomicType::TYPE_UINT8:
|
||||||
|
printf("%u", (int)uint8Val[i]);
|
||||||
|
break;
|
||||||
|
case AtomicType::TYPE_INT16:
|
||||||
|
printf("%d", (int)int16Val[i]);
|
||||||
|
break;
|
||||||
|
case AtomicType::TYPE_UINT16:
|
||||||
|
printf("%u", (int)uint16Val[i]);
|
||||||
|
break;
|
||||||
case AtomicType::TYPE_INT32:
|
case AtomicType::TYPE_INT32:
|
||||||
printf("%d", int32Val[i]);
|
printf("%d", int32Val[i]);
|
||||||
break;
|
break;
|
||||||
@@ -3637,11 +3928,15 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
|
|||||||
cast = ctx->CastInst(llvm::Instruction::UIToFP, // unsigned int
|
cast = ctx->CastInst(llvm::Instruction::UIToFP, // unsigned int
|
||||||
exprVal, targetType, "bool2float");
|
exprVal, targetType, "bool2float");
|
||||||
break;
|
break;
|
||||||
|
case AtomicType::TYPE_INT8:
|
||||||
|
case AtomicType::TYPE_INT16:
|
||||||
case AtomicType::TYPE_INT32:
|
case AtomicType::TYPE_INT32:
|
||||||
case AtomicType::TYPE_INT64:
|
case AtomicType::TYPE_INT64:
|
||||||
cast = ctx->CastInst(llvm::Instruction::SIToFP, // signed int to float
|
cast = ctx->CastInst(llvm::Instruction::SIToFP, // signed int to float
|
||||||
exprVal, targetType, "int2float");
|
exprVal, targetType, "int2float");
|
||||||
break;
|
break;
|
||||||
|
case AtomicType::TYPE_UINT8:
|
||||||
|
case AtomicType::TYPE_UINT16:
|
||||||
case AtomicType::TYPE_UINT32:
|
case AtomicType::TYPE_UINT32:
|
||||||
case AtomicType::TYPE_UINT64:
|
case AtomicType::TYPE_UINT64:
|
||||||
if (fromType->IsVaryingType())
|
if (fromType->IsVaryingType())
|
||||||
@@ -3675,11 +3970,15 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
|
|||||||
cast = ctx->CastInst(llvm::Instruction::UIToFP, // unsigned int to double
|
cast = ctx->CastInst(llvm::Instruction::UIToFP, // unsigned int to double
|
||||||
exprVal, targetType, "bool2double");
|
exprVal, targetType, "bool2double");
|
||||||
break;
|
break;
|
||||||
|
case AtomicType::TYPE_INT8:
|
||||||
|
case AtomicType::TYPE_INT16:
|
||||||
case AtomicType::TYPE_INT32:
|
case AtomicType::TYPE_INT32:
|
||||||
case AtomicType::TYPE_INT64:
|
case AtomicType::TYPE_INT64:
|
||||||
cast = ctx->CastInst(llvm::Instruction::SIToFP, // signed int
|
cast = ctx->CastInst(llvm::Instruction::SIToFP, // signed int
|
||||||
exprVal, targetType, "int2double");
|
exprVal, targetType, "int2double");
|
||||||
break;
|
break;
|
||||||
|
case AtomicType::TYPE_UINT8:
|
||||||
|
case AtomicType::TYPE_UINT16:
|
||||||
case AtomicType::TYPE_UINT32:
|
case AtomicType::TYPE_UINT32:
|
||||||
case AtomicType::TYPE_UINT64:
|
case AtomicType::TYPE_UINT64:
|
||||||
if (fromType->IsVaryingType())
|
if (fromType->IsVaryingType())
|
||||||
@@ -3699,6 +3998,170 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case AtomicType::TYPE_INT8: {
|
||||||
|
LLVM_TYPE_CONST llvm::Type *targetType =
|
||||||
|
fromType->IsUniformType() ? LLVMTypes::Int8Type :
|
||||||
|
LLVMTypes::Int8VectorType;
|
||||||
|
switch (fromType->basicType) {
|
||||||
|
case AtomicType::TYPE_BOOL:
|
||||||
|
if (fromType->IsVaryingType() &&
|
||||||
|
LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType)
|
||||||
|
exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, "bool_to_i1");
|
||||||
|
cast = ctx->ZExtInst(exprVal, targetType, "bool2int");
|
||||||
|
break;
|
||||||
|
case AtomicType::TYPE_INT8:
|
||||||
|
case AtomicType::TYPE_UINT8:
|
||||||
|
cast = exprVal;
|
||||||
|
break;
|
||||||
|
case AtomicType::TYPE_INT16:
|
||||||
|
case AtomicType::TYPE_UINT16:
|
||||||
|
case AtomicType::TYPE_INT32:
|
||||||
|
case AtomicType::TYPE_UINT32:
|
||||||
|
case AtomicType::TYPE_INT64:
|
||||||
|
case AtomicType::TYPE_UINT64:
|
||||||
|
cast = ctx->TruncInst(exprVal, targetType, "int64_to_int8");
|
||||||
|
break;
|
||||||
|
case AtomicType::TYPE_FLOAT:
|
||||||
|
cast = ctx->CastInst(llvm::Instruction::FPToSI, // signed int
|
||||||
|
exprVal, targetType, "float2int");
|
||||||
|
break;
|
||||||
|
case AtomicType::TYPE_DOUBLE:
|
||||||
|
cast = ctx->CastInst(llvm::Instruction::FPToSI, // signed int
|
||||||
|
exprVal, targetType, "double2int");
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
FATAL("unimplemented");
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case AtomicType::TYPE_UINT8: {
|
||||||
|
LLVM_TYPE_CONST llvm::Type *targetType =
|
||||||
|
fromType->IsUniformType() ? LLVMTypes::Int8Type :
|
||||||
|
LLVMTypes::Int8VectorType;
|
||||||
|
switch (fromType->basicType) {
|
||||||
|
case AtomicType::TYPE_BOOL:
|
||||||
|
if (fromType->IsVaryingType() &&
|
||||||
|
LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType)
|
||||||
|
exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, "bool_to_i1");
|
||||||
|
cast = ctx->ZExtInst(exprVal, targetType, "bool2uint");
|
||||||
|
break;
|
||||||
|
case AtomicType::TYPE_INT8:
|
||||||
|
case AtomicType::TYPE_UINT8:
|
||||||
|
cast = exprVal;
|
||||||
|
break;
|
||||||
|
case AtomicType::TYPE_INT16:
|
||||||
|
case AtomicType::TYPE_UINT16:
|
||||||
|
case AtomicType::TYPE_INT32:
|
||||||
|
case AtomicType::TYPE_UINT32:
|
||||||
|
case AtomicType::TYPE_INT64:
|
||||||
|
case AtomicType::TYPE_UINT64:
|
||||||
|
cast = ctx->TruncInst(exprVal, targetType, "int64_to_uint8");
|
||||||
|
break;
|
||||||
|
case AtomicType::TYPE_FLOAT:
|
||||||
|
if (fromType->IsVaryingType())
|
||||||
|
PerformanceWarning(pos, "Conversion from float to unsigned int is slow. "
|
||||||
|
"Use \"int\" if possible");
|
||||||
|
cast = ctx->CastInst(llvm::Instruction::FPToUI, // unsigned int
|
||||||
|
exprVal, targetType, "float2uint");
|
||||||
|
break;
|
||||||
|
case AtomicType::TYPE_DOUBLE:
|
||||||
|
if (fromType->IsVaryingType())
|
||||||
|
PerformanceWarning(pos, "Conversion from double to unsigned int is slow. "
|
||||||
|
"Use \"int\" if possible");
|
||||||
|
cast = ctx->CastInst(llvm::Instruction::FPToUI, // unsigned int
|
||||||
|
exprVal, targetType, "double2uint");
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
FATAL("unimplemented");
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case AtomicType::TYPE_INT16: {
|
||||||
|
LLVM_TYPE_CONST llvm::Type *targetType =
|
||||||
|
fromType->IsUniformType() ? LLVMTypes::Int16Type :
|
||||||
|
LLVMTypes::Int16VectorType;
|
||||||
|
switch (fromType->basicType) {
|
||||||
|
case AtomicType::TYPE_BOOL:
|
||||||
|
if (fromType->IsVaryingType() &&
|
||||||
|
LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType)
|
||||||
|
exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, "bool_to_i1");
|
||||||
|
cast = ctx->ZExtInst(exprVal, targetType, "bool2int");
|
||||||
|
break;
|
||||||
|
case AtomicType::TYPE_INT8:
|
||||||
|
cast = ctx->SExtInst(exprVal, targetType, "int2int16");
|
||||||
|
break;
|
||||||
|
case AtomicType::TYPE_UINT8:
|
||||||
|
cast = ctx->ZExtInst(exprVal, targetType, "uint2uint16");
|
||||||
|
break;
|
||||||
|
case AtomicType::TYPE_INT16:
|
||||||
|
case AtomicType::TYPE_UINT16:
|
||||||
|
cast = exprVal;
|
||||||
|
break;
|
||||||
|
case AtomicType::TYPE_FLOAT:
|
||||||
|
cast = ctx->CastInst(llvm::Instruction::FPToSI, // signed int
|
||||||
|
exprVal, targetType, "float2int");
|
||||||
|
break;
|
||||||
|
case AtomicType::TYPE_INT32:
|
||||||
|
case AtomicType::TYPE_UINT32:
|
||||||
|
case AtomicType::TYPE_INT64:
|
||||||
|
case AtomicType::TYPE_UINT64:
|
||||||
|
cast = ctx->TruncInst(exprVal, targetType, "int64_to_int16");
|
||||||
|
break;
|
||||||
|
case AtomicType::TYPE_DOUBLE:
|
||||||
|
cast = ctx->CastInst(llvm::Instruction::FPToSI, // signed int
|
||||||
|
exprVal, targetType, "double2int");
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
FATAL("unimplemented");
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case AtomicType::TYPE_UINT16: {
|
||||||
|
LLVM_TYPE_CONST llvm::Type *targetType =
|
||||||
|
fromType->IsUniformType() ? LLVMTypes::Int16Type :
|
||||||
|
LLVMTypes::Int16VectorType;
|
||||||
|
switch (fromType->basicType) {
|
||||||
|
case AtomicType::TYPE_BOOL:
|
||||||
|
if (fromType->IsVaryingType() &&
|
||||||
|
LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType)
|
||||||
|
exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, "bool_to_i1");
|
||||||
|
cast = ctx->ZExtInst(exprVal, targetType, "bool2uint16");
|
||||||
|
break;
|
||||||
|
case AtomicType::TYPE_INT8:
|
||||||
|
cast = ctx->SExtInst(exprVal, targetType, "uint2uint16");
|
||||||
|
break;
|
||||||
|
case AtomicType::TYPE_UINT8:
|
||||||
|
cast = ctx->ZExtInst(exprVal, targetType, "uint2uint16");
|
||||||
|
break;
|
||||||
|
case AtomicType::TYPE_INT16:
|
||||||
|
case AtomicType::TYPE_UINT16:
|
||||||
|
cast = exprVal;
|
||||||
|
break;
|
||||||
|
case AtomicType::TYPE_FLOAT:
|
||||||
|
if (fromType->IsVaryingType())
|
||||||
|
PerformanceWarning(pos, "Conversion from float to unsigned int is slow. "
|
||||||
|
"Use \"int\" if possible");
|
||||||
|
cast = ctx->CastInst(llvm::Instruction::FPToUI, // unsigned int
|
||||||
|
exprVal, targetType, "float2uint");
|
||||||
|
break;
|
||||||
|
case AtomicType::TYPE_INT32:
|
||||||
|
case AtomicType::TYPE_UINT32:
|
||||||
|
case AtomicType::TYPE_INT64:
|
||||||
|
case AtomicType::TYPE_UINT64:
|
||||||
|
cast = ctx->TruncInst(exprVal, targetType, "int64_to_uint16");
|
||||||
|
break;
|
||||||
|
case AtomicType::TYPE_DOUBLE:
|
||||||
|
if (fromType->IsVaryingType())
|
||||||
|
PerformanceWarning(pos, "Conversion from double to unsigned int is slow. "
|
||||||
|
"Use \"int\" if possible");
|
||||||
|
cast = ctx->CastInst(llvm::Instruction::FPToUI, // unsigned int
|
||||||
|
exprVal, targetType, "double2uint");
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
FATAL("unimplemented");
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
case AtomicType::TYPE_INT32: {
|
case AtomicType::TYPE_INT32: {
|
||||||
LLVM_TYPE_CONST llvm::Type *targetType =
|
LLVM_TYPE_CONST llvm::Type *targetType =
|
||||||
fromType->IsUniformType() ? LLVMTypes::Int32Type :
|
fromType->IsUniformType() ? LLVMTypes::Int32Type :
|
||||||
@@ -3710,6 +4173,14 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
|
|||||||
exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, "bool_to_i1");
|
exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, "bool_to_i1");
|
||||||
cast = ctx->ZExtInst(exprVal, targetType, "bool2int");
|
cast = ctx->ZExtInst(exprVal, targetType, "bool2int");
|
||||||
break;
|
break;
|
||||||
|
case AtomicType::TYPE_INT8:
|
||||||
|
case AtomicType::TYPE_INT16:
|
||||||
|
cast = ctx->SExtInst(exprVal, targetType, "int2int32");
|
||||||
|
break;
|
||||||
|
case AtomicType::TYPE_UINT8:
|
||||||
|
case AtomicType::TYPE_UINT16:
|
||||||
|
cast = ctx->ZExtInst(exprVal, targetType, "uint2uint32");
|
||||||
|
break;
|
||||||
case AtomicType::TYPE_INT32:
|
case AtomicType::TYPE_INT32:
|
||||||
case AtomicType::TYPE_UINT32:
|
case AtomicType::TYPE_UINT32:
|
||||||
cast = exprVal;
|
cast = exprVal;
|
||||||
@@ -3742,6 +4213,14 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
|
|||||||
exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, "bool_to_i1");
|
exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, "bool_to_i1");
|
||||||
cast = ctx->ZExtInst(exprVal, targetType, "bool2uint");
|
cast = ctx->ZExtInst(exprVal, targetType, "bool2uint");
|
||||||
break;
|
break;
|
||||||
|
case AtomicType::TYPE_INT8:
|
||||||
|
case AtomicType::TYPE_INT16:
|
||||||
|
cast = ctx->SExtInst(exprVal, targetType, "uint2uint");
|
||||||
|
break;
|
||||||
|
case AtomicType::TYPE_UINT8:
|
||||||
|
case AtomicType::TYPE_UINT16:
|
||||||
|
cast = ctx->ZExtInst(exprVal, targetType, "uint2uint");
|
||||||
|
break;
|
||||||
case AtomicType::TYPE_INT32:
|
case AtomicType::TYPE_INT32:
|
||||||
case AtomicType::TYPE_UINT32:
|
case AtomicType::TYPE_UINT32:
|
||||||
cast = exprVal;
|
cast = exprVal;
|
||||||
@@ -3780,11 +4259,15 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
|
|||||||
exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, "bool_to_i1");
|
exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, "bool_to_i1");
|
||||||
cast = ctx->ZExtInst(exprVal, targetType, "bool2int64");
|
cast = ctx->ZExtInst(exprVal, targetType, "bool2int64");
|
||||||
break;
|
break;
|
||||||
|
case AtomicType::TYPE_INT8:
|
||||||
|
case AtomicType::TYPE_INT16:
|
||||||
case AtomicType::TYPE_INT32:
|
case AtomicType::TYPE_INT32:
|
||||||
cast = ctx->SExtInst(exprVal, targetType, "int32_to_int64");
|
cast = ctx->SExtInst(exprVal, targetType, "int_to_int64");
|
||||||
break;
|
break;
|
||||||
|
case AtomicType::TYPE_UINT8:
|
||||||
|
case AtomicType::TYPE_UINT16:
|
||||||
case AtomicType::TYPE_UINT32:
|
case AtomicType::TYPE_UINT32:
|
||||||
cast = ctx->ZExtInst(exprVal, targetType, "uint32_to_int64");
|
cast = ctx->ZExtInst(exprVal, targetType, "uint_to_int64");
|
||||||
break;
|
break;
|
||||||
case AtomicType::TYPE_FLOAT:
|
case AtomicType::TYPE_FLOAT:
|
||||||
cast = ctx->CastInst(llvm::Instruction::FPToSI, // signed int
|
cast = ctx->CastInst(llvm::Instruction::FPToSI, // signed int
|
||||||
@@ -3796,7 +4279,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
|
|||||||
break;
|
break;
|
||||||
case AtomicType::TYPE_DOUBLE:
|
case AtomicType::TYPE_DOUBLE:
|
||||||
cast = ctx->CastInst(llvm::Instruction::FPToSI, // signed int
|
cast = ctx->CastInst(llvm::Instruction::FPToSI, // signed int
|
||||||
exprVal, targetType, "double2int");
|
exprVal, targetType, "double2int64");
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
FATAL("unimplemented");
|
FATAL("unimplemented");
|
||||||
@@ -3814,11 +4297,15 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
|
|||||||
exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, "bool_to_i1");
|
exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, "bool_to_i1");
|
||||||
cast = ctx->ZExtInst(exprVal, targetType, "bool2uint");
|
cast = ctx->ZExtInst(exprVal, targetType, "bool2uint");
|
||||||
break;
|
break;
|
||||||
|
case AtomicType::TYPE_INT8:
|
||||||
|
case AtomicType::TYPE_INT16:
|
||||||
case AtomicType::TYPE_INT32:
|
case AtomicType::TYPE_INT32:
|
||||||
cast = ctx->SExtInst(exprVal, targetType, "int32_to_uint64");
|
cast = ctx->SExtInst(exprVal, targetType, "int_to_uint64");
|
||||||
break;
|
break;
|
||||||
|
case AtomicType::TYPE_UINT8:
|
||||||
|
case AtomicType::TYPE_UINT16:
|
||||||
case AtomicType::TYPE_UINT32:
|
case AtomicType::TYPE_UINT32:
|
||||||
cast = ctx->ZExtInst(exprVal, targetType, "uint32_to_uint64");
|
cast = ctx->ZExtInst(exprVal, targetType, "uint_to_uint64");
|
||||||
break;
|
break;
|
||||||
case AtomicType::TYPE_FLOAT:
|
case AtomicType::TYPE_FLOAT:
|
||||||
if (fromType->IsVaryingType())
|
if (fromType->IsVaryingType())
|
||||||
@@ -3848,6 +4335,22 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
|
|||||||
case AtomicType::TYPE_BOOL:
|
case AtomicType::TYPE_BOOL:
|
||||||
cast = exprVal;
|
cast = exprVal;
|
||||||
break;
|
break;
|
||||||
|
case AtomicType::TYPE_INT8:
|
||||||
|
case AtomicType::TYPE_UINT8: {
|
||||||
|
llvm::Value *zero = fromType->IsUniformType() ? (llvm::Value *)LLVMInt8(0) :
|
||||||
|
(llvm::Value *)LLVMInt8Vector((int8_t)0);
|
||||||
|
cast = ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_NE,
|
||||||
|
exprVal, zero, "cmpi0");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case AtomicType::TYPE_INT16:
|
||||||
|
case AtomicType::TYPE_UINT16: {
|
||||||
|
llvm::Value *zero = fromType->IsUniformType() ? (llvm::Value *)LLVMInt16(0) :
|
||||||
|
(llvm::Value *)LLVMInt16Vector((int16_t)0);
|
||||||
|
cast = ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_NE,
|
||||||
|
exprVal, zero, "cmpi0");
|
||||||
|
break;
|
||||||
|
}
|
||||||
case AtomicType::TYPE_INT32:
|
case AtomicType::TYPE_INT32:
|
||||||
case AtomicType::TYPE_UINT32: {
|
case AtomicType::TYPE_UINT32: {
|
||||||
llvm::Value *zero = fromType->IsUniformType() ? (llvm::Value *)LLVMInt32(0) :
|
llvm::Value *zero = fromType->IsUniformType() ? (llvm::Value *)LLVMInt32(0) :
|
||||||
@@ -4195,6 +4698,26 @@ TypeCastExpr::Optimize() {
|
|||||||
constExpr->AsBool(bv, forceVarying);
|
constExpr->AsBool(bv, forceVarying);
|
||||||
return new ConstExpr(toType, bv, pos);
|
return new ConstExpr(toType, bv, pos);
|
||||||
}
|
}
|
||||||
|
case AtomicType::TYPE_INT8: {
|
||||||
|
int8_t iv[ISPC_MAX_NVEC];
|
||||||
|
constExpr->AsInt8(iv, forceVarying);
|
||||||
|
return new ConstExpr(toType, iv, pos);
|
||||||
|
}
|
||||||
|
case AtomicType::TYPE_UINT8: {
|
||||||
|
uint8_t uv[ISPC_MAX_NVEC];
|
||||||
|
constExpr->AsUInt8(uv, forceVarying);
|
||||||
|
return new ConstExpr(toType, uv, pos);
|
||||||
|
}
|
||||||
|
case AtomicType::TYPE_INT16: {
|
||||||
|
int16_t iv[ISPC_MAX_NVEC];
|
||||||
|
constExpr->AsInt16(iv, forceVarying);
|
||||||
|
return new ConstExpr(toType, iv, pos);
|
||||||
|
}
|
||||||
|
case AtomicType::TYPE_UINT16: {
|
||||||
|
uint16_t uv[ISPC_MAX_NVEC];
|
||||||
|
constExpr->AsUInt16(uv, forceVarying);
|
||||||
|
return new ConstExpr(toType, uv, pos);
|
||||||
|
}
|
||||||
case AtomicType::TYPE_INT32: {
|
case AtomicType::TYPE_INT32: {
|
||||||
int32_t iv[ISPC_MAX_NVEC];
|
int32_t iv[ISPC_MAX_NVEC];
|
||||||
constExpr->AsInt32(iv, forceVarying);
|
constExpr->AsInt32(iv, forceVarying);
|
||||||
|
|||||||
51
expr.h
51
expr.h
@@ -325,6 +325,24 @@ private:
|
|||||||
*/
|
*/
|
||||||
class ConstExpr : public Expr {
|
class ConstExpr : public Expr {
|
||||||
public:
|
public:
|
||||||
|
/** Create a ConstExpr from a uniform int8 value */
|
||||||
|
ConstExpr(const Type *t, int8_t i, SourcePos p);
|
||||||
|
/** Create a ConstExpr from a varying int8 value */
|
||||||
|
ConstExpr(const Type *t, int8_t *i, SourcePos p);
|
||||||
|
/** Create a ConstExpr from a uniform uint8 value */
|
||||||
|
ConstExpr(const Type *t, uint8_t u, SourcePos p);
|
||||||
|
/** Create a ConstExpr from a varying uint8 value */
|
||||||
|
ConstExpr(const Type *t, uint8_t *u, SourcePos p);
|
||||||
|
|
||||||
|
/** Create a ConstExpr from a uniform int16 value */
|
||||||
|
ConstExpr(const Type *t, int16_t i, SourcePos p);
|
||||||
|
/** Create a ConstExpr from a varying int16 value */
|
||||||
|
ConstExpr(const Type *t, int16_t *i, SourcePos p);
|
||||||
|
/** Create a ConstExpr from a uniform uint16 value */
|
||||||
|
ConstExpr(const Type *t, uint16_t u, SourcePos p);
|
||||||
|
/** Create a ConstExpr from a varying uint16 value */
|
||||||
|
ConstExpr(const Type *t, uint16_t *u, SourcePos p);
|
||||||
|
|
||||||
/** Create a ConstExpr from a uniform int32 value */
|
/** Create a ConstExpr from a uniform int32 value */
|
||||||
ConstExpr(const Type *t, int32_t i, SourcePos p);
|
ConstExpr(const Type *t, int32_t i, SourcePos p);
|
||||||
/** Create a ConstExpr from a varying int32 value */
|
/** Create a ConstExpr from a varying int32 value */
|
||||||
@@ -333,14 +351,17 @@ public:
|
|||||||
ConstExpr(const Type *t, uint32_t u, SourcePos p);
|
ConstExpr(const Type *t, uint32_t u, SourcePos p);
|
||||||
/** Create a ConstExpr from a varying uint32 value */
|
/** Create a ConstExpr from a varying uint32 value */
|
||||||
ConstExpr(const Type *t, uint32_t *u, SourcePos p);
|
ConstExpr(const Type *t, uint32_t *u, SourcePos p);
|
||||||
|
|
||||||
/** Create a ConstExpr from a uniform float value */
|
/** Create a ConstExpr from a uniform float value */
|
||||||
ConstExpr(const Type *t, float f, SourcePos p);
|
ConstExpr(const Type *t, float f, SourcePos p);
|
||||||
/** Create a ConstExpr from a varying float value */
|
/** Create a ConstExpr from a varying float value */
|
||||||
ConstExpr(const Type *t, float *f, SourcePos p);
|
ConstExpr(const Type *t, float *f, SourcePos p);
|
||||||
|
|
||||||
/** Create a ConstExpr from a uniform double value */
|
/** Create a ConstExpr from a uniform double value */
|
||||||
ConstExpr(const Type *t, double d, SourcePos p);
|
ConstExpr(const Type *t, double d, SourcePos p);
|
||||||
/** Create a ConstExpr from a varying double value */
|
/** Create a ConstExpr from a varying double value */
|
||||||
ConstExpr(const Type *t, double *d, SourcePos p);
|
ConstExpr(const Type *t, double *d, SourcePos p);
|
||||||
|
|
||||||
/** Create a ConstExpr from a uniform int64 value */
|
/** Create a ConstExpr from a uniform int64 value */
|
||||||
ConstExpr(const Type *t, int64_t i, SourcePos p);
|
ConstExpr(const Type *t, int64_t i, SourcePos p);
|
||||||
/** Create a ConstExpr from a varying int64 value */
|
/** Create a ConstExpr from a varying int64 value */
|
||||||
@@ -349,10 +370,12 @@ public:
|
|||||||
ConstExpr(const Type *t, uint64_t i, SourcePos p);
|
ConstExpr(const Type *t, uint64_t i, SourcePos p);
|
||||||
/** Create a ConstExpr from a varying uint64 value */
|
/** Create a ConstExpr from a varying uint64 value */
|
||||||
ConstExpr(const Type *t, uint64_t *i, SourcePos p);
|
ConstExpr(const Type *t, uint64_t *i, SourcePos p);
|
||||||
|
|
||||||
/** Create a ConstExpr from a uniform bool value */
|
/** Create a ConstExpr from a uniform bool value */
|
||||||
ConstExpr(const Type *t, bool b, SourcePos p);
|
ConstExpr(const Type *t, bool b, SourcePos p);
|
||||||
/** Create a ConstExpr from a varying bool value */
|
/** Create a ConstExpr from a varying bool value */
|
||||||
ConstExpr(const Type *t, bool *b, SourcePos p);
|
ConstExpr(const Type *t, bool *b, SourcePos p);
|
||||||
|
|
||||||
/** Create a ConstExpr of the same type as the given old ConstExpr,
|
/** Create a ConstExpr of the same type as the given old ConstExpr,
|
||||||
with values given by the "values" parameter. */
|
with values given by the "values" parameter. */
|
||||||
ConstExpr(ConstExpr *old, double *values);
|
ConstExpr(ConstExpr *old, double *values);
|
||||||
@@ -371,6 +394,30 @@ public:
|
|||||||
equal to the target vector width into the given pointer. */
|
equal to the target vector width into the given pointer. */
|
||||||
int AsBool(bool *, bool forceVarying = false) const;
|
int AsBool(bool *, bool forceVarying = false) const;
|
||||||
|
|
||||||
|
/** Return the ConstExpr's values as int8s, doing type conversion
|
||||||
|
from the actual type if needed. If forceVarying is true, then type
|
||||||
|
convert to 'varying' so as to always return a number of values
|
||||||
|
equal to the target vector width into the given pointer. */
|
||||||
|
int AsInt8(int8_t *, bool forceVarying = false) const;
|
||||||
|
|
||||||
|
/** Return the ConstExpr's values as uint8s, doing type conversion
|
||||||
|
from the actual type if needed. If forceVarying is true, then type
|
||||||
|
convert to 'varying' so as to always return a number of values
|
||||||
|
equal to the target vector width into the given pointer. */
|
||||||
|
int AsUInt8(uint8_t *, bool forceVarying = false) const;
|
||||||
|
|
||||||
|
/** Return the ConstExpr's values as int16s, doing type conversion
|
||||||
|
from the actual type if needed. If forceVarying is true, then type
|
||||||
|
convert to 'varying' so as to always return a number of values
|
||||||
|
equal to the target vector width into the given pointer. */
|
||||||
|
int AsInt16(int16_t *, bool forceVarying = false) const;
|
||||||
|
|
||||||
|
/** Return the ConstExpr's values as uint16s, doing type conversion
|
||||||
|
from the actual type if needed. If forceVarying is true, then type
|
||||||
|
convert to 'varying' so as to always return a number of values
|
||||||
|
equal to the target vector width into the given pointer. */
|
||||||
|
int AsUInt16(uint16_t *, bool forceVarying = false) const;
|
||||||
|
|
||||||
/** Return the ConstExpr's values as int32s, doing type conversion
|
/** Return the ConstExpr's values as int32s, doing type conversion
|
||||||
from the actual type if needed. If forceVarying is true, then type
|
from the actual type if needed. If forceVarying is true, then type
|
||||||
convert to 'varying' so as to always return a number of values
|
convert to 'varying' so as to always return a number of values
|
||||||
@@ -417,6 +464,10 @@ private:
|
|||||||
|
|
||||||
const Type *type;
|
const Type *type;
|
||||||
union {
|
union {
|
||||||
|
int8_t int8Val[ISPC_MAX_NVEC];
|
||||||
|
uint8_t uint8Val[ISPC_MAX_NVEC];
|
||||||
|
int16_t int16Val[ISPC_MAX_NVEC];
|
||||||
|
uint16_t uint16Val[ISPC_MAX_NVEC];
|
||||||
int32_t int32Val[ISPC_MAX_NVEC];
|
int32_t int32Val[ISPC_MAX_NVEC];
|
||||||
uint32_t uint32Val[ISPC_MAX_NVEC];
|
uint32_t uint32Val[ISPC_MAX_NVEC];
|
||||||
bool boolVal[ISPC_MAX_NVEC];
|
bool boolVal[ISPC_MAX_NVEC];
|
||||||
|
|||||||
16
failing_tests/shuffle2-10.ispc
Normal file
16
failing_tests/shuffle2-10.ispc
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
|
||||||
|
/* failing due to llvm bug http://llvm.org/bugs/show_bug.cgi?id=10421 */
|
||||||
|
|
||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||||
|
int8 aa = aFOO[programIndex];
|
||||||
|
int8 bb = aa + programCount;
|
||||||
|
int8 shuf = shuffle(aa, bb, 2*programIndex+(int)b-5);
|
||||||
|
//CO print("%\n%\n%\n%\n", aa, bb, 2*programIndex+(int)b-5, shuf);
|
||||||
|
RET[programIndex] = shuf;
|
||||||
|
}
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
RET[programIndex] = 1 + 2*programIndex;
|
||||||
|
}
|
||||||
@@ -158,38 +158,40 @@ static bool lRunTest(const char *fn) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
llvm::Function *func;
|
llvm::Function *func;
|
||||||
if ((func = module->getFunction("ISPCLaunch")) != NULL)
|
#define DO_FUNC(FUNC ,FUNCNAME) \
|
||||||
ee->addGlobalMapping(func, (void *)ISPCLaunch);
|
if ((func = module->getFunction(FUNCNAME)) != NULL) \
|
||||||
if ((func = module->getFunction("ISPCSync")) != NULL)
|
ee->addGlobalMapping(func, (void *)FUNC)
|
||||||
ee->addGlobalMapping(func, (void *)ISPCSync);
|
DO_FUNC(ISPCLaunch, "ISPCLaunch");
|
||||||
|
DO_FUNC(ISPCSync, "ISPCSync");
|
||||||
#ifdef ISPC_IS_WINDOWS
|
#ifdef ISPC_IS_WINDOWS
|
||||||
if ((func = module->getFunction("ISPCMalloc")) != NULL)
|
DO_FUNC(ISPCMalloc, "ISPCMalloc");
|
||||||
ee->addGlobalMapping(func, (void *)ISPCMalloc);
|
DO_FUNC(ISPCFree, "ISPCFree");
|
||||||
if ((func = module->getFunction("ISPCFree")) != NULL)
|
|
||||||
ee->addGlobalMapping(func, (void *)ISPCFree);
|
|
||||||
#endif // ISPC_IS_WINDOWS
|
#endif // ISPC_IS_WINDOWS
|
||||||
if ((func = module->getFunction("putchar")) != NULL)
|
DO_FUNC(putchar, "putchar");
|
||||||
ee->addGlobalMapping(func, (void *)putchar);
|
DO_FUNC(printf, "printf");
|
||||||
if ((func = module->getFunction("printf")) != NULL)
|
DO_FUNC(fflush, "fflush");
|
||||||
ee->addGlobalMapping(func, (void *)printf);
|
DO_FUNC(sinf, "sinf");
|
||||||
if ((func = module->getFunction("fflush")) != NULL)
|
DO_FUNC(cosf, "cosf");
|
||||||
ee->addGlobalMapping(func, (void *)fflush);
|
DO_FUNC(tanf, "tanf");
|
||||||
if ((func = module->getFunction("sinf")) != NULL)
|
DO_FUNC(atanf, "atanf");
|
||||||
ee->addGlobalMapping(func, (void *)sinf);
|
DO_FUNC(atan2f, "atan2f");
|
||||||
if ((func = module->getFunction("cosf")) != NULL)
|
DO_FUNC(powf, "powf");
|
||||||
ee->addGlobalMapping(func, (void *)cosf);
|
DO_FUNC(expf, "expf");
|
||||||
if ((func = module->getFunction("tanf")) != NULL)
|
DO_FUNC(logf, "logf");
|
||||||
ee->addGlobalMapping(func, (void *)tanf);
|
DO_FUNC(sin, "sin");
|
||||||
if ((func = module->getFunction("atanf")) != NULL)
|
DO_FUNC(cos, "cos");
|
||||||
ee->addGlobalMapping(func, (void *)atanf);
|
DO_FUNC(tan, "tan");
|
||||||
if ((func = module->getFunction("atan2f")) != NULL)
|
DO_FUNC(atan, "atan");
|
||||||
ee->addGlobalMapping(func, (void *)atan2f);
|
DO_FUNC(atan2, "atan2");
|
||||||
if ((func = module->getFunction("powf")) != NULL)
|
DO_FUNC(pow, "pow");
|
||||||
ee->addGlobalMapping(func, (void *)powf);
|
DO_FUNC(exp, "exp");
|
||||||
if ((func = module->getFunction("expf")) != NULL)
|
DO_FUNC(log, "log");
|
||||||
ee->addGlobalMapping(func, (void *)expf);
|
DO_FUNC(memset, "memset");
|
||||||
if ((func = module->getFunction("logf")) != NULL)
|
#ifdef ISPC_IS_APPLE
|
||||||
ee->addGlobalMapping(func, (void *)logf);
|
DO_FUNC(memset_pattern4, "memset_pattern4");
|
||||||
|
DO_FUNC(memset_pattern8, "memset_pattern8");
|
||||||
|
DO_FUNC(memset_pattern16, "memset_pattern16");
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef ISPC_HAVE_SVML
|
#ifdef ISPC_HAVE_SVML
|
||||||
#define DO_SVML(FUNC ,FUNCNAME) \
|
#define DO_SVML(FUNC ,FUNCNAME) \
|
||||||
|
|||||||
2
lex.ll
2
lex.ll
@@ -104,6 +104,8 @@ goto { return TOKEN_GOTO; }
|
|||||||
if { return TOKEN_IF; }
|
if { return TOKEN_IF; }
|
||||||
inline { return TOKEN_INLINE; }
|
inline { return TOKEN_INLINE; }
|
||||||
int { return TOKEN_INT; }
|
int { return TOKEN_INT; }
|
||||||
|
int8 { return TOKEN_INT8; }
|
||||||
|
int16 { return TOKEN_INT16; }
|
||||||
int32 { return TOKEN_INT; }
|
int32 { return TOKEN_INT; }
|
||||||
int64 { return TOKEN_INT64; }
|
int64 { return TOKEN_INT64; }
|
||||||
launch { return TOKEN_LAUNCH; }
|
launch { return TOKEN_LAUNCH; }
|
||||||
|
|||||||
154
llvmutil.cpp
154
llvmutil.cpp
@@ -41,28 +41,39 @@
|
|||||||
LLVM_TYPE_CONST llvm::Type *LLVMTypes::VoidType = NULL;
|
LLVM_TYPE_CONST llvm::Type *LLVMTypes::VoidType = NULL;
|
||||||
LLVM_TYPE_CONST llvm::PointerType *LLVMTypes::VoidPointerType = NULL;
|
LLVM_TYPE_CONST llvm::PointerType *LLVMTypes::VoidPointerType = NULL;
|
||||||
LLVM_TYPE_CONST llvm::Type *LLVMTypes::BoolType = NULL;
|
LLVM_TYPE_CONST llvm::Type *LLVMTypes::BoolType = NULL;
|
||||||
|
|
||||||
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int8Type = NULL;
|
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int8Type = NULL;
|
||||||
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int16Type = NULL;
|
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int16Type = NULL;
|
||||||
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int32Type = NULL;
|
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int32Type = NULL;
|
||||||
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int32PointerType = NULL;
|
|
||||||
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int64Type = NULL;
|
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int64Type = NULL;
|
||||||
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int64PointerType = NULL;
|
|
||||||
LLVM_TYPE_CONST llvm::Type *LLVMTypes::FloatType = NULL;
|
LLVM_TYPE_CONST llvm::Type *LLVMTypes::FloatType = NULL;
|
||||||
LLVM_TYPE_CONST llvm::Type *LLVMTypes::FloatPointerType = NULL;
|
|
||||||
LLVM_TYPE_CONST llvm::Type *LLVMTypes::DoubleType = NULL;
|
LLVM_TYPE_CONST llvm::Type *LLVMTypes::DoubleType = NULL;
|
||||||
|
|
||||||
|
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int8PointerType = NULL;
|
||||||
|
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int16PointerType = NULL;
|
||||||
|
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int32PointerType = NULL;
|
||||||
|
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int64PointerType = NULL;
|
||||||
|
LLVM_TYPE_CONST llvm::Type *LLVMTypes::FloatPointerType = NULL;
|
||||||
LLVM_TYPE_CONST llvm::Type *LLVMTypes::DoublePointerType = NULL;
|
LLVM_TYPE_CONST llvm::Type *LLVMTypes::DoublePointerType = NULL;
|
||||||
|
|
||||||
LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::MaskType = NULL;
|
LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::MaskType = NULL;
|
||||||
LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::BoolVectorType = NULL;
|
LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::BoolVectorType = NULL;
|
||||||
|
|
||||||
LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::Int1VectorType = NULL;
|
LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::Int1VectorType = NULL;
|
||||||
|
LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::Int8VectorType = NULL;
|
||||||
|
LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::Int16VectorType = NULL;
|
||||||
LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::Int32VectorType = NULL;
|
LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::Int32VectorType = NULL;
|
||||||
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int32VectorPointerType = NULL;
|
|
||||||
LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::Int64VectorType = NULL;
|
LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::Int64VectorType = NULL;
|
||||||
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int64VectorPointerType = NULL;
|
|
||||||
LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::FloatVectorType = NULL;
|
LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::FloatVectorType = NULL;
|
||||||
LLVM_TYPE_CONST llvm::Type *LLVMTypes::FloatVectorPointerType = NULL;
|
|
||||||
LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::DoubleVectorType = NULL;
|
LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::DoubleVectorType = NULL;
|
||||||
|
|
||||||
|
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int8VectorPointerType = NULL;
|
||||||
|
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int16VectorPointerType = NULL;
|
||||||
|
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int32VectorPointerType = NULL;
|
||||||
|
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int64VectorPointerType = NULL;
|
||||||
|
LLVM_TYPE_CONST llvm::Type *LLVMTypes::FloatVectorPointerType = NULL;
|
||||||
LLVM_TYPE_CONST llvm::Type *LLVMTypes::DoubleVectorPointerType = NULL;
|
LLVM_TYPE_CONST llvm::Type *LLVMTypes::DoubleVectorPointerType = NULL;
|
||||||
|
|
||||||
LLVM_TYPE_CONST llvm::ArrayType *LLVMTypes::VoidPointerVectorType = NULL;
|
LLVM_TYPE_CONST llvm::ArrayType *LLVMTypes::VoidPointerVectorType = NULL;
|
||||||
|
|
||||||
llvm::Constant *LLVMTrue = NULL;
|
llvm::Constant *LLVMTrue = NULL;
|
||||||
@@ -75,16 +86,20 @@ void
|
|||||||
InitLLVMUtil(llvm::LLVMContext *ctx, Target target) {
|
InitLLVMUtil(llvm::LLVMContext *ctx, Target target) {
|
||||||
LLVMTypes::VoidType = llvm::Type::getVoidTy(*ctx);
|
LLVMTypes::VoidType = llvm::Type::getVoidTy(*ctx);
|
||||||
LLVMTypes::VoidPointerType = llvm::PointerType::get(llvm::Type::getInt8Ty(*ctx), 0);
|
LLVMTypes::VoidPointerType = llvm::PointerType::get(llvm::Type::getInt8Ty(*ctx), 0);
|
||||||
|
|
||||||
LLVMTypes::BoolType = llvm::Type::getInt1Ty(*ctx);
|
LLVMTypes::BoolType = llvm::Type::getInt1Ty(*ctx);
|
||||||
LLVMTypes::Int8Type = llvm::Type::getInt8Ty(*ctx);
|
LLVMTypes::Int8Type = llvm::Type::getInt8Ty(*ctx);
|
||||||
LLVMTypes::Int16Type = llvm::Type::getInt16Ty(*ctx);
|
LLVMTypes::Int16Type = llvm::Type::getInt16Ty(*ctx);
|
||||||
LLVMTypes::Int32Type = llvm::Type::getInt32Ty(*ctx);
|
LLVMTypes::Int32Type = llvm::Type::getInt32Ty(*ctx);
|
||||||
LLVMTypes::Int32PointerType = llvm::PointerType::get(LLVMTypes::Int32Type, 0);
|
|
||||||
LLVMTypes::Int64Type = llvm::Type::getInt64Ty(*ctx);
|
LLVMTypes::Int64Type = llvm::Type::getInt64Ty(*ctx);
|
||||||
LLVMTypes::Int64PointerType = llvm::PointerType::get(LLVMTypes::Int64Type, 0);
|
|
||||||
LLVMTypes::FloatType = llvm::Type::getFloatTy(*ctx);
|
LLVMTypes::FloatType = llvm::Type::getFloatTy(*ctx);
|
||||||
LLVMTypes::FloatPointerType = llvm::PointerType::get(LLVMTypes::FloatType, 0);
|
|
||||||
LLVMTypes::DoubleType = llvm::Type::getDoubleTy(*ctx);
|
LLVMTypes::DoubleType = llvm::Type::getDoubleTy(*ctx);
|
||||||
|
|
||||||
|
LLVMTypes::Int8PointerType = llvm::PointerType::get(LLVMTypes::Int8Type, 0);
|
||||||
|
LLVMTypes::Int16PointerType = llvm::PointerType::get(LLVMTypes::Int16Type, 0);
|
||||||
|
LLVMTypes::Int32PointerType = llvm::PointerType::get(LLVMTypes::Int32Type, 0);
|
||||||
|
LLVMTypes::Int64PointerType = llvm::PointerType::get(LLVMTypes::Int64Type, 0);
|
||||||
|
LLVMTypes::FloatPointerType = llvm::PointerType::get(LLVMTypes::FloatType, 0);
|
||||||
LLVMTypes::DoublePointerType = llvm::PointerType::get(LLVMTypes::DoubleType, 0);
|
LLVMTypes::DoublePointerType = llvm::PointerType::get(LLVMTypes::DoubleType, 0);
|
||||||
|
|
||||||
// Note that both the mask and bool vectors are vectors of int32s
|
// Note that both the mask and bool vectors are vectors of int32s
|
||||||
@@ -95,18 +110,26 @@ InitLLVMUtil(llvm::LLVMContext *ctx, Target target) {
|
|||||||
|
|
||||||
LLVMTypes::Int1VectorType =
|
LLVMTypes::Int1VectorType =
|
||||||
llvm::VectorType::get(llvm::Type::getInt1Ty(*ctx), target.vectorWidth);
|
llvm::VectorType::get(llvm::Type::getInt1Ty(*ctx), target.vectorWidth);
|
||||||
|
LLVMTypes::Int8VectorType =
|
||||||
|
llvm::VectorType::get(LLVMTypes::Int8Type, target.vectorWidth);
|
||||||
|
LLVMTypes::Int16VectorType =
|
||||||
|
llvm::VectorType::get(LLVMTypes::Int16Type, target.vectorWidth);
|
||||||
LLVMTypes::Int32VectorType =
|
LLVMTypes::Int32VectorType =
|
||||||
llvm::VectorType::get(LLVMTypes::Int32Type, target.vectorWidth);
|
llvm::VectorType::get(LLVMTypes::Int32Type, target.vectorWidth);
|
||||||
LLVMTypes::Int32VectorPointerType = llvm::PointerType::get(LLVMTypes::Int32VectorType, 0);
|
|
||||||
LLVMTypes::Int64VectorType =
|
LLVMTypes::Int64VectorType =
|
||||||
llvm::VectorType::get(LLVMTypes::Int64Type, target.vectorWidth);
|
llvm::VectorType::get(LLVMTypes::Int64Type, target.vectorWidth);
|
||||||
LLVMTypes::Int64VectorPointerType = llvm::PointerType::get(LLVMTypes::Int64VectorType, 0);
|
|
||||||
LLVMTypes::FloatVectorType =
|
LLVMTypes::FloatVectorType =
|
||||||
llvm::VectorType::get(LLVMTypes::FloatType, target.vectorWidth);
|
llvm::VectorType::get(LLVMTypes::FloatType, target.vectorWidth);
|
||||||
LLVMTypes::FloatVectorPointerType = llvm::PointerType::get(LLVMTypes::FloatVectorType, 0);
|
|
||||||
LLVMTypes::DoubleVectorType =
|
LLVMTypes::DoubleVectorType =
|
||||||
llvm::VectorType::get(LLVMTypes::DoubleType, target.vectorWidth);
|
llvm::VectorType::get(LLVMTypes::DoubleType, target.vectorWidth);
|
||||||
|
|
||||||
|
LLVMTypes::Int8VectorPointerType = llvm::PointerType::get(LLVMTypes::Int8VectorType, 0);
|
||||||
|
LLVMTypes::Int16VectorPointerType = llvm::PointerType::get(LLVMTypes::Int16VectorType, 0);
|
||||||
|
LLVMTypes::Int32VectorPointerType = llvm::PointerType::get(LLVMTypes::Int32VectorType, 0);
|
||||||
|
LLVMTypes::Int64VectorPointerType = llvm::PointerType::get(LLVMTypes::Int64VectorType, 0);
|
||||||
|
LLVMTypes::FloatVectorPointerType = llvm::PointerType::get(LLVMTypes::FloatVectorType, 0);
|
||||||
LLVMTypes::DoubleVectorPointerType = llvm::PointerType::get(LLVMTypes::DoubleVectorType, 0);
|
LLVMTypes::DoubleVectorPointerType = llvm::PointerType::get(LLVMTypes::DoubleVectorType, 0);
|
||||||
|
|
||||||
LLVMTypes::VoidPointerVectorType =
|
LLVMTypes::VoidPointerVectorType =
|
||||||
llvm::ArrayType::get(LLVMTypes::VoidPointerType, target.vectorWidth);
|
llvm::ArrayType::get(LLVMTypes::VoidPointerType, target.vectorWidth);
|
||||||
|
|
||||||
@@ -133,7 +156,36 @@ InitLLVMUtil(llvm::LLVMContext *ctx, Target target) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
llvm::ConstantInt *LLVMInt32(int32_t ival) {
|
llvm::ConstantInt *
|
||||||
|
LLVMInt8(int8_t ival) {
|
||||||
|
return llvm::ConstantInt::get(llvm::Type::getInt8Ty(*g->ctx), ival,
|
||||||
|
true /*signed*/);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
llvm::ConstantInt *
|
||||||
|
LLVMUInt8(uint8_t ival) {
|
||||||
|
return llvm::ConstantInt::get(llvm::Type::getInt8Ty(*g->ctx), ival,
|
||||||
|
false /*unsigned*/);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
llvm::ConstantInt *
|
||||||
|
LLVMInt16(int16_t ival) {
|
||||||
|
return llvm::ConstantInt::get(llvm::Type::getInt16Ty(*g->ctx), ival,
|
||||||
|
true /*signed*/);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
llvm::ConstantInt *
|
||||||
|
LLVMUInt16(uint16_t ival) {
|
||||||
|
return llvm::ConstantInt::get(llvm::Type::getInt16Ty(*g->ctx), ival,
|
||||||
|
false /*unsigned*/);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
llvm::ConstantInt *
|
||||||
|
LLVMInt32(int32_t ival) {
|
||||||
return llvm::ConstantInt::get(llvm::Type::getInt32Ty(*g->ctx), ival,
|
return llvm::ConstantInt::get(llvm::Type::getInt32Ty(*g->ctx), ival,
|
||||||
true /*signed*/);
|
true /*signed*/);
|
||||||
}
|
}
|
||||||
@@ -172,6 +224,82 @@ LLVMDouble(double dval) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
llvm::Constant *
|
||||||
|
LLVMInt8Vector(int8_t ival) {
|
||||||
|
llvm::Constant *v = LLVMInt8(ival);
|
||||||
|
std::vector<llvm::Constant *> vals;
|
||||||
|
for (int i = 0; i < g->target.vectorWidth; ++i)
|
||||||
|
vals.push_back(v);
|
||||||
|
return llvm::ConstantVector::get(vals);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
llvm::Constant *
|
||||||
|
LLVMInt8Vector(const int8_t *ivec) {
|
||||||
|
std::vector<llvm::Constant *> vals;
|
||||||
|
for (int i = 0; i < g->target.vectorWidth; ++i)
|
||||||
|
vals.push_back(LLVMInt8(ivec[i]));
|
||||||
|
return llvm::ConstantVector::get(vals);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
llvm::Constant *
|
||||||
|
LLVMUInt8Vector(uint8_t ival) {
|
||||||
|
llvm::Constant *v = LLVMUInt8(ival);
|
||||||
|
std::vector<llvm::Constant *> vals;
|
||||||
|
for (int i = 0; i < g->target.vectorWidth; ++i)
|
||||||
|
vals.push_back(v);
|
||||||
|
return llvm::ConstantVector::get(vals);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
llvm::Constant *
|
||||||
|
LLVMUInt8Vector(const uint8_t *ivec) {
|
||||||
|
std::vector<llvm::Constant *> vals;
|
||||||
|
for (int i = 0; i < g->target.vectorWidth; ++i)
|
||||||
|
vals.push_back(LLVMUInt8(ivec[i]));
|
||||||
|
return llvm::ConstantVector::get(vals);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
llvm::Constant *
|
||||||
|
LLVMInt16Vector(int16_t ival) {
|
||||||
|
llvm::Constant *v = LLVMInt16(ival);
|
||||||
|
std::vector<llvm::Constant *> vals;
|
||||||
|
for (int i = 0; i < g->target.vectorWidth; ++i)
|
||||||
|
vals.push_back(v);
|
||||||
|
return llvm::ConstantVector::get(vals);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
llvm::Constant *
|
||||||
|
LLVMInt16Vector(const int16_t *ivec) {
|
||||||
|
std::vector<llvm::Constant *> vals;
|
||||||
|
for (int i = 0; i < g->target.vectorWidth; ++i)
|
||||||
|
vals.push_back(LLVMInt16(ivec[i]));
|
||||||
|
return llvm::ConstantVector::get(vals);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
llvm::Constant *
|
||||||
|
LLVMUInt16Vector(uint16_t ival) {
|
||||||
|
llvm::Constant *v = LLVMUInt16(ival);
|
||||||
|
std::vector<llvm::Constant *> vals;
|
||||||
|
for (int i = 0; i < g->target.vectorWidth; ++i)
|
||||||
|
vals.push_back(v);
|
||||||
|
return llvm::ConstantVector::get(vals);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
llvm::Constant *
|
||||||
|
LLVMUInt16Vector(const uint16_t *ivec) {
|
||||||
|
std::vector<llvm::Constant *> vals;
|
||||||
|
for (int i = 0; i < g->target.vectorWidth; ++i)
|
||||||
|
vals.push_back(LLVMUInt16(ivec[i]));
|
||||||
|
return llvm::ConstantVector::get(vals);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
llvm::Constant *
|
llvm::Constant *
|
||||||
LLVMInt32Vector(int32_t ival) {
|
LLVMInt32Vector(int32_t ival) {
|
||||||
llvm::Constant *v = LLVMInt32(ival);
|
llvm::Constant *v = LLVMInt32(ival);
|
||||||
|
|||||||
65
llvmutil.h
65
llvmutil.h
@@ -53,28 +53,39 @@ struct LLVMTypes {
|
|||||||
static LLVM_TYPE_CONST llvm::Type *VoidType;
|
static LLVM_TYPE_CONST llvm::Type *VoidType;
|
||||||
static LLVM_TYPE_CONST llvm::PointerType *VoidPointerType;
|
static LLVM_TYPE_CONST llvm::PointerType *VoidPointerType;
|
||||||
static LLVM_TYPE_CONST llvm::Type *BoolType;
|
static LLVM_TYPE_CONST llvm::Type *BoolType;
|
||||||
|
|
||||||
static LLVM_TYPE_CONST llvm::Type *Int8Type;
|
static LLVM_TYPE_CONST llvm::Type *Int8Type;
|
||||||
static LLVM_TYPE_CONST llvm::Type *Int16Type;
|
static LLVM_TYPE_CONST llvm::Type *Int16Type;
|
||||||
static LLVM_TYPE_CONST llvm::Type *Int32Type;
|
static LLVM_TYPE_CONST llvm::Type *Int32Type;
|
||||||
static LLVM_TYPE_CONST llvm::Type *Int32PointerType;
|
|
||||||
static LLVM_TYPE_CONST llvm::Type *Int64Type;
|
static LLVM_TYPE_CONST llvm::Type *Int64Type;
|
||||||
static LLVM_TYPE_CONST llvm::Type *Int64PointerType;
|
|
||||||
static LLVM_TYPE_CONST llvm::Type *FloatType;
|
static LLVM_TYPE_CONST llvm::Type *FloatType;
|
||||||
static LLVM_TYPE_CONST llvm::Type *FloatPointerType;
|
|
||||||
static LLVM_TYPE_CONST llvm::Type *DoubleType;
|
static LLVM_TYPE_CONST llvm::Type *DoubleType;
|
||||||
|
|
||||||
|
static LLVM_TYPE_CONST llvm::Type *Int8PointerType;
|
||||||
|
static LLVM_TYPE_CONST llvm::Type *Int16PointerType;
|
||||||
|
static LLVM_TYPE_CONST llvm::Type *Int32PointerType;
|
||||||
|
static LLVM_TYPE_CONST llvm::Type *Int64PointerType;
|
||||||
|
static LLVM_TYPE_CONST llvm::Type *FloatPointerType;
|
||||||
static LLVM_TYPE_CONST llvm::Type *DoublePointerType;
|
static LLVM_TYPE_CONST llvm::Type *DoublePointerType;
|
||||||
|
|
||||||
static LLVM_TYPE_CONST llvm::VectorType *MaskType;
|
static LLVM_TYPE_CONST llvm::VectorType *MaskType;
|
||||||
|
|
||||||
static LLVM_TYPE_CONST llvm::VectorType *BoolVectorType;
|
static LLVM_TYPE_CONST llvm::VectorType *BoolVectorType;
|
||||||
static LLVM_TYPE_CONST llvm::VectorType *Int1VectorType;
|
static LLVM_TYPE_CONST llvm::VectorType *Int1VectorType;
|
||||||
|
static LLVM_TYPE_CONST llvm::VectorType *Int8VectorType;
|
||||||
|
static LLVM_TYPE_CONST llvm::VectorType *Int16VectorType;
|
||||||
static LLVM_TYPE_CONST llvm::VectorType *Int32VectorType;
|
static LLVM_TYPE_CONST llvm::VectorType *Int32VectorType;
|
||||||
static LLVM_TYPE_CONST llvm::Type *Int32VectorPointerType;
|
|
||||||
static LLVM_TYPE_CONST llvm::VectorType *Int64VectorType;
|
static LLVM_TYPE_CONST llvm::VectorType *Int64VectorType;
|
||||||
static LLVM_TYPE_CONST llvm::Type *Int64VectorPointerType;
|
|
||||||
static LLVM_TYPE_CONST llvm::VectorType *FloatVectorType;
|
static LLVM_TYPE_CONST llvm::VectorType *FloatVectorType;
|
||||||
static LLVM_TYPE_CONST llvm::Type *FloatVectorPointerType;
|
|
||||||
static LLVM_TYPE_CONST llvm::VectorType *DoubleVectorType;
|
static LLVM_TYPE_CONST llvm::VectorType *DoubleVectorType;
|
||||||
|
|
||||||
|
static LLVM_TYPE_CONST llvm::Type *Int8VectorPointerType;
|
||||||
|
static LLVM_TYPE_CONST llvm::Type *Int16VectorPointerType;
|
||||||
|
static LLVM_TYPE_CONST llvm::Type *Int32VectorPointerType;
|
||||||
|
static LLVM_TYPE_CONST llvm::Type *Int64VectorPointerType;
|
||||||
|
static LLVM_TYPE_CONST llvm::Type *FloatVectorPointerType;
|
||||||
static LLVM_TYPE_CONST llvm::Type *DoubleVectorPointerType;
|
static LLVM_TYPE_CONST llvm::Type *DoubleVectorPointerType;
|
||||||
|
|
||||||
static LLVM_TYPE_CONST llvm::ArrayType *VoidPointerVectorType;
|
static LLVM_TYPE_CONST llvm::ArrayType *VoidPointerVectorType;
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -89,6 +100,14 @@ extern llvm::Constant *LLVMTrue, *LLVMFalse;
|
|||||||
*/
|
*/
|
||||||
extern void InitLLVMUtil(llvm::LLVMContext *ctx, Target target);
|
extern void InitLLVMUtil(llvm::LLVMContext *ctx, Target target);
|
||||||
|
|
||||||
|
/** Returns an LLVM i8 constant of the given value */
|
||||||
|
extern llvm::ConstantInt *LLVMInt8(int8_t i);
|
||||||
|
/** Returns an LLVM i8 constant of the given unsigned value */
|
||||||
|
extern llvm::ConstantInt *LLVMUInt8(uint8_t i);
|
||||||
|
/** Returns an LLVM i16 constant of the given value */
|
||||||
|
extern llvm::ConstantInt *LLVMInt16(int16_t i);
|
||||||
|
/** Returns an LLVM i16 constant of the given unsigned value */
|
||||||
|
extern llvm::ConstantInt *LLVMUInt16(uint16_t i);
|
||||||
/** Returns an LLVM i32 constant of the given value */
|
/** Returns an LLVM i32 constant of the given value */
|
||||||
extern llvm::ConstantInt *LLVMInt32(int32_t i);
|
extern llvm::ConstantInt *LLVMInt32(int32_t i);
|
||||||
/** Returns an LLVM i32 constant of the given value */
|
/** Returns an LLVM i32 constant of the given value */
|
||||||
@@ -105,18 +124,35 @@ extern llvm::Constant *LLVMDouble(double f);
|
|||||||
/** Returns an LLVM boolean vector constant of the given value smeared
|
/** Returns an LLVM boolean vector constant of the given value smeared
|
||||||
across all elements */
|
across all elements */
|
||||||
extern llvm::Constant *LLVMBoolVector(bool v);
|
extern llvm::Constant *LLVMBoolVector(bool v);
|
||||||
|
|
||||||
|
/** Returns an LLVM i8 vector constant of the given value smeared
|
||||||
|
across all elements */
|
||||||
|
extern llvm::Constant *LLVMInt8Vector(int8_t i);
|
||||||
|
/** Returns an LLVM i8 vector constant of the given unsigned value smeared
|
||||||
|
across all elements */
|
||||||
|
extern llvm::Constant *LLVMUInt8Vector(uint8_t i);
|
||||||
|
|
||||||
|
/** Returns an LLVM i16 vector constant of the given value smeared
|
||||||
|
across all elements */
|
||||||
|
extern llvm::Constant *LLVMInt16Vector(int16_t i);
|
||||||
|
/** Returns an LLVM i16 vector constant of the given unsigned value smeared
|
||||||
|
across all elements */
|
||||||
|
extern llvm::Constant *LLVMUInt16Vector(uint16_t i);
|
||||||
|
|
||||||
/** Returns an LLVM i32 vector constant of the given value smeared
|
/** Returns an LLVM i32 vector constant of the given value smeared
|
||||||
across all elements */
|
across all elements */
|
||||||
extern llvm::Constant *LLVMInt32Vector(int32_t i);
|
extern llvm::Constant *LLVMInt32Vector(int32_t i);
|
||||||
/** Returns an LLVM i32 vector constant of the given value smeared
|
/** Returns an LLVM i32 vector constant of the given value smeared
|
||||||
across all elements */
|
across all elements */
|
||||||
extern llvm::Constant *LLVMUInt32Vector(uint32_t i);
|
extern llvm::Constant *LLVMUInt32Vector(uint32_t i);
|
||||||
|
|
||||||
/** Returns an LLVM i64 vector constant of the given value smeared
|
/** Returns an LLVM i64 vector constant of the given value smeared
|
||||||
across all elements */
|
across all elements */
|
||||||
extern llvm::Constant *LLVMInt64Vector(int64_t i);
|
extern llvm::Constant *LLVMInt64Vector(int64_t i);
|
||||||
/** Returns an LLVM i64 vector constant of the given value smeared
|
/** Returns an LLVM i64 vector constant of the given value smeared
|
||||||
across all elements */
|
across all elements */
|
||||||
extern llvm::Constant *LLVMUInt64Vector(uint64_t i);
|
extern llvm::Constant *LLVMUInt64Vector(uint64_t i);
|
||||||
|
|
||||||
/** Returns an LLVM float vector constant of the given value smeared
|
/** Returns an LLVM float vector constant of the given value smeared
|
||||||
across all elements */
|
across all elements */
|
||||||
extern llvm::Constant *LLVMFloatVector(float f);
|
extern llvm::Constant *LLVMFloatVector(float f);
|
||||||
@@ -127,18 +163,35 @@ extern llvm::Constant *LLVMDoubleVector(double f);
|
|||||||
/** Returns an LLVM boolean vector based on the given array of values.
|
/** Returns an LLVM boolean vector based on the given array of values.
|
||||||
The array should have g->target.vectorWidth elements. */
|
The array should have g->target.vectorWidth elements. */
|
||||||
extern llvm::Constant *LLVMBoolVector(const bool *v);
|
extern llvm::Constant *LLVMBoolVector(const bool *v);
|
||||||
|
|
||||||
|
/** Returns an LLVM i8 vector based on the given array of values.
|
||||||
|
The array should have g->target.vectorWidth elements. */
|
||||||
|
extern llvm::Constant *LLVMInt8Vector(const int8_t *i);
|
||||||
|
/** Returns an LLVM i8 vector based on the given array of unsigned values.
|
||||||
|
The array should have g->target.vectorWidth elements. */
|
||||||
|
extern llvm::Constant *LLVMUInt8Vector(const uint8_t *i);
|
||||||
|
|
||||||
|
/** Returns an LLVM i16 vector based on the given array of values.
|
||||||
|
The array should have g->target.vectorWidth elements. */
|
||||||
|
extern llvm::Constant *LLVMInt16Vector(const int16_t *i);
|
||||||
|
/** Returns an LLVM i16 vector based on the given array of unsigned values.
|
||||||
|
The array should have g->target.vectorWidth elements. */
|
||||||
|
extern llvm::Constant *LLVMUInt16Vector(const uint16_t *i);
|
||||||
|
|
||||||
/** Returns an LLVM i32 vector based on the given array of values.
|
/** Returns an LLVM i32 vector based on the given array of values.
|
||||||
The array should have g->target.vectorWidth elements. */
|
The array should have g->target.vectorWidth elements. */
|
||||||
extern llvm::Constant *LLVMInt32Vector(const int32_t *i);
|
extern llvm::Constant *LLVMInt32Vector(const int32_t *i);
|
||||||
/** Returns an LLVM i32 vector based on the given array of values.
|
/** Returns an LLVM i32 vector based on the given array of values.
|
||||||
The array should have g->target.vectorWidth elements. */
|
The array should have g->target.vectorWidth elements. */
|
||||||
extern llvm::Constant *LLVMUInt32Vector(const uint32_t *i);
|
extern llvm::Constant *LLVMUInt32Vector(const uint32_t *i);
|
||||||
|
|
||||||
/** Returns an LLVM i64 vector based on the given array of values.
|
/** Returns an LLVM i64 vector based on the given array of values.
|
||||||
The array should have g->target.vectorWidth elements. */
|
The array should have g->target.vectorWidth elements. */
|
||||||
extern llvm::Constant *LLVMInt64Vector(const int64_t *i);
|
extern llvm::Constant *LLVMInt64Vector(const int64_t *i);
|
||||||
/** Returns an LLVM i64 vector based on the given array of values.
|
/** Returns an LLVM i64 vector based on the given array of values.
|
||||||
The array should have g->target.vectorWidth elements. */
|
The array should have g->target.vectorWidth elements. */
|
||||||
extern llvm::Constant *LLVMUInt64Vector(const uint64_t *i);
|
extern llvm::Constant *LLVMUInt64Vector(const uint64_t *i);
|
||||||
|
|
||||||
/** Returns an LLVM float vector based on the given array of values.
|
/** Returns an LLVM float vector based on the given array of values.
|
||||||
The array should have g->target.vectorWidth elements. */
|
The array should have g->target.vectorWidth elements. */
|
||||||
extern llvm::Constant *LLVMFloatVector(const float *f);
|
extern llvm::Constant *LLVMFloatVector(const float *f);
|
||||||
|
|||||||
487
opt.cpp
487
opt.cpp
@@ -409,7 +409,6 @@ IntrinsicsOpt::IntrinsicsOpt()
|
|||||||
llvm::Intrinsic::getDeclaration(m->module, llvm::Intrinsic::x86_sse_movmsk_ps);
|
llvm::Intrinsic::getDeclaration(m->module, llvm::Intrinsic::x86_sse_movmsk_ps);
|
||||||
maskInstructions.push_back(sseMovmsk);
|
maskInstructions.push_back(sseMovmsk);
|
||||||
maskInstructions.push_back(m->module->getFunction("llvm.x86.avx.movmsk.ps"));
|
maskInstructions.push_back(m->module->getFunction("llvm.x86.avx.movmsk.ps"));
|
||||||
maskInstructions.push_back(m->module->getFunction("llvm.x86.mic.mask16.to.int"));
|
|
||||||
maskInstructions.push_back(m->module->getFunction("__movmsk"));
|
maskInstructions.push_back(m->module->getFunction("__movmsk"));
|
||||||
|
|
||||||
// And all of the blend instructions
|
// And all of the blend instructions
|
||||||
@@ -418,8 +417,6 @@ IntrinsicsOpt::IntrinsicsOpt()
|
|||||||
0xf, 0, 1, 2));
|
0xf, 0, 1, 2));
|
||||||
blendInstructions.push_back(BlendInstruction(
|
blendInstructions.push_back(BlendInstruction(
|
||||||
m->module->getFunction("llvm.x86.avx.blendvps"), 0xff, 0, 1, 2));
|
m->module->getFunction("llvm.x86.avx.blendvps"), 0xff, 0, 1, 2));
|
||||||
blendInstructions.push_back(BlendInstruction(
|
|
||||||
m->module->getFunction("llvm.x86.mic.blend.ps"), 0xffff, 1, 2, 0));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -499,8 +496,8 @@ bool
|
|||||||
IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) {
|
IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) {
|
||||||
bool modifiedAny = false;
|
bool modifiedAny = false;
|
||||||
restart:
|
restart:
|
||||||
for (llvm::BasicBlock::iterator i = bb.begin(), e = bb.end(); i != e; ++i) {
|
for (llvm::BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e; ++iter) {
|
||||||
llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*i);
|
llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*iter);
|
||||||
if (!callInst)
|
if (!callInst)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
@@ -512,7 +509,8 @@ IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) {
|
|||||||
|
|
||||||
// If the values are the same, then no need to blend..
|
// If the values are the same, then no need to blend..
|
||||||
if (v[0] == v[1]) {
|
if (v[0] == v[1]) {
|
||||||
llvm::ReplaceInstWithValue(i->getParent()->getInstList(), i, v[0]);
|
llvm::ReplaceInstWithValue(iter->getParent()->getInstList(),
|
||||||
|
iter, v[0]);
|
||||||
modifiedAny = true;
|
modifiedAny = true;
|
||||||
goto restart;
|
goto restart;
|
||||||
}
|
}
|
||||||
@@ -524,12 +522,14 @@ IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) {
|
|||||||
// otherwise the result is undefined and any value is fine,
|
// otherwise the result is undefined and any value is fine,
|
||||||
// ergo the defined one is an acceptable result.)
|
// ergo the defined one is an acceptable result.)
|
||||||
if (lIsUndef(v[0])) {
|
if (lIsUndef(v[0])) {
|
||||||
llvm::ReplaceInstWithValue(i->getParent()->getInstList(), i, v[1]);
|
llvm::ReplaceInstWithValue(iter->getParent()->getInstList(),
|
||||||
|
iter, v[1]);
|
||||||
modifiedAny = true;
|
modifiedAny = true;
|
||||||
goto restart;
|
goto restart;
|
||||||
}
|
}
|
||||||
if (lIsUndef(v[1])) {
|
if (lIsUndef(v[1])) {
|
||||||
llvm::ReplaceInstWithValue(i->getParent()->getInstList(), i, v[0]);
|
llvm::ReplaceInstWithValue(iter->getParent()->getInstList(),
|
||||||
|
iter, v[0]);
|
||||||
modifiedAny = true;
|
modifiedAny = true;
|
||||||
goto restart;
|
goto restart;
|
||||||
}
|
}
|
||||||
@@ -544,7 +544,8 @@ IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) {
|
|||||||
value = v[1];
|
value = v[1];
|
||||||
|
|
||||||
if (value != NULL) {
|
if (value != NULL) {
|
||||||
llvm::ReplaceInstWithValue(i->getParent()->getInstList(), i, value);
|
llvm::ReplaceInstWithValue(iter->getParent()->getInstList(),
|
||||||
|
iter, value);
|
||||||
modifiedAny = true;
|
modifiedAny = true;
|
||||||
goto restart;
|
goto restart;
|
||||||
}
|
}
|
||||||
@@ -557,7 +558,8 @@ IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) {
|
|||||||
// with the corresponding integer mask from its elements
|
// with the corresponding integer mask from its elements
|
||||||
// high bits.
|
// high bits.
|
||||||
llvm::Value *value = LLVMInt32(mask);
|
llvm::Value *value = LLVMInt32(mask);
|
||||||
llvm::ReplaceInstWithValue(i->getParent()->getInstList(), i, value);
|
llvm::ReplaceInstWithValue(iter->getParent()->getInstList(),
|
||||||
|
iter, value);
|
||||||
modifiedAny = true;
|
modifiedAny = true;
|
||||||
goto restart;
|
goto restart;
|
||||||
}
|
}
|
||||||
@@ -653,10 +655,18 @@ lSizeOfIfKnown(const llvm::Type *type, uint64_t *size) {
|
|||||||
*size = 1;
|
*size = 1;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
if (type == LLVMTypes::Int8VectorType) {
|
||||||
|
*size = g->target.vectorWidth * 1;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
else if (type == LLVMTypes::Int16Type) {
|
else if (type == LLVMTypes::Int16Type) {
|
||||||
*size = 2;
|
*size = 2;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
if (type == LLVMTypes::Int16VectorType) {
|
||||||
|
*size = g->target.vectorWidth * 2;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
else if (type == LLVMTypes::FloatType || type == LLVMTypes::Int32Type) {
|
else if (type == LLVMTypes::FloatType || type == LLVMTypes::Int32Type) {
|
||||||
*size = 4;
|
*size = 4;
|
||||||
return true;
|
return true;
|
||||||
@@ -978,33 +988,53 @@ lGetPtrAndOffsets(llvm::Value *ptrs, llvm::Value **basePtr,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Table entry used by GatherScatterFlattenOpt::runOnBasicBlock: pairs one
// __pseudo_{gather,scatter}_N function with the matching
// __pseudo_{gather,scatter}_base_offsets_N function that calls to it are
// rewritten to, plus the facts the rewrite needs about that function.
struct GSInfo {
|
||||||
|
// pgFuncName:   name of the pseudo gather/scatter function to match.
// pgboFuncName: name of the base+offsets variant that replaces it.
// ig:           true for a gather entry, false for a scatter entry.
// es:           element size (the table passes 1, 2, 4 or 8 for the
//               8/16/32/64-bit variants).
GSInfo(const char *pgFuncName, const char *pgboFuncName, bool ig, int es)
|
||||||
|
: isGather(ig), elementSize(es) {
|
||||||
|
// Both lookups go through the global module; nothing is checked here,
// the pass asserts that both pointers are non-NULL after building the
// table.
func = m->module->getFunction(pgFuncName);
|
||||||
|
baseOffsetsFunc = m->module->getFunction(pgboFuncName);
|
||||||
|
}
|
||||||
|
// Function that call instructions are compared against.
llvm::Function *func;
|
||||||
|
// Function called by the replacement instruction.
llvm::Function *baseOffsetsFunc;
|
||||||
|
// Selects the gather vs. scatter rewrite path (the two read the mask
// from different call operands).
const bool isGather;
|
||||||
|
// Forwarded as the last argument of lGetPtrAndOffsets().
const int elementSize;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
bool
|
bool
|
||||||
GatherScatterFlattenOpt::runOnBasicBlock(llvm::BasicBlock &bb) {
|
GatherScatterFlattenOpt::runOnBasicBlock(llvm::BasicBlock &bb) {
|
||||||
llvm::Function *gather32Func = m->module->getFunction("__pseudo_gather_32");
|
GSInfo gsFuncs[] = {
|
||||||
llvm::Function *gather64Func = m->module->getFunction("__pseudo_gather_64");
|
GSInfo("__pseudo_gather_8", "__pseudo_gather_base_offsets_8", true, 1),
|
||||||
llvm::Function *scatter32Func = m->module->getFunction("__pseudo_scatter_32");
|
GSInfo("__pseudo_gather_16", "__pseudo_gather_base_offsets_16", true, 2),
|
||||||
llvm::Function *scatter64Func = m->module->getFunction("__pseudo_scatter_64");
|
GSInfo("__pseudo_gather_32", "__pseudo_gather_base_offsets_32", true, 4),
|
||||||
assert(gather32Func && gather64Func && scatter32Func && scatter64Func);
|
GSInfo("__pseudo_gather_64", "__pseudo_gather_base_offsets_64", true, 8),
|
||||||
|
GSInfo("__pseudo_scatter_8", "__pseudo_scatter_base_offsets_8", false, 1),
|
||||||
|
GSInfo("__pseudo_scatter_16", "__pseudo_scatter_base_offsets_16", false, 2),
|
||||||
|
GSInfo("__pseudo_scatter_32", "__pseudo_scatter_base_offsets_32", false, 4),
|
||||||
|
GSInfo("__pseudo_scatter_64", "__pseudo_scatter_base_offsets_64", false, 8),
|
||||||
|
};
|
||||||
|
int numGSFuncs = sizeof(gsFuncs) / sizeof(gsFuncs[0]);
|
||||||
|
for (int i = 0; i < numGSFuncs; ++i)
|
||||||
|
assert(gsFuncs[i].func != NULL && gsFuncs[i].baseOffsetsFunc != NULL);
|
||||||
|
|
||||||
bool modifiedAny = false;
|
bool modifiedAny = false;
|
||||||
restart:
|
restart:
|
||||||
// Iterate through all of the instructions in the basic block.
|
// Iterate through all of the instructions in the basic block.
|
||||||
for (llvm::BasicBlock::iterator i = bb.begin(), e = bb.end(); i != e; ++i) {
|
for (llvm::BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e; ++iter) {
|
||||||
llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*i);
|
llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*iter);
|
||||||
// If we don't have a call to one of the
|
// If we don't have a call to one of the
|
||||||
// __pseudo_{gather,scatter}_* functions, then just go on to the
|
// __pseudo_{gather,scatter}_* functions, then just go on to the
|
||||||
// next instruction.
|
// next instruction.
|
||||||
if (!callInst ||
|
if (callInst == NULL)
|
||||||
(callInst->getCalledFunction() != gather32Func &&
|
continue;
|
||||||
callInst->getCalledFunction() != gather64Func &&
|
GSInfo *info = NULL;
|
||||||
callInst->getCalledFunction() != scatter32Func &&
|
for (int i = 0; i < numGSFuncs; ++i)
|
||||||
callInst->getCalledFunction() != scatter64Func))
|
if (callInst->getCalledFunction() == gsFuncs[i].func) {
|
||||||
|
info = &gsFuncs[i];
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (info == NULL)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
bool isGather = (callInst->getCalledFunction() == gather32Func ||
|
|
||||||
callInst->getCalledFunction() == gather64Func);
|
|
||||||
bool is32 = (callInst->getCalledFunction() == gather32Func ||
|
|
||||||
callInst->getCalledFunction() == scatter32Func);
|
|
||||||
|
|
||||||
// Transform the array of pointers to a single base pointer and an
|
// Transform the array of pointers to a single base pointer and an
|
||||||
// array of int32 offsets. (All the hard work is done by
|
// array of int32 offsets. (All the hard work is done by
|
||||||
@@ -1012,19 +1042,15 @@ GatherScatterFlattenOpt::runOnBasicBlock(llvm::BasicBlock &bb) {
|
|||||||
llvm::Value *ptrs = callInst->getArgOperand(0);
|
llvm::Value *ptrs = callInst->getArgOperand(0);
|
||||||
llvm::Value *basePtr = NULL;
|
llvm::Value *basePtr = NULL;
|
||||||
llvm::Value *offsetVector = lGetPtrAndOffsets(ptrs, &basePtr, callInst,
|
llvm::Value *offsetVector = lGetPtrAndOffsets(ptrs, &basePtr, callInst,
|
||||||
is32 ? 4 : 8);
|
info->elementSize);
|
||||||
// Cast the base pointer to a void *, since that's what the
|
// Cast the base pointer to a void *, since that's what the
|
||||||
// __pseudo_*_base_offsets_* functions want.
|
// __pseudo_*_base_offsets_* functions want.
|
||||||
basePtr = new llvm::BitCastInst(basePtr, LLVMTypes::VoidPointerType, "base2void",
|
basePtr = new llvm::BitCastInst(basePtr, LLVMTypes::VoidPointerType,
|
||||||
callInst);
|
"base2void", callInst);
|
||||||
lCopyMetadata(basePtr, callInst);
|
lCopyMetadata(basePtr, callInst);
|
||||||
|
|
||||||
if (isGather) {
|
if (info->isGather) {
|
||||||
llvm::Value *mask = callInst->getArgOperand(1);
|
llvm::Value *mask = callInst->getArgOperand(1);
|
||||||
llvm::Function *gFunc =
|
|
||||||
m->module->getFunction(is32 ? "__pseudo_gather_base_offsets_32" :
|
|
||||||
"__pseudo_gather_base_offsets_64");
|
|
||||||
assert(gFunc != NULL);
|
|
||||||
|
|
||||||
// Generate a new function call to the next pseudo gather
|
// Generate a new function call to the next pseudo gather
|
||||||
// base+offsets instruction. Note that we're passing a NULL
|
// base+offsets instruction. Note that we're passing a NULL
|
||||||
@@ -1035,11 +1061,12 @@ GatherScatterFlattenOpt::runOnBasicBlock(llvm::BasicBlock &bb) {
|
|||||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn)
|
#if defined(LLVM_3_0) || defined(LLVM_3_0svn)
|
||||||
llvm::ArrayRef<llvm::Value *> newArgArray(&newArgs[0], &newArgs[3]);
|
llvm::ArrayRef<llvm::Value *> newArgArray(&newArgs[0], &newArgs[3]);
|
||||||
llvm::Instruction *newCall =
|
llvm::Instruction *newCall =
|
||||||
llvm::CallInst::Create(gFunc, newArgArray, "newgather",
|
llvm::CallInst::Create(info->baseOffsetsFunc, newArgArray,
|
||||||
(llvm::Instruction *)NULL);
|
"newgather", (llvm::Instruction *)NULL);
|
||||||
#else
|
#else
|
||||||
llvm::Instruction *newCall =
|
llvm::Instruction *newCall =
|
||||||
llvm::CallInst::Create(gFunc, &newArgs[0], &newArgs[3], "newgather");
|
llvm::CallInst::Create(info->baseOffsetsFunc, &newArgs[0], &newArgs[3],
|
||||||
|
"newgather");
|
||||||
#endif
|
#endif
|
||||||
lCopyMetadata(newCall, callInst);
|
lCopyMetadata(newCall, callInst);
|
||||||
llvm::ReplaceInstWithInst(callInst, newCall);
|
llvm::ReplaceInstWithInst(callInst, newCall);
|
||||||
@@ -1047,10 +1074,6 @@ GatherScatterFlattenOpt::runOnBasicBlock(llvm::BasicBlock &bb) {
|
|||||||
else {
|
else {
|
||||||
llvm::Value *mask = callInst->getArgOperand(2);
|
llvm::Value *mask = callInst->getArgOperand(2);
|
||||||
llvm::Value *rvalue = callInst->getArgOperand(1);
|
llvm::Value *rvalue = callInst->getArgOperand(1);
|
||||||
llvm::Function *gFunc =
|
|
||||||
m->module->getFunction(is32 ? "__pseudo_scatter_base_offsets_32" :
|
|
||||||
"__pseudo_scatter_base_offsets_64");
|
|
||||||
assert(gFunc);
|
|
||||||
|
|
||||||
// Generate a new function call to the next pseudo scatter
|
// Generate a new function call to the next pseudo scatter
|
||||||
// base+offsets instruction. See above for why passing NULL
|
// base+offsets instruction. See above for why passing NULL
|
||||||
@@ -1059,11 +1082,12 @@ GatherScatterFlattenOpt::runOnBasicBlock(llvm::BasicBlock &bb) {
|
|||||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn)
|
#if defined(LLVM_3_0) || defined(LLVM_3_0svn)
|
||||||
llvm::ArrayRef<llvm::Value *> newArgArray(&newArgs[0], &newArgs[4]);
|
llvm::ArrayRef<llvm::Value *> newArgArray(&newArgs[0], &newArgs[4]);
|
||||||
llvm::Instruction *newCall =
|
llvm::Instruction *newCall =
|
||||||
llvm::CallInst::Create(gFunc, newArgArray, "",
|
llvm::CallInst::Create(info->baseOffsetsFunc, newArgArray, "",
|
||||||
(llvm::Instruction *)NULL);
|
(llvm::Instruction *)NULL);
|
||||||
#else
|
#else
|
||||||
llvm::Instruction *newCall =
|
llvm::Instruction *newCall =
|
||||||
llvm::CallInst::Create(gFunc, &newArgs[0], &newArgs[4]);
|
llvm::CallInst::Create(info->baseOffsetsFunc, &newArgs[0],
|
||||||
|
&newArgs[4]);
|
||||||
#endif
|
#endif
|
||||||
lCopyMetadata(newCall, callInst);
|
lCopyMetadata(newCall, callInst);
|
||||||
llvm::ReplaceInstWithInst(callInst, newCall);
|
llvm::ReplaceInstWithInst(callInst, newCall);
|
||||||
@@ -1105,28 +1129,53 @@ char MaskedStoreOptPass::ID = 0;
|
|||||||
llvm::RegisterPass<MaskedStoreOptPass> mss("masked-store-scalarize",
|
llvm::RegisterPass<MaskedStoreOptPass> mss("masked-store-scalarize",
|
||||||
"Masked Store Scalarize Pass");
|
"Masked Store Scalarize Pass");
|
||||||
|
|
||||||
|
// Table entry used by MaskedStoreOptPass::runOnBasicBlock: one masked store
// function (pseudo, blend or regular variant) together with the alignment
// to use when a call to it is replaced by a plain store.
struct MSInfo {
|
||||||
|
// name: name of the masked store function to look up in the module.
// a:    alignment handed to llvm::StoreInst for the replacement store
//       (the table passes 1, 2, 4 or 8 for the 8/16/32/64-bit variants).
MSInfo(const char *name, const int a)
|
||||||
|
: align(a) {
|
||||||
|
func = m->module->getFunction(name);
|
||||||
|
// Every function named in the table is required to exist in the module.
assert(func != NULL);
|
||||||
|
}
|
||||||
|
// Function that call instructions are compared against.
llvm::Function *func;
|
||||||
|
// Alignment of the store that replaces the call.
const int align;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
bool
|
bool
|
||||||
MaskedStoreOptPass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
MaskedStoreOptPass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
||||||
llvm::Function *pms32Func = m->module->getFunction("__pseudo_masked_store_32");
|
MSInfo msInfo[] = {
|
||||||
llvm::Function *pms64Func = m->module->getFunction("__pseudo_masked_store_64");
|
MSInfo("__pseudo_masked_store_8", 1),
|
||||||
llvm::Function *msb32Func = m->module->getFunction("__masked_store_blend_32");
|
MSInfo("__pseudo_masked_store_16", 2),
|
||||||
llvm::Function *msb64Func = m->module->getFunction("__masked_store_blend_64");
|
MSInfo("__pseudo_masked_store_32", 4),
|
||||||
llvm::Function *ms32Func = m->module->getFunction("__masked_store_32");
|
MSInfo("__pseudo_masked_store_64", 8),
|
||||||
llvm::Function *ms64Func = m->module->getFunction("__masked_store_64");
|
MSInfo("__masked_store_blend_8", 1),
|
||||||
|
MSInfo("__masked_store_blend_16", 2),
|
||||||
|
MSInfo("__masked_store_blend_32", 4),
|
||||||
|
MSInfo("__masked_store_blend_64", 8),
|
||||||
|
MSInfo("__masked_store_8", 1),
|
||||||
|
MSInfo("__masked_store_16", 2),
|
||||||
|
MSInfo("__masked_store_32", 4),
|
||||||
|
MSInfo("__masked_store_64", 8)
|
||||||
|
};
|
||||||
|
|
||||||
bool modifiedAny = false;
|
bool modifiedAny = false;
|
||||||
restart:
|
restart:
|
||||||
// Iterate over all of the instructions to look for one of the various
|
// Iterate over all of the instructions to look for one of the various
|
||||||
// masked store functions
|
// masked store functions
|
||||||
for (llvm::BasicBlock::iterator i = bb.begin(), e = bb.end(); i != e; ++i) {
|
for (llvm::BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e; ++iter) {
|
||||||
llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*i);
|
llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*iter);
|
||||||
if (!callInst)
|
if (!callInst)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
llvm::Function *called = callInst->getCalledFunction();
|
llvm::Function *called = callInst->getCalledFunction();
|
||||||
if (called != pms32Func && called != pms64Func &&
|
int nMSFuncs = sizeof(msInfo) / sizeof(msInfo[0]);
|
||||||
called != msb32Func && called != msb64Func &&
|
MSInfo *info = NULL;
|
||||||
called != ms32Func && called != ms64Func)
|
for (int i = 0; i < nMSFuncs; ++i) {
|
||||||
|
if (called == msInfo[i].func) {
|
||||||
|
info = &msInfo[i];
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (info == NULL)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
// Got one; grab the operands
|
// Got one; grab the operands
|
||||||
@@ -1150,15 +1199,12 @@ MaskedStoreOptPass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
|||||||
LLVM_TYPE_CONST llvm::Type *rvalueType = rvalue->getType();
|
LLVM_TYPE_CONST llvm::Type *rvalueType = rvalue->getType();
|
||||||
LLVM_TYPE_CONST llvm::Type *ptrType =
|
LLVM_TYPE_CONST llvm::Type *ptrType =
|
||||||
llvm::PointerType::get(rvalueType, 0);
|
llvm::PointerType::get(rvalueType, 0);
|
||||||
// Need to update this when int8/int16 are added
|
|
||||||
int align = (called == pms32Func || called == pms64Func ||
|
|
||||||
called == msb32Func) ? 4 : 8;
|
|
||||||
|
|
||||||
lvalue = new llvm::BitCastInst(lvalue, ptrType, "lvalue_to_ptr_type", callInst);
|
lvalue = new llvm::BitCastInst(lvalue, ptrType, "lvalue_to_ptr_type", callInst);
|
||||||
lCopyMetadata(lvalue, callInst);
|
lCopyMetadata(lvalue, callInst);
|
||||||
llvm::Instruction *store =
|
llvm::Instruction *store =
|
||||||
new llvm::StoreInst(rvalue, lvalue, false /* not volatile */,
|
new llvm::StoreInst(rvalue, lvalue, false /* not volatile */,
|
||||||
align);
|
info->align);
|
||||||
lCopyMetadata(store, callInst);
|
lCopyMetadata(store, callInst);
|
||||||
llvm::ReplaceInstWithInst(callInst, store);
|
llvm::ReplaceInstWithInst(callInst, store);
|
||||||
|
|
||||||
@@ -1180,9 +1226,9 @@ CreateMaskedStoreOptPass() {
|
|||||||
// LowerMaskedStorePass
|
// LowerMaskedStorePass
|
||||||
|
|
||||||
/** When the front-end needs to do a masked store, it emits a
|
/** When the front-end needs to do a masked store, it emits a
|
||||||
__pseudo_masked_store_{32,64} call as a placeholder. This pass lowers
|
__pseudo_masked_store_{8,16,32,64} call as a placeholder. This pass
|
||||||
these calls to either __masked_store_{32,64} or
|
lowers these calls to either __masked_store_{8,16,32,64} or
|
||||||
__masked_store_blend_{32,64} calls.
|
__masked_store_blend_{8,16,32,64} calls.
|
||||||
*/
|
*/
|
||||||
class LowerMaskedStorePass : public llvm::BasicBlockPass {
|
class LowerMaskedStorePass : public llvm::BasicBlockPass {
|
||||||
public:
|
public:
|
||||||
@@ -1227,45 +1273,51 @@ lIsStackVariablePointer(llvm::Value *lvalue) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/** Utilty routine to figure out which masked store function to use. The
|
// Table entry used by LowerMaskedStorePass::runOnBasicBlock: groups a
// __pseudo_masked_store_N function with the two functions a call to it can
// be lowered to. The pass picks blendFunc when it decides to blend and
// maskedStoreFunc otherwise.
struct LMSInfo {
|
||||||
blend parameter indicates if we want the blending version, is32
|
// pname:  name of the pseudo masked store function to match.
// bname:  name of the corresponding __masked_store_blend_N function.
// msname: name of the corresponding __masked_store_N function.
LMSInfo(const char *pname, const char *bname, const char *msname) {
|
||||||
indicates if the element size is 32 bits.
|
pseudoFunc = m->module->getFunction(pname);
|
||||||
*/
|
blendFunc = m->module->getFunction(bname);
|
||||||
static const char *
|
maskedStoreFunc = m->module->getFunction(msname);
|
||||||
lMaskedStoreName(bool blend, bool is32) {
|
// All three functions are required to exist in the module.
assert(pseudoFunc != NULL && blendFunc != NULL &&
|
||||||
if (blend) {
|
maskedStoreFunc != NULL);
|
||||||
if (is32)
|
|
||||||
return "__masked_store_blend_32";
|
|
||||||
else
|
|
||||||
return "__masked_store_blend_64";
|
|
||||||
}
|
}
|
||||||
else {
|
// Function that call instructions are compared against.
llvm::Function *pseudoFunc;
|
||||||
if (is32)
|
// Replacement used when the pass chooses the blending lowering.
llvm::Function *blendFunc;
|
||||||
return "__masked_store_32";
|
// Replacement used otherwise.
llvm::Function *maskedStoreFunc;
|
||||||
else
|
};
|
||||||
return "__masked_store_64";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
bool
|
bool
|
||||||
LowerMaskedStorePass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
LowerMaskedStorePass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
||||||
llvm::Function *maskedStore32Func = m->module->getFunction("__pseudo_masked_store_32");
|
LMSInfo msInfo[] = {
|
||||||
llvm::Function *maskedStore64Func = m->module->getFunction("__pseudo_masked_store_64");
|
LMSInfo("__pseudo_masked_store_8", "__masked_store_blend_8",
|
||||||
assert(maskedStore32Func && maskedStore64Func);
|
"__masked_store_8"),
|
||||||
|
LMSInfo("__pseudo_masked_store_16", "__masked_store_blend_16",
|
||||||
|
"__masked_store_16"),
|
||||||
|
LMSInfo("__pseudo_masked_store_32", "__masked_store_blend_32",
|
||||||
|
"__masked_store_32"),
|
||||||
|
LMSInfo("__pseudo_masked_store_64", "__masked_store_blend_64",
|
||||||
|
"__masked_store_64")
|
||||||
|
};
|
||||||
|
|
||||||
bool modifiedAny = false;
|
bool modifiedAny = false;
|
||||||
restart:
|
restart:
|
||||||
for (llvm::BasicBlock::iterator i = bb.begin(), e = bb.end(); i != e; ++i) {
|
for (llvm::BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e; ++iter) {
|
||||||
// Iterate through all of the instructions and look for
|
// Iterate through all of the instructions and look for
|
||||||
// __pseudo_masked_store_* calls.
|
// __pseudo_masked_store_* calls.
|
||||||
llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*i);
|
llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*iter);
|
||||||
if (!callInst ||
|
if (callInst == NULL)
|
||||||
(callInst->getCalledFunction() != maskedStore32Func &&
|
continue;
|
||||||
callInst->getCalledFunction() != maskedStore64Func))
|
LMSInfo *info = NULL;
|
||||||
|
for (unsigned int i = 0; i < sizeof(msInfo) / sizeof(msInfo[0]); ++i) {
|
||||||
|
if (callInst->getCalledFunction() == msInfo[i].pseudoFunc) {
|
||||||
|
info = &msInfo[i];
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (info == NULL)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
bool is32 = (callInst->getCalledFunction() == maskedStore32Func);
|
|
||||||
llvm::Value *lvalue = callInst->getArgOperand(0);
|
llvm::Value *lvalue = callInst->getArgOperand(0);
|
||||||
llvm::Value *rvalue = callInst->getArgOperand(1);
|
llvm::Value *rvalue = callInst->getArgOperand(1);
|
||||||
llvm::Value *mask = callInst->getArgOperand(2);
|
llvm::Value *mask = callInst->getArgOperand(2);
|
||||||
@@ -1282,8 +1334,7 @@ LowerMaskedStorePass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
|||||||
|
|
||||||
// Generate the call to the appropriate masked store function and
|
// Generate the call to the appropriate masked store function and
|
||||||
// replace the __pseudo_* one with it.
|
// replace the __pseudo_* one with it.
|
||||||
llvm::Function *fms = m->module->getFunction(lMaskedStoreName(doBlend, is32));
|
llvm::Function *fms = doBlend ? info->blendFunc : info->maskedStoreFunc;
|
||||||
assert(fms);
|
|
||||||
llvm::Value *args[3] = { lvalue, rvalue, mask };
|
llvm::Value *args[3] = { lvalue, rvalue, mask };
|
||||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn)
|
#if defined(LLVM_3_0) || defined(LLVM_3_0svn)
|
||||||
llvm::ArrayRef<llvm::Value *> newArgArray(&args[0], &args[3]);
|
llvm::ArrayRef<llvm::Value *> newArgArray(&args[0], &args[3]);
|
||||||
@@ -1872,37 +1923,94 @@ lVectorIsLinear(llvm::Value *v[ISPC_MAX_NVEC], int stride) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Table entry used by GSImprovementsPass::runOnBasicBlock for gathers: one
// __pseudo_gather_base_offsets_N function plus the cheaper load functions a
// call to it may be turned into, and the per-element size.
struct GatherImpInfo {
|
||||||
|
// pName:  name of the pseudo gather base+offsets function to match.
// lbName: name of the __load_and_broadcast_N function.
// lmName: name of the __load_masked_N function.
// a:      element size / alignment (the table passes 1, 2, 4 or 8).
GatherImpInfo(const char *pName, const char *lbName, const char *lmName,
|
||||||
|
int a)
|
||||||
|
: align(a) {
|
||||||
|
pseudoFunc = m->module->getFunction(pName);
|
||||||
|
loadBroadcastFunc = m->module->getFunction(lbName);
|
||||||
|
loadMaskedFunc = m->module->getFunction(lmName);
|
||||||
|
|
||||||
|
// All three functions are required to exist in the module.
assert(pseudoFunc != NULL && loadBroadcastFunc != NULL &&
|
||||||
|
loadMaskedFunc != NULL);
|
||||||
|
}
|
||||||
|
// Function that call instructions are compared against.
llvm::Function *pseudoFunc;
|
||||||
|
// Called instead of the gather when all offsets are equal.
llvm::Function *loadBroadcastFunc;
|
||||||
|
// NOTE(review): presumably the replacement for the linear-offsets case;
// its use is outside the visible part of the pass -- confirm.
llvm::Function *loadMaskedFunc;
|
||||||
|
// Used by the pass as the expected stride when testing whether the
// offsets are linear (lVectorIsLinear).
const int align;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
// Table entry used by GSImprovementsPass::runOnBasicBlock for scatters: one
// __pseudo_scatter_base_offsets_N function plus what is needed to replace a
// call to it with a simpler store.
struct ScatterImpInfo {
|
||||||
|
// pName:  name of the pseudo scatter base+offsets function to match.
// msName: name of the __pseudo_masked_store_N function.
// vpt:    pointer-to-vector type for this element width (the table passes
//         LLVMTypes::Int{8,16,32,64}VectorPointerType).
// a:      element size / alignment (the table passes 1, 2, 4 or 8).
ScatterImpInfo(const char *pName, const char *msName,
|
||||||
|
LLVM_TYPE_CONST llvm::Type *vpt, int a)
|
||||||
|
: align(a) {
|
||||||
|
pseudoFunc = m->module->getFunction(pName);
|
||||||
|
maskedStoreFunc = m->module->getFunction(msName);
|
||||||
|
vecPtrType = vpt;
|
||||||
|
// Both functions are required to exist in the module.
assert(pseudoFunc != NULL && maskedStoreFunc != NULL);
|
||||||
|
}
|
||||||
|
// Function that call instructions are compared against.
llvm::Function *pseudoFunc;
|
||||||
|
// NOTE(review): presumably called for the linear-offsets case; its use is
// outside the visible part of the pass -- confirm.
llvm::Function *maskedStoreFunc;
|
||||||
|
// NOTE(review): presumably the type the base pointer is cast to before
// the masked store; its use is outside the visible part of the pass --
// confirm.
LLVM_TYPE_CONST llvm::Type *vecPtrType;
|
||||||
|
// Alignment of the scalar store emitted when all offsets are equal; also
// the expected stride when testing whether the offsets are linear.
const int align;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
bool
|
bool
|
||||||
GSImprovementsPass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
GSImprovementsPass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
||||||
llvm::Function *gather32Func = m->module->getFunction("__pseudo_gather_base_offsets_32");
|
GatherImpInfo gInfo[] = {
|
||||||
llvm::Function *gather64Func = m->module->getFunction("__pseudo_gather_base_offsets_64");
|
GatherImpInfo("__pseudo_gather_base_offsets_8", "__load_and_broadcast_8",
|
||||||
llvm::Function *scatter32Func = m->module->getFunction("__pseudo_scatter_base_offsets_32");
|
"__load_masked_8", 1),
|
||||||
llvm::Function *scatter64Func = m->module->getFunction("__pseudo_scatter_base_offsets_64");
|
GatherImpInfo("__pseudo_gather_base_offsets_16", "__load_and_broadcast_16",
|
||||||
assert(gather32Func && gather64Func && scatter32Func && scatter64Func);
|
"__load_masked_16", 2),
|
||||||
|
GatherImpInfo("__pseudo_gather_base_offsets_32", "__load_and_broadcast_32",
|
||||||
|
"__load_masked_32", 4),
|
||||||
|
GatherImpInfo("__pseudo_gather_base_offsets_64", "__load_and_broadcast_64",
|
||||||
|
"__load_masked_64", 8)
|
||||||
|
};
|
||||||
|
ScatterImpInfo sInfo[] = {
|
||||||
|
ScatterImpInfo("__pseudo_scatter_base_offsets_8", "__pseudo_masked_store_8",
|
||||||
|
LLVMTypes::Int8VectorPointerType, 1),
|
||||||
|
ScatterImpInfo("__pseudo_scatter_base_offsets_16", "__pseudo_masked_store_16",
|
||||||
|
LLVMTypes::Int16VectorPointerType, 2),
|
||||||
|
ScatterImpInfo("__pseudo_scatter_base_offsets_32", "__pseudo_masked_store_32",
|
||||||
|
LLVMTypes::Int32VectorPointerType, 4),
|
||||||
|
ScatterImpInfo("__pseudo_scatter_base_offsets_64", "__pseudo_masked_store_64",
|
||||||
|
LLVMTypes::Int64VectorPointerType, 8)
|
||||||
|
};
|
||||||
|
|
||||||
bool modifiedAny = false;
|
bool modifiedAny = false;
|
||||||
|
|
||||||
restart:
|
restart:
|
||||||
for (llvm::BasicBlock::iterator i = bb.begin(), e = bb.end(); i != e; ++i) {
|
for (llvm::BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e; ++iter) {
|
||||||
// Iterate over all of the instructions and look for calls to
|
// Iterate over all of the instructions and look for calls to
|
||||||
// __pseudo_*_base_offsets_* calls.
|
// __pseudo_*_base_offsets_* calls.
|
||||||
llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*i);
|
llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*iter);
|
||||||
if (!callInst ||
|
if (callInst == NULL)
|
||||||
(callInst->getCalledFunction() != gather32Func &&
|
continue;
|
||||||
callInst->getCalledFunction() != gather64Func &&
|
llvm::Function *calledFunc = callInst->getCalledFunction();
|
||||||
callInst->getCalledFunction() != scatter32Func &&
|
GatherImpInfo *gatherInfo = NULL;
|
||||||
callInst->getCalledFunction() != scatter64Func))
|
ScatterImpInfo *scatterInfo = NULL;
|
||||||
|
for (unsigned int i = 0; i < sizeof(gInfo) / sizeof(gInfo[0]); ++i) {
|
||||||
|
if (calledFunc == gInfo[i].pseudoFunc) {
|
||||||
|
gatherInfo = &gInfo[i];
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (unsigned int i = 0; i < sizeof(sInfo) / sizeof(sInfo[0]); ++i) {
|
||||||
|
if (calledFunc == sInfo[i].pseudoFunc) {
|
||||||
|
scatterInfo = &sInfo[i];
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (gatherInfo == NULL && scatterInfo == NULL)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
SourcePos pos;
|
SourcePos pos;
|
||||||
bool ok = lGetSourcePosFromMetadata(callInst, &pos);
|
bool ok = lGetSourcePosFromMetadata(callInst, &pos);
|
||||||
assert(ok);
|
assert(ok);
|
||||||
|
|
||||||
bool isGather = (callInst->getCalledFunction() == gather32Func ||
|
|
||||||
callInst->getCalledFunction() == gather64Func);
|
|
||||||
bool is32 = (callInst->getCalledFunction() == gather32Func ||
|
|
||||||
callInst->getCalledFunction() == scatter32Func);
|
|
||||||
|
|
||||||
// Get the actual base pointer; note that it comes into the gather
|
// Get the actual base pointer; note that it comes into the gather
|
||||||
// or scatter function bitcast to an i8 *, so we need to work back
|
// or scatter function bitcast to an i8 *, so we need to work back
|
||||||
// to get the pointer as the original type.
|
// to get the pointer as the original type.
|
||||||
@@ -1921,7 +2029,7 @@ GSImprovementsPass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
|||||||
if (!lScalarizeVector(callInst->getArgOperand(1), offsetElements))
|
if (!lScalarizeVector(callInst->getArgOperand(1), offsetElements))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
llvm::Value *mask = callInst->getArgOperand(isGather ? 2 : 3);
|
llvm::Value *mask = callInst->getArgOperand((gatherInfo != NULL) ? 2 : 3);
|
||||||
|
|
||||||
if (lVectorValuesAllEqual(offsetElements)) {
|
if (lVectorValuesAllEqual(offsetElements)) {
|
||||||
// If all the offsets are equal, then compute the single
|
// If all the offsets are equal, then compute the single
|
||||||
@@ -1929,14 +2037,15 @@ GSImprovementsPass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
|||||||
// (arbitrarily).
|
// (arbitrarily).
|
||||||
llvm::Value *indices[1] = { offsetElements[0] };
|
llvm::Value *indices[1] = { offsetElements[0] };
|
||||||
llvm::Value *basei8 =
|
llvm::Value *basei8 =
|
||||||
new llvm::BitCastInst(base, LLVMTypes::VoidPointerType, "base2i8", callInst);
|
new llvm::BitCastInst(base, LLVMTypes::VoidPointerType,
|
||||||
|
"base2i8", callInst);
|
||||||
lCopyMetadata(basei8, callInst);
|
lCopyMetadata(basei8, callInst);
|
||||||
llvm::Value *ptr =
|
llvm::Value *ptr =
|
||||||
llvm::GetElementPtrInst::Create(basei8, &indices[0], &indices[1],
|
llvm::GetElementPtrInst::Create(basei8, &indices[0], &indices[1],
|
||||||
"ptr", callInst);
|
"ptr", callInst);
|
||||||
lCopyMetadata(ptr, callInst);
|
lCopyMetadata(ptr, callInst);
|
||||||
|
|
||||||
if (isGather) {
|
if (gatherInfo != NULL) {
|
||||||
// A gather with everyone going to the same location is
|
// A gather with everyone going to the same location is
|
||||||
// handled as a scalar load and broadcast across the lanes.
|
// handled as a scalar load and broadcast across the lanes.
|
||||||
// Note that we do still have to pass the mask to the
|
// Note that we do still have to pass the mask to the
|
||||||
@@ -1944,20 +2053,16 @@ GSImprovementsPass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
|||||||
// access memory if the mask is all off (the location may
|
// access memory if the mask is all off (the location may
|
||||||
// be invalid in that case).
|
// be invalid in that case).
|
||||||
Debug(pos, "Transformed gather to scalar load and broadcast!");
|
Debug(pos, "Transformed gather to scalar load and broadcast!");
|
||||||
llvm::Function *loadBroadcast =
|
|
||||||
m->module->getFunction(is32 ? "__load_and_broadcast_32" :
|
|
||||||
"__load_and_broadcast_64");
|
|
||||||
assert(loadBroadcast);
|
|
||||||
llvm::Value *args[2] = { ptr, mask };
|
llvm::Value *args[2] = { ptr, mask };
|
||||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn)
|
#if defined(LLVM_3_0) || defined(LLVM_3_0svn)
|
||||||
llvm::ArrayRef<llvm::Value *> newArgArray(&args[0], &args[2]);
|
llvm::ArrayRef<llvm::Value *> newArgArray(&args[0], &args[2]);
|
||||||
llvm::Instruction *newCall =
|
llvm::Instruction *newCall =
|
||||||
llvm::CallInst::Create(loadBroadcast, newArgArray,
|
llvm::CallInst::Create(gatherInfo->loadBroadcastFunc, newArgArray,
|
||||||
"load_broadcast", (llvm::Instruction *)NULL);
|
"load_broadcast", (llvm::Instruction *)NULL);
|
||||||
#else
|
#else
|
||||||
llvm::Instruction *newCall =
|
llvm::Instruction *newCall =
|
||||||
llvm::CallInst::Create(loadBroadcast, &args[0], &args[2],
|
llvm::CallInst::Create(gatherInfo->loadBroadcastFunc, &args[0],
|
||||||
"load_broadcast");
|
&args[2], "load_broadcast");
|
||||||
#endif
|
#endif
|
||||||
lCopyMetadata(newCall, callInst);
|
lCopyMetadata(newCall, callInst);
|
||||||
llvm::ReplaceInstWithInst(callInst, newCall);
|
llvm::ReplaceInstWithInst(callInst, newCall);
|
||||||
@@ -1977,8 +2082,8 @@ GSImprovementsPass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
|||||||
ptr = new llvm::BitCastInst(ptr, llvm::PointerType::get(first->getType(), 0),
|
ptr = new llvm::BitCastInst(ptr, llvm::PointerType::get(first->getType(), 0),
|
||||||
"ptr2rvalue_type", callInst);
|
"ptr2rvalue_type", callInst);
|
||||||
lCopyMetadata(ptr, callInst);
|
lCopyMetadata(ptr, callInst);
|
||||||
llvm::Instruction *sinst =
|
llvm::Instruction *sinst = new llvm::StoreInst(first, ptr, false,
|
||||||
new llvm::StoreInst(first, ptr, false, is32 ? 4 : 8 /* align */);
|
scatterInfo->align);
|
||||||
lCopyMetadata(sinst, callInst);
|
lCopyMetadata(sinst, callInst);
|
||||||
llvm::ReplaceInstWithInst(callInst, sinst);
|
llvm::ReplaceInstWithInst(callInst, sinst);
|
||||||
}
|
}
|
||||||
@@ -1987,7 +2092,8 @@ GSImprovementsPass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
|||||||
goto restart;
|
goto restart;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (lVectorIsLinear(offsetElements, is32 ? 4 : 8)) {
|
int step = gatherInfo ? gatherInfo->align : scatterInfo->align;
|
||||||
|
if (lVectorIsLinear(offsetElements, step)) {
|
||||||
// We have a linear sequence of memory locations being accessed
|
// We have a linear sequence of memory locations being accessed
|
||||||
// starting with the location given by the offset from
|
// starting with the location given by the offset from
|
||||||
// offsetElements[0], with stride of 4 or 8 bytes (for 32 bit
|
// offsetElements[0], with stride of 4 or 8 bytes (for 32 bit
|
||||||
@@ -2003,53 +2109,38 @@ GSImprovementsPass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
|||||||
"ptr", callInst);
|
"ptr", callInst);
|
||||||
lCopyMetadata(ptr, callInst);
|
lCopyMetadata(ptr, callInst);
|
||||||
|
|
||||||
if (isGather) {
|
if (gatherInfo != NULL) {
|
||||||
Debug(pos, "Transformed gather to unaligned vector load!");
|
Debug(pos, "Transformed gather to unaligned vector load!");
|
||||||
// FIXME: make this an aligned load when possible..
|
|
||||||
// FIXME: are there lurking potential bugs when e.g. the
|
|
||||||
// last few entries of the mask are off and the load ends
|
|
||||||
// up straddling a page boundary?
|
|
||||||
llvm::Function *loadMasked =
|
|
||||||
m->module->getFunction(is32 ? "__load_masked_32" : "__load_masked_64");
|
|
||||||
assert(loadMasked);
|
|
||||||
|
|
||||||
llvm::Value *args[2] = { ptr, mask };
|
llvm::Value *args[2] = { ptr, mask };
|
||||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn)
|
#if defined(LLVM_3_0) || defined(LLVM_3_0svn)
|
||||||
llvm::ArrayRef<llvm::Value *> argArray(&args[0], &args[2]);
|
llvm::ArrayRef<llvm::Value *> argArray(&args[0], &args[2]);
|
||||||
llvm::Instruction *newCall =
|
llvm::Instruction *newCall =
|
||||||
llvm::CallInst::Create(loadMasked, argArray, "load_masked",
|
llvm::CallInst::Create(gatherInfo->loadMaskedFunc, argArray,
|
||||||
(llvm::Instruction *)NULL);
|
"load_masked", (llvm::Instruction *)NULL);
|
||||||
#else
|
#else
|
||||||
llvm::Instruction *newCall =
|
llvm::Instruction *newCall =
|
||||||
llvm::CallInst::Create(loadMasked, &args[0], &args[2], "load_masked");
|
llvm::CallInst::Create(gatherInfo->loadMaskedFunc, &args[0],
|
||||||
|
&args[2], "load_masked");
|
||||||
#endif
|
#endif
|
||||||
lCopyMetadata(newCall, callInst);
|
lCopyMetadata(newCall, callInst);
|
||||||
llvm::ReplaceInstWithInst(callInst, newCall);
|
llvm::ReplaceInstWithInst(callInst, newCall);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
Debug(pos, "Transformed scatter to unaligned vector store!");
|
Debug(pos, "Transformed scatter to unaligned vector store!");
|
||||||
// FIXME: make this an aligned store when possible. Need
|
|
||||||
// to work through the messiness of issuing a pseudo store
|
|
||||||
// here.
|
|
||||||
llvm::Value *rvalue = callInst->getArgOperand(2);
|
llvm::Value *rvalue = callInst->getArgOperand(2);
|
||||||
|
ptr = new llvm::BitCastInst(ptr, scatterInfo->vecPtrType, "ptrcast",
|
||||||
llvm::Function *storeMasked =
|
callInst);
|
||||||
m->module->getFunction(is32 ? "__pseudo_masked_store_32" :
|
|
||||||
"__pseudo_masked_store_64");
|
|
||||||
assert(storeMasked);
|
|
||||||
LLVM_TYPE_CONST llvm::Type *vecPtrType = is32 ?
|
|
||||||
LLVMTypes::Int32VectorPointerType : LLVMTypes::Int64VectorPointerType;
|
|
||||||
ptr = new llvm::BitCastInst(ptr, vecPtrType, "ptrcast", callInst);
|
|
||||||
|
|
||||||
llvm::Value *args[3] = { ptr, rvalue, mask };
|
llvm::Value *args[3] = { ptr, rvalue, mask };
|
||||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn)
|
#if defined(LLVM_3_0) || defined(LLVM_3_0svn)
|
||||||
llvm::ArrayRef<llvm::Value *> argArray(&args[0], &args[3]);
|
llvm::ArrayRef<llvm::Value *> argArray(&args[0], &args[3]);
|
||||||
llvm::Instruction *newCall =
|
llvm::Instruction *newCall =
|
||||||
llvm::CallInst::Create(storeMasked, argArray, "",
|
llvm::CallInst::Create(scatterInfo->maskedStoreFunc, argArray,
|
||||||
(llvm::Instruction *)NULL);
|
"", (llvm::Instruction *)NULL);
|
||||||
#else
|
#else
|
||||||
llvm::Instruction *newCall =
|
llvm::Instruction *newCall =
|
||||||
llvm::CallInst::Create(storeMasked, &args[0], &args[3], "");
|
llvm::CallInst::Create(scatterInfo->maskedStoreFunc,
|
||||||
|
&args[0], &args[3], "");
|
||||||
#endif
|
#endif
|
||||||
lCopyMetadata(newCall, callInst);
|
lCopyMetadata(newCall, callInst);
|
||||||
llvm::ReplaceInstWithInst(callInst, newCall);
|
llvm::ReplaceInstWithInst(callInst, newCall);
|
||||||
@@ -2097,31 +2188,50 @@ char LowerGSPass::ID = 0;
|
|||||||
llvm::RegisterPass<LowerGSPass> lgs("lower-gs",
|
llvm::RegisterPass<LowerGSPass> lgs("lower-gs",
|
||||||
"Lower Gather/Scatter Pass");
|
"Lower Gather/Scatter Pass");
|
||||||
|
|
||||||
|
struct LowerGSInfo {
|
||||||
|
LowerGSInfo(const char *pName, const char *aName, bool ig)
|
||||||
|
: isGather(ig) {
|
||||||
|
pseudoFunc = m->module->getFunction(pName);
|
||||||
|
actualFunc = m->module->getFunction(aName);
|
||||||
|
assert(pseudoFunc != NULL && actualFunc != NULL);
|
||||||
|
}
|
||||||
|
llvm::Function *pseudoFunc;
|
||||||
|
llvm::Function *actualFunc;
|
||||||
|
const bool isGather;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
bool
|
bool
|
||||||
LowerGSPass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
LowerGSPass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
||||||
llvm::Function *gather32Func = m->module->getFunction("__pseudo_gather_base_offsets_32");
|
LowerGSInfo lgsInfo[] = {
|
||||||
llvm::Function *gather64Func = m->module->getFunction("__pseudo_gather_base_offsets_64");
|
LowerGSInfo("__pseudo_gather_base_offsets_8", "__gather_base_offsets_i8", true),
|
||||||
llvm::Function *scatter32Func = m->module->getFunction("__pseudo_scatter_base_offsets_32");
|
LowerGSInfo("__pseudo_gather_base_offsets_16", "__gather_base_offsets_i16", true),
|
||||||
llvm::Function *scatter64Func = m->module->getFunction("__pseudo_scatter_base_offsets_64");
|
LowerGSInfo("__pseudo_gather_base_offsets_32", "__gather_base_offsets_i32", true),
|
||||||
assert(gather32Func && gather64Func && scatter32Func && scatter64Func);
|
LowerGSInfo("__pseudo_gather_base_offsets_32", "__gather_base_offsets_i32", true),
|
||||||
|
LowerGSInfo("__pseudo_scatter_base_offsets_8", "__scatter_base_offsets_i8", false),
|
||||||
|
LowerGSInfo("__pseudo_scatter_base_offsets_16", "__scatter_base_offsets_i16", false),
|
||||||
|
LowerGSInfo("__pseudo_scatter_base_offsets_32", "__scatter_base_offsets_i32", false),
|
||||||
|
LowerGSInfo("__pseudo_scatter_base_offsets_32", "__scatter_base_offsets_i32", false)
|
||||||
|
};
|
||||||
|
|
||||||
bool modifiedAny = false;
|
bool modifiedAny = false;
|
||||||
restart:
|
restart:
|
||||||
for (llvm::BasicBlock::iterator i = bb.begin(), e = bb.end(); i != e; ++i) {
|
for (llvm::BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e; ++iter) {
|
||||||
// Loop over the instructions and find calls to the
|
// Loop over the instructions and find calls to the
|
||||||
// __pseudo_*_base_offsets_* functions.
|
// __pseudo_*_base_offsets_* functions.
|
||||||
llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*i);
|
llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*iter);
|
||||||
if (!callInst ||
|
if (callInst == NULL)
|
||||||
(callInst->getCalledFunction() != gather32Func &&
|
continue;
|
||||||
callInst->getCalledFunction() != gather64Func &&
|
llvm::Function *calledFunc = callInst->getCalledFunction();
|
||||||
callInst->getCalledFunction() != scatter32Func &&
|
LowerGSInfo *info = NULL;
|
||||||
callInst->getCalledFunction() != scatter64Func))
|
for (unsigned int i = 0; i < sizeof(lgsInfo) / sizeof(lgsInfo[0]); ++i) {
|
||||||
|
if (calledFunc == lgsInfo[i].pseudoFunc) {
|
||||||
|
info = &lgsInfo[i];
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (info == NULL)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
bool isGather = (callInst->getCalledFunction() == gather32Func ||
|
|
||||||
callInst->getCalledFunction() == gather64Func);
|
|
||||||
bool is32 = (callInst->getCalledFunction() == gather32Func ||
|
|
||||||
callInst->getCalledFunction() == scatter32Func);
|
|
||||||
|
|
||||||
// Get the source position from the metadata attached to the call
|
// Get the source position from the metadata attached to the call
|
||||||
// instruction so that we can issue PerformanceWarning()s below.
|
// instruction so that we can issue PerformanceWarning()s below.
|
||||||
@@ -2129,20 +2239,11 @@ LowerGSPass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
|||||||
bool ok = lGetSourcePosFromMetadata(callInst, &pos);
|
bool ok = lGetSourcePosFromMetadata(callInst, &pos);
|
||||||
assert(ok);
|
assert(ok);
|
||||||
|
|
||||||
if (isGather) {
|
callInst->setCalledFunction(info->actualFunc);
|
||||||
llvm::Function *gFunc = m->module->getFunction(is32 ? "__gather_base_offsets_i32" :
|
if (info->isGather)
|
||||||
"__gather_base_offsets_i64");
|
|
||||||
assert(gFunc);
|
|
||||||
callInst->setCalledFunction(gFunc);
|
|
||||||
PerformanceWarning(pos, "Gather required to compute value in expression.");
|
PerformanceWarning(pos, "Gather required to compute value in expression.");
|
||||||
}
|
else
|
||||||
else {
|
|
||||||
llvm::Function *sFunc = m->module->getFunction(is32 ? "__scatter_base_offsets_i32" :
|
|
||||||
"__scatter_base_offsets_i64");
|
|
||||||
assert(sFunc);
|
|
||||||
callInst->setCalledFunction(sFunc);
|
|
||||||
PerformanceWarning(pos, "Scatter required for storing value.");
|
PerformanceWarning(pos, "Scatter required for storing value.");
|
||||||
}
|
|
||||||
modifiedAny = true;
|
modifiedAny = true;
|
||||||
goto restart;
|
goto restart;
|
||||||
}
|
}
|
||||||
@@ -2286,25 +2387,41 @@ char MakeInternalFuncsStaticPass::ID = 0;
|
|||||||
llvm::RegisterPass<MakeInternalFuncsStaticPass>
|
llvm::RegisterPass<MakeInternalFuncsStaticPass>
|
||||||
mifsp("make-internal-funcs-static", "Make Internal Funcs Static Pass");
|
mifsp("make-internal-funcs-static", "Make Internal Funcs Static Pass");
|
||||||
|
|
||||||
|
|
||||||
bool
|
bool
|
||||||
MakeInternalFuncsStaticPass::runOnModule(llvm::Module &module) {
|
MakeInternalFuncsStaticPass::runOnModule(llvm::Module &module) {
|
||||||
const char *names[] = {
|
const char *names[] = {
|
||||||
"__do_print", "__gather_base_offsets_i32", "__gather_base_offsets_i64",
|
"__do_print",
|
||||||
"__gather_elt_32", "__gather_elt_64", "__load_and_broadcast_32",
|
"__gather_base_offsets_i8", "__gather_base_offsets_i16",
|
||||||
"__load_and_broadcast_64", "__load_masked_32", "__load_masked_64",
|
"__gather_base_offsets_i32", "__gather_base_offsets_i64",
|
||||||
"__masked_store_32", "__masked_store_64", "__masked_store_blend_32",
|
"__gather_elt_8", "__gather_elt_16",
|
||||||
"__masked_store_blend_64", "__packed_load_active", "__packed_store_active",
|
"__gather_elt_32", "__gather_elt_64",
|
||||||
"__scatter_base_offsets_i32", "__scatter_base_offsets_i64", "__scatter_elt_32",
|
"__load_and_broadcast_8", "__load_and_broadcast_16",
|
||||||
"__scatter_elt_64", };
|
"__load_and_broadcast_32", "__load_and_broadcast_64",
|
||||||
|
"__load_masked_8", "__load_masked_16",
|
||||||
|
"__load_masked_32", "__load_masked_64",
|
||||||
|
"__masked_store_8", "__masked_store_16",
|
||||||
|
"__masked_store_32", "__masked_store_64",
|
||||||
|
"__masked_store_blend_8", "__masked_store_blend_16",
|
||||||
|
"__masked_store_blend_32", "__masked_store_blend_64",
|
||||||
|
"__packed_load_active", "__packed_store_active",
|
||||||
|
"__scatter_base_offsets_i8", "__scatter_base_offsets_i16",
|
||||||
|
"__scatter_base_offsets_i32", "__scatter_base_offsets_i64",
|
||||||
|
"__scatter_elt_8", "__scatter_elt_16",
|
||||||
|
"__scatter_elt_32", "__scatter_elt_64",
|
||||||
|
};
|
||||||
|
|
||||||
|
bool modifiedAny = false;
|
||||||
int count = sizeof(names) / sizeof(names[0]);
|
int count = sizeof(names) / sizeof(names[0]);
|
||||||
for (int i = 0; i < count; ++i) {
|
for (int i = 0; i < count; ++i) {
|
||||||
llvm::Function *f = m->module->getFunction(names[i]);
|
llvm::Function *f = m->module->getFunction(names[i]);
|
||||||
if (f != NULL)
|
if (f != NULL) {
|
||||||
f->setLinkage(llvm::GlobalValue::PrivateLinkage);
|
f->setLinkage(llvm::GlobalValue::PrivateLinkage);
|
||||||
|
modifiedAny = true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return modifiedAny;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
12
parse.yy
12
parse.yy
@@ -102,15 +102,16 @@ static const char *lBuiltinTokens[] = {
|
|||||||
"bool", "break", "case", "cbreak", "ccontinue", "cdo", "cfor", "char",
|
"bool", "break", "case", "cbreak", "ccontinue", "cdo", "cfor", "char",
|
||||||
"cif", "cwhile", "const", "continue", "creturn", "default", "do", "double",
|
"cif", "cwhile", "const", "continue", "creturn", "default", "do", "double",
|
||||||
"else", "enum", "export", "extern", "false", "float", "for", "goto", "if",
|
"else", "enum", "export", "extern", "false", "float", "for", "goto", "if",
|
||||||
"inline", "int", "int32", "int64", "launch", "print", "reference", "return",
|
"inline", "int", "int8", "int16", "int32", "int64", "launch", "print",
|
||||||
|
"reference", "return",
|
||||||
"static", "struct", "switch", "sync", "task", "true", "typedef", "uniform",
|
"static", "struct", "switch", "sync", "task", "true", "typedef", "uniform",
|
||||||
"unsigned", "varying", "void", "while", NULL
|
"unsigned", "varying", "void", "while", NULL
|
||||||
};
|
};
|
||||||
|
|
||||||
static const char *lParamListTokens[] = {
|
static const char *lParamListTokens[] = {
|
||||||
"bool", "char", "const", "double", "enum", "false", "float", "int",
|
"bool", "char", "const", "double", "enum", "false", "float", "int",
|
||||||
"int32", "int64", "reference", "struct", "true", "uniform", "unsigned",
|
"int8", "int16", "int32", "int64", "reference", "struct", "true",
|
||||||
"varying", "void", NULL
|
"uniform", "unsigned", "varying", "void", NULL
|
||||||
};
|
};
|
||||||
|
|
||||||
%}
|
%}
|
||||||
@@ -154,7 +155,7 @@ static const char *lParamListTokens[] = {
|
|||||||
%token TOKEN_EXTERN TOKEN_EXPORT TOKEN_STATIC TOKEN_INLINE TOKEN_TASK
|
%token TOKEN_EXTERN TOKEN_EXPORT TOKEN_STATIC TOKEN_INLINE TOKEN_TASK
|
||||||
%token TOKEN_UNIFORM TOKEN_VARYING TOKEN_TYPEDEF TOKEN_SOA
|
%token TOKEN_UNIFORM TOKEN_VARYING TOKEN_TYPEDEF TOKEN_SOA
|
||||||
%token TOKEN_CHAR TOKEN_INT TOKEN_UNSIGNED TOKEN_FLOAT TOKEN_DOUBLE
|
%token TOKEN_CHAR TOKEN_INT TOKEN_UNSIGNED TOKEN_FLOAT TOKEN_DOUBLE
|
||||||
%token TOKEN_INT64 TOKEN_CONST TOKEN_VOID TOKEN_BOOL
|
%token TOKEN_INT8 TOKEN_INT16 TOKEN_INT64 TOKEN_CONST TOKEN_VOID TOKEN_BOOL
|
||||||
%token TOKEN_ENUM TOKEN_STRUCT TOKEN_TRUE TOKEN_FALSE TOKEN_REFERENCE
|
%token TOKEN_ENUM TOKEN_STRUCT TOKEN_TRUE TOKEN_FALSE TOKEN_REFERENCE
|
||||||
|
|
||||||
%token TOKEN_CASE TOKEN_DEFAULT TOKEN_IF TOKEN_ELSE TOKEN_SWITCH
|
%token TOKEN_CASE TOKEN_DEFAULT TOKEN_IF TOKEN_ELSE TOKEN_SWITCH
|
||||||
@@ -587,7 +588,8 @@ type_specifier
|
|||||||
atomic_var_type_specifier
|
atomic_var_type_specifier
|
||||||
: TOKEN_VOID { $$ = AtomicType::Void; }
|
: TOKEN_VOID { $$ = AtomicType::Void; }
|
||||||
| TOKEN_BOOL { $$ = AtomicType::VaryingBool; }
|
| TOKEN_BOOL { $$ = AtomicType::VaryingBool; }
|
||||||
/* | TOKEN_CHAR { UNIMPLEMENTED; } */
|
| TOKEN_INT8 { $$ = AtomicType::VaryingInt8; }
|
||||||
|
| TOKEN_INT16 { $$ = AtomicType::VaryingInt16; }
|
||||||
| TOKEN_INT { $$ = AtomicType::VaryingInt32; }
|
| TOKEN_INT { $$ = AtomicType::VaryingInt32; }
|
||||||
| TOKEN_FLOAT { $$ = AtomicType::VaryingFloat; }
|
| TOKEN_FLOAT { $$ = AtomicType::VaryingFloat; }
|
||||||
| TOKEN_DOUBLE { $$ = AtomicType::VaryingDouble; }
|
| TOKEN_DOUBLE { $$ = AtomicType::VaryingDouble; }
|
||||||
|
|||||||
@@ -41,7 +41,6 @@
|
|||||||
|
|
||||||
stdlib_core(8)
|
stdlib_core(8)
|
||||||
packed_load_and_store(8)
|
packed_load_and_store(8)
|
||||||
int8_16(8)
|
|
||||||
int64minmax(8)
|
int64minmax(8)
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
@@ -539,55 +538,14 @@ define internal i64 @__reduce_max_uint64(<8 x i64>) nounwind readnone alwaysinli
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; unaligned loads/loads+broadcasts
|
;; unaligned loads/loads+broadcasts
|
||||||
|
|
||||||
define <8 x i32> @__load_and_broadcast_32(i8 *, <8 x i32> %mask) nounwind alwaysinline {
|
load_and_broadcast(8, i8, 8)
|
||||||
%mm = call i32 @__movmsk(<8 x i32> %mask)
|
load_and_broadcast(8, i16, 16)
|
||||||
%any_on = icmp ne i32 %mm, 0
|
load_and_broadcast(8, i32, 32)
|
||||||
br i1 %any_on, label %load, label %skip
|
load_and_broadcast(8, i64, 64)
|
||||||
|
|
||||||
load:
|
|
||||||
; TODO: make sure this becomes a vbroadcast...
|
|
||||||
%ptr = bitcast i8 * %0 to i32 *
|
|
||||||
%val = load i32 * %ptr
|
|
||||||
|
|
||||||
%ret0 = insertelement <8 x i32> undef, i32 %val, i32 0
|
|
||||||
%ret1 = insertelement <8 x i32> %ret0, i32 %val, i32 1
|
|
||||||
%ret2 = insertelement <8 x i32> %ret1, i32 %val, i32 2
|
|
||||||
%ret3 = insertelement <8 x i32> %ret2, i32 %val, i32 3
|
|
||||||
%ret4 = insertelement <8 x i32> %ret3, i32 %val, i32 4
|
|
||||||
%ret5 = insertelement <8 x i32> %ret4, i32 %val, i32 5
|
|
||||||
%ret6 = insertelement <8 x i32> %ret5, i32 %val, i32 6
|
|
||||||
%ret7 = insertelement <8 x i32> %ret6, i32 %val, i32 7
|
|
||||||
ret <8 x i32> %ret7
|
|
||||||
|
|
||||||
skip:
|
|
||||||
ret <8 x i32> undef
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
define <8 x i64> @__load_and_broadcast_64(i8 *, <8 x i32> %mask) nounwind alwaysinline {
|
|
||||||
%mm = call i32 @__movmsk(<8 x i32> %mask)
|
|
||||||
%any_on = icmp ne i32 %mm, 0
|
|
||||||
br i1 %any_on, label %load, label %skip
|
|
||||||
|
|
||||||
load:
|
|
||||||
; TODO: make sure this becomes a vbroadcast...
|
|
||||||
%ptr = bitcast i8 * %0 to i64 *
|
|
||||||
%val = load i64 * %ptr
|
|
||||||
|
|
||||||
%ret0 = insertelement <8 x i64> undef, i64 %val, i32 0
|
|
||||||
%ret1 = insertelement <8 x i64> %ret0, i64 %val, i32 1
|
|
||||||
%ret2 = insertelement <8 x i64> %ret1, i64 %val, i32 2
|
|
||||||
%ret3 = insertelement <8 x i64> %ret2, i64 %val, i32 3
|
|
||||||
%ret4 = insertelement <8 x i64> %ret3, i64 %val, i32 4
|
|
||||||
%ret5 = insertelement <8 x i64> %ret4, i64 %val, i32 5
|
|
||||||
%ret6 = insertelement <8 x i64> %ret5, i64 %val, i32 6
|
|
||||||
%ret7 = insertelement <8 x i64> %ret6, i64 %val, i32 7
|
|
||||||
ret <8 x i64> %ret3
|
|
||||||
|
|
||||||
skip:
|
|
||||||
ret <8 x i64> undef
|
|
||||||
}
|
|
||||||
|
|
||||||
|
; no masked load instruction for i8 and i16 types??
|
||||||
|
load_masked(8, i8, 8, 1)
|
||||||
|
load_masked(8, i16, 16, 2)
|
||||||
|
|
||||||
declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8 *, <8 x float> %mask)
|
declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8 *, <8 x float> %mask)
|
||||||
declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8 *, <4 x double> %mask)
|
declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8 *, <4 x double> %mask)
|
||||||
@@ -623,6 +581,12 @@ define <8 x i64> @__load_masked_64(i8 *, <8 x i32> %mask) nounwind alwaysinline
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; masked store
|
;; masked store
|
||||||
|
|
||||||
|
; FIXME: there is no AVX instruction for these, but we could be clever
|
||||||
|
; by packing the bits down and setting the last 3/4 or half, respectively,
|
||||||
|
; of the mask to zero... Not sure if this would be a win in the end
|
||||||
|
gen_masked_store(8, i8, 8)
|
||||||
|
gen_masked_store(8, i16, 16)
|
||||||
|
|
||||||
; note that mask is the 2nd parameter, not the 3rd one!!
|
; note that mask is the 2nd parameter, not the 3rd one!!
|
||||||
declare void @llvm.x86.avx.maskstore.ps.256(i8 *, <8 x float>, <8 x float>)
|
declare void @llvm.x86.avx.maskstore.ps.256(i8 *, <8 x float>, <8 x float>)
|
||||||
declare void @llvm.x86.avx.maskstore.pd.256(i8 *, <4 x double>, <4 x double>)
|
declare void @llvm.x86.avx.maskstore.pd.256(i8 *, <4 x double>, <4 x double>)
|
||||||
@@ -660,13 +624,14 @@ define void @__masked_store_64(<8 x i64>* nocapture, <8 x i64>,
|
|||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
masked_store_blend_8_16_by_8()
|
||||||
|
|
||||||
declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>,
|
declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>,
|
||||||
<8 x float>) nounwind readnone
|
<8 x float>) nounwind readnone
|
||||||
|
|
||||||
|
|
||||||
define void @__masked_store_blend_32(<8 x i32>* nocapture, <8 x i32>,
|
define void @__masked_store_blend_32(<8 x i32>* nocapture, <8 x i32>,
|
||||||
<8 x i32>) nounwind alwaysinline {
|
<8 x i32>) nounwind alwaysinline {
|
||||||
%mask_as_float = bitcast <8 x i32> %2 to <8 x float>
|
%mask_as_float = bitcast <8 x i32> %2 to <8 x float>
|
||||||
%oldValue = load <8 x i32>* %0, align 4
|
%oldValue = load <8 x i32>* %0, align 4
|
||||||
%oldAsFloat = bitcast <8 x i32> %oldValue to <8 x float>
|
%oldAsFloat = bitcast <8 x i32> %oldValue to <8 x float>
|
||||||
|
|||||||
@@ -36,7 +36,6 @@
|
|||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
int8_16(4)
|
|
||||||
int64minmax(4)
|
int64minmax(4)
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
@@ -380,29 +379,23 @@ define internal i64 @__reduce_max_uint64(<4 x i64>) nounwind readnone {
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; masked store
|
;; masked store
|
||||||
|
|
||||||
define void @__masked_store_32(<4 x i32>* nocapture, <4 x i32>, <4 x i32>) nounwind alwaysinline {
|
masked_store_blend_8_16_by_4()
|
||||||
per_lane(4, <4 x i32> %2, `
|
|
||||||
; compute address for this one
|
|
||||||
%ptr_ID = getelementptr <4 x i32> * %0, i32 0, i32 LANE
|
|
||||||
%storeval_ID = extractelement <4 x i32> %1, i32 LANE
|
|
||||||
store i32 %storeval_ID, i32 * %ptr_ID')
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
define void @__masked_store_64(<4 x i64>* nocapture, <4 x i64>, <4 x i32>) nounwind alwaysinline {
|
|
||||||
per_lane(4, <4 x i32> %2, `
|
|
||||||
%ptr_ID = getelementptr <4 x i64> * %0, i32 0, i32 LANE
|
|
||||||
%storeval_ID = extractelement <4 x i64> %1, i32 LANE
|
|
||||||
store i64 %storeval_ID, i64 * %ptr_ID')
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
|
gen_masked_store(4, i8, 8)
|
||||||
|
gen_masked_store(4, i16, 16)
|
||||||
|
gen_masked_store(4, i32, 32)
|
||||||
|
gen_masked_store(4, i64, 64)
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; unaligned loads/loads+broadcasts
|
;; unaligned loads/loads+broadcasts
|
||||||
|
|
||||||
|
load_and_broadcast(4, i8, 8)
|
||||||
|
load_and_broadcast(4, i16, 16)
|
||||||
load_and_broadcast(4, i32, 32)
|
load_and_broadcast(4, i32, 32)
|
||||||
load_and_broadcast(4, i64, 64)
|
load_and_broadcast(4, i64, 64)
|
||||||
|
|
||||||
|
load_masked(4, i8, 8, 1)
|
||||||
|
load_masked(4, i16, 16, 2)
|
||||||
load_masked(4, i32, 32, 4)
|
load_masked(4, i32, 32, 4)
|
||||||
load_masked(4, i64, 64, 8)
|
load_masked(4, i64, 64, 8)
|
||||||
|
|
||||||
@@ -411,7 +404,12 @@ load_masked(4, i64, 64, 8)
|
|||||||
|
|
||||||
; define these with the macros from stdlib.m4
|
; define these with the macros from stdlib.m4
|
||||||
|
|
||||||
|
gen_gather(4, i8)
|
||||||
|
gen_gather(4, i16)
|
||||||
gen_gather(4, i32)
|
gen_gather(4, i32)
|
||||||
gen_gather(4, i64)
|
gen_gather(4, i64)
|
||||||
|
|
||||||
|
gen_scatter(4, i8)
|
||||||
|
gen_scatter(4, i16)
|
||||||
gen_scatter(4, i32)
|
gen_scatter(4, i32)
|
||||||
gen_scatter(4, i64)
|
gen_scatter(4, i64)
|
||||||
|
|||||||
@@ -38,7 +38,6 @@
|
|||||||
|
|
||||||
stdlib_core(8)
|
stdlib_core(8)
|
||||||
packed_load_and_store(8)
|
packed_load_and_store(8)
|
||||||
int8_16(8)
|
|
||||||
int64minmax(8)
|
int64minmax(8)
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
@@ -435,44 +434,29 @@ define internal i64 @__reduce_max_uint64(<8 x i64>) nounwind readnone {
|
|||||||
reduce8(i64, @__max_varying_uint64, @__max_uniform_uint64)
|
reduce8(i64, @__max_varying_uint64, @__max_uniform_uint64)
|
||||||
}
|
}
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
||||||
;; masked store
|
|
||||||
|
|
||||||
define void @__masked_store_32(<8 x i32>* nocapture, <8 x i32>,
|
|
||||||
<8 x i32>) nounwind alwaysinline {
|
|
||||||
per_lane(8, <8 x i32> %2, `
|
|
||||||
; compute address for this one
|
|
||||||
%ptr_ID = getelementptr <8 x i32> * %0, i32 0, i32 LANE
|
|
||||||
%storeval_ID = extractelement <8 x i32> %1, i32 LANE
|
|
||||||
store i32 %storeval_ID, i32 * %ptr_ID')
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
define void @__masked_store_64(<8 x i64>* nocapture, <8 x i64>,
|
|
||||||
<8 x i32>) nounwind alwaysinline {
|
|
||||||
per_lane(8, <8 x i32> %2, `
|
|
||||||
; compute address for this one
|
|
||||||
%ptr_ID = getelementptr <8 x i64> * %0, i32 0, i32 LANE
|
|
||||||
%storeval_ID = extractelement <8 x i64> %1, i32 LANE
|
|
||||||
store i64 %storeval_ID, i64 * %ptr_ID')
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; unaligned loads/loads+broadcasts
|
;; unaligned loads/loads+broadcasts
|
||||||
|
|
||||||
|
load_and_broadcast(8, i8, 8)
|
||||||
|
load_and_broadcast(8, i16, 16)
|
||||||
load_and_broadcast(8, i32, 32)
|
load_and_broadcast(8, i32, 32)
|
||||||
load_and_broadcast(8, i64, 64)
|
load_and_broadcast(8, i64, 64)
|
||||||
|
|
||||||
|
load_masked(8, i8, 8, 1)
|
||||||
|
load_masked(8, i16, 16, 2)
|
||||||
load_masked(8, i32, 32, 4)
|
load_masked(8, i32, 32, 4)
|
||||||
load_masked(8, i64, 64, 8)
|
load_masked(8, i64, 64, 8)
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; gather/scatter
|
;; gather/scatter
|
||||||
|
|
||||||
|
gen_gather(8, i8)
|
||||||
|
gen_gather(8, i16)
|
||||||
gen_gather(8, i32)
|
gen_gather(8, i32)
|
||||||
gen_gather(8, i64)
|
gen_gather(8, i64)
|
||||||
|
|
||||||
|
gen_scatter(8, i8)
|
||||||
|
gen_scatter(8, i16)
|
||||||
gen_scatter(8, i32)
|
gen_scatter(8, i32)
|
||||||
gen_scatter(8, i64)
|
gen_scatter(8, i64)
|
||||||
|
|
||||||
@@ -619,6 +603,13 @@ define internal float @__reduce_add_float(<8 x float>) nounwind readonly alwaysi
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; masked store
|
;; masked store
|
||||||
|
|
||||||
|
gen_masked_store(8, i8, 8)
|
||||||
|
gen_masked_store(8, i16, 16)
|
||||||
|
gen_masked_store(8, i32, 32)
|
||||||
|
gen_masked_store(8, i64, 64)
|
||||||
|
|
||||||
|
masked_store_blend_8_16_by_8()
|
||||||
|
|
||||||
declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>,
|
declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>,
|
||||||
<4 x float>) nounwind readnone
|
<4 x float>) nounwind readnone
|
||||||
|
|
||||||
|
|||||||
301
stdlib.ispc
301
stdlib.ispc
@@ -85,6 +85,14 @@ static inline float broadcast(float v, uniform int i) {
|
|||||||
return __broadcast_float(v, i);
|
return __broadcast_float(v, i);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline int8 broadcast(int8 v, uniform int i) {
|
||||||
|
return __broadcast_int8(v, i);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int16 broadcast(int16 v, uniform int i) {
|
||||||
|
return __broadcast_int16(v, i);
|
||||||
|
}
|
||||||
|
|
||||||
static inline int32 broadcast(int32 v, uniform int i) {
|
static inline int32 broadcast(int32 v, uniform int i) {
|
||||||
return __broadcast_int32(v, i);
|
return __broadcast_int32(v, i);
|
||||||
}
|
}
|
||||||
@@ -101,6 +109,14 @@ static inline float rotate(float v, uniform int i) {
|
|||||||
return __rotate_float(v, i);
|
return __rotate_float(v, i);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline int8 rotate(int8 v, uniform int i) {
|
||||||
|
return __rotate_int8(v, i);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int16 rotate(int16 v, uniform int i) {
|
||||||
|
return __rotate_int16(v, i);
|
||||||
|
}
|
||||||
|
|
||||||
static inline int32 rotate(int32 v, uniform int i) {
|
static inline int32 rotate(int32 v, uniform int i) {
|
||||||
return __rotate_int32(v, i);
|
return __rotate_int32(v, i);
|
||||||
}
|
}
|
||||||
@@ -117,6 +133,14 @@ static inline float shuffle(float v, int i) {
|
|||||||
return __shuffle_float(v, i);
|
return __shuffle_float(v, i);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline int8 shuffle(int8 v, int i) {
|
||||||
|
return __shuffle_int8(v, i);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int16 shuffle(int16 v, int i) {
|
||||||
|
return __shuffle_int16(v, i);
|
||||||
|
}
|
||||||
|
|
||||||
static inline int32 shuffle(int32 v, int i) {
|
static inline int32 shuffle(int32 v, int i) {
|
||||||
return __shuffle_int32(v, i);
|
return __shuffle_int32(v, i);
|
||||||
}
|
}
|
||||||
@@ -133,6 +157,14 @@ static inline float shuffle(float v0, float v1, int i) {
|
|||||||
return __shuffle2_float(v0, v1, i);
|
return __shuffle2_float(v0, v1, i);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline int8 shuffle(int8 v0, int8 v1, int i) {
|
||||||
|
return __shuffle2_int8(v0, v1, i);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int16 shuffle(int16 v0, int16 v1, int i) {
|
||||||
|
return __shuffle2_int16(v0, v1, i);
|
||||||
|
}
|
||||||
|
|
||||||
static inline int32 shuffle(int32 v0, int32 v1, int i) {
|
static inline int32 shuffle(int32 v0, int32 v1, int i) {
|
||||||
return __shuffle2_int32(v0, v1, i);
|
return __shuffle2_int32(v0, v1, i);
|
||||||
}
|
}
|
||||||
@@ -150,11 +182,27 @@ static inline uniform float extract(float x, uniform int i) {
|
|||||||
return floatbits(__extract_int32((int)intbits(x), i));
|
return floatbits(__extract_int32((int)intbits(x), i));
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline uniform int extract(int x, uniform int i) {
|
static inline uniform int8 extract(int8 x, uniform int i) {
|
||||||
|
return __extract_int8(x, i);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline uniform unsigned int8 extract(unsigned int8 x, uniform int i) {
|
||||||
|
return __extract_int8(x, (unsigned int)i);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline uniform int16 extract(int16 x, uniform int i) {
|
||||||
|
return __extract_int16(x, i);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline uniform unsigned int16 extract(unsigned int16 x, uniform int i) {
|
||||||
|
return __extract_int16(x, (unsigned int)i);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline uniform int32 extract(int32 x, uniform int i) {
|
||||||
return __extract_int32(x, i);
|
return __extract_int32(x, i);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline uniform unsigned int extract(unsigned int x, uniform int i) {
|
static inline uniform unsigned int32 extract(unsigned int32 x, uniform int i) {
|
||||||
return __extract_int32(x, (unsigned int)i);
|
return __extract_int32(x, (unsigned int)i);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -175,12 +223,30 @@ static inline float insert(float x, uniform int i, uniform float v) {
|
|||||||
return floatbits(__insert_int32((int)intbits(x), i, (int)intbits(v)));
|
return floatbits(__insert_int32((int)intbits(x), i, (int)intbits(v)));
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int insert(int x, uniform int i, uniform int v) {
|
static inline int8 insert(int8 x, uniform int i, uniform int8 v) {
|
||||||
|
return __insert_int8(x, i, v);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline unsigned int8 insert(unsigned int8 x, uniform int i,
|
||||||
|
uniform unsigned int8 v) {
|
||||||
|
return __insert_int8(x, (unsigned int)i, v);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int16 insert(int16 x, uniform int i, uniform int16 v) {
|
||||||
|
return __insert_int16(x, i, v);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline unsigned int16 insert(unsigned int16 x, uniform int i,
|
||||||
|
uniform unsigned int16 v) {
|
||||||
|
return __insert_int16(x, (unsigned int)i, v);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int32 insert(int32 x, uniform int i, uniform int32 v) {
|
||||||
return __insert_int32(x, i, v);
|
return __insert_int32(x, i, v);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline unsigned int insert(unsigned int x, uniform int i,
|
static inline unsigned int32 insert(unsigned int32 x, uniform int i,
|
||||||
uniform unsigned int v) {
|
uniform unsigned int32 v) {
|
||||||
return __insert_int32(x, (unsigned int)i, v);
|
return __insert_int32(x, (unsigned int)i, v);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -218,7 +284,7 @@ static inline uniform bool all(bool v) {
|
|||||||
return __movmsk(match) == (1 << programCount) - 1;
|
return __movmsk(match) == (1 << programCount) - 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline uniform int popcnt(uniform int v) {
|
static inline uniform int32 popcnt(uniform int32 v) {
|
||||||
return __popcnt_int32(v);
|
return __popcnt_int32(v);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -473,52 +539,7 @@ ATOMIC_DECL_CMPXCHG(unsigned int64, int64)
|
|||||||
ATOMIC_DECL_CMPXCHG(double, double)
|
ATOMIC_DECL_CMPXCHG(double, double)
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
// Load/store from/to 8/16-bit types
|
// Floating-Point Math
|
||||||
|
|
||||||
static inline int load_from_int8(uniform int a[], uniform int offset) {
|
|
||||||
return __load_int8(a, offset, __mask);
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline unsigned int load_from_uint8(uniform unsigned int a[],
|
|
||||||
uniform int offset) {
|
|
||||||
return __load_uint8(a, offset, __mask);
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void store_to_int8(uniform int a[], uniform int offset,
|
|
||||||
unsigned int val) {
|
|
||||||
__store_int8(a, offset, val, __mask);
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void store_to_uint8(uniform unsigned int a[], uniform int offset,
|
|
||||||
unsigned int val) {
|
|
||||||
// Can use __store_int8 for unsigned stuff, since it truncates bits in
|
|
||||||
// either case.
|
|
||||||
__store_int8(a, offset, val, __mask);
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline int load_from_int16(uniform int a[], uniform int offset) {
|
|
||||||
return __load_int16(a, offset, __mask);
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline unsigned int load_from_int16(uniform unsigned int a[],
|
|
||||||
uniform int offset) {
|
|
||||||
return __load_uint16(a, offset, __mask);
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void store_to_int16(uniform int a[], uniform int offset,
|
|
||||||
int val) {
|
|
||||||
__store_int16(a, offset, val, __mask);
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void store_to_uint16(uniform unsigned int a[], uniform int offset,
|
|
||||||
unsigned int val) {
|
|
||||||
// Can use __store_int16 for unsigned stuff, since it truncates bits in
|
|
||||||
// either case.
|
|
||||||
__store_int16(a, offset, val, __mask);
|
|
||||||
}
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////
|
|
||||||
// Math
|
|
||||||
|
|
||||||
static inline float abs(float a) {
|
static inline float abs(float a) {
|
||||||
// Floating-point hack: zeroing the high bit clears the sign
|
// Floating-point hack: zeroing the high bit clears the sign
|
||||||
@@ -622,6 +643,11 @@ static inline uniform float rcp(uniform float v) {
|
|||||||
return __rcp_uniform_float(v);
|
return __rcp_uniform_float(v);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////
|
||||||
|
// min/max
|
||||||
|
|
||||||
|
// float
|
||||||
|
|
||||||
static inline float min(float a, float b) {
|
static inline float min(float a, float b) {
|
||||||
return __min_varying_float(a, b);
|
return __min_varying_float(a, b);
|
||||||
}
|
}
|
||||||
@@ -630,14 +656,6 @@ static inline uniform float min(uniform float a, uniform float b) {
|
|||||||
return __min_uniform_float(a, b);
|
return __min_uniform_float(a, b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline double min(double a, double b) {
|
|
||||||
return __min_varying_double(a, b);
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline uniform double min(uniform double a, uniform double b) {
|
|
||||||
return __min_uniform_double(a, b);
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline float max(float a, float b) {
|
static inline float max(float a, float b) {
|
||||||
return __max_varying_float(a, b);
|
return __max_varying_float(a, b);
|
||||||
}
|
}
|
||||||
@@ -646,6 +664,17 @@ static inline uniform float max(uniform float a, uniform float b) {
|
|||||||
return __max_uniform_float(a, b);
|
return __max_uniform_float(a, b);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// double
|
||||||
|
|
||||||
|
static inline double min(double a, double b) {
|
||||||
|
return __min_varying_double(a, b);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline uniform double min(uniform double a, uniform double b) {
|
||||||
|
return __min_uniform_double(a, b);
|
||||||
|
}
|
||||||
|
|
||||||
static inline double max(double a, double b) {
|
static inline double max(double a, double b) {
|
||||||
return __max_varying_double(a, b);
|
return __max_varying_double(a, b);
|
||||||
}
|
}
|
||||||
@@ -654,6 +683,80 @@ static inline uniform double max(uniform double a, uniform double b) {
|
|||||||
return __max_uniform_double(a, b);
|
return __max_uniform_double(a, b);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// int8
|
||||||
|
|
||||||
|
static inline uniform unsigned int8 min(uniform unsigned int8 a,
|
||||||
|
uniform unsigned int8 b) {
|
||||||
|
return (a < b) ? a : b;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline uniform unsigned int8 max(uniform unsigned int8 a,
|
||||||
|
uniform unsigned int8 b) {
|
||||||
|
return (a > b) ? a : b;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline uniform int8 min(uniform int8 a, uniform int8 b) {
|
||||||
|
return (a < b) ? a : b;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline uniform int8 max(uniform int8 a, uniform int8 b) {
|
||||||
|
return (a > b) ? a : b;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline unsigned int8 min(unsigned int8 a, unsigned int8 b) {
|
||||||
|
return (a < b) ? a : b;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline unsigned int8 max(unsigned int8 a, unsigned int8 b) {
|
||||||
|
return (a > b) ? a : b;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int8 min(int8 a, int8 b) {
|
||||||
|
return (a < b) ? a : b;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int8 max(int8 a, int8 b) {
|
||||||
|
return (a > b) ? a : b;
|
||||||
|
}
|
||||||
|
|
||||||
|
// int16
|
||||||
|
|
||||||
|
static inline uniform unsigned int16 min(uniform unsigned int16 a,
|
||||||
|
uniform unsigned int16 b) {
|
||||||
|
return (a < b) ? a : b;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline uniform unsigned int16 max(uniform unsigned int16 a,
|
||||||
|
uniform unsigned int16 b) {
|
||||||
|
return (a > b) ? a : b;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline uniform int16 min(uniform int16 a, uniform int16 b) {
|
||||||
|
return (a < b) ? a : b;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline uniform int16 max(uniform int16 a, uniform int16 b) {
|
||||||
|
return (a > b) ? a : b;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline unsigned int16 min(unsigned int16 a, unsigned int16 b) {
|
||||||
|
return (a < b) ? a : b;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline unsigned int16 max(unsigned int16 a, unsigned int16 b) {
|
||||||
|
return (a > b) ? a : b;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int16 min(int16 a, int16 b) {
|
||||||
|
return (a < b) ? a : b;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int16 max(int16 a, int16 b) {
|
||||||
|
return (a > b) ? a : b;
|
||||||
|
}
|
||||||
|
|
||||||
|
// int32
|
||||||
|
|
||||||
static inline unsigned int min(unsigned int a, unsigned int b) {
|
static inline unsigned int min(unsigned int a, unsigned int b) {
|
||||||
return __min_varying_uint32(a, b);
|
return __min_varying_uint32(a, b);
|
||||||
}
|
}
|
||||||
@@ -686,6 +789,8 @@ static inline uniform int max(uniform int a, uniform int b) {
|
|||||||
return __max_uniform_int32(a, b);
|
return __max_uniform_int32(a, b);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// int64
|
||||||
|
|
||||||
static inline unsigned int64 min(unsigned int64 a, unsigned int64 b) {
|
static inline unsigned int64 min(unsigned int64 a, unsigned int64 b) {
|
||||||
return __min_varying_uint64(a, b);
|
return __min_varying_uint64(a, b);
|
||||||
}
|
}
|
||||||
@@ -718,6 +823,11 @@ static inline uniform int64 max(uniform int64 a, uniform int64 b) {
|
|||||||
return __max_uniform_int64(a, b);
|
return __max_uniform_int64(a, b);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////
|
||||||
|
// clamps
|
||||||
|
|
||||||
|
// float
|
||||||
|
|
||||||
static inline float clamp(float v, float low, float high) {
|
static inline float clamp(float v, float low, float high) {
|
||||||
return min(max(v, low), high);
|
return min(max(v, low), high);
|
||||||
}
|
}
|
||||||
@@ -726,6 +836,52 @@ static inline uniform float clamp(uniform float v, uniform float low, uniform fl
|
|||||||
return min(max(v, low), high);
|
return min(max(v, low), high);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// int8
|
||||||
|
|
||||||
|
static inline unsigned int8 clamp(unsigned int8 v, unsigned int8 low,
|
||||||
|
unsigned int8 high) {
|
||||||
|
return min(max(v, low), high);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline uniform unsigned int8 clamp(uniform unsigned int8 v,
|
||||||
|
uniform unsigned int8 low,
|
||||||
|
uniform unsigned int8 high) {
|
||||||
|
return min(max(v, low), high);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int8 clamp(int8 v, int8 low, int8 high) {
|
||||||
|
return min(max(v, low), high);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline uniform int8 clamp(uniform int8 v, uniform int8 low,
|
||||||
|
uniform int8 high) {
|
||||||
|
return min(max(v, low), high);
|
||||||
|
}
|
||||||
|
|
||||||
|
// int16
|
||||||
|
|
||||||
|
static inline unsigned int16 clamp(unsigned int16 v, unsigned int16 low,
|
||||||
|
unsigned int16 high) {
|
||||||
|
return min(max(v, low), high);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline uniform unsigned int16 clamp(uniform unsigned int16 v,
|
||||||
|
uniform unsigned int16 low,
|
||||||
|
uniform unsigned int16 high) {
|
||||||
|
return min(max(v, low), high);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int16 clamp(int16 v, int16 low, int16 high) {
|
||||||
|
return min(max(v, low), high);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline uniform int16 clamp(uniform int16 v, uniform int16 low,
|
||||||
|
uniform int16 high) {
|
||||||
|
return min(max(v, low), high);
|
||||||
|
}
|
||||||
|
|
||||||
|
// int32
|
||||||
|
|
||||||
static inline unsigned int clamp(unsigned int v, unsigned int low, unsigned int high) {
|
static inline unsigned int clamp(unsigned int v, unsigned int low, unsigned int high) {
|
||||||
return min(max(v, low), high);
|
return min(max(v, low), high);
|
||||||
}
|
}
|
||||||
@@ -735,15 +891,6 @@ static inline uniform unsigned int clamp(uniform unsigned int v, uniform unsigne
|
|||||||
return min(max(v, low), high);
|
return min(max(v, low), high);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline unsigned int64 clamp(unsigned int64 v, unsigned int64 low, unsigned int64 high) {
|
|
||||||
return min(max(v, low), high);
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline uniform unsigned int64 clamp(uniform unsigned int64 v, uniform unsigned int64 low,
|
|
||||||
uniform unsigned int64 high) {
|
|
||||||
return min(max(v, low), high);
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline int clamp(int v, int low, int high) {
|
static inline int clamp(int v, int low, int high) {
|
||||||
return min(max(v, low), high);
|
return min(max(v, low), high);
|
||||||
}
|
}
|
||||||
@@ -752,11 +899,25 @@ static inline uniform int clamp(uniform int v, uniform int low, uniform int high
|
|||||||
return min(max(v, low), high);
|
return min(max(v, low), high);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// int64
|
||||||
|
|
||||||
|
static inline unsigned int64 clamp(unsigned int64 v, unsigned int64 low,
|
||||||
|
unsigned int64 high) {
|
||||||
|
return min(max(v, low), high);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline uniform unsigned int64 clamp(uniform unsigned int64 v,
|
||||||
|
uniform unsigned int64 low,
|
||||||
|
uniform unsigned int64 high) {
|
||||||
|
return min(max(v, low), high);
|
||||||
|
}
|
||||||
|
|
||||||
static inline int64 clamp(int64 v, int64 low, int64 high) {
|
static inline int64 clamp(int64 v, int64 low, int64 high) {
|
||||||
return min(max(v, low), high);
|
return min(max(v, low), high);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline uniform int64 clamp(uniform int64 v, uniform int64 low, uniform int64 high) {
|
static inline uniform int64 clamp(uniform int64 v, uniform int64 low,
|
||||||
|
uniform int64 high) {
|
||||||
return min(max(v, low), high);
|
return min(max(v, low), high);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
288
stdlib.m4
288
stdlib.m4
@@ -566,6 +566,28 @@ declare i1 @__is_compile_time_constant_varying_int32(<$1 x i32>)
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; vector ops
|
;; vector ops
|
||||||
|
|
||||||
|
define internal i8 @__extract_int8(<$1 x i8>, i32) nounwind readnone alwaysinline {
|
||||||
|
%extract = extractelement <$1 x i8> %0, i32 %1
|
||||||
|
ret i8 %extract
|
||||||
|
}
|
||||||
|
|
||||||
|
define internal <$1 x i8> @__insert_int8(<$1 x i8>, i32,
|
||||||
|
i8) nounwind readnone alwaysinline {
|
||||||
|
%insert = insertelement <$1 x i8> %0, i8 %2, i32 %1
|
||||||
|
ret <$1 x i8> %insert
|
||||||
|
}
|
||||||
|
|
||||||
|
define internal i16 @__extract_int16(<$1 x i16>, i32) nounwind readnone alwaysinline {
|
||||||
|
%extract = extractelement <$1 x i16> %0, i32 %1
|
||||||
|
ret i16 %extract
|
||||||
|
}
|
||||||
|
|
||||||
|
define internal <$1 x i16> @__insert_int16(<$1 x i16>, i32,
|
||||||
|
i16) nounwind readnone alwaysinline {
|
||||||
|
%insert = insertelement <$1 x i16> %0, i16 %2, i32 %1
|
||||||
|
ret <$1 x i16> %insert
|
||||||
|
}
|
||||||
|
|
||||||
define internal i32 @__extract_int32(<$1 x i32>, i32) nounwind readnone alwaysinline {
|
define internal i32 @__extract_int32(<$1 x i32>, i32) nounwind readnone alwaysinline {
|
||||||
%extract = extractelement <$1 x i32> %0, i32 %1
|
%extract = extractelement <$1 x i32> %0, i32 %1
|
||||||
ret i32 %extract
|
ret i32 %extract
|
||||||
@@ -588,6 +610,8 @@ define internal <$1 x i64> @__insert_int64(<$1 x i64>, i32,
|
|||||||
ret <$1 x i64> %insert
|
ret <$1 x i64> %insert
|
||||||
}
|
}
|
||||||
|
|
||||||
|
shuffles($1, i8, int8, 1)
|
||||||
|
shuffles($1, i16, int16, 2)
|
||||||
shuffles($1, float, float, 4)
|
shuffles($1, float, float, 4)
|
||||||
shuffles($1, i32, int32, 4)
|
shuffles($1, i32, int32, 4)
|
||||||
shuffles($1, double, double, 8)
|
shuffles($1, double, double, 8)
|
||||||
@@ -901,171 +925,6 @@ i64minmax($1,min,uint64,ult)
|
|||||||
i64minmax($1,max,uint64,ugt)
|
i64minmax($1,max,uint64,ugt)
|
||||||
')
|
')
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
||||||
;; Definitions of 8 and 16-bit load and store functions
|
|
||||||
;;
|
|
||||||
;; The `int8_16' macro defines functions related to loading and storing 8 and
|
|
||||||
;; 16-bit values in memory, converting to and from i32. (This is a workaround
|
|
||||||
;; to be able to use in-memory values of types in ispc programs, since the
|
|
||||||
;; compiler doesn't yet support 8 and 16-bit datatypes...
|
|
||||||
;;
|
|
||||||
;; Arguments to pass to `int8_16':
|
|
||||||
;; $1: vector width of the target
|
|
||||||
|
|
||||||
define(`int8_16', `
|
|
||||||
define internal <$1 x i32> @__load_uint8([0 x i32] *, i32 %offset,
|
|
||||||
<$1 x i32> %mask) nounwind alwaysinline {
|
|
||||||
%mm = call i32 @__movmsk(<$1 x i32> %mask)
|
|
||||||
%any = icmp ne i32 %mm, 0
|
|
||||||
br i1 %any, label %doload, label %skip
|
|
||||||
|
|
||||||
doload:
|
|
||||||
%ptr8 = bitcast [0 x i32] *%0 to i8 *
|
|
||||||
%ptr = getelementptr i8 * %ptr8, i32 %offset
|
|
||||||
%ptr64 = bitcast i8 * %ptr to i`'eval(8*$1) *
|
|
||||||
%val = load i`'eval(8*$1) * %ptr64, align 1
|
|
||||||
|
|
||||||
%vval = bitcast i`'eval(8*$1) %val to <$1 x i8>
|
|
||||||
; unsigned, so zero-extend to i32...
|
|
||||||
%ret = zext <$1 x i8> %vval to <$1 x i32>
|
|
||||||
ret <$1 x i32> %ret
|
|
||||||
|
|
||||||
skip:
|
|
||||||
ret <$1 x i32> undef
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
define internal <$1 x i32> @__load_int8([0 x i32] *, i32 %offset,
|
|
||||||
<$1 x i32> %mask) nounwind alwaysinline {
|
|
||||||
%mm = call i32 @__movmsk(<$1 x i32> %mask)
|
|
||||||
%any = icmp ne i32 %mm, 0
|
|
||||||
br i1 %any, label %doload, label %skip
|
|
||||||
|
|
||||||
doload:
|
|
||||||
%ptr8 = bitcast [0 x i32] *%0 to i8 *
|
|
||||||
%ptr = getelementptr i8 * %ptr8, i32 %offset
|
|
||||||
%ptr64 = bitcast i8 * %ptr to i`'eval(8*$1) *
|
|
||||||
%val = load i`'eval(8*$1) * %ptr64, align 1
|
|
||||||
|
|
||||||
%vval = bitcast i`'eval(8*$1) %val to <$1 x i8>
|
|
||||||
; signed, so sign-extend to i32...
|
|
||||||
%ret = sext <$1 x i8> %vval to <$1 x i32>
|
|
||||||
ret <$1 x i32> %ret
|
|
||||||
|
|
||||||
skip:
|
|
||||||
ret <$1 x i32> undef
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
define internal <$1 x i32> @__load_uint16([0 x i32] *, i32 %offset,
|
|
||||||
<$1 x i32> %mask) nounwind alwaysinline {
|
|
||||||
%mm = call i32 @__movmsk(<$1 x i32> %mask)
|
|
||||||
%any = icmp ne i32 %mm, 0
|
|
||||||
br i1 %any, label %doload, label %skip
|
|
||||||
|
|
||||||
doload:
|
|
||||||
%ptr16 = bitcast [0 x i32] *%0 to i16 *
|
|
||||||
%ptr = getelementptr i16 * %ptr16, i32 %offset
|
|
||||||
%ptr64 = bitcast i16 * %ptr to i`'eval(16*$1) *
|
|
||||||
%val = load i`'eval(16*$1) * %ptr64, align 2
|
|
||||||
|
|
||||||
%vval = bitcast i`'eval(16*$1) %val to <$1 x i16>
|
|
||||||
; unsigned, so use zero-extend...
|
|
||||||
%ret = zext <$1 x i16> %vval to <$1 x i32>
|
|
||||||
ret <$1 x i32> %ret
|
|
||||||
|
|
||||||
skip:
|
|
||||||
ret <$1 x i32> undef
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
define internal <$1 x i32> @__load_int16([0 x i32] *, i32 %offset,
|
|
||||||
<$1 x i32> %mask) nounwind alwaysinline {
|
|
||||||
%mm = call i32 @__movmsk(<$1 x i32> %mask)
|
|
||||||
%any = icmp ne i32 %mm, 0
|
|
||||||
br i1 %any, label %doload, label %skip
|
|
||||||
|
|
||||||
doload:
|
|
||||||
%ptr16 = bitcast [0 x i32] *%0 to i16 *
|
|
||||||
%ptr = getelementptr i16 * %ptr16, i32 %offset
|
|
||||||
%ptr64 = bitcast i16 * %ptr to i`'eval(16*$1) *
|
|
||||||
%val = load i`'eval(16*$1) * %ptr64, align 2
|
|
||||||
|
|
||||||
%vval = bitcast i`'eval(16*$1) %val to <$1 x i16>
|
|
||||||
; signed, so use sign-extend...
|
|
||||||
%ret = sext <$1 x i16> %vval to <$1 x i32>
|
|
||||||
ret <$1 x i32> %ret
|
|
||||||
|
|
||||||
skip:
|
|
||||||
ret <$1 x i32> undef
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
define internal void @__store_int8([0 x i32] *, i32 %offset, <$1 x i32> %val32,
|
|
||||||
<$1 x i32> %mask) nounwind alwaysinline {
|
|
||||||
%mm = call i32 @__movmsk(<$1 x i32> %mask)
|
|
||||||
%any = icmp ne i32 %mm, 0
|
|
||||||
br i1 %any, label %dostore, label %skip
|
|
||||||
|
|
||||||
dostore:
|
|
||||||
%val = trunc <$1 x i32> %val32 to <$1 x i8>
|
|
||||||
%val64 = bitcast <$1 x i8> %val to i`'eval(8*$1)
|
|
||||||
|
|
||||||
%mask8 = trunc <$1 x i32> %mask to <$1 x i8>
|
|
||||||
%mask64 = bitcast <$1 x i8> %mask8 to i`'eval(8*$1)
|
|
||||||
%notmask = xor i`'eval(8*$1) %mask64, -1
|
|
||||||
|
|
||||||
%ptr8 = bitcast [0 x i32] *%0 to i8 *
|
|
||||||
%ptr = getelementptr i8 * %ptr8, i32 %offset
|
|
||||||
%ptr64 = bitcast i8 * %ptr to i`'eval(8*$1) *
|
|
||||||
|
|
||||||
;; load the old value, use logical ops to blend based on the mask, then
|
|
||||||
;; store the result back
|
|
||||||
%old = load i`'eval(8*$1) * %ptr64, align 1
|
|
||||||
%oldmasked = and i`'eval(8*$1) %old, %notmask
|
|
||||||
%newmasked = and i`'eval(8*$1) %val64, %mask64
|
|
||||||
%final = or i`'eval(8*$1) %oldmasked, %newmasked
|
|
||||||
store i`'eval(8*$1) %final, i`'eval(8*$1) * %ptr64, align 1
|
|
||||||
|
|
||||||
ret void
|
|
||||||
|
|
||||||
skip:
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
define internal void @__store_int16([0 x i32] *, i32 %offset, <$1 x i32> %val32,
|
|
||||||
<$1 x i32> %mask) nounwind alwaysinline {
|
|
||||||
%mm = call i32 @__movmsk(<$1 x i32> %mask)
|
|
||||||
%any = icmp ne i32 %mm, 0
|
|
||||||
br i1 %any, label %dostore, label %skip
|
|
||||||
|
|
||||||
dostore:
|
|
||||||
%val = trunc <$1 x i32> %val32 to <$1 x i16>
|
|
||||||
%val64 = bitcast <$1 x i16> %val to i`'eval(16*$1)
|
|
||||||
|
|
||||||
%mask8 = trunc <$1 x i32> %mask to <$1 x i16>
|
|
||||||
%mask64 = bitcast <$1 x i16> %mask8 to i`'eval(16*$1)
|
|
||||||
%notmask = xor i`'eval(16*$1) %mask64, -1
|
|
||||||
|
|
||||||
%ptr16 = bitcast [0 x i32] *%0 to i16 *
|
|
||||||
%ptr = getelementptr i16 * %ptr16, i32 %offset
|
|
||||||
%ptr64 = bitcast i16 * %ptr to i`'eval(16*$1) *
|
|
||||||
|
|
||||||
;; as above, use mask to do blending with logical ops...
|
|
||||||
%old = load i`'eval(16*$1) * %ptr64, align 2
|
|
||||||
%oldmasked = and i`'eval(16*$1) %old, %notmask
|
|
||||||
%newmasked = and i`'eval(16*$1) %val64, %mask64
|
|
||||||
%final = or i`'eval(16*$1) %oldmasked, %newmasked
|
|
||||||
store i`'eval(16*$1) %final, i`'eval(16*$1) * %ptr64, align 2
|
|
||||||
|
|
||||||
ret void
|
|
||||||
|
|
||||||
skip:
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
'
|
|
||||||
)
|
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; Emit code to safely load a scalar value and broadcast it across the
|
;; Emit code to safely load a scalar value and broadcast it across the
|
||||||
;; elements of a vector. Parameters:
|
;; elements of a vector. Parameters:
|
||||||
@@ -1150,6 +1009,105 @@ return:
|
|||||||
}
|
}
|
||||||
')
|
')
|
||||||
|
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;; masked store
|
||||||
|
;; emit code to do masked store as a set of per-lane scalar stores
|
||||||
|
;; parameters:
|
||||||
|
;; $1: target vector width
|
||||||
|
;; $2: llvm type of elements
|
||||||
|
;; $3: suffix for function name
|
||||||
|
|
||||||
|
define(`gen_masked_store', `
|
||||||
|
define void @__masked_store_$3(<$1 x $2>* nocapture, <$1 x $2>, <$1 x i32>) nounwind alwaysinline {
|
||||||
|
per_lane($1, <$1 x i32> %2, `
|
||||||
|
%ptr_ID = getelementptr <$1 x $2> * %0, i32 0, i32 LANE
|
||||||
|
%storeval_ID = extractelement <$1 x $2> %1, i32 LANE
|
||||||
|
store $2 %storeval_ID, $2 * %ptr_ID')
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
')
|
||||||
|
|
||||||
|
define(`masked_store_blend_8_16_by_4', `
|
||||||
|
define void @__masked_store_blend_8(<4 x i8>* nocapture, <4 x i8>,
|
||||||
|
<4 x i32>) nounwind alwaysinline {
|
||||||
|
%old = load <4 x i8> * %0
|
||||||
|
%old32 = bitcast <4 x i8> %old to i32
|
||||||
|
%new32 = bitcast <4 x i8> %1 to i32
|
||||||
|
|
||||||
|
%mask8 = trunc <4 x i32> %2 to <4 x i8>
|
||||||
|
%mask32 = bitcast <4 x i8> %mask8 to i32
|
||||||
|
%notmask32 = xor i32 %mask32, -1
|
||||||
|
|
||||||
|
%newmasked = and i32 %new32, %mask32
|
||||||
|
%oldmasked = and i32 %old32, %notmask32
|
||||||
|
%result = or i32 %newmasked, %oldmasked
|
||||||
|
|
||||||
|
%resultvec = bitcast i32 %result to <4 x i8>
|
||||||
|
store <4 x i8> %resultvec, <4 x i8> * %0
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @__masked_store_blend_16(<4 x i16>* nocapture, <4 x i16>,
|
||||||
|
<4 x i32>) nounwind alwaysinline {
|
||||||
|
%old = load <4 x i16> * %0
|
||||||
|
%old64 = bitcast <4 x i16> %old to i64
|
||||||
|
%new64 = bitcast <4 x i16> %1 to i64
|
||||||
|
|
||||||
|
%mask16 = trunc <4 x i32> %2 to <4 x i16>
|
||||||
|
%mask64 = bitcast <4 x i16> %mask16 to i64
|
||||||
|
%notmask64 = xor i64 %mask64, -1
|
||||||
|
|
||||||
|
%newmasked = and i64 %new64, %mask64
|
||||||
|
%oldmasked = and i64 %old64, %notmask64
|
||||||
|
%result = or i64 %newmasked, %oldmasked
|
||||||
|
|
||||||
|
%resultvec = bitcast i64 %result to <4 x i16>
|
||||||
|
store <4 x i16> %resultvec, <4 x i16> * %0
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
')
|
||||||
|
|
||||||
|
define(`masked_store_blend_8_16_by_8', `
|
||||||
|
define void @__masked_store_blend_8(<8 x i8>* nocapture, <8 x i8>,
|
||||||
|
<8 x i32>) nounwind alwaysinline {
|
||||||
|
%old = load <8 x i8> * %0
|
||||||
|
%old64 = bitcast <8 x i8> %old to i64
|
||||||
|
%new64 = bitcast <8 x i8> %1 to i64
|
||||||
|
|
||||||
|
%mask8 = trunc <8 x i32> %2 to <8 x i8>
|
||||||
|
%mask64 = bitcast <8 x i8> %mask8 to i64
|
||||||
|
%notmask64 = xor i64 %mask64, -1
|
||||||
|
|
||||||
|
%newmasked = and i64 %new64, %mask64
|
||||||
|
%oldmasked = and i64 %old64, %notmask64
|
||||||
|
%result = or i64 %newmasked, %oldmasked
|
||||||
|
|
||||||
|
%resultvec = bitcast i64 %result to <8 x i8>
|
||||||
|
store <8 x i8> %resultvec, <8 x i8> * %0
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @__masked_store_blend_16(<8 x i16>* nocapture, <8 x i16>,
|
||||||
|
<8 x i32>) nounwind alwaysinline {
|
||||||
|
%old = load <8 x i16> * %0
|
||||||
|
%old128 = bitcast <8 x i16> %old to i128
|
||||||
|
%new128 = bitcast <8 x i16> %1 to i128
|
||||||
|
|
||||||
|
%mask16 = trunc <8 x i32> %2 to <8 x i16>
|
||||||
|
%mask128 = bitcast <8 x i16> %mask16 to i128
|
||||||
|
%notmask128 = xor i128 %mask128, -1
|
||||||
|
|
||||||
|
%newmasked = and i128 %new128, %mask128
|
||||||
|
%oldmasked = and i128 %old128, %notmask128
|
||||||
|
%result = or i128 %newmasked, %oldmasked
|
||||||
|
|
||||||
|
%resultvec = bitcast i128 %result to <8 x i16>
|
||||||
|
store <8 x i16> %resultvec, <8 x i16> * %0
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
')
|
||||||
|
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; packed load and store functions
|
;; packed load and store functions
|
||||||
;;
|
;;
|
||||||
|
|||||||
12
stmt.cpp
12
stmt.cpp
@@ -1405,6 +1405,18 @@ lProcessPrintArg(Expr *expr, FunctionEmitContext *ctx, std::string &argTypes) {
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Just int8 and int16 types to int32s...
|
||||||
|
const Type *baseType = type->GetAsNonConstType()->GetAsUniformType();
|
||||||
|
if (baseType == AtomicType::UniformInt8 ||
|
||||||
|
baseType == AtomicType::UniformUInt8 ||
|
||||||
|
baseType == AtomicType::UniformInt16 ||
|
||||||
|
baseType == AtomicType::UniformUInt16) {
|
||||||
|
expr = new TypeCastExpr(type->IsUniformType() ? AtomicType::UniformInt32 :
|
||||||
|
AtomicType::VaryingInt32,
|
||||||
|
expr, expr->pos);
|
||||||
|
type = expr->GetType();
|
||||||
|
}
|
||||||
|
|
||||||
char t = lEncodeType(type->GetAsNonConstType());
|
char t = lEncodeType(type->GetAsNonConstType());
|
||||||
if (t == '\0') {
|
if (t == '\0') {
|
||||||
Error(expr->pos, "Only atomic types are allowed in print statements; "
|
Error(expr->pos, "Only atomic types are allowed in print statements; "
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
|||||||
uniform float x[47][47];
|
uniform float x[47][47];
|
||||||
for (uniform int i = 0; i < 47; ++i)
|
for (uniform int i = 0; i < 47; ++i)
|
||||||
for (uniform int j = 0; j < 47; ++j)
|
for (uniform int j = 0; j < 47; ++j)
|
||||||
x[i][j] = 2;
|
x[i][j] = 2+b-5;
|
||||||
|
|
||||||
// all are 2 except (3,4) = 0, (1,4) = 1, (2,4) = 1, (4,4) = 1
|
// all are 2 except (3,4) = 0, (1,4) = 1, (2,4) = 1, (4,4) = 1
|
||||||
if (a == 3.)
|
if (a == 3.)
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
|||||||
uniform float x[47][47];
|
uniform float x[47][47];
|
||||||
for (uniform int i = 0; i < 47; ++i)
|
for (uniform int i = 0; i < 47; ++i)
|
||||||
for (uniform int j = 0; j < 47; ++j)
|
for (uniform int j = 0; j < 47; ++j)
|
||||||
x[i][j] = 2;
|
x[i][j] = 2+b-5;
|
||||||
|
|
||||||
// all are 2 except (4,2) = 0, (4,...) = 1, (4,programCount-1)=2
|
// all are 2 except (4,2) = 0, (4,...) = 1, (4,programCount-1)=2
|
||||||
if (a == 3.)
|
if (a == 3.)
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
|||||||
uniform float x[47][47];
|
uniform float x[47][47];
|
||||||
for (uniform int i = 0; i < 47; ++i)
|
for (uniform int i = 0; i < 47; ++i)
|
||||||
for (uniform int j = 0; j < 47; ++j)
|
for (uniform int j = 0; j < 47; ++j)
|
||||||
x[i][j] = 2;
|
x[i][j] = 2+b-5;
|
||||||
|
|
||||||
x[a][b-1] = 0;
|
x[a][b-1] = 0;
|
||||||
RET[programIndex] = x[2][a];
|
RET[programIndex] = x[2][a];
|
||||||
|
|||||||
12
tests/broadcast-2.ispc
Normal file
12
tests/broadcast-2.ispc
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
|
||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||||
|
int16 a = aFOO[programIndex];
|
||||||
|
int16 b = broadcast(a, 2);
|
||||||
|
RET[programIndex] = b;
|
||||||
|
}
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
RET[programIndex] = 3;
|
||||||
|
}
|
||||||
12
tests/broadcast-3.ispc
Normal file
12
tests/broadcast-3.ispc
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
|
||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||||
|
int8 a = aFOO[programIndex];
|
||||||
|
int8 br = broadcast(a, (uniform int)b-2);
|
||||||
|
RET[programIndex] = br;
|
||||||
|
}
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
RET[programIndex] = 4;
|
||||||
|
}
|
||||||
19
tests/gather-int16-1.ispc
Normal file
19
tests/gather-int16-1.ispc
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||||
|
uniform int16 x[programCount];
|
||||||
|
x[programIndex] = programIndex;
|
||||||
|
int a = aFOO[programIndex]-1;
|
||||||
|
unsigned int16 v;
|
||||||
|
if (programIndex < 2)
|
||||||
|
v = x[a];
|
||||||
|
else
|
||||||
|
v = 2;
|
||||||
|
RET[programIndex] = v;
|
||||||
|
}
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
RET[programIndex] = 2;
|
||||||
|
RET[0] = 0;
|
||||||
|
RET[1] = 1;
|
||||||
|
}
|
||||||
13
tests/gather-int16.ispc
Normal file
13
tests/gather-int16.ispc
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||||
|
uniform int16 x[programCount];
|
||||||
|
x[programIndex] = programIndex;
|
||||||
|
int a = aFOO[programIndex]-1;
|
||||||
|
unsigned int16 v = x[a];
|
||||||
|
RET[programIndex] = v;
|
||||||
|
}
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
RET[programIndex] = programIndex;
|
||||||
|
}
|
||||||
19
tests/gather-int8-1.ispc
Normal file
19
tests/gather-int8-1.ispc
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||||
|
uniform int8 x[programCount];
|
||||||
|
x[programIndex] = programIndex;
|
||||||
|
int a = aFOO[programIndex]-1;
|
||||||
|
unsigned int8 v;
|
||||||
|
if (programIndex < 2)
|
||||||
|
v = x[a];
|
||||||
|
else
|
||||||
|
v = 2;
|
||||||
|
RET[programIndex] = v;
|
||||||
|
}
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
RET[programIndex] = 2;
|
||||||
|
RET[0] = 0;
|
||||||
|
RET[1] = 1;
|
||||||
|
}
|
||||||
13
tests/gather-int8.ispc
Normal file
13
tests/gather-int8.ispc
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||||
|
uniform int8 x[programCount];
|
||||||
|
x[programIndex] = programIndex;
|
||||||
|
int a = aFOO[programIndex]-1;
|
||||||
|
unsigned int8 v = x[a];
|
||||||
|
RET[programIndex] = v;
|
||||||
|
}
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
RET[programIndex] = programIndex;
|
||||||
|
}
|
||||||
12
tests/int16-wrap.ispc
Normal file
12
tests/int16-wrap.ispc
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
|
||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
|
||||||
|
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float bb) {
|
||||||
|
unsigned int16 a = aFOO[programIndex], b = bb;
|
||||||
|
RET[programIndex] = ((unsigned int16)4000*a)+b;
|
||||||
|
}
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
RET[programIndex] = (((4000*(programIndex+1))&0xffff)+5)&0xffff;
|
||||||
|
}
|
||||||
12
tests/int8-wrap.ispc
Normal file
12
tests/int8-wrap.ispc
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
|
||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
|
||||||
|
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float bb) {
|
||||||
|
unsigned int8 a = aFOO[programIndex], b = bb;
|
||||||
|
RET[programIndex] = ((unsigned int8)100*a)+b;
|
||||||
|
}
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
RET[programIndex] = (((100*(programIndex+1))&0xff)+5)&0xff;
|
||||||
|
}
|
||||||
@@ -1,13 +1,17 @@
|
|||||||
export uniform int width() { return programCount; }
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||||
uniform int x[9] = { 0x00020001, 0x00040003, 0x00060005, 0x00080007,
|
uniform int16 x[programCount];
|
||||||
0x000a0009, 0x000c000b, 0x000e000d, 0x0010000f,
|
x[programIndex] = aFOO[programIndex];
|
||||||
0x00120011 };
|
unsigned int16 v = 0;
|
||||||
unsigned int v = load_from_int16(x, 1);
|
if (programIndex & 1)
|
||||||
|
v = x[programIndex];
|
||||||
RET[programIndex] = v;
|
RET[programIndex] = v;
|
||||||
}
|
}
|
||||||
|
|
||||||
export void result(uniform float RET[]) {
|
export void result(uniform float RET[]) {
|
||||||
RET[programIndex] = 2+programIndex;
|
if (programIndex & 1)
|
||||||
|
RET[programIndex] = 1+programIndex;
|
||||||
|
else
|
||||||
|
RET[programIndex] = 0;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,9 +1,9 @@
|
|||||||
export uniform int width() { return programCount; }
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||||
uniform int x[8] = { 0x00020001, 0x00040003, 0x00060005, 0x00080007,
|
uniform int16 x[programCount];
|
||||||
0x000a0009, 0x000c000b, 0x000e000d, 0x0010000f };
|
x[programIndex] = aFOO[programIndex];
|
||||||
unsigned int v = load_from_int16(x, 0);
|
unsigned int16 v = x[programIndex];
|
||||||
RET[programIndex] = v;
|
RET[programIndex] = v;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,12 +1,17 @@
|
|||||||
export uniform int width() { return programCount; }
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||||
uniform int x[5] = { 0x04030201, 0x08070605, 0x0c0b0a09, 0x100f0e0d,
|
uniform int8 x[programCount];
|
||||||
0x14131211 };
|
x[programIndex] = aFOO[programIndex];
|
||||||
unsigned int v = load_from_int8(x, 2);
|
unsigned int8 v = 0;
|
||||||
|
if (programIndex & 1)
|
||||||
|
v = x[programIndex];
|
||||||
RET[programIndex] = v;
|
RET[programIndex] = v;
|
||||||
}
|
}
|
||||||
|
|
||||||
export void result(uniform float RET[]) {
|
export void result(uniform float RET[]) {
|
||||||
RET[programIndex] = 3+programIndex;
|
if (programIndex & 1)
|
||||||
|
RET[programIndex] = 1+programIndex;
|
||||||
|
else
|
||||||
|
RET[programIndex] = 0;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,8 +1,9 @@
|
|||||||
export uniform int width() { return programCount; }
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||||
uniform int x[4] = { 0x04030201, 0x08070605, 0x0c0b0a09, 0x100f0e0d };
|
uniform int8 x[programCount];
|
||||||
unsigned int v = load_from_int8(x, 0);
|
x[programIndex] = aFOO[programIndex];
|
||||||
|
unsigned int8 v = x[programIndex];
|
||||||
RET[programIndex] = v;
|
RET[programIndex] = v;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
|||||||
uniform Bar bar;
|
uniform Bar bar;
|
||||||
for (uniform int i = 0; i < 6; ++i)
|
for (uniform int i = 0; i < 6; ++i)
|
||||||
for (uniform int j = 0; j < 18; ++j)
|
for (uniform int j = 0; j < 18; ++j)
|
||||||
bar.foo[i].f[j] = 2.;
|
bar.foo[i].f[j] = 2.+b-5;
|
||||||
|
|
||||||
bar.foo[5].f[a] = a;
|
bar.foo[5].f[a] = a;
|
||||||
RET[programIndex] = bar.foo[b].f[a];
|
RET[programIndex] = bar.foo[b].f[a];
|
||||||
|
|||||||
@@ -1,8 +1,6 @@
|
|||||||
|
|
||||||
export uniform int width() { return programCount; }
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
struct Foo {
|
struct Foo {
|
||||||
float f[6];
|
float f[6];
|
||||||
};
|
};
|
||||||
@@ -16,7 +14,7 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
|||||||
uniform Bar bar;
|
uniform Bar bar;
|
||||||
for (uniform int i = 0; i < 6; ++i)
|
for (uniform int i = 0; i < 6; ++i)
|
||||||
for (uniform int j = 0; j < 6; ++j)
|
for (uniform int j = 0; j < 6; ++j)
|
||||||
bar.foo[i].f[j] = 2.;
|
bar.foo[i].f[j] = 2.+b-5;
|
||||||
RET[programIndex] = bar.foo[b].f[b];
|
RET[programIndex] = bar.foo[b].f[b];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ export uniform int width() { return programCount; }
|
|||||||
export void f_fu(uniform float ret[], uniform float aa[], uniform float b) {
|
export void f_fu(uniform float ret[], uniform float aa[], uniform float b) {
|
||||||
uniform float foo[16];
|
uniform float foo[16];
|
||||||
for (uniform int i = 0; i < 16; ++i)
|
for (uniform int i = 0; i < 16; ++i)
|
||||||
foo[i] = 1;
|
foo[i] = i;
|
||||||
|
|
||||||
uniform int i = 0;
|
uniform int i = 0;
|
||||||
foo[i++] += 1;
|
foo[i++] += 1;
|
||||||
|
|||||||
@@ -6,10 +6,10 @@ void inc(reference float v) { ++v; }
|
|||||||
export void f_fu(uniform float ret[], uniform float aa[], uniform float b) {
|
export void f_fu(uniform float ret[], uniform float aa[], uniform float b) {
|
||||||
uniform float foo[32];
|
uniform float foo[32];
|
||||||
for (uniform int i = 0; i < 32; ++i)
|
for (uniform int i = 0; i < 32; ++i)
|
||||||
foo[i] = 10;
|
foo[i] = 10+i;
|
||||||
int a = (int)aa[programIndex];
|
int a = (int)aa[programIndex];
|
||||||
inc(foo[a]);
|
inc(foo[a]);
|
||||||
ret[programIndex] = foo[programIndex];
|
ret[programIndex] = foo[programIndex]-programIndex;
|
||||||
}
|
}
|
||||||
|
|
||||||
export void result(uniform float ret[]) {
|
export void result(uniform float ret[]) {
|
||||||
|
|||||||
12
tests/rotate-5.ispc
Normal file
12
tests/rotate-5.ispc
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
|
||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||||
|
int8 a = aFOO[programIndex];
|
||||||
|
int8 rot = rotate(a, 2);
|
||||||
|
RET[programIndex] = rot;
|
||||||
|
}
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
RET[programIndex] = 1 + (programIndex + 2) % programCount;
|
||||||
|
}
|
||||||
12
tests/rotate-6.ispc
Normal file
12
tests/rotate-6.ispc
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
|
||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||||
|
int16 a = aFOO[programIndex];
|
||||||
|
int16 rot = rotate(a, -1);
|
||||||
|
RET[programIndex] = rot;
|
||||||
|
}
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
RET[programIndex] = 1 + (programIndex + programCount - 1) % programCount;
|
||||||
|
}
|
||||||
17
tests/scatter-int16-1.ispc
Normal file
17
tests/scatter-int16-1.ispc
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||||
|
uniform int16 x[programCount];
|
||||||
|
x[programIndex] = -1;
|
||||||
|
int a = aFOO[programIndex]-1;
|
||||||
|
if (programIndex < 3)
|
||||||
|
x[a] = programIndex;
|
||||||
|
RET[programIndex] = x[programIndex];
|
||||||
|
}
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
RET[programIndex] = -1;
|
||||||
|
RET[0] = 0;
|
||||||
|
RET[1] = 1;
|
||||||
|
RET[2] = 2;
|
||||||
|
}
|
||||||
13
tests/scatter-int16.ispc
Normal file
13
tests/scatter-int16.ispc
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||||
|
uniform int16 x[programCount];
|
||||||
|
x[programIndex] = 0;
|
||||||
|
int a = aFOO[programIndex]-1;
|
||||||
|
x[a] = programIndex;
|
||||||
|
RET[programIndex] = x[programIndex];
|
||||||
|
}
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
RET[programIndex] = programIndex;
|
||||||
|
}
|
||||||
17
tests/scatter-int8-1.ispc
Normal file
17
tests/scatter-int8-1.ispc
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||||
|
uniform int8 x[programCount];
|
||||||
|
x[programIndex] = -1;
|
||||||
|
int a = aFOO[programIndex]-1;
|
||||||
|
if (programIndex < 3)
|
||||||
|
x[a] = programIndex;
|
||||||
|
RET[programIndex] = x[programIndex];
|
||||||
|
}
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
RET[programIndex] = -1;
|
||||||
|
RET[0] = 0;
|
||||||
|
RET[1] = 1;
|
||||||
|
RET[2] = 2;
|
||||||
|
}
|
||||||
13
tests/scatter-int8.ispc
Normal file
13
tests/scatter-int8.ispc
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||||
|
uniform int8 x[programCount];
|
||||||
|
x[programIndex] = 0;
|
||||||
|
int a = aFOO[programIndex]-1;
|
||||||
|
x[a] = programIndex;
|
||||||
|
RET[programIndex] = x[programIndex];
|
||||||
|
}
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
RET[programIndex] = programIndex;
|
||||||
|
}
|
||||||
12
tests/shuffle-3.ispc
Normal file
12
tests/shuffle-3.ispc
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
|
||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||||
|
int8 a = aFOO[programIndex];
|
||||||
|
int8 shuf = shuffle(a, 1);
|
||||||
|
RET[programIndex] = shuf;
|
||||||
|
}
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
RET[programIndex] = 2;
|
||||||
|
}
|
||||||
13
tests/shuffle-4.ispc
Normal file
13
tests/shuffle-4.ispc
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
|
||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||||
|
int16 a = aFOO[programIndex];
|
||||||
|
int reverse = programCount - 1 - programIndex;
|
||||||
|
int16 shuf = shuffle(a, reverse);
|
||||||
|
RET[programIndex] = shuf;
|
||||||
|
}
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
RET[programIndex] = programCount - programIndex;
|
||||||
|
}
|
||||||
13
tests/shuffle-5.ispc
Normal file
13
tests/shuffle-5.ispc
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
|
||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||||
|
int8 a = aFOO[programIndex];
|
||||||
|
int reverse = programCount - 1 - programIndex + (int)b - 5;
|
||||||
|
int8 shuf = shuffle(a, reverse);
|
||||||
|
RET[programIndex] = shuf;
|
||||||
|
}
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
RET[programIndex] = programCount - programIndex;
|
||||||
|
}
|
||||||
13
tests/shuffle2-11.ispc
Normal file
13
tests/shuffle2-11.ispc
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
|
||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||||
|
int16 aa = aFOO[programIndex];
|
||||||
|
int16 bb = aa + programCount;
|
||||||
|
int16 shuf = shuffle(aa, bb, 2*programIndex);
|
||||||
|
RET[programIndex] = shuf;
|
||||||
|
}
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
RET[programIndex] = 1 + 2*programIndex;
|
||||||
|
}
|
||||||
13
tests/shuffle2-6.ispc
Normal file
13
tests/shuffle2-6.ispc
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
|
||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||||
|
int8 aa = aFOO[programIndex];
|
||||||
|
int8 bb = aa + programCount;
|
||||||
|
int8 shuf = shuffle(aa, bb, 1);
|
||||||
|
RET[programIndex] = shuf;
|
||||||
|
}
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
RET[programIndex] = 2;
|
||||||
|
}
|
||||||
13
tests/shuffle2-7.ispc
Normal file
13
tests/shuffle2-7.ispc
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
|
||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||||
|
int16 aa = aFOO[programIndex];
|
||||||
|
int16 bb = aa + programCount;
|
||||||
|
int16 shuf = shuffle(aa, bb, programCount + 1);
|
||||||
|
RET[programIndex] = shuf;
|
||||||
|
}
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
RET[programIndex] = 2 + programCount;
|
||||||
|
}
|
||||||
13
tests/shuffle2-8.ispc
Normal file
13
tests/shuffle2-8.ispc
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
|
||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||||
|
int8 aa = aFOO[programIndex];
|
||||||
|
int8 bb = aa + programCount;
|
||||||
|
int8 shuf = shuffle(aa, bb, programIndex + 2);
|
||||||
|
RET[programIndex] = shuf;
|
||||||
|
}
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
RET[programIndex] = 3 + programIndex;
|
||||||
|
}
|
||||||
13
tests/shuffle2-9.ispc
Normal file
13
tests/shuffle2-9.ispc
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
|
||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||||
|
int16 aa = aFOO[programIndex];
|
||||||
|
int16 bb = aa + programCount;
|
||||||
|
int16 shuf = shuffle(aa, bb, programIndex + 2 + (int)b - 5);
|
||||||
|
RET[programIndex] = shuf;
|
||||||
|
}
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
RET[programIndex] = 3 + programIndex;
|
||||||
|
}
|
||||||
@@ -1,16 +1,15 @@
|
|||||||
export uniform int width() { return programCount; }
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||||
uniform int x[16];
|
uniform unsigned int16 x[2*programCount];
|
||||||
for (uniform int i = 0; i < 16; ++i)
|
for (uniform int i = 0; i < 2*programCount; ++i)
|
||||||
x[i] = 0xffffffff;
|
x[i] = 0xffff;
|
||||||
unsigned int val = aFOO[programIndex];
|
unsigned int16 val = aFOO[programIndex];
|
||||||
store_to_int16(x, 5, val);
|
x[2+programIndex] = val;
|
||||||
unsigned int v = load_from_int16(x, 6);
|
RET[programIndex] = x[1+programIndex];
|
||||||
RET[programIndex] = v;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export void result(uniform float RET[]) {
|
export void result(uniform float RET[]) {
|
||||||
RET[programIndex] = 2+programIndex;
|
RET[programIndex] = programIndex;
|
||||||
RET[programCount-1] = (unsigned int)0xffffffff;
|
RET[0] = 65535;
|
||||||
}
|
}
|
||||||
|
|||||||
19
tests/store-int16-2.ispc
Normal file
19
tests/store-int16-2.ispc
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||||
|
uniform unsigned int16 x[2*programCount];
|
||||||
|
for (uniform int i = 0; i < 2*programCount; ++i)
|
||||||
|
x[i] = 0xffff;
|
||||||
|
unsigned int16 val = aFOO[programIndex];
|
||||||
|
if (programIndex & 1)
|
||||||
|
x[2+programIndex] = val;
|
||||||
|
RET[programIndex] = x[1+programIndex];
|
||||||
|
}
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
if (programIndex & 1)
|
||||||
|
RET[programIndex] = 65535;
|
||||||
|
else
|
||||||
|
RET[programIndex] = programIndex;
|
||||||
|
RET[0] = 65535;
|
||||||
|
}
|
||||||
@@ -1,16 +1,15 @@
|
|||||||
export uniform int width() { return programCount; }
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||||
uniform int x[16];
|
uniform int16 x[2*programCount];
|
||||||
for (uniform int i = 0; i < 16; ++i)
|
for (uniform int i = 0; i < 2*programCount; ++i)
|
||||||
x[i] = 0xffffffff;
|
x[i] = 0xffff;
|
||||||
unsigned int val = aFOO[programIndex];
|
unsigned int8 val = aFOO[programIndex];
|
||||||
store_to_int16(x, 5, val);
|
x[2+programIndex] = val;
|
||||||
int v = load_from_int16(x, 6);
|
RET[programIndex] = x[1+programIndex];
|
||||||
RET[programIndex] = v;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export void result(uniform float RET[]) {
|
export void result(uniform float RET[]) {
|
||||||
RET[programIndex] = 2+programIndex;
|
RET[programIndex] = programIndex;
|
||||||
RET[programCount-1] = -1;
|
RET[0] = -1.;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,16 +1,15 @@
|
|||||||
export uniform int width() { return programCount; }
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||||
uniform unsigned int x[8];
|
uniform unsigned int8 x[2*programCount];
|
||||||
for (uniform int i = 0; i < 8; ++i)
|
for (uniform int i = 0; i < 2*programCount; ++i)
|
||||||
x[i] = 0xffffffff;
|
x[i] = 0xff;
|
||||||
unsigned int val = aFOO[programIndex];
|
unsigned int8 val = aFOO[programIndex];
|
||||||
store_to_uint8(x, 2, val);
|
x[2+programIndex] = val;
|
||||||
unsigned int v = load_from_uint8(x, 1);
|
RET[programIndex] = x[1+programIndex];
|
||||||
RET[programIndex] = v;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export void result(uniform float RET[]) {
|
export void result(uniform float RET[]) {
|
||||||
RET[programIndex] = programIndex;
|
RET[programIndex] = programIndex;
|
||||||
RET[0] = (unsigned int)0xff;
|
RET[0] = 255;
|
||||||
}
|
}
|
||||||
|
|||||||
19
tests/store-int8-2.ispc
Normal file
19
tests/store-int8-2.ispc
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||||
|
uniform unsigned int8 x[2*programCount];
|
||||||
|
for (uniform int i = 0; i < 2*programCount; ++i)
|
||||||
|
x[i] = 0xff;
|
||||||
|
unsigned int8 val = aFOO[programIndex];
|
||||||
|
if (programIndex & 1)
|
||||||
|
x[2+programIndex] = val;
|
||||||
|
RET[programIndex] = x[1+programIndex];
|
||||||
|
}
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
if (programIndex & 1)
|
||||||
|
RET[programIndex] = 255;
|
||||||
|
else
|
||||||
|
RET[programIndex] = programIndex;
|
||||||
|
RET[0] = 255;
|
||||||
|
}
|
||||||
@@ -1,13 +1,12 @@
|
|||||||
export uniform int width() { return programCount; }
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||||
uniform int x[8];
|
uniform int8 x[2*programCount];
|
||||||
for (uniform int i = 0; i < 8; ++i)
|
for (uniform int i = 0; i < 2*programCount; ++i)
|
||||||
x[i] = 0xffffffff;
|
x[i] = 0xff;
|
||||||
unsigned int val = aFOO[programIndex];
|
unsigned int8 val = aFOO[programIndex];
|
||||||
store_to_int8(x, 2, val);
|
x[2+programIndex] = val;
|
||||||
int v = load_from_int8(x, 1);
|
RET[programIndex] = x[1+programIndex];
|
||||||
RET[programIndex] = v;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export void result(uniform float RET[]) {
|
export void result(uniform float RET[]) {
|
||||||
|
|||||||
@@ -4,12 +4,12 @@ export uniform int width() { return programCount; }
|
|||||||
export void f_fu(uniform float ret[], uniform float aa[], uniform float b) {
|
export void f_fu(uniform float ret[], uniform float aa[], uniform float b) {
|
||||||
uniform int foo[10];
|
uniform int foo[10];
|
||||||
for (uniform int i = 0; i < 10; ++i)
|
for (uniform int i = 0; i < 10; ++i)
|
||||||
foo[i] = 10;
|
foo[i] = 10+i;
|
||||||
int bb = b;
|
int bb = b;
|
||||||
foo[bb] = 0;
|
foo[bb] = 0;
|
||||||
ret[programIndex] = foo[4] + foo[5];
|
ret[programIndex] = foo[4] + foo[5];
|
||||||
}
|
}
|
||||||
|
|
||||||
export void result(uniform float ret[]) {
|
export void result(uniform float ret[]) {
|
||||||
ret[programIndex] = 10;
|
ret[programIndex] = 14;
|
||||||
}
|
}
|
||||||
|
|||||||
114
type.cpp
114
type.cpp
@@ -74,6 +74,14 @@ lShouldPrintName(const std::string &name) {
|
|||||||
|
|
||||||
const AtomicType *AtomicType::UniformBool = new AtomicType(TYPE_BOOL, true, false);
|
const AtomicType *AtomicType::UniformBool = new AtomicType(TYPE_BOOL, true, false);
|
||||||
const AtomicType *AtomicType::VaryingBool = new AtomicType(TYPE_BOOL, false, false);
|
const AtomicType *AtomicType::VaryingBool = new AtomicType(TYPE_BOOL, false, false);
|
||||||
|
const AtomicType *AtomicType::UniformInt8 = new AtomicType(TYPE_INT8, true, false);
|
||||||
|
const AtomicType *AtomicType::VaryingInt8 = new AtomicType(TYPE_INT8, false, false);
|
||||||
|
const AtomicType *AtomicType::UniformUInt8 = new AtomicType(TYPE_UINT8, true, false);
|
||||||
|
const AtomicType *AtomicType::VaryingUInt8 = new AtomicType(TYPE_UINT8, false, false);
|
||||||
|
const AtomicType *AtomicType::UniformInt16 = new AtomicType(TYPE_INT16, true, false);
|
||||||
|
const AtomicType *AtomicType::VaryingInt16 = new AtomicType(TYPE_INT16, false, false);
|
||||||
|
const AtomicType *AtomicType::UniformUInt16 = new AtomicType(TYPE_UINT16, true, false);
|
||||||
|
const AtomicType *AtomicType::VaryingUInt16 = new AtomicType(TYPE_UINT16, false, false);
|
||||||
const AtomicType *AtomicType::UniformInt32 = new AtomicType(TYPE_INT32, true, false);
|
const AtomicType *AtomicType::UniformInt32 = new AtomicType(TYPE_INT32, true, false);
|
||||||
const AtomicType *AtomicType::VaryingInt32 = new AtomicType(TYPE_INT32, false, false);
|
const AtomicType *AtomicType::VaryingInt32 = new AtomicType(TYPE_INT32, false, false);
|
||||||
const AtomicType *AtomicType::UniformUInt32 = new AtomicType(TYPE_UINT32, true, false);
|
const AtomicType *AtomicType::UniformUInt32 = new AtomicType(TYPE_UINT32, true, false);
|
||||||
@@ -89,6 +97,14 @@ const AtomicType *AtomicType::VaryingDouble = new AtomicType(TYPE_DOUBLE, false,
|
|||||||
|
|
||||||
const AtomicType *AtomicType::UniformConstBool = new AtomicType(TYPE_BOOL, true, true);
|
const AtomicType *AtomicType::UniformConstBool = new AtomicType(TYPE_BOOL, true, true);
|
||||||
const AtomicType *AtomicType::VaryingConstBool = new AtomicType(TYPE_BOOL, false, true);
|
const AtomicType *AtomicType::VaryingConstBool = new AtomicType(TYPE_BOOL, false, true);
|
||||||
|
const AtomicType *AtomicType::UniformConstInt8 = new AtomicType(TYPE_INT8, true, true);
|
||||||
|
const AtomicType *AtomicType::VaryingConstInt8 = new AtomicType(TYPE_INT8, false, true);
|
||||||
|
const AtomicType *AtomicType::UniformConstUInt8 = new AtomicType(TYPE_UINT8, true, true);
|
||||||
|
const AtomicType *AtomicType::VaryingConstUInt8 = new AtomicType(TYPE_UINT8, false, true);
|
||||||
|
const AtomicType *AtomicType::UniformConstInt16 = new AtomicType(TYPE_INT16, true, true);
|
||||||
|
const AtomicType *AtomicType::VaryingConstInt16 = new AtomicType(TYPE_INT16, false, true);
|
||||||
|
const AtomicType *AtomicType::UniformConstUInt16 = new AtomicType(TYPE_UINT16, true, true);
|
||||||
|
const AtomicType *AtomicType::VaryingConstUInt16 = new AtomicType(TYPE_UINT16, false, true);
|
||||||
const AtomicType *AtomicType::UniformConstInt32 = new AtomicType(TYPE_INT32, true, true);
|
const AtomicType *AtomicType::UniformConstInt32 = new AtomicType(TYPE_INT32, true, true);
|
||||||
const AtomicType *AtomicType::VaryingConstInt32 = new AtomicType(TYPE_INT32, false, true);
|
const AtomicType *AtomicType::VaryingConstInt32 = new AtomicType(TYPE_INT32, false, true);
|
||||||
const AtomicType *AtomicType::UniformConstUInt32 = new AtomicType(TYPE_UINT32, true, true);
|
const AtomicType *AtomicType::UniformConstUInt32 = new AtomicType(TYPE_UINT32, true, true);
|
||||||
@@ -101,6 +117,7 @@ const AtomicType *AtomicType::UniformConstUInt64 = new AtomicType(TYPE_UINT64, t
|
|||||||
const AtomicType *AtomicType::VaryingConstUInt64 = new AtomicType(TYPE_UINT64, false, true);
|
const AtomicType *AtomicType::VaryingConstUInt64 = new AtomicType(TYPE_UINT64, false, true);
|
||||||
const AtomicType *AtomicType::UniformConstDouble = new AtomicType(TYPE_DOUBLE, true, true);
|
const AtomicType *AtomicType::UniformConstDouble = new AtomicType(TYPE_DOUBLE, true, true);
|
||||||
const AtomicType *AtomicType::VaryingConstDouble = new AtomicType(TYPE_DOUBLE, false, true);
|
const AtomicType *AtomicType::VaryingConstDouble = new AtomicType(TYPE_DOUBLE, false, true);
|
||||||
|
|
||||||
const AtomicType *AtomicType::Void = new AtomicType(TYPE_VOID, true, false);
|
const AtomicType *AtomicType::Void = new AtomicType(TYPE_VOID, true, false);
|
||||||
|
|
||||||
|
|
||||||
@@ -123,14 +140,17 @@ AtomicType::IsFloatType() const {
|
|||||||
|
|
||||||
bool
|
bool
|
||||||
AtomicType::IsIntType() const {
|
AtomicType::IsIntType() const {
|
||||||
return (basicType == TYPE_INT32 || basicType == TYPE_UINT32 ||
|
return (basicType == TYPE_INT8 || basicType == TYPE_UINT8 ||
|
||||||
|
basicType == TYPE_INT16 || basicType == TYPE_UINT16 ||
|
||||||
|
basicType == TYPE_INT32 || basicType == TYPE_UINT32 ||
|
||||||
basicType == TYPE_INT64 || basicType == TYPE_UINT64);
|
basicType == TYPE_INT64 || basicType == TYPE_UINT64);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool
|
bool
|
||||||
AtomicType::IsUnsignedType() const {
|
AtomicType::IsUnsignedType() const {
|
||||||
return (basicType == TYPE_UINT32 || basicType == TYPE_UINT64);
|
return (basicType == TYPE_UINT8 || basicType == TYPE_UINT16 ||
|
||||||
|
basicType == TYPE_UINT32 || basicType == TYPE_UINT64);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -151,10 +171,18 @@ AtomicType::GetAsUnsignedType() const {
|
|||||||
if (IsUnsignedType())
|
if (IsUnsignedType())
|
||||||
return this;
|
return this;
|
||||||
|
|
||||||
if (this == AtomicType::UniformInt32) return AtomicType::UniformUInt32;
|
if (this == AtomicType::UniformInt8) return AtomicType::UniformUInt8;
|
||||||
|
else if (this == AtomicType::VaryingInt8) return AtomicType::VaryingUInt8;
|
||||||
|
else if (this == AtomicType::UniformInt16) return AtomicType::UniformUInt16;
|
||||||
|
else if (this == AtomicType::VaryingInt16) return AtomicType::VaryingUInt16;
|
||||||
|
else if (this == AtomicType::UniformInt32) return AtomicType::UniformUInt32;
|
||||||
else if (this == AtomicType::VaryingInt32) return AtomicType::VaryingUInt32;
|
else if (this == AtomicType::VaryingInt32) return AtomicType::VaryingUInt32;
|
||||||
else if (this == AtomicType::UniformInt64) return AtomicType::UniformUInt64;
|
else if (this == AtomicType::UniformInt64) return AtomicType::UniformUInt64;
|
||||||
else if (this == AtomicType::VaryingInt64) return AtomicType::VaryingUInt64;
|
else if (this == AtomicType::VaryingInt64) return AtomicType::VaryingUInt64;
|
||||||
|
else if (this == AtomicType::UniformConstInt8) return AtomicType::UniformConstUInt8;
|
||||||
|
else if (this == AtomicType::VaryingConstInt8) return AtomicType::VaryingConstUInt8;
|
||||||
|
else if (this == AtomicType::UniformConstInt16) return AtomicType::UniformConstUInt16;
|
||||||
|
else if (this == AtomicType::VaryingConstInt16) return AtomicType::VaryingConstUInt16;
|
||||||
else if (this == AtomicType::UniformConstInt32) return AtomicType::UniformConstUInt32;
|
else if (this == AtomicType::UniformConstInt32) return AtomicType::UniformConstUInt32;
|
||||||
else if (this == AtomicType::VaryingConstInt32) return AtomicType::VaryingConstUInt32;
|
else if (this == AtomicType::VaryingConstInt32) return AtomicType::VaryingConstUInt32;
|
||||||
else if (this == AtomicType::UniformConstInt64) return AtomicType::UniformConstUInt64;
|
else if (this == AtomicType::UniformConstInt64) return AtomicType::UniformConstUInt64;
|
||||||
@@ -170,6 +198,10 @@ AtomicType::GetAsConstType() const {
|
|||||||
|
|
||||||
switch (basicType) {
|
switch (basicType) {
|
||||||
case TYPE_BOOL: return isUniform ? UniformConstBool : VaryingConstBool;
|
case TYPE_BOOL: return isUniform ? UniformConstBool : VaryingConstBool;
|
||||||
|
case TYPE_INT8: return isUniform ? UniformConstInt8 : VaryingConstInt8;
|
||||||
|
case TYPE_UINT8: return isUniform ? UniformConstUInt8 : VaryingConstUInt8;
|
||||||
|
case TYPE_INT16: return isUniform ? UniformConstInt16 : VaryingConstInt16;
|
||||||
|
case TYPE_UINT16: return isUniform ? UniformConstUInt16 : VaryingConstUInt16;
|
||||||
case TYPE_INT32: return isUniform ? UniformConstInt32 : VaryingConstInt32;
|
case TYPE_INT32: return isUniform ? UniformConstInt32 : VaryingConstInt32;
|
||||||
case TYPE_UINT32: return isUniform ? UniformConstUInt32 : VaryingConstUInt32;
|
case TYPE_UINT32: return isUniform ? UniformConstUInt32 : VaryingConstUInt32;
|
||||||
case TYPE_FLOAT: return isUniform ? UniformConstFloat : VaryingConstFloat;
|
case TYPE_FLOAT: return isUniform ? UniformConstFloat : VaryingConstFloat;
|
||||||
@@ -190,6 +222,10 @@ AtomicType::GetAsNonConstType() const {
|
|||||||
|
|
||||||
switch (basicType) {
|
switch (basicType) {
|
||||||
case TYPE_BOOL: return isUniform ? UniformBool : VaryingBool;
|
case TYPE_BOOL: return isUniform ? UniformBool : VaryingBool;
|
||||||
|
case TYPE_INT8: return isUniform ? UniformInt8 : VaryingInt8;
|
||||||
|
case TYPE_UINT8: return isUniform ? UniformUInt8 : VaryingUInt8;
|
||||||
|
case TYPE_INT16: return isUniform ? UniformInt16 : VaryingInt16;
|
||||||
|
case TYPE_UINT16: return isUniform ? UniformUInt16 : VaryingUInt16;
|
||||||
case TYPE_INT32: return isUniform ? UniformInt32 : VaryingInt32;
|
case TYPE_INT32: return isUniform ? UniformInt32 : VaryingInt32;
|
||||||
case TYPE_UINT32: return isUniform ? UniformUInt32 : VaryingUInt32;
|
case TYPE_UINT32: return isUniform ? UniformUInt32 : VaryingUInt32;
|
||||||
case TYPE_FLOAT: return isUniform ? UniformFloat : VaryingFloat;
|
case TYPE_FLOAT: return isUniform ? UniformFloat : VaryingFloat;
|
||||||
@@ -216,13 +252,17 @@ AtomicType::GetAsVaryingType() const {
|
|||||||
|
|
||||||
switch (basicType) {
|
switch (basicType) {
|
||||||
case TYPE_VOID: return this;
|
case TYPE_VOID: return this;
|
||||||
case TYPE_BOOL: return isConst ? AtomicType::VaryingConstBool : AtomicType::VaryingBool;
|
case TYPE_BOOL: return isConst ? VaryingConstBool : VaryingBool;
|
||||||
case TYPE_INT32: return isConst ? AtomicType::VaryingConstInt32 : AtomicType::VaryingInt32;
|
case TYPE_INT8: return isConst ? VaryingConstInt8 : VaryingInt8;
|
||||||
case TYPE_UINT32: return isConst ? AtomicType::VaryingConstUInt32 : AtomicType::VaryingUInt32;
|
case TYPE_UINT8: return isConst ? VaryingConstUInt8 : VaryingUInt8;
|
||||||
case TYPE_FLOAT: return isConst ? AtomicType::VaryingConstFloat : AtomicType::VaryingFloat;
|
case TYPE_INT16: return isConst ? VaryingConstInt16 : VaryingInt16;
|
||||||
case TYPE_INT64: return isConst ? AtomicType::VaryingConstInt64 : AtomicType::VaryingInt64;
|
case TYPE_UINT16: return isConst ? VaryingConstUInt16 : VaryingUInt16;
|
||||||
case TYPE_UINT64: return isConst ? AtomicType::VaryingConstUInt64 : AtomicType::VaryingUInt64;
|
case TYPE_INT32: return isConst ? VaryingConstInt32 : VaryingInt32;
|
||||||
case TYPE_DOUBLE: return isConst ? AtomicType::VaryingConstDouble : AtomicType::VaryingDouble;
|
case TYPE_UINT32: return isConst ? VaryingConstUInt32 : VaryingUInt32;
|
||||||
|
case TYPE_FLOAT: return isConst ? VaryingConstFloat : VaryingFloat;
|
||||||
|
case TYPE_INT64: return isConst ? VaryingConstInt64 : VaryingInt64;
|
||||||
|
case TYPE_UINT64: return isConst ? VaryingConstUInt64 : VaryingUInt64;
|
||||||
|
case TYPE_DOUBLE: return isConst ? VaryingConstDouble : VaryingDouble;
|
||||||
default: FATAL("Logic error in AtomicType::GetAsVaryingType()");
|
default: FATAL("Logic error in AtomicType::GetAsVaryingType()");
|
||||||
}
|
}
|
||||||
return NULL;
|
return NULL;
|
||||||
@@ -236,13 +276,17 @@ AtomicType::GetAsUniformType() const {
|
|||||||
|
|
||||||
switch (basicType) {
|
switch (basicType) {
|
||||||
case TYPE_VOID: return this;
|
case TYPE_VOID: return this;
|
||||||
case TYPE_BOOL: return isConst ? AtomicType::UniformConstBool : AtomicType::UniformBool;
|
case TYPE_BOOL: return isConst ? UniformConstBool : UniformBool;
|
||||||
case TYPE_INT32: return isConst ? AtomicType::UniformConstInt32 : AtomicType::UniformInt32;
|
case TYPE_INT8: return isConst ? UniformConstInt8 : UniformInt8;
|
||||||
case TYPE_UINT32: return isConst ? AtomicType::UniformConstUInt32 : AtomicType::UniformUInt32;
|
case TYPE_UINT8: return isConst ? UniformConstUInt8 : UniformUInt8;
|
||||||
case TYPE_FLOAT: return isConst ? AtomicType::UniformConstFloat : AtomicType::UniformFloat;
|
case TYPE_INT16: return isConst ? UniformConstInt16 : UniformInt16;
|
||||||
case TYPE_INT64: return isConst ? AtomicType::UniformConstInt64 : AtomicType::UniformInt64;
|
case TYPE_UINT16: return isConst ? UniformConstUInt16 : UniformUInt16;
|
||||||
case TYPE_UINT64: return isConst ? AtomicType::UniformConstUInt64 : AtomicType::UniformUInt64;
|
case TYPE_INT32: return isConst ? UniformConstInt32 : UniformInt32;
|
||||||
case TYPE_DOUBLE: return isConst ? AtomicType::UniformConstDouble : AtomicType::UniformDouble;
|
case TYPE_UINT32: return isConst ? UniformConstUInt32 : UniformUInt32;
|
||||||
|
case TYPE_FLOAT: return isConst ? UniformConstFloat : UniformFloat;
|
||||||
|
case TYPE_INT64: return isConst ? UniformConstInt64 : UniformInt64;
|
||||||
|
case TYPE_UINT64: return isConst ? UniformConstUInt64 : UniformUInt64;
|
||||||
|
case TYPE_DOUBLE: return isConst ? UniformConstDouble : UniformDouble;
|
||||||
default: FATAL("Logic error in AtomicType::GetAsUniformType()");
|
default: FATAL("Logic error in AtomicType::GetAsUniformType()");
|
||||||
}
|
}
|
||||||
return NULL;
|
return NULL;
|
||||||
@@ -267,6 +311,10 @@ AtomicType::GetString() const {
|
|||||||
switch (basicType) {
|
switch (basicType) {
|
||||||
case TYPE_VOID: ret += "void"; break;
|
case TYPE_VOID: ret += "void"; break;
|
||||||
case TYPE_BOOL: ret += "bool"; break;
|
case TYPE_BOOL: ret += "bool"; break;
|
||||||
|
case TYPE_INT8: ret += "int8"; break;
|
||||||
|
case TYPE_UINT8: ret += "unsigned int8"; break;
|
||||||
|
case TYPE_INT16: ret += "int16"; break;
|
||||||
|
case TYPE_UINT16: ret += "unsigned int16"; break;
|
||||||
case TYPE_INT32: ret += "int32"; break;
|
case TYPE_INT32: ret += "int32"; break;
|
||||||
case TYPE_UINT32: ret += "unsigned int32"; break;
|
case TYPE_UINT32: ret += "unsigned int32"; break;
|
||||||
case TYPE_FLOAT: ret += "float"; break;
|
case TYPE_FLOAT: ret += "float"; break;
|
||||||
@@ -288,6 +336,10 @@ AtomicType::Mangle() const {
|
|||||||
switch (basicType) {
|
switch (basicType) {
|
||||||
case TYPE_VOID: ret += "v"; break;
|
case TYPE_VOID: ret += "v"; break;
|
||||||
case TYPE_BOOL: ret += "b"; break;
|
case TYPE_BOOL: ret += "b"; break;
|
||||||
|
case TYPE_INT8: ret += "t"; break;
|
||||||
|
case TYPE_UINT8: ret += "T"; break;
|
||||||
|
case TYPE_INT16: ret += "s"; break;
|
||||||
|
case TYPE_UINT16: ret += "S"; break;
|
||||||
case TYPE_INT32: ret += "i"; break;
|
case TYPE_INT32: ret += "i"; break;
|
||||||
case TYPE_UINT32: ret += "u"; break;
|
case TYPE_UINT32: ret += "u"; break;
|
||||||
case TYPE_FLOAT: ret += "f"; break;
|
case TYPE_FLOAT: ret += "f"; break;
|
||||||
@@ -309,12 +361,16 @@ AtomicType::GetCDeclaration(const std::string &name) const {
|
|||||||
switch (basicType) {
|
switch (basicType) {
|
||||||
case TYPE_VOID: ret += "void"; break;
|
case TYPE_VOID: ret += "void"; break;
|
||||||
case TYPE_BOOL: ret += "bool"; break;
|
case TYPE_BOOL: ret += "bool"; break;
|
||||||
|
case TYPE_INT8: ret += "int8_t"; break;
|
||||||
|
case TYPE_UINT8: ret += "uint8_t"; break;
|
||||||
|
case TYPE_INT16: ret += "int16_t"; break;
|
||||||
|
case TYPE_UINT16: ret += "uint16_t"; break;
|
||||||
case TYPE_INT32: ret += "int32_t"; break;
|
case TYPE_INT32: ret += "int32_t"; break;
|
||||||
case TYPE_UINT32: ret += "uint32_t"; break;
|
case TYPE_UINT32: ret += "uint32_t"; break;
|
||||||
case TYPE_FLOAT: ret += "float"; break;
|
case TYPE_FLOAT: ret += "float"; break;
|
||||||
case TYPE_DOUBLE: ret += "double"; break;
|
|
||||||
case TYPE_INT64: ret += "int64_t"; break;
|
case TYPE_INT64: ret += "int64_t"; break;
|
||||||
case TYPE_UINT64: ret += "uint64_t"; break;
|
case TYPE_UINT64: ret += "uint64_t"; break;
|
||||||
|
case TYPE_DOUBLE: ret += "double"; break;
|
||||||
default: FATAL("Logic error in AtomicType::GetCDeclaration()");
|
default: FATAL("Logic error in AtomicType::GetCDeclaration()");
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -333,6 +389,12 @@ AtomicType::LLVMType(llvm::LLVMContext *ctx) const {
|
|||||||
return llvm::Type::getVoidTy(*ctx);
|
return llvm::Type::getVoidTy(*ctx);
|
||||||
case TYPE_BOOL:
|
case TYPE_BOOL:
|
||||||
return isUniform ? LLVMTypes::BoolType : LLVMTypes::BoolVectorType;
|
return isUniform ? LLVMTypes::BoolType : LLVMTypes::BoolVectorType;
|
||||||
|
case TYPE_INT8:
|
||||||
|
case TYPE_UINT8:
|
||||||
|
return isUniform ? LLVMTypes::Int8Type : LLVMTypes::Int8VectorType;
|
||||||
|
case TYPE_INT16:
|
||||||
|
case TYPE_UINT16:
|
||||||
|
return isUniform ? LLVMTypes::Int16Type : LLVMTypes::Int16VectorType;
|
||||||
case TYPE_INT32:
|
case TYPE_INT32:
|
||||||
case TYPE_UINT32:
|
case TYPE_UINT32:
|
||||||
return isUniform ? LLVMTypes::Int32Type : LLVMTypes::Int32VectorType;
|
return isUniform ? LLVMTypes::Int32Type : LLVMTypes::Int32VectorType;
|
||||||
@@ -364,6 +426,22 @@ AtomicType::GetDIType(llvm::DIDescriptor scope) const {
|
|||||||
return m->diBuilder->createBasicType("bool", 32 /* size */, 32 /* align */,
|
return m->diBuilder->createBasicType("bool", 32 /* size */, 32 /* align */,
|
||||||
llvm::dwarf::DW_ATE_unsigned);
|
llvm::dwarf::DW_ATE_unsigned);
|
||||||
break;
|
break;
|
||||||
|
case TYPE_INT8:
|
||||||
|
return m->diBuilder->createBasicType("int8", 8 /* size */, 8 /* align */,
|
||||||
|
llvm::dwarf::DW_ATE_signed);
|
||||||
|
break;
|
||||||
|
case TYPE_UINT8:
|
||||||
|
return m->diBuilder->createBasicType("uint8", 8 /* size */, 8 /* align */,
|
||||||
|
llvm::dwarf::DW_ATE_unsigned);
|
||||||
|
break;
|
||||||
|
case TYPE_INT16:
|
||||||
|
return m->diBuilder->createBasicType("int16", 16 /* size */, 16 /* align */,
|
||||||
|
llvm::dwarf::DW_ATE_signed);
|
||||||
|
break;
|
||||||
|
case TYPE_UINT16:
|
||||||
|
return m->diBuilder->createBasicType("uint16", 16 /* size */, 16 /* align */,
|
||||||
|
llvm::dwarf::DW_ATE_unsigned);
|
||||||
|
break;
|
||||||
case TYPE_INT32:
|
case TYPE_INT32:
|
||||||
return m->diBuilder->createBasicType("int32", 32 /* size */, 32 /* align */,
|
return m->diBuilder->createBasicType("int32", 32 /* size */, 32 /* align */,
|
||||||
llvm::dwarf::DW_ATE_signed);
|
llvm::dwarf::DW_ATE_signed);
|
||||||
|
|||||||
12
type.h
12
type.h
@@ -210,6 +210,10 @@ public:
|
|||||||
enum BasicType {
|
enum BasicType {
|
||||||
TYPE_VOID,
|
TYPE_VOID,
|
||||||
TYPE_BOOL,
|
TYPE_BOOL,
|
||||||
|
TYPE_INT8,
|
||||||
|
TYPE_UINT8,
|
||||||
|
TYPE_INT16,
|
||||||
|
TYPE_UINT16,
|
||||||
TYPE_INT32,
|
TYPE_INT32,
|
||||||
TYPE_UINT32,
|
TYPE_UINT32,
|
||||||
TYPE_FLOAT,
|
TYPE_FLOAT,
|
||||||
@@ -221,14 +225,22 @@ public:
|
|||||||
const BasicType basicType;
|
const BasicType basicType;
|
||||||
|
|
||||||
static const AtomicType *UniformBool, *VaryingBool;
|
static const AtomicType *UniformBool, *VaryingBool;
|
||||||
|
static const AtomicType *UniformInt8, *VaryingInt8;
|
||||||
|
static const AtomicType *UniformInt16, *VaryingInt16;
|
||||||
static const AtomicType *UniformInt32, *VaryingInt32;
|
static const AtomicType *UniformInt32, *VaryingInt32;
|
||||||
|
static const AtomicType *UniformUInt8, *VaryingUInt8;
|
||||||
|
static const AtomicType *UniformUInt16, *VaryingUInt16;
|
||||||
static const AtomicType *UniformUInt32, *VaryingUInt32;
|
static const AtomicType *UniformUInt32, *VaryingUInt32;
|
||||||
static const AtomicType *UniformFloat, *VaryingFloat;
|
static const AtomicType *UniformFloat, *VaryingFloat;
|
||||||
static const AtomicType *UniformInt64, *VaryingInt64;
|
static const AtomicType *UniformInt64, *VaryingInt64;
|
||||||
static const AtomicType *UniformUInt64, *VaryingUInt64;
|
static const AtomicType *UniformUInt64, *VaryingUInt64;
|
||||||
static const AtomicType *UniformDouble, *VaryingDouble;
|
static const AtomicType *UniformDouble, *VaryingDouble;
|
||||||
static const AtomicType *UniformConstBool, *VaryingConstBool;
|
static const AtomicType *UniformConstBool, *VaryingConstBool;
|
||||||
|
static const AtomicType *UniformConstInt8, *VaryingConstInt8;
|
||||||
|
static const AtomicType *UniformConstInt16, *VaryingConstInt16;
|
||||||
static const AtomicType *UniformConstInt32, *VaryingConstInt32;
|
static const AtomicType *UniformConstInt32, *VaryingConstInt32;
|
||||||
|
static const AtomicType *UniformConstUInt8, *VaryingConstUInt8;
|
||||||
|
static const AtomicType *UniformConstUInt16, *VaryingConstUInt16;
|
||||||
static const AtomicType *UniformConstUInt32, *VaryingConstUInt32;
|
static const AtomicType *UniformConstUInt32, *VaryingConstUInt32;
|
||||||
static const AtomicType *UniformConstFloat, *VaryingConstFloat;
|
static const AtomicType *UniformConstFloat, *VaryingConstFloat;
|
||||||
static const AtomicType *UniformConstInt64, *VaryingConstInt64;
|
static const AtomicType *UniformConstInt64, *VaryingConstInt64;
|
||||||
|
|||||||
Reference in New Issue
Block a user