diff --git a/Makefile b/Makefile index fcf708bf..2017163a 100644 --- a/Makefile +++ b/Makefile @@ -15,7 +15,7 @@ LLVM_CXXFLAGS=$(shell llvm-config --cppflags) LLVM_VERSION_DEF=-DLLVM_$(shell llvm-config --version | sed s/\\./_/) BUILD_DATE=$(shell date +%Y%m%d) -BUILD_VERSION=$(shell git log | head -1) +BUILD_VERSION=$(shell git log --abbrev-commit --abbrev=16 | head -1) CXX=g++ CPP=cpp diff --git a/builtins.cpp b/builtins.cpp index 9ce28118..b5193cc7 100644 --- a/builtins.cpp +++ b/builtins.cpp @@ -78,8 +78,14 @@ static const Type * lLLVMTypeToISPCType(const llvm::Type *t, bool intAsUnsigned) { if (t == LLVMTypes::VoidType) return AtomicType::Void; + + // uniform else if (t == LLVMTypes::BoolType) return AtomicType::UniformBool; + else if (t == LLVMTypes::Int8Type) + return intAsUnsigned ? AtomicType::UniformUInt8 : AtomicType::UniformInt8; + else if (t == LLVMTypes::Int16Type) + return intAsUnsigned ? AtomicType::UniformUInt16 : AtomicType::UniformInt16; else if (t == LLVMTypes::Int32Type) return intAsUnsigned ? AtomicType::UniformUInt32 : AtomicType::UniformInt32; else if (t == LLVMTypes::FloatType) @@ -88,6 +94,12 @@ lLLVMTypeToISPCType(const llvm::Type *t, bool intAsUnsigned) { return AtomicType::UniformDouble; else if (t == LLVMTypes::Int64Type) return intAsUnsigned ? AtomicType::UniformUInt64 : AtomicType::UniformInt64; + + // varying + else if (t == LLVMTypes::Int8VectorType) + return intAsUnsigned ? AtomicType::VaryingUInt8 : AtomicType::VaryingInt8; + else if (t == LLVMTypes::Int16VectorType) + return intAsUnsigned ? AtomicType::VaryingUInt16 : AtomicType::VaryingInt16; else if (t == LLVMTypes::Int32VectorType) return intAsUnsigned ? AtomicType::VaryingUInt32 : AtomicType::VaryingInt32; else if (t == LLVMTypes::FloatVectorType) @@ -96,6 +108,14 @@ lLLVMTypeToISPCType(const llvm::Type *t, bool intAsUnsigned) { return AtomicType::VaryingDouble; else if (t == LLVMTypes::Int64VectorType) return intAsUnsigned ? AtomicType::VaryingUInt64 : AtomicType::VaryingInt64; + + // pointers to uniform + else if (t == LLVMTypes::Int8PointerType) + return new ReferenceType(intAsUnsigned ? AtomicType::UniformUInt8 : + AtomicType::UniformInt8, false); + else if (t == LLVMTypes::Int16PointerType) + return new ReferenceType(intAsUnsigned ? AtomicType::UniformUInt16 : + AtomicType::UniformInt16, false); else if (t == LLVMTypes::Int32PointerType) return new ReferenceType(intAsUnsigned ? AtomicType::UniformUInt32 : AtomicType::UniformInt32, false); @@ -106,6 +126,14 @@ lLLVMTypeToISPCType(const llvm::Type *t, bool intAsUnsigned) { return new ReferenceType(AtomicType::UniformFloat, false); else if (t == LLVMTypes::DoublePointerType) return new ReferenceType(AtomicType::UniformDouble, false); + + // pointers to varying + else if (t == LLVMTypes::Int8VectorPointerType) + return new ReferenceType(intAsUnsigned ? AtomicType::VaryingUInt8 : + AtomicType::VaryingInt8, false); + else if (t == LLVMTypes::Int16VectorPointerType) + return new ReferenceType(intAsUnsigned ? AtomicType::VaryingUInt16 : + AtomicType::VaryingInt16, false); else if (t == LLVMTypes::Int32VectorPointerType) return new ReferenceType(intAsUnsigned ? 
AtomicType::VaryingUInt32 : AtomicType::VaryingInt32, false); @@ -116,6 +144,8 @@ lLLVMTypeToISPCType(const llvm::Type *t, bool intAsUnsigned) { return new ReferenceType(AtomicType::VaryingFloat, false); else if (t == LLVMTypes::DoubleVectorPointerType) return new ReferenceType(AtomicType::VaryingDouble, false); + + // arrays else if (llvm::isa(t)) { const llvm::PointerType *pt = llvm::dyn_cast(t); @@ -239,10 +269,49 @@ lAddModuleSymbols(llvm::Module *module, SymbolTable *symbolTable) { } } + +static void +lDeclarePG(llvm::Module *module, LLVM_TYPE_CONST llvm::Type *vecType, + const char *name) { + SourcePos noPos; + noPos.name = "__stdlib"; + + std::vector argTypes; + argTypes.push_back(LLVMTypes::VoidPointerVectorType); + argTypes.push_back(LLVMTypes::MaskType); + + llvm::FunctionType *fType = llvm::FunctionType::get(vecType, argTypes, false); + llvm::Function *func = + llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage, + name, module); + func->setOnlyReadsMemory(true); + func->setDoesNotThrow(true); +} + + +static void +lDeclarePGBO(llvm::Module *module, LLVM_TYPE_CONST llvm::Type *vecType, + const char *name) { + std::vector argTypes; + argTypes.push_back(LLVMTypes::VoidPointerType); + argTypes.push_back(LLVMTypes::Int32VectorType); + argTypes.push_back(LLVMTypes::MaskType); + + llvm::FunctionType *fType = llvm::FunctionType::get(vecType, argTypes, false); + llvm::Function *func = + llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage, + name, module); + func->setOnlyReadsMemory(true); + func->setDoesNotThrow(true); +} + + /** Declare the 'pseudo-gather' functions. When the ispc front-end needs to perform a gather, it generates a call to one of these functions, which have signatures: + varying int8 __pseudo_gather(varying int8 *, mask) + varying int16 __pseudo_gather(varying int16 *, mask) varying int32 __pseudo_gather(varying int32 *, mask) varying int64 __pseudo_gather(varying int64 *, mask) @@ -253,6 +322,10 @@ lAddModuleSymbols(llvm::Module *module, SymbolTable *symbolTable) { front-end to be relatively simple in how it emits address calculation for gathers. 
+ varying int8 __pseudo_gather_base_offsets_8(uniform int8 *base, + int32 offsets, mask) + varying int16 __pseudo_gather_base_offsets_16(uniform int16 *base, + int32 offsets, mask) varying int32 __pseudo_gather_base_offsets_32(uniform int32 *base, int32 offsets, mask) varying int64 __pseudo_gather_base_offsets_64(uniform int64 *base, @@ -264,49 +337,54 @@ lAddModuleSymbols(llvm::Module *module, SymbolTable *symbolTable) { */ static void lDeclarePseudoGathers(llvm::Module *module) { - SourcePos noPos; - noPos.name = "__stdlib"; + lDeclarePG(module, LLVMTypes::Int8VectorType, "__pseudo_gather_8"); + lDeclarePG(module, LLVMTypes::Int16VectorType, "__pseudo_gather_16"); + lDeclarePG(module, LLVMTypes::Int32VectorType, "__pseudo_gather_32"); + lDeclarePG(module, LLVMTypes::Int64VectorType, "__pseudo_gather_64"); - { - std::vector argTypes; - argTypes.push_back(LLVMTypes::VoidPointerVectorType); - argTypes.push_back(LLVMTypes::MaskType); + lDeclarePGBO(module, LLVMTypes::Int8VectorType, + "__pseudo_gather_base_offsets_8"); + lDeclarePGBO(module, LLVMTypes::Int16VectorType, + "__pseudo_gather_base_offsets_16"); + lDeclarePGBO(module, LLVMTypes::Int32VectorType, + "__pseudo_gather_base_offsets_32"); + lDeclarePGBO(module, LLVMTypes::Int64VectorType, + "__pseudo_gather_base_offsets_64"); +} - llvm::FunctionType *fType = - llvm::FunctionType::get(LLVMTypes::Int32VectorType, argTypes, false); - llvm::Function *func = - llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage, - "__pseudo_gather_32", module); - func->setOnlyReadsMemory(true); - func->setDoesNotThrow(true); - fType = llvm::FunctionType::get(LLVMTypes::Int64VectorType, argTypes, false); - func = llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage, - "__pseudo_gather_64", module); - func->setOnlyReadsMemory(true); - func->setDoesNotThrow(true); - } +static void +lDeclarePS(llvm::Module *module, LLVM_TYPE_CONST llvm::Type *vecType, + const char *name) { + std::vector argTypes; + argTypes.push_back(LLVMTypes::VoidPointerVectorType); + argTypes.push_back(vecType); + argTypes.push_back(LLVMTypes::MaskType); - { - std::vector argTypes; - argTypes.push_back(LLVMTypes::VoidPointerType); - argTypes.push_back(LLVMTypes::Int32VectorType); - argTypes.push_back(LLVMTypes::MaskType); + llvm::FunctionType *fType = + llvm::FunctionType::get(LLVMTypes::VoidType, argTypes, false); + llvm::Function *func = + llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage, + name, module); + func->setDoesNotThrow(true); +} - llvm::FunctionType *fType = - llvm::FunctionType::get(LLVMTypes::Int32VectorType, argTypes, false); - llvm::Function *func = - llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage, - "__pseudo_gather_base_offsets_32", module); - func->setOnlyReadsMemory(true); - func->setDoesNotThrow(true); - fType = llvm::FunctionType::get(LLVMTypes::Int64VectorType, argTypes, false); - func = llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage, - "__pseudo_gather_base_offsets_64", module); - func->setOnlyReadsMemory(true); - func->setDoesNotThrow(true); - } +static void +lDeclarePSBO(llvm::Module *module, LLVM_TYPE_CONST llvm::Type *vecType, + const char *name) { + std::vector argTypes; + argTypes.push_back(LLVMTypes::VoidPointerType); + argTypes.push_back(LLVMTypes::Int32VectorType); + argTypes.push_back(vecType); + argTypes.push_back(LLVMTypes::MaskType); + + llvm::FunctionType *fType = + llvm::FunctionType::get(LLVMTypes::VoidType, argTypes, false); + llvm::Function *func = + 
llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage, + name, module); + func->setDoesNotThrow(true); } @@ -314,16 +392,22 @@ lDeclarePseudoGathers(llvm::Module *module) { we also declare (but never define) pseudo-scatter instructions with signatures: + void __pseudo_scatter_8 (varying int8 *, varying int8 values, mask) + void __pseudo_scatter_16(varying int16 *, varying int16 values, mask) void __pseudo_scatter_32(varying int32 *, varying int32 values, mask) void __pseudo_scatter_64(varying int64 *, varying int64 values, mask) The GatherScatterFlattenOpt optimization pass also finds these and transforms them to scatters like: + void __pseudo_scatter_base_offsets_8(uniform int8 *base, + varying int32 offsets, varying int8 values, mask) + void __pseudo_scatter_base_offsets_16(uniform int16 *base, + varying int32 offsets, varying int16 values, mask) void __pseudo_scatter_base_offsets_32(uniform int32 *base, varying int32 offsets, varying int32 values, mask) void __pseudo_scatter_base_offsets_64(uniform int64 *base, - varying int62 offsets, varying int64 values, mask) + varying int32 offsets, varying int64 values, mask) And the GSImprovementsPass in turn converts these to actual native scatters or masked stores. @@ -333,67 +417,49 @@ lDeclarePseudoScatters(llvm::Module *module) { SourcePos noPos; noPos.name = "__stdlib"; - { - std::vector argTypes; - argTypes.push_back(LLVMTypes::VoidPointerVectorType); - argTypes.push_back(LLVMTypes::Int32VectorType); - argTypes.push_back(LLVMTypes::MaskType); + lDeclarePS(module, LLVMTypes::Int8VectorType, "__pseudo_scatter_8"); + lDeclarePS(module, LLVMTypes::Int16VectorType, "__pseudo_scatter_16"); + lDeclarePS(module, LLVMTypes::Int32VectorType, "__pseudo_scatter_32"); + lDeclarePS(module, LLVMTypes::Int64VectorType, "__pseudo_scatter_64"); - llvm::FunctionType *fType = - llvm::FunctionType::get(LLVMTypes::VoidType, argTypes, false); - llvm::Function *func = - llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage, - "__pseudo_scatter_32", module); - func->setDoesNotThrow(true); - } - { - std::vector argTypes; - argTypes.push_back(LLVMTypes::VoidPointerVectorType); - argTypes.push_back(LLVMTypes::Int64VectorType); - argTypes.push_back(LLVMTypes::MaskType); + lDeclarePSBO(module, LLVMTypes::Int8VectorType, + "__pseudo_scatter_base_offsets_8"); + lDeclarePSBO(module, LLVMTypes::Int16VectorType, + "__pseudo_scatter_base_offsets_16"); + lDeclarePSBO(module, LLVMTypes::Int32VectorType, + "__pseudo_scatter_base_offsets_32"); + lDeclarePSBO(module, LLVMTypes::Int64VectorType, + "__pseudo_scatter_base_offsets_64"); +} - llvm::FunctionType *fType = - llvm::FunctionType::get(LLVMTypes::VoidType, argTypes, false); - llvm::Function *func = - llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage, - "__pseudo_scatter_64", module); - func->setDoesNotThrow(true); - } - { - std::vector argTypes; - argTypes.push_back(LLVMTypes::VoidPointerType); - argTypes.push_back(LLVMTypes::Int32VectorType); - argTypes.push_back(LLVMTypes::Int32VectorType); - argTypes.push_back(LLVMTypes::MaskType); +static void +lDeclarePMS(llvm::Module *module, LLVM_TYPE_CONST llvm::Type *lvalueType, + LLVM_TYPE_CONST llvm::Type *rvalueType, const char *name) { + SourcePos noPos; + noPos.name = "__stdlib"; - llvm::FunctionType *fType = - llvm::FunctionType::get(LLVMTypes::VoidType, argTypes, false); - llvm::Function *func = - llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage, - "__pseudo_scatter_base_offsets_32", module); - func->setDoesNotThrow(true); 
- } - { - std::vector argTypes; - argTypes.push_back(LLVMTypes::VoidPointerType); - argTypes.push_back(LLVMTypes::Int32VectorType); - argTypes.push_back(LLVMTypes::Int64VectorType); - argTypes.push_back(LLVMTypes::MaskType); + std::vector argTypes; + argTypes.push_back(lvalueType); + argTypes.push_back(rvalueType); + argTypes.push_back(LLVMTypes::MaskType); - llvm::FunctionType *fType = - llvm::FunctionType::get(LLVMTypes::VoidType, argTypes, false); - llvm::Function *func = - llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage, - "__pseudo_scatter_base_offsets_64", module); - func->setDoesNotThrow(true); - } + llvm::FunctionType *fType = + llvm::FunctionType::get(LLVMTypes::VoidType, argTypes, false); + llvm::Function *func = + llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage, + name, module); + func->setDoesNotThrow(true); + func->addFnAttr(llvm::Attribute::AlwaysInline); + func->setDoesNotCapture(1, true); } /** This function declares placeholder masked store functions for the front-end to use. + void __pseudo_masked_store_8 (uniform int8 *ptr, varying int8 values, mask) + void __pseudo_masked_store_16(uniform int16 *ptr, varying int16 values, mask) void __pseudo_masked_store_32(uniform int32 *ptr, varying int32 values, mask) void __pseudo_masked_store_64(uniform int64 *ptr, varying int64 values, mask) @@ -403,40 +469,14 @@ lDeclarePseudoScatters(llvm::Module *module) { */ static void lDeclarePseudoMaskedStore(llvm::Module *module) { - SourcePos noPos; - noPos.name = "__stdlib"; - - { - std::vector argTypes; - argTypes.push_back(LLVMTypes::Int32VectorPointerType); - argTypes.push_back(LLVMTypes::Int32VectorType); - argTypes.push_back(LLVMTypes::MaskType); - - llvm::FunctionType *fType = - llvm::FunctionType::get(LLVMTypes::VoidType, argTypes, false); - llvm::Function *func = - llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage, - "__pseudo_masked_store_32", module); - func->setDoesNotThrow(true); - func->addFnAttr(llvm::Attribute::AlwaysInline); - func->setDoesNotCapture(1, true); - } - - { - std::vector argTypes; - argTypes.push_back(LLVMTypes::Int64VectorPointerType); - argTypes.push_back(LLVMTypes::Int64VectorType); - argTypes.push_back(LLVMTypes::MaskType); - - llvm::FunctionType *fType = - llvm::FunctionType::get(LLVMTypes::VoidType, argTypes, false); - llvm::Function *func = - llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage, - "__pseudo_masked_store_64", module); - func->setDoesNotThrow(true); - func->addFnAttr(llvm::Attribute::AlwaysInline); - func->setDoesNotCapture(1, true); - } + lDeclarePMS(module, LLVMTypes::Int8VectorPointerType, + LLVMTypes::Int8VectorType, "__pseudo_masked_store_8"); + lDeclarePMS(module, LLVMTypes::Int16VectorPointerType, + LLVMTypes::Int16VectorType, "__pseudo_masked_store_16"); + lDeclarePMS(module, LLVMTypes::Int32VectorPointerType, + LLVMTypes::Int32VectorType, "__pseudo_masked_store_32"); + lDeclarePMS(module, LLVMTypes::Int64VectorPointerType, + LLVMTypes::Int64VectorType, "__pseudo_masked_store_64"); } @@ -609,8 +649,8 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod // needed by the compiled program. 
{ std::vector argTypes; - argTypes.push_back(llvm::PointerType::get(llvm::Type::getInt8Ty(*g->ctx), 0)); - argTypes.push_back(llvm::PointerType::get(llvm::Type::getInt8Ty(*g->ctx), 0)); + argTypes.push_back(LLVMTypes::VoidPointerType); + argTypes.push_back(LLVMTypes::VoidPointerType); argTypes.push_back(LLVMTypes::Int32Type); argTypes.push_back(LLVMTypes::Int32Type); llvm::FunctionType *ftype = llvm::FunctionType::get(LLVMTypes::VoidType, diff --git a/ctx.cpp b/ctx.cpp index dfc9bd12..7bef353b 100644 --- a/ctx.cpp +++ b/ctx.cpp @@ -1448,17 +1448,20 @@ FunctionEmitContext::gather(llvm::Value *lvalue, const Type *type, llvm::Value *mask = GetMask(); llvm::Function *gather = NULL; // Figure out which gather function to call based on the size of - // the elements; will need to generalize this for 8 and 16-bit - // types. + // the elements. if (retType == LLVMTypes::DoubleVectorType || retType == LLVMTypes::Int64VectorType) gather = m->module->getFunction("__pseudo_gather_64"); - else { - assert(retType == LLVMTypes::FloatVectorType || - retType == LLVMTypes::Int32VectorType); + else if (retType == LLVMTypes::FloatVectorType || + retType == LLVMTypes::Int32VectorType) gather = m->module->getFunction("__pseudo_gather_32"); + else if (retType == LLVMTypes::Int16VectorType) + gather = m->module->getFunction("__pseudo_gather_16"); + else { + assert(retType == LLVMTypes::Int8VectorType); + gather = m->module->getFunction("__pseudo_gather_8"); } - assert(gather); + assert(gather != NULL); llvm::Value *voidlvalue = BitCastInst(lvalue, LLVMTypes::VoidPointerType); llvm::Instruction *call = CallInst(gather, voidlvalue, mask, name); @@ -1578,9 +1581,7 @@ FunctionEmitContext::maskedStore(llvm::Value *rvalue, llvm::Value *lvalue, rvalueType = rvalueType->GetAsNonConstType(); llvm::Function *maskedStoreFunc = NULL; - // Figure out if we need a 32-bit or 64-bit masked store. This - // will need to be generalized when/if 8 and 16-bit data types are - // added. + // Figure out if we need a 8, 16, 32 or 64-bit masked store. 
if (rvalueType == AtomicType::VaryingDouble || rvalueType == AtomicType::VaryingInt64 || rvalueType == AtomicType::VaryingUInt64) { @@ -1590,13 +1591,11 @@ FunctionEmitContext::maskedStore(llvm::Value *rvalue, llvm::Value *lvalue, rvalue = BitCastInst(rvalue, LLVMTypes::Int64VectorType, "rvalue_to_int64"); } - else { - assert(rvalueType == AtomicType::VaryingFloat || - rvalueType == AtomicType::VaryingBool || - rvalueType == AtomicType::VaryingInt32 || - rvalueType == AtomicType::VaryingUInt32 || - dynamic_cast(rvalueType) != NULL); - + else if (rvalueType == AtomicType::VaryingFloat || + rvalueType == AtomicType::VaryingBool || + rvalueType == AtomicType::VaryingInt32 || + rvalueType == AtomicType::VaryingUInt32 || + dynamic_cast(rvalueType) != NULL) { maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_32"); lvalue = BitCastInst(lvalue, LLVMTypes::Int32VectorPointerType, "lvalue_to_int32vecptr"); @@ -1604,6 +1603,18 @@ FunctionEmitContext::maskedStore(llvm::Value *rvalue, llvm::Value *lvalue, rvalue = BitCastInst(rvalue, LLVMTypes::Int32VectorType, "rvalue_to_int32"); } + else if (rvalueType == AtomicType::VaryingInt16 || + rvalueType == AtomicType::VaryingUInt16) { + maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_16"); + lvalue = BitCastInst(lvalue, LLVMTypes::Int16VectorPointerType, + "lvalue_to_int16vecptr"); + } + else if (rvalueType == AtomicType::VaryingInt8 || + rvalueType == AtomicType::VaryingUInt8) { + maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_8"); + lvalue = BitCastInst(lvalue, LLVMTypes::Int8VectorPointerType, + "lvalue_to_int8vecptr"); + } std::vector args; args.push_back(lvalue); @@ -1668,14 +1679,15 @@ FunctionEmitContext::scatter(llvm::Value *rvalue, llvm::Value *lvalue, func = m->module->getFunction("__pseudo_scatter_64"); rvalue = BitCastInst(rvalue, LLVMTypes::Int64VectorType, "rvalue2int"); } - else { - // FIXME: if this hits, presumably it's due to needing int8 and/or - // int16 versions of scatter... - assert(type == LLVMTypes::FloatVectorType || - type == LLVMTypes::Int32VectorType); + else if (type == LLVMTypes::FloatVectorType || + type == LLVMTypes::Int32VectorType) { func = m->module->getFunction("__pseudo_scatter_32"); rvalue = BitCastInst(rvalue, LLVMTypes::Int32VectorType, "rvalue2int"); } + else if (type == LLVMTypes::Int16VectorType) + func = m->module->getFunction("__pseudo_scatter_16"); + else if (type == LLVMTypes::Int8VectorType) + func = m->module->getFunction("__pseudo_scatter_8"); assert(func != NULL); AddInstrumentationPoint("scatter"); diff --git a/docs/ispc.txt b/docs/ispc.txt index cfaf4345..6b994e12 100644 --- a/docs/ispc.txt +++ b/docs/ispc.txt @@ -427,7 +427,8 @@ The following identifiers are reserved as language keywords: ``bool``, ``char``, ``cif``, ``cwhile``, ``const``, ``continue``, ``creturn``, ``default``, ``do``, ``double``, ``else``, ``enum``, ``export``, ``extern``, ``false``, ``float``, ``for``, ``goto``, ``if``, ``inline``, ``int``, -``int32``, ``int64``, ``launch``, ``print``, ``reference``, ``return``, +``int8``, ``int16``, ``int32``, ``int64``, ``launch``, ``print``, +``reference``, ``return``, ``signed``, ``sizeof``, ``soa``, ``static``, ``struct``, ``switch``, ``sync``, ``task``, ``true``, ``typedef``, ``uniform``, ``union``, ``unsigned``, ``varying``, ``void``, ``volatile``, ``while``. @@ -481,6 +482,10 @@ types. * ``void``: "empty" type representing no value. * ``bool``: boolean value; may be assigned ``true``, ``false``, or the value of a boolean expression. 
+* ``int8``: 8-bit signed integer. +* ``unsigned int8``: 8-bit unsigned integer. +* ``int16``: 16-bit signed integer. +* ``unsigned int16``: 16-bit unsigned integer. * ``int``: 32-bit signed integer; may also be specified as ``int32``. * ``unsigned int``: 32-bit unsigned integer; may also be specified as ``unsigned int32``. @@ -497,7 +502,8 @@ general" of the two types, with the following precedence: :: - double > uint64 > int64 > float > uint32 > int32 > bool + double > uint64 > int64 > float > uint32 > int32 > + uint16 > int16 > uint8 > int8 > bool In other words, adding an ``int64`` to a ``double`` causes the ``int64`` to be converted to a ``double``, the addition to be performed, and a @@ -1709,10 +1715,12 @@ the running program instances. :: - float broadcast(float value, uniform int index) + int8 broadcast(int8 value, uniform int index) + int16 broadcast(int16 value, uniform int index) int32 broadcast(int32 value, uniform int index) - double broadcast(double value, uniform int index) int64 broadcast(int64 value, uniform int index) + float broadcast(float value, uniform int index) + double broadcast(double value, uniform int index) The ``rotate()`` function allows each program instance to find the value of the given value that their neighbor ``offset`` steps away has. For @@ -1725,10 +1733,12 @@ provided offset value can be positive or negative, and may be greater than :: - float rotate(float value, uniform int offset) + int8 rotate(int8 value, uniform int offset) + int16 rotate(int16 value, uniform int offset) int32 rotate(int32 value, uniform int offset) - double rotate(double value, uniform int offset) int64 rotate(int64 value, uniform int offset) + float rotate(float value, uniform int offset) + double rotate(double value, uniform int offset) Finally, the ``shuffle()`` functions allow two variants of fully general @@ -1739,10 +1749,12 @@ from which to get the value of ``value``. The provided values for :: - float shuffle(float value, int permutation) + int8 shuffle(int8 value, int permutation) + int16 shuffle(int16 value, int permutation) int32 shuffle(int32 value, int permutation) - double shuffle(double value, int permutation) int64 shuffle(int64 value, int permutation) + float shuffle(float value, int permutation) + double shuffle(double value, int permutation) The second variant of ``shuffle()`` permutes over the extended vector that @@ -1753,10 +1765,12 @@ of ``value1``, etc.) :: - float shuffle(float value0, float value1, int permutation) + int8 shuffle(int8 value0, int8 value1, int permutation) + int16 shuffle(int16 value0, int16 value1, int permutation) int32 shuffle(int32 value0, int32 value1, int permutation) - double shuffle(double value0, double value1, int permutation) int64 shuffle(int64 value0, int64 value1, int permutation) + float shuffle(float value0, float value1, int permutation) + double shuffle(double value0, double value1, int permutation) The various variants of ``popcnt()`` return the population count--the number of bits set in the given value. 
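For illustration (a hypothetical sketch, not taken from this patch): the following ispc fragment exercises the new 8- and 16-bit types together with the ``broadcast()``, ``rotate()``, and ``shuffle()`` overloads documented above. The function name ``demo_int8_16`` and the ``out[]`` array are invented for the example.

::

    export void demo_int8_16(uniform int out[]) {
        int8  a = (int8)programIndex;          // varying 8-bit value
        int16 b = (int16)(programIndex * 3);   // varying 16-bit value

        // Per the precedence chain above, int8 + int16 promotes to int16.
        int16 sum = a + b;

        int8  first    = broadcast(a, 0);      // lane 0's value in every lane
        int16 neighbor = rotate(b, 1);         // value from the next lane over
        int8  reversed = shuffle(a, programCount - 1 - programIndex);

        // Promote to int32 for storage.
        out[programIndex] = (int)sum + (int)first + (int)neighbor + (int)reversed;
    }
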
@@ -1861,10 +1875,19 @@ where the ``i`` th element of ``x`` has been replaced with the value ``v`` :: + uniform int8 extract(int8 x, uniform int i) + uniform int16 extract(int16 x, uniform int i) + uniform int32 extract(int32 x, uniform int i) + uniform int64 extract(int64 x, uniform int i) uniform float extract(float x, uniform int i) - uniform int extract(int x, uniform int i) + +:: + + int8 insert(int8 x, uniform int i, uniform int8 v) + int16 insert(int16 x, uniform int i, uniform int16 v) + int32 insert(int32 x, uniform int i, uniform int32 v) + int64 insert(int64 x, uniform int i, uniform int64 v) float insert(float x, uniform int i, uniform float v) - int insert(int x, uniform int i, uniform int v) Atomic Operations and Memory Fences @@ -1948,41 +1971,6 @@ value ``true`` (rather than just having the value one). The int sign_extend(bool value) uniform int sign_extend(uniform bool value) -``ispc`` provides a number of bit/memory-level utility routines in its -standard library as well. It has routines that load from and store -to 8-bit and 16-bit integer values stored in memory, converting to and from -32-bit integers for use in computation in ``ispc`` code. (These functions -and this conversion step are necessary because ``ispc`` doesn't have native -8-bit or 16-bit types in the language.) - -:: - - int load_from_int8(uniform int a[], uniform int offset) - unsigned int load_from_int8(uniform unsigned int a[], - uniform int offset) - void store_to_int8(uniform int a[], uniform int offset, - int val) - void store_to_int8(uniform unsigned int a[], uniform int offset, - unsigned int val) - unsigned int load_from_int16(uniform int a[], - uniform int offset) - unsigned unsigned int load_from_int16(uniform unsigned int a[], - uniform int offset) - void store_to_int16(uniform int a[], uniform int offset, - int val) - void store_to_int16(uniform unsigned int a[], uniform int offset, - unsigned int val) - -There are three things to note in these functions. First, note that these -functions take either ``int`` or ``unsigned int`` arrays as parameters; you -need to cast `the ``int8_t`` and ``int16_t`` pointers from the C/C++ side -to ``int`` or ``unsigned int`` when passing them to ``ispc`` code. Second, -although the arrays are passed as 32-bit integers, in the array indexing -calculation, with the ``offset`` parameter, they are treated as if they -were ``int8`` or ``int16`` types (i.e. the offset treated as being in terms -of number of 8 or 16-bit elements). Third, note that the value of -``programIndex`` is implicitly added to offset. - The ``intbits()`` and ``floatbits()`` functions can be used to implement low-level floating-point bit twiddling. 
For example, ``intbits()`` returns an ``unsigned int`` that is a bit-for-bit copy of the given ``float`` diff --git a/examples/rt/rt.cpp b/examples/rt/rt.cpp index fc798bfb..b97e47e7 100644 --- a/examples/rt/rt.cpp +++ b/examples/rt/rt.cpp @@ -190,7 +190,9 @@ int main(int argc, char *argv[]) { nodes[i].bounds[1].v[1] = b[4]; nodes[i].bounds[1].v[2] = b[5]; READ(nodes[i].offset, 1); - READ(nodes[i].primsAxis, 1); + READ(nodes[i].nPrimitives, 1); + READ(nodes[i].splitAxis, 1); + READ(nodes[i].pad, 1); } // And then read the triangles diff --git a/examples/rt/rt.ispc b/examples/rt/rt.ispc index 08dabb0e..ca150594 100644 --- a/examples/rt/rt.ispc +++ b/examples/rt/rt.ispc @@ -50,21 +50,11 @@ struct Triangle { struct LinearBVHNode { uniform float3 bounds[2]; uniform unsigned int offset; // num primitives for leaf, second child for interior - uniform unsigned int primsAxis; // 0:7 nPrimitives, 8:15 split axis, 16:31 padding + uniform unsigned int8 nPrimitives; + uniform unsigned int8 splitAxis; + uniform unsigned int16 pad; }; -static inline uniform int nPrims(const reference LinearBVHNode node) { - return (node.primsAxis & 0xff); -} - -static inline uniform int axis(const reference LinearBVHNode node) { - return ((node.primsAxis >> 8) & 0xff); -} - -static inline uniform bool isInterior(const reference LinearBVHNode node) { - return nPrims(node) == 0; -} - static inline float3 Cross(const float3 v1, const float3 v2) { float v1x = v1.x, v1y = v1.y, v1z = v1.z; float v2x = v2.x, v2y = v2.y, v2z = v2.z; @@ -199,7 +189,7 @@ bool BVHIntersect(const LinearBVHNode nodes[], const Triangle tris[], // Check ray against BVH node LinearBVHNode node = nodes[nodeNum]; if (any(BBoxIntersect(node.bounds, ray))) { - uniform unsigned int nPrimitives = nPrims(node); + uniform unsigned int nPrimitives = node.nPrimitives; if (nPrimitives > 0) { // Intersect ray with primitives in leaf BVH node uniform unsigned int primitivesOffset = node.offset; @@ -213,7 +203,7 @@ bool BVHIntersect(const LinearBVHNode nodes[], const Triangle tris[], } else { // Put far BVH node on _todo_ stack, advance to near node - if (r.dirIsNeg[axis(node)]) { + if (r.dirIsNeg[node.splitAxis]) { todo[todoOffset++] = nodeNum + 1; nodeNum = node.offset; } diff --git a/examples/rt/rt_serial.cpp b/examples/rt/rt_serial.cpp index 53f7d4cb..7953ef47 100644 --- a/examples/rt/rt_serial.cpp +++ b/examples/rt/rt_serial.cpp @@ -75,30 +75,20 @@ struct Ray { namespace ispc { struct Triangle { float3 p[3]; - int id; + int32_t id; }; struct LinearBVHNode { float3 bounds[2]; - unsigned int offset; // primitives for leaf, second child for interior - unsigned int primsAxis; // 0:7 nPrimitives, 8:15 split axis, 16:31 padding + int32_t offset; // primitives for leaf, second child for interior + uint8_t nPrimitives; + uint8_t splitAxis; + uint16_t pad; }; } using namespace ispc; -inline int nPrims(const LinearBVHNode &node) { - return (node.primsAxis & 0xff); -} - -inline int axis(const LinearBVHNode &node) { - return ((node.primsAxis >> 8) & 0xff); -} - -inline bool isInterior(const LinearBVHNode &node) { - return nPrims(node) == 0; -} - inline float3 Cross(const float3 &v1, const float3 &v2) { float v1x = v1.x, v1y = v1.y, v1z = v1.z; float v2x = v2.x, v2y = v2.y, v2z = v2.z; @@ -230,7 +220,7 @@ bool BVHIntersect(const LinearBVHNode nodes[], const Triangle tris[], // Check ray against BVH node const LinearBVHNode &node = nodes[nodeNum]; if (BBoxIntersect(node.bounds, ray)) { - unsigned int nPrimitives = nPrims(node); + unsigned int nPrimitives = node.nPrimitives; 
if (nPrimitives > 0) { // Intersect ray with primitives in leaf BVH node unsigned int primitivesOffset = node.offset; @@ -244,7 +234,7 @@ bool BVHIntersect(const LinearBVHNode nodes[], const Triangle tris[], } else { // Put far BVH node on _todo_ stack, advance to near node - if (r.dirIsNeg[axis(node)]) { + if (r.dirIsNeg[node.splitAxis]) { todo[todoOffset++] = nodeNum + 1; nodeNum = node.offset; } diff --git a/expr.cpp b/expr.cpp index d5d9b388..4052159c 100644 --- a/expr.cpp +++ b/expr.cpp @@ -93,6 +93,10 @@ lMaybeIssuePrecisionWarning(const AtomicType *toAtomicType, SourcePos pos, const char *errorMsgBase) { switch (toAtomicType->basicType) { case AtomicType::TYPE_BOOL: + case AtomicType::TYPE_INT8: + case AtomicType::TYPE_UINT8: + case AtomicType::TYPE_INT16: + case AtomicType::TYPE_UINT16: case AtomicType::TYPE_INT32: case AtomicType::TYPE_UINT32: case AtomicType::TYPE_FLOAT: @@ -101,6 +105,10 @@ lMaybeIssuePrecisionWarning(const AtomicType *toAtomicType, case AtomicType::TYPE_DOUBLE: if ((int)toAtomicType->basicType < (int)fromAtomicType->basicType && toAtomicType->basicType != AtomicType::TYPE_BOOL && + !(toAtomicType->basicType == AtomicType::TYPE_INT8 && + fromAtomicType->basicType == AtomicType::TYPE_UINT8) && + !(toAtomicType->basicType == AtomicType::TYPE_INT16 && + fromAtomicType->basicType == AtomicType::TYPE_UINT16) && !(toAtomicType->basicType == AtomicType::TYPE_INT32 && fromAtomicType->basicType == AtomicType::TYPE_UINT32) && !(toAtomicType->basicType == AtomicType::TYPE_INT64 && @@ -363,15 +371,33 @@ lLLVMConstantValue(const Type *type, llvm::LLVMContext *ctx, double value) { return (value != 0.) ? LLVMTrue : LLVMFalse; else return LLVMBoolVector(value != 0.); - case AtomicType::TYPE_UINT32: { + case AtomicType::TYPE_INT8: { + int i = (int)value; + assert((double)i == value); + return isUniform ? LLVMInt8(i) : LLVMInt8Vector(i); + } + case AtomicType::TYPE_UINT8: { unsigned int i = (unsigned int)value; - return isUniform ? LLVMUInt32(i) : LLVMUInt32Vector(i); + return isUniform ? LLVMUInt8(i) : LLVMUInt8Vector(i); + } + case AtomicType::TYPE_INT16: { + int i = (int)value; + assert((double)i == value); + return isUniform ? LLVMInt16(i) : LLVMInt16Vector(i); + } + case AtomicType::TYPE_UINT16: { + unsigned int i = (unsigned int)value; + return isUniform ? LLVMUInt16(i) : LLVMUInt16Vector(i); } case AtomicType::TYPE_INT32: { int i = (int)value; assert((double)i == value); return isUniform ? LLVMInt32(i) : LLVMInt32Vector(i); } + case AtomicType::TYPE_UINT32: { + unsigned int i = (unsigned int)value; + return isUniform ? LLVMUInt32(i) : LLVMUInt32Vector(i); + } case AtomicType::TYPE_FLOAT: return isUniform ? 
LLVMFloat((float)value) : LLVMFloatVector((float)value); @@ -590,14 +616,13 @@ UnaryExpr::Optimize() { const Type *type = constExpr->GetType(); bool isEnumType = dynamic_cast(type) != NULL; - if (type == AtomicType::UniformInt64 || - type == AtomicType::VaryingInt64 || - type == AtomicType::UniformUInt64 || - type == AtomicType::VaryingUInt64 || - type == AtomicType::UniformConstInt64 || - type == AtomicType::VaryingConstInt64 || - type == AtomicType::UniformConstUInt64 || - type == AtomicType::VaryingConstUInt64) + const Type *baseType = type->GetAsNonConstType()->GetAsUniformType(); + if (baseType == AtomicType::UniformInt8 || + baseType == AtomicType::UniformUInt8 || + baseType == AtomicType::UniformInt16 || + baseType == AtomicType::UniformUInt16 || + baseType == AtomicType::UniformInt64 || + baseType == AtomicType::UniformUInt64) // FIXME: should handle these at some point; for now we only do // constant folding for bool, int32 and float types... return this; @@ -3058,6 +3083,86 @@ MemberExpr::getCandidateNearMatches() const { /////////////////////////////////////////////////////////////////////////// // ConstExpr +ConstExpr::ConstExpr(const Type *t, int8_t i, SourcePos p) + : Expr(p) { + type = t; + type = type->GetAsConstType(); + assert(type == AtomicType::UniformConstInt8); + int8Val[0] = i; +} + + +ConstExpr::ConstExpr(const Type *t, int8_t *i, SourcePos p) + : Expr(p) { + type = t; + type = type->GetAsConstType(); + assert(type == AtomicType::UniformConstInt8 || + type == AtomicType::VaryingConstInt8); + for (int j = 0; j < Count(); ++j) + int8Val[j] = i[j]; +} + + +ConstExpr::ConstExpr(const Type *t, uint8_t u, SourcePos p) + : Expr(p) { + type = t; + type = type->GetAsConstType(); + assert(type == AtomicType::UniformUInt8); + uint8Val[0] = u; +} + + +ConstExpr::ConstExpr(const Type *t, uint8_t *u, SourcePos p) + : Expr(p) { + type = t; + type = type->GetAsConstType(); + assert(type == AtomicType::UniformConstUInt8 || + type == AtomicType::VaryingConstUInt8); + for (int j = 0; j < Count(); ++j) + uint8Val[j] = u[j]; +} + + +ConstExpr::ConstExpr(const Type *t, int16_t i, SourcePos p) + : Expr(p) { + type = t; + type = type->GetAsConstType(); + assert(type == AtomicType::UniformConstInt16); + int16Val[0] = i; +} + + +ConstExpr::ConstExpr(const Type *t, int16_t *i, SourcePos p) + : Expr(p) { + type = t; + type = type->GetAsConstType(); + assert(type == AtomicType::UniformConstInt16 || + type == AtomicType::VaryingConstInt16); + for (int j = 0; j < Count(); ++j) + int16Val[j] = i[j]; +} + + +ConstExpr::ConstExpr(const Type *t, uint16_t u, SourcePos p) + : Expr(p) { + type = t; + type = type->GetAsConstType(); + assert(type == AtomicType::UniformUInt16); + uint16Val[0] = u; +} + + +ConstExpr::ConstExpr(const Type *t, uint16_t *u, SourcePos p) + : Expr(p) { + type = t; + type = type->GetAsConstType(); + assert(type == AtomicType::UniformConstUInt16 || + type == AtomicType::VaryingConstUInt16); + for (int j = 0; j < Count(); ++j) + uint16Val[j] = u[j]; +} + + ConstExpr::ConstExpr(const Type *t, int32_t i, SourcePos p) : Expr(p) { type = t; @@ -3212,6 +3317,22 @@ ConstExpr::ConstExpr(ConstExpr *old, double *v) for (int i = 0; i < Count(); ++i) boolVal[i] = (v[i] != 0.); break; + case AtomicType::TYPE_INT8: + for (int i = 0; i < Count(); ++i) + int8Val[i] = (int)v[i]; + break; + case AtomicType::TYPE_UINT8: + for (int i = 0; i < Count(); ++i) + uint8Val[i] = (unsigned int)v[i]; + break; + case AtomicType::TYPE_INT16: + for (int i = 0; i < Count(); ++i) + int16Val[i] = (int)v[i]; + 
break; + case AtomicType::TYPE_UINT16: + for (int i = 0; i < Count(); ++i) + uint16Val[i] = (unsigned int)v[i]; + break; case AtomicType::TYPE_INT32: for (int i = 0; i < Count(); ++i) int32Val[i] = (int)v[i]; @@ -3270,6 +3391,18 @@ ConstExpr::GetValue(FunctionEmitContext *ctx) const { return LLVMBoolVector(boolVal); else return boolVal[0] ? LLVMTrue : LLVMFalse; + case AtomicType::TYPE_INT8: + return isVarying ? LLVMInt8Vector(int8Val) : + LLVMInt8(int8Val[0]); + case AtomicType::TYPE_UINT8: + return isVarying ? LLVMUInt8Vector(uint8Val) : + LLVMUInt8(uint8Val[0]); + case AtomicType::TYPE_INT16: + return isVarying ? LLVMInt16Vector(int16Val) : + LLVMInt16(int16Val[0]); + case AtomicType::TYPE_UINT16: + return isVarying ? LLVMUInt16Vector(uint16Val) : + LLVMUInt16(uint16Val[0]); case AtomicType::TYPE_INT32: return isVarying ? LLVMInt32Vector(int32Val) : LLVMInt32(int32Val[0]); @@ -3351,6 +3484,10 @@ int ConstExpr::AsInt64(int64_t *ip, bool forceVarying) const { switch (getBasicType()) { case AtomicType::TYPE_BOOL: lConvert(boolVal, ip, Count(), forceVarying); break; + case AtomicType::TYPE_INT8: lConvert(int8Val, ip, Count(), forceVarying); break; + case AtomicType::TYPE_UINT8: lConvert(uint8Val, ip, Count(), forceVarying); break; + case AtomicType::TYPE_INT16: lConvert(int16Val, ip, Count(), forceVarying); break; + case AtomicType::TYPE_UINT16: lConvert(uint16Val, ip, Count(), forceVarying); break; case AtomicType::TYPE_INT32: lConvert(int32Val, ip, Count(), forceVarying); break; case AtomicType::TYPE_UINT32: lConvert(uint32Val, ip, Count(), forceVarying); break; case AtomicType::TYPE_FLOAT: lConvert(floatVal, ip, Count(), forceVarying); break; @@ -3368,6 +3505,10 @@ int ConstExpr::AsUInt64(uint64_t *up, bool forceVarying) const { switch (getBasicType()) { case AtomicType::TYPE_BOOL: lConvert(boolVal, up, Count(), forceVarying); break; + case AtomicType::TYPE_INT8: lConvert(int8Val, up, Count(), forceVarying); break; + case AtomicType::TYPE_UINT8: lConvert(uint8Val, up, Count(), forceVarying); break; + case AtomicType::TYPE_INT16: lConvert(int16Val, up, Count(), forceVarying); break; + case AtomicType::TYPE_UINT16: lConvert(uint16Val, up, Count(), forceVarying); break; case AtomicType::TYPE_INT32: lConvert(int32Val, up, Count(), forceVarying); break; case AtomicType::TYPE_UINT32: lConvert(uint32Val, up, Count(), forceVarying); break; case AtomicType::TYPE_FLOAT: lConvert(floatVal, up, Count(), forceVarying); break; @@ -3385,6 +3526,10 @@ int ConstExpr::AsDouble(double *d, bool forceVarying) const { switch (getBasicType()) { case AtomicType::TYPE_BOOL: lConvert(boolVal, d, Count(), forceVarying); break; + case AtomicType::TYPE_INT8: lConvert(int8Val, d, Count(), forceVarying); break; + case AtomicType::TYPE_UINT8: lConvert(uint8Val, d, Count(), forceVarying); break; + case AtomicType::TYPE_INT16: lConvert(int16Val, d, Count(), forceVarying); break; + case AtomicType::TYPE_UINT16: lConvert(uint16Val, d, Count(), forceVarying); break; case AtomicType::TYPE_INT32: lConvert(int32Val, d, Count(), forceVarying); break; case AtomicType::TYPE_UINT32: lConvert(uint32Val, d, Count(), forceVarying); break; case AtomicType::TYPE_FLOAT: lConvert(floatVal, d, Count(), forceVarying); break; @@ -3402,6 +3547,10 @@ int ConstExpr::AsFloat(float *fp, bool forceVarying) const { switch (getBasicType()) { case AtomicType::TYPE_BOOL: lConvert(boolVal, fp, Count(), forceVarying); break; + case AtomicType::TYPE_INT8: lConvert(int8Val, fp, Count(), forceVarying); break; + case AtomicType::TYPE_UINT8: 
lConvert(uint8Val, fp, Count(), forceVarying); break; + case AtomicType::TYPE_INT16: lConvert(int16Val, fp, Count(), forceVarying); break; + case AtomicType::TYPE_UINT16: lConvert(uint16Val, fp, Count(), forceVarying); break; case AtomicType::TYPE_INT32: lConvert(int32Val, fp, Count(), forceVarying); break; case AtomicType::TYPE_UINT32: lConvert(uint32Val, fp, Count(), forceVarying); break; case AtomicType::TYPE_FLOAT: lConvert(floatVal, fp, Count(), forceVarying); break; @@ -3419,6 +3568,10 @@ int ConstExpr::AsBool(bool *b, bool forceVarying) const { switch (getBasicType()) { case AtomicType::TYPE_BOOL: lConvert(boolVal, b, Count(), forceVarying); break; + case AtomicType::TYPE_INT8: lConvert(int8Val, b, Count(), forceVarying); break; + case AtomicType::TYPE_UINT8: lConvert(uint8Val, b, Count(), forceVarying); break; + case AtomicType::TYPE_INT16: lConvert(int16Val, b, Count(), forceVarying); break; + case AtomicType::TYPE_UINT16: lConvert(uint16Val, b, Count(), forceVarying); break; case AtomicType::TYPE_INT32: lConvert(int32Val, b, Count(), forceVarying); break; case AtomicType::TYPE_UINT32: lConvert(uint32Val, b, Count(), forceVarying); break; case AtomicType::TYPE_FLOAT: lConvert(floatVal, b, Count(), forceVarying); break; @@ -3432,10 +3585,98 @@ ConstExpr::AsBool(bool *b, bool forceVarying) const { } +int +ConstExpr::AsInt8(int8_t *ip, bool forceVarying) const { + switch (getBasicType()) { + case AtomicType::TYPE_BOOL: lConvert(boolVal, ip, Count(), forceVarying); break; + case AtomicType::TYPE_INT8: lConvert(int8Val, ip, Count(), forceVarying); break; + case AtomicType::TYPE_UINT8: lConvert(uint8Val, ip, Count(), forceVarying); break; + case AtomicType::TYPE_INT16: lConvert(int16Val, ip, Count(), forceVarying); break; + case AtomicType::TYPE_UINT16: lConvert(uint16Val, ip, Count(), forceVarying); break; + case AtomicType::TYPE_INT32: lConvert(int32Val, ip, Count(), forceVarying); break; + case AtomicType::TYPE_UINT32: lConvert(uint32Val, ip, Count(), forceVarying); break; + case AtomicType::TYPE_FLOAT: lConvert(floatVal, ip, Count(), forceVarying); break; + case AtomicType::TYPE_DOUBLE: lConvert(doubleVal, ip, Count(), forceVarying); break; + case AtomicType::TYPE_INT64: lConvert(int64Val, ip, Count(), forceVarying); break; + case AtomicType::TYPE_UINT64: lConvert(uint64Val, ip, Count(), forceVarying); break; + default: + FATAL("unimplemented const type"); + } + return Count(); +} + + +int +ConstExpr::AsUInt8(uint8_t *up, bool forceVarying) const { + switch (getBasicType()) { + case AtomicType::TYPE_BOOL: lConvert(boolVal, up, Count(), forceVarying); break; + case AtomicType::TYPE_INT8: lConvert(int8Val, up, Count(), forceVarying); break; + case AtomicType::TYPE_UINT8: lConvert(uint8Val, up, Count(), forceVarying); break; + case AtomicType::TYPE_INT16: lConvert(int16Val, up, Count(), forceVarying); break; + case AtomicType::TYPE_UINT16: lConvert(uint16Val, up, Count(), forceVarying); break; + case AtomicType::TYPE_INT32: lConvert(int32Val, up, Count(), forceVarying); break; + case AtomicType::TYPE_UINT32: lConvert(uint32Val, up, Count(), forceVarying); break; + case AtomicType::TYPE_FLOAT: lConvert(floatVal, up, Count(), forceVarying); break; + case AtomicType::TYPE_DOUBLE: lConvert(doubleVal, up, Count(), forceVarying); break; + case AtomicType::TYPE_INT64: lConvert(int64Val, up, Count(), forceVarying); break; + case AtomicType::TYPE_UINT64: lConvert(uint64Val, up, Count(), forceVarying); break; + default: + FATAL("unimplemented const type"); + } + return Count(); +} + + +int 
+ConstExpr::AsInt16(int16_t *ip, bool forceVarying) const { + switch (getBasicType()) { + case AtomicType::TYPE_BOOL: lConvert(boolVal, ip, Count(), forceVarying); break; + case AtomicType::TYPE_INT8: lConvert(int8Val, ip, Count(), forceVarying); break; + case AtomicType::TYPE_UINT8: lConvert(uint8Val, ip, Count(), forceVarying); break; + case AtomicType::TYPE_INT16: lConvert(int16Val, ip, Count(), forceVarying); break; + case AtomicType::TYPE_UINT16: lConvert(uint16Val, ip, Count(), forceVarying); break; + case AtomicType::TYPE_INT32: lConvert(int32Val, ip, Count(), forceVarying); break; + case AtomicType::TYPE_UINT32: lConvert(uint32Val, ip, Count(), forceVarying); break; + case AtomicType::TYPE_FLOAT: lConvert(floatVal, ip, Count(), forceVarying); break; + case AtomicType::TYPE_DOUBLE: lConvert(doubleVal, ip, Count(), forceVarying); break; + case AtomicType::TYPE_INT64: lConvert(int64Val, ip, Count(), forceVarying); break; + case AtomicType::TYPE_UINT64: lConvert(uint64Val, ip, Count(), forceVarying); break; + default: + FATAL("unimplemented const type"); + } + return Count(); +} + + +int +ConstExpr::AsUInt16(uint16_t *up, bool forceVarying) const { + switch (getBasicType()) { + case AtomicType::TYPE_BOOL: lConvert(boolVal, up, Count(), forceVarying); break; + case AtomicType::TYPE_INT8: lConvert(int8Val, up, Count(), forceVarying); break; + case AtomicType::TYPE_UINT8: lConvert(uint8Val, up, Count(), forceVarying); break; + case AtomicType::TYPE_INT16: lConvert(int16Val, up, Count(), forceVarying); break; + case AtomicType::TYPE_UINT16: lConvert(uint16Val, up, Count(), forceVarying); break; + case AtomicType::TYPE_INT32: lConvert(int32Val, up, Count(), forceVarying); break; + case AtomicType::TYPE_UINT32: lConvert(uint32Val, up, Count(), forceVarying); break; + case AtomicType::TYPE_FLOAT: lConvert(floatVal, up, Count(), forceVarying); break; + case AtomicType::TYPE_DOUBLE: lConvert(doubleVal, up, Count(), forceVarying); break; + case AtomicType::TYPE_INT64: lConvert(int64Val, up, Count(), forceVarying); break; + case AtomicType::TYPE_UINT64: lConvert(uint64Val, up, Count(), forceVarying); break; + default: + FATAL("unimplemented const type"); + } + return Count(); +} + + int ConstExpr::AsInt32(int32_t *ip, bool forceVarying) const { switch (getBasicType()) { case AtomicType::TYPE_BOOL: lConvert(boolVal, ip, Count(), forceVarying); break; + case AtomicType::TYPE_INT8: lConvert(int8Val, ip, Count(), forceVarying); break; + case AtomicType::TYPE_UINT8: lConvert(uint8Val, ip, Count(), forceVarying); break; + case AtomicType::TYPE_INT16: lConvert(int16Val, ip, Count(), forceVarying); break; + case AtomicType::TYPE_UINT16: lConvert(uint16Val, ip, Count(), forceVarying); break; case AtomicType::TYPE_INT32: lConvert(int32Val, ip, Count(), forceVarying); break; case AtomicType::TYPE_UINT32: lConvert(uint32Val, ip, Count(), forceVarying); break; case AtomicType::TYPE_FLOAT: lConvert(floatVal, ip, Count(), forceVarying); break; @@ -3453,6 +3694,10 @@ int ConstExpr::AsUInt32(uint32_t *up, bool forceVarying) const { switch (getBasicType()) { case AtomicType::TYPE_BOOL: lConvert(boolVal, up, Count(), forceVarying); break; + case AtomicType::TYPE_INT8: lConvert(int8Val, up, Count(), forceVarying); break; + case AtomicType::TYPE_UINT8: lConvert(uint8Val, up, Count(), forceVarying); break; + case AtomicType::TYPE_INT16: lConvert(int16Val, up, Count(), forceVarying); break; + case AtomicType::TYPE_UINT16: lConvert(uint16Val, up, Count(), forceVarying); break; case AtomicType::TYPE_INT32: 
lConvert(int32Val, up, Count(), forceVarying); break; case AtomicType::TYPE_UINT32: lConvert(uint32Val, up, Count(), forceVarying); break; case AtomicType::TYPE_FLOAT: lConvert(floatVal, up, Count(), forceVarying); break; @@ -3488,6 +3733,40 @@ ConstExpr::GetConstant(const Type *type) const { else return LLVMBoolVector(bv); } + else if (type == AtomicType::UniformInt8 || type == AtomicType::VaryingInt8) { + int8_t iv[ISPC_MAX_NVEC]; + AsInt8(iv, type->IsVaryingType()); + if (type->IsUniformType()) + return LLVMInt8(iv[0]); + else + return LLVMInt8Vector(iv); + } + else if (type == AtomicType::UniformUInt8 || type == AtomicType::VaryingUInt8 || + dynamic_cast(type) != NULL) { + uint8_t uiv[ISPC_MAX_NVEC]; + AsUInt8(uiv, type->IsVaryingType()); + if (type->IsUniformType()) + return LLVMUInt8(uiv[0]); + else + return LLVMUInt8Vector(uiv); + } + else if (type == AtomicType::UniformInt16 || type == AtomicType::VaryingInt16) { + int16_t iv[ISPC_MAX_NVEC]; + AsInt16(iv, type->IsVaryingType()); + if (type->IsUniformType()) + return LLVMInt16(iv[0]); + else + return LLVMInt16Vector(iv); + } + else if (type == AtomicType::UniformUInt16 || type == AtomicType::VaryingUInt16 || + dynamic_cast(type) != NULL) { + uint16_t uiv[ISPC_MAX_NVEC]; + AsUInt16(uiv, type->IsVaryingType()); + if (type->IsUniformType()) + return LLVMUInt16(uiv[0]); + else + return LLVMUInt16Vector(uiv); + } else if (type == AtomicType::UniformInt32 || type == AtomicType::VaryingInt32) { int32_t iv[ISPC_MAX_NVEC]; AsInt32(iv, type->IsVaryingType()); @@ -3564,6 +3843,18 @@ ConstExpr::Print() const { case AtomicType::TYPE_BOOL: printf("%s", boolVal[i] ? "true" : "false"); break; + case AtomicType::TYPE_INT8: + printf("%d", (int)int8Val[i]); + break; + case AtomicType::TYPE_UINT8: + printf("%u", (int)uint8Val[i]); + break; + case AtomicType::TYPE_INT16: + printf("%d", (int)int16Val[i]); + break; + case AtomicType::TYPE_UINT16: + printf("%u", (int)uint16Val[i]); + break; case AtomicType::TYPE_INT32: printf("%d", int32Val[i]); break; @@ -3637,11 +3928,15 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, cast = ctx->CastInst(llvm::Instruction::UIToFP, // unsigned int exprVal, targetType, "bool2float"); break; + case AtomicType::TYPE_INT8: + case AtomicType::TYPE_INT16: case AtomicType::TYPE_INT32: case AtomicType::TYPE_INT64: cast = ctx->CastInst(llvm::Instruction::SIToFP, // signed int to float exprVal, targetType, "int2float"); break; + case AtomicType::TYPE_UINT8: + case AtomicType::TYPE_UINT16: case AtomicType::TYPE_UINT32: case AtomicType::TYPE_UINT64: if (fromType->IsVaryingType()) @@ -3675,11 +3970,15 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, cast = ctx->CastInst(llvm::Instruction::UIToFP, // unsigned int to double exprVal, targetType, "bool2double"); break; + case AtomicType::TYPE_INT8: + case AtomicType::TYPE_INT16: case AtomicType::TYPE_INT32: case AtomicType::TYPE_INT64: cast = ctx->CastInst(llvm::Instruction::SIToFP, // signed int exprVal, targetType, "int2double"); break; + case AtomicType::TYPE_UINT8: + case AtomicType::TYPE_UINT16: case AtomicType::TYPE_UINT32: case AtomicType::TYPE_UINT64: if (fromType->IsVaryingType()) @@ -3699,6 +3998,170 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, } break; } + case AtomicType::TYPE_INT8: { + LLVM_TYPE_CONST llvm::Type *targetType = + fromType->IsUniformType() ? 
LLVMTypes::Int8Type : + LLVMTypes::Int8VectorType; + switch (fromType->basicType) { + case AtomicType::TYPE_BOOL: + if (fromType->IsVaryingType() && + LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType) + exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, "bool_to_i1"); + cast = ctx->ZExtInst(exprVal, targetType, "bool2int"); + break; + case AtomicType::TYPE_INT8: + case AtomicType::TYPE_UINT8: + cast = exprVal; + break; + case AtomicType::TYPE_INT16: + case AtomicType::TYPE_UINT16: + case AtomicType::TYPE_INT32: + case AtomicType::TYPE_UINT32: + case AtomicType::TYPE_INT64: + case AtomicType::TYPE_UINT64: + cast = ctx->TruncInst(exprVal, targetType, "int64_to_int8"); + break; + case AtomicType::TYPE_FLOAT: + cast = ctx->CastInst(llvm::Instruction::FPToSI, // signed int + exprVal, targetType, "float2int"); + break; + case AtomicType::TYPE_DOUBLE: + cast = ctx->CastInst(llvm::Instruction::FPToSI, // signed int + exprVal, targetType, "double2int"); + break; + default: + FATAL("unimplemented"); + } + break; + } + case AtomicType::TYPE_UINT8: { + LLVM_TYPE_CONST llvm::Type *targetType = + fromType->IsUniformType() ? LLVMTypes::Int8Type : + LLVMTypes::Int8VectorType; + switch (fromType->basicType) { + case AtomicType::TYPE_BOOL: + if (fromType->IsVaryingType() && + LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType) + exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, "bool_to_i1"); + cast = ctx->ZExtInst(exprVal, targetType, "bool2uint"); + break; + case AtomicType::TYPE_INT8: + case AtomicType::TYPE_UINT8: + cast = exprVal; + break; + case AtomicType::TYPE_INT16: + case AtomicType::TYPE_UINT16: + case AtomicType::TYPE_INT32: + case AtomicType::TYPE_UINT32: + case AtomicType::TYPE_INT64: + case AtomicType::TYPE_UINT64: + cast = ctx->TruncInst(exprVal, targetType, "int64_to_uint8"); + break; + case AtomicType::TYPE_FLOAT: + if (fromType->IsVaryingType()) + PerformanceWarning(pos, "Conversion from float to unsigned int is slow. " + "Use \"int\" if possible"); + cast = ctx->CastInst(llvm::Instruction::FPToUI, // unsigned int + exprVal, targetType, "float2uint"); + break; + case AtomicType::TYPE_DOUBLE: + if (fromType->IsVaryingType()) + PerformanceWarning(pos, "Conversion from double to unsigned int is slow. " + "Use \"int\" if possible"); + cast = ctx->CastInst(llvm::Instruction::FPToUI, // unsigned int + exprVal, targetType, "double2uint"); + break; + default: + FATAL("unimplemented"); + } + break; + } + case AtomicType::TYPE_INT16: { + LLVM_TYPE_CONST llvm::Type *targetType = + fromType->IsUniformType() ? 
LLVMTypes::Int16Type : + LLVMTypes::Int16VectorType; + switch (fromType->basicType) { + case AtomicType::TYPE_BOOL: + if (fromType->IsVaryingType() && + LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType) + exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, "bool_to_i1"); + cast = ctx->ZExtInst(exprVal, targetType, "bool2int"); + break; + case AtomicType::TYPE_INT8: + cast = ctx->SExtInst(exprVal, targetType, "int2int16"); + break; + case AtomicType::TYPE_UINT8: + cast = ctx->ZExtInst(exprVal, targetType, "uint2uint16"); + break; + case AtomicType::TYPE_INT16: + case AtomicType::TYPE_UINT16: + cast = exprVal; + break; + case AtomicType::TYPE_FLOAT: + cast = ctx->CastInst(llvm::Instruction::FPToSI, // signed int + exprVal, targetType, "float2int"); + break; + case AtomicType::TYPE_INT32: + case AtomicType::TYPE_UINT32: + case AtomicType::TYPE_INT64: + case AtomicType::TYPE_UINT64: + cast = ctx->TruncInst(exprVal, targetType, "int64_to_int16"); + break; + case AtomicType::TYPE_DOUBLE: + cast = ctx->CastInst(llvm::Instruction::FPToSI, // signed int + exprVal, targetType, "double2int"); + break; + default: + FATAL("unimplemented"); + } + break; + } + case AtomicType::TYPE_UINT16: { + LLVM_TYPE_CONST llvm::Type *targetType = + fromType->IsUniformType() ? LLVMTypes::Int16Type : + LLVMTypes::Int16VectorType; + switch (fromType->basicType) { + case AtomicType::TYPE_BOOL: + if (fromType->IsVaryingType() && + LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType) + exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, "bool_to_i1"); + cast = ctx->ZExtInst(exprVal, targetType, "bool2uint16"); + break; + case AtomicType::TYPE_INT8: + cast = ctx->SExtInst(exprVal, targetType, "uint2uint16"); + break; + case AtomicType::TYPE_UINT8: + cast = ctx->ZExtInst(exprVal, targetType, "uint2uint16"); + break; + case AtomicType::TYPE_INT16: + case AtomicType::TYPE_UINT16: + cast = exprVal; + break; + case AtomicType::TYPE_FLOAT: + if (fromType->IsVaryingType()) + PerformanceWarning(pos, "Conversion from float to unsigned int is slow. " + "Use \"int\" if possible"); + cast = ctx->CastInst(llvm::Instruction::FPToUI, // unsigned int + exprVal, targetType, "float2uint"); + break; + case AtomicType::TYPE_INT32: + case AtomicType::TYPE_UINT32: + case AtomicType::TYPE_INT64: + case AtomicType::TYPE_UINT64: + cast = ctx->TruncInst(exprVal, targetType, "int64_to_uint16"); + break; + case AtomicType::TYPE_DOUBLE: + if (fromType->IsVaryingType()) + PerformanceWarning(pos, "Conversion from double to unsigned int is slow. " + "Use \"int\" if possible"); + cast = ctx->CastInst(llvm::Instruction::FPToUI, // unsigned int + exprVal, targetType, "double2uint"); + break; + default: + FATAL("unimplemented"); + } + break; + } case AtomicType::TYPE_INT32: { LLVM_TYPE_CONST llvm::Type *targetType = fromType->IsUniformType() ? 
LLVMTypes::Int32Type : @@ -3710,6 +4173,14 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, "bool_to_i1"); cast = ctx->ZExtInst(exprVal, targetType, "bool2int"); break; + case AtomicType::TYPE_INT8: + case AtomicType::TYPE_INT16: + cast = ctx->SExtInst(exprVal, targetType, "int2int32"); + break; + case AtomicType::TYPE_UINT8: + case AtomicType::TYPE_UINT16: + cast = ctx->ZExtInst(exprVal, targetType, "uint2uint32"); + break; case AtomicType::TYPE_INT32: case AtomicType::TYPE_UINT32: cast = exprVal; @@ -3742,6 +4213,14 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, "bool_to_i1"); cast = ctx->ZExtInst(exprVal, targetType, "bool2uint"); break; + case AtomicType::TYPE_INT8: + case AtomicType::TYPE_INT16: + cast = ctx->SExtInst(exprVal, targetType, "uint2uint"); + break; + case AtomicType::TYPE_UINT8: + case AtomicType::TYPE_UINT16: + cast = ctx->ZExtInst(exprVal, targetType, "uint2uint"); + break; case AtomicType::TYPE_INT32: case AtomicType::TYPE_UINT32: cast = exprVal; @@ -3780,11 +4259,15 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, "bool_to_i1"); cast = ctx->ZExtInst(exprVal, targetType, "bool2int64"); break; + case AtomicType::TYPE_INT8: + case AtomicType::TYPE_INT16: case AtomicType::TYPE_INT32: - cast = ctx->SExtInst(exprVal, targetType, "int32_to_int64"); + cast = ctx->SExtInst(exprVal, targetType, "int_to_int64"); break; + case AtomicType::TYPE_UINT8: + case AtomicType::TYPE_UINT16: case AtomicType::TYPE_UINT32: - cast = ctx->ZExtInst(exprVal, targetType, "uint32_to_int64"); + cast = ctx->ZExtInst(exprVal, targetType, "uint_to_int64"); break; case AtomicType::TYPE_FLOAT: cast = ctx->CastInst(llvm::Instruction::FPToSI, // signed int @@ -3796,7 +4279,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, break; case AtomicType::TYPE_DOUBLE: cast = ctx->CastInst(llvm::Instruction::FPToSI, // signed int - exprVal, targetType, "double2int"); + exprVal, targetType, "double2int64"); break; default: FATAL("unimplemented"); @@ -3814,11 +4297,15 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, "bool_to_i1"); cast = ctx->ZExtInst(exprVal, targetType, "bool2uint"); break; + case AtomicType::TYPE_INT8: + case AtomicType::TYPE_INT16: case AtomicType::TYPE_INT32: - cast = ctx->SExtInst(exprVal, targetType, "int32_to_uint64"); + cast = ctx->SExtInst(exprVal, targetType, "int_to_uint64"); break; + case AtomicType::TYPE_UINT8: + case AtomicType::TYPE_UINT16: case AtomicType::TYPE_UINT32: - cast = ctx->ZExtInst(exprVal, targetType, "uint32_to_uint64"); + cast = ctx->ZExtInst(exprVal, targetType, "uint_to_uint64"); break; case AtomicType::TYPE_FLOAT: if (fromType->IsVaryingType()) @@ -3848,6 +4335,22 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, case AtomicType::TYPE_BOOL: cast = exprVal; break; + case AtomicType::TYPE_INT8: + case AtomicType::TYPE_UINT8: { + llvm::Value *zero = fromType->IsUniformType() ? (llvm::Value *)LLVMInt8(0) : + (llvm::Value *)LLVMInt8Vector((int8_t)0); + cast = ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_NE, + exprVal, zero, "cmpi0"); + break; + } + case AtomicType::TYPE_INT16: + case AtomicType::TYPE_UINT16: { + llvm::Value *zero = fromType->IsUniformType() ? 
(llvm::Value *)LLVMInt16(0) : + (llvm::Value *)LLVMInt16Vector((int16_t)0); + cast = ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_NE, + exprVal, zero, "cmpi0"); + break; + } case AtomicType::TYPE_INT32: case AtomicType::TYPE_UINT32: { llvm::Value *zero = fromType->IsUniformType() ? (llvm::Value *)LLVMInt32(0) : @@ -4195,6 +4698,26 @@ TypeCastExpr::Optimize() { constExpr->AsBool(bv, forceVarying); return new ConstExpr(toType, bv, pos); } + case AtomicType::TYPE_INT8: { + int8_t iv[ISPC_MAX_NVEC]; + constExpr->AsInt8(iv, forceVarying); + return new ConstExpr(toType, iv, pos); + } + case AtomicType::TYPE_UINT8: { + uint8_t uv[ISPC_MAX_NVEC]; + constExpr->AsUInt8(uv, forceVarying); + return new ConstExpr(toType, uv, pos); + } + case AtomicType::TYPE_INT16: { + int16_t iv[ISPC_MAX_NVEC]; + constExpr->AsInt16(iv, forceVarying); + return new ConstExpr(toType, iv, pos); + } + case AtomicType::TYPE_UINT16: { + uint16_t uv[ISPC_MAX_NVEC]; + constExpr->AsUInt16(uv, forceVarying); + return new ConstExpr(toType, uv, pos); + } case AtomicType::TYPE_INT32: { int32_t iv[ISPC_MAX_NVEC]; constExpr->AsInt32(iv, forceVarying); diff --git a/expr.h b/expr.h index d6f12181..f0d10b3c 100644 --- a/expr.h +++ b/expr.h @@ -325,6 +325,24 @@ private: */ class ConstExpr : public Expr { public: + /** Create a ConstExpr from a uniform int8 value */ + ConstExpr(const Type *t, int8_t i, SourcePos p); + /** Create a ConstExpr from a varying int8 value */ + ConstExpr(const Type *t, int8_t *i, SourcePos p); + /** Create a ConstExpr from a uniform uint8 value */ + ConstExpr(const Type *t, uint8_t u, SourcePos p); + /** Create a ConstExpr from a varying uint8 value */ + ConstExpr(const Type *t, uint8_t *u, SourcePos p); + + /** Create a ConstExpr from a uniform int16 value */ + ConstExpr(const Type *t, int16_t i, SourcePos p); + /** Create a ConstExpr from a varying int16 value */ + ConstExpr(const Type *t, int16_t *i, SourcePos p); + /** Create a ConstExpr from a uniform uint16 value */ + ConstExpr(const Type *t, uint16_t u, SourcePos p); + /** Create a ConstExpr from a varying uint16 value */ + ConstExpr(const Type *t, uint16_t *u, SourcePos p); + /** Create a ConstExpr from a uniform int32 value */ ConstExpr(const Type *t, int32_t i, SourcePos p); /** Create a ConstExpr from a varying int32 value */ @@ -333,14 +351,17 @@ public: ConstExpr(const Type *t, uint32_t u, SourcePos p); /** Create a ConstExpr from a varying uint32 value */ ConstExpr(const Type *t, uint32_t *u, SourcePos p); + /** Create a ConstExpr from a uniform float value */ ConstExpr(const Type *t, float f, SourcePos p); /** Create a ConstExpr from a varying float value */ ConstExpr(const Type *t, float *f, SourcePos p); + /** Create a ConstExpr from a uniform double value */ ConstExpr(const Type *t, double d, SourcePos p); /** Create a ConstExpr from a varying double value */ ConstExpr(const Type *t, double *d, SourcePos p); + /** Create a ConstExpr from a uniform int64 value */ ConstExpr(const Type *t, int64_t i, SourcePos p); /** Create a ConstExpr from a varying int64 value */ @@ -349,10 +370,12 @@ public: ConstExpr(const Type *t, uint64_t i, SourcePos p); /** Create a ConstExpr from a varying uint64 value */ ConstExpr(const Type *t, uint64_t *i, SourcePos p); + /** Create a ConstExpr from a uniform bool value */ ConstExpr(const Type *t, bool b, SourcePos p); /** Create a ConstExpr from a varying bool value */ ConstExpr(const Type *t, bool *b, SourcePos p); + /** Create a ConstExpr of the same type as the given old ConstExpr, with values given 
by the "vales" parameter. */ ConstExpr(ConstExpr *old, double *values); @@ -371,6 +394,30 @@ public: equal to the target vector width into the given pointer. */ int AsBool(bool *, bool forceVarying = false) const; + /** Return the ConstExpr's values as int8s, doing type conversion + from the actual type if needed. If forceVarying is true, then type + convert to 'varying' so as to always return a number of values + equal to the target vector width into the given pointer. */ + int AsInt8(int8_t *, bool forceVarying = false) const; + + /** Return the ConstExpr's values as uint8s, doing type conversion + from the actual type if needed. If forceVarying is true, then type + convert to 'varying' so as to always return a number of values + equal to the target vector width into the given pointer. */ + int AsUInt8(uint8_t *, bool forceVarying = false) const; + + /** Return the ConstExpr's values as int16s, doing type conversion + from the actual type if needed. If forceVarying is true, then type + convert to 'varying' so as to always return a number of values + equal to the target vector width into the given pointer. */ + int AsInt16(int16_t *, bool forceVarying = false) const; + + /** Return the ConstExpr's values as uint16s, doing type conversion + from the actual type if needed. If forceVarying is true, then type + convert to 'varying' so as to always return a number of values + equal to the target vector width into the given pointer. */ + int AsUInt16(uint16_t *, bool forceVarying = false) const; + /** Return the ConstExpr's values as int32s, doing type conversion from the actual type if needed. If forceVarying is true, then type convert to 'varying' so as to always return a number of values @@ -417,6 +464,10 @@ private: const Type *type; union { + int8_t int8Val[ISPC_MAX_NVEC]; + uint8_t uint8Val[ISPC_MAX_NVEC]; + int16_t int16Val[ISPC_MAX_NVEC]; + uint16_t uint16Val[ISPC_MAX_NVEC]; int32_t int32Val[ISPC_MAX_NVEC]; uint32_t uint32Val[ISPC_MAX_NVEC]; bool boolVal[ISPC_MAX_NVEC]; diff --git a/failing_tests/shuffle2-10.ispc b/failing_tests/shuffle2-10.ispc new file mode 100644 index 00000000..0baf124a --- /dev/null +++ b/failing_tests/shuffle2-10.ispc @@ -0,0 +1,16 @@ + +/* failing due to llvm bug http://llvm.org/bugs/show_bug.cgi?id=10421 */ + +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + int8 aa = aFOO[programIndex]; + int8 bb = aa + programCount; + int8 shuf = shuffle(aa, bb, 2*programIndex+(int)b-5); +//CO print("%\n%\n%\n%\n", aa, bb, 2*programIndex+(int)b-5, shuf); + RET[programIndex] = shuf; +} + +export void result(uniform float RET[]) { + RET[programIndex] = 1 + 2*programIndex; +} diff --git a/ispc_test.cpp b/ispc_test.cpp index f6bece46..13510e82 100644 --- a/ispc_test.cpp +++ b/ispc_test.cpp @@ -158,38 +158,40 @@ static bool lRunTest(const char *fn) { } llvm::Function *func; - if ((func = module->getFunction("ISPCLaunch")) != NULL) - ee->addGlobalMapping(func, (void *)ISPCLaunch); - if ((func = module->getFunction("ISPCSync")) != NULL) - ee->addGlobalMapping(func, (void *)ISPCSync); +#define DO_FUNC(FUNC ,FUNCNAME) \ + if ((func = module->getFunction(FUNCNAME)) != NULL) \ + ee->addGlobalMapping(func, (void *)FUNC) + DO_FUNC(ISPCLaunch, "ISPCLaunch"); + DO_FUNC(ISPCSync, "ISPCSync"); #ifdef ISPC_IS_WINDOWS - if ((func = module->getFunction("ISPCMalloc")) != NULL) - ee->addGlobalMapping(func, (void *)ISPCMalloc); - if ((func = module->getFunction("ISPCFree")) != NULL) - ee->addGlobalMapping(func, 
(void *)ISPCFree); + DO_FUNC(ISPCMalloc, "ISPCMalloc"); + DO_FUNC(ISPCFree, "ISPCFree"); #endif // ISPC_IS_WINDOWS - if ((func = module->getFunction("putchar")) != NULL) - ee->addGlobalMapping(func, (void *)putchar); - if ((func = module->getFunction("printf")) != NULL) - ee->addGlobalMapping(func, (void *)printf); - if ((func = module->getFunction("fflush")) != NULL) - ee->addGlobalMapping(func, (void *)fflush); - if ((func = module->getFunction("sinf")) != NULL) - ee->addGlobalMapping(func, (void *)sinf); - if ((func = module->getFunction("cosf")) != NULL) - ee->addGlobalMapping(func, (void *)cosf); - if ((func = module->getFunction("tanf")) != NULL) - ee->addGlobalMapping(func, (void *)tanf); - if ((func = module->getFunction("atanf")) != NULL) - ee->addGlobalMapping(func, (void *)atanf); - if ((func = module->getFunction("atan2f")) != NULL) - ee->addGlobalMapping(func, (void *)atan2f); - if ((func = module->getFunction("powf")) != NULL) - ee->addGlobalMapping(func, (void *)powf); - if ((func = module->getFunction("expf")) != NULL) - ee->addGlobalMapping(func, (void *)expf); - if ((func = module->getFunction("logf")) != NULL) - ee->addGlobalMapping(func, (void *)logf); + DO_FUNC(putchar, "putchar"); + DO_FUNC(printf, "printf"); + DO_FUNC(fflush, "fflush"); + DO_FUNC(sinf, "sinf"); + DO_FUNC(cosf, "cosf"); + DO_FUNC(tanf, "tanf"); + DO_FUNC(atanf, "atanf"); + DO_FUNC(atan2f, "atan2f"); + DO_FUNC(powf, "powf"); + DO_FUNC(expf, "expf"); + DO_FUNC(logf, "logf"); + DO_FUNC(sin, "sin"); + DO_FUNC(cos, "cos"); + DO_FUNC(tan, "tan"); + DO_FUNC(atan, "atan"); + DO_FUNC(atan2, "atan2"); + DO_FUNC(pow, "pow"); + DO_FUNC(exp, "exp"); + DO_FUNC(log, "log"); + DO_FUNC(memset, "memset"); +#ifdef ISPC_IS_APPLE + DO_FUNC(memset_pattern4, "memset_pattern4"); + DO_FUNC(memset_pattern8, "memset_pattern8"); + DO_FUNC(memset_pattern16, "memset_pattern16"); +#endif #ifdef ISPC_HAVE_SVML #define DO_SVML(FUNC ,FUNCNAME) \ diff --git a/lex.ll b/lex.ll index d4cff6f4..b9eff107 100644 --- a/lex.ll +++ b/lex.ll @@ -104,6 +104,8 @@ goto { return TOKEN_GOTO; } if { return TOKEN_IF; } inline { return TOKEN_INLINE; } int { return TOKEN_INT; } +int8 { return TOKEN_INT8; } +int16 { return TOKEN_INT16; } int32 { return TOKEN_INT; } int64 { return TOKEN_INT64; } launch { return TOKEN_LAUNCH; } diff --git a/llvmutil.cpp b/llvmutil.cpp index ebdc1f7b..f31738cc 100644 --- a/llvmutil.cpp +++ b/llvmutil.cpp @@ -41,28 +41,39 @@ LLVM_TYPE_CONST llvm::Type *LLVMTypes::VoidType = NULL; LLVM_TYPE_CONST llvm::PointerType *LLVMTypes::VoidPointerType = NULL; LLVM_TYPE_CONST llvm::Type *LLVMTypes::BoolType = NULL; + LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int8Type = NULL; LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int16Type = NULL; LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int32Type = NULL; -LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int32PointerType = NULL; LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int64Type = NULL; -LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int64PointerType = NULL; LLVM_TYPE_CONST llvm::Type *LLVMTypes::FloatType = NULL; -LLVM_TYPE_CONST llvm::Type *LLVMTypes::FloatPointerType = NULL; LLVM_TYPE_CONST llvm::Type *LLVMTypes::DoubleType = NULL; + +LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int8PointerType = NULL; +LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int16PointerType = NULL; +LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int32PointerType = NULL; +LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int64PointerType = NULL; +LLVM_TYPE_CONST llvm::Type *LLVMTypes::FloatPointerType = NULL; LLVM_TYPE_CONST llvm::Type *LLVMTypes::DoublePointerType = NULL; 
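For reference, the DO_FUNC change in ispc_test.cpp above collapses the long run of getFunction()/addGlobalMapping() pairs into a single macro. A minimal standalone sketch of that pattern, assuming the LLVM 2.9/3.0-era headers this patch targets (the helper and its name are illustrative, not part of the patch):

    #include <cstdio>
    #include "llvm/Module.h"
    #include "llvm/ExecutionEngine/ExecutionEngine.h"

    // Map host functions into the JIT so generated test code can call them;
    // each DO_FUNC expansion stands in for one of the old if/addGlobalMapping pairs.
    static void lMapRuntimeSymbols(llvm::Module *module, llvm::ExecutionEngine *ee) {
        llvm::Function *func;
    #define DO_FUNC(FUNC, FUNCNAME)                               \
        if ((func = module->getFunction(FUNCNAME)) != NULL)       \
            ee->addGlobalMapping(func, (void *)FUNC)
        DO_FUNC(printf, "printf");
        DO_FUNC(putchar, "putchar");
    #undef DO_FUNC
    }
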
LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::MaskType = NULL; LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::BoolVectorType = NULL; + LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::Int1VectorType = NULL; +LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::Int8VectorType = NULL; +LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::Int16VectorType = NULL; LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::Int32VectorType = NULL; -LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int32VectorPointerType = NULL; LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::Int64VectorType = NULL; -LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int64VectorPointerType = NULL; LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::FloatVectorType = NULL; -LLVM_TYPE_CONST llvm::Type *LLVMTypes::FloatVectorPointerType = NULL; LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::DoubleVectorType = NULL; + +LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int8VectorPointerType = NULL; +LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int16VectorPointerType = NULL; +LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int32VectorPointerType = NULL; +LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int64VectorPointerType = NULL; +LLVM_TYPE_CONST llvm::Type *LLVMTypes::FloatVectorPointerType = NULL; LLVM_TYPE_CONST llvm::Type *LLVMTypes::DoubleVectorPointerType = NULL; + LLVM_TYPE_CONST llvm::ArrayType *LLVMTypes::VoidPointerVectorType = NULL; llvm::Constant *LLVMTrue = NULL; @@ -75,16 +86,20 @@ void InitLLVMUtil(llvm::LLVMContext *ctx, Target target) { LLVMTypes::VoidType = llvm::Type::getVoidTy(*ctx); LLVMTypes::VoidPointerType = llvm::PointerType::get(llvm::Type::getInt8Ty(*ctx), 0); + LLVMTypes::BoolType = llvm::Type::getInt1Ty(*ctx); LLVMTypes::Int8Type = llvm::Type::getInt8Ty(*ctx); LLVMTypes::Int16Type = llvm::Type::getInt16Ty(*ctx); LLVMTypes::Int32Type = llvm::Type::getInt32Ty(*ctx); - LLVMTypes::Int32PointerType = llvm::PointerType::get(LLVMTypes::Int32Type, 0); LLVMTypes::Int64Type = llvm::Type::getInt64Ty(*ctx); - LLVMTypes::Int64PointerType = llvm::PointerType::get(LLVMTypes::Int64Type, 0); LLVMTypes::FloatType = llvm::Type::getFloatTy(*ctx); - LLVMTypes::FloatPointerType = llvm::PointerType::get(LLVMTypes::FloatType, 0); LLVMTypes::DoubleType = llvm::Type::getDoubleTy(*ctx); + + LLVMTypes::Int8PointerType = llvm::PointerType::get(LLVMTypes::Int8Type, 0); + LLVMTypes::Int16PointerType = llvm::PointerType::get(LLVMTypes::Int16Type, 0); + LLVMTypes::Int32PointerType = llvm::PointerType::get(LLVMTypes::Int32Type, 0); + LLVMTypes::Int64PointerType = llvm::PointerType::get(LLVMTypes::Int64Type, 0); + LLVMTypes::FloatPointerType = llvm::PointerType::get(LLVMTypes::FloatType, 0); LLVMTypes::DoublePointerType = llvm::PointerType::get(LLVMTypes::DoubleType, 0); // Note that both the mask and bool vectors are vector of int32s @@ -95,18 +110,26 @@ InitLLVMUtil(llvm::LLVMContext *ctx, Target target) { LLVMTypes::Int1VectorType = llvm::VectorType::get(llvm::Type::getInt1Ty(*ctx), target.vectorWidth); + LLVMTypes::Int8VectorType = + llvm::VectorType::get(LLVMTypes::Int8Type, target.vectorWidth); + LLVMTypes::Int16VectorType = + llvm::VectorType::get(LLVMTypes::Int16Type, target.vectorWidth); LLVMTypes::Int32VectorType = llvm::VectorType::get(LLVMTypes::Int32Type, target.vectorWidth); - LLVMTypes::Int32VectorPointerType = llvm::PointerType::get(LLVMTypes::Int32VectorType, 0); LLVMTypes::Int64VectorType = llvm::VectorType::get(LLVMTypes::Int64Type, target.vectorWidth); - LLVMTypes::Int64VectorPointerType = llvm::PointerType::get(LLVMTypes::Int64VectorType, 0); LLVMTypes::FloatVectorType = 
llvm::VectorType::get(LLVMTypes::FloatType, target.vectorWidth); - LLVMTypes::FloatVectorPointerType = llvm::PointerType::get(LLVMTypes::FloatVectorType, 0); LLVMTypes::DoubleVectorType = llvm::VectorType::get(LLVMTypes::DoubleType, target.vectorWidth); + + LLVMTypes::Int8VectorPointerType = llvm::PointerType::get(LLVMTypes::Int8VectorType, 0); + LLVMTypes::Int16VectorPointerType = llvm::PointerType::get(LLVMTypes::Int16VectorType, 0); + LLVMTypes::Int32VectorPointerType = llvm::PointerType::get(LLVMTypes::Int32VectorType, 0); + LLVMTypes::Int64VectorPointerType = llvm::PointerType::get(LLVMTypes::Int64VectorType, 0); + LLVMTypes::FloatVectorPointerType = llvm::PointerType::get(LLVMTypes::FloatVectorType, 0); LLVMTypes::DoubleVectorPointerType = llvm::PointerType::get(LLVMTypes::DoubleVectorType, 0); + LLVMTypes::VoidPointerVectorType = llvm::ArrayType::get(LLVMTypes::VoidPointerType, target.vectorWidth); @@ -133,7 +156,36 @@ InitLLVMUtil(llvm::LLVMContext *ctx, Target target) { } -llvm::ConstantInt *LLVMInt32(int32_t ival) { +llvm::ConstantInt * +LLVMInt8(int8_t ival) { + return llvm::ConstantInt::get(llvm::Type::getInt8Ty(*g->ctx), ival, + true /*signed*/); +} + + +llvm::ConstantInt * +LLVMUInt8(uint8_t ival) { + return llvm::ConstantInt::get(llvm::Type::getInt8Ty(*g->ctx), ival, + false /*unsigned*/); +} + + +llvm::ConstantInt * +LLVMInt16(int16_t ival) { + return llvm::ConstantInt::get(llvm::Type::getInt16Ty(*g->ctx), ival, + true /*signed*/); +} + + +llvm::ConstantInt * +LLVMUInt16(uint16_t ival) { + return llvm::ConstantInt::get(llvm::Type::getInt16Ty(*g->ctx), ival, + false /*unsigned*/); +} + + +llvm::ConstantInt * +LLVMInt32(int32_t ival) { return llvm::ConstantInt::get(llvm::Type::getInt32Ty(*g->ctx), ival, true /*signed*/); } @@ -172,6 +224,82 @@ LLVMDouble(double dval) { } +llvm::Constant * +LLVMInt8Vector(int8_t ival) { + llvm::Constant *v = LLVMInt8(ival); + std::vector vals; + for (int i = 0; i < g->target.vectorWidth; ++i) + vals.push_back(v); + return llvm::ConstantVector::get(vals); +} + + +llvm::Constant * +LLVMInt8Vector(const int8_t *ivec) { + std::vector vals; + for (int i = 0; i < g->target.vectorWidth; ++i) + vals.push_back(LLVMInt8(ivec[i])); + return llvm::ConstantVector::get(vals); +} + + +llvm::Constant * +LLVMUInt8Vector(uint8_t ival) { + llvm::Constant *v = LLVMUInt8(ival); + std::vector vals; + for (int i = 0; i < g->target.vectorWidth; ++i) + vals.push_back(v); + return llvm::ConstantVector::get(vals); +} + + +llvm::Constant * +LLVMUInt8Vector(const uint8_t *ivec) { + std::vector vals; + for (int i = 0; i < g->target.vectorWidth; ++i) + vals.push_back(LLVMUInt8(ivec[i])); + return llvm::ConstantVector::get(vals); +} + + +llvm::Constant * +LLVMInt16Vector(int16_t ival) { + llvm::Constant *v = LLVMInt16(ival); + std::vector vals; + for (int i = 0; i < g->target.vectorWidth; ++i) + vals.push_back(v); + return llvm::ConstantVector::get(vals); +} + + +llvm::Constant * +LLVMInt16Vector(const int16_t *ivec) { + std::vector vals; + for (int i = 0; i < g->target.vectorWidth; ++i) + vals.push_back(LLVMInt16(ivec[i])); + return llvm::ConstantVector::get(vals); +} + + +llvm::Constant * +LLVMUInt16Vector(uint16_t ival) { + llvm::Constant *v = LLVMUInt16(ival); + std::vector vals; + for (int i = 0; i < g->target.vectorWidth; ++i) + vals.push_back(v); + return llvm::ConstantVector::get(vals); +} + + +llvm::Constant * +LLVMUInt16Vector(const uint16_t *ivec) { + std::vector vals; + for (int i = 0; i < g->target.vectorWidth; ++i) + vals.push_back(LLVMUInt16(ivec[i])); + 
return llvm::ConstantVector::get(vals); +} + + llvm::Constant * LLVMInt32Vector(int32_t ival) { llvm::Constant *v = LLVMInt32(ival); diff --git a/llvmutil.h b/llvmutil.h index 15ed354f..68397b70 100644 --- a/llvmutil.h +++ b/llvmutil.h @@ -53,28 +53,39 @@ struct LLVMTypes { static LLVM_TYPE_CONST llvm::Type *VoidType; static LLVM_TYPE_CONST llvm::PointerType *VoidPointerType; static LLVM_TYPE_CONST llvm::Type *BoolType; + static LLVM_TYPE_CONST llvm::Type *Int8Type; static LLVM_TYPE_CONST llvm::Type *Int16Type; static LLVM_TYPE_CONST llvm::Type *Int32Type; - static LLVM_TYPE_CONST llvm::Type *Int32PointerType; static LLVM_TYPE_CONST llvm::Type *Int64Type; - static LLVM_TYPE_CONST llvm::Type *Int64PointerType; static LLVM_TYPE_CONST llvm::Type *FloatType; - static LLVM_TYPE_CONST llvm::Type *FloatPointerType; static LLVM_TYPE_CONST llvm::Type *DoubleType; + + static LLVM_TYPE_CONST llvm::Type *Int8PointerType; + static LLVM_TYPE_CONST llvm::Type *Int16PointerType; + static LLVM_TYPE_CONST llvm::Type *Int32PointerType; + static LLVM_TYPE_CONST llvm::Type *Int64PointerType; + static LLVM_TYPE_CONST llvm::Type *FloatPointerType; static LLVM_TYPE_CONST llvm::Type *DoublePointerType; static LLVM_TYPE_CONST llvm::VectorType *MaskType; + static LLVM_TYPE_CONST llvm::VectorType *BoolVectorType; static LLVM_TYPE_CONST llvm::VectorType *Int1VectorType; + static LLVM_TYPE_CONST llvm::VectorType *Int8VectorType; + static LLVM_TYPE_CONST llvm::VectorType *Int16VectorType; static LLVM_TYPE_CONST llvm::VectorType *Int32VectorType; - static LLVM_TYPE_CONST llvm::Type *Int32VectorPointerType; static LLVM_TYPE_CONST llvm::VectorType *Int64VectorType; - static LLVM_TYPE_CONST llvm::Type *Int64VectorPointerType; static LLVM_TYPE_CONST llvm::VectorType *FloatVectorType; - static LLVM_TYPE_CONST llvm::Type *FloatVectorPointerType; static LLVM_TYPE_CONST llvm::VectorType *DoubleVectorType; + + static LLVM_TYPE_CONST llvm::Type *Int8VectorPointerType; + static LLVM_TYPE_CONST llvm::Type *Int16VectorPointerType; + static LLVM_TYPE_CONST llvm::Type *Int32VectorPointerType; + static LLVM_TYPE_CONST llvm::Type *Int64VectorPointerType; + static LLVM_TYPE_CONST llvm::Type *FloatVectorPointerType; static LLVM_TYPE_CONST llvm::Type *DoubleVectorPointerType; + static LLVM_TYPE_CONST llvm::ArrayType *VoidPointerVectorType; }; @@ -89,6 +100,14 @@ extern llvm::Constant *LLVMTrue, *LLVMFalse; */ extern void InitLLVMUtil(llvm::LLVMContext *ctx, Target target); +/** Returns an LLVM i8 constant of the given value */ +extern llvm::ConstantInt *LLVMInt8(int8_t i); +/** Returns an LLVM i8 constant of the given value */ +extern llvm::ConstantInt *LLVMUInt8(uint8_t i); +/** Returns an LLVM i16 constant of the given value */ +extern llvm::ConstantInt *LLVMInt16(int16_t i); +/** Returns an LLVM i16 constant of the given value */ +extern llvm::ConstantInt *LLVMUInt16(uint16_t i); /** Returns an LLVM i32 constant of the given value */ extern llvm::ConstantInt *LLVMInt32(int32_t i); /** Returns an LLVM i32 constant of the given value */ @@ -105,18 +124,35 @@ extern llvm::Constant *LLVMDouble(double f); /** Returns an LLVM boolean vector constant of the given value smeared across all elements */ extern llvm::Constant *LLVMBoolVector(bool v); + +/** Returns an LLVM i8 vector constant of the given value smeared + across all elements */ +extern llvm::Constant *LLVMInt8Vector(int8_t i); +/** Returns an LLVM i8 vector constant of the given value smeared + across all elements */ +extern llvm::Constant *LLVMUInt8Vector(uint8_t i); + +/** Returns 
an LLVM i16 vector constant of the given value smeared + across all elements */ +extern llvm::Constant *LLVMInt16Vector(int16_t i); +/** Returns an LLVM i16 vector constant of the given value smeared + across all elements */ +extern llvm::Constant *LLVMUInt16Vector(uint16_t i); + /** Returns an LLVM i32 vector constant of the given value smeared across all elements */ extern llvm::Constant *LLVMInt32Vector(int32_t i); /** Returns an LLVM i32 vector constant of the given value smeared across all elements */ extern llvm::Constant *LLVMUInt32Vector(uint32_t i); + /** Returns an LLVM i64 vector constant of the given value smeared across all elements */ extern llvm::Constant *LLVMInt64Vector(int64_t i); /** Returns an LLVM i64 vector constant of the given value smeared across all elements */ extern llvm::Constant *LLVMUInt64Vector(uint64_t i); + /** Returns an LLVM float vector constant of the given value smeared across all elements */ extern llvm::Constant *LLVMFloatVector(float f); @@ -127,18 +163,35 @@ extern llvm::Constant *LLVMDoubleVector(double f); /** Returns an LLVM boolean vector based on the given array of values. The array should have g->target.vectorWidth elements. */ extern llvm::Constant *LLVMBoolVector(const bool *v); + +/** Returns an LLVM i8 vector based on the given array of values. + The array should have g->target.vectorWidth elements. */ +extern llvm::Constant *LLVMInt8Vector(const int8_t *i); +/** Returns an LLVM i8 vector based on the given array of values. + The array should have g->target.vectorWidth elements. */ +extern llvm::Constant *LLVMUInt8Vector(const uint8_t *i); + +/** Returns an LLVM i16 vector based on the given array of values. + The array should have g->target.vectorWidth elements. */ +extern llvm::Constant *LLVMInt16Vector(const int16_t *i); +/** Returns an LLVM i16 vector based on the given array of values. + The array should have g->target.vectorWidth elements. */ +extern llvm::Constant *LLVMUInt16Vector(const uint16_t *i); + /** Returns an LLVM i32 vector based on the given array of values. The array should have g->target.vectorWidth elements. */ extern llvm::Constant *LLVMInt32Vector(const int32_t *i); /** Returns an LLVM i32 vector based on the given array of values. The array should have g->target.vectorWidth elements. */ extern llvm::Constant *LLVMUInt32Vector(const uint32_t *i); + /** Returns an LLVM i64 vector based on the given array of values. The array should have g->target.vectorWidth elements. */ extern llvm::Constant *LLVMInt64Vector(const int64_t *i); /** Returns an LLVM i64 vector based on the given array of values. The array should have g->target.vectorWidth elements. */ extern llvm::Constant *LLVMUInt64Vector(const uint64_t *i); + /** Returns an LLVM float vector based on the given array of values. The array should have g->target.vectorWidth elements. 
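As a usage sketch (not part of the patch), the new 8- and 16-bit constant helpers declared above can be exercised as follows, assuming ispc.h and llvmutil.h are included and InitLLVMUtil() has already run so that g->target.vectorWidth is valid:

    static void lExampleSmallIntConstants() {
        // Per-lane values for a varying int16 constant.
        int16_t lane[ISPC_MAX_NVEC];
        for (int i = 0; i < g->target.vectorWidth; ++i)
            lane[i] = (int16_t)i;

        llvm::Constant *ones = LLVMInt8Vector((int8_t)1);  // <i8 1, 1, ..., 1>
        llvm::Constant *ids  = LLVMInt16Vector(lane);      // <i16 0, 1, ..., N-1>
        // (these constants would normally feed into further IR construction)
        (void)ones; (void)ids;
    }
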
*/ extern llvm::Constant *LLVMFloatVector(const float *f); diff --git a/opt.cpp b/opt.cpp index 75ab262c..4e4cd7cb 100644 --- a/opt.cpp +++ b/opt.cpp @@ -409,7 +409,6 @@ IntrinsicsOpt::IntrinsicsOpt() llvm::Intrinsic::getDeclaration(m->module, llvm::Intrinsic::x86_sse_movmsk_ps); maskInstructions.push_back(sseMovmsk); maskInstructions.push_back(m->module->getFunction("llvm.x86.avx.movmsk.ps")); - maskInstructions.push_back(m->module->getFunction("llvm.x86.mic.mask16.to.int")); maskInstructions.push_back(m->module->getFunction("__movmsk")); // And all of the blend instructions @@ -418,8 +417,6 @@ IntrinsicsOpt::IntrinsicsOpt() 0xf, 0, 1, 2)); blendInstructions.push_back(BlendInstruction( m->module->getFunction("llvm.x86.avx.blendvps"), 0xff, 0, 1, 2)); - blendInstructions.push_back(BlendInstruction( - m->module->getFunction("llvm.x86.mic.blend.ps"), 0xffff, 1, 2, 0)); } @@ -499,8 +496,8 @@ bool IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) { bool modifiedAny = false; restart: - for (llvm::BasicBlock::iterator i = bb.begin(), e = bb.end(); i != e; ++i) { - llvm::CallInst *callInst = llvm::dyn_cast(&*i); + for (llvm::BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e; ++iter) { + llvm::CallInst *callInst = llvm::dyn_cast(&*iter); if (!callInst) continue; @@ -512,7 +509,8 @@ IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) { // If the values are the same, then no need to blend.. if (v[0] == v[1]) { - llvm::ReplaceInstWithValue(i->getParent()->getInstList(), i, v[0]); + llvm::ReplaceInstWithValue(iter->getParent()->getInstList(), + iter, v[0]); modifiedAny = true; goto restart; } @@ -524,12 +522,14 @@ IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) { // otherwise the result is undefined and any value is fine, // ergo the defined one is an acceptable result.) if (lIsUndef(v[0])) { - llvm::ReplaceInstWithValue(i->getParent()->getInstList(), i, v[1]); + llvm::ReplaceInstWithValue(iter->getParent()->getInstList(), + iter, v[1]); modifiedAny = true; goto restart; } if (lIsUndef(v[1])) { - llvm::ReplaceInstWithValue(i->getParent()->getInstList(), i, v[0]); + llvm::ReplaceInstWithValue(iter->getParent()->getInstList(), + iter, v[0]); modifiedAny = true; goto restart; } @@ -544,7 +544,8 @@ IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) { value = v[1]; if (value != NULL) { - llvm::ReplaceInstWithValue(i->getParent()->getInstList(), i, value); + llvm::ReplaceInstWithValue(iter->getParent()->getInstList(), + iter, value); modifiedAny = true; goto restart; } @@ -557,7 +558,8 @@ IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) { // with the corresponding integer mask from its elements // high bits. 
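The blend simplifications applied in this pass boil down to a handful of rules; collected into one illustrative helper (not part of the patch, and using llvm::isa<llvm::UndefValue> where the pass's lIsUndef() also accepts vectors of undef elements):

    static llvm::Value *lSimplifyBlend(llvm::Value *v0, llvm::Value *v1,
                                       uint64_t factor, uint64_t allOnMask) {
        if (v0 == v1)
            return v0;                       // both inputs identical: no blend needed
        if (llvm::isa<llvm::UndefValue>(v0))
            return v1;                       // undef lanes may take any value
        if (llvm::isa<llvm::UndefValue>(v1))
            return v0;
        if (factor == 0)
            return v0;                       // mask all off: keep the old value
        if (factor == allOnMask)
            return v1;                       // mask all on: take the new value
        return NULL;                         // otherwise leave the blend in place
    }
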
llvm::Value *value = LLVMInt32(mask); - llvm::ReplaceInstWithValue(i->getParent()->getInstList(), i, value); + llvm::ReplaceInstWithValue(iter->getParent()->getInstList(), + iter, value); modifiedAny = true; goto restart; } @@ -653,10 +655,18 @@ lSizeOfIfKnown(const llvm::Type *type, uint64_t *size) { *size = 1; return true; } + if (type == LLVMTypes::Int8VectorType) { + *size = g->target.vectorWidth * 1; + return true; + } else if (type == LLVMTypes::Int16Type) { *size = 2; return true; } + if (type == LLVMTypes::Int16VectorType) { + *size = g->target.vectorWidth * 2; + return true; + } else if (type == LLVMTypes::FloatType || type == LLVMTypes::Int32Type) { *size = 4; return true; @@ -978,33 +988,53 @@ lGetPtrAndOffsets(llvm::Value *ptrs, llvm::Value **basePtr, } +struct GSInfo { + GSInfo(const char *pgFuncName, const char *pgboFuncName, bool ig, int es) + : isGather(ig), elementSize(es) { + func = m->module->getFunction(pgFuncName); + baseOffsetsFunc = m->module->getFunction(pgboFuncName); + } + llvm::Function *func; + llvm::Function *baseOffsetsFunc; + const bool isGather; + const int elementSize; +}; + + bool GatherScatterFlattenOpt::runOnBasicBlock(llvm::BasicBlock &bb) { - llvm::Function *gather32Func = m->module->getFunction("__pseudo_gather_32"); - llvm::Function *gather64Func = m->module->getFunction("__pseudo_gather_64"); - llvm::Function *scatter32Func = m->module->getFunction("__pseudo_scatter_32"); - llvm::Function *scatter64Func = m->module->getFunction("__pseudo_scatter_64"); - assert(gather32Func && gather64Func && scatter32Func && scatter64Func); + GSInfo gsFuncs[] = { + GSInfo("__pseudo_gather_8", "__pseudo_gather_base_offsets_8", true, 1), + GSInfo("__pseudo_gather_16", "__pseudo_gather_base_offsets_16", true, 2), + GSInfo("__pseudo_gather_32", "__pseudo_gather_base_offsets_32", true, 4), + GSInfo("__pseudo_gather_64", "__pseudo_gather_base_offsets_64", true, 8), + GSInfo("__pseudo_scatter_8", "__pseudo_scatter_base_offsets_8", false, 1), + GSInfo("__pseudo_scatter_16", "__pseudo_scatter_base_offsets_16", false, 2), + GSInfo("__pseudo_scatter_32", "__pseudo_scatter_base_offsets_32", false, 4), + GSInfo("__pseudo_scatter_64", "__pseudo_scatter_base_offsets_64", false, 8), + }; + int numGSFuncs = sizeof(gsFuncs) / sizeof(gsFuncs[0]); + for (int i = 0; i < numGSFuncs; ++i) + assert(gsFuncs[i].func != NULL && gsFuncs[i].baseOffsetsFunc != NULL); bool modifiedAny = false; restart: // Iterate through all of the instructions in the basic block. - for (llvm::BasicBlock::iterator i = bb.begin(), e = bb.end(); i != e; ++i) { - llvm::CallInst *callInst = llvm::dyn_cast(&*i); + for (llvm::BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e; ++iter) { + llvm::CallInst *callInst = llvm::dyn_cast(&*iter); // If we don't have a call to one of the // __pseudo_{gather,scatter}_* functions, then just go on to the // next instruction. 
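The GSInfo table above lets the flattening pass match a call against all eight pseudo gather/scatter entry points with one loop instead of four hard-coded comparisons. Condensed into an illustrative helper (same fields as GSInfo, but the helper itself is not in the patch):

    static GSInfo *lMatchGSCall(llvm::CallInst *callInst, GSInfo *table, int count) {
        if (callInst == NULL)
            return NULL;
        for (int i = 0; i < count; ++i)
            if (callInst->getCalledFunction() == table[i].func)
                return &table[i];  // supplies isGather and elementSize for the rewrite
        return NULL;
    }

The elementSize field then drives the offset scaling that previously depended on the is32 flag.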
- if (!callInst || - (callInst->getCalledFunction() != gather32Func && - callInst->getCalledFunction() != gather64Func && - callInst->getCalledFunction() != scatter32Func && - callInst->getCalledFunction() != scatter64Func)) + if (callInst == NULL) + continue; + GSInfo *info = NULL; + for (int i = 0; i < numGSFuncs; ++i) + if (callInst->getCalledFunction() == gsFuncs[i].func) { + info = &gsFuncs[i]; + break; + } + if (info == NULL) continue; - - bool isGather = (callInst->getCalledFunction() == gather32Func || - callInst->getCalledFunction() == gather64Func); - bool is32 = (callInst->getCalledFunction() == gather32Func || - callInst->getCalledFunction() == scatter32Func); // Transform the array of pointers to a single base pointer and an // array of int32 offsets. (All the hard work is done by @@ -1012,19 +1042,15 @@ GatherScatterFlattenOpt::runOnBasicBlock(llvm::BasicBlock &bb) { llvm::Value *ptrs = callInst->getArgOperand(0); llvm::Value *basePtr = NULL; llvm::Value *offsetVector = lGetPtrAndOffsets(ptrs, &basePtr, callInst, - is32 ? 4 : 8); + info->elementSize); // Cast the base pointer to a void *, since that's what the // __pseudo_*_base_offsets_* functions want. - basePtr = new llvm::BitCastInst(basePtr, LLVMTypes::VoidPointerType, "base2void", - callInst); + basePtr = new llvm::BitCastInst(basePtr, LLVMTypes::VoidPointerType, + "base2void", callInst); lCopyMetadata(basePtr, callInst); - if (isGather) { + if (info->isGather) { llvm::Value *mask = callInst->getArgOperand(1); - llvm::Function *gFunc = - m->module->getFunction(is32 ? "__pseudo_gather_base_offsets_32" : - "__pseudo_gather_base_offsets_64"); - assert(gFunc != NULL); // Generate a new function call to the next pseudo gather // base+offsets instruction. Note that we're passing a NULL @@ -1035,11 +1061,12 @@ GatherScatterFlattenOpt::runOnBasicBlock(llvm::BasicBlock &bb) { #if defined(LLVM_3_0) || defined(LLVM_3_0svn) llvm::ArrayRef newArgArray(&newArgs[0], &newArgs[3]); llvm::Instruction *newCall = - llvm::CallInst::Create(gFunc, newArgArray, "newgather", - (llvm::Instruction *)NULL); + llvm::CallInst::Create(info->baseOffsetsFunc, newArgArray, + "newgather", (llvm::Instruction *)NULL); #else llvm::Instruction *newCall = - llvm::CallInst::Create(gFunc, &newArgs[0], &newArgs[3], "newgather"); + llvm::CallInst::Create(info->baseOffsetsFunc, &newArgs[0], &newArgs[3], + "newgather"); #endif lCopyMetadata(newCall, callInst); llvm::ReplaceInstWithInst(callInst, newCall); @@ -1047,10 +1074,6 @@ GatherScatterFlattenOpt::runOnBasicBlock(llvm::BasicBlock &bb) { else { llvm::Value *mask = callInst->getArgOperand(2); llvm::Value *rvalue = callInst->getArgOperand(1); - llvm::Function *gFunc = - m->module->getFunction(is32 ? "__pseudo_scatter_base_offsets_32" : - "__pseudo_scatter_base_offsets_64"); - assert(gFunc); // Generate a new function call to the next pseudo scatter // base+offsets instruction. 
See above for why passing NULL @@ -1059,11 +1082,12 @@ GatherScatterFlattenOpt::runOnBasicBlock(llvm::BasicBlock &bb) { #if defined(LLVM_3_0) || defined(LLVM_3_0svn) llvm::ArrayRef newArgArray(&newArgs[0], &newArgs[4]); llvm::Instruction *newCall = - llvm::CallInst::Create(gFunc, newArgArray, "", + llvm::CallInst::Create(info->baseOffsetsFunc, newArgArray, "", (llvm::Instruction *)NULL); #else llvm::Instruction *newCall = - llvm::CallInst::Create(gFunc, &newArgs[0], &newArgs[4]); + llvm::CallInst::Create(info->baseOffsetsFunc, &newArgs[0], + &newArgs[4]); #endif lCopyMetadata(newCall, callInst); llvm::ReplaceInstWithInst(callInst, newCall); @@ -1105,28 +1129,53 @@ char MaskedStoreOptPass::ID = 0; llvm::RegisterPass mss("masked-store-scalarize", "Masked Store Scalarize Pass"); +struct MSInfo { + MSInfo(const char *name, const int a) + : align(a) { + func = m->module->getFunction(name); + assert(func != NULL); + } + llvm::Function *func; + const int align; +}; + + bool MaskedStoreOptPass::runOnBasicBlock(llvm::BasicBlock &bb) { - llvm::Function *pms32Func = m->module->getFunction("__pseudo_masked_store_32"); - llvm::Function *pms64Func = m->module->getFunction("__pseudo_masked_store_64"); - llvm::Function *msb32Func = m->module->getFunction("__masked_store_blend_32"); - llvm::Function *msb64Func = m->module->getFunction("__masked_store_blend_64"); - llvm::Function *ms32Func = m->module->getFunction("__masked_store_32"); - llvm::Function *ms64Func = m->module->getFunction("__masked_store_64"); + MSInfo msInfo[] = { + MSInfo("__pseudo_masked_store_8", 1), + MSInfo("__pseudo_masked_store_16", 2), + MSInfo("__pseudo_masked_store_32", 4), + MSInfo("__pseudo_masked_store_64", 8), + MSInfo("__masked_store_blend_8", 1), + MSInfo("__masked_store_blend_16", 2), + MSInfo("__masked_store_blend_32", 4), + MSInfo("__masked_store_blend_64", 8), + MSInfo("__masked_store_8", 1), + MSInfo("__masked_store_16", 2), + MSInfo("__masked_store_32", 4), + MSInfo("__masked_store_64", 8) + }; bool modifiedAny = false; restart: // Iterate over all of the instructions to look for one of the various // masked store functions - for (llvm::BasicBlock::iterator i = bb.begin(), e = bb.end(); i != e; ++i) { - llvm::CallInst *callInst = llvm::dyn_cast(&*i); + for (llvm::BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e; ++iter) { + llvm::CallInst *callInst = llvm::dyn_cast(&*iter); if (!callInst) continue; llvm::Function *called = callInst->getCalledFunction(); - if (called != pms32Func && called != pms64Func && - called != msb32Func && called != msb64Func && - called != ms32Func && called != ms64Func) + int nMSFuncs = sizeof(msInfo) / sizeof(msInfo[0]); + MSInfo *info = NULL; + for (int i = 0; i < nMSFuncs; ++i) { + if (called == msInfo[i].func) { + info = &msInfo[i]; + break; + } + } + if (info == NULL) continue; // Got one; grab the operands @@ -1150,15 +1199,12 @@ MaskedStoreOptPass::runOnBasicBlock(llvm::BasicBlock &bb) { LLVM_TYPE_CONST llvm::Type *rvalueType = rvalue->getType(); LLVM_TYPE_CONST llvm::Type *ptrType = llvm::PointerType::get(rvalueType, 0); - // Need to update this when int8/int16 are added - int align = (called == pms32Func || called == pms64Func || - called == msb32Func) ? 
4 : 8; lvalue = new llvm::BitCastInst(lvalue, ptrType, "lvalue_to_ptr_type", callInst); lCopyMetadata(lvalue, callInst); llvm::Instruction *store = new llvm::StoreInst(rvalue, lvalue, false /* not volatile */, - align); + info->align); lCopyMetadata(store, callInst); llvm::ReplaceInstWithInst(callInst, store); @@ -1180,9 +1226,9 @@ CreateMaskedStoreOptPass() { // LowerMaskedStorePass /** When the front-end needs to do a masked store, it emits a - __pseudo_masked_store_{32,64} call as a placeholder. This pass lowers - these calls to either __masked_store_{32,64} or - __masked_store_blend_{32,64} calls. + __pseudo_masked_store_{8,16,32,64} call as a placeholder. This pass + lowers these calls to either __masked_store_{8,16,32,64} or + __masked_store_blend_{8,16,32,64} calls. */ class LowerMaskedStorePass : public llvm::BasicBlockPass { public: @@ -1227,45 +1273,51 @@ lIsStackVariablePointer(llvm::Value *lvalue) { } -/** Utilty routine to figure out which masked store function to use. The - blend parameter indicates if we want the blending version, is32 - indicates if the element size is 32 bits. - */ -static const char * -lMaskedStoreName(bool blend, bool is32) { - if (blend) { - if (is32) - return "__masked_store_blend_32"; - else - return "__masked_store_blend_64"; +struct LMSInfo { + LMSInfo(const char *pname, const char *bname, const char *msname) { + pseudoFunc = m->module->getFunction(pname); + blendFunc = m->module->getFunction(bname); + maskedStoreFunc = m->module->getFunction(msname); + assert(pseudoFunc != NULL && blendFunc != NULL && + maskedStoreFunc != NULL); } - else { - if (is32) - return "__masked_store_32"; - else - return "__masked_store_64"; - } -} + llvm::Function *pseudoFunc; + llvm::Function *blendFunc; + llvm::Function *maskedStoreFunc; +}; bool LowerMaskedStorePass::runOnBasicBlock(llvm::BasicBlock &bb) { - llvm::Function *maskedStore32Func = m->module->getFunction("__pseudo_masked_store_32"); - llvm::Function *maskedStore64Func = m->module->getFunction("__pseudo_masked_store_64"); - assert(maskedStore32Func && maskedStore64Func); + LMSInfo msInfo[] = { + LMSInfo("__pseudo_masked_store_8", "__masked_store_blend_8", + "__masked_store_8"), + LMSInfo("__pseudo_masked_store_16", "__masked_store_blend_16", + "__masked_store_16"), + LMSInfo("__pseudo_masked_store_32", "__masked_store_blend_32", + "__masked_store_32"), + LMSInfo("__pseudo_masked_store_64", "__masked_store_blend_64", + "__masked_store_64") + }; bool modifiedAny = false; restart: - for (llvm::BasicBlock::iterator i = bb.begin(), e = bb.end(); i != e; ++i) { + for (llvm::BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e; ++iter) { // Iterate through all of the instructions and look for // __pseudo_masked_store_* calls. 
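Once a masked store call is matched against the MSInfo table above and its mask is known to be all on, the rewrite is a plain store at the per-type alignment. Roughly, as an illustrative helper with the metadata copying and the all-off case omitted (it assumes opt.cpp's existing includes and the LLVM 2.9/3.0 StoreInst constructor used in the hunk above):

    static void lStoreWithAllOnMask(llvm::CallInst *callInst, const MSInfo *info) {
        llvm::Value *lvalue = callInst->getArgOperand(0);
        llvm::Value *rvalue = callInst->getArgOperand(1);
        LLVM_TYPE_CONST llvm::Type *ptrType =
            llvm::PointerType::get(rvalue->getType(), 0);
        lvalue = new llvm::BitCastInst(lvalue, ptrType, "lvalue_to_ptr_type", callInst);
        llvm::Instruction *store =
            new llvm::StoreInst(rvalue, lvalue, false /* not volatile */, info->align);
        llvm::ReplaceInstWithInst(callInst, store);
    }
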
- llvm::CallInst *callInst = llvm::dyn_cast(&*i); - if (!callInst || - (callInst->getCalledFunction() != maskedStore32Func && - callInst->getCalledFunction() != maskedStore64Func)) + llvm::CallInst *callInst = llvm::dyn_cast(&*iter); + if (callInst == NULL) + continue; + LMSInfo *info = NULL; + for (unsigned int i = 0; i < sizeof(msInfo) / sizeof(msInfo[0]); ++i) { + if (callInst->getCalledFunction() == msInfo[i].pseudoFunc) { + info = &msInfo[i]; + break; + } + } + if (info == NULL) continue; - bool is32 = (callInst->getCalledFunction() == maskedStore32Func); llvm::Value *lvalue = callInst->getArgOperand(0); llvm::Value *rvalue = callInst->getArgOperand(1); llvm::Value *mask = callInst->getArgOperand(2); @@ -1282,8 +1334,7 @@ LowerMaskedStorePass::runOnBasicBlock(llvm::BasicBlock &bb) { // Generate the call to the appropriate masked store function and // replace the __pseudo_* one with it. - llvm::Function *fms = m->module->getFunction(lMaskedStoreName(doBlend, is32)); - assert(fms); + llvm::Function *fms = doBlend ? info->blendFunc : info->maskedStoreFunc; llvm::Value *args[3] = { lvalue, rvalue, mask }; #if defined(LLVM_3_0) || defined(LLVM_3_0svn) llvm::ArrayRef newArgArray(&args[0], &args[3]); @@ -1872,37 +1923,94 @@ lVectorIsLinear(llvm::Value *v[ISPC_MAX_NVEC], int stride) { } +struct GatherImpInfo { + GatherImpInfo(const char *pName, const char *lbName, const char *lmName, + int a) + : align(a) { + pseudoFunc = m->module->getFunction(pName); + loadBroadcastFunc = m->module->getFunction(lbName); + loadMaskedFunc = m->module->getFunction(lmName); + + assert(pseudoFunc != NULL && loadBroadcastFunc != NULL && + loadMaskedFunc != NULL); + } + llvm::Function *pseudoFunc; + llvm::Function *loadBroadcastFunc; + llvm::Function *loadMaskedFunc; + const int align; +}; + + +struct ScatterImpInfo { + ScatterImpInfo(const char *pName, const char *msName, + LLVM_TYPE_CONST llvm::Type *vpt, int a) + : align(a) { + pseudoFunc = m->module->getFunction(pName); + maskedStoreFunc = m->module->getFunction(msName); + vecPtrType = vpt; + assert(pseudoFunc != NULL && maskedStoreFunc != NULL); + } + llvm::Function *pseudoFunc; + llvm::Function *maskedStoreFunc; + LLVM_TYPE_CONST llvm::Type *vecPtrType; + const int align; +}; + + bool GSImprovementsPass::runOnBasicBlock(llvm::BasicBlock &bb) { - llvm::Function *gather32Func = m->module->getFunction("__pseudo_gather_base_offsets_32"); - llvm::Function *gather64Func = m->module->getFunction("__pseudo_gather_base_offsets_64"); - llvm::Function *scatter32Func = m->module->getFunction("__pseudo_scatter_base_offsets_32"); - llvm::Function *scatter64Func = m->module->getFunction("__pseudo_scatter_base_offsets_64"); - assert(gather32Func && gather64Func && scatter32Func && scatter64Func); + GatherImpInfo gInfo[] = { + GatherImpInfo("__pseudo_gather_base_offsets_8", "__load_and_broadcast_8", + "__load_masked_8", 1), + GatherImpInfo("__pseudo_gather_base_offsets_16", "__load_and_broadcast_16", + "__load_masked_16", 2), + GatherImpInfo("__pseudo_gather_base_offsets_32", "__load_and_broadcast_32", + "__load_masked_32", 4), + GatherImpInfo("__pseudo_gather_base_offsets_64", "__load_and_broadcast_64", + "__load_masked_64", 8) + }; + ScatterImpInfo sInfo[] = { + ScatterImpInfo("__pseudo_scatter_base_offsets_8", "__pseudo_masked_store_8", + LLVMTypes::Int8VectorPointerType, 1), + ScatterImpInfo("__pseudo_scatter_base_offsets_16", "__pseudo_masked_store_16", + LLVMTypes::Int16VectorPointerType, 2), + ScatterImpInfo("__pseudo_scatter_base_offsets_32", 
"__pseudo_masked_store_32", + LLVMTypes::Int32VectorPointerType, 4), + ScatterImpInfo("__pseudo_scatter_base_offsets_64", "__pseudo_masked_store_64", + LLVMTypes::Int64VectorPointerType, 8) + }; bool modifiedAny = false; restart: - for (llvm::BasicBlock::iterator i = bb.begin(), e = bb.end(); i != e; ++i) { + for (llvm::BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e; ++iter) { // Iterate over all of the instructions and look for calls to // __pseudo_*_base_offsets_* calls. - llvm::CallInst *callInst = llvm::dyn_cast(&*i); - if (!callInst || - (callInst->getCalledFunction() != gather32Func && - callInst->getCalledFunction() != gather64Func && - callInst->getCalledFunction() != scatter32Func && - callInst->getCalledFunction() != scatter64Func)) + llvm::CallInst *callInst = llvm::dyn_cast(&*iter); + if (callInst == NULL) + continue; + llvm::Function *calledFunc = callInst->getCalledFunction(); + GatherImpInfo *gatherInfo = NULL; + ScatterImpInfo *scatterInfo = NULL; + for (unsigned int i = 0; i < sizeof(gInfo) / sizeof(gInfo[0]); ++i) { + if (calledFunc == gInfo[i].pseudoFunc) { + gatherInfo = &gInfo[i]; + break; + } + } + for (unsigned int i = 0; i < sizeof(sInfo) / sizeof(sInfo[0]); ++i) { + if (calledFunc == sInfo[i].pseudoFunc) { + scatterInfo = &sInfo[i]; + break; + } + } + if (gatherInfo == NULL && scatterInfo == NULL) continue; SourcePos pos; bool ok = lGetSourcePosFromMetadata(callInst, &pos); assert(ok); - bool isGather = (callInst->getCalledFunction() == gather32Func || - callInst->getCalledFunction() == gather64Func); - bool is32 = (callInst->getCalledFunction() == gather32Func || - callInst->getCalledFunction() == scatter32Func); - // Get the actual base pointer; note that it comes into the gather // or scatter function bitcast to an i8 *, so we need to work back // to get the pointer as the original type. @@ -1921,7 +2029,7 @@ GSImprovementsPass::runOnBasicBlock(llvm::BasicBlock &bb) { if (!lScalarizeVector(callInst->getArgOperand(1), offsetElements)) continue; - llvm::Value *mask = callInst->getArgOperand(isGather ? 2 : 3); + llvm::Value *mask = callInst->getArgOperand((gatherInfo != NULL) ? 2 : 3); if (lVectorValuesAllEqual(offsetElements)) { // If all the offsets are equal, then compute the single @@ -1929,14 +2037,15 @@ GSImprovementsPass::runOnBasicBlock(llvm::BasicBlock &bb) { // (arbitrarily). llvm::Value *indices[1] = { offsetElements[0] }; llvm::Value *basei8 = - new llvm::BitCastInst(base, LLVMTypes::VoidPointerType, "base2i8", callInst); + new llvm::BitCastInst(base, LLVMTypes::VoidPointerType, + "base2i8", callInst); lCopyMetadata(basei8, callInst); llvm::Value *ptr = llvm::GetElementPtrInst::Create(basei8, &indices[0], &indices[1], "ptr", callInst); lCopyMetadata(ptr, callInst); - if (isGather) { + if (gatherInfo != NULL) { // A gather with everyone going to the same location is // handled as a scalar load and broadcast across the lanes. // Note that we do still have to pass the mask to the @@ -1944,20 +2053,16 @@ GSImprovementsPass::runOnBasicBlock(llvm::BasicBlock &bb) { // access memory if the mask is all off (the location may // be invalid in that case). Debug(pos, "Transformed gather to scalar load and broadcast!"); - llvm::Function *loadBroadcast = - m->module->getFunction(is32 ? 
"__load_and_broadcast_32" : - "__load_and_broadcast_64"); - assert(loadBroadcast); llvm::Value *args[2] = { ptr, mask }; #if defined(LLVM_3_0) || defined(LLVM_3_0svn) llvm::ArrayRef newArgArray(&args[0], &args[2]); llvm::Instruction *newCall = - llvm::CallInst::Create(loadBroadcast, newArgArray, + llvm::CallInst::Create(gatherInfo->loadBroadcastFunc, newArgArray, "load_broadcast", (llvm::Instruction *)NULL); #else llvm::Instruction *newCall = - llvm::CallInst::Create(loadBroadcast, &args[0], &args[2], - "load_broadcast"); + llvm::CallInst::Create(gatherInfo->loadBroadcastFunc, &args[0], + &args[2], "load_broadcast"); #endif lCopyMetadata(newCall, callInst); llvm::ReplaceInstWithInst(callInst, newCall); @@ -1977,8 +2082,8 @@ GSImprovementsPass::runOnBasicBlock(llvm::BasicBlock &bb) { ptr = new llvm::BitCastInst(ptr, llvm::PointerType::get(first->getType(), 0), "ptr2rvalue_type", callInst); lCopyMetadata(ptr, callInst); - llvm::Instruction *sinst = - new llvm::StoreInst(first, ptr, false, is32 ? 4 : 8 /* align */); + llvm::Instruction *sinst = new llvm::StoreInst(first, ptr, false, + scatterInfo->align); lCopyMetadata(sinst, callInst); llvm::ReplaceInstWithInst(callInst, sinst); } @@ -1987,7 +2092,8 @@ GSImprovementsPass::runOnBasicBlock(llvm::BasicBlock &bb) { goto restart; } - if (lVectorIsLinear(offsetElements, is32 ? 4 : 8)) { + int step = gatherInfo ? gatherInfo->align : scatterInfo->align; + if (lVectorIsLinear(offsetElements, step)) { // We have a linear sequence of memory locations being accessed // starting with the location given by the offset from // offsetElements[0], with stride of 4 or 8 bytes (for 32 bit @@ -2003,53 +2109,38 @@ GSImprovementsPass::runOnBasicBlock(llvm::BasicBlock &bb) { "ptr", callInst); lCopyMetadata(ptr, callInst); - if (isGather) { + if (gatherInfo != NULL) { Debug(pos, "Transformed gather to unaligned vector load!"); - // FIXME: make this an aligned load when possible.. - // FIXME: are there lurking potential bugs when e.g. the - // last few entries of the mask are off and the load ends - // up straddling a page boundary? - llvm::Function *loadMasked = - m->module->getFunction(is32 ? "__load_masked_32" : "__load_masked_64"); - assert(loadMasked); - llvm::Value *args[2] = { ptr, mask }; #if defined(LLVM_3_0) || defined(LLVM_3_0svn) llvm::ArrayRef argArray(&args[0], &args[2]); llvm::Instruction *newCall = - llvm::CallInst::Create(loadMasked, argArray, "load_masked", - (llvm::Instruction *)NULL); + llvm::CallInst::Create(gatherInfo->loadMaskedFunc, argArray, + "load_masked", (llvm::Instruction *)NULL); #else llvm::Instruction *newCall = - llvm::CallInst::Create(loadMasked, &args[0], &args[2], "load_masked"); + llvm::CallInst::Create(gatherInfo->loadMaskedFunc, &args[0], + &args[2], "load_masked"); #endif lCopyMetadata(newCall, callInst); llvm::ReplaceInstWithInst(callInst, newCall); } else { Debug(pos, "Transformed scatter to unaligned vector store!"); - // FIXME: make this an aligned store when possible. Need - // to work through the messiness of issuing a pseudo store - // here. llvm::Value *rvalue = callInst->getArgOperand(2); - - llvm::Function *storeMasked = - m->module->getFunction(is32 ? "__pseudo_masked_store_32" : - "__pseudo_masked_store_64"); - assert(storeMasked); - LLVM_TYPE_CONST llvm::Type *vecPtrType = is32 ? 
- LLVMTypes::Int32VectorPointerType : LLVMTypes::Int64VectorPointerType; - ptr = new llvm::BitCastInst(ptr, vecPtrType, "ptrcast", callInst); + ptr = new llvm::BitCastInst(ptr, scatterInfo->vecPtrType, "ptrcast", + callInst); llvm::Value *args[3] = { ptr, rvalue, mask }; #if defined(LLVM_3_0) || defined(LLVM_3_0svn) llvm::ArrayRef argArray(&args[0], &args[3]); llvm::Instruction *newCall = - llvm::CallInst::Create(storeMasked, argArray, "", - (llvm::Instruction *)NULL); + llvm::CallInst::Create(scatterInfo->maskedStoreFunc, argArray, + "", (llvm::Instruction *)NULL); #else llvm::Instruction *newCall = - llvm::CallInst::Create(storeMasked, &args[0], &args[3], ""); + llvm::CallInst::Create(scatterInfo->maskedStoreFunc, + &args[0], &args[3], ""); #endif lCopyMetadata(newCall, callInst); llvm::ReplaceInstWithInst(callInst, newCall); @@ -2097,31 +2188,50 @@ char LowerGSPass::ID = 0; llvm::RegisterPass lgs("lower-gs", "Lower Gather/Scatter Pass"); +struct LowerGSInfo { + LowerGSInfo(const char *pName, const char *aName, bool ig) + : isGather(ig) { + pseudoFunc = m->module->getFunction(pName); + actualFunc = m->module->getFunction(aName); + assert(pseudoFunc != NULL && actualFunc != NULL); + } + llvm::Function *pseudoFunc; + llvm::Function *actualFunc; + const bool isGather; +}; + + bool LowerGSPass::runOnBasicBlock(llvm::BasicBlock &bb) { - llvm::Function *gather32Func = m->module->getFunction("__pseudo_gather_base_offsets_32"); - llvm::Function *gather64Func = m->module->getFunction("__pseudo_gather_base_offsets_64"); - llvm::Function *scatter32Func = m->module->getFunction("__pseudo_scatter_base_offsets_32"); - llvm::Function *scatter64Func = m->module->getFunction("__pseudo_scatter_base_offsets_64"); - assert(gather32Func && gather64Func && scatter32Func && scatter64Func); + LowerGSInfo lgsInfo[] = { + LowerGSInfo("__pseudo_gather_base_offsets_8", "__gather_base_offsets_i8", true), + LowerGSInfo("__pseudo_gather_base_offsets_16", "__gather_base_offsets_i16", true), + LowerGSInfo("__pseudo_gather_base_offsets_32", "__gather_base_offsets_i32", true), + LowerGSInfo("__pseudo_gather_base_offsets_64", "__gather_base_offsets_i64", true), + LowerGSInfo("__pseudo_scatter_base_offsets_8", "__scatter_base_offsets_i8", false), + LowerGSInfo("__pseudo_scatter_base_offsets_16", "__scatter_base_offsets_i16", false), + LowerGSInfo("__pseudo_scatter_base_offsets_32", "__scatter_base_offsets_i32", false), + LowerGSInfo("__pseudo_scatter_base_offsets_64", "__scatter_base_offsets_i64", false) + }; bool modifiedAny = false; restart: - for (llvm::BasicBlock::iterator i = bb.begin(), e = bb.end(); i != e; ++i) { + for (llvm::BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e; ++iter) { // Loop over the instructions and find calls to the // __pseudo_*_base_offsets_* functions.
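Given a matched LowerGSInfo entry, the lowering itself reduces to retargeting the call and emitting the appropriate performance warning. An illustrative condensation of that step (pos comes from the call's source-position metadata, as elsewhere in the pass):

    static void lLowerPseudoGS(llvm::CallInst *callInst, const LowerGSInfo *info,
                               SourcePos pos) {
        callInst->setCalledFunction(info->actualFunc);
        if (info->isGather)
            PerformanceWarning(pos, "Gather required to compute value in expression.");
        else
            PerformanceWarning(pos, "Scatter required for storing value.");
    }
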
- llvm::CallInst *callInst = llvm::dyn_cast(&*i); - if (!callInst || - (callInst->getCalledFunction() != gather32Func && - callInst->getCalledFunction() != gather64Func && - callInst->getCalledFunction() != scatter32Func && - callInst->getCalledFunction() != scatter64Func)) + llvm::CallInst *callInst = llvm::dyn_cast(&*iter); + if (callInst == NULL) + continue; + llvm::Function *calledFunc = callInst->getCalledFunction(); + LowerGSInfo *info = NULL; + for (unsigned int i = 0; i < sizeof(lgsInfo) / sizeof(lgsInfo[0]); ++i) { + if (calledFunc == lgsInfo[i].pseudoFunc) { + info = &lgsInfo[i]; + break; + } + } + if (info == NULL) continue; - - bool isGather = (callInst->getCalledFunction() == gather32Func || - callInst->getCalledFunction() == gather64Func); - bool is32 = (callInst->getCalledFunction() == gather32Func || - callInst->getCalledFunction() == scatter32Func); // Get the source position from the metadata attached to the call // instruction so that we can issue PerformanceWarning()s below. @@ -2129,20 +2239,11 @@ LowerGSPass::runOnBasicBlock(llvm::BasicBlock &bb) { bool ok = lGetSourcePosFromMetadata(callInst, &pos); assert(ok); - if (isGather) { - llvm::Function *gFunc = m->module->getFunction(is32 ? "__gather_base_offsets_i32" : - "__gather_base_offsets_i64"); - assert(gFunc); - callInst->setCalledFunction(gFunc); + callInst->setCalledFunction(info->actualFunc); + if (info->isGather) PerformanceWarning(pos, "Gather required to compute value in expression."); - } - else { - llvm::Function *sFunc = m->module->getFunction(is32 ? "__scatter_base_offsets_i32" : - "__scatter_base_offsets_i64"); - assert(sFunc); - callInst->setCalledFunction(sFunc); + else PerformanceWarning(pos, "Scatter required for storing value."); - } modifiedAny = true; goto restart; } @@ -2286,25 +2387,41 @@ char MakeInternalFuncsStaticPass::ID = 0; llvm::RegisterPass mifsp("make-internal-funcs-static", "Make Internal Funcs Static Pass"); + bool MakeInternalFuncsStaticPass::runOnModule(llvm::Module &module) { const char *names[] = { - "__do_print", "__gather_base_offsets_i32", "__gather_base_offsets_i64", - "__gather_elt_32", "__gather_elt_64", "__load_and_broadcast_32", - "__load_and_broadcast_64", "__load_masked_32", "__load_masked_64", - "__masked_store_32", "__masked_store_64", "__masked_store_blend_32", - "__masked_store_blend_64", "__packed_load_active", "__packed_store_active", - "__scatter_base_offsets_i32", "__scatter_base_offsets_i64", "__scatter_elt_32", - "__scatter_elt_64", }; + "__do_print", + "__gather_base_offsets_i8", "__gather_base_offsets_i16", + "__gather_base_offsets_i32", "__gather_base_offsets_i64", + "__gather_elt_8", "__gather_elt_16", + "__gather_elt_32", "__gather_elt_64", + "__load_and_broadcast_8", "__load_and_broadcast_16", + "__load_and_broadcast_32", "__load_and_broadcast_64", + "__load_masked_8", "__load_masked_16", + "__load_masked_32", "__load_masked_64", + "__masked_store_8", "__masked_store_16", + "__masked_store_32", "__masked_store_64", + "__masked_store_blend_8", "__masked_store_blend_16", + "__masked_store_blend_32", "__masked_store_blend_64", + "__packed_load_active", "__packed_store_active", + "__scatter_base_offsets_i8", "__scatter_base_offsets_i16", + "__scatter_base_offsets_i32", "__scatter_base_offsets_i64", + "__scatter_elt_8", "__scatter_elt_16", + "__scatter_elt_32", "__scatter_elt_64", + }; + bool modifiedAny = false; int count = sizeof(names) / sizeof(names[0]); for (int i = 0; i < count; ++i) { llvm::Function *f = m->module->getFunction(names[i]); - if (f != NULL) 
+ if (f != NULL) { f->setLinkage(llvm::GlobalValue::PrivateLinkage); + modifiedAny = true; + } } - return true; + return modifiedAny; } diff --git a/parse.yy b/parse.yy index 36ece91a..430ce92a 100644 --- a/parse.yy +++ b/parse.yy @@ -102,15 +102,16 @@ static const char *lBuiltinTokens[] = { "bool", "break", "case", "cbreak", "ccontinue", "cdo", "cfor", "char", "cif", "cwhile", "const", "continue", "creturn", "default", "do", "double", "else", "enum", "export", "extern", "false", "float", "for", "goto", "if", - "inline", "int", "int32", "int64", "launch", "print", "reference", "return", + "inline", "int", "int8", "int16", "int32", "int64", "launch", "print", + "reference", "return", "static", "struct", "switch", "sync", "task", "true", "typedef", "uniform", "unsigned", "varying", "void", "while", NULL }; static const char *lParamListTokens[] = { "bool", "char", "const", "double", "enum", "false", "float", "int", - "int32", "int64", "reference", "struct", "true", "uniform", "unsigned", - "varying", "void", NULL + "int8", "int16", "int32", "int64", "reference", "struct", "true", + "uniform", "unsigned", "varying", "void", NULL }; %} @@ -154,7 +155,7 @@ static const char *lParamListTokens[] = { %token TOKEN_EXTERN TOKEN_EXPORT TOKEN_STATIC TOKEN_INLINE TOKEN_TASK %token TOKEN_UNIFORM TOKEN_VARYING TOKEN_TYPEDEF TOKEN_SOA %token TOKEN_CHAR TOKEN_INT TOKEN_UNSIGNED TOKEN_FLOAT TOKEN_DOUBLE -%token TOKEN_INT64 TOKEN_CONST TOKEN_VOID TOKEN_BOOL +%token TOKEN_INT8 TOKEN_INT16 TOKEN_INT64 TOKEN_CONST TOKEN_VOID TOKEN_BOOL %token TOKEN_ENUM TOKEN_STRUCT TOKEN_TRUE TOKEN_FALSE TOKEN_REFERENCE %token TOKEN_CASE TOKEN_DEFAULT TOKEN_IF TOKEN_ELSE TOKEN_SWITCH @@ -587,7 +588,8 @@ type_specifier atomic_var_type_specifier : TOKEN_VOID { $$ = AtomicType::Void; } | TOKEN_BOOL { $$ = AtomicType::VaryingBool; } -/* | TOKEN_CHAR { UNIMPLEMENTED; } */ + | TOKEN_INT8 { $$ = AtomicType::VaryingInt8; } + | TOKEN_INT16 { $$ = AtomicType::VaryingInt16; } | TOKEN_INT { $$ = AtomicType::VaryingInt32; } | TOKEN_FLOAT { $$ = AtomicType::VaryingFloat; } | TOKEN_DOUBLE { $$ = AtomicType::VaryingDouble; } diff --git a/stdlib-avx.ll b/stdlib-avx.ll index 947248dc..de7f87fa 100644 --- a/stdlib-avx.ll +++ b/stdlib-avx.ll @@ -41,7 +41,6 @@ stdlib_core(8) packed_load_and_store(8) -int8_16(8) int64minmax(8) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -539,55 +538,14 @@ define internal i64 @__reduce_max_uint64(<8 x i64>) nounwind readnone alwaysinli ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; unaligned loads/loads+broadcasts -define <8 x i32> @__load_and_broadcast_32(i8 *, <8 x i32> %mask) nounwind alwaysinline { - %mm = call i32 @__movmsk(<8 x i32> %mask) - %any_on = icmp ne i32 %mm, 0 - br i1 %any_on, label %load, label %skip - -load: - ; TODO: make sure this becomes a vbroadcast... 
- %ptr = bitcast i8 * %0 to i32 * - %val = load i32 * %ptr - - %ret0 = insertelement <8 x i32> undef, i32 %val, i32 0 - %ret1 = insertelement <8 x i32> %ret0, i32 %val, i32 1 - %ret2 = insertelement <8 x i32> %ret1, i32 %val, i32 2 - %ret3 = insertelement <8 x i32> %ret2, i32 %val, i32 3 - %ret4 = insertelement <8 x i32> %ret3, i32 %val, i32 4 - %ret5 = insertelement <8 x i32> %ret4, i32 %val, i32 5 - %ret6 = insertelement <8 x i32> %ret5, i32 %val, i32 6 - %ret7 = insertelement <8 x i32> %ret6, i32 %val, i32 7 - ret <8 x i32> %ret7 - -skip: - ret <8 x i32> undef -} - - -define <8 x i64> @__load_and_broadcast_64(i8 *, <8 x i32> %mask) nounwind alwaysinline { - %mm = call i32 @__movmsk(<8 x i32> %mask) - %any_on = icmp ne i32 %mm, 0 - br i1 %any_on, label %load, label %skip - -load: - ; TODO: make sure this becomes a vbroadcast... - %ptr = bitcast i8 * %0 to i64 * - %val = load i64 * %ptr - - %ret0 = insertelement <8 x i64> undef, i64 %val, i32 0 - %ret1 = insertelement <8 x i64> %ret0, i64 %val, i32 1 - %ret2 = insertelement <8 x i64> %ret1, i64 %val, i32 2 - %ret3 = insertelement <8 x i64> %ret2, i64 %val, i32 3 - %ret4 = insertelement <8 x i64> %ret3, i64 %val, i32 4 - %ret5 = insertelement <8 x i64> %ret4, i64 %val, i32 5 - %ret6 = insertelement <8 x i64> %ret5, i64 %val, i32 6 - %ret7 = insertelement <8 x i64> %ret6, i64 %val, i32 7 - ret <8 x i64> %ret3 - -skip: - ret <8 x i64> undef -} +load_and_broadcast(8, i8, 8) +load_and_broadcast(8, i16, 16) +load_and_broadcast(8, i32, 32) +load_and_broadcast(8, i64, 64) +; no masked load instruction for i8 and i16 types?? +load_masked(8, i8, 8, 1) +load_masked(8, i16, 16, 2) declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8 *, <8 x float> %mask) declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8 *, <4 x double> %mask) @@ -623,6 +581,12 @@ define <8 x i64> @__load_masked_64(i8 *, <8 x i32> %mask) nounwind alwaysinline ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; masked store +; FIXME: there is no AVX instruction for these, but we could be clever +; by packing the bits down and setting the last 3/4 or half, respectively, +; of the mask to zero... Not sure if this would be a win in the end +gen_masked_store(8, i8, 8) +gen_masked_store(8, i16, 16) + ; note that mask is the 2nd parameter, not the 3rd one!! 
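For 8- and 16-bit elements there is no AVX masked-store instruction, so the patch takes two routes: gen_masked_store() (defined in stdlib.m4 later in this patch) emits a per-lane scalar store, and the masked_store_blend_8_16_* macros do a read-modify-write blend, loading the old vector as one wide integer and combining it with the new value under the mask, keeping the old bits where the mask is off and taking the new bits where it is on. A rough scalar sketch of that blend in C++, with illustrative names that are not part of the patch:

    #include <cstdint>

    // Per-lane view of the blend the masked_store_blend_* macros emit:
    // mask8[i] is 0x00 (lane off) or 0xff (lane on), matching the
    // truncated per-lane mask in the .ll code.
    static void masked_store_blend_i8_sketch(uint8_t *dst, const uint8_t *newVal,
                                             const uint8_t *mask8, int lanes) {
        for (int i = 0; i < lanes; ++i)
            dst[i] = (dst[i] & ~mask8[i]) | (newVal[i] & mask8[i]);
    }

The AVX maskstore intrinsics declared next still cover the 32- and 64-bit element cases.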
declare void @llvm.x86.avx.maskstore.ps.256(i8 *, <8 x float>, <8 x float>) declare void @llvm.x86.avx.maskstore.pd.256(i8 *, <4 x double>, <4 x double>) @@ -660,13 +624,14 @@ define void @__masked_store_64(<8 x i64>* nocapture, <8 x i64>, ret void } +masked_store_blend_8_16_by_8() declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone define void @__masked_store_blend_32(<8 x i32>* nocapture, <8 x i32>, - <8 x i32>) nounwind alwaysinline { + <8 x i32>) nounwind alwaysinline { %mask_as_float = bitcast <8 x i32> %2 to <8 x float> %oldValue = load <8 x i32>* %0, align 4 %oldAsFloat = bitcast <8 x i32> %oldValue to <8 x float> diff --git a/stdlib-sse.ll b/stdlib-sse.ll index 345fdded..16713bb3 100644 --- a/stdlib-sse.ll +++ b/stdlib-sse.ll @@ -36,7 +36,6 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -int8_16(4) int64minmax(4) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -380,29 +379,23 @@ define internal i64 @__reduce_max_uint64(<4 x i64>) nounwind readnone { ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; masked store -define void @__masked_store_32(<4 x i32>* nocapture, <4 x i32>, <4 x i32>) nounwind alwaysinline { - per_lane(4, <4 x i32> %2, ` - ; compute address for this one - %ptr_ID = getelementptr <4 x i32> * %0, i32 0, i32 LANE - %storeval_ID = extractelement <4 x i32> %1, i32 LANE - store i32 %storeval_ID, i32 * %ptr_ID') - ret void -} - -define void @__masked_store_64(<4 x i64>* nocapture, <4 x i64>, <4 x i32>) nounwind alwaysinline { - per_lane(4, <4 x i32> %2, ` - %ptr_ID = getelementptr <4 x i64> * %0, i32 0, i32 LANE - %storeval_ID = extractelement <4 x i64> %1, i32 LANE - store i64 %storeval_ID, i64 * %ptr_ID') - ret void -} +masked_store_blend_8_16_by_4() +gen_masked_store(4, i8, 8) +gen_masked_store(4, i16, 16) +gen_masked_store(4, i32, 32) +gen_masked_store(4, i64, 64) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; unaligned loads/loads+broadcasts +load_and_broadcast(4, i8, 8) +load_and_broadcast(4, i16, 16) load_and_broadcast(4, i32, 32) load_and_broadcast(4, i64, 64) + +load_masked(4, i8, 8, 1) +load_masked(4, i16, 16, 2) load_masked(4, i32, 32, 4) load_masked(4, i64, 64, 8) @@ -411,7 +404,12 @@ load_masked(4, i64, 64, 8) ; define these with the macros from stdlib.m4 +gen_gather(4, i8) +gen_gather(4, i16) gen_gather(4, i32) gen_gather(4, i64) + +gen_scatter(4, i8) +gen_scatter(4, i16) gen_scatter(4, i32) gen_scatter(4, i64) diff --git a/stdlib-sse4x2.ll b/stdlib-sse4x2.ll index 1b57cdc6..ca654d2c 100644 --- a/stdlib-sse4x2.ll +++ b/stdlib-sse4x2.ll @@ -38,7 +38,6 @@ stdlib_core(8) packed_load_and_store(8) -int8_16(8) int64minmax(8) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -435,44 +434,29 @@ define internal i64 @__reduce_max_uint64(<8 x i64>) nounwind readnone { reduce8(i64, @__max_varying_uint64, @__max_uniform_uint64) } -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; masked store - -define void @__masked_store_32(<8 x i32>* nocapture, <8 x i32>, - <8 x i32>) nounwind alwaysinline { - per_lane(8, <8 x i32> %2, ` - ; compute address for this one - %ptr_ID = getelementptr <8 x i32> * %0, i32 0, i32 LANE - %storeval_ID = extractelement <8 x i32> %1, i32 LANE - store i32 %storeval_ID, i32 * %ptr_ID') - ret void -} - - -define void @__masked_store_64(<8 x i64>* nocapture, <8 x i64>, - <8 x i32>) nounwind alwaysinline { - 
per_lane(8, <8 x i32> %2, ` - ; compute address for this one - %ptr_ID = getelementptr <8 x i64> * %0, i32 0, i32 LANE - %storeval_ID = extractelement <8 x i64> %1, i32 LANE - store i64 %storeval_ID, i64 * %ptr_ID') - ret void -} - - ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; unaligned loads/loads+broadcasts +load_and_broadcast(8, i8, 8) +load_and_broadcast(8, i16, 16) load_and_broadcast(8, i32, 32) load_and_broadcast(8, i64, 64) + +load_masked(8, i8, 8, 1) +load_masked(8, i16, 16, 2) load_masked(8, i32, 32, 4) load_masked(8, i64, 64, 8) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; gather/scatter +gen_gather(8, i8) +gen_gather(8, i16) gen_gather(8, i32) gen_gather(8, i64) + +gen_scatter(8, i8) +gen_scatter(8, i16) gen_scatter(8, i32) gen_scatter(8, i64) @@ -619,6 +603,13 @@ define internal float @__reduce_add_float(<8 x float>) nounwind readonly alwaysi ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; masked store +gen_masked_store(8, i8, 8) +gen_masked_store(8, i16, 16) +gen_masked_store(8, i32, 32) +gen_masked_store(8, i64, 64) + +masked_store_blend_8_16_by_8() + declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone diff --git a/stdlib.ispc b/stdlib.ispc index 5baaa2f1..cb6e99dc 100644 --- a/stdlib.ispc +++ b/stdlib.ispc @@ -85,6 +85,14 @@ static inline float broadcast(float v, uniform int i) { return __broadcast_float(v, i); } +static inline int8 broadcast(int8 v, uniform int i) { + return __broadcast_int8(v, i); +} + +static inline int16 broadcast(int16 v, uniform int i) { + return __broadcast_int16(v, i); +} + static inline int32 broadcast(int32 v, uniform int i) { return __broadcast_int32(v, i); } @@ -101,6 +109,14 @@ static inline float rotate(float v, uniform int i) { return __rotate_float(v, i); } +static inline int8 rotate(int8 v, uniform int i) { + return __rotate_int8(v, i); +} + +static inline int16 rotate(int16 v, uniform int i) { + return __rotate_int16(v, i); +} + static inline int32 rotate(int32 v, uniform int i) { return __rotate_int32(v, i); } @@ -117,6 +133,14 @@ static inline float shuffle(float v, int i) { return __shuffle_float(v, i); } +static inline int8 shuffle(int8 v, int i) { + return __shuffle_int8(v, i); +} + +static inline int16 shuffle(int16 v, int i) { + return __shuffle_int16(v, i); +} + static inline int32 shuffle(int32 v, int i) { return __shuffle_int32(v, i); } @@ -133,6 +157,14 @@ static inline float shuffle(float v0, float v1, int i) { return __shuffle2_float(v0, v1, i); } +static inline int8 shuffle(int8 v0, int8 v1, int i) { + return __shuffle2_int8(v0, v1, i); +} + +static inline int16 shuffle(int16 v0, int16 v1, int i) { + return __shuffle2_int16(v0, v1, i); +} + static inline int32 shuffle(int32 v0, int32 v1, int i) { return __shuffle2_int32(v0, v1, i); } @@ -150,11 +182,27 @@ static inline uniform float extract(float x, uniform int i) { return floatbits(__extract_int32((int)intbits(x), i)); } -static inline uniform int extract(int x, uniform int i) { +static inline uniform int8 extract(int8 x, uniform int i) { + return __extract_int8(x, i); +} + +static inline uniform unsigned int8 extract(unsigned int8 x, uniform int i) { + return __extract_int8(x, (unsigned int)i); +} + +static inline uniform int16 extract(int16 x, uniform int i) { + return __extract_int16(x, i); +} + +static inline uniform unsigned int16 extract(unsigned int16 x, uniform int i) { + return __extract_int16(x, (unsigned int)i); +} + 
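These extract() and insert() overloads, like the int8/int16 broadcast/rotate/shuffle overloads earlier in this hunk, are thin wrappers over per-type builtins; the __extract_int8/__insert_int8 and __extract_int16/__insert_int16 definitions added to stdlib.m4 further down compile to single extractelement/insertelement operations. A rough scalar model of their semantics (the fixed gang size and helper names here are illustrative only):

    #include <array>
    #include <cstdint>

    constexpr int kGangSize = 8;                 // stand-in for programCount
    using VaryingInt8 = std::array<int8_t, kGangSize>;

    // extract(): read one lane of a varying value (an extractelement).
    static int8_t extract_sketch(const VaryingInt8 &v, int lane) {
        return v[lane];
    }

    // insert(): replace one lane and return the updated value (an insertelement).
    static VaryingInt8 insert_sketch(VaryingInt8 v, int lane, int8_t val) {
        v[lane] = val;
        return v;
    }
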
+static inline uniform int32 extract(int32 x, uniform int i) { return __extract_int32(x, i); } -static inline uniform unsigned int extract(unsigned int x, uniform int i) { +static inline uniform unsigned int32 extract(unsigned int32 x, uniform int i) { return __extract_int32(x, (unsigned int)i); } @@ -175,12 +223,30 @@ static inline float insert(float x, uniform int i, uniform float v) { return floatbits(__insert_int32((int)intbits(x), i, (int)intbits(v))); } -static inline int insert(int x, uniform int i, uniform int v) { +static inline int8 insert(int8 x, uniform int i, uniform int8 v) { + return __insert_int8(x, i, v); +} + +static inline unsigned int8 insert(unsigned int8 x, uniform int i, + uniform unsigned int8 v) { + return __insert_int8(x, (unsigned int)i, v); +} + +static inline int16 insert(int16 x, uniform int i, uniform int16 v) { + return __insert_int16(x, i, v); +} + +static inline unsigned int16 insert(unsigned int16 x, uniform int i, + uniform unsigned int16 v) { + return __insert_int16(x, (unsigned int)i, v); +} + +static inline int32 insert(int32 x, uniform int i, uniform int32 v) { return __insert_int32(x, i, v); } -static inline unsigned int insert(unsigned int x, uniform int i, - uniform unsigned int v) { +static inline unsigned int32 insert(unsigned int32 x, uniform int i, + uniform unsigned int32 v) { return __insert_int32(x, (unsigned int)i, v); } @@ -218,7 +284,7 @@ static inline uniform bool all(bool v) { return __movmsk(match) == (1 << programCount) - 1; } -static inline uniform int popcnt(uniform int v) { +static inline uniform int32 popcnt(uniform int32 v) { return __popcnt_int32(v); } @@ -473,52 +539,7 @@ ATOMIC_DECL_CMPXCHG(unsigned int64, int64) ATOMIC_DECL_CMPXCHG(double, double) /////////////////////////////////////////////////////////////////////////// -// Load/store from/to 8/16-bit types - -static inline int load_from_int8(uniform int a[], uniform int offset) { - return __load_int8(a, offset, __mask); -} - -static inline unsigned int load_from_uint8(uniform unsigned int a[], - uniform int offset) { - return __load_uint8(a, offset, __mask); -} - -static inline void store_to_int8(uniform int a[], uniform int offset, - unsigned int val) { - __store_int8(a, offset, val, __mask); -} - -static inline void store_to_uint8(uniform unsigned int a[], uniform int offset, - unsigned int val) { - // Can use __store_int8 for unsigned stuff, since it truncates bits in - // either case. - __store_int8(a, offset, val, __mask); -} - -static inline int load_from_int16(uniform int a[], uniform int offset) { - return __load_int16(a, offset, __mask); -} - -static inline unsigned int load_from_int16(uniform unsigned int a[], - uniform int offset) { - return __load_uint16(a, offset, __mask); -} - -static inline void store_to_int16(uniform int a[], uniform int offset, - int val) { - __store_int16(a, offset, val, __mask); -} - -static inline void store_to_uint16(uniform unsigned int a[], uniform int offset, - unsigned int val) { - // Can use __store_int16 for unsigned stuff, since it truncates bits in - // either case. 
- __store_int16(a, offset, val, __mask); -} - -/////////////////////////////////////////////////////////////////////////// -// Math +// Floating-Point Math static inline float abs(float a) { // Floating-point hack: zeroing the high bit clears the sign @@ -622,6 +643,11 @@ static inline uniform float rcp(uniform float v) { return __rcp_uniform_float(v); } +/////////////////////////////////////////////////////////////////////////// +// min/max + +// float + static inline float min(float a, float b) { return __min_varying_float(a, b); } @@ -630,14 +656,6 @@ static inline uniform float min(uniform float a, uniform float b) { return __min_uniform_float(a, b); } -static inline double min(double a, double b) { - return __min_varying_double(a, b); -} - -static inline uniform double min(uniform double a, uniform double b) { - return __min_uniform_double(a, b); -} - static inline float max(float a, float b) { return __max_varying_float(a, b); } @@ -646,6 +664,17 @@ static inline uniform float max(uniform float a, uniform float b) { return __max_uniform_float(a, b); } + +// double + +static inline double min(double a, double b) { + return __min_varying_double(a, b); +} + +static inline uniform double min(uniform double a, uniform double b) { + return __min_uniform_double(a, b); +} + static inline double max(double a, double b) { return __max_varying_double(a, b); } @@ -654,6 +683,80 @@ static inline uniform double max(uniform double a, uniform double b) { return __max_uniform_double(a, b); } +// int8 + +static inline uniform unsigned int8 min(uniform unsigned int8 a, + uniform unsigned int8 b) { + return (a < b) ? a : b; +} + +static inline uniform unsigned int8 max(uniform unsigned int8 a, + uniform unsigned int8 b) { + return (a > b) ? a : b; +} + +static inline uniform int8 min(uniform int8 a, uniform int8 b) { + return (a < b) ? a : b; +} + +static inline uniform int8 max(uniform int8 a, uniform int8 b) { + return (a > b) ? a : b; +} + +static inline unsigned int8 min(unsigned int8 a, unsigned int8 b) { + return (a < b) ? a : b; +} + +static inline unsigned int8 max(unsigned int8 a, unsigned int8 b) { + return (a > b) ? a : b; +} + +static inline int8 min(int8 a, int8 b) { + return (a < b) ? a : b; +} + +static inline int8 max(int8 a, int8 b) { + return (a > b) ? a : b; +} + +// int16 + +static inline uniform unsigned int16 min(uniform unsigned int16 a, + uniform unsigned int16 b) { + return (a < b) ? a : b; +} + +static inline uniform unsigned int16 max(uniform unsigned int16 a, + uniform unsigned int16 b) { + return (a > b) ? a : b; +} + +static inline uniform int16 min(uniform int16 a, uniform int16 b) { + return (a < b) ? a : b; +} + +static inline uniform int16 max(uniform int16 a, uniform int16 b) { + return (a > b) ? a : b; +} + +static inline unsigned int16 min(unsigned int16 a, unsigned int16 b) { + return (a < b) ? a : b; +} + +static inline unsigned int16 max(unsigned int16 a, unsigned int16 b) { + return (a > b) ? a : b; +} + +static inline int16 min(int16 a, int16 b) { + return (a < b) ? a : b; +} + +static inline int16 max(int16 a, int16 b) { + return (a > b) ? 
a : b; +} + +// int32 + static inline unsigned int min(unsigned int a, unsigned int b) { return __min_varying_uint32(a, b); } @@ -686,6 +789,8 @@ static inline uniform int max(uniform int a, uniform int b) { return __max_uniform_int32(a, b); } +// int64 + static inline unsigned int64 min(unsigned int64 a, unsigned int64 b) { return __min_varying_uint64(a, b); } @@ -718,6 +823,11 @@ static inline uniform int64 max(uniform int64 a, uniform int64 b) { return __max_uniform_int64(a, b); } +/////////////////////////////////////////////////////////////////////////// +// clamps + +// float + static inline float clamp(float v, float low, float high) { return min(max(v, low), high); } @@ -726,6 +836,52 @@ static inline uniform float clamp(uniform float v, uniform float low, uniform fl return min(max(v, low), high); } +// int8 + +static inline unsigned int8 clamp(unsigned int8 v, unsigned int8 low, + unsigned int8 high) { + return min(max(v, low), high); +} + +static inline uniform unsigned int8 clamp(uniform unsigned int8 v, + uniform unsigned int8 low, + uniform unsigned int8 high) { + return min(max(v, low), high); +} + +static inline int8 clamp(int8 v, int8 low, int8 high) { + return min(max(v, low), high); +} + +static inline uniform int8 clamp(uniform int8 v, uniform int8 low, + uniform int8 high) { + return min(max(v, low), high); +} + +// int16 + +static inline unsigned int16 clamp(unsigned int16 v, unsigned int16 low, + unsigned int16 high) { + return min(max(v, low), high); +} + +static inline uniform unsigned int16 clamp(uniform unsigned int16 v, + uniform unsigned int16 low, + uniform unsigned int16 high) { + return min(max(v, low), high); +} + +static inline int16 clamp(int16 v, int16 low, int16 high) { + return min(max(v, low), high); +} + +static inline uniform int16 clamp(uniform int16 v, uniform int16 low, + uniform int16 high) { + return min(max(v, low), high); +} + +// int32 + static inline unsigned int clamp(unsigned int v, unsigned int low, unsigned int high) { return min(max(v, low), high); } @@ -735,15 +891,6 @@ static inline uniform unsigned int clamp(uniform unsigned int v, uniform unsigne return min(max(v, low), high); } -static inline unsigned int64 clamp(unsigned int64 v, unsigned int64 low, unsigned int64 high) { - return min(max(v, low), high); -} - -static inline uniform unsigned int64 clamp(uniform unsigned int64 v, uniform unsigned int64 low, - uniform unsigned int64 high) { - return min(max(v, low), high); -} - static inline int clamp(int v, int low, int high) { return min(max(v, low), high); } @@ -752,11 +899,25 @@ static inline uniform int clamp(uniform int v, uniform int low, uniform int high return min(max(v, low), high); } +// int64 + +static inline unsigned int64 clamp(unsigned int64 v, unsigned int64 low, + unsigned int64 high) { + return min(max(v, low), high); +} + +static inline uniform unsigned int64 clamp(uniform unsigned int64 v, + uniform unsigned int64 low, + uniform unsigned int64 high) { + return min(max(v, low), high); +} + static inline int64 clamp(int64 v, int64 low, int64 high) { return min(max(v, low), high); } -static inline uniform int64 clamp(uniform int64 v, uniform int64 low, uniform int64 high) { +static inline uniform int64 clamp(uniform int64 v, uniform int64 low, + uniform int64 high) { return min(max(v, low), high); } diff --git a/stdlib.m4 b/stdlib.m4 index 49184d85..4540b796 100644 --- a/stdlib.m4 +++ b/stdlib.m4 @@ -566,6 +566,28 @@ declare i1 @__is_compile_time_constant_varying_int32(<$1 x i32>) 
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; vector ops +define internal i8 @__extract_int8(<$1 x i8>, i32) nounwind readnone alwaysinline { + %extract = extractelement <$1 x i8> %0, i32 %1 + ret i8 %extract +} + +define internal <$1 x i8> @__insert_int8(<$1 x i8>, i32, + i8) nounwind readnone alwaysinline { + %insert = insertelement <$1 x i8> %0, i8 %2, i32 %1 + ret <$1 x i8> %insert +} + +define internal i16 @__extract_int16(<$1 x i16>, i32) nounwind readnone alwaysinline { + %extract = extractelement <$1 x i16> %0, i32 %1 + ret i16 %extract +} + +define internal <$1 x i16> @__insert_int16(<$1 x i16>, i32, + i16) nounwind readnone alwaysinline { + %insert = insertelement <$1 x i16> %0, i16 %2, i32 %1 + ret <$1 x i16> %insert +} + define internal i32 @__extract_int32(<$1 x i32>, i32) nounwind readnone alwaysinline { %extract = extractelement <$1 x i32> %0, i32 %1 ret i32 %extract @@ -588,6 +610,8 @@ define internal <$1 x i64> @__insert_int64(<$1 x i64>, i32, ret <$1 x i64> %insert } +shuffles($1, i8, int8, 1) +shuffles($1, i16, int16, 2) shuffles($1, float, float, 4) shuffles($1, i32, int32, 4) shuffles($1, double, double, 8) @@ -901,171 +925,6 @@ i64minmax($1,min,uint64,ult) i64minmax($1,max,uint64,ugt) ') -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; Definitions of 8 and 16-bit load and store functions -;; -;; The `int8_16' macro defines functions related to loading and storing 8 and -;; 16-bit values in memory, converting to and from i32. (This is a workaround -;; to be able to use in-memory values of types in ispc programs, since the -;; compiler doesn't yet support 8 and 16-bit datatypes... -;; -;; Arguments to pass to `int8_16': -;; $1: vector width of the target - -define(`int8_16', ` -define internal <$1 x i32> @__load_uint8([0 x i32] *, i32 %offset, - <$1 x i32> %mask) nounwind alwaysinline { - %mm = call i32 @__movmsk(<$1 x i32> %mask) - %any = icmp ne i32 %mm, 0 - br i1 %any, label %doload, label %skip - -doload: - %ptr8 = bitcast [0 x i32] *%0 to i8 * - %ptr = getelementptr i8 * %ptr8, i32 %offset - %ptr64 = bitcast i8 * %ptr to i`'eval(8*$1) * - %val = load i`'eval(8*$1) * %ptr64, align 1 - - %vval = bitcast i`'eval(8*$1) %val to <$1 x i8> - ; unsigned, so zero-extend to i32... - %ret = zext <$1 x i8> %vval to <$1 x i32> - ret <$1 x i32> %ret - -skip: - ret <$1 x i32> undef -} - - -define internal <$1 x i32> @__load_int8([0 x i32] *, i32 %offset, - <$1 x i32> %mask) nounwind alwaysinline { - %mm = call i32 @__movmsk(<$1 x i32> %mask) - %any = icmp ne i32 %mm, 0 - br i1 %any, label %doload, label %skip - -doload: - %ptr8 = bitcast [0 x i32] *%0 to i8 * - %ptr = getelementptr i8 * %ptr8, i32 %offset - %ptr64 = bitcast i8 * %ptr to i`'eval(8*$1) * - %val = load i`'eval(8*$1) * %ptr64, align 1 - - %vval = bitcast i`'eval(8*$1) %val to <$1 x i8> - ; signed, so sign-extend to i32... - %ret = sext <$1 x i8> %vval to <$1 x i32> - ret <$1 x i32> %ret - -skip: - ret <$1 x i32> undef -} - - -define internal <$1 x i32> @__load_uint16([0 x i32] *, i32 %offset, - <$1 x i32> %mask) nounwind alwaysinline { - %mm = call i32 @__movmsk(<$1 x i32> %mask) - %any = icmp ne i32 %mm, 0 - br i1 %any, label %doload, label %skip - -doload: - %ptr16 = bitcast [0 x i32] *%0 to i16 * - %ptr = getelementptr i16 * %ptr16, i32 %offset - %ptr64 = bitcast i16 * %ptr to i`'eval(16*$1) * - %val = load i`'eval(16*$1) * %ptr64, align 2 - - %vval = bitcast i`'eval(16*$1) %val to <$1 x i16> - ; unsigned, so use zero-extend... 
- %ret = zext <$1 x i16> %vval to <$1 x i32> - ret <$1 x i32> %ret - -skip: - ret <$1 x i32> undef -} - - -define internal <$1 x i32> @__load_int16([0 x i32] *, i32 %offset, - <$1 x i32> %mask) nounwind alwaysinline { - %mm = call i32 @__movmsk(<$1 x i32> %mask) - %any = icmp ne i32 %mm, 0 - br i1 %any, label %doload, label %skip - -doload: - %ptr16 = bitcast [0 x i32] *%0 to i16 * - %ptr = getelementptr i16 * %ptr16, i32 %offset - %ptr64 = bitcast i16 * %ptr to i`'eval(16*$1) * - %val = load i`'eval(16*$1) * %ptr64, align 2 - - %vval = bitcast i`'eval(16*$1) %val to <$1 x i16> - ; signed, so use sign-extend... - %ret = sext <$1 x i16> %vval to <$1 x i32> - ret <$1 x i32> %ret - -skip: - ret <$1 x i32> undef -} - - -define internal void @__store_int8([0 x i32] *, i32 %offset, <$1 x i32> %val32, - <$1 x i32> %mask) nounwind alwaysinline { - %mm = call i32 @__movmsk(<$1 x i32> %mask) - %any = icmp ne i32 %mm, 0 - br i1 %any, label %dostore, label %skip - -dostore: - %val = trunc <$1 x i32> %val32 to <$1 x i8> - %val64 = bitcast <$1 x i8> %val to i`'eval(8*$1) - - %mask8 = trunc <$1 x i32> %mask to <$1 x i8> - %mask64 = bitcast <$1 x i8> %mask8 to i`'eval(8*$1) - %notmask = xor i`'eval(8*$1) %mask64, -1 - - %ptr8 = bitcast [0 x i32] *%0 to i8 * - %ptr = getelementptr i8 * %ptr8, i32 %offset - %ptr64 = bitcast i8 * %ptr to i`'eval(8*$1) * - - ;; load the old value, use logical ops to blend based on the mask, then - ;; store the result back - %old = load i`'eval(8*$1) * %ptr64, align 1 - %oldmasked = and i`'eval(8*$1) %old, %notmask - %newmasked = and i`'eval(8*$1) %val64, %mask64 - %final = or i`'eval(8*$1) %oldmasked, %newmasked - store i`'eval(8*$1) %final, i`'eval(8*$1) * %ptr64, align 1 - - ret void - -skip: - ret void -} - -define internal void @__store_int16([0 x i32] *, i32 %offset, <$1 x i32> %val32, - <$1 x i32> %mask) nounwind alwaysinline { - %mm = call i32 @__movmsk(<$1 x i32> %mask) - %any = icmp ne i32 %mm, 0 - br i1 %any, label %dostore, label %skip - -dostore: - %val = trunc <$1 x i32> %val32 to <$1 x i16> - %val64 = bitcast <$1 x i16> %val to i`'eval(16*$1) - - %mask8 = trunc <$1 x i32> %mask to <$1 x i16> - %mask64 = bitcast <$1 x i16> %mask8 to i`'eval(16*$1) - %notmask = xor i`'eval(16*$1) %mask64, -1 - - %ptr16 = bitcast [0 x i32] *%0 to i16 * - %ptr = getelementptr i16 * %ptr16, i32 %offset - %ptr64 = bitcast i16 * %ptr to i`'eval(16*$1) * - - ;; as above, use mask to do blending with logical ops... - %old = load i`'eval(16*$1) * %ptr64, align 2 - %oldmasked = and i`'eval(16*$1) %old, %notmask - %newmasked = and i`'eval(16*$1) %val64, %mask64 - %final = or i`'eval(16*$1) %oldmasked, %newmasked - store i`'eval(16*$1) %final, i`'eval(16*$1) * %ptr64, align 2 - - ret void - -skip: - ret void -} -' -) - ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Emit code to safely load a scalar value and broadcast it across the ;; elements of a vector. 
Parameters: @@ -1150,6 +1009,105 @@ return: } ') +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; masked store +;; emit code to do masked store as a set of per-lane scalar stores +;; parameters: +;; $1: target vector width +;; $2: llvm type of elements +;; $3: suffix for function name + +define(`gen_masked_store', ` +define void @__masked_store_$3(<$1 x $2>* nocapture, <$1 x $2>, <$1 x i32>) nounwind alwaysinline { + per_lane($1, <$1 x i32> %2, ` + %ptr_ID = getelementptr <$1 x $2> * %0, i32 0, i32 LANE + %storeval_ID = extractelement <$1 x $2> %1, i32 LANE + store $2 %storeval_ID, $2 * %ptr_ID') + ret void +} +') + +define(`masked_store_blend_8_16_by_4', ` +define void @__masked_store_blend_8(<4 x i8>* nocapture, <4 x i8>, + <4 x i32>) nounwind alwaysinline { + %old = load <4 x i8> * %0 + %old32 = bitcast <4 x i8> %old to i32 + %new32 = bitcast <4 x i8> %1 to i32 + + %mask8 = trunc <4 x i32> %2 to <4 x i8> + %mask32 = bitcast <4 x i8> %mask8 to i32 + %notmask32 = xor i32 %mask32, -1 + + %newmasked = and i32 %new32, %mask32 + %oldmasked = and i32 %old32, %notmask32 + %result = or i32 %newmasked, %oldmasked + + %resultvec = bitcast i32 %result to <4 x i8> + store <4 x i8> %resultvec, <4 x i8> * %0 + ret void +} + +define void @__masked_store_blend_16(<4 x i16>* nocapture, <4 x i16>, + <4 x i32>) nounwind alwaysinline { + %old = load <4 x i16> * %0 + %old64 = bitcast <4 x i16> %old to i64 + %new64 = bitcast <4 x i16> %1 to i64 + + %mask16 = trunc <4 x i32> %2 to <4 x i16> + %mask64 = bitcast <4 x i16> %mask16 to i64 + %notmask64 = xor i64 %mask64, -1 + + %newmasked = and i64 %new64, %mask64 + %oldmasked = and i64 %old64, %notmask64 + %result = or i64 %newmasked, %oldmasked + + %resultvec = bitcast i64 %result to <4 x i16> + store <4 x i16> %resultvec, <4 x i16> * %0 + ret void +} +') + +define(`masked_store_blend_8_16_by_8', ` +define void @__masked_store_blend_8(<8 x i8>* nocapture, <8 x i8>, + <8 x i32>) nounwind alwaysinline { + %old = load <8 x i8> * %0 + %old64 = bitcast <8 x i8> %old to i64 + %new64 = bitcast <8 x i8> %1 to i64 + + %mask8 = trunc <8 x i32> %2 to <8 x i8> + %mask64 = bitcast <8 x i8> %mask8 to i64 + %notmask64 = xor i64 %mask64, -1 + + %newmasked = and i64 %new64, %mask64 + %oldmasked = and i64 %old64, %notmask64 + %result = or i64 %newmasked, %oldmasked + + %resultvec = bitcast i64 %result to <8 x i8> + store <8 x i8> %resultvec, <8 x i8> * %0 + ret void +} + +define void @__masked_store_blend_16(<8 x i16>* nocapture, <8 x i16>, + <8 x i32>) nounwind alwaysinline { + %old = load <8 x i16> * %0 + %old128 = bitcast <8 x i16> %old to i128 + %new128 = bitcast <8 x i16> %1 to i128 + + %mask16 = trunc <8 x i32> %2 to <8 x i16> + %mask128 = bitcast <8 x i16> %mask16 to i128 + %notmask128 = xor i128 %mask128, -1 + + %newmasked = and i128 %new128, %mask128 + %oldmasked = and i128 %old128, %notmask128 + %result = or i128 %newmasked, %oldmasked + + %resultvec = bitcast i128 %result to <8 x i16> + store <8 x i16> %resultvec, <8 x i16> * %0 + ret void +} +') + + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; packed load and store functions ;; diff --git a/stmt.cpp b/stmt.cpp index fc651ebc..815d0061 100644 --- a/stmt.cpp +++ b/stmt.cpp @@ -1405,6 +1405,18 @@ lProcessPrintArg(Expr *expr, FunctionEmitContext *ctx, std::string &argTypes) { return NULL; } + // Just int8 and int16 types to int32s... 
+ const Type *baseType = type->GetAsNonConstType()->GetAsUniformType(); + if (baseType == AtomicType::UniformInt8 || + baseType == AtomicType::UniformUInt8 || + baseType == AtomicType::UniformInt16 || + baseType == AtomicType::UniformUInt16) { + expr = new TypeCastExpr(type->IsUniformType() ? AtomicType::UniformInt32 : + AtomicType::VaryingInt32, + expr, expr->pos); + type = expr->GetType(); + } + char t = lEncodeType(type->GetAsNonConstType()); if (t == '\0') { Error(expr->pos, "Only atomic types are allowed in print statements; " diff --git a/tests/array-mixed-unif-vary-indexing-2.ispc b/tests/array-mixed-unif-vary-indexing-2.ispc index 6eefc9d1..edd53c84 100644 --- a/tests/array-mixed-unif-vary-indexing-2.ispc +++ b/tests/array-mixed-unif-vary-indexing-2.ispc @@ -8,7 +8,7 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { uniform float x[47][47]; for (uniform int i = 0; i < 47; ++i) for (uniform int j = 0; j < 47; ++j) - x[i][j] = 2; + x[i][j] = 2+b-5; // all are 2 except (3,4) = 0, (1,4) = 1, (2,4) = 1, (4,4) = 1 if (a == 3.) diff --git a/tests/array-mixed-unif-vary-indexing-3.ispc b/tests/array-mixed-unif-vary-indexing-3.ispc index faf0fd9f..686f121e 100644 --- a/tests/array-mixed-unif-vary-indexing-3.ispc +++ b/tests/array-mixed-unif-vary-indexing-3.ispc @@ -7,7 +7,7 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { uniform float x[47][47]; for (uniform int i = 0; i < 47; ++i) for (uniform int j = 0; j < 47; ++j) - x[i][j] = 2; + x[i][j] = 2+b-5; // all are 2 except (4,2) = 0, (4,...) = 1, (4,programCount-1)=2 if (a == 3.) diff --git a/tests/array-mixed-unif-vary-indexing.ispc b/tests/array-mixed-unif-vary-indexing.ispc index 20ff9a53..ebe932ad 100644 --- a/tests/array-mixed-unif-vary-indexing.ispc +++ b/tests/array-mixed-unif-vary-indexing.ispc @@ -8,7 +8,7 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { uniform float x[47][47]; for (uniform int i = 0; i < 47; ++i) for (uniform int j = 0; j < 47; ++j) - x[i][j] = 2; + x[i][j] = 2+b-5; x[a][b-1] = 0; RET[programIndex] = x[2][a]; diff --git a/tests/broadcast-2.ispc b/tests/broadcast-2.ispc new file mode 100644 index 00000000..2efc98ed --- /dev/null +++ b/tests/broadcast-2.ispc @@ -0,0 +1,12 @@ + +export uniform int width() { return programCount; } + +export void f_f(uniform float RET[], uniform float aFOO[]) { + int16 a = aFOO[programIndex]; + int16 b = broadcast(a, 2); + RET[programIndex] = b; +} + +export void result(uniform float RET[]) { + RET[programIndex] = 3; +} diff --git a/tests/broadcast-3.ispc b/tests/broadcast-3.ispc new file mode 100644 index 00000000..e1b1308b --- /dev/null +++ b/tests/broadcast-3.ispc @@ -0,0 +1,12 @@ + +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + int8 a = aFOO[programIndex]; + int8 br = broadcast(a, (uniform int)b-2); + RET[programIndex] = br; +} + +export void result(uniform float RET[]) { + RET[programIndex] = 4; +} diff --git a/tests/gather-int16-1.ispc b/tests/gather-int16-1.ispc new file mode 100644 index 00000000..e6bedd7f --- /dev/null +++ b/tests/gather-int16-1.ispc @@ -0,0 +1,19 @@ +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + uniform int16 x[programCount]; + x[programIndex] = programIndex; + int a = aFOO[programIndex]-1; + unsigned int16 v; + if (programIndex < 2) + v = x[a]; + else + v = 2; + RET[programIndex] = v; 
+} + +export void result(uniform float RET[]) { + RET[programIndex] = 2; + RET[0] = 0; + RET[1] = 1; +} diff --git a/tests/gather-int16.ispc b/tests/gather-int16.ispc new file mode 100644 index 00000000..cae1366d --- /dev/null +++ b/tests/gather-int16.ispc @@ -0,0 +1,13 @@ +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + uniform int16 x[programCount]; + x[programIndex] = programIndex; + int a = aFOO[programIndex]-1; + unsigned int16 v = x[a]; + RET[programIndex] = v; +} + +export void result(uniform float RET[]) { + RET[programIndex] = programIndex; +} diff --git a/tests/gather-int8-1.ispc b/tests/gather-int8-1.ispc new file mode 100644 index 00000000..305b12ca --- /dev/null +++ b/tests/gather-int8-1.ispc @@ -0,0 +1,19 @@ +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + uniform int8 x[programCount]; + x[programIndex] = programIndex; + int a = aFOO[programIndex]-1; + unsigned int8 v; + if (programIndex < 2) + v = x[a]; + else + v = 2; + RET[programIndex] = v; +} + +export void result(uniform float RET[]) { + RET[programIndex] = 2; + RET[0] = 0; + RET[1] = 1; +} diff --git a/tests/gather-int8.ispc b/tests/gather-int8.ispc new file mode 100644 index 00000000..7b2ed9a1 --- /dev/null +++ b/tests/gather-int8.ispc @@ -0,0 +1,13 @@ +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + uniform int8 x[programCount]; + x[programIndex] = programIndex; + int a = aFOO[programIndex]-1; + unsigned int8 v = x[a]; + RET[programIndex] = v; +} + +export void result(uniform float RET[]) { + RET[programIndex] = programIndex; +} diff --git a/tests/int16-wrap.ispc b/tests/int16-wrap.ispc new file mode 100644 index 00000000..bd380567 --- /dev/null +++ b/tests/int16-wrap.ispc @@ -0,0 +1,12 @@ + +export uniform int width() { return programCount; } + + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float bb) { + unsigned int16 a = aFOO[programIndex], b = bb; + RET[programIndex] = ((unsigned int16)4000*a)+b; +} + +export void result(uniform float RET[]) { + RET[programIndex] = (((4000*(programIndex+1))&0xffff)+5)&0xffff; +} diff --git a/tests/int8-wrap.ispc b/tests/int8-wrap.ispc new file mode 100644 index 00000000..22e88aeb --- /dev/null +++ b/tests/int8-wrap.ispc @@ -0,0 +1,12 @@ + +export uniform int width() { return programCount; } + + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float bb) { + unsigned int8 a = aFOO[programIndex], b = bb; + RET[programIndex] = ((unsigned int8)100*a)+b; +} + +export void result(uniform float RET[]) { + RET[programIndex] = (((100*(programIndex+1))&0xff)+5)&0xff; +} diff --git a/tests/load-int16-1.ispc b/tests/load-int16-1.ispc index 5178cd28..177321ce 100644 --- a/tests/load-int16-1.ispc +++ b/tests/load-int16-1.ispc @@ -1,13 +1,17 @@ export uniform int width() { return programCount; } export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { - uniform int x[9] = { 0x00020001, 0x00040003, 0x00060005, 0x00080007, - 0x000a0009, 0x000c000b, 0x000e000d, 0x0010000f, - 0x00120011 }; - unsigned int v = load_from_int16(x, 1); + uniform int16 x[programCount]; + x[programIndex] = aFOO[programIndex]; + unsigned int16 v = 0; + if (programIndex & 1) + v = x[programIndex]; RET[programIndex] = v; } export void result(uniform float RET[]) { - RET[programIndex] = 2+programIndex; 
+ if (programIndex & 1) + RET[programIndex] = 1+programIndex; + else + RET[programIndex] = 0; } diff --git a/tests/load-int16.ispc b/tests/load-int16.ispc index 66160c28..420267a9 100644 --- a/tests/load-int16.ispc +++ b/tests/load-int16.ispc @@ -1,9 +1,9 @@ export uniform int width() { return programCount; } export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { - uniform int x[8] = { 0x00020001, 0x00040003, 0x00060005, 0x00080007, - 0x000a0009, 0x000c000b, 0x000e000d, 0x0010000f }; - unsigned int v = load_from_int16(x, 0); + uniform int16 x[programCount]; + x[programIndex] = aFOO[programIndex]; + unsigned int16 v = x[programIndex]; RET[programIndex] = v; } diff --git a/tests/load-int8-1.ispc b/tests/load-int8-1.ispc index c8c9a61c..5c78e6ac 100644 --- a/tests/load-int8-1.ispc +++ b/tests/load-int8-1.ispc @@ -1,12 +1,17 @@ export uniform int width() { return programCount; } export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { - uniform int x[5] = { 0x04030201, 0x08070605, 0x0c0b0a09, 0x100f0e0d, - 0x14131211 }; - unsigned int v = load_from_int8(x, 2); + uniform int8 x[programCount]; + x[programIndex] = aFOO[programIndex]; + unsigned int8 v = 0; + if (programIndex & 1) + v = x[programIndex]; RET[programIndex] = v; } export void result(uniform float RET[]) { - RET[programIndex] = 3+programIndex; + if (programIndex & 1) + RET[programIndex] = 1+programIndex; + else + RET[programIndex] = 0; } diff --git a/tests/load-int8.ispc b/tests/load-int8.ispc index 081f0f9b..3078cab9 100644 --- a/tests/load-int8.ispc +++ b/tests/load-int8.ispc @@ -1,8 +1,9 @@ export uniform int width() { return programCount; } export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { - uniform int x[4] = { 0x04030201, 0x08070605, 0x0c0b0a09, 0x100f0e0d }; - unsigned int v = load_from_int8(x, 0); + uniform int8 x[programCount]; + x[programIndex] = aFOO[programIndex]; + unsigned int8 v = x[programIndex]; RET[programIndex] = v; } diff --git a/tests/nested-structs-2.ispc b/tests/nested-structs-2.ispc index b2f4de31..cb58e588 100644 --- a/tests/nested-structs-2.ispc +++ b/tests/nested-structs-2.ispc @@ -16,7 +16,7 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { uniform Bar bar; for (uniform int i = 0; i < 6; ++i) for (uniform int j = 0; j < 18; ++j) - bar.foo[i].f[j] = 2.; + bar.foo[i].f[j] = 2.+b-5; bar.foo[5].f[a] = a; RET[programIndex] = bar.foo[b].f[a]; diff --git a/tests/nested-structs.ispc b/tests/nested-structs.ispc index 3476a6b8..c1b717de 100644 --- a/tests/nested-structs.ispc +++ b/tests/nested-structs.ispc @@ -1,8 +1,6 @@ export uniform int width() { return programCount; } - - struct Foo { float f[6]; }; @@ -16,7 +14,7 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { uniform Bar bar; for (uniform int i = 0; i < 6; ++i) for (uniform int j = 0; j < 6; ++j) - bar.foo[i].f[j] = 2.; + bar.foo[i].f[j] = 2.+b-5; RET[programIndex] = bar.foo[b].f[b]; } diff --git a/tests/op-plus-equals-ensure-one-lhs-eval.ispc b/tests/op-plus-equals-ensure-one-lhs-eval.ispc index 0499c1a3..cfbcc83c 100644 --- a/tests/op-plus-equals-ensure-one-lhs-eval.ispc +++ b/tests/op-plus-equals-ensure-one-lhs-eval.ispc @@ -4,7 +4,7 @@ export uniform int width() { return programCount; } export void f_fu(uniform float ret[], uniform float aa[], uniform float b) { uniform float foo[16]; for (uniform int i = 0; i < 16; ++i) - foo[i] = 1; + foo[i] = i; uniform int i = 0; foo[i++] += 1; diff --git 
a/tests/pass-varying-lvalue-to-ref.ispc b/tests/pass-varying-lvalue-to-ref.ispc index c0886a31..beed8468 100644 --- a/tests/pass-varying-lvalue-to-ref.ispc +++ b/tests/pass-varying-lvalue-to-ref.ispc @@ -6,10 +6,10 @@ void inc(reference float v) { ++v; } export void f_fu(uniform float ret[], uniform float aa[], uniform float b) { uniform float foo[32]; for (uniform int i = 0; i < 32; ++i) - foo[i] = 10; + foo[i] = 10+i; int a = (int)aa[programIndex]; inc(foo[a]); - ret[programIndex] = foo[programIndex]; + ret[programIndex] = foo[programIndex]-programIndex; } export void result(uniform float ret[]) { diff --git a/tests/rotate-5.ispc b/tests/rotate-5.ispc new file mode 100644 index 00000000..f5097fc5 --- /dev/null +++ b/tests/rotate-5.ispc @@ -0,0 +1,12 @@ + +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + int8 a = aFOO[programIndex]; + int8 rot = rotate(a, 2); + RET[programIndex] = rot; +} + +export void result(uniform float RET[]) { + RET[programIndex] = 1 + (programIndex + 2) % programCount; +} diff --git a/tests/rotate-6.ispc b/tests/rotate-6.ispc new file mode 100644 index 00000000..122cd2a6 --- /dev/null +++ b/tests/rotate-6.ispc @@ -0,0 +1,12 @@ + +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + int16 a = aFOO[programIndex]; + int16 rot = rotate(a, -1); + RET[programIndex] = rot; +} + +export void result(uniform float RET[]) { + RET[programIndex] = 1 + (programIndex + programCount - 1) % programCount; +} diff --git a/tests/scatter-int16-1.ispc b/tests/scatter-int16-1.ispc new file mode 100644 index 00000000..83b313c7 --- /dev/null +++ b/tests/scatter-int16-1.ispc @@ -0,0 +1,17 @@ +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + uniform int16 x[programCount]; + x[programIndex] = -1; + int a = aFOO[programIndex]-1; + if (programIndex < 3) + x[a] = programIndex; + RET[programIndex] = x[programIndex]; +} + +export void result(uniform float RET[]) { + RET[programIndex] = -1; + RET[0] = 0; + RET[1] = 1; + RET[2] = 2; +} diff --git a/tests/scatter-int16.ispc b/tests/scatter-int16.ispc new file mode 100644 index 00000000..3da39a0d --- /dev/null +++ b/tests/scatter-int16.ispc @@ -0,0 +1,13 @@ +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + uniform int16 x[programCount]; + x[programIndex] = 0; + int a = aFOO[programIndex]-1; + x[a] = programIndex; + RET[programIndex] = x[programIndex]; +} + +export void result(uniform float RET[]) { + RET[programIndex] = programIndex; +} diff --git a/tests/scatter-int8-1.ispc b/tests/scatter-int8-1.ispc new file mode 100644 index 00000000..3cce9287 --- /dev/null +++ b/tests/scatter-int8-1.ispc @@ -0,0 +1,17 @@ +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + uniform int8 x[programCount]; + x[programIndex] = -1; + int a = aFOO[programIndex]-1; + if (programIndex < 3) + x[a] = programIndex; + RET[programIndex] = x[programIndex]; +} + +export void result(uniform float RET[]) { + RET[programIndex] = -1; + RET[0] = 0; + RET[1] = 1; + RET[2] = 2; +} diff --git a/tests/scatter-int8.ispc b/tests/scatter-int8.ispc new file mode 100644 index 00000000..65cd2d38 --- /dev/null +++ b/tests/scatter-int8.ispc @@ -0,0 +1,13 @@ +export uniform 
int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + uniform int8 x[programCount]; + x[programIndex] = 0; + int a = aFOO[programIndex]-1; + x[a] = programIndex; + RET[programIndex] = x[programIndex]; +} + +export void result(uniform float RET[]) { + RET[programIndex] = programIndex; +} diff --git a/tests/shuffle-3.ispc b/tests/shuffle-3.ispc new file mode 100644 index 00000000..afe0b066 --- /dev/null +++ b/tests/shuffle-3.ispc @@ -0,0 +1,12 @@ + +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + int8 a = aFOO[programIndex]; + int8 shuf = shuffle(a, 1); + RET[programIndex] = shuf; +} + +export void result(uniform float RET[]) { + RET[programIndex] = 2; +} diff --git a/tests/shuffle-4.ispc b/tests/shuffle-4.ispc new file mode 100644 index 00000000..e20352ef --- /dev/null +++ b/tests/shuffle-4.ispc @@ -0,0 +1,13 @@ + +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + int16 a = aFOO[programIndex]; + int reverse = programCount - 1 - programIndex; + int16 shuf = shuffle(a, reverse); + RET[programIndex] = shuf; +} + +export void result(uniform float RET[]) { + RET[programIndex] = programCount - programIndex; +} diff --git a/tests/shuffle-5.ispc b/tests/shuffle-5.ispc new file mode 100644 index 00000000..9272cb6d --- /dev/null +++ b/tests/shuffle-5.ispc @@ -0,0 +1,13 @@ + +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + int8 a = aFOO[programIndex]; + int reverse = programCount - 1 - programIndex + (int)b - 5; + int8 shuf = shuffle(a, reverse); + RET[programIndex] = shuf; +} + +export void result(uniform float RET[]) { + RET[programIndex] = programCount - programIndex; +} diff --git a/tests/shuffle2-11.ispc b/tests/shuffle2-11.ispc new file mode 100644 index 00000000..2f7f286b --- /dev/null +++ b/tests/shuffle2-11.ispc @@ -0,0 +1,13 @@ + +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + int16 aa = aFOO[programIndex]; + int16 bb = aa + programCount; + int16 shuf = shuffle(aa, bb, 2*programIndex); + RET[programIndex] = shuf; +} + +export void result(uniform float RET[]) { + RET[programIndex] = 1 + 2*programIndex; +} diff --git a/tests/shuffle2-6.ispc b/tests/shuffle2-6.ispc new file mode 100644 index 00000000..a5e57264 --- /dev/null +++ b/tests/shuffle2-6.ispc @@ -0,0 +1,13 @@ + +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + int8 aa = aFOO[programIndex]; + int8 bb = aa + programCount; + int8 shuf = shuffle(aa, bb, 1); + RET[programIndex] = shuf; +} + +export void result(uniform float RET[]) { + RET[programIndex] = 2; +} diff --git a/tests/shuffle2-7.ispc b/tests/shuffle2-7.ispc new file mode 100644 index 00000000..832c84cf --- /dev/null +++ b/tests/shuffle2-7.ispc @@ -0,0 +1,13 @@ + +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + int16 aa = aFOO[programIndex]; + int16 bb = aa + programCount; + int16 shuf = shuffle(aa, bb, programCount + 1); + RET[programIndex] = shuf; +} + +export void result(uniform float RET[]) { + RET[programIndex] = 2 + programCount; +} diff --git a/tests/shuffle2-8.ispc b/tests/shuffle2-8.ispc 
new file mode 100644 index 00000000..190c0d9d --- /dev/null +++ b/tests/shuffle2-8.ispc @@ -0,0 +1,13 @@ + +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + int8 aa = aFOO[programIndex]; + int8 bb = aa + programCount; + int8 shuf = shuffle(aa, bb, programIndex + 2); + RET[programIndex] = shuf; +} + +export void result(uniform float RET[]) { + RET[programIndex] = 3 + programIndex; +} diff --git a/tests/shuffle2-9.ispc b/tests/shuffle2-9.ispc new file mode 100644 index 00000000..85bb9123 --- /dev/null +++ b/tests/shuffle2-9.ispc @@ -0,0 +1,13 @@ + +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + int16 aa = aFOO[programIndex]; + int16 bb = aa + programCount; + int16 shuf = shuffle(aa, bb, programIndex + 2 + (int)b - 5); + RET[programIndex] = shuf; +} + +export void result(uniform float RET[]) { + RET[programIndex] = 3 + programIndex; +} diff --git a/tests/store-int16-1.ispc b/tests/store-int16-1.ispc index aa3eb36a..c333e29c 100644 --- a/tests/store-int16-1.ispc +++ b/tests/store-int16-1.ispc @@ -1,16 +1,15 @@ export uniform int width() { return programCount; } export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { - uniform int x[16]; - for (uniform int i = 0; i < 16; ++i) - x[i] = 0xffffffff; - unsigned int val = aFOO[programIndex]; - store_to_int16(x, 5, val); - unsigned int v = load_from_int16(x, 6); - RET[programIndex] = v; + uniform unsigned int16 x[2*programCount]; + for (uniform int i = 0; i < 2*programCount; ++i) + x[i] = 0xffff; + unsigned int16 val = aFOO[programIndex]; + x[2+programIndex] = val; + RET[programIndex] = x[1+programIndex]; } export void result(uniform float RET[]) { - RET[programIndex] = 2+programIndex; - RET[programCount-1] = (unsigned int)0xffffffff; + RET[programIndex] = programIndex; + RET[0] = 65535; } diff --git a/tests/store-int16-2.ispc b/tests/store-int16-2.ispc new file mode 100644 index 00000000..4f40d42e --- /dev/null +++ b/tests/store-int16-2.ispc @@ -0,0 +1,19 @@ +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + uniform unsigned int16 x[2*programCount]; + for (uniform int i = 0; i < 2*programCount; ++i) + x[i] = 0xffff; + unsigned int16 val = aFOO[programIndex]; + if (programIndex & 1) + x[2+programIndex] = val; + RET[programIndex] = x[1+programIndex]; +} + +export void result(uniform float RET[]) { + if (programIndex & 1) + RET[programIndex] = 65535; + else + RET[programIndex] = programIndex; + RET[0] = 65535; +} diff --git a/tests/store-int16.ispc b/tests/store-int16.ispc index 77c8d902..2fcd690e 100644 --- a/tests/store-int16.ispc +++ b/tests/store-int16.ispc @@ -1,16 +1,15 @@ export uniform int width() { return programCount; } export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { - uniform int x[16]; - for (uniform int i = 0; i < 16; ++i) - x[i] = 0xffffffff; - unsigned int val = aFOO[programIndex]; - store_to_int16(x, 5, val); - int v = load_from_int16(x, 6); - RET[programIndex] = v; + uniform int16 x[2*programCount]; + for (uniform int i = 0; i < 2*programCount; ++i) + x[i] = 0xffff; + unsigned int8 val = aFOO[programIndex]; + x[2+programIndex] = val; + RET[programIndex] = x[1+programIndex]; } export void result(uniform float RET[]) { - RET[programIndex] = 2+programIndex; - RET[programCount-1] = -1; + RET[programIndex] = programIndex; + RET[0] 
= -1.; } diff --git a/tests/store-int8-1.ispc b/tests/store-int8-1.ispc index b07c64d0..f4bc7e3a 100644 --- a/tests/store-int8-1.ispc +++ b/tests/store-int8-1.ispc @@ -1,16 +1,15 @@ export uniform int width() { return programCount; } export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { - uniform unsigned int x[8]; - for (uniform int i = 0; i < 8; ++i) - x[i] = 0xffffffff; - unsigned int val = aFOO[programIndex]; - store_to_uint8(x, 2, val); - unsigned int v = load_from_uint8(x, 1); - RET[programIndex] = v; + uniform unsigned int8 x[2*programCount]; + for (uniform int i = 0; i < 2*programCount; ++i) + x[i] = 0xff; + unsigned int8 val = aFOO[programIndex]; + x[2+programIndex] = val; + RET[programIndex] = x[1+programIndex]; } export void result(uniform float RET[]) { RET[programIndex] = programIndex; - RET[0] = (unsigned int)0xff; + RET[0] = 255; } diff --git a/tests/store-int8-2.ispc b/tests/store-int8-2.ispc new file mode 100644 index 00000000..a0e59094 --- /dev/null +++ b/tests/store-int8-2.ispc @@ -0,0 +1,19 @@ +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + uniform unsigned int8 x[2*programCount]; + for (uniform int i = 0; i < 2*programCount; ++i) + x[i] = 0xff; + unsigned int8 val = aFOO[programIndex]; + if (programIndex & 1) + x[2+programIndex] = val; + RET[programIndex] = x[1+programIndex]; +} + +export void result(uniform float RET[]) { + if (programIndex & 1) + RET[programIndex] = 255; + else + RET[programIndex] = programIndex; + RET[0] = 255; +} diff --git a/tests/store-int8.ispc b/tests/store-int8.ispc index db3d9c8f..133761cd 100644 --- a/tests/store-int8.ispc +++ b/tests/store-int8.ispc @@ -1,13 +1,12 @@ export uniform int width() { return programCount; } export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { - uniform int x[8]; - for (uniform int i = 0; i < 8; ++i) - x[i] = 0xffffffff; - unsigned int val = aFOO[programIndex]; - store_to_int8(x, 2, val); - int v = load_from_int8(x, 1); - RET[programIndex] = v; + uniform int8 x[2*programCount]; + for (uniform int i = 0; i < 2*programCount; ++i) + x[i] = 0xff; + unsigned int8 val = aFOO[programIndex]; + x[2+programIndex] = val; + RET[programIndex] = x[1+programIndex]; } export void result(uniform float RET[]) { diff --git a/tests/write-same-loc.ispc b/tests/write-same-loc.ispc index 171fea6f..56f6e4f3 100644 --- a/tests/write-same-loc.ispc +++ b/tests/write-same-loc.ispc @@ -4,12 +4,12 @@ export uniform int width() { return programCount; } export void f_fu(uniform float ret[], uniform float aa[], uniform float b) { uniform int foo[10]; for (uniform int i = 0; i < 10; ++i) - foo[i] = 10; + foo[i] = 10+i; int bb = b; foo[bb] = 0; ret[programIndex] = foo[4] + foo[5]; } export void result(uniform float ret[]) { - ret[programIndex] = 10; + ret[programIndex] = 14; } diff --git a/type.cpp b/type.cpp index c58be48d..5b419c88 100644 --- a/type.cpp +++ b/type.cpp @@ -74,6 +74,14 @@ lShouldPrintName(const std::string &name) { const AtomicType *AtomicType::UniformBool = new AtomicType(TYPE_BOOL, true, false); const AtomicType *AtomicType::VaryingBool = new AtomicType(TYPE_BOOL, false, false); +const AtomicType *AtomicType::UniformInt8 = new AtomicType(TYPE_INT8, true, false); +const AtomicType *AtomicType::VaryingInt8 = new AtomicType(TYPE_INT8, false, false); +const AtomicType *AtomicType::UniformUInt8 = new AtomicType(TYPE_UINT8, true, false); +const AtomicType *AtomicType::VaryingUInt8 = new 
diff --git a/type.cpp b/type.cpp
index c58be48d..5b419c88 100644
--- a/type.cpp
+++ b/type.cpp
@@ -74,6 +74,14 @@ lShouldPrintName(const std::string &name) {
 
 const AtomicType *AtomicType::UniformBool = new AtomicType(TYPE_BOOL, true, false);
 const AtomicType *AtomicType::VaryingBool = new AtomicType(TYPE_BOOL, false, false);
+const AtomicType *AtomicType::UniformInt8 = new AtomicType(TYPE_INT8, true, false);
+const AtomicType *AtomicType::VaryingInt8 = new AtomicType(TYPE_INT8, false, false);
+const AtomicType *AtomicType::UniformUInt8 = new AtomicType(TYPE_UINT8, true, false);
+const AtomicType *AtomicType::VaryingUInt8 = new AtomicType(TYPE_UINT8, false, false);
+const AtomicType *AtomicType::UniformInt16 = new AtomicType(TYPE_INT16, true, false);
+const AtomicType *AtomicType::VaryingInt16 = new AtomicType(TYPE_INT16, false, false);
+const AtomicType *AtomicType::UniformUInt16 = new AtomicType(TYPE_UINT16, true, false);
+const AtomicType *AtomicType::VaryingUInt16 = new AtomicType(TYPE_UINT16, false, false);
 const AtomicType *AtomicType::UniformInt32 = new AtomicType(TYPE_INT32, true, false);
 const AtomicType *AtomicType::VaryingInt32 = new AtomicType(TYPE_INT32, false, false);
 const AtomicType *AtomicType::UniformUInt32 = new AtomicType(TYPE_UINT32, true, false);
@@ -89,6 +97,14 @@ const AtomicType *AtomicType::VaryingDouble = new AtomicType(TYPE_DOUBLE, false,
 
 const AtomicType *AtomicType::UniformConstBool = new AtomicType(TYPE_BOOL, true, true);
 const AtomicType *AtomicType::VaryingConstBool = new AtomicType(TYPE_BOOL, false, true);
+const AtomicType *AtomicType::UniformConstInt8 = new AtomicType(TYPE_INT8, true, true);
+const AtomicType *AtomicType::VaryingConstInt8 = new AtomicType(TYPE_INT8, false, true);
+const AtomicType *AtomicType::UniformConstUInt8 = new AtomicType(TYPE_UINT8, true, true);
+const AtomicType *AtomicType::VaryingConstUInt8 = new AtomicType(TYPE_UINT8, false, true);
+const AtomicType *AtomicType::UniformConstInt16 = new AtomicType(TYPE_INT16, true, true);
+const AtomicType *AtomicType::VaryingConstInt16 = new AtomicType(TYPE_INT16, false, true);
+const AtomicType *AtomicType::UniformConstUInt16 = new AtomicType(TYPE_UINT16, true, true);
+const AtomicType *AtomicType::VaryingConstUInt16 = new AtomicType(TYPE_UINT16, false, true);
 const AtomicType *AtomicType::UniformConstInt32 = new AtomicType(TYPE_INT32, true, true);
 const AtomicType *AtomicType::VaryingConstInt32 = new AtomicType(TYPE_INT32, false, true);
 const AtomicType *AtomicType::UniformConstUInt32 = new AtomicType(TYPE_UINT32, true, true);
@@ -101,6 +117,7 @@ const AtomicType *AtomicType::UniformConstUInt64 = new AtomicType(TYPE_UINT64, t
 const AtomicType *AtomicType::VaryingConstUInt64 = new AtomicType(TYPE_UINT64, false, true);
 const AtomicType *AtomicType::UniformConstDouble = new AtomicType(TYPE_DOUBLE, true, true);
 const AtomicType *AtomicType::VaryingConstDouble = new AtomicType(TYPE_DOUBLE, false, true);
+

 const AtomicType *AtomicType::Void = new AtomicType(TYPE_VOID, true, false);

@@ -123,14 +140,17 @@ AtomicType::IsFloatType() const {

 bool
 AtomicType::IsIntType() const {
-    return (basicType == TYPE_INT32 || basicType == TYPE_UINT32 ||
+    return (basicType == TYPE_INT8 || basicType == TYPE_UINT8 ||
+            basicType == TYPE_INT16 || basicType == TYPE_UINT16 ||
+            basicType == TYPE_INT32 || basicType == TYPE_UINT32 ||
             basicType == TYPE_INT64 || basicType == TYPE_UINT64);
 }


 bool
 AtomicType::IsUnsignedType() const {
-    return (basicType == TYPE_UINT32 || basicType == TYPE_UINT64);
+    return (basicType == TYPE_UINT8 || basicType == TYPE_UINT16 ||
+            basicType == TYPE_UINT32 || basicType == TYPE_UINT64);
 }


@@ -151,10 +171,18 @@ AtomicType::GetAsUnsignedType() const {
     if (IsUnsignedType())
         return this;

-    if (this == AtomicType::UniformInt32) return AtomicType::UniformUInt32;
+    if (this == AtomicType::UniformInt8) return AtomicType::UniformUInt8;
+    else if (this == AtomicType::VaryingInt8) return AtomicType::VaryingUInt8;
+    else if (this == AtomicType::UniformInt16) return AtomicType::UniformUInt16;
+    else if (this == AtomicType::VaryingInt16) return AtomicType::VaryingUInt16;
+    else if (this == AtomicType::UniformInt32) return AtomicType::UniformUInt32;
     else if (this == AtomicType::VaryingInt32) return AtomicType::VaryingUInt32;
     else if (this == AtomicType::UniformInt64) return AtomicType::UniformUInt64;
     else if (this == AtomicType::VaryingInt64) return AtomicType::VaryingUInt64;
+    else if (this == AtomicType::UniformConstInt8) return AtomicType::UniformConstUInt8;
+    else if (this == AtomicType::VaryingConstInt8) return AtomicType::VaryingConstUInt8;
+    else if (this == AtomicType::UniformConstInt16) return AtomicType::UniformConstUInt16;
+    else if (this == AtomicType::VaryingConstInt16) return AtomicType::VaryingConstUInt16;
     else if (this == AtomicType::UniformConstInt32) return AtomicType::UniformConstUInt32;
     else if (this == AtomicType::VaryingConstInt32) return AtomicType::VaryingConstUInt32;
     else if (this == AtomicType::UniformConstInt64) return AtomicType::UniformConstUInt64;
@@ -170,6 +198,10 @@ AtomicType::GetAsConstType() const {

     switch (basicType) {
     case TYPE_BOOL: return isUniform ? UniformConstBool : VaryingConstBool;
+    case TYPE_INT8: return isUniform ? UniformConstInt8 : VaryingConstInt8;
+    case TYPE_UINT8: return isUniform ? UniformConstUInt8 : VaryingConstUInt8;
+    case TYPE_INT16: return isUniform ? UniformConstInt16 : VaryingConstInt16;
+    case TYPE_UINT16: return isUniform ? UniformConstUInt16 : VaryingConstUInt16;
     case TYPE_INT32: return isUniform ? UniformConstInt32 : VaryingConstInt32;
     case TYPE_UINT32: return isUniform ? UniformConstUInt32 : VaryingConstUInt32;
     case TYPE_FLOAT: return isUniform ? UniformConstFloat : VaryingConstFloat;
@@ -190,6 +222,10 @@ AtomicType::GetAsNonConstType() const {

     switch (basicType) {
     case TYPE_BOOL: return isUniform ? UniformBool : VaryingBool;
+    case TYPE_INT8: return isUniform ? UniformInt8 : VaryingInt8;
+    case TYPE_UINT8: return isUniform ? UniformUInt8 : VaryingUInt8;
+    case TYPE_INT16: return isUniform ? UniformInt16 : VaryingInt16;
+    case TYPE_UINT16: return isUniform ? UniformUInt16 : VaryingUInt16;
     case TYPE_INT32: return isUniform ? UniformInt32 : VaryingInt32;
     case TYPE_UINT32: return isUniform ? UniformUInt32 : VaryingUInt32;
     case TYPE_FLOAT: return isUniform ? UniformFloat : VaryingFloat;
@@ -216,13 +252,17 @@ AtomicType::GetAsVaryingType() const {

     switch (basicType) {
     case TYPE_VOID: return this;
-    case TYPE_BOOL: return isConst ? AtomicType::VaryingConstBool : AtomicType::VaryingBool;
-    case TYPE_INT32: return isConst ? AtomicType::VaryingConstInt32 : AtomicType::VaryingInt32;
-    case TYPE_UINT32: return isConst ? AtomicType::VaryingConstUInt32 : AtomicType::VaryingUInt32;
-    case TYPE_FLOAT: return isConst ? AtomicType::VaryingConstFloat : AtomicType::VaryingFloat;
-    case TYPE_INT64: return isConst ? AtomicType::VaryingConstInt64 : AtomicType::VaryingInt64;
-    case TYPE_UINT64: return isConst ? AtomicType::VaryingConstUInt64 : AtomicType::VaryingUInt64;
-    case TYPE_DOUBLE: return isConst ? AtomicType::VaryingConstDouble : AtomicType::VaryingDouble;
+    case TYPE_BOOL: return isConst ? VaryingConstBool : VaryingBool;
+    case TYPE_INT8: return isConst ? VaryingConstInt8 : VaryingInt8;
+    case TYPE_UINT8: return isConst ? VaryingConstUInt8 : VaryingUInt8;
+    case TYPE_INT16: return isConst ? VaryingConstInt16 : VaryingInt16;
+    case TYPE_UINT16: return isConst ? VaryingConstUInt16 : VaryingUInt16;
+    case TYPE_INT32: return isConst ? VaryingConstInt32 : VaryingInt32;
+    case TYPE_UINT32: return isConst ? VaryingConstUInt32 : VaryingUInt32;
+    case TYPE_FLOAT: return isConst ? VaryingConstFloat : VaryingFloat;
+    case TYPE_INT64: return isConst ? VaryingConstInt64 : VaryingInt64;
+    case TYPE_UINT64: return isConst ? VaryingConstUInt64 : VaryingUInt64;
+    case TYPE_DOUBLE: return isConst ? VaryingConstDouble : VaryingDouble;
     default: FATAL("Logic error in AtomicType::GetAsVaryingType()");
     }
     return NULL;
@@ -236,13 +276,17 @@ AtomicType::GetAsUniformType() const {

     switch (basicType) {
     case TYPE_VOID: return this;
-    case TYPE_BOOL: return isConst ? AtomicType::UniformConstBool : AtomicType::UniformBool;
-    case TYPE_INT32: return isConst ? AtomicType::UniformConstInt32 : AtomicType::UniformInt32;
-    case TYPE_UINT32: return isConst ? AtomicType::UniformConstUInt32 : AtomicType::UniformUInt32;
-    case TYPE_FLOAT: return isConst ? AtomicType::UniformConstFloat : AtomicType::UniformFloat;
-    case TYPE_INT64: return isConst ? AtomicType::UniformConstInt64 : AtomicType::UniformInt64;
-    case TYPE_UINT64: return isConst ? AtomicType::UniformConstUInt64 : AtomicType::UniformUInt64;
-    case TYPE_DOUBLE: return isConst ? AtomicType::UniformConstDouble : AtomicType::UniformDouble;
+    case TYPE_BOOL: return isConst ? UniformConstBool : UniformBool;
+    case TYPE_INT8: return isConst ? UniformConstInt8 : UniformInt8;
+    case TYPE_UINT8: return isConst ? UniformConstUInt8 : UniformUInt8;
+    case TYPE_INT16: return isConst ? UniformConstInt16 : UniformInt16;
+    case TYPE_UINT16: return isConst ? UniformConstUInt16 : UniformUInt16;
+    case TYPE_INT32: return isConst ? UniformConstInt32 : UniformInt32;
+    case TYPE_UINT32: return isConst ? UniformConstUInt32 : UniformUInt32;
+    case TYPE_FLOAT: return isConst ? UniformConstFloat : UniformFloat;
+    case TYPE_INT64: return isConst ? UniformConstInt64 : UniformInt64;
+    case TYPE_UINT64: return isConst ? UniformConstUInt64 : UniformUInt64;
+    case TYPE_DOUBLE: return isConst ? UniformConstDouble : UniformDouble;
     default: FATAL("Logic error in AtomicType::GetAsUniformType()");
     }
     return NULL;
@@ -267,6 +311,10 @@ AtomicType::GetString() const {
     switch (basicType) {
     case TYPE_VOID: ret += "void"; break;
     case TYPE_BOOL: ret += "bool"; break;
+    case TYPE_INT8: ret += "int8"; break;
+    case TYPE_UINT8: ret += "unsigned int8"; break;
+    case TYPE_INT16: ret += "int16"; break;
+    case TYPE_UINT16: ret += "unsigned int16"; break;
     case TYPE_INT32: ret += "int32"; break;
     case TYPE_UINT32: ret += "unsigned int32"; break;
     case TYPE_FLOAT: ret += "float"; break;
@@ -288,6 +336,10 @@ AtomicType::Mangle() const {
     switch (basicType) {
     case TYPE_VOID: ret += "v"; break;
     case TYPE_BOOL: ret += "b"; break;
+    case TYPE_INT8: ret += "t"; break;
+    case TYPE_UINT8: ret += "T"; break;
+    case TYPE_INT16: ret += "s"; break;
+    case TYPE_UINT16: ret += "S"; break;
     case TYPE_INT32: ret += "i"; break;
     case TYPE_UINT32: ret += "u"; break;
     case TYPE_FLOAT: ret += "f"; break;
@@ -309,12 +361,16 @@ AtomicType::GetCDeclaration(const std::string &name) const {
     switch (basicType) {
     case TYPE_VOID: ret += "void"; break;
     case TYPE_BOOL: ret += "bool"; break;
+    case TYPE_INT8: ret += "int8_t"; break;
+    case TYPE_UINT8: ret += "uint8_t"; break;
+    case TYPE_INT16: ret += "int16_t"; break;
+    case TYPE_UINT16: ret += "uint16_t"; break;
     case TYPE_INT32: ret += "int32_t"; break;
     case TYPE_UINT32: ret += "uint32_t"; break;
     case TYPE_FLOAT: ret += "float"; break;
-    case TYPE_DOUBLE: ret += "double"; break;
     case TYPE_INT64: ret += "int64_t"; break;
     case TYPE_UINT64: ret += "uint64_t"; break;
+    case TYPE_DOUBLE: ret += "double"; break;
     default: FATAL("Logic error in AtomicType::GetCDeclaration()");
     }

@@ -333,6 +389,12 @@ AtomicType::LLVMType(llvm::LLVMContext *ctx) const {
         return llvm::Type::getVoidTy(*ctx);
     case TYPE_BOOL:
         return isUniform ? LLVMTypes::BoolType : LLVMTypes::BoolVectorType;
+    case TYPE_INT8:
+    case TYPE_UINT8:
+        return isUniform ? LLVMTypes::Int8Type : LLVMTypes::Int8VectorType;
+    case TYPE_INT16:
+    case TYPE_UINT16:
+        return isUniform ? LLVMTypes::Int16Type : LLVMTypes::Int16VectorType;
     case TYPE_INT32:
     case TYPE_UINT32:
         return isUniform ? LLVMTypes::Int32Type : LLVMTypes::Int32VectorType;
@@ -364,6 +426,22 @@ AtomicType::GetDIType(llvm::DIDescriptor scope) const {
         return m->diBuilder->createBasicType("bool", 32 /* size */, 32 /* align */,
                                              llvm::dwarf::DW_ATE_unsigned);
         break;
+    case TYPE_INT8:
+        return m->diBuilder->createBasicType("int8", 8 /* size */, 8 /* align */,
+                                             llvm::dwarf::DW_ATE_signed);
+        break;
+    case TYPE_UINT8:
+        return m->diBuilder->createBasicType("uint8", 8 /* size */, 8 /* align */,
+                                             llvm::dwarf::DW_ATE_unsigned);
+        break;
+    case TYPE_INT16:
+        return m->diBuilder->createBasicType("int16", 16 /* size */, 16 /* align */,
+                                             llvm::dwarf::DW_ATE_signed);
+        break;
+    case TYPE_UINT16:
+        return m->diBuilder->createBasicType("uint16", 16 /* size */, 16 /* align */,
+                                             llvm::dwarf::DW_ATE_unsigned);
+        break;
     case TYPE_INT32:
         return m->diBuilder->createBasicType("int32", 32 /* size */, 32 /* align */,
                                              llvm::dwarf::DW_ATE_signed);
diff --git a/type.h b/type.h
index 666f8731..a985dc2e 100644
--- a/type.h
+++ b/type.h
@@ -210,6 +210,10 @@ public:
     enum BasicType {
         TYPE_VOID,
         TYPE_BOOL,
+        TYPE_INT8,
+        TYPE_UINT8,
+        TYPE_INT16,
+        TYPE_UINT16,
         TYPE_INT32,
         TYPE_UINT32,
         TYPE_FLOAT,
@@ -221,14 +225,22 @@ public:
     const BasicType basicType;

     static const AtomicType *UniformBool, *VaryingBool;
+    static const AtomicType *UniformInt8, *VaryingInt8;
+    static const AtomicType *UniformInt16, *VaryingInt16;
     static const AtomicType *UniformInt32, *VaryingInt32;
+    static const AtomicType *UniformUInt8, *VaryingUInt8;
+    static const AtomicType *UniformUInt16, *VaryingUInt16;
     static const AtomicType *UniformUInt32, *VaryingUInt32;
     static const AtomicType *UniformFloat, *VaryingFloat;
     static const AtomicType *UniformInt64, *VaryingInt64;
     static const AtomicType *UniformUInt64, *VaryingUInt64;
     static const AtomicType *UniformDouble, *VaryingDouble;
     static const AtomicType *UniformConstBool, *VaryingConstBool;
+    static const AtomicType *UniformConstInt8, *VaryingConstInt8;
+    static const AtomicType *UniformConstInt16, *VaryingConstInt16;
     static const AtomicType *UniformConstInt32, *VaryingConstInt32;
+    static const AtomicType *UniformConstUInt8, *VaryingConstUInt8;
+    static const AtomicType *UniformConstUInt16, *VaryingConstUInt16;
     static const AtomicType *UniformConstUInt32, *VaryingConstUInt32;
     static const AtomicType *UniformConstFloat, *VaryingConstFloat;
     static const AtomicType *UniformConstInt64, *VaryingConstInt64;
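
Aside (illustrative, not part of the diff): taken together, the type.cpp, type.h, LLVMType(), and GetDIType() changes above make int8/unsigned int8/int16/unsigned int16 first-class uniform and varying types: GetString() names them, Mangle() gives them the t/T/s/S codes, GetCDeclaration() maps them to int8_t/uint8_t/int16_t/uint16_t in generated headers, and the LLVM and DWARF cases give them proper 8- and 16-bit representations. A minimal sketch of a kernel using the new types, written in the same style as the tests above (the function name and comments are hypothetical, not taken from the patch):

    export void widen_bytes(uniform int8 in[], uniform unsigned int16 out[]) {
        // Per-lane 8-bit load; "int8" without "uniform" is the new varying 8-bit type.
        int8 v = in[programIndex];
        // Implicit conversion to unsigned int16, then a per-lane 16-bit store.
        out[programIndex] = v;
    }

Given the GetCDeclaration() cases above, the generated C/C++ declaration for such a function would be roughly: void widen_bytes(int8_t * in, uint16_t * out);
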