Provide both signed and unsigned int variants of bitcode-based builtins.
When creating function Symbols for functions defined in LLVM bitcode for the standard library, if any of the function parameters have integer types, create two ispc-side Symbols: one where the integer types are all signed and one where they are all unsigned. This allows us to provide, for example, both store_to_int16(uniform int a[], uniform int offset, int val) and store_to_int16(uniform unsigned int a[], uniform int offset, unsigned int val).
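For illustration, a minimal ispc sketch (hypothetical function and buffer names, not part of this commit) of how overload resolution picks between the two variants:

    // Hypothetical sketch: each call resolves to the signed or the unsigned
    // variant of packed_store_active() based on the argument types.
    export void overload_demo(uniform int ibuf[], uniform unsigned int ubuf[],
                              uniform float aFOO[]) {
        int sval = aFOO[programIndex];           // signed varying value
        unsigned int uval = aFOO[programIndex];  // unsigned varying value
        packed_store_active(ibuf, 0, sval);      // signed int variant
        packed_store_active(ubuf, 0, uval);      // unsigned int variant
    }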
Added tests to exercise the new variants.
Also fixed some cases where the __{load,store}_int{8,16} builtins would read from or write to memory even when the mask was all off (which could cause crashes in some cases).
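Roughly, the problem case looks like the following hypothetical sketch (illustrative only; the actual fix is in the stdlib.m4 builtins below): a load reached only under a condition that no program instance satisfies must not touch memory at all.

    // Hypothetical sketch: when no program instance takes the branch, the
    // underlying __load_int8 builtin now skips the memory access entirely.
    export void masked_load_demo(uniform float RET[], uniform int buf[],
                                 uniform float aFOO[]) {
        float a = aFOO[programIndex];
        int v = 0;
        if (a < -1e30)                  // expected to be false for all lanes
            v = load_from_int8(buf, 0);
        RET[programIndex] = v;
    }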
Makefile | 9

@@ -6,12 +6,9 @@ ARCH_OS = $(shell uname)
ARCH_TYPE = $(shell arch)

CLANG=clang
CLANG_LIBS = -lclangFrontendTool -lclangFrontend -lclangDriver \
    -lclangSerialization -lclangCodeGen -lclangParse -lclangSema \
    -lclangStaticAnalyzerFrontend -lclangStaticAnalyzerCheckers \
    -lclangStaticAnalyzerCore \
    -lclangAnalysis -lclangIndex -lclangRewrite \
    -lclangAST -lclangLex -lclangBasic
CLANG_LIBS = -lclangFrontend -lclangDriver \
    -lclangSerialization -lclangParse -lclangSema \
    -lclangAnalysis -lclangAST -lclangLex -lclangBasic

LLVM_LIBS=$(shell llvm-config --ldflags --libs) -lpthread -ldl
LLVM_CXXFLAGS=$(shell llvm-config --cppflags)
builtins.cpp | 86
@@ -64,41 +64,46 @@ extern yy_buffer_state *yy_scan_string(const char *);
/** Given an LLVM type, try to find the equivalent ispc type. Note that
this is an under-constrained problem due to LLVM's type representations
carrying less information than ispc's. (For example, LLVM doesn't
distinguish between signed and unsigned integers in its types.)

Because this function is only used for generating ispc declarations of
functions defined in LLVM bitcode in the stdlib-*.ll files, in practice
we can get enough of what we need for the relevant cases to make things
work, partially with the help of the intAsUnsigned parameter, which
indicates whether LLVM integer types should be treated as being signed
or unsigned.

However, because this function is only used for generating ispc
declarations of functions defined in LLVM bitcode in the stdlib-*.ll
files, in practice we can get enough of what we need for the relevant
cases to make things work.
*/
static const Type *
lLLVMTypeToISPCType(const llvm::Type *t) {
lLLVMTypeToISPCType(const llvm::Type *t, bool intAsUnsigned) {
if (t == LLVMTypes::VoidType)
return AtomicType::Void;
else if (t == LLVMTypes::BoolType)
return AtomicType::UniformBool;
else if (t == LLVMTypes::Int32Type)
return AtomicType::UniformInt32;
return intAsUnsigned ? AtomicType::UniformUInt32 : AtomicType::UniformInt32;
else if (t == LLVMTypes::FloatType)
return AtomicType::UniformFloat;
else if (t == LLVMTypes::DoubleType)
return AtomicType::UniformDouble;
else if (t == LLVMTypes::Int64Type)
return AtomicType::UniformInt64;
return intAsUnsigned ? AtomicType::UniformUInt64 : AtomicType::UniformInt64;
else if (t == LLVMTypes::Int32VectorType)
return AtomicType::VaryingInt32;
return intAsUnsigned ? AtomicType::VaryingUInt32 : AtomicType::VaryingInt32;
else if (t == LLVMTypes::FloatVectorType)
return AtomicType::VaryingFloat;
else if (t == LLVMTypes::DoubleVectorType)
return AtomicType::VaryingDouble;
else if (t == LLVMTypes::Int64VectorType)
return AtomicType::VaryingInt64;
return intAsUnsigned ? AtomicType::VaryingUInt64 : AtomicType::VaryingInt64;
else if (t == LLVMTypes::Int32PointerType)
return new ReferenceType(AtomicType::UniformInt32, false);
return new ReferenceType(intAsUnsigned ? AtomicType::UniformUInt32 :
AtomicType::UniformInt32, false);
else if (t == LLVMTypes::FloatPointerType)
return new ReferenceType(AtomicType::UniformFloat, false);
else if (t == LLVMTypes::Int32VectorPointerType)
return new ReferenceType(AtomicType::VaryingInt32, false);
return new ReferenceType(intAsUnsigned ? AtomicType::VaryingUInt32 :
AtomicType::VaryingInt32, false);
else if (t == LLVMTypes::FloatVectorPointerType)
return new ReferenceType(AtomicType::VaryingFloat, false);
else if (llvm::isa<const llvm::PointerType>(t)) {
@@ -114,9 +119,11 @@ lLLVMTypeToISPCType(const llvm::Type *t) {
const llvm::ArrayType *at =
llvm::dyn_cast<const llvm::ArrayType>(pt->getElementType());
if (at && at->getNumElements() == 0 &&
at->getElementType() == LLVMTypes::Int32Type)
return new ReferenceType(new ArrayType(AtomicType::UniformInt32, 0),
false);
at->getElementType() == LLVMTypes::Int32Type) {
const Type *eltType = intAsUnsigned ? AtomicType::UniformUInt32 :
AtomicType::UniformInt32;
return new ReferenceType(new ArrayType(eltType, 0), false);
}
}

return NULL;
@@ -135,26 +142,43 @@ lCreateISPCSymbol(llvm::Function *func, SymbolTable *symbolTable) {
const llvm::FunctionType *ftype = func->getFunctionType();
std::string name = func->getName();

const Type *returnType = lLLVMTypeToISPCType(ftype->getReturnType());
if (!returnType)
// return type not representable in ispc -> not callable from ispc
return false;
// If the function has any parameters with integer types, we'll make
// two Symbols for two overloaded versions of the function, one with
// all of the integer types treated as signed integers and one with all
// of them treated as unsigned.
for (int i = 0; i < 2; ++i) {
bool intAsUnsigned = (i == 1);

// Iterate over the arguments and try to find their equivalent ispc
// types.
std::vector<const Type *> argTypes;
for (unsigned int i = 0; i < ftype->getNumParams(); ++i) {
const llvm::Type *llvmArgType = ftype->getParamType(i);
const Type *type = lLLVMTypeToISPCType(llvmArgType);
if (type == NULL)
const Type *returnType = lLLVMTypeToISPCType(ftype->getReturnType(),
intAsUnsigned);
if (!returnType)
// return type not representable in ispc -> not callable from ispc
return false;
argTypes.push_back(type);

// Iterate over the arguments and try to find their equivalent ispc
// types. Track if any of the arguments has an integer type.
bool anyIntArgs = false;
std::vector<const Type *> argTypes;
for (unsigned int j = 0; j < ftype->getNumParams(); ++j) {
const llvm::Type *llvmArgType = ftype->getParamType(j);
const Type *type = lLLVMTypeToISPCType(llvmArgType, intAsUnsigned);
if (type == NULL)
return false;
anyIntArgs |=
(Type::Equal(type, lLLVMTypeToISPCType(llvmArgType, !intAsUnsigned)) == false);
argTypes.push_back(type);
}

// Always create the symbol the first time through, in particular
// so that we get symbols for things with no integer types!
if (i == 0 || anyIntArgs == true) {
FunctionType *funcType = new FunctionType(returnType, argTypes, noPos);
Symbol *sym = new Symbol(name, noPos, funcType);
sym->function = func;
symbolTable->AddFunction(sym);
}
}

FunctionType *funcType = new FunctionType(returnType, argTypes, noPos);
Symbol *sym = new Symbol(name, noPos, funcType);
sym->function = func;
symbolTable->AddFunction(sym);
return true;
}
ctx.cpp | 3
@@ -695,7 +695,8 @@ FunctionEmitContext::LaneMask(llvm::Value *v) {
// Call the target-dependent movmsk function to turn the vector mask
// into an i32 value
std::vector<Symbol *> *mm = m->symbolTable->LookupFunction("__movmsk");
assert(mm && mm->size() == 1);
// There should be one with signed int signature, one unsigned int.
assert(mm && mm->size() == 2);
llvm::Function *fmm = (*mm)[0]->function;
return CallInst(fmm, v, "val_movmsk");
}
@@ -1,5 +1,11 @@
=== v1.0.3 === (not yet released)

There are now both 'signed' and 'unsigned' variants of the standard library
functions like packed_load_active() that take references to arrays of
signed int32s and unsigned int32s, respectively. (The
{load_from,store_to}_{int8,int16}() functions have similarly been augmented
to have both 'signed' and 'unsigned' variants.)

In initializer expressions with variable declarations, it is no longer
legal to initialize arrays and structs with single scalar values that then
initialize their members; they now must be initialized with initializer
@@ -1777,24 +1777,31 @@ Packed Load and Store Operations
--------------------------------

The standard library also offers routines for writing out and reading in
values from linear memory locations for the active program instances.
``packed_load_active()`` loads consecutive values from the given array,
starting at ``a[offset]``, loading one value for each currently-executing
program instance and storing it into that program instance's ``val``
variable. It returns the total number of values loaded. Similarly,
``packed_store_active()`` stores the ``val`` values for each program
instance that executed the ``packed_store_active()`` call, storing the
results into the given array starting at the given offset. It returns the
total number of values stored.
values from linear memory locations for the active program instances. The
``packed_load_active()`` functions load consecutive values from the given
array, starting at ``a[offset]``, loading one value for each
currently-executing program instance and storing it into that program
instance's ``val`` variable. They return the total number of values
loaded. Similarly, the ``packed_store_active()`` functions store the
``val`` values for each program instance that executed the
``packed_store_active()`` call, storing the results into the given array
starting at the given offset. They return the total number of values
stored.

::

    uniform unsigned int packed_load_active(uniform int a[],
                                            uniform int offset,
                                            reference int val)
    uniform unsigned int packed_store_active(uniform int a[],
                                             uniform int offset,
                                             int val)
    uniform int packed_load_active(uniform int a[],
                                   uniform int offset,
                                   reference int val)
    uniform int packed_load_active(uniform unsigned int a[],
                                   uniform int offset,
                                   reference unsigned int val)
    uniform int packed_store_active(uniform int a[],
                                    uniform int offset,
                                    int val)
    uniform int packed_store_active(uniform unsigned int a[],
                                    uniform int offset,
                                    unsigned int val)


As an example of how these functions can be used, the following code shows
@@ -1845,24 +1852,31 @@ and this conversion step are necessary because ``ispc`` doesn't have native

::

    unsigned int load_from_int8(uniform int a[],
                                uniform int offset)
    int load_from_int8(uniform int a[], uniform int offset)
    unsigned int load_from_int8(uniform unsigned int a[],
                                uniform int offset)
    void store_to_int8(uniform int a[], uniform int offset,
                       int val)
    void store_to_int8(uniform unsigned int a[], uniform int offset,
                       unsigned int val)
    unsigned int load_from_int16(uniform int a[],
                                 uniform int offset)
    unsigned int load_from_int16(uniform unsigned int a[],
                                 uniform int offset)
    void store_to_int16(uniform int a[], uniform int offset,
                        int val)
    void store_to_int16(uniform unsigned int a[], uniform int offset,
                        unsigned int val)

There are three things to note in these functions. First, note that these
functions take ``unsigned int`` arrays as parameters; you need
to cast the ``int8_t`` and ``int16_t`` pointers from the C/C++ side to
``unsigned int`` when passing them to ``ispc`` code. Second, although the
arrays are passed as ``unsigned int``, in the array indexing calculation,
with the ``offset`` parameter, they are treated as if they were ``int8`` or
``int16`` types. (i.e. the offset treated as being in terms of number of 8
or 16-bit elements.) Third, note that programIndex is implicitly added
to offset.
functions take either ``int`` or ``unsigned int`` arrays as parameters; you
need to cast the ``int8_t`` and ``int16_t`` pointers from the C/C++ side
to ``int`` or ``unsigned int`` when passing them to ``ispc`` code. Second,
although the arrays are passed as 32-bit integers, in the array indexing
calculation, with the ``offset`` parameter, they are treated as if they
were ``int8`` or ``int16`` types (i.e. the offset is treated as being in
terms of number of 8 or 16-bit elements). Third, note that the value of
``programIndex`` is implicitly added to offset.
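For instance, in this short (hypothetical) kernel, program instance ``i``
writes and then reads back 16-bit element ``4 + i`` of the 32-bit array
``buf``, since the offset counts 16-bit elements and ``programIndex`` is
added implicitly::

    // Hypothetical sketch: per-instance 16-bit packing into an int array.
    export void pack16_demo(uniform float RET[], uniform float aFOO[]) {
        uniform int buf[64];
        int val = aFOO[programIndex];
        store_to_int16(buf, 4, val);
        RET[programIndex] = load_from_int16(buf, 4);
    }
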
The ``intbits()`` and ``floatbits()`` functions can be used to implement
low-level floating-point bit twiddling. For example, ``intbits()`` returns
stdlib.ispc | 62
@@ -181,7 +181,7 @@ static inline uniform bool all(bool v) {
// As with any(), we need to explicitly mask v with the current program mask
// so we're only looking at the current lanes
bool match = ((v & __mask) == __mask);
return __movmsk(match) == (1 << programCount) - 1;
return __movmsk((int)match) == (1 << programCount) - 1;
}

static inline uniform int popcnt(uniform int v) {
@@ -273,35 +273,71 @@ static inline uniform unsigned int reduce_max(unsigned int v) {
///////////////////////////////////////////////////////////////////////////
// packed load, store

static inline uniform unsigned int packed_load_active(uniform int a[], uniform int start,
reference int vals) {
static inline uniform int
packed_load_active(uniform unsigned int a[], uniform int start,
reference unsigned int vals) {
return __packed_load_active(a, start, vals, __mask);
}

static inline uniform unsigned int packed_store_active(uniform int a[], uniform int start,
int vals) {
static inline uniform int
packed_store_active(uniform unsigned int a[], uniform int start,
unsigned int vals) {
return __packed_store_active(a, start, vals, __mask);
}

static inline uniform int packed_load_active(uniform int a[], uniform int start,
reference int vals) {
return __packed_load_active(a, start, vals, __mask);
}

static inline uniform int packed_store_active(uniform int a[], uniform int start,
int vals) {
return __packed_store_active(a, start, vals, __mask);
}

///////////////////////////////////////////////////////////////////////////
// Load/store from/to 8/16-bit types

static inline unsigned int load_from_int8(uniform int a[], uniform int offset) {
return __load_uint8(a, offset);
static inline int load_from_int8(uniform int a[], uniform int offset) {
return __load_int8(a, offset, __mask);
}

static inline unsigned int load_from_uint8(uniform unsigned int a[],
uniform int offset) {
return __load_uint8(a, offset, __mask);
}

static inline void store_to_int8(uniform int a[], uniform int offset,
unsigned int val) {
__store_uint8(a, offset, val, __mask);
unsigned int val) {
__store_int8(a, offset, val, __mask);
}

static inline unsigned int load_from_int16(uniform int a[], uniform int offset) {
return __load_uint16(a, offset);
static inline void store_to_uint8(uniform unsigned int a[], uniform int offset,
unsigned int val) {
// Can use __store_int8 for unsigned stuff, since it truncates bits in
// either case.
__store_int8(a, offset, val, __mask);
}

static inline int load_from_int16(uniform int a[], uniform int offset) {
return __load_int16(a, offset, __mask);
}

static inline unsigned int load_from_int16(uniform unsigned int a[],
uniform int offset) {
return __load_uint16(a, offset, __mask);
}

static inline void store_to_int16(uniform int a[], uniform int offset,
unsigned int val) {
__store_uint16(a, offset, val, __mask);
int val) {
__store_int16(a, offset, val, __mask);
}

static inline void store_to_uint16(uniform unsigned int a[], uniform int offset,
unsigned int val) {
// Can use __store_int16 for unsigned stuff, since it truncates bits in
// either case.
__store_int16(a, offset, val, __mask);
}

///////////////////////////////////////////////////////////////////////////
stdlib.m4 | 95
@@ -557,33 +557,101 @@ define internal float @__stdlib_pow(float, float) nounwind readnone alwaysinline
;; $1: vector width of the target

define(`int8_16', `
define internal <$1 x i32> @__load_uint8([0 x i32] *, i32 %offset) nounwind alwaysinline {
define internal <$1 x i32> @__load_uint8([0 x i32] *, i32 %offset,
<$1 x i32> %mask) nounwind alwaysinline {
%mm = call i32 @__movmsk(<$1 x i32> %mask)
%any = icmp ne i32 %mm, 0
br i1 %any, label %doload, label %skip

doload:
%ptr8 = bitcast [0 x i32] *%0 to i8 *
%ptr = getelementptr i8 * %ptr8, i32 %offset
%ptr64 = bitcast i8 * %ptr to i`'eval(8*$1) *
%val = load i`'eval(8*$1) * %ptr64, align 1

%vval = bitcast i`'eval(8*$1) %val to <$1 x i8>
; were assuming unsigned, so zero-extend to i32...
; unsigned, so zero-extend to i32...
%ret = zext <$1 x i8> %vval to <$1 x i32>
ret <$1 x i32> %ret

skip:
ret <$1 x i32> undef
}

define internal <$1 x i32> @__load_uint16([0 x i32] *, i32 %offset) nounwind alwaysinline {
define internal <$1 x i32> @__load_int8([0 x i32] *, i32 %offset,
<$1 x i32> %mask) nounwind alwaysinline {
%mm = call i32 @__movmsk(<$1 x i32> %mask)
%any = icmp ne i32 %mm, 0
br i1 %any, label %doload, label %skip

doload:
%ptr8 = bitcast [0 x i32] *%0 to i8 *
%ptr = getelementptr i8 * %ptr8, i32 %offset
%ptr64 = bitcast i8 * %ptr to i`'eval(8*$1) *
%val = load i`'eval(8*$1) * %ptr64, align 1

%vval = bitcast i`'eval(8*$1) %val to <$1 x i8>
; signed, so sign-extend to i32...
%ret = sext <$1 x i8> %vval to <$1 x i32>
ret <$1 x i32> %ret

skip:
ret <$1 x i32> undef
}

define internal <$1 x i32> @__load_uint16([0 x i32] *, i32 %offset,
<$1 x i32> %mask) nounwind alwaysinline {
%mm = call i32 @__movmsk(<$1 x i32> %mask)
%any = icmp ne i32 %mm, 0
br i1 %any, label %doload, label %skip

doload:
%ptr16 = bitcast [0 x i32] *%0 to i16 *
%ptr = getelementptr i16 * %ptr16, i32 %offset
%ptr64 = bitcast i16 * %ptr to i`'eval(16*$1) *
%val = load i`'eval(16*$1) * %ptr64, align 2

%vval = bitcast i`'eval(16*$1) %val to <$1 x i16>
; unsigned, so use zero-extent...
; unsigned, so use zero-extend...
%ret = zext <$1 x i16> %vval to <$1 x i32>
ret <$1 x i32> %ret

skip:
ret <$1 x i32> undef
}

define internal void @__store_uint8([0 x i32] *, i32 %offset, <$1 x i32> %val32,
<$1 x i32> %mask) nounwind alwaysinline {

define internal <$1 x i32> @__load_int16([0 x i32] *, i32 %offset,
<$1 x i32> %mask) nounwind alwaysinline {
%mm = call i32 @__movmsk(<$1 x i32> %mask)
%any = icmp ne i32 %mm, 0
br i1 %any, label %doload, label %skip

doload:
%ptr16 = bitcast [0 x i32] *%0 to i16 *
%ptr = getelementptr i16 * %ptr16, i32 %offset
%ptr64 = bitcast i16 * %ptr to i`'eval(16*$1) *
%val = load i`'eval(16*$1) * %ptr64, align 2

%vval = bitcast i`'eval(16*$1) %val to <$1 x i16>
; signed, so use sign-extend...
%ret = sext <$1 x i16> %vval to <$1 x i32>
ret <$1 x i32> %ret

skip:
ret <$1 x i32> undef
}

define internal void @__store_int8([0 x i32] *, i32 %offset, <$1 x i32> %val32,
<$1 x i32> %mask) nounwind alwaysinline {
%mm = call i32 @__movmsk(<$1 x i32> %mask)
%any = icmp ne i32 %mm, 0
br i1 %any, label %dostore, label %skip

dostore:
%val = trunc <$1 x i32> %val32 to <$1 x i8>
%val64 = bitcast <$1 x i8> %val to i`'eval(8*$1)
@@ -604,10 +672,18 @@ define internal void @__store_uint8([0 x i32] *, i32 %offset, <$1 x i32> %val32,
store i`'eval(8*$1) %final, i`'eval(8*$1) * %ptr64, align 1

ret void

skip:
ret void
}

define internal void @__store_uint16([0 x i32] *, i32 %offset, <$1 x i32> %val32,
<$1 x i32> %mask) nounwind alwaysinline {
define internal void @__store_int16([0 x i32] *, i32 %offset, <$1 x i32> %val32,
<$1 x i32> %mask) nounwind alwaysinline {
%mm = call i32 @__movmsk(<$1 x i32> %mask)
%any = icmp ne i32 %mm, 0
br i1 %any, label %dostore, label %skip

dostore:
%val = trunc <$1 x i32> %val32 to <$1 x i16>
%val64 = bitcast <$1 x i16> %val to i`'eval(16*$1)
@@ -627,6 +703,9 @@ define internal void @__store_uint16([0 x i32] *, i32 %offset, <$1 x i32> %val32
store i`'eval(16*$1) %final, i`'eval(16*$1) * %ptr64, align 2

ret void

skip:
ret void
}
'
)
@@ -2,9 +2,9 @@
export uniform int width() { return programCount; }

export void f_f(uniform float RET[], uniform float aFOO[]) {
uniform int a[programCount];
uniform unsigned int a[programCount];
a[programIndex] = aFOO[programIndex];
int aa;
unsigned int aa;
packed_load_active(a, 0, aa);
RET[programIndex] = aa;
}
@@ -3,10 +3,10 @@ export uniform int width() { return programCount; }

export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex];
uniform int pack[programCount];
uniform unsigned int pack[programCount];
for (uniform int i = 0; i < programCount; ++i)
pack[i] = 0;
packed_store_active(pack, 0, a);
packed_store_active(pack, 0, (unsigned int)a);
RET[programIndex] = pack[programIndex];
}
tests/shuffle2.ispc | 13 (new file)
@@ -0,0 +1,13 @@
export uniform int width() { return programCount; }

export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
int32 aa = aFOO[programIndex];
int32 bb = aa + programCount;
int32 shuf = shuffle(aa, bb, 1);
RET[programIndex] = shuf;
}

export void result(uniform float RET[]) {
RET[programIndex] = 2;
}
tests/store-int16-1.ispc | 16 (new file)
@@ -0,0 +1,16 @@
export uniform int width() { return programCount; }

export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
uniform int x[16];
for (uniform int i = 0; i < 16; ++i)
x[i] = 0xffffffff;
unsigned int val = aFOO[programIndex];
store_to_int16(x, 5, val);
unsigned int v = load_from_int16(x, 6);
RET[programIndex] = v;
}

export void result(uniform float RET[]) {
RET[programIndex] = 2+programIndex;
RET[programCount-1] = (unsigned int)0xffffffff;
}
@@ -6,11 +6,11 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
x[i] = 0xffffffff;
unsigned int val = aFOO[programIndex];
store_to_int16(x, 5, val);
unsigned int v = load_from_int16(x, 6);
int v = load_from_int16(x, 6);
RET[programIndex] = v;
}

export void result(uniform float RET[]) {
RET[programIndex] = 2+programIndex;
RET[programCount-1] = 0xffff;
RET[programCount-1] = -1;
}
tests/store-int8-1.ispc | 16 (new file)
@@ -0,0 +1,16 @@
export uniform int width() { return programCount; }

export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
uniform unsigned int x[8];
for (uniform int i = 0; i < 8; ++i)
x[i] = 0xffffffff;
unsigned int val = aFOO[programIndex];
store_to_uint8(x, 2, val);
unsigned int v = load_from_uint8(x, 1);
RET[programIndex] = v;
}

export void result(uniform float RET[]) {
RET[programIndex] = programIndex;
RET[0] = (unsigned int)0xff;
}
@@ -6,11 +6,11 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
x[i] = 0xffffffff;
unsigned int val = aFOO[programIndex];
store_to_int8(x, 2, val);
unsigned int v = load_from_int8(x, 1);
int v = load_from_int8(x, 1);
RET[programIndex] = v;
}

export void result(uniform float RET[]) {
RET[programIndex] = programIndex;
RET[0] = 0xff;
RET[0] = -1.;
}