Provide both signed and unsigned int variants of bitcode-based builtins.

When creating function Symbols for functions that were defined in LLVM bitcode for the standard library, if any of the function parameters are integer types, create two ispc-side Symbols: one where the integer types are all signed and the other where they are all unsigned.  This allows us to provide, for example, both store_to_int16(reference int a[], uniform int offset, int val) as well as store_to_int16(reference unsigned int a[], uniform int offset, unsigned int val).

Added some additional tests to exercise the new variants of these.

Also fixed some cases where the __{load,store}_int{8,16} builtins would read from/write to memory even if the mask was all off (which could cause crashes in some cases.)
This commit is contained in:
Matt Pharr
2011-07-04 12:07:00 +01:00
parent fac50ba454
commit c14c3ceba6
14 changed files with 293 additions and 91 deletions

View File

@@ -6,12 +6,9 @@ ARCH_OS = $(shell uname)
ARCH_TYPE = $(shell arch)
CLANG=clang
CLANG_LIBS = -lclangFrontendTool -lclangFrontend -lclangDriver \
-lclangSerialization -lclangCodeGen -lclangParse -lclangSema \
-lclangStaticAnalyzerFrontend -lclangStaticAnalyzerCheckers \
-lclangStaticAnalyzerCore \
-lclangAnalysis -lclangIndex -lclangRewrite \
-lclangAST -lclangLex -lclangBasic
CLANG_LIBS = -lclangFrontend -lclangDriver \
-lclangSerialization -lclangParse -lclangSema \
-lclangAnalysis -lclangAST -lclangLex -lclangBasic
LLVM_LIBS=$(shell llvm-config --ldflags --libs) -lpthread -ldl
LLVM_CXXFLAGS=$(shell llvm-config --cppflags)

View File

@@ -64,41 +64,46 @@ extern yy_buffer_state *yy_scan_string(const char *);
/** Given an LLVM type, try to find the equivalent ispc type. Note that
this is an under-constrained problem due to LLVM's type representations
carrying less information than ispc's. (For example, LLVM doesn't
distinguish between signed and unsigned integers in its types.)
distinguish between signed and unsigned integers in its types.)
Because this function is only used for generating ispc declarations of
functions defined in LLVM bitcode in the stdlib-*.ll files, in practice
we can get enough of what we need for the relevant cases to make things
work, partially with the help of the intAsUnsigned parameter, which
indicates whether LLVM integer types should be treated as being signed
or unsigned.
However, because this function is only used for generating ispc
declarations of functions defined in LLVM bitcode in the stdlib-*.ll
files, in practice we can get enough of what we need for the relevant
cases to make things work.
*/
static const Type *
lLLVMTypeToISPCType(const llvm::Type *t) {
lLLVMTypeToISPCType(const llvm::Type *t, bool intAsUnsigned) {
if (t == LLVMTypes::VoidType)
return AtomicType::Void;
else if (t == LLVMTypes::BoolType)
return AtomicType::UniformBool;
else if (t == LLVMTypes::Int32Type)
return AtomicType::UniformInt32;
return intAsUnsigned ? AtomicType::UniformUInt32 : AtomicType::UniformInt32;
else if (t == LLVMTypes::FloatType)
return AtomicType::UniformFloat;
else if (t == LLVMTypes::DoubleType)
return AtomicType::UniformDouble;
else if (t == LLVMTypes::Int64Type)
return AtomicType::UniformInt64;
return intAsUnsigned ? AtomicType::UniformUInt64 : AtomicType::UniformInt64;
else if (t == LLVMTypes::Int32VectorType)
return AtomicType::VaryingInt32;
return intAsUnsigned ? AtomicType::VaryingUInt32 : AtomicType::VaryingInt32;
else if (t == LLVMTypes::FloatVectorType)
return AtomicType::VaryingFloat;
else if (t == LLVMTypes::DoubleVectorType)
return AtomicType::VaryingDouble;
else if (t == LLVMTypes::Int64VectorType)
return AtomicType::VaryingInt64;
return intAsUnsigned ? AtomicType::VaryingUInt64 : AtomicType::VaryingInt64;
else if (t == LLVMTypes::Int32PointerType)
return new ReferenceType(AtomicType::UniformInt32, false);
return new ReferenceType(intAsUnsigned ? AtomicType::UniformUInt32 :
AtomicType::UniformInt32, false);
else if (t == LLVMTypes::FloatPointerType)
return new ReferenceType(AtomicType::UniformFloat, false);
else if (t == LLVMTypes::Int32VectorPointerType)
return new ReferenceType(AtomicType::VaryingInt32, false);
return new ReferenceType(intAsUnsigned ? AtomicType::VaryingUInt32 :
AtomicType::VaryingInt32, false);
else if (t == LLVMTypes::FloatVectorPointerType)
return new ReferenceType(AtomicType::VaryingFloat, false);
else if (llvm::isa<const llvm::PointerType>(t)) {
@@ -114,9 +119,11 @@ lLLVMTypeToISPCType(const llvm::Type *t) {
const llvm::ArrayType *at =
llvm::dyn_cast<const llvm::ArrayType>(pt->getElementType());
if (at && at->getNumElements() == 0 &&
at->getElementType() == LLVMTypes::Int32Type)
return new ReferenceType(new ArrayType(AtomicType::UniformInt32, 0),
false);
at->getElementType() == LLVMTypes::Int32Type) {
const Type *eltType = intAsUnsigned ? AtomicType::UniformUInt32 :
AtomicType::UniformInt32;
return new ReferenceType(new ArrayType(eltType, 0), false);
}
}
return NULL;
@@ -135,26 +142,43 @@ lCreateISPCSymbol(llvm::Function *func, SymbolTable *symbolTable) {
const llvm::FunctionType *ftype = func->getFunctionType();
std::string name = func->getName();
const Type *returnType = lLLVMTypeToISPCType(ftype->getReturnType());
if (!returnType)
// return type not representable in ispc -> not callable from ispc
return false;
// If the function has any parameters with integer types, we'll make
// two Symbols for two overloaded versions of the function, one with
// all of the integer types treated as signed integers and one with all
// of them treated as unsigned.
for (int i = 0; i < 2; ++i) {
bool intAsUnsigned = (i == 1);
// Iterate over the arguments and try to find their equivalent ispc
// types.
std::vector<const Type *> argTypes;
for (unsigned int i = 0; i < ftype->getNumParams(); ++i) {
const llvm::Type *llvmArgType = ftype->getParamType(i);
const Type *type = lLLVMTypeToISPCType(llvmArgType);
if (type == NULL)
const Type *returnType = lLLVMTypeToISPCType(ftype->getReturnType(),
intAsUnsigned);
if (!returnType)
// return type not representable in ispc -> not callable from ispc
return false;
argTypes.push_back(type);
// Iterate over the arguments and try to find their equivalent ispc
// types. Track if any of the arguments has an integer type.
bool anyIntArgs = false;
std::vector<const Type *> argTypes;
for (unsigned int j = 0; j < ftype->getNumParams(); ++j) {
const llvm::Type *llvmArgType = ftype->getParamType(j);
const Type *type = lLLVMTypeToISPCType(llvmArgType, intAsUnsigned);
if (type == NULL)
return false;
anyIntArgs |=
(Type::Equal(type, lLLVMTypeToISPCType(llvmArgType, !intAsUnsigned)) == false);
argTypes.push_back(type);
}
// Always create the symbol the first time through, in particular
// so that we get symbols for things with no integer types!
if (i == 0 || anyIntArgs == true) {
FunctionType *funcType = new FunctionType(returnType, argTypes, noPos);
Symbol *sym = new Symbol(name, noPos, funcType);
sym->function = func;
symbolTable->AddFunction(sym);
}
}
FunctionType *funcType = new FunctionType(returnType, argTypes, noPos);
Symbol *sym = new Symbol(name, noPos, funcType);
sym->function = func;
symbolTable->AddFunction(sym);
return true;
}

View File

@@ -695,7 +695,8 @@ FunctionEmitContext::LaneMask(llvm::Value *v) {
// Call the target-dependent movmsk function to turn the vector mask
// into an i32 value
std::vector<Symbol *> *mm = m->symbolTable->LookupFunction("__movmsk");
assert(mm && mm->size() == 1);
// There should be one with signed int signature, one unsigned int.
assert(mm && mm->size() == 2);
llvm::Function *fmm = (*mm)[0]->function;
return CallInst(fmm, v, "val_movmsk");
}

View File

@@ -1,5 +1,11 @@
=== v1.0.3 === (not yet released)
There are now both 'signed' and 'unsigned' variants of the standard library
functions like packed_load_active() that take references to arrays of
signed int32s and unsigned int32s respectively. (The
{load_from,store_to}_{int8,int16}() functions have similarly been augmented
to have both 'signed' and 'unsigned' variants.)
In initializer expressions with variable declarations, it is no longer
legal to initialize arrays and structs with single scalar values that then
initialize their members; they now must be initialized with initializer

View File

@@ -1777,24 +1777,31 @@ Packed Load and Store Operations
--------------------------------
The standard library also offers routines for writing out and reading in
values from linear memory locations for the active program instances.
``packed_load_active()`` loads consecutive values from the given array,
starting at ``a[offset]``, loading one value for each currently-executing
program instance and storing it into that program instance's ``val``
variable. It returns the total number of values loaded. Similarly,
``packed_store_active()`` stores the ``val`` values for each program
instances that executed the ``packed_store_active()`` call, storing the
results into the given array starting at the given offset. It returns the
total number of values stored.
values from linear memory locations for the active program instances. The
``packed_load_active()`` functions load consecutive values from the given
array, starting at ``a[offset]``, loading one value for each
currently-executing program instance and storing it into that program
instance's ``val`` variable. They return the total number of values
loaded. Similarly, the ``packed_store_active()`` functions store the
``val`` values for each program instance that executed the
``packed_store_active()`` call, storing the results into the given array
starting at the given offset. They return the total number of values
stored.
::
uniform unsigned int packed_load_active(uniform int a[],
uniform int offset,
reference int val)
uniform unsigned int packed_store_active(uniform int a[],
uniform int offset,
int val)
uniform int packed_load_active(uniform int a[],
uniform int offset,
reference int val)
uniform int packed_load_active(uniform unsigned int a[],
uniform int offset,
reference unsigned int val)
uniform int packed_store_active(uniform int a[],
uniform int offset,
int val)
uniform int packed_store_active(uniform unsigned int a[],
uniform int offset,
unsigned int val)
As an example of how these functions can be used, the following code shows
@@ -1845,24 +1852,31 @@ and this conversion step are necessary because ``ispc`` doesn't have native
::
unsigned int load_from_int8(uniform int a[],
int load_from_int8(uniform int a[], uniform int offset)
unsigned int load_from_int8(uniform unsigned int a[],
uniform int offset)
void store_to_int8(uniform int a[], uniform int offset,
int val)
void store_to_int8(uniform unsigned int a[], uniform int offset,
unsigned int val)
int load_from_int16(uniform int a[],
uniform int offset)
unsigned int load_from_int16(uniform unsigned int a[],
uniform int offset)
void store_to_int16(uniform int a[], uniform int offset,
int val)
void store_to_int16(uniform unsigned int a[], uniform int offset,
unsigned int val)
There are three things to note in these functions. First, note that these
functions take ``unsigned int`` arrays as parameters; you need
to cast `the ``int8_t`` and ``int16_t`` pointers from the C/C++ side to
``unsigned int`` when passing them to ``ispc`` code. Second, although the
arrays are passed as ``unsigned int``, in the array indexing calculation,
with the ``offset`` parameter, they are treated as if they were ``int8`` or
``int16`` types. (i.e. the offset treated as being in terms of number of 8
or 16-bit elements.) Third, note that programIndex is implicitly added
to offset.
functions take either ``int`` or ``unsigned int`` arrays as parameters; you
need to cast the ``int8_t`` and ``int16_t`` pointers from the C/C++ side
to ``int`` or ``unsigned int`` when passing them to ``ispc`` code. Second,
although the arrays are passed as 32-bit integers, in the array indexing
calculation, with the ``offset`` parameter, they are treated as if they
were ``int8`` or ``int16`` types (i.e. the offset is treated as being in terms
of number of 8 or 16-bit elements). Third, note that the value of
``programIndex`` is implicitly added to offset.
The ``intbits()`` and ``floatbits()`` functions can be used to implement
low-level floating-point bit twiddling. For example, ``intbits()`` returns

View File

@@ -181,7 +181,7 @@ static inline uniform bool all(bool v) {
// As with any(), we need to explicitly mask v with the current program mask
// so we're only looking at the current lanes
bool match = ((v & __mask) == __mask);
return __movmsk(match) == (1 << programCount) - 1;
return __movmsk((int)match) == (1 << programCount) - 1;
}
static inline uniform int popcnt(uniform int v) {
@@ -273,35 +273,71 @@ static inline uniform unsigned int reduce_max(unsigned int v) {
///////////////////////////////////////////////////////////////////////////
// packed load, store
static inline uniform unsigned int packed_load_active(uniform int a[], uniform int start,
reference int vals) {
static inline uniform int
packed_load_active(uniform unsigned int a[], uniform int start,
reference unsigned int vals) {
return __packed_load_active(a, start, vals, __mask);
}
static inline uniform unsigned int packed_store_active(uniform int a[], uniform int start,
int vals) {
static inline uniform int
packed_store_active(uniform unsigned int a[], uniform int start,
unsigned int vals) {
return __packed_store_active(a, start, vals, __mask);
}
static inline uniform int packed_load_active(uniform int a[], uniform int start,
reference int vals) {
return __packed_load_active(a, start, vals, __mask);
}
static inline uniform int packed_store_active(uniform int a[], uniform int start,
int vals) {
return __packed_store_active(a, start, vals, __mask);
}
///////////////////////////////////////////////////////////////////////////
// Load/store from/to 8/16-bit types
static inline unsigned int load_from_int8(uniform int a[], uniform int offset) {
return __load_uint8(a, offset);
static inline int load_from_int8(uniform int a[], uniform int offset) {
return __load_int8(a, offset, __mask);
}
static inline unsigned int load_from_uint8(uniform unsigned int a[],
uniform int offset) {
return __load_uint8(a, offset, __mask);
}
static inline void store_to_int8(uniform int a[], uniform int offset,
unsigned int val) {
__store_uint8(a, offset, val, __mask);
unsigned int val) {
__store_int8(a, offset, val, __mask);
}
static inline unsigned int load_from_int16(uniform int a[], uniform int offset) {
return __load_uint16(a, offset);
static inline void store_to_uint8(uniform unsigned int a[], uniform int offset,
unsigned int val) {
// Can use __store_int8 for unsigned stuff, since it truncates bits in
// either case.
__store_int8(a, offset, val, __mask);
}
static inline int load_from_int16(uniform int a[], uniform int offset) {
return __load_int16(a, offset, __mask);
}
static inline unsigned int load_from_int16(uniform unsigned int a[],
uniform int offset) {
return __load_uint16(a, offset, __mask);
}
static inline void store_to_int16(uniform int a[], uniform int offset,
unsigned int val) {
__store_uint16(a, offset, val, __mask);
int val) {
__store_int16(a, offset, val, __mask);
}
static inline void store_to_uint16(uniform unsigned int a[], uniform int offset,
unsigned int val) {
// Can use __store_int16 for unsigned stuff, since it truncates bits in
// either case.
__store_int16(a, offset, val, __mask);
}
///////////////////////////////////////////////////////////////////////////

View File

@@ -557,33 +557,101 @@ define internal float @__stdlib_pow(float, float) nounwind readnone alwaysinline
;; $1: vector width of the target
define(`int8_16', `
define internal <$1 x i32> @__load_uint8([0 x i32] *, i32 %offset) nounwind alwaysinline {
define internal <$1 x i32> @__load_uint8([0 x i32] *, i32 %offset,
<$1 x i32> %mask) nounwind alwaysinline {
%mm = call i32 @__movmsk(<$1 x i32> %mask)
%any = icmp ne i32 %mm, 0
br i1 %any, label %doload, label %skip
doload:
%ptr8 = bitcast [0 x i32] *%0 to i8 *
%ptr = getelementptr i8 * %ptr8, i32 %offset
%ptr64 = bitcast i8 * %ptr to i`'eval(8*$1) *
%val = load i`'eval(8*$1) * %ptr64, align 1
%vval = bitcast i`'eval(8*$1) %val to <$1 x i8>
; were assuming unsigned, so zero-extend to i32...
; unsigned, so zero-extend to i32...
%ret = zext <$1 x i8> %vval to <$1 x i32>
ret <$1 x i32> %ret
skip:
ret <$1 x i32> undef
}
define internal <$1 x i32> @__load_uint16([0 x i32] *, i32 %offset) nounwind alwaysinline {
define internal <$1 x i32> @__load_int8([0 x i32] *, i32 %offset,
<$1 x i32> %mask) nounwind alwaysinline {
%mm = call i32 @__movmsk(<$1 x i32> %mask)
%any = icmp ne i32 %mm, 0
br i1 %any, label %doload, label %skip
doload:
%ptr8 = bitcast [0 x i32] *%0 to i8 *
%ptr = getelementptr i8 * %ptr8, i32 %offset
%ptr64 = bitcast i8 * %ptr to i`'eval(8*$1) *
%val = load i`'eval(8*$1) * %ptr64, align 1
%vval = bitcast i`'eval(8*$1) %val to <$1 x i8>
; signed, so sign-extend to i32...
%ret = sext <$1 x i8> %vval to <$1 x i32>
ret <$1 x i32> %ret
skip:
ret <$1 x i32> undef
}
define internal <$1 x i32> @__load_uint16([0 x i32] *, i32 %offset,
<$1 x i32> %mask) nounwind alwaysinline {
%mm = call i32 @__movmsk(<$1 x i32> %mask)
%any = icmp ne i32 %mm, 0
br i1 %any, label %doload, label %skip
doload:
%ptr16 = bitcast [0 x i32] *%0 to i16 *
%ptr = getelementptr i16 * %ptr16, i32 %offset
%ptr64 = bitcast i16 * %ptr to i`'eval(16*$1) *
%val = load i`'eval(16*$1) * %ptr64, align 2
%vval = bitcast i`'eval(16*$1) %val to <$1 x i16>
; unsigned, so use zero-extent...
; unsigned, so use zero-extend...
%ret = zext <$1 x i16> %vval to <$1 x i32>
ret <$1 x i32> %ret
skip:
ret <$1 x i32> undef
}
define internal void @__store_uint8([0 x i32] *, i32 %offset, <$1 x i32> %val32,
<$1 x i32> %mask) nounwind alwaysinline {
define internal <$1 x i32> @__load_int16([0 x i32] *, i32 %offset,
<$1 x i32> %mask) nounwind alwaysinline {
%mm = call i32 @__movmsk(<$1 x i32> %mask)
%any = icmp ne i32 %mm, 0
br i1 %any, label %doload, label %skip
doload:
%ptr16 = bitcast [0 x i32] *%0 to i16 *
%ptr = getelementptr i16 * %ptr16, i32 %offset
%ptr64 = bitcast i16 * %ptr to i`'eval(16*$1) *
%val = load i`'eval(16*$1) * %ptr64, align 2
%vval = bitcast i`'eval(16*$1) %val to <$1 x i16>
; signed, so use sign-extend...
%ret = sext <$1 x i16> %vval to <$1 x i32>
ret <$1 x i32> %ret
skip:
ret <$1 x i32> undef
}
define internal void @__store_int8([0 x i32] *, i32 %offset, <$1 x i32> %val32,
<$1 x i32> %mask) nounwind alwaysinline {
%mm = call i32 @__movmsk(<$1 x i32> %mask)
%any = icmp ne i32 %mm, 0
br i1 %any, label %dostore, label %skip
dostore:
%val = trunc <$1 x i32> %val32 to <$1 x i8>
%val64 = bitcast <$1 x i8> %val to i`'eval(8*$1)
@@ -604,10 +672,18 @@ define internal void @__store_uint8([0 x i32] *, i32 %offset, <$1 x i32> %val32,
store i`'eval(8*$1) %final, i`'eval(8*$1) * %ptr64, align 1
ret void
skip:
ret void
}
define internal void @__store_uint16([0 x i32] *, i32 %offset, <$1 x i32> %val32,
<$1 x i32> %mask) nounwind alwaysinline {
define internal void @__store_int16([0 x i32] *, i32 %offset, <$1 x i32> %val32,
<$1 x i32> %mask) nounwind alwaysinline {
%mm = call i32 @__movmsk(<$1 x i32> %mask)
%any = icmp ne i32 %mm, 0
br i1 %any, label %dostore, label %skip
dostore:
%val = trunc <$1 x i32> %val32 to <$1 x i16>
%val64 = bitcast <$1 x i16> %val to i`'eval(16*$1)
@@ -627,6 +703,9 @@ define internal void @__store_uint16([0 x i32] *, i32 %offset, <$1 x i32> %val32
store i`'eval(16*$1) %final, i`'eval(16*$1) * %ptr64, align 2
ret void
skip:
ret void
}
'
)

View File

@@ -2,9 +2,9 @@
export uniform int width() { return programCount; }
export void f_f(uniform float RET[], uniform float aFOO[]) {
uniform int a[programCount];
uniform unsigned int a[programCount];
a[programIndex] = aFOO[programIndex];
int aa;
unsigned int aa;
packed_load_active(a, 0, aa);
RET[programIndex] = aa;
}

View File

@@ -3,10 +3,10 @@ export uniform int width() { return programCount; }
export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex];
uniform int pack[programCount];
uniform unsigned int pack[programCount];
for (uniform int i = 0; i < programCount; ++i)
pack[i] = 0;
packed_store_active(pack, 0, a);
packed_store_active(pack, 0, (unsigned int)a);
RET[programIndex] = pack[programIndex];
}

13
tests/shuffle2.ispc Normal file
View File

@@ -0,0 +1,13 @@
export uniform int width() { return programCount; }
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
int32 aa = aFOO[programIndex];
int32 bb = aa + programCount;
int32 shuf = shuffle(aa, bb, 1);
RET[programIndex] = shuf;
}
export void result(uniform float RET[]) {
RET[programIndex] = 2;
}

16
tests/store-int16-1.ispc Normal file
View File

@@ -0,0 +1,16 @@
export uniform int width() { return programCount; }
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
uniform int x[16];
for (uniform int i = 0; i < 16; ++i)
x[i] = 0xffffffff;
unsigned int val = aFOO[programIndex];
store_to_int16(x, 5, val);
unsigned int v = load_from_int16(x, 6);
RET[programIndex] = v;
}
export void result(uniform float RET[]) {
RET[programIndex] = 2+programIndex;
RET[programCount-1] = (unsigned int)0xffffffff;
}

View File

@@ -6,11 +6,11 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
x[i] = 0xffffffff;
unsigned int val = aFOO[programIndex];
store_to_int16(x, 5, val);
unsigned int v = load_from_int16(x, 6);
int v = load_from_int16(x, 6);
RET[programIndex] = v;
}
export void result(uniform float RET[]) {
RET[programIndex] = 2+programIndex;
RET[programCount-1] = 0xffff;
RET[programCount-1] = -1;
}

16
tests/store-int8-1.ispc Normal file
View File

@@ -0,0 +1,16 @@
export uniform int width() { return programCount; }
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
uniform unsigned int x[8];
for (uniform int i = 0; i < 8; ++i)
x[i] = 0xffffffff;
unsigned int val = aFOO[programIndex];
store_to_uint8(x, 2, val);
unsigned int v = load_from_uint8(x, 1);
RET[programIndex] = v;
}
export void result(uniform float RET[]) {
RET[programIndex] = programIndex;
RET[0] = (unsigned int)0xff;
}

View File

@@ -6,11 +6,11 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
x[i] = 0xffffffff;
unsigned int val = aFOO[programIndex];
store_to_int8(x, 2, val);
unsigned int v = load_from_int8(x, 1);
int v = load_from_int8(x, 1);
RET[programIndex] = v;
}
export void result(uniform float RET[]) {
RET[programIndex] = programIndex;
RET[0] = 0xff;
RET[0] = -1.;
}