Provide both signed and unsigned int variants of bitcode-based builtins.

When creating function Symbols for functions that were defined in LLVM bitcode for the standard library, if any of the function parameters are integer types, create two ispc-side Symbols: one where the integer types are all signed and the other where they are all unsigned.  This allows us to provide, for example, both store_to_int16(reference int a[], uniform int offset, int val) as well as store_to_int16(reference unsigned int a[], uniform int offset, unsigned int val).

Added some additional tests to exercise the new variants of these.

Also fixed some cases where the __{load,store}_int{8,16} builtins would read from/write to memory even if the mask was all off (which could cause crashes in some cases.)
This commit is contained in:
Matt Pharr
2011-07-04 12:07:00 +01:00
parent fac50ba454
commit c14c3ceba6
14 changed files with 293 additions and 91 deletions

View File

@@ -6,12 +6,9 @@ ARCH_OS = $(shell uname)
ARCH_TYPE = $(shell arch) ARCH_TYPE = $(shell arch)
CLANG=clang CLANG=clang
CLANG_LIBS = -lclangFrontendTool -lclangFrontend -lclangDriver \ CLANG_LIBS = -lclangFrontend -lclangDriver \
-lclangSerialization -lclangCodeGen -lclangParse -lclangSema \ -lclangSerialization -lclangParse -lclangSema \
-lclangStaticAnalyzerFrontend -lclangStaticAnalyzerCheckers \ -lclangAnalysis -lclangAST -lclangLex -lclangBasic
-lclangStaticAnalyzerCore \
-lclangAnalysis -lclangIndex -lclangRewrite \
-lclangAST -lclangLex -lclangBasic
LLVM_LIBS=$(shell llvm-config --ldflags --libs) -lpthread -ldl LLVM_LIBS=$(shell llvm-config --ldflags --libs) -lpthread -ldl
LLVM_CXXFLAGS=$(shell llvm-config --cppflags) LLVM_CXXFLAGS=$(shell llvm-config --cppflags)

View File

@@ -64,41 +64,46 @@ extern yy_buffer_state *yy_scan_string(const char *);
/** Given an LLVM type, try to find the equivalent ispc type. Note that /** Given an LLVM type, try to find the equivalent ispc type. Note that
this is an under-constrained problem due to LLVM's type representations this is an under-constrained problem due to LLVM's type representations
carrying less information than ispc's. (For example, LLVM doesn't carrying less information than ispc's. (For example, LLVM doesn't
distinguish between signed and unsigned integers in its types.) distinguish between signed and unsigned integers in its types.)
Because this function is only used for generating ispc declarations of
functions defined in LLVM bitcode in the stdlib-*.ll files, in practice
we can get enough of what we need for the relevant cases to make things
work, partially with the help of the intAsUnsigned parameter, which
indicates whether LLVM integer types should be treated as being signed
or unsigned.
However, because this function is only used for generating ispc
declarations of functions defined in LLVM bitcode in the stdlib-*.ll
files, in practice we can get enough of what we need for the relevant
cases to make things work.
*/ */
static const Type * static const Type *
lLLVMTypeToISPCType(const llvm::Type *t) { lLLVMTypeToISPCType(const llvm::Type *t, bool intAsUnsigned) {
if (t == LLVMTypes::VoidType) if (t == LLVMTypes::VoidType)
return AtomicType::Void; return AtomicType::Void;
else if (t == LLVMTypes::BoolType) else if (t == LLVMTypes::BoolType)
return AtomicType::UniformBool; return AtomicType::UniformBool;
else if (t == LLVMTypes::Int32Type) else if (t == LLVMTypes::Int32Type)
return AtomicType::UniformInt32; return intAsUnsigned ? AtomicType::UniformUInt32 : AtomicType::UniformInt32;
else if (t == LLVMTypes::FloatType) else if (t == LLVMTypes::FloatType)
return AtomicType::UniformFloat; return AtomicType::UniformFloat;
else if (t == LLVMTypes::DoubleType) else if (t == LLVMTypes::DoubleType)
return AtomicType::UniformDouble; return AtomicType::UniformDouble;
else if (t == LLVMTypes::Int64Type) else if (t == LLVMTypes::Int64Type)
return AtomicType::UniformInt64; return intAsUnsigned ? AtomicType::UniformUInt64 : AtomicType::UniformInt64;
else if (t == LLVMTypes::Int32VectorType) else if (t == LLVMTypes::Int32VectorType)
return AtomicType::VaryingInt32; return intAsUnsigned ? AtomicType::VaryingUInt32 : AtomicType::VaryingInt32;
else if (t == LLVMTypes::FloatVectorType) else if (t == LLVMTypes::FloatVectorType)
return AtomicType::VaryingFloat; return AtomicType::VaryingFloat;
else if (t == LLVMTypes::DoubleVectorType) else if (t == LLVMTypes::DoubleVectorType)
return AtomicType::VaryingDouble; return AtomicType::VaryingDouble;
else if (t == LLVMTypes::Int64VectorType) else if (t == LLVMTypes::Int64VectorType)
return AtomicType::VaryingInt64; return intAsUnsigned ? AtomicType::VaryingUInt64 : AtomicType::VaryingInt64;
else if (t == LLVMTypes::Int32PointerType) else if (t == LLVMTypes::Int32PointerType)
return new ReferenceType(AtomicType::UniformInt32, false); return new ReferenceType(intAsUnsigned ? AtomicType::UniformUInt32 :
AtomicType::UniformInt32, false);
else if (t == LLVMTypes::FloatPointerType) else if (t == LLVMTypes::FloatPointerType)
return new ReferenceType(AtomicType::UniformFloat, false); return new ReferenceType(AtomicType::UniformFloat, false);
else if (t == LLVMTypes::Int32VectorPointerType) else if (t == LLVMTypes::Int32VectorPointerType)
return new ReferenceType(AtomicType::VaryingInt32, false); return new ReferenceType(intAsUnsigned ? AtomicType::VaryingUInt32 :
AtomicType::VaryingInt32, false);
else if (t == LLVMTypes::FloatVectorPointerType) else if (t == LLVMTypes::FloatVectorPointerType)
return new ReferenceType(AtomicType::VaryingFloat, false); return new ReferenceType(AtomicType::VaryingFloat, false);
else if (llvm::isa<const llvm::PointerType>(t)) { else if (llvm::isa<const llvm::PointerType>(t)) {
@@ -114,9 +119,11 @@ lLLVMTypeToISPCType(const llvm::Type *t) {
const llvm::ArrayType *at = const llvm::ArrayType *at =
llvm::dyn_cast<const llvm::ArrayType>(pt->getElementType()); llvm::dyn_cast<const llvm::ArrayType>(pt->getElementType());
if (at && at->getNumElements() == 0 && if (at && at->getNumElements() == 0 &&
at->getElementType() == LLVMTypes::Int32Type) at->getElementType() == LLVMTypes::Int32Type) {
return new ReferenceType(new ArrayType(AtomicType::UniformInt32, 0), const Type *eltType = intAsUnsigned ? AtomicType::UniformUInt32 :
false); AtomicType::UniformInt32;
return new ReferenceType(new ArrayType(eltType, 0), false);
}
} }
return NULL; return NULL;
@@ -135,26 +142,43 @@ lCreateISPCSymbol(llvm::Function *func, SymbolTable *symbolTable) {
const llvm::FunctionType *ftype = func->getFunctionType(); const llvm::FunctionType *ftype = func->getFunctionType();
std::string name = func->getName(); std::string name = func->getName();
const Type *returnType = lLLVMTypeToISPCType(ftype->getReturnType()); // If the function has any parameters with integer types, we'll make
if (!returnType) // two Symbols for two overloaded versions of the function, one with
// return type not representable in ispc -> not callable from ispc // all of the integer types treated as signed integers and one with all
return false; // of them treated as unsigned.
for (int i = 0; i < 2; ++i) {
bool intAsUnsigned = (i == 1);
// Iterate over the arguments and try to find their equivalent ispc const Type *returnType = lLLVMTypeToISPCType(ftype->getReturnType(),
// types. intAsUnsigned);
std::vector<const Type *> argTypes; if (!returnType)
for (unsigned int i = 0; i < ftype->getNumParams(); ++i) { // return type not representable in ispc -> not callable from ispc
const llvm::Type *llvmArgType = ftype->getParamType(i);
const Type *type = lLLVMTypeToISPCType(llvmArgType);
if (type == NULL)
return false; return false;
argTypes.push_back(type);
// Iterate over the arguments and try to find their equivalent ispc
// types. Track if any of the arguments has an integer type.
bool anyIntArgs = false;
std::vector<const Type *> argTypes;
for (unsigned int j = 0; j < ftype->getNumParams(); ++j) {
const llvm::Type *llvmArgType = ftype->getParamType(j);
const Type *type = lLLVMTypeToISPCType(llvmArgType, intAsUnsigned);
if (type == NULL)
return false;
anyIntArgs |=
(Type::Equal(type, lLLVMTypeToISPCType(llvmArgType, !intAsUnsigned)) == false);
argTypes.push_back(type);
}
// Always create the symbol the first time through, in particular
// so that we get symbols for things with no integer types!
if (i == 0 || anyIntArgs == true) {
FunctionType *funcType = new FunctionType(returnType, argTypes, noPos);
Symbol *sym = new Symbol(name, noPos, funcType);
sym->function = func;
symbolTable->AddFunction(sym);
}
} }
FunctionType *funcType = new FunctionType(returnType, argTypes, noPos);
Symbol *sym = new Symbol(name, noPos, funcType);
sym->function = func;
symbolTable->AddFunction(sym);
return true; return true;
} }

View File

@@ -695,7 +695,8 @@ FunctionEmitContext::LaneMask(llvm::Value *v) {
// Call the target-dependent movmsk function to turn the vector mask // Call the target-dependent movmsk function to turn the vector mask
// into an i32 value // into an i32 value
std::vector<Symbol *> *mm = m->symbolTable->LookupFunction("__movmsk"); std::vector<Symbol *> *mm = m->symbolTable->LookupFunction("__movmsk");
assert(mm && mm->size() == 1); // There should be one with signed int signature, one unsigned int.
assert(mm && mm->size() == 2);
llvm::Function *fmm = (*mm)[0]->function; llvm::Function *fmm = (*mm)[0]->function;
return CallInst(fmm, v, "val_movmsk"); return CallInst(fmm, v, "val_movmsk");
} }

View File

@@ -1,5 +1,11 @@
=== v1.0.3 === (not yet released) === v1.0.3 === (not yet released)
There are now both 'signed' and 'unsigned' variants of the standard library
functions like packed_load_active() that take references to arrays of
signed int32s and unsigned int32s respectively. (The
{load_from,store_to}_{int8,int16}() functions have similarly been augmented
to have both 'signed' and 'unsigned' variants.)
In initializer expressions with variable declarations, it is no longer In initializer expressions with variable declarations, it is no longer
legal to initialize arrays and structs with single scalar values that then legal to initialize arrays and structs with single scalar values that then
initialize their members; they now must be initialized with initializer initialize their members; they now must be initialized with initializer

View File

@@ -1777,24 +1777,31 @@ Packed Load and Store Operations
-------------------------------- --------------------------------
The standard library also offers routines for writing out and reading in The standard library also offers routines for writing out and reading in
values from linear memory locations for the active program instances. values from linear memory locations for the active program instances. The
``packed_load_active()`` loads consecutive values from the given array, ``packed_load_active()`` functions load consecutive values from the given
starting at ``a[offset]``, loading one value for each currently-executing array, starting at ``a[offset]``, loading one value for each
program instance and storing it into that program instance's ``val`` currently-executing program instance and storing it into that program
variable. It returns the total number of values loaded. Similarly, instance's ``val`` variable. They return the total number of values
``packed_store_active()`` stores the ``val`` values for each program loaded. Similarly, the ``packed_store_active()`` functions store the
instances that executed the ``packed_store_active()`` call, storing the ``val`` values for each program instances that executed the
results into the given array starting at the given offset. It returns the ``packed_store_active()`` call, storing the results into the given array
total number of values stored. starting at the given offset. They return the total number of values
stored.
:: ::
uniform unsigned int packed_load_active(uniform int a[], uniform int packed_load_active(uniform int a[],
uniform int offset, uniform int offset,
reference int val) reference int val)
uniform unsigned int packed_store_active(uniform int a[], uniform int packed_load_active(uniform unsigned int a[],
uniform int offset, uniform int offset,
int val) reference unsigned int val)
uniform int packed_store_active(uniform int a[],
uniform int offset,
int val)
uniform int packed_store_active(uniform unsigned int a[],
uniform int offset,
unsigned int val)
As an example of how these functions can be used, the following code shows As an example of how these functions can be used, the following code shows
@@ -1845,24 +1852,31 @@ and this conversion step are necessary because ``ispc`` doesn't have native
:: ::
unsigned int load_from_int8(uniform int a[], int load_from_int8(uniform int a[], uniform int offset)
unsigned int load_from_int8(uniform unsigned int a[],
uniform int offset) uniform int offset)
void store_to_int8(uniform int a[], uniform int offset, void store_to_int8(uniform int a[], uniform int offset,
int val)
void store_to_int8(uniform unsigned int a[], uniform int offset,
unsigned int val) unsigned int val)
unsigned int load_from_int16(uniform int a[], unsigned int load_from_int16(uniform int a[],
uniform int offset) uniform int offset)
unsigned unsigned int load_from_int16(uniform unsigned int a[],
uniform int offset)
void store_to_int16(uniform int a[], uniform int offset, void store_to_int16(uniform int a[], uniform int offset,
int val)
void store_to_int16(uniform unsigned int a[], uniform int offset,
unsigned int val) unsigned int val)
There are three things to note in these functions. First, note that these There are three things to note in these functions. First, note that these
functions take ``unsigned int`` arrays as parameters; you need functions take either ``int`` or ``unsigned int`` arrays as parameters; you
to cast the ``int8_t`` and ``int16_t`` pointers from the C/C++ side to need to cast the ``int8_t`` and ``int16_t`` pointers from the C/C++ side
``unsigned int`` when passing them to ``ispc`` code. Second, although the to ``int`` or ``unsigned int`` when passing them to ``ispc`` code. Second,
arrays are passed as ``unsigned int``, in the array indexing calculation, although the arrays are passed as 32-bit integers, in the array indexing
with the ``offset`` parameter, they are treated as if they were ``int8`` or calculation, with the ``offset`` parameter, they are treated as if they
``int16`` types. (i.e. the offset treated as being in terms of number of 8 were ``int8`` or ``int16`` types (i.e. the offset treated as being in terms
or 16-bit elements.) Third, note that programIndex is implicitly added of number of 8 or 16-bit elements). Third, note that the value of
to offset. ``programIndex`` is implicitly added to offset.
The ``intbits()`` and ``floatbits()`` functions can be used to implement The ``intbits()`` and ``floatbits()`` functions can be used to implement
low-level floating-point bit twiddling. For example, ``intbits()`` returns low-level floating-point bit twiddling. For example, ``intbits()`` returns

View File

@@ -181,7 +181,7 @@ static inline uniform bool all(bool v) {
// As with any(), we need to explicitly mask v with the current program mask // As with any(), we need to explicitly mask v with the current program mask
// so we're only looking at the current lanes // so we're only looking at the current lanes
bool match = ((v & __mask) == __mask); bool match = ((v & __mask) == __mask);
return __movmsk(match) == (1 << programCount) - 1; return __movmsk((int)match) == (1 << programCount) - 1;
} }
static inline uniform int popcnt(uniform int v) { static inline uniform int popcnt(uniform int v) {
@@ -273,35 +273,71 @@ static inline uniform unsigned int reduce_max(unsigned int v) {
/////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////
// packed load, store // packed load, store
static inline uniform unsigned int packed_load_active(uniform int a[], uniform int start, static inline uniform int
reference int vals) { packed_load_active(uniform unsigned int a[], uniform int start,
reference unsigned int vals) {
return __packed_load_active(a, start, vals, __mask); return __packed_load_active(a, start, vals, __mask);
} }
static inline uniform unsigned int packed_store_active(uniform int a[], uniform int start, static inline uniform int
int vals) { packed_store_active(uniform unsigned int a[], uniform int start,
unsigned int vals) {
return __packed_store_active(a, start, vals, __mask);
}
static inline uniform int packed_load_active(uniform int a[], uniform int start,
reference int vals) {
return __packed_load_active(a, start, vals, __mask);
}
static inline uniform int packed_store_active(uniform int a[], uniform int start,
int vals) {
return __packed_store_active(a, start, vals, __mask); return __packed_store_active(a, start, vals, __mask);
} }
/////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////
// Load/store from/to 8/16-bit types // Load/store from/to 8/16-bit types
static inline unsigned int load_from_int8(uniform int a[], uniform int offset) { static inline int load_from_int8(uniform int a[], uniform int offset) {
return __load_uint8(a, offset); return __load_int8(a, offset, __mask);
}
static inline unsigned int load_from_uint8(uniform unsigned int a[],
uniform int offset) {
return __load_uint8(a, offset, __mask);
} }
static inline void store_to_int8(uniform int a[], uniform int offset, static inline void store_to_int8(uniform int a[], uniform int offset,
unsigned int val) { unsigned int val) {
__store_uint8(a, offset, val, __mask); __store_int8(a, offset, val, __mask);
} }
static inline unsigned int load_from_int16(uniform int a[], uniform int offset) { static inline void store_to_uint8(uniform unsigned int a[], uniform int offset,
return __load_uint16(a, offset); unsigned int val) {
// Can use __store_int8 for unsigned stuff, since it truncates bits in
// either case.
__store_int8(a, offset, val, __mask);
}
static inline int load_from_int16(uniform int a[], uniform int offset) {
return __load_int16(a, offset, __mask);
}
static inline unsigned int load_from_int16(uniform unsigned int a[],
uniform int offset) {
return __load_uint16(a, offset, __mask);
} }
static inline void store_to_int16(uniform int a[], uniform int offset, static inline void store_to_int16(uniform int a[], uniform int offset,
unsigned int val) { int val) {
__store_uint16(a, offset, val, __mask); __store_int16(a, offset, val, __mask);
}
static inline void store_to_uint16(uniform unsigned int a[], uniform int offset,
unsigned int val) {
// Can use __store_int16 for unsigned stuff, since it truncates bits in
// either case.
__store_int16(a, offset, val, __mask);
} }
/////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////

View File

@@ -557,33 +557,101 @@ define internal float @__stdlib_pow(float, float) nounwind readnone alwaysinline
;; $1: vector width of the target ;; $1: vector width of the target
define(`int8_16', ` define(`int8_16', `
define internal <$1 x i32> @__load_uint8([0 x i32] *, i32 %offset) nounwind alwaysinline { define internal <$1 x i32> @__load_uint8([0 x i32] *, i32 %offset,
<$1 x i32> %mask) nounwind alwaysinline {
%mm = call i32 @__movmsk(<$1 x i32> %mask)
%any = icmp ne i32 %mm, 0
br i1 %any, label %doload, label %skip
doload:
%ptr8 = bitcast [0 x i32] *%0 to i8 * %ptr8 = bitcast [0 x i32] *%0 to i8 *
%ptr = getelementptr i8 * %ptr8, i32 %offset %ptr = getelementptr i8 * %ptr8, i32 %offset
%ptr64 = bitcast i8 * %ptr to i`'eval(8*$1) * %ptr64 = bitcast i8 * %ptr to i`'eval(8*$1) *
%val = load i`'eval(8*$1) * %ptr64, align 1 %val = load i`'eval(8*$1) * %ptr64, align 1
%vval = bitcast i`'eval(8*$1) %val to <$1 x i8> %vval = bitcast i`'eval(8*$1) %val to <$1 x i8>
; were assuming unsigned, so zero-extend to i32... ; unsigned, so zero-extend to i32...
%ret = zext <$1 x i8> %vval to <$1 x i32> %ret = zext <$1 x i8> %vval to <$1 x i32>
ret <$1 x i32> %ret ret <$1 x i32> %ret
skip:
ret <$1 x i32> undef
} }
define internal <$1 x i32> @__load_uint16([0 x i32] *, i32 %offset) nounwind alwaysinline { define internal <$1 x i32> @__load_int8([0 x i32] *, i32 %offset,
<$1 x i32> %mask) nounwind alwaysinline {
%mm = call i32 @__movmsk(<$1 x i32> %mask)
%any = icmp ne i32 %mm, 0
br i1 %any, label %doload, label %skip
doload:
%ptr8 = bitcast [0 x i32] *%0 to i8 *
%ptr = getelementptr i8 * %ptr8, i32 %offset
%ptr64 = bitcast i8 * %ptr to i`'eval(8*$1) *
%val = load i`'eval(8*$1) * %ptr64, align 1
%vval = bitcast i`'eval(8*$1) %val to <$1 x i8>
; signed, so sign-extend to i32...
%ret = sext <$1 x i8> %vval to <$1 x i32>
ret <$1 x i32> %ret
skip:
ret <$1 x i32> undef
}
define internal <$1 x i32> @__load_uint16([0 x i32] *, i32 %offset,
<$1 x i32> %mask) nounwind alwaysinline {
%mm = call i32 @__movmsk(<$1 x i32> %mask)
%any = icmp ne i32 %mm, 0
br i1 %any, label %doload, label %skip
doload:
%ptr16 = bitcast [0 x i32] *%0 to i16 * %ptr16 = bitcast [0 x i32] *%0 to i16 *
%ptr = getelementptr i16 * %ptr16, i32 %offset %ptr = getelementptr i16 * %ptr16, i32 %offset
%ptr64 = bitcast i16 * %ptr to i`'eval(16*$1) * %ptr64 = bitcast i16 * %ptr to i`'eval(16*$1) *
%val = load i`'eval(16*$1) * %ptr64, align 2 %val = load i`'eval(16*$1) * %ptr64, align 2
%vval = bitcast i`'eval(16*$1) %val to <$1 x i16> %vval = bitcast i`'eval(16*$1) %val to <$1 x i16>
; unsigned, so use zero-extent... ; unsigned, so use zero-extend...
%ret = zext <$1 x i16> %vval to <$1 x i32> %ret = zext <$1 x i16> %vval to <$1 x i32>
ret <$1 x i32> %ret ret <$1 x i32> %ret
skip:
ret <$1 x i32> undef
} }
define internal void @__store_uint8([0 x i32] *, i32 %offset, <$1 x i32> %val32,
<$1 x i32> %mask) nounwind alwaysinline { define internal <$1 x i32> @__load_int16([0 x i32] *, i32 %offset,
<$1 x i32> %mask) nounwind alwaysinline {
%mm = call i32 @__movmsk(<$1 x i32> %mask)
%any = icmp ne i32 %mm, 0
br i1 %any, label %doload, label %skip
doload:
%ptr16 = bitcast [0 x i32] *%0 to i16 *
%ptr = getelementptr i16 * %ptr16, i32 %offset
%ptr64 = bitcast i16 * %ptr to i`'eval(16*$1) *
%val = load i`'eval(16*$1) * %ptr64, align 2
%vval = bitcast i`'eval(16*$1) %val to <$1 x i16>
; signed, so use sign-extend...
%ret = sext <$1 x i16> %vval to <$1 x i32>
ret <$1 x i32> %ret
skip:
ret <$1 x i32> undef
}
define internal void @__store_int8([0 x i32] *, i32 %offset, <$1 x i32> %val32,
<$1 x i32> %mask) nounwind alwaysinline {
%mm = call i32 @__movmsk(<$1 x i32> %mask)
%any = icmp ne i32 %mm, 0
br i1 %any, label %dostore, label %skip
dostore:
%val = trunc <$1 x i32> %val32 to <$1 x i8> %val = trunc <$1 x i32> %val32 to <$1 x i8>
%val64 = bitcast <$1 x i8> %val to i`'eval(8*$1) %val64 = bitcast <$1 x i8> %val to i`'eval(8*$1)
@@ -604,10 +672,18 @@ define internal void @__store_uint8([0 x i32] *, i32 %offset, <$1 x i32> %val32,
store i`'eval(8*$1) %final, i`'eval(8*$1) * %ptr64, align 1 store i`'eval(8*$1) %final, i`'eval(8*$1) * %ptr64, align 1
ret void ret void
skip:
ret void
} }
define internal void @__store_uint16([0 x i32] *, i32 %offset, <$1 x i32> %val32, define internal void @__store_int16([0 x i32] *, i32 %offset, <$1 x i32> %val32,
<$1 x i32> %mask) nounwind alwaysinline { <$1 x i32> %mask) nounwind alwaysinline {
%mm = call i32 @__movmsk(<$1 x i32> %mask)
%any = icmp ne i32 %mm, 0
br i1 %any, label %dostore, label %skip
dostore:
%val = trunc <$1 x i32> %val32 to <$1 x i16> %val = trunc <$1 x i32> %val32 to <$1 x i16>
%val64 = bitcast <$1 x i16> %val to i`'eval(16*$1) %val64 = bitcast <$1 x i16> %val to i`'eval(16*$1)
@@ -627,6 +703,9 @@ define internal void @__store_uint16([0 x i32] *, i32 %offset, <$1 x i32> %val32
store i`'eval(16*$1) %final, i`'eval(16*$1) * %ptr64, align 2 store i`'eval(16*$1) %final, i`'eval(16*$1) * %ptr64, align 2
ret void ret void
skip:
ret void
} }
' '
) )

View File

@@ -2,9 +2,9 @@
export uniform int width() { return programCount; } export uniform int width() { return programCount; }
export void f_f(uniform float RET[], uniform float aFOO[]) { export void f_f(uniform float RET[], uniform float aFOO[]) {
uniform int a[programCount]; uniform unsigned int a[programCount];
a[programIndex] = aFOO[programIndex]; a[programIndex] = aFOO[programIndex];
int aa; unsigned int aa;
packed_load_active(a, 0, aa); packed_load_active(a, 0, aa);
RET[programIndex] = aa; RET[programIndex] = aa;
} }

View File

@@ -3,10 +3,10 @@ export uniform int width() { return programCount; }
export void f_f(uniform float RET[], uniform float aFOO[]) { export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex]; float a = aFOO[programIndex];
uniform int pack[programCount]; uniform unsigned int pack[programCount];
for (uniform int i = 0; i < programCount; ++i) for (uniform int i = 0; i < programCount; ++i)
pack[i] = 0; pack[i] = 0;
packed_store_active(pack, 0, a); packed_store_active(pack, 0, (unsigned int)a);
RET[programIndex] = pack[programIndex]; RET[programIndex] = pack[programIndex];
} }

13
tests/shuffle2.ispc Normal file
View File

@@ -0,0 +1,13 @@
export uniform int width() { return programCount; }
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
int32 aa = aFOO[programIndex];
int32 bb = aa + programCount;
int32 shuf = shuffle(aa, bb, 1);
RET[programIndex] = shuf;
}
export void result(uniform float RET[]) {
RET[programIndex] = 2;
}

16
tests/store-int16-1.ispc Normal file
View File

@@ -0,0 +1,16 @@
export uniform int width() { return programCount; }
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
uniform int x[16];
for (uniform int i = 0; i < 16; ++i)
x[i] = 0xffffffff;
unsigned int val = aFOO[programIndex];
store_to_int16(x, 5, val);
unsigned int v = load_from_int16(x, 6);
RET[programIndex] = v;
}
export void result(uniform float RET[]) {
RET[programIndex] = 2+programIndex;
RET[programCount-1] = (unsigned int)0xffffffff;
}

View File

@@ -6,11 +6,11 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
x[i] = 0xffffffff; x[i] = 0xffffffff;
unsigned int val = aFOO[programIndex]; unsigned int val = aFOO[programIndex];
store_to_int16(x, 5, val); store_to_int16(x, 5, val);
unsigned int v = load_from_int16(x, 6); int v = load_from_int16(x, 6);
RET[programIndex] = v; RET[programIndex] = v;
} }
export void result(uniform float RET[]) { export void result(uniform float RET[]) {
RET[programIndex] = 2+programIndex; RET[programIndex] = 2+programIndex;
RET[programCount-1] = 0xffff; RET[programCount-1] = -1;
} }

16
tests/store-int8-1.ispc Normal file
View File

@@ -0,0 +1,16 @@
export uniform int width() { return programCount; }
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
uniform unsigned int x[8];
for (uniform int i = 0; i < 8; ++i)
x[i] = 0xffffffff;
unsigned int val = aFOO[programIndex];
store_to_uint8(x, 2, val);
unsigned int v = load_from_uint8(x, 1);
RET[programIndex] = v;
}
export void result(uniform float RET[]) {
RET[programIndex] = programIndex;
RET[0] = (unsigned int)0xff;
}

View File

@@ -6,11 +6,11 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
x[i] = 0xffffffff; x[i] = 0xffffffff;
unsigned int val = aFOO[programIndex]; unsigned int val = aFOO[programIndex];
store_to_int8(x, 2, val); store_to_int8(x, 2, val);
unsigned int v = load_from_int8(x, 1); int v = load_from_int8(x, 1);
RET[programIndex] = v; RET[programIndex] = v;
} }
export void result(uniform float RET[]) { export void result(uniform float RET[]) {
RET[programIndex] = programIndex; RET[programIndex] = programIndex;
RET[0] = 0xff; RET[0] = -1.;
} }