diff --git a/builtins.cpp b/builtins.cpp index d72bb371..d65c7308 100644 --- a/builtins.cpp +++ b/builtins.cpp @@ -151,6 +151,27 @@ lCreateISPCSymbol(llvm::Function *func, SymbolTable *symbolTable) { const llvm::FunctionType *ftype = func->getFunctionType(); std::string name = func->getName(); + // An unfortunate hack: we want this builtin function to have the + // signature "int __sext_varying_bool(bool)", but the ispc function + // symbol creation code below assumes that any LLVM vector of i32s is a + // varying int32. Here, we need that to be interpreted as a varying + // bool, so just have a one-off override for that one... + if (name == "__sext_varying_bool") { + const Type *returnType = AtomicType::VaryingInt32; + std::vector<const Type *> argTypes; + argTypes.push_back(AtomicType::VaryingBool); + std::vector<ConstExpr *> defaults; + defaults.push_back(NULL); + + FunctionType *funcType = new FunctionType(returnType, argTypes, noPos); + funcType->SetArgumentDefaults(defaults); + + Symbol *sym = new Symbol(name, noPos, funcType); + sym->function = func; + symbolTable->AddFunction(sym); + return true; + } + // If the function has any parameters with integer types, we'll make // two Symbols for two overloaded versions of the function, one with // all of the integer types treated as signed integers and one with all diff --git a/docs/ispc.txt b/docs/ispc.txt index 26d582e3..bee91ce6 100644 --- a/docs/ispc.txt +++ b/docs/ispc.txt @@ -509,11 +509,9 @@ is provided in parenthesis around the expression: double foo = 1. / 3.; int bar = (float)bar + (float)bar; // 32-bit float addition -Note: if a ``bool`` is converted to an integer numeric type (``int``, -``int64``, etc.), then the conversion is done with sign extension, not zero -extension. Thus, the resulting value has all bits set if the ``bool`` is -``true``; for example, ``0xffffffff`` for ``int32``. This differs from C -and C++, where a ``true`` bool is converted to the integer value one. 
+If a ``bool`` is converted to an integer numeric type (``int``, ``int64``, +etc.), then the result is the value one if the ``bool`` has the value +``true`` and has the value zero otherwise. Variables can be declared with the ``const`` qualifier, which prohibits their modification. @@ -1895,6 +1893,16 @@ code. Low-Level Bits -------------- +Sometimes it's useful to convert a ``bool`` value to an integer using sign +extension so that the integer's bits are all on if the ``bool`` has the +value ``true`` (rather than just having the value one). The +``sign_extend()`` functions provide this functionality: + +:: + + int sign_extend(bool value) + uniform int sign_extend(uniform bool value) + ``ispc`` provides a number of bit/memory-level utility routines in its standard library as well. It has routines that load from and store to 8-bit and 16-bit integer values stored in memory, converting to and from @@ -1964,7 +1972,6 @@ It, it clears the high order bit, to ensure that the given floating-point value is positive. This compiles down to a single ``andps`` instruction when used with an Intel® SSE target, for example. - Interoperability with the Application ===================================== diff --git a/expr.cpp b/expr.cpp index 0ce3fc16..6b20b7e3 100644 --- a/expr.cpp +++ b/expr.cpp @@ -3673,14 +3673,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, if (fromType->IsVaryingType() && LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType) exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, "bool_to_i1"); - // FIXME: we're currently doing sign extension rather than zero - // extension here, which means that ints will have the value - // 0xffffffff for 'true' bools (versus the value 1). There is - // some code in stdlib.ispc that depends on bool->int conversions - // having this behavior that needs to be cleaned up (e.g. to - // call a __sext() builtin to do bool->int conversions) if we - // are going to fix this here. 
- cast = ctx->SExtInst(exprVal, targetType, "bool2int"); + cast = ctx->ZExtInst(exprVal, targetType, "bool2int"); break; case AtomicType::TYPE_INT32: case AtomicType::TYPE_UINT32: @@ -3712,9 +3705,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, if (fromType->IsVaryingType() && LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType) exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, "bool_to_i1"); - // FIXME: See comments above w.r.t. fixing this to be a - // ZExtInst rather than an SExtInst... - cast = ctx->SExtInst(exprVal, targetType, "bool2uint"); + cast = ctx->ZExtInst(exprVal, targetType, "bool2uint"); break; case AtomicType::TYPE_INT32: case AtomicType::TYPE_UINT32: @@ -3752,7 +3743,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, if (fromType->IsVaryingType() && LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType) exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, "bool_to_i1"); - cast = ctx->SExtInst(exprVal, targetType, "bool2int64"); + cast = ctx->ZExtInst(exprVal, targetType, "bool2int64"); break; case AtomicType::TYPE_INT32: cast = ctx->SExtInst(exprVal, targetType, "int32_to_int64"); @@ -3786,7 +3777,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal, if (fromType->IsVaryingType() && LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType) exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, "bool_to_i1"); - cast = ctx->SExtInst(exprVal, targetType, "bool2uint"); + cast = ctx->ZExtInst(exprVal, targetType, "bool2uint"); break; case AtomicType::TYPE_INT32: cast = ctx->SExtInst(exprVal, targetType, "int32_to_uint64"); diff --git a/stdlib.ispc b/stdlib.ispc index ec94c4c8..5baaa2f1 100644 --- a/stdlib.ispc +++ b/stdlib.ispc @@ -197,18 +197,25 @@ static inline unsigned int64 insert(unsigned int64 x, uniform int i, return __insert_int64(x, (unsigned int)i, v); } +static inline uniform int32 sign_extend(uniform bool v) { + return __sext_uniform_bool(v); +} + +static 
inline int32 sign_extend(bool v) { + return __sext_varying_bool(v); +} static inline uniform bool any(bool v) { // We only care about whether "any" is true for the active program instances, // so we have to make v with the current program mask. - return __movmsk(v & __mask) != 0; + return __movmsk(__sext_varying_bool(v) & __mask) != 0; } static inline uniform bool all(bool v) { // As with any(), we need to explicitly mask v with the current program mask // so we're only looking at the current lanes - bool match = ((v & __mask) == __mask); - return __movmsk((int)match) == (1 << programCount) - 1; + int32 match = __sext_varying_bool((__sext_varying_bool(v) & __mask) == __mask); + return __movmsk(match) == (1 << programCount) - 1; } static inline uniform int popcnt(uniform int v) { @@ -235,7 +242,7 @@ static inline int popcnt(int64 v) { static inline uniform int popcnt(bool v) { // As with any() and all(), only count across the active lanes - return __popcnt_int32(__movmsk(v & __mask)); + return __popcnt_int32(__movmsk(__sext_varying_bool(v) & __mask)); } static inline uniform int lanemask() { @@ -271,7 +278,7 @@ static inline uniform float reduce_max(float v) { static inline uniform int reduce_add(int x) { // Zero out the values for lanes that aren't running - return __reduce_add_int32(x & (int)__mask); + return __reduce_add_int32(x & __mask); } static inline uniform int reduce_min(int v) { @@ -291,7 +298,7 @@ static inline uniform int reduce_max(int v) { static inline uniform unsigned int reduce_add(unsigned int x) { // Set values for non-running lanes to zero so they don't affect the // result. 
- return __reduce_add_uint32(x & (int)__mask); + return __reduce_add_uint32(x & __mask); } static inline uniform unsigned int reduce_min(unsigned int v) { @@ -329,7 +336,7 @@ static inline uniform double reduce_max(double v) { static inline uniform int64 reduce_add(int64 x) { // Zero out the values for lanes that aren't running - return __reduce_add_int64(x & (int64)__mask); + return __reduce_add_int64(x & (int64)(__mask)); } static inline uniform int64 reduce_min(int64 v) { @@ -349,7 +356,7 @@ static inline uniform int64 reduce_max(int64 v) { static inline uniform unsigned int64 reduce_add(unsigned int64 x) { // Set values for non-running lanes to zero so they don't affect the // result. - return __reduce_add_int64(x & (int64)__mask); + return __reduce_add_int64(x & (int64)(__mask)); } static inline uniform unsigned int64 reduce_min(unsigned int64 v) { diff --git a/stdlib.m4 b/stdlib.m4 index 797aeb51..49184d85 100644 --- a/stdlib.m4 +++ b/stdlib.m4 @@ -644,6 +644,17 @@ define internal float @__undef_uniform() nounwind readnone alwaysinline { ret float undef } +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; sign extension + +define internal i32 @__sext_uniform_bool(i1) nounwind readnone alwaysinline { + %r = sext i1 %0 to i32 + ret i32 %r +} + +define internal <$1 x i32> @__sext_varying_bool(<$1 x i32>) nounwind readnone alwaysinline { + ret <$1 x i32> %0 +} ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; stdlib transcendentals diff --git a/tests/movmsk-opt.ispc b/tests/movmsk-opt.ispc index 70ce5211..6a360dff 100644 --- a/tests/movmsk-opt.ispc +++ b/tests/movmsk-opt.ispc @@ -20,7 +20,7 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { float v = float4(1,1,0,0); bool b = (v == 1.); - ret = __movmsk(((int)b)); + ret = __movmsk((sign_extend(b))); RET[programIndex] = ret; } diff --git a/tests/sign-extend-1.ispc b/tests/sign-extend-1.ispc new file mode 100644 index 
00000000..07e082c7 --- /dev/null +++ b/tests/sign-extend-1.ispc @@ -0,0 +1,15 @@ + +export uniform int width() { return programCount; } + + +export void f_f(uniform float RET[], uniform float aFOO[]) { + float a = aFOO[programIndex]; + bool b = (a == 1.); + int32 s = sign_extend(b); + RET[programIndex] = (s == 0xffffffff) ? 16 : 2; +} + +export void result(uniform float RET[]) { + RET[programIndex] = 2; + RET[0] = 16; +} diff --git a/tests/sign-extend.ispc b/tests/sign-extend.ispc new file mode 100644 index 00000000..05521e4f --- /dev/null +++ b/tests/sign-extend.ispc @@ -0,0 +1,14 @@ + +export uniform int width() { return programCount; } + + +export void f_f(uniform float RET[], uniform float aFOO[]) { + float a = aFOO[programIndex]; + bool b = (a == 1.); + RET[programIndex] = (int)b; +} + +export void result(uniform float RET[]) { + RET[programIndex] = 0; + RET[0] = 1; +}