Fix issue #2: use zero extend to convert bool->int, not sign extend.

This way, we match C/C++ in that casting a bool to an int gives either the value
zero or the value one.  There is a new stdlib function int sign_extend(bool)
that does sign extension for cases where that's desired.
This commit is contained in:
Matt Pharr
2011-07-12 13:30:05 +01:00
parent 6e8af5038b
commit a535aa586b
8 changed files with 94 additions and 28 deletions

View File

@@ -151,6 +151,27 @@ lCreateISPCSymbol(llvm::Function *func, SymbolTable *symbolTable) {
const llvm::FunctionType *ftype = func->getFunctionType();
std::string name = func->getName();
// An unfortunate hack: we want this builtin function to have the
// signature "int __sext_varying_bool(bool)", but the ispc function
// symbol creation code below assumes that any LLVM vector of i32s is a
// varying int32. Here, we need that to be interpreted as a varying
// bool, so just have a one-off override for that one...
if (name == "__sext_varying_bool") {
const Type *returnType = AtomicType::VaryingInt32;
std::vector<const Type *> argTypes;
argTypes.push_back(AtomicType::VaryingBool);
std::vector<ConstExpr *> defaults;
defaults.push_back(NULL);
FunctionType *funcType = new FunctionType(returnType, argTypes, noPos);
funcType->SetArgumentDefaults(defaults);
Symbol *sym = new Symbol(name, noPos, funcType);
sym->function = func;
symbolTable->AddFunction(sym);
return true;
}
// If the function has any parameters with integer types, we'll make
// two Symbols for two overloaded versions of the function, one with
// all of the integer types treated as signed integers and one with all

View File

@@ -509,11 +509,9 @@ is provided in parenthesis around the expression:
double foo = 1. / 3.;
int bar = (float)bar + (float)bar; // 32-bit float addition
Note: if a ``bool`` is converted to an integer numeric type (``int``,
``int64``, etc.), then the conversion is done with sign extension, not zero
extension. Thus, the resulting value has all bits set if the ``bool`` is
``true``; for example, ``0xffffffff`` for ``int32``. This differs from C
and C++, where a ``true`` bool is converted to the integer value one.
If a ``bool`` is converted to an integer numeric type (``int``, ``int64``,
etc.), then the result is one if the ``bool`` has the value ``true`` and
zero otherwise.
Variables can be declared with the ``const`` qualifier, which prohibits
their modification.
@@ -1895,6 +1893,16 @@ code.
Low-Level Bits
--------------
Sometimes it's useful to convert a ``bool`` value to an integer using sign
extension so that the integer's bits are all on if the ``bool`` has the
value ``true`` (rather than just having the value one). The
``sign_extend()`` functions provide this functionality:
::
int sign_extend(bool value)
uniform int sign_extend(uniform bool value)
``ispc`` provides a number of bit/memory-level utility routines in its
standard library as well. It has routines that load from and store
to 8-bit and 16-bit integer values stored in memory, converting to and from
@@ -1964,7 +1972,6 @@ It, it clears the high order bit, to ensure that the given floating-point
value is positive. This compiles down to a single ``andps`` instruction
when used with an Intel® SSE target, for example.
Interoperability with the Application
=====================================

View File

@@ -3673,14 +3673,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
if (fromType->IsVaryingType() &&
LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType)
exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, "bool_to_i1");
// FIXME: we're currently doing sign extension rather than zero
// extension here, which means that ints will have the value
// 0xffffffff for 'true' bools (versus the value 1). There is
// some code in stdlib.ispc that depends on bool->int conversions
// having this behavior that needs to be cleaned up (e.g. to
// call a __sext() builtin to do bool->int conversions) if we
// are going to fix this here.
cast = ctx->SExtInst(exprVal, targetType, "bool2int");
cast = ctx->ZExtInst(exprVal, targetType, "bool2int");
break;
case AtomicType::TYPE_INT32:
case AtomicType::TYPE_UINT32:
@@ -3712,9 +3705,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
if (fromType->IsVaryingType() &&
LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType)
exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, "bool_to_i1");
// FIXME: See comments above w.r.t. fixing this to be a
// ZExtInst rather than an SExtInst...
cast = ctx->SExtInst(exprVal, targetType, "bool2uint");
cast = ctx->ZExtInst(exprVal, targetType, "bool2uint");
break;
case AtomicType::TYPE_INT32:
case AtomicType::TYPE_UINT32:
@@ -3752,7 +3743,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
if (fromType->IsVaryingType() &&
LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType)
exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, "bool_to_i1");
cast = ctx->SExtInst(exprVal, targetType, "bool2int64");
cast = ctx->ZExtInst(exprVal, targetType, "bool2int64");
break;
case AtomicType::TYPE_INT32:
cast = ctx->SExtInst(exprVal, targetType, "int32_to_int64");
@@ -3786,7 +3777,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
if (fromType->IsVaryingType() &&
LLVMTypes::BoolVectorType == LLVMTypes::Int32VectorType)
exprVal = ctx->TruncInst(exprVal, LLVMTypes::Int1VectorType, "bool_to_i1");
cast = ctx->SExtInst(exprVal, targetType, "bool2uint");
cast = ctx->ZExtInst(exprVal, targetType, "bool2uint");
break;
case AtomicType::TYPE_INT32:
cast = ctx->SExtInst(exprVal, targetType, "int32_to_uint64");

View File

@@ -197,18 +197,25 @@ static inline unsigned int64 insert(unsigned int64 x, uniform int i,
return __insert_int64(x, (unsigned int)i, v);
}
// Sign-extends a uniform bool to a uniform int32 by delegating to the
// __sext_uniform_bool builtin, so a true input yields a value with every
// bit set rather than the value one.
static inline uniform int32 sign_extend(uniform bool v) {
return __sext_uniform_bool(v);
}
// Varying overload of sign_extend(): forwards to the __sext_varying_bool
// builtin and returns its int32 result.
static inline int32 sign_extend(bool v) {
return __sext_varying_bool(v);
}
static inline uniform bool any(bool v) {
// We only care about whether "any" is true for the active program instances,
// so we have to mask v with the current program mask.
return __movmsk(v & __mask) != 0;
return __movmsk(__sext_varying_bool(v) & __mask) != 0;
}
static inline uniform bool all(bool v) {
// As with any(), we need to explicitly mask v with the current program mask
// so we're only looking at the current lanes
bool match = ((v & __mask) == __mask);
return __movmsk((int)match) == (1 << programCount) - 1;
int32 match = __sext_varying_bool((__sext_varying_bool(v) & __mask) == __mask);
return __movmsk(match) == (1 << programCount) - 1;
}
static inline uniform int popcnt(uniform int v) {
@@ -235,7 +242,7 @@ static inline int popcnt(int64 v) {
static inline uniform int popcnt(bool v) {
// As with any() and all(), only count across the active lanes
return __popcnt_int32(__movmsk(v & __mask));
return __popcnt_int32(__movmsk(__sext_varying_bool(v) & __mask));
}
static inline uniform int lanemask() {
@@ -271,7 +278,7 @@ static inline uniform float reduce_max(float v) {
static inline uniform int reduce_add(int x) {
// Zero out the values for lanes that aren't running
return __reduce_add_int32(x & (int)__mask);
return __reduce_add_int32(x & __mask);
}
static inline uniform int reduce_min(int v) {
@@ -291,7 +298,7 @@ static inline uniform int reduce_max(int v) {
static inline uniform unsigned int reduce_add(unsigned int x) {
// Set values for non-running lanes to zero so they don't affect the
// result.
return __reduce_add_uint32(x & (int)__mask);
return __reduce_add_uint32(x & __mask);
}
static inline uniform unsigned int reduce_min(unsigned int v) {
@@ -329,7 +336,7 @@ static inline uniform double reduce_max(double v) {
static inline uniform int64 reduce_add(int64 x) {
// Zero out the values for lanes that aren't running
return __reduce_add_int64(x & (int64)__mask);
return __reduce_add_int64(x & (int64)(__mask));
}
static inline uniform int64 reduce_min(int64 v) {
@@ -349,7 +356,7 @@ static inline uniform int64 reduce_max(int64 v) {
static inline uniform unsigned int64 reduce_add(unsigned int64 x) {
// Set values for non-running lanes to zero so they don't affect the
// result.
return __reduce_add_int64(x & (int64)__mask);
return __reduce_add_int64(x & (int64)(__mask));
}
static inline uniform unsigned int64 reduce_min(unsigned int64 v) {

View File

@@ -644,6 +644,17 @@ define internal float @__undef_uniform() nounwind readnone alwaysinline {
ret float undef
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sign extension
;; Sign-extends a single i1 to i32: a set bit becomes -1 (all bits on) and a
;; clear bit becomes 0.
define internal i32 @__sext_uniform_bool(i1) nounwind readnone alwaysinline {
%r = sext i1 %0 to i32
ret i32 %r
}
;; Returns its <$1 x i32> argument unchanged.
;; NOTE(review): presumably this counts as sign extension because varying
;; bools are already held as i32 lanes with all bits set for true -- confirm
;; against the bool vector representation used by the target.
define internal <$1 x i32> @__sext_varying_bool(<$1 x i32>) nounwind readnone alwaysinline {
ret <$1 x i32> %0
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; stdlib transcendentals

View File

@@ -20,7 +20,7 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
float v = float4(1,1,0,0);
bool b = (v == 1.);
ret = __movmsk(((int)b));
ret = __movmsk((sign_extend(b)));
RET[programIndex] = ret;
}

15
tests/sign-extend-1.ispc Normal file
View File

@@ -0,0 +1,15 @@
// Exported helper that returns programCount.
export uniform int width() { return programCount; }
// Test body for sign_extend(): each program instance compares its aFOO
// element against 1, sign-extends the resulting bool, and writes 16 to its
// RET slot if the extended value equals 0xffffffff, or 2 otherwise.
export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex];
bool b = (a == 1.);
int32 s = sign_extend(b);
RET[programIndex] = (s == 0xffffffff) ? 16 : 2;
}
// Expected output for f_f(): 2 in every slot, then 16 written to RET[0].
// NOTE(review): this implies only the first aFOO element equals 1 --
// confirm against the test harness's input data.
export void result(uniform float RET[]) {
RET[programIndex] = 2;
RET[0] = 16;
}

14
tests/sign-extend.ispc Normal file
View File

@@ -0,0 +1,14 @@
// Exported helper that returns programCount.
export uniform int width() { return programCount; }
// Test body for the plain bool->int cast: each program instance compares its
// aFOO element against 1 and stores the bool, cast to int, in its RET slot.
export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex];
bool b = (a == 1.);
RET[programIndex] = (int)b;
}
// Expected output for f_f(): 0 in every slot, then 1 written to RET[0],
// i.e. a true bool must convert to the value one rather than all bits set.
// NOTE(review): this implies only the first aFOO element equals 1 --
// confirm against the test harness's input data.
export void result(uniform float RET[]) {
RET[programIndex] = 0;
RET[0] = 1;
}