Choose type for integer literals to match the target mask size (if possible).

On a target with a 16-bit mask (for example), we would choose the type
of an integer literal like "1024" to be int16.  Previously, we used int32,
which is a worse fit and leads to less efficient code on a 16-bit mask
target.  (However, we'd still give an integer literal like 1000000 the
type int32, even on a 16-bit target, since it doesn't fit in 16 bits.)
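
As a sketch of the consequence for shifts (a hypothetical illustration,
not code from this commit): on a 16-bit mask target, an unsuffixed
literal used in a shift may now be typed at 16 bits, so expressions
that need a full 32-bit result must pin the literal's type with a
suffix, which is what the suffixes added below do.

    // Hypothetical example, assuming a 16-bit mask target:
    uniform int a = 1 << 23;    // literal "1" may be typed int16; the shift can overflow
    uniform int b = 1ul << 23;  // "ul" suffix forces a 32-bit literal; b == 0x800000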

Updated the tests so that they still pass on 8- and 16-bit targets,
given this change.
Author: Matt Pharr
Date:   2013-07-23 17:01:03 -07:00
commit f7f281a256
parent 9ba49eabb2

61 changed files with 166 additions and 120 deletions


@@ -3126,7 +3126,7 @@ static inline void __range_reduce_log(float input, varying float * uniform reduc
     static const int nonexponent_mask = 0x807FFFFF;
     // We want the reduced version to have an exponent of -1 which is -1 + 127 after biasing or 126
-    static const int exponent_neg1 = (126 << 23);
+    static const int exponent_neg1 = (126l << 23);
     // NOTE(boulos): We don't need to mask anything out since we know
     // the sign bit has to be 0. If it's 1, we need to return infinity/nan
     // anyway (log(x), x = +-0 -> infinity, x < 0 -> NaN).
@@ -3149,7 +3149,7 @@ static inline void __range_reduce_log(uniform float input, uniform float * unifo
     uniform int int_version = intbits(input);
     static const uniform int nonexponent_mask = 0x807FFFFF;
-    static const uniform int exponent_neg1 = (126 << 23);
+    static const uniform int exponent_neg1 = (126ul << 23);
     uniform int biased_exponent = int_version >> 23;
     uniform int offset_exponent = biased_exponent + 1;
     *exponent = offset_exponent - 127; // get the real value
@@ -3647,18 +3647,18 @@ static inline uniform float half_to_float(uniform unsigned int16 h) {
     else {
         // https://gist.github.com/2144712
         // Fabian "ryg" Giesen.
-        static const uniform unsigned int32 shifted_exp = 0x7c00 << 13; // exponent mask after shift
+        static const uniform unsigned int32 shifted_exp = 0x7c00ul << 13; // exponent mask after shift
         uniform int32 o = ((int32)(h & 0x7fff)) << 13; // exponent/mantissa bits
         uniform unsigned int32 exp = shifted_exp & o; // just the exponent
-        o += (127 - 15) << 23; // exponent adjust
+        o += (uniform int32)(127 - 15) << 23; // exponent adjust
         // handle exponent special cases
         if (exp == shifted_exp) // Inf/NaN?
-            o += (128 - 16) << 23; // extra exp adjust
+            o += (uniform unsigned int32)(128 - 16) << 23; // extra exp adjust
         else if (exp == 0) { // Zero/Denormal?
-            o += 1 << 23; // extra exp adjust
-            o = intbits(floatbits(o) - floatbits(113 << 23)); // renormalize
+            o += 1ul << 23; // extra exp adjust
+            o = intbits(floatbits(o) - floatbits(113ul << 23)); // renormalize
         }
         o |= ((int32)(h & 0x8000)) << 16; // sign bit
@@ -3675,17 +3675,17 @@ static inline float half_to_float(unsigned int16 h) {
         // https://gist.github.com/2144712
         // Fabian "ryg" Giesen.
-        const unsigned int32 shifted_exp = 0x7c00 << 13; // exponent mask after shift
+        const unsigned int32 shifted_exp = 0x7c00ul << 13; // exponent mask after shift
-        int32 o = ((int32)(h & 0x7fff)) << 13; // exponent/mantissa bits
+        int32 o = ((int32)(h & 0x7ffful)) << 13; // exponent/mantissa bits
         unsigned int32 exp = shifted_exp & o; // just the exponent
-        o += (127 - 15) << 23; // exponent adjust
+        o += (int32)(127 - 15) << 23; // exponent adjust
-        int32 infnan_val = o + ((128 - 16) << 23);
-        int32 zerodenorm_val = intbits(floatbits(o + (1<<23)) - floatbits(113 << 23));
+        int32 infnan_val = o + ((int32)(128 - 16) << 23);
+        int32 zerodenorm_val = intbits(floatbits(o + (1ul<<23)) - floatbits(113ul << 23));
         int32 reg_val = (exp == 0) ? zerodenorm_val : o;
-        int32 sign_bit = ((int32)(h & 0x8000)) << 16;
+        int32 sign_bit = ((int32)(h & 0x8000ul)) << 16;
         return floatbits(((exp == shifted_exp) ? infnan_val : reg_val) | sign_bit);
     }
 }
@@ -3715,16 +3715,16 @@ static inline uniform int16 float_to_half(uniform float f) {
     // NaN->qNaN and Inf->Inf
     // unconditional assignment here, will override with right value for
     // the regular case below.
-    uniform int32 f32infty = 255 << 23;
-    o = (fint > f32infty) ? 0x7e00 : 0x7c00;
+    uniform int32 f32infty = 255ul << 23;
+    o = (fint > f32infty) ? 0x7e00u : 0x7c00u;
     // (De)normalized number or zero
     // update fint unconditionally to save the blending; we don't need it
     // anymore for the Inf/NaN case anyway.
-    const uniform unsigned int32 round_mask = ~0xfffu;
-    const uniform int32 magic = 15 << 23;
-    const uniform int32 f16infty = 31 << 23;
+    const uniform unsigned int32 round_mask = ~0xffful;
+    const uniform int32 magic = 15ul << 23;
+    const uniform int32 f16infty = 31ul << 23;
     uniform int32 fint2 = intbits(floatbits(fint & round_mask) * floatbits(magic)) - round_mask;
     fint2 = (fint2 > f16infty) ? f16infty : fint2; // Clamp to signed infinity if overflowed
@@ -3761,16 +3761,16 @@ static inline int16 float_to_half(float f) {
     // NaN->qNaN and Inf->Inf
     // unconditional assignment here, will override with right value for
     // the regular case below.
-    int32 f32infty = 255 << 23;
-    o = (fint > f32infty) ? 0x7e00 : 0x7c00;
+    int32 f32infty = 255ul << 23;
+    o = (fint > f32infty) ? 0x7e00u : 0x7c00u;
     // (De)normalized number or zero
     // update fint unconditionally to save the blending; we don't need it
     // anymore for the Inf/NaN case anyway.
-    const unsigned int32 round_mask = ~0xfffu;
-    const int32 magic = 15 << 23;
-    const int32 f16infty = 31 << 23;
+    const unsigned int32 round_mask = ~0xffful;
+    const int32 magic = 15ul << 23;
+    const int32 f16infty = 31ul << 23;
     // Shift exponent down, denormalize if necessary.
     // NOTE This represents half-float denormals using single precision denormals.
@@ -3789,7 +3789,7 @@ static inline int16 float_to_half(float f) {
     // FP16 denormals are rare in practice, I don't know. Whatever slow path your HW
     // may or may not have for denormals, this may well hit it.
     float fscale = floatbits(fint & round_mask) * floatbits(magic);
-    fscale = min(fscale, floatbits((31 << 23) - 0x1000));
+    fscale = min(fscale, floatbits((31ul << 23) - 0x1000ul));
     int32 fint2 = intbits(fscale) - round_mask;
     if (fint < f32infty)
@@ -3956,7 +3956,7 @@ float_to_srgb8(float inval)
     // Do the table lookup and unpack bias, scale
     unsigned int tab = table[(intbits(inval) - 0x39000000u) >> 20];
     unsigned int bias = (tab >> 16) << 9;
-    unsigned int scale = tab & 0xffff;
+    unsigned int scale = tab & 0xfffful;
     // Grab next-highest mantissa bits and perform linear interpolation
     unsigned int t = (intbits(inval) >> 12) & 0xff;
@@ -4006,7 +4006,7 @@ float_to_srgb8(uniform float inval)
     // Do the table lookup and unpack bias, scale
     uniform unsigned int tab = table[(intbits(inval) - 0x39000000u) >> 20];
    uniform unsigned int bias = (tab >> 16) << 9;
-    uniform unsigned int scale = tab & 0xffff;
+    uniform unsigned int scale = tab & 0xfffful;
     // Grab next-highest mantissa bits and perform linear interpolation
     uniform unsigned int t = (intbits(inval) >> 12) & 0xff;
@@ -4053,14 +4053,14 @@ static inline uniform unsigned int random(uniform RNGState * uniform state)
 static inline float frandom(varying RNGState * uniform state)
 {
     unsigned int irand = random(state);
-    irand &= (1<<23)-1;
+    irand &= (1ul<<23)-1;
     return floatbits(0x3F800000 | irand)-1.0f;
 }
 static inline uniform float frandom(uniform RNGState * uniform state)
 {
     uniform unsigned int irand = random(state);
-    irand &= (1<<23)-1;
+    irand &= (1ul<<23)-1;
     return floatbits(0x3F800000 | irand)-1.0f;
 }
@@ -4068,18 +4068,18 @@ static inline void seed_rng(varying RNGState * uniform state,
                             unsigned int seed) {
     state->z1 = seed;
     state->z2 = seed ^ 0xbeeff00d;
-    state->z3 = ((seed & 0xffff) << 16) | (seed >> 16);
-    state->z4 = (((seed & 0xff) << 24) | ((seed & 0xff00) << 8) |
-                 ((seed & 0xff0000) >> 8) | (seed & 0xff000000) >> 24);
+    state->z3 = ((seed & 0xfffful) << 16) | (seed >> 16);
+    state->z4 = (((seed & 0xfful) << 24) | ((seed & 0xff00ul) << 8) |
+                 ((seed & 0xff0000ul) >> 8) | (seed & 0xff000000ul) >> 24);
 }
 static inline void seed_rng(uniform RNGState * uniform state,
                             uniform unsigned int seed) {
     state->z1 = seed;
     state->z2 = seed ^ 0xbeeff00d;
-    state->z3 = ((seed & 0xffff) << 16) | (seed >> 16);
-    state->z4 = (((seed & 0xff) << 24) | ((seed & 0xff00) << 8) |
-                 ((seed & 0xff0000) >> 8) | (seed & 0xff000000) >> 24);
+    state->z3 = ((seed & 0xfffful) << 16) | (seed >> 16);
+    state->z4 = (((seed & 0xfful) << 24) | ((seed & 0xff00ul) << 8) |
+                 ((seed & 0xff0000ul) >> 8) | (seed & 0xff000000ul) >> 24);
 }
@@ -4097,7 +4097,7 @@ static inline uniform bool rdrand(float * uniform ptr) {
     uniform int32 irand;
     uniform bool success = __rdrand_i32(&irand);
     if (success) {
-        irand &= (1<<23)-1;
+        irand &= (1ul<<23)-1;
         *ptr = floatbits(0x3F800000 | irand)-1.0f;
     }
     return success;
@@ -4117,7 +4117,7 @@ static inline bool rdrand(varying float * uniform ptr) {
         // in vector form. However, we need to be careful to not
         // clobber any existing already-set values in *ptr with
         // inactive lanes here...
-        irand &= (1<<23)-1;
+        irand &= (1ul<<23)-1;
         *ptr = floatbits(0x3F800000 | irand)-1.0f;
         success = true;
     }
@@ -4137,7 +4137,7 @@ static inline bool rdrand(float * ptr) {
     foreach_active (index) {
         uniform int32 irand;
         if (__rdrand_i32(&irand)) {
-            irand &= (1<<23)-1;
+            irand &= (1ul<<23)-1;
             *ptrs[index] = floatbits(0x3F800000 | irand)-1.0f;
             success = true;
         }