Choose type for integer literals to match the target mask size (if possible).
On a target with a 16-bit mask, for example, the integer literal "1024" is now given the type int16. Previously it was typed as int32, which is a worse fit on a 16-bit mask target and leads to less efficient code than int16. (However, a literal that does not fit in 16 bits, such as 1000000, is still given the type int32, even on a 16-bit target.) The tests have been updated so that they still pass on 8- and 16-bit targets given this change.
This commit is contained in:
74
stdlib.ispc
74
stdlib.ispc
@@ -3126,7 +3126,7 @@ static inline void __range_reduce_log(float input, varying float * uniform reduc
|
||||
static const int nonexponent_mask = 0x807FFFFF;
|
||||
|
||||
// We want the reduced version to have an exponent of -1 which is -1 + 127 after biasing or 126
|
||||
static const int exponent_neg1 = (126 << 23);
|
||||
static const int exponent_neg1 = (126l << 23);
|
||||
// NOTE(boulos): We don't need to mask anything out since we know
|
||||
// the sign bit has to be 0. If it's 1, we need to return infinity/nan
|
||||
// anyway (log(x), x = +-0 -> infinity, x < 0 -> NaN).
|
||||
@@ -3149,7 +3149,7 @@ static inline void __range_reduce_log(uniform float input, uniform float * unifo
|
||||
uniform int int_version = intbits(input);
|
||||
static const uniform int nonexponent_mask = 0x807FFFFF;
|
||||
|
||||
static const uniform int exponent_neg1 = (126 << 23);
|
||||
static const uniform int exponent_neg1 = (126ul << 23);
|
||||
uniform int biased_exponent = int_version >> 23;
|
||||
uniform int offset_exponent = biased_exponent + 1;
|
||||
*exponent = offset_exponent - 127; // get the real value
|
||||
@@ -3647,18 +3647,18 @@ static inline uniform float half_to_float(uniform unsigned int16 h) {
|
||||
else {
|
||||
// https://gist.github.com/2144712
|
||||
// Fabian "ryg" Giesen.
|
||||
static const uniform unsigned int32 shifted_exp = 0x7c00 << 13; // exponent mask after shift
|
||||
static const uniform unsigned int32 shifted_exp = 0x7c00ul << 13; // exponent mask after shift
|
||||
|
||||
uniform int32 o = ((int32)(h & 0x7fff)) << 13; // exponent/mantissa bits
|
||||
uniform unsigned int32 exp = shifted_exp & o; // just the exponent
|
||||
o += (127 - 15) << 23; // exponent adjust
|
||||
o += (uniform int32)(127 - 15) << 23; // exponent adjust
|
||||
|
||||
// handle exponent special cases
|
||||
if (exp == shifted_exp) // Inf/NaN?
|
||||
o += (128 - 16) << 23; // extra exp adjust
|
||||
o += (uniform unsigned int32)(128 - 16) << 23; // extra exp adjust
|
||||
else if (exp == 0) { // Zero/Denormal?
|
||||
o += 1 << 23; // extra exp adjust
|
||||
o = intbits(floatbits(o) - floatbits(113 << 23)); // renormalize
|
||||
o += 1ul << 23; // extra exp adjust
|
||||
o = intbits(floatbits(o) - floatbits(113ul << 23)); // renormalize
|
||||
}
|
||||
|
||||
o |= ((int32)(h & 0x8000)) << 16; // sign bit
|
||||
@@ -3675,17 +3675,17 @@ static inline float half_to_float(unsigned int16 h) {
|
||||
// https://gist.github.com/2144712
|
||||
// Fabian "ryg" Giesen.
|
||||
|
||||
const unsigned int32 shifted_exp = 0x7c00 << 13; // exponent mask after shift
|
||||
const unsigned int32 shifted_exp = 0x7c00ul << 13; // exponent mask after shift
|
||||
|
||||
int32 o = ((int32)(h & 0x7fff)) << 13; // exponent/mantissa bits
|
||||
int32 o = ((int32)(h & 0x7ffful)) << 13; // exponent/mantissa bits
|
||||
unsigned int32 exp = shifted_exp & o; // just the exponent
|
||||
o += (127 - 15) << 23; // exponent adjust
|
||||
o += (int32)(127 - 15) << 23; // exponent adjust
|
||||
|
||||
int32 infnan_val = o + ((128 - 16) << 23);
|
||||
int32 zerodenorm_val = intbits(floatbits(o + (1<<23)) - floatbits(113 << 23));
|
||||
int32 infnan_val = o + ((int32)(128 - 16) << 23);
|
||||
int32 zerodenorm_val = intbits(floatbits(o + (1ul<<23)) - floatbits(113ul << 23));
|
||||
int32 reg_val = (exp == 0) ? zerodenorm_val : o;
|
||||
|
||||
int32 sign_bit = ((int32)(h & 0x8000)) << 16;
|
||||
int32 sign_bit = ((int32)(h & 0x8000ul)) << 16;
|
||||
return floatbits(((exp == shifted_exp) ? infnan_val : reg_val) | sign_bit);
|
||||
}
|
||||
}
|
||||
@@ -3715,16 +3715,16 @@ static inline uniform int16 float_to_half(uniform float f) {
|
||||
// NaN->qNaN and Inf->Inf
|
||||
// unconditional assignment here, will override with right value for
|
||||
// the regular case below.
|
||||
uniform int32 f32infty = 255 << 23;
|
||||
o = (fint > f32infty) ? 0x7e00 : 0x7c00;
|
||||
uniform int32 f32infty = 255ul << 23;
|
||||
o = (fint > f32infty) ? 0x7e00u : 0x7c00u;
|
||||
|
||||
// (De)normalized number or zero
|
||||
// update fint unconditionally to save the blending; we don't need it
|
||||
// anymore for the Inf/NaN case anyway.
|
||||
|
||||
const uniform unsigned int32 round_mask = ~0xfffu;
|
||||
const uniform int32 magic = 15 << 23;
|
||||
const uniform int32 f16infty = 31 << 23;
|
||||
const uniform unsigned int32 round_mask = ~0xffful;
|
||||
const uniform int32 magic = 15ul << 23;
|
||||
const uniform int32 f16infty = 31ul << 23;
|
||||
|
||||
uniform int32 fint2 = intbits(floatbits(fint & round_mask) * floatbits(magic)) - round_mask;
|
||||
fint2 = (fint2 > f16infty) ? f16infty : fint2; // Clamp to signed infinity if overflowed
|
||||
@@ -3761,16 +3761,16 @@ static inline int16 float_to_half(float f) {
|
||||
// NaN->qNaN and Inf->Inf
|
||||
// unconditional assignment here, will override with right value for
|
||||
// the regular case below.
|
||||
int32 f32infty = 255 << 23;
|
||||
o = (fint > f32infty) ? 0x7e00 : 0x7c00;
|
||||
int32 f32infty = 255ul << 23;
|
||||
o = (fint > f32infty) ? 0x7e00u : 0x7c00u;
|
||||
|
||||
// (De)normalized number or zero
|
||||
// update fint unconditionally to save the blending; we don't need it
|
||||
// anymore for the Inf/NaN case anyway.
|
||||
|
||||
const unsigned int32 round_mask = ~0xfffu;
|
||||
const int32 magic = 15 << 23;
|
||||
const int32 f16infty = 31 << 23;
|
||||
const unsigned int32 round_mask = ~0xffful;
|
||||
const int32 magic = 15ul << 23;
|
||||
const int32 f16infty = 31ul << 23;
|
||||
|
||||
// Shift exponent down, denormalize if necessary.
|
||||
// NOTE This represents half-float denormals using single precision denormals.
|
||||
@@ -3789,7 +3789,7 @@ static inline int16 float_to_half(float f) {
|
||||
// FP16 denormals are rare in practice, I don't know. Whatever slow path your HW
|
||||
// may or may not have for denormals, this may well hit it.
|
||||
float fscale = floatbits(fint & round_mask) * floatbits(magic);
|
||||
fscale = min(fscale, floatbits((31 << 23) - 0x1000));
|
||||
fscale = min(fscale, floatbits((31ul << 23) - 0x1000ul));
|
||||
int32 fint2 = intbits(fscale) - round_mask;
|
||||
|
||||
if (fint < f32infty)
|
||||
@@ -3956,7 +3956,7 @@ float_to_srgb8(float inval)
|
||||
// Do the table lookup and unpack bias, scale
|
||||
unsigned int tab = table[(intbits(inval) - 0x39000000u) >> 20];
|
||||
unsigned int bias = (tab >> 16) << 9;
|
||||
unsigned int scale = tab & 0xffff;
|
||||
unsigned int scale = tab & 0xfffful;
|
||||
|
||||
// Grab next-highest mantissa bits and perform linear interpolation
|
||||
unsigned int t = (intbits(inval) >> 12) & 0xff;
|
||||
@@ -4006,7 +4006,7 @@ float_to_srgb8(uniform float inval)
|
||||
// Do the table lookup and unpack bias, scale
|
||||
uniform unsigned int tab = table[(intbits(inval) - 0x39000000u) >> 20];
|
||||
uniform unsigned int bias = (tab >> 16) << 9;
|
||||
uniform unsigned int scale = tab & 0xffff;
|
||||
uniform unsigned int scale = tab & 0xfffful;
|
||||
|
||||
// Grab next-highest mantissa bits and perform linear interpolation
|
||||
uniform unsigned int t = (intbits(inval) >> 12) & 0xff;
|
||||
@@ -4053,14 +4053,14 @@ static inline uniform unsigned int random(uniform RNGState * uniform state)
|
||||
static inline float frandom(varying RNGState * uniform state)
|
||||
{
|
||||
unsigned int irand = random(state);
|
||||
irand &= (1<<23)-1;
|
||||
irand &= (1ul<<23)-1;
|
||||
return floatbits(0x3F800000 | irand)-1.0f;
|
||||
}
|
||||
|
||||
static inline uniform float frandom(uniform RNGState * uniform state)
|
||||
{
|
||||
uniform unsigned int irand = random(state);
|
||||
irand &= (1<<23)-1;
|
||||
irand &= (1ul<<23)-1;
|
||||
return floatbits(0x3F800000 | irand)-1.0f;
|
||||
}
|
||||
|
||||
@@ -4068,18 +4068,18 @@ static inline void seed_rng(varying RNGState * uniform state,
|
||||
unsigned int seed) {
|
||||
state->z1 = seed;
|
||||
state->z2 = seed ^ 0xbeeff00d;
|
||||
state->z3 = ((seed & 0xffff) << 16) | (seed >> 16);
|
||||
state->z4 = (((seed & 0xff) << 24) | ((seed & 0xff00) << 8) |
|
||||
((seed & 0xff0000) >> 8) | (seed & 0xff000000) >> 24);
|
||||
state->z3 = ((seed & 0xfffful) << 16) | (seed >> 16);
|
||||
state->z4 = (((seed & 0xfful) << 24) | ((seed & 0xff00ul) << 8) |
|
||||
((seed & 0xff0000ul) >> 8) | (seed & 0xff000000ul) >> 24);
|
||||
}
|
||||
|
||||
static inline void seed_rng(uniform RNGState * uniform state,
|
||||
uniform unsigned int seed) {
|
||||
state->z1 = seed;
|
||||
state->z2 = seed ^ 0xbeeff00d;
|
||||
state->z3 = ((seed & 0xffff) << 16) | (seed >> 16);
|
||||
state->z4 = (((seed & 0xff) << 24) | ((seed & 0xff00) << 8) |
|
||||
((seed & 0xff0000) >> 8) | (seed & 0xff000000) >> 24);
|
||||
state->z3 = ((seed & 0xfffful) << 16) | (seed >> 16);
|
||||
state->z4 = (((seed & 0xfful) << 24) | ((seed & 0xff00ul) << 8) |
|
||||
((seed & 0xff0000ul) >> 8) | (seed & 0xff000000ul) >> 24);
|
||||
}
|
||||
|
||||
|
||||
@@ -4097,7 +4097,7 @@ static inline uniform bool rdrand(float * uniform ptr) {
|
||||
uniform int32 irand;
|
||||
uniform bool success = __rdrand_i32(&irand);
|
||||
if (success) {
|
||||
irand &= (1<<23)-1;
|
||||
irand &= (1ul<<23)-1;
|
||||
*ptr = floatbits(0x3F800000 | irand)-1.0f;
|
||||
}
|
||||
return success;
|
||||
@@ -4117,7 +4117,7 @@ static inline bool rdrand(varying float * uniform ptr) {
|
||||
// in vector form. However, we need to be careful to not
|
||||
// clobber any existing already-set values in *ptr with
|
||||
// inactive lanes here...
|
||||
irand &= (1<<23)-1;
|
||||
irand &= (1ul<<23)-1;
|
||||
*ptr = floatbits(0x3F800000 | irand)-1.0f;
|
||||
success = true;
|
||||
}
|
||||
@@ -4137,7 +4137,7 @@ static inline bool rdrand(float * ptr) {
|
||||
foreach_active (index) {
|
||||
uniform int32 irand;
|
||||
if (__rdrand_i32(&irand)) {
|
||||
irand &= (1<<23)-1;
|
||||
irand &= (1ul<<23)-1;
|
||||
*ptrs[index] = floatbits(0x3F800000 | irand)-1.0f;
|
||||
success = true;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user