Merge branch 'master' of git://github.com/ispc/ispc
This commit is contained in:
250
stdlib.ispc
250
stdlib.ispc
@@ -3460,68 +3460,42 @@ static inline uniform int16 float_to_half(uniform float f) {
|
|||||||
return __float_to_half_uniform(f);
|
return __float_to_half_uniform(f);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
uniform int32 x = intbits(f);
|
// via Fabian "ryg" Giesen.
|
||||||
// Store the return value in an int32 until the very end; this ends up
|
// https://gist.github.com/2156668
|
||||||
// generating better code...
|
uniform unsigned int32 sign_mask = 0x80000000u;
|
||||||
uniform int32 ret;
|
uniform int32 o;
|
||||||
if ((x & 0x7FFFFFFFu) == 0)
|
|
||||||
// Signed zero
|
uniform int32 fint = intbits(f);
|
||||||
ret = (x >> 16);
|
uniform int32 sign = fint & sign_mask;
|
||||||
else {
|
fint ^= sign;
|
||||||
uniform unsigned int32 xs = x & 0x80000000u; // Pick off sign bit
|
|
||||||
uniform unsigned int32 xe = x & 0x7F800000u; // Pick off exponent bits
|
// NOTE all the integer compares in this function can be safely
|
||||||
uniform unsigned int32 xm = x & 0x007FFFFFu; // Pick off mantissa bits
|
// compiled into signed compares since all operands are below
|
||||||
if (xe == 0) {
|
// 0x80000000. Important if you want fast straight SSE2 code (since
|
||||||
// Denormal will underflow, return a signed zero
|
// there's no unsigned PCMPGTD).
|
||||||
ret = (xs >> 16);
|
|
||||||
}
|
// Inf or NaN (all exponent bits set)
|
||||||
else {
|
// NaN->qNaN and Inf->Inf
|
||||||
if (xe == 0x7F800000u) {
|
// unconditional assignment here, will override with right value for
|
||||||
// Inf or NaN (all the exponent bits are set)
|
// the regular case below.
|
||||||
if (xm == 0)
|
uniform int32 f32infty = 255 << 23;
|
||||||
// Zero mantissa -> signed infinity
|
o = (fint > f32infty) ? 0x7e00 : 0x7c00;
|
||||||
ret = ((xs >> 16) | 0x7C00u);
|
|
||||||
else
|
// (De)normalized number or zero
|
||||||
// NaN, only 1st mantissa bit set
|
// update fint unconditionally to save the blending; we don't need it
|
||||||
ret = 0xFE00u;
|
// anymore for the Inf/NaN case anyway.
|
||||||
}
|
|
||||||
else {
|
const uniform unsigned int32 round_mask = ~0xfffu;
|
||||||
// Normalized number
|
const uniform int32 magic = 15 << 23;
|
||||||
uniform unsigned int32 hs = (xs >> 16); // Sign bit
|
const uniform int32 f16infty = 31 << 23;
|
||||||
uniform unsigned int32 hm;
|
|
||||||
// Exponent unbias the single, then bias the halfp
|
uniform int32 fint2 = intbits(floatbits(fint & round_mask) * floatbits(magic)) - round_mask;
|
||||||
uniform int32 hes = ((int)(xe >> 23)) - 127 + 15;
|
fint2 = (fint2 > f16infty) ? f16infty : fint2; // Clamp to signed infinity if overflowed
|
||||||
if (hes >= 0x1F)
|
|
||||||
// Overflow: return signed infinity
|
if (fint < f32infty)
|
||||||
ret = ((xs >> 16) | 0x7C00u);
|
o = fint2 >> 13; // Take the bits!
|
||||||
else if (hes <= 0) {
|
|
||||||
// Underflow
|
return (o | (sign >> 16));
|
||||||
if ((14 - hes) > 24) {
|
|
||||||
// Mantissa shifted all the way off & no rounding possibility
|
|
||||||
hm = 0u; // Set mantissa to zero
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
xm |= 0x00800000u; // Add the hidden leading bit
|
|
||||||
hm = (xm >> (14 - hes)); // Mantissa
|
|
||||||
if ((xm >> (13 - hes)) & 0x00000001u) // Check for rounding
|
|
||||||
// Round, might overflow into exp bit, but this is OK
|
|
||||||
hm += 1u;
|
|
||||||
}
|
|
||||||
ret = (hs | hm);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
uniform unsigned int32 he = (hes << 10); // Exponent
|
|
||||||
hm = (xm >> 13); // Mantissa
|
|
||||||
if (xm & 0x00001000u) // Check for rounding
|
|
||||||
// Round, might overflow to inf, this is OK
|
|
||||||
ret = (hs | he | hm) + 1u;
|
|
||||||
else
|
|
||||||
ret = (hs | he | hm);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return (int16)ret;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -3532,68 +3506,58 @@ static inline int16 float_to_half(float f) {
|
|||||||
return __float_to_half_varying(f);
|
return __float_to_half_varying(f);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
int32 x = intbits(f);
|
// via Fabian "ryg" Giesen.
|
||||||
// Store the return value in an int32 until the very end; this ends up
|
// https://gist.github.com/2156668
|
||||||
// generating better code...
|
unsigned int32 sign_mask = 0x80000000u;
|
||||||
int32 ret;
|
int32 o;
|
||||||
if ((x & 0x7FFFFFFFu) == 0)
|
|
||||||
// Signed zero
|
int32 fint = intbits(f);
|
||||||
ret = (x >> 16);
|
int32 sign = fint & sign_mask;
|
||||||
else {
|
fint ^= sign;
|
||||||
unsigned int32 xs = x & 0x80000000u; // Pick off sign bit
|
|
||||||
unsigned int32 xe = x & 0x7F800000u; // Pick off exponent bits
|
// NOTE all the integer compares in this function can be safely
|
||||||
unsigned int32 xm = x & 0x007FFFFFu; // Pick off mantissa bits
|
// compiled into signed compares since all operands are below
|
||||||
if (xe == 0) {
|
// 0x80000000. Important if you want fast straight SSE2 code (since
|
||||||
// Denormal will underflow, return a signed zero
|
// there's no unsigned PCMPGTD).
|
||||||
ret = (xs >> 16);
|
|
||||||
}
|
// Inf or NaN (all exponent bits set)
|
||||||
else {
|
// NaN->qNaN and Inf->Inf
|
||||||
cif (xe == 0x7F800000u) {
|
// unconditional assignment here, will override with right value for
|
||||||
// Inf or NaN (all the exponent bits are set)
|
// the regular case below.
|
||||||
if (xm == 0)
|
int32 f32infty = 255 << 23;
|
||||||
// Zero mantissa -> signed infinity
|
o = (fint > f32infty) ? 0x7e00 : 0x7c00;
|
||||||
ret = ((xs >> 16) | 0x7C00u);
|
|
||||||
else
|
// (De)normalized number or zero
|
||||||
// NaN, only 1st mantissa bit set
|
// update fint unconditionally to save the blending; we don't need it
|
||||||
ret = 0xFE00u;
|
// anymore for the Inf/NaN case anyway.
|
||||||
}
|
|
||||||
else {
|
const unsigned int32 round_mask = ~0xfffu;
|
||||||
// Normalized number
|
const int32 magic = 15 << 23;
|
||||||
unsigned int32 hs = (xs >> 16); // Sign bit
|
const int32 f16infty = 31 << 23;
|
||||||
unsigned int32 hm;
|
|
||||||
// Exponent unbias the single, then bias the halfp
|
// Shift exponent down, denormalize if necessary.
|
||||||
int32 hes = ((int)(xe >> 23)) - 127 + 15;
|
// NOTE This represents half-float denormals using single precision denormals.
|
||||||
if (hes >= 0x1F)
|
// The main reason to do this is that there's no shift with per-lane variable
|
||||||
// Overflow: return signed infinity
|
// shifts in SSE*, which we'd otherwise need. It has some funky side effects
|
||||||
ret = ((xs >> 16) | 0x7C00u);
|
// though:
|
||||||
else if (hes <= 0) {
|
// - This conversion will actually respect the FTZ (Flush To Zero) flag in
|
||||||
// Underflow
|
// MXCSR - if it's set, no half-float denormals will be generated. I'm
|
||||||
if ((14 - hes) > 24) {
|
// honestly not sure whether this is good or bad. It's definitely interesting.
|
||||||
// Mantissa shifted all the way off & no rounding possibility
|
// - If the underlying HW doesn't support denormals (not an issue with Intel
|
||||||
hm = 0u; // Set mantissa to zero
|
// CPUs, but might be a problem on GPUs or PS3 SPUs), you will always get
|
||||||
}
|
// flush-to-zero behavior. This is bad, unless you're on a CPU where you don't
|
||||||
else {
|
// care.
|
||||||
xm |= 0x00800000u; // Add the hidden leading bit
|
// - Denormals tend to be slow. FP32 denormals are rare in practice outside of things
|
||||||
hm = (xm >> (14 - hes)); // Mantissa
|
// like recursive filters in DSP - not a typical half-float application. Whether
|
||||||
if ((xm >> (13 - hes)) & 0x00000001u) // Check for rounding
|
// FP16 denormals are rare in practice, I don't know. Whatever slow path your HW
|
||||||
// Round, might overflow into exp bit, but this is OK
|
// may or may not have for denormals, this may well hit it.
|
||||||
hm += 1u;
|
int32 fint2 = intbits(floatbits(fint & round_mask) * floatbits(magic)) - round_mask;
|
||||||
}
|
fint2 = (fint2 > f16infty) ? f16infty : fint2; // Clamp to signed infinity if overflowed
|
||||||
ret = (hs | hm);
|
|
||||||
}
|
if (fint < f32infty)
|
||||||
else {
|
o = fint2 >> 13; // Take the bits!
|
||||||
unsigned int32 he = (hes << 10); // Exponent
|
|
||||||
hm = (xm >> 13); // Mantissa
|
return (o | (sign >> 16));
|
||||||
if (xm & 0x00001000u) // Check for rounding
|
|
||||||
// Round, might overflow to inf, this is OK
|
|
||||||
ret = (hs | he | hm) + 1u;
|
|
||||||
else
|
|
||||||
ret = (hs | he | hm);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return (int16)ret;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -3604,19 +3568,15 @@ static inline uniform float half_to_float_fast(uniform unsigned int16 h) {
|
|||||||
return __half_to_float_uniform(h);
|
return __half_to_float_uniform(h);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
uniform unsigned int32 hs = h & (int32)0x8000u; // Pick off sign bit
|
uniform unsigned int32 hs = h & (int32)0x8000u; // Pick off sign bit
|
||||||
uniform unsigned int32 he = h & (int32)0x7C00u; // Pick off exponent bits
|
uniform unsigned int32 hem = h & (int32)0x7fffu; // Pick off exponent-mantissa bits
|
||||||
uniform unsigned int32 hm = h & (int32)0x03FFu; // Pick off mantissa bits
|
|
||||||
|
|
||||||
// sign
|
uniform unsigned int32 xs = ((unsigned int32) hs) << 16;
|
||||||
uniform unsigned int32 xs = ((unsigned int32) hs) << 16;
|
uniform unsigned int32 xem = ((unsigned int32) hem) << 13;
|
||||||
// Exponent: unbias the halfp, then bias the single
|
|
||||||
uniform int32 xes = ((int32) (he >> 10)) - 15 + 127;
|
xem += 0x38000000; // (127 - 15) << 23
|
||||||
// Exponent
|
|
||||||
uniform unsigned int32 xe = (unsigned int32) (xes << 23);
|
return floatbits(xs | xem);
|
||||||
// Mantissa
|
|
||||||
uniform unsigned int32 xm = ((unsigned int32) hm) << 13;
|
|
||||||
return floatbits(xs | xe | xm);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -3626,19 +3586,13 @@ static inline float half_to_float_fast(unsigned int16 h) {
|
|||||||
return __half_to_float_varying(h);
|
return __half_to_float_varying(h);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
unsigned int32 hs = h & (int32)0x8000u; // Pick off sign bit
|
unsigned int32 hs = h & (int32)0x8000u; // Pick off sign bit
|
||||||
unsigned int32 he = h & (int32)0x7C00u; // Pick off exponent bits
|
unsigned int32 hem = h & (int32)0x7fffu; // Pick off exponent-mantissa bits
|
||||||
unsigned int32 hm = h & (int32)0x03FFu; // Pick off mantissa bits
|
|
||||||
|
|
||||||
// sign
|
unsigned int32 xs = ((unsigned int32) hs) << 16;
|
||||||
unsigned int32 xs = ((unsigned int32) hs) << 16;
|
unsigned int32 xem = ((unsigned int32) hem) << 13;
|
||||||
// Exponent: unbias the halfp, then bias the single
|
|
||||||
int32 xes = ((int32) (he >> 10)) - 15 + 127;
|
return floatbits(xs | (xem + 0x38000000 /* (127 - 15) << 23 */));
|
||||||
// Exponent
|
|
||||||
unsigned int32 xe = (unsigned int32) (xes << 23);
|
|
||||||
// Mantissa
|
|
||||||
unsigned int32 xm = ((unsigned int32) hm) << 13;
|
|
||||||
return floatbits(xs | xe | xm);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
4
stmt.cpp
4
stmt.cpp
@@ -541,6 +541,10 @@ IfStmt::emitVaryingIf(FunctionEmitContext *ctx, llvm::Value *ltest) const {
|
|||||||
bool safeToRunWithAllLanesOff = (SafeToRunWithMaskAllOff(trueStmts) &&
|
bool safeToRunWithAllLanesOff = (SafeToRunWithMaskAllOff(trueStmts) &&
|
||||||
SafeToRunWithMaskAllOff(falseStmts));
|
SafeToRunWithMaskAllOff(falseStmts));
|
||||||
|
|
||||||
|
Debug(pos, "If statement: true cost %d (safe %d), false cost %d (safe %d).",
|
||||||
|
::EstimateCost(trueStmts), (int)SafeToRunWithMaskAllOff(trueStmts),
|
||||||
|
::EstimateCost(falseStmts), (int)SafeToRunWithMaskAllOff(falseStmts));
|
||||||
|
|
||||||
if (safeToRunWithAllLanesOff &&
|
if (safeToRunWithAllLanesOff &&
|
||||||
(costIsAcceptable || g->opt.disableCoherentControlFlow)) {
|
(costIsAcceptable || g->opt.disableCoherentControlFlow)) {
|
||||||
ctx->StartVaryingIf(oldMask);
|
ctx->StartVaryingIf(oldMask);
|
||||||
|
|||||||
Reference in New Issue
Block a user