From 7e18f0e2473b9ccb940733b81de5ff15c001afe2 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Fri, 23 Mar 2012 16:09:32 -0700 Subject: [PATCH] Small improvement to float->half function in stdlib. Rewrite things to be able to do a float MINPS, for slightly better code on SSE2 (which has that but not a signed int min). SSE2 code now 23 instructions (vs 21 intrinsics). --- stdlib.ispc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/stdlib.ispc b/stdlib.ispc index f5984277..89c47cf7 100644 --- a/stdlib.ispc +++ b/stdlib.ispc @@ -3551,8 +3551,9 @@ static inline int16 float_to_half(float f) { // like recursive filters in DSP - not a typical half-float application. Whether // FP16 denormals are rare in practice, I don't know. Whatever slow path your HW // may or may not have for denormals, this may well hit it. - int32 fint2 = intbits(floatbits(fint & round_mask) * floatbits(magic)) - round_mask; - fint2 = (fint2 > f16infty) ? f16infty : fint2; // Clamp to signed infinity if overflowed + float fscale = floatbits(fint & round_mask) * floatbits(magic); + fscale = min(fscale, floatbits((31 << 23) - 0x1000)); + int32 fint2 = intbits(fscale) - round_mask; if (fint < f32infty) o = fint2 >> 13; // Take the bits!