Add support for in-memory half float data. Fixes issue #10

This commit is contained in:
Matt Pharr
2011-07-21 15:55:45 +01:00
parent 96d40327d0
commit 8ef3df57c5
6 changed files with 349 additions and 2 deletions

View File

@@ -114,3 +114,30 @@ CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
SOFTWARE.
---------------------------------------------------------------------------
ispc's code to convert to and from half-precision floats is based on James
Tursa's code, which is covered by the following license:
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the distribution
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

View File

@@ -687,10 +687,15 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
if (includeStdlibISPC) {
// If the user wants the standard library to be included, parse the
// serialized version of the stdlib.ispc file to get its definitions
// added.
// serialized version of the stdlib.ispc file to get its
// definitions added. Disable emission of performance warnings for
// now, since the user doesn't care about any of that in the stdlib
// implementation...
bool epf = g->emitPerfWarnings;
g->emitPerfWarnings = false;
extern char stdlib_code[];
yy_scan_string(stdlib_code);
yyparse();
g->emitPerfWarnings = epf;
}
}

View File

@@ -77,6 +77,7 @@ Contents:
+ `Output Functions`_
+ `Cross-Program Instance Operations`_
+ `Packed Load and Store Operations`_
+ `Conversions To and From Half-Precision Floats`_
+ `Atomic Operations and Memory Fences`_
+ `Low-Level Bits`_
@@ -1890,6 +1891,28 @@ where the ``i`` th element of ``x`` has been replaced with the value ``v``
float insert(float x, uniform int i, uniform float v)
Conversions To and From Half-Precision Floats
---------------------------------------------
There are functions to convert to and from the IEEE 16-bit floating-point
format. Note that there is no ``half`` data-type, and it isn't possible
to do floating-point math directly with ``half`` types in ``ispc``; these
functions facilitate converting to and from half-format data in memory.
To use them, half-format data should be loaded into an ``int16`` and the
``half_to_float()`` function used to convert it the a 32-bit floating point
value. To store a value to memory in half format, the ``float_to_half()``
function returns the 16 bits that are the closest match to the given
``float``, in half format.
::
float half_to_float(unsigned int16 h)
uniform float half_to_float(uniform unsigned int16 h)
int16 float_to_half(float f)
uniform int16 float_to_half(uniform float f)
Atomic Operations and Memory Fences
-----------------------------------

View File

@@ -2350,6 +2350,256 @@ static inline uniform double pow(uniform double a, uniform double b) {
return __stdlib_pow(a, b);
}
///////////////////////////////////////////////////////////////////////////
// half-precision floats
static inline uniform float half_to_float(uniform unsigned int16 h) {
if ((h & 0x7FFFu) == 0)
// Signed zero
return floatbits(((unsigned int32) h) << 16);
else {
// Though these are int16 quantities, we get much better code
// with them stored as int32s...
uniform unsigned int32 hs = h & (int32)0x8000u; // Pick off sign bit
uniform unsigned int32 he = h & (int32)0x7C00u; // Pick off exponent bits
uniform unsigned int32 hm = h & (int32)0x03FFu; // Pick off mantissa bits
cif (he == 0) {
// Denormal will convert to normalized
uniform int e = -1;
// The following loop figures out how much extra to adjust the exponent
// Shift until leading bit overflows into exponent bit
do {
e++;
hm <<= 1;
} while((hm & 0x0400u) == 0);
// Sign bit
uniform unsigned int32 xs = ((unsigned int32) hs) << 16;
// Exponent: unbias the halfp, then bias the single
uniform int32 xes = ((int32)(he >> 10)) - 15 + 127 - e;
// Exponent
uniform unsigned int32 xe = (unsigned int32) (xes << 23);
// Mantissa
uniform unsigned int32 xm = ((unsigned int32) (hm & 0x03FFu)) << 13;
return floatbits(xs | xe | xm);
}
else {
if (he == 0x7C00u) {
// Inf or NaN (all the exponent bits are set)
if (hm == 0)
// Zero mantissa -> signed inf
return floatbits((((unsigned int32) hs) << 16) |
((unsigned int32) 0x7F800000u));
else
// NaN
return floatbits(0xFFC00000u);
}
else {
// Normalized number
// sign
uniform unsigned int32 xs = ((unsigned int32) hs) << 16;
// Exponent: unbias the halfp, then bias the single
uniform int32 xes = ((int32) (he >> 10)) - 15 + 127;
// Exponent
uniform unsigned int32 xe = (unsigned int32) (xes << 23);
// Mantissa
uniform unsigned int32 xm = ((unsigned int32) hm) << 13;
return floatbits(xs | xe | xm);
}
}
}
}
static inline float half_to_float(unsigned int16 h) {
if ((h & 0x7FFFu) == 0)
// Signed zero
return floatbits(((unsigned int32) h) << 16);
else {
// Though these are int16 quantities, we get much better code
// with them stored as int32s...
unsigned int32 hs = h & (int32)0x8000u; // Pick off sign bit
unsigned int32 he = h & (int32)0x7C00u; // Pick off exponent bits
unsigned int32 hm = h & (int32)0x03FFu; // Pick off mantissa bits
cif (he == 0) {
// Denormal will convert to normalized
int e = -1;
// The following loop figures out how much extra to adjust the exponent
// Shift until leading bit overflows into exponent bit
do {
e++;
hm <<= 1;
} while((hm & 0x0400u) == 0);
// Sign bit
unsigned int32 xs = ((unsigned int32) hs) << 16;
// Exponent: unbias the halfp, then bias the single
int32 xes = ((int32)(he >> 10)) - 15 + 127 - e;
// Exponent
unsigned int32 xe = (unsigned int32) (xes << 23);
// Mantissa
unsigned int32 xm = ((unsigned int32) (hm & 0x03FFu)) << 13;
return floatbits(xs | xe | xm);
}
else {
if (he == 0x7C00u) {
// Inf or NaN (all the exponent bits are set)
if (hm == 0)
// Zero mantissa -> signed inf
return floatbits((((unsigned int32) hs) << 16) |
((unsigned int32) 0x7F800000u));
else
// NaN
return floatbits(0xFFC00000u);
}
else {
// Normalized number
// sign
unsigned int32 xs = ((unsigned int32) hs) << 16;
// Exponent: unbias the halfp, then bias the single
int32 xes = ((int32) (he >> 10)) - 15 + 127;
// Exponent
unsigned int32 xe = (unsigned int32) (xes << 23);
// Mantissa
unsigned int32 xm = ((unsigned int32) hm) << 13;
return floatbits(xs | xe | xm);
}
}
}
}
static inline uniform int16 float_to_half(uniform float f) {
uniform int32 x = intbits(f);
// Store the return value in an int32 until the very end; this ends up
// generating better code...
uniform int32 ret;
if ((x & 0x7FFFFFFFu) == 0)
// Signed zero
ret = (x >> 16);
else {
uniform unsigned int32 xs = x & 0x80000000u; // Pick off sign bit
uniform unsigned int32 xe = x & 0x7F800000u; // Pick off exponent bits
uniform unsigned int32 xm = x & 0x007FFFFFu; // Pick off mantissa bits
if (xe == 0) {
// Denormal will underflow, return a signed zero
ret = (xs >> 16);
}
else {
cif (xe == 0x7F800000u) {
// Inf or NaN (all the exponent bits are set)
if (xm == 0)
// Zero mantissa -> signed infinity
ret = ((xs >> 16) | 0x7C00u);
else
// NaN, only 1st mantissa bit set
ret = 0xFE00u;
}
else {
// Normalized number
uniform unsigned int32 hs = (xs >> 16); // Sign bit
uniform unsigned int32 hm;
// Exponent unbias the single, then bias the halfp
uniform int32 hes = ((int)(xe >> 23)) - 127 + 15;
if (hes >= 0x1F)
// Overflow: return signed infinity
ret = ((xs >> 16) | 0x7C00u);
else if (hes <= 0) {
// Underflow
if ((14 - hes) > 24) {
// Mantissa shifted all the way off & no rounding possibility
hm = 0u; // Set mantissa to zero
}
else {
xm |= 0x00800000u; // Add the hidden leading bit
hm = (xm >> (14 - hes)); // Mantissa
if ((xm >> (13 - hes)) & 0x00000001u) // Check for rounding
// Round, might overflow into exp bit, but this is OK
hm += 1u;
}
ret = (hs | hm);
}
else {
uniform unsigned int32 he = (hes << 10); // Exponent
hm = (xm >> 13); // Mantissa
if (xm & 0x00001000u) // Check for rounding
// Round, might overflow to inf, this is OK
ret = (hs | he | hm) + 1u;
else
ret = (hs | he | hm);
}
}
}
}
return (int16)ret;
}
static inline int16 float_to_half(float f) {
int32 x = intbits(f);
// Store the return value in an int32 until the very end; this ends up
// generating better code...
int32 ret;
if ((x & 0x7FFFFFFFu) == 0)
// Signed zero
ret = (x >> 16);
else {
unsigned int32 xs = x & 0x80000000u; // Pick off sign bit
unsigned int32 xe = x & 0x7F800000u; // Pick off exponent bits
unsigned int32 xm = x & 0x007FFFFFu; // Pick off mantissa bits
if (xe == 0) {
// Denormal will underflow, return a signed zero
ret = (xs >> 16);
}
else {
cif (xe == 0x7F800000u) {
// Inf or NaN (all the exponent bits are set)
if (xm == 0)
// Zero mantissa -> signed infinity
ret = ((xs >> 16) | 0x7C00u);
else
// NaN, only 1st mantissa bit set
ret = 0xFE00u;
}
else {
// Normalized number
unsigned int32 hs = (xs >> 16); // Sign bit
unsigned int32 hm;
// Exponent unbias the single, then bias the halfp
int32 hes = ((int)(xe >> 23)) - 127 + 15;
if (hes >= 0x1F)
// Overflow: return signed infinity
ret = ((xs >> 16) | 0x7C00u);
else if (hes <= 0) {
// Underflow
if ((14 - hes) > 24) {
// Mantissa shifted all the way off & no rounding possibility
hm = 0u; // Set mantissa to zero
}
else {
xm |= 0x00800000u; // Add the hidden leading bit
hm = (xm >> (14 - hes)); // Mantissa
if ((xm >> (13 - hes)) & 0x00000001u) // Check for rounding
// Round, might overflow into exp bit, but this is OK
hm += 1u;
}
ret = (hs | hm);
}
else {
unsigned int32 he = (hes << 10); // Exponent
hm = (xm >> 13); // Mantissa
if (xm & 0x00001000u) // Check for rounding
// Round, might overflow to inf, this is OK
ret = (hs | he | hm) + 1u;
else
ret = (hs | he | hm);
}
}
}
}
return (int16)ret;
}
///////////////////////////////////////////////////////////////////////////
// RNG stuff

21
tests/half-1.ispc Normal file
View File

@@ -0,0 +1,21 @@
export uniform int width() { return programCount; }
export void f_v(uniform float RET[]) {
float sum = 0;
int errors = 0;
for (uniform int i = 0; i <= 0xffff; ++i) {
uniform unsigned int16 h = i;
uniform float f = half_to_float(i);
h = float_to_half(f);
// may return a different value back for NaNs..
if (f == f && i != h)
++errors;
}
RET[programIndex] = errors;
}
export void result(uniform float RET[]) {
RET[programIndex] = 0;
}

21
tests/half.ispc Normal file
View File

@@ -0,0 +1,21 @@
export uniform int width() { return programCount; }
export void f_v(uniform float RET[]) {
float sum = 0;
int errors = 0;
for (uniform int i = 0; i <= 0xffff; ++i) {
unsigned int16 h = i;
float f = half_to_float(i);
h = float_to_half(f);
// may return a different value back for NaNs..
if (f == f && i != h)
++errors;
}
RET[programIndex] = errors;
}
export void result(uniform float RET[]) {
RET[programIndex] = 0;
}