Use native float/half conversion instructions with the AVX2 target.

This commit is contained in:
Matt Pharr
2012-01-24 15:33:38 -08:00
parent a5b7fca7e0
commit 1867b5b317
12 changed files with 453 additions and 256 deletions
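At the ISPC source level, the routines affected are the standard library's half_to_float() / float_to_half() conversions (and their _fast variants); with this change they compile to the F16C vcvtph2ps / vcvtps2ph instructions on the AVX2 targets instead of the bit-manipulation fallback. A minimal usage sketch, not part of the commit (the kernel and parameter names are illustrative):

export void scale_halves(uniform unsigned int16 data[], uniform int count,
                         uniform float scale) {
    // Unpack half-precision inputs, compute in float, store half results.
    foreach (i = 0 ... count) {
        float f = half_to_float(data[i]);
        data[i] = float_to_half(f * scale);
    }
}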


@@ -822,6 +822,9 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
    lDefineConstantIntFunc("__fast_masked_vload", (int)g->opt.fastMaskedVload, module,
                           symbolTable);

    lDefineConstantInt("__have_native_half", (g->target.isa == Target::AVX2),
                       module, symbolTable);

    if (includeStdlibISPC) {
        // If the user wants the standard library to be included, parse the
        // serialized version of the stdlib.ispc file to get its
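__have_native_half is emitted as a compile-time constant for the target being compiled, so the if (__have_native_half) tests added to stdlib.ispc below can be folded away by the optimizer, leaving only one of the two paths in the generated code. A sketch of that dispatch pattern, not part of the commit (the wrapper name is illustrative; __half_to_float_varying is the target-provided routine declared in the .ll files below):

// After constant folding, only one branch of this if() survives.
static inline float load_half(unsigned int16 h) {
    if (__have_native_half)
        return __half_to_float_varying(h);  // lowered to vcvtph2ps on AVX2
    else
        return half_to_float(h);            // portable bit-manipulation path
}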


@@ -58,6 +58,14 @@ define <16 x i32> @__max_varying_uint32(<16 x i32>, <16 x i32>) nounwind readonl
ret <16 x i32> %ret
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; half conversion routines
declare float @__half_to_float_uniform(i16 %v) nounwind readnone
declare <WIDTH x float> @__half_to_float_varying(<WIDTH x i16> %v) nounwind readnone
declare i16 @__float_to_half_uniform(float %v) nounwind readnone
declare <WIDTH x i16> @__float_to_half_varying(<WIDTH x float> %v) nounwind readnone
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; gather


@@ -58,6 +58,14 @@ define <8 x i32> @__max_varying_uint32(<8 x i32>, <8 x i32>) nounwind readonly a
ret <8 x i32> %ret
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; half conversion routines
declare float @__half_to_float_uniform(i16 %v) nounwind readnone
declare <WIDTH x float> @__half_to_float_varying(<WIDTH x i16> %v) nounwind readnone
declare i16 @__float_to_half_uniform(float %v) nounwind readnone
declare <WIDTH x i16> @__float_to_half_varying(<WIDTH x float> %v) nounwind readnone
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; gather
@@ -65,6 +73,3 @@ gen_gather(8, i8)
gen_gather(8, i16)
gen_gather(8, i32)
gen_gather(8, i64)


@@ -63,6 +63,61 @@ define <16 x i32> @__max_varying_uint32(<16 x i32>, <16 x i32>) nounwind readonl
ret <16 x i32> %m
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; float/half conversions
declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>) nounwind readnone
; the i32 operand to vcvtps2ph is the rounding mode; 0 is round to nearest even
declare <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float>, i32) nounwind readnone

define <16 x float> @__half_to_float_varying(<16 x i16> %v) nounwind readnone {
  ; convert the low and high 8-wide halves separately, then reassemble
  %r_0 = shufflevector <16 x i16> %v, <16 x i16> undef,
           <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %vr_0 = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %r_0)
  %r_1 = shufflevector <16 x i16> %v, <16 x i16> undef,
           <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %vr_1 = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %r_1)
  %r = shufflevector <8 x float> %vr_0, <8 x float> %vr_1,
           <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
                       i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x float> %r
}

define <16 x i16> @__float_to_half_varying(<16 x float> %v) nounwind readnone {
  ; same split/convert/reassemble, in the other direction
  %r_0 = shufflevector <16 x float> %v, <16 x float> undef,
           <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %vr_0 = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %r_0, i32 0)
  %r_1 = shufflevector <16 x float> %v, <16 x float> undef,
           <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %vr_1 = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %r_1, i32 0)
  %r = shufflevector <8 x i16> %vr_0, <8 x i16> %vr_1,
           <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
                       i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i16> %r
}

define float @__half_to_float_uniform(i16 %v) nounwind readnone {
  ; there is no scalar form of the instruction: put the value in lane 0 of
  ; an 8-wide vector, convert, and pull lane 0 back out
  %v1 = bitcast i16 %v to <1 x i16>
  %vv = shufflevector <1 x i16> %v1, <1 x i16> undef,
          <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef,
                     i32 undef, i32 undef, i32 undef, i32 undef>
  %rv = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %vv)
  %r = extractelement <8 x float> %rv, i32 0
  ret float %r
}

define i16 @__float_to_half_uniform(float %v) nounwind readnone {
  %v1 = bitcast float %v to <1 x float>
  %vv = shufflevector <1 x float> %v1, <1 x float> undef,
          <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef,
                     i32 undef, i32 undef, i32 undef, i32 undef>
  ; round to nearest even
  %rv = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %vv, i32 0)
  %r = extractelement <8 x i16> %rv, i32 0
  ret i16 %r
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; gather


@@ -63,6 +63,44 @@ define <8 x i32> @__max_varying_uint32(<8 x i32>, <8 x i32>) nounwind readonly a
ret <8 x i32> %m
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; float/half conversions
declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>) nounwind readnone
; the i32 operand to vcvtps2ph is the rounding mode; 0 is round to nearest even
declare <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float>, i32) nounwind readnone

define <8 x float> @__half_to_float_varying(<8 x i16> %v) nounwind readnone {
  ; the 8-wide case maps directly onto the 256-bit instruction
  %r = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %v)
  ret <8 x float> %r
}

define <8 x i16> @__float_to_half_varying(<8 x float> %v) nounwind readnone {
  %r = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %v, i32 0)
  ret <8 x i16> %r
}

define float @__half_to_float_uniform(i16 %v) nounwind readnone {
  ; no scalar form: put the value in lane 0, convert, extract lane 0
  %v1 = bitcast i16 %v to <1 x i16>
  %vv = shufflevector <1 x i16> %v1, <1 x i16> undef,
          <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef,
                     i32 undef, i32 undef, i32 undef, i32 undef>
  %rv = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %vv)
  %r = extractelement <8 x float> %rv, i32 0
  ret float %r
}

define i16 @__float_to_half_uniform(float %v) nounwind readnone {
  %v1 = bitcast float %v to <1 x float>
  %vv = shufflevector <1 x float> %v1, <1 x float> undef,
          <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef,
                     i32 undef, i32 undef, i32 undef, i32 undef>
  ; round to nearest even
  %rv = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %vv, i32 0)
  %r = extractelement <8 x i16> %rv, i32 0
  ret i16 %r
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; gather
@@ -70,6 +108,3 @@ gen_gather(8, i8)
gen_gather(8, i16)
gen_gather(8, i32)
gen_gather(8, i64)


@@ -47,6 +47,14 @@ int64minmax()
include(`target-sse2-common.ll')
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; half conversion routines
declare float @__half_to_float_uniform(i16 %v) nounwind readnone
declare <WIDTH x float> @__half_to_float_varying(<WIDTH x i16> %v) nounwind readnone
declare i16 @__float_to_half_uniform(float %v) nounwind readnone
declare <WIDTH x i16> @__float_to_half_varying(<WIDTH x float> %v) nounwind readnone
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; rcp


@@ -44,6 +44,14 @@ int64minmax()
include(`target-sse2-common.ll')
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; half conversion routines
declare float @__half_to_float_uniform(i16 %v) nounwind readnone
declare <WIDTH x float> @__half_to_float_varying(<WIDTH x i16> %v) nounwind readnone
declare i16 @__float_to_half_uniform(float %v) nounwind readnone
declare <WIDTH x i16> @__float_to_half_varying(<WIDTH x float> %v) nounwind readnone
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; rounding
;;


@@ -47,6 +47,14 @@ int64minmax()
include(`target-sse4-common.ll')
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; half conversion routines
declare float @__half_to_float_uniform(i16 %v) nounwind readnone
declare <WIDTH x float> @__half_to_float_varying(<WIDTH x i16> %v) nounwind readnone
declare i16 @__float_to_half_uniform(float %v) nounwind readnone
declare <WIDTH x i16> @__float_to_half_varying(<WIDTH x float> %v) nounwind readnone
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; rcp


@@ -44,6 +44,14 @@ int64minmax()
include(`target-sse4-common.ll')
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; half conversion routines
declare float @__half_to_float_uniform(i16 %v) nounwind readnone
declare <WIDTH x float> @__half_to_float_varying(<WIDTH x i16> %v) nounwind readnone
declare i16 @__float_to_half_uniform(float %v) nounwind readnone
declare <WIDTH x i16> @__float_to_half_varying(<WIDTH x float> %v) nounwind readnone
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; rcp


@@ -210,7 +210,7 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
        t->isa = Target::AVX2;
        t->nativeVectorWidth = 8;
        t->vectorWidth = 8;
        t->attributes = "+avx2,+popcnt,+cmov";
        t->attributes = "+avx2,+popcnt,+cmov,+f16c";
        t->maskingIsFree = false;
        t->allOffMaskIsSafe = false;
        t->maskBitCount = 32;
@@ -219,7 +219,7 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
        t->isa = Target::AVX2;
        t->nativeVectorWidth = 16;
        t->vectorWidth = 16;
        t->attributes = "+avx2,+popcnt,+cmov";
        t->attributes = "+avx2,+popcnt,+cmov,+f16c";
        t->maskingIsFree = false;
        t->allOffMaskIsSafe = false;
        t->maskBitCount = 32;


@@ -2824,114 +2824,124 @@ static inline uniform double pow(uniform double a, uniform double b) {
// half-precision floats
static inline uniform float half_to_float(uniform unsigned int16 h) {
    if (__have_native_half) {
        return __half_to_float_uniform(h);
    }
    else {
        if ((h & 0x7FFFu) == 0)
            // Signed zero
            return floatbits(((unsigned int32) h) << 16);
        else {
            // Though these are int16 quantities, we get much better code
            // with them stored as int32s...
            uniform unsigned int32 hs = h & (int32)0x8000u; // Pick off sign bit
            uniform unsigned int32 he = h & (int32)0x7C00u; // Pick off exponent bits
            uniform unsigned int32 hm = h & (int32)0x03FFu; // Pick off mantissa bits
            if (he == 0) {
                // Denormal will convert to normalized
                uniform int e = -1;
                // The following loop figures out how much extra to adjust the exponent
                // Shift until leading bit overflows into exponent bit
                do {
                    e++;
                    hm <<= 1;
                } while ((hm & 0x0400u) == 0);
                // Sign bit
                uniform unsigned int32 xs = ((unsigned int32) hs) << 16;
                // Exponent: unbias the halfp, then bias the single
                uniform int32 xes = ((int32)(he >> 10)) - 15 + 127 - e;
                // Exponent
                uniform unsigned int32 xe = (unsigned int32) (xes << 23);
                // Mantissa
                uniform unsigned int32 xm = ((unsigned int32) (hm & 0x03FFu)) << 13;
                return floatbits(xs | xe | xm);
            }
            else {
                if (he == 0x7C00u) {
                    // Inf or NaN (all the exponent bits are set)
                    if (hm == 0)
                        // Zero mantissa -> signed inf
                        return floatbits((((unsigned int32) hs) << 16) |
                                         ((unsigned int32) 0x7F800000u));
                    else
                        // NaN
                        return floatbits(0xFFC00000u);
                }
                else {
                    // Normalized number
                    // sign
                    uniform unsigned int32 xs = ((unsigned int32) hs) << 16;
                    // Exponent: unbias the halfp, then bias the single
                    uniform int32 xes = ((int32) (he >> 10)) - 15 + 127;
                    // Exponent
                    uniform unsigned int32 xe = (unsigned int32) (xes << 23);
                    // Mantissa
                    uniform unsigned int32 xm = ((unsigned int32) hm) << 13;
                    return floatbits(xs | xe | xm);
                }
            }
        }
    }
}
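To make the normalized-number path above concrete: the half pattern 0x3C00 has sign 0, biased exponent 15 and mantissa 0, so xes = 15 - 15 + 127 = 127 and the reassembled single is 0x3F800000, i.e. 1.0. A small check along those lines, not part of the commit (the function name is illustrative):

export uniform int check_half_one() {
    uniform unsigned int16 h = 0x3C00;                   // 1.0 as a half
    uniform unsigned int32 hs = h & 0x8000u;             // sign     = 0
    uniform unsigned int32 he = h & 0x7C00u;             // exponent = 15 << 10
    uniform unsigned int32 hm = h & 0x03FFu;             // mantissa = 0
    uniform int32 xes = ((int32)(he >> 10)) - 15 + 127;  // 15 - 15 + 127 = 127
    uniform float f = floatbits((hs << 16) | ((unsigned int32)(xes << 23)) | (hm << 13));
    return (f == 1. && half_to_float(h) == 1.) ? 1 : 0;  // expect 1
}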
static inline float half_to_float(unsigned int16 h) {
    if (__have_native_half) {
        return __half_to_float_varying(h);
    }
    else {
        if ((h & 0x7FFFu) == 0)
            // Signed zero
            return floatbits(((unsigned int32) h) << 16);
        else {
            // Though these are int16 quantities, we get much better code
            // with them stored as int32s...
            unsigned int32 hs = h & (int32)0x8000u; // Pick off sign bit
            unsigned int32 he = h & (int32)0x7C00u; // Pick off exponent bits
            unsigned int32 hm = h & (int32)0x03FFu; // Pick off mantissa bits
            cif (he == 0) {
                // Denormal will convert to normalized
                int e = -1;
                // The following loop figures out how much extra to adjust the exponent
                // Shift until leading bit overflows into exponent bit
                do {
                    e++;
                    hm <<= 1;
                } while ((hm & 0x0400u) == 0);
                // Sign bit
                unsigned int32 xs = ((unsigned int32) hs) << 16;
                // Exponent: unbias the halfp, then bias the single
                int32 xes = ((int32)(he >> 10)) - 15 + 127 - e;
                // Exponent
                unsigned int32 xe = (unsigned int32) (xes << 23);
                // Mantissa
                unsigned int32 xm = ((unsigned int32) (hm & 0x03FFu)) << 13;
                return floatbits(xs | xe | xm);
            }
            else {
                if (he == 0x7C00u) {
                    // Inf or NaN (all the exponent bits are set)
                    if (hm == 0)
                        // Zero mantissa -> signed inf
                        return floatbits((((unsigned int32) hs) << 16) |
                                         ((unsigned int32) 0x7F800000u));
                    else
                        // NaN
                        return floatbits(0xFFC00000u);
                }
                else {
                    // Normalized number
                    // sign
                    unsigned int32 xs = ((unsigned int32) hs) << 16;
                    // Exponent: unbias the halfp, then bias the single
                    int32 xes = ((int32) (he >> 10)) - 15 + 127;
                    // Exponent
                    unsigned int32 xe = (unsigned int32) (xes << 23);
                    // Mantissa
                    unsigned int32 xm = ((unsigned int32) hm) << 13;
                    return floatbits(xs | xe | xm);
                }
            }
        }
    }
}
@@ -2939,209 +2949,237 @@ static inline float half_to_float(unsigned int16 h) {
static inline uniform int16 float_to_half(uniform float f) {
    if (__have_native_half) {
        return __float_to_half_uniform(f);
    }
    else {
        uniform int32 x = intbits(f);
        // Store the return value in an int32 until the very end; this ends up
        // generating better code...
        uniform int32 ret;
        if ((x & 0x7FFFFFFFu) == 0)
            // Signed zero
            ret = (x >> 16);
        else {
            uniform unsigned int32 xs = x & 0x80000000u; // Pick off sign bit
            uniform unsigned int32 xe = x & 0x7F800000u; // Pick off exponent bits
            uniform unsigned int32 xm = x & 0x007FFFFFu; // Pick off mantissa bits
            if (xe == 0) {
                // Denormal will underflow, return a signed zero
                ret = (xs >> 16);
            }
            else {
                if (xe == 0x7F800000u) {
                    // Inf or NaN (all the exponent bits are set)
                    if (xm == 0)
                        // Zero mantissa -> signed infinity
                        ret = ((xs >> 16) | 0x7C00u);
                    else
                        // NaN, only 1st mantissa bit set
                        ret = 0xFE00u;
                }
                else {
                    // Normalized number
                    uniform unsigned int32 hs = (xs >> 16); // Sign bit
                    uniform unsigned int32 hm;
                    // Exponent unbias the single, then bias the halfp
                    uniform int32 hes = ((int)(xe >> 23)) - 127 + 15;
                    if (hes >= 0x1F)
                        // Overflow: return signed infinity
                        ret = ((xs >> 16) | 0x7C00u);
                    else if (hes <= 0) {
                        // Underflow
                        if ((14 - hes) > 24) {
                            // Mantissa shifted all the way off & no rounding possibility
                            hm = 0u; // Set mantissa to zero
                        }
                        else {
                            xm |= 0x00800000u; // Add the hidden leading bit
                            hm = (xm >> (14 - hes)); // Mantissa
                            if ((xm >> (13 - hes)) & 0x00000001u) // Check for rounding
                                // Round, might overflow into exp bit, but this is OK
                                hm += 1u;
                        }
                        ret = (hs | hm);
                    }
                    else {
                        uniform unsigned int32 he = (hes << 10); // Exponent
                        hm = (xm >> 13); // Mantissa
                        if (xm & 0x00001000u) // Check for rounding
                            // Round, might overflow to inf, this is OK
                            ret = (hs | he | hm) + 1u;
                        else
                            ret = (hs | he | hm);
                    }
                }
            }
        }
        return (int16)ret;
    }
}
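A quick sanity check of the normalized path going the other way: intbits(1.5) is 0x3FC00000, the exponent rebias gives hes = 127 - 127 + 15 = 15, the mantissa shifts down to 0x200, and no round-up is needed, so float_to_half(1.5) is 0x3E00. A small check along those lines, not part of the commit (the function name is illustrative):

export uniform int check_half_encode() {
    uniform float f = 1.5;
    uniform int32 x = intbits(f);                      // 0x3FC00000
    uniform unsigned int32 xe = x & 0x7F800000u;       // biased exponent 127
    uniform unsigned int32 xm = x & 0x007FFFFFu;       // mantissa 0x400000
    uniform int32 hes = ((int)(xe >> 23)) - 127 + 15;  // rebias: 127 - 127 + 15 = 15
    uniform int32 h = (hes << 10) | (xm >> 13);        // 0x3C00 | 0x200 = 0x3E00
    // the sign is 0 and (xm & 0x00001000u) is 0, so no sign bit and no round-up
    return (h == 0x3E00 && float_to_half(f) == 0x3E00) ? 1 : 0;  // expect 1
}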
static inline int16 float_to_half(float f) {
    if (__have_native_half) {
        return __float_to_half_varying(f);
    }
    else {
        int32 x = intbits(f);
        // Store the return value in an int32 until the very end; this ends up
        // generating better code...
        int32 ret;
        if ((x & 0x7FFFFFFFu) == 0)
            // Signed zero
            ret = (x >> 16);
        else {
            unsigned int32 xs = x & 0x80000000u; // Pick off sign bit
            unsigned int32 xe = x & 0x7F800000u; // Pick off exponent bits
            unsigned int32 xm = x & 0x007FFFFFu; // Pick off mantissa bits
            if (xe == 0) {
                // Denormal will underflow, return a signed zero
                ret = (xs >> 16);
            }
            else {
                cif (xe == 0x7F800000u) {
                    // Inf or NaN (all the exponent bits are set)
                    if (xm == 0)
                        // Zero mantissa -> signed infinity
                        ret = ((xs >> 16) | 0x7C00u);
                    else
                        // NaN, only 1st mantissa bit set
                        ret = 0xFE00u;
                }
                else {
                    // Normalized number
                    unsigned int32 hs = (xs >> 16); // Sign bit
                    unsigned int32 hm;
                    // Exponent unbias the single, then bias the halfp
                    int32 hes = ((int)(xe >> 23)) - 127 + 15;
                    if (hes >= 0x1F)
                        // Overflow: return signed infinity
                        ret = ((xs >> 16) | 0x7C00u);
                    else if (hes <= 0) {
                        // Underflow
                        if ((14 - hes) > 24) {
                            // Mantissa shifted all the way off & no rounding possibility
                            hm = 0u; // Set mantissa to zero
                        }
                        else {
                            xm |= 0x00800000u; // Add the hidden leading bit
                            hm = (xm >> (14 - hes)); // Mantissa
                            if ((xm >> (13 - hes)) & 0x00000001u) // Check for rounding
                                // Round, might overflow into exp bit, but this is OK
                                hm += 1u;
                        }
                        ret = (hs | hm);
                    }
                    else {
                        unsigned int32 he = (hes << 10); // Exponent
                        hm = (xm >> 13); // Mantissa
                        if (xm & 0x00001000u) // Check for rounding
                            // Round, might overflow to inf, this is OK
                            ret = (hs | he | hm) + 1u;
                        else
                            ret = (hs | he | hm);
                    }
                }
            }
        }
        return (int16)ret;
    }
}
static inline uniform float half_to_float_fast(uniform unsigned int16 h) {
    if (__have_native_half) {
        return __half_to_float_uniform(h);
    }
    else {
        uniform unsigned int32 hs = h & (int32)0x8000u; // Pick off sign bit
        uniform unsigned int32 he = h & (int32)0x7C00u; // Pick off exponent bits
        uniform unsigned int32 hm = h & (int32)0x03FFu; // Pick off mantissa bits
        // sign
        uniform unsigned int32 xs = ((unsigned int32) hs) << 16;
        // Exponent: unbias the halfp, then bias the single
        uniform int32 xes = ((int32) (he >> 10)) - 15 + 127;
        // Exponent
        uniform unsigned int32 xe = (unsigned int32) (xes << 23);
        // Mantissa
        uniform unsigned int32 xm = ((unsigned int32) hm) << 13;
        return floatbits(xs | xe | xm);
    }
}
static inline float half_to_float_fast(unsigned int16 h) {
    if (__have_native_half) {
        return __half_to_float_varying(h);
    }
    else {
        unsigned int32 hs = h & (int32)0x8000u; // Pick off sign bit
        unsigned int32 he = h & (int32)0x7C00u; // Pick off exponent bits
        unsigned int32 hm = h & (int32)0x03FFu; // Pick off mantissa bits
        // sign
        unsigned int32 xs = ((unsigned int32) hs) << 16;
        // Exponent: unbias the halfp, then bias the single
        int32 xes = ((int32) (he >> 10)) - 15 + 127;
        // Exponent
        unsigned int32 xe = (unsigned int32) (xes << 23);
        // Mantissa
        unsigned int32 xm = ((unsigned int32) hm) << 13;
        return floatbits(xs | xe | xm);
    }
}
static inline uniform int16 float_to_half_fast(uniform float f) {
    if (__have_native_half) {
        return __float_to_half_uniform(f);
    }
    else {
        uniform int32 x = intbits(f);
        uniform unsigned int32 xs = x & 0x80000000u; // Pick off sign bit
        uniform unsigned int32 xe = x & 0x7F800000u; // Pick off exponent bits
        uniform unsigned int32 xm = x & 0x007FFFFFu; // Pick off mantissa bits

        uniform unsigned int32 hs = (xs >> 16); // Sign bit
        // Exponent unbias the single, then bias the halfp
        uniform int32 hes = ((int)(xe >> 23)) - 127 + 15;
        uniform unsigned int32 he = (hes << 10); // Exponent
        uniform int32 hm = (xm >> 13); // Mantissa
        uniform int32 ret = (hs | he | hm);

        if (xm & 0x00001000u) // Check for rounding
            // Round, might overflow to inf, this is OK
            ret += 1u;

        return (int16)ret;
    }
}
static inline int16 float_to_half_fast(float f) {
    if (__have_native_half) {
        return __float_to_half_varying(f);
    }
    else {
        int32 x = intbits(f);
        unsigned int32 xs = x & 0x80000000u; // Pick off sign bit
        unsigned int32 xe = x & 0x7F800000u; // Pick off exponent bits
        unsigned int32 xm = x & 0x007FFFFFu; // Pick off mantissa bits

        unsigned int32 hs = (xs >> 16); // Sign bit
        // Exponent unbias the single, then bias the halfp
        int32 hes = ((int)(xe >> 23)) - 127 + 15;
        unsigned int32 he = (hes << 10); // Exponent
        int32 hm = (xm >> 13); // Mantissa
        int32 ret = (hs | he | hm);

        if (xm & 0x00001000u) // Check for rounding
            // Round, might overflow to inf, this is OK
            ret += 1u;

        return (int16)ret;
    }
}
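The _fast variants above keep only the normalized-number path: they do not special-case zero, denormals, Inf or NaN, and they skip the overflow/underflow checks when narrowing the exponent. A sketch of how a caller might choose between the two, not part of the commit (the helper name and range constants are illustrative):

// Use the fast path only when the value is known to be a normalized,
// finite, nonzero number in the representable half range.
static inline int16 encode_half_checked(float f) {
    float af = abs(f);
    if (af >= 6.103515625e-5 && af <= 65504.)
        return float_to_half_fast(f);
    else
        return float_to_half(f);   // handles zero, denormals, Inf, NaN
}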
///////////////////////////////////////////////////////////////////////////

tests/half-3.ispc (new file, 21 lines)

@@ -0,0 +1,21 @@
export uniform int width() { return programCount; }

export void f_v(uniform float RET[]) {
    int errors = 0;
    foreach (i = 0 ... 65535) {
        unsigned int16 h = i;
        float f = half_to_float(i);
        h = float_to_half(f);
        int mismatches = (f == f && i != h);
        errors += reduce_add(mismatches);
    }
    RET[programIndex] = errors;
}

export void result(uniform float RET[]) {
    RET[programIndex] = 0;
}