Use native float/half conversion instructions with the AVX2 target.

This commit is contained in:
Matt Pharr
2012-01-24 15:33:38 -08:00
parent a5b7fca7e0
commit 1867b5b317
12 changed files with 453 additions and 256 deletions

View File

@@ -822,6 +822,9 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
lDefineConstantIntFunc("__fast_masked_vload", (int)g->opt.fastMaskedVload, module, lDefineConstantIntFunc("__fast_masked_vload", (int)g->opt.fastMaskedVload, module,
symbolTable); symbolTable);
lDefineConstantInt("__have_native_half", (g->target.isa == Target::AVX2),
module, symbolTable);
if (includeStdlibISPC) { if (includeStdlibISPC) {
// If the user wants the standard library to be included, parse the // If the user wants the standard library to be included, parse the
// serialized version of the stdlib.ispc file to get its // serialized version of the stdlib.ispc file to get its

View File

@@ -58,6 +58,14 @@ define <16 x i32> @__max_varying_uint32(<16 x i32>, <16 x i32>) nounwind readonl
ret <16 x i32> %ret ret <16 x i32> %ret
} }
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; half conversion routines
declare float @__half_to_float_uniform(i16 %v) nounwind readnone
declare <WIDTH x float> @__half_to_float_varying(<WIDTH x i16> %v) nounwind readnone
declare i16 @__float_to_half_uniform(float %v) nounwind readnone
declare <WIDTH x i16> @__float_to_half_varying(<WIDTH x float> %v) nounwind readnone
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; gather ;; gather

View File

@@ -58,6 +58,14 @@ define <8 x i32> @__max_varying_uint32(<8 x i32>, <8 x i32>) nounwind readonly a
ret <8 x i32> %ret ret <8 x i32> %ret
} }
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; half conversion routines
declare float @__half_to_float_uniform(i16 %v) nounwind readnone
declare <WIDTH x float> @__half_to_float_varying(<WIDTH x i16> %v) nounwind readnone
declare i16 @__float_to_half_uniform(float %v) nounwind readnone
declare <WIDTH x i16> @__float_to_half_varying(<WIDTH x float> %v) nounwind readnone
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; gather ;; gather
@@ -65,6 +73,3 @@ gen_gather(8, i8)
gen_gather(8, i16) gen_gather(8, i16)
gen_gather(8, i32) gen_gather(8, i32)
gen_gather(8, i64) gen_gather(8, i64)

View File

@@ -63,6 +63,61 @@ define <16 x i32> @__max_varying_uint32(<16 x i32>, <16 x i32>) nounwind readonl
ret <16 x i32> %m ret <16 x i32> %m
} }
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; float/half conversions
declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>) nounwind readnone
; 0 is round nearest even
declare <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float>, i32) nounwind readnone
;; Convert 16 half-precision values to single precision using the F16C
;; VCVTPH2PS instruction, which operates on 8 elements at a time: split
;; the input into its low and high 8-element halves, convert each, and
;; concatenate the two results back into a 16-wide vector.
define <16 x float> @__half_to_float_varying(<16 x i16> %v) nounwind readnone {
;; low 8 elements
%r_0 = shufflevector <16 x i16> %v, <16 x i16> undef,
<8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%vr_0 = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %r_0)
;; high 8 elements
%r_1 = shufflevector <16 x i16> %v, <16 x i16> undef,
<8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%vr_1 = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %r_1)
;; reassemble the converted halves
%r = shufflevector <8 x float> %vr_0, <8 x float> %vr_1,
<16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <16 x float> %r
}
;; Convert 16 single-precision values to half precision using the F16C
;; VCVTPS2PH instruction (8 elements per call): split the input into
;; low/high 8-element halves, convert each with round-to-nearest-even
;; (immediate operand 0), and concatenate the results.
define <16 x i16> @__float_to_half_varying(<16 x float> %v) nounwind readnone {
;; low 8 elements
%r_0 = shufflevector <16 x float> %v, <16 x float> undef,
<8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%vr_0 = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %r_0, i32 0)
;; high 8 elements
%r_1 = shufflevector <16 x float> %v, <16 x float> undef,
<8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%vr_1 = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %r_1, i32 0)
;; reassemble the converted halves
%r = shufflevector <8 x i16> %vr_0, <8 x i16> %vr_1,
<16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <16 x i16> %r
}
;; Scalar half->float conversion via the 8-wide VCVTPH2PS instruction:
;; broadcast the single i16 into lane 0 of an 8-element vector (the
;; other lanes are undef), convert, and extract lane 0 of the result.
define float @__half_to_float_uniform(i16 %v) nounwind readnone {
;; i16 -> <1 x i16> so it can be widened with shufflevector
%v1 = bitcast i16 %v to <1 x i16>
%vv = shufflevector <1 x i16> %v1, <1 x i16> undef,
<8 x i32> <i32 0, i32 undef, i32 undef, i32 undef,
i32 undef, i32 undef, i32 undef, i32 undef>
%rv = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %vv)
;; only lane 0 holds a meaningful result
%r = extractelement <8 x float> %rv, i32 0
ret float %r
}
;; Scalar float->half conversion via the 8-wide VCVTPS2PH instruction:
;; place the single float in lane 0 of an 8-element vector (other lanes
;; undef), convert with round-to-nearest-even, and extract lane 0.
define i16 @__float_to_half_uniform(float %v) nounwind readnone {
;; float -> <1 x float> so it can be widened with shufflevector
%v1 = bitcast float %v to <1 x float>
%vv = shufflevector <1 x float> %v1, <1 x float> undef,
<8 x i32> <i32 0, i32 undef, i32 undef, i32 undef,
i32 undef, i32 undef, i32 undef, i32 undef>
; round to nearest even
%rv = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %vv, i32 0)
;; only lane 0 holds a meaningful result
%r = extractelement <8 x i16> %rv, i32 0
ret i16 %r
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; gather ;; gather

View File

@@ -63,6 +63,44 @@ define <8 x i32> @__max_varying_uint32(<8 x i32>, <8 x i32>) nounwind readonly a
ret <8 x i32> %m ret <8 x i32> %m
} }
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; float/half conversions
declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>) nounwind readnone
; 0 is round nearest even
declare <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float>, i32) nounwind readnone
;; 8-wide half->float conversion: maps directly onto a single F16C
;; VCVTPH2PS instruction, since it natively handles 8 elements.
define <8 x float> @__half_to_float_varying(<8 x i16> %v) nounwind readnone {
%r = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %v)
ret <8 x float> %r
}
;; 8-wide float->half conversion: a single F16C VCVTPS2PH instruction
;; with round-to-nearest-even (immediate operand 0).
define <8 x i16> @__float_to_half_varying(<8 x float> %v) nounwind readnone {
%r = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %v, i32 0)
ret <8 x i16> %r
}
;; Scalar half->float conversion via the 8-wide VCVTPH2PS instruction:
;; broadcast the single i16 into lane 0 of an 8-element vector (the
;; other lanes are undef), convert, and extract lane 0 of the result.
define float @__half_to_float_uniform(i16 %v) nounwind readnone {
;; i16 -> <1 x i16> so it can be widened with shufflevector
%v1 = bitcast i16 %v to <1 x i16>
%vv = shufflevector <1 x i16> %v1, <1 x i16> undef,
<8 x i32> <i32 0, i32 undef, i32 undef, i32 undef,
i32 undef, i32 undef, i32 undef, i32 undef>
%rv = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %vv)
;; only lane 0 holds a meaningful result
%r = extractelement <8 x float> %rv, i32 0
ret float %r
}
;; Scalar float->half conversion via the 8-wide VCVTPS2PH instruction:
;; place the single float in lane 0 of an 8-element vector (other lanes
;; undef), convert with round-to-nearest-even, and extract lane 0.
define i16 @__float_to_half_uniform(float %v) nounwind readnone {
;; float -> <1 x float> so it can be widened with shufflevector
%v1 = bitcast float %v to <1 x float>
%vv = shufflevector <1 x float> %v1, <1 x float> undef,
<8 x i32> <i32 0, i32 undef, i32 undef, i32 undef,
i32 undef, i32 undef, i32 undef, i32 undef>
; round to nearest even
%rv = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %vv, i32 0)
;; only lane 0 holds a meaningful result
%r = extractelement <8 x i16> %rv, i32 0
ret i16 %r
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; gather ;; gather
@@ -70,6 +108,3 @@ gen_gather(8, i8)
gen_gather(8, i16) gen_gather(8, i16)
gen_gather(8, i32) gen_gather(8, i32)
gen_gather(8, i64) gen_gather(8, i64)

View File

@@ -47,6 +47,14 @@ int64minmax()
include(`target-sse2-common.ll') include(`target-sse2-common.ll')
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; half conversion routines
declare float @__half_to_float_uniform(i16 %v) nounwind readnone
declare <WIDTH x float> @__half_to_float_varying(<WIDTH x i16> %v) nounwind readnone
declare i16 @__float_to_half_uniform(float %v) nounwind readnone
declare <WIDTH x i16> @__float_to_half_varying(<WIDTH x float> %v) nounwind readnone
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; rcp ;; rcp

View File

@@ -44,6 +44,14 @@ int64minmax()
include(`target-sse2-common.ll') include(`target-sse2-common.ll')
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; half conversion routines
declare float @__half_to_float_uniform(i16 %v) nounwind readnone
declare <WIDTH x float> @__half_to_float_varying(<WIDTH x i16> %v) nounwind readnone
declare i16 @__float_to_half_uniform(float %v) nounwind readnone
declare <WIDTH x i16> @__float_to_half_varying(<WIDTH x float> %v) nounwind readnone
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; rounding ;; rounding
;; ;;

View File

@@ -47,6 +47,14 @@ int64minmax()
include(`target-sse4-common.ll') include(`target-sse4-common.ll')
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; half conversion routines
declare float @__half_to_float_uniform(i16 %v) nounwind readnone
declare <WIDTH x float> @__half_to_float_varying(<WIDTH x i16> %v) nounwind readnone
declare i16 @__float_to_half_uniform(float %v) nounwind readnone
declare <WIDTH x i16> @__float_to_half_varying(<WIDTH x float> %v) nounwind readnone
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; rcp ;; rcp

View File

@@ -44,6 +44,14 @@ int64minmax()
include(`target-sse4-common.ll') include(`target-sse4-common.ll')
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; half conversion routines
declare float @__half_to_float_uniform(i16 %v) nounwind readnone
declare <WIDTH x float> @__half_to_float_varying(<WIDTH x i16> %v) nounwind readnone
declare i16 @__float_to_half_uniform(float %v) nounwind readnone
declare <WIDTH x i16> @__float_to_half_varying(<WIDTH x float> %v) nounwind readnone
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; rcp ;; rcp

View File

@@ -210,7 +210,7 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
t->isa = Target::AVX2; t->isa = Target::AVX2;
t->nativeVectorWidth = 8; t->nativeVectorWidth = 8;
t->vectorWidth = 8; t->vectorWidth = 8;
t->attributes = "+avx2,+popcnt,+cmov"; t->attributes = "+avx2,+popcnt,+cmov,+f16c";
t->maskingIsFree = false; t->maskingIsFree = false;
t->allOffMaskIsSafe = false; t->allOffMaskIsSafe = false;
t->maskBitCount = 32; t->maskBitCount = 32;
@@ -219,7 +219,7 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
t->isa = Target::AVX2; t->isa = Target::AVX2;
t->nativeVectorWidth = 16; t->nativeVectorWidth = 16;
t->vectorWidth = 16; t->vectorWidth = 16;
t->attributes = "+avx2,+popcnt,+cmov"; t->attributes = "+avx2,+popcnt,+cmov,+f16c";
t->maskingIsFree = false; t->maskingIsFree = false;
t->allOffMaskIsSafe = false; t->allOffMaskIsSafe = false;
t->maskBitCount = 32; t->maskBitCount = 32;

View File

@@ -2824,114 +2824,124 @@ static inline uniform double pow(uniform double a, uniform double b) {
// half-precision floats // half-precision floats
static inline uniform float half_to_float(uniform unsigned int16 h) { static inline uniform float half_to_float(uniform unsigned int16 h) {
if ((h & 0x7FFFu) == 0) if (__have_native_half) {
// Signed zero return __half_to_float_uniform(h);
return floatbits(((unsigned int32) h) << 16); }
else { else {
// Though these are int16 quantities, we get much better code if ((h & 0x7FFFu) == 0)
// with them stored as int32s... // Signed zero
uniform unsigned int32 hs = h & (int32)0x8000u; // Pick off sign bit return floatbits(((unsigned int32) h) << 16);
uniform unsigned int32 he = h & (int32)0x7C00u; // Pick off exponent bits
uniform unsigned int32 hm = h & (int32)0x03FFu; // Pick off mantissa bits
if (he == 0) {
// Denormal will convert to normalized
uniform int e = -1;
// The following loop figures out how much extra to adjust the exponent
// Shift until leading bit overflows into exponent bit
do {
e++;
hm <<= 1;
} while((hm & 0x0400u) == 0);
// Sign bit
uniform unsigned int32 xs = ((unsigned int32) hs) << 16;
// Exponent: unbias the halfp, then bias the single
uniform int32 xes = ((int32)(he >> 10)) - 15 + 127 - e;
// Exponent
uniform unsigned int32 xe = (unsigned int32) (xes << 23);
// Mantissa
uniform unsigned int32 xm = ((unsigned int32) (hm & 0x03FFu)) << 13;
return floatbits(xs | xe | xm);
}
else { else {
if (he == 0x7C00u) { // Though these are int16 quantities, we get much better code
// Inf or NaN (all the exponent bits are set) // with them stored as int32s...
if (hm == 0) uniform unsigned int32 hs = h & (int32)0x8000u; // Pick off sign bit
// Zero mantissa -> signed inf uniform unsigned int32 he = h & (int32)0x7C00u; // Pick off exponent bits
return floatbits((((unsigned int32) hs) << 16) | uniform unsigned int32 hm = h & (int32)0x03FFu; // Pick off mantissa bits
((unsigned int32) 0x7F800000u)); if (he == 0) {
else // Denormal will convert to normalized
// NaN uniform int e = -1;
return floatbits(0xFFC00000u); // The following loop figures out how much extra to adjust the exponent
} // Shift until leading bit overflows into exponent bit
else { do {
// Normalized number e++;
// sign hm <<= 1;
} while((hm & 0x0400u) == 0);
// Sign bit
uniform unsigned int32 xs = ((unsigned int32) hs) << 16; uniform unsigned int32 xs = ((unsigned int32) hs) << 16;
// Exponent: unbias the halfp, then bias the single // Exponent: unbias the halfp, then bias the single
uniform int32 xes = ((int32) (he >> 10)) - 15 + 127; uniform int32 xes = ((int32)(he >> 10)) - 15 + 127 - e;
// Exponent // Exponent
uniform unsigned int32 xe = (unsigned int32) (xes << 23); uniform unsigned int32 xe = (unsigned int32) (xes << 23);
// Mantissa // Mantissa
uniform unsigned int32 xm = ((unsigned int32) hm) << 13; uniform unsigned int32 xm = ((unsigned int32) (hm & 0x03FFu)) << 13;
return floatbits(xs | xe | xm); return floatbits(xs | xe | xm);
}
else {
if (he == 0x7C00u) {
// Inf or NaN (all the exponent bits are set)
if (hm == 0)
// Zero mantissa -> signed inf
return floatbits((((unsigned int32) hs) << 16) |
((unsigned int32) 0x7F800000u));
else
// NaN
return floatbits(0xFFC00000u);
}
else {
// Normalized number
// sign
uniform unsigned int32 xs = ((unsigned int32) hs) << 16;
// Exponent: unbias the halfp, then bias the single
uniform int32 xes = ((int32) (he >> 10)) - 15 + 127;
// Exponent
uniform unsigned int32 xe = (unsigned int32) (xes << 23);
// Mantissa
uniform unsigned int32 xm = ((unsigned int32) hm) << 13;
return floatbits(xs | xe | xm);
}
} }
} }
} }
} }
static inline float half_to_float(unsigned int16 h) { static inline float half_to_float(unsigned int16 h) {
if ((h & 0x7FFFu) == 0) if (__have_native_half) {
// Signed zero return __half_to_float_varying(h);
return floatbits(((unsigned int32) h) << 16); }
else { else {
// Though these are int16 quantities, we get much better code if ((h & 0x7FFFu) == 0)
// with them stored as int32s... // Signed zero
unsigned int32 hs = h & (int32)0x8000u; // Pick off sign bit return floatbits(((unsigned int32) h) << 16);
unsigned int32 he = h & (int32)0x7C00u; // Pick off exponent bits
unsigned int32 hm = h & (int32)0x03FFu; // Pick off mantissa bits
cif (he == 0) {
// Denormal will convert to normalized
int e = -1;
// The following loop figures out how much extra to adjust the exponent
// Shift until leading bit overflows into exponent bit
do {
e++;
hm <<= 1;
} while((hm & 0x0400u) == 0);
// Sign bit
unsigned int32 xs = ((unsigned int32) hs) << 16;
// Exponent: unbias the halfp, then bias the single
int32 xes = ((int32)(he >> 10)) - 15 + 127 - e;
// Exponent
unsigned int32 xe = (unsigned int32) (xes << 23);
// Mantissa
unsigned int32 xm = ((unsigned int32) (hm & 0x03FFu)) << 13;
return floatbits(xs | xe | xm);
}
else { else {
if (he == 0x7C00u) { // Though these are int16 quantities, we get much better code
// Inf or NaN (all the exponent bits are set) // with them stored as int32s...
if (hm == 0) unsigned int32 hs = h & (int32)0x8000u; // Pick off sign bit
// Zero mantissa -> signed inf unsigned int32 he = h & (int32)0x7C00u; // Pick off exponent bits
return floatbits((((unsigned int32) hs) << 16) | unsigned int32 hm = h & (int32)0x03FFu; // Pick off mantissa bits
((unsigned int32) 0x7F800000u)); cif (he == 0) {
else // Denormal will convert to normalized
// NaN int e = -1;
return floatbits(0xFFC00000u); // The following loop figures out how much extra to adjust the exponent
} // Shift until leading bit overflows into exponent bit
else { do {
// Normalized number e++;
// sign hm <<= 1;
} while((hm & 0x0400u) == 0);
// Sign bit
unsigned int32 xs = ((unsigned int32) hs) << 16; unsigned int32 xs = ((unsigned int32) hs) << 16;
// Exponent: unbias the halfp, then bias the single // Exponent: unbias the halfp, then bias the single
int32 xes = ((int32) (he >> 10)) - 15 + 127; int32 xes = ((int32)(he >> 10)) - 15 + 127 - e;
// Exponent // Exponent
unsigned int32 xe = (unsigned int32) (xes << 23); unsigned int32 xe = (unsigned int32) (xes << 23);
// Mantissa // Mantissa
unsigned int32 xm = ((unsigned int32) hm) << 13; unsigned int32 xm = ((unsigned int32) (hm & 0x03FFu)) << 13;
return floatbits(xs | xe | xm); return floatbits(xs | xe | xm);
}
else {
if (he == 0x7C00u) {
// Inf or NaN (all the exponent bits are set)
if (hm == 0)
// Zero mantissa -> signed inf
return floatbits((((unsigned int32) hs) << 16) |
((unsigned int32) 0x7F800000u));
else
// NaN
return floatbits(0xFFC00000u);
}
else {
// Normalized number
// sign
unsigned int32 xs = ((unsigned int32) hs) << 16;
// Exponent: unbias the halfp, then bias the single
int32 xes = ((int32) (he >> 10)) - 15 + 127;
// Exponent
unsigned int32 xe = (unsigned int32) (xes << 23);
// Mantissa
unsigned int32 xm = ((unsigned int32) hm) << 13;
return floatbits(xs | xe | xm);
}
} }
} }
} }
@@ -2939,209 +2949,237 @@ static inline float half_to_float(unsigned int16 h) {
static inline uniform int16 float_to_half(uniform float f) { static inline uniform int16 float_to_half(uniform float f) {
uniform int32 x = intbits(f); if (__have_native_half) {
// Store the return value in an int32 until the very end; this ends up return __float_to_half_uniform(f);
// generating better code... }
uniform int32 ret;
if ((x & 0x7FFFFFFFu) == 0)
// Signed zero
ret = (x >> 16);
else { else {
uniform unsigned int32 xs = x & 0x80000000u; // Pick off sign bit uniform int32 x = intbits(f);
uniform unsigned int32 xe = x & 0x7F800000u; // Pick off exponent bits // Store the return value in an int32 until the very end; this ends up
uniform unsigned int32 xm = x & 0x007FFFFFu; // Pick off mantissa bits // generating better code...
if (xe == 0) { uniform int32 ret;
// Denormal will underflow, return a signed zero if ((x & 0x7FFFFFFFu) == 0)
ret = (xs >> 16); // Signed zero
} ret = (x >> 16);
else { else {
if (xe == 0x7F800000u) { uniform unsigned int32 xs = x & 0x80000000u; // Pick off sign bit
// Inf or NaN (all the exponent bits are set) uniform unsigned int32 xe = x & 0x7F800000u; // Pick off exponent bits
if (xm == 0) uniform unsigned int32 xm = x & 0x007FFFFFu; // Pick off mantissa bits
// Zero mantissa -> signed infinity if (xe == 0) {
ret = ((xs >> 16) | 0x7C00u); // Denormal will underflow, return a signed zero
else ret = (xs >> 16);
// NaN, only 1st mantissa bit set }
ret = 0xFE00u; else {
} if (xe == 0x7F800000u) {
else { // Inf or NaN (all the exponent bits are set)
// Normalized number if (xm == 0)
uniform unsigned int32 hs = (xs >> 16); // Sign bit // Zero mantissa -> signed infinity
uniform unsigned int32 hm; ret = ((xs >> 16) | 0x7C00u);
// Exponent unbias the single, then bias the halfp else
uniform int32 hes = ((int)(xe >> 23)) - 127 + 15; // NaN, only 1st mantissa bit set
if (hes >= 0x1F) ret = 0xFE00u;
// Overflow: return signed infinity }
ret = ((xs >> 16) | 0x7C00u); else {
else if (hes <= 0) { // Normalized number
// Underflow uniform unsigned int32 hs = (xs >> 16); // Sign bit
if ((14 - hes) > 24) { uniform unsigned int32 hm;
// Mantissa shifted all the way off & no rounding possibility // Exponent unbias the single, then bias the halfp
hm = 0u; // Set mantissa to zero uniform int32 hes = ((int)(xe >> 23)) - 127 + 15;
if (hes >= 0x1F)
// Overflow: return signed infinity
ret = ((xs >> 16) | 0x7C00u);
else if (hes <= 0) {
// Underflow
if ((14 - hes) > 24) {
// Mantissa shifted all the way off & no rounding possibility
hm = 0u; // Set mantissa to zero
}
else {
xm |= 0x00800000u; // Add the hidden leading bit
hm = (xm >> (14 - hes)); // Mantissa
if ((xm >> (13 - hes)) & 0x00000001u) // Check for rounding
// Round, might overflow into exp bit, but this is OK
hm += 1u;
}
ret = (hs | hm);
} }
else { else {
xm |= 0x00800000u; // Add the hidden leading bit uniform unsigned int32 he = (hes << 10); // Exponent
hm = (xm >> (14 - hes)); // Mantissa hm = (xm >> 13); // Mantissa
if ((xm >> (13 - hes)) & 0x00000001u) // Check for rounding if (xm & 0x00001000u) // Check for rounding
// Round, might overflow into exp bit, but this is OK // Round, might overflow to inf, this is OK
hm += 1u; ret = (hs | he | hm) + 1u;
else
ret = (hs | he | hm);
} }
ret = (hs | hm);
}
else {
uniform unsigned int32 he = (hes << 10); // Exponent
hm = (xm >> 13); // Mantissa
if (xm & 0x00001000u) // Check for rounding
// Round, might overflow to inf, this is OK
ret = (hs | he | hm) + 1u;
else
ret = (hs | he | hm);
} }
} }
} }
return (int16)ret;
} }
return (int16)ret;
} }
static inline int16 float_to_half(float f) { static inline int16 float_to_half(float f) {
int32 x = intbits(f); if (__have_native_half) {
// Store the return value in an int32 until the very end; this ends up return __float_to_half_varying(f);
// generating better code... }
int32 ret;
if ((x & 0x7FFFFFFFu) == 0)
// Signed zero
ret = (x >> 16);
else { else {
unsigned int32 xs = x & 0x80000000u; // Pick off sign bit int32 x = intbits(f);
unsigned int32 xe = x & 0x7F800000u; // Pick off exponent bits // Store the return value in an int32 until the very end; this ends up
unsigned int32 xm = x & 0x007FFFFFu; // Pick off mantissa bits // generating better code...
if (xe == 0) { int32 ret;
// Denormal will underflow, return a signed zero if ((x & 0x7FFFFFFFu) == 0)
ret = (xs >> 16); // Signed zero
} ret = (x >> 16);
else { else {
cif (xe == 0x7F800000u) { unsigned int32 xs = x & 0x80000000u; // Pick off sign bit
// Inf or NaN (all the exponent bits are set) unsigned int32 xe = x & 0x7F800000u; // Pick off exponent bits
if (xm == 0) unsigned int32 xm = x & 0x007FFFFFu; // Pick off mantissa bits
// Zero mantissa -> signed infinity if (xe == 0) {
ret = ((xs >> 16) | 0x7C00u); // Denormal will underflow, return a signed zero
else ret = (xs >> 16);
// NaN, only 1st mantissa bit set }
ret = 0xFE00u; else {
} cif (xe == 0x7F800000u) {
else { // Inf or NaN (all the exponent bits are set)
// Normalized number if (xm == 0)
unsigned int32 hs = (xs >> 16); // Sign bit // Zero mantissa -> signed infinity
unsigned int32 hm; ret = ((xs >> 16) | 0x7C00u);
// Exponent unbias the single, then bias the halfp else
int32 hes = ((int)(xe >> 23)) - 127 + 15; // NaN, only 1st mantissa bit set
if (hes >= 0x1F) ret = 0xFE00u;
// Overflow: return signed infinity }
ret = ((xs >> 16) | 0x7C00u); else {
else if (hes <= 0) { // Normalized number
// Underflow unsigned int32 hs = (xs >> 16); // Sign bit
if ((14 - hes) > 24) { unsigned int32 hm;
// Mantissa shifted all the way off & no rounding possibility // Exponent unbias the single, then bias the halfp
hm = 0u; // Set mantissa to zero int32 hes = ((int)(xe >> 23)) - 127 + 15;
if (hes >= 0x1F)
// Overflow: return signed infinity
ret = ((xs >> 16) | 0x7C00u);
else if (hes <= 0) {
// Underflow
if ((14 - hes) > 24) {
// Mantissa shifted all the way off & no rounding possibility
hm = 0u; // Set mantissa to zero
}
else {
xm |= 0x00800000u; // Add the hidden leading bit
hm = (xm >> (14 - hes)); // Mantissa
if ((xm >> (13 - hes)) & 0x00000001u) // Check for rounding
// Round, might overflow into exp bit, but this is OK
hm += 1u;
}
ret = (hs | hm);
} }
else { else {
xm |= 0x00800000u; // Add the hidden leading bit unsigned int32 he = (hes << 10); // Exponent
hm = (xm >> (14 - hes)); // Mantissa hm = (xm >> 13); // Mantissa
if ((xm >> (13 - hes)) & 0x00000001u) // Check for rounding if (xm & 0x00001000u) // Check for rounding
// Round, might overflow into exp bit, but this is OK // Round, might overflow to inf, this is OK
hm += 1u; ret = (hs | he | hm) + 1u;
else
ret = (hs | he | hm);
} }
ret = (hs | hm);
}
else {
unsigned int32 he = (hes << 10); // Exponent
hm = (xm >> 13); // Mantissa
if (xm & 0x00001000u) // Check for rounding
// Round, might overflow to inf, this is OK
ret = (hs | he | hm) + 1u;
else
ret = (hs | he | hm);
} }
} }
} }
return (int16)ret;
} }
return (int16)ret;
} }
static inline uniform float half_to_float_fast(uniform unsigned int16 h) { static inline uniform float half_to_float_fast(uniform unsigned int16 h) {
uniform unsigned int32 hs = h & (int32)0x8000u; // Pick off sign bit if (__have_native_half) {
uniform unsigned int32 he = h & (int32)0x7C00u; // Pick off exponent bits return __half_to_float_uniform(h);
uniform unsigned int32 hm = h & (int32)0x03FFu; // Pick off mantissa bits }
else {
// sign uniform unsigned int32 hs = h & (int32)0x8000u; // Pick off sign bit
uniform unsigned int32 xs = ((unsigned int32) hs) << 16; uniform unsigned int32 he = h & (int32)0x7C00u; // Pick off exponent bits
// Exponent: unbias the halfp, then bias the single uniform unsigned int32 hm = h & (int32)0x03FFu; // Pick off mantissa bits
uniform int32 xes = ((int32) (he >> 10)) - 15 + 127;
// Exponent
uniform unsigned int32 xe = (unsigned int32) (xes << 23);
// Mantissa
uniform unsigned int32 xm = ((unsigned int32) hm) << 13;
return floatbits(xs | xe | xm);
// sign
uniform unsigned int32 xs = ((unsigned int32) hs) << 16;
// Exponent: unbias the halfp, then bias the single
uniform int32 xes = ((int32) (he >> 10)) - 15 + 127;
// Exponent
uniform unsigned int32 xe = (unsigned int32) (xes << 23);
// Mantissa
uniform unsigned int32 xm = ((unsigned int32) hm) << 13;
return floatbits(xs | xe | xm);
}
} }
static inline float half_to_float_fast(unsigned int16 h) { static inline float half_to_float_fast(unsigned int16 h) {
unsigned int32 hs = h & (int32)0x8000u; // Pick off sign bit if (__have_native_half) {
unsigned int32 he = h & (int32)0x7C00u; // Pick off exponent bits return __half_to_float_varying(h);
unsigned int32 hm = h & (int32)0x03FFu; // Pick off mantissa bits }
else {
// sign unsigned int32 hs = h & (int32)0x8000u; // Pick off sign bit
unsigned int32 xs = ((unsigned int32) hs) << 16; unsigned int32 he = h & (int32)0x7C00u; // Pick off exponent bits
// Exponent: unbias the halfp, then bias the single unsigned int32 hm = h & (int32)0x03FFu; // Pick off mantissa bits
int32 xes = ((int32) (he >> 10)) - 15 + 127;
// Exponent
unsigned int32 xe = (unsigned int32) (xes << 23);
// Mantissa
unsigned int32 xm = ((unsigned int32) hm) << 13;
return floatbits(xs | xe | xm);
// sign
unsigned int32 xs = ((unsigned int32) hs) << 16;
// Exponent: unbias the halfp, then bias the single
int32 xes = ((int32) (he >> 10)) - 15 + 127;
// Exponent
unsigned int32 xe = (unsigned int32) (xes << 23);
// Mantissa
unsigned int32 xm = ((unsigned int32) hm) << 13;
return floatbits(xs | xe | xm);
}
} }
static inline uniform int16 float_to_half_fast(uniform float f) { static inline uniform int16 float_to_half_fast(uniform float f) {
uniform int32 x = intbits(f); if (__have_native_half) {
uniform unsigned int32 xs = x & 0x80000000u; // Pick off sign bit return __float_to_half_uniform(f);
uniform unsigned int32 xe = x & 0x7F800000u; // Pick off exponent bits }
uniform unsigned int32 xm = x & 0x007FFFFFu; // Pick off mantissa bits else {
uniform int32 x = intbits(f);
uniform unsigned int32 xs = x & 0x80000000u; // Pick off sign bit
uniform unsigned int32 xe = x & 0x7F800000u; // Pick off exponent bits
uniform unsigned int32 xm = x & 0x007FFFFFu; // Pick off mantissa bits
uniform unsigned int32 hs = (xs >> 16); // Sign bit uniform unsigned int32 hs = (xs >> 16); // Sign bit
// Exponent unbias the single, then bias the halfp // Exponent unbias the single, then bias the halfp
uniform int32 hes = ((int)(xe >> 23)) - 127 + 15; uniform int32 hes = ((int)(xe >> 23)) - 127 + 15;
uniform unsigned int32 he = (hes << 10); // Exponent uniform unsigned int32 he = (hes << 10); // Exponent
uniform int32 hm = (xm >> 13); // Mantissa uniform int32 hm = (xm >> 13); // Mantissa
uniform int32 ret = (hs | he | hm); uniform int32 ret = (hs | he | hm);
if (xm & 0x00001000u) // Check for rounding if (xm & 0x00001000u) // Check for rounding
// Round, might overflow to inf, this is OK // Round, might overflow to inf, this is OK
ret += 1u; ret += 1u;
return (int16)ret; return (int16)ret;
}
} }
static inline int16 float_to_half_fast(float f) { static inline int16 float_to_half_fast(float f) {
int32 x = intbits(f); if (__have_native_half) {
unsigned int32 xs = x & 0x80000000u; // Pick off sign bit return __float_to_half_varying(f);
unsigned int32 xe = x & 0x7F800000u; // Pick off exponent bits }
unsigned int32 xm = x & 0x007FFFFFu; // Pick off mantissa bits else {
int32 x = intbits(f);
unsigned int32 xs = x & 0x80000000u; // Pick off sign bit
unsigned int32 xe = x & 0x7F800000u; // Pick off exponent bits
unsigned int32 xm = x & 0x007FFFFFu; // Pick off mantissa bits
unsigned int32 hs = (xs >> 16); // Sign bit unsigned int32 hs = (xs >> 16); // Sign bit
// Exponent unbias the single, then bias the halfp // Exponent unbias the single, then bias the halfp
int32 hes = ((int)(xe >> 23)) - 127 + 15; int32 hes = ((int)(xe >> 23)) - 127 + 15;
unsigned int32 he = (hes << 10); // Exponent unsigned int32 he = (hes << 10); // Exponent
int32 hm = (xm >> 13); // Mantissa int32 hm = (xm >> 13); // Mantissa
int32 ret = (hs | he | hm); int32 ret = (hs | he | hm);
if (xm & 0x00001000u) // Check for rounding if (xm & 0x00001000u) // Check for rounding
// Round, might overflow to inf, this is OK // Round, might overflow to inf, this is OK
ret += 1u; ret += 1u;
return (int16)ret; return (int16)ret;
}
} }
/////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////

21
tests/half-3.ispc Normal file
View File

@@ -0,0 +1,21 @@
export uniform int width() { return programCount; }
// Round-trip every 16-bit pattern through half_to_float/float_to_half and
// count the patterns that do not survive.  Patterns whose half->float
// conversion yields NaN are excluded via the (f == f) check, since
// float_to_half canonicalizes NaN payloads and the bits legitimately change.
// On success RET[] is all zeros.
export void f_v(uniform float RET[]) {
    int errors = 0;
    foreach (i = 0 ... 65535) {
        // Explicitly convert the loop index to the half bit pattern under
        // test, and keep the round-trip result in a separate variable so
        // the comparison below is unambiguous.
        unsigned int16 h = i;
        float f = half_to_float(h);
        unsigned int16 h2 = float_to_half(f);
        int mismatches = (f == f && h != h2);
        errors += reduce_add(mismatches);
    }
    RET[programIndex] = errors;
}
// Expected result for f_v: zero mismatches in every lane.
export void result(uniform float RET[]) {
    RET[programIndex] = 0;
}