Use native float/half conversion instructions with the AVX2 target.
This commit is contained in:
@@ -822,6 +822,9 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
|
|||||||
lDefineConstantIntFunc("__fast_masked_vload", (int)g->opt.fastMaskedVload, module,
|
lDefineConstantIntFunc("__fast_masked_vload", (int)g->opt.fastMaskedVload, module,
|
||||||
symbolTable);
|
symbolTable);
|
||||||
|
|
||||||
|
lDefineConstantInt("__have_native_half", (g->target.isa == Target::AVX2),
|
||||||
|
module, symbolTable);
|
||||||
|
|
||||||
if (includeStdlibISPC) {
|
if (includeStdlibISPC) {
|
||||||
// If the user wants the standard library to be included, parse the
|
// If the user wants the standard library to be included, parse the
|
||||||
// serialized version of the stdlib.ispc file to get its
|
// serialized version of the stdlib.ispc file to get its
|
||||||
|
|||||||
@@ -58,6 +58,14 @@ define <16 x i32> @__max_varying_uint32(<16 x i32>, <16 x i32>) nounwind readonl
|
|||||||
ret <16 x i32> %ret
|
ret <16 x i32> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
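;; half conversion routines
;; (declarations only in this hunk; the visible stdlib.ispc changes call
;; these solely under __have_native_half, which is set only for the AVX2 ISA)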
|
||||||
|
|
||||||
|
declare float @__half_to_float_uniform(i16 %v) nounwind readnone
|
||||||
|
declare <WIDTH x float> @__half_to_float_varying(<WIDTH x i16> %v) nounwind readnone
|
||||||
|
declare i16 @__float_to_half_uniform(float %v) nounwind readnone
|
||||||
|
declare <WIDTH x i16> @__float_to_half_varying(<WIDTH x float> %v) nounwind readnone
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; gather
|
;; gather
|
||||||
|
|
||||||
|
|||||||
@@ -58,6 +58,14 @@ define <8 x i32> @__max_varying_uint32(<8 x i32>, <8 x i32>) nounwind readonly a
|
|||||||
ret <8 x i32> %ret
|
ret <8 x i32> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
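;; half conversion routines
;; (declarations only in this hunk; the visible stdlib.ispc changes call
;; these solely under __have_native_half, which is set only for the AVX2 ISA)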
|
||||||
|
|
||||||
|
declare float @__half_to_float_uniform(i16 %v) nounwind readnone
|
||||||
|
declare <WIDTH x float> @__half_to_float_varying(<WIDTH x i16> %v) nounwind readnone
|
||||||
|
declare i16 @__float_to_half_uniform(float %v) nounwind readnone
|
||||||
|
declare <WIDTH x i16> @__float_to_half_varying(<WIDTH x float> %v) nounwind readnone
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; gather
|
;; gather
|
||||||
|
|
||||||
@@ -65,6 +73,3 @@ gen_gather(8, i8)
|
|||||||
gen_gather(8, i16)
|
gen_gather(8, i16)
|
||||||
gen_gather(8, i32)
|
gen_gather(8, i32)
|
||||||
gen_gather(8, i64)
|
gen_gather(8, i64)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -63,6 +63,61 @@ define <16 x i32> @__max_varying_uint32(<16 x i32>, <16 x i32>) nounwind readonl
|
|||||||
ret <16 x i32> %m
|
ret <16 x i32> %m
|
||||||
}
|
}
|
||||||
|
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;; float/half conversions
|
||||||
|
|
||||||
|
declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>) nounwind readnone
|
||||||
|
; the i32 operand is the rounding-control immediate; 0 selects round-to-nearest-even
|
||||||
|
declare <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float>, i32) nounwind readnone
|
||||||
|
|
||||||
|
; Converts 16 half-precision values (passed as i16 lanes) to 16 floats.
; The vcvtph2ps.256 intrinsic handles 8 lanes per call, so the input is
; split into two 8-lane halves, each half is converted, and the two
; results are concatenated back into one 16-lane vector.
define <16 x float> @__half_to_float_varying(<16 x i16> %v) nounwind readnone {
|
||||||
|
; lanes 0-7 of the input
%r_0 = shufflevector <16 x i16> %v, <16 x i16> undef,
|
||||||
|
<8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||||
|
%vr_0 = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %r_0)
|
||||||
|
; lanes 8-15 of the input
%r_1 = shufflevector <16 x i16> %v, <16 x i16> undef,
|
||||||
|
<8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||||
|
%vr_1 = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %r_1)
|
||||||
|
; concatenate: indices 0-7 select from %vr_0, indices 8-15 from %vr_1
%r = shufflevector <8 x float> %vr_0, <8 x float> %vr_1,
|
||||||
|
<16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
|
||||||
|
i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||||
|
ret <16 x float> %r
|
||||||
|
}
|
||||||
|
|
||||||
|
; Converts 16 floats to 16 half-precision values (returned as i16 lanes).
; The vcvtps2ph.256 intrinsic handles 8 lanes per call, so the input is
; split into two 8-lane halves, each half is converted with rounding
; immediate 0 (round to nearest even), and the results are concatenated.
define <16 x i16> @__float_to_half_varying(<16 x float> %v) nounwind readnone {
|
||||||
|
; lanes 0-7 of the input
%r_0 = shufflevector <16 x float> %v, <16 x float> undef,
|
||||||
|
<8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||||
|
%vr_0 = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %r_0, i32 0)
|
||||||
|
; lanes 8-15 of the input
%r_1 = shufflevector <16 x float> %v, <16 x float> undef,
|
||||||
|
<8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||||
|
%vr_1 = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %r_1, i32 0)
|
||||||
|
; concatenate: indices 0-7 select from %vr_0, indices 8-15 from %vr_1
%r = shufflevector <8 x i16> %vr_0, <8 x i16> %vr_1,
|
||||||
|
<16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
|
||||||
|
i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||||
|
ret <16 x i16> %r
|
||||||
|
}
|
||||||
|
|
||||||
|
; Converts a single half-precision value (passed as i16) to float.
; The scalar is placed in lane 0 of an 8-lane vector so the 8-wide
; vcvtph2ps.256 intrinsic can be used; only lane 0 of the result is read.
define float @__half_to_float_uniform(i16 %v) nounwind readnone {
|
||||||
|
; reinterpret the scalar as a one-element vector so it can be shuffled
%v1 = bitcast i16 %v to <1 x i16>
|
||||||
|
; widen to 8 lanes: lane 0 holds the input, lanes 1-7 are undef
%vv = shufflevector <1 x i16> %v1, <1 x i16> undef,
|
||||||
|
<8 x i32> <i32 0, i32 undef, i32 undef, i32 undef,
|
||||||
|
i32 undef, i32 undef, i32 undef, i32 undef>
|
||||||
|
%rv = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %vv)
|
||||||
|
%r = extractelement <8 x float> %rv, i32 0
|
||||||
|
ret float %r
|
||||||
|
}
|
||||||
|
|
||||||
|
; Converts a single float to a half-precision value (returned as i16).
; The scalar is placed in lane 0 of an 8-lane vector so the 8-wide
; vcvtps2ph.256 intrinsic can be used; only lane 0 of the result is read.
define i16 @__float_to_half_uniform(float %v) nounwind readnone {
|
||||||
|
; reinterpret the scalar as a one-element vector so it can be shuffled
%v1 = bitcast float %v to <1 x float>
|
||||||
|
; widen to 8 lanes: lane 0 holds the input, lanes 1-7 are undef
%vv = shufflevector <1 x float> %v1, <1 x float> undef,
|
||||||
|
<8 x i32> <i32 0, i32 undef, i32 undef, i32 undef,
|
||||||
|
i32 undef, i32 undef, i32 undef, i32 undef>
|
||||||
|
; rounding immediate 0: round to nearest even
|
||||||
|
%rv = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %vv, i32 0)
|
||||||
|
%r = extractelement <8 x i16> %rv, i32 0
|
||||||
|
ret i16 %r
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; gather
|
;; gather
|
||||||
|
|
||||||
|
|||||||
@@ -63,6 +63,44 @@ define <8 x i32> @__max_varying_uint32(<8 x i32>, <8 x i32>) nounwind readonly a
|
|||||||
ret <8 x i32> %m
|
ret <8 x i32> %m
|
||||||
}
|
}
|
||||||
|
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;; float/half conversions
|
||||||
|
|
||||||
|
declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>) nounwind readnone
|
||||||
|
; the i32 operand is the rounding-control immediate; 0 selects round-to-nearest-even
|
||||||
|
declare <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float>, i32) nounwind readnone
|
||||||
|
|
||||||
|
; Converts 8 half-precision values (passed as i16 lanes) to 8 floats.
; The vector width matches the vcvtph2ps.256 intrinsic, so this is a
; direct call with no splitting or widening.
define <8 x float> @__half_to_float_varying(<8 x i16> %v) nounwind readnone {
|
||||||
|
%r = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %v)
|
||||||
|
ret <8 x float> %r
|
||||||
|
}
|
||||||
|
|
||||||
|
; Converts 8 floats to 8 half-precision values (returned as i16 lanes).
; The vector width matches the vcvtps2ph.256 intrinsic, so this is a
; direct call; the immediate 0 requests round to nearest even.
define <8 x i16> @__float_to_half_varying(<8 x float> %v) nounwind readnone {
|
||||||
|
%r = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %v, i32 0)
|
||||||
|
ret <8 x i16> %r
|
||||||
|
}
|
||||||
|
|
||||||
|
; Converts a single half-precision value (passed as i16) to float.
; The scalar is placed in lane 0 of an 8-lane vector so the 8-wide
; vcvtph2ps.256 intrinsic can be used; only lane 0 of the result is read.
define float @__half_to_float_uniform(i16 %v) nounwind readnone {
|
||||||
|
; reinterpret the scalar as a one-element vector so it can be shuffled
%v1 = bitcast i16 %v to <1 x i16>
|
||||||
|
; widen to 8 lanes: lane 0 holds the input, lanes 1-7 are undef
%vv = shufflevector <1 x i16> %v1, <1 x i16> undef,
|
||||||
|
<8 x i32> <i32 0, i32 undef, i32 undef, i32 undef,
|
||||||
|
i32 undef, i32 undef, i32 undef, i32 undef>
|
||||||
|
%rv = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %vv)
|
||||||
|
%r = extractelement <8 x float> %rv, i32 0
|
||||||
|
ret float %r
|
||||||
|
}
|
||||||
|
|
||||||
|
; Converts a single float to a half-precision value (returned as i16).
; The scalar is placed in lane 0 of an 8-lane vector so the 8-wide
; vcvtps2ph.256 intrinsic can be used; only lane 0 of the result is read.
define i16 @__float_to_half_uniform(float %v) nounwind readnone {
|
||||||
|
; reinterpret the scalar as a one-element vector so it can be shuffled
%v1 = bitcast float %v to <1 x float>
|
||||||
|
; widen to 8 lanes: lane 0 holds the input, lanes 1-7 are undef
%vv = shufflevector <1 x float> %v1, <1 x float> undef,
|
||||||
|
<8 x i32> <i32 0, i32 undef, i32 undef, i32 undef,
|
||||||
|
i32 undef, i32 undef, i32 undef, i32 undef>
|
||||||
|
; rounding immediate 0: round to nearest even
|
||||||
|
%rv = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %vv, i32 0)
|
||||||
|
%r = extractelement <8 x i16> %rv, i32 0
|
||||||
|
ret i16 %r
|
||||||
|
}
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; gather
|
;; gather
|
||||||
|
|
||||||
@@ -70,6 +108,3 @@ gen_gather(8, i8)
|
|||||||
gen_gather(8, i16)
|
gen_gather(8, i16)
|
||||||
gen_gather(8, i32)
|
gen_gather(8, i32)
|
||||||
gen_gather(8, i64)
|
gen_gather(8, i64)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -47,6 +47,14 @@ int64minmax()
|
|||||||
|
|
||||||
include(`target-sse2-common.ll')
|
include(`target-sse2-common.ll')
|
||||||
|
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
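;; half conversion routines
;; (declarations only in this hunk; the visible stdlib.ispc changes call
;; these solely under __have_native_half, which is set only for the AVX2 ISA)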
|
||||||
|
|
||||||
|
declare float @__half_to_float_uniform(i16 %v) nounwind readnone
|
||||||
|
declare <WIDTH x float> @__half_to_float_varying(<WIDTH x i16> %v) nounwind readnone
|
||||||
|
declare i16 @__float_to_half_uniform(float %v) nounwind readnone
|
||||||
|
declare <WIDTH x i16> @__float_to_half_varying(<WIDTH x float> %v) nounwind readnone
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; rcp
|
;; rcp
|
||||||
|
|
||||||
|
|||||||
@@ -44,6 +44,14 @@ int64minmax()
|
|||||||
|
|
||||||
include(`target-sse2-common.ll')
|
include(`target-sse2-common.ll')
|
||||||
|
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
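;; half conversion routines
;; (declarations only in this hunk; the visible stdlib.ispc changes call
;; these solely under __have_native_half, which is set only for the AVX2 ISA)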
|
||||||
|
|
||||||
|
declare float @__half_to_float_uniform(i16 %v) nounwind readnone
|
||||||
|
declare <WIDTH x float> @__half_to_float_varying(<WIDTH x i16> %v) nounwind readnone
|
||||||
|
declare i16 @__float_to_half_uniform(float %v) nounwind readnone
|
||||||
|
declare <WIDTH x i16> @__float_to_half_varying(<WIDTH x float> %v) nounwind readnone
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; rounding
|
;; rounding
|
||||||
;;
|
;;
|
||||||
|
|||||||
@@ -47,6 +47,14 @@ int64minmax()
|
|||||||
|
|
||||||
include(`target-sse4-common.ll')
|
include(`target-sse4-common.ll')
|
||||||
|
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
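;; half conversion routines
;; (declarations only in this hunk; the visible stdlib.ispc changes call
;; these solely under __have_native_half, which is set only for the AVX2 ISA)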
|
||||||
|
|
||||||
|
declare float @__half_to_float_uniform(i16 %v) nounwind readnone
|
||||||
|
declare <WIDTH x float> @__half_to_float_varying(<WIDTH x i16> %v) nounwind readnone
|
||||||
|
declare i16 @__float_to_half_uniform(float %v) nounwind readnone
|
||||||
|
declare <WIDTH x i16> @__float_to_half_varying(<WIDTH x float> %v) nounwind readnone
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; rcp
|
;; rcp
|
||||||
|
|
||||||
|
|||||||
@@ -44,6 +44,14 @@ int64minmax()
|
|||||||
|
|
||||||
include(`target-sse4-common.ll')
|
include(`target-sse4-common.ll')
|
||||||
|
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
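;; half conversion routines
;; (declarations only in this hunk; the visible stdlib.ispc changes call
;; these solely under __have_native_half, which is set only for the AVX2 ISA)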
|
||||||
|
|
||||||
|
declare float @__half_to_float_uniform(i16 %v) nounwind readnone
|
||||||
|
declare <WIDTH x float> @__half_to_float_varying(<WIDTH x i16> %v) nounwind readnone
|
||||||
|
declare i16 @__float_to_half_uniform(float %v) nounwind readnone
|
||||||
|
declare <WIDTH x i16> @__float_to_half_varying(<WIDTH x float> %v) nounwind readnone
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; rcp
|
;; rcp
|
||||||
|
|
||||||
|
|||||||
4
ispc.cpp
4
ispc.cpp
@@ -210,7 +210,7 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
|
|||||||
t->isa = Target::AVX2;
|
t->isa = Target::AVX2;
|
||||||
t->nativeVectorWidth = 8;
|
t->nativeVectorWidth = 8;
|
||||||
t->vectorWidth = 8;
|
t->vectorWidth = 8;
|
||||||
t->attributes = "+avx2,+popcnt,+cmov";
|
t->attributes = "+avx2,+popcnt,+cmov,+f16c";
|
||||||
t->maskingIsFree = false;
|
t->maskingIsFree = false;
|
||||||
t->allOffMaskIsSafe = false;
|
t->allOffMaskIsSafe = false;
|
||||||
t->maskBitCount = 32;
|
t->maskBitCount = 32;
|
||||||
@@ -219,7 +219,7 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
|
|||||||
t->isa = Target::AVX2;
|
t->isa = Target::AVX2;
|
||||||
t->nativeVectorWidth = 16;
|
t->nativeVectorWidth = 16;
|
||||||
t->vectorWidth = 16;
|
t->vectorWidth = 16;
|
||||||
t->attributes = "+avx2,+popcnt,+cmov";
|
t->attributes = "+avx2,+popcnt,+cmov,+f16c";
|
||||||
t->maskingIsFree = false;
|
t->maskingIsFree = false;
|
||||||
t->allOffMaskIsSafe = false;
|
t->allOffMaskIsSafe = false;
|
||||||
t->maskBitCount = 32;
|
t->maskBitCount = 32;
|
||||||
|
|||||||
534
stdlib.ispc
534
stdlib.ispc
@@ -2824,114 +2824,124 @@ static inline uniform double pow(uniform double a, uniform double b) {
|
|||||||
// half-precision floats
|
// half-precision floats
|
||||||
|
|
||||||
static inline uniform float half_to_float(uniform unsigned int16 h) {
|
static inline uniform float half_to_float(uniform unsigned int16 h) {
|
||||||
if ((h & 0x7FFFu) == 0)
|
if (__have_native_half) {
|
||||||
// Signed zero
|
return __half_to_float_uniform(h);
|
||||||
return floatbits(((unsigned int32) h) << 16);
|
}
|
||||||
else {
|
else {
|
||||||
// Though these are int16 quantities, we get much better code
|
if ((h & 0x7FFFu) == 0)
|
||||||
// with them stored as int32s...
|
// Signed zero
|
||||||
uniform unsigned int32 hs = h & (int32)0x8000u; // Pick off sign bit
|
return floatbits(((unsigned int32) h) << 16);
|
||||||
uniform unsigned int32 he = h & (int32)0x7C00u; // Pick off exponent bits
|
|
||||||
uniform unsigned int32 hm = h & (int32)0x03FFu; // Pick off mantissa bits
|
|
||||||
if (he == 0) {
|
|
||||||
// Denormal will convert to normalized
|
|
||||||
uniform int e = -1;
|
|
||||||
// The following loop figures out how much extra to adjust the exponent
|
|
||||||
// Shift until leading bit overflows into exponent bit
|
|
||||||
do {
|
|
||||||
e++;
|
|
||||||
hm <<= 1;
|
|
||||||
} while((hm & 0x0400u) == 0);
|
|
||||||
|
|
||||||
// Sign bit
|
|
||||||
uniform unsigned int32 xs = ((unsigned int32) hs) << 16;
|
|
||||||
// Exponent: unbias the halfp, then bias the single
|
|
||||||
uniform int32 xes = ((int32)(he >> 10)) - 15 + 127 - e;
|
|
||||||
// Exponent
|
|
||||||
uniform unsigned int32 xe = (unsigned int32) (xes << 23);
|
|
||||||
// Mantissa
|
|
||||||
uniform unsigned int32 xm = ((unsigned int32) (hm & 0x03FFu)) << 13;
|
|
||||||
return floatbits(xs | xe | xm);
|
|
||||||
}
|
|
||||||
else {
|
else {
|
||||||
if (he == 0x7C00u) {
|
// Though these are int16 quantities, we get much better code
|
||||||
// Inf or NaN (all the exponent bits are set)
|
// with them stored as int32s...
|
||||||
if (hm == 0)
|
uniform unsigned int32 hs = h & (int32)0x8000u; // Pick off sign bit
|
||||||
// Zero mantissa -> signed inf
|
uniform unsigned int32 he = h & (int32)0x7C00u; // Pick off exponent bits
|
||||||
return floatbits((((unsigned int32) hs) << 16) |
|
uniform unsigned int32 hm = h & (int32)0x03FFu; // Pick off mantissa bits
|
||||||
((unsigned int32) 0x7F800000u));
|
if (he == 0) {
|
||||||
else
|
// Denormal will convert to normalized
|
||||||
// NaN
|
uniform int e = -1;
|
||||||
return floatbits(0xFFC00000u);
|
// The following loop figures out how much extra to adjust the exponent
|
||||||
}
|
// Shift until leading bit overflows into exponent bit
|
||||||
else {
|
do {
|
||||||
// Normalized number
|
e++;
|
||||||
// sign
|
hm <<= 1;
|
||||||
|
} while((hm & 0x0400u) == 0);
|
||||||
|
|
||||||
|
// Sign bit
|
||||||
uniform unsigned int32 xs = ((unsigned int32) hs) << 16;
|
uniform unsigned int32 xs = ((unsigned int32) hs) << 16;
|
||||||
// Exponent: unbias the halfp, then bias the single
|
// Exponent: unbias the halfp, then bias the single
|
||||||
uniform int32 xes = ((int32) (he >> 10)) - 15 + 127;
|
uniform int32 xes = ((int32)(he >> 10)) - 15 + 127 - e;
|
||||||
// Exponent
|
// Exponent
|
||||||
uniform unsigned int32 xe = (unsigned int32) (xes << 23);
|
uniform unsigned int32 xe = (unsigned int32) (xes << 23);
|
||||||
// Mantissa
|
// Mantissa
|
||||||
uniform unsigned int32 xm = ((unsigned int32) hm) << 13;
|
uniform unsigned int32 xm = ((unsigned int32) (hm & 0x03FFu)) << 13;
|
||||||
return floatbits(xs | xe | xm);
|
return floatbits(xs | xe | xm);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
if (he == 0x7C00u) {
|
||||||
|
// Inf or NaN (all the exponent bits are set)
|
||||||
|
if (hm == 0)
|
||||||
|
// Zero mantissa -> signed inf
|
||||||
|
return floatbits((((unsigned int32) hs) << 16) |
|
||||||
|
((unsigned int32) 0x7F800000u));
|
||||||
|
else
|
||||||
|
// NaN
|
||||||
|
return floatbits(0xFFC00000u);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// Normalized number
|
||||||
|
// sign
|
||||||
|
uniform unsigned int32 xs = ((unsigned int32) hs) << 16;
|
||||||
|
// Exponent: unbias the halfp, then bias the single
|
||||||
|
uniform int32 xes = ((int32) (he >> 10)) - 15 + 127;
|
||||||
|
// Exponent
|
||||||
|
uniform unsigned int32 xe = (unsigned int32) (xes << 23);
|
||||||
|
// Mantissa
|
||||||
|
uniform unsigned int32 xm = ((unsigned int32) hm) << 13;
|
||||||
|
return floatbits(xs | xe | xm);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline float half_to_float(unsigned int16 h) {
|
static inline float half_to_float(unsigned int16 h) {
|
||||||
if ((h & 0x7FFFu) == 0)
|
if (__have_native_half) {
|
||||||
// Signed zero
|
return __half_to_float_varying(h);
|
||||||
return floatbits(((unsigned int32) h) << 16);
|
}
|
||||||
else {
|
else {
|
||||||
// Though these are int16 quantities, we get much better code
|
if ((h & 0x7FFFu) == 0)
|
||||||
// with them stored as int32s...
|
// Signed zero
|
||||||
unsigned int32 hs = h & (int32)0x8000u; // Pick off sign bit
|
return floatbits(((unsigned int32) h) << 16);
|
||||||
unsigned int32 he = h & (int32)0x7C00u; // Pick off exponent bits
|
|
||||||
unsigned int32 hm = h & (int32)0x03FFu; // Pick off mantissa bits
|
|
||||||
cif (he == 0) {
|
|
||||||
// Denormal will convert to normalized
|
|
||||||
int e = -1;
|
|
||||||
// The following loop figures out how much extra to adjust the exponent
|
|
||||||
// Shift until leading bit overflows into exponent bit
|
|
||||||
do {
|
|
||||||
e++;
|
|
||||||
hm <<= 1;
|
|
||||||
} while((hm & 0x0400u) == 0);
|
|
||||||
|
|
||||||
// Sign bit
|
|
||||||
unsigned int32 xs = ((unsigned int32) hs) << 16;
|
|
||||||
// Exponent: unbias the halfp, then bias the single
|
|
||||||
int32 xes = ((int32)(he >> 10)) - 15 + 127 - e;
|
|
||||||
// Exponent
|
|
||||||
unsigned int32 xe = (unsigned int32) (xes << 23);
|
|
||||||
// Mantissa
|
|
||||||
unsigned int32 xm = ((unsigned int32) (hm & 0x03FFu)) << 13;
|
|
||||||
return floatbits(xs | xe | xm);
|
|
||||||
}
|
|
||||||
else {
|
else {
|
||||||
if (he == 0x7C00u) {
|
// Though these are int16 quantities, we get much better code
|
||||||
// Inf or NaN (all the exponent bits are set)
|
// with them stored as int32s...
|
||||||
if (hm == 0)
|
unsigned int32 hs = h & (int32)0x8000u; // Pick off sign bit
|
||||||
// Zero mantissa -> signed inf
|
unsigned int32 he = h & (int32)0x7C00u; // Pick off exponent bits
|
||||||
return floatbits((((unsigned int32) hs) << 16) |
|
unsigned int32 hm = h & (int32)0x03FFu; // Pick off mantissa bits
|
||||||
((unsigned int32) 0x7F800000u));
|
cif (he == 0) {
|
||||||
else
|
// Denormal will convert to normalized
|
||||||
// NaN
|
int e = -1;
|
||||||
return floatbits(0xFFC00000u);
|
// The following loop figures out how much extra to adjust the exponent
|
||||||
}
|
// Shift until leading bit overflows into exponent bit
|
||||||
else {
|
do {
|
||||||
// Normalized number
|
e++;
|
||||||
// sign
|
hm <<= 1;
|
||||||
|
} while((hm & 0x0400u) == 0);
|
||||||
|
|
||||||
|
// Sign bit
|
||||||
unsigned int32 xs = ((unsigned int32) hs) << 16;
|
unsigned int32 xs = ((unsigned int32) hs) << 16;
|
||||||
// Exponent: unbias the halfp, then bias the single
|
// Exponent: unbias the halfp, then bias the single
|
||||||
int32 xes = ((int32) (he >> 10)) - 15 + 127;
|
int32 xes = ((int32)(he >> 10)) - 15 + 127 - e;
|
||||||
// Exponent
|
// Exponent
|
||||||
unsigned int32 xe = (unsigned int32) (xes << 23);
|
unsigned int32 xe = (unsigned int32) (xes << 23);
|
||||||
// Mantissa
|
// Mantissa
|
||||||
unsigned int32 xm = ((unsigned int32) hm) << 13;
|
unsigned int32 xm = ((unsigned int32) (hm & 0x03FFu)) << 13;
|
||||||
return floatbits(xs | xe | xm);
|
return floatbits(xs | xe | xm);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
if (he == 0x7C00u) {
|
||||||
|
// Inf or NaN (all the exponent bits are set)
|
||||||
|
if (hm == 0)
|
||||||
|
// Zero mantissa -> signed inf
|
||||||
|
return floatbits((((unsigned int32) hs) << 16) |
|
||||||
|
((unsigned int32) 0x7F800000u));
|
||||||
|
else
|
||||||
|
// NaN
|
||||||
|
return floatbits(0xFFC00000u);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// Normalized number
|
||||||
|
// sign
|
||||||
|
unsigned int32 xs = ((unsigned int32) hs) << 16;
|
||||||
|
// Exponent: unbias the halfp, then bias the single
|
||||||
|
int32 xes = ((int32) (he >> 10)) - 15 + 127;
|
||||||
|
// Exponent
|
||||||
|
unsigned int32 xe = (unsigned int32) (xes << 23);
|
||||||
|
// Mantissa
|
||||||
|
unsigned int32 xm = ((unsigned int32) hm) << 13;
|
||||||
|
return floatbits(xs | xe | xm);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -2939,209 +2949,237 @@ static inline float half_to_float(unsigned int16 h) {
|
|||||||
|
|
||||||
|
|
||||||
static inline uniform int16 float_to_half(uniform float f) {
|
static inline uniform int16 float_to_half(uniform float f) {
|
||||||
uniform int32 x = intbits(f);
|
if (__have_native_half) {
|
||||||
// Store the return value in an int32 until the very end; this ends up
|
return __float_to_half_uniform(f);
|
||||||
// generating better code...
|
}
|
||||||
uniform int32 ret;
|
|
||||||
if ((x & 0x7FFFFFFFu) == 0)
|
|
||||||
// Signed zero
|
|
||||||
ret = (x >> 16);
|
|
||||||
else {
|
else {
|
||||||
uniform unsigned int32 xs = x & 0x80000000u; // Pick off sign bit
|
uniform int32 x = intbits(f);
|
||||||
uniform unsigned int32 xe = x & 0x7F800000u; // Pick off exponent bits
|
// Store the return value in an int32 until the very end; this ends up
|
||||||
uniform unsigned int32 xm = x & 0x007FFFFFu; // Pick off mantissa bits
|
// generating better code...
|
||||||
if (xe == 0) {
|
uniform int32 ret;
|
||||||
// Denormal will underflow, return a signed zero
|
if ((x & 0x7FFFFFFFu) == 0)
|
||||||
ret = (xs >> 16);
|
// Signed zero
|
||||||
}
|
ret = (x >> 16);
|
||||||
else {
|
else {
|
||||||
if (xe == 0x7F800000u) {
|
uniform unsigned int32 xs = x & 0x80000000u; // Pick off sign bit
|
||||||
// Inf or NaN (all the exponent bits are set)
|
uniform unsigned int32 xe = x & 0x7F800000u; // Pick off exponent bits
|
||||||
if (xm == 0)
|
uniform unsigned int32 xm = x & 0x007FFFFFu; // Pick off mantissa bits
|
||||||
// Zero mantissa -> signed infinity
|
if (xe == 0) {
|
||||||
ret = ((xs >> 16) | 0x7C00u);
|
// Denormal will underflow, return a signed zero
|
||||||
else
|
ret = (xs >> 16);
|
||||||
// NaN, only 1st mantissa bit set
|
}
|
||||||
ret = 0xFE00u;
|
else {
|
||||||
}
|
if (xe == 0x7F800000u) {
|
||||||
else {
|
// Inf or NaN (all the exponent bits are set)
|
||||||
// Normalized number
|
if (xm == 0)
|
||||||
uniform unsigned int32 hs = (xs >> 16); // Sign bit
|
// Zero mantissa -> signed infinity
|
||||||
uniform unsigned int32 hm;
|
ret = ((xs >> 16) | 0x7C00u);
|
||||||
// Exponent unbias the single, then bias the halfp
|
else
|
||||||
uniform int32 hes = ((int)(xe >> 23)) - 127 + 15;
|
// NaN, only 1st mantissa bit set
|
||||||
if (hes >= 0x1F)
|
ret = 0xFE00u;
|
||||||
// Overflow: return signed infinity
|
}
|
||||||
ret = ((xs >> 16) | 0x7C00u);
|
else {
|
||||||
else if (hes <= 0) {
|
// Normalized number
|
||||||
// Underflow
|
uniform unsigned int32 hs = (xs >> 16); // Sign bit
|
||||||
if ((14 - hes) > 24) {
|
uniform unsigned int32 hm;
|
||||||
// Mantissa shifted all the way off & no rounding possibility
|
// Exponent unbias the single, then bias the halfp
|
||||||
hm = 0u; // Set mantissa to zero
|
uniform int32 hes = ((int)(xe >> 23)) - 127 + 15;
|
||||||
|
if (hes >= 0x1F)
|
||||||
|
// Overflow: return signed infinity
|
||||||
|
ret = ((xs >> 16) | 0x7C00u);
|
||||||
|
else if (hes <= 0) {
|
||||||
|
// Underflow
|
||||||
|
if ((14 - hes) > 24) {
|
||||||
|
// Mantissa shifted all the way off & no rounding possibility
|
||||||
|
hm = 0u; // Set mantissa to zero
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
xm |= 0x00800000u; // Add the hidden leading bit
|
||||||
|
hm = (xm >> (14 - hes)); // Mantissa
|
||||||
|
if ((xm >> (13 - hes)) & 0x00000001u) // Check for rounding
|
||||||
|
// Round, might overflow into exp bit, but this is OK
|
||||||
|
hm += 1u;
|
||||||
|
}
|
||||||
|
ret = (hs | hm);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
xm |= 0x00800000u; // Add the hidden leading bit
|
uniform unsigned int32 he = (hes << 10); // Exponent
|
||||||
hm = (xm >> (14 - hes)); // Mantissa
|
hm = (xm >> 13); // Mantissa
|
||||||
if ((xm >> (13 - hes)) & 0x00000001u) // Check for rounding
|
if (xm & 0x00001000u) // Check for rounding
|
||||||
// Round, might overflow into exp bit, but this is OK
|
// Round, might overflow to inf, this is OK
|
||||||
hm += 1u;
|
ret = (hs | he | hm) + 1u;
|
||||||
|
else
|
||||||
|
ret = (hs | he | hm);
|
||||||
}
|
}
|
||||||
ret = (hs | hm);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
uniform unsigned int32 he = (hes << 10); // Exponent
|
|
||||||
hm = (xm >> 13); // Mantissa
|
|
||||||
if (xm & 0x00001000u) // Check for rounding
|
|
||||||
// Round, might overflow to inf, this is OK
|
|
||||||
ret = (hs | he | hm) + 1u;
|
|
||||||
else
|
|
||||||
ret = (hs | he | hm);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return (int16)ret;
|
||||||
}
|
}
|
||||||
return (int16)ret;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static inline int16 float_to_half(float f) {
|
static inline int16 float_to_half(float f) {
|
||||||
int32 x = intbits(f);
|
if (__have_native_half) {
|
||||||
// Store the return value in an int32 until the very end; this ends up
|
return __float_to_half_varying(f);
|
||||||
// generating better code...
|
}
|
||||||
int32 ret;
|
|
||||||
if ((x & 0x7FFFFFFFu) == 0)
|
|
||||||
// Signed zero
|
|
||||||
ret = (x >> 16);
|
|
||||||
else {
|
else {
|
||||||
unsigned int32 xs = x & 0x80000000u; // Pick off sign bit
|
int32 x = intbits(f);
|
||||||
unsigned int32 xe = x & 0x7F800000u; // Pick off exponent bits
|
// Store the return value in an int32 until the very end; this ends up
|
||||||
unsigned int32 xm = x & 0x007FFFFFu; // Pick off mantissa bits
|
// generating better code...
|
||||||
if (xe == 0) {
|
int32 ret;
|
||||||
// Denormal will underflow, return a signed zero
|
if ((x & 0x7FFFFFFFu) == 0)
|
||||||
ret = (xs >> 16);
|
// Signed zero
|
||||||
}
|
ret = (x >> 16);
|
||||||
else {
|
else {
|
||||||
cif (xe == 0x7F800000u) {
|
unsigned int32 xs = x & 0x80000000u; // Pick off sign bit
|
||||||
// Inf or NaN (all the exponent bits are set)
|
unsigned int32 xe = x & 0x7F800000u; // Pick off exponent bits
|
||||||
if (xm == 0)
|
unsigned int32 xm = x & 0x007FFFFFu; // Pick off mantissa bits
|
||||||
// Zero mantissa -> signed infinity
|
if (xe == 0) {
|
||||||
ret = ((xs >> 16) | 0x7C00u);
|
// Denormal will underflow, return a signed zero
|
||||||
else
|
ret = (xs >> 16);
|
||||||
// NaN, only 1st mantissa bit set
|
}
|
||||||
ret = 0xFE00u;
|
else {
|
||||||
}
|
cif (xe == 0x7F800000u) {
|
||||||
else {
|
// Inf or NaN (all the exponent bits are set)
|
||||||
// Normalized number
|
if (xm == 0)
|
||||||
unsigned int32 hs = (xs >> 16); // Sign bit
|
// Zero mantissa -> signed infinity
|
||||||
unsigned int32 hm;
|
ret = ((xs >> 16) | 0x7C00u);
|
||||||
// Exponent unbias the single, then bias the halfp
|
else
|
||||||
int32 hes = ((int)(xe >> 23)) - 127 + 15;
|
// NaN, only 1st mantissa bit set
|
||||||
if (hes >= 0x1F)
|
ret = 0xFE00u;
|
||||||
// Overflow: return signed infinity
|
}
|
||||||
ret = ((xs >> 16) | 0x7C00u);
|
else {
|
||||||
else if (hes <= 0) {
|
// Normalized number
|
||||||
// Underflow
|
unsigned int32 hs = (xs >> 16); // Sign bit
|
||||||
if ((14 - hes) > 24) {
|
unsigned int32 hm;
|
||||||
// Mantissa shifted all the way off & no rounding possibility
|
// Exponent unbias the single, then bias the halfp
|
||||||
hm = 0u; // Set mantissa to zero
|
int32 hes = ((int)(xe >> 23)) - 127 + 15;
|
||||||
|
if (hes >= 0x1F)
|
||||||
|
// Overflow: return signed infinity
|
||||||
|
ret = ((xs >> 16) | 0x7C00u);
|
||||||
|
else if (hes <= 0) {
|
||||||
|
// Underflow
|
||||||
|
if ((14 - hes) > 24) {
|
||||||
|
// Mantissa shifted all the way off & no rounding possibility
|
||||||
|
hm = 0u; // Set mantissa to zero
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
xm |= 0x00800000u; // Add the hidden leading bit
|
||||||
|
hm = (xm >> (14 - hes)); // Mantissa
|
||||||
|
if ((xm >> (13 - hes)) & 0x00000001u) // Check for rounding
|
||||||
|
// Round, might overflow into exp bit, but this is OK
|
||||||
|
hm += 1u;
|
||||||
|
}
|
||||||
|
ret = (hs | hm);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
xm |= 0x00800000u; // Add the hidden leading bit
|
unsigned int32 he = (hes << 10); // Exponent
|
||||||
hm = (xm >> (14 - hes)); // Mantissa
|
hm = (xm >> 13); // Mantissa
|
||||||
if ((xm >> (13 - hes)) & 0x00000001u) // Check for rounding
|
if (xm & 0x00001000u) // Check for rounding
|
||||||
// Round, might overflow into exp bit, but this is OK
|
// Round, might overflow to inf, this is OK
|
||||||
hm += 1u;
|
ret = (hs | he | hm) + 1u;
|
||||||
|
else
|
||||||
|
ret = (hs | he | hm);
|
||||||
}
|
}
|
||||||
ret = (hs | hm);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
unsigned int32 he = (hes << 10); // Exponent
|
|
||||||
hm = (xm >> 13); // Mantissa
|
|
||||||
if (xm & 0x00001000u) // Check for rounding
|
|
||||||
// Round, might overflow to inf, this is OK
|
|
||||||
ret = (hs | he | hm) + 1u;
|
|
||||||
else
|
|
||||||
ret = (hs | he | hm);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return (int16)ret;
|
||||||
}
|
}
|
||||||
return (int16)ret;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Fast half -> float conversion of a single uniform value.
//
// When __have_native_half is set, the conversion is delegated to the
// target builtin __half_to_float_uniform().  Otherwise the result is
// assembled with integer operations only: the same rebias-and-shift
// sequence is applied to every input, with no separate branch for any
// particular bit pattern.
static inline uniform float half_to_float_fast(uniform unsigned int16 h) {
    if (__have_native_half)
        return __half_to_float_uniform(h);

    // Split the 16-bit pattern into its three fields.
    uniform unsigned int32 halfSign = h & (int32)0x8000u;
    uniform unsigned int32 halfExp = h & (int32)0x7C00u;
    uniform unsigned int32 halfMant = h & (int32)0x03FFu;

    // The sign moves from bit 15 to bit 31.
    uniform unsigned int32 floatSign = ((unsigned int32) halfSign) << 16;

    // Remove the half bias (15) and apply the single bias (127), then
    // place the exponent in its single-precision position.
    uniform int32 rebiasedExp = ((int32) (halfExp >> 10)) - 15 + 127;
    uniform unsigned int32 floatExp = (unsigned int32) (rebiasedExp << 23);

    // The 10 mantissa bits become the top bits of the 23-bit mantissa.
    uniform unsigned int32 floatMant = ((unsigned int32) halfMant) << 13;

    return floatbits(floatSign | floatExp | floatMant);
}
|
||||||
|
|
||||||
// Fast half -> float conversion, varying overload.
//
// When __have_native_half is set, the conversion is delegated to the
// target builtin __half_to_float_varying().  Otherwise the result is
// assembled with integer operations only: the same rebias-and-shift
// sequence is applied to every input, with no separate branch for any
// particular bit pattern.
static inline float half_to_float_fast(unsigned int16 h) {
    if (__have_native_half)
        return __half_to_float_varying(h);

    // Split the 16-bit pattern into its three fields.
    unsigned int32 halfSign = h & (int32)0x8000u;
    unsigned int32 halfExp = h & (int32)0x7C00u;
    unsigned int32 halfMant = h & (int32)0x03FFu;

    // The sign moves from bit 15 to bit 31.
    unsigned int32 floatSign = ((unsigned int32) halfSign) << 16;

    // Remove the half bias (15) and apply the single bias (127), then
    // place the exponent in its single-precision position.
    int32 rebiasedExp = ((int32) (halfExp >> 10)) - 15 + 127;
    unsigned int32 floatExp = (unsigned int32) (rebiasedExp << 23);

    // The 10 mantissa bits become the top bits of the 23-bit mantissa.
    unsigned int32 floatMant = ((unsigned int32) halfMant) << 13;

    return floatbits(floatSign | floatExp | floatMant);
}
|
||||||
|
|
||||||
// Fast float -> half conversion of a single uniform value.
//
// When __have_native_half is set, the conversion is delegated to the
// target builtin __float_to_half_uniform().  Otherwise the half is
// assembled with integer operations only: the same rebias-and-shift
// sequence is applied to every input, with no separate branch for any
// particular bit pattern.
static inline uniform int16 float_to_half_fast(uniform float f) {
    if (__have_native_half)
        return __float_to_half_uniform(f);

    // Split the 32-bit pattern into its three fields.
    uniform int32 bits = intbits(f);
    uniform unsigned int32 floatSign = bits & 0x80000000u;
    uniform unsigned int32 floatExp = bits & 0x7F800000u;
    uniform unsigned int32 floatMant = bits & 0x007FFFFFu;

    // The sign moves from bit 31 to bit 15.
    uniform unsigned int32 halfSign = (floatSign >> 16);

    // Remove the single bias (127) and apply the half bias (15), then
    // place the exponent in its half-precision position.
    uniform int32 rebiasedExp = ((int)(floatExp >> 23)) - 127 + 15;
    uniform unsigned int32 halfExp = (rebiasedExp << 10);

    // Keep the top 10 of the 23 mantissa bits.
    uniform int32 halfMant = (floatMant >> 13);

    uniform int32 ret = (halfSign | halfExp | halfMant);

    // Bit 12 is the most significant discarded mantissa bit; when it is
    // set, bump the result by one.  The increment may carry into the
    // exponent field (and so up to the inf encoding), which is accepted.
    if (floatMant & 0x00001000u)
        ret += 1u;

    return (int16)ret;
}
|
||||||
|
|
||||||
// Fast float -> half conversion, varying overload.
//
// When __have_native_half is set, the conversion is delegated to the
// target builtin __float_to_half_varying().  Otherwise the half is
// assembled with integer operations only: the same rebias-and-shift
// sequence is applied to every input, with no separate branch for any
// particular bit pattern.
static inline int16 float_to_half_fast(float f) {
    if (__have_native_half)
        return __float_to_half_varying(f);

    // Split the 32-bit pattern into its three fields.
    int32 bits = intbits(f);
    unsigned int32 floatSign = bits & 0x80000000u;
    unsigned int32 floatExp = bits & 0x7F800000u;
    unsigned int32 floatMant = bits & 0x007FFFFFu;

    // The sign moves from bit 31 to bit 15.
    unsigned int32 halfSign = (floatSign >> 16);

    // Remove the single bias (127) and apply the half bias (15), then
    // place the exponent in its half-precision position.
    int32 rebiasedExp = ((int)(floatExp >> 23)) - 127 + 15;
    unsigned int32 halfExp = (rebiasedExp << 10);

    // Keep the top 10 of the 23 mantissa bits.
    int32 halfMant = (floatMant >> 13);

    int32 ret = (halfSign | halfExp | halfMant);

    // Bit 12 is the most significant discarded mantissa bit; when it is
    // set, bump the result by one.  The increment may carry into the
    // exponent field (and so up to the inf encoding), which is accepted.
    if (floatMant & 0x00001000u)
        ret += 1u;

    return (int16)ret;
}
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
|
|||||||
21
tests/half-3.ispc
Normal file
21
tests/half-3.ispc
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
|
||||||
|
// Returns the built-in programCount value to the caller.
export uniform int width() {
    return programCount;
}
|
||||||
|
|
||||||
|
// Round-trip check for the half conversion routines: every index of the
// foreach range 0 ... 65535 is fed through half_to_float() and back
// through float_to_half(), and indices whose value does not survive the
// round trip are counted.  Values that convert to NaN (f != f) are not
// counted.  The accumulated count is written to RET[programIndex].
export void f_v(uniform float RET[]) {
    int errors = 0;

    foreach (i = 0 ... 65535) {
        float f = half_to_float(i);
        unsigned int16 h = float_to_half(f);

        // NaN never compares equal to itself, so this is false for NaN.
        bool isNumber = (f == f);
        bool changed = (i != h);
        int mismatches = (isNumber && changed);

        // reduce_add() sums the flags over the running program
        // instances; that total is added to each instance's counter.
        errors += reduce_add(mismatches);
    }

    RET[programIndex] = errors;
}
|
||||||
|
|
||||||
|
// Expected output for the test: each program instance stores 0 into its
// own RET slot, i.e. f_v() is expected to report no mismatches.
export void result(uniform float RET[]) {
    RET[programIndex] = 0;
}
|
||||||
Reference in New Issue
Block a user