Much more efficient half_to_float() code, via @rygorous.
Also, switch the deferred shading example to use it. (Rather than the "fast" half to float that doesn't handle denorms, etc.)
This commit is contained in:
@@ -327,8 +327,8 @@ ShadeTile(
|
|||||||
|
|
||||||
// Reconstruct normal from G-buffer
|
// Reconstruct normal from G-buffer
|
||||||
float surface_normal_x, surface_normal_y, surface_normal_z;
|
float surface_normal_x, surface_normal_y, surface_normal_z;
|
||||||
float normal_x = half_to_float_fast(inputData.normalEncoded_x[gBufferOffset]);
|
float normal_x = half_to_float(inputData.normalEncoded_x[gBufferOffset]);
|
||||||
float normal_y = half_to_float_fast(inputData.normalEncoded_y[gBufferOffset]);
|
float normal_y = half_to_float(inputData.normalEncoded_y[gBufferOffset]);
|
||||||
|
|
||||||
float f = (normal_x - normal_x * normal_x) + (normal_y - normal_y * normal_y);
|
float f = (normal_x - normal_x * normal_x) + (normal_y - normal_y * normal_y);
|
||||||
float m = sqrt(4.0f * f - 1.0f);
|
float m = sqrt(4.0f * f - 1.0f);
|
||||||
@@ -339,9 +339,9 @@ ShadeTile(
|
|||||||
|
|
||||||
// Load other G-buffer parameters
|
// Load other G-buffer parameters
|
||||||
float surface_specularAmount =
|
float surface_specularAmount =
|
||||||
half_to_float_fast(inputData.specularAmount[gBufferOffset]);
|
half_to_float(inputData.specularAmount[gBufferOffset]);
|
||||||
float surface_specularPower =
|
float surface_specularPower =
|
||||||
half_to_float_fast(inputData.specularPower[gBufferOffset]);
|
half_to_float(inputData.specularPower[gBufferOffset]);
|
||||||
float surface_albedo_x = Unorm8ToFloat32(inputData.albedo_x[gBufferOffset]);
|
float surface_albedo_x = Unorm8ToFloat32(inputData.albedo_x[gBufferOffset]);
|
||||||
float surface_albedo_y = Unorm8ToFloat32(inputData.albedo_y[gBufferOffset]);
|
float surface_albedo_y = Unorm8ToFloat32(inputData.albedo_y[gBufferOffset]);
|
||||||
float surface_albedo_z = Unorm8ToFloat32(inputData.albedo_z[gBufferOffset]);
|
float surface_albedo_z = Unorm8ToFloat32(inputData.albedo_z[gBufferOffset]);
|
||||||
|
|||||||
137
stdlib.ispc
137
stdlib.ispc
@@ -3408,123 +3408,48 @@ static inline uniform float half_to_float(uniform unsigned int16 h) {
|
|||||||
return __half_to_float_uniform(h);
|
return __half_to_float_uniform(h);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if ((h & 0x7FFFu) == 0)
|
// https://gist.github.com/2144712
|
||||||
// Signed zero
|
// Fabian "ryg" Giesen.
|
||||||
return floatbits(((unsigned int32) h) << 16);
|
static const uniform unsigned int32 shifted_exp = 0x7c00 << 13; // exponent mask after shift
|
||||||
else {
|
|
||||||
// Though these are int16 quantities, we get much better code
|
|
||||||
// with them stored as int32s...
|
|
||||||
uniform unsigned int32 hs = h & (int32)0x8000u; // Pick off sign bit
|
|
||||||
uniform unsigned int32 he = h & (int32)0x7C00u; // Pick off exponent bits
|
|
||||||
uniform unsigned int32 hm = h & (int32)0x03FFu; // Pick off mantissa bits
|
|
||||||
if (he == 0) {
|
|
||||||
// Denormal will convert to normalized
|
|
||||||
uniform int e = -1;
|
|
||||||
// The following loop figures out how much extra to adjust the exponent
|
|
||||||
// Shift until leading bit overflows into exponent bit
|
|
||||||
do {
|
|
||||||
e++;
|
|
||||||
hm <<= 1;
|
|
||||||
} while((hm & 0x0400u) == 0);
|
|
||||||
|
|
||||||
// Sign bit
|
uniform int32 o = ((int32)(h & 0x7fff)) << 13; // exponent/mantissa bits
|
||||||
uniform unsigned int32 xs = ((unsigned int32) hs) << 16;
|
uniform unsigned int32 exp = shifted_exp & o; // just the exponent
|
||||||
// Exponent: unbias the halfp, then bias the single
|
o += (127 - 15) << 23; // exponent adjust
|
||||||
uniform int32 xes = ((int32)(he >> 10)) - 15 + 127 - e;
|
|
||||||
// Exponent
|
// handle exponent special cases
|
||||||
uniform unsigned int32 xe = (unsigned int32) (xes << 23);
|
if (exp == shifted_exp) // Inf/NaN?
|
||||||
// Mantissa
|
o += (128 - 16) << 23; // extra exp adjust
|
||||||
uniform unsigned int32 xm = ((unsigned int32) (hm & 0x03FFu)) << 13;
|
else if (exp == 0) { // Zero/Denormal?
|
||||||
return floatbits(xs | xe | xm);
|
o += 1 << 23; // extra exp adjust
|
||||||
}
|
o = intbits(floatbits(o) - floatbits(113 << 23)); // renormalize
|
||||||
else {
|
|
||||||
if (he == 0x7C00u) {
|
|
||||||
// Inf or NaN (all the exponent bits are set)
|
|
||||||
if (hm == 0)
|
|
||||||
// Zero mantissa -> signed inf
|
|
||||||
return floatbits((((unsigned int32) hs) << 16) |
|
|
||||||
((unsigned int32) 0x7F800000u));
|
|
||||||
else
|
|
||||||
// NaN
|
|
||||||
return floatbits(0xFFC00000u);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
// Normalized number
|
|
||||||
// sign
|
|
||||||
uniform unsigned int32 xs = ((unsigned int32) hs) << 16;
|
|
||||||
// Exponent: unbias the halfp, then bias the single
|
|
||||||
uniform int32 xes = ((int32) (he >> 10)) - 15 + 127;
|
|
||||||
// Exponent
|
|
||||||
uniform unsigned int32 xe = (unsigned int32) (xes << 23);
|
|
||||||
// Mantissa
|
|
||||||
uniform unsigned int32 xm = ((unsigned int32) hm) << 13;
|
|
||||||
return floatbits(xs | xe | xm);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
o |= ((int32)(h & 0x8000)) << 16; // sign bit
|
||||||
|
return floatbits(o);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
__declspec(safe)
|
__declspec(safe)
|
||||||
static inline float half_to_float(unsigned int16 h) {
|
static inline float half_to_float(unsigned int16 h) {
|
||||||
if (__have_native_half) {
|
if (__have_native_half) {
|
||||||
return __half_to_float_varying(h);
|
return __half_to_float_varying((unsigned int16)h);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if ((h & 0x7FFFu) == 0)
|
// https://gist.github.com/2144712
|
||||||
// Signed zero
|
// Fabian "ryg" Giesen.
|
||||||
return floatbits(((unsigned int32) h) << 16);
|
|
||||||
else {
|
|
||||||
// Though these are int16 quantities, we get much better code
|
|
||||||
// with them stored as int32s...
|
|
||||||
unsigned int32 hs = h & (int32)0x8000u; // Pick off sign bit
|
|
||||||
unsigned int32 he = h & (int32)0x7C00u; // Pick off exponent bits
|
|
||||||
unsigned int32 hm = h & (int32)0x03FFu; // Pick off mantissa bits
|
|
||||||
cif (he == 0) {
|
|
||||||
// Denormal will convert to normalized
|
|
||||||
int e = -1;
|
|
||||||
// The following loop figures out how much extra to adjust the exponent
|
|
||||||
// Shift until leading bit overflows into exponent bit
|
|
||||||
do {
|
|
||||||
e++;
|
|
||||||
hm <<= 1;
|
|
||||||
} while((hm & 0x0400u) == 0);
|
|
||||||
|
|
||||||
// Sign bit
|
const unsigned int32 shifted_exp = 0x7c00 << 13; // exponent mask after shift
|
||||||
unsigned int32 xs = ((unsigned int32) hs) << 16;
|
|
||||||
// Exponent: unbias the halfp, then bias the single
|
int32 o = ((int32)(h & 0x7fff)) << 13; // exponent/mantissa bits
|
||||||
int32 xes = ((int32)(he >> 10)) - 15 + 127 - e;
|
unsigned int32 exp = shifted_exp & o; // just the exponent
|
||||||
// Exponent
|
o += (127 - 15) << 23; // exponent adjust
|
||||||
unsigned int32 xe = (unsigned int32) (xes << 23);
|
|
||||||
// Mantissa
|
int32 infnan_val = o + ((128 - 16) << 23);
|
||||||
unsigned int32 xm = ((unsigned int32) (hm & 0x03FFu)) << 13;
|
int32 zerodenorm_val = intbits(floatbits(o + (1<<23)) - floatbits(113 << 23));
|
||||||
return floatbits(xs | xe | xm);
|
int32 reg_val = (exp == 0) ? zerodenorm_val : o;
|
||||||
}
|
|
||||||
else {
|
int32 sign_bit = ((int32)(h & 0x8000)) << 16;
|
||||||
if (he == 0x7C00u) {
|
return floatbits(((exp == shifted_exp) ? infnan_val : reg_val) | sign_bit);
|
||||||
// Inf or NaN (all the exponent bits are set)
|
|
||||||
if (hm == 0)
|
|
||||||
// Zero mantissa -> signed inf
|
|
||||||
return floatbits((((unsigned int32) hs) << 16) |
|
|
||||||
((unsigned int32) 0x7F800000u));
|
|
||||||
else
|
|
||||||
// NaN
|
|
||||||
return floatbits(0xFFC00000u);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
// Normalized number
|
|
||||||
// sign
|
|
||||||
unsigned int32 xs = ((unsigned int32) hs) << 16;
|
|
||||||
// Exponent: unbias the halfp, then bias the single
|
|
||||||
int32 xes = ((int32) (he >> 10)) - 15 + 127;
|
|
||||||
// Exponent
|
|
||||||
unsigned int32 xe = (unsigned int32) (xes << 23);
|
|
||||||
// Mantissa
|
|
||||||
unsigned int32 xm = ((unsigned int32) hm) << 13;
|
|
||||||
return floatbits(xs | xe | xm);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user