Approximation for inverse sqrt and reciprocal provided in fast math mode.

RCP was actually slow in fast math mode.
Inverse sqrt did not expose the fast approximation.
This commit is contained in:
Jean-Luc Duprat
2012-11-13 14:01:35 -08:00
parent adeef0af01
commit b601331362

View File

@@ -1949,14 +1949,18 @@ static FORCEINLINE __vec16_f __sqrt_varying_float(__vec16_f v) {
// Reciprocal of the vector v.
//
// With ISPC_FAST_MATH defined, the 23-bit-accurate approximation intrinsic
// is used; otherwise the non-approximate _mm512_recip_ps is used.
// Each branch has exactly one return: the earlier text carried two per
// branch, which left the second unreachable and routed fast math mode
// through the slow _mm512_recip_ps path.
static FORCEINLINE __vec16_f __rcp_varying_float(__vec16_f v) {
#ifdef ISPC_FAST_MATH
    return _mm512_rcp23_ps(v); // Approximation with 23 bits of accuracy.
#else
    return _mm512_recip_ps(v);
#endif
}
// Inverse square root of the vector v.
//
// With ISPC_FAST_MATH defined, the rsqrt23 approximation intrinsic is used;
// otherwise the non-approximate inverse square root is used.
// No unconditional return precedes the #ifdef, so the selected branch is
// actually reached. The single-precision (_ps) variant is used in the
// non-fast path so that both branches treat v as the same float vector
// type (the _pd variant is the double-precision form).
static FORCEINLINE __vec16_f __rsqrt_varying_float(__vec16_f v) {
#ifdef ISPC_FAST_MATH
    return _mm512_rsqrt23_ps(v); // Approximation with 0.775ULP accuracy
#else
    return _mm512_invsqrt_ps(v);
#endif
}
static FORCEINLINE __vec16_f __exp_varying_float(__vec16_f v) {