Approximation for inverse sqrt and reciprocal provided in fast math mode.
RCP was actually slow in fast math mode, and inverse sqrt did not expose a fast approximation.
This commit is contained in:
@@ -1949,14 +1949,18 @@ static FORCEINLINE __vec16_f __sqrt_varying_float(__vec16_f v) {
|
||||
|
||||
// Reciprocal of v.
//
// With ISPC_FAST_MATH defined, the hardware approximation is used
// (23 bits of accuracy); otherwise the slower _mm512_recip_ps is used.
// Each branch has exactly one return so the selected intrinsic is the
// one that actually executes.
static FORCEINLINE __vec16_f __rcp_varying_float(__vec16_f v) {
#ifdef ISPC_FAST_MATH
    return _mm512_rcp23_ps(v); // Approximation with 23 bits of accuracy.
#else
    return _mm512_recip_ps(v);
#endif
}
|
||||
|
||||
// Inverse square root of v.
//
// With ISPC_FAST_MATH defined, the hardware approximation is used
// (noted in the original source as 0.775 ULP accuracy); otherwise the
// non-approximate inverse square root is used.
static FORCEINLINE __vec16_f __rsqrt_varying_float(__vec16_f v) {
#ifdef ISPC_FAST_MATH
    return _mm512_rsqrt23_ps(v); // Approximation with 0.775ULP accuracy
#else
    // v is a single-precision vector, so use the _ps variant; the _pd
    // variant operates on double-precision vectors.
    return _mm512_invsqrt_ps(v);
#endif
}
|
||||
|
||||
static FORCEINLINE __vec16_f __exp_varying_float(__vec16_f v) {
|
||||
|
||||
Reference in New Issue
Block a user