From b6013313623e0a8181e65dd3f179da6fa8a80d0f Mon Sep 17 00:00:00 2001
From: Jean-Luc Duprat
Date: Tue, 13 Nov 2012 14:01:35 -0800
Subject: [PATCH] Approximation for inverse sqrt and reciprocal provided in
 fast math mode. RCP was actually slow in fast math mode Inverse sqrt did not
 expose fast approximation

---
Note: the non-fast-math branch of __rsqrt_varying_float calls the
single-precision _mm512_invsqrt_ps. An earlier revision of this patch called
_mm512_invsqrt_pd (the double-precision variant) on the float argument, which
is inconsistent with every other *_ps call in this hunk.

 examples/intrinsics/knc.h | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/examples/intrinsics/knc.h b/examples/intrinsics/knc.h
index 859a4bc7..7b784d82 100644
--- a/examples/intrinsics/knc.h
+++ b/examples/intrinsics/knc.h
@@ -1949,14 +1949,18 @@ static FORCEINLINE __vec16_f __sqrt_varying_float(__vec16_f v) {
 
 static FORCEINLINE __vec16_f __rcp_varying_float(__vec16_f v) {
 #ifdef ISPC_FAST_MATH
-    return _mm512_recip_ps(v);
+    return _mm512_rcp23_ps(v); // Approximation with 23 bits of accuracy.
 #else
-    return _mm512_rcp23_ps(v); // 23-bits of accuracy
+    return _mm512_recip_ps(v);
 #endif
 }
 
 static FORCEINLINE __vec16_f __rsqrt_varying_float(__vec16_f v) {
-    return _mm512_rsqrt23_ps(v); // to 0.775ULP accuracy
+#ifdef ISPC_FAST_MATH
+    return _mm512_rsqrt23_ps(v); // Approximation with 0.775ULP accuracy
+#else
+    return _mm512_invsqrt_ps(v); // _ps (single-precision) variant, matching the float argument
+#endif
 }
 
 static FORCEINLINE __vec16_f __exp_varying_float(__vec16_f v) {