From b6013313623e0a8181e65dd3f179da6fa8a80d0f Mon Sep 17 00:00:00 2001
From: Jean-Luc Duprat <jld@acm.org>
Date: Tue, 13 Nov 2012 14:01:35 -0800
Subject: [PATCH] Approximation for inverse sqrt and reciprocal provided in
 fast math mode. RCP was actually slow in fast math mode; inverse sqrt
 did not expose fast approximation.

---
 examples/intrinsics/knc.h | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/examples/intrinsics/knc.h b/examples/intrinsics/knc.h
index 859a4bc7..7b784d82 100644
--- a/examples/intrinsics/knc.h
+++ b/examples/intrinsics/knc.h
@@ -1949,14 +1949,18 @@ static FORCEINLINE __vec16_f __sqrt_varying_float(__vec16_f v) {
 
 static FORCEINLINE __vec16_f __rcp_varying_float(__vec16_f v) {
 #ifdef ISPC_FAST_MATH
-    return _mm512_recip_ps(v);
+    return _mm512_rcp23_ps(v); // Approximation with 23 bits of accuracy.
 #else
-    return _mm512_rcp23_ps(v); // 23-bits of accuracy
+    return _mm512_recip_ps(v); // Slower than rcp23 per the commit message; presumably full precision -- TODO confirm
 #endif
 }
 
 static FORCEINLINE __vec16_f __rsqrt_varying_float(__vec16_f v) {
-  return _mm512_rsqrt23_ps(v); // to 0.775ULP accuracy
+#ifdef ISPC_FAST_MATH
+    return _mm512_rsqrt23_ps(v); // Approximation with 0.775ULP accuracy
+#else
+    return _mm512_invsqrt_ps(v); // Single-precision (_ps) variant: v is a float vector, so the _pd (double) form is the wrong intrinsic here
+#endif
 }
 
 static FORCEINLINE __vec16_f __exp_varying_float(__vec16_f v) {