Merge pull request #1018 from Vsevolod-Livinskij/knl_fix

rcp23->rcp28 and rsqrt23->rsqrt28 fix for knl.h
This commit is contained in:
Dmitry Babokin
2015-04-22 16:07:05 +03:00

View File

@@ -2615,7 +2615,7 @@ static FORCEINLINE __vec16_f __sqrt_varying_float(__vec16_f v) {
static FORCEINLINE __vec16_f __rcp_varying_float(__vec16_f v) { static FORCEINLINE __vec16_f __rcp_varying_float(__vec16_f v) {
#ifdef ISPC_FAST_MATH #ifdef ISPC_FAST_MATH
return _mm512_rcp23_ps(v); // Approximation with 23 bits of accuracy. return _mm512_rcp28_ps(v); // Approximation with 28 bits of accuracy.
#else #else
return _mm512_recip_ps(v); return _mm512_recip_ps(v);
#endif #endif
@@ -2634,7 +2634,7 @@ static FORCEINLINE double __rcp_uniform_double(double v)
static FORCEINLINE __vec16_f __rsqrt_varying_float(__vec16_f v) { static FORCEINLINE __vec16_f __rsqrt_varying_float(__vec16_f v) {
#ifdef ISPC_FAST_MATH #ifdef ISPC_FAST_MATH
return _mm512_rsqrt23_ps(v); // Approximation with 0.775ULP accuracy return _mm512_rsqrt28_ps(v); // Approximation with 28 bits of accuracy
#else #else
return _mm512_invsqrt_ps(v); return _mm512_invsqrt_ps(v);
#endif #endif