Merge pull request #1018 from Vsevolod-Livinskij/knl_fix
rcp23->rcp28 and rsqrt23->rsqrt28 fix for knl.h
This commit is contained in:
@@ -2615,7 +2615,7 @@ static FORCEINLINE __vec16_f __sqrt_varying_float(__vec16_f v) {
|
|||||||
|
|
||||||
static FORCEINLINE __vec16_f __rcp_varying_float(__vec16_f v) {
|
static FORCEINLINE __vec16_f __rcp_varying_float(__vec16_f v) {
|
||||||
#ifdef ISPC_FAST_MATH
|
#ifdef ISPC_FAST_MATH
|
||||||
return _mm512_rcp23_ps(v); // Approximation with 23 bits of accuracy.
|
return _mm512_rcp28_ps(v); // Approximation with 28 bits of accuracy.
|
||||||
#else
|
#else
|
||||||
return _mm512_recip_ps(v);
|
return _mm512_recip_ps(v);
|
||||||
#endif
|
#endif
|
||||||
@@ -2634,7 +2634,7 @@ static FORCEINLINE double __rcp_uniform_double(double v)
|
|||||||
|
|
||||||
static FORCEINLINE __vec16_f __rsqrt_varying_float(__vec16_f v) {
|
static FORCEINLINE __vec16_f __rsqrt_varying_float(__vec16_f v) {
|
||||||
#ifdef ISPC_FAST_MATH
|
#ifdef ISPC_FAST_MATH
|
||||||
return _mm512_rsqrt23_ps(v); // Approximation with 0.775ULP accuracy
|
return _mm512_rsqrt28_ps(v); // Approximation with 28 bits of accuracy
|
||||||
#else
|
#else
|
||||||
return _mm512_invsqrt_ps(v);
|
return _mm512_invsqrt_ps(v);
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
Reference in New Issue
Block a user