From eb1a495a7aab85773fdfd5cad37e29c54d380460 Mon Sep 17 00:00:00 2001 From: Evghenii Date: Tue, 4 Feb 2014 14:44:54 +0100 Subject: [PATCH] added support for fast approximate rsqrt(double). Provide 16 digit accurancy but is over 3x faster than 1/sqrt(double) --- builtins/target-avx1-i64x4base.ll | 2 ++ builtins/target-generic-common.ll | 9 ++++---- builtins/util.m4 | 22 +++++++++---------- stdlib.ispc | 35 +++++++++++++++++++++++++------ tests/test-148.ispc | 4 ++-- 5 files changed, 48 insertions(+), 24 deletions(-) diff --git a/builtins/target-avx1-i64x4base.ll b/builtins/target-avx1-i64x4base.ll index e1832030..1b1e7c4f 100644 --- a/builtins/target-avx1-i64x4base.ll +++ b/builtins/target-avx1-i64x4base.ll @@ -511,3 +511,5 @@ define <4 x double> @__max_varying_double(<4 x double>, <4 x double>) nounwind r ret <4 x double> %call } +rsqrt_double() +rcp_double() diff --git a/builtins/target-generic-common.ll b/builtins/target-generic-common.ll index 4f87ad26..dfdc7e9e 100644 --- a/builtins/target-generic-common.ll +++ b/builtins/target-generic-common.ll @@ -191,13 +191,14 @@ declare @__max_varying_double(, declare float @__rsqrt_uniform_float(float) nounwind readnone declare float @__rcp_uniform_float(float) nounwind readnone -declare double @__rsqrt_uniform_double(double) nounwind readnone -declare double @__rcp_uniform_double(double) nounwind readnone +declare double @__rsqrt_uniform_double(double, ) nounwind readnone +declare double @__rcp_uniform_double(double, ) nounwind readnone declare float @__sqrt_uniform_float(float) nounwind readnone declare @__rcp_varying_float() nounwind readnone declare @__rsqrt_varying_float() nounwind readnone -declare @__rcp_varying_double() nounwind readnone -declare @__rsqrt_varying_double() nounwind readnone +declare @__rcp_varying_double(, ) nounwind readnone +declare @__rsqrt_varying_double(, ) nounwind readnone + declare @__sqrt_varying_float() nounwind readnone declare double @__sqrt_uniform_double(double) nounwind readnone diff --git a/builtins/util.m4 b/builtins/util.m4 index a8340adb..3915bc4e 100644 --- a/builtins/util.m4 +++ b/builtins/util.m4 @@ -4533,31 +4533,29 @@ define_down_avgs() ') define(`rsqrt_double', ` -define double @__rsqrt_uniform_double(double) nounwind alwaysinline readnone +declare double @__rsqrt_safe_uniform_double___und(double, ) +define double @__rsqrt_uniform_double(double, ) nounwind alwaysinline readnone { - %flt = fptrunc double %0 to float - %res = call float @__rsqrt_uniform_float(float %flt) - %dres = fpext float %res to double - ret double %dres + %res = call double @__rsqrt_safe_uniform_double___und(double %0, %1) + ret double %res } -define @__rsqrt_varying_double() nounwind alwaysinline readnone +declare @__rsqrt_safe_varying_double___vyd(, ) +define @__rsqrt_varying_double(, ) nounwind alwaysinline readnone { - %flt = fptrunc %0 to - %res = call @__rsqrt_varying_float( %flt) - %dres = fpext %res to - ret %dres + %res = call @__rsqrt_safe_varying_double___vyd( %0, %1) + ret %res } ') define(`rcp_double', ` -define double @__rcp_uniform_double(double) nounwind alwaysinline readnone +define double @__rcp_uniform_double(double, ) nounwind alwaysinline readnone { %flt = fptrunc double %0 to float %res = call float @__rcp_uniform_float(float %flt) %dres = fpext float %res to double ret double %dres } -define @__rcp_varying_double() nounwind alwaysinline readnone +define @__rcp_varying_double(, ) nounwind alwaysinline readnone { %flt = fptrunc %0 to %res = call @__rcp_varying_float( %flt) diff --git a/stdlib.ispc b/stdlib.ispc index 42b60b52..70b5f0d1 100644 --- a/stdlib.ispc +++ b/stdlib.ispc @@ -1393,12 +1393,12 @@ static inline uniform float rcp(uniform float v) { __declspec(safe) static inline double rcp(double v) { - return __rcp_varying_double(v); + return __rcp_varying_double(v, (IntMaskType)__mask); } __declspec(safe) static inline uniform double rcp(uniform double v) { - return __rcp_uniform_double(v); + return __rcp_uniform_double(v, (IntMaskType)__mask); } /////////////////////////////////////////////////////////////////////////// @@ -3527,14 +3527,37 @@ static inline uniform double sqrt(uniform double v) { return __sqrt_uniform_double(v); } -__declspec(safe) -static inline double rsqrt(double v) { - return __rsqrt_varying_double(v); +#define RSQRTD(QUAL) \ +__declspec(safe) \ +static inline QUAL double __rsqrt_iterate_double(QUAL double x, QUAL double y) \ +{ \ + QUAL double xh = x*0.5d; \ + y += y*(0.5d0 - xh*y*y); \ + y += y*(0.5d0 - xh*y*y); \ + return y; \ +} \ +__declspec(safe) \ +static inline QUAL double __rsqrt_safe_##QUAL##_double (QUAL double x) \ +{ \ + if (x <= 1.0d+33 && x >= 1.0d-33) \ + return __rsqrt_iterate_double(x, rsqrt((QUAL float)x)); \ + QUAL int64 ex = intbits(x) & 0x7fe0000000000000; \ + QUAL double exp = doublebits( 0x7fd0000000000000 - ex ); /* 1.0d/exponent */ \ + QUAL double exph = doublebits( 0x5fe0000000000000 - (ex >> 1)); /* 1.0d/sqrt(exponent) */ \ + QUAL double y = rsqrt((QUAL float)(x*exp)); \ + return __rsqrt_iterate_double(x, y*exph); \ } +RSQRTD(varying) +__declspec(safe) +static inline double rsqrt(double v) { + return __rsqrt_varying_double(v, (IntMaskType)__mask); +} + +RSQRTD(uniform) __declspec(safe) static inline uniform double rsqrt(uniform double v) { - return __rsqrt_uniform_double(v); + return __rsqrt_uniform_double(v, (IntMaskType)__mask); } __declspec(safe) static inline double ldexp(double x, int n) { diff --git a/tests/test-148.ispc b/tests/test-148.ispc index f8284e1a..1c01428c 100644 --- a/tests/test-148.ispc +++ b/tests/test-148.ispc @@ -3,11 +3,11 @@ export uniform int width() { return programCount; } export void f_f(uniform float RET[], uniform float aFOO[]) { - double x = aFOO[programIndex]; + double x = aFOO[programIndex]*1d100; double d, invsqrt = rsqrt(x); d = (x * (invsqrt * invsqrt)) - 1.0d0; if (d < 0.0d0) d = -d; - RET[programIndex] = d > 1d-5; + RET[programIndex] = d > 1d-15; }