added fast approximate rcp(double) accurate to 15 digits

This commit is contained in:
Evghenii
2014-02-04 15:23:34 +01:00
parent eb1a495a7a
commit fe98fe8cdc
3 changed files with 34 additions and 15 deletions

View File

@@ -4548,18 +4548,16 @@ define <WIDTH x double> @__rsqrt_varying_double(<WIDTH x double>, <WIDTH x MASK
') ')
define(`rcp_double', ` define(`rcp_double', `
define double @__rcp_uniform_double(double, <WIDTH x MASK>) nounwind alwaysinline readnone declare double @__rcp_safe_uniform_double___und(double, <WIDTH x MASK>)
define double @__rcp_uniform_double(double, <WIDTH x MASK>) nounwind alwaysinline readnone
{ {
%flt = fptrunc double %0 to float %res = call double @__rcp_safe_uniform_double___und(double %0, <WIDTH x MASK> %1)
%res = call float @__rcp_uniform_float(float %flt) ret double %res
%dres = fpext float %res to double
ret double %dres
} }
define <WIDTH x double> @__rcp_varying_double(<WIDTH x double>, <WIDTH x MASK>) nounwind alwaysinline readnone declare <WIDTH x double> @__rcp_safe_varying_double___vyd(<WIDTH x double>, <WIDTH x MASK>)
define <WIDTH x double> @__rcp_varying_double(<WIDTH x double>, <WIDTH x MASK>) nounwind alwaysinline readnone
{ {
%flt = fptrunc <WIDTH x double> %0 to <WIDTH x float> %res = call <WIDTH x double> @__rcp_safe_varying_double___vyd(<WIDTH x double> %0, <WIDTH x MASK> %1)
%res = call <WIDTH x float> @__rcp_varying_float(<WIDTH x float> %flt) ret <WIDTH x double> %res
%dres = fpext <WIDTH x float> %res to <WIDTH x double>
ret <WIDTH x double> %dres
} }
') ')

View File

@@ -1391,11 +1391,32 @@ static inline uniform float rcp(uniform float v) {
return __rcp_uniform_float(v); return __rcp_uniform_float(v);
} }
#define RCPD(QUAL) \
__declspec(safe) \
static inline QUAL double __rcp_iterate_##QUAL##_double(QUAL double v, QUAL double iv) \
{ \
iv = iv * (2.0d - v*iv); \
iv = iv * (2.0d - v*iv); \
return iv; \
} \
__declspec(safe) \
static inline QUAL double __rcp_safe_##QUAL##_double(QUAL double x) \
{ \
if (x <= 1.0d+33 && x >= 1.0d-33) \
return __rcp_iterate_##QUAL##_double(x, rcp((QUAL float)x)); \
QUAL int64 ex = intbits(x) & 0x7fe0000000000000; \
QUAL double exp = doublebits( 0x7fd0000000000000 + ~ex ); \
QUAL double y = rcp((QUAL float)(x*exp)); \
return __rcp_iterate_##QUAL##_double(x, y*exp); \
}
RCPD(varying)
__declspec(safe) __declspec(safe)
static inline double rcp(double v) { static inline double rcp(double v) {
return __rcp_varying_double(v, (IntMaskType)__mask); return __rcp_varying_double(v, (IntMaskType)__mask);
} }
RCPD(uniform)
__declspec(safe) __declspec(safe)
static inline uniform double rcp(uniform double v) { static inline uniform double rcp(uniform double v) {
return __rcp_uniform_double(v, (IntMaskType)__mask); return __rcp_uniform_double(v, (IntMaskType)__mask);
@@ -3529,7 +3550,7 @@ static inline uniform double sqrt(uniform double v) {
#define RSQRTD(QUAL) \ #define RSQRTD(QUAL) \
__declspec(safe) \ __declspec(safe) \
static inline QUAL double __rsqrt_iterate_double(QUAL double x, QUAL double y) \ static inline QUAL double __rsqrt_iterate_##QUAL##_double(QUAL double x, QUAL double y) \
{ \ { \
QUAL double xh = x*0.5d; \ QUAL double xh = x*0.5d; \
y += y*(0.5d0 - xh*y*y); \ y += y*(0.5d0 - xh*y*y); \
@@ -3540,12 +3561,12 @@ __declspec(safe) \
static inline QUAL double __rsqrt_safe_##QUAL##_double (QUAL double x) \ static inline QUAL double __rsqrt_safe_##QUAL##_double (QUAL double x) \
{ \ { \
if (x <= 1.0d+33 && x >= 1.0d-33) \ if (x <= 1.0d+33 && x >= 1.0d-33) \
return __rsqrt_iterate_double(x, rsqrt((QUAL float)x)); \ return __rsqrt_iterate_##QUAL##_double(x, rsqrt((QUAL float)x)); \
QUAL int64 ex = intbits(x) & 0x7fe0000000000000; \ QUAL int64 ex = intbits(x) & 0x7fe0000000000000; \
QUAL double exp = doublebits( 0x7fd0000000000000 - ex ); /* 1.0d/exponent */ \ QUAL double exp = doublebits( 0x7fd0000000000000 - ex ); /* 1.0d/exponent */ \
QUAL double exph = doublebits( 0x5fe0000000000000 - (ex >> 1)); /* 1.0d/sqrt(exponent) */ \ QUAL double exph = doublebits( 0x5fe0000000000000 - (ex >> 1)); /* 1.0d/sqrt(exponent) */ \
QUAL double y = rsqrt((QUAL float)(x*exp)); \ QUAL double y = rsqrt((QUAL float)(x*exp)); \
return __rsqrt_iterate_double(x, y*exph); \ return __rsqrt_iterate_##QUAL##_double(x, y*exph); \
} }
RSQRTD(varying) RSQRTD(varying)

View File

@@ -3,12 +3,12 @@ export uniform int width() { return programCount; }
export void f_f(uniform float RET[], uniform float aFOO[]) { export void f_f(uniform float RET[], uniform float aFOO[]) {
double x = aFOO[programIndex&0x3]; double x = aFOO[programIndex&0x3]*1d100;
double d, ix; double d, ix;
ix = rcp(x); ix = rcp(x);
d = (ix - 1.0d0 / x); d = (ix - 1.0d0 / x);
d = (d < 0.0d0) ? -d : d; d = (d < 0.0d0) ? -d : d;
RET[programIndex] = (d < 1d-7) ? 1.0d0 : 0.0d0; RET[programIndex] = (d < 1d-15 && !isnan(d)) ? 1.0d0 : 0.0d0;
} }
export void result(uniform float RET[]) { export void result(uniform float RET[]) {