Added support for fast approximate rsqrt(double). Provides 16-digit accuracy while being over 3x faster than 1/sqrt(double).

This commit is contained in:
Evghenii
2014-02-04 14:44:54 +01:00
parent b0753dc93d
commit eb1a495a7a
5 changed files with 48 additions and 24 deletions

View File

@@ -511,3 +511,5 @@ define <4 x double> @__max_varying_double(<4 x double>, <4 x double>) nounwind r
ret <4 x double> %call ret <4 x double> %call
} }
rsqrt_double()
rcp_double()

View File

@@ -191,13 +191,14 @@ declare <WIDTH x double> @__max_varying_double(<WIDTH x double>,
declare float @__rsqrt_uniform_float(float) nounwind readnone declare float @__rsqrt_uniform_float(float) nounwind readnone
declare float @__rcp_uniform_float(float) nounwind readnone declare float @__rcp_uniform_float(float) nounwind readnone
declare double @__rsqrt_uniform_double(double) nounwind readnone declare double @__rsqrt_uniform_double(double, <WIDTH x MASK>) nounwind readnone
declare double @__rcp_uniform_double(double) nounwind readnone declare double @__rcp_uniform_double(double, <WIDTH x MASK>) nounwind readnone
declare float @__sqrt_uniform_float(float) nounwind readnone declare float @__sqrt_uniform_float(float) nounwind readnone
declare <WIDTH x float> @__rcp_varying_float(<WIDTH x float>) nounwind readnone declare <WIDTH x float> @__rcp_varying_float(<WIDTH x float>) nounwind readnone
declare <WIDTH x float> @__rsqrt_varying_float(<WIDTH x float>) nounwind readnone declare <WIDTH x float> @__rsqrt_varying_float(<WIDTH x float>) nounwind readnone
declare <WIDTH x double> @__rcp_varying_double(<WIDTH x double>) nounwind readnone declare <WIDTH x double> @__rcp_varying_double(<WIDTH x double>, <WIDTH x MASK>) nounwind readnone
declare <WIDTH x double> @__rsqrt_varying_double(<WIDTH x double>) nounwind readnone declare <WIDTH x double> @__rsqrt_varying_double(<WIDTH x double>, <WIDTH x MASK>) nounwind readnone
declare <WIDTH x float> @__sqrt_varying_float(<WIDTH x float>) nounwind readnone declare <WIDTH x float> @__sqrt_varying_float(<WIDTH x float>) nounwind readnone
declare double @__sqrt_uniform_double(double) nounwind readnone declare double @__sqrt_uniform_double(double) nounwind readnone

View File

@@ -4533,31 +4533,29 @@ define_down_avgs()
') ')
define(`rsqrt_double', ` define(`rsqrt_double', `
define double @__rsqrt_uniform_double(double) nounwind alwaysinline readnone declare double @__rsqrt_safe_uniform_double___und(double, <WIDTH x MASK>)
define double @__rsqrt_uniform_double(double, <WIDTH x MASK>) nounwind alwaysinline readnone
{ {
%flt = fptrunc double %0 to float %res = call double @__rsqrt_safe_uniform_double___und(double %0, <WIDTH x MASK> %1)
%res = call float @__rsqrt_uniform_float(float %flt) ret double %res
%dres = fpext float %res to double
ret double %dres
} }
define <WIDTH x double> @__rsqrt_varying_double(<WIDTH x double>) nounwind alwaysinline readnone declare <WIDTH x double> @__rsqrt_safe_varying_double___vyd(<WIDTH x double>, <WIDTH x MASK>)
define <WIDTH x double> @__rsqrt_varying_double(<WIDTH x double>, <WIDTH x MASK>) nounwind alwaysinline readnone
{ {
%flt = fptrunc <WIDTH x double> %0 to <WIDTH x float> %res = call <WIDTH x double> @__rsqrt_safe_varying_double___vyd(<WIDTH x double> %0, <WIDTH x MASK> %1)
%res = call <WIDTH x float> @__rsqrt_varying_float(<WIDTH x float> %flt) ret <WIDTH x double> %res
%dres = fpext <WIDTH x float> %res to <WIDTH x double>
ret <WIDTH x double> %dres
} }
') ')
define(`rcp_double', ` define(`rcp_double', `
define double @__rcp_uniform_double(double) nounwind alwaysinline readnone define double @__rcp_uniform_double(double, <WIDTH x MASK>) nounwind alwaysinline readnone
{ {
%flt = fptrunc double %0 to float %flt = fptrunc double %0 to float
%res = call float @__rcp_uniform_float(float %flt) %res = call float @__rcp_uniform_float(float %flt)
%dres = fpext float %res to double %dres = fpext float %res to double
ret double %dres ret double %dres
} }
define <WIDTH x double> @__rcp_varying_double(<WIDTH x double>) nounwind alwaysinline readnone define <WIDTH x double> @__rcp_varying_double(<WIDTH x double>, <WIDTH x MASK>) nounwind alwaysinline readnone
{ {
%flt = fptrunc <WIDTH x double> %0 to <WIDTH x float> %flt = fptrunc <WIDTH x double> %0 to <WIDTH x float>
%res = call <WIDTH x float> @__rcp_varying_float(<WIDTH x float> %flt) %res = call <WIDTH x float> @__rcp_varying_float(<WIDTH x float> %flt)

View File

@@ -1393,12 +1393,12 @@ static inline uniform float rcp(uniform float v) {
__declspec(safe) __declspec(safe)
static inline double rcp(double v) { static inline double rcp(double v) {
return __rcp_varying_double(v); return __rcp_varying_double(v, (IntMaskType)__mask);
} }
__declspec(safe) __declspec(safe)
static inline uniform double rcp(uniform double v) { static inline uniform double rcp(uniform double v) {
return __rcp_uniform_double(v); return __rcp_uniform_double(v, (IntMaskType)__mask);
} }
/////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////
@@ -3527,14 +3527,37 @@ static inline uniform double sqrt(uniform double v) {
return __sqrt_uniform_double(v); return __sqrt_uniform_double(v);
} }
__declspec(safe) #define RSQRTD(QUAL) \
static inline double rsqrt(double v) { __declspec(safe) \
return __rsqrt_varying_double(v); static inline QUAL double __rsqrt_iterate_double(QUAL double x, QUAL double y) \
{ \
QUAL double xh = x*0.5d; \
y += y*(0.5d0 - xh*y*y); \
y += y*(0.5d0 - xh*y*y); \
return y; \
} \
__declspec(safe) \
static inline QUAL double __rsqrt_safe_##QUAL##_double (QUAL double x) \
{ \
if (x <= 1.0d+33 && x >= 1.0d-33) \
return __rsqrt_iterate_double(x, rsqrt((QUAL float)x)); \
QUAL int64 ex = intbits(x) & 0x7fe0000000000000; \
QUAL double exp = doublebits( 0x7fd0000000000000 - ex ); /* 1.0d/exponent */ \
QUAL double exph = doublebits( 0x5fe0000000000000 - (ex >> 1)); /* 1.0d/sqrt(exponent) */ \
QUAL double y = rsqrt((QUAL float)(x*exp)); \
return __rsqrt_iterate_double(x, y*exph); \
} }
RSQRTD(varying)
__declspec(safe)
static inline double rsqrt(double v) {
return __rsqrt_varying_double(v, (IntMaskType)__mask);
}
RSQRTD(uniform)
__declspec(safe) __declspec(safe)
static inline uniform double rsqrt(uniform double v) { static inline uniform double rsqrt(uniform double v) {
return __rsqrt_uniform_double(v); return __rsqrt_uniform_double(v, (IntMaskType)__mask);
} }
__declspec(safe) __declspec(safe)
static inline double ldexp(double x, int n) { static inline double ldexp(double x, int n) {

View File

@@ -3,11 +3,11 @@ export uniform int width() { return programCount; }
export void f_f(uniform float RET[], uniform float aFOO[]) { export void f_f(uniform float RET[], uniform float aFOO[]) {
double x = aFOO[programIndex]; double x = aFOO[programIndex]*1d100;
double d, invsqrt = rsqrt(x); double d, invsqrt = rsqrt(x);
d = (x * (invsqrt * invsqrt)) - 1.0d0; d = (x * (invsqrt * invsqrt)) - 1.0d0;
if (d < 0.0d0) d = -d; if (d < 0.0d0) d = -d;
RET[programIndex] = d > 1d-5; RET[programIndex] = d > 1d-15;
} }