From 70a9b286e5fa3f1f61d56d98e02f6749983a2ad1 Mon Sep 17 00:00:00 2001 From: Evghenii Date: Fri, 7 Feb 2014 15:28:39 +0100 Subject: [PATCH] added support for native and double precision trigonometry/transcendentals --- builtins/target-avx-x2.ll | 2 + builtins/target-avx.ll | 3 + builtins/target-avx1-i64x4base.ll | 3 + builtins/target-generic-1.ll | 3 + builtins/target-generic-common.ll | 12 ++- builtins/target-neon-16.ll | 3 + builtins/target-neon-32.ll | 3 + builtins/target-neon-8.ll | 3 + builtins/target-sse2-x2.ll | 3 + builtins/target-sse2.ll | 3 + builtins/target-sse4-16.ll | 3 + builtins/target-sse4-8.ll | 3 + builtins/target-sse4-x2.ll | 3 + builtins/target-sse4.ll | 3 + builtins/util.m4 | 62 ++++++++++++-- examples/intrinsics/generic-16.h | 127 +++++++++++++++++++++-------- examples/intrinsics/sse4.h | 130 ++++++++++++++++++++++-------- stdlib.ispc | 79 +++++++++++------- 18 files changed, 340 insertions(+), 108 deletions(-) diff --git a/builtins/target-avx-x2.ll b/builtins/target-avx-x2.ll index b3a77871..a110dfef 100644 --- a/builtins/target-avx-x2.ll +++ b/builtins/target-avx-x2.ll @@ -694,3 +694,5 @@ define <16 x double> @__max_varying_double(<16 x double>, <16 x double>) nounwin rsqrtd_decl() rcpd_decl() +transcendetals_decl() +trigonometry_decl() diff --git a/builtins/target-avx.ll b/builtins/target-avx.ll index 9738f9d3..e0f4e45d 100644 --- a/builtins/target-avx.ll +++ b/builtins/target-avx.ll @@ -564,3 +564,6 @@ gen_scatter(double) rsqrtd_decl() rcpd_decl() + +transcendetals_decl() +trigonometry_decl() diff --git a/builtins/target-avx1-i64x4base.ll b/builtins/target-avx1-i64x4base.ll index a6601a28..3bd28c02 100644 --- a/builtins/target-avx1-i64x4base.ll +++ b/builtins/target-avx1-i64x4base.ll @@ -513,3 +513,6 @@ define <4 x double> @__max_varying_double(<4 x double>, <4 x double>) nounwind r rsqrtd_decl() rcpd_decl() + +transcendetals_decl() +trigonometry_decl() diff --git a/builtins/target-generic-1.ll b/builtins/target-generic-1.ll index 
3dcd8373..1178e98c 100644 --- a/builtins/target-generic-1.ll +++ b/builtins/target-generic-1.ll @@ -997,3 +997,6 @@ define_avgs() rsqrtd_decl() rcpd_decl() + +transcendetals_decl() +trigonometry_decl() diff --git a/builtins/target-generic-common.ll b/builtins/target-generic-common.ll index 401c862d..ef33ff97 100644 --- a/builtins/target-generic-common.ll +++ b/builtins/target-generic-common.ll @@ -191,13 +191,9 @@ declare @__max_varying_double(, declare float @__rsqrt_uniform_float(float) nounwind readnone declare float @__rcp_uniform_float(float) nounwind readnone -declare double @__rsqrt_uniform_double(double) nounwind readnone -declare double @__rcp_uniform_double(double) nounwind readnone declare float @__sqrt_uniform_float(float) nounwind readnone declare @__rcp_varying_float() nounwind readnone declare @__rsqrt_varying_float() nounwind readnone -declare @__rcp_varying_double() nounwind readnone -declare @__rsqrt_varying_double() nounwind readnone declare @__sqrt_varying_float() nounwind readnone @@ -393,3 +389,11 @@ declare void @__prefetch_read_uniform_nt(i8 * nocapture) nounwind define_avgs() +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; reciprocals in double precision, if supported + +rsqrtd_decl() +rcpd_decl() + +transcendetals_decl() +trigonometry_decl() diff --git a/builtins/target-neon-16.ll b/builtins/target-neon-16.ll index 8e0ef121..d73b0a50 100644 --- a/builtins/target-neon-16.ll +++ b/builtins/target-neon-16.ll @@ -521,3 +521,6 @@ define <8 x i16> @__avg_down_int16(<8 x i16>, <8 x i16>) nounwind readnone { rsqrtd_decl() rcpd_decl() + +transcendetals_decl() +trigonometry_decl() diff --git a/builtins/target-neon-32.ll b/builtins/target-neon-32.ll index d6e861a2..a8999269 100644 --- a/builtins/target-neon-32.ll +++ b/builtins/target-neon-32.ll @@ -491,3 +491,6 @@ define <4 x i16> @__avg_down_int16(<4 x i16>, <4 x i16>) nounwind readnone { rsqrtd_decl() rcpd_decl() + +transcendetals_decl() +trigonometry_decl() diff 
--git a/builtins/target-neon-8.ll b/builtins/target-neon-8.ll index aaa0a7b7..0c1edf8c 100644 --- a/builtins/target-neon-8.ll +++ b/builtins/target-neon-8.ll @@ -587,3 +587,6 @@ define <16 x i16> @__avg_down_int16(<16 x i16>, <16 x i16>) nounwind readnone { rsqrtd_decl() rcpd_decl() + +transcendetals_decl() +trigonometry_decl() diff --git a/builtins/target-sse2-x2.ll b/builtins/target-sse2-x2.ll index bfb927e5..f69c25c8 100644 --- a/builtins/target-sse2-x2.ll +++ b/builtins/target-sse2-x2.ll @@ -658,3 +658,6 @@ define <8 x double> @__max_varying_double(<8 x double>, <8 x double>) nounwind r rsqrtd_decl() rcpd_decl() + +transcendetals_decl() +trigonometry_decl() diff --git a/builtins/target-sse2.ll b/builtins/target-sse2.ll index 93a8eb93..4e033d88 100644 --- a/builtins/target-sse2.ll +++ b/builtins/target-sse2.ll @@ -593,3 +593,6 @@ gen_scatter(double) rsqrtd_decl() rcpd_decl() + +transcendetals_decl() +trigonometry_decl() diff --git a/builtins/target-sse4-16.ll b/builtins/target-sse4-16.ll index 0de5c1b4..abca8459 100644 --- a/builtins/target-sse4-16.ll +++ b/builtins/target-sse4-16.ll @@ -494,3 +494,6 @@ define_down_avgs() rsqrtd_decl() rcpd_decl() + +transcendetals_decl() +trigonometry_decl() diff --git a/builtins/target-sse4-8.ll b/builtins/target-sse4-8.ll index 79f44212..2e9f83a7 100644 --- a/builtins/target-sse4-8.ll +++ b/builtins/target-sse4-8.ll @@ -496,3 +496,6 @@ define_down_avgs() rsqrtd_decl() rcpd_decl() + +transcendetals_decl() +trigonometry_decl() diff --git a/builtins/target-sse4-x2.ll b/builtins/target-sse4-x2.ll index ceff27f0..c0466b34 100644 --- a/builtins/target-sse4-x2.ll +++ b/builtins/target-sse4-x2.ll @@ -597,3 +597,6 @@ define_avgs() rsqrtd_decl() rcpd_decl() + +transcendetals_decl() +trigonometry_decl() diff --git a/builtins/target-sse4.ll b/builtins/target-sse4.ll index 9e2ac8a5..d5d9ca5f 100644 --- a/builtins/target-sse4.ll +++ b/builtins/target-sse4.ll @@ -520,3 +520,6 @@ define_avgs() rsqrtd_decl() rcpd_decl() + 
+transcendetals_decl() +trigonometry_decl() diff --git a/builtins/util.m4 b/builtins/util.m4 index fbd929a1..2af75c25 100644 --- a/builtins/util.m4 +++ b/builtins/util.m4 @@ -2106,13 +2106,6 @@ declare void @__pseudo_scatter_base_offsets64_double(i8 * nocapture, i32, , , ) nounwind -declare float @__log_uniform_float(float) nounwind readnone -declare @__log_varying_float() nounwind readnone -declare float @__exp_uniform_float(float) nounwind readnone -declare @__exp_varying_float() nounwind readnone -declare float @__pow_uniform_float(float, float) nounwind readnone -declare @__pow_varying_float(, ) nounwind readnone - ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; declare void @__use8() @@ -4541,3 +4534,58 @@ define(`rcpd_decl', ` declare double @__rcp_uniform_double(double) declare @__rcp_varying_double() ') + + +define(`transcendetals_decl',` + declare float @__log_uniform_float(float) nounwind readnone + declare @__log_varying_float() nounwind readnone + declare float @__exp_uniform_float(float) nounwind readnone + declare @__exp_varying_float() nounwind readnone + declare float @__pow_uniform_float(float, float) nounwind readnone + declare @__pow_varying_float(, ) nounwind readnone + + declare double @__log_uniform_double(double) nounwind readnone + declare @__log_varying_double() nounwind readnone + declare double @__exp_uniform_double(double) nounwind readnone + declare @__exp_varying_double() nounwind readnone + declare double @__pow_uniform_double(double, double) nounwind readnone + declare @__pow_varying_double(, ) nounwind readnone +') +;; the sincos routines store their results through the pointer arguments, so they are declared without readnone +define(`trigonometry_decl',` + declare @__sin_varying_float() nounwind readnone + declare @__asin_varying_float() nounwind readnone + declare @__cos_varying_float() nounwind readnone + declare @__acos_varying_float() nounwind readnone + declare void @__sincos_varying_float(, *, *) nounwind + declare @__tan_varying_float() nounwind readnone + declare @__atan_varying_float() nounwind 
readnone + declare @__atan2_varying_float(,) nounwind readnone + + declare float @__sin_uniform_float(float) nounwind readnone + declare float @__asin_uniform_float(float) nounwind readnone + declare float @__cos_uniform_float(float) nounwind readnone + declare float @__acos_uniform_float(float) nounwind readnone + declare void @__sincos_uniform_float(float, float*, float*) nounwind + declare float @__tan_uniform_float(float) nounwind readnone + declare float @__atan_uniform_float(float) nounwind readnone + declare float @__atan2_uniform_float(float,float) nounwind readnone + + declare @__sin_varying_double() nounwind readnone + declare @__asin_varying_double() nounwind readnone + declare @__cos_varying_double() nounwind readnone + declare @__acos_varying_double() nounwind readnone + declare void @__sincos_varying_double(, *, *) nounwind + declare @__tan_varying_double() nounwind readnone + declare @__atan_varying_double() nounwind readnone + declare @__atan2_varying_double(,) nounwind readnone + + declare double @__sin_uniform_double(double) nounwind readnone + declare double @__asin_uniform_double(double) nounwind readnone + declare double @__cos_uniform_double(double) nounwind readnone + declare double @__acos_uniform_double(double) nounwind readnone + declare void @__sincos_uniform_double(double, double*, double*) nounwind + declare double @__tan_uniform_double(double) nounwind readnone + declare double @__atan_uniform_double(double) nounwind readnone + declare double @__atan2_uniform_double(double,double) nounwind readnone +') diff --git a/examples/intrinsics/generic-16.h b/examples/intrinsics/generic-16.h index 3b5c6ec3..c8f2cf08 100644 --- a/examples/intrinsics/generic-16.h +++ b/examples/intrinsics/generic-16.h @@ -691,39 +691,6 @@ SHIFT(__vec16_f, float, float) SHUFFLES(__vec16_f, float, float) LOAD_STORE(__vec16_f, float) -static FORCEINLINE float __exp_uniform_float(float v) { - return expf(v); -} - -static FORCEINLINE 
__vec16_f __exp_varying_float(__vec16_f v) { - __vec16_f ret; - for (int i = 0; i < 16; ++i) - ret.v[i] = expf(v.v[i]); - return ret; -} - -static FORCEINLINE float __log_uniform_float(float v) { - return logf(v); -} - -static FORCEINLINE __vec16_f __log_varying_float(__vec16_f v) { - __vec16_f ret; - for (int i = 0; i < 16; ++i) - ret.v[i] = logf(v.v[i]); - return ret; -} - -static FORCEINLINE float __pow_uniform_float(float a, float b) { - return powf(a, b); -} - -static FORCEINLINE __vec16_f __pow_varying_float(__vec16_f a, __vec16_f b) { - __vec16_f ret; - for (int i = 0; i < 16; ++i) - ret.v[i] = powf(a.v[i], b.v[i]); - return ret; -} - static FORCEINLINE int __intbits(float v) { union { float f; @@ -1813,3 +1780,97 @@ static FORCEINLINE uint64_t __clock() { #endif // !WIN32 + +/////////////////////////////////////////////////////////////////////////// +// Transcendentals +// +// +#define TRANSCENDENTALS(op) \ +static FORCEINLINE float __##op##_uniform_float(float v) { \ + return op##f(v); \ +} \ +static FORCEINLINE __vec16_f __##op##_varying_float(__vec16_f v) { \ + __vec16_f ret; \ + for (int i = 0; i < 16; ++i) \ + ret.v[i] = op##f(v.v[i]); \ + return ret; \ +} \ +static FORCEINLINE double __##op##_uniform_double(double v) { \ + return op(v); \ +} \ +static FORCEINLINE __vec16_d __##op##_varying_double(__vec16_d v) { \ + __vec16_d ret; \ + for (int i = 0; i < 16; ++i) \ + ret.v[i] = op(v.v[i]); \ + return ret; \ +} + + TRANSCENDENTALS(log) +TRANSCENDENTALS(exp) + + +static FORCEINLINE float __pow_uniform_float(float a, float b) { + return powf(a, b); +} +static FORCEINLINE __vec16_f __pow_varying_float(__vec16_f a, __vec16_f b) { + __vec16_f ret; + for (int i = 0; i < 16; ++i) + ret.v[i] = powf(a.v[i], b.v[i]); + return ret; +} +static FORCEINLINE double __pow_uniform_double(double a, double b) { + return pow(a, b); +} +static FORCEINLINE __vec16_d __pow_varying_double(__vec16_d a, __vec16_d b) { + __vec16_d ret; + for (int i = 0; i < 16; ++i) + ret.v[i] = 
pow(a.v[i], b.v[i]); + return ret; +} + +/////////////////////////////////////////////////////////////////////////// +// Trigonometry + +TRANSCENDENTALS(sin) +TRANSCENDENTALS(asin) +TRANSCENDENTALS(cos) +TRANSCENDENTALS(acos) +TRANSCENDENTALS(tan) +TRANSCENDENTALS(atan) + + +static FORCEINLINE float __atan2_uniform_float(float a, float b) { + return atan2f(a, b); +} +static FORCEINLINE __vec16_f __atan2_varying_float(__vec16_f a, __vec16_f b) { + __vec16_f ret; + for (int i = 0; i < 16; ++i) + ret.v[i] = atan2f(a.v[i], b.v[i]); + return ret; +} +static FORCEINLINE double __atan2_uniform_double(double a, double b) { + return atan2(a, b); +} +static FORCEINLINE __vec16_d __atan2_varying_double(__vec16_d a, __vec16_d b) { + __vec16_d ret; + for (int i = 0; i < 16; ++i) + ret.v[i] = atan2(a.v[i], b.v[i]); + return ret; +} + +static FORCEINLINE void __sincos_uniform_float(float x, float *a, float *b) { + sincosf(x,a,b); +} +static FORCEINLINE void __sincos_varying_float(__vec16_f x, __vec16_f *a, __vec16_f *b) { + __vec16_f ret; + for (int i = 0; i < 16; ++i) + sincosf(x.v[i], (float*)a + i, (float*)b+i); +} +static FORCEINLINE void __sincos_uniform_double(double x, double *a, double *b) { + sincos(x,a,b); +} +static FORCEINLINE void __sincos_varying_double(__vec16_d x, __vec16_d *a, __vec16_d *b) { + __vec16_d ret; + for (int i = 0; i < 16; ++i) + sincos(x.v[i], (double*)a + i, (double*)b+i); +} diff --git a/examples/intrinsics/sse4.h b/examples/intrinsics/sse4.h index 45b31be1..a25af10b 100644 --- a/examples/intrinsics/sse4.h +++ b/examples/intrinsics/sse4.h @@ -167,6 +167,10 @@ struct __vec4_d { } __m128d v[2]; + FORCEINLINE __vec4_d(double *p) { + v[0] = _mm_set_pd(p[1], p[0]); + v[1] = _mm_set_pd(p[3], p[2]); + } }; @@ -2471,39 +2475,6 @@ static FORCEINLINE __vec4_d __sqrt_varying_double(__vec4_d v) { return __vec4_d(_mm_sqrt_pd(v.v[0]), _mm_sqrt_pd(v.v[1])); } -static FORCEINLINE __vec4_f __pow_varying_float(__vec4_f a, __vec4_f b) { - float r[4]; - for (int i = 
0; i < 4; ++i) - r[i] = powf(__extract_element(a, i), __extract_element(b, i)); - return __vec4_f(r); -} - -static FORCEINLINE float __pow_uniform_float(float a, float b) { - return powf(a, b); -} - -static FORCEINLINE __vec4_f __exp_varying_float(__vec4_f a) { - float r[4]; - for (int i = 0; i < 4; ++i) - r[i] = expf(__extract_element(a, i)); - return __vec4_f(r); -} - -static FORCEINLINE float __exp_uniform_float(float a) { - return expf(a); -} - -static FORCEINLINE __vec4_f __log_varying_float(__vec4_f a) { - float r[4]; - for (int i = 0; i < 4; ++i) - r[i] = logf(__extract_element(a, i)); - return __vec4_f(r); -} - -static FORCEINLINE float __log_uniform_float(float a) { - return logf(a); -} - static FORCEINLINE int __intbits(float v) { union { float f; @@ -4166,4 +4137,97 @@ static FORCEINLINE uint64_t __clock() { } #endif // !WIN32 + +/////////////////////////////////////////////////////////////////////////// +// Transcendentals + + +#define TRANSCENDENTALS(op) \ +static FORCEINLINE __vec4_f __##op##_varying_float(__vec4_f a) {\ + float r[4];\ + for (int i = 0; i < 4; ++i)\ + r[i] = op##f(__extract_element(a, i));\ + return __vec4_f(r);\ +}\ +static FORCEINLINE float __##op##_uniform_float(float a) {\ + return op##f(a);\ +}\ +static FORCEINLINE __vec4_d __##op##_varying_double(__vec4_d a) {\ + double r[4];\ + for (int i = 0; i < 4; ++i)\ + r[i] = op(__extract_element(a, i));\ + return __vec4_d(r);\ +}\ +static FORCEINLINE double __##op##_uniform_double(double a) {\ + return op(a);\ +} + +TRANSCENDENTALS(log) +TRANSCENDENTALS(exp) + + +static FORCEINLINE __vec4_f __pow_varying_float(__vec4_f a, __vec4_f b) { + float r[4]; + for (int i = 0; i < 4; ++i) + r[i] = powf(__extract_element(a, i), __extract_element(b, i)); + return __vec4_f(r); +} +static FORCEINLINE float __pow_uniform_float(float a, float b) { + return powf(a, b); +} +static FORCEINLINE __vec4_d __pow_varying_double(__vec4_d a, __vec4_d b) { + double r[4]; + for (int i = 0; i < 4; ++i) + r[i] = 
pow(__extract_element(a, i), __extract_element(b, i)); + return __vec4_d(r); +} +static FORCEINLINE double __pow_uniform_double(double a, double b) { + return pow(a, b); +} + +/////////////////////////////////////////////////////////////////////////// +// Trigonometry + +TRANSCENDENTALS(sin) +TRANSCENDENTALS(asin) +TRANSCENDENTALS(cos) +TRANSCENDENTALS(acos) +TRANSCENDENTALS(tan) +TRANSCENDENTALS(atan) + + +static FORCEINLINE __vec4_f __atan2_varying_float(__vec4_f a, __vec4_f b) { + float r[4]; + for (int i = 0; i < 4; ++i) + r[i] = atan2f(__extract_element(a, i), __extract_element(b, i)); + return __vec4_f(r); +} +static FORCEINLINE float __atan2_uniform_float(float a, float b) { + return atan2f(a, b); +} +static FORCEINLINE __vec4_d __atan2_varying_double(__vec4_d a, __vec4_d b) { + double r[4]; + for (int i = 0; i < 4; ++i) + r[i] = atan2(__extract_element(a, i), __extract_element(b, i)); + return __vec4_d(r); +} +static FORCEINLINE double __atan2_uniform_double(double a, double b) { + return atan2(a, b); +} + +static FORCEINLINE void __sincos_varying_float(__vec4_f x, __vec4_f * _sin, __vec4_f * _cos) { + for (int i = 0; i < 4; ++i) + sincosf(__extract_element(x, i), (float*)_sin + i, (float*)_cos + i); +} +static FORCEINLINE void __sincos_uniform_float(float x, float *_sin, float *_cos) { + sincosf(x, _sin, _cos); +} +static FORCEINLINE void __sincos_varying_double(__vec4_d x, __vec4_d * _sin, __vec4_d * _cos) { + for (int i = 0; i < 4; ++i) + sincos(__extract_element(x, i), (double*)_sin + i, (double*)_cos + i); +} +static FORCEINLINE void __sincos_uniform_double(double x, double *_sin, double *_cos) { + sincos(x, _sin, _cos); +} + #undef FORCEINLINE diff --git a/stdlib.ispc b/stdlib.ispc index eb5ee9c4..a6ed8896 100644 --- a/stdlib.ispc +++ b/stdlib.ispc @@ -2433,29 +2433,29 @@ static inline uniform float sin(uniform float x_full) { __declspec(safe) -static inline float asin(float x) { - bool isneg = x < 0; - x = abs(x); - +static inline float asin(float 
x0) { + bool isneg = x0< 0; + float x = abs(x0); bool isnan = (x > 1); - float v; + if (__have_native_trigonometry) { - return __asin_varying_float(x_full); + return __asin_varying_float(x0); } else if (__math_lib == __math_lib_svml) { - return __svml_asinf(x); + return __svml_asinf(x0); } else if (__math_lib == __math_lib_system) { float ret; foreach_active (i) { - uniform float r = __stdlib_asinf(extract(x, i)); + uniform float r = __stdlib_asinf(extract(x0, i)); ret = insert(ret, i, r); } return ret; } else if (__math_lib == __math_lib_ispc) + { // sollya // fpminimax(((asin(x)-pi/2)/-sqrt(1-x)), [|0,1,2,3,4,5,6,7,8,9,10|], // [|single...|], [1e-20;.9999999999999999]); @@ -2471,7 +2471,9 @@ static inline float asin(float x) { x * (3.05023305118083953857421875e-2f + x * (-1.2897425331175327301025390625e-2f + x * 2.38926825113594532012939453125e-3f))))))))); + } else if (__math_lib == __math_lib_ispc_fast) + { // sollya // fpminimax(((asin(x)-pi/2)/-sqrt(1-x)), [|0,1,2,3,4,5|],[|single...|], // [1e-20;.9999999999999999]); @@ -2482,6 +2484,7 @@ static inline float asin(float x) { x * (-4.489909112453460693359375e-2f + x * (1.928029954433441162109375e-2f + x * (-4.3095736764371395111083984375e-3f))))); + } v *= -sqrt(1.f - x); v = v + 1.57079637050628662109375; @@ -2496,22 +2499,21 @@ static inline float asin(float x) { __declspec(safe) -static inline uniform float asin(uniform float x) { - uniform bool isneg = x < 0; - x = abs(x); - +static inline uniform float asin(uniform float x0) { + uniform bool isneg = x0 < 0; + uniform float x = abs(x0); uniform bool isnan = (x > 1); - uniform float v; if (__have_native_trigonometry) { - return __asin_uniform_float(x_full); + return __asin_uniform_float(x0); } else if (__math_lib == __math_lib_svml || __math_lib == __math_lib_system) { - return __stdlib_asinf(x); + return __stdlib_asinf(x0); } else if (__math_lib == __math_lib_ispc) + { // sollya // fpminimax(((asin(x)-pi/2)/-sqrt(1-x)), [|0,1,2,3,4,5,6,7,8,9,10|], // 
[|single...|], [1e-20;.9999999999999999]); @@ -2527,7 +2529,9 @@ static inline uniform float asin(uniform float x) { x * (3.05023305118083953857421875e-2f + x * (-1.2897425331175327301025390625e-2f + x * 2.38926825113594532012939453125e-3f))))))))); + } else if (__math_lib == __math_lib_ispc_fast) + { // sollya // fpminimax(((asin(x)-pi/2)/-sqrt(1-x)), [|0,1,2,3,4,5|],[|single...|], // [1e-20;.9999999999999999]); @@ -2538,6 +2542,7 @@ static inline uniform float asin(uniform float x) { x * (-4.489909112453460693359375e-2f + x * (1.928029954433441162109375e-2f + x * (-4.3095736764371395111083984375e-3f))))); + } v *= -sqrt(1.f - x); v = v + 1.57079637050628662109375; @@ -2687,22 +2692,34 @@ static inline uniform float cos(uniform float x_full) { __declspec(safe) static inline float acos(float v) { + if (__have_native_trigonometry) + return __acos_varying_float(v); + else return 1.57079637050628662109375 - asin(v); } __declspec(safe) static inline double acos(const double v) { + if (__have_native_trigonometry) + return __acos_varying_double(v); + else return 1.57079637050628662109375d0 - asin(v); } __declspec(safe) static inline uniform float acos(uniform float v) { + if (__have_native_trigonometry) + return __acos_uniform_float(v); + else return 1.57079637050628662109375 - asin(v); } __declspec(safe) static inline uniform double acos(const uniform double v) { + if (__have_native_trigonometry) + return __acos_uniform_double(v); + else return 1.57079637050628662109375d0 - asin(v); } @@ -3696,7 +3713,7 @@ __declspec(safe) static inline double sin(double x) { if (__have_native_trigonometry) { - return __sin_varying_double(x_full); + return __sin_varying_double(x); } else if (__math_lib == __math_lib_svml) { @@ -3715,7 +3732,7 @@ __declspec(safe) static inline double asin(double x) { if (__have_native_trigonometry) { - return __asin_varying_double(x_full); + return __asin_varying_double(x); } else if (__math_lib == __math_lib_svml) { @@ -3735,7 +3752,7 @@ 
__declspec(safe) static inline uniform double sin(uniform double x) { if (__have_native_trigonometry) { - return __sin_uniform_double(x_full); + return __sin_uniform_double(x); } else return __stdlib_sin(x); @@ -3745,7 +3762,7 @@ __declspec(safe) static inline double asin(const double x) { if (__have_native_trigonometry) { - return __asin_varying_double(x_full); + return __asin_varying_double(x); } else if (__math_lib == __math_lib_svml) { @@ -3765,7 +3782,7 @@ __declspec(safe) static inline double cos(const double x) { if (__have_native_trigonometry) { - return __cos_varying_double(x_full); + return __cos_varying_double(x); } if (__math_lib == __math_lib_svml) { @@ -3785,7 +3802,7 @@ __declspec(safe) static inline uniform double cos(uniform double x) { if (__have_native_trigonometry) { - return __cos_uniform_double(x_full); + return __cos_uniform_double(x); } else return __stdlib_cos(x); @@ -3796,7 +3813,7 @@ static inline void sincos(double x, varying double * uniform sin_result, varying double * uniform cos_result) { if (__have_native_trigonometry) { - __sincos_varying_double(x_full),sin_result,cos_result); + __sincos_varying_double(x,sin_result,cos_result); } - if (__math_lib == __math_lib_svml) + else if (__math_lib == __math_lib_svml) /* the native call above was already handed both output pointers; do not also run a library path */ { @@ -3817,7 +3834,7 @@ static inline void sincos(uniform double x, uniform double * uniform sin_result, uniform double * uniform cos_result) { if (__have_native_trigonometry) { - __sincos_uniform_double(x_full,sin_result, cos_result); + __sincos_uniform_double(x,sin_result, cos_result); } else __stdlib_sincos(x, sin_result, cos_result); @@ -3827,7 +3844,7 @@ __declspec(safe) static inline double tan(double x) { if (__have_native_trigonometry) { - return __tan_varying_double(x_full); + return __tan_varying_double(x); } else if (__math_lib == __math_lib_svml) { @@ -3847,7 +3864,7 @@ __declspec(safe) static inline uniform double tan(uniform double x) { if (__have_native_trigonometry) { - return __tan_uniform_double(x_full); + return __tan_uniform_double(x); } else return 
__stdlib_tan(x); @@ -3857,7 +3874,7 @@ __declspec(safe) static inline double atan(double x) { if (__have_native_trigonometry) { - return __atan_varying_double(x_full); + return __atan_varying_double(x); } else { double ret; @@ -3873,7 +3890,7 @@ __declspec(safe) static inline uniform double atan(uniform double x) { if (__have_native_trigonometry) { - return __atan_uniform_double(x_full); + return __atan_uniform_double(x); } else return __stdlib_atan(x); @@ -3912,7 +3929,7 @@ static inline uniform double atan2(uniform double y, uniform double x) { __declspec(safe) static inline double exp(double x) { if (__have_native_transcendentals) { - return __exp_varying_double(x_full); + return __exp_varying_double(x); } else if (__math_lib == __math_lib_svml) { @@ -3931,7 +3948,7 @@ static inline double exp(double x) { __declspec(safe) static inline uniform double exp(uniform double x) { if (__have_native_transcendentals) { - return __exp_uniform_double(x_full); + return __exp_uniform_double(x); } else return __stdlib_exp(x); @@ -3940,7 +3957,7 @@ static inline uniform double exp(uniform double x) { __declspec(safe) static inline double log(double x) { if (__have_native_transcendentals) { - return __log_varying_double(x_full); + return __log_varying_double(x); } else if (__math_lib == __math_lib_svml) { @@ -3959,7 +3976,7 @@ static inline double log(double x) { __declspec(safe) static inline uniform double log(uniform double x) { if (__have_native_transcendentals) { - return __log_uniform_double(x_full); + return __log_uniform_double(x); } else return __stdlib_log(x);