Added support for native and double-precision trigonometry/transcendentals
This commit is contained in:
@@ -694,3 +694,5 @@ define <16 x double> @__max_varying_double(<16 x double>, <16 x double>) nounwin
|
||||
rsqrtd_decl()
|
||||
rcpd_decl()
|
||||
|
||||
transcendetals_decl()
|
||||
trigonometry_decl()
|
||||
|
||||
@@ -564,3 +564,6 @@ gen_scatter(double)
|
||||
|
||||
rsqrtd_decl()
|
||||
rcpd_decl()
|
||||
|
||||
transcendetals_decl()
|
||||
trigonometry_decl()
|
||||
|
||||
@@ -513,3 +513,6 @@ define <4 x double> @__max_varying_double(<4 x double>, <4 x double>) nounwind r
|
||||
|
||||
rsqrtd_decl()
|
||||
rcpd_decl()
|
||||
|
||||
transcendetals_decl()
|
||||
trigonometry_decl()
|
||||
|
||||
@@ -997,3 +997,6 @@ define_avgs()
|
||||
|
||||
rsqrtd_decl()
|
||||
rcpd_decl()
|
||||
|
||||
transcendetals_decl()
|
||||
trigonometry_decl()
|
||||
|
||||
@@ -191,13 +191,9 @@ declare <WIDTH x double> @__max_varying_double(<WIDTH x double>,
|
||||
|
||||
declare float @__rsqrt_uniform_float(float) nounwind readnone
|
||||
declare float @__rcp_uniform_float(float) nounwind readnone
|
||||
declare double @__rsqrt_uniform_double(double) nounwind readnone
|
||||
declare double @__rcp_uniform_double(double) nounwind readnone
|
||||
declare float @__sqrt_uniform_float(float) nounwind readnone
|
||||
declare <WIDTH x float> @__rcp_varying_float(<WIDTH x float>) nounwind readnone
|
||||
declare <WIDTH x float> @__rsqrt_varying_float(<WIDTH x float>) nounwind readnone
|
||||
declare <WIDTH x double> @__rcp_varying_double(<WIDTH x double>) nounwind readnone
|
||||
declare <WIDTH x double> @__rsqrt_varying_double(<WIDTH x double>) nounwind readnone
|
||||
|
||||
declare <WIDTH x float> @__sqrt_varying_float(<WIDTH x float>) nounwind readnone
|
||||
|
||||
@@ -393,3 +389,11 @@ declare void @__prefetch_read_uniform_nt(i8 * nocapture) nounwind
|
||||
|
||||
define_avgs()
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; double-precision reciprocals, transcendentals and trigonometry, if supported
|
||||
|
||||
rsqrtd_decl()
|
||||
rcpd_decl()
|
||||
|
||||
transcendetals_decl()
|
||||
trigonometry_decl()
|
||||
|
||||
@@ -521,3 +521,6 @@ define <8 x i16> @__avg_down_int16(<8 x i16>, <8 x i16>) nounwind readnone {
|
||||
|
||||
rsqrtd_decl()
|
||||
rcpd_decl()
|
||||
|
||||
transcendetals_decl()
|
||||
trigonometry_decl()
|
||||
|
||||
@@ -491,3 +491,6 @@ define <4 x i16> @__avg_down_int16(<4 x i16>, <4 x i16>) nounwind readnone {
|
||||
|
||||
rsqrtd_decl()
|
||||
rcpd_decl()
|
||||
|
||||
transcendetals_decl()
|
||||
trigonometry_decl()
|
||||
|
||||
@@ -587,3 +587,6 @@ define <16 x i16> @__avg_down_int16(<16 x i16>, <16 x i16>) nounwind readnone {
|
||||
|
||||
rsqrtd_decl()
|
||||
rcpd_decl()
|
||||
|
||||
transcendetals_decl()
|
||||
trigonometry_decl()
|
||||
|
||||
@@ -658,3 +658,6 @@ define <8 x double> @__max_varying_double(<8 x double>, <8 x double>) nounwind r
|
||||
|
||||
rsqrtd_decl()
|
||||
rcpd_decl()
|
||||
|
||||
transcendetals_decl()
|
||||
trigonometry_decl()
|
||||
|
||||
@@ -593,3 +593,6 @@ gen_scatter(double)
|
||||
|
||||
rsqrtd_decl()
|
||||
rcpd_decl()
|
||||
|
||||
transcendetals_decl()
|
||||
trigonometry_decl()
|
||||
|
||||
@@ -494,3 +494,6 @@ define_down_avgs()
|
||||
|
||||
rsqrtd_decl()
|
||||
rcpd_decl()
|
||||
|
||||
transcendetals_decl()
|
||||
trigonometry_decl()
|
||||
|
||||
@@ -496,3 +496,6 @@ define_down_avgs()
|
||||
|
||||
rsqrtd_decl()
|
||||
rcpd_decl()
|
||||
|
||||
transcendetals_decl()
|
||||
trigonometry_decl()
|
||||
|
||||
@@ -597,3 +597,6 @@ define_avgs()
|
||||
|
||||
rsqrtd_decl()
|
||||
rcpd_decl()
|
||||
|
||||
transcendetals_decl()
|
||||
trigonometry_decl()
|
||||
|
||||
@@ -520,3 +520,6 @@ define_avgs()
|
||||
|
||||
rsqrtd_decl()
|
||||
rcpd_decl()
|
||||
|
||||
transcendetals_decl()
|
||||
trigonometry_decl()
|
||||
|
||||
@@ -2106,13 +2106,6 @@ declare void
|
||||
@__pseudo_scatter_base_offsets64_double(i8 * nocapture, i32, <WIDTH x i64>,
|
||||
<WIDTH x double>, <WIDTH x MASK>) nounwind
|
||||
|
||||
declare float @__log_uniform_float(float) nounwind readnone
|
||||
declare <WIDTH x float> @__log_varying_float(<WIDTH x float>) nounwind readnone
|
||||
declare float @__exp_uniform_float(float) nounwind readnone
|
||||
declare <WIDTH x float> @__exp_varying_float(<WIDTH x float>) nounwind readnone
|
||||
declare float @__pow_uniform_float(float, float) nounwind readnone
|
||||
declare <WIDTH x float> @__pow_varying_float(<WIDTH x float>, <WIDTH x float>) nounwind readnone
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
declare void @__use8(<WIDTH x i8>)
|
||||
@@ -4541,3 +4534,58 @@ define(`rcpd_decl', `
|
||||
declare double @__rcp_uniform_double(double)
|
||||
declare <WIDTH x double> @__rcp_varying_double(<WIDTH x double>)
|
||||
')
|
||||
|
||||
|
||||
;; Emits LLVM declarations for the __log, __exp and __pow functions in both
;; float and double precision; each has a uniform (scalar) and a varying
;; (vector) form. Only declarations are generated here, no function bodies.
define(`transcendetals_decl',`
declare float @__log_uniform_float(float) nounwind readnone
declare float @__exp_uniform_float(float) nounwind readnone
declare float @__pow_uniform_float(float, float) nounwind readnone
declare <WIDTH x float> @__log_varying_float(<WIDTH x float>) nounwind readnone
declare <WIDTH x float> @__exp_varying_float(<WIDTH x float>) nounwind readnone
declare <WIDTH x float> @__pow_varying_float(<WIDTH x float>, <WIDTH x float>) nounwind readnone

declare double @__log_uniform_double(double) nounwind readnone
declare double @__exp_uniform_double(double) nounwind readnone
declare double @__pow_uniform_double(double, double) nounwind readnone
declare <WIDTH x double> @__log_varying_double(<WIDTH x double>) nounwind readnone
declare <WIDTH x double> @__exp_varying_double(<WIDTH x double>) nounwind readnone
declare <WIDTH x double> @__pow_varying_double(<WIDTH x double>, <WIDTH x double>) nounwind readnone
')
|
||||
|
||||
;; Emits LLVM declarations for the sin, asin, cos, acos, sincos, tan, atan and
;; atan2 functions in float and double precision, each in a varying (vector)
;; and a uniform (scalar) form. Only declarations are generated here.
;;
;; The sincos declarations return void and hand back both results through
;; their two pointer arguments, so they must not carry readnone: a readnone
;; function is not allowed to write through pointer arguments, and a call to
;; it could be treated as having no effect and be removed.
define(`trigonometry_decl',`
declare <WIDTH x float> @__sin_varying_float(<WIDTH x float>) nounwind readnone
declare <WIDTH x float> @__asin_varying_float(<WIDTH x float>) nounwind readnone
declare <WIDTH x float> @__cos_varying_float(<WIDTH x float>) nounwind readnone
declare <WIDTH x float> @__acos_varying_float(<WIDTH x float>) nounwind readnone
declare void @__sincos_varying_float(<WIDTH x float>, <WIDTH x float>*, <WIDTH x float>*) nounwind
declare <WIDTH x float> @__tan_varying_float(<WIDTH x float>) nounwind readnone
declare <WIDTH x float> @__atan_varying_float(<WIDTH x float>) nounwind readnone
declare <WIDTH x float> @__atan2_varying_float(<WIDTH x float>,<WIDTH x float>) nounwind readnone

declare float @__sin_uniform_float(float) nounwind readnone
declare float @__asin_uniform_float(float) nounwind readnone
declare float @__cos_uniform_float(float) nounwind readnone
declare float @__acos_uniform_float(float) nounwind readnone
declare void @__sincos_uniform_float(float, float*, float*) nounwind
declare float @__tan_uniform_float(float) nounwind readnone
declare float @__atan_uniform_float(float) nounwind readnone
declare float @__atan2_uniform_float(float,float) nounwind readnone

declare <WIDTH x double> @__sin_varying_double(<WIDTH x double>) nounwind readnone
declare <WIDTH x double> @__asin_varying_double(<WIDTH x double>) nounwind readnone
declare <WIDTH x double> @__cos_varying_double(<WIDTH x double>) nounwind readnone
declare <WIDTH x double> @__acos_varying_double(<WIDTH x double>) nounwind readnone
declare void @__sincos_varying_double(<WIDTH x double>, <WIDTH x double>*, <WIDTH x double>*) nounwind
declare <WIDTH x double> @__tan_varying_double(<WIDTH x double>) nounwind readnone
declare <WIDTH x double> @__atan_varying_double(<WIDTH x double>) nounwind readnone
declare <WIDTH x double> @__atan2_varying_double(<WIDTH x double>,<WIDTH x double>) nounwind readnone

declare double @__sin_uniform_double(double) nounwind readnone
declare double @__asin_uniform_double(double) nounwind readnone
declare double @__cos_uniform_double(double) nounwind readnone
declare double @__acos_uniform_double(double) nounwind readnone
declare void @__sincos_uniform_double(double, double*, double*) nounwind
declare double @__tan_uniform_double(double) nounwind readnone
declare double @__atan_uniform_double(double) nounwind readnone
declare double @__atan2_uniform_double(double,double) nounwind readnone
')
|
||||
|
||||
@@ -691,39 +691,6 @@ SHIFT(__vec16_f, float, float)
|
||||
SHUFFLES(__vec16_f, float, float)
|
||||
LOAD_STORE(__vec16_f, float)
|
||||
|
||||
static FORCEINLINE float __exp_uniform_float(float v) {
|
||||
return expf(v);
|
||||
}
|
||||
|
||||
static FORCEINLINE __vec16_f __exp_varying_float(__vec16_f v) {
|
||||
__vec16_f ret;
|
||||
for (int i = 0; i < 16; ++i)
|
||||
ret.v[i] = expf(v.v[i]);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static FORCEINLINE float __log_uniform_float(float v) {
|
||||
return logf(v);
|
||||
}
|
||||
|
||||
static FORCEINLINE __vec16_f __log_varying_float(__vec16_f v) {
|
||||
__vec16_f ret;
|
||||
for (int i = 0; i < 16; ++i)
|
||||
ret.v[i] = logf(v.v[i]);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static FORCEINLINE float __pow_uniform_float(float a, float b) {
|
||||
return powf(a, b);
|
||||
}
|
||||
|
||||
static FORCEINLINE __vec16_f __pow_varying_float(__vec16_f a, __vec16_f b) {
|
||||
__vec16_f ret;
|
||||
for (int i = 0; i < 16; ++i)
|
||||
ret.v[i] = powf(a.v[i], b.v[i]);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static FORCEINLINE int __intbits(float v) {
|
||||
union {
|
||||
float f;
|
||||
@@ -1813,3 +1780,97 @@ static FORCEINLINE uint64_t __clock() {
|
||||
|
||||
#endif // !WIN32
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// Transcendentals
|
||||
//
|
||||
//
|
||||
#define TRANSCENDENTALS(op) \
|
||||
static FORCEINLINE float __##op##_uniform_float(float v) { \
|
||||
return op##f(v); \
|
||||
} \
|
||||
static FORCEINLINE __vec16_f __##op##_varying_float(__vec16_f v) { \
|
||||
__vec16_f ret; \
|
||||
for (int i = 0; i < 16; ++i) \
|
||||
ret.v[i] = op##f(v.v[i]); \
|
||||
return ret; \
|
||||
} \
|
||||
static FORCEINLINE double __##op##_uniform_double(double v) { \
|
||||
return op(v); \
|
||||
} \
|
||||
static FORCEINLINE __vec16_d __##op##_varying_double(__vec16_d v) { \
|
||||
__vec16_d ret; \
|
||||
for (int i = 0; i < 16; ++i) \
|
||||
ret.v[i] = op(v.v[i]); \
|
||||
return ret; \
|
||||
}
|
||||
|
||||
TRANSCENDENTALS(log)
|
||||
TRANSCENDENTALS(exp)
|
||||
|
||||
|
||||
// Scalar single-precision power: returns powf(a, b).
static FORCEINLINE float __pow_uniform_float(float a, float b) {
    const float result = powf(a, b);
    return result;
}
|
||||
// Lane-wise single-precision power over 16 lanes: lane i of the result is
// powf(a.v[i], b.v[i]).
static FORCEINLINE __vec16_f __pow_varying_float(__vec16_f a, __vec16_f b) {
    __vec16_f result;
    for (int lane = 0; lane < 16; ++lane) {
        result.v[lane] = powf(a.v[lane], b.v[lane]);
    }
    return result;
}
|
||||
// Scalar double-precision power: returns pow(a, b).
static FORCEINLINE double __pow_uniform_double(double a, double b) {
    const double result = pow(a, b);
    return result;
}
|
||||
// Lane-wise double-precision power over 16 lanes: lane i of the result is
// pow(a.v[i], b.v[i]).
static FORCEINLINE __vec16_d __pow_varying_double(__vec16_d a, __vec16_d b) {
    __vec16_d result;
    for (int lane = 0; lane < 16; ++lane) {
        result.v[lane] = pow(a.v[lane], b.v[lane]);
    }
    return result;
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// Trigonometry
|
||||
|
||||
// Instantiate the uniform/varying, float/double wrappers for each
// one-argument trigonometric function through the TRANSCENDENTALS macro.
TRANSCENDENTALS(sin)
TRANSCENDENTALS(cos)
TRANSCENDENTALS(tan)
TRANSCENDENTALS(asin)
TRANSCENDENTALS(acos)
TRANSCENDENTALS(atan)
|
||||
|
||||
|
||||
// Scalar single-precision two-argument arctangent: returns atan2f(a, b).
static FORCEINLINE float __atan2_uniform_float(float a, float b) {
    const float result = atan2f(a, b);
    return result;
}
|
||||
// Lane-wise single-precision atan2 over 16 lanes: lane i of the result is
// atan2f(a.v[i], b.v[i]).
static FORCEINLINE __vec16_f __atan2_varying_float(__vec16_f a, __vec16_f b) {
    __vec16_f result;
    for (int lane = 0; lane < 16; ++lane) {
        result.v[lane] = atan2f(a.v[lane], b.v[lane]);
    }
    return result;
}
|
||||
// Scalar double-precision two-argument arctangent: returns atan2(a, b).
static FORCEINLINE double __atan2_uniform_double(double a, double b) {
    const double result = atan2(a, b);
    return result;
}
|
||||
// Lane-wise double-precision atan2 over 16 lanes: lane i of the result is
// atan2(a.v[i], b.v[i]).
static FORCEINLINE __vec16_d __atan2_varying_double(__vec16_d a, __vec16_d b) {
    __vec16_d result;
    for (int lane = 0; lane < 16; ++lane) {
        result.v[lane] = atan2(a.v[lane], b.v[lane]);
    }
    return result;
}
|
||||
|
||||
// Scalar single-precision sine and cosine in one call: forwards to
// sincosf(), which stores the sine of x through a and the cosine through b.
static FORCEINLINE void __sincos_uniform_float(float x, float *a, float *b) {
    sincosf(x, a, b);
}
|
||||
// Lane-wise single-precision sine and cosine over 16 lanes: for each lane i,
// sincosf() stores the sine of x.v[i] into a->v[i] and the cosine into
// b->v[i].
//
// The output lanes are addressed through the `v` member rather than by
// casting the struct pointer to float*, so the code does not depend on where
// `v` sits inside __vec16_f. The previously declared, never-used local
// vector has been removed.
static FORCEINLINE void __sincos_varying_float(__vec16_f x, __vec16_f *a, __vec16_f *b) {
    for (int i = 0; i < 16; ++i)
        sincosf(x.v[i], &a->v[i], &b->v[i]);
}
|
||||
// Scalar double-precision sine and cosine in one call: forwards to
// sincos(), which stores the sine of x through a and the cosine through b.
static FORCEINLINE void __sincos_uniform_double(double x, double *a, double *b) {
    sincos(x, a, b);
}
|
||||
// Lane-wise double-precision sine and cosine over 16 lanes: for each lane i,
// sincos() stores the sine of x.v[i] into a->v[i] and the cosine into
// b->v[i].
//
// The output lanes are addressed through the `v` member rather than by
// casting the struct pointer to double*, so the code does not depend on
// where `v` sits inside __vec16_d. The previously declared, never-used local
// vector has been removed.
static FORCEINLINE void __sincos_varying_double(__vec16_d x, __vec16_d *a, __vec16_d *b) {
    for (int i = 0; i < 16; ++i)
        sincos(x.v[i], &a->v[i], &b->v[i]);
}
|
||||
|
||||
@@ -167,6 +167,10 @@ struct __vec4_d {
|
||||
}
|
||||
|
||||
__m128d v[2];
|
||||
// Builds the vector from four consecutive doubles starting at p: p[0] and
// p[1] become the low and high halves of v[0], p[2] and p[3] those of v[1].
// Unaligned loads are used, so p carries no alignment requirement.
FORCEINLINE __vec4_d(double *p) {
    v[0] = _mm_loadu_pd(p);
    v[1] = _mm_loadu_pd(p + 2);
}
|
||||
};
|
||||
|
||||
|
||||
@@ -2471,39 +2475,6 @@ static FORCEINLINE __vec4_d __sqrt_varying_double(__vec4_d v) {
|
||||
return __vec4_d(_mm_sqrt_pd(v.v[0]), _mm_sqrt_pd(v.v[1]));
|
||||
}
|
||||
|
||||
static FORCEINLINE __vec4_f __pow_varying_float(__vec4_f a, __vec4_f b) {
|
||||
float r[4];
|
||||
for (int i = 0; i < 4; ++i)
|
||||
r[i] = powf(__extract_element(a, i), __extract_element(b, i));
|
||||
return __vec4_f(r);
|
||||
}
|
||||
|
||||
static FORCEINLINE float __pow_uniform_float(float a, float b) {
|
||||
return powf(a, b);
|
||||
}
|
||||
|
||||
static FORCEINLINE __vec4_f __exp_varying_float(__vec4_f a) {
|
||||
float r[4];
|
||||
for (int i = 0; i < 4; ++i)
|
||||
r[i] = expf(__extract_element(a, i));
|
||||
return __vec4_f(r);
|
||||
}
|
||||
|
||||
static FORCEINLINE float __exp_uniform_float(float a) {
|
||||
return expf(a);
|
||||
}
|
||||
|
||||
static FORCEINLINE __vec4_f __log_varying_float(__vec4_f a) {
|
||||
float r[4];
|
||||
for (int i = 0; i < 4; ++i)
|
||||
r[i] = logf(__extract_element(a, i));
|
||||
return __vec4_f(r);
|
||||
}
|
||||
|
||||
static FORCEINLINE float __log_uniform_float(float a) {
|
||||
return logf(a);
|
||||
}
|
||||
|
||||
static FORCEINLINE int __intbits(float v) {
|
||||
union {
|
||||
float f;
|
||||
@@ -4166,4 +4137,97 @@ static FORCEINLINE uint64_t __clock() {
|
||||
}
|
||||
#endif // !WIN32
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// Transcendentals
|
||||
|
||||
|
||||
#define TRANSCENDENTALS(op) \
|
||||
static FORCEINLINE __vec4_f __##op##_varying_float(__vec4_f a) {\
|
||||
float r[4];\
|
||||
for (int i = 0; i < 4; ++i)\
|
||||
r[i] = op##f(__extract_element(a, i));\
|
||||
return __vec4_f(r);\
|
||||
}\
|
||||
static FORCEINLINE float __##op##_uniform_float(float a) {\
|
||||
return op##f(a);\
|
||||
}\
|
||||
static FORCEINLINE __vec4_d __##op##_varying_double(__vec4_d a) {\
|
||||
double r[4];\
|
||||
for (int i = 0; i < 4; ++i)\
|
||||
r[i] = op(__extract_element(a, i));\
|
||||
return __vec4_d(r);\
|
||||
}\
|
||||
static FORCEINLINE double __##op##_uniform_double(double a) {\
|
||||
return op(a);\
|
||||
}
|
||||
|
||||
TRANSCENDENTALS(log)
|
||||
TRANSCENDENTALS(exp)
|
||||
|
||||
|
||||
// Lane-wise single-precision power over 4 lanes: lane i of the result is
// powf() of lane i of a and lane i of b.
static FORCEINLINE __vec4_f __pow_varying_float(__vec4_f a, __vec4_f b) {
    float lanes[4];
    for (int lane = 0; lane < 4; ++lane) {
        lanes[lane] = powf(__extract_element(a, lane), __extract_element(b, lane));
    }
    return __vec4_f(lanes);
}
|
||||
// Scalar single-precision power: returns powf(a, b).
static FORCEINLINE float __pow_uniform_float(float a, float b) {
    const float result = powf(a, b);
    return result;
}
|
||||
// Lane-wise double-precision power over 4 lanes: lane i of the result is
// pow() of lane i of a and lane i of b.
static FORCEINLINE __vec4_d __pow_varying_double(__vec4_d a, __vec4_d b) {
    double lanes[4];
    for (int lane = 0; lane < 4; ++lane) {
        lanes[lane] = pow(__extract_element(a, lane), __extract_element(b, lane));
    }
    return __vec4_d(lanes);
}
|
||||
// Scalar double-precision power: returns pow(a, b).
static FORCEINLINE double __pow_uniform_double(double a, double b) {
    const double result = pow(a, b);
    return result;
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// Trigonometry
|
||||
|
||||
// Instantiate the uniform/varying, float/double wrappers for each
// one-argument trigonometric function through the TRANSCENDENTALS macro.
TRANSCENDENTALS(sin)
TRANSCENDENTALS(cos)
TRANSCENDENTALS(tan)
TRANSCENDENTALS(asin)
TRANSCENDENTALS(acos)
TRANSCENDENTALS(atan)
|
||||
|
||||
|
||||
// Lane-wise single-precision atan2 over 4 lanes: lane i of the result is
// atan2f() of lane i of a and lane i of b.
static FORCEINLINE __vec4_f __atan2_varying_float(__vec4_f a, __vec4_f b) {
    float lanes[4];
    for (int lane = 0; lane < 4; ++lane) {
        lanes[lane] = atan2f(__extract_element(a, lane), __extract_element(b, lane));
    }
    return __vec4_f(lanes);
}
|
||||
// Scalar single-precision two-argument arctangent: returns atan2f(a, b).
static FORCEINLINE float __atan2_uniform_float(float a, float b) {
    const float result = atan2f(a, b);
    return result;
}
|
||||
// Lane-wise double-precision atan2 over 4 lanes: lane i of the result is
// atan2() of lane i of a and lane i of b.
static FORCEINLINE __vec4_d __atan2_varying_double(__vec4_d a, __vec4_d b) {
    double lanes[4];
    for (int lane = 0; lane < 4; ++lane) {
        lanes[lane] = atan2(__extract_element(a, lane), __extract_element(b, lane));
    }
    return __vec4_d(lanes);
}
|
||||
// Scalar double-precision two-argument arctangent: returns atan2(a, b).
static FORCEINLINE double __atan2_uniform_double(double a, double b) {
    const double result = atan2(a, b);
    return result;
}
|
||||
|
||||
// Lane-wise single-precision sine and cosine over 4 lanes: *_sin receives
// the sines and *_cos the cosines of the corresponding lanes of x.
//
// Results are gathered in local float arrays and turned into vectors with
// the array constructor, the same way the other varying helpers in this file
// build their return values. This replaces writing scalars through
// (float*) casts of the vector struct pointers, which depended on the
// struct's internal layout.
static FORCEINLINE void __sincos_varying_float(__vec4_f x, __vec4_f * _sin, __vec4_f * _cos) {
    float s[4];
    float c[4];
    for (int i = 0; i < 4; ++i)
        sincosf(__extract_element(x, i), &s[i], &c[i]);
    *_sin = __vec4_f(s);
    *_cos = __vec4_f(c);
}
|
||||
// Scalar single-precision sine and cosine in one call: forwards to
// sincosf(), which stores the sine of x through _sin and the cosine through
// _cos.
static FORCEINLINE void __sincos_uniform_float(float x, float *_sin, float *_cos) {
    sincosf(x, _sin, _cos);
}
|
||||
// Lane-wise double-precision sine and cosine over 4 lanes: *_sin receives
// the sines and *_cos the cosines of the corresponding lanes of x.
//
// Results are gathered in local double arrays and turned into vectors with
// the __vec4_d(double *) constructor, the same way the other varying helpers
// in this file build their return values. This replaces writing scalars
// through (double*) casts of the vector struct pointers, which depended on
// the struct's internal layout.
static FORCEINLINE void __sincos_varying_double(__vec4_d x, __vec4_d * _sin, __vec4_d * _cos) {
    double s[4];
    double c[4];
    for (int i = 0; i < 4; ++i)
        sincos(__extract_element(x, i), &s[i], &c[i]);
    *_sin = __vec4_d(s);
    *_cos = __vec4_d(c);
}
|
||||
// Scalar double-precision sine and cosine in one call: forwards to
// sincos(), which stores the sine of x through _sin and the cosine through
// _cos.
static FORCEINLINE void __sincos_uniform_double(double x, double *_sin, double *_cos) {
    sincos(x, _sin, _cos);
}
|
||||
|
||||
#undef FORCEINLINE
|
||||
|
||||
79
stdlib.ispc
79
stdlib.ispc
@@ -2433,29 +2433,29 @@ static inline uniform float sin(uniform float x_full) {
|
||||
|
||||
|
||||
__declspec(safe)
|
||||
static inline float asin(float x) {
|
||||
bool isneg = x < 0;
|
||||
x = abs(x);
|
||||
|
||||
static inline float asin(float x0) {
|
||||
bool isneg = x0< 0;
|
||||
float x = abs(x0);
|
||||
bool isnan = (x > 1);
|
||||
|
||||
float v;
|
||||
|
||||
if (__have_native_trigonometry)
|
||||
{
|
||||
return __asin_varying_float(x_full);
|
||||
return __asin_varying_float(x0);
|
||||
}
|
||||
else if (__math_lib == __math_lib_svml) {
|
||||
return __svml_asinf(x);
|
||||
return __svml_asinf(x0);
|
||||
}
|
||||
else if (__math_lib == __math_lib_system) {
|
||||
float ret;
|
||||
foreach_active (i) {
|
||||
uniform float r = __stdlib_asinf(extract(x, i));
|
||||
uniform float r = __stdlib_asinf(extract(x0, i));
|
||||
ret = insert(ret, i, r);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
else if (__math_lib == __math_lib_ispc)
|
||||
{
|
||||
// sollya
|
||||
// fpminimax(((asin(x)-pi/2)/-sqrt(1-x)), [|0,1,2,3,4,5,6,7,8,9,10|],
|
||||
// [|single...|], [1e-20;.9999999999999999]);
|
||||
@@ -2471,7 +2471,9 @@ static inline float asin(float x) {
|
||||
x * (3.05023305118083953857421875e-2f +
|
||||
x * (-1.2897425331175327301025390625e-2f +
|
||||
x * 2.38926825113594532012939453125e-3f)))))))));
|
||||
}
|
||||
else if (__math_lib == __math_lib_ispc_fast)
|
||||
{
|
||||
// sollya
|
||||
// fpminimax(((asin(x)-pi/2)/-sqrt(1-x)), [|0,1,2,3,4,5|],[|single...|],
|
||||
// [1e-20;.9999999999999999]);
|
||||
@@ -2482,6 +2484,7 @@ static inline float asin(float x) {
|
||||
x * (-4.489909112453460693359375e-2f +
|
||||
x * (1.928029954433441162109375e-2f +
|
||||
x * (-4.3095736764371395111083984375e-3f)))));
|
||||
}
|
||||
|
||||
v *= -sqrt(1.f - x);
|
||||
v = v + 1.57079637050628662109375;
|
||||
@@ -2496,22 +2499,21 @@ static inline float asin(float x) {
|
||||
|
||||
|
||||
__declspec(safe)
|
||||
static inline uniform float asin(uniform float x) {
|
||||
uniform bool isneg = x < 0;
|
||||
x = abs(x);
|
||||
|
||||
static inline uniform float asin(uniform float x0) {
|
||||
uniform bool isneg = x0 < 0;
|
||||
uniform float x = abs(x0);
|
||||
uniform bool isnan = (x > 1);
|
||||
|
||||
uniform float v;
|
||||
if (__have_native_trigonometry)
|
||||
{
|
||||
return __asin_uniform_float(x_full);
|
||||
return __asin_uniform_float(x0);
|
||||
}
|
||||
else if (__math_lib == __math_lib_svml ||
|
||||
__math_lib == __math_lib_system) {
|
||||
return __stdlib_asinf(x);
|
||||
return __stdlib_asinf(x0);
|
||||
}
|
||||
else if (__math_lib == __math_lib_ispc)
|
||||
{
|
||||
// sollya
|
||||
// fpminimax(((asin(x)-pi/2)/-sqrt(1-x)), [|0,1,2,3,4,5,6,7,8,9,10|],
|
||||
// [|single...|], [1e-20;.9999999999999999]);
|
||||
@@ -2527,7 +2529,9 @@ static inline uniform float asin(uniform float x) {
|
||||
x * (3.05023305118083953857421875e-2f +
|
||||
x * (-1.2897425331175327301025390625e-2f +
|
||||
x * 2.38926825113594532012939453125e-3f)))))))));
|
||||
}
|
||||
else if (__math_lib == __math_lib_ispc_fast)
|
||||
{
|
||||
// sollya
|
||||
// fpminimax(((asin(x)-pi/2)/-sqrt(1-x)), [|0,1,2,3,4,5|],[|single...|],
|
||||
// [1e-20;.9999999999999999]);
|
||||
@@ -2538,6 +2542,7 @@ static inline uniform float asin(uniform float x) {
|
||||
x * (-4.489909112453460693359375e-2f +
|
||||
x * (1.928029954433441162109375e-2f +
|
||||
x * (-4.3095736764371395111083984375e-3f)))));
|
||||
}
|
||||
|
||||
v *= -sqrt(1.f - x);
|
||||
v = v + 1.57079637050628662109375;
|
||||
@@ -2687,22 +2692,34 @@ static inline uniform float cos(uniform float x_full) {
|
||||
|
||||
// Arc cosine of a varying float. When __have_native_trigonometry is set the
// call goes to __acos_varying_float(); otherwise the result is computed from
// asin() as (pi/2 rounded to float) - asin(v).
__declspec(safe)
static inline float acos(float v) {
    if (!__have_native_trigonometry) {
        return 1.57079637050628662109375 - asin(v);
    }
    return __acos_varying_float(v);
}
|
||||
|
||||
// Arc cosine of a varying double. When __have_native_trigonometry is set the
// call goes to __acos_varying_double(); otherwise the result is computed
// from asin() as pi/2 - asin(v).
//
// The pi/2 constant is written to full double precision. The previous
// literal was pi/2 rounded to float (1.57079637...), which is off by about
// 4.4e-8 and limited the fallback to single-precision accuracy.
__declspec(safe)
static inline double acos(const double v) {
    if (__have_native_trigonometry)
        return __acos_varying_double(v);
    else
        return 1.5707963267948966d0 - asin(v);
}
|
||||
|
||||
|
||||
// Arc cosine of a uniform float. When __have_native_trigonometry is set the
// call goes to __acos_uniform_float(); otherwise the result is computed from
// asin() as (pi/2 rounded to float) - asin(v).
__declspec(safe)
static inline uniform float acos(uniform float v) {
    if (!__have_native_trigonometry) {
        return 1.57079637050628662109375 - asin(v);
    }
    return __acos_uniform_float(v);
}
|
||||
|
||||
// Arc cosine of a uniform double. When __have_native_trigonometry is set the
// call goes to __acos_uniform_double(); otherwise the result is computed
// from asin() as pi/2 - asin(v).
//
// The pi/2 constant is written to full double precision. The previous
// literal was pi/2 rounded to float (1.57079637...), which is off by about
// 4.4e-8 and limited the fallback to single-precision accuracy.
__declspec(safe)
static inline uniform double acos(const uniform double v) {
    if (__have_native_trigonometry)
        return __acos_uniform_double(v);
    else
        return 1.5707963267948966d0 - asin(v);
}
|
||||
|
||||
@@ -3696,7 +3713,7 @@ __declspec(safe)
|
||||
static inline double sin(double x) {
|
||||
if (__have_native_trigonometry)
|
||||
{
|
||||
return __sin_varying_double(x_full);
|
||||
return __sin_varying_double(x);
|
||||
}
|
||||
else if (__math_lib == __math_lib_svml)
|
||||
{
|
||||
@@ -3715,7 +3732,7 @@ __declspec(safe)
|
||||
static inline double asin(double x) {
|
||||
if (__have_native_trigonometry)
|
||||
{
|
||||
return __asin_varying_double(x_full);
|
||||
return __asin_varying_double(x);
|
||||
}
|
||||
else if (__math_lib == __math_lib_svml)
|
||||
{
|
||||
@@ -3735,7 +3752,7 @@ __declspec(safe)
|
||||
static inline uniform double sin(uniform double x) {
|
||||
if (__have_native_trigonometry)
|
||||
{
|
||||
return __sin_uniform_double(x_full);
|
||||
return __sin_uniform_double(x);
|
||||
}
|
||||
else
|
||||
return __stdlib_sin(x);
|
||||
@@ -3745,7 +3762,7 @@ __declspec(safe)
|
||||
static inline double asin(const double x) {
|
||||
if (__have_native_trigonometry)
|
||||
{
|
||||
return __asin_varying_double(x_full);
|
||||
return __asin_varying_double(x);
|
||||
}
|
||||
else if (__math_lib == __math_lib_svml)
|
||||
{
|
||||
@@ -3765,7 +3782,7 @@ __declspec(safe)
|
||||
static inline double cos(const double x) {
|
||||
if (__have_native_trigonometry)
|
||||
{
|
||||
return __cos_varying_double(x_full);
|
||||
return __cos_varying_double(x);
|
||||
}
|
||||
if (__math_lib == __math_lib_svml)
|
||||
{
|
||||
@@ -3785,7 +3802,7 @@ __declspec(safe)
|
||||
static inline uniform double cos(uniform double x) {
|
||||
if (__have_native_trigonometry)
|
||||
{
|
||||
return __cos_uniform_double(x_full);
|
||||
return __cos_uniform_double(x);
|
||||
}
|
||||
else
|
||||
return __stdlib_cos(x);
|
||||
@@ -3796,7 +3813,7 @@ static inline void sincos(double x, varying double * uniform sin_result,
|
||||
varying double * uniform cos_result) {
|
||||
if (__have_native_trigonometry)
|
||||
{
|
||||
__sincos_varying_double(x_full),sin_result,cos_result);
|
||||
__sincos_varying_double(x,sin_result,cos_result);
|
||||
}
|
||||
if (__math_lib == __math_lib_svml)
|
||||
{
|
||||
@@ -3817,7 +3834,7 @@ static inline void sincos(uniform double x, uniform double * uniform sin_result,
|
||||
uniform double * uniform cos_result) {
|
||||
if (__have_native_trigonometry)
|
||||
{
|
||||
__sincos_uniform_double(x_full,sin_result, cos_result);
|
||||
__sincos_uniform_double(x,sin_result, cos_result);
|
||||
}
|
||||
else
|
||||
__stdlib_sincos(x, sin_result, cos_result);
|
||||
@@ -3827,7 +3844,7 @@ __declspec(safe)
|
||||
static inline double tan(double x) {
|
||||
if (__have_native_trigonometry)
|
||||
{
|
||||
return __tan_varying_double(x_full);
|
||||
return __tan_varying_double(x);
|
||||
}
|
||||
else if (__math_lib == __math_lib_svml)
|
||||
{
|
||||
@@ -3847,7 +3864,7 @@ __declspec(safe)
|
||||
static inline uniform double tan(uniform double x) {
|
||||
if (__have_native_trigonometry)
|
||||
{
|
||||
return __tan_uniform_double(x_full);
|
||||
return __tan_uniform_double(x);
|
||||
}
|
||||
else
|
||||
return __stdlib_tan(x);
|
||||
@@ -3857,7 +3874,7 @@ __declspec(safe)
|
||||
static inline double atan(double x) {
|
||||
if (__have_native_trigonometry)
|
||||
{
|
||||
return __atan_varying_double(x_full);
|
||||
return __atan_varying_double(x);
|
||||
}
|
||||
else {
|
||||
double ret;
|
||||
@@ -3873,7 +3890,7 @@ __declspec(safe)
|
||||
static inline uniform double atan(uniform double x) {
|
||||
if (__have_native_trigonometry)
|
||||
{
|
||||
return __atan_uniform_double(x_full);
|
||||
return __atan_uniform_double(x);
|
||||
}
|
||||
else
|
||||
return __stdlib_atan(x);
|
||||
@@ -3912,7 +3929,7 @@ static inline uniform double atan2(uniform double y, uniform double x) {
|
||||
__declspec(safe)
|
||||
static inline double exp(double x) {
|
||||
if (__have_native_transcendentals) {
|
||||
return __exp_varying_double(x_full);
|
||||
return __exp_varying_double(x);
|
||||
}
|
||||
else if (__math_lib == __math_lib_svml)
|
||||
{
|
||||
@@ -3931,7 +3948,7 @@ static inline double exp(double x) {
|
||||
__declspec(safe)
|
||||
static inline uniform double exp(uniform double x) {
|
||||
if (__have_native_transcendentals) {
|
||||
return __exp_uniform_double(x_full);
|
||||
return __exp_uniform_double(x);
|
||||
}
|
||||
else
|
||||
return __stdlib_exp(x);
|
||||
@@ -3940,7 +3957,7 @@ static inline uniform double exp(uniform double x) {
|
||||
__declspec(safe)
|
||||
static inline double log(double x) {
|
||||
if (__have_native_transcendentals) {
|
||||
return __log_varying_double(x_full);
|
||||
return __log_varying_double(x);
|
||||
}
|
||||
else if (__math_lib == __math_lib_svml)
|
||||
{
|
||||
@@ -3959,7 +3976,7 @@ static inline double log(double x) {
|
||||
__declspec(safe)
|
||||
static inline uniform double log(uniform double x) {
|
||||
if (__have_native_transcendentals) {
|
||||
return __log_uniform_double(x_full);
|
||||
return __log_uniform_double(x);
|
||||
}
|
||||
else
|
||||
return __stdlib_log(x);
|
||||
|
||||
Reference in New Issue
Block a user