diff --git a/builtins.cpp b/builtins.cpp index 2c1acc36..fa1a8209 100644 --- a/builtins.cpp +++ b/builtins.cpp @@ -1158,6 +1158,8 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod symbolTable); lDefineConstantInt("__have_native_transcendentals", g->target->hasTranscendentals(), module, symbolTable); + lDefineConstantInt("__have_native_trigonometry", g->target->hasTrigonometry(), + module, symbolTable); lDefineConstantInt("__have_native_rsqrtd", g->target->hasRsqrtd(), module, symbolTable); lDefineConstantInt("__have_native_rcpd", g->target->hasRcpd(), diff --git a/builtins/target-avx-x2.ll b/builtins/target-avx-x2.ll index 68a67133..69026515 100644 --- a/builtins/target-avx-x2.ll +++ b/builtins/target-avx-x2.ll @@ -695,3 +695,5 @@ define <16 x double> @__max_varying_double(<16 x double>, <16 x double>) nounwin rsqrtd_decl() rcpd_decl() +transcendetals_decl() +trigonometry_decl() diff --git a/builtins/target-avx.ll b/builtins/target-avx.ll index 9738f9d3..e0f4e45d 100644 --- a/builtins/target-avx.ll +++ b/builtins/target-avx.ll @@ -564,3 +564,6 @@ gen_scatter(double) rsqrtd_decl() rcpd_decl() + +transcendetals_decl() +trigonometry_decl() diff --git a/builtins/target-avx1-i64x4base.ll b/builtins/target-avx1-i64x4base.ll index 19b47b1d..004a8702 100644 --- a/builtins/target-avx1-i64x4base.ll +++ b/builtins/target-avx1-i64x4base.ll @@ -514,3 +514,6 @@ define <4 x double> @__max_varying_double(<4 x double>, <4 x double>) nounwind r rsqrtd_decl() rcpd_decl() + +transcendetals_decl() +trigonometry_decl() diff --git a/builtins/target-generic-1.ll b/builtins/target-generic-1.ll index a3de92f3..a48294ba 100644 --- a/builtins/target-generic-1.ll +++ b/builtins/target-generic-1.ll @@ -998,3 +998,6 @@ define_avgs() rsqrtd_decl() rcpd_decl() + +transcendetals_decl() +trigonometry_decl() diff --git a/builtins/target-generic-common.ll b/builtins/target-generic-common.ll index 401c862d..ef33ff97 100644 --- a/builtins/target-generic-common.ll +++ b/builtins/target-generic-common.ll @@ -191,13 +191,9 @@ declare @__max_varying_double(, declare float @__rsqrt_uniform_float(float) nounwind readnone declare float @__rcp_uniform_float(float) nounwind readnone -declare double @__rsqrt_uniform_double(double) nounwind readnone -declare double @__rcp_uniform_double(double) nounwind readnone declare float @__sqrt_uniform_float(float) nounwind readnone declare @__rcp_varying_float() nounwind readnone declare @__rsqrt_varying_float() nounwind readnone -declare @__rcp_varying_double() nounwind readnone -declare @__rsqrt_varying_double() nounwind readnone declare @__sqrt_varying_float() nounwind readnone @@ -393,3 +389,11 @@ declare void @__prefetch_read_uniform_nt(i8 * nocapture) nounwind define_avgs() +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; reciprocals in double precision, if supported + +rsqrtd_decl() +rcpd_decl() + +transcendetals_decl() +trigonometry_decl() diff --git a/builtins/target-neon-16.ll b/builtins/target-neon-16.ll index 8e0ef121..d73b0a50 100644 --- a/builtins/target-neon-16.ll +++ b/builtins/target-neon-16.ll @@ -521,3 +521,6 @@ define <8 x i16> @__avg_down_int16(<8 x i16>, <8 x i16>) nounwind readnone { rsqrtd_decl() rcpd_decl() + +transcendetals_decl() +trigonometry_decl() diff --git a/builtins/target-neon-32.ll b/builtins/target-neon-32.ll index d6e861a2..a8999269 100644 --- a/builtins/target-neon-32.ll +++ b/builtins/target-neon-32.ll @@ -491,3 +491,6 @@ define <4 x i16> @__avg_down_int16(<4 x i16>, <4 x i16>) nounwind readnone { rsqrtd_decl() rcpd_decl() + +transcendetals_decl() +trigonometry_decl() diff --git a/builtins/target-neon-8.ll b/builtins/target-neon-8.ll index aaa0a7b7..0c1edf8c 100644 --- a/builtins/target-neon-8.ll +++ b/builtins/target-neon-8.ll @@ -587,3 +587,6 @@ define <16 x i16> @__avg_down_int16(<16 x i16>, <16 x i16>) nounwind readnone { rsqrtd_decl() rcpd_decl() + +transcendetals_decl() +trigonometry_decl() diff --git a/builtins/target-sse2-x2.ll b/builtins/target-sse2-x2.ll index 9dcb064f..4bee3241 100644 --- a/builtins/target-sse2-x2.ll +++ b/builtins/target-sse2-x2.ll @@ -659,3 +659,6 @@ define <8 x double> @__max_varying_double(<8 x double>, <8 x double>) nounwind r rsqrtd_decl() rcpd_decl() + +transcendetals_decl() +trigonometry_decl() diff --git a/builtins/target-sse2.ll b/builtins/target-sse2.ll index 6a5709fd..7f82f933 100644 --- a/builtins/target-sse2.ll +++ b/builtins/target-sse2.ll @@ -594,3 +594,6 @@ gen_scatter(double) rsqrtd_decl() rcpd_decl() + +transcendetals_decl() +trigonometry_decl() diff --git a/builtins/target-sse4-16.ll b/builtins/target-sse4-16.ll index c8f72d45..fa99e37c 100644 --- a/builtins/target-sse4-16.ll +++ b/builtins/target-sse4-16.ll @@ -495,3 +495,6 @@ define_down_avgs() rsqrtd_decl() rcpd_decl() + +transcendetals_decl() +trigonometry_decl() diff --git a/builtins/target-sse4-8.ll b/builtins/target-sse4-8.ll index 4b394734..c46798a6 100644 --- a/builtins/target-sse4-8.ll +++ b/builtins/target-sse4-8.ll @@ -497,3 +497,6 @@ define_down_avgs() rsqrtd_decl() rcpd_decl() + +transcendetals_decl() +trigonometry_decl() diff --git a/builtins/target-sse4-x2.ll b/builtins/target-sse4-x2.ll index e87f4640..70e3d01e 100644 --- a/builtins/target-sse4-x2.ll +++ b/builtins/target-sse4-x2.ll @@ -598,3 +598,6 @@ define_avgs() rsqrtd_decl() rcpd_decl() + +transcendetals_decl() +trigonometry_decl() diff --git a/builtins/target-sse4.ll b/builtins/target-sse4.ll index 9819d385..18f0d80e 100644 --- a/builtins/target-sse4.ll +++ b/builtins/target-sse4.ll @@ -521,3 +521,6 @@ define_avgs() rsqrtd_decl() rcpd_decl() + +transcendetals_decl() +trigonometry_decl() diff --git a/builtins/util.m4 b/builtins/util.m4 index 025018e9..f395b6bc 100644 --- a/builtins/util.m4 +++ b/builtins/util.m4 @@ -2516,13 +2516,6 @@ declare void @__pseudo_scatter_base_offsets64_double(i8 * nocapture, i32, , , ) nounwind -declare float @__log_uniform_float(float) nounwind readnone -declare @__log_varying_float() nounwind readnone -declare float @__exp_uniform_float(float) nounwind readnone -declare @__exp_varying_float() nounwind readnone -declare float @__pow_uniform_float(float, float) nounwind readnone -declare @__pow_varying_float(, ) nounwind readnone - ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; declare void @__use8() @@ -4951,3 +4944,58 @@ define(`rcpd_decl', ` declare double @__rcp_uniform_double(double) declare @__rcp_varying_double() ') + + +define(`transcendetals_decl',` + declare float @__log_uniform_float(float) nounwind readnone + declare @__log_varying_float() nounwind readnone + declare float @__exp_uniform_float(float) nounwind readnone + declare @__exp_varying_float() nounwind readnone + declare float @__pow_uniform_float(float, float) nounwind readnone + declare @__pow_varying_float(, ) nounwind readnone + + declare double @__log_uniform_double(double) nounwind readnone + declare @__log_varying_double() nounwind readnone + declare double @__exp_uniform_double(double) nounwind readnone + declare @__exp_varying_double() nounwind readnone + declare double @__pow_uniform_double(double, double) nounwind readnone + declare @__pow_varying_double(, ) nounwind readnone +') + +define(`trigonometry_decl',` + declare @__sin_varying_float() nounwind readnone + declare @__asin_varying_float() nounwind readnone + declare @__cos_varying_float() nounwind readnone + declare @__acos_varying_float() nounwind readnone + declare void @__sincos_varying_float(, *, *) nounwind readnone + declare @__tan_varying_float() nounwind readnone + declare @__atan_varying_float() nounwind readnone + declare @__atan2_varying_float(,) nounwind readnone + + declare float @__sin_uniform_float(float) nounwind readnone + declare float @__asin_uniform_float(float) nounwind readnone + declare float @__cos_uniform_float(float) nounwind readnone + declare float @__acos_uniform_float(float) nounwind readnone + declare void @__sincos_uniform_float(float, float*, float*) nounwind readnone + declare float @__tan_uniform_float(float) nounwind readnone + declare float @__atan_uniform_float(float) nounwind readnone + declare float @__atan2_uniform_float(float,float) nounwind readnone + + declare @__sin_varying_double() nounwind readnone + declare @__asin_varying_double() nounwind readnone + declare @__cos_varying_double() nounwind readnone + declare @__acos_varying_double() nounwind readnone + declare void @__sincos_varying_double(, *, *) nounwind readnone + declare @__tan_varying_double() nounwind readnone + declare @__atan_varying_double() nounwind readnone + declare @__atan2_varying_double(,) nounwind readnone + + declare double @__sin_uniform_double(double) nounwind readnone + declare double @__asin_uniform_double(double) nounwind readnone + declare double @__cos_uniform_double(double) nounwind readnone + declare double @__acos_uniform_double(double) nounwind readnone + declare void @__sincos_uniform_double(double, double*, double*) nounwind readnone + declare double @__tan_uniform_double(double) nounwind readnone + declare double @__atan_uniform_double(double) nounwind readnone + declare double @__atan2_uniform_double(double,double) nounwind readnone +') diff --git a/examples/intrinsics/generic-16.h b/examples/intrinsics/generic-16.h index 3b5c6ec3..c8f2cf08 100644 --- a/examples/intrinsics/generic-16.h +++ b/examples/intrinsics/generic-16.h @@ -691,39 +691,6 @@ SHIFT(__vec16_f, float, float) SHUFFLES(__vec16_f, float, float) LOAD_STORE(__vec16_f, float) -static FORCEINLINE float __exp_uniform_float(float v) { - return expf(v); -} - -static FORCEINLINE __vec16_f __exp_varying_float(__vec16_f v) { - __vec16_f ret; - for (int i = 0; i < 16; ++i) - ret.v[i] = expf(v.v[i]); - return ret; -} - -static FORCEINLINE float __log_uniform_float(float v) { - return logf(v); -} - -static FORCEINLINE __vec16_f __log_varying_float(__vec16_f v) { - __vec16_f ret; - for (int i = 0; i < 16; ++i) - ret.v[i] = logf(v.v[i]); - return ret; -} - -static FORCEINLINE float __pow_uniform_float(float a, float b) { - return powf(a, b); -} - -static FORCEINLINE __vec16_f __pow_varying_float(__vec16_f a, __vec16_f b) { - __vec16_f ret; - for (int i = 0; i < 16; ++i) - ret.v[i] = powf(a.v[i], b.v[i]); - return ret; -} - static FORCEINLINE int __intbits(float v) { union { float f; @@ -1813,3 +1780,97 @@ static FORCEINLINE uint64_t __clock() { #endif // !WIN32 + +/////////////////////////////////////////////////////////////////////////// +// Transcendentals +// +// +#define TRANSCENDENTALS(op) \ +static FORCEINLINE float __##op##_uniform_float(float v) { \ + return op##f(v); \ +} \ +static FORCEINLINE __vec16_f __##op##_varying_float(__vec16_f v) { \ + __vec16_f ret; \ + for (int i = 0; i < 16; ++i) \ + ret.v[i] = op##f(v.v[i]); \ + return ret; \ +} \ +static FORCEINLINE double __##op##_uniform_double(double v) { \ + return op(v); \ +} \ +static FORCEINLINE __vec16_d __##op##_varying_double(__vec16_d v) { \ + __vec16_d ret; \ + for (int i = 0; i < 16; ++i) \ + ret.v[i] = op(v.v[i]); \ + return ret; \ +} + + TRANSCENDENTALS(log) +TRANSCENDENTALS(exp) + + +static FORCEINLINE float __pow_uniform_float(float a, float b) { + return powf(a, b); +} +static FORCEINLINE __vec16_f __pow_varying_float(__vec16_f a, __vec16_f b) { + __vec16_f ret; + for (int i = 0; i < 16; ++i) + ret.v[i] = powf(a.v[i], b.v[i]); + return ret; +} +static FORCEINLINE double __pow_uniform_double(double a, double b) { + return pow(a, b); +} +static FORCEINLINE __vec16_d __pow_varying_double(__vec16_d a, __vec16_d b) { + __vec16_d ret; + for (int i = 0; i < 16; ++i) + ret.v[i] = pow(a.v[i], b.v[i]); + return ret; +} + +/////////////////////////////////////////////////////////////////////////// +// Trigonometry + +TRANSCENDENTALS(sin) +TRANSCENDENTALS(asin) +TRANSCENDENTALS(cos) +TRANSCENDENTALS(acos) +TRANSCENDENTALS(tan) +TRANSCENDENTALS(atan) + + +static FORCEINLINE float __atan2_uniform_float(float a, float b) { + return atan2f(a, b); +} +static FORCEINLINE __vec16_f __atan2_varying_float(__vec16_f a, __vec16_f b) { + __vec16_f ret; + for (int i = 0; i < 16; ++i) + ret.v[i] = atan2f(a.v[i], b.v[i]); + return ret; +} +static FORCEINLINE double __atan2_uniform_double(double a, double b) { + return atan2(a, b); +} +static FORCEINLINE __vec16_d __atan2_varying_double(__vec16_d a, __vec16_d b) { + __vec16_d ret; + for (int i = 0; i < 16; ++i) + ret.v[i] = atan2(a.v[i], b.v[i]); + return ret; +} + +static FORCEINLINE void __sincos_uniform_float(float x, float *a, float *b) { + sincosf(x,a,b); +} +static FORCEINLINE void __sincos_varying_float(__vec16_f x, __vec16_f *a, __vec16_f *b) { + __vec16_f ret; + for (int i = 0; i < 16; ++i) + sincosf(x.v[i], (float*)a + i, (float*)b+i); +} +static FORCEINLINE void __sincos_uniform_double(double x, double *a, double *b) { + sincos(x,a,b); +} +static FORCEINLINE void __sincos_varying_double(__vec16_d x, __vec16_d *a, __vec16_d *b) { + __vec16_d ret; + for (int i = 0; i < 16; ++i) + sincos(x.v[i], (double*)a + i, (double*)b+i); +} diff --git a/examples/intrinsics/knc-i1x16.h b/examples/intrinsics/knc-i1x16.h index ba6ef005..2e6afed5 100644 --- a/examples/intrinsics/knc-i1x16.h +++ b/examples/intrinsics/knc-i1x16.h @@ -1142,28 +1142,6 @@ template <> static FORCEINLINE void __store<64>(__vec16_f *p, __vec16_f v) } #endif -/******** math ******/ - -/*** float ***/ -static FORCEINLINE float __exp_uniform_float(float v) { return expf(v);} -static FORCEINLINE __vec16_f __exp_varying_float(__vec16_f v) { return _mm512_exp_ps(v); } - -static FORCEINLINE float __log_uniform_float(float v) { return logf(v);} -static FORCEINLINE __vec16_f __log_varying_float(__vec16_f v) { return _mm512_log_ps(v); } - -static FORCEINLINE float __pow_uniform_float(float a, float b) { return powf(a, b);} -static FORCEINLINE __vec16_f __pow_varying_float(__vec16_f a, __vec16_f b) { return _mm512_pow_ps(a,b); } - -/*** double ***/ -static FORCEINLINE double __exp_uniform_double(double v) { return exp(v);} -static FORCEINLINE __vec16_d __exp_varying_double(__vec16_d v) { return __vec16_d(_mm512_exp_pd(v.v1),_mm512_exp_pd(v.v2)); } - -static FORCEINLINE double __log_uniform_double(double v) { return log(v);} -static FORCEINLINE __vec16_d __log_varying_double(__vec16_d v) { return __vec16_d(_mm512_log_pd(v.v1),_mm512_log_pd(v.v2)); } - -static FORCEINLINE double __pow_uniform_double(double a, double b) { return pow(a,b);} -static FORCEINLINE __vec16_d __pow_varying_double(__vec16_d a, __vec16_d b) { return __vec16_d(_mm512_pow_pd(a.v1,b.v1),_mm512_pow_pd(a.v2,b.v2)); } - /******** bitcast ******/ static FORCEINLINE int __intbits(float v) { @@ -2806,6 +2784,40 @@ static FORCEINLINE uint64_t __clock() { #endif // !WIN32 + +/////////////////////////////////////////////////////////////////////////// +// Transcendentals + + +#define TRANSCENDENTALS(op) \ +static FORCEINLINE __vec16_f __##op##_varying_float(__vec16_f v) { return _mm512_##op##_ps(v); } \ +static FORCEINLINE float __##op##_uniform_float(float v) { return op##f(v); } \ +static FORCEINLINE __vec16_d __##op##_varying_double(__vec16_d v) { return __vec16_d(_mm512_##op##_pd(v.v1),_mm512_##op##_pd(v.v2)); } \ +static FORCEINLINE double __##op##_uniform_double(double a) { return op(a); } + +TRANSCENDENTALS(log) +TRANSCENDENTALS(exp) + +static FORCEINLINE float __pow_uniform_float(float a, float b) { return powf(a, b);} +static FORCEINLINE __vec16_f __pow_varying_float(__vec16_f a, __vec16_f b) { return _mm512_pow_ps(a,b); } +static FORCEINLINE double __pow_uniform_double(double a, double b) { return pow(a,b);} +static FORCEINLINE __vec16_d __pow_varying_double(__vec16_d a, __vec16_d b) { return __vec16_d(_mm512_pow_pd(a.v1,b.v1),_mm512_pow_pd(a.v2,b.v2)); } + +/////////////////////////////////////////////////////////////////////////// +// Trigonometry + +TRANSCENDENTALS(sin) +TRANSCENDENTALS(asin) +TRANSCENDENTALS(cos) +TRANSCENDENTALS(acos) +TRANSCENDENTALS(tan) +TRANSCENDENTALS(atan) + +static FORCEINLINE float __atan2_uniform_float(float a, float b) { return atan2f(a, b);} +static FORCEINLINE __vec16_f __atan2_varying_float(__vec16_f a, __vec16_f b) { return _mm512_atan2_ps(a,b); } +static FORCEINLINE double __atan2_uniform_double(double a, double b) { return atan2(a,b);} +static FORCEINLINE __vec16_d __atan2_varying_double(__vec16_d a, __vec16_d b) { return __vec16_d(_mm512_atan2_pd(a.v1,b.v1),_mm512_atan2_pd(a.v2,b.v2)); } + #undef FORCEINLINE #undef PRE_ALIGN #undef POST_ALIGN diff --git a/examples/intrinsics/knc.h b/examples/intrinsics/knc.h index 458da458..4bdb184a 100644 --- a/examples/intrinsics/knc.h +++ b/examples/intrinsics/knc.h @@ -95,6 +95,7 @@ typedef struct PRE_ALIGN(64) __vec16_f { typedef struct PRE_ALIGN(64) __vec16_d { FORCEINLINE __vec16_d() : v1(_mm512_undefined_pd()), v2(_mm512_undefined_pd()) {} FORCEINLINE __vec16_d(const __vec16_d &o) : v1(o.v1), v2(o.v2) {} + FORCEINLINE __vec16_d(const __m512d _v1, const __m512d _v2) : v1(_v1), v2(_v2) {} FORCEINLINE __vec16_d& operator =(const __vec16_d &o) { v1=o.v1; v2=o.v2; return *this; } FORCEINLINE __vec16_d(double v00, double v01, double v02, double v03, double v04, double v05, double v06, double v07, @@ -1503,18 +1504,6 @@ static FORCEINLINE double __rsqrt_uniform_double(double v) } -static FORCEINLINE __vec16_f __exp_varying_float(__vec16_f v) { - return _mm512_exp_ps(v); -} - -static FORCEINLINE __vec16_f __log_varying_float(__vec16_f v) { - return _mm512_log_ps(v); -} - -static FORCEINLINE __vec16_f __pow_varying_float(__vec16_f a, __vec16_f b) { - return _mm512_pow_ps(a, b); -} - /////////////////////////////////////////////////////////////////////////// // bit ops /////////////////////////////////////////////////////////////////////////// @@ -2173,6 +2162,39 @@ static FORCEINLINE uint64_t __clock() { } #endif // !WIN32 +/////////////////////////////////////////////////////////////////////////// +// Transcendentals + + +#define TRANSCENDENTALS(op) \ +static FORCEINLINE __vec16_f __##op##_varying_float(__vec16_f v) { return _mm512_##op##_ps(v); } \ +static FORCEINLINE float __##op##_uniform_float(float v) { return op##f(v); } \ +static FORCEINLINE __vec16_d __##op##_varying_double(__vec16_d v) { return __vec16_d(_mm512_##op##_pd(v.v1),_mm512_##op##_pd(v.v2)); } \ +static FORCEINLINE double __##op##_uniform_double(double a) { return op(a); } + +TRANSCENDENTALS(log) +TRANSCENDENTALS(exp) + +static FORCEINLINE float __pow_uniform_float(float a, float b) { return powf(a, b);} +static FORCEINLINE __vec16_f __pow_varying_float(__vec16_f a, __vec16_f b) { return _mm512_pow_ps(a,b); } +static FORCEINLINE double __pow_uniform_double(double a, double b) { return pow(a,b);} +static FORCEINLINE __vec16_d __pow_varying_double(__vec16_d a, __vec16_d b) { return __vec16_d(_mm512_pow_pd(a.v1,b.v1),_mm512_pow_pd(a.v2,b.v2)); } + +/////////////////////////////////////////////////////////////////////////// +// Trigonometry + +TRANSCENDENTALS(sin) +TRANSCENDENTALS(asin) +TRANSCENDENTALS(cos) +TRANSCENDENTALS(acos) +TRANSCENDENTALS(tan) +TRANSCENDENTALS(atan) + +static FORCEINLINE float __atan2_uniform_float(float a, float b) { return atan2f(a, b);} +static FORCEINLINE __vec16_f __atan2_varying_float(__vec16_f a, __vec16_f b) { return _mm512_atan2_ps(a,b); } +static FORCEINLINE double __atan2_uniform_double(double a, double b) { return atan2(a,b);} +static FORCEINLINE __vec16_d __atan2_varying_double(__vec16_d a, __vec16_d b) { return __vec16_d(_mm512_atan2_pd(a.v1,b.v1),_mm512_atan2_pd(a.v2,b.v2)); } + #undef FORCEINLINE #undef PRE_ALIGN #undef POST_ALIGN diff --git a/examples/intrinsics/sse4.h b/examples/intrinsics/sse4.h index 45b31be1..a25af10b 100644 --- a/examples/intrinsics/sse4.h +++ b/examples/intrinsics/sse4.h @@ -167,6 +167,10 @@ struct __vec4_d { } __m128d v[2]; + FORCEINLINE __vec4_d(double *p) { + v[0] = _mm_set_pd(p[1], p[0]); + v[1] = _mm_set_pd(p[3], p[2]); + } }; @@ -2471,39 +2475,6 @@ static FORCEINLINE __vec4_d __sqrt_varying_double(__vec4_d v) { return __vec4_d(_mm_sqrt_pd(v.v[0]), _mm_sqrt_pd(v.v[1])); } -static FORCEINLINE __vec4_f __pow_varying_float(__vec4_f a, __vec4_f b) { - float r[4]; - for (int i = 0; i < 4; ++i) - r[i] = powf(__extract_element(a, i), __extract_element(b, i)); - return __vec4_f(r); -} - -static FORCEINLINE float __pow_uniform_float(float a, float b) { - return powf(a, b); -} - -static FORCEINLINE __vec4_f __exp_varying_float(__vec4_f a) { - float r[4]; - for (int i = 0; i < 4; ++i) - r[i] = expf(__extract_element(a, i)); - return __vec4_f(r); -} - -static FORCEINLINE float __exp_uniform_float(float a) { - return expf(a); -} - -static FORCEINLINE __vec4_f __log_varying_float(__vec4_f a) { - float r[4]; - for (int i = 0; i < 4; ++i) - r[i] = logf(__extract_element(a, i)); - return __vec4_f(r); -} - -static FORCEINLINE float __log_uniform_float(float a) { - return logf(a); -} - static FORCEINLINE int __intbits(float v) { union { float f; @@ -4166,4 +4137,97 @@ static FORCEINLINE uint64_t __clock() { } #endif // !WIN32 + +/////////////////////////////////////////////////////////////////////////// +// Transcendentals + + +#define TRANSCENDENTALS(op) \ +static FORCEINLINE __vec4_f __##op##_varying_float(__vec4_f a) {\ + float r[4];\ + for (int i = 0; i < 4; ++i)\ + r[i] = op##f(__extract_element(a, i));\ + return __vec4_f(r);\ +}\ +static FORCEINLINE float __##op##_uniform_float(float a) {\ + return op##f(a);\ +}\ +static FORCEINLINE __vec4_d __##op##_varying_double(__vec4_d a) {\ + double r[4];\ + for (int i = 0; i < 4; ++i)\ + r[i] = op(__extract_element(a, i));\ + return __vec4_d(r);\ +}\ +static FORCEINLINE double __##op##_uniform_double(double a) {\ + return op(a);\ +} + +TRANSCENDENTALS(log) +TRANSCENDENTALS(exp) + + +static FORCEINLINE __vec4_f __pow_varying_float(__vec4_f a, __vec4_f b) { + float r[4]; + for (int i = 0; i < 4; ++i) + r[i] = powf(__extract_element(a, i), __extract_element(b, i)); + return __vec4_f(r); +} +static FORCEINLINE float __pow_uniform_float(float a, float b) { + return powf(a, b); +} +static FORCEINLINE __vec4_d __pow_varying_double(__vec4_d a, __vec4_d b) { + double r[4]; + for (int i = 0; i < 4; ++i) + r[i] = pow(__extract_element(a, i), __extract_element(b, i)); + return __vec4_d(r); +} +static FORCEINLINE double __pow_uniform_double(double a, double b) { + return pow(a, b); +} + +/////////////////////////////////////////////////////////////////////////// +// Trigonometry + +TRANSCENDENTALS(sin) +TRANSCENDENTALS(asin) +TRANSCENDENTALS(cos) +TRANSCENDENTALS(acos) +TRANSCENDENTALS(tan) +TRANSCENDENTALS(atan) + + +static FORCEINLINE __vec4_f __atan2_varying_float(__vec4_f a, __vec4_f b) { + float r[4]; + for (int i = 0; i < 4; ++i) + r[i] = atan2f(__extract_element(a, i), __extract_element(b, i)); + return __vec4_f(r); +} +static FORCEINLINE float __atan2_uniform_float(float a, float b) { + return atan2f(a, b); +} +static FORCEINLINE __vec4_d __atan2_varying_double(__vec4_d a, __vec4_d b) { + double r[4]; + for (int i = 0; i < 4; ++i) + r[i] = atan2(__extract_element(a, i), __extract_element(b, i)); + return __vec4_d(r); +} +static FORCEINLINE double __atan2_uniform_double(double a, double b) { + return atan2(a, b); +} + +static FORCEINLINE void __sincos_varying_float(__vec4_f x, __vec4_f * _sin, __vec4_f * _cos) { + for (int i = 0; i < 4; ++i) + sincosf(__extract_element(x, i), (float*)_sin + i, (float*)_cos + i); +} +static FORCEINLINE void __sincos_uniform_float(float x, float *_sin, float *_cos) { + sincosf(x, _sin, _cos); +} +static FORCEINLINE void __sincos_varying_double(__vec4_d x, __vec4_d * _sin, __vec4_d * _cos) { + for (int i = 0; i < 4; ++i) + sincos(__extract_element(x, i), (double*)_sin + i, (double*)_cos + i); +} +static FORCEINLINE void __sincos_uniform_double(double x, double *_sin, double *_cos) { + sincos(x, _sin, _cos); +} + #undef FORCEINLINE diff --git a/ispc.cpp b/ispc.cpp index 1386d65e..0792291e 100644 --- a/ispc.cpp +++ b/ispc.cpp @@ -202,6 +202,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : m_hasGather(false), m_hasScatter(false), m_hasTranscendentals(false), + m_hasTrigonometry(false), m_hasRsqrtd(false), m_hasRcpd(false) { @@ -420,6 +421,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : this->m_maskBitCount = 1; this->m_hasHalf = true; this->m_hasTranscendentals = true; + this->m_hasTrigonometry = true; this->m_hasGather = this->m_hasScatter = true; this->m_hasRsqrtd = this->m_hasRcpd = true; } @@ -433,6 +435,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : this->m_maskBitCount = 1; this->m_hasHalf = true; this->m_hasTranscendentals = true; + this->m_hasTrigonometry = true; this->m_hasGather = this->m_hasScatter = true; this->m_hasRsqrtd = this->m_hasRcpd = true; } @@ -446,6 +449,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : this->m_maskBitCount = 1; this->m_hasHalf = true; this->m_hasTranscendentals = true; + this->m_hasTrigonometry = true; this->m_hasGather = this->m_hasScatter = true; this->m_hasRsqrtd = this->m_hasRcpd = true; } @@ -459,6 +463,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : this->m_maskBitCount = 1; this->m_hasHalf = true; this->m_hasTranscendentals = true; + this->m_hasTrigonometry = true; this->m_hasGather = this->m_hasScatter = true; this->m_hasRsqrtd = this->m_hasRcpd = true; } @@ -472,6 +477,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : this->m_maskBitCount = 1; this->m_hasHalf = true; this->m_hasTranscendentals = true; + this->m_hasTrigonometry = true; this->m_hasGather = this->m_hasScatter = true; this->m_hasRsqrtd = this->m_hasRcpd = true; } diff --git a/ispc.h b/ispc.h index 4b6df8c3..5e554bf7 100644 --- a/ispc.h +++ b/ispc.h @@ -282,6 +282,8 @@ public: bool hasTranscendentals() const {return m_hasTranscendentals;} + bool hasTrigonometry() const {return m_hasTrigonometry;} + bool hasRsqrtd() const {return m_hasRsqrtd;} bool hasRcpd() const {return m_hasRcpd;} @@ -385,6 +387,9 @@ private: sqrt, which we assume that all of them handle). */ bool m_hasTranscendentals; + /** Indicates whether the target has ISA support for trigonometry */ + bool m_hasTrigonometry; + /** Indicates whether there is an ISA double precision rsqrt. */ bool m_hasRsqrtd; diff --git a/stdlib.ispc b/stdlib.ispc index cb41c49c..d3d96619 100644 --- a/stdlib.ispc +++ b/stdlib.ispc @@ -2335,8 +2335,12 @@ static inline uniform float frexp(uniform float x, uniform int * uniform pw2) { __declspec(safe) static inline float sin(float x_full) { - if (__math_lib == __math_lib_svml) { - return __svml_sinf(x_full); + if (__have_native_trigonometry) + { + return __sin_varying_float(x_full); + } + else if (__math_lib == __math_lib_svml) { + return __svml_sinf(x_full); } else if (__math_lib == __math_lib_system) { float ret; @@ -2397,7 +2401,11 @@ static inline float sin(float x_full) { __declspec(safe) static inline uniform float sin(uniform float x_full) { - if (__math_lib == __math_lib_system || + if (__have_native_trigonometry) + { + return __sin_uniform_float(x_full); + } + else if (__math_lib == __math_lib_system || __math_lib == __math_lib_svml) { return __stdlib_sinf(x_full); } @@ -2462,25 +2470,29 @@ static inline uniform float sin(uniform float x_full) { __declspec(safe) -static inline float asin(float x) { - bool isneg = x < 0; - x = abs(x); - +static inline float asin(float x0) { + bool isneg = x0< 0; + float x = abs(x0); bool isnan = (x > 1); - float v; - if (__math_lib == __math_lib_svml) { - return __svml_asinf(x); + + if (__have_native_trigonometry) + { + return __asin_varying_float(x0); + } + else if (__math_lib == __math_lib_svml) { + return __svml_asinf(x0); } else if (__math_lib == __math_lib_system) { float ret; foreach_active (i) { - uniform float r = __stdlib_asinf(extract(x, i)); + uniform float r = __stdlib_asinf(extract(x0, i)); ret = insert(ret, i, r); } return ret; } else if (__math_lib == __math_lib_ispc) + { // sollya // fpminimax(((asin(x)-pi/2)/-sqrt(1-x)), [|0,1,2,3,4,5,6,7,8,9,10|], // [|single...|], [1e-20;.9999999999999999]); @@ -2496,7 +2508,9 @@ static inline float asin(float x) { x * (3.05023305118083953857421875e-2f + x * (-1.2897425331175327301025390625e-2f + x * 2.38926825113594532012939453125e-3f))))))))); + } else if (__math_lib == __math_lib_ispc_fast) + { // sollya // fpminimax(((asin(x)-pi/2)/-sqrt(1-x)), [|0,1,2,3,4,5|],[|single...|], // [1e-20;.9999999999999999]); @@ -2507,6 +2521,7 @@ static inline float asin(float x) { x * (-4.489909112453460693359375e-2f + x * (1.928029954433441162109375e-2f + x * (-4.3095736764371395111083984375e-3f))))); + } v *= -sqrt(1.f - x); v = v + 1.57079637050628662109375; @@ -2521,18 +2536,21 @@ static inline float asin(float x) { __declspec(safe) -static inline uniform float asin(uniform float x) { - uniform bool isneg = x < 0; - x = abs(x); - +static inline uniform float asin(uniform float x0) { + uniform bool isneg = x0 < 0; + uniform float x = abs(x0); uniform bool isnan = (x > 1); - uniform float v; - if (__math_lib == __math_lib_svml || + if (__have_native_trigonometry) + { + return __asin_uniform_float(x0); + } + else if (__math_lib == __math_lib_svml || __math_lib == __math_lib_system) { - return __stdlib_asinf(x); + return __stdlib_asinf(x0); } else if (__math_lib == __math_lib_ispc) + { // sollya // fpminimax(((asin(x)-pi/2)/-sqrt(1-x)), [|0,1,2,3,4,5,6,7,8,9,10|], // [|single...|], [1e-20;.9999999999999999]); @@ -2548,7 +2566,9 @@ static inline uniform float asin(uniform float x) { x * (3.05023305118083953857421875e-2f + x * (-1.2897425331175327301025390625e-2f + x * 2.38926825113594532012939453125e-3f))))))))); + } else if (__math_lib == __math_lib_ispc_fast) + { // sollya // fpminimax(((asin(x)-pi/2)/-sqrt(1-x)), [|0,1,2,3,4,5|],[|single...|], // [1e-20;.9999999999999999]); @@ -2559,6 +2579,7 @@ static inline uniform float asin(uniform float x) { x * (-4.489909112453460693359375e-2f + x * (1.928029954433441162109375e-2f + x * (-4.3095736764371395111083984375e-3f))))); + } v *= -sqrt(1.f - x); v = v + 1.57079637050628662109375; @@ -2574,6 +2595,10 @@ static inline uniform float asin(uniform float x) { __declspec(safe) static inline float cos(float x_full) { + if (__have_native_trigonometry) + { + return __cos_varying_float(x_full); + } if (__math_lib == __math_lib_svml) { return __svml_cosf(x_full); } @@ -2635,7 +2660,11 @@ static inline float cos(float x_full) { __declspec(safe) static inline uniform float cos(uniform float x_full) { - if (__math_lib == __math_lib_system || + if (__have_native_trigonometry) + { + return __cos_uniform_float(x_full); + } + else if (__math_lib == __math_lib_system || __math_lib == __math_lib_svml) { return __stdlib_cosf(x_full); } @@ -2700,22 +2729,34 @@ static inline uniform float cos(uniform float x_full) { __declspec(safe) static inline float acos(float v) { + if (__have_native_trigonometry) + return __acos_varying_float(v); + else return 1.57079637050628662109375 - asin(v); } __declspec(safe) static inline double acos(const double v) { + if (__have_native_trigonometry) + return __acos_varying_double(v); + else return 1.57079637050628662109375d0 - asin(v); } __declspec(safe) static inline uniform float acos(uniform float v) { + if (__have_native_trigonometry) + return __acos_uniform_float(v); + else return 1.57079637050628662109375 - asin(v); } __declspec(safe) static inline uniform double acos(const uniform double v) { + if (__have_native_trigonometry) + return __acos_uniform_double(v); + else return 1.57079637050628662109375d0 - asin(v); } @@ -2723,6 +2764,10 @@ static inline uniform double acos(const uniform double v) { __declspec(safe) static inline void sincos(float x_full, varying float * uniform sin_result, varying float * uniform cos_result) { + if (__have_native_trigonometry) + { + __sincos_varying_float(x_full,sin_result,cos_result); + } if (__math_lib == __math_lib_svml) { __svml_sincosf(x_full, sin_result, cos_result); } @@ -2793,6 +2838,10 @@ static inline void sincos(float x_full, varying float * uniform sin_result, __declspec(safe) static inline void sincos(uniform float x_full, uniform float * uniform sin_result, uniform float * uniform cos_result) { + if (__have_native_trigonometry) + { + __sincos_uniform_float(x_full, sin_result, cos_result); + } if (__math_lib == __math_lib_system || __math_lib == __math_lib_svml) { __stdlib_sincosf(x_full, sin_result, cos_result); @@ -2855,7 +2904,11 @@ static inline void sincos(uniform float x_full, uniform float * uniform sin_resu __declspec(safe) static inline float tan(float x_full) { - if (__math_lib == __math_lib_svml) { + if (__have_native_trigonometry) + { + return __tan_varying_float(x_full); + } + else if (__math_lib == __math_lib_svml) { return __svml_tanf(x_full); } else if (__math_lib == __math_lib_system) { @@ -2934,7 +2987,11 @@ static inline float tan(float x_full) { __declspec(safe) static inline uniform float tan(uniform float x_full) { - if (__math_lib == __math_lib_system || + if (__have_native_trigonometry) + { + return __tan_uniform_float(x_full); + } + else if (__math_lib == __math_lib_system || __math_lib == __math_lib_svml) { return __stdlib_tanf(x_full); } @@ -3006,7 +3063,11 @@ static inline uniform float tan(uniform float x_full) { __declspec(safe) static inline float atan(float x_full) { - if (__math_lib == __math_lib_svml) { + if (__have_native_trigonometry) + { + return __atan_varying_float(x_full); + } + else if (__math_lib == __math_lib_svml) { return __svml_atanf(x_full); } else if (__math_lib == __math_lib_system) { @@ -3057,7 +3118,11 @@ static inline float atan(float x_full) { __declspec(safe) static inline uniform float atan(uniform float x_full) { - if (__math_lib == __math_lib_system || + if (__have_native_trigonometry) + { + return __atan_uniform_float(x_full); + } + else if (__math_lib == __math_lib_system || __math_lib == __math_lib_svml) { return __stdlib_atanf(x_full); } @@ -3101,7 +3166,11 @@ static inline uniform float atan(uniform float x_full) { __declspec(safe) static inline float atan2(float y, float x) { - if (__math_lib == __math_lib_svml) { + if (__have_native_trigonometry) + { + return __atan2_varying_float(y,x); + } + else if (__math_lib == __math_lib_svml) { return __svml_atan2f(y, x); } else if (__math_lib == __math_lib_system) { @@ -3140,7 +3209,11 @@ static inline float atan2(float y, float x) { __declspec(safe) static inline uniform float atan2(uniform float y, uniform float x) { - if (__math_lib == __math_lib_system || + if (__have_native_trigonometry) + { + return __atan2_uniform_float(y,x); + } + else if (__math_lib == __math_lib_system || __math_lib == __math_lib_svml) { return __stdlib_atan2f(y, x); } @@ -3675,12 +3748,14 @@ static inline uniform double frexp(uniform double x, uniform int * uniform pw2) __declspec(safe) static inline double sin(double x) { - if (__math_lib == __math_lib_svml) + if (__have_native_trigonometry) + { + return __sin_varying_double(x); + } + else if (__math_lib == __math_lib_svml) { return __svml_sind(x); } - else if (__math_lib == __math_lib_ispc_fast) - return sin((float)x); else { double ret; foreach_active (i) { @@ -3690,23 +3765,46 @@ static inline double sin(double x) { return ret; } } +__declspec(safe) +static inline double asin(double x) { + if (__have_native_trigonometry) + { + return __asin_varying_double(x); + } + else if (__math_lib == __math_lib_svml) + { + return __svml_asind(x); + } + else { + double ret; + foreach_active (i) { + uniform double r = __stdlib_asin(extract(x, i)); + ret = insert(ret, i, r); + } + return ret; + } +} __declspec(safe) static inline uniform double sin(uniform double x) { - if (__math_lib == __math_lib_ispc_fast) - return sin((float)x); + if (__have_native_trigonometry) + { + return __sin_uniform_double(x); + } else return __stdlib_sin(x); } __declspec(safe) static inline double asin(const double x) { - if (__math_lib == __math_lib_svml) + if (__have_native_trigonometry) + { + return __asin_varying_double(x); + } + else if (__math_lib == __math_lib_svml) { return __svml_asind(x); } - else if (__math_lib == __math_lib_ispc_fast) - return asin((float)x); else { double ret; foreach_active (i) { @@ -3719,12 +3817,14 @@ static inline double asin(const double x) { __declspec(safe) static inline double cos(const double x) { + if (__have_native_trigonometry) + { + return __cos_varying_double(x); + } if (__math_lib == __math_lib_svml) { return __svml_cosd(x); } - else if (__math_lib == __math_lib_ispc_fast) - return cos((float)x); else { double ret; foreach_active (i) { @@ -3737,8 +3837,10 @@ static inline double cos(const double x) { __declspec(safe) static inline uniform double cos(uniform double x) { - if (__math_lib == __math_lib_ispc_fast) - return cos((float)x); + if (__have_native_trigonometry) + { + return __cos_uniform_double(x); + } else return __stdlib_cos(x); } @@ -3746,16 +3848,14 @@ static inline uniform double cos(uniform double x) { __declspec(safe) static inline void sincos(double x, varying double * uniform sin_result, varying double * uniform cos_result) { + if (__have_native_trigonometry) + { + __sincos_varying_double(x,sin_result,cos_result); + } if (__math_lib == __math_lib_svml) { __svml_sincosd(x, sin_result, cos_result); } - else if (__math_lib == __math_lib_ispc_fast) { - float sr, cr; - sincos((float)x, &sr, &cr); - *sin_result = sr; - *cos_result = cr; - } else { foreach_active (i) { uniform double sr, cr; @@ -3769,11 +3869,9 @@ static inline void sincos(double x, varying double * uniform sin_result, __declspec(safe) static inline void sincos(uniform double x, uniform double * uniform sin_result, uniform double * uniform cos_result) { - if (__math_lib == __math_lib_ispc_fast) { - uniform float sr, cr; - sincos((uniform float)x, &sr, &cr); - *sin_result = sr; - *cos_result = cr; + if (__have_native_trigonometry) + { + __sincos_uniform_double(x,sin_result, cos_result); } else __stdlib_sincos(x, sin_result, cos_result); @@ -3781,12 +3879,14 @@ static inline void sincos(uniform double x, uniform double * uniform sin_result, __declspec(safe) static inline double tan(double x) { - if (__math_lib == __math_lib_svml) + if (__have_native_trigonometry) + { + return __tan_varying_double(x); + } + else if (__math_lib == __math_lib_svml) { return __svml_tand(x); } - else if (__math_lib == __math_lib_ispc_fast) - return tan((float)x); else { double ret; foreach_active (i) { @@ -3799,16 +3899,20 @@ static inline double tan(double x) { __declspec(safe) static inline uniform double tan(uniform double x) { - if (__math_lib == __math_lib_ispc_fast) - return tan((float)x); + if (__have_native_trigonometry) + { + return __tan_uniform_double(x); + } else return __stdlib_tan(x); } __declspec(safe) static inline double atan(double x) { - if (__math_lib == __math_lib_ispc_fast) - return atan((float)x); + if (__have_native_trigonometry) + { + return __atan_varying_double(x); + } else { double ret; foreach_active (i) { @@ -3821,20 +3925,24 @@ static inline double atan(double x) { __declspec(safe) static inline uniform double atan(uniform double x) { - if (__math_lib == __math_lib_ispc_fast) - return atan((float)x); + if (__have_native_trigonometry) + { + return __atan_uniform_double(x); + } else return __stdlib_atan(x); } __declspec(safe) static inline double atan2(double y, double x) { - if (__math_lib == __math_lib_svml) + if (__have_native_trigonometry) + { + return __atan2_varying_double(y,x); + } + else if (__math_lib == __math_lib_svml) { return __svml_atan2d(y,x); } - else if (__math_lib == __math_lib_ispc_fast) - return atan2((float)y, (float)x); else { double ret; foreach_active (i) { @@ -3847,20 +3955,23 @@ static inline double atan2(double y, double x) { __declspec(safe) static inline uniform double atan2(uniform double y, uniform double x) { - if (__math_lib == __math_lib_ispc_fast) - return atan2((float)y, (float)x); + if (__have_native_trigonometry) + { + return __atan2_uniform_double(y,x); + } else return __stdlib_atan2(y, x); } __declspec(safe) static inline double exp(double x) { - if (__math_lib == __math_lib_svml) + if (__have_native_transcendentals) { + return __exp_varying_double(x); + } + else if (__math_lib == __math_lib_svml) { return __svml_expd(x); } - else if (__math_lib == __math_lib_ispc_fast) - return exp((float)x); else { double ret; foreach_active (i) { @@ -3873,20 +3984,22 @@ static inline double exp(double x) { __declspec(safe) static inline uniform double exp(uniform double x) { - if (__math_lib == __math_lib_ispc_fast) - return exp((float)x); + if (__have_native_transcendentals) { + return __exp_uniform_double(x); + } else return __stdlib_exp(x); } __declspec(safe) static inline double log(double x) { - if (__math_lib == __math_lib_svml) + if (__have_native_transcendentals) { + return __log_varying_double(x); + } + else if (__math_lib == __math_lib_svml) { return __svml_logd(x); } - else if (__math_lib == __math_lib_ispc_fast) - return log((float)x); else { double ret; foreach_active (i) { @@ -3899,20 +4012,22 @@ static inline double log(double x) { __declspec(safe) static inline uniform double log(uniform double x) { - if (__math_lib == __math_lib_ispc_fast) - return log((float)x); + if (__have_native_transcendentals) { + return __log_uniform_double(x); + } else return __stdlib_log(x); } __declspec(safe) static inline double pow(double a, double b) { - if (__math_lib == __math_lib_svml) + if (__have_native_transcendentals) { + return __pow_varying_double(a,b); + } + else if (__math_lib == __math_lib_svml) { return __svml_powd(a,b); } - else if (__math_lib == __math_lib_ispc_fast) - return pow((float)a, (float)b); else { double ret; foreach_active (i) { @@ -3925,8 +4040,9 @@ static inline double pow(double a, double b) { __declspec(safe) static inline uniform double pow(uniform double a, uniform double b) { - if (__math_lib == __math_lib_ispc_fast) - return pow((float)a, (float)b); + if (__have_native_transcendentals) { + return __pow_uniform_double(a,b); + } else return __stdlib_pow(a, b); } diff --git a/tests/transcendentals-5-0.ispc b/tests/transcendentals-5-0.ispc new file mode 100644 index 00000000..562050df --- /dev/null +++ b/tests/transcendentals-5-0.ispc @@ -0,0 +1,23 @@ +static double double4(uniform double a, uniform double b, uniform double c, + uniform double d) { + double ret = 0; + for (uniform int i = 0; i < programCount; i += 4) { + ret = insert(ret, i + 0, a); + ret = insert(ret, i + 1, b); + ret = insert(ret, i + 2, c); + ret = insert(ret, i + 3, d); + } + return ret; +} + +export uniform int width() { return programCount; } + + +bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-14; } + +export void f_v(uniform float RET[]) { + double v = double4((-3.141592741012573d0),(0.000000000000000d0),(3.141592741012573d0),(6.283185482025146d0)); + double ref = double4((8.742277955963554d-08),(0.000000000000000d0),(-8.742277955963554d-08),(1.7484555911927038d-07)); + RET[programIndex] = ok(sin(v), ref) ? 1. : 0.; +} +export void result(uniform float RET[]) { RET[programIndex] = 1.; } diff --git a/tests/transcendentals-5-1.ispc b/tests/transcendentals-5-1.ispc new file mode 100644 index 00000000..fd1e1506 --- /dev/null +++ b/tests/transcendentals-5-1.ispc @@ -0,0 +1,23 @@ +static double double4(uniform double a, uniform double b, uniform double c, + uniform double d) { + double ret = 0; + for (uniform int i = 0; i < programCount; i += 4) { + ret = insert(ret, i + 0, a); + ret = insert(ret, i + 1, b); + ret = insert(ret, i + 2, c); + ret = insert(ret, i + 3, d); + } + return ret; +} + +export uniform int width() { return programCount; } + + +bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-14; } + +export void f_v(uniform float RET[]) { + double v = double4((1.000000000000000d0),(-1.000000000000000d0),(-1.5707963705062866d0),(1.5707963705062866d0)); + double ref = double4((0.8414709848078965d0),(-0.8414709848078965d0),(-1.000000000000000d0),(1.000000000000000d0)); + RET[programIndex] = ok(sin(v), ref) ? 1. : 0.; +} +export void result(uniform float RET[]) { RET[programIndex] = 1.; } diff --git a/tests/transcendentals-5-2.ispc b/tests/transcendentals-5-2.ispc new file mode 100644 index 00000000..2a50730a --- /dev/null +++ b/tests/transcendentals-5-2.ispc @@ -0,0 +1,28 @@ +static double double4(uniform double a, uniform double b, uniform double c, + uniform double d) { + double ret = 0; + for (uniform int i = 0; i < programCount; i += 4) { + ret = insert(ret, i + 0, a); + ret = insert(ret, i + 1, b); + ret = insert(ret, i + 2, c); + ret = insert(ret, i + 3, d); + } + return ret; +} + +export uniform int width() { return programCount; } + + +bool ok(double x, double ref) { + bool r = (abs(x - ref) < 1d-14) || abs((x-ref)/ref) < 1d-14; + if (any(r == false)) + print("mismatch got %, expected %\n", x, ref); + return r; +} + +export void f_v(uniform float RET[]) { + double v = double4((-9.424777984619141d0),(4.000000000000000d0),(10.000000000000000d0),(-10.000000000000000d0)); + double ref = double4((2.3849760909612067d-08),(-0.7568024953079282d0),(-0.5440211108893699d0),(0.5440211108893699d0)); + RET[programIndex] = ok(sin(v), ref) ? 1. : 0.; +} +export void result(uniform float RET[]) { RET[programIndex] = 1.; } diff --git a/tests/transcendentals-5-3.ispc b/tests/transcendentals-5-3.ispc new file mode 100644 index 00000000..dbde3751 --- /dev/null +++ b/tests/transcendentals-5-3.ispc @@ -0,0 +1,23 @@ +static double double4(uniform double a, uniform double b, uniform double c, + uniform double d) { + double ret = 0; + for (uniform int i = 0; i < programCount; i += 4) { + ret = insert(ret, i + 0, a); + ret = insert(ret, i + 1, b); + ret = insert(ret, i + 2, c); + ret = insert(ret, i + 3, d); + } + return ret; +} + +export uniform int width() { return programCount; } + + +bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-14; } + +export void f_v(uniform float RET[]) { + double v = double4((14.300000190734863d0),(-6.699999809265137d0),(-21.200000762939453d0),(9.000000000000000d0)); + double ref = double4((0.9867719333537206d0),(-0.40484974621184605d0),(-0.7111606865372466d0),(0.4121184852417566d0)); + RET[programIndex] = ok(sin(v), ref) ? 1. : 0.; +} +export void result(uniform float RET[]) { RET[programIndex] = 1.; } diff --git a/tests/transcendentals-6-0.ispc b/tests/transcendentals-6-0.ispc new file mode 100644 index 00000000..ae3dac39 --- /dev/null +++ b/tests/transcendentals-6-0.ispc @@ -0,0 +1,23 @@ +static double double4(uniform double a, uniform double b, uniform double c, + uniform double d) { + double ret = 0; + for (uniform int i = 0; i < programCount; i += 4) { + ret = insert(ret, i + 0, a); + ret = insert(ret, i + 1, b); + ret = insert(ret, i + 2, c); + ret = insert(ret, i + 3, d); + } + return ret; +} + +export uniform int width() { return programCount; } + + +bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-14; } + +export void f_v(uniform float RET[]) { + double v = double4((-3.141592653589793d0),(0.000000000000000d0),(3.141592653589793d0),(6.283185307179586d0)); + double ref = double4((-1.000000000000000d0),(1.000000000000000d0),(-1.000000000000000d0),(1.000000000000000d0)); + RET[programIndex] = ok(cos(v), ref) ? 1. : 0.; +} +export void result(uniform float RET[]) { RET[programIndex] = 1.; } diff --git a/tests/transcendentals-6-1.ispc b/tests/transcendentals-6-1.ispc new file mode 100644 index 00000000..8f59c54d --- /dev/null +++ b/tests/transcendentals-6-1.ispc @@ -0,0 +1,23 @@ +static double double4(uniform double a, uniform double b, uniform double c, + uniform double d) { + double ret = 0; + for (uniform int i = 0; i < programCount; i += 4) { + ret = insert(ret, i + 0, a); + ret = insert(ret, i + 1, b); + ret = insert(ret, i + 2, c); + ret = insert(ret, i + 3, d); + } + return ret; +} + +export uniform int width() { return programCount; } + + +bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-14; } + +export void f_v(uniform float RET[]) { + double v = double4((1.000000000000000d0),(-1.000000000000000d0),(-1.570796370506287d0),(1.570796370506287d0)); + double ref = double4((0.5403023058681398d0),(0.5403023058681398d0),(-4.371139044595162d-08),(-4.371139044595162d-08)); + RET[programIndex] = ok(cos(v), ref) ? 1. : 0.; +} +export void result(uniform float RET[]) { RET[programIndex] = 1.; } diff --git a/tests/transcendentals-6-2.ispc b/tests/transcendentals-6-2.ispc new file mode 100644 index 00000000..80ee9f7b --- /dev/null +++ b/tests/transcendentals-6-2.ispc @@ -0,0 +1,23 @@ +static double double4(uniform double a, uniform double b, uniform double c, + uniform double d) { + double ret = 0; + for (uniform int i = 0; i < programCount; i += 4) { + ret = insert(ret, i + 0, a); + ret = insert(ret, i + 1, b); + ret = insert(ret, i + 2, c); + ret = insert(ret, i + 3, d); + } + return ret; +} + +export uniform int width() { return programCount; } + + +bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-14; } + +export void f_v(uniform float RET[]) { + double v = double4((-9.42477796076938d0),(4.000000000000000d0),(10.000000000000000d0),(-10.000000000000000d0)); + double ref = double4((-1.000000000000000d0),(-0.6536436208636119d0),(-0.8390715290764524d0),(-0.8390715290764524d0)); + RET[programIndex] = ok(cos(v), ref) ? 1. : 0.; +} +export void result(uniform float RET[]) { RET[programIndex] = 1.; } diff --git a/tests/transcendentals-6-3.ispc b/tests/transcendentals-6-3.ispc new file mode 100644 index 00000000..e64c4b82 --- /dev/null +++ b/tests/transcendentals-6-3.ispc @@ -0,0 +1,29 @@ +static double double4(uniform double a, uniform double b, uniform double c, + uniform double d) { + double ret = 0; + for (uniform int i = 0; i < programCount; i += 4) { + ret = insert(ret, i + 0, a); + ret = insert(ret, i + 1, b); + ret = insert(ret, i + 2, c); + ret = insert(ret, i + 3, d); + } + return ret; +} + +export uniform int width() { return programCount; } + + +bool ok(double x, double ref) { + bool r = (abs(x - ref) < 1d-14) || abs((x-ref)/ref) < 1d-14; + if (any(r == false)) + print("mismatch got %, expected %\n", x, ref); + return r; +} + + +export void f_v(uniform float RET[]) { + double v = double4((14.300000190734863d0),(-6.699999809265137d0),(-21.200000762939453d0),(9.000000000000000d0)); + double ref = double4((-0.1621146247115303d0),(0.9143832254542971d0),(-0.7030295000381365d0),(-0.9111302618846769d0)); + RET[programIndex] = ok(cos(v), ref) ? 1. : 0.; +} +export void result(uniform float RET[]) { RET[programIndex] = 1.; } diff --git a/tests/transcendentals-7-0.ispc b/tests/transcendentals-7-0.ispc new file mode 100644 index 00000000..e59f1c7a --- /dev/null +++ b/tests/transcendentals-7-0.ispc @@ -0,0 +1,23 @@ +static double double4(uniform double a, uniform double b, uniform double c, + uniform double d) { + double ret = 0; + for (uniform int i = 0; i < programCount; i += 4) { + ret = insert(ret, i + 0, a); + ret = insert(ret, i + 1, b); + ret = insert(ret, i + 2, c); + ret = insert(ret, i + 3, d); + } + return ret; +} + +export uniform int width() { return programCount; } + + +bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-14; } + +export void f_v(uniform float RET[]) { + double v = double4((-3.141592741012573d0),(0.000000000000000d0),(3.141592741012573d0),(6.283185482025146d0)); + double ref = double4((-8.742277955963587d-08),(0.000000000000000d0),(8.742277955963587d-08),(1.7484555911927306d-07)); + RET[programIndex] = ok(tan(v), ref) ? 1. : 0.; +} +export void result(uniform float RET[]) { RET[programIndex] = 1.; } diff --git a/tests/transcendentals-7-1.ispc b/tests/transcendentals-7-1.ispc new file mode 100644 index 00000000..0b3a9c66 --- /dev/null +++ b/tests/transcendentals-7-1.ispc @@ -0,0 +1,23 @@ +static double double4(uniform double a, uniform double b, uniform double c, + uniform double d) { + double ret = 0; + for (uniform int i = 0; i < programCount; i += 4) { + ret = insert(ret, i + 0, a); + ret = insert(ret, i + 1, b); + ret = insert(ret, i + 2, c); + ret = insert(ret, i + 3, d); + } + return ret; +} + +export uniform int width() { return programCount; } + + +bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-14; } + +export void f_v(uniform float RET[]) { + double v = double4((1.000000000000000d0),(-1.000000000000000d0),(0),(0)); + double ref = double4((1.557407724654902d0),(-1.557407724654902d0),(0),(0)); + RET[programIndex] = ok(tan(v), ref) ? 1. : 0.; +} +export void result(uniform float RET[]) { RET[programIndex] = 1.; } diff --git a/tests/transcendentals-7-2.ispc b/tests/transcendentals-7-2.ispc new file mode 100644 index 00000000..4227981d --- /dev/null +++ b/tests/transcendentals-7-2.ispc @@ -0,0 +1,23 @@ +static double double4(uniform double a, uniform double b, uniform double c, + uniform double d) { + double ret = 0; + for (uniform int i = 0; i < programCount; i += 4) { + ret = insert(ret, i + 0, a); + ret = insert(ret, i + 1, b); + ret = insert(ret, i + 2, c); + ret = insert(ret, i + 3, d); + } + return ret; +} + +export uniform int width() { return programCount; } + + +bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-14; } + +export void f_v(uniform float RET[]) { + double v = double4((-9.424777984619141d0),(4.000000000000000d0),(10.000000000000000d0),(-10.000000000000000d0)); + double ref = double4((-2.3849760909612074d-08),(1.1578212823495775d0),(0.6483608274590867d0),(-0.6483608274590867d0)); + RET[programIndex] = ok(tan(v), ref) ? 1. : 0.; +} +export void result(uniform float RET[]) { RET[programIndex] = 1.; } diff --git a/tests/transcendentals-7-3.ispc b/tests/transcendentals-7-3.ispc new file mode 100644 index 00000000..aa6e1380 --- /dev/null +++ b/tests/transcendentals-7-3.ispc @@ -0,0 +1,23 @@ +static double double4(uniform double a, uniform double b, uniform double c, + uniform double d) { + double ret = 0; + for (uniform int i = 0; i < programCount; i += 4) { + ret = insert(ret, i + 0, a); + ret = insert(ret, i + 1, b); + ret = insert(ret, i + 2, c); + ret = insert(ret, i + 3, d); + } + return ret; +} + +export uniform int width() { return programCount; } + + +bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-14; } + +export void f_v(uniform float RET[]) { + double v = double4((14.300000190734863d0),(-6.699999809265137d0),(-21.200000762939453d0),(9.000000000000000d0)); + double ref = double4((-6.086877942749462d0),(-0.442757188607329d0),(1.0115659250410816d0),(-0.4523156594418099d0)); + RET[programIndex] = ok(tan(v), ref) ? 1. : 0.; +} +export void result(uniform float RET[]) { RET[programIndex] = 1.; } diff --git a/tests/transcendentals-8-0.ispc b/tests/transcendentals-8-0.ispc new file mode 100644 index 00000000..761d6e5a --- /dev/null +++ b/tests/transcendentals-8-0.ispc @@ -0,0 +1,23 @@ +static double double4(uniform double a, uniform double b, uniform double c, + uniform double d) { + double ret = 0; + for (uniform int i = 0; i < programCount; i += 4) { + ret = insert(ret, i + 0, a); + ret = insert(ret, i + 1, b); + ret = insert(ret, i + 2, c); + ret = insert(ret, i + 3, d); + } + return ret; +} + +export uniform int width() { return programCount; } + + +bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-14; } + +export void f_v(uniform float RET[]) { + double v = double4((-3.141592741012573d0),(0.000000000000000d0),(3.141592741012573d0),(6.283185482025146d0)); + double ref = double4((0.04321391448589156d0),(1.000000000000000d0),(23.140694655803028d0),(535.4917491531108d0)); + RET[programIndex] = ok(exp(v), ref) ? 1. : 0.; +} +export void result(uniform float RET[]) { RET[programIndex] = 1.; } diff --git a/tests/transcendentals-8-1.ispc b/tests/transcendentals-8-1.ispc new file mode 100644 index 00000000..f9433aab --- /dev/null +++ b/tests/transcendentals-8-1.ispc @@ -0,0 +1,23 @@ +static double double4(uniform double a, uniform double b, uniform double c, + uniform double d) { + double ret = 0; + for (uniform int i = 0; i < programCount; i += 4) { + ret = insert(ret, i + 0, a); + ret = insert(ret, i + 1, b); + ret = insert(ret, i + 2, c); + ret = insert(ret, i + 3, d); + } + return ret; +} + +export uniform int width() { return programCount; } + + +bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-14; } + +export void f_v(uniform float RET[]) { + double v = double4((1.000000000000000d0),(-1.000000000000000d0),(-1.570796370506287d0),(1.570796370506287d0)); + double ref = double4((2.718281828459045d0),(0.36787944117144233d0),(0.2078795672640568d0),(4.810477591238011d0)); + RET[programIndex] = ok(exp(v), ref) ? 1. : 0.; +} +export void result(uniform float RET[]) { RET[programIndex] = 1.; } diff --git a/tests/transcendentals-8-2.ispc b/tests/transcendentals-8-2.ispc new file mode 100644 index 00000000..493b1632 --- /dev/null +++ b/tests/transcendentals-8-2.ispc @@ -0,0 +1,23 @@ +static double double4(uniform double a, uniform double b, uniform double c, + uniform double d) { + double ret = 0; + for (uniform int i = 0; i < programCount; i += 4) { + ret = insert(ret, i + 0, a); + ret = insert(ret, i + 1, b); + ret = insert(ret, i + 2, c); + ret = insert(ret, i + 3, d); + } + return ret; +} + +export uniform int width() { return programCount; } + + +bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-14; } + +export void f_v(uniform float RET[]) { + double v = double4((-9.424777984619141d0),(4.000000000000000d0),(10.000000000000000d0),(-10.000000000000000d0)); + double ref = double4((8.069951564564043d-05),(54.598150033144236d0),(22026.465794806718d0),(4.5399929762484854d-05)); + RET[programIndex] = ok(exp(v), ref) ? 1. : 0.; +} +export void result(uniform float RET[]) { RET[programIndex] = 1.; } diff --git a/tests/transcendentals-8-3.ispc b/tests/transcendentals-8-3.ispc new file mode 100644 index 00000000..38101635 --- /dev/null +++ b/tests/transcendentals-8-3.ispc @@ -0,0 +1,23 @@ +static double double4(uniform double a, uniform double b, uniform double c, + uniform double d) { + double ret = 0; + for (uniform int i = 0; i < programCount; i += 4) { + ret = insert(ret, i + 0, a); + ret = insert(ret, i + 1, b); + ret = insert(ret, i + 2, c); + ret = insert(ret, i + 3, d); + } + return ret; +} + +export uniform int width() { return programCount; } + + +bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-14; } + +export void f_v(uniform float RET[]) { + double v = double4((14.300000190734863d0),(-6.699999809265137d0),(-21.200000762939453d0),(9.000000000000000d0)); + double ref = double4((1623346.2946371625d0),(0.001230912137451317d0),(6.208070673019769d-10),(8103.083927575384d0)); + RET[programIndex] = ok(exp(v), ref) ? 1. : 0.; +} +export void result(uniform float RET[]) { RET[programIndex] = 1.; } diff --git a/tests/transcendentals-9-0.ispc b/tests/transcendentals-9-0.ispc new file mode 100644 index 00000000..8c51e240 --- /dev/null +++ b/tests/transcendentals-9-0.ispc @@ -0,0 +1,23 @@ +static double double4(uniform double a, uniform double b, uniform double c, + uniform double d) { + double ret = 0; + for (uniform int i = 0; i < programCount; i += 4) { + ret = insert(ret, i + 0, a); + ret = insert(ret, i + 1, b); + ret = insert(ret, i + 2, c); + ret = insert(ret, i + 3, d); + } + return ret; +} + +export uniform int width() { return programCount; } + + +bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-13; } + +export void f_v(uniform float RET[]) { + double v = double4((3.141602741012573d),(0.000010000000000d),(3.141602741012573d),(6.283195482025146d)); + double ref = double4((1.144733096770642d0),(-11.512925464970229d0),(1.144733096770642d0),(1.8378786857850002d0)); + RET[programIndex] = ok(log(v), ref) ? 1. : 0.; +} +export void result(uniform float RET[]) { RET[programIndex] = 1.; } diff --git a/tests/transcendentals-9-1.ispc b/tests/transcendentals-9-1.ispc new file mode 100644 index 00000000..0b01c322 --- /dev/null +++ b/tests/transcendentals-9-1.ispc @@ -0,0 +1,23 @@ +static double double4(uniform double a, uniform double b, uniform double c, + uniform double d) { + double ret = 0; + for (uniform int i = 0; i < programCount; i += 4) { + ret = insert(ret, i + 0, a); + ret = insert(ret, i + 1, b); + ret = insert(ret, i + 2, c); + ret = insert(ret, i + 3, d); + } + return ret; +} + +export uniform int width() { return programCount; } + + +bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-14; } + +export void f_v(uniform float RET[]) { + double v = double4((1.000010000000000d0),(1.000010000000000d0),(1.570806370506287d0),(1.570806370506287d0)); + double ref = double4((9.999950000398841d-06),(9.999950000398841d-06),(0.4515890992942722d0),(0.4515890992942722d0)); + RET[programIndex] = ok(log(v), ref) ? 1. : 0.; +} +export void result(uniform float RET[]) { RET[programIndex] = 1.; } diff --git a/tests/transcendentals-9-2.ispc b/tests/transcendentals-9-2.ispc new file mode 100644 index 00000000..2391920b --- /dev/null +++ b/tests/transcendentals-9-2.ispc @@ -0,0 +1,23 @@ +static double double4(uniform double a, uniform double b, uniform double c, + uniform double d) { + double ret = 0; + for (uniform int i = 0; i < programCount; i += 4) { + ret = insert(ret, i + 0, a); + ret = insert(ret, i + 1, b); + ret = insert(ret, i + 2, c); + ret = insert(ret, i + 3, d); + } + return ret; +} + +export uniform int width() { return programCount; } + + +bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-14; } + +export void f_v(uniform float RET[]) { + double v = double4((9.424787984619140d0),(4.000010000000000d0),(10.000010000000000d0),(10.000010000000000d0)); + double ref = double4((2.2433432380804366d0),(1.3862968611167654d0),(2.3025860929935456d0),(2.3025860929935456d0)); + RET[programIndex] = ok(log(v), ref) ? 1. : 0.; +} +export void result(uniform float RET[]) { RET[programIndex] = 1.; } diff --git a/tests/transcendentals-9-3.ispc b/tests/transcendentals-9-3.ispc new file mode 100644 index 00000000..ed787a5c --- /dev/null +++ b/tests/transcendentals-9-3.ispc @@ -0,0 +1,23 @@ +static double double4(uniform double a, uniform double b, uniform double c, + uniform double d) { + double ret = 0; + for (uniform int i = 0; i < programCount; i += 4) { + ret = insert(ret, i + 0, a); + ret = insert(ret, i + 1, b); + ret = insert(ret, i + 2, c); + ret = insert(ret, i + 3, d); + } + return ret; +} + +export uniform int width() { return programCount; } + + +bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-14; } + +export void f_v(uniform float RET[]) { + double v = double4((14.300010190734863d0),(6.700009809265136d0),(21.200010762939453d0),(9.000010000000000d0)); + double ref = double4((2.6602602499044092d0),(1.902108990465272d0),(3.0540016893636612d0),(2.197225688446713d0)); + RET[programIndex] = ok(log(v), ref) ? 1. : 0.; +} +export void result(uniform float RET[4]) { RET[programIndex] = 1.; }