From 668645fcda64201f590f24d76c70030f408a37ea Mon Sep 17 00:00:00 2001 From: Evghenii Date: Fri, 7 Feb 2014 11:05:36 +0100 Subject: [PATCH 1/7] first commit --- builtins.cpp | 2 ++ ispc.cpp | 6 ++++++ ispc.h | 5 +++++ 3 files changed, 13 insertions(+) diff --git a/builtins.cpp b/builtins.cpp index fee322e7..581a712a 100644 --- a/builtins.cpp +++ b/builtins.cpp @@ -1150,6 +1150,8 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod symbolTable); lDefineConstantInt("__have_native_transcendentals", g->target->hasTranscendentals(), module, symbolTable); + lDefineConstantInt("__have_native_trigonometry", g->target->hasTrigonometry(), + module, symbolTable); lDefineConstantInt("__have_native_rsqrtd", g->target->hasRsqrtd(), module, symbolTable); lDefineConstantInt("__have_native_rcpd", g->target->hasRcpd(), diff --git a/ispc.cpp b/ispc.cpp index 1386d65e..0792291e 100644 --- a/ispc.cpp +++ b/ispc.cpp @@ -202,6 +202,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : m_hasGather(false), m_hasScatter(false), m_hasTranscendentals(false), + m_hasTrigonometry(false), m_hasRsqrtd(false), m_hasRcpd(false) { @@ -420,6 +421,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : this->m_maskBitCount = 1; this->m_hasHalf = true; this->m_hasTranscendentals = true; + this->m_hasTrigonometry = true; this->m_hasGather = this->m_hasScatter = true; this->m_hasRsqrtd = this->m_hasRcpd = true; } @@ -433,6 +435,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : this->m_maskBitCount = 1; this->m_hasHalf = true; this->m_hasTranscendentals = true; + this->m_hasTrigonometry = true; this->m_hasGather = this->m_hasScatter = true; this->m_hasRsqrtd = this->m_hasRcpd = true; } @@ -446,6 +449,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : this->m_maskBitCount = 1; this->m_hasHalf = true; this->m_hasTranscendentals = true; + this->m_hasTrigonometry = true; this->m_hasGather = this->m_hasScatter = true; this->m_hasRsqrtd = this->m_hasRcpd = true; } @@ -459,6 +463,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : this->m_maskBitCount = 1; this->m_hasHalf = true; this->m_hasTranscendentals = true; + this->m_hasTrigonometry = true; this->m_hasGather = this->m_hasScatter = true; this->m_hasRsqrtd = this->m_hasRcpd = true; } @@ -472,6 +477,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : this->m_maskBitCount = 1; this->m_hasHalf = true; this->m_hasTranscendentals = true; + this->m_hasTrigonometry = true; this->m_hasGather = this->m_hasScatter = true; this->m_hasRsqrtd = this->m_hasRcpd = true; } diff --git a/ispc.h b/ispc.h index 4b6df8c3..5e554bf7 100644 --- a/ispc.h +++ b/ispc.h @@ -282,6 +282,8 @@ public: bool hasTranscendentals() const {return m_hasTranscendentals;} + bool hasTrigonometry() const {return m_hasTrigonometry;} + bool hasRsqrtd() const {return m_hasRsqrtd;} bool hasRcpd() const {return m_hasRcpd;} @@ -385,6 +387,9 @@ private: sqrt, which we assume that all of them handle). */ bool m_hasTranscendentals; + /** Indicates whether the target has ISA support for trigonometry */ + bool m_hasTrigonometry; + /** Indicates whether there is an ISA double precision rsqrt. */ bool m_hasRsqrtd; From 81aa19a8f0b11f14c9c95ce8795ff8a5d6dd868f Mon Sep 17 00:00:00 2001 From: Evghenii Date: Fri, 7 Feb 2014 11:49:24 +0100 Subject: [PATCH 2/7] added use of native_transendentals, need to add IR --- stdlib.ispc | 227 +++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 163 insertions(+), 64 deletions(-) diff --git a/stdlib.ispc b/stdlib.ispc index 24217cd0..eb5ee9c4 100644 --- a/stdlib.ispc +++ b/stdlib.ispc @@ -2298,8 +2298,12 @@ static inline uniform float frexp(uniform float x, uniform int * uniform pw2) { __declspec(safe) static inline float sin(float x_full) { - if (__math_lib == __math_lib_svml) { - return __svml_sinf(x_full); + if (__have_native_trigonometry) + { + return __sin_varying_float(x_full); + } + else if (__math_lib == __math_lib_svml) { + return __svml_sinf(x_full); } else if (__math_lib == __math_lib_system) { float ret; @@ -2360,7 +2364,11 @@ static inline float sin(float x_full) { __declspec(safe) static inline uniform float sin(uniform float x_full) { - if (__math_lib == __math_lib_system || + if (__have_native_trigonometry) + { + return __sin_uniform_float(x_full); + } + else if (__math_lib == __math_lib_system || __math_lib == __math_lib_svml) { return __stdlib_sinf(x_full); } @@ -2432,7 +2440,11 @@ static inline float asin(float x) { bool isnan = (x > 1); float v; - if (__math_lib == __math_lib_svml) { + if (__have_native_trigonometry) + { + return __asin_varying_float(x_full); + } + else if (__math_lib == __math_lib_svml) { return __svml_asinf(x); } else if (__math_lib == __math_lib_system) { @@ -2491,7 +2503,11 @@ static inline uniform float asin(uniform float x) { uniform bool isnan = (x > 1); uniform float v; - if (__math_lib == __math_lib_svml || + if (__have_native_trigonometry) + { + return __asin_uniform_float(x_full); + } + else if (__math_lib == __math_lib_svml || __math_lib == __math_lib_system) { return __stdlib_asinf(x); } @@ -2537,6 +2553,10 @@ static inline uniform float asin(uniform float x) { __declspec(safe) static inline float cos(float x_full) { + if (__have_native_trigonometry) + { + return __cos_varying_float(x_full); + } if (__math_lib == __math_lib_svml) { return __svml_cosf(x_full); } @@ -2598,7 +2618,11 @@ static inline float cos(float x_full) { __declspec(safe) static inline uniform float cos(uniform float x_full) { - if (__math_lib == __math_lib_system || + if (__have_native_trigonometry) + { + return __cos_uniform_float(x_full); + } + else if (__math_lib == __math_lib_system || __math_lib == __math_lib_svml) { return __stdlib_cosf(x_full); } @@ -2686,6 +2710,10 @@ static inline uniform double acos(const uniform double v) { __declspec(safe) static inline void sincos(float x_full, varying float * uniform sin_result, varying float * uniform cos_result) { + if (__have_native_trigonometry) + { + __sincos_varying_float(x_full,sin_result,cos_result); + } if (__math_lib == __math_lib_svml) { __svml_sincosf(x_full, sin_result, cos_result); } @@ -2756,6 +2784,10 @@ static inline void sincos(float x_full, varying float * uniform sin_result, __declspec(safe) static inline void sincos(uniform float x_full, uniform float * uniform sin_result, uniform float * uniform cos_result) { + if (__have_native_trigonometry) + { + __sincos_uniform_float(x_full, sin_result, cos_result); + } if (__math_lib == __math_lib_system || __math_lib == __math_lib_svml) { __stdlib_sincosf(x_full, sin_result, cos_result); @@ -2818,7 +2850,11 @@ static inline void sincos(uniform float x_full, uniform float * uniform sin_resu __declspec(safe) static inline float tan(float x_full) { - if (__math_lib == __math_lib_svml) { + if (__have_native_trigonometry) + { + return __tan_varying_float(x_full); + } + else if (__math_lib == __math_lib_svml) { return __svml_tanf(x_full); } else if (__math_lib == __math_lib_system) { @@ -2897,7 +2933,11 @@ static inline float tan(float x_full) { __declspec(safe) static inline uniform float tan(uniform float x_full) { - if (__math_lib == __math_lib_system || + if (__have_native_trigonometry) + { + return __tan_uniform_float(x_full); + } + else if (__math_lib == __math_lib_system || __math_lib == __math_lib_svml) { return __stdlib_tanf(x_full); } @@ -2969,7 +3009,11 @@ static inline uniform float tan(uniform float x_full) { __declspec(safe) static inline float atan(float x_full) { - if (__math_lib == __math_lib_svml) { + if (__have_native_trigonometry) + { + return __atan_varying_float(x_full); + } + else if (__math_lib == __math_lib_svml) { return __svml_atanf(x_full); } else if (__math_lib == __math_lib_system) { @@ -3020,7 +3064,11 @@ static inline float atan(float x_full) { __declspec(safe) static inline uniform float atan(uniform float x_full) { - if (__math_lib == __math_lib_system || + if (__have_native_trigonometry) + { + return __atan_uniform_float(x_full); + } + else if (__math_lib == __math_lib_system || __math_lib == __math_lib_svml) { return __stdlib_atanf(x_full); } @@ -3064,7 +3112,11 @@ static inline uniform float atan(uniform float x_full) { __declspec(safe) static inline float atan2(float y, float x) { - if (__math_lib == __math_lib_svml) { + if (__have_native_trigonometry) + { + return __atan2_varying_float(y,x); + } + else if (__math_lib == __math_lib_svml) { return __svml_atan2f(y, x); } else if (__math_lib == __math_lib_system) { @@ -3103,7 +3155,11 @@ static inline float atan2(float y, float x) { __declspec(safe) static inline uniform float atan2(uniform float y, uniform float x) { - if (__math_lib == __math_lib_system || + if (__have_native_trigonometry) + { + return __atan2_uniform_float(y,x); + } + else if (__math_lib == __math_lib_system || __math_lib == __math_lib_svml) { return __stdlib_atan2f(y, x); } @@ -3638,12 +3694,14 @@ static inline uniform double frexp(uniform double x, uniform int * uniform pw2) __declspec(safe) static inline double sin(double x) { - if (__math_lib == __math_lib_svml) + if (__have_native_trigonometry) + { + return __sin_varying_double(x_full); + } + else if (__math_lib == __math_lib_svml) { return __svml_sind(x); } - else if (__math_lib == __math_lib_ispc_fast) - return sin((float)x); else { double ret; foreach_active (i) { @@ -3653,23 +3711,46 @@ static inline double sin(double x) { return ret; } } +__declspec(safe) +static inline double asin(double x) { + if (__have_native_trigonometry) + { + return __asin_varying_double(x_full); + } + else if (__math_lib == __math_lib_svml) + { + return __svml_asind(x); + } + else { + double ret; + foreach_active (i) { + uniform double r = __stdlib_asin(extract(x, i)); + ret = insert(ret, i, r); + } + return ret; + } +} __declspec(safe) static inline uniform double sin(uniform double x) { - if (__math_lib == __math_lib_ispc_fast) - return sin((float)x); + if (__have_native_trigonometry) + { + return __sin_uniform_double(x_full); + } else return __stdlib_sin(x); } __declspec(safe) static inline double asin(const double x) { - if (__math_lib == __math_lib_svml) + if (__have_native_trigonometry) + { + return __asin_varying_double(x_full); + } + else if (__math_lib == __math_lib_svml) { return __svml_asind(x); } - else if (__math_lib == __math_lib_ispc_fast) - return asin((float)x); else { double ret; foreach_active (i) { @@ -3682,12 +3763,14 @@ static inline double asin(const double x) { __declspec(safe) static inline double cos(const double x) { + if (__have_native_trigonometry) + { + return __cos_varying_double(x_full); + } if (__math_lib == __math_lib_svml) { return __svml_cosd(x); } - else if (__math_lib == __math_lib_ispc_fast) - return cos((float)x); else { double ret; foreach_active (i) { @@ -3700,8 +3783,10 @@ static inline double cos(const double x) { __declspec(safe) static inline uniform double cos(uniform double x) { - if (__math_lib == __math_lib_ispc_fast) - return cos((float)x); + if (__have_native_trigonometry) + { + return __cos_uniform_double(x_full); + } else return __stdlib_cos(x); } @@ -3709,16 +3794,14 @@ static inline uniform double cos(uniform double x) { __declspec(safe) static inline void sincos(double x, varying double * uniform sin_result, varying double * uniform cos_result) { + if (__have_native_trigonometry) + { + __sincos_varying_double(x_full),sin_result,cos_result); + } if (__math_lib == __math_lib_svml) { __svml_sincosd(x, sin_result, cos_result); } - else if (__math_lib == __math_lib_ispc_fast) { - float sr, cr; - sincos((float)x, &sr, &cr); - *sin_result = sr; - *cos_result = cr; - } else { foreach_active (i) { uniform double sr, cr; @@ -3732,11 +3815,9 @@ static inline void sincos(double x, varying double * uniform sin_result, __declspec(safe) static inline void sincos(uniform double x, uniform double * uniform sin_result, uniform double * uniform cos_result) { - if (__math_lib == __math_lib_ispc_fast) { - uniform float sr, cr; - sincos((uniform float)x, &sr, &cr); - *sin_result = sr; - *cos_result = cr; + if (__have_native_trigonometry) + { + __sincos_uniform_double(x_full,sin_result, cos_result); } else __stdlib_sincos(x, sin_result, cos_result); @@ -3744,12 +3825,14 @@ static inline void sincos(uniform double x, uniform double * uniform sin_result, __declspec(safe) static inline double tan(double x) { - if (__math_lib == __math_lib_svml) + if (__have_native_trigonometry) + { + return __tan_varying_double(x_full); + } + else if (__math_lib == __math_lib_svml) { return __svml_tand(x); } - else if (__math_lib == __math_lib_ispc_fast) - return tan((float)x); else { double ret; foreach_active (i) { @@ -3762,16 +3845,20 @@ static inline double tan(double x) { __declspec(safe) static inline uniform double tan(uniform double x) { - if (__math_lib == __math_lib_ispc_fast) - return tan((float)x); + if (__have_native_trigonometry) + { + return __tan_uniform_double(x_full); + } else return __stdlib_tan(x); } __declspec(safe) static inline double atan(double x) { - if (__math_lib == __math_lib_ispc_fast) - return atan((float)x); + if (__have_native_trigonometry) + { + return __atan_varying_double(x_full); + } else { double ret; foreach_active (i) { @@ -3784,20 +3871,24 @@ static inline double atan(double x) { __declspec(safe) static inline uniform double atan(uniform double x) { - if (__math_lib == __math_lib_ispc_fast) - return atan((float)x); + if (__have_native_trigonometry) + { + return __atan_uniform_double(x_full); + } else return __stdlib_atan(x); } __declspec(safe) static inline double atan2(double y, double x) { - if (__math_lib == __math_lib_svml) + if (__have_native_trigonometry) + { + return __atan2_varying_double(y,x); + } + else if (__math_lib == __math_lib_svml) { return __svml_atan2d(y,x); } - else if (__math_lib == __math_lib_ispc_fast) - return atan2((float)y, (float)x); else { double ret; foreach_active (i) { @@ -3810,20 +3901,23 @@ static inline double atan2(double y, double x) { __declspec(safe) static inline uniform double atan2(uniform double y, uniform double x) { - if (__math_lib == __math_lib_ispc_fast) - return atan2((float)y, (float)x); + if (__have_native_trigonometry) + { + return __atan2_uniform_double(y,x); + } else return __stdlib_atan2(y, x); } __declspec(safe) static inline double exp(double x) { - if (__math_lib == __math_lib_svml) + if (__have_native_transcendentals) { + return __exp_varying_double(x_full); + } + else if (__math_lib == __math_lib_svml) { return __svml_expd(x); } - else if (__math_lib == __math_lib_ispc_fast) - return exp((float)x); else { double ret; foreach_active (i) { @@ -3836,20 +3930,22 @@ static inline double exp(double x) { __declspec(safe) static inline uniform double exp(uniform double x) { - if (__math_lib == __math_lib_ispc_fast) - return exp((float)x); + if (__have_native_transcendentals) { + return __exp_uniform_double(x_full); + } else return __stdlib_exp(x); } __declspec(safe) static inline double log(double x) { - if (__math_lib == __math_lib_svml) + if (__have_native_transcendentals) { + return __log_varying_double(x_full); + } + else if (__math_lib == __math_lib_svml) { return __svml_logd(x); } - else if (__math_lib == __math_lib_ispc_fast) - return log((float)x); else { double ret; foreach_active (i) { @@ -3862,20 +3958,22 @@ static inline double log(double x) { __declspec(safe) static inline uniform double log(uniform double x) { - if (__math_lib == __math_lib_ispc_fast) - return log((float)x); + if (__have_native_transcendentals) { + return __log_uniform_double(x_full); + } else return __stdlib_log(x); } __declspec(safe) static inline double pow(double a, double b) { - if (__math_lib == __math_lib_svml) + if (__have_native_transcendentals) { + return __pow_varying_double(a,b); + } + else if (__math_lib == __math_lib_svml) { return __svml_powd(a,b); } - else if (__math_lib == __math_lib_ispc_fast) - return pow((float)a, (float)b); else { double ret; foreach_active (i) { @@ -3888,8 +3986,9 @@ static inline double pow(double a, double b) { __declspec(safe) static inline uniform double pow(uniform double a, uniform double b) { - if (__math_lib == __math_lib_ispc_fast) - return pow((float)a, (float)b); + if (__have_native_transcendentals) { + return __pow_uniform_double(a,b); + } else return __stdlib_pow(a, b); } From 70a9b286e5fa3f1f61d56d98e02f6749983a2ad1 Mon Sep 17 00:00:00 2001 From: Evghenii Date: Fri, 7 Feb 2014 15:28:39 +0100 Subject: [PATCH 3/7] added support for native and double precision trigonometry/transendentals --- builtins/target-avx-x2.ll | 2 + builtins/target-avx.ll | 3 + builtins/target-avx1-i64x4base.ll | 3 + builtins/target-generic-1.ll | 3 + builtins/target-generic-common.ll | 12 ++- builtins/target-neon-16.ll | 3 + builtins/target-neon-32.ll | 3 + builtins/target-neon-8.ll | 3 + builtins/target-sse2-x2.ll | 3 + builtins/target-sse2.ll | 3 + builtins/target-sse4-16.ll | 3 + builtins/target-sse4-8.ll | 3 + builtins/target-sse4-x2.ll | 3 + builtins/target-sse4.ll | 3 + builtins/util.m4 | 62 ++++++++++++-- examples/intrinsics/generic-16.h | 127 +++++++++++++++++++++-------- examples/intrinsics/sse4.h | 130 ++++++++++++++++++++++-------- stdlib.ispc | 79 +++++++++++------- 18 files changed, 340 insertions(+), 108 deletions(-) diff --git a/builtins/target-avx-x2.ll b/builtins/target-avx-x2.ll index b3a77871..a110dfef 100644 --- a/builtins/target-avx-x2.ll +++ b/builtins/target-avx-x2.ll @@ -694,3 +694,5 @@ define <16 x double> @__max_varying_double(<16 x double>, <16 x double>) nounwin rsqrtd_decl() rcpd_decl() +transcendetals_decl() +trigonometry_decl() diff --git a/builtins/target-avx.ll b/builtins/target-avx.ll index 9738f9d3..e0f4e45d 100644 --- a/builtins/target-avx.ll +++ b/builtins/target-avx.ll @@ -564,3 +564,6 @@ gen_scatter(double) rsqrtd_decl() rcpd_decl() + +transcendetals_decl() +trigonometry_decl() diff --git a/builtins/target-avx1-i64x4base.ll b/builtins/target-avx1-i64x4base.ll index a6601a28..3bd28c02 100644 --- a/builtins/target-avx1-i64x4base.ll +++ b/builtins/target-avx1-i64x4base.ll @@ -513,3 +513,6 @@ define <4 x double> @__max_varying_double(<4 x double>, <4 x double>) nounwind r rsqrtd_decl() rcpd_decl() + +transcendetals_decl() +trigonometry_decl() diff --git a/builtins/target-generic-1.ll b/builtins/target-generic-1.ll index 3dcd8373..1178e98c 100644 --- a/builtins/target-generic-1.ll +++ b/builtins/target-generic-1.ll @@ -997,3 +997,6 @@ define_avgs() rsqrtd_decl() rcpd_decl() + +transcendetals_decl() +trigonometry_decl() diff --git a/builtins/target-generic-common.ll b/builtins/target-generic-common.ll index 401c862d..ef33ff97 100644 --- a/builtins/target-generic-common.ll +++ b/builtins/target-generic-common.ll @@ -191,13 +191,9 @@ declare @__max_varying_double(, declare float @__rsqrt_uniform_float(float) nounwind readnone declare float @__rcp_uniform_float(float) nounwind readnone -declare double @__rsqrt_uniform_double(double) nounwind readnone -declare double @__rcp_uniform_double(double) nounwind readnone declare float @__sqrt_uniform_float(float) nounwind readnone declare @__rcp_varying_float() nounwind readnone declare @__rsqrt_varying_float() nounwind readnone -declare @__rcp_varying_double() nounwind readnone -declare @__rsqrt_varying_double() nounwind readnone declare @__sqrt_varying_float() nounwind readnone @@ -393,3 +389,11 @@ declare void @__prefetch_read_uniform_nt(i8 * nocapture) nounwind define_avgs() +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; reciprocals in double precision, if supported + +rsqrtd_decl() +rcpd_decl() + +transcendetals_decl() +trigonometry_decl() diff --git a/builtins/target-neon-16.ll b/builtins/target-neon-16.ll index 8e0ef121..d73b0a50 100644 --- a/builtins/target-neon-16.ll +++ b/builtins/target-neon-16.ll @@ -521,3 +521,6 @@ define <8 x i16> @__avg_down_int16(<8 x i16>, <8 x i16>) nounwind readnone { rsqrtd_decl() rcpd_decl() + +transcendetals_decl() +trigonometry_decl() diff --git a/builtins/target-neon-32.ll b/builtins/target-neon-32.ll index d6e861a2..a8999269 100644 --- a/builtins/target-neon-32.ll +++ b/builtins/target-neon-32.ll @@ -491,3 +491,6 @@ define <4 x i16> @__avg_down_int16(<4 x i16>, <4 x i16>) nounwind readnone { rsqrtd_decl() rcpd_decl() + +transcendetals_decl() +trigonometry_decl() diff --git a/builtins/target-neon-8.ll b/builtins/target-neon-8.ll index aaa0a7b7..0c1edf8c 100644 --- a/builtins/target-neon-8.ll +++ b/builtins/target-neon-8.ll @@ -587,3 +587,6 @@ define <16 x i16> @__avg_down_int16(<16 x i16>, <16 x i16>) nounwind readnone { rsqrtd_decl() rcpd_decl() + +transcendetals_decl() +trigonometry_decl() diff --git a/builtins/target-sse2-x2.ll b/builtins/target-sse2-x2.ll index bfb927e5..f69c25c8 100644 --- a/builtins/target-sse2-x2.ll +++ b/builtins/target-sse2-x2.ll @@ -658,3 +658,6 @@ define <8 x double> @__max_varying_double(<8 x double>, <8 x double>) nounwind r rsqrtd_decl() rcpd_decl() + +transcendetals_decl() +trigonometry_decl() diff --git a/builtins/target-sse2.ll b/builtins/target-sse2.ll index 93a8eb93..4e033d88 100644 --- a/builtins/target-sse2.ll +++ b/builtins/target-sse2.ll @@ -593,3 +593,6 @@ gen_scatter(double) rsqrtd_decl() rcpd_decl() + +transcendetals_decl() +trigonometry_decl() diff --git a/builtins/target-sse4-16.ll b/builtins/target-sse4-16.ll index 0de5c1b4..abca8459 100644 --- a/builtins/target-sse4-16.ll +++ b/builtins/target-sse4-16.ll @@ -494,3 +494,6 @@ define_down_avgs() rsqrtd_decl() rcpd_decl() + +transcendetals_decl() +trigonometry_decl() diff --git a/builtins/target-sse4-8.ll b/builtins/target-sse4-8.ll index 79f44212..2e9f83a7 100644 --- a/builtins/target-sse4-8.ll +++ b/builtins/target-sse4-8.ll @@ -496,3 +496,6 @@ define_down_avgs() rsqrtd_decl() rcpd_decl() + +transcendetals_decl() +trigonometry_decl() diff --git a/builtins/target-sse4-x2.ll b/builtins/target-sse4-x2.ll index ceff27f0..c0466b34 100644 --- a/builtins/target-sse4-x2.ll +++ b/builtins/target-sse4-x2.ll @@ -597,3 +597,6 @@ define_avgs() rsqrtd_decl() rcpd_decl() + +transcendetals_decl() +trigonometry_decl() diff --git a/builtins/target-sse4.ll b/builtins/target-sse4.ll index 9e2ac8a5..d5d9ca5f 100644 --- a/builtins/target-sse4.ll +++ b/builtins/target-sse4.ll @@ -520,3 +520,6 @@ define_avgs() rsqrtd_decl() rcpd_decl() + +transcendetals_decl() +trigonometry_decl() diff --git a/builtins/util.m4 b/builtins/util.m4 index fbd929a1..2af75c25 100644 --- a/builtins/util.m4 +++ b/builtins/util.m4 @@ -2106,13 +2106,6 @@ declare void @__pseudo_scatter_base_offsets64_double(i8 * nocapture, i32, , , ) nounwind -declare float @__log_uniform_float(float) nounwind readnone -declare @__log_varying_float() nounwind readnone -declare float @__exp_uniform_float(float) nounwind readnone -declare @__exp_varying_float() nounwind readnone -declare float @__pow_uniform_float(float, float) nounwind readnone -declare @__pow_varying_float(, ) nounwind readnone - ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; declare void @__use8() @@ -4541,3 +4534,58 @@ define(`rcpd_decl', ` declare double @__rcp_uniform_double(double) declare @__rcp_varying_double() ') + + +define(`transcendetals_decl',` + declare float @__log_uniform_float(float) nounwind readnone + declare @__log_varying_float() nounwind readnone + declare float @__exp_uniform_float(float) nounwind readnone + declare @__exp_varying_float() nounwind readnone + declare float @__pow_uniform_float(float, float) nounwind readnone + declare @__pow_varying_float(, ) nounwind readnone + + declare double @__log_uniform_double(double) nounwind readnone + declare @__log_varying_double() nounwind readnone + declare double @__exp_uniform_double(double) nounwind readnone + declare @__exp_varying_double() nounwind readnone + declare double @__pow_uniform_double(double, double) nounwind readnone + declare @__pow_varying_double(, ) nounwind readnone +') + +define(`trigonometry_decl',` + declare @__sin_varying_float() nounwind readnone + declare @__asin_varying_float() nounwind readnone + declare @__cos_varying_float() nounwind readnone + declare @__acos_varying_float() nounwind readnone + declare void @__sincos_varying_float(, *, *) nounwind readnone + declare @__tan_varying_float() nounwind readnone + declare @__atan_varying_float() nounwind readnone + declare @__atan2_varying_float(,) nounwind readnone + + declare float @__sin_uniform_float(float) nounwind readnone + declare float @__asin_uniform_float(float) nounwind readnone + declare float @__cos_uniform_float(float) nounwind readnone + declare float @__acos_uniform_float(float) nounwind readnone + declare void @__sincos_uniform_float(float, float*, float*) nounwind readnone + declare float @__tan_uniform_float(float) nounwind readnone + declare float @__atan_uniform_float(float) nounwind readnone + declare float @__atan2_uniform_float(float,float) nounwind readnone + + declare @__sin_varying_double() nounwind readnone + declare @__asin_varying_double() nounwind readnone + declare @__cos_varying_double() nounwind readnone + declare @__acos_varying_double() nounwind readnone + declare void @__sincos_varying_double(, *, *) nounwind readnone + declare @__tan_varying_double() nounwind readnone + declare @__atan_varying_double() nounwind readnone + declare @__atan2_varying_double(,) nounwind readnone + + declare double @__sin_uniform_double(double) nounwind readnone + declare double @__asin_uniform_double(double) nounwind readnone + declare double @__cos_uniform_double(double) nounwind readnone + declare double @__acos_uniform_double(double) nounwind readnone + declare void @__sincos_uniform_double(double, double*, double*) nounwind readnone + declare double @__tan_uniform_double(double) nounwind readnone + declare double @__atan_uniform_double(double) nounwind readnone + declare double @__atan2_uniform_double(double,double) nounwind readnone +') diff --git a/examples/intrinsics/generic-16.h b/examples/intrinsics/generic-16.h index 3b5c6ec3..c8f2cf08 100644 --- a/examples/intrinsics/generic-16.h +++ b/examples/intrinsics/generic-16.h @@ -691,39 +691,6 @@ SHIFT(__vec16_f, float, float) SHUFFLES(__vec16_f, float, float) LOAD_STORE(__vec16_f, float) -static FORCEINLINE float __exp_uniform_float(float v) { - return expf(v); -} - -static FORCEINLINE __vec16_f __exp_varying_float(__vec16_f v) { - __vec16_f ret; - for (int i = 0; i < 16; ++i) - ret.v[i] = expf(v.v[i]); - return ret; -} - -static FORCEINLINE float __log_uniform_float(float v) { - return logf(v); -} - -static FORCEINLINE __vec16_f __log_varying_float(__vec16_f v) { - __vec16_f ret; - for (int i = 0; i < 16; ++i) - ret.v[i] = logf(v.v[i]); - return ret; -} - -static FORCEINLINE float __pow_uniform_float(float a, float b) { - return powf(a, b); -} - -static FORCEINLINE __vec16_f __pow_varying_float(__vec16_f a, __vec16_f b) { - __vec16_f ret; - for (int i = 0; i < 16; ++i) - ret.v[i] = powf(a.v[i], b.v[i]); - return ret; -} - static FORCEINLINE int __intbits(float v) { union { float f; @@ -1813,3 +1780,97 @@ static FORCEINLINE uint64_t __clock() { #endif // !WIN32 + +/////////////////////////////////////////////////////////////////////////// +// Transcendentals +// +// +#define TRANSCENDENTALS(op) \ +static FORCEINLINE float __##op##_uniform_float(float v) { \ + return op##f(v); \ +} \ +static FORCEINLINE __vec16_f __##op##_varying_float(__vec16_f v) { \ + __vec16_f ret; \ + for (int i = 0; i < 16; ++i) \ + ret.v[i] = op##f(v.v[i]); \ + return ret; \ +} \ +static FORCEINLINE double __##op##_uniform_double(double v) { \ + return op(v); \ +} \ +static FORCEINLINE __vec16_d __##op##_varying_double(__vec16_d v) { \ + __vec16_d ret; \ + for (int i = 0; i < 16; ++i) \ + ret.v[i] = op(v.v[i]); \ + return ret; \ +} + + TRANSCENDENTALS(log) +TRANSCENDENTALS(exp) + + +static FORCEINLINE float __pow_uniform_float(float a, float b) { + return powf(a, b); +} +static FORCEINLINE __vec16_f __pow_varying_float(__vec16_f a, __vec16_f b) { + __vec16_f ret; + for (int i = 0; i < 16; ++i) + ret.v[i] = powf(a.v[i], b.v[i]); + return ret; +} +static FORCEINLINE double __pow_uniform_double(double a, double b) { + return pow(a, b); +} +static FORCEINLINE __vec16_d __pow_varying_double(__vec16_d a, __vec16_d b) { + __vec16_d ret; + for (int i = 0; i < 16; ++i) + ret.v[i] = pow(a.v[i], b.v[i]); + return ret; +} + +/////////////////////////////////////////////////////////////////////////// +// Trigonometry + +TRANSCENDENTALS(sin) +TRANSCENDENTALS(asin) +TRANSCENDENTALS(cos) +TRANSCENDENTALS(acos) +TRANSCENDENTALS(tan) +TRANSCENDENTALS(atan) + + +static FORCEINLINE float __atan2_uniform_float(float a, float b) { + return atan2f(a, b); +} +static FORCEINLINE __vec16_f __atan2_varying_float(__vec16_f a, __vec16_f b) { + __vec16_f ret; + for (int i = 0; i < 16; ++i) + ret.v[i] = atan2f(a.v[i], b.v[i]); + return ret; +} +static FORCEINLINE double __atan2_uniform_double(double a, double b) { + return atan2(a, b); +} +static FORCEINLINE __vec16_d __atan2_varying_double(__vec16_d a, __vec16_d b) { + __vec16_d ret; + for (int i = 0; i < 16; ++i) + ret.v[i] = atan2(a.v[i], b.v[i]); + return ret; +} + +static FORCEINLINE void __sincos_uniform_float(float x, float *a, float *b) { + sincosf(x,a,b); +} +static FORCEINLINE void __sincos_varying_float(__vec16_f x, __vec16_f *a, __vec16_f *b) { + __vec16_f ret; + for (int i = 0; i < 16; ++i) + sincosf(x.v[i], (float*)a + i, (float*)b+i); +} +static FORCEINLINE void __sincos_uniform_double(double x, double *a, double *b) { + sincos(x,a,b); +} +static FORCEINLINE void __sincos_varying_double(__vec16_d x, __vec16_d *a, __vec16_d *b) { + __vec16_d ret; + for (int i = 0; i < 16; ++i) + sincos(x.v[i], (double*)a + i, (double*)b+i); +} diff --git a/examples/intrinsics/sse4.h b/examples/intrinsics/sse4.h index 45b31be1..a25af10b 100644 --- a/examples/intrinsics/sse4.h +++ b/examples/intrinsics/sse4.h @@ -167,6 +167,10 @@ struct __vec4_d { } __m128d v[2]; + FORCEINLINE __vec4_d(double *p) { + v[0] = _mm_set_pd(p[1], p[0]); + v[1] = _mm_set_pd(p[3], p[2]); + } }; @@ -2471,39 +2475,6 @@ static FORCEINLINE __vec4_d __sqrt_varying_double(__vec4_d v) { return __vec4_d(_mm_sqrt_pd(v.v[0]), _mm_sqrt_pd(v.v[1])); } -static FORCEINLINE __vec4_f __pow_varying_float(__vec4_f a, __vec4_f b) { - float r[4]; - for (int i = 0; i < 4; ++i) - r[i] = powf(__extract_element(a, i), __extract_element(b, i)); - return __vec4_f(r); -} - -static FORCEINLINE float __pow_uniform_float(float a, float b) { - return powf(a, b); -} - -static FORCEINLINE __vec4_f __exp_varying_float(__vec4_f a) { - float r[4]; - for (int i = 0; i < 4; ++i) - r[i] = expf(__extract_element(a, i)); - return __vec4_f(r); -} - -static FORCEINLINE float __exp_uniform_float(float a) { - return expf(a); -} - -static FORCEINLINE __vec4_f __log_varying_float(__vec4_f a) { - float r[4]; - for (int i = 0; i < 4; ++i) - r[i] = logf(__extract_element(a, i)); - return __vec4_f(r); -} - -static FORCEINLINE float __log_uniform_float(float a) { - return logf(a); -} - static FORCEINLINE int __intbits(float v) { union { float f; @@ -4166,4 +4137,97 @@ static FORCEINLINE uint64_t __clock() { } #endif // !WIN32 + +/////////////////////////////////////////////////////////////////////////// +// Transcendentals + + +#define TRANSCENDENTALS(op) \ +static FORCEINLINE __vec4_f __##op##_varying_float(__vec4_f a) {\ + float r[4];\ + for (int i = 0; i < 4; ++i)\ + r[i] = op##f(__extract_element(a, i));\ + return __vec4_f(r);\ +}\ +static FORCEINLINE float __##op##_uniform_float(float a) {\ + return op##f(a);\ +}\ +static FORCEINLINE __vec4_d __##op##_varying_double(__vec4_d a) {\ + double r[4];\ + for (int i = 0; i < 4; ++i)\ + r[i] = op(__extract_element(a, i));\ + return __vec4_d(r);\ +}\ +static FORCEINLINE double __##op##_uniform_double(double a) {\ + return op(a);\ +} + +TRANSCENDENTALS(log) +TRANSCENDENTALS(exp) + + +static FORCEINLINE __vec4_f __pow_varying_float(__vec4_f a, __vec4_f b) { + float r[4]; + for (int i = 0; i < 4; ++i) + r[i] = powf(__extract_element(a, i), __extract_element(b, i)); + return __vec4_f(r); +} +static FORCEINLINE float __pow_uniform_float(float a, float b) { + return powf(a, b); +} +static FORCEINLINE __vec4_d __pow_varying_double(__vec4_d a, __vec4_d b) { + double r[4]; + for (int i = 0; i < 4; ++i) + r[i] = pow(__extract_element(a, i), __extract_element(b, i)); + return __vec4_d(r); +} +static FORCEINLINE double __pow_uniform_double(double a, double b) { + return pow(a, b); +} + +/////////////////////////////////////////////////////////////////////////// +// Trigonometry + +TRANSCENDENTALS(sin) +TRANSCENDENTALS(asin) +TRANSCENDENTALS(cos) +TRANSCENDENTALS(acos) +TRANSCENDENTALS(tan) +TRANSCENDENTALS(atan) + + +static FORCEINLINE __vec4_f __atan2_varying_float(__vec4_f a, __vec4_f b) { + float r[4]; + for (int i = 0; i < 4; ++i) + r[i] = atan2f(__extract_element(a, i), __extract_element(b, i)); + return __vec4_f(r); +} +static FORCEINLINE float __atan2_uniform_float(float a, float b) { + return atan2f(a, b); +} +static FORCEINLINE __vec4_d __atan2_varying_double(__vec4_d a, __vec4_d b) { + double r[4]; + for (int i = 0; i < 4; ++i) + r[i] = atan2(__extract_element(a, i), __extract_element(b, i)); + return __vec4_d(r); +} +static FORCEINLINE double __atan2_uniform_double(double a, double b) { + return atan2(a, b); +} + +static FORCEINLINE void __sincos_varying_float(__vec4_f x, __vec4_f * _sin, __vec4_f * _cos) { + for (int i = 0; i < 4; ++i) + sincosf(__extract_element(x, i), (float*)_sin + i, (float*)_cos + i); +} +static FORCEINLINE void __sincos_uniform_float(float x, float *_sin, float *_cos) { + sincosf(x, _sin, _cos); +} +static FORCEINLINE void __sincos_varying_double(__vec4_d x, __vec4_d * _sin, __vec4_d * _cos) { + for (int i = 0; i < 4; ++i) + sincos(__extract_element(x, i), (double*)_sin + i, (double*)_cos + i); +} +static FORCEINLINE void __sincos_uniform_double(double x, double *_sin, double *_cos) { + sincos(x, _sin, _cos); +} + #undef FORCEINLINE diff --git a/stdlib.ispc b/stdlib.ispc index eb5ee9c4..a6ed8896 100644 --- a/stdlib.ispc +++ b/stdlib.ispc @@ -2433,29 +2433,29 @@ static inline uniform float sin(uniform float x_full) { __declspec(safe) -static inline float asin(float x) { - bool isneg = x < 0; - x = abs(x); - +static inline float asin(float x0) { + bool isneg = x0< 0; + float x = abs(x0); bool isnan = (x > 1); - float v; + if (__have_native_trigonometry) { - return __asin_varying_float(x_full); + return __asin_varying_float(x0); } else if (__math_lib == __math_lib_svml) { - return __svml_asinf(x); + return __svml_asinf(x0); } else if (__math_lib == __math_lib_system) { float ret; foreach_active (i) { - uniform float r = __stdlib_asinf(extract(x, i)); + uniform float r = __stdlib_asinf(extract(x0, i)); ret = insert(ret, i, r); } return ret; } else if (__math_lib == __math_lib_ispc) + { // sollya // fpminimax(((asin(x)-pi/2)/-sqrt(1-x)), [|0,1,2,3,4,5,6,7,8,9,10|], // [|single...|], [1e-20;.9999999999999999]); @@ -2471,7 +2471,9 @@ static inline float asin(float x) { x * (3.05023305118083953857421875e-2f + x * (-1.2897425331175327301025390625e-2f + x * 2.38926825113594532012939453125e-3f))))))))); + } else if (__math_lib == __math_lib_ispc_fast) + { // sollya // fpminimax(((asin(x)-pi/2)/-sqrt(1-x)), [|0,1,2,3,4,5|],[|single...|], // [1e-20;.9999999999999999]); @@ -2482,6 +2484,7 @@ static inline float asin(float x) { x * (-4.489909112453460693359375e-2f + x * (1.928029954433441162109375e-2f + x * (-4.3095736764371395111083984375e-3f))))); + } v *= -sqrt(1.f - x); v = v + 1.57079637050628662109375; @@ -2496,22 +2499,21 @@ static inline float asin(float x) { __declspec(safe) -static inline uniform float asin(uniform float x) { - uniform bool isneg = x < 0; - x = abs(x); - +static inline uniform float asin(uniform float x0) { + uniform bool isneg = x0 < 0; + uniform float x = abs(x0); uniform bool isnan = (x > 1); - uniform float v; if (__have_native_trigonometry) { - return __asin_uniform_float(x_full); + return __asin_uniform_float(x0); } else if (__math_lib == __math_lib_svml || __math_lib == __math_lib_system) { - return __stdlib_asinf(x); + return __stdlib_asinf(x0); } else if (__math_lib == __math_lib_ispc) + { // sollya // fpminimax(((asin(x)-pi/2)/-sqrt(1-x)), [|0,1,2,3,4,5,6,7,8,9,10|], // [|single...|], [1e-20;.9999999999999999]); @@ -2527,7 +2529,9 @@ static inline uniform float asin(uniform float x) { x * (3.05023305118083953857421875e-2f + x * (-1.2897425331175327301025390625e-2f + x * 2.38926825113594532012939453125e-3f))))))))); + } else if (__math_lib == __math_lib_ispc_fast) + { // sollya // fpminimax(((asin(x)-pi/2)/-sqrt(1-x)), [|0,1,2,3,4,5|],[|single...|], // [1e-20;.9999999999999999]); @@ -2538,6 +2542,7 @@ static inline uniform float asin(uniform float x) { x * (-4.489909112453460693359375e-2f + x * (1.928029954433441162109375e-2f + x * (-4.3095736764371395111083984375e-3f))))); + } v *= -sqrt(1.f - x); v = v + 1.57079637050628662109375; @@ -2687,22 +2692,34 @@ static inline uniform float cos(uniform float x_full) { __declspec(safe) static inline float acos(float v) { + if (__have_native_trigonometry) + return __acos_varying_float(v); + else return 1.57079637050628662109375 - asin(v); } __declspec(safe) static inline double acos(const double v) { + if (__have_native_trigonometry) + return __acos_varying_double(v); + else return 1.57079637050628662109375d0 - asin(v); } __declspec(safe) static inline uniform float acos(uniform float v) { + if (__have_native_trigonometry) + return __acos_uniform_float(v); + else return 1.57079637050628662109375 - asin(v); } __declspec(safe) static inline uniform double acos(const uniform double v) { + if (__have_native_trigonometry) + return __acos_uniform_double(v); + else return 1.57079637050628662109375d0 - asin(v); } @@ -3696,7 +3713,7 @@ __declspec(safe) static inline double sin(double x) { if (__have_native_trigonometry) { - return __sin_varying_double(x_full); + return __sin_varying_double(x); } else if (__math_lib == __math_lib_svml) { @@ -3715,7 +3732,7 @@ __declspec(safe) static inline double asin(double x) { if (__have_native_trigonometry) { - return __asin_varying_double(x_full); + return __asin_varying_double(x); } else if (__math_lib == __math_lib_svml) { @@ -3735,7 +3752,7 @@ __declspec(safe) static inline uniform double sin(uniform double x) { if (__have_native_trigonometry) { - return __sin_uniform_double(x_full); + return __sin_uniform_double(x); } else return __stdlib_sin(x); @@ -3745,7 +3762,7 @@ __declspec(safe) static inline double asin(const double x) { if (__have_native_trigonometry) { - return __asin_varying_double(x_full); + return __asin_varying_double(x); } else if (__math_lib == __math_lib_svml) { @@ -3765,7 +3782,7 @@ __declspec(safe) static inline double cos(const double x) { if (__have_native_trigonometry) { - return __cos_varying_double(x_full); + return __cos_varying_double(x); } if (__math_lib == __math_lib_svml) { @@ -3785,7 +3802,7 @@ __declspec(safe) static inline uniform double cos(uniform double x) { if (__have_native_trigonometry) { - return __cos_uniform_double(x_full); + return __cos_uniform_double(x); } else return __stdlib_cos(x); @@ -3796,7 +3813,7 @@ static inline void sincos(double x, varying double * uniform sin_result, varying double * uniform cos_result) { if (__have_native_trigonometry) { - __sincos_varying_double(x_full),sin_result,cos_result); + __sincos_varying_double(x,sin_result,cos_result); } if (__math_lib == __math_lib_svml) { @@ -3817,7 +3834,7 @@ static inline void sincos(uniform double x, uniform double * uniform sin_result, uniform double * uniform cos_result) { if (__have_native_trigonometry) { - __sincos_uniform_double(x_full,sin_result, cos_result); + __sincos_uniform_double(x,sin_result, cos_result); } else __stdlib_sincos(x, sin_result, cos_result); @@ -3827,7 +3844,7 @@ __declspec(safe) static inline double tan(double x) { if (__have_native_trigonometry) { - return __tan_varying_double(x_full); + return __tan_varying_double(x); } else if (__math_lib == __math_lib_svml) { @@ -3847,7 +3864,7 @@ __declspec(safe) static inline uniform double tan(uniform double x) { if (__have_native_trigonometry) { - return __tan_uniform_double(x_full); + return __tan_uniform_double(x); } else return __stdlib_tan(x); @@ -3857,7 +3874,7 @@ __declspec(safe) static inline double atan(double x) { if (__have_native_trigonometry) { - return __atan_varying_double(x_full); + return __atan_varying_double(x); } else { double ret; @@ -3873,7 +3890,7 @@ __declspec(safe) static inline uniform double atan(uniform double x) { if (__have_native_trigonometry) { - return __atan_uniform_double(x_full); + return __atan_uniform_double(x); } else return __stdlib_atan(x); @@ -3912,7 +3929,7 @@ static inline uniform double atan2(uniform double y, uniform double x) { __declspec(safe) static inline double exp(double x) { if (__have_native_transcendentals) { - return __exp_varying_double(x_full); + return __exp_varying_double(x); } else if (__math_lib == __math_lib_svml) { @@ -3931,7 +3948,7 @@ static inline double exp(double x) { __declspec(safe) static inline uniform double exp(uniform double x) { if (__have_native_transcendentals) { - return __exp_uniform_double(x_full); + return __exp_uniform_double(x); } else return __stdlib_exp(x); @@ -3940,7 +3957,7 @@ static inline uniform double exp(uniform double x) { __declspec(safe) static inline double log(double x) { if (__have_native_transcendentals) { - return __log_varying_double(x_full); + return __log_varying_double(x); } else if (__math_lib == __math_lib_svml) { @@ -3959,7 +3976,7 @@ static inline double log(double x) { __declspec(safe) static inline uniform double log(uniform double x) { if (__have_native_transcendentals) { - return __log_uniform_double(x_full); + return __log_uniform_double(x); } else return __stdlib_log(x); From 438cee4e21755f4e890ca3d481b9521aa65453a0 Mon Sep 17 00:00:00 2001 From: evghenii Date: Fri, 7 Feb 2014 15:43:42 +0100 Subject: [PATCH 4/7] added support for double precision/native transendentals/trigonometry --- examples/intrinsics/knc-i1x16.h | 56 ++++++++++++++++++++------------- examples/intrinsics/knc.h | 33 +++++++++++++++++++ 2 files changed, 67 insertions(+), 22 deletions(-) diff --git a/examples/intrinsics/knc-i1x16.h b/examples/intrinsics/knc-i1x16.h index ba6ef005..2e6afed5 100644 --- a/examples/intrinsics/knc-i1x16.h +++ b/examples/intrinsics/knc-i1x16.h @@ -1142,28 +1142,6 @@ template <> static FORCEINLINE void __store<64>(__vec16_f *p, __vec16_f v) } #endif -/******** math ******/ - -/*** float ***/ -static FORCEINLINE float __exp_uniform_float(float v) { return expf(v);} -static FORCEINLINE __vec16_f __exp_varying_float(__vec16_f v) { return _mm512_exp_ps(v); } - -static FORCEINLINE float __log_uniform_float(float v) { return logf(v);} -static FORCEINLINE __vec16_f __log_varying_float(__vec16_f v) { return _mm512_log_ps(v); } - -static FORCEINLINE float __pow_uniform_float(float a, float b) { return powf(a, b);} -static FORCEINLINE __vec16_f __pow_varying_float(__vec16_f a, __vec16_f b) { return _mm512_pow_ps(a,b); } - -/*** double ***/ -static FORCEINLINE double __exp_uniform_double(double v) { return exp(v);} -static FORCEINLINE __vec16_d __exp_varying_double(__vec16_d v) { return __vec16_d(_mm512_exp_pd(v.v1),_mm512_exp_pd(v.v2)); } - -static FORCEINLINE double __log_uniform_double(double v) { return log(v);} -static FORCEINLINE __vec16_d __log_varying_double(__vec16_d v) { return __vec16_d(_mm512_log_pd(v.v1),_mm512_log_pd(v.v2)); } - -static FORCEINLINE double __pow_uniform_double(double a, double b) { return pow(a,b);} -static FORCEINLINE __vec16_d __pow_varying_double(__vec16_d a, __vec16_d b) { return __vec16_d(_mm512_pow_pd(a.v1,b.v1),_mm512_pow_pd(a.v2,b.v2)); } - /******** bitcast ******/ static FORCEINLINE int __intbits(float v) { @@ -2806,6 +2784,40 @@ static FORCEINLINE uint64_t __clock() { #endif // !WIN32 + +/////////////////////////////////////////////////////////////////////////// +// Transcendentals + + +#define TRANSCENDENTALS(op) \ +static FORCEINLINE __vec16_f __##op##_varying_float(__vec16_f v) { return _mm512_##op##_ps(v); } \ +static FORCEINLINE float __##op##_uniform_float(float v) { return op##f(v); } \ +static FORCEINLINE __vec16_d __##op##_varying_double(__vec16_d v) { return __vec16_d(_mm512_##op##_pd(v.v1),_mm512_##op##_pd(v.v2)); } \ +static FORCEINLINE double __##op##_uniform_double(double a) { return op(a); } + +TRANSCENDENTALS(log) +TRANSCENDENTALS(exp) + +static FORCEINLINE float __pow_uniform_float(float a, float b) { return powf(a, b);} +static FORCEINLINE __vec16_f __pow_varying_float(__vec16_f a, __vec16_f b) { return _mm512_pow_ps(a,b); } +static FORCEINLINE double __pow_uniform_double(double a, double b) { return pow(a,b);} +static FORCEINLINE __vec16_d __pow_varying_double(__vec16_d a, __vec16_d b) { return __vec16_d(_mm512_pow_pd(a.v1,b.v1),_mm512_pow_pd(a.v2,b.v2)); } + +/////////////////////////////////////////////////////////////////////////// +// Trigonometry + +TRANSCENDENTALS(sin) +TRANSCENDENTALS(asin) +TRANSCENDENTALS(cos) +TRANSCENDENTALS(acos) +TRANSCENDENTALS(tan) +TRANSCENDENTALS(atan) + +static FORCEINLINE float __atan2_uniform_float(float a, float b) { return atan2f(a, b);} +static FORCEINLINE __vec16_f __atan2_varying_float(__vec16_f a, __vec16_f b) { return _mm512_atan2_ps(a,b); } +static FORCEINLINE double __atan2_uniform_double(double a, double b) { return atan2(a,b);} +static FORCEINLINE __vec16_d __atan2_varying_double(__vec16_d a, __vec16_d b) { return __vec16_d(_mm512_atan2_pd(a.v1,b.v1),_mm512_atan2_pd(a.v2,b.v2)); } + #undef FORCEINLINE #undef PRE_ALIGN #undef POST_ALIGN diff --git a/examples/intrinsics/knc.h b/examples/intrinsics/knc.h index 458da458..0d3d5198 100644 --- a/examples/intrinsics/knc.h +++ b/examples/intrinsics/knc.h @@ -2173,6 +2173,39 @@ static FORCEINLINE uint64_t __clock() { } #endif // !WIN32 +/////////////////////////////////////////////////////////////////////////// +// Transcendentals + + +#define TRANSCENDENTALS(op) \ +static FORCEINLINE __vec16_f __##op##_varying_float(__vec16_f v) { return _mm512_##op##_ps(v); } \ +static FORCEINLINE float __##op##_uniform_float(float v) { return op##f(v); } \ +static FORCEINLINE __vec16_d __##op##_varying_double(__vec16_d v) { return __vec16_d(_mm512_##op##_pd(v.v1),_mm512_##op##_pd(v.v2)); } \ +static FORCEINLINE double __##op##_uniform_double(double a) { return op(a); } + +TRANSCENDENTALS(log) +TRANSCENDENTALS(exp) + +static FORCEINLINE float __pow_uniform_float(float a, float b) { return powf(a, b);} +static FORCEINLINE __vec16_f __pow_varying_float(__vec16_f a, __vec16_f b) { return _mm512_pow_ps(a,b); } +static FORCEINLINE double __pow_uniform_double(double a, double b) { return pow(a,b);} +static FORCEINLINE __vec16_d __pow_varying_double(__vec16_d a, __vec16_d b) { return __vec16_d(_mm512_pow_pd(a.v1,b.v1),_mm512_pow_pd(a.v2,b.v2)); } + +/////////////////////////////////////////////////////////////////////////// +// Trigonometry + +TRANSCENDENTALS(sin) +TRANSCENDENTALS(asin) +TRANSCENDENTALS(cos) +TRANSCENDENTALS(acos) +TRANSCENDENTALS(tan) +TRANSCENDENTALS(atan) + +static FORCEINLINE float __atan2_uniform_float(float a, float b) { return atan2f(a, b);} +static FORCEINLINE __vec16_f __atan2_varying_float(__vec16_f a, __vec16_f b) { return _mm512_atan2_ps(a,b); } +static FORCEINLINE double __atan2_uniform_double(double a, double b) { return atan2(a,b);} +static FORCEINLINE __vec16_d __atan2_varying_double(__vec16_d a, __vec16_d b) { return __vec16_d(_mm512_atan2_pd(a.v1,b.v1),_mm512_atan2_pd(a.v2,b.v2)); } + #undef FORCEINLINE #undef PRE_ALIGN #undef POST_ALIGN From 8490efe0ad512719ab28d92b928590d0b08128c4 Mon Sep 17 00:00:00 2001 From: evghenii Date: Fri, 7 Feb 2014 16:00:21 +0100 Subject: [PATCH 5/7] fix for knc.h. Due to a bug in ICC (tested with 13.1.3 & 14.0.1) ,the resulting .cpp file fails to compile --- examples/intrinsics/knc.h | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/examples/intrinsics/knc.h b/examples/intrinsics/knc.h index 0d3d5198..4bdb184a 100644 --- a/examples/intrinsics/knc.h +++ b/examples/intrinsics/knc.h @@ -95,6 +95,7 @@ typedef struct PRE_ALIGN(64) __vec16_f { typedef struct PRE_ALIGN(64) __vec16_d { FORCEINLINE __vec16_d() : v1(_mm512_undefined_pd()), v2(_mm512_undefined_pd()) {} FORCEINLINE __vec16_d(const __vec16_d &o) : v1(o.v1), v2(o.v2) {} + FORCEINLINE __vec16_d(const __m512d _v1, const __m512d _v2) : v1(_v1), v2(_v2) {} FORCEINLINE __vec16_d& operator =(const __vec16_d &o) { v1=o.v1; v2=o.v2; return *this; } FORCEINLINE __vec16_d(double v00, double v01, double v02, double v03, double v04, double v05, double v06, double v07, @@ -1503,18 +1504,6 @@ static FORCEINLINE double __rsqrt_uniform_double(double v) } -static FORCEINLINE __vec16_f __exp_varying_float(__vec16_f v) { - return _mm512_exp_ps(v); -} - -static FORCEINLINE __vec16_f __log_varying_float(__vec16_f v) { - return _mm512_log_ps(v); -} - -static FORCEINLINE __vec16_f __pow_varying_float(__vec16_f a, __vec16_f b) { - return _mm512_pow_ps(a, b); -} - /////////////////////////////////////////////////////////////////////////// // bit ops /////////////////////////////////////////////////////////////////////////// From f0779f95a350908be063b01be0cc0a98166e8000 Mon Sep 17 00:00:00 2001 From: Evghenii Date: Tue, 11 Feb 2014 11:40:40 +0100 Subject: [PATCH 6/7] added double precision tests --- tests/transcendentals-5-0.ispc | 23 +++++++++++++++++++++++ tests/transcendentals-5-1.ispc | 23 +++++++++++++++++++++++ tests/transcendentals-5-2.ispc | 28 ++++++++++++++++++++++++++++ tests/transcendentals-5-3.ispc | 23 +++++++++++++++++++++++ tests/transcendentals-6-0.ispc | 23 +++++++++++++++++++++++ tests/transcendentals-6-1.ispc | 23 +++++++++++++++++++++++ tests/transcendentals-6-2.ispc | 23 +++++++++++++++++++++++ tests/transcendentals-6-3.ispc | 29 +++++++++++++++++++++++++++++ tests/transcendentals-7-0.ispc | 23 +++++++++++++++++++++++ tests/transcendentals-7-1.ispc | 23 +++++++++++++++++++++++ tests/transcendentals-7-2.ispc | 23 +++++++++++++++++++++++ tests/transcendentals-7-3.ispc | 23 +++++++++++++++++++++++ tests/transcendentals-8-0.ispc | 23 +++++++++++++++++++++++ tests/transcendentals-8-1.ispc | 23 +++++++++++++++++++++++ tests/transcendentals-8-2.ispc | 23 +++++++++++++++++++++++ tests/transcendentals-8-3.ispc | 23 +++++++++++++++++++++++ tests/transcendentals-9-0.ispc | 23 +++++++++++++++++++++++ tests/transcendentals-9-1.ispc | 23 +++++++++++++++++++++++ tests/transcendentals-9-2.ispc | 23 +++++++++++++++++++++++ tests/transcendentals-9-3.ispc | 23 +++++++++++++++++++++++ 20 files changed, 471 insertions(+) create mode 100644 tests/transcendentals-5-0.ispc create mode 100644 tests/transcendentals-5-1.ispc create mode 100644 tests/transcendentals-5-2.ispc create mode 100644 tests/transcendentals-5-3.ispc create mode 100644 tests/transcendentals-6-0.ispc create mode 100644 tests/transcendentals-6-1.ispc create mode 100644 tests/transcendentals-6-2.ispc create mode 100644 tests/transcendentals-6-3.ispc create mode 100644 tests/transcendentals-7-0.ispc create mode 100644 tests/transcendentals-7-1.ispc create mode 100644 tests/transcendentals-7-2.ispc create mode 100644 tests/transcendentals-7-3.ispc create mode 100644 tests/transcendentals-8-0.ispc create mode 100644 tests/transcendentals-8-1.ispc create mode 100644 tests/transcendentals-8-2.ispc create mode 100644 tests/transcendentals-8-3.ispc create mode 100644 tests/transcendentals-9-0.ispc create mode 100644 tests/transcendentals-9-1.ispc create mode 100644 tests/transcendentals-9-2.ispc create mode 100644 tests/transcendentals-9-3.ispc diff --git a/tests/transcendentals-5-0.ispc b/tests/transcendentals-5-0.ispc new file mode 100644 index 00000000..c445b8a9 --- /dev/null +++ b/tests/transcendentals-5-0.ispc @@ -0,0 +1,23 @@ +static double double4(uniform double a, uniform double b, uniform double c, + uniform double d) { + double ret = 0; + for (uniform int i = 0; i < programCount; i += 4) { + ret = insert(ret, i + 0, a); + ret = insert(ret, i + 1, b); + ret = insert(ret, i + 2, c); + ret = insert(ret, i + 3, d); + } + return ret; +} + +export uniform int width() { return programCount; } + + +bool ok(double x, double ref) { return (abs(x - ref) < 1e-16) || abs((x-ref)/ref) < 1e-15; } + +export void f_v(uniform float RET[]) { + double v = double4((-3.141592741012573d0),(0.000000000000000d0),(3.141592741012573d0),(6.283185482025146d0)); + double ref = double4((8.742277955963554d-08),(0.000000000000000d0),(-8.742277955963554d-08),(1.7484555911927038d-07)); + RET[programIndex] = ok(sin(v), ref) ? 1. : 0.; +} +export void result(uniform float RET[]) { RET[programIndex] = 1.; } diff --git a/tests/transcendentals-5-1.ispc b/tests/transcendentals-5-1.ispc new file mode 100644 index 00000000..7d9980a4 --- /dev/null +++ b/tests/transcendentals-5-1.ispc @@ -0,0 +1,23 @@ +static double double4(uniform double a, uniform double b, uniform double c, + uniform double d) { + double ret = 0; + for (uniform int i = 0; i < programCount; i += 4) { + ret = insert(ret, i + 0, a); + ret = insert(ret, i + 1, b); + ret = insert(ret, i + 2, c); + ret = insert(ret, i + 3, d); + } + return ret; +} + +export uniform int width() { return programCount; } + + +bool ok(double x, double ref) { return (abs(x - ref) < 1e-16) || abs((x-ref)/ref) < 1e-15; } + +export void f_v(uniform float RET[]) { + double v = double4((1.000000000000000d0),(-1.000000000000000d0),(-1.5707963705062866d0),(1.5707963705062866d0)); + double ref = double4((0.8414709848078965d0),(-0.8414709848078965d0),(-1.000000000000000d0),(1.000000000000000d0)); + RET[programIndex] = ok(sin(v), ref) ? 1. : 0.; +} +export void result(uniform float RET[]) { RET[programIndex] = 1.; } diff --git a/tests/transcendentals-5-2.ispc b/tests/transcendentals-5-2.ispc new file mode 100644 index 00000000..38247397 --- /dev/null +++ b/tests/transcendentals-5-2.ispc @@ -0,0 +1,28 @@ +static double double4(uniform double a, uniform double b, uniform double c, + uniform double d) { + double ret = 0; + for (uniform int i = 0; i < programCount; i += 4) { + ret = insert(ret, i + 0, a); + ret = insert(ret, i + 1, b); + ret = insert(ret, i + 2, c); + ret = insert(ret, i + 3, d); + } + return ret; +} + +export uniform int width() { return programCount; } + + +bool ok(double x, double ref) { + bool r = (abs(x - ref) < 1e-14) || abs((x-ref)/ref) < 1e-14; + if (any(r == false)) + print("mismatch got %, expected %\n", x, ref); + return r; +} + +export void f_v(uniform float RET[]) { + double v = double4((-9.424777984619141d0),(4.000000000000000d0),(10.000000000000000d0),(-10.000000000000000d0)); + double ref = double4((2.3849760909612067d-08),(-0.7568024953079282d0),(-0.5440211108893699d0),(0.5440211108893699d0)); + RET[programIndex] = ok(sin(v), ref) ? 1. : 0.; +} +export void result(uniform float RET[]) { RET[programIndex] = 1.; } diff --git a/tests/transcendentals-5-3.ispc b/tests/transcendentals-5-3.ispc new file mode 100644 index 00000000..496f0b68 --- /dev/null +++ b/tests/transcendentals-5-3.ispc @@ -0,0 +1,23 @@ +static double double4(uniform double a, uniform double b, uniform double c, + uniform double d) { + double ret = 0; + for (uniform int i = 0; i < programCount; i += 4) { + ret = insert(ret, i + 0, a); + ret = insert(ret, i + 1, b); + ret = insert(ret, i + 2, c); + ret = insert(ret, i + 3, d); + } + return ret; +} + +export uniform int width() { return programCount; } + + +bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; } + +export void f_v(uniform float RET[]) { + double v = double4((14.300000190734863d0),(-6.699999809265137d0),(-21.200000762939453d0),(9.000000000000000d0)); + double ref = double4((0.9867719333537206d0),(-0.40484974621184605d0),(-0.7111606865372466d0),(0.4121184852417566d0)); + RET[programIndex] = ok(sin(v), ref) ? 1. : 0.; +} +export void result(uniform float RET[]) { RET[programIndex] = 1.; } diff --git a/tests/transcendentals-6-0.ispc b/tests/transcendentals-6-0.ispc new file mode 100644 index 00000000..2b59989a --- /dev/null +++ b/tests/transcendentals-6-0.ispc @@ -0,0 +1,23 @@ +static double double4(uniform double a, uniform double b, uniform double c, + uniform double d) { + double ret = 0; + for (uniform int i = 0; i < programCount; i += 4) { + ret = insert(ret, i + 0, a); + ret = insert(ret, i + 1, b); + ret = insert(ret, i + 2, c); + ret = insert(ret, i + 3, d); + } + return ret; +} + +export uniform int width() { return programCount; } + + +bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; } + +export void f_v(uniform float RET[]) { + double v = double4((-3.141592653589793d0),(0.000000000000000d0),(3.141592653589793d0),(6.283185307179586d0)); + double ref = double4((-1.000000000000000d0),(1.000000000000000d0),(-1.000000000000000d0),(1.000000000000000d0)); + RET[programIndex] = ok(cos(v), ref) ? 1. : 0.; +} +export void result(uniform float RET[]) { RET[programIndex] = 1.; } diff --git a/tests/transcendentals-6-1.ispc b/tests/transcendentals-6-1.ispc new file mode 100644 index 00000000..b460af9f --- /dev/null +++ b/tests/transcendentals-6-1.ispc @@ -0,0 +1,23 @@ +static double double4(uniform double a, uniform double b, uniform double c, + uniform double d) { + double ret = 0; + for (uniform int i = 0; i < programCount; i += 4) { + ret = insert(ret, i + 0, a); + ret = insert(ret, i + 1, b); + ret = insert(ret, i + 2, c); + ret = insert(ret, i + 3, d); + } + return ret; +} + +export uniform int width() { return programCount; } + + +bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; } + +export void f_v(uniform float RET[]) { + double v = double4((1.000000000000000d0),(-1.000000000000000d0),(-1.570796370506287d0),(1.570796370506287d0)); + double ref = double4((0.5403023058681398d0),(0.5403023058681398d0),(-4.371139044595162d-08),(-4.371139044595162d-08)); + RET[programIndex] = ok(cos(v), ref) ? 1. : 0.; +} +export void result(uniform float RET[]) { RET[programIndex] = 1.; } diff --git a/tests/transcendentals-6-2.ispc b/tests/transcendentals-6-2.ispc new file mode 100644 index 00000000..5bcb1949 --- /dev/null +++ b/tests/transcendentals-6-2.ispc @@ -0,0 +1,23 @@ +static double double4(uniform double a, uniform double b, uniform double c, + uniform double d) { + double ret = 0; + for (uniform int i = 0; i < programCount; i += 4) { + ret = insert(ret, i + 0, a); + ret = insert(ret, i + 1, b); + ret = insert(ret, i + 2, c); + ret = insert(ret, i + 3, d); + } + return ret; +} + +export uniform int width() { return programCount; } + + +bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; } + +export void f_v(uniform float RET[]) { + double v = double4((-9.42477796076938d0),(4.000000000000000d0),(10.000000000000000d0),(-10.000000000000000d0)); + double ref = double4((-1.000000000000000d0),(-0.6536436208636119d0),(-0.8390715290764524d0),(-0.8390715290764524d0)); + RET[programIndex] = ok(cos(v), ref) ? 1. : 0.; +} +export void result(uniform float RET[]) { RET[programIndex] = 1.; } diff --git a/tests/transcendentals-6-3.ispc b/tests/transcendentals-6-3.ispc new file mode 100644 index 00000000..6d738f92 --- /dev/null +++ b/tests/transcendentals-6-3.ispc @@ -0,0 +1,29 @@ +static double double4(uniform double a, uniform double b, uniform double c, + uniform double d) { + double ret = 0; + for (uniform int i = 0; i < programCount; i += 4) { + ret = insert(ret, i + 0, a); + ret = insert(ret, i + 1, b); + ret = insert(ret, i + 2, c); + ret = insert(ret, i + 3, d); + } + return ret; +} + +export uniform int width() { return programCount; } + + +bool ok(double x, double ref) { + bool r = (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-15; + if (any(r == false)) + print("mismatch got %, expected %\n", x, ref); + return r; +} + + +export void f_v(uniform float RET[]) { + double v = double4((14.300000190734863d0),(-6.699999809265137d0),(-21.200000762939453d0),(9.000000000000000d0)); + double ref = double4((-0.1621146247115303d0),(0.9143832254542971d0),(-0.7030295000381365d0),(-0.9111302618846769d0)); + RET[programIndex] = ok(cos(v), ref) ? 1. : 0.; +} +export void result(uniform float RET[]) { RET[programIndex] = 1.; } diff --git a/tests/transcendentals-7-0.ispc b/tests/transcendentals-7-0.ispc new file mode 100644 index 00000000..e75fe986 --- /dev/null +++ b/tests/transcendentals-7-0.ispc @@ -0,0 +1,23 @@ +static double double4(uniform double a, uniform double b, uniform double c, + uniform double d) { + double ret = 0; + for (uniform int i = 0; i < programCount; i += 4) { + ret = insert(ret, i + 0, a); + ret = insert(ret, i + 1, b); + ret = insert(ret, i + 2, c); + ret = insert(ret, i + 3, d); + } + return ret; +} + +export uniform int width() { return programCount; } + + +bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; } + +export void f_v(uniform float RET[]) { + double v = double4((-3.141592741012573d0),(0.000000000000000d0),(3.141592741012573d0),(6.283185482025146d0)); + double ref = double4((-8.742277955963587d-08),(0.000000000000000d0),(8.742277955963587d-08),(1.7484555911927306d-07)); + RET[programIndex] = ok(tan(v), ref) ? 1. : 0.; +} +export void result(uniform float RET[]) { RET[programIndex] = 1.; } diff --git a/tests/transcendentals-7-1.ispc b/tests/transcendentals-7-1.ispc new file mode 100644 index 00000000..8f36a8d4 --- /dev/null +++ b/tests/transcendentals-7-1.ispc @@ -0,0 +1,23 @@ +static double double4(uniform double a, uniform double b, uniform double c, + uniform double d) { + double ret = 0; + for (uniform int i = 0; i < programCount; i += 4) { + ret = insert(ret, i + 0, a); + ret = insert(ret, i + 1, b); + ret = insert(ret, i + 2, c); + ret = insert(ret, i + 3, d); + } + return ret; +} + +export uniform int width() { return programCount; } + + +bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; } + +export void f_v(uniform float RET[]) { + double v = double4((1.000000000000000d0),(-1.000000000000000d0),(0),(0)); + double ref = double4((1.557407724654902d0),(-1.557407724654902d0),(0),(0)); + RET[programIndex] = ok(tan(v), ref) ? 1. : 0.; +} +export void result(uniform float RET[]) { RET[programIndex] = 1.; } diff --git a/tests/transcendentals-7-2.ispc b/tests/transcendentals-7-2.ispc new file mode 100644 index 00000000..bfe7226c --- /dev/null +++ b/tests/transcendentals-7-2.ispc @@ -0,0 +1,23 @@ +static double double4(uniform double a, uniform double b, uniform double c, + uniform double d) { + double ret = 0; + for (uniform int i = 0; i < programCount; i += 4) { + ret = insert(ret, i + 0, a); + ret = insert(ret, i + 1, b); + ret = insert(ret, i + 2, c); + ret = insert(ret, i + 3, d); + } + return ret; +} + +export uniform int width() { return programCount; } + + +bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; } + +export void f_v(uniform float RET[]) { + double v = double4((-9.424777984619141d0),(4.000000000000000d0),(10.000000000000000d0),(-10.000000000000000d0)); + double ref = double4((-2.3849760909612074d-08),(1.1578212823495775d0),(0.6483608274590867d0),(-0.6483608274590867d0)); + RET[programIndex] = ok(tan(v), ref) ? 1. : 0.; +} +export void result(uniform float RET[]) { RET[programIndex] = 1.; } diff --git a/tests/transcendentals-7-3.ispc b/tests/transcendentals-7-3.ispc new file mode 100644 index 00000000..9b5843c4 --- /dev/null +++ b/tests/transcendentals-7-3.ispc @@ -0,0 +1,23 @@ +static double double4(uniform double a, uniform double b, uniform double c, + uniform double d) { + double ret = 0; + for (uniform int i = 0; i < programCount; i += 4) { + ret = insert(ret, i + 0, a); + ret = insert(ret, i + 1, b); + ret = insert(ret, i + 2, c); + ret = insert(ret, i + 3, d); + } + return ret; +} + +export uniform int width() { return programCount; } + + +bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; } + +export void f_v(uniform float RET[]) { + double v = double4((14.300000190734863d0),(-6.699999809265137d0),(-21.200000762939453d0),(9.000000000000000d0)); + double ref = double4((-6.086877942749462d0),(-0.442757188607329d0),(1.0115659250410816d0),(-0.4523156594418099d0)); + RET[programIndex] = ok(tan(v), ref) ? 1. : 0.; +} +export void result(uniform float RET[]) { RET[programIndex] = 1.; } diff --git a/tests/transcendentals-8-0.ispc b/tests/transcendentals-8-0.ispc new file mode 100644 index 00000000..52b3fe7e --- /dev/null +++ b/tests/transcendentals-8-0.ispc @@ -0,0 +1,23 @@ +static double double4(uniform double a, uniform double b, uniform double c, + uniform double d) { + double ret = 0; + for (uniform int i = 0; i < programCount; i += 4) { + ret = insert(ret, i + 0, a); + ret = insert(ret, i + 1, b); + ret = insert(ret, i + 2, c); + ret = insert(ret, i + 3, d); + } + return ret; +} + +export uniform int width() { return programCount; } + + +bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; } + +export void f_v(uniform float RET[]) { + double v = double4((-3.141592741012573d0),(0.000000000000000d0),(3.141592741012573d0),(6.283185482025146d0)); + double ref = double4((0.04321391448589156d0),(1.000000000000000d0),(23.140694655803028d0),(535.4917491531108d0)); + RET[programIndex] = ok(exp(v), ref) ? 1. : 0.; +} +export void result(uniform float RET[]) { RET[programIndex] = 1.; } diff --git a/tests/transcendentals-8-1.ispc b/tests/transcendentals-8-1.ispc new file mode 100644 index 00000000..ac166aaa --- /dev/null +++ b/tests/transcendentals-8-1.ispc @@ -0,0 +1,23 @@ +static double double4(uniform double a, uniform double b, uniform double c, + uniform double d) { + double ret = 0; + for (uniform int i = 0; i < programCount; i += 4) { + ret = insert(ret, i + 0, a); + ret = insert(ret, i + 1, b); + ret = insert(ret, i + 2, c); + ret = insert(ret, i + 3, d); + } + return ret; +} + +export uniform int width() { return programCount; } + + +bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; } + +export void f_v(uniform float RET[]) { + double v = double4((1.000000000000000d0),(-1.000000000000000d0),(-1.570796370506287d0),(1.570796370506287d0)); + double ref = double4((2.718281828459045d0),(0.36787944117144233d0),(0.2078795672640568d0),(4.810477591238011d0)); + RET[programIndex] = ok(exp(v), ref) ? 1. : 0.; +} +export void result(uniform float RET[]) { RET[programIndex] = 1.; } diff --git a/tests/transcendentals-8-2.ispc b/tests/transcendentals-8-2.ispc new file mode 100644 index 00000000..a250f690 --- /dev/null +++ b/tests/transcendentals-8-2.ispc @@ -0,0 +1,23 @@ +static double double4(uniform double a, uniform double b, uniform double c, + uniform double d) { + double ret = 0; + for (uniform int i = 0; i < programCount; i += 4) { + ret = insert(ret, i + 0, a); + ret = insert(ret, i + 1, b); + ret = insert(ret, i + 2, c); + ret = insert(ret, i + 3, d); + } + return ret; +} + +export uniform int width() { return programCount; } + + +bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; } + +export void f_v(uniform float RET[]) { + double v = double4((-9.424777984619141d0),(4.000000000000000d0),(10.000000000000000d0),(-10.000000000000000d0)); + double ref = double4((8.069951564564043d-05),(54.598150033144236d0),(22026.465794806718d0),(4.5399929762484854d-05)); + RET[programIndex] = ok(exp(v), ref) ? 1. : 0.; +} +export void result(uniform float RET[]) { RET[programIndex] = 1.; } diff --git a/tests/transcendentals-8-3.ispc b/tests/transcendentals-8-3.ispc new file mode 100644 index 00000000..7021fc37 --- /dev/null +++ b/tests/transcendentals-8-3.ispc @@ -0,0 +1,23 @@ +static double double4(uniform double a, uniform double b, uniform double c, + uniform double d) { + double ret = 0; + for (uniform int i = 0; i < programCount; i += 4) { + ret = insert(ret, i + 0, a); + ret = insert(ret, i + 1, b); + ret = insert(ret, i + 2, c); + ret = insert(ret, i + 3, d); + } + return ret; +} + +export uniform int width() { return programCount; } + + +bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; } + +export void f_v(uniform float RET[]) { + double v = double4((14.300000190734863d0),(-6.699999809265137d0),(-21.200000762939453d0),(9.000000000000000d0)); + double ref = double4((1623346.2946371625d0),(0.001230912137451317d0),(6.208070673019769d-10),(8103.083927575384d0)); + RET[programIndex] = ok(exp(v), ref) ? 1. : 0.; +} +export void result(uniform float RET[]) { RET[programIndex] = 1.; } diff --git a/tests/transcendentals-9-0.ispc b/tests/transcendentals-9-0.ispc new file mode 100644 index 00000000..8736677d --- /dev/null +++ b/tests/transcendentals-9-0.ispc @@ -0,0 +1,23 @@ +static double double4(uniform double a, uniform double b, uniform double c, + uniform double d) { + double ret = 0; + for (uniform int i = 0; i < programCount; i += 4) { + ret = insert(ret, i + 0, a); + ret = insert(ret, i + 1, b); + ret = insert(ret, i + 2, c); + ret = insert(ret, i + 3, d); + } + return ret; +} + +export uniform int width() { return programCount; } + + +bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; } + +export void f_v(uniform float RET[]) { + double v = double4((3.141602741012573d),(0.000010000000000d),(3.141602741012573d),(6.283195482025146d)); + double ref = double4((1.144733096770642d0),(-11.512925464970229d0),(1.144733096770642d0),(1.8378786857850002d0)); + RET[programIndex] = ok(log(v), ref) ? 1. : 0.; +} +export void result(uniform float RET[]) { RET[programIndex] = 1.; } diff --git a/tests/transcendentals-9-1.ispc b/tests/transcendentals-9-1.ispc new file mode 100644 index 00000000..3a69782e --- /dev/null +++ b/tests/transcendentals-9-1.ispc @@ -0,0 +1,23 @@ +static double double4(uniform double a, uniform double b, uniform double c, + uniform double d) { + double ret = 0; + for (uniform int i = 0; i < programCount; i += 4) { + ret = insert(ret, i + 0, a); + ret = insert(ret, i + 1, b); + ret = insert(ret, i + 2, c); + ret = insert(ret, i + 3, d); + } + return ret; +} + +export uniform int width() { return programCount; } + + +bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; } + +export void f_v(uniform float RET[]) { + double v = double4((1.000010000000000d0),(1.000010000000000d0),(1.570806370506287d0),(1.570806370506287d0)); + double ref = double4((9.999950000398841d-06),(9.999950000398841d-06),(0.4515890992942722d0),(0.4515890992942722d0)); + RET[programIndex] = ok(log(v), ref) ? 1. : 0.; +} +export void result(uniform float RET[]) { RET[programIndex] = 1.; } diff --git a/tests/transcendentals-9-2.ispc b/tests/transcendentals-9-2.ispc new file mode 100644 index 00000000..83113a42 --- /dev/null +++ b/tests/transcendentals-9-2.ispc @@ -0,0 +1,23 @@ +static double double4(uniform double a, uniform double b, uniform double c, + uniform double d) { + double ret = 0; + for (uniform int i = 0; i < programCount; i += 4) { + ret = insert(ret, i + 0, a); + ret = insert(ret, i + 1, b); + ret = insert(ret, i + 2, c); + ret = insert(ret, i + 3, d); + } + return ret; +} + +export uniform int width() { return programCount; } + + +bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; } + +export void f_v(uniform float RET[]) { + double v = double4((9.424787984619140d0),(4.000010000000000d0),(10.000010000000000d0),(10.000010000000000d0)); + double ref = double4((2.2433432380804366d0),(1.3862968611167654d0),(2.3025860929935456d0),(2.3025860929935456d0)); + RET[programIndex] = ok(log(v), ref) ? 1. : 0.; +} +export void result(uniform float RET[]) { RET[programIndex] = 1.; } diff --git a/tests/transcendentals-9-3.ispc b/tests/transcendentals-9-3.ispc new file mode 100644 index 00000000..22ef5e47 --- /dev/null +++ b/tests/transcendentals-9-3.ispc @@ -0,0 +1,23 @@ +static double double4(uniform double a, uniform double b, uniform double c, + uniform double d) { + double ret = 0; + for (uniform int i = 0; i < programCount; i += 4) { + ret = insert(ret, i + 0, a); + ret = insert(ret, i + 1, b); + ret = insert(ret, i + 2, c); + ret = insert(ret, i + 3, d); + } + return ret; +} + +export uniform int width() { return programCount; } + + +bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; } + +export void f_v(uniform float RET[]) { + double v = double4((14.300010190734863d0),(6.700009809265136d0),(21.200010762939453d0),(9.000010000000000d0)); + double ref = double4((2.6602602499044092d0),(1.902108990465272d0),(3.0540016893636612d0),(2.197225688446713d0)); + RET[programIndex] = ok(log(v), ref) ? 1. : 0.; +} +export void result(uniform float RET[4]) { RET[programIndex] = 1.; } From 193bba77b08a0c49e068f8d791e2a432028aa6d7 Mon Sep 17 00:00:00 2001 From: evghenii Date: Tue, 11 Feb 2014 11:49:03 +0100 Subject: [PATCH 7/7] accuracy fix --- tests/transcendentals-5-0.ispc | 2 +- tests/transcendentals-5-1.ispc | 2 +- tests/transcendentals-5-2.ispc | 2 +- tests/transcendentals-5-3.ispc | 2 +- tests/transcendentals-6-0.ispc | 2 +- tests/transcendentals-6-1.ispc | 2 +- tests/transcendentals-6-2.ispc | 2 +- tests/transcendentals-6-3.ispc | 2 +- tests/transcendentals-7-0.ispc | 2 +- tests/transcendentals-7-1.ispc | 2 +- tests/transcendentals-7-2.ispc | 2 +- tests/transcendentals-7-3.ispc | 2 +- tests/transcendentals-8-0.ispc | 2 +- tests/transcendentals-8-1.ispc | 2 +- tests/transcendentals-8-2.ispc | 2 +- tests/transcendentals-8-3.ispc | 2 +- tests/transcendentals-9-0.ispc | 2 +- tests/transcendentals-9-1.ispc | 2 +- tests/transcendentals-9-2.ispc | 2 +- tests/transcendentals-9-3.ispc | 2 +- 20 files changed, 20 insertions(+), 20 deletions(-) diff --git a/tests/transcendentals-5-0.ispc b/tests/transcendentals-5-0.ispc index c445b8a9..562050df 100644 --- a/tests/transcendentals-5-0.ispc +++ b/tests/transcendentals-5-0.ispc @@ -13,7 +13,7 @@ static double double4(uniform double a, uniform double b, uniform double c, export uniform int width() { return programCount; } -bool ok(double x, double ref) { return (abs(x - ref) < 1e-16) || abs((x-ref)/ref) < 1e-15; } +bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-14; } export void f_v(uniform float RET[]) { double v = double4((-3.141592741012573d0),(0.000000000000000d0),(3.141592741012573d0),(6.283185482025146d0)); diff --git a/tests/transcendentals-5-1.ispc b/tests/transcendentals-5-1.ispc index 7d9980a4..fd1e1506 100644 --- a/tests/transcendentals-5-1.ispc +++ b/tests/transcendentals-5-1.ispc @@ -13,7 +13,7 @@ static double double4(uniform double a, uniform double b, uniform double c, export uniform int width() { return programCount; } -bool ok(double x, double ref) { return (abs(x - ref) < 1e-16) || abs((x-ref)/ref) < 1e-15; } +bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-14; } export void f_v(uniform float RET[]) { double v = double4((1.000000000000000d0),(-1.000000000000000d0),(-1.5707963705062866d0),(1.5707963705062866d0)); diff --git a/tests/transcendentals-5-2.ispc b/tests/transcendentals-5-2.ispc index 38247397..2a50730a 100644 --- a/tests/transcendentals-5-2.ispc +++ b/tests/transcendentals-5-2.ispc @@ -14,7 +14,7 @@ export uniform int width() { return programCount; } bool ok(double x, double ref) { - bool r = (abs(x - ref) < 1e-14) || abs((x-ref)/ref) < 1e-14; + bool r = (abs(x - ref) < 1d-14) || abs((x-ref)/ref) < 1d-14; if (any(r == false)) print("mismatch got %, expected %\n", x, ref); return r; diff --git a/tests/transcendentals-5-3.ispc b/tests/transcendentals-5-3.ispc index 496f0b68..dbde3751 100644 --- a/tests/transcendentals-5-3.ispc +++ b/tests/transcendentals-5-3.ispc @@ -13,7 +13,7 @@ static double double4(uniform double a, uniform double b, uniform double c, export uniform int width() { return programCount; } -bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; } +bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-14; } export void f_v(uniform float RET[]) { double v = double4((14.300000190734863d0),(-6.699999809265137d0),(-21.200000762939453d0),(9.000000000000000d0)); diff --git a/tests/transcendentals-6-0.ispc b/tests/transcendentals-6-0.ispc index 2b59989a..ae3dac39 100644 --- a/tests/transcendentals-6-0.ispc +++ b/tests/transcendentals-6-0.ispc @@ -13,7 +13,7 @@ static double double4(uniform double a, uniform double b, uniform double c, export uniform int width() { return programCount; } -bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; } +bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-14; } export void f_v(uniform float RET[]) { double v = double4((-3.141592653589793d0),(0.000000000000000d0),(3.141592653589793d0),(6.283185307179586d0)); diff --git a/tests/transcendentals-6-1.ispc b/tests/transcendentals-6-1.ispc index b460af9f..8f59c54d 100644 --- a/tests/transcendentals-6-1.ispc +++ b/tests/transcendentals-6-1.ispc @@ -13,7 +13,7 @@ static double double4(uniform double a, uniform double b, uniform double c, export uniform int width() { return programCount; } -bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; } +bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-14; } export void f_v(uniform float RET[]) { double v = double4((1.000000000000000d0),(-1.000000000000000d0),(-1.570796370506287d0),(1.570796370506287d0)); diff --git a/tests/transcendentals-6-2.ispc b/tests/transcendentals-6-2.ispc index 5bcb1949..80ee9f7b 100644 --- a/tests/transcendentals-6-2.ispc +++ b/tests/transcendentals-6-2.ispc @@ -13,7 +13,7 @@ static double double4(uniform double a, uniform double b, uniform double c, export uniform int width() { return programCount; } -bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; } +bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-14; } export void f_v(uniform float RET[]) { double v = double4((-9.42477796076938d0),(4.000000000000000d0),(10.000000000000000d0),(-10.000000000000000d0)); diff --git a/tests/transcendentals-6-3.ispc b/tests/transcendentals-6-3.ispc index 6d738f92..e64c4b82 100644 --- a/tests/transcendentals-6-3.ispc +++ b/tests/transcendentals-6-3.ispc @@ -14,7 +14,7 @@ export uniform int width() { return programCount; } bool ok(double x, double ref) { - bool r = (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-15; + bool r = (abs(x - ref) < 1d-14) || abs((x-ref)/ref) < 1d-14; if (any(r == false)) print("mismatch got %, expected %\n", x, ref); return r; diff --git a/tests/transcendentals-7-0.ispc b/tests/transcendentals-7-0.ispc index e75fe986..e59f1c7a 100644 --- a/tests/transcendentals-7-0.ispc +++ b/tests/transcendentals-7-0.ispc @@ -13,7 +13,7 @@ static double double4(uniform double a, uniform double b, uniform double c, export uniform int width() { return programCount; } -bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; } +bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-14; } export void f_v(uniform float RET[]) { double v = double4((-3.141592741012573d0),(0.000000000000000d0),(3.141592741012573d0),(6.283185482025146d0)); diff --git a/tests/transcendentals-7-1.ispc b/tests/transcendentals-7-1.ispc index 8f36a8d4..0b3a9c66 100644 --- a/tests/transcendentals-7-1.ispc +++ b/tests/transcendentals-7-1.ispc @@ -13,7 +13,7 @@ static double double4(uniform double a, uniform double b, uniform double c, export uniform int width() { return programCount; } -bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; } +bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-14; } export void f_v(uniform float RET[]) { double v = double4((1.000000000000000d0),(-1.000000000000000d0),(0),(0)); diff --git a/tests/transcendentals-7-2.ispc b/tests/transcendentals-7-2.ispc index bfe7226c..4227981d 100644 --- a/tests/transcendentals-7-2.ispc +++ b/tests/transcendentals-7-2.ispc @@ -13,7 +13,7 @@ static double double4(uniform double a, uniform double b, uniform double c, export uniform int width() { return programCount; } -bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; } +bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-14; } export void f_v(uniform float RET[]) { double v = double4((-9.424777984619141d0),(4.000000000000000d0),(10.000000000000000d0),(-10.000000000000000d0)); diff --git a/tests/transcendentals-7-3.ispc b/tests/transcendentals-7-3.ispc index 9b5843c4..aa6e1380 100644 --- a/tests/transcendentals-7-3.ispc +++ b/tests/transcendentals-7-3.ispc @@ -13,7 +13,7 @@ static double double4(uniform double a, uniform double b, uniform double c, export uniform int width() { return programCount; } -bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; } +bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-14; } export void f_v(uniform float RET[]) { double v = double4((14.300000190734863d0),(-6.699999809265137d0),(-21.200000762939453d0),(9.000000000000000d0)); diff --git a/tests/transcendentals-8-0.ispc b/tests/transcendentals-8-0.ispc index 52b3fe7e..761d6e5a 100644 --- a/tests/transcendentals-8-0.ispc +++ b/tests/transcendentals-8-0.ispc @@ -13,7 +13,7 @@ static double double4(uniform double a, uniform double b, uniform double c, export uniform int width() { return programCount; } -bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; } +bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-14; } export void f_v(uniform float RET[]) { double v = double4((-3.141592741012573d0),(0.000000000000000d0),(3.141592741012573d0),(6.283185482025146d0)); diff --git a/tests/transcendentals-8-1.ispc b/tests/transcendentals-8-1.ispc index ac166aaa..f9433aab 100644 --- a/tests/transcendentals-8-1.ispc +++ b/tests/transcendentals-8-1.ispc @@ -13,7 +13,7 @@ static double double4(uniform double a, uniform double b, uniform double c, export uniform int width() { return programCount; } -bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; } +bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-14; } export void f_v(uniform float RET[]) { double v = double4((1.000000000000000d0),(-1.000000000000000d0),(-1.570796370506287d0),(1.570796370506287d0)); diff --git a/tests/transcendentals-8-2.ispc b/tests/transcendentals-8-2.ispc index a250f690..493b1632 100644 --- a/tests/transcendentals-8-2.ispc +++ b/tests/transcendentals-8-2.ispc @@ -13,7 +13,7 @@ static double double4(uniform double a, uniform double b, uniform double c, export uniform int width() { return programCount; } -bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; } +bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-14; } export void f_v(uniform float RET[]) { double v = double4((-9.424777984619141d0),(4.000000000000000d0),(10.000000000000000d0),(-10.000000000000000d0)); diff --git a/tests/transcendentals-8-3.ispc b/tests/transcendentals-8-3.ispc index 7021fc37..38101635 100644 --- a/tests/transcendentals-8-3.ispc +++ b/tests/transcendentals-8-3.ispc @@ -13,7 +13,7 @@ static double double4(uniform double a, uniform double b, uniform double c, export uniform int width() { return programCount; } -bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; } +bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-14; } export void f_v(uniform float RET[]) { double v = double4((14.300000190734863d0),(-6.699999809265137d0),(-21.200000762939453d0),(9.000000000000000d0)); diff --git a/tests/transcendentals-9-0.ispc b/tests/transcendentals-9-0.ispc index 8736677d..8c51e240 100644 --- a/tests/transcendentals-9-0.ispc +++ b/tests/transcendentals-9-0.ispc @@ -13,7 +13,7 @@ static double double4(uniform double a, uniform double b, uniform double c, export uniform int width() { return programCount; } -bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; } +bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-13; } export void f_v(uniform float RET[]) { double v = double4((3.141602741012573d),(0.000010000000000d),(3.141602741012573d),(6.283195482025146d)); diff --git a/tests/transcendentals-9-1.ispc b/tests/transcendentals-9-1.ispc index 3a69782e..0b01c322 100644 --- a/tests/transcendentals-9-1.ispc +++ b/tests/transcendentals-9-1.ispc @@ -13,7 +13,7 @@ static double double4(uniform double a, uniform double b, uniform double c, export uniform int width() { return programCount; } -bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; } +bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-14; } export void f_v(uniform float RET[]) { double v = double4((1.000010000000000d0),(1.000010000000000d0),(1.570806370506287d0),(1.570806370506287d0)); diff --git a/tests/transcendentals-9-2.ispc b/tests/transcendentals-9-2.ispc index 83113a42..2391920b 100644 --- a/tests/transcendentals-9-2.ispc +++ b/tests/transcendentals-9-2.ispc @@ -13,7 +13,7 @@ static double double4(uniform double a, uniform double b, uniform double c, export uniform int width() { return programCount; } -bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; } +bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-14; } export void f_v(uniform float RET[]) { double v = double4((9.424787984619140d0),(4.000010000000000d0),(10.000010000000000d0),(10.000010000000000d0)); diff --git a/tests/transcendentals-9-3.ispc b/tests/transcendentals-9-3.ispc index 22ef5e47..ed787a5c 100644 --- a/tests/transcendentals-9-3.ispc +++ b/tests/transcendentals-9-3.ispc @@ -13,7 +13,7 @@ static double double4(uniform double a, uniform double b, uniform double c, export uniform int width() { return programCount; } -bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; } +bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-14; } export void f_v(uniform float RET[]) { double v = double4((14.300010190734863d0),(6.700009809265136d0),(21.200010762939453d0),(9.000010000000000d0));