From 668645fcda64201f590f24d76c70030f408a37ea Mon Sep 17 00:00:00 2001
From: Evghenii <egaburov@dds.nl>
Date: Fri, 7 Feb 2014 11:05:36 +0100
Subject: [PATCH 1/7] add Target::hasTrigonometry and __have_native_trigonometry

---
 builtins.cpp | 2 ++
 ispc.cpp     | 6 ++++++
 ispc.h       | 5 +++++
 3 files changed, 13 insertions(+)

diff --git a/builtins.cpp b/builtins.cpp
index fee322e7..581a712a 100644
--- a/builtins.cpp
+++ b/builtins.cpp
@@ -1150,6 +1150,8 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
                        symbolTable);
     lDefineConstantInt("__have_native_transcendentals", g->target->hasTranscendentals(),
                        module, symbolTable);
+    lDefineConstantInt("__have_native_trigonometry", g->target->hasTrigonometry(),
+                       module, symbolTable);
     lDefineConstantInt("__have_native_rsqrtd", g->target->hasRsqrtd(),
                        module, symbolTable);
     lDefineConstantInt("__have_native_rcpd", g->target->hasRcpd(),
diff --git a/ispc.cpp b/ispc.cpp
index 1386d65e..0792291e 100644
--- a/ispc.cpp
+++ b/ispc.cpp
@@ -202,6 +202,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
     m_hasGather(false),
     m_hasScatter(false),
     m_hasTranscendentals(false),
+    m_hasTrigonometry(false),
     m_hasRsqrtd(false),
     m_hasRcpd(false)
 {
@@ -420,6 +421,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
         this->m_maskBitCount = 1;
         this->m_hasHalf = true;
         this->m_hasTranscendentals = true;
+        this->m_hasTrigonometry = true;
         this->m_hasGather = this->m_hasScatter = true;
         this->m_hasRsqrtd = this->m_hasRcpd = true;
     }
@@ -433,6 +435,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
         this->m_maskBitCount = 1;
         this->m_hasHalf = true;
         this->m_hasTranscendentals = true;
+        this->m_hasTrigonometry = true;
         this->m_hasGather = this->m_hasScatter = true;
         this->m_hasRsqrtd = this->m_hasRcpd = true;
     }
@@ -446,6 +449,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
         this->m_maskBitCount = 1;
         this->m_hasHalf = true;
         this->m_hasTranscendentals = true;
+        this->m_hasTrigonometry = true;
         this->m_hasGather = this->m_hasScatter = true;
         this->m_hasRsqrtd = this->m_hasRcpd = true;
     }
@@ -459,6 +463,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
         this->m_maskBitCount = 1;
         this->m_hasHalf = true;
         this->m_hasTranscendentals = true;
+        this->m_hasTrigonometry = true;
         this->m_hasGather = this->m_hasScatter = true;
         this->m_hasRsqrtd = this->m_hasRcpd = true;
     }
@@ -472,6 +477,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
         this->m_maskBitCount = 1;
         this->m_hasHalf = true;
         this->m_hasTranscendentals = true;
+        this->m_hasTrigonometry = true;
         this->m_hasGather = this->m_hasScatter = true;
         this->m_hasRsqrtd = this->m_hasRcpd = true;
     }
diff --git a/ispc.h b/ispc.h
index 4b6df8c3..5e554bf7 100644
--- a/ispc.h
+++ b/ispc.h
@@ -282,6 +282,8 @@ public:
 
     bool hasTranscendentals() const {return m_hasTranscendentals;}
     
+    bool hasTrigonometry() const {return m_hasTrigonometry;}
+    
     bool hasRsqrtd() const {return m_hasRsqrtd;}
     
     bool hasRcpd() const {return m_hasRcpd;}
@@ -385,6 +387,9 @@ private:
         sqrt, which we assume that all of them handle). */
     bool m_hasTranscendentals;
     
+    /** Indicates whether the target has ISA support for trigonometry */
+    bool m_hasTrigonometry;
+    
     /** Indicates whether there is an ISA double precision rsqrt. */
     bool m_hasRsqrtd;
     

From 81aa19a8f0b11f14c9c95ce8795ff8a5d6dd868f Mon Sep 17 00:00:00 2001
From: Evghenii <egaburov@dds.nl>
Date: Fri, 7 Feb 2014 11:49:24 +0100
Subject: [PATCH 2/7] added use of native_transcendentals, need to add IR

---
 stdlib.ispc | 227 +++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 163 insertions(+), 64 deletions(-)

diff --git a/stdlib.ispc b/stdlib.ispc
index 24217cd0..eb5ee9c4 100644
--- a/stdlib.ispc
+++ b/stdlib.ispc
@@ -2298,8 +2298,12 @@ static inline uniform float frexp(uniform float x, uniform int * uniform pw2) {
 
 __declspec(safe)
 static inline float sin(float x_full) {
-    if (__math_lib == __math_lib_svml) {
-        return __svml_sinf(x_full);
+    if (__have_native_trigonometry) 
+    {
+      return __sin_varying_float(x_full);
+    }
+    else if (__math_lib == __math_lib_svml) {
+      return __svml_sinf(x_full);
     }
     else if (__math_lib == __math_lib_system) {
         float ret;
@@ -2360,7 +2364,11 @@ static inline float sin(float x_full) {
 
 __declspec(safe)
 static inline uniform float sin(uniform float x_full) {
-    if (__math_lib == __math_lib_system ||
+    if (__have_native_trigonometry) 
+    {
+      return __sin_uniform_float(x_full);
+    }
+    else if (__math_lib == __math_lib_system ||
         __math_lib == __math_lib_svml) {
         return __stdlib_sinf(x_full);
     }
@@ -2432,7 +2440,11 @@ static inline float asin(float x) {
     bool isnan = (x > 1);
 
     float v;
-    if (__math_lib == __math_lib_svml) {
+    if (__have_native_trigonometry) 
+    {
+          return __asin_varying_float(x_full);
+    }
+    else if (__math_lib == __math_lib_svml) {
         return __svml_asinf(x);
     } 
     else if (__math_lib == __math_lib_system) {
@@ -2491,7 +2503,11 @@ static inline uniform float asin(uniform float x) {
     uniform bool isnan = (x > 1);
 
     uniform float v;
-    if (__math_lib == __math_lib_svml ||
+    if (__have_native_trigonometry) 
+    {
+      return __asin_uniform_float(x_full);
+    }
+    else if (__math_lib == __math_lib_svml ||
         __math_lib == __math_lib_system) {
         return __stdlib_asinf(x);
     }
@@ -2537,6 +2553,10 @@ static inline uniform float asin(uniform float x) {
 
 __declspec(safe)
 static inline float cos(float x_full) {
+    if (__have_native_trigonometry) 
+    {
+      return __cos_varying_float(x_full);
+    }
     if (__math_lib == __math_lib_svml) {
         return __svml_cosf(x_full);
     }
@@ -2598,7 +2618,11 @@ static inline float cos(float x_full) {
 
 __declspec(safe)
 static inline uniform float cos(uniform float x_full) {
-    if (__math_lib == __math_lib_system ||
+    if (__have_native_trigonometry) 
+    {
+      return __cos_uniform_float(x_full);
+    }
+    else if (__math_lib == __math_lib_system ||
         __math_lib == __math_lib_svml) {
         return __stdlib_cosf(x_full);
     }
@@ -2686,6 +2710,10 @@ static inline uniform double acos(const uniform double v) {
 __declspec(safe)
 static inline void sincos(float x_full, varying float * uniform sin_result, 
                           varying float * uniform cos_result) {
+    if (__have_native_trigonometry) 
+    {
+      __sincos_varying_float(x_full,sin_result,cos_result);
+    }
     if (__math_lib == __math_lib_svml) {
         __svml_sincosf(x_full, sin_result, cos_result);
     }
@@ -2756,6 +2784,10 @@ static inline void sincos(float x_full, varying float * uniform sin_result,
 __declspec(safe)
 static inline void sincos(uniform float x_full, uniform float * uniform sin_result,
                           uniform float * uniform cos_result) {
+    if (__have_native_trigonometry) 
+    {
+      __sincos_uniform_float(x_full, sin_result, cos_result);
+    }
     if (__math_lib == __math_lib_system ||
         __math_lib == __math_lib_svml) {
         __stdlib_sincosf(x_full, sin_result, cos_result);
@@ -2818,7 +2850,11 @@ static inline void sincos(uniform float x_full, uniform float * uniform sin_resu
 
 __declspec(safe)
 static inline float tan(float x_full) {
-    if (__math_lib == __math_lib_svml) {
+    if (__have_native_trigonometry) 
+    {
+      return __tan_varying_float(x_full);
+    }
+    else if (__math_lib == __math_lib_svml) {
         return __svml_tanf(x_full);
     }
     else if (__math_lib == __math_lib_system) {
@@ -2897,7 +2933,11 @@ static inline float tan(float x_full) {
 
 __declspec(safe)
 static inline uniform float tan(uniform float x_full) {
-    if (__math_lib == __math_lib_system ||
+    if (__have_native_trigonometry) 
+    {
+      return __tan_uniform_float(x_full);
+    }
+    else if (__math_lib == __math_lib_system ||
         __math_lib == __math_lib_svml) {
         return __stdlib_tanf(x_full);
     }
@@ -2969,7 +3009,11 @@ static inline uniform float tan(uniform float x_full) {
 
 __declspec(safe)
 static inline float atan(float x_full) {
-    if (__math_lib == __math_lib_svml) {
+    if (__have_native_trigonometry) 
+    {
+      return __atan_varying_float(x_full);
+    }
+    else if (__math_lib == __math_lib_svml) {
         return __svml_atanf(x_full);
     }
     else if (__math_lib == __math_lib_system) {
@@ -3020,7 +3064,11 @@ static inline float atan(float x_full) {
 
 __declspec(safe)
 static inline uniform float atan(uniform float x_full) {
-    if (__math_lib == __math_lib_system ||
+    if (__have_native_trigonometry) 
+    {
+      return __atan_uniform_float(x_full);
+    }
+    else if (__math_lib == __math_lib_system ||
         __math_lib == __math_lib_svml) {
         return __stdlib_atanf(x_full);
     }
@@ -3064,7 +3112,11 @@ static inline uniform float atan(uniform float x_full) {
 
 __declspec(safe)
 static inline float atan2(float y, float x) {
-    if (__math_lib == __math_lib_svml) {
+    if (__have_native_trigonometry) 
+    {
+      return __atan2_varying_float(y,x);
+    }
+    else if (__math_lib == __math_lib_svml) {
         return __svml_atan2f(y, x);
     }
     else if (__math_lib == __math_lib_system) {
@@ -3103,7 +3155,11 @@ static inline float atan2(float y, float x) {
 
 __declspec(safe)
 static inline uniform float atan2(uniform float y, uniform float x) {
-    if (__math_lib == __math_lib_system ||
+    if (__have_native_trigonometry) 
+    {
+      return __atan2_uniform_float(y,x);
+    }
+    else if (__math_lib == __math_lib_system ||
         __math_lib == __math_lib_svml) {
         return __stdlib_atan2f(y, x);
     }
@@ -3638,12 +3694,14 @@ static inline uniform double frexp(uniform double x, uniform int * uniform pw2)
 
 __declspec(safe)
 static inline double sin(double x) {
-    if (__math_lib == __math_lib_svml) 
+    if (__have_native_trigonometry) 
+    {
+      return __sin_varying_double(x_full);
+    }
+    else if (__math_lib == __math_lib_svml) 
     {
       return __svml_sind(x);
     }
-    else if (__math_lib == __math_lib_ispc_fast)
-        return sin((float)x);
     else {
         double ret;
         foreach_active (i) {
@@ -3653,23 +3711,46 @@ static inline double sin(double x) {
         return ret;
     }
 }
+__declspec(safe)
+static inline double asin(double x) {
+    if (__have_native_trigonometry) 
+    {
+      return __asin_varying_double(x_full);
+    }
+    else if (__math_lib == __math_lib_svml) 
+    {
+      return __svml_asind(x);
+    }
+    else {
+        double ret;
+        foreach_active (i) {
+            uniform double r = __stdlib_asin(extract(x, i));
+            ret = insert(ret, i, r);
+        }
+        return ret;
+    }
+}
 
 __declspec(safe)
 static inline uniform double sin(uniform double x) {
-    if (__math_lib == __math_lib_ispc_fast)
-        return sin((float)x);
+    if (__have_native_trigonometry) 
+    {
+      return __sin_uniform_double(x_full);
+    }
     else
         return __stdlib_sin(x);
 }
 
 __declspec(safe)
 static inline double asin(const double x) {
-    if (__math_lib == __math_lib_svml) 
+    if (__have_native_trigonometry) 
+    {
+      return __asin_varying_double(x_full);
+    }
+    else if (__math_lib == __math_lib_svml) 
     {
       return __svml_asind(x);
     }
-    else if (__math_lib == __math_lib_ispc_fast)
-        return asin((float)x);
     else {
         double ret;
         foreach_active (i) {
@@ -3682,12 +3763,14 @@ static inline double asin(const double x) {
 
 __declspec(safe)
 static inline double cos(const double x) {
+    if (__have_native_trigonometry) 
+    {
+      return __cos_varying_double(x_full);
+    }
     if (__math_lib == __math_lib_svml) 
     {
       return __svml_cosd(x);
     }
-    else if (__math_lib == __math_lib_ispc_fast)
-        return cos((float)x);
     else {
         double ret;
         foreach_active (i) {
@@ -3700,8 +3783,10 @@ static inline double cos(const double x) {
 
 __declspec(safe)
 static inline uniform double cos(uniform double x) {
-    if (__math_lib == __math_lib_ispc_fast)
-        return cos((float)x);
+    if (__have_native_trigonometry) 
+    {
+      return __cos_uniform_double(x_full);
+    }
     else
         return __stdlib_cos(x);
 }
@@ -3709,16 +3794,14 @@ static inline uniform double cos(uniform double x) {
 __declspec(safe)
 static inline void sincos(double x, varying double * uniform sin_result,
                           varying double * uniform cos_result) {
+    if (__have_native_trigonometry) 
+    {
+      __sincos_varying_double(x_full),sin_result,cos_result);
+    }
     if (__math_lib == __math_lib_svml) 
     {
       __svml_sincosd(x, sin_result, cos_result);
     }
-    else if (__math_lib == __math_lib_ispc_fast) {
-        float sr, cr;
-        sincos((float)x, &sr, &cr);
-        *sin_result = sr;
-        *cos_result = cr;
-    }
     else {
         foreach_active (i) {
             uniform double sr, cr;
@@ -3732,11 +3815,9 @@ static inline void sincos(double x, varying double * uniform sin_result,
 __declspec(safe)
 static inline void sincos(uniform double x, uniform double * uniform sin_result,
                           uniform double * uniform cos_result) {
-    if (__math_lib == __math_lib_ispc_fast) {
-        uniform float sr, cr;
-        sincos((uniform float)x, &sr, &cr);
-        *sin_result = sr;
-        *cos_result = cr;
+    if (__have_native_trigonometry) 
+    {
+      __sincos_uniform_double(x_full,sin_result, cos_result);
     }
     else
         __stdlib_sincos(x, sin_result, cos_result);
@@ -3744,12 +3825,14 @@ static inline void sincos(uniform double x, uniform double * uniform sin_result,
 
 __declspec(safe)
 static inline double tan(double x) {
-    if (__math_lib == __math_lib_svml) 
+    if (__have_native_trigonometry) 
+    {
+      return __tan_varying_double(x_full);
+    }
+    else if (__math_lib == __math_lib_svml) 
     {
       return __svml_tand(x);
     }
-    else if (__math_lib == __math_lib_ispc_fast)
-        return tan((float)x);
     else {
         double ret;
         foreach_active (i) {
@@ -3762,16 +3845,20 @@ static inline double tan(double x) {
 
 __declspec(safe)
 static inline uniform double tan(uniform double x) {
-    if (__math_lib == __math_lib_ispc_fast)
-        return tan((float)x);
+    if (__have_native_trigonometry) 
+    {
+      return __tan_uniform_double(x_full);
+    }
     else
         return __stdlib_tan(x);
 }
 
 __declspec(safe)
 static inline double atan(double x) {
-    if (__math_lib == __math_lib_ispc_fast)
-        return atan((float)x);
+    if (__have_native_trigonometry) 
+    {
+      return __atan_varying_double(x_full);
+    }
     else {
         double ret;
         foreach_active (i) {
@@ -3784,20 +3871,24 @@ static inline double atan(double x) {
 
 __declspec(safe)
 static inline uniform double atan(uniform double x) {
-    if (__math_lib == __math_lib_ispc_fast)
-        return atan((float)x);
+    if (__have_native_trigonometry) 
+    {
+      return __atan_uniform_double(x_full);
+    }
     else
         return __stdlib_atan(x);
 }
 
 __declspec(safe)
 static inline double atan2(double y, double x) {
-    if (__math_lib == __math_lib_svml) 
+    if (__have_native_trigonometry) 
+    {
+      return __atan2_varying_double(y,x);
+    }
+    else if (__math_lib == __math_lib_svml) 
     {
       return __svml_atan2d(y,x);
     }
-    else if (__math_lib == __math_lib_ispc_fast)
-        return atan2((float)y, (float)x);
     else {
         double ret;
         foreach_active (i) {
@@ -3810,20 +3901,23 @@ static inline double atan2(double y, double x) {
 
 __declspec(safe)
 static inline uniform double atan2(uniform double y, uniform double x) {
-    if (__math_lib == __math_lib_ispc_fast)
-        return atan2((float)y, (float)x);
+    if (__have_native_trigonometry) 
+    {
+      return __atan2_uniform_double(y,x);
+    }
     else
         return __stdlib_atan2(y, x);
 }
 
 __declspec(safe)
 static inline double exp(double x) {
-    if (__math_lib == __math_lib_svml) 
+    if (__have_native_transcendentals) {
+        return __exp_varying_double(x_full);
+    }
+    else if (__math_lib == __math_lib_svml) 
     {
         return __svml_expd(x);
     }
-    else if (__math_lib == __math_lib_ispc_fast)
-        return exp((float)x);
     else {
         double ret;
         foreach_active (i) {
@@ -3836,20 +3930,22 @@ static inline double exp(double x) {
 
 __declspec(safe)
 static inline uniform double exp(uniform double x) {
-    if (__math_lib == __math_lib_ispc_fast)
-        return exp((float)x);
+    if (__have_native_transcendentals) {
+        return __exp_uniform_double(x_full);
+    }
     else
         return __stdlib_exp(x);
 }
 
 __declspec(safe)
 static inline double log(double x) {
-    if (__math_lib == __math_lib_svml) 
+    if (__have_native_transcendentals) {
+        return __log_varying_double(x_full);
+    }
+    else if (__math_lib == __math_lib_svml) 
     {
         return __svml_logd(x);
     }
-    else if (__math_lib == __math_lib_ispc_fast)
-        return log((float)x);
     else {
         double ret;
         foreach_active (i) {
@@ -3862,20 +3958,22 @@ static inline double log(double x) {
 
 __declspec(safe)
 static inline uniform double log(uniform double x) {
-    if (__math_lib == __math_lib_ispc_fast)
-        return log((float)x);
+    if (__have_native_transcendentals) {
+        return __log_uniform_double(x_full);
+    }
     else
         return __stdlib_log(x);
 }
 
 __declspec(safe)
 static inline double pow(double a, double b) {
-    if (__math_lib == __math_lib_svml) 
+    if (__have_native_transcendentals) {
+        return __pow_varying_double(a,b);
+    }
+    else if (__math_lib == __math_lib_svml) 
     {
         return __svml_powd(a,b);
     }
-    else if (__math_lib == __math_lib_ispc_fast)
-        return pow((float)a, (float)b);
     else {
         double ret;
         foreach_active (i) {
@@ -3888,8 +3986,9 @@ static inline double pow(double a, double b) {
 
 __declspec(safe)
 static inline uniform double pow(uniform double a, uniform double b) {
-    if (__math_lib == __math_lib_ispc_fast)
-        return pow((float)a, (float)b);
+    if (__have_native_transcendentals) {
+        return __pow_uniform_double(a,b);
+    }
     else
         return __stdlib_pow(a, b);
 }

From 70a9b286e5fa3f1f61d56d98e02f6749983a2ad1 Mon Sep 17 00:00:00 2001
From: Evghenii <egaburov@dds.nl>
Date: Fri, 7 Feb 2014 15:28:39 +0100
Subject: [PATCH 3/7] added support for native and double precision
 trigonometry/transcendentals

---
 builtins/target-avx-x2.ll         |   2 +
 builtins/target-avx.ll            |   3 +
 builtins/target-avx1-i64x4base.ll |   3 +
 builtins/target-generic-1.ll      |   3 +
 builtins/target-generic-common.ll |  12 ++-
 builtins/target-neon-16.ll        |   3 +
 builtins/target-neon-32.ll        |   3 +
 builtins/target-neon-8.ll         |   3 +
 builtins/target-sse2-x2.ll        |   3 +
 builtins/target-sse2.ll           |   3 +
 builtins/target-sse4-16.ll        |   3 +
 builtins/target-sse4-8.ll         |   3 +
 builtins/target-sse4-x2.ll        |   3 +
 builtins/target-sse4.ll           |   3 +
 builtins/util.m4                  |  62 ++++++++++++--
 examples/intrinsics/generic-16.h  | 127 +++++++++++++++++++++--------
 examples/intrinsics/sse4.h        | 130 ++++++++++++++++++++++--------
 stdlib.ispc                       |  79 +++++++++++-------
 18 files changed, 340 insertions(+), 108 deletions(-)

diff --git a/builtins/target-avx-x2.ll b/builtins/target-avx-x2.ll
index b3a77871..a110dfef 100644
--- a/builtins/target-avx-x2.ll
+++ b/builtins/target-avx-x2.ll
@@ -694,3 +694,5 @@ define <16 x double> @__max_varying_double(<16 x double>, <16 x double>) nounwin
 rsqrtd_decl()
 rcpd_decl()
 
+transcendetals_decl()
+trigonometry_decl()
diff --git a/builtins/target-avx.ll b/builtins/target-avx.ll
index 9738f9d3..e0f4e45d 100644
--- a/builtins/target-avx.ll
+++ b/builtins/target-avx.ll
@@ -564,3 +564,6 @@ gen_scatter(double)
 
 rsqrtd_decl()
 rcpd_decl()
+
+transcendetals_decl()
+trigonometry_decl()
diff --git a/builtins/target-avx1-i64x4base.ll b/builtins/target-avx1-i64x4base.ll
index a6601a28..3bd28c02 100644
--- a/builtins/target-avx1-i64x4base.ll
+++ b/builtins/target-avx1-i64x4base.ll
@@ -513,3 +513,6 @@ define <4 x double> @__max_varying_double(<4 x double>, <4 x double>) nounwind r
 
 rsqrtd_decl()
 rcpd_decl()
+
+transcendetals_decl()
+trigonometry_decl()
diff --git a/builtins/target-generic-1.ll b/builtins/target-generic-1.ll
index 3dcd8373..1178e98c 100644
--- a/builtins/target-generic-1.ll
+++ b/builtins/target-generic-1.ll
@@ -997,3 +997,6 @@ define_avgs()
 
 rsqrtd_decl()
 rcpd_decl()
+
+transcendetals_decl()
+trigonometry_decl()
diff --git a/builtins/target-generic-common.ll b/builtins/target-generic-common.ll
index 401c862d..ef33ff97 100644
--- a/builtins/target-generic-common.ll
+++ b/builtins/target-generic-common.ll
@@ -191,13 +191,9 @@ declare <WIDTH x double> @__max_varying_double(<WIDTH x double>,
 
 declare float @__rsqrt_uniform_float(float) nounwind readnone 
 declare float @__rcp_uniform_float(float) nounwind readnone 
-declare double @__rsqrt_uniform_double(double) nounwind readnone 
-declare double @__rcp_uniform_double(double) nounwind readnone 
 declare float @__sqrt_uniform_float(float) nounwind readnone 
 declare <WIDTH x float> @__rcp_varying_float(<WIDTH x float>) nounwind readnone 
 declare <WIDTH x float> @__rsqrt_varying_float(<WIDTH x float>) nounwind readnone 
-declare <WIDTH x double> @__rcp_varying_double(<WIDTH x double>) nounwind readnone 
-declare <WIDTH x double> @__rsqrt_varying_double(<WIDTH x double>) nounwind readnone 
 
 declare <WIDTH x float> @__sqrt_varying_float(<WIDTH x float>) nounwind readnone 
 
@@ -393,3 +389,11 @@ declare void @__prefetch_read_uniform_nt(i8 * nocapture) nounwind
 
 define_avgs()
 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; reciprocals in double precision, if supported
+
+rsqrtd_decl()
+rcpd_decl()
+
+transcendetals_decl()
+trigonometry_decl()
diff --git a/builtins/target-neon-16.ll b/builtins/target-neon-16.ll
index 8e0ef121..d73b0a50 100644
--- a/builtins/target-neon-16.ll
+++ b/builtins/target-neon-16.ll
@@ -521,3 +521,6 @@ define <8 x i16> @__avg_down_int16(<8 x i16>, <8 x i16>) nounwind readnone {
 
 rsqrtd_decl()
 rcpd_decl()
+
+transcendetals_decl()
+trigonometry_decl()
diff --git a/builtins/target-neon-32.ll b/builtins/target-neon-32.ll
index d6e861a2..a8999269 100644
--- a/builtins/target-neon-32.ll
+++ b/builtins/target-neon-32.ll
@@ -491,3 +491,6 @@ define <4 x i16> @__avg_down_int16(<4 x i16>, <4 x i16>) nounwind readnone {
 
 rsqrtd_decl()
 rcpd_decl()
+
+transcendetals_decl()
+trigonometry_decl()
diff --git a/builtins/target-neon-8.ll b/builtins/target-neon-8.ll
index aaa0a7b7..0c1edf8c 100644
--- a/builtins/target-neon-8.ll
+++ b/builtins/target-neon-8.ll
@@ -587,3 +587,6 @@ define <16 x i16> @__avg_down_int16(<16 x i16>, <16 x i16>) nounwind readnone {
 
 rsqrtd_decl()
 rcpd_decl()
+
+transcendetals_decl()
+trigonometry_decl()
diff --git a/builtins/target-sse2-x2.ll b/builtins/target-sse2-x2.ll
index bfb927e5..f69c25c8 100644
--- a/builtins/target-sse2-x2.ll
+++ b/builtins/target-sse2-x2.ll
@@ -658,3 +658,6 @@ define <8 x double> @__max_varying_double(<8 x double>, <8 x double>) nounwind r
 
 rsqrtd_decl()
 rcpd_decl()
+
+transcendetals_decl()
+trigonometry_decl()
diff --git a/builtins/target-sse2.ll b/builtins/target-sse2.ll
index 93a8eb93..4e033d88 100644
--- a/builtins/target-sse2.ll
+++ b/builtins/target-sse2.ll
@@ -593,3 +593,6 @@ gen_scatter(double)
 
 rsqrtd_decl()
 rcpd_decl()
+
+transcendetals_decl()
+trigonometry_decl()
diff --git a/builtins/target-sse4-16.ll b/builtins/target-sse4-16.ll
index 0de5c1b4..abca8459 100644
--- a/builtins/target-sse4-16.ll
+++ b/builtins/target-sse4-16.ll
@@ -494,3 +494,6 @@ define_down_avgs()
 
 rsqrtd_decl()
 rcpd_decl()
+
+transcendetals_decl()
+trigonometry_decl()
diff --git a/builtins/target-sse4-8.ll b/builtins/target-sse4-8.ll
index 79f44212..2e9f83a7 100644
--- a/builtins/target-sse4-8.ll
+++ b/builtins/target-sse4-8.ll
@@ -496,3 +496,6 @@ define_down_avgs()
 
 rsqrtd_decl()
 rcpd_decl()
+
+transcendetals_decl()
+trigonometry_decl()
diff --git a/builtins/target-sse4-x2.ll b/builtins/target-sse4-x2.ll
index ceff27f0..c0466b34 100644
--- a/builtins/target-sse4-x2.ll
+++ b/builtins/target-sse4-x2.ll
@@ -597,3 +597,6 @@ define_avgs()
 
 rsqrtd_decl()
 rcpd_decl()
+
+transcendetals_decl()
+trigonometry_decl()
diff --git a/builtins/target-sse4.ll b/builtins/target-sse4.ll
index 9e2ac8a5..d5d9ca5f 100644
--- a/builtins/target-sse4.ll
+++ b/builtins/target-sse4.ll
@@ -520,3 +520,6 @@ define_avgs()
 
 rsqrtd_decl()
 rcpd_decl()
+
+transcendetals_decl()
+trigonometry_decl()
diff --git a/builtins/util.m4 b/builtins/util.m4
index fbd929a1..2af75c25 100644
--- a/builtins/util.m4
+++ b/builtins/util.m4
@@ -2106,13 +2106,6 @@ declare void
 @__pseudo_scatter_base_offsets64_double(i8 * nocapture, i32, <WIDTH x i64>,
                                         <WIDTH x double>, <WIDTH x MASK>) nounwind
 
-declare float @__log_uniform_float(float) nounwind readnone
-declare <WIDTH x float> @__log_varying_float(<WIDTH x float>) nounwind readnone
-declare float @__exp_uniform_float(float) nounwind readnone
-declare <WIDTH x float> @__exp_varying_float(<WIDTH x float>) nounwind readnone
-declare float @__pow_uniform_float(float, float) nounwind readnone
-declare <WIDTH x float> @__pow_varying_float(<WIDTH x float>, <WIDTH x float>) nounwind readnone
-
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 declare void @__use8(<WIDTH x i8>)
@@ -4541,3 +4534,58 @@ define(`rcpd_decl', `
 declare  double @__rcp_uniform_double(double)
 declare <WIDTH x double> @__rcp_varying_double(<WIDTH x double>)
 ')
+
+
+define(`transcendetals_decl',`
+    declare float @__log_uniform_float(float) nounwind readnone
+    declare <WIDTH x float> @__log_varying_float(<WIDTH x float>) nounwind readnone
+    declare float @__exp_uniform_float(float) nounwind readnone
+    declare <WIDTH x float> @__exp_varying_float(<WIDTH x float>) nounwind readnone
+    declare float @__pow_uniform_float(float, float) nounwind readnone
+    declare <WIDTH x float> @__pow_varying_float(<WIDTH x float>, <WIDTH x float>) nounwind readnone
+
+    declare double @__log_uniform_double(double) nounwind readnone
+    declare <WIDTH x double> @__log_varying_double(<WIDTH x double>) nounwind readnone
+    declare double @__exp_uniform_double(double) nounwind readnone
+    declare <WIDTH x double> @__exp_varying_double(<WIDTH x double>) nounwind readnone
+    declare double @__pow_uniform_double(double, double) nounwind readnone
+    declare <WIDTH x double> @__pow_varying_double(<WIDTH x double>, <WIDTH x double>) nounwind readnone
+')
+
+define(`trigonometry_decl',`
+    declare <WIDTH x float> @__sin_varying_float(<WIDTH x float>) nounwind readnone
+    declare <WIDTH x float> @__asin_varying_float(<WIDTH x float>) nounwind readnone
+    declare <WIDTH x float> @__cos_varying_float(<WIDTH x float>) nounwind readnone
+    declare <WIDTH x float> @__acos_varying_float(<WIDTH x float>) nounwind readnone
+    declare void @__sincos_varying_float(<WIDTH x float>, <WIDTH x float>*, <WIDTH x float>*) nounwind readnone
+    declare <WIDTH x float> @__tan_varying_float(<WIDTH x float>) nounwind readnone
+    declare <WIDTH x float> @__atan_varying_float(<WIDTH x float>) nounwind readnone
+    declare <WIDTH x float> @__atan2_varying_float(<WIDTH x float>,<WIDTH x float>) nounwind readnone
+
+    declare float @__sin_uniform_float(float) nounwind readnone
+    declare float @__asin_uniform_float(float) nounwind readnone
+    declare float @__cos_uniform_float(float) nounwind readnone
+    declare float @__acos_uniform_float(float) nounwind readnone
+    declare void @__sincos_uniform_float(float, float*, float*) nounwind readnone
+    declare float @__tan_uniform_float(float) nounwind readnone
+    declare float @__atan_uniform_float(float) nounwind readnone
+    declare float @__atan2_uniform_float(float,float) nounwind readnone
+
+    declare <WIDTH x double> @__sin_varying_double(<WIDTH x double>) nounwind readnone
+    declare <WIDTH x double> @__asin_varying_double(<WIDTH x double>) nounwind readnone
+    declare <WIDTH x double> @__cos_varying_double(<WIDTH x double>) nounwind readnone
+    declare <WIDTH x double> @__acos_varying_double(<WIDTH x double>) nounwind readnone
+    declare void @__sincos_varying_double(<WIDTH x double>, <WIDTH x double>*, <WIDTH x double>*) nounwind readnone
+    declare <WIDTH x double> @__tan_varying_double(<WIDTH x double>) nounwind readnone
+    declare <WIDTH x double> @__atan_varying_double(<WIDTH x double>) nounwind readnone
+    declare <WIDTH x double> @__atan2_varying_double(<WIDTH x double>,<WIDTH x double>) nounwind readnone
+
+    declare double @__sin_uniform_double(double) nounwind readnone
+    declare double @__asin_uniform_double(double) nounwind readnone
+    declare double @__cos_uniform_double(double) nounwind readnone
+    declare double @__acos_uniform_double(double) nounwind readnone
+    declare void @__sincos_uniform_double(double, double*, double*) nounwind readnone
+    declare double @__tan_uniform_double(double) nounwind readnone
+    declare double @__atan_uniform_double(double) nounwind readnone
+    declare double @__atan2_uniform_double(double,double) nounwind readnone
+')
diff --git a/examples/intrinsics/generic-16.h b/examples/intrinsics/generic-16.h
index 3b5c6ec3..c8f2cf08 100644
--- a/examples/intrinsics/generic-16.h
+++ b/examples/intrinsics/generic-16.h
@@ -691,39 +691,6 @@ SHIFT(__vec16_f, float, float)
 SHUFFLES(__vec16_f, float, float)
 LOAD_STORE(__vec16_f, float)
 
-static FORCEINLINE float __exp_uniform_float(float v) {
-    return expf(v);
-}
-
-static FORCEINLINE __vec16_f __exp_varying_float(__vec16_f v) {
-    __vec16_f ret;
-    for (int i = 0; i < 16; ++i)
-        ret.v[i] = expf(v.v[i]);
-    return ret;
-}
-
-static FORCEINLINE float __log_uniform_float(float v) {
-    return logf(v);
-}
-
-static FORCEINLINE __vec16_f __log_varying_float(__vec16_f v) {
-    __vec16_f ret;
-    for (int i = 0; i < 16; ++i)
-        ret.v[i] = logf(v.v[i]);
-    return ret;
-}
-
-static FORCEINLINE float __pow_uniform_float(float a, float b) {
-    return powf(a, b);
-}
-
-static FORCEINLINE __vec16_f __pow_varying_float(__vec16_f a, __vec16_f b) {
-    __vec16_f ret;
-    for (int i = 0; i < 16; ++i)
-        ret.v[i] = powf(a.v[i], b.v[i]);
-    return ret;
-}
-
 static FORCEINLINE int __intbits(float v) {
     union {
         float f;
@@ -1813,3 +1780,97 @@ static FORCEINLINE uint64_t __clock() {
 
 #endif // !WIN32
 
+
+///////////////////////////////////////////////////////////////////////////
+// Transcendentals
+//
+//
+#define TRANSCENDENTALS(op) \
+static FORCEINLINE float __##op##_uniform_float(float v) { \
+    return op##f(v); \
+} \
+static FORCEINLINE __vec16_f __##op##_varying_float(__vec16_f v) { \
+    __vec16_f ret; \
+    for (int i = 0; i < 16; ++i) \
+        ret.v[i] = op##f(v.v[i]); \
+    return ret; \
+} \
+static FORCEINLINE double __##op##_uniform_double(double v) { \
+    return op(v); \
+} \
+static FORCEINLINE __vec16_d __##op##_varying_double(__vec16_d v) { \
+    __vec16_d ret; \
+    for (int i = 0; i < 16; ++i) \
+        ret.v[i] = op(v.v[i]); \
+    return ret; \
+}
+
+TRANSCENDENTALS(log)  // __log_{uniform,varying}_{float,double}
+TRANSCENDENTALS(exp)  // __exp_{uniform,varying}_{float,double}
+
+
+static FORCEINLINE float __pow_uniform_float(float a, float b) {
+    return powf(a, b);
+}
+static FORCEINLINE __vec16_f __pow_varying_float(__vec16_f a, __vec16_f b) {
+    __vec16_f ret;
+    for (int i = 0; i < 16; ++i)
+        ret.v[i] = powf(a.v[i], b.v[i]);
+    return ret;
+}
+static FORCEINLINE double __pow_uniform_double(double a, double b) {
+    return pow(a, b);
+}
+static FORCEINLINE __vec16_d __pow_varying_double(__vec16_d a, __vec16_d b) {
+    __vec16_d ret;
+    for (int i = 0; i < 16; ++i)
+        ret.v[i] = pow(a.v[i], b.v[i]);
+    return ret;
+}
+
+///////////////////////////////////////////////////////////////////////////
+// Trigonometry
+
+TRANSCENDENTALS(sin)
+TRANSCENDENTALS(asin)
+TRANSCENDENTALS(cos)
+TRANSCENDENTALS(acos)
+TRANSCENDENTALS(tan)
+TRANSCENDENTALS(atan)
+
+
+static FORCEINLINE float __atan2_uniform_float(float a, float b) {
+    return atan2f(a, b);
+}
+static FORCEINLINE __vec16_f __atan2_varying_float(__vec16_f a, __vec16_f b) {
+    __vec16_f ret;
+    for (int i = 0; i < 16; ++i)
+        ret.v[i] = atan2f(a.v[i], b.v[i]);
+    return ret;
+}
+static FORCEINLINE double __atan2_uniform_double(double a, double b) {
+    return atan2(a, b);
+}
+static FORCEINLINE __vec16_d __atan2_varying_double(__vec16_d a, __vec16_d b) {
+    __vec16_d ret;
+    for (int i = 0; i < 16; ++i)
+        ret.v[i] = atan2(a.v[i], b.v[i]);
+    return ret;
+}
+
+static FORCEINLINE void __sincos_uniform_float(float x, float *a, float *b) {
+    sincosf(x,a,b);
+}
+static FORCEINLINE void __sincos_varying_float(__vec16_f x, __vec16_f *a, __vec16_f *b) {
+    // Per lane i: sincosf stores the sine of x.v[i] in a->v[i] and the cosine in b->v[i].
+    for (int i = 0; i < 16; ++i)
+        sincosf(x.v[i], &a->v[i], &b->v[i]);
+}
+static FORCEINLINE void __sincos_uniform_double(double x, double *a, double *b) {
+    sincos(x,a,b);
+}
+static FORCEINLINE void __sincos_varying_double(__vec16_d x, __vec16_d *a, __vec16_d *b) {
+    // Per lane i: sincos stores the sine of x.v[i] in a->v[i] and the cosine in b->v[i].
+    for (int i = 0; i < 16; ++i)
+        sincos(x.v[i], &a->v[i], &b->v[i]);
+}
diff --git a/examples/intrinsics/sse4.h b/examples/intrinsics/sse4.h
index 45b31be1..a25af10b 100644
--- a/examples/intrinsics/sse4.h
+++ b/examples/intrinsics/sse4.h
@@ -167,6 +167,10 @@ struct __vec4_d {
     }
 
     __m128d v[2];
+    FORCEINLINE __vec4_d(double *p) {  // p[0],p[1] -> v[0] (low,high); p[2],p[3] -> v[1]
+        v[0] = _mm_loadu_pd(p);
+        v[1] = _mm_loadu_pd(p + 2);
+    }
 };
 
 
@@ -2471,39 +2475,6 @@ static FORCEINLINE __vec4_d __sqrt_varying_double(__vec4_d v) {
     return __vec4_d(_mm_sqrt_pd(v.v[0]), _mm_sqrt_pd(v.v[1]));
 }
 
-static FORCEINLINE __vec4_f __pow_varying_float(__vec4_f a, __vec4_f b) {
-    float r[4];
-    for (int i = 0; i < 4; ++i)
-        r[i] = powf(__extract_element(a, i), __extract_element(b, i));
-    return __vec4_f(r);
-}
-
-static FORCEINLINE float __pow_uniform_float(float a, float b) {
-    return powf(a, b);
-}
-
-static FORCEINLINE __vec4_f __exp_varying_float(__vec4_f a) {
-    float r[4];
-    for (int i = 0; i < 4; ++i)
-        r[i] = expf(__extract_element(a, i));
-    return __vec4_f(r);
-}
-
-static FORCEINLINE float __exp_uniform_float(float a) {
-    return expf(a);
-}
-
-static FORCEINLINE __vec4_f __log_varying_float(__vec4_f a) {
-    float r[4];
-    for (int i = 0; i < 4; ++i)
-        r[i] = logf(__extract_element(a, i));
-    return __vec4_f(r);
-}
-
-static FORCEINLINE float __log_uniform_float(float a) {
-    return logf(a);
-}
-
 static FORCEINLINE int __intbits(float v) {
     union {
         float f;
@@ -4166,4 +4137,97 @@ static FORCEINLINE uint64_t __clock() {
 }
 #endif // !WIN32
 
+
+///////////////////////////////////////////////////////////////////////////
+// Transcendentals
+
+
+#define TRANSCENDENTALS(op) \
+static FORCEINLINE __vec4_f __##op##_varying_float(__vec4_f a) {\
+    float r[4];\
+    for (int i = 0; i < 4; ++i)\
+        r[i] = op##f(__extract_element(a, i));\
+    return __vec4_f(r);\
+}\
+static FORCEINLINE float __##op##_uniform_float(float a) {\
+    return op##f(a);\
+}\
+static FORCEINLINE __vec4_d __##op##_varying_double(__vec4_d a) {\
+    double r[4];\
+    for (int i = 0; i < 4; ++i)\
+        r[i] = op(__extract_element(a, i));\
+    return __vec4_d(r);\
+}\
+static FORCEINLINE double __##op##_uniform_double(double a) {\
+    return op(a);\
+}
+
+TRANSCENDENTALS(log)
+TRANSCENDENTALS(exp)
+
+
+static FORCEINLINE __vec4_f __pow_varying_float(__vec4_f a, __vec4_f b) {
+    float r[4];
+    for (int i = 0; i < 4; ++i)
+        r[i] = powf(__extract_element(a, i), __extract_element(b, i));
+    return __vec4_f(r);
+}
+static FORCEINLINE float __pow_uniform_float(float a, float b) {
+    return powf(a, b);
+}
+static FORCEINLINE __vec4_d __pow_varying_double(__vec4_d a, __vec4_d b) {
+    double r[4];
+    for (int i = 0; i < 4; ++i)
+        r[i] = pow(__extract_element(a, i), __extract_element(b, i));
+    return __vec4_d(r);
+}
+static FORCEINLINE double __pow_uniform_double(double a, double b) {
+    return pow(a, b);
+}
+
+///////////////////////////////////////////////////////////////////////////
+// Trigonometry
+
+TRANSCENDENTALS(sin)
+TRANSCENDENTALS(asin)
+TRANSCENDENTALS(cos)
+TRANSCENDENTALS(acos)
+TRANSCENDENTALS(tan)
+TRANSCENDENTALS(atan)
+
+
+static FORCEINLINE __vec4_f __atan2_varying_float(__vec4_f a, __vec4_f b) {
+    float r[4];
+    for (int i = 0; i < 4; ++i)
+        r[i] = atan2f(__extract_element(a, i), __extract_element(b, i));
+    return __vec4_f(r);
+}
+static FORCEINLINE float __atan2_uniform_float(float a, float b) {
+    return atan2f(a, b);
+}
+static FORCEINLINE __vec4_d __atan2_varying_double(__vec4_d a, __vec4_d b) {
+    double r[4];
+    for (int i = 0; i < 4; ++i)
+        r[i] = atan2(__extract_element(a, i), __extract_element(b, i));
+    return __vec4_d(r);
+}
+static FORCEINLINE double __atan2_uniform_double(double a, double b) {
+    return atan2(a, b);
+}
+
+static FORCEINLINE void __sincos_varying_float(__vec4_f x, __vec4_f * _sin, __vec4_f * _cos) {
+    for (int i = 0; i < 4; ++i)
+         sincosf(__extract_element(x, i), (float*)_sin + i, (float*)_cos + i);
+}
+static FORCEINLINE void __sincos_uniform_float(float x, float *_sin, float *_cos) {
+    sincosf(x, _sin, _cos);
+}
+static FORCEINLINE void __sincos_varying_double(__vec4_d x, __vec4_d * _sin, __vec4_d * _cos) {
+    for (int i = 0; i < 4; ++i)
+         sincos(__extract_element(x, i), (double*)_sin + i, (double*)_cos + i);
+}
+static FORCEINLINE void __sincos_uniform_double(double x, double *_sin, double *_cos) {
+    sincos(x, _sin, _cos);
+}
+
 #undef FORCEINLINE
diff --git a/stdlib.ispc b/stdlib.ispc
index eb5ee9c4..a6ed8896 100644
--- a/stdlib.ispc
+++ b/stdlib.ispc
@@ -2433,29 +2433,29 @@ static inline uniform float sin(uniform float x_full) {
 
 
 __declspec(safe)
-static inline float asin(float x) {
-    bool isneg = x < 0;
-    x = abs(x);
-
+static inline float asin(float x0) {
+    bool isneg = x0< 0;
+    float x = abs(x0);
     bool isnan = (x > 1);
-
     float v;
+
     if (__have_native_trigonometry) 
     {
-          return __asin_varying_float(x_full);
+      return __asin_varying_float(x0);
     }
     else if (__math_lib == __math_lib_svml) {
-        return __svml_asinf(x);
+        return __svml_asinf(x0);
     } 
     else if (__math_lib == __math_lib_system) {
         float ret;
         foreach_active (i) {
-            uniform float r = __stdlib_asinf(extract(x, i));
+            uniform float r = __stdlib_asinf(extract(x0, i));
             ret = insert(ret, i, r);
         }
         return ret;
     }
     else if (__math_lib == __math_lib_ispc)
+    {
         // sollya
         // fpminimax(((asin(x)-pi/2)/-sqrt(1-x)), [|0,1,2,3,4,5,6,7,8,9,10|],
         //           [|single...|], [1e-20;.9999999999999999]);
@@ -2471,7 +2471,9 @@ static inline float asin(float x) {
             x * (3.05023305118083953857421875e-2f + 
             x * (-1.2897425331175327301025390625e-2f +
             x * 2.38926825113594532012939453125e-3f)))))))));
+    }
     else if (__math_lib == __math_lib_ispc_fast)
+    {
         // sollya
         // fpminimax(((asin(x)-pi/2)/-sqrt(1-x)), [|0,1,2,3,4,5|],[|single...|],
         //           [1e-20;.9999999999999999]);
@@ -2482,6 +2484,7 @@ static inline float asin(float x) {
              x * (-4.489909112453460693359375e-2f + 
              x * (1.928029954433441162109375e-2f + 
              x * (-4.3095736764371395111083984375e-3f)))));
+    }
 
     v *= -sqrt(1.f - x);
     v = v + 1.57079637050628662109375;
@@ -2496,22 +2499,21 @@ static inline float asin(float x) {
 
 
 __declspec(safe)
-static inline uniform float asin(uniform float x) {
-    uniform bool isneg = x < 0;
-    x = abs(x);
-
+static inline uniform float asin(uniform float x0) {
+    uniform bool isneg = x0 < 0;
+    uniform float x = abs(x0);
     uniform bool isnan = (x > 1);
-
     uniform float v;
     if (__have_native_trigonometry) 
     {
-      return __asin_uniform_float(x_full);
+      return __asin_uniform_float(x0);
     }
     else if (__math_lib == __math_lib_svml ||
         __math_lib == __math_lib_system) {
-        return __stdlib_asinf(x);
+        return __stdlib_asinf(x0);
     }
     else if (__math_lib == __math_lib_ispc)
+    {
         // sollya
         // fpminimax(((asin(x)-pi/2)/-sqrt(1-x)), [|0,1,2,3,4,5,6,7,8,9,10|],
         //           [|single...|], [1e-20;.9999999999999999]);
@@ -2527,7 +2529,9 @@ static inline uniform float asin(uniform float x) {
             x * (3.05023305118083953857421875e-2f + 
             x * (-1.2897425331175327301025390625e-2f +
             x * 2.38926825113594532012939453125e-3f)))))))));
+    }
     else if (__math_lib == __math_lib_ispc_fast)
+    {
         // sollya
         // fpminimax(((asin(x)-pi/2)/-sqrt(1-x)), [|0,1,2,3,4,5|],[|single...|],
         //           [1e-20;.9999999999999999]);
@@ -2538,6 +2542,7 @@ static inline uniform float asin(uniform float x) {
              x * (-4.489909112453460693359375e-2f + 
              x * (1.928029954433441162109375e-2f + 
              x * (-4.3095736764371395111083984375e-3f)))));
+    }
 
     v *= -sqrt(1.f - x);
     v = v + 1.57079637050628662109375;
@@ -2687,22 +2692,34 @@ static inline uniform float cos(uniform float x_full) {
 
 __declspec(safe)
 static inline float acos(float v) {
+  if (__have_native_trigonometry) 
+    return __acos_varying_float(v);
+  else
     return 1.57079637050628662109375 - asin(v);
 }
 
 __declspec(safe)
 static inline double acos(const double v) {
+  if (__have_native_trigonometry) 
+    return __acos_varying_double(v);
+  else
     return 1.57079637050628662109375d0 - asin(v);
 }
 
 
 __declspec(safe)
 static inline uniform float acos(uniform float v) {
+  if (__have_native_trigonometry) 
+    return __acos_uniform_float(v);
+  else
     return 1.57079637050628662109375 - asin(v);
 }
 
 __declspec(safe)
 static inline uniform double acos(const uniform double v) {
+  if (__have_native_trigonometry) 
+    return __acos_uniform_double(v);
+  else
     return 1.57079637050628662109375d0 - asin(v);
 }
 
@@ -3696,7 +3713,7 @@ __declspec(safe)
 static inline double sin(double x) {
     if (__have_native_trigonometry) 
     {
-      return __sin_varying_double(x_full);
+      return __sin_varying_double(x);
     }
     else if (__math_lib == __math_lib_svml) 
     {
@@ -3715,7 +3732,7 @@ __declspec(safe)
 static inline double asin(double x) {
     if (__have_native_trigonometry) 
     {
-      return __asin_varying_double(x_full);
+      return __asin_varying_double(x);
     }
     else if (__math_lib == __math_lib_svml) 
     {
@@ -3735,7 +3752,7 @@ __declspec(safe)
 static inline uniform double sin(uniform double x) {
     if (__have_native_trigonometry) 
     {
-      return __sin_uniform_double(x_full);
+      return __sin_uniform_double(x);
     }
     else
         return __stdlib_sin(x);
@@ -3745,7 +3762,7 @@ __declspec(safe)
 static inline double asin(const double x) {
     if (__have_native_trigonometry) 
     {
-      return __asin_varying_double(x_full);
+      return __asin_varying_double(x);
     }
     else if (__math_lib == __math_lib_svml) 
     {
@@ -3765,7 +3782,7 @@ __declspec(safe)
 static inline double cos(const double x) {
     if (__have_native_trigonometry) 
     {
-      return __cos_varying_double(x_full);
+      return __cos_varying_double(x);
     }
     if (__math_lib == __math_lib_svml) 
     {
@@ -3785,7 +3802,7 @@ __declspec(safe)
 static inline uniform double cos(uniform double x) {
     if (__have_native_trigonometry) 
     {
-      return __cos_uniform_double(x_full);
+      return __cos_uniform_double(x);
     }
     else
         return __stdlib_cos(x);
@@ -3796,7 +3813,7 @@ static inline void sincos(double x, varying double * uniform sin_result,
                           varying double * uniform cos_result) {
     if (__have_native_trigonometry) 
     {
-      __sincos_varying_double(x_full),sin_result,cos_result);
+      __sincos_varying_double(x,sin_result,cos_result); return;  // native path is complete; skip the __math_lib branches that follow
     }
     if (__math_lib == __math_lib_svml) 
     {
@@ -3817,7 +3834,7 @@ static inline void sincos(uniform double x, uniform double * uniform sin_result,
                           uniform double * uniform cos_result) {
     if (__have_native_trigonometry) 
     {
-      __sincos_uniform_double(x_full,sin_result, cos_result);
+      __sincos_uniform_double(x,sin_result, cos_result);
     }
     else
         __stdlib_sincos(x, sin_result, cos_result);
@@ -3827,7 +3844,7 @@ __declspec(safe)
 static inline double tan(double x) {
     if (__have_native_trigonometry) 
     {
-      return __tan_varying_double(x_full);
+      return __tan_varying_double(x);
     }
     else if (__math_lib == __math_lib_svml) 
     {
@@ -3847,7 +3864,7 @@ __declspec(safe)
 static inline uniform double tan(uniform double x) {
     if (__have_native_trigonometry) 
     {
-      return __tan_uniform_double(x_full);
+      return __tan_uniform_double(x);
     }
     else
         return __stdlib_tan(x);
@@ -3857,7 +3874,7 @@ __declspec(safe)
 static inline double atan(double x) {
     if (__have_native_trigonometry) 
     {
-      return __atan_varying_double(x_full);
+      return __atan_varying_double(x);
     }
     else {
         double ret;
@@ -3873,7 +3890,7 @@ __declspec(safe)
 static inline uniform double atan(uniform double x) {
     if (__have_native_trigonometry) 
     {
-      return __atan_uniform_double(x_full);
+      return __atan_uniform_double(x);
     }
     else
         return __stdlib_atan(x);
@@ -3912,7 +3929,7 @@ static inline uniform double atan2(uniform double y, uniform double x) {
 __declspec(safe)
 static inline double exp(double x) {
     if (__have_native_transcendentals) {
-        return __exp_varying_double(x_full);
+        return __exp_varying_double(x);
     }
     else if (__math_lib == __math_lib_svml) 
     {
@@ -3931,7 +3948,7 @@ static inline double exp(double x) {
 __declspec(safe)
 static inline uniform double exp(uniform double x) {
     if (__have_native_transcendentals) {
-        return __exp_uniform_double(x_full);
+        return __exp_uniform_double(x);
     }
     else
         return __stdlib_exp(x);
@@ -3940,7 +3957,7 @@ static inline uniform double exp(uniform double x) {
 __declspec(safe)
 static inline double log(double x) {
     if (__have_native_transcendentals) {
-        return __log_varying_double(x_full);
+        return __log_varying_double(x);
     }
     else if (__math_lib == __math_lib_svml) 
     {
@@ -3959,7 +3976,7 @@ static inline double log(double x) {
 __declspec(safe)
 static inline uniform double log(uniform double x) {
     if (__have_native_transcendentals) {
-        return __log_uniform_double(x_full);
+        return __log_uniform_double(x);
     }
     else
         return __stdlib_log(x);

From 438cee4e21755f4e890ca3d481b9521aa65453a0 Mon Sep 17 00:00:00 2001
From: evghenii <egaburov@dds.nl>
Date: Fri, 7 Feb 2014 15:43:42 +0100
Subject: [PATCH 4/7] added support for double precision/native
 transcendentals/trigonometry

---
 examples/intrinsics/knc-i1x16.h | 56 ++++++++++++++++++++-------------
 examples/intrinsics/knc.h       | 33 +++++++++++++++++++
 2 files changed, 67 insertions(+), 22 deletions(-)

diff --git a/examples/intrinsics/knc-i1x16.h b/examples/intrinsics/knc-i1x16.h
index ba6ef005..2e6afed5 100644
--- a/examples/intrinsics/knc-i1x16.h
+++ b/examples/intrinsics/knc-i1x16.h
@@ -1142,28 +1142,6 @@ template <> static FORCEINLINE void __store<64>(__vec16_f *p, __vec16_f v)
 }
 #endif
 
-/******** math ******/
-
-/*** float ***/
-static FORCEINLINE float __exp_uniform_float(float v) {    return expf(v);}
-static FORCEINLINE __vec16_f __exp_varying_float(__vec16_f v) { return _mm512_exp_ps(v); }
-
-static FORCEINLINE float __log_uniform_float(float v) {    return logf(v);}
-static FORCEINLINE __vec16_f __log_varying_float(__vec16_f v) { return _mm512_log_ps(v); }
-
-static FORCEINLINE float __pow_uniform_float(float a, float b) {    return powf(a, b);}
-static FORCEINLINE __vec16_f __pow_varying_float(__vec16_f a, __vec16_f b) { return _mm512_pow_ps(a,b); }
-
-/*** double ***/
-static FORCEINLINE double __exp_uniform_double(double v) {    return exp(v);}
-static FORCEINLINE __vec16_d __exp_varying_double(__vec16_d v) { return __vec16_d(_mm512_exp_pd(v.v1),_mm512_exp_pd(v.v2)); }
-
-static FORCEINLINE double __log_uniform_double(double v) {    return log(v);}
-static FORCEINLINE __vec16_d __log_varying_double(__vec16_d v) { return __vec16_d(_mm512_log_pd(v.v1),_mm512_log_pd(v.v2)); }
-
-static FORCEINLINE double __pow_uniform_double(double a, double b) {    return pow(a,b);}
-static FORCEINLINE __vec16_d __pow_varying_double(__vec16_d a, __vec16_d b) { return __vec16_d(_mm512_pow_pd(a.v1,b.v1),_mm512_pow_pd(a.v2,b.v2)); }
-
 /******** bitcast ******/
 
 static FORCEINLINE int __intbits(float v) {
@@ -2806,6 +2784,40 @@ static FORCEINLINE uint64_t __clock() {
 
 #endif // !WIN32
 
+
+///////////////////////////////////////////////////////////////////////////
+// Transcendentals
+
+
+#define TRANSCENDENTALS(op) \
+static FORCEINLINE __vec16_f __##op##_varying_float(__vec16_f v) { return _mm512_##op##_ps(v); } \
+static FORCEINLINE float __##op##_uniform_float(float v) { return op##f(v); } \
+static FORCEINLINE __vec16_d __##op##_varying_double(__vec16_d v) { return __vec16_d(_mm512_##op##_pd(v.v1),_mm512_##op##_pd(v.v2)); } \
+static FORCEINLINE double __##op##_uniform_double(double a) { return op(a); }
+
+TRANSCENDENTALS(log)
+TRANSCENDENTALS(exp)
+
+static FORCEINLINE float __pow_uniform_float(float a, float b) {    return powf(a, b);}
+static FORCEINLINE __vec16_f __pow_varying_float(__vec16_f a, __vec16_f b) { return _mm512_pow_ps(a,b); }
+static FORCEINLINE double __pow_uniform_double(double a, double b) {    return pow(a,b);}
+static FORCEINLINE __vec16_d __pow_varying_double(__vec16_d a, __vec16_d b) { return __vec16_d(_mm512_pow_pd(a.v1,b.v1),_mm512_pow_pd(a.v2,b.v2)); }
+
+///////////////////////////////////////////////////////////////////////////
+// Trigonometry
+
+TRANSCENDENTALS(sin)
+TRANSCENDENTALS(asin)
+TRANSCENDENTALS(cos)
+TRANSCENDENTALS(acos)
+TRANSCENDENTALS(tan)
+TRANSCENDENTALS(atan)
+
+static FORCEINLINE float __atan2_uniform_float(float a, float b) {    return atan2f(a, b);}
+static FORCEINLINE __vec16_f __atan2_varying_float(__vec16_f a, __vec16_f b) { return _mm512_atan2_ps(a,b); }
+static FORCEINLINE double __atan2_uniform_double(double a, double b) {    return atan2(a,b);}
+static FORCEINLINE __vec16_d __atan2_varying_double(__vec16_d a, __vec16_d b) { return __vec16_d(_mm512_atan2_pd(a.v1,b.v1),_mm512_atan2_pd(a.v2,b.v2)); }
+
 #undef FORCEINLINE
 #undef PRE_ALIGN
 #undef POST_ALIGN
diff --git a/examples/intrinsics/knc.h b/examples/intrinsics/knc.h
index 458da458..0d3d5198 100644
--- a/examples/intrinsics/knc.h
+++ b/examples/intrinsics/knc.h
@@ -2173,6 +2173,39 @@ static FORCEINLINE uint64_t __clock() {
 }
 #endif // !WIN32
 
+///////////////////////////////////////////////////////////////////////////
+// Transcendentals
+
+
+#define TRANSCENDENTALS(op) \
+static FORCEINLINE __vec16_f __##op##_varying_float(__vec16_f v) { return _mm512_##op##_ps(v); } \
+static FORCEINLINE float __##op##_uniform_float(float v) { return op##f(v); } \
+static FORCEINLINE __vec16_d __##op##_varying_double(__vec16_d v) { return __vec16_d(_mm512_##op##_pd(v.v1),_mm512_##op##_pd(v.v2)); } \
+static FORCEINLINE double __##op##_uniform_double(double a) { return op(a); }
+
+TRANSCENDENTALS(log)
+TRANSCENDENTALS(exp)
+
+static FORCEINLINE float __pow_uniform_float(float a, float b) {    return powf(a, b);}
+static FORCEINLINE __vec16_f __pow_varying_float(__vec16_f a, __vec16_f b) { return _mm512_pow_ps(a,b); }
+static FORCEINLINE double __pow_uniform_double(double a, double b) {    return pow(a,b);}
+static FORCEINLINE __vec16_d __pow_varying_double(__vec16_d a, __vec16_d b) { return __vec16_d(_mm512_pow_pd(a.v1,b.v1),_mm512_pow_pd(a.v2,b.v2)); }
+
+///////////////////////////////////////////////////////////////////////////
+// Trigonometry
+
+TRANSCENDENTALS(sin)
+TRANSCENDENTALS(asin)
+TRANSCENDENTALS(cos)
+TRANSCENDENTALS(acos)
+TRANSCENDENTALS(tan)
+TRANSCENDENTALS(atan)
+
+static FORCEINLINE float __atan2_uniform_float(float a, float b) {    return atan2f(a, b);}
+static FORCEINLINE __vec16_f __atan2_varying_float(__vec16_f a, __vec16_f b) { return _mm512_atan2_ps(a,b); }
+static FORCEINLINE double __atan2_uniform_double(double a, double b) {    return atan2(a,b);}
+static FORCEINLINE __vec16_d __atan2_varying_double(__vec16_d a, __vec16_d b) { return __vec16_d(_mm512_atan2_pd(a.v1,b.v1),_mm512_atan2_pd(a.v2,b.v2)); }
+
 #undef FORCEINLINE
 #undef PRE_ALIGN
 #undef POST_ALIGN

From 8490efe0ad512719ab28d92b928590d0b08128c4 Mon Sep 17 00:00:00 2001
From: evghenii <egaburov@dds.nl>
Date: Fri, 7 Feb 2014 16:00:21 +0100
Subject: [PATCH 5/7] fix for knc.h. Due to a bug in ICC (tested with 13.1.3 &
 14.0.1), the resulting .cpp file fails to compile

---
 examples/intrinsics/knc.h | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/examples/intrinsics/knc.h b/examples/intrinsics/knc.h
index 0d3d5198..4bdb184a 100644
--- a/examples/intrinsics/knc.h
+++ b/examples/intrinsics/knc.h
@@ -95,6 +95,7 @@ typedef struct PRE_ALIGN(64) __vec16_f {
 typedef struct PRE_ALIGN(64) __vec16_d {
     FORCEINLINE __vec16_d() : v1(_mm512_undefined_pd()), v2(_mm512_undefined_pd()) {}
     FORCEINLINE __vec16_d(const __vec16_d &o) : v1(o.v1), v2(o.v2) {}
+    FORCEINLINE __vec16_d(const __m512d _v1, const __m512d _v2) : v1(_v1), v2(_v2) {}
     FORCEINLINE __vec16_d& operator =(const __vec16_d &o) { v1=o.v1; v2=o.v2; return *this; }
     FORCEINLINE __vec16_d(double v00, double v01, double v02, double v03, 
                           double v04, double v05, double v06, double v07,
@@ -1503,18 +1504,6 @@ static FORCEINLINE double __rsqrt_uniform_double(double v)
 }
 
 
-static FORCEINLINE __vec16_f __exp_varying_float(__vec16_f v) {
-  return _mm512_exp_ps(v);
-}
-
-static FORCEINLINE __vec16_f __log_varying_float(__vec16_f v) {
-  return _mm512_log_ps(v);
-}
-
-static FORCEINLINE __vec16_f __pow_varying_float(__vec16_f a, __vec16_f b) {
-  return _mm512_pow_ps(a, b);
-}
-
 ///////////////////////////////////////////////////////////////////////////
 // bit ops
 ///////////////////////////////////////////////////////////////////////////

From f0779f95a350908be063b01be0cc0a98166e8000 Mon Sep 17 00:00:00 2001
From: Evghenii <egaburov@dds.nl>
Date: Tue, 11 Feb 2014 11:40:40 +0100
Subject: [PATCH 6/7] added double precision tests

---
 tests/transcendentals-5-0.ispc | 23 +++++++++++++++++++++++
 tests/transcendentals-5-1.ispc | 23 +++++++++++++++++++++++
 tests/transcendentals-5-2.ispc | 28 ++++++++++++++++++++++++++++
 tests/transcendentals-5-3.ispc | 23 +++++++++++++++++++++++
 tests/transcendentals-6-0.ispc | 23 +++++++++++++++++++++++
 tests/transcendentals-6-1.ispc | 23 +++++++++++++++++++++++
 tests/transcendentals-6-2.ispc | 23 +++++++++++++++++++++++
 tests/transcendentals-6-3.ispc | 29 +++++++++++++++++++++++++++++
 tests/transcendentals-7-0.ispc | 23 +++++++++++++++++++++++
 tests/transcendentals-7-1.ispc | 23 +++++++++++++++++++++++
 tests/transcendentals-7-2.ispc | 23 +++++++++++++++++++++++
 tests/transcendentals-7-3.ispc | 23 +++++++++++++++++++++++
 tests/transcendentals-8-0.ispc | 23 +++++++++++++++++++++++
 tests/transcendentals-8-1.ispc | 23 +++++++++++++++++++++++
 tests/transcendentals-8-2.ispc | 23 +++++++++++++++++++++++
 tests/transcendentals-8-3.ispc | 23 +++++++++++++++++++++++
 tests/transcendentals-9-0.ispc | 23 +++++++++++++++++++++++
 tests/transcendentals-9-1.ispc | 23 +++++++++++++++++++++++
 tests/transcendentals-9-2.ispc | 23 +++++++++++++++++++++++
 tests/transcendentals-9-3.ispc | 23 +++++++++++++++++++++++
 20 files changed, 471 insertions(+)
 create mode 100644 tests/transcendentals-5-0.ispc
 create mode 100644 tests/transcendentals-5-1.ispc
 create mode 100644 tests/transcendentals-5-2.ispc
 create mode 100644 tests/transcendentals-5-3.ispc
 create mode 100644 tests/transcendentals-6-0.ispc
 create mode 100644 tests/transcendentals-6-1.ispc
 create mode 100644 tests/transcendentals-6-2.ispc
 create mode 100644 tests/transcendentals-6-3.ispc
 create mode 100644 tests/transcendentals-7-0.ispc
 create mode 100644 tests/transcendentals-7-1.ispc
 create mode 100644 tests/transcendentals-7-2.ispc
 create mode 100644 tests/transcendentals-7-3.ispc
 create mode 100644 tests/transcendentals-8-0.ispc
 create mode 100644 tests/transcendentals-8-1.ispc
 create mode 100644 tests/transcendentals-8-2.ispc
 create mode 100644 tests/transcendentals-8-3.ispc
 create mode 100644 tests/transcendentals-9-0.ispc
 create mode 100644 tests/transcendentals-9-1.ispc
 create mode 100644 tests/transcendentals-9-2.ispc
 create mode 100644 tests/transcendentals-9-3.ispc

diff --git a/tests/transcendentals-5-0.ispc b/tests/transcendentals-5-0.ispc
new file mode 100644
index 00000000..c445b8a9
--- /dev/null
+++ b/tests/transcendentals-5-0.ispc
@@ -0,0 +1,23 @@
+static double double4(uniform double a, uniform double b, uniform double c, 
+                    uniform double d) {
+    double ret = 0;
+    for (uniform int i = 0; i < programCount; i += 4) {
+        ret = insert(ret, i + 0, a);
+        ret = insert(ret, i + 1, b);
+        ret = insert(ret, i + 2, c);
+        ret = insert(ret, i + 3, d);
+    }
+    return ret;
+}
+
+export uniform int width() { return programCount; }
+
+
+bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; }  // passes on absolute OR relative error; double-precision tolerances
+
+export void f_v(uniform float RET[]) {
+    double v = double4((-3.141592741012573d0),(0.000000000000000d0),(3.141592741012573d0),(6.283185482025146d0));
+    double ref = double4((8.742277955963554d-08),(0.000000000000000d0),(-8.742277955963554d-08),(1.7484555911927038d-07));
+    RET[programIndex] = ok(sin(v), ref) ? 1. : 0.;
+}
+export void result(uniform float RET[]) { RET[programIndex] = 1.; }
diff --git a/tests/transcendentals-5-1.ispc b/tests/transcendentals-5-1.ispc
new file mode 100644
index 00000000..7d9980a4
--- /dev/null
+++ b/tests/transcendentals-5-1.ispc
@@ -0,0 +1,23 @@
+static double double4(uniform double a, uniform double b, uniform double c, 
+                    uniform double d) {
+    double ret = 0;
+    for (uniform int i = 0; i < programCount; i += 4) {
+        ret = insert(ret, i + 0, a);
+        ret = insert(ret, i + 1, b);
+        ret = insert(ret, i + 2, c);
+        ret = insert(ret, i + 3, d);
+    }
+    return ret;
+}
+
+export uniform int width() { return programCount; }
+
+
+bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; }  // passes on absolute OR relative error; double-precision tolerances
+
+export void f_v(uniform float RET[]) {
+    double v = double4((1.000000000000000d0),(-1.000000000000000d0),(-1.5707963705062866d0),(1.5707963705062866d0));
+    double ref = double4((0.8414709848078965d0),(-0.8414709848078965d0),(-1.000000000000000d0),(1.000000000000000d0));
+    RET[programIndex] = ok(sin(v), ref) ? 1. : 0.;
+}
+export void result(uniform float RET[]) { RET[programIndex] = 1.; }
diff --git a/tests/transcendentals-5-2.ispc b/tests/transcendentals-5-2.ispc
new file mode 100644
index 00000000..38247397
--- /dev/null
+++ b/tests/transcendentals-5-2.ispc
@@ -0,0 +1,28 @@
+static double double4(uniform double a, uniform double b, uniform double c, 
+                    uniform double d) {
+    double ret = 0;
+    for (uniform int i = 0; i < programCount; i += 4) {
+        ret = insert(ret, i + 0, a);
+        ret = insert(ret, i + 1, b);
+        ret = insert(ret, i + 2, c);
+        ret = insert(ret, i + 3, d);
+    }
+    return ret;
+}
+
+export uniform int width() { return programCount; }
+
+
+bool ok(double x, double ref) {  // per-lane check: absolute OR relative error below 1d-14
+    bool r = (abs(x - ref) < 1d-14) || abs((x-ref)/ref) < 1d-14;  // double-precision tolerance literals
+    if (any(r == false))  // report once if any lane is out of tolerance
+        print("mismatch got %, expected %\n", x, ref);
+    return r;
+}
+
+export void f_v(uniform float RET[]) {
+    double v = double4((-9.424777984619141d0),(4.000000000000000d0),(10.000000000000000d0),(-10.000000000000000d0));
+    double ref = double4((2.3849760909612067d-08),(-0.7568024953079282d0),(-0.5440211108893699d0),(0.5440211108893699d0));
+    RET[programIndex] = ok(sin(v), ref) ? 1. : 0.;
+}
+export void result(uniform float RET[]) { RET[programIndex] = 1.; }
diff --git a/tests/transcendentals-5-3.ispc b/tests/transcendentals-5-3.ispc
new file mode 100644
index 00000000..496f0b68
--- /dev/null
+++ b/tests/transcendentals-5-3.ispc
@@ -0,0 +1,23 @@
+static double double4(uniform double a, uniform double b, uniform double c, 
+                    uniform double d) {
+    double ret = 0;
+    for (uniform int i = 0; i < programCount; i += 4) {
+        ret = insert(ret, i + 0, a);
+        ret = insert(ret, i + 1, b);
+        ret = insert(ret, i + 2, c);
+        ret = insert(ret, i + 3, d);
+    }
+    return ret;
+}
+
+export uniform int width() { return programCount; }
+
+
+bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; }
+
+export void f_v(uniform float RET[]) {
+    double v = double4((14.300000190734863d0),(-6.699999809265137d0),(-21.200000762939453d0),(9.000000000000000d0));
+    double ref = double4((0.9867719333537206d0),(-0.40484974621184605d0),(-0.7111606865372466d0),(0.4121184852417566d0));
+    RET[programIndex] = ok(sin(v), ref) ? 1. : 0.;
+}
+export void result(uniform float RET[]) { RET[programIndex] = 1.; }
diff --git a/tests/transcendentals-6-0.ispc b/tests/transcendentals-6-0.ispc
new file mode 100644
index 00000000..2b59989a
--- /dev/null
+++ b/tests/transcendentals-6-0.ispc
@@ -0,0 +1,23 @@
+static double double4(uniform double a, uniform double b, uniform double c, 
+                    uniform double d) {
+    double ret = 0;
+    for (uniform int i = 0; i < programCount; i += 4) {
+        ret = insert(ret, i + 0, a);
+        ret = insert(ret, i + 1, b);
+        ret = insert(ret, i + 2, c);
+        ret = insert(ret, i + 3, d);
+    }
+    return ret;
+}
+
+export uniform int width() { return programCount; }
+
+
+bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; }
+
+export void f_v(uniform float RET[]) {
+    double v = double4((-3.141592653589793d0),(0.000000000000000d0),(3.141592653589793d0),(6.283185307179586d0));
+    double ref = double4((-1.000000000000000d0),(1.000000000000000d0),(-1.000000000000000d0),(1.000000000000000d0));
+    RET[programIndex] = ok(cos(v), ref) ? 1. : 0.;
+}
+export void result(uniform float RET[]) { RET[programIndex] = 1.; }
diff --git a/tests/transcendentals-6-1.ispc b/tests/transcendentals-6-1.ispc
new file mode 100644
index 00000000..b460af9f
--- /dev/null
+++ b/tests/transcendentals-6-1.ispc
@@ -0,0 +1,23 @@
+static double double4(uniform double a, uniform double b, uniform double c, // a..d: the four values to lay out across lanes
+                    uniform double d) {
+    double ret = 0;  // varying accumulator, every lane starts at 0
+    for (uniform int i = 0; i < programCount; i += 4) {  // walk the gang four lanes at a time
+        ret = insert(ret, i + 0, a);  // lane i+0 <- a
+        ret = insert(ret, i + 1, b);  // lane i+1 <- b
+        ret = insert(ret, i + 2, c);  // lane i+2 <- c
+        ret = insert(ret, i + 3, d);  // lane i+3 <- d
+    }
+    return ret;  // pattern a, b, c, d repeated across the gang
+}
+// Exported query: returns programCount, the number of program instances in the gang.
+export uniform int width() { return programCount; }
+
+
+bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; }
+
+export void f_v(uniform float RET[]) {
+    double v = double4((1.000000000000000d0),(-1.000000000000000d0),(-1.570796370506287d0),(1.570796370506287d0));
+    double ref = double4((0.5403023058681398d0),(0.5403023058681398d0),(-4.371139044595162d-08),(-4.371139044595162d-08));  // reference values that cos(v) is compared against, lane by lane
+    RET[programIndex] = ok(cos(v), ref) ? 1. : 0.;  // 1. where cos(v) passes ok() against ref, else 0.
+}
+export void result(uniform float RET[]) { RET[programIndex] = 1.; }  // each program instance stores 1.; NOTE(review): presumably the expected RET for f_v - confirm against the test driver
diff --git a/tests/transcendentals-6-2.ispc b/tests/transcendentals-6-2.ispc
new file mode 100644
index 00000000..5bcb1949
--- /dev/null
+++ b/tests/transcendentals-6-2.ispc
@@ -0,0 +1,23 @@
+static double double4(uniform double a, uniform double b, uniform double c, // a..d: the four values to lay out across lanes
+                    uniform double d) {
+    double ret = 0;  // varying accumulator, every lane starts at 0
+    for (uniform int i = 0; i < programCount; i += 4) {  // walk the gang four lanes at a time
+        ret = insert(ret, i + 0, a);  // lane i+0 <- a
+        ret = insert(ret, i + 1, b);  // lane i+1 <- b
+        ret = insert(ret, i + 2, c);  // lane i+2 <- c
+        ret = insert(ret, i + 3, d);  // lane i+3 <- d
+    }
+    return ret;  // pattern a, b, c, d repeated across the gang
+}
+// Exported query: returns programCount, the number of program instances in the gang.
+export uniform int width() { return programCount; }
+
+
+bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; }
+
+export void f_v(uniform float RET[]) {
+    double v = double4((-9.42477796076938d0),(4.000000000000000d0),(10.000000000000000d0),(-10.000000000000000d0));
+    double ref = double4((-1.000000000000000d0),(-0.6536436208636119d0),(-0.8390715290764524d0),(-0.8390715290764524d0));  // reference values that cos(v) is compared against, lane by lane
+    RET[programIndex] = ok(cos(v), ref) ? 1. : 0.;  // 1. where cos(v) passes ok() against ref, else 0.
+}
+export void result(uniform float RET[]) { RET[programIndex] = 1.; }  // each program instance stores 1.; NOTE(review): presumably the expected RET for f_v - confirm against the test driver
diff --git a/tests/transcendentals-6-3.ispc b/tests/transcendentals-6-3.ispc
new file mode 100644
index 00000000..6d738f92
--- /dev/null
+++ b/tests/transcendentals-6-3.ispc
@@ -0,0 +1,29 @@
+static double double4(uniform double a, uniform double b, uniform double c, // a..d: the four values to lay out across lanes
+                    uniform double d) {
+    double ret = 0;  // varying accumulator, every lane starts at 0
+    for (uniform int i = 0; i < programCount; i += 4) {  // walk the gang four lanes at a time
+        ret = insert(ret, i + 0, a);  // lane i+0 <- a
+        ret = insert(ret, i + 1, b);  // lane i+1 <- b
+        ret = insert(ret, i + 2, c);  // lane i+2 <- c
+        ret = insert(ret, i + 3, d);  // lane i+3 <- d
+    }
+    return ret;  // pattern a, b, c, d repeated across the gang
+}
+// Exported query: returns programCount, the number of program instances in the gang.
+export uniform int width() { return programCount; }
+
+
+bool ok(double x, double ref) { 
+    bool r = (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-15;
+    if (any(r == false))
+        print("mismatch got %, expected %\n", x, ref);
+    return r;
+}  // ok(): per-lane pass flag; prints x and ref when any lane fails
+
+
+export void f_v(uniform float RET[]) {
+    double v = double4((14.300000190734863d0),(-6.699999809265137d0),(-21.200000762939453d0),(9.000000000000000d0));
+    double ref = double4((-0.1621146247115303d0),(0.9143832254542971d0),(-0.7030295000381365d0),(-0.9111302618846769d0));  // reference values that cos(v) is compared against, lane by lane
+    RET[programIndex] = ok(cos(v), ref) ? 1. : 0.;  // 1. where cos(v) passes ok() against ref, else 0.
+}
+export void result(uniform float RET[]) { RET[programIndex] = 1.; }  // each program instance stores 1.; NOTE(review): presumably the expected RET for f_v - confirm against the test driver
diff --git a/tests/transcendentals-7-0.ispc b/tests/transcendentals-7-0.ispc
new file mode 100644
index 00000000..e75fe986
--- /dev/null
+++ b/tests/transcendentals-7-0.ispc
@@ -0,0 +1,23 @@
+static double double4(uniform double a, uniform double b, uniform double c, // a..d: the four values to lay out across lanes
+                    uniform double d) {
+    double ret = 0;  // varying accumulator, every lane starts at 0
+    for (uniform int i = 0; i < programCount; i += 4) {  // walk the gang four lanes at a time
+        ret = insert(ret, i + 0, a);  // lane i+0 <- a
+        ret = insert(ret, i + 1, b);  // lane i+1 <- b
+        ret = insert(ret, i + 2, c);  // lane i+2 <- c
+        ret = insert(ret, i + 3, d);  // lane i+3 <- d
+    }
+    return ret;  // pattern a, b, c, d repeated across the gang
+}
+// Exported query: returns programCount, the number of program instances in the gang.
+export uniform int width() { return programCount; }
+
+
+bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; }
+
+export void f_v(uniform float RET[]) {
+    double v = double4((-3.141592741012573d0),(0.000000000000000d0),(3.141592741012573d0),(6.283185482025146d0));
+    double ref = double4((-8.742277955963587d-08),(0.000000000000000d0),(8.742277955963587d-08),(1.7484555911927306d-07));  // reference values that tan(v) is compared against, lane by lane
+    RET[programIndex] = ok(tan(v), ref) ? 1. : 0.;  // 1. where tan(v) passes ok() against ref, else 0.
+}
+export void result(uniform float RET[]) { RET[programIndex] = 1.; }  // each program instance stores 1.; NOTE(review): presumably the expected RET for f_v - confirm against the test driver
diff --git a/tests/transcendentals-7-1.ispc b/tests/transcendentals-7-1.ispc
new file mode 100644
index 00000000..8f36a8d4
--- /dev/null
+++ b/tests/transcendentals-7-1.ispc
@@ -0,0 +1,23 @@
+static double double4(uniform double a, uniform double b, uniform double c, // a..d: the four values to lay out across lanes
+                    uniform double d) {
+    double ret = 0;  // varying accumulator, every lane starts at 0
+    for (uniform int i = 0; i < programCount; i += 4) {  // walk the gang four lanes at a time
+        ret = insert(ret, i + 0, a);  // lane i+0 <- a
+        ret = insert(ret, i + 1, b);  // lane i+1 <- b
+        ret = insert(ret, i + 2, c);  // lane i+2 <- c
+        ret = insert(ret, i + 3, d);  // lane i+3 <- d
+    }
+    return ret;  // pattern a, b, c, d repeated across the gang
+}
+// Exported query: returns programCount, the number of program instances in the gang.
+export uniform int width() { return programCount; }
+
+
+bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; }
+
+export void f_v(uniform float RET[]) {
+    double v = double4((1.000000000000000d0),(-1.000000000000000d0),(0),(0));
+    double ref = double4((1.557407724654902d0),(-1.557407724654902d0),(0),(0));  // reference values that tan(v) is compared against, lane by lane
+    RET[programIndex] = ok(tan(v), ref) ? 1. : 0.;  // 1. where tan(v) passes ok() against ref, else 0.
+}
+export void result(uniform float RET[]) { RET[programIndex] = 1.; }  // each program instance stores 1.; NOTE(review): presumably the expected RET for f_v - confirm against the test driver
diff --git a/tests/transcendentals-7-2.ispc b/tests/transcendentals-7-2.ispc
new file mode 100644
index 00000000..bfe7226c
--- /dev/null
+++ b/tests/transcendentals-7-2.ispc
@@ -0,0 +1,23 @@
+static double double4(uniform double a, uniform double b, uniform double c, // a..d: the four values to lay out across lanes
+                    uniform double d) {
+    double ret = 0;  // varying accumulator, every lane starts at 0
+    for (uniform int i = 0; i < programCount; i += 4) {  // walk the gang four lanes at a time
+        ret = insert(ret, i + 0, a);  // lane i+0 <- a
+        ret = insert(ret, i + 1, b);  // lane i+1 <- b
+        ret = insert(ret, i + 2, c);  // lane i+2 <- c
+        ret = insert(ret, i + 3, d);  // lane i+3 <- d
+    }
+    return ret;  // pattern a, b, c, d repeated across the gang
+}
+// Exported query: returns programCount, the number of program instances in the gang.
+export uniform int width() { return programCount; }
+
+
+bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; }
+
+export void f_v(uniform float RET[]) {
+    double v = double4((-9.424777984619141d0),(4.000000000000000d0),(10.000000000000000d0),(-10.000000000000000d0));
+    double ref = double4((-2.3849760909612074d-08),(1.1578212823495775d0),(0.6483608274590867d0),(-0.6483608274590867d0));  // reference values that tan(v) is compared against, lane by lane
+    RET[programIndex] = ok(tan(v), ref) ? 1. : 0.;  // 1. where tan(v) passes ok() against ref, else 0.
+}
+export void result(uniform float RET[]) { RET[programIndex] = 1.; }  // each program instance stores 1.; NOTE(review): presumably the expected RET for f_v - confirm against the test driver
diff --git a/tests/transcendentals-7-3.ispc b/tests/transcendentals-7-3.ispc
new file mode 100644
index 00000000..9b5843c4
--- /dev/null
+++ b/tests/transcendentals-7-3.ispc
@@ -0,0 +1,23 @@
+static double double4(uniform double a, uniform double b, uniform double c, // a..d: the four values to lay out across lanes
+                    uniform double d) {
+    double ret = 0;  // varying accumulator, every lane starts at 0
+    for (uniform int i = 0; i < programCount; i += 4) {  // walk the gang four lanes at a time
+        ret = insert(ret, i + 0, a);  // lane i+0 <- a
+        ret = insert(ret, i + 1, b);  // lane i+1 <- b
+        ret = insert(ret, i + 2, c);  // lane i+2 <- c
+        ret = insert(ret, i + 3, d);  // lane i+3 <- d
+    }
+    return ret;  // pattern a, b, c, d repeated across the gang
+}
+// Exported query: returns programCount, the number of program instances in the gang.
+export uniform int width() { return programCount; }
+
+
+bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; }
+
+export void f_v(uniform float RET[]) {
+    double v = double4((14.300000190734863d0),(-6.699999809265137d0),(-21.200000762939453d0),(9.000000000000000d0));
+    double ref = double4((-6.086877942749462d0),(-0.442757188607329d0),(1.0115659250410816d0),(-0.4523156594418099d0));  // reference values that tan(v) is compared against, lane by lane
+    RET[programIndex] = ok(tan(v), ref) ? 1. : 0.;  // 1. where tan(v) passes ok() against ref, else 0.
+}
+export void result(uniform float RET[]) { RET[programIndex] = 1.; }  // each program instance stores 1.; NOTE(review): presumably the expected RET for f_v - confirm against the test driver
diff --git a/tests/transcendentals-8-0.ispc b/tests/transcendentals-8-0.ispc
new file mode 100644
index 00000000..52b3fe7e
--- /dev/null
+++ b/tests/transcendentals-8-0.ispc
@@ -0,0 +1,23 @@
+static double double4(uniform double a, uniform double b, uniform double c, // a..d: the four values to lay out across lanes
+                    uniform double d) {
+    double ret = 0;  // varying accumulator, every lane starts at 0
+    for (uniform int i = 0; i < programCount; i += 4) {  // walk the gang four lanes at a time
+        ret = insert(ret, i + 0, a);  // lane i+0 <- a
+        ret = insert(ret, i + 1, b);  // lane i+1 <- b
+        ret = insert(ret, i + 2, c);  // lane i+2 <- c
+        ret = insert(ret, i + 3, d);  // lane i+3 <- d
+    }
+    return ret;  // pattern a, b, c, d repeated across the gang
+}
+// Exported query: returns programCount, the number of program instances in the gang.
+export uniform int width() { return programCount; }
+
+
+bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; }
+
+export void f_v(uniform float RET[]) {
+    double v = double4((-3.141592741012573d0),(0.000000000000000d0),(3.141592741012573d0),(6.283185482025146d0));
+    double ref = double4((0.04321391448589156d0),(1.000000000000000d0),(23.140694655803028d0),(535.4917491531108d0));  // reference values that exp(v) is compared against, lane by lane
+    RET[programIndex] = ok(exp(v), ref) ? 1. : 0.;  // 1. where exp(v) passes ok() against ref, else 0.
+}
+export void result(uniform float RET[]) { RET[programIndex] = 1.; }  // each program instance stores 1.; NOTE(review): presumably the expected RET for f_v - confirm against the test driver
diff --git a/tests/transcendentals-8-1.ispc b/tests/transcendentals-8-1.ispc
new file mode 100644
index 00000000..ac166aaa
--- /dev/null
+++ b/tests/transcendentals-8-1.ispc
@@ -0,0 +1,23 @@
+static double double4(uniform double a, uniform double b, uniform double c, // a..d: the four values to lay out across lanes
+                    uniform double d) {
+    double ret = 0;  // varying accumulator, every lane starts at 0
+    for (uniform int i = 0; i < programCount; i += 4) {  // walk the gang four lanes at a time
+        ret = insert(ret, i + 0, a);  // lane i+0 <- a
+        ret = insert(ret, i + 1, b);  // lane i+1 <- b
+        ret = insert(ret, i + 2, c);  // lane i+2 <- c
+        ret = insert(ret, i + 3, d);  // lane i+3 <- d
+    }
+    return ret;  // pattern a, b, c, d repeated across the gang
+}
+// Exported query: returns programCount, the number of program instances in the gang.
+export uniform int width() { return programCount; }
+
+
+bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; }
+
+export void f_v(uniform float RET[]) {
+    double v = double4((1.000000000000000d0),(-1.000000000000000d0),(-1.570796370506287d0),(1.570796370506287d0));
+    double ref = double4((2.718281828459045d0),(0.36787944117144233d0),(0.2078795672640568d0),(4.810477591238011d0));  // reference values that exp(v) is compared against, lane by lane
+    RET[programIndex] = ok(exp(v), ref) ? 1. : 0.;  // 1. where exp(v) passes ok() against ref, else 0.
+}
+export void result(uniform float RET[]) { RET[programIndex] = 1.; }  // each program instance stores 1.; NOTE(review): presumably the expected RET for f_v - confirm against the test driver
diff --git a/tests/transcendentals-8-2.ispc b/tests/transcendentals-8-2.ispc
new file mode 100644
index 00000000..a250f690
--- /dev/null
+++ b/tests/transcendentals-8-2.ispc
@@ -0,0 +1,23 @@
+static double double4(uniform double a, uniform double b, uniform double c, // a..d: the four values to lay out across lanes
+                    uniform double d) {
+    double ret = 0;  // varying accumulator, every lane starts at 0
+    for (uniform int i = 0; i < programCount; i += 4) {  // walk the gang four lanes at a time
+        ret = insert(ret, i + 0, a);  // lane i+0 <- a
+        ret = insert(ret, i + 1, b);  // lane i+1 <- b
+        ret = insert(ret, i + 2, c);  // lane i+2 <- c
+        ret = insert(ret, i + 3, d);  // lane i+3 <- d
+    }
+    return ret;  // pattern a, b, c, d repeated across the gang
+}
+// Exported query: returns programCount, the number of program instances in the gang.
+export uniform int width() { return programCount; }
+
+
+bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; }
+
+export void f_v(uniform float RET[]) {
+    double v = double4((-9.424777984619141d0),(4.000000000000000d0),(10.000000000000000d0),(-10.000000000000000d0));
+    double ref = double4((8.069951564564043d-05),(54.598150033144236d0),(22026.465794806718d0),(4.5399929762484854d-05));  // reference values that exp(v) is compared against, lane by lane
+    RET[programIndex] = ok(exp(v), ref) ? 1. : 0.;  // 1. where exp(v) passes ok() against ref, else 0.
+}
+export void result(uniform float RET[]) { RET[programIndex] = 1.; }  // each program instance stores 1.; NOTE(review): presumably the expected RET for f_v - confirm against the test driver
diff --git a/tests/transcendentals-8-3.ispc b/tests/transcendentals-8-3.ispc
new file mode 100644
index 00000000..7021fc37
--- /dev/null
+++ b/tests/transcendentals-8-3.ispc
@@ -0,0 +1,23 @@
+static double double4(uniform double a, uniform double b, uniform double c, // a..d: the four values to lay out across lanes
+                    uniform double d) {
+    double ret = 0;  // varying accumulator, every lane starts at 0
+    for (uniform int i = 0; i < programCount; i += 4) {  // walk the gang four lanes at a time
+        ret = insert(ret, i + 0, a);  // lane i+0 <- a
+        ret = insert(ret, i + 1, b);  // lane i+1 <- b
+        ret = insert(ret, i + 2, c);  // lane i+2 <- c
+        ret = insert(ret, i + 3, d);  // lane i+3 <- d
+    }
+    return ret;  // pattern a, b, c, d repeated across the gang
+}
+// Exported query: returns programCount, the number of program instances in the gang.
+export uniform int width() { return programCount; }
+
+
+bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; }
+
+export void f_v(uniform float RET[]) {
+    double v = double4((14.300000190734863d0),(-6.699999809265137d0),(-21.200000762939453d0),(9.000000000000000d0));
+    double ref = double4((1623346.2946371625d0),(0.001230912137451317d0),(6.208070673019769d-10),(8103.083927575384d0));  // reference values that exp(v) is compared against, lane by lane
+    RET[programIndex] = ok(exp(v), ref) ? 1. : 0.;  // 1. where exp(v) passes ok() against ref, else 0.
+}
+export void result(uniform float RET[]) { RET[programIndex] = 1.; }  // each program instance stores 1.; NOTE(review): presumably the expected RET for f_v - confirm against the test driver
diff --git a/tests/transcendentals-9-0.ispc b/tests/transcendentals-9-0.ispc
new file mode 100644
index 00000000..8736677d
--- /dev/null
+++ b/tests/transcendentals-9-0.ispc
@@ -0,0 +1,23 @@
+static double double4(uniform double a, uniform double b, uniform double c, // a..d: the four values to lay out across lanes
+                    uniform double d) {
+    double ret = 0;  // varying accumulator, every lane starts at 0
+    for (uniform int i = 0; i < programCount; i += 4) {  // walk the gang four lanes at a time
+        ret = insert(ret, i + 0, a);  // lane i+0 <- a
+        ret = insert(ret, i + 1, b);  // lane i+1 <- b
+        ret = insert(ret, i + 2, c);  // lane i+2 <- c
+        ret = insert(ret, i + 3, d);  // lane i+3 <- d
+    }
+    return ret;  // pattern a, b, c, d repeated across the gang
+}
+// Exported query: returns programCount, the number of program instances in the gang.
+export uniform int width() { return programCount; }
+
+
+bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; }
+
+export void f_v(uniform float RET[]) {
+    double v = double4((3.141602741012573d),(0.000010000000000d),(3.141602741012573d),(6.283195482025146d));
+    double ref = double4((1.144733096770642d0),(-11.512925464970229d0),(1.144733096770642d0),(1.8378786857850002d0));  // reference values that log(v) is compared against, lane by lane
+    RET[programIndex] = ok(log(v), ref) ? 1. : 0.;  // 1. where log(v) passes ok() against ref, else 0.
+}
+export void result(uniform float RET[]) { RET[programIndex] = 1.; }  // each program instance stores 1.; NOTE(review): presumably the expected RET for f_v - confirm against the test driver
diff --git a/tests/transcendentals-9-1.ispc b/tests/transcendentals-9-1.ispc
new file mode 100644
index 00000000..3a69782e
--- /dev/null
+++ b/tests/transcendentals-9-1.ispc
@@ -0,0 +1,23 @@
+static double double4(uniform double a, uniform double b, uniform double c, // a..d: the four values to lay out across lanes
+                    uniform double d) {
+    double ret = 0;  // varying accumulator, every lane starts at 0
+    for (uniform int i = 0; i < programCount; i += 4) {  // walk the gang four lanes at a time
+        ret = insert(ret, i + 0, a);  // lane i+0 <- a
+        ret = insert(ret, i + 1, b);  // lane i+1 <- b
+        ret = insert(ret, i + 2, c);  // lane i+2 <- c
+        ret = insert(ret, i + 3, d);  // lane i+3 <- d
+    }
+    return ret;  // pattern a, b, c, d repeated across the gang
+}
+// Exported query: returns programCount, the number of program instances in the gang.
+export uniform int width() { return programCount; }
+
+
+bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; }
+
+export void f_v(uniform float RET[]) {
+    double v = double4((1.000010000000000d0),(1.000010000000000d0),(1.570806370506287d0),(1.570806370506287d0));
+    double ref = double4((9.999950000398841d-06),(9.999950000398841d-06),(0.4515890992942722d0),(0.4515890992942722d0));  // reference values that log(v) is compared against, lane by lane
+    RET[programIndex] = ok(log(v), ref) ? 1. : 0.;  // 1. where log(v) passes ok() against ref, else 0.
+}
+export void result(uniform float RET[]) { RET[programIndex] = 1.; }  // each program instance stores 1.; NOTE(review): presumably the expected RET for f_v - confirm against the test driver
diff --git a/tests/transcendentals-9-2.ispc b/tests/transcendentals-9-2.ispc
new file mode 100644
index 00000000..83113a42
--- /dev/null
+++ b/tests/transcendentals-9-2.ispc
@@ -0,0 +1,23 @@
+static double double4(uniform double a, uniform double b, uniform double c, // a..d: the four values to lay out across lanes
+                    uniform double d) {
+    double ret = 0;  // varying accumulator, every lane starts at 0
+    for (uniform int i = 0; i < programCount; i += 4) {  // walk the gang four lanes at a time
+        ret = insert(ret, i + 0, a);  // lane i+0 <- a
+        ret = insert(ret, i + 1, b);  // lane i+1 <- b
+        ret = insert(ret, i + 2, c);  // lane i+2 <- c
+        ret = insert(ret, i + 3, d);  // lane i+3 <- d
+    }
+    return ret;  // pattern a, b, c, d repeated across the gang
+}
+// Exported query: returns programCount, the number of program instances in the gang.
+export uniform int width() { return programCount; }
+
+
+bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; }
+
+export void f_v(uniform float RET[]) {
+    double v = double4((9.424787984619140d0),(4.000010000000000d0),(10.000010000000000d0),(10.000010000000000d0));
+    double ref = double4((2.2433432380804366d0),(1.3862968611167654d0),(2.3025860929935456d0),(2.3025860929935456d0));  // reference values that log(v) is compared against, lane by lane
+    RET[programIndex] = ok(log(v), ref) ? 1. : 0.;  // 1. where log(v) passes ok() against ref, else 0.
+}
+export void result(uniform float RET[]) { RET[programIndex] = 1.; }  // each program instance stores 1.; NOTE(review): presumably the expected RET for f_v - confirm against the test driver
diff --git a/tests/transcendentals-9-3.ispc b/tests/transcendentals-9-3.ispc
new file mode 100644
index 00000000..22ef5e47
--- /dev/null
+++ b/tests/transcendentals-9-3.ispc
@@ -0,0 +1,23 @@
+static double double4(uniform double a, uniform double b, uniform double c, // a..d: the four values to lay out across lanes
+                    uniform double d) {
+    double ret = 0;  // varying accumulator, every lane starts at 0
+    for (uniform int i = 0; i < programCount; i += 4) {  // walk the gang four lanes at a time
+        ret = insert(ret, i + 0, a);  // lane i+0 <- a
+        ret = insert(ret, i + 1, b);  // lane i+1 <- b
+        ret = insert(ret, i + 2, c);  // lane i+2 <- c
+        ret = insert(ret, i + 3, d);  // lane i+3 <- d
+    }
+    return ret;  // pattern a, b, c, d repeated across the gang
+}
+// Exported query: returns programCount, the number of program instances in the gang.
+export uniform int width() { return programCount; }
+
+
+bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; }
+
+export void f_v(uniform float RET[]) {
+    double v = double4((14.300010190734863d0),(6.700009809265136d0),(21.200010762939453d0),(9.000010000000000d0));
+    double ref = double4((2.6602602499044092d0),(1.902108990465272d0),(3.0540016893636612d0),(2.197225688446713d0));  // reference values that log(v) is compared against, lane by lane
+    RET[programIndex] = ok(log(v), ref) ? 1. : 0.;  // 1. where log(v) passes ok() against ref, else 0.
+}
+export void result(uniform float RET[]) { RET[programIndex] = 1.; }  // unsized RET like f_v: indexed by programIndex, so no fixed bound applies; each program instance stores 1.

From 193bba77b08a0c49e068f8d791e2a432028aa6d7 Mon Sep 17 00:00:00 2001
From: evghenii <egaburov@dds.nl>
Date: Tue, 11 Feb 2014 11:49:03 +0100
Subject: [PATCH 7/7] accuracy fix

---
 tests/transcendentals-5-0.ispc | 2 +-
 tests/transcendentals-5-1.ispc | 2 +-
 tests/transcendentals-5-2.ispc | 2 +-
 tests/transcendentals-5-3.ispc | 2 +-
 tests/transcendentals-6-0.ispc | 2 +-
 tests/transcendentals-6-1.ispc | 2 +-
 tests/transcendentals-6-2.ispc | 2 +-
 tests/transcendentals-6-3.ispc | 2 +-
 tests/transcendentals-7-0.ispc | 2 +-
 tests/transcendentals-7-1.ispc | 2 +-
 tests/transcendentals-7-2.ispc | 2 +-
 tests/transcendentals-7-3.ispc | 2 +-
 tests/transcendentals-8-0.ispc | 2 +-
 tests/transcendentals-8-1.ispc | 2 +-
 tests/transcendentals-8-2.ispc | 2 +-
 tests/transcendentals-8-3.ispc | 2 +-
 tests/transcendentals-9-0.ispc | 2 +-
 tests/transcendentals-9-1.ispc | 2 +-
 tests/transcendentals-9-2.ispc | 2 +-
 tests/transcendentals-9-3.ispc | 2 +-
 20 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/tests/transcendentals-5-0.ispc b/tests/transcendentals-5-0.ispc
index c445b8a9..562050df 100644
--- a/tests/transcendentals-5-0.ispc
+++ b/tests/transcendentals-5-0.ispc
@@ -13,7 +13,7 @@ static double double4(uniform double a, uniform double b, uniform double c,
 export uniform int width() { return programCount; }
 
 
-bool ok(double x, double ref) { return (abs(x - ref) < 1e-16) || abs((x-ref)/ref) < 1e-15; }
+bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-14; }  // per lane: absolute error below 1d-15, or relative error below 1d-14
 
 export void f_v(uniform float RET[]) {
     double v = double4((-3.141592741012573d0),(0.000000000000000d0),(3.141592741012573d0),(6.283185482025146d0));
diff --git a/tests/transcendentals-5-1.ispc b/tests/transcendentals-5-1.ispc
index 7d9980a4..fd1e1506 100644
--- a/tests/transcendentals-5-1.ispc
+++ b/tests/transcendentals-5-1.ispc
@@ -13,7 +13,7 @@ static double double4(uniform double a, uniform double b, uniform double c,
 export uniform int width() { return programCount; }
 
 
-bool ok(double x, double ref) { return (abs(x - ref) < 1e-16) || abs((x-ref)/ref) < 1e-15; }
+bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-14; }  // per lane: absolute error below 1d-15, or relative error below 1d-14
 
 export void f_v(uniform float RET[]) {
     double v = double4((1.000000000000000d0),(-1.000000000000000d0),(-1.5707963705062866d0),(1.5707963705062866d0));
diff --git a/tests/transcendentals-5-2.ispc b/tests/transcendentals-5-2.ispc
index 38247397..2a50730a 100644
--- a/tests/transcendentals-5-2.ispc
+++ b/tests/transcendentals-5-2.ispc
@@ -14,7 +14,7 @@ export uniform int width() { return programCount; }
 
 
 bool ok(double x, double ref) { 
-    bool r = (abs(x - ref) < 1e-14) || abs((x-ref)/ref) < 1e-14;
+    bool r = (abs(x - ref) < 1d-14) || abs((x-ref)/ref) < 1d-14;
     if (any(r == false))
         print("mismatch got %, expected %\n", x, ref);
     return r;
diff --git a/tests/transcendentals-5-3.ispc b/tests/transcendentals-5-3.ispc
index 496f0b68..dbde3751 100644
--- a/tests/transcendentals-5-3.ispc
+++ b/tests/transcendentals-5-3.ispc
@@ -13,7 +13,7 @@ static double double4(uniform double a, uniform double b, uniform double c,
 export uniform int width() { return programCount; }
 
 
-bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; }
+bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-14; }  // per lane: absolute error below 1d-15, or relative error below 1d-14
 
 export void f_v(uniform float RET[]) {
     double v = double4((14.300000190734863d0),(-6.699999809265137d0),(-21.200000762939453d0),(9.000000000000000d0));
diff --git a/tests/transcendentals-6-0.ispc b/tests/transcendentals-6-0.ispc
index 2b59989a..ae3dac39 100644
--- a/tests/transcendentals-6-0.ispc
+++ b/tests/transcendentals-6-0.ispc
@@ -13,7 +13,7 @@ static double double4(uniform double a, uniform double b, uniform double c,
 export uniform int width() { return programCount; }
 
 
-bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; }
+bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-14; }  // per lane: absolute error below 1d-15, or relative error below 1d-14
 
 export void f_v(uniform float RET[]) {
     double v = double4((-3.141592653589793d0),(0.000000000000000d0),(3.141592653589793d0),(6.283185307179586d0));
diff --git a/tests/transcendentals-6-1.ispc b/tests/transcendentals-6-1.ispc
index b460af9f..8f59c54d 100644
--- a/tests/transcendentals-6-1.ispc
+++ b/tests/transcendentals-6-1.ispc
@@ -13,7 +13,7 @@ static double double4(uniform double a, uniform double b, uniform double c,
 export uniform int width() { return programCount; }
 
 
-bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; }
+bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-14; }  // per lane: absolute error below 1d-15, or relative error below 1d-14
 
 export void f_v(uniform float RET[]) {
     double v = double4((1.000000000000000d0),(-1.000000000000000d0),(-1.570796370506287d0),(1.570796370506287d0));
diff --git a/tests/transcendentals-6-2.ispc b/tests/transcendentals-6-2.ispc
index 5bcb1949..80ee9f7b 100644
--- a/tests/transcendentals-6-2.ispc
+++ b/tests/transcendentals-6-2.ispc
@@ -13,7 +13,7 @@ static double double4(uniform double a, uniform double b, uniform double c,
 export uniform int width() { return programCount; }
 
 
-bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; }
+bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-14; }  // per lane: absolute error below 1d-15, or relative error below 1d-14
 
 export void f_v(uniform float RET[]) {
     double v = double4((-9.42477796076938d0),(4.000000000000000d0),(10.000000000000000d0),(-10.000000000000000d0));
diff --git a/tests/transcendentals-6-3.ispc b/tests/transcendentals-6-3.ispc
index 6d738f92..e64c4b82 100644
--- a/tests/transcendentals-6-3.ispc
+++ b/tests/transcendentals-6-3.ispc
@@ -14,7 +14,7 @@ export uniform int width() { return programCount; }
 
 
 bool ok(double x, double ref) { 
-    bool r = (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-15;
+    bool r = (abs(x - ref) < 1d-14) || abs((x-ref)/ref) < 1d-14;
     if (any(r == false))
         print("mismatch got %, expected %\n", x, ref);
     return r;
diff --git a/tests/transcendentals-7-0.ispc b/tests/transcendentals-7-0.ispc
index e75fe986..e59f1c7a 100644
--- a/tests/transcendentals-7-0.ispc
+++ b/tests/transcendentals-7-0.ispc
@@ -13,7 +13,7 @@ static double double4(uniform double a, uniform double b, uniform double c,
 export uniform int width() { return programCount; }
 
 
-bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; }
+bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-14; }  // per lane: absolute error below 1d-15, or relative error below 1d-14
 
 export void f_v(uniform float RET[]) {
     double v = double4((-3.141592741012573d0),(0.000000000000000d0),(3.141592741012573d0),(6.283185482025146d0));
diff --git a/tests/transcendentals-7-1.ispc b/tests/transcendentals-7-1.ispc
index 8f36a8d4..0b3a9c66 100644
--- a/tests/transcendentals-7-1.ispc
+++ b/tests/transcendentals-7-1.ispc
@@ -13,7 +13,7 @@ static double double4(uniform double a, uniform double b, uniform double c,
 export uniform int width() { return programCount; }
 
 
-bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; }
+bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-14; }  // per lane: absolute error below 1d-15, or relative error below 1d-14
 
 export void f_v(uniform float RET[]) {
     double v = double4((1.000000000000000d0),(-1.000000000000000d0),(0),(0));
diff --git a/tests/transcendentals-7-2.ispc b/tests/transcendentals-7-2.ispc
index bfe7226c..4227981d 100644
--- a/tests/transcendentals-7-2.ispc
+++ b/tests/transcendentals-7-2.ispc
@@ -13,7 +13,7 @@ static double double4(uniform double a, uniform double b, uniform double c,
 export uniform int width() { return programCount; }
 
 
-bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; }
+bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-14; }  // per lane: absolute error below 1d-15, or relative error below 1d-14
 
 export void f_v(uniform float RET[]) {
     double v = double4((-9.424777984619141d0),(4.000000000000000d0),(10.000000000000000d0),(-10.000000000000000d0));
diff --git a/tests/transcendentals-7-3.ispc b/tests/transcendentals-7-3.ispc
index 9b5843c4..aa6e1380 100644
--- a/tests/transcendentals-7-3.ispc
+++ b/tests/transcendentals-7-3.ispc
@@ -13,7 +13,7 @@ static double double4(uniform double a, uniform double b, uniform double c,
 export uniform int width() { return programCount; }
 
 
-bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; }
+bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-14; }  // per lane: absolute error below 1d-15, or relative error below 1d-14
 
 export void f_v(uniform float RET[]) {
     double v = double4((14.300000190734863d0),(-6.699999809265137d0),(-21.200000762939453d0),(9.000000000000000d0));
diff --git a/tests/transcendentals-8-0.ispc b/tests/transcendentals-8-0.ispc
index 52b3fe7e..761d6e5a 100644
--- a/tests/transcendentals-8-0.ispc
+++ b/tests/transcendentals-8-0.ispc
@@ -13,7 +13,7 @@ static double double4(uniform double a, uniform double b, uniform double c,
 export uniform int width() { return programCount; }
 
 
-bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; }
+bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-14; }  // per lane: absolute error below 1d-15, or relative error below 1d-14
 
 export void f_v(uniform float RET[]) {
     double v = double4((-3.141592741012573d0),(0.000000000000000d0),(3.141592741012573d0),(6.283185482025146d0));
diff --git a/tests/transcendentals-8-1.ispc b/tests/transcendentals-8-1.ispc
index ac166aaa..f9433aab 100644
--- a/tests/transcendentals-8-1.ispc
+++ b/tests/transcendentals-8-1.ispc
@@ -13,7 +13,7 @@ static double double4(uniform double a, uniform double b, uniform double c,
 export uniform int width() { return programCount; }
 
 
-bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; }
+bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-14; }  // per lane: absolute error below 1d-15, or relative error below 1d-14
 
 export void f_v(uniform float RET[]) {
     double v = double4((1.000000000000000d0),(-1.000000000000000d0),(-1.570796370506287d0),(1.570796370506287d0));
diff --git a/tests/transcendentals-8-2.ispc b/tests/transcendentals-8-2.ispc
index a250f690..493b1632 100644
--- a/tests/transcendentals-8-2.ispc
+++ b/tests/transcendentals-8-2.ispc
@@ -13,7 +13,7 @@ static double double4(uniform double a, uniform double b, uniform double c,
 export uniform int width() { return programCount; }
 
 
-bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; }
+bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-14; }  // per lane: absolute error below 1d-15, or relative error below 1d-14
 
 export void f_v(uniform float RET[]) {
     double v = double4((-9.424777984619141d0),(4.000000000000000d0),(10.000000000000000d0),(-10.000000000000000d0));
diff --git a/tests/transcendentals-8-3.ispc b/tests/transcendentals-8-3.ispc
index 7021fc37..38101635 100644
--- a/tests/transcendentals-8-3.ispc
+++ b/tests/transcendentals-8-3.ispc
@@ -13,7 +13,7 @@ static double double4(uniform double a, uniform double b, uniform double c,
 export uniform int width() { return programCount; }
 
 
-bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; }
+bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-14; }  // per lane: absolute error below 1d-15, or relative error below 1d-14
 
 export void f_v(uniform float RET[]) {
     double v = double4((14.300000190734863d0),(-6.699999809265137d0),(-21.200000762939453d0),(9.000000000000000d0));
diff --git a/tests/transcendentals-9-0.ispc b/tests/transcendentals-9-0.ispc
index 8736677d..8c51e240 100644
--- a/tests/transcendentals-9-0.ispc
+++ b/tests/transcendentals-9-0.ispc
@@ -13,7 +13,7 @@ static double double4(uniform double a, uniform double b, uniform double c,
 export uniform int width() { return programCount; }
 
 
-bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; }
+bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-13; }  // per lane: absolute error below 1d-15, or relative error below 1d-13
 
 export void f_v(uniform float RET[]) {
     double v = double4((3.141602741012573d),(0.000010000000000d),(3.141602741012573d),(6.283195482025146d));
diff --git a/tests/transcendentals-9-1.ispc b/tests/transcendentals-9-1.ispc
index 3a69782e..0b01c322 100644
--- a/tests/transcendentals-9-1.ispc
+++ b/tests/transcendentals-9-1.ispc
@@ -13,7 +13,7 @@ static double double4(uniform double a, uniform double b, uniform double c,
 export uniform int width() { return programCount; }
 
 
-bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; }
+bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-14; }  // per lane: absolute error below 1d-15, or relative error below 1d-14
 
 export void f_v(uniform float RET[]) {
     double v = double4((1.000010000000000d0),(1.000010000000000d0),(1.570806370506287d0),(1.570806370506287d0));
diff --git a/tests/transcendentals-9-2.ispc b/tests/transcendentals-9-2.ispc
index 83113a42..2391920b 100644
--- a/tests/transcendentals-9-2.ispc
+++ b/tests/transcendentals-9-2.ispc
@@ -13,7 +13,7 @@ static double double4(uniform double a, uniform double b, uniform double c,
 export uniform int width() { return programCount; }
 
 
-bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; }
+bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-14; }  // per lane: absolute error below 1d-15, or relative error below 1d-14
 
 export void f_v(uniform float RET[]) {
     double v = double4((9.424787984619140d0),(4.000010000000000d0),(10.000010000000000d0),(10.000010000000000d0));
diff --git a/tests/transcendentals-9-3.ispc b/tests/transcendentals-9-3.ispc
index 22ef5e47..ed787a5c 100644
--- a/tests/transcendentals-9-3.ispc
+++ b/tests/transcendentals-9-3.ispc
@@ -13,7 +13,7 @@ static double double4(uniform double a, uniform double b, uniform double c,
 export uniform int width() { return programCount; }
 
 
-bool ok(double x, double ref) { return (abs(x - ref) < 1d-16) || abs((x-ref)/ref) < 1d-15; }
+bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-14; }  // per lane: absolute error below 1d-15, or relative error below 1d-14
 
 export void f_v(uniform float RET[]) {
     double v = double4((14.300010190734863d0),(6.700009809265136d0),(21.200010762939453d0),(9.000010000000000d0));