merged with master

2014-02-21 08:25:28 +01:00
parent 690a8acb30 f280b32fa4
commit ac05de6835
46 changed files with 1127 additions and 197 deletions
--- a/builtins/target-avx-x2.ll
+++ b/builtins/target-avx-x2.ll
@@ -695,3 +695,5 @@ define <16 x double> @__max_varying_double(<16 x double>, <16 x double>) nounwin
 rsqrtd_decl()
 rcpd_decl()

+transcendetals_decl()
+trigonometry_decl()
--- a/builtins/target-avx.ll
+++ b/builtins/target-avx.ll
@@ -564,3 +564,6 @@ gen_scatter(double)

 rsqrtd_decl()
 rcpd_decl()
+
+transcendetals_decl()
+trigonometry_decl()
--- a/builtins/target-avx1-i64x4base.ll
+++ b/builtins/target-avx1-i64x4base.ll
@@ -514,3 +514,6 @@ define <4 x double> @__max_varying_double(<4 x double>, <4 x double>) nounwind r

 rsqrtd_decl()
 rcpd_decl()
+
+transcendetals_decl()
+trigonometry_decl()
--- a/builtins/target-generic-1.ll
+++ b/builtins/target-generic-1.ll
@@ -999,3 +999,6 @@ define_avgs()

 rsqrtd_decl()
 rcpd_decl()
+
+transcendetals_decl()
+trigonometry_decl()
--- a/builtins/target-generic-common.ll
+++ b/builtins/target-generic-common.ll
@@ -191,13 +191,9 @@ declare <WIDTH x double> @__max_varying_double(<WIDTH x double>,

 declare float @__rsqrt_uniform_float(float) nounwind readnone 
 declare float @__rcp_uniform_float(float) nounwind readnone 
-declare double @__rsqrt_uniform_double(double) nounwind readnone 
-declare double @__rcp_uniform_double(double) nounwind readnone 
 declare float @__sqrt_uniform_float(float) nounwind readnone 
 declare <WIDTH x float> @__rcp_varying_float(<WIDTH x float>) nounwind readnone 
 declare <WIDTH x float> @__rsqrt_varying_float(<WIDTH x float>) nounwind readnone 
-declare <WIDTH x double> @__rcp_varying_double(<WIDTH x double>) nounwind readnone 
-declare <WIDTH x double> @__rsqrt_varying_double(<WIDTH x double>) nounwind readnone 

 declare <WIDTH x float> @__sqrt_varying_float(<WIDTH x float>) nounwind readnone 

@@ -393,3 +389,12 @@ declare void @__prefetch_read_uniform_nt(i8 * nocapture) nounwind

 define_avgs()
 declare_nvptx()
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; reciprocals in double precision, if supported
+
+rsqrtd_decl()
+rcpd_decl()
+
+transcendetals_decl()
+trigonometry_decl()
--- a/builtins/target-neon-16.ll
+++ b/builtins/target-neon-16.ll
@@ -521,3 +521,6 @@ define <8 x i16> @__avg_down_int16(<8 x i16>, <8 x i16>) nounwind readnone {

 rsqrtd_decl()
 rcpd_decl()
+
+transcendetals_decl()
+trigonometry_decl()
--- a/builtins/target-neon-32.ll
+++ b/builtins/target-neon-32.ll
@@ -491,3 +491,6 @@ define <4 x i16> @__avg_down_int16(<4 x i16>, <4 x i16>) nounwind readnone {

 rsqrtd_decl()
 rcpd_decl()
+
+transcendetals_decl()
+trigonometry_decl()
--- a/builtins/target-neon-8.ll
+++ b/builtins/target-neon-8.ll
@@ -587,3 +587,6 @@ define <16 x i16> @__avg_down_int16(<16 x i16>, <16 x i16>) nounwind readnone {

 rsqrtd_decl()
 rcpd_decl()
+
+transcendetals_decl()
+trigonometry_decl()
--- a/builtins/target-nvptx.ll
+++ b/builtins/target-nvptx.ll
@@ -2262,3 +2262,90 @@ define void @__memory_barrier() nounwind readnone alwaysinline {
 }

 saturation_arithmetic_novec();
+
+;;;;;;;;;;;;;;;;;;;;
+;; transcendentals and trigonometry
+
+;; Declarations only (no bodies) of log, exp and pow for float and double, in uniform and varying form.
+define(`transcendetals_decl',`
+    declare float @__log_uniform_float(float) nounwind readnone
+    declare <WIDTH x float> @__log_varying_float(<WIDTH x float>) nounwind readnone
+    declare float @__exp_uniform_float(float) nounwind readnone
+    declare <WIDTH x float> @__exp_varying_float(<WIDTH x float>) nounwind readnone
+    declare float @__pow_uniform_float(float, float) nounwind readnone
+    declare <WIDTH x float> @__pow_varying_float(<WIDTH x float>, <WIDTH x float>) nounwind readnone
+
+    declare double @__log_uniform_double(double) nounwind readnone
+    declare <WIDTH x double> @__log_varying_double(<WIDTH x double>) nounwind readnone
+    declare double @__exp_uniform_double(double) nounwind readnone
+    declare <WIDTH x double> @__exp_varying_double(<WIDTH x double>) nounwind readnone
+    declare double @__pow_uniform_double(double, double) nounwind readnone
+    declare <WIDTH x double> @__pow_varying_double(<WIDTH x double>, <WIDTH x double>) nounwind readnone
+')
+
+;; Emits uniform and varying one-argument wrappers around an external math routine.
+;; Arguments: 1 - external routine, e.g. __nv_fast_logf; 2 - element type, float/double;
+;; 3 - local name prefix, e.g. __log, __exp, ..
+define(`transcendentals1',`
+declare $2 @$1($2)
+define $2 @$3_uniform_$2($2) nounwind readnone alwaysinline {
+  ;; forward the operand straight to the external routine
+  %result = call $2 @$1($2 %0)
+  ret $2 %result
+}
+define <1 x $2> @$3_varying_$2(<1 x $2>) nounwind readnone alwaysinline {
+  ;; single-lane vector: unwrap the operand, reuse the uniform wrapper, rewrap
+  %elt = bitcast <1 x $2> %0 to $2
+  %val = call $2 @$3_uniform_$2($2 %elt)
+  %vec = bitcast $2 %val to <1 x $2>
+  ret <1 x $2> %vec
+}
+')
+
+;; Emits uniform and varying two-argument wrappers: 1 - external routine, 2 - float/double, 3 - local name prefix.
+define(`transcendentals2',`
+declare $2 @$1($2, $2)
+define $2 @$3_uniform_$2($2, $2) nounwind readnone alwaysinline {
+  ;; forward both operands straight to the external routine
+  %result = call $2 @$1($2 %0, $2 %1)
+  ret $2 %result
+}
+define <1 x $2> @$3_varying_$2(<1 x $2>, <1 x $2>) nounwind readnone alwaysinline {
+  ;; single-lane vectors: unwrap both operands, reuse the uniform wrapper, rewrap
+  %lhs = bitcast <1 x $2> %0 to $2
+  %rhs = bitcast <1 x $2> %1 to $2
+  %val = call $2 @$3_uniform_$2($2 %lhs, $2 %rhs)
+  %vec = bitcast $2 %val to <1 x $2>
+  ret <1 x $2> %vec
+}
+')
+transcendentals1(__nv_fast_logf, float, __log)
+transcendentals1(__nv_fast_expf, float, __exp)
+transcendentals2(__nv_fast_powf, float, __pow)
+
+transcendentals1(__nv_log, double, __log)
+transcendentals1(__nv_exp, double, __exp)
+transcendentals2(__nv_pow, double, __pow)
+
+;; trigonometry: float sin, cos and tan use the __nv_fast_ routines, all other entries use the plain ones
+transcendentals1(__nv_fast_sinf, float, __sin)
+transcendentals1(__nv_fast_cosf, float, __cos)
+transcendentals1(__nv_fast_tanf, float, __tan)
+transcendentals1(__nv_asinf,     float, __asin)
+transcendentals1(__nv_acosf,     float, __acos)
+transcendentals1(__nv_atanf,     float, __atan)
+transcendentals2(__nv_atan2f,    float, __atan2)
+
+transcendentals1(__nv_sin,   double, __sin)
+transcendentals1(__nv_cos,   double, __cos)
+transcendentals1(__nv_tan,   double, __tan)
+transcendentals1(__nv_asin,  double, __asin)
+transcendentals1(__nv_acos,  double, __acos)
+transcendentals1(__nv_atan,  double, __atan)
+transcendentals2(__nv_atan2, double, __atan2)
+;; sincos has a void return and can only deliver results through its pointer arguments, so it is not readnone
+declare void @__sincos_uniform_float(float, float*, float*) nounwind
+declare void @__sincos_varying_float(<WIDTH x float>, <WIDTH x float>*, <WIDTH x float>*) nounwind
+declare void @__sincos_uniform_double(double, double*, double*) nounwind
+declare void @__sincos_varying_double(<WIDTH x double>, <WIDTH x double>*, <WIDTH x double>*) nounwind
+
--- a/builtins/target-sse2-x2.ll
+++ b/builtins/target-sse2-x2.ll
@@ -659,3 +659,6 @@ define <8 x double> @__max_varying_double(<8 x double>, <8 x double>) nounwind r

 rsqrtd_decl()
 rcpd_decl()
+
+transcendetals_decl()
+trigonometry_decl()
--- a/builtins/target-sse2.ll
+++ b/builtins/target-sse2.ll
@@ -594,3 +594,6 @@ gen_scatter(double)

 rsqrtd_decl()
 rcpd_decl()
+
+transcendetals_decl()
+trigonometry_decl()
--- a/builtins/target-sse4-16.ll
+++ b/builtins/target-sse4-16.ll
@@ -495,3 +495,6 @@ define_down_avgs()

 rsqrtd_decl()
 rcpd_decl()
+
+transcendetals_decl()
+trigonometry_decl()
--- a/builtins/target-sse4-8.ll
+++ b/builtins/target-sse4-8.ll
@@ -497,3 +497,6 @@ define_down_avgs()

 rsqrtd_decl()
 rcpd_decl()
+
+transcendetals_decl()
+trigonometry_decl()
--- a/builtins/target-sse4-x2.ll
+++ b/builtins/target-sse4-x2.ll
@@ -598,3 +598,6 @@ define_avgs()

 rsqrtd_decl()
 rcpd_decl()
+
+transcendetals_decl()
+trigonometry_decl()
--- a/builtins/target-sse4.ll
+++ b/builtins/target-sse4.ll
@@ -521,3 +521,6 @@ define_avgs()

 rsqrtd_decl()
 rcpd_decl()
+
+transcendetals_decl()
+trigonometry_decl()
--- a/builtins/util-nvptx.m4
+++ b/builtins/util-nvptx.m4
@@ -1365,13 +1365,6 @@ declare void
@__pseudo_scatter_base_offsets64_double(i8 * nocapture, i32, <WIDTH x i64>,
                                        <WIDTH x double>, <WIDTH x MASK>) nounwind

-declare float @__log_uniform_float(float) nounwind readnone
-declare <WIDTH x float> @__log_varying_float(<WIDTH x float>) nounwind readnone
-declare float @__exp_uniform_float(float) nounwind readnone
-declare <WIDTH x float> @__exp_varying_float(<WIDTH x float>) nounwind readnone
-declare float @__pow_uniform_float(float, float) nounwind readnone
-declare <WIDTH x float> @__pow_varying_float(<WIDTH x float>, <WIDTH x float>) nounwind readnone
-
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 declare void @__use8(<WIDTH x i8>)
--- a/builtins/util.m4
+++ b/builtins/util.m4
@@ -2516,13 +2516,6 @@ declare void
@__pseudo_scatter_base_offsets64_double(i8 * nocapture, i32, <WIDTH x i64>,
                                        <WIDTH x double>, <WIDTH x MASK>) nounwind

-declare float @__log_uniform_float(float) nounwind readnone
-declare <WIDTH x float> @__log_varying_float(<WIDTH x float>) nounwind readnone
-declare float @__exp_uniform_float(float) nounwind readnone
-declare <WIDTH x float> @__exp_varying_float(<WIDTH x float>) nounwind readnone
-declare float @__pow_uniform_float(float, float) nounwind readnone
-declare <WIDTH x float> @__pow_varying_float(<WIDTH x float>, <WIDTH x float>) nounwind readnone
-
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 declare void @__use8(<WIDTH x i8>)
@@ -5008,3 +5001,57 @@ global_atomic_varying(WIDTH, min, i64, int64)
 global_atomic_varying(WIDTH, max, i64, int64)
 global_atomic_varying(WIDTH, umin, i64, uint64)
 global_atomic_varying(WIDTH, umax, i64, uint64)
+;; Declarations only (no bodies) of log, exp and pow for float and double, in uniform and varying form.
+define(`transcendetals_decl',`
+    declare float @__log_uniform_float(float) nounwind readnone
+    declare <WIDTH x float> @__log_varying_float(<WIDTH x float>) nounwind readnone
+    declare float @__exp_uniform_float(float) nounwind readnone
+    declare <WIDTH x float> @__exp_varying_float(<WIDTH x float>) nounwind readnone
+    declare float @__pow_uniform_float(float, float) nounwind readnone
+    declare <WIDTH x float> @__pow_varying_float(<WIDTH x float>, <WIDTH x float>) nounwind readnone
+
+    declare double @__log_uniform_double(double) nounwind readnone
+    declare <WIDTH x double> @__log_varying_double(<WIDTH x double>) nounwind readnone
+    declare double @__exp_uniform_double(double) nounwind readnone
+    declare <WIDTH x double> @__exp_varying_double(<WIDTH x double>) nounwind readnone
+    declare double @__pow_uniform_double(double, double) nounwind readnone
+    declare <WIDTH x double> @__pow_varying_double(<WIDTH x double>, <WIDTH x double>) nounwind readnone
+')
+;; Declarations only of the trigonometry entry points; sincos delivers results through its pointers, so it is not readnone.
+define(`trigonometry_decl',`
+    declare <WIDTH x float> @__sin_varying_float(<WIDTH x float>) nounwind readnone
+    declare <WIDTH x float> @__asin_varying_float(<WIDTH x float>) nounwind readnone
+    declare <WIDTH x float> @__cos_varying_float(<WIDTH x float>) nounwind readnone
+    declare <WIDTH x float> @__acos_varying_float(<WIDTH x float>) nounwind readnone
+    declare void @__sincos_varying_float(<WIDTH x float>, <WIDTH x float>*, <WIDTH x float>*) nounwind
+    declare <WIDTH x float> @__tan_varying_float(<WIDTH x float>) nounwind readnone
+    declare <WIDTH x float> @__atan_varying_float(<WIDTH x float>) nounwind readnone
+    declare <WIDTH x float> @__atan2_varying_float(<WIDTH x float>,<WIDTH x float>) nounwind readnone
+
+    declare float @__sin_uniform_float(float) nounwind readnone
+    declare float @__asin_uniform_float(float) nounwind readnone
+    declare float @__cos_uniform_float(float) nounwind readnone
+    declare float @__acos_uniform_float(float) nounwind readnone
+    declare void @__sincos_uniform_float(float, float*, float*) nounwind
+    declare float @__tan_uniform_float(float) nounwind readnone
+    declare float @__atan_uniform_float(float) nounwind readnone
+    declare float @__atan2_uniform_float(float,float) nounwind readnone
+
+    declare <WIDTH x double> @__sin_varying_double(<WIDTH x double>) nounwind readnone
+    declare <WIDTH x double> @__asin_varying_double(<WIDTH x double>) nounwind readnone
+    declare <WIDTH x double> @__cos_varying_double(<WIDTH x double>) nounwind readnone
+    declare <WIDTH x double> @__acos_varying_double(<WIDTH x double>) nounwind readnone
+    declare void @__sincos_varying_double(<WIDTH x double>, <WIDTH x double>*, <WIDTH x double>*) nounwind
+    declare <WIDTH x double> @__tan_varying_double(<WIDTH x double>) nounwind readnone
+    declare <WIDTH x double> @__atan_varying_double(<WIDTH x double>) nounwind readnone
+    declare <WIDTH x double> @__atan2_varying_double(<WIDTH x double>,<WIDTH x double>) nounwind readnone
+
+    declare double @__sin_uniform_double(double) nounwind readnone
+    declare double @__asin_uniform_double(double) nounwind readnone
+    declare double @__cos_uniform_double(double) nounwind readnone
+    declare double @__acos_uniform_double(double) nounwind readnone
+    declare void @__sincos_uniform_double(double, double*, double*) nounwind
+    declare double @__tan_uniform_double(double) nounwind readnone
+    declare double @__atan_uniform_double(double) nounwind readnone
+    declare double @__atan2_uniform_double(double,double) nounwind readnone
+')