merged with master

2013-10-08 19:13:30 +02:00
parent 153fbc3d7d 1485419414
commit 5d56d29240
162 changed files with 16148 additions and 1942 deletions
--- a/builtins/target-nvptx64.ll
+++ b/builtins/target-nvptx64.ll
@@ -5,6 +5,10 @@ define(`WIDTH',`1')

 include(`util.m4')

+include(`svml.m4')
+svml_stubs(float,f,WIDTH)
+svml_stubs(double,d,WIDTH)
+
 ; Define some basics for a 1-wide target
 stdlib_core()
 packed_load_and_store()
@@ -467,6 +471,9 @@ define  i32 @__max_uniform_uint32(i32, i32) nounwind readonly alwaysinline {

 declare i32 @llvm.ctpop.i32(i32) nounwind readnone

+declare i16 @__reduce_add_int8(<WIDTH x i8>) nounwind readnone
+declare i32 @__reduce_add_int16(<WIDTH x i16>) nounwind readnone
+
 define  i32 @__popcnt_int32(i32) nounwind readonly alwaysinline {
  %call = call i32 @llvm.ctpop.i32(i32 %0)
  ret i32 %call
@@ -643,103 +650,6 @@ define  <1 x double> @__rsqrt_varying_double(<1 x double> %v) nounwind readonly
 }


-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; svml stuff
-
-define  <1 x float> @__svml_sin(<1 x float>) nounwind readnone alwaysinline {
-  ;%ret = call <1 x float> @__svml_sinf4(<1 x float> %0)
-  ;ret <1 x float> %ret
-  ;%r = extractelement <1 x float> %0, i32 0
-  ;%s = call float @llvm.sin.f32(float %r)
-  ;%rv = insertelement <1 x float> undef, float %r, i32 0
-  ;ret <1 x float> %rv
-  unary1to1(float,@llvm.sin.f32)
-   
-}
-
-define  <1 x float> @__svml_cos(<1 x float>) nounwind readnone alwaysinline {
-  ;%ret = call <1 x float> @__svml_cosf4(<1 x float> %0)
-  ;ret <1 x float> %ret
-  ;%r = extractelement <1 x float> %0, i32 0
-  ;%s = call float @llvm.cos.f32(float %r)
-  ;%rv = insertelement <1 x float> undef, float %r, i32 0
-  ;ret <1 x float> %rv
-  unary1to1(float, @llvm.cos.f32)
-
-}
-
-define  void @__svml_sincos(<1 x float>, <1 x float> *, <1 x float> *) nounwind readnone alwaysinline {
-;  %s = call <1 x float> @__svml_sincosf4(<1 x float> * %2, <1 x float> %0)
-;  store <1 x float> %s, <1 x float> * %1
-;  ret void
-   %sin = call <1 x float> @__svml_sin (<1 x float> %0)
-   %cos = call <1 x float> @__svml_cos (<1 x float> %0)
-   store <1 x float> %sin, <1 x float> * %1
-   store <1 x float> %cos, <1 x float> * %2
-   ret void
-}
-
-define  <1 x float> @__svml_tan(<1 x float>) nounwind readnone alwaysinline {
-  ;%ret = call <1 x float> @__svml_tanf4(<1 x float> %0)
-  ;ret <1 x float> %ret
-  ;%r = extractelement <1 x float> %0, i32 0
-  ;%s = call float @llvm_tan_f32(float %r)
-  ;%rv = insertelement <1 x float> undef, float %r, i32 0
-  ;ret <1 x float> %rv
-  ;unasry1to1(float, @llvm.tan.f32)
-  ; UNSUPPORTED!
-  ret <1 x float > %0
-}
-
-define  <1 x float> @__svml_atan(<1 x float>) nounwind readnone alwaysinline {
-;  %ret = call <1 x float> @__svml_atanf4(<1 x float> %0)
-;  ret <1 x float> %ret
-  ;%r = extractelement <1 x float> %0, i32 0
-  ;%s = call float @llvm_atan_f32(float %r)
-  ;%rv = insertelement <1 x float> undef, float %r, i32 0
-  ;ret <1 x float> %rv
-  ;unsary1to1(float,@llvm.atan.f32)
-  ;UNSUPPORTED!
-  ret <1 x float > %0
-
-}
-
-define  <1 x float> @__svml_atan2(<1 x float>, <1 x float>) nounwind readnone alwaysinline {
-  ;%ret = call <1 x float> @__svml_atan2f4(<1 x float> %0, <1 x float> %1)
-  ;ret <1 x float> %ret
-  ;%y = extractelement <1 x float> %0, i32 0
-  ;%x = extractelement <1 x float> %1, i32 0
-  ;%q = fdiv float %y, %x
-  ;%a = call float @llvm.atan.f32 (float %q)
-  ;%rv = insertelement <1 x float> undef, float %a, i32 0
-  ;ret <1 x float> %rv
-  ; UNSUPPORTED!
-  ret <1 x float > %0
-}
-
-define  <1 x float> @__svml_exp(<1 x float>) nounwind readnone alwaysinline {
-  ;%ret = call <1 x float> @__svml_expf4(<1 x float> %0)
-  ;ret <1 x float> %ret
-  unary1to1(float, @llvm.exp.f32)
-}
-
-define  <1 x float> @__svml_log(<1 x float>) nounwind readnone alwaysinline {
-  ;%ret = call <1 x float> @__svml_logf4(<1 x float> %0)
-  ;ret <1 x float> %ret
-  unary1to1(float, @llvm.log.f32)
-}
-
-define  <1 x float> @__svml_pow(<1 x float>, <1 x float>) nounwind readnone alwaysinline {
-  ;%ret = call <1 x float> @__svml_powf4(<1 x float> %0, <1 x float> %1)
-  ;ret <1 x float> %ret
-  %r = extractelement <1 x float> %0, i32 0
-  %e  = extractelement <1 x float> %1, i32 0
-  %s = call float @llvm.pow.f32(float %r,float %e)
-  %rv = insertelement <1 x float> undef, float %s, i32 0
-  ret <1 x float> %rv
-
-}
-
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; float min/max

@@ -957,3 +867,8 @@ declare float @__half_to_float_uniform(i16 %v) nounwind readnone
 declare <WIDTH x float> @__half_to_float_varying(<WIDTH x i16> %v) nounwind readnone
 declare i16 @__float_to_half_uniform(float %v) nounwind readnone
 declare <WIDTH x i16> @__float_to_half_varying(<WIDTH x float> %v) nounwind readnone
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; int8/int16 builtins
+
+define_avgs()