From 19379db3b60a60f2f1862a54709115bcf11c7545 Mon Sep 17 00:00:00 2001
From: egaburov <egaburov.work@gmail.com>
Date: Wed, 11 Sep 2013 16:48:56 +0200
Subject: [PATCH] svml cleanup

---
 builtins/svml.m4                  | 209 +++++++++---------------------
 builtins/target-avx-h.ll          |  11 +-
 builtins/target-avx-x2.ll         |   9 +-
 builtins/target-avx.ll            |  11 +-
 builtins/target-generic-common.ll |   4 +-
 builtins/target-sse2-x2.ll        |   8 +-
 builtins/target-sse2.ll           |  12 +-
 builtins/target-sse4-16.ll        |   4 +-
 builtins/target-sse4-8.ll         |   4 +-
 builtins/target-sse4-x2.ll        |   9 +-
 builtins/target-sse4.ll           |  11 +-
 11 files changed, 116 insertions(+), 176 deletions(-)

diff --git a/builtins/svml.m4 b/builtins/svml.m4
index cc3cd979..9608dea6 100644
--- a/builtins/svml.m4
+++ b/builtins/svml.m4
@@ -1,176 +1,93 @@
 ;; svml
 
-;; stub
-define(`svmlf_stubs',`
-  declare <$1 x float> @__svml_sinf(<$1 x float>) nounwind readnone alwaysinline
-  declare <$1 x float> @__svml_asinf(<$1 x float>) nounwind readnone alwaysinline 
-  declare <$1 x float> @__svml_cosf(<$1 x float>) nounwind readnone alwaysinline 
-  declare void @__svml_sincosf(<$1 x float>, <$1 x float> *, <$1 x float> *) nounwind readnone alwaysinline 
-  declare <$1 x float> @__svml_tanf(<$1 x float>) nounwind readnone alwaysinline 
-  declare <$1 x float> @__svml_atanf(<$1 x float>) nounwind readnone alwaysinline 
-  declare <$1 x float> @__svml_atan2f(<$1 x float>, <$1 x float>) nounwind readnone alwaysinline 
-  declare <$1 x float> @__svml_expf(<$1 x float>) nounwind readnone alwaysinline 
-  declare <$1 x float> @__svml_logf(<$1 x float>) nounwind readnone alwaysinline 
-  declare <$1 x float> @__svml_powf(<$1 x float>, <$1 x float>) nounwind readnone alwaysinline 
+;; stubs: args are element type, vector width, name suffix; only declarations of the __svml entry points are emitted, no bodies
+define(`svml_stubs',`
+  declare <$2 x $1> @__svml_sin$3(<$2 x $1>) nounwind readnone alwaysinline
+  declare <$2 x $1> @__svml_asin$3(<$2 x $1>) nounwind readnone alwaysinline 
+  declare <$2 x $1> @__svml_cos$3(<$2 x $1>) nounwind readnone alwaysinline 
+  declare void @__svml_sincos$3(<$2 x $1>, <$2 x $1> *, <$2 x $1> *) nounwind readnone alwaysinline 
+  declare <$2 x $1> @__svml_tan$3(<$2 x $1>) nounwind readnone alwaysinline 
+  declare <$2 x $1> @__svml_atan$3(<$2 x $1>) nounwind readnone alwaysinline 
+  declare <$2 x $1> @__svml_atan2$3(<$2 x $1>, <$2 x $1>) nounwind readnone alwaysinline 
+  declare <$2 x $1> @__svml_exp$3(<$2 x $1>) nounwind readnone alwaysinline 
+  declare <$2 x $1> @__svml_log$3(<$2 x $1>) nounwind readnone alwaysinline 
+  declare <$2 x $1> @__svml_pow$3(<$2 x $1>, <$2 x $1>) nounwind readnone alwaysinline 
 ')
 
-define(`svmld_stubs',`
-  declare <$1 x double> @__svml_sind(<$1 x double>) nounwind readnone alwaysinline 
-  declare <$1 x double> @__svml_asind(<$1 x double>) nounwind readnone alwaysinline 
-  declare <$1 x double> @__svml_cosd(<$1 x double>) nounwind readnone alwaysinline 
-  declare void @__svml_sincosd(<$1 x double>, <$1 x double> *, <$1 x double> *) nounwind readnone alwaysinline 
-  declare <$1 x double> @__svml_tand(<$1 x double>) nounwind readnone alwaysinline 
-  declare <$1 x double> @__svml_atand(<$1 x double>) nounwind readnone alwaysinline 
-  declare <$1 x double> @__svml_atan2d(<$1 x double>, <$1 x double>) nounwind readnone alwaysinline 
-  declare <$1 x double> @__svml_expd(<$1 x double>) nounwind readnone alwaysinline 
-  declare <$1 x double> @__svml_logd(<$1 x double>) nounwind readnone alwaysinline 
-  declare <$1 x double> @__svml_powd(<$1 x double>, <$1 x double>) nounwind readnone alwaysinline 
-')
-
-;; single precision
-define(`svmlf_declare',`
-  declare <$1 x float> @__svml_sinf$1(<$1 x float>) nounwind readnone
-  declare <$1 x float> @__svml_asinf$1(<$1 x float>) nounwind readnone
-  declare <$1 x float> @__svml_cosf$1(<$1 x float>) nounwind readnone
-  declare <$1 x float> @__svml_sincosf$1(<$1 x float> *, <$1 x float>) nounwind readnone
-  declare <$1 x float> @__svml_tanf$1(<$1 x float>) nounwind readnone
-  declare <$1 x float> @__svml_atanf$1(<$1 x float>) nounwind readnone
-  declare <$1 x float> @__svml_atan2f$1(<$1 x float>, <$1 x float>) nounwind readnone
-  declare <$1 x float> @__svml_expf$1(<$1 x float>) nounwind readnone
-  declare <$1 x float> @__svml_logf$1(<$1 x float>) nounwind readnone
-  declare <$1 x float> @__svml_powf$1(<$1 x float>, <$1 x float>) nounwind readnone
+;; declare native __svml calls: args are element type, native name suffix, native vector width
+define(`svml_declare',`
+  declare <$3 x $1> @__svml_sin$2(<$3 x $1>) nounwind readnone
+  declare <$3 x $1> @__svml_asin$2(<$3 x $1>) nounwind readnone
+  declare <$3 x $1> @__svml_cos$2(<$3 x $1>) nounwind readnone
+  declare <$3 x $1> @__svml_sincos$2(<$3 x $1> *, <$3 x $1>) nounwind readnone
+  declare <$3 x $1> @__svml_tan$2(<$3 x $1>) nounwind readnone
+  declare <$3 x $1> @__svml_atan$2(<$3 x $1>) nounwind readnone
+  declare <$3 x $1> @__svml_atan2$2(<$3 x $1>, <$3 x $1>) nounwind readnone
+  declare <$3 x $1> @__svml_exp$2(<$3 x $1>) nounwind readnone
+  declare <$3 x $1> @__svml_log$2(<$3 x $1>) nounwind readnone
+  declare <$3 x $1> @__svml_pow$2(<$3 x $1>, <$3 x $1>) nounwind readnone
 ');
 
-
-
-define(`svmlf_define',`
-  define <$1 x float> @__svml_sinf(<$1 x float>) nounwind readnone alwaysinline {
-    %ret = call <$1 x float> @__svml_sinf$1(<$1 x float> %0)
-    ret <$1 x float> %ret
+;; define native __svml calls: args are element type, native name suffix, vector width, wrapper name suffix; each wrapper forwards to the native call
+define(`svml_define',`
+  define <$3 x $1> @__svml_sin$4(<$3 x $1>) nounwind readnone alwaysinline {
+    %ret = call <$3 x $1> @__svml_sin$2(<$3 x $1> %0)
+    ret <$3 x $1> %ret
   }
-  define <$1 x float> @__svml_asinf(<$1 x float>) nounwind readnone alwaysinline {
-    %ret = call <$1 x float> @__svml_asinf$1(<$1 x float> %0)
-    ret <$1 x float> %ret
+  define <$3 x $1> @__svml_asin$4(<$3 x $1>) nounwind readnone alwaysinline {
+    %ret = call <$3 x $1> @__svml_asin$2(<$3 x $1> %0)
+    ret <$3 x $1> %ret
   }
 
-  define <$1 x float> @__svml_cosf(<$1 x float>) nounwind readnone alwaysinline {
-    %ret = call <$1 x float> @__svml_cosf$1(<$1 x float> %0)
-    ret <$1 x float> %ret
+  define <$3 x $1> @__svml_cos$4(<$3 x $1>) nounwind readnone alwaysinline {
+    %ret = call <$3 x $1> @__svml_cos$2(<$3 x $1> %0)
+    ret <$3 x $1> %ret
   }
 
-  define void @__svml_sincosf(<$1 x float>, <$1 x float> *, <$1 x float> *) nounwind readnone alwaysinline {
-    %s = call <$1 x float> @__svml_sincosf$1(<$1 x float> * %2, <$1 x float> %0)
-    store <$1 x float> %s, <$1 x float> * %1
+  define void @__svml_sincos$4(<$3 x $1>, <$3 x $1> *, <$3 x $1> *) nounwind readnone alwaysinline {
+    %s = call <$3 x $1> @__svml_sincos$2(<$3 x $1> * %2, <$3 x $1> %0)
+    store <$3 x $1> %s, <$3 x $1> * %1
     ret void
   }
 
-  define <$1 x float> @__svml_tanf(<$1 x float>) nounwind readnone alwaysinline {
-    %ret = call <$1 x float> @__svml_tanf$1(<$1 x float> %0)
-    ret <$1 x float> %ret
+  define <$3 x $1> @__svml_tan$4(<$3 x $1>) nounwind readnone alwaysinline {
+    %ret = call <$3 x $1> @__svml_tan$2(<$3 x $1> %0)
+    ret <$3 x $1> %ret
   }
 
-  define <$1 x float> @__svml_atanf(<$1 x float>) nounwind readnone alwaysinline {
-    %ret = call <$1 x float> @__svml_atanf$1(<$1 x float> %0)
-    ret <$1 x float> %ret
+  define <$3 x $1> @__svml_atan$4(<$3 x $1>) nounwind readnone alwaysinline {
+    %ret = call <$3 x $1> @__svml_atan$2(<$3 x $1> %0)
+    ret <$3 x $1> %ret
   }
 
-  define <$1 x float> @__svml_atan2f(<$1 x float>, <$1 x float>) nounwind readnone alwaysinline {
-    %ret = call <$1 x float> @__svml_atan2f$1(<$1 x float> %0, <$1 x float> %1)
-    ret <$1 x float> %ret
+  define <$3 x $1> @__svml_atan2$4(<$3 x $1>, <$3 x $1>) nounwind readnone alwaysinline {
+    %ret = call <$3 x $1> @__svml_atan2$2(<$3 x $1> %0, <$3 x $1> %1)
+    ret <$3 x $1> %ret
   }
 
-  define <$1 x float> @__svml_expf(<$1 x float>) nounwind readnone alwaysinline {
-    %ret = call <$1 x float> @__svml_expf$1(<$1 x float> %0)
-    ret <$1 x float> %ret
+  define <$3 x $1> @__svml_exp$4(<$3 x $1>) nounwind readnone alwaysinline {
+    %ret = call <$3 x $1> @__svml_exp$2(<$3 x $1> %0)
+    ret <$3 x $1> %ret
   }
 
-  define <$1 x float> @__svml_logf(<$1 x float>) nounwind readnone alwaysinline {
-    %ret = call <$1 x float> @__svml_logf$1(<$1 x float> %0)
-    ret <$1 x float> %ret
+  define <$3 x $1> @__svml_log$4(<$3 x $1>) nounwind readnone alwaysinline {
+    %ret = call <$3 x $1> @__svml_log$2(<$3 x $1> %0)
+    ret <$3 x $1> %ret
   }
 
-  define <$1 x float> @__svml_powf(<$1 x float>, <$1 x float>) nounwind readnone alwaysinline {
-    %ret = call <$1 x float> @__svml_powf$1(<$1 x float> %0, <$1 x float> %1)
-    ret <$1 x float> %ret
+  define <$3 x $1> @__svml_pow$4(<$3 x $1>, <$3 x $1>) nounwind readnone alwaysinline {
+    %ret = call <$3 x $1> @__svml_pow$2(<$3 x $1> %0, <$3 x $1> %1)
+    ret <$3 x $1> %ret
   }
 ')
 
-;; double precision
-define(`svmld_declare',`
-  declare <$1 x double> @__svml_sin$1(<$1 x double>) nounwind readnone
-  declare <$1 x double> @__svml_asin$1(<$1 x double>) nounwind readnone
-  declare <$1 x double> @__svml_cos$1(<$1 x double>) nounwind readnone
-  declare <$1 x double> @__svml_sincos$1(<$1 x double> *, <$1 x double>) nounwind readnone
-  declare <$1 x double> @__svml_tan$1(<$1 x double>) nounwind readnone
-  declare <$1 x double> @__svml_atan$1(<$1 x double>) nounwind readnone
-  declare <$1 x double> @__svml_atan2$1(<$1 x double>, <$1 x double>) nounwind readnone
-  declare <$1 x double> @__svml_exp$1(<$1 x double>) nounwind readnone
-  declare <$1 x double> @__svml_log$1(<$1 x double>) nounwind readnone
-  declare <$1 x double> @__svml_pow$1(<$1 x double>, <$1 x double>) nounwind readnone
+
+;; define x2 __svml calls: args are element type, native suffix, native width, stub suffix, full vector width; only stubs for now, declared at the full width given by the fifth arg
+define(`svml_define_x2',`
+   svml_stubs($1,$5,$4)
 ')
 
-define(`svmld_define',`
-  define <$1 x double> @__svml_sind(<$1 x double>) nounwind readnone alwaysinline {
-    %ret = call <$1 x double> @__svml_sin$1(<$1 x double> %0)
-    ret <$1 x double> %ret
-  }
-  define <$1 x double> @__svml_asind(<$1 x double>) nounwind readnone alwaysinline {
-    %ret = call <$1 x double> @__svml_asin$1(<$1 x double> %0)
-    ret <$1 x double> %ret
-  }
-
-
-  define <$1 x double> @__svml_cosd(<$1 x double>) nounwind readnone alwaysinline {
-    %ret = call <$1 x double> @__svml_cos$1(<$1 x double> %0)
-    ret <$1 x double> %ret
-  }
-
-  define void @__svml_sincosd(<$1 x double>, <$1 x double> *, <$1 x double> *) nounwind readnone alwaysinline {
-    %s = call <$1 x double> @__svml_sincos$1(<$1 x double> * %2, <$1 x double> %0)
-    store <$1 x double> %s, <$1 x double> * %1
-    ret void
-  }
-
-  define <$1 x double> @__svml_tand(<$1 x double>) nounwind readnone alwaysinline {
-    %ret = call <$1 x double> @__svml_tan$1(<$1 x double> %0)
-    ret <$1 x double> %ret
-  }
-
-  define <$1 x double> @__svml_atand(<$1 x double>) nounwind readnone alwaysinline {
-    %ret = call <$1 x double> @__svml_atan$1(<$1 x double> %0)
-    ret <$1 x double> %ret
-  }
-
-  define <$1 x double> @__svml_atan2d(<$1 x double>, <$1 x double>) nounwind readnone alwaysinline {
-    %ret = call <$1 x double> @__svml_atan2$1(<$1 x double> %0, <$1 x double> %1)
-    ret <$1 x double> %ret
-  }
-
-  define <$1 x double> @__svml_expd(<$1 x double>) nounwind readnone alwaysinline {
-    %ret = call <$1 x double> @__svml_exp$1(<$1 x double> %0)
-    ret <$1 x double> %ret
-  }
-
-  define <$1 x double> @__svml_logd(<$1 x double>) nounwind readnone alwaysinline {
-    %ret = call <$1 x double> @__svml_log$1(<$1 x double> %0)
-    ret <$1 x double> %ret
-  }
-
-  define <$1 x double> @__svml_powd(<$1 x double>, <$1 x double>) nounwind readnone alwaysinline {
-    %ret = call <$1 x double> @__svml_pow$1(<$1 x double> %0, <$1 x double> %1)
-    ret <$1 x double> %ret
-  }
-')
-
-;; need to implement smvld for 2xvectorWidth ...:w
-
-define(`svmld2_define',`
-  define <$1 x double> @__svml_sinxx(<$1 x double>) nounwind readnone alwaysinline {
-    %v0 = shufflevector <$1 x double> %0, <$1 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-    %v1 = shufflevector <$1 x double> %0, <$1 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-    %ret0 = call <$2 x double> @__svml_sin$2(<$2 x double> %v0)
-    %ret1 = call <$2 x double> @__svml_sin$2(<$2 x double> %v1)
-    %ret  = shufflevector <$2 x double> %ret0, <$2 x double> %ret1, <$1 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-    ret <$1 x double> %ret
-  }
+;; define x4 __svml calls: args are element type, native suffix, native width, stub suffix, full vector width; only stubs for now, declared at the full width given by the fifth arg
+define(`svml_define_x4',`
+   svml_stubs($1,$5,$4)
 ')
diff --git a/builtins/target-avx-h.ll b/builtins/target-avx-h.ll
index a06e5ab3..283eaddd 100644
--- a/builtins/target-avx-h.ll
+++ b/builtins/target-avx-h.ll
@@ -155,10 +155,13 @@ define <4 x double> @__sqrt_varying_double(<4 x double>) nounwind alwaysinline {
 ;; svml
 
 include(`svml.m4')
-svmlf_declare(4)
-svmlf_define(4)
-svmld_declare(4)
-svmld_define(4)
+;; single precision
+svml_declare(float,f4,4)
+svml_define(float,f4,4,f)
+
+;; double precision
+svml_declare(double,4,4)
+svml_define(double,4,4,d)
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; float min/max
diff --git a/builtins/target-avx-x2.ll b/builtins/target-avx-x2.ll
index d646720e..f3f1590a 100644
--- a/builtins/target-avx-x2.ll
+++ b/builtins/target-avx-x2.ll
@@ -138,8 +138,13 @@ define <16 x float> @__sqrt_varying_float(<16 x float>) nounwind readonly always
 ;; svml
 
 include(`svml.m4')
-svmlf_stubs(16)
-svmld_stubs(16)
+;; single precision
+svml_declare(float,f8,8)
+svml_define_x2(float,f8,8,f,16)
+
+;; double precision
+svml_declare(double,4,4)
+svml_define_x2(double,4,4,d,16)
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; float min/max
diff --git a/builtins/target-avx.ll b/builtins/target-avx.ll
index 1d33e3f9..7e7ab330 100644
--- a/builtins/target-avx.ll
+++ b/builtins/target-avx.ll
@@ -138,10 +138,13 @@ define <8 x float> @__sqrt_varying_float(<8 x float>) nounwind readonly alwaysin
 ;; svml
 
 include(`svml.m4')
-svmlf_declare(8)
-svmlf_define(8)
-svmld_declare(4)
-svmld_stubs(8)
+;; single precision
+svml_declare(float,f8,8)
+svml_define(float,f8,8,f)
+
+;; double precision
+svml_declare(double,4,4)
+svml_define_x2(double,4,4,d,8)
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; float min/max
diff --git a/builtins/target-generic-common.ll b/builtins/target-generic-common.ll
index bc7db9ec..30a8b030 100644
--- a/builtins/target-generic-common.ll
+++ b/builtins/target-generic-common.ll
@@ -209,8 +209,8 @@ declare i64 @__count_leading_zeros_i64(i64) nounwind readnone
 ;; svml
 
 include(`svml.m4')
-svmlf_stubs(WIDTH)
-svmld_stubs(WIDTH)
+svml_stubs(float,  WIDTH, f)
+svml_stubs(double, WIDTH, d)
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; reductions
diff --git a/builtins/target-sse2-x2.ll b/builtins/target-sse2-x2.ll
index 5688ebba..9fa607a4 100644
--- a/builtins/target-sse2-x2.ll
+++ b/builtins/target-sse2-x2.ll
@@ -106,10 +106,12 @@ define <8 x float> @__sqrt_varying_float(<8 x float>) nounwind readonly alwaysin
 ; svml stuff
 
 include(`svml.m4')
-svmlf_declare(4)
-svmld_declare(2)
-svmld_stubs(8)
+;; single precision
+svml_declare(float,f4,4)
 
+;; double precision
+svml_declare(double,2,2)
+svml_define_x4(double,2,2,d,8)
 
 define <8 x float> @__svml_sinf(<8 x float>) nounwind readnone alwaysinline {
   unary4to8(ret, float, @__svml_sinf4, %0)
diff --git a/builtins/target-sse2.ll b/builtins/target-sse2.ll
index 236cda33..c858ccb6 100644
--- a/builtins/target-sse2.ll
+++ b/builtins/target-sse2.ll
@@ -497,10 +497,14 @@ define <4 x float> @__sqrt_varying_float(<4 x float>) nounwind readonly alwaysin
 ; svml stuff
 
 include(`svml.m4')
-svmlf_declare(4)
-svmld_declare(2)
-svmlf_define(4)
-svmld_stubs(4)
+;; single precision
+svml_declare(float,f4,4)
+svml_define(float,f4,4,f)
+
+;; double precision
+svml_declare(double,2,2)
+svml_define_x2(double,2,2,d,4)
+
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; float min/max
diff --git a/builtins/target-sse4-16.ll b/builtins/target-sse4-16.ll
index 3fbbe534..3f8cd339 100644
--- a/builtins/target-sse4-16.ll
+++ b/builtins/target-sse4-16.ll
@@ -210,8 +210,8 @@ define <8 x double> @__max_varying_double(<8 x double>, <8 x double>) nounwind r
 
 ; FIXME
 include(`svml.m4')
-svmlf_stubs(8)
-svmld_stubs(8)
+svml_stubs(float,8,f)
+svml_stubs(double,8,d)
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; horizontal ops / reductions
diff --git a/builtins/target-sse4-8.ll b/builtins/target-sse4-8.ll
index e65077b7..f43cd940 100644
--- a/builtins/target-sse4-8.ll
+++ b/builtins/target-sse4-8.ll
@@ -223,8 +223,8 @@ define <16 x double> @__max_varying_double(<16 x double>, <16 x double>) nounwin
 ; FIXME
 
 include(`svml.m4')
-svmlf_stubs(16)
-svmld_stubs(16)
+svml_stubs(float,16,f)
+svml_stubs(double,16,d)
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; horizontal ops / reductions
diff --git a/builtins/target-sse4-x2.ll b/builtins/target-sse4-x2.ll
index 2a69b60a..c45966e3 100644
--- a/builtins/target-sse4-x2.ll
+++ b/builtins/target-sse4-x2.ll
@@ -106,9 +106,12 @@ define <8 x float> @__sqrt_varying_float(<8 x float>) nounwind readonly alwaysin
 ; svml stuff
 
 include(`svml.m4')
-svmlf_declare(4)
-svmld_declare(2)
-svmld_stubs(8)
+;; single precision
+svml_declare(float,f4,4)
+
+;; double precision
+svml_declare(double,2,2)
+svml_define_x4(double,2,2,d,8)
 
 
 define <8 x float> @__svml_sinf(<8 x float>) nounwind readnone alwaysinline {
diff --git a/builtins/target-sse4.ll b/builtins/target-sse4.ll
index 686b4f84..eb82ab9a 100644
--- a/builtins/target-sse4.ll
+++ b/builtins/target-sse4.ll
@@ -210,10 +210,13 @@ define <4 x double> @__max_varying_double(<4 x double>, <4 x double>) nounwind r
 ; svml stuff
 
 include(`svml.m4')
-svmlf_declare(4)
-svmlf_define(4)
-svmld_declare(2)
-svmld_stubs(8)
+;; single precision
+svml_declare(float,f4,4)
+svml_define(float,f4,4,f)
+
+;; double precision
+svml_declare(double,2,2)
+svml_define_x2(double,2,2,d,4)
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; horizontal ops / reductions