From 19379db3b60a60f2f1862a54709115bcf11c7545 Mon Sep 17 00:00:00 2001 From: egaburov Date: Wed, 11 Sep 2013 16:48:56 +0200 Subject: [PATCH] svml cleanup --- builtins/svml.m4 | 209 +++++++++--------------------- builtins/target-avx-h.ll | 11 +- builtins/target-avx-x2.ll | 9 +- builtins/target-avx.ll | 11 +- builtins/target-generic-common.ll | 4 +- builtins/target-sse2-x2.ll | 8 +- builtins/target-sse2.ll | 12 +- builtins/target-sse4-16.ll | 4 +- builtins/target-sse4-8.ll | 4 +- builtins/target-sse4-x2.ll | 9 +- builtins/target-sse4.ll | 11 +- 11 files changed, 116 insertions(+), 176 deletions(-) diff --git a/builtins/svml.m4 b/builtins/svml.m4 index cc3cd979..9608dea6 100644 --- a/builtins/svml.m4 +++ b/builtins/svml.m4 @@ -1,176 +1,93 @@ ;; svml -;; stub -define(`svmlf_stubs',` - declare <$1 x float> @__svml_sinf(<$1 x float>) nounwind readnone alwaysinline - declare <$1 x float> @__svml_asinf(<$1 x float>) nounwind readnone alwaysinline - declare <$1 x float> @__svml_cosf(<$1 x float>) nounwind readnone alwaysinline - declare void @__svml_sincosf(<$1 x float>, <$1 x float> *, <$1 x float> *) nounwind readnone alwaysinline - declare <$1 x float> @__svml_tanf(<$1 x float>) nounwind readnone alwaysinline - declare <$1 x float> @__svml_atanf(<$1 x float>) nounwind readnone alwaysinline - declare <$1 x float> @__svml_atan2f(<$1 x float>, <$1 x float>) nounwind readnone alwaysinline - declare <$1 x float> @__svml_expf(<$1 x float>) nounwind readnone alwaysinline - declare <$1 x float> @__svml_logf(<$1 x float>) nounwind readnone alwaysinline - declare <$1 x float> @__svml_powf(<$1 x float>, <$1 x float>) nounwind readnone alwaysinline +;; stubs +define(`svml_stubs',` + declare <$2 x $1> @__svml_sin$3(<$2 x $1>) nounwind readnone alwaysinline + declare <$2 x $1> @__svml_asin$3(<$2 x $1>) nounwind readnone alwaysinline + declare <$2 x $1> @__svml_cos$3(<$2 x $1>) nounwind readnone alwaysinline + declare void @__svml_sincos$3(<$2 x $1>, <$2 x $1> *, <$2 x $1> *) nounwind 
readnone alwaysinline + declare <$2 x $1> @__svml_tan$3(<$2 x $1>) nounwind readnone alwaysinline + declare <$2 x $1> @__svml_atan$3(<$2 x $1>) nounwind readnone alwaysinline + declare <$2 x $1> @__svml_atan2$3(<$2 x $1>, <$2 x $1>) nounwind readnone alwaysinline + declare <$2 x $1> @__svml_exp$3(<$2 x $1>) nounwind readnone alwaysinline + declare <$2 x $1> @__svml_log$3(<$2 x $1>) nounwind readnone alwaysinline + declare <$2 x $1> @__svml_pow$3(<$2 x $1>, <$2 x $1>) nounwind readnone alwaysinline ') -define(`svmld_stubs',` - declare <$1 x double> @__svml_sind(<$1 x double>) nounwind readnone alwaysinline - declare <$1 x double> @__svml_asind(<$1 x double>) nounwind readnone alwaysinline - declare <$1 x double> @__svml_cosd(<$1 x double>) nounwind readnone alwaysinline - declare void @__svml_sincosd(<$1 x double>, <$1 x double> *, <$1 x double> *) nounwind readnone alwaysinline - declare <$1 x double> @__svml_tand(<$1 x double>) nounwind readnone alwaysinline - declare <$1 x double> @__svml_atand(<$1 x double>) nounwind readnone alwaysinline - declare <$1 x double> @__svml_atan2d(<$1 x double>, <$1 x double>) nounwind readnone alwaysinline - declare <$1 x double> @__svml_expd(<$1 x double>) nounwind readnone alwaysinline - declare <$1 x double> @__svml_logd(<$1 x double>) nounwind readnone alwaysinline - declare <$1 x double> @__svml_powd(<$1 x double>, <$1 x double>) nounwind readnone alwaysinline -') - -;; single precision -define(`svmlf_declare',` - declare <$1 x float> @__svml_sinf$1(<$1 x float>) nounwind readnone - declare <$1 x float> @__svml_asinf$1(<$1 x float>) nounwind readnone - declare <$1 x float> @__svml_cosf$1(<$1 x float>) nounwind readnone - declare <$1 x float> @__svml_sincosf$1(<$1 x float> *, <$1 x float>) nounwind readnone - declare <$1 x float> @__svml_tanf$1(<$1 x float>) nounwind readnone - declare <$1 x float> @__svml_atanf$1(<$1 x float>) nounwind readnone - declare <$1 x float> @__svml_atan2f$1(<$1 x float>, <$1 x float>) nounwind 
readnone - declare <$1 x float> @__svml_expf$1(<$1 x float>) nounwind readnone - declare <$1 x float> @__svml_logf$1(<$1 x float>) nounwind readnone - declare <$1 x float> @__svml_powf$1(<$1 x float>, <$1 x float>) nounwind readnone +;; declare __svml calls: $1 = element type, $2 = symbol suffix, $3 = vector width +define(`svml_declare',` + declare <$3 x $1> @__svml_sin$2(<$3 x $1>) nounwind readnone + declare <$3 x $1> @__svml_asin$2(<$3 x $1>) nounwind readnone + declare <$3 x $1> @__svml_cos$2(<$3 x $1>) nounwind readnone + declare <$3 x $1> @__svml_sincos$2(<$3 x $1> *, <$3 x $1>) nounwind readnone + declare <$3 x $1> @__svml_tan$2(<$3 x $1>) nounwind readnone + declare <$3 x $1> @__svml_atan$2(<$3 x $1>) nounwind readnone + declare <$3 x $1> @__svml_atan2$2(<$3 x $1>, <$3 x $1>) nounwind readnone + declare <$3 x $1> @__svml_exp$2(<$3 x $1>) nounwind readnone + declare <$3 x $1> @__svml_log$2(<$3 x $1>) nounwind readnone + declare <$3 x $1> @__svml_pow$2(<$3 x $1>, <$3 x $1>) nounwind readnone '); - - -define(`svmlf_define',` - define <$1 x float> @__svml_sinf(<$1 x float>) nounwind readnone alwaysinline { - %ret = call <$1 x float> @__svml_sinf$1(<$1 x float> %0) - ret <$1 x float> %ret +;; define native __svml calls +define(`svml_define',` + define <$3 x $1> @__svml_sin$4(<$3 x $1>) nounwind readnone alwaysinline { + %ret = call <$3 x $1> @__svml_sin$2(<$3 x $1> %0) + ret <$3 x $1> %ret } - define <$1 x float> @__svml_asinf(<$1 x float>) nounwind readnone alwaysinline { - %ret = call <$1 x float> @__svml_asinf$1(<$1 x float> %0) - ret <$1 x float> %ret + define <$3 x $1> @__svml_asin$4(<$3 x $1>) nounwind readnone alwaysinline { + %ret = call <$3 x $1> @__svml_asin$2(<$3 x $1> %0) + ret <$3 x $1> %ret } - define <$1 x float> @__svml_cosf(<$1 x float>) nounwind readnone alwaysinline { - %ret = call <$1 x float> @__svml_cosf$1(<$1 x float> %0) - ret <$1 x float> %ret + define <$3 x $1> @__svml_cos$4(<$3 x $1>) nounwind readnone alwaysinline { + %ret = call <$3 x $1> @__svml_cos$2(<$3 x $1> %0) + ret <$3 x $1> %ret } - 
define void @__svml_sincosf(<$1 x float>, <$1 x float> *, <$1 x float> *) nounwind readnone alwaysinline { - %s = call <$1 x float> @__svml_sincosf$1(<$1 x float> * %2, <$1 x float> %0) - store <$1 x float> %s, <$1 x float> * %1 + define void @__svml_sincos$4(<$3 x $1>, <$3 x $1> *, <$3 x $1> *) nounwind readnone alwaysinline { + %s = call <$3 x $1> @__svml_sincos$2(<$3 x $1> * %2, <$3 x $1> %0) + store <$3 x $1> %s, <$3 x $1> * %1 ret void } - define <$1 x float> @__svml_tanf(<$1 x float>) nounwind readnone alwaysinline { - %ret = call <$1 x float> @__svml_tanf$1(<$1 x float> %0) - ret <$1 x float> %ret + define <$3 x $1> @__svml_tan$4(<$3 x $1>) nounwind readnone alwaysinline { + %ret = call <$3 x $1> @__svml_tan$2(<$3 x $1> %0) + ret <$3 x $1> %ret } - define <$1 x float> @__svml_atanf(<$1 x float>) nounwind readnone alwaysinline { - %ret = call <$1 x float> @__svml_atanf$1(<$1 x float> %0) - ret <$1 x float> %ret + define <$3 x $1> @__svml_atan$4(<$3 x $1>) nounwind readnone alwaysinline { + %ret = call <$3 x $1> @__svml_atan$2(<$3 x $1> %0) + ret <$3 x $1> %ret } - define <$1 x float> @__svml_atan2f(<$1 x float>, <$1 x float>) nounwind readnone alwaysinline { - %ret = call <$1 x float> @__svml_atan2f$1(<$1 x float> %0, <$1 x float> %1) - ret <$1 x float> %ret + define <$3 x $1> @__svml_atan2$4(<$3 x $1>, <$3 x $1>) nounwind readnone alwaysinline { + %ret = call <$3 x $1> @__svml_atan2$2(<$3 x $1> %0, <$3 x $1> %1) + ret <$3 x $1> %ret } - define <$1 x float> @__svml_expf(<$1 x float>) nounwind readnone alwaysinline { - %ret = call <$1 x float> @__svml_expf$1(<$1 x float> %0) - ret <$1 x float> %ret + define <$3 x $1> @__svml_exp$4(<$3 x $1>) nounwind readnone alwaysinline { + %ret = call <$3 x $1> @__svml_exp$2(<$3 x $1> %0) + ret <$3 x $1> %ret } - define <$1 x float> @__svml_logf(<$1 x float>) nounwind readnone alwaysinline { - %ret = call <$1 x float> @__svml_logf$1(<$1 x float> %0) - ret <$1 x float> %ret + define <$3 x $1> @__svml_log$4(<$3 x $1>) 
nounwind readnone alwaysinline { + %ret = call <$3 x $1> @__svml_log$2(<$3 x $1> %0) + ret <$3 x $1> %ret } - define <$1 x float> @__svml_powf(<$1 x float>, <$1 x float>) nounwind readnone alwaysinline { - %ret = call <$1 x float> @__svml_powf$1(<$1 x float> %0, <$1 x float> %1) - ret <$1 x float> %ret + define <$3 x $1> @__svml_pow$4(<$3 x $1>, <$3 x $1>) nounwind readnone alwaysinline { + %ret = call <$3 x $1> @__svml_pow$2(<$3 x $1> %0, <$3 x $1> %1) + ret <$3 x $1> %ret } ') -;; double precision -define(`svmld_declare',` - declare <$1 x double> @__svml_sin$1(<$1 x double>) nounwind readnone - declare <$1 x double> @__svml_asin$1(<$1 x double>) nounwind readnone - declare <$1 x double> @__svml_cos$1(<$1 x double>) nounwind readnone - declare <$1 x double> @__svml_sincos$1(<$1 x double> *, <$1 x double>) nounwind readnone - declare <$1 x double> @__svml_tan$1(<$1 x double>) nounwind readnone - declare <$1 x double> @__svml_atan$1(<$1 x double>) nounwind readnone - declare <$1 x double> @__svml_atan2$1(<$1 x double>, <$1 x double>) nounwind readnone - declare <$1 x double> @__svml_exp$1(<$1 x double>) nounwind readnone - declare <$1 x double> @__svml_log$1(<$1 x double>) nounwind readnone - declare <$1 x double> @__svml_pow$1(<$1 x double>, <$1 x double>) nounwind readnone + +;; define x2 __svml calls: for now only declares stubs, at the full target width $5 (not the native width $3) +define(`svml_define_x2',` + svml_stubs($1,$5,$4) ') -define(`svmld_define',` - define <$1 x double> @__svml_sind(<$1 x double>) nounwind readnone alwaysinline { - %ret = call <$1 x double> @__svml_sin$1(<$1 x double> %0) - ret <$1 x double> %ret - } - define <$1 x double> @__svml_asind(<$1 x double>) nounwind readnone alwaysinline { - %ret = call <$1 x double> @__svml_asin$1(<$1 x double> %0) - ret <$1 x double> %ret - } - - - define <$1 x double> @__svml_cosd(<$1 x double>) nounwind readnone alwaysinline { - %ret = call <$1 x double> @__svml_cos$1(<$1 x double> %0) - ret <$1 x double> %ret - } - - define void @__svml_sincosd(<$1 x double>, <$1 x double> *, <$1 x 
double> *) nounwind readnone alwaysinline { - %s = call <$1 x double> @__svml_sincos$1(<$1 x double> * %2, <$1 x double> %0) - store <$1 x double> %s, <$1 x double> * %1 - ret void - } - - define <$1 x double> @__svml_tand(<$1 x double>) nounwind readnone alwaysinline { - %ret = call <$1 x double> @__svml_tan$1(<$1 x double> %0) - ret <$1 x double> %ret - } - - define <$1 x double> @__svml_atand(<$1 x double>) nounwind readnone alwaysinline { - %ret = call <$1 x double> @__svml_atan$1(<$1 x double> %0) - ret <$1 x double> %ret - } - - define <$1 x double> @__svml_atan2d(<$1 x double>, <$1 x double>) nounwind readnone alwaysinline { - %ret = call <$1 x double> @__svml_atan2$1(<$1 x double> %0, <$1 x double> %1) - ret <$1 x double> %ret - } - - define <$1 x double> @__svml_expd(<$1 x double>) nounwind readnone alwaysinline { - %ret = call <$1 x double> @__svml_exp$1(<$1 x double> %0) - ret <$1 x double> %ret - } - - define <$1 x double> @__svml_logd(<$1 x double>) nounwind readnone alwaysinline { - %ret = call <$1 x double> @__svml_log$1(<$1 x double> %0) - ret <$1 x double> %ret - } - - define <$1 x double> @__svml_powd(<$1 x double>, <$1 x double>) nounwind readnone alwaysinline { - %ret = call <$1 x double> @__svml_pow$1(<$1 x double> %0, <$1 x double> %1) - ret <$1 x double> %ret - } -') - -;; need to implement smvld for 2xvectorWidth ...:w - -define(`svmld2_define',` - define <$1 x double> @__svml_sinxx(<$1 x double>) nounwind readnone alwaysinline { - %v0 = shufflevector <$1 x double> %0, <$1 x double> undef, <4 x i32> - %v1 = shufflevector <$1 x double> %0, <$1 x double> undef, <4 x i32> - %ret0 = call <$2 x double> @__svml_sin$2(<$2 x double> %v0) - %ret1 = call <$2 x double> @__svml_sin$2(<$2 x double> %v1) - %ret = shufflevector <$2 x double> %ret0, <$2 x double> %ret1, <$1 x i32> - ret <$1 x double> %ret - } +;; define x4 __svml calls: for now only declares stubs, at the full target width $5 (not the native width $3) +define(`svml_define_x4',` + svml_stubs($1,$5,$4) ') diff --git a/builtins/target-avx-h.ll b/builtins/target-avx-h.ll index 
a06e5ab3..283eaddd 100644 --- a/builtins/target-avx-h.ll +++ b/builtins/target-avx-h.ll @@ -155,10 +155,13 @@ define <4 x double> @__sqrt_varying_double(<4 x double>) nounwind alwaysinline { ;; svml include(`svml.m4') -svmlf_declare(4) -svmlf_define(4) -svmld_declare(4) -svmld_define(4) +;; single precision +svml_declare(float,f4,4) +svml_define(float,f4,4,f) + +;; double precision +svml_declare(double,4,4) +svml_define(double,4,4,d) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; float min/max diff --git a/builtins/target-avx-x2.ll b/builtins/target-avx-x2.ll index d646720e..f3f1590a 100644 --- a/builtins/target-avx-x2.ll +++ b/builtins/target-avx-x2.ll @@ -138,8 +138,13 @@ define <16 x float> @__sqrt_varying_float(<16 x float>) nounwind readonly always ;; svml include(`svml.m4') -svmlf_stubs(16) -svmld_stubs(16) +;; single precision +svml_declare(float,f8,8) +svml_define_x2(float,f8,8,f,16) + +;; double precision +svml_declare(double,4,4) +svml_define_x2(double,4,4,d,16) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; float min/max diff --git a/builtins/target-avx.ll b/builtins/target-avx.ll index 1d33e3f9..7e7ab330 100644 --- a/builtins/target-avx.ll +++ b/builtins/target-avx.ll @@ -138,10 +138,13 @@ define <8 x float> @__sqrt_varying_float(<8 x float>) nounwind readonly alwaysin ;; svml include(`svml.m4') -svmlf_declare(8) -svmlf_define(8) -svmld_declare(4) -svmld_stubs(8) +;; single precision +svml_declare(float,f8,8) +svml_define(float,f8,8,f) + +;; double precision +svml_declare(double,4,4) +svml_define_x2(double,4,4,d,8) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; float min/max diff --git a/builtins/target-generic-common.ll b/builtins/target-generic-common.ll index bc7db9ec..30a8b030 100644 --- a/builtins/target-generic-common.ll +++ b/builtins/target-generic-common.ll @@ -209,8 +209,8 @@ declare i64 @__count_leading_zeros_i64(i64) nounwind readnone ;; svml 
include(`svml.m4') -svmlf_stubs(WIDTH) -svmld_stubs(WIDTH) +svml_stubs(float, WIDTH, f) +svml_stubs(double, WIDTH, d) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; reductions diff --git a/builtins/target-sse2-x2.ll b/builtins/target-sse2-x2.ll index 5688ebba..9fa607a4 100644 --- a/builtins/target-sse2-x2.ll +++ b/builtins/target-sse2-x2.ll @@ -106,10 +106,12 @@ define <8 x float> @__sqrt_varying_float(<8 x float>) nounwind readonly alwaysin ; svml stuff include(`svml.m4') -svmlf_declare(4) -svmld_declare(2) -svmld_stubs(8) +;; single precision +svml_declare(float,f4,4) +;; double precision +svml_declare(double,2,2) +svml_define_x4(double,2,2,d,8) define <8 x float> @__svml_sinf(<8 x float>) nounwind readnone alwaysinline { unary4to8(ret, float, @__svml_sinf4, %0) diff --git a/builtins/target-sse2.ll b/builtins/target-sse2.ll index 236cda33..c858ccb6 100644 --- a/builtins/target-sse2.ll +++ b/builtins/target-sse2.ll @@ -497,10 +497,14 @@ define <4 x float> @__sqrt_varying_float(<4 x float>) nounwind readonly alwaysin ; svml stuff include(`svml.m4') -svmlf_declare(4) -svmld_declare(2) -svmlf_define(4) -svmld_stubs(4) +;; single precision +svml_declare(float,f4,4) +svml_define(float,f4,4,f) + +;; double precision +svml_declare(double,2,2) +svml_define_x2(double,2,2,d,4) + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; float min/max diff --git a/builtins/target-sse4-16.ll b/builtins/target-sse4-16.ll index 3fbbe534..3f8cd339 100644 --- a/builtins/target-sse4-16.ll +++ b/builtins/target-sse4-16.ll @@ -210,8 +210,8 @@ define <8 x double> @__max_varying_double(<8 x double>, <8 x double>) nounwind r ; FIXME include(`svml.m4') -svmlf_stubs(8) -svmld_stubs(8) +svml_stubs(float,8,f) +svml_stubs(double,8,d) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; horizontal ops / reductions diff --git a/builtins/target-sse4-8.ll b/builtins/target-sse4-8.ll index e65077b7..f43cd940 100644 --- 
a/builtins/target-sse4-8.ll +++ b/builtins/target-sse4-8.ll @@ -223,8 +223,8 @@ define <16 x double> @__max_varying_double(<16 x double>, <16 x double>) nounwin ; FIXME include(`svml.m4') -svmlf_stubs(16) -svmld_stubs(16) +svml_stubs(float,16,f) +svml_stubs(double,16,d) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; horizontal ops / reductions diff --git a/builtins/target-sse4-x2.ll b/builtins/target-sse4-x2.ll index 2a69b60a..c45966e3 100644 --- a/builtins/target-sse4-x2.ll +++ b/builtins/target-sse4-x2.ll @@ -106,9 +106,12 @@ define <8 x float> @__sqrt_varying_float(<8 x float>) nounwind readonly alwaysin ; svml stuff include(`svml.m4') -svmlf_declare(4) -svmld_declare(2) -svmld_stubs(8) +;; single precision +svml_declare(float,f4,4) + +;; double precision +svml_declare(double,2,2) +svml_define_x4(double,2,2,d,8) define <8 x float> @__svml_sinf(<8 x float>) nounwind readnone alwaysinline { diff --git a/builtins/target-sse4.ll b/builtins/target-sse4.ll index 686b4f84..eb82ab9a 100644 --- a/builtins/target-sse4.ll +++ b/builtins/target-sse4.ll @@ -210,10 +210,13 @@ define <4 x double> @__max_varying_double(<4 x double>, <4 x double>) nounwind r ; svml stuff include(`svml.m4') -svmlf_declare(4) -svmlf_define(4) -svmld_declare(2) -svmld_stubs(8) +;; single precision +svml_declare(float,f4,4) +svml_define(float,f4,4,f) + +;; double precision +svml_declare(double,2,2) +svml_define_x2(double,2,2,d,4) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; horizontal ops / reductions