diff --git a/builtins/svml.m4 b/builtins/svml.m4 index 9608dea6..71a6a709 100644 --- a/builtins/svml.m4 +++ b/builtins/svml.m4 @@ -83,11 +83,43 @@ define(`svml_define',` ;; define x2 __svml calls -define(`svml_define_x2',` - svml_stubs($1,$3,$4) +define(`svml_define_x',` + define <$5 x $1> @__svml_sin$4(<$5 x $1>) nounwind readnone alwaysinline { + unary$3to$5(ret, $1, @__svml_sin$2, %0) + ret <$5 x $1> %ret + } + define <$5 x $1> @__svml_asin$4(<$5 x $1>) nounwind readnone alwaysinline { + unary$3to$5(ret, $1, @__svml_asin$2, %0) + ret <$5 x $1> %ret + } + define <$5 x $1> @__svml_cos$4(<$5 x $1>) nounwind readnone alwaysinline { + unary$3to$5(ret, $1, @__svml_cos$2, %0) + ret <$5 x $1> %ret + } + declare void @__svml_sincos$4(<$5 x $1>,<$5 x $1>*,<$5 x $1>*) nounwind readnone alwaysinline + define <$5 x $1> @__svml_tan$4(<$5 x $1>) nounwind readnone alwaysinline { + unary$3to$5(ret, $1, @__svml_tan$2, %0) + ret <$5 x $1> %ret + } + define <$5 x $1> @__svml_atan$4(<$5 x $1>) nounwind readnone alwaysinline { + unary$3to$5(ret, $1, @__svml_atan$2, %0) + ret <$5 x $1> %ret + } + define <$5 x $1> @__svml_atan2$4(<$5 x $1>,<$5 x $1>) nounwind readnone alwaysinline { + binary$3to$5(ret, $1, @__svml_atan2$2, %0, %1) + ret <$5 x $1> %ret + } + define <$5 x $1> @__svml_exp$4(<$5 x $1>) nounwind readnone alwaysinline { + unary$3to$5(ret, $1, @__svml_exp$2, %0) + ret <$5 x $1> %ret + } + define <$5 x $1> @__svml_log$4(<$5 x $1>) nounwind readnone alwaysinline { + unary$3to$5(ret, $1, @__svml_log$2, %0) + ret <$5 x $1> %ret + } + define <$5 x $1> @__svml_pow$4(<$5 x $1>,<$5 x $1>) nounwind readnone alwaysinline { + binary$3to$5(ret, $1, @__svml_pow$2, %0, %1) + ret <$5 x $1> %ret + } ') -;; define x4 __svml calls -define(`svml_define_x4',` - svml_stubs($1,$3,$4) -') diff --git a/builtins/target-avx-x2.ll b/builtins/target-avx-x2.ll index f3f1590a..f8fd5cd5 100644 --- a/builtins/target-avx-x2.ll +++ b/builtins/target-avx-x2.ll @@ -140,11 +140,11 @@ define <16 x float> @__sqrt_varying_float(<16 x float>) nounwind readonly always include(`svml.m4') ;; single precision svml_declare(float,f8,8) -svml_define_x2(float,f8,8,f,16) +svml_define_x(float,f8,8,f,16) ;; double precision svml_declare(double,4,4) -svml_define_x2(double,4,4,d,16) +svml_define_x(double,4,4,d,16) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; float min/max diff --git a/builtins/target-avx.ll b/builtins/target-avx.ll index 7e7ab330..196e5ea4 100644 --- a/builtins/target-avx.ll +++ b/builtins/target-avx.ll @@ -144,7 +144,7 @@ svml_define(float,f8,8,f) ;; double precision svml_declare(double,4,4) -svml_define_x2(double,4,4,d,8) +svml_define_x(double,4,4,d,8) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; float min/max diff --git a/builtins/target-sse2-x2.ll b/builtins/target-sse2-x2.ll index 9fa607a4..77bf1a9d 100644 --- a/builtins/target-sse2-x2.ll +++ b/builtins/target-sse2-x2.ll @@ -108,86 +108,11 @@ define <8 x float> @__sqrt_varying_float(<8 x float>) nounwind readonly alwaysin include(`svml.m4') ;; single precision svml_declare(float,f4,4) +svml_define_x(float,f4,4,f,8) ;; double precision svml_declare(double,2,2) -svml_define_x4(double,2,2,d,8) - -define <8 x float> @__svml_sinf(<8 x float>) nounwind readnone alwaysinline { - unary4to8(ret, float, @__svml_sinf4, %0) - ret <8 x float> %ret -} - -define <8 x float> @__svml_asinf(<8 x float>) nounwind readnone alwaysinline { - unary4to8(ret, float, @__svml_asinf4, %0) - ret <8 x float> %ret -} - -define <8 x float> @__svml_cosf(<8 x float>) nounwind readnone alwaysinline { - unary4to8(ret, float, @__svml_cosf4, %0) - ret <8 x float> %ret -} - -define void @__svml_sincosf(<8 x float>, <8 x float> *, - <8 x float> *) nounwind readnone alwaysinline { - ; call svml_sincosf4 two times with the two 4-wide sub-vectors - %a = shufflevector <8 x float> %0, <8 x float> undef, - <4 x i32> - %b = shufflevector <8 x float> %0, <8 x float> undef, - <4 x i32> - - %cospa = alloca <4 x float> - %sa = call <4 x float> @__svml_sincosf4(<4 x float> * %cospa, <4 x float> %a) - - %cospb = alloca <4 x float> - %sb = call <4 x float> @__svml_sincosf4(<4 x float> * %cospb, <4 x float> %b) - - %sin = shufflevector <4 x float> %sa, <4 x float> %sb, - <8 x i32> - store <8 x float> %sin, <8 x float> * %1 - - %cosa = load <4 x float> * %cospa - %cosb = load <4 x float> * %cospb - %cos = shufflevector <4 x float> %cosa, <4 x float> %cosb, - <8 x i32> - store <8 x float> %cos, <8 x float> * %2 - - ret void -} - -define <8 x float> @__svml_tanf(<8 x float>) nounwind readnone alwaysinline { - unary4to8(ret, float, @__svml_tanf4, %0) - ret <8 x float> %ret -} - -define <8 x float> @__svml_atanf(<8 x float>) nounwind readnone alwaysinline { - unary4to8(ret, float, @__svml_atanf4, %0) - ret <8 x float> %ret -} - -define <8 x float> @__svml_atan2f(<8 x float>, - <8 x float>) nounwind readnone alwaysinline { - binary4to8(ret, float, @__svml_atan2f4, %0, %1) - ret <8 x float> %ret -} - -define <8 x float> @__svml_expf(<8 x float>) nounwind readnone alwaysinline { - unary4to8(ret, float, @__svml_expf4, %0) - ret <8 x float> %ret -} - -define <8 x float> @__svml_logf(<8 x float>) nounwind readnone alwaysinline { - unary4to8(ret, float, @__svml_logf4, %0) - ret <8 x float> %ret -} - -define <8 x float> @__svml_powf(<8 x float>, - <8 x float>) nounwind readnone alwaysinline { - binary4to8(ret, float, @__svml_powf4, %0, %1) - ret <8 x float> %ret -} +svml_define_x(double,2,2,d,8) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/builtins/target-sse2.ll b/builtins/target-sse2.ll index c858ccb6..e42d4990 100644 --- a/builtins/target-sse2.ll +++ b/builtins/target-sse2.ll @@ -503,7 +503,7 @@ svml_define(float,f4,4,f) ;; double precision svml_declare(double,2,2) -svml_define_x2(double,2,2,d,4) +svml_define_x(double,2,2,d,4) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/builtins/target-sse4-x2.ll b/builtins/target-sse4-x2.ll index c45966e3..842db53f 100644 --- a/builtins/target-sse4-x2.ll +++ b/builtins/target-sse4-x2.ll @@ -108,87 +108,11 @@ define <8 x float> @__sqrt_varying_float(<8 x float>) nounwind readonly alwaysin include(`svml.m4') ;; single precision svml_declare(float,f4,4) +svml_define_x(float,f4,4,f,8) ;; double precision svml_declare(double,2,2) -svml_define_x4(double,2,2,d,8) - - -define <8 x float> @__svml_sinf(<8 x float>) nounwind readnone alwaysinline { - unary4to8(ret, float, @__svml_sinf4, %0) - ret <8 x float> %ret -} - -define <8 x float> @__svml_asinf(<8 x float>) nounwind readnone alwaysinline { - unary4to8(ret, float, @__svml_asinf4, %0) - ret <8 x float> %ret -} - -define <8 x float> @__svml_cosf(<8 x float>) nounwind readnone alwaysinline { - unary4to8(ret, float, @__svml_cosf4, %0) - ret <8 x float> %ret -} - -define void @__svml_sincosf(<8 x float>, <8 x float> *, - <8 x float> *) nounwind readnone alwaysinline { - ; call svml_sincosf4 two times with the two 4-wide sub-vectors - %a = shufflevector <8 x float> %0, <8 x float> undef, - <4 x i32> - %b = shufflevector <8 x float> %0, <8 x float> undef, - <4 x i32> - - %cospa = alloca <4 x float> - %sa = call <4 x float> @__svml_sincosf4(<4 x float> * %cospa, <4 x float> %a) - - %cospb = alloca <4 x float> - %sb = call <4 x float> @__svml_sincosf4(<4 x float> * %cospb, <4 x float> %b) - - %sin = shufflevector <4 x float> %sa, <4 x float> %sb, - <8 x i32> - store <8 x float> %sin, <8 x float> * %1 - - %cosa = load <4 x float> * %cospa - %cosb = load <4 x float> * %cospb - %cos = shufflevector <4 x float> %cosa, <4 x float> %cosb, - <8 x i32> - store <8 x float> %cos, <8 x float> * %2 - - ret void -} - -define <8 x float> @__svml_tanf(<8 x float>) nounwind readnone alwaysinline { - unary4to8(ret, float, @__svml_tanf4, %0) - ret <8 x float> %ret -} - -define <8 x float> @__svml_atanf(<8 x float>) nounwind readnone alwaysinline { - unary4to8(ret, float, @__svml_atanf4, %0) - ret <8 x float> %ret -} - -define <8 x float> @__svml_atan2f(<8 x float>, - <8 x float>) nounwind readnone alwaysinline { - binary4to8(ret, float, @__svml_atan2f4, %0, %1) - ret <8 x float> %ret -} - -define <8 x float> @__svml_expf(<8 x float>) nounwind readnone alwaysinline { - unary4to8(ret, float, @__svml_expf4, %0) - ret <8 x float> %ret -} - -define <8 x float> @__svml_logf(<8 x float>) nounwind readnone alwaysinline { - unary4to8(ret, float, @__svml_logf4, %0) - ret <8 x float> %ret -} - -define <8 x float> @__svml_powf(<8 x float>, - <8 x float>) nounwind readnone alwaysinline { - binary4to8(ret, float, @__svml_powf4, %0, %1) - ret <8 x float> %ret -} +svml_define_x(double,2,2,d,8) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/builtins/target-sse4.ll b/builtins/target-sse4.ll index eb82ab9a..88be6c59 100644 --- a/builtins/target-sse4.ll +++ b/builtins/target-sse4.ll @@ -216,7 +216,7 @@ svml_define(float,f4,4,f) ;; double precision svml_declare(double,2,2) -svml_define_x2(double,2,2,d,4) +svml_define_x(double,2,2,d,4) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; horizontal ops / reductions