From 28b49837fc5604951b03bf1134601e76893373c5 Mon Sep 17 00:00:00 2001
From: Vsevolod Livinskiy
Date: Thu, 21 May 2015 16:53:18 +0300
Subject: [PATCH] round2to16 was added

Add the round2to16double macro to builtins/util.m4. It splits a
<16 x double> value into eight <2 x double> pairs, rounds each pair with
@llvm.x86.sse41.round.pd using the mode immediate given as the second
macro argument, and concatenates the results back in the original lane
order.

Use the macro in builtins/target-sse4-8.ll so that
__round_varying_double, __floor_varying_double and __ceil_varying_double
no longer return undef.
---
 builtins/target-sse4-8.ll | 10 +++-------
 builtins/util.m4          | 36 ++++++++++++++++++++++++++++++++++++
 2 files changed, 39 insertions(+), 7 deletions(-)

diff --git a/builtins/target-sse4-8.ll b/builtins/target-sse4-8.ll
index e7fdb8f2..0688ad33 100644
--- a/builtins/target-sse4-8.ll
+++ b/builtins/target-sse4-8.ll
@@ -143,21 +143,17 @@ define <16 x float> @__ceil_varying_float(<16 x float>) nounwind readonly always
 declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone
 
 define <16 x double> @__round_varying_double(<16 x double>) nounwind readonly alwaysinline {
-; XXXround2to4double(%0, 8)
-  ; FIXME: need round2to16double in util.m4...
-  ret <16 x double> undef
+  round2to16double(%0, 8)
 }
 
 define <16 x double> @__floor_varying_double(<16 x double>) nounwind readonly alwaysinline {
   ; roundpd, round down 0b01 | don't signal precision exceptions 0b1001 = 9
-; XXXround2to4double(%0, 9)
-  ret <16 x double> undef
+  round2to16double(%0, 9)
 }
 
 define <16 x double> @__ceil_varying_double(<16 x double>) nounwind readonly alwaysinline {
   ; roundpd, round up 0b10 | don't signal precision exceptions 0b1010 = 10
-; XXXround2to4double(%0, 10)
-  ret <16 x double> undef
+  round2to16double(%0, 10)
 }
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
diff --git a/builtins/util.m4 b/builtins/util.m4
index 8951605b..f22705d0 100644
--- a/builtins/util.m4
+++ b/builtins/util.m4
@@ -1130,6 +1130,42 @@ ret <8 x double> %ret
 '
 )
 
+define(`round2to16double', `
+%v0 = shufflevector <16 x double> $1, <16 x double> undef, <2 x i32> <i32 0, i32 1>
+%v1 = shufflevector <16 x double> $1, <16 x double> undef, <2 x i32> <i32 2, i32 3>
+%v2 = shufflevector <16 x double> $1, <16 x double> undef, <2 x i32> <i32 4, i32 5>
+%v3 = shufflevector <16 x double> $1, <16 x double> undef, <2 x i32> <i32 6, i32 7>
+%v4 = shufflevector <16 x double> $1, <16 x double> undef, <2 x i32> <i32 8, i32 9>
+%v5 = shufflevector <16 x double> $1, <16 x double> undef, <2 x i32> <i32 10, i32 11>
+%v6 = shufflevector <16 x double> $1, <16 x double> undef, <2 x i32> <i32 12, i32 13>
+%v7 = shufflevector <16 x double> $1, <16 x double> undef, <2 x i32> <i32 14, i32 15>
+%r0 = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %v0, i32 $2)
+%r1 = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %v1, i32 $2)
+%r2 = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %v2, i32 $2)
+%r3 = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %v3, i32 $2)
+%r4 = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %v4, i32 $2)
+%r5 = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %v5, i32 $2)
+%r6 = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %v6, i32 $2)
+%r7 = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %v7, i32 $2)
+%ret0 = shufflevector <2 x double> %r0, <2 x double> %r1,
+          <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+%ret1 = shufflevector <2 x double> %r2, <2 x double> %r3,
+          <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+%ret01 = shufflevector <4 x double> %ret0, <4 x double> %ret1,
+          <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+%ret2 = shufflevector <2 x double> %r4, <2 x double> %r5,
+          <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+%ret3 = shufflevector <2 x double> %r6, <2 x double> %r7,
+          <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+%ret23 = shufflevector <4 x double> %ret2, <4 x double> %ret3,
+          <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+%ret = shufflevector <8 x double> %ret01, <8 x double> %ret23,
+          <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
+                      i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ret <16 x double> %ret
+'
+)
+
 define(`round4to16double', `
 %v0 = shufflevector <16 x double> $1, <16 x double> undef, <4 x i32>