round2to16double was added

2015-05-21 16:53:18 +03:00
parent a70dcb13d1
commit 28b49837fc
2 changed files with 39 additions and 7 deletions
--- a/builtins/target-sse4-8.ll
+++ b/builtins/target-sse4-8.ll
@@ -143,21 +143,17 @@ define <16 x float> @__ceil_varying_float(<16 x float>) nounwind readonly always
 declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone

 define <16 x double> @__round_varying_double(<16 x double>) nounwind readonly alwaysinline {
-;  XXXround2to4double(%0, 8)
-  ; FIXME: need round2to16double in util.m4...
-  ret <16 x double> undef  
+    round2to16double(%0, 8)  ; roundpd, round to nearest 0b00 | do not signal precision exceptions 0b1000 = 8; the expansion also emits the ret
 }

 define <16 x double> @__floor_varying_double(<16 x double>) nounwind readonly alwaysinline {
  ; roundpd, round down 0b01 | don't signal precision exceptions 0b1001 = 9
-;  XXXround2to4double(%0, 9)
-  ret <16 x double> undef  
+    round2to16double(%0, 9)  ; expands to eight 2-wide roundpd calls on %0 plus the ret of the joined 16-wide result
 }

 define <16 x double> @__ceil_varying_double(<16 x double>) nounwind readonly alwaysinline {
  ; roundpd, round up 0b10 | don't signal precision exceptions 0b1010 = 10
-;  XXXround2to4double(%0, 10)
-  ret <16 x double> undef  
+    round2to16double(%0, 10)  ; expands to eight 2-wide roundpd calls on %0 plus the ret of the joined 16-wide result
 }

 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
--- a/builtins/util.m4
+++ b/builtins/util.m4
@@ -1130,6 +1130,42 @@ ret <8 x double> %ret
 '
 )

+define(`round2to16double', `; Rounds a 16-wide double vector, given as the first argument, two lanes at a time; the second argument is the i32 rounding immediate passed to roundpd.
+%v0 = shufflevector <16 x double> $1, <16 x double> undef, <2 x i32> <i32 0,  i32 1>  ; slice the input into eight 2-wide pieces %v0..%v7, lanes taken in order
+%v1 = shufflevector <16 x double> $1, <16 x double> undef, <2 x i32> <i32 2,  i32 3>
+%v2 = shufflevector <16 x double> $1, <16 x double> undef, <2 x i32> <i32 4,  i32 5>
+%v3 = shufflevector <16 x double> $1, <16 x double> undef, <2 x i32> <i32 6,  i32 7>
+%v4 = shufflevector <16 x double> $1, <16 x double> undef, <2 x i32> <i32 8,  i32 9>
+%v5 = shufflevector <16 x double> $1, <16 x double> undef, <2 x i32> <i32 10, i32 11>
+%v6 = shufflevector <16 x double> $1, <16 x double> undef, <2 x i32> <i32 12, i32 13>
+%v7 = shufflevector <16 x double> $1, <16 x double> undef, <2 x i32> <i32 14, i32 15>
+%r0 = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %v0, i32 $2)  ; round every piece with the same immediate, giving %r0..%r7
+%r1 = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %v1, i32 $2)
+%r2 = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %v2, i32 $2)
+%r3 = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %v3, i32 $2)
+%r4 = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %v4, i32 $2)
+%r5 = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %v5, i32 $2)
+%r6 = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %v6, i32 $2)
+%r7 = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %v7, i32 $2)
+%ret0 = shufflevector <2 x double> %r0, <2 x double> %r1,  ; join rounded pairs into 4-wide vectors
+          <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+%ret1 = shufflevector <2 x double> %r2, <2 x double> %r3,
+          <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+%ret01 = shufflevector <4 x double> %ret0, <4 x double> %ret1,  ; lanes 0-7 of the result
+          <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+%ret2 = shufflevector <2 x double> %r4, <2 x double> %r5,
+          <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+%ret3 = shufflevector <2 x double> %r6, <2 x double> %r7,
+          <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+%ret23 = shufflevector <4 x double> %ret2, <4 x double> %ret3,  ; lanes 8-15 of the result
+          <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+%ret = shufflevector <8 x double> %ret01, <8 x double> %ret23,  ; concatenate both halves into the full 16-wide vector
+          <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
+                      i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ret <16 x double> %ret  ; the expansion returns, so the call must be the last statement of the function body
+'
+)
+
 define(`round4to16double', `
 %v0 = shufflevector <16 x double> $1, <16 x double> undef,
         <4 x i32> <i32 0, i32 1, i32 2, i32 3>