Have aos/soa and broadcast/shuffle/rotate functions provided by the target.

The SSE/AVX targets still use the existing implementations from util.m4; for the generic targets, these functions are now only declared, so calls to them are passed through for the target to implement.
2012-01-04 12:16:41 -08:00
parent 848a432640
commit f75c94a8f1
7 changed files with 376 additions and 370 deletions
--- a/builtins/target-generic-common.ll
+++ b/builtins/target-generic-common.ll
@@ -34,6 +34,69 @@ include(`util.m4')

 stdlib_core()
 scans()
+reduce_equal(WIDTH)
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; broadcast/rotate/shuffle
+
+declare <WIDTH x float> @__smear_float(float) nounwind readnone
+declare <WIDTH x double> @__smear_double(double) nounwind readnone
+declare <WIDTH x i8> @__smear_i8(i8) nounwind readnone
+declare <WIDTH x i16> @__smear_i16(i16) nounwind readnone
+declare <WIDTH x i32> @__smear_i32(i32) nounwind readnone
+declare <WIDTH x i64> @__smear_i64(i64) nounwind readnone
+
+declare <WIDTH x float> @__broadcast_float(<WIDTH x float>, i32) nounwind readnone
+declare <WIDTH x double> @__broadcast_double(<WIDTH x double>, i32) nounwind readnone
+declare <WIDTH x i8> @__broadcast_i8(<WIDTH x i8>, i32) nounwind readnone
+declare <WIDTH x i16> @__broadcast_i16(<WIDTH x i16>, i32) nounwind readnone
+declare <WIDTH x i32> @__broadcast_i32(<WIDTH x i32>, i32) nounwind readnone
+declare <WIDTH x i64> @__broadcast_i64(<WIDTH x i64>, i32) nounwind readnone
+
+declare <WIDTH x i8> @__rotate_i8(<WIDTH x i8>, i32) nounwind readnone
+declare <WIDTH x i16> @__rotate_i16(<WIDTH x i16>, i32) nounwind readnone
+declare <WIDTH x float> @__rotate_float(<WIDTH x float>, i32) nounwind readnone
+declare <WIDTH x i32> @__rotate_i32(<WIDTH x i32>, i32) nounwind readnone
+declare <WIDTH x double> @__rotate_double(<WIDTH x double>, i32) nounwind readnone
+declare <WIDTH x i64> @__rotate_i64(<WIDTH x i64>, i32) nounwind readnone
+
+declare <WIDTH x i8> @__shuffle_i8(<WIDTH x i8>, <WIDTH x i32>) nounwind readnone
+declare <WIDTH x i8> @__shuffle2_i8(<WIDTH x i8>, <WIDTH x i8>,
+                                    <WIDTH x i32>) nounwind readnone
+declare <WIDTH x i16> @__shuffle_i16(<WIDTH x i16>, <WIDTH x i32>) nounwind readnone
+declare <WIDTH x i16> @__shuffle2_i16(<WIDTH x i16>, <WIDTH x i16>,
+                                      <WIDTH x i32>) nounwind readnone
+declare <WIDTH x float> @__shuffle_float(<WIDTH x float>,
+                                         <WIDTH x i32>) nounwind readnone
+declare <WIDTH x float> @__shuffle2_float(<WIDTH x float>, <WIDTH x float>,
+                                          <WIDTH x i32>) nounwind readnone
+declare <WIDTH x i32> @__shuffle_i32(<WIDTH x i32>,
+                                     <WIDTH x i32>) nounwind readnone
+declare <WIDTH x i32> @__shuffle2_i32(<WIDTH x i32>, <WIDTH x i32>,
+                                      <WIDTH x i32>) nounwind readnone
+declare <WIDTH x double> @__shuffle_double(<WIDTH x double>,
+                                           <WIDTH x i32>) nounwind readnone
+declare <WIDTH x double> @__shuffle2_double(<WIDTH x double>,
+                                            <WIDTH x double>, <WIDTH x i32>) nounwind readnone
+declare <WIDTH x i64> @__shuffle_i64(<WIDTH x i64>,
+                                     <WIDTH x i32>) nounwind readnone
+declare <WIDTH x i64> @__shuffle2_i64(<WIDTH x i64>, <WIDTH x i64>,
+                                      <WIDTH x i32>) nounwind readnone
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; aos/soa
+
+declare void @__soa_to_aos3_float(<WIDTH x float> %v0, <WIDTH x float> %v1,
+                                  <WIDTH x float> %v2, float * noalias %p) nounwind
+declare void @__aos_to_soa3_float(float * noalias %p, <WIDTH x float> * %out0,
+                                  <WIDTH x float> * %out1, <WIDTH x float> * %out2) nounwind
+declare void @__soa_to_aos4_float(<WIDTH x float> %v0, <WIDTH x float> %v1,
+                                  <WIDTH x float> %v2, <WIDTH x float> %v3,
+                                  float * noalias %p) nounwind
+declare void @__aos_to_soa4_float(float * noalias %p, <WIDTH x float> * noalias %out0,
+                                  <WIDTH x float> * noalias %out1,
+                                  <WIDTH x float> * noalias %out2,
+                                  <WIDTH x float> * noalias %out3) nounwind

 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; math
@@ -128,7 +191,6 @@ declare <WIDTH x float> @__svml_log(<WIDTH x float>)
 declare <WIDTH x float> @__svml_pow(<WIDTH x float>, <WIDTH x float>)

 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
 ;; reductions

 declare i32 @__movmsk(<WIDTH x i1>) nounwind readnone 
@@ -157,15 +219,6 @@ declare i64 @__reduce_add_uint64(<WIDTH x i64>) nounwind readnone
 declare i64 @__reduce_min_uint64(<WIDTH x i64>) nounwind readnone 
 declare i64 @__reduce_max_uint64(<WIDTH x i64>) nounwind readnone 

-declare i1 @__reduce_equal_int32(<WIDTH x i32> %v, i32 * nocapture %samevalue,
-                                 <WIDTH x i1> %mask) nounwind 
-declare i1 @__reduce_equal_float(<WIDTH x float> %v, float * nocapture %samevalue,
-                                 <WIDTH x i1> %mask) nounwind 
-declare i1 @__reduce_equal_int64(<WIDTH x i64> %v, i64 * nocapture %samevalue,
-                                 <WIDTH x i1> %mask) nounwind 
-declare i1 @__reduce_equal_double(<WIDTH x double> %v, double * nocapture %samevalue,
-                                  <WIDTH x i1> %mask) nounwind 
-
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; unaligned loads/loads+broadcasts