Remove load_and_broadcast from built-ins.
Now that we never run with the mask all off, we no longer need to put this logic in a built-in function just so the mask can be checked. In the one place where it was used (turning gathers that all access the same location into a load and broadcast), we now emit that code directly.
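
For reference, the sequence now emitted directly by the gather optimization amounts to a scalar load followed by a vector splat. A minimal LLVM IR sketch, assuming a hypothetical 4-wide i32 gather whose lanes all point at the same address (illustrative only, not the exact instructions ispc emits):

define <4 x i32> @broadcast_gather_sketch(i8 * %ptr) nounwind {
  ; load the one scalar value every active lane would have gathered
  %p = bitcast i8 * %ptr to i32 *
  %val = load i32 * %p
  ; splat it across all lanes of the result vector
  %v0 = insertelement <4 x i32> undef, i32 %val, i32 0
  %splat = shufflevector <4 x i32> %v0, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %splat
}
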
@@ -353,13 +353,6 @@ define i64 @__reduce_max_uint64(<16 x i64>) nounwind readnone alwaysinline {
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; unaligned loads/loads+broadcasts

load_and_broadcast(i8)
load_and_broadcast(i16)
load_and_broadcast(i32)
load_and_broadcast(float)
load_and_broadcast(i64)
load_and_broadcast(double)

; no masked load instruction for i8 and i16 types??
masked_load(i8, 1)
masked_load(i16, 2)
@@ -334,12 +334,6 @@ define i64 @__reduce_max_uint64(<8 x i64>) nounwind readnone alwaysinline {
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; unaligned loads/loads+broadcasts

load_and_broadcast(i8)
load_and_broadcast(i16)
load_and_broadcast(i32)
load_and_broadcast(float)
load_and_broadcast(i64)
load_and_broadcast(double)

; no masked load instruction for i8 and i16 types??
masked_load(i8, 1)
@@ -21,12 +21,6 @@ gen_masked_store(i64)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; unaligned loads/loads+broadcasts

load_and_broadcast(i8)
load_and_broadcast(i16)
load_and_broadcast(i32)
load_and_broadcast(float)
load_and_broadcast(i64)
load_and_broadcast(double)

masked_load(i8, 1)
masked_load(i16, 2)
@@ -230,12 +230,6 @@ declare i64 @__reduce_max_uint64(<WIDTH x i64>) nounwind readnone
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; unaligned loads/loads+broadcasts

load_and_broadcast(i8)
load_and_broadcast(i16)
load_and_broadcast(i32)
load_and_broadcast(float)
load_and_broadcast(i64)
load_and_broadcast(double)

declare <WIDTH x i8> @__masked_load_i8(i8 * nocapture, <WIDTH x i1> %mask) nounwind readonly
declare <WIDTH x i16> @__masked_load_i16(i8 * nocapture, <WIDTH x i1> %mask) nounwind readonly
@@ -433,12 +433,6 @@ reduce_equal(8)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; unaligned loads/loads+broadcasts

load_and_broadcast(i8)
load_and_broadcast(i16)
load_and_broadcast(i32)
load_and_broadcast(float)
load_and_broadcast(i64)
load_and_broadcast(double)

masked_load(i8, 1)
masked_load(i16, 2)
@@ -562,12 +562,6 @@ gen_masked_store(i64)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; unaligned loads/loads+broadcasts

load_and_broadcast(i8)
load_and_broadcast(i16)
load_and_broadcast(i32)
load_and_broadcast(float)
load_and_broadcast(i64)
load_and_broadcast(double)

masked_load(i8, 1)
masked_load(i16, 2)
@@ -360,12 +360,6 @@ reduce_equal(8)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; unaligned loads/loads+broadcasts

load_and_broadcast(i8)
load_and_broadcast(i16)
load_and_broadcast(i32)
load_and_broadcast(float)
load_and_broadcast(i64)
load_and_broadcast(double)

masked_load(i8, 1)
masked_load(i16, 2)
@@ -461,12 +461,6 @@ masked_store_float_double()
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; unaligned loads/loads+broadcasts

load_and_broadcast(i8)
load_and_broadcast(i16)
load_and_broadcast(i32)
load_and_broadcast(float)
load_and_broadcast(i64)
load_and_broadcast(double)

masked_load(i8, 1)
masked_load(i16, 2)
@@ -1796,19 +1796,6 @@ define void @__keep_funcs_live(i8 * %ptr, <WIDTH x i8> %v8, <WIDTH x i16> %v16,
  %mld = call <WIDTH x double> @__masked_load_double(i8 * %ptr, <WIDTH x MASK> %mask)
  call void @__usedouble(<WIDTH x double> %mld)

  %lb8 = call <WIDTH x i8> @__load_and_broadcast_i8(i8 * %ptr, <WIDTH x MASK> %mask)
  call void @__use8(<WIDTH x i8> %lb8)
  %lb16 = call <WIDTH x i16> @__load_and_broadcast_i16(i8 * %ptr, <WIDTH x MASK> %mask)
  call void @__use16(<WIDTH x i16> %lb16)
  %lb32 = call <WIDTH x i32> @__load_and_broadcast_i32(i8 * %ptr, <WIDTH x MASK> %mask)
  call void @__use32(<WIDTH x i32> %lb32)
  %lbf = call <WIDTH x float> @__load_and_broadcast_float(i8 * %ptr, <WIDTH x MASK> %mask)
  call void @__usefloat(<WIDTH x float> %lbf)
  %lb64 = call <WIDTH x i64> @__load_and_broadcast_i64(i8 * %ptr, <WIDTH x MASK> %mask)
  call void @__use64(<WIDTH x i64> %lb64)
  %lbd = call <WIDTH x double> @__load_and_broadcast_double(i8 * %ptr, <WIDTH x MASK> %mask)
  call void @__usedouble(<WIDTH x double> %lbd)

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; stores
  %pv8 = bitcast i8 * %ptr to <WIDTH x i8> *
@@ -2680,23 +2667,6 @@ i64minmax(WIDTH,min,uint64,ult)
i64minmax(WIDTH,max,uint64,ugt)
')

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Emit code to safely load a scalar value and broadcast it across the
;; elements of a vector. Parameter:
;; $1: element type for which to emit the function (i32, i64, ...)

define(`load_and_broadcast', `
define <WIDTH x $1> @__load_and_broadcast_$1(i8 *, <WIDTH x MASK> %mask) nounwind alwaysinline {
  %ptr = bitcast i8 * %0 to $1 *
  %val = load $1 * %ptr

  %ret0 = insertelement <WIDTH x $1> undef, $1 %val, i32 0
  forloop(i, 1, eval(WIDTH-1), `
  %ret`'i = insertelement <WIDTH x $1> %ret`'eval(i-1), $1 %val, i32 i')
  ret <WIDTH x $1> %ret`'eval(WIDTH-1)
}
')
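
For illustration, with WIDTH=4, MASK=i32, and $1=i32, the macro above expands to roughly the following (a sketch of the expansion; the real m4 output differs only in spacing):

define <4 x i32> @__load_and_broadcast_i32(i8 *, <4 x i32> %mask) nounwind alwaysinline {
  ; %mask is accepted but ignored -- the load is unconditional
  %ptr = bitcast i8 * %0 to i32 *
  %val = load i32 * %ptr
  %ret0 = insertelement <4 x i32> undef, i32 %val, i32 0
  %ret1 = insertelement <4 x i32> %ret0, i32 %val, i32 1
  %ret2 = insertelement <4 x i32> %ret1, i32 %val, i32 2
  %ret3 = insertelement <4 x i32> %ret2, i32 %val, i32 3
  ret <4 x i32> %ret3
}
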
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Emit general-purpose code to do a masked load for targets that dont have
;; an instruction to do that. Parameters:
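
The masked_load macro's parameter list and body are cut off here; as a rough illustration of the lane-by-lane approach such generic code takes, a hypothetical 2-wide i32 case with an i32 mask might look like the following (a sketch only, not the macro's actual output):

define <2 x i32> @masked_load_sketch(i8 * %p, <2 x i32> %mask) nounwind {
entry:
  %ptr = bitcast i8 * %p to i32 *
  ; lane 0: only touch memory if the mask bit is on
  %m0 = extractelement <2 x i32> %mask, i32 0
  %do0 = icmp ne i32 %m0, 0
  br i1 %do0, label %load0, label %join0
load0:
  %v0 = load i32 * %ptr
  %ins0 = insertelement <2 x i32> undef, i32 %v0, i32 0
  br label %join0
join0:
  %r0 = phi <2 x i32> [ undef, %entry ], [ %ins0, %load0 ]
  ; lane 1: same pattern at the next element
  %m1 = extractelement <2 x i32> %mask, i32 1
  %do1 = icmp ne i32 %m1, 0
  br i1 %do1, label %load1, label %join1
load1:
  %eltptr = getelementptr i32 * %ptr, i32 1
  %v1 = load i32 * %eltptr
  %ins1 = insertelement <2 x i32> %r0, i32 %v1, i32 1
  br label %join1
join1:
  %r1 = phi <2 x i32> [ %r0, %join0 ], [ %ins1, %load1 ]
  ret <2 x i32> %r1
}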