Initial Support for new stdlib shift operator

2013-10-22 18:06:54 -04:00
parent c18fa15db1
commit 899f85ce9c
5 changed files with 217 additions and 0 deletions
--- a/builtins/target-generic-common.ll
+++ b/builtins/target-generic-common.ll
@@ -80,6 +80,13 @@ declare <WIDTH x i32> @__rotate_i32(<WIDTH x i32>, i32) nounwind readnone
 declare <WIDTH x double> @__rotate_double(<WIDTH x double>, i32) nounwind readnone
 declare <WIDTH x i64> @__rotate_i64(<WIDTH x i64>, i32) nounwind readnone

+declare <WIDTH x i8> @__shift_i8(<WIDTH x i8>, i32) nounwind readnone ; __shift_* builtins: declarations only, each takes a vector and an i32
+declare <WIDTH x i16> @__shift_i16(<WIDTH x i16>, i32) nounwind readnone
+declare <WIDTH x float> @__shift_float(<WIDTH x float>, i32) nounwind readnone
+declare <WIDTH x i32> @__shift_i32(<WIDTH x i32>, i32) nounwind readnone
+declare <WIDTH x double> @__shift_double(<WIDTH x double>, i32) nounwind readnone
+declare <WIDTH x i64> @__shift_i64(<WIDTH x i64>, i32) nounwind readnone
+
 declare <WIDTH x i8> @__shuffle_i8(<WIDTH x i8>, <WIDTH x i32>) nounwind readnone
 declare <WIDTH x i8> @__shuffle2_i8(<WIDTH x i8>, <WIDTH x i8>,
                                    <WIDTH x i32>) nounwind readnone
--- a/builtins/util.m4
+++ b/builtins/util.m4
@@ -797,6 +797,43 @@ not_const:
  ret <WIDTH x $1> %result
 }

+;; Shift the lanes of vector %0 by the scalar amount %1: result lane k is input lane k + %1,
+;; or zero when that lane lies outside the input.  Only amounts in [-WIDTH, WIDTH] are handled.
+define <WIDTH x $1> @__shift_$1(<WIDTH x $1>, i32) nounwind readnone alwaysinline {
+  %isc = call i1 @__is_compile_time_constant_uniform_int32(i32 %1)
+  %zeropaddedvec = shufflevector <WIDTH x $1> %0, <WIDTH x $1> zeroinitializer,
+                     <eval(2*WIDTH) x i32> < forloop(i, 0, eval(2*WIDTH-2), `i32 i, ')i32 eval(2*WIDTH-1) >
+  br i1 %isc, label %is_const, label %not_const
+is_const:
+  ; though verbose, this turns into tight code if %1 is a constant (out-of-range lanes wrap into the zero half)
+forloop(i, 0, eval(WIDTH-1), `  
+  %delta_`'i = add i32 %1, i
+  %delta_clamped_`'i = and i32 %delta_`'i, eval(2*WIDTH-1)
+  %v_`'i = extractelement <eval(2*WIDTH) x $1> %zeropaddedvec, i32 %delta_clamped_`'i')
+  %ret_0 = insertelement <WIDTH x $1> zeroinitializer, $1 %v_0, i32 0
+forloop(i, 1, eval(WIDTH-1), `  %ret_`'i = insertelement <WIDTH x $1> %ret_`'eval(i-1), $1 %v_`'i, i32 i
+')
+  ret <WIDTH x $1> %ret_`'eval(WIDTH-1)
+not_const:
+  ; store zeros, the input vector, and zeros again as three consecutive vectors in memory
+  %ptr = alloca <WIDTH x $1>, i32 3
+  %ptr0 = getelementptr <WIDTH x $1> * %ptr, i32 0
+  store <WIDTH x $1> zeroinitializer, <WIDTH x $1> * %ptr0
+  %ptr1 = getelementptr <WIDTH x $1> * %ptr, i32 1
+  store <WIDTH x $1> %0, <WIDTH x $1> * %ptr1
+  %ptr2 = getelementptr <WIDTH x $1> * %ptr, i32 2
+  store <WIDTH x $1> zeroinitializer, <WIDTH x $1> * %ptr2
+
+  ; the input starts at element WIDTH of that buffer, so the shifted vector starts at element WIDTH + %1
+  %offset = add i32 %1, WIDTH
+  %ptr_as_elt_array = bitcast <WIDTH x $1> * %ptr to [eval(3*WIDTH) x $1] *
+  %load_ptr = getelementptr [eval(3*WIDTH) x $1] * %ptr_as_elt_array, i32 0, i32 %offset
+  %load_ptr_vec = bitcast $1 * %load_ptr to <WIDTH x $1> *
+  %result = load <WIDTH x $1> * %load_ptr_vec, align $2
+  ret <WIDTH x $1> %result
+}
+
+
 define <WIDTH x $1> @__shuffle_$1(<WIDTH x $1>, <WIDTH x i32>) nounwind readnone alwaysinline {
 forloop(i, 0, eval(WIDTH-1), `  
  %index_`'i = extractelement <WIDTH x i32> %1, i32 i')