added shift

This commit is contained in:
Evghenii
2014-01-22 20:43:53 +01:00
parent 39962623cc
commit 7d0aa7a336
3 changed files with 67 additions and 92 deletions

View File

@@ -764,97 +764,6 @@ define <WIDTH x $1> @__broadcast_$1(<WIDTH x $1>, i32) nounwind readnone alwaysi
ret <WIDTH x $1> %broadcast
}
define <WIDTH x $1> @__rotate_$1(<WIDTH x $1>, i32) nounwind readnone alwaysinline {
  ; Rotate the lanes of %0 by %1 positions with wraparound: result lane k
  ; receives the input lane numbered k + %1 after masking with the vector
  ; width minus one. The masking wraps correctly when the vector width is a
  ; power of two.
  %amount_is_const = call i1 @__is_compile_time_constant_uniform_int32(i32 %1)
  br i1 %amount_is_const, label %static_amount, label %dynamic_amount

static_amount:
  ; One extract per lane followed by an insert chain. This looks long but is
  ; meant to collapse into compact code once %1 is a known constant.
  forloop(i, 0, eval(WIDTH-1), `
  %src_lane_`'i = add i32 %1, i
  %src_lane_wrapped_`'i = and i32 %src_lane_`'i, eval(WIDTH-1)
  %elt_`'i = extractelement <WIDTH x $1> %0, i32 %src_lane_wrapped_`'i')
  %rot_0 = insertelement <WIDTH x $1> undef, $1 %elt_0, i32 0
  forloop(i, 1, eval(WIDTH-1), `  %rot_`'i = insertelement <WIDTH x $1> %rot_`'eval(i-1), $1 %elt_`'i, i32 i
')
  ret <WIDTH x $1> %rot_`'eval(WIDTH-1)

dynamic_amount:
  ; Lay down two back-to-back copies of the input on the stack so that a
  ; window starting anywhere inside the first copy runs on into the second.
  %twin = alloca <WIDTH x $1>, i32 2
  %twin_lo = getelementptr <WIDTH x $1> * %twin, i32 0
  store <WIDTH x $1> %0, <WIDTH x $1> * %twin_lo
  %twin_hi = getelementptr <WIDTH x $1> * %twin, i32 1
  store <WIDTH x $1> %0, <WIDTH x $1> * %twin_hi

  ; Mask the rotation amount down to a start element inside the first copy
  ; and read one full vector beginning at that element.
  %start = and i32 %1, eval(WIDTH-1)
  %twin_elts = bitcast <WIDTH x $1> * %twin to [eval(2*WIDTH) x $1] *
  %window_elt = getelementptr [eval(2*WIDTH) x $1] * %twin_elts, i32 0, i32 %start
  %window = bitcast $1 * %window_elt to <WIDTH x $1> *
  ; The window starts on an arbitrary element so the load carries the
  ; alignment given by the second macro argument rather than the default
  ; vector alignment.
  %rotated = load <WIDTH x $1> * %window, align $2
  ret <WIDTH x $1> %rotated
}
define <WIDTH x $1> @__shift_$1(<WIDTH x $1>, i32) nounwind readnone alwaysinline {
  ; Non-wrapping lane shift: result lane k receives input lane k + %1 when
  ; that lane exists and zero otherwise. Shift amounts whose magnitude is at
  ; least the vector width therefore yield an all-zero vector.
  ;
  ; Build a three-vector stack buffer laid out as [ zeros | %0 | zeros ] so
  ; that one vector load at element offset %1 plus the vector width picks up
  ; zeros for every lane that falls off either end of the input.
  %ptr = alloca <WIDTH x $1>, i32 3
  %ptr0 = getelementptr <WIDTH x $1> * %ptr, i32 0
  store <WIDTH x $1> zeroinitializer, <WIDTH x $1> * %ptr0
  %ptr1 = getelementptr <WIDTH x $1> * %ptr, i32 1
  store <WIDTH x $1> %0, <WIDTH x $1> * %ptr1
  %ptr2 = getelementptr <WIDTH x $1> * %ptr, i32 2
  store <WIDTH x $1> zeroinitializer, <WIDTH x $1> * %ptr2

  ; Element offset of the load window inside the buffer. Without a bound a
  ; shift amount beyond plus or minus the vector width would place the window
  ; outside the alloca. Clamp to [0, 2 * vector width]: both end points read
  ; only zero padding so every out-of-range amount gives the all-zero result
  ; while in-range amounts are unchanged. A wrapped add lands on a negative
  ; value and is clamped to zero as well.
  %offset_raw = add i32 %1, WIDTH
  %offset_neg = icmp slt i32 %offset_raw, 0
  %offset_lo = select i1 %offset_neg, i32 0, i32 %offset_raw
  %offset_big = icmp sgt i32 %offset_lo, eval(2*WIDTH)
  %offset = select i1 %offset_big, i32 eval(2*WIDTH), i32 %offset_lo

  %ptr_as_elt_array = bitcast <WIDTH x $1> * %ptr to [eval(3*WIDTH) x $1] *
  %load_ptr = getelementptr [eval(3*WIDTH) x $1] * %ptr_as_elt_array, i32 0, i32 %offset
  %load_ptr_vec = bitcast $1 * %load_ptr to <WIDTH x $1> *
  ; The window starts on an arbitrary element so the load carries the
  ; alignment given by the second macro argument rather than the default
  ; vector alignment.
  %result = load <WIDTH x $1> * %load_ptr_vec, align $2
  ret <WIDTH x $1> %result
}
define <WIDTH x $1> @__shuffle2_$1(<WIDTH x $1>, <WIDTH x $1>, <WIDTH x i32>) nounwind readnone alwaysinline {
  ; Two-source permute: result lane k is element %2[k] of the double-width
  ; vector formed by %0 followed by %1. The lane numbers in %2 are used as
  ; given with no range check.
  %both = shufflevector <WIDTH x $1> %0, <WIDTH x $1> %1, <eval(2*WIDTH) x i32> <
      forloop(i, 0, eval(2*WIDTH-2), `i32 i, ') i32 eval(2*WIDTH-1)
  >
  %perm_is_const = call i1 @__is_compile_time_constant_varying_int32(<WIDTH x i32> %2)
  ; Pull every requested lane number out of %2 once. Both paths below use them.
  forloop(i, 0, eval(WIDTH-1), `
  %lane_`'i = extractelement <WIDTH x i32> %2, i32 i')
  br i1 %perm_is_const, label %static_perm, label %dynamic_perm

static_perm:
  ; Extract each requested element straight from the joined vector and chain
  ; the inserts. With constant lane numbers this is expected to be reduced to
  ; good code by LLVM.
  forloop(i, 0, eval(WIDTH-1), `
  %elt_`'i = extractelement <eval(2*WIDTH) x $1> %both, i32 %lane_`'i')
  %perm_0 = insertelement <WIDTH x $1> undef, $1 %elt_0, i32 0
  forloop(i, 1, eval(WIDTH-1), `  %perm_`'i = insertelement <WIDTH x $1> %perm_`'eval(i-1), $1 %elt_`'i, i32 i
')
  ret <WIDTH x $1> %perm_`'eval(WIDTH-1)

dynamic_perm:
  ; Spill the joined vector to the stack and treat it as a flat element
  ; array addressed by the runtime lane numbers.
  %spill = alloca <eval(2*WIDTH) x $1>
  store <eval(2*WIDTH) x $1> %both, <eval(2*WIDTH) x $1> * %spill
  %elts = bitcast <eval(2*WIDTH) x $1> * %spill to $1 *
  ; Gather one scalar per result lane.
  forloop(i, 0, eval(WIDTH-1), `
  %slot_`'i = getelementptr $1 * %elts, i32 %lane_`'i
  %picked_`'i = load $1 * %slot_`'i')
  ; Assemble the gathered scalars into the result vector.
  %out_0 = insertelement <WIDTH x $1> undef, $1 %picked_0, i32 0
  forloop(i, 1, eval(WIDTH-1), `  %out_`'i = insertelement <WIDTH x $1> %out_`'eval(i-1), $1 %picked_`'i, i32 i
')
  ret <WIDTH x $1> %out_`'eval(WIDTH-1)
}
')
define(`define_shuffles',`