From 39962623cc669205c71d91456cb372e9c69279e8 Mon Sep 17 00:00:00 2001 From: Evghenii Date: Wed, 22 Jan 2014 19:18:45 +0100 Subject: [PATCH] added shuffle --- builtins/target-nvptx.ll | 18 ++++++++++++++++++ builtins/util-nvptx.m4 | 12 ------------ 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/builtins/target-nvptx.ll b/builtins/target-nvptx.ll index 5334f882..4dab86c4 100644 --- a/builtins/target-nvptx.ll +++ b/builtins/target-nvptx.ll @@ -1044,6 +1044,24 @@ define i1 @__reduce_equal_double(<1 x double> %vv, double * %samevalue, } +;;;;;;;;;;; shuffle +define(`shuffle1', ` +define <1 x $1> @__shuffle_$1(<1 x $1>, <1 x i32>) nounwind readnone alwaysinline +{ + %val = extractelement <1 x $1> %0, i32 0 + %lane = extractelement <1 x i32> %1, i32 0 + %rets = tail call $1 @__shfl_$1_nvptx($1 %val, i32 %lane) + %retv = insertelement <1 x $1> undef, $1 %rets, i32 0 + ret <1 x $1> %retv +} +') +shuffle1(i8) +shuffle1(i16) +shuffle1(i32) +shuffle1(i64) +shuffle1(float) +shuffle1(double) + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; unaligned loads/loads+broadcasts diff --git a/builtins/util-nvptx.m4 b/builtins/util-nvptx.m4 index c948cfc3..76fc7f2b 100644 --- a/builtins/util-nvptx.m4 +++ b/builtins/util-nvptx.m4 @@ -815,18 +815,6 @@ define @__shift_$1(, i32) nounwind readnone alwaysinlin } -define @__shuffle_$1(, ) nounwind readnone alwaysinline { -forloop(i, 0, eval(WIDTH-1), ` - %index_`'i = extractelement %1, i32 i') -forloop(i, 0, eval(WIDTH-1), ` - %v_`'i = extractelement %0, i32 %index_`'i') - - %ret_0 = insertelement undef, $1 %v_0, i32 0 -forloop(i, 1, eval(WIDTH-1), ` %ret_`'i = insertelement %ret_`'eval(i-1), $1 %v_`'i, i32 i -') - ret %ret_`'eval(WIDTH-1) -} - define @__shuffle2_$1(, , ) nounwind readnone alwaysinline { %v2 = shufflevector %0, %1, < forloop(i, 0, eval(2*WIDTH-2), `i32 i, ') i32 eval(2*WIDTH-1)