# Patch summary (commentary only; text before the first "diff --git" header is not part of any hunk).
#
# builtins/target-nvptx.ll:
#   Adds an m4 macro `broadcast' that emits @__broadcast_$1 operating on <1 x $1>.
#   The generated function extracts element 0 of its vector argument, passes that
#   scalar together with its i32 argument to @__shfl_$1_nvptx, and returns the
#   call result inserted into element 0 of a <1 x $1> vector.
#   The macro is instantiated for i8, i16, i32, i64, float and double.
#   NOTE(review): @__shfl_$1_nvptx is not defined in this patch; presumably it is
#   provided elsewhere in the builtins for every instantiated type -- confirm.
#
# builtins/util-nvptx.m4:
#   Removes the @__broadcast_$1 definition from the `shuffles' macro. The removed
#   body used extractelement at index %1, insertelement at index 0, and a
#   shufflevector with a zeroinitializer mask. After this patch the `shuffles'
#   macro body shown in the hunk is empty.
#
# NOTE(review): the removed lines appear to be missing their vector type operands
#   (e.g. "define @__broadcast_$1(, i32)"); this looks like extraction damage --
#   confirm against the original patch before relying on them.
# NOTE(review): the whole patch is stored on a single line; the original line
#   breaks are lost, so it presumably will not apply as-is -- confirm.
diff --git a/builtins/target-nvptx.ll b/builtins/target-nvptx.ll index e93c65fe..a2fdacd6 100644 --- a/builtins/target-nvptx.ll +++ b/builtins/target-nvptx.ll @@ -1125,6 +1125,22 @@ rotate(i64) rotate(float) rotate(double) +define(`broadcast', ` +define <1 x $1> @__broadcast_$1(<1 x $1>, i32) nounwind readnone alwaysinline +{ + %val = extractelement <1 x $1> %0, i32 0 + %rets = tail call $1 @__shfl_$1_nvptx($1 %val, i32 %1) + %retv = insertelement <1 x $1> undef, $1 %rets, i32 0 + ret <1 x $1> %retv +} +') +broadcast(i8) +broadcast(i16) +broadcast(i32) +broadcast(i64) +broadcast(float) +broadcast(double) + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; unaligned loads/loads+broadcasts diff --git a/builtins/util-nvptx.m4 b/builtins/util-nvptx.m4 index aa3d4e82..ef7ce3f8 100644 --- a/builtins/util-nvptx.m4 +++ b/builtins/util-nvptx.m4 @@ -757,13 +757,6 @@ divert`'dnl ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; define(`shuffles', ` -define @__broadcast_$1(, i32) nounwind readnone alwaysinline { - %v = extractelement %0, i32 %1 - %broadcast_init = insertelement undef, $1 %v, i32 0 - %broadcast = shufflevector %broadcast_init, undef, zeroinitializer - ret %broadcast -} - ') define(`define_shuffles',`