From fe150c539fa971d48303f248de63122d92cbc107 Mon Sep 17 00:00:00 2001 From: evghenii Date: Tue, 8 Jul 2014 13:33:04 +0200 Subject: [PATCH] fix for exclusive_scan_and --- builtins/target-nvptx.ll | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/builtins/target-nvptx.ll b/builtins/target-nvptx.ll index 6db1cc89..d29d1c1c 100644 --- a/builtins/target-nvptx.ll +++ b/builtins/target-nvptx.ll @@ -1325,7 +1325,7 @@ define internal i32 @__shfl_scan_or_step_i32(i32 %partial, i32 %up_offset) nounw shfl.up.b32 r0|p, $1, $2, 0; @p or.b32 r0, r0, $3; mov.u32 $0, r0; - }", "=r,r,r,r"(i32 %partial, i32 %up_offset, i32 %partial) nounwind readnone alwaysinline + }", "=r,r,r,r"(i32 %partial, i32 %up_offset, i32 %partial) nounwind alwaysinline ret i32 %result; } define <1 x i32> @__exclusive_scan_or_i32(<1 x i32>, <1 x i1>) nounwind readnone alwaysinline @@ -1341,7 +1341,7 @@ define <1 x i32> @__exclusive_scan_or_i32(<1 x i32>, <1 x i1>) nounwind readnone shfl.up.b32 r0|p, $1, 1, 0; @!p mov.u32 r0, 0; mov.u32 $0, r0; - }","=r,r"(i32 %v1); + }","=r,r"(i32 %v1); alwaysinline nounwind %s1 = tail call i32 @__shfl_scan_or_step_i32(i32 %v, i32 1); %s2 = tail call i32 @__shfl_scan_or_step_i32(i32 %s1, i32 2); @@ -1354,13 +1354,13 @@ define <1 x i32> @__exclusive_scan_or_i32(<1 x i32>, <1 x i1>) nounwind readnone ;; define internal i32 @__shfl_scan_and_step_i32(i32 %partial, i32 %up_offset) nounwind readnone alwaysinline { - %result = tail call i32 asm sideeffect + %result = call i32 asm "{.reg .u32 r0; .reg .pred p; shfl.up.b32 r0|p, $1, $2, 0; @p and.b32 r0, r0, $3; mov.u32 $0, r0; - }", "=r,r,r,r"(i32 %partial, i32 %up_offset, i32 %partial) nounwind readnone alwaysinline + }", "=r,r,r,r"(i32 %partial, i32 %up_offset, i32 %partial) alwaysinline ret i32 %result; } define <1 x i32> @__exclusive_scan_and_i32(<1 x i32>, <1 x i1>) nounwind readnone alwaysinline @@ -1370,19 +1370,19 @@ define <1 x i32> @__exclusive_scan_and_i32(<1 x i32>, <1 x i1>) nounwind readnon %v1 = select i1 %mask, i32 %v0, i32 -1 ;; shfl-up by one for exclusive scan - %v = tail call i32 asm sideeffect + %v = call i32 asm "{.reg .u32 r0; .reg .pred p; shfl.up.b32 r0|p, $1, 1, 0; @!p mov.u32 r0, -1; mov.u32 $0, r0; - }","=r,r"(i32 %v1); + }","=r,r"(i32 %v1); alwaysinline - %s1 = tail call i32 @__shfl_scan_and_step_i32(i32 %v, i32 1); - %s2 = tail call i32 @__shfl_scan_and_step_i32(i32 %s1, i32 2); - %s3 = tail call i32 @__shfl_scan_and_step_i32(i32 %s2, i32 4); - %s4 = tail call i32 @__shfl_scan_and_step_i32(i32 %s3, i32 8); - %s5 = tail call i32 @__shfl_scan_and_step_i32(i32 %s4, i32 16); + %s1 = call i32 @__shfl_scan_and_step_i32(i32 %v, i32 1); + %s2 = call i32 @__shfl_scan_and_step_i32(i32 %s1, i32 2); + %s3 = call i32 @__shfl_scan_and_step_i32(i32 %s2, i32 4); + %s4 = call i32 @__shfl_scan_and_step_i32(i32 %s3, i32 8); + %s5 = call i32 @__shfl_scan_and_step_i32(i32 %s4, i32 16); %retv = insertelement <1 x i32> undef, i32 %s5, i32 0 ret <1 x i32> %retv }