fix for exclusive_scan_and
This commit is contained in:
@@ -1325,7 +1325,7 @@ define internal i32 @__shfl_scan_or_step_i32(i32 %partial, i32 %up_offset) nounw
|
|||||||
shfl.up.b32 r0|p, $1, $2, 0;
|
shfl.up.b32 r0|p, $1, $2, 0;
|
||||||
@p or.b32 r0, r0, $3;
|
@p or.b32 r0, r0, $3;
|
||||||
mov.u32 $0, r0;
|
mov.u32 $0, r0;
|
||||||
}", "=r,r,r,r"(i32 %partial, i32 %up_offset, i32 %partial) nounwind readnone alwaysinline
|
}", "=r,r,r,r"(i32 %partial, i32 %up_offset, i32 %partial) nounwind alwaysinline
|
||||||
ret i32 %result;
|
ret i32 %result;
|
||||||
}
|
}
|
||||||
define <1 x i32> @__exclusive_scan_or_i32(<1 x i32>, <1 x i1>) nounwind readnone alwaysinline
|
define <1 x i32> @__exclusive_scan_or_i32(<1 x i32>, <1 x i1>) nounwind readnone alwaysinline
|
||||||
@@ -1341,7 +1341,7 @@ define <1 x i32> @__exclusive_scan_or_i32(<1 x i32>, <1 x i1>) nounwind readnone
|
|||||||
shfl.up.b32 r0|p, $1, 1, 0;
|
shfl.up.b32 r0|p, $1, 1, 0;
|
||||||
@!p mov.u32 r0, 0;
|
@!p mov.u32 r0, 0;
|
||||||
mov.u32 $0, r0;
|
mov.u32 $0, r0;
|
||||||
}","=r,r"(i32 %v1);
|
}","=r,r"(i32 %v1); alwaysinline nounwind
|
||||||
|
|
||||||
%s1 = tail call i32 @__shfl_scan_or_step_i32(i32 %v, i32 1);
|
%s1 = tail call i32 @__shfl_scan_or_step_i32(i32 %v, i32 1);
|
||||||
%s2 = tail call i32 @__shfl_scan_or_step_i32(i32 %s1, i32 2);
|
%s2 = tail call i32 @__shfl_scan_or_step_i32(i32 %s1, i32 2);
|
||||||
@@ -1354,13 +1354,13 @@ define <1 x i32> @__exclusive_scan_or_i32(<1 x i32>, <1 x i1>) nounwind readnone
|
|||||||
;;
|
;;
|
||||||
define internal i32 @__shfl_scan_and_step_i32(i32 %partial, i32 %up_offset) nounwind readnone alwaysinline
|
define internal i32 @__shfl_scan_and_step_i32(i32 %partial, i32 %up_offset) nounwind readnone alwaysinline
|
||||||
{
|
{
|
||||||
%result = tail call i32 asm sideeffect
|
%result = call i32 asm
|
||||||
"{.reg .u32 r0;
|
"{.reg .u32 r0;
|
||||||
.reg .pred p;
|
.reg .pred p;
|
||||||
shfl.up.b32 r0|p, $1, $2, 0;
|
shfl.up.b32 r0|p, $1, $2, 0;
|
||||||
@p and.b32 r0, r0, $3;
|
@p and.b32 r0, r0, $3;
|
||||||
mov.u32 $0, r0;
|
mov.u32 $0, r0;
|
||||||
}", "=r,r,r,r"(i32 %partial, i32 %up_offset, i32 %partial) nounwind readnone alwaysinline
|
}", "=r,r,r,r"(i32 %partial, i32 %up_offset, i32 %partial) alwaysinline
|
||||||
ret i32 %result;
|
ret i32 %result;
|
||||||
}
|
}
|
||||||
define <1 x i32> @__exclusive_scan_and_i32(<1 x i32>, <1 x i1>) nounwind readnone alwaysinline
|
define <1 x i32> @__exclusive_scan_and_i32(<1 x i32>, <1 x i1>) nounwind readnone alwaysinline
|
||||||
@@ -1370,19 +1370,19 @@ define <1 x i32> @__exclusive_scan_and_i32(<1 x i32>, <1 x i1>) nounwind readnon
|
|||||||
%v1 = select i1 %mask, i32 %v0, i32 -1
|
%v1 = select i1 %mask, i32 %v0, i32 -1
|
||||||
|
|
||||||
;; shfl-up by one for exclusive scan
|
;; shfl-up by one for exclusive scan
|
||||||
%v = tail call i32 asm sideeffect
|
%v = call i32 asm
|
||||||
"{.reg .u32 r0;
|
"{.reg .u32 r0;
|
||||||
.reg .pred p;
|
.reg .pred p;
|
||||||
shfl.up.b32 r0|p, $1, 1, 0;
|
shfl.up.b32 r0|p, $1, 1, 0;
|
||||||
@!p mov.u32 r0, -1;
|
@!p mov.u32 r0, -1;
|
||||||
mov.u32 $0, r0;
|
mov.u32 $0, r0;
|
||||||
}","=r,r"(i32 %v1);
|
}","=r,r"(i32 %v1); alwaysinline
|
||||||
|
|
||||||
%s1 = tail call i32 @__shfl_scan_and_step_i32(i32 %v, i32 1);
|
%s1 = call i32 @__shfl_scan_and_step_i32(i32 %v, i32 1);
|
||||||
%s2 = tail call i32 @__shfl_scan_and_step_i32(i32 %s1, i32 2);
|
%s2 = call i32 @__shfl_scan_and_step_i32(i32 %s1, i32 2);
|
||||||
%s3 = tail call i32 @__shfl_scan_and_step_i32(i32 %s2, i32 4);
|
%s3 = call i32 @__shfl_scan_and_step_i32(i32 %s2, i32 4);
|
||||||
%s4 = tail call i32 @__shfl_scan_and_step_i32(i32 %s3, i32 8);
|
%s4 = call i32 @__shfl_scan_and_step_i32(i32 %s3, i32 8);
|
||||||
%s5 = tail call i32 @__shfl_scan_and_step_i32(i32 %s4, i32 16);
|
%s5 = call i32 @__shfl_scan_and_step_i32(i32 %s4, i32 16);
|
||||||
%retv = insertelement <1 x i32> undef, i32 %s5, i32 0
|
%retv = insertelement <1 x i32> undef, i32 %s5, i32 0
|
||||||
ret <1 x i32> %retv
|
ret <1 x i32> %retv
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user