// Listing metadata: 230 lines, 7.3 KiB, plaintext.
//
// Generated by LLVM NVPTX Back-End
//
// Module-level directives. These must precede every declaration below.

.version 3.1                            // PTX ISA version this module is written against
.target sm_35, texmode_independent      // target architecture sm_35; independent texturing mode
.address_size 64                        // addresses in this module are 64 bits wide

// .globl mandelbrot_scanline
|
|
.func (.param .b32 func_retval0) puts
|
|
(
|
|
.param .b64 puts_param_0
|
|
)
|
|
;
|
|
// Forward declaration (prototype only, no body) of the device function `abort`.
// It takes no parameters and returns no value.
.func abort
(
)
;

// NUL-terminated message (65 characters + terminator = 66 bytes) in global memory.
// Decoded as ASCII the bytes read:
//   "mandelbrot_task.ispc:55:3: Assertion failed: xspan >= vectorWidth"
// The kernel passes its address to puts on the failure path (label BB0_18).
.global .align 1 .b8 __str[66] = {109, 97, 110, 100, 101, 108, 98, 114, 111, 116, 95, 116, 97, 115, 107, 46, 105, 115, 112, 99, 58, 53, 53, 58, 51, 58, 32, 65, 115, 115, 101, 114, 116, 105, 111, 110, 32, 102, 97, 105, 108, 101, 100, 58, 32, 120, 115, 112, 97, 110, 32, 62, 61, 32, 118, 101, 99, 116, 111, 114, 87, 105, 100, 116, 104, 0};

//-----------------------------------------------------------------------
// @mandelbrot_scanline -- kernel entry point.
//
// Each thread block handles a rectangle of the output:
//   rows    [ctaid.y * param_7, min((ctaid.y + 1) * param_7, param_5))
//   columns [ctaid.x * param_6, min((ctaid.x + 1) * param_6, param_4))
// Columns are walked in steps of WARP_SZ; each thread handles the column
//   base + (tid.x & (WARP_SZ - 1)).
//
// Parameters (roles as used by the code below; names are not in this file):
//   param_0 .f32  c_re = column * param_1 + param_0
//   param_1 .f32  per-column step
//   param_2 .f32  c_im = row * param_3 + param_2
//   param_3 .f32  per-row step
//   param_4 .u32  upper bound for columns and row stride of the output
//                 (presumably the image width -- confirm against caller)
//   param_5 .u32  upper bound for rows (presumably the image height)
//   param_6 .u32  columns per block; must be >= WARP_SZ, otherwise the
//                 failure path prints __str and calls abort
//                 (presumably "xspan" from the message text)
//   param_7 .u32  rows per block
//   param_8 .u32  iteration limit of the inner loop
//   param_9 .u64  base address of the 32-bit output array (align 4)
//
// Long-lived registers once the row loop (BB0_3) is entered:
//   %r0  WARP_SZ                  %r1  current row
//   %r2  first column of block    %r3  param_4
//   %r4  column end (clamped)     %r5  param_8
//   %r6  row end (clamped)        %r7  row * param_4
//   %r8  current column base      %rl0 output base address
//   %f0..%f3  param_0..param_3    %f4  c_im of the current row
//   %p0  (param_8 > 0)
//
// The unused register banks %rc, %rs and %fl of the generated code were
// dropped, and the remaining banks are sized to the highest index used.
//-----------------------------------------------------------------------
.entry mandelbrot_scanline(
	.param .f32 mandelbrot_scanline_param_0,
	.param .f32 mandelbrot_scanline_param_1,
	.param .f32 mandelbrot_scanline_param_2,
	.param .f32 mandelbrot_scanline_param_3,
	.param .u32 mandelbrot_scanline_param_4,
	.param .u32 mandelbrot_scanline_param_5,
	.param .u32 mandelbrot_scanline_param_6,
	.param .u32 mandelbrot_scanline_param_7,
	.param .u32 mandelbrot_scanline_param_8,
	.param .u64 .ptr .align 4 mandelbrot_scanline_param_9
)
{
	.reg .pred %p<6>;
	.reg .s32 %r<13>;
	.reg .s64 %rl<2>;
	.reg .f32 %f<10>;

// BB#0:                                // %allocas
	// Guard: take the failure path when WARP_SZ > param_6.
	ld.param.u32 %r3, [mandelbrot_scanline_param_6];
	mov.u32 %r0, WARP_SZ;
	setp.gt.s32 %p0, %r0, %r3;
	@%p0 bra BB0_18;

// BB#1:                                // %for_test.preheader
	// %r1 = first row of this block; %r2 = min(first row + param_7, param_5).
	ld.param.u32 %r7, [mandelbrot_scanline_param_5];
	ld.param.u32 %r6, [mandelbrot_scanline_param_7];
	mov.u32 %r8, %ctaid.y;
	mul.lo.s32 %r1, %r8, %r6;
	mad.lo.s32 %r2, %r8, %r6, %r6;
	setp.lt.s32 %p0, %r2, %r7;
	selp.b32 %r2, %r2, %r7, %p0;
	setp.ge.s32 %p0, %r1, %r2;
	@%p0 bra BB0_14;                    // empty row range: nothing to do

// BB#2:                                // %for_test34.preheader.lr.ph
	ld.param.f32 %f0, [mandelbrot_scanline_param_0];
	ld.param.f32 %f1, [mandelbrot_scanline_param_1];
	ld.param.f32 %f2, [mandelbrot_scanline_param_2];
	mov.u32 %r4, %ctaid.x;
	mul.lo.s32 %r2, %r4, %r3;           // %r2 is reused: first column of this block
	ld.param.f32 %f3, [mandelbrot_scanline_param_3];
	mad.lo.s32 %r4, %r4, %r3, %r3;      // first column + param_6
	ld.param.u32 %r3, [mandelbrot_scanline_param_4];    // %r3 is reused: param_4
	setp.lt.s32 %p0, %r4, %r3;
	selp.b32 %r4, %r4, %r3, %p0;        // %r4 = min(column end, param_4)
	ld.param.u32 %r5, [mandelbrot_scanline_param_8];
	ld.param.u64 %rl0, [mandelbrot_scanline_param_9];
	setp.gt.s32 %p0, %r5, 0;            // %p0 = (iteration limit > 0), loop-invariant
	// Recompute the clamped row end into %r6 as
	//   ~max(~param_5, ~((ctaid.y + 1) * param_7)),
	// which equals min(param_5, (ctaid.y + 1) * param_7) for signed values.
	not.b32 %r7, %r7;
	add.s32 %r8, %r8, 1;
	mul.lo.s32 %r6, %r8, %r6;
	not.b32 %r6, %r6;
	setp.gt.s32 %p1, %r7, %r6;
	selp.b32 %r6, %r7, %r6, %p1;
	not.b32 %r6, %r6;

BB0_3:                                  // %for_test34.preheader
	// Row loop header (depth 1). Child loops: BB0_16 and BB0_9 (depth 2),
	// BB0_12 (depth 3).
	setp.ge.s32 %p1, %r2, %r4;
	@%p1 bra BB0_13;                    // empty column range: go to next row

// BB#4:                                // %for_loop36.lr.ph
	mul.lo.s32 %r7, %r1, %r3;           // output offset of the row = row * param_4
	mov.u32 %r8, %r2;                   // column base starts at the first column
	@%p0 bra BB0_5;                     // iteration limit > 0: run the iterating loop
	bra.uni BB0_16;                     // otherwise: loop that only stores zeros

BB0_5:                                  // in Loop: Header=BB0_3 Depth=1
	cvt.rn.f32.s32 %f4, %r1;
	fma.rn.f32 %f4, %f4, %f3, %f2;      // c_im = row * param_3 + param_2
	mov.u32 %r8, %r2;

BB0_9:                                  // %for_loop.i.lr.ph.us
	// Column loop header (depth 2), iterating variant.
	mov.u32 %r9, %tid.x;
	add.s32 %r10, %r0, -1;
	and.b32 %r10, %r10, %r9;            // tid.x & (WARP_SZ - 1)
	add.s32 %r11, %r10, %r8;            // %r11 = this thread's column
	cvt.rn.f32.s32 %f5, %r11;
	fma.rn.f32 %f5, %f5, %f1, %f0;      // c_re = column * param_1 + param_0
	mov.u32 %r10, 0;                    // iteration counter
	mov.pred %p1, 0;                    // constant false while inside BB0_12
	mov.pred %p3, -1;
	mov.pred %p4, %p0;
	mov.pred %p2, %p1;                  // "has escaped" flag, initially false
	mov.f32 %f7, %f5;                   // z_re = c_re
	mov.f32 %f6, %f4;                   // z_im = c_im

BB0_12:                                 // %for_loop.i.us
	// Innermost loop header (depth 3): one escape test and one update of z.
	and.pred %p4, %p3, %p4;
	mul.f32 %f8, %f7, %f7;              // z_re^2
	fma.rn.f32 %f9, %f6, %f6, %f8;      // z_im^2 + z_re^2
	setp.gtu.f32 %p3, %f9, 0f40800000;  // > 4.0f (0x40800000), or unordered (NaN)
	and.pred %p3, %p4, %p3;
	or.pred %p2, %p3, %p2;              // remember the escape
	xor.pred %p5, %p2, %p4;
	mov.pred %p3, %p1;                  // %p3 = false unless BB0_10 runs
	@!%p5 bra BB0_11;
	bra.uni BB0_10;

BB0_10:                                 // %not_all_continued_or_breaked.i.us
	mul.f32 %f9, %f6, %f6;              // z_im^2
	not.pred %p3, %p2;
	and.pred %p3, %p4, %p3;             // keep going only if not escaped
	sub.f32 %f8, %f8, %f9;              // z_re^2 - z_im^2
	add.f32 %f8, %f5, %f8;              // new z_re = c_re + (z_re^2 - z_im^2)
	add.f32 %f7, %f7, %f7;              // 2 * z_re
	fma.rn.f32 %f6, %f6, %f7, %f4;      // new z_im = z_im * (2 * z_re) + c_im
	mov.f32 %f7, %f8;

BB0_11:                                 // %for_step.i.us
	add.s32 %r12, %r10, 1;
	selp.b32 %r10, %r12, %r10, %p3;     // count the iteration only if still running
	setp.lt.s32 %p4, %r10, %r5;
	and.pred %p5, %p3, %p4;
	@%p5 bra BB0_12;

// BB#6:                                // %mandel___vyfvyfvyi.exit.us
	setp.ge.s32 %p1, %r11, %r4;
	@%p1 bra BB0_8;                     // column beyond the clamped end: no store

// BB#7:                                // %if_then.us
	// Element index = column base + row offset + lane, scaled by 4 bytes.
	// The mask WARP_SZ + 0x3FFFFFFF is (WARP_SZ - 1) with bit 30 added;
	// that extra bit cannot survive the shift left by 2 below.
	add.s32 %r11, %r0, 1073741823;
	and.b32 %r9, %r11, %r9;
	add.s32 %r11, %r8, %r7;
	add.s32 %r9, %r11, %r9;
	shl.b32 %r9, %r9, 2;
	cvt.s64.s32 %rl1, %r9;              // byte offset is computed in 32 bits, then sign-extended
	add.s64 %rl1, %rl1, %rl0;
	st.u32 [%rl1], %r10;                // store the iteration count

BB0_8:                                  // %if_exit.us
	add.s32 %r8, %r0, %r8;              // advance the column base by WARP_SZ
	setp.lt.s32 %p1, %r8, %r4;
	@%p1 bra BB0_9;
	bra.uni BB0_13;

BB0_16:                                 // %mandel___vyfvyfvyi.exit
	// Column loop header (depth 2), variant taken when the iteration limit
	// is <= 0: no iterations run, every in-range element is set to 0.
	mov.u32 %r9, %tid.x;
	add.s32 %r10, %r0, -1;
	and.b32 %r10, %r10, %r9;            // tid.x & (WARP_SZ - 1)
	add.s32 %r10, %r10, %r8;            // this thread's column
	setp.lt.s32 %p1, %r10, %r4;
	@%p1 bra BB0_17;
	bra.uni BB0_15;

BB0_17:                                 // %if_then
	// Same address computation as in BB#7.
	add.s32 %r10, %r0, 1073741823;
	and.b32 %r9, %r10, %r9;
	add.s32 %r10, %r8, %r7;
	add.s32 %r9, %r10, %r9;
	shl.b32 %r9, %r9, 2;
	cvt.s64.s32 %rl1, %r9;
	add.s64 %rl1, %rl1, %rl0;
	mov.u32 %r9, 0;
	st.u32 [%rl1], %r9;                 // store 0

BB0_15:                                 // %if_exit
	add.s32 %r8, %r0, %r8;              // advance the column base by WARP_SZ
	setp.lt.s32 %p1, %r8, %r4;
	@%p1 bra BB0_16;

BB0_13:                                 // %for_exit37
	add.s32 %r1, %r1, 1;                // next row
	setp.eq.s32 %p1, %r1, %r6;
	@%p1 bra BB0_14;                    // reached the clamped row end
	bra.uni BB0_3;

BB0_14:                                 // %for_exit
	ret;

BB0_18:                                 // %fail.i
	// Failure path: print the message in __str, then call abort.
	mov.u64 %rl0, __str;
	cvta.global.u64 %rl0, %rl0;         // global-space address -> generic address

	// Callseq Start 0: retval0 = puts(param0)
	{
	.reg .b32 temp_param_reg;           // declared by the call sequence; not referenced here
	.param .b64 param0;
	st.param.b64 [param0+0], %rl0;
	.param .b32 retval0;
	call.uni (retval0),
	puts,
	(
	param0
	);
	ld.param.b32 %r0, [retval0+0];      // result is loaded but not used afterwards
	}
	// Callseq End 0

	// Callseq Start 1: abort()
	{
	.reg .b32 temp_param_reg;           // declared by the call sequence; not referenced here
	call.uni
	abort,
	(
	);
	}
	// Callseq End 1
	// NOTE(review): nothing follows the abort call, so this path relies on
	// abort not returning -- confirm.
}
