Files
ispc/examples_cuda/mandelbrot_tasks3d/mandelbrot_task_nvptx64.ptx
2013-11-04 11:44:49 +01:00

230 lines
7.3 KiB
Plaintext

//
// Generated by LLVM NVPTX Back-End
//
// Module-wide directives: the PTX ISA version this text is written against,
// the target architecture (sm_35) with independent texturing mode, and the
// 64-bit address size used by every pointer in this module.
.version 3.1
.target sm_35, texmode_independent
.address_size 64
// .globl mandelbrot_scanline
// Forward declaration (prototype only, no body in this module) of puts.
//   puts_param_0  : 64-bit parameter; the kernel below passes the generic
//                   address of the __str byte array.
//   func_retval0  : 32-bit return value.
.func (.param .b32 func_retval0) puts
(
.param .b64 puts_param_0
)
;
// Forward declaration (prototype only, no body in this module) of abort.
// Takes no parameters and returns nothing; the kernel below calls it right
// after puts on its assertion-failure path.
.func abort
(
)
;
// Assertion message, stored as 66 bytes in global memory (65 ASCII characters
// plus a terminating 0). Decoded, the bytes read:
//   "mandelbrot_task.ispc:55:3: Assertion failed: xspan >= vectorWidth"
.global .align 1 .b8 __str[66] = {109, 97, 110, 100, 101, 108, 98, 114, 111, 116, 95, 116, 97, 115, 107, 46, 105, 115, 112, 99, 58, 53, 53, 58, 51, 58, 32, 65, 115, 115, 101, 114, 116, 105, 111, 110, 32, 102, 97, 105, 108, 101, 100, 58, 32, 120, 115, 112, 97, 110, 32, 62, 61, 32, 118, 101, 99, 116, 111, 114, 87, 105, 100, 116, 104, 0};
// @mandelbrot_scanline
//
// Kernel entry point. Each thread block (CTA) covers a rectangle of a 32-bit
// integer output array: rows  [ctaid.y*param_7, min((ctaid.y+1)*param_7, param_5))
// and columns                 [ctaid.x*param_6, min((ctaid.x+1)*param_6, param_4)).
// For every element it visits, a thread iterates z <- z*z + c until
// re(z)^2 + im(z)^2 exceeds 4.0 or param_8 iterations have been done, then
// stores the iteration count.
//
// Parameters, as used by the code below:
//   param_0, param_1 (f32) : real part of c      = param_0 + column * param_1
//   param_2, param_3 (f32) : imaginary part of c = param_2 + row    * param_3
//   param_4 (u32)          : exclusive column bound, also the row pitch (in
//                            elements) used when addressing the output
//   param_5 (u32)          : exclusive row bound
//   param_6 (u32)          : columns per block in x; must be >= WARP_SZ, else
//                            the assertion path (BB0_18) is taken. The message
//                            in __str calls this quantity "xspan".
//   param_7 (u32)          : rows per block in y
//   param_8 (u32)          : iteration limit (compared as signed)
//   param_9 (u64 pointer)  : base address of the output array, 4-byte aligned
//
// NOTE(review): a thread's column is chosen from %tid.x & (WARP_SZ-1) only, so
// threads in different warps of one block would address the same elements.
// Presumably the host launches one warp per block -- confirm against the
// launch code.
.entry mandelbrot_scanline(
.param .f32 mandelbrot_scanline_param_0,
.param .f32 mandelbrot_scanline_param_1,
.param .f32 mandelbrot_scanline_param_2,
.param .f32 mandelbrot_scanline_param_3,
.param .u32 mandelbrot_scanline_param_4,
.param .u32 mandelbrot_scanline_param_5,
.param .u32 mandelbrot_scanline_param_6,
.param .u32 mandelbrot_scanline_param_7,
.param .u32 mandelbrot_scanline_param_8,
.param .u64 .ptr .align 4 mandelbrot_scanline_param_9
)
{
// Virtual register declarations: 396 registers of each class. Only a handful
// of them are referenced in this body.
.reg .pred %p<396>;
.reg .s16 %rc<396>;
.reg .s16 %rs<396>;
.reg .s32 %r<396>;
.reg .s64 %rl<396>;
.reg .f32 %f<396>;
.reg .f64 %fl<396>;
// BB#0: // %allocas
// Assertion check: if WARP_SZ > param_6, branch to the failure path.
// %r0 keeps WARP_SZ for the rest of the kernel.
ld.param.u32 %r3, [mandelbrot_scanline_param_6];
mov.u32 %r0, WARP_SZ;
setp.gt.s32 %p0, %r0, %r3;
@%p0 bra BB0_18;
// BB#1: // %for_test.preheader
// Row range of this block: %r1 = ctaid.y * param_7 (first row),
// %r2 = min((ctaid.y + 1) * param_7, param_5) (exclusive end).
// Return immediately when the range is empty.
ld.param.u32 %r7, [mandelbrot_scanline_param_5];
ld.param.u32 %r6, [mandelbrot_scanline_param_7];
mov.u32 %r8, %ctaid.y;
mul.lo.s32 %r1, %r8, %r6;
mad.lo.s32 %r2, %r8, %r6, %r6;
setp.lt.s32 %p0, %r2, %r7;
selp.b32 %r2, %r2, %r7, %p0;
setp.ge.s32 %p0, %r1, %r2;
@%p0 bra BB0_14;
// BB#2: // %for_test34.preheader.lr.ph
// Loop-invariant setup. From here on the registers are reused as follows:
//   %f0..%f3 = param_0..param_3
//   %r2      = ctaid.x * param_6                          (first column)
//   %r4      = min((ctaid.x + 1) * param_6, param_4)      (exclusive column end)
//   %r3      = param_4   (overwrites the earlier param_6 value)
//   %r5      = param_8,  %rl0 = param_9
//   %p0      = (param_8 > 0); selects between the two column loops below
ld.param.f32 %f0, [mandelbrot_scanline_param_0];
ld.param.f32 %f1, [mandelbrot_scanline_param_1];
ld.param.f32 %f2, [mandelbrot_scanline_param_2];
mov.u32 %r4, %ctaid.x;
mul.lo.s32 %r2, %r4, %r3;
ld.param.f32 %f3, [mandelbrot_scanline_param_3];
mad.lo.s32 %r4, %r4, %r3, %r3;
ld.param.u32 %r3, [mandelbrot_scanline_param_4];
setp.lt.s32 %p0, %r4, %r3;
selp.b32 %r4, %r4, %r3, %p0;
ld.param.u32 %r5, [mandelbrot_scanline_param_8];
ld.param.u64 %rl0, [mandelbrot_scanline_param_9];
setp.gt.s32 %p0, %r5, 0;
// Recompute the exclusive row end into %r6 as
// ~max(~param_5, ~((ctaid.y + 1) * param_7)), which equals
// min(param_5, (ctaid.y + 1) * param_7). (%r2 no longer holds it.)
not.b32 %r7, %r7;
add.s32 %r8, %r8, 1;
mul.lo.s32 %r6, %r8, %r6;
not.b32 %r6, %r6;
setp.gt.s32 %p1, %r7, %r6;
selp.b32 %r6, %r7, %r6, %p1;
not.b32 %r6, %r6;
BB0_3: // %for_test34.preheader
// =>This Loop Header: Depth=1
// Child Loop BB0_16 Depth 2
// Child Loop BB0_9 Depth 2
// Child Loop BB0_12 Depth 3
// Row loop, %r1 = current row. Skip the row if the column range is empty.
setp.ge.s32 %p1, %r2, %r4;
@%p1 bra BB0_13;
// BB#4: // %for_loop36.lr.ph
// in Loop: Header=BB0_3 Depth=1
// %r7 = row * param_4 (element offset of the row start); %r8 = column cursor.
// param_8 > 0 goes to the iterating column loop (BB0_5/BB0_9); otherwise to
// the loop that only stores zeros (BB0_16).
mul.lo.s32 %r7, %r1, %r3;
mov.u32 %r8, %r2;
@%p0 bra BB0_5;
bra.uni BB0_16;
BB0_5: // in Loop: Header=BB0_3 Depth=1
// %f4 = param_2 + (float)row * param_3: imaginary part of c for this row.
cvt.rn.f32.s32 %f4, %r1;
fma.rn.f32 %f4, %f4, %f3, %f2;
mov.u32 %r8, %r2;
BB0_9: // %for_loop.i.lr.ph.us
// Parent Loop BB0_3 Depth=1
// => This Loop Header: Depth=2
// Child Loop BB0_12 Depth 3
// Column loop (iterating variant). This thread's column is
// %r11 = %r8 + (tid.x & (WARP_SZ - 1)); %f5 = param_0 + (float)%r11 * param_1
// is the real part of c.
mov.u32 %r9, %tid.x;
add.s32 %r10, %r0, -1;
and.b32 %r10, %r10, %r9;
add.s32 %r11, %r10, %r8;
cvt.rn.f32.s32 %f5, %r11;
fma.rn.f32 %f5, %f5, %f1, %f0;
// Inner-loop state: %r10 = iteration count (0), %p1 = constant false,
// %p3 = true, %p4 = %p0 (true on this path), %p2 = "escaped" flag (false),
// z starts at c: %f7 = real part, %f6 = imaginary part.
mov.u32 %r10, 0;
mov.pred %p1, 0;
mov.pred %p3, -1;
mov.pred %p4, %p0;
mov.pred %p2, %p1;
mov.f32 %f7, %f5;
mov.f32 %f6, %f4;
BB0_12: // %for_loop.i.us
// Parent Loop BB0_3 Depth=1
// Parent Loop BB0_9 Depth=2
// => This Inner Loop Header: Depth=3
// Escape test: %f8 = re^2, %f9 = im^2 + re^2, compared against
// 0f40800000 (= 4.0f). gtu is also true when %f9 is NaN.
and.pred %p4, %p3, %p4;
mul.f32 %f8, %f7, %f7;
fma.rn.f32 %f9, %f6, %f6, %f8;
setp.gtu.f32 %p3, %f9, 0f40800000;
and.pred %p3, %p4, %p3;
or.pred %p2, %p3, %p2;
// %p5 = escaped XOR active. When it is false the update block is skipped
// with %p3 = false, so the count is not incremented and the loop exits.
xor.pred %p5, %p2, %p4;
mov.pred %p3, %p1;
@!%p5 bra BB0_11;
bra.uni BB0_10;
BB0_10: // %not_all_continued_or_breaked.i.us
// in Loop: Header=BB0_12 Depth=3
// z <- z*z + c:
//   new re = re^2 - im^2 + %f5
//   new im = (2 * re) * im + %f4
// %p3 = active AND NOT escaped, i.e. "count this iteration and continue".
mul.f32 %f9, %f6, %f6;
not.pred %p3, %p2;
and.pred %p3, %p4, %p3;
sub.f32 %f8, %f8, %f9;
add.f32 %f8, %f5, %f8;
add.f32 %f7, %f7, %f7;
fma.rn.f32 %f6, %f6, %f7, %f4;
mov.f32 %f7, %f8;
BB0_11: // %for_step.i.us
// in Loop: Header=BB0_12 Depth=3
// Increment the count only when %p3 is set; repeat while %p3 is set and
// count < param_8.
add.s32 %r12, %r10, 1;
selp.b32 %r10, %r12, %r10, %p3;
setp.lt.s32 %p4, %r10, %r5;
and.pred %p5, %p3, %p4;
@%p5 bra BB0_12;
// BB#6: // %mandel___vyfvyfvyi.exit.us
// in Loop: Header=BB0_9 Depth=2
// Skip the store if this thread's column is at or past the column end.
setp.ge.s32 %p1, %r11, %r4;
@%p1 bra BB0_8;
// BB#7: // %if_then.us
// in Loop: Header=BB0_9 Depth=2
// Byte offset = ((%r8 + %r7) + (tid.x & (WARP_SZ + 0x3FFFFFFF))) << 2.
// 1073741823 is 0x3FFFFFFF; the bits of the sum above bit 29 are shifted out
// by the shl, so only the low 30 bits of the masked tid.x affect the offset.
// The offset is sign-extended to 64 bits and added to the param_9 base.
add.s32 %r11, %r0, 1073741823;
and.b32 %r9, %r11, %r9;
add.s32 %r11, %r8, %r7;
add.s32 %r9, %r11, %r9;
shl.b32 %r9, %r9, 2;
cvt.s64.s32 %rl1, %r9;
add.s64 %rl1, %rl1, %rl0;
// Store the iteration count as a 32-bit value.
st.u32 [%rl1], %r10;
BB0_8: // %if_exit.us
// in Loop: Header=BB0_9 Depth=2
// Advance the column cursor by WARP_SZ; loop while it is below the column end.
add.s32 %r8, %r0, %r8;
setp.lt.s32 %p1, %r8, %r4;
@%p1 bra BB0_9;
bra.uni BB0_13;
BB0_16: // %mandel___vyfvyfvyi.exit
// Parent Loop BB0_3 Depth=1
// => This Inner Loop Header: Depth=2
// Column loop (variant taken when param_8 <= 0): no iterations are run.
// %r10 = %r8 + (tid.x & (WARP_SZ - 1)) is this thread's column; store only
// when it is below the column end.
mov.u32 %r9, %tid.x;
add.s32 %r10, %r0, -1;
and.b32 %r10, %r10, %r9;
add.s32 %r10, %r10, %r8;
setp.lt.s32 %p1, %r10, %r4;
@%p1 bra BB0_17;
bra.uni BB0_15;
BB0_17: // %if_then
// in Loop: Header=BB0_16 Depth=2
// Same address computation as in BB#7; the value stored is the constant 0.
add.s32 %r10, %r0, 1073741823;
and.b32 %r9, %r10, %r9;
add.s32 %r10, %r8, %r7;
add.s32 %r9, %r10, %r9;
shl.b32 %r9, %r9, 2;
cvt.s64.s32 %rl1, %r9;
add.s64 %rl1, %rl1, %rl0;
mov.u32 %r9, 0;
st.u32 [%rl1], %r9;
BB0_15: // %if_exit
// in Loop: Header=BB0_16 Depth=2
// Advance the column cursor by WARP_SZ; fall through to BB0_13 when done.
add.s32 %r8, %r0, %r8;
setp.lt.s32 %p1, %r8, %r4;
@%p1 bra BB0_16;
BB0_13: // %for_exit37
// in Loop: Header=BB0_3 Depth=1
// Next row; the row loop ends when %r1 reaches the exclusive row end in %r6.
add.s32 %r1, %r1, 1;
setp.eq.s32 %p1, %r1, %r6;
@%p1 bra BB0_14;
bra.uni BB0_3;
BB0_14: // %for_exit
ret;
BB0_18: // %fail.i
// Assertion-failure path (WARP_SZ > param_6): convert the global address of
// __str to a generic address, pass it to puts, then call abort.
mov.u64 %rl0, __str;
cvta.global.u64 %rl0, %rl0;
// Callseq Start 0
{
.reg .b32 temp_param_reg;
// <end>}
.param .b64 param0;
st.param.b64 [param0+0], %rl0;
.param .b32 retval0;
call.uni (retval0),
puts,
(
param0
);
// The return value of puts is loaded into %r0 but not used afterwards.
ld.param.b32 %r0, [retval0+0];
//{
}// Callseq End 0
// Callseq Start 1
{
.reg .b32 temp_param_reg;
// <end>}
call.uni
abort,
(
);
//{
}// Callseq End 1
// NOTE(review): no ret follows the abort call; presumably abort is not
// expected to return -- confirm.
}