Files
ispc/examples/mandelbrot_tasks3d/mandelbrot_task1.ptx
2013-11-08 14:17:26 +01:00

179 lines
4.1 KiB
Plaintext

//
// Generated by NVIDIA NVVM Compiler
// Compiler built on Thu Jul 18 02:37:37 2013 (1374107857)
// Cuda compilation tools, release 5.5, V5.5.0
//
// PTX ISA version this module is written against.
.version 3.2
// Target architecture the module is generated for.
.target sm_20
// Addresses in this module are 64 bits wide.
.address_size 64
//
// mandelbrot_scanline
//
// Each CTA walks a rectangular tile of a row-major array of 32-bit elements
// and stores one iteration count per element.
//
// Tile bounds, as computed below:
//   columns: [ctaid.x * param_6, min(ctaid.x * param_6 + param_6, param_4))
//   rows:    [ctaid.y * param_7, min(ctaid.y * param_7 + param_7, param_5))
// Rows are visited one at a time by every thread. Within a row, columns are
// visited in chunks of WARP_SZ, and each thread handles the column
//   chunk_start + (%tid.x & (WARP_SZ - 1)).
//
// Per-element value: with c = (c_re, c_im) and z starting at c, the number of
// z = z*z + c updates performed before |z|^2 > 4.0 (or a NaN) is seen, capped
// at param_8. If param_8 <= 0 the element is simply set to 0.
//
// Parameters (roles as used by the code; the source-level names are not
// present in this file - presumably x0, dx, y0, dy, width, height, xspan,
// yspan, maxIterations, output of the ISPC example; TODO confirm):
//   param_0 (f32): c_re for column 0
//   param_1 (f32): c_re increment per column
//   param_2 (f32): c_im for row 0
//   param_3 (f32): c_im increment per row
//   param_4 (u32): exclusive column limit, also the row stride in elements
//   param_5 (u32): exclusive row limit
//   param_6 (u32): columns per ctaid.x step
//   param_7 (u32): rows per ctaid.y step
//   param_8 (u32): iteration limit (compared as signed)
//   param_9 (u64): base address of the output array
//
.visible .entry mandelbrot_scanline(
.param .f32 mandelbrot_scanline_param_0,
.param .f32 mandelbrot_scanline_param_1,
.param .f32 mandelbrot_scanline_param_2,
.param .f32 mandelbrot_scanline_param_3,
.param .u32 mandelbrot_scanline_param_4,
.param .u32 mandelbrot_scanline_param_5,
.param .u32 mandelbrot_scanline_param_6,
.param .u32 mandelbrot_scanline_param_7,
.param .u32 mandelbrot_scanline_param_8,
.param .u64 mandelbrot_scanline_param_9
)
{
// Virtual register pools: predicates, 32-bit ints, f32 values, 64-bit addresses.
.reg .pred %p<32>;
.reg .s32 %r<62>;
.reg .f32 %f<28>;
.reg .s64 %rd<6>;
// Load all kernel parameters:
//   %f11..%f14 = param_0..param_3, %r17..%r21 = param_4..param_8, %rd1 = param_9.
ld.param.f32 %f11, [mandelbrot_scanline_param_0];
ld.param.f32 %f12, [mandelbrot_scanline_param_1];
ld.param.f32 %f13, [mandelbrot_scanline_param_2];
ld.param.f32 %f14, [mandelbrot_scanline_param_3];
ld.param.u32 %r17, [mandelbrot_scanline_param_4];
ld.param.u32 %r18, [mandelbrot_scanline_param_5];
ld.param.u32 %r19, [mandelbrot_scanline_param_6];
ld.param.u32 %r20, [mandelbrot_scanline_param_7];
ld.param.u32 %r21, [mandelbrot_scanline_param_8];
ld.param.u64 %rd1, [mandelbrot_scanline_param_9];
// %r1 = exclusive column end = min(ctaid.x * param_6 + param_6, param_4).
mov.u32 %r22, %ctaid.x;
mad.lo.s32 %r23, %r22, %r19, %r19;
min.s32 %r1, %r23, %r17;
// %r59 = current row, starting at ctaid.y * param_7.
// %r25 = exclusive row end = min(first_row + param_7, param_5).
mov.u32 %r2, %ctaid.y;
mul.lo.s32 %r59, %r2, %r20;
add.s32 %r24, %r59, %r20;
min.s32 %r25, %r24, %r18;
// Empty row range: nothing to do for this CTA.
setp.ge.s32 %p10, %r59, %r25;
@%p10 bra BB0_15;
// %r4 = ~max(~param_5, ~((ctaid.y + 1) * param_7))
//     = min(param_5, (ctaid.y + 1) * param_7),
// i.e. the same exclusive row end as %r25, recomputed for the row-loop exit test.
not.b32 %r26, %r18;
add.s32 %r27, %r2, 1;
mul.lo.s32 %r28, %r27, %r20;
not.b32 %r29, %r28;
max.s32 %r30, %r26, %r29;
not.b32 %r4, %r30;
// ---- Row loop head: one pass per row %r59 ----
BB0_2:
// %r60 = start column of the current chunk, reset to ctaid.x * param_6 per row.
mul.lo.s32 %r60, %r22, %r19;
// Empty column range: go straight to the next row.
setp.ge.s32 %p11, %r60, %r1;
@%p11 bra BB0_14;
// %f1 = c_im for this row = float(row) * param_3 + param_2.
// %p12 = (param_8 > 0) selects the iterating path (BB0_7); otherwise fall
// through to the zero-fill path (BB0_4).
cvt.rn.f32.s32 %f15, %r59;
setp.gt.s32 %p12, %r21, 0;
fma.rn.f32 %f1, %f15, %f14, %f13;
@%p12 bra BB0_7;
// ---- Zero-fill column loop (param_8 <= 0): store 0 for every in-range column ----
BB0_4:
// This thread's column = chunk start + (%tid.x & (WARP_SZ - 1)).
mov.u32 %r8, WARP_SZ;
add.s32 %r34, %r8, -1;
mov.u32 %r35, %tid.x;
and.b32 %r36, %r34, %r35;
add.s32 %r37, %r36, %r60;
// Skip the store when the column is at or past the column end.
setp.ge.s32 %p13, %r37, %r1;
@%p13 bra BB0_6;
// Element index = row * param_4 + chunk start + (%tid.x & (WARP_SZ + 0x3FFFFFFF)).
// NOTE(review): the mask is (WARP_SZ - 1) + 2^30, so the last term equals the
// masked thread index used above only while bit 30 of %tid.x is clear.
mad.lo.s32 %r38, %r59, %r17, %r60;
add.s32 %r40, %r8, 1073741823;
and.b32 %r42, %r40, %r35;
add.s32 %r43, %r38, %r42;
// Byte offset = index * 4, formed in 32 bits and then sign-extended to 64 bits.
// NOTE(review): the 32-bit shift drops the top two bits of the index, so the
// offset wraps once the index no longer fits in 30 bits.
shl.b32 %r44, %r43, 2;
cvt.s64.s32 %rd2, %r44;
add.s64 %rd3, %rd2, %rd1;
// Store 0 (no state space on st, so the address is treated as generic).
mov.u32 %r45, 0;
st.u32 [%rd3], %r45;
BB0_6:
// Advance to the next chunk of WARP_SZ columns; when done, go to the next row.
add.s32 %r60, %r8, %r60;
setp.lt.s32 %p14, %r60, %r1;
@%p14 bra BB0_4;
bra.uni BB0_14;
// ---- Iterating column loop (param_8 > 0) ----
BB0_7:
// %r11 = this thread's column = chunk start + (%tid.x & (WARP_SZ - 1)).
mov.u32 %r47, WARP_SZ;
add.s32 %r48, %r47, -1;
mov.u32 %r49, %tid.x;
and.b32 %r50, %r48, %r49;
add.s32 %r11, %r50, %r60;
// %f2 = c_re for this column = float(column) * param_1 + param_0.
cvt.rn.f32.s32 %f16, %r11;
fma.rn.f32 %f2, %f16, %f12, %f11;
// Inner-loop state on entry:
//   %r61 = 0      iteration count
//   %p29 = true   "still iterating" flag
//   %p26 = %p12   "count < param_8" flag (true on this path)
//   %p31 = false  "escape seen" flag
//   %f22 = c_re, %f26 = c_im   z starts at c
mov.u32 %r61, 0;
mov.pred %p16, 0;
mov.pred %p29, -1;
mov.pred %p26, %p12;
mov.pred %p31, %p16;
mov.f32 %f22, %f2;
mov.f32 %f26, %f1;
// ---- Inner iteration loop ----
BB0_8:
// Pick up the loop-carried values: %f27 = z_im, %f23 = z_re.
mov.f32 %f24, %f26;
mov.f32 %f27, %f24;
mov.f32 %f20, %f22;
mov.f32 %f23, %f20;
// %p5 = still iterating AND count < limit.
mov.pred %p3, %p29;
mov.pred %p2, %p26;
and.pred %p5, %p3, %p2;
// %f6 = z_re^2, %f5 = z_im^2, %f17 = |z|^2.
mul.f32 %f6, %f23, %f23;
mul.f32 %f5, %f27, %f27;
add.f32 %f17, %f5, %f6;
// Escape test: |z|^2 > 4.0 (0f40800000); gtu is also true when the sum is NaN.
setp.gtu.f32 %p18, %f17, 0f40800000;
// Record the escape only if this pass was live, and accumulate it in %p31.
and.pred %p19, %p5, %p18;
or.pred %p31, %p19, %p31;
// %p20 = escape flag XOR live flag. When it is false the z update is skipped
// and %p30 stays false, which clears the "still iterating" flag below.
xor.pred %p20, %p31, %p5;
mov.pred %p30, %p16;
@!%p20 bra BB0_10;
bra.uni BB0_9;
BB0_9:
// z = z*z + c:
//   new z_im = z_im * (2 * z_re) + c_im
//   new z_re = c_re + (z_re^2 - z_im^2)
add.f32 %f18, %f23, %f23;
fma.rn.f32 %f27, %f27, %f18, %f1;
sub.f32 %f19, %f6, %f5;
add.f32 %f23, %f2, %f19;
// Keep iterating only if this pass was live and no escape has been seen.
not.pred %p21, %p31;
and.pred %p7, %p5, %p21;
mov.pred %p30, %p7;
BB0_10:
// Carry z and the "still iterating" flag into the next pass.
mov.f32 %f9, %f27;
mov.f32 %f10, %f23;
mov.pred %p28, %p30;
mov.pred %p29, %p28;
// Increment the count only when still iterating.
add.s32 %r51, %r61, 1;
selp.b32 %r61, %r51, %r61, %p29;
// Loop again while still iterating and count < param_8.
setp.lt.s32 %p9, %r61, %r21;
and.pred %p22, %p29, %p9;
mov.pred %p26, %p9;
mov.f32 %f22, %f10;
mov.f32 %f26, %f9;
@%p22 bra BB0_8;
// The count was computed for every thread; only in-range columns store it.
setp.ge.s32 %p23, %r11, %r1;
@%p23 bra BB0_13;
// Element index = row * param_4 + chunk start + (%tid.x & (WARP_SZ + 0x3FFFFFFF)).
// NOTE(review): the mask is (WARP_SZ - 1) + 2^30, so the last term equals the
// masked thread index used for %r11 only while bit 30 of %tid.x is clear.
mad.lo.s32 %r52, %r59, %r17, %r60;
add.s32 %r54, %r47, 1073741823;
and.b32 %r56, %r54, %r49;
add.s32 %r57, %r52, %r56;
// Byte offset = index * 4, formed in 32 bits and then sign-extended to 64 bits.
// NOTE(review): wraps once the index no longer fits in 30 bits.
shl.b32 %r58, %r57, 2;
cvt.s64.s32 %rd4, %r58;
add.s64 %rd5, %rd4, %rd1;
// Store the iteration count (generic address, no state space on st).
st.u32 [%rd5], %r61;
BB0_13:
// Advance to the next chunk of WARP_SZ columns in this row.
add.s32 %r60, %r47, %r60;
setp.lt.s32 %p24, %r60, %r1;
@%p24 bra BB0_7;
// ---- Row loop tail ----
BB0_14:
// Next row; the loop ends when the row counter reaches the row end in %r4.
add.s32 %r59, %r59, 1;
setp.ne.s32 %p25, %r59, %r4;
@%p25 bra BB0_2;
BB0_15:
ret;
}