// File-viewer listing metadata: 179 lines, 4.1 KiB, plaintext.
//
|
|
// Generated by NVIDIA NVVM Compiler
|
|
// Compiler built on Thu Jul 18 02:37:37 2013 (1374107857)
|
|
// Cuda compilation tools, release 5.5, V5.5.0
|
|
//
|
|
|
|
// Module-level PTX directives; they apply to everything that follows in this file.
.version 3.2 // PTX ISA version the module is written against
|
|
.target sm_20 // target architecture the code is generated for
|
|
.address_size 64 // addresses are 64 bits wide (matches the .u64 pointer parameter and .s64 address math below)
|
|
|
|
|
|
// mandelbrot_scanline -- entry kernel.
//
// Each thread block (%ctaid.x, %ctaid.y) processes a rectangular tile:
//   rows    [ctaid.y * param_7, min((ctaid.y + 1) * param_7, param_5))
//   columns [ctaid.x * param_6, min((ctaid.x + 1) * param_6, param_4))
// Inside a row the columns are visited in steps of WARP_SZ; a thread handles
// column  start + k * WARP_SZ + (%tid.x & (WARP_SZ - 1)).
//
// For every element (row, col) inside the tile it stores one u32 at
//   param_9 + 4 * (row * param_4 + col)
// The stored value is the number of  z <- z*z + c  updates performed, starting
// from z = c, before re(z)^2 + im(z)^2 exceeds 4.0 (or is NaN), capped at
// param_8, where
//   re(c) = param_0 + float(col) * param_1
//   im(c) = param_2 + float(row) * param_3
// When param_8 <= 0 the iteration is skipped and 0 is stored.
//
// Parameter names are not present in this listing; the roles below are read
// off from how each value is used. Presumably they are
// (x0, dx, y0, dy, width, height, xspan, yspan, maxIterations, output) -- TODO confirm
// against the original high-level source.
//
// NOTE(review): only %tid.x & (WARP_SZ - 1) is used and %ntid is never read, so
// threads of one block that share the same low %tid.x bits compute and store
// the same elements.
// NOTE(review): the lone `|` lines are residue of how this listing was
// captured; they are not PTX.
.visible .entry mandelbrot_scanline(
|
|
.param .f32 mandelbrot_scanline_param_0, // addend of the per-column coordinate
|
|
.param .f32 mandelbrot_scanline_param_1, // multiplier (step) of the per-column coordinate
|
|
.param .f32 mandelbrot_scanline_param_2, // addend of the per-row coordinate
|
|
.param .f32 mandelbrot_scanline_param_3, // multiplier (step) of the per-row coordinate
|
|
.param .u32 mandelbrot_scanline_param_4, // upper bound on columns; also the row stride of the output, in elements
|
|
.param .u32 mandelbrot_scanline_param_5, // upper bound on rows
|
|
.param .u32 mandelbrot_scanline_param_6, // columns per block tile
|
|
.param .u32 mandelbrot_scanline_param_7, // rows per block tile
|
|
.param .u32 mandelbrot_scanline_param_8, // iteration limit
|
|
.param .u64 mandelbrot_scanline_param_9 // base address of the u32 output buffer
|
|
)
|
|
{
|
|
.reg .pred %p<32>; // predicate registers %p0..%p31
|
|
.reg .s32 %r<62>; // 32-bit integer registers %r0..%r61
|
|
.reg .f32 %f<28>; // single-precision registers %f0..%f27
|
|
.reg .s64 %rd<6>; // 64-bit registers %rd0..%rd5 (addresses)
|
|
|
|
|
|
|
|
ld.param.f32 %f11, [mandelbrot_scanline_param_0]; // %f11 = param_0
|
|
ld.param.f32 %f12, [mandelbrot_scanline_param_1]; // %f12 = param_1
|
|
ld.param.f32 %f13, [mandelbrot_scanline_param_2]; // %f13 = param_2
|
|
ld.param.f32 %f14, [mandelbrot_scanline_param_3]; // %f14 = param_3
|
|
ld.param.u32 %r17, [mandelbrot_scanline_param_4]; // %r17 = param_4
|
|
ld.param.u32 %r18, [mandelbrot_scanline_param_5]; // %r18 = param_5
|
|
ld.param.u32 %r19, [mandelbrot_scanline_param_6]; // %r19 = param_6
|
|
ld.param.u32 %r20, [mandelbrot_scanline_param_7]; // %r20 = param_7
|
|
ld.param.u32 %r21, [mandelbrot_scanline_param_8]; // %r21 = param_8
|
|
ld.param.u64 %rd1, [mandelbrot_scanline_param_9]; // %rd1 = param_9 (output base address)
|
|
mov.u32 %r22, %ctaid.x; // %r22 = block index in x
|
|
mad.lo.s32 %r23, %r22, %r19, %r19; // %r23 = ctaid.x * param_6 + param_6 = (ctaid.x + 1) * param_6
|
|
min.s32 %r1, %r23, %r17; // %r1 = column end of this tile, clamped to param_4 (signed min)
|
|
mov.u32 %r2, %ctaid.y; // %r2 = block index in y
|
|
mul.lo.s32 %r59, %r2, %r20; // %r59 = first row of this tile; reused as the row counter
|
|
add.s32 %r24, %r59, %r20; // %r24 = first row + param_7
|
|
min.s32 %r25, %r24, %r18; // %r25 = row end of this tile, clamped to param_5
|
|
setp.ge.s32 %p10, %r59, %r25; // empty row range?
|
|
@%p10 bra BB0_15; // yes: nothing to do, return
|
|
|
|
|
|
not.b32 %r26, %r18; // %r26 = ~param_5
|
|
add.s32 %r27, %r2, 1; // %r27 = ctaid.y + 1
|
|
mul.lo.s32 %r28, %r27, %r20; // %r28 = (ctaid.y + 1) * param_7
|
|
not.b32 %r29, %r28; // %r29 = ~%r28
|
|
max.s32 %r30, %r26, %r29; // signed max of the two complements ...
|
|
not.b32 %r4, %r30; // ... complemented again: %r4 = min(param_5, (ctaid.y + 1) * param_7), the row end used by the loop test at BB0_14
|
|
|
|
|
// ---- Row loop head: one pass per row %r59 of the tile. ----
BB0_2:
|
|
mul.lo.s32 %r60, %r22, %r19; // %r60 = first column of this tile; column cursor for this row
|
|
setp.ge.s32 %p11, %r60, %r1; // empty column range?
|
|
@%p11 bra BB0_14; // yes: advance to the next row
|
|
|
|
|
cvt.rn.f32.s32 %f15, %r59; // %f15 = float(row)
|
|
setp.gt.s32 %p12, %r21, 0; // %p12 = (param_8 > 0)
|
|
fma.rn.f32 %f1, %f15, %f14, %f13; // %f1 = float(row) * param_3 + param_2 (imaginary part of c for this row)
|
|
@%p12 bra BB0_7; // at least one iteration allowed: take the iterating column loop
|
|
|
|
|
// ---- Column loop used when param_8 <= 0: stores 0 for every in-range element. ----
BB0_4:
|
|
mov.u32 %r8, WARP_SZ; // %r8 = warp size constant
|
|
add.s32 %r34, %r8, -1; // %r34 = WARP_SZ - 1
|
|
mov.u32 %r35, %tid.x; // %r35 = thread index in x
|
|
and.b32 %r36, %r34, %r35; // %r36 = tid.x & (WARP_SZ - 1)
|
|
add.s32 %r37, %r36, %r60; // %r37 = this thread's column
|
|
setp.ge.s32 %p13, %r37, %r1; // column past the tile end?
|
|
@%p13 bra BB0_6; // yes: skip the store
|
|
|
|
|
mad.lo.s32 %r38, %r59, %r17, %r60; // %r38 = row * param_4 + column cursor
|
|
add.s32 %r40, %r8, 1073741823; // %r40 = WARP_SZ + 0x3FFFFFFF
|
|
and.b32 %r42, %r40, %r35; // %r42 = tid.x & %r40. NOTE(review): for a power-of-two WARP_SZ this is the low tid.x bits plus bit 30, and bit 30 is discarded by the shl by 2 below -- appears equivalent to using %r36
|
|
add.s32 %r43, %r38, %r42; // %r43 = element index
|
|
shl.b32 %r44, %r43, 2; // %r44 = element index * 4 (byte offset). NOTE(review): formed in 32 bits, so it wraps if index * 4 does not fit
|
|
cvt.s64.s32 %rd2, %r44; // sign-extend the byte offset to 64 bits
|
|
add.s64 %rd3, %rd2, %rd1; // %rd3 = output base + byte offset
|
|
mov.u32 %r45, 0; // value to store: 0
|
|
st.u32 [%rd3], %r45; // store 0 (st has no state-space qualifier here)
|
|
|
|
|
BB0_6:
|
|
add.s32 %r60, %r8, %r60; // advance the column cursor by WARP_SZ
|
|
setp.lt.s32 %p14, %r60, %r1; // more columns in this row?
|
|
@%p14 bra BB0_4; // yes: next group of columns
|
|
bra.uni BB0_14; // no: advance to the next row
|
|
|
|
|
// ---- Column loop used when param_8 > 0: iterate, then store the count. ----
BB0_7:
|
|
mov.u32 %r47, WARP_SZ; // %r47 = warp size constant
|
|
add.s32 %r48, %r47, -1; // %r48 = WARP_SZ - 1
|
|
mov.u32 %r49, %tid.x; // %r49 = thread index in x
|
|
and.b32 %r50, %r48, %r49; // %r50 = tid.x & (WARP_SZ - 1)
|
|
add.s32 %r11, %r50, %r60; // %r11 = this thread's column
|
|
cvt.rn.f32.s32 %f16, %r11; // %f16 = float(column)
|
|
fma.rn.f32 %f2, %f16, %f12, %f11; // %f2 = float(column) * param_1 + param_0 (real part of c)
|
|
mov.u32 %r61, 0; // %r61 = iteration count, starts at 0
|
|
mov.pred %p16, 0; // %p16 = constant false
|
|
mov.pred %p29, -1; // %p29 = "still iterating" flag, starts true
|
|
mov.pred %p26, %p12; // %p26 = "count < param_8" flag, starts as (param_8 > 0)
|
|
mov.pred %p31, %p16; // %p31 = "escaped" flag, starts false
|
|
mov.f32 %f22, %f2; // loop-carried re(z), starts at re(c)
|
|
mov.f32 %f26, %f1; // loop-carried im(z), starts at im(c)
|
|
|
|
|
// ---- Iteration loop head: test the current z, then (in BB0_9) update it. ----
BB0_8:
|
|
mov.f32 %f24, %f26; // copy of carried im(z)
|
|
mov.f32 %f27, %f24; // %f27 = im(z) for this iteration
|
|
mov.f32 %f20, %f22; // copy of carried re(z)
|
|
mov.f32 %f23, %f20; // %f23 = re(z) for this iteration
|
|
mov.pred %p3, %p29; // copy of the carried "still iterating" flag
|
|
mov.pred %p2, %p26; // copy of the carried "count < param_8" flag
|
|
and.pred %p5, %p3, %p2; // %p5 = active this iteration (both flags set)
|
|
mul.f32 %f6, %f23, %f23; // %f6 = re(z)^2
|
|
mul.f32 %f5, %f27, %f27; // %f5 = im(z)^2
|
|
add.f32 %f17, %f5, %f6; // %f17 = im(z)^2 + re(z)^2
|
|
setp.gtu.f32 %p18, %f17, 0f40800000; // %p18 = (%f17 > 4.0) or unordered; 0f40800000 is 4.0f, and gtu is also true for NaN
|
|
and.pred %p19, %p5, %p18; // escaped on this iteration while active
|
|
or.pred %p31, %p19, %p31; // accumulate into the "escaped" flag
|
|
xor.pred %p20, %p31, %p5; // %p20 = (escaped != active)
|
|
mov.pred %p30, %p16; // default for the next "still iterating" value: false
|
|
@!%p20 bra BB0_10; // escaped == active: skip the update, %p30 stays false
|
|
bra.uni BB0_9; // otherwise fall into the update block
|
|
|
|
|
// ---- z <- z*z + c, and recompute the continue flag. ----
BB0_9:
|
|
add.f32 %f18, %f23, %f23; // %f18 = 2 * re(z)
|
|
fma.rn.f32 %f27, %f27, %f18, %f1; // im(z) = im(z) * 2 * re(z) + im(c)
|
|
sub.f32 %f19, %f6, %f5; // %f19 = re(z)^2 - im(z)^2 (squares computed before the update)
|
|
add.f32 %f23, %f2, %f19; // re(z) = re(c) + re(z)^2 - im(z)^2
|
|
not.pred %p21, %p31; // not escaped
|
|
and.pred %p7, %p5, %p21; // active and not escaped
|
|
mov.pred %p30, %p7; // becomes the next "still iterating" value
|
|
|
|
|
// ---- Loop latch: bump the count for continuing threads and test the limit. ----
BB0_10:
|
|
mov.f32 %f9, %f27; // im(z) to carry into the next iteration
|
|
mov.f32 %f10, %f23; // re(z) to carry into the next iteration
|
|
mov.pred %p28, %p30; // copy of the continue flag
|
|
mov.pred %p29, %p28; // update the carried "still iterating" flag
|
|
add.s32 %r51, %r61, 1; // count + 1
|
|
selp.b32 %r61, %r51, %r61, %p29; // count is incremented only if still iterating
|
|
setp.lt.s32 %p9, %r61, %r21; // %p9 = (count < param_8)
|
|
and.pred %p22, %p29, %p9; // loop again only if still iterating and under the limit
|
|
mov.pred %p26, %p9; // carry the limit test into the next iteration
|
|
mov.f32 %f22, %f10; // carry re(z)
|
|
mov.f32 %f26, %f9; // carry im(z)
|
|
@%p22 bra BB0_8; // next iteration
|
|
|
|
|
setp.ge.s32 %p23, %r11, %r1; // this thread's column past the tile end?
|
|
@%p23 bra BB0_13; // yes: the count was computed but is not stored
|
|
|
|
|
mad.lo.s32 %r52, %r59, %r17, %r60; // %r52 = row * param_4 + column cursor
|
|
add.s32 %r54, %r47, 1073741823; // %r54 = WARP_SZ + 0x3FFFFFFF
|
|
and.b32 %r56, %r54, %r49; // %r56 = tid.x & %r54. NOTE(review): bit 30 is discarded by the shl by 2 below -- appears equivalent to using %r50
|
|
add.s32 %r57, %r52, %r56; // %r57 = element index
|
|
shl.b32 %r58, %r57, 2; // %r58 = element index * 4 (byte offset). NOTE(review): formed in 32 bits, so it wraps if index * 4 does not fit
|
|
cvt.s64.s32 %rd4, %r58; // sign-extend the byte offset to 64 bits
|
|
add.s64 %rd5, %rd4, %rd1; // %rd5 = output base + byte offset
|
|
st.u32 [%rd5], %r61; // store the iteration count (st has no state-space qualifier here)
|
|
|
|
|
BB0_13:
|
|
add.s32 %r60, %r47, %r60; // advance the column cursor by WARP_SZ
|
|
setp.lt.s32 %p24, %r60, %r1; // more columns in this row?
|
|
@%p24 bra BB0_7; // yes: next group of columns
|
|
|
|
|
// ---- Row loop latch. ----
BB0_14:
|
|
add.s32 %r59, %r59, 1; // next row
|
|
setp.ne.s32 %p25, %r59, %r4; // reached the tile's row end?
|
|
@%p25 bra BB0_2; // not yet: process the next row
|
|
|
|
|
BB0_15:
|
|
ret; // kernel exit
|
|
}
|
|
|
|
|
|
|