//
// Generated by NVIDIA NVVM Compiler
// Compiler built on Thu Jul 18 02:37:37 2013 (1374107857)
// Cuda compilation tools, release 5.5, V5.5.0
//
// NOTE: hand-edited after generation:
//   * one statement per line restored (PTX `//` comments run to end of line,
//     so a collapsed file comments out its own code);
//   * the byte offset of each store is now formed with mul.wide.s32 instead of
//     shl.b32 + cvt.s64.s32, so it no longer wraps at element index 2^29.
//

.version 3.2
.target sm_20
.address_size 64

//-----------------------------------------------------------------------------
// mandelbrot_scanline
//
// Each thread block handles a rectangle of the image selected by
// (%ctaid.x, %ctaid.y); within a block, the lane index (%tid.x & (WARP_SZ-1))
// selects the column inside a WARP_SZ-wide strip.  For every pixel the kernel
// iterates z = z*z + c starting at z = c until |z|^2 > 4.0 (or the sum is NaN)
// or the iteration cap is reached, and stores the iteration count as a 32-bit
// value at out[y * param_4 + x].
//
// Parameters (roles as used by the code below):
//   param_0 (f32)  c.x origin:   c.x = x * param_1 + param_0
//   param_1 (f32)  c.x step per column
//   param_2 (f32)  c.y origin:   c.y = y * param_3 + param_2
//   param_3 (f32)  c.y step per row
//   param_4 (u32)  upper bound on x; also the row pitch in elements
//   param_5 (u32)  upper bound on y
//   param_6 (u32)  columns per block in x (tile width)
//   param_7 (u32)  rows per block in y (tile height)
//   param_8 (u32)  iteration cap; if <= 0 the kernel just stores zeros
//   param_9 (u64)  base address of the output buffer (generic address space,
//                  4-byte elements)
//
// All integer parameters are treated as signed 32-bit values.  The element
// index y * param_4 + x is still computed in 32 bits, so the image must hold
// fewer than 2^31 elements.
//
// NOTE(review): only %tid.x & (WARP_SZ-1) is used to pick a pixel, so threads
// of different warps in the same block would compute and store the same
// pixels -- presumably launched with one warp per block; confirm against the
// host-side launch code.
//-----------------------------------------------------------------------------
.visible .entry mandelbrot_scanline(
    .param .f32 mandelbrot_scanline_param_0,
    .param .f32 mandelbrot_scanline_param_1,
    .param .f32 mandelbrot_scanline_param_2,
    .param .f32 mandelbrot_scanline_param_3,
    .param .u32 mandelbrot_scanline_param_4,
    .param .u32 mandelbrot_scanline_param_5,
    .param .u32 mandelbrot_scanline_param_6,
    .param .u32 mandelbrot_scanline_param_7,
    .param .u32 mandelbrot_scanline_param_8,
    .param .u64 mandelbrot_scanline_param_9
)
{
    .reg .pred  %p<32>;
    .reg .s32   %r<62>;
    .reg .f32   %f<28>;
    .reg .s64   %rd<6>;

    // ---- load kernel arguments ------------------------------------------
    ld.param.f32    %f11, [mandelbrot_scanline_param_0];    // c.x origin
    ld.param.f32    %f12, [mandelbrot_scanline_param_1];    // c.x step
    ld.param.f32    %f13, [mandelbrot_scanline_param_2];    // c.y origin
    ld.param.f32    %f14, [mandelbrot_scanline_param_3];    // c.y step
    ld.param.u32    %r17, [mandelbrot_scanline_param_4];    // x bound / row pitch
    ld.param.u32    %r18, [mandelbrot_scanline_param_5];    // y bound
    ld.param.u32    %r19, [mandelbrot_scanline_param_6];    // tile width
    ld.param.u32    %r20, [mandelbrot_scanline_param_7];    // tile height
    ld.param.u32    %r21, [mandelbrot_scanline_param_8];    // iteration cap
    ld.param.u64    %rd1, [mandelbrot_scanline_param_9];    // output base

    // ---- tile bounds for this block -------------------------------------
    mov.u32         %r22, %ctaid.x;
    mad.lo.s32      %r23, %r22, %r19, %r19;     // (ctaid.x + 1) * tile width
    min.s32         %r1, %r23, %r17;            // r1 = x end (exclusive), clamped
    mov.u32         %r2, %ctaid.y;
    mul.lo.s32      %r59, %r2, %r20;            // r59 = y = first row of the tile
    add.s32         %r24, %r59, %r20;
    min.s32         %r25, %r24, %r18;           // y end (exclusive), clamped
    setp.ge.s32     %p10, %r59, %r25;
    @%p10 bra       BB0_15;                     // empty row range: nothing to do

    // r4 = min(param_5, (ctaid.y + 1) * tile height), computed as
    // ~max(~a, ~b); this is the same y end as r25 above.
    not.b32         %r26, %r18;
    add.s32         %r27, %r2, 1;
    mul.lo.s32      %r28, %r27, %r20;
    not.b32         %r29, %r28;
    max.s32         %r30, %r26, %r29;
    not.b32         %r4, %r30;

    // ==== row loop: y = r59 ... r4-1 =====================================
BB0_2:
    mul.lo.s32      %r60, %r22, %r19;           // r60 = x base of the current strip
    setp.ge.s32     %p11, %r60, %r1;
    @%p11 bra       BB0_14;                     // empty column range for this row
    cvt.rn.f32.s32  %f15, %r59;
    setp.gt.s32     %p12, %r21, 0;              // p12 = iteration cap > 0
    fma.rn.f32      %f1, %f15, %f14, %f13;      // f1 = c.y for this row
    @%p12 bra       BB0_7;

    // ==== strip loop A: iteration cap <= 0, every pixel gets 0 ===========
BB0_4:
    mov.u32         %r8, WARP_SZ;
    add.s32         %r34, %r8, -1;
    mov.u32         %r35, %tid.x;
    and.b32         %r36, %r34, %r35;           // lane = tid.x & (WARP_SZ - 1)
    add.s32         %r37, %r36, %r60;           // x = strip base + lane
    setp.ge.s32     %p13, %r37, %r1;
    @%p13 bra       BB0_6;                      // lane is past the x end: no store
    mad.lo.s32      %r38, %r59, %r17, %r60;     // y * pitch + strip base
    // NOTE(review): WARP_SZ + 0x3FFFFFFF ANDed with tid.x appears to yield the
    // same lane index as r36, assuming tid.x < 2^30.
    add.s32         %r40, %r8, 1073741823;
    and.b32         %r42, %r40, %r35;
    add.s32         %r43, %r38, %r42;           // element index
    mul.wide.s32    %rd2, %r43, 4;              // 64-bit byte offset (no 32-bit wrap)
    add.s64         %rd3, %rd2, %rd1;
    mov.u32         %r45, 0;
    st.u32          [%rd3], %r45;

BB0_6:
    add.s32         %r60, %r8, %r60;            // advance one strip (WARP_SZ columns)
    setp.lt.s32     %p14, %r60, %r1;
    @%p14 bra       BB0_4;
    bra.uni         BB0_14;

    // ==== strip loop B: iteration cap > 0 ================================
BB0_7:
    mov.u32         %r47, WARP_SZ;
    add.s32         %r48, %r47, -1;
    mov.u32         %r49, %tid.x;
    and.b32         %r50, %r48, %r49;           // lane = tid.x & (WARP_SZ - 1)
    add.s32         %r11, %r50, %r60;           // r11 = x = strip base + lane
    cvt.rn.f32.s32  %f16, %r11;
    fma.rn.f32      %f2, %f16, %f12, %f11;      // f2 = c.x for this pixel
    mov.u32         %r61, 0;                    // r61 = iteration counter
    mov.pred        %p16, 0;                    // constant false
    mov.pred        %p29, -1;                   // p29 = still iterating (starts true)
    mov.pred        %p26, %p12;                 // p26 = counter < cap (true on entry)
    mov.pred        %p31, %p16;                 // p31 = escaped (starts false)
    mov.f32         %f22, %f2;                  // z.x = c.x
    mov.f32         %f26, %f1;                  // z.y = c.y

    // ---- escape-time iteration ------------------------------------------
BB0_8:
    mov.f32         %f24, %f26;
    mov.f32         %f27, %f24;                 // f27 = z.y
    mov.f32         %f20, %f22;
    mov.f32         %f23, %f20;                 // f23 = z.x
    mov.pred        %p3, %p29;
    mov.pred        %p2, %p26;
    and.pred        %p5, %p3, %p2;              // p5 = iterating and below the cap
    mul.f32         %f6, %f23, %f23;            // z.x^2
    mul.f32         %f5, %f27, %f27;            // z.y^2
    add.f32         %f17, %f5, %f6;             // |z|^2
    // gtu = greater-than OR unordered, so a NaN magnitude also counts as escape.
    setp.gtu.f32    %p18, %f17, 0f40800000;     // |z|^2 > 4.0
    and.pred        %p19, %p5, %p18;
    or.pred         %p31, %p19, %p31;           // latch the escape flag
    xor.pred        %p20, %p31, %p5;
    mov.pred        %p30, %p16;                 // default: stop iterating
    @!%p20 bra      BB0_10;                     // skip the z update when p20 is false
    bra.uni         BB0_9;

BB0_9:
    add.f32         %f18, %f23, %f23;           // 2 * z.x
    fma.rn.f32      %f27, %f27, %f18, %f1;      // z.y' = 2 * z.x * z.y + c.y
    sub.f32         %f19, %f6, %f5;             // z.x^2 - z.y^2
    add.f32         %f23, %f2, %f19;            // z.x' = c.x + (z.x^2 - z.y^2)
    not.pred        %p21, %p31;
    and.pred        %p7, %p5, %p21;             // keep iterating only if not escaped
    mov.pred        %p30, %p7;

BB0_10:
    mov.f32         %f9, %f27;
    mov.f32         %f10, %f23;
    mov.pred        %p28, %p30;
    mov.pred        %p29, %p28;                 // updated "still iterating" flag
    add.s32         %r51, %r61, 1;
    selp.b32        %r61, %r51, %r61, %p29;     // count only while still iterating
    setp.lt.s32     %p9, %r61, %r21;            // counter < cap
    and.pred        %p22, %p29, %p9;
    mov.pred        %p26, %p9;
    mov.f32         %f22, %f10;                 // carry z.x to the next iteration
    mov.f32         %f26, %f9;                  // carry z.y to the next iteration
    @%p22 bra       BB0_8;

    // ---- store the iteration count for in-range pixels ------------------
    setp.ge.s32     %p23, %r11, %r1;
    @%p23 bra       BB0_13;                     // x is past the x end: no store
    mad.lo.s32      %r52, %r59, %r17, %r60;     // y * pitch + strip base
    // NOTE(review): same lane-index recomputation as in strip loop A.
    add.s32         %r54, %r47, 1073741823;
    and.b32         %r56, %r54, %r49;
    add.s32         %r57, %r52, %r56;           // element index
    mul.wide.s32    %rd4, %r57, 4;              // 64-bit byte offset (no 32-bit wrap)
    add.s64         %rd5, %rd4, %rd1;
    st.u32          [%rd5], %r61;

BB0_13:
    add.s32         %r60, %r47, %r60;           // advance one strip (WARP_SZ columns)
    setp.lt.s32     %p24, %r60, %r1;
    @%p24 bra       BB0_7;

    // ==== next row =======================================================
BB0_14:
    add.s32         %r59, %r59, 1;
    setp.ne.s32     %p25, %r59, %r4;
    @%p25 bra       BB0_2;

BB0_15:
    ret;
}