//
// Generated by LLVM NVPTX Back-End
//
// Module overview (added in review):
//   * PTX ISA 3.1, target sm_35, 64-bit addresses.
//   * Every function declares the same seven virtual register banks
//     (%p, %rc, %rs, %r, %rl, %f, %fl, indices 0..395).
//   * All memory stores to caller-supplied addresses use `st` without a
//     state-space qualifier.
//

.version 3.1
.target sm_35, texmode_independent
.address_size 64

    // .globl __vselect_i8 // @__vselect_i8
//
// __vselect_i8
//   param_0, param_1 : one-byte candidates
//   param_2          : 32-bit selector
//   returns          : param_0 when param_2 == 0, otherwise param_1
//
.func (.param .align 1 .b8 func_retval0[1])
__vselect_i8(
    .param .align 1 .b8 __vselect_i8_param_0[1],
    .param .align 1 .b8 __vselect_i8_param_1[1],
    .param .align 4 .b8 __vselect_i8_param_2[4]
)
{
    .reg .pred %p<396>;
    .reg .s16 %rc<396>;
    .reg .s16 %rs<396>;
    .reg .s32 %r<396>;
    .reg .s64 %rl<396>;
    .reg .f32 %f<396>;
    .reg .f64 %fl<396>;

// BB#0:
    ld.param.u32 %r0, [__vselect_i8_param_2];
    setp.eq.s32 %p0, %r0, 0;                    // %p0 = (selector == 0)
    ld.param.u8 %rc0, [__vselect_i8_param_0];
    ld.param.u8 %rc1, [__vselect_i8_param_1];
    selp.b16 %rc0, %rc0, %rc1, %p0;             // %p0 ? param_0 : param_1 (byte held in a 16-bit register)
    st.param.b8 [func_retval0+0], %rc0;
    ret;
}

    // .globl __vselect_i16
//
// __vselect_i16
//   Same contract as __vselect_i8 with two-byte operands:
//   returns param_0 when param_2 == 0, otherwise param_1.
//
.func (.param .align 2 .b8 func_retval0[2])
__vselect_i16(
    .param .align 2 .b8 __vselect_i16_param_0[2],
    .param .align 2 .b8 __vselect_i16_param_1[2],
    .param .align 4 .b8 __vselect_i16_param_2[4]
)                                               // @__vselect_i16
{
    .reg .pred %p<396>;
    .reg .s16 %rc<396>;
    .reg .s16 %rs<396>;
    .reg .s32 %r<396>;
    .reg .s64 %rl<396>;
    .reg .f32 %f<396>;
    .reg .f64 %fl<396>;

// BB#0:
    ld.param.u32 %r0, [__vselect_i16_param_2];
    setp.eq.s32 %p0, %r0, 0;                    // %p0 = (selector == 0)
    ld.param.u16 %rs0, [__vselect_i16_param_0];
    ld.param.u16 %rs1, [__vselect_i16_param_1];
    selp.b16 %rs0, %rs0, %rs1, %p0;             // %p0 ? param_0 : param_1
    st.param.b16 [func_retval0+0], %rs0;
    ret;
}

    // .globl __vselect_i64
//
// __vselect_i64
//   Same contract as __vselect_i8 with eight-byte operands:
//   returns param_0 when param_2 == 0, otherwise param_1.
//
.func (.param .align 8 .b8 func_retval0[8])
__vselect_i64(
    .param .align 8 .b8 __vselect_i64_param_0[8],
    .param .align 8 .b8 __vselect_i64_param_1[8],
    .param .align 4 .b8 __vselect_i64_param_2[4]
)                                               // @__vselect_i64
{
    .reg .pred %p<396>;
    .reg .s16 %rc<396>;
    .reg .s16 %rs<396>;
    .reg .s32 %r<396>;
    .reg .s64 %rl<396>;
    .reg .f32 %f<396>;
    .reg .f64 %fl<396>;

// BB#0:
    ld.param.u32 %r0, [__vselect_i64_param_2];
    setp.eq.s32 %p0, %r0, 0;                    // %p0 = (selector == 0)
    ld.param.u64 %rl0, [__vselect_i64_param_0];
    ld.param.u64 %rl1, [__vselect_i64_param_1];
    selp.b64 %rl0, %rl0, %rl1, %p0;             // %p0 ? param_0 : param_1
    st.param.b64 [func_retval0+0], %rl0;
    ret;
}

    // .globl __aos_to_soa4_float1
//
// __aos_to_soa4_float1
//   param_0..param_3 : four f32 values passed by value
//   param_4..param_7 : four 64-bit destination addresses
//   Stores param_k to the address in param_(k+4) for k = 0..3. No return value.
//
.func
__aos_to_soa4_float1(
    .param .align 4 .b8 __aos_to_soa4_float1_param_0[4],
    .param .align 4 .b8 __aos_to_soa4_float1_param_1[4],
    .param .align 4 .b8 __aos_to_soa4_float1_param_2[4],
    .param .align 4 .b8 __aos_to_soa4_float1_param_3[4],
    .param .b64 __aos_to_soa4_float1_param_4,
    .param .b64 __aos_to_soa4_float1_param_5,
    .param .b64 __aos_to_soa4_float1_param_6,
    .param .b64 __aos_to_soa4_float1_param_7
)                                               // @__aos_to_soa4_float1
{
    .reg .pred %p<396>;
    .reg .s16 %rc<396>;
    .reg .s16 %rs<396>;
    .reg .s32 %r<396>;
    .reg .s64 %rl<396>;
    .reg .f32 %f<396>;
    .reg .f64 %fl<396>;

// BB#0:
    // Destination addresses first, then the values.
    ld.param.u64 %rl0, [__aos_to_soa4_float1_param_4];
    ld.param.u64 %rl1, [__aos_to_soa4_float1_param_5];
    ld.param.u64 %rl2, [__aos_to_soa4_float1_param_6];
    ld.param.u64 %rl3, [__aos_to_soa4_float1_param_7];
    ld.param.f32 %f0, [__aos_to_soa4_float1_param_0];
    ld.param.f32 %f1, [__aos_to_soa4_float1_param_1];
    ld.param.f32 %f2, [__aos_to_soa4_float1_param_2];
    ld.param.f32 %f3, [__aos_to_soa4_float1_param_3];
    st.f32 [%rl0], %f0;
    st.f32 [%rl1], %f1;
    st.f32 [%rl2], %f2;
    st.f32 [%rl3], %f3;
    ret;
}

    // .globl __soa_to_aos4_float1
//
// __soa_to_aos4_float1
//   Same parameter layout and the same instruction sequence as
//   __aos_to_soa4_float1: stores param_k to the address in param_(k+4),
//   k = 0..3. No return value.
//
.func
__soa_to_aos4_float1(
    .param .align 4 .b8 __soa_to_aos4_float1_param_0[4],
    .param .align 4 .b8 __soa_to_aos4_float1_param_1[4],
    .param .align 4 .b8 __soa_to_aos4_float1_param_2[4],
    .param .align 4 .b8 __soa_to_aos4_float1_param_3[4],
    .param .b64 __soa_to_aos4_float1_param_4,
    .param .b64 __soa_to_aos4_float1_param_5,
    .param .b64 __soa_to_aos4_float1_param_6,
    .param .b64 __soa_to_aos4_float1_param_7
)                                               // @__soa_to_aos4_float1
{
    .reg .pred %p<396>;
    .reg .s16 %rc<396>;
    .reg .s16 %rs<396>;
    .reg .s32 %r<396>;
    .reg .s64 %rl<396>;
    .reg .f32 %f<396>;
    .reg .f64 %fl<396>;

// BB#0:
    ld.param.u64 %rl0, [__soa_to_aos4_float1_param_4];
    ld.param.u64 %rl1, [__soa_to_aos4_float1_param_5];
    ld.param.u64 %rl2, [__soa_to_aos4_float1_param_6];
    ld.param.u64 %rl3, [__soa_to_aos4_float1_param_7];
    ld.param.f32 %f0, [__soa_to_aos4_float1_param_0];
    ld.param.f32 %f1, [__soa_to_aos4_float1_param_1];
    ld.param.f32 %f2, [__soa_to_aos4_float1_param_2];
    ld.param.f32 %f3, [__soa_to_aos4_float1_param_3];
    st.f32 [%rl0], %f0;
    st.f32 [%rl1], %f1;
    st.f32 [%rl2], %f2;
    st.f32 [%rl3], %f3;
    ret;
}

    // .globl __aos_to_soa3_float1
//
// __aos_to_soa3_float1
//   param_0..param_2 : three f32 values passed by value
//   param_3..param_5 : three 64-bit destination addresses
//   Stores param_k to the address in param_(k+3) for k = 0..2. No return value.
//
.func
__aos_to_soa3_float1(
    .param .align 4 .b8 __aos_to_soa3_float1_param_0[4],
    .param .align 4 .b8 __aos_to_soa3_float1_param_1[4],
    .param .align 4 .b8 __aos_to_soa3_float1_param_2[4],
    .param .b64 __aos_to_soa3_float1_param_3,
    .param .b64 __aos_to_soa3_float1_param_4,
    .param .b64 __aos_to_soa3_float1_param_5
)                                               // @__aos_to_soa3_float1
{
    .reg .pred %p<396>;
    .reg .s16 %rc<396>;
    .reg .s16 %rs<396>;
    .reg .s32 %r<396>;
    .reg .s64 %rl<396>;
    .reg .f32 %f<396>;
    .reg .f64 %fl<396>;

// BB#0:
    ld.param.u64 %rl0, [__aos_to_soa3_float1_param_3];
    ld.param.u64 %rl1, [__aos_to_soa3_float1_param_4];
    ld.param.u64 %rl2, [__aos_to_soa3_float1_param_5];
    ld.param.f32 %f0, [__aos_to_soa3_float1_param_0];
    ld.param.f32 %f1, [__aos_to_soa3_float1_param_1];
    ld.param.f32 %f2, [__aos_to_soa3_float1_param_2];
    st.f32 [%rl0], %f0;
    st.f32 [%rl1], %f1;
    st.f32 [%rl2], %f2;
    ret;
}

    // .globl __soa_to_aos3_float1
//
// __soa_to_aos3_float1
//   Same parameter layout and the same instruction sequence as
//   __aos_to_soa3_float1: stores param_k to the address in param_(k+3),
//   k = 0..2. No return value.
//
.func
__soa_to_aos3_float1(
    .param .align 4 .b8 __soa_to_aos3_float1_param_0[4],
    .param .align 4 .b8 __soa_to_aos3_float1_param_1[4],
    .param .align 4 .b8 __soa_to_aos3_float1_param_2[4],
    .param .b64 __soa_to_aos3_float1_param_3,
    .param .b64 __soa_to_aos3_float1_param_4,
    .param .b64 __soa_to_aos3_float1_param_5
)                                               // @__soa_to_aos3_float1
{
    .reg .pred %p<396>;
    .reg .s16 %rc<396>;
    .reg .s16 %rs<396>;
    .reg .s32 %r<396>;
    .reg .s64 %rl<396>;
    .reg .f32 %f<396>;
    .reg .f64 %fl<396>;

// BB#0:
    ld.param.u64 %rl0, [__soa_to_aos3_float1_param_3];
    ld.param.u64 %rl1, [__soa_to_aos3_float1_param_4];
    ld.param.u64 %rl2, [__soa_to_aos3_float1_param_5];
    ld.param.f32 %f0, [__soa_to_aos3_float1_param_0];
    ld.param.f32 %f1, [__soa_to_aos3_float1_param_1];
    ld.param.f32 %f2, [__soa_to_aos3_float1_param_2];
    st.f32 [%rl0], %f0;
    st.f32 [%rl1], %f1;
    st.f32 [%rl2], %f2;
    ret;
}

    // .globl __rsqrt_varying_double
//
// __rsqrt_varying_double
//   param_0 : one f64 value
//   returns : rsqrt.approx.f64 of param_0 (an approximate reciprocal square
//             root, not a correctly rounded one)
//
.func (.param .align 8 .b8 func_retval0[8])
__rsqrt_varying_double(
    .param .align 8 .b8 __rsqrt_varying_double_param_0[8]
)                                               // @__rsqrt_varying_double
{
    .reg .pred %p<396>;
    .reg .s16 %rc<396>;
    .reg .s16 %rs<396>;
    .reg .s32 %r<396>;
    .reg .s64 %rl<396>;
    .reg .f32 %f<396>;
    .reg .f64 %fl<396>;

// BB#0:
    ld.param.f64 %fl0, [__rsqrt_varying_double_param_0];
    rsqrt.approx.f64 %fl0, %fl0;
    st.param.f64 [func_retval0+0], %fl0;
    ret;
}

    // .globl mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E_
//
// mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E_
//
//   param_0, param_1 : f32 origin (cx0, cy0)
//   param_2, param_3 : f32 far corner (cx1, cy1)
//   param_4          : s32 column count  (presumably image width  - confirm against the ISPC source)
//   param_5          : s32 row count     (presumably image height - confirm against the ISPC source)
//   param_6          : s32 iteration cap
//   param_7          : 64-bit base address of the s32 output array
//   param_8          : 32-bit value of which only the sign is tested
//                      (NOTE(review): presumably the ISPC execution mask - confirm)
//
//   For each row in [0, param_5) and col in [0, param_4):
//       cx = param_0 + col * (param_2 - param_0) / param_4
//       cy = param_1 + row * (param_3 - param_1) / param_5
//       (re, im) = (cx, cy); count = 0
//       while count < param_6 and not (re*re + im*im > 4.0 or unordered):
//           (re, im) = (re*re - im*im + cx, 2*re*im + cy); count += 1
//       store count at byte offset 4 * (row * param_4 + col) from param_7
//   When param_6 <= 0 every element of the row is written as 0.
//
//   The element offset is computed and shifted in 32 bits and only then
//   sign-extended to 64 bits.
//
//   The function body exists twice: once for param_8 >= 0 (labels BB8_3 ..
//   BB8_44) and once for param_8 < 0 (labels BB8_11 .. BB8_27). The two copies
//   contain the same instructions and differ only in label names.
//
//   Register roles in both copies:
//       %f0,%f1 = cx0,cy0      %f3,%f2 = per-column / per-row step
//       %r0 = param_4          %r1 = param_5          %r2 = param_6
//       %r4 = -1 if param_6 > 0 else 0 (initial "still iterating" mask)
//       %r5 = param_4 * 4      %r6 = row              %r3 = row * param_4 * 4
//       %rl0 = output base     %rl1 = col (64-bit) in the main per-column loop
//
.func
mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E_(
    .param .b32 mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E__param_0,
    .param .b32 mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E__param_1,
    .param .b32 mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E__param_2,
    .param .b32 mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E__param_3,
    .param .b32 mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E__param_4,
    .param .b32 mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E__param_5,
    .param .b32 mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E__param_6,
    .param .b64 mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E__param_7,
    .param .align 4 .b8 mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E__param_8[4]
)                                               // @mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E_
{
    .reg .pred %p<396>;
    .reg .s16 %rc<396>;
    .reg .s16 %rs<396>;
    .reg .s32 %r<396>;
    .reg .s64 %rl<396>;
    .reg .f32 %f<396>;
    .reg .f64 %fl<396>;

// BB#0: // %allocas
    ld.param.f32 %f0, [mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E__param_0];
    ld.param.f32 %f1, [mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E__param_1];
    ld.param.f32 %f3, [mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E__param_2];
    ld.param.f32 %f2, [mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E__param_3];
    ld.param.u32 %r0, [mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E__param_4];
    ld.param.u32 %r1, [mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E__param_5];
    ld.param.u32 %r2, [mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E__param_6];
    ld.param.u64 %rl0, [mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E__param_7];
    ld.param.u32 %r3, [mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E__param_8];
    setp.lt.s32 %p0, %r3, 0;                    // %p0 = sign bit of param_8
    sub.f32 %f3, %f3, %f0;                      // param_2 - param_0
    cvt.rn.f32.s32 %f4, %r0;
    sub.f32 %f2, %f2, %f1;                      // param_3 - param_1
    cvt.rn.f32.s32 %f5, %r1;
    div.rn.f32 %f2, %f2, %f5;                   // %f2 = per-row step
    div.rn.f32 %f3, %f3, %f4;                   // %f3 = per-column step
    @%p0 bra BB8_9;                             // param_8 < 0: use the second copy
// BB#1: // %for_test110.preheader
    setp.lt.s32 %p0, %r1, 1;
    @%p0 bra BB8_45;                            // no rows: return
// BB#2: // %outer_not_in_extras140.preheader.lr.ph
    setp.gt.s32 %p0, %r2, 0;
    mov.u32 %r3, 0;
    selp.b32 %r4, -1, 0, %p0;                   // %r4 = all-ones iff param_6 > 0
    shl.b32 %r5, %r0, 2;                        // %r5 = bytes per row
    mov.u32 %r6, %r3;                           // row = 0
BB8_3:                                          // %outer_not_in_extras140.preheader
                                                // =>This Loop Header: Depth=1
                                                // Child Loop BB8_41 Depth 2
                                                // Child Loop BB8_43 Depth 2
                                                // Child Loop BB8_38 Depth 2
                                                // Child Loop BB8_33 Depth 3
    setp.lt.s32 %p0, %r0, 1;
    @%p0 bra BB8_4;                             // no columns
// BB#31: // %foreach_full_body120.lr.ph
    // in Loop: Header=BB8_3 Depth=1
    setp.lt.s32 %p0, %r4, 0;                    // true iff param_6 > 0
    mov.u32 %r7, %r0;                           // zero-fill loop: remaining columns
    mov.u32 %r8, %r3;                           // zero-fill loop: byte offset of row start
    @%p0 bra BB8_32;
    bra.uni BB8_43;                             // param_6 <= 0: just write zeros
BB8_32:                                         // in Loop: Header=BB8_3 Depth=1
    mov.u64 %rl1, 0;                            // col = 0
    cvt.rn.f32.s32 %f4, %r6;
    fma.rn.f32 %f4, %f2, %f4, %f1;              // %f4 = cy for this row
    mul.lo.s32 %r7, %r6, %r0;                   // %r7 = row * param_4
BB8_38:                                         // %for_loop.i380.lr.ph.us
                                                // Parent Loop BB8_3 Depth=1
                                                // => This Loop Header: Depth=2
                                                // Child Loop BB8_33 Depth 3
    cvt.u32.u64 %r8, %rl1;
    cvt.rn.f32.s32 %f5, %r8;
    fma.rn.f32 %f5, %f3, %f5, %f0;              // %f5 = cx for this column
    mov.u32 %r10, 0;
    mov.u32 %r12, %r4;                          // %r12 = "still iterating" mask
    mov.u32 %r11, %r10;                         // %r11 = "escaped" mask
    mov.u32 %r9, %r10;                          // %r9  = iteration count
    mov.f32 %f7, %f5;                           // re = cx
    mov.f32 %f6, %f4;                           // im = cy
BB8_33:                                         // %for_loop.i380.us
                                                // Parent Loop BB8_3 Depth=1
                                                // Parent Loop BB8_38 Depth=2
                                                // => This Inner Loop Header: Depth=3
    mul.f32 %f8, %f7, %f7;                      // re*re
    fma.rn.f32 %f9, %f6, %f6, %f8;              // im*im + re*re
    setp.gtu.f32 %p0, %f9, 0f40800000;          // > 4.0f, or unordered
    selp.b32 %r13, %r12, 0, %p0;
    or.b32 %r11, %r13, %r11;                    // record escape
    shr.u32 %r13, %r11, 31;
    shr.u32 %r14, %r12, 31;
    setp.eq.s32 %p0, %r13, %r14;                // escaped sign bit == active sign bit?
    @%p0 bra BB8_34;
    bra.uni BB8_35;
BB8_34:                                         // in Loop: Header=BB8_33 Depth=3
    mov.u32 %r12, %r10;                         // clear the active mask
    bra.uni BB8_36;
BB8_35:                                         // %not_all_continued_or_breaked.i394.us
                                                // in Loop: Header=BB8_33 Depth=3
    mul.f32 %f9, %f6, %f6;                      // im*im
    not.b32 %r13, %r11;
    and.b32 %r12, %r12, %r13;                   // active &= ~escaped
    sub.f32 %f8, %f8, %f9;                      // re*re - im*im
    add.f32 %f8, %f5, %f8;                      // new re = cx + (re*re - im*im)
    add.f32 %f7, %f7, %f7;                      // 2*re
    fma.rn.f32 %f6, %f6, %f7, %f4;              // new im = im*(2*re) + cy
    mov.f32 %f7, %f8;
BB8_36:                                         // %for_step.i363.us
                                                // in Loop: Header=BB8_33 Depth=3
    setp.ne.s32 %p0, %r12, 0;
    selp.u32 %r13, 1, 0, %p0;
    add.s32 %r9, %r9, %r13;                     // count += 1 while still active
    setp.lt.s32 %p0, %r9, %r2;
    selp.b32 %r12, %r12, 0, %p0;                // stop once count reaches param_6
    setp.lt.s32 %p0, %r12, 0;
    @%p0 bra BB8_33;
// BB#37: // %mandel___vyfvyfvyi.exit395.us
    // in Loop: Header=BB8_38 Depth=2
    add.s32 %r8, %r8, %r7;                      // col + row*param_4
    shl.b32 %r8, %r8, 2;                        // *4 bytes, still 32-bit
    cvt.s64.s32 %rl2, %r8;
    add.s64 %rl2, %rl2, %rl0;
    st.u32 [%rl2], %r9;                         // write the count
    add.s64 %rl1, %rl1, 1;                      // ++col
    cvt.u32.u64 %r8, %rl1;
    setp.eq.s32 %p0, %r8, %r0;
    @%p0 bra BB8_44;
    bra.uni BB8_38;
BB8_43:                                         // %mandel___vyfvyfvyi.exit395
                                                // Parent Loop BB8_3 Depth=1
                                                // => This Inner Loop Header: Depth=2
    // Zero-fill loop used when param_6 <= 0.
    cvt.s64.s32 %rl1, %r8;
    add.s64 %rl1, %rl1, %rl0;
    mov.u32 %r9, 0;
    st.u32 [%rl1], %r9;
    add.s32 %r8, %r8, 4;
    add.s32 %r7, %r7, -1;
    setp.eq.s32 %p0, %r7, 0;
    @%p0 bra BB8_44;
    bra.uni BB8_43;
BB8_4:                                          // %partial_inner_all_outer156
                                                // in Loop: Header=BB8_3 Depth=1
    // NOTE(review): the only branch to BB8_4 is `@%p0 bra BB8_4` in BB8_3 and
    // the block above ends in bra.uni, so %p0 is still true here and the
    // fall-through into BB#5 appears unreachable.
    @%p0 bra BB8_44;
// BB#5: // %partial_inner_only197
    // in Loop: Header=BB8_3 Depth=1
    setp.gt.s32 %p0, %r0, 0;
    mov.u32 %r8, 0;
    fma.rn.f32 %f4, %f3, 0f00000000, %f0;       // cx for column 0
    cvt.rn.f32.s32 %f5, %r6;
    fma.rn.f32 %f5, %f2, %f5, %f1;              // cy for this row
    selp.b32 %r7, %r4, 0, %p0;
    setp.lt.s32 %p1, %r7, 0;
    mov.u32 %r10, %r4;                          // active mask
    mov.u32 %r9, %r8;                           // escaped mask
    mov.u32 %r7, %r8;                           // iteration count
    mov.f32 %f7, %f4;
    mov.f32 %f6, %f5;
    @%p1 bra BB8_41;
    bra.uni BB8_6;
BB8_41:                                         // %for_loop.i
                                                // Parent Loop BB8_3 Depth=1
                                                // => This Inner Loop Header: Depth=2
    selp.b32 %r11, %r10, 0, %p0;
    mul.f32 %f8, %f7, %f7;
    fma.rn.f32 %f9, %f6, %f6, %f8;
    setp.gtu.f32 %p1, %f9, 0f40800000;          // > 4.0f, or unordered
    selp.b32 %r12, %r10, 0, %p1;
    or.b32 %r9, %r12, %r9;
    selp.b32 %r12, %r9, 0, %p0;
    shr.u32 %r12, %r12, 31;
    shr.u32 %r11, %r11, 31;
    setp.eq.s32 %p1, %r12, %r11;
    @%p1 bra BB8_42;
    bra.uni BB8_39;
BB8_42:                                         // in Loop: Header=BB8_41 Depth=2
    mov.u32 %r10, %r8;
    bra.uni BB8_40;
BB8_39:                                         // %not_all_continued_or_breaked.i
                                                // in Loop: Header=BB8_41 Depth=2
    mul.f32 %f9, %f6, %f6;
    not.b32 %r11, %r9;
    and.b32 %r10, %r10, %r11;
    sub.f32 %f8, %f8, %f9;
    add.f32 %f8, %f4, %f8;
    add.f32 %f7, %f7, %f7;
    fma.rn.f32 %f6, %f6, %f7, %f5;
    mov.f32 %f7, %f8;
BB8_40:                                         // %for_step.i
                                                // in Loop: Header=BB8_41 Depth=2
    setp.ne.s32 %p1, %r10, 0;
    selp.u32 %r11, 1, 0, %p1;
    add.s32 %r7, %r7, %r11;
    setp.lt.s32 %p1, %r7, %r2;
    selp.b32 %r10, %r10, 0, %p1;
    selp.b32 %r11, %r10, 0, %p0;
    setp.gt.s32 %p1, %r11, -1;
    @%p1 bra BB8_7;
    bra.uni BB8_41;
BB8_6:                                          // in Loop: Header=BB8_3 Depth=1
    mov.u32 %r7, %r8;
BB8_7:                                          // %mandel___vyfvyfvyi.exit
                                                // in Loop: Header=BB8_3 Depth=1
    setp.lt.s32 %p0, %r0, 1;
    @%p0 bra BB8_44;
// BB#8: // %pl_dolane.i
    // in Loop: Header=BB8_3 Depth=1
    mul.lo.s32 %r8, %r6, %r0;
    shl.b32 %r8, %r8, 2;
    cvt.s64.s32 %rl1, %r8;
    add.s64 %rl1, %rl1, %rl0;
    st.u32 [%rl1], %r7;                         // element (row, 0)
BB8_44:                                         // %foreach_reset128
                                                // in Loop: Header=BB8_3 Depth=1
    add.s32 %r6, %r6, 1;                        // ++row
    add.s32 %r3, %r3, %r5;                      // advance row byte offset
    setp.eq.s32 %p0, %r6, %r1;
    @%p0 bra BB8_45;
    bra.uni BB8_3;

    // ---- Second copy, entered when param_8 < 0. Same instructions as
    // ---- BB#1 .. BB8_44 above, with different label names.
BB8_9:                                          // %for_test.preheader
    setp.lt.s32 %p0, %r1, 1;
    @%p0 bra BB8_45;
// BB#10: // %outer_not_in_extras.preheader.lr.ph
    setp.gt.s32 %p0, %r2, 0;
    mov.u32 %r3, 0;
    selp.b32 %r4, -1, 0, %p0;
    shl.b32 %r5, %r0, 2;
    mov.u32 %r6, %r3;
BB8_11:                                         // %outer_not_in_extras.preheader
                                                // =>This Loop Header: Depth=1
                                                // Child Loop BB8_23 Depth 2
                                                // Child Loop BB8_20 Depth 2
                                                // Child Loop BB8_19 Depth 2
                                                // Child Loop BB8_14 Depth 3
    setp.lt.s32 %p0, %r0, 1;
    @%p0 bra BB8_28;
// BB#12: // %foreach_full_body.lr.ph
    // in Loop: Header=BB8_11 Depth=1
    setp.lt.s32 %p0, %r4, 0;
    mov.u32 %r7, %r0;
    mov.u32 %r8, %r3;
    @%p0 bra BB8_13;
    bra.uni BB8_20;
BB8_13:                                         // in Loop: Header=BB8_11 Depth=1
    mov.u64 %rl1, 0;
    cvt.rn.f32.s32 %f4, %r6;
    fma.rn.f32 %f4, %f2, %f4, %f1;
    mul.lo.s32 %r7, %r6, %r0;
BB8_19:                                         // %for_loop.i281.lr.ph.us
                                                // Parent Loop BB8_11 Depth=1
                                                // => This Loop Header: Depth=2
                                                // Child Loop BB8_14 Depth 3
    cvt.u32.u64 %r8, %rl1;
    cvt.rn.f32.s32 %f5, %r8;
    fma.rn.f32 %f5, %f3, %f5, %f0;
    mov.u32 %r10, 0;
    mov.u32 %r12, %r4;
    mov.u32 %r11, %r10;
    mov.u32 %r9, %r10;
    mov.f32 %f7, %f5;
    mov.f32 %f6, %f4;
BB8_14:                                         // %for_loop.i281.us
                                                // Parent Loop BB8_11 Depth=1
                                                // Parent Loop BB8_19 Depth=2
                                                // => This Inner Loop Header: Depth=3
    mul.f32 %f8, %f7, %f7;
    fma.rn.f32 %f9, %f6, %f6, %f8;
    setp.gtu.f32 %p0, %f9, 0f40800000;
    selp.b32 %r13, %r12, 0, %p0;
    or.b32 %r11, %r13, %r11;
    shr.u32 %r13, %r11, 31;
    shr.u32 %r14, %r12, 31;
    setp.eq.s32 %p0, %r13, %r14;
    @%p0 bra BB8_15;
    bra.uni BB8_16;
BB8_15:                                         // in Loop: Header=BB8_14 Depth=3
    mov.u32 %r12, %r10;
    bra.uni BB8_17;
BB8_16:                                         // %not_all_continued_or_breaked.i295.us
                                                // in Loop: Header=BB8_14 Depth=3
    mul.f32 %f9, %f6, %f6;
    not.b32 %r13, %r11;
    and.b32 %r12, %r12, %r13;
    sub.f32 %f8, %f8, %f9;
    add.f32 %f8, %f5, %f8;
    add.f32 %f7, %f7, %f7;
    fma.rn.f32 %f6, %f6, %f7, %f4;
    mov.f32 %f7, %f8;
BB8_17:                                         // %for_step.i264.us
                                                // in Loop: Header=BB8_14 Depth=3
    setp.ne.s32 %p0, %r12, 0;
    selp.u32 %r13, 1, 0, %p0;
    add.s32 %r9, %r9, %r13;
    setp.lt.s32 %p0, %r9, %r2;
    selp.b32 %r12, %r12, 0, %p0;
    setp.lt.s32 %p0, %r12, 0;
    @%p0 bra BB8_14;
// BB#18: // %mandel___vyfvyfvyi.exit296.us
    // in Loop: Header=BB8_19 Depth=2
    add.s32 %r8, %r8, %r7;
    shl.b32 %r8, %r8, 2;
    cvt.s64.s32 %rl2, %r8;
    add.s64 %rl2, %rl2, %rl0;
    st.u32 [%rl2], %r9;
    add.s64 %rl1, %rl1, 1;
    cvt.u32.u64 %r8, %rl1;
    setp.eq.s32 %p0, %r8, %r0;
    @%p0 bra BB8_27;
    bra.uni BB8_19;
BB8_20:                                         // %mandel___vyfvyfvyi.exit296
                                                // Parent Loop BB8_11 Depth=1
                                                // => This Inner Loop Header: Depth=2
    cvt.s64.s32 %rl1, %r8;
    add.s64 %rl1, %rl1, %rl0;
    mov.u32 %r9, 0;
    st.u32 [%rl1], %r9;
    add.s32 %r8, %r8, 4;
    add.s32 %r7, %r7, -1;
    setp.eq.s32 %p0, %r7, 0;
    @%p0 bra BB8_27;
    bra.uni BB8_20;
BB8_28:                                         // %partial_inner_all_outer
                                                // in Loop: Header=BB8_11 Depth=1
    // NOTE(review): reached only via `@%p0 bra BB8_28`, so this branch is
    // always taken and BB#29 appears unreachable.
    @%p0 bra BB8_27;
// BB#29: // %partial_inner_only
    // in Loop: Header=BB8_11 Depth=1
    setp.gt.s32 %p0, %r0, 0;
    mov.u32 %r8, 0;
    fma.rn.f32 %f4, %f3, 0f00000000, %f0;
    cvt.rn.f32.s32 %f5, %r6;
    fma.rn.f32 %f5, %f2, %f5, %f1;
    selp.b32 %r7, %r4, 0, %p0;
    setp.lt.s32 %p1, %r7, 0;
    mov.u32 %r10, %r4;
    mov.u32 %r9, %r8;
    mov.u32 %r7, %r8;
    mov.f32 %f7, %f4;
    mov.f32 %f6, %f5;
    @%p1 bra BB8_23;
    bra.uni BB8_30;
BB8_23:                                         // %for_loop.i332
                                                // Parent Loop BB8_11 Depth=1
                                                // => This Inner Loop Header: Depth=2
    selp.b32 %r11, %r10, 0, %p0;
    mul.f32 %f8, %f7, %f7;
    fma.rn.f32 %f9, %f6, %f6, %f8;
    setp.gtu.f32 %p1, %f9, 0f40800000;
    selp.b32 %r12, %r10, 0, %p1;
    or.b32 %r9, %r12, %r9;
    selp.b32 %r12, %r9, 0, %p0;
    shr.u32 %r12, %r12, 31;
    shr.u32 %r11, %r11, 31;
    setp.eq.s32 %p1, %r12, %r11;
    @%p1 bra BB8_24;
    bra.uni BB8_21;
BB8_24:                                         // in Loop: Header=BB8_23 Depth=2
    mov.u32 %r10, %r8;
    bra.uni BB8_22;
BB8_21:                                         // %not_all_continued_or_breaked.i346
                                                // in Loop: Header=BB8_23 Depth=2
    mul.f32 %f9, %f6, %f6;
    not.b32 %r11, %r9;
    and.b32 %r10, %r10, %r11;
    sub.f32 %f8, %f8, %f9;
    add.f32 %f8, %f4, %f8;
    add.f32 %f7, %f7, %f7;
    fma.rn.f32 %f6, %f6, %f7, %f5;
    mov.f32 %f7, %f8;
BB8_22:                                         // %for_step.i313
                                                // in Loop: Header=BB8_23 Depth=2
    setp.ne.s32 %p1, %r10, 0;
    selp.u32 %r11, 1, 0, %p1;
    add.s32 %r7, %r7, %r11;
    setp.lt.s32 %p1, %r7, %r2;
    selp.b32 %r10, %r10, 0, %p1;
    selp.b32 %r11, %r10, 0, %p0;
    setp.gt.s32 %p1, %r11, -1;
    @%p1 bra BB8_25;
    bra.uni BB8_23;
BB8_30:                                         // in Loop: Header=BB8_11 Depth=1
    mov.u32 %r7, %r8;
BB8_25:                                         // %mandel___vyfvyfvyi.exit347
                                                // in Loop: Header=BB8_11 Depth=1
    setp.lt.s32 %p0, %r0, 1;
    @%p0 bra BB8_27;
// BB#26: // %pl_dolane.i452
    // in Loop: Header=BB8_11 Depth=1
    mul.lo.s32 %r8, %r6, %r0;
    shl.b32 %r8, %r8, 2;
    cvt.s64.s32 %rl1, %r8;
    add.s64 %rl1, %rl1, %rl0;
    st.u32 [%rl1], %r7;
BB8_27:                                         // %foreach_reset
                                                // in Loop: Header=BB8_11 Depth=1
    add.s32 %r6, %r6, 1;
    add.s32 %r3, %r3, %r5;
    setp.eq.s32 %p0, %r6, %r1;
    @%p0 bra BB8_45;
    bra.uni BB8_11;
BB8_45:                                         // %for_exit
    ret;
}

    // .globl mandelbrot_ispc
//
// mandelbrot_ispc
//
//   Same computation as mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E_
//   but without the ninth (sign-tested) parameter, so there is a single copy
//   of the loop nest.
//
//   param_0, param_1 : f32 origin (cx0, cy0)
//   param_2, param_3 : f32 far corner (cx1, cy1)
//   param_4          : s32 column count  (presumably image width  - confirm against the ISPC source)
//   param_5          : s32 row count     (presumably image height - confirm against the ISPC source)
//   param_6          : s32 iteration cap
//   param_7          : 64-bit base address of the s32 output array
//
//   For each row in [0, param_5) and col in [0, param_4) the escape-time
//   count (capped at param_6, 0 when param_6 <= 0) is stored at byte offset
//   4 * (row * param_4 + col) from param_7. Returns immediately when
//   param_5 < 1, before any other parameter is loaded.
//
//   Register roles:
//       %r0 = param_5   %r1 = param_4   %r2 = param_6   %r3 = row
//       %r4 = -1 if param_6 > 0 else 0  %rl0 = output base  %rl1 = col
//
.func
mandelbrot_ispc(
    .param .b32 mandelbrot_ispc_param_0,
    .param .b32 mandelbrot_ispc_param_1,
    .param .b32 mandelbrot_ispc_param_2,
    .param .b32 mandelbrot_ispc_param_3,
    .param .b32 mandelbrot_ispc_param_4,
    .param .b32 mandelbrot_ispc_param_5,
    .param .b32 mandelbrot_ispc_param_6,
    .param .b64 mandelbrot_ispc_param_7
)                                               // @mandelbrot_ispc
{
    .reg .pred %p<396>;
    .reg .s16 %rc<396>;
    .reg .s16 %rs<396>;
    .reg .s32 %r<396>;
    .reg .s64 %rl<396>;
    .reg .f32 %f<396>;
    .reg .f64 %fl<396>;

// BB#0: // %allocas
    ld.param.u32 %r0, [mandelbrot_ispc_param_5];
    setp.lt.s32 %p0, %r0, 1;
    @%p0 bra BB9_18;                            // no rows: return
// BB#1: // %outer_not_in_extras.preheader.lr.ph
    ld.param.f32 %f0, [mandelbrot_ispc_param_0];
    ld.param.f32 %f1, [mandelbrot_ispc_param_1];
    ld.param.f32 %f3, [mandelbrot_ispc_param_2];
    ld.param.f32 %f2, [mandelbrot_ispc_param_3];
    ld.param.u32 %r1, [mandelbrot_ispc_param_4];
    ld.param.u32 %r2, [mandelbrot_ispc_param_6];
    ld.param.u64 %rl0, [mandelbrot_ispc_param_7];
    sub.f32 %f3, %f3, %f0;                      // param_2 - param_0
    cvt.rn.f32.s32 %f4, %r1;
    sub.f32 %f2, %f2, %f1;                      // param_3 - param_1
    cvt.rn.f32.s32 %f5, %r0;
    div.rn.f32 %f2, %f2, %f5;                   // %f2 = per-row step
    div.rn.f32 %f3, %f3, %f4;                   // %f3 = per-column step
    setp.gt.s32 %p0, %r2, 0;
    mov.u32 %r3, 0;                             // row = 0
    selp.b32 %r4, -1, 0, %p0;                   // %r4 = all-ones iff param_6 > 0
BB9_2:                                          // %outer_not_in_extras.preheader
                                                // =>This Loop Header: Depth=1
                                                // Child Loop BB9_13 Depth 2
                                                // Child Loop BB9_4 Depth 2
                                                // Child Loop BB9_9 Depth 3
    setp.lt.s32 %p0, %r1, 1;
    @%p0 bra BB9_19;                            // no columns
// BB#3: // %foreach_full_body.lr.ph
    // in Loop: Header=BB9_2 Depth=1
    mov.u64 %rl1, 0;                            // col = 0
    cvt.rn.f32.s32 %f4, %r3;
    fma.rn.f32 %f4, %f2, %f4, %f1;              // %f4 = cy for this row
    mul.lo.s32 %r5, %r3, %r1;                   // %r5 = row * param_4
BB9_4:                                          // %foreach_full_body
                                                // Parent Loop BB9_2 Depth=1
                                                // => This Loop Header: Depth=2
                                                // Child Loop BB9_9 Depth 3
    setp.lt.s32 %p0, %r4, 0;                    // true iff param_6 > 0
    cvt.u32.u64 %r6, %rl1;
    cvt.rn.f32.s32 %f5, %r6;
    fma.rn.f32 %f5, %f3, %f5, %f0;              // %f5 = cx for this column
    mov.u32 %r8, 0;
    mov.u32 %r10, %r4;                          // %r10 = "still iterating" mask
    mov.u32 %r9, %r8;                           // %r9  = "escaped" mask
    mov.u32 %r7, %r8;                           // %r7  = iteration count
    mov.f32 %f7, %f5;                           // re = cx
    mov.f32 %f6, %f4;                           // im = cy
    @%p0 bra BB9_9;
    bra.uni BB9_5;                              // param_6 <= 0: store 0
BB9_9:                                          // %for_loop.i281
                                                // Parent Loop BB9_2 Depth=1
                                                // Parent Loop BB9_4 Depth=2
                                                // => This Inner Loop Header: Depth=3
    mul.f32 %f8, %f7, %f7;                      // re*re
    fma.rn.f32 %f9, %f6, %f6, %f8;              // im*im + re*re
    setp.gtu.f32 %p0, %f9, 0f40800000;          // > 4.0f, or unordered
    selp.b32 %r11, %r10, 0, %p0;
    or.b32 %r9, %r11, %r9;                      // record escape
    shr.u32 %r11, %r9, 31;
    shr.u32 %r12, %r10, 31;
    setp.eq.s32 %p0, %r11, %r12;
    @%p0 bra BB9_10;
    bra.uni BB9_7;
BB9_10:                                         // in Loop: Header=BB9_9 Depth=3
    mov.u32 %r10, %r8;                          // clear the active mask
    bra.uni BB9_8;
BB9_7:                                          // %not_all_continued_or_breaked.i295
                                                // in Loop: Header=BB9_9 Depth=3
    mul.f32 %f9, %f6, %f6;                      // im*im
    not.b32 %r11, %r9;
    and.b32 %r10, %r10, %r11;                   // active &= ~escaped
    sub.f32 %f8, %f8, %f9;                      // re*re - im*im
    add.f32 %f8, %f5, %f8;                      // new re
    add.f32 %f7, %f7, %f7;                      // 2*re
    fma.rn.f32 %f6, %f6, %f7, %f4;              // new im = im*(2*re) + cy
    mov.f32 %f7, %f8;
BB9_8:                                          // %for_step.i264
                                                // in Loop: Header=BB9_9 Depth=3
    setp.ne.s32 %p0, %r10, 0;
    selp.u32 %r11, 1, 0, %p0;
    add.s32 %r7, %r7, %r11;                     // count += 1 while still active
    setp.lt.s32 %p0, %r7, %r2;
    selp.b32 %r10, %r10, 0, %p0;                // stop once count reaches param_6
    setp.gt.s32 %p0, %r10, -1;
    @%p0 bra BB9_6;
    bra.uni BB9_9;
BB9_5:                                          // in Loop: Header=BB9_4 Depth=2
    mov.u32 %r7, %r8;
BB9_6:                                          // %mandel___vyfvyfvyi.exit296
                                                // in Loop: Header=BB9_4 Depth=2
    add.s32 %r6, %r6, %r5;                      // col + row*param_4
    shl.b32 %r6, %r6, 2;                        // *4 bytes, still 32-bit
    cvt.s64.s32 %rl2, %r6;
    add.s64 %rl2, %rl2, %rl0;
    st.u32 [%rl2], %r7;                         // write the count
    add.s64 %rl1, %rl1, 1;                      // ++col
    cvt.u32.u64 %r6, %rl1;
    setp.eq.s32 %p0, %r6, %r1;
    @%p0 bra BB9_17;
    bra.uni BB9_4;
BB9_19:                                         // %partial_inner_all_outer
                                                // in Loop: Header=BB9_2 Depth=1
    // NOTE(review): reached only via `@%p0 bra BB9_19`, so this branch is
    // always taken and BB#20 appears unreachable.
    @%p0 bra BB9_17;
// BB#20: // %partial_inner_only
    // in Loop: Header=BB9_2 Depth=1
    setp.gt.s32 %p0, %r1, 0;
    mov.u32 %r6, 0;
    fma.rn.f32 %f4, %f3, 0f00000000, %f0;       // cx for column 0
    cvt.rn.f32.s32 %f5, %r3;
    fma.rn.f32 %f5, %f2, %f5, %f1;              // cy for this row
    selp.b32 %r5, %r4, 0, %p0;
    setp.lt.s32 %p1, %r5, 0;
    mov.u32 %r8, %r4;
    mov.u32 %r7, %r6;
    mov.u32 %r5, %r6;
    mov.f32 %f7, %f4;
    mov.f32 %f6, %f5;
    @%p1 bra BB9_13;
    bra.uni BB9_21;
BB9_13:                                         // %for_loop.i332
                                                // Parent Loop BB9_2 Depth=1
                                                // => This Inner Loop Header: Depth=2
    selp.b32 %r9, %r8, 0, %p0;
    mul.f32 %f8, %f7, %f7;
    fma.rn.f32 %f9, %f6, %f6, %f8;
    setp.gtu.f32 %p1, %f9, 0f40800000;
    selp.b32 %r10, %r8, 0, %p1;
    or.b32 %r7, %r10, %r7;
    selp.b32 %r10, %r7, 0, %p0;
    shr.u32 %r10, %r10, 31;
    shr.u32 %r9, %r9, 31;
    setp.eq.s32 %p1, %r10, %r9;
    @%p1 bra BB9_14;
    bra.uni BB9_11;
BB9_14:                                         // in Loop: Header=BB9_13 Depth=2
    mov.u32 %r8, %r6;
    bra.uni BB9_12;
BB9_11:                                         // %not_all_continued_or_breaked.i346
                                                // in Loop: Header=BB9_13 Depth=2
    mul.f32 %f9, %f6, %f6;
    not.b32 %r9, %r7;
    and.b32 %r8, %r8, %r9;
    sub.f32 %f8, %f8, %f9;
    add.f32 %f8, %f4, %f8;
    add.f32 %f7, %f7, %f7;
    fma.rn.f32 %f6, %f6, %f7, %f5;
    mov.f32 %f7, %f8;
BB9_12:                                         // %for_step.i313
                                                // in Loop: Header=BB9_13 Depth=2
    setp.ne.s32 %p1, %r8, 0;
    selp.u32 %r9, 1, 0, %p1;
    add.s32 %r5, %r5, %r9;
    setp.lt.s32 %p1, %r5, %r2;
    selp.b32 %r8, %r8, 0, %p1;
    selp.b32 %r9, %r8, 0, %p0;
    setp.gt.s32 %p1, %r9, -1;
    @%p1 bra BB9_15;
    bra.uni BB9_13;
BB9_21:                                         // in Loop: Header=BB9_2 Depth=1
    mov.u32 %r5, %r6;
BB9_15:                                         // %mandel___vyfvyfvyi.exit347
                                                // in Loop: Header=BB9_2 Depth=1
    setp.lt.s32 %p0, %r1, 1;
    @%p0 bra BB9_17;
// BB#16: // %pl_dolane.i
    // in Loop: Header=BB9_2 Depth=1
    mul.lo.s32 %r6, %r3, %r1;
    shl.b32 %r6, %r6, 2;
    cvt.s64.s32 %rl1, %r6;
    add.s64 %rl1, %rl1, %rl0;
    st.u32 [%rl1], %r5;                         // element (row, 0)
BB9_17:                                         // %foreach_reset
                                                // in Loop: Header=BB9_2 Depth=1
    add.s32 %r3, %r3, 1;                        // ++row
    setp.eq.s32 %p0, %r3, %r0;
    @%p0 bra BB9_18;
    bra.uni BB9_2;
BB9_18:                                         // %for_exit
    ret;
}