//
// Generated by LLVM NVPTX Back-End
// (hand-edited afterwards: layout restored, comments added, byte offsets
//  widened to 64 bits, per-thread lane offset hoisted out of the loops)
//

.version 3.1
.target sm_20, texmode_independent
.address_size 64

//-----------------------------------------------------------------------
// stencil_step_task
//
// Applies one update of an axis-aligned stencil (neighbours at distance
// 1, 2 and 3 along each of three axes) to one plane of an f64 grid.
//
// Parameter roles. The short labels are the reviewer's, inferred from how
// each value is used below -- confirm them against the host-side caller:
//   param_0   x0    first x of the innermost loop
//   param_1   x1    exclusive upper bound on x
//   param_2   y0    first y of the middle loop
//   param_3   y1    exclusive upper bound on y
//   param_4   z0    added to %ctaid.x to select the plane z
//   param_5   Nx    element stride between consecutive y
//   param_6   Ny    Nx * Ny is the element stride between consecutive z
//   param_7         never read by this kernel
//   param_8   coef  pointer to four f64 weights (byte offsets 0, 8, 16, 24)
//   param_9   vsq   f64 array, read at [index]
//   param_10  Ain   f64 array, read at [index] and at its 18 neighbours
//   param_11  Aout  f64 array, read and then overwritten at [index]
//
// Per grid point, with index = z*Nx*Ny + y*Nx + x:
//   Sk   = sum of the six Ain values k elements away along x, y and z
//   div  = coef[0]*Ain[index] + coef[1]*S1 + coef[2]*S2 + coef[3]*S3
//   Aout[index] = div * vsq[index] + (2*Ain[index] - Aout[index])
//
// Work distribution: a thread handles x = xb + (%tid.x & (WARP_SZ-1)) for
// xb = x0, x0+WARP_SZ, ... while xb < x1; every thread walks all y in
// [y0, y1) of the plane z = %ctaid.x + z0.
// NOTE(review): threads of one CTA that share %tid.x & (WARP_SZ-1) process
// the same points, and Aout[index] is read before it is rewritten, so this
// presumably expects one warp per CTA -- confirm the launch configuration.
//
// Addressing: element indices are 32-bit signed; the scaling to bytes is
// done with mul.wide.s32 so the byte offset is a full 64-bit value. The
// generated code shifted in 32 bits and wrapped once an index reached 2^28.
// All loads/stores use generic addressing (no state-space qualifier).
// There are no bounds checks: every neighbour address is dereferenced
// as-is, so the caller must keep x, y, z at least 3 cells inside the grid.
//
// Register roles (live across the loops):
//   %r0  x0      %r1  x1      %r2  y0      %r3  y1
//   %r4  Nx      %r5  Nxy     %r6  2*Nx    %r7  3*Nx    %r8  -3*Nx
//   %r9  2*Nxy   %r10 3*Nxy   %r11 -3*Nxy  %r13 -2*Nxy  %r14 -2*Nx
//   %r12 last z  %r15 z       %r17 y       %r31 xb      %r32 WARP_SZ
//   %r36 %tid.x & (WARP_SZ-1)
//   %rl0 vsq     %rl1 Ain     %rl2 Aout
//   %fl0..%fl3   coef[0..3]
//-----------------------------------------------------------------------
	// .globl	stencil_step_task       // @stencil_step_task
.entry stencil_step_task(
	.param .u32 stencil_step_task_param_0,
	.param .u32 stencil_step_task_param_1,
	.param .u32 stencil_step_task_param_2,
	.param .u32 stencil_step_task_param_3,
	.param .u32 stencil_step_task_param_4,
	.param .u32 stencil_step_task_param_5,
	.param .u32 stencil_step_task_param_6,
	.param .u32 stencil_step_task_param_7,
	.param .u64 .ptr .align 8 stencil_step_task_param_8,
	.param .u64 .ptr .align 8 stencil_step_task_param_9,
	.param .u64 .ptr .align 8 stencil_step_task_param_10,
	.param .u64 .ptr .align 8 stencil_step_task_param_11
)
{
	.reg .pred %p<396>;
	.reg .s16 %rc<396>;
	.reg .s16 %rs<396>;
	.reg .s32 %r<396>;
	.reg .s64 %rl<396>;
	.reg .f32 %f<396>;
	.reg .f64 %fl<396>;

// BB#0:                                // %allocas
	mov.u32 	%r12, %ctaid.x;
	ld.param.u32 	%r13, [stencil_step_task_param_4];
	add.s32 	%r16, %r12, %r13;               // z = %ctaid.x + z0
	add.s32 	%r0, %r16, 1;                   // exclusive end of the z range
	setp.ge.s32 	%p0, %r16, %r0;                 // true only if z + 1 wrapped around
	@%p0 bra 	BB0_11;

// BB#1:                                // %for_test28.i.preheader.lr.ph
	ld.param.u32 	%r0, [stencil_step_task_param_0];   // x0
	ld.param.u32 	%r1, [stencil_step_task_param_1];   // x1
	ld.param.u32 	%r2, [stencil_step_task_param_2];   // y0
	ld.param.u32 	%r3, [stencil_step_task_param_3];   // y1
	ld.param.u32 	%r4, [stencil_step_task_param_5];   // Nx
	ld.param.u32 	%r5, [stencil_step_task_param_6];   // Ny
	mul.lo.s32 	%r5, %r5, %r4;                  // Nxy = Ny * Nx
	ld.param.u64 	%rl3, [stencil_step_task_param_8];  // coef
	ld.f64 	%fl0, [%rl3];                           // coef[0]
	ld.f64 	%fl1, [%rl3+8];                         // coef[1]
	ld.f64 	%fl2, [%rl3+16];                        // coef[2]
	ld.f64 	%fl3, [%rl3+24];                        // coef[3]
	ld.param.u64 	%rl0, [stencil_step_task_param_9];  // vsq
	ld.param.u64 	%rl1, [stencil_step_task_param_10]; // Ain
	ld.param.u64 	%rl2, [stencil_step_task_param_11]; // Aout
	shl.b32 	%r6, %r4, 1;                    //  2*Nx
	mul.lo.s32 	%r7, %r4, 3;                    //  3*Nx
	mul.lo.s32 	%r8, %r4, -3;                   // -3*Nx
	shl.b32 	%r9, %r5, 1;                    //  2*Nxy
	mul.lo.s32 	%r10, %r5, 3;                   //  3*Nxy
	mul.lo.s32 	%r11, %r5, -3;                  // -3*Nxy
	add.s32 	%r12, %r12, %r13;               // last z to process (== first z)
	neg.s32 	%r13, %r9;                      // -2*Nxy
	neg.s32 	%r14, %r6;                      // -2*Nx
	mov.u32 	%r32, WARP_SZ;
	// %tid.x is fixed for the lifetime of the thread, so the per-thread
	// x offset is computed once here instead of on every inner iteration.
	mov.u32 	%r36, %tid.x;
	add.s32 	%r34, %r32, -1;
	and.b32 	%r36, %r34, %r36;               // %tid.x & (WARP_SZ-1)

BB0_2:                                  // %for_test28.i.preheader
                                        // =>This Loop Header: Depth=1
                                        //     Child Loop BB0_9 Depth 2
                                        //       Child Loop BB0_5 Depth 3
	mov.u32 	%r15, %r16;                     // current z
	setp.ge.s32 	%p0, %r2, %r3;                  // empty y range -> nothing to do
	@%p0 bra 	BB0_10;

// BB#3:                                // %for_test35.i.preheader.lr.ph
                                        //   in Loop: Header=BB0_2 Depth=1
	setp.lt.s32 	%p0, %r0, %r1;                  // empty x range -> nothing to do
	@%p0 bra 	BB0_4;
	bra.uni 	BB0_10;

BB0_4:                                  //   in Loop: Header=BB0_2 Depth=1
	mul.lo.s32 	%r16, %r15, %r5;                // z*Nxy (reuses %r16; z stays in %r15)
	mov.u32 	%r17, %r2;                      // y = y0

BB0_9:                                  // %for_loop37.i.lr.ph.us
                                        //   Parent Loop BB0_2 Depth=1
                                        // =>  This Loop Header: Depth=2
                                        //       Child Loop BB0_5 Depth 3
	// Row bases (element index at x = 0) for the centre row and for each
	// y/z neighbour row; the x coordinate is added per point below.
	mad.lo.s32 	%r18, %r17, %r4, %r16;          // row = y*Nx + z*Nxy
	add.s32 	%r19, %r18, %r4;                // row + Nx
	add.s32 	%r20, %r18, %r6;                // row + 2*Nx
	sub.s32 	%r21, %r18, %r4;                // row - Nx
	add.s32 	%r22, %r18, %r7;                // row + 3*Nx
	add.s32 	%r23, %r18, %r14;               // row - 2*Nx
	add.s32 	%r24, %r18, %r5;                // row + Nxy
	add.s32 	%r25, %r18, %r8;                // row - 3*Nx
	add.s32 	%r26, %r18, %r9;                // row + 2*Nxy
	sub.s32 	%r27, %r18, %r5;                // row - Nxy
	add.s32 	%r28, %r18, %r10;               // row + 3*Nxy
	add.s32 	%r29, %r18, %r13;               // row - 2*Nxy
	add.s32 	%r30, %r18, %r11;               // row - 3*Nxy
	mov.u32 	%r31, %r0;                      // xb = x0

BB0_5:                                  // %for_loop37.i.us
                                        //   Parent Loop BB0_2 Depth=1
                                        //     Parent Loop BB0_9 Depth=2
                                        // =>    This Inner Loop Header: Depth=3
	add.s32 	%r33, %r36, %r31;               // x = xb + per-thread offset
	setp.ge.s32 	%p0, %r33, %r1;                 // threads past x1 skip the point
	@%p0 bra 	BB0_7;

// BB#6:                                // %pl_dolane.i.us
                                        //   in Loop: Header=BB0_5 Depth=3
	// Centre element: 32-bit index, widened to a 64-bit byte offset.
	add.s32 	%r34, %r18, %r33;               // index
	mul.wide.s32 	%rl4, %r34, 8;                  // index * sizeof(f64)
	add.s64 	%rl5, %rl4, %rl1;               // &Ain[index]
	add.s64 	%rl6, %rl4, %rl2;               // &Aout[index]
	add.s64 	%rl7, %rl4, %rl0;               // &vsq[index]

	// Centre and x neighbours share one base address.
	ld.f64 	%fl21, [%rl5];                          // Ain[index]
	ld.f64 	%fl4, [%rl5+-8];                        // x-1
	ld.f64 	%fl5, [%rl5+8];                         // x+1
	ld.f64 	%fl6, [%rl5+-16];                       // x-2
	ld.f64 	%fl9, [%rl5+16];                        // x+2
	ld.f64 	%fl7, [%rl5+-24];                       // x-3
	ld.f64 	%fl10, [%rl5+24];                       // x+3

	// y neighbours.
	add.s32 	%r35, %r19, %r33;
	mul.wide.s32 	%rl3, %r35, 8;
	add.s64 	%rl3, %rl3, %rl1;
	ld.f64 	%fl8, [%rl3];                           // y+1
	add.s32 	%r35, %r21, %r33;
	mul.wide.s32 	%rl3, %r35, 8;
	add.s64 	%rl3, %rl3, %rl1;
	ld.f64 	%fl12, [%rl3];                          // y-1
	add.s32 	%r35, %r20, %r33;
	mul.wide.s32 	%rl3, %r35, 8;
	add.s64 	%rl3, %rl3, %rl1;
	ld.f64 	%fl13, [%rl3];                          // y+2
	add.s32 	%r35, %r23, %r33;
	mul.wide.s32 	%rl3, %r35, 8;
	add.s64 	%rl3, %rl3, %rl1;
	ld.f64 	%fl16, [%rl3];                          // y-2
	add.s32 	%r35, %r22, %r33;
	mul.wide.s32 	%rl3, %r35, 8;
	add.s64 	%rl3, %rl3, %rl1;
	ld.f64 	%fl11, [%rl3];                          // y+3
	add.s32 	%r35, %r25, %r33;
	mul.wide.s32 	%rl3, %r35, 8;
	add.s64 	%rl3, %rl3, %rl1;
	ld.f64 	%fl14, [%rl3];                          // y-3

	// z neighbours.
	add.s32 	%r35, %r24, %r33;
	mul.wide.s32 	%rl3, %r35, 8;
	add.s64 	%rl3, %rl3, %rl1;
	ld.f64 	%fl15, [%rl3];                          // z+1
	add.s32 	%r35, %r27, %r33;
	mul.wide.s32 	%rl3, %r35, 8;
	add.s64 	%rl3, %rl3, %rl1;
	ld.f64 	%fl18, [%rl3];                          // z-1
	add.s32 	%r35, %r26, %r33;
	mul.wide.s32 	%rl3, %r35, 8;
	add.s64 	%rl3, %rl3, %rl1;
	ld.f64 	%fl19, [%rl3];                          // z+2
	add.s32 	%r35, %r29, %r33;
	mul.wide.s32 	%rl3, %r35, 8;
	add.s64 	%rl3, %rl3, %rl1;
	ld.f64 	%fl24, [%rl3];                          // z-2
	add.s32 	%r35, %r28, %r33;
	mul.wide.s32 	%rl3, %r35, 8;
	add.s64 	%rl3, %rl3, %rl1;
	ld.f64 	%fl17, [%rl3];                          // z+3
	add.s32 	%r35, %r30, %r33;
	mul.wide.s32 	%rl3, %r35, 8;
	add.s64 	%rl3, %rl3, %rl1;
	ld.f64 	%fl20, [%rl3];                          // z-3

	ld.f64 	%fl23, [%rl6];                          // Aout[index] (old value)
	ld.f64 	%fl22, [%rl7];                          // vsq[index]

	// The order of the floating-point operations below is kept exactly
	// as generated so results stay bit-identical.
	add.f64 	%fl25, %fl21, %fl21;            // 2*Ain[index]
	sub.f64 	%fl23, %fl25, %fl23;            // 2*Ain[index] - Aout[index]
	add.f64 	%fl6, %fl6, %fl9;               // S2: x-2, x+2, y+2, y-2, z+2, z-2
	add.f64 	%fl6, %fl6, %fl13;
	add.f64 	%fl6, %fl6, %fl16;
	add.f64 	%fl6, %fl6, %fl19;
	add.f64 	%fl6, %fl6, %fl24;
	add.f64 	%fl4, %fl4, %fl5;               // S1: x-1, x+1, y+1, y-1, z+1, z-1
	add.f64 	%fl4, %fl4, %fl8;
	add.f64 	%fl4, %fl4, %fl12;
	add.f64 	%fl4, %fl4, %fl15;
	add.f64 	%fl4, %fl4, %fl18;
	mul.f64 	%fl5, %fl0, %fl21;              // coef[0]*Ain[index]
	fma.rn.f64 	%fl4, %fl1, %fl4, %fl5;         // + coef[1]*S1
	fma.rn.f64 	%fl4, %fl2, %fl6, %fl4;         // + coef[2]*S2
	add.f64 	%fl5, %fl7, %fl10;              // S3: x-3, x+3, y+3, y-3, z+3, z-3
	add.f64 	%fl5, %fl5, %fl11;
	add.f64 	%fl5, %fl5, %fl14;
	add.f64 	%fl5, %fl5, %fl17;
	add.f64 	%fl5, %fl5, %fl20;
	fma.rn.f64 	%fl4, %fl3, %fl5, %fl4;         // div = ... + coef[3]*S3
	fma.rn.f64 	%fl4, %fl4, %fl22, %fl23;       // div*vsq[index] + (2*Ain - Aout)
	st.f64 	[%rl6], %fl4;                           // Aout[index] = result

BB0_7:                                  // %safe_if_after_true.i.us
                                        //   in Loop: Header=BB0_5 Depth=3
	add.s32 	%r31, %r32, %r31;               // xb += WARP_SZ
	setp.lt.s32 	%p0, %r31, %r1;
	@%p0 bra 	BB0_5;

// BB#8:                                // %for_exit38.i.us
                                        //   in Loop: Header=BB0_9 Depth=2
	add.s32 	%r17, %r17, 1;                  // y++
	setp.eq.s32 	%p0, %r17, %r3;
	@%p0 bra 	BB0_10;
	bra.uni 	BB0_9;

BB0_10:                                 // %for_exit31.i
                                        //   in Loop: Header=BB0_2 Depth=1
	add.s32 	%r16, %r15, 1;                  // z + 1
	setp.ne.s32 	%p0, %r15, %r12;                // %r12 == first z, so this falls through after one pass
	@%p0 bra 	BB0_2;

BB0_11:                                 // %stencil_step___uniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E_.exit
	ret;
}