//
// Generated by NVIDIA NVVM Compiler
// Compiler built on Thu Jul 18 02:37:37 2013 (1374107857)
// Cuda compilation tools, release 5.5, V5.5.0
//
// NOTE: stencil_step_task below has been adjusted by hand after generation
// (bounds guard moved ahead of the loads, loop-invariant values hoisted).
// Regenerating this file from stencil.cu will discard those adjustments.
//

.version 3.2
.target sm_35
.address_size 64

	.file	1 "/home/evghenii/soft/ispc-code/ispc/examples/stencil/stencil.cu", 1383254912, 2112

// NOTE(review): the declaration that opens the parameter list closed by the
// ")" below is not present in this file as seen here; only the tail of that
// function survives. It is kept verbatim. Restore the missing header from the
// compiler output before assembling.
//
// The surviving body stores the constant 30 into func_retval0 and returns.
)
{
	.reg .s32 	%r<2>;

	mov.u32 	%r1, 30;
	st.param.b32	[func_retval0+0], %r1;
	ret;
}

// cudaFuncGetAttributes: weak device-side stub.
// Ignores both 64-bit parameters and always returns the constant 30.
// NOTE(review): 30 is presumably a CUDA runtime error code - confirm against
// the cudaError enum of the toolkit this was built with.
.weak .func (.param .b32 func_retval0) cudaFuncGetAttributes(
	.param .b64 cudaFuncGetAttributes_param_0,
	.param .b64 cudaFuncGetAttributes_param_1
)
{
	.reg .s32 	%r<2>;

	mov.u32 	%r1, 30;
	st.param.b32	[func_retval0+0], %r1;
	ret;
}

// stencil_step_task: kernel entry point.
//
// The three nested loops are labelled z / y / x in the comments below
// (outermost to innermost); those labels are for readability only.
//
// Parameters as they are used by the code:
//   param_0, param_1 : inner (x) loop range [param_0, param_1), step 32
//   param_2, param_3 : middle (y) loop range [param_2, param_3), step 1
//   param_4          : added to %ctaid.x to form z
//   param_5          : element stride applied to y            ("y stride")
//   param_6          : multiplied by param_5 to give the element stride
//                      applied to z                            ("z stride")
//   param_7          : never read
//   param_8          : generic pointer to 4 doubles c0..c3, read once
//   param_9          : generic pointer to doubles w[], read at idx
//   param_10         : generic pointer to doubles in[], read at idx and at
//                      +/-1, +/-2, +/-3 elements along each of the 3 strides
//   param_11         : generic pointer to doubles out[], read then written
//                      at idx
//
// For each (z, y, x) and thread lane t = %tid.x with x + t < param_1:
//   idx      = z * (param_5 * param_6) + y * param_5 + x + t
//   acc      = c0 * in[idx]
//            + c1 * (sum of the 6 neighbours at distance 1)
//            + c2 * (sum of the 6 neighbours at distance 2)
//            + c3 * (sum of the 6 neighbours at distance 3)
//   out[idx] = 2 * in[idx] - out[idx] + w[idx] * acc
//
// The z loop's upper bound is its own start value + 1, so each block runs it
// once (unless the signed add wraps). All index arithmetic is 32-bit signed;
// elements are 8 bytes.
// NOTE(review): the x step of 32 suggests a launch with 32 threads per
// block - confirm against the host-side launch code.
.visible .entry stencil_step_task(
	.param .u32 stencil_step_task_param_0,
	.param .u32 stencil_step_task_param_1,
	.param .u32 stencil_step_task_param_2,
	.param .u32 stencil_step_task_param_3,
	.param .u32 stencil_step_task_param_4,
	.param .u32 stencil_step_task_param_5,
	.param .u32 stencil_step_task_param_6,
	.param .u32 stencil_step_task_param_7,
	.param .u64 stencil_step_task_param_8,
	.param .u64 stencil_step_task_param_9,
	.param .u64 stencil_step_task_param_10,
	.param .u64 stencil_step_task_param_11
)
{
	.reg .pred 	%p<8>;
	.reg .s32 	%r<54>;
	.reg .s64 	%rd<36>;
	.reg .f64 	%fd<48>;

	// ---- load kernel arguments -------------------------------------------
	ld.param.u32 	%r19, [stencil_step_task_param_0];	// x range start
	ld.param.u32 	%r20, [stencil_step_task_param_1];	// x range end (exclusive)
	ld.param.u32 	%r21, [stencil_step_task_param_2];	// y range start
	ld.param.u32 	%r22, [stencil_step_task_param_3];	// y range end (exclusive)
	ld.param.u32 	%r23, [stencil_step_task_param_4];	// z base
	ld.param.u32 	%r24, [stencil_step_task_param_5];	// y stride (elements)
	ld.param.u32 	%r25, [stencil_step_task_param_6];	// z stride = %r25 * %r24
	ld.param.u64 	%rd4, [stencil_step_task_param_8];	// c[4]
	ld.param.u64 	%rd1, [stencil_step_task_param_9];	// w[]
	ld.param.u64 	%rd2, [stencil_step_task_param_10];	// in[]
	ld.param.u64 	%rd3, [stencil_step_task_param_11];	// out[]
	cvta.to.global.u64 	%rd5, %rd4;

	// z = %ctaid.x + param_4; loop bound is z + 1
	mov.u32 	%r26, %ctaid.x;
	add.s32 	%r51, %r26, %r23;
	add.s32 	%r27, %r51, 1;

	// the four coefficients c0..c3
	ld.global.f64 	%fd1, [%rd5];
	ld.global.f64 	%fd2, [%rd5+8];
	ld.global.f64 	%fd3, [%rd5+16];
	ld.global.f64 	%fd4, [%rd5+24];

	// skip everything if z >= z + 1 (only possible on signed wraparound)
	setp.ge.s32	%p1, %r51, %r27;
	@%p1 bra 	BB2_11;

	mul.lo.s32 	%r28, %r25, %r24;	// z stride in elements
	shl.b32 	%r29, %r28, 1;		// 2 * z stride
	neg.s32 	%r30, %r29;
	shl.b32 	%r2, %r30, 3;		// -2 * z stride, in bytes
	cvta.to.global.u64 	%rd6, %rd2;	// in[]
	cvta.to.global.u64 	%rd31, %rd3;	// out[]
	cvta.to.global.u64 	%rd32, %rd1;	// w[]

BB2_2:						// ---- z loop body ----
	setp.ge.s32	%p2, %r21, %r22;	// empty y range?
	@%p2 bra 	BB2_10;

	mov.u32 	%r52, %r21;		// y = param_2

BB2_4:						// ---- y loop body ----
	mov.u32 	%r4, %r52;
	setp.ge.s32	%p3, %r19, %r20;	// empty x range?
	@%p3 bra 	BB2_9;

	// Element offsets that depend only on (y, z); x + tid is added per lane.
	mul.lo.s32 	%r32, %r51, %r28;	// z * zstride
	mad.lo.s32 	%r5, %r4, %r24, %r32;	// base = y * ystride + z * zstride
	add.s32 	%r6, %r24, %r5;		// base + 1 * ystride
	add.s32 	%r7, %r5, %r28;		// base + 1 * zstride
	shl.b32 	%r33, %r24, 1;		// 2 * ystride
	add.s32 	%r8, %r5, %r33;		// base + 2 * ystride
	mad.lo.s32 	%r9, %r24, -2, %r5;	// base - 2 * ystride
	add.s32 	%r10, %r5, %r29;	// base + 2 * zstride
	mad.lo.s32 	%r11, %r28, -2, %r5;	// base - 2 * zstride
	add.s32 	%r12, %r24, %r8;	// base + 3 * ystride
	mad.lo.s32 	%r13, %r28, 3, %r5;	// base + 3 * zstride

	// Values that do not change across x iterations, computed once here.
	mov.u32 	%r35, %tid.x;
	neg.s32 	%r39, %r33;
	shl.b32 	%r40, %r39, 3;
	cvt.s64.s32	%rd11, %r40;		// -2 * ystride, in bytes
	cvt.s64.s32	%rd15, %r2;		// -2 * zstride, in bytes

	mov.u32 	%r53, %r19;		// x = param_0

BB2_6:						// ---- x loop body ----
	mov.u32 	%r14, %r53;
	add.s32 	%r36, %r35, %r14;	// x + tid

	// Bounds guard first: lanes at or past the end of the x range must not
	// touch memory at all, since every value computed below is only
	// consumed by the guarded store.
	setp.ge.s32	%p4, %r36, %r20;
	@%p4 bra 	BB2_8;

	add.s32 	%r15, %r36, %r5;	// idx
	mul.wide.s32 	%rd7, %r15, 8;		// idx in bytes (reused for out[]/w[])
	add.s64 	%rd8, %rd6, %rd7;	// &in[idx]

	// ---- centre and distance-1 neighbours --------------------------------
	ld.global.f64 	%fd5, [%rd8];		// in[idx]
	ld.global.f64 	%fd7, [%rd8+-8];	// in[idx - 1]
	ld.global.f64 	%fd8, [%rd8+8];		// in[idx + 1]
	add.f64 	%fd9, %fd8, %fd7;
	add.s32 	%r37, %r6, %r36;
	mul.wide.s32 	%rd9, %r37, 8;
	add.s64 	%rd10, %rd6, %rd9;	// &in[idx + ystride]
	ld.global.f64 	%fd10, [%rd10];
	add.f64 	%fd11, %fd9, %fd10;
	add.s64 	%rd12, %rd10, %rd11;	// &in[idx - ystride]
	ld.global.f64 	%fd12, [%rd12];
	add.f64 	%fd13, %fd11, %fd12;
	add.s32 	%r41, %r7, %r36;
	mul.wide.s32 	%rd13, %r41, 8;
	add.s64 	%rd14, %rd6, %rd13;	// &in[idx + zstride]
	ld.global.f64 	%fd14, [%rd14];
	add.f64 	%fd15, %fd13, %fd14;
	add.s64 	%rd16, %rd14, %rd15;	// &in[idx - zstride]
	ld.global.f64 	%fd16, [%rd16];
	add.f64 	%fd17, %fd15, %fd16;
	mul.f64 	%fd18, %fd2, %fd17;		// c1 * sum(distance 1)
	fma.rn.f64 	%fd19, %fd1, %fd5, %fd18;	// + c0 * in[idx]

	// ---- distance-2 neighbours -------------------------------------------
	ld.global.f64 	%fd20, [%rd8+-16];	// in[idx - 2]
	ld.global.f64 	%fd21, [%rd8+16];	// in[idx + 2]
	add.f64 	%fd22, %fd21, %fd20;
	add.s32 	%r42, %r8, %r36;
	mul.wide.s32 	%rd17, %r42, 8;
	add.s64 	%rd18, %rd6, %rd17;	// &in[idx + 2 * ystride]
	ld.global.f64 	%fd23, [%rd18];
	add.f64 	%fd24, %fd22, %fd23;
	add.s32 	%r43, %r9, %r36;
	mul.wide.s32 	%rd19, %r43, 8;
	add.s64 	%rd20, %rd6, %rd19;	// &in[idx - 2 * ystride]
	ld.global.f64 	%fd25, [%rd20];
	add.f64 	%fd26, %fd24, %fd25;
	add.s32 	%r44, %r10, %r36;
	mul.wide.s32 	%rd21, %r44, 8;
	add.s64 	%rd22, %rd6, %rd21;	// &in[idx + 2 * zstride]
	ld.global.f64 	%fd27, [%rd22];
	add.f64 	%fd28, %fd26, %fd27;
	add.s32 	%r45, %r11, %r36;
	mul.wide.s32 	%rd23, %r45, 8;
	add.s64 	%rd24, %rd6, %rd23;	// &in[idx - 2 * zstride]
	ld.global.f64 	%fd29, [%rd24];
	add.f64 	%fd30, %fd28, %fd29;
	fma.rn.f64 	%fd31, %fd3, %fd30, %fd19;	// + c2 * sum(distance 2)

	// ---- distance-3 neighbours -------------------------------------------
	ld.global.f64 	%fd32, [%rd8+-24];	// in[idx - 3]
	ld.global.f64 	%fd33, [%rd8+24];	// in[idx + 3]
	add.f64 	%fd34, %fd33, %fd32;
	add.s32 	%r46, %r12, %r36;
	mul.wide.s32 	%rd25, %r46, 8;
	add.s64 	%rd26, %rd6, %rd25;	// &in[idx + 3 * ystride]
	ld.global.f64 	%fd35, [%rd26];
	add.f64 	%fd36, %fd34, %fd35;
	add.s64 	%rd27, %rd12, %rd11;	// &in[idx - 3 * ystride]
	ld.global.f64 	%fd37, [%rd27];
	add.f64 	%fd38, %fd36, %fd37;
	add.s32 	%r47, %r13, %r36;
	mul.wide.s32 	%rd28, %r47, 8;
	add.s64 	%rd29, %rd6, %rd28;	// &in[idx + 3 * zstride]
	ld.global.f64 	%fd39, [%rd29];
	add.f64 	%fd40, %fd38, %fd39;
	add.s64 	%rd30, %rd16, %rd15;	// &in[idx - 3 * zstride]
	ld.global.f64 	%fd41, [%rd30];
	add.f64 	%fd42, %fd40, %fd41;
	fma.rn.f64 	%fd6, %fd4, %fd42, %fd31;	// acc: + c3 * sum(distance 3)

	// ---- out[idx] = 2 * in[idx] - out[idx] + w[idx] * acc ------------------
	add.s64 	%rd34, %rd31, %rd7;	// &out[idx]
	ld.global.f64 	%fd43, [%rd34];
	add.f64 	%fd44, %fd5, %fd5;
	sub.f64 	%fd45, %fd44, %fd43;
	add.s64 	%rd35, %rd32, %rd7;	// &w[idx]
	ld.global.f64 	%fd46, [%rd35];
	fma.rn.f64 	%fd47, %fd46, %fd6, %fd45;
	st.global.f64 	[%rd34], %fd47;

BB2_8:						// x += 32; loop while x < param_1
	add.s32 	%r16, %r14, 32;
	setp.lt.s32	%p5, %r16, %r20;
	mov.u32 	%r53, %r16;
	@%p5 bra 	BB2_6;

BB2_9:						// y += 1; loop while y < param_3
	add.s32 	%r17, %r4, 1;
	setp.lt.s32	%p6, %r17, %r22;
	mov.u32 	%r52, %r17;
	@%p6 bra 	BB2_4;

BB2_10:						// z += 1; loop while z < ctaid.x + param_4 + 1
	add.s32 	%r51, %r51, 1;
	add.s32 	%r49, %r23, %r26;
	add.s32 	%r50, %r49, 1;
	setp.lt.s32	%p7, %r51, %r50;
	@%p7 bra 	BB2_2;

BB2_11:
	ret;
}