//
// Generated by NVIDIA NVVM Compiler
// Compiler built on Thu Jul 18 02:37:37 2013 (1374107857)
// Cuda compilation tools, release 5.5, V5.5.0
//
// Layout note: PTX "//" comments extend to the end of the physical line, so
// every comment in this module sits on a line of its own and every
// instruction sits on its own line. Do not re-join lines.
//

.version 3.2
.target sm_35
.address_size 64

// ---------------------------------------------------------------------------
// Device-runtime entry points used for launching child kernels from the GPU.
// Argument roles are inferred from how this module fills them in (see
// ISPCLaunch below) -- confirm against the CUDA device runtime documentation.
// ---------------------------------------------------------------------------
.extern .func (.param .b32 func_retval0) cudaLaunchDevice
(
    .param .b64 cudaLaunchDevice_param_0,
    .param .b64 cudaLaunchDevice_param_1,
    .param .align 4 .b8 cudaLaunchDevice_param_2[12],
    .param .align 4 .b8 cudaLaunchDevice_param_3[12],
    .param .b32 cudaLaunchDevice_param_4,
    .param .b64 cudaLaunchDevice_param_5
);

.extern .func (.param .b64 func_retval0) cudaGetParameterBuffer
(
    .param .b64 cudaGetParameterBuffer_param_0,
    .param .b64 cudaGetParameterBuffer_param_1
)
;

.extern .func (.param .b32 func_retval0) cudaDeviceSynchronize
(
)
;

// 32-byte table with no initializer; the stencil kernel indexes it with
// (tid.x & 31) and adds the loaded byte to its two outer loop counters.
.global .align 1 .b8 constDeltaForeach1[32];
// 32-byte table holding 0..31; indexed with (tid.x & 31) and added to the
// innermost (contiguous) index, i.e. a per-lane offset.
.global .align 1 .b8 constDeltaForeach4[32] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31};

// ---------------------------------------------------------------------------
// __shfl_i32(value, lane): returns shfl.idx of `value` from source lane
// `lane`, using 0x1f as the shuffle's clamp/segment operand.
// ---------------------------------------------------------------------------
.visible .func (.param .b32 func_retval0) __shfl_i32(
    .param .b32 __shfl_i32_param_0,
    .param .b32 __shfl_i32_param_1
)
{
    .reg .s32 %r<4>;

    ld.param.u32 %r2, [__shfl_i32_param_0];
    ld.param.u32 %r3, [__shfl_i32_param_1];
    // inline asm
    shfl.idx.b32 %r1, %r2, %r3, 0x1f;
    // inline asm
    st.param.b32 [func_retval0+0], %r1;
    ret;
}

// ---------------------------------------------------------------------------
// __shfl_xor_float(value, laneMask): butterfly shuffle (shfl.bfly) of a
// 32-bit float; param_1 is the lane XOR mask.
// ---------------------------------------------------------------------------
.visible .func (.param .b32 func_retval0) __shfl_xor_float(
    .param .b32 __shfl_xor_float_param_0,
    .param .b32 __shfl_xor_float_param_1
)
{
    .reg .s32 %r<2>;
    .reg .f32 %f<3>;

    ld.param.f32 %f2, [__shfl_xor_float_param_0];
    ld.param.u32 %r1, [__shfl_xor_float_param_1];
    // inline asm
    shfl.bfly.b32 %f1, %f2, %r1, 0x1f;
    // inline asm
    st.param.f32 [func_retval0+0], %f1;
    ret;
}

// ---------------------------------------------------------------------------
// __shfl_xor_i32(value, laneMask): butterfly shuffle (shfl.bfly) of a
// 32-bit integer; param_1 is the lane XOR mask.
// ---------------------------------------------------------------------------
.visible .func (.param .b32 func_retval0) __shfl_xor_i32(
    .param .b32 __shfl_xor_i32_param_0,
    .param .b32 __shfl_xor_i32_param_1
)
{
    .reg .s32 %r<4>;

    ld.param.u32 %r2, [__shfl_xor_i32_param_0];
    ld.param.u32 %r3, [__shfl_xor_i32_param_1];
    // inline asm
    shfl.bfly.b32 %r1, %r2, %r3, 0x1f;
    // inline asm
    st.param.b32 [func_retval0+0], %r1;
    ret;
}

// ---------------------------------------------------------------------------
// __fminf(a, b): returns min.f32 of the two arguments.
// ---------------------------------------------------------------------------
.visible .func (.param .b32 func_retval0) __fminf(
    .param .b32 __fminf_param_0,
    .param .b32 __fminf_param_1
)
{
    .reg .f32 %f<4>;

    ld.param.f32 %f2, [__fminf_param_0];
    ld.param.f32 %f3, [__fminf_param_1];
    // inline asm
    min.f32 %f1, %f2, %f3;
    // inline asm
    st.param.f32 [func_retval0+0], %f1;
    ret;
}

// ---------------------------------------------------------------------------
// __fmaxf(a, b): returns max.f32 of the two arguments.
// ---------------------------------------------------------------------------
.visible .func (.param .b32 func_retval0) __fmaxf(
    .param .b32 __fmaxf_param_0,
    .param .b32 __fmaxf_param_1
)
{
    .reg .f32 %f<4>;

    ld.param.f32 %f2, [__fmaxf_param_0];
    ld.param.f32 %f3, [__fmaxf_param_1];
    // inline asm
    max.f32 %f1, %f2, %f3;
    // inline asm
    st.param.f32 [func_retval0+0], %f1;
    ret;
}

// ---------------------------------------------------------------------------
// __ballot(flag): only the low byte of the argument is read (ld.param.u8).
// Returns the vote.ballot bit mask of lanes whose flag byte is non-zero.
// ---------------------------------------------------------------------------
.visible .func (.param .b32 func_retval0) __ballot(
    .param .b32 __ballot_param_0
)
{
    .reg .s32 %r<3>;

    ld.param.u8 %r2, [__ballot_param_0];
    // inline asm
    {
        .reg .pred %p1;
        setp.ne.u32 %p1, %r2, 0;
        vote.ballot.b32 %r1, %p1;
    }
    // inline asm
    st.param.b32 [func_retval0+0], %r1;
    ret;
}

// ---------------------------------------------------------------------------
// __lanemask_lt(): returns the %lanemask_lt special register.
// ---------------------------------------------------------------------------
.visible .func (.param .b32 func_retval0) __lanemask_lt(
)
{
    .reg .s32 %r<2>;

    // inline asm
    mov.u32 %r1, %lanemask_lt;
    // inline asm
    st.param.b32 [func_retval0+0], %r1;
    ret;
}

// ---------------------------------------------------------------------------
// ISPCAlloc(p0, p1, p2): stub. None of the three parameters is read; the
// function always returns the constant 1.
// ---------------------------------------------------------------------------
.visible .func (.param .b64 func_retval0) ISPCAlloc(
    .param .b64 ISPCAlloc_param_0,
    .param .b64 ISPCAlloc_param_1,
    .param .b32 ISPCAlloc_param_2
)
{
    .reg .s64 %rd<2>;

    mov.u64 %rd1, 1;
    st.param.b64 [func_retval0+0], %rd1;
    ret;
}

// ---------------------------------------------------------------------------
// ISPCGetParamBuffer(p0, p1, p2): param_0 is not read. Threads with
// (tid.x & 31) == 0 call cudaGetParameterBuffer(param_1, param_2) and return
// its result; all other threads return 0.
// ---------------------------------------------------------------------------
.visible .func (.param .b64 func_retval0) ISPCGetParamBuffer(
    .param .b64 ISPCGetParamBuffer_param_0,
    .param .b64 ISPCGetParamBuffer_param_1,
    .param .b64 ISPCGetParamBuffer_param_2
)
{
    .reg .pred %p<2>;
    .reg .s32 %r<3>;
    .reg .s64 %rd<7>;

    ld.param.u64 %rd3, [ISPCGetParamBuffer_param_1];
    ld.param.u64 %rd4, [ISPCGetParamBuffer_param_2];
    mov.u32 %r1, %tid.x;
    and.b32 %r2, %r1, 31;
    setp.ne.s32 %p1, %r2, 0;
    // Default result for threads that skip the call.
    mov.u64 %rd6, 0;
    @%p1 bra BB8_2;

    // Callseq Start 0
    {
        .reg .b32 temp_param_reg;
        .param .b64 param0;
        st.param.b64 [param0+0], %rd3;
        .param .b64 param1;
        st.param.b64 [param1+0], %rd4;
        .param .b64 retval0;
        call.uni (retval0),
        cudaGetParameterBuffer,
        (
        param0,
        param1
        );
        ld.param.b64 %rd6, [retval0+0];
    }
    // Callseq End 0

BB8_2:
    st.param.b64 [func_retval0+0], %rd6;
    ret;
}

// ---------------------------------------------------------------------------
// ISPCLaunch(p0, func, paramBuffer, n0, n1, n2): param_0 is not read.
// Threads with (tid.x & 31) == 0 call cudaLaunchDevice with
//   param0 = param_1, param1 = param_2,
//   param2 = { ((n0 - 1) >> 2) + 1, n1, n2 },
//   param3 = { 128, 1, 1 }, param4 = 0, param5 = 0.
// (Presumably grid dims, block dims, shared-memory size and stream in that
// order -- confirm against the device runtime API.)
// NOTE(review): the value returned by cudaLaunchDevice is loaded into %r6
// and never examined.
// ---------------------------------------------------------------------------
.visible .func ISPCLaunch(
    .param .b64 ISPCLaunch_param_0,
    .param .b64 ISPCLaunch_param_1,
    .param .b64 ISPCLaunch_param_2,
    .param .b32 ISPCLaunch_param_3,
    .param .b32 ISPCLaunch_param_4,
    .param .b32 ISPCLaunch_param_5
)
{
    .reg .pred %p<2>;
    .reg .s32 %r<16>;
    .reg .s64 %rd<6>;

    ld.param.u64 %rd1, [ISPCLaunch_param_1];
    ld.param.u64 %rd2, [ISPCLaunch_param_2];
    ld.param.u32 %r1, [ISPCLaunch_param_3];
    ld.param.u32 %r2, [ISPCLaunch_param_4];
    ld.param.u32 %r3, [ISPCLaunch_param_5];
    mov.u32 %r4, %tid.x;
    and.b32 %r5, %r4, 31;
    setp.ne.s32 %p1, %r5, 0;
    @%p1 bra BB9_2;

    // %r7 = ((n0 - 1) >> 2) + 1
    add.s32 %r14, %r1, -1;
    shr.s32 %r15, %r14, 2;
    add.s32 %r7, %r15, 1;
    mov.u32 %r12, 1;
    mov.u32 %r10, 128;
    mov.u32 %r13, 0;
    mov.u64 %rd5, 0;
    // inline asm
    {
        .param .b64 param0;
        st.param.b64 [param0+0], %rd1;
        .param .b64 param1;
        st.param.b64 [param1+0], %rd2;
        .param .align 4 .b8 param2[12];
        st.param.b32 [param2+0], %r7;
        st.param.b32 [param2+4], %r2;
        st.param.b32 [param2+8], %r3;
        .param .align 4 .b8 param3[12];
        st.param.b32 [param3+0], %r10;
        st.param.b32 [param3+4], %r12;
        st.param.b32 [param3+8], %r12;
        .param .b32 param4;
        st.param.b32 [param4+0], %r13;
        .param .b64 param5;
        st.param.b64 [param5+0], %rd5;
        .param .b32 retval0;
        call.uni (retval0),
        cudaLaunchDevice,
        (
        param0,
        param1,
        param2,
        param3,
        param4,
        param5
        );
        ld.param.b32 %r6, [retval0+0];
    }
    // inline asm

BB9_2:
    ret;
}

// ---------------------------------------------------------------------------
// ISPCSync(p0): param_0 is not read. Calls cudaDeviceSynchronize().
// NOTE(review): the returned status (%r1) is discarded.
// ---------------------------------------------------------------------------
.visible .func ISPCSync(
    .param .b64 ISPCSync_param_0
)
{
    .reg .s32 %r<2>;

    // Callseq Start 1
    {
        .reg .b32 temp_param_reg;
        .param .b32 retval0;
        call.uni (retval0),
        cudaDeviceSynchronize,
        (
        );
        ld.param.b32 %r1, [retval0+0];
    }
    // Callseq End 1
    ret;
}

// ---------------------------------------------------------------------------
// __warpBinExclusiveScan(flag): only the low byte of the argument is read.
// Returns a packed 64-bit value:
//   bits 63..32 = popc(ballot & %lanemask_lt)  (set flags in lower lanes)
//   bits 31..0  = popc(ballot)                 (set flags in the whole vote)
// ---------------------------------------------------------------------------
.visible .func (.param .b64 func_retval0) __warpBinExclusiveScan(
    .param .b32 __warpBinExclusiveScan_param_0
)
{
    .reg .s32 %r<8>;
    .reg .s64 %rd<5>;

    ld.param.u8 %r2, [__warpBinExclusiveScan_param_0];
    // inline asm
    {
        .reg .pred %p1;
        setp.ne.u32 %p1, %r2, 0;
        vote.ballot.b32 %r1, %p1;
    }
    // inline asm
    // inline asm
    popc.b32 %r3, %r1;
    // inline asm
    // inline asm
    mov.u32 %r5, %lanemask_lt;
    // inline asm
    and.b32 %r7, %r5, %r1;
    // inline asm
    popc.b32 %r6, %r7;
    // inline asm
    cvt.u64.u32 %rd1, %r6;
    shl.b64 %rd2, %rd1, 32;
    cvt.u64.u32 %rd3, %r3;
    or.b64 %rd4, %rd2, %rd3;
    st.param.b64 [func_retval0+0], %rd4;
    ret;
}

// ---------------------------------------------------------------------------
// Stencil kernel.
//
// Parameters as used by the code below:
//   param_0 / param_1 : lower / upper bound of the innermost index (x)
//   param_2 / param_3 : lower / upper bound of the middle index (y)
//   param_4 / param_5 : lower / upper bound of the outer index (z)
//   param_6           : stride of y in elements          (%r48)
//   param_7           : multiplied by param_6 -> stride of z in elements
//   param_8           : declared but never loaded
//   param_9           : pointer to 4 f64 coefficients (offsets 0, 8, 16, 24)
//   param_10          : f64 array used as the per-element multiplier
//   param_11          : f64 input array (centre and neighbour reads)
//   param_12          : f64 array that is read and then overwritten
//
// Work decomposition: each group of 32 threads handles a 32-wide x chunk
// starting at param_0 + ((ctaid.x * 128 + tid.x) & -32); each block covers
// 8 y values (ctaid.y * 8) and 8 z values (ctaid.z * 8), clamped to the
// upper bounds.
//
// Per element i:
//   out[i] = (2*in[i] - out[i])
//          + mult[i] * ( c0*in[i]
//                      + c1*(sum of the 6 neighbours at distance 1)
//                      + c2*(sum of the 6 neighbours at distance 2)
//                      + c3*(sum of the 6 neighbours at distance 3) )
// where the neighbours are at +-d, +-d*strideY, +-d*strideZ.
//
// NOTE(review): byte offsets are formed in 32 bits (shl.b32 by 3) and only
// then sign-extended, so element indices of 2^28 or more wrap.
// ---------------------------------------------------------------------------
.entry stencil_step_task___UM_uniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E_(
    .param .u32 stencil_step_task___UM_uniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E__param_0,
    .param .u32 stencil_step_task___UM_uniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E__param_1,
    .param .u32 stencil_step_task___UM_uniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E__param_2,
    .param .u32 stencil_step_task___UM_uniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E__param_3,
    .param .u32 stencil_step_task___UM_uniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E__param_4,
    .param .u32 stencil_step_task___UM_uniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E__param_5,
    .param .u32 stencil_step_task___UM_uniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E__param_6,
    .param .u32 stencil_step_task___UM_uniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E__param_7,
    .param .u32 stencil_step_task___UM_uniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E__param_8,
    .param .u64 stencil_step_task___UM_uniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E__param_9,
    .param .u64 stencil_step_task___UM_uniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E__param_10,
    .param .u64 stencil_step_task___UM_uniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E__param_11,
    .param .u64 stencil_step_task___UM_uniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E__param_12
)
{
    .reg .pred %p<14>;
    .reg .s32 %r<178>;
    .reg .s64 %rd<96>;
    .reg .f64 %fd<95>;

    ld.param.u32 %r42, [stencil_step_task___UM_uniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E__param_0];
    ld.param.u32 %r43, [stencil_step_task___UM_uniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E__param_1];
    ld.param.u32 %r44, [stencil_step_task___UM_uniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E__param_2];
    ld.param.u32 %r45, [stencil_step_task___UM_uniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E__param_3];
    ld.param.u32 %r46, [stencil_step_task___UM_uniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E__param_4];
    ld.param.u32 %r47, [stencil_step_task___UM_uniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E__param_5];
    ld.param.u32 %r48, [stencil_step_task___UM_uniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E__param_6];
    ld.param.u32 %r49, [stencil_step_task___UM_uniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E__param_7];
    ld.param.u64 %rd2, [stencil_step_task___UM_uniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E__param_9];
    ld.param.u64 %rd3, [stencil_step_task___UM_uniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E__param_10];
    ld.param.u64 %rd4, [stencil_step_task___UM_uniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E__param_11];
    ld.param.u64 %rd5, [stencil_step_task___UM_uniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E__param_12];

    // Early-out guard: (ctaid.x * 4 + tid.x / 32) must be below nctaid.x * 4,
    // and ctaid.y / ctaid.z must be below nctaid.y / nctaid.z.
    mov.u32 %r1, %ctaid.x;
    shl.b32 %r50, %r1, 2;
    mov.u32 %r2, %tid.x;
    shr.s32 %r51, %r2, 5;
    add.s32 %r52, %r51, %r50;
    mov.u32 %r53, %nctaid.x;
    shl.b32 %r54, %r53, 2;
    setp.ge.s32 %p1, %r52, %r54;
    mov.u32 %r55, %nctaid.y;
    mov.u32 %r3, %ctaid.y;
    setp.ge.s32 %p2, %r3, %r55;
    or.pred %p3, %p1, %p2;
    mov.u32 %r56, %nctaid.z;
    mov.u32 %r4, %ctaid.z;
    setp.ge.s32 %p4, %r4, %r56;
    or.pred %p5, %p3, %p4;
    @%p5 bra BB12_13;

    // x range: %r60 = xStart, %r5 = xEnd = min(param_1, xStart + 32).
    shl.b32 %r57, %r1, 7;
    add.s32 %r58, %r2, %r57;
    and.b32 %r59, %r58, -32;
    add.s32 %r60, %r59, %r42;
    add.s32 %r61, %r60, 32;
    min.s32 %r5, %r43, %r61;
    // y range start: %r62 = param_2 + ctaid.y * 8.
    shl.b32 %r6, %r3, 3;
    add.s32 %r62, %r6, %r44;
    add.s32 %r7, %r62, 8;
    // z range: %r172 = param_4 + ctaid.z * 8, %r64 = min(param_5, start + 8).
    shl.b32 %r8, %r4, 3;
    add.s32 %r172, %r8, %r46;
    add.s32 %r63, %r172, 8;
    min.s32 %r64, %r47, %r63;
    // %r10 = z stride in elements.
    mul.lo.s32 %r10, %r49, %r48;
    // %r11 = xEnd - ((xEnd - xStart) % 32): end of the full 32-wide chunks.
    sub.s32 %r65, %r5, %r60;
    shr.s32 %r66, %r65, 31;
    shr.u32 %r67, %r66, 27;
    add.s32 %r68, %r65, %r67;
    and.b32 %r69, %r68, -32;
    sub.s32 %r70, %r65, %r69;
    sub.s32 %r11, %r5, %r70;
    // %rd6 = lane index; %rd1 = &constDeltaForeach1[lane].
    and.b32 %r71, %r2, 31;
    cvt.u64.u32 %rd6, %r71;
    mov.u64 %rd7, constDeltaForeach1;
    add.s64 %rd1, %rd7, %rd6;
    setp.ge.s32 %p6, %r172, %r64;
    @%p6 bra BB12_13;

    min.s32 %r12, %r45, %r7;
    // Pre-computed z-stride multiples: +2, -2, +3, -3.
    shl.b32 %r15, %r10, 1;
    neg.s32 %r16, %r15;
    mul.lo.s32 %r17, %r10, 3;
    mul.lo.s32 %r18, %r10, -3;
    // %r19 = min(param_2 + ctaid.y*8 + 8, param_3): y loop end.
    mov.u32 %r72, -9;
    sub.s32 %r73, %r72, %r44;
    sub.s32 %r74, %r73, %r6;
    not.b32 %r75, %r45;
    max.s32 %r76, %r74, %r75;
    not.b32 %r19, %r76;
    // %r20 = min(param_4 + ctaid.z*8 + 8, param_5): z loop end.
    sub.s32 %r77, %r72, %r46;
    sub.s32 %r78, %r77, %r8;
    not.b32 %r79, %r47;
    max.s32 %r80, %r78, %r79;
    not.b32 %r20, %r80;
    ld.global.u8 %r13, [%rd1];
    mov.u32 %r171, %r172;

// ---- z loop ---------------------------------------------------------------
BB12_3:
    mov.u32 %r21, %r171;
    add.s32 %r23, %r21, %r13;
    setp.ge.s32 %p7, %r62, %r12;
    @%p7 bra BB12_12;

    // %r24 = z * strideZ
    mul.lo.s32 %r24, %r23, %r10;
    mov.u32 %r174, %r62;
    mov.u32 %r173, %r62;

// ---- y loop ---------------------------------------------------------------
BB12_5:
    mov.u32 %r27, %r173;
    add.s32 %r30, %r27, %r13;
    setp.ge.s32 %p8, %r60, %r11;
    mov.u32 %r176, %r60;
    @%p8 bra BB12_8;

    mov.u64 %rd9, constDeltaForeach4;
    add.s64 %rd10, %rd9, %rd6;
    ld.global.u8 %r31, [%rd10];
    // %r32 = y * strideY + z * strideZ
    mad.lo.s32 %r32, %r30, %r48, %r24;
    add.s32 %r177, %r59, %r42;

// ---- x loop over full 32-wide chunks (no per-lane bound check) ------------
BB12_7:
    cvta.to.global.u64 %rd11, %rd2;
    // %r99 = element index, %r100 = byte offset (index * 8, 32-bit).
    add.s32 %r98, %r32, %r177;
    add.s32 %r99, %r98, %r31;
    shl.b32 %r100, %r99, 3;
    cvt.s64.s32 %rd12, %r100;
    add.s64 %rd13, %rd12, %rd4;
    // x +- 1
    add.s32 %r101, %r100, 8;
    cvt.s64.s32 %rd14, %r101;
    add.s64 %rd15, %rd14, %rd4;
    add.s32 %r102, %r100, -8;
    cvt.s64.s32 %rd16, %r102;
    add.s64 %rd17, %rd16, %rd4;
    // y +- 1
    add.s32 %r103, %r99, %r48;
    shl.b32 %r104, %r103, 3;
    cvt.s64.s32 %rd18, %r104;
    add.s64 %rd19, %rd18, %rd4;
    sub.s32 %r105, %r99, %r48;
    shl.b32 %r106, %r105, 3;
    cvt.s64.s32 %rd20, %r106;
    add.s64 %rd21, %rd20, %rd4;
    // z +- 1
    add.s32 %r108, %r99, %r10;
    shl.b32 %r109, %r108, 3;
    cvt.s64.s32 %rd22, %r109;
    add.s64 %rd23, %rd22, %rd4;
    sub.s32 %r110, %r99, %r10;
    shl.b32 %r111, %r110, 3;
    cvt.s64.s32 %rd24, %r111;
    add.s64 %rd25, %rd24, %rd4;
    // x +- 2
    add.s32 %r112, %r100, 16;
    cvt.s64.s32 %rd26, %r112;
    add.s64 %rd27, %rd26, %rd4;
    add.s32 %r113, %r100, -16;
    cvt.s64.s32 %rd28, %r113;
    add.s64 %rd29, %rd28, %rd4;
    // y +- 2
    shl.b32 %r114, %r48, 1;
    add.s32 %r115, %r99, %r114;
    shl.b32 %r116, %r115, 3;
    cvt.s64.s32 %rd30, %r116;
    add.s64 %rd31, %rd30, %rd4;
    mad.lo.s32 %r117, %r48, -2, %r99;
    shl.b32 %r118, %r117, 3;
    cvt.s64.s32 %rd32, %r118;
    add.s64 %rd33, %rd32, %rd4;
    // z +- 2
    add.s32 %r119, %r99, %r15;
    shl.b32 %r120, %r119, 3;
    cvt.s64.s32 %rd34, %r120;
    add.s64 %rd35, %rd34, %rd4;
    add.s32 %r121, %r99, %r16;
    shl.b32 %r122, %r121, 3;
    cvt.s64.s32 %rd36, %r122;
    add.s64 %rd37, %rd36, %rd4;
    // x +- 3
    add.s32 %r123, %r100, 24;
    cvt.s64.s32 %rd38, %r123;
    add.s64 %rd39, %rd38, %rd4;
    add.s32 %r124, %r100, -24;
    cvt.s64.s32 %rd40, %r124;
    add.s64 %rd41, %rd40, %rd4;
    // y +- 3
    mad.lo.s32 %r125, %r48, 3, %r99;
    shl.b32 %r126, %r125, 3;
    cvt.s64.s32 %rd42, %r126;
    add.s64 %rd43, %rd42, %rd4;
    mad.lo.s32 %r127, %r48, -3, %r99;
    shl.b32 %r128, %r127, 3;
    cvt.s64.s32 %rd44, %r128;
    add.s64 %rd45, %rd44, %rd4;
    // z +- 3
    add.s32 %r129, %r99, %r17;
    shl.b32 %r130, %r129, 3;
    cvt.s64.s32 %rd46, %r130;
    add.s64 %rd47, %rd46, %rd4;
    add.s32 %r131, %r99, %r18;
    shl.b32 %r132, %r131, 3;
    cvt.s64.s32 %rd48, %r132;
    add.s64 %rd49, %rd48, %rd4;
    // %rd50 = &out[i], %rd51 = &mult[i]
    add.s64 %rd50, %rd12, %rd5;
    add.s64 %rd51, %rd12, %rd3;
    // %fd4 = 2*in[i] - out[i]
    ld.f64 %fd1, [%rd13];
    add.f64 %fd2, %fd1, %fd1;
    ld.f64 %fd3, [%rd50];
    sub.f64 %fd4, %fd2, %fd3;
    // %fd19 = c0*in[i] + c1*(distance-1 neighbours)
    ld.global.f64 %fd5, [%rd11];
    ld.f64 %fd6, [%rd17];
    ld.f64 %fd7, [%rd15];
    add.f64 %fd8, %fd7, %fd6;
    ld.f64 %fd9, [%rd19];
    add.f64 %fd10, %fd8, %fd9;
    ld.f64 %fd11, [%rd21];
    add.f64 %fd12, %fd10, %fd11;
    ld.f64 %fd13, [%rd23];
    add.f64 %fd14, %fd12, %fd13;
    ld.f64 %fd15, [%rd25];
    add.f64 %fd16, %fd14, %fd15;
    ld.global.f64 %fd17, [%rd11+8];
    mul.f64 %fd18, %fd17, %fd16;
    fma.rn.f64 %fd19, %fd5, %fd1, %fd18;
    // %fd32 = %fd19 + c2*(distance-2 neighbours)
    ld.f64 %fd20, [%rd29];
    ld.f64 %fd21, [%rd27];
    add.f64 %fd22, %fd21, %fd20;
    ld.f64 %fd23, [%rd31];
    add.f64 %fd24, %fd22, %fd23;
    ld.f64 %fd25, [%rd33];
    add.f64 %fd26, %fd24, %fd25;
    ld.f64 %fd27, [%rd35];
    add.f64 %fd28, %fd26, %fd27;
    ld.f64 %fd29, [%rd37];
    add.f64 %fd30, %fd28, %fd29;
    ld.global.f64 %fd31, [%rd11+16];
    fma.rn.f64 %fd32, %fd31, %fd30, %fd19;
    // %fd45 = %fd32 + c3*(distance-3 neighbours)
    ld.f64 %fd33, [%rd41];
    ld.f64 %fd34, [%rd39];
    add.f64 %fd35, %fd34, %fd33;
    ld.f64 %fd36, [%rd43];
    add.f64 %fd37, %fd35, %fd36;
    ld.f64 %fd38, [%rd45];
    add.f64 %fd39, %fd37, %fd38;
    ld.f64 %fd40, [%rd47];
    add.f64 %fd41, %fd39, %fd40;
    ld.f64 %fd42, [%rd49];
    add.f64 %fd43, %fd41, %fd42;
    ld.global.f64 %fd44, [%rd11+24];
    fma.rn.f64 %fd45, %fd44, %fd43, %fd32;
    // out[i] = mult[i] * %fd45 + %fd4
    ld.f64 %fd46, [%rd51];
    fma.rn.f64 %fd47, %fd46, %fd45, %fd4;
    st.f64 [%rd50], %fd47;
    add.s32 %r177, %r177, 32;
    setp.lt.s32 %p9, %r177, %r11;
    mov.u32 %r175, %r177;
    mov.u32 %r176, %r175;
    @%p9 bra BB12_7;

// ---- x remainder: one partial chunk, each lane bound-checked --------------
BB12_8:
    mov.u32 %r36, %r176;
    setp.ge.s32 %p10, %r36, %r5;
    @%p10 bra BB12_11;

    mov.u64 %rd53, constDeltaForeach4;
    add.s64 %rd54, %rd53, %rd6;
    ld.global.u8 %r135, [%rd54];
    // %r37 = this lane's x; lanes at or past xEnd skip the update.
    add.s32 %r37, %r36, %r135;
    setp.ge.s32 %p11, %r37, %r5;
    @%p11 bra BB12_11;

    cvta.to.global.u64 %rd55, %rd2;
    // %r137 = element index, %r138 = byte offset (index * 8, 32-bit).
    mad.lo.s32 %r136, %r30, %r48, %r24;
    add.s32 %r137, %r136, %r37;
    shl.b32 %r138, %r137, 3;
    cvt.s64.s32 %rd56, %r138;
    add.s64 %rd57, %rd56, %rd4;
    // x +- 1
    add.s32 %r139, %r138, 8;
    cvt.s64.s32 %rd58, %r139;
    add.s64 %rd59, %rd58, %rd4;
    add.s32 %r140, %r138, -8;
    cvt.s64.s32 %rd60, %r140;
    add.s64 %rd61, %rd60, %rd4;
    // y +- 1
    add.s32 %r141, %r137, %r48;
    shl.b32 %r142, %r141, 3;
    cvt.s64.s32 %rd62, %r142;
    add.s64 %rd63, %rd62, %rd4;
    sub.s32 %r143, %r137, %r48;
    shl.b32 %r144, %r143, 3;
    cvt.s64.s32 %rd64, %r144;
    add.s64 %rd65, %rd64, %rd4;
    // z +- 1
    add.s32 %r146, %r137, %r10;
    shl.b32 %r147, %r146, 3;
    cvt.s64.s32 %rd66, %r147;
    add.s64 %rd67, %rd66, %rd4;
    sub.s32 %r148, %r137, %r10;
    shl.b32 %r149, %r148, 3;
    cvt.s64.s32 %rd68, %r149;
    add.s64 %rd69, %rd68, %rd4;
    // x +- 2
    add.s32 %r150, %r138, 16;
    cvt.s64.s32 %rd70, %r150;
    add.s64 %rd71, %rd70, %rd4;
    add.s32 %r151, %r138, -16;
    cvt.s64.s32 %rd72, %r151;
    add.s64 %rd73, %rd72, %rd4;
    // y +- 2
    shl.b32 %r152, %r48, 1;
    add.s32 %r153, %r137, %r152;
    shl.b32 %r154, %r153, 3;
    cvt.s64.s32 %rd74, %r154;
    add.s64 %rd75, %rd74, %rd4;
    mad.lo.s32 %r155, %r48, -2, %r137;
    shl.b32 %r156, %r155, 3;
    cvt.s64.s32 %rd76, %r156;
    add.s64 %rd77, %rd76, %rd4;
    // z +- 2
    add.s32 %r157, %r137, %r15;
    shl.b32 %r158, %r157, 3;
    cvt.s64.s32 %rd78, %r158;
    add.s64 %rd79, %rd78, %rd4;
    add.s32 %r159, %r137, %r16;
    shl.b32 %r160, %r159, 3;
    cvt.s64.s32 %rd80, %r160;
    add.s64 %rd81, %rd80, %rd4;
    // x +- 3
    add.s32 %r161, %r138, 24;
    cvt.s64.s32 %rd82, %r161;
    add.s64 %rd83, %rd82, %rd4;
    add.s32 %r162, %r138, -24;
    cvt.s64.s32 %rd84, %r162;
    add.s64 %rd85, %rd84, %rd4;
    // y +- 3
    mad.lo.s32 %r163, %r48, 3, %r137;
    shl.b32 %r164, %r163, 3;
    cvt.s64.s32 %rd86, %r164;
    add.s64 %rd87, %rd86, %rd4;
    mad.lo.s32 %r165, %r48, -3, %r137;
    shl.b32 %r166, %r165, 3;
    cvt.s64.s32 %rd88, %r166;
    add.s64 %rd89, %rd88, %rd4;
    // z +- 3
    add.s32 %r167, %r137, %r17;
    shl.b32 %r168, %r167, 3;
    cvt.s64.s32 %rd90, %r168;
    add.s64 %rd91, %rd90, %rd4;
    add.s32 %r169, %r137, %r18;
    shl.b32 %r170, %r169, 3;
    cvt.s64.s32 %rd92, %r170;
    add.s64 %rd93, %rd92, %rd4;
    // %rd94 = &out[i], %rd95 = &mult[i]
    add.s64 %rd94, %rd56, %rd5;
    add.s64 %rd95, %rd56, %rd3;
    // %fd51 = 2*in[i] - out[i]
    ld.f64 %fd48, [%rd57];
    add.f64 %fd49, %fd48, %fd48;
    ld.f64 %fd50, [%rd94];
    sub.f64 %fd51, %fd49, %fd50;
    // %fd66 = c0*in[i] + c1*(distance-1 neighbours)
    ld.global.f64 %fd52, [%rd55];
    ld.f64 %fd53, [%rd61];
    ld.f64 %fd54, [%rd59];
    add.f64 %fd55, %fd54, %fd53;
    ld.f64 %fd56, [%rd63];
    add.f64 %fd57, %fd55, %fd56;
    ld.f64 %fd58, [%rd65];
    add.f64 %fd59, %fd57, %fd58;
    ld.f64 %fd60, [%rd67];
    add.f64 %fd61, %fd59, %fd60;
    ld.f64 %fd62, [%rd69];
    add.f64 %fd63, %fd61, %fd62;
    ld.global.f64 %fd64, [%rd55+8];
    mul.f64 %fd65, %fd64, %fd63;
    fma.rn.f64 %fd66, %fd52, %fd48, %fd65;
    // %fd79 = %fd66 + c2*(distance-2 neighbours)
    ld.f64 %fd67, [%rd73];
    ld.f64 %fd68, [%rd71];
    add.f64 %fd69, %fd68, %fd67;
    ld.f64 %fd70, [%rd75];
    add.f64 %fd71, %fd69, %fd70;
    ld.f64 %fd72, [%rd77];
    add.f64 %fd73, %fd71, %fd72;
    ld.f64 %fd74, [%rd79];
    add.f64 %fd75, %fd73, %fd74;
    ld.f64 %fd76, [%rd81];
    add.f64 %fd77, %fd75, %fd76;
    ld.global.f64 %fd78, [%rd55+16];
    fma.rn.f64 %fd79, %fd78, %fd77, %fd66;
    // %fd92 = %fd79 + c3*(distance-3 neighbours)
    ld.f64 %fd80, [%rd85];
    ld.f64 %fd81, [%rd83];
    add.f64 %fd82, %fd81, %fd80;
    ld.f64 %fd83, [%rd87];
    add.f64 %fd84, %fd82, %fd83;
    ld.f64 %fd85, [%rd89];
    add.f64 %fd86, %fd84, %fd85;
    ld.f64 %fd87, [%rd91];
    add.f64 %fd88, %fd86, %fd87;
    ld.f64 %fd89, [%rd93];
    add.f64 %fd90, %fd88, %fd89;
    ld.global.f64 %fd91, [%rd55+24];
    fma.rn.f64 %fd92, %fd91, %fd90, %fd79;
    // out[i] = %fd92 * mult[i] + %fd51
    ld.f64 %fd93, [%rd95];
    fma.rn.f64 %fd94, %fd92, %fd93, %fd51;
    st.f64 [%rd94], %fd94;

// ---- y loop latch ---------------------------------------------------------
BB12_11:
    add.s32 %r39, %r174, 1;
    setp.ne.s32 %p12, %r39, %r19;
    mov.u32 %r174, %r39;
    mov.u32 %r173, %r39;
    @%p12 bra BB12_5;

// ---- z loop latch ---------------------------------------------------------
BB12_12:
    add.s32 %r171, %r172, 1;
    setp.ne.s32 %p13, %r171, %r20;
    mov.u32 %r172, %r171;
    @%p13 bra BB12_3;

BB12_13:
    ret;
}

// ---------------------------------------------------------------------------
// Device-callable time-stepping driver.
//
// Parameters as used by the code below:
//   param_0 / param_1 : first step / one-past-last step of the loop
//   param_2 .. param_10 : nine 32-bit values forwarded unchanged to the
//                         stencil kernel (its param_0 .. param_8)
//   param_11, param_12  : pointers forwarded as kernel param_9 / param_10
//   param_13, param_14  : the two pointers that alternate roles per step:
//                         even step -> kernel param_11 = param_13,
//                                      kernel param_12 = param_14;
//                         odd step  -> swapped
//   param_15            : 1-byte argument, never read
//
// Per step: the thread with (tid.x & 31) == 0 requests a 72-byte parameter
// buffer (8-byte aligned), fills it, and launches the stencil kernel with
// grid { ((ceil32(x1-x0) - 1) >> 2) + 1, (7 - y0 + y1) / 8, (7 - z0 + z1) / 8 }
// and block { 128, 1, 1 }; every thread then calls cudaDeviceSynchronize().
//
// NOTE(review): if cudaGetParameterBuffer returns 0 the stores are skipped
// but cudaLaunchDevice is still called with a null parameter buffer.
// NOTE(review): results of cudaLaunchDevice / cudaDeviceSynchronize are
// loaded and never examined.
// ---------------------------------------------------------------------------
.visible .func loop_stencil_ispc_tasks___uniuniuniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E_un_3C_und_3E_(
    .param .b32 loop_stencil_ispc_tasks___uniuniuniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E_un_3C_und_3E__param_0,
    .param .b32 loop_stencil_ispc_tasks___uniuniuniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E_un_3C_und_3E__param_1,
    .param .b32 loop_stencil_ispc_tasks___uniuniuniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E_un_3C_und_3E__param_2,
    .param .b32 loop_stencil_ispc_tasks___uniuniuniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E_un_3C_und_3E__param_3,
    .param .b32 loop_stencil_ispc_tasks___uniuniuniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E_un_3C_und_3E__param_4,
    .param .b32 loop_stencil_ispc_tasks___uniuniuniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E_un_3C_und_3E__param_5,
    .param .b32 loop_stencil_ispc_tasks___uniuniuniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E_un_3C_und_3E__param_6,
    .param .b32 loop_stencil_ispc_tasks___uniuniuniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E_un_3C_und_3E__param_7,
    .param .b32 loop_stencil_ispc_tasks___uniuniuniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E_un_3C_und_3E__param_8,
    .param .b32 loop_stencil_ispc_tasks___uniuniuniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E_un_3C_und_3E__param_9,
    .param .b32 loop_stencil_ispc_tasks___uniuniuniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E_un_3C_und_3E__param_10,
    .param .b64 loop_stencil_ispc_tasks___uniuniuniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E_un_3C_und_3E__param_11,
    .param .b64 loop_stencil_ispc_tasks___uniuniuniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E_un_3C_und_3E__param_12,
    .param .b64 loop_stencil_ispc_tasks___uniuniuniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E_un_3C_und_3E__param_13,
    .param .b64 loop_stencil_ispc_tasks___uniuniuniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E_un_3C_und_3E__param_14,
    .param .align 1 .b8 loop_stencil_ispc_tasks___uniuniuniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E_un_3C_und_3E__param_15[1]
)
{
    .reg .pred %p<9>;
    .reg .s32 %r<63>;
    .reg .s64 %rd<18>;

    ld.param.u32 %r62, [loop_stencil_ispc_tasks___uniuniuniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E_un_3C_und_3E__param_0];
    ld.param.u32 %r12, [loop_stencil_ispc_tasks___uniuniuniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E_un_3C_und_3E__param_1];
    ld.param.u32 %r13, [loop_stencil_ispc_tasks___uniuniuniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E_un_3C_und_3E__param_2];
    ld.param.u32 %r14, [loop_stencil_ispc_tasks___uniuniuniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E_un_3C_und_3E__param_3];
    ld.param.u32 %r15, [loop_stencil_ispc_tasks___uniuniuniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E_un_3C_und_3E__param_4];
    ld.param.u32 %r16, [loop_stencil_ispc_tasks___uniuniuniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E_un_3C_und_3E__param_5];
    ld.param.u32 %r17, [loop_stencil_ispc_tasks___uniuniuniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E_un_3C_und_3E__param_6];
    ld.param.u32 %r18, [loop_stencil_ispc_tasks___uniuniuniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E_un_3C_und_3E__param_7];
    ld.param.u32 %r19, [loop_stencil_ispc_tasks___uniuniuniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E_un_3C_und_3E__param_8];
    ld.param.u32 %r20, [loop_stencil_ispc_tasks___uniuniuniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E_un_3C_und_3E__param_9];
    ld.param.u32 %r21, [loop_stencil_ispc_tasks___uniuniuniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E_un_3C_und_3E__param_10];
    ld.param.u64 %rd4, [loop_stencil_ispc_tasks___uniuniuniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E_un_3C_und_3E__param_11];
    ld.param.u64 %rd5, [loop_stencil_ispc_tasks___uniuniuniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E_un_3C_und_3E__param_12];
    ld.param.u64 %rd6, [loop_stencil_ispc_tasks___uniuniuniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E_un_3C_und_3E__param_13];
    ld.param.u64 %rd7, [loop_stencil_ispc_tasks___uniuniuniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E_un_3C_und_3E__param_14];
    // Empty step range: go straight to the final synchronize.
    setp.ge.s32 %p1, %r62, %r12;
    @%p1 bra BB13_14;

    // %r28 = (31 - x0 + x1) / 32 (signed division, rounds toward zero)
    mov.u32 %r22, 31;
    sub.s32 %r23, %r22, %r13;
    add.s32 %r24, %r23, %r14;
    shr.s32 %r25, %r24, 31;
    shr.u32 %r26, %r25, 27;
    add.s32 %r27, %r24, %r26;
    shr.s32 %r28, %r27, 5;
    // %r1 = (7 - y0 + y1) / 8 : grid dimension y
    mov.u32 %r29, 7;
    sub.s32 %r30, %r29, %r15;
    add.s32 %r31, %r30, %r16;
    shr.s32 %r32, %r31, 31;
    shr.u32 %r33, %r32, 29;
    add.s32 %r34, %r31, %r33;
    shr.s32 %r1, %r34, 3;
    // %r2 = (7 - z0 + z1) / 8 : grid dimension z
    sub.s32 %r35, %r29, %r17;
    add.s32 %r36, %r35, %r18;
    shr.s32 %r37, %r36, 31;
    shr.u32 %r38, %r37, 29;
    add.s32 %r39, %r36, %r38;
    shr.s32 %r2, %r39, 3;
    // %r3 = ((%r28 - 1) >> 2) + 1 : grid dimension x
    add.s32 %r40, %r28, -1;
    shr.s32 %r41, %r40, 2;
    add.s32 %r3, %r41, 1;
    mov.u32 %r42, %tid.x;
    and.b32 %r4, %r42, 31;
    // %r61 counts up from (first - last) to 0.
    sub.s32 %r61, %r62, %r12;

// ---- one time step --------------------------------------------------------
BB13_2:
    // %r8 = step parity, %p2 = "not lane 0"
    and.b32 %r8, %r62, 1;
    setp.ne.s32 %p2, %r4, 0;
    mov.u64 %rd17, 0;
    @%p2 bra BB13_4;

    // cudaGetParameterBuffer(8, 72)
    mov.u64 %rd9, 8;
    mov.u64 %rd10, 72;
    // Callseq Start 2
    {
        .reg .b32 temp_param_reg;
        .param .b64 param0;
        st.param.b64 [param0+0], %rd9;
        .param .b64 param1;
        st.param.b64 [param1+0], %rd10;
        .param .b64 retval0;
        call.uni (retval0),
        cudaGetParameterBuffer,
        (
        param0,
        param1
        );
        ld.param.b64 %rd17, [retval0+0];
    }
    // Callseq End 2

BB13_4:
    setp.eq.s32 %p3, %r8, 0;
    @%p3 bra BB13_9;

    // Odd step: fill the buffer with the two alternating pointers swapped.
    setp.eq.s64 %p4, %rd17, 0;
    @%p4 bra BB13_7;

    st.u32 [%rd17], %r13;
    st.u32 [%rd17+4], %r14;
    st.u32 [%rd17+8], %r15;
    st.u32 [%rd17+12], %r16;
    st.u32 [%rd17+16], %r17;
    st.u32 [%rd17+20], %r18;
    st.u32 [%rd17+24], %r19;
    st.u32 [%rd17+28], %r20;
    st.u32 [%rd17+32], %r21;
    st.u64 [%rd17+40], %rd4;
    st.u64 [%rd17+48], %rd5;
    st.u64 [%rd17+56], %rd7;
    st.u64 [%rd17+64], %rd6;

BB13_7:
    @%p2 bra BB13_13;

    mov.u32 %r47, 128;
    mov.u32 %r49, 1;
    mov.u32 %r50, 0;
    mov.u64 %rd13, 0;
    mov.u64 %rd11, stencil_step_task___UM_uniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E_;
    // inline asm
    {
        .param .b64 param0;
        st.param.b64 [param0+0], %rd11;
        .param .b64 param1;
        st.param.b64 [param1+0], %rd17;
        .param .align 4 .b8 param2[12];
        st.param.b32 [param2+0], %r3;
        st.param.b32 [param2+4], %r1;
        st.param.b32 [param2+8], %r2;
        .param .align 4 .b8 param3[12];
        st.param.b32 [param3+0], %r47;
        st.param.b32 [param3+4], %r49;
        st.param.b32 [param3+8], %r49;
        .param .b32 param4;
        st.param.b32 [param4+0], %r50;
        .param .b64 param5;
        st.param.b64 [param5+0], %rd13;
        .param .b32 retval0;
        call.uni (retval0),
        cudaLaunchDevice,
        (
        param0,
        param1,
        param2,
        param3,
        param4,
        param5
        );
        ld.param.b32 %r43, [retval0+0];
    }
    // inline asm
    bra.uni BB13_13;

BB13_9:
    // Even step: the two alternating pointers in their declared order.
    setp.eq.s64 %p6, %rd17, 0;
    @%p6 bra BB13_11;

    st.u32 [%rd17], %r13;
    st.u32 [%rd17+4], %r14;
    st.u32 [%rd17+8], %r15;
    st.u32 [%rd17+12], %r16;
    st.u32 [%rd17+16], %r17;
    st.u32 [%rd17+20], %r18;
    st.u32 [%rd17+24], %r19;
    st.u32 [%rd17+28], %r20;
    st.u32 [%rd17+32], %r21;
    st.u64 [%rd17+40], %rd4;
    st.u64 [%rd17+48], %rd5;
    st.u64 [%rd17+56], %rd6;
    st.u64 [%rd17+64], %rd7;

BB13_11:
    @%p2 bra BB13_13;

    mov.u32 %r55, 128;
    mov.u32 %r57, 1;
    mov.u32 %r58, 0;
    mov.u64 %rd16, 0;
    mov.u64 %rd14, stencil_step_task___UM_uniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E_;
    // inline asm
    {
        .param .b64 param0;
        st.param.b64 [param0+0], %rd14;
        .param .b64 param1;
        st.param.b64 [param1+0], %rd17;
        .param .align 4 .b8 param2[12];
        st.param.b32 [param2+0], %r3;
        st.param.b32 [param2+4], %r1;
        st.param.b32 [param2+8], %r2;
        .param .align 4 .b8 param3[12];
        st.param.b32 [param3+0], %r55;
        st.param.b32 [param3+4], %r57;
        st.param.b32 [param3+8], %r57;
        .param .b32 param4;
        st.param.b32 [param4+0], %r58;
        .param .b64 param5;
        st.param.b64 [param5+0], %rd16;
        .param .b32 retval0;
        call.uni (retval0),
        cudaLaunchDevice,
        (
        param0,
        param1,
        param2,
        param3,
        param4,
        param5
        );
        ld.param.b32 %r51, [retval0+0];
    }
    // inline asm

BB13_13:
    // Synchronize after every step, then advance the step counters.
    // Callseq Start 3
    {
        .reg .b32 temp_param_reg;
        .param .b32 retval0;
        call.uni (retval0),
        cudaDeviceSynchronize,
        (
        );
        ld.param.b32 %r59, [retval0+0];
    }
    // Callseq End 3
    add.s32 %r62, %r62, 1;
    add.s32 %r61, %r61, 1;
    setp.ne.s32 %p8, %r61, 0;
    @%p8 bra BB13_2;

BB13_14:
    // Final synchronize before returning.
    // Callseq Start 4
    {
        .reg .b32 temp_param_reg;
        .param .b32 retval0;
        call.uni (retval0),
        cudaDeviceSynchronize,
        (
        );
        ld.param.b32 %r60, [retval0+0];
    }
    // Callseq End 4
    ret;
}

// ---------------------------------------------------------------------------
// Kernel entry point with the same body as the device function above
// (labels BB14_*), minus the unused trailing 1-byte parameter.
//
//   param_0 / param_1   : first step / one-past-last step
//   param_2 .. param_10 : forwarded to the stencil kernel as param_0..8
//   param_11, param_12  : forwarded as kernel param_9 / param_10
//   param_13, param_14  : pointers whose roles swap on odd steps
//
// NOTE(review): as in the device function, a null parameter buffer does not
// prevent the launch, and all device-runtime return codes are discarded.
// ---------------------------------------------------------------------------
.visible .entry loop_stencil_ispc_tasks(
    .param .u32 loop_stencil_ispc_tasks_param_0,
    .param .u32 loop_stencil_ispc_tasks_param_1,
    .param .u32 loop_stencil_ispc_tasks_param_2,
    .param .u32 loop_stencil_ispc_tasks_param_3,
    .param .u32 loop_stencil_ispc_tasks_param_4,
    .param .u32 loop_stencil_ispc_tasks_param_5,
    .param .u32 loop_stencil_ispc_tasks_param_6,
    .param .u32 loop_stencil_ispc_tasks_param_7,
    .param .u32 loop_stencil_ispc_tasks_param_8,
    .param .u32 loop_stencil_ispc_tasks_param_9,
    .param .u32 loop_stencil_ispc_tasks_param_10,
    .param .u64 loop_stencil_ispc_tasks_param_11,
    .param .u64 loop_stencil_ispc_tasks_param_12,
    .param .u64 loop_stencil_ispc_tasks_param_13,
    .param .u64 loop_stencil_ispc_tasks_param_14
)
{
    .reg .pred %p<9>;
    .reg .s32 %r<63>;
    .reg .s64 %rd<18>;

    ld.param.u32 %r62, [loop_stencil_ispc_tasks_param_0];
    ld.param.u32 %r12, [loop_stencil_ispc_tasks_param_1];
    ld.param.u32 %r13, [loop_stencil_ispc_tasks_param_2];
    ld.param.u32 %r14, [loop_stencil_ispc_tasks_param_3];
    ld.param.u32 %r15, [loop_stencil_ispc_tasks_param_4];
    ld.param.u32 %r16, [loop_stencil_ispc_tasks_param_5];
    ld.param.u32 %r17, [loop_stencil_ispc_tasks_param_6];
    ld.param.u32 %r18, [loop_stencil_ispc_tasks_param_7];
    ld.param.u32 %r19, [loop_stencil_ispc_tasks_param_8];
    ld.param.u32 %r20, [loop_stencil_ispc_tasks_param_9];
    ld.param.u32 %r21, [loop_stencil_ispc_tasks_param_10];
    ld.param.u64 %rd4, [loop_stencil_ispc_tasks_param_11];
    ld.param.u64 %rd5, [loop_stencil_ispc_tasks_param_12];
    ld.param.u64 %rd6, [loop_stencil_ispc_tasks_param_13];
    ld.param.u64 %rd7, [loop_stencil_ispc_tasks_param_14];
    // Empty step range: go straight to the final synchronize.
    setp.ge.s32 %p1, %r62, %r12;
    @%p1 bra BB14_14;

    // %r28 = (31 - x0 + x1) / 32 (signed division, rounds toward zero)
    mov.u32 %r22, 31;
    sub.s32 %r23, %r22, %r13;
    add.s32 %r24, %r23, %r14;
    shr.s32 %r25, %r24, 31;
    shr.u32 %r26, %r25, 27;
    add.s32 %r27, %r24, %r26;
    shr.s32 %r28, %r27, 5;
    // %r1 = (7 - y0 + y1) / 8 : grid dimension y
    mov.u32 %r29, 7;
    sub.s32 %r30, %r29, %r15;
    add.s32 %r31, %r30, %r16;
    shr.s32 %r32, %r31, 31;
    shr.u32 %r33, %r32, 29;
    add.s32 %r34, %r31, %r33;
    shr.s32 %r1, %r34, 3;
    // %r2 = (7 - z0 + z1) / 8 : grid dimension z
    sub.s32 %r35, %r29, %r17;
    add.s32 %r36, %r35, %r18;
    shr.s32 %r37, %r36, 31;
    shr.u32 %r38, %r37, 29;
    add.s32 %r39, %r36, %r38;
    shr.s32 %r2, %r39, 3;
    // %r3 = ((%r28 - 1) >> 2) + 1 : grid dimension x
    add.s32 %r40, %r28, -1;
    shr.s32 %r41, %r40, 2;
    add.s32 %r3, %r41, 1;
    mov.u32 %r42, %tid.x;
    and.b32 %r4, %r42, 31;
    // %r61 counts up from (first - last) to 0.
    sub.s32 %r61, %r62, %r12;

// ---- one time step --------------------------------------------------------
BB14_2:
    // %r8 = step parity, %p2 = "not lane 0"
    and.b32 %r8, %r62, 1;
    setp.ne.s32 %p2, %r4, 0;
    mov.u64 %rd17, 0;
    @%p2 bra BB14_4;

    // cudaGetParameterBuffer(8, 72)
    mov.u64 %rd9, 8;
    mov.u64 %rd10, 72;
    // Callseq Start 5
    {
        .reg .b32 temp_param_reg;
        .param .b64 param0;
        st.param.b64 [param0+0], %rd9;
        .param .b64 param1;
        st.param.b64 [param1+0], %rd10;
        .param .b64 retval0;
        call.uni (retval0),
        cudaGetParameterBuffer,
        (
        param0,
        param1
        );
        ld.param.b64 %rd17, [retval0+0];
    }
    // Callseq End 5

BB14_4:
    setp.eq.s32 %p3, %r8, 0;
    @%p3 bra BB14_9;

    // Odd step: fill the buffer with the two alternating pointers swapped.
    setp.eq.s64 %p4, %rd17, 0;
    @%p4 bra BB14_7;

    st.u32 [%rd17], %r13;
    st.u32 [%rd17+4], %r14;
    st.u32 [%rd17+8], %r15;
    st.u32 [%rd17+12], %r16;
    st.u32 [%rd17+16], %r17;
    st.u32 [%rd17+20], %r18;
    st.u32 [%rd17+24], %r19;
    st.u32 [%rd17+28], %r20;
    st.u32 [%rd17+32], %r21;
    st.u64 [%rd17+40], %rd4;
    st.u64 [%rd17+48], %rd5;
    st.u64 [%rd17+56], %rd7;
    st.u64 [%rd17+64], %rd6;

BB14_7:
    @%p2 bra BB14_13;

    mov.u32 %r47, 128;
    mov.u32 %r49, 1;
    mov.u32 %r50, 0;
    mov.u64 %rd13, 0;
    mov.u64 %rd11, stencil_step_task___UM_uniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E_;
    // inline asm
    {
        .param .b64 param0;
        st.param.b64 [param0+0], %rd11;
        .param .b64 param1;
        st.param.b64 [param1+0], %rd17;
        .param .align 4 .b8 param2[12];
        st.param.b32 [param2+0], %r3;
        st.param.b32 [param2+4], %r1;
        st.param.b32 [param2+8], %r2;
        .param .align 4 .b8 param3[12];
        st.param.b32 [param3+0], %r47;
        st.param.b32 [param3+4], %r49;
        st.param.b32 [param3+8], %r49;
        .param .b32 param4;
        st.param.b32 [param4+0], %r50;
        .param .b64 param5;
        st.param.b64 [param5+0], %rd13;
        .param .b32 retval0;
        call.uni (retval0),
        cudaLaunchDevice,
        (
        param0,
        param1,
        param2,
        param3,
        param4,
        param5
        );
        ld.param.b32 %r43, [retval0+0];
    }
    // inline asm
    bra.uni BB14_13;

BB14_9:
    // Even step: the two alternating pointers in their declared order.
    setp.eq.s64 %p6, %rd17, 0;
    @%p6 bra BB14_11;

    st.u32 [%rd17], %r13;
    st.u32 [%rd17+4], %r14;
    st.u32 [%rd17+8], %r15;
    st.u32 [%rd17+12], %r16;
    st.u32 [%rd17+16], %r17;
    st.u32 [%rd17+20], %r18;
    st.u32 [%rd17+24], %r19;
    st.u32 [%rd17+28], %r20;
    st.u32 [%rd17+32], %r21;
    st.u64 [%rd17+40], %rd4;
    st.u64 [%rd17+48], %rd5;
    st.u64 [%rd17+56], %rd6;
    st.u64 [%rd17+64], %rd7;

BB14_11:
    @%p2 bra BB14_13;

    mov.u32 %r55, 128;
    mov.u32 %r57, 1;
    mov.u32 %r58, 0;
    mov.u64 %rd16, 0;
    mov.u64 %rd14, stencil_step_task___UM_uniuniuniuniuniuniuniuniuniun_3C_Cund_3E_un_3C_Cund_3E_un_3C_Cund_3E_un_3C_und_3E_;
    // inline asm
    {
        .param .b64 param0;
        st.param.b64 [param0+0], %rd14;
        .param .b64 param1;
        st.param.b64 [param1+0], %rd17;
        .param .align 4 .b8 param2[12];
        st.param.b32 [param2+0], %r3;
        st.param.b32 [param2+4], %r1;
        st.param.b32 [param2+8], %r2;
        .param .align 4 .b8 param3[12];
        st.param.b32 [param3+0], %r55;
        st.param.b32 [param3+4], %r57;
        st.param.b32 [param3+8], %r57;
        .param .b32 param4;
        st.param.b32 [param4+0], %r58;
        .param .b64 param5;
        st.param.b64 [param5+0], %rd16;
        .param .b32 retval0;
        call.uni (retval0),
        cudaLaunchDevice,
        (
        param0,
        param1,
        param2,
        param3,
        param4,
        param5
        );
        ld.param.b32 %r51, [retval0+0];
    }
    // inline asm

BB14_13:
    // Synchronize after every step, then advance the step counters.
    // Callseq Start 6
    {
        .reg .b32 temp_param_reg;
        .param .b32 retval0;
        call.uni (retval0),
        cudaDeviceSynchronize,
        (
        );
        ld.param.b32 %r59, [retval0+0];
    }
    // Callseq End 6
    add.s32 %r62, %r62, 1;
    add.s32 %r61, %r61, 1;
    setp.ne.s32 %p8, %r61, 0;
    @%p8 bra BB14_2;

BB14_14:
    // Final synchronize before returning.
    // Callseq Start 7
    {
        .reg .b32 temp_param_reg;
        .param .b32 retval0;
        call.uni (retval0),
        cudaDeviceSynchronize,
        (
        );
        ld.param.b32 %r60, [retval0+0];
    }
    // Callseq End 7
    ret;
}