//
// Generated by NVIDIA NVVM Compiler
// Compiler built on Thu Jul 18 02:37:37 2013 (1374107857)
// Cuda compilation tools, release 5.5, V5.5.0
//

// Module overview (reviewer annotations; every instruction below is unchanged):
//   * vprintf               - external function, declared here only.
//   * $str                  - 26-byte format string passed to vprintf.
//   * cudaMalloc,
//     cudaFuncGetAttributes - weak stubs that return the constant 30.
//   * mandelbrot_scanline   - kernel entry point; per-point escape-iteration
//                             counts are stored to a global u32 buffer.
// ".loc F L C" directives are debug line info referring to the ".file F" entries.

.version 3.2
.target sm_35
.address_size 64

    .file 1 "/home/evghenii/soft/ispc-code/ispc/examples/mandelbrot_tasks3d/mandel_task_cu.cu", 1383122156, 1370
    .file 2 "/usr/local/cuda-5.5/bin/..//include/cuda_device_runtime_api.h", 1375338991, 7655
    .file 3 "/usr/local/cuda-5.5/bin/..//include/device_functions.h", 1375338991, 185228

// External formatted-print routine: (format pointer, argument-buffer pointer) -> b32.
.extern .func (.param .b32 func_retval0) vprintf
(
    .param .b64 vprintf_param_0,
    .param .b64 vprintf_param_1
)
;

// Bytes decode (ASCII) to "vectorIndex= %d  bid= %d\n" followed by a NUL terminator.
.global .align 1 .b8 $str[26] = {118, 101, 99, 116, 111, 114, 73, 110, 100, 101, 120, 61, 32, 37, 100, 32, 32, 98, 105, 100, 61, 32, 37, 100, 10, 0};

// Weak stub: neither parameter is read; the function unconditionally returns 30.
// NOTE(review): 30 is presumably a cudaError_t value from the header named in
// ".file 2" -- confirm its meaning there.
.weak .func (.param .b32 func_retval0) cudaMalloc(
    .param .b64 cudaMalloc_param_0,
    .param .b64 cudaMalloc_param_1
)
{
    .reg .s32 %r<2>;

    mov.u32 %r1, 30;
    st.param.b32 [func_retval0+0], %r1;
    .loc 2 66 3
    ret;
}

// Weak stub: neither parameter is read; the function unconditionally returns 30.
// NOTE(review): same constant as the cudaMalloc stub above -- confirm its meaning.
.weak .func (.param .b32 func_retval0) cudaFuncGetAttributes(
    .param .b64 cudaFuncGetAttributes_param_0,
    .param .b64 cudaFuncGetAttributes_param_1
)
{
    .reg .s32 %r<2>;

    mov.u32 %r1, 30;
    st.param.b32 [func_retval0+0], %r1;
    .loc 2 71 3
    ret;
}

// Kernel mandelbrot_scanline.
//
// Parameter usage, as established by the instructions below:
//   param_0 (f32, %f9)  additive term of   cx = float(lane + x) * param_1 + param_0
//   param_1 (f32, %f10) scale factor of cx
//   param_2 (f32, %f11) additive term of   cy = float(row) * param_3 + param_2
//   param_3 (f32, %f12) scale factor of cy
//   param_4 (u32, %r14) upper clamp of the x range; also the row pitch (in elements)
//                       used when forming the output index
//   param_5 (u32, %r17) upper clamp of the row range
//   param_6 (u32, %r15) x extent handled per %ctaid.x value
//   param_7 (u32, %r18) row extent handled per %ctaid.y value
//   param_8 (u32, %r16) iteration limit of the escape loop
//   param_9 (u64, %rd1) generic address of the output buffer (converted to a global
//                       address; 4-byte elements are stored through it)
//
// "lane" in the comments below means %tid.x & 31 (register %r20).
// The x loop advances by a hard-coded 32 per iteration.
.visible .entry mandelbrot_scanline(
    .param .f32 mandelbrot_scanline_param_0,
    .param .f32 mandelbrot_scanline_param_1,
    .param .f32 mandelbrot_scanline_param_2,
    .param .f32 mandelbrot_scanline_param_3,
    .param .u32 mandelbrot_scanline_param_4,
    .param .u32 mandelbrot_scanline_param_5,
    .param .u32 mandelbrot_scanline_param_6,
    .param .u32 mandelbrot_scanline_param_7,
    .param .u32 mandelbrot_scanline_param_8,
    .param .u64 mandelbrot_scanline_param_9
)
{
    // 8-byte local scratch area; it holds the two u32 arguments for vprintf.
    .local .align 8 .b8 __local_depot2[8];
    .reg .b64 %SP;
    .reg .b64 %SPL;
    .reg .pred %p<9>;
    .reg .s32 %r<40>;
    .reg .f32 %f<20>;
    .reg .s64 %rd<8>;

    // %SPL = local-space address of the scratch area, %SP = its generic address.
    mov.u64 %SPL, __local_depot2;
    cvta.local.u64 %SP, %SPL;

    // Load all kernel parameters into registers.
    ld.param.f32 %f9, [mandelbrot_scanline_param_0];
    ld.param.f32 %f10, [mandelbrot_scanline_param_1];
    ld.param.f32 %f11, [mandelbrot_scanline_param_2];
    ld.param.f32 %f12, [mandelbrot_scanline_param_3];
    ld.param.u32 %r14, [mandelbrot_scanline_param_4];
    ld.param.u32 %r17, [mandelbrot_scanline_param_5];
    ld.param.u32 %r15, [mandelbrot_scanline_param_6];
    ld.param.u32 %r18, [mandelbrot_scanline_param_7];
    ld.param.u32 %r16, [mandelbrot_scanline_param_8];
    ld.param.u64 %rd1, [mandelbrot_scanline_param_9];

    // %rd2 = generic address of the scratch area (passed to vprintf),
    // %rd3 = the same location as a local-space address (used for the store).
    add.u64 %rd2, %SP, 0;
    .loc 1 35 1
    cvta.to.local.u64 %rd3, %rd2;
    mov.u32 %r19, %tid.x;
    and.b32 %r20, %r19, 31;                 // lane = low five bits of %tid.x
    mov.u32 %r21, %ntid.x;
    cvta.global.u64 %rd4, $str;             // generic address of the format string
    // Pack the two print arguments: {lane, %ntid.x}.
    // NOTE(review): the format string labels the second value "bid", but the value
    // stored here is %ntid.x, not a %ctaid register -- confirm which was intended.
    st.local.v2.u32 [%rd3], {%r20, %r21};
    // Callseq Start 0
    {
    .reg .b32 temp_param_reg;
    .param .b64 param0;
    st.param.b64 [param0+0], %rd4;          // arg 0: format string
    .param .b64 param1;
    st.param.b64 [param1+0], %rd2;          // arg 1: argument buffer
    .param .b32 retval0;
    .loc 1 35 1
    call.uni (retval0),
    vprintf,
    (
    param0,
    param1
    );
    ld.param.b32 %r22, [retval0+0];         // return value is loaded but not used afterwards
    }
    // Callseq End 0

    // x range end:  %r1 = min(%ctaid.x * param_6 + param_6, param_4)
    .loc 1 36 1
    mov.u32 %r23, %ctaid.x;
    .loc 1 37 1
    mad.lo.s32 %r24, %r23, %r15, %r15;
    .loc 3 2621 10
    min.s32 %r1, %r24, %r14;

    // Row range:  %r37 = %ctaid.y * param_7 (first row),
    //             %r3  = min(%r37 + param_7, param_5) (exclusive end)
    .loc 1 39 1
    mov.u32 %r25, %ctaid.y;
    mul.lo.s32 %r37, %r25, %r18;
    .loc 1 40 1
    add.s32 %r26, %r37, %r18;
    .loc 3 2621 10
    min.s32 %r3, %r26, %r17;

    // Empty row range: nothing to do.
    .loc 1 42 1
    setp.ge.s32 %p1, %r37, %r3;
    @%p1 bra BB2_12;

    cvta.to.global.u64 %rd5, %rd1;          // output buffer as a global-space address

BB2_2:
    // ---- Row loop head (row index in %r37) ----
    // Reset the x counter to the start of this block's x range for every row.
    .loc 1 36 1
    mul.lo.s32 %r38, %r23, %r15;
    // Empty x range: skip straight to the row increment.
    .loc 1 43 1
    setp.ge.s32 %p2, %r38, %r1;
    @%p2 bra BB2_11;

    // cy = float(row) * param_3 + param_2; constant for the whole row.
    .loc 1 46 1
    cvt.rn.f32.s32 %f13, %r37;
    fma.rn.f32 %f1, %f13, %f12, %f11;

BB2_4:
    // ---- x loop head (x counter in %r38, advanced by 32 per iteration) ----
    // cx = float(lane + x) * param_1 + param_0; note the unsigned int->float conversion.
    .loc 1 45 1
    add.s32 %r7, %r20, %r38;
    cvt.rn.f32.u32 %f14, %r7;
    fma.rn.f32 %f2, %f14, %f10, %f9;
    mov.u32 %r39, 0;                        // iteration counter = 0
    // With an iteration limit <= 0 the escape loop is skipped and 0 is the result.
    setp.gt.s32 %p3, %r16, 0;
    .loc 1 12 1
    @%p3 bra BB2_5;
    bra.uni BB2_8;

BB2_5:
    // Initial z = c:  %f18 <- cy, %f19 <- cx.
    mov.f32 %f18, %f1;
    mov.f32 %f19, %f2;

BB2_6:
    // ---- Escape loop: z <- z*z + c, with z = %f4 + i*%f3 and c = %f2 + i*%f1 ----
    .loc 1 13 1
    mov.f32 %f4, %f19;                      // re(z)
    mov.f32 %f3, %f18;                      // im(z)
    mul.f32 %f5, %f3, %f3;                  // im(z)^2
    mul.f32 %f6, %f4, %f4;                  // re(z)^2
    add.f32 %f15, %f6, %f5;                 // |z|^2
    setp.gt.f32 %p4, %f15, 0f40800000;      // 0f40800000 is 4.0f: leave when |z|^2 > 4
    @%p4 bra BB2_8;

    .loc 1 16 1
    sub.f32 %f16, %f6, %f5;                 // re(z)^2 - im(z)^2
    .loc 1 17 1
    add.f32 %f17, %f4, %f4;                 // 2 * re(z)
    .loc 1 19 1
    add.f32 %f7, %f2, %f16;                 // new re(z) = cx + (re^2 - im^2)
    .loc 1 20 1
    fma.rn.f32 %f8, %f17, %f3, %f1;         // new im(z) = 2*re*im + cy
    .loc 1 12 96
    add.s32 %r39, %r39, 1;
    .loc 1 12 1
    setp.lt.s32 %p5, %r39, %r16;            // continue while counter < param_8
    mov.f32 %f18, %f8;
    mov.f32 %f19, %f7;
    @%p5 bra BB2_6;

BB2_8:
    // Output element index = row * param_4 + x + lane.
    .loc 1 49 1
    mad.lo.s32 %r34, %r37, %r14, %r38;
    add.s32 %r11, %r34, %r20;
    // Skip the store when lane + x is at or past the x range end.
    // This comparison is unsigned, unlike the signed loop-bound comparisons.
    .loc 1 50 1
    setp.ge.u32 %p6, %r7, %r1;
    @%p6 bra BB2_10;

    // Store the iteration count as a u32 at byte offset index * 4 (64-bit widened).
    .loc 1 51 1
    mul.wide.s32 %rd6, %r11, 4;
    add.s64 %rd7, %rd5, %rd6;
    st.global.u32 [%rd7], %r39;

BB2_10:
    // x += 32; repeat while x < x range end.
    .loc 1 43 57
    add.s32 %r38, %r38, 32;
    .loc 1 43 1
    setp.lt.s32 %p7, %r38, %r1;
    @%p7 bra BB2_4;

BB2_11:
    // row += 1; repeat while row < row range end.
    .loc 1 42 57
    add.s32 %r37, %r37, 1;
    .loc 1 42 1
    setp.lt.s32 %p8, %r37, %r3;
    @%p8 bra BB2_2;

BB2_12:
    .loc 1 53 2
    ret;
}