.file "mandelbrot_task.ispc" .text .globl mandelbrot_ispc .align 16, 0x90 .type mandelbrot_ispc,@function mandelbrot_ispc: # @mandelbrot_ispc # BB#0: # %allocas pushq %rbp movq %rsp, %rbp pushq %r15 pushq %r14 pushq %r13 pushq %r12 pushq %rbx andq $-32, %rsp subq $224, %rsp movq %rcx, %r14 movl %edx, %r12d movl %esi, %r13d movl %edi, %ebx vmovss %xmm3, 44(%rsp) # 4-byte Spill vmovss %xmm2, 36(%rsp) # 4-byte Spill vmovss %xmm1, 40(%rsp) # 4-byte Spill vmovss %xmm0, 32(%rsp) # 4-byte Spill movq $0, 192(%rsp) leaq 192(%rsp), %r15 movq %r15, %rdi movl $80, %esi movl $32, %edx callq CUDAAlloc vcvtsi2ssl %ebx, %xmm0, %xmm0 vmovss 36(%rsp), %xmm1 # 4-byte Reload vmovss 32(%rsp), %xmm2 # 4-byte Reload vsubss %xmm2, %xmm1, %xmm1 vmovss %xmm2, 92(%rsp) vdivss %xmm0, %xmm1, %xmm0 vcvtsi2ssl %r13d, %xmm0, %xmm1 vmovss 40(%rsp), %xmm3 # 4-byte Reload vmovss 44(%rsp), %xmm2 # 4-byte Reload vsubss %xmm3, %xmm2, %xmm2 movl %ebx, %r9d sarl $31, %r9d shrl $27, %r9d movl %r13d, %eax sarl $31, %eax leaq 92(%rsp), %rcx movq %rcx, 96(%rsp) vdivss %xmm1, %xmm2, %xmm1 shrl $30, %eax addl %r13d, %eax addl %ebx, %r9d leaq 72(%rsp), %rcx leaq 76(%rsp), %rdx leaq 80(%rsp), %rsi sarl $5, %r9d sarl $2, %eax vmovss %xmm0, 88(%rsp) leaq 88(%rsp), %rdi movq %rdi, 104(%rsp) vmovss %xmm3, 84(%rsp) leaq 84(%rsp), %rdi movq %rdi, 112(%rsp) vmovss %xmm1, 80(%rsp) leaq 68(%rsp), %rdi movq %rsi, 120(%rsp) movl %ebx, 76(%rsp) movq %rdx, 128(%rsp) movl %r13d, 72(%rsp) leaq 96(%rsp), %r8 movq %rcx, 136(%rsp) movl $32, 68(%rsp) leaq 48(%rsp), %rcx movq %rdi, 144(%rsp) movl $4, 64(%rsp) leaq 64(%rsp), %rdx movq %rdx, 152(%rsp) leaq 60(%rsp), %rdx movl %r12d, 60(%rsp) movq %rdx, 160(%rsp) movq %r14, 48(%rsp) movq %rcx, 168(%rsp) movl %eax, (%rsp) movl $1, 8(%rsp) movq %r15, %rdi movl $.L.module_str, %esi movl $.L.ptx_str, %edx movl $.L.func_str, %ecx callq CUDALaunch movq 192(%rsp), %rdi testq %rdi, %rdi je .LBB0_2 # BB#1: # %call_sync callq ISPCSync movq $0, 192(%rsp) .LBB0_2: # %post_sync leaq -40(%rbp), %rsp popq %rbx popq %r12 popq %r13 popq %r14 popq %r15 popq %rbp ret .Ltmp0: .size mandelbrot_ispc, .Ltmp0-mandelbrot_ispc .type .L.module_str,@object # @.module_str .section .rodata,"a",@progbits .align 16 .L.module_str: .asciz "mandelbrot_task.ispc" .size .L.module_str, 21 .type .L.ptx_str,@object # @.ptx_str .align 16 .L.ptx_str: .asciz "//\n// Generated by LLVM NVPTX Back-End\n//\n\n.version 3.1\n.target sm_35, texmode_independent\n.address_size 64\n\n\t// .globl\tmandelbrot_scanline\n.func (.param .b32 func_retval0) puts\n(\n\t.param .b64 puts_param_0\n)\n;\n.func abort\n(\n\n)\n;\n.global .align 1 .b8 __str[66] = {109, 97, 110, 100, 101, 108, 98, 114, 111, 116, 95, 116, 97, 115, 107, 46, 105, 115, 112, 99, 58, 53, 53, 58, 51, 58, 32, 65, 115, 115, 101, 114, 116, 105, 111, 110, 32, 102, 97, 105, 108, 101, 100, 58, 32, 120, 115, 112, 97, 110, 32, 62, 61, 32, 118, 101, 99, 116, 111, 114, 87, 105, 100, 116, 104, 0};\n // @mandelbrot_scanline\n.entry mandelbrot_scanline(\n\t.param .f32 mandelbrot_scanline_param_0,\n\t.param .f32 mandelbrot_scanline_param_1,\n\t.param .f32 mandelbrot_scanline_param_2,\n\t.param .f32 mandelbrot_scanline_param_3,\n\t.param .u32 mandelbrot_scanline_param_4,\n\t.param .u32 mandelbrot_scanline_param_5,\n\t.param .u32 mandelbrot_scanline_param_6,\n\t.param .u32 mandelbrot_scanline_param_7,\n\t.param .u32 mandelbrot_scanline_param_8,\n\t.param .u64 .ptr .align 4 mandelbrot_scanline_param_9\n)\n{\n\t.reg .pred %p<396>;\n\t.reg .s16 %rc<396>;\n\t.reg .s16 %rs<396>;\n\t.reg .s32 %r<396>;\n\t.reg .s64 %rl<396>;\n\t.reg .f32 %f<396>;\n\t.reg .f64 %fl<396>;\n\n// BB#0: // %allocas\n\tld.param.u32 \t%r3, [mandelbrot_scanline_param_6];\n\tmov.u32 \t%r0, WARP_SZ;\n\tsetp.gt.s32 \t%p0, %r0, %r3;\n\t@%p0 bra \tBB0_18;\n// BB#1: // %for_test.preheader\n\tld.param.u32 \t%r7, [mandelbrot_scanline_param_5];\n\tld.param.u32 \t%r6, [mandelbrot_scanline_param_7];\n\tmov.u32 \t%r8, %ctaid.y;\n\tmul.lo.s32 \t%r1, %r8, %r6;\n\tmad.lo.s32 \t%r2, %r8, %r6, %r6;\n\tsetp.lt.s32 \t%p0, %r2, %r7;\n\tselp.b32 \t%r2, %r2, %r7, %p0;\n\tsetp.ge.s32 \t%p0, %r1, %r2;\n\t@%p0 bra \tBB0_14;\n// BB#2: // %for_test34.preheader.lr.ph\n\tld.param.f32 \t%f0, [mandelbrot_scanline_param_0];\n\tld.param.f32 \t%f1, [mandelbrot_scanline_param_1];\n\tld.param.f32 \t%f2, [mandelbrot_scanline_param_2];\n\tmov.u32 \t%r4, %ctaid.x;\n\tmul.lo.s32 \t%r2, %r4, %r3;\n\tld.param.f32 \t%f3, [mandelbrot_scanline_param_3];\n\tmad.lo.s32 \t%r4, %r4, %r3, %r3;\n\tld.param.u32 \t%r3, [mandelbrot_scanline_param_4];\n\tsetp.lt.s32 \t%p0, %r4, %r3;\n\tselp.b32 \t%r4, %r4, %r3, %p0;\n\tld.param.u32 \t%r5, [mandelbrot_scanline_param_8];\n\tld.param.u64 \t%rl0, [mandelbrot_scanline_param_9];\n\tsetp.gt.s32 \t%p0, %r5, 0;\n\tnot.b32 \t%r7, %r7;\n\tadd.s32 \t%r8, %r8, 1;\n\tmul.lo.s32 \t%r6, %r8, %r6;\n\tnot.b32 \t%r6, %r6;\n\tsetp.gt.s32 \t%p1, %r7, %r6;\n\tselp.b32 \t%r6, %r7, %r6, %p1;\n\tnot.b32 \t%r6, %r6;\nBB0_3: // %for_test34.preheader\n // =>This Loop Header: Depth=1\n // Child Loop BB0_16 Depth 2\n // Child Loop BB0_9 Depth 2\n // Child Loop BB0_12 Depth 3\n\tsetp.ge.s32 \t%p1, %r2, %r4;\n\t@%p1 bra \tBB0_13;\n// BB#4: // %for_loop36.lr.ph\n // in Loop: Header=BB0_3 Depth=1\n\tmul.lo.s32 \t%r7, %r1, %r3;\n\tmov.u32 \t%r8, %r2;\n\t@%p0 bra \tBB0_5;\n\tbra.uni \tBB0_16;\nBB0_5: // in Loop: Header=BB0_3 Depth=1\n\tcvt.rn.f32.s32 \t%f4, %r1;\n\tfma.rn.f32 \t%f4, %f4, %f3, %f2;\n\tmov.u32 \t%r8, %r2;\nBB0_9: // %for_loop.i.lr.ph.us\n // Parent Loop BB0_3 Depth=1\n // => This Loop Header: Depth=2\n // Child Loop BB0_12 Depth 3\n\tmov.u32 \t%r9, %tid.x;\n\tadd.s32 \t%r10, %r0, -1;\n\tand.b32 \t%r10, %r10, %r9;\n\tadd.s32 \t%r11, %r10, %r8;\n\tcvt.rn.f32.s32 \t%f5, %r11;\n\tfma.rn.f32 \t%f5, %f5, %f1, %f0;\n\tmov.u32 \t%r10, 0;\n\tmov.pred \t%p1, 0;\n\tmov.pred \t%p3, -1;\n\tmov.pred \t%p4, %p0;\n\tmov.pred \t%p2, %p1;\n\tmov.f32 \t%f7, %f5;\n\tmov.f32 \t%f6, %f4;\nBB0_12: // %for_loop.i.us\n // Parent Loop BB0_3 Depth=1\n // Parent Loop BB0_9 Depth=2\n // => This Inner Loop Header: Depth=3\n\tand.pred \t%p4, %p3, %p4;\n\tmul.f32 \t%f8, %f7, %f7;\n\tfma.rn.f32 \t%f9, %f6, %f6, %f8;\n\tsetp.gtu.f32 \t%p3, %f9, 0f40800000;\n\tand.pred \t%p3, %p4, %p3;\n\tor.pred \t%p2, %p3, %p2;\n\txor.pred \t%p5, %p2, %p4;\n\tmov.pred \t%p3, %p1;\n\t@!%p5 bra \tBB0_11;\n\tbra.uni \tBB0_10;\nBB0_10: // %not_all_continued_or_breaked.i.us\n // in Loop: Header=BB0_12 Depth=3\n\tmul.f32 \t%f9, %f6, %f6;\n\tnot.pred \t%p3, %p2;\n\tand.pred \t%p3, %p4, %p3;\n\tsub.f32 \t%f8, %f8, %f9;\n\tadd.f32 \t%f8, %f5, %f8;\n\tadd.f32 \t%f7, %f7, %f7;\n\tfma.rn.f32 \t%f6, %f6, %f7, %f4;\n\tmov.f32 \t%f7, %f8;\nBB0_11: // %for_step.i.us\n // in Loop: Header=BB0_12 Depth=3\n\tadd.s32 \t%r12, %r10, 1;\n\tselp.b32 \t%r10, %r12, %r10, %p3;\n\tsetp.lt.s32 \t%p4, %r10, %r5;\n\tand.pred \t%p5, %p3, %p4;\n\t@%p5 bra \tBB0_12;\n// BB#6: // %mandel___vyfvyfvyi.exit.us\n // in Loop: Header=BB0_9 Depth=2\n\tsetp.ge.s32 \t%p1, %r11, %r4;\n\t@%p1 bra \tBB0_8;\n// BB#7: // %if_then.us\n // in Loop: Header=BB0_9 Depth=2\n\tadd.s32 \t%r11, %r0, 1073741823;\n\tand.b32 \t%r9, %r11, %r9;\n\tadd.s32 \t%r11, %r8, %r7;\n\tadd.s32 \t%r9, %r11, %r9;\n\tshl.b32 \t%r9, %r9, 2;\n\tcvt.s64.s32 \t%rl1, %r9;\n\tadd.s64 \t%rl1, %rl1, %rl0;\n\tst.u32 \t[%rl1], %r10;\nBB0_8: // %if_exit.us\n // in Loop: Header=BB0_9 Depth=2\n\tadd.s32 \t%r8, %r0, %r8;\n\tsetp.lt.s32 \t%p1, %r8, %r4;\n\t@%p1 bra \tBB0_9;\n\tbra.uni \tBB0_13;\nBB0_16: // %mandel___vyfvyfvyi.exit\n // Parent Loop BB0_3 Depth=1\n // => This Inner Loop Header: Depth=2\n\tmov.u32 \t%r9, %tid.x;\n\tadd.s32 \t%r10, %r0, -1;\n\tand.b32 \t%r10, %r10, %r9;\n\tadd.s32 \t%r10, %r10, %r8;\n\tsetp.lt.s32 \t%p1, %r10, %r4;\n\t@%p1 bra \tBB0_17;\n\tbra.uni \tBB0_15;\nBB0_17: // %if_then\n // in Loop: Header=BB0_16 Depth=2\n\tadd.s32 \t%r10, %r0, 1073741823;\n\tand.b32 \t%r9, %r10, %r9;\n\tadd.s32 \t%r10, %r8, %r7;\n\tadd.s32 \t%r9, %r10, %r9;\n\tshl.b32 \t%r9, %r9, 2;\n\tcvt.s64.s32 \t%rl1, %r9;\n\tadd.s64 \t%rl1, %rl1, %rl0;\n\tmov.u32 \t%r9, 0;\n\tst.u32 \t[%rl1], %r9;\nBB0_15: // %if_exit\n // in Loop: Header=BB0_16 Depth=2\n\tadd.s32 \t%r8, %r0, %r8;\n\tsetp.lt.s32 \t%p1, %r8, %r4;\n\t@%p1 bra \tBB0_16;\nBB0_13: // %for_exit37\n // in Loop: Header=BB0_3 Depth=1\n\tadd.s32 \t%r1, %r1, 1;\n\tsetp.eq.s32 \t%p1, %r1, %r6;\n\t@%p1 bra \tBB0_14;\n\tbra.uni \tBB0_3;\nBB0_14: // %for_exit\n\tret;\nBB0_18: // %fail.i\n\tmov.u64 \t%rl0, __str;\n\tcvta.global.u64 \t%rl0, %rl0;\n\t// Callseq Start 2\n\t{\n\t.reg .b32 temp_param_reg;\n\t// }\n\t.param .b64 param0;\n\tst.param.b64\t[param0+0], %rl0;\n\t.param .b32 retval0;\n\tcall.uni (retval0), \n\tputs, \n\t(\n\tparam0\n\t);\n\tld.param.b32\t%r0, [retval0+0];\n\t\n\t//{\n\t}// Callseq End 2\n\t// Callseq Start 3\n\t{\n\t.reg .b32 temp_param_reg;\n\t// }\n\tcall.uni \n\tabort, \n\t(\n\t);\n\t\n\t//{\n\t}// Callseq End 3\n}\n\n" .size .L.ptx_str, 7522 .type .L.func_str,@object # @.func_str .align 16 .L.func_str: .asciz "mandelbrot_scanline" .size .L.func_str, 20 .section ".note.GNU-stack","",@progbits