Files
ispc/examples/mandelbrot_tasks3d/mandelbrot_task_avx.ptxx
2013-11-08 14:17:26 +01:00

172 lines
4.3 KiB
Plaintext

.file "mandelbrot_task.ispc"
.text
# ----------------------------------------------------------------------
# mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E_
#
# Syntax: AT&T (GAS), x86-64 with AVX.  Compiler-generated code; the
# instruction order below is kept exactly as emitted.
#
# Purpose: allocate a 96-byte task argument block, fill it, launch
# mandelbrot_scanline___unfunfunfunfuniuniuniuniuniun_3C_uni_3E_ through
# ISPCLaunch, then wait with ISPCSync if a launch handle was produced.
#
# In (as read by this code):
#   xmm0, xmm1  scalar floats, forwarded to the task block unchanged
#               (presumably x0, y0 -- confirm against the .ispc source)
#   xmm2, xmm3  scalar floats, only used as (xmm2 - xmm0), (xmm3 - xmm1)
#               (presumably x1, y1 -- confirm)
#   edi, esi    32-bit ints: divisors of the two differences above and
#               the source of the two launch counts (presumably
#               width, height -- confirm)
#   edx         32-bit int, forwarded unchanged
#   rcx         64-bit value, forwarded unchanged (presumably the
#               output pointer -- confirm)
#   ymm4        256-bit vector whose 8 lane sign bits are tested;
#               presumably the ISPC execution mask -- confirm
# Out:   nothing is placed in a return register by this code.
# Saved: rbp, r15, r14, rbx are pushed and restored.
#
# Stack frame (88 bytes below the four pushes):
#   28(%rsp)      spill of incoming xmm0
#   32..63(%rsp)  spill of incoming ymm4
#   68(%rsp)      (xmm2 - xmm0) / (float)edi
#   72(%rsp)      (xmm3 - xmm1) / (float)esi
#   76(%rsp)      spill of incoming xmm1
#   80(%rsp)      8-byte launch handle, initialised to 0
#
# Task block written through the pointer returned by ISPCAlloc:
#    0  incoming xmm0            16  incoming edi
#    4  (xmm2 - xmm0)/(float)edi 20  incoming esi
#    8  incoming xmm1            24  constant 16
#   12  (xmm3 - xmm1)/(float)esi 28  constant 16
#   32  incoming edx             40  incoming rcx
#   64  32-byte mask (vmovaps: needs a 32-byte aligned block; the
#       alignment value 32 is what is passed to ISPCAlloc)
#
# Change vs. the generated code: the task entry address is formed with a
# RIP-relative leaq instead of `movl $sym, %esi`, so the object no longer
# needs a 32-bit absolute relocation and can be linked into a PIE.
# ----------------------------------------------------------------------
	.globl	mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E_
	.align	16, 0x90
	.type	mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E_,@function
mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E_: # @mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E_
# BB#0: # %allocas
	pushq	%rbp				# callee-saved; will hold incoming edi
	pushq	%r15				# callee-saved; will hold incoming edx
	pushq	%r14				# callee-saved; will hold incoming rcx
	pushq	%rbx				# callee-saved; will hold incoming esi
	subq	$88, %rsp			# locals; 8 (ret) + 32 (pushes) + 88 = 128, so rsp is 16-aligned at the calls given SysV entry alignment
	vmovups	%ymm4, 32(%rsp)			# 32-byte spill of the incoming mask before xmm4 is reused
	movq	%rcx, %r14			# keep integer arguments in callee-saved
	movl	%edx, %r15d			#   registers so they survive the calls
	movl	%esi, %ebx
	movl	%edi, %ebp
	vmovss	%xmm1, 76(%rsp)			# 4-byte spill of incoming xmm1
	vmovss	%xmm0, 28(%rsp)			# 4-byte spill of incoming xmm0
	vcvtsi2ssl	%ebp, %xmm0, %xmm5	# xmm5 = (float)edi
	vsubss	%xmm0, %xmm2, %xmm4		# xmm4 = xmm2 - xmm0
	vcvtsi2ssl	%ebx, %xmm0, %xmm2	# xmm2 = (float)esi
	vsubss	%xmm1, %xmm3, %xmm3		# xmm3 = xmm3 - xmm1
	movq	$0, 80(%rsp)			# launch handle = 0
	leaq	80(%rsp), %rdi			# ISPCAlloc arg 1: &handle
	vdivss	%xmm2, %xmm3, %xmm1		# (xmm3 - xmm1) / (float)esi
	vmovss	%xmm1, 72(%rsp)			# 4-byte spill
	vdivss	%xmm5, %xmm4, %xmm0		# (xmm2 - xmm0) / (float)edi
	vmovss	%xmm0, 68(%rsp)			# 4-byte spill
	movl	$96, %esi			# ISPCAlloc arg 2: 96 (block size used below)
	movl	$32, %edx			# ISPCAlloc arg 3: 32 (alignment needed by the vmovaps below)
	vzeroupper				# clear upper ymm halves before calling out
	callq	ISPCAlloc
	vmovups	32(%rsp), %ymm0			# 32-byte reload of the incoming mask
	movq	%rax, %rdx			# rdx = task block; also ISPCLaunch arg 3
	movl	%ebx, %r8d			# r8d = esi / 16, signed, rounding toward
	sarl	$31, %r8d			#   zero: add 15 first when negative
	shrl	$28, %r8d
	addl	%ebx, %r8d
	vmovss	28(%rsp), %xmm1			# 4-byte reload
	vmovss	%xmm1, (%rdx)			# block+0  = incoming xmm0
	sarl	$4, %r8d			# ISPCLaunch arg 5 complete
	movl	%ebp, %ecx			# ecx = edi / 16, same signed sequence
	sarl	$31, %ecx
	shrl	$28, %ecx
	addl	%ebp, %ecx
	sarl	$4, %ecx			# ISPCLaunch arg 4 complete
	vmovmskps	%ymm0, %eax		# eax = the 8 lane sign bits of the mask
	cmpl	$255, %eax			# all 8 set?  flags are consumed by the jne below;
						#   only mov/vmov instructions sit in between
	vmovss	68(%rsp), %xmm1			# 4-byte reload
	vmovss	%xmm1, 4(%rdx)			# block+4  = (xmm2 - xmm0) / (float)edi
	vmovss	76(%rsp), %xmm1			# 4-byte reload
	vmovss	%xmm1, 8(%rdx)			# block+8  = incoming xmm1
	vmovss	72(%rsp), %xmm1			# 4-byte reload
	vmovss	%xmm1, 12(%rdx)			# block+12 = (xmm3 - xmm1) / (float)esi
	movl	%ebp, 16(%rdx)			# block+16 = incoming edi
	movl	%ebx, 20(%rdx)			# block+20 = incoming esi
	movl	$16, 24(%rdx)			# block+24 = 16 (matches the /16 launch counts)
	movl	$16, 28(%rdx)			# block+28 = 16
	movl	%r15d, 32(%rdx)			# block+32 = incoming edx
	movq	%r14, 40(%rdx)			# block+40 = incoming rcx
	jne	.LBB0_2				# not all lanes on: store the mask as received
# BB#1: # %all_on
	vpcmpeqd	%xmm0, %xmm0, %xmm0	# all lanes on: rebuild the mask as
	vinsertf128	$1, %xmm0, %ymm0, %ymm0	#   256 bits of ones
.LBB0_2: # mask in ymm0 is final here (either path)
	vmovaps	%ymm0, 64(%rdx)			# block+64 = mask (aligned 32-byte store)
	leaq	80(%rsp), %rdi			# ISPCLaunch arg 1: &handle
	leaq	mandelbrot_scanline___unfunfunfunfuniuniuniuniuniun_3C_uni_3E_(%rip), %rsi	# arg 2: task entry, RIP-relative (PIE-safe)
	movl	$1, %r9d			# ISPCLaunch arg 6: 1
	vzeroupper				# clear upper ymm halves before calling out
	callq	ISPCLaunch
	movq	80(%rsp), %rdi			# rdi = handle as left by the calls above
	testq	%rdi, %rdi
	je	.LBB0_4				# handle still 0: nothing to wait for
# BB#3: # %call_sync
	callq	ISPCSync			# ISPCSync(handle)
	movq	$0, 80(%rsp)			# reset the handle slot
.LBB0_4: # %post_sync
	addq	$88, %rsp
	popq	%rbx
	popq	%r14
	popq	%r15
	popq	%rbp
	ret
.Ltmp0:
	.size	mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E_, .Ltmp0-mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E_
# ----------------------------------------------------------------------
# mandelbrot_ispc
#
# Syntax: AT&T (GAS), x86-64 with AVX.  Compiler-generated code; the
# instruction order below is kept exactly as emitted.
#
# Purpose: allocate a 96-byte task argument block, fill it, launch
# mandelbrot_scanline___unfunfunfunfuniuniuniuniuniun_3C_uni_3E_ through
# ISPCLaunch, then wait with ISPCSync if a launch handle was produced.
# Unlike the masked variant, this entry point reads no mask register and
# always stores an all-ones 256-bit mask in the task block.
#
# In (as read by this code):
#   xmm0, xmm1  scalar floats, forwarded to the task block unchanged
#               (presumably x0, y0 -- confirm against the .ispc source)
#   xmm2, xmm3  scalar floats, only used as (xmm2 - xmm0), (xmm3 - xmm1)
#               (presumably x1, y1 -- confirm)
#   edi, esi    32-bit ints: divisors of the two differences above and
#               the source of the two launch counts (presumably
#               width, height -- confirm)
#   edx         32-bit int, forwarded unchanged
#   rcx         64-bit value, forwarded unchanged (presumably the
#               output pointer -- confirm)
# Out:   nothing is placed in a return register by this code.
# Saved: rbp, r15, r14, r12, rbx are pushed and restored.
#
# Stack frame (32 bytes below the five pushes):
#    8(%rsp)  spill of incoming xmm0
#   12(%rsp)  (xmm2 - xmm0) / (float)edi
#   16(%rsp)  (xmm3 - xmm1) / (float)esi
#   20(%rsp)  spill of incoming xmm1
#   24(%rsp)  8-byte launch handle, initialised to 0 (address kept in r12)
#
# Task block written through the pointer returned by ISPCAlloc:
#    0  incoming xmm0            16  incoming edi
#    4  (xmm2 - xmm0)/(float)edi 20  incoming esi
#    8  incoming xmm1            24  constant 16
#   12  (xmm3 - xmm1)/(float)esi 28  constant 16
#   32  incoming edx             40  incoming rcx
#   64  32 bytes of ones (vmovaps: needs a 32-byte aligned block; the
#       alignment value 32 is what is passed to ISPCAlloc)
#
# Change vs. the generated code: the task entry address is formed with a
# RIP-relative leaq instead of `movl $sym, %esi`, so the object no longer
# needs a 32-bit absolute relocation and can be linked into a PIE.
# ----------------------------------------------------------------------
	.globl	mandelbrot_ispc
	.align	16, 0x90
	.type	mandelbrot_ispc,@function
mandelbrot_ispc: # @mandelbrot_ispc
# BB#0: # %allocas
	pushq	%rbp				# callee-saved; will hold incoming edi
	pushq	%r15				# callee-saved; will hold incoming edx
	pushq	%r14				# callee-saved; will hold incoming rcx
	pushq	%r12				# callee-saved; will hold &handle
	pushq	%rbx				# callee-saved; will hold incoming esi
	subq	$32, %rsp			# locals; 8 (ret) + 40 (pushes) + 32 = 80, so rsp is 16-aligned at the calls given SysV entry alignment
	movq	%rcx, %r14			# keep integer arguments in callee-saved
	movl	%edx, %r15d			#   registers so they survive the calls
	movl	%esi, %ebx
	movl	%edi, %ebp
	vmovss	%xmm1, 20(%rsp)			# 4-byte spill of incoming xmm1
	vmovss	%xmm0, 8(%rsp)			# 4-byte spill of incoming xmm0
	vcvtsi2ssl	%ebp, %xmm0, %xmm5	# xmm5 = (float)edi
	vsubss	%xmm0, %xmm2, %xmm4		# xmm4 = xmm2 - xmm0
	vcvtsi2ssl	%ebx, %xmm0, %xmm2	# xmm2 = (float)esi
	vsubss	%xmm1, %xmm3, %xmm3		# xmm3 = xmm3 - xmm1
	movq	$0, 24(%rsp)			# launch handle = 0
	leaq	24(%rsp), %r12			# r12 = &handle, reused for both calls
	vdivss	%xmm2, %xmm3, %xmm1		# (xmm3 - xmm1) / (float)esi
	vmovss	%xmm1, 16(%rsp)			# 4-byte spill
	vdivss	%xmm5, %xmm4, %xmm0		# (xmm2 - xmm0) / (float)edi
	vmovss	%xmm0, 12(%rsp)			# 4-byte spill
	movq	%r12, %rdi			# ISPCAlloc arg 1: &handle
	movl	$96, %esi			# ISPCAlloc arg 2: 96 (block size used below)
	movl	$32, %edx			# ISPCAlloc arg 3: 32 (alignment needed by the vmovaps below)
	callq	ISPCAlloc			# no ymm register has been written yet, so no vzeroupper here
	movl	%ebx, %r8d			# r8d = esi / 16, signed, rounding toward
	sarl	$31, %r8d			#   zero: add 15 first when negative
	vpcmpeqd	%xmm0, %xmm0, %xmm0	# xmm0 = all ones (low half of the mask)
	vmovss	8(%rsp), %xmm1			# 4-byte reload
	vmovss	%xmm1, (%rax)			# block+0  = incoming xmm0
	shrl	$28, %r8d
	addl	%ebx, %r8d
	movl	%ebp, %ecx			# ecx = edi / 16, same signed sequence
	sarl	$31, %ecx
	shrl	$28, %ecx
	addl	%ebp, %ecx
	sarl	$4, %ecx			# ISPCLaunch arg 4 complete
	sarl	$4, %r8d			# ISPCLaunch arg 5 complete
	vinsertf128	$1, %xmm0, %ymm0, %ymm0	# ymm0 = 256 bits of ones
	vmovss	12(%rsp), %xmm1			# 4-byte reload
	vmovss	%xmm1, 4(%rax)			# block+4  = (xmm2 - xmm0) / (float)edi
	vmovss	20(%rsp), %xmm1			# 4-byte reload
	vmovss	%xmm1, 8(%rax)			# block+8  = incoming xmm1
	vmovss	16(%rsp), %xmm1			# 4-byte reload
	vmovss	%xmm1, 12(%rax)			# block+12 = (xmm3 - xmm1) / (float)esi
	movl	%ebp, 16(%rax)			# block+16 = incoming edi
	movl	%ebx, 20(%rax)			# block+20 = incoming esi
	movl	$16, 24(%rax)			# block+24 = 16 (matches the /16 launch counts)
	movl	$16, 28(%rax)			# block+28 = 16
	movl	%r15d, 32(%rax)			# block+32 = incoming edx
	movq	%r14, 40(%rax)			# block+40 = incoming rcx
	vmovaps	%ymm0, 64(%rax)			# block+64 = all-ones mask (aligned 32-byte store)
	movq	%r12, %rdi			# ISPCLaunch arg 1: &handle
	leaq	mandelbrot_scanline___unfunfunfunfuniuniuniuniuniun_3C_uni_3E_(%rip), %rsi	# arg 2: task entry, RIP-relative (PIE-safe)
	movq	%rax, %rdx			# ISPCLaunch arg 3: task block
	movl	$1, %r9d			# ISPCLaunch arg 6: 1
	vzeroupper				# clear upper ymm halves before calling out
	callq	ISPCLaunch
	movq	24(%rsp), %rdi			# rdi = handle as left by the calls above
	testq	%rdi, %rdi
	je	.LBB1_2				# handle still 0: nothing to wait for
# BB#1: # %call_sync
	callq	ISPCSync			# ISPCSync(handle)
	movq	$0, 24(%rsp)			# reset the handle slot
.LBB1_2: # %post_sync
	addq	$32, %rsp
	popq	%rbx
	popq	%r12
	popq	%r14
	popq	%r15
	popq	%rbp
	ret
.Ltmp1:
	.size	mandelbrot_ispc, .Ltmp1-mandelbrot_ispc
.section ".note.GNU-stack","",@progbits