Files
ispc/examples/mandelbrot_tasks3d/test.s
2013-11-08 14:17:26 +01:00

802 lines
22 KiB
ArmAsm

//
// Generated by LLVM NVPTX Back-End
//
// Module-level directives. They apply to every function in this file.
// PTX ISA version the module is written against.
.version 3.1
// Target architecture sm_35; texmode_independent selects the independent
// texture/sampler mode.
.target sm_35, texmode_independent
// Addresses are 64 bits wide.
.address_size 64
// .globl __vselect_i8
// Prototypes only (no body follows the parameter list): three helpers that
// each take one 4-byte, 4-byte-aligned argument and return a 32-bit value.
// Their definitions are not part of this listing; the call sites in this file
// pass the 32-bit mask word as the argument.
.func (.param .b32 func_retval0) getBlockIndex0___
(
.param .align 4 .b8 getBlockIndex0____param_0[4]
)
;
.func (.param .b32 func_retval0) getBlockIndex1___
(
.param .align 4 .b8 getBlockIndex1____param_0[4]
)
;
.func (.param .b32 func_retval0) getLaneIndex___
(
.param .align 4 .b8 getLaneIndex____param_0[4]
)
;
// __vselect_i8: two-way select between a pair of 8-bit values.
//   __vselect_i8_param_0 : value returned when the selector is zero
//   __vselect_i8_param_1 : value returned when the selector is non-zero
//   __vselect_i8_param_2 : 32-bit selector word
// The bytes are held zero-extended in 16-bit registers; only the low byte
// is written back to the return slot.
.func (.param .align 1 .b8 func_retval0[1]) __vselect_i8(
    .param .align 1 .b8 __vselect_i8_param_0[1],
    .param .align 1 .b8 __vselect_i8_param_1[1],
    .param .align 4 .b8 __vselect_i8_param_2[4]
)   // @__vselect_i8
{
    .reg .pred %sel_is_zero;
    .reg .s16  %if_zero, %if_nonzero, %chosen;
    .reg .s32  %selector;

    // Fetch both candidates and the selector.
    ld.param.u8  %if_zero, [__vselect_i8_param_0];
    ld.param.u8  %if_nonzero, [__vselect_i8_param_1];
    ld.param.u32 %selector, [__vselect_i8_param_2];

    // chosen = (selector == 0) ? param_0 : param_1
    setp.eq.s32 %sel_is_zero, %selector, 0;
    selp.b16    %chosen, %if_zero, %if_nonzero, %sel_is_zero;

    st.param.b8 [func_retval0+0], %chosen;
    ret;
}
// .globl __vselect_i16
// __vselect_i16: two-way select between a pair of 16-bit values.
//   __vselect_i16_param_0 : value returned when the selector is zero
//   __vselect_i16_param_1 : value returned when the selector is non-zero
//   __vselect_i16_param_2 : 32-bit selector word
.func (.param .align 2 .b8 func_retval0[2]) __vselect_i16(
    .param .align 2 .b8 __vselect_i16_param_0[2],
    .param .align 2 .b8 __vselect_i16_param_1[2],
    .param .align 4 .b8 __vselect_i16_param_2[4]
)   // @__vselect_i16
{
    .reg .pred %sel_is_zero;
    .reg .s16  %if_zero, %if_nonzero, %chosen;
    .reg .s32  %selector;

    // Fetch both candidates and the selector.
    ld.param.u16 %if_zero, [__vselect_i16_param_0];
    ld.param.u16 %if_nonzero, [__vselect_i16_param_1];
    ld.param.u32 %selector, [__vselect_i16_param_2];

    // chosen = (selector == 0) ? param_0 : param_1
    setp.eq.s32 %sel_is_zero, %selector, 0;
    selp.b16    %chosen, %if_zero, %if_nonzero, %sel_is_zero;

    st.param.b16 [func_retval0+0], %chosen;
    ret;
}
// .globl __vselect_i64
// __vselect_i64: two-way select between a pair of 64-bit values.
//   __vselect_i64_param_0 : value returned when the selector is zero
//   __vselect_i64_param_1 : value returned when the selector is non-zero
//   __vselect_i64_param_2 : 32-bit selector word
.func (.param .align 8 .b8 func_retval0[8]) __vselect_i64(
    .param .align 8 .b8 __vselect_i64_param_0[8],
    .param .align 8 .b8 __vselect_i64_param_1[8],
    .param .align 4 .b8 __vselect_i64_param_2[4]
)   // @__vselect_i64
{
    .reg .pred %sel_is_zero;
    .reg .s32  %selector;
    .reg .s64  %if_zero, %if_nonzero, %chosen;

    // Fetch both candidates and the selector.
    ld.param.u64 %if_zero, [__vselect_i64_param_0];
    ld.param.u64 %if_nonzero, [__vselect_i64_param_1];
    ld.param.u32 %selector, [__vselect_i64_param_2];

    // chosen = (selector == 0) ? param_0 : param_1
    setp.eq.s32 %sel_is_zero, %selector, 0;
    selp.b64    %chosen, %if_zero, %if_nonzero, %sel_is_zero;

    st.param.b64 [func_retval0+0], %chosen;
    ret;
}
// .globl __aos_to_soa4_float1
// __aos_to_soa4_float1: writes four 32-bit float arguments through four
// 64-bit destination addresses, pairing them positionally:
//   param_0 -> [param_4], param_1 -> [param_5],
//   param_2 -> [param_6], param_3 -> [param_7]
// No value is returned. The stores carry no state-space qualifier.
.func __aos_to_soa4_float1(
    .param .align 4 .b8 __aos_to_soa4_float1_param_0[4],
    .param .align 4 .b8 __aos_to_soa4_float1_param_1[4],
    .param .align 4 .b8 __aos_to_soa4_float1_param_2[4],
    .param .align 4 .b8 __aos_to_soa4_float1_param_3[4],
    .param .b64 __aos_to_soa4_float1_param_4,
    .param .b64 __aos_to_soa4_float1_param_5,
    .param .b64 __aos_to_soa4_float1_param_6,
    .param .b64 __aos_to_soa4_float1_param_7
)   // @__aos_to_soa4_float1
{
    .reg .f32 %val<4>;
    .reg .s64 %dst<4>;

    // The four values to write.
    ld.param.f32 %val0, [__aos_to_soa4_float1_param_0];
    ld.param.f32 %val1, [__aos_to_soa4_float1_param_1];
    ld.param.f32 %val2, [__aos_to_soa4_float1_param_2];
    ld.param.f32 %val3, [__aos_to_soa4_float1_param_3];

    // The four destination addresses.
    ld.param.u64 %dst0, [__aos_to_soa4_float1_param_4];
    ld.param.u64 %dst1, [__aos_to_soa4_float1_param_5];
    ld.param.u64 %dst2, [__aos_to_soa4_float1_param_6];
    ld.param.u64 %dst3, [__aos_to_soa4_float1_param_7];

    // Stores are issued in argument order.
    st.f32 [%dst0], %val0;
    st.f32 [%dst1], %val1;
    st.f32 [%dst2], %val2;
    st.f32 [%dst3], %val3;
    ret;
}
// .globl __soa_to_aos4_float1
// __soa_to_aos4_float1: writes four 32-bit float arguments through four
// 64-bit destination addresses, pairing them positionally:
//   param_0 -> [param_4], param_1 -> [param_5],
//   param_2 -> [param_6], param_3 -> [param_7]
// No value is returned. The stores carry no state-space qualifier.
.func __soa_to_aos4_float1(
    .param .align 4 .b8 __soa_to_aos4_float1_param_0[4],
    .param .align 4 .b8 __soa_to_aos4_float1_param_1[4],
    .param .align 4 .b8 __soa_to_aos4_float1_param_2[4],
    .param .align 4 .b8 __soa_to_aos4_float1_param_3[4],
    .param .b64 __soa_to_aos4_float1_param_4,
    .param .b64 __soa_to_aos4_float1_param_5,
    .param .b64 __soa_to_aos4_float1_param_6,
    .param .b64 __soa_to_aos4_float1_param_7
)   // @__soa_to_aos4_float1
{
    .reg .f32 %val<4>;
    .reg .s64 %dst<4>;

    // The four values to write.
    ld.param.f32 %val0, [__soa_to_aos4_float1_param_0];
    ld.param.f32 %val1, [__soa_to_aos4_float1_param_1];
    ld.param.f32 %val2, [__soa_to_aos4_float1_param_2];
    ld.param.f32 %val3, [__soa_to_aos4_float1_param_3];

    // The four destination addresses.
    ld.param.u64 %dst0, [__soa_to_aos4_float1_param_4];
    ld.param.u64 %dst1, [__soa_to_aos4_float1_param_5];
    ld.param.u64 %dst2, [__soa_to_aos4_float1_param_6];
    ld.param.u64 %dst3, [__soa_to_aos4_float1_param_7];

    // Stores are issued in argument order.
    st.f32 [%dst0], %val0;
    st.f32 [%dst1], %val1;
    st.f32 [%dst2], %val2;
    st.f32 [%dst3], %val3;
    ret;
}
// .globl __aos_to_soa3_float1
// __aos_to_soa3_float1: writes three 32-bit float arguments through three
// 64-bit destination addresses, pairing them positionally:
//   param_0 -> [param_3], param_1 -> [param_4], param_2 -> [param_5]
// No value is returned. The stores carry no state-space qualifier.
.func __aos_to_soa3_float1(
    .param .align 4 .b8 __aos_to_soa3_float1_param_0[4],
    .param .align 4 .b8 __aos_to_soa3_float1_param_1[4],
    .param .align 4 .b8 __aos_to_soa3_float1_param_2[4],
    .param .b64 __aos_to_soa3_float1_param_3,
    .param .b64 __aos_to_soa3_float1_param_4,
    .param .b64 __aos_to_soa3_float1_param_5
)   // @__aos_to_soa3_float1
{
    .reg .f32 %val<3>;
    .reg .s64 %dst<3>;

    // The three values to write.
    ld.param.f32 %val0, [__aos_to_soa3_float1_param_0];
    ld.param.f32 %val1, [__aos_to_soa3_float1_param_1];
    ld.param.f32 %val2, [__aos_to_soa3_float1_param_2];

    // The three destination addresses.
    ld.param.u64 %dst0, [__aos_to_soa3_float1_param_3];
    ld.param.u64 %dst1, [__aos_to_soa3_float1_param_4];
    ld.param.u64 %dst2, [__aos_to_soa3_float1_param_5];

    // Stores are issued in argument order.
    st.f32 [%dst0], %val0;
    st.f32 [%dst1], %val1;
    st.f32 [%dst2], %val2;
    ret;
}
// .globl __soa_to_aos3_float1
// __soa_to_aos3_float1: writes three 32-bit float arguments through three
// 64-bit destination addresses, pairing them positionally:
//   param_0 -> [param_3], param_1 -> [param_4], param_2 -> [param_5]
// No value is returned. The stores carry no state-space qualifier.
.func __soa_to_aos3_float1(
    .param .align 4 .b8 __soa_to_aos3_float1_param_0[4],
    .param .align 4 .b8 __soa_to_aos3_float1_param_1[4],
    .param .align 4 .b8 __soa_to_aos3_float1_param_2[4],
    .param .b64 __soa_to_aos3_float1_param_3,
    .param .b64 __soa_to_aos3_float1_param_4,
    .param .b64 __soa_to_aos3_float1_param_5
)   // @__soa_to_aos3_float1
{
    .reg .f32 %val<3>;
    .reg .s64 %dst<3>;

    // The three values to write.
    ld.param.f32 %val0, [__soa_to_aos3_float1_param_0];
    ld.param.f32 %val1, [__soa_to_aos3_float1_param_1];
    ld.param.f32 %val2, [__soa_to_aos3_float1_param_2];

    // The three destination addresses.
    ld.param.u64 %dst0, [__soa_to_aos3_float1_param_3];
    ld.param.u64 %dst1, [__soa_to_aos3_float1_param_4];
    ld.param.u64 %dst2, [__soa_to_aos3_float1_param_5];

    // Stores are issued in argument order.
    st.f32 [%dst0], %val0;
    st.f32 [%dst1], %val1;
    st.f32 [%dst2], %val2;
    ret;
}
// .globl __rsqrt_varying_double
// __rsqrt_varying_double: returns the approximate reciprocal square root
// (rsqrt.approx.f64) of its single 64-bit floating-point argument.
.func (.param .align 8 .b8 func_retval0[8]) __rsqrt_varying_double(
    .param .align 8 .b8 __rsqrt_varying_double_param_0[8]
)   // @__rsqrt_varying_double
{
    .reg .f64 %x, %inv_root;

    ld.param.f64     %x, [__rsqrt_varying_double_param_0];
    rsqrt.approx.f64 %inv_root, %x;
    st.param.f64     [func_retval0+0], %inv_root;
    ret;
}
// .globl mandelbrot_scanline___unfunfunfunfuniuniuniuniuniun_3C_uni_3E_
// Escape-time iteration over a block of rows x columns.
//
// Only param_0 is read. It is a 64-bit pointer to a record whose fields are
// loaded at these byte offsets:
//   +0  f32 -> %f0  column origin    (point x = %f0 + %f1 * column)
//   +4  f32 -> %f1  column step
//   +8  f32 -> %f2  row origin       (point y = %f2 + %f3 * row)
//   +12 f32 -> %f3  row step
//   +16 u32 -> %r1  upper bound on the column index; also the row pitch used
//                   when forming the linear store index
//   +20 u32 -> %r7  upper bound on the row index
//   +24 u32 -> %r9  columns per block (scaled by getBlockIndex0___)
//   +28 u32 -> %r8  rows per block    (scaled by getBlockIndex1___)
//   +32 u32 -> %r0  iteration limit
//   +40 u64         base address of the u32 results (loaded on all_on only)
//   +48 u32 -> %r2  mask word; its sign bit selects all_on vs. some_on
// param_1 .. param_10 are declared but never loaded in this body.
//
// Two copies of the loop nest follow: "some_on" (BB8_3) and "all_on" (BB8_1).
// The column loops advance by 32 per step and add getLaneIndex___() to the
// running column (presumably one column per lane of a 32-wide group -- confirm
// against the getLaneIndex___ definition).
// NOTE(review): the some_on path contains no store to the result buffer and
// never loads the +40 base address; only the all_on path writes results.
// Confirm this is intended.
.func mandelbrot_scanline___unfunfunfunfuniuniuniuniuniun_3C_uni_3E_(
.param .b64 mandelbrot_scanline___unfunfunfunfuniuniuniuniuniun_3C_uni_3E__param_0,
.param .b32 mandelbrot_scanline___unfunfunfunfuniuniuniuniuniun_3C_uni_3E__param_1,
.param .b32 mandelbrot_scanline___unfunfunfunfuniuniuniuniuniun_3C_uni_3E__param_2,
.param .b32 mandelbrot_scanline___unfunfunfunfuniuniuniuniuniun_3C_uni_3E__param_3,
.param .b32 mandelbrot_scanline___unfunfunfunfuniuniuniuniuniun_3C_uni_3E__param_4,
.param .b32 mandelbrot_scanline___unfunfunfunfuniuniuniuniuniun_3C_uni_3E__param_5,
.param .b32 mandelbrot_scanline___unfunfunfunfuniuniuniuniuniun_3C_uni_3E__param_6,
.param .b32 mandelbrot_scanline___unfunfunfunfuniuniuniuniuniun_3C_uni_3E__param_7,
.param .b32 mandelbrot_scanline___unfunfunfunfuniuniuniuniuniun_3C_uni_3E__param_8,
.param .b32 mandelbrot_scanline___unfunfunfunfuniuniuniuniuniun_3C_uni_3E__param_9,
.param .b32 mandelbrot_scanline___unfunfunfunfuniuniuniuniuniun_3C_uni_3E__param_10
) // @mandelbrot_scanline___unfunfunfunfuniuniuniuniuniun_3C_uni_3E_
{
.reg .pred %p<396>;
.reg .s16 %rc<396>;
.reg .s16 %rs<396>;
.reg .s32 %r<396>;
.reg .s64 %rl<396>;
.reg .f32 %f<396>;
.reg .f64 %fl<396>;
// BB#0: // %allocas
// Unpack the argument record addressed by param_0 (see the offset table above).
ld.param.u64 %rl0, [mandelbrot_scanline___unfunfunfunfuniuniuniuniuniun_3C_uni_3E__param_0];
ld.f32 %f0, [%rl0];
ld.f32 %f1, [%rl0+4];
ld.f32 %f2, [%rl0+8];
ld.f32 %f3, [%rl0+12];
ld.u32 %r1, [%rl0+16];
ld.u32 %r7, [%rl0+20];
ld.u32 %r9, [%rl0+24];
ld.u32 %r8, [%rl0+28];
ld.u32 %r0, [%rl0+32];
ld.u32 %r2, [%rl0+48];
// Mask sign bit clear (mask > -1) -> some_on path; sign bit set -> all_on path.
setp.gt.s32 %p0, %r2, -1;
@%p0 bra BB8_3;
bra.uni BB8_1;
BB8_3: // %some_on
// %r5 = getBlockIndex0___(mask)
// Callseq Start 0
{
.reg .b32 temp_param_reg;
// <end>}
.param .b32 param0;
st.param.b32 [param0+0], %r2;
.param .b32 retval0;
call.uni (retval0),
getBlockIndex0___,
(
param0
);
ld.param.b32 %r5, [retval0+0];
//{
}// Callseq End 0
// %r10 = getBlockIndex1___(mask)
// Callseq Start 1
{
.reg .b32 temp_param_reg;
// <end>}
.param .b32 param0;
st.param.b32 [param0+0], %r2;
.param .b32 retval0;
call.uni (retval0),
getBlockIndex1___,
(
param0
);
ld.param.b32 %r10, [retval0+0];
//{
}// Callseq End 1
// %r3 = first row of this block; %r4 = min(first row + rows per block, row bound).
mul.lo.s32 %r3, %r10, %r8;
mad.lo.s32 %r4, %r10, %r8, %r8;
setp.lt.s32 %p0, %r4, %r7;
selp.b32 %r4, %r4, %r7, %p0;
// Empty row range: return immediately.
setp.ge.s32 %p0, %r3, %r4;
@%p0 bra BB8_31;
// BB#4: // %for_test112.preheader.lr.ph
// %r4 = first column of this block; %r1 = min(first column + columns per block, column bound).
mul.lo.s32 %r4, %r5, %r9;
mad.lo.s32 %r5, %r5, %r9, %r9;
setp.lt.s32 %p0, %r5, %r1;
selp.b32 %r1, %r5, %r1, %p0;
// %r5 = all-ones when the iteration limit is positive, else 0; %r6 = %r5 & mask.
setp.gt.s32 %p0, %r0, 0;
selp.b32 %r5, -1, 0, %p0;
and.b32 %r6, %r5, %r2;
// %r7 = ~max(~row bound, ~(rows per block * (block index 1 + 1))), i.e. the
// smaller of the two: the row value at which the outer loop stops.
not.b32 %r7, %r7;
add.s32 %r9, %r10, 1;
mul.lo.s32 %r8, %r8, %r9;
not.b32 %r8, %r8;
setp.gt.s32 %p0, %r7, %r8;
selp.b32 %r7, %r7, %r8, %p0;
not.b32 %r7, %r7;
BB8_5: // %for_test112.preheader
// =>This Loop Header: Depth=1
// Child Loop BB8_29 Depth 2
// Child Loop BB8_28 Depth 2
// Child Loop BB8_23 Depth 3
// Row loop (row = %r3). Skip the row when the column range is empty.
setp.ge.s32 %p0, %r4, %r1;
@%p0 bra BB8_30;
// BB#21: // %for_loop114.lr.ph
// in Loop: Header=BB8_5 Depth=1
// %r6 sign bit set -> iterating column loop (BB8_28); otherwise the
// non-iterating column loop (BB8_29). %r8 = running column.
setp.lt.s32 %p0, %r6, 0;
mov.u32 %r8, %r4;
@%p0 bra BB8_22;
bra.uni BB8_29;
BB8_22: // in Loop: Header=BB8_5 Depth=1
// %f4 = %f2 + %f3 * row: the y term, constant across this row.
cvt.rn.f32.s32 %f4, %r3;
fma.rn.f32 %f4, %f3, %f4, %f2;
mov.u32 %r8, %r4;
BB8_28: // %for_loop.i.lr.ph.us
// Parent Loop BB8_5 Depth=1
// => This Loop Header: Depth=2
// Child Loop BB8_23 Depth 3
// %r9 = getLaneIndex___(mask)
// Callseq Start 5
{
.reg .b32 temp_param_reg;
// <end>}
.param .b32 param0;
st.param.b32 [param0+0], %r2;
.param .b32 retval0;
call.uni (retval0),
getLaneIndex___,
(
param0
);
ld.param.b32 %r9, [retval0+0];
//{
}// Callseq End 5
// Column = lane index + running column; %f5 = %f0 + %f1 * column (the x term).
add.s32 %r9, %r9, %r8;
cvt.rn.f32.s32 %f5, %r9;
fma.rn.f32 %f5, %f1, %f5, %f0;
// Per-point state: %r9 = 0 (zero constant), %r12 = active flag,
// %r10 = accumulated exit flags, %r11 = iteration counter, (%f7, %f6) = (%f5, %f4).
mov.u32 %r9, 0;
mov.u32 %r12, %r5;
mov.u32 %r10, %r9;
mov.u32 %r11, %r9;
mov.f32 %f7, %f5;
mov.f32 %f6, %f4;
BB8_23: // %for_loop.i.us
// Parent Loop BB8_5 Depth=1
// Parent Loop BB8_28 Depth=2
// => This Inner Loop Header: Depth=3
// Escape test: %f9 = %f7^2 + %f6^2 against 4.0 (0f40800000). setp.gtu is also
// true for unordered (NaN) operands. An escape ORs the active flag into %r10.
and.b32 %r13, %r12, %r2;
mul.f32 %f8, %f7, %f7;
fma.rn.f32 %f9, %f6, %f6, %f8;
setp.gtu.f32 %p0, %f9, 0f40800000;
selp.b32 %r14, %r12, 0, %p0;
or.b32 %r10, %r14, %r10;
// Compare the sign bits of (exit flags & mask) and (active flag & mask):
// equal -> BB8_24 clears the active flag; different -> BB8_25 advances the point.
and.b32 %r14, %r10, %r2;
shr.u32 %r14, %r14, 31;
shr.u32 %r13, %r13, 31;
setp.eq.s32 %p0, %r14, %r13;
@%p0 bra BB8_24;
bra.uni BB8_25;
BB8_24: // in Loop: Header=BB8_23 Depth=3
// %r9 holds 0 here: deactivate.
mov.u32 %r12, %r9;
bra.uni BB8_26;
BB8_25: // %not_all_continued_or_breaked.i.us
// in Loop: Header=BB8_23 Depth=3
// Clear the active flag where an exit was recorded, then advance:
//   %f7' = %f5 + (%f7^2 - %f6^2),   %f6' = %f4 + %f6 * (2 * %f7)
mul.f32 %f9, %f6, %f6;
not.b32 %r13, %r10;
and.b32 %r12, %r12, %r13;
sub.f32 %f8, %f8, %f9;
add.f32 %f8, %f5, %f8;
add.f32 %f7, %f7, %f7;
fma.rn.f32 %f6, %f6, %f7, %f4;
mov.f32 %f7, %f8;
BB8_26: // %for_step.i.us
// in Loop: Header=BB8_23 Depth=3
// Count the iteration if still active, drop the active flag once the counter
// reaches the limit, and repeat while (active flag & mask) has its sign bit set.
setp.ne.s32 %p0, %r12, 0;
selp.u32 %r13, 1, 0, %p0;
add.s32 %r11, %r11, %r13;
setp.lt.s32 %p0, %r11, %r0;
selp.b32 %r12, %r12, 0, %p0;
and.b32 %r13, %r12, %r2;
setp.lt.s32 %p0, %r13, 0;
@%p0 bra BB8_23;
// BB#27: // %if_exit156.us
// in Loop: Header=BB8_28 Depth=2
// Two getLaneIndex___ calls follow; the value each leaves in %r9 is not
// consumed before %r9 is next overwritten.
// Callseq Start 6
{
.reg .b32 temp_param_reg;
// <end>}
.param .b32 param0;
st.param.b32 [param0+0], %r2;
.param .b32 retval0;
call.uni (retval0),
getLaneIndex___,
(
param0
);
ld.param.b32 %r9, [retval0+0];
//{
}// Callseq End 6
// Callseq Start 7
{
.reg .b32 temp_param_reg;
// <end>}
.param .b32 param0;
st.param.b32 [param0+0], %r2;
.param .b32 retval0;
call.uni (retval0),
getLaneIndex___,
(
param0
);
ld.param.b32 %r9, [retval0+0];
//{
}// Callseq End 7
// Advance the running column by 32; continue while it is below the column end.
add.s32 %r8, %r8, 32;
setp.lt.s32 %p0, %r8, %r1;
@%p0 bra BB8_28;
bra.uni BB8_30;
BB8_29: // %if_exit156
// Parent Loop BB8_5 Depth=1
// => This Inner Loop Header: Depth=2
// Non-iterating column loop: three getLaneIndex___ calls whose results in %r9
// are not consumed, then the same 32-column step.
// Callseq Start 2
{
.reg .b32 temp_param_reg;
// <end>}
.param .b32 param0;
st.param.b32 [param0+0], %r2;
.param .b32 retval0;
call.uni (retval0),
getLaneIndex___,
(
param0
);
ld.param.b32 %r9, [retval0+0];
//{
}// Callseq End 2
// Callseq Start 3
{
.reg .b32 temp_param_reg;
// <end>}
.param .b32 param0;
st.param.b32 [param0+0], %r2;
.param .b32 retval0;
call.uni (retval0),
getLaneIndex___,
(
param0
);
ld.param.b32 %r9, [retval0+0];
//{
}// Callseq End 3
// Callseq Start 4
{
.reg .b32 temp_param_reg;
// <end>}
.param .b32 param0;
st.param.b32 [param0+0], %r2;
.param .b32 retval0;
call.uni (retval0),
getLaneIndex___,
(
param0
);
ld.param.b32 %r9, [retval0+0];
//{
}// Callseq End 4
add.s32 %r8, %r8, 32;
setp.lt.s32 %p0, %r8, %r1;
@%p0 bra BB8_29;
BB8_30: // %for_exit115
// in Loop: Header=BB8_5 Depth=1
// Next row; leave once the row reaches the end row in %r7.
add.s32 %r3, %r3, 1;
setp.eq.s32 %p0, %r3, %r7;
@%p0 bra BB8_31;
bra.uni BB8_5;
BB8_1: // %all_on
// all_on path: fetch the result base address (+40) and use an all-ones mask.
ld.u64 %rl0, [%rl0+40];
mov.u32 %r2, -1;
// %r10 = getBlockIndex0___(-1)
// Callseq Start 8
{
.reg .b32 temp_param_reg;
// <end>}
.param .b32 param0;
st.param.b32 [param0+0], %r2;
.param .b32 retval0;
call.uni (retval0),
getBlockIndex0___,
(
param0
);
ld.param.b32 %r10, [retval0+0];
//{
}// Callseq End 8
// %r11 = getBlockIndex1___(-1)
// Callseq Start 9
{
.reg .b32 temp_param_reg;
// <end>}
.param .b32 param0;
st.param.b32 [param0+0], %r2;
.param .b32 retval0;
call.uni (retval0),
getBlockIndex1___,
(
param0
);
ld.param.b32 %r11, [retval0+0];
//{
}// Callseq End 9
// %r3 = first row of this block; %r4 = min(first row + rows per block, row bound).
mul.lo.s32 %r3, %r11, %r8;
mad.lo.s32 %r4, %r11, %r8, %r8;
setp.lt.s32 %p0, %r4, %r7;
selp.b32 %r4, %r4, %r7, %p0;
// Empty row range: return immediately.
setp.ge.s32 %p0, %r3, %r4;
@%p0 bra BB8_31;
// BB#2: // %for_test40.preheader.lr.ph
// %r4 = first column; %r5 = min(first column + columns per block, column bound).
// %r1 keeps the unclamped column bound on this path.
mul.lo.s32 %r4, %r10, %r9;
mad.lo.s32 %r5, %r10, %r9, %r9;
setp.lt.s32 %p0, %r5, %r1;
selp.b32 %r5, %r5, %r1, %p0;
// %r6 = all-ones when the iteration limit is positive, else 0.
setp.gt.s32 %p0, %r0, 0;
selp.b32 %r6, -1, 0, %p0;
// %r7 = min(row bound, rows per block * (block index 1 + 1)), computed as
// ~max(~a, ~b): the row value at which the outer loop stops.
not.b32 %r7, %r7;
add.s32 %r12, %r11, 1;
mul.lo.s32 %r12, %r8, %r12;
not.b32 %r12, %r12;
setp.gt.s32 %p0, %r7, %r12;
selp.b32 %r7, %r7, %r12, %p0;
not.b32 %r7, %r7;
// %r8 = first row * %r1 + first column: linear index of the block's first point.
mul.lo.s32 %r8, %r11, %r8;
mul.lo.s32 %r8, %r8, %r1;
mad.lo.s32 %r8, %r10, %r9, %r8;
BB8_7: // %for_test40.preheader
// =>This Loop Header: Depth=1
// Child Loop BB8_19 Depth 2
// Child Loop BB8_13 Depth 2
// Child Loop BB8_14 Depth 3
// Row loop (row = %r3). Skip the row when the column range is empty.
setp.ge.s32 %p0, %r4, %r5;
@%p0 bra BB8_6;
// BB#8: // %for_loop42.lr.ph
// in Loop: Header=BB8_7 Depth=1
// %r6 sign bit set -> iterating column loop (BB8_13); otherwise BB8_19, which
// stores zeros. For BB8_19: %r9 = running linear index, %r10 = running column.
setp.lt.s32 %p0, %r6, 0;
mov.u32 %r9, %r8;
mov.u32 %r10, %r4;
@%p0 bra BB8_9;
bra.uni BB8_19;
BB8_9: // in Loop: Header=BB8_7 Depth=1
// %f4 = %f2 + %f3 * row (the y term); %r9 = row * %r1 (linear index of the row start).
cvt.rn.f32.s32 %f4, %r3;
mul.lo.s32 %r9, %r3, %r1;
fma.rn.f32 %f4, %f3, %f4, %f2;
mov.u32 %r10, %r4;
BB8_13: // %for_loop.i204.lr.ph.us
// Parent Loop BB8_7 Depth=1
// => This Loop Header: Depth=2
// Child Loop BB8_14 Depth 3
// %r11 = getLaneIndex___(mask)
// Callseq Start 13
{
.reg .b32 temp_param_reg;
// <end>}
.param .b32 param0;
st.param.b32 [param0+0], %r2;
.param .b32 retval0;
call.uni (retval0),
getLaneIndex___,
(
param0
);
ld.param.b32 %r11, [retval0+0];
//{
}// Callseq End 13
// Column = lane index + running column; %f5 = %f0 + %f1 * column (the x term).
add.s32 %r11, %r11, %r10;
cvt.rn.f32.s32 %f5, %r11;
fma.rn.f32 %f5, %f1, %f5, %f0;
// Per-point state: %r12 = 0 (zero constant), %r14 = active flag,
// %r13 = accumulated exit flags, %r11 = iteration counter, (%f7, %f6) = (%f5, %f4).
mov.u32 %r12, 0;
mov.u32 %r14, %r6;
mov.u32 %r13, %r12;
mov.u32 %r11, %r12;
mov.f32 %f7, %f5;
mov.f32 %f6, %f4;
BB8_14: // %for_loop.i204.us
// Parent Loop BB8_7 Depth=1
// Parent Loop BB8_13 Depth=2
// => This Inner Loop Header: Depth=3
// Escape test: %f9 = %f7^2 + %f6^2 against 4.0 (0f40800000). setp.gtu is also
// true for unordered (NaN) operands. An escape ORs the active flag into %r13.
mul.f32 %f8, %f7, %f7;
fma.rn.f32 %f9, %f6, %f6, %f8;
setp.gtu.f32 %p0, %f9, 0f40800000;
selp.b32 %r15, %r14, 0, %p0;
or.b32 %r13, %r15, %r13;
// Compare the sign bits of the exit flags and the active flag:
// equal -> BB8_15 clears the active flag; different -> BB8_16 advances the point.
shr.u32 %r15, %r13, 31;
shr.u32 %r16, %r14, 31;
setp.eq.s32 %p0, %r15, %r16;
@%p0 bra BB8_15;
bra.uni BB8_16;
BB8_15: // in Loop: Header=BB8_14 Depth=3
// %r12 holds 0 here: deactivate.
mov.u32 %r14, %r12;
bra.uni BB8_17;
BB8_16: // %not_all_continued_or_breaked.i218.us
// in Loop: Header=BB8_14 Depth=3
// Clear the active flag where an exit was recorded, then advance:
//   %f7' = %f5 + (%f7^2 - %f6^2),   %f6' = %f4 + %f6 * (2 * %f7)
mul.f32 %f9, %f6, %f6;
not.b32 %r15, %r13;
and.b32 %r14, %r14, %r15;
sub.f32 %f8, %f8, %f9;
add.f32 %f8, %f5, %f8;
add.f32 %f7, %f7, %f7;
fma.rn.f32 %f6, %f6, %f7, %f4;
mov.f32 %f7, %f8;
BB8_17: // %for_step.i187.us
// in Loop: Header=BB8_14 Depth=3
// Count the iteration if still active, drop the active flag once the counter
// reaches the limit, and repeat while the active flag has its sign bit set.
setp.ne.s32 %p0, %r14, 0;
selp.u32 %r15, 1, 0, %p0;
add.s32 %r11, %r11, %r15;
setp.lt.s32 %p0, %r11, %r0;
selp.b32 %r14, %r14, 0, %p0;
setp.lt.s32 %p0, %r14, 0;
@%p0 bra BB8_14;
// BB#10: // %mandel___vyfvyfvyi.exit219.us
// in Loop: Header=BB8_13 Depth=2
// %r12 = getLaneIndex___(mask): lane offset used for the store index below.
// Callseq Start 14
{
.reg .b32 temp_param_reg;
// <end>}
.param .b32 param0;
st.param.b32 [param0+0], %r2;
.param .b32 retval0;
call.uni (retval0),
getLaneIndex___,
(
param0
);
ld.param.b32 %r12, [retval0+0];
//{
}// Callseq End 14
// %r13 = getLaneIndex___(mask): lane offset used for the bounds check below.
// Callseq Start 15
{
.reg .b32 temp_param_reg;
// <end>}
.param .b32 param0;
st.param.b32 [param0+0], %r2;
.param .b32 retval0;
call.uni (retval0),
getLaneIndex___,
(
param0
);
ld.param.b32 %r13, [retval0+0];
//{
}// Callseq End 15
// Skip the store when this lane's column (lane + running column) is at or past the column end.
add.s32 %r13, %r13, %r10;
setp.ge.s32 %p0, %r13, %r5;
@%p0 bra BB8_12;
// BB#11: // %if_then.us
// in Loop: Header=BB8_13 Depth=2
// Store the iteration counter at base + 4 * (running column + row start + lane).
// The byte offset is formed in 32 bits (shl.b32) before being sign-extended to 64.
add.s32 %r13, %r10, %r9;
add.s32 %r12, %r13, %r12;
shl.b32 %r12, %r12, 2;
cvt.s64.s32 %rl1, %r12;
add.s64 %rl1, %rl1, %rl0;
st.u32 [%rl1], %r11;
BB8_12: // %if_exit.us
// in Loop: Header=BB8_13 Depth=2
// Advance the running column by 32; continue while it is below the column end.
add.s32 %r10, %r10, 32;
setp.lt.s32 %p0, %r10, %r5;
@%p0 bra BB8_13;
bra.uni BB8_6;
BB8_19: // %mandel___vyfvyfvyi.exit219
// Parent Loop BB8_7 Depth=1
// => This Inner Loop Header: Depth=2
// Non-iterating column loop (iteration limit not positive): stores 0 per point.
// Three getLaneIndex___ calls: the first result in %r11 is overwritten by the
// second; the second feeds the store index, the third the bounds check.
// Callseq Start 10
{
.reg .b32 temp_param_reg;
// <end>}
.param .b32 param0;
st.param.b32 [param0+0], %r2;
.param .b32 retval0;
call.uni (retval0),
getLaneIndex___,
(
param0
);
ld.param.b32 %r11, [retval0+0];
//{
}// Callseq End 10
// Callseq Start 11
{
.reg .b32 temp_param_reg;
// <end>}
.param .b32 param0;
st.param.b32 [param0+0], %r2;
.param .b32 retval0;
call.uni (retval0),
getLaneIndex___,
(
param0
);
ld.param.b32 %r11, [retval0+0];
//{
}// Callseq End 11
// Callseq Start 12
{
.reg .b32 temp_param_reg;
// <end>}
.param .b32 param0;
st.param.b32 [param0+0], %r2;
.param .b32 retval0;
call.uni (retval0),
getLaneIndex___,
(
param0
);
ld.param.b32 %r12, [retval0+0];
//{
}// Callseq End 12
// Store only when this lane's column (lane + running column) is below the column end.
add.s32 %r12, %r12, %r10;
setp.lt.s32 %p0, %r12, %r5;
@%p0 bra BB8_20;
bra.uni BB8_18;
BB8_20: // %if_then
// in Loop: Header=BB8_19 Depth=2
// Store 0 at base + 4 * (lane + running linear index); offset formed in 32 bits.
add.s32 %r11, %r11, %r9;
shl.b32 %r11, %r11, 2;
cvt.s64.s32 %rl1, %r11;
add.s64 %rl1, %rl1, %rl0;
mov.u32 %r11, 0;
st.u32 [%rl1], %r11;
BB8_18: // %if_exit
// in Loop: Header=BB8_19 Depth=2
// Advance both the running column and the running linear index by 32.
add.s32 %r10, %r10, 32;
add.s32 %r9, %r9, 32;
setp.lt.s32 %p0, %r10, %r5;
@%p0 bra BB8_19;
BB8_6: // %for_exit43
// in Loop: Header=BB8_7 Depth=1
// Next row: bump the row, move the linear origin down one row (+%r1),
// and leave once the row reaches the end row in %r7.
add.s32 %r3, %r3, 1;
add.s32 %r8, %r8, %r1;
setp.eq.s32 %p0, %r3, %r7;
@%p0 bra BB8_31;
bra.uni BB8_7;
BB8_31: // %for_exit
ret;
}