// Listing metadata from the file viewer: 844 lines, 29 KiB, plaintext.
//
|
|
// Generated by LLVM NVPTX Back-End
|
|
//
|
|
|
|
.version 3.1
|
|
.target sm_35, texmode_independent
|
|
.address_size 64
|
|
|
|
// .globl __vselect_i8
|
|
// @__vselect_i8
|
|
.func (.param .align 1 .b8 func_retval0[1]) __vselect_i8(
	.param .align 1 .b8 __vselect_i8_param_0[1],
	.param .align 1 .b8 __vselect_i8_param_1[1],
	.param .align 4 .b8 __vselect_i8_param_2[4]
)	// @__vselect_i8
{
	// 8-bit select: returns param_0 when the 32-bit word in param_2 is
	// zero, otherwise returns param_1.
	.reg .pred	%cond_is_zero;
	.reg .s16	%first, %second, %chosen;
	.reg .s32	%cond;

	ld.param.u8	%first, [__vselect_i8_param_0];
	ld.param.u8	%second, [__vselect_i8_param_1];
	ld.param.u32	%cond, [__vselect_i8_param_2];
	setp.eq.s32	%cond_is_zero, %cond, 0;
	// The byte operands live in 16-bit registers, hence the .b16 select.
	selp.b16	%chosen, %first, %second, %cond_is_zero;
	st.param.b8	[func_retval0+0], %chosen;
	ret;
}
|
|
|
|
// .globl __vselect_i16
|
|
.func (.param .align 2 .b8 func_retval0[2]) __vselect_i16(
	.param .align 2 .b8 __vselect_i16_param_0[2],
	.param .align 2 .b8 __vselect_i16_param_1[2],
	.param .align 4 .b8 __vselect_i16_param_2[4]
)	// @__vselect_i16
{
	// 16-bit select: returns param_0 when the 32-bit word in param_2 is
	// zero, otherwise returns param_1.
	.reg .pred	%cond_is_zero;
	.reg .s16	%first, %second, %chosen;
	.reg .s32	%cond;

	ld.param.u16	%first, [__vselect_i16_param_0];
	ld.param.u16	%second, [__vselect_i16_param_1];
	ld.param.u32	%cond, [__vselect_i16_param_2];
	setp.eq.s32	%cond_is_zero, %cond, 0;
	selp.b16	%chosen, %first, %second, %cond_is_zero;
	st.param.b16	[func_retval0+0], %chosen;
	ret;
}
|
|
|
|
// .globl __vselect_i64
|
|
.func (.param .align 8 .b8 func_retval0[8]) __vselect_i64(
	.param .align 8 .b8 __vselect_i64_param_0[8],
	.param .align 8 .b8 __vselect_i64_param_1[8],
	.param .align 4 .b8 __vselect_i64_param_2[4]
)	// @__vselect_i64
{
	// 64-bit select: returns param_0 when the 32-bit word in param_2 is
	// zero, otherwise returns param_1.
	.reg .pred	%cond_is_zero;
	.reg .s32	%cond;
	.reg .s64	%first, %second, %chosen;

	ld.param.u64	%first, [__vselect_i64_param_0];
	ld.param.u64	%second, [__vselect_i64_param_1];
	ld.param.u32	%cond, [__vselect_i64_param_2];
	setp.eq.s32	%cond_is_zero, %cond, 0;
	selp.b64	%chosen, %first, %second, %cond_is_zero;
	st.param.b64	[func_retval0+0], %chosen;
	ret;
}
|
|
|
|
// .globl __aos_to_soa4_float1
|
|
.func __aos_to_soa4_float1(
	.param .align 4 .b8 __aos_to_soa4_float1_param_0[4],
	.param .align 4 .b8 __aos_to_soa4_float1_param_1[4],
	.param .align 4 .b8 __aos_to_soa4_float1_param_2[4],
	.param .align 4 .b8 __aos_to_soa4_float1_param_3[4],
	.param .b64 __aos_to_soa4_float1_param_4,
	.param .b64 __aos_to_soa4_float1_param_5,
	.param .b64 __aos_to_soa4_float1_param_6,
	.param .b64 __aos_to_soa4_float1_param_7
)	// @__aos_to_soa4_float1
{
	// Stores the four f32 arguments param_0..param_3 through the four
	// 64-bit addresses param_4..param_7: value i is written to address i.
	.reg .f32	%val<4>;
	.reg .s64	%dst<4>;

	// Fetch every argument before the first store.
	ld.param.f32	%val0, [__aos_to_soa4_float1_param_0];
	ld.param.f32	%val1, [__aos_to_soa4_float1_param_1];
	ld.param.f32	%val2, [__aos_to_soa4_float1_param_2];
	ld.param.f32	%val3, [__aos_to_soa4_float1_param_3];
	ld.param.u64	%dst0, [__aos_to_soa4_float1_param_4];
	ld.param.u64	%dst1, [__aos_to_soa4_float1_param_5];
	ld.param.u64	%dst2, [__aos_to_soa4_float1_param_6];
	ld.param.u64	%dst3, [__aos_to_soa4_float1_param_7];

	// Store order is 0, 1, 2, 3 (matters only if destinations overlap).
	st.f32	[%dst0], %val0;
	st.f32	[%dst1], %val1;
	st.f32	[%dst2], %val2;
	st.f32	[%dst3], %val3;
	ret;
}
|
|
|
|
// .globl __soa_to_aos4_float1
|
|
.func __soa_to_aos4_float1(
	.param .align 4 .b8 __soa_to_aos4_float1_param_0[4],
	.param .align 4 .b8 __soa_to_aos4_float1_param_1[4],
	.param .align 4 .b8 __soa_to_aos4_float1_param_2[4],
	.param .align 4 .b8 __soa_to_aos4_float1_param_3[4],
	.param .b64 __soa_to_aos4_float1_param_4,
	.param .b64 __soa_to_aos4_float1_param_5,
	.param .b64 __soa_to_aos4_float1_param_6,
	.param .b64 __soa_to_aos4_float1_param_7
)	// @__soa_to_aos4_float1
{
	// Writes each of the f32 arguments param_0..param_3 to the address
	// held in the matching pointer argument param_4..param_7.
	.reg .f32	%src<4>;
	.reg .s64	%out<4>;

	// All parameter reads happen ahead of the stores.
	ld.param.f32	%src0, [__soa_to_aos4_float1_param_0];
	ld.param.f32	%src1, [__soa_to_aos4_float1_param_1];
	ld.param.f32	%src2, [__soa_to_aos4_float1_param_2];
	ld.param.f32	%src3, [__soa_to_aos4_float1_param_3];
	ld.param.u64	%out0, [__soa_to_aos4_float1_param_4];
	ld.param.u64	%out1, [__soa_to_aos4_float1_param_5];
	ld.param.u64	%out2, [__soa_to_aos4_float1_param_6];
	ld.param.u64	%out3, [__soa_to_aos4_float1_param_7];

	// Stores are issued in ascending index order.
	st.f32	[%out0], %src0;
	st.f32	[%out1], %src1;
	st.f32	[%out2], %src2;
	st.f32	[%out3], %src3;
	ret;
}
|
|
|
|
// .globl __aos_to_soa3_float1
|
|
.func __aos_to_soa3_float1(
	.param .align 4 .b8 __aos_to_soa3_float1_param_0[4],
	.param .align 4 .b8 __aos_to_soa3_float1_param_1[4],
	.param .align 4 .b8 __aos_to_soa3_float1_param_2[4],
	.param .b64 __aos_to_soa3_float1_param_3,
	.param .b64 __aos_to_soa3_float1_param_4,
	.param .b64 __aos_to_soa3_float1_param_5
)	// @__aos_to_soa3_float1
{
	// Stores the three f32 arguments param_0..param_2 through the three
	// 64-bit addresses param_3..param_5: value i is written to address i.
	.reg .f32	%val<3>;
	.reg .s64	%dst<3>;

	// Fetch every argument before the first store.
	ld.param.f32	%val0, [__aos_to_soa3_float1_param_0];
	ld.param.f32	%val1, [__aos_to_soa3_float1_param_1];
	ld.param.f32	%val2, [__aos_to_soa3_float1_param_2];
	ld.param.u64	%dst0, [__aos_to_soa3_float1_param_3];
	ld.param.u64	%dst1, [__aos_to_soa3_float1_param_4];
	ld.param.u64	%dst2, [__aos_to_soa3_float1_param_5];

	// Store order is 0, 1, 2 (matters only if destinations overlap).
	st.f32	[%dst0], %val0;
	st.f32	[%dst1], %val1;
	st.f32	[%dst2], %val2;
	ret;
}
|
|
|
|
// .globl __soa_to_aos3_float1
|
|
.func __soa_to_aos3_float1(
	.param .align 4 .b8 __soa_to_aos3_float1_param_0[4],
	.param .align 4 .b8 __soa_to_aos3_float1_param_1[4],
	.param .align 4 .b8 __soa_to_aos3_float1_param_2[4],
	.param .b64 __soa_to_aos3_float1_param_3,
	.param .b64 __soa_to_aos3_float1_param_4,
	.param .b64 __soa_to_aos3_float1_param_5
)	// @__soa_to_aos3_float1
{
	// Writes each of the f32 arguments param_0..param_2 to the address
	// held in the matching pointer argument param_3..param_5.
	.reg .f32	%src<3>;
	.reg .s64	%out<3>;

	// All parameter reads happen ahead of the stores.
	ld.param.f32	%src0, [__soa_to_aos3_float1_param_0];
	ld.param.f32	%src1, [__soa_to_aos3_float1_param_1];
	ld.param.f32	%src2, [__soa_to_aos3_float1_param_2];
	ld.param.u64	%out0, [__soa_to_aos3_float1_param_3];
	ld.param.u64	%out1, [__soa_to_aos3_float1_param_4];
	ld.param.u64	%out2, [__soa_to_aos3_float1_param_5];

	// Stores are issued in ascending index order.
	st.f32	[%out0], %src0;
	st.f32	[%out1], %src1;
	st.f32	[%out2], %src2;
	ret;
}
|
|
|
|
// .globl __rsqrt_varying_double
|
|
.func (.param .align 8 .b8 func_retval0[8]) __rsqrt_varying_double(
	.param .align 8 .b8 __rsqrt_varying_double_param_0[8]
)	// @__rsqrt_varying_double
{
	// Returns the result of rsqrt.approx.f64 applied to the f64 argument.
	.reg .f64	%x, %inv_root;

	ld.param.f64	%x, [__rsqrt_varying_double_param_0];
	rsqrt.approx.f64	%inv_root, %x;
	st.param.f64	[func_retval0+0], %inv_root;
	ret;
}
|
|
|
|
// .globl mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E_
|
|
// mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E_
//
// For every row in [0, param_5) and every column in [0, param_4) this
// function stores one 32-bit iteration count at
//     param_7 + 4 * (row * param_4 + column).
//
//   param_0, param_1 : f32 offsets added to the scaled column / row index
//   param_2, param_3 : f32 values; the per-column step is
//                      (param_2 - param_0) / float(param_4) and the per-row
//                      step is (param_3 - param_1) / float(param_5)
//   param_4, param_5 : s32 column count and row count
//   param_6          : s32 iteration limit
//   param_7          : 64-bit address of the u32 output buffer
//   param_8          : 32-bit word; only "param_8 < 0" is tested, and it
//                      selects which of two loop nests runs
//
// The two loop nests (BB8_3.. and BB8_11..) appear to be structurally
// identical apart from label names.
// NOTE(review): meanings such as "x0/y0/x1/y1, width, height, execution
// mask" are presumed from the usage pattern -- confirm against the ISPC
// source that produced this file.
.func mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E_(
|
|
.param .b32 mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E__param_0,
|
|
.param .b32 mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E__param_1,
|
|
.param .b32 mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E__param_2,
|
|
.param .b32 mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E__param_3,
|
|
.param .b32 mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E__param_4,
|
|
.param .b32 mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E__param_5,
|
|
.param .b32 mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E__param_6,
|
|
.param .b64 mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E__param_7,
|
|
.param .align 4 .b8 mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E__param_8[4]
|
|
) // @mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E_
|
|
{
|
|
.reg .pred %p<396>;
|
|
.reg .s16 %rc<396>;
|
|
.reg .s16 %rs<396>;
|
|
.reg .s32 %r<396>;
|
|
.reg .s64 %rl<396>;
|
|
.reg .f32 %f<396>;
|
|
.reg .f64 %fl<396>;
|
|
|
|
// BB#0: // %allocas
|
|
ld.param.f32 %f0, [mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E__param_0];
|
|
ld.param.f32 %f1, [mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E__param_1];
|
|
ld.param.f32 %f3, [mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E__param_2];
|
|
ld.param.f32 %f2, [mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E__param_3];
|
|
ld.param.u32 %r0, [mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E__param_4];
|
|
ld.param.u32 %r1, [mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E__param_5];
|
|
ld.param.u32 %r2, [mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E__param_6];
|
|
ld.param.u64 %rl0, [mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E__param_7];
|
|
ld.param.u32 %r3, [mandelbrot_ispc___unfunfunfunfuniuniuniun_3C_uni_3E__param_8];
|
|
// %p0 = (param_8 < 0); consumed by the branch to BB8_9 below.
setp.lt.s32 %p0, %r3, 0;
|
|
// Next six instructions: %f3 = (param_2 - param_0) / float(param_4),
// %f2 = (param_3 - param_1) / float(param_5). %f4/%f5 are scratch here.
sub.f32 %f3, %f3, %f0;
|
|
cvt.rn.f32.s32 %f4, %r0;
|
|
sub.f32 %f2, %f2, %f1;
|
|
cvt.rn.f32.s32 %f5, %r1;
|
|
div.rn.f32 %f2, %f2, %f5;
|
|
div.rn.f32 %f3, %f3, %f4;
|
|
@%p0 bra BB8_9;
|
|
// BB#1: // %for_test110.preheader
|
|
// Nothing to do when the row count (param_5) is below 1.
setp.lt.s32 %p0, %r1, 1;
|
|
@%p0 bra BB8_45;
|
|
// BB#2: // %outer_not_in_extras140.preheader.lr.ph
|
|
setp.gt.s32 %p0, %r2, 0;
|
|
mov.u32 %r3, 0;
|
|
// %r4 = 0xFFFFFFFF when param_6 > 0, else 0; it seeds the per-cell
// "still iterating" word whose sign bit drives the inner loop.
selp.b32 %r4, -1, 0, %p0;
|
|
// %r5 = param_4 * 4: byte size of one output row.
shl.b32 %r5, %r0, 2;
|
|
// %r6 = row index, %r3 = byte offset of the current row; both start at 0.
mov.u32 %r6, %r3;
|
|
BB8_3: // %outer_not_in_extras140.preheader
|
|
// =>This Loop Header: Depth=1
|
|
// Child Loop BB8_41 Depth 2
|
|
// Child Loop BB8_43 Depth 2
|
|
// Child Loop BB8_38 Depth 2
|
|
// Child Loop BB8_33 Depth 3
|
|
setp.lt.s32 %p0, %r0, 1;
|
|
@%p0 bra BB8_4;
|
|
// BB#31: // %foreach_full_body120.lr.ph
|
|
// in Loop: Header=BB8_3 Depth=1
|
|
// %r4 < 0 means param_6 > 0: run the iteration loops (BB8_32);
// otherwise fill the row with zeros (BB8_43).
setp.lt.s32 %p0, %r4, 0;
|
|
mov.u32 %r7, %r0;
|
|
mov.u32 %r8, %r3;
|
|
@%p0 bra BB8_32;
|
|
bra.uni BB8_43;
|
|
BB8_32: // in Loop: Header=BB8_3 Depth=1
|
|
// %rl1 = column counter; %f4 = %f2 * row + param_1 (row coordinate);
// %r7 = row * param_4 (index of the first cell in this row).
mov.u64 %rl1, 0;
|
|
cvt.rn.f32.s32 %f4, %r6;
|
|
fma.rn.f32 %f4, %f2, %f4, %f1;
|
|
mul.lo.s32 %r7, %r6, %r0;
|
|
BB8_38: // %for_loop.i380.lr.ph.us
|
|
// Parent Loop BB8_3 Depth=1
|
|
// => This Loop Header: Depth=2
|
|
// Child Loop BB8_33 Depth 3
|
|
// Per-cell setup: %f5 = %f3 * column + param_0 (column coordinate);
// %r12 = active word, %r11 = accumulated "escaped" word, %r9 = count;
// the iterate starts at (%f7, %f6) = (%f5, %f4).
cvt.u32.u64 %r8, %rl1;
|
|
cvt.rn.f32.s32 %f5, %r8;
|
|
fma.rn.f32 %f5, %f3, %f5, %f0;
|
|
mov.u32 %r10, 0;
|
|
mov.u32 %r12, %r4;
|
|
mov.u32 %r11, %r10;
|
|
mov.u32 %r9, %r10;
|
|
mov.f32 %f7, %f5;
|
|
mov.f32 %f6, %f4;
|
|
BB8_33: // %for_loop.i380.us
|
|
// Parent Loop BB8_3 Depth=1
|
|
// Parent Loop BB8_38 Depth=2
|
|
// => This Inner Loop Header: Depth=3
|
|
// Escape test: %f9 = %f7*%f7 + %f6*%f6 compared against 4.0
// (0f40800000); gtu is also true when the comparison is unordered (NaN).
mul.f32 %f8, %f7, %f7;
|
|
fma.rn.f32 %f9, %f6, %f6, %f8;
|
|
setp.gtu.f32 %p0, %f9, 0f40800000;
|
|
selp.b32 %r13, %r12, 0, %p0;
|
|
or.b32 %r11, %r13, %r11;
|
|
// Compare the sign bits of the escaped word and the active word: equal
// sign bits send control to BB8_34, which clears the active word.
shr.u32 %r13, %r11, 31;
|
|
shr.u32 %r14, %r12, 31;
|
|
setp.eq.s32 %p0, %r13, %r14;
|
|
@%p0 bra BB8_34;
|
|
bra.uni BB8_35;
|
|
BB8_34: // in Loop: Header=BB8_33 Depth=3
|
|
mov.u32 %r12, %r10;
|
|
bra.uni BB8_36;
|
|
BB8_35: // %not_all_continued_or_breaked.i394.us
|
|
// in Loop: Header=BB8_33 Depth=3
|
|
// Advance the iterate: new %f7 = %f7^2 - %f6^2 + %f5,
// new %f6 = 2 * %f7 * %f6 + %f4 (using the old %f7).
mul.f32 %f9, %f6, %f6;
|
|
not.b32 %r13, %r11;
|
|
and.b32 %r12, %r12, %r13;
|
|
sub.f32 %f8, %f8, %f9;
|
|
add.f32 %f8, %f5, %f8;
|
|
add.f32 %f7, %f7, %f7;
|
|
fma.rn.f32 %f6, %f6, %f7, %f4;
|
|
mov.f32 %f7, %f8;
|
|
BB8_36: // %for_step.i363.us
|
|
// in Loop: Header=BB8_33 Depth=3
|
|
// Count += 1 while the active word is non-zero; the active word is
// cleared once the count is no longer below param_6. Loop again only
// while the active word's sign bit is set.
setp.ne.s32 %p0, %r12, 0;
|
|
selp.u32 %r13, 1, 0, %p0;
|
|
add.s32 %r9, %r9, %r13;
|
|
setp.lt.s32 %p0, %r9, %r2;
|
|
selp.b32 %r12, %r12, 0, %p0;
|
|
setp.lt.s32 %p0, %r12, 0;
|
|
@%p0 bra BB8_33;
|
|
// BB#37: // %mandel___vyfvyfvyi.exit395.us
|
|
// in Loop: Header=BB8_38 Depth=2
|
|
// Store the count at param_7 + sext32(4 * (column + row * param_4)).
// The byte offset is formed in 32 bits before being sign-extended.
add.s32 %r8, %r8, %r7;
|
|
shl.b32 %r8, %r8, 2;
|
|
cvt.s64.s32 %rl2, %r8;
|
|
add.s64 %rl2, %rl2, %rl0;
|
|
st.u32 [%rl2], %r9;
|
|
// Next column; the row is finished when the counter equals param_4.
add.s64 %rl1, %rl1, 1;
|
|
cvt.u32.u64 %r8, %rl1;
|
|
setp.eq.s32 %p0, %r8, %r0;
|
|
@%p0 bra BB8_44;
|
|
bra.uni BB8_38;
|
|
BB8_43: // %mandel___vyfvyfvyi.exit395
|
|
// Parent Loop BB8_3 Depth=1
|
|
// => This Inner Loop Header: Depth=2
|
|
// Zero-fill path (reached when %r4 is not negative): writes 0 to each
// of the param_4 cells of this row; %r8 is the running byte offset and
// %r7 the remaining cell count.
cvt.s64.s32 %rl1, %r8;
|
|
add.s64 %rl1, %rl1, %rl0;
|
|
mov.u32 %r9, 0;
|
|
st.u32 [%rl1], %r9;
|
|
add.s32 %r8, %r8, 4;
|
|
add.s32 %r7, %r7, -1;
|
|
setp.eq.s32 %p0, %r7, 0;
|
|
@%p0 bra BB8_44;
|
|
bra.uni BB8_43;
|
|
BB8_4: // %partial_inner_all_outer156
|
|
// in Loop: Header=BB8_3 Depth=1
|
|
// NOTE(review): the only branch to BB8_4 visible in this function is the
// one at the top of BB8_3, taken when %p0 (param_4 < 1) is true, so this
// branch appears to be always taken and BB#5..BB#8 look unreachable.
@%p0 bra BB8_44;
|
|
// BB#5: // %partial_inner_only197
|
|
// in Loop: Header=BB8_3 Depth=1
|
|
setp.gt.s32 %p0, %r0, 0;
|
|
mov.u32 %r8, 0;
|
|
fma.rn.f32 %f4, %f3, 0f00000000, %f0;
|
|
cvt.rn.f32.s32 %f5, %r6;
|
|
fma.rn.f32 %f5, %f2, %f5, %f1;
|
|
selp.b32 %r7, %r4, 0, %p0;
|
|
setp.lt.s32 %p1, %r7, 0;
|
|
mov.u32 %r10, %r4;
|
|
mov.u32 %r9, %r8;
|
|
mov.u32 %r7, %r8;
|
|
mov.f32 %f7, %f4;
|
|
mov.f32 %f6, %f5;
|
|
@%p1 bra BB8_41;
|
|
bra.uni BB8_6;
|
|
BB8_41: // %for_loop.i
|
|
// Parent Loop BB8_3 Depth=1
|
|
// => This Inner Loop Header: Depth=2
|
|
// Same escape-test / update sequence as BB8_33, with the active and
// escaped words additionally gated by %p0 (param_4 > 0).
selp.b32 %r11, %r10, 0, %p0;
|
|
mul.f32 %f8, %f7, %f7;
|
|
fma.rn.f32 %f9, %f6, %f6, %f8;
|
|
setp.gtu.f32 %p1, %f9, 0f40800000;
|
|
selp.b32 %r12, %r10, 0, %p1;
|
|
or.b32 %r9, %r12, %r9;
|
|
selp.b32 %r12, %r9, 0, %p0;
|
|
shr.u32 %r12, %r12, 31;
|
|
shr.u32 %r11, %r11, 31;
|
|
setp.eq.s32 %p1, %r12, %r11;
|
|
@%p1 bra BB8_42;
|
|
bra.uni BB8_39;
|
|
BB8_42: // in Loop: Header=BB8_41 Depth=2
|
|
mov.u32 %r10, %r8;
|
|
bra.uni BB8_40;
|
|
BB8_39: // %not_all_continued_or_breaked.i
|
|
// in Loop: Header=BB8_41 Depth=2
|
|
mul.f32 %f9, %f6, %f6;
|
|
not.b32 %r11, %r9;
|
|
and.b32 %r10, %r10, %r11;
|
|
sub.f32 %f8, %f8, %f9;
|
|
add.f32 %f8, %f4, %f8;
|
|
add.f32 %f7, %f7, %f7;
|
|
fma.rn.f32 %f6, %f6, %f7, %f5;
|
|
mov.f32 %f7, %f8;
|
|
BB8_40: // %for_step.i
|
|
// in Loop: Header=BB8_41 Depth=2
|
|
setp.ne.s32 %p1, %r10, 0;
|
|
selp.u32 %r11, 1, 0, %p1;
|
|
add.s32 %r7, %r7, %r11;
|
|
setp.lt.s32 %p1, %r7, %r2;
|
|
selp.b32 %r10, %r10, 0, %p1;
|
|
selp.b32 %r11, %r10, 0, %p0;
|
|
setp.gt.s32 %p1, %r11, -1;
|
|
@%p1 bra BB8_7;
|
|
bra.uni BB8_41;
|
|
BB8_6: // in Loop: Header=BB8_3 Depth=1
|
|
mov.u32 %r7, %r8;
|
|
BB8_7: // %mandel___vyfvyfvyi.exit
|
|
// in Loop: Header=BB8_3 Depth=1
|
|
setp.lt.s32 %p0, %r0, 1;
|
|
@%p0 bra BB8_44;
|
|
// BB#8: // %pl_dolane.i
|
|
// in Loop: Header=BB8_3 Depth=1
|
|
// Stores %r7 at the first cell of the row: param_7 + 4 * row * param_4.
mul.lo.s32 %r8, %r6, %r0;
|
|
shl.b32 %r8, %r8, 2;
|
|
cvt.s64.s32 %rl1, %r8;
|
|
add.s64 %rl1, %rl1, %rl0;
|
|
st.u32 [%rl1], %r7;
|
|
BB8_44: // %foreach_reset128
|
|
// in Loop: Header=BB8_3 Depth=1
|
|
// Next row: row index += 1, row byte offset += %r5; stop at param_5.
add.s32 %r6, %r6, 1;
|
|
add.s32 %r3, %r3, %r5;
|
|
setp.eq.s32 %p0, %r6, %r1;
|
|
@%p0 bra BB8_45;
|
|
bra.uni BB8_3;
|
|
BB8_9: // %for_test.preheader
|
|
// Second loop nest, entered when param_8 < 0. Its blocks appear to mirror
// BB8_3..BB8_44 one-for-one with the same register roles.
setp.lt.s32 %p0, %r1, 1;
|
|
@%p0 bra BB8_45;
|
|
// BB#10: // %outer_not_in_extras.preheader.lr.ph
|
|
setp.gt.s32 %p0, %r2, 0;
|
|
mov.u32 %r3, 0;
|
|
selp.b32 %r4, -1, 0, %p0;
|
|
shl.b32 %r5, %r0, 2;
|
|
mov.u32 %r6, %r3;
|
|
BB8_11: // %outer_not_in_extras.preheader
|
|
// =>This Loop Header: Depth=1
|
|
// Child Loop BB8_23 Depth 2
|
|
// Child Loop BB8_20 Depth 2
|
|
// Child Loop BB8_19 Depth 2
|
|
// Child Loop BB8_14 Depth 3
|
|
setp.lt.s32 %p0, %r0, 1;
|
|
@%p0 bra BB8_28;
|
|
// BB#12: // %foreach_full_body.lr.ph
|
|
// in Loop: Header=BB8_11 Depth=1
|
|
setp.lt.s32 %p0, %r4, 0;
|
|
mov.u32 %r7, %r0;
|
|
mov.u32 %r8, %r3;
|
|
@%p0 bra BB8_13;
|
|
bra.uni BB8_20;
|
|
BB8_13: // in Loop: Header=BB8_11 Depth=1
|
|
mov.u64 %rl1, 0;
|
|
cvt.rn.f32.s32 %f4, %r6;
|
|
fma.rn.f32 %f4, %f2, %f4, %f1;
|
|
mul.lo.s32 %r7, %r6, %r0;
|
|
BB8_19: // %for_loop.i281.lr.ph.us
|
|
// Parent Loop BB8_11 Depth=1
|
|
// => This Loop Header: Depth=2
|
|
// Child Loop BB8_14 Depth 3
|
|
cvt.u32.u64 %r8, %rl1;
|
|
cvt.rn.f32.s32 %f5, %r8;
|
|
fma.rn.f32 %f5, %f3, %f5, %f0;
|
|
mov.u32 %r10, 0;
|
|
mov.u32 %r12, %r4;
|
|
mov.u32 %r11, %r10;
|
|
mov.u32 %r9, %r10;
|
|
mov.f32 %f7, %f5;
|
|
mov.f32 %f6, %f4;
|
|
BB8_14: // %for_loop.i281.us
|
|
// Parent Loop BB8_11 Depth=1
|
|
// Parent Loop BB8_19 Depth=2
|
|
// => This Inner Loop Header: Depth=3
|
|
mul.f32 %f8, %f7, %f7;
|
|
fma.rn.f32 %f9, %f6, %f6, %f8;
|
|
setp.gtu.f32 %p0, %f9, 0f40800000;
|
|
selp.b32 %r13, %r12, 0, %p0;
|
|
or.b32 %r11, %r13, %r11;
|
|
shr.u32 %r13, %r11, 31;
|
|
shr.u32 %r14, %r12, 31;
|
|
setp.eq.s32 %p0, %r13, %r14;
|
|
@%p0 bra BB8_15;
|
|
bra.uni BB8_16;
|
|
BB8_15: // in Loop: Header=BB8_14 Depth=3
|
|
mov.u32 %r12, %r10;
|
|
bra.uni BB8_17;
|
|
BB8_16: // %not_all_continued_or_breaked.i295.us
|
|
// in Loop: Header=BB8_14 Depth=3
|
|
mul.f32 %f9, %f6, %f6;
|
|
not.b32 %r13, %r11;
|
|
and.b32 %r12, %r12, %r13;
|
|
sub.f32 %f8, %f8, %f9;
|
|
add.f32 %f8, %f5, %f8;
|
|
add.f32 %f7, %f7, %f7;
|
|
fma.rn.f32 %f6, %f6, %f7, %f4;
|
|
mov.f32 %f7, %f8;
|
|
BB8_17: // %for_step.i264.us
|
|
// in Loop: Header=BB8_14 Depth=3
|
|
setp.ne.s32 %p0, %r12, 0;
|
|
selp.u32 %r13, 1, 0, %p0;
|
|
add.s32 %r9, %r9, %r13;
|
|
setp.lt.s32 %p0, %r9, %r2;
|
|
selp.b32 %r12, %r12, 0, %p0;
|
|
setp.lt.s32 %p0, %r12, 0;
|
|
@%p0 bra BB8_14;
|
|
// BB#18: // %mandel___vyfvyfvyi.exit296.us
|
|
// in Loop: Header=BB8_19 Depth=2
|
|
add.s32 %r8, %r8, %r7;
|
|
shl.b32 %r8, %r8, 2;
|
|
cvt.s64.s32 %rl2, %r8;
|
|
add.s64 %rl2, %rl2, %rl0;
|
|
st.u32 [%rl2], %r9;
|
|
add.s64 %rl1, %rl1, 1;
|
|
cvt.u32.u64 %r8, %rl1;
|
|
setp.eq.s32 %p0, %r8, %r0;
|
|
@%p0 bra BB8_27;
|
|
bra.uni BB8_19;
|
|
BB8_20: // %mandel___vyfvyfvyi.exit296
|
|
// Parent Loop BB8_11 Depth=1
|
|
// => This Inner Loop Header: Depth=2
|
|
cvt.s64.s32 %rl1, %r8;
|
|
add.s64 %rl1, %rl1, %rl0;
|
|
mov.u32 %r9, 0;
|
|
st.u32 [%rl1], %r9;
|
|
add.s32 %r8, %r8, 4;
|
|
add.s32 %r7, %r7, -1;
|
|
setp.eq.s32 %p0, %r7, 0;
|
|
@%p0 bra BB8_27;
|
|
bra.uni BB8_20;
|
|
BB8_28: // %partial_inner_all_outer
|
|
// in Loop: Header=BB8_11 Depth=1
|
|
// NOTE(review): as with BB8_4, the only visible branch to BB8_28 is taken
// with %p0 true, so BB#29..BB#26 look unreachable from here.
@%p0 bra BB8_27;
|
|
// BB#29: // %partial_inner_only
|
|
// in Loop: Header=BB8_11 Depth=1
|
|
setp.gt.s32 %p0, %r0, 0;
|
|
mov.u32 %r8, 0;
|
|
fma.rn.f32 %f4, %f3, 0f00000000, %f0;
|
|
cvt.rn.f32.s32 %f5, %r6;
|
|
fma.rn.f32 %f5, %f2, %f5, %f1;
|
|
selp.b32 %r7, %r4, 0, %p0;
|
|
setp.lt.s32 %p1, %r7, 0;
|
|
mov.u32 %r10, %r4;
|
|
mov.u32 %r9, %r8;
|
|
mov.u32 %r7, %r8;
|
|
mov.f32 %f7, %f4;
|
|
mov.f32 %f6, %f5;
|
|
@%p1 bra BB8_23;
|
|
bra.uni BB8_30;
|
|
BB8_23: // %for_loop.i332
|
|
// Parent Loop BB8_11 Depth=1
|
|
// => This Inner Loop Header: Depth=2
|
|
selp.b32 %r11, %r10, 0, %p0;
|
|
mul.f32 %f8, %f7, %f7;
|
|
fma.rn.f32 %f9, %f6, %f6, %f8;
|
|
setp.gtu.f32 %p1, %f9, 0f40800000;
|
|
selp.b32 %r12, %r10, 0, %p1;
|
|
or.b32 %r9, %r12, %r9;
|
|
selp.b32 %r12, %r9, 0, %p0;
|
|
shr.u32 %r12, %r12, 31;
|
|
shr.u32 %r11, %r11, 31;
|
|
setp.eq.s32 %p1, %r12, %r11;
|
|
@%p1 bra BB8_24;
|
|
bra.uni BB8_21;
|
|
BB8_24: // in Loop: Header=BB8_23 Depth=2
|
|
mov.u32 %r10, %r8;
|
|
bra.uni BB8_22;
|
|
BB8_21: // %not_all_continued_or_breaked.i346
|
|
// in Loop: Header=BB8_23 Depth=2
|
|
mul.f32 %f9, %f6, %f6;
|
|
not.b32 %r11, %r9;
|
|
and.b32 %r10, %r10, %r11;
|
|
sub.f32 %f8, %f8, %f9;
|
|
add.f32 %f8, %f4, %f8;
|
|
add.f32 %f7, %f7, %f7;
|
|
fma.rn.f32 %f6, %f6, %f7, %f5;
|
|
mov.f32 %f7, %f8;
|
|
BB8_22: // %for_step.i313
|
|
// in Loop: Header=BB8_23 Depth=2
|
|
setp.ne.s32 %p1, %r10, 0;
|
|
selp.u32 %r11, 1, 0, %p1;
|
|
add.s32 %r7, %r7, %r11;
|
|
setp.lt.s32 %p1, %r7, %r2;
|
|
selp.b32 %r10, %r10, 0, %p1;
|
|
selp.b32 %r11, %r10, 0, %p0;
|
|
setp.gt.s32 %p1, %r11, -1;
|
|
@%p1 bra BB8_25;
|
|
bra.uni BB8_23;
|
|
BB8_30: // in Loop: Header=BB8_11 Depth=1
|
|
mov.u32 %r7, %r8;
|
|
BB8_25: // %mandel___vyfvyfvyi.exit347
|
|
// in Loop: Header=BB8_11 Depth=1
|
|
setp.lt.s32 %p0, %r0, 1;
|
|
@%p0 bra BB8_27;
|
|
// BB#26: // %pl_dolane.i452
|
|
// in Loop: Header=BB8_11 Depth=1
|
|
mul.lo.s32 %r8, %r6, %r0;
|
|
shl.b32 %r8, %r8, 2;
|
|
cvt.s64.s32 %rl1, %r8;
|
|
add.s64 %rl1, %rl1, %rl0;
|
|
st.u32 [%rl1], %r7;
|
|
BB8_27: // %foreach_reset
|
|
// in Loop: Header=BB8_11 Depth=1
|
|
add.s32 %r6, %r6, 1;
|
|
add.s32 %r3, %r3, %r5;
|
|
setp.eq.s32 %p0, %r6, %r1;
|
|
@%p0 bra BB8_45;
|
|
bra.uni BB8_11;
|
|
BB8_45: // %for_exit
|
|
ret;
|
|
}
|
|
|
|
// .globl mandelbrot_ispc
|
|
// mandelbrot_ispc
//
// For every row in [0, param_5) and every column in [0, param_4) this
// function stores one 32-bit iteration count at
//     param_7 + 4 * (row * param_4 + column).
//
//   param_0, param_1 : f32 offsets added to the scaled column / row index
//   param_2, param_3 : f32 values; the per-column step is
//                      (param_2 - param_0) / float(param_4) and the per-row
//                      step is (param_3 - param_1) / float(param_5)
//   param_4, param_5 : s32 column count and row count
//   param_6          : s32 iteration limit
//   param_7          : 64-bit address of the u32 output buffer
//
// NOTE(review): meanings such as "x0/y0/x1/y1, width, height" are presumed
// from the usage pattern -- confirm against the ISPC source.
.func mandelbrot_ispc(
|
|
.param .b32 mandelbrot_ispc_param_0,
|
|
.param .b32 mandelbrot_ispc_param_1,
|
|
.param .b32 mandelbrot_ispc_param_2,
|
|
.param .b32 mandelbrot_ispc_param_3,
|
|
.param .b32 mandelbrot_ispc_param_4,
|
|
.param .b32 mandelbrot_ispc_param_5,
|
|
.param .b32 mandelbrot_ispc_param_6,
|
|
.param .b64 mandelbrot_ispc_param_7
|
|
) // @mandelbrot_ispc
|
|
{
|
|
.reg .pred %p<396>;
|
|
.reg .s16 %rc<396>;
|
|
.reg .s16 %rs<396>;
|
|
.reg .s32 %r<396>;
|
|
.reg .s64 %rl<396>;
|
|
.reg .f32 %f<396>;
|
|
.reg .f64 %fl<396>;
|
|
|
|
// BB#0: // %allocas
|
|
// Return immediately when the row count (param_5) is below 1.
ld.param.u32 %r0, [mandelbrot_ispc_param_5];
|
|
setp.lt.s32 %p0, %r0, 1;
|
|
@%p0 bra BB9_18;
|
|
// BB#1: // %outer_not_in_extras.preheader.lr.ph
|
|
ld.param.f32 %f0, [mandelbrot_ispc_param_0];
|
|
ld.param.f32 %f1, [mandelbrot_ispc_param_1];
|
|
ld.param.f32 %f3, [mandelbrot_ispc_param_2];
|
|
ld.param.f32 %f2, [mandelbrot_ispc_param_3];
|
|
ld.param.u32 %r1, [mandelbrot_ispc_param_4];
|
|
ld.param.u32 %r2, [mandelbrot_ispc_param_6];
|
|
ld.param.u64 %rl0, [mandelbrot_ispc_param_7];
|
|
// Next six instructions: %f3 = (param_2 - param_0) / float(param_4),
// %f2 = (param_3 - param_1) / float(param_5). %f4/%f5 are scratch here.
sub.f32 %f3, %f3, %f0;
|
|
cvt.rn.f32.s32 %f4, %r1;
|
|
sub.f32 %f2, %f2, %f1;
|
|
cvt.rn.f32.s32 %f5, %r0;
|
|
div.rn.f32 %f2, %f2, %f5;
|
|
div.rn.f32 %f3, %f3, %f4;
|
|
setp.gt.s32 %p0, %r2, 0;
|
|
// %r3 = row index, starting at 0.
mov.u32 %r3, 0;
|
|
// %r4 = 0xFFFFFFFF when param_6 > 0, else 0; it seeds the per-cell
// "still iterating" word whose sign bit drives the inner loop.
selp.b32 %r4, -1, 0, %p0;
|
|
BB9_2: // %outer_not_in_extras.preheader
|
|
// =>This Loop Header: Depth=1
|
|
// Child Loop BB9_13 Depth 2
|
|
// Child Loop BB9_4 Depth 2
|
|
// Child Loop BB9_9 Depth 3
|
|
setp.lt.s32 %p0, %r1, 1;
|
|
@%p0 bra BB9_19;
|
|
// BB#3: // %foreach_full_body.lr.ph
|
|
// in Loop: Header=BB9_2 Depth=1
|
|
// %rl1 = column counter; %f4 = %f2 * row + param_1 (row coordinate);
// %r5 = row * param_4 (index of the first cell in this row).
mov.u64 %rl1, 0;
|
|
cvt.rn.f32.s32 %f4, %r3;
|
|
fma.rn.f32 %f4, %f2, %f4, %f1;
|
|
mul.lo.s32 %r5, %r3, %r1;
|
|
BB9_4: // %foreach_full_body
|
|
// Parent Loop BB9_2 Depth=1
|
|
// => This Loop Header: Depth=2
|
|
// Child Loop BB9_9 Depth 3
|
|
// Per-cell setup: %f5 = %f3 * column + param_0 (column coordinate);
// %r10 = active word, %r9 = accumulated "escaped" word, %r7 = count;
// the iterate starts at (%f7, %f6) = (%f5, %f4). When %r4 is not
// negative the iteration loop is skipped and the count stays 0 (BB9_5).
setp.lt.s32 %p0, %r4, 0;
|
|
cvt.u32.u64 %r6, %rl1;
|
|
cvt.rn.f32.s32 %f5, %r6;
|
|
fma.rn.f32 %f5, %f3, %f5, %f0;
|
|
mov.u32 %r8, 0;
|
|
mov.u32 %r10, %r4;
|
|
mov.u32 %r9, %r8;
|
|
mov.u32 %r7, %r8;
|
|
mov.f32 %f7, %f5;
|
|
mov.f32 %f6, %f4;
|
|
@%p0 bra BB9_9;
|
|
bra.uni BB9_5;
|
|
BB9_9: // %for_loop.i281
|
|
// Parent Loop BB9_2 Depth=1
|
|
// Parent Loop BB9_4 Depth=2
|
|
// => This Inner Loop Header: Depth=3
|
|
// Escape test: %f9 = %f7*%f7 + %f6*%f6 compared against 4.0
// (0f40800000); gtu is also true when the comparison is unordered (NaN).
mul.f32 %f8, %f7, %f7;
|
|
fma.rn.f32 %f9, %f6, %f6, %f8;
|
|
setp.gtu.f32 %p0, %f9, 0f40800000;
|
|
selp.b32 %r11, %r10, 0, %p0;
|
|
or.b32 %r9, %r11, %r9;
|
|
// Equal sign bits of the escaped and active words send control to
// BB9_10, which clears the active word.
shr.u32 %r11, %r9, 31;
|
|
shr.u32 %r12, %r10, 31;
|
|
setp.eq.s32 %p0, %r11, %r12;
|
|
@%p0 bra BB9_10;
|
|
bra.uni BB9_7;
|
|
BB9_10: // in Loop: Header=BB9_9 Depth=3
|
|
mov.u32 %r10, %r8;
|
|
bra.uni BB9_8;
|
|
BB9_7: // %not_all_continued_or_breaked.i295
|
|
// in Loop: Header=BB9_9 Depth=3
|
|
// Advance the iterate: new %f7 = %f7^2 - %f6^2 + %f5,
// new %f6 = 2 * %f7 * %f6 + %f4 (using the old %f7).
mul.f32 %f9, %f6, %f6;
|
|
not.b32 %r11, %r9;
|
|
and.b32 %r10, %r10, %r11;
|
|
sub.f32 %f8, %f8, %f9;
|
|
add.f32 %f8, %f5, %f8;
|
|
add.f32 %f7, %f7, %f7;
|
|
fma.rn.f32 %f6, %f6, %f7, %f4;
|
|
mov.f32 %f7, %f8;
|
|
BB9_8: // %for_step.i264
|
|
// in Loop: Header=BB9_9 Depth=3
|
|
// Count += 1 while the active word is non-zero; the active word is
// cleared once the count is no longer below param_6. Leave the loop
// when the active word's sign bit is clear.
setp.ne.s32 %p0, %r10, 0;
|
|
selp.u32 %r11, 1, 0, %p0;
|
|
add.s32 %r7, %r7, %r11;
|
|
setp.lt.s32 %p0, %r7, %r2;
|
|
selp.b32 %r10, %r10, 0, %p0;
|
|
setp.gt.s32 %p0, %r10, -1;
|
|
@%p0 bra BB9_6;
|
|
bra.uni BB9_9;
|
|
BB9_5: // in Loop: Header=BB9_4 Depth=2
|
|
mov.u32 %r7, %r8;
|
|
BB9_6: // %mandel___vyfvyfvyi.exit296
|
|
// in Loop: Header=BB9_4 Depth=2
|
|
// Store the count at param_7 + sext32(4 * (column + row * param_4)).
// The byte offset is formed in 32 bits before being sign-extended.
add.s32 %r6, %r6, %r5;
|
|
shl.b32 %r6, %r6, 2;
|
|
cvt.s64.s32 %rl2, %r6;
|
|
add.s64 %rl2, %rl2, %rl0;
|
|
st.u32 [%rl2], %r7;
|
|
// Next column; the row is finished when the counter equals param_4.
add.s64 %rl1, %rl1, 1;
|
|
cvt.u32.u64 %r6, %rl1;
|
|
setp.eq.s32 %p0, %r6, %r1;
|
|
@%p0 bra BB9_17;
|
|
bra.uni BB9_4;
|
|
BB9_19: // %partial_inner_all_outer
|
|
// in Loop: Header=BB9_2 Depth=1
|
|
// NOTE(review): the only branch to BB9_19 visible in this function is
// the one at the top of BB9_2, taken when %p0 (param_4 < 1) is true, so
// this branch appears to be always taken and BB#20..BB#16 look
// unreachable.
@%p0 bra BB9_17;
|
|
// BB#20: // %partial_inner_only
|
|
// in Loop: Header=BB9_2 Depth=1
|
|
setp.gt.s32 %p0, %r1, 0;
|
|
mov.u32 %r6, 0;
|
|
fma.rn.f32 %f4, %f3, 0f00000000, %f0;
|
|
cvt.rn.f32.s32 %f5, %r3;
|
|
fma.rn.f32 %f5, %f2, %f5, %f1;
|
|
selp.b32 %r5, %r4, 0, %p0;
|
|
setp.lt.s32 %p1, %r5, 0;
|
|
mov.u32 %r8, %r4;
|
|
mov.u32 %r7, %r6;
|
|
mov.u32 %r5, %r6;
|
|
mov.f32 %f7, %f4;
|
|
mov.f32 %f6, %f5;
|
|
@%p1 bra BB9_13;
|
|
bra.uni BB9_21;
|
|
BB9_13: // %for_loop.i332
|
|
// Parent Loop BB9_2 Depth=1
|
|
// => This Inner Loop Header: Depth=2
|
|
// Same escape-test / update sequence as BB9_9, with the active and
// escaped words additionally gated by %p0 (param_4 > 0).
selp.b32 %r9, %r8, 0, %p0;
|
|
mul.f32 %f8, %f7, %f7;
|
|
fma.rn.f32 %f9, %f6, %f6, %f8;
|
|
setp.gtu.f32 %p1, %f9, 0f40800000;
|
|
selp.b32 %r10, %r8, 0, %p1;
|
|
or.b32 %r7, %r10, %r7;
|
|
selp.b32 %r10, %r7, 0, %p0;
|
|
shr.u32 %r10, %r10, 31;
|
|
shr.u32 %r9, %r9, 31;
|
|
setp.eq.s32 %p1, %r10, %r9;
|
|
@%p1 bra BB9_14;
|
|
bra.uni BB9_11;
|
|
BB9_14: // in Loop: Header=BB9_13 Depth=2
|
|
mov.u32 %r8, %r6;
|
|
bra.uni BB9_12;
|
|
BB9_11: // %not_all_continued_or_breaked.i346
|
|
// in Loop: Header=BB9_13 Depth=2
|
|
mul.f32 %f9, %f6, %f6;
|
|
not.b32 %r9, %r7;
|
|
and.b32 %r8, %r8, %r9;
|
|
sub.f32 %f8, %f8, %f9;
|
|
add.f32 %f8, %f4, %f8;
|
|
add.f32 %f7, %f7, %f7;
|
|
fma.rn.f32 %f6, %f6, %f7, %f5;
|
|
mov.f32 %f7, %f8;
|
|
BB9_12: // %for_step.i313
|
|
// in Loop: Header=BB9_13 Depth=2
|
|
setp.ne.s32 %p1, %r8, 0;
|
|
selp.u32 %r9, 1, 0, %p1;
|
|
add.s32 %r5, %r5, %r9;
|
|
setp.lt.s32 %p1, %r5, %r2;
|
|
selp.b32 %r8, %r8, 0, %p1;
|
|
selp.b32 %r9, %r8, 0, %p0;
|
|
setp.gt.s32 %p1, %r9, -1;
|
|
@%p1 bra BB9_15;
|
|
bra.uni BB9_13;
|
|
BB9_21: // in Loop: Header=BB9_2 Depth=1
|
|
mov.u32 %r5, %r6;
|
|
BB9_15: // %mandel___vyfvyfvyi.exit347
|
|
// in Loop: Header=BB9_2 Depth=1
|
|
setp.lt.s32 %p0, %r1, 1;
|
|
@%p0 bra BB9_17;
|
|
// BB#16: // %pl_dolane.i
|
|
// in Loop: Header=BB9_2 Depth=1
|
|
// Stores %r5 at the first cell of the row: param_7 + 4 * row * param_4.
mul.lo.s32 %r6, %r3, %r1;
|
|
shl.b32 %r6, %r6, 2;
|
|
cvt.s64.s32 %rl1, %r6;
|
|
add.s64 %rl1, %rl1, %rl0;
|
|
st.u32 [%rl1], %r5;
|
|
BB9_17: // %foreach_reset
|
|
// in Loop: Header=BB9_2 Depth=1
|
|
// Next row; stop when the row index equals param_5.
add.s32 %r3, %r3, 1;
|
|
setp.eq.s32 %p0, %r3, %r0;
|
|
@%p0 bra BB9_18;
|
|
bra.uni BB9_2;
|
|
BB9_18: // %for_exit
|
|
ret;
|
|
}
|
|
|