[AVX-512]: masked_load_float/double were replaced with explicit calls to the AVX-512 masked-load intrinsics

This commit is contained in:
Vsevolod Livinskiy
2015-07-09 13:45:14 +03:00
parent b6d2d8dd4c
commit 25aeedb003

View File

@@ -407,13 +407,13 @@ declare <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32>, <16 x i32>, <16
; Per-lane minimum of two <16 x i32> vectors, computed by the pmins.d.512
; intrinsic (signed compare, going by the intrinsic name).
; The call passes zeroinitializer as the pass-through operand and i16 -1 as
; the lane mask, so all 16 mask bits are set.
; NOTE(review): the continuation line of the call appears twice below. This
; looks like the old and new side of a whitespace-only diff change whose +/-
; markers were lost; confirm that only one copy exists in the real file.
define <16 x i32> @__min_varying_int32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {
%ret = call <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32> %0, <16 x i32> %1,
<16 x i32> zeroinitializer, i16 -1)
<16 x i32> zeroinitializer, i16 -1)
ret <16 x i32> %ret
}
; Per-lane maximum of two <16 x i32> vectors, computed by the pmaxs.d.512
; intrinsic (signed compare, going by the intrinsic name).
; The call passes zeroinitializer as the pass-through operand and i16 -1 as
; the lane mask, so all 16 mask bits are set.
; NOTE(review): the continuation line of the call appears twice below. This
; looks like the old and new side of a whitespace-only diff change whose +/-
; markers were lost; confirm that only one copy exists in the real file.
define <16 x i32> @__max_varying_int32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {
%ret = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %0, <16 x i32> %1,
<16 x i32> zeroinitializer, i16 -1)
<16 x i32> zeroinitializer, i16 -1)
ret <16 x i32> %ret
}
@@ -814,9 +814,37 @@ masked_load(i16, 2)
; Macro instantiations that emit the masked-load helpers; the macro bodies are
; defined outside this view. The second argument presumably is the element
; size in bytes (4 for i32, 8 for i64) - TODO confirm against the macro.
; NOTE(review): the last two lines of this group look like the removed side of
; the diff: explicit float/double load definitions and another i8 store
; instantiation follow further down. Confirm they are absent from the real file.
masked_load(i32, 4)
masked_load(i64, 8)
masked_load_float_double()
gen_masked_store(i8)
declare <16 x float> @llvm.x86.avx512.mask.loadu.ps.512(i8*, <16 x float>, i16)

; Masked unaligned load of 16 floats starting at %ptr.
;   %ptr  - byte pointer to the first float
;   %mask - one i1 per lane
; The pass-through operand handed to the intrinsic is zeroinitializer.
define <16 x float> @__masked_load_float(i8 * %ptr, <16 x i1> %mask) readonly alwaysinline {
  ; The intrinsic takes the lane mask as a packed 16-bit integer.
  %lane_bits = bitcast <16 x i1> %mask to i16
  %loaded = call <16 x float> @llvm.x86.avx512.mask.loadu.ps.512(
                i8* %ptr,
                <16 x float> zeroinitializer,
                i16 %lane_bits)
  ret <16 x float> %loaded
}
declare <8 x double> @llvm.x86.avx512.mask.loadu.pd.512(i8*, <8 x double>, i8)

; Masked unaligned load of 16 doubles starting at %ptr.
;   %ptr  - byte pointer to the first double
;   %mask - one i1 per lane
; The 512-bit intrinsic handles 8 doubles at a time, so the load is split in
; two halves: lanes 0-7 come from %ptr under the low 8 mask bits, lanes 8-15
; come from %ptr + 64 bytes under the high 8 mask bits. The pass-through
; operand of both calls is zeroinitializer. The two halves are concatenated
; into the <16 x double> result.
define <16 x double> @__masked_load_double(i8 * %ptr, <16 x i1> %mask) readonly alwaysinline {
  ; Low half of the mask: lanes 0-7 are the low 8 bits of the packed i16.
  %mask_i16 = bitcast <16 x i1> %mask to i16
  %mask_lo_i8 = trunc i16 %mask_i16 to i8

  ; High half of the mask: lanes 8-15 extracted and packed into an i8.
  %mask_hi = shufflevector <16 x i1> %mask, <16 x i1> undef,
                           <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %mask_hi_i8 = bitcast <8 x i1> %mask_hi to i8

  ; Address of lane 8: exactly one <8 x double> (8 * 8 = 64 bytes) past %ptr.
  ; Stepping a <16 x double>* by 8 would land 1024 bytes away instead.
  %ptr_d = bitcast i8* %ptr to <8 x double>*
  %ptr_hi = getelementptr <8 x double>, <8 x double>* %ptr_d, i32 1
  %ptr_hi_i8 = bitcast <8 x double>* %ptr_hi to i8*

  ; Each half is loaded with the mask bits that belong to its own lanes.
  %r0 = call <8 x double> @llvm.x86.avx512.mask.loadu.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 %mask_lo_i8)
  %r1 = call <8 x double> @llvm.x86.avx512.mask.loadu.pd.512(i8* %ptr_hi_i8, <8 x double> zeroinitializer, i8 %mask_hi_i8)

  ; Concatenate: result lanes 0-7 from %r0, lanes 8-15 from %r1.
  %res = shufflevector <8 x double> %r0, <8 x double> %r1,
                       <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
                                   i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x double> %res
}
; Macro instantiations that emit the masked-store helpers for the i8, i16, i32
; and i64 element types; the generating macro is defined outside this view.
gen_masked_store(i8) ; llvm.x86.sse2.storeu.dq
gen_masked_store(i16)
gen_masked_store(i32)
gen_masked_store(i64)