[AVX-512]: replaced the masked_load_float_double() macro with explicit __masked_load_float/__masked_load_double definitions
This commit is contained in:
@@ -407,13 +407,13 @@ declare <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32>, <16 x i32>, <16
|
|||||||
|
|
||||||
; Per-lane minimum of the two unnamed <16 x i32> arguments (%0, %1), computed by
; @llvm.x86.avx512.mask.pmins.d.512. The passthrough operand is zeroinitializer
; and the mask operand is i16 -1 (all 16 bits set).
define <16 x i32> @__min_varying_int32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {
|
define <16 x i32> @__min_varying_int32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {
|
||||||
%ret = call <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32> %0, <16 x i32> %1,
|
%ret = call <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32> %0, <16 x i32> %1,
|
||||||
<16 x i32> zeroinitializer, i16 -1)
|
<16 x i32> zeroinitializer, i16 -1)
|
||||||
ret <16 x i32> %ret
|
ret <16 x i32> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
; Per-lane maximum of the two unnamed <16 x i32> arguments (%0, %1), computed by
; @llvm.x86.avx512.mask.pmaxs.d.512. The passthrough operand is zeroinitializer
; and the mask operand is i16 -1 (all 16 bits set).
define <16 x i32> @__max_varying_int32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {
|
define <16 x i32> @__max_varying_int32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {
|
||||||
%ret = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %0, <16 x i32> %1,
|
%ret = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %0, <16 x i32> %1,
|
||||||
<16 x i32> zeroinitializer, i16 -1)
|
<16 x i32> zeroinitializer, i16 -1)
|
||||||
ret <16 x i32> %ret
|
ret <16 x i32> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -814,9 +814,37 @@ masked_load(i16, 2)
|
|||||||
masked_load(i32, 4)
|
masked_load(i32, 4)
|
||||||
masked_load(i64, 8)
|
masked_load(i64, 8)
|
||||||
|
|
||||||
masked_load_float_double()
|
|
||||||
|
|
||||||
gen_masked_store(i8)
|
declare <16 x float> @llvm.x86.avx512.mask.loadu.ps.512(i8*, <16 x float>, i16)
|
||||||
|
|
||||||
|
; Masked load of 16 floats starting at %ptr.
;   %ptr  - address of the first float, passed as i8*
;   %mask - one i1 per lane
; The <16 x i1> mask is reinterpreted as an i16 and handed to the
; loadu.ps.512 intrinsic together with a zeroinitializer passthrough vector.
define <16 x float> @__masked_load_float(i8 * %ptr, <16 x i1> %mask) readonly alwaysinline {
  ; Pack the per-lane i1 mask into the i16 form the intrinsic takes.
  %kmask = bitcast <16 x i1> %mask to i16
  %loaded = call <16 x float> @llvm.x86.avx512.mask.loadu.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 %kmask)
  ret <16 x float> %loaded
}
|
||||||
|
|
||||||
|
|
||||||
|
declare <8 x double> @llvm.x86.avx512.mask.loadu.pd.512(i8*, <8 x double>, i8)
|
||||||
|
|
||||||
|
; Masked load of 16 doubles starting at %ptr.
;   %ptr  - address of the first double, passed as i8*
;   %mask - one i1 per lane; lane i guards element i
; The load is done as two 8-wide loadu.pd.512 calls, each with a
; zeroinitializer passthrough, and the halves are then concatenated:
;   lanes 0..7  come from %ptr        under mask bits 0..7
;   lanes 8..15 come from %ptr + 64 B under mask bits 8..15
define <16 x double> @__masked_load_double(i8 * %ptr, <16 x i1> %mask) readonly alwaysinline {
  ; Low half of the mask: bits 0..7 of the packed i16.
  %mask_i16 = bitcast <16 x i1> %mask to i16
  %mask_lo_i8 = trunc i16 %mask_i16 to i8

  ; High half of the mask: lanes 8..15 extracted and packed into an i8.
  %mask_hi = shufflevector <16 x i1> %mask, <16 x i1> undef,
    <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %mask_hi_i8 = bitcast <8 x i1> %mask_hi to i8

  ; Elements 0..7 live at %ptr and must be guarded by the LOW mask bits.
  %r0 = call <8 x double> @llvm.x86.avx512.mask.loadu.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 %mask_lo_i8)

  ; Elements 8..15 start 8 doubles past %ptr: 8 * 8 bytes = 64 bytes.
  ; The offset is taken in bytes on the i8* so it cannot be scaled by a
  ; vector type by accident.
  %ptr_hi_i8 = getelementptr i8, i8* %ptr, i32 64
  %r1 = call <8 x double> @llvm.x86.avx512.mask.loadu.pd.512(i8* %ptr_hi_i8, <8 x double> zeroinitializer, i8 %mask_hi_i8)

  ; Concatenate: %r0 supplies lanes 0..7, %r1 supplies lanes 8..15.
  %res = shufflevector <8 x double> %r0, <8 x double> %r1,
    <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
                i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x double> %res
}
|
||||||
|
|
||||||
|
|
||||||
|
gen_masked_store(i8) ; llvm.x86.sse2.storeu.dq
|
||||||
gen_masked_store(i16)
|
gen_masked_store(i16)
|
||||||
gen_masked_store(i32)
|
gen_masked_store(i32)
|
||||||
gen_masked_store(i64)
|
gen_masked_store(i64)
|
||||||
|
|||||||
Reference in New Issue
Block a user