IR change for x86 mask load/store instructions in LLVM 3.8 (r250817)
This commit is contained in:
@@ -399,12 +399,12 @@ reduce_equal(8)
|
||||
masked_load(i8, 1)
|
||||
masked_load(i16, 2)
|
||||
|
||||
; Declarations of the 256-bit AVX masked-load intrinsics used by the
; __masked_load_* helpers: each takes an i8* address and a per-lane %mask
; vector and returns <8 x float> (ps) or <4 x double> (pd).
; NOTE(review): this listing appears to be a rendered diff without +/- markers.
; The first pair (mask typed <8 x float> / <4 x double>) looks like the
; pre-change form and the second pair (mask typed <8 x MfORi32> /
; <4 x MdORi64>) its replacement. MfORi32 / MdORi64 are not defined in this
; view -- presumably m4 macros selecting a float/double or i32/i64 mask
; element type by LLVM version; confirm against their definition.
declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8 *, <8 x float> %mask)
|
||||
declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8 *, <4 x double> %mask)
|
||||
declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8 *, <8 x MfORi32> %mask)
|
||||
declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8 *, <4 x MdORi64> %mask)
|
||||
|
||||
; __masked_load_i32: masked load of eight 32-bit lanes.
;   %0    (i8 *)       address passed straight through to the intrinsic
;   %mask (<8 x i32>)  per-lane mask
; Returns the loaded data as <8 x i32>.
; The i32 load is routed through the float intrinsic
; @llvm.x86.avx.maskload.ps.256: the mask is bitcast to the mask type the
; intrinsic takes, and the <8 x float> result is bitcast back to <8 x i32>.
; NOTE(review): this listing appears to be a rendered diff without +/- markers,
; so %floatmask and %floatval each appear twice -- the <8 x float> pair looks
; like the removed form and the <8 x MfORi32> pair like its replacement.
; MfORi32 is not defined in this view; presumably an m4 macro -- confirm.
define <8 x i32> @__masked_load_i32(i8 *, <8 x i32> %mask) nounwind alwaysinline {
|
||||
; mask reinterpreted as <8 x float> (apparently the pre-change form)
%floatmask = bitcast <8 x i32> %mask to <8 x float>
|
||||
%floatval = call <8 x float> @llvm.x86.avx.maskload.ps.256(i8 * %0, <8 x float> %floatmask)
|
||||
; mask reinterpreted as <8 x MfORi32> (apparently the post-change form)
%floatmask = bitcast <8 x i32> %mask to <8 x MfORi32>
|
||||
%floatval = call <8 x float> @llvm.x86.avx.maskload.ps.256(i8 * %0, <8 x MfORi32> %floatmask)
|
||||
; reinterpret the float lanes as i32; a bitcast leaves the bits unchanged
%retval = bitcast <8 x float> %floatval to <8 x i32>
|
||||
ret <8 x i32> %retval
|
||||
}
|
||||
@@ -416,12 +416,12 @@ define <8 x i64> @__masked_load_i64(i8 *, <8 x i32> %mask) nounwind alwaysinline
|
||||
<8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
|
||||
%mask1 = shufflevector <8 x i32> %mask, <8 x i32> undef,
|
||||
<8 x i32> <i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
|
||||
%mask0d = bitcast <8 x i32> %mask0 to <4 x double>
|
||||
%mask1d = bitcast <8 x i32> %mask1 to <4 x double>
|
||||
%mask0d = bitcast <8 x i32> %mask0 to <4 x MdORi64>
|
||||
%mask1d = bitcast <8 x i32> %mask1 to <4 x MdORi64>
|
||||
|
||||
%val0d = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8 * %0, <4 x double> %mask0d)
|
||||
%val0d = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8 * %0, <4 x MdORi64> %mask0d)
|
||||
%ptr1 = getelementptr PTR_OP_ARGS(`i8') %0, i32 32
|
||||
%val1d = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8 * %ptr1, <4 x double> %mask1d)
|
||||
%val1d = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8 * %ptr1, <4 x MdORi64> %mask1d)
|
||||
|
||||
%vald = shufflevector <4 x double> %val0d, <4 x double> %val1d,
|
||||
<8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
@@ -438,15 +438,15 @@ gen_masked_store(i8)
|
||||
gen_masked_store(i16)
|
||||
|
||||
; Declarations of the 256-bit AVX masked-store intrinsics.
; Note that the mask is the 2nd parameter, not the 3rd one: the operand order
; is (i8* address, mask, value).
; NOTE(review): this listing appears to be a rendered diff without +/- markers.
; The first pair (mask typed <8 x float> / <4 x double>) looks like the
; pre-change form and the second pair (mask typed <8 x MfORi32> /
; <4 x MdORi64>) its replacement; only the mask operand's type differs.
; MfORi32 / MdORi64 are not defined in this view -- presumably m4 macros;
; confirm against their definition.
|
||||
declare void @llvm.x86.avx.maskstore.ps.256(i8 *, <8 x float>, <8 x float>)
|
||||
declare void @llvm.x86.avx.maskstore.pd.256(i8 *, <4 x double>, <4 x double>)
|
||||
declare void @llvm.x86.avx.maskstore.ps.256(i8 *, <8 x MfORi32>, <8 x float>)
|
||||
declare void @llvm.x86.avx.maskstore.pd.256(i8 *, <4 x MdORi64>, <4 x double>)
|
||||
|
||||
; __masked_store_i32: masked store of eight 32-bit lanes.
;   %0 (<8 x i32>*)  destination pointer, bitcast to i8* for the intrinsic
;   %1 (<8 x i32>)   values to store, bitcast to <8 x float>
;   %2 (<8 x i32>)   per-lane mask
; Returns nothing; the store is done by @llvm.x86.avx.maskstore.ps.256, whose
; operand order is (address, mask, value).
; NOTE(review): this listing appears to be a rendered diff without +/- markers,
; so %mask and the call each appear twice -- the <8 x float> mask pair looks
; like the removed form and the <8 x MfORi32> pair like its replacement.
; MfORi32 is not defined in this view; presumably an m4 macro -- confirm.
define void @__masked_store_i32(<8 x i32>* nocapture, <8 x i32>,
|
||||
<8 x i32>) nounwind alwaysinline {
|
||||
%ptr = bitcast <8 x i32> * %0 to i8 *
|
||||
; the value operand stays <8 x float> in both forms of the call
%val = bitcast <8 x i32> %1 to <8 x float>
|
||||
; mask reinterpreted as <8 x float> (apparently the pre-change form)
%mask = bitcast <8 x i32> %2 to <8 x float>
|
||||
call void @llvm.x86.avx.maskstore.ps.256(i8 * %ptr, <8 x float> %mask, <8 x float> %val)
|
||||
; mask reinterpreted as <8 x MfORi32> (apparently the post-change form)
%mask = bitcast <8 x i32> %2 to <8 x MfORi32>
|
||||
call void @llvm.x86.avx.maskstore.ps.256(i8 * %ptr, <8 x MfORi32> %mask, <8 x float> %val)
|
||||
ret void
|
||||
}
|
||||
|
||||
@@ -460,17 +460,17 @@ define void @__masked_store_i64(<8 x i64>* nocapture, <8 x i64>,
|
||||
%mask1 = shufflevector <8 x i32> %mask, <8 x i32> undef,
|
||||
<8 x i32> <i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
|
||||
|
||||
%mask0d = bitcast <8 x i32> %mask0 to <4 x double>
|
||||
%mask1d = bitcast <8 x i32> %mask1 to <4 x double>
|
||||
%mask0d = bitcast <8 x i32> %mask0 to <4 x MdORi64>
|
||||
%mask1d = bitcast <8 x i32> %mask1 to <4 x MdORi64>
|
||||
|
||||
%val0 = shufflevector <8 x double> %val, <8 x double> undef,
|
||||
<4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%val1 = shufflevector <8 x double> %val, <8 x double> undef,
|
||||
<4 x i32> <i32 4, i32 5, i32 6, i32 7>
|
||||
|
||||
call void @llvm.x86.avx.maskstore.pd.256(i8 * %ptr, <4 x double> %mask0d, <4 x double> %val0)
|
||||
call void @llvm.x86.avx.maskstore.pd.256(i8 * %ptr, <4 x MdORi64> %mask0d, <4 x double> %val0)
|
||||
%ptr1 = getelementptr PTR_OP_ARGS(`i8') %ptr, i32 32
|
||||
call void @llvm.x86.avx.maskstore.pd.256(i8 * %ptr1, <4 x double> %mask1d, <4 x double> %val1)
|
||||
call void @llvm.x86.avx.maskstore.pd.256(i8 * %ptr1, <4 x MdORi64> %mask1d, <4 x double> %val1)
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user