Use native float/half conversion instructions with the AVX2 target.
This commit is contained in:
@@ -63,6 +63,44 @@ define <8 x i32> @__max_varying_uint32(<8 x i32>, <8 x i32>) nounwind readonly a
|
||||
ret <8 x i32> %m
|
||||
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; float/half conversions
|
||||
|
||||
; 8-wide half -> float conversion intrinsic; the halves are passed as i16 lanes.
declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>) nounwind readnone
|
||||
; rounding-mode immediate (second operand): 0 selects round-to-nearest-even
|
||||
; 8-wide float -> half conversion intrinsic; the halves are returned as i16
; lanes and the trailing i32 operand is the rounding-mode immediate.
declare <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float>, i32) nounwind readnone
|
||||
|
||||
;; Convert eight half-precision values (carried as i16 lanes) to eight
;; floats with a single call to the 256-bit vcvtph2ps intrinsic.
define <8 x float> @__half_to_float_varying(<8 x i16> %v) nounwind readnone {
  %widened = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %v)
  ret <8 x float> %widened
}
|
||||
|
||||
;; Convert eight floats to eight half-precision values (returned as i16
;; lanes) with a single call to the 256-bit vcvtps2ph intrinsic.
define <8 x i16> @__float_to_half_varying(<8 x float> %v) nounwind readnone {
  ; rounding-mode immediate 0 = round to nearest even
  %narrowed = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %v, i32 0)
  ret <8 x i16> %narrowed
}
|
||||
|
||||
;; Convert one half-precision value (an i16) to float.  The conversion
;; intrinsic used here takes an 8-wide vector, so the scalar is placed in
;; lane 0 of an otherwise-undefined vector and only lane 0 of the converted
;; result is returned.
define float @__half_to_float_uniform(i16 %v) nounwind readnone {
  ; lane 0 holds the input; lanes 1-7 are undef and never read back
  %lanes = insertelement <8 x i16> undef, i16 %v, i32 0
  %cvt = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %lanes)
  %scalar = extractelement <8 x float> %cvt, i32 0
  ret float %scalar
}
|
||||
|
||||
;; Convert one float to a half-precision value (returned as an i16).  The
;; conversion intrinsic used here takes an 8-wide vector, so the scalar is
;; placed in lane 0 of an otherwise-undefined vector and only lane 0 of the
;; converted result is returned.
define i16 @__float_to_half_uniform(float %v) nounwind readnone {
  ; lane 0 holds the input; lanes 1-7 are undef and never read back
  %lanes = insertelement <8 x float> undef, float %v, i32 0
  ; rounding-mode immediate 0 = round to nearest even
  %cvt = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %lanes, i32 0)
  %scalar = extractelement <8 x i16> %cvt, i32 0
  ret i16 %scalar
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; gather
|
||||
|
||||
@@ -70,6 +108,3 @@ gen_gather(8, i8)
|
||||
gen_gather(8, i16)
|
||||
gen_gather(8, i32)
|
||||
gen_gather(8, i64)
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user