Add avg_{up,down}_int{8,16} routines to stdlib

These compute the average of two given values, rounding up and down,
respectively, if the result isn't exact.  When possible, these are
mapped to target-specific intrinsics (PADD[BW] on IA and VH[R]ADD[US]
on NEON.)

A subsequent commit will add pattern-matching to generate calls to
these intrinsincs when the corresponding patterns are detected in the
IR.)
This commit is contained in:
Matt Pharr
2013-08-03 20:44:25 -07:00
parent 4f48d3258a
commit 5b20b06bd9
23 changed files with 592 additions and 15 deletions

View File

@@ -426,3 +426,62 @@ define i64 @__reduce_min_uint64(<4 x i64>) nounwind readnone {
define i64 @__reduce_max_uint64(<4 x i64>) nounwind readnone {
reduce4(i64, @__max_varying_uint64, @__max_uniform_uint64)
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; int8/int16
declare <4 x i8> @llvm.arm.neon.vrhaddu.v4i8(<4 x i8>, <4 x i8>) nounwind readnone
define <4 x i8> @__avg_up_uint8(<4 x i8>, <4 x i8>) nounwind readnone {
%r = call <4 x i8> @llvm.arm.neon.vrhaddu.v4i8(<4 x i8> %0, <4 x i8> %1)
ret <4 x i8> %r
}
declare <4 x i8> @llvm.arm.neon.vrhadds.v4i8(<4 x i8>, <4 x i8>) nounwind readnone
define <4 x i8> @__avg_up_int8(<4 x i8>, <4 x i8>) nounwind readnone {
%r = call <4 x i8> @llvm.arm.neon.vrhadds.v4i8(<4 x i8> %0, <4 x i8> %1)
ret <4 x i8> %r
}
declare <4 x i8> @llvm.arm.neon.vhaddu.v4i8(<4 x i8>, <4 x i8>) nounwind readnone
define <4 x i8> @__avg_down_uint8(<4 x i8>, <4 x i8>) nounwind readnone {
%r = call <4 x i8> @llvm.arm.neon.vhaddu.v4i8(<4 x i8> %0, <4 x i8> %1)
ret <4 x i8> %r
}
declare <4 x i8> @llvm.arm.neon.vhadds.v4i8(<4 x i8>, <4 x i8>) nounwind readnone
define <4 x i8> @__avg_down_int8(<4 x i8>, <4 x i8>) nounwind readnone {
%r = call <4 x i8> @llvm.arm.neon.vhadds.v4i8(<4 x i8> %0, <4 x i8> %1)
ret <4 x i8> %r
}
declare <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
define <4 x i16> @__avg_up_uint16(<4 x i16>, <4 x i16>) nounwind readnone {
%r = call <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16> %0, <4 x i16> %1)
ret <4 x i16> %r
}
declare <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
define <4 x i16> @__avg_up_int16(<4 x i16>, <4 x i16>) nounwind readnone {
%r = call <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16> %0, <4 x i16> %1)
ret <4 x i16> %r
}
declare <4 x i16> @llvm.arm.neon.vhaddu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
define <4 x i16> @__avg_down_uint16(<4 x i16>, <4 x i16>) nounwind readnone {
%r = call <4 x i16> @llvm.arm.neon.vhaddu.v4i16(<4 x i16> %0, <4 x i16> %1)
ret <4 x i16> %r
}
declare <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
define <4 x i16> @__avg_down_int16(<4 x i16>, <4 x i16>) nounwind readnone {
%r = call <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16> %0, <4 x i16> %1)
ret <4 x i16> %r
}