Add reduce_add() for int8 and int16 types.

The new reductions map to specialized instructions (e.g. PSADBW) when available.
This commit is contained in:
Matt Pharr
2013-07-25 09:11:39 -07:00
parent 2d063925a1
commit b6df447b55
24 changed files with 464 additions and 44 deletions

View File

@@ -471,6 +471,15 @@ define i64 @__popcnt_int64(i64) nounwind readonly alwaysinline {
ret i64 %call
}
; Add-reduction over a one-element i8 vector.
; With only a single lane there is nothing to accumulate, so the result
; is simply the value held in lane 0 of %v.
define i8 @__reduce_add_int8(<1 x i8> %v) nounwind readonly alwaysinline {
  ; Pull out the only lane; it is the sum.
  %lane0 = extractelement <1 x i8> %v, i32 0
  ret i8 %lane0
}
; Add-reduction over a one-element i16 vector.
; With only a single lane there is nothing to accumulate, so the result
; is simply the value held in lane 0 of %v.
define i16 @__reduce_add_int16(<1 x i16> %v) nounwind readonly alwaysinline {
  ; Pull out the only lane; it is the sum.
  %lane0 = extractelement <1 x i16> %v, i32 0
  ret i16 %lane0
}
define float @__reduce_add_float(<1 x float> %v) nounwind readonly alwaysinline {
%r = extractelement <1 x float> %v, i32 0