Add reduce_add() for int8 and int16 types.
This maps to specialized instructions (e.g. PSADBW) when available.
This commit is contained in:
@@ -471,6 +471,15 @@ define i64 @__popcnt_int64(i64) nounwind readonly alwaysinline {
|
||||
ret i64 %call
|
||||
}
|
||||
|
||||
; __reduce_add_int8: add-reduction over a one-element i8 vector.
;   %v      - <1 x i8> input vector
;   returns - i8, the value held in lane 0 of %v
; The vector has exactly one lane, so the sum of all lanes is that lane
; itself; no add instruction is needed.
define i8 @__reduce_add_int8(<1 x i8> %v) nounwind readonly alwaysinline {
|
||||
; Pull out the only lane (index 0).
%r = extractelement <1 x i8> %v, i32 0
|
||||
ret i8 %r
|
||||
}
|
||||
|
||||
; __reduce_add_int16: add-reduction over a one-element i16 vector.
;   %v      - <1 x i16> input vector
;   returns - i16, the value held in lane 0 of %v
; The vector has exactly one lane, so the sum of all lanes is that lane
; itself; no add instruction is needed.
define i16 @__reduce_add_int16(<1 x i16> %v) nounwind readonly alwaysinline {
|
||||
; Pull out the only lane (index 0).
%r = extractelement <1 x i16> %v, i32 0
|
||||
ret i16 %r
|
||||
}
|
||||
|
||||
define float @__reduce_add_float(<1 x float> %v) nounwind readonly alwaysinline {
|
||||
%r = extractelement <1 x float> %v, i32 0
|
||||
|
||||
Reference in New Issue
Block a user