Add reduce_add() for int8 and int16 types.

When available, these reductions map to specialized instructions (e.g. PSADBW).
This commit is contained in:
Matt Pharr
2013-07-25 09:11:39 -07:00
parent 2d063925a1
commit b6df447b55
24 changed files with 464 additions and 44 deletions

View File

@@ -1607,6 +1607,9 @@ static FORCEINLINE int64_t __count_leading_zeros_i64(uint64_t v) {
///////////////////////////////////////////////////////////////////////////
// reductions
// Instantiate the add reductions for the 8-bit and 16-bit integer vector
// types through the REDUCE_ADD macro (defined outside this excerpt).
// NOTE(review): the arguments look like (result type, vector type, function
// name), with a result type wider than the lane type -- confirm against the
// macro definition.
REDUCE_ADD(int16_t, __vec32_i8, __reduce_add_int8)
REDUCE_ADD(int32_t, __vec32_i16, __reduce_add_int16)
// Add reduction for __vec32_f: reduces the v1 and v2 members separately with
// _mm512_reduce_add_ps and returns the sum of the two partial results.
static FORCEINLINE float __reduce_add_float(__vec32_f v) {
    const float sum_v1 = _mm512_reduce_add_ps(v.v1);
    const float sum_v2 = _mm512_reduce_add_ps(v.v2);
    return sum_v1 + sum_v2;
}