adding __packed_store_active2 to generic targets

This commit is contained in:
Ilia Filippov
2013-12-19 14:13:55 +04:00
parent d666fc3f8f
commit 15816eb07e
9 changed files with 125 additions and 65 deletions

View File

@@ -2496,20 +2496,23 @@ static FORCEINLINE int32_t __packed_store_active(uint32_t *p, __vec8_i32 val,
_mm512_mask_extpackstorehi_epi32((uint8_t*)p+64, 0xFF & mask, val, _MM_DOWNCONV_EPI32_NONE, _MM_HINT_NONE);
return _mm_countbits_32(uint32_t(0xFF & mask));
}
// Forwarding entry point for an unsigned destination pointer: passes ptr, val
// and mask unchanged to __packed_store_active and returns that call's result.
// NOTE(review): the parameters are __vec4_i32/__vec4_i1 while the
// __packed_store_active overloads visible nearby take __vec8_i32/__vec8_i1 --
// confirm the vector width is intended for this target.
static FORCEINLINE int32_t __packed_store_active2(uint32_t *ptr, __vec4_i32 val,
                                                 __vec4_i1 mask) {
    const int32_t stored = __packed_store_active(ptr, val, mask);
    return stored;
}
// Signed-pointer overload of __packed_load_active.
//
// Reinterprets p as uint32_t* and forwards to the uint32_t* overload, returning
// its result. The earlier body kept a private copy of the masked
// extloadunpacklo/extloadunpackhi sequence, returned the bit count of
// (0xFF & mask), and only then listed this forwarding call -- which was
// therefore unreachable. Keeping one implementation avoids the two copies
// drifting apart.
//
// NOTE(review): the uint32_t* overload is defined outside the lines reviewed
// here; it is presumed to perform the same masked unpack-load and return the
// same count -- confirm.
static FORCEINLINE int32_t __packed_load_active(int32_t *p, __vec8_i32 *val,
                                                __vec8_i1 mask) {
    return __packed_load_active((uint32_t *)p, val, mask);
}
// Signed-pointer overload of __packed_store_active.
//
// Reinterprets p as uint32_t* and forwards to the uint32_t* overload defined
// just above, returning its result. The earlier body repeated that overload's
// masked extpackstorelo/extpackstorehi pair and its
// _mm_countbits_32(0xFF & mask) return, then listed this forwarding call after
// the return, where it could never execute. The duplicate is dropped so the
// store logic lives in one place.
static FORCEINLINE int32_t __packed_store_active(int32_t *p, __vec8_i32 val,
                                                 __vec8_i1 mask) {
    return __packed_store_active((uint32_t *)p, val, mask);
}
// Forwarding entry point for a signed destination pointer: passes ptr, val and
// mask unchanged to __packed_store_active and returns that call's result.
// NOTE(review): the parameters are __vec4_i32/__vec4_i1 while the
// __packed_store_active overloads visible nearby take __vec8_i32/__vec8_i1 --
// confirm the vector width is intended for this target.
static FORCEINLINE int32_t __packed_store_active2(int32_t *ptr, __vec4_i32 val,
                                                 __vec4_i1 mask) {
    const int32_t stored = __packed_store_active(ptr, val, mask);
    return stored;
}
#endif
///////////////////////////////////////////////////////////////////////////