64-bit min-max functions for knl.h

This commit is contained in:
Anton Mitrokhin
2015-04-02 14:56:20 +03:00
parent 29c73f242c
commit 0292730cea

View File

@@ -2565,33 +2565,29 @@ static FORCEINLINE __vec16_i32 __min_varying_uint32(__vec16_i32 v1, __vec16_i32
// Signed 64-bit maximum of v1 and v2.
//
// Applies _mm512_max_epi64 to the v_hi members and to the v_lo members
// independently and returns the two results packed into a __vec16_i64.
// NOTE(review): this is a true per-element 64-bit max only if each of
// v_lo / v_hi stores complete 64-bit elements -- confirm against the
// __vec16_i64 definition in this header.
static FORCEINLINE __vec16_i64 __max_varying_int64 (__vec16_i64 v1, __vec16_i64 v2) {
    __vec16_i64 ret;
    ret.v_hi = _mm512_max_epi64(v1.v_hi, v2.v_hi);
    ret.v_lo = _mm512_max_epi64(v1.v_lo, v2.v_lo);
    return ret;
}
// Signed 64-bit minimum of v1 and v2.
//
// Applies _mm512_min_epi64 to the v_hi members and to the v_lo members
// independently and returns the two results packed into a __vec16_i64.
// NOTE(review): this is a true per-element 64-bit min only if each of
// v_lo / v_hi stores complete 64-bit elements -- confirm against the
// __vec16_i64 definition in this header.
static FORCEINLINE __vec16_i64 __min_varying_int64 (__vec16_i64 v1, __vec16_i64 v2) {
    __vec16_i64 ret;
    ret.v_hi = _mm512_min_epi64(v1.v_hi, v2.v_hi);
    ret.v_lo = _mm512_min_epi64(v1.v_lo, v2.v_lo);
    return ret;
}
// Unsigned 64-bit maximum of v1 and v2.
//
// Applies _mm512_max_epu64 to the v_hi members and to the v_lo members
// independently and returns the two results packed into a __vec16_i64.
// NOTE(review): this is a true per-element 64-bit max only if each of
// v_lo / v_hi stores complete 64-bit elements -- confirm against the
// __vec16_i64 definition in this header.
static FORCEINLINE __vec16_i64 __max_varying_uint64 (__vec16_i64 v1, __vec16_i64 v2) {
    __vec16_i64 ret;
    ret.v_hi = _mm512_max_epu64(v1.v_hi, v2.v_hi);
    ret.v_lo = _mm512_max_epu64(v1.v_lo, v2.v_lo);
    return ret;
}
// Unsigned 64-bit minimum of v1 and v2.
//
// Applies _mm512_min_epu64 to the v_hi members and to the v_lo members
// independently and returns the two results packed into a __vec16_i64.
// NOTE(review): this is a true per-element 64-bit min only if each of
// v_lo / v_hi stores complete 64-bit elements -- confirm against the
// __vec16_i64 definition in this header.
static FORCEINLINE __vec16_i64 __min_varying_uint64 (__vec16_i64 v1, __vec16_i64 v2) {
    __vec16_i64 ret;
    ret.v_hi = _mm512_min_epu64(v1.v_hi, v2.v_hi);
    ret.v_lo = _mm512_min_epu64(v1.v_lo, v2.v_lo);
    return ret;
}
@@ -2796,14 +2792,6 @@ static FORCEINLINE __vec16_i32 __masked_load_i32(void *p, __vec16_i1 mask) {
#ifdef ISPC_FORCE_ALIGNED_MEMORY
    // Aligned variant, selected when ISPC_FORCE_ALIGNED_MEMORY is defined.
    return _mm512_mask_load_epi32(_mm512_undefined_epi32(), mask, p);
#else
    // Unaligned variant. The pass-through operand is
    // _mm512_undefined_epi32(), so callers must not rely on the contents of
    // elements whose mask bit is clear. Memory at p is touched only through
    // the masked load itself.
    return _mm512_mask_loadu_epi32(_mm512_undefined_epi32(), mask, p);
#endif
}