diff --git a/examples/intrinsics/knc.h b/examples/intrinsics/knc.h index aa9bda32..f58e34a7 100644 --- a/examples/intrinsics/knc.h +++ b/examples/intrinsics/knc.h @@ -2386,7 +2386,37 @@ static FORCEINLINE __vec16_i32 __min_varying_int32 (__vec16_i32 v1, __vec16_i32 static FORCEINLINE __vec16_i32 __max_varying_uint32(__vec16_i32 v1, __vec16_i32 v2) { return _mm512_max_epu32(v1, v2); } static FORCEINLINE __vec16_i32 __min_varying_uint32(__vec16_i32 v1, __vec16_i32 v2) { return _mm512_min_epu32(v1, v2); } +static FORCEINLINE __vec16_i64 __max_varying_int64 (__vec16_i64 v1, __vec16_i64 v2) { + __vec16_i64 ret; + ret.v_hi = _mm512_max_epi32(v1.v_hi, v2.v_hi); + __vec16_i1 mask = _mm512_cmp_epi32_mask(ret.v_hi, v2.v_hi, _MM_CMPINT_EQ); + ret.v_lo = _mm512_mask_max_epi32(v1.v_lo, mask, v1.v_lo, v2.v_lo); + return ret; +} +static FORCEINLINE __vec16_i64 __min_varying_int64 (__vec16_i64 v1, __vec16_i64 v2) { + __vec16_i64 ret; + ret.v_hi = _mm512_min_epi32(v1.v_hi, v2.v_hi); + __vec16_i1 mask = _mm512_cmp_epi32_mask(ret.v_hi, v2.v_hi, _MM_CMPINT_EQ); + ret.v_lo = _mm512_mask_min_epi32(v1.v_lo, mask, v1.v_lo, v2.v_lo); + return ret; +} + +static FORCEINLINE __vec16_i64 __max_varying_uint64 (__vec16_i64 v1, __vec16_i64 v2) { + __vec16_i64 ret; + ret.v_hi = _mm512_max_epu32(v1.v_hi, v2.v_hi); + __vec16_i1 mask = _mm512_cmp_epu32_mask(ret.v_hi, v2.v_hi, _MM_CMPINT_EQ); + ret.v_lo = _mm512_mask_max_epu32(v1.v_lo, mask, v1.v_lo, v2.v_lo); + return ret; +} + +static FORCEINLINE __vec16_i64 __min_varying_uint64 (__vec16_i64 v1, __vec16_i64 v2) { + __vec16_i64 ret; + ret.v_hi = _mm512_min_epu32(v1.v_hi, v2.v_hi); + __vec16_i1 mask = _mm512_cmp_epu32_mask(ret.v_hi, v2.v_hi, _MM_CMPINT_EQ); + ret.v_lo = _mm512_mask_min_epu32(v1.v_lo, mask, v1.v_lo, v2.v_lo); + return ret; +} // sqrt/rsqrt/rcp