From 70adb5d283ed15e403859d9e292d368a2281ba48 Mon Sep 17 00:00:00 2001
From: Vsevolod Livinskiy
Date: Thu, 27 Nov 2014 19:00:38 +0400
Subject: [PATCH] Fix for unsigned cmp for int64

---
 examples/intrinsics/knc.h | 58 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)

diff --git a/examples/intrinsics/knc.h b/examples/intrinsics/knc.h
index c2f347fe..f8fb56aa 100644
--- a/examples/intrinsics/knc.h
+++ b/examples/intrinsics/knc.h
@@ -1028,6 +1028,64 @@ static FORCEINLINE __vec16_i1 __not_equal_i64_and_mask(const __vec16_i64 &a, con
 }
 
 
+static FORCEINLINE __vec16_i1 __unsigned_less_equal_i64(__vec16_i64 a, __vec16_i64 b) {
+    // Unsigned 64-bit a <= b, lane-wise. The high 32-bit halves decide
+    // the result unless they are equal, in which case the low halves decide.
+    __vec16_i1 hi_lt = __unsigned_less_than_i32(a.v_hi, b.v_hi);
+    __vec16_i1 hi_eq = __equal_i32(a.v_hi, b.v_hi);
+    __vec16_i1 lo_le = __unsigned_less_equal_i32(a.v_lo, b.v_lo);
+    return hi_lt | (hi_eq & lo_le);
+}
+
+static FORCEINLINE __vec16_i1 __unsigned_less_equal_i64_and_mask(__vec16_i64 a, __vec16_i64 b, __vec16_i1 m)
+{
+    return __unsigned_less_equal_i64(a, b) & m;
+}
+
+static FORCEINLINE __vec16_i1 __unsigned_greater_equal_i64(__vec16_i64 a, __vec16_i64 b) {
+    // a >= b  <=>  b <= a
+    return __unsigned_less_equal_i64(b, a);
+}
+
+static FORCEINLINE __vec16_i1 __unsigned_greater_equal_i64_and_mask(__vec16_i64 a, __vec16_i64 b, __vec16_i1 m)
+{
+    return __unsigned_greater_equal_i64(a, b) & m;
+}
+
+static FORCEINLINE __vec16_i1 __unsigned_less_than_i64(__vec16_i64 a, __vec16_i64 b) {
+    // Unsigned 64-bit a < b, lane-wise. Strict comparison falls through
+    // to the low halves only when the high halves are equal.
+    __vec16_i1 hi_lt = __unsigned_less_than_i32(a.v_hi, b.v_hi);
+    __vec16_i1 hi_eq = __equal_i32(a.v_hi, b.v_hi);
+    __vec16_i1 lo_lt = __unsigned_less_than_i32(a.v_lo, b.v_lo);
+    return hi_lt | (hi_eq & lo_lt);
+}
+
+static FORCEINLINE __vec16_i1 __unsigned_less_than_i64_and_mask(__vec16_i64 a, __vec16_i64 b, __vec16_i1 m)
+{
+    return __unsigned_less_than_i64(a, b) & m;
+}
+
+static FORCEINLINE __vec16_i1 __unsigned_greater_than_i64(__vec16_i64 a, __vec16_i64 b) {
+    // a > b  <=>  b < a
+    return __unsigned_less_than_i64(b, a);
+}
+
+static FORCEINLINE __vec16_i1 __unsigned_greater_than_i64_and_mask(__vec16_i64 a, __vec16_i64 b, __vec16_i1 m)
+{
+    return __unsigned_greater_than_i64(a, b) & m;
+}
+
+
+static FORCEINLINE __vec16_i64 __select(__vec16_i1 mask,
+                                        __vec16_i64 a, __vec16_i64 b) {
+    // Per-lane select on the split hi/lo representation: lanes set in
+    // mask take a, the remaining lanes take b.
+    __vec16_i64 ret;
+    ret.v_hi = _mm512_mask_mov_epi32(b.v_hi, mask, a.v_hi);
+    ret.v_lo = _mm512_mask_mov_epi32(b.v_lo, mask, a.v_lo);
+    return ret;
+}
+
 
 template <class RetVecType> static RetVecType __smear_i64(const int64_t &l);
 template <> FORCEINLINE __vec16_i64 __smear_i64<__vec16_i64>(const int64_t &l) {