From e4b1f585952d4748818d01995f24c04d35c4c0b0 Mon Sep 17 00:00:00 2001 From: evghenii Date: Wed, 18 Sep 2013 19:14:41 +0300 Subject: [PATCH] performance fix.. still some issues left with equal_i1 for __vec8_i1 --- examples/intrinsics/knc-i1x16.h | 52 ++++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 17 deletions(-) diff --git a/examples/intrinsics/knc-i1x16.h b/examples/intrinsics/knc-i1x16.h index 8b1a2bb9..ebffa4d6 100644 --- a/examples/intrinsics/knc-i1x16.h +++ b/examples/intrinsics/knc-i1x16.h @@ -480,46 +480,63 @@ INSERT_EXTRACT(__vec1_d, double) /////////////////////////////////////////////////////////////////////////// // mask ops -static FORCEINLINE uint64_t __movmsk(__vec16_i1 mask) { - return (uint64_t)mask.v; +static FORCEINLINE __vec16_i1 __movmsk(__vec16_i1 mask) { + return _mm512_kmov(mask); } static FORCEINLINE bool __any(__vec16_i1 mask) { - return (mask.v!=0); + return !_mm512_kortestz(mask, mask); } static FORCEINLINE bool __all(__vec16_i1 mask) { - return (mask.v==0xFFFF); + return _mm512_kortestc(mask, mask); } static FORCEINLINE bool __none(__vec16_i1 mask) { - return (mask.v==0); + return _mm512_kortestz(mask, mask); } +#if 0 +static FORCEINLINE __vec16_i1 __equal_i1(__vec16_i1 a, __vec16_i1 b) { +#if 0 + return _mm512_kand(a,b); /* this fails some short circuit tests */ +#else + return _mm512_knot( _mm512_kandn(a, b)); /* this fails some asin test */ +#endif +} +#else /* passes all the tests */ static FORCEINLINE __vec16_i1 __equal_i1(__vec16_i1 a, __vec16_i1 b) { __vec16_i1 r; r.v = (a.v & b.v) | (~a.v & ~b.v); return r; } +#endif static FORCEINLINE __vec16_i1 __and(__vec16_i1 a, __vec16_i1 b) { - __vec16_i1 r; - r.v = a.v & b.v; - return r; + return _mm512_kand(a, b); } static FORCEINLINE __vec16_i1 __xor(__vec16_i1 a, __vec16_i1 b) { - __vec16_i1 r; - r.v = a.v ^ b.v; - return r; + return _mm512_kxor(a, b); } static FORCEINLINE __vec16_i1 __or(__vec16_i1 a, __vec16_i1 b) { - __vec16_i1 r; - r.v = a.v | b.v; - return r; + 
return _mm512_kor(a, b); } +#if 0 +static FORCEINLINE __vec16_i1 __not(__vec16_i1 a) { + return _mm512_knot(a); +} + +static FORCEINLINE __vec16_i1 __and_not1(__vec16_i1 a, __vec16_i1 b) { + return _mm512_kandn(a, b); +} + +static FORCEINLINE __vec16_i1 __and_not2(__vec16_i1 a, __vec16_i1 b) { + return _mm512_kandnr(a, b); +} +#else static FORCEINLINE __vec16_i1 __not(__vec16_i1 v) { __vec16_i1 r; r.v = ~v.v; @@ -537,18 +554,19 @@ static FORCEINLINE __vec16_i1 __and_not2(__vec16_i1 a, __vec16_i1 b) { r.v = a.v & ~b.v; return r; } +#endif static FORCEINLINE __vec16_i1 __select(__vec16_i1 mask, __vec16_i1 a, __vec16_i1 b) { - __vec16_i1 r; - r.v = (a.v & mask.v) | (b.v & ~mask.v); - return r; + return ((a & mask) | (b & ~mask)); + //return __or(__and(a, mask), __andnr(b, mask)); } static FORCEINLINE __vec16_i1 __select(bool cond, __vec16_i1 a, __vec16_i1 b) { return cond ? a : b; } + static FORCEINLINE bool __extract_element(__vec16_i1 vec, int index) { return (vec.v & (1 << index)) ? true : false; }