From 296b057a0a6efde52ce72c734aa40f037dc2b0cc Mon Sep 17 00:00:00 2001
From: Anton Mitrokhin
Date: Thu, 27 Nov 2014 16:54:46 +0400
Subject: [PATCH] added debug helpers for knc-i1x16.h

---
NOTE(review): this patch was recovered from a copy in which line breaks and
indentation were collapsed and `<...>` tokens were dropped. The line structure
below is rebuilt from the hunk counts (they add up to the diffstat), but
indentation, the position of blank context lines, and whitespace-only
differences (the bare `-` line and the `-{` / `+{` pair) are best-effort.
Apply with `git apply --ignore-whitespace`, or re-diff against the tree.
Still missing, to be restored from the target file before applying: the
author e-mail on `From:`, the header names on the two context `#include`
lines of the first hunk, and presumably a template argument on `vec16` in
the third hunk header. The two ADDED includes are restored as <iostream>
(std::ostream, std::flush, std::dec) and <iomanip> (std::setw), which the
new operator<< helpers use.

 examples/intrinsics/knc-i1x16.h | 79 ++++++++++++++++++++++++++++++++-
 1 file changed, 77 insertions(+), 2 deletions(-)

diff --git a/examples/intrinsics/knc-i1x16.h b/examples/intrinsics/knc-i1x16.h
index b09958fa..256907f9 100644
--- a/examples/intrinsics/knc-i1x16.h
+++ b/examples/intrinsics/knc-i1x16.h
@@ -39,6 +39,9 @@
 #include
 #include
 
+#include <iostream> // for operator<<(m512[i])
+#include <iomanip>  // for operator<<(m512[i])
+
 #ifdef _MSC_VER
 #define FORCEINLINE __forceinline
 #define PRE_ALIGN(x) /*__declspec(align(x))*/
@@ -101,7 +104,8 @@ struct __vec16_i1
              ((v14 & 1) << 14) |
              ((v15 & 1) << 15));
     }
-
+    FORCEINLINE uint8_t operator[](const int i) const { return ((v >> i) & 1); }
+    FORCEINLINE uint8_t operator[](const int i) { return ((v >> i) & 1); }
     FORCEINLINE operator __mmask16() const { return v; }
 };
 
@@ -293,6 +297,75 @@ PRE_ALIGN(32) struct __vec16_i16 : public vec16 {
 static inline int32_t __extract_element(__vec16_i32, int);
 
 
+///////////////////////////////////////////////////////////////////////////
+// debugging helpers
+//
+inline std::ostream &operator<<(std::ostream &out, const __m512i &v)
+{
+  out << "[";
+  for (int i=0;i<16;i++)
+    out << (i!=0?",":"") << std::dec << std::setw(8) << ((int*)&v)[i] << std::dec;
+  out << "]" << std::flush;
+  return out;
+}
+
+inline std::ostream &operator<<(std::ostream &out, const __m512 &v)
+{
+  out << "[";
+  for (int i=0;i<16;i++)
+    out << (i!=0?",":"") << ((float*)&v)[i];
+  out << "]" << std::flush;
+  return out;
+}
+
+inline std::ostream &operator<<(std::ostream &out, const __vec16_i1 &v)
+{
+  out << "[";
+  for (int i=0;i<16;i++)
+    out << (i!=0?",":"") << std::dec << std::setw(8) << (int)v[i] << std::dec;
+  out << "]" << std::flush;
+  return out;
+}
+
+inline std::ostream &operator<<(std::ostream &out, const __vec16_i8 &v)
+{
+  out << "[";
+  for (int i=0;i<16;i++)
+    out << (i!=0?",":"") << std::dec << std::setw(8) << (int)((unsigned char*)&v)[i] << std::dec;
+  out << "]" << std::flush;
+  return out;
+}
+
+inline std::ostream &operator<<(std::ostream &out, const __vec16_i16 &v)
+{
+  out << "[";
+  for (int i=0;i<16;i++)
+    out << (i!=0?",":"") << std::dec << std::setw(8) << (int)((uint16_t*)&v)[i] << std::dec;
+  out << "]" << std::flush;
+  return out;
+}
+
+inline std::ostream &operator<<(std::ostream &out, const __vec16_d &v)
+{
+  out << "[";
+  for (int i=0;i<16;i++) {
+    out << (i!=0?",":"") << (v[i]);
+  }
+  out << "]" << std::flush;
+  return out;
+}
+
+inline std::ostream &operator<<(std::ostream &out, const __vec16_i64 &v)
+{
+  out << "[";
+  for (int i=0;i<16;i++) {
+    out << (i!=0?",":"") << (v[i]);
+  }
+  out << "]" << std::flush;
+  return out;
+}
+
+
 ///////////////////////////////////////////////////////////////////////////
 // macros...
 
@@ -813,9 +886,10 @@ static FORCEINLINE void __abs_i32i64(__m512i &_hi, __m512i &_lo)
     _hi = _mm512_sbb_epi32 (hi, borrow, mask, &borrow);
 }
 static FORCEINLINE __vec16_i64 __mul(__vec16_i64 _a, __vec16_i64 _b)
-{
+{
     __vec16_i64 a = _a.cvt2hilo();
     __vec16_i64 b = _b.cvt2hilo();
+
     /* sign = (a^b) >> 32, if sign == 0 then a*b >= 0, otherwise a*b < 0 */
     const __vec16_i1 sign = __not_equal_i32(__ashr(__xor(a.v_hi, b.v_hi), __ispc_thirty_two), __ispc_zero);
     __abs_i32i64(a.v_hi, a.v_lo); /* abs(a) */
@@ -830,6 +904,7 @@ static FORCEINLINE __vec16_i64 __mul(__vec16_i64 _a, __vec16_i64 _b)
     const __vec16_i32 lo = lo_m1;
     const __vec16_i64 ret_abs = __vec16_i64(hi,lo).cvt2zmm();
     /* if sign != 0, means either a or b is negative, then negate the result */
+
     return __select(sign, __sub(__vec16_i64(__ispc_zero, __ispc_zero), ret_abs), ret_abs);
 }
 #endif /* __ICC >= 1400 */