From d45c5767d8ff81e01f56418095be3d115eb0507c Mon Sep 17 00:00:00 2001 From: "james.brodman" Date: Tue, 1 Oct 2013 12:17:57 -0400 Subject: [PATCH 1/3] Due diligence tweaks. --- examples/intrinsics/knc-i1x16.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/examples/intrinsics/knc-i1x16.h b/examples/intrinsics/knc-i1x16.h index ae9c4130..730141ec 100644 --- a/examples/intrinsics/knc-i1x16.h +++ b/examples/intrinsics/knc-i1x16.h @@ -1021,12 +1021,22 @@ static FORCEINLINE __vec16_i64 __mul(const __vec16_i32 &a, const __vec16_i64 &_b _mm512_mulhi_epi32(a.v, b.v_lo))).cvt2zmm(); } -#if __ICC_VERSION == 1400 +#if __ICC_VERSION >= 1400 static FORCEINLINE __vec16_i64 __mul(__vec16_i64 a, __vec16_i64 b) { return __vec16_i64(_mm512_mullox_epi64(a.v1, b.v1), _mm512_mullox_epi64(a.v2,b.v2)); } #else -BINARY_OP(__vec16_i64, __mul, *) +static FORCEINLINE __vec16_i64 __mul(const __vec16_i64 &a, const __vec16_i64 &b) +{ + __vec16_i32 lo = _mm512_mullo_epi32(a.v_lo,b.v_lo); + __vec16_i32 hi_m1 = _mm512_mulhi_epi32(a.v_lo, b.v_lo); + __vec16_i32 hi_m2 = _mm512_mullo_epi32(a.v_hi, b.v_lo); + __vec16_i32 hi_m3 = _mm512_mullo_epi32(a.v_lo, b.v_hi); + __mmask16 carry = 0; + __vec16_i32 hi_p23 = _mm512_addsetc_epi32(hi_m2, hi_m1, &carry); + __vec16_i32 hi = _mm512_adc_epi32(hi_m3, carry, hi_p23, &carry); + return __vec16_i64(lo, hi); +} #endif #endif From ac79f3f34555a97da837834076c8adc89e6c50a4 Mon Sep 17 00:00:00 2001 From: "james.brodman" Date: Tue, 1 Oct 2013 12:31:33 -0400 Subject: [PATCH 2/3] format change --- examples/intrinsics/knc-i1x16.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/examples/intrinsics/knc-i1x16.h b/examples/intrinsics/knc-i1x16.h index 730141ec..84a1f7aa 100644 --- a/examples/intrinsics/knc-i1x16.h +++ b/examples/intrinsics/knc-i1x16.h @@ -1026,8 +1026,10 @@ static FORCEINLINE __vec16_i64 __mul(__vec16_i64 a, __vec16_i64 b) { return __vec16_i64(_mm512_mullox_epi64(a.v1, b.v1), _mm512_mullox_epi64(a.v2,b.v2)); } #else -static FORCEINLINE __vec16_i64 __mul(const __vec16_i64 &a, const __vec16_i64 &b) +static FORCEINLINE __vec16_i64 __mul(const __vec16_i64 &_a, const __vec16_i64 &_b) { + const __vec16_i64 a = _a.cvt2hilo(); + const __vec16_i64 b = _b.cvt2hilo(); __vec16_i32 lo = _mm512_mullo_epi32(a.v_lo,b.v_lo); __vec16_i32 hi_m1 = _mm512_mulhi_epi32(a.v_lo, b.v_lo); __vec16_i32 hi_m2 = _mm512_mullo_epi32(a.v_hi, b.v_lo); @@ -1035,7 +1037,7 @@ static FORCEINLINE __vec16_i64 __mul(const __vec16_i64 &a, const __vec16_i64 &b) __mmask16 carry = 0; __vec16_i32 hi_p23 = _mm512_addsetc_epi32(hi_m2, hi_m1, &carry); __vec16_i32 hi = _mm512_adc_epi32(hi_m3, carry, hi_p23, &carry); - return __vec16_i64(lo, hi); + return __vec16_i64(hi,lo).cvt2zmm(); } #endif #endif From 44912e6b1e0478da79fe4c3a00fd5a64f2904ece Mon Sep 17 00:00:00 2001 From: "james.brodman" Date: Tue, 8 Oct 2013 18:27:03 -0400 Subject: [PATCH 3/3] Fix segfault when using both -g and -MMM --- module.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/module.cpp b/module.cpp index 755a5dc4..41861a2d 100644 --- a/module.cpp +++ b/module.cpp @@ -936,7 +936,7 @@ Module::AddExportedTypes(const std::vectorfinalize(); lStripUnusedDebugInfo(module);