diff --git a/examples/intrinsics/knl.h b/examples/intrinsics/knl.h index 32e363af..5385652a 100644 --- a/examples/intrinsics/knl.h +++ b/examples/intrinsics/knl.h @@ -2565,33 +2565,29 @@ static FORCEINLINE __vec16_i32 __min_varying_uint32(__vec16_i32 v1, __vec16_i32 static FORCEINLINE __vec16_i64 __max_varying_int64 (__vec16_i64 v1, __vec16_i64 v2) { __vec16_i64 ret; - ret.v_hi = _mm512_max_epi32(v1.v_hi, v2.v_hi); - __vec16_i1 mask = _mm512_cmpeq_epi32_mask(ret.v_hi, v2.v_hi); - ret.v_lo = _mm512_mask_max_epi32(v1.v_lo, mask, v1.v_lo, v2.v_lo); + ret.v_hi = _mm512_max_epi64(v1.v_hi, v2.v_hi); + ret.v_lo = _mm512_max_epi64(v1.v_lo, v2.v_lo); return ret; } static FORCEINLINE __vec16_i64 __min_varying_int64 (__vec16_i64 v1, __vec16_i64 v2) { __vec16_i64 ret; - ret.v_hi = _mm512_min_epi32(v1.v_hi, v2.v_hi); - __vec16_i1 mask = _mm512_cmpeq_epi32_mask(ret.v_hi, v2.v_hi); - ret.v_lo = _mm512_mask_min_epi32(v1.v_lo, mask, v1.v_lo, v2.v_lo); + ret.v_hi = _mm512_min_epi64(v1.v_hi, v2.v_hi); + ret.v_lo = _mm512_min_epi64(v1.v_lo, v2.v_lo); return ret; } static FORCEINLINE __vec16_i64 __max_varying_uint64 (__vec16_i64 v1, __vec16_i64 v2) { __vec16_i64 ret; - ret.v_hi = _mm512_max_epu32(v1.v_hi, v2.v_hi); - __vec16_i1 mask = _mm512_cmpeq_epi32_mask(ret.v_hi, v2.v_hi); - ret.v_lo = _mm512_mask_max_epu32(v1.v_lo, mask, v1.v_lo, v2.v_lo); + ret.v_hi = _mm512_max_epu64(v1.v_hi, v2.v_hi); + ret.v_lo = _mm512_max_epu64(v1.v_lo, v2.v_lo); return ret; } static FORCEINLINE __vec16_i64 __min_varying_uint64 (__vec16_i64 v1, __vec16_i64 v2) { __vec16_i64 ret; - ret.v_hi = _mm512_min_epu32(v1.v_hi, v2.v_hi); - __vec16_i1 mask = _mm512_cmpeq_epi32_mask(ret.v_hi, v2.v_hi); - ret.v_lo = _mm512_mask_min_epu32(v1.v_lo, mask, v1.v_lo, v2.v_lo); + ret.v_hi = _mm512_min_epu64(v1.v_hi, v2.v_hi); + ret.v_lo = _mm512_min_epu64(v1.v_lo, v2.v_lo); return ret; } @@ -2796,14 +2792,6 @@ static FORCEINLINE __vec16_i32 __masked_load_i32(void *p, __vec16_i1 mask) { #ifdef ISPC_FORCE_ALIGNED_MEMORY return _mm512_mask_load_epi32(_mm512_undefined_epi32(), mask, p); #else - printf("\n\n\n"); - printf_v(mask); - for(int i=0; i <3; ++i) { - printf("%d ", ((uint32_t *)p)[i]); - } - printf("\n"); - printf_v(_mm512_mask_loadu_epi32(_mm512_undefined_epi32(), mask, p)); - printf("\n\n\n"); return _mm512_mask_loadu_epi32(_mm512_undefined_epi32(), mask, p); #endif }