diff --git a/examples/intrinsics/knl.h b/examples/intrinsics/knl.h index 1bb9832d..32e363af 100644 --- a/examples/intrinsics/knl.h +++ b/examples/intrinsics/knl.h @@ -1785,21 +1785,26 @@ static FORCEINLINE __vec16_i32 __cast_sext(const __vec16_i32 &, const __vec16_i1 return _mm512_cvtepi16_epi32(val_t); } +static FORCEINLINE __vec16_i64 __cast_sext(const __vec16_i64 &, const __vec16_i32 &val) +{ + // TODO: this probably shall be optimized + __vec16_i64 a; + a.v_lo = _mm512_cvtepi32_epi64(_mm512_castsi512_si256(val)); + __vec16_i32 a_hi_32 = _mm512_permutevar_epi32(__vec16_i32(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7), val); + a.v_hi = _mm512_cvtepi32_epi64(_mm512_castsi512_si256(a_hi_32)); + return a; +} + static FORCEINLINE __vec16_i64 __cast_sext(const __vec16_i64 &, const __vec16_i1 &val) { - __vec16_i32 ret = _mm512_mask_mov_epi32(_mm512_setzero_epi32(), val, _mm512_set1_epi32(-1)); - return __vec16_i64(ret, ret); + __vec16_i32 a = _mm512_mask_mov_epi32(_mm512_setzero_epi32(), val, _mm512_set1_epi32(-1)); + return __cast_sext(__vec16_i64(), a); } static FORCEINLINE __vec16_i64 __cast_sext(const __vec16_i64 &, const __vec16_i8 &val) { __vec16_i32 a = __cast_sext(__vec16_i32(), val); - return __vec16_i64(a.v, _mm512_srai_epi32(a.v, 31)); -} - -static FORCEINLINE __vec16_i64 __cast_sext(const __vec16_i64 &, const __vec16_i32 &val) -{ - return __vec16_i64(val.v, _mm512_srai_epi32(val.v, 31)); + return __cast_sext(__vec16_i64(), a); } static FORCEINLINE __vec16_i8 __cast_zext(const __vec16_i8 &, const __vec16_i1 &val) @@ -1841,28 +1846,31 @@ static FORCEINLINE __vec16_i32 __cast_zext(const __vec16_i32 &, const __vec16_i1 return _mm512_cvtepu16_epi32(val_t); } +static FORCEINLINE __vec16_i64 __cast_zext(const __vec16_i64 &, const __vec16_i32 &val) +{ + // TODO: this probably shall be optimized + __vec16_i64 a; + a.v_lo = _mm512_cvtepu32_epi64(_mm512_castsi512_si256(val)); + __vec16_i32 a_hi_32 = _mm512_permutevar_epi32(__vec16_i32(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7), val); + a.v_hi = _mm512_cvtepu32_epi64(_mm512_castsi512_si256(a_hi_32)); + return a; +} + static FORCEINLINE __vec16_i64 __cast_zext(const __vec16_i64 &, const __vec16_i1 &val) { - __vec16_i32 ret_hi = _mm512_setzero_epi32(); - __vec16_i32 ret_lo = _mm512_setzero_epi32(); - __vec16_i32 one = _mm512_set1_epi32(1); - ret_lo = _mm512_mask_mov_epi32(ret_lo, val, one); - return __vec16_i64 (ret_lo, ret_hi); + __vec16_i32 ret = _mm512_setzero_epi32(); + ret = _mm512_mask_mov_epi32(ret, val, _mm512_set1_epi32(1)); + return __cast_zext(__vec16_i64(), ret); } static FORCEINLINE __vec16_i64 __cast_zext(const __vec16_i64 &, const __vec16_i8 &val) { - return __vec16_i64(__cast_zext(__vec16_i32(), val), _mm512_setzero_epi32()); + return __cast_zext(__vec16_i64(), __cast_zext(__vec16_i32(), val)); } static FORCEINLINE __vec16_i64 __cast_zext(const __vec16_i64 &, const __vec16_i16 &val) { - return __vec16_i64(__cast_zext(__vec16_i32(), val), _mm512_setzero_epi32()); -} - -static FORCEINLINE __vec16_i64 __cast_zext(const __vec16_i64 &, const __vec16_i32 &val) -{ - return __vec16_i64(val.v, _mm512_setzero_epi32()); + return __cast_zext(__vec16_i64(), __cast_zext(__vec16_i32(), val)); } static FORCEINLINE __vec16_f __cast_sitofp(__vec16_f, __vec16_i32 val) { diff --git a/fail_db.txt b/fail_db.txt index de924a32..0f0fccb1 100644 --- a/fail_db.txt +++ b/fail_db.txt @@ -184,30 +184,126 @@ ./tests/gather-int16-6.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 * ./tests/gather-int16-7.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 * ./tests/gather-int16-8.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 * -./tests/idiv.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 * ./tests/local-atomics-varyingptr-2.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 * ./tests/local-atomics-varyingptr-3.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 * ./tests/local-atomics-varyingptr-4.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 * ./tests/memcpy-varying.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 * -./tests/memmove-varying.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 * ./tests/memset-varying.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 * ./tests/packed-load-5.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 * ./tests/packed-store-2.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 * ./tests/packed-store2-2.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 * ./tests/paddus_vi64.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 * -./tests/pmuls_vi32.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 * -./tests/pmuls_vi64.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 * -./tests/pmulus_vi32.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 * ./tests/pmulus_vi64.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 * -./tests/reduce-add-int64.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 * -./tests/reduce-add-uint64-1.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 * -./tests/reduce-add-uint64.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 * ./tests/shift-2.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 * -./tests/soa-21.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 * -./tests/soa-22.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 * -./tests/soa-23.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 * -./tests/soa-24.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 * -./tests/soa-25.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 * ./tests/test-108.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 * ./tests/test-131.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 * ./tests/uint64-max-1.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 * +./tests/atomics-2.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/atomics-varyingptr-2.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/atomics-varyingptr-3.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/atomics-varyingptr-4.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/c-test-108.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/c-test-131.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/cfor-c-test-108.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/cfor-c-test-131.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/cfor-test-108.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/cfor-test-131.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/count-leading-trailing-zeros-5.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/double-abs-1.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/double-abs.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/exclusive-scan-add-10.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/exclusive-scan-add-7.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/frexp-double-1.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/frexp-double.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/funcptr-null-2.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/funcptr-null-3.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/funcptr-null-4.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/funcptr-null-5.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/funcptr-null-6.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/funcptr-uniform-9.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/funcptr-varying-1.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/funcptr-varying-2.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/funcptr-varying-5.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/funcptr-varying-9.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/gather-double-5.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/gather-double-6.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/gather-float-5.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/gather-float-6.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/gather-int16-5.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/gather-int16-6.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/gather-int32-5.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/gather-int32-6.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/gather-int64-5.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/gather-int64-6.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/gather-int8-5.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/gather-int8-6.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/gather-struct-vector.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/insert-1.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/insert-2.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/int64-constant.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/int64-max-1.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/int64-max.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/int64-min-1.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/int64-min.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/ldexp-double.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/local-atomics-2.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/local-atomics-varyingptr-2.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/local-atomics-varyingptr-3.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/local-atomics-varyingptr-4.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/memcpy-varying.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/memmove-varying.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/memset-varying.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/packed-load-5.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/packed-store-2.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/packed-store2-2.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/padds_vi64.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/paddus_vi64.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/pass-varying-lvalue-to-ref.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/pdivs_vi64.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/pdivus_vi64.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/pmuls_vi32.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/pmuls_vi64.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/pmulus_vi32.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/pmulus_vi64.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/popcnt-4.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/prefetch-varying.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/psubs_vi64.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/psubus_vi64.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/ptr-15.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/ptr-22.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/ptr-24.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/ptr-25.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/ptr-7.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/ptr-assign-lhs-math-2.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/ptr-cast-complex.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/ptr-cmp-1.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/ptr-diff-2.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/ptr-diff-3.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/ptr-diff-4.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/ptr-diff-5.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/ptr-diff-6.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/ptr-int-1.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/ptr-int-null-1.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/ptr-null-func-arg.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/ptr-varying-unif-index.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/reduce-add-int64-1.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/reduce-add-int64.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/reduce-add-uint64-1.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/reduce-add-uint64.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/reduce-equal-10.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/reduce-equal-8.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/reduce-max-int64.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/reduce-max-uint64.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/reduce-min-uint64.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/rotate-3.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/rotate-4.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/shift-1.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/shift-2.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/shift-3.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/struct-nested-5.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/test-108.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/test-131.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/uint64-max-1.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/uint64-max.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/uint64-min-1.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 * +./tests/uint64-min.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *