Fix the 64-bit __cast_sext/__cast_zext functions for the KNL target

This commit is contained in:
Anton Mitrokhin
2015-04-02 14:40:24 +03:00
parent 13084ece5f
commit 29c73f242c
2 changed files with 137 additions and 33 deletions

View File

@@ -1785,21 +1785,26 @@ static FORCEINLINE __vec16_i32 __cast_sext(const __vec16_i32 &, const __vec16_i1
return _mm512_cvtepi16_epi32(val_t);
}
// Sign-extends the 32-bit lanes of `val` to 64-bit lanes, filling the two
// halves (v_lo, v_hi) of a __vec16_i64. The first parameter is unnamed and
// is not read; it only takes part in overload selection.
static FORCEINLINE __vec16_i64 __cast_sext(const __vec16_i64 &, const __vec16_i32 &val)
{
    // TODO: this probably shall be optimized
    // Permutation indices: 8..15 followed by 0..7. Presumably this moves the
    // upper eight lanes into the lower 256 bits; the exact lane order depends
    // on the __vec16_i32 constructor, which is not shown here.
    __vec16_i32 swap_halves(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7);
    __vec16_i32 upper_first = _mm512_permutevar_epi32(swap_halves, val);

    __vec16_i64 widened;
    // Each half is produced by sign-extending eight 32-bit lanes taken from
    // the low 256 bits of the corresponding 512-bit source.
    widened.v_lo = _mm512_cvtepi32_epi64(_mm512_castsi512_si256(val));
    widened.v_hi = _mm512_cvtepi32_epi64(_mm512_castsi512_si256(upper_first));
    return widened;
}
// Converts a 16-lane i1 mask to a __vec16_i64. The first parameter is unnamed
// and is not read.
// NOTE(review): this span reads like removed and added diff lines shown
// together without +/- markers. As written, the function returns at the first
// `return` and the two statements after it are unreachable. Confirm against
// the real file which pair of statements is current.
static FORCEINLINE __vec16_i64 __cast_sext(const __vec16_i64 &, const __vec16_i1 &val)
{
// 32-bit lanes: -1 (all ones) where the mask bit is set, 0 elsewhere.
__vec16_i32 ret = _mm512_mask_mov_epi32(_mm512_setzero_epi32(), val, _mm512_set1_epi32(-1));
// Passes the same vector as both constructor arguments.
return __vec16_i64(ret, ret);
// Unreachable as written: builds the same -1/0 vector and widens it through
// the i32 -> i64 __cast_sext overload.
__vec16_i32 a = _mm512_mask_mov_epi32(_mm512_setzero_epi32(), val, _mm512_set1_epi32(-1));
return __cast_sext(__vec16_i64(), a);
}
// Converts 8-bit lanes to a __vec16_i64 by first sign-extending them to
// 32 bits. The first parameter is unnamed and is not read.
// NOTE(review): how the two-argument __vec16_i64 constructor interprets its
// arguments is not visible here. Confirm it agrees with the v_lo/v_hi layout
// used by the i32 -> i64 overload. This span may also mix removed and added
// diff lines.
static FORCEINLINE __vec16_i64 __cast_sext(const __vec16_i64 &, const __vec16_i8 &val)
{
// Widen i8 -> i32 with the 32-bit __cast_sext overload.
__vec16_i32 a = __cast_sext(__vec16_i32(), val);
// Second argument: every 32-bit lane shifted right arithmetically by 31,
// i.e. the lane's sign bit replicated across all 32 bits.
return __vec16_i64(a.v, _mm512_srai_epi32(a.v, 31));
}
// NOTE(review): this definition has the same signature as the i32 -> i64
// __cast_sext overload that appears earlier in this hunk, and its second
// `return` uses `a`, which is not declared in this body. It looks like removed
// and added diff lines shown together without +/- markers; confirm against
// the real file.
static FORCEINLINE __vec16_i64 __cast_sext(const __vec16_i64 &, const __vec16_i32 &val)
{
// Pairs the 32-bit lanes with their sign bits replicated across 32 bits
// (arithmetic shift right by 31).
return __vec16_i64(val.v, _mm512_srai_epi32(val.v, 31));
// Unreachable as written.
return __cast_sext(__vec16_i64(), a);
}
static FORCEINLINE __vec16_i8 __cast_zext(const __vec16_i8 &, const __vec16_i1 &val)
@@ -1841,28 +1846,31 @@ static FORCEINLINE __vec16_i32 __cast_zext(const __vec16_i32 &, const __vec16_i1
return _mm512_cvtepu16_epi32(val_t);
}
// Zero-extends the 32-bit lanes of `val` to 64-bit lanes, filling the two
// halves (v_lo, v_hi) of a __vec16_i64. The first parameter is unnamed and
// is not read; it only takes part in overload selection.
static FORCEINLINE __vec16_i64 __cast_zext(const __vec16_i64 &, const __vec16_i32 &val)
{
    // TODO: this probably shall be optimized
    // Permutation indices: 8..15 followed by 0..7. Presumably this moves the
    // upper eight lanes into the lower 256 bits; the exact lane order depends
    // on the __vec16_i32 constructor, which is not shown here.
    __vec16_i32 swap_halves(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7);
    __vec16_i32 upper_first = _mm512_permutevar_epi32(swap_halves, val);

    __vec16_i64 widened;
    // Each half is produced by zero-extending eight 32-bit lanes taken from
    // the low 256 bits of the corresponding 512-bit source.
    widened.v_lo = _mm512_cvtepu32_epi64(_mm512_castsi512_si256(val));
    widened.v_hi = _mm512_cvtepu32_epi64(_mm512_castsi512_si256(upper_first));
    return widened;
}
// Converts a 16-lane i1 mask to a __vec16_i64 holding 1 for set mask bits and
// 0 otherwise. The first parameter is unnamed and is not read.
// NOTE(review): this span reads like removed and added diff lines shown
// together without +/- markers. As written, the function returns at the first
// `return` and the three statements after it are unreachable. Confirm against
// the real file which group of statements is current.
static FORCEINLINE __vec16_i64 __cast_zext(const __vec16_i64 &, const __vec16_i1 &val)
{
__vec16_i32 ret_hi = _mm512_setzero_epi32();
__vec16_i32 ret_lo = _mm512_setzero_epi32();
__vec16_i32 one = _mm512_set1_epi32(1);
// 32-bit lanes: 1 where the mask bit is set, 0 elsewhere.
ret_lo = _mm512_mask_mov_epi32(ret_lo, val, one);
// Second constructor argument is an all-zero vector.
return __vec16_i64 (ret_lo, ret_hi);
// Unreachable as written: builds the same 1/0 vector and widens it through
// the i32 -> i64 __cast_zext overload.
__vec16_i32 ret = _mm512_setzero_epi32();
ret = _mm512_mask_mov_epi32(ret, val, _mm512_set1_epi32(1));
return __cast_zext(__vec16_i64(), ret);
}
// Converts 8-bit lanes to a __vec16_i64 by first zero-extending them to
// 32 bits. The first parameter is unnamed and is not read.
// NOTE(review): two consecutive `return` statements. This looks like a
// removed and an added diff line shown together without +/- markers. As
// written, only the first one executes; confirm against the real file.
static FORCEINLINE __vec16_i64 __cast_zext(const __vec16_i64 &, const __vec16_i8 &val)
{
// Pairs the zero-extended 32-bit lanes with an all-zero vector.
return __vec16_i64(__cast_zext(__vec16_i32(), val), _mm512_setzero_epi32());
// Unreachable as written: widens i8 -> i32 -> i64 through the __cast_zext
// overloads.
return __cast_zext(__vec16_i64(), __cast_zext(__vec16_i32(), val));
}
// Converts 16-bit lanes to a __vec16_i64 by first zero-extending them to
// 32 bits. The first parameter is unnamed and is not read.
// NOTE(review): how the two-argument __vec16_i64 constructor interprets its
// arguments is not visible here. Confirm it agrees with the v_lo/v_hi layout
// used by the i32 -> i64 overload. This span may also mix removed and added
// diff lines.
static FORCEINLINE __vec16_i64 __cast_zext(const __vec16_i64 &, const __vec16_i16 &val)
{
// Pairs the zero-extended 32-bit lanes with an all-zero vector.
return __vec16_i64(__cast_zext(__vec16_i32(), val), _mm512_setzero_epi32());
}
// NOTE(review): this definition has the same signature as the i32 -> i64
// __cast_zext overload that appears earlier in this hunk, and it contains two
// consecutive `return` statements. It looks like removed and added diff lines
// shown together without +/- markers; confirm against the real file.
static FORCEINLINE __vec16_i64 __cast_zext(const __vec16_i64 &, const __vec16_i32 &val)
{
// Pairs the 32-bit lanes with an all-zero vector.
return __vec16_i64(val.v, _mm512_setzero_epi32());
// Unreachable as written.
return __cast_zext(__vec16_i64(), __cast_zext(__vec16_i32(), val));
}
static FORCEINLINE __vec16_f __cast_sitofp(__vec16_f, __vec16_i32 val) {

View File

@@ -184,30 +184,126 @@
./tests/gather-int16-6.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 *
./tests/gather-int16-7.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 *
./tests/gather-int16-8.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 *
./tests/idiv.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 *
./tests/local-atomics-varyingptr-2.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 *
./tests/local-atomics-varyingptr-3.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 *
./tests/local-atomics-varyingptr-4.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 *
./tests/memcpy-varying.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 *
./tests/memmove-varying.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 *
./tests/memset-varying.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 *
./tests/packed-load-5.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 *
./tests/packed-store-2.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 *
./tests/packed-store2-2.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 *
./tests/paddus_vi64.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 *
./tests/pmuls_vi32.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 *
./tests/pmuls_vi64.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 *
./tests/pmulus_vi32.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 *
./tests/pmulus_vi64.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 *
./tests/reduce-add-int64.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 *
./tests/reduce-add-uint64-1.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 *
./tests/reduce-add-uint64.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 *
./tests/shift-2.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 *
./tests/soa-21.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 *
./tests/soa-22.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 *
./tests/soa-23.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 *
./tests/soa-24.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 *
./tests/soa-25.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 *
./tests/test-108.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 *
./tests/test-131.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 *
./tests/uint64-max-1.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O2 *
./tests/atomics-2.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/atomics-varyingptr-2.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/atomics-varyingptr-3.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/atomics-varyingptr-4.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/c-test-108.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/c-test-131.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/cfor-c-test-108.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/cfor-c-test-131.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/cfor-test-108.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/cfor-test-131.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/count-leading-trailing-zeros-5.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/double-abs-1.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/double-abs.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/exclusive-scan-add-10.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/exclusive-scan-add-7.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/frexp-double-1.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/frexp-double.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/funcptr-null-2.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/funcptr-null-3.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/funcptr-null-4.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/funcptr-null-5.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/funcptr-null-6.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/funcptr-uniform-9.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/funcptr-varying-1.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/funcptr-varying-2.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/funcptr-varying-5.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/funcptr-varying-9.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/gather-double-5.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/gather-double-6.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/gather-float-5.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/gather-float-6.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/gather-int16-5.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/gather-int16-6.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/gather-int32-5.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/gather-int32-6.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/gather-int64-5.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/gather-int64-6.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/gather-int8-5.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/gather-int8-6.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/gather-struct-vector.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/insert-1.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/insert-2.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/int64-constant.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/int64-max-1.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/int64-max.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/int64-min-1.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/int64-min.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/ldexp-double.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/local-atomics-2.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/local-atomics-varyingptr-2.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/local-atomics-varyingptr-3.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/local-atomics-varyingptr-4.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/memcpy-varying.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/memmove-varying.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/memset-varying.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/packed-load-5.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/packed-store-2.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/packed-store2-2.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/padds_vi64.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/paddus_vi64.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/pass-varying-lvalue-to-ref.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/pdivs_vi64.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/pdivus_vi64.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/pmuls_vi32.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/pmuls_vi64.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/pmulus_vi32.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/pmulus_vi64.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/popcnt-4.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/prefetch-varying.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/psubs_vi64.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/psubus_vi64.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/ptr-15.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/ptr-22.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/ptr-24.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/ptr-25.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/ptr-7.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/ptr-assign-lhs-math-2.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/ptr-cast-complex.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/ptr-cmp-1.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/ptr-diff-2.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/ptr-diff-3.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/ptr-diff-4.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/ptr-diff-5.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/ptr-diff-6.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/ptr-int-1.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/ptr-int-null-1.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/ptr-null-func-arg.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/ptr-varying-unif-index.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/reduce-add-int64-1.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/reduce-add-int64.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/reduce-add-uint64-1.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/reduce-add-uint64.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/reduce-equal-10.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/reduce-equal-8.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/reduce-max-int64.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/reduce-max-uint64.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/reduce-min-uint64.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/rotate-3.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/rotate-4.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/shift-1.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/shift-2.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/shift-3.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/struct-nested-5.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/test-108.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/test-131.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/uint64-max-1.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/uint64-max.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/uint64-min-1.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *
./tests/uint64-min.ispc runfail x86-64 knl Linux LLVM 3.4 icpc15.0 -O0 *