added several math functions, new runfails: gather-int8-2/4; ldexp-double (previously compfailed)

This commit is contained in:
Anton Mitrokhin
2014-09-11 17:36:33 +04:00
parent efa0ea01f3
commit 78a7ef9fc5

View File

@@ -800,6 +800,13 @@ static FORCEINLINE __vec16_i64 __shl(__vec16_i64 a, __vec16_i64 b) {
return __vec16_i64(lo, hi);
}
// Shift every 64-bit lane of `a` left by the same scalar count `b`.
//
// Each lane is held as two 32-bit halves (a.v_lo / a.v_hi), so the 64-bit
// shift is assembled from 32-bit shifts:
//   b <  32 : hi = (hi << b) | (lo >> (32 - b)),  lo = lo << b
//   b >= 32 : hi = lo << (b - 32),                lo = 0
// The b >= 32 case needs its own path: with the single formula, `32 - b`
// wraps around to a huge unsigned count and the bits of the low half never
// reach the high half.
//
// NOTE(review): the b == 0 case (lo >> 32) and the zeroing of `lo` for
// b >= 32 assume the 32-bit shift intrinsics yield zero for counts above 31
// -- confirm against the intrinsic reference for the target.
static FORCEINLINE __vec16_i64 __shl(__vec16_i64 a, unsigned long long b) {
    // Clamp before narrowing so a very large count cannot alias a small one
    // when it is converted to the intrinsics' 32-bit count argument.
    const unsigned int n = (b > 64) ? 64u : (unsigned int)b;

    if (n >= 32) {
        // The low half moves (partly or fully) into the high half and
        // nothing is left in the low half.
        __vec16_i32 hi = _mm512_slli_epi32(a.v_lo, n - 32u);
        __vec16_i32 lo = _mm512_slli_epi32(a.v_lo, n);
        return __vec16_i64(lo, hi);
    }

    // The top n bits of the low half carry over into the high half.
    __vec16_i32 hi = _mm512_or_epi32(_mm512_slli_epi32(a.v_hi, n),
                                     _mm512_srli_epi32(a.v_lo, 32u - n));
    __vec16_i32 lo = _mm512_slli_epi32(a.v_lo, n);
    return __vec16_i64(lo, hi);
}
static FORCEINLINE __vec16_i64 __lshr(__vec16_i64 a, __vec16_i64 b) {
__vec16_i32 shift = _mm512_sub_epi32(__ispc_thirty_two, b.v_lo);
__vec16_i32 xfer = _mm512_and_epi32(_mm512_sllv_epi32(__ispc_ffffffff, shift), _mm512_sllv_epi32(a.v_hi, shift));
@@ -820,6 +827,16 @@ static FORCEINLINE __vec16_i64 __ashr(__vec16_i64 a, __vec16_i64 b) {
return __vec16_i64(lo, hi);
}
// Arithmetic (sign-propagating) right shift of every 64-bit lane of `a` by
// the same scalar count `b`.
//
// Each lane is held as two 32-bit halves (a.v_lo / a.v_hi):
//   b <  32 : lo = (low b bits of hi, moved to the top) | (lo >> b),
//             hi = hi >> b (arithmetic)
//   b >= 32 : lo = hi >> (b - 32) (arithmetic),
//             hi = hi >> 31 (all sign bits)
// The b >= 32 case needs its own path: `1 << b` on an int is undefined for
// b >= 32, and the low half must then be taken from the high half instead of
// from `lo >> b`.
static FORCEINLINE __vec16_i64 __ashr(__vec16_i64 a, unsigned long long b) {
    if (b >= 32) {
        // Counts of 63 and above leave only sign bits; clamping keeps every
        // 32-bit shift count within 0..31.
        const unsigned int n = (b >= 63) ? 31u : (unsigned int)(b - 32);
        __vec16_i32 lo = _mm512_srai_epi32(a.v_hi, n);
        __vec16_i32 hi = _mm512_srai_epi32(a.v_hi, 31);
        return __vec16_i64(lo, hi);
    }

    const unsigned int n = (unsigned int)b;
    // Unsigned arithmetic avoids signed overflow when n == 31.
    const int low_bits_mask = (int)((1u << n) - 1u);

    // Low n bits of the high half become the top n bits of the low half.
    // For n == 0 the mask is zero, so xfer is zero whatever the shift does.
    __vec16_i32 xfer
        = _mm512_slli_epi32(_mm512_and_epi32(a.v_hi,
                                             _mm512_set1_epi32(low_bits_mask)),
                            32u - n);
    __vec16_i32 hi = _mm512_srai_epi32(a.v_hi, n);
    __vec16_i32 lo = _mm512_or_epi32(xfer, _mm512_srli_epi32(a.v_lo, n));
    return __vec16_i64(lo, hi);
}
static FORCEINLINE __vec16_i1 __equal_i64(const __vec16_i64 &a, const __vec16_i64 &b) {
const __mmask16 lo_match = _mm512_cmpeq_epi32_mask(a.v_lo,b.v_lo);
return _mm512_mask_cmpeq_epi32_mask(lo_match,a.v_hi,b.v_hi);
@@ -849,6 +866,10 @@ static FORCEINLINE __vec16_i64 __select(__vec16_i1 mask,
return ret;
}
// static FORCEINLINE int64_t __extract_element(__vec16_i64 v, uint32_t index) {
// return (uint64_t(((int32_t *)&v.v_hi)[index])<<32) | (uint64_t(((int32_t *)&v.v_lo)[index]));
// }
// Primary template for __smear_i64, parameterized on the vector type that is
// returned; takes a single 64-bit integer by const reference. Only the
// declaration appears here -- an explicit specialization for __vec16_i64
// follows it.
template <class RetVecType> static RetVecType __smear_i64(const int64_t &l);
template <> FORCEINLINE __vec16_i64 __smear_i64<__vec16_i64>(const int64_t &l) {
const int *i = (const int*)&l;
@@ -1609,12 +1630,14 @@ static FORCEINLINE __vec16_f __rsqrt_varying_float(__vec16_f v) {
return _mm512_invsqrt_ps(v);
#endif
}
// Reciprocal square root of a 16-lane double vector, computed one lane at a
// time with the scalar sqrt(): result[lane] = 1.0 / sqrt(x[lane]).
static FORCEINLINE __vec16_d __rsqrt_varying_double(__vec16_d x) {
    __vec16_d result;
    int lane = 0;
    while (lane < 16) {
        __insert_element(&result, lane, 1.0/sqrt(__extract_element(x, lane)));
        ++lane;
    }
    return result;
}
static FORCEINLINE double __rsqrt_uniform_double(double v)
{
return 1.0/v;