add 'examples/intrinsics/known_fails.txt' to track difficult runfails/compfails

This commit is contained in:
Anton Mitrokhin
2014-12-26 13:58:49 +03:00
parent 2d403cf258
commit 192aeb0ae3
2 changed files with 26 additions and 12 deletions

View File

@@ -3599,21 +3599,21 @@ static FORCEINLINE void __aos_to_soa3_float(float *ptr, __vec16_f *out0, __vec16
__insert_element(out2, i, *ptr++);
}
}
*/
/*
static FORCEINLINE void __soa_to_aos4_float(__vec16_f v0, __vec16_f v1, __vec16_f v2,
__vec16_f v3, float *ptr) {
// v0 = A1 ... A16, v1 = B1 ..., v3 = D1 ... D16
__vec16_f tmp00 = _mm512_mask_swizzle_ps (v0, 0x3333, v1, _MM_SWIZ_REG_CDAB); // A1A2B1B2 A5A6B5B6 ...
__vec16_f tmp01 = _mm512_mask_swizzle_ps (v0, 0xCCCC, v1, _MM_SWIZ_REG_CDAB); // B3B4A3A4 B7B8A7A8 ...
__vec16_f tmp02 = _mm512_mask_swizzle_ps (v2, 0x3333, v3, _MM_SWIZ_REG_CDAB); // C1C2D1D2 ...
__vec16_f tmp03 = _mm512_mask_swizzle_ps (v2, 0xCCCC, v3, _MM_SWIZ_REG_CDAB); // D3D4C3C4 ...
//__vec16_f tmp00 = _mm512_mask_swizzle_ps (v0, 0x3333, v1, _MM_SWIZ_REG_CDAB); // A1A2B1B2 A5A6B5B6 ...
//__vec16_f tmp01 = _mm512_mask_swizzle_ps (v0, 0xCCCC, v1, _MM_SWIZ_REG_CDAB); // B3B4A3A4 B7B8A7A8 ...
//__vec16_f tmp02 = _mm512_mask_swizzle_ps (v2, 0x3333, v3, _MM_SWIZ_REG_CDAB); // C1C2D1D2 ...
//__vec16_f tmp03 = _mm512_mask_swizzle_ps (v2, 0xCCCC, v3, _MM_SWIZ_REG_CDAB); // D3D4C3C4 ...
__vec16_f tmp10 = _mm512_mask_swizzle_ps (tmp00, 0x5555, tmp02, _MM_SWIZ_REG_BADC); // A1C1B1D1 A5C5B5D5 ...
__vec16_f tmp11 = _mm512_mask_swizzle_ps (tmp00, 0xAAAA, tmp02, _MM_SWIZ_REG_BADC); // C2A2D2B2 C6A6D6B6 ...
__vec16_f tmp12 = _mm512_mask_swizzle_ps (tmp01, 0x5555, tmp03, _MM_SWIZ_REG_BADC); // DBCA ...
__vec16_f tmp13 = _mm512_mask_swizzle_ps (tmp01, 0xAAAA, tmp03, _MM_SWIZ_REG_BADC); // BDAC ...
//__vec16_f tmp10 = _mm512_mask_swizzle_ps (tmp00, 0x5555, tmp02, _MM_SWIZ_REG_BADC); // A1C1B1D1 A5C5B5D5 ...
//__vec16_f tmp11 = _mm512_mask_swizzle_ps (tmp00, 0xAAAA, tmp02, _MM_SWIZ_REG_BADC); // C2A2D2B2 C6A6D6B6 ...
//__vec16_f tmp12 = _mm512_mask_swizzle_ps (tmp01, 0x5555, tmp03, _MM_SWIZ_REG_BADC); // DBCA ...
//__vec16_f tmp13 = _mm512_mask_swizzle_ps (tmp01, 0xAAAA, tmp03, _MM_SWIZ_REG_BADC); // BDAC ...
@@ -3627,9 +3627,8 @@ static FORCEINLINE void __soa_to_aos4_float(__vec16_f v0, __vec16_f v1, __vec16_
*ptr++ = __extract_element(v3, i);
}
}
*/
/*
static FORCEINLINE void __aos_to_soa4_float(float *ptr, __vec16_f *out0, __vec16_f *out1,
__vec16_f *out2, __vec16_f *out3) {
for (int i = 0; i < 16; ++i) {

View File

@@ -0,0 +1,15 @@
knc.h
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1. pmulus_vi64.ispc (-O2)
The root of the problem is in the code generator - it assumes __vec16_i64 stores its elements sequentially in memory,
rather than keeping the high and low parts separately, as we do. Consequently, this construction works incorrectly:
```
__vec16_i64 (((uint64_t *)(&tmp__2_))[0], ((uint64_t *)(&tmp__2_))[0], ((uint64_t *)(&tmp__2_))[0], ((uint64_t
*)(&tmp__2_))[0], ((uint64_t *)(&tmp__2_))[0], ((uint64_t *)(&tmp__2_))[0], ((uint64_t *)(&tmp__2_))[0], ((uint64_t
*)(&tmp__2_))[0], ((uint64_t *)(&tmp__2_))[0], ((uint64_t *)(&tmp__2_))[0], ((uint64_t *)(&tmp__2_))[0], ((uint64_t
*)(&tmp__2_))[0], ((uint64_t *)(&tmp__2_))[0], ((uint64_t *)(&tmp__2_))[0], ((uint64_t *)(&tmp__2_))[0], ((uint64_t
*)(&tmp__2_))[0]
```
where 'tmp__2_' is of __vec16_i64 data type.