@@ -1,32 +0,0 @@
|
||||
knc.h
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
1. pmulus_vi64.ispc (-O2)
|
||||
The root of the broblem is in the code generator - it assumes __vec16_i64 stores its elements sequentually in memory,
|
||||
not high and low parts separately as we do. Consequently, this construction works incorrectly
|
||||
|
||||
```
|
||||
__vec16_i64 (((uint64_t *)(&tmp__2_))[0], ((uint64_t *)(&tmp__2_))[0], ((uint64_t *)(&tmp__2_))[0], ((uint64_t
|
||||
*)(&tmp__2_))[0], ((uint64_t *)(&tmp__2_))[0], ((uint64_t *)(&tmp__2_))[0], ((uint64_t *)(&tmp__2_))[0], ((uint64_t
|
||||
*)(&tmp__2_))[0], ((uint64_t *)(&tmp__2_))[0], ((uint64_t *)(&tmp__2_))[0], ((uint64_t *)(&tmp__2_))[0], ((uint64_t
|
||||
*)(&tmp__2_))[0], ((uint64_t *)(&tmp__2_))[0], ((uint64_t *)(&tmp__2_))[0], ((uint64_t *)(&tmp__2_))[0], ((uint64_t
|
||||
*)(&tmp__2_))[0]
|
||||
```
|
||||
where 'tmp__2_' is of __vec16_i64 data type.
|
||||
|
||||
2. soa-18.ispc (-O0)
|
||||
The same as (1). The generator uses the structure of a kind
|
||||
```
|
||||
struct l_unnamed_0 {
|
||||
__vec16_i64 field0;
|
||||
__vec16_i32 field1;
|
||||
} ptr_;
|
||||
```
|
||||
and a function
|
||||
```
|
||||
__masked_store_i64(((&ptr_.field0)), _slice_ptr7_slice_offset_extract_0_, internal_mask_26_function_mask9_);
|
||||
```
|
||||
|
||||
where '_slice_ptr7_slice_offset_extract_0_' is of type __vec16_i64. The
|
||||
problem is, we store 64 bit ints in memory sequentually and in vectors
|
||||
separately (hi and lo parts). This way, the '.field0' has wrong layout
|
||||
Reference in New Issue
Block a user