changed codestyle to 2 spaces in 'knc.h'
@@ -4,7 +4,7 @@
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

  * Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.
@@ -29,7 +29,7 @@
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#include <limits.h> // INT_MIN
#include <stdint.h>
@@ -86,10 +86,10 @@ struct __vec16_i32;

#if 0
/* (iw) actually, this *SHOULD* be the right implementation for a
   vec16_i1: this one is a class that can have a constructor (which
   ISPC sometimes emits for these vectors...) This version might
   not be working with embree's ISPC bindings, probably because
   embree still uses the 'wrong' implementation */
typedef struct PRE_ALIGN(2) __vec16_i1
{
  FORCEINLINE operator __mmask16() const { return v; }
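For reference, a minimal sketch of the kind of mask wrapper the comment describes: a class with constructors plus an implicit conversion back to the raw mask. It assumes __mmask16 is just a 16-bit integer mask; the type names, the member v, and the constructors here are illustrative, not the header's actual definition.

#include <stdint.h>

typedef uint16_t mask16_t;                 // stand-in for __mmask16

struct vec16_i1_sketch {
  mask16_t v;                              // one bit per lane, 16 lanes
  vec16_i1_sketch() : v(0) {}              // default-constructible, as ISPC may emit
  explicit vec16_i1_sketch(mask16_t m) : v(m) {}
  operator mask16_t() const { return v; }  // hand the raw mask back to intrinsics
};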
@@ -277,9 +277,9 @@ inline std::ostream &operator<<(std::ostream &out, const __vec16_i64 &v)
  out << "[";
  uint32_t *ptr = (uint32_t*)&v;
  for (int i=0;i<16;i++) {
    uint64_t val = (uint64_t(ptr[i])<<32)+ptr[i+16];
    out << (i?",":"") << ((int*)val);
  }
  out << "]" << std::flush;
  return out;
}
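The loop above implies a split layout for __vec16_i64: the 16 high 32-bit halves stored first, then the 16 low halves, with lane i reassembled as (hi << 32) + lo. A small self-contained sketch of that reassembly, assuming such a layout (the struct and names below are illustrative only):

#include <stdint.h>
#include <stdio.h>

// Hypothetical split layout: 16 high 32-bit halves followed by 16 low halves.
struct vec16_i64_sketch { uint32_t hi[16]; uint32_t lo[16]; };

int main() {
  vec16_i64_sketch v = {};
  v.hi[0] = 0x1; v.lo[0] = 0x2;                            // lane 0 should read back as 0x100000002
  const uint32_t *ptr = (const uint32_t *)&v;
  for (int i = 0; i < 16; i++) {
    uint64_t val = ((uint64_t)ptr[i] << 32) + ptr[i + 16]; // same reassembly as the loop above
    if (i == 0) printf("lane0 = 0x%llx\n", (unsigned long long)val);
  }
  return 0;
}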
@@ -382,14 +382,14 @@ static FORCEINLINE int64_t __extract_element(const __vec16_i64 &v, uint32_t inde


/*
static FORCEINLINE void __insert_element(__vec16_i1 *vec, int index,
                                         bool val) {
  if (val == false)
    vec->v &= ~(1 << index);
  else
    vec->v |= (1 << index);
}
*/

template <int ALIGN> static FORCEINLINE __vec16_i1 __load(const __vec16_i1 *p) {
  const uint16_t *ptr = (const uint16_t *)p;
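The commented-out __insert_element treats the mask as a plain bitfield: setting a lane ORs its bit in, clearing a lane ANDs it out. A tiny standalone illustration of that pattern, using a bare uint16_t in place of __vec16_i1 (names and values here are only for illustration):

#include <assert.h>
#include <stdint.h>

int main() {
  uint16_t mask = 0;               // 16-lane mask, one bit per lane
  mask |= (1 << 3);                // "insert" true into lane 3
  assert(((mask >> 3) & 1) == 1);  // "extract" lane 3
  mask &= ~(1 << 3);               // "insert" false into lane 3 again
  assert(((mask >> 3) & 1) == 0);
  return 0;
}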
@@ -424,7 +424,7 @@ template <> FORCEINLINE __vec16_i1 __undef_i1<__vec16_i1>() {

/*

TODO

*/

@@ -435,7 +435,7 @@ TODO

/*

TODO

*/

@@ -684,7 +684,7 @@ template <> FORCEINLINE void __store<64>(__vec16_i32 *p, __vec16_i32 v) {
///////////////////////////////////////////////////////////////////////////
// int64
///////////////////////////////////////////////////////////////////////////
static FORCEINLINE
void __masked_store_i64(void *p, const __vec16_i64 &v, __vec16_i1 mask)
{
  __m512i v1;
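A scalar-equivalent sketch of what a masked 16-lane int64 store means: lane i is written only when bit i of the mask is set. Plain arrays stand in for the vector types and the function name is illustrative; this describes the semantics, not the header's intrinsic-based body.

#include <stdint.h>

static void masked_store_i64_sketch(void *p, const int64_t v[16], uint16_t mask) {
  int64_t *dst = (int64_t *)p;
  for (int i = 0; i < 16; i++)
    if ((mask >> i) & 1)   // only active lanes touch memory
      dst[i] = v[i];
}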
@@ -998,7 +998,7 @@ __gather_base_offsets32_i64(uint8_t *base, uint32_t scale, __vec16_i32 offsets,
   poitners, so in here there's no way to tell - only thing we can do
   is pick one...
*/
static FORCEINLINE __vec16_i64
__gather64_i64(__vec16_i64 addr, __vec16_i1 mask)
{
  __vec16_i64 ret;
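A scalar-equivalent sketch of a masked 64-bit gather: lane i is read from the address held in addr[i] when bit i of the mask is set, and left untouched otherwise. Plain arrays stand in for __vec16_i64 and __vec16_i1; the name is illustrative and this only conveys the semantics.

#include <stdint.h>

static void gather64_i64_sketch(int64_t ret[16], const uint64_t addr[16], uint16_t mask) {
  for (int i = 0; i < 16; i++)
    if ((mask >> i) & 1)   // inactive lanes keep their previous contents
      ret[i] = *(const int64_t *)(uintptr_t)addr[i];
}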
@@ -1862,6 +1862,7 @@ template <int ALIGN> static FORCEINLINE __vec16_i8 __load(const __vec16_i8 *p) {
template <int ALIGN> static FORCEINLINE void __store(__vec16_i8 *p, __vec16_i8 v) {
  *p = v;
}

static FORCEINLINE void
__scatter_base_offsets32_i8(uint8_t *b, uint32_t scale, __vec16_i32 offsets,
                            __vec16_i8 val, __vec16_i1 mask)
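A scalar-equivalent sketch of the masked byte scatter with base + scale*offset addressing: lane i writes val[i] to b + scale * offsets[i] when bit i of the mask is set. Plain arrays stand in for the vector types and the name is illustrative.

#include <stdint.h>
#include <stddef.h>

static void scatter_base_offsets32_i8_sketch(uint8_t *b, uint32_t scale,
                                              const int32_t offsets[16],
                                              const int8_t val[16], uint16_t mask) {
  for (int i = 0; i < 16; i++)
    if ((mask >> i) & 1)   // only active lanes write
      *(int8_t *)(b + (ptrdiff_t)scale * offsets[i]) = val[i];
}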
@@ -2184,8 +2185,7 @@ __scatter_base_offsets32_float(void *base, uint32_t scale, __vec16_i32 offsets,
// packed load/store
///////////////////////////////////////////////////////////////////////////

-static FORCEINLINE int32_t __packed_load_active(uint32_t *p, __vec16_i32 *val,
-                                                __vec16_i1 mask) {
+static FORCEINLINE int32_t __packed_load_active(uint32_t *p, __vec16_i32 *val, __vec16_i1 mask) {
  __vec16_i32 v = __load<64>(val);
  v = _mm512_mask_extloadunpacklo_epi32(v, mask, p, _MM_UPCONV_EPI32_NONE, _MM_HINT_NONE);
  v = _mm512_mask_extloadunpackhi_epi32(v, mask, (uint8_t*)p+64, _MM_UPCONV_EPI32_NONE, _MM_HINT_NONE);
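A scalar-equivalent sketch of what a packed ("compressed") load does: consecutive values starting at p are expanded into the active lanes of val, and the count of active lanes is returned. Plain arrays stand in for the vector types; the name is illustrative, not the header's implementation.

#include <stdint.h>

static int32_t packed_load_active_sketch(const uint32_t *p, uint32_t val[16], uint16_t mask) {
  int32_t n = 0;
  for (int i = 0; i < 16; i++)
    if ((mask >> i) & 1)
      val[i] = p[n++];     // inactive lanes keep their previous contents
  return n;                // number of values consumed from p
}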
@@ -2193,8 +2193,7 @@ static FORCEINLINE int32_t __packed_load_active(uint32_t *p, __vec16_i32 *val,
  return _mm_countbits_32(uint32_t(mask));
}

-static FORCEINLINE int32_t __packed_store_active(uint32_t *p, __vec16_i32 val,
-                                                 __vec16_i1 mask) {
+static FORCEINLINE int32_t __packed_store_active(uint32_t *p, __vec16_i32 val, __vec16_i1 mask) {
  _mm512_mask_extpackstorelo_epi32(p, mask, val, _MM_DOWNCONV_EPI32_NONE, _MM_HINT_NONE);
  _mm512_mask_extpackstorehi_epi32((uint8_t*)p+64, mask, val, _MM_DOWNCONV_EPI32_NONE, _MM_HINT_NONE);
  return _mm_countbits_32(uint32_t(mask));
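And the matching scalar-equivalent sketch of the packed store: the active lanes of val are written contiguously starting at p, and the popcount of the mask is returned. Again, plain arrays stand in for the vector types and the name is illustrative.

#include <stdint.h>

static int32_t packed_store_active_sketch(uint32_t *p, const uint32_t val[16], uint16_t mask) {
  int32_t n = 0;
  for (int i = 0; i < 16; i++)
    if ((mask >> i) & 1)
      p[n++] = val[i];     // compact active lanes into consecutive slots
  return n;                // number of values written to p
}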