changed codestyle to 2 spaces in 'knc.h'
This commit is contained in:
@@ -4,7 +4,7 @@
|
|||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
modification, are permitted provided that the following conditions are
|
modification, are permitted provided that the following conditions are
|
||||||
met:
|
met:
|
||||||
|
|
||||||
* Redistributions of source code must retain the above copyright
|
* Redistributions of source code must retain the above copyright
|
||||||
notice, this list of conditions and the following disclaimer.
|
notice, this list of conditions and the following disclaimer.
|
||||||
@@ -29,7 +29,7 @@
|
|||||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <limits.h> // INT_MIN
|
#include <limits.h> // INT_MIN
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
@@ -86,10 +86,10 @@ struct __vec16_i32;
|
|||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
/* (iw) actually, this *SHOULD* be the right implementation for a
|
/* (iw) actually, this *SHOULD* be the right implementation for a
|
||||||
vec16_i1: this one is a class that can have a constructor (which
|
vec16_i1: this one is a class that can have a constructor (which
|
||||||
ISPC sometimes emits for these vectors...) This version might
|
ISPC sometimes emits for these vectors...) This version might
|
||||||
not be working with embree's ISPC bindings, probably because
|
not be working with embree's ISPC bindings, probably because
|
||||||
embree still uses the 'wrong' implementation */
|
embree still uses the 'wrong' implementation */
|
||||||
typedef struct PRE_ALIGN(2) __vec16_i1
|
typedef struct PRE_ALIGN(2) __vec16_i1
|
||||||
{
|
{
|
||||||
FORCEINLINE operator __mmask16() const { return v; }
|
FORCEINLINE operator __mmask16() const { return v; }
|
||||||
@@ -277,9 +277,9 @@ inline std::ostream &operator<<(std::ostream &out, const __vec16_i64 &v)
|
|||||||
out << "[";
|
out << "[";
|
||||||
uint32_t *ptr = (uint32_t*)&v;
|
uint32_t *ptr = (uint32_t*)&v;
|
||||||
for (int i=0;i<16;i++) {
|
for (int i=0;i<16;i++) {
|
||||||
uint64_t val = (uint64_t(ptr[i])<<32)+ptr[i+16];
|
uint64_t val = (uint64_t(ptr[i])<<32)+ptr[i+16];
|
||||||
out << (i?",":"") << ((int*)val);
|
out << (i?",":"") << ((int*)val);
|
||||||
}
|
}
|
||||||
out << "]" << std::flush;
|
out << "]" << std::flush;
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
@@ -382,14 +382,14 @@ static FORCEINLINE int64_t __extract_element(const __vec16_i64 &v, uint32_t inde
|
|||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
static FORCEINLINE void __insert_element(__vec16_i1 *vec, int index,
|
static FORCEINLINE void __insert_element(__vec16_i1 *vec, int index,
|
||||||
bool val) {
|
bool val) {
|
||||||
if (val == false)
|
if (val == false)
|
||||||
vec->v &= ~(1 << index);
|
vec->v &= ~(1 << index);
|
||||||
else
|
else
|
||||||
vec->v |= (1 << index);
|
vec->v |= (1 << index);
|
||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
|
|
||||||
template <int ALIGN> static FORCEINLINE __vec16_i1 __load(const __vec16_i1 *p) {
|
template <int ALIGN> static FORCEINLINE __vec16_i1 __load(const __vec16_i1 *p) {
|
||||||
const uint16_t *ptr = (const uint16_t *)p;
|
const uint16_t *ptr = (const uint16_t *)p;
|
||||||
@@ -424,7 +424,7 @@ template <> FORCEINLINE __vec16_i1 __undef_i1<__vec16_i1>() {
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
||||||
TODO
|
TODO
|
||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@@ -435,7 +435,7 @@ TODO
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
||||||
TODO
|
TODO
|
||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@@ -684,7 +684,7 @@ template <> FORCEINLINE void __store<64>(__vec16_i32 *p, __vec16_i32 v) {
|
|||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
// int64
|
// int64
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
static FORCEINLINE
|
static FORCEINLINE
|
||||||
void __masked_store_i64(void *p, const __vec16_i64 &v, __vec16_i1 mask)
|
void __masked_store_i64(void *p, const __vec16_i64 &v, __vec16_i1 mask)
|
||||||
{
|
{
|
||||||
__m512i v1;
|
__m512i v1;
|
||||||
@@ -998,7 +998,7 @@ __gather_base_offsets32_i64(uint8_t *base, uint32_t scale, __vec16_i32 offsets,
|
|||||||
pointers, so in here there's no way to tell - only thing we can do
|
pointers, so in here there's no way to tell - only thing we can do
|
||||||
is pick one...
|
is pick one...
|
||||||
*/
|
*/
|
||||||
static FORCEINLINE __vec16_i64
|
static FORCEINLINE __vec16_i64
|
||||||
__gather64_i64(__vec16_i64 addr, __vec16_i1 mask)
|
__gather64_i64(__vec16_i64 addr, __vec16_i1 mask)
|
||||||
{
|
{
|
||||||
__vec16_i64 ret;
|
__vec16_i64 ret;
|
||||||
@@ -1862,6 +1862,7 @@ template <int ALIGN> static FORCEINLINE __vec16_i8 __load(const __vec16_i8 *p) {
|
|||||||
template <int ALIGN> static FORCEINLINE void __store(__vec16_i8 *p, __vec16_i8 v) {
|
template <int ALIGN> static FORCEINLINE void __store(__vec16_i8 *p, __vec16_i8 v) {
|
||||||
*p = v;
|
*p = v;
|
||||||
}
|
}
|
||||||
|
|
||||||
static FORCEINLINE void
|
static FORCEINLINE void
|
||||||
__scatter_base_offsets32_i8(uint8_t *b, uint32_t scale, __vec16_i32 offsets,
|
__scatter_base_offsets32_i8(uint8_t *b, uint32_t scale, __vec16_i32 offsets,
|
||||||
__vec16_i8 val, __vec16_i1 mask)
|
__vec16_i8 val, __vec16_i1 mask)
|
||||||
@@ -2184,8 +2185,7 @@ __scatter_base_offsets32_float(void *base, uint32_t scale, __vec16_i32 offsets,
|
|||||||
// packed load/store
|
// packed load/store
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
static FORCEINLINE int32_t __packed_load_active(uint32_t *p, __vec16_i32 *val,
|
static FORCEINLINE int32_t __packed_load_active(uint32_t *p, __vec16_i32 *val, __vec16_i1 mask) {
|
||||||
__vec16_i1 mask) {
|
|
||||||
__vec16_i32 v = __load<64>(val);
|
__vec16_i32 v = __load<64>(val);
|
||||||
v = _mm512_mask_extloadunpacklo_epi32(v, mask, p, _MM_UPCONV_EPI32_NONE, _MM_HINT_NONE);
|
v = _mm512_mask_extloadunpacklo_epi32(v, mask, p, _MM_UPCONV_EPI32_NONE, _MM_HINT_NONE);
|
||||||
v = _mm512_mask_extloadunpackhi_epi32(v, mask, (uint8_t*)p+64, _MM_UPCONV_EPI32_NONE, _MM_HINT_NONE);
|
v = _mm512_mask_extloadunpackhi_epi32(v, mask, (uint8_t*)p+64, _MM_UPCONV_EPI32_NONE, _MM_HINT_NONE);
|
||||||
@@ -2193,8 +2193,7 @@ static FORCEINLINE int32_t __packed_load_active(uint32_t *p, __vec16_i32 *val,
|
|||||||
return _mm_countbits_32(uint32_t(mask));
|
return _mm_countbits_32(uint32_t(mask));
|
||||||
}
|
}
|
||||||
|
|
||||||
static FORCEINLINE int32_t __packed_store_active(uint32_t *p, __vec16_i32 val,
|
static FORCEINLINE int32_t __packed_store_active(uint32_t *p, __vec16_i32 val, __vec16_i1 mask) {
|
||||||
__vec16_i1 mask) {
|
|
||||||
_mm512_mask_extpackstorelo_epi32(p, mask, val, _MM_DOWNCONV_EPI32_NONE, _MM_HINT_NONE);
|
_mm512_mask_extpackstorelo_epi32(p, mask, val, _MM_DOWNCONV_EPI32_NONE, _MM_HINT_NONE);
|
||||||
_mm512_mask_extpackstorehi_epi32((uint8_t*)p+64, mask, val, _MM_DOWNCONV_EPI32_NONE, _MM_HINT_NONE);
|
_mm512_mask_extpackstorehi_epi32((uint8_t*)p+64, mask, val, _MM_DOWNCONV_EPI32_NONE, _MM_HINT_NONE);
|
||||||
return _mm_countbits_32(uint32_t(mask));
|
return _mm_countbits_32(uint32_t(mask));
|
||||||
|
|||||||
Reference in New Issue
Block a user