diff --git a/examples/intrinsics/generic-16.h b/examples/intrinsics/generic-16.h index e2426f7f..3ac77a41 100644 --- a/examples/intrinsics/generic-16.h +++ b/examples/intrinsics/generic-16.h @@ -938,7 +938,7 @@ REDUCE_MINMAX(uint64_t, __vec16_i64, __reduce_max_uint64, >) /////////////////////////////////////////////////////////////////////////// // masked load/store -static FORCEINLINE __vec16_i8 __masked_load_8(unsigned char *p, +static FORCEINLINE __vec16_i8 __masked_load_8(void *p, __vec16_i1 mask) { __vec16_i8 ret; int8_t *ptr = (int8_t *)p; @@ -948,7 +948,7 @@ static FORCEINLINE __vec16_i8 __masked_load_8(unsigned char *p, return ret; } -static FORCEINLINE __vec16_i16 __masked_load_16(unsigned char *p, +static FORCEINLINE __vec16_i16 __masked_load_16(void *p, __vec16_i1 mask) { __vec16_i16 ret; int16_t *ptr = (int16_t *)p; @@ -958,7 +958,7 @@ static FORCEINLINE __vec16_i16 __masked_load_16(unsigned char *p, return ret; } -static FORCEINLINE __vec16_i32 __masked_load_32(unsigned char *p, +static FORCEINLINE __vec16_i32 __masked_load_32(void *p, __vec16_i1 mask) { __vec16_i32 ret; int32_t *ptr = (int32_t *)p; @@ -968,7 +968,7 @@ static FORCEINLINE __vec16_i32 __masked_load_32(unsigned char *p, return ret; } -static FORCEINLINE __vec16_i64 __masked_load_64(unsigned char *p, +static FORCEINLINE __vec16_i64 __masked_load_64(void *p, __vec16_i1 mask) { __vec16_i64 ret; int64_t *ptr = (int64_t *)p; @@ -978,7 +978,7 @@ static FORCEINLINE __vec16_i64 __masked_load_64(unsigned char *p, return ret; } -static FORCEINLINE void __masked_store_8(unsigned char *p, __vec16_i8 val, +static FORCEINLINE void __masked_store_8(void *p, __vec16_i8 val, __vec16_i1 mask) { int8_t *ptr = (int8_t *)p; for (int i = 0; i < 16; ++i) @@ -986,7 +986,7 @@ static FORCEINLINE void __masked_store_8(unsigned char *p, __vec16_i8 val, ptr[i] = val.v[i]; } -static FORCEINLINE void __masked_store_16(unsigned char *p, __vec16_i16 val, +static FORCEINLINE void __masked_store_16(void *p, __vec16_i16 val, __vec16_i1 mask) { int16_t *ptr = (int16_t *)p; for (int i = 0; i < 16; ++i) @@ -994,7 +994,7 @@ static FORCEINLINE void __masked_store_16(unsigned char *p, __vec16_i16 val, ptr[i] = val.v[i]; } -static FORCEINLINE void __masked_store_32(unsigned char *p, __vec16_i32 val, +static FORCEINLINE void __masked_store_32(void *p, __vec16_i32 val, __vec16_i1 mask) { int32_t *ptr = (int32_t *)p; for (int i = 0; i < 16; ++i) @@ -1002,7 +1002,7 @@ static FORCEINLINE void __masked_store_32(unsigned char *p, __vec16_i32 val, ptr[i] = val.v[i]; } -static FORCEINLINE void __masked_store_64(unsigned char *p, __vec16_i64 val, +static FORCEINLINE void __masked_store_64(void *p, __vec16_i64 val, __vec16_i1 mask) { int64_t *ptr = (int64_t *)p; for (int i = 0; i < 16; ++i) diff --git a/examples/intrinsics/sse4.h b/examples/intrinsics/sse4.h index 0662a4c0..5e5131f3 100644 --- a/examples/intrinsics/sse4.h +++ b/examples/intrinsics/sse4.h @@ -2328,7 +2328,7 @@ static FORCEINLINE uint64_t __reduce_max_uint64(__vec4_i64 v) { /////////////////////////////////////////////////////////////////////////// // masked load/store -static FORCEINLINE __vec4_i8 __masked_load_8(unsigned char *p, +static FORCEINLINE __vec4_i8 __masked_load_8(void *p, __vec4_i1 mask) { int8_t r[4]; int8_t *ptr = (int8_t *)p; @@ -2348,7 +2348,7 @@ static FORCEINLINE __vec4_i8 __masked_load_8(unsigned char *p, return __vec4_i8(r[0], r[1], r[2], r[3]); } -static FORCEINLINE __vec4_i16 __masked_load_16(unsigned char *p, +static FORCEINLINE __vec4_i16 __masked_load_16(void *p, __vec4_i1 mask) { int16_t r[4]; int16_t *ptr = (int16_t *)p; @@ -2372,7 +2372,7 @@ static FORCEINLINE __vec4_i16 __masked_load_16(unsigned char *p, return __vec4_i16(r[0], r[1], r[2], r[3]); } -static FORCEINLINE __vec4_i32 __masked_load_32(unsigned char *p, +static FORCEINLINE __vec4_i32 __masked_load_32(void *p, __vec4_i1 mask) { __m128i r = _mm_set_epi32(0, 0, 0, 0); int32_t *ptr = (int32_t *)p; @@ -2395,7 +2395,7 @@ static FORCEINLINE __vec4_i32 __masked_load_32(unsigned char *p, return r; } -static FORCEINLINE __vec4_i64 __masked_load_64(unsigned char *p, +static FORCEINLINE __vec4_i64 __masked_load_64(void *p, __vec4_i1 mask) { uint64_t r[4]; uint64_t *ptr = (uint64_t *)p; @@ -2418,7 +2418,7 @@ static FORCEINLINE __vec4_i64 __masked_load_64(unsigned char *p, return __vec4_i64(r[0], r[1], r[2], r[3]); } -static FORCEINLINE void __masked_store_8(unsigned char *p, __vec4_i8 val, +static FORCEINLINE void __masked_store_8(void *p, __vec4_i8 val, __vec4_i1 mask) { int8_t *ptr = (int8_t *)p; @@ -2439,7 +2439,7 @@ static FORCEINLINE void __masked_store_8(unsigned char *p, __vec4_i8 val, ptr[3] = _mm_extract_epi8(val.v, 3); } -static FORCEINLINE void __masked_store_16(unsigned char *p, __vec4_i16 val, __vec4_i1 mask) { +static FORCEINLINE void __masked_store_16(void *p, __vec4_i16 val, __vec4_i1 mask) { int16_t *ptr = (int16_t *)p; uint32_t m = _mm_extract_ps(mask.v, 0); @@ -2459,7 +2459,7 @@ static FORCEINLINE void __masked_store_16(unsigned char *p, __vec4_i16 val, __ve ptr[3] = _mm_extract_epi16(val.v, 3); } -static FORCEINLINE void __masked_store_32(unsigned char *p, __vec4_i32 val, +static FORCEINLINE void __masked_store_32(void *p, __vec4_i32 val, __vec4_i1 mask) { int32_t *ptr = (int32_t *)p; uint32_t m = _mm_extract_ps(mask.v, 0); @@ -2479,7 +2479,7 @@ static FORCEINLINE void __masked_store_32(unsigned char *p, __vec4_i32 val, ptr[3] = _mm_extract_epi32(val.v, 3); } -static FORCEINLINE void __masked_store_64(unsigned char *p, __vec4_i64 val, +static FORCEINLINE void __masked_store_64(void *p, __vec4_i64 val, __vec4_i1 mask) { int64_t *ptr = (int64_t *)p; uint32_t m = _mm_extract_ps(mask.v, 0);