More varied support for constant vectors from C++ backend.
If we have a vector of all zeros, a __setzero_* function call is now emitted, which lets target implementations call specialized intrinsics for that case. Undefined values are likewise reflected with an __undef_* call, so that information can be passed along as well. This change also cleans up the signature of the __smear_* functions: since they already have different names depending on the scalar value type, we no longer need the trick of passing an undefined value of the return vector type as the first parameter as an indirect way of overloading by return value. Issue #317.
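As a rough illustration (not part of this commit), the sketch below shows how a target-specific header could take advantage of hooks like these. The vec4_f type, its 4-wide width, and the SSE intrinsics are hypothetical stand-ins for the real 64-wide types in the generic headers; the point is only that a dedicated __setzero_*/__undef_* entry point can be lowered to a single specialized instruction instead of a loop that smears a scalar constant across every lane.

#include <xmmintrin.h>   // SSE intrinsics; purely illustrative here

// Hypothetical 4-wide float vector type (the real headers use 64-wide structs).
struct vec4_f {
    __m128 v;
};

// A dedicated zero hook can map straight onto _mm_setzero_ps()
// rather than writing a smeared 0.0f into each lane.
static inline vec4_f __setzero_float() {
    vec4_f ret;
    ret.v = _mm_setzero_ps();
    return ret;
}

// An undef hook only has to return some value; value-initialization keeps
// the sketch well defined (a real target could leave the choice to the compiler).
static inline vec4_f __undef_float() {
    return vec4_f();
}

// General smear, taking just the scalar: no dummy first parameter is needed,
// since the function name already encodes the element type.
static inline vec4_f __smear_float(float x) {
    vec4_f ret;
    ret.v = _mm_set1_ps(x);
    return ret;
}

With hooks along these lines, an all-zero constant vector in the generated code becomes a single __setzero_* call that each target can implement however it sees fit.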
@@ -452,13 +452,26 @@ static FORCEINLINE TYPE NAME(TYPE a, int32_t b) { \
 }
 
 #define SMEAR(VTYPE, NAME, STYPE) \
-static FORCEINLINE VTYPE __smear_##NAME(VTYPE retType, STYPE v) { \
+static FORCEINLINE VTYPE __smear_##NAME(STYPE v) { \
     VTYPE ret; \
     for (int i = 0; i < 64; ++i) \
         ret.v[i] = v; \
     return ret; \
 }
 
+#define SETZERO(VTYPE, NAME) \
+static FORCEINLINE VTYPE __setzero_##NAME() { \
+    VTYPE ret; \
+    for (int i = 0; i < 64; ++i) \
+        ret.v[i] = 0; \
+    return ret; \
+}
+
+#define UNDEF(VTYPE, NAME) \
+static FORCEINLINE VTYPE __undef_##NAME(VTYPE retType) { \
+    return VTYPE(); \
+}
+
 #define BROADCAST(VTYPE, NAME, STYPE) \
 static FORCEINLINE VTYPE __broadcast_##NAME(VTYPE v, int index) { \
     VTYPE ret; \
@@ -584,7 +597,7 @@ template <int ALIGN> static FORCEINLINE void __store(__vec64_i1 *p, __vec64_i1 v
     *ptr = v.v;
 }
 
-static FORCEINLINE __vec64_i1 __smear_i1(__vec64_i1, int v) {
+static FORCEINLINE __vec64_i1 __smear_i1(int v) {
     return __vec64_i1(v, v, v, v, v, v, v, v,
                       v, v, v, v, v, v, v, v,
                       v, v, v, v, v, v, v, v,
@@ -595,6 +608,21 @@ static FORCEINLINE __vec64_i1 __smear_i1(__vec64_i1, int v) {
                       v, v, v, v, v, v, v, v);
 }
 
+static FORCEINLINE __vec64_i1 __setzero_i1() {
+    return __vec64_i1(0, 0, 0, 0, 0, 0, 0, 0,
+                      0, 0, 0, 0, 0, 0, 0, 0,
+                      0, 0, 0, 0, 0, 0, 0, 0,
+                      0, 0, 0, 0, 0, 0, 0, 0,
+                      0, 0, 0, 0, 0, 0, 0, 0,
+                      0, 0, 0, 0, 0, 0, 0, 0,
+                      0, 0, 0, 0, 0, 0, 0, 0,
+                      0, 0, 0, 0, 0, 0, 0, 0);
+}
+
+static FORCEINLINE __vec64_i1 __undef_i1() {
+    return __vec64_i1();
+}
+
 
 ///////////////////////////////////////////////////////////////////////////
 // int8
@@ -634,6 +662,8 @@ CMP_OP(__vec64_i8, int8_t, __signed_greater_than, >)
 SELECT(__vec64_i8)
 INSERT_EXTRACT(__vec64_i8, int8_t)
 SMEAR(__vec64_i8, i8, int8_t)
+SETZERO(__vec64_i8, i8)
+UNDEF(__vec64_i8, i8)
 BROADCAST(__vec64_i8, i8, int8_t)
 ROTATE(__vec64_i8, i8, int8_t)
 SHUFFLES(__vec64_i8, i8, int8_t)
@@ -677,6 +707,8 @@ CMP_OP(__vec64_i16, int16_t, __signed_greater_than, >)
 SELECT(__vec64_i16)
 INSERT_EXTRACT(__vec64_i16, int16_t)
 SMEAR(__vec64_i16, i16, int16_t)
+SETZERO(__vec64_i16, i16)
+UNDEF(__vec64_i16, i16)
 BROADCAST(__vec64_i16, i16, int16_t)
 ROTATE(__vec64_i16, i16, int16_t)
 SHUFFLES(__vec64_i16, i16, int16_t)
@@ -720,6 +752,8 @@ CMP_OP(__vec64_i32, int32_t, __signed_greater_than, >)
 SELECT(__vec64_i32)
 INSERT_EXTRACT(__vec64_i32, int32_t)
 SMEAR(__vec64_i32, i32, int32_t)
+SETZERO(__vec64_i32, i32)
+UNDEF(__vec64_i32, i32)
 BROADCAST(__vec64_i32, i32, int32_t)
 ROTATE(__vec64_i32, i32, int32_t)
 SHUFFLES(__vec64_i32, i32, int32_t)
@@ -763,6 +797,8 @@ CMP_OP(__vec64_i64, int64_t, __signed_greater_than, >)
 SELECT(__vec64_i64)
 INSERT_EXTRACT(__vec64_i64, int64_t)
 SMEAR(__vec64_i64, i64, int64_t)
+SETZERO(__vec64_i64, i64)
+UNDEF(__vec64_i64, i64)
 BROADCAST(__vec64_i64, i64, int64_t)
 ROTATE(__vec64_i64, i64, int64_t)
 SHUFFLES(__vec64_i64, i64, int64_t)
@@ -798,6 +834,8 @@ static FORCEINLINE __vec64_i1 __ordered(__vec64_f a, __vec64_f b) {
 SELECT(__vec64_f)
 INSERT_EXTRACT(__vec64_f, float)
 SMEAR(__vec64_f, float, float)
+SETZERO(__vec64_f, float)
+UNDEF(__vec64_f, float)
 BROADCAST(__vec64_f, float, float)
 ROTATE(__vec64_f, float, float)
 SHUFFLES(__vec64_f, float, float)
@@ -948,6 +986,8 @@ static FORCEINLINE __vec64_i1 __ordered(__vec64_d a, __vec64_d b) {
 SELECT(__vec64_d)
 INSERT_EXTRACT(__vec64_d, double)
 SMEAR(__vec64_d, double, double)
+SETZERO(__vec64_d, double)
+UNDEF(__vec64_d, double)
 BROADCAST(__vec64_d, double, double)
 ROTATE(__vec64_d, double, double)
 SHUFFLES(__vec64_d, double, double)