More varied support for constant vectors in the C++ backend.

If we have a vector of all zeros, a __setzero_* function call is now
emitted, which lets targets provide specialized intrinsics for that case.
Undefined values are reflected with an __undef_* call, which similarly
passes that information along to the target implementation.
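
For illustration, a minimal sketch of what this enables on a hypothetical
4-wide SSE target (the __vec4_f type and the FORCEINLINE fallback below are
assumptions, not code from this change): with dedicated __setzero_* and
__undef_* entry points, a target can map an all-zeros constant to the
hardware zeroing idiom instead of a per-lane smear.

    #include <xmmintrin.h>

    #ifndef FORCEINLINE
    #define FORCEINLINE inline
    #endif

    // Hypothetical 4-wide float vector for an SSE target.
    struct __vec4_f { __m128 v; };

    static FORCEINLINE __vec4_f __setzero_float() {
        __vec4_f ret;
        ret.v = _mm_setzero_ps();  // single xorps, no per-lane stores
        return ret;
    }

    static FORCEINLINE __vec4_f __undef_float() {
        // Any contents are acceptable for an undefined value; just
        // return a default-constructed vector, as the generic UNDEF
        // macro in the diff below does.
        return __vec4_f();
    }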

This change also includes a cleanup of the signatures of the __smear_*
functions: since they already have different names depending on the
scalar value type, we don't need the trick of passing an undefined value
of the return vector type as the first parameter as an indirect way to
overload by return value (which C++ doesn't allow directly).
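
As a simplified sketch of the trick being removed (the types and bodies
here are stand-ins, not code from this change): C++ can't overload on
return type alone, so a single overloaded __smear needed a dummy first
argument to pick the return type, while the suffixed names don't.

    #include <stdint.h>

    struct __vec64_i32 { int32_t v[64]; };
    struct __vec64_f   { float   v[64]; };

    // Old style: one overload set named __smear; the unused first
    // argument exists only to select the return vector type.
    static __vec64_i32 __smear(__vec64_i32, int32_t x) {
        __vec64_i32 ret;
        for (int i = 0; i < 64; ++i)
            ret.v[i] = x;
        return ret;
    }
    static __vec64_f __smear(__vec64_f, float x) {
        __vec64_f ret;
        for (int i = 0; i < 64; ++i)
            ret.v[i] = x;
        return ret;
    }
    // Call site: __vec64_f v = __smear(__vec64_f(), 1.0f);

    // New style: the suffix already encodes the scalar type, so the
    // dummy argument can go away.
    static __vec64_f __smear_float(float x) {
        __vec64_f ret;
        for (int i = 0; i < 64; ++i)
            ret.v[i] = x;
        return ret;
    }
    // Call site: __vec64_f v = __smear_float(1.0f);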

Issue #317.
Author: Matt Pharr
Date:   2012-07-05 20:19:11 -07:00
Parent: ac421f68e2
Commit: 0d3993fa25

7 changed files with 330 additions and 104 deletions

@@ -452,13 +452,26 @@ static FORCEINLINE TYPE NAME(TYPE a, int32_t b) { \
}
#define SMEAR(VTYPE, NAME, STYPE) \
-static FORCEINLINE VTYPE __smear_##NAME(VTYPE retType, STYPE v) { \
+static FORCEINLINE VTYPE __smear_##NAME(STYPE v) { \
VTYPE ret; \
for (int i = 0; i < 64; ++i) \
ret.v[i] = v; \
return ret; \
}
+#define SETZERO(VTYPE, NAME) \
+static FORCEINLINE VTYPE __setzero_##NAME() { \
+VTYPE ret; \
+for (int i = 0; i < 64; ++i) \
+ret.v[i] = 0; \
+return ret; \
+}
+#define UNDEF(VTYPE, NAME) \
+static FORCEINLINE VTYPE __undef_##NAME(VTYPE retType) { \
+return VTYPE(); \
+}
#define BROADCAST(VTYPE, NAME, STYPE) \
static FORCEINLINE VTYPE __broadcast_##NAME(VTYPE v, int index) { \
VTYPE ret; \
@@ -584,7 +597,7 @@ template <int ALIGN> static FORCEINLINE void __store(__vec64_i1 *p, __vec64_i1 v
*ptr = v.v;
}
-static FORCEINLINE __vec64_i1 __smear_i1(__vec64_i1, int v) {
+static FORCEINLINE __vec64_i1 __smear_i1(int v) {
return __vec64_i1(v, v, v, v, v, v, v, v,
v, v, v, v, v, v, v, v,
v, v, v, v, v, v, v, v,
@@ -595,6 +608,21 @@ static FORCEINLINE __vec64_i1 __smear_i1(__vec64_i1, int v) {
v, v, v, v, v, v, v, v);
}
+static FORCEINLINE __vec64_i1 __setzero_i1() {
+return __vec64_i1(0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0);
+}
+static FORCEINLINE __vec64_i1 __undef_i1() {
+return __vec64_i1();
+}
///////////////////////////////////////////////////////////////////////////
// int8
@@ -634,6 +662,8 @@ CMP_OP(__vec64_i8, int8_t, __signed_greater_than, >)
SELECT(__vec64_i8)
INSERT_EXTRACT(__vec64_i8, int8_t)
SMEAR(__vec64_i8, i8, int8_t)
+SETZERO(__vec64_i8, i8)
+UNDEF(__vec64_i8, i8)
BROADCAST(__vec64_i8, i8, int8_t)
ROTATE(__vec64_i8, i8, int8_t)
SHUFFLES(__vec64_i8, i8, int8_t)
@@ -677,6 +707,8 @@ CMP_OP(__vec64_i16, int16_t, __signed_greater_than, >)
SELECT(__vec64_i16)
INSERT_EXTRACT(__vec64_i16, int16_t)
SMEAR(__vec64_i16, i16, int16_t)
+SETZERO(__vec64_i16, i16)
+UNDEF(__vec64_i16, i16)
BROADCAST(__vec64_i16, i16, int16_t)
ROTATE(__vec64_i16, i16, int16_t)
SHUFFLES(__vec64_i16, i16, int16_t)
@@ -720,6 +752,8 @@ CMP_OP(__vec64_i32, int32_t, __signed_greater_than, >)
SELECT(__vec64_i32)
INSERT_EXTRACT(__vec64_i32, int32_t)
SMEAR(__vec64_i32, i32, int32_t)
+SETZERO(__vec64_i32, i32)
+UNDEF(__vec64_i32, i32)
BROADCAST(__vec64_i32, i32, int32_t)
ROTATE(__vec64_i32, i32, int32_t)
SHUFFLES(__vec64_i32, i32, int32_t)
@@ -763,6 +797,8 @@ CMP_OP(__vec64_i64, int64_t, __signed_greater_than, >)
SELECT(__vec64_i64)
INSERT_EXTRACT(__vec64_i64, int64_t)
SMEAR(__vec64_i64, i64, int64_t)
+SETZERO(__vec64_i64, i64)
+UNDEF(__vec64_i64, i64)
BROADCAST(__vec64_i64, i64, int64_t)
ROTATE(__vec64_i64, i64, int64_t)
SHUFFLES(__vec64_i64, i64, int64_t)
@@ -798,6 +834,8 @@ static FORCEINLINE __vec64_i1 __ordered(__vec64_f a, __vec64_f b) {
SELECT(__vec64_f)
INSERT_EXTRACT(__vec64_f, float)
SMEAR(__vec64_f, float, float)
+SETZERO(__vec64_f, float)
+UNDEF(__vec64_f, float)
BROADCAST(__vec64_f, float, float)
ROTATE(__vec64_f, float, float)
SHUFFLES(__vec64_f, float, float)
@@ -948,6 +986,8 @@ static FORCEINLINE __vec64_i1 __ordered(__vec64_d a, __vec64_d b) {
SELECT(__vec64_d)
INSERT_EXTRACT(__vec64_d, double)
SMEAR(__vec64_d, double, double)
+SETZERO(__vec64_d, double)
+UNDEF(__vec64_d, double)
BROADCAST(__vec64_d, double, double)
ROTATE(__vec64_d, double, double)
SHUFFLES(__vec64_d, double, double)
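
For orientation, a hypothetical driver (not part of this commit) showing
how code emitted by the C++ backend would use the three entry points; the
functions below are simplified stand-ins for the macro-generated 64-wide
versions in the diff above.

    #include <stdint.h>
    #include <stdio.h>

    struct __vec64_i32 { int32_t v[64]; };

    static inline __vec64_i32 __smear_i32(int32_t x) {
        __vec64_i32 ret;
        for (int i = 0; i < 64; ++i)
            ret.v[i] = x;
        return ret;
    }
    static inline __vec64_i32 __setzero_i32() {
        __vec64_i32 ret;
        for (int i = 0; i < 64; ++i)
            ret.v[i] = 0;
        return ret;
    }
    static inline __vec64_i32 __undef_i32() { return __vec64_i32(); }

    int main() {
        __vec64_i32 a = __smear_i32(7);   // uniform constant: all lanes 7
        __vec64_i32 z = __setzero_i32();  // all-zeros constant
        __vec64_i32 u = __undef_i32();    // undefined: contents unspecified
        (void)u;
        printf("%d %d\n", a.v[63], z.v[63]);  // prints "7 0"
        return 0;
    }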