All the smear(), setzero() and undef() APIs are now templated on the return type.
ISPC's internal name mangling was modified to pass these template names through unchanged. Care was taken to make sure this does not introduce an ABI change.
This commit is contained in:
@@ -271,7 +271,8 @@ static FORCEINLINE TYPE NAME(TYPE a, int32_t b) { \
|
||||
}
|
||||
|
||||
#define SMEAR(VTYPE, NAME, STYPE) \
|
||||
static FORCEINLINE VTYPE __smear_##NAME(STYPE v) { \
|
||||
template <class RetVecType> VTYPE __smear_##NAME(STYPE); \
|
||||
template <> FORCEINLINE VTYPE __smear_##NAME<VTYPE>(STYPE v) { \
|
||||
VTYPE ret; \
|
||||
for (int i = 0; i < 16; ++i) \
|
||||
ret.v[i] = v; \
|
||||
@@ -279,7 +280,8 @@ static FORCEINLINE VTYPE __smear_##NAME(STYPE v) { \
|
||||
}
|
||||
|
||||
#define SETZERO(VTYPE, NAME) \
|
||||
static FORCEINLINE VTYPE __setzero_##NAME() { \
|
||||
template <class RetVecType> VTYPE __setzero_##NAME(); \
|
||||
template <> FORCEINLINE VTYPE __setzero_##NAME<VTYPE>() { \
|
||||
VTYPE ret; \
|
||||
for (int i = 0; i < 16; ++i) \
|
||||
ret.v[i] = 0; \
|
||||
@@ -287,7 +289,8 @@ static FORCEINLINE VTYPE __setzero_##NAME() { \
|
||||
}
|
||||
|
||||
#define UNDEF(VTYPE, NAME) \
|
||||
static FORCEINLINE VTYPE __undef_##NAME() { \
|
||||
template <class RetVecType> VTYPE __undef_##NAME(); \
|
||||
template <> FORCEINLINE VTYPE __undef_##NAME<VTYPE>() { \
|
||||
return VTYPE(); \
|
||||
}
|
||||
|
||||
@@ -416,18 +419,20 @@ template <int ALIGN> static FORCEINLINE void __store(__vec16_i1 *p, __vec16_i1 v
|
||||
*ptr = v.v;
|
||||
}
|
||||
|
||||
static FORCEINLINE __vec16_i1 __smear_i1(int v) {
|
||||
template <class RetVecType> __vec16_i1 __smear_i1(int i);
|
||||
template <> FORCEINLINE __vec16_i1 __smear_i1<__vec16_i1>(int v) {
|
||||
return __vec16_i1(v, v, v, v, v, v, v, v,
|
||||
v, v, v, v, v, v, v, v);
|
||||
}
|
||||
|
||||
static FORCEINLINE __vec16_i1 __setzero_i1() {
|
||||
template <class RetVecType> __vec16_i1 __setzero_i1();
|
||||
template <> FORCEINLINE __vec16_i1 __setzero_i1<__vec16_i1>() {
|
||||
return __vec16_i1(0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0);
|
||||
}
|
||||
|
||||
|
||||
static FORCEINLINE __vec16_i1 __undef_i1() {
|
||||
template <class RetVecType> __vec16_i1 __undef_i1();
|
||||
template <> FORCEINLINE __vec16_i1 __undef_i1<__vec16_i1>() {
|
||||
return __vec16_i1();
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user