Fix __ordered and __unordered floating point functions for C++ target.
Fixes include adding "_float" and "_double" suffixes as appropriate as well as providing a number of missing implementations. This fixes a number of failures in the half* tests.
This commit is contained in:
@@ -1422,6 +1422,10 @@ static FORCEINLINE __vec4_i1 __ordered_float(__vec4_f a, __vec4_f b) {
|
||||
return _mm_cmpord_ps(a.v, b.v);
|
||||
}
|
||||
|
||||
// Per-lane "unordered" test for two 4-wide float vectors: a result lane is
// set when the comparison of the corresponding lanes of a and b is
// unordered, i.e. at least one of them is NaN.
static FORCEINLINE __vec4_i1 __unordered_float(__vec4_f a, __vec4_f b) {
    __m128 nanMask = _mm_cmpunord_ps(a.v, b.v);
    return nanMask;
}
|
||||
|
||||
// Per-lane select over 4-wide float vectors: yields a's lane where the mask
// lane is set and b's lane otherwise.
static FORCEINLINE __vec4_f __select(__vec4_i1 mask, __vec4_f a, __vec4_f b) {
    // _mm_blendv_ps picks its *second* operand where the mask lane's top bit
    // is set, so b goes first to obtain "mask ? a : b".
    __m128 blended = _mm_blendv_ps(b.v, a.v, mask.v);
    return blended;
}
|
||||
@@ -1556,6 +1560,13 @@ static FORCEINLINE __vec4_i1 __ordered_double(__vec4_d a, __vec4_d b) {
|
||||
_MM_SHUFFLE(2, 0, 2, 0));
|
||||
}
|
||||
|
||||
// Per-lane "unordered" test for two 4-wide double vectors: a result lane is
// set when at least one of the corresponding lanes of a and b is NaN.
static FORCEINLINE __vec4_i1 __unordered_double(__vec4_d a, __vec4_d b) {
    // The four doubles are held as two __m128d halves; compare each half and
    // reinterpret the 2x64-bit results as 4x32-bit floats.
    __m128 lowHalf = _mm_castpd_ps(_mm_cmpunord_pd(a.v[0], b.v[0]));
    __m128 highHalf = _mm_castpd_ps(_mm_cmpunord_pd(a.v[1], b.v[1]));

    // Each 64-bit compare result is all-ones or all-zeros, so keeping only
    // 32-bit elements 0 and 2 of each half packs the four results into a
    // single 4x32-bit mask.
    return _mm_shuffle_ps(lowHalf, highHalf, _MM_SHUFFLE(2, 0, 2, 0));
}
|
||||
|
||||
static FORCEINLINE __vec4_d __select(__vec4_i1 mask, __vec4_d a, __vec4_d b) {
|
||||
__m128 m0 = _mm_shuffle_ps(mask.v, mask.v, _MM_SHUFFLE(1, 1, 0, 0));
|
||||
__m128 m1 = _mm_shuffle_ps(mask.v, mask.v, _MM_SHUFFLE(3, 3, 2, 2));
|
||||
|
||||
Reference in New Issue
Block a user