Fix __ordered and __unordered floating-point functions for C++ target.

Fixes include adding "_float" and "_double" suffixes as appropriate as well
as providing a number of missing implementations.

This fixes a number of failures in the half* tests.
This commit is contained in:
Matt Pharr
2012-07-09 14:35:51 -07:00
parent 107669686c
commit bc7775aef2
6 changed files with 81 additions and 22 deletions

View File

@@ -1422,6 +1422,10 @@ static FORCEINLINE __vec4_i1 __ordered_float(__vec4_f a, __vec4_f b) {
return _mm_cmpord_ps(a.v, b.v);
}
// Lane-wise "unordered" comparison of two 4-wide float vectors.
// Forwards both operands to the SSE intrinsic _mm_cmpunord_ps and returns its
// result as the mask. Per the intrinsic's documentation a lane is expected to
// be set when either corresponding input is NaN.
static FORCEINLINE __vec4_i1 __unordered_float(__vec4_f a, __vec4_f b) {
    const __m128 unorderedLanes = _mm_cmpunord_ps(a.v, b.v);
    return unorderedLanes;
}
// Lane-wise select between two 4-wide float vectors, driven by `mask`.
// Implemented with _mm_blendv_ps; note the operand order: `b` is passed first
// and `a` second, so (per the intrinsic's documentation) lanes flagged in the
// mask are expected to come from `a` and the remaining lanes from `b`.
static FORCEINLINE __vec4_f __select(__vec4_i1 mask, __vec4_f a, __vec4_f b) {
    const __m128 fallback = b.v;
    const __m128 chosen = a.v;
    return _mm_blendv_ps(fallback, chosen, mask.v);
}
@@ -1556,6 +1560,13 @@ static FORCEINLINE __vec4_i1 __ordered_double(__vec4_d a, __vec4_d b) {
_MM_SHUFFLE(2, 0, 2, 0));
}
// Lane-wise "unordered" comparison of two 4-wide double vectors.
// The operands are stored as two halves (v[0] and v[1]); each half is compared
// with _mm_cmpunord_pd, and the two results are then packed into a single
// 4-lane mask.
static FORCEINLINE __vec4_i1 __unordered_double(__vec4_d a, __vec4_d b) {
    // Compare each half and reinterpret the result as packed floats so the
    // two halves can be combined with a float shuffle.
    const __m128 lowHalf = _mm_castpd_ps(_mm_cmpunord_pd(a.v[0], b.v[0]));
    const __m128 highHalf = _mm_castpd_ps(_mm_cmpunord_pd(a.v[1], b.v[1]));

    // Pick 32-bit elements 0 and 2 from each half's comparison result,
    // giving one 32-bit mask lane per original double lane.
    return _mm_shuffle_ps(lowHalf, highHalf, _MM_SHUFFLE(2, 0, 2, 0));
}
static FORCEINLINE __vec4_d __select(__vec4_i1 mask, __vec4_d a, __vec4_d b) {
__m128 m0 = _mm_shuffle_ps(mask.v, mask.v, _MM_SHUFFLE(1, 1, 0, 0));
__m128 m1 = _mm_shuffle_ps(mask.v, mask.v, _MM_SHUFFLE(3, 3, 2, 2));