From bba02f87ea100db68973210f1520fff980f75b16 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Fri, 27 Jan 2012 16:49:41 -0800 Subject: [PATCH] Improve implementations of unsigned <=, >= in sse4 intrinsics file. --- examples/intrinsics/sse4.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/examples/intrinsics/sse4.h b/examples/intrinsics/sse4.h index 7a3af6ad..c6299893 100644 --- a/examples/intrinsics/sse4.h +++ b/examples/intrinsics/sse4.h @@ -941,10 +941,8 @@ static FORCEINLINE __vec4_i1 __not_equal(__vec4_i32 a, __vec4_i32 b) { } static FORCEINLINE __vec4_i1 __unsigned_less_equal(__vec4_i32 a, __vec4_i32 b) { - a.v = _mm_xor_si128(a.v, _mm_set1_epi32(0x80000000)); - b.v = _mm_xor_si128(b.v, _mm_set1_epi32(0x80000000)); - return _mm_or_si128(_mm_cmplt_epi32(a.v, b.v), - _mm_cmpeq_epi32(a.v, b.v)); + // a<=b == (min(a,b) == a) + return _mm_cmpeq_epi32(_mm_min_epu32(a.v, b.v), a.v); } static FORCEINLINE __vec4_i1 __signed_less_equal(__vec4_i32 a, __vec4_i32 b) { @@ -953,10 +951,8 @@ static FORCEINLINE __vec4_i1 __signed_less_equal(__vec4_i32 a, __vec4_i32 b) { } static FORCEINLINE __vec4_i1 __unsigned_greater_equal(__vec4_i32 a, __vec4_i32 b) { - a.v = _mm_xor_si128(a.v, _mm_set1_epi32(0x80000000)); - b.v = _mm_xor_si128(b.v, _mm_set1_epi32(0x80000000)); - return _mm_or_si128(_mm_cmpgt_epi32(a.v, b.v), - _mm_cmpeq_epi32(a.v, b.v)); + // a>=b == (max(a,b) == a) + return _mm_cmpeq_epi32(_mm_max_epu32(a.v, b.v), a.v); } static FORCEINLINE __vec4_i1 __signed_greater_equal(__vec4_i32 a, __vec4_i32 b) {