Add support for atomic swap and compare-and-exchange with float and double types.

Addresses issue #60.
This commit is contained in:
Matt Pharr
2011-07-07 14:07:52 +01:00
parent 729f522a01
commit aef8c09019
3 changed files with 49 additions and 2 deletions

View File

@@ -1852,7 +1852,8 @@ example.
Here are the declarations of the ``int32`` variants of these functions.
There are also ``int64`` equivalents as well as variants that take
``unsigned`` ``int32`` and ``int64`` values.
``unsigned`` ``int32`` and ``int64`` values. (The ``atomic_swap_global()``
function can be used with ``float`` and ``double`` types as well.)
::
@@ -1869,7 +1870,8 @@ There is also an atomic "compare and exchange" function; it atomically
compares the value in "val" to "compare"--if they match, it assigns
"newval" to "val". In either case, the old value of "val" is returned.
(As with the other atomic operations, there are also ``unsigned`` and
64-bit variants of this function.)
64-bit variants of this function. Furthermore, there are ``float`` and
``double`` variants as well.)
::

View File

@@ -425,6 +425,8 @@ DEFINE_ATOMIC_OP(unsigned int32,int32,or,or)
DEFINE_ATOMIC_OP(unsigned int32,int32,xor,xor)
DEFINE_ATOMIC_OP(unsigned int32,int32,swap,swap)
// float instantiation of the swap operation.
// NOTE(review): presumably this yields the float overload of
// atomic_swap_global() backed by __atomic_swap_float_global; confirm
// against the DEFINE_ATOMIC_OP definition.
DEFINE_ATOMIC_OP(float,float,swap,swap)
DEFINE_ATOMIC_OP(int64,int64,add,add)
DEFINE_ATOMIC_OP(int64,int64,subtract,sub)
DEFINE_ATOMIC_OP(int64,int64,min,min)
@@ -445,6 +447,8 @@ DEFINE_ATOMIC_OP(unsigned int64,int64,or,or)
DEFINE_ATOMIC_OP(unsigned int64,int64,xor,xor)
DEFINE_ATOMIC_OP(unsigned int64,int64,swap,swap)
// double instantiation of the swap operation.
// NOTE(review): presumably this yields the double overload of
// atomic_swap_global() backed by __atomic_swap_double_global; confirm
// against the DEFINE_ATOMIC_OP definition.
DEFINE_ATOMIC_OP(double,double,swap,swap)
#define ATOMIC_DECL_CMPXCHG(TA, TB) \
static inline TA atomic_compare_exchange_global( \
uniform reference TA ref, TA oldval, TA newval) { \
@@ -456,8 +460,10 @@ static inline TA atomic_compare_exchange_global( \
// Each invocation below declares one atomic_compare_exchange_global()
// overload whose reference, oldval, newval and return type are all the
// first macro argument.
// NOTE(review): the second argument presumably selects the type suffix of
// the underlying builtin; confirm against the macro body.
ATOMIC_DECL_CMPXCHG(int32, int32)
ATOMIC_DECL_CMPXCHG(unsigned int32, int32)
// 32-bit floating-point overload.
ATOMIC_DECL_CMPXCHG(float, float)
ATOMIC_DECL_CMPXCHG(int64, int64)
ATOMIC_DECL_CMPXCHG(unsigned int64, int64)
// 64-bit floating-point overload.
ATOMIC_DECL_CMPXCHG(double, double)
///////////////////////////////////////////////////////////////////////////
// Load/store from/to 8/16-bit types

View File

@@ -800,9 +800,48 @@ global_atomic($1, umax, i64, uint64)
;; Integer swap variants for 32-bit and 64-bit values.
;; NOTE(review): presumably these expand to the functions named
;; __atomic_swap_int32_global and __atomic_swap_int64_global that the float
;; and double wrappers call -- confirm against the macro definition.
global_swap($1, i32, int32)
global_swap($1, i64, int64)
;; Float flavor of the global atomic swap.  The float pointer and the vector
;; of new values are reinterpreted as 32-bit integers and handed to the int32
;; swap together with the unchanged mask.  Whatever that call returns is
;; reinterpreted back as a vector of floats.  Only bitcasts are involved, so
;; no numeric conversion takes place.
define internal <$1 x float> @__atomic_swap_float_global(float * %ptr,
                                                         <$1 x float> %val,
                                                         <$1 x i32> %mask) nounwind alwaysinline {
  %int_ptr = bitcast float * %ptr to i32 *
  %int_val = bitcast <$1 x float> %val to <$1 x i32>
  %int_result = call <$1 x i32> @__atomic_swap_int32_global(i32 * %int_ptr,
                                                            <$1 x i32> %int_val,
                                                            <$1 x i32> %mask)
  %result = bitcast <$1 x i32> %int_result to <$1 x float>
  ret <$1 x float> %result
}
;; Double flavor of the global atomic swap.  The double pointer and the
;; vector of new values are reinterpreted as 64-bit integers and handed to
;; the int64 swap together with the unchanged mask.  Whatever that call
;; returns is reinterpreted back as a vector of doubles.  Only bitcasts are
;; involved, so no numeric conversion takes place.
define internal <$1 x double> @__atomic_swap_double_global(double * %ptr,
                                                           <$1 x double> %val,
                                                           <$1 x i32> %mask) nounwind alwaysinline {
  %int_ptr = bitcast double * %ptr to i64 *
  %int_val = bitcast <$1 x double> %val to <$1 x i64>
  %int_result = call <$1 x i64> @__atomic_swap_int64_global(i64 * %int_ptr,
                                                            <$1 x i64> %int_val,
                                                            <$1 x i32> %mask)
  %result = bitcast <$1 x i64> %int_result to <$1 x double>
  ret <$1 x double> %result
}
;; Integer compare-and-exchange variants for 32-bit and 64-bit values.
;; NOTE(review): presumably these expand to the functions named
;; __atomic_compare_exchange_int32_global and
;; __atomic_compare_exchange_int64_global that the float and double wrappers
;; call -- confirm against the macro definition.
global_atomic_exchange($1, i32, int32)
global_atomic_exchange($1, i64, int64)
;; Float flavor of the global atomic compare-and-exchange.  The pointer, the
;; comparison values and the replacement values are all reinterpreted as
;; 32-bit integers and forwarded, with the unchanged mask, to the int32
;; compare-and-exchange.  Its result is reinterpreted back as floats.
;; NOTE(review): because the comparison value is passed on as raw bits, the
;; match test is presumably bitwise rather than a floating-point equality
;; test, so +0.0 and -0.0 would not be treated as equal -- confirm this is
;; the intended contract.
define internal <$1 x float> @__atomic_compare_exchange_float_global(float * %ptr,
                                                                     <$1 x float> %cmp,
                                                                     <$1 x float> %val,
                                                                     <$1 x i32> %mask) nounwind alwaysinline {
  %int_ptr = bitcast float * %ptr to i32 *
  %int_cmp = bitcast <$1 x float> %cmp to <$1 x i32>
  %int_val = bitcast <$1 x float> %val to <$1 x i32>
  %int_result = call <$1 x i32> @__atomic_compare_exchange_int32_global(i32 * %int_ptr,
                                                                        <$1 x i32> %int_cmp,
                                                                        <$1 x i32> %int_val,
                                                                        <$1 x i32> %mask)
  %result = bitcast <$1 x i32> %int_result to <$1 x float>
  ret <$1 x float> %result
}
;; Double flavor of the global atomic compare-and-exchange.  The pointer, the
;; comparison values and the replacement values are all reinterpreted as
;; 64-bit integers and forwarded, with the unchanged mask, to the int64
;; compare-and-exchange.  Its result is reinterpreted back as doubles.
;; NOTE(review): because the comparison value is passed on as raw bits, the
;; match test is presumably bitwise rather than a floating-point equality
;; test, so +0.0 and -0.0 would not be treated as equal -- confirm this is
;; the intended contract.
define internal <$1 x double> @__atomic_compare_exchange_double_global(double * %ptr,
                                                                       <$1 x double> %cmp,
                                                                       <$1 x double> %val,
                                                                       <$1 x i32> %mask) nounwind alwaysinline {
  %int_ptr = bitcast double * %ptr to i64 *
  %int_cmp = bitcast <$1 x double> %cmp to <$1 x i64>
  %int_val = bitcast <$1 x double> %val to <$1 x i64>
  %int_result = call <$1 x i64> @__atomic_compare_exchange_int64_global(i64 * %int_ptr,
                                                                        <$1 x i64> %int_cmp,
                                                                        <$1 x i64> %int_val,
                                                                        <$1 x i32> %mask)
  %result = bitcast <$1 x i64> %int_result to <$1 x double>
  ret <$1 x double> %result
}
')