Add support for atomic swap and compare-exchange (cmpxchg) with float and double types.

Addresses issue #60.
2011-07-07 14:07:52 +01:00
parent 729f522a01
commit aef8c09019
3 changed files with 49 additions and 2 deletions
--- a/docs/ispc.txt
+++ b/docs/ispc.txt
@@ -1852,7 +1852,8 @@ example.
 Here are the declarations of the ``int32`` variants of these functions.
 There are also ``int64`` equivalents as well as variants that take
-``unsigned`` ``int32`` and ``int64`` values.
+``unsigned`` ``int32`` and ``int64`` values.  (The ``atomic_swap_global()``
 function can be used with ``float`` and ``double`` types as well.)
 ::
@@ -1869,7 +1870,8 @@ There is also an atomic "compare and exchange" function; it atomically
 compares the value in "val" to "compare"--if they match, it assigns
 "newval" to "val".  In either case, the old value of "val" is returned.
 (As with the other atomic operations, there are also ``unsigned`` and
-64-bit variants of this function.)
+64-bit variants of this function.  Furthermore, there are ``float`` and
 ``double`` variants as well.)
 ::
--- a/stdlib.ispc
+++ b/stdlib.ispc
@@ -425,6 +425,8 @@ DEFINE_ATOMIC_OP(unsigned int32,int32,or,or)
 DEFINE_ATOMIC_OP(unsigned int32,int32,xor,xor)
 DEFINE_ATOMIC_OP(unsigned int32,int32,swap,swap)
 // float gets only a swap variant here (no add/min/max/and/or/xor entries).
 // NOTE(review): presumably this binds to __atomic_swap_float_global in
 // stdlib.m4 -- confirm against the DEFINE_ATOMIC_OP definition.
 DEFINE_ATOMIC_OP(float,float,swap,swap)
 // 64-bit integer variants.
 DEFINE_ATOMIC_OP(int64,int64,add,add)
 DEFINE_ATOMIC_OP(int64,int64,subtract,sub)
 DEFINE_ATOMIC_OP(int64,int64,min,min)
@@ -445,6 +447,8 @@ DEFINE_ATOMIC_OP(unsigned int64,int64,or,or)
 DEFINE_ATOMIC_OP(unsigned int64,int64,xor,xor)
 DEFINE_ATOMIC_OP(unsigned int64,int64,swap,swap)
 // double gets only a swap variant here, mirroring the float entry.
 // NOTE(review): presumably this binds to __atomic_swap_double_global in
 // stdlib.m4 -- confirm against the DEFINE_ATOMIC_OP definition.
 DEFINE_ATOMIC_OP(double,double,swap,swap)
 #define ATOMIC_DECL_CMPXCHG(TA, TB)                                        \
 static inline TA atomic_compare_exchange_global(                           \
         uniform reference TA ref, TA oldval, TA newval) {                 \
@@ -456,8 +460,10 @@ static inline TA atomic_compare_exchange_global(                           \
 // Instantiate atomic_compare_exchange_global() for each supported type.
 // The unsigned variants pass the signed integer type as the second
 // argument, while float and double pass their own type.
 // NOTE(review): the second argument presumably selects the builtin that is
 // called (e.g. the float/double wrappers in stdlib.m4) -- confirm against
 // the full ATOMIC_DECL_CMPXCHG definition.
 ATOMIC_DECL_CMPXCHG(int32, int32)
 ATOMIC_DECL_CMPXCHG(unsigned int32, int32)
 ATOMIC_DECL_CMPXCHG(float, float)
 ATOMIC_DECL_CMPXCHG(int64, int64)
 ATOMIC_DECL_CMPXCHG(unsigned int64, int64)
 ATOMIC_DECL_CMPXCHG(double, double)
 ///////////////////////////////////////////////////////////////////////////
 // Load/store from/to 8/16-bit types
--- a/stdlib.m4
+++ b/stdlib.m4
@@ -800,9 +800,48 @@ global_atomic($1, umax, i64, uint64)
 ;; Integer swap atomics for i32 and i64.
 ;; NOTE(review): presumably these two expansions provide the int32 and int64
 ;; swap functions that the float and double wrappers below call -- TODO confirm.
 global_swap($1, i32, int32)
 global_swap($1, i64, int64)
 ;; Atomic swap for float lanes.  No float-specific atomic is used: the float
 ;; pointer and the float values are reinterpreted bit-for-bit as i32, the
 ;; int32 swap routine is called with the unchanged mask, and its i32 result
 ;; is reinterpreted back to float.
 define internal <$1 x float> @__atomic_swap_float_global(float * %ptr, <$1 x float> %val,
                                                    <$1 x i32> %mask) nounwind alwaysinline {
  %val_i32 = bitcast <$1 x float> %val to <$1 x i32>
  %ptr_i32 = bitcast float * %ptr to i32 *
  %res_i32 = call <$1 x i32> @__atomic_swap_int32_global(i32 * %ptr_i32, <$1 x i32> %val_i32, <$1 x i32> %mask)
  %res_float = bitcast <$1 x i32> %res_i32 to <$1 x float>
  ret <$1 x float> %res_float
 }
 ;; Atomic swap for double lanes.  The double pointer and the double values
 ;; are reinterpreted bit-for-bit as i64, the int64 swap routine is called
 ;; with the unchanged mask, and its i64 result is reinterpreted back to
 ;; double.
 define internal <$1 x double> @__atomic_swap_double_global(double * %ptr, <$1 x double> %val,
                                                    <$1 x i32> %mask) nounwind alwaysinline {
  %val_i64 = bitcast <$1 x double> %val to <$1 x i64>
  %ptr_i64 = bitcast double * %ptr to i64 *
  %res_i64 = call <$1 x i64> @__atomic_swap_int64_global(i64 * %ptr_i64, <$1 x i64> %val_i64, <$1 x i32> %mask)
  %res_double = bitcast <$1 x i64> %res_i64 to <$1 x double>
  ret <$1 x double> %res_double
 }
 ;; Integer compare-and-exchange atomics for i32 and i64.
 ;; NOTE(review): presumably these two expansions provide the int32 and int64
 ;; compare-exchange functions that the float and double wrappers below
 ;; call -- TODO confirm.
 global_atomic_exchange($1, i32, int32)
 global_atomic_exchange($1, i64, int64)
 ;; Atomic compare-and-exchange for float lanes.  The pointer, the comparison
 ;; values and the new values are all reinterpreted bit-for-bit as i32 and
 ;; forwarded, with the unchanged mask, to the int32 routine; its i32 result
 ;; is reinterpreted back to float.  The int32 routine only ever sees raw
 ;; bit patterns, so any comparison it performs is on bits rather than on
 ;; floating-point values.
 define internal <$1 x float> @__atomic_compare_exchange_float_global(float * %ptr,
                       <$1 x float> %cmp, <$1 x float> %val, <$1 x i32> %mask) nounwind alwaysinline {
  %cmp_i32 = bitcast <$1 x float> %cmp to <$1 x i32>
  %val_i32 = bitcast <$1 x float> %val to <$1 x i32>
  %ptr_i32 = bitcast float * %ptr to i32 *
  %res_i32 = call <$1 x i32> @__atomic_compare_exchange_int32_global(i32 * %ptr_i32, <$1 x i32> %cmp_i32,
                                                                   <$1 x i32> %val_i32, <$1 x i32> %mask)
  %res_float = bitcast <$1 x i32> %res_i32 to <$1 x float>
  ret <$1 x float> %res_float
 }
 ;; Atomic compare-and-exchange for double lanes.  The pointer, the comparison
 ;; values and the new values are all reinterpreted bit-for-bit as i64 and
 ;; forwarded, with the unchanged mask, to the int64 routine; its i64 result
 ;; is reinterpreted back to double.  The int64 routine only ever sees raw
 ;; bit patterns, so any comparison it performs is on bits rather than on
 ;; floating-point values.
 define internal <$1 x double> @__atomic_compare_exchange_double_global(double * %ptr,
                       <$1 x double> %cmp, <$1 x double> %val, <$1 x i32> %mask) nounwind alwaysinline {
  %cmp_i64 = bitcast <$1 x double> %cmp to <$1 x i64>
  %val_i64 = bitcast <$1 x double> %val to <$1 x i64>
  %ptr_i64 = bitcast double * %ptr to i64 *
  %res_i64 = call <$1 x i64> @__atomic_compare_exchange_int64_global(i64 * %ptr_i64, <$1 x i64> %cmp_i64,
                                                                   <$1 x i64> %val_i64, <$1 x i32> %mask)
  %res_double = bitcast <$1 x i64> %res_i64 to <$1 x double>
  ret <$1 x double> %res_double
 }
 ')