# Patch summary: adds float and double variants of atomic_swap_global() and
# atomic_compare_exchange_global().
#
#   docs/ispc.txt - the two paragraphs on the global atomics now mention the
#                   float/double variants of swap and compare-exchange.
#   stdlib.ispc   - adds DEFINE_ATOMIC_OP(float,float,swap,swap),
#                   DEFINE_ATOMIC_OP(double,double,swap,swap),
#                   ATOMIC_DECL_CMPXCHG(float, float) and
#                   ATOMIC_DECL_CMPXCHG(double, double).
#   stdlib.m4     - adds four internal, alwaysinline functions:
#                   __atomic_swap_float_global, __atomic_swap_double_global,
#                   __atomic_compare_exchange_float_global and
#                   __atomic_compare_exchange_double_global.
#                   Each one bitcasts the pointer and the vector operands to the
#                   same-width integer type (float -> i32, double -> i64), calls
#                   the matching int32/int64 function with the mask passed
#                   through unchanged, and bitcasts the result back.
#
# NOTE(review): the compare operand is bitcast to an integer vector before the
# integer compare-exchange is called, so the comparison presumably acts on bit
# patterns rather than floating-point equality (NaN, +0.0 vs -0.0). The integer
# implementation is not part of this patch - confirm.
# NOTE(review): the body of DEFINE_ATOMIC_OP is not visible here; presumably it
# resolves to the __atomic_swap_<type>_global names defined in stdlib.m4 -
# confirm.
# Everything from the first file header onward is the patch itself; context
# lines and @@ counts must keep matching the target files.
diff --git a/docs/ispc.txt b/docs/ispc.txt index 6376b025..26d582e3 100644 --- a/docs/ispc.txt +++ b/docs/ispc.txt @@ -1852,7 +1852,8 @@ example. Here are the declarations of the ``int32`` variants of these functions. There are also ``int64`` equivalents as well as variants that take -``unsigned`` ``int32`` and ``int64`` values. +``unsigned`` ``int32`` and ``int64`` values. (The ``atomic_swap_global()`` +function can be used with ``float`` and ``double`` types as well.) :: @@ -1869,7 +1870,8 @@ There is also an atomic "compare and exchange" function; it atomically compares the value in "val" to "compare"--if they match, it assigns "newval" to "val". In either case, the old value of "val" is returned. (As with the other atomic operations, there are also ``unsigned`` and -64-bit variants of this function.) +64-bit variants of this function. Furthermore, there are ``float`` and +``double`` variants as well.) :: diff --git a/stdlib.ispc b/stdlib.ispc index d6cfd12a..ec94c4c8 100644 --- a/stdlib.ispc +++ b/stdlib.ispc @@ -425,6 +425,8 @@ DEFINE_ATOMIC_OP(unsigned int32,int32,or,or) DEFINE_ATOMIC_OP(unsigned int32,int32,xor,xor) DEFINE_ATOMIC_OP(unsigned int32,int32,swap,swap) +DEFINE_ATOMIC_OP(float,float,swap,swap) + DEFINE_ATOMIC_OP(int64,int64,add,add) DEFINE_ATOMIC_OP(int64,int64,subtract,sub) DEFINE_ATOMIC_OP(int64,int64,min,min) @@ -445,6 +447,8 @@ DEFINE_ATOMIC_OP(unsigned int64,int64,or,or) DEFINE_ATOMIC_OP(unsigned int64,int64,xor,xor) DEFINE_ATOMIC_OP(unsigned int64,int64,swap,swap) +DEFINE_ATOMIC_OP(double,double,swap,swap) + #define ATOMIC_DECL_CMPXCHG(TA, TB) \ static inline TA atomic_compare_exchange_global( \ uniform reference TA ref, TA oldval, TA newval) { \ @@ -456,8 +460,10 @@ static inline TA atomic_compare_exchange_global( \ ATOMIC_DECL_CMPXCHG(int32, int32) ATOMIC_DECL_CMPXCHG(unsigned int32, int32) +ATOMIC_DECL_CMPXCHG(float, float) ATOMIC_DECL_CMPXCHG(int64, int64) ATOMIC_DECL_CMPXCHG(unsigned int64, int64) +ATOMIC_DECL_CMPXCHG(double, double) 
/////////////////////////////////////////////////////////////////////////// // Load/store from/to 8/16-bit types diff --git a/stdlib.m4 b/stdlib.m4 index 6b781b17..50fbc9cc 100644 --- a/stdlib.m4 +++ b/stdlib.m4 @@ -800,9 +800,48 @@ global_atomic($1, umax, i64, uint64) global_swap($1, i32, int32) global_swap($1, i64, int64) +define internal <$1 x float> @__atomic_swap_float_global(float * %ptr, <$1 x float> %val, + <$1 x i32> %mask) nounwind alwaysinline { + %iptr = bitcast float * %ptr to i32 * + %ival = bitcast <$1 x float> %val to <$1 x i32> + %iret = call <$1 x i32> @__atomic_swap_int32_global(i32 * %iptr, <$1 x i32> %ival, <$1 x i32> %mask) + %ret = bitcast <$1 x i32> %iret to <$1 x float> + ret <$1 x float> %ret +} + +define internal <$1 x double> @__atomic_swap_double_global(double * %ptr, <$1 x double> %val, + <$1 x i32> %mask) nounwind alwaysinline { + %iptr = bitcast double * %ptr to i64 * + %ival = bitcast <$1 x double> %val to <$1 x i64> + %iret = call <$1 x i64> @__atomic_swap_int64_global(i64 * %iptr, <$1 x i64> %ival, <$1 x i32> %mask) + %ret = bitcast <$1 x i64> %iret to <$1 x double> + ret <$1 x double> %ret +} + global_atomic_exchange($1, i32, int32) global_atomic_exchange($1, i64, int64) +define internal <$1 x float> @__atomic_compare_exchange_float_global(float * %ptr, + <$1 x float> %cmp, <$1 x float> %val, <$1 x i32> %mask) nounwind alwaysinline { + %iptr = bitcast float * %ptr to i32 * + %icmp = bitcast <$1 x float> %cmp to <$1 x i32> + %ival = bitcast <$1 x float> %val to <$1 x i32> + %iret = call <$1 x i32> @__atomic_compare_exchange_int32_global(i32 * %iptr, <$1 x i32> %icmp, + <$1 x i32> %ival, <$1 x i32> %mask) + %ret = bitcast <$1 x i32> %iret to <$1 x float> + ret <$1 x float> %ret +} + +define internal <$1 x double> @__atomic_compare_exchange_double_global(double * %ptr, + <$1 x double> %cmp, <$1 x double> %val, <$1 x i32> %mask) nounwind alwaysinline { + %iptr = bitcast double * %ptr to i64 * + %icmp = bitcast <$1 x double> %cmp to 
<$1 x i64> + %ival = bitcast <$1 x double> %val to <$1 x i64> + %iret = call <$1 x i64> @__atomic_compare_exchange_int64_global(i64 * %iptr, <$1 x i64> %icmp, + <$1 x i64> %ival, <$1 x i32> %mask) + %ret = bitcast <$1 x i64> %iret to <$1 x double> + ret <$1 x double> %ret +} ')