fix for __any

builtins.cpp (22 lines changed)
@@ -353,10 +353,14 @@ lSetInternalFunctions(llvm::Module *module) {
"__atomic_add_int64_global",
"__atomic_add_uniform_int32_global",
"__atomic_add_uniform_int64_global",
+"__atomic_add_varying_int32_global",
+"__atomic_add_varying_int64_global",
"__atomic_and_int32_global",
"__atomic_and_int64_global",
"__atomic_and_uniform_int32_global",
"__atomic_and_uniform_int64_global",
+"__atomic_and_varying_int32_global",
+"__atomic_and_varying_int64_global",
"__atomic_compare_exchange_double_global",
"__atomic_compare_exchange_float_global",
"__atomic_compare_exchange_int32_global",
@@ -369,14 +373,22 @@ lSetInternalFunctions(llvm::Module *module) {
"__atomic_max_uniform_int64_global",
"__atomic_min_uniform_int32_global",
"__atomic_min_uniform_int64_global",
+"__atomic_max_varying_int32_global",
+"__atomic_max_varying_int64_global",
+"__atomic_min_varying_int32_global",
+"__atomic_min_varying_int64_global",
"__atomic_or_int32_global",
"__atomic_or_int64_global",
"__atomic_or_uniform_int32_global",
"__atomic_or_uniform_int64_global",
+"__atomic_or_varying_int32_global",
+"__atomic_or_varying_int64_global",
"__atomic_sub_int32_global",
"__atomic_sub_int64_global",
"__atomic_sub_uniform_int32_global",
"__atomic_sub_uniform_int64_global",
+"__atomic_sub_varying_int32_global",
+"__atomic_sub_varying_int64_global",
"__atomic_swap_double_global",
"__atomic_swap_float_global",
"__atomic_swap_int32_global",
@@ -389,10 +401,20 @@ lSetInternalFunctions(llvm::Module *module) {
"__atomic_umax_uniform_uint64_global",
"__atomic_umin_uniform_uint32_global",
"__atomic_umin_uniform_uint64_global",
+"__atomic_umax_varying_uint32_global",
+"__atomic_umax_varying_uint64_global",
+"__atomic_umin_varying_uint32_global",
+"__atomic_umin_varying_uint64_global",
"__atomic_xor_int32_global",
"__atomic_xor_int64_global",
"__atomic_xor_uniform_int32_global",
"__atomic_xor_uniform_int64_global",
+"__atomic_xor_uniform_int32_global",
+"__atomic_xor_uniform_int64_global",
+"__atomic_xor_varying_int32_global",
+"__atomic_xor_varying_int64_global",
+"__atomic_xor_varying_int32_global",
+"__atomic_xor_varying_int64_global",
"__broadcast_double",
"__broadcast_float",
"__broadcast_i16",
@@ -1660,3 +1660,423 @@ define i64 @__clock() nounwind alwaysinline {
ret i64 %r
}

+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; global_atomic_associative
+;; More efficient implementation for atomics that are associative (e.g.,
+;; add, and, ...). If a basic implementation would do something like:
+;; result0 = atomic_op(ptr, val0)
+;; result1 = atomic_op(ptr, val1)
+;; ..
+;; Then instead we can do:
+;; tmp = (val0 op val1 op ...)
+;; result0 = atomic_op(ptr, tmp)
+;; result1 = (result0 op val0)
+;; ..
+;; And more efficiently compute the same result
+;;
+;; Takes five parameters:
+;; $1: vector width of the target
+;; $2: operation being performed (w.r.t. LLVM atomic intrinsic names)
+;;     (add, sub...)
+;; $3: return type of the LLVM atomic (e.g. i32)
+;; $4: return type of the LLVM atomic, in ispc naming parlance (e.g. int32)
+;; $5: identity value for the operator (e.g. 0 for add, -1 for AND, ...)
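
The scheme described in the comment block above can be illustrated with a small host-side C++ sketch (illustrative only, not code from this commit): combine all lane values first, issue a single atomic, then reconstruct each lane's result from the returned old value plus an exclusive prefix over the preceding lanes' values.

// Illustrative sketch of the "associative atomic" trick described above
// (plain C++, hypothetical lane count and values; not part of this commit).
#include <atomic>
#include <cstdio>

int main() {
    std::atomic<int> counter{100};
    const int lanes = 4;
    int val[lanes] = {1, 2, 3, 4};

    // Naive form would issue one atomic per lane:
    //   result[i] = counter.fetch_add(val[i]);
    // Associative form: one atomic for the whole gang.
    int tmp = 0;
    for (int i = 0; i < lanes; ++i)
        tmp += val[i];                      // tmp = val0 op val1 op ...
    int result0 = counter.fetch_add(tmp);   // single atomic_op(ptr, tmp)

    // Reconstruct each lane's old value: lane i sees the base value plus
    // the contributions of the lanes before it (an exclusive prefix sum).
    int prefix = 0;
    for (int i = 0; i < lanes; ++i) {
        int result_i = result0 + prefix;
        prefix += val[i];
        printf("lane %d old value = %d\n", i, result_i);
    }
    return 0;
}
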
+;; add
+define <1 x i32> @__atomic_add_int32_global(i32* %ptr, <1 x i32> %valv, <1 x i1> %maskv) nounwind alwaysinline
+{
+  %mask = bitcast <1 x i1> %maskv to i1
+  %val = bitcast <1 x i32> %valv to i32
+  br i1 %mask, label %exec, label %pass
+exec:
+  %addr = ptrtoint i32* %ptr to i64
+  %old = tail call i32 asm sideeffect "atom.add.u32 $0, [$1], $2;", "=r,l,r"(i64 %addr, i32 %val);
+  %oldv = bitcast i32 %old to <1 x i32>
+  ret <1 x i32> %oldv
+pass:
+  ret <1 x i32> %valv
+}
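
Each of these width-1 wrappers follows the same masked pattern: when the lane's mask bit is set, the PTX atomic (atom.add.u32 here) runs and the old memory contents are returned; when the lane is off, memory is left untouched and the input value is passed back. A rough C++ analogue (illustrative only, not part of this commit):

// Rough C++ analogue of the masked single-lane atomic above
// (illustrative sketch; not the ispc/NVPTX implementation).
#include <atomic>

int atomic_add_int32_global(std::atomic<int> *ptr, int val, bool mask) {
    if (mask)
        return ptr->fetch_add(val);  // "exec" path: returns the old value
    return val;                      // "pass" path: lane is inactive
}
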
+;; sub
+define <1 x i32> @__atomic_sub_int32_global(i32* %ptr, <1 x i32> %valv, <1 x i1> %maskv) nounwind alwaysinline
+{
+  %nvalv = sub <1 x i32> <i32 0>, %valv
+  %ret = call <1 x i32> @__atomic_add_int32_global(i32* %ptr, <1 x i32> %nvalv, <1 x i1> %maskv);
+  ret <1 x i32> %ret;
+}
+;; and
+define <1 x i32> @__atomic_and_int32_global(i32* %ptr, <1 x i32> %valv, <1 x i1> %maskv) nounwind alwaysinline
+{
+  %mask = bitcast <1 x i1> %maskv to i1
+  %val = bitcast <1 x i32> %valv to i32
+  br i1 %mask, label %exec, label %pass
+exec:
+  %addr = ptrtoint i32* %ptr to i64
+  %old = tail call i32 asm sideeffect "atom.and.b32 $0, [$1], $2;", "=r,l,r"(i64 %addr, i32 %val);
+  %oldv = bitcast i32 %old to <1 x i32>
+  ret <1 x i32> %oldv
+pass:
+  ret <1 x i32> %valv
+}
+;; or
+define <1 x i32> @__atomic_or_int32_global(i32* %ptr, <1 x i32> %valv, <1 x i1> %maskv) nounwind alwaysinline
+{
+  %mask = bitcast <1 x i1> %maskv to i1
+  %val = bitcast <1 x i32> %valv to i32
+  br i1 %mask, label %exec, label %pass
+exec:
+  %addr = ptrtoint i32* %ptr to i64
+  %old = tail call i32 asm sideeffect "atom.or.b32 $0, [$1], $2;", "=r,l,r"(i64 %addr, i32 %val);
+  %oldv = bitcast i32 %old to <1 x i32>
+  ret <1 x i32> %oldv
+pass:
+  ret <1 x i32> %valv
+}
+;; xor
+define <1 x i32> @__atomic_xor_int32_global(i32* %ptr, <1 x i32> %valv, <1 x i1> %maskv) nounwind alwaysinline
+{
+  %mask = bitcast <1 x i1> %maskv to i1
+  %val = bitcast <1 x i32> %valv to i32
+  br i1 %mask, label %exec, label %pass
+exec:
+  %addr = ptrtoint i32* %ptr to i64
+  %old = tail call i32 asm sideeffect "atom.xor.b32 $0, [$1], $2;", "=r,l,r"(i64 %addr, i32 %val);
+  %oldv = bitcast i32 %old to <1 x i32>
+  ret <1 x i32> %oldv
+pass:
+  ret <1 x i32> %valv
+}
+
+;;;;;;;;; int64
+define <1 x i64> @__atomic_add_int64_global(i64* %ptr, <1 x i64> %valv, <1 x i1> %maskv) nounwind alwaysinline
+{
+  %mask = bitcast <1 x i1> %maskv to i1
+  %val = bitcast <1 x i64> %valv to i64
+  br i1 %mask, label %exec, label %pass
+exec:
+  %addr = ptrtoint i64* %ptr to i64
+  %old = tail call i64 asm sideeffect "atom.add.u64 $0, [$1], $2;", "=l,l,l"(i64 %addr, i64 %val);
+  %oldv = bitcast i64 %old to <1 x i64>
+  ret <1 x i64> %oldv
+pass:
+  ret <1 x i64> %valv
+}
+define <1 x i64> @__atomic_sub_int64_global(i64* %ptr, <1 x i64> %valv, <1 x i1> %maskv) nounwind alwaysinline
+{
+  %nvalv = sub <1 x i64> <i64 0>, %valv
+  %ret = call <1 x i64> @__atomic_add_int64_global(i64* %ptr, <1 x i64> %nvalv, <1 x i1> %maskv);
+  ret <1 x i64> %ret;
+}
+
+;; and
+define <1 x i64> @__atomic_and_int64_global(i64* %ptr, <1 x i64> %valv, <1 x i1> %maskv) nounwind alwaysinline
+{
+  %mask = bitcast <1 x i1> %maskv to i1
+  %val = bitcast <1 x i64> %valv to i64
+  br i1 %mask, label %exec, label %pass
+exec:
+  %andr = ptrtoint i64* %ptr to i64
+  %old = tail call i64 asm sideeffect "atom.and.b64 $0, [$1], $2;", "=l,l,l"(i64 %andr, i64 %val);
+  %oldv = bitcast i64 %old to <1 x i64>
+  ret <1 x i64> %oldv
+pass:
+  ret <1 x i64> %valv
+}
+
+;; or
+define <1 x i64> @__atomic_or_int64_global(i64* %ptr, <1 x i64> %valv, <1 x i1> %maskv) nounwind alwaysinline
+{
+  %mask = bitcast <1 x i1> %maskv to i1
+  %val = bitcast <1 x i64> %valv to i64
+  br i1 %mask, label %exec, label %pass
+exec:
+  %orr = ptrtoint i64* %ptr to i64
+  %old = tail call i64 asm sideeffect "atom.or.b64 $0, [$1], $2;", "=l,l,l"(i64 %orr, i64 %val);
+  %oldv = bitcast i64 %old to <1 x i64>
+  ret <1 x i64> %oldv
+pass:
+  ret <1 x i64> %valv
+}
+
+;; xor
+define <1 x i64> @__atomic_xor_int64_global(i64* %ptr, <1 x i64> %valv, <1 x i1> %maskv) nounwind alwaysinline
+{
+  %mask = bitcast <1 x i1> %maskv to i1
+  %val = bitcast <1 x i64> %valv to i64
+  br i1 %mask, label %exec, label %pass
+exec:
+  %xorr = ptrtoint i64* %ptr to i64
+  %old = tail call i64 asm sideeffect "atom.xor.b64 $0, [$1], $2;", "=l,l,l"(i64 %xorr, i64 %val);
+  %oldv = bitcast i64 %old to <1 x i64>
+  ret <1 x i64> %oldv
+pass:
+  ret <1 x i64> %valv
+}
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; global_atomic_uniform
+;; Defines the implementation of a function that handles the mapping from
+;; an ispc atomic function to the underlying LLVM intrinsics. This variant
+;; just calls the atomic once, for the given uniform value.
+;;
+;; Takes four parameters:
+;; $1: vector width of the target
+;; $2: operation being performed (w.r.t. LLVM atomic intrinsic names)
+;;     (add, sub...)
+;; $3: return type of the LLVM atomic (e.g. i32)
+;; $4: return type of the LLVM atomic, in ispc naming parlance (e.g. int32)
+
+define i32 @__get_first_active_lane()
+{
+  %nact = call i32 @__ballot_nvptx(i1 true);
+  %lane1 = call i32 @__count_leading_zeros_i32(i32 %nact)
+  %lane = sub i32 31, %lane1
+  ret i32 %lane
+}
+
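
The helper above derives a lane index from the active-lane ballot: for a 32-bit ballot mask, 31 minus the count of leading zeros gives the index of the highest set bit, i.e. one of the currently active lanes. The bit arithmetic in plain C++ (the ballot value below is a made-up example, not from this commit):

// Bit arithmetic behind __get_first_active_lane, in plain C++.
#include <cstdint>
#include <cstdio>

int lane_from_ballot(uint32_t ballot) {
    // 31 - clz(ballot) is the index of the most significant set bit.
    return 31 - __builtin_clz(ballot);
}

int main() {
    uint32_t ballot = 0x16u;  // bits 1, 2 and 4 set: lanes 1, 2, 4 active
    printf("selected lane = %d\n", lane_from_ballot(ballot));  // prints 4
    return 0;
}
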
+define i32 @__atomic_add_uniform_int32_global_nvptx(i32* %ptr, i32 %val) nounwind alwaysinline
+{
+  %addr = ptrtoint i32* %ptr to i64
+  %old = tail call i32 asm sideeffect "atom.add.u32 $0, [$1], $2;", "=r,l,r"(i64 %addr, i32 %val);
+  ret i32 %old;
+}
+define i32 @__atomic_sub_uniform_int32_global_nvptx(i32* %ptr, i32 %val) nounwind alwaysinline
+{
+  %nval = sub i32 0, %val;
+  %old = tail call i32 @__atomic_add_uniform_int32_global_nvptx(i32* %ptr, i32 %nval);
+  ret i32 %old;
+}
+define i32 @__atomic_and_uniform_int32_global_nvptx(i32* %ptr, i32 %val) nounwind alwaysinline
+{
+  %addr = ptrtoint i32* %ptr to i64
+  %old = tail call i32 asm sideeffect "atom.and.b32 $0, [$1], $2;", "=r,l,r"(i64 %addr, i32 %val);
+  ret i32 %old;
+}
+define i32 @__atomic_or_uniform_int32_global_nvptx(i32* %ptr, i32 %val) nounwind alwaysinline
+{
+  %addr = ptrtoint i32* %ptr to i64
+  %old = tail call i32 asm sideeffect "atom.or.b32 $0, [$1], $2;", "=r,l,r"(i64 %addr, i32 %val);
+  ret i32 %old;
+}
+define i32 @__atomic_xor_uniform_int32_global_nvptx(i32* %ptr, i32 %val) nounwind alwaysinline
+{
+  %addr = ptrtoint i32* %ptr to i64
+  %old = tail call i32 asm sideeffect "atom.xor.b32 $0, [$1], $2;", "=r,l,r"(i64 %addr, i32 %val);
+  ret i32 %old;
+}
+define i32 @__atomic_min_uniform_int32_global_nvptx(i32* %ptr, i32 %val) nounwind alwaysinline
+{
+  %addr = ptrtoint i32* %ptr to i64
+  %old = tail call i32 asm sideeffect "atom.min.s32 $0, [$1], $2;", "=r,l,r"(i64 %addr, i32 %val);
+  ret i32 %old;
+}
+define i32 @__atomic_max_uniform_int32_global_nvptx(i32* %ptr, i32 %val) nounwind alwaysinline
+{
+  %addr = ptrtoint i32* %ptr to i64
+  %old = tail call i32 asm sideeffect "atom.max.s32 $0, [$1], $2;", "=r,l,r"(i64 %addr, i32 %val);
+  ret i32 %old;
+}
+define i32 @__atomic_umin_uniform_uint32_global_nvptx(i32* %ptr, i32 %val) nounwind alwaysinline
+{
+  %addr = ptrtoint i32* %ptr to i64
+  %old = tail call i32 asm sideeffect "atom.min.u32 $0, [$1], $2;", "=r,l,r"(i64 %addr, i32 %val);
+  ret i32 %old;
+}
+define i32 @__atomic_umax_uniform_uint32_global_nvptx(i32* %ptr, i32 %val) nounwind alwaysinline
+{
+  %addr = ptrtoint i32* %ptr to i64
+  %old = tail call i32 asm sideeffect "atom.max.u32 $0, [$1], $2;", "=r,l,r"(i64 %addr, i32 %val);
+  ret i32 %old;
+}
+
+
+define i64 @__atomic_add_uniform_int64_global_nvptx(i64* %ptr, i64 %val) nounwind alwaysinline
+{
+  %addr = ptrtoint i64* %ptr to i64
+  %old = tail call i64 asm sideeffect "atom.add.u64 $0, [$1], $2;", "=l,l,l"(i64 %addr, i64 %val);
+  ret i64 %old;
+}
+define i64 @__atomic_sub_uniform_int64_global_nvptx(i64* %ptr, i64 %val) nounwind alwaysinline
+{
+  %nval = sub i64 0, %val;
+  %old = tail call i64 @__atomic_add_uniform_int64_global_nvptx(i64* %ptr, i64 %nval);
+  ret i64 %old;
+}
+define i64 @__atomic_and_uniform_int64_global_nvptx(i64* %ptr, i64 %val) nounwind alwaysinline
+{
+  %addr = ptrtoint i64* %ptr to i64
+  %old = tail call i64 asm sideeffect "atom.and.b64 $0, [$1], $2;", "=l,l,l"(i64 %addr, i64 %val);
+  ret i64 %old;
+}
+define i64 @__atomic_or_uniform_int64_global_nvptx(i64* %ptr, i64 %val) nounwind alwaysinline
+{
+  %addr = ptrtoint i64* %ptr to i64
+  %old = tail call i64 asm sideeffect "atom.or.b64 $0, [$1], $2;", "=l,l,l"(i64 %addr, i64 %val);
+  ret i64 %old;
+}
+define i64 @__atomic_xor_uniform_int64_global_nvptx(i64* %ptr, i64 %val) nounwind alwaysinline
+{
+  %addr = ptrtoint i64* %ptr to i64
+  %old = tail call i64 asm sideeffect "atom.xor.b64 $0, [$1], $2;", "=l,l,l"(i64 %addr, i64 %val);
+  ret i64 %old;
+}
+define i64 @__atomic_min_uniform_int64_global_nvptx(i64* %ptr, i64 %val) nounwind alwaysinline
+{
+  %addr = ptrtoint i64* %ptr to i64
+  %old = tail call i64 asm sideeffect "atom.min.s64 $0, [$1], $2;", "=l,l,l"(i64 %addr, i64 %val);
+  ret i64 %old;
+}
+define i64 @__atomic_max_uniform_int64_global_nvptx(i64* %ptr, i64 %val) nounwind alwaysinline
+{
+  %addr = ptrtoint i64* %ptr to i64
+  %old = tail call i64 asm sideeffect "atom.max.s64 $0, [$1], $2;", "=l,l,l"(i64 %addr, i64 %val);
+  ret i64 %old;
+}
+define i64 @__atomic_umin_uniform_uint64_global_nvptx(i64* %ptr, i64 %val) nounwind alwaysinline
+{
+  %addr = ptrtoint i64* %ptr to i64
+  %old = tail call i64 asm sideeffect "atom.min.u64 $0, [$1], $2;", "=l,l,l"(i64 %addr, i64 %val);
+  ret i64 %old;
+}
+define i64 @__atomic_umax_uniform_uint64_global_nvptx(i64* %ptr, i64 %val) nounwind alwaysinline
+{
+  %addr = ptrtoint i64* %ptr to i64
+  %old = tail call i64 asm sideeffect "atom.max.u64 $0, [$1], $2;", "=l,l,l"(i64 %addr, i64 %val);
+  ret i64 %old;
+}
+
+define(`global_atomic_uniform',`
+define $3 @__atomic_$2_uniform_$4_global($3 * %ptr, $3 %val) nounwind alwaysinline
+{
+entry:
+  %addr = ptrtoint $3 * %ptr to i64
+  %active = call i32 @__get_first_active_lane();
+  %lane = call i32 @__laneidx();
+  %c = icmp eq i32 %lane, %active
+  br i1 %c, label %p1, label %p2
+
+p1:
+  %t0 = call $3 @__atomic_$2_uniform_$4_global_nvptx($3 * %ptr, $3 %val);
+  br label %p2;
+
+p2:
+  %t1 = phi $3 [%t0, %p1], [zeroinitializer, %entry]
+  %old = call $3 @__shfl_$3_nvptx($3 %t1, i32 %active)
+  ret $3 %old;
+}
+')
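
The global_atomic_uniform macro above implements a leader-lane pattern: the lane whose index matches __get_first_active_lane() performs the scalar atomic through the _nvptx helper, and the old value is then broadcast with __shfl so every lane returns the same result. A host-side C++ simulation of that control flow, with the gang modelled as a loop (illustrative only; lane count and values are made up):

// Host-side simulation of the leader-lane uniform atomic
// (illustrative sketch; not part of this commit).
#include <atomic>
#include <cstdio>

int main() {
    std::atomic<long> value{10};
    const int lanes = 4;
    bool active[lanes] = {false, true, true, true};

    // Pick one active lane as the leader (here the highest active index,
    // mirroring the 31 - clz(ballot) computation in the IR above).
    int leader = -1;
    for (int i = 0; i < lanes; ++i)
        if (active[i]) leader = i;

    // Only the leader performs the atomic; the result is then "broadcast".
    long old = 0;
    if (leader >= 0)
        old = value.fetch_add(5);       // uniform operand: one atomic total

    for (int i = 0; i < lanes; ++i)
        if (active[i])
            printf("lane %d sees old value %ld\n", i, old);
    return 0;
}
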
+global_atomic_uniform(1, add, i32, int32)
+global_atomic_uniform(1, sub, i32, int32)
+global_atomic_uniform(1, and, i32, int32)
+global_atomic_uniform(1, or, i32, int32)
+global_atomic_uniform(1, xor, i32, int32)
+global_atomic_uniform(1, min, i32, int32)
+global_atomic_uniform(1, max, i32, int32)
+global_atomic_uniform(1, umin, i32, uint32)
+global_atomic_uniform(1, umax, i32, uint32)
+
+global_atomic_uniform(1, add, i64, int64)
+global_atomic_uniform(1, sub, i64, int64)
+global_atomic_uniform(1, and, i64, int64)
+global_atomic_uniform(1, or, i64, int64)
+global_atomic_uniform(1, xor, i64, int64)
+global_atomic_uniform(1, min, i64, int64)
+global_atomic_uniform(1, max, i64, int64)
+global_atomic_uniform(1, umin, i64, uint64)
+global_atomic_uniform(1, umax, i64, uint64)
+
+define(`global_atomic_varying',`
+define <1 x $3> @__atomic_$2_varying_$4_global(<1 x i64> %ptr, <1 x $3> %val, <1 x i1> %maskv) nounwind alwaysinline
+{
+entry:
+  %addr = bitcast <1 x i64> %ptr to i64
+  %c = bitcast <1 x i1> %maskv to i1
+  br i1 %c, label %p1, label %p2
+
+p1:
+  %sv = bitcast <1 x $3> %val to $3
+  %sptr = inttoptr i64 %addr to $3*
+  %t0 = call $3 @__atomic_$2_uniform_$4_global_nvptx($3 * %sptr, $3 %sv);
+  %t0v = bitcast $3 %t0 to <1 x $3>
+  ret <1 x $3> %t0v
+
+p2:
+  ret <1 x $3> %val
+}
+')
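
The varying form receives each lane's pointer as a 64-bit integer, converts it back to a typed pointer, and performs the scalar atomic only when the lane's mask bit is set; an inactive lane simply returns its input value. Per lane, the behaviour is roughly this C++ (illustrative sketch, hypothetical names):

// Per-lane view of the varying global atomic (illustrative sketch only).
#include <atomic>
#include <cstdint>

int32_t atomic_add_varying_int32_global(uint64_t ptr_bits, int32_t val, bool mask) {
    if (!mask)
        return val;  // inactive lane: memory untouched, value passed through
    auto *p = reinterpret_cast<std::atomic<int32_t> *>(ptr_bits);
    return p->fetch_add(val);  // active lane: old value at the lane's address
}
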
+global_atomic_varying(1, add, i32, int32)
+global_atomic_varying(1, sub, i32, int32)
+global_atomic_varying(1, and, i32, int32)
+global_atomic_varying(1, or, i32, int32)
+global_atomic_varying(1, xor, i32, int32)
+global_atomic_varying(1, min, i32, int32)
+global_atomic_varying(1, max, i32, int32)
+global_atomic_varying(1, umin, i32, uint32)
+global_atomic_varying(1, umax, i32, uint32)
+
+global_atomic_varying(1, add, i64, int64)
+global_atomic_varying(1, sub, i64, int64)
+global_atomic_varying(1, and, i64, int64)
+global_atomic_varying(1, or, i64, int64)
+global_atomic_varying(1, xor, i64, int64)
+global_atomic_varying(1, min, i64, int64)
+global_atomic_varying(1, max, i64, int64)
+global_atomic_varying(1, umin, i64, uint64)
+global_atomic_varying(1, umax, i64, uint64)
+
+;; Macro to declare the function that implements the swap atomic.
+;; Takes three parameters:
+;; $1: vector width of the target
+;; $2: llvm type of the vector elements (e.g. i32)
+;; $3: ispc type of the elements (e.g. int32)
+
+define(`global_swap', `
+declare $2 @__atomic_swap_uniform_$3_global($2* %ptr, $2 %val) nounwind alwaysinline ;
+')
+
+
+;; Similarly, macro to declare the function that implements the compare/exchange
+;; atomic. Takes three parameters:
+;; $1: vector width of the target
+;; $2: llvm type of the vector elements (e.g. i32)
+;; $3: ispc type of the elements (e.g. int32)
+
+define(`global_atomic_exchange', `
+
+declare <$1 x $2> @__atomic_compare_exchange_$3_global($2* %ptr, <$1 x $2> %cmp,
+                  <$1 x $2> %val, <$1 x MASK> %mask) nounwind alwaysinline ;
+
+declare $2 @__atomic_compare_exchange_uniform_$3_global($2* %ptr, $2 %cmp,
+                  $2 %val) nounwind alwaysinline ;
+')
+
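
These two macros only emit declarations; the swap and compare-exchange builtins keep their usual semantics: swap stores the new value and returns the previous contents, and compare-exchange stores the new value only if the current contents equal the comparand, returning what was in memory beforehand in either case. A C++ sketch of those semantics (illustrative, not the ispc/NVPTX implementation):

// Reference semantics for swap and compare-exchange, in plain C++
// (illustrative only).
#include <atomic>
#include <cstdint>

int32_t atomic_swap_uniform_int32(std::atomic<int32_t> *ptr, int32_t val) {
    return ptr->exchange(val);  // returns the old contents
}

int32_t atomic_compare_exchange_uniform_int32(std::atomic<int32_t> *ptr,
                                              int32_t cmp, int32_t val) {
    int32_t expected = cmp;
    // On success memory now holds val; on failure expected is updated to the
    // observed value. Either way, return what was in memory before the call.
    ptr->compare_exchange_strong(expected, val);
    return expected;
}
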
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; atomics and memory barriers
+
+global_swap(WIDTH, i32, int32)
+global_swap(WIDTH, i64, int64)
+
+declare float @__atomic_swap_uniform_float_global(float * %ptr, float %val) nounwind alwaysinline ;
+declare double @__atomic_swap_uniform_double_global(double * %ptr, double %val) nounwind alwaysinline ;
+global_atomic_exchange(WIDTH, i32, int32)
+global_atomic_exchange(WIDTH, i64, int64)
+
+declare <WIDTH x float> @__atomic_compare_exchange_float_global(float * %ptr,
+                  <WIDTH x float> %cmp, <WIDTH x float> %val, <WIDTH x MASK> %mask) nounwind alwaysinline ;
+declare <WIDTH x double> @__atomic_compare_exchange_double_global(double * %ptr,
+                  <WIDTH x double> %cmp, <WIDTH x double> %val, <WIDTH x MASK> %mask) nounwind alwaysinline ;
+declare float @__atomic_compare_exchange_uniform_float_global(float * %ptr, float %cmp,
+                  float %val) nounwind alwaysinline ;
+declare double @__atomic_compare_exchange_uniform_double_global(double * %ptr, double %cmp,
+                  double %val) nounwind alwaysinline ;
+
+declare void @llvm.nvvm.membar.gl()
+declare void @llvm.nvvm.membar.sys()
+declare void @llvm.nvvm.membar.cta()
+
+define void @__memory_barrier() nounwind readnone alwaysinline {
+  ;; see http://llvm.org/bugs/show_bug.cgi?id=2829. It seems like we
+  ;; only get an MFENCE on x86 if "device" is true, but IMHO we should
+  ;; in the case where the first 4 args are true but it is false.
+  ;; So we just always set that to true...
+  call void @llvm.nvvm.membar.gl()
+  ret void
+}
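
On this path __memory_barrier is lowered to the device-wide NVVM barrier intrinsic (llvm.nvvm.membar.gl), taking the place of the llvm.memory.barrier based version that is removed in a later hunk. A loose portable analogue in C++ is a sequentially consistent fence (an analogy only, not a statement about PTX ordering semantics):

// Loose C++ analogy for a full memory barrier (illustrative only).
#include <atomic>

void memory_barrier() {
    // Orders all preceding loads/stores before all following ones,
    // as observed by other threads.
    std::atomic_thread_fence(std::memory_order_seq_cst);
}
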
@@ -768,27 +768,6 @@ shuffles(double, 8)
shuffles(i64, 8)
')

-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; global_atomic_associative
-;; More efficient implementation for atomics that are associative (e.g.,
-;; add, and, ...). If a basic implementation would do sometihng like:
-;; result0 = atomic_op(ptr, val0)
-;; result1 = atomic_op(ptr, val1)
-;; ..
-;; Then instead we can do:
-;; tmp = (val0 op val1 op ...)
-;; result0 = atomic_op(ptr, tmp)
-;; result1 = (result0 op val0)
-;; ..
-;; And more efficiently compute the same result
-;;
-;; Takes five parameters:
-;; $1: vector width of the target
-;; $2: operation being performed (w.r.t. LLVM atomic intrinsic names)
-;; (add, sub...)
-;; $3: return type of the LLVM atomic (e.g. i32)
-;; $4: return type of the LLVM atomic type, in ispc naming paralance (e.g. int32)
-;; $5: identity value for the operator (e.g. 0 for add, -1 for AND, ...)

define(`mask_converts', `
define internal <$1 x i8> @convertmask_i1_i8_$1(<$1 x i1>) {
@@ -875,54 +854,6 @@ define internal <$1 x i64> @convertmask_i64_i64_$1(<$1 x i64>) {

mask_converts(WIDTH)

-define(`global_atomic_associative', `
-
-declare <$1 x $3> @__atomic_$2_$4_global($3 * %ptr, <$1 x $3> %val,
-                  <$1 x MASK> %m) nounwind alwaysinline ;
-')
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; global_atomic_uniform
-;; Defines the implementation of a function that handles the mapping from
-;; an ispc atomic function to the underlying LLVM intrinsics. This variant
-;; just calls the atomic once, for the given uniform value
-;;
-;; Takes four parameters:
-;; $1: vector width of the target
-;; $2: operation being performed (w.r.t. LLVM atomic intrinsic names)
-;; (add, sub...)
-;; $3: return type of the LLVM atomic (e.g. i32)
-;; $4: return type of the LLVM atomic type, in ispc naming paralance (e.g. int32)
-
-define(`global_atomic_uniform', `
-declare $3 @__atomic_$2_uniform_$4_global($3 * %ptr, $3 %val) nounwind alwaysinline ;
-')
-
-;; Macro to declare the function that implements the swap atomic.
-;; Takes three parameters:
-;; $1: vector width of the target
-;; $2: llvm type of the vector elements (e.g. i32)
-;; $3: ispc type of the elements (e.g. int32)
-
-define(`global_swap', `
-declare $2 @__atomic_swap_uniform_$3_global($2* %ptr, $2 %val) nounwind alwaysinline ;
-')
-
-
-;; Similarly, macro to declare the function that implements the compare/exchange
-;; atomic. Takes three parameters:
-;; $1: vector width of the target
-;; $2: llvm type of the vector elements (e.g. i32)
-;; $3: ispc type of the elements (e.g. int32)
-
-define(`global_atomic_exchange', `
-
-declare <$1 x $2> @__atomic_compare_exchange_$3_global($2* %ptr, <$1 x $2> %cmp,
-                  <$1 x $2> %val, <$1 x MASK> %mask) nounwind alwaysinline ;
-
-declare $2 @__atomic_compare_exchange_uniform_$3_global($2* %ptr, $2 %cmp,
-                  $2 %val) nounwind alwaysinline ;
-')

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; count trailing zeros
@@ -2507,67 +2438,6 @@ define double @__stdlib_pow(double, double) nounwind readnone alwaysinline {
ret double %r
}

-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; atomics and memory barriers
-
-declare void @llvm.memory.barrier(i1 %loadload, i1 %loadstore, i1 %storeload,
-                  i1 %storestore, i1 %device)
-
-define void @__memory_barrier() nounwind readnone alwaysinline {
-  ;; see http://llvm.org/bugs/show_bug.cgi?id=2829. It seems like we
-  ;; only get an MFENCE on x86 if "device" is true, but IMHO we should
-  ;; in the case where the first 4 args are true but it is false.
-  ;; So we just always set that to true...
-  call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 true)
-  ret void
-}
-
-global_atomic_associative(WIDTH, add, i32, int32, 0)
-global_atomic_associative(WIDTH, sub, i32, int32, 0)
-global_atomic_associative(WIDTH, and, i32, int32, -1)
-global_atomic_associative(WIDTH, or, i32, int32, 0)
-global_atomic_associative(WIDTH, xor, i32, int32, 0)
-global_atomic_uniform(WIDTH, add, i32, int32)
-global_atomic_uniform(WIDTH, sub, i32, int32)
-global_atomic_uniform(WIDTH, and, i32, int32)
-global_atomic_uniform(WIDTH, or, i32, int32)
-global_atomic_uniform(WIDTH, xor, i32, int32)
-global_atomic_uniform(WIDTH, min, i32, int32)
-global_atomic_uniform(WIDTH, max, i32, int32)
-global_atomic_uniform(WIDTH, umin, i32, uint32)
-global_atomic_uniform(WIDTH, umax, i32, uint32)
-
-global_atomic_associative(WIDTH, add, i64, int64, 0)
-global_atomic_associative(WIDTH, sub, i64, int64, 0)
-global_atomic_associative(WIDTH, and, i64, int64, -1)
-global_atomic_associative(WIDTH, or, i64, int64, 0)
-global_atomic_associative(WIDTH, xor, i64, int64, 0)
-global_atomic_uniform(WIDTH, add, i64, int64)
-global_atomic_uniform(WIDTH, sub, i64, int64)
-global_atomic_uniform(WIDTH, and, i64, int64)
-global_atomic_uniform(WIDTH, or, i64, int64)
-global_atomic_uniform(WIDTH, xor, i64, int64)
-global_atomic_uniform(WIDTH, min, i64, int64)
-global_atomic_uniform(WIDTH, max, i64, int64)
-global_atomic_uniform(WIDTH, umin, i64, uint64)
-global_atomic_uniform(WIDTH, umax, i64, uint64)
-
-global_swap(WIDTH, i32, int32)
-global_swap(WIDTH, i64, int64)
-
-declare float @__atomic_swap_uniform_float_global(float * %ptr, float %val) nounwind alwaysinline ;
-declare double @__atomic_swap_uniform_double_global(double * %ptr, double %val) nounwind alwaysinline ;
-global_atomic_exchange(WIDTH, i32, int32)
-global_atomic_exchange(WIDTH, i64, int64)
-
-declare <WIDTH x float> @__atomic_compare_exchange_float_global(float * %ptr,
-                  <WIDTH x float> %cmp, <WIDTH x float> %val, <WIDTH x MASK> %mask) nounwind alwaysinline ;
-declare <WIDTH x double> @__atomic_compare_exchange_double_global(double * %ptr,
-                  <WIDTH x double> %cmp, <WIDTH x double> %val, <WIDTH x MASK> %mask) nounwind alwaysinline ;
-declare float @__atomic_compare_exchange_uniform_float_global(float * %ptr, float %cmp,
-                  float %val) nounwind alwaysinline ;
-declare double @__atomic_compare_exchange_uniform_double_global(double * %ptr, double %cmp,
-                  double %val) nounwind alwaysinline ;

')

stdlib.ispc (70 lines changed)
@@ -1814,7 +1814,7 @@ static inline void memory_barrier() {
    __memory_barrier();
}

-#define DEFINE_ATOMIC_OP(TA,TB,OPA,OPB,MASKTYPE) \
+#define DEFINE_ATOMIC_OP(TA,TB,OPA,OPB,MASKTYPE,TC) \
static inline TA atomic_##OPA##_global(uniform TA * uniform ptr, TA value) { \
    TA ret = __atomic_##OPB##_##TB##_global(ptr, value, (MASKTYPE)__mask); \
    return ret; \
@@ -1825,6 +1825,10 @@ static inline uniform TA atomic_##OPA##_global(uniform TA * uniform ptr, \
    return ret; \
} \
static inline TA atomic_##OPA##_global(uniform TA * varying ptr, TA value) { \
+    if (__is_nvptx_target) { \
+        TA ret = __atomic_##OPB##_varying_##TB##_global((TC)ptr, value, (MASKTYPE)__mask); \
+        return ret; \
+    } else { \
    uniform TA * uniform ptrArray[programCount]; \
    ptrArray[programIndex] = ptr; \
    TA ret; \
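
The macro change above adds a TC parameter to DEFINE_ATOMIC_OP, naming the integer type used to cast the varying pointer when handing it to the __atomic_*_varying_*_global builtin on the NVPTX target; other targets keep the existing per-lane loop. The dispatch shape, sketched in C++ (illustrative only; simplified types, not ispc source):

// Shape of the varying-pointer atomic dispatch added above
// (illustrative C++ sketch; simplified types and names).
#include <atomic>
#include <cstdint>

constexpr bool is_nvptx_target = false;   // stand-in for __is_nvptx_target
constexpr int program_count = 4;          // stand-in for programCount

// Stand-in for the gang-wide __atomic_add_varying_int32_global builtin.
void atomic_add_varying_builtin(std::atomic<int32_t> *ptrs[], const int32_t val[],
                                int32_t ret[]) {
    for (int i = 0; i < program_count; ++i)
        ret[i] = ptrs[i]->fetch_add(val[i]);
}

void atomic_add_global(std::atomic<int32_t> *ptrs[], const int32_t val[],
                       int32_t ret[]) {
    if (is_nvptx_target) {
        // New path: one call into the varying builtin for the whole gang.
        atomic_add_varying_builtin(ptrs, val, ret);
    } else {
        // Existing path: loop over the lanes, one uniform atomic per lane.
        for (int i = 0; i < program_count; ++i)
            ret[i] = ptrs[i]->fetch_add(val[i]);
    }
}
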
@@ -1835,6 +1839,7 @@ static inline TA atomic_##OPA##_global(uniform TA * varying ptr, TA value) { \
        ret = insert(ret, i, r); \
    } \
    return ret; \
+    } \
} \

#define DEFINE_ATOMIC_SWAP(TA,TB) \
@@ -1888,7 +1893,7 @@ static inline TA atomic_swap_global(uniform TA * varying ptr, TA value) { \
    return ret; \
} \

-#define DEFINE_ATOMIC_MINMAX_OP(TA,TB,OPA,OPB) \
+#define DEFINE_ATOMIC_MINMAX_OP(TA,TB,OPA,OPB,MASKTYPE,TC) \
static inline TA atomic_##OPA##_global(uniform TA * uniform ptr, TA value) { \
    uniform TA oneval = reduce_##OPA(value); \
    TA ret; \
@@ -1903,6 +1908,10 @@ static inline uniform TA atomic_##OPA##_global(uniform TA * uniform ptr, \
} \
static inline TA atomic_##OPA##_global(uniform TA * varying ptr, \
                                       TA value) { \
+    if (__is_nvptx_target) { \
+        TA ret = __atomic_##OPB##_varying_##TB##_global((TC)ptr, value, (MASKTYPE)__mask); \
+        return ret; \
+    } else { \
    uniform TA * uniform ptrArray[programCount]; \
    ptrArray[programIndex] = ptr; \
    TA ret; \
@@ -1913,48 +1922,49 @@ static inline TA atomic_##OPA##_global(uniform TA * varying ptr, \
        ret = insert(ret, i, r); \
    } \
    return ret; \
+    } \
}

-DEFINE_ATOMIC_OP(int32,int32,add,add,IntMaskType)
-DEFINE_ATOMIC_OP(int32,int32,subtract,sub,IntMaskType)
-DEFINE_ATOMIC_MINMAX_OP(int32,int32,min,min)
-DEFINE_ATOMIC_MINMAX_OP(int32,int32,max,max)
-DEFINE_ATOMIC_OP(int32,int32,and,and,IntMaskType)
-DEFINE_ATOMIC_OP(int32,int32,or,or,IntMaskType)
-DEFINE_ATOMIC_OP(int32,int32,xor,xor,IntMaskType)
+DEFINE_ATOMIC_OP(int32,int32,add,add,IntMaskType,int64)
+DEFINE_ATOMIC_OP(int32,int32,subtract,sub,IntMaskType,int64)
+DEFINE_ATOMIC_MINMAX_OP(int32,int32,min,min,IntMaskType,int64)
+DEFINE_ATOMIC_MINMAX_OP(int32,int32,max,max,IntMaskType,int64)
+DEFINE_ATOMIC_OP(int32,int32,and,and,IntMaskType,int64)
+DEFINE_ATOMIC_OP(int32,int32,or,or,IntMaskType,int64)
+DEFINE_ATOMIC_OP(int32,int32,xor,xor,IntMaskType,int64)
DEFINE_ATOMIC_SWAP(int32,int32)

// For everything but atomic min and max, we can use the same
// implementations for unsigned as for signed.
-DEFINE_ATOMIC_OP(unsigned int32,int32,add,add,UIntMaskType)
-DEFINE_ATOMIC_OP(unsigned int32,int32,subtract,sub,UIntMaskType)
-DEFINE_ATOMIC_MINMAX_OP(unsigned int32,uint32,min,umin)
-DEFINE_ATOMIC_MINMAX_OP(unsigned int32,uint32,max,umax)
-DEFINE_ATOMIC_OP(unsigned int32,int32,and,and,UIntMaskType)
-DEFINE_ATOMIC_OP(unsigned int32,int32,or,or,UIntMaskType)
-DEFINE_ATOMIC_OP(unsigned int32,int32,xor,xor,UIntMaskType)
+DEFINE_ATOMIC_OP(unsigned int32,int32,add,add,UIntMaskType, unsigned int64)
+DEFINE_ATOMIC_OP(unsigned int32,int32,subtract,sub,UIntMaskType, unsigned int64)
+DEFINE_ATOMIC_MINMAX_OP(unsigned int32,uint32,min,umin,UIntMaskType,unsigned int64)
+DEFINE_ATOMIC_MINMAX_OP(unsigned int32,uint32,max,umax,UIntMaskType,unsigned int64)
+DEFINE_ATOMIC_OP(unsigned int32,int32,and,and,UIntMaskType, unsigned int64)
+DEFINE_ATOMIC_OP(unsigned int32,int32,or,or,UIntMaskType, unsigned int64)
+DEFINE_ATOMIC_OP(unsigned int32,int32,xor,xor,UIntMaskType, unsigned int64)
DEFINE_ATOMIC_SWAP(unsigned int32,int32)

DEFINE_ATOMIC_SWAP(float,float)

-DEFINE_ATOMIC_OP(int64,int64,add,add,IntMaskType)
-DEFINE_ATOMIC_OP(int64,int64,subtract,sub,IntMaskType)
-DEFINE_ATOMIC_MINMAX_OP(int64,int64,min,min)
-DEFINE_ATOMIC_MINMAX_OP(int64,int64,max,max)
-DEFINE_ATOMIC_OP(int64,int64,and,and,IntMaskType)
-DEFINE_ATOMIC_OP(int64,int64,or,or,IntMaskType)
-DEFINE_ATOMIC_OP(int64,int64,xor,xor,IntMaskType)
+DEFINE_ATOMIC_OP(int64,int64,add,add,IntMaskType,int64)
+DEFINE_ATOMIC_OP(int64,int64,subtract,sub,IntMaskType,int64)
+DEFINE_ATOMIC_MINMAX_OP(int64,int64,min,min,IntMaskType,int64)
+DEFINE_ATOMIC_MINMAX_OP(int64,int64,max,max,IntMaskType,int64)
+DEFINE_ATOMIC_OP(int64,int64,and,and,IntMaskType,int64)
+DEFINE_ATOMIC_OP(int64,int64,or,or,IntMaskType,int64)
+DEFINE_ATOMIC_OP(int64,int64,xor,xor,IntMaskType,int64)
DEFINE_ATOMIC_SWAP(int64,int64)

// For everything but atomic min and max, we can use the same
// implementations for unsigned as for signed.
-DEFINE_ATOMIC_OP(unsigned int64,int64,add,add,UIntMaskType)
-DEFINE_ATOMIC_OP(unsigned int64,int64,subtract,sub,UIntMaskType)
-DEFINE_ATOMIC_MINMAX_OP(unsigned int64,uint64,min,umin)
-DEFINE_ATOMIC_MINMAX_OP(unsigned int64,uint64,max,umax)
-DEFINE_ATOMIC_OP(unsigned int64,int64,and,and,UIntMaskType)
-DEFINE_ATOMIC_OP(unsigned int64,int64,or,or,UIntMaskType)
-DEFINE_ATOMIC_OP(unsigned int64,int64,xor,xor,UIntMaskType)
+DEFINE_ATOMIC_OP(unsigned int64,int64,add,add,UIntMaskType,unsigned int64)
+DEFINE_ATOMIC_OP(unsigned int64,int64,subtract,sub,UIntMaskType,unsigned int64)
+DEFINE_ATOMIC_MINMAX_OP(unsigned int64,uint64,min,umin,UIntMaskType,unsigned int64)
+DEFINE_ATOMIC_MINMAX_OP(unsigned int64,uint64,max,umax,UIntMaskType,unsigned int64)
+DEFINE_ATOMIC_OP(unsigned int64,int64,and,and,UIntMaskType,unsigned int64)
+DEFINE_ATOMIC_OP(unsigned int64,int64,or,or,UIntMaskType,unsigned int64)
+DEFINE_ATOMIC_OP(unsigned int64,int64,xor,xor,UIntMaskType,unsigned int64)
DEFINE_ATOMIC_SWAP(unsigned int64,int64)

DEFINE_ATOMIC_SWAP(double,double)