Implement global atomics and a memory barrier in the standard library.
This checkin provides the standard set of atomic operations and a memory barrier in the ispc standard library. Both signed and unsigned 32- and 64-bit integer types are supported.
This commit is contained in:
2
Makefile
2
Makefile
@@ -121,7 +121,7 @@ objs/stdlib-c.o: objs/stdlib-c.cpp
|
|||||||
|
|
||||||
objs/stdlib_ispc.cpp: stdlib.ispc
|
objs/stdlib_ispc.cpp: stdlib.ispc
|
||||||
@echo Creating C++ source from $<
|
@echo Creating C++ source from $<
|
||||||
@$(CPP) -DISPC=1 -DPI=3.1415926536 $< | ./stdlib2cpp.py > $@
|
@$(CLANG) -E -x c -DISPC=1 -DPI=3.1415926536 $< -o - | ./stdlib2cpp.py > $@
|
||||||
|
|
||||||
objs/stdlib_ispc.o: objs/stdlib_ispc.cpp
|
objs/stdlib_ispc.o: objs/stdlib_ispc.cpp
|
||||||
@echo Compiling $<
|
@echo Compiling $<
|
||||||
|
|||||||
@@ -1,9 +1,9 @@
|
|||||||
=== v1.0.3 === (not yet released)
|
=== v1.0.3 === (4 July 2011)
|
||||||
|
|
||||||
ispc now has a built-in pre-processor (from LLVM's clang compiler).
|
ispc now has a built-in pre-processor (from LLVM's clang compiler).
|
||||||
(Thanks to Pete Couperus!) It is therefore no longer necessary to use
|
(Thanks to Pete Couperus for this patch!) It is therefore no longer
|
||||||
cl.exe for preprocessing before on Windows; the MSVC proejct files for the
|
necessary to use cl.exe for preprocessing on Windows; the MSVC project
|
||||||
examples have been updated accordingly.
|
files for the examples have been updated accordingly.
|
||||||
|
|
||||||
There is another variant of the shuffle() function in the standard
|
There is another variant of the shuffle() function in the standard
|
||||||
library: "<type> shuffle(<type> v0, <type> v1, int permute)", where the
|
library: "<type> shuffle(<type> v0, <type> v1, int permute)", where the
|
||||||
@@ -11,8 +11,15 @@ permutation vector indexes over the concatenation of the two vectors
|
|||||||
(e.g. the value 0 corresponds to the first element of v0, the value
|
(e.g. the value 0 corresponds to the first element of v0, the value
|
||||||
2*programCount-1 corresponds to the last element of v1, etc.)
|
2*programCount-1 corresponds to the last element of v1, etc.)
|
||||||
|
|
||||||
|
ispc now supports the usual range of atomic operations (add, subtract, min,
|
||||||
|
max, and, or, and xor) as well as atomic swap and atomic compare and
|
||||||
|
exchange. There is also a facility for inserting memory fences. See the
|
||||||
|
"Atomic Operations and Memory Fences" section of the user's guide
|
||||||
|
(http://ispc.github.com/ispc.html#atomic-operations-and-memory-fences) for
|
||||||
|
more information.
|
||||||
|
|
||||||
There are now both 'signed' and 'unsigned' variants of the standard library
|
There are now both 'signed' and 'unsigned' variants of the standard library
|
||||||
functions like packed_load_active() that that references to arrays of
|
functions like packed_load_active() that take references to arrays of
|
||||||
signed int32s and unsigned int32s respectively. (The
|
signed int32s and unsigned int32s respectively. (The
|
||||||
{load_from,store_to}_{int8,int16}() functions have similarly been augmented
|
{load_from,store_to}_{int8,int16}() functions have similarly been augmented
|
||||||
to have both 'signed' and 'unsigned' variants.)
|
to have both 'signed' and 'unsigned' variants.)
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
rst2html ispc.txt > ispc.html
|
rst2html.py ispc.txt > ispc.html
|
||||||
|
|
||||||
#rst2latex --section-numbering --documentclass=article --documentoptions=DIV=9,10pt,letterpaper ispc.txt > ispc.tex
|
#rst2latex --section-numbering --documentclass=article --documentoptions=DIV=9,10pt,letterpaper ispc.txt > ispc.tex
|
||||||
#pdflatex ispc.tex
|
#pdflatex ispc.tex
|
||||||
|
|||||||
@@ -76,6 +76,7 @@ Contents:
|
|||||||
+ `Output Functions`_
|
+ `Output Functions`_
|
||||||
+ `Cross-Program Instance Operations`_
|
+ `Cross-Program Instance Operations`_
|
||||||
+ `Packed Load and Store Operations`_
|
+ `Packed Load and Store Operations`_
|
||||||
|
+ `Atomic Operations and Memory Fences`_
|
||||||
+ `Low-Level Bits`_
|
+ `Low-Level Bits`_
|
||||||
|
|
||||||
* `Interoperability with the Application`_
|
* `Interoperability with the Application`_
|
||||||
@@ -1811,6 +1812,69 @@ where the ``i`` th element of ``x`` has been replaced with the value ``v``
|
|||||||
int insert(int x, uniform int i, uniform int v)
|
int insert(int x, uniform int i, uniform int v)
|
||||||
|
|
||||||
|
|
||||||
|
Atomic Operations and Memory Fences
|
||||||
|
-----------------------------------
|
||||||
|
|
||||||
|
The usual range of atomic memory operations are provided in ``ispc``. As an
|
||||||
|
example, consider the 32-bit integer atomic add routine:
|
||||||
|
|
||||||
|
::
|
||||||
|
|
||||||
|
int32 atomic_add_global(reference uniform int32 val, int32 delta)
|
||||||
|
|
||||||
|
The semantics are the expected ones for an atomic add function: the value
|
||||||
|
"val" has the value "delta" added to it atomically, and the old value of
|
||||||
|
"val" is returned from the function. (Thus, if multiple processors
|
||||||
|
simultaneously issue atomic adds to the same memory location, the adds will
|
||||||
|
be serialized by the hardware so that the correct result is computed in the
|
||||||
|
end.)
|
||||||
|
|
||||||
|
One thing to note is that the value being added to here is a
|
||||||
|
``uniform`` integer, while the increment amount and the return value are
|
||||||
|
``varying``. In other words, the semantics are that each running program
|
||||||
|
instance individually issues the atomic operation with its own ``delta``
|
||||||
|
value and gets the previous value of ``val`` back in return.
|
||||||
|
|
||||||
|
Here are the declarations of the ``int32`` variants of these functions.
|
||||||
|
There are also ``int64`` equivalents as well as variants that take
|
||||||
|
``unsigned`` ``int32`` and ``int64`` values.
|
||||||
|
|
||||||
|
::
|
||||||
|
|
||||||
|
int32 atomic_add_global(reference uniform int32 val, int32 value)
|
||||||
|
int32 atomic_subtract_global(reference uniform int32 val, int32 value)
|
||||||
|
int32 atomic_min_global(reference uniform int32 val, int32 value)
|
||||||
|
int32 atomic_max_global(reference uniform int32 val, int32 value)
|
||||||
|
int32 atomic_and_global(reference uniform int32 val, int32 value)
|
||||||
|
int32 atomic_or_global(reference uniform int32 val, int32 value)
|
||||||
|
int32 atomic_xor_global(reference uniform int32 val, int32 value)
|
||||||
|
int32 atomic_swap_global(reference uniform int32 val, int32 newval)
|
||||||
|
|
||||||
|
There is also an atomic "compare and exchange" function; it atomically
|
||||||
|
compares the value in "val" to "compare"--if they match, it assigns
|
||||||
|
"newval" to "val". In either case, the old value of "val" is returned.
|
||||||
|
(As with the other atomic operations, there are also ``unsigned`` and
|
||||||
|
64-bit variants of this function.)
|
||||||
|
|
||||||
|
::
|
||||||
|
|
||||||
|
int32 atomic_compare_exchange_global(reference uniform int32 val,
|
||||||
|
int32 compare, int32 newval)
|
||||||
|
|
||||||
|
``ispc`` also has a standard library routine that inserts a memory barrier
|
||||||
|
into the code; it ensures that all memory reads and writes prior to the
|
||||||
|
barrier complete before any reads or writes after the barrier are issued.
|
||||||
|
See the `Linux kernel documentation on memory barriers`_ for an excellent
|
||||||
|
writeup on the need for and the use of memory barriers in multi-threaded
|
||||||
|
code.
|
||||||
|
|
||||||
|
.. _Linux kernel documentation on memory barriers: http://www.kernel.org/doc/Documentation/memory-barriers.txt
|
||||||
|
|
||||||
|
::
|
||||||
|
|
||||||
|
void memory_barrier();
|
||||||
|
|
||||||
|
|
||||||
Low-Level Bits
|
Low-Level Bits
|
||||||
--------------
|
--------------
|
||||||
|
|
||||||
|
|||||||
@@ -59,9 +59,9 @@
|
|||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<CustomBuild Include="stdlib.ispc">
|
<CustomBuild Include="stdlib.ispc">
|
||||||
<FileType>Document</FileType>
|
<FileType>Document</FileType>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">cl /EP /TP %(Filename).ispc /DISPC=1 /DPI=3.1415926535 | python stdlib2cpp.py > gen-stdlib.cpp</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">clang -E -x c %(Filename).ispc -DISPC=1 -DPI=3.1415926535 | python stdlib2cpp.py > gen-stdlib.cpp</Command>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-stdlib.cpp</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-stdlib.cpp</Outputs>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">cl /EP /TP %(Filename).ispc /DISPC=1 /DPI=3.1415926535 | python stdlib2cpp.py > gen-stdlib.cpp</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">clang -E -x c %(Filename).ispc -DISPC=1 -DPI=3.1415926535 | python stdlib2cpp.py > gen-stdlib.cpp</Command>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-stdlib.cpp</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-stdlib.cpp</Outputs>
|
||||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-stdlib.cpp</Message>
|
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-stdlib.cpp</Message>
|
||||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-stdlib.cpp</Message>
|
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-stdlib.cpp</Message>
|
||||||
|
|||||||
69
stdlib.ispc
69
stdlib.ispc
@@ -295,6 +295,75 @@ static inline uniform int packed_store_active(uniform int a[], uniform int start
|
|||||||
return __packed_store_active(a, start, vals, __mask);
|
return __packed_store_active(a, start, vals, __mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////
|
||||||
|
// Atomics and memory barriers
|
||||||
|
|
||||||
|
static inline void memory_barrier() {
|
||||||
|
__memory_barrier();
|
||||||
|
}
|
||||||
|
|
||||||
|
#define DEFINE_ATOMIC_OP(TA,TB,OPA,OPB) \
|
||||||
|
static inline TA atomic_##OPA##_global(uniform reference TA ref, TA value) { \
|
||||||
|
memory_barrier(); \
|
||||||
|
TA ret = __atomic_##OPB##_##TB##_global(ref, value, __mask); \
|
||||||
|
memory_barrier(); \
|
||||||
|
return ret; \
|
||||||
|
}
|
||||||
|
|
||||||
|
DEFINE_ATOMIC_OP(int32,int32,add,add)
|
||||||
|
DEFINE_ATOMIC_OP(int32,int32,subtract,sub)
|
||||||
|
DEFINE_ATOMIC_OP(int32,int32,min,min)
|
||||||
|
DEFINE_ATOMIC_OP(int32,int32,max,max)
|
||||||
|
DEFINE_ATOMIC_OP(int32,int32,and,and)
|
||||||
|
DEFINE_ATOMIC_OP(int32,int32,or,or)
|
||||||
|
DEFINE_ATOMIC_OP(int32,int32,xor,xor)
|
||||||
|
DEFINE_ATOMIC_OP(int32,int32,swap,swap)
|
||||||
|
|
||||||
|
// For everything but atomic min and max, we can use the same
|
||||||
|
// implementations for unsigned as for signed.
|
||||||
|
DEFINE_ATOMIC_OP(unsigned int32,int32,add,add)
|
||||||
|
DEFINE_ATOMIC_OP(unsigned int32,int32,subtract,sub)
|
||||||
|
DEFINE_ATOMIC_OP(unsigned int32,uint32,min,umin)
|
||||||
|
DEFINE_ATOMIC_OP(unsigned int32,uint32,max,umax)
|
||||||
|
DEFINE_ATOMIC_OP(unsigned int32,int32,and,and)
|
||||||
|
DEFINE_ATOMIC_OP(unsigned int32,int32,or,or)
|
||||||
|
DEFINE_ATOMIC_OP(unsigned int32,int32,xor,xor)
|
||||||
|
DEFINE_ATOMIC_OP(unsigned int32,int32,swap,swap)
|
||||||
|
|
||||||
|
DEFINE_ATOMIC_OP(int64,int64,add,add)
|
||||||
|
DEFINE_ATOMIC_OP(int64,int64,subtract,sub)
|
||||||
|
DEFINE_ATOMIC_OP(int64,int64,min,min)
|
||||||
|
DEFINE_ATOMIC_OP(int64,int64,max,max)
|
||||||
|
DEFINE_ATOMIC_OP(int64,int64,and,and)
|
||||||
|
DEFINE_ATOMIC_OP(int64,int64,or,or)
|
||||||
|
DEFINE_ATOMIC_OP(int64,int64,xor,xor)
|
||||||
|
DEFINE_ATOMIC_OP(int64,int64,swap,swap)
|
||||||
|
|
||||||
|
// For everything but atomic min and max, we can use the same
|
||||||
|
// implementations for unsigned as for signed.
|
||||||
|
DEFINE_ATOMIC_OP(unsigned int64,int64,add,add)
|
||||||
|
DEFINE_ATOMIC_OP(unsigned int64,int64,subtract,sub)
|
||||||
|
DEFINE_ATOMIC_OP(unsigned int64,uint64,min,umin)
|
||||||
|
DEFINE_ATOMIC_OP(unsigned int64,uint64,max,umax)
|
||||||
|
DEFINE_ATOMIC_OP(unsigned int64,int64,and,and)
|
||||||
|
DEFINE_ATOMIC_OP(unsigned int64,int64,or,or)
|
||||||
|
DEFINE_ATOMIC_OP(unsigned int64,int64,xor,xor)
|
||||||
|
DEFINE_ATOMIC_OP(unsigned int64,int64,swap,swap)
|
||||||
|
|
||||||
|
#define ATOMIC_DECL_CMPXCHG(TA, TB) \
|
||||||
|
static inline TA atomic_compare_exchange_global( \
|
||||||
|
uniform reference TA ref, TA oldval, TA newval) { \
|
||||||
|
memory_barrier(); \
|
||||||
|
TA ret = __atomic_compare_exchange_##TB##_global(ref, oldval, newval, __mask); \
|
||||||
|
memory_barrier(); \
|
||||||
|
return ret; \
|
||||||
|
}
|
||||||
|
|
||||||
|
ATOMIC_DECL_CMPXCHG(int32, int32)
|
||||||
|
ATOMIC_DECL_CMPXCHG(unsigned int32, int32)
|
||||||
|
ATOMIC_DECL_CMPXCHG(int64, int64)
|
||||||
|
ATOMIC_DECL_CMPXCHG(unsigned int64, int64)
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
// Load/store from/to 8/16-bit types
|
// Load/store from/to 8/16-bit types
|
||||||
|
|
||||||
|
|||||||
131
stdlib.m4
131
stdlib.m4
@@ -405,6 +405,95 @@ forloop(i, 1, eval($1-1), `
|
|||||||
}
|
}
|
||||||
')
|
')
|
||||||
|
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;; global_atomic
|
||||||
|
;; Defines the implementation of a function that handles the mapping from
|
||||||
|
;; an ispc atomic function to the underlying LLVM intrinsics. Specifically,
|
||||||
|
;; the function handles looping over the active lanes, calling the underlying
|
||||||
|
;; scalar atomic intrinsic for each one, and assembling the vector result.
|
||||||
|
;;
|
||||||
|
;; Takes four parameters:
|
||||||
|
;; $1: vector width of the target
|
||||||
|
;; $2: operation being performed (w.r.t. LLVM atomic intrinsic names)
|
||||||
|
;; (add, sub...)
|
||||||
|
;; $3: return type of the LLVM atomic (e.g. i32)
|
||||||
|
;; $4: return type of the LLVM atomic type, in ispc naming parlance (e.g. int32)
|
||||||
|
|
||||||
|
define(`global_atomic', `
|
||||||
|
|
||||||
|
declare $3 @llvm.atomic.load.$2.$3.p0$3($3 * %ptr, $3 %delta)
|
||||||
|
|
||||||
|
define internal <$1 x $3> @__atomic_$2_$4_global($3 * %ptr, <$1 x $3> %val,
|
||||||
|
<$1 x i32> %mask) nounwind alwaysinline {
|
||||||
|
%rptr = alloca <$1 x $3>
|
||||||
|
%rptr32 = bitcast <$1 x $3> * %rptr to $3 *
|
||||||
|
|
||||||
|
per_lane($1, <$1 x i32> %mask, `
|
||||||
|
%v_LANE_ID = extractelement <$1 x $3> %val, i32 LANE
|
||||||
|
%r_LANE_ID = call $3 @llvm.atomic.load.$2.$3.p0$3($3 * %ptr, $3 %v_LANE_ID)
|
||||||
|
%rp_LANE_ID = getelementptr $3 * %rptr32, i32 LANE
|
||||||
|
store $3 %r_LANE_ID, $3 * %rp_LANE_ID')
|
||||||
|
|
||||||
|
%r = load <$1 x $3> * %rptr
|
||||||
|
ret <$1 x $3> %r
|
||||||
|
}
|
||||||
|
')
|
||||||
|
|
||||||
|
;; Macro to declare the function that implements the swap atomic.
|
||||||
|
;; Takes three parameters:
|
||||||
|
;; $1: vector width of the target
|
||||||
|
;; $2: llvm type of the vector elements (e.g. i32)
|
||||||
|
;; $3: ispc type of the elements (e.g. int32)
|
||||||
|
|
||||||
|
define(`global_swap', `
|
||||||
|
|
||||||
|
declare $2 @llvm.atomic.swap.$2.p0$2($2 * %ptr, $2 %val)
|
||||||
|
|
||||||
|
define <$1 x $2> @__atomic_swap_$3_global($2* %ptr, <$1 x $2> %val,
|
||||||
|
<$1 x i32> %mask) nounwind alwaysinline {
|
||||||
|
%rptr = alloca <$1 x $2>
|
||||||
|
%rptr32 = bitcast <$1 x $2> * %rptr to $2 *
|
||||||
|
|
||||||
|
per_lane($1, <$1 x i32> %mask, `
|
||||||
|
%val_LANE_ID = extractelement <$1 x $2> %val, i32 LANE
|
||||||
|
%r_LANE_ID = call $2 @llvm.atomic.swap.$2.p0$2($2 * %ptr, $2 %val_LANE_ID)
|
||||||
|
%rp_LANE_ID = getelementptr $2 * %rptr32, i32 LANE
|
||||||
|
store $2 %r_LANE_ID, $2 * %rp_LANE_ID')
|
||||||
|
|
||||||
|
%r = load <$1 x $2> * %rptr
|
||||||
|
ret <$1 x $2> %r
|
||||||
|
}
|
||||||
|
')
|
||||||
|
|
||||||
|
|
||||||
|
;; Similarly, macro to declare the function that implements the compare/exchange
|
||||||
|
;; atomic. Takes three parameters:
|
||||||
|
;; $1: vector width of the target
|
||||||
|
;; $2: llvm type of the vector elements (e.g. i32)
|
||||||
|
;; $3: ispc type of the elements (e.g. int32)
|
||||||
|
|
||||||
|
define(`global_atomic_exchange', `
|
||||||
|
|
||||||
|
declare $2 @llvm.atomic.cmp.swap.$2.p0$2($2 * %ptr, $2 %cmp, $2 %val)
|
||||||
|
|
||||||
|
define <$1 x $2> @__atomic_compare_exchange_$3_global($2* %ptr, <$1 x $2> %cmp,
|
||||||
|
<$1 x $2> %val, <$1 x i32> %mask) nounwind alwaysinline {
|
||||||
|
%rptr = alloca <$1 x $2>
|
||||||
|
%rptr32 = bitcast <$1 x $2> * %rptr to $2 *
|
||||||
|
|
||||||
|
per_lane($1, <$1 x i32> %mask, `
|
||||||
|
%cmp_LANE_ID = extractelement <$1 x $2> %cmp, i32 LANE
|
||||||
|
%val_LANE_ID = extractelement <$1 x $2> %val, i32 LANE
|
||||||
|
%r_LANE_ID = call $2 @llvm.atomic.cmp.swap.$2.p0$2($2 * %ptr, $2 %cmp_LANE_ID,
|
||||||
|
$2 %val_LANE_ID)
|
||||||
|
%rp_LANE_ID = getelementptr $2 * %rptr32, i32 LANE
|
||||||
|
store $2 %r_LANE_ID, $2 * %rp_LANE_ID')
|
||||||
|
|
||||||
|
%r = load <$1 x $2> * %rptr
|
||||||
|
ret <$1 x $2> %r
|
||||||
|
}
|
||||||
|
')
|
||||||
|
|
||||||
|
|
||||||
define(`stdlib_core', `
|
define(`stdlib_core', `
|
||||||
|
|
||||||
@@ -543,6 +632,48 @@ define internal float @__stdlib_pow(float, float) nounwind readnone alwaysinline
|
|||||||
%r = call float @powf(float %0, float %1)
|
%r = call float @powf(float %0, float %1)
|
||||||
ret float %r
|
ret float %r
|
||||||
}
|
}
|
||||||
|
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;; atomics and memory barriers
|
||||||
|
|
||||||
|
declare void @llvm.memory.barrier(i1 %loadload, i1 %loadstore, i1 %storeload,
|
||||||
|
i1 %storestore, i1 %device)
|
||||||
|
|
||||||
|
define internal void @__memory_barrier() nounwind readnone alwaysinline {
|
||||||
|
;; see http://llvm.org/bugs/show_bug.cgi?id=2829. It seems like we
|
||||||
|
;; only get an MFENCE on x86 if "device" is true, but IMHO we should also get one
|
||||||
|
;; in the case where the first 4 args are true but it is false.
|
||||||
|
;; So we just always set that to true...
|
||||||
|
call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 true)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
global_atomic($1, add, i32, int32)
|
||||||
|
global_atomic($1, sub, i32, int32)
|
||||||
|
global_atomic($1, and, i32, int32)
|
||||||
|
global_atomic($1, or, i32, int32)
|
||||||
|
global_atomic($1, xor, i32, int32)
|
||||||
|
global_atomic($1, min, i32, int32)
|
||||||
|
global_atomic($1, max, i32, int32)
|
||||||
|
global_atomic($1, umin, i32, uint32)
|
||||||
|
global_atomic($1, umax, i32, uint32)
|
||||||
|
|
||||||
|
global_atomic($1, add, i64, int64)
|
||||||
|
global_atomic($1, sub, i64, int64)
|
||||||
|
global_atomic($1, and, i64, int64)
|
||||||
|
global_atomic($1, or, i64, int64)
|
||||||
|
global_atomic($1, xor, i64, int64)
|
||||||
|
global_atomic($1, min, i64, int64)
|
||||||
|
global_atomic($1, max, i64, int64)
|
||||||
|
global_atomic($1, umin, i64, uint64)
|
||||||
|
global_atomic($1, umax, i64, uint64)
|
||||||
|
|
||||||
|
global_swap($1, i32, int32)
|
||||||
|
global_swap($1, i64, int64)
|
||||||
|
|
||||||
|
global_atomic_exchange($1, i32, int32)
|
||||||
|
global_atomic_exchange($1, i64, int64)
|
||||||
|
|
||||||
')
|
')
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|||||||
14
tests/atomics-1.ispc
Normal file
14
tests/atomics-1.ispc
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
|
||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
uniform unsigned int32 s = 0;
|
||||||
|
|
||||||
|
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||||
|
float a = aFOO[programIndex];
|
||||||
|
float b = atomic_add_global(s, 1);
|
||||||
|
RET[programIndex] = reduce_add(b);
|
||||||
|
}
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
RET[programIndex] = reduce_add(programIndex);
|
||||||
|
}
|
||||||
14
tests/atomics-2.ispc
Normal file
14
tests/atomics-2.ispc
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
|
||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
uniform int64 s = 0;
|
||||||
|
|
||||||
|
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||||
|
float a = aFOO[programIndex];
|
||||||
|
float b = atomic_add_global(s, 1);
|
||||||
|
RET[programIndex] = reduce_add(b);
|
||||||
|
}
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
RET[programIndex] = reduce_add(programIndex);
|
||||||
|
}
|
||||||
14
tests/atomics-3.ispc
Normal file
14
tests/atomics-3.ispc
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
|
||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
uniform int32 s = 0xff;
|
||||||
|
|
||||||
|
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||||
|
float a = aFOO[programIndex];
|
||||||
|
float b = atomic_xor_global(s, 0xfffffff0);
|
||||||
|
RET[programIndex] = s;
|
||||||
|
}
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
RET[programIndex] = 0xff;
|
||||||
|
}
|
||||||
14
tests/atomics-4.ispc
Normal file
14
tests/atomics-4.ispc
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
|
||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
uniform int32 s = 0;
|
||||||
|
|
||||||
|
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||||
|
float a = aFOO[programIndex];
|
||||||
|
float b = atomic_or_global(s, (1<<programIndex));
|
||||||
|
RET[programIndex] = s;
|
||||||
|
}
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
RET[programIndex] = (1<<programCount)-1;
|
||||||
|
}
|
||||||
14
tests/atomics-5.ispc
Normal file
14
tests/atomics-5.ispc
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
|
||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
uniform int32 s = 0xbeef;
|
||||||
|
|
||||||
|
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||||
|
float a = aFOO[programIndex];
|
||||||
|
float b = atomic_swap_global(s, programIndex);
|
||||||
|
RET[programIndex] = reduce_max(b);
|
||||||
|
}
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
RET[programIndex] = 0xbeef;
|
||||||
|
}
|
||||||
14
tests/atomics-6.ispc
Normal file
14
tests/atomics-6.ispc
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
|
||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
uniform int32 s = 2;
|
||||||
|
|
||||||
|
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||||
|
float a = aFOO[programIndex];
|
||||||
|
float b = atomic_compare_exchange_global(s, programIndex, a*1000);
|
||||||
|
RET[programIndex] = s;
|
||||||
|
}
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
RET[programIndex] = 3000;
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user