We now perform a single atomic hardware swap and then emulate the swaps between the running program instances, so that the result is the same as if the instances had each performed their own hardware swap in some particular order. Also cleaned up the __atomic_swap_uniform_* built-in implementations so that they no longer take the mask, which they weren't using anyway. Finishes Issue #56.
18 lines
446 B
Plaintext
18 lines
446 B
Plaintext
|
|
// Exported helper: returns the built-in programCount value to the caller.
export uniform int width() {
    return programCount;
}
|
|
|
|
// Shared uniform 32-bit integer, initially 1234. f_f() swaps values into it
// with atomic_swap_global() and then reads it back when computing its output.
uniform int32 s = 1234;
|
|
|
|
// Test body for atomic_swap_global() on the global `s`.
//
// Every program instance with an odd programIndex swaps its own index into
// `s` and keeps the value that was swapped out; all other instances keep 0.
// The swapped-out values are then summed across the instances with
// reduce_add(), the current value of `s` is added, and the total is written
// to RET[programIndex].
//
// Whatever order the swaps take effect in, each swapped-in index is swapped
// back out by the next swap, except the last one, which stays in `s`. The
// initial 1234 is swapped out by the first swap. The total is therefore
// 1234 plus the sum of all odd program indices, which is what result()
// computes.
//
// RET  - output array, written at index programIndex.
// aFOO - input array, read at index programIndex.
export void f_f(uniform float RET[], uniform float aFOO[]) {
    // Loaded but not otherwise used by this test.
    float a = aFOO[programIndex];

    // Value swapped out of `s` by this instance; stays 0 for even instances.
    float swappedOut = 0;
    if ((programIndex & 1) != 0) {
        swappedOut = atomic_swap_global(&s, programIndex);
    }

    // `s` is read here, after the swaps above.
    RET[programIndex] = reduce_add(swappedOut) + s;
}
|
|
|
|
// Expected output for f_f(): 1234 (the initial value of `s`) plus the sum of
// all odd program indices, written to RET[programIndex].
//
// RET - output array, written at index programIndex.
export void result(uniform float RET[]) {
    // Odd instances contribute their own index; even instances contribute 0.
    int oddIndexOrZero = (programIndex & 1) ? programIndex : 0;
    RET[programIndex] = 1234 + reduce_add(oddIndexOrZero);
}
|