On a target with a 16-bit mask (for example), we now choose int16 as the type of the integer literal "1024". Previously, we used int32, which is a worse fit and leads to less efficient code than int16 on a 16-bit mask target. (However, we still give the integer literal 1000000 the type int32, even on a 16-bit target.) Updated the tests so that they still pass on 8- and 16-bit targets, given this change.
17 lines
480 B
Plaintext
17 lines
480 B
Plaintext
|
|
// Exported helper that hands the gang width (programCount) back to the caller.
// NOTE(review): presumably queried by the test harness to size RET/aFOO -- confirm.
export uniform int width() {
    return programCount;
}
|
|
|
|
// Uniform 32-bit accumulator, initially zero. f_f passes its address to
// atomic_or_local so that participating program instances OR their lane bit into it.
uniform unsigned int32 s = 0;
|
|
|
|
// Test body: every odd-numbered program instance below lane 28 ORs its own
// lane bit (1 << programIndex) into the uniform accumulator `s` using
// atomic_or_local, and keeps the value that call returns. All other lanes keep
// zero. The lane-wise maximum of those values is then reduced to a single
// uniform value, and its population count is written to every RET slot.
//
// RET  - output array, one float per program instance.
// aFOO - input array; read once per lane, but the value is not used further.
export void f_f(uniform float RET[], uniform float aFOO[]) {
    // Loaded but never referenced again in this function.
    float a = aFOO[programIndex];

    // Value handed back by the atomic OR for this lane; stays 0 for lanes that
    // do not take part.
    int32 previous = 0;

    // Only odd lanes with an index under 28 take part.
    bool participates = programIndex < 28 && (programIndex & 1);
    if (participates) {
        // NOTE(review): per the ispc standard library docs, atomic_or_local
        // returns the value held in memory before this lane's OR -- confirm.
        previous = atomic_or_local(&s, (1ul << programIndex));
    }

    // Collapse the per-lane values to the single largest one, then count its
    // set bits.
    uniform int32 largest = reduce_max(previous);
    RET[programIndex] = popcnt(largest);
}
|
|
|
|
// Expected output for f_f, written to every RET slot:
//   - 0 when the gang has a single program instance;
//   - otherwise (min(28, programCount) / 2) - 1, matching the 28-lane cap
//     that f_f applies to the participating lanes.
export void result(uniform float RET[]) {
    uniform int expected = 0;
    if (programCount != 1) {
        expected = (min(28, programCount) / 2) - 1;
    }
    RET[programIndex] = expected;
}
|