Specifically, now we can work through phi nodes in the IR to detect cases
where an index value is actually the same across lanes or is linear across
the lanes. For example, this is a loop that used to require gathers but
is now turned into vector loads:
for (int i = programIndex; i < 16; i += programCount)
sum += a[i];
Fixes issue #107.
20 lines
555 B
Plaintext
20 lines
555 B
Plaintext
|
|
// Returns programCount as a uniform int.
export uniform int width() { return programCount; }
|
|
|
|
// Negative test: sums aFOO[i] in a loop whose per-lane step `delta` is loaded
// from memory through an opaque index. Per the comments below, the compiler
// cannot know delta is equal across lanes, so the optimization must not be
// applied to this loop.
//
// RET  - output array; element programIndex receives lane 0's sum.
// aFOO - input array; read both to build delta and as the summed data.
//        NOTE(review): the original comment says delta ends up all one, which
//        requires aFOO[0] == 1 and aFOO[programIndex] >= 0; the expected
//        value 136 in result() equals 1 + 2 + ... + 16, consistent with
//        aFOO[i] == i + 1. The harness that fills aFOO is not visible here --
//        confirm.
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
|
float sum = 0;
|
|
// Obfuscated way to make delta all one, but make that unclear to the
|
|
// compiler
|
|
int delta = aFOO[min(0., aFOO[programIndex])];
|
|
|
|
// The optimization shouldn't apply for this, since delta isn't known
|
|
// to be all equal
|
|
for (int i = 0; i < 16; i += delta)
|
|
sum += aFOO[i];
|
|
// Store the value of sum taken from lane 0 (extract(sum, 0)) into this
// lane's slot of RET.
RET[programIndex] = extract(sum, 0);
|
|
}
|
|
|
|
// Stores the constant 136 into RET[programIndex].
// NOTE(review): presumably the test harness compares this against the output
// of f_f; the harness is not visible here -- confirm.
export void result(uniform float RET[]) {
|
|
RET[programIndex] = 136;
|
|
}
|