Improve gather/scatter optimization passes to handle loops better.
Specifically, now we can work through phi nodes in the IR to detect cases
where an index value is actually the same across lanes or is linear across
the lanes. For example, this is a loop that used to require gathers but
is now turned into vector loads:
for (int i = programIndex; i < 16; i += programCount)
sum += a[i];
Fixes issue #107.
This commit is contained in:
13
tests/phi-opts-1.ispc
Normal file
13
tests/phi-opts-1.ispc
Normal file
@@ -0,0 +1,13 @@
|
||||
|
||||
// Returns programCount as a uniform int.
// NOTE(review): presumably queried by the test harness to learn the gang
// width -- confirm against the harness.
export uniform int width() { return programCount; }
|
||||
|
||||
// phi-opts-1: the loop counter i starts at 0 and advances by programCount
// while i < 16; the per-lane offset programIndex is added in the index
// expression itself rather than in the counter. Each lane accumulates
// aFOO[i+programIndex] into sum, and reduce_add(sum) is stored to
// RET[programIndex].
// NOTE(review): presumably exercises the "index is linear across the lanes"
// case reached through a loop phi node, so the access can become a vector
// load instead of a gather -- confirm against the optimization pass.
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
float sum = 0;
|
||||
for (int i = 0; i < 16; i += programCount)
|
||||
sum += aFOO[i+programIndex];
|
||||
RET[programIndex] = reduce_add(sum);
|
||||
}
|
||||
|
||||
// Stores the constant 136 into RET[programIndex].
// NOTE(review): presumably the expected output that the harness compares
// with what f_f wrote; 136 equals 1 + 2 + ... + 16, which would match
// aFOO[k] == k + 1 for k in [0, 16) -- confirm against the harness.
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 136;
|
||||
}
|
||||
13
tests/phi-opts-2.ispc
Normal file
13
tests/phi-opts-2.ispc
Normal file
@@ -0,0 +1,13 @@
|
||||
|
||||
// Returns programCount as a uniform int.
// NOTE(review): presumably queried by the test harness to learn the gang
// width -- confirm against the harness.
export uniform int width() { return programCount; }
|
||||
|
||||
// phi-opts-2: the loop counter i itself starts at programIndex and advances
// by programCount while i < 16, and is used directly as the index into aFOO.
// Each lane accumulates aFOO[i] into sum, and reduce_add(sum) is stored to
// RET[programIndex].
// NOTE(review): presumably the case where the index reaching the load through
// the loop phi node is linear across the lanes, so the access can become a
// vector load instead of a gather -- confirm against the optimization pass.
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
float sum = 0;
|
||||
for (int i = programIndex; i < 16; i += programCount)
|
||||
sum += aFOO[i];
|
||||
RET[programIndex] = reduce_add(sum);
|
||||
}
|
||||
|
||||
// Stores the constant 136 into RET[programIndex].
// NOTE(review): presumably the expected output that the harness compares
// with what f_f wrote; 136 equals 1 + 2 + ... + 16, which would match
// aFOO[k] == k + 1 for k in [0, 16) -- confirm against the harness.
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 136;
|
||||
}
|
||||
13
tests/phi-opts-3.ispc
Normal file
13
tests/phi-opts-3.ispc
Normal file
@@ -0,0 +1,13 @@
|
||||
|
||||
// Returns programCount as a uniform int.
// NOTE(review): presumably queried by the test harness to learn the gang
// width -- confirm against the harness.
export uniform int width() { return programCount; }
|
||||
|
||||
// phi-opts-3: the loop counter i is declared without a uniform qualifier but
// starts at the constant 0 and is incremented by 1, so it takes the values
// 0..15. Each iteration adds aFOO[i] to sum; the value extract(sum, 0) is
// then stored to RET[programIndex].
// NOTE(review): presumably the case where the index reaching the load through
// the loop phi node is the same across all lanes -- confirm against the
// optimization pass.
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
float sum = 0;
|
||||
for (int i = 0; i < 16; ++i)
|
||||
sum += aFOO[i];
|
||||
RET[programIndex] = extract(sum, 0);
|
||||
}
|
||||
|
||||
// Stores the constant 136 into RET[programIndex].
// NOTE(review): presumably the expected output that the harness compares
// with what f_f wrote; 136 equals 1 + 2 + ... + 16, which would match
// aFOO[k] == k + 1 for k in [0, 16) -- confirm against the harness.
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 136;
|
||||
}
|
||||
19
tests/phi-opts-4.ispc
Normal file
19
tests/phi-opts-4.ispc
Normal file
@@ -0,0 +1,19 @@
|
||||
|
||||
// Returns programCount as a uniform int.
// NOTE(review): presumably queried by the test harness to learn the gang
// width -- confirm against the harness.
export uniform int width() { return programCount; }
|
||||
|
||||
// phi-opts-4: negative test. The loop step delta is loaded from memory, so
// the compiler cannot prove it is equal in every lane. The loop adds aFOO[i]
// to sum while i < 16, stepping i by delta, and extract(sum, 0) is stored to
// RET[programIndex].
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
float sum = 0;
|
||||
// Obfuscated way to make delta all one, but make that unclear to the
|
||||
// compiler
|
||||
// NOTE(review): min(0., x) is 0 whenever x >= 0, so delta is aFOO[0];
// this assumes aFOO holds non-negative values with aFOO[0] == 1 -- confirm
// against the harness.
int delta = aFOO[min(0., aFOO[programIndex])];
|
||||
|
||||
// The optimization shouldn't apply for this, since delta isn't known
|
||||
// to be all equal
|
||||
for (int i = 0; i < 16; i += delta)
|
||||
sum += aFOO[i];
|
||||
RET[programIndex] = extract(sum, 0);
|
||||
}
|
||||
|
||||
// Stores the constant 136 into RET[programIndex].
// NOTE(review): presumably the expected output that the harness compares
// with what f_f wrote; 136 equals 1 + 2 + ... + 16, which would match
// aFOO[k] == k + 1 for k in [0, 16) -- confirm against the harness.
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 136;
|
||||
}
|
||||
Reference in New Issue
Block a user