Add docs and example
This commit is contained in:
@@ -4743,13 +4743,13 @@ have a declaration like:
|
|||||||
};
|
};
|
||||||
|
|
||||||
Because ``varying`` types have size that depends on the size of the gang of
|
Because ``varying`` types have size that depends on the size of the gang of
|
||||||
program instances, ``ispc`` prohibits any varying types from being used in
|
program instances, ``ispc`` has restrictions on using varying types in
|
||||||
parameters to functions with the ``export`` qualifier. (``ispc`` also
|
parameters to functions with the ``export`` qualifier. ``ispc`` prohibits
|
||||||
prohibits passing structures that themselves have varying types as members,
|
parameters to exported functions to have varying type unless the parameter is
|
||||||
etc.) Thus, all datatypes that are shared with the application must have
|
of pointer type. (That is, ``varying float`` isn't allowed, but ``varying float * uniform``
|
||||||
the ``uniform`` or ``soa`` rate qualifier applied to them. (See `Use
|
(uniform pointer to varying float) is permitted.) Care must be taken
|
||||||
"Structure of Arrays" Layout When Possible`_ in the Performance Guide for
|
by the programmer to ensure that the data being accessed through any
|
||||||
more discussion of how to load vectors of SOA data from the application.)
|
pointers to varying data has the correct organization.
|
||||||
|
|
||||||
Similarly, ``struct`` types shared with the application can also have
|
Similarly, ``struct`` types shared with the application can also have
|
||||||
embedded pointers.
|
embedded pointers.
|
||||||
@@ -4770,6 +4770,30 @@ On the ``ispc`` side, the corresponding ``struct`` declaration is:
|
|||||||
float * uniform foo, * uniform bar;
|
float * uniform foo, * uniform bar;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
If a pointer to a varying ``struct`` type appears in an exported function,
|
||||||
|
the generated header file will have a definition like (for 8-wide SIMD):
|
||||||
|
|
||||||
|
::
|
||||||
|
|
||||||
|
// C/C++ code
|
||||||
|
struct Node {
|
||||||
|
int count[8];
|
||||||
|
float pos[3][8];
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
In the case of multiple target compilation, ``ispc`` will generate multiple
|
||||||
|
header files and a "general" header file with definitions for multiple sizes.
|
||||||
|
Any pointers to varyings in exported functions will be rewritten as ``void *``.
|
||||||
|
At runtime, the ``ispc`` dispatch mechanism will cast these pointers to the appropriate
|
||||||
|
types. Programmers can
|
||||||
|
provide C/C++ code with a mechanism to determine the gang width used
|
||||||
|
at runtime by ``ispc`` by creating an exported function that simply
|
||||||
|
returns the value of ``programCount``. An example of such a function
|
||||||
|
is provided in the file ``examples/util/util.isph`` included in the ``ispc``
|
||||||
|
distribution.
|
||||||
|
|
||||||
|
|
||||||
There is one subtlety related to data layout to be aware of: ``ispc``
|
There is one subtlety related to data layout to be aware of: ``ispc``
|
||||||
stores ``uniform`` short-vector types in memory with their first element at
|
stores ``uniform`` short-vector types in memory with their first element at
|
||||||
the machine's natural vector alignment (i.e. 16 bytes for a target that is
|
the machine's natural vector alignment (i.e. 16 bytes for a target that is
|
||||||
|
|||||||
@@ -69,6 +69,7 @@ static PerfTest tests[] = {
|
|||||||
{ xyzSumAOS, "serial", ispc::xyzSumAOSStdlib, "ispc", "AOS vector element sum (stdlib swizzle)" },
|
{ xyzSumAOS, "serial", ispc::xyzSumAOSStdlib, "ispc", "AOS vector element sum (stdlib swizzle)" },
|
||||||
{ xyzSumAOS, "serial", ispc::xyzSumAOSNoCoalesce, "ispc", "AOS vector element sum (no coalescing)" },
|
{ xyzSumAOS, "serial", ispc::xyzSumAOSNoCoalesce, "ispc", "AOS vector element sum (no coalescing)" },
|
||||||
{ xyzSumSOA, "serial", ispc::xyzSumSOA, "ispc", "SOA vector element sum" },
|
{ xyzSumSOA, "serial", ispc::xyzSumSOA, "ispc", "SOA vector element sum" },
|
||||||
|
{ xyzSumSOA, "serial", (FuncType *) ispc::xyzSumVarying, "ispc", "Varying vector element sum" },
|
||||||
{ ispc::gathers, "gather", ispc::loads, "vector load", "Memory reads" },
|
{ ispc::gathers, "gather", ispc::loads, "vector load", "Memory reads" },
|
||||||
{ ispc::scatters, "scatter", ispc::stores, "vector store", "Memory writes" },
|
{ ispc::scatters, "scatter", ispc::stores, "vector store", "Memory writes" },
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -104,6 +104,50 @@ export void xyzSumSOA(uniform float array[], uniform int count,
|
|||||||
result[2] = reduce_add(zsum);
|
result[2] = reduce_add(zsum);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export void xyzSumVarying(varying float array[], uniform int count,
|
||||||
|
uniform float zeros[], uniform float result[]) {
|
||||||
|
float xsum = 0, ysum = 0, zsum = 0;
|
||||||
|
varying float * uniform ap = array;
|
||||||
|
assert(programCount <= 8);
|
||||||
|
|
||||||
|
for (uniform int i = 0; i < count/3; i += 8) {
|
||||||
|
if (programCount == 4) {
|
||||||
|
float x0 = ap[0];
|
||||||
|
float y0 = ap[2];
|
||||||
|
float z0 = ap[4];
|
||||||
|
|
||||||
|
xsum += x0;
|
||||||
|
ysum += y0;
|
||||||
|
zsum += z0;
|
||||||
|
|
||||||
|
float x1 = ap[1];
|
||||||
|
float y1 = ap[3];
|
||||||
|
float z1 = ap[5];
|
||||||
|
|
||||||
|
xsum += x1;
|
||||||
|
ysum += y1;
|
||||||
|
zsum += z1;
|
||||||
|
|
||||||
|
ap += 6;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// programCount == 8
|
||||||
|
float x = ap[0];
|
||||||
|
float y = ap[1];
|
||||||
|
float z = ap[2];
|
||||||
|
|
||||||
|
xsum += x;
|
||||||
|
ysum += y;
|
||||||
|
zsum += z;
|
||||||
|
|
||||||
|
ap += 3;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
result[0] = reduce_add(xsum);
|
||||||
|
result[1] = reduce_add(ysum);
|
||||||
|
result[2] = reduce_add(zsum);
|
||||||
|
}
|
||||||
|
|
||||||
export void gathers(uniform float array[], uniform int count,
|
export void gathers(uniform float array[], uniform int count,
|
||||||
uniform float zeros[], uniform float result[]) {
|
uniform float zeros[], uniform float result[]) {
|
||||||
float sum = 0;
|
float sum = 0;
|
||||||
|
|||||||
Reference in New Issue
Block a user