diff --git a/docs/ispc.rst b/docs/ispc.rst index 9464dcde..3ac57e86 100644 --- a/docs/ispc.rst +++ b/docs/ispc.rst @@ -4743,13 +4743,13 @@ have a declaration like: }; Because ``varying`` types have size that depends on the size of the gang of -program instances, ``ispc`` prohibits any varying types from being used in -parameters to functions with the ``export`` qualifier. (``ispc`` also -prohibits passing structures that themselves have varying types as members, -etc.) Thus, all datatypes that are shared with the application must have -the ``uniform`` or ``soa`` rate qualifier applied to them. (See `Use -"Structure of Arrays" Layout When Possible`_ in the Performance Guide for -more discussion of how to load vectors of SOA data from the application.) +program instances, ``ispc`` has restrictions on using varying types in +parameters to functions with the ``export`` qualifier. ``ispc`` prohibits +parameters to exported functions from having varying type unless the parameter is +of pointer type. (That is, ``varying float`` isn't allowed, but ``varying float * uniform`` +(uniform pointer to varying float) is permitted.) Care must be taken +by the programmer to ensure that the data being accessed through any +pointers to varying data has the correct organization. Similarly, ``struct`` types shared with the application can also have embedded pointers. @@ -4770,6 +4770,30 @@ On the ``ispc`` side, the corresponding ``struct`` declaration is: float * uniform foo, * uniform bar; }; +If a pointer to a varying ``struct`` type appears in an exported function, +the generated header file will have a definition like (for 8-wide SIMD): + +:: + + // C/C++ code + struct Node { + int count[8]; + float pos[3][8]; + }; + + +In the case of multiple target compilation, ``ispc`` will generate multiple +header files and a "general" header file with definitions for multiple sizes. +Any pointers to varyings in exported functions will be rewritten as ``void *``. 
+At runtime, the ``ispc`` dispatch mechanism will cast these pointers to the appropriate +types. Programmers can +provide C/C++ code with a mechanism to determine the gang width used +at runtime by ``ispc`` by creating an exported function that simply +returns the value of ``programCount``. An example of such a function +is provided in the file ``examples/util/util.isph`` included in the ``ispc`` +distribution. + + There is one subtlety related to data layout to be aware of: ``ispc`` stores ``uniform`` short-vector types in memory with their first element at the machine's natural vector alignment (i.e. 16 bytes for a target that is diff --git a/examples/perfbench/perfbench.cpp b/examples/perfbench/perfbench.cpp index 04e72bd9..1defffe7 100644 --- a/examples/perfbench/perfbench.cpp +++ b/examples/perfbench/perfbench.cpp @@ -69,6 +69,7 @@ static PerfTest tests[] = { { xyzSumAOS, "serial", ispc::xyzSumAOSStdlib, "ispc", "AOS vector element sum (stdlib swizzle)" }, { xyzSumAOS, "serial", ispc::xyzSumAOSNoCoalesce, "ispc", "AOS vector element sum (no coalescing)" }, { xyzSumSOA, "serial", ispc::xyzSumSOA, "ispc", "SOA vector element sum" }, + { xyzSumSOA, "serial", (FuncType *) ispc::xyzSumVarying, "ispc", "Varying vector element sum" }, { ispc::gathers, "gather", ispc::loads, "vector load", "Memory reads" }, { ispc::scatters, "scatter", ispc::stores, "vector store", "Memory writes" }, }; diff --git a/examples/perfbench/perfbench.ispc b/examples/perfbench/perfbench.ispc index 38fe6cee..54c3137a 100644 --- a/examples/perfbench/perfbench.ispc +++ b/examples/perfbench/perfbench.ispc @@ -104,6 +104,50 @@ export void xyzSumSOA(uniform float array[], uniform int count, result[2] = reduce_add(zsum); } +export void xyzSumVarying(varying float array[], uniform int count, + uniform float zeros[], uniform float result[]) { + float xsum = 0, ysum = 0, zsum = 0; + varying float * uniform ap = array; + assert(programCount <= 8); + + for (uniform int i = 0; i < count/3; i += 8) { 
+ if (programCount == 4) { + float x0 = ap[0]; + float y0 = ap[2]; + float z0 = ap[4]; + + xsum += x0; + ysum += y0; + zsum += z0; + + float x1 = ap[1]; + float y1 = ap[3]; + float z1 = ap[5]; + + xsum += x1; + ysum += y1; + zsum += z1; + + ap += 6; + } + else { + // programCount == 8 + float x = ap[0]; + float y = ap[1]; + float z = ap[2]; + + xsum += x; + ysum += y; + zsum += z; + + ap += 3; + } + } + result[0] = reduce_add(xsum); + result[1] = reduce_add(ysum); + result[2] = reduce_add(zsum); +} + export void gathers(uniform float array[], uniform int count, uniform float zeros[], uniform float result[]) { float sum = 0;