Add support for scan operations across program instances (add, and, or).
This commit is contained in:
@@ -41,6 +41,7 @@
|
|||||||
|
|
||||||
stdlib_core(8)
|
stdlib_core(8)
|
||||||
packed_load_and_store(8)
|
packed_load_and_store(8)
|
||||||
|
scans(8)
|
||||||
int64minmax(8)
|
int64minmax(8)
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|||||||
@@ -35,6 +35,7 @@
|
|||||||
; Define some basics for a 4-wide target
|
; Define some basics for a 4-wide target
|
||||||
stdlib_core(4)
|
stdlib_core(4)
|
||||||
packed_load_and_store(4)
|
packed_load_and_store(4)
|
||||||
|
scans(4)
|
||||||
|
|
||||||
; Include the various definitions of things that only require SSE1 and SSE2
|
; Include the various definitions of things that only require SSE1 and SSE2
|
||||||
include(`builtins-sse.ll')
|
include(`builtins-sse.ll')
|
||||||
|
|||||||
@@ -35,6 +35,7 @@
|
|||||||
; Define common 4-wide stuff
|
; Define common 4-wide stuff
|
||||||
stdlib_core(4)
|
stdlib_core(4)
|
||||||
packed_load_and_store(4)
|
packed_load_and_store(4)
|
||||||
|
scans(4)
|
||||||
|
|
||||||
; Define the stuff that can be done with base SSE1/SSE2 instructions
|
; Define the stuff that can be done with base SSE1/SSE2 instructions
|
||||||
include(`builtins-sse.ll')
|
include(`builtins-sse.ll')
|
||||||
@@ -229,7 +230,6 @@ define internal float @__reduce_add_float(<4 x float>) nounwind readonly alwaysi
|
|||||||
ret float %scalar
|
ret float %scalar
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; masked store
|
;; masked store
|
||||||
|
|
||||||
|
|||||||
@@ -38,6 +38,7 @@
|
|||||||
|
|
||||||
stdlib_core(8)
|
stdlib_core(8)
|
||||||
packed_load_and_store(8)
|
packed_load_and_store(8)
|
||||||
|
scans(8)
|
||||||
int64minmax(8)
|
int64minmax(8)
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|||||||
60
builtins.m4
60
builtins.m4
@@ -1475,6 +1475,66 @@ reduce_equal_aux($1, i64, int64, i64, icmp, 64)
|
|||||||
reduce_equal_aux($1, double, double, i64, fcmp, 64)
|
reduce_equal_aux($1, double, double, i64, fcmp, 64)
|
||||||
')
|
')
|
||||||
|
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;; prefix sum stuff
|
||||||
|
|
||||||
|
; $1: vector width (e.g. 4)
|
||||||
|
; $2: vector element type (e.g. float)
|
||||||
|
; $3: bit width of vector element type (e.g. 32)
|
||||||
|
; $4: operator to apply (e.g. fadd)
|
||||||
|
; $5: identity element value (e.g. 0)
|
||||||
|
; $6: suffix for function (e.g. add_float)
|
||||||
|
|
||||||
|
define(`exclusive_scan', `
|
||||||
|
define internal <$1 x $2> @__exclusive_scan_$6(<$1 x $2> %v,
|
||||||
|
<$1 x i32> %mask) nounwind alwaysinline {
|
||||||
|
; first, set the value of any off lanes to the identity value
|
||||||
|
%ptr = alloca <$1 x $2>
|
||||||
|
%idvec1 = bitcast $2 $5 to <1 x $2>
|
||||||
|
%idvec = shufflevector <1 x $2> %idvec1, <1 x $2> undef,
|
||||||
|
<$1 x i32> < forloop(i, 0, eval($1-2), `i32 0, ') i32 0 >
|
||||||
|
store <$1 x $2> %idvec, <$1 x $2> * %ptr
|
||||||
|
%ptr`'$3 = bitcast <$1 x $2> * %ptr to <$1 x i`'$3> *
|
||||||
|
%vi = bitcast <$1 x $2> %v to <$1 x i`'$3>
|
||||||
|
call void @__masked_store_blend_$3(<$1 x i`'$3> * %ptr`'$3, <$1 x i`'$3> %vi,
|
||||||
|
<$1 x i32> %mask)
|
||||||
|
%v_id = load <$1 x $2> * %ptr
|
||||||
|
|
||||||
|
; extract elements of the vector to use in computing the scan
|
||||||
|
forloop(i, 0, eval($1-1), `
|
||||||
|
%v`'i = extractelement <$1 x $2> %v_id, i32 i')
|
||||||
|
|
||||||
|
; and just compute the scan directly.
|
||||||
|
; 0th element is the identity (so nothing to do here),
|
||||||
|
; 1st element is identity (op) the 0th element of the original vector,
|
||||||
|
; each successive element is the previous element (op) the previous element
|
||||||
|
; of the original vector
|
||||||
|
%s1 = $4 $2 $5, %v0
|
||||||
|
forloop(i, 2, eval($1-1), `
|
||||||
|
%s`'i = $4 $2 %s`'eval(i-1), %v`'eval(i-1)')
|
||||||
|
|
||||||
|
; and fill in the result vector
|
||||||
|
%r0 = insertelement <$1 x $2> undef, $2 $5, i32 0 ; 0th element gets identity
|
||||||
|
forloop(i, 1, eval($1-1), `
|
||||||
|
%r`'i = insertelement <$1 x $2> %r`'eval(i-1), $2 %s`'i, i32 i')
|
||||||
|
|
||||||
|
ret <$1 x $2> %r`'eval($1-1)
|
||||||
|
}
|
||||||
|
')
|
||||||
|
|
||||||
|
define(`scans', `
|
||||||
|
exclusive_scan($1, i32, 32, add, 0, add_i32)
|
||||||
|
exclusive_scan($1, float, 32, fadd, zeroinitializer, add_float)
|
||||||
|
exclusive_scan($1, i64, 64, add, 0, add_i64)
|
||||||
|
exclusive_scan($1, double, 64, fadd, zeroinitializer, add_double)
|
||||||
|
|
||||||
|
exclusive_scan($1, i32, 32, and, -1, and_i32)
|
||||||
|
exclusive_scan($1, i64, 64, and, -1, and_i64)
|
||||||
|
|
||||||
|
exclusive_scan($1, i32, 32, or, 0, or_i32)
|
||||||
|
exclusive_scan($1, i64, 64, or, 0, or_i64)
|
||||||
|
')
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; per_lane
|
;; per_lane
|
||||||
;;
|
;;
|
||||||
|
|||||||
@@ -101,6 +101,7 @@ Contents:
|
|||||||
+ `"Inline" Aggressively`_
|
+ `"Inline" Aggressively`_
|
||||||
+ `Small Performance Tricks`_
|
+ `Small Performance Tricks`_
|
||||||
+ `Instrumenting Your ISPC Programs`_
|
+ `Instrumenting Your ISPC Programs`_
|
||||||
|
+ `Using Scan Operations For Variable Output`_
|
||||||
|
|
||||||
* `Disclaimer and Legal Information`_
|
* `Disclaimer and Legal Information`_
|
||||||
|
|
||||||
@@ -1852,6 +1853,44 @@ There are also variants of these functions that return the value as a
|
|||||||
The value returned by the ``reduce_equal()`` function is undefined if
|
The value returned by the ``reduce_equal()`` function is undefined if
|
||||||
it is called when none of the program instances are running.
|
it is called when none of the program instances are running.
|
||||||
|
|
||||||
|
There are also a number of functions to compute "scan"s of values across
|
||||||
|
the program instances. For example, the ``exclusive_scan_add()`` function
|
||||||
|
computes, for each program instance, the sum of the given value over all of
|
||||||
|
the preceding program instances. (The scans currently available in
|
||||||
|
``ispc`` are all so-called "exclusive" scans, meaning that the value
|
||||||
|
computed for a given element does not include the value provided for that
|
||||||
|
element.) In C code, an exclusive add scan over an array might be
|
||||||
|
implemented as:
|
||||||
|
|
||||||
|
::
|
||||||
|
|
||||||
|
void scan_add(int *in_array, int *result_array, int count) {
|
||||||
|
result_array[0] = 0;
|
||||||
|
for (int i = 1; i < count; ++i)
|
||||||
|
result_array[i] = result_array[i-1] + in_array[i-1];
|
||||||
|
}
|
||||||
|
|
||||||
|
``ispc`` provides the following scan functions--addition, bitwise-and, and
|
||||||
|
bitwise-or are available:
|
||||||
|
|
||||||
|
::
|
||||||
|
|
||||||
|
int32 exclusive_scan_add(int32 v)
|
||||||
|
unsigned int32 exclusive_scan_add(unsigned int32 v)
|
||||||
|
float exclusive_scan_add(float v)
|
||||||
|
int64 exclusive_scan_add(int64 v)
|
||||||
|
unsigned int64 exclusive_scan_add(unsigned int64 v)
|
||||||
|
double exclusive_scan_add(double v)
|
||||||
|
int32 exclusive_scan_and(int32 v)
|
||||||
|
unsigned int32 exclusive_scan_and(unsigned int32 v)
|
||||||
|
int64 exclusive_scan_and(int64 v)
|
||||||
|
unsigned int64 exclusive_scan_and(unsigned int64 v)
|
||||||
|
int32 exclusive_scan_or(int32 v)
|
||||||
|
unsigned int32 exclusive_scan_or(unsigned int32 v)
|
||||||
|
int64 exclusive_scan_or(int64 v)
|
||||||
|
unsigned int64 exclusive_scan_or(unsigned int64 v)
|
||||||
|
|
||||||
|
|
||||||
Packed Load and Store Operations
|
Packed Load and Store Operations
|
||||||
--------------------------------
|
--------------------------------
|
||||||
|
|
||||||
@@ -2760,6 +2799,38 @@ active upon function entry.
|
|||||||
ao.ispc(0088) - function entry: 36928 calls (0 / 0.00% all off!), 97.40% active lanes
|
ao.ispc(0088) - function entry: 36928 calls (0 / 0.00% all off!), 97.40% active lanes
|
||||||
...
|
...
|
||||||
|
|
||||||
|
|
||||||
|
Using Scan Operations For Variable Output
|
||||||
|
-----------------------------------------
|
||||||
|
|
||||||
|
One important application of the ``exclusive_scan_add()`` function in the
|
||||||
|
standard library is when program instances want to generate a variable amount
|
||||||
|
of output and when one would like that output to be densely packed in a
|
||||||
|
single array. For example, consider the code fragment below:
|
||||||
|
|
||||||
|
::
|
||||||
|
|
||||||
|
uniform int func(uniform float outArray[], ...) {
|
||||||
|
int numOut = ...; // figure out how many to be output
|
||||||
|
float outLocal[MAX_OUT]; // staging area
|
||||||
|
// put results in outLocal[0], ..., outLocal[numOut-1]
|
||||||
|
int startOffset = exclusive_scan_add(numOut);
|
||||||
|
for (int i = 0; i < numOut; ++i)
|
||||||
|
outArray[startOffset + i] = outLocal[i];
|
||||||
|
return reduce_add(numOut);
|
||||||
|
}
|
||||||
|
|
||||||
|
Here, each program instance has computed a number, ``numOut``, of values to
|
||||||
|
output, and has stored them in the ``outLocal`` array. Assume that four
|
||||||
|
program instances are running and that the first one wants to output one
|
||||||
|
value, the second two values, and the third and fourth three values each.
|
||||||
|
In this case, ``exclusive_scan_add()`` will return the values (0, 1, 3, 6)
|
||||||
|
to the four program instances, respectively. The first program instance
|
||||||
|
will write its one result to ``outArray[0]``, the second will write its two
|
||||||
|
values to ``outArray[1]`` and ``outArray[2]``, and so forth. The
|
||||||
|
``reduce_add`` call at the end returns the total number of values that the
|
||||||
|
program instances have written to the array.
|
||||||
|
|
||||||
Disclaimer and Legal Information
|
Disclaimer and Legal Information
|
||||||
================================
|
================================
|
||||||
|
|
||||||
|
|||||||
56
stdlib.ispc
56
stdlib.ispc
@@ -487,6 +487,62 @@ REDUCE_EQUAL(int64, int64)
|
|||||||
REDUCE_EQUAL(unsigned int64, int64)
|
REDUCE_EQUAL(unsigned int64, int64)
|
||||||
REDUCE_EQUAL(double, double)
|
REDUCE_EQUAL(double, double)
|
||||||
|
|
||||||
|
static int32 exclusive_scan_add(int32 v) {
|
||||||
|
return __exclusive_scan_add_i32(v, (int32)__mask);
|
||||||
|
}
|
||||||
|
|
||||||
|
static unsigned int32 exclusive_scan_add(unsigned int32 v) {
|
||||||
|
return __exclusive_scan_add_i32(v, __mask);
|
||||||
|
}
|
||||||
|
|
||||||
|
static float exclusive_scan_add(float v) {
|
||||||
|
return __exclusive_scan_add_float(v, __mask);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int64 exclusive_scan_add(int64 v) {
|
||||||
|
return __exclusive_scan_add_i64(v, (int32)__mask);
|
||||||
|
}
|
||||||
|
|
||||||
|
static unsigned int64 exclusive_scan_add(unsigned int64 v) {
|
||||||
|
return __exclusive_scan_add_i64(v, __mask);
|
||||||
|
}
|
||||||
|
|
||||||
|
static double exclusive_scan_add(double v) {
|
||||||
|
return __exclusive_scan_add_double(v, __mask);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int32 exclusive_scan_and(int32 v) {
|
||||||
|
return __exclusive_scan_and_i32(v, (int32)__mask);
|
||||||
|
}
|
||||||
|
|
||||||
|
static unsigned int32 exclusive_scan_and(unsigned int32 v) {
|
||||||
|
return __exclusive_scan_and_i32(v, __mask);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int64 exclusive_scan_and(int64 v) {
|
||||||
|
return __exclusive_scan_and_i64(v, (int32)__mask);
|
||||||
|
}
|
||||||
|
|
||||||
|
static unsigned int64 exclusive_scan_and(unsigned int64 v) {
|
||||||
|
return __exclusive_scan_and_i64(v, __mask);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int32 exclusive_scan_or(int32 v) {
|
||||||
|
return __exclusive_scan_or_i32(v, (int32)__mask);
|
||||||
|
}
|
||||||
|
|
||||||
|
static unsigned int32 exclusive_scan_or(unsigned int32 v) {
|
||||||
|
return __exclusive_scan_or_i32(v, __mask);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int64 exclusive_scan_or(int64 v) {
|
||||||
|
return __exclusive_scan_or_i64(v, (int32)__mask);
|
||||||
|
}
|
||||||
|
|
||||||
|
static unsigned int64 exclusive_scan_or(unsigned int64 v) {
|
||||||
|
return __exclusive_scan_or_i64(v, __mask);
|
||||||
|
}
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
// packed load, store
|
// packed load, store
|
||||||
|
|
||||||
|
|||||||
12
tests/exclusive-scan-add-1.ispc
Normal file
12
tests/exclusive-scan-add-1.ispc
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
|
||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||||
|
RET[programIndex] = exclusive_scan_add(programIndex);
|
||||||
|
}
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
uniform int result[] = { 0, 0, 1, 3, 6, 10, 15, 21, 28,
|
||||||
|
36, 45, 55, 66, 78, 91, 105, 120 };
|
||||||
|
RET[programIndex] = result[programIndex];
|
||||||
|
}
|
||||||
20
tests/exclusive-scan-add-10.ispc
Normal file
20
tests/exclusive-scan-add-10.ispc
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
|
||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||||
|
RET[programIndex] = -1;
|
||||||
|
unsigned int64 a = aFOO[programIndex];
|
||||||
|
if (programIndex & 1) {
|
||||||
|
RET[programIndex] = exclusive_scan_add(a);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
uniform int result[16] = { 0, 0, 0, 2, 0, 6, 0, 12,
|
||||||
|
0, 20, 0, 30, 0, 42, 0, 56 };
|
||||||
|
if (programIndex & 1)
|
||||||
|
RET[programIndex] = result[programIndex];
|
||||||
|
else
|
||||||
|
RET[programIndex] = -1;
|
||||||
|
}
|
||||||
12
tests/exclusive-scan-add-2.ispc
Normal file
12
tests/exclusive-scan-add-2.ispc
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
|
||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||||
|
RET[programIndex] = exclusive_scan_add(aFOO[programIndex]);
|
||||||
|
}
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
uniform int result[] = { 0, 1, 3, 6, 10, 15, 21, 28,
|
||||||
|
36, 45, 55, 66, 78, 91, 105, 120, 136 };
|
||||||
|
RET[programIndex] = result[programIndex];
|
||||||
|
}
|
||||||
17
tests/exclusive-scan-add-3.ispc
Normal file
17
tests/exclusive-scan-add-3.ispc
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
|
||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||||
|
RET[programIndex] = -1;
|
||||||
|
float a = aFOO[programIndex];
|
||||||
|
if (a <= 2)
|
||||||
|
RET[programIndex] = exclusive_scan_add(a);
|
||||||
|
}
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
uniform int result[] = { 0, 1, 3, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0 };
|
||||||
|
RET[programIndex] = -1;
|
||||||
|
if (programIndex <= 1)
|
||||||
|
RET[programIndex] = result[programIndex];
|
||||||
|
}
|
||||||
17
tests/exclusive-scan-add-4.ispc
Normal file
17
tests/exclusive-scan-add-4.ispc
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
|
||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||||
|
RET[programIndex] = -1;
|
||||||
|
float a = aFOO[programIndex];
|
||||||
|
if (a <= 2)
|
||||||
|
RET[programIndex] = exclusive_scan_add(a);
|
||||||
|
}
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
uniform int result[] = { 0, 1, 3, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0 };
|
||||||
|
RET[programIndex] = -1;
|
||||||
|
if (programIndex <= 1)
|
||||||
|
RET[programIndex] = result[programIndex];
|
||||||
|
}
|
||||||
20
tests/exclusive-scan-add-5.ispc
Normal file
20
tests/exclusive-scan-add-5.ispc
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
|
||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||||
|
RET[programIndex] = -1;
|
||||||
|
float a = aFOO[programIndex];
|
||||||
|
if (programIndex & 1) {
|
||||||
|
RET[programIndex] = exclusive_scan_add(a);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
uniform int result[16] = { 0, 0, 0, 2, 0, 6, 0, 12,
|
||||||
|
0, 20, 0, 30, 0, 42, 0, 56 };
|
||||||
|
if (programIndex & 1)
|
||||||
|
RET[programIndex] = result[programIndex];
|
||||||
|
else
|
||||||
|
RET[programIndex] = -1;
|
||||||
|
}
|
||||||
12
tests/exclusive-scan-add-6.ispc
Normal file
12
tests/exclusive-scan-add-6.ispc
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
|
||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||||
|
RET[programIndex] = exclusive_scan_add((float)programIndex);
|
||||||
|
}
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
uniform int result[] = { 0, 0, 1, 3, 6, 10, 15, 21, 28,
|
||||||
|
36, 45, 55, 66, 78, 91, 105, 120 };
|
||||||
|
RET[programIndex] = result[programIndex];
|
||||||
|
}
|
||||||
12
tests/exclusive-scan-add-7.ispc
Normal file
12
tests/exclusive-scan-add-7.ispc
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
|
||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||||
|
RET[programIndex] = exclusive_scan_add((double)aFOO[programIndex]);
|
||||||
|
}
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
uniform int result[] = { 0, 1, 3, 6, 10, 15, 21, 28,
|
||||||
|
36, 45, 55, 66, 78, 91, 105, 120, 136 };
|
||||||
|
RET[programIndex] = result[programIndex];
|
||||||
|
}
|
||||||
17
tests/exclusive-scan-add-8.ispc
Normal file
17
tests/exclusive-scan-add-8.ispc
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
|
||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||||
|
RET[programIndex] = -1;
|
||||||
|
int64 a = aFOO[programIndex];
|
||||||
|
if (a <= 2)
|
||||||
|
RET[programIndex] = exclusive_scan_add(a);
|
||||||
|
}
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
uniform int result[] = { 0, 1, 3, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0 };
|
||||||
|
RET[programIndex] = -1;
|
||||||
|
if (programIndex <= 1)
|
||||||
|
RET[programIndex] = result[programIndex];
|
||||||
|
}
|
||||||
17
tests/exclusive-scan-add-9.ispc
Normal file
17
tests/exclusive-scan-add-9.ispc
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
|
||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||||
|
RET[programIndex] = -1;
|
||||||
|
unsigned int64 a = aFOO[programIndex];
|
||||||
|
if (a <= 2)
|
||||||
|
RET[programIndex] = exclusive_scan_add(a);
|
||||||
|
}
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
uniform int result[] = { 0, 1, 3, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0 };
|
||||||
|
RET[programIndex] = -1;
|
||||||
|
if (programIndex <= 1)
|
||||||
|
RET[programIndex] = result[programIndex];
|
||||||
|
}
|
||||||
22
tests/exclusive-scan-and-1.ispc
Normal file
22
tests/exclusive-scan-and-1.ispc
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
|
||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||||
|
RET[programIndex] = -1;
|
||||||
|
int32 a = (programIndex & 1) ? 0xff : 0;
|
||||||
|
if (programIndex & 1) {
|
||||||
|
RET[programIndex] = exclusive_scan_and(a);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
if (programIndex & 1) {
|
||||||
|
if (programIndex == 1)
|
||||||
|
RET[programIndex] = -1; // 0xffffffff, made float
|
||||||
|
else
|
||||||
|
RET[programIndex] = 0xff;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
RET[programIndex] = -1;
|
||||||
|
}
|
||||||
21
tests/exclusive-scan-and-2.ispc
Normal file
21
tests/exclusive-scan-and-2.ispc
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
|
||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||||
|
RET[programIndex] = -1;
|
||||||
|
int32 a = ~(1 << programIndex);
|
||||||
|
if ((programIndex & 1) == 0) {
|
||||||
|
RET[programIndex] = exclusive_scan_and(a);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
RET[programIndex] = -1;
|
||||||
|
if ((programIndex & 1) == 0 && programIndex > 0) {
|
||||||
|
int val = 0xffffffff;
|
||||||
|
for (int i = 0; i < programIndex-1; i += 2)
|
||||||
|
val &= ~(1<<i);
|
||||||
|
RET[programIndex] = val;
|
||||||
|
}
|
||||||
|
}
|
||||||
13
tests/exclusive-scan-or-1.ispc
Normal file
13
tests/exclusive-scan-or-1.ispc
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
|
||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||||
|
RET[programIndex] = -1;
|
||||||
|
int32 a = (1 << programIndex);
|
||||||
|
RET[programIndex] = exclusive_scan_or(a);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
RET[programIndex] = (1 << programIndex) - 1;
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user