Add reduce_equal() function to standard library.
This commit is contained in:
@@ -409,6 +409,7 @@ define internal float @__reduce_max_float(<8 x float>) nounwind readnone alwaysi
|
||||
reduce8(float, @__max_varying_float, @__max_uniform_float)
|
||||
}
|
||||
|
||||
reduce_equal(8)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; horizontal int32 ops
|
||||
|
||||
@@ -376,6 +376,7 @@ define internal i64 @__reduce_max_uint64(<4 x i64>) nounwind readnone {
|
||||
reduce4(i64, @__max_varying_uint64, @__max_uniform_uint64)
|
||||
}
|
||||
|
||||
reduce_equal(4)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; masked store
|
||||
|
||||
@@ -434,6 +434,8 @@ define internal i64 @__reduce_max_uint64(<8 x i64>) nounwind readnone {
|
||||
reduce8(i64, @__max_varying_uint64, @__max_uniform_uint64)
|
||||
}
|
||||
|
||||
reduce_equal(8)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; unaligned loads/loads+broadcasts
|
||||
|
||||
|
||||
75
builtins.m4
75
builtins.m4
@@ -1400,6 +1400,81 @@ done:
|
||||
}
|
||||
')
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; reduce_equal
|
||||
|
||||
; count trailing zeros
|
||||
declare i32 @llvm.cttz.i32(i32)
|
||||
|
||||
define(`reduce_equal_aux', `
|
||||
define internal i1 @__reduce_equal_$3(<$1 x $2> %v, $2 * %samevalue,
|
||||
<$1 x i32> %mask) nounwind alwaysinline {
|
||||
entry:
|
||||
%mm = call i32 @__movmsk(<$1 x i32> %mask)
|
||||
%allon = icmp eq i32 %mm, eval((1<<$1)-1)
|
||||
br i1 %allon, label %check_neighbors, label %domixed
|
||||
|
||||
domixed:
|
||||
; the mask is mixed on/off. First see if the lanes are all off
|
||||
%alloff = icmp eq i32 %mm, 0
|
||||
br i1 %alloff, label %doalloff, label %actuallymixed
|
||||
|
||||
doalloff:
|
||||
ret i1 undef ;; should we return an actual value here?
|
||||
|
||||
actuallymixed:
|
||||
; First, figure out which lane is the first active one
|
||||
%first = call i32 @llvm.cttz.i32(i32 %mm)
|
||||
%baseval = extractelement <$1 x $2> %v, i32 %first
|
||||
%basev1 = bitcast $2 %baseval to <1 x $2>
|
||||
; get a vector that is that value smeared across all elements
|
||||
%basesmear = shufflevector <1 x $2> %basev1, <1 x $2> undef,
|
||||
<$1 x i32> < forloop(i, 0, eval($1-2), `i32 0, ') i32 0 >
|
||||
|
||||
; now do a blend of that vector with the original vector, such that the
|
||||
; result will be the original value for the active lanes, and the value
|
||||
; from the first active lane for the inactive lanes. Given that, we can
|
||||
; just unconditionally check if the lanes are all equal in check_neighbors
|
||||
; below without worrying about inactive lanes...
|
||||
%ptr = alloca <$1 x $2>
|
||||
store <$1 x $2> %basesmear, <$1 x $2> * %ptr
|
||||
%castptr = bitcast <$1 x $2> * %ptr to <$1 x $4> *
|
||||
%castv = bitcast <$1 x $2> %v to <$1 x $4>
|
||||
call void @__masked_store_blend_$6(<$1 x $4> * %castptr, <$1 x $4> %castv, <$1 x i32> %mask)
|
||||
%blendvec = load <$1 x $2> * %ptr
|
||||
br label %check_neighbors
|
||||
|
||||
check_neighbors:
|
||||
%vec = phi <$1 x $2> [ %blendvec, %actuallymixed ], [ %v, %entry ]
|
||||
; now we can just rotate once and compare with the vector, which ends
|
||||
; up comparing each element to its neighbor on the right. Then see if
|
||||
; all of those values are true; if so, then all of the elements are equal..
|
||||
%castvec = bitcast <$1 x $2> %vec to <$1 x $4>
|
||||
%castvr = call <$1 x $4> @__rotate_int$6(<$1 x $4> %castvec, i32 1)
|
||||
%vr = bitcast <$1 x $4> %castvr to <$1 x $2>
|
||||
%eq = $5 eq <$1 x $2> %vec, %vr
|
||||
%eq32 = sext <$1 x i1> %eq to <$1 x i32>
|
||||
%eqmm = call i32 @__movmsk(<$1 x i32> %eq32)
|
||||
%alleq = icmp eq i32 %eqmm, eval((1<<$1)-1)
|
||||
br i1 %alleq, label %all_equal, label %not_all_equal
|
||||
|
||||
all_equal:
|
||||
%the_value = extractelement <$1 x $2> %vec, i32 0
|
||||
store $2 %the_value, $2 * %samevalue
|
||||
ret i1 true
|
||||
|
||||
not_all_equal:
|
||||
ret i1 false
|
||||
}
|
||||
')
|
||||
|
||||
define(`reduce_equal', `
|
||||
reduce_equal_aux($1, i32, int32, i32, icmp, 32)
|
||||
reduce_equal_aux($1, float, float, i32, fcmp, 32)
|
||||
reduce_equal_aux($1, i64, int64, i64, icmp, 64)
|
||||
reduce_equal_aux($1, double, double, i64, fcmp, 64)
|
||||
')
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; per_lane
|
||||
;;
|
||||
|
||||
@@ -1823,7 +1823,34 @@ given value across all of the currently-executing vector lanes.
|
||||
uniform int reduce_max(int a, int b)
|
||||
uniform unsigned int reduce_max(unsigned int a, unsigned int b)
|
||||
|
||||
Finally, you can check to see if a particular variable has the same value in
|
||||
all of the currently-running program instances:
|
||||
|
||||
::
|
||||
|
||||
uniform bool reduce_equal(int32 v)
|
||||
uniform bool reduce_equal(unsigned int32 v)
|
||||
uniform bool reduce_equal(float v)
|
||||
uniform bool reduce_equal(int64 v)
|
||||
uniform bool reduce_equal(unsigned int64 v)
|
||||
uniform bool reduce_equal(double v)
|
||||
|
||||
There are also variants of these functions that return the value as a
|
||||
``uniform`` in the case where the values are all the same.
|
||||
|
||||
::
|
||||
|
||||
uniform bool reduce_equal(int32 v, reference uniform int32 sameval)
|
||||
uniform bool reduce_equal(unsigned int32 v,
|
||||
reference uniform unsigned int32 sameval)
|
||||
uniform bool reduce_equal(float v, reference uniform float sameval)
|
||||
uniform bool reduce_equal(int64 v, reference uniform int64 sameval)
|
||||
uniform bool reduce_equal(unsigned int64 v,
|
||||
reference uniform unsigned int64 sameval)
|
||||
uniform bool reduce_equal(double v, reference uniform double sameval)
|
||||
|
||||
The value returned by the ``reduce_equal()`` function is undefined if
|
||||
it is called when none of the program instances are running.
|
||||
|
||||
Packed Load and Store Operations
|
||||
--------------------------------
|
||||
|
||||
16
stdlib.ispc
16
stdlib.ispc
@@ -471,6 +471,22 @@ static inline uniform unsigned int64 reduce_max(unsigned int64 v) {
|
||||
return __reduce_max_uint64(__mask ? v : 0);
|
||||
}
|
||||
|
||||
#define REDUCE_EQUAL(TYPE, FUNCTYPE) \
|
||||
static inline uniform bool reduce_equal(TYPE v) { \
|
||||
uniform TYPE unusedValue; \
|
||||
return __reduce_equal_##FUNCTYPE(v, unusedValue, (int32)__mask); \
|
||||
} \
|
||||
static inline uniform bool reduce_equal(TYPE v, reference uniform TYPE value) { \
|
||||
return __reduce_equal_##FUNCTYPE(v, value, (int32)__mask); \
|
||||
}
|
||||
|
||||
REDUCE_EQUAL(int32, int32)
|
||||
REDUCE_EQUAL(unsigned int32, int32)
|
||||
REDUCE_EQUAL(float, float)
|
||||
REDUCE_EQUAL(int64, int64)
|
||||
REDUCE_EQUAL(unsigned int64, int64)
|
||||
REDUCE_EQUAL(double, double)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// packed load, store
|
||||
|
||||
|
||||
11
tests/reduce-equal-1.ispc
Normal file
11
tests/reduce-equal-1.ispc
Normal file
@@ -0,0 +1,11 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
int a = b;
|
||||
RET[programIndex] = reduce_equal(a) ? 1 : 0;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 1;
|
||||
}
|
||||
13
tests/reduce-equal-10.ispc
Normal file
13
tests/reduce-equal-10.ispc
Normal file
@@ -0,0 +1,13 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
unsigned int64 a = aFOO[programIndex&1];
|
||||
RET[programIndex] = 1;
|
||||
if (programIndex & 1)
|
||||
RET[programIndex] = reduce_equal(a) ? 1 : 0;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 1;
|
||||
}
|
||||
13
tests/reduce-equal-11.ispc
Normal file
13
tests/reduce-equal-11.ispc
Normal file
@@ -0,0 +1,13 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
double a = aFOO[programIndex];
|
||||
RET[programIndex] = 0;
|
||||
if (programIndex & 1)
|
||||
RET[programIndex] = reduce_equal(a) ? 1 : 0;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 0;
|
||||
}
|
||||
18
tests/reduce-equal-12.ispc
Normal file
18
tests/reduce-equal-12.ispc
Normal file
@@ -0,0 +1,18 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
int a = 10 + aFOO[programIndex/2];
|
||||
RET[programIndex] = 1;
|
||||
uniform int sameVal;
|
||||
uniform bool re;
|
||||
if (a <= 11) {
|
||||
re = reduce_equal(a, sameVal);
|
||||
//CO print("% % %\n", re, sameVal, a);
|
||||
}
|
||||
RET[programIndex] = ((int)re << 8) + sameVal;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 267;
|
||||
}
|
||||
17
tests/reduce-equal-13.ispc
Normal file
17
tests/reduce-equal-13.ispc
Normal file
@@ -0,0 +1,17 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
int a = aFOO[programIndex&1];
|
||||
RET[programIndex] = 1;
|
||||
uniform bool re;
|
||||
uniform int val;
|
||||
if (programIndex & 1) {
|
||||
re = reduce_equal(a, val);
|
||||
}
|
||||
RET[programIndex] = ((int)re << 8) + val;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 258;
|
||||
}
|
||||
13
tests/reduce-equal-2.ispc
Normal file
13
tests/reduce-equal-2.ispc
Normal file
@@ -0,0 +1,13 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
int a = aFOO[programIndex/2];
|
||||
RET[programIndex] = 1;
|
||||
if (a == 1)
|
||||
RET[programIndex] = reduce_equal(a) ? 1 : 0;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 1;
|
||||
}
|
||||
13
tests/reduce-equal-3.ispc
Normal file
13
tests/reduce-equal-3.ispc
Normal file
@@ -0,0 +1,13 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
int a = aFOO[programIndex/2];
|
||||
RET[programIndex] = 1;
|
||||
if (a < programCount + 4)
|
||||
RET[programIndex] = reduce_equal(a) ? 1 : 0;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 0;
|
||||
}
|
||||
15
tests/reduce-equal-4.ispc
Normal file
15
tests/reduce-equal-4.ispc
Normal file
@@ -0,0 +1,15 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
int a = aFOO[programIndex/(programCount/2)];
|
||||
RET[programIndex] = 0;
|
||||
if (programIndex >= programCount/2 && a < 4)
|
||||
RET[programIndex] = reduce_equal(a) ? 1 : 0;
|
||||
else
|
||||
RET[programIndex] = 1;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 1;
|
||||
}
|
||||
13
tests/reduce-equal-5.ispc
Normal file
13
tests/reduce-equal-5.ispc
Normal file
@@ -0,0 +1,13 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
int a = aFOO[programIndex&1];
|
||||
RET[programIndex] = 1;
|
||||
if (programIndex & 1)
|
||||
RET[programIndex] = reduce_equal(a) ? 1 : 0;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 1;
|
||||
}
|
||||
13
tests/reduce-equal-6.ispc
Normal file
13
tests/reduce-equal-6.ispc
Normal file
@@ -0,0 +1,13 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
int a = aFOO[programIndex];
|
||||
RET[programIndex] = 0;
|
||||
if (programIndex & 1)
|
||||
RET[programIndex] = reduce_equal(a) ? 1 : 0;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 0;
|
||||
}
|
||||
13
tests/reduce-equal-7.ispc
Normal file
13
tests/reduce-equal-7.ispc
Normal file
@@ -0,0 +1,13 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
float a = aFOO[programIndex/2];
|
||||
RET[programIndex] = 1;
|
||||
if (a == 1)
|
||||
RET[programIndex] = reduce_equal(a) ? 1 : 0;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 1;
|
||||
}
|
||||
13
tests/reduce-equal-8.ispc
Normal file
13
tests/reduce-equal-8.ispc
Normal file
@@ -0,0 +1,13 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
int64 a = aFOO[programIndex/2];
|
||||
RET[programIndex] = 1;
|
||||
if (a == 1)
|
||||
RET[programIndex] = reduce_equal(a) ? 1 : 0;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 1;
|
||||
}
|
||||
15
tests/reduce-equal-9.ispc
Normal file
15
tests/reduce-equal-9.ispc
Normal file
@@ -0,0 +1,15 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
double a = aFOO[programIndex/2];
|
||||
RET[programIndex] = 1;
|
||||
uniform bool eq = false;
|
||||
if (a < 4)
|
||||
eq = reduce_equal(a);
|
||||
RET[programIndex] = eq;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 0;
|
||||
}
|
||||
11
tests/reduce-equal.ispc
Normal file
11
tests/reduce-equal.ispc
Normal file
@@ -0,0 +1,11 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
float a = aFOO[programIndex];
|
||||
RET[programIndex] = reduce_equal(a);
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 0;
|
||||
}
|
||||
Reference in New Issue
Block a user