Add reduce_equal() function to standard library.

This commit is contained in:
Matt Pharr
2011-08-10 15:55:55 -07:00
parent d821a11c7c
commit 8c534d4d74
20 changed files with 313 additions and 0 deletions

View File

@@ -409,6 +409,7 @@ define internal float @__reduce_max_float(<8 x float>) nounwind readnone alwaysi
reduce8(float, @__max_varying_float, @__max_uniform_float)
}
;; Instantiate the equality-reduction functions for 8-wide vectors.
reduce_equal(8)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; horizontal int32 ops

View File

@@ -376,6 +376,7 @@ define internal i64 @__reduce_max_uint64(<4 x i64>) nounwind readnone {
reduce4(i64, @__max_varying_uint64, @__max_uniform_uint64)
}
;; Instantiate the equality-reduction functions for 4-wide vectors.
reduce_equal(4)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; masked store

View File

@@ -434,6 +434,8 @@ define internal i64 @__reduce_max_uint64(<8 x i64>) nounwind readnone {
reduce8(i64, @__max_varying_uint64, @__max_uniform_uint64)
}
;; Instantiate the equality-reduction functions for 8-wide vectors.
reduce_equal(8)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; unaligned loads/loads+broadcasts

View File

@@ -1400,6 +1400,81 @@ done:
}
')
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; reduce_equal
; count trailing zeros
declare i32 @llvm.cttz.i32(i32)
;; Emits the LLVM function @__reduce_equal_<suffix>, which reports whether
;; every active lane of a vector holds the same value.
;;
;; Macro arguments, by position:
;;   1 - number of lanes in the vector
;;   2 - LLVM element type of the vector
;;   3 - suffix appended to the emitted function name
;;   4 - integer type the vector is bitcast to for the blend and rotate calls
;;   5 - comparison instruction, icmp or fcmp
;;   6 - suffix selecting the blend and rotate helper variants
;;
;; Emitted function:
;;   %v         - the per-lane values
;;   %samevalue - receives the common value, written only when the result is true
;;   %mask      - execution mask, turned into an i32 bitmask by @__movmsk
;;   Returns true when all active lanes are equal and false when they are not.
;;   The result is undef when no lane is active.
define(`reduce_equal_aux', `
define internal i1 @__reduce_equal_$3(<$1 x $2> %v, $2 * %samevalue,
<$1 x i32> %mask) nounwind alwaysinline {
entry:
%mm = call i32 @__movmsk(<$1 x i32> %mask)
; fast path: with every lane on, the input vector can be compared directly
%allon = icmp eq i32 %mm, eval((1<<$1)-1)
br i1 %allon, label %check_neighbors, label %domixed
domixed:
; the mask is mixed on/off. First see if the lanes are all off
%alloff = icmp eq i32 %mm, 0
br i1 %alloff, label %doalloff, label %actuallymixed
doalloff:
ret i1 undef ;; should we return an actual value here?
actuallymixed:
; First, figure out which lane is the first active one
%first = call i32 @llvm.cttz.i32(i32 %mm)
%baseval = extractelement <$1 x $2> %v, i32 %first
%basev1 = bitcast $2 %baseval to <1 x $2>
; get a vector that is that value smeared across all elements
%basesmear = shufflevector <1 x $2> %basev1, <1 x $2> undef,
<$1 x i32> < forloop(i, 0, eval($1-2), `i32 0, ') i32 0 >
; now do a blend of that vector with the original vector, such that the
; result will be the original value for the active lanes, and the value
; from the first active lane for the inactive lanes. Given that, we can
; just unconditionally check if the lanes are all equal in check_neighbors
; below without worrying about inactive lanes...
%ptr = alloca <$1 x $2>
store <$1 x $2> %basesmear, <$1 x $2> * %ptr
%castptr = bitcast <$1 x $2> * %ptr to <$1 x $4> *
%castv = bitcast <$1 x $2> %v to <$1 x $4>
call void @__masked_store_blend_$6(<$1 x $4> * %castptr, <$1 x $4> %castv, <$1 x i32> %mask)
%blendvec = load <$1 x $2> * %ptr
br label %check_neighbors
check_neighbors:
%vec = phi <$1 x $2> [ %blendvec, %actuallymixed ], [ %v, %entry ]
; now we can just rotate once and compare with the vector, which ends
; up comparing each element to its neighbor on the right. Then see if
; all of those values are true; if so, then all of the elements are equal..
%castvec = bitcast <$1 x $2> %vec to <$1 x $4>
%castvr = call <$1 x $4> @__rotate_int$6(<$1 x $4> %castvec, i32 1)
%vr = bitcast <$1 x $4> %castvr to <$1 x $2>
; Pick the equality predicate: icmp takes eq, but fcmp has no plain eq
; predicate and needs an ordered or unordered one. Ordered equality is
; used so that a NaN in any compared lane makes the result false.
%eq = $5 ifelse($5, `fcmp', `oeq', `eq') <$1 x $2> %vec, %vr
%eq32 = sext <$1 x i1> %eq to <$1 x i32>
%eqmm = call i32 @__movmsk(<$1 x i32> %eq32)
%alleq = icmp eq i32 %eqmm, eval((1<<$1)-1)
br i1 %alleq, label %all_equal, label %not_all_equal
all_equal:
; every lane of %vec is the same here, so lane 0 holds the common value
%the_value = extractelement <$1 x $2> %vec, i32 0
store $2 %the_value, $2 * %samevalue
ret i1 true
not_all_equal:
ret i1 false
}
')
;; Instantiates the equality reduction for each supported element type at
;; the given vector width (the single macro argument). Each line below passes,
;; in order: vector width, element type, function-name suffix, same-sized
;; integer type, comparison instruction, and the 32 or 64 suffix that selects
;; the blend and rotate helpers.
define(`reduce_equal', `
reduce_equal_aux($1, i32, int32, i32, icmp, 32)
reduce_equal_aux($1, float, float, i32, fcmp, 32)
reduce_equal_aux($1, i64, int64, i64, icmp, 64)
reduce_equal_aux($1, double, double, i64, fcmp, 64)
')
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; per_lane
;;

View File

@@ -1823,7 +1823,34 @@ given value across all of the currently-executing vector lanes.
uniform int reduce_max(int a, int b)
uniform unsigned int reduce_max(unsigned int a, unsigned int b)
Finally, you can check whether a given variable has the same value in
all of the currently-running program instances:
::
uniform bool reduce_equal(int32 v)
uniform bool reduce_equal(unsigned int32 v)
uniform bool reduce_equal(float v)
uniform bool reduce_equal(int64 v)
uniform bool reduce_equal(unsigned int64 v)
uniform bool reduce_equal(double v)
There are also variants of these functions that, in the case where the
values are all the same, also return that value as a ``uniform`` through
the ``sameval`` reference parameter.
::
uniform bool reduce_equal(int32 v, reference uniform int32 sameval)
uniform bool reduce_equal(unsigned int32 v,
reference uniform unsigned int32 sameval)
uniform bool reduce_equal(float v, reference uniform float sameval)
uniform bool reduce_equal(int64 v, reference uniform int64 sameval)
uniform bool reduce_equal(unsigned int64 v,
reference uniform unsigned int64 sameval)
uniform bool reduce_equal(double v, reference uniform double sameval)
The value returned by the ``reduce_equal()`` function is undefined if
it is called when none of the program instances are running.
Packed Load and Store Operations
--------------------------------

View File

@@ -471,6 +471,22 @@ static inline uniform unsigned int64 reduce_max(unsigned int64 v) {
return __reduce_max_uint64(__mask ? v : 0);
}
// Generates the two reduce_equal() overloads for one element type.
//   TYPE     - ispc type of the value being tested
//   FUNCTYPE - suffix of the __reduce_equal_* builtin that does the work
// Both overloads forward the current execution mask to the builtin. The
// two-argument form hands the caller's reference through so the builtin can
// store the common value; the one-argument form passes a scratch variable
// whose contents are never read.
#define REDUCE_EQUAL(TYPE, FUNCTYPE) \
    static inline uniform bool \
    reduce_equal(TYPE v, reference uniform TYPE value) { \
        return __reduce_equal_##FUNCTYPE(v, value, (int32)__mask); \
    } \
    static inline uniform bool \
    reduce_equal(TYPE v) { \
        uniform TYPE discarded; \
        return __reduce_equal_##FUNCTYPE(v, discarded, (int32)__mask); \
    }

// Signed and unsigned integers of the same size share one builtin.
REDUCE_EQUAL(int32, int32)
REDUCE_EQUAL(unsigned int32, int32)
REDUCE_EQUAL(float, float)
REDUCE_EQUAL(int64, int64)
REDUCE_EQUAL(unsigned int64, int64)
REDUCE_EQUAL(double, double)
///////////////////////////////////////////////////////////////////////////
// packed load, store

11
tests/reduce-equal-1.ispc Normal file
View File

@@ -0,0 +1,11 @@
// Test: reduce_equal() on an int that is copied from the uniform parameter b,
// with no conditional around the call.
// Reports the number of program instances.
export uniform int width() { return programCount; }
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
// a comes from a uniform, so it is identical in every program instance
int a = b;
RET[programIndex] = reduce_equal(a) ? 1 : 0;
}
// Expected output: 1 in every entry, since all instances hold the same value.
export void result(uniform float RET[]) {
RET[programIndex] = 1;
}

View File

@@ -0,0 +1,13 @@
// Test: reduce_equal() on unsigned int64 values with only the odd-numbered
// program instances executing the call.
// Reports the number of program instances.
export uniform int width() { return programCount; }
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
// every odd-numbered instance reads aFOO[1], so the active lanes all agree
unsigned int64 a = aFOO[programIndex&1];
// even-numbered instances never reach the call and keep this value
RET[programIndex] = 1;
if (programIndex & 1)
RET[programIndex] = reduce_equal(a) ? 1 : 0;
}
// Expected output: 1 in every entry.
export void result(uniform float RET[]) {
RET[programIndex] = 1;
}

View File

@@ -0,0 +1,13 @@
// Test: reduce_equal() on double values that differ between the active
// (odd-numbered) program instances.
// NOTE(review): assumes the harness fills aFOO with distinct values and that
// more than one odd-numbered instance exists - confirm against the test driver.
// Reports the number of program instances.
export uniform int width() { return programCount; }
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
double a = aFOO[programIndex];
// even-numbered instances never reach the call and keep this value
RET[programIndex] = 0;
if (programIndex & 1)
RET[programIndex] = reduce_equal(a) ? 1 : 0;
}
// Expected output: 0 in every entry.
export void result(uniform float RET[]) {
RET[programIndex] = 0;
}

View File

@@ -0,0 +1,18 @@
// Test: the two-argument reduce_equal() overload, which also hands back the
// common value, called under a data-dependent mask.
// NOTE(review): the expected result implies aFOO[0] == 1 and that only the
// instances reading aFOO[0] satisfy a <= 11 - confirm against the test driver.
// Reports the number of program instances.
export uniform int width() { return programCount; }
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
// pairs of neighboring instances share one aFOO entry
int a = 10 + aFOO[programIndex/2];
RET[programIndex] = 1;
// both uniforms are only assigned inside the conditional below
uniform int sameVal;
uniform bool re;
if (a <= 11) {
re = reduce_equal(a, sameVal);
//CO print("% % %\n", re, sameVal, a);
}
// pack the boolean result into bit 8 and add the returned common value
RET[programIndex] = ((int)re << 8) + sameVal;
}
// Expected output: 267 = (1 << 8) + 11, i.e. re is true and sameVal is 11.
export void result(uniform float RET[]) {
RET[programIndex] = 267;
}

View File

@@ -0,0 +1,17 @@
// Test: the two-argument reduce_equal() overload with only the odd-numbered
// program instances executing the call.
// NOTE(review): the expected result implies aFOO[1] == 2 - confirm against
// the test driver.
// Reports the number of program instances.
export uniform int width() { return programCount; }
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
// every odd-numbered instance reads aFOO[1], so the active lanes all agree
int a = aFOO[programIndex&1];
RET[programIndex] = 1;
// both uniforms are only assigned inside the conditional below
uniform bool re;
uniform int val;
if (programIndex & 1) {
re = reduce_equal(a, val);
}
// pack the boolean result into bit 8 and add the returned common value
RET[programIndex] = ((int)re << 8) + val;
}
// Expected output: 258 = (1 << 8) + 2, i.e. re is true and val is 2.
export void result(uniform float RET[]) {
RET[programIndex] = 258;
}

13
tests/reduce-equal-2.ispc Normal file
View File

@@ -0,0 +1,13 @@
// Test: reduce_equal() on int values under a mask that selects only the
// instances whose value is 1.
// Reports the number of program instances.
export uniform int width() { return programCount; }
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
// pairs of neighboring instances share one aFOO entry
int a = aFOO[programIndex/2];
// instances that skip the call keep this value
RET[programIndex] = 1;
// only instances with a == 1 are active, so the active lanes all agree
if (a == 1)
RET[programIndex] = reduce_equal(a) ? 1 : 0;
}
// Expected output: 1 in every entry.
export void result(uniform float RET[]) {
RET[programIndex] = 1;
}

13
tests/reduce-equal-3.ispc Normal file
View File

@@ -0,0 +1,13 @@
// Test: reduce_equal() on int values that are not all equal.
// NOTE(review): the expected result requires every instance to satisfy
// a < programCount + 4 and the aFOO entries read to differ - confirm against
// the test driver.
// Reports the number of program instances.
export uniform int width() { return programCount; }
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
// pairs of neighboring instances share one aFOO entry
int a = aFOO[programIndex/2];
RET[programIndex] = 1;
if (a < programCount + 4)
RET[programIndex] = reduce_equal(a) ? 1 : 0;
}
// Expected output: 0 in every entry.
export void result(uniform float RET[]) {
RET[programIndex] = 0;
}

15
tests/reduce-equal-4.ispc Normal file
View File

@@ -0,0 +1,15 @@
// Test: reduce_equal() on int values with only the upper half of the program
// instances able to reach the call.
// Reports the number of program instances.
export uniform int width() { return programCount; }
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
// the lower half of the instances reads aFOO[0], the upper half aFOO[1]
int a = aFOO[programIndex/(programCount/2)];
RET[programIndex] = 0;
// instances that reach the call all read aFOO[1], so they agree; every
// other instance takes the else branch
if (programIndex >= programCount/2 && a < 4)
RET[programIndex] = reduce_equal(a) ? 1 : 0;
else
RET[programIndex] = 1;
}
// Expected output: 1 in every entry.
export void result(uniform float RET[]) {
RET[programIndex] = 1;
}

13
tests/reduce-equal-5.ispc Normal file
View File

@@ -0,0 +1,13 @@
// Test: reduce_equal() on int values with only the odd-numbered program
// instances executing the call.
// Reports the number of program instances.
export uniform int width() { return programCount; }
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
// every odd-numbered instance reads aFOO[1], so the active lanes all agree
int a = aFOO[programIndex&1];
// even-numbered instances never reach the call and keep this value
RET[programIndex] = 1;
if (programIndex & 1)
RET[programIndex] = reduce_equal(a) ? 1 : 0;
}
// Expected output: 1 in every entry.
export void result(uniform float RET[]) {
RET[programIndex] = 1;
}

13
tests/reduce-equal-6.ispc Normal file
View File

@@ -0,0 +1,13 @@
// Test: reduce_equal() on int values that differ between the active
// (odd-numbered) program instances.
// NOTE(review): assumes the harness fills aFOO with distinct values and that
// more than one odd-numbered instance exists - confirm against the test driver.
// Reports the number of program instances.
export uniform int width() { return programCount; }
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
int a = aFOO[programIndex];
// even-numbered instances never reach the call and keep this value
RET[programIndex] = 0;
if (programIndex & 1)
RET[programIndex] = reduce_equal(a) ? 1 : 0;
}
// Expected output: 0 in every entry.
export void result(uniform float RET[]) {
RET[programIndex] = 0;
}

13
tests/reduce-equal-7.ispc Normal file
View File

@@ -0,0 +1,13 @@
// Test: reduce_equal() on float values under a mask that selects only the
// instances whose value is 1.
// Reports the number of program instances.
export uniform int width() { return programCount; }
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
// pairs of neighboring instances share one aFOO entry
float a = aFOO[programIndex/2];
// instances that skip the call keep this value
RET[programIndex] = 1;
// only instances with a == 1 are active, so the active lanes all agree
if (a == 1)
RET[programIndex] = reduce_equal(a) ? 1 : 0;
}
// Expected output: 1 in every entry.
export void result(uniform float RET[]) {
RET[programIndex] = 1;
}

13
tests/reduce-equal-8.ispc Normal file
View File

@@ -0,0 +1,13 @@
// Test: reduce_equal() on int64 values under a mask that selects only the
// instances whose value is 1.
// Reports the number of program instances.
export uniform int width() { return programCount; }
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
// pairs of neighboring instances share one aFOO entry
int64 a = aFOO[programIndex/2];
// instances that skip the call keep this value
RET[programIndex] = 1;
// only instances with a == 1 are active, so the active lanes all agree
if (a == 1)
RET[programIndex] = reduce_equal(a) ? 1 : 0;
}
// Expected output: 1 in every entry.
export void result(uniform float RET[]) {
RET[programIndex] = 1;
}

15
tests/reduce-equal-9.ispc Normal file
View File

@@ -0,0 +1,15 @@
// Test: reduce_equal() on double values, with the result captured in a
// uniform bool that has a defined default.
// NOTE(review): the expected result requires the instances with a < 4 to
// hold differing values - confirm the aFOO contents against the test driver.
// Reports the number of program instances.
export uniform int width() { return programCount; }
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
// pairs of neighboring instances share one aFOO entry
double a = aFOO[programIndex/2];
// placeholder value; overwritten unconditionally below
RET[programIndex] = 1;
// stays false if no instance enters the conditional
uniform bool eq = false;
if (a < 4)
eq = reduce_equal(a);
RET[programIndex] = eq;
}
// Expected output: 0 in every entry.
export void result(uniform float RET[]) {
RET[programIndex] = 0;
}

11
tests/reduce-equal.ispc Normal file
View File

@@ -0,0 +1,11 @@
// Test: reduce_equal() on float values with no conditional around the call.
// NOTE(review): the expected result requires aFOO to hold values that differ
// between program instances - confirm against the test driver.
// Reports the number of program instances.
export uniform int width() { return programCount; }
export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex];
// the uniform bool result is stored directly as a float
RET[programIndex] = reduce_equal(a);
}
// Expected output: 0 in every entry.
export void result(uniform float RET[]) {
RET[programIndex] = 0;
}