Add count_{leading,trailing}_zeros() functions to stdlib.

(Documentation has yet to be written.)
This commit is contained in:
Matt Pharr
2011-11-30 10:12:16 -08:00
parent 1703f2717c
commit 7a2561c429
9 changed files with 192 additions and 6 deletions

View File

@@ -379,7 +379,10 @@ lSetInternalFunctions(llvm::Module *module) {
"__ceil_uniform_float",
"__ceil_varying_double",
"__ceil_varying_float",
"__count_trailing_zeros",
"__count_trailing_zeros_i32",
"__count_trailing_zeros_i64",
"__count_leading_zeros_i32",
"__count_leading_zeros_i64",
"__do_assert_uniform",
"__do_assert_varying",
"__do_print",

View File

@@ -1094,11 +1094,26 @@ define <$1 x i32> @__sext_varying_bool(<$1 x i32>) nounwind readnone alwaysinlin
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; count trailing zeros
define i32 @__count_trailing_zeros(i32) nounwind readnone alwaysinline {
; Returns the result of the llvm.cttz.i32 intrinsic applied to the single
; 32-bit argument.
define i32 @__count_trailing_zeros_i32(i32) nounwind readnone alwaysinline {
  %tz32 = call i32 @llvm.cttz.i32(i32 %0)
  ret i32 %tz32
}
; Returns the result of the llvm.cttz.i64 intrinsic applied to the single
; 64-bit argument.
define i64 @__count_trailing_zeros_i64(i64) nounwind readnone alwaysinline {
  %tz64 = call i64 @llvm.cttz.i64(i64 %0)
  ret i64 %tz64
}
; Returns the result of the llvm.ctlz.i32 intrinsic applied to the single
; 32-bit argument.
define i32 @__count_leading_zeros_i32(i32) nounwind readnone alwaysinline {
  %lz32 = call i32 @llvm.ctlz.i32(i32 %0)
  ret i32 %lz32
}
; Returns the result of the llvm.ctlz.i64 intrinsic applied to the single
; 64-bit argument.
define i64 @__count_leading_zeros_i64(i64) nounwind readnone alwaysinline {
  %lz64 = call i64 @llvm.ctlz.i64(i64 %0)
  ret i64 %lz64
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AOS/SOA conversion primitives
@@ -2500,8 +2515,11 @@ done:
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; reduce_equal
; count trailing zeros
; count leading/trailing zeros
; LLVM bit-counting intrinsics in 32-bit and 64-bit forms. They are called by
; the __count_leading_zeros_i32/i64 and __count_trailing_zeros_i32/i64
; wrappers in this file.
declare i32 @llvm.ctlz.i32(i32)
declare i64 @llvm.ctlz.i64(i64)
declare i32 @llvm.cttz.i32(i32)
declare i64 @llvm.cttz.i64(i64)
define(`reduce_equal_aux', `
define i1 @__reduce_equal_$3(<$1 x $2> %v, $2 * %samevalue,

View File

@@ -2158,13 +2158,14 @@ FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType,
// Figure out the first lane that still needs its function
// pointer to be called.
llvm::Value *currentMask = LoadInst(maskPtr);
llvm::Function *cttz = m->module->getFunction("__count_trailing_zeros");
llvm::Function *cttz =
m->module->getFunction("__count_trailing_zeros_i32");
assert(cttz != NULL);
llvm::Value *firstLane = CallInst(cttz, NULL, LaneMask(currentMask),
"first_lane");
// Get the pointer to the function we're going to call this time through:
// ftpr = func[firstLane]
// Get the pointer to the function we're going to call this
// time through: ftpr = func[firstLane]
llvm::Value *fptr =
llvm::ExtractElementInst::Create(func, firstLane,
"extract_fptr", bblock);

View File

@@ -315,6 +315,113 @@ static inline uniform int lanemask() {
return __movmsk(__mask);
}
///////////////////////////////////////////////////////////////////////////
// count leading/trailing zeros
// Uniform unsigned 32-bit overload: forwards v to the
// __count_leading_zeros_i32() builtin and returns its result.
static inline uniform unsigned int32
count_leading_zeros(uniform unsigned int32 v) {
    uniform unsigned int32 zeros = __count_leading_zeros_i32(v);
    return zeros;
}
// Uniform unsigned 64-bit overload: forwards v to the
// __count_leading_zeros_i64() builtin and returns its result.
static inline uniform unsigned int64
count_leading_zeros(uniform unsigned int64 v) {
    uniform unsigned int64 zeros = __count_leading_zeros_i64(v);
    return zeros;
}
// Uniform unsigned 32-bit overload: forwards v to the
// __count_trailing_zeros_i32() builtin and returns its result.
static inline uniform unsigned int32
count_trailing_zeros(uniform unsigned int32 v) {
    uniform unsigned int32 zeros = __count_trailing_zeros_i32(v);
    return zeros;
}
// Uniform unsigned 64-bit overload: forwards v to the
// __count_trailing_zeros_i64() builtin and returns its result.
static inline uniform unsigned int64
count_trailing_zeros(uniform unsigned int64 v) {
    uniform unsigned int64 zeros = __count_trailing_zeros_i64(v);
    return zeros;
}
// Uniform signed 32-bit overload: forwards v to the
// __count_leading_zeros_i32() builtin and returns its result.
static inline uniform int32
count_leading_zeros(uniform int32 v) {
    uniform int32 zeros = __count_leading_zeros_i32(v);
    return zeros;
}
// Uniform signed 64-bit overload: forwards v to the
// __count_leading_zeros_i64() builtin and returns its result.
static inline uniform int64
count_leading_zeros(uniform int64 v) {
    uniform int64 zeros = __count_leading_zeros_i64(v);
    return zeros;
}
// Uniform signed 32-bit overload: forwards v to the
// __count_trailing_zeros_i32() builtin and returns its result.
static inline uniform int32
count_trailing_zeros(uniform int32 v) {
    uniform int32 zeros = __count_trailing_zeros_i32(v);
    return zeros;
}
// Uniform signed 64-bit overload: forwards v to the
// __count_trailing_zeros_i64() builtin and returns its result.
static inline uniform int64
count_trailing_zeros(uniform int64 v) {
    uniform int64 zeros = __count_trailing_zeros_i64(v);
    return zeros;
}
// Varying unsigned 32-bit overload.  Walks all programCount lanes, pulls
// each lane's value out with extract(), runs it through the
// __count_leading_zeros_i32() builtin, and writes the answer back into the
// same lane of the result with insert().  The loop covers every lane and
// does not consult the execution mask.
static inline unsigned int32
count_leading_zeros(unsigned int32 v) {
    // Left uninitialized on purpose: each lane is filled in by the loop.
    unsigned int32 perLane;
    for (uniform int lane = 0; lane < programCount; ++lane) {
        perLane = insert(perLane, lane,
                         __count_leading_zeros_i32(extract(v, lane)));
    }
    return perLane;
}
// Varying unsigned 64-bit overload.  Walks all programCount lanes, pulls
// each lane's value out with extract(), runs it through the
// __count_leading_zeros_i64() builtin, and writes the answer back into the
// same lane of the result with insert().  The loop covers every lane and
// does not consult the execution mask.
static inline unsigned int64
count_leading_zeros(unsigned int64 v) {
    // Left uninitialized on purpose: each lane is filled in by the loop.
    unsigned int64 perLane;
    for (uniform int lane = 0; lane < programCount; ++lane) {
        perLane = insert(perLane, lane,
                         __count_leading_zeros_i64(extract(v, lane)));
    }
    return perLane;
}
// Varying unsigned 32-bit overload.  Walks all programCount lanes, pulls
// each lane's value out with extract(), runs it through the
// __count_trailing_zeros_i32() builtin, and writes the answer back into the
// same lane of the result with insert().  The loop covers every lane and
// does not consult the execution mask.
static inline unsigned int32
count_trailing_zeros(unsigned int32 v) {
    // Left uninitialized on purpose: each lane is filled in by the loop.
    unsigned int32 perLane;
    for (uniform int lane = 0; lane < programCount; ++lane) {
        perLane = insert(perLane, lane,
                         __count_trailing_zeros_i32(extract(v, lane)));
    }
    return perLane;
}
// Varying unsigned 64-bit overload.  Walks all programCount lanes, pulls
// each lane's value out with extract(), runs it through the
// __count_trailing_zeros_i64() builtin, and writes the answer back into the
// same lane of the result with insert().  The loop covers every lane and
// does not consult the execution mask.
static inline unsigned int64
count_trailing_zeros(unsigned int64 v) {
    // Left uninitialized on purpose: each lane is filled in by the loop.
    unsigned int64 perLane;
    for (uniform int lane = 0; lane < programCount; ++lane) {
        perLane = insert(perLane, lane,
                         __count_trailing_zeros_i64(extract(v, lane)));
    }
    return perLane;
}
// Varying signed 32-bit overload.  Walks all programCount lanes, pulls
// each lane's value out with extract(), runs it through the
// __count_leading_zeros_i32() builtin, and writes the answer back into the
// same lane of the result with insert().  The loop covers every lane and
// does not consult the execution mask.
static inline int32
count_leading_zeros(int32 v) {
    // Left uninitialized on purpose: each lane is filled in by the loop.
    int32 perLane;
    for (uniform int lane = 0; lane < programCount; ++lane) {
        perLane = insert(perLane, lane,
                         __count_leading_zeros_i32(extract(v, lane)));
    }
    return perLane;
}
// Varying signed 64-bit overload.  Walks all programCount lanes, pulls
// each lane's value out with extract(), runs it through the
// __count_leading_zeros_i64() builtin, and writes the answer back into the
// same lane of the result with insert().  The loop covers every lane and
// does not consult the execution mask.
static inline int64
count_leading_zeros(int64 v) {
    // Left uninitialized on purpose: each lane is filled in by the loop.
    int64 perLane;
    for (uniform int lane = 0; lane < programCount; ++lane) {
        perLane = insert(perLane, lane,
                         __count_leading_zeros_i64(extract(v, lane)));
    }
    return perLane;
}
// Varying signed 32-bit overload.  Walks all programCount lanes, pulls
// each lane's value out with extract(), runs it through the
// __count_trailing_zeros_i32() builtin, and writes the answer back into the
// same lane of the result with insert().  The loop covers every lane and
// does not consult the execution mask.
static inline int32
count_trailing_zeros(int32 v) {
    // Left uninitialized on purpose: each lane is filled in by the loop.
    int32 perLane;
    for (uniform int lane = 0; lane < programCount; ++lane) {
        perLane = insert(perLane, lane,
                         __count_trailing_zeros_i32(extract(v, lane)));
    }
    return perLane;
}
// Varying signed 64-bit overload.  Walks all programCount lanes, pulls
// each lane's value out with extract(), runs it through the
// __count_trailing_zeros_i64() builtin, and writes the answer back into the
// same lane of the result with insert().  The loop covers every lane and
// does not consult the execution mask.
static inline int64
count_trailing_zeros(int64 v) {
    // Left uninitialized on purpose: each lane is filled in by the loop.
    int64 perLane;
    for (uniform int lane = 0; lane < programCount; ++lane) {
        perLane = insert(perLane, lane,
                         __count_trailing_zeros_i64(extract(v, lane)));
    }
    return perLane;
}
///////////////////////////////////////////////////////////////////////////
// AOS/SOA conversion

View File

@@ -0,0 +1,11 @@
// Test: count_trailing_zeros() of the constant 0xf0.
// width() reports programCount (presumably so the test driver can size RET
// -- confirm against the harness).
export uniform int width() { return programCount; }
// Writes count_trailing_zeros(0xf0) into this program instance's slot of
// RET.  aFOO is not read.
export void f_f(uniform float RET[], uniform float aFOO[]) {
RET[programIndex] = count_trailing_zeros(0xf0);
}
// Expected value for every program instance: 0xf0 is binary 11110000, so
// there are 4 zero bits below the lowest set bit.
export void result(uniform float RET[]) {
RET[programIndex] = 4;
}

View File

@@ -0,0 +1,11 @@
// Test: count_leading_zeros() of 0xf0 cast to int32.
// width() reports programCount (presumably so the test driver can size RET
// -- confirm against the harness).
export uniform int width() { return programCount; }
// Writes count_leading_zeros((int32)0xf0) into this program instance's slot
// of RET.  aFOO is not read.
export void f_f(uniform float RET[], uniform float aFOO[]) {
RET[programIndex] = count_leading_zeros((int32)0xf0);
}
// Expected value for every program instance: the highest set bit of 0xf0 is
// bit 7, leaving 32 - 8 = 24 zero bits above it in a 32-bit value.
export void result(uniform float RET[]) {
RET[programIndex] = 24;
}

View File

@@ -0,0 +1,11 @@
// Test: count_leading_zeros() of 0xf0 cast to unsigned int64.
// width() reports programCount (presumably so the test driver can size RET
// -- confirm against the harness).
export uniform int width() { return programCount; }
// Writes count_leading_zeros((unsigned int64)0xf0) into this program
// instance's slot of RET.  aFOO is not read.
export void f_f(uniform float RET[], uniform float aFOO[]) {
RET[programIndex] = count_leading_zeros((unsigned int64)0xf0);
}
// Expected value for every program instance: the highest set bit of 0xf0 is
// bit 7, leaving 64 - 8 = 56 zero bits above it in a 64-bit value.
export void result(uniform float RET[]) {
RET[programIndex] = 56;
}

View File

@@ -0,0 +1,12 @@
// Test: count_leading_zeros() on an int32 whose value differs per program
// instance.
// width() reports programCount (presumably so the test driver can size RET
// -- confirm against the harness).
export uniform int width() { return programCount; }
// Each program instance sets only bit number programIndex and stores the
// leading-zero count of that value into its slot of RET.  aFOO is not read.
export void f_f(uniform float RET[], uniform float aFOO[]) {
int32 i = (1 << programIndex);
RET[programIndex] = count_leading_zeros(i);
}
// Expected value: with only bit programIndex set in a 32-bit value, the
// 31 - programIndex bits above it are zero.
export void result(uniform float RET[]) {
RET[programIndex] = 31-programIndex;
}
}

View File

@@ -0,0 +1,12 @@
// Test: count_trailing_zeros() on an unsigned int64 whose value differs per
// program instance.
// width() reports programCount (presumably so the test driver can size RET
// -- confirm against the harness).
export uniform int width() { return programCount; }
// Each program instance sets only bit number 50 + programIndex (the shift is
// done on a 64-bit 1) and stores the trailing-zero count of that value into
// its slot of RET.  aFOO is not read.
export void f_f(uniform float RET[], uniform float aFOO[]) {
unsigned int64 i = ((unsigned int64)1 << (50+programIndex));
RET[programIndex] = count_trailing_zeros(i);
}
// Expected value: with only bit 50 + programIndex set, the
// 50 + programIndex bits below it are zero.
export void result(uniform float RET[]) {
RET[programIndex] = 50+programIndex;
}
}