Add count_{leading,trailing}_zeros() functions to stdlib.
(Documentation has yet to be written.)
This commit is contained in:
@@ -379,7 +379,10 @@ lSetInternalFunctions(llvm::Module *module) {
|
||||
"__ceil_uniform_float",
|
||||
"__ceil_varying_double",
|
||||
"__ceil_varying_float",
|
||||
"__count_trailing_zeros",
|
||||
"__count_trailing_zeros_i32",
|
||||
"__count_trailing_zeros_i64",
|
||||
"__count_leading_zeros_i32",
|
||||
"__count_leading_zeros_i64",
|
||||
"__do_assert_uniform",
|
||||
"__do_assert_varying",
|
||||
"__do_print",
|
||||
|
||||
22
builtins.m4
22
builtins.m4
@@ -1094,11 +1094,26 @@ define <$1 x i32> @__sext_varying_bool(<$1 x i32>) nounwind readnone alwaysinlin
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; count trailing zeros
|
||||
|
||||
define i32 @__count_trailing_zeros(i32) nounwind readnone alwaysinline {
|
||||
;; Returns the result of the llvm.cttz.i32 intrinsic applied to the
;; 32-bit integer argument.
define i32 @__count_trailing_zeros_i32(i32) nounwind readnone alwaysinline {
  %tz = call i32 @llvm.cttz.i32(i32 %0)
  ret i32 %tz
}
|
||||
|
||||
;; Returns the result of the llvm.cttz.i64 intrinsic applied to the
;; 64-bit integer argument.
define i64 @__count_trailing_zeros_i64(i64) nounwind readnone alwaysinline {
  %tz = call i64 @llvm.cttz.i64(i64 %0)
  ret i64 %tz
}
|
||||
|
||||
;; Returns the result of the llvm.ctlz.i32 intrinsic applied to the
;; 32-bit integer argument.
define i32 @__count_leading_zeros_i32(i32) nounwind readnone alwaysinline {
  %lz = call i32 @llvm.ctlz.i32(i32 %0)
  ret i32 %lz
}
|
||||
|
||||
;; Returns the result of the llvm.ctlz.i64 intrinsic applied to the
;; 64-bit integer argument.
define i64 @__count_leading_zeros_i64(i64) nounwind readnone alwaysinline {
  %lz = call i64 @llvm.ctlz.i64(i64 %0)
  ret i64 %lz
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; AOS/SOA conversion primitives
|
||||
|
||||
@@ -2500,8 +2515,11 @@ done:
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; reduce_equal
|
||||
|
||||
; count trailing zeros
|
||||
; count leading/trailing zeros
|
||||
declare i32 @llvm.ctlz.i32(i32)
|
||||
declare i64 @llvm.ctlz.i64(i64)
|
||||
declare i32 @llvm.cttz.i32(i32)
|
||||
declare i64 @llvm.cttz.i64(i64)
|
||||
|
||||
define(`reduce_equal_aux', `
|
||||
define i1 @__reduce_equal_$3(<$1 x $2> %v, $2 * %samevalue,
|
||||
|
||||
7
ctx.cpp
7
ctx.cpp
@@ -2158,13 +2158,14 @@ FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType,
|
||||
// Figure out the first lane that still needs its function
|
||||
// pointer to be called.
|
||||
llvm::Value *currentMask = LoadInst(maskPtr);
|
||||
llvm::Function *cttz = m->module->getFunction("__count_trailing_zeros");
|
||||
llvm::Function *cttz =
|
||||
m->module->getFunction("__count_trailing_zeros_i32");
|
||||
assert(cttz != NULL);
|
||||
llvm::Value *firstLane = CallInst(cttz, NULL, LaneMask(currentMask),
|
||||
"first_lane");
|
||||
|
||||
// Get the pointer to the function we're going to call this time through:
|
||||
// fptr = func[firstLane]
|
||||
// Get the pointer to the function we're going to call this
|
||||
// time through: fptr = func[firstLane]
|
||||
llvm::Value *fptr =
|
||||
llvm::ExtractElementInst::Create(func, firstLane,
|
||||
"extract_fptr", bblock);
|
||||
|
||||
107
stdlib.ispc
107
stdlib.ispc
@@ -315,6 +315,113 @@ static inline uniform int lanemask() {
|
||||
return __movmsk(__mask);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// count leading/trailing zeros
|
||||
|
||||
// Returns the leading-zero count of the uniform unsigned 32-bit value v,
// as computed by the __count_leading_zeros_i32 builtin.
static inline uniform unsigned int32
count_leading_zeros(uniform unsigned int32 v) {
    uniform unsigned int32 nZeros = __count_leading_zeros_i32(v);
    return nZeros;
}
|
||||
|
||||
// Returns the leading-zero count of the uniform unsigned 64-bit value v,
// as computed by the __count_leading_zeros_i64 builtin.
static inline uniform unsigned int64
count_leading_zeros(uniform unsigned int64 v) {
    uniform unsigned int64 nZeros = __count_leading_zeros_i64(v);
    return nZeros;
}
|
||||
|
||||
// Returns the trailing-zero count of the uniform unsigned 32-bit value v,
// as computed by the __count_trailing_zeros_i32 builtin.
static inline uniform unsigned int32
count_trailing_zeros(uniform unsigned int32 v) {
    uniform unsigned int32 nZeros = __count_trailing_zeros_i32(v);
    return nZeros;
}
|
||||
|
||||
// Returns the trailing-zero count of the uniform unsigned 64-bit value v,
// as computed by the __count_trailing_zeros_i64 builtin.
static inline uniform unsigned int64
count_trailing_zeros(uniform unsigned int64 v) {
    uniform unsigned int64 nZeros = __count_trailing_zeros_i64(v);
    return nZeros;
}
|
||||
|
||||
// Returns the leading-zero count of the uniform signed 32-bit value v,
// as computed by the __count_leading_zeros_i32 builtin.
static inline uniform int32
count_leading_zeros(uniform int32 v) {
    uniform int32 nZeros = __count_leading_zeros_i32(v);
    return nZeros;
}
|
||||
|
||||
// Returns the leading-zero count of the uniform signed 64-bit value v,
// as computed by the __count_leading_zeros_i64 builtin.
static inline uniform int64
count_leading_zeros(uniform int64 v) {
    uniform int64 nZeros = __count_leading_zeros_i64(v);
    return nZeros;
}
|
||||
|
||||
// Returns the trailing-zero count of the uniform signed 32-bit value v,
// as computed by the __count_trailing_zeros_i32 builtin.
static inline uniform int32
count_trailing_zeros(uniform int32 v) {
    uniform int32 nZeros = __count_trailing_zeros_i32(v);
    return nZeros;
}
|
||||
|
||||
// Returns the trailing-zero count of the uniform signed 64-bit value v,
// as computed by the __count_trailing_zeros_i64 builtin.
static inline uniform int64
count_trailing_zeros(uniform int64 v) {
    uniform int64 nZeros = __count_trailing_zeros_i64(v);
    return nZeros;
}
|
||||
|
||||
// Varying overload for unsigned 32-bit values. Walks every lane index from
// 0 to programCount-1, extracts that lane of v, runs it through the
// __count_leading_zeros_i32 builtin and inserts the count into the same
// lane of the returned value.
static inline unsigned int32
count_leading_zeros(unsigned int32 v) {
    unsigned int32 counts;
    for (uniform int lane = 0; lane < programCount; ++lane) {
        counts = insert(counts, lane,
                        __count_leading_zeros_i32(extract(v, lane)));
    }
    return counts;
}
|
||||
|
||||
// Varying overload for unsigned 64-bit values. Walks every lane index from
// 0 to programCount-1, extracts that lane of v, runs it through the
// __count_leading_zeros_i64 builtin and inserts the count into the same
// lane of the returned value.
static inline unsigned int64
count_leading_zeros(unsigned int64 v) {
    unsigned int64 counts;
    for (uniform int lane = 0; lane < programCount; ++lane) {
        counts = insert(counts, lane,
                        __count_leading_zeros_i64(extract(v, lane)));
    }
    return counts;
}
|
||||
|
||||
// Varying overload for unsigned 32-bit values. Walks every lane index from
// 0 to programCount-1, extracts that lane of v, runs it through the
// __count_trailing_zeros_i32 builtin and inserts the count into the same
// lane of the returned value.
static inline unsigned int32
count_trailing_zeros(unsigned int32 v) {
    unsigned int32 counts;
    for (uniform int lane = 0; lane < programCount; ++lane) {
        counts = insert(counts, lane,
                        __count_trailing_zeros_i32(extract(v, lane)));
    }
    return counts;
}
|
||||
|
||||
// Varying overload for unsigned 64-bit values. Walks every lane index from
// 0 to programCount-1, extracts that lane of v, runs it through the
// __count_trailing_zeros_i64 builtin and inserts the count into the same
// lane of the returned value.
static inline unsigned int64
count_trailing_zeros(unsigned int64 v) {
    unsigned int64 counts;
    for (uniform int lane = 0; lane < programCount; ++lane) {
        counts = insert(counts, lane,
                        __count_trailing_zeros_i64(extract(v, lane)));
    }
    return counts;
}
|
||||
|
||||
// Varying overload for signed 32-bit values. Walks every lane index from
// 0 to programCount-1, extracts that lane of v, runs it through the
// __count_leading_zeros_i32 builtin and inserts the count into the same
// lane of the returned value.
static inline int32
count_leading_zeros(int32 v) {
    int32 counts;
    for (uniform int lane = 0; lane < programCount; ++lane) {
        counts = insert(counts, lane,
                        __count_leading_zeros_i32(extract(v, lane)));
    }
    return counts;
}
|
||||
|
||||
// Varying overload for signed 64-bit values. Walks every lane index from
// 0 to programCount-1, extracts that lane of v, runs it through the
// __count_leading_zeros_i64 builtin and inserts the count into the same
// lane of the returned value.
static inline int64
count_leading_zeros(int64 v) {
    int64 counts;
    for (uniform int lane = 0; lane < programCount; ++lane) {
        counts = insert(counts, lane,
                        __count_leading_zeros_i64(extract(v, lane)));
    }
    return counts;
}
|
||||
|
||||
// Varying overload for signed 32-bit values. Walks every lane index from
// 0 to programCount-1, extracts that lane of v, runs it through the
// __count_trailing_zeros_i32 builtin and inserts the count into the same
// lane of the returned value.
static inline int32
count_trailing_zeros(int32 v) {
    int32 counts;
    for (uniform int lane = 0; lane < programCount; ++lane) {
        counts = insert(counts, lane,
                        __count_trailing_zeros_i32(extract(v, lane)));
    }
    return counts;
}
|
||||
|
||||
// Varying overload for signed 64-bit values. Walks every lane index from
// 0 to programCount-1, extracts that lane of v, runs it through the
// __count_trailing_zeros_i64 builtin and inserts the count into the same
// lane of the returned value.
static inline int64
count_trailing_zeros(int64 v) {
    int64 counts;
    for (uniform int lane = 0; lane < programCount; ++lane) {
        counts = insert(counts, lane,
                        __count_trailing_zeros_i64(extract(v, lane)));
    }
    return counts;
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// AOS/SOA conversion
|
||||
|
||||
|
||||
11
tests/count-leading-trailing-zeros-1.ispc
Normal file
11
tests/count-leading-trailing-zeros-1.ispc
Normal file
@@ -0,0 +1,11 @@
|
||||
|
||||
export uniform int width() { return programCount; }

// Stores count_trailing_zeros(0xf0) into this program instance's slot of RET.
// aFOO is not used.
export void f_f(uniform float RET[], uniform float aFOO[]) {
    RET[programIndex] = count_trailing_zeros(0xf0);
}

// Stores 4 into each slot of RET (0xf0 is binary 11110000, so four trailing
// zero bits); presumably the reference output that f_f is compared against.
export void result(uniform float RET[]) {
    RET[programIndex] = 4;
}
|
||||
11
tests/count-leading-trailing-zeros-2.ispc
Normal file
11
tests/count-leading-trailing-zeros-2.ispc
Normal file
@@ -0,0 +1,11 @@
|
||||
|
||||
export uniform int width() { return programCount; }

// Stores count_leading_zeros() of 0xf0 cast to int32 into this program
// instance's slot of RET. aFOO is not used.
export void f_f(uniform float RET[], uniform float aFOO[]) {
    RET[programIndex] = count_leading_zeros((int32)0xf0);
}

// Stores 24 into each slot of RET (0xf0 occupies the low 8 bits of a 32-bit
// value); presumably the reference output that f_f is compared against.
export void result(uniform float RET[]) {
    RET[programIndex] = 24;
}
|
||||
11
tests/count-leading-trailing-zeros-3.ispc
Normal file
11
tests/count-leading-trailing-zeros-3.ispc
Normal file
@@ -0,0 +1,11 @@
|
||||
|
||||
export uniform int width() { return programCount; }

// Stores count_leading_zeros() of 0xf0 cast to unsigned int64 into this
// program instance's slot of RET. aFOO is not used.
export void f_f(uniform float RET[], uniform float aFOO[]) {
    RET[programIndex] = count_leading_zeros((unsigned int64)0xf0);
}

// Stores 56 into each slot of RET (0xf0 occupies the low 8 bits of a 64-bit
// value); presumably the reference output that f_f is compared against.
export void result(uniform float RET[]) {
    RET[programIndex] = 56;
}
|
||||
12
tests/count-leading-trailing-zeros-4.ispc
Normal file
12
tests/count-leading-trailing-zeros-4.ispc
Normal file
@@ -0,0 +1,12 @@
|
||||
|
||||
export uniform int width() { return programCount; }

// Each program instance sets only bit number programIndex of an int32 and
// stores that value's count_leading_zeros() into its slot of RET.
// aFOO is not used.
export void f_f(uniform float RET[], uniform float aFOO[]) {
    int32 oneBit = (1 << programIndex);
    RET[programIndex] = count_leading_zeros(oneBit);
}

// Stores 31 - programIndex into each slot of RET; presumably the reference
// output that f_f is compared against.
export void result(uniform float RET[]) {
    RET[programIndex] = 31 - programIndex;
}
|
||||
12
tests/count-leading-trailing-zeros-5.ispc
Normal file
12
tests/count-leading-trailing-zeros-5.ispc
Normal file
@@ -0,0 +1,12 @@
|
||||
|
||||
export uniform int width() { return programCount; }

// Each program instance sets only bit number 50 + programIndex of an
// unsigned int64 and stores that value's count_trailing_zeros() into its
// slot of RET. aFOO is not used.
export void f_f(uniform float RET[], uniform float aFOO[]) {
    unsigned int64 oneBit = ((unsigned int64)1 << (50 + programIndex));
    RET[programIndex] = count_trailing_zeros(oneBit);
}

// Stores 50 + programIndex into each slot of RET; presumably the reference
// output that f_f is compared against.
export void result(uniform float RET[]) {
    RET[programIndex] = 50 + programIndex;
}
|
||||
Reference in New Issue
Block a user