Add count_{leading,trailing}_zeros() functions to stdlib.
(Documentation has yet to be written.)
This commit is contained in:
@@ -379,7 +379,10 @@ lSetInternalFunctions(llvm::Module *module) {
|
|||||||
"__ceil_uniform_float",
|
"__ceil_uniform_float",
|
||||||
"__ceil_varying_double",
|
"__ceil_varying_double",
|
||||||
"__ceil_varying_float",
|
"__ceil_varying_float",
|
||||||
"__count_trailing_zeros",
|
"__count_trailing_zeros_i32",
|
||||||
|
"__count_trailing_zeros_i64",
|
||||||
|
"__count_leading_zeros_i32",
|
||||||
|
"__count_leading_zeros_i64",
|
||||||
"__do_assert_uniform",
|
"__do_assert_uniform",
|
||||||
"__do_assert_varying",
|
"__do_assert_varying",
|
||||||
"__do_print",
|
"__do_print",
|
||||||
|
|||||||
22
builtins.m4
22
builtins.m4
@@ -1094,11 +1094,26 @@ define <$1 x i32> @__sext_varying_bool(<$1 x i32>) nounwind readnone alwaysinlin
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; count trailing zeros
|
;; count trailing zeros
|
||||||
|
|
||||||
define i32 @__count_trailing_zeros(i32) nounwind readnone alwaysinline {
|
define i32 @__count_trailing_zeros_i32(i32) nounwind readnone alwaysinline {
|
||||||
%c = call i32 @llvm.cttz.i32(i32 %0)
|
%c = call i32 @llvm.cttz.i32(i32 %0)
|
||||||
ret i32 %c
|
ret i32 %c
|
||||||
}
|
}
|
||||||
|
|
||||||
|
define i64 @__count_trailing_zeros_i64(i64) nounwind readnone alwaysinline {
|
||||||
|
%c = call i64 @llvm.cttz.i64(i64 %0)
|
||||||
|
ret i64 %c
|
||||||
|
}
|
||||||
|
|
||||||
|
define i32 @__count_leading_zeros_i32(i32) nounwind readnone alwaysinline {
|
||||||
|
%c = call i32 @llvm.ctlz.i32(i32 %0)
|
||||||
|
ret i32 %c
|
||||||
|
}
|
||||||
|
|
||||||
|
define i64 @__count_leading_zeros_i64(i64) nounwind readnone alwaysinline {
|
||||||
|
%c = call i64 @llvm.ctlz.i64(i64 %0)
|
||||||
|
ret i64 %c
|
||||||
|
}
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; AOS/SOA conversion primitives
|
;; AOS/SOA conversion primitives
|
||||||
|
|
||||||
@@ -2500,8 +2515,11 @@ done:
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; reduce_equal
|
;; reduce_equal
|
||||||
|
|
||||||
; count trailing zeros
|
; count leading/trailing zeros
|
||||||
|
declare i32 @llvm.ctlz.i32(i32)
|
||||||
|
declare i64 @llvm.ctlz.i64(i64)
|
||||||
declare i32 @llvm.cttz.i32(i32)
|
declare i32 @llvm.cttz.i32(i32)
|
||||||
|
declare i64 @llvm.cttz.i64(i64)
|
||||||
|
|
||||||
define(`reduce_equal_aux', `
|
define(`reduce_equal_aux', `
|
||||||
define i1 @__reduce_equal_$3(<$1 x $2> %v, $2 * %samevalue,
|
define i1 @__reduce_equal_$3(<$1 x $2> %v, $2 * %samevalue,
|
||||||
|
|||||||
7
ctx.cpp
7
ctx.cpp
@@ -2158,13 +2158,14 @@ FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType,
|
|||||||
// Figure out the first lane that still needs its function
|
// Figure out the first lane that still needs its function
|
||||||
// pointer to be called.
|
// pointer to be called.
|
||||||
llvm::Value *currentMask = LoadInst(maskPtr);
|
llvm::Value *currentMask = LoadInst(maskPtr);
|
||||||
llvm::Function *cttz = m->module->getFunction("__count_trailing_zeros");
|
llvm::Function *cttz =
|
||||||
|
m->module->getFunction("__count_trailing_zeros_i32");
|
||||||
assert(cttz != NULL);
|
assert(cttz != NULL);
|
||||||
llvm::Value *firstLane = CallInst(cttz, NULL, LaneMask(currentMask),
|
llvm::Value *firstLane = CallInst(cttz, NULL, LaneMask(currentMask),
|
||||||
"first_lane");
|
"first_lane");
|
||||||
|
|
||||||
// Get the pointer to the function we're going to call this time through:
|
// Get the pointer to the function we're going to call this
|
||||||
// ftpr = func[firstLane]
|
// time through: fptr = func[firstLane]
|
||||||
llvm::Value *fptr =
|
llvm::Value *fptr =
|
||||||
llvm::ExtractElementInst::Create(func, firstLane,
|
llvm::ExtractElementInst::Create(func, firstLane,
|
||||||
"extract_fptr", bblock);
|
"extract_fptr", bblock);
|
||||||
|
|||||||
107
stdlib.ispc
107
stdlib.ispc
@@ -315,6 +315,113 @@ static inline uniform int lanemask() {
|
|||||||
return __movmsk(__mask);
|
return __movmsk(__mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////
|
||||||
|
// count leading/trailing zeros
|
||||||
|
|
||||||
|
static inline uniform unsigned int32
|
||||||
|
count_leading_zeros(uniform unsigned int32 v) {
|
||||||
|
return __count_leading_zeros_i32(v);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline uniform unsigned int64
|
||||||
|
count_leading_zeros(uniform unsigned int64 v) {
|
||||||
|
return __count_leading_zeros_i64(v);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline uniform unsigned int32
|
||||||
|
count_trailing_zeros(uniform unsigned int32 v) {
|
||||||
|
return __count_trailing_zeros_i32(v);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline uniform unsigned int64
|
||||||
|
count_trailing_zeros(uniform unsigned int64 v) {
|
||||||
|
return __count_trailing_zeros_i64(v);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline uniform int32
|
||||||
|
count_leading_zeros(uniform int32 v) {
|
||||||
|
return __count_leading_zeros_i32(v);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline uniform int64
|
||||||
|
count_leading_zeros(uniform int64 v) {
|
||||||
|
return __count_leading_zeros_i64(v);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline uniform int32
|
||||||
|
count_trailing_zeros(uniform int32 v) {
|
||||||
|
return __count_trailing_zeros_i32(v);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline uniform int64
|
||||||
|
count_trailing_zeros(uniform int64 v) {
|
||||||
|
return __count_trailing_zeros_i64(v);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline unsigned int32
|
||||||
|
count_leading_zeros(unsigned int32 v) {
|
||||||
|
unsigned int32 r;
|
||||||
|
for (uniform int i = 0; i < programCount; ++i)
|
||||||
|
r = insert(r, i, __count_leading_zeros_i32(extract(v, i)));
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline unsigned int64
|
||||||
|
count_leading_zeros(unsigned int64 v) {
|
||||||
|
unsigned int64 r;
|
||||||
|
for (uniform int i = 0; i < programCount; ++i)
|
||||||
|
r = insert(r, i, __count_leading_zeros_i64(extract(v, i)));
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline unsigned int32
|
||||||
|
count_trailing_zeros(unsigned int32 v) {
|
||||||
|
unsigned int32 r;
|
||||||
|
for (uniform int i = 0; i < programCount; ++i)
|
||||||
|
r = insert(r, i, __count_trailing_zeros_i32(extract(v, i)));
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline unsigned int64
|
||||||
|
count_trailing_zeros(unsigned int64 v) {
|
||||||
|
unsigned int64 r;
|
||||||
|
for (uniform int i = 0; i < programCount; ++i)
|
||||||
|
r = insert(r, i, __count_trailing_zeros_i64(extract(v, i)));
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int32
|
||||||
|
count_leading_zeros(int32 v) {
|
||||||
|
int32 r;
|
||||||
|
for (uniform int i = 0; i < programCount; ++i)
|
||||||
|
r = insert(r, i, __count_leading_zeros_i32(extract(v, i)));
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int64
|
||||||
|
count_leading_zeros(int64 v) {
|
||||||
|
int64 r;
|
||||||
|
for (uniform int i = 0; i < programCount; ++i)
|
||||||
|
r = insert(r, i, __count_leading_zeros_i64(extract(v, i)));
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int32
|
||||||
|
count_trailing_zeros(int32 v) {
|
||||||
|
int32 r;
|
||||||
|
for (uniform int i = 0; i < programCount; ++i)
|
||||||
|
r = insert(r, i, __count_trailing_zeros_i32(extract(v, i)));
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int64
|
||||||
|
count_trailing_zeros(int64 v) {
|
||||||
|
int64 r;
|
||||||
|
for (uniform int i = 0; i < programCount; ++i)
|
||||||
|
r = insert(r, i, __count_trailing_zeros_i64(extract(v, i)));
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
// AOS/SOA conversion
|
// AOS/SOA conversion
|
||||||
|
|
||||||
|
|||||||
11
tests/count-leading-trailing-zeros-1.ispc
Normal file
11
tests/count-leading-trailing-zeros-1.ispc
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
|
||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
|
||||||
|
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||||
|
RET[programIndex] = count_trailing_zeros(0xf0);
|
||||||
|
}
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
RET[programIndex] = 4;
|
||||||
|
}
|
||||||
11
tests/count-leading-trailing-zeros-2.ispc
Normal file
11
tests/count-leading-trailing-zeros-2.ispc
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
|
||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
|
||||||
|
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||||
|
RET[programIndex] = count_leading_zeros((int32)0xf0);
|
||||||
|
}
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
RET[programIndex] = 24;
|
||||||
|
}
|
||||||
11
tests/count-leading-trailing-zeros-3.ispc
Normal file
11
tests/count-leading-trailing-zeros-3.ispc
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
|
||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
|
||||||
|
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||||
|
RET[programIndex] = count_leading_zeros((unsigned int64)0xf0);
|
||||||
|
}
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
RET[programIndex] = 56;
|
||||||
|
}
|
||||||
12
tests/count-leading-trailing-zeros-4.ispc
Normal file
12
tests/count-leading-trailing-zeros-4.ispc
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
|
||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
|
||||||
|
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||||
|
int32 i = (1 << programIndex);
|
||||||
|
RET[programIndex] = count_leading_zeros(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
RET[programIndex] = 31-programIndex;
|
||||||
|
}
|
||||||
12
tests/count-leading-trailing-zeros-5.ispc
Normal file
12
tests/count-leading-trailing-zeros-5.ispc
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
|
||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
|
||||||
|
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||||
|
unsigned int64 i = ((unsigned int64)1 << (50+programIndex));
|
||||||
|
RET[programIndex] = count_trailing_zeros(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
RET[programIndex] = 50+programIndex;
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user