diff --git a/builtins.cpp b/builtins.cpp index ffb05e6d..4cd2c8c1 100644 --- a/builtins.cpp +++ b/builtins.cpp @@ -379,7 +379,10 @@ lSetInternalFunctions(llvm::Module *module) { "__ceil_uniform_float", "__ceil_varying_double", "__ceil_varying_float", - "__count_trailing_zeros", + "__count_trailing_zeros_i32", + "__count_trailing_zeros_i64", + "__count_leading_zeros_i32", + "__count_leading_zeros_i64", "__do_assert_uniform", "__do_assert_varying", "__do_print", diff --git a/builtins.m4 b/builtins.m4 index 2b98bd80..bcfcc840 100644 --- a/builtins.m4 +++ b/builtins.m4 @@ -1094,11 +1094,26 @@ define <$1 x i32> @__sext_varying_bool(<$1 x i32>) nounwind readnone alwaysinlin ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; count trailing zeros -define i32 @__count_trailing_zeros(i32) nounwind readnone alwaysinline { +define i32 @__count_trailing_zeros_i32(i32) nounwind readnone alwaysinline { %c = call i32 @llvm.cttz.i32(i32 %0) ret i32 %c } +define i64 @__count_trailing_zeros_i64(i64) nounwind readnone alwaysinline { + %c = call i64 @llvm.cttz.i64(i64 %0) + ret i64 %c +} + +define i32 @__count_leading_zeros_i32(i32) nounwind readnone alwaysinline { + %c = call i32 @llvm.ctlz.i32(i32 %0) + ret i32 %c +} + +define i64 @__count_leading_zeros_i64(i64) nounwind readnone alwaysinline { + %c = call i64 @llvm.ctlz.i64(i64 %0) + ret i64 %c +} + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; AOS/SOA conversion primitives @@ -2500,8 +2515,11 @@ done: ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; reduce_equal -; count trailing zeros +; count leading/trailing zeros +declare i32 @llvm.ctlz.i32(i32) +declare i64 @llvm.ctlz.i64(i64) declare i32 @llvm.cttz.i32(i32) +declare i64 @llvm.cttz.i64(i64) define(`reduce_equal_aux', ` define i1 @__reduce_equal_$3(<$1 x $2> %v, $2 * %samevalue, diff --git a/ctx.cpp b/ctx.cpp index 96c44bce..29495319 100644 --- a/ctx.cpp +++ b/ctx.cpp @@ -2158,13 +2158,14 
@@ FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType, // Figure out the first lane that still needs its function // pointer to be called. llvm::Value *currentMask = LoadInst(maskPtr); - llvm::Function *cttz = m->module->getFunction("__count_trailing_zeros"); + llvm::Function *cttz = + m->module->getFunction("__count_trailing_zeros_i32"); assert(cttz != NULL); llvm::Value *firstLane = CallInst(cttz, NULL, LaneMask(currentMask), "first_lane"); - // Get the pointer to the function we're going to call this time through: - // ftpr = func[firstLane] + // Get the pointer to the function we're going to call this + // time through: fptr = func[firstLane] llvm::Value *fptr = llvm::ExtractElementInst::Create(func, firstLane, "extract_fptr", bblock); diff --git a/stdlib.ispc b/stdlib.ispc index a125d9af..52326513 100644 --- a/stdlib.ispc +++ b/stdlib.ispc @@ -315,6 +315,113 @@ static inline uniform int lanemask() { return __movmsk(__mask); } +/////////////////////////////////////////////////////////////////////////// +// count leading/trailing zeros + +static inline uniform unsigned int32 +count_leading_zeros(uniform unsigned int32 v) { + return __count_leading_zeros_i32(v); +} + +static inline uniform unsigned int64 +count_leading_zeros(uniform unsigned int64 v) { + return __count_leading_zeros_i64(v); +} + +static inline uniform unsigned int32 +count_trailing_zeros(uniform unsigned int32 v) { + return __count_trailing_zeros_i32(v); +} + +static inline uniform unsigned int64 +count_trailing_zeros(uniform unsigned int64 v) { + return __count_trailing_zeros_i64(v); +} + +static inline uniform int32 +count_leading_zeros(uniform int32 v) { + return __count_leading_zeros_i32(v); +} + +static inline uniform int64 +count_leading_zeros(uniform int64 v) { + return __count_leading_zeros_i64(v); +} + +static inline uniform int32 +count_trailing_zeros(uniform int32 v) { + return __count_trailing_zeros_i32(v); +} + +static inline uniform int64 
+count_trailing_zeros(uniform int64 v) { + return __count_trailing_zeros_i64(v); +} + +static inline unsigned int32 +count_leading_zeros(unsigned int32 v) { /* varying overload (the ones below follow the same pattern): applies the scalar builtin to extract(v, i) for each i in [0, programCount) */ + unsigned int32 r; /* filled in below, one element per loop iteration */ + for (uniform int i = 0; i < programCount; ++i) + r = insert(r, i, __count_leading_zeros_i32(extract(v, i))); + return r; +} + +static inline unsigned int64 +count_leading_zeros(unsigned int64 v) { + unsigned int64 r; + for (uniform int i = 0; i < programCount; ++i) + r = insert(r, i, __count_leading_zeros_i64(extract(v, i))); + return r; +} + +static inline unsigned int32 +count_trailing_zeros(unsigned int32 v) { + unsigned int32 r; + for (uniform int i = 0; i < programCount; ++i) + r = insert(r, i, __count_trailing_zeros_i32(extract(v, i))); + return r; +} + +static inline unsigned int64 +count_trailing_zeros(unsigned int64 v) { + unsigned int64 r; + for (uniform int i = 0; i < programCount; ++i) + r = insert(r, i, __count_trailing_zeros_i64(extract(v, i))); + return r; +} + +static inline int32 +count_leading_zeros(int32 v) { + int32 r; + for (uniform int i = 0; i < programCount; ++i) + r = insert(r, i, __count_leading_zeros_i32(extract(v, i))); + return r; +} + +static inline int64 +count_leading_zeros(int64 v) { + int64 r; + for (uniform int i = 0; i < programCount; ++i) + r = insert(r, i, __count_leading_zeros_i64(extract(v, i))); + return r; +} + +static inline int32 +count_trailing_zeros(int32 v) { + int32 r; + for (uniform int i = 0; i < programCount; ++i) + r = insert(r, i, __count_trailing_zeros_i32(extract(v, i))); + return r; +} + +static inline int64 +count_trailing_zeros(int64 v) { + int64 r; + for (uniform int i = 0; i < programCount; ++i) + r = insert(r, i, __count_trailing_zeros_i64(extract(v, i))); + return r; +} + /////////////////////////////////////////////////////////////////////////// // AOS/SOA conversion diff --git a/tests/count-leading-trailing-zeros-1.ispc b/tests/count-leading-trailing-zeros-1.ispc new file mode 100644 index 00000000..221d066d --- /dev/null +++ 
b/tests/count-leading-trailing-zeros-1.ispc @@ -0,0 +1,11 @@ + +export uniform int width() { return programCount; } + + +export void f_f(uniform float RET[], uniform float aFOO[]) { + RET[programIndex] = count_trailing_zeros(0xf0); /* 0xf0 is binary 11110000: the lowest set bit is bit 4 */ +} + +export void result(uniform float RET[]) { + RET[programIndex] = 4; +} diff --git a/tests/count-leading-trailing-zeros-2.ispc b/tests/count-leading-trailing-zeros-2.ispc new file mode 100644 index 00000000..b94f5508 --- /dev/null +++ b/tests/count-leading-trailing-zeros-2.ispc @@ -0,0 +1,11 @@ + +export uniform int width() { return programCount; } + + +export void f_f(uniform float RET[], uniform float aFOO[]) { + RET[programIndex] = count_leading_zeros((int32)0xf0); /* highest set bit is bit 7 of a 32-bit value: 31 - 7 = 24 */ +} + +export void result(uniform float RET[]) { + RET[programIndex] = 24; +} diff --git a/tests/count-leading-trailing-zeros-3.ispc b/tests/count-leading-trailing-zeros-3.ispc new file mode 100644 index 00000000..4812efa6 --- /dev/null +++ b/tests/count-leading-trailing-zeros-3.ispc @@ -0,0 +1,11 @@ + +export uniform int width() { return programCount; } + + +export void f_f(uniform float RET[], uniform float aFOO[]) { + RET[programIndex] = count_leading_zeros((unsigned int64)0xf0); /* highest set bit is bit 7 of a 64-bit value: 63 - 7 = 56 */ +} + +export void result(uniform float RET[]) { + RET[programIndex] = 56; +} diff --git a/tests/count-leading-trailing-zeros-4.ispc b/tests/count-leading-trailing-zeros-4.ispc new file mode 100644 index 00000000..5cef2b7a --- /dev/null +++ b/tests/count-leading-trailing-zeros-4.ispc @@ -0,0 +1,12 @@ + +export uniform int width() { return programCount; } + + +export void f_f(uniform float RET[], uniform float aFOO[]) { + int32 i = (1 << programIndex); /* only bit programIndex is set, leaving 31-programIndex zero bits above it */ + RET[programIndex] = count_leading_zeros(i); +} + +export void result(uniform float RET[]) { + RET[programIndex] = 31-programIndex; +} diff --git a/tests/count-leading-trailing-zeros-5.ispc b/tests/count-leading-trailing-zeros-5.ispc new file mode 100644 index 00000000..f872d099 --- /dev/null +++ b/tests/count-leading-trailing-zeros-5.ispc @@ 
-0,0 +1,12 @@ + +export uniform int width() { return programCount; } + + +export void f_f(uniform float RET[], uniform float aFOO[]) { + unsigned int64 i = ((unsigned int64)1 << (50+programIndex)); /* only bit 50+programIndex is set, so that is the trailing-zero count; NOTE(review): the shift amount exceeds 63 once programIndex > 13 -- confirm programCount <= 14 on every target this test runs on */ + RET[programIndex] = count_trailing_zeros(i); +} + +export void result(uniform float RET[]) { + RET[programIndex] = 50+programIndex; +}