diff --git a/expr.cpp b/expr.cpp index 894942d2..3baaabaf 100644 --- a/expr.cpp +++ b/expr.cpp @@ -2240,6 +2240,49 @@ lConstFoldBinaryIntOp(ConstExpr *constArg0, ConstExpr *constArg1, } +/* Returns true if the given arguments (which are assumed to be the + operands of a divide) represent a divide that can be performed by one of + the __fast_idiv functions. + */ +static bool +lCanImproveVectorDivide(Expr *arg0, Expr *arg1, int *divisor) { + const Type *type = arg0->GetType(); + if (!type) + return false; + + // The value being divided must be an int8/16/32. + if (!(Type::EqualIgnoringConst(type, AtomicType::VaryingInt8) || + Type::EqualIgnoringConst(type, AtomicType::VaryingUInt8) || + Type::EqualIgnoringConst(type, AtomicType::VaryingInt16) || + Type::EqualIgnoringConst(type, AtomicType::VaryingUInt16) || + Type::EqualIgnoringConst(type, AtomicType::VaryingInt32) || + Type::EqualIgnoringConst(type, AtomicType::VaryingUInt32))) + return false; + + // The divisor must be the same compile-time constant value for all of + // the vector lanes. + ConstExpr *ce = dynamic_cast(arg1); + if (!ce) + return false; + int64_t div[ISPC_MAX_NVEC]; + int count = ce->GetValues(div); + for (int i = 1; i < count; ++i) + if (div[i] != div[0]) + return false; + *divisor = div[0]; + + // And finally, the divisor must be >= 2 and <128 (for 8-bit divides), + // and <256 otherwise. 
+ if (*divisor < 2) + return false; + if (Type::EqualIgnoringConst(type, AtomicType::VaryingInt8) || + Type::EqualIgnoringConst(type, AtomicType::VaryingUInt8)) + return *divisor < 128; + else + return *divisor < 256; +} + + Expr * BinaryExpr::Optimize() { if (arg0 == NULL || arg1 == NULL) @@ -2302,6 +2345,32 @@ BinaryExpr::Optimize() { } } + int divisor; + if (op == Div && lCanImproveVectorDivide(arg0, arg1, &divisor)) { + Debug(pos, "Improving vector divide by constant %d", divisor); + + std::vector idivFuns; + m->symbolTable->LookupFunction("__fast_idiv", &idivFuns); + if (idivFuns.size() == 0) { + Warning(pos, "Couldn't find __fast_idiv to optimize integer divide. " + "Are you compiling with --nostdlib?"); + return this; + } + + Expr *idivSymExpr = new FunctionSymbolExpr("__fast_idiv", idivFuns, pos); + ExprList *args = new ExprList(arg0, pos); + args->exprs.push_back(new ConstExpr(AtomicType::UniformInt32, divisor, arg1->pos)); + Expr *idivCall = new FunctionCallExpr(idivSymExpr, args, pos); + + idivCall = ::TypeCheck(idivCall); + if (idivCall == NULL) + return NULL; + + Assert(Type::EqualIgnoringConst(GetType(), idivCall->GetType())); + idivCall = new TypeCastExpr(GetType(), idivCall, pos); + return ::Optimize(idivCall); + } + // From here on out, we're just doing constant folding, so if both args // aren't constants then we're done... if (constArg0 == NULL || constArg1 == NULL) diff --git a/stdlib.ispc b/stdlib.ispc index 4e06f5da..b8ed2057 100644 --- a/stdlib.ispc +++ b/stdlib.ispc @@ -4264,3 +4264,678 @@ static inline bool rdrand(int64 * ptr) { return success; } } + +/////////////////////////////////////////////////////////////////////////// +// Fast vector integer division + +/* These tables and the algorithms in the __fast_idiv() functions below are + from Halide; the idea is based on the paper "Division by Invariant + Integers using Multiplication" by Granlund and Montgomery. 
+ + Copyright (c) 2012 MIT CSAIL + + Developed by: + + The Halide team + MIT CSAIL + http://halide-lang.org + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +static const uniform int64 __idiv_table_u8[][3] = { + {0, 0LL, 1}, {1, 171LL, 1}, {0, 0LL, 2}, + {1, 205LL, 2}, {1, 171LL, 2}, {2, 37LL, 2}, + {0, 0LL, 3}, {1, 57LL, 1}, {1, 205LL, 3}, + {2, 117LL, 3}, {1, 171LL, 3}, {1, 79LL, 2}, + {2, 37LL, 3}, {1, 137LL, 3}, {0, 0LL, 4}, + {1, 241LL, 4}, {1, 57LL, 2}, {1, 27LL, 1}, + {1, 205LL, 4}, {2, 135LL, 4}, {2, 117LL, 4}, + {2, 101LL, 4}, {1, 171LL, 4}, {1, 41LL, 2}, + {1, 79LL, 3}, {1, 19LL, 1}, {2, 37LL, 4}, + {2, 27LL, 4}, {1, 137LL, 4}, {2, 9LL, 4}, + {0, 0LL, 5}, {1, 249LL, 5}, {1, 241LL, 5}, + {1, 235LL, 5}, {1, 57LL, 3}, {1, 111LL, 4}, + {1, 27LL, 2}, {2, 165LL, 5}, {1, 205LL, 5}, + {1, 25LL, 2}, {2, 135LL, 5}, {1, 191LL, 5}, + {1, 187LL, 5}, {2, 109LL, 5}, {2, 101LL, 5}, + {1, 175LL, 5}, {1, 171LL, 5}, {2, 79LL, 5}, + {1, 41LL, 3}, {1, 161LL, 5}, {1, 79LL, 4}, + {1, 155LL, 5}, {1, 19LL, 2}, {1, 149LL, 5}, + {2, 37LL, 5}, {1, 9LL, 1}, {2, 27LL, 5}, + {1, 139LL, 5}, {1, 137LL, 5}, {2, 13LL, 5}, + {2, 9LL, 5}, {2, 5LL, 5}, {0, 0LL, 6}, + {1, 253LL, 6}, {1, 249LL, 6}, {1, 245LL, 6}, + {1, 121LL, 5}, {1, 119LL, 5}, {1, 235LL, 6}, + {1, 231LL, 6}, {1, 57LL, 4}, {1, 225LL, 6}, + {1, 111LL, 5}, {1, 219LL, 6}, {1, 27LL, 3}, + {1, 213LL, 6}, {2, 165LL, 6}, {1, 13LL, 2}, + {1, 205LL, 6}, {1, 203LL, 6}, {1, 25LL, 3}, + {1, 99LL, 5}, {2, 135LL, 6}, {1, 193LL, 6}, + {1, 191LL, 6}, {1, 189LL, 6}, {1, 187LL, 6}, + {1, 185LL, 6}, {1, 183LL, 6}, {1, 181LL, 6}, + {1, 179LL, 6}, {1, 177LL, 6}, {1, 175LL, 6}, + {1, 173LL, 6}, {1, 171LL, 6}, {1, 169LL, 6}, + {1, 21LL, 3}, {1, 83LL, 5}, {1, 41LL, 4}, + {1, 163LL, 6}, {1, 161LL, 6}, {2, 63LL, 6}, + {1, 79LL, 5}, {2, 57LL, 6}, {1, 155LL, 6}, + {2, 51LL, 6}, {1, 19LL, 3}, {1, 151LL, 6}, + {1, 149LL, 6}, {1, 37LL, 4}, {2, 37LL, 6}, + {1, 145LL, 6}, {1, 9LL, 2}, {1, 143LL, 6}, + {2, 27LL, 6}, {2, 25LL, 6}, {1, 139LL, 6}, + {1, 69LL, 5}, {1, 137LL, 6}, {2, 15LL, 6}, + {2, 13LL, 6}, {2, 11LL, 6}, {2, 9LL, 6}, + {2, 7LL, 6}, {2, 5LL, 6}, {2, 3LL, 6}, + {0, 0LL, 7}, {1, 255LL, 7}, {1, 
127LL, 6}, + {1, 63LL, 5}, {1, 125LL, 6}, {1, 31LL, 4}, + {1, 123LL, 6}, {1, 61LL, 5}, {1, 121LL, 6}, + {1, 15LL, 3}, {1, 119LL, 6}, {1, 59LL, 5}, + {1, 235LL, 7}, {1, 117LL, 6}, {1, 29LL, 4}, + {1, 115LL, 6}, {1, 57LL, 5}, {1, 113LL, 6}, + {1, 225LL, 7}, {1, 7LL, 2}, {1, 111LL, 6}, + {1, 55LL, 5}, {1, 219LL, 7}, {1, 109LL, 6}, + {1, 27LL, 4}, {1, 215LL, 7}, {1, 107LL, 6}, + {1, 53LL, 5}, {1, 211LL, 7}, {1, 105LL, 6}, + {1, 13LL, 3}, {1, 207LL, 7}, {1, 103LL, 6}, + {1, 51LL, 5}, {1, 203LL, 7}, {1, 101LL, 6}, + {1, 25LL, 4}, {1, 199LL, 7}, {1, 99LL, 6}, + {1, 197LL, 7}, {1, 49LL, 5}, {1, 97LL, 6}, + {1, 193LL, 7}, {1, 3LL, 1}, {1, 191LL, 7}, + {1, 95LL, 6}, {1, 189LL, 7}, {1, 47LL, 5}, + {1, 187LL, 7}, {1, 93LL, 6}, {1, 185LL, 7}, + {1, 23LL, 4}, {1, 183LL, 7}, {1, 91LL, 6}, + {1, 181LL, 7}, {1, 45LL, 5}, {1, 179LL, 7}, + {1, 89LL, 6}, {1, 177LL, 7}, {1, 11LL, 3}, + {1, 175LL, 7}, {1, 87LL, 6}, {1, 173LL, 7}, + {1, 43LL, 5}, {1, 171LL, 7}, {1, 85LL, 6}, + {1, 169LL, 7}, {2, 81LL, 7}, {1, 21LL, 4}, + {1, 167LL, 7}, {1, 83LL, 6}, {1, 165LL, 7}, + {1, 41LL, 5}, {2, 71LL, 7}, {1, 163LL, 7}, + {1, 81LL, 6}, {1, 161LL, 7}, {1, 5LL, 2}, + {2, 63LL, 7}, {1, 159LL, 7}, {1, 79LL, 6}, + {1, 157LL, 7}, {2, 57LL, 7}, {1, 39LL, 5}, + {1, 155LL, 7}, {1, 77LL, 6}, {2, 51LL, 7}, + {1, 153LL, 7}, {1, 19LL, 4}, {2, 47LL, 7}, + {1, 151LL, 7}, {1, 75LL, 6}, {1, 149LL, 7}, + {2, 41LL, 7}, {1, 37LL, 5}, {1, 147LL, 7}, + {2, 37LL, 7}, {1, 73LL, 6}, {1, 145LL, 7}, + {2, 33LL, 7}, {1, 9LL, 3}, {2, 31LL, 7}, + {1, 143LL, 7}, {1, 71LL, 6}, {2, 27LL, 7}, + {1, 141LL, 7}, {2, 25LL, 7}, {1, 35LL, 5}, + {1, 139LL, 7}, {2, 21LL, 7}, {1, 69LL, 6}, + {2, 19LL, 7}, {1, 137LL, 7}, {1, 17LL, 4}, + {2, 15LL, 7}, {1, 135LL, 7}, {2, 13LL, 7}, + {1, 67LL, 6}, {2, 11LL, 7}, {1, 133LL, 7}, + {2, 9LL, 7}, {1, 33LL, 5}, {2, 7LL, 7}, + {1, 131LL, 7}, {2, 5LL, 7}, {1, 65LL, 6}, + {2, 3LL, 7}, {1, 129LL, 7}, {0, 0LL, 8}, +}; +static const uniform int64 __idiv_table_s8[][3] = { + {0, 0LL, 1}, {1, 86LL, 0}, {0, 0LL, 
2}, + {1, 103LL, 1}, {1, 43LL, 0}, {1, 147LL, 2}, + {0, 0LL, 3}, {1, 57LL, 1}, {1, 103LL, 2}, + {1, 187LL, 3}, {1, 43LL, 1}, {1, 79LL, 2}, + {1, 147LL, 3}, {1, 137LL, 3}, {0, 0LL, 4}, + {1, 121LL, 3}, {1, 57LL, 2}, {1, 27LL, 1}, + {1, 103LL, 3}, {1, 49LL, 2}, {1, 187LL, 4}, + {1, 179LL, 4}, {1, 43LL, 2}, {1, 41LL, 2}, + {1, 79LL, 3}, {1, 19LL, 1}, {1, 147LL, 4}, + {1, 71LL, 3}, {1, 137LL, 4}, {1, 133LL, 4}, + {0, 0LL, 5}, {1, 125LL, 4}, {1, 121LL, 4}, + {1, 59LL, 3}, {1, 57LL, 3}, {1, 111LL, 4}, + {1, 27LL, 2}, {1, 211LL, 5}, {1, 103LL, 4}, + {1, 25LL, 2}, {1, 49LL, 3}, {1, 6LL, 0}, + {1, 47LL, 3}, {1, 23LL, 2}, {1, 45LL, 3}, + {1, 11LL, 1}, {1, 43LL, 3}, {1, 21LL, 2}, + {1, 41LL, 3}, {1, 81LL, 4}, {1, 79LL, 4}, + {1, 39LL, 3}, {1, 19LL, 2}, {1, 75LL, 4}, + {1, 147LL, 5}, {1, 9LL, 1}, {1, 71LL, 4}, + {1, 35LL, 3}, {1, 137LL, 5}, {1, 135LL, 5}, + {1, 133LL, 5}, {1, 131LL, 5}, {0, 0LL, 6}, + {1, 127LL, 5}, {1, 63LL, 4}, {1, 31LL, 3}, + {1, 61LL, 4}, {1, 15LL, 2}, {1, 59LL, 4}, + {1, 29LL, 3}, {1, 57LL, 4}, {1, 113LL, 5}, + {1, 7LL, 1}, {1, 55LL, 4}, {1, 27LL, 3}, + {1, 107LL, 5}, {1, 53LL, 4}, {1, 13LL, 2}, + {1, 103LL, 5}, {1, 51LL, 4}, {1, 25LL, 3}, + {1, 99LL, 5}, {1, 49LL, 4}, {1, 97LL, 5}, + {1, 3LL, 0}, {1, 95LL, 5}, {1, 47LL, 4}, + {1, 93LL, 5}, {1, 23LL, 3}, {1, 91LL, 5}, + {1, 45LL, 4}, {1, 89LL, 5}, {1, 11LL, 2}, + {1, 87LL, 5}, {1, 43LL, 4}, {1, 85LL, 5}, + {1, 21LL, 3}, {1, 83LL, 5}, {1, 41LL, 4}, + {1, 163LL, 6}, {1, 81LL, 5}, {1, 5LL, 1}, + {1, 79LL, 5}, {1, 157LL, 6}, {1, 39LL, 4}, + {1, 77LL, 5}, {1, 19LL, 3}, {1, 151LL, 6}, + {1, 75LL, 5}, {1, 37LL, 4}, {1, 147LL, 6}, + {1, 73LL, 5}, {1, 9LL, 2}, {1, 143LL, 6}, + {1, 71LL, 5}, {1, 141LL, 6}, {1, 35LL, 4}, + {1, 69LL, 5}, {1, 137LL, 6}, {1, 17LL, 3}, + {1, 135LL, 6}, {1, 67LL, 5}, {1, 133LL, 6}, + {1, 33LL, 4}, {1, 131LL, 6}, {1, 65LL, 5}, + {0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7}, + {0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7}, + {0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7}, + {0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7}, 
+ {0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7}, + {0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7}, + {0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7}, + {0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7}, + {0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7}, + {0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7}, + {0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7}, + {0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7}, + {0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7}, + {0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7}, + {0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7}, + {0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7}, + {0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7}, + {0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7}, + {0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7}, + {0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7}, + {0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7}, + {0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7}, + {0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7}, + {0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7}, + {0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7}, + {0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7}, + {0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7}, + {0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7}, + {0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7}, + {0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7}, + {0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7}, + {0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7}, + {0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7}, + {0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7}, + {0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7}, + {0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7}, + {0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7}, + {0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7}, + {0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7}, + {0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7}, + {0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7}, + {0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7}, + {0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7}, +}; +static const uniform int64 __idiv_table_u16[][3] = { + {0, 0LL, 1}, {1, 43691LL, 1}, {0, 0LL, 2}, + {1, 52429LL, 2}, {1, 43691LL, 2}, {2, 9363LL, 2}, + {0, 0LL, 3}, {1, 58255LL, 3}, {1, 52429LL, 3}, + {1, 47663LL, 3}, {1, 43691LL, 3}, {1, 20165LL, 2}, + {2, 9363LL, 3}, {1, 34953LL, 3}, {0, 0LL, 4}, + {1, 61681LL, 4}, {1, 58255LL, 4}, {1, 55189LL, 4}, + {1, 52429LL, 4}, {2, 34329LL, 4}, {1, 
47663LL, 4}, + {2, 25645LL, 4}, {1, 43691LL, 4}, {2, 18351LL, 4}, + {1, 20165LL, 3}, {2, 12137LL, 4}, {2, 9363LL, 4}, + {1, 18079LL, 3}, {1, 34953LL, 4}, {2, 2115LL, 4}, + {0, 0LL, 5}, {1, 63551LL, 5}, {1, 61681LL, 5}, + {1, 59919LL, 5}, {1, 58255LL, 5}, {1, 7085LL, 2}, + {1, 55189LL, 5}, {2, 42011LL, 5}, {1, 52429LL, 5}, + {2, 36765LL, 5}, {2, 34329LL, 5}, {1, 48771LL, 5}, + {1, 47663LL, 5}, {1, 11651LL, 3}, {2, 25645LL, 5}, + {2, 23705LL, 5}, {1, 43691LL, 5}, {2, 20063LL, 5}, + {2, 18351LL, 5}, {1, 41121LL, 5}, {1, 20165LL, 4}, + {1, 39569LL, 5}, {2, 12137LL, 5}, {2, 10725LL, 5}, + {2, 9363LL, 5}, {2, 8049LL, 5}, {1, 18079LL, 4}, + {1, 35545LL, 5}, {1, 34953LL, 5}, {1, 8595LL, 3}, + {2, 2115LL, 5}, {2, 1041LL, 5}, {0, 0LL, 6}, + {1, 4033LL, 2}, {1, 63551LL, 6}, {1, 31301LL, 5}, + {1, 61681LL, 6}, {2, 56039LL, 6}, {1, 59919LL, 6}, + {1, 59075LL, 6}, {1, 58255LL, 6}, {1, 57457LL, 6}, + {1, 7085LL, 3}, {2, 46313LL, 6}, {1, 55189LL, 6}, + {1, 6809LL, 3}, {2, 42011LL, 6}, {1, 53093LL, 6}, + {1, 52429LL, 6}, {1, 25891LL, 5}, {2, 36765LL, 6}, + {1, 25267LL, 5}, {2, 34329LL, 6}, {1, 49345LL, 6}, + {1, 48771LL, 6}, {1, 48211LL, 6}, {1, 47663LL, 6}, + {2, 28719LL, 6}, {1, 11651LL, 4}, {2, 26647LL, 6}, + {2, 25645LL, 6}, {2, 24665LL, 6}, {2, 23705LL, 6}, + {1, 44151LL, 6}, {1, 43691LL, 6}, {2, 20945LL, 6}, + {2, 20063LL, 6}, {1, 42367LL, 6}, {2, 18351LL, 6}, + {1, 5191LL, 3}, {1, 41121LL, 6}, {1, 20361LL, 5}, + {1, 20165LL, 5}, {1, 19973LL, 5}, {1, 39569LL, 6}, + {2, 12863LL, 6}, {2, 12137LL, 6}, {1, 2405LL, 2}, + {2, 10725LL, 6}, {1, 37787LL, 6}, {2, 9363LL, 6}, + {1, 18559LL, 5}, {2, 8049LL, 6}, {2, 7409LL, 6}, + {1, 18079LL, 5}, {1, 35849LL, 6}, {1, 35545LL, 6}, + {2, 4957LL, 6}, {1, 34953LL, 6}, {1, 4333LL, 3}, + {1, 8595LL, 4}, {2, 2665LL, 6}, {2, 2115LL, 6}, + {2, 1573LL, 6}, {2, 1041LL, 6}, {2, 517LL, 6}, + {0, 0LL, 7}, {1, 16257LL, 5}, {1, 4033LL, 3}, + {1, 16009LL, 5}, {1, 63551LL, 7}, {1, 63073LL, 7}, + {1, 31301LL, 6}, {1, 31069LL, 6}, {1, 61681LL, 7}, + {1, 
61231LL, 7}, {2, 56039LL, 7}, {1, 30175LL, 6}, + {1, 59919LL, 7}, {1, 29747LL, 6}, {1, 59075LL, 7}, + {1, 29331LL, 6}, {1, 58255LL, 7}, {1, 57853LL, 7}, + {1, 57457LL, 7}, {1, 28533LL, 6}, {1, 7085LL, 4}, + {1, 14075LL, 5}, {2, 46313LL, 7}, {1, 27777LL, 6}, + {1, 55189LL, 7}, {1, 13707LL, 5}, {1, 6809LL, 4}, + {2, 42705LL, 7}, {2, 42011LL, 7}, {1, 53431LL, 7}, + {1, 53093LL, 7}, {1, 52759LL, 7}, {1, 52429LL, 7}, + {2, 38671LL, 7}, {1, 25891LL, 6}, {1, 6433LL, 4}, + {2, 36765LL, 7}, {2, 36145LL, 7}, {1, 25267LL, 6}, + {2, 34927LL, 7}, {2, 34329LL, 7}, {1, 49637LL, 7}, + {1, 49345LL, 7}, {2, 32577LL, 7}, {1, 48771LL, 7}, + {2, 31443LL, 7}, {1, 48211LL, 7}, {1, 47935LL, 7}, + {1, 47663LL, 7}, {2, 29251LL, 7}, {2, 28719LL, 7}, + {1, 2929LL, 3}, {1, 11651LL, 5}, {1, 23173LL, 6}, + {2, 26647LL, 7}, {1, 2865LL, 3}, {2, 25645LL, 7}, + {1, 1417LL, 2}, {2, 24665LL, 7}, {1, 44859LL, 7}, + {2, 23705LL, 7}, {2, 23233LL, 7}, {1, 44151LL, 7}, + {1, 2745LL, 3}, {1, 43691LL, 7}, {2, 21393LL, 7}, + {2, 20945LL, 7}, {1, 43019LL, 7}, {2, 20063LL, 7}, + {1, 21291LL, 6}, {1, 42367LL, 7}, {1, 21077LL, 6}, + {2, 18351LL, 7}, {1, 41735LL, 7}, {1, 5191LL, 4}, + {2, 17111LL, 7}, {1, 41121LL, 7}, {2, 16305LL, 7}, + {1, 20361LL, 6}, {1, 40525LL, 7}, {1, 20165LL, 6}, + {1, 40137LL, 7}, {1, 19973LL, 6}, {1, 39757LL, 7}, + {1, 39569LL, 7}, {2, 13231LL, 7}, {2, 12863LL, 7}, + {1, 39017LL, 7}, {2, 12137LL, 7}, {2, 11779LL, 7}, + {1, 2405LL, 3}, {2, 11073LL, 7}, {2, 10725LL, 7}, + {1, 18979LL, 6}, {1, 37787LL, 7}, {2, 9699LL, 7}, + {2, 9363LL, 7}, {1, 37283LL, 7}, {1, 18559LL, 6}, + {2, 8373LL, 7}, {2, 8049LL, 7}, {1, 4579LL, 4}, + {2, 7409LL, 7}, {2, 7093LL, 7}, {1, 18079LL, 6}, + {1, 36003LL, 7}, {1, 35849LL, 7}, {2, 5857LL, 7}, + {1, 35545LL, 7}, {1, 35395LL, 7}, {2, 4957LL, 7}, + {1, 35099LL, 7}, {1, 34953LL, 7}, {1, 4351LL, 4}, + {1, 4333LL, 4}, {2, 3507LL, 7}, {1, 8595LL, 5}, + {2, 2943LL, 7}, {2, 2665LL, 7}, {1, 16981LL, 6}, + {2, 2115LL, 7}, {2, 1843LL, 7}, {2, 1573LL, 7}, + {1, 33421LL, 7}, 
{2, 1041LL, 7}, {1, 33157LL, 7}, + {2, 517LL, 7}, {1, 32897LL, 7}, {0, 0LL, 8}, +}; +static const uniform int64 __idiv_table_s16[][3] = { + {0, 0LL, 1}, {1, 21846LL, 0}, {0, 0LL, 2}, + {1, 26215LL, 1}, {1, 10923LL, 0}, {1, 18725LL, 1}, + {0, 0LL, 3}, {1, 7282LL, 0}, {1, 26215LL, 2}, + {1, 5958LL, 0}, {1, 10923LL, 1}, {1, 20165LL, 2}, + {1, 18725LL, 2}, {1, 34953LL, 3}, {0, 0LL, 4}, + {1, 30841LL, 3}, {1, 3641LL, 0}, {1, 55189LL, 4}, + {1, 26215LL, 3}, {1, 49933LL, 4}, {1, 2979LL, 0}, + {1, 45591LL, 4}, {1, 10923LL, 2}, {1, 5243LL, 1}, + {1, 20165LL, 3}, {1, 38837LL, 4}, {1, 18725LL, 3}, + {1, 18079LL, 3}, {1, 34953LL, 4}, {1, 16913LL, 3}, + {0, 0LL, 5}, {1, 1986LL, 0}, {1, 30841LL, 4}, + {1, 3745LL, 1}, {1, 3641LL, 1}, {1, 7085LL, 2}, + {1, 55189LL, 5}, {1, 26887LL, 4}, {1, 26215LL, 4}, + {1, 51151LL, 5}, {1, 49933LL, 5}, {1, 12193LL, 3}, + {1, 2979LL, 1}, {1, 11651LL, 3}, {1, 45591LL, 5}, + {1, 44621LL, 5}, {1, 10923LL, 3}, {1, 2675LL, 1}, + {1, 5243LL, 2}, {1, 41121LL, 5}, {1, 20165LL, 4}, + {1, 19785LL, 4}, {1, 38837LL, 5}, {1, 38131LL, 5}, + {1, 18725LL, 4}, {1, 36793LL, 5}, {1, 18079LL, 4}, + {1, 17773LL, 4}, {1, 34953LL, 5}, {1, 8595LL, 3}, + {1, 16913LL, 4}, {1, 33289LL, 5}, {0, 0LL, 6}, + {1, 4033LL, 2}, {1, 993LL, 0}, {1, 31301LL, 5}, + {1, 30841LL, 5}, {1, 15197LL, 4}, {1, 3745LL, 2}, + {1, 14769LL, 4}, {1, 3641LL, 2}, {1, 57457LL, 6}, + {1, 7085LL, 3}, {1, 55925LL, 6}, {1, 55189LL, 6}, + {1, 6809LL, 3}, {1, 26887LL, 5}, {1, 26547LL, 5}, + {1, 26215LL, 5}, {1, 25891LL, 5}, {1, 51151LL, 6}, + {1, 25267LL, 5}, {1, 49933LL, 6}, {1, 24673LL, 5}, + {1, 12193LL, 4}, {1, 48211LL, 6}, {1, 2979LL, 2}, + {1, 5891LL, 3}, {1, 11651LL, 4}, {1, 11523LL, 4}, + {1, 45591LL, 6}, {1, 45101LL, 6}, {1, 44621LL, 6}, + {1, 44151LL, 6}, {1, 10923LL, 4}, {1, 43241LL, 6}, + {1, 2675LL, 2}, {1, 662LL, 0}, {1, 5243LL, 3}, + {1, 5191LL, 3}, {1, 41121LL, 6}, {1, 20361LL, 5}, + {1, 20165LL, 5}, {1, 19973LL, 5}, {1, 19785LL, 5}, + {1, 1225LL, 1}, {1, 38837LL, 6}, {1, 2405LL, 2}, + {1, 
38131LL, 6}, {1, 37787LL, 6}, {1, 18725LL, 5}, + {1, 18559LL, 5}, {1, 36793LL, 6}, {1, 36473LL, 6}, + {1, 18079LL, 5}, {1, 35849LL, 6}, {1, 17773LL, 5}, + {1, 35247LL, 6}, {1, 34953LL, 6}, {1, 4333LL, 3}, + {1, 8595LL, 4}, {1, 34101LL, 6}, {1, 16913LL, 5}, + {1, 33555LL, 6}, {1, 33289LL, 6}, {1, 33027LL, 6}, + {0, 0LL, 7}, {1, 16257LL, 5}, {1, 4033LL, 3}, + {1, 16009LL, 5}, {1, 993LL, 1}, {1, 31537LL, 6}, + {1, 31301LL, 6}, {1, 31069LL, 6}, {1, 30841LL, 6}, + {1, 3827LL, 3}, {1, 15197LL, 5}, {1, 30175LL, 6}, + {1, 3745LL, 3}, {1, 29747LL, 6}, {1, 14769LL, 5}, + {1, 29331LL, 6}, {1, 3641LL, 3}, {1, 28927LL, 6}, + {1, 57457LL, 7}, {1, 28533LL, 6}, {1, 7085LL, 4}, + {1, 14075LL, 5}, {1, 55925LL, 7}, {1, 27777LL, 6}, + {1, 55189LL, 7}, {1, 13707LL, 5}, {1, 6809LL, 4}, + {1, 54121LL, 7}, {1, 26887LL, 6}, {1, 6679LL, 4}, + {1, 26547LL, 6}, {1, 6595LL, 4}, {1, 26215LL, 6}, + {1, 6513LL, 4}, {1, 25891LL, 6}, {1, 6433LL, 4}, + {1, 51151LL, 7}, {1, 50841LL, 7}, {1, 25267LL, 6}, + {1, 6279LL, 4}, {1, 49933LL, 7}, {1, 24819LL, 6}, + {1, 24673LL, 6}, {1, 49057LL, 7}, {1, 12193LL, 5}, + {1, 24245LL, 6}, {1, 48211LL, 7}, {1, 749LL, 1}, + {1, 2979LL, 3}, {1, 23697LL, 6}, {1, 5891LL, 4}, + {1, 2929LL, 3}, {1, 11651LL, 5}, {1, 23173LL, 6}, + {1, 11523LL, 5}, {1, 2865LL, 3}, {1, 45591LL, 7}, + {1, 1417LL, 2}, {1, 45101LL, 7}, {1, 11215LL, 5}, + {1, 44621LL, 7}, {1, 44385LL, 7}, {1, 44151LL, 7}, + {1, 2745LL, 3}, {1, 10923LL, 5}, {1, 43465LL, 7}, + {1, 43241LL, 7}, {1, 43019LL, 7}, {1, 2675LL, 3}, + {1, 21291LL, 6}, {1, 331LL, 0}, {1, 21077LL, 6}, + {1, 5243LL, 4}, {1, 41735LL, 7}, {1, 5191LL, 4}, + {1, 10331LL, 5}, {1, 41121LL, 7}, {1, 40921LL, 7}, + {1, 20361LL, 6}, {1, 40525LL, 7}, {1, 20165LL, 6}, + {1, 20069LL, 6}, {1, 19973LL, 6}, {1, 39757LL, 7}, + {1, 19785LL, 6}, {1, 4923LL, 4}, {1, 1225LL, 2}, + {1, 39017LL, 7}, {1, 38837LL, 7}, {1, 19329LL, 6}, + {1, 2405LL, 3}, {1, 38305LL, 7}, {1, 38131LL, 7}, + {1, 18979LL, 6}, {1, 37787LL, 7}, {1, 18809LL, 6}, + {1, 18725LL, 6}, {1, 
37283LL, 7}, {1, 18559LL, 6}, + {1, 36955LL, 7}, {1, 36793LL, 7}, {1, 4579LL, 4}, + {1, 36473LL, 7}, {1, 36315LL, 7}, {1, 18079LL, 6}, + {1, 36003LL, 7}, {1, 35849LL, 7}, {1, 35697LL, 7}, + {1, 17773LL, 6}, {1, 8849LL, 5}, {1, 35247LL, 7}, + {1, 35099LL, 7}, {1, 34953LL, 7}, {1, 4351LL, 4}, + {1, 4333LL, 4}, {1, 17261LL, 6}, {1, 8595LL, 5}, + {1, 535LL, 1}, {1, 34101LL, 7}, {1, 16981LL, 6}, + {1, 16913LL, 6}, {1, 16845LL, 6}, {1, 33555LL, 7}, + {1, 33421LL, 7}, {1, 33289LL, 7}, {1, 33157LL, 7}, + {1, 33027LL, 7}, {1, 32897LL, 7}, {1, 32769LL, 7}, +}; +static const uniform int64 __idiv_table_u32[][3] = { + {0, 0LL, 1}, {1, 2863311531LL, 1}, {0, 0LL, 2}, + {1, 3435973837LL, 2}, {1, 2863311531LL, 2}, {2, 613566757LL, 2}, + {0, 0LL, 3}, {1, 954437177LL, 1}, {1, 3435973837LL, 3}, + {1, 3123612579LL, 3}, {1, 2863311531LL, 3}, {1, 1321528399LL, 2}, + {2, 613566757LL, 3}, {1, 2290649225LL, 3}, {0, 0LL, 4}, + {1, 4042322161LL, 4}, {1, 954437177LL, 2}, {2, 2938661835LL, 4}, + {1, 3435973837LL, 4}, {2, 2249744775LL, 4}, {1, 3123612579LL, 4}, + {1, 2987803337LL, 4}, {1, 2863311531LL, 4}, {1, 1374389535LL, 3}, + {1, 1321528399LL, 3}, {2, 795364315LL, 4}, {2, 613566757LL, 4}, + {1, 2369637129LL, 4}, {1, 2290649225LL, 4}, {2, 138547333LL, 4}, + {0, 0LL, 5}, {1, 1041204193LL, 3}, {1, 4042322161LL, 5}, + {2, 3558687189LL, 5}, {1, 954437177LL, 3}, {2, 3134165325LL, 5}, + {2, 2938661835LL, 5}, {2, 2753184165LL, 5}, {1, 3435973837LL, 5}, + {1, 3352169597LL, 5}, {2, 2249744775LL, 5}, {1, 799063683LL, 3}, + {1, 3123612579LL, 5}, {2, 1813430637LL, 5}, {1, 2987803337LL, 5}, + {1, 2924233053LL, 5}, {1, 2863311531LL, 5}, {1, 1402438301LL, 4}, + {1, 1374389535LL, 4}, {1, 2694881441LL, 5}, {1, 1321528399LL, 4}, + {2, 891408307LL, 5}, {2, 795364315LL, 5}, {2, 702812831LL, 5}, + {2, 613566757LL, 5}, {2, 527452125LL, 5}, {1, 2369637129LL, 5}, + {1, 582368447LL, 3}, {1, 2290649225LL, 5}, {1, 1126548799LL, 4}, + {2, 138547333LL, 5}, {2, 68174085LL, 5}, {0, 0LL, 6}, + {1, 4228890877LL, 6}, {1, 
1041204193LL, 4}, {1, 128207979LL, 1}, + {1, 4042322161LL, 6}, {1, 1991868891LL, 5}, {2, 3558687189LL, 6}, + {1, 3871519817LL, 6}, {1, 954437177LL, 4}, {2, 3235934265LL, 6}, + {2, 3134165325LL, 6}, {1, 458129845LL, 3}, {2, 2938661835LL, 6}, + {1, 892460737LL, 4}, {2, 2753184165LL, 6}, {1, 3479467177LL, 6}, + {1, 3435973837LL, 6}, {1, 3393554407LL, 6}, {1, 3352169597LL, 6}, + {1, 827945503LL, 4}, {2, 2249744775LL, 6}, {1, 3233857729LL, 6}, + {1, 799063683LL, 4}, {1, 789879043LL, 4}, {1, 3123612579LL, 6}, + {1, 3088515809LL, 6}, {2, 1813430637LL, 6}, {2, 1746305385LL, 6}, + {1, 2987803337LL, 6}, {1, 2955676419LL, 6}, {1, 2924233053LL, 6}, + {2, 1491936009LL, 6}, {1, 2863311531LL, 6}, {2, 1372618415LL, 6}, + {1, 1402438301LL, 5}, {1, 2776544515LL, 6}, {1, 1374389535LL, 5}, + {2, 1148159575LL, 6}, {1, 2694881441LL, 6}, {2, 1042467791LL, 6}, + {1, 1321528399LL, 5}, {2, 940802361LL, 6}, {2, 891408307LL, 6}, + {2, 842937507LL, 6}, {2, 795364315LL, 6}, {2, 748664025LL, 6}, + {2, 702812831LL, 6}, {2, 657787785LL, 6}, {2, 613566757LL, 6}, + {2, 570128403LL, 6}, {2, 527452125LL, 6}, {2, 485518043LL, 6}, + {1, 2369637129LL, 6}, {2, 403800345LL, 6}, {1, 582368447LL, 4}, + {1, 1154949189LL, 5}, {1, 2290649225LL, 6}, {2, 248469183LL, 6}, + {1, 1126548799LL, 5}, {2, 174592167LL, 6}, {2, 138547333LL, 6}, + {1, 274877907LL, 3}, {2, 68174085LL, 6}, {2, 33818641LL, 6}, + {0, 0LL, 7}, {1, 266354561LL, 3}, {1, 4228890877LL, 7}, + {1, 4196609267LL, 7}, {1, 1041204193LL, 5}, {1, 4133502361LL, 7}, + {1, 128207979LL, 2}, {1, 4072265289LL, 7}, {1, 4042322161LL, 7}, + {1, 125400505LL, 2}, {1, 1991868891LL, 6}, {1, 1977538899LL, 6}, + {2, 3558687189LL, 7}, {1, 974744351LL, 5}, {1, 3871519817LL, 7}, + {1, 3844446251LL, 7}, {1, 954437177LL, 5}, {1, 3791419407LL, 7}, + {2, 3235934265LL, 7}, {1, 3739835469LL, 7}, {2, 3134165325LL, 7}, + {1, 3689636335LL, 7}, {1, 458129845LL, 4}, {1, 910191745LL, 5}, + {2, 2938661835LL, 7}, {1, 3593175255LL, 7}, {1, 892460737LL, 5}, + {1, 3546811703LL, 7}, {2, 
2753184165LL, 7}, {1, 875407347LL, 5}, + {1, 3479467177LL, 7}, {2, 2620200175LL, 7}, {1, 3435973837LL, 7}, + {1, 3414632385LL, 7}, {1, 3393554407LL, 7}, {1, 3372735055LL, 7}, + {1, 3352169597LL, 7}, {1, 1665926709LL, 6}, {1, 827945503LL, 5}, + {1, 1645975491LL, 6}, {2, 2249744775LL, 7}, {1, 1626496491LL, 6}, + {1, 3233857729LL, 7}, {2, 2134925265LL, 7}, {1, 799063683LL, 5}, + {2, 2060591247LL, 7}, {1, 789879043LL, 5}, {1, 1570730897LL, 6}, + {1, 3123612579LL, 7}, {2, 1916962805LL, 7}, {1, 3088515809LL, 7}, + {2, 1847555765LL, 7}, {2, 1813430637LL, 7}, {1, 3037324939LL, 7}, + {2, 1746305385LL, 7}, {1, 3004130131LL, 7}, {1, 2987803337LL, 7}, + {2, 1648338801LL, 7}, {1, 2955676419LL, 7}, {1, 2939870663LL, 7}, + {1, 2924233053LL, 7}, {2, 1522554545LL, 7}, {2, 1491936009LL, 7}, + {1, 2878302691LL, 7}, {1, 2863311531LL, 7}, {1, 356059465LL, 4}, + {2, 1372618415LL, 7}, {2, 1343553873LL, 7}, {1, 1402438301LL, 6}, + {2, 1286310003LL, 7}, {1, 2776544515LL, 7}, {1, 1381296015LL, 6}, + {1, 1374389535LL, 6}, {1, 42735993LL, 1}, {2, 1148159575LL, 7}, + {1, 2708156719LL, 7}, {1, 2694881441LL, 7}, {1, 1340867839LL, 6}, + {2, 1042467791LL, 7}, {1, 663956297LL, 5}, {1, 1321528399LL, 6}, + {1, 2630410593LL, 7}, {2, 940802361LL, 7}, {1, 2605477791LL, 7}, + {2, 891408307LL, 7}, {1, 2581013211LL, 7}, {2, 842937507LL, 7}, + {1, 1278501893LL, 6}, {2, 795364315LL, 7}, {2, 771906565LL, 7}, + {2, 748664025LL, 7}, {2, 725633745LL, 7}, {2, 702812831LL, 7}, + {2, 680198441LL, 7}, {2, 657787785LL, 7}, {2, 635578121LL, 7}, + {2, 613566757LL, 7}, {1, 2443359173LL, 7}, {2, 570128403LL, 7}, + {2, 548696263LL, 7}, {2, 527452125LL, 7}, {1, 1200340205LL, 6}, + {2, 485518043LL, 7}, {2, 464823301LL, 7}, {1, 2369637129LL, 7}, + {2, 423966729LL, 7}, {2, 403800345LL, 7}, {2, 383805589LL, 7}, + {1, 582368447LL, 5}, {2, 344322273LL, 7}, {1, 1154949189LL, 6}, + {1, 2300233531LL, 7}, {1, 2290649225LL, 7}, {1, 285143057LL, 4}, + {2, 248469183LL, 7}, {1, 2262369605LL, 7}, {1, 1126548799LL, 6}, + {2, 192835267LL, 
7}, {2, 174592167LL, 7}, {2, 156496785LL, 7}, + {2, 138547333LL, 7}, {2, 120742053LL, 7}, {1, 274877907LL, 4}, + {1, 2190262207LL, 7}, {2, 68174085LL, 7}, {1, 2172947881LL, 7}, + {2, 33818641LL, 7}, {1, 2155905153LL, 7}, {0, 0LL, 8}, +}; +static const uniform int64 __idiv_table_s32[][3] = { + {0, 0LL, 1}, {1, 1431655766LL, 0}, {0, 0LL, 2}, + {1, 1717986919LL, 1}, {1, 715827883LL, 0}, {1, 2454267027LL, 2}, + {0, 0LL, 3}, {1, 954437177LL, 1}, {1, 1717986919LL, 2}, + {1, 780903145LL, 1}, {1, 715827883LL, 1}, {1, 1321528399LL, 2}, + {1, 2454267027LL, 3}, {1, 2290649225LL, 3}, {0, 0LL, 4}, + {1, 2021161081LL, 3}, {1, 954437177LL, 2}, {1, 1808407283LL, 3}, + {1, 1717986919LL, 3}, {1, 818089009LL, 2}, {1, 780903145LL, 2}, + {1, 2987803337LL, 4}, {1, 715827883LL, 2}, {1, 1374389535LL, 3}, + {1, 1321528399LL, 3}, {1, 1272582903LL, 3}, {1, 2454267027LL, 4}, + {1, 2369637129LL, 4}, {1, 2290649225LL, 4}, {1, 2216757315LL, 4}, + {0, 0LL, 5}, {1, 1041204193LL, 3}, {1, 2021161081LL, 4}, + {1, 3926827243LL, 5}, {1, 954437177LL, 3}, {1, 3714566311LL, 5}, + {1, 1808407283LL, 4}, {1, 3524075731LL, 5}, {1, 1717986919LL, 4}, + {1, 1676084799LL, 4}, {1, 818089009LL, 3}, {1, 799063683LL, 3}, + {1, 780903145LL, 3}, {1, 3054198967LL, 5}, {1, 2987803337LL, 5}, + {1, 2924233053LL, 5}, {1, 715827883LL, 3}, {1, 1402438301LL, 4}, + {1, 1374389535LL, 4}, {1, 2694881441LL, 5}, {1, 1321528399LL, 4}, + {1, 1296593901LL, 4}, {1, 1272582903LL, 4}, {1, 156180629LL, 1}, + {1, 2454267027LL, 5}, {1, 2411209711LL, 5}, {1, 2369637129LL, 5}, + {1, 582368447LL, 3}, {1, 2290649225LL, 5}, {1, 1126548799LL, 4}, + {1, 2216757315LL, 5}, {1, 2181570691LL, 5}, {0, 0LL, 6}, + {1, 2114445439LL, 5}, {1, 1041204193LL, 4}, {1, 128207979LL, 1}, + {1, 2021161081LL, 5}, {1, 1991868891LL, 5}, {1, 3926827243LL, 6}, + {1, 3871519817LL, 6}, {1, 954437177LL, 4}, {1, 3765450781LL, 6}, + {1, 3714566311LL, 6}, {1, 458129845LL, 3}, {1, 1808407283LL, 5}, + {1, 892460737LL, 4}, {1, 3524075731LL, 6}, {1, 1739733589LL, 5}, + {1, 
1717986919LL, 5}, {1, 424194301LL, 3}, {1, 1676084799LL, 5}, + {1, 827945503LL, 4}, {1, 818089009LL, 4}, {1, 1616928865LL, 5}, + {1, 799063683LL, 4}, {1, 789879043LL, 4}, {1, 780903145LL, 4}, + {1, 3088515809LL, 6}, {1, 3054198967LL, 6}, {1, 3020636341LL, 6}, + {1, 2987803337LL, 6}, {1, 738919105LL, 4}, {1, 2924233053LL, 6}, + {1, 2893451653LL, 6}, {1, 715827883LL, 4}, {1, 354224107LL, 3}, + {1, 1402438301LL, 5}, {1, 2776544515LL, 6}, {1, 1374389535LL, 5}, + {1, 680390859LL, 4}, {1, 2694881441LL, 6}, {1, 333589693LL, 3}, + {1, 1321528399LL, 5}, {1, 2617884829LL, 6}, {1, 1296593901LL, 5}, + {1, 1284476201LL, 5}, {1, 1272582903LL, 5}, {1, 2521815661LL, 6}, + {1, 156180629LL, 2}, {1, 2476377541LL, 6}, {1, 2454267027LL, 6}, + {1, 1216273925LL, 5}, {1, 2411209711LL, 6}, {1, 1195121335LL, 5}, + {1, 2369637129LL, 6}, {1, 2349383821LL, 6}, {1, 582368447LL, 4}, + {1, 1154949189LL, 5}, {1, 2290649225LL, 6}, {1, 70991195LL, 1}, + {1, 1126548799LL, 5}, {1, 558694933LL, 4}, {1, 2216757315LL, 6}, + {1, 274877907LL, 3}, {1, 2181570691LL, 6}, {1, 2164392969LL, 6}, + {0, 0LL, 7}, {1, 266354561LL, 3}, {1, 2114445439LL, 6}, + {1, 1049152317LL, 5}, {1, 1041204193LL, 5}, {1, 4133502361LL, 7}, + {1, 128207979LL, 2}, {1, 4072265289LL, 7}, {1, 2021161081LL, 6}, + {1, 125400505LL, 2}, {1, 1991868891LL, 6}, {1, 1977538899LL, 6}, + {1, 3926827243LL, 7}, {1, 974744351LL, 5}, {1, 3871519817LL, 7}, + {1, 961111563LL, 5}, {1, 954437177LL, 5}, {1, 3791419407LL, 7}, + {1, 3765450781LL, 7}, {1, 1869917735LL, 6}, {1, 3714566311LL, 7}, + {1, 230602271LL, 3}, {1, 458129845LL, 4}, {1, 910191745LL, 5}, + {1, 1808407283LL, 6}, {1, 3593175255LL, 7}, {1, 892460737LL, 5}, + {1, 443351463LL, 4}, {1, 3524075731LL, 7}, {1, 875407347LL, 5}, + {1, 1739733589LL, 6}, {1, 432197967LL, 4}, {1, 1717986919LL, 6}, + {1, 3414632385LL, 7}, {1, 424194301LL, 4}, {1, 210795941LL, 3}, + {1, 1676084799LL, 6}, {1, 1665926709LL, 6}, {1, 827945503LL, 5}, + {1, 1645975491LL, 6}, {1, 818089009LL, 5}, {1, 1626496491LL, 6}, + {1, 
1616928865LL, 6}, {1, 3214946281LL, 7}, {1, 799063683LL, 5}, + {1, 397222409LL, 4}, {1, 789879043LL, 5}, {1, 1570730897LL, 6}, + {1, 780903145LL, 5}, {1, 3105965051LL, 7}, {1, 3088515809LL, 7}, + {1, 3071261531LL, 7}, {1, 3054198967LL, 7}, {1, 759331235LL, 5}, + {1, 3020636341LL, 7}, {1, 3004130131LL, 7}, {1, 2987803337LL, 7}, + {1, 2971653049LL, 7}, {1, 738919105LL, 5}, {1, 2939870663LL, 7}, + {1, 2924233053LL, 7}, {1, 2908760921LL, 7}, {1, 2893451653LL, 7}, + {1, 2878302691LL, 7}, {1, 715827883LL, 5}, {1, 356059465LL, 4}, + {1, 354224107LL, 4}, {1, 2819260585LL, 7}, {1, 1402438301LL, 6}, + {1, 1395319325LL, 6}, {1, 2776544515LL, 7}, {1, 1381296015LL, 6}, + {1, 1374389535LL, 6}, {1, 42735993LL, 1}, {1, 680390859LL, 5}, + {1, 2708156719LL, 7}, {1, 2694881441LL, 7}, {1, 1340867839LL, 6}, + {1, 333589693LL, 4}, {1, 663956297LL, 5}, {1, 1321528399LL, 6}, + {1, 2630410593LL, 7}, {1, 2617884829LL, 7}, {1, 81421181LL, 2}, + {1, 1296593901LL, 6}, {1, 2581013211LL, 7}, {1, 1284476201LL, 6}, + {1, 1278501893LL, 6}, {1, 1272582903LL, 6}, {1, 2533436931LL, 7}, + {1, 2521815661LL, 7}, {1, 2510300521LL, 7}, {1, 156180629LL, 3}, + {1, 2487582869LL, 7}, {1, 2476377541LL, 7}, {1, 2465272709LL, 7}, + {1, 2454267027LL, 7}, {1, 2443359173LL, 7}, {1, 1216273925LL, 6}, + {1, 605457945LL, 5}, {1, 2411209711LL, 7}, {1, 1200340205LL, 6}, + {1, 1195121335LL, 6}, {1, 2379895299LL, 7}, {1, 2369637129LL, 7}, + {1, 2359467013LL, 7}, {1, 2349383821LL, 7}, {1, 2339386443LL, 7}, + {1, 582368447LL, 5}, {1, 2319644785LL, 7}, {1, 1154949189LL, 6}, + {1, 2300233531LL, 7}, {1, 2290649225LL, 7}, {1, 285143057LL, 4}, + {1, 70991195LL, 2}, {1, 2262369605LL, 7}, {1, 1126548799LL, 6}, + {1, 1121950641LL, 6}, {1, 558694933LL, 5}, {1, 2225732041LL, 7}, + {1, 2216757315LL, 7}, {1, 2207854675LL, 7}, {1, 274877907LL, 4}, + {1, 2190262207LL, 7}, {1, 2181570691LL, 7}, {1, 2172947881LL, 7}, + {1, 2164392969LL, 7}, {1, 2155905153LL, 7}, {1, 2147483649LL, 7}, +}; + +__declspec(safe) +static unmasked unsigned int8 
__fast_idiv(unsigned int8 numerator,
+                                        uniform unsigned int8 divisor) {
+    // Unsigned 8-bit divide of a varying numerator by a uniform divisor.
+    // Row (divisor-2) of __idiv_table_u8 supplies {method, multiplier,
+    // shift} for that divisor.
+    uniform int64 method = __idiv_table_u8[divisor-2][0];
+    uniform int64 multiplier = __idiv_table_u8[divisor-2][1];
+    uniform int64 shift = __idiv_table_u8[divisor-2][2];
+
+    // the arithmetic is done in 16 bits; the quotient is read from the
+    // bits above bit 8 of the product
+    unsigned int16 mult = multiplier;
+    unsigned int16 val = numerator;
+    if (method == 0)
+        // shift-only table entry
+        return numerator >> shift;
+    else if (method == 1)
+        // multiply, then discard the low 8 bits plus the table's shift
+        return (val * mult) >> (8 + shift);
+    else {
+        // multiply, keep the bits above bit 8, add half of
+        // (numerator - that value), then apply the table's shift
+        val *= mult;
+        val >>= 8;
+        val += (numerator-val)>>1;
+        return (val >> shift);
+    }
+}
+
+/* Signed 8-bit divide of a varying numerator by a uniform divisor, using
+   row (divisor-2) of __idiv_table_s8 ({method, multiplier, shift}).
+
+   The shift / multiply sequence below rounds toward negative infinity,
+   whereas the "/" operator this function stands in for truncates toward
+   zero.  Negative numerators are therefore biased by (divisor-1) first,
+   which turns the floor into a truncation; non-negative numerators are
+   left untouched.
+ */
+__declspec(safe)
+static unmasked int8 __fast_idiv(int8 numerator, uniform int8 divisor) {
+    uniform int8 method = __idiv_table_s8[divisor-2][0];
+    uniform int16 multiplier = __idiv_table_s8[divisor-2][1];
+    uniform int8 shift = __idiv_table_s8[divisor-2][2];
+
+    // (numerator >> 7) is all ones for negative lanes and zero otherwise,
+    // so only negative lanes get the bias.  It cannot overflow: it moves
+    // a negative value toward zero by less than the divisor.
+    numerator += (numerator >> 7) & (divisor - 1);
+
+    if (method == 0)
+        // shift-only table entry
+        return numerator >> shift;
+    else {
+        // map negative values to non-negative ones (x -> ~x), do the
+        // multiply/shift there, then map the quotient back
+        unsigned int8 sign = numerator >> 7;
+        numerator ^= sign;
+        int16 mul = (int16)numerator * (int16)multiplier;
+        mul >>= 8 + shift;
+        return (int8)mul ^ sign;
+    }
+}
+
+/* Unsigned 16-bit divide of a varying numerator by a uniform divisor,
+   using row (divisor-2) of __idiv_table_u16 ({method, multiplier, shift}).
+   Same structure as the 8-bit unsigned version, with the arithmetic done
+   in 32 bits.
+ */
+__declspec(safe)
+static unmasked unsigned int16 __fast_idiv(unsigned int16 numerator,
+                                           uniform unsigned int16 divisor) {
+    uniform int64 method = __idiv_table_u16[divisor-2][0];
+    uniform int64 multiplier = __idiv_table_u16[divisor-2][1];
+    uniform int64 shift = __idiv_table_u16[divisor-2][2];
+
+    unsigned int32 mult = multiplier;
+    unsigned int32 val = numerator;
+    if (method == 0)
+        // shift-only table entry
+        return numerator >> shift;
+    else if (method == 1)
+        // multiply, then discard the low 16 bits plus the table's shift
+        return (val * mult) >> (16 + shift);
+    else {
+        // multiply, keep the bits above bit 16, add half of
+        // (numerator - that value), then apply the table's shift
+        val *= mult;
+        val >>= 16;
+        val += (numerator-val)>>1;
+        return val >> shift;
+    }
+}
+
+/* Signed 16-bit divide of a varying numerator by a uniform divisor, using
+   row (divisor-2) of __idiv_table_s16 ({method, multiplier, shift}).
+
+   As in the 8-bit signed version, negative numerators are biased by
+   (divisor-1) up front so that the result truncates toward zero like "/"
+   instead of rounding toward negative infinity.
+ */
+__declspec(safe)
+static unmasked int16 __fast_idiv(int16 numerator, uniform int16 divisor) {
+    uniform int64 method = __idiv_table_s16[divisor-2][0];
+    uniform int64 multiplier = __idiv_table_s16[divisor-2][1];
+    uniform int64 shift = __idiv_table_s16[divisor-2][2];
+
+    // all-ones mask for negative lanes selects the bias; zero otherwise
+    numerator += (numerator >> 15) & (divisor - 1);
+
+    if (method == 0)
+        // shift-only table entry
+        return numerator >> shift;
+    else {
+        // x -> ~x for negative lanes, multiply/shift, then map back
+        unsigned int16 sign = numerator >> 15;
+        numerator ^= sign;
+        int32 mul = (int32)numerator * (int32)multiplier;
+        mul >>= 16 + shift;
+        int16 result = mul;
+        return result ^ sign;
+    }
+}
+
+/* Unsigned 32-bit divide of a varying numerator by a uniform divisor,
+   using row (divisor-2) of __idiv_table_u32 ({method, multiplier, shift}).
+   Same structure as the narrower unsigned versions, with the arithmetic
+   done in 64 bits.
+ */
+__declspec(safe)
+static unmasked inline unsigned int32 __fast_idiv(unsigned int32 numerator,
+                                                  uniform unsigned int32 divisor) {
+    uniform int64 method = __idiv_table_u32[divisor-2][0];
+    uniform int64 multiplier = __idiv_table_u32[divisor-2][1];
+    uniform int64 shift = __idiv_table_u32[divisor-2][2];
+
+    unsigned int64 mult = multiplier;
+    unsigned int64 val = numerator;
+    if (method == 0)
+        // shift-only table entry
+        return numerator >> shift;
+    else if (method == 1)
+        // multiply, then discard the low 32 bits plus the table's shift
+        return (val * mult) >> (32 + shift);
+    else {
+        // multiply, keep the bits above bit 32, add half of
+        // (numerator - that value), then apply the table's shift
+        val *= mult;
+        val >>= 32;
+        val += (numerator-val)>>1;
+        return val >> shift;
+    }
+}
+
+/* Signed 32-bit divide of a varying numerator by a uniform divisor, using
+   row (divisor-2) of __idiv_table_s32 ({method, multiplier, shift}).
+
+   As in the narrower signed versions, negative numerators are biased by
+   (divisor-1) up front so that the result truncates toward zero like "/"
+   instead of rounding toward negative infinity.
+ */
+__declspec(safe)
+static unmasked int32 __fast_idiv(int32 numerator, uniform int32 divisor) {
+    uniform int64 method = __idiv_table_s32[divisor-2][0];
+    uniform int64 multiplier = __idiv_table_s32[divisor-2][1];
+    uniform int64 shift = __idiv_table_s32[divisor-2][2];
+
+    // all-ones mask for negative lanes selects the bias; zero otherwise
+    numerator += (numerator >> 31) & (divisor - 1);
+
+    if (method == 0)
+        // shift-only table entry
+        return numerator >> shift;
+    else {
+        // x -> ~x for negative lanes, multiply/shift, then map back
+        unsigned int32 sign = numerator >> 31;
+        numerator ^= sign;
+        int64 mul = (int64)numerator * (int64)multiplier;
+        mul >>= 32 + shift;
+        int32 result = mul;
+        return result ^ sign;
+    }
+}
+
diff --git a/tests/idiv.ispc b/tests/idiv.ispc
new file mode 100644
index 00000000..b7bd78dc
--- /dev/null
+++ b/tests/idiv.ispc
@@ -0,0 +1,75 @@
+
+export uniform int width() { return programCount; }
+
+
+export void f_f(uniform float RET[], uniform float aFOO[]) {
+    uniform int errorCount = 0;
+
+    for (unsigned int8 num = 0; num < 255; ++num) {
+        for (uniform unsigned int8 div = 2; div < 255; ++div) {
+            if (__fast_idiv(num, div) != num/div) {
+                ++errorCount;
+                print("error %/% = %, got %\n", num, div, num/div, __fast_idiv(num,div));
+            }
+        }
+    }
+
+    // signed loops start at the type minimum: negative numerators must truncate toward zero like num/div
+    for (int8 num = -128; num < 127; ++num) {
+        for (uniform int8 div = 2; div < 127; ++div) {
+            if (__fast_idiv(num, div) != num/div) {
+                ++errorCount;
+                print("error %/% = %, got %\n", num, div, num/div, __fast_idiv(num,div));
+            }
+        }
+    }
+
+    for (int16 num = -32768; num < 32767; ++num) {
+        for (uniform int16 div = 2; div < 256; ++div) {
+            if (__fast_idiv(num, div) != num/div) {
+                ++errorCount;
+                print("error %/% = %, got %\n", num, div, num/div, __fast_idiv(num,div));
+            }
+        }
+    }
+
+    for (unsigned int16 num = 0; num < 0xffff; ++num) {
+        for (uniform unsigned int16 div = 2; div < 256; ++div) {
+            if (__fast_idiv(num, div) != num/div) {
+                ++errorCount;
+                print("error %/% = %, got %\n", num, div, num/div, __fast_idiv(num,div));
+            }
+        }
+    }
+
+    // randomly sample int32s...
+    uniform RNGState state;
+    seed_rng(&state, 1234);
+    for (uniform int i = 0; i < 1M; ++i) {
+        unsigned int32 num = random(&state);
+        for (uniform unsigned int32 div = 2; div < 256; ++div) {
+            if (__fast_idiv(num, div) != num/div) {
+                ++errorCount;
+                print("ui32 error %/% = %, got %\n", num, div, num/div, __fast_idiv(num,div));
+            }
+        }
+    }
+
+    for (uniform int64 i = 0; i < 1M; ++i) {
+        // negative samples are kept on purpose so the signed rounding is checked
+        int32 num = random(&state);
+        for (uniform int32 div = 2; div < 256; ++div) {
+            if (__fast_idiv(num, div) != num/div) {
+                ++errorCount;
+                print("si32 error %/% = %, got %\n", num, div, num/div, __fast_idiv(num,div));
+            }
+        }
+    }
+
+    RET[programIndex] = errorCount;
+}
+
+export void result(uniform float RET[]) {
+    RET[programIndex] = 0;
+}
+