Add support for fast division of varying int values by small constants.

For varying int8/16/32 types, divides by small constants can be
implemented efficiently through multiplies and shifts with integer
types of twice the bit-width; this commit adds this optimization.
    
(Implementation is based on Halide.)
This commit is contained in:
Matt Pharr
2013-07-23 16:49:56 -07:00
parent 0277ba1aaa
commit 83e1630fbc
3 changed files with 819 additions and 0 deletions

View File

@@ -2240,6 +2240,49 @@ lConstFoldBinaryIntOp(ConstExpr *constArg0, ConstExpr *constArg1,
}
/* Returns true if the given arguments (which are assumed to be the
   operands of a divide) represent a divide that can be performed by one of
   the __fast_idiv functions.

   arg0 is the value being divided; its type must be a varying int8/16/32
   (signed or unsigned, constness ignored).  arg1 is the divisor; it must
   be a ConstExpr that holds the same value in every lane, and that value
   must be >= 2 and < 128 for 8-bit numerators, or < 256 otherwise.

   *divisor is written as soon as the lanes are known to agree, even if
   the range check then fails; callers should only use it when true is
   returned.
 */
static bool
lCanImproveVectorDivide(Expr *arg0, Expr *arg1, int *divisor) {
    const Type *type = arg0->GetType();
    if (!type)
        return false;

    // The value being divided must be an int8/16/32.
    const bool is8Bit =
        Type::EqualIgnoringConst(type, AtomicType::VaryingInt8) ||
        Type::EqualIgnoringConst(type, AtomicType::VaryingUInt8);
    const bool is16Or32Bit =
        Type::EqualIgnoringConst(type, AtomicType::VaryingInt16) ||
        Type::EqualIgnoringConst(type, AtomicType::VaryingUInt16) ||
        Type::EqualIgnoringConst(type, AtomicType::VaryingInt32) ||
        Type::EqualIgnoringConst(type, AtomicType::VaryingUInt32);
    if (!is8Bit && !is16Or32Bit)
        return false;

    // The divisor must be the same compile-time constant value for all of
    // the vector lanes.
    ConstExpr *ce = dynamic_cast<ConstExpr *>(arg1);
    if (!ce)
        return false;

    int64_t div[ISPC_MAX_NVEC];
    int count = ce->GetValues(div);
    // If no values were produced, div[0] was never written; don't read it.
    if (count < 1)
        return false;
    for (int i = 1; i < count; ++i)
        if (div[i] != div[0])
            return false;

    const int64_t value = div[0];
    *divisor = static_cast<int>(value);

    // And finally, the divisor must be >= 2 and <128 (for 8-bit divides),
    // and <256 otherwise.  The test is made on the full 64-bit value so
    // that a large constant cannot wrap into the accepted range when it is
    // narrowed to int.
    if (value < 2)
        return false;
    return value < (is8Bit ? 128 : 256);
}
Expr *
BinaryExpr::Optimize() {
if (arg0 == NULL || arg1 == NULL)
@@ -2302,6 +2345,32 @@ BinaryExpr::Optimize() {
}
}
int divisor;
if (op == Div && lCanImproveVectorDivide(arg0, arg1, &divisor)) {
Debug(pos, "Improving vector divide by constant %d", divisor);
std::vector<Symbol *> idivFuns;
m->symbolTable->LookupFunction("__fast_idiv", &idivFuns);
if (idivFuns.size() == 0) {
Warning(pos, "Couldn't find __fast_idiv to optimize integer divide. "
"Are you compiling with --nostdlib?");
return this;
}
Expr *idivSymExpr = new FunctionSymbolExpr("__fast_idiv", idivFuns, pos);
ExprList *args = new ExprList(arg0, pos);
args->exprs.push_back(new ConstExpr(AtomicType::UniformInt32, divisor, arg1->pos));
Expr *idivCall = new FunctionCallExpr(idivSymExpr, args, pos);
idivCall = ::TypeCheck(idivCall);
if (idivCall == NULL)
return NULL;
Assert(Type::EqualIgnoringConst(GetType(), idivCall->GetType()));
idivCall = new TypeCastExpr(GetType(), idivCall, pos);
return ::Optimize(idivCall);
}
// From here on out, we're just doing constant folding, so if both args
// aren't constants then we're done...
if (constArg0 == NULL || constArg1 == NULL)

View File

@@ -4264,3 +4264,678 @@ static inline bool rdrand(int64 * ptr) {
return success;
}
}
///////////////////////////////////////////////////////////////////////////
// Fast vector integer division
/* These tables and the algorithms in the __fast_idiv() functions below are
from Halide; the idea is based on the paper "Division by Invariant
Integers using Multiplication" by Granlund and Montgomery.
Copyright (c) 2012 MIT CSAIL
Developed by:
The Halide team
MIT CSAIL
http://halide-lang.org
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
// Lookup table for the unsigned 8-bit __fast_idiv().  Rows are indexed by
// (divisor - 2) and hold {method, multiplier, shift}:
//   method 0: quotient = numerator >> shift
//   method 1: quotient = (numerator * multiplier) >> (8 + shift)
//   otherwise: t = (numerator * multiplier) >> 8;
//              quotient = (t + ((numerator - t) >> 1)) >> shift
// with the multiply carried out in unsigned int16.
static const uniform int64 __idiv_table_u8[][3] = {
{0, 0LL, 1}, {1, 171LL, 1}, {0, 0LL, 2},
{1, 205LL, 2}, {1, 171LL, 2}, {2, 37LL, 2},
{0, 0LL, 3}, {1, 57LL, 1}, {1, 205LL, 3},
{2, 117LL, 3}, {1, 171LL, 3}, {1, 79LL, 2},
{2, 37LL, 3}, {1, 137LL, 3}, {0, 0LL, 4},
{1, 241LL, 4}, {1, 57LL, 2}, {1, 27LL, 1},
{1, 205LL, 4}, {2, 135LL, 4}, {2, 117LL, 4},
{2, 101LL, 4}, {1, 171LL, 4}, {1, 41LL, 2},
{1, 79LL, 3}, {1, 19LL, 1}, {2, 37LL, 4},
{2, 27LL, 4}, {1, 137LL, 4}, {2, 9LL, 4},
{0, 0LL, 5}, {1, 249LL, 5}, {1, 241LL, 5},
{1, 235LL, 5}, {1, 57LL, 3}, {1, 111LL, 4},
{1, 27LL, 2}, {2, 165LL, 5}, {1, 205LL, 5},
{1, 25LL, 2}, {2, 135LL, 5}, {1, 191LL, 5},
{1, 187LL, 5}, {2, 109LL, 5}, {2, 101LL, 5},
{1, 175LL, 5}, {1, 171LL, 5}, {2, 79LL, 5},
{1, 41LL, 3}, {1, 161LL, 5}, {1, 79LL, 4},
{1, 155LL, 5}, {1, 19LL, 2}, {1, 149LL, 5},
{2, 37LL, 5}, {1, 9LL, 1}, {2, 27LL, 5},
{1, 139LL, 5}, {1, 137LL, 5}, {2, 13LL, 5},
{2, 9LL, 5}, {2, 5LL, 5}, {0, 0LL, 6},
{1, 253LL, 6}, {1, 249LL, 6}, {1, 245LL, 6},
{1, 121LL, 5}, {1, 119LL, 5}, {1, 235LL, 6},
{1, 231LL, 6}, {1, 57LL, 4}, {1, 225LL, 6},
{1, 111LL, 5}, {1, 219LL, 6}, {1, 27LL, 3},
{1, 213LL, 6}, {2, 165LL, 6}, {1, 13LL, 2},
{1, 205LL, 6}, {1, 203LL, 6}, {1, 25LL, 3},
{1, 99LL, 5}, {2, 135LL, 6}, {1, 193LL, 6},
{1, 191LL, 6}, {1, 189LL, 6}, {1, 187LL, 6},
{1, 185LL, 6}, {1, 183LL, 6}, {1, 181LL, 6},
{1, 179LL, 6}, {1, 177LL, 6}, {1, 175LL, 6},
{1, 173LL, 6}, {1, 171LL, 6}, {1, 169LL, 6},
{1, 21LL, 3}, {1, 83LL, 5}, {1, 41LL, 4},
{1, 163LL, 6}, {1, 161LL, 6}, {2, 63LL, 6},
{1, 79LL, 5}, {2, 57LL, 6}, {1, 155LL, 6},
{2, 51LL, 6}, {1, 19LL, 3}, {1, 151LL, 6},
{1, 149LL, 6}, {1, 37LL, 4}, {2, 37LL, 6},
{1, 145LL, 6}, {1, 9LL, 2}, {1, 143LL, 6},
{2, 27LL, 6}, {2, 25LL, 6}, {1, 139LL, 6},
{1, 69LL, 5}, {1, 137LL, 6}, {2, 15LL, 6},
{2, 13LL, 6}, {2, 11LL, 6}, {2, 9LL, 6},
{2, 7LL, 6}, {2, 5LL, 6}, {2, 3LL, 6},
{0, 0LL, 7}, {1, 255LL, 7}, {1, 127LL, 6},
{1, 63LL, 5}, {1, 125LL, 6}, {1, 31LL, 4},
{1, 123LL, 6}, {1, 61LL, 5}, {1, 121LL, 6},
{1, 15LL, 3}, {1, 119LL, 6}, {1, 59LL, 5},
{1, 235LL, 7}, {1, 117LL, 6}, {1, 29LL, 4},
{1, 115LL, 6}, {1, 57LL, 5}, {1, 113LL, 6},
{1, 225LL, 7}, {1, 7LL, 2}, {1, 111LL, 6},
{1, 55LL, 5}, {1, 219LL, 7}, {1, 109LL, 6},
{1, 27LL, 4}, {1, 215LL, 7}, {1, 107LL, 6},
{1, 53LL, 5}, {1, 211LL, 7}, {1, 105LL, 6},
{1, 13LL, 3}, {1, 207LL, 7}, {1, 103LL, 6},
{1, 51LL, 5}, {1, 203LL, 7}, {1, 101LL, 6},
{1, 25LL, 4}, {1, 199LL, 7}, {1, 99LL, 6},
{1, 197LL, 7}, {1, 49LL, 5}, {1, 97LL, 6},
{1, 193LL, 7}, {1, 3LL, 1}, {1, 191LL, 7},
{1, 95LL, 6}, {1, 189LL, 7}, {1, 47LL, 5},
{1, 187LL, 7}, {1, 93LL, 6}, {1, 185LL, 7},
{1, 23LL, 4}, {1, 183LL, 7}, {1, 91LL, 6},
{1, 181LL, 7}, {1, 45LL, 5}, {1, 179LL, 7},
{1, 89LL, 6}, {1, 177LL, 7}, {1, 11LL, 3},
{1, 175LL, 7}, {1, 87LL, 6}, {1, 173LL, 7},
{1, 43LL, 5}, {1, 171LL, 7}, {1, 85LL, 6},
{1, 169LL, 7}, {2, 81LL, 7}, {1, 21LL, 4},
{1, 167LL, 7}, {1, 83LL, 6}, {1, 165LL, 7},
{1, 41LL, 5}, {2, 71LL, 7}, {1, 163LL, 7},
{1, 81LL, 6}, {1, 161LL, 7}, {1, 5LL, 2},
{2, 63LL, 7}, {1, 159LL, 7}, {1, 79LL, 6},
{1, 157LL, 7}, {2, 57LL, 7}, {1, 39LL, 5},
{1, 155LL, 7}, {1, 77LL, 6}, {2, 51LL, 7},
{1, 153LL, 7}, {1, 19LL, 4}, {2, 47LL, 7},
{1, 151LL, 7}, {1, 75LL, 6}, {1, 149LL, 7},
{2, 41LL, 7}, {1, 37LL, 5}, {1, 147LL, 7},
{2, 37LL, 7}, {1, 73LL, 6}, {1, 145LL, 7},
{2, 33LL, 7}, {1, 9LL, 3}, {2, 31LL, 7},
{1, 143LL, 7}, {1, 71LL, 6}, {2, 27LL, 7},
{1, 141LL, 7}, {2, 25LL, 7}, {1, 35LL, 5},
{1, 139LL, 7}, {2, 21LL, 7}, {1, 69LL, 6},
{2, 19LL, 7}, {1, 137LL, 7}, {1, 17LL, 4},
{2, 15LL, 7}, {1, 135LL, 7}, {2, 13LL, 7},
{1, 67LL, 6}, {2, 11LL, 7}, {1, 133LL, 7},
{2, 9LL, 7}, {1, 33LL, 5}, {2, 7LL, 7},
{1, 131LL, 7}, {2, 5LL, 7}, {1, 65LL, 6},
{2, 3LL, 7}, {1, 129LL, 7}, {0, 0LL, 8},
};
// Lookup table for the signed 8-bit __fast_idiv().  Rows are indexed by
// (divisor - 2) and hold {method, multiplier, shift}.  Method 0 means the
// quotient is numerator >> shift; any other method takes the multiply path:
// the numerator is xor'ed with its sign mask, multiplied by the multiplier
// in int16, shifted right by (8 + shift), and xor'ed with the sign mask
// again.
// NOTE(review): the long trailing run of {0, 0, 7} rows looks like padding
// for divisors the signed 8-bit path is not expected to see -- confirm.
static const uniform int64 __idiv_table_s8[][3] = {
{0, 0LL, 1}, {1, 86LL, 0}, {0, 0LL, 2},
{1, 103LL, 1}, {1, 43LL, 0}, {1, 147LL, 2},
{0, 0LL, 3}, {1, 57LL, 1}, {1, 103LL, 2},
{1, 187LL, 3}, {1, 43LL, 1}, {1, 79LL, 2},
{1, 147LL, 3}, {1, 137LL, 3}, {0, 0LL, 4},
{1, 121LL, 3}, {1, 57LL, 2}, {1, 27LL, 1},
{1, 103LL, 3}, {1, 49LL, 2}, {1, 187LL, 4},
{1, 179LL, 4}, {1, 43LL, 2}, {1, 41LL, 2},
{1, 79LL, 3}, {1, 19LL, 1}, {1, 147LL, 4},
{1, 71LL, 3}, {1, 137LL, 4}, {1, 133LL, 4},
{0, 0LL, 5}, {1, 125LL, 4}, {1, 121LL, 4},
{1, 59LL, 3}, {1, 57LL, 3}, {1, 111LL, 4},
{1, 27LL, 2}, {1, 211LL, 5}, {1, 103LL, 4},
{1, 25LL, 2}, {1, 49LL, 3}, {1, 6LL, 0},
{1, 47LL, 3}, {1, 23LL, 2}, {1, 45LL, 3},
{1, 11LL, 1}, {1, 43LL, 3}, {1, 21LL, 2},
{1, 41LL, 3}, {1, 81LL, 4}, {1, 79LL, 4},
{1, 39LL, 3}, {1, 19LL, 2}, {1, 75LL, 4},
{1, 147LL, 5}, {1, 9LL, 1}, {1, 71LL, 4},
{1, 35LL, 3}, {1, 137LL, 5}, {1, 135LL, 5},
{1, 133LL, 5}, {1, 131LL, 5}, {0, 0LL, 6},
{1, 127LL, 5}, {1, 63LL, 4}, {1, 31LL, 3},
{1, 61LL, 4}, {1, 15LL, 2}, {1, 59LL, 4},
{1, 29LL, 3}, {1, 57LL, 4}, {1, 113LL, 5},
{1, 7LL, 1}, {1, 55LL, 4}, {1, 27LL, 3},
{1, 107LL, 5}, {1, 53LL, 4}, {1, 13LL, 2},
{1, 103LL, 5}, {1, 51LL, 4}, {1, 25LL, 3},
{1, 99LL, 5}, {1, 49LL, 4}, {1, 97LL, 5},
{1, 3LL, 0}, {1, 95LL, 5}, {1, 47LL, 4},
{1, 93LL, 5}, {1, 23LL, 3}, {1, 91LL, 5},
{1, 45LL, 4}, {1, 89LL, 5}, {1, 11LL, 2},
{1, 87LL, 5}, {1, 43LL, 4}, {1, 85LL, 5},
{1, 21LL, 3}, {1, 83LL, 5}, {1, 41LL, 4},
{1, 163LL, 6}, {1, 81LL, 5}, {1, 5LL, 1},
{1, 79LL, 5}, {1, 157LL, 6}, {1, 39LL, 4},
{1, 77LL, 5}, {1, 19LL, 3}, {1, 151LL, 6},
{1, 75LL, 5}, {1, 37LL, 4}, {1, 147LL, 6},
{1, 73LL, 5}, {1, 9LL, 2}, {1, 143LL, 6},
{1, 71LL, 5}, {1, 141LL, 6}, {1, 35LL, 4},
{1, 69LL, 5}, {1, 137LL, 6}, {1, 17LL, 3},
{1, 135LL, 6}, {1, 67LL, 5}, {1, 133LL, 6},
{1, 33LL, 4}, {1, 131LL, 6}, {1, 65LL, 5},
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
};
// Lookup table for the unsigned 16-bit __fast_idiv().  Rows are indexed by
// (divisor - 2) and hold {method, multiplier, shift}:
//   method 0: quotient = numerator >> shift
//   method 1: quotient = (numerator * multiplier) >> (16 + shift)
//   otherwise: t = (numerator * multiplier) >> 16;
//              quotient = (t + ((numerator - t) >> 1)) >> shift
// with the multiply carried out in unsigned int32.
static const uniform int64 __idiv_table_u16[][3] = {
{0, 0LL, 1}, {1, 43691LL, 1}, {0, 0LL, 2},
{1, 52429LL, 2}, {1, 43691LL, 2}, {2, 9363LL, 2},
{0, 0LL, 3}, {1, 58255LL, 3}, {1, 52429LL, 3},
{1, 47663LL, 3}, {1, 43691LL, 3}, {1, 20165LL, 2},
{2, 9363LL, 3}, {1, 34953LL, 3}, {0, 0LL, 4},
{1, 61681LL, 4}, {1, 58255LL, 4}, {1, 55189LL, 4},
{1, 52429LL, 4}, {2, 34329LL, 4}, {1, 47663LL, 4},
{2, 25645LL, 4}, {1, 43691LL, 4}, {2, 18351LL, 4},
{1, 20165LL, 3}, {2, 12137LL, 4}, {2, 9363LL, 4},
{1, 18079LL, 3}, {1, 34953LL, 4}, {2, 2115LL, 4},
{0, 0LL, 5}, {1, 63551LL, 5}, {1, 61681LL, 5},
{1, 59919LL, 5}, {1, 58255LL, 5}, {1, 7085LL, 2},
{1, 55189LL, 5}, {2, 42011LL, 5}, {1, 52429LL, 5},
{2, 36765LL, 5}, {2, 34329LL, 5}, {1, 48771LL, 5},
{1, 47663LL, 5}, {1, 11651LL, 3}, {2, 25645LL, 5},
{2, 23705LL, 5}, {1, 43691LL, 5}, {2, 20063LL, 5},
{2, 18351LL, 5}, {1, 41121LL, 5}, {1, 20165LL, 4},
{1, 39569LL, 5}, {2, 12137LL, 5}, {2, 10725LL, 5},
{2, 9363LL, 5}, {2, 8049LL, 5}, {1, 18079LL, 4},
{1, 35545LL, 5}, {1, 34953LL, 5}, {1, 8595LL, 3},
{2, 2115LL, 5}, {2, 1041LL, 5}, {0, 0LL, 6},
{1, 4033LL, 2}, {1, 63551LL, 6}, {1, 31301LL, 5},
{1, 61681LL, 6}, {2, 56039LL, 6}, {1, 59919LL, 6},
{1, 59075LL, 6}, {1, 58255LL, 6}, {1, 57457LL, 6},
{1, 7085LL, 3}, {2, 46313LL, 6}, {1, 55189LL, 6},
{1, 6809LL, 3}, {2, 42011LL, 6}, {1, 53093LL, 6},
{1, 52429LL, 6}, {1, 25891LL, 5}, {2, 36765LL, 6},
{1, 25267LL, 5}, {2, 34329LL, 6}, {1, 49345LL, 6},
{1, 48771LL, 6}, {1, 48211LL, 6}, {1, 47663LL, 6},
{2, 28719LL, 6}, {1, 11651LL, 4}, {2, 26647LL, 6},
{2, 25645LL, 6}, {2, 24665LL, 6}, {2, 23705LL, 6},
{1, 44151LL, 6}, {1, 43691LL, 6}, {2, 20945LL, 6},
{2, 20063LL, 6}, {1, 42367LL, 6}, {2, 18351LL, 6},
{1, 5191LL, 3}, {1, 41121LL, 6}, {1, 20361LL, 5},
{1, 20165LL, 5}, {1, 19973LL, 5}, {1, 39569LL, 6},
{2, 12863LL, 6}, {2, 12137LL, 6}, {1, 2405LL, 2},
{2, 10725LL, 6}, {1, 37787LL, 6}, {2, 9363LL, 6},
{1, 18559LL, 5}, {2, 8049LL, 6}, {2, 7409LL, 6},
{1, 18079LL, 5}, {1, 35849LL, 6}, {1, 35545LL, 6},
{2, 4957LL, 6}, {1, 34953LL, 6}, {1, 4333LL, 3},
{1, 8595LL, 4}, {2, 2665LL, 6}, {2, 2115LL, 6},
{2, 1573LL, 6}, {2, 1041LL, 6}, {2, 517LL, 6},
{0, 0LL, 7}, {1, 16257LL, 5}, {1, 4033LL, 3},
{1, 16009LL, 5}, {1, 63551LL, 7}, {1, 63073LL, 7},
{1, 31301LL, 6}, {1, 31069LL, 6}, {1, 61681LL, 7},
{1, 61231LL, 7}, {2, 56039LL, 7}, {1, 30175LL, 6},
{1, 59919LL, 7}, {1, 29747LL, 6}, {1, 59075LL, 7},
{1, 29331LL, 6}, {1, 58255LL, 7}, {1, 57853LL, 7},
{1, 57457LL, 7}, {1, 28533LL, 6}, {1, 7085LL, 4},
{1, 14075LL, 5}, {2, 46313LL, 7}, {1, 27777LL, 6},
{1, 55189LL, 7}, {1, 13707LL, 5}, {1, 6809LL, 4},
{2, 42705LL, 7}, {2, 42011LL, 7}, {1, 53431LL, 7},
{1, 53093LL, 7}, {1, 52759LL, 7}, {1, 52429LL, 7},
{2, 38671LL, 7}, {1, 25891LL, 6}, {1, 6433LL, 4},
{2, 36765LL, 7}, {2, 36145LL, 7}, {1, 25267LL, 6},
{2, 34927LL, 7}, {2, 34329LL, 7}, {1, 49637LL, 7},
{1, 49345LL, 7}, {2, 32577LL, 7}, {1, 48771LL, 7},
{2, 31443LL, 7}, {1, 48211LL, 7}, {1, 47935LL, 7},
{1, 47663LL, 7}, {2, 29251LL, 7}, {2, 28719LL, 7},
{1, 2929LL, 3}, {1, 11651LL, 5}, {1, 23173LL, 6},
{2, 26647LL, 7}, {1, 2865LL, 3}, {2, 25645LL, 7},
{1, 1417LL, 2}, {2, 24665LL, 7}, {1, 44859LL, 7},
{2, 23705LL, 7}, {2, 23233LL, 7}, {1, 44151LL, 7},
{1, 2745LL, 3}, {1, 43691LL, 7}, {2, 21393LL, 7},
{2, 20945LL, 7}, {1, 43019LL, 7}, {2, 20063LL, 7},
{1, 21291LL, 6}, {1, 42367LL, 7}, {1, 21077LL, 6},
{2, 18351LL, 7}, {1, 41735LL, 7}, {1, 5191LL, 4},
{2, 17111LL, 7}, {1, 41121LL, 7}, {2, 16305LL, 7},
{1, 20361LL, 6}, {1, 40525LL, 7}, {1, 20165LL, 6},
{1, 40137LL, 7}, {1, 19973LL, 6}, {1, 39757LL, 7},
{1, 39569LL, 7}, {2, 13231LL, 7}, {2, 12863LL, 7},
{1, 39017LL, 7}, {2, 12137LL, 7}, {2, 11779LL, 7},
{1, 2405LL, 3}, {2, 11073LL, 7}, {2, 10725LL, 7},
{1, 18979LL, 6}, {1, 37787LL, 7}, {2, 9699LL, 7},
{2, 9363LL, 7}, {1, 37283LL, 7}, {1, 18559LL, 6},
{2, 8373LL, 7}, {2, 8049LL, 7}, {1, 4579LL, 4},
{2, 7409LL, 7}, {2, 7093LL, 7}, {1, 18079LL, 6},
{1, 36003LL, 7}, {1, 35849LL, 7}, {2, 5857LL, 7},
{1, 35545LL, 7}, {1, 35395LL, 7}, {2, 4957LL, 7},
{1, 35099LL, 7}, {1, 34953LL, 7}, {1, 4351LL, 4},
{1, 4333LL, 4}, {2, 3507LL, 7}, {1, 8595LL, 5},
{2, 2943LL, 7}, {2, 2665LL, 7}, {1, 16981LL, 6},
{2, 2115LL, 7}, {2, 1843LL, 7}, {2, 1573LL, 7},
{1, 33421LL, 7}, {2, 1041LL, 7}, {1, 33157LL, 7},
{2, 517LL, 7}, {1, 32897LL, 7}, {0, 0LL, 8},
};
// Lookup table for the signed 16-bit __fast_idiv().  Rows are indexed by
// (divisor - 2) and hold {method, multiplier, shift}.  Method 0 means the
// quotient is numerator >> shift; any other method takes the multiply path:
// the numerator is xor'ed with its sign mask, multiplied by the multiplier
// in int32, shifted right by (16 + shift), and xor'ed with the sign mask
// again.
static const uniform int64 __idiv_table_s16[][3] = {
{0, 0LL, 1}, {1, 21846LL, 0}, {0, 0LL, 2},
{1, 26215LL, 1}, {1, 10923LL, 0}, {1, 18725LL, 1},
{0, 0LL, 3}, {1, 7282LL, 0}, {1, 26215LL, 2},
{1, 5958LL, 0}, {1, 10923LL, 1}, {1, 20165LL, 2},
{1, 18725LL, 2}, {1, 34953LL, 3}, {0, 0LL, 4},
{1, 30841LL, 3}, {1, 3641LL, 0}, {1, 55189LL, 4},
{1, 26215LL, 3}, {1, 49933LL, 4}, {1, 2979LL, 0},
{1, 45591LL, 4}, {1, 10923LL, 2}, {1, 5243LL, 1},
{1, 20165LL, 3}, {1, 38837LL, 4}, {1, 18725LL, 3},
{1, 18079LL, 3}, {1, 34953LL, 4}, {1, 16913LL, 3},
{0, 0LL, 5}, {1, 1986LL, 0}, {1, 30841LL, 4},
{1, 3745LL, 1}, {1, 3641LL, 1}, {1, 7085LL, 2},
{1, 55189LL, 5}, {1, 26887LL, 4}, {1, 26215LL, 4},
{1, 51151LL, 5}, {1, 49933LL, 5}, {1, 12193LL, 3},
{1, 2979LL, 1}, {1, 11651LL, 3}, {1, 45591LL, 5},
{1, 44621LL, 5}, {1, 10923LL, 3}, {1, 2675LL, 1},
{1, 5243LL, 2}, {1, 41121LL, 5}, {1, 20165LL, 4},
{1, 19785LL, 4}, {1, 38837LL, 5}, {1, 38131LL, 5},
{1, 18725LL, 4}, {1, 36793LL, 5}, {1, 18079LL, 4},
{1, 17773LL, 4}, {1, 34953LL, 5}, {1, 8595LL, 3},
{1, 16913LL, 4}, {1, 33289LL, 5}, {0, 0LL, 6},
{1, 4033LL, 2}, {1, 993LL, 0}, {1, 31301LL, 5},
{1, 30841LL, 5}, {1, 15197LL, 4}, {1, 3745LL, 2},
{1, 14769LL, 4}, {1, 3641LL, 2}, {1, 57457LL, 6},
{1, 7085LL, 3}, {1, 55925LL, 6}, {1, 55189LL, 6},
{1, 6809LL, 3}, {1, 26887LL, 5}, {1, 26547LL, 5},
{1, 26215LL, 5}, {1, 25891LL, 5}, {1, 51151LL, 6},
{1, 25267LL, 5}, {1, 49933LL, 6}, {1, 24673LL, 5},
{1, 12193LL, 4}, {1, 48211LL, 6}, {1, 2979LL, 2},
{1, 5891LL, 3}, {1, 11651LL, 4}, {1, 11523LL, 4},
{1, 45591LL, 6}, {1, 45101LL, 6}, {1, 44621LL, 6},
{1, 44151LL, 6}, {1, 10923LL, 4}, {1, 43241LL, 6},
{1, 2675LL, 2}, {1, 662LL, 0}, {1, 5243LL, 3},
{1, 5191LL, 3}, {1, 41121LL, 6}, {1, 20361LL, 5},
{1, 20165LL, 5}, {1, 19973LL, 5}, {1, 19785LL, 5},
{1, 1225LL, 1}, {1, 38837LL, 6}, {1, 2405LL, 2},
{1, 38131LL, 6}, {1, 37787LL, 6}, {1, 18725LL, 5},
{1, 18559LL, 5}, {1, 36793LL, 6}, {1, 36473LL, 6},
{1, 18079LL, 5}, {1, 35849LL, 6}, {1, 17773LL, 5},
{1, 35247LL, 6}, {1, 34953LL, 6}, {1, 4333LL, 3},
{1, 8595LL, 4}, {1, 34101LL, 6}, {1, 16913LL, 5},
{1, 33555LL, 6}, {1, 33289LL, 6}, {1, 33027LL, 6},
{0, 0LL, 7}, {1, 16257LL, 5}, {1, 4033LL, 3},
{1, 16009LL, 5}, {1, 993LL, 1}, {1, 31537LL, 6},
{1, 31301LL, 6}, {1, 31069LL, 6}, {1, 30841LL, 6},
{1, 3827LL, 3}, {1, 15197LL, 5}, {1, 30175LL, 6},
{1, 3745LL, 3}, {1, 29747LL, 6}, {1, 14769LL, 5},
{1, 29331LL, 6}, {1, 3641LL, 3}, {1, 28927LL, 6},
{1, 57457LL, 7}, {1, 28533LL, 6}, {1, 7085LL, 4},
{1, 14075LL, 5}, {1, 55925LL, 7}, {1, 27777LL, 6},
{1, 55189LL, 7}, {1, 13707LL, 5}, {1, 6809LL, 4},
{1, 54121LL, 7}, {1, 26887LL, 6}, {1, 6679LL, 4},
{1, 26547LL, 6}, {1, 6595LL, 4}, {1, 26215LL, 6},
{1, 6513LL, 4}, {1, 25891LL, 6}, {1, 6433LL, 4},
{1, 51151LL, 7}, {1, 50841LL, 7}, {1, 25267LL, 6},
{1, 6279LL, 4}, {1, 49933LL, 7}, {1, 24819LL, 6},
{1, 24673LL, 6}, {1, 49057LL, 7}, {1, 12193LL, 5},
{1, 24245LL, 6}, {1, 48211LL, 7}, {1, 749LL, 1},
{1, 2979LL, 3}, {1, 23697LL, 6}, {1, 5891LL, 4},
{1, 2929LL, 3}, {1, 11651LL, 5}, {1, 23173LL, 6},
{1, 11523LL, 5}, {1, 2865LL, 3}, {1, 45591LL, 7},
{1, 1417LL, 2}, {1, 45101LL, 7}, {1, 11215LL, 5},
{1, 44621LL, 7}, {1, 44385LL, 7}, {1, 44151LL, 7},
{1, 2745LL, 3}, {1, 10923LL, 5}, {1, 43465LL, 7},
{1, 43241LL, 7}, {1, 43019LL, 7}, {1, 2675LL, 3},
{1, 21291LL, 6}, {1, 331LL, 0}, {1, 21077LL, 6},
{1, 5243LL, 4}, {1, 41735LL, 7}, {1, 5191LL, 4},
{1, 10331LL, 5}, {1, 41121LL, 7}, {1, 40921LL, 7},
{1, 20361LL, 6}, {1, 40525LL, 7}, {1, 20165LL, 6},
{1, 20069LL, 6}, {1, 19973LL, 6}, {1, 39757LL, 7},
{1, 19785LL, 6}, {1, 4923LL, 4}, {1, 1225LL, 2},
{1, 39017LL, 7}, {1, 38837LL, 7}, {1, 19329LL, 6},
{1, 2405LL, 3}, {1, 38305LL, 7}, {1, 38131LL, 7},
{1, 18979LL, 6}, {1, 37787LL, 7}, {1, 18809LL, 6},
{1, 18725LL, 6}, {1, 37283LL, 7}, {1, 18559LL, 6},
{1, 36955LL, 7}, {1, 36793LL, 7}, {1, 4579LL, 4},
{1, 36473LL, 7}, {1, 36315LL, 7}, {1, 18079LL, 6},
{1, 36003LL, 7}, {1, 35849LL, 7}, {1, 35697LL, 7},
{1, 17773LL, 6}, {1, 8849LL, 5}, {1, 35247LL, 7},
{1, 35099LL, 7}, {1, 34953LL, 7}, {1, 4351LL, 4},
{1, 4333LL, 4}, {1, 17261LL, 6}, {1, 8595LL, 5},
{1, 535LL, 1}, {1, 34101LL, 7}, {1, 16981LL, 6},
{1, 16913LL, 6}, {1, 16845LL, 6}, {1, 33555LL, 7},
{1, 33421LL, 7}, {1, 33289LL, 7}, {1, 33157LL, 7},
{1, 33027LL, 7}, {1, 32897LL, 7}, {1, 32769LL, 7},
};
// Lookup table for the unsigned 32-bit __fast_idiv().  Rows are indexed by
// (divisor - 2) and hold {method, multiplier, shift}:
//   method 0: quotient = numerator >> shift
//   method 1: quotient = (numerator * multiplier) >> (32 + shift)
//   otherwise: t = (numerator * multiplier) >> 32;
//              quotient = (t + ((numerator - t) >> 1)) >> shift
// with the multiply carried out in unsigned int64.
static const uniform int64 __idiv_table_u32[][3] = {
{0, 0LL, 1}, {1, 2863311531LL, 1}, {0, 0LL, 2},
{1, 3435973837LL, 2}, {1, 2863311531LL, 2}, {2, 613566757LL, 2},
{0, 0LL, 3}, {1, 954437177LL, 1}, {1, 3435973837LL, 3},
{1, 3123612579LL, 3}, {1, 2863311531LL, 3}, {1, 1321528399LL, 2},
{2, 613566757LL, 3}, {1, 2290649225LL, 3}, {0, 0LL, 4},
{1, 4042322161LL, 4}, {1, 954437177LL, 2}, {2, 2938661835LL, 4},
{1, 3435973837LL, 4}, {2, 2249744775LL, 4}, {1, 3123612579LL, 4},
{1, 2987803337LL, 4}, {1, 2863311531LL, 4}, {1, 1374389535LL, 3},
{1, 1321528399LL, 3}, {2, 795364315LL, 4}, {2, 613566757LL, 4},
{1, 2369637129LL, 4}, {1, 2290649225LL, 4}, {2, 138547333LL, 4},
{0, 0LL, 5}, {1, 1041204193LL, 3}, {1, 4042322161LL, 5},
{2, 3558687189LL, 5}, {1, 954437177LL, 3}, {2, 3134165325LL, 5},
{2, 2938661835LL, 5}, {2, 2753184165LL, 5}, {1, 3435973837LL, 5},
{1, 3352169597LL, 5}, {2, 2249744775LL, 5}, {1, 799063683LL, 3},
{1, 3123612579LL, 5}, {2, 1813430637LL, 5}, {1, 2987803337LL, 5},
{1, 2924233053LL, 5}, {1, 2863311531LL, 5}, {1, 1402438301LL, 4},
{1, 1374389535LL, 4}, {1, 2694881441LL, 5}, {1, 1321528399LL, 4},
{2, 891408307LL, 5}, {2, 795364315LL, 5}, {2, 702812831LL, 5},
{2, 613566757LL, 5}, {2, 527452125LL, 5}, {1, 2369637129LL, 5},
{1, 582368447LL, 3}, {1, 2290649225LL, 5}, {1, 1126548799LL, 4},
{2, 138547333LL, 5}, {2, 68174085LL, 5}, {0, 0LL, 6},
{1, 4228890877LL, 6}, {1, 1041204193LL, 4}, {1, 128207979LL, 1},
{1, 4042322161LL, 6}, {1, 1991868891LL, 5}, {2, 3558687189LL, 6},
{1, 3871519817LL, 6}, {1, 954437177LL, 4}, {2, 3235934265LL, 6},
{2, 3134165325LL, 6}, {1, 458129845LL, 3}, {2, 2938661835LL, 6},
{1, 892460737LL, 4}, {2, 2753184165LL, 6}, {1, 3479467177LL, 6},
{1, 3435973837LL, 6}, {1, 3393554407LL, 6}, {1, 3352169597LL, 6},
{1, 827945503LL, 4}, {2, 2249744775LL, 6}, {1, 3233857729LL, 6},
{1, 799063683LL, 4}, {1, 789879043LL, 4}, {1, 3123612579LL, 6},
{1, 3088515809LL, 6}, {2, 1813430637LL, 6}, {2, 1746305385LL, 6},
{1, 2987803337LL, 6}, {1, 2955676419LL, 6}, {1, 2924233053LL, 6},
{2, 1491936009LL, 6}, {1, 2863311531LL, 6}, {2, 1372618415LL, 6},
{1, 1402438301LL, 5}, {1, 2776544515LL, 6}, {1, 1374389535LL, 5},
{2, 1148159575LL, 6}, {1, 2694881441LL, 6}, {2, 1042467791LL, 6},
{1, 1321528399LL, 5}, {2, 940802361LL, 6}, {2, 891408307LL, 6},
{2, 842937507LL, 6}, {2, 795364315LL, 6}, {2, 748664025LL, 6},
{2, 702812831LL, 6}, {2, 657787785LL, 6}, {2, 613566757LL, 6},
{2, 570128403LL, 6}, {2, 527452125LL, 6}, {2, 485518043LL, 6},
{1, 2369637129LL, 6}, {2, 403800345LL, 6}, {1, 582368447LL, 4},
{1, 1154949189LL, 5}, {1, 2290649225LL, 6}, {2, 248469183LL, 6},
{1, 1126548799LL, 5}, {2, 174592167LL, 6}, {2, 138547333LL, 6},
{1, 274877907LL, 3}, {2, 68174085LL, 6}, {2, 33818641LL, 6},
{0, 0LL, 7}, {1, 266354561LL, 3}, {1, 4228890877LL, 7},
{1, 4196609267LL, 7}, {1, 1041204193LL, 5}, {1, 4133502361LL, 7},
{1, 128207979LL, 2}, {1, 4072265289LL, 7}, {1, 4042322161LL, 7},
{1, 125400505LL, 2}, {1, 1991868891LL, 6}, {1, 1977538899LL, 6},
{2, 3558687189LL, 7}, {1, 974744351LL, 5}, {1, 3871519817LL, 7},
{1, 3844446251LL, 7}, {1, 954437177LL, 5}, {1, 3791419407LL, 7},
{2, 3235934265LL, 7}, {1, 3739835469LL, 7}, {2, 3134165325LL, 7},
{1, 3689636335LL, 7}, {1, 458129845LL, 4}, {1, 910191745LL, 5},
{2, 2938661835LL, 7}, {1, 3593175255LL, 7}, {1, 892460737LL, 5},
{1, 3546811703LL, 7}, {2, 2753184165LL, 7}, {1, 875407347LL, 5},
{1, 3479467177LL, 7}, {2, 2620200175LL, 7}, {1, 3435973837LL, 7},
{1, 3414632385LL, 7}, {1, 3393554407LL, 7}, {1, 3372735055LL, 7},
{1, 3352169597LL, 7}, {1, 1665926709LL, 6}, {1, 827945503LL, 5},
{1, 1645975491LL, 6}, {2, 2249744775LL, 7}, {1, 1626496491LL, 6},
{1, 3233857729LL, 7}, {2, 2134925265LL, 7}, {1, 799063683LL, 5},
{2, 2060591247LL, 7}, {1, 789879043LL, 5}, {1, 1570730897LL, 6},
{1, 3123612579LL, 7}, {2, 1916962805LL, 7}, {1, 3088515809LL, 7},
{2, 1847555765LL, 7}, {2, 1813430637LL, 7}, {1, 3037324939LL, 7},
{2, 1746305385LL, 7}, {1, 3004130131LL, 7}, {1, 2987803337LL, 7},
{2, 1648338801LL, 7}, {1, 2955676419LL, 7}, {1, 2939870663LL, 7},
{1, 2924233053LL, 7}, {2, 1522554545LL, 7}, {2, 1491936009LL, 7},
{1, 2878302691LL, 7}, {1, 2863311531LL, 7}, {1, 356059465LL, 4},
{2, 1372618415LL, 7}, {2, 1343553873LL, 7}, {1, 1402438301LL, 6},
{2, 1286310003LL, 7}, {1, 2776544515LL, 7}, {1, 1381296015LL, 6},
{1, 1374389535LL, 6}, {1, 42735993LL, 1}, {2, 1148159575LL, 7},
{1, 2708156719LL, 7}, {1, 2694881441LL, 7}, {1, 1340867839LL, 6},
{2, 1042467791LL, 7}, {1, 663956297LL, 5}, {1, 1321528399LL, 6},
{1, 2630410593LL, 7}, {2, 940802361LL, 7}, {1, 2605477791LL, 7},
{2, 891408307LL, 7}, {1, 2581013211LL, 7}, {2, 842937507LL, 7},
{1, 1278501893LL, 6}, {2, 795364315LL, 7}, {2, 771906565LL, 7},
{2, 748664025LL, 7}, {2, 725633745LL, 7}, {2, 702812831LL, 7},
{2, 680198441LL, 7}, {2, 657787785LL, 7}, {2, 635578121LL, 7},
{2, 613566757LL, 7}, {1, 2443359173LL, 7}, {2, 570128403LL, 7},
{2, 548696263LL, 7}, {2, 527452125LL, 7}, {1, 1200340205LL, 6},
{2, 485518043LL, 7}, {2, 464823301LL, 7}, {1, 2369637129LL, 7},
{2, 423966729LL, 7}, {2, 403800345LL, 7}, {2, 383805589LL, 7},
{1, 582368447LL, 5}, {2, 344322273LL, 7}, {1, 1154949189LL, 6},
{1, 2300233531LL, 7}, {1, 2290649225LL, 7}, {1, 285143057LL, 4},
{2, 248469183LL, 7}, {1, 2262369605LL, 7}, {1, 1126548799LL, 6},
{2, 192835267LL, 7}, {2, 174592167LL, 7}, {2, 156496785LL, 7},
{2, 138547333LL, 7}, {2, 120742053LL, 7}, {1, 274877907LL, 4},
{1, 2190262207LL, 7}, {2, 68174085LL, 7}, {1, 2172947881LL, 7},
{2, 33818641LL, 7}, {1, 2155905153LL, 7}, {0, 0LL, 8},
};
// Lookup table of {method, multiplier, shift} rows for signed 32-bit
// divides.
// NOTE(review): presumably read by a signed 32-bit __fast_idiv() overload
// and indexed by (divisor - 2) like the other __idiv_table_* arrays; that
// consumer is not visible here -- confirm against it.
static const uniform int64 __idiv_table_s32[][3] = {
{0, 0LL, 1}, {1, 1431655766LL, 0}, {0, 0LL, 2},
{1, 1717986919LL, 1}, {1, 715827883LL, 0}, {1, 2454267027LL, 2},
{0, 0LL, 3}, {1, 954437177LL, 1}, {1, 1717986919LL, 2},
{1, 780903145LL, 1}, {1, 715827883LL, 1}, {1, 1321528399LL, 2},
{1, 2454267027LL, 3}, {1, 2290649225LL, 3}, {0, 0LL, 4},
{1, 2021161081LL, 3}, {1, 954437177LL, 2}, {1, 1808407283LL, 3},
{1, 1717986919LL, 3}, {1, 818089009LL, 2}, {1, 780903145LL, 2},
{1, 2987803337LL, 4}, {1, 715827883LL, 2}, {1, 1374389535LL, 3},
{1, 1321528399LL, 3}, {1, 1272582903LL, 3}, {1, 2454267027LL, 4},
{1, 2369637129LL, 4}, {1, 2290649225LL, 4}, {1, 2216757315LL, 4},
{0, 0LL, 5}, {1, 1041204193LL, 3}, {1, 2021161081LL, 4},
{1, 3926827243LL, 5}, {1, 954437177LL, 3}, {1, 3714566311LL, 5},
{1, 1808407283LL, 4}, {1, 3524075731LL, 5}, {1, 1717986919LL, 4},
{1, 1676084799LL, 4}, {1, 818089009LL, 3}, {1, 799063683LL, 3},
{1, 780903145LL, 3}, {1, 3054198967LL, 5}, {1, 2987803337LL, 5},
{1, 2924233053LL, 5}, {1, 715827883LL, 3}, {1, 1402438301LL, 4},
{1, 1374389535LL, 4}, {1, 2694881441LL, 5}, {1, 1321528399LL, 4},
{1, 1296593901LL, 4}, {1, 1272582903LL, 4}, {1, 156180629LL, 1},
{1, 2454267027LL, 5}, {1, 2411209711LL, 5}, {1, 2369637129LL, 5},
{1, 582368447LL, 3}, {1, 2290649225LL, 5}, {1, 1126548799LL, 4},
{1, 2216757315LL, 5}, {1, 2181570691LL, 5}, {0, 0LL, 6},
{1, 2114445439LL, 5}, {1, 1041204193LL, 4}, {1, 128207979LL, 1},
{1, 2021161081LL, 5}, {1, 1991868891LL, 5}, {1, 3926827243LL, 6},
{1, 3871519817LL, 6}, {1, 954437177LL, 4}, {1, 3765450781LL, 6},
{1, 3714566311LL, 6}, {1, 458129845LL, 3}, {1, 1808407283LL, 5},
{1, 892460737LL, 4}, {1, 3524075731LL, 6}, {1, 1739733589LL, 5},
{1, 1717986919LL, 5}, {1, 424194301LL, 3}, {1, 1676084799LL, 5},
{1, 827945503LL, 4}, {1, 818089009LL, 4}, {1, 1616928865LL, 5},
{1, 799063683LL, 4}, {1, 789879043LL, 4}, {1, 780903145LL, 4},
{1, 3088515809LL, 6}, {1, 3054198967LL, 6}, {1, 3020636341LL, 6},
{1, 2987803337LL, 6}, {1, 738919105LL, 4}, {1, 2924233053LL, 6},
{1, 2893451653LL, 6}, {1, 715827883LL, 4}, {1, 354224107LL, 3},
{1, 1402438301LL, 5}, {1, 2776544515LL, 6}, {1, 1374389535LL, 5},
{1, 680390859LL, 4}, {1, 2694881441LL, 6}, {1, 333589693LL, 3},
{1, 1321528399LL, 5}, {1, 2617884829LL, 6}, {1, 1296593901LL, 5},
{1, 1284476201LL, 5}, {1, 1272582903LL, 5}, {1, 2521815661LL, 6},
{1, 156180629LL, 2}, {1, 2476377541LL, 6}, {1, 2454267027LL, 6},
{1, 1216273925LL, 5}, {1, 2411209711LL, 6}, {1, 1195121335LL, 5},
{1, 2369637129LL, 6}, {1, 2349383821LL, 6}, {1, 582368447LL, 4},
{1, 1154949189LL, 5}, {1, 2290649225LL, 6}, {1, 70991195LL, 1},
{1, 1126548799LL, 5}, {1, 558694933LL, 4}, {1, 2216757315LL, 6},
{1, 274877907LL, 3}, {1, 2181570691LL, 6}, {1, 2164392969LL, 6},
{0, 0LL, 7}, {1, 266354561LL, 3}, {1, 2114445439LL, 6},
{1, 1049152317LL, 5}, {1, 1041204193LL, 5}, {1, 4133502361LL, 7},
{1, 128207979LL, 2}, {1, 4072265289LL, 7}, {1, 2021161081LL, 6},
{1, 125400505LL, 2}, {1, 1991868891LL, 6}, {1, 1977538899LL, 6},
{1, 3926827243LL, 7}, {1, 974744351LL, 5}, {1, 3871519817LL, 7},
{1, 961111563LL, 5}, {1, 954437177LL, 5}, {1, 3791419407LL, 7},
{1, 3765450781LL, 7}, {1, 1869917735LL, 6}, {1, 3714566311LL, 7},
{1, 230602271LL, 3}, {1, 458129845LL, 4}, {1, 910191745LL, 5},
{1, 1808407283LL, 6}, {1, 3593175255LL, 7}, {1, 892460737LL, 5},
{1, 443351463LL, 4}, {1, 3524075731LL, 7}, {1, 875407347LL, 5},
{1, 1739733589LL, 6}, {1, 432197967LL, 4}, {1, 1717986919LL, 6},
{1, 3414632385LL, 7}, {1, 424194301LL, 4}, {1, 210795941LL, 3},
{1, 1676084799LL, 6}, {1, 1665926709LL, 6}, {1, 827945503LL, 5},
{1, 1645975491LL, 6}, {1, 818089009LL, 5}, {1, 1626496491LL, 6},
{1, 1616928865LL, 6}, {1, 3214946281LL, 7}, {1, 799063683LL, 5},
{1, 397222409LL, 4}, {1, 789879043LL, 5}, {1, 1570730897LL, 6},
{1, 780903145LL, 5}, {1, 3105965051LL, 7}, {1, 3088515809LL, 7},
{1, 3071261531LL, 7}, {1, 3054198967LL, 7}, {1, 759331235LL, 5},
{1, 3020636341LL, 7}, {1, 3004130131LL, 7}, {1, 2987803337LL, 7},
{1, 2971653049LL, 7}, {1, 738919105LL, 5}, {1, 2939870663LL, 7},
{1, 2924233053LL, 7}, {1, 2908760921LL, 7}, {1, 2893451653LL, 7},
{1, 2878302691LL, 7}, {1, 715827883LL, 5}, {1, 356059465LL, 4},
{1, 354224107LL, 4}, {1, 2819260585LL, 7}, {1, 1402438301LL, 6},
{1, 1395319325LL, 6}, {1, 2776544515LL, 7}, {1, 1381296015LL, 6},
{1, 1374389535LL, 6}, {1, 42735993LL, 1}, {1, 680390859LL, 5},
{1, 2708156719LL, 7}, {1, 2694881441LL, 7}, {1, 1340867839LL, 6},
{1, 333589693LL, 4}, {1, 663956297LL, 5}, {1, 1321528399LL, 6},
{1, 2630410593LL, 7}, {1, 2617884829LL, 7}, {1, 81421181LL, 2},
{1, 1296593901LL, 6}, {1, 2581013211LL, 7}, {1, 1284476201LL, 6},
{1, 1278501893LL, 6}, {1, 1272582903LL, 6}, {1, 2533436931LL, 7},
{1, 2521815661LL, 7}, {1, 2510300521LL, 7}, {1, 156180629LL, 3},
{1, 2487582869LL, 7}, {1, 2476377541LL, 7}, {1, 2465272709LL, 7},
{1, 2454267027LL, 7}, {1, 2443359173LL, 7}, {1, 1216273925LL, 6},
{1, 605457945LL, 5}, {1, 2411209711LL, 7}, {1, 1200340205LL, 6},
{1, 1195121335LL, 6}, {1, 2379895299LL, 7}, {1, 2369637129LL, 7},
{1, 2359467013LL, 7}, {1, 2349383821LL, 7}, {1, 2339386443LL, 7},
{1, 582368447LL, 5}, {1, 2319644785LL, 7}, {1, 1154949189LL, 6},
{1, 2300233531LL, 7}, {1, 2290649225LL, 7}, {1, 285143057LL, 4},
{1, 70991195LL, 2}, {1, 2262369605LL, 7}, {1, 1126548799LL, 6},
{1, 1121950641LL, 6}, {1, 558694933LL, 5}, {1, 2225732041LL, 7},
{1, 2216757315LL, 7}, {1, 2207854675LL, 7}, {1, 274877907LL, 4},
{1, 2190262207LL, 7}, {1, 2181570691LL, 7}, {1, 2172947881LL, 7},
{1, 2164392969LL, 7}, {1, 2155905153LL, 7}, {1, 2147483649LL, 7},
};
// Divides a varying unsigned int8 by a uniform constant using a table-driven
// multiply and shift.  The table row is selected with (divisor - 2) and no
// range check is made here, so the divisor must be one the table covers.
__declspec(safe)
static unmasked unsigned int8 __fast_idiv(unsigned int8 numerator,
                                          uniform unsigned int8 divisor) {
    // Each row is {method, multiplier, shift}.
    uniform int64 method = __idiv_table_u8[divisor-2][0];
    uniform int64 multiplier = __idiv_table_u8[divisor-2][1];
    uniform int64 shift = __idiv_table_u8[divisor-2][2];

    // Method 0: the quotient is just a right shift of the numerator.
    if (method == 0)
        return numerator >> shift;

    // The other methods multiply in twice the bit width.
    unsigned int16 scale = multiplier;
    unsigned int16 acc = numerator;

    // Method 1: multiply, then discard the low 8 bits plus the extra shift.
    if (method == 1)
        return (acc * scale) >> (8 + shift);

    // Remaining method: keep the high part of the product, add half of the
    // difference between the numerator and that value, then shift.
    acc *= scale;
    acc >>= 8;
    acc += (numerator-acc)>>1;
    return (acc >> shift);
}
// Divides a varying int8 by a uniform constant using a table-driven multiply
// and shift.  The table row is selected with (divisor - 2) and no range
// check is made here, so the divisor must be one the table covers.
// NOTE(review): both paths look like they round negative numerators toward
// negative infinity (shift, or xor with the sign mask around the multiply)
// -- confirm that matches what callers expect from '/'.
__declspec(safe)
static unmasked int8 __fast_idiv(int8 numerator, uniform int8 divisor) {
    // Each row is {method, multiplier, shift}.
    uniform int8 method = __idiv_table_s8[divisor-2][0];
    uniform int16 multiplier = __idiv_table_s8[divisor-2][1];
    uniform int8 shift = __idiv_table_s8[divisor-2][2];

    // Method 0: the quotient is just a right shift of the numerator.
    if (method == 0)
        return numerator >> shift;

    // Otherwise: fold the numerator with its sign mask, multiply in int16,
    // drop the low 8 bits plus the extra shift, and apply the mask again.
    unsigned int8 signMask = numerator >> 7;
    numerator ^= signMask;
    int16 product = (int16)numerator * (int16)multiplier;
    product >>= 8 + shift;
    return (int8)product ^ signMask;
}
/* Divides a varying unsigned int16 by a uniform unsigned int16 without
   using a divide instruction.  Row (divisor-2) of __idiv_table_u16
   supplies {method, multiplier, shift}, which select and parameterize one
   of three shift/multiply sequences evaluated at 32-bit width.

   numerator: value to divide.
   divisor:   uniform divisor; the table is indexed with divisor-2, so it
              must be at least 2 and covered by the table.
   Returns the quotient as an unsigned int16.
 */
__declspec(safe)
static unmasked unsigned int16 __fast_idiv(unsigned int16 numerator,
                                           uniform unsigned int16 divisor) {
    uniform int64 kind = __idiv_table_u16[divisor-2][0];
    uniform int64 magic = __idiv_table_u16[divisor-2][1];
    uniform int64 postShift = __idiv_table_u16[divisor-2][2];

    // Operands widened to twice the bit width so the product keeps its
    // upper half.
    unsigned int32 wideMagic = magic;
    unsigned int32 wideNum = numerator;

    // Method 0: the quotient is just the numerator shifted right.
    if (kind == 0)
        return numerator >> postShift;

    // Method 1: multiply, then drop the low 16 bits plus the extra shift.
    if (kind == 1)
        return (wideNum * wideMagic) >> (16 + postShift);

    // Any other method: keep the high half of the product, add half of
    // the difference between the numerator and that high half, then apply
    // the final shift.
    wideNum *= wideMagic;
    wideNum >>= 16;
    wideNum += (numerator-wideNum)>>1;
    return wideNum >> postShift;
}
/* Divides a varying int16 by a uniform int16 without using a divide
   instruction.  Row (divisor-2) of __idiv_table_s16 supplies
   {method, multiplier, shift}.

   numerator: value to divide; may be negative.
   divisor:   uniform divisor; the table is indexed with divisor-2, so it
              must be at least 2 and covered by the table.
   Returns numerator / divisor rounded toward zero, matching the "/"
   operator for both non-negative and negative numerators.
 */
__declspec(safe)
static unmasked int16 __fast_idiv(int16 numerator, uniform int16 divisor) {
    uniform int64 method = __idiv_table_s16[divisor-2][0];
    uniform int64 multiplier = __idiv_table_s16[divisor-2][1];
    uniform int64 shift = __idiv_table_s16[divisor-2][2];

    // The sequences below round toward negative infinity, whereas "/"
    // rounds toward zero.  For n < 0 and d > 0,
    //     trunc(n / d) == floor((n + d - 1) / d),
    // so negative lanes are biased by divisor-1 first.  Non-negative lanes
    // get a bias of 0 and are unaffected.  A negative value plus a value
    // smaller than the divisor cannot overflow int16.
    uniform int16 bias = divisor - 1;
    int16 negative = numerator >> 15;   // all ones where numerator < 0, else 0
    numerator += negative & bias;

    if (method == 0)
        // Arithmetic shift: floor division of the (biased) numerator.
        return numerator >> shift;
    else {
        // sign is all ones for (still) negative lanes.  Xor-ing with it
        // replaces n by ~n, which is non-negative, so the multiply only
        // ever sees non-negative operands; xor-ing the quotient with sign
        // afterwards complements it back, which gives floor(n / d).
        unsigned int16 sign = numerator >> 15;
        numerator ^= sign;
        int32 mul = (int32)numerator * (int32)multiplier;
        mul >>= 16 + shift;
        int16 result = mul;
        return result ^ sign;
    }
}
/* Divides a varying unsigned int32 by a uniform unsigned int32 without
   using a divide instruction.  Row (divisor-2) of __idiv_table_u32
   supplies {method, multiplier, shift}, which select and parameterize one
   of three shift/multiply sequences evaluated at 64-bit width.

   numerator: value to divide.
   divisor:   uniform divisor; the table is indexed with divisor-2, so it
              must be at least 2 and covered by the table.
   Returns the quotient as an unsigned int32.
 */
__declspec(safe)
static unmasked inline unsigned int32 __fast_idiv(unsigned int32 numerator,
                                                  uniform unsigned int32 divisor) {
    uniform int64 kind = __idiv_table_u32[divisor-2][0];
    uniform int64 magic = __idiv_table_u32[divisor-2][1];
    uniform int64 postShift = __idiv_table_u32[divisor-2][2];

    // Operands widened to twice the bit width so the product keeps its
    // upper half.
    unsigned int64 wideMagic = magic;
    unsigned int64 wideNum = numerator;

    // Method 0: the quotient is just the numerator shifted right.
    if (kind == 0)
        return numerator >> postShift;

    // Method 1: multiply, then drop the low 32 bits plus the extra shift.
    if (kind == 1)
        return (wideNum * wideMagic) >> (32 + postShift);

    // Any other method: keep the high half of the product, add half of
    // the difference between the numerator and that high half, then apply
    // the final shift.
    wideNum *= wideMagic;
    wideNum >>= 32;
    wideNum += (numerator-wideNum)>>1;
    return wideNum >> postShift;
}
/* Divides a varying int32 by a uniform int32 without using a divide
   instruction.  Row (divisor-2) of __idiv_table_s32 supplies
   {method, multiplier, shift}.

   numerator: value to divide; may be negative.
   divisor:   uniform divisor; the table is indexed with divisor-2, so it
              must be at least 2 and covered by the table.
   Returns numerator / divisor rounded toward zero, matching the "/"
   operator for both non-negative and negative numerators.
 */
__declspec(safe)
static unmasked int32 __fast_idiv(int32 numerator, uniform int32 divisor) {
    uniform int64 method = __idiv_table_s32[divisor-2][0];
    uniform int64 multiplier = __idiv_table_s32[divisor-2][1];
    uniform int64 shift = __idiv_table_s32[divisor-2][2];

    // The sequences below round toward negative infinity, whereas "/"
    // rounds toward zero.  For n < 0 and d > 0,
    //     trunc(n / d) == floor((n + d - 1) / d),
    // so negative lanes are biased by divisor-1 first.  Non-negative lanes
    // get a bias of 0 and are unaffected.  A negative value plus a value
    // smaller than the divisor cannot overflow int32.
    uniform int32 bias = divisor - 1;
    int32 negative = numerator >> 31;   // all ones where numerator < 0, else 0
    numerator += negative & bias;

    if (method == 0)
        // Arithmetic shift: floor division of the (biased) numerator.
        return numerator >> shift;
    else {
        // sign is all ones for (still) negative lanes.  Xor-ing with it
        // replaces n by ~n, which is non-negative, so the multiply only
        // ever sees non-negative operands; xor-ing the quotient with sign
        // afterwards complements it back, which gives floor(n / d).
        unsigned int32 sign = numerator >> 31;
        numerator ^= sign;
        int64 mul = (int64)numerator * (int64)multiplier;
        mul >>= 32 + shift;
        int32 result = mul;
        return result ^ sign;
    }
}

75
tests/idiv.ispc Normal file
View File

@@ -0,0 +1,75 @@
// Returns the built-in programCount value.
export uniform int width() { return programCount; }
// Checks __fast_idiv() against the "/" operator and stores the number of
// mismatches in RET[programIndex]; every mismatch is also printed.  The
// 8- and 16-bit cases sweep the numerator range (the loop bounds stop one
// short of the type's maximum); the 32-bit cases use random numerators.
// Every loop over a signed type starts at zero (or skips negative
// samples), so only non-negative numerators are compared here.
export void f_f(uniform float RET[], uniform float aFOO[]) {
    uniform int mismatches = 0;

    // unsigned int8: numerators 0..254, divisors 2..254
    for (unsigned int8 n = 0; n < 255; ++n) {
        for (uniform unsigned int8 d = 2; d < 255; ++d) {
            if (__fast_idiv(n, d) != n/d) {
                ++mismatches;
                print("error %/% = %, got %\n", n, d, n/d, __fast_idiv(n,d));
            }
        }
    }

    // int8: numerators 0..126, divisors 2..126
    for (int8 n = 0; n < 127; ++n) {
        for (uniform int8 d = 2; d < 127; ++d) {
            if (__fast_idiv(n, d) != n/d) {
                ++mismatches;
                print("error %/% = %, got %\n", n, d, n/d, __fast_idiv(n,d));
            }
        }
    }

    // int16: numerators 0..32766, divisors 2..255
    for (int16 n = 0; n < 32767; ++n) {
        for (uniform int16 d = 2; d < 256; ++d) {
            if (__fast_idiv(n, d) != n/d) {
                ++mismatches;
                print("error %/% = %, got %\n", n, d, n/d, __fast_idiv(n,d));
            }
        }
    }

    // unsigned int16: numerators 0..65534, divisors 2..255
    for (unsigned int16 n = 0; n < 0xffff; ++n) {
        for (uniform unsigned int16 d = 2; d < 256; ++d) {
            if (__fast_idiv(n, d) != n/d) {
                ++mismatches;
                print("error %/% = %, got %\n", n, d, n/d, __fast_idiv(n,d));
            }
        }
    }

    // The 32-bit range is too large to sweep, so draw random numerators
    // from a generator with a fixed seed.
    uniform RNGState rng;
    seed_rng(&rng, 1234);

    // unsigned int32: random numerators, divisors 2..255
    for (uniform int iter = 0; iter < 1M; ++iter) {
        unsigned int32 n = random(&rng);
        for (uniform unsigned int32 d = 2; d < 256; ++d) {
            if (__fast_idiv(n, d) != n/d) {
                ++mismatches;
                print("ui32 error %/% = %, got %\n", n, d, n/d, __fast_idiv(n,d));
            }
        }
    }

    // int32: random numerators with negative samples discarded,
    // divisors 2..255
    for (uniform int64 iter = 0; iter < 1M; ++iter) {
        int32 n = random(&rng);
        if (n < 0)
            continue;
        for (uniform int32 d = 2; d < 256; ++d) {
            if (__fast_idiv(n, d) != n/d) {
                ++mismatches;
                print("si32 error %/% = %, got %\n", n, d, n/d, __fast_idiv(n,d));
            }
        }
    }

    RET[programIndex] = mismatches;
}
// Writes 0 to RET[programIndex]; f_f() stores its mismatch count in the
// same slot of its own RET argument.
export void result(uniform float RET[]) {
RET[programIndex] = 0;
}