Add support for fast division of varying int values by small constants.
For varying int8/16/32 types, divides by small constants can be
implemented efficiently through multiplies and shifts with integer
types of twice the bit-width; this commit adds this optimization.
(Implementation is based on Halide.)
This commit is contained in:
69
expr.cpp
69
expr.cpp
@@ -2240,6 +2240,49 @@ lConstFoldBinaryIntOp(ConstExpr *constArg0, ConstExpr *constArg1,
|
||||
}
|
||||
|
||||
|
||||
/* Returns true if the given arguments (which are assumed to be the
|
||||
operands of a divide) represent a divide that can be performed by one of
|
||||
the __fast_idiv functions.
|
||||
*/
|
||||
static bool
|
||||
lCanImproveVectorDivide(Expr *arg0, Expr *arg1, int *divisor) {
|
||||
const Type *type = arg0->GetType();
|
||||
if (!type)
|
||||
return false;
|
||||
|
||||
// The value being divided must be an int8/16/32.
|
||||
if (!(Type::EqualIgnoringConst(type, AtomicType::VaryingInt8) ||
|
||||
Type::EqualIgnoringConst(type, AtomicType::VaryingUInt8) ||
|
||||
Type::EqualIgnoringConst(type, AtomicType::VaryingInt16) ||
|
||||
Type::EqualIgnoringConst(type, AtomicType::VaryingUInt16) ||
|
||||
Type::EqualIgnoringConst(type, AtomicType::VaryingInt32) ||
|
||||
Type::EqualIgnoringConst(type, AtomicType::VaryingUInt32)))
|
||||
return false;
|
||||
|
||||
// The divisor must be the same compile-time constant value for all of
|
||||
// the vector lanes.
|
||||
ConstExpr *ce = dynamic_cast<ConstExpr *>(arg1);
|
||||
if (!ce)
|
||||
return false;
|
||||
int64_t div[ISPC_MAX_NVEC];
|
||||
int count = ce->GetValues(div);
|
||||
for (int i = 1; i < count; ++i)
|
||||
if (div[i] != div[0])
|
||||
return false;
|
||||
*divisor = div[0];
|
||||
|
||||
// And finally, the divisor must be >= 2 and <128 (for 8-bit divides),
|
||||
// and <256 otherwise.
|
||||
if (*divisor < 2)
|
||||
return false;
|
||||
if (Type::EqualIgnoringConst(type, AtomicType::VaryingInt8) ||
|
||||
Type::EqualIgnoringConst(type, AtomicType::VaryingUInt8))
|
||||
return *divisor < 128;
|
||||
else
|
||||
return *divisor < 256;
|
||||
}
|
||||
|
||||
|
||||
Expr *
|
||||
BinaryExpr::Optimize() {
|
||||
if (arg0 == NULL || arg1 == NULL)
|
||||
@@ -2302,6 +2345,32 @@ BinaryExpr::Optimize() {
|
||||
}
|
||||
}
|
||||
|
||||
int divisor;
|
||||
if (op == Div && lCanImproveVectorDivide(arg0, arg1, &divisor)) {
|
||||
Debug(pos, "Improving vector divide by constant %d", divisor);
|
||||
|
||||
std::vector<Symbol *> idivFuns;
|
||||
m->symbolTable->LookupFunction("__fast_idiv", &idivFuns);
|
||||
if (idivFuns.size() == 0) {
|
||||
Warning(pos, "Couldn't find __fast_idiv to optimize integer divide. "
|
||||
"Are you compiling with --nostdlib?");
|
||||
return this;
|
||||
}
|
||||
|
||||
Expr *idivSymExpr = new FunctionSymbolExpr("__fast_idiv", idivFuns, pos);
|
||||
ExprList *args = new ExprList(arg0, pos);
|
||||
args->exprs.push_back(new ConstExpr(AtomicType::UniformInt32, divisor, arg1->pos));
|
||||
Expr *idivCall = new FunctionCallExpr(idivSymExpr, args, pos);
|
||||
|
||||
idivCall = ::TypeCheck(idivCall);
|
||||
if (idivCall == NULL)
|
||||
return NULL;
|
||||
|
||||
Assert(Type::EqualIgnoringConst(GetType(), idivCall->GetType()));
|
||||
idivCall = new TypeCastExpr(GetType(), idivCall, pos);
|
||||
return ::Optimize(idivCall);
|
||||
}
|
||||
|
||||
// From here on out, we're just doing constant folding, so if both args
|
||||
// aren't constants then we're done...
|
||||
if (constArg0 == NULL || constArg1 == NULL)
|
||||
|
||||
675
stdlib.ispc
675
stdlib.ispc
@@ -4264,3 +4264,678 @@ static inline bool rdrand(int64 * ptr) {
|
||||
return success;
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// Fast vector integer division
|
||||
|
||||
/* These tables and the algorithms in the __fast_idiv() functions below are
|
||||
from Halide; the idea is based on the paper "Division by Invariant
|
||||
Integers using Multiplication" by Granlund and Montgomery.
|
||||
|
||||
Copyright (c) 2012 MIT CSAIL
|
||||
|
||||
Developed by:
|
||||
|
||||
The Halide team
|
||||
MIT CSAIL
|
||||
http://halide-lang.org
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a
|
||||
copy of this software and associated documentation files (the
|
||||
"Software"), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Software, and to
|
||||
permit persons to whom the Software is furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included
|
||||
in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
||||
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
static const uniform int64 __idiv_table_u8[][3] = {
|
||||
{0, 0LL, 1}, {1, 171LL, 1}, {0, 0LL, 2},
|
||||
{1, 205LL, 2}, {1, 171LL, 2}, {2, 37LL, 2},
|
||||
{0, 0LL, 3}, {1, 57LL, 1}, {1, 205LL, 3},
|
||||
{2, 117LL, 3}, {1, 171LL, 3}, {1, 79LL, 2},
|
||||
{2, 37LL, 3}, {1, 137LL, 3}, {0, 0LL, 4},
|
||||
{1, 241LL, 4}, {1, 57LL, 2}, {1, 27LL, 1},
|
||||
{1, 205LL, 4}, {2, 135LL, 4}, {2, 117LL, 4},
|
||||
{2, 101LL, 4}, {1, 171LL, 4}, {1, 41LL, 2},
|
||||
{1, 79LL, 3}, {1, 19LL, 1}, {2, 37LL, 4},
|
||||
{2, 27LL, 4}, {1, 137LL, 4}, {2, 9LL, 4},
|
||||
{0, 0LL, 5}, {1, 249LL, 5}, {1, 241LL, 5},
|
||||
{1, 235LL, 5}, {1, 57LL, 3}, {1, 111LL, 4},
|
||||
{1, 27LL, 2}, {2, 165LL, 5}, {1, 205LL, 5},
|
||||
{1, 25LL, 2}, {2, 135LL, 5}, {1, 191LL, 5},
|
||||
{1, 187LL, 5}, {2, 109LL, 5}, {2, 101LL, 5},
|
||||
{1, 175LL, 5}, {1, 171LL, 5}, {2, 79LL, 5},
|
||||
{1, 41LL, 3}, {1, 161LL, 5}, {1, 79LL, 4},
|
||||
{1, 155LL, 5}, {1, 19LL, 2}, {1, 149LL, 5},
|
||||
{2, 37LL, 5}, {1, 9LL, 1}, {2, 27LL, 5},
|
||||
{1, 139LL, 5}, {1, 137LL, 5}, {2, 13LL, 5},
|
||||
{2, 9LL, 5}, {2, 5LL, 5}, {0, 0LL, 6},
|
||||
{1, 253LL, 6}, {1, 249LL, 6}, {1, 245LL, 6},
|
||||
{1, 121LL, 5}, {1, 119LL, 5}, {1, 235LL, 6},
|
||||
{1, 231LL, 6}, {1, 57LL, 4}, {1, 225LL, 6},
|
||||
{1, 111LL, 5}, {1, 219LL, 6}, {1, 27LL, 3},
|
||||
{1, 213LL, 6}, {2, 165LL, 6}, {1, 13LL, 2},
|
||||
{1, 205LL, 6}, {1, 203LL, 6}, {1, 25LL, 3},
|
||||
{1, 99LL, 5}, {2, 135LL, 6}, {1, 193LL, 6},
|
||||
{1, 191LL, 6}, {1, 189LL, 6}, {1, 187LL, 6},
|
||||
{1, 185LL, 6}, {1, 183LL, 6}, {1, 181LL, 6},
|
||||
{1, 179LL, 6}, {1, 177LL, 6}, {1, 175LL, 6},
|
||||
{1, 173LL, 6}, {1, 171LL, 6}, {1, 169LL, 6},
|
||||
{1, 21LL, 3}, {1, 83LL, 5}, {1, 41LL, 4},
|
||||
{1, 163LL, 6}, {1, 161LL, 6}, {2, 63LL, 6},
|
||||
{1, 79LL, 5}, {2, 57LL, 6}, {1, 155LL, 6},
|
||||
{2, 51LL, 6}, {1, 19LL, 3}, {1, 151LL, 6},
|
||||
{1, 149LL, 6}, {1, 37LL, 4}, {2, 37LL, 6},
|
||||
{1, 145LL, 6}, {1, 9LL, 2}, {1, 143LL, 6},
|
||||
{2, 27LL, 6}, {2, 25LL, 6}, {1, 139LL, 6},
|
||||
{1, 69LL, 5}, {1, 137LL, 6}, {2, 15LL, 6},
|
||||
{2, 13LL, 6}, {2, 11LL, 6}, {2, 9LL, 6},
|
||||
{2, 7LL, 6}, {2, 5LL, 6}, {2, 3LL, 6},
|
||||
{0, 0LL, 7}, {1, 255LL, 7}, {1, 127LL, 6},
|
||||
{1, 63LL, 5}, {1, 125LL, 6}, {1, 31LL, 4},
|
||||
{1, 123LL, 6}, {1, 61LL, 5}, {1, 121LL, 6},
|
||||
{1, 15LL, 3}, {1, 119LL, 6}, {1, 59LL, 5},
|
||||
{1, 235LL, 7}, {1, 117LL, 6}, {1, 29LL, 4},
|
||||
{1, 115LL, 6}, {1, 57LL, 5}, {1, 113LL, 6},
|
||||
{1, 225LL, 7}, {1, 7LL, 2}, {1, 111LL, 6},
|
||||
{1, 55LL, 5}, {1, 219LL, 7}, {1, 109LL, 6},
|
||||
{1, 27LL, 4}, {1, 215LL, 7}, {1, 107LL, 6},
|
||||
{1, 53LL, 5}, {1, 211LL, 7}, {1, 105LL, 6},
|
||||
{1, 13LL, 3}, {1, 207LL, 7}, {1, 103LL, 6},
|
||||
{1, 51LL, 5}, {1, 203LL, 7}, {1, 101LL, 6},
|
||||
{1, 25LL, 4}, {1, 199LL, 7}, {1, 99LL, 6},
|
||||
{1, 197LL, 7}, {1, 49LL, 5}, {1, 97LL, 6},
|
||||
{1, 193LL, 7}, {1, 3LL, 1}, {1, 191LL, 7},
|
||||
{1, 95LL, 6}, {1, 189LL, 7}, {1, 47LL, 5},
|
||||
{1, 187LL, 7}, {1, 93LL, 6}, {1, 185LL, 7},
|
||||
{1, 23LL, 4}, {1, 183LL, 7}, {1, 91LL, 6},
|
||||
{1, 181LL, 7}, {1, 45LL, 5}, {1, 179LL, 7},
|
||||
{1, 89LL, 6}, {1, 177LL, 7}, {1, 11LL, 3},
|
||||
{1, 175LL, 7}, {1, 87LL, 6}, {1, 173LL, 7},
|
||||
{1, 43LL, 5}, {1, 171LL, 7}, {1, 85LL, 6},
|
||||
{1, 169LL, 7}, {2, 81LL, 7}, {1, 21LL, 4},
|
||||
{1, 167LL, 7}, {1, 83LL, 6}, {1, 165LL, 7},
|
||||
{1, 41LL, 5}, {2, 71LL, 7}, {1, 163LL, 7},
|
||||
{1, 81LL, 6}, {1, 161LL, 7}, {1, 5LL, 2},
|
||||
{2, 63LL, 7}, {1, 159LL, 7}, {1, 79LL, 6},
|
||||
{1, 157LL, 7}, {2, 57LL, 7}, {1, 39LL, 5},
|
||||
{1, 155LL, 7}, {1, 77LL, 6}, {2, 51LL, 7},
|
||||
{1, 153LL, 7}, {1, 19LL, 4}, {2, 47LL, 7},
|
||||
{1, 151LL, 7}, {1, 75LL, 6}, {1, 149LL, 7},
|
||||
{2, 41LL, 7}, {1, 37LL, 5}, {1, 147LL, 7},
|
||||
{2, 37LL, 7}, {1, 73LL, 6}, {1, 145LL, 7},
|
||||
{2, 33LL, 7}, {1, 9LL, 3}, {2, 31LL, 7},
|
||||
{1, 143LL, 7}, {1, 71LL, 6}, {2, 27LL, 7},
|
||||
{1, 141LL, 7}, {2, 25LL, 7}, {1, 35LL, 5},
|
||||
{1, 139LL, 7}, {2, 21LL, 7}, {1, 69LL, 6},
|
||||
{2, 19LL, 7}, {1, 137LL, 7}, {1, 17LL, 4},
|
||||
{2, 15LL, 7}, {1, 135LL, 7}, {2, 13LL, 7},
|
||||
{1, 67LL, 6}, {2, 11LL, 7}, {1, 133LL, 7},
|
||||
{2, 9LL, 7}, {1, 33LL, 5}, {2, 7LL, 7},
|
||||
{1, 131LL, 7}, {2, 5LL, 7}, {1, 65LL, 6},
|
||||
{2, 3LL, 7}, {1, 129LL, 7}, {0, 0LL, 8},
|
||||
};
|
||||
static const uniform int64 __idiv_table_s8[][3] = {
|
||||
{0, 0LL, 1}, {1, 86LL, 0}, {0, 0LL, 2},
|
||||
{1, 103LL, 1}, {1, 43LL, 0}, {1, 147LL, 2},
|
||||
{0, 0LL, 3}, {1, 57LL, 1}, {1, 103LL, 2},
|
||||
{1, 187LL, 3}, {1, 43LL, 1}, {1, 79LL, 2},
|
||||
{1, 147LL, 3}, {1, 137LL, 3}, {0, 0LL, 4},
|
||||
{1, 121LL, 3}, {1, 57LL, 2}, {1, 27LL, 1},
|
||||
{1, 103LL, 3}, {1, 49LL, 2}, {1, 187LL, 4},
|
||||
{1, 179LL, 4}, {1, 43LL, 2}, {1, 41LL, 2},
|
||||
{1, 79LL, 3}, {1, 19LL, 1}, {1, 147LL, 4},
|
||||
{1, 71LL, 3}, {1, 137LL, 4}, {1, 133LL, 4},
|
||||
{0, 0LL, 5}, {1, 125LL, 4}, {1, 121LL, 4},
|
||||
{1, 59LL, 3}, {1, 57LL, 3}, {1, 111LL, 4},
|
||||
{1, 27LL, 2}, {1, 211LL, 5}, {1, 103LL, 4},
|
||||
{1, 25LL, 2}, {1, 49LL, 3}, {1, 6LL, 0},
|
||||
{1, 47LL, 3}, {1, 23LL, 2}, {1, 45LL, 3},
|
||||
{1, 11LL, 1}, {1, 43LL, 3}, {1, 21LL, 2},
|
||||
{1, 41LL, 3}, {1, 81LL, 4}, {1, 79LL, 4},
|
||||
{1, 39LL, 3}, {1, 19LL, 2}, {1, 75LL, 4},
|
||||
{1, 147LL, 5}, {1, 9LL, 1}, {1, 71LL, 4},
|
||||
{1, 35LL, 3}, {1, 137LL, 5}, {1, 135LL, 5},
|
||||
{1, 133LL, 5}, {1, 131LL, 5}, {0, 0LL, 6},
|
||||
{1, 127LL, 5}, {1, 63LL, 4}, {1, 31LL, 3},
|
||||
{1, 61LL, 4}, {1, 15LL, 2}, {1, 59LL, 4},
|
||||
{1, 29LL, 3}, {1, 57LL, 4}, {1, 113LL, 5},
|
||||
{1, 7LL, 1}, {1, 55LL, 4}, {1, 27LL, 3},
|
||||
{1, 107LL, 5}, {1, 53LL, 4}, {1, 13LL, 2},
|
||||
{1, 103LL, 5}, {1, 51LL, 4}, {1, 25LL, 3},
|
||||
{1, 99LL, 5}, {1, 49LL, 4}, {1, 97LL, 5},
|
||||
{1, 3LL, 0}, {1, 95LL, 5}, {1, 47LL, 4},
|
||||
{1, 93LL, 5}, {1, 23LL, 3}, {1, 91LL, 5},
|
||||
{1, 45LL, 4}, {1, 89LL, 5}, {1, 11LL, 2},
|
||||
{1, 87LL, 5}, {1, 43LL, 4}, {1, 85LL, 5},
|
||||
{1, 21LL, 3}, {1, 83LL, 5}, {1, 41LL, 4},
|
||||
{1, 163LL, 6}, {1, 81LL, 5}, {1, 5LL, 1},
|
||||
{1, 79LL, 5}, {1, 157LL, 6}, {1, 39LL, 4},
|
||||
{1, 77LL, 5}, {1, 19LL, 3}, {1, 151LL, 6},
|
||||
{1, 75LL, 5}, {1, 37LL, 4}, {1, 147LL, 6},
|
||||
{1, 73LL, 5}, {1, 9LL, 2}, {1, 143LL, 6},
|
||||
{1, 71LL, 5}, {1, 141LL, 6}, {1, 35LL, 4},
|
||||
{1, 69LL, 5}, {1, 137LL, 6}, {1, 17LL, 3},
|
||||
{1, 135LL, 6}, {1, 67LL, 5}, {1, 133LL, 6},
|
||||
{1, 33LL, 4}, {1, 131LL, 6}, {1, 65LL, 5},
|
||||
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
|
||||
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
|
||||
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
|
||||
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
|
||||
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
|
||||
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
|
||||
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
|
||||
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
|
||||
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
|
||||
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
|
||||
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
|
||||
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
|
||||
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
|
||||
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
|
||||
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
|
||||
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
|
||||
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
|
||||
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
|
||||
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
|
||||
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
|
||||
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
|
||||
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
|
||||
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
|
||||
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
|
||||
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
|
||||
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
|
||||
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
|
||||
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
|
||||
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
|
||||
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
|
||||
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
|
||||
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
|
||||
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
|
||||
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
|
||||
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
|
||||
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
|
||||
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
|
||||
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
|
||||
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
|
||||
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
|
||||
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
|
||||
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
|
||||
{0, 0LL, 7}, {0, 0LL, 7}, {0, 0LL, 7},
|
||||
};
|
||||
static const uniform int64 __idiv_table_u16[][3] = {
|
||||
{0, 0LL, 1}, {1, 43691LL, 1}, {0, 0LL, 2},
|
||||
{1, 52429LL, 2}, {1, 43691LL, 2}, {2, 9363LL, 2},
|
||||
{0, 0LL, 3}, {1, 58255LL, 3}, {1, 52429LL, 3},
|
||||
{1, 47663LL, 3}, {1, 43691LL, 3}, {1, 20165LL, 2},
|
||||
{2, 9363LL, 3}, {1, 34953LL, 3}, {0, 0LL, 4},
|
||||
{1, 61681LL, 4}, {1, 58255LL, 4}, {1, 55189LL, 4},
|
||||
{1, 52429LL, 4}, {2, 34329LL, 4}, {1, 47663LL, 4},
|
||||
{2, 25645LL, 4}, {1, 43691LL, 4}, {2, 18351LL, 4},
|
||||
{1, 20165LL, 3}, {2, 12137LL, 4}, {2, 9363LL, 4},
|
||||
{1, 18079LL, 3}, {1, 34953LL, 4}, {2, 2115LL, 4},
|
||||
{0, 0LL, 5}, {1, 63551LL, 5}, {1, 61681LL, 5},
|
||||
{1, 59919LL, 5}, {1, 58255LL, 5}, {1, 7085LL, 2},
|
||||
{1, 55189LL, 5}, {2, 42011LL, 5}, {1, 52429LL, 5},
|
||||
{2, 36765LL, 5}, {2, 34329LL, 5}, {1, 48771LL, 5},
|
||||
{1, 47663LL, 5}, {1, 11651LL, 3}, {2, 25645LL, 5},
|
||||
{2, 23705LL, 5}, {1, 43691LL, 5}, {2, 20063LL, 5},
|
||||
{2, 18351LL, 5}, {1, 41121LL, 5}, {1, 20165LL, 4},
|
||||
{1, 39569LL, 5}, {2, 12137LL, 5}, {2, 10725LL, 5},
|
||||
{2, 9363LL, 5}, {2, 8049LL, 5}, {1, 18079LL, 4},
|
||||
{1, 35545LL, 5}, {1, 34953LL, 5}, {1, 8595LL, 3},
|
||||
{2, 2115LL, 5}, {2, 1041LL, 5}, {0, 0LL, 6},
|
||||
{1, 4033LL, 2}, {1, 63551LL, 6}, {1, 31301LL, 5},
|
||||
{1, 61681LL, 6}, {2, 56039LL, 6}, {1, 59919LL, 6},
|
||||
{1, 59075LL, 6}, {1, 58255LL, 6}, {1, 57457LL, 6},
|
||||
{1, 7085LL, 3}, {2, 46313LL, 6}, {1, 55189LL, 6},
|
||||
{1, 6809LL, 3}, {2, 42011LL, 6}, {1, 53093LL, 6},
|
||||
{1, 52429LL, 6}, {1, 25891LL, 5}, {2, 36765LL, 6},
|
||||
{1, 25267LL, 5}, {2, 34329LL, 6}, {1, 49345LL, 6},
|
||||
{1, 48771LL, 6}, {1, 48211LL, 6}, {1, 47663LL, 6},
|
||||
{2, 28719LL, 6}, {1, 11651LL, 4}, {2, 26647LL, 6},
|
||||
{2, 25645LL, 6}, {2, 24665LL, 6}, {2, 23705LL, 6},
|
||||
{1, 44151LL, 6}, {1, 43691LL, 6}, {2, 20945LL, 6},
|
||||
{2, 20063LL, 6}, {1, 42367LL, 6}, {2, 18351LL, 6},
|
||||
{1, 5191LL, 3}, {1, 41121LL, 6}, {1, 20361LL, 5},
|
||||
{1, 20165LL, 5}, {1, 19973LL, 5}, {1, 39569LL, 6},
|
||||
{2, 12863LL, 6}, {2, 12137LL, 6}, {1, 2405LL, 2},
|
||||
{2, 10725LL, 6}, {1, 37787LL, 6}, {2, 9363LL, 6},
|
||||
{1, 18559LL, 5}, {2, 8049LL, 6}, {2, 7409LL, 6},
|
||||
{1, 18079LL, 5}, {1, 35849LL, 6}, {1, 35545LL, 6},
|
||||
{2, 4957LL, 6}, {1, 34953LL, 6}, {1, 4333LL, 3},
|
||||
{1, 8595LL, 4}, {2, 2665LL, 6}, {2, 2115LL, 6},
|
||||
{2, 1573LL, 6}, {2, 1041LL, 6}, {2, 517LL, 6},
|
||||
{0, 0LL, 7}, {1, 16257LL, 5}, {1, 4033LL, 3},
|
||||
{1, 16009LL, 5}, {1, 63551LL, 7}, {1, 63073LL, 7},
|
||||
{1, 31301LL, 6}, {1, 31069LL, 6}, {1, 61681LL, 7},
|
||||
{1, 61231LL, 7}, {2, 56039LL, 7}, {1, 30175LL, 6},
|
||||
{1, 59919LL, 7}, {1, 29747LL, 6}, {1, 59075LL, 7},
|
||||
{1, 29331LL, 6}, {1, 58255LL, 7}, {1, 57853LL, 7},
|
||||
{1, 57457LL, 7}, {1, 28533LL, 6}, {1, 7085LL, 4},
|
||||
{1, 14075LL, 5}, {2, 46313LL, 7}, {1, 27777LL, 6},
|
||||
{1, 55189LL, 7}, {1, 13707LL, 5}, {1, 6809LL, 4},
|
||||
{2, 42705LL, 7}, {2, 42011LL, 7}, {1, 53431LL, 7},
|
||||
{1, 53093LL, 7}, {1, 52759LL, 7}, {1, 52429LL, 7},
|
||||
{2, 38671LL, 7}, {1, 25891LL, 6}, {1, 6433LL, 4},
|
||||
{2, 36765LL, 7}, {2, 36145LL, 7}, {1, 25267LL, 6},
|
||||
{2, 34927LL, 7}, {2, 34329LL, 7}, {1, 49637LL, 7},
|
||||
{1, 49345LL, 7}, {2, 32577LL, 7}, {1, 48771LL, 7},
|
||||
{2, 31443LL, 7}, {1, 48211LL, 7}, {1, 47935LL, 7},
|
||||
{1, 47663LL, 7}, {2, 29251LL, 7}, {2, 28719LL, 7},
|
||||
{1, 2929LL, 3}, {1, 11651LL, 5}, {1, 23173LL, 6},
|
||||
{2, 26647LL, 7}, {1, 2865LL, 3}, {2, 25645LL, 7},
|
||||
{1, 1417LL, 2}, {2, 24665LL, 7}, {1, 44859LL, 7},
|
||||
{2, 23705LL, 7}, {2, 23233LL, 7}, {1, 44151LL, 7},
|
||||
{1, 2745LL, 3}, {1, 43691LL, 7}, {2, 21393LL, 7},
|
||||
{2, 20945LL, 7}, {1, 43019LL, 7}, {2, 20063LL, 7},
|
||||
{1, 21291LL, 6}, {1, 42367LL, 7}, {1, 21077LL, 6},
|
||||
{2, 18351LL, 7}, {1, 41735LL, 7}, {1, 5191LL, 4},
|
||||
{2, 17111LL, 7}, {1, 41121LL, 7}, {2, 16305LL, 7},
|
||||
{1, 20361LL, 6}, {1, 40525LL, 7}, {1, 20165LL, 6},
|
||||
{1, 40137LL, 7}, {1, 19973LL, 6}, {1, 39757LL, 7},
|
||||
{1, 39569LL, 7}, {2, 13231LL, 7}, {2, 12863LL, 7},
|
||||
{1, 39017LL, 7}, {2, 12137LL, 7}, {2, 11779LL, 7},
|
||||
{1, 2405LL, 3}, {2, 11073LL, 7}, {2, 10725LL, 7},
|
||||
{1, 18979LL, 6}, {1, 37787LL, 7}, {2, 9699LL, 7},
|
||||
{2, 9363LL, 7}, {1, 37283LL, 7}, {1, 18559LL, 6},
|
||||
{2, 8373LL, 7}, {2, 8049LL, 7}, {1, 4579LL, 4},
|
||||
{2, 7409LL, 7}, {2, 7093LL, 7}, {1, 18079LL, 6},
|
||||
{1, 36003LL, 7}, {1, 35849LL, 7}, {2, 5857LL, 7},
|
||||
{1, 35545LL, 7}, {1, 35395LL, 7}, {2, 4957LL, 7},
|
||||
{1, 35099LL, 7}, {1, 34953LL, 7}, {1, 4351LL, 4},
|
||||
{1, 4333LL, 4}, {2, 3507LL, 7}, {1, 8595LL, 5},
|
||||
{2, 2943LL, 7}, {2, 2665LL, 7}, {1, 16981LL, 6},
|
||||
{2, 2115LL, 7}, {2, 1843LL, 7}, {2, 1573LL, 7},
|
||||
{1, 33421LL, 7}, {2, 1041LL, 7}, {1, 33157LL, 7},
|
||||
{2, 517LL, 7}, {1, 32897LL, 7}, {0, 0LL, 8},
|
||||
};
|
||||
static const uniform int64 __idiv_table_s16[][3] = {
|
||||
{0, 0LL, 1}, {1, 21846LL, 0}, {0, 0LL, 2},
|
||||
{1, 26215LL, 1}, {1, 10923LL, 0}, {1, 18725LL, 1},
|
||||
{0, 0LL, 3}, {1, 7282LL, 0}, {1, 26215LL, 2},
|
||||
{1, 5958LL, 0}, {1, 10923LL, 1}, {1, 20165LL, 2},
|
||||
{1, 18725LL, 2}, {1, 34953LL, 3}, {0, 0LL, 4},
|
||||
{1, 30841LL, 3}, {1, 3641LL, 0}, {1, 55189LL, 4},
|
||||
{1, 26215LL, 3}, {1, 49933LL, 4}, {1, 2979LL, 0},
|
||||
{1, 45591LL, 4}, {1, 10923LL, 2}, {1, 5243LL, 1},
|
||||
{1, 20165LL, 3}, {1, 38837LL, 4}, {1, 18725LL, 3},
|
||||
{1, 18079LL, 3}, {1, 34953LL, 4}, {1, 16913LL, 3},
|
||||
{0, 0LL, 5}, {1, 1986LL, 0}, {1, 30841LL, 4},
|
||||
{1, 3745LL, 1}, {1, 3641LL, 1}, {1, 7085LL, 2},
|
||||
{1, 55189LL, 5}, {1, 26887LL, 4}, {1, 26215LL, 4},
|
||||
{1, 51151LL, 5}, {1, 49933LL, 5}, {1, 12193LL, 3},
|
||||
{1, 2979LL, 1}, {1, 11651LL, 3}, {1, 45591LL, 5},
|
||||
{1, 44621LL, 5}, {1, 10923LL, 3}, {1, 2675LL, 1},
|
||||
{1, 5243LL, 2}, {1, 41121LL, 5}, {1, 20165LL, 4},
|
||||
{1, 19785LL, 4}, {1, 38837LL, 5}, {1, 38131LL, 5},
|
||||
{1, 18725LL, 4}, {1, 36793LL, 5}, {1, 18079LL, 4},
|
||||
{1, 17773LL, 4}, {1, 34953LL, 5}, {1, 8595LL, 3},
|
||||
{1, 16913LL, 4}, {1, 33289LL, 5}, {0, 0LL, 6},
|
||||
{1, 4033LL, 2}, {1, 993LL, 0}, {1, 31301LL, 5},
|
||||
{1, 30841LL, 5}, {1, 15197LL, 4}, {1, 3745LL, 2},
|
||||
{1, 14769LL, 4}, {1, 3641LL, 2}, {1, 57457LL, 6},
|
||||
{1, 7085LL, 3}, {1, 55925LL, 6}, {1, 55189LL, 6},
|
||||
{1, 6809LL, 3}, {1, 26887LL, 5}, {1, 26547LL, 5},
|
||||
{1, 26215LL, 5}, {1, 25891LL, 5}, {1, 51151LL, 6},
|
||||
{1, 25267LL, 5}, {1, 49933LL, 6}, {1, 24673LL, 5},
|
||||
{1, 12193LL, 4}, {1, 48211LL, 6}, {1, 2979LL, 2},
|
||||
{1, 5891LL, 3}, {1, 11651LL, 4}, {1, 11523LL, 4},
|
||||
{1, 45591LL, 6}, {1, 45101LL, 6}, {1, 44621LL, 6},
|
||||
{1, 44151LL, 6}, {1, 10923LL, 4}, {1, 43241LL, 6},
|
||||
{1, 2675LL, 2}, {1, 662LL, 0}, {1, 5243LL, 3},
|
||||
{1, 5191LL, 3}, {1, 41121LL, 6}, {1, 20361LL, 5},
|
||||
{1, 20165LL, 5}, {1, 19973LL, 5}, {1, 19785LL, 5},
|
||||
{1, 1225LL, 1}, {1, 38837LL, 6}, {1, 2405LL, 2},
|
||||
{1, 38131LL, 6}, {1, 37787LL, 6}, {1, 18725LL, 5},
|
||||
{1, 18559LL, 5}, {1, 36793LL, 6}, {1, 36473LL, 6},
|
||||
{1, 18079LL, 5}, {1, 35849LL, 6}, {1, 17773LL, 5},
|
||||
{1, 35247LL, 6}, {1, 34953LL, 6}, {1, 4333LL, 3},
|
||||
{1, 8595LL, 4}, {1, 34101LL, 6}, {1, 16913LL, 5},
|
||||
{1, 33555LL, 6}, {1, 33289LL, 6}, {1, 33027LL, 6},
|
||||
{0, 0LL, 7}, {1, 16257LL, 5}, {1, 4033LL, 3},
|
||||
{1, 16009LL, 5}, {1, 993LL, 1}, {1, 31537LL, 6},
|
||||
{1, 31301LL, 6}, {1, 31069LL, 6}, {1, 30841LL, 6},
|
||||
{1, 3827LL, 3}, {1, 15197LL, 5}, {1, 30175LL, 6},
|
||||
{1, 3745LL, 3}, {1, 29747LL, 6}, {1, 14769LL, 5},
|
||||
{1, 29331LL, 6}, {1, 3641LL, 3}, {1, 28927LL, 6},
|
||||
{1, 57457LL, 7}, {1, 28533LL, 6}, {1, 7085LL, 4},
|
||||
{1, 14075LL, 5}, {1, 55925LL, 7}, {1, 27777LL, 6},
|
||||
{1, 55189LL, 7}, {1, 13707LL, 5}, {1, 6809LL, 4},
|
||||
{1, 54121LL, 7}, {1, 26887LL, 6}, {1, 6679LL, 4},
|
||||
{1, 26547LL, 6}, {1, 6595LL, 4}, {1, 26215LL, 6},
|
||||
{1, 6513LL, 4}, {1, 25891LL, 6}, {1, 6433LL, 4},
|
||||
{1, 51151LL, 7}, {1, 50841LL, 7}, {1, 25267LL, 6},
|
||||
{1, 6279LL, 4}, {1, 49933LL, 7}, {1, 24819LL, 6},
|
||||
{1, 24673LL, 6}, {1, 49057LL, 7}, {1, 12193LL, 5},
|
||||
{1, 24245LL, 6}, {1, 48211LL, 7}, {1, 749LL, 1},
|
||||
{1, 2979LL, 3}, {1, 23697LL, 6}, {1, 5891LL, 4},
|
||||
{1, 2929LL, 3}, {1, 11651LL, 5}, {1, 23173LL, 6},
|
||||
{1, 11523LL, 5}, {1, 2865LL, 3}, {1, 45591LL, 7},
|
||||
{1, 1417LL, 2}, {1, 45101LL, 7}, {1, 11215LL, 5},
|
||||
{1, 44621LL, 7}, {1, 44385LL, 7}, {1, 44151LL, 7},
|
||||
{1, 2745LL, 3}, {1, 10923LL, 5}, {1, 43465LL, 7},
|
||||
{1, 43241LL, 7}, {1, 43019LL, 7}, {1, 2675LL, 3},
|
||||
{1, 21291LL, 6}, {1, 331LL, 0}, {1, 21077LL, 6},
|
||||
{1, 5243LL, 4}, {1, 41735LL, 7}, {1, 5191LL, 4},
|
||||
{1, 10331LL, 5}, {1, 41121LL, 7}, {1, 40921LL, 7},
|
||||
{1, 20361LL, 6}, {1, 40525LL, 7}, {1, 20165LL, 6},
|
||||
{1, 20069LL, 6}, {1, 19973LL, 6}, {1, 39757LL, 7},
|
||||
{1, 19785LL, 6}, {1, 4923LL, 4}, {1, 1225LL, 2},
|
||||
{1, 39017LL, 7}, {1, 38837LL, 7}, {1, 19329LL, 6},
|
||||
{1, 2405LL, 3}, {1, 38305LL, 7}, {1, 38131LL, 7},
|
||||
{1, 18979LL, 6}, {1, 37787LL, 7}, {1, 18809LL, 6},
|
||||
{1, 18725LL, 6}, {1, 37283LL, 7}, {1, 18559LL, 6},
|
||||
{1, 36955LL, 7}, {1, 36793LL, 7}, {1, 4579LL, 4},
|
||||
{1, 36473LL, 7}, {1, 36315LL, 7}, {1, 18079LL, 6},
|
||||
{1, 36003LL, 7}, {1, 35849LL, 7}, {1, 35697LL, 7},
|
||||
{1, 17773LL, 6}, {1, 8849LL, 5}, {1, 35247LL, 7},
|
||||
{1, 35099LL, 7}, {1, 34953LL, 7}, {1, 4351LL, 4},
|
||||
{1, 4333LL, 4}, {1, 17261LL, 6}, {1, 8595LL, 5},
|
||||
{1, 535LL, 1}, {1, 34101LL, 7}, {1, 16981LL, 6},
|
||||
{1, 16913LL, 6}, {1, 16845LL, 6}, {1, 33555LL, 7},
|
||||
{1, 33421LL, 7}, {1, 33289LL, 7}, {1, 33157LL, 7},
|
||||
{1, 33027LL, 7}, {1, 32897LL, 7}, {1, 32769LL, 7},
|
||||
};
|
||||
static const uniform int64 __idiv_table_u32[][3] = {
|
||||
{0, 0LL, 1}, {1, 2863311531LL, 1}, {0, 0LL, 2},
|
||||
{1, 3435973837LL, 2}, {1, 2863311531LL, 2}, {2, 613566757LL, 2},
|
||||
{0, 0LL, 3}, {1, 954437177LL, 1}, {1, 3435973837LL, 3},
|
||||
{1, 3123612579LL, 3}, {1, 2863311531LL, 3}, {1, 1321528399LL, 2},
|
||||
{2, 613566757LL, 3}, {1, 2290649225LL, 3}, {0, 0LL, 4},
|
||||
{1, 4042322161LL, 4}, {1, 954437177LL, 2}, {2, 2938661835LL, 4},
|
||||
{1, 3435973837LL, 4}, {2, 2249744775LL, 4}, {1, 3123612579LL, 4},
|
||||
{1, 2987803337LL, 4}, {1, 2863311531LL, 4}, {1, 1374389535LL, 3},
|
||||
{1, 1321528399LL, 3}, {2, 795364315LL, 4}, {2, 613566757LL, 4},
|
||||
{1, 2369637129LL, 4}, {1, 2290649225LL, 4}, {2, 138547333LL, 4},
|
||||
{0, 0LL, 5}, {1, 1041204193LL, 3}, {1, 4042322161LL, 5},
|
||||
{2, 3558687189LL, 5}, {1, 954437177LL, 3}, {2, 3134165325LL, 5},
|
||||
{2, 2938661835LL, 5}, {2, 2753184165LL, 5}, {1, 3435973837LL, 5},
|
||||
{1, 3352169597LL, 5}, {2, 2249744775LL, 5}, {1, 799063683LL, 3},
|
||||
{1, 3123612579LL, 5}, {2, 1813430637LL, 5}, {1, 2987803337LL, 5},
|
||||
{1, 2924233053LL, 5}, {1, 2863311531LL, 5}, {1, 1402438301LL, 4},
|
||||
{1, 1374389535LL, 4}, {1, 2694881441LL, 5}, {1, 1321528399LL, 4},
|
||||
{2, 891408307LL, 5}, {2, 795364315LL, 5}, {2, 702812831LL, 5},
|
||||
{2, 613566757LL, 5}, {2, 527452125LL, 5}, {1, 2369637129LL, 5},
|
||||
{1, 582368447LL, 3}, {1, 2290649225LL, 5}, {1, 1126548799LL, 4},
|
||||
{2, 138547333LL, 5}, {2, 68174085LL, 5}, {0, 0LL, 6},
|
||||
{1, 4228890877LL, 6}, {1, 1041204193LL, 4}, {1, 128207979LL, 1},
|
||||
{1, 4042322161LL, 6}, {1, 1991868891LL, 5}, {2, 3558687189LL, 6},
|
||||
{1, 3871519817LL, 6}, {1, 954437177LL, 4}, {2, 3235934265LL, 6},
|
||||
{2, 3134165325LL, 6}, {1, 458129845LL, 3}, {2, 2938661835LL, 6},
|
||||
{1, 892460737LL, 4}, {2, 2753184165LL, 6}, {1, 3479467177LL, 6},
|
||||
{1, 3435973837LL, 6}, {1, 3393554407LL, 6}, {1, 3352169597LL, 6},
|
||||
{1, 827945503LL, 4}, {2, 2249744775LL, 6}, {1, 3233857729LL, 6},
|
||||
{1, 799063683LL, 4}, {1, 789879043LL, 4}, {1, 3123612579LL, 6},
|
||||
{1, 3088515809LL, 6}, {2, 1813430637LL, 6}, {2, 1746305385LL, 6},
|
||||
{1, 2987803337LL, 6}, {1, 2955676419LL, 6}, {1, 2924233053LL, 6},
|
||||
{2, 1491936009LL, 6}, {1, 2863311531LL, 6}, {2, 1372618415LL, 6},
|
||||
{1, 1402438301LL, 5}, {1, 2776544515LL, 6}, {1, 1374389535LL, 5},
|
||||
{2, 1148159575LL, 6}, {1, 2694881441LL, 6}, {2, 1042467791LL, 6},
|
||||
{1, 1321528399LL, 5}, {2, 940802361LL, 6}, {2, 891408307LL, 6},
|
||||
{2, 842937507LL, 6}, {2, 795364315LL, 6}, {2, 748664025LL, 6},
|
||||
{2, 702812831LL, 6}, {2, 657787785LL, 6}, {2, 613566757LL, 6},
|
||||
{2, 570128403LL, 6}, {2, 527452125LL, 6}, {2, 485518043LL, 6},
|
||||
{1, 2369637129LL, 6}, {2, 403800345LL, 6}, {1, 582368447LL, 4},
|
||||
{1, 1154949189LL, 5}, {1, 2290649225LL, 6}, {2, 248469183LL, 6},
|
||||
{1, 1126548799LL, 5}, {2, 174592167LL, 6}, {2, 138547333LL, 6},
|
||||
{1, 274877907LL, 3}, {2, 68174085LL, 6}, {2, 33818641LL, 6},
|
||||
{0, 0LL, 7}, {1, 266354561LL, 3}, {1, 4228890877LL, 7},
|
||||
{1, 4196609267LL, 7}, {1, 1041204193LL, 5}, {1, 4133502361LL, 7},
|
||||
{1, 128207979LL, 2}, {1, 4072265289LL, 7}, {1, 4042322161LL, 7},
|
||||
{1, 125400505LL, 2}, {1, 1991868891LL, 6}, {1, 1977538899LL, 6},
|
||||
{2, 3558687189LL, 7}, {1, 974744351LL, 5}, {1, 3871519817LL, 7},
|
||||
{1, 3844446251LL, 7}, {1, 954437177LL, 5}, {1, 3791419407LL, 7},
|
||||
{2, 3235934265LL, 7}, {1, 3739835469LL, 7}, {2, 3134165325LL, 7},
|
||||
{1, 3689636335LL, 7}, {1, 458129845LL, 4}, {1, 910191745LL, 5},
|
||||
{2, 2938661835LL, 7}, {1, 3593175255LL, 7}, {1, 892460737LL, 5},
|
||||
{1, 3546811703LL, 7}, {2, 2753184165LL, 7}, {1, 875407347LL, 5},
|
||||
{1, 3479467177LL, 7}, {2, 2620200175LL, 7}, {1, 3435973837LL, 7},
|
||||
{1, 3414632385LL, 7}, {1, 3393554407LL, 7}, {1, 3372735055LL, 7},
|
||||
{1, 3352169597LL, 7}, {1, 1665926709LL, 6}, {1, 827945503LL, 5},
|
||||
{1, 1645975491LL, 6}, {2, 2249744775LL, 7}, {1, 1626496491LL, 6},
|
||||
{1, 3233857729LL, 7}, {2, 2134925265LL, 7}, {1, 799063683LL, 5},
|
||||
{2, 2060591247LL, 7}, {1, 789879043LL, 5}, {1, 1570730897LL, 6},
|
||||
{1, 3123612579LL, 7}, {2, 1916962805LL, 7}, {1, 3088515809LL, 7},
|
||||
{2, 1847555765LL, 7}, {2, 1813430637LL, 7}, {1, 3037324939LL, 7},
|
||||
{2, 1746305385LL, 7}, {1, 3004130131LL, 7}, {1, 2987803337LL, 7},
|
||||
{2, 1648338801LL, 7}, {1, 2955676419LL, 7}, {1, 2939870663LL, 7},
|
||||
{1, 2924233053LL, 7}, {2, 1522554545LL, 7}, {2, 1491936009LL, 7},
|
||||
{1, 2878302691LL, 7}, {1, 2863311531LL, 7}, {1, 356059465LL, 4},
|
||||
{2, 1372618415LL, 7}, {2, 1343553873LL, 7}, {1, 1402438301LL, 6},
|
||||
{2, 1286310003LL, 7}, {1, 2776544515LL, 7}, {1, 1381296015LL, 6},
|
||||
{1, 1374389535LL, 6}, {1, 42735993LL, 1}, {2, 1148159575LL, 7},
|
||||
{1, 2708156719LL, 7}, {1, 2694881441LL, 7}, {1, 1340867839LL, 6},
|
||||
{2, 1042467791LL, 7}, {1, 663956297LL, 5}, {1, 1321528399LL, 6},
|
||||
{1, 2630410593LL, 7}, {2, 940802361LL, 7}, {1, 2605477791LL, 7},
|
||||
{2, 891408307LL, 7}, {1, 2581013211LL, 7}, {2, 842937507LL, 7},
|
||||
{1, 1278501893LL, 6}, {2, 795364315LL, 7}, {2, 771906565LL, 7},
|
||||
{2, 748664025LL, 7}, {2, 725633745LL, 7}, {2, 702812831LL, 7},
|
||||
{2, 680198441LL, 7}, {2, 657787785LL, 7}, {2, 635578121LL, 7},
|
||||
{2, 613566757LL, 7}, {1, 2443359173LL, 7}, {2, 570128403LL, 7},
|
||||
{2, 548696263LL, 7}, {2, 527452125LL, 7}, {1, 1200340205LL, 6},
|
||||
{2, 485518043LL, 7}, {2, 464823301LL, 7}, {1, 2369637129LL, 7},
|
||||
{2, 423966729LL, 7}, {2, 403800345LL, 7}, {2, 383805589LL, 7},
|
||||
{1, 582368447LL, 5}, {2, 344322273LL, 7}, {1, 1154949189LL, 6},
|
||||
{1, 2300233531LL, 7}, {1, 2290649225LL, 7}, {1, 285143057LL, 4},
|
||||
{2, 248469183LL, 7}, {1, 2262369605LL, 7}, {1, 1126548799LL, 6},
|
||||
{2, 192835267LL, 7}, {2, 174592167LL, 7}, {2, 156496785LL, 7},
|
||||
{2, 138547333LL, 7}, {2, 120742053LL, 7}, {1, 274877907LL, 4},
|
||||
{1, 2190262207LL, 7}, {2, 68174085LL, 7}, {1, 2172947881LL, 7},
|
||||
{2, 33818641LL, 7}, {1, 2155905153LL, 7}, {0, 0LL, 8},
|
||||
};
|
||||
// Lookup table used by the signed 32-bit __fast_idiv() overload.
// Row i describes division by the constant (i + 2); __fast_idiv() indexes
// it as __idiv_table_s32[divisor-2].  Each row is {method, multiplier, shift}:
//   method 0: the quotient is computed as a plain right shift by 'shift'
//             (these rows carry a multiplier of 0);
//   method 1: the numerator is multiplied by 'multiplier' in 64 bits and
//             the product is shifted right by (32 + shift).
// The table has 255 rows, i.e. divisors 2 through 256.
static const uniform int64 __idiv_table_s32[][3] = {
|
||||
{0, 0LL, 1}, {1, 1431655766LL, 0}, {0, 0LL, 2},
|
||||
{1, 1717986919LL, 1}, {1, 715827883LL, 0}, {1, 2454267027LL, 2},
|
||||
{0, 0LL, 3}, {1, 954437177LL, 1}, {1, 1717986919LL, 2},
|
||||
{1, 780903145LL, 1}, {1, 715827883LL, 1}, {1, 1321528399LL, 2},
|
||||
{1, 2454267027LL, 3}, {1, 2290649225LL, 3}, {0, 0LL, 4},
|
||||
{1, 2021161081LL, 3}, {1, 954437177LL, 2}, {1, 1808407283LL, 3},
|
||||
{1, 1717986919LL, 3}, {1, 818089009LL, 2}, {1, 780903145LL, 2},
|
||||
{1, 2987803337LL, 4}, {1, 715827883LL, 2}, {1, 1374389535LL, 3},
|
||||
{1, 1321528399LL, 3}, {1, 1272582903LL, 3}, {1, 2454267027LL, 4},
|
||||
{1, 2369637129LL, 4}, {1, 2290649225LL, 4}, {1, 2216757315LL, 4},
|
||||
{0, 0LL, 5}, {1, 1041204193LL, 3}, {1, 2021161081LL, 4},
|
||||
{1, 3926827243LL, 5}, {1, 954437177LL, 3}, {1, 3714566311LL, 5},
|
||||
{1, 1808407283LL, 4}, {1, 3524075731LL, 5}, {1, 1717986919LL, 4},
|
||||
{1, 1676084799LL, 4}, {1, 818089009LL, 3}, {1, 799063683LL, 3},
|
||||
{1, 780903145LL, 3}, {1, 3054198967LL, 5}, {1, 2987803337LL, 5},
|
||||
{1, 2924233053LL, 5}, {1, 715827883LL, 3}, {1, 1402438301LL, 4},
|
||||
{1, 1374389535LL, 4}, {1, 2694881441LL, 5}, {1, 1321528399LL, 4},
|
||||
{1, 1296593901LL, 4}, {1, 1272582903LL, 4}, {1, 156180629LL, 1},
|
||||
{1, 2454267027LL, 5}, {1, 2411209711LL, 5}, {1, 2369637129LL, 5},
|
||||
{1, 582368447LL, 3}, {1, 2290649225LL, 5}, {1, 1126548799LL, 4},
|
||||
{1, 2216757315LL, 5}, {1, 2181570691LL, 5}, {0, 0LL, 6},
|
||||
{1, 2114445439LL, 5}, {1, 1041204193LL, 4}, {1, 128207979LL, 1},
|
||||
{1, 2021161081LL, 5}, {1, 1991868891LL, 5}, {1, 3926827243LL, 6},
|
||||
{1, 3871519817LL, 6}, {1, 954437177LL, 4}, {1, 3765450781LL, 6},
|
||||
{1, 3714566311LL, 6}, {1, 458129845LL, 3}, {1, 1808407283LL, 5},
|
||||
{1, 892460737LL, 4}, {1, 3524075731LL, 6}, {1, 1739733589LL, 5},
|
||||
{1, 1717986919LL, 5}, {1, 424194301LL, 3}, {1, 1676084799LL, 5},
|
||||
{1, 827945503LL, 4}, {1, 818089009LL, 4}, {1, 1616928865LL, 5},
|
||||
{1, 799063683LL, 4}, {1, 789879043LL, 4}, {1, 780903145LL, 4},
|
||||
{1, 3088515809LL, 6}, {1, 3054198967LL, 6}, {1, 3020636341LL, 6},
|
||||
{1, 2987803337LL, 6}, {1, 738919105LL, 4}, {1, 2924233053LL, 6},
|
||||
{1, 2893451653LL, 6}, {1, 715827883LL, 4}, {1, 354224107LL, 3},
|
||||
{1, 1402438301LL, 5}, {1, 2776544515LL, 6}, {1, 1374389535LL, 5},
|
||||
{1, 680390859LL, 4}, {1, 2694881441LL, 6}, {1, 333589693LL, 3},
|
||||
{1, 1321528399LL, 5}, {1, 2617884829LL, 6}, {1, 1296593901LL, 5},
|
||||
{1, 1284476201LL, 5}, {1, 1272582903LL, 5}, {1, 2521815661LL, 6},
|
||||
{1, 156180629LL, 2}, {1, 2476377541LL, 6}, {1, 2454267027LL, 6},
|
||||
{1, 1216273925LL, 5}, {1, 2411209711LL, 6}, {1, 1195121335LL, 5},
|
||||
{1, 2369637129LL, 6}, {1, 2349383821LL, 6}, {1, 582368447LL, 4},
|
||||
{1, 1154949189LL, 5}, {1, 2290649225LL, 6}, {1, 70991195LL, 1},
|
||||
{1, 1126548799LL, 5}, {1, 558694933LL, 4}, {1, 2216757315LL, 6},
|
||||
{1, 274877907LL, 3}, {1, 2181570691LL, 6}, {1, 2164392969LL, 6},
|
||||
{0, 0LL, 7}, {1, 266354561LL, 3}, {1, 2114445439LL, 6},
|
||||
{1, 1049152317LL, 5}, {1, 1041204193LL, 5}, {1, 4133502361LL, 7},
|
||||
{1, 128207979LL, 2}, {1, 4072265289LL, 7}, {1, 2021161081LL, 6},
|
||||
{1, 125400505LL, 2}, {1, 1991868891LL, 6}, {1, 1977538899LL, 6},
|
||||
{1, 3926827243LL, 7}, {1, 974744351LL, 5}, {1, 3871519817LL, 7},
|
||||
{1, 961111563LL, 5}, {1, 954437177LL, 5}, {1, 3791419407LL, 7},
|
||||
{1, 3765450781LL, 7}, {1, 1869917735LL, 6}, {1, 3714566311LL, 7},
|
||||
{1, 230602271LL, 3}, {1, 458129845LL, 4}, {1, 910191745LL, 5},
|
||||
{1, 1808407283LL, 6}, {1, 3593175255LL, 7}, {1, 892460737LL, 5},
|
||||
{1, 443351463LL, 4}, {1, 3524075731LL, 7}, {1, 875407347LL, 5},
|
||||
{1, 1739733589LL, 6}, {1, 432197967LL, 4}, {1, 1717986919LL, 6},
|
||||
{1, 3414632385LL, 7}, {1, 424194301LL, 4}, {1, 210795941LL, 3},
|
||||
{1, 1676084799LL, 6}, {1, 1665926709LL, 6}, {1, 827945503LL, 5},
|
||||
{1, 1645975491LL, 6}, {1, 818089009LL, 5}, {1, 1626496491LL, 6},
|
||||
{1, 1616928865LL, 6}, {1, 3214946281LL, 7}, {1, 799063683LL, 5},
|
||||
{1, 397222409LL, 4}, {1, 789879043LL, 5}, {1, 1570730897LL, 6},
|
||||
{1, 780903145LL, 5}, {1, 3105965051LL, 7}, {1, 3088515809LL, 7},
|
||||
{1, 3071261531LL, 7}, {1, 3054198967LL, 7}, {1, 759331235LL, 5},
|
||||
{1, 3020636341LL, 7}, {1, 3004130131LL, 7}, {1, 2987803337LL, 7},
|
||||
{1, 2971653049LL, 7}, {1, 738919105LL, 5}, {1, 2939870663LL, 7},
|
||||
{1, 2924233053LL, 7}, {1, 2908760921LL, 7}, {1, 2893451653LL, 7},
|
||||
{1, 2878302691LL, 7}, {1, 715827883LL, 5}, {1, 356059465LL, 4},
|
||||
{1, 354224107LL, 4}, {1, 2819260585LL, 7}, {1, 1402438301LL, 6},
|
||||
{1, 1395319325LL, 6}, {1, 2776544515LL, 7}, {1, 1381296015LL, 6},
|
||||
{1, 1374389535LL, 6}, {1, 42735993LL, 1}, {1, 680390859LL, 5},
|
||||
{1, 2708156719LL, 7}, {1, 2694881441LL, 7}, {1, 1340867839LL, 6},
|
||||
{1, 333589693LL, 4}, {1, 663956297LL, 5}, {1, 1321528399LL, 6},
|
||||
{1, 2630410593LL, 7}, {1, 2617884829LL, 7}, {1, 81421181LL, 2},
|
||||
{1, 1296593901LL, 6}, {1, 2581013211LL, 7}, {1, 1284476201LL, 6},
|
||||
{1, 1278501893LL, 6}, {1, 1272582903LL, 6}, {1, 2533436931LL, 7},
|
||||
{1, 2521815661LL, 7}, {1, 2510300521LL, 7}, {1, 156180629LL, 3},
|
||||
{1, 2487582869LL, 7}, {1, 2476377541LL, 7}, {1, 2465272709LL, 7},
|
||||
{1, 2454267027LL, 7}, {1, 2443359173LL, 7}, {1, 1216273925LL, 6},
|
||||
{1, 605457945LL, 5}, {1, 2411209711LL, 7}, {1, 1200340205LL, 6},
|
||||
{1, 1195121335LL, 6}, {1, 2379895299LL, 7}, {1, 2369637129LL, 7},
|
||||
{1, 2359467013LL, 7}, {1, 2349383821LL, 7}, {1, 2339386443LL, 7},
|
||||
{1, 582368447LL, 5}, {1, 2319644785LL, 7}, {1, 1154949189LL, 6},
|
||||
{1, 2300233531LL, 7}, {1, 2290649225LL, 7}, {1, 285143057LL, 4},
|
||||
{1, 70991195LL, 2}, {1, 2262369605LL, 7}, {1, 1126548799LL, 6},
|
||||
{1, 1121950641LL, 6}, {1, 558694933LL, 5}, {1, 2225732041LL, 7},
|
||||
{1, 2216757315LL, 7}, {1, 2207854675LL, 7}, {1, 274877907LL, 4},
|
||||
{1, 2190262207LL, 7}, {1, 2181570691LL, 7}, {1, 2172947881LL, 7},
|
||||
{1, 2164392969LL, 7}, {1, 2155905153LL, 7}, {1, 2147483649LL, 7},
|
||||
};
|
||||
|
||||
// Unsigned 8-bit division of a varying numerator by a uniform divisor,
// implemented with a table-driven multiply and shift instead of a divide.
//
// numerator: per-lane value to divide.
// divisor:   value shared by all lanes; the table is indexed with
//            divisor-2, so the divisor must be at least 2 and within the
//            range covered by __idiv_table_u8.
// Returns the per-lane quotient.
__declspec(safe)
static unmasked unsigned int8 __fast_idiv(unsigned int8 numerator,
                                          uniform unsigned int8 divisor) {
    // Each table row holds {method, multiplier, shift} for one divisor.
    uniform int64 kind = __idiv_table_u8[divisor-2][0];
    uniform int64 magic = __idiv_table_u8[divisor-2][1];
    uniform int64 postShift = __idiv_table_u8[divisor-2][2];

    // Method 0: the quotient is just a right shift.
    if (kind == 0)
        return numerator >> postShift;

    // The remaining methods multiply in 16 bits, twice the operand width.
    unsigned int16 wideMagic = magic;
    unsigned int16 wide = numerator;

    // Method 1: take the product and drop the low 8 bits plus 'shift' more.
    if (kind == 1)
        return (wide * wideMagic) >> (8 + postShift);

    // Remaining method: keep the high byte of the product, add half of the
    // difference between the numerator and that value, then apply the shift.
    wide *= wideMagic;
    wide >>= 8;
    wide += (numerator - wide) >> 1;
    return (wide >> postShift);
}
|
||||
|
||||
// Signed 8-bit division of a varying numerator by a uniform divisor,
// implemented with a table-driven multiply and shift instead of a divide.
// The result matches the '/' operator, i.e. it is rounded toward zero.
//
// numerator: per-lane value to divide (may be negative).
// divisor:   value shared by all lanes; the table is indexed with
//            divisor-2, so the divisor must be at least 2 and within the
//            range covered by __idiv_table_s8.
// Returns the per-lane quotient.
__declspec(safe)
static unmasked int8 __fast_idiv(int8 numerator, uniform int8 divisor) {
    // Each table row holds {method, multiplier, shift} for one divisor.
    uniform int8 method = __idiv_table_s8[divisor-2][0];
    uniform int16 multiplier = __idiv_table_s8[divisor-2][1];
    uniform int8 shift = __idiv_table_s8[divisor-2][2];

    // Both paths below compute floor(numerator / divisor), whereas '/'
    // truncates toward zero.  The two only differ for negative numerators,
    // where truncation equals the ceiling, and
    //     ceil(n / d) == floor((n + d - 1) / d).
    // So add (divisor - 1) to the negative lanes only.  This cannot
    // overflow: the bias is positive and is only added to negative values.
    int8 negMask = numerator >> 7;      // 0 for non-negative lanes, -1 otherwise
    numerator += negMask & (int8)(divisor - 1);

    if (method == 0)
        // Power-of-two style entry: an arithmetic shift gives the floor.
        return numerator >> shift;
    else {
        // Map negative values to non-negative ones with a bitwise NOT
        // (x ^ -1), do the unsigned-style multiply/shift, and undo the
        // mapping; the net effect is a floor division.
        unsigned int8 sign = numerator >> 7;
        numerator ^= sign;
        int16 mul = (int16)numerator * (int16)multiplier;
        mul >>= 8 + shift;
        return (int8)mul ^ sign;
    }
}
|
||||
|
||||
// Unsigned 16-bit division of a varying numerator by a uniform divisor,
// implemented with a table-driven multiply and shift instead of a divide.
//
// numerator: per-lane value to divide.
// divisor:   value shared by all lanes; the table is indexed with
//            divisor-2, so the divisor must be at least 2 and within the
//            range covered by __idiv_table_u16.
// Returns the per-lane quotient.
__declspec(safe)
static unmasked unsigned int16 __fast_idiv(unsigned int16 numerator,
                                           uniform unsigned int16 divisor) {
    // Each table row holds {method, multiplier, shift} for one divisor.
    uniform int64 kind = __idiv_table_u16[divisor-2][0];
    uniform int64 magic = __idiv_table_u16[divisor-2][1];
    uniform int64 postShift = __idiv_table_u16[divisor-2][2];

    // Method 0: the quotient is just a right shift.
    if (kind == 0)
        return numerator >> postShift;

    // The remaining methods multiply in 32 bits, twice the operand width.
    unsigned int32 wideMagic = magic;
    unsigned int32 wide = numerator;

    // Method 1: take the product and drop the low 16 bits plus 'shift' more.
    if (kind == 1)
        return (wide * wideMagic) >> (16 + postShift);

    // Remaining method: keep the high half of the product, add half of the
    // difference between the numerator and that value, then apply the shift.
    wide *= wideMagic;
    wide >>= 16;
    wide += (numerator - wide) >> 1;
    return wide >> postShift;
}
|
||||
|
||||
// Signed 16-bit division of a varying numerator by a uniform divisor,
// implemented with a table-driven multiply and shift instead of a divide.
// The result matches the '/' operator, i.e. it is rounded toward zero.
//
// numerator: per-lane value to divide (may be negative).
// divisor:   value shared by all lanes; the table is indexed with
//            divisor-2, so the divisor must be at least 2 and within the
//            range covered by __idiv_table_s16.
// Returns the per-lane quotient.
__declspec(safe)
static unmasked int16 __fast_idiv(int16 numerator, uniform int16 divisor) {
    // Each table row holds {method, multiplier, shift} for one divisor.
    uniform int64 method = __idiv_table_s16[divisor-2][0];
    uniform int64 multiplier = __idiv_table_s16[divisor-2][1];
    uniform int64 shift = __idiv_table_s16[divisor-2][2];

    // Both paths below compute floor(numerator / divisor), whereas '/'
    // truncates toward zero.  The two only differ for negative numerators,
    // where truncation equals the ceiling, and
    //     ceil(n / d) == floor((n + d - 1) / d).
    // So add (divisor - 1) to the negative lanes only.  This cannot
    // overflow: the bias is positive and is only added to negative values.
    int16 negMask = numerator >> 15;    // 0 for non-negative lanes, -1 otherwise
    numerator += negMask & (int16)(divisor - 1);

    if (method == 0)
        // Power-of-two style entry: an arithmetic shift gives the floor.
        return numerator >> shift;
    else {
        // Map negative values to non-negative ones with a bitwise NOT
        // (x ^ -1), do the unsigned-style multiply/shift, and undo the
        // mapping; the net effect is a floor division.
        unsigned int16 sign = numerator >> 15;
        numerator ^= sign;
        int32 mul = (int32)numerator * (int32)multiplier;
        mul >>= 16 + shift;
        int16 result = mul;
        return result ^ sign;
    }
}
|
||||
|
||||
// Unsigned 32-bit division of a varying numerator by a uniform divisor,
// implemented with a table-driven multiply and shift instead of a divide.
//
// numerator: per-lane value to divide.
// divisor:   value shared by all lanes; the table is indexed with
//            divisor-2, so the divisor must be at least 2 and within the
//            range covered by __idiv_table_u32.
// Returns the per-lane quotient.
__declspec(safe)
static unmasked inline unsigned int32 __fast_idiv(unsigned int32 numerator,
                                                  uniform unsigned int32 divisor) {
    // Each table row holds {method, multiplier, shift} for one divisor.
    uniform int64 kind = __idiv_table_u32[divisor-2][0];
    uniform int64 magic = __idiv_table_u32[divisor-2][1];
    uniform int64 postShift = __idiv_table_u32[divisor-2][2];

    // Method 0: the quotient is just a right shift.
    if (kind == 0)
        return numerator >> postShift;

    // The remaining methods multiply in 64 bits, twice the operand width.
    unsigned int64 wideMagic = magic;
    unsigned int64 wide = numerator;

    // Method 1: take the product and drop the low 32 bits plus 'shift' more.
    if (kind == 1)
        return (wide * wideMagic) >> (32 + postShift);

    // Remaining method: keep the high half of the product, add half of the
    // difference between the numerator and that value, then apply the shift.
    wide *= wideMagic;
    wide >>= 32;
    wide += (numerator - wide) >> 1;
    return wide >> postShift;
}
|
||||
|
||||
// Signed 32-bit division of a varying numerator by a uniform divisor,
// implemented with a table-driven multiply and shift instead of a divide.
// The result matches the '/' operator, i.e. it is rounded toward zero.
//
// numerator: per-lane value to divide (may be negative).
// divisor:   value shared by all lanes; the table is indexed with
//            divisor-2, so the divisor must be at least 2 and within the
//            range covered by __idiv_table_s32.
// Returns the per-lane quotient.
__declspec(safe)
static unmasked int32 __fast_idiv(int32 numerator, uniform int32 divisor) {
    // Each table row holds {method, multiplier, shift} for one divisor.
    uniform int64 method = __idiv_table_s32[divisor-2][0];
    uniform int64 multiplier = __idiv_table_s32[divisor-2][1];
    uniform int64 shift = __idiv_table_s32[divisor-2][2];

    // Both paths below compute floor(numerator / divisor), whereas '/'
    // truncates toward zero.  The two only differ for negative numerators,
    // where truncation equals the ceiling, and
    //     ceil(n / d) == floor((n + d - 1) / d).
    // So add (divisor - 1) to the negative lanes only.  This cannot
    // overflow: the bias is positive and is only added to negative values.
    int32 negMask = numerator >> 31;    // 0 for non-negative lanes, -1 otherwise
    numerator += negMask & (divisor - 1);

    if (method == 0)
        // Power-of-two style entry: an arithmetic shift gives the floor.
        return numerator >> shift;
    else {
        // Map negative values to non-negative ones with a bitwise NOT
        // (x ^ -1), do the unsigned-style multiply/shift, and undo the
        // mapping; the net effect is a floor division.
        unsigned int32 sign = numerator >> 31;
        numerator ^= sign;
        int64 mul = (int64)numerator * (int64)multiplier;
        mul >>= 32 + shift;
        int32 result = mul;
        return result ^ sign;
    }
}
|
||||
|
||||
|
||||
75
tests/idiv.ispc
Normal file
75
tests/idiv.ispc
Normal file
@@ -0,0 +1,75 @@
|
||||
|
||||
// Returns programCount to the caller.
export uniform int width() {
    return programCount;
}
|
||||
|
||||
|
||||
// Compares __fast_idiv() against the '/' operator and stores the number of
// mismatching (numerator, divisor) combinations in RET[programIndex].
// 8- and 16-bit numerators are swept over their non-negative range; 32-bit
// numerators are drawn from a seeded random number generator.  Signed
// cases only exercise non-negative numerators.  aFOO is not used.
export void f_f(uniform float RET[], uniform float aFOO[]) {
    uniform int mismatches = 0;

    // unsigned 8-bit: numerators 0..254, divisors 2..254
    for (unsigned int8 n = 0; n < 255; ++n) {
        for (uniform unsigned int8 d = 2; d < 255; ++d) {
            if (__fast_idiv(n, d) != n/d) {
                ++mismatches;
                print("error %/% = %, got %\n", n, d, n/d, __fast_idiv(n,d));
            }
        }
    }

    // signed 8-bit: numerators 0..126, divisors 2..126
    for (int8 n = 0; n < 127; ++n) {
        for (uniform int8 d = 2; d < 127; ++d) {
            if (__fast_idiv(n, d) != n/d) {
                ++mismatches;
                print("error %/% = %, got %\n", n, d, n/d, __fast_idiv(n,d));
            }
        }
    }

    // signed 16-bit: numerators 0..32766, divisors 2..255
    for (int16 n = 0; n < 32767; ++n) {
        for (uniform int16 d = 2; d < 256; ++d) {
            if (__fast_idiv(n, d) != n/d) {
                ++mismatches;
                print("error %/% = %, got %\n", n, d, n/d, __fast_idiv(n,d));
            }
        }
    }

    // unsigned 16-bit: numerators 0..0xfffe, divisors 2..255
    for (unsigned int16 n = 0; n < 0xffff; ++n) {
        for (uniform unsigned int16 d = 2; d < 256; ++d) {
            if (__fast_idiv(n, d) != n/d) {
                ++mismatches;
                print("error %/% = %, got %\n", n, d, n/d, __fast_idiv(n,d));
            }
        }
    }

    // 32-bit numerators are sampled rather than enumerated.
    uniform RNGState rng;
    seed_rng(&rng, 1234);

    // unsigned 32-bit: random numerators, divisors 2..255
    for (uniform int iter = 0; iter < 1M; ++iter) {
        unsigned int32 n = random(&rng);
        for (uniform unsigned int32 d = 2; d < 256; ++d) {
            if (__fast_idiv(n, d) != n/d) {
                ++mismatches;
                print("ui32 error %/% = %, got %\n", n, d, n/d, __fast_idiv(n,d));
            }
        }
    }

    // signed 32-bit: random numerators, negative samples are skipped
    for (uniform int64 iter = 0; iter < 1M; ++iter) {
        int32 n = random(&rng);
        if (n < 0)
            continue;
        for (uniform int32 d = 2; d < 256; ++d) {
            if (__fast_idiv(n, d) != n/d) {
                ++mismatches;
                print("si32 error %/% = %, got %\n", n, d, n/d, __fast_idiv(n,d));
            }
        }
    }

    RET[programIndex] = mismatches;
}
|
||||
|
||||
// Writes the expected value for f_f(): 0 in the RET slot selected by
// programIndex, i.e. no mismatches.
export void result(uniform float RET[]) {
    RET[programIndex] = 0;
}
|
||||
|
||||
Reference in New Issue
Block a user