Merge pull request #751 from Vsevolod-Livinskij/master
Saturating multiplication for int64 was added.
This commit is contained in:
@@ -3615,6 +3615,39 @@ normalized exponent as a power of two in the ``pw2`` parameter.
|
|||||||
uniform int * uniform pw2)
|
uniform int * uniform pw2)
|
||||||
|
|
||||||
|
|
||||||
|
Saturating Arithmetic
|
||||||
|
---------------------
|
||||||
|
Saturating (no overflow possible) addition, subtraction, multiplication and
|
||||||
|
division for all integer types are provided by the ``ispc`` standard library.
|
||||||
|
|
||||||
|
::
|
||||||
|
|
||||||
|
int8 saturating_add(uniform int8 a, uniform int8 b)
|
||||||
|
int8 saturating_add(varying int8 a, varying int8 b)
|
||||||
|
unsigned int8 saturating_add(uniform unsigned int8 a, uniform unsigned int8 b)
|
||||||
|
unsigned int8 saturating_add(varying unsigned int8 a, varying unsigned int8 b)
|
||||||
|
|
||||||
|
int8 saturating_sub(uniform int8 a, uniform int8 b)
|
||||||
|
int8 saturating_sub(varying int8 a, varying int8 b)
|
||||||
|
unsigned int8 saturating_sub(uniform unsigned int8 a, uniform unsigned int8 b)
|
||||||
|
unsigned int8 saturating_sub(varying unsigned int8 a, varying unsigned int8 b)
|
||||||
|
|
||||||
|
int8 saturating_mul(uniform int8 a, uniform int8 b)
|
||||||
|
int8 saturating_mul(varying int8 a, varying int8 b)
|
||||||
|
unsigned int8 saturating_mul(uniform unsigned int8 a, uniform unsigned int8 b)
|
||||||
|
unsigned int8 saturating_mul(varying unsigned int8 a, varying unsigned int8 b)
|
||||||
|
|
||||||
|
int8 saturating_div(uniform int8 a, uniform int8 b)
|
||||||
|
int8 saturating_div(varying int8 a, varying int8 b)
|
||||||
|
unsigned int8 saturating_div(uniform unsigned int8 a, uniform unsigned int8 b)
|
||||||
|
unsigned int8 saturating_div(varying unsigned int8 a, varying unsigned int8 b)
|
||||||
|
|
||||||
|
|
||||||
|
In addition to the ``int8`` variants of saturating arithmetic functions listed
|
||||||
|
above, there are versions that support ``int16``, ``int32`` and ``int64``
|
||||||
|
values as well.
|
||||||
|
|
||||||
|
|
||||||
Pseudo-Random Numbers
|
Pseudo-Random Numbers
|
||||||
---------------------
|
---------------------
|
||||||
|
|
||||||
|
|||||||
144
stdlib.ispc
144
stdlib.ispc
@@ -4938,6 +4938,150 @@ static inline varying unsigned int32 saturating_mul(varying unsigned int32 a,
|
|||||||
varying unsigned int32 lo = result;
|
varying unsigned int32 lo = result;
|
||||||
return lo | - (varying int32) !! hi;
|
return lo | - (varying int32) !! hi;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Saturating multiplication of two uniform signed 64-bit integers.
//
// Returns a * b when the product fits in int64.  Otherwise the result is
// clamped to INT64_MAX (overflow with a positive sign) or INT64_MIN
// (overflow with a negative sign).  The product is formed on the unsigned
// magnitudes of the operands, split into 32-bit halves, and the sign is
// applied at the very end.
static inline uniform int64 saturating_mul(uniform int64 a, uniform int64 b) {
    // +1 only when both operands are non-zero and share a sign.  A zero
    // operand yields -1, which is harmless because the magnitude is then 0.
    uniform bool same_sign = ((a > 0) && (b > 0)) || ((a < 0) && (b < 0));
    uniform int8 result_sign = same_sign ? 1 : -1;

    // Unsigned magnitudes.  INT64_MIN cannot be negated within int64 (its
    // magnitude is not representable), so it is converted to unsigned
    // directly instead of going through unary minus.
    uniform unsigned int64 mag_a = 0;
    if (a == INT64_MIN) {
        mag_a = (uniform unsigned int64) INT64_MIN;
    } else if (a > 0) {
        mag_a = a;
    } else {
        mag_a = -a;
    }

    uniform unsigned int64 mag_b = 0;
    if (b == INT64_MIN) {
        mag_b = (uniform unsigned int64) INT64_MIN;
    } else if (b > 0) {
        mag_b = b;
    } else {
        mag_b = -b;
    }

    // 32-bit halves of each magnitude.
    uniform unsigned int32 a_lo = mag_a & 0xFFFFFFFF;
    uniform unsigned int32 b_lo = mag_b & 0xFFFFFFFF;
    uniform unsigned int32 a_hi = mag_a >> 32;
    uniform unsigned int32 b_hi = mag_b >> 32;

    // Both magnitudes are at least 2^32, so the product is at least 2^64.
    if ((a_hi != 0) && (b_hi != 0)) {
        if (result_sign > 0) {
            return INT64_MAX;
        }
        return INT64_MIN;
    }

    // At most one high half is non-zero: product = (hi * lo) << 32 + lo * lo.
    // If the 32-bit saturating_mul of the cross term saturates, the shifted
    // value is still >= 2^63, so the clamping below catches it.
    uniform unsigned int64 magnitude = 0;
    if (a_hi != 0) {
        magnitude = saturating_add((uniform unsigned int64) saturating_mul(b_lo, a_hi) << 32,
                                   (uniform unsigned int64) (a_lo) * b_lo);
    } else if (b_hi != 0) {
        magnitude = saturating_add((uniform unsigned int64) saturating_mul(a_lo, b_hi) << 32,
                                   (uniform unsigned int64) (a_lo) * b_lo);
    } else {
        magnitude = mag_a * mag_b;
    }

    // Clamp to the signed range, then re-apply the sign.
    if ((result_sign < 0) && (magnitude >= (uniform unsigned int64) INT64_MIN)) {
        return INT64_MIN;
    }
    if ((result_sign > 0) && (magnitude >= INT64_MAX)) {
        return INT64_MAX;
    }
    return magnitude * result_sign;
}
|
||||||
|
|
||||||
|
// Saturating multiplication of two varying signed 64-bit integers.
//
// Per program instance, returns a * b when the product fits in int64.
// Otherwise the result is clamped to INT64_MAX (overflow with a positive
// sign) or INT64_MIN (overflow with a negative sign).  The product is formed
// on the unsigned magnitudes of the operands, split into 32-bit halves, and
// the sign is applied at the very end.
static inline varying int64 saturating_mul(varying int64 a, varying int64 b) {
    // +1 only when both operands are non-zero and share a sign.  A zero
    // operand yields -1, which is harmless because the magnitude is then 0.
    varying bool same_sign = ((a > 0) && (b > 0)) || ((a < 0) && (b < 0));
    varying int8 result_sign = same_sign ? 1 : -1;

    // Unsigned magnitudes.  INT64_MIN cannot be negated within int64 (its
    // magnitude is not representable), so it is converted to unsigned
    // directly instead of going through unary minus.
    varying unsigned int64 mag_a = 0;
    if (a == INT64_MIN) {
        mag_a = (varying unsigned int64) INT64_MIN;
    } else if (a > 0) {
        mag_a = a;
    } else {
        mag_a = -a;
    }

    varying unsigned int64 mag_b = 0;
    if (b == INT64_MIN) {
        mag_b = (varying unsigned int64) INT64_MIN;
    } else if (b > 0) {
        mag_b = b;
    } else {
        mag_b = -b;
    }

    // 32-bit halves of each magnitude.
    varying unsigned int32 a_lo = mag_a & 0xFFFFFFFF;
    varying unsigned int32 b_lo = mag_b & 0xFFFFFFFF;
    varying unsigned int32 a_hi = mag_a >> 32;
    varying unsigned int32 b_hi = mag_b >> 32;

    // Both magnitudes are at least 2^32, so the product is at least 2^64.
    if ((a_hi != 0) && (b_hi != 0)) {
        if (result_sign > 0) {
            return INT64_MAX;
        }
        return INT64_MIN;
    }

    // At most one high half is non-zero: product = (hi * lo) << 32 + lo * lo.
    // If the 32-bit saturating_mul of the cross term saturates, the shifted
    // value is still >= 2^63, so the clamping below catches it.
    varying unsigned int64 magnitude = 0;
    if (a_hi != 0) {
        magnitude = saturating_add((varying unsigned int64) saturating_mul(b_lo, a_hi) << 32,
                                   (varying unsigned int64) (a_lo) * b_lo);
    } else if (b_hi != 0) {
        magnitude = saturating_add((varying unsigned int64) saturating_mul(a_lo, b_hi) << 32,
                                   (varying unsigned int64) (a_lo) * b_lo);
    } else {
        magnitude = mag_a * mag_b;
    }

    // Clamp to the signed range, then re-apply the sign.
    if ((result_sign < 0) && (magnitude >= (varying unsigned int64) INT64_MIN)) {
        return INT64_MIN;
    }
    if ((result_sign > 0) && (magnitude >= INT64_MAX)) {
        return INT64_MAX;
    }
    return magnitude * result_sign;
}
|
||||||
|
|
||||||
|
|
||||||
|
// Saturating multiplication of two uniform unsigned 64-bit integers.
//
// Returns a * b when the product fits in 64 bits and UINT64_MAX otherwise.
// Each operand is split into 32-bit halves (a = a1 * 2^32 + a0, likewise b),
// so that a * b = a1*b1 * 2^64 + (a1*b0 + a0*b1) * 2^32 + a0*b0.
static inline uniform unsigned int64 saturating_mul(uniform unsigned int64 a,
                                                    uniform unsigned int64 b) {
    uniform unsigned int32 a0 = a & 0xFFFFFFFF;
    uniform unsigned int32 b0 = b & 0xFFFFFFFF;
    uniform unsigned int32 a1 = a >> 32;
    uniform unsigned int32 b1 = b >> 32;

    // Both operands are at least 2^32, so the product is at least 2^64.
    if ((a1 != 0) && (b1 != 0)) {
        return UINT64_MAX;
    }

    // At most one of a1 / b1 is non-zero here, so at most one term of the
    // cross sum is non-zero and the sum itself cannot wrap in 64 bits.
    uniform unsigned int64 cross = (uniform unsigned int64) a1 * b0 +
                                   (uniform unsigned int64) a0 * b1;

    // The cross term is scaled by 2^32; if it needs more than 32 bits the
    // product exceeds 64 bits.  Checking this on the exact 64-bit value is
    // required: a 32-bit saturated cross term shifted left by 32 would give
    // 0xFFFFFFFF00000000 instead of UINT64_MAX.
    if ((cross >> 32) != 0) {
        return UINT64_MAX;
    }

    // The remaining addition can still overflow; saturating_add clamps it.
    return saturating_add(cross << 32, (uniform unsigned int64) a0 * b0);
}
|
||||||
|
|
||||||
|
// Saturating multiplication of two varying unsigned 64-bit integers.
//
// Per program instance, returns a * b when the product fits in 64 bits and
// UINT64_MAX otherwise.  Each operand is split into 32-bit halves
// (a = a1 * 2^32 + a0, likewise b), so that
// a * b = a1*b1 * 2^64 + (a1*b0 + a0*b1) * 2^32 + a0*b0.
static inline varying unsigned int64 saturating_mul(varying unsigned int64 a,
                                                    varying unsigned int64 b) {
    varying unsigned int32 a0 = a & 0xFFFFFFFF;
    varying unsigned int32 b0 = b & 0xFFFFFFFF;
    varying unsigned int32 a1 = a >> 32;
    varying unsigned int32 b1 = b >> 32;

    // Both operands are at least 2^32, so the product is at least 2^64.
    if ((a1 != 0) && (b1 != 0)) {
        return UINT64_MAX;
    }

    // At most one of a1 / b1 is non-zero here, so at most one term of the
    // cross sum is non-zero and the sum itself cannot wrap in 64 bits.
    varying unsigned int64 cross = (varying unsigned int64) a1 * b0 +
                                   (varying unsigned int64) a0 * b1;

    // The cross term is scaled by 2^32; if it needs more than 32 bits the
    // product exceeds 64 bits.  Checking this on the exact 64-bit value is
    // required: a 32-bit saturated cross term shifted left by 32 would give
    // 0xFFFFFFFF00000000 instead of UINT64_MAX.
    if ((cross >> 32) != 0) {
        return UINT64_MAX;
    }

    // The remaining addition can still overflow; saturating_add clamps it.
    return saturating_add(cross << 32, (varying unsigned int64) a0 * b0);
}
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
// rdrand
|
// rdrand
|
||||||
|
|
||||||
|
|||||||
28
tests/pmuls_i64.ispc
Normal file
28
tests/pmuls_i64.ispc
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
|
||||||
|
// Returns programCount, the number of program instances in the gang.
export uniform int width() {
    return programCount;
}
|
||||||
|
|
||||||
|
// Exercises the uniform int64 overload of saturating_mul.
// Program instances are split three ways by programIndex % 3:
//   0 -> INT64_MAX * b,  1 -> INT64_MIN * b,  2 -> b * b.
// Each product is written to RET as a float.  aFOO is not used.
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    uniform int64 int64_max = 0x7FFFFFFFFFFFFFFF;    // max signed int64
    uniform int64 int64_min = -0x8000000000000000;   // min signed int64
    uniform int64 factor = (uniform int64) b;
    int lane_case = programIndex % 3;

    if (lane_case == 0) {
        RET[programIndex] = saturating_mul(int64_max, factor);
    } else if (lane_case == 1) {
        RET[programIndex] = saturating_mul(int64_min, factor);
    } else {
        RET[programIndex] = saturating_mul(factor, factor);
    }
}
|
||||||
|
|
||||||
|
// Expected values for f_fu, selected per program instance by
// programIndex % 3: INT64_MAX, INT64_MIN, and 25 respectively
// (25 presumably corresponds to b == 5 -- confirm against the test harness).
export void result(uniform float RET[]) {
    int lane_case = programIndex % 3;

    if (lane_case == 0) {
        RET[programIndex] = (uniform int64) 0x7FFFFFFFFFFFFFFF;    // max signed int64
    } else if (lane_case == 1) {
        RET[programIndex] = (uniform int64) -0x8000000000000000;   // min signed int64
    } else {
        RET[programIndex] = (uniform int64) 25;
    }
}
|
||||||
28
tests/pmuls_vi64.ispc
Normal file
28
tests/pmuls_vi64.ispc
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
|
||||||
|
// Returns programCount, the number of program instances in the gang.
export uniform int width() {
    return programCount;
}
|
||||||
|
|
||||||
|
// Exercises the varying int64 overload of saturating_mul.
// Program instances are split three ways by programIndex % 3:
//   0 -> INT64_MAX * b,  1 -> INT64_MIN * b,  2 -> b * b.
// Each product is written to RET as a float.  aFOO is not used.
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    varying int64 int64_max = 0x7FFFFFFFFFFFFFFF;    // max signed int64
    varying int64 int64_min = -0x8000000000000000;   // min signed int64
    varying int64 factor = (varying int64) b;
    int lane_case = programIndex % 3;

    if (lane_case == 0) {
        RET[programIndex] = saturating_mul(int64_max, factor);
    } else if (lane_case == 1) {
        RET[programIndex] = saturating_mul(int64_min, factor);
    } else {
        RET[programIndex] = saturating_mul(factor, factor);
    }
}
|
||||||
|
|
||||||
|
// Expected values for f_fu, selected per program instance by
// programIndex % 3: INT64_MAX, INT64_MIN, and 25 respectively
// (25 presumably corresponds to b == 5 -- confirm against the test harness).
export void result(uniform float RET[]) {
    int lane_case = programIndex % 3;

    if (lane_case == 0) {
        RET[programIndex] = (varying int64) 0x7FFFFFFFFFFFFFFF;    // max signed int64
    } else if (lane_case == 1) {
        RET[programIndex] = (varying int64) -0x8000000000000000;   // min signed int64
    } else {
        RET[programIndex] = (varying int64) 25;
    }
}
|
||||||
28
tests/pmulus_i64.ispc
Normal file
28
tests/pmulus_i64.ispc
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
|
||||||
|
// Returns programCount, the number of program instances in the gang.
export uniform int width() {
    return programCount;
}
|
||||||
|
|
||||||
|
// Exercises the uniform unsigned int64 overload of saturating_mul.
// Program instances are split three ways by programIndex % 3:
//   0 -> UINT64_MAX * b,  1 -> 0 * (-b as unsigned),  2 -> b * b.
// Each product is written to RET as a float.  aFOO is not used.
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    uniform unsigned int64 uint64_max = 0xFFFFFFFFFFFFFFFF;   // max unsigned int64
    uniform unsigned int64 uint64_min = 0;                    // min unsigned int64
    uniform unsigned int64 factor = (uniform unsigned int64) b;
    // Converting a negative float directly to an unsigned integer is out of
    // range and not well-defined, so -b is converted to int64 first and then
    // to unsigned (a well-defined modular conversion).
    uniform unsigned int64 negated_factor = (uniform unsigned int64) ((uniform int64) -b);
    int lane_case = programIndex % 3;

    if (lane_case == 0) {
        RET[programIndex] = saturating_mul(uint64_max, factor);
    } else if (lane_case == 1) {
        RET[programIndex] = saturating_mul(uint64_min, negated_factor);
    } else {
        RET[programIndex] = saturating_mul(factor, factor);
    }
}
|
||||||
|
|
||||||
|
// Expected values for f_fu, selected per program instance by
// programIndex % 3: UINT64_MAX, 0, and 25 respectively
// (25 presumably corresponds to b == 5 -- confirm against the test harness).
export void result(uniform float RET[]) {
    int lane_case = programIndex % 3;

    if (lane_case == 0) {
        RET[programIndex] = (uniform unsigned int64) 0xFFFFFFFFFFFFFFFF;   // max unsigned int64
    } else if (lane_case == 1) {
        RET[programIndex] = (uniform unsigned int64) 0;                    // min unsigned int64
    } else {
        RET[programIndex] = (uniform unsigned int64) 25;
    }
}
|
||||||
28
tests/pmulus_vi64.ispc
Normal file
28
tests/pmulus_vi64.ispc
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
|
||||||
|
// Returns programCount, the number of program instances in the gang.
export uniform int width() {
    return programCount;
}
|
||||||
|
|
||||||
|
// Exercises the varying unsigned int64 overload of saturating_mul.
// Program instances are split three ways by programIndex % 3:
//   0 -> UINT64_MAX * b,  1 -> 0 * (-b as unsigned),  2 -> b * b.
// Each product is written to RET as a float.  aFOO is not used.
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    varying unsigned int64 uint64_max = 0xFFFFFFFFFFFFFFFF;   // max unsigned int64
    varying unsigned int64 uint64_min = 0;                    // min unsigned int64
    varying unsigned int64 factor = (varying unsigned int64) b;
    // Converting a negative float directly to an unsigned integer is out of
    // range and not well-defined, so -b is converted to int64 first and then
    // to unsigned (a well-defined modular conversion).
    varying unsigned int64 negated_factor = (varying unsigned int64) ((varying int64) -b);
    int lane_case = programIndex % 3;

    if (lane_case == 0) {
        RET[programIndex] = saturating_mul(uint64_max, factor);
    } else if (lane_case == 1) {
        RET[programIndex] = saturating_mul(uint64_min, negated_factor);
    } else {
        RET[programIndex] = saturating_mul(factor, factor);
    }
}
|
||||||
|
|
||||||
|
// Expected values for f_fu, selected per program instance by
// programIndex % 3: UINT64_MAX, 0, and 25 respectively
// (25 presumably corresponds to b == 5 -- confirm against the test harness).
export void result(uniform float RET[]) {
    int lane_case = programIndex % 3;

    if (lane_case == 0) {
        RET[programIndex] = (varying unsigned int64) 0xFFFFFFFFFFFFFFFF;   // max unsigned int64
    } else if (lane_case == 1) {
        RET[programIndex] = (varying unsigned int64) 0;                    // min unsigned int64
    } else {
        RET[programIndex] = (varying unsigned int64) 25;
    }
}
|
||||||
Reference in New Issue
Block a user