Merge pull request #751 from Vsevolod-Livinskij/master

Saturating multiplication for int64 was added.
This commit is contained in:
Ilia Filippov
2014-03-12 00:12:34 -07:00
6 changed files with 289 additions and 0 deletions

View File

@@ -3615,6 +3615,39 @@ normalized exponent as a power of two in the ``pw2`` parameter.
uniform int * uniform pw2) uniform int * uniform pw2)
Saturating Arithmetic
---------------------
Saturating (no overflow possible) addition, subtraction, multiplication and
division for all integer types are provided by the ``ispc`` standard library.
::
int8 saturating_add(uniform int8 a, uniform int8 b)
int8 saturating_add(varying int8 a, varying int8 b)
unsigned int8 saturating_add(uniform unsigned int8 a, uniform unsigned int8 b)
unsigned int8 saturating_add(varying unsigned int8 a, varying unsigned int8 b)
int8 saturating_sub(uniform int8 a, uniform int8 b)
int8 saturating_sub(varying int8 a, varying int8 b)
unsigned int8 saturating_sub(uniform unsigned int8 a, uniform unsigned int8 b)
unsigned int8 saturating_sub(varying unsigned int8 a, varying unsigned int8 b)
int8 saturating_mul(uniform int8 a, uniform int8 b)
int8 saturating_mul(varying int8 a, varying int8 b)
unsigned int8 saturating_mul(uniform unsigned int8 a, uniform unsigned int8 b)
unsigned int8 saturating_mul(varying unsigned int8 a, varying unsigned int8 b)
int8 saturating_div(uniform int8 a, uniform int8 b)
int8 saturating_div(varying int8 a, varying int8 b)
unsigned int8 saturating_div(uniform unsigned int8 a, uniform unsigned int8 b)
unsigned int8 saturating_div(varying unsigned int8 a,varying unsigned int8 b)
In addition to the ``int8`` variants of saturating arithmetic functions listed
above, there are versions that support ``int16``, ``int32`` and ``int64``
values as well.
Pseudo-Random Numbers Pseudo-Random Numbers
--------------------- ---------------------

View File

@@ -4938,6 +4938,150 @@ static inline varying unsigned int32 saturating_mul(varying unsigned int32 a,
varying unsigned int32 lo = result; varying unsigned int32 lo = result;
return lo | - (varying int32) !! hi; return lo | - (varying int32) !! hi;
} }
// Saturating multiplication of two uniform signed 64-bit integers.
// The product is computed on the operands' magnitudes (split into 32-bit
// halves) and the result is clamped to INT64_MAX / INT64_MIN whenever it
// does not fit into a signed 64-bit value.
static inline uniform int64 saturating_mul(uniform int64 a, uniform int64 b) {
    // +1 only when both operands are strictly positive or both strictly
    // negative, -1 otherwise. With a zero operand the magnitude is 0, so
    // the -1 chosen in that case has no effect on the result.
    uniform int8 sign = -1;
    if (((a > 0) && (b > 0)) || ((a < 0) && (b < 0)))
        sign = 1;

    // Magnitudes of the operands. INT64_MIN cannot be negated in the signed
    // type, so its magnitude is taken from the conversion to unsigned.
    uniform unsigned int64 a_mag = (uniform unsigned int64) INT64_MIN;
    if (a != INT64_MIN)
        a_mag = (a > 0) ? a : -a;
    uniform unsigned int64 b_mag = (uniform unsigned int64) INT64_MIN;
    if (b != INT64_MIN)
        b_mag = (b > 0) ? b : -b;

    uniform unsigned int32 a_lo = a_mag & 0xFFFFFFFF;
    uniform unsigned int32 a_hi = a_mag >> 32;
    uniform unsigned int32 b_lo = b_mag & 0xFFFFFFFF;
    uniform unsigned int32 b_hi = b_mag >> 32;

    // Both high halves non-zero: the magnitude is at least 2^64.
    if ((a_hi != 0) && (b_hi != 0)) {
        if (sign > 0) {
            return INT64_MAX;
        } else {
            return INT64_MIN;
        }
    }

    // Low x low product always fits into 64 bits; add the (saturated)
    // cross term of whichever operand has a non-zero high half, if any.
    uniform unsigned int64 magnitude = (uniform unsigned int64) (a_lo) * b_lo;
    if (a_hi != 0) {
        magnitude = saturating_add((uniform unsigned int64) saturating_mul(b_lo, a_hi) << 32,
                                   magnitude);
    } else if (b_hi != 0) {
        magnitude = saturating_add((uniform unsigned int64) saturating_mul(a_lo, b_hi) << 32,
                                   magnitude);
    }

    // Clamp to the signed range, otherwise re-apply the sign.
    if ((sign < 0) && (magnitude >= (uniform unsigned int64) INT64_MIN))
        return INT64_MIN;
    if ((sign > 0) && (magnitude >= INT64_MAX))
        return INT64_MAX;
    return magnitude * sign;
}
// Saturating multiplication of two varying signed 64-bit integers.
// The product is computed on the operands' magnitudes (split into 32-bit
// halves) and the result is clamped to INT64_MAX / INT64_MIN whenever it
// does not fit into a signed 64-bit value.
static inline varying int64 saturating_mul(varying int64 a, varying int64 b) {
    // +1 only when both operands are strictly positive or both strictly
    // negative, -1 otherwise. With a zero operand the magnitude is 0, so
    // the -1 chosen in that case has no effect on the result.
    varying int8 sign = -1;
    if (((a > 0) && (b > 0)) || ((a < 0) && (b < 0)))
        sign = 1;

    // Magnitudes of the operands. INT64_MIN cannot be negated in the signed
    // type, so its magnitude is taken from the conversion to unsigned.
    varying unsigned int64 a_mag = (varying unsigned int64) INT64_MIN;
    if (a != INT64_MIN)
        a_mag = (a > 0) ? a : -a;
    varying unsigned int64 b_mag = (varying unsigned int64) INT64_MIN;
    if (b != INT64_MIN)
        b_mag = (b > 0) ? b : -b;

    varying unsigned int32 a_lo = a_mag & 0xFFFFFFFF;
    varying unsigned int32 a_hi = a_mag >> 32;
    varying unsigned int32 b_lo = b_mag & 0xFFFFFFFF;
    varying unsigned int32 b_hi = b_mag >> 32;

    // Both high halves non-zero: the magnitude is at least 2^64.
    if ((a_hi != 0) && (b_hi != 0)) {
        if (sign > 0) {
            return INT64_MAX;
        } else {
            return INT64_MIN;
        }
    }

    // Low x low product always fits into 64 bits; add the (saturated)
    // cross term of whichever operand has a non-zero high half, if any.
    varying unsigned int64 magnitude = (varying unsigned int64) (a_lo) * b_lo;
    if (a_hi != 0) {
        magnitude = saturating_add((varying unsigned int64) saturating_mul(b_lo, a_hi) << 32,
                                   magnitude);
    } else if (b_hi != 0) {
        magnitude = saturating_add((varying unsigned int64) saturating_mul(a_lo, b_hi) << 32,
                                   magnitude);
    }

    // Clamp to the signed range, otherwise re-apply the sign.
    if ((sign < 0) && (magnitude >= (varying unsigned int64) INT64_MIN))
        return INT64_MIN;
    if ((sign > 0) && (magnitude >= INT64_MAX))
        return INT64_MAX;
    return magnitude * sign;
}
// Saturating multiplication of two uniform unsigned 64-bit integers.
// Returns a * b, or UINT64_MAX when the exact product does not fit into
// 64 bits. The operands are split into 32-bit halves:
//   a * b = (a1*b1 << 64) + ((a1*b0 + a0*b1) << 32) + a0*b0
static inline uniform unsigned int64 saturating_mul(uniform unsigned int64 a,
                                                    uniform unsigned int64 b) {
    uniform unsigned int32 a0 = a & 0xFFFFFFFF;
    uniform unsigned int32 b0 = b & 0xFFFFFFFF;
    uniform unsigned int32 a1 = a >> 32;
    uniform unsigned int32 b1 = b >> 32;

    // Both high halves non-zero: the product is at least 2^64.
    if ((a1 != 0) && (b1 != 0)) {
        return UINT64_MAX;
    }

    // At most one high half is non-zero here, so at most one of the two
    // cross terms is non-zero and their 64-bit sum cannot wrap.
    uniform unsigned int64 cross = (uniform unsigned int64) (a1) * b0 +
                                   (uniform unsigned int64) (a0) * b1;

    // The cross term is shifted left by 32, so any bit above bit 31 means
    // the product overflows. Saturate explicitly: a 32-bit saturated cross
    // product shifted left would yield 0xFFFFFFFF00000000, not UINT64_MAX.
    if ((cross >> 32) != 0) {
        return UINT64_MAX;
    }

    return saturating_add(cross << 32, (uniform unsigned int64) (a0) * b0);
}
// Saturating multiplication of two varying unsigned 64-bit integers.
// Returns a * b, or UINT64_MAX when the exact product does not fit into
// 64 bits. The operands are split into 32-bit halves:
//   a * b = (a1*b1 << 64) + ((a1*b0 + a0*b1) << 32) + a0*b0
static inline varying unsigned int64 saturating_mul(varying unsigned int64 a,
                                                    varying unsigned int64 b) {
    varying unsigned int32 a0 = a & 0xFFFFFFFF;
    varying unsigned int32 b0 = b & 0xFFFFFFFF;
    varying unsigned int32 a1 = a >> 32;
    varying unsigned int32 b1 = b >> 32;

    // Both high halves non-zero: the product is at least 2^64.
    if ((a1 != 0) && (b1 != 0)) {
        return UINT64_MAX;
    }

    // At most one high half is non-zero here, so at most one of the two
    // cross terms is non-zero and their 64-bit sum cannot wrap.
    varying unsigned int64 cross = (varying unsigned int64) (a1) * b0 +
                                   (varying unsigned int64) (a0) * b1;

    // The cross term is shifted left by 32, so any bit above bit 31 means
    // the product overflows. Saturate explicitly: a 32-bit saturated cross
    // product shifted left would yield 0xFFFFFFFF00000000, not UINT64_MAX.
    if ((cross >> 32) != 0) {
        return UINT64_MAX;
    }

    return saturating_add(cross << 32, (varying unsigned int64) (a0) * b0);
}
/////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////
// rdrand // rdrand

28
tests/pmuls_i64.ispc Normal file
View File

@@ -0,0 +1,28 @@
// Returns programCount.
export uniform int width() {
    return programCount;
}
// Fills RET with saturating_mul() results on uniform int64 operands.
// Program instances are split into three groups by programIndex % 3:
//   0: INT64 max * b,  1: INT64 min * b,  2: b * b.
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    uniform int64 a_max = 0x7FFFFFFFFFFFFFFF;   // max signed int64
    uniform int64 a_min = -0x8000000000000000;  // min signed int64
    uniform int64 factor = (uniform int64) b;
    int group = programIndex % 3;

    if (group == 0) {
        RET[programIndex] = saturating_mul(a_max, factor);
    } else if (group == 1) {
        RET[programIndex] = saturating_mul(a_min, factor);
    } else {
        RET[programIndex] = saturating_mul(factor, factor);
    }
}
// Writes the reference values for f_fu() into RET, using the same
// programIndex % 3 grouping: max signed int64, min signed int64, 25.
export void result(uniform float RET[]) {
    int group = programIndex % 3;

    if (group == 0) {
        RET[programIndex] = (uniform int64) 0x7FFFFFFFFFFFFFFF;   // max signed int64
    } else if (group == 1) {
        RET[programIndex] = (uniform int64) -0x8000000000000000;  // min signed int64
    } else {
        RET[programIndex] = (uniform int64) 25;
    }
}

28
tests/pmuls_vi64.ispc Normal file
View File

@@ -0,0 +1,28 @@
// Returns programCount.
export uniform int width() {
    return programCount;
}
// Fills RET with saturating_mul() results on varying int64 operands.
// Program instances are split into three groups by programIndex % 3:
//   0: INT64 max * b,  1: INT64 min * b,  2: b * b.
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    varying int64 a_max = 0x7FFFFFFFFFFFFFFF;   // max signed int64
    varying int64 a_min = -0x8000000000000000;  // min signed int64
    varying int64 factor = (varying int64) b;
    int group = programIndex % 3;

    if (group == 0) {
        RET[programIndex] = saturating_mul(a_max, factor);
    } else if (group == 1) {
        RET[programIndex] = saturating_mul(a_min, factor);
    } else {
        RET[programIndex] = saturating_mul(factor, factor);
    }
}
// Writes the reference values for f_fu() into RET, using the same
// programIndex % 3 grouping: max signed int64, min signed int64, 25.
export void result(uniform float RET[]) {
    int group = programIndex % 3;

    if (group == 0) {
        RET[programIndex] = (varying int64) 0x7FFFFFFFFFFFFFFF;   // max signed int64
    } else if (group == 1) {
        RET[programIndex] = (varying int64) -0x8000000000000000;  // min signed int64
    } else {
        RET[programIndex] = (varying int64) 25;
    }
}

28
tests/pmulus_i64.ispc Normal file
View File

@@ -0,0 +1,28 @@
// Returns programCount.
export uniform int width() {
    return programCount;
}
// Fills RET with saturating_mul() results on uniform unsigned int64
// operands. Program instances are split into three groups by
// programIndex % 3:
//   0: UINT64 max * b,  1: 0 * (large value derived from -b),  2: b * b.
// NOTE(review): presumably b is positive (result() expects b * b == 25);
// confirm against the test harness.
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    uniform unsigned int64 a_max = 0xFFFFFFFFFFFFFFFF; // max unsigned int64
    uniform unsigned int64 a_min = 0;                  // min unsigned int64
    if (programIndex % 3 == 0) {
        RET[programIndex] = saturating_mul(a_max, (uniform unsigned int64) b);
    }
    else if (programIndex % 3 == 1) {
        // Go through signed int64 first: converting a negative float
        // directly to an unsigned integer is out of range and not well
        // defined, whereas int64 -> unsigned int64 simply wraps.
        RET[programIndex] = saturating_mul(a_min,
                                           (uniform unsigned int64) (uniform int64) -b);
    }
    else {
        RET[programIndex] = saturating_mul((uniform unsigned int64) b,
                                           (uniform unsigned int64) b);
    }
}
// Writes the reference values for f_fu() into RET, using the same
// programIndex % 3 grouping: max unsigned int64, 0, 25.
export void result(uniform float RET[]) {
    int group = programIndex % 3;

    if (group == 0) {
        RET[programIndex] = (uniform unsigned int64) 0xFFFFFFFFFFFFFFFF; // max unsigned int64
    } else if (group == 1) {
        RET[programIndex] = (uniform unsigned int64) 0;                  // min unsigned int64
    } else {
        RET[programIndex] = (uniform unsigned int64) 25;
    }
}

28
tests/pmulus_vi64.ispc Normal file
View File

@@ -0,0 +1,28 @@
// Returns programCount.
export uniform int width() {
    return programCount;
}
// Fills RET with saturating_mul() results on varying unsigned int64
// operands. Program instances are split into three groups by
// programIndex % 3:
//   0: UINT64 max * b,  1: 0 * (large value derived from -b),  2: b * b.
// NOTE(review): presumably b is positive (result() expects b * b == 25);
// confirm against the test harness.
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    varying unsigned int64 a_max = 0xFFFFFFFFFFFFFFFF; // max unsigned int64
    varying unsigned int64 a_min = 0;                  // min unsigned int64
    if (programIndex % 3 == 0) {
        RET[programIndex] = saturating_mul(a_max, (varying unsigned int64) b);
    }
    else if (programIndex % 3 == 1) {
        // Go through signed int64 first: converting a negative float
        // directly to an unsigned integer is out of range and not well
        // defined, whereas int64 -> unsigned int64 simply wraps.
        RET[programIndex] = saturating_mul(a_min,
                                           (varying unsigned int64) (varying int64) -b);
    }
    else {
        RET[programIndex] = saturating_mul((varying unsigned int64) b,
                                           (varying unsigned int64) b);
    }
}
// Writes the reference values for f_fu() into RET, using the same
// programIndex % 3 grouping: max unsigned int64, 0, 25.
export void result(uniform float RET[]) {
    int group = programIndex % 3;

    if (group == 0) {
        RET[programIndex] = (varying unsigned int64) 0xFFFFFFFFFFFFFFFF; // max unsigned int64
    } else if (group == 1) {
        RET[programIndex] = (varying unsigned int64) 0;                  // min unsigned int64
    } else {
        RET[programIndex] = (varying unsigned int64) 25;
    }
}