From af836cda27f38010333454b023f838de37c610fd Mon Sep 17 00:00:00 2001 From: Vsevolod Livinskij Date: Sun, 23 Feb 2014 19:48:03 +0400 Subject: [PATCH 1/4] Saturating multiplication for int64 was added. --- stdlib.ispc | 36 ++++++++++++++++++++++++++++++++++++ tests/pmuls_i64.ispc | 28 ++++++++++++++++++++++++++++ tests/pmuls_vi64.ispc | 28 ++++++++++++++++++++++++++++ tests/pmulus_i64.ispc | 28 ++++++++++++++++++++++++++++ tests/pmulus_vi64.ispc | 28 ++++++++++++++++++++++++++++ 5 files changed, 148 insertions(+) create mode 100644 tests/pmuls_i64.ispc create mode 100644 tests/pmuls_vi64.ispc create mode 100644 tests/pmulus_i64.ispc create mode 100644 tests/pmulus_vi64.ispc diff --git a/stdlib.ispc b/stdlib.ispc index 1582a04a..28dd8970 100644 --- a/stdlib.ispc +++ b/stdlib.ispc @@ -4885,6 +4885,28 @@ static inline varying int32 saturating_mul(varying int32 a, varying int32 b) { return result; } +static inline uniform int64 saturating_mul(uniform int64 a, uniform int64 b) { + uniform int64 a_abs = (a > 0) ? a : -a; + uniform int64 b_abs = (b > 0) ? b : -b; + if (a_abs > (INT64_MAX / b_abs)) + if (((a > 0) && (b > 0)) || ((a < 0) && (b < 0))) + return INT64_MAX; + else + return INT64_MIN; + return a * b; +} + +static inline varying int64 saturating_mul(varying int64 a, varying int64 b) { + varying int64 a_abs = (a > 0) ? a : -a; + varying int64 b_abs = (b > 0) ? b : -b; + if (a_abs > (INT64_MAX / b_abs)) + if (((a > 0) && (b > 0)) || ((a < 0) && (b < 0))) + return INT64_MAX; + else + return INT64_MIN; + return a * b; +} + static inline uniform unsigned int8 saturating_mul(uniform unsigned int8 a, uniform unsigned int8 b) { uniform unsigned int16 result = (uniform unsigned int16) a * @@ -4938,6 +4960,20 @@ static inline varying unsigned int32 saturating_mul(varying unsigned int32 a, varying unsigned int32 lo = result; return lo | - (varying int32) !! 
hi; } + +static inline uniform unsigned int64 saturating_mul(uniform unsigned int64 a, + uniform unsigned int64 b) { + if (a > (UINT64_MAX / b)) + return UINT64_MAX; + return a * b; +} + +static inline varying unsigned int64 saturating_mul(varying unsigned int64 a, + varying unsigned int64 b) { + if (a > (UINT64_MAX / b)) + return UINT64_MAX; + return a * b; +} /////////////////////////////////////////////////////////////////////////// // rdrand diff --git a/tests/pmuls_i64.ispc b/tests/pmuls_i64.ispc new file mode 100644 index 00000000..a04ca698 --- /dev/null +++ b/tests/pmuls_i64.ispc @@ -0,0 +1,28 @@ + +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + uniform int64 a_max = 0x7FFFFFFFFFFFFFFF, a_min = -0x8000000000000000; // max and min signed int64 + if (programIndex % 3 == 0) { + RET[programIndex] = saturating_mul(a_max, (uniform int64) b); + } + else if (programIndex % 3 == 1) { + RET[programIndex] = saturating_mul(a_min, (uniform int64) b); + } + else { + RET[programIndex] = saturating_mul((uniform int64) b, + (uniform int64) b); + } +} + +export void result(uniform float RET[]) { + if (programIndex % 3 == 0) { + RET[programIndex] = (uniform int64) 0x7FFFFFFFFFFFFFFF; // max signed int64 + } + else if (programIndex % 3 == 1) { + RET[programIndex] = (uniform int64) -0x8000000000000000; // min signed int64 + } + else { + RET[programIndex] = (uniform int64) 25; + } +} diff --git a/tests/pmuls_vi64.ispc b/tests/pmuls_vi64.ispc new file mode 100644 index 00000000..32df2fac --- /dev/null +++ b/tests/pmuls_vi64.ispc @@ -0,0 +1,28 @@ + +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + varying int64 a_max = 0x7FFFFFFFFFFFFFFF, a_min = -0x8000000000000000; // max and min signed int64 + if (programIndex % 3 == 0) { + RET[programIndex] = saturating_mul(a_max, (varying int64) b); + } + else if 
(programIndex % 3 == 1) { + RET[programIndex] = saturating_mul(a_min, (varying int64) b); + } + else { + RET[programIndex] = saturating_mul((varying int64) b, + (varying int64) b); + } +} + +export void result(uniform float RET[]) { + if (programIndex % 3 == 0) { + RET[programIndex] = (varying int64) 0x7FFFFFFFFFFFFFFF; // max signed int64 + } + else if (programIndex % 3 == 1) { + RET[programIndex] = (varying int64) -0x8000000000000000; // min signed int64 + } + else { + RET[programIndex] = (varying int64) 25; + } +} diff --git a/tests/pmulus_i64.ispc b/tests/pmulus_i64.ispc new file mode 100644 index 00000000..179902a3 --- /dev/null +++ b/tests/pmulus_i64.ispc @@ -0,0 +1,28 @@ + +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + uniform unsigned int64 a_max = 0xFFFFFFFFFFFFFFFF, a_min = 0; // max and min unsigned int64 + if (programIndex % 3 == 0) { + RET[programIndex] = saturating_mul(a_max, (uniform unsigned int64) b); + } + else if (programIndex % 3 == 1) { + RET[programIndex] = saturating_mul(a_min, (uniform unsigned int64) -b); + } + else { + RET[programIndex] = saturating_mul((uniform unsigned int64) b, + (uniform unsigned int64) b); + } +} + +export void result(uniform float RET[]) { + if (programIndex % 3 == 0) { + RET[programIndex] = (uniform unsigned int64) 0xFFFFFFFFFFFFFFFF; // max unsigned int64 + } + else if (programIndex % 3 == 1) { + RET[programIndex] = (uniform unsigned int64) 0; // min unsigned int64 + } + else { + RET[programIndex] = (uniform unsigned int64) 25; + } +} diff --git a/tests/pmulus_vi64.ispc b/tests/pmulus_vi64.ispc new file mode 100644 index 00000000..43ae9aac --- /dev/null +++ b/tests/pmulus_vi64.ispc @@ -0,0 +1,28 @@ + +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + varying unsigned int64 a_max = 0xFFFFFFFFFFFFFFFF, a_min = 0; // max and min unsigned int64 + 
if (programIndex % 3 == 0) { + RET[programIndex] = saturating_mul(a_max, (varying unsigned int64) b); + } + else if (programIndex % 3 == 1) { + RET[programIndex] = saturating_mul(a_min, (varying unsigned int64) -b); + } + else { + RET[programIndex] = saturating_mul((varying unsigned int64) b, + (varying unsigned int64) b); + } +} + +export void result(uniform float RET[]) { + if (programIndex % 3 == 0) { + RET[programIndex] = (varying unsigned int64) 0xFFFFFFFFFFFFFFFF; // max unsigned int64 + } + else if (programIndex % 3 == 1) { + RET[programIndex] = (varying unsigned int64) 0; // min unsigned int64 + } + else { + RET[programIndex] = (varying unsigned int64) 25; + } +} From c2e05e2231ebeed85ce2ec07ab8df989f3bbe996 Mon Sep 17 00:00:00 2001 From: Vsevolod Livinskij Date: Fri, 28 Feb 2014 20:06:46 +0400 Subject: [PATCH 2/4] Algorithm was modified and division was changed to bit operations. --- stdlib.ispc | 135 ++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 109 insertions(+), 26 deletions(-) diff --git a/stdlib.ispc b/stdlib.ispc index 28dd8970..d60219d0 100644 --- a/stdlib.ispc +++ b/stdlib.ispc @@ -4885,28 +4885,6 @@ static inline varying int32 saturating_mul(varying int32 a, varying int32 b) { return result; } -static inline uniform int64 saturating_mul(uniform int64 a, uniform int64 b) { - uniform int64 a_abs = (a > 0) ? a : -a; - uniform int64 b_abs = (b > 0) ? b : -b; - if (a_abs > (INT64_MAX / b_abs)) - if (((a > 0) && (b > 0)) || ((a < 0) && (b < 0))) - return INT64_MAX; - else - return INT64_MIN; - return a * b; -} - -static inline varying int64 saturating_mul(varying int64 a, varying int64 b) { - varying int64 a_abs = (a > 0) ? a : -a; - varying int64 b_abs = (b > 0) ? 
b : -b; - if (a_abs > (INT64_MAX / b_abs)) - if (((a > 0) && (b > 0)) || ((a < 0) && (b < 0))) - return INT64_MAX; - else - return INT64_MIN; - return a * b; -} - static inline uniform unsigned int8 saturating_mul(uniform unsigned int8 a, uniform unsigned int8 b) { uniform unsigned int16 result = (uniform unsigned int16) a * @@ -4961,18 +4939,123 @@ static inline varying unsigned int32 saturating_mul(varying unsigned int32 a, return lo | - (varying int32) !! hi; } +static inline uniform int64 saturating_mul(uniform int64 a, uniform int64 b) { + uniform unsigned int64 ret = 0; + + uniform int8 sign = (((a > 0) && (b > 0)) || ((a < 0) && (b < 0))) ? 1 : -1; + uniform unsigned int64 a_abs = (a > 0) ? a : -a; + uniform unsigned int64 b_abs = (b > 0) ? b : -b; + + uniform unsigned int32 a0 = a_abs & 0xFFFFFFFF; + uniform unsigned int32 b0 = b_abs & 0xFFFFFFFF; + uniform unsigned int32 a1 = a_abs >> 32; + uniform unsigned int32 b1 = b_abs >> 32; + + if ((a1 != 0) && (b1 != 0)) { + if (sign > 0) { + return INT64_MAX; + } + else { + return INT64_MIN; + } + } else if (a1 != 0) { + ret = saturating_add ((uniform unsigned int64) saturating_mul (b0, a1) << 32 , + (uniform unsigned int64) (a0) * b0); + } else if (b1 != 0) { + ret = saturating_add ((uniform unsigned int64) saturating_mul (a0, b1) << 32 , + (uniform unsigned int64) (a0) * b0); + } else { + ret = a_abs * b_abs; + } + + + if ((sign < 0) && (ret >= -INT64_MIN)) { + return INT64_MIN; + } else if ((sign > 0) && (ret >= INT64_MAX)) { + return INT64_MAX; + } else { + return ret * sign; + } +} + +static inline varying int64 saturating_mul(varying int64 a, varying int64 b) { + varying unsigned int64 ret = 0; + + varying int8 sign = (((a > 0) && (b > 0)) || ((a < 0) && (b < 0))) ? 1 : -1; + varying unsigned int64 a_abs = (a > 0) ? a : -a; + varying unsigned int64 b_abs = (b > 0) ? 
b : -b; + + varying unsigned int32 a0 = a_abs & 0xFFFFFFFF; + varying unsigned int32 b0 = b_abs & 0xFFFFFFFF; + varying unsigned int32 a1 = a_abs >> 32; + varying unsigned int32 b1 = b_abs >> 32; + + if ((a1 != 0) && (b1 != 0)) { + if (sign > 0) { + return INT64_MAX; + } + else { + return INT64_MIN; + } + } else if (a1 != 0) { + ret = saturating_add ((varying unsigned int64) saturating_mul (b0, a1) << 32 , + (varying unsigned int64) (a0) * b0); + } else if (b1 != 0) { + ret = saturating_add ((varying unsigned int64) saturating_mul (a0, b1) << 32 , + (varying unsigned int64) (a0) * b0); + } else { + ret = a_abs * b_abs; + } + + + if ((sign < 0) && (ret >= -INT64_MIN)) { + return INT64_MIN; + } else if ((sign > 0) && (ret >= INT64_MAX)) { + return INT64_MAX; + } else { + return ret * sign; + } +} + + static inline uniform unsigned int64 saturating_mul(uniform unsigned int64 a, uniform unsigned int64 b) { - if (a > (UINT64_MAX / b)) + uniform unsigned int32 a0 = a & 0xFFFFFFFF; + uniform unsigned int32 b0 = b & 0xFFFFFFFF; + uniform unsigned int32 a1 = a >> 32; + uniform unsigned int32 b1 = b >> 32; + + if ((a1 != 0) && (b1 != 0)) { return UINT64_MAX; - return a * b; + } else if (a1 != 0) { + return saturating_add ((uniform unsigned int64) saturating_mul (b0, a1) << 32 , + (uniform unsigned int64) (a0) * b0); + } else if (b1 != 0) { + return saturating_add ((uniform unsigned int64) saturating_mul (a0, b1) << 32 , + (uniform unsigned int64) (a0) * b0); + } else { + return a * b; + } } static inline varying unsigned int64 saturating_mul(varying unsigned int64 a, varying unsigned int64 b) { - if (a > (UINT64_MAX / b)) + varying unsigned int32 a0 = a & 0xFFFFFFFF; + varying unsigned int32 b0 = b & 0xFFFFFFFF; + varying unsigned int32 a1 = a >> 32; + varying unsigned int32 b1 = b >> 32; + + if ((a1 != 0) && (b1 != 0)) { return UINT64_MAX; - return a * b; + } else if (a1 != 0) { + return saturating_add ((varying unsigned int64) saturating_mul (b0, a1) << 32 , + (varying 
unsigned int64) (a0) * b0); + } else if (b1 != 0) { + return saturating_add ((varying unsigned int64) saturating_mul (a0, b1) << 32 , + (varying unsigned int64) (a0) * b0); + } else { + return a * b; + } } /////////////////////////////////////////////////////////////////////////// // rdrand From 2e2fd394bfdba1adc62d54f622ab37d1de5191e5 Mon Sep 17 00:00:00 2001 From: Vsevolod Livinskij Date: Wed, 5 Mar 2014 01:30:16 +0400 Subject: [PATCH 3/4] Documents for saturating arithmetic was added. --- docs/ispc.rst | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/docs/ispc.rst b/docs/ispc.rst index 2c41301c..18663942 100644 --- a/docs/ispc.rst +++ b/docs/ispc.rst @@ -3615,6 +3615,41 @@ normalized exponent as a power of two in the ``pw2`` parameter. uniform int * uniform pw2) +Saturating Arithmetic +--------------------- +A saturation addition, substraction, multiplication and division of all integer +types is provided by the ``ispc`` standard library. + +:: + + int8 saturating_add(uniform int8 a, uniform int8 b) + int8 saturating_add(varying int8 a, varying int8 b) + unsigned int8 saturating_add(uniform unsigned int8 a, uniform unsigned int8 b) + unsigned int8 saturating_add(varying unsigned int8 a, varying unsigned int8 b) + + int8 saturating_sub(uniform int8 a, uniform int8 b) + int8 saturating_sub(varying int8 a, varying int8 b) + unsigned int8 saturating_sub(uniform unsigned int8 a, uniform unsigned int8 b) + unsigned int8 saturating_sub(varying unsigned int8 a, varying unsigned int8 b) + + int8 saturating_mul(uniform int8 a, uniform int8 b) + int8 saturating_mul(varying int8 a, varying int8 b) + unsigned int8 saturating_mul(uniform unsigned int8 a, uniform unsigned int8 b) + unsigned int8 saturating_mul(varying unsigned int8 a, varying unsigned int8 b) + + int8 saturating_div(uniform int8 a, uniform int8 b) + int8 saturating_div(varying int8 a, varying int8 b) + unsigned int8 saturating_div(uniform unsigned int8 a, uniform unsigned 
int8 b) + unsigned int8 saturating_div(varying unsigned int8 a,varying unsigned int8 b) + + +In addition to the ``int8`` variants of saturating arithmetic functions listed +above, there are versions that support ``int16``, ``int32`` and ``int64`` +values as well. Functions that have best high-speed performance are functions +that support ``varying signed/unsigned int8/int16`` on Intel® SSE and Intel® AVX, +because they have hardware implementation. + + Pseudo-Random Numbers --------------------- From dc00b4dd64389a2e82bf9e6bb3482e61264bc47b Mon Sep 17 00:00:00 2001 From: Vsevolod Livinskij Date: Thu, 6 Mar 2014 21:10:08 +0400 Subject: [PATCH 4/4] Undefined operation -INT64_MIN was fixed. --- docs/ispc.rst | 8 +++----- stdlib.ispc | 39 ++++++++++++++++++++++++++++++++------- 2 files changed, 35 insertions(+), 12 deletions(-) diff --git a/docs/ispc.rst b/docs/ispc.rst index 18663942..4819ee9c 100644 --- a/docs/ispc.rst +++ b/docs/ispc.rst @@ -3617,8 +3617,8 @@ normalized exponent as a power of two in the ``pw2`` parameter. Saturating Arithmetic --------------------- -A saturation addition, substraction, multiplication and division of all integer -types is provided by the ``ispc`` standard library. +A saturation (no overflow possible) addition, subtraction, multiplication and +division of all integer types is provided by the ``ispc`` standard library. :: @@ -3645,9 +3645,7 @@ types is provided by the ``ispc`` standard library. In addition to the ``int8`` variants of saturating arithmetic functions listed above, there are versions that support ``int16``, ``int32`` and ``int64`` -values as well. Functions that have best high-speed performance are functions -that support ``varying signed/unsigned int8/int16`` on Intel® SSE and Intel® AVX, -because they have hardware implementation. +values as well.
 Pseudo-Random Numbers diff --git a/stdlib.ispc b/stdlib.ispc index d60219d0..a2ca02e7 100644 --- a/stdlib.ispc +++ b/stdlib.ispc @@ -4943,8 +4943,20 @@ static inline uniform int64 saturating_mul(uniform int64 a, uniform int64 b) { uniform unsigned int64 ret = 0; uniform int8 sign = (((a > 0) && (b > 0)) || ((a < 0) && (b < 0))) ? 1 : -1; - uniform unsigned int64 a_abs = (a > 0) ? a : -a; - uniform unsigned int64 b_abs = (b > 0) ? b : -b; + uniform unsigned int64 a_abs = 0; + uniform unsigned int64 b_abs = 0; + + if (a == INT64_MIN) + a_abs = (uniform unsigned int64) INT64_MIN; + // Operation "-" is undefined for "INT64_MIN". + // See section 6.3.1.3 of the C99 standard. + else + a_abs = (a > 0) ? a : -a; + + if (b == INT64_MIN) + b_abs = (uniform unsigned int64) INT64_MIN; + else + b_abs = (b > 0) ? b : -b; uniform unsigned int32 a0 = a_abs & 0xFFFFFFFF; uniform unsigned int32 b0 = b_abs & 0xFFFFFFFF; @@ -4969,7 +4981,7 @@ static inline uniform int64 saturating_mul(uniform int64 a, uniform int64 b) { } - if ((sign < 0) && (ret >= -INT64_MIN)) { + if ((sign < 0) && (ret >= (uniform unsigned int64) INT64_MIN)) { return INT64_MIN; } else if ((sign > 0) && (ret >= INT64_MAX)) { return INT64_MAX; @@ -4981,9 +4993,22 @@ static inline uniform int64 saturating_mul(uniform int64 a, uniform int64 b) { static inline varying int64 saturating_mul(varying int64 a, varying int64 b) { varying unsigned int64 ret = 0; - varying int8 sign = (((a > 0) && (b > 0)) || ((a < 0) && (b < 0))) ? 1 : -1; - varying unsigned int64 a_abs = (a > 0) ? a : -a; - varying unsigned int64 b_abs = (b > 0) ? b : -b; + varying int8 sign = (((a > 0) && (b > 0)) || ((a < 0) && (b < 0))) ? 1 : -1; + varying unsigned int64 a_abs = 0; + varying unsigned int64 b_abs = 0; + + if (a == INT64_MIN) + a_abs = (varying unsigned int64) INT64_MIN; + // Operation "-" is undefined for "INT64_MIN". + // See section 6.3.1.3 of the C99 standard. + else + a_abs = (a > 0) ? 
a : -a; + + if (b == INT64_MIN) + b_abs = (varying unsigned int64) INT64_MIN; + else + b_abs = (b > 0) ? b : -b; + varying unsigned int32 a0 = a_abs & 0xFFFFFFFF; varying unsigned int32 b0 = b_abs & 0xFFFFFFFF; @@ -5008,7 +5033,7 @@ static inline varying int64 saturating_mul(varying int64 a, varying int64 b) { } - if ((sign < 0) && (ret >= -INT64_MIN)) { + if ((sign < 0) && (ret >= (varying unsigned int64) INT64_MIN)) { return INT64_MIN; } else if ((sign > 0) && (ret >= INT64_MAX)) { return INT64_MAX;