From 65768c20aec633b7c9f33b8c150169aeaca82c49 Mon Sep 17 00:00:00 2001 From: Vsevolod Livinskij Date: Thu, 5 Dec 2013 00:34:14 +0400 Subject: [PATCH] Added tests for saturation and some fixes for generic and avx target --- builtins/target-avx.ll | 1 - builtins/target-avx1.ll | 1 + builtins/target-avx11.ll | 1 + builtins/target-avx2.ll | 1 + builtins/target-generic-1.ll | 2 + builtins/target-generic-16.ll | 2 +- builtins/target-generic-4.ll | 2 +- builtins/target-generic-8.ll | 2 +- builtins/target-generic-common.ll | 1 + builtins/util.m4 | 102 +++++++++++++++++++++++++++++- stdlib.ispc | 8 +-- tests/padds_i16.ispc | 11 ++++ tests/padds_i8.ispc | 11 ++++ tests/padds_vi16.ispc | 11 ++++ tests/padds_vi8.ispc | 11 ++++ tests/paddus_i16.ispc | 11 ++++ tests/paddus_i8.ispc | 11 ++++ tests/paddus_vi16.ispc | 11 ++++ tests/paddus_vi8.ispc | 11 ++++ tests/psubs_i16.ispc | 11 ++++ tests/psubs_i8.ispc | 11 ++++ tests/psubs_vi16.ispc | 11 ++++ tests/psubs_vi8.ispc | 11 ++++ tests/psubus_i16.ispc | 11 ++++ tests/psubus_i8.ispc | 11 ++++ tests/psubus_vi16.ispc | 11 ++++ tests/psubus_vi8.ispc | 11 ++++ 27 files changed, 288 insertions(+), 11 deletions(-) create mode 100644 tests/padds_i16.ispc create mode 100644 tests/padds_i8.ispc create mode 100644 tests/padds_vi16.ispc create mode 100644 tests/padds_vi8.ispc create mode 100644 tests/paddus_i16.ispc create mode 100644 tests/paddus_i8.ispc create mode 100644 tests/paddus_vi16.ispc create mode 100644 tests/paddus_vi8.ispc create mode 100644 tests/psubs_i16.ispc create mode 100644 tests/psubs_i8.ispc create mode 100644 tests/psubs_vi16.ispc create mode 100644 tests/psubs_vi8.ispc create mode 100644 tests/psubus_i16.ispc create mode 100644 tests/psubus_i8.ispc create mode 100644 tests/psubus_vi16.ispc create mode 100644 tests/psubus_vi8.ispc diff --git a/builtins/target-avx.ll b/builtins/target-avx.ll index 8f20bfed..e98a3843 100644 --- a/builtins/target-avx.ll +++ b/builtins/target-avx.ll @@ -40,7 +40,6 @@ stdlib_core() 
packed_load_and_store() scans() int64minmax() -saturation_arithmetic_vec8() include(`target-avx-common.ll') diff --git a/builtins/target-avx1.ll b/builtins/target-avx1.ll index 9c86cab8..f0cf1efb 100644 --- a/builtins/target-avx1.ll +++ b/builtins/target-avx1.ll @@ -32,6 +32,7 @@ include(`target-avx.ll') rdrand_decls() +saturation_arithmetic_vec8() ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; int min/max diff --git a/builtins/target-avx11.ll b/builtins/target-avx11.ll index fea0a7c2..706314a5 100644 --- a/builtins/target-avx11.ll +++ b/builtins/target-avx11.ll @@ -34,6 +34,7 @@ include(`target-avx.ll') ifelse(LLVM_VERSION, `LLVM_3_0', `rdrand_decls()', LLVM_VERSION, `LLVM_3_1', `rdrand_decls()', `rdrand_definition()') +saturation_arithmetic_vec8() ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; int min/max diff --git a/builtins/target-avx2.ll b/builtins/target-avx2.ll index f4a0ee07..c5f8e84f 100644 --- a/builtins/target-avx2.ll +++ b/builtins/target-avx2.ll @@ -38,6 +38,7 @@ include(`target-avx.ll') ifelse(LLVM_VERSION, `LLVM_3_0', `rdrand_decls()', LLVM_VERSION, `LLVM_3_1', `rdrand_decls()', `rdrand_definition()') +saturation_arithmetic_vec8() ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; int min/max diff --git a/builtins/target-generic-1.ll b/builtins/target-generic-1.ll index 910565dd..bb974932 100644 --- a/builtins/target-generic-1.ll +++ b/builtins/target-generic-1.ll @@ -9,6 +9,8 @@ packed_load_and_store() scans() int64minmax() aossoa() +saturation_arithmetic_scalar() +saturation_arithmetic_novec() ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; masked store diff --git a/builtins/target-generic-16.ll b/builtins/target-generic-16.ll index 807fd242..36a2ee4c 100644 --- a/builtins/target-generic-16.ll +++ b/builtins/target-generic-16.ll @@ -31,4 +31,4 @@ define(`WIDTH',`16') include(`target-generic-common.ll') - 
+saturation_arithmetic_vec16() diff --git a/builtins/target-generic-4.ll b/builtins/target-generic-4.ll index 7eb1f300..a7e8dcaa 100644 --- a/builtins/target-generic-4.ll +++ b/builtins/target-generic-4.ll @@ -31,4 +31,4 @@ define(`WIDTH',`4') include(`target-generic-common.ll') - +saturation_arithmetic_vec4() diff --git a/builtins/target-generic-8.ll b/builtins/target-generic-8.ll index bd9261ff..b692322e 100644 --- a/builtins/target-generic-8.ll +++ b/builtins/target-generic-8.ll @@ -31,4 +31,4 @@ define(`WIDTH',`8') include(`target-generic-common.ll') - +saturation_arithmetic_vec8() diff --git a/builtins/target-generic-common.ll b/builtins/target-generic-common.ll index 92b7a18e..c4d3b950 100644 --- a/builtins/target-generic-common.ll +++ b/builtins/target-generic-common.ll @@ -41,6 +41,7 @@ stdlib_core() scans() reduce_equal(WIDTH) rdrand_decls() +saturation_arithmetic_scalar() ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; broadcast/rotate/shuffle diff --git a/builtins/util.m4 b/builtins/util.m4 index 0d5ed2de..e0f7aaec 100644 --- a/builtins/util.m4 +++ b/builtins/util.m4 @@ -50,12 +50,28 @@ define(`MASK_HIGH_BIT_ON', ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; vector convertation utilities -;; convert 4-wide vector into 8-wide vector +;; convert 1-wide vector into 8-wide vector ;; ;; $1: vector element type -;; $2: 4-wide vector +;; $2: 1-wide vector ;; $3: 8-wide vector + +define(`convert1to8', ` + $3 = shufflevector <1 x $1> $2, <1 x $1> undef, + <8 x i32> +') + + +define(`convert1to16', ` + $3 = shufflevector <1 x $1> $2, <1 x $1> undef, + <16 x i32> +') + define(`convert4to8', ` $3 = shufflevector <4 x $1> $2, <4 x $1> undef, <8 x i32> $2, <8 x $1> undef, + <1 x i32> +') + + +define(`convert16to1', ` + $3 = shufflevector <16 x $1> $2, <16 x $1> undef, + <1 x i32> +') define(`convert8to4', ` $3 = shufflevector <8 x $1> $2, <8 x $1> undef, @@ -204,6 +232,74 @@ define i16 @__psubus_i16(i16 
%a0, i16 %a1) { } ') +;;no vector saturation arithmetic + +define(`saturation_arithmetic_novec', ` +define @__padds_vi8(, ) { + convert1to16(i8, %0, %v0) + convert1to16(i8, %1, %v1) + %r16 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %v0, <16 x i8> %v1) + convert16to1(i8, %r16, %r) + ret %r +} + +define @__padds_vi16(, ) { + convert1to8(i16, %0, %v0) + convert1to8(i16, %1, %v1) + %r16 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %v0, <8 x i16> %v1) + convert8to1(i16, %r16, %r) + ret %r +} + +define @__paddus_vi8(, ) { + convert1to16(i8, %0, %v0) + convert1to16(i8, %1, %v1) + %r16 = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %v0, <16 x i8> %v1) + convert16to1(i8, %r16, %r) + ret %r +} + +define @__paddus_vi16(, ) { + convert1to8(i16, %0, %v0) + convert1to8(i16, %1, %v1) + %r16 = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %v0, <8 x i16> %v1) + convert8to1(i16, %r16, %r) + ret %r +} + +define @__psubs_vi8(, ) { + convert1to16(i8, %0, %v0) + convert1to16(i8, %1, %v1) + %r16 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %v0, <16 x i8> %v1) + convert16to1(i8, %r16, %r) + ret %r +} + +define @__psubs_vi16(, ) { + convert1to8(i16, %0, %v0) + convert1to8(i16, %1, %v1) + %r16 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %v0, <8 x i16> %v1) + convert8to1(i16, %r16, %r) + ret %r +} + +define @__psubus_vi8(, ) { + convert1to16(i8, %0, %v0) + convert1to16(i8, %1, %v1) + %r16 = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %v0, <16 x i8> %v1) + convert16to1(i8, %r16, %r) + ret %r +} + +define @__psubus_vi16(, ) { + convert1to8(i16, %0, %v0) + convert1to8(i16, %1, %v1) + %r16 = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %v0, <8 x i16> %v1) + convert8to1(i16, %r16, %r) + ret %r +} +') + ;;4-wide vector saturation arithmetic define(`saturation_arithmetic_vec4', ` diff --git a/stdlib.ispc b/stdlib.ispc index 5b3d144c..9e296687 100644 --- a/stdlib.ispc +++ b/stdlib.ispc @@ -4287,12 +4287,12 @@ static inline varying unsigned int8 paddus(varying 
unsigned int8 a, } static inline uniform unsigned int16 paddus(uniform unsigned int16 a, - unsigned uniform int16 b) { + uniform unsigned int16 b) { return __paddus_i16(a, b); } static inline varying unsigned int16 paddus(varying unsigned int16 a, - unsigned varying int16 b) { + varying unsigned int16 b) { return __paddus_vi16(a, b); } @@ -4323,12 +4323,12 @@ static inline varying unsigned int8 psubus(varying unsigned int8 a, } static inline uniform unsigned int16 psubus(uniform unsigned int16 a, - unsigned uniform int16 b) { + uniform unsigned int16 b) { return __psubus_i16(a, b); } static inline varying unsigned int16 psubus(varying unsigned int16 a, - unsigned varying int16 b) { + varying unsigned int16 b) { return __psubus_vi16(a, b); } /////////////////////////////////////////////////////////////////////////// diff --git a/tests/padds_i16.ispc b/tests/padds_i16.ispc new file mode 100644 index 00000000..4668071b --- /dev/null +++ b/tests/padds_i16.ispc @@ -0,0 +1,11 @@ + +export uniform int width() { return programCount; } + +export void f_f(uniform float RET[], uniform float aFOO[]) { + uniform int16 a = 32767, b = 32767; // max signed int16 + RET[programIndex] = padds(a, b); +} + +export void result(uniform float RET[]) { + RET[programIndex] = 32767; +} diff --git a/tests/padds_i8.ispc b/tests/padds_i8.ispc new file mode 100644 index 00000000..81da8a21 --- /dev/null +++ b/tests/padds_i8.ispc @@ -0,0 +1,11 @@ + +export uniform int width() { return programCount; } + +export void f_f(uniform float RET[], uniform float aFOO[]) { + uniform int8 a = 127, b = 127; // max signed int8; the sum must saturate at 127 + RET[programIndex] = padds(a, b); +} + +export void result(uniform float RET[]) { + RET[programIndex] = 127; +} diff --git a/tests/padds_vi16.ispc b/tests/padds_vi16.ispc new file mode 100644 index 00000000..7c6848e7 --- /dev/null +++ b/tests/padds_vi16.ispc @@ -0,0 +1,11 @@ + +export uniform int width() { return programCount; } + +export void f_f(uniform float RET[], uniform float 
aFOO[]) { + varying int16 a = 32767, b = 32767; // max signed int16 + RET[programIndex] = padds(a, b); +} + +export void result(uniform float RET[]) { + RET[programIndex] = 32767; +} diff --git a/tests/padds_vi8.ispc b/tests/padds_vi8.ispc new file mode 100644 index 00000000..5d6196be --- /dev/null +++ b/tests/padds_vi8.ispc @@ -0,0 +1,11 @@ + +export uniform int width() { return programCount; } + +export void f_f(uniform float RET[], uniform float aFOO[]) { + varying int8 a = 127, b = 127; // max signed int8 + RET[programIndex] = padds(a, b); +} + +export void result(uniform float RET[]) { + RET[programIndex] = 127; +} diff --git a/tests/paddus_i16.ispc b/tests/paddus_i16.ispc new file mode 100644 index 00000000..d2939677 --- /dev/null +++ b/tests/paddus_i16.ispc @@ -0,0 +1,11 @@ + +export uniform int width() { return programCount; } + +export void f_f(uniform float RET[], uniform float aFOO[]) { + uniform unsigned int16 a = 65535, b = 65535; // max unsigned int16; the sum must saturate at 65535 + RET[programIndex] = paddus(a, b); +} + +export void result(uniform float RET[]) { + RET[programIndex] = 65535; +} diff --git a/tests/paddus_i8.ispc b/tests/paddus_i8.ispc new file mode 100644 index 00000000..23de8c21 --- /dev/null +++ b/tests/paddus_i8.ispc @@ -0,0 +1,11 @@ + +export uniform int width() { return programCount; } + +export void f_f(uniform float RET[], uniform float aFOO[]) { + uniform unsigned int8 a = 255, b = 255; // max unsigned int8; the sum must saturate at 255 + RET[programIndex] = paddus(a, b); +} + +export void result(uniform float RET[]) { + RET[programIndex] = 255; +} diff --git a/tests/paddus_vi16.ispc b/tests/paddus_vi16.ispc new file mode 100644 index 00000000..803259f5 --- /dev/null +++ b/tests/paddus_vi16.ispc @@ -0,0 +1,11 @@ + +export uniform int width() { return programCount; } + +export void f_f(uniform float RET[], uniform float aFOO[]) { + varying unsigned int16 a = 65535, b = 65535; // max unsigned int16; the sum must saturate at 65535 + RET[programIndex] = paddus(a, b); +} + +export void result(uniform float RET[]) { + RET[programIndex] = 65535; +} 
diff --git a/tests/paddus_vi8.ispc b/tests/paddus_vi8.ispc new file mode 100644 index 00000000..3d7d3509 --- /dev/null +++ b/tests/paddus_vi8.ispc @@ -0,0 +1,11 @@ + +export uniform int width() { return programCount; } + +export void f_f(uniform float RET[], uniform float aFOO[]) { + varying unsigned int8 a = 255, b = 255; // max unsigned int8; the sum must saturate at 255 + RET[programIndex] = paddus(a, b); +} + +export void result(uniform float RET[]) { + RET[programIndex] = 255; +} diff --git a/tests/psubs_i16.ispc b/tests/psubs_i16.ispc new file mode 100644 index 00000000..9038215e --- /dev/null +++ b/tests/psubs_i16.ispc @@ -0,0 +1,11 @@ + +export uniform int width() { return programCount; } + +export void f_f(uniform float RET[], uniform float aFOO[]) { + uniform int16 a = -32768, b = 32767; // min and max signed int16; the difference must saturate at -32768 + RET[programIndex] = psubs(a, b); +} + +export void result(uniform float RET[]) { + RET[programIndex] = -32768; +} diff --git a/tests/psubs_i8.ispc b/tests/psubs_i8.ispc new file mode 100644 index 00000000..1a661520 --- /dev/null +++ b/tests/psubs_i8.ispc @@ -0,0 +1,11 @@ + +export uniform int width() { return programCount; } + +export void f_f(uniform float RET[], uniform float aFOO[]) { + uniform int8 a = -128, b = 127; // min and max signed int8 + RET[programIndex] = psubs(a, b); +} + +export void result(uniform float RET[]) { + RET[programIndex] = -128; +} diff --git a/tests/psubs_vi16.ispc b/tests/psubs_vi16.ispc new file mode 100644 index 00000000..b1e2cf48 --- /dev/null +++ b/tests/psubs_vi16.ispc @@ -0,0 +1,11 @@ + +export uniform int width() { return programCount; } + +export void f_f(uniform float RET[], uniform float aFOO[]) { + varying int16 a = -32768, b = 32767; // min and max signed int16 + RET[programIndex] = psubs(a, b); +} + +export void result(uniform float RET[]) { + RET[programIndex] = -32768; +} diff --git a/tests/psubs_vi8.ispc b/tests/psubs_vi8.ispc new file mode 100644 index 00000000..a6148a3f --- /dev/null +++ b/tests/psubs_vi8.ispc @@ -0,0 +1,11 @@ + 
+export uniform int width() { return programCount; } + +export void f_f(uniform float RET[], uniform float aFOO[]) { + varying int8 a = -128, b = 127; // min and max signed int8 + RET[programIndex] = psubs(a, b); +} + +export void result(uniform float RET[]) { + RET[programIndex] = -128; +} diff --git a/tests/psubus_i16.ispc b/tests/psubus_i16.ispc new file mode 100644 index 00000000..b31b250e --- /dev/null +++ b/tests/psubus_i16.ispc @@ -0,0 +1,11 @@ + +export uniform int width() { return programCount; } + +export void f_f(uniform float RET[], uniform float aFOO[]) { + uniform unsigned int16 a = 0, b = 65535; // min and max unsigned int16; the difference must saturate at 0 + RET[programIndex] = psubus(a, b); +} + +export void result(uniform float RET[]) { + RET[programIndex] = 0; +} diff --git a/tests/psubus_i8.ispc b/tests/psubus_i8.ispc new file mode 100644 index 00000000..c073d306 --- /dev/null +++ b/tests/psubus_i8.ispc @@ -0,0 +1,11 @@ + +export uniform int width() { return programCount; } + +export void f_f(uniform float RET[], uniform float aFOO[]) { + uniform unsigned int8 a = 0, b = 255; // min and max unsigned int8; the difference must saturate at 0 + RET[programIndex] = psubus(a, b); +} + +export void result(uniform float RET[]) { + RET[programIndex] = 0; +} diff --git a/tests/psubus_vi16.ispc b/tests/psubus_vi16.ispc new file mode 100644 index 00000000..fd4db693 --- /dev/null +++ b/tests/psubus_vi16.ispc @@ -0,0 +1,11 @@ + +export uniform int width() { return programCount; } + +export void f_f(uniform float RET[], uniform float aFOO[]) { + varying unsigned int16 a = 0, b = 65535; // min and max unsigned int16; the difference must saturate at 0 + RET[programIndex] = psubus(a, b); +} + +export void result(uniform float RET[]) { + RET[programIndex] = 0; +} diff --git a/tests/psubus_vi8.ispc b/tests/psubus_vi8.ispc new file mode 100644 index 00000000..3c00308f --- /dev/null +++ b/tests/psubus_vi8.ispc @@ -0,0 +1,11 @@ + +export uniform int width() { return programCount; } + +export void f_f(uniform float RET[], uniform float aFOO[]) { + varying unsigned int8 a = 0, b = 255; // min and max 
unsigned int8 + RET[programIndex] = psubus(a, b); +} + +export void result(uniform float RET[]) { + RET[programIndex] = 0; +}