diff --git a/builtins/target-avx-common.ll b/builtins/target-avx-common.ll index 32157a77..d6b577b8 100644 --- a/builtins/target-avx-common.ll +++ b/builtins/target-avx-common.ll @@ -40,7 +40,7 @@ ctlztz() define_prefetches() define_shuffles() aossoa() -saturation_arithmetic_scalar() +saturation_arithmetic_uniform() ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; rounding floats diff --git a/builtins/target-avx-x2.ll b/builtins/target-avx-x2.ll index cde63e7b..8d3e29c8 100644 --- a/builtins/target-avx-x2.ll +++ b/builtins/target-avx-x2.ll @@ -40,7 +40,7 @@ stdlib_core() packed_load_and_store() scans() int64minmax() -saturation_arithmetic_vec16() +saturation_arithmetic() include(`target-avx-common.ll') diff --git a/builtins/target-avx1-i64x4base.ll b/builtins/target-avx1-i64x4base.ll index a2d292f2..d9c60c26 100644 --- a/builtins/target-avx1-i64x4base.ll +++ b/builtins/target-avx1-i64x4base.ll @@ -40,7 +40,7 @@ stdlib_core() packed_load_and_store() scans() int64minmax() -saturation_arithmetic_vec4() +saturation_arithmetic() include(`target-avx-common.ll') diff --git a/builtins/target-avx1.ll b/builtins/target-avx1.ll index f0cf1efb..a9ddc112 100644 --- a/builtins/target-avx1.ll +++ b/builtins/target-avx1.ll @@ -32,7 +32,7 @@ include(`target-avx.ll') rdrand_decls() -saturation_arithmetic_vec8() +saturation_arithmetic() ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; int min/max diff --git a/builtins/target-avx11.ll b/builtins/target-avx11.ll index 706314a5..c4c421a0 100644 --- a/builtins/target-avx11.ll +++ b/builtins/target-avx11.ll @@ -34,7 +34,8 @@ include(`target-avx.ll') ifelse(LLVM_VERSION, `LLVM_3_0', `rdrand_decls()', LLVM_VERSION, `LLVM_3_1', `rdrand_decls()', `rdrand_definition()') -saturation_arithmetic_vec8() + +saturation_arithmetic() ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; int min/max diff --git a/builtins/target-avx2.ll b/builtins/target-avx2.ll index c5f8e84f..20ecef47 100644 --- a/builtins/target-avx2.ll +++ b/builtins/target-avx2.ll @@ -38,7 +38,8 @@ include(`target-avx.ll') ifelse(LLVM_VERSION, `LLVM_3_0', `rdrand_decls()', LLVM_VERSION, `LLVM_3_1', `rdrand_decls()', `rdrand_definition()') -saturation_arithmetic_vec8() + +saturation_arithmetic() ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; int min/max diff --git a/builtins/target-generic-1.ll b/builtins/target-generic-1.ll index bb974932..af343496 100644 --- a/builtins/target-generic-1.ll +++ b/builtins/target-generic-1.ll @@ -9,7 +9,7 @@ packed_load_and_store() scans() int64minmax() aossoa() -saturation_arithmetic_scalar() +saturation_arithmetic() saturation_arithmetic_novec() ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/builtins/target-generic-16.ll b/builtins/target-generic-16.ll index 36a2ee4c..df04187c 100644 --- a/builtins/target-generic-16.ll +++ b/builtins/target-generic-16.ll @@ -31,4 +31,4 @@ define(`WIDTH',`16') include(`target-generic-common.ll') -saturation_arithmetic_vec16() +saturation_arithmetic() diff --git a/builtins/target-generic-4.ll b/builtins/target-generic-4.ll index a7e8dcaa..e43f45c5 100644 --- a/builtins/target-generic-4.ll +++ b/builtins/target-generic-4.ll @@ -31,4 +31,4 @@ define(`WIDTH',`4') include(`target-generic-common.ll') -saturation_arithmetic_vec4() +saturation_arithmetic() diff --git a/builtins/target-generic-8.ll b/builtins/target-generic-8.ll index b692322e..6b87509d 100644 --- a/builtins/target-generic-8.ll +++ b/builtins/target-generic-8.ll @@ -31,4 +31,4 @@ define(`WIDTH',`8') include(`target-generic-common.ll') -saturation_arithmetic_vec8() +saturation_arithmetic() diff --git a/builtins/target-generic-common.ll b/builtins/target-generic-common.ll index c4d3b950..6f5199d8 100644 --- a/builtins/target-generic-common.ll +++ b/builtins/target-generic-common.ll @@ -41,7 +41,7 @@ stdlib_core() scans() reduce_equal(WIDTH) rdrand_decls() -saturation_arithmetic_scalar() +saturation_arithmetic_uniform() ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; broadcast/rotate/shuffle diff --git a/builtins/target-sse2-common.ll b/builtins/target-sse2-common.ll index b5c5559c..d8a461aa 100644 --- a/builtins/target-sse2-common.ll +++ b/builtins/target-sse2-common.ll @@ -34,7 +34,7 @@ define_prefetches() define_shuffles() aossoa() rdrand_decls() -saturation_arithmetic_scalar() +saturation_arithmetic_uniform() ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; rcp diff --git a/builtins/target-sse2-x2.ll b/builtins/target-sse2-x2.ll index b4b52d91..1cb2abc4 100644 --- a/builtins/target-sse2-x2.ll +++ b/builtins/target-sse2-x2.ll @@ -44,7 +44,7 @@ stdlib_core() packed_load_and_store() scans() int64minmax() -saturation_arithmetic_vec8() +saturation_arithmetic() include(`target-sse2-common.ll') diff --git a/builtins/target-sse2.ll b/builtins/target-sse2.ll index bdf6f848..ee8b533c 100644 --- a/builtins/target-sse2.ll +++ b/builtins/target-sse2.ll @@ -41,7 +41,7 @@ stdlib_core() packed_load_and_store() scans() int64minmax() -saturation_arithmetic_vec4() +saturation_arithmetic() include(`target-sse2-common.ll') diff --git a/builtins/target-sse4-16.ll b/builtins/target-sse4-16.ll index 1c0b045a..00ff2519 100644 --- a/builtins/target-sse4-16.ll +++ b/builtins/target-sse4-16.ll @@ -41,7 +41,7 @@ stdlib_core() packed_load_and_store() scans() int64minmax() -saturation_arithmetic_vec8() +saturation_arithmetic() include(`target-sse4-common.ll') diff --git a/builtins/target-sse4-8.ll b/builtins/target-sse4-8.ll index 49351856..15c577e8 100644 --- a/builtins/target-sse4-8.ll +++ b/builtins/target-sse4-8.ll @@ -41,7 +41,7 @@ stdlib_core() packed_load_and_store() scans() int64minmax() -saturation_arithmetic_vec16() +saturation_arithmetic() include(`target-sse4-common.ll') diff --git a/builtins/target-sse4-common.ll b/builtins/target-sse4-common.ll index 8eeaa413..2dd5c149 100644 --- a/builtins/target-sse4-common.ll +++ b/builtins/target-sse4-common.ll @@ -37,7 +37,7 @@ define_prefetches() define_shuffles() aossoa() rdrand_decls() -saturation_arithmetic_scalar() +saturation_arithmetic_uniform() ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; rounding floats diff --git a/builtins/target-sse4-x2.ll b/builtins/target-sse4-x2.ll index 2cd0ea4d..59a6942a 100644 --- a/builtins/target-sse4-x2.ll +++ b/builtins/target-sse4-x2.ll @@ -44,7 +44,7 @@ stdlib_core() packed_load_and_store() scans() int64minmax() -saturation_arithmetic_vec8() +saturation_arithmetic() include(`target-sse4-common.ll') diff --git a/builtins/target-sse4.ll b/builtins/target-sse4.ll index 96effe39..4762836d 100644 --- a/builtins/target-sse4.ll +++ b/builtins/target-sse4.ll @@ -41,7 +41,7 @@ stdlib_core() packed_load_and_store() scans() int64minmax() -saturation_arithmetic_vec4() +saturation_arithmetic() include(`target-sse4-common.ll') diff --git a/builtins/util.m4 b/builtins/util.m4 index e0f7aaec..6f36f71e 100644 --- a/builtins/util.m4 +++ b/builtins/util.m4 @@ -179,10 +179,17 @@ define(`convert32to16', ` ') ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;saturation arithmetic -;;scalar saturation arithmetic +;;saturation arithmetic + +define(`saturation_arithmetic', +`ifelse(WIDTH, `4', `saturation_arithmetic_vec4()', + WIDTH, `8', `saturation_arithmetic_vec8()', + WIDTH, `16', `saturation_arithmetic_vec16()', + `saturation_arithmetic_uniform()')') -define(`saturation_arithmetic_scalar', ` +;;uniform saturation arithmetic + +define(`saturation_arithmetic_uniform', ` declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone define i8 @__padds_i8(i8 %a0, i8 %a1) { sse_binary_scalar(ret, 16, i8, @llvm.x86.sse2.padds.b, %a0, %a1) @@ -303,168 +310,168 @@ define @__psubus_vi16(, ) { ;;4-wide vector saturation arithmetic define(`saturation_arithmetic_vec4', ` -define @__padds_vi8(, ) { +define <4 x i8> @__padds_vi8(<4 x i8>, <4 x i8>) { convert4to16(i8, %0, %v0) convert4to16(i8, %1, %v1) %r16 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %v0, <16 x i8> %v1) convert16to4(i8, %r16, %r) - ret %r + ret <4 x i8> %r } -define @__padds_vi16(, ) { +define <4 x i16> @__padds_vi16(<4 x i16>, <4 x i16>) { convert4to8(i16, %0, %v0) convert4to8(i16, %1, %v1) %r16 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %v0, <8 x i16> %v1) convert8to4(i16, %r16, %r) - ret %r + ret <4 x i16> %r } -define @__paddus_vi8(, ) { +define <4 x i8> @__paddus_vi8(<4 x i8>, <4 x i8>) { convert4to16(i8, %0, %v0) convert4to16(i8, %1, %v1) %r16 = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %v0, <16 x i8> %v1) convert16to4(i8, %r16, %r) - ret %r + ret <4 x i8> %r } -define @__paddus_vi16(, ) { +define <4 x i16> @__paddus_vi16(<4 x i16>, <4 x i16>) { convert4to8(i16, %0, %v0) convert4to8(i16, %1, %v1) %r16 = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %v0, <8 x i16> %v1) convert8to4(i16, %r16, %r) - ret %r + ret <4 x i16> %r } -define @__psubs_vi8(, ) { +define <4 x i8> @__psubs_vi8(<4 x i8>, <4 x i8>) { convert4to16(i8, %0, %v0) convert4to16(i8, %1, %v1) %r16 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %v0, <16 x i8> %v1) convert16to4(i8, %r16, %r) - ret %r + ret <4 x i8> %r } -define @__psubs_vi16(, ) { +define <4 x i16> @__psubs_vi16(<4 x i16>, <4 x i16>) { convert4to8(i16, %0, %v0) convert4to8(i16, %1, %v1) %r16 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %v0, <8 x i16> %v1) convert8to4(i16, %r16, %r) - ret %r + ret <4 x i16> %r } -define @__psubus_vi8(, ) { +define <4 x i8> @__psubus_vi8(<4 x i8>, <4 x i8>) { convert4to16(i8, %0, %v0) convert4to16(i8, %1, %v1) %r16 = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %v0, <16 x i8> %v1) convert16to4(i8, %r16, %r) - ret %r + ret <4 x i8> %r } -define @__psubus_vi16(, ) { +define <4 x i16> @__psubus_vi16(<4 x i16>, <4 x i16>) { convert4to8(i16, %0, %v0) convert4to8(i16, %1, %v1) %r16 = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %v0, <8 x i16> %v1) convert8to4(i16, %r16, %r) - ret %r + ret <4 x i16> %r } ') ;;8-wide vector saturation arithmetic define(`saturation_arithmetic_vec8', ` -define @__padds_vi8(, ) { +define <8 x i8> @__padds_vi8(<8 x i8>, <8 x i8>) { convert8to16(i8, %0, %v0) convert8to16(i8, %1, %v1) %r16 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %v0, <16 x i8> %v1) convert16to8(i8, %r16, %r) - ret %r + ret <8 x i8> %r } -define @__padds_vi16( %a0, %a1) { - %res = call @llvm.x86.sse2.padds.w( %a0, %a1) - ret %res +define <8 x i16> @__padds_vi16(<8 x i16> %a0, <8 x i16> %a1) { + %res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1) + ret <8 x i16> %res } -define @__paddus_vi8(, ) { +define <8 x i8> @__paddus_vi8(<8 x i8>, <8 x i8>) { convert8to16(i8, %0, %v0) convert8to16(i8, %1, %v1) %r16 = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %v0, <16 x i8> %v1) convert16to8(i8, %r16, %r) - ret %r + ret <8 x i8> %r } -define @__paddus_vi16( %a0, %a1) { - %res = call @llvm.x86.sse2.paddus.w( %a0, %a1) - ret %res +define <8 x i16> @__paddus_vi16(<8 x i16> %a0, <8 x i16> %a1) { + %res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1) + ret <8 x i16> %res } -define @__psubs_vi8(, ) { +define <8 x i8> @__psubs_vi8(<8 x i8>, <8 x i8>) { convert8to16(i8, %0, %v0) convert8to16(i8, %1, %v1) %r16 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %v0, <16 x i8> %v1) convert16to8(i8, %r16, %r) - ret %r + ret <8 x i8> %r } -define @__psubs_vi16( %a0, %a1) { - %res = call @llvm.x86.sse2.psubs.w( %a0, %a1) - ret %res +define <8 x i16> @__psubs_vi16(<8 x i16> %a0, <8 x i16> %a1) { + %res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1) + ret <8 x i16> %res } -define @__psubus_vi8(, ) { +define <8 x i8> @__psubus_vi8(<8 x i8>, <8 x i8>) { convert8to16(i8, %0, %v0) convert8to16(i8, %1, %v1) %r16 = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %v0, <16 x i8> %v1) convert16to8(i8, %r16, %r) - ret %r + ret <8 x i8> %r } -define @__psubus_vi16( %a0, %a1) { - %res = call @llvm.x86.sse2.psubus.w( %a0, %a1) - ret %res +define <8 x i16> @__psubus_vi16(<8 x i16> %a0, <8 x i16> %a1) { + %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1) + ret <8 x i16> %res } ') ;;16-wide vector saturation arithmetic define(`saturation_arithmetic_vec16', ` -define @__padds_vi8( %a0, %a1) { - %res = call @llvm.x86.sse2.padds.b( %a0, %a1) ; <<16 x i8>> [#uses=1] - ret %res +define <16 x i8> @__padds_vi8(<16 x i8> %a0, <16 x i8> %a1) { + %res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res } -define @__padds_vi16( %a0, %a1) { +define <16 x i16> @__padds_vi16(<16 x i16> %a0, <16 x i16> %a1) { binary8to16(ret, i16, @llvm.x86.sse2.padds.w, %a0, %a1) - ret %ret + ret <16 x i16> %ret } -define @__paddus_vi8( %a0, %a1) { - %res = call @llvm.x86.sse2.paddus.b( %a0, %a1) ; <<16 x i8>> [#uses=1] - ret %res +define <16 x i8> @__paddus_vi8(<16 x i8> %a0, <16 x i8> %a1) { + %res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res } -define @__paddus_vi16( %a0, %a1) { +define <16 x i16> @__paddus_vi16(<16 x i16> %a0, <16 x i16> %a1) { binary8to16(ret, i16, @llvm.x86.sse2.paddus.w, %a0, %a1) - ret %ret + ret <16 x i16> %ret } -define @__psubs_vi8( %a0, %a1) { - %res = call @llvm.x86.sse2.psubs.b( %a0, %a1) ; <<16 x i8>> [#uses=1] - ret %res +define <16 x i8> @__psubs_vi8(<16 x i8> %a0, <16 x i8> %a1) { + %res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res } -define @__psubs_vi16( %a0, %a1) { +define <16 x i16> @__psubs_vi16(<16 x i16> %a0, <16 x i16> %a1) { binary8to16(ret, i16, @llvm.x86.sse2.psubs.w, %a0, %a1) - ret %ret + ret <16 x i16> %ret } -define @__psubus_vi8( %a0, %a1) { - %res = call @llvm.x86.sse2.psubus.b( %a0, %a1) ; <<16 x i8>> [#uses=1] - ret %res +define <16 x i8> @__psubus_vi8(<16 x i8> %a0, <16 x i8> %a1) { + %res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res } -define @__psubus_vi16( %a0, %a1) { +define <16 x i16> @__psubus_vi16(<16 x i16> %a0, <16 x i16> %a1) { binary8to16(ret, i16, @llvm.x86.sse2.psubus.w, %a0, %a1) - ret %ret + ret <16 x i16> %ret } ') diff --git a/tests/padds_i16-2.ispc b/tests/padds_i16-2.ispc new file mode 100644 index 00000000..83234804 --- /dev/null +++ b/tests/padds_i16-2.ispc @@ -0,0 +1,11 @@ + +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + uniform int16 a = -32768; // min signed int16 + RET[programIndex] = saturating_add(a, -b); +} + +export void result(uniform float RET[]) { + RET[programIndex] = (uniform int16) -32768; +} diff --git a/tests/padds_i16.ispc b/tests/padds_i16.ispc index 930593ac..e5456416 100644 --- a/tests/padds_i16.ispc +++ b/tests/padds_i16.ispc @@ -1,11 +1,11 @@ export uniform int width() { return programCount; } -export void f_f(uniform float RET[], uniform float aFOO[]) { - uniform int16 a = 32767, b = 32767; // max signed int16 +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + uniform int16 a = 32767; // max signed int16 RET[programIndex] = saturating_add(a, b); } export void result(uniform float RET[]) { - RET[programIndex] = 32767; + RET[programIndex] = (uniform int16) 32767; } diff --git a/tests/padds_i8-2.ispc b/tests/padds_i8-2.ispc new file mode 100644 index 00000000..9a303d70 --- /dev/null +++ b/tests/padds_i8-2.ispc @@ -0,0 +1,11 @@ + +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + uniform int8 a = -128; // min signed int8 + RET[programIndex] = saturating_add(a, -b); +} + +export void result(uniform float RET[]) { + RET[programIndex] = (uniform int8) -128; +} diff --git a/tests/padds_i8.ispc b/tests/padds_i8.ispc index 6d72a61b..bbcc4cc7 100644 --- a/tests/padds_i8.ispc +++ b/tests/padds_i8.ispc @@ -1,11 +1,11 @@ export uniform int width() { return programCount; } -export void f_f(uniform float RET[], uniform float aFOO[]) { - uniform int8 a = 127, b = 127; // max signed int8 +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + uniform int8 a = 127; // max signed int8 RET[programIndex] = saturating_add(a, b); } export void result(uniform float RET[]) { - RET[programIndex] = 127; + RET[programIndex] = (uniform int8) 127; } diff --git a/tests/padds_vi16-2.ispc b/tests/padds_vi16-2.ispc new file mode 100644 index 00000000..5f1eda37 --- /dev/null +++ b/tests/padds_vi16-2.ispc @@ -0,0 +1,11 @@ + +export uniform int width() { return programCount; } + +export void f_f(uniform float RET[], uniform float aFOO[]) { + varying int16 a = -32768, b = aFOO[programIndex]; // max signed int16 + RET[programIndex] = saturating_add(a, -b); +} + +export void result(uniform float RET[]) { + RET[programIndex] = (varying int16) -32768; +} diff --git a/tests/padds_vi16.ispc b/tests/padds_vi16.ispc index b48d776a..e3bd0f51 100644 --- a/tests/padds_vi16.ispc +++ b/tests/padds_vi16.ispc @@ -2,10 +2,10 @@ export uniform int width() { return programCount; } export void f_f(uniform float RET[], uniform float aFOO[]) { - varying int16 a = 32767, b = 32767; // max signed int16 + varying int16 a = 32767, b = aFOO[programIndex]; // max signed int16 RET[programIndex] = saturating_add(a, b); } export void result(uniform float RET[]) { - RET[programIndex] = 32767; + RET[programIndex] = (varying int16) 32767; } diff --git a/tests/padds_vi8-2.ispc b/tests/padds_vi8-2.ispc new file mode 100644 index 00000000..e3302d18 --- /dev/null +++ b/tests/padds_vi8-2.ispc @@ -0,0 +1,11 @@ + +export uniform int width() { return programCount; } + +export void f_f(uniform float RET[], uniform float aFOO[]) { + varying int8 a = -128, b = aFOO[programIndex]; // max signed int8 + RET[programIndex] = saturating_add(a, -b); +} + +export void result(uniform float RET[]) { + RET[programIndex] = (varying int8) -128; +} diff --git a/tests/padds_vi8.ispc b/tests/padds_vi8.ispc index 71d42cb8..df921414 100644 --- a/tests/padds_vi8.ispc +++ b/tests/padds_vi8.ispc @@ -2,10 +2,10 @@ export uniform int width() { return programCount; } export void f_f(uniform float RET[], uniform float aFOO[]) { - varying int8 a = 127, b = 127; // max signed int8 + varying int8 a = 127, b = aFOO[programIndex]; // max signed int8 RET[programIndex] = saturating_add(a, b); } export void result(uniform float RET[]) { - RET[programIndex] = 127; + RET[programIndex] = (varying int8) 127; } diff --git a/tests/paddus_i16.ispc b/tests/paddus_i16.ispc index 968953fa..e38f6db7 100644 --- a/tests/paddus_i16.ispc +++ b/tests/paddus_i16.ispc @@ -1,11 +1,11 @@ export uniform int width() { return programCount; } -export void f_f(uniform float RET[], uniform float aFOO[]) { - uniform unsigned int16 a = 65535, b = 65535; // max unsigned int16 +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + uniform unsigned int16 a = 65535; // max unsigned int16 RET[programIndex] = saturating_add(a, b); } export void result(uniform float RET[]) { - RET[programIndex] = 65535; + RET[programIndex] = (uniform unsigned int16) 65535; } diff --git a/tests/paddus_i8.ispc b/tests/paddus_i8.ispc index 44c41a6c..7cd3ecf8 100644 --- a/tests/paddus_i8.ispc +++ b/tests/paddus_i8.ispc @@ -1,11 +1,11 @@ export uniform int width() { return programCount; } -export void f_f(uniform float RET[], uniform float aFOO[]) { - uniform unsigned int8 a = 255, b = 255; // max unsigned int8 +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + uniform unsigned int8 a = 255; // max unsigned int8 RET[programIndex] = saturating_add(a, b); } export void result(uniform float RET[]) { - RET[programIndex] = 255; + RET[programIndex] = (uniform unsigned int8) 255; } diff --git a/tests/paddus_vi16.ispc b/tests/paddus_vi16.ispc index 4d15e49b..c4454cd2 100644 --- a/tests/paddus_vi16.ispc +++ b/tests/paddus_vi16.ispc @@ -2,10 +2,10 @@ export uniform int width() { return programCount; } export void f_f(uniform float RET[], uniform float aFOO[]) { - varying unsigned int16 a = 65535, b = 65535; // max unsigned int16 + varying unsigned int16 a = 65535, b = aFOO[programIndex]; // max unsigned int16 RET[programIndex] = saturating_add(a, b); } export void result(uniform float RET[]) { - RET[programIndex] = 65535; + RET[programIndex] = (varying unsigned int16) 65535; } diff --git a/tests/paddus_vi8.ispc b/tests/paddus_vi8.ispc index 77fcec7a..b7b970ff 100644 --- a/tests/paddus_vi8.ispc +++ b/tests/paddus_vi8.ispc @@ -2,10 +2,10 @@ export uniform int width() { return programCount; } export void f_f(uniform float RET[], uniform float aFOO[]) { - varying unsigned int8 a = 255, b = 255; // max unsigned int8 + varying unsigned int8 a = 255, b = aFOO[programIndex]; // max unsigned int8 RET[programIndex] = saturating_add(a, b); } export void result(uniform float RET[]) { - RET[programIndex] = 255; + RET[programIndex] = (varying unsigned int8) 255; } diff --git a/tests/psubs_i16-2.ispc b/tests/psubs_i16-2.ispc new file mode 100644 index 00000000..ace62b1c --- /dev/null +++ b/tests/psubs_i16-2.ispc @@ -0,0 +1,11 @@ + +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + uniform int16 a = 32767; // max signed int16 + RET[programIndex] = saturating_sub(a, -b); +} + +export void result(uniform float RET[]) { + RET[programIndex] = (uniform int16) 32767; +} diff --git a/tests/psubs_i16.ispc b/tests/psubs_i16.ispc index 163af2da..47f3d2b9 100644 --- a/tests/psubs_i16.ispc +++ b/tests/psubs_i16.ispc @@ -1,11 +1,11 @@ export uniform int width() { return programCount; } -export void f_f(uniform float RET[], uniform float aFOO[]) { - uniform int16 a = -32768, b = 32767; // min and max signed int16 +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + uniform int16 a = -32768; // min signed int16 RET[programIndex] = saturating_sub(a, b); } export void result(uniform float RET[]) { - RET[programIndex] = -32768; + RET[programIndex] = (uniform int16) -32768; } diff --git a/tests/psubs_i8-2.ispc b/tests/psubs_i8-2.ispc new file mode 100644 index 00000000..6d3d608a --- /dev/null +++ b/tests/psubs_i8-2.ispc @@ -0,0 +1,11 @@ + +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + uniform int8 a = 127; // max signed int8 + RET[programIndex] = saturating_sub(a, -b); +} + +export void result(uniform float RET[]) { + RET[programIndex] = (uniform int8) 127; +} diff --git a/tests/psubs_i8.ispc b/tests/psubs_i8.ispc index 1dba8fe3..fbc24d25 100644 --- a/tests/psubs_i8.ispc +++ b/tests/psubs_i8.ispc @@ -1,11 +1,11 @@ export uniform int width() { return programCount; } -export void f_f(uniform float RET[], uniform float aFOO[]) { - uniform int8 a = -128, b = 127; // min and max signed int8 +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + uniform int8 a = -128; // min signed int8 RET[programIndex] = saturating_sub(a, b); } export void result(uniform float RET[]) { - RET[programIndex] = -128; + RET[programIndex] = (uniform int8) -128; } diff --git a/tests/psubs_vi16-2.ispc b/tests/psubs_vi16-2.ispc new file mode 100644 index 00000000..ef1b2ef4 --- /dev/null +++ b/tests/psubs_vi16-2.ispc @@ -0,0 +1,11 @@ + +export uniform int width() { return programCount; } + +export void f_f(uniform float RET[], uniform float aFOO[]) { + varying int16 a = 32767, b = aFOO[programIndex]; // min unsigned int16 + RET[programIndex] = saturating_sub(a, -b); +} + +export void result(uniform float RET[]) { + RET[programIndex] = (varying int16) 32767; +} diff --git a/tests/psubs_vi16.ispc b/tests/psubs_vi16.ispc index 3208e842..e405a23f 100644 --- a/tests/psubs_vi16.ispc +++ b/tests/psubs_vi16.ispc @@ -2,10 +2,10 @@ export uniform int width() { return programCount; } export void f_f(uniform float RET[], uniform float aFOO[]) { - varying int16 a = -32768, b = 32767; // min and max unsigned int16 + varying int16 a = -32768, b = aFOO[programIndex]; // min unsigned int16 RET[programIndex] = saturating_sub(a, b); } export void result(uniform float RET[]) { - RET[programIndex] = -32768; + RET[programIndex] = (varying int16) -32768; } diff --git a/tests/psubs_vi8-2.ispc b/tests/psubs_vi8-2.ispc new file mode 100644 index 00000000..b7fb02c6 --- /dev/null +++ b/tests/psubs_vi8-2.ispc @@ -0,0 +1,11 @@ + +export uniform int width() { return programCount; } + +export void f_f(uniform float RET[], uniform float aFOO[]) { + varying int8 a = 127, b = aFOO[programIndex]; // min unsigned int8 + RET[programIndex] = saturating_sub(a, -b); +} + +export void result(uniform float RET[]) { + RET[programIndex] = (varying int8) 127; +} diff --git a/tests/psubs_vi8.ispc b/tests/psubs_vi8.ispc index 143aaf4e..7d852f0a 100644 --- a/tests/psubs_vi8.ispc +++ b/tests/psubs_vi8.ispc @@ -2,10 +2,10 @@ export uniform int width() { return programCount; } export void f_f(uniform float RET[], uniform float aFOO[]) { - varying int8 a = -128, b = 127; // min and max unsigned int8 + varying int8 a = -128, b = aFOO[programIndex]; // min unsigned int8 RET[programIndex] = saturating_sub(a, b); } export void result(uniform float RET[]) { - RET[programIndex] = -128; + RET[programIndex] = (varying int8) -128; } diff --git a/tests/psubus_i16.ispc b/tests/psubus_i16.ispc index bb62f03f..a7f60603 100644 --- a/tests/psubus_i16.ispc +++ b/tests/psubus_i16.ispc @@ -1,11 +1,11 @@ export uniform int width() { return programCount; } -export void f_f(uniform float RET[], uniform float aFOO[]) { - uniform unsigned int8 a = 0, b = 32767; // min and max unsigned int16 +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + uniform unsigned int8 a = 0; // min unsigned int16 RET[programIndex] = saturating_sub(a, b); } export void result(uniform float RET[]) { - RET[programIndex] = 0; + RET[programIndex] = (uniform unsigned int8) 0; } diff --git a/tests/psubus_i8.ispc b/tests/psubus_i8.ispc index 176ecc33..7cb7ecdc 100644 --- a/tests/psubus_i8.ispc +++ b/tests/psubus_i8.ispc @@ -1,11 +1,11 @@ export uniform int width() { return programCount; } -export void f_f(uniform float RET[], uniform float aFOO[]) { - uniform unsigned int8 a = 0, b = 255; // min and max unsigned int8 +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + uniform unsigned int8 a = 0; // min unsigned int8 RET[programIndex] = saturating_sub(a, b); } export void result(uniform float RET[]) { - RET[programIndex] = 0; + RET[programIndex] = (uniform unsigned int8) 0; } diff --git a/tests/psubus_vi16.ispc b/tests/psubus_vi16.ispc index ca58f374..e441b699 100644 --- a/tests/psubus_vi16.ispc +++ b/tests/psubus_vi16.ispc @@ -2,10 +2,10 @@ export uniform int width() { return programCount; } export void f_f(uniform float RET[], uniform float aFOO[]) { - varying unsigned int16 a = 0, b = 32767; // min and max unsigned int16 + varying unsigned int16 a = 0, b = aFOO[programIndex]; // min unsigned int16 RET[programIndex] = saturating_sub(a, b); } export void result(uniform float RET[]) { - RET[programIndex] = 0; + RET[programIndex] = (varying unsigned int16) 0; } diff --git a/tests/psubus_vi8.ispc b/tests/psubus_vi8.ispc index e730fd7e..7ba5f14a 100644 --- a/tests/psubus_vi8.ispc +++ b/tests/psubus_vi8.ispc @@ -2,10 +2,10 @@ export uniform int width() { return programCount; } export void f_f(uniform float RET[], uniform float aFOO[]) { - varying unsigned int8 a = 0, b = 255; // min and max unsigned int8 + varying unsigned int8 a = 0, b = aFOO[programIndex]; // min unsigned int8 RET[programIndex] = saturating_sub(a, b); } export void result(uniform float RET[]) { - RET[programIndex] = 0; + RET[programIndex] = (varying unsigned int8) 0; }