Merge pull request #673 from Vsevolod-Livinskij/master
Saturation arithmetic.
This commit is contained in:
@@ -489,12 +489,20 @@ lSetInternalFunctions(llvm::Module *module) {
|
||||
"__packed_load_active",
|
||||
"__packed_store_active",
|
||||
"__packed_store_active2",
|
||||
"__padds_vi8",
|
||||
"__padds_vi16",
|
||||
"__paddus_vi8",
|
||||
"__paddus_vi16",
|
||||
"__popcnt_int32",
|
||||
"__popcnt_int64",
|
||||
"__prefetch_read_uniform_1",
|
||||
"__prefetch_read_uniform_2",
|
||||
"__prefetch_read_uniform_3",
|
||||
"__prefetch_read_uniform_nt",
|
||||
"__psubs_vi8",
|
||||
"__psubs_vi16",
|
||||
"__psubus_vi8",
|
||||
"__psubus_vi16",
|
||||
"__rcp_uniform_float",
|
||||
"__rcp_varying_float",
|
||||
"__rcp_uniform_double",
|
||||
|
||||
@@ -40,6 +40,7 @@ stdlib_core()
|
||||
packed_load_and_store()
|
||||
scans()
|
||||
int64minmax()
|
||||
saturation_arithmetic()
|
||||
|
||||
include(`target-avx-common.ll')
|
||||
|
||||
|
||||
@@ -40,6 +40,7 @@ stdlib_core()
|
||||
packed_load_and_store()
|
||||
scans()
|
||||
int64minmax()
|
||||
saturation_arithmetic()
|
||||
|
||||
include(`target-avx-common.ll')
|
||||
|
||||
|
||||
@@ -32,6 +32,7 @@
|
||||
include(`target-avx.ll')
|
||||
|
||||
rdrand_decls()
|
||||
saturation_arithmetic()
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; int min/max
|
||||
|
||||
@@ -35,6 +35,8 @@ ifelse(LLVM_VERSION, `LLVM_3_0', `rdrand_decls()',
|
||||
LLVM_VERSION, `LLVM_3_1', `rdrand_decls()',
|
||||
`rdrand_definition()')
|
||||
|
||||
saturation_arithmetic()
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; int min/max
|
||||
|
||||
|
||||
@@ -39,6 +39,8 @@ ifelse(LLVM_VERSION, `LLVM_3_0', `rdrand_decls()',
|
||||
LLVM_VERSION, `LLVM_3_1', `rdrand_decls()',
|
||||
`rdrand_definition()')
|
||||
|
||||
saturation_arithmetic()
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; int min/max
|
||||
|
||||
|
||||
@@ -10,6 +10,7 @@ packed_load_and_store()
|
||||
scans()
|
||||
int64minmax()
|
||||
aossoa()
|
||||
saturation_arithmetic_novec()
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; masked store
|
||||
|
||||
@@ -31,4 +31,4 @@
|
||||
|
||||
define(`WIDTH',`16')
|
||||
include(`target-generic-common.ll')
|
||||
|
||||
saturation_arithmetic_novec()
|
||||
|
||||
@@ -31,3 +31,4 @@
|
||||
|
||||
define(`WIDTH',`32')
|
||||
include(`target-generic-common.ll')
|
||||
saturation_arithmetic_novec()
|
||||
|
||||
@@ -31,4 +31,4 @@
|
||||
|
||||
define(`WIDTH',`4')
|
||||
include(`target-generic-common.ll')
|
||||
|
||||
saturation_arithmetic_novec()
|
||||
|
||||
@@ -31,3 +31,4 @@
|
||||
|
||||
define(`WIDTH',`64')
|
||||
include(`target-generic-common.ll')
|
||||
saturation_arithmetic_novec()
|
||||
|
||||
@@ -31,4 +31,4 @@
|
||||
|
||||
define(`WIDTH',`8')
|
||||
include(`target-generic-common.ll')
|
||||
|
||||
saturation_arithmetic_novec()
|
||||
|
||||
@@ -44,6 +44,7 @@ stdlib_core()
|
||||
packed_load_and_store()
|
||||
scans()
|
||||
int64minmax()
|
||||
saturation_arithmetic()
|
||||
|
||||
include(`target-sse2-common.ll')
|
||||
|
||||
|
||||
@@ -41,6 +41,7 @@ stdlib_core()
|
||||
packed_load_and_store()
|
||||
scans()
|
||||
int64minmax()
|
||||
saturation_arithmetic()
|
||||
|
||||
include(`target-sse2-common.ll')
|
||||
|
||||
|
||||
@@ -41,6 +41,7 @@ stdlib_core()
|
||||
packed_load_and_store()
|
||||
scans()
|
||||
int64minmax()
|
||||
saturation_arithmetic()
|
||||
|
||||
include(`target-sse4-common.ll')
|
||||
|
||||
|
||||
@@ -41,6 +41,7 @@ stdlib_core()
|
||||
packed_load_and_store()
|
||||
scans()
|
||||
int64minmax()
|
||||
saturation_arithmetic()
|
||||
|
||||
include(`target-sse4-common.ll')
|
||||
|
||||
|
||||
@@ -44,6 +44,7 @@ stdlib_core()
|
||||
packed_load_and_store()
|
||||
scans()
|
||||
int64minmax()
|
||||
saturation_arithmetic()
|
||||
|
||||
include(`target-sse4-common.ll')
|
||||
|
||||
|
||||
@@ -41,6 +41,7 @@ stdlib_core()
|
||||
packed_load_and_store()
|
||||
scans()
|
||||
int64minmax()
|
||||
saturation_arithmetic()
|
||||
|
||||
include(`target-sse4-common.ll')
|
||||
|
||||
|
||||
410
builtins/util.m4
410
builtins/util.m4
@@ -49,6 +49,416 @@ define(`MASK_HIGH_BIT_ON',
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; vector conversion utilities
|
||||
;; convert vector of one width into vector of other width
|
||||
;;
|
||||
;; $1: vector element type
|
||||
;; $2: vector of the first width
|
||||
;; $3: vector of the second width
|
||||
|
||||
|
||||
;; Widen a 1-element vector to 8 elements; only element 0 carries data.
define(`convert1to8', `
  $3 = shufflevector <1 x $1> $2, <1 x $1> undef,
         <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
')
|
||||
|
||||
|
||||
;; Widen a 1-element vector to 16 elements; only element 0 carries data.
define(`convert1to16', `
  $3 = shufflevector <1 x $1> $2, <1 x $1> undef,
         <16 x i32> <i32 0,
                     i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
                     i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
                     i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
')
|
||||
|
||||
;; Widen a 4-element vector to 8 elements; the upper 4 elements are undefined.
define(`convert4to8', `
  $3 = shufflevector <4 x $1> $2, <4 x $1> undef,
         <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
')
|
||||
|
||||
;; Widen a 4-element vector to 16 elements; the upper 12 elements are undefined.
define(`convert4to16', `
  $3 = shufflevector <4 x $1> $2, <4 x $1> undef,
         <16 x i32> <i32 0, i32 1, i32 2, i32 3,
                     i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
                     i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
')
|
||||
|
||||
;; Widen an 8-element vector to 16 elements; the upper 8 elements are undefined.
define(`convert8to16', `
  $3 = shufflevector <8 x $1> $2, <8 x $1> undef,
         <16 x i32> <i32 0, i32 1, i32 2, i32 3,
                     i32 4, i32 5, i32 6, i32 7,
                     i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
')
|
||||
|
||||
;; Widen a 4-element vector to 32 elements; the upper 28 elements are undefined.
define(`convert4to32', `
  $3 = shufflevector <4 x $1> $2, <4 x $1> undef,
         <32 x i32> <i32 0, i32 1, i32 2, i32 3,
                     i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
                     i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
                     i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
                     i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
')
|
||||
|
||||
;; Widen an 8-element vector to 32 elements; the upper 24 elements are undefined.
;; The source operands must be typed as 8-wide vectors: the shuffle mask reads
;; elements 0 through 7 of the input.
define(`convert8to32', `
  $3 = shufflevector <8 x $1> $2, <8 x $1> undef,
         <32 x i32> <i32 0, i32 1, i32 2, i32 3,
                     i32 4, i32 5, i32 6, i32 7,
                     i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
                     i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
                     i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
')
|
||||
|
||||
;; Widen a 16-element vector to 32 elements; the upper 16 elements are undefined.
;; The source operands must be typed as 16-wide vectors: the shuffle mask reads
;; elements 0 through 15 of the input.
define(`convert16to32', `
  $3 = shufflevector <16 x $1> $2, <16 x $1> undef,
         <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
                     i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
                     i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
                     i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
')
|
||||
|
||||
;; Narrow an 8-element vector to its first element.
define(`convert8to1', `
  $3 = shufflevector <8 x $1> $2, <8 x $1> undef, <1 x i32> <i32 0>
')
|
||||
|
||||
|
||||
;; Narrow a 16-element vector to its first element.
define(`convert16to1', `
  $3 = shufflevector <16 x $1> $2, <16 x $1> undef, <1 x i32> <i32 0>
')
|
||||
|
||||
;; Narrow an 8-element vector to its first 4 elements.
define(`convert8to4', `
  $3 = shufflevector <8 x $1> $2, <8 x $1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
')
|
||||
|
||||
|
||||
;; Narrow a 16-element vector to its first 4 elements.
define(`convert16to4', `
  $3 = shufflevector <16 x $1> $2, <16 x $1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
')
|
||||
|
||||
;; Narrow a 16-element vector to its first 8 elements.
define(`convert16to8', `
  $3 = shufflevector <16 x $1> $2, <16 x $1> undef,
         <8 x i32> <i32 0, i32 1, i32 2, i32 3,
                    i32 4, i32 5, i32 6, i32 7>
')
|
||||
|
||||
;; Narrow a 32-element vector to its first 4 elements.
define(`convert32to4', `
  $3 = shufflevector <32 x $1> $2, <32 x $1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
')
|
||||
|
||||
;; Narrow a 32-element vector to its first 8 elements.
;; The shuffle mask must list exactly as many indices as its declared
;; 8-element type, one per result element.
define(`convert32to8', `
  $3 = shufflevector <32 x $1> $2, <32 x $1> undef,
         <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
')
|
||||
|
||||
;; Narrow a 32-element vector to its first 16 elements.
;; The shuffle mask must list exactly as many indices as its declared
;; 16-element type, one per result element.
define(`convert32to16', `
  $3 = shufflevector <32 x $1> $2, <32 x $1> undef,
         <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
                     i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
')
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;saturation arithmetic
|
||||
|
||||
;; Emit the intrinsic-backed saturating add/sub builtins for the vector width
;; of the current target. Widths other than 4, 8 and 16 print an error and
;; stop processing with exit status 1.
define(`saturation_arithmetic',
`ifelse(WIDTH,  `4', `saturation_arithmetic_vec4()',
        WIDTH,  `8', `saturation_arithmetic_vec8()',
        WIDTH, `16', `saturation_arithmetic_vec16() ',
        `errprint(`ERROR: saturation_arithmetic() macro called with unsupported width = 'WIDTH
)
         m4exit(`1')')
')
|
||||
|
||||
;; create vector constant. Used by saturation_arithmetic_novec_universal below.
|
||||
|
||||
;; Expands to a vector constant that repeats one scalar in every element.
;; Target widths 4, 8, 16, 32 and 64 are spelled out; any other width
;; yields a one-element vector.
;; $1: element type
;; $2: element value
define(`const_vector', `
ifelse(WIDTH,  `4', `<$1 $2, $1 $2, $1 $2, $1 $2>',
       WIDTH,  `8', `<$1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2>',
       WIDTH, `16', `<$1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2,
                      $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2>',
       WIDTH, `32', `<$1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2,
                      $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2,
                      $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2,
                      $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2>',
       WIDTH, `64', `<$1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2,
                      $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2,
                      $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2,
                      $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2,
                      $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2,
                      $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2,
                      $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2,
                      $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2>',
       `<$1 $2>')')
|
||||
|
||||
;; utility function used by saturation_arithmetic_novec below. This shouldn't be called by
|
||||
;; target .ll files directly.
|
||||
;; $1: {add,sub} (used in constructing function names)
|
||||
|
||||
define(`saturation_arithmetic_novec_universal', `
;; signed 8-bit elements: sign-extend to i16, clamp to -128..127, truncate
define <WIDTH x i8> @__p$1s_vi8(<WIDTH x i8>, <WIDTH x i8>) {
  %lhs_wide = sext <WIDTH x i8> %0 to <WIDTH x i16>
  %rhs_wide = sext <WIDTH x i8> %1 to <WIDTH x i16>
  %wide = $1 <WIDTH x i16> %lhs_wide, %rhs_wide
  %too_big = icmp sgt <WIDTH x i16> %wide, const_vector(i16, 127)
  %clamp_hi = select <WIDTH x i1> %too_big, <WIDTH x i16> const_vector(i16, 127), <WIDTH x i16> %wide
  %too_small = icmp slt <WIDTH x i16> %wide, const_vector(i16, -128)
  %clamped = select <WIDTH x i1> %too_small, <WIDTH x i16> const_vector(i16, -128), <WIDTH x i16> %clamp_hi
  %narrow = trunc <WIDTH x i16> %clamped to <WIDTH x i8>
  ret <WIDTH x i8> %narrow
}

;; signed 16-bit elements: sign-extend to i32, clamp to -32768..32767, truncate
define <WIDTH x i16> @__p$1s_vi16(<WIDTH x i16>, <WIDTH x i16>) {
  %lhs_wide = sext <WIDTH x i16> %0 to <WIDTH x i32>
  %rhs_wide = sext <WIDTH x i16> %1 to <WIDTH x i32>
  %wide = $1 <WIDTH x i32> %lhs_wide, %rhs_wide
  %too_big = icmp sgt <WIDTH x i32> %wide, const_vector(i32, 32767)
  %clamp_hi = select <WIDTH x i1> %too_big, <WIDTH x i32> const_vector(i32, 32767), <WIDTH x i32> %wide
  %too_small = icmp slt <WIDTH x i32> %wide, const_vector(i32, -32768)
  %clamped = select <WIDTH x i1> %too_small, <WIDTH x i32> const_vector(i32, -32768), <WIDTH x i32> %clamp_hi
  %narrow = trunc <WIDTH x i32> %clamped to <WIDTH x i16>
  ret <WIDTH x i16> %narrow
}

;; unsigned 8-bit elements: zero-extend to i16, clamp to 0..255, truncate.
;; A negative difference also compares above 255 as an unsigned value, but
;; the signed below-zero select is applied last and replaces it with 0.
define <WIDTH x i8> @__p$1us_vi8(<WIDTH x i8>, <WIDTH x i8>) {
  %lhs_wide = zext <WIDTH x i8> %0 to <WIDTH x i16>
  %rhs_wide = zext <WIDTH x i8> %1 to <WIDTH x i16>
  %wide = $1 <WIDTH x i16> %lhs_wide, %rhs_wide
  %too_big = icmp ugt <WIDTH x i16> %wide, const_vector(i16, 255)
  %clamp_hi = select <WIDTH x i1> %too_big, <WIDTH x i16> const_vector(i16, 255), <WIDTH x i16> %wide
  %too_small = icmp slt <WIDTH x i16> %wide, const_vector(i16, 0)
  %clamped = select <WIDTH x i1> %too_small, <WIDTH x i16> const_vector(i16, 0), <WIDTH x i16> %clamp_hi
  %narrow = trunc <WIDTH x i16> %clamped to <WIDTH x i8>
  ret <WIDTH x i8> %narrow
}

;; unsigned 16-bit elements: zero-extend to i32, clamp to 0..65535, truncate
define <WIDTH x i16> @__p$1us_vi16(<WIDTH x i16>, <WIDTH x i16>) {
  %lhs_wide = zext <WIDTH x i16> %0 to <WIDTH x i32>
  %rhs_wide = zext <WIDTH x i16> %1 to <WIDTH x i32>
  %wide = $1 <WIDTH x i32> %lhs_wide, %rhs_wide
  %too_big = icmp ugt <WIDTH x i32> %wide, const_vector(i32, 65535)
  %clamp_hi = select <WIDTH x i1> %too_big, <WIDTH x i32> const_vector(i32, 65535), <WIDTH x i32> %wide
  %too_small = icmp slt <WIDTH x i32> %wide, const_vector(i32, 0)
  %clamped = select <WIDTH x i1> %too_small, <WIDTH x i32> const_vector(i32, 0), <WIDTH x i32> %clamp_hi
  %narrow = trunc <WIDTH x i32> %clamped to <WIDTH x i16>
  ret <WIDTH x i16> %narrow
}
')
|
||||
|
||||
;; implementation for targets that do not have h/w instructions
|
||||
|
||||
;; Emit the generic saturating builtins for both operations: the signed and
;; unsigned, 8-bit and 16-bit variants of add and of sub.
define(`saturation_arithmetic_novec', `
saturation_arithmetic_novec_universal(add)
saturation_arithmetic_novec_universal(sub)
')
|
||||
|
||||
;;4-wide vector saturation arithmetic
|
||||
|
||||
;; Helper for the 4-wide saturating builtins: emits the 8-bit and 16-bit
;; variant of one operation. Inputs are padded to the 128-bit vector type the
;; SSE2 intrinsic takes and the low 4 elements of its result are returned.
;; $1: instruction stem, one of padds, paddus, psubs, psubus
define(`saturation_arithmetic_vec4_op', `
declare <16 x i8> @llvm.x86.sse2.$1.b(<16 x i8>, <16 x i8>) nounwind readnone
define <4 x i8> @__$1_vi8(<4 x i8>, <4 x i8>) {
  convert4to16(i8, %0, %lhs)
  convert4to16(i8, %1, %rhs)
  %wide = call <16 x i8> @llvm.x86.sse2.$1.b(<16 x i8> %lhs, <16 x i8> %rhs)
  convert16to4(i8, %wide, %low)
  ret <4 x i8> %low
}

declare <8 x i16> @llvm.x86.sse2.$1.w(<8 x i16>, <8 x i16>) nounwind readnone
define <4 x i16> @__$1_vi16(<4 x i16>, <4 x i16>) {
  convert4to8(i16, %0, %lhs)
  convert4to8(i16, %1, %rhs)
  %wide = call <8 x i16> @llvm.x86.sse2.$1.w(<8 x i16> %lhs, <8 x i16> %rhs)
  convert8to4(i16, %wide, %low)
  ret <4 x i16> %low
}
')

define(`saturation_arithmetic_vec4', `
saturation_arithmetic_vec4_op(padds)
saturation_arithmetic_vec4_op(paddus)
saturation_arithmetic_vec4_op(psubs)
saturation_arithmetic_vec4_op(psubus)
')
|
||||
|
||||
;;8-wide vector saturation arithmetic
|
||||
|
||||
;; Helper for the 8-wide saturating builtins: emits the 8-bit and 16-bit
;; variant of one operation. The 8-bit inputs are padded to 16 elements for
;; the SSE2 intrinsic; the 16-bit inputs already match its operand type.
;; $1: instruction stem, one of padds, paddus, psubs, psubus
define(`saturation_arithmetic_vec8_op', `
declare <16 x i8> @llvm.x86.sse2.$1.b(<16 x i8>, <16 x i8>) nounwind readnone
define <8 x i8> @__$1_vi8(<8 x i8>, <8 x i8>) {
  convert8to16(i8, %0, %lhs)
  convert8to16(i8, %1, %rhs)
  %wide = call <16 x i8> @llvm.x86.sse2.$1.b(<16 x i8> %lhs, <16 x i8> %rhs)
  convert16to8(i8, %wide, %low)
  ret <8 x i8> %low
}

declare <8 x i16> @llvm.x86.sse2.$1.w(<8 x i16>, <8 x i16>) nounwind readnone
define <8 x i16> @__$1_vi16(<8 x i16> %lhs, <8 x i16> %rhs) {
  %val = call <8 x i16> @llvm.x86.sse2.$1.w(<8 x i16> %lhs, <8 x i16> %rhs)
  ret <8 x i16> %val
}
')

define(`saturation_arithmetic_vec8', `
saturation_arithmetic_vec8_op(padds)
saturation_arithmetic_vec8_op(paddus)
saturation_arithmetic_vec8_op(psubs)
saturation_arithmetic_vec8_op(psubus)
')
|
||||
|
||||
;;16-wide vector saturation arithmetic
|
||||
|
||||
;; Helper for the 16-wide saturating builtins: emits the 8-bit and 16-bit
;; variant of one operation. The 8-bit inputs already match the SSE2
;; intrinsic operand type; the 16-bit variant hands the 8-wide intrinsic to
;; a helper macro that is not defined in this part of the file.
;; $1: instruction stem, one of padds, paddus, psubs, psubus
define(`saturation_arithmetic_vec16_op', `
declare <16 x i8> @llvm.x86.sse2.$1.b(<16 x i8>, <16 x i8>) nounwind readnone
define <16 x i8> @__$1_vi8(<16 x i8> %lhs, <16 x i8> %rhs) {
  %val = call <16 x i8> @llvm.x86.sse2.$1.b(<16 x i8> %lhs, <16 x i8> %rhs)
  ret <16 x i8> %val
}

declare <8 x i16> @llvm.x86.sse2.$1.w(<8 x i16>, <8 x i16>) nounwind readnone
define <16 x i16> @__$1_vi16(<16 x i16> %a0, <16 x i16> %a1) {
  binary8to16(ret, i16, @llvm.x86.sse2.$1.w, %a0, %a1)
  ret <16 x i16> %ret
}
')

define(`saturation_arithmetic_vec16', `
saturation_arithmetic_vec16_op(padds)
saturation_arithmetic_vec16_op(paddus)
saturation_arithmetic_vec16_op(psubs)
saturation_arithmetic_vec16_op(psubus)
')
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; vector deconstruction utilities
|
||||
;; split 8-wide vector into 2 4-wide vectors
|
||||
;;
|
||||
|
||||
139
stdlib.ispc
139
stdlib.ispc
@@ -57,6 +57,43 @@
|
||||
#error Unknown value of ISPC_MASK_BITS
|
||||
#endif
|
||||
|
||||
/* Limits of integral types. */
|
||||
// 8-bit limits
#ifndef INT8_MAX
#define INT8_MAX (127)
#endif
#ifndef INT8_MIN
#define INT8_MIN (-INT8_MAX - 1)
#endif
#ifndef UINT8_MAX
#define UINT8_MAX (255)
#endif

// 16-bit limits
#ifndef INT16_MAX
#define INT16_MAX (32767)
#endif
#ifndef INT16_MIN
#define INT16_MIN (-INT16_MAX - 1)
#endif
#ifndef UINT16_MAX
#define UINT16_MAX (65535)
#endif

// 32-bit limits
#ifndef INT32_MAX
#define INT32_MAX (2147483647)
#endif
#ifndef INT32_MIN
#define INT32_MIN (-INT32_MAX - 1)
#endif
#ifndef UINT32_MAX
#define UINT32_MAX (4294967295)
#endif

// 64-bit limits
#ifndef INT64_MAX
#define INT64_MAX (9223372036854775807)
#endif
#ifndef INT64_MIN
#define INT64_MIN (-INT64_MAX - 1)
#endif
#ifndef UINT64_MAX
#define UINT64_MAX (18446744073709551615)
#endif
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// Low level primitives
|
||||
|
||||
@@ -4345,6 +4382,108 @@ static inline void fastmath() {
|
||||
__fastmath();
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// saturation arithmetic
|
||||
|
||||
// Saturating addition of two uniform int8 values: the sum is clamped to the
// int8 range instead of wrapping around.
static inline uniform int8 saturating_add(uniform int8 a, uniform int8 b) {
    // The arithmetic is carried out on unsigned copies of the operands.
    uniform unsigned int8 ua = a;
    uniform unsigned int8 ub = b;
    uniform unsigned int8 sum = ua + ub;
    // Value returned on overflow: INT8_MAX when the sign bit of a is clear,
    // INT8_MAX + 1 (the bit pattern of INT8_MIN) when it is set.
    ua = (ua >> 7) + INT8_MAX;
    // The sign bit of this expression is clear only when a and b have the
    // same sign and the wrapped sum has the opposite one, i.e. on overflow.
    uniform int8 overflow_test = (uniform int8) ((ua ^ ub) | ~(ub ^ sum));
    if (overflow_test >= 0)
        sum = ua;
    return sum;
}
|
||||
|
||||
// Varying int8 saturating addition; forwards both operands to the
// __padds_vi8 builtin and returns its result.
static inline varying int8 saturating_add(varying int8 a, varying int8 b) {
    varying int8 sum = __padds_vi8(a, b);
    return sum;
}
|
||||
|
||||
// Saturating addition of two uniform int16 values: the sum is clamped to the
// int16 range instead of wrapping around.
static inline uniform int16 saturating_add(uniform int16 a, uniform int16 b) {
    // The arithmetic is carried out on unsigned copies of the operands.
    uniform unsigned int16 ua = a;
    uniform unsigned int16 ub = b;
    uniform unsigned int16 sum = ua + ub;
    // Value returned on overflow: INT16_MAX when the sign bit of a is clear,
    // INT16_MAX + 1 (the bit pattern of INT16_MIN) when it is set.
    ua = (ua >> 15) + INT16_MAX;
    // The sign bit of this expression is clear only when a and b have the
    // same sign and the wrapped sum has the opposite one, i.e. on overflow.
    uniform int16 overflow_test = (uniform int16) ((ua ^ ub) | ~(ub ^ sum));
    if (overflow_test >= 0)
        sum = ua;
    return sum;
}
|
||||
|
||||
// Varying int16 saturating addition; forwards both operands to the
// __padds_vi16 builtin and returns its result.
static inline varying int16 saturating_add(varying int16 a, varying int16 b) {
    varying int16 sum = __padds_vi16(a, b);
    return sum;
}
|
||||
|
||||
// Saturating addition of two uniform unsigned int8 values: a sum that does
// not fit is replaced by the largest unsigned int8 value.
static inline uniform unsigned int8 saturating_add(uniform unsigned int8 a,
                                                   uniform unsigned int8 b) {
    uniform unsigned int8 sum = a + b;
    // A wrapped sum is smaller than a. Negating the converted comparison
    // result gives the mask that is OR-ed in (presumably all ones when the
    // comparison is true, zero otherwise).
    uniform int8 wrapped = (uniform int8)(sum < a);
    sum |= (-wrapped);
    return sum;
}
|
||||
|
||||
// Varying unsigned int8 saturating addition; forwards both operands to the
// __paddus_vi8 builtin and returns its result.
static inline varying unsigned int8 saturating_add(varying unsigned int8 a,
                                                   varying unsigned int8 b) {
    varying unsigned int8 sum = __paddus_vi8(a, b);
    return sum;
}
|
||||
|
||||
// Saturating addition of two uniform unsigned int16 values: a sum that does
// not fit is replaced by the largest unsigned int16 value.
static inline uniform unsigned int16 saturating_add(uniform unsigned int16 a,
                                                    uniform unsigned int16 b) {
    uniform unsigned int16 sum = a + b;
    // A wrapped sum is smaller than a. Negating the converted comparison
    // result gives the mask that is OR-ed in (presumably all ones when the
    // comparison is true, zero otherwise).
    uniform int16 wrapped = (uniform int16)(sum < a);
    sum |= (-wrapped);
    return sum;
}
|
||||
|
||||
// Varying unsigned int16 saturating addition; forwards both operands to the
// __paddus_vi16 builtin and returns its result.
static inline varying unsigned int16 saturating_add(varying unsigned int16 a,
                                                    varying unsigned int16 b) {
    varying unsigned int16 sum = __paddus_vi16(a, b);
    return sum;
}
|
||||
|
||||
// Saturating subtraction of two uniform int8 values: the difference is
// clamped to the int8 range instead of wrapping around.
static inline uniform int8 saturating_sub(uniform int8 a, uniform int8 b) {
    // The arithmetic is carried out on unsigned copies of the operands.
    uniform unsigned int8 ua = a;
    uniform unsigned int8 ub = b;
    uniform unsigned int8 diff = ua - ub;
    // Value returned on overflow: INT8_MAX when the sign bit of a is clear,
    // INT8_MAX + 1 (the bit pattern of INT8_MIN) when it is set.
    ua = (ua >> 7) + INT8_MAX;
    // The sign bit of this expression is set only when a and b differ in
    // sign and the wrapped difference has the opposite sign of a.
    uniform int8 overflow_test = (uniform int8) ((ua ^ ub) & (ua ^ diff));
    if (overflow_test < 0)
        diff = ua;
    return diff;
}
|
||||
|
||||
// Varying int8 saturating subtraction; forwards both operands to the
// __psubs_vi8 builtin and returns its result.
static inline varying int8 saturating_sub(varying int8 a, varying int8 b) {
    varying int8 diff = __psubs_vi8(a, b);
    return diff;
}
|
||||
|
||||
// Saturating subtraction of two uniform int16 values: the difference is
// clamped to the int16 range instead of wrapping around.
static inline uniform int16 saturating_sub(uniform int16 a, uniform int16 b) {
    // The arithmetic is carried out on unsigned copies of the operands.
    uniform unsigned int16 ua = a;
    uniform unsigned int16 ub = b;
    uniform unsigned int16 diff = ua - ub;
    // Value returned on overflow: INT16_MAX when the sign bit of a is clear,
    // INT16_MAX + 1 (the bit pattern of INT16_MIN) when it is set.
    ua = (ua >> 15) + INT16_MAX;
    // The sign bit of this expression is set only when a and b differ in
    // sign and the wrapped difference has the opposite sign of a.
    uniform int16 overflow_test = (uniform int16) ((ua ^ ub) & (ua ^ diff));
    if (overflow_test < 0)
        diff = ua;
    return diff;
}
|
||||
|
||||
// Varying int16 saturating subtraction; forwards both operands to the
// __psubs_vi16 builtin and returns its result.
static inline varying int16 saturating_sub(varying int16 a, varying int16 b) {
    varying int16 diff = __psubs_vi16(a, b);
    return diff;
}
|
||||
|
||||
// Saturating subtraction of two uniform unsigned int8 values: a difference
// that would go below zero is replaced by zero.
static inline uniform unsigned int8 saturating_sub(uniform unsigned int8 a,
                                                   uniform unsigned int8 b) {
    uniform unsigned int8 diff = a - b;
    // Without a borrow the difference cannot exceed a. Negating the converted
    // comparison result gives the mask that is AND-ed in (presumably all ones
    // when the comparison is true, zero otherwise).
    uniform int8 in_range = (uniform int8)(diff <= a);
    diff &= (-in_range);
    return diff;
}
|
||||
|
||||
// Varying unsigned int8 saturating subtraction; forwards both operands to
// the __psubus_vi8 builtin and returns its result.
static inline varying unsigned int8 saturating_sub(varying unsigned int8 a,
                                                   varying unsigned int8 b) {
    varying unsigned int8 diff = __psubus_vi8(a, b);
    return diff;
}
|
||||
|
||||
// Saturating subtraction of two uniform unsigned int16 values: a difference
// that would go below zero is replaced by zero.
static inline uniform unsigned int16 saturating_sub(uniform unsigned int16 a,
                                                    uniform unsigned int16 b) {
    uniform unsigned int16 diff = a - b;
    // Without a borrow the difference cannot exceed a. Negating the converted
    // comparison result gives the mask that is AND-ed in (presumably all ones
    // when the comparison is true, zero otherwise).
    uniform int16 in_range = (uniform int16)(diff <= a);
    diff &= (-in_range);
    return diff;
}
|
||||
|
||||
// Varying unsigned int16 saturating subtraction; forwards both operands to
// the __psubus_vi16 builtin and returns its result.
static inline varying unsigned int16 saturating_sub(varying unsigned int16 a,
                                                    varying unsigned int16 b) {
    varying unsigned int16 diff = __psubus_vi16(a, b);
    return diff;
}
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// rdrand
|
||||
|
||||
|
||||
27
tests/padds_i16.ispc
Normal file
27
tests/padds_i16.ispc
Normal file
@@ -0,0 +1,27 @@
|
||||
|
||||
export uniform int width() {
    return programCount;
}

// Calls saturating_add with a uniform int16 first operand. Program instances
// rotate through three cases: b added to the largest value, -b added to the
// smallest value, and b added to the smallest value.
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    uniform int16 top = 32767;      // largest signed int16
    uniform int16 bottom = -32768;  // smallest signed int16
    int which = programIndex % 3;
    if (which == 0) {
        RET[programIndex] = saturating_add(top, b);
    } else if (which == 1) {
        RET[programIndex] = saturating_add(bottom, -b);
    } else {
        RET[programIndex] = saturating_add(bottom, b);
    }
}

// Expected output. The third case equals -32768 + 5, so the harness
// presumably passes b == 5.
export void result(uniform float RET[]) {
    int which = programIndex % 3;
    if (which == 0) {
        RET[programIndex] = (uniform int16) 32767;
    } else if (which == 1) {
        RET[programIndex] = (uniform int16) -32768;
    } else {
        RET[programIndex] = (uniform int16) -32763;
    }
}
|
||||
27
tests/padds_i8.ispc
Normal file
27
tests/padds_i8.ispc
Normal file
@@ -0,0 +1,27 @@
|
||||
|
||||
export uniform int width() {
    return programCount;
}

// Calls saturating_add with a uniform int8 first operand. Program instances
// rotate through three cases: b added to the largest value, -b added to the
// smallest value, and b added to the smallest value.
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    uniform int8 top = 127;      // largest signed int8
    uniform int8 bottom = -128;  // smallest signed int8
    int which = programIndex % 3;
    if (which == 0) {
        RET[programIndex] = saturating_add(top, b);
    } else if (which == 1) {
        RET[programIndex] = saturating_add(bottom, -b);
    } else {
        RET[programIndex] = saturating_add(bottom, b);
    }
}

// Expected output. The third case equals -128 + 5, so the harness
// presumably passes b == 5.
export void result(uniform float RET[]) {
    int which = programIndex % 3;
    if (which == 0) {
        RET[programIndex] = (uniform int8) 127;
    } else if (which == 1) {
        RET[programIndex] = (uniform int8) -128;
    } else {
        RET[programIndex] = (uniform int8) -123;
    }
}
|
||||
27
tests/padds_vi16.ispc
Normal file
27
tests/padds_vi16.ispc
Normal file
@@ -0,0 +1,27 @@
|
||||
|
||||
export uniform int width() {
    return programCount;
}

// Calls saturating_add with a varying int16 first operand. Program instances
// rotate through three cases: b added to the largest value, -b added to the
// smallest value, and b added to the smallest value.
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    varying int16 top = 32767;      // largest signed int16
    varying int16 bottom = -32768;  // smallest signed int16
    int which = programIndex % 3;
    if (which == 0) {
        RET[programIndex] = saturating_add(top, b);
    } else if (which == 1) {
        RET[programIndex] = saturating_add(bottom, -b);
    } else {
        RET[programIndex] = saturating_add(bottom, b);
    }
}

// Expected output. The third case equals -32768 + 5, so the harness
// presumably passes b == 5.
export void result(uniform float RET[]) {
    int which = programIndex % 3;
    if (which == 0) {
        RET[programIndex] = (varying int16) 32767;
    } else if (which == 1) {
        RET[programIndex] = (varying int16) -32768;
    } else {
        RET[programIndex] = (varying int16) -32763;
    }
}
|
||||
27
tests/padds_vi8.ispc
Normal file
27
tests/padds_vi8.ispc
Normal file
@@ -0,0 +1,27 @@
|
||||
|
||||
export uniform int width() {
    return programCount;
}

// Calls saturating_add with a varying int8 first operand. Program instances
// rotate through three cases: b added to the largest value, -b added to the
// smallest value, and b added to the smallest value.
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    varying int8 top = 127;      // largest signed int8
    varying int8 bottom = -128;  // smallest signed int8
    int which = programIndex % 3;
    if (which == 0) {
        RET[programIndex] = saturating_add(top, b);
    } else if (which == 1) {
        RET[programIndex] = saturating_add(bottom, -b);
    } else {
        RET[programIndex] = saturating_add(bottom, b);
    }
}

// Expected output. The third case equals -128 + 5, so the harness
// presumably passes b == 5.
export void result(uniform float RET[]) {
    int which = programIndex % 3;
    if (which == 0) {
        RET[programIndex] = (varying int8) 127;
    } else if (which == 1) {
        RET[programIndex] = (varying int8) -128;
    } else {
        RET[programIndex] = (varying int8) -123;
    }
}
|
||||
21
tests/paddus_i16.ispc
Normal file
21
tests/paddus_i16.ispc
Normal file
@@ -0,0 +1,21 @@
|
||||
|
||||
export uniform int width() {
    return programCount;
}

// Calls saturating_add with a uniform unsigned int16 first operand. Even
// program instances add b to the largest value, odd ones add b to zero.
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    uniform unsigned int16 top = 65535;  // largest unsigned int16
    uniform unsigned int16 bottom = 0;   // smallest unsigned int16
    bool even_lane = (programIndex % 2 == 0);
    if (even_lane) {
        RET[programIndex] = saturating_add(top, b);
    } else {
        RET[programIndex] = saturating_add(bottom, b);
    }
}

// Expected output. The odd case equals 0 + 5, so the harness presumably
// passes b == 5.
export void result(uniform float RET[]) {
    bool even_lane = (programIndex % 2 == 0);
    if (even_lane) {
        RET[programIndex] = (uniform unsigned int16) 65535;
    } else {
        RET[programIndex] = (uniform unsigned int16) 5;
    }
}
|
||||
21
tests/paddus_i8.ispc
Normal file
21
tests/paddus_i8.ispc
Normal file
@@ -0,0 +1,21 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
uniform unsigned int8 a_max = 255, a_min = 0; // max and min unsigned int8
|
||||
if (programIndex % 2 == 0) {
|
||||
RET[programIndex] = saturating_add(a_max, b);
|
||||
}
|
||||
else {
|
||||
RET[programIndex] = saturating_add(a_min, b);
|
||||
}
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
if (programIndex % 2 == 0) {
|
||||
RET[programIndex] = (uniform unsigned int8) 255;
|
||||
}
|
||||
else {
|
||||
RET[programIndex] = (uniform unsigned int8) 5;
|
||||
}
|
||||
}
|
||||
21
tests/paddus_vi16.ispc
Normal file
21
tests/paddus_vi16.ispc
Normal file
@@ -0,0 +1,21 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
varying unsigned int16 a_max = 65535, a_min = 0; // max and min unsigned int16
|
||||
if (programIndex % 2 == 0) {
|
||||
RET[programIndex] = saturating_add(a_max, b);
|
||||
}
|
||||
else {
|
||||
RET[programIndex] = saturating_add(a_min, b);
|
||||
}
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
if (programIndex % 2 == 0) {
|
||||
RET[programIndex] = (varying unsigned int16) 65535;
|
||||
}
|
||||
else {
|
||||
RET[programIndex] = (varying unsigned int16) 5;
|
||||
}
|
||||
}
|
||||
22
tests/paddus_vi8.ispc
Normal file
22
tests/paddus_vi8.ispc
Normal file
@@ -0,0 +1,22 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
// Even program indices store saturating_add(a_max, b); odd ones store saturating_add(a_min, b).
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
varying unsigned int8 a_max = 255, a_min = 0; // max and min unsigned int8
|
||||
if (programIndex % 2 == 0) {
|
||||
RET[programIndex] = saturating_add(a_max, b);
|
||||
}
|
||||
else {
|
||||
RET[programIndex] = saturating_add(a_min, b);
|
||||
}
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
if (programIndex % 2 == 0) {
|
||||
RET[programIndex] = (varying unsigned int8) 255;
|
||||
}
|
||||
else {
|
||||
RET[programIndex] = (varying unsigned int8) 5;
|
||||
}
|
||||
}
|
||||
|
||||
27
tests/psubs_i16.ispc
Normal file
27
tests/psubs_i16.ispc
Normal file
@@ -0,0 +1,27 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
uniform int16 a_max = 32767, a_min = -32768; // max and min signed int16
|
||||
if (programIndex % 3 == 0) {
|
||||
RET[programIndex] = saturating_sub(a_min, b);
|
||||
}
|
||||
else if (programIndex % 3 == 1) {
|
||||
RET[programIndex] = saturating_sub(a_max, -b);
|
||||
}
|
||||
else {
|
||||
RET[programIndex] = saturating_sub(a_max, b);
|
||||
}
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
if (programIndex % 3 == 0) {
|
||||
RET[programIndex] = (uniform int16) -32768;
|
||||
}
|
||||
else if (programIndex % 3 == 1) {
|
||||
RET[programIndex] = (uniform int16) 32767;
|
||||
}
|
||||
else {
|
||||
RET[programIndex] = (uniform int16) 32762;
|
||||
}
|
||||
}
|
||||
27
tests/psubs_i8.ispc
Normal file
27
tests/psubs_i8.ispc
Normal file
@@ -0,0 +1,27 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
uniform int8 a_max = 127, a_min = -128; // max and min signed int8
|
||||
if (programIndex % 3 == 0) {
|
||||
RET[programIndex] = saturating_sub(a_min, b);
|
||||
}
|
||||
else if (programIndex % 3 == 1) {
|
||||
RET[programIndex] = saturating_sub(a_max, -b);
|
||||
}
|
||||
else {
|
||||
RET[programIndex] = saturating_sub(a_max, b);
|
||||
}
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
if (programIndex % 3 == 0) {
|
||||
RET[programIndex] = (uniform int8) -128;
|
||||
}
|
||||
else if (programIndex % 3 == 1) {
|
||||
RET[programIndex] = (uniform int8) 127;
|
||||
}
|
||||
else {
|
||||
RET[programIndex] = (uniform int8) 122;
|
||||
}
|
||||
}
|
||||
27
tests/psubs_vi16.ispc
Normal file
27
tests/psubs_vi16.ispc
Normal file
@@ -0,0 +1,27 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
varying int16 a_max = 32767, a_min = -32768; // max and min signed int16
|
||||
if (programIndex % 3 == 0) {
|
||||
RET[programIndex] = saturating_sub(a_min, b);
|
||||
}
|
||||
else if (programIndex % 3 == 1) {
|
||||
RET[programIndex] = saturating_sub(a_max, -b);
|
||||
}
|
||||
else {
|
||||
RET[programIndex] = saturating_sub(a_max, b);
|
||||
}
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
if (programIndex % 3 == 0) {
|
||||
RET[programIndex] = (varying int16) -32768;
|
||||
}
|
||||
else if (programIndex % 3 == 1) {
|
||||
RET[programIndex] = (varying int16) 32767;
|
||||
}
|
||||
else {
|
||||
RET[programIndex] = (varying int16) 32762;
|
||||
}
|
||||
}
|
||||
27
tests/psubs_vi8.ispc
Normal file
27
tests/psubs_vi8.ispc
Normal file
@@ -0,0 +1,27 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
varying int8 a_max = 127, a_min = -128; // max and min signed int8
|
||||
if (programIndex % 3 == 0) {
|
||||
RET[programIndex] = saturating_sub(a_min, b);
|
||||
}
|
||||
else if (programIndex % 3 == 1) {
|
||||
RET[programIndex] = saturating_sub(a_max, -b);
|
||||
}
|
||||
else {
|
||||
RET[programIndex] = saturating_sub(a_max, b);
|
||||
}
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
if (programIndex % 3 == 0) {
|
||||
RET[programIndex] = (varying int8) -128;
|
||||
}
|
||||
else if (programIndex % 3 == 1) {
|
||||
RET[programIndex] = (varying int8) 127;
|
||||
}
|
||||
else {
|
||||
RET[programIndex] = (varying int8) 122;
|
||||
}
|
||||
}
|
||||
21
tests/psubus_i16.ispc
Normal file
21
tests/psubus_i16.ispc
Normal file
@@ -0,0 +1,21 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
uniform unsigned int16 a_max = 65535, a_min = 0; // max and min unsigned int16
|
||||
if (programIndex % 2 == 0) {
|
||||
RET[programIndex] = saturating_sub(a_min, b);
|
||||
}
|
||||
else {
|
||||
RET[programIndex] = saturating_sub(a_max, b);
|
||||
}
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
if (programIndex % 2 == 0) {
|
||||
RET[programIndex] = (uniform unsigned int16) 0;
|
||||
}
|
||||
else {
|
||||
RET[programIndex] = (uniform unsigned int16) 65530;
|
||||
}
|
||||
}
|
||||
21
tests/psubus_i8.ispc
Normal file
21
tests/psubus_i8.ispc
Normal file
@@ -0,0 +1,21 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
uniform unsigned int8 a_max = 255, a_min = 0; // max and min unsigned int8
|
||||
if (programIndex % 2 == 0) {
|
||||
RET[programIndex] = saturating_sub(a_min, b);
|
||||
}
|
||||
else {
|
||||
RET[programIndex] = saturating_sub(a_max, b);
|
||||
}
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
if (programIndex % 2 == 0) {
|
||||
RET[programIndex] = (uniform unsigned int8) 0;
|
||||
}
|
||||
else {
|
||||
RET[programIndex] = (uniform unsigned int8) 250;
|
||||
}
|
||||
}
|
||||
21
tests/psubus_vi16.ispc
Normal file
21
tests/psubus_vi16.ispc
Normal file
@@ -0,0 +1,21 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
varying unsigned int16 a_max = 65535, a_min = 0; // max and min unsigned int16
|
||||
if (programIndex % 2 == 0) {
|
||||
RET[programIndex] = saturating_sub(a_min, b);
|
||||
}
|
||||
else {
|
||||
RET[programIndex] = saturating_sub(a_max, b);
|
||||
}
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
if (programIndex % 2 == 0) {
|
||||
RET[programIndex] = (varying unsigned int16) 0;
|
||||
}
|
||||
else {
|
||||
RET[programIndex] = (varying unsigned int16) 65530;
|
||||
}
|
||||
}
|
||||
21
tests/psubus_vi8.ispc
Normal file
21
tests/psubus_vi8.ispc
Normal file
@@ -0,0 +1,21 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
// Even program indices store saturating_sub(a_min, b); odd ones store saturating_sub(a_max, b).
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
varying unsigned int8 a_max = 255, a_min = 0; // max and min unsigned int8
|
||||
if (programIndex % 2 == 0) {
|
||||
RET[programIndex] = saturating_sub(a_min, b);
|
||||
}
|
||||
else {
|
||||
RET[programIndex] = saturating_sub(a_max, b);
|
||||
}
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
if (programIndex % 2 == 0) {
|
||||
RET[programIndex] = (varying unsigned int8) 0;
|
||||
}
|
||||
else {
|
||||
RET[programIndex] = (varying unsigned int8) 250;
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user