Added tests for saturation and some fixes for generic and avx target
This commit is contained in:
@@ -40,7 +40,6 @@ stdlib_core()
|
||||
packed_load_and_store()
|
||||
scans()
|
||||
int64minmax()
|
||||
saturation_arithmetic_vec8()
|
||||
|
||||
include(`target-avx-common.ll')
|
||||
|
||||
|
||||
@@ -32,6 +32,7 @@
|
||||
include(`target-avx.ll')
|
||||
|
||||
rdrand_decls()
|
||||
saturation_arithmetic_vec8()
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; int min/max
|
||||
|
||||
@@ -34,6 +34,7 @@ include(`target-avx.ll')
|
||||
ifelse(LLVM_VERSION, `LLVM_3_0', `rdrand_decls()',
|
||||
LLVM_VERSION, `LLVM_3_1', `rdrand_decls()',
|
||||
`rdrand_definition()')
|
||||
saturation_arithmetic_vec8()
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; int min/max
|
||||
|
||||
@@ -38,6 +38,7 @@ include(`target-avx.ll')
|
||||
ifelse(LLVM_VERSION, `LLVM_3_0', `rdrand_decls()',
|
||||
LLVM_VERSION, `LLVM_3_1', `rdrand_decls()',
|
||||
`rdrand_definition()')
|
||||
saturation_arithmetic_vec8()
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; int min/max
|
||||
|
||||
@@ -9,6 +9,8 @@ packed_load_and_store()
|
||||
scans()
|
||||
int64minmax()
|
||||
aossoa()
|
||||
saturation_arithmetic_scalar()
|
||||
saturation_arithmetic_novec()
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; masked store
|
||||
|
||||
@@ -31,4 +31,4 @@
|
||||
|
||||
define(`WIDTH',`16')
|
||||
include(`target-generic-common.ll')
|
||||
|
||||
saturation_arithmetic_vec16()
|
||||
|
||||
@@ -31,4 +31,4 @@
|
||||
|
||||
define(`WIDTH',`4')
|
||||
include(`target-generic-common.ll')
|
||||
|
||||
saturation_arithmetic_vec4()
|
||||
|
||||
@@ -31,4 +31,4 @@
|
||||
|
||||
define(`WIDTH',`8')
|
||||
include(`target-generic-common.ll')
|
||||
|
||||
saturation_arithmetic_vec8()
|
||||
|
||||
@@ -41,6 +41,7 @@ stdlib_core()
|
||||
scans()
|
||||
reduce_equal(WIDTH)
|
||||
rdrand_decls()
|
||||
saturation_arithmetic_scalar()
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; broadcast/rotate/shuffle
|
||||
|
||||
102
builtins/util.m4
102
builtins/util.m4
@@ -50,12 +50,28 @@ define(`MASK_HIGH_BIT_ON',
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; vector conversion utilities
|
||||
;; convert 4-wide vector into 8-wide vector
|
||||
;; convert 1-wide vector into 8-wide vector
|
||||
;;
|
||||
;; $1: vector element type
|
||||
;; $2: 4-wide vector
|
||||
;; $2: 1-wide vector
|
||||
;; $3: 8-wide vector
|
||||
|
||||
|
||||
;; Convert a 1-wide vector into an 8-wide vector: element 0 of the result
;; is element 0 of the source and the other seven elements are undef.
;; $1: vector element type
;; $2: 1-wide source vector
;; $3: name that receives the 8-wide vector
define(`convert1to8', `
|
||||
$3 = shufflevector <1 x $1> $2, <1 x $1> undef,
|
||||
<8 x i32> <i32 0, i32 undef, i32 undef, i32 undef,
|
||||
i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
')
|
||||
|
||||
|
||||
;; Convert a 1-wide vector into a 16-wide vector: element 0 of the result
;; is element 0 of the source and the other fifteen elements are undef.
;; $1: vector element type
;; $2: 1-wide source vector
;; $3: name that receives the 16-wide vector
define(`convert1to16', `
|
||||
$3 = shufflevector <1 x $1> $2, <1 x $1> undef,
|
||||
<16 x i32> <i32 0, i32 undef, i32 undef, i32 undef,
|
||||
i32 undef, i32 undef, i32 undef, i32 undef,
|
||||
i32 undef, i32 undef, i32 undef, i32 undef,
|
||||
i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
')
|
||||
|
||||
define(`convert4to8', `
|
||||
$3 = shufflevector <4 x $1> $2, <4 x $1> undef,
|
||||
<8 x i32> <i32 0, i32 1, i32 2, i32 3,
|
||||
@@ -117,7 +133,19 @@ define(`convert16to32', `
|
||||
;;
|
||||
;; $1: vector element type
|
||||
;; $2: 8-wide vector
|
||||
;; $3: 4-wide vector
|
||||
;; $3: 1-wide vector
|
||||
|
||||
|
||||
;; Convert an 8-wide vector into a 1-wide vector that keeps only
;; element 0 of the source.
;; $1: vector element type
;; $2: 8-wide source vector
;; $3: name that receives the 1-wide vector
define(`convert8to1', `
|
||||
$3 = shufflevector <8 x $1> $2, <8 x $1> undef,
|
||||
<1 x i32> <i32 0>
|
||||
')
|
||||
|
||||
|
||||
;; Convert a 16-wide vector into a 1-wide vector that keeps only
;; element 0 of the source.
;; $1: vector element type
;; $2: 16-wide source vector
;; $3: name that receives the 1-wide vector
define(`convert16to1', `
|
||||
$3 = shufflevector <16 x $1> $2, <16 x $1> undef,
|
||||
<1 x i32> <i32 0>
|
||||
')
|
||||
|
||||
define(`convert8to4', `
|
||||
$3 = shufflevector <8 x $1> $2, <8 x $1> undef,
|
||||
@@ -204,6 +232,74 @@ define i16 @__psubus_i16(i16 %a0, i16 %a1) {
|
||||
}
|
||||
')
|
||||
|
||||
;;no vector saturation arithmetic
|
||||
|
||||
;; Saturating add and subtract on 8-bit and 16-bit elements, signed and
;; unsigned, for the no-vector target.
;; Every function follows the same three steps: both operands are converted
;; from a 1-wide vector to a full SSE2 vector (16 x i8 or 8 x i16), the
;; matching SSE2 saturating intrinsic is called, and element 0 of its
;; result is converted back to a 1-wide vector and returned.
;; NOTE(review): the function types are written with the target width while
;; the conversion helpers are fixed at one element, so this presumably only
;; assembles when the target width is 1 - confirm.
;; NOTE(review): the SSE2 intrinsics are not declared here, so they are
;; presumably declared by another macro expanded earlier - confirm.
define(`saturation_arithmetic_novec', `
|
||||
define <WIDTH x i8> @__padds_vi8(<WIDTH x i8>, <WIDTH x i8>) {
|
||||
convert1to16(i8, %0, %v0)
|
||||
convert1to16(i8, %1, %v1)
|
||||
%r16 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %v0, <16 x i8> %v1)
|
||||
convert16to1(i8, %r16, %r)
|
||||
ret <WIDTH x i8> %r
|
||||
}
|
||||
|
||||
define <WIDTH x i16> @__padds_vi16(<WIDTH x i16>, <WIDTH x i16>) {
|
||||
convert1to8(i16, %0, %v0)
|
||||
convert1to8(i16, %1, %v1)
|
||||
%r16 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %v0, <8 x i16> %v1)
|
||||
convert8to1(i16, %r16, %r)
|
||||
ret <WIDTH x i16> %r
|
||||
}
|
||||
|
||||
define <WIDTH x i8> @__paddus_vi8(<WIDTH x i8>, <WIDTH x i8>) {
|
||||
convert1to16(i8, %0, %v0)
|
||||
convert1to16(i8, %1, %v1)
|
||||
%r16 = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %v0, <16 x i8> %v1)
|
||||
convert16to1(i8, %r16, %r)
|
||||
ret <WIDTH x i8> %r
|
||||
}
|
||||
|
||||
define <WIDTH x i16> @__paddus_vi16(<WIDTH x i16>, <WIDTH x i16>) {
|
||||
convert1to8(i16, %0, %v0)
|
||||
convert1to8(i16, %1, %v1)
|
||||
%r16 = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %v0, <8 x i16> %v1)
|
||||
convert8to1(i16, %r16, %r)
|
||||
ret <WIDTH x i16> %r
|
||||
}
|
||||
|
||||
define <WIDTH x i8> @__psubs_vi8(<WIDTH x i8>, <WIDTH x i8>) {
|
||||
convert1to16(i8, %0, %v0)
|
||||
convert1to16(i8, %1, %v1)
|
||||
%r16 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %v0, <16 x i8> %v1)
|
||||
convert16to1(i8, %r16, %r)
|
||||
ret <WIDTH x i8> %r
|
||||
}
|
||||
|
||||
define <WIDTH x i16> @__psubs_vi16(<WIDTH x i16>, <WIDTH x i16>) {
|
||||
convert1to8(i16, %0, %v0)
|
||||
convert1to8(i16, %1, %v1)
|
||||
%r16 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %v0, <8 x i16> %v1)
|
||||
convert8to1(i16, %r16, %r)
|
||||
ret <WIDTH x i16> %r
|
||||
}
|
||||
|
||||
define <WIDTH x i8> @__psubus_vi8(<WIDTH x i8>, <WIDTH x i8>) {
|
||||
convert1to16(i8, %0, %v0)
|
||||
convert1to16(i8, %1, %v1)
|
||||
%r16 = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %v0, <16 x i8> %v1)
|
||||
convert16to1(i8, %r16, %r)
|
||||
ret <WIDTH x i8> %r
|
||||
}
|
||||
|
||||
define <WIDTH x i16> @__psubus_vi16(<WIDTH x i16>, <WIDTH x i16>) {
|
||||
convert1to8(i16, %0, %v0)
|
||||
convert1to8(i16, %1, %v1)
|
||||
%r16 = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %v0, <8 x i16> %v1)
|
||||
convert8to1(i16, %r16, %r)
|
||||
ret <WIDTH x i16> %r
|
||||
}
|
||||
')
|
||||
|
||||
;;4-wide vector saturation arithmetic
|
||||
|
||||
define(`saturation_arithmetic_vec4', `
|
||||
|
||||
@@ -4287,12 +4287,12 @@ static inline varying unsigned int8 paddus(varying unsigned int8 a,
|
||||
}
|
||||
|
||||
static inline uniform unsigned int16 paddus(uniform unsigned int16 a,
|
||||
unsigned uniform int16 b) {
|
||||
uniform unsigned int16 b) {
|
||||
return __paddus_i16(a, b);
|
||||
}
|
||||
|
||||
static inline varying unsigned int16 paddus(varying unsigned int16 a,
|
||||
unsigned varying int16 b) {
|
||||
varying unsigned int16 b) {
|
||||
return __paddus_vi16(a, b);
|
||||
}
|
||||
|
||||
@@ -4323,12 +4323,12 @@ static inline varying unsigned int8 psubus(varying unsigned int8 a,
|
||||
}
|
||||
|
||||
static inline uniform unsigned int16 psubus(uniform unsigned int16 a,
|
||||
unsigned uniform int16 b) {
|
||||
uniform unsigned int16 b) {
|
||||
return __psubus_i16(a, b);
|
||||
}
|
||||
|
||||
static inline varying unsigned int16 psubus(varying unsigned int16 a,
|
||||
unsigned varying int16 b) {
|
||||
varying unsigned int16 b) {
|
||||
return __psubus_vi16(a, b);
|
||||
}
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
11
tests/padds_i16.ispc
Normal file
11
tests/padds_i16.ispc
Normal file
@@ -0,0 +1,11 @@
|
||||
|
||||
// Uniform signed 16-bit saturating add: 32767 + 32767 is expected to stay at 32767.
export uniform int width() {
    return programCount;
}

export void f_f(uniform float RET[], uniform float aFOO[]) {
    // Both operands hold the largest signed int16 value.
    uniform int16 lhs = 32767;
    uniform int16 rhs = 32767;
    RET[programIndex] = padds(lhs, rhs);
}

export void result(uniform float RET[]) {
    // Expected output: the sum saturates at the signed int16 maximum.
    RET[programIndex] = 32767;
}
|
||||
11
tests/padds_i8.ispc
Normal file
11
tests/padds_i8.ispc
Normal file
@@ -0,0 +1,11 @@
|
||||
|
||||
// Uniform signed 8-bit saturating add: 127 + 127 is expected to stay at 127.
export uniform int width() { return programCount; }

export void f_f(uniform float RET[], uniform float aFOO[]) {
    uniform int8 a = 127, b = 127; // max signed int8
    // Pass the operands declared above; a1 and b1 were never declared.
    RET[programIndex] = padds(a, b);
}

export void result(uniform float RET[]) {
    // Expected output: the sum saturates at the signed int8 maximum.
    RET[programIndex] = 127;
}
|
||||
11
tests/padds_vi16.ispc
Normal file
11
tests/padds_vi16.ispc
Normal file
@@ -0,0 +1,11 @@
|
||||
|
||||
// Varying signed 16-bit saturating add: 32767 + 32767 is expected to stay at 32767.
export uniform int width() {
    return programCount;
}

export void f_f(uniform float RET[], uniform float aFOO[]) {
    // Both operands hold the largest signed int16 value.
    varying int16 lhs = 32767;
    varying int16 rhs = 32767;
    RET[programIndex] = padds(lhs, rhs);
}

export void result(uniform float RET[]) {
    // Expected output: the sum saturates at the signed int16 maximum.
    RET[programIndex] = 32767;
}
|
||||
11
tests/padds_vi8.ispc
Normal file
11
tests/padds_vi8.ispc
Normal file
@@ -0,0 +1,11 @@
|
||||
|
||||
// Varying signed 8-bit saturating add: 127 + 127 is expected to stay at 127.
export uniform int width() {
    return programCount;
}

export void f_f(uniform float RET[], uniform float aFOO[]) {
    // Both operands hold the largest signed int8 value.
    varying int8 lhs = 127;
    varying int8 rhs = 127;
    RET[programIndex] = padds(lhs, rhs);
}

export void result(uniform float RET[]) {
    // Expected output: the sum saturates at the signed int8 maximum.
    RET[programIndex] = 127;
}
|
||||
11
tests/paddus_i16.ispc
Normal file
11
tests/paddus_i16.ispc
Normal file
@@ -0,0 +1,11 @@
|
||||
|
||||
// Uniform unsigned 16-bit saturating add: 65535 + 65535 is expected to stay at 65535.
export uniform int width() { return programCount; }

export void f_f(uniform float RET[], uniform float aFOO[]) {
    // Declared unsigned so that 65535 is representable and the operands
    // match the unsigned operation under test without implicit conversion.
    uniform unsigned int16 a = 65535, b = 65535; // max unsigned int16
    RET[programIndex] = paddus(a, b);
}

export void result(uniform float RET[]) {
    // Expected output: the sum saturates at the unsigned int16 maximum.
    RET[programIndex] = 65535;
}
|
||||
11
tests/paddus_i8.ispc
Normal file
11
tests/paddus_i8.ispc
Normal file
@@ -0,0 +1,11 @@
|
||||
|
||||
// Uniform unsigned 8-bit saturating add: 255 + 255 is expected to stay at 255.
export uniform int width() { return programCount; }

export void f_f(uniform float RET[], uniform float aFOO[]) {
    // Declared unsigned so that 255 is representable and the operands
    // match the unsigned operation under test without implicit conversion.
    uniform unsigned int8 a = 255, b = 255; // max unsigned int8
    RET[programIndex] = paddus(a, b);
}

export void result(uniform float RET[]) {
    // Expected output: the sum saturates at the unsigned int8 maximum.
    RET[programIndex] = 255;
}
|
||||
11
tests/paddus_vi16.ispc
Normal file
11
tests/paddus_vi16.ispc
Normal file
@@ -0,0 +1,11 @@
|
||||
|
||||
// Varying unsigned 16-bit saturating add: 65535 + 65535 is expected to stay at 65535.
export uniform int width() { return programCount; }

export void f_f(uniform float RET[], uniform float aFOO[]) {
    // Declared unsigned so that 65535 is representable and the operands
    // match the unsigned operation under test without implicit conversion.
    varying unsigned int16 a = 65535, b = 65535; // max unsigned int16
    RET[programIndex] = paddus(a, b);
}

export void result(uniform float RET[]) {
    // Expected output: the sum saturates at the unsigned int16 maximum.
    RET[programIndex] = 65535;
}
|
||||
11
tests/paddus_vi8.ispc
Normal file
11
tests/paddus_vi8.ispc
Normal file
@@ -0,0 +1,11 @@
|
||||
|
||||
// Varying unsigned 8-bit saturating add: 255 + 255 is expected to stay at 255.
export uniform int width() { return programCount; }

export void f_f(uniform float RET[], uniform float aFOO[]) {
    // Declared unsigned so that 255 is representable and the operands
    // match the unsigned operation under test without implicit conversion.
    varying unsigned int8 a = 255, b = 255; // max unsigned int8
    RET[programIndex] = paddus(a, b);
}

export void result(uniform float RET[]) {
    // Expected output: the sum saturates at the unsigned int8 maximum.
    RET[programIndex] = 255;
}
|
||||
11
tests/psubs_i16.ispc
Normal file
11
tests/psubs_i16.ispc
Normal file
@@ -0,0 +1,11 @@
|
||||
|
||||
// Uniform signed 16-bit saturating subtract: -32768 - 32767 is expected to stay at -32768.
export uniform int width() { return programCount; }

export void f_f(uniform float RET[], uniform float aFOO[]) {
    // The operands must be int16: an int8 cannot hold -32768 or 32767, so
    // the values would be truncated before the call.
    uniform int16 a = -32768, b = 32767; // min and max signed int16
    RET[programIndex] = psubs(a, b);
}

export void result(uniform float RET[]) {
    // Expected output: the difference saturates at the signed int16 minimum.
    RET[programIndex] = -32768;
}
|
||||
11
tests/psubs_i8.ispc
Normal file
11
tests/psubs_i8.ispc
Normal file
@@ -0,0 +1,11 @@
|
||||
|
||||
// Uniform signed 8-bit saturating subtract: -128 - 127 is expected to stay at -128.
export uniform int width() {
    return programCount;
}

export void f_f(uniform float RET[], uniform float aFOO[]) {
    // Smallest signed int8 value minus the largest one.
    uniform int8 lhs = -128;
    uniform int8 rhs = 127;
    RET[programIndex] = psubs(lhs, rhs);
}

export void result(uniform float RET[]) {
    // Expected output: the difference saturates at the signed int8 minimum.
    RET[programIndex] = -128;
}
|
||||
11
tests/psubs_vi16.ispc
Normal file
11
tests/psubs_vi16.ispc
Normal file
@@ -0,0 +1,11 @@
|
||||
|
||||
// Varying signed 16-bit saturating subtract: -32768 - 32767 is expected to stay at -32768.
export uniform int width() {
    return programCount;
}

export void f_f(uniform float RET[], uniform float aFOO[]) {
    // Smallest signed int16 value minus the largest one.
    varying int16 lhs = -32768;
    varying int16 rhs = 32767;
    RET[programIndex] = psubs(lhs, rhs);
}

export void result(uniform float RET[]) {
    // Expected output: the difference saturates at the signed int16 minimum.
    RET[programIndex] = -32768;
}
|
||||
11
tests/psubs_vi8.ispc
Normal file
11
tests/psubs_vi8.ispc
Normal file
@@ -0,0 +1,11 @@
|
||||
|
||||
// Varying signed 8-bit saturating subtract: -128 - 127 is expected to stay at -128.
export uniform int width() {
    return programCount;
}

export void f_f(uniform float RET[], uniform float aFOO[]) {
    // Smallest signed int8 value minus the largest one.
    varying int8 lhs = -128;
    varying int8 rhs = 127;
    RET[programIndex] = psubs(lhs, rhs);
}

export void result(uniform float RET[]) {
    // Expected output: the difference saturates at the signed int8 minimum.
    RET[programIndex] = -128;
}
|
||||
11
tests/psubus_i16.ispc
Normal file
11
tests/psubus_i16.ispc
Normal file
@@ -0,0 +1,11 @@
|
||||
|
||||
// Uniform unsigned 16-bit saturating subtract: 0 - 65535 is expected to stay at 0.
export uniform int width() { return programCount; }

export void f_f(uniform float RET[], uniform float aFOO[]) {
    // The operands must be unsigned int16: an int8 cannot hold the 16-bit
    // limit, and 65535 (not 32767) is the unsigned int16 maximum.
    uniform unsigned int16 a = 0, b = 65535; // min and max unsigned int16
    RET[programIndex] = psubus(a, b);
}

export void result(uniform float RET[]) {
    // Expected output: the difference saturates at zero.
    RET[programIndex] = 0;
}
|
||||
11
tests/psubus_i8.ispc
Normal file
11
tests/psubus_i8.ispc
Normal file
@@ -0,0 +1,11 @@
|
||||
|
||||
// Uniform unsigned 8-bit saturating subtract: 0 - 255 is expected to stay at 0.
export uniform int width() { return programCount; }

export void f_f(uniform float RET[], uniform float aFOO[]) {
    // Declared unsigned so that 255 is representable and the operands
    // match the unsigned operation under test without implicit conversion.
    uniform unsigned int8 a = 0, b = 255; // min and max unsigned int8
    RET[programIndex] = psubus(a, b);
}

export void result(uniform float RET[]) {
    // Expected output: the difference saturates at zero.
    RET[programIndex] = 0;
}
|
||||
11
tests/psubus_vi16.ispc
Normal file
11
tests/psubus_vi16.ispc
Normal file
@@ -0,0 +1,11 @@
|
||||
|
||||
// Varying unsigned 16-bit saturating subtract: 0 - 65535 is expected to stay at 0.
export uniform int width() { return programCount; }

export void f_f(uniform float RET[], uniform float aFOO[]) {
    // Declared unsigned to match the operation under test; 65535 (not
    // 32767) is the unsigned int16 maximum.
    varying unsigned int16 a = 0, b = 65535; // min and max unsigned int16
    RET[programIndex] = psubus(a, b);
}

export void result(uniform float RET[]) {
    // Expected output: the difference saturates at zero.
    RET[programIndex] = 0;
}
|
||||
11
tests/psubus_vi8.ispc
Normal file
11
tests/psubus_vi8.ispc
Normal file
@@ -0,0 +1,11 @@
|
||||
|
||||
// Varying unsigned 8-bit saturating subtract: 0 - 255 is expected to stay at 0.
export uniform int width() { return programCount; }

export void f_f(uniform float RET[], uniform float aFOO[]) {
    // Declared unsigned so that 255 is representable and the operands
    // match the unsigned operation under test without implicit conversion.
    varying unsigned int8 a = 0, b = 255; // min and max unsigned int8
    RET[programIndex] = psubus(a, b);
}

export void result(uniform float RET[]) {
    // Expected output: the difference saturates at zero.
    RET[programIndex] = 0;
}
|
||||
Reference in New Issue
Block a user