Add code generation of saturation

This commit is contained in:
Vsevolod Livinskij
2013-11-29 18:40:04 +04:00
parent bec6662338
commit 4c330bc38b
8 changed files with 179 additions and 400 deletions

View File

@@ -45,18 +45,12 @@ include(`target-avx-common.ll')
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;vector saturation arithmetic
;; Signed saturating add of i8 lanes via the 16-lane SSE2 intrinsic; the
;; operands are widened first and only the low lanes are returned.
define <WIDTH x i8> @__padds_vi8(<WIDTH x i8>, <WIDTH x i8>) {
  convert8to16(i8, %0, %v0)
  convert8to16(i8, %1, %v1)
  %r16 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %v0, <16 x i8> %v1)
  convert16to8(i8, %r16, %r)
  ret <WIDTH x i8> %r
}
@@ -66,17 +60,10 @@ define <WIDTH x i16> @__padds_vi16(<WIDTH x i16> %a0, <WIDTH x i16> %a1) {
}
;; Unsigned saturating add of i8 lanes via the 16-lane SSE2 intrinsic; the
;; operands are widened first and only the low lanes are returned.
define <WIDTH x i8> @__paddus_vi8(<WIDTH x i8>, <WIDTH x i8>) {
  convert8to16(i8, %0, %v0)
  convert8to16(i8, %1, %v1)
  %r16 = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %v0, <16 x i8> %v1)
  convert16to8(i8, %r16, %r)
  ret <WIDTH x i8> %r
}
@@ -86,17 +73,10 @@ define <WIDTH x i16> @__paddus_vi16(<WIDTH x i16> %a0, <WIDTH x i16> %a1) {
}
;; Signed saturating subtract of i8 lanes via the 16-lane SSE2 intrinsic; the
;; operands are widened first and only the low lanes are returned.
define <WIDTH x i8> @__psubs_vi8(<WIDTH x i8>, <WIDTH x i8>) {
  convert8to16(i8, %0, %v0)
  convert8to16(i8, %1, %v1)
  %r16 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %v0, <16 x i8> %v1)
  convert16to8(i8, %r16, %r)
  ret <WIDTH x i8> %r
}
@@ -106,17 +86,10 @@ define <WIDTH x i16> @__psubs_vi16(<WIDTH x i16> %a0, <WIDTH x i16> %a1) {
}
;; Unsigned saturating subtract of i8 lanes via the 16-lane SSE2 intrinsic; the
;; operands are widened first and only the low lanes are returned.
define <WIDTH x i8> @__psubus_vi8(<WIDTH x i8>, <WIDTH x i8>) {
  convert8to16(i8, %0, %v0)
  convert8to16(i8, %1, %v1)
  %r16 = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %v0, <16 x i8> %v1)
  convert16to8(i8, %r16, %r)
  ret <WIDTH x i8> %r
}

View File

@@ -45,123 +45,68 @@ include(`target-avx-common.ll')
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;vector saturation arithmetic
;; Signed saturating add of i8 lanes via the 16-lane SSE2 intrinsic; the
;; operands are widened first and only the low lanes are returned.
define <WIDTH x i8> @__padds_vi8(<WIDTH x i8>, <WIDTH x i8>) {
  convert4to16(i8, %0, %v0)
  convert4to16(i8, %1, %v1)
  %r16 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %v0, <16 x i8> %v1)
  convert16to4(i8, %r16, %r)
  ret <WIDTH x i8> %r
}
;; Signed saturating add of i16 lanes via the 8-lane SSE2 intrinsic; the
;; operands are widened first and only the low lanes are returned.
define <WIDTH x i16> @__padds_vi16(<WIDTH x i16>, <WIDTH x i16>) {
  convert4to8(i16, %0, %v0)
  convert4to8(i16, %1, %v1)
  %r16 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %v0, <8 x i16> %v1)
  convert8to4(i16, %r16, %r)
  ret <WIDTH x i16> %r
}
;; Unsigned saturating add of i8 lanes via the 16-lane SSE2 intrinsic; the
;; operands are widened first and only the low lanes are returned.
define <WIDTH x i8> @__paddus_vi8(<WIDTH x i8>, <WIDTH x i8>) {
  convert4to16(i8, %0, %v0)
  convert4to16(i8, %1, %v1)
  %r16 = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %v0, <16 x i8> %v1)
  convert16to4(i8, %r16, %r)
  ret <WIDTH x i8> %r
}
;; Unsigned saturating add of i16 lanes via the 8-lane SSE2 intrinsic; the
;; operands are widened first and only the low lanes are returned.
define <WIDTH x i16> @__paddus_vi16(<WIDTH x i16>, <WIDTH x i16>) {
  convert4to8(i16, %0, %v0)
  convert4to8(i16, %1, %v1)
  %r16 = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %v0, <8 x i16> %v1)
  convert8to4(i16, %r16, %r)
  ret <WIDTH x i16> %r
}
;; Signed saturating subtract of i8 lanes via the 16-lane SSE2 intrinsic; the
;; operands are widened first and only the low lanes are returned.
define <WIDTH x i8> @__psubs_vi8(<WIDTH x i8>, <WIDTH x i8>) {
  convert4to16(i8, %0, %v0)
  convert4to16(i8, %1, %v1)
  %r16 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %v0, <16 x i8> %v1)
  convert16to4(i8, %r16, %r)
  ret <WIDTH x i8> %r
}
;; Signed saturating subtract of i16 lanes via the 8-lane SSE2 intrinsic; the
;; operands are widened first and only the low lanes are returned.
define <WIDTH x i16> @__psubs_vi16(<WIDTH x i16>, <WIDTH x i16>) {
  convert4to8(i16, %0, %v0)
  convert4to8(i16, %1, %v1)
  %r16 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %v0, <8 x i16> %v1)
  convert8to4(i16, %r16, %r)
  ret <WIDTH x i16> %r
}
;; Unsigned saturating subtract of i8 lanes via the 16-lane SSE2 intrinsic; the
;; operands are widened first and only the low lanes are returned.
define <WIDTH x i8> @__psubus_vi8(<WIDTH x i8>, <WIDTH x i8>) {
  convert4to16(i8, %0, %v0)
  convert4to16(i8, %1, %v1)
  %r16 = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %v0, <16 x i8> %v1)
  convert16to4(i8, %r16, %r)
  ret <WIDTH x i8> %r
}
;; Unsigned saturating subtract of i16 lanes via the 8-lane SSE2 intrinsic; the
;; operands are widened first and only the low lanes are returned.
define <WIDTH x i16> @__psubus_vi16(<WIDTH x i16>, <WIDTH x i16>) {
  convert4to8(i16, %0, %v0)
  convert4to8(i16, %1, %v1)
  %r16 = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %v0, <8 x i16> %v1)
  convert8to4(i16, %r16, %r)
  ret <WIDTH x i16> %r
}

View File

@@ -49,18 +49,12 @@ include(`target-sse2-common.ll')
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;vector saturation arithmetic
;; Signed saturating add of i8 lanes via the 16-lane SSE2 intrinsic; the
;; operands are widened first and only the low lanes are returned.
define <WIDTH x i8> @__padds_vi8(<WIDTH x i8>, <WIDTH x i8>) {
  convert8to16(i8, %0, %v0)
  convert8to16(i8, %1, %v1)
  %r16 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %v0, <16 x i8> %v1)
  convert16to8(i8, %r16, %r)
  ret <WIDTH x i8> %r
}
@@ -70,17 +64,10 @@ define <WIDTH x i16> @__padds_vi16(<WIDTH x i16> %a0, <WIDTH x i16> %a1) {
}
;; Unsigned saturating add of i8 lanes via the 16-lane SSE2 intrinsic; the
;; operands are widened first and only the low lanes are returned.
define <WIDTH x i8> @__paddus_vi8(<WIDTH x i8>, <WIDTH x i8>) {
  convert8to16(i8, %0, %v0)
  convert8to16(i8, %1, %v1)
  %r16 = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %v0, <16 x i8> %v1)
  convert16to8(i8, %r16, %r)
  ret <WIDTH x i8> %r
}
@@ -90,17 +77,10 @@ define <WIDTH x i16> @__paddus_vi16(<WIDTH x i16> %a0, <WIDTH x i16> %a1) {
}
;; Signed saturating subtract of i8 lanes via the 16-lane SSE2 intrinsic; the
;; operands are widened first and only the low lanes are returned.
define <WIDTH x i8> @__psubs_vi8(<WIDTH x i8>, <WIDTH x i8>) {
  convert8to16(i8, %0, %v0)
  convert8to16(i8, %1, %v1)
  %r16 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %v0, <16 x i8> %v1)
  convert16to8(i8, %r16, %r)
  ret <WIDTH x i8> %r
}
@@ -110,17 +90,10 @@ define <WIDTH x i16> @__psubs_vi16(<WIDTH x i16> %a0, <WIDTH x i16> %a1) {
}
;; Unsigned saturating subtract of i8 lanes via the 16-lane SSE2 intrinsic; the
;; operands are widened first and only the low lanes are returned.
define <WIDTH x i8> @__psubus_vi8(<WIDTH x i8>, <WIDTH x i8>) {
  convert8to16(i8, %0, %v0)
  convert8to16(i8, %1, %v1)
  %r16 = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %v0, <16 x i8> %v1)
  convert16to8(i8, %r16, %r)
  ret <WIDTH x i8> %r
}

View File

@@ -46,123 +46,68 @@ include(`target-sse2-common.ll')
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;vector saturation arithmetic
;; Signed saturating add of i8 lanes via the 16-lane SSE2 intrinsic; the
;; operands are widened first and only the low lanes are returned.
define <WIDTH x i8> @__padds_vi8(<WIDTH x i8>, <WIDTH x i8>) {
  convert4to16(i8, %0, %v0)
  convert4to16(i8, %1, %v1)
  %r16 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %v0, <16 x i8> %v1)
  convert16to4(i8, %r16, %r)
  ret <WIDTH x i8> %r
}
;; Signed saturating add of i16 lanes via the 8-lane SSE2 intrinsic; the
;; operands are widened first and only the low lanes are returned.
define <WIDTH x i16> @__padds_vi16(<WIDTH x i16>, <WIDTH x i16>) {
  convert4to8(i16, %0, %v0)
  convert4to8(i16, %1, %v1)
  %r16 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %v0, <8 x i16> %v1)
  convert8to4(i16, %r16, %r)
  ret <WIDTH x i16> %r
}
;; Unsigned saturating add of i8 lanes via the 16-lane SSE2 intrinsic; the
;; operands are widened first and only the low lanes are returned.
define <WIDTH x i8> @__paddus_vi8(<WIDTH x i8>, <WIDTH x i8>) {
  convert4to16(i8, %0, %v0)
  convert4to16(i8, %1, %v1)
  %r16 = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %v0, <16 x i8> %v1)
  convert16to4(i8, %r16, %r)
  ret <WIDTH x i8> %r
}
;; Unsigned saturating add of i16 lanes via the 8-lane SSE2 intrinsic; the
;; operands are widened first and only the low lanes are returned.
define <WIDTH x i16> @__paddus_vi16(<WIDTH x i16>, <WIDTH x i16>) {
  convert4to8(i16, %0, %v0)
  convert4to8(i16, %1, %v1)
  %r16 = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %v0, <8 x i16> %v1)
  convert8to4(i16, %r16, %r)
  ret <WIDTH x i16> %r
}
;; Signed saturating subtract of i8 lanes via the 16-lane SSE2 intrinsic; the
;; operands are widened first and only the low lanes are returned.
define <WIDTH x i8> @__psubs_vi8(<WIDTH x i8>, <WIDTH x i8>) {
  convert4to16(i8, %0, %v0)
  convert4to16(i8, %1, %v1)
  %r16 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %v0, <16 x i8> %v1)
  convert16to4(i8, %r16, %r)
  ret <WIDTH x i8> %r
}
;; Signed saturating subtract of i16 lanes via the 8-lane SSE2 intrinsic; the
;; operands are widened first and only the low lanes are returned.
define <WIDTH x i16> @__psubs_vi16(<WIDTH x i16>, <WIDTH x i16>) {
  convert4to8(i16, %0, %v0)
  convert4to8(i16, %1, %v1)
  %r16 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %v0, <8 x i16> %v1)
  convert8to4(i16, %r16, %r)
  ret <WIDTH x i16> %r
}
;; Unsigned saturating subtract of i8 lanes via the 16-lane SSE2 intrinsic; the
;; operands are widened first and only the low lanes are returned.
define <WIDTH x i8> @__psubus_vi8(<WIDTH x i8>, <WIDTH x i8>) {
  convert4to16(i8, %0, %v0)
  convert4to16(i8, %1, %v1)
  %r16 = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %v0, <16 x i8> %v1)
  convert16to4(i8, %r16, %r)
  ret <WIDTH x i8> %r
}
;; Unsigned saturating subtract of i16 lanes via the 8-lane SSE2 intrinsic; the
;; operands are widened first and only the low lanes are returned.
define <WIDTH x i16> @__psubus_vi16(<WIDTH x i16>, <WIDTH x i16>) {
  convert4to8(i16, %0, %v0)
  convert4to8(i16, %1, %v1)
  %r16 = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %v0, <8 x i16> %v1)
  convert8to4(i16, %r16, %r)
  ret <WIDTH x i16> %r
}

View File

@@ -46,18 +46,12 @@ include(`target-sse4-common.ll')
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;vector saturation arithmetic
;; Signed saturating add of i8 lanes via the 16-lane SSE2 intrinsic; the
;; operands are widened first and only the low lanes are returned.
define <WIDTH x i8> @__padds_vi8(<WIDTH x i8>, <WIDTH x i8>) {
  convert8to16(i8, %0, %v0)
  convert8to16(i8, %1, %v1)
  %r16 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %v0, <16 x i8> %v1)
  convert16to8(i8, %r16, %r)
  ret <WIDTH x i8> %r
}
@@ -67,17 +61,10 @@ define <WIDTH x i16> @__padds_vi16(<WIDTH x i16> %a0, <WIDTH x i16> %a1) {
}
;; Unsigned saturating add of i8 lanes via the 16-lane SSE2 intrinsic; the
;; operands are widened first and only the low lanes are returned.
define <WIDTH x i8> @__paddus_vi8(<WIDTH x i8>, <WIDTH x i8>) {
  convert8to16(i8, %0, %v0)
  convert8to16(i8, %1, %v1)
  %r16 = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %v0, <16 x i8> %v1)
  convert16to8(i8, %r16, %r)
  ret <WIDTH x i8> %r
}
@@ -87,17 +74,10 @@ define <WIDTH x i16> @__paddus_vi16(<WIDTH x i16> %a0, <WIDTH x i16> %a1) {
}
;; Signed saturating subtract of i8 lanes via the 16-lane SSE2 intrinsic; the
;; operands are widened first and only the low lanes are returned.
define <WIDTH x i8> @__psubs_vi8(<WIDTH x i8>, <WIDTH x i8>) {
  convert8to16(i8, %0, %v0)
  convert8to16(i8, %1, %v1)
  %r16 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %v0, <16 x i8> %v1)
  convert16to8(i8, %r16, %r)
  ret <WIDTH x i8> %r
}
@@ -107,17 +87,10 @@ define <WIDTH x i16> @__psubs_vi16(<WIDTH x i16> %a0, <WIDTH x i16> %a1) {
}
;; Unsigned saturating subtract of i8 lanes via the 16-lane SSE2 intrinsic; the
;; operands are widened first and only the low lanes are returned.
define <WIDTH x i8> @__psubus_vi8(<WIDTH x i8>, <WIDTH x i8>) {
  convert8to16(i8, %0, %v0)
  convert8to16(i8, %1, %v1)
  %r16 = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %v0, <16 x i8> %v1)
  convert16to8(i8, %r16, %r)
  ret <WIDTH x i8> %r
}

View File

@@ -49,18 +49,12 @@ include(`target-sse4-common.ll')
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;vector saturation arithmetic
;; Signed saturating add of i8 lanes via the 16-lane SSE2 intrinsic; the
;; operands are widened first and only the low lanes are returned.
define <WIDTH x i8> @__padds_vi8(<WIDTH x i8>, <WIDTH x i8>) {
  convert8to16(i8, %0, %v0)
  convert8to16(i8, %1, %v1)
  %r16 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %v0, <16 x i8> %v1)
  convert16to8(i8, %r16, %r)
  ret <WIDTH x i8> %r
}
@@ -70,17 +64,10 @@ define <WIDTH x i16> @__padds_vi16(<WIDTH x i16> %a0, <WIDTH x i16> %a1) {
}
;; Unsigned saturating add of i8 lanes via the 16-lane SSE2 intrinsic; the
;; operands are widened first and only the low lanes are returned.
define <WIDTH x i8> @__paddus_vi8(<WIDTH x i8>, <WIDTH x i8>) {
  convert8to16(i8, %0, %v0)
  convert8to16(i8, %1, %v1)
  %r16 = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %v0, <16 x i8> %v1)
  convert16to8(i8, %r16, %r)
  ret <WIDTH x i8> %r
}
@@ -90,17 +77,10 @@ define <WIDTH x i16> @__paddus_vi16(<WIDTH x i16> %a0, <WIDTH x i16> %a1) {
}
;; Signed saturating subtract of i8 lanes via the 16-lane SSE2 intrinsic; the
;; operands are widened first and only the low lanes are returned.
define <WIDTH x i8> @__psubs_vi8(<WIDTH x i8>, <WIDTH x i8>) {
  convert8to16(i8, %0, %v0)
  convert8to16(i8, %1, %v1)
  %r16 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %v0, <16 x i8> %v1)
  convert16to8(i8, %r16, %r)
  ret <WIDTH x i8> %r
}
@@ -110,17 +90,10 @@ define <WIDTH x i16> @__psubs_vi16(<WIDTH x i16> %a0, <WIDTH x i16> %a1) {
}
;; Unsigned saturating subtract of i8 lanes via the 16-lane SSE2 intrinsic; the
;; operands are widened first and only the low lanes are returned.
define <WIDTH x i8> @__psubus_vi8(<WIDTH x i8>, <WIDTH x i8>) {
  convert8to16(i8, %0, %v0)
  convert8to16(i8, %1, %v1)
  %r16 = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %v0, <16 x i8> %v1)
  convert16to8(i8, %r16, %r)
  ret <WIDTH x i8> %r
}

View File

@@ -46,123 +46,68 @@ include(`target-sse4-common.ll')
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;vector saturation arithmetic
;; Signed saturating add of i8 lanes via the 16-lane SSE2 intrinsic; the
;; operands are widened first and only the low lanes are returned.
define <WIDTH x i8> @__padds_vi8(<WIDTH x i8>, <WIDTH x i8>) {
  convert4to16(i8, %0, %v0)
  convert4to16(i8, %1, %v1)
  %r16 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %v0, <16 x i8> %v1)
  convert16to4(i8, %r16, %r)
  ret <WIDTH x i8> %r
}
;; Signed saturating add of i16 lanes via the 8-lane SSE2 intrinsic; the
;; operands are widened first and only the low lanes are returned.
define <WIDTH x i16> @__padds_vi16(<WIDTH x i16>, <WIDTH x i16>) {
  convert4to8(i16, %0, %v0)
  convert4to8(i16, %1, %v1)
  %r16 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %v0, <8 x i16> %v1)
  convert8to4(i16, %r16, %r)
  ret <WIDTH x i16> %r
}
;; Unsigned saturating add of i8 lanes via the 16-lane SSE2 intrinsic; the
;; operands are widened first and only the low lanes are returned.
define <WIDTH x i8> @__paddus_vi8(<WIDTH x i8>, <WIDTH x i8>) {
  convert4to16(i8, %0, %v0)
  convert4to16(i8, %1, %v1)
  %r16 = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %v0, <16 x i8> %v1)
  convert16to4(i8, %r16, %r)
  ret <WIDTH x i8> %r
}
;; Unsigned saturating add of i16 lanes via the 8-lane SSE2 intrinsic; the
;; operands are widened first and only the low lanes are returned.
define <WIDTH x i16> @__paddus_vi16(<WIDTH x i16>, <WIDTH x i16>) {
  convert4to8(i16, %0, %v0)
  convert4to8(i16, %1, %v1)
  %r16 = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %v0, <8 x i16> %v1)
  convert8to4(i16, %r16, %r)
  ret <WIDTH x i16> %r
}
;; Signed saturating subtract of i8 lanes via the 16-lane SSE2 intrinsic; the
;; operands are widened first and only the low lanes are returned.
define <WIDTH x i8> @__psubs_vi8(<WIDTH x i8>, <WIDTH x i8>) {
  convert4to16(i8, %0, %v0)
  convert4to16(i8, %1, %v1)
  %r16 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %v0, <16 x i8> %v1)
  convert16to4(i8, %r16, %r)
  ret <WIDTH x i8> %r
}
;; Signed saturating subtract of i16 lanes via the 8-lane SSE2 intrinsic; the
;; operands are widened first and only the low lanes are returned.
define <WIDTH x i16> @__psubs_vi16(<WIDTH x i16>, <WIDTH x i16>) {
  convert4to8(i16, %0, %v0)
  convert4to8(i16, %1, %v1)
  %r16 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %v0, <8 x i16> %v1)
  convert8to4(i16, %r16, %r)
  ret <WIDTH x i16> %r
}
;; Unsigned saturating subtract of i8 lanes via the 16-lane SSE2 intrinsic; the
;; operands are widened first and only the low lanes are returned.
define <WIDTH x i8> @__psubus_vi8(<WIDTH x i8>, <WIDTH x i8>) {
  convert4to16(i8, %0, %v0)
  convert4to16(i8, %1, %v1)
  %r16 = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %v0, <16 x i8> %v1)
  convert16to4(i8, %r16, %r)
  ret <WIDTH x i8> %r
}
;; Unsigned saturating subtract of i16 lanes via the 8-lane SSE2 intrinsic; the
;; operands are widened first and only the low lanes are returned.
define <WIDTH x i16> @__psubus_vi16(<WIDTH x i16>, <WIDTH x i16>) {
  convert4to8(i16, %0, %v0)
  convert4to8(i16, %1, %v1)
  %r16 = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %v0, <8 x i16> %v1)
  convert8to4(i16, %r16, %r)
  ret <WIDTH x i16> %r
}

View File

@@ -49,6 +49,58 @@ define(`MASK_HIGH_BIT_ON',
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; vector conversion utilities
;; widen a 4-wide vector to 8 lanes; lanes 4..7 of the result are undef
;;
;; $1: vector element type
;; $2: 4-wide source vector
;; $3: 8-wide result vector
define(`convert4to8', `
  $3 = shufflevector <4 x $1> $2, <4 x $1> undef, <8 x i32> <
         i32 0, i32 1, i32 2, i32 3,
         i32 undef, i32 undef, i32 undef, i32 undef>
')
;; widen a 4-wide vector to 16 lanes; lanes 4..15 of the result are undef
;;
;; $1: vector element type
;; $2: 4-wide source vector
;; $3: 16-wide result vector
define(`convert4to16', `
  $3 = shufflevector <4 x $1> $2, <4 x $1> undef, <16 x i32> <
         i32 0, i32 1, i32 2, i32 3,
         i32 undef, i32 undef, i32 undef, i32 undef,
         i32 undef, i32 undef, i32 undef, i32 undef,
         i32 undef, i32 undef, i32 undef, i32 undef>
')
;; widen an 8-wide vector to 16 lanes; lanes 8..15 of the result are undef
;;
;; $1: vector element type
;; $2: 8-wide source vector
;; $3: 16-wide result vector
define(`convert8to16', `
  $3 = shufflevector <8 x $1> $2, <8 x $1> undef, <16 x i32> <
         i32 0, i32 1, i32 2, i32 3,
         i32 4, i32 5, i32 6, i32 7,
         i32 undef, i32 undef, i32 undef, i32 undef,
         i32 undef, i32 undef, i32 undef, i32 undef>
')
;; narrow an 8-wide vector to its low 4 lanes
;;
;; $1: vector element type
;; $2: 8-wide source vector
;; $3: 4-wide result vector
define(`convert8to4', `
  $3 = shufflevector <8 x $1> $2, <8 x $1> undef, <4 x i32> <
         i32 0, i32 1, i32 2, i32 3>
')
;; narrow a 16-wide vector to its low 4 lanes
;;
;; $1: vector element type
;; $2: 16-wide source vector
;; $3: 4-wide result vector
define(`convert16to4', `
  $3 = shufflevector <16 x $1> $2, <16 x $1> undef, <4 x i32> <
         i32 0, i32 1, i32 2, i32 3>
')
;; narrow a 16-wide vector to its low 8 lanes
;;
;; $1: vector element type
;; $2: 16-wide source vector
;; $3: 8-wide result vector
define(`convert16to8', `
  $3 = shufflevector <16 x $1> $2, <16 x $1> undef, <8 x i32> <
         i32 0, i32 1, i32 2, i32 3,
         i32 4, i32 5, i32 6, i32 7>
')
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; vector deconstruction utilities
;; split 8-wide vector into 2 4-wide vectors
;;