structural change

This commit is contained in:
Vsevolod Livinskij
2013-11-30 10:48:18 +04:00
parent 4c330bc38b
commit 4faff1a63c
13 changed files with 285 additions and 660 deletions

View File

@@ -77,6 +77,42 @@ define(`convert8to16', `
i32 undef, i32 undef, i32 undef, i32 undef>
')
;; convert 4-wide vector into 32-wide vector
;;
;; $1: vector element type
;; $2: 4-wide vector
;; $3: 32-wide vector
;;
;; Lanes 0..3 of the result come from the input; the remaining 28 shuffle
;; mask entries are undef.
define(`convert4to32', `
  $3 = shufflevector <4 x $1> $2, <4 x $1> undef,
    <32 x i32> <
      i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef,
      i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
      i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
      i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef
    >
')
;; convert 8-wide vector into 32-wide vector
;;
;; $1: vector element type
;; $2: 8-wide vector
;; $3: 32-wide vector
;;
;; Lanes 0..7 of the result come from the input; the remaining 24 shuffle
;; mask entries are undef.  The shufflevector operands must be typed as
;; 8-wide vectors to match the input.
define(`convert8to32', `
  $3 = shufflevector <8 x $1> $2, <8 x $1> undef,
    <32 x i32> <i32 0, i32 1, i32 2, i32 3,
                i32 4, i32 5, i32 6, i32 7,
                i32 undef, i32 undef, i32 undef, i32 undef,
                i32 undef, i32 undef, i32 undef, i32 undef,
                i32 undef, i32 undef, i32 undef, i32 undef,
                i32 undef, i32 undef, i32 undef, i32 undef,
                i32 undef, i32 undef, i32 undef, i32 undef,
                i32 undef, i32 undef, i32 undef, i32 undef>
')
;; convert 16-wide vector into 32-wide vector
;;
;; $1: vector element type
;; $2: 16-wide vector
;; $3: 32-wide vector
;;
;; Lanes 0..15 of the result come from the input; the remaining 16 shuffle
;; mask entries are undef.  The shufflevector operands must be typed as
;; 16-wide vectors to match the input.
define(`convert16to32', `
  $3 = shufflevector <16 x $1> $2, <16 x $1> undef,
    <32 x i32> <i32 0, i32 1, i32 2, i32 3,
                i32 4, i32 5, i32 6, i32 7,
                i32 8, i32 9, i32 10, i32 11,
                i32 12, i32 13, i32 14, i32 15,
                i32 undef, i32 undef, i32 undef, i32 undef,
                i32 undef, i32 undef, i32 undef, i32 undef,
                i32 undef, i32 undef, i32 undef, i32 undef,
                i32 undef, i32 undef, i32 undef, i32 undef>
')
;; convert 4-wide vector into 8-wide vector
;;
;; $1: vector element type
@@ -99,6 +135,243 @@ define(`convert16to8', `
<8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
')
;; convert 32-wide vector into 4-wide vector
;;
;; $1: vector element type
;; $2: 32-wide vector
;; $3: 4-wide vector
;;
;; Keeps lanes 0..3 of the input and drops the rest.
define(`convert32to4', `
  $3 = shufflevector <32 x $1> $2, <32 x $1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
')
;; convert 32-wide vector into 8-wide vector
;;
;; $1: vector element type
;; $2: 32-wide vector
;; $3: 8-wide vector
;;
;; Keeps lanes 0..7 of the input and drops the rest.  The shuffle mask has
;; to list exactly as many elements as its declared vector type, i.e. 8.
define(`convert32to8', `
  $3 = shufflevector <32 x $1> $2, <32 x $1> undef,
    <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
')
;; convert 32-wide vector into 16-wide vector
;;
;; $1: vector element type
;; $2: 32-wide vector
;; $3: 16-wide vector
;;
;; Keeps lanes 0..15 of the input and drops the rest.  The shuffle mask has
;; to list exactly as many elements as its declared vector type, i.e. 16.
define(`convert32to16', `
  $3 = shufflevector <32 x $1> $2, <32 x $1> undef,
    <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
')
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;saturation arithmetic
;; scalar saturation arithmetic
;;
;; Declares the eight SSE2 saturating add/subtract intrinsics and emits one
;; scalar wrapper function per intrinsic, for i8 and i16 operands.  Every
;; wrapper goes through the scalar SSE helper macro, which is passed 16 for
;; the i8 forms and 8 for the i16 forms, matching the lane counts in the
;; intrinsic declarations.
;; NOTE(review): the helper macro is defined outside this section; presumably
;; it places the scalars in lane 0, calls the intrinsic and extracts lane 0
;; again -- confirm against its definition.
define(`saturation_arithmetic_scalar', `

;; intrinsic declarations: signed and unsigned, add and subtract, byte and word
declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone
declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone
declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone
declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone

;; signed saturating add
define i8 @__padds_i8(i8 %a0, i8 %a1) {
  sse_binary_scalar(ret, 16, i8, @llvm.x86.sse2.padds.b, %a0, %a1)
  ret i8 %ret
}

define i16 @__padds_i16(i16 %a0, i16 %a1) {
  sse_binary_scalar(ret, 8, i16, @llvm.x86.sse2.padds.w, %a0, %a1)
  ret i16 %ret
}

;; unsigned saturating add
define i8 @__paddus_i8(i8 %a0, i8 %a1) {
  sse_binary_scalar(ret, 16, i8, @llvm.x86.sse2.paddus.b, %a0, %a1)
  ret i8 %ret
}

define i16 @__paddus_i16(i16 %a0, i16 %a1) {
  sse_binary_scalar(ret, 8, i16, @llvm.x86.sse2.paddus.w, %a0, %a1)
  ret i16 %ret
}

;; signed saturating subtract
define i8 @__psubs_i8(i8 %a0, i8 %a1) {
  sse_binary_scalar(ret, 16, i8, @llvm.x86.sse2.psubs.b, %a0, %a1)
  ret i8 %ret
}

define i16 @__psubs_i16(i16 %a0, i16 %a1) {
  sse_binary_scalar(ret, 8, i16, @llvm.x86.sse2.psubs.w, %a0, %a1)
  ret i16 %ret
}

;; unsigned saturating subtract
define i8 @__psubus_i8(i8 %a0, i8 %a1) {
  sse_binary_scalar(ret, 16, i8, @llvm.x86.sse2.psubus.b, %a0, %a1)
  ret i8 %ret
}

define i16 @__psubus_i16(i16 %a0, i16 %a1) {
  sse_binary_scalar(ret, 8, i16, @llvm.x86.sse2.psubus.w, %a0, %a1)
  ret i16 %ret
}
')
;; 4-wide vector saturation arithmetic
;;
;; Emits the __padds / __paddus / __psubs / __psubus vector functions for
;; i8 and i16 elements.  The SSE2 intrinsics are called on <16 x i8> or
;; <8 x i16> values, so each operand is first widened with a conversion
;; helper, the intrinsic is applied, and the result is narrowed back.
;; NOTE(review): the function signatures use the build-wide vector width
;; while the helpers are fixed at 4 lanes, so this presumably must only be
;; expanded by 4-wide targets -- confirm against the callers.
;; NOTE(review): the intrinsics are not declared in this macro; within this
;; section only the scalar macro declares them -- confirm it is always
;; expanded in the same module.
define(`saturation_arithmetic_vec4', `

;; signed saturating add
define <WIDTH x i8> @__padds_vi8(<WIDTH x i8> %a0, <WIDTH x i8> %a1) {
  convert4to16(i8, %a0, %a0_wide)
  convert4to16(i8, %a1, %a1_wide)
  %res_wide = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0_wide, <16 x i8> %a1_wide)
  convert16to4(i8, %res_wide, %res)
  ret <WIDTH x i8> %res
}

define <WIDTH x i16> @__padds_vi16(<WIDTH x i16> %a0, <WIDTH x i16> %a1) {
  convert4to8(i16, %a0, %a0_wide)
  convert4to8(i16, %a1, %a1_wide)
  %res_wide = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0_wide, <8 x i16> %a1_wide)
  convert8to4(i16, %res_wide, %res)
  ret <WIDTH x i16> %res
}

;; unsigned saturating add
define <WIDTH x i8> @__paddus_vi8(<WIDTH x i8> %a0, <WIDTH x i8> %a1) {
  convert4to16(i8, %a0, %a0_wide)
  convert4to16(i8, %a1, %a1_wide)
  %res_wide = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0_wide, <16 x i8> %a1_wide)
  convert16to4(i8, %res_wide, %res)
  ret <WIDTH x i8> %res
}

define <WIDTH x i16> @__paddus_vi16(<WIDTH x i16> %a0, <WIDTH x i16> %a1) {
  convert4to8(i16, %a0, %a0_wide)
  convert4to8(i16, %a1, %a1_wide)
  %res_wide = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0_wide, <8 x i16> %a1_wide)
  convert8to4(i16, %res_wide, %res)
  ret <WIDTH x i16> %res
}

;; signed saturating subtract
define <WIDTH x i8> @__psubs_vi8(<WIDTH x i8> %a0, <WIDTH x i8> %a1) {
  convert4to16(i8, %a0, %a0_wide)
  convert4to16(i8, %a1, %a1_wide)
  %res_wide = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0_wide, <16 x i8> %a1_wide)
  convert16to4(i8, %res_wide, %res)
  ret <WIDTH x i8> %res
}

define <WIDTH x i16> @__psubs_vi16(<WIDTH x i16> %a0, <WIDTH x i16> %a1) {
  convert4to8(i16, %a0, %a0_wide)
  convert4to8(i16, %a1, %a1_wide)
  %res_wide = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0_wide, <8 x i16> %a1_wide)
  convert8to4(i16, %res_wide, %res)
  ret <WIDTH x i16> %res
}

;; unsigned saturating subtract
define <WIDTH x i8> @__psubus_vi8(<WIDTH x i8> %a0, <WIDTH x i8> %a1) {
  convert4to16(i8, %a0, %a0_wide)
  convert4to16(i8, %a1, %a1_wide)
  %res_wide = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0_wide, <16 x i8> %a1_wide)
  convert16to4(i8, %res_wide, %res)
  ret <WIDTH x i8> %res
}

define <WIDTH x i16> @__psubus_vi16(<WIDTH x i16> %a0, <WIDTH x i16> %a1) {
  convert4to8(i16, %a0, %a0_wide)
  convert4to8(i16, %a1, %a1_wide)
  %res_wide = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0_wide, <8 x i16> %a1_wide)
  convert8to4(i16, %res_wide, %res)
  ret <WIDTH x i16> %res
}
')
;; 8-wide vector saturation arithmetic
;;
;; Emits the __padds / __paddus / __psubs / __psubus vector functions for
;; i8 and i16 elements.  The i16 functions hand their arguments straight to
;; the word intrinsics; the i8 functions widen both operands to 16 lanes
;; with a conversion helper, call the byte intrinsic, and narrow the result
;; back to 8 lanes.
;; NOTE(review): the direct i16 calls only type-check when the build-wide
;; vector width is 8 -- presumably this is only expanded by 8-wide targets;
;; confirm against the callers.
;; NOTE(review): the intrinsics are not declared in this macro; within this
;; section only the scalar macro declares them -- confirm it is always
;; expanded in the same module.
define(`saturation_arithmetic_vec8', `

;; signed saturating add
define <WIDTH x i8> @__padds_vi8(<WIDTH x i8> %a0, <WIDTH x i8> %a1) {
  convert8to16(i8, %a0, %a0_wide)
  convert8to16(i8, %a1, %a1_wide)
  %res_wide = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0_wide, <16 x i8> %a1_wide)
  convert16to8(i8, %res_wide, %res)
  ret <WIDTH x i8> %res
}

define <WIDTH x i16> @__padds_vi16(<WIDTH x i16> %a0, <WIDTH x i16> %a1) {
  %r = call <WIDTH x i16> @llvm.x86.sse2.padds.w(<WIDTH x i16> %a0, <WIDTH x i16> %a1)
  ret <WIDTH x i16> %r
}

;; unsigned saturating add
define <WIDTH x i8> @__paddus_vi8(<WIDTH x i8> %a0, <WIDTH x i8> %a1) {
  convert8to16(i8, %a0, %a0_wide)
  convert8to16(i8, %a1, %a1_wide)
  %res_wide = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0_wide, <16 x i8> %a1_wide)
  convert16to8(i8, %res_wide, %res)
  ret <WIDTH x i8> %res
}

define <WIDTH x i16> @__paddus_vi16(<WIDTH x i16> %a0, <WIDTH x i16> %a1) {
  %r = call <WIDTH x i16> @llvm.x86.sse2.paddus.w(<WIDTH x i16> %a0, <WIDTH x i16> %a1)
  ret <WIDTH x i16> %r
}

;; signed saturating subtract
define <WIDTH x i8> @__psubs_vi8(<WIDTH x i8> %a0, <WIDTH x i8> %a1) {
  convert8to16(i8, %a0, %a0_wide)
  convert8to16(i8, %a1, %a1_wide)
  %res_wide = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0_wide, <16 x i8> %a1_wide)
  convert16to8(i8, %res_wide, %res)
  ret <WIDTH x i8> %res
}

define <WIDTH x i16> @__psubs_vi16(<WIDTH x i16> %a0, <WIDTH x i16> %a1) {
  %r = call <WIDTH x i16> @llvm.x86.sse2.psubs.w(<WIDTH x i16> %a0, <WIDTH x i16> %a1)
  ret <WIDTH x i16> %r
}

;; unsigned saturating subtract
define <WIDTH x i8> @__psubus_vi8(<WIDTH x i8> %a0, <WIDTH x i8> %a1) {
  convert8to16(i8, %a0, %a0_wide)
  convert8to16(i8, %a1, %a1_wide)
  %res_wide = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0_wide, <16 x i8> %a1_wide)
  convert16to8(i8, %res_wide, %res)
  ret <WIDTH x i8> %res
}

define <WIDTH x i16> @__psubus_vi16(<WIDTH x i16> %a0, <WIDTH x i16> %a1) {
  %r = call <WIDTH x i16> @llvm.x86.sse2.psubus.w(<WIDTH x i16> %a0, <WIDTH x i16> %a1)
  ret <WIDTH x i16> %r
}
')
;; 16-wide vector saturation arithmetic
;;
;; Emits the __padds / __paddus / __psubs / __psubus vector functions for
;; i8 and i16 elements.  The i8 functions hand their arguments straight to
;; the byte intrinsics; the i16 functions go through a helper macro that
;; is given the word intrinsic and both operands.
;; NOTE(review): that helper is defined outside this section; presumably it
;; applies the 8-wide intrinsic to each half of the 16-wide operands and
;; joins the two results -- confirm against its definition.
;; NOTE(review): the direct i8 calls only type-check when the build-wide
;; vector width is 16 -- presumably this is only expanded by 16-wide
;; targets; confirm against the callers.
;; NOTE(review): the intrinsics are not declared in this macro; within this
;; section only the scalar macro declares them -- confirm it is always
;; expanded in the same module.
define(`saturation_arithmetic_vec16', `

;; signed saturating add
define <WIDTH x i8> @__padds_vi8(<WIDTH x i8> %a0, <WIDTH x i8> %a1) {
  %r = call <WIDTH x i8> @llvm.x86.sse2.padds.b(<WIDTH x i8> %a0, <WIDTH x i8> %a1)
  ret <WIDTH x i8> %r
}

define <WIDTH x i16> @__padds_vi16(<WIDTH x i16> %a0, <WIDTH x i16> %a1) {
  binary8to16(ret, i16, @llvm.x86.sse2.padds.w, %a0, %a1)
  ret <WIDTH x i16> %ret
}

;; unsigned saturating add
define <WIDTH x i8> @__paddus_vi8(<WIDTH x i8> %a0, <WIDTH x i8> %a1) {
  %r = call <WIDTH x i8> @llvm.x86.sse2.paddus.b(<WIDTH x i8> %a0, <WIDTH x i8> %a1)
  ret <WIDTH x i8> %r
}

define <WIDTH x i16> @__paddus_vi16(<WIDTH x i16> %a0, <WIDTH x i16> %a1) {
  binary8to16(ret, i16, @llvm.x86.sse2.paddus.w, %a0, %a1)
  ret <WIDTH x i16> %ret
}

;; signed saturating subtract
define <WIDTH x i8> @__psubs_vi8(<WIDTH x i8> %a0, <WIDTH x i8> %a1) {
  %r = call <WIDTH x i8> @llvm.x86.sse2.psubs.b(<WIDTH x i8> %a0, <WIDTH x i8> %a1)
  ret <WIDTH x i8> %r
}

define <WIDTH x i16> @__psubs_vi16(<WIDTH x i16> %a0, <WIDTH x i16> %a1) {
  binary8to16(ret, i16, @llvm.x86.sse2.psubs.w, %a0, %a1)
  ret <WIDTH x i16> %ret
}

;; unsigned saturating subtract
define <WIDTH x i8> @__psubus_vi8(<WIDTH x i8> %a0, <WIDTH x i8> %a1) {
  %r = call <WIDTH x i8> @llvm.x86.sse2.psubus.b(<WIDTH x i8> %a0, <WIDTH x i8> %a1)
  ret <WIDTH x i8> %r
}

define <WIDTH x i16> @__psubus_vi16(<WIDTH x i16> %a0, <WIDTH x i16> %a1) {
  binary8to16(ret, i16, @llvm.x86.sse2.psubus.w, %a0, %a1)
  ret <WIDTH x i16> %ret
}
')
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; vector deconstruction utilities