Changes for sse2 and sse4 in saturation

This commit is contained in:
Vsevolod Livinskij
2013-11-29 03:33:40 +04:00
parent 35a4d1b3a2
commit 42c148bf75
9 changed files with 535 additions and 192 deletions

View File

@@ -39,7 +39,7 @@ aossoa()
rdrand_decls()
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; saturation arithmetic
;; scalar saturation arithmetic
declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone
define i8 @__padds_i8(i8 %a0, i8 %a1) {
@@ -47,95 +47,48 @@ define i8 @__padds_i8(i8 %a0, i8 %a1) {
ret i8 %ret
}
;; Full-width wrapper for the SSE2 padds.b intrinsic (x86 PADDSB, the
;; signed saturating byte add): both 16 x i8 operands are passed through
;; as-is and the intrinsic result is returned unchanged.
define <16 x i8> @__padds_vi8(<16 x i8> %a0, <16 x i8> %a1) {
  %sat_sum = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %sat_sum
}
;; SSE2 padds.w intrinsic: two 8 x i16 operands in, one 8 x i16 result out.
declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone
;; Scalar i16 entry point built on the padds.w intrinsic (x86 PADDSW, the
;; signed saturating 16-bit add per the instruction mnemonic).
;; %ret is not assigned anywhere in the visible body, so it has to come from
;; the expansion of the m4 helper macro invoked on the first body line.
;; NOTE(review): that helper is defined outside this section; presumably it
;; places %a0 and %a1 in one element of an 8 x i16 vector each, calls the
;; intrinsic, and extracts the matching element into %ret -- confirm against
;; the helper definition.
define i16 @__padds_i16(i16 %a0, i16 %a1) {
sse_binary_scalar(ret, 8, i16, @llvm.x86.sse2.padds.w, %a0, %a1)
ret i16 %ret
}
;; Full-width wrapper for the SSE2 padds.w intrinsic (x86 PADDSW, the
;; signed saturating 16-bit add): both 8 x i16 operands are passed through
;; as-is and the intrinsic result is returned unchanged.
define <8 x i16> @__padds_vi16(<8 x i16> %a0, <8 x i16> %a1) {
  %sat_sum = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %sat_sum
}
;; SSE2 paddus.b intrinsic: two 16 x i8 operands in, one 16 x i8 result out.
declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone
;; Scalar i8 entry point built on the paddus.b intrinsic (x86 PADDUSB, the
;; unsigned saturating byte add per the instruction mnemonic).
;; %ret is not assigned anywhere in the visible body, so it has to come from
;; the expansion of the m4 helper macro invoked on the first body line.
;; NOTE(review): that helper is defined outside this section; presumably it
;; places %a0 and %a1 in one element of a 16 x i8 vector each, calls the
;; intrinsic, and extracts the matching element into %ret -- confirm against
;; the helper definition.
define i8 @__paddus_i8(i8 %a0, i8 %a1) {
sse_binary_scalar(ret, 16, i8, @llvm.x86.sse2.paddus.b, %a0, %a1)
ret i8 %ret
}
;; Full-width wrapper for the SSE2 paddus.b intrinsic (x86 PADDUSB, the
;; unsigned saturating byte add): both 16 x i8 operands are passed through
;; as-is and the intrinsic result is returned unchanged.
define <16 x i8> @__paddus_vi8(<16 x i8> %a0, <16 x i8> %a1) {
  %sat_sum = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %sat_sum
}
;; SSE2 paddus.w intrinsic: two 8 x i16 operands in, one 8 x i16 result out.
declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone
;; Scalar i16 entry point built on the paddus.w intrinsic (x86 PADDUSW, the
;; unsigned saturating 16-bit add per the instruction mnemonic).
;; %ret is not assigned anywhere in the visible body, so it has to come from
;; the expansion of the m4 helper macro invoked on the first body line.
;; NOTE(review): that helper is defined outside this section; presumably it
;; places %a0 and %a1 in one element of an 8 x i16 vector each, calls the
;; intrinsic, and extracts the matching element into %ret -- confirm against
;; the helper definition.
define i16 @__paddus_i16(i16 %a0, i16 %a1) {
sse_binary_scalar(ret, 8, i16, @llvm.x86.sse2.paddus.w, %a0, %a1)
ret i16 %ret
}
;; Full-width wrapper for the SSE2 paddus.w intrinsic (x86 PADDUSW, the
;; unsigned saturating 16-bit add): both 8 x i16 operands are passed through
;; as-is and the intrinsic result is returned unchanged.
define <8 x i16> @__paddus_vi16(<8 x i16> %a0, <8 x i16> %a1) {
  %sat_sum = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %sat_sum
}
;; SSE2 psubs.b intrinsic: two 16 x i8 operands in, one 16 x i8 result out.
declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone
;; Scalar i8 entry point built on the psubs.b intrinsic (x86 PSUBSB, the
;; signed saturating byte subtract per the instruction mnemonic).
;; %ret is not assigned anywhere in the visible body, so it has to come from
;; the expansion of the m4 helper macro invoked on the first body line.
;; NOTE(review): that helper is defined outside this section; presumably it
;; places %a0 and %a1 in one element of a 16 x i8 vector each, calls the
;; intrinsic, and extracts the matching element into %ret -- confirm against
;; the helper definition.
define i8 @__psubs_i8(i8 %a0, i8 %a1) {
sse_binary_scalar(ret, 16, i8, @llvm.x86.sse2.psubs.b, %a0, %a1)
ret i8 %ret
}
;; Full-width wrapper for the SSE2 psubs.b intrinsic (x86 PSUBSB, the
;; signed saturating byte subtract): both 16 x i8 operands are passed
;; through as-is and the intrinsic result is returned unchanged.
define <16 x i8> @__psubs_vi8(<16 x i8> %a0, <16 x i8> %a1) {
  %sat_diff = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %sat_diff
}
;; SSE2 psubs.w intrinsic: two 8 x i16 operands in, one 8 x i16 result out.
declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone
;; Scalar i16 entry point built on the psubs.w intrinsic (x86 PSUBSW, the
;; signed saturating 16-bit subtract per the instruction mnemonic).
;; %ret is not assigned anywhere in the visible body, so it has to come from
;; the expansion of the m4 helper macro invoked on the first body line.
;; NOTE(review): that helper is defined outside this section; presumably it
;; places %a0 and %a1 in one element of an 8 x i16 vector each, calls the
;; intrinsic, and extracts the matching element into %ret -- confirm against
;; the helper definition.
define i16 @__psubs_i16(i16 %a0, i16 %a1) {
sse_binary_scalar(ret, 8, i16, @llvm.x86.sse2.psubs.w, %a0, %a1)
ret i16 %ret
}
;; Full-width wrapper for the SSE2 psubs.w intrinsic (x86 PSUBSW, the
;; signed saturating 16-bit subtract): both 8 x i16 operands are passed
;; through as-is and the intrinsic result is returned unchanged.
define <8 x i16> @__psubs_vi16(<8 x i16> %a0, <8 x i16> %a1) {
  %sat_diff = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %sat_diff
}
;; SSE2 psubus.b intrinsic: two 16 x i8 operands in, one 16 x i8 result out.
declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone
;; Scalar i8 entry point built on the psubus.b intrinsic (x86 PSUBUSB, the
;; unsigned saturating byte subtract per the instruction mnemonic).
;; %ret is not assigned anywhere in the visible body, so it has to come from
;; the expansion of the m4 helper macro invoked on the first body line.
;; NOTE(review): that helper is defined outside this section; presumably it
;; places %a0 and %a1 in one element of a 16 x i8 vector each, calls the
;; intrinsic, and extracts the matching element into %ret -- confirm against
;; the helper definition.
define i8 @__psubus_i8(i8 %a0, i8 %a1) {
sse_binary_scalar(ret, 16, i8, @llvm.x86.sse2.psubus.b, %a0, %a1)
ret i8 %ret
}
;; Full-width wrapper for the SSE2 psubus.b intrinsic (x86 PSUBUSB, the
;; unsigned saturating byte subtract): both 16 x i8 operands are passed
;; through as-is and the intrinsic result is returned unchanged.
define <16 x i8> @__psubus_vi8(<16 x i8> %a0, <16 x i8> %a1) {
  %sat_diff = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %sat_diff
}
;; SSE2 psubus.w intrinsic: two 8 x i16 operands in, one 8 x i16 result out.
declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone
;; Scalar i16 entry point built on the psubus.w intrinsic (x86 PSUBUSW, the
;; unsigned saturating 16-bit subtract per the instruction mnemonic).
;; %ret is not assigned anywhere in the visible body, so it has to come from
;; the expansion of the m4 helper macro invoked on the first body line.
;; NOTE(review): that helper is defined outside this section; presumably it
;; places %a0 and %a1 in one element of an 8 x i16 vector each, calls the
;; intrinsic, and extracts the matching element into %ret -- confirm against
;; the helper definition.
define i16 @__psubus_i16(i16 %a0, i16 %a1) {
sse_binary_scalar(ret, 8, i16, @llvm.x86.sse2.psubus.w, %a0, %a1)
ret i16 %ret
}
;; Full-width wrapper for the SSE2 psubus.w intrinsic (x86 PSUBUSW, the
;; unsigned saturating 16-bit subtract): both 8 x i16 operands are passed
;; through as-is and the intrinsic result is returned unchanged.
define <8 x i16> @__psubus_vi16(<8 x i16> %a0, <8 x i16> %a1) {
  %sat_diff = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %sat_diff
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; rounding floats