From 82f5716362eb46f798993bf21dd7ea869546ae0d Mon Sep 17 00:00:00 2001
From: Vsevolod Livinskiy
Date: Thu, 7 May 2015 15:16:48 +0300
Subject: [PATCH] [AVX512]: max/min functions

Replace the external declarations of the float, int32, uint32 and double
min/max builtins with inline definitions.

Uniform variants are a compare followed by a select.  The floating-point
versions use an ordered greater-than compare, which is false when either
operand is NaN; in that case max returns its first operand and min
returns its second.

Varying (16-wide) variants forward to the 256-bit AVX/AVX2 min/max
intrinsics through the binary8to16 and binary4to16 helper macros.  Those
macros are defined outside this patch; presumably they apply the narrower
operation piecewise to the 16-wide operands -- confirm against their
definition.

All new definitions are marked readnone, matching the declarations they
replace, the double variants, and the readnone __reduce_* callers: none
of the bodies touches memory.

Also drop the doubled blank lines in front of several section banners.
---
 builtins/target-avx512-common.ll | 142 ++++++++++++++++++++++++-------
 1 file changed, 113 insertions(+), 29 deletions(-)

diff --git a/builtins/target-avx512-common.ll b/builtins/target-avx512-common.ll
index cc05288f..8f3f9c50 100644
--- a/builtins/target-avx512-common.ll
+++ b/builtins/target-avx512-common.ll
@@ -159,28 +159,121 @@ define <16 x double> @__ceil_varying_double(<16 x double>) nounwind readonly alw
 
 int64minmax()
 
-declare float @__max_uniform_float(float, float) nounwind readnone
-declare float @__min_uniform_float(float, float) nounwind readnone
-declare i32 @__min_uniform_int32(i32, i32) nounwind readnone
-declare i32 @__max_uniform_int32(i32, i32) nounwind readnone
-declare i32 @__min_uniform_uint32(i32, i32) nounwind readnone
-declare i32 @__max_uniform_uint32(i32, i32) nounwind readnone
-declare double @__min_uniform_double(double, double) nounwind readnone
-declare double @__max_uniform_double(double, double) nounwind readnone
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; float min/max
 
-declare <WIDTH x float> @__max_varying_float(<WIDTH x float>,
-    <WIDTH x float>) nounwind readnone
-declare <WIDTH x float> @__min_varying_float(<WIDTH x float>,
-    <WIDTH x float>) nounwind readnone
-declare <WIDTH x i32> @__min_varying_int32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone
-declare <WIDTH x i32> @__max_varying_int32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone
-declare <WIDTH x i32> @__min_varying_uint32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone
-declare <WIDTH x i32> @__max_varying_uint32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone
-declare <WIDTH x double> @__min_varying_double(<WIDTH x double>,
-    <WIDTH x double>) nounwind readnone
-declare <WIDTH x double> @__max_varying_double(<WIDTH x double>,
-    <WIDTH x double>) nounwind readnone
+define float @__max_uniform_float(float, float) nounwind readnone alwaysinline {
+  %cmp = fcmp ogt float %1, %0                 ; ordered: false if either operand is NaN
+  %ret = select i1 %cmp, float %1, float %0    ; %1 when it is greater, otherwise %0
+  ret float %ret
+}
 
+define float @__min_uniform_float(float, float) nounwind readnone alwaysinline {
+  %cmp = fcmp ogt float %1, %0                 ; ordered: false if either operand is NaN
+  %ret = select i1 %cmp, float %0, float %1    ; %0 when %1 is greater, otherwise %1
+  ret float %ret
+}
+
+declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone
+declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone
+
+define <16 x float> @__max_varying_float(<16 x float>,
+                                         <16 x float>) nounwind readnone alwaysinline {
+  binary8to16(call, float, @llvm.x86.avx.max.ps.256, %0, %1)
+  ret <16 x float> %call
+}
+
+define <16 x float> @__min_varying_float(<16 x float>,
+                                         <16 x float>) nounwind readnone alwaysinline {
+  binary8to16(call, float, @llvm.x86.avx.min.ps.256, %0, %1)
+  ret <16 x float> %call
+}
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; int min/max
+
+define i32 @__min_uniform_int32(i32, i32) nounwind readnone alwaysinline {
+  %cmp = icmp sgt i32 %1, %0                   ; signed compare
+  %ret = select i1 %cmp, i32 %0, i32 %1        ; keep the smaller operand
+  ret i32 %ret
+}
+
+define i32 @__max_uniform_int32(i32, i32) nounwind readnone alwaysinline {
+  %cmp = icmp sgt i32 %1, %0                   ; signed compare
+  %ret = select i1 %cmp, i32 %1, i32 %0        ; keep the larger operand
+  ret i32 %ret
+}
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; unsigned int min/max
+
+define i32 @__min_uniform_uint32(i32, i32) nounwind readnone alwaysinline {
+  %cmp = icmp ugt i32 %1, %0                   ; unsigned compare
+  %ret = select i1 %cmp, i32 %0, i32 %1        ; keep the smaller operand
+  ret i32 %ret
+}
+
+define i32 @__max_uniform_uint32(i32, i32) nounwind readnone alwaysinline {
+  %cmp = icmp ugt i32 %1, %0                   ; unsigned compare
+  %ret = select i1 %cmp, i32 %1, i32 %0        ; keep the larger operand
+  ret i32 %ret
+}
+
+declare <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32>, <8 x i32>) nounwind readnone
+declare <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32>, <8 x i32>) nounwind readnone
+
+define <16 x i32> @__min_varying_int32(<16 x i32>, <16 x i32>) nounwind readnone alwaysinline {
+  binary8to16(m, i32, @llvm.x86.avx2.pmins.d, %0, %1)
+  ret <16 x i32> %m
+}
+
+define <16 x i32> @__max_varying_int32(<16 x i32>, <16 x i32>) nounwind readnone alwaysinline {
+  binary8to16(m, i32, @llvm.x86.avx2.pmaxs.d, %0, %1)
+  ret <16 x i32> %m
+}
+
+declare <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32>, <8 x i32>) nounwind readnone
+declare <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32>, <8 x i32>) nounwind readnone
+
+define <16 x i32> @__min_varying_uint32(<16 x i32>, <16 x i32>) nounwind readnone alwaysinline {
+  binary8to16(m, i32, @llvm.x86.avx2.pminu.d, %0, %1)
+  ret <16 x i32> %m
+}
+
+define <16 x i32> @__max_varying_uint32(<16 x i32>, <16 x i32>) nounwind readnone alwaysinline {
+  binary8to16(m, i32, @llvm.x86.avx2.pmaxu.d, %0, %1)
+  ret <16 x i32> %m
+}
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; double precision min/max
+
+define double @__min_uniform_double(double, double) nounwind readnone alwaysinline {
+  %cmp = fcmp ogt double %1, %0                ; ordered: false if either operand is NaN
+  %ret = select i1 %cmp, double %0, double %1  ; %0 when %1 is greater, otherwise %1
+  ret double %ret
+}
+
+define double @__max_uniform_double(double, double) nounwind readnone alwaysinline {
+  %cmp = fcmp ogt double %1, %0                ; ordered: false if either operand is NaN
+  %ret = select i1 %cmp, double %1, double %0  ; %1 when it is greater, otherwise %0
+  ret double %ret
+}
+
+declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone
+declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwind readnone
+
+define <16 x double> @__min_varying_double(<16 x double>, <16 x double>) nounwind readnone alwaysinline {
+  binary4to16(ret, double, @llvm.x86.avx.min.pd.256, %0, %1)
+  ret <16 x double> %ret
+}
+
+define <16 x double> @__max_varying_double(<16 x double>, <16 x double>) nounwind readnone alwaysinline {
+  binary4to16(ret, double, @llvm.x86.avx.max.pd.256, %0, %1)
+  ret <16 x double> %ret
+}
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; sqrt/rsqrt/rcp
 
 declare float @__rsqrt_uniform_float(float) nounwind readnone
@@ -268,7 +361,6 @@ define i16 @__reduce_add_int16(<16 x i16>) nounwind readnone alwaysinline {
   reduce16(i16, @__add_varying_i16, @__add_uniform_i16)
 }
 
-
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; horizontal float ops
 
@@ -296,7 +388,6 @@ define float @__reduce_max_float(<16 x float>) nounwind readnone alwaysinline {
   reduce16(float, @__max_varying_float, @__max_uniform_float)
 }
 
-
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; horizontal int32 ops
 
@@ -323,7 +414,6 @@ define i32 @__reduce_max_int32(<16 x i32>) nounwind readnone alwaysinline {
   reduce16(i32, @__max_varying_int32, @__max_uniform_int32)
 }
 
-
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;; horizontal uint32 ops
 
@@ -335,7 +425,6 @@ define i32 @__reduce_max_uint32(<16 x i32>) nounwind readnone alwaysinline {
   reduce16(i32, @__max_varying_uint32, @__max_uniform_uint32)
 }
 
-
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; horizontal double ops
 
@@ -369,7 +458,6 @@ define double @__reduce_max_double(<16 x double>) nounwind readnone alwaysinline
   reduce16(double, @__max_varying_double, @__max_uniform_double)
 }
 
-
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; horizontal int64 ops
 
@@ -388,17 +476,14 @@ define i64 @__reduce_add_int64(<16 x i64>) nounwind readnone alwaysinline {
   reduce16(i64, @__add_varying_int64, @__add_uniform_int64)
 }
 
-
 define i64 @__reduce_min_int64(<16 x i64>) nounwind readnone alwaysinline {
   reduce16(i64, @__min_varying_int64, @__min_uniform_int64)
 }
 
-
 define i64 @__reduce_max_int64(<16 x i64>) nounwind readnone alwaysinline {
   reduce16(i64, @__max_varying_int64, @__max_uniform_int64)
 }
 
-
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;; horizontal uint64 ops
 
@@ -406,7 +491,6 @@ define i64 @__reduce_min_uint64(<16 x i64>) nounwind readnone alwaysinline {
   reduce16(i64, @__min_varying_uint64, @__min_uniform_uint64)
 }
 
-
 define i64 @__reduce_max_uint64(<16 x i64>) nounwind readnone alwaysinline {
   reduce16(i64, @__max_varying_uint64, @__max_uniform_uint64)
 }