From 28b402a778ab810beba454973e393b64c0b28013 Mon Sep 17 00:00:00 2001
From: Anton Mitrokhin
Date: Tue, 3 Nov 2015 13:31:05 +0300
Subject: [PATCH] [AVX-512]: transcendentals: add exp() implementation, TODO: log() and pow()

---
 builtins/target-avx512-common.ll | 73 +++++++++++++++++++++++++++++++-
 1 file changed, 72 insertions(+), 1 deletion(-)

diff --git a/builtins/target-avx512-common.ll b/builtins/target-avx512-common.ll
index 317b3ed8..b3ad98d2 100644
--- a/builtins/target-avx512-common.ll
+++ b/builtins/target-avx512-common.ll
@@ -1220,5 +1220,76 @@ declare_nvptx()
 rsqrtd_decl()
 rcpd_decl()

-transcendetals_decl()
+;; Transcendentals
+
+;; exponent
+;; exp(x) is computed as exp2(x * log2(e)); the two constants below are log2(e).
+define(`F144', `0x3FF7154760000000') ;; F144 = log2(e), float precision
+define(`D144', `0x3FF71547652B82FE') ;; D144 = log2(e), double precision
+
+declare <16 x float> @llvm.x86.avx512.exp2.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone
+declare <8 x double> @llvm.x86.avx512.exp2.pd(<8 x double>, <8 x double>, i8, i32) nounwind readnone
+declare <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
+declare <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
+
+;; Uniform float exp: forwards to the scalar helper __stdlib_expf.
+define float @__exp_uniform_float(float %a) nounwind readnone alwaysinline {
+  %res = call float @__stdlib_expf(float %a)
+  ret float %res
+}
+
+;; Uniform double exp: forwards to the scalar helper __stdlib_exp.
+define double @__exp_uniform_double(double %a) nounwind readnone alwaysinline {
+  %res = call double @__stdlib_exp(double %a)
+  ret double %res
+}
+
+;; Varying float exp: multiply every lane by log2(e), then apply exp2.
+;; NOTE(review): confirm the log2(e) splat operand against the upstream file.
+define <16 x float> @__exp_varying_float(<16 x float> %a) nounwind readnone alwaysinline {
+  %a0 = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> <float F144, float F144, float F144, float F144, float F144, float F144, float F144, float F144, float F144, float F144, float F144, float F144, float F144, float F144, float F144, float F144>, <16 x float> %a, <16 x float> zeroinitializer, i16 -1, i32 0)
+  %res = call <16 x float> @llvm.x86.avx512.exp2.ps(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
+  ret <16 x float> %res
+}
+
+;; Varying double exp: the intrinsics declared above take <8 x double>, so the
+;; 16 lanes are split into two halves, processed separately, and rejoined.
+;; NOTE(review): confirm the splat operands and shuffle masks against upstream.
+define <16 x double> @__exp_varying_double(<16 x double> %a) nounwind readnone alwaysinline {
+  %alo = shufflevector <16 x double> %a, <16 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %ahi = shufflevector <16 x double> %a, <16 x double> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %alo0 = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> <double D144, double D144, double D144, double D144, double D144, double D144, double D144, double D144>, <8 x double> %alo, <8 x double> zeroinitializer, i8 -1, i32 0)
+  %ahi0 = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> <double D144, double D144, double D144, double D144, double D144, double D144, double D144, double D144>, <8 x double> %ahi, <8 x double> zeroinitializer, i8 -1, i32 0)
+  %res_lo = call <8 x double> @llvm.x86.avx512.exp2.pd(<8 x double> %alo0, <8 x double> zeroinitializer, i8 -1, i32 8)
+  %res_hi = call <8 x double> @llvm.x86.avx512.exp2.pd(<8 x double> %ahi0, <8 x double> zeroinitializer, i8 -1, i32 8)
+  %res = shufflevector <8 x double> %res_lo, <8 x double> %res_hi, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret <16 x double> %res
+}
+
+;; power
+;; Uniform float pow: forwards to the scalar helper __stdlib_powf.
+define float @__pow_uniform_float(float %a, float %b) nounwind readnone alwaysinline {
+  %res = call float @__stdlib_powf(float %a, float %b)
+  ret float %res
+}
+
+;; Uniform double pow: forwards to the scalar helper __stdlib_pow.
+define double @__pow_uniform_double(double %a, double %b) nounwind readnone alwaysinline {
+  %res = call double @__stdlib_pow(double %a, double %b)
+  ret double %res
+}
+
+;; TODO: the varying pow variants are declared only; bodies are still missing.
+declare <16 x float> @__pow_varying_float(<16 x float> %a, <16 x float> %b) nounwind readnone alwaysinline
+declare <16 x double> @__pow_varying_double(<16 x double> %a, <16 x double> %b) nounwind readnone alwaysinline
+
+
+;; log
+;; TODO: all four log variants are declared only; bodies are still missing.
+declare float @__log_uniform_float(float %a) nounwind readnone alwaysinline
+declare double @__log_uniform_double(double %a) nounwind readnone alwaysinline
+declare <16 x float> @__log_varying_float(<16 x float> %a) nounwind readnone alwaysinline
+declare <16 x double> @__log_varying_double(<16 x double> %a) nounwind readnone alwaysinline
+
+;; Trigonometry
 trigonometry_decl()