Merge pull request #1117 from ncos/native-knl

[AVX-512]: transcendentals: add exp() implementation, no functional change
This commit is contained in:
Dmitry Babokin
2015-11-03 13:41:17 +03:00

View File

@@ -1220,5 +1220,73 @@ declare_nvptx()
;; The three macros invoked here are defined outside this section.
;; NOTE(review): presumably each one expands to declarations for the matching builtins.
;; If the last one also declares the __exp_* / __pow_* / __log_* symbols that are spelled
;; out explicitly further down, confirm the two sets agree -- TODO verify.
rsqrtd_decl()
rcpd_decl()
transcendetals_decl()
;; Transcendentals
;; exponent
;; log2(e) ~= 1.442695, the scale factor in exp(x) = exp2(x * log2(e)).
;; Both literals are written as 64-bit IEEE-754 bit patterns.
define(`F144', `0x3FF7154760000000') ;; single-precision log2(e): low 29 mantissa bits are zero, so it is exact as a float
define(`D144', `0x3FF71547652B82FE') ;; double-precision log2(e)
;; avx512 intrinsics used by the varying exp implementations in this section.
;; The call sites here pass: the data vector(s), a zeroinitializer vector, an integer
;; value of -1 (i16 for 16 floats, i8 for 8 doubles), and a trailing i32 (8 for exp2, 0 for mul).
;; NOTE(review): presumably the zero vector is the pass-through operand, the -1 is a
;; per-lane write mask, and the trailing i32 is the rounding / exception control --
;; confirm against the intrinsic definitions for the toolchain in use.
declare <16 x float> @llvm.x86.avx512.exp2.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone
declare <8 x double> @llvm.x86.avx512.exp2.pd(<8 x double>, <8 x double>, i8, i32) nounwind readnone
declare <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
declare <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
;; Uniform float exp: hands the scalar argument straight to __stdlib_expf
;; and returns whatever that routine produces.
define float @__exp_uniform_float(float %a) nounwind readnone alwaysinline {
  %expf_result = call float @__stdlib_expf(float %a)
  ret float %expf_result
}
;; Uniform double exp: hands the scalar argument straight to __stdlib_exp
;; and returns whatever that routine produces.
define double @__exp_uniform_double(double %a) nounwind readnone alwaysinline {
  %exp_result = call double @__stdlib_exp(double %a)
  ret double %exp_result
}
;; Varying float exp over 16 lanes, evaluated as exp2(a * log2(e)).
define <16 x float> @__exp_varying_float(<16 x float> %a) nounwind readnone alwaysinline {
  ;; Step 1: multiply every lane by the log2(e) constant. The i16 -1 operand has all 16 bits set.
  %scaled = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(
      <16 x float> <float F144, float F144, float F144, float F144,
                    float F144, float F144, float F144, float F144,
                    float F144, float F144, float F144, float F144,
                    float F144, float F144, float F144, float F144>,
      <16 x float> %a,
      <16 x float> zeroinitializer,
      i16 -1, i32 0)
  ;; Step 2: base-2 exponential of the scaled lanes.
  %pow2 = call <16 x float> @llvm.x86.avx512.exp2.ps(
      <16 x float> %scaled,
      <16 x float> zeroinitializer,
      i16 -1, i32 8)
  ret <16 x float> %pow2
}
;; Varying double exp over 16 lanes, evaluated as exp2(a * log2(e)).
;; The 16-wide input is split into two 8-lane halves, each half is scaled and sent
;; through the 8-wide exp2 intrinsic, and the two results are concatenated again.
;; NOTE(review): the hardware exp2 instruction is documented as an approximation;
;; confirm its precision is acceptable for double results -- TODO verify.
define <16 x double> @__exp_varying_double(<16 x double> %a) nounwind readnone alwaysinline {
  ;; Lanes 0..7: extract, scale by log2(e), exponentiate.
  %lower = shufflevector <16 x double> %a, <16 x double> undef,
      <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %lower_scaled = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(
      <8 x double> <double D144, double D144, double D144, double D144,
                    double D144, double D144, double D144, double D144>,
      <8 x double> %lower,
      <8 x double> zeroinitializer,
      i8 -1, i32 0)
  %lower_exp2 = call <8 x double> @llvm.x86.avx512.exp2.pd(
      <8 x double> %lower_scaled,
      <8 x double> zeroinitializer,
      i8 -1, i32 8)

  ;; Lanes 8..15: same pipeline on the other half.
  %upper = shufflevector <16 x double> %a, <16 x double> undef,
      <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %upper_scaled = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(
      <8 x double> <double D144, double D144, double D144, double D144,
                    double D144, double D144, double D144, double D144>,
      <8 x double> %upper,
      <8 x double> zeroinitializer,
      i8 -1, i32 0)
  %upper_exp2 = call <8 x double> @llvm.x86.avx512.exp2.pd(
      <8 x double> %upper_scaled,
      <8 x double> zeroinitializer,
      i8 -1, i32 8)

  ;; Concatenate the halves back into one 16-lane vector, low half first.
  %joined = shufflevector <8 x double> %lower_exp2, <8 x double> %upper_exp2,
      <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
                  i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x double> %joined
}
;; power
;; Uniform float pow: passes both scalar arguments, in order, to __stdlib_powf
;; and returns its result.
define float @__pow_uniform_float(float %a, float %b) nounwind readnone alwaysinline {
  %powf_result = call float @__stdlib_powf(float %a, float %b)
  ret float %powf_result
}
;; Uniform double pow: passes both scalar arguments, in order, to __stdlib_pow
;; and returns its result.
define double @__pow_uniform_double(double %a, double %b) nounwind readnone alwaysinline {
  %pow_result = call double @__stdlib_pow(double %a, double %b)
  ret double %pow_result
}
;; Varying pow (float and double) is only declared in this section; no body is given here.
declare <16 x float> @__pow_varying_float(<16 x float> %a, <16 x float> %b) nounwind readnone alwaysinline
;; Disabled placeholder body for the float variant (it would return its first argument unchanged):
;;define <16 x float> @__pow_varying_float(<16 x float> %a, <16 x float> %b) nounwind readnone alwaysinline
;; ret <16 x float> %a
;;}
declare <16 x double> @__pow_varying_double(<16 x double> %a, <16 x double> %b) nounwind readnone alwaysinline
;; log
;; All four log variants (uniform/varying, float/double) are declarations only in this
;; section; any definitions live outside the visible code.
declare float @__log_uniform_float(float %a) nounwind readnone alwaysinline
declare double @__log_uniform_double(double %a) nounwind readnone alwaysinline
declare <16 x float> @__log_varying_float(<16 x float> %a) nounwind readnone alwaysinline
declare <16 x double> @__log_varying_double(<16 x double> %a) nounwind readnone alwaysinline
;; Trigonometry
;; NOTE(review): the macro invoked next is defined outside this section; presumably it
;; emits the trigonometry builtin declarations -- confirm.
trigonometry_decl()