Merge pull request #1117 from ncos/native-knl
[AVX-512]: transcendentals: add exp() implementation, no functional change
This commit is contained in:
@@ -1220,5 +1220,73 @@ declare_nvptx()
|
||||
rsqrtd_decl()
|
||||
rcpd_decl()
|
||||
|
||||
transcendetals_decl()
|
||||
;; Transcendentals
|
||||
|
||||
;; exponent
|
||||
define(`F144', `0x3FF7154760000000') ;; F144 = log2(e), i.e. 1/ln(2) ~= 1.442695 (rounded to float precision)
|
||||
define(`D144', `0x3FF71547652B82FE') ;; D144 = log2(e), i.e. 1/ln(2) ~= 1.4426950408889634 (double precision)
|
||||
|
||||
;; LLVM x86 AVX-512 intrinsics called by the varying exp implementations in
;; this section: exp2 on 16 floats or 8 doubles, and the masked multiply that
;; is applied to the argument before exp2.
;; NOTE(review): the trailing integer operands are presumably a lane mask and
;; a rounding or exception control - confirm against the LLVM version in use.
declare <16 x float> @llvm.x86.avx512.exp2.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone
|
||||
declare <8 x double> @llvm.x86.avx512.exp2.pd(<8 x double>, <8 x double>, i8, i32) nounwind readnone
|
||||
declare <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
|
||||
declare <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
|
||||
|
||||
;; Uniform single-precision exp.
;; Forwards the scalar argument to __stdlib_expf and returns that result.
define float @__exp_uniform_float(float %a) nounwind readnone alwaysinline {
entry:
  %exp_val = call float @__stdlib_expf(float %a)
  ret float %exp_val
}
|
||||
|
||||
;; Uniform double-precision exp.
;; Forwards the scalar argument to __stdlib_exp and returns that result.
define double @__exp_uniform_double(double %a) nounwind readnone alwaysinline {
entry:
  %exp_val = call double @__stdlib_exp(double %a)
  ret double %exp_val
}
|
||||
|
||||
;; Varying single-precision exp over 16 lanes.
;; Every lane is first multiplied by the float-precision log2 of e constant,
;; and the product is handed to the AVX-512 exp2 intrinsic, so the result is
;; exp2 of the scaled argument.
;; NOTE(review): i16 -1 is presumably an all-lanes mask and the final i32 a
;; rounding or exception control - confirm against the intrinsic definitions.
define <16 x float> @__exp_varying_float(<16 x float> %a) nounwind readnone alwaysinline {
entry:
  ;; scale the argument: constant splat times %a
  %scaled = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(
              <16 x float> <float F144, float F144, float F144, float F144,
                            float F144, float F144, float F144, float F144,
                            float F144, float F144, float F144, float F144,
                            float F144, float F144, float F144, float F144>,
              <16 x float> %a,
              <16 x float> zeroinitializer, i16 -1, i32 0)
  ;; base-2 exponential of the scaled argument
  %pow2 = call <16 x float> @llvm.x86.avx512.exp2.ps(
              <16 x float> %scaled,
              <16 x float> zeroinitializer, i16 -1, i32 8)
  ret <16 x float> %pow2
}
|
||||
|
||||
;; Varying double-precision exp over 16 lanes.
;; The input is taken apart into two 8-lane halves. Each half is multiplied by
;; the double-precision log2 of e constant and passed to the 8-lane AVX-512
;; exp2 intrinsic, then both results are joined back into one 16-lane value.
;; NOTE(review): i8 -1 is presumably an all-lanes mask and the final i32 a
;; rounding or exception control - confirm against the intrinsic definitions.
define <16 x double> @__exp_varying_double(<16 x double> %a) nounwind readnone alwaysinline {
entry:
  ;; lanes 0..7
  %lo_in = shufflevector <16 x double> %a, <16 x double> undef,
             <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %lo_scaled = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(
             <8 x double> <double D144, double D144, double D144, double D144,
                           double D144, double D144, double D144, double D144>,
             <8 x double> %lo_in,
             <8 x double> zeroinitializer, i8 -1, i32 0)
  %lo_out = call <8 x double> @llvm.x86.avx512.exp2.pd(
             <8 x double> %lo_scaled,
             <8 x double> zeroinitializer, i8 -1, i32 8)

  ;; lanes 8..15
  %hi_in = shufflevector <16 x double> %a, <16 x double> undef,
             <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %hi_scaled = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(
             <8 x double> <double D144, double D144, double D144, double D144,
                           double D144, double D144, double D144, double D144>,
             <8 x double> %hi_in,
             <8 x double> zeroinitializer, i8 -1, i32 0)
  %hi_out = call <8 x double> @llvm.x86.avx512.exp2.pd(
             <8 x double> %hi_scaled,
             <8 x double> zeroinitializer, i8 -1, i32 8)

  ;; join the two halves, low half first
  %joined = shufflevector <8 x double> %lo_out, <8 x double> %hi_out,
             <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
                         i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x double> %joined
}
|
||||
|
||||
;; power
|
||||
;; Uniform single-precision pow.
;; Forwards both scalar arguments, in order, to __stdlib_powf and returns
;; that result.
define float @__pow_uniform_float(float %a, float %b) nounwind readnone alwaysinline {
entry:
  %pow_val = call float @__stdlib_powf(float %a, float %b)
  ret float %pow_val
}
|
||||
|
||||
;; Uniform double-precision pow.
;; Forwards both scalar arguments, in order, to __stdlib_pow and returns
;; that result.
define double @__pow_uniform_double(double %a, double %b) nounwind readnone alwaysinline {
entry:
  %pow_val = call double @__stdlib_pow(double %a, double %b)
  ret double %pow_val
}
|
||||
|
||||
;; Declaration only: this section gives no body for the 16-lane float pow.
;; A stub definition is left commented out right after this line.
declare <16 x float> @__pow_varying_float(<16 x float> %a, <16 x float> %b) nounwind readnone alwaysinline
|
||||
|
||||
;;define <16 x float> @__pow_varying_float(<16 x float> %a, <16 x float> %b) nounwind readnone alwaysinline
|
||||
;; ret <16 x float> %a
|
||||
;;}
|
||||
|
||||
;; Declaration only: this section gives no body for the 16-lane double pow.
declare <16 x double> @__pow_varying_double(<16 x double> %a, <16 x double> %b) nounwind readnone alwaysinline
|
||||
|
||||
|
||||
;; log
|
||||
;; Declarations only: the uniform and varying log entry points for float and
;; double have no bodies in this section.
declare float @__log_uniform_float(float %a) nounwind readnone alwaysinline
|
||||
declare double @__log_uniform_double(double %a) nounwind readnone alwaysinline
|
||||
declare <16 x float> @__log_varying_float(<16 x float> %a) nounwind readnone alwaysinline
|
||||
declare <16 x double> @__log_varying_double(<16 x double> %a) nounwind readnone alwaysinline
|
||||
|
||||
;; Trigonometry
|
||||
trigonometry_decl()
|
||||
|
||||
Reference in New Issue
Block a user