From b60d77c15452e56e2a6028480cfe807fd0df7874 Mon Sep 17 00:00:00 2001 From: Evghenii Date: Fri, 21 Feb 2014 10:36:46 +0100 Subject: [PATCH] use __nv_* libcalls for rcp/sqrt/rsqrt --- builtins/target-nvptx.ll | 63 +++++++++++++++++----------------------- 1 file changed, 26 insertions(+), 37 deletions(-) diff --git a/builtins/target-nvptx.ll b/builtins/target-nvptx.ll index 81dbdf49..6db1cc89 100644 --- a/builtins/target-nvptx.ll +++ b/builtins/target-nvptx.ll @@ -678,46 +678,41 @@ minmax_vy(max, double, double) ;; sqrt/rsqrt/rcp -declare float @llvm.nvvm.rsqrt.approx.f(float %f) nounwind readonly alwaysinline -declare float @llvm.nvvm.sqrt.f(float %f) nounwind readonly alwaysinline -declare double @llvm.nvvm.rsqrt.approx.d(double %f) nounwind readonly alwaysinline -declare double @llvm.sqrt.f64(double %f) nounwind readonly alwaysinline - -;; declare float @__rcp_uniform_float(float) nounwind readnone define float @__rcp_uniform_float(float) nounwind readonly alwaysinline { -; uniform float iv = extract(__rcp_u(v), 0); -; return iv * (2. - v * iv); %ret = fdiv float 1.,%0 -; %ret = tail call float asm sideeffect "rcp.approx.ftz.f32 $0, $1;", "=f,f"(float %0) nounwind readnone alwaysinline ret float %ret } -;; declare float @__sqrt_uniform_float(float) nounwind readnone -define float @__sqrt_uniform_float(float) nounwind readonly alwaysinline { - %ret = call float @llvm.nvvm.sqrt.f(float %0) -; %ret = tail call float asm sideeffect "sqrt.approx.ftz.f32 $0, $1;", "=f,f"(float %0) nounwind readnone alwaysinline - ret float %ret -} -;; declare float @__rsqrt_uniform_float(float) nounwind readnone -define float @__rsqrt_uniform_float(float) nounwind readonly alwaysinline -{ - %ret = call float @llvm.nvvm.rsqrt.approx.f(float %0) -; %ret = tail call float asm sideeffect "rsqrt.approx.ftz.f32 $0, $1;", "=f,f"(float %0) nounwind readnone alwaysinline - ret float %ret -} - -define double @__rsqrt_uniform_double(double) nounwind readonly alwaysinline -{ - %ret1 = call double @llvm.sqrt.f64(double %0) - %ret = fdiv double 1., %ret1 - ret double %ret -} +declare double @__nv_drcp_rn(double) define double @__rcp_uniform_double(double) nounwind readonly alwaysinline { - %ret = fdiv double 1., %0 + %ret = call double @__nv_drcp_rn(double %0) + ret double %ret +} +declare float @__nv_sqrtf(float) +define float @__sqrt_uniform_float(float) nounwind readonly alwaysinline +{ + %ret = call float @__nv_sqrtf(float %0) + ret float %ret +} +declare double @__nv_sqrt(double) +define double @__sqrt_uniform_double(double) nounwind readonly alwaysinline { + %ret = call double @__nv_sqrt(double %0) + ret double %ret +} +declare float @__nv_rsqrtf(float) +define float @__rsqrt_uniform_float(float) nounwind readonly alwaysinline +{ + %ret = call float @__nv_rsqrtf(float %0) + ret float %ret +} +declare double @__nv_rsqrt(double) +define double @__rsqrt_uniform_double(double) nounwind readonly alwaysinline +{ + %ret = call double @__nv_rsqrt(double %0) ret double %ret } - +;;;;;; varying define @__rcp_varying_float() nounwind readnone alwaysinline { %v = extractelement <1 x float> %0, i32 0 @@ -746,7 +741,6 @@ define @__rsqrt_varying_double() nounwind readn %rv = insertelement <1 x double> undef, double %r, i32 0 ret %rv } - define @__sqrt_varying_float() nounwind readnone alwaysinline { %v = extractelement <1 x float> %0, i32 0 @@ -754,11 +748,6 @@ define @__sqrt_varying_float() nounwind readnone %rv = insertelement <1 x float> undef, float %r, i32 0 ret %rv } -;; declare double @__sqrt_uniform_double(double) nounwind readnone -define double @__sqrt_uniform_double(double) nounwind readonly alwaysinline { - %ret = call double @llvm.sqrt.f64(double %0) - ret double %ret -} define @__sqrt_varying_double() nounwind readnone alwaysinline { %v = extractelement <1 x double> %0, i32 0