+added approx rcp/rsqrt/rtz with ftz=true

This commit is contained in:
Evghenii
2013-11-14 22:17:57 +01:00
parent 2c8afde6d9
commit f12826bac5
3 changed files with 9 additions and 6 deletions

View File

@@ -510,18 +510,21 @@ declare double @llvm.nvvm.sqrt.d(double %f) nounwind readonly alwaysinline
;; Uniform (scalar) single-precision reciprocal: returns an approximation of 1/%0.
;; The value is produced by the PTX instruction rcp.approx.ftz.f32, issued through
;; inline asm; the "=f,f" constraints bind $0 to the float result and $1 to the
;; float argument. Because this is the approximate, flush-to-zero (ftz) form, the
;; result may differ from the exact fdiv it replaces.
;; NOTE(review): the asm is tagged 'sideeffect' while the call is 'readnone' and
;; the function 'readonly' -- confirm that 'sideeffect' is intentional.
define float @__rcp_uniform_float(float) nounwind readonly alwaysinline {
;  Alternative formulation, not used:
;    uniform float iv = extract(__rcp_u(v), 0);
;    return iv * (2. - v * iv);
;  Previous exact implementation, kept for reference:
;    %ret = fdiv float 1.,%0
  %ret = tail call float asm sideeffect "rcp.approx.ftz.f32 $0, $1;", "=f,f"(float %0) nounwind readnone alwaysinline
  ret float %ret
}
;; declare float @__sqrt_uniform_float(float) nounwind readnone
;; Uniform (scalar) single-precision square root of %0.
;; The value is produced by the PTX instruction sqrt.approx.ftz.f32, issued through
;; inline asm; the "=f,f" constraints bind $0 to the float result and $1 to the
;; float argument. This is the approximate, flush-to-zero (ftz) form, used in
;; place of the @llvm.nvvm.sqrt.f intrinsic call shown below.
;; NOTE(review): the asm is tagged 'sideeffect' while the call is 'readnone' and
;; the function 'readonly' -- confirm that 'sideeffect' is intentional.
define float @__sqrt_uniform_float(float) nounwind readonly alwaysinline {
;  Previous intrinsic-based implementation, kept for reference:
;    %ret = call float @llvm.nvvm.sqrt.f(float %0)
  %ret = tail call float asm sideeffect "sqrt.approx.ftz.f32 $0, $1;", "=f,f"(float %0) nounwind readnone alwaysinline
  ret float %ret
}
;; declare float @__rsqrt_uniform_float(float) nounwind readnone
;; Uniform (scalar) single-precision reciprocal square root of %0.
;; The value is produced by the PTX instruction rsqrt.approx.ftz.f32, issued
;; through inline asm; the "=f,f" constraints bind $0 to the float result and $1
;; to the float argument. This is the approximate, flush-to-zero (ftz) form, used
;; in place of the @llvm.nvvm.rsqrt.approx.f intrinsic call shown below.
;; NOTE(review): the asm is tagged 'sideeffect' while the call is 'readnone' and
;; the function 'readonly' -- confirm that 'sideeffect' is intentional.
define float @__rsqrt_uniform_float(float) nounwind readonly alwaysinline
{
;  Previous intrinsic-based implementation, kept for reference:
;    %ret = call float @llvm.nvvm.rsqrt.approx.f(float %0)
  %ret = tail call float asm sideeffect "rsqrt.approx.ftz.f32 $0, $1;", "=f,f"(float %0) nounwind readnone alwaysinline
  ret float %ret
}