diff --git a/builtins/target-nvptx64.ll b/builtins/target-nvptx64.ll index c44c67f1..030b08c0 100644 --- a/builtins/target-nvptx64.ll +++ b/builtins/target-nvptx64.ll @@ -319,6 +319,8 @@ declare float @llvm.exp.f32(float %Val) declare float @llvm.log.f32(float %Val) declare float @llvm.pow.f32(float %f, float %e) +declare float @llvm.nvvm.rsqrt.approx.f(float %f) nounwind readonly alwaysinline + @@ -624,6 +626,8 @@ define <1 x float> @__sqrt_varying_float(<1 x float>) nounwind readonly alwaysi ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; rsqrt + + define <1 x float> @__rsqrt_varying_float(<1 x float> %v) nounwind readonly alwaysinline { ; float is = __rsqrt_v(v); ;%is = call <1 x float> @llvm.x86.sse.rsqrt.ps(<1 x float> %v) @@ -635,10 +639,14 @@ define <1 x float> @__rsqrt_varying_float(<1 x float> %v) nounwind readonly alw ;%is_mul = fmul <1 x float> %is, %three_sub ;%half_scale = fmul <1 x float> , %is_mul ;ret <1 x float> %half_scale - %s = call <1 x float> @__sqrt_varying_float(<1 x float> %v) - %r = call <1 x float> @__rcp_varying_float(<1 x float> %s) - ret <1 x float> %r - + ;%s = call <1 x float> @__sqrt_varying_float(<1 x float> %v) + ;%r = call <1 x float> @__rcp_varying_float(<1 x float> %s) + ;ret <1 x float> %r + %vs = extractelement <1 x float> %v, i32 0 + %rs = call float @llvm.nvvm.rsqrt.approx.f(float %vs) +; %rs = call float asm "rsqrt.approx.f32 $0,$0", "=f,f"(float %vs) + %rv = insertelement <1 x float> undef , float %rs, i32 0 + ret <1 x float> %rv }