From 7691d961c1ff8a5ce11180b9bf77572dde49c8ec Mon Sep 17 00:00:00 2001 From: Andrey Shishpanov Date: Thu, 10 Mar 2016 23:59:32 +0300 Subject: [PATCH] Fixed ifelse in rsqrt, rcp def. for knl, skx (compfails with old LLVM). --- builtins/target-knl.ll | 19 +++++++------ builtins/target-skx.ll | 61 +++++++++++++++++++++--------------------- 2 files changed, 42 insertions(+), 38 deletions(-) diff --git a/builtins/target-knl.ll b/builtins/target-knl.ll index 799ed500..bf75c20e 100644 --- a/builtins/target-knl.ll +++ b/builtins/target-knl.ll @@ -31,7 +31,6 @@ define(`WIDTH',`16') - ifelse(LLVM_VERSION, LLVM_3_7, `include(`target-avx512-common.ll')', LLVM_VERSION, LLVM_3_8, @@ -41,23 +40,27 @@ ifelse(LLVM_VERSION, LLVM_3_7, ) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; rcp +;; rcp, rsqrt +define(`rcp_rsqrt_varying_float_knl',` declare <16 x float> @llvm.x86.avx512.rcp28.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone - define <16 x float> @__rcp_varying_float(<16 x float>) nounwind readonly alwaysinline { %res = call <16 x float> @llvm.x86.avx512.rcp28.ps(<16 x float> %0, <16 x float> undef, i16 -1, i32 8) ret <16 x float> %res } - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; rsqrt - declare <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone - define <16 x float> @__rsqrt_varying_float(<16 x float> %v) nounwind readonly alwaysinline { %res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %v, <16 x float> undef, i16 -1, i32 8) ret <16 x float> %res } +') + +ifelse(LLVM_VERSION, LLVM_3_7, + rcp_rsqrt_varying_float_knl(), + LLVM_VERSION, LLVM_3_8, + rcp_rsqrt_varying_float_knl(), + LLVM_VERSION, LLVM_3_9, + rcp_rsqrt_varying_float_knl() + ) ;;saturation_arithmetic_novec() diff --git a/builtins/target-skx.ll b/builtins/target-skx.ll index e8929894..d330f259 100644 --- a/builtins/target-skx.ll +++ b/builtins/target-skx.ll @@ -39,47 +39,48 @@ ifelse(LLVM_VERSION, LLVM_3_8, ) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; rcp - +;; rcp, rsqrt +define(`rcp_rsqrt_varying_float_skx',` declare <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone - define <16 x float> @__rcp_varying_float(<16 x float>) nounwind readonly alwaysinline { %call = call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> %0, <16 x float> undef, i16 -1) - ; do one Newton-Raphson iteration to improve precision - ; float iv = __rcp_v(v); - ; return iv * (2. - v * iv); - %v_iv = fmul <16 x float> %0, %call - %two_minus = fsub <16 x float> , %v_iv - %iv_mul = fmul <16 x float> %call, %two_minus + ;; do one Newton-Raphson iteration to improve precision + ;; float iv = __rcp_v(v); + ;; return iv * (2. - v * iv); + %v_iv = fmul <16 x float> %0`,' %call + %two_minus = fsub <16 x float> `,' %v_iv + %iv_mul = fmul <16 x float> %call`,' %two_minus ret <16 x float> %iv_mul } - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; rsqrt - -declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone - +declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>`,' <16 x float>`,' i16) nounwind readnone define <16 x float> @__rsqrt_varying_float(<16 x float> %v) nounwind readonly alwaysinline { - %is = call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> %v, <16 x float> undef, i16 -1) + %is = call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> %v`,' <16 x float> undef`,' i16 -1) ; Newton-Raphson iteration to improve precision ; float is = __rsqrt_v(v); ; return 0.5 * is * (3. - (v * is) * is); - %v_is = fmul <16 x float> %v, %is - %v_is_is = fmul <16 x float> %v_is, %is - %three_sub = fsub <16 x float> , %v_is_is - %is_mul = fmul <16 x float> %is, %three_sub - %half_scale = fmul <16 x float> , %is_mul + %v_is = fmul <16 x float> %v`,' %is + %v_is_is = fmul <16 x float> %v_is`,' %is + %three_sub = fsub <16 x float> `,' %v_is_is + %is_mul = fmul <16 x float> %is`,' %three_sub + %half_scale = fmul <16 x float> `,' %is_mul ret <16 x float> %half_scale } +') + +ifelse(LLVM_VERSION, LLVM_3_8, + rcp_rsqrt_varying_float_skx(), + LLVM_VERSION, LLVM_3_9, + rcp_rsqrt_varying_float_skx() + ) ;;saturation_arithmetic_novec()