some changes

This commit is contained in:
egaburov
2013-07-29 11:05:05 +02:00
parent 307abc8db7
commit 153fbc3d7d
2 changed files with 19 additions and 24 deletions

View File

@@ -320,6 +320,9 @@ declare float @llvm.log.f32(float %Val)
declare float @llvm.pow.f32(float %f, float %e) declare float @llvm.pow.f32(float %f, float %e)
declare float @llvm.nvvm.rsqrt.approx.f(float %f) nounwind readonly alwaysinline declare float @llvm.nvvm.rsqrt.approx.f(float %f) nounwind readonly alwaysinline
declare float @llvm.nvvm.sqrt.f(float %f) nounwind readonly alwaysinline
declare double @llvm.nvvm.rsqrt.approx.d(double %f) nounwind readonly alwaysinline
declare double @llvm.nvvm.sqrt.d(double %f) nounwind readonly alwaysinline
@@ -614,12 +617,10 @@ define <1 x float> @__rcp_varying_float(<1 x float>) nounwind readonly alwaysin
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; sqrt ; sqrt
define <1 x float> @__sqrt_varying_float(<1 x float>) nounwind readonly alwaysinline { define <1 x float> @__sqrt_varying_float(<1 x float> %v) nounwind readonly alwaysinline {
;%call = call <1 x float> @llvm.x86.sse.sqrt.ps(<1 x float> %0) %vs = extractelement <1 x float> %v, i32 0
;ret <1 x float> %call %rs = call float @llvm.nvvm.sqrt.f(float %vs)
%d = extractelement <1 x float> %0, i32 0 %rv = insertelement <1 x float> undef , float %rs, i32 0
%r = call float @llvm.sqrt.f32(float %d)
%rv = insertelement <1 x float> undef, float %r, i32 0
ret <1 x float> %rv ret <1 x float> %rv
} }
@@ -627,27 +628,19 @@ define <1 x float> @__sqrt_varying_float(<1 x float>) nounwind readonly alwaysi
; rsqrt ; rsqrt
define <1 x float> @__rsqrt_varying_float(<1 x float> %v) nounwind readonly alwaysinline { define <1 x float> @__rsqrt_varying_float(<1 x float> %v) nounwind readonly alwaysinline {
; float is = __rsqrt_v(v);
;%is = call <1 x float> @llvm.x86.sse.rsqrt.ps(<1 x float> %v)
; Newton-Raphson iteration to improve precision
; return 0.5 * is * (3. - (v * is) * is);
;%v_is = fmul <1 x float> %v, %is
;%v_is_is = fmul <1 x float> %v_is, %is
;%three_sub = fsub <1 x float> <float 3., float 3., float 3., float 3.>, %v_is_is
;%is_mul = fmul <1 x float> %is, %three_sub
;%half_scale = fmul <1 x float> <float 0.5, float 0.5, float 0.5, float 0.5>, %is_mul
;ret <1 x float> %half_scale
;%s = call <1 x float> @__sqrt_varying_float(<1 x float> %v)
;%r = call <1 x float> @__rcp_varying_float(<1 x float> %s)
;ret <1 x float> %r
%vs = extractelement <1 x float> %v, i32 0 %vs = extractelement <1 x float> %v, i32 0
%rs = call float @llvm.nvvm.rsqrt.approx.f(float %vs) %rs = call float @llvm.nvvm.rsqrt.approx.f(float %vs)
; %rs = call float asm "rsqrt.approx.f32 $0,$0", "=f,f"(float %vs) ; %rs = call float asm "rsqrt.approx.f32 $0,$0", "=f,f"(float %vs) ; example of inline ptx
%rv = insertelement <1 x float> undef , float %rs, i32 0 %rv = insertelement <1 x float> undef , float %rs, i32 0
ret <1 x float> %rv ret <1 x float> %rv
} }
; rsqrt for the 1-wide double vector: unpacks lane 0, hands it to the scalar
; intrinsic @llvm.nvvm.rsqrt.approx.d, and repacks the scalar result.
; Same shape as @__rsqrt_varying_float, which uses the .f intrinsic instead.
define <1 x double> @__rsqrt_varying_double(<1 x double> %v) nounwind readonly alwaysinline {
; pull the only lane out of the vector argument
%vs = extractelement <1 x double> %v, i32 0
; scalar call; NOTE(review): the ".approx" in the intrinsic name suggests
; reduced precision -- confirm against the NVVM/PTX documentation
%rs = call double @llvm.nvvm.rsqrt.approx.d(double %vs)
; rebuild a <1 x double>; the undef base is harmless because lane 0 is the
; only lane and it is overwritten here
%rv = insertelement <1 x double> undef , double %rs, i32 0
ret <1 x double> %rv
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

View File

@@ -2,9 +2,11 @@ export void saxpy(const uniform float a, const uniform float x_[], const uniform
{ {
foreach (i = 0 ... n) foreach (i = 0 ... n)
{ {
const float x = x_[i]; const double x = x_[i];
const float y = y_[i]; const double y = y_[i];
const float z = y + a*x; const double dz = y + a*x;
const double dz1 = 1.0/sqrt(dz);
const float z = dz1;
z_[i] = rsqrt(z); z_[i] = rsqrt(z);
} }
} }