diff --git a/builtins/util.m4 b/builtins/util.m4
index 3915bc4e..74b5cdd3 100644
--- a/builtins/util.m4
+++ b/builtins/util.m4
@@ -4548,18 +4548,16 @@ define  <WIDTH x double> @__rsqrt_varying_double(<WIDTH x double>, <WIDTH x MASK
 ')
 
 define(`rcp_double', `
-define double @__rcp_uniform_double(double, <WIDTH x MASK>) nounwind alwaysinline readnone
+declare double @__rcp_safe_uniform_double___und(double, <WIDTH x MASK>)
+define  double @__rcp_uniform_double(double, <WIDTH x MASK>) nounwind alwaysinline readnone
 {
-  %flt = fptrunc double %0 to float
-  %res = call float @__rcp_uniform_float(float %flt)
-  %dres = fpext float %res to double
-  ret double %dres
+  %res = call double @__rcp_safe_uniform_double___und(double %0, <WIDTH x MASK> %1)
+  ret double %res
 }
-define <WIDTH x double> @__rcp_varying_double(<WIDTH x double>, <WIDTH x MASK>) nounwind alwaysinline readnone
+declare <WIDTH x double> @__rcp_safe_varying_double___vyd(<WIDTH x double>, <WIDTH x MASK>) 
+define  <WIDTH x double> @__rcp_varying_double(<WIDTH x double>, <WIDTH x MASK>) nounwind alwaysinline readnone
 {
-  %flt = fptrunc <WIDTH x double> %0 to <WIDTH x float>
-  %res = call <WIDTH x float> @__rcp_varying_float(<WIDTH x float> %flt)
-  %dres = fpext <WIDTH x float> %res to <WIDTH x double>
-  ret <WIDTH x double> %dres
+  %res = call <WIDTH x double> @__rcp_safe_varying_double___vyd(<WIDTH x double> %0, <WIDTH x MASK> %1)
+  ret <WIDTH x double> %res
 }
 ')
diff --git a/stdlib.ispc b/stdlib.ispc
index 70b5f0d1..bd12034c 100644
--- a/stdlib.ispc
+++ b/stdlib.ispc
@@ -1391,11 +1391,32 @@ static inline uniform float rcp(uniform float v) {
     return __rcp_uniform_float(v);
 }
 
+#define RCPD(QUAL) \
+__declspec(safe)  \
+static inline QUAL double __rcp_iterate_##QUAL##_double(QUAL double v, QUAL double iv) \
+{ \
+  iv = iv * (2.0d - v*iv); \
+  iv = iv * (2.0d - v*iv); \
+  return iv; \
+} \
+__declspec(safe)  \
+static inline QUAL double __rcp_safe_##QUAL##_double(QUAL double x) \
+{ \
+  if (x <= 1.0d+33 && x >= 1.0d-33) \
+    return __rcp_iterate_##QUAL##_double(x, rcp((QUAL float)x)); \
+  QUAL int64  ex   = intbits(x) & 0x7fe0000000000000; \
+  QUAL double exp  = doublebits(  0x7fd0000000000000 + ~ex      );   \
+  QUAL double   y  = rcp((QUAL float)(x*exp)); \
+  return __rcp_iterate_##QUAL##_double(x, y*exp); \
+} 
+
+RCPD(varying)
 __declspec(safe) 
 static inline double rcp(double v) {
     return __rcp_varying_double(v, (IntMaskType)__mask);
 }
 
+RCPD(uniform)
 __declspec(safe) 
 static inline uniform double rcp(uniform double v) {
     return __rcp_uniform_double(v, (IntMaskType)__mask);
@@ -3529,7 +3550,7 @@ static inline uniform double sqrt(uniform double v) {
 
 #define RSQRTD(QUAL)  \
 __declspec(safe)    \
-static inline QUAL double __rsqrt_iterate_double(QUAL double x, QUAL double y)   \
+static inline QUAL double __rsqrt_iterate_##QUAL##_double(QUAL double x, QUAL double y)   \
 {   \
   QUAL double xh = x*0.5d;    \
   y += y*(0.5d0 - xh*y*y);    \
@@ -3540,12 +3561,12 @@ __declspec(safe)    \
 static inline QUAL double __rsqrt_safe_##QUAL##_double (QUAL double x)    \
 {   \
   if (x <= 1.0d+33 && x >= 1.0d-33)   \
-    return __rsqrt_iterate_double(x, rsqrt((QUAL float)x));   \
+    return __rsqrt_iterate_##QUAL##_double(x, rsqrt((QUAL float)x));   \
   QUAL int64  ex   = intbits(x) & 0x7fe0000000000000;                 \
   QUAL double exp  = doublebits(  0x7fd0000000000000 -  ex      );   /* 1.0d/exponent  */   \
   QUAL double exph = doublebits(  0x5fe0000000000000 - (ex >> 1));   /* 1.0d/sqrt(exponent) */    \
   QUAL double   y  = rsqrt((QUAL float)(x*exp));          \
-  return __rsqrt_iterate_double(x, y*exph);    \
+  return __rsqrt_iterate_##QUAL##_double(x, y*exph);    \
 }
 
 RSQRTD(varying)
diff --git a/tests/test-147.ispc b/tests/test-147.ispc
index 4091a78d..5c5d4bc1 100644
--- a/tests/test-147.ispc
+++ b/tests/test-147.ispc
@@ -3,12 +3,12 @@ export uniform int width() { return programCount; }
 
 
 export void f_f(uniform float RET[], uniform float aFOO[]) {
-    double x = aFOO[programIndex&0x3]; 
+    double x = aFOO[programIndex&0x3]*1d100;
     double d, ix; 
     ix = rcp(x);
     d = (ix - 1.0d0 / x);
     d = (d < 0.0d0) ? -d : d;
-    RET[programIndex] = (d < 1d-7) ? 1.0d0 : 0.0d0; 
+    RET[programIndex] = (d < 1d-15 && !isnan(d)) ? 1.0d0 : 0.0d0; 
 }
 
 export void result(uniform float RET[]) {