diff --git a/builtins.cpp b/builtins.cpp
index da66ed5f..fee322e7 100644
--- a/builtins.cpp
+++ b/builtins.cpp
@@ -1150,6 +1150,10 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
                        symbolTable);
     lDefineConstantInt("__have_native_transcendentals", g->target->hasTranscendentals(),
                        module, symbolTable);
+    // Export the target's native double-precision rsqrt/rcp capability flags as
+    // constants; rsqrt()/rcp() in stdlib.ispc branch on them.
+    lDefineConstantInt("__have_native_rsqrtd", g->target->hasRsqrtd(), module, symbolTable);
+    lDefineConstantInt("__have_native_rcpd", g->target->hasRcpd(), module, symbolTable);
 
     if (g->forceAlignment != -1) {
         llvm::GlobalVariable *alignment = module->getGlobalVariable("memory_alignment", true);
diff --git a/builtins/target-generic-common.ll b/builtins/target-generic-common.ll
index dfdc7e9e..401c862d 100644
--- a/builtins/target-generic-common.ll
+++ b/builtins/target-generic-common.ll
@@ -191,13 +191,13 @@ declare <WIDTH x double> @__max_varying_double(<WIDTH x double>,
 
 declare float @__rsqrt_uniform_float(float) nounwind readnone 
 declare float @__rcp_uniform_float(float) nounwind readnone 
-declare double @__rsqrt_uniform_double(double, <WIDTH x MASK>) nounwind readnone 
-declare double @__rcp_uniform_double(double, <WIDTH x MASK>) nounwind readnone 
+declare double @__rsqrt_uniform_double(double) nounwind readnone
+declare double @__rcp_uniform_double(double) nounwind readnone
 declare float @__sqrt_uniform_float(float) nounwind readnone 
 declare <WIDTH x float> @__rcp_varying_float(<WIDTH x float>) nounwind readnone 
 declare <WIDTH x float> @__rsqrt_varying_float(<WIDTH x float>) nounwind readnone 
-declare <WIDTH x double> @__rcp_varying_double(<WIDTH x double>, <WIDTH x MASK>) nounwind readnone 
-declare <WIDTH x double> @__rsqrt_varying_double(<WIDTH x double>, <WIDTH x MASK>) nounwind readnone 
+declare <WIDTH x double> @__rcp_varying_double(<WIDTH x double>) nounwind readnone
+declare <WIDTH x double> @__rsqrt_varying_double(<WIDTH x double>) nounwind readnone
 
 declare <WIDTH x float> @__sqrt_varying_float(<WIDTH x float>) nounwind readnone 
 
diff --git a/builtins/util.m4 b/builtins/util.m4
index 74b5cdd3..8a1f280a 100644
--- a/builtins/util.m4
+++ b/builtins/util.m4
@@ -4533,31 +4533,13 @@ define_down_avgs()
 ')
 
 define(`rsqrt_double', `
-declare double @__rsqrt_safe_uniform_double___und(double, <WIDTH x MASK>)
-define  double @__rsqrt_uniform_double(double, <WIDTH x MASK>) nounwind alwaysinline readnone
-{
-  %res = call double @__rsqrt_safe_uniform_double___und(double %0, <WIDTH x MASK> %1)
-  ret double %res
-}
-declare <WIDTH x double> @__rsqrt_safe_varying_double___vyd(<WIDTH x double>, <WIDTH x MASK>) 
-define  <WIDTH x double> @__rsqrt_varying_double(<WIDTH x double>, <WIDTH x MASK>) nounwind alwaysinline readnone
-{
-  %res = call <WIDTH x double> @__rsqrt_safe_varying_double___vyd(<WIDTH x double> %0, <WIDTH x MASK> %1)
-  ret <WIDTH x double> %res
-}
+;; Declarations only: stdlib.ispc calls these when __have_native_rsqrtd is set.
+declare double @__rsqrt_uniform_double(double) nounwind readnone
+declare <WIDTH x double> @__rsqrt_varying_double(<WIDTH x double>) nounwind readnone
 ')
 
 define(`rcp_double', `
-declare double @__rcp_safe_uniform_double___und(double, <WIDTH x MASK>)
-define  double @__rcp_uniform_double(double, <WIDTH x MASK>) nounwind alwaysinline readnone
-{
-  %res = call double @__rcp_safe_uniform_double___und(double %0, <WIDTH x MASK> %1)
-  ret double %res
-}
-declare <WIDTH x double> @__rcp_safe_varying_double___vyd(<WIDTH x double>, <WIDTH x MASK>) 
-define  <WIDTH x double> @__rcp_varying_double(<WIDTH x double>, <WIDTH x MASK>) nounwind alwaysinline readnone
-{
-  %res = call <WIDTH x double> @__rcp_safe_varying_double___vyd(<WIDTH x double> %0, <WIDTH x MASK> %1)
-  ret <WIDTH x double> %res
-}
+;; Declarations only: stdlib.ispc calls these when __have_native_rcpd is set.
+declare double @__rcp_uniform_double(double) nounwind readnone
+declare <WIDTH x double> @__rcp_varying_double(<WIDTH x double>) nounwind readnone
 ')
diff --git a/ispc.cpp b/ispc.cpp
index ed326b14..1386d65e 100644
--- a/ispc.cpp
+++ b/ispc.cpp
@@ -201,7 +201,9 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
     m_hasRand(false),
     m_hasGather(false),
     m_hasScatter(false),
-    m_hasTranscendentals(false)
+    m_hasTranscendentals(false),
+    m_hasRsqrtd(false),  // off by default; enabled per ISA in the constructor body
+    m_hasRcpd(false)     // off by default; enabled per ISA in the constructor body
 {
     if (isa == NULL) {
         if (cpu != NULL) {
@@ -419,6 +421,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
         this->m_hasHalf = true;
         this->m_hasTranscendentals = true;
         this->m_hasGather = this->m_hasScatter = true;
+        this->m_hasRsqrtd = this->m_hasRcpd = true;  // double-precision rsqrt and rcp flagged as available
     }
     else if (!strcasecmp(isa, "generic-8") ||
              !strcasecmp(isa, "generic-x8")) {
@@ -431,6 +434,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
         this->m_hasHalf = true;
         this->m_hasTranscendentals = true;
         this->m_hasGather = this->m_hasScatter = true;
+        this->m_hasRsqrtd = this->m_hasRcpd = true;  // double-precision rsqrt and rcp flagged as available
     }
     else if (!strcasecmp(isa, "generic-16") ||
              !strcasecmp(isa, "generic-x16")) {
@@ -443,6 +447,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
         this->m_hasHalf = true;
         this->m_hasTranscendentals = true;
         this->m_hasGather = this->m_hasScatter = true;
+        this->m_hasRsqrtd = this->m_hasRcpd = true;  // double-precision rsqrt and rcp flagged as available
     }
     else if (!strcasecmp(isa, "generic-32") ||
              !strcasecmp(isa, "generic-x32")) {
@@ -455,6 +460,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
         this->m_hasHalf = true;
         this->m_hasTranscendentals = true;
         this->m_hasGather = this->m_hasScatter = true;
+        this->m_hasRsqrtd = this->m_hasRcpd = true;  // double-precision rsqrt and rcp flagged as available
     }
     else if (!strcasecmp(isa, "generic-64") ||
              !strcasecmp(isa, "generic-x64")) {
@@ -467,6 +473,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
         this->m_hasHalf = true;
         this->m_hasTranscendentals = true;
         this->m_hasGather = this->m_hasScatter = true;
+        this->m_hasRsqrtd = this->m_hasRcpd = true;  // double-precision rsqrt and rcp flagged as available
     }
     else if (!strcasecmp(isa, "generic-1") ||
              !strcasecmp(isa, "generic-x1")) {
diff --git a/ispc.h b/ispc.h
index 88eb8353..4b6df8c3 100644
--- a/ispc.h
+++ b/ispc.h
@@ -281,6 +281,10 @@ public:
     bool hasScatter() const {return m_hasScatter;}
 
     bool hasTranscendentals() const {return m_hasTranscendentals;}
+
+    bool hasRsqrtd() const {return m_hasRsqrtd;}
+
+    bool hasRcpd() const {return m_hasRcpd;}
 
 private:
 
@@ -380,6 +384,12 @@ private:
     /** Indicates whether the target has support for transcendentals (beyond
         sqrt, which we assume that all of them handle). */
     bool m_hasTranscendentals;
+
+    /** Indicates whether the target ISA natively supports double-precision rsqrt. */
+    bool m_hasRsqrtd;
+
+    /** Indicates whether the target ISA natively supports double-precision rcp. */
+    bool m_hasRcpd;
 };
 
 
diff --git a/stdlib.ispc b/stdlib.ispc
index bd12034c..2f204aa0 100644
--- a/stdlib.ispc
+++ b/stdlib.ispc
@@ -1412,14 +1412,22 @@ static inline QUAL double __rcp_safe_##QUAL##_double(QUAL double x) \
 
 RCPD(varying)
 __declspec(safe) 
 static inline double rcp(double v) {
-    return __rcp_varying_double(v, (IntMaskType)__mask);
+    // Use the native rcp of the target when available, else the RCPD() version.
+    if (__have_native_rcpd)
+        return __rcp_varying_double(v);
+    else
+        return __rcp_safe_varying_double(v);
 }
 
 RCPD(uniform)
-__declspec(safe) 
+__declspec(safe)
 static inline uniform double rcp(uniform double v) {
-    return __rcp_uniform_double(v, (IntMaskType)__mask);
+    // Use the native rcp of the target when available, else the RCPD() version.
+    if (__have_native_rcpd)
+        return __rcp_uniform_double(v);
+    else
+        return __rcp_safe_uniform_double(v);
 }
 
 ///////////////////////////////////////////////////////////////////////////
@@ -3572,13 +3580,21 @@ static inline QUAL double __rsqrt_safe_##QUAL##_double (QUAL double x)    \
 RSQRTD(varying)
 __declspec(safe)   
 static inline double rsqrt(double v) {   
-  return __rsqrt_varying_double(v, (IntMaskType)__mask);   
+    // Use the native rsqrt of the target when available, else the RSQRTD() version.
+    if (__have_native_rsqrtd)
+        return __rsqrt_varying_double(v);
+    else
+        return __rsqrt_safe_varying_double(v);
 }   
 
 RSQRTD(uniform)
 __declspec(safe)
 static inline uniform double rsqrt(uniform double v) {
-    return __rsqrt_uniform_double(v, (IntMaskType)__mask);
+    // Use the native rsqrt of the target when available, else the RSQRTD() version.
+    if (__have_native_rsqrtd)
+        return __rsqrt_uniform_double(v);
+    else
+        return __rsqrt_safe_uniform_double(v);
 }
 __declspec(safe)
 static inline double ldexp(double x, int n) {