diff --git a/builtins.cpp b/builtins.cpp index da66ed5f..fee322e7 100644 --- a/builtins.cpp +++ b/builtins.cpp @@ -1150,6 +1150,10 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod symbolTable); lDefineConstantInt("__have_native_transcendentals", g->target->hasTranscendentals(), module, symbolTable); + lDefineConstantInt("__have_native_rsqrtd", g->target->hasRsqrtd(), + module, symbolTable); + lDefineConstantInt("__have_native_rcpd", g->target->hasRcpd(), + module, symbolTable); if (g->forceAlignment != -1) { llvm::GlobalVariable *alignment = module->getGlobalVariable("memory_alignment", true); diff --git a/builtins/target-generic-common.ll b/builtins/target-generic-common.ll index dfdc7e9e..401c862d 100644 --- a/builtins/target-generic-common.ll +++ b/builtins/target-generic-common.ll @@ -191,13 +191,13 @@ declare @__max_varying_double(, declare float @__rsqrt_uniform_float(float) nounwind readnone declare float @__rcp_uniform_float(float) nounwind readnone -declare double @__rsqrt_uniform_double(double, ) nounwind readnone -declare double @__rcp_uniform_double(double, ) nounwind readnone +declare double @__rsqrt_uniform_double(double) nounwind readnone +declare double @__rcp_uniform_double(double) nounwind readnone declare float @__sqrt_uniform_float(float) nounwind readnone declare @__rcp_varying_float() nounwind readnone declare @__rsqrt_varying_float() nounwind readnone -declare @__rcp_varying_double(, ) nounwind readnone -declare @__rsqrt_varying_double(, ) nounwind readnone +declare @__rcp_varying_double() nounwind readnone +declare @__rsqrt_varying_double() nounwind readnone declare @__sqrt_varying_float() nounwind readnone diff --git a/builtins/util.m4 b/builtins/util.m4 index 74b5cdd3..8a1f280a 100644 --- a/builtins/util.m4 +++ b/builtins/util.m4 @@ -4533,31 +4533,11 @@ define_down_avgs() ') define(`rsqrt_double', ` -declare double @__rsqrt_safe_uniform_double___und(double, ) -define double @__rsqrt_uniform_double(double, ) nounwind alwaysinline readnone -{ - %res = call double @__rsqrt_safe_uniform_double___und(double %0, %1) - ret double %res -} -declare @__rsqrt_safe_varying_double___vyd(, ) -define @__rsqrt_varying_double(, ) nounwind alwaysinline readnone -{ - %res = call @__rsqrt_safe_varying_double___vyd( %0, %1) - ret %res -} +declare double @__rsqrt_uniform_double(double) +declare @__rsqrt_varying_double() ') define(`rcp_double', ` -declare double @__rcp_safe_uniform_double___und(double, ) -define double @__rcp_uniform_double(double, ) nounwind alwaysinline readnone -{ - %res = call double @__rcp_safe_uniform_double___und(double %0, %1) - ret double %res -} -declare @__rcp_safe_varying_double___vyd(, ) -define @__rcp_varying_double(, ) nounwind alwaysinline readnone -{ - %res = call @__rcp_safe_varying_double___vyd( %0, %1) - ret %res -} +declare double @__rcp_uniform_double(double) +declare @__rcp_varying_double() ') diff --git a/ispc.cpp b/ispc.cpp index ed326b14..1386d65e 100644 --- a/ispc.cpp +++ b/ispc.cpp @@ -201,7 +201,9 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : m_hasRand(false), m_hasGather(false), m_hasScatter(false), - m_hasTranscendentals(false) + m_hasTranscendentals(false), + m_hasRsqrtd(false), + m_hasRcpd(false) { if (isa == NULL) { if (cpu != NULL) { @@ -419,6 +421,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : this->m_hasHalf = true; this->m_hasTranscendentals = true; this->m_hasGather = this->m_hasScatter = true; + this->m_hasRsqrtd = this->m_hasRcpd = true; } else if (!strcasecmp(isa, "generic-8") || !strcasecmp(isa, "generic-x8")) { @@ -431,6 +434,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : this->m_hasHalf = true; this->m_hasTranscendentals = true; this->m_hasGather = this->m_hasScatter = true; + this->m_hasRsqrtd = this->m_hasRcpd = true; } else if (!strcasecmp(isa, "generic-16") || !strcasecmp(isa, "generic-x16")) { @@ -443,6 +447,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : this->m_hasHalf = true; this->m_hasTranscendentals = true; this->m_hasGather = this->m_hasScatter = true; + this->m_hasRsqrtd = this->m_hasRcpd = true; } else if (!strcasecmp(isa, "generic-32") || !strcasecmp(isa, "generic-x32")) { @@ -455,6 +460,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : this->m_hasHalf = true; this->m_hasTranscendentals = true; this->m_hasGather = this->m_hasScatter = true; + this->m_hasRsqrtd = this->m_hasRcpd = true; } else if (!strcasecmp(isa, "generic-64") || !strcasecmp(isa, "generic-x64")) { @@ -467,6 +473,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : this->m_hasHalf = true; this->m_hasTranscendentals = true; this->m_hasGather = this->m_hasScatter = true; + this->m_hasRsqrtd = this->m_hasRcpd = true; } else if (!strcasecmp(isa, "generic-1") || !strcasecmp(isa, "generic-x1")) { diff --git a/ispc.h b/ispc.h index 88eb8353..4b6df8c3 100644 --- a/ispc.h +++ b/ispc.h @@ -281,6 +281,10 @@ public: bool hasScatter() const {return m_hasScatter;} bool hasTranscendentals() const {return m_hasTranscendentals;} + + bool hasRsqrtd() const {return m_hasRsqrtd;} + + bool hasRcpd() const {return m_hasRcpd;} private: @@ -380,6 +384,12 @@ private: /** Indicates whether the target has support for transcendentals (beyond sqrt, which we assume that all of them handle). */ bool m_hasTranscendentals; + + /** Indicates whether there is an ISA double precision rsqrt. */ + bool m_hasRsqrtd; + + /** Indicates whether there is an ISA double precision rcp. */ + bool m_hasRcpd; }; diff --git a/stdlib.ispc b/stdlib.ispc index bd12034c..2f204aa0 100644 --- a/stdlib.ispc +++ b/stdlib.ispc @@ -1412,14 +1412,21 @@ static inline QUAL double __rcp_safe_##QUAL##_double(QUAL double x) \ RCPD(varying) __declspec(safe) -static inline double rcp(double v) { - return __rcp_varying_double(v, (IntMaskType)__mask); -} +__declspec(safe) +static inline double rcp(double v) { + if (__have_native_rcpd) + return __rcp_varying_double(v); + else + return __rcp_safe_varying_double(v); +} RCPD(uniform) -__declspec(safe) +__declspec(safe) static inline uniform double rcp(uniform double v) { - return __rcp_uniform_double(v, (IntMaskType)__mask); + if (__have_native_rcpd) + return __rcp_uniform_double(v); + else + return __rcp_safe_uniform_double(v); } /////////////////////////////////////////////////////////////////////////// @@ -3572,13 +3579,19 @@ static inline QUAL double __rsqrt_safe_##QUAL##_double (QUAL double x) \ RSQRTD(varying) __declspec(safe) static inline double rsqrt(double v) { - return __rsqrt_varying_double(v, (IntMaskType)__mask); + if (__have_native_rsqrtd) + return __rsqrt_varying_double(v); + else + return __rsqrt_safe_varying_double(v); } RSQRTD(uniform) __declspec(safe) static inline uniform double rsqrt(uniform double v) { - return __rsqrt_uniform_double(v, (IntMaskType)__mask); + if (__have_native_rsqrtd) + return __rsqrt_uniform_double(v); + else + return __rsqrt_safe_uniform_double(v); } __declspec(safe) static inline double ldexp(double x, int n) {