Add __have_native_{rsqrtd,rcpd} to select between native support for double-precision reciprocals (rcp, rsqrt) and the slower but safe versions in the stdlib
This commit is contained in:
@@ -1150,6 +1150,10 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
|
|||||||
symbolTable);
|
symbolTable);
|
||||||
lDefineConstantInt("__have_native_transcendentals", g->target->hasTranscendentals(),
|
lDefineConstantInt("__have_native_transcendentals", g->target->hasTranscendentals(),
|
||||||
module, symbolTable);
|
module, symbolTable);
|
||||||
|
lDefineConstantInt("__have_native_rsqrtd", g->target->hasRsqrtd(),
|
||||||
|
module, symbolTable);
|
||||||
|
lDefineConstantInt("__have_native_rcpd", g->target->hasRcpd(),
|
||||||
|
module, symbolTable);
|
||||||
|
|
||||||
if (g->forceAlignment != -1) {
|
if (g->forceAlignment != -1) {
|
||||||
llvm::GlobalVariable *alignment = module->getGlobalVariable("memory_alignment", true);
|
llvm::GlobalVariable *alignment = module->getGlobalVariable("memory_alignment", true);
|
||||||
|
|||||||
@@ -191,13 +191,13 @@ declare <WIDTH x double> @__max_varying_double(<WIDTH x double>,
|
|||||||
|
|
||||||
declare float @__rsqrt_uniform_float(float) nounwind readnone
|
declare float @__rsqrt_uniform_float(float) nounwind readnone
|
||||||
declare float @__rcp_uniform_float(float) nounwind readnone
|
declare float @__rcp_uniform_float(float) nounwind readnone
|
||||||
declare double @__rsqrt_uniform_double(double, <WIDTH x MASK>) nounwind readnone
|
declare double @__rsqrt_uniform_double(double) nounwind readnone
|
||||||
declare double @__rcp_uniform_double(double, <WIDTH x MASK>) nounwind readnone
|
declare double @__rcp_uniform_double(double) nounwind readnone
|
||||||
declare float @__sqrt_uniform_float(float) nounwind readnone
|
declare float @__sqrt_uniform_float(float) nounwind readnone
|
||||||
declare <WIDTH x float> @__rcp_varying_float(<WIDTH x float>) nounwind readnone
|
declare <WIDTH x float> @__rcp_varying_float(<WIDTH x float>) nounwind readnone
|
||||||
declare <WIDTH x float> @__rsqrt_varying_float(<WIDTH x float>) nounwind readnone
|
declare <WIDTH x float> @__rsqrt_varying_float(<WIDTH x float>) nounwind readnone
|
||||||
declare <WIDTH x double> @__rcp_varying_double(<WIDTH x double>, <WIDTH x MASK>) nounwind readnone
|
declare <WIDTH x double> @__rcp_varying_double(<WIDTH x double>) nounwind readnone
|
||||||
declare <WIDTH x double> @__rsqrt_varying_double(<WIDTH x double>, <WIDTH x MASK>) nounwind readnone
|
declare <WIDTH x double> @__rsqrt_varying_double(<WIDTH x double>) nounwind readnone
|
||||||
|
|
||||||
declare <WIDTH x float> @__sqrt_varying_float(<WIDTH x float>) nounwind readnone
|
declare <WIDTH x float> @__sqrt_varying_float(<WIDTH x float>) nounwind readnone
|
||||||
|
|
||||||
|
|||||||
@@ -4533,31 +4533,11 @@ define_down_avgs()
|
|||||||
')
|
')
|
||||||
|
|
||||||
define(`rsqrt_double', `
|
define(`rsqrt_double', `
|
||||||
declare double @__rsqrt_safe_uniform_double___und(double, <WIDTH x MASK>)
|
declare double @__rsqrt_uniform_double(double)
|
||||||
define double @__rsqrt_uniform_double(double, <WIDTH x MASK>) nounwind alwaysinline readnone
|
declare <WIDTH x double> @__rsqrt_varying_double(<WIDTH x double>)
|
||||||
{
|
|
||||||
%res = call double @__rsqrt_safe_uniform_double___und(double %0, <WIDTH x MASK> %1)
|
|
||||||
ret double %res
|
|
||||||
}
|
|
||||||
declare <WIDTH x double> @__rsqrt_safe_varying_double___vyd(<WIDTH x double>, <WIDTH x MASK>)
|
|
||||||
define <WIDTH x double> @__rsqrt_varying_double(<WIDTH x double>, <WIDTH x MASK>) nounwind alwaysinline readnone
|
|
||||||
{
|
|
||||||
%res = call <WIDTH x double> @__rsqrt_safe_varying_double___vyd(<WIDTH x double> %0, <WIDTH x MASK> %1)
|
|
||||||
ret <WIDTH x double> %res
|
|
||||||
}
|
|
||||||
')
|
')
|
||||||
|
|
||||||
define(`rcp_double', `
|
define(`rcp_double', `
|
||||||
declare double @__rcp_safe_uniform_double___und(double, <WIDTH x MASK>)
|
declare double @__rcp_uniform_double(double)
|
||||||
define double @__rcp_uniform_double(double, <WIDTH x MASK>) nounwind alwaysinline readnone
|
declare <WIDTH x double> @__rcp_varying_double(<WIDTH x double>)
|
||||||
{
|
|
||||||
%res = call double @__rcp_safe_uniform_double___und(double %0, <WIDTH x MASK> %1)
|
|
||||||
ret double %res
|
|
||||||
}
|
|
||||||
declare <WIDTH x double> @__rcp_safe_varying_double___vyd(<WIDTH x double>, <WIDTH x MASK>)
|
|
||||||
define <WIDTH x double> @__rcp_varying_double(<WIDTH x double>, <WIDTH x MASK>) nounwind alwaysinline readnone
|
|
||||||
{
|
|
||||||
%res = call <WIDTH x double> @__rcp_safe_varying_double___vyd(<WIDTH x double> %0, <WIDTH x MASK> %1)
|
|
||||||
ret <WIDTH x double> %res
|
|
||||||
}
|
|
||||||
')
|
')
|
||||||
|
|||||||
9
ispc.cpp
9
ispc.cpp
@@ -201,7 +201,9 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
|||||||
m_hasRand(false),
|
m_hasRand(false),
|
||||||
m_hasGather(false),
|
m_hasGather(false),
|
||||||
m_hasScatter(false),
|
m_hasScatter(false),
|
||||||
m_hasTranscendentals(false)
|
m_hasTranscendentals(false),
|
||||||
|
m_hasRsqrtd(false),
|
||||||
|
m_hasRcpd(false)
|
||||||
{
|
{
|
||||||
if (isa == NULL) {
|
if (isa == NULL) {
|
||||||
if (cpu != NULL) {
|
if (cpu != NULL) {
|
||||||
@@ -419,6 +421,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
|||||||
this->m_hasHalf = true;
|
this->m_hasHalf = true;
|
||||||
this->m_hasTranscendentals = true;
|
this->m_hasTranscendentals = true;
|
||||||
this->m_hasGather = this->m_hasScatter = true;
|
this->m_hasGather = this->m_hasScatter = true;
|
||||||
|
this->m_hasRsqrtd = this->m_hasRcpd = true;
|
||||||
}
|
}
|
||||||
else if (!strcasecmp(isa, "generic-8") ||
|
else if (!strcasecmp(isa, "generic-8") ||
|
||||||
!strcasecmp(isa, "generic-x8")) {
|
!strcasecmp(isa, "generic-x8")) {
|
||||||
@@ -431,6 +434,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
|||||||
this->m_hasHalf = true;
|
this->m_hasHalf = true;
|
||||||
this->m_hasTranscendentals = true;
|
this->m_hasTranscendentals = true;
|
||||||
this->m_hasGather = this->m_hasScatter = true;
|
this->m_hasGather = this->m_hasScatter = true;
|
||||||
|
this->m_hasRsqrtd = this->m_hasRcpd = true;
|
||||||
}
|
}
|
||||||
else if (!strcasecmp(isa, "generic-16") ||
|
else if (!strcasecmp(isa, "generic-16") ||
|
||||||
!strcasecmp(isa, "generic-x16")) {
|
!strcasecmp(isa, "generic-x16")) {
|
||||||
@@ -443,6 +447,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
|||||||
this->m_hasHalf = true;
|
this->m_hasHalf = true;
|
||||||
this->m_hasTranscendentals = true;
|
this->m_hasTranscendentals = true;
|
||||||
this->m_hasGather = this->m_hasScatter = true;
|
this->m_hasGather = this->m_hasScatter = true;
|
||||||
|
this->m_hasRsqrtd = this->m_hasRcpd = true;
|
||||||
}
|
}
|
||||||
else if (!strcasecmp(isa, "generic-32") ||
|
else if (!strcasecmp(isa, "generic-32") ||
|
||||||
!strcasecmp(isa, "generic-x32")) {
|
!strcasecmp(isa, "generic-x32")) {
|
||||||
@@ -455,6 +460,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
|||||||
this->m_hasHalf = true;
|
this->m_hasHalf = true;
|
||||||
this->m_hasTranscendentals = true;
|
this->m_hasTranscendentals = true;
|
||||||
this->m_hasGather = this->m_hasScatter = true;
|
this->m_hasGather = this->m_hasScatter = true;
|
||||||
|
this->m_hasRsqrtd = this->m_hasRcpd = true;
|
||||||
}
|
}
|
||||||
else if (!strcasecmp(isa, "generic-64") ||
|
else if (!strcasecmp(isa, "generic-64") ||
|
||||||
!strcasecmp(isa, "generic-x64")) {
|
!strcasecmp(isa, "generic-x64")) {
|
||||||
@@ -467,6 +473,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
|||||||
this->m_hasHalf = true;
|
this->m_hasHalf = true;
|
||||||
this->m_hasTranscendentals = true;
|
this->m_hasTranscendentals = true;
|
||||||
this->m_hasGather = this->m_hasScatter = true;
|
this->m_hasGather = this->m_hasScatter = true;
|
||||||
|
this->m_hasRsqrtd = this->m_hasRcpd = true;
|
||||||
}
|
}
|
||||||
else if (!strcasecmp(isa, "generic-1") ||
|
else if (!strcasecmp(isa, "generic-1") ||
|
||||||
!strcasecmp(isa, "generic-x1")) {
|
!strcasecmp(isa, "generic-x1")) {
|
||||||
|
|||||||
10
ispc.h
10
ispc.h
@@ -281,6 +281,10 @@ public:
|
|||||||
bool hasScatter() const {return m_hasScatter;}
|
bool hasScatter() const {return m_hasScatter;}
|
||||||
|
|
||||||
bool hasTranscendentals() const {return m_hasTranscendentals;}
|
bool hasTranscendentals() const {return m_hasTranscendentals;}
|
||||||
|
|
||||||
|
bool hasRsqrtd() const {return m_hasRsqrtd;}
|
||||||
|
|
||||||
|
bool hasRcpd() const {return m_hasRcpd;}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
@@ -380,6 +384,12 @@ private:
|
|||||||
/** Indicates whether the target has support for transcendentals (beyond
|
/** Indicates whether the target has support for transcendentals (beyond
|
||||||
sqrt, which we assume that all of them handle). */
|
sqrt, which we assume that all of them handle). */
|
||||||
bool m_hasTranscendentals;
|
bool m_hasTranscendentals;
|
||||||
|
|
||||||
|
/** Indicates whether there is an ISA double precision rsqrt. */
|
||||||
|
bool m_hasRsqrtd;
|
||||||
|
|
||||||
|
/** Indicates whether there is an ISA double precision rcp. */
|
||||||
|
bool m_hasRcpd;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
27
stdlib.ispc
27
stdlib.ispc
@@ -1412,14 +1412,21 @@ static inline QUAL double __rcp_safe_##QUAL##_double(QUAL double x) \
|
|||||||
|
|
||||||
RCPD(varying)
|
RCPD(varying)
|
||||||
__declspec(safe)
|
__declspec(safe)
|
||||||
static inline double rcp(double v) {
|
__declspec(safe)
|
||||||
return __rcp_varying_double(v, (IntMaskType)__mask);
|
static inline double rcp(double v) {
|
||||||
}
|
if (__have_native_rcpd)
|
||||||
|
return __rcp_varying_double(v);
|
||||||
|
else
|
||||||
|
return __rcp_safe_varying_double(v);
|
||||||
|
}
|
||||||
|
|
||||||
RCPD(uniform)
|
RCPD(uniform)
|
||||||
__declspec(safe)
|
__declspec(safe)
|
||||||
static inline uniform double rcp(uniform double v) {
|
static inline uniform double rcp(uniform double v) {
|
||||||
return __rcp_uniform_double(v, (IntMaskType)__mask);
|
if (__have_native_rcpd)
|
||||||
|
return __rcp_uniform_double(v);
|
||||||
|
else
|
||||||
|
return __rcp_safe_uniform_double(v);
|
||||||
}
|
}
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
@@ -3572,13 +3579,19 @@ static inline QUAL double __rsqrt_safe_##QUAL##_double (QUAL double x) \
|
|||||||
RSQRTD(varying)
|
RSQRTD(varying)
|
||||||
__declspec(safe)
|
__declspec(safe)
|
||||||
static inline double rsqrt(double v) {
|
static inline double rsqrt(double v) {
|
||||||
return __rsqrt_varying_double(v, (IntMaskType)__mask);
|
if (__have_native_rsqrtd)
|
||||||
|
return __rsqrt_varying_double(v);
|
||||||
|
else
|
||||||
|
return __rsqrt_safe_varying_double(v);
|
||||||
}
|
}
|
||||||
|
|
||||||
RSQRTD(uniform)
|
RSQRTD(uniform)
|
||||||
__declspec(safe)
|
__declspec(safe)
|
||||||
static inline uniform double rsqrt(uniform double v) {
|
static inline uniform double rsqrt(uniform double v) {
|
||||||
return __rsqrt_uniform_double(v, (IntMaskType)__mask);
|
if (__have_native_rsqrtd)
|
||||||
|
return __rsqrt_uniform_double(v);
|
||||||
|
else
|
||||||
|
return __rsqrt_safe_uniform_double(v);
|
||||||
}
|
}
|
||||||
__declspec(safe)
|
__declspec(safe)
|
||||||
static inline double ldexp(double x, int n) {
|
static inline double ldexp(double x, int n) {
|
||||||
|
|||||||
Reference in New Issue
Block a user