Pass log/exp/pow transcendentals through to targets that support them.

Currently, these are the generic targets.
This commit is contained in:
Matt Pharr
2012-05-03 13:46:56 -07:00
parent 7d7e99a92c
commit 0c1b206185
7 changed files with 172 additions and 8 deletions

View File

@@ -886,10 +886,12 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
symbolTable);
lDefineConstantInt("__math_lib_system", (int)Globals::Math_System, module,
symbolTable);
lDefineConstantIntFunc("__fast_masked_vload", (int)g->opt.fastMaskedVload, module,
symbolTable);
lDefineConstantIntFunc("__fast_masked_vload", (int)g->opt.fastMaskedVload,
module, symbolTable);
lDefineConstantInt("__have_native_half", (g->target.isa == Target::AVX2),
lDefineConstantInt("__have_native_half", g->target.hasHalf, module,
symbolTable);
lDefineConstantInt("__have_native_transcendentals", g->target.hasTranscendentals,
module, symbolTable);
if (includeStdlibISPC) {

View File

@@ -1654,6 +1654,13 @@ declare void @__pseudo_scatter_base_offsets64_32(i8 * nocapture, <WIDTH x i64>,
declare void @__pseudo_scatter_base_offsets64_64(i8 * nocapture, <WIDTH x i64>, i32, <WIDTH x i64>,
<WIDTH x i64>, <WIDTH x MASK>) nounwind
declare float @__log_uniform_float(float) nounwind readnone
declare <WIDTH x float> @__log_varying_float(<WIDTH x float>) nounwind readnone
declare float @__exp_uniform_float(float) nounwind readnone
declare <WIDTH x float> @__exp_varying_float(<WIDTH x float>) nounwind readnone
declare float @__pow_uniform_float(float, float) nounwind readnone
declare <WIDTH x float> @__pow_varying_float(<WIDTH x float>, <WIDTH x float>) nounwind readnone
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; vector ops

View File

@@ -586,6 +586,121 @@ ROTATE(__vec16_f, float, float)
SHUFFLES(__vec16_f, float, float)
LOAD_STORE(__vec16_f, float)
static FORCEINLINE float __exp_uniform_float(float v) {
return expf(v);
}
// Vector exp(): apply the scalar C library expf() independently to each
// of the 16 lanes.
static FORCEINLINE __vec16_f __exp_varying_float(__vec16_f v) {
    __vec16_f result;
    for (int lane = 0; lane < 16; ++lane)
        result.v[lane] = expf(v.v[lane]);
    return result;
}
static FORCEINLINE float __log_uniform_float(float v) {
return logf(v);
}
// Vector natural log: apply the scalar C library logf() independently to
// each of the 16 lanes.
static FORCEINLINE __vec16_f __log_varying_float(__vec16_f v) {
    __vec16_f result;
    for (int lane = 0; lane < 16; ++lane)
        result.v[lane] = logf(v.v[lane]);
    return result;
}
static FORCEINLINE float __pow_uniform_float(float a, float b) {
return powf(a, b);
}
// Vector pow(): apply the scalar C library powf() lane-by-lane, pairing
// base a.v[i] with exponent b.v[i].
static FORCEINLINE __vec16_f __pow_varying_float(__vec16_f a, __vec16_f b) {
    __vec16_f result;
    for (int lane = 0; lane < 16; ++lane)
        result.v[lane] = powf(a.v[lane], b.v[lane]);
    return result;
}
// Reinterpret the raw bits of a float as a signed 32-bit integer
// (type pun via a union; no value conversion is performed).
static FORCEINLINE int __intbits(float v) {
    union {
        float f;
        int i;
    } pun;
    pun.f = v;
    return pun.i;
}
// Reinterpret the raw bits of a signed 32-bit integer as a float
// (inverse of __intbits; type pun via a union).
static FORCEINLINE float __floatbits(int v) {
    union {
        float f;
        int i;
    } pun;
    pun.i = v;
    return pun.f;
}
// Convert an IEEE binary16 ("half") value, passed as its raw bit pattern in
// an int16_t, to a 32-bit float using integer bit manipulation only (no
// FP16 hardware required).  Strategy: shift the half's exponent/mantissa
// into float position, rebias the exponent, then patch the two special
// exponent cases (Inf/NaN and zero/denormal) before reattaching the sign.
static FORCEINLINE float __half_to_float_uniform(int16_t h) {
    static const uint32_t shifted_exp = 0x7c00 << 13; // exponent mask after shift
    int32_t o = ((int32_t)(h & 0x7fff)) << 13;        // exponent/mantissa bits
    uint32_t exp = shifted_exp & o;                   // just the exponent
    o += (127 - 15) << 23;                            // exponent adjust
    // handle exponent special cases
    if (exp == shifted_exp)      // Inf/NaN?
        o += (128 - 16) << 23;   // extra exp adjust
    else if (exp == 0) {         // Zero/Denormal?
        o += 1 << 23;            // extra exp adjust
        // Renormalize denormals with a float subtract; 113 << 23 is the bit
        // pattern of the magic constant matching the adjusted exponent.
        o = __intbits(__floatbits(o) - __floatbits(113 << 23)); // renormalize
    }
    o |= ((int32_t)(h & 0x8000)) << 16; // sign bit
    return __floatbits(o);
}
// Vector half-to-float conversion: run the scalar bit-manipulation
// routine on each of the 16 lanes.
static FORCEINLINE __vec16_f __half_to_float_varying(__vec16_i16 v) {
    __vec16_f result;
    for (int lane = 0; lane < 16; ++lane)
        result.v[lane] = __half_to_float_uniform(v.v[lane]);
    return result;
}
// Convert a 32-bit float to IEEE binary16 ("half"), returned as the raw
// bit pattern in an int16_t, using integer bit manipulation only.
// Inf maps to half Inf (0x7c00), NaN to a quiet half NaN (0x7e00), and
// values too large for half are clamped to infinity.  Rounding is done by
// the float multiply against the magic constant below.
static FORCEINLINE int16_t __float_to_half_uniform(float f) {
    uint32_t sign_mask = 0x80000000u;
    int32_t o;
    int32_t fint = __intbits(f);
    int32_t sign = fint & sign_mask;
    fint ^= sign;   // strip the sign; work on the magnitude's bits
    // NOTE: for non-negative floats, comparing the raw bit patterns as
    // integers matches the float ordering, which the tests below rely on.
    int32_t f32infty = 255 << 23;
    o = (fint > f32infty) ? 0x7e00 : 0x7c00;  // NaN -> qNaN half, else Inf
    // (De)normalized number or zero
    // update fint unconditionally to save the blending; we don't need it
    // anymore for the Inf/NaN case anyway.
    const uint32_t round_mask = ~0xfffu;       // drops mantissa bits below half precision
    const int32_t magic = 15 << 23;            // scale factor for exponent rebias
    const int32_t f16infty = 31 << 23;
    int32_t fint2 = __intbits(__floatbits(fint & round_mask) * __floatbits(magic)) - round_mask;
    fint2 = (fint2 > f16infty) ? f16infty : fint2; // Clamp to signed infinity if overflowed
    if (fint < f32infty)
        o = fint2 >> 13; // Take the bits!
    // Reattach the sign in half position.  NOTE(review): `sign >> 16` on a
    // negative int32_t assumes arithmetic right shift (implementation-
    // defined in C, but universal on supported compilers) — the low 16 bits
    // still come out correct because of the int16_t truncation on return.
    return (o | (sign >> 16));
}
// Vector float-to-half conversion: run the scalar bit-manipulation
// routine on each of the 16 lanes.
static FORCEINLINE __vec16_i16 __float_to_half_varying(__vec16_f v) {
    __vec16_i16 result;
    for (int lane = 0; lane < 16; ++lane)
        result.v[lane] = __float_to_half_uniform(v.v[lane]);
    return result;
}
///////////////////////////////////////////////////////////////////////////
// double

View File

@@ -197,6 +197,9 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
t->arch = arch;
}
// This is the case for most of them
t->hasHalf = t->hasTranscendentals = false;
if (!strcasecmp(isa, "sse2")) {
t->isa = Target::SSE2;
t->nativeVectorWidth = 4;
@@ -256,6 +259,8 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
t->maskingIsFree = true;
t->allOffMaskIsSafe = true;
t->maskBitCount = 1;
t->hasHalf = true;
t->hasTranscendentals = true;
}
else if (!strcasecmp(isa, "generic-32")) {
t->isa = Target::GENERIC;
@@ -264,6 +269,8 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
t->maskingIsFree = true;
t->allOffMaskIsSafe = true;
t->maskBitCount = 1;
t->hasHalf = true;
t->hasTranscendentals = true;
}
else if (!strcasecmp(isa, "generic-1")) {
t->isa = Target::GENERIC;
@@ -300,6 +307,7 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
t->maskingIsFree = false;
t->allOffMaskIsSafe = false;
t->maskBitCount = 32;
t->hasHalf = true;
}
else if (!strcasecmp(isa, "avx2-x2")) {
t->isa = Target::AVX2;
@@ -309,6 +317,7 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
t->maskingIsFree = false;
t->allOffMaskIsSafe = false;
t->maskBitCount = 32;
t->hasHalf = true;
}
#endif // !LLVM_3_0
else {

8
ispc.h
View File

@@ -249,6 +249,14 @@ struct Target {
is 32 on SSE/AVX, since that matches the HW better, but it's 1 for
the generic target. */
int maskBitCount;
/** Indicates whether the target has native support for float/half
conversions. */
bool hasHalf;
/** Indicates whether the target has support for transcendentals (beyond
sqrt, which we assume that all of them handle). */
bool hasTranscendentals;
};

View File

@@ -1333,6 +1333,11 @@ Module::execPreprocessor(const char* infilename, llvm::raw_string_ostream* ostre
else
opts.addMacroDef("ISPC_POINTER_SIZE=64");
if (g->target.hasHalf)
opts.addMacroDef("ISPC_TARGET_HAS_HALF");
if (g->target.hasTranscendentals)
opts.addMacroDef("ISPC_TARGET_HAS_TRANSCENDENTALS");
opts.addMacroDef("ISPC_MAJOR_VERSION=1");
opts.addMacroDef("ISPC_MINOR_VERSION=2");

View File

@@ -2915,7 +2915,10 @@ static inline uniform float atan2(uniform float y, uniform float x) {
__declspec(safe)
static inline float exp(float x_full) {
if (__math_lib == __math_lib_svml) {
if (__have_native_transcendentals) {
return __exp_varying_float(x_full);
}
else if (__math_lib == __math_lib_svml) {
return __svml_exp(x_full);
}
else if (__math_lib == __math_lib_system) {
@@ -2994,7 +2997,10 @@ static inline float exp(float x_full) {
__declspec(safe)
static inline uniform float exp(uniform float x_full) {
if (__math_lib == __math_lib_system ||
if (__have_native_transcendentals) {
return __exp_uniform_float(x_full);
}
else if (__math_lib == __math_lib_system ||
__math_lib == __math_lib_svml) {
return __stdlib_expf(x_full);
}
@@ -3116,7 +3122,10 @@ static inline void __range_reduce_log(uniform float input, uniform float * unifo
__declspec(safe)
static inline float log(float x_full) {
if (__math_lib == __math_lib_svml) {
if (__have_native_transcendentals) {
return __log_varying_float(x_full);
}
else if (__math_lib == __math_lib_svml) {
return __svml_log(x_full);
}
else if (__math_lib == __math_lib_system) {
@@ -3204,7 +3213,10 @@ static inline float log(float x_full) {
__declspec(safe)
static inline uniform float log(uniform float x_full) {
if (__math_lib == __math_lib_system ||
if (__have_native_transcendentals) {
return __log_uniform_float(x_full);
}
else if (__math_lib == __math_lib_system ||
__math_lib == __math_lib_svml) {
return __stdlib_logf(x_full);
}
@@ -3285,7 +3297,10 @@ static inline uniform float log(uniform float x_full) {
__declspec(safe)
static inline float pow(float a, float b) {
if (__math_lib == __math_lib_svml) {
if (__have_native_transcendentals) {
return __pow_varying_float(a, b);
}
else if (__math_lib == __math_lib_svml) {
return __svml_pow(a, b);
}
else if (__math_lib == __math_lib_system) {
@@ -3304,6 +3319,9 @@ static inline float pow(float a, float b) {
__declspec(safe)
static inline uniform float pow(uniform float a, uniform float b) {
if (__have_native_transcendentals) {
return __pow_uniform_float(a, b);
}
if (__math_lib == __math_lib_system ||
__math_lib == __math_lib_svml) {
return __stdlib_powf(a, b);