merged with master

This commit is contained in:
Evghenii
2014-02-21 08:25:28 +01:00
46 changed files with 1127 additions and 197 deletions

View File

@@ -1234,6 +1234,8 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
symbolTable);
lDefineConstantInt("__have_native_transcendentals", g->target->hasTranscendentals(),
module, symbolTable);
lDefineConstantInt("__have_native_trigonometry", g->target->hasTrigonometry(),
module, symbolTable);
lDefineConstantInt("__have_native_rsqrtd", g->target->hasRsqrtd(),
module, symbolTable);
lDefineConstantInt("__have_native_rcpd", g->target->hasRcpd(),

View File

@@ -695,3 +695,5 @@ define <16 x double> @__max_varying_double(<16 x double>, <16 x double>) nounwin
rsqrtd_decl()
rcpd_decl()
transcendetals_decl()
trigonometry_decl()

View File

@@ -564,3 +564,6 @@ gen_scatter(double)
rsqrtd_decl()
rcpd_decl()
transcendetals_decl()
trigonometry_decl()

View File

@@ -514,3 +514,6 @@ define <4 x double> @__max_varying_double(<4 x double>, <4 x double>) nounwind r
rsqrtd_decl()
rcpd_decl()
transcendetals_decl()
trigonometry_decl()

View File

@@ -999,3 +999,6 @@ define_avgs()
rsqrtd_decl()
rcpd_decl()
transcendetals_decl()
trigonometry_decl()

View File

@@ -191,13 +191,9 @@ declare <WIDTH x double> @__max_varying_double(<WIDTH x double>,
declare float @__rsqrt_uniform_float(float) nounwind readnone
declare float @__rcp_uniform_float(float) nounwind readnone
declare double @__rsqrt_uniform_double(double) nounwind readnone
declare double @__rcp_uniform_double(double) nounwind readnone
declare float @__sqrt_uniform_float(float) nounwind readnone
declare <WIDTH x float> @__rcp_varying_float(<WIDTH x float>) nounwind readnone
declare <WIDTH x float> @__rsqrt_varying_float(<WIDTH x float>) nounwind readnone
declare <WIDTH x double> @__rcp_varying_double(<WIDTH x double>) nounwind readnone
declare <WIDTH x double> @__rsqrt_varying_double(<WIDTH x double>) nounwind readnone
declare <WIDTH x float> @__sqrt_varying_float(<WIDTH x float>) nounwind readnone
@@ -393,3 +389,12 @@ declare void @__prefetch_read_uniform_nt(i8 * nocapture) nounwind
define_avgs()
declare_nvptx()
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; double-precision reciprocals, transcendentals and trigonometry, if supported
rsqrtd_decl()
rcpd_decl()
transcendetals_decl()
trigonometry_decl()

View File

@@ -521,3 +521,6 @@ define <8 x i16> @__avg_down_int16(<8 x i16>, <8 x i16>) nounwind readnone {
rsqrtd_decl()
rcpd_decl()
transcendetals_decl()
trigonometry_decl()

View File

@@ -491,3 +491,6 @@ define <4 x i16> @__avg_down_int16(<4 x i16>, <4 x i16>) nounwind readnone {
rsqrtd_decl()
rcpd_decl()
transcendetals_decl()
trigonometry_decl()

View File

@@ -587,3 +587,6 @@ define <16 x i16> @__avg_down_int16(<16 x i16>, <16 x i16>) nounwind readnone {
rsqrtd_decl()
rcpd_decl()
transcendetals_decl()
trigonometry_decl()

View File

@@ -2262,3 +2262,90 @@ define void @__memory_barrier() nounwind readnone alwaysinline {
}
saturation_arithmetic_novec();
;;;;;;;;;;;;;;;;;;;;
;; transcendentals and trigonometry
define(`transcendetals_decl',`
declare float @__log_uniform_float(float) nounwind readnone
declare <WIDTH x float> @__log_varying_float(<WIDTH x float>) nounwind readnone
declare float @__exp_uniform_float(float) nounwind readnone
declare <WIDTH x float> @__exp_varying_float(<WIDTH x float>) nounwind readnone
declare float @__pow_uniform_float(float, float) nounwind readnone
declare <WIDTH x float> @__pow_varying_float(<WIDTH x float>, <WIDTH x float>) nounwind readnone
declare double @__log_uniform_double(double) nounwind readnone
declare <WIDTH x double> @__log_varying_double(<WIDTH x double>) nounwind readnone
declare double @__exp_uniform_double(double) nounwind readnone
declare <WIDTH x double> @__exp_varying_double(<WIDTH x double>) nounwind readnone
declare double @__pow_uniform_double(double, double) nounwind readnone
declare <WIDTH x double> @__pow_varying_double(<WIDTH x double>, <WIDTH x double>) nounwind readnone
')
;; Emits the uniform and varying builtins for a one-argument math routine.
;; The varying form operates on a single-element vector: it unpacks the
;; element, calls the uniform form and repacks the result.
;; 1 - function call, e.g. __nv_fast_logf
;; 2 - data-type, float/double
;; 3 - local function name, e.g. __log, __exp, ..
define(`transcendentals1',`
declare $2 @$1($2)
define $2 @$3_uniform_$2($2) nounwind readnone alwaysinline
{
  %ret = call $2 @$1($2 %0)
  ret $2 %ret
}
define <1 x $2> @$3_varying_$2(<1 x $2>) nounwind readnone alwaysinline
{
  %v = bitcast <1 x $2> %0 to $2
  %r = call $2 @$3_uniform_$2($2 %v)
  %ret = bitcast $2 %r to <1 x $2>
  ret <1 x $2> %ret
}
')
;; Emits the uniform and varying builtins for a math routine taking two
;; operands of the same type.  The varying form unpacks both single-element
;; vectors, calls the uniform form and repacks the result.
;; 1 - function call, e.g. __nv_fast_powf
;; 2 - data-type, float/double
;; 3 - local function name, e.g. __pow, __atan2
define(`transcendentals2',`
declare $2 @$1($2, $2)
define $2 @$3_uniform_$2($2, $2) nounwind readnone alwaysinline
{
  %ret = call $2 @$1($2 %0, $2 %1)
  ret $2 %ret
}
define <1 x $2> @$3_varying_$2(<1 x $2>, <1 x $2>) nounwind readnone alwaysinline
{
  %v0 = bitcast <1 x $2> %0 to $2
  %v1 = bitcast <1 x $2> %1 to $2
  %r = call $2 @$3_uniform_$2($2 %v0, $2 %v1)
  %ret = bitcast $2 %r to <1 x $2>
  ret <1 x $2> %ret
}
')
transcendentals1(__nv_fast_logf, float, __log)
transcendentals1(__nv_fast_expf, float, __exp)
transcendentals2(__nv_fast_powf, float, __pow)
transcendentals1(__nv_log, double, __log)
transcendentals1(__nv_exp, double, __exp)
transcendentals2(__nv_pow, double, __pow)
transcendentals1(__nv_fast_sinf, float, __sin)
transcendentals1(__nv_fast_cosf, float, __cos)
transcendentals1(__nv_fast_tanf, float, __tan)
transcendentals1(__nv_asinf, float, __asin)
transcendentals1(__nv_acosf, float, __acos)
transcendentals1(__nv_atanf, float, __atan)
transcendentals2(__nv_atan2f, float, __atan2)
transcendentals1(__nv_sin, double, __sin)
transcendentals1(__nv_cos, double, __cos)
transcendentals1(__nv_tan, double, __tan)
transcendentals1(__nv_asin, double, __asin)
transcendentals1(__nv_acos, double, __acos)
transcendentals1(__nv_atan, double, __atan)
transcendentals2(__nv_atan2, double, __atan2)
;; sincos hands back both results through its pointer arguments, so these
;; declarations must not carry readnone: a void call that is both nounwind
;; and readnone has no observable effect and may be removed by the optimizer.
declare void @__sincos_uniform_float(float, float*, float*) nounwind
declare void @__sincos_varying_float(<WIDTH x float>, <WIDTH x float>*, <WIDTH x float>*) nounwind
declare void @__sincos_uniform_double(double, double*, double*) nounwind
declare void @__sincos_varying_double(<WIDTH x double>, <WIDTH x double>*, <WIDTH x double>*) nounwind

View File

@@ -659,3 +659,6 @@ define <8 x double> @__max_varying_double(<8 x double>, <8 x double>) nounwind r
rsqrtd_decl()
rcpd_decl()
transcendetals_decl()
trigonometry_decl()

View File

@@ -594,3 +594,6 @@ gen_scatter(double)
rsqrtd_decl()
rcpd_decl()
transcendetals_decl()
trigonometry_decl()

View File

@@ -495,3 +495,6 @@ define_down_avgs()
rsqrtd_decl()
rcpd_decl()
transcendetals_decl()
trigonometry_decl()

View File

@@ -497,3 +497,6 @@ define_down_avgs()
rsqrtd_decl()
rcpd_decl()
transcendetals_decl()
trigonometry_decl()

View File

@@ -598,3 +598,6 @@ define_avgs()
rsqrtd_decl()
rcpd_decl()
transcendetals_decl()
trigonometry_decl()

View File

@@ -521,3 +521,6 @@ define_avgs()
rsqrtd_decl()
rcpd_decl()
transcendetals_decl()
trigonometry_decl()

View File

@@ -1365,13 +1365,6 @@ declare void
@__pseudo_scatter_base_offsets64_double(i8 * nocapture, i32, <WIDTH x i64>,
<WIDTH x double>, <WIDTH x MASK>) nounwind
declare float @__log_uniform_float(float) nounwind readnone
declare <WIDTH x float> @__log_varying_float(<WIDTH x float>) nounwind readnone
declare float @__exp_uniform_float(float) nounwind readnone
declare <WIDTH x float> @__exp_varying_float(<WIDTH x float>) nounwind readnone
declare float @__pow_uniform_float(float, float) nounwind readnone
declare <WIDTH x float> @__pow_varying_float(<WIDTH x float>, <WIDTH x float>) nounwind readnone
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
declare void @__use8(<WIDTH x i8>)

View File

@@ -2516,13 +2516,6 @@ declare void
@__pseudo_scatter_base_offsets64_double(i8 * nocapture, i32, <WIDTH x i64>,
<WIDTH x double>, <WIDTH x MASK>) nounwind
declare float @__log_uniform_float(float) nounwind readnone
declare <WIDTH x float> @__log_varying_float(<WIDTH x float>) nounwind readnone
declare float @__exp_uniform_float(float) nounwind readnone
declare <WIDTH x float> @__exp_varying_float(<WIDTH x float>) nounwind readnone
declare float @__pow_uniform_float(float, float) nounwind readnone
declare <WIDTH x float> @__pow_varying_float(<WIDTH x float>, <WIDTH x float>) nounwind readnone
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
declare void @__use8(<WIDTH x i8>)
@@ -5008,3 +5001,57 @@ global_atomic_varying(WIDTH, min, i64, int64)
global_atomic_varying(WIDTH, max, i64, int64)
global_atomic_varying(WIDTH, umin, i64, uint64)
global_atomic_varying(WIDTH, umax, i64, uint64)
define(`transcendetals_decl',`
declare float @__log_uniform_float(float) nounwind readnone
declare <WIDTH x float> @__log_varying_float(<WIDTH x float>) nounwind readnone
declare float @__exp_uniform_float(float) nounwind readnone
declare <WIDTH x float> @__exp_varying_float(<WIDTH x float>) nounwind readnone
declare float @__pow_uniform_float(float, float) nounwind readnone
declare <WIDTH x float> @__pow_varying_float(<WIDTH x float>, <WIDTH x float>) nounwind readnone
declare double @__log_uniform_double(double) nounwind readnone
declare <WIDTH x double> @__log_varying_double(<WIDTH x double>) nounwind readnone
declare double @__exp_uniform_double(double) nounwind readnone
declare <WIDTH x double> @__exp_varying_double(<WIDTH x double>) nounwind readnone
declare double @__pow_uniform_double(double, double) nounwind readnone
declare <WIDTH x double> @__pow_varying_double(<WIDTH x double>, <WIDTH x double>) nounwind readnone
')
;; Declarations of the sin, asin, cos, acos, sincos, tan, atan and atan2
;; builtins in uniform and varying, float and double forms.
;; The sincos entries store their results through the pointer arguments and
;; are therefore declared without readnone; otherwise the optimizer would be
;; free to drop the calls.
define(`trigonometry_decl',`
declare <WIDTH x float> @__sin_varying_float(<WIDTH x float>) nounwind readnone
declare <WIDTH x float> @__asin_varying_float(<WIDTH x float>) nounwind readnone
declare <WIDTH x float> @__cos_varying_float(<WIDTH x float>) nounwind readnone
declare <WIDTH x float> @__acos_varying_float(<WIDTH x float>) nounwind readnone
declare void @__sincos_varying_float(<WIDTH x float>, <WIDTH x float>*, <WIDTH x float>*) nounwind
declare <WIDTH x float> @__tan_varying_float(<WIDTH x float>) nounwind readnone
declare <WIDTH x float> @__atan_varying_float(<WIDTH x float>) nounwind readnone
declare <WIDTH x float> @__atan2_varying_float(<WIDTH x float>,<WIDTH x float>) nounwind readnone
declare float @__sin_uniform_float(float) nounwind readnone
declare float @__asin_uniform_float(float) nounwind readnone
declare float @__cos_uniform_float(float) nounwind readnone
declare float @__acos_uniform_float(float) nounwind readnone
declare void @__sincos_uniform_float(float, float*, float*) nounwind
declare float @__tan_uniform_float(float) nounwind readnone
declare float @__atan_uniform_float(float) nounwind readnone
declare float @__atan2_uniform_float(float,float) nounwind readnone
declare <WIDTH x double> @__sin_varying_double(<WIDTH x double>) nounwind readnone
declare <WIDTH x double> @__asin_varying_double(<WIDTH x double>) nounwind readnone
declare <WIDTH x double> @__cos_varying_double(<WIDTH x double>) nounwind readnone
declare <WIDTH x double> @__acos_varying_double(<WIDTH x double>) nounwind readnone
declare void @__sincos_varying_double(<WIDTH x double>, <WIDTH x double>*, <WIDTH x double>*) nounwind
declare <WIDTH x double> @__tan_varying_double(<WIDTH x double>) nounwind readnone
declare <WIDTH x double> @__atan_varying_double(<WIDTH x double>) nounwind readnone
declare <WIDTH x double> @__atan2_varying_double(<WIDTH x double>,<WIDTH x double>) nounwind readnone
declare double @__sin_uniform_double(double) nounwind readnone
declare double @__asin_uniform_double(double) nounwind readnone
declare double @__cos_uniform_double(double) nounwind readnone
declare double @__acos_uniform_double(double) nounwind readnone
declare void @__sincos_uniform_double(double, double*, double*) nounwind
declare double @__tan_uniform_double(double) nounwind readnone
declare double @__atan_uniform_double(double) nounwind readnone
declare double @__atan2_uniform_double(double,double) nounwind readnone
')

View File

@@ -691,39 +691,6 @@ SHIFT(__vec16_f, float, float)
SHUFFLES(__vec16_f, float, float)
LOAD_STORE(__vec16_f, float)
static FORCEINLINE float __exp_uniform_float(float v) {
return expf(v);
}
static FORCEINLINE __vec16_f __exp_varying_float(__vec16_f v) {
__vec16_f ret;
for (int i = 0; i < 16; ++i)
ret.v[i] = expf(v.v[i]);
return ret;
}
static FORCEINLINE float __log_uniform_float(float v) {
return logf(v);
}
static FORCEINLINE __vec16_f __log_varying_float(__vec16_f v) {
__vec16_f ret;
for (int i = 0; i < 16; ++i)
ret.v[i] = logf(v.v[i]);
return ret;
}
static FORCEINLINE float __pow_uniform_float(float a, float b) {
return powf(a, b);
}
static FORCEINLINE __vec16_f __pow_varying_float(__vec16_f a, __vec16_f b) {
__vec16_f ret;
for (int i = 0; i < 16; ++i)
ret.v[i] = powf(a.v[i], b.v[i]);
return ret;
}
static FORCEINLINE int __intbits(float v) {
union {
float f;
@@ -1813,3 +1780,97 @@ static FORCEINLINE uint64_t __clock() {
#endif // !WIN32
///////////////////////////////////////////////////////////////////////////
// Transcendentals
//
//
// Generates the four wrappers __<op>_uniform_float, __<op>_varying_float,
// __<op>_uniform_double and __<op>_varying_double for a one-argument math
// routine.  The float forms call op##f and the double forms call op; the
// varying forms apply the routine to each of the 16 elements of v in turn.
#define TRANSCENDENTALS(op) \
static FORCEINLINE float __##op##_uniform_float(float v) { \
return op##f(v); \
} \
static FORCEINLINE __vec16_f __##op##_varying_float(__vec16_f v) { \
__vec16_f ret; \
for (int i = 0; i < 16; ++i) \
ret.v[i] = op##f(v.v[i]); \
return ret; \
} \
static FORCEINLINE double __##op##_uniform_double(double v) { \
return op(v); \
} \
static FORCEINLINE __vec16_d __##op##_varying_double(__vec16_d v) { \
__vec16_d ret; \
for (int i = 0; i < 16; ++i) \
ret.v[i] = op(v.v[i]); \
return ret; \
}
TRANSCENDENTALS(log)
TRANSCENDENTALS(exp)
// Uniform single-precision power: a raised to the power b, via powf.
static FORCEINLINE float __pow_uniform_float(float a, float b) {
    const float result = powf(a, b);
    return result;
}
// Element-wise powf over the 16 elements of a (bases) and b (exponents).
static FORCEINLINE __vec16_f __pow_varying_float(__vec16_f a, __vec16_f b) {
    __vec16_f result;
    int lane = 0;
    while (lane < 16) {
        result.v[lane] = powf(a.v[lane], b.v[lane]);
        ++lane;
    }
    return result;
}
// Uniform double-precision power: a raised to the power b, via pow.
static FORCEINLINE double __pow_uniform_double(double a, double b) {
    const double result = pow(a, b);
    return result;
}
// Element-wise pow over the 16 elements of a (bases) and b (exponents).
static FORCEINLINE __vec16_d __pow_varying_double(__vec16_d a, __vec16_d b) {
    __vec16_d result;
    int lane = 0;
    while (lane < 16) {
        result.v[lane] = pow(a.v[lane], b.v[lane]);
        ++lane;
    }
    return result;
}
///////////////////////////////////////////////////////////////////////////
// Trigonometry
TRANSCENDENTALS(sin)
TRANSCENDENTALS(asin)
TRANSCENDENTALS(cos)
TRANSCENDENTALS(acos)
TRANSCENDENTALS(tan)
TRANSCENDENTALS(atan)
// Uniform single-precision two-argument arc tangent, forwarded to atan2f(a, b).
static FORCEINLINE float __atan2_uniform_float(float a, float b) {
    const float angle = atan2f(a, b);
    return angle;
}
// Element-wise atan2f over the 16 elements of a and b.
static FORCEINLINE __vec16_f __atan2_varying_float(__vec16_f a, __vec16_f b) {
    __vec16_f angles;
    for (int lane = 0; lane != 16; lane++) {
        angles.v[lane] = atan2f(a.v[lane], b.v[lane]);
    }
    return angles;
}
// Uniform double-precision two-argument arc tangent, forwarded to atan2(a, b).
static FORCEINLINE double __atan2_uniform_double(double a, double b) {
    const double angle = atan2(a, b);
    return angle;
}
// Element-wise atan2 over the 16 elements of a and b.
static FORCEINLINE __vec16_d __atan2_varying_double(__vec16_d a, __vec16_d b) {
    __vec16_d angles;
    for (int lane = 0; lane != 16; lane++) {
        angles.v[lane] = atan2(a.v[lane], b.v[lane]);
    }
    return angles;
}
// Uniform single-precision sincos: forwards to sincosf, which stores the
// sine of x in *a and the cosine of x in *b.
static FORCEINLINE void __sincos_uniform_float(float x, float *a, float *b) {
sincosf(x,a,b);
}
// Varying single-precision sincos: for each of the 16 elements of x, stores
// the sine in the matching element of *a and the cosine in the matching
// element of *b.
static FORCEINLINE void __sincos_varying_float(__vec16_f x, __vec16_f *a, __vec16_f *b) {
    // Address the outputs through the element array, as every other wrapper
    // in this file does, instead of reinterpreting the struct pointers.
    for (int i = 0; i < 16; ++i)
        sincosf(x.v[i], &a->v[i], &b->v[i]);
}
// Uniform double-precision sincos: forwards to sincos, which stores the
// sine of x in *a and the cosine of x in *b.
static FORCEINLINE void __sincos_uniform_double(double x, double *a, double *b) {
sincos(x,a,b);
}
// Varying double-precision sincos: for each of the 16 elements of x, stores
// the sine in the matching element of *a and the cosine in the matching
// element of *b.
static FORCEINLINE void __sincos_varying_double(__vec16_d x, __vec16_d *a, __vec16_d *b) {
    // Address the outputs through the element array, as every other wrapper
    // in this file does, instead of reinterpreting the struct pointers.
    for (int i = 0; i < 16; ++i)
        sincos(x.v[i], &a->v[i], &b->v[i]);
}

View File

@@ -1142,28 +1142,6 @@ template <> static FORCEINLINE void __store<64>(__vec16_f *p, __vec16_f v)
}
#endif
/******** math ******/
/*** float ***/
static FORCEINLINE float __exp_uniform_float(float v) { return expf(v);}
static FORCEINLINE __vec16_f __exp_varying_float(__vec16_f v) { return _mm512_exp_ps(v); }
static FORCEINLINE float __log_uniform_float(float v) { return logf(v);}
static FORCEINLINE __vec16_f __log_varying_float(__vec16_f v) { return _mm512_log_ps(v); }
static FORCEINLINE float __pow_uniform_float(float a, float b) { return powf(a, b);}
static FORCEINLINE __vec16_f __pow_varying_float(__vec16_f a, __vec16_f b) { return _mm512_pow_ps(a,b); }
/*** double ***/
static FORCEINLINE double __exp_uniform_double(double v) { return exp(v);}
static FORCEINLINE __vec16_d __exp_varying_double(__vec16_d v) { return __vec16_d(_mm512_exp_pd(v.v1),_mm512_exp_pd(v.v2)); }
static FORCEINLINE double __log_uniform_double(double v) { return log(v);}
static FORCEINLINE __vec16_d __log_varying_double(__vec16_d v) { return __vec16_d(_mm512_log_pd(v.v1),_mm512_log_pd(v.v2)); }
static FORCEINLINE double __pow_uniform_double(double a, double b) { return pow(a,b);}
static FORCEINLINE __vec16_d __pow_varying_double(__vec16_d a, __vec16_d b) { return __vec16_d(_mm512_pow_pd(a.v1,b.v1),_mm512_pow_pd(a.v2,b.v2)); }
/******** bitcast ******/
static FORCEINLINE int __intbits(float v) {
@@ -2806,6 +2784,40 @@ static FORCEINLINE uint64_t __clock() {
#endif // !WIN32
///////////////////////////////////////////////////////////////////////////
// Transcendentals
// Generates the four wrappers __<op>_{varying,uniform}_{float,double} for a
// one-argument math routine.  The varying forms use the _mm512_<op>_ps and
// _mm512_<op>_pd intrinsics (the double vector is handled as its two halves
// v1 and v2); the uniform forms call op##f and op.
#define TRANSCENDENTALS(op) \
static FORCEINLINE __vec16_f __##op##_varying_float(__vec16_f v) { return _mm512_##op##_ps(v); } \
static FORCEINLINE float __##op##_uniform_float(float v) { return op##f(v); } \
static FORCEINLINE __vec16_d __##op##_varying_double(__vec16_d v) { return __vec16_d(_mm512_##op##_pd(v.v1),_mm512_##op##_pd(v.v2)); } \
static FORCEINLINE double __##op##_uniform_double(double a) { return op(a); }
TRANSCENDENTALS(log)
TRANSCENDENTALS(exp)
// pow takes two operands, so its four wrappers are written out by hand
// instead of being generated by TRANSCENDENTALS.  The varying double form
// applies _mm512_pow_pd to the v1 and v2 halves separately.
static FORCEINLINE float __pow_uniform_float(float a, float b) { return powf(a, b);}
static FORCEINLINE __vec16_f __pow_varying_float(__vec16_f a, __vec16_f b) { return _mm512_pow_ps(a,b); }
static FORCEINLINE double __pow_uniform_double(double a, double b) { return pow(a,b);}
static FORCEINLINE __vec16_d __pow_varying_double(__vec16_d a, __vec16_d b) { return __vec16_d(_mm512_pow_pd(a.v1,b.v1),_mm512_pow_pd(a.v2,b.v2)); }
///////////////////////////////////////////////////////////////////////////
// Trigonometry
TRANSCENDENTALS(sin)
TRANSCENDENTALS(asin)
TRANSCENDENTALS(cos)
TRANSCENDENTALS(acos)
TRANSCENDENTALS(tan)
TRANSCENDENTALS(atan)
// atan2 takes two operands, so its four wrappers are written out by hand
// instead of being generated by TRANSCENDENTALS.  The varying double form
// applies _mm512_atan2_pd to the v1 and v2 halves separately.
static FORCEINLINE float __atan2_uniform_float(float a, float b) { return atan2f(a, b);}
static FORCEINLINE __vec16_f __atan2_varying_float(__vec16_f a, __vec16_f b) { return _mm512_atan2_ps(a,b); }
static FORCEINLINE double __atan2_uniform_double(double a, double b) { return atan2(a,b);}
static FORCEINLINE __vec16_d __atan2_varying_double(__vec16_d a, __vec16_d b) { return __vec16_d(_mm512_atan2_pd(a.v1,b.v1),_mm512_atan2_pd(a.v2,b.v2)); }
#undef FORCEINLINE
#undef PRE_ALIGN
#undef POST_ALIGN

View File

@@ -95,6 +95,7 @@ typedef struct PRE_ALIGN(64) __vec16_f {
typedef struct PRE_ALIGN(64) __vec16_d {
FORCEINLINE __vec16_d() : v1(_mm512_undefined_pd()), v2(_mm512_undefined_pd()) {}
FORCEINLINE __vec16_d(const __vec16_d &o) : v1(o.v1), v2(o.v2) {}
FORCEINLINE __vec16_d(const __m512d _v1, const __m512d _v2) : v1(_v1), v2(_v2) {}
FORCEINLINE __vec16_d& operator =(const __vec16_d &o) { v1=o.v1; v2=o.v2; return *this; }
FORCEINLINE __vec16_d(double v00, double v01, double v02, double v03,
double v04, double v05, double v06, double v07,
@@ -1503,18 +1504,6 @@ static FORCEINLINE double __rsqrt_uniform_double(double v)
}
static FORCEINLINE __vec16_f __exp_varying_float(__vec16_f v) {
return _mm512_exp_ps(v);
}
static FORCEINLINE __vec16_f __log_varying_float(__vec16_f v) {
return _mm512_log_ps(v);
}
static FORCEINLINE __vec16_f __pow_varying_float(__vec16_f a, __vec16_f b) {
return _mm512_pow_ps(a, b);
}
///////////////////////////////////////////////////////////////////////////
// bit ops
///////////////////////////////////////////////////////////////////////////
@@ -2173,6 +2162,39 @@ static FORCEINLINE uint64_t __clock() {
}
#endif // !WIN32
///////////////////////////////////////////////////////////////////////////
// Transcendentals
// Generates the four wrappers __<op>_{varying,uniform}_{float,double} for a
// one-argument math routine.  The varying forms use the _mm512_<op>_ps and
// _mm512_<op>_pd intrinsics (the double vector is handled as its two halves
// v1 and v2); the uniform forms call op##f and op.
#define TRANSCENDENTALS(op) \
static FORCEINLINE __vec16_f __##op##_varying_float(__vec16_f v) { return _mm512_##op##_ps(v); } \
static FORCEINLINE float __##op##_uniform_float(float v) { return op##f(v); } \
static FORCEINLINE __vec16_d __##op##_varying_double(__vec16_d v) { return __vec16_d(_mm512_##op##_pd(v.v1),_mm512_##op##_pd(v.v2)); } \
static FORCEINLINE double __##op##_uniform_double(double a) { return op(a); }
TRANSCENDENTALS(log)
TRANSCENDENTALS(exp)
// pow takes two operands, so its four wrappers are written out by hand
// instead of being generated by TRANSCENDENTALS.  The varying double form
// applies _mm512_pow_pd to the v1 and v2 halves separately.
static FORCEINLINE float __pow_uniform_float(float a, float b) { return powf(a, b);}
static FORCEINLINE __vec16_f __pow_varying_float(__vec16_f a, __vec16_f b) { return _mm512_pow_ps(a,b); }
static FORCEINLINE double __pow_uniform_double(double a, double b) { return pow(a,b);}
static FORCEINLINE __vec16_d __pow_varying_double(__vec16_d a, __vec16_d b) { return __vec16_d(_mm512_pow_pd(a.v1,b.v1),_mm512_pow_pd(a.v2,b.v2)); }
///////////////////////////////////////////////////////////////////////////
// Trigonometry
TRANSCENDENTALS(sin)
TRANSCENDENTALS(asin)
TRANSCENDENTALS(cos)
TRANSCENDENTALS(acos)
TRANSCENDENTALS(tan)
TRANSCENDENTALS(atan)
// atan2 takes two operands, so its four wrappers are written out by hand
// instead of being generated by TRANSCENDENTALS.  The varying double form
// applies _mm512_atan2_pd to the v1 and v2 halves separately.
static FORCEINLINE float __atan2_uniform_float(float a, float b) { return atan2f(a, b);}
static FORCEINLINE __vec16_f __atan2_varying_float(__vec16_f a, __vec16_f b) { return _mm512_atan2_ps(a,b); }
static FORCEINLINE double __atan2_uniform_double(double a, double b) { return atan2(a,b);}
static FORCEINLINE __vec16_d __atan2_varying_double(__vec16_d a, __vec16_d b) { return __vec16_d(_mm512_atan2_pd(a.v1,b.v1),_mm512_atan2_pd(a.v2,b.v2)); }
#undef FORCEINLINE
#undef PRE_ALIGN
#undef POST_ALIGN

View File

@@ -167,6 +167,10 @@ struct __vec4_d {
}
__m128d v[2];
FORCEINLINE __vec4_d(double *p) {
v[0] = _mm_set_pd(p[1], p[0]);
v[1] = _mm_set_pd(p[3], p[2]);
}
};
@@ -2471,39 +2475,6 @@ static FORCEINLINE __vec4_d __sqrt_varying_double(__vec4_d v) {
return __vec4_d(_mm_sqrt_pd(v.v[0]), _mm_sqrt_pd(v.v[1]));
}
static FORCEINLINE __vec4_f __pow_varying_float(__vec4_f a, __vec4_f b) {
float r[4];
for (int i = 0; i < 4; ++i)
r[i] = powf(__extract_element(a, i), __extract_element(b, i));
return __vec4_f(r);
}
static FORCEINLINE float __pow_uniform_float(float a, float b) {
return powf(a, b);
}
static FORCEINLINE __vec4_f __exp_varying_float(__vec4_f a) {
float r[4];
for (int i = 0; i < 4; ++i)
r[i] = expf(__extract_element(a, i));
return __vec4_f(r);
}
static FORCEINLINE float __exp_uniform_float(float a) {
return expf(a);
}
static FORCEINLINE __vec4_f __log_varying_float(__vec4_f a) {
float r[4];
for (int i = 0; i < 4; ++i)
r[i] = logf(__extract_element(a, i));
return __vec4_f(r);
}
static FORCEINLINE float __log_uniform_float(float a) {
return logf(a);
}
static FORCEINLINE int __intbits(float v) {
union {
float f;
@@ -4166,4 +4137,97 @@ static FORCEINLINE uint64_t __clock() {
}
#endif // !WIN32
///////////////////////////////////////////////////////////////////////////
// Transcendentals
// Generates the four wrappers __<op>_{varying,uniform}_{float,double} for a
// one-argument math routine.  The varying forms pull each of the 4 elements
// out with __extract_element, apply op##f (float) or op (double) to it, and
// build the result vector from the temporary array; the uniform forms call
// the routine directly.
#define TRANSCENDENTALS(op) \
static FORCEINLINE __vec4_f __##op##_varying_float(__vec4_f a) {\
float r[4];\
for (int i = 0; i < 4; ++i)\
r[i] = op##f(__extract_element(a, i));\
return __vec4_f(r);\
}\
static FORCEINLINE float __##op##_uniform_float(float a) {\
return op##f(a);\
}\
static FORCEINLINE __vec4_d __##op##_varying_double(__vec4_d a) {\
double r[4];\
for (int i = 0; i < 4; ++i)\
r[i] = op(__extract_element(a, i));\
return __vec4_d(r);\
}\
static FORCEINLINE double __##op##_uniform_double(double a) {\
return op(a);\
}
TRANSCENDENTALS(log)
TRANSCENDENTALS(exp)
// Element-wise powf over the four elements of a (bases) and b (exponents).
static FORCEINLINE __vec4_f __pow_varying_float(__vec4_f a, __vec4_f b) {
    float lanes[4];
    for (int lane = 0; lane != 4; lane++) {
        lanes[lane] = powf(__extract_element(a, lane), __extract_element(b, lane));
    }
    return __vec4_f(lanes);
}
// Uniform single-precision power: a raised to the power b, via powf.
static FORCEINLINE float __pow_uniform_float(float a, float b) {
    const float result = powf(a, b);
    return result;
}
// Element-wise pow over the four elements of a (bases) and b (exponents).
static FORCEINLINE __vec4_d __pow_varying_double(__vec4_d a, __vec4_d b) {
    double lanes[4];
    for (int lane = 0; lane != 4; lane++) {
        lanes[lane] = pow(__extract_element(a, lane), __extract_element(b, lane));
    }
    return __vec4_d(lanes);
}
// Uniform double-precision power: a raised to the power b, via pow.
static FORCEINLINE double __pow_uniform_double(double a, double b) {
    const double result = pow(a, b);
    return result;
}
///////////////////////////////////////////////////////////////////////////
// Trigonometry
TRANSCENDENTALS(sin)
TRANSCENDENTALS(asin)
TRANSCENDENTALS(cos)
TRANSCENDENTALS(acos)
TRANSCENDENTALS(tan)
TRANSCENDENTALS(atan)
// Element-wise atan2f over the four elements of a and b.
static FORCEINLINE __vec4_f __atan2_varying_float(__vec4_f a, __vec4_f b) {
    float angles[4];
    int lane = 0;
    while (lane < 4) {
        angles[lane] = atan2f(__extract_element(a, lane), __extract_element(b, lane));
        ++lane;
    }
    return __vec4_f(angles);
}
// Uniform single-precision two-argument arc tangent, forwarded to atan2f(a, b).
static FORCEINLINE float __atan2_uniform_float(float a, float b) {
    const float angle = atan2f(a, b);
    return angle;
}
// Element-wise atan2 over the four elements of a and b.
static FORCEINLINE __vec4_d __atan2_varying_double(__vec4_d a, __vec4_d b) {
    double angles[4];
    int lane = 0;
    while (lane < 4) {
        angles[lane] = atan2(__extract_element(a, lane), __extract_element(b, lane));
        ++lane;
    }
    return __vec4_d(angles);
}
// Uniform double-precision two-argument arc tangent, forwarded to atan2(a, b).
static FORCEINLINE double __atan2_uniform_double(double a, double b) {
    const double angle = atan2(a, b);
    return angle;
}
// Varying single-precision sincos: for each of the four elements of x, the
// sine is written to the i-th float of *_sin and the cosine to the i-th
// float of *_cos.
static FORCEINLINE void __sincos_varying_float(__vec4_f x, __vec4_f * _sin, __vec4_f * _cos) {
    float *sin_lanes = (float*)_sin;
    float *cos_lanes = (float*)_cos;
    for (int lane = 0; lane != 4; lane++) {
        sincosf(__extract_element(x, lane), sin_lanes + lane, cos_lanes + lane);
    }
}
// Uniform single-precision sincos: forwards to sincosf, which stores the
// sine of x in *_sin and the cosine of x in *_cos.
static FORCEINLINE void __sincos_uniform_float(float x, float *_sin, float *_cos) {
sincosf(x, _sin, _cos);
}
// Varying double-precision sincos: for each of the four elements of x, the
// sine is written to the i-th double of *_sin and the cosine to the i-th
// double of *_cos.
static FORCEINLINE void __sincos_varying_double(__vec4_d x, __vec4_d * _sin, __vec4_d * _cos) {
    double *sin_lanes = (double*)_sin;
    double *cos_lanes = (double*)_cos;
    for (int lane = 0; lane != 4; lane++) {
        sincos(__extract_element(x, lane), sin_lanes + lane, cos_lanes + lane);
    }
}
// Uniform double-precision sincos: forwards to sincos, which stores the
// sine of x in *_sin and the cosine of x in *_cos.
static FORCEINLINE void __sincos_uniform_double(double x, double *_sin, double *_cos) {
sincos(x, _sin, _cos);
}
#undef FORCEINLINE

View File

@@ -5,7 +5,7 @@ CXX_SRC=ao.cpp
PTXCC_REGMAX=64
#ISPC_FLAGS= --opt=disable-uniform-control-flow
LLVM_GPU=1
#LLVM_GPU=1
NVVM_GPU=1
include ../common_ptx.mk

View File

@@ -202,6 +202,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
m_hasGather(false),
m_hasScatter(false),
m_hasTranscendentals(false),
m_hasTrigonometry(false),
m_hasRsqrtd(false),
m_hasRcpd(false)
{
@@ -423,6 +424,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_maskBitCount = 1;
this->m_hasHalf = true;
this->m_hasTranscendentals = true;
this->m_hasTrigonometry = true;
this->m_hasGather = this->m_hasScatter = true;
this->m_hasRsqrtd = this->m_hasRcpd = true;
}
@@ -436,6 +438,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_maskBitCount = 1;
this->m_hasHalf = true;
this->m_hasTranscendentals = true;
this->m_hasTrigonometry = true;
this->m_hasGather = this->m_hasScatter = true;
this->m_hasRsqrtd = this->m_hasRcpd = true;
}
@@ -449,6 +452,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_maskBitCount = 1;
this->m_hasHalf = true;
this->m_hasTranscendentals = true;
this->m_hasTrigonometry = true;
this->m_hasGather = this->m_hasScatter = true;
this->m_hasRsqrtd = this->m_hasRcpd = true;
}
@@ -462,6 +466,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_maskBitCount = 1;
this->m_hasHalf = true;
this->m_hasTranscendentals = true;
this->m_hasTrigonometry = true;
this->m_hasGather = this->m_hasScatter = true;
this->m_hasRsqrtd = this->m_hasRcpd = true;
}
@@ -475,6 +480,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_maskBitCount = 1;
this->m_hasHalf = true;
this->m_hasTranscendentals = true;
this->m_hasTrigonometry = true;
this->m_hasGather = this->m_hasScatter = true;
this->m_hasRsqrtd = this->m_hasRcpd = true;
}
@@ -720,7 +726,8 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_hasHalf = true;
this->m_maskingIsFree = true;
this->m_maskBitCount = 1;
this->m_hasTranscendentals = false;
this->m_hasTranscendentals = true;
this->m_hasTrigonometry = true;
this->m_hasGather = this->m_hasScatter = false;
}
else {

5
ispc.h
View File

@@ -282,6 +282,8 @@ public:
bool hasTranscendentals() const {return m_hasTranscendentals;}
bool hasTrigonometry() const {return m_hasTrigonometry;}
bool hasRsqrtd() const {return m_hasRsqrtd;}
bool hasRcpd() const {return m_hasRcpd;}
@@ -385,6 +387,9 @@ private:
sqrt, which we assume that all of them handle). */
bool m_hasTranscendentals;
/** Indicates whether the target has ISA support for trigonometry */
bool m_hasTrigonometry;
/** Indicates whether there is an ISA double precision rsqrt. */
bool m_hasRsqrtd;

View File

@@ -2403,8 +2403,12 @@ static inline uniform float frexp(uniform float x, uniform int * uniform pw2) {
__declspec(safe)
static inline float sin(float x_full) {
if (__math_lib == __math_lib_svml) {
return __svml_sinf(x_full);
if (__have_native_trigonometry)
{
return __sin_varying_float(x_full);
}
else if (__math_lib == __math_lib_svml) {
return __svml_sinf(x_full);
}
else if (__math_lib == __math_lib_system) {
float ret;
@@ -2465,7 +2469,11 @@ static inline float sin(float x_full) {
__declspec(safe)
static inline uniform float sin(uniform float x_full) {
if (__math_lib == __math_lib_system ||
if (__have_native_trigonometry)
{
return __sin_uniform_float(x_full);
}
else if (__math_lib == __math_lib_system ||
__math_lib == __math_lib_svml) {
return __stdlib_sinf(x_full);
}
@@ -2530,25 +2538,29 @@ static inline uniform float sin(uniform float x_full) {
__declspec(safe)
static inline float asin(float x) {
bool isneg = x < 0;
x = abs(x);
static inline float asin(float x0) {
bool isneg = x0< 0;
float x = abs(x0);
bool isnan = (x > 1);
float v;
if (__math_lib == __math_lib_svml) {
return __svml_asinf(x);
if (__have_native_trigonometry)
{
return __asin_varying_float(x0);
}
else if (__math_lib == __math_lib_svml) {
return __svml_asinf(x0);
}
else if (__math_lib == __math_lib_system) {
float ret;
foreach_active (i) {
uniform float r = __stdlib_asinf(extract(x, i));
uniform float r = __stdlib_asinf(extract(x0, i));
ret = insert(ret, i, r);
}
return ret;
}
else if (__math_lib == __math_lib_ispc)
{
// sollya
// fpminimax(((asin(x)-pi/2)/-sqrt(1-x)), [|0,1,2,3,4,5,6,7,8,9,10|],
// [|single...|], [1e-20;.9999999999999999]);
@@ -2564,7 +2576,9 @@ static inline float asin(float x) {
x * (3.05023305118083953857421875e-2f +
x * (-1.2897425331175327301025390625e-2f +
x * 2.38926825113594532012939453125e-3f)))))))));
}
else if (__math_lib == __math_lib_ispc_fast)
{
// sollya
// fpminimax(((asin(x)-pi/2)/-sqrt(1-x)), [|0,1,2,3,4,5|],[|single...|],
// [1e-20;.9999999999999999]);
@@ -2575,6 +2589,7 @@ static inline float asin(float x) {
x * (-4.489909112453460693359375e-2f +
x * (1.928029954433441162109375e-2f +
x * (-4.3095736764371395111083984375e-3f)))));
}
v *= -sqrt(1.f - x);
v = v + 1.57079637050628662109375;
@@ -2589,18 +2604,21 @@ static inline float asin(float x) {
__declspec(safe)
static inline uniform float asin(uniform float x) {
uniform bool isneg = x < 0;
x = abs(x);
static inline uniform float asin(uniform float x0) {
uniform bool isneg = x0 < 0;
uniform float x = abs(x0);
uniform bool isnan = (x > 1);
uniform float v;
if (__math_lib == __math_lib_svml ||
if (__have_native_trigonometry)
{
return __asin_uniform_float(x0);
}
else if (__math_lib == __math_lib_svml ||
__math_lib == __math_lib_system) {
return __stdlib_asinf(x);
return __stdlib_asinf(x0);
}
else if (__math_lib == __math_lib_ispc)
{
// sollya
// fpminimax(((asin(x)-pi/2)/-sqrt(1-x)), [|0,1,2,3,4,5,6,7,8,9,10|],
// [|single...|], [1e-20;.9999999999999999]);
@@ -2616,7 +2634,9 @@ static inline uniform float asin(uniform float x) {
x * (3.05023305118083953857421875e-2f +
x * (-1.2897425331175327301025390625e-2f +
x * 2.38926825113594532012939453125e-3f)))))))));
}
else if (__math_lib == __math_lib_ispc_fast)
{
// sollya
// fpminimax(((asin(x)-pi/2)/-sqrt(1-x)), [|0,1,2,3,4,5|],[|single...|],
// [1e-20;.9999999999999999]);
@@ -2627,6 +2647,7 @@ static inline uniform float asin(uniform float x) {
x * (-4.489909112453460693359375e-2f +
x * (1.928029954433441162109375e-2f +
x * (-4.3095736764371395111083984375e-3f)))));
}
v *= -sqrt(1.f - x);
v = v + 1.57079637050628662109375;
@@ -2642,6 +2663,10 @@ static inline uniform float asin(uniform float x) {
__declspec(safe)
static inline float cos(float x_full) {
if (__have_native_trigonometry)
{
return __cos_varying_float(x_full);
}
if (__math_lib == __math_lib_svml) {
return __svml_cosf(x_full);
}
@@ -2703,7 +2728,11 @@ static inline float cos(float x_full) {
__declspec(safe)
static inline uniform float cos(uniform float x_full) {
if (__math_lib == __math_lib_system ||
if (__have_native_trigonometry)
{
return __cos_uniform_float(x_full);
}
else if (__math_lib == __math_lib_system ||
__math_lib == __math_lib_svml) {
return __stdlib_cosf(x_full);
}
@@ -2768,22 +2797,34 @@ static inline uniform float cos(uniform float x_full) {
// Arc cosine of a varying float.
// Uses the target's native routine when __have_native_trigonometry is set;
// otherwise derives the result from asin() via acos(v) = pi/2 - asin(v),
// with pi/2 written as its single-precision value.
__declspec(safe)
static inline float acos(float v) {
    if (__have_native_trigonometry) {
        return __acos_varying_float(v);
    }
    return 1.57079637050628662109375 - asin(v);
}
// Arc cosine of a varying double.
// Uses the target's native routine when __have_native_trigonometry is set;
// otherwise derives the result from asin() via acos(v) = pi/2 - asin(v).
__declspec(safe)
static inline double acos(const double v) {
    if (__have_native_trigonometry)
        return __acos_varying_double(v);
    else
        // pi/2 at double precision.  The previous constant
        // (1.57079637050628662109375) was pi/2 rounded to float, which put
        // an absolute error of about 4.4e-8 into every double result.
        return 1.5707963267948966d0 - asin(v);
}
// Arc cosine of a uniform float.
// Uses the target's native routine when __have_native_trigonometry is set;
// otherwise derives the result from asin() via acos(v) = pi/2 - asin(v),
// with pi/2 written as its single-precision value.
__declspec(safe)
static inline uniform float acos(uniform float v) {
    if (__have_native_trigonometry) {
        return __acos_uniform_float(v);
    }
    return 1.57079637050628662109375 - asin(v);
}
// Arc cosine of a uniform double.
// Uses the target's native routine when __have_native_trigonometry is set;
// otherwise derives the result from asin() via acos(v) = pi/2 - asin(v).
__declspec(safe)
static inline uniform double acos(const uniform double v) {
    if (__have_native_trigonometry)
        return __acos_uniform_double(v);
    else
        // pi/2 at double precision.  The previous constant
        // (1.57079637050628662109375) was pi/2 rounded to float, which put
        // an absolute error of about 4.4e-8 into every double result.
        return 1.5707963267948966d0 - asin(v);
}
@@ -2791,6 +2832,10 @@ static inline uniform double acos(const uniform double v) {
__declspec(safe)
static inline void sincos(float x_full, varying float * uniform sin_result,
varying float * uniform cos_result) {
if (__have_native_trigonometry)
{
__sincos_varying_float(x_full,sin_result,cos_result);
}
if (__math_lib == __math_lib_svml) {
__svml_sincosf(x_full, sin_result, cos_result);
}
@@ -2861,6 +2906,10 @@ static inline void sincos(float x_full, varying float * uniform sin_result,
__declspec(safe)
static inline void sincos(uniform float x_full, uniform float * uniform sin_result,
uniform float * uniform cos_result) {
if (__have_native_trigonometry)
{
__sincos_uniform_float(x_full, sin_result, cos_result);
}
if (__math_lib == __math_lib_system ||
__math_lib == __math_lib_svml) {
__stdlib_sincosf(x_full, sin_result, cos_result);
@@ -2923,7 +2972,11 @@ static inline void sincos(uniform float x_full, uniform float * uniform sin_resu
__declspec(safe)
static inline float tan(float x_full) {
if (__math_lib == __math_lib_svml) {
if (__have_native_trigonometry)
{
return __tan_varying_float(x_full);
}
else if (__math_lib == __math_lib_svml) {
return __svml_tanf(x_full);
}
else if (__math_lib == __math_lib_system) {
@@ -3002,7 +3055,11 @@ static inline float tan(float x_full) {
__declspec(safe)
static inline uniform float tan(uniform float x_full) {
if (__math_lib == __math_lib_system ||
if (__have_native_trigonometry)
{
return __tan_uniform_float(x_full);
}
else if (__math_lib == __math_lib_system ||
__math_lib == __math_lib_svml) {
return __stdlib_tanf(x_full);
}
@@ -3074,7 +3131,11 @@ static inline uniform float tan(uniform float x_full) {
__declspec(safe)
static inline float atan(float x_full) {
if (__math_lib == __math_lib_svml) {
if (__have_native_trigonometry)
{
return __atan_varying_float(x_full);
}
else if (__math_lib == __math_lib_svml) {
return __svml_atanf(x_full);
}
else if (__math_lib == __math_lib_system) {
@@ -3125,7 +3186,11 @@ static inline float atan(float x_full) {
__declspec(safe)
static inline uniform float atan(uniform float x_full) {
if (__math_lib == __math_lib_system ||
if (__have_native_trigonometry)
{
return __atan_uniform_float(x_full);
}
else if (__math_lib == __math_lib_system ||
__math_lib == __math_lib_svml) {
return __stdlib_atanf(x_full);
}
@@ -3169,7 +3234,11 @@ static inline uniform float atan(uniform float x_full) {
__declspec(safe)
static inline float atan2(float y, float x) {
if (__math_lib == __math_lib_svml) {
if (__have_native_trigonometry)
{
return __atan2_varying_float(y,x);
}
else if (__math_lib == __math_lib_svml) {
return __svml_atan2f(y, x);
}
else if (__math_lib == __math_lib_system) {
@@ -3208,7 +3277,11 @@ static inline float atan2(float y, float x) {
__declspec(safe)
static inline uniform float atan2(uniform float y, uniform float x) {
if (__math_lib == __math_lib_system ||
if (__have_native_trigonometry)
{
return __atan2_uniform_float(y,x);
}
else if (__math_lib == __math_lib_system ||
__math_lib == __math_lib_svml) {
return __stdlib_atan2f(y, x);
}
@@ -3743,12 +3816,14 @@ static inline uniform double frexp(uniform double x, uniform int * uniform pw2)
__declspec(safe)
static inline double sin(double x) {
if (__math_lib == __math_lib_svml)
if (__have_native_trigonometry)
{
return __sin_varying_double(x);
}
else if (__math_lib == __math_lib_svml)
{
return __svml_sind(x);
}
else if (__math_lib == __math_lib_ispc_fast)
return sin((float)x);
else {
double ret;
foreach_active (i) {
@@ -3758,23 +3833,46 @@ static inline double sin(double x) {
return ret;
}
}
// Arc sine of a varying double.
// Dispatch order: native target routine, then SVML, then a per-lane
// fallback that calls the scalar __stdlib_asin() for each active lane.
__declspec(safe)
static inline double asin(double x) {
    if (__have_native_trigonometry) {
        return __asin_varying_double(x);
    }
    if (__math_lib == __math_lib_svml) {
        return __svml_asind(x);
    }

    // Only active lanes are written; the remaining lanes of `result` are
    // left as they were declared (uninitialized).
    double result;
    foreach_active (lane) {
        uniform double laneValue = __stdlib_asin(extract(x, lane));
        result = insert(result, lane, laneValue);
    }
    return result;
}
__declspec(safe)
static inline uniform double sin(uniform double x) {
if (__math_lib == __math_lib_ispc_fast)
return sin((float)x);
if (__have_native_trigonometry)
{
return __sin_uniform_double(x);
}
else
return __stdlib_sin(x);
}
__declspec(safe)
static inline double asin(const double x) {
if (__math_lib == __math_lib_svml)
if (__have_native_trigonometry)
{
return __asin_varying_double(x);
}
else if (__math_lib == __math_lib_svml)
{
return __svml_asind(x);
}
else if (__math_lib == __math_lib_ispc_fast)
return asin((float)x);
else {
double ret;
foreach_active (i) {
@@ -3787,12 +3885,14 @@ static inline double asin(const double x) {
__declspec(safe)
static inline double cos(const double x) {
if (__have_native_trigonometry)
{
return __cos_varying_double(x);
}
if (__math_lib == __math_lib_svml)
{
return __svml_cosd(x);
}
else if (__math_lib == __math_lib_ispc_fast)
return cos((float)x);
else {
double ret;
foreach_active (i) {
@@ -3805,8 +3905,10 @@ static inline double cos(const double x) {
__declspec(safe)
static inline uniform double cos(uniform double x) {
if (__math_lib == __math_lib_ispc_fast)
return cos((float)x);
if (__have_native_trigonometry)
{
return __cos_uniform_double(x);
}
else
return __stdlib_cos(x);
}
@@ -3814,16 +3916,14 @@ static inline uniform double cos(uniform double x) {
__declspec(safe)
static inline void sincos(double x, varying double * uniform sin_result,
varying double * uniform cos_result) {
if (__have_native_trigonometry)
{
__sincos_varying_double(x,sin_result,cos_result);
}
if (__math_lib == __math_lib_svml)
{
__svml_sincosd(x, sin_result, cos_result);
}
else if (__math_lib == __math_lib_ispc_fast) {
float sr, cr;
sincos((float)x, &sr, &cr);
*sin_result = sr;
*cos_result = cr;
}
else {
foreach_active (i) {
uniform double sr, cr;
@@ -3837,11 +3937,9 @@ static inline void sincos(double x, varying double * uniform sin_result,
__declspec(safe)
static inline void sincos(uniform double x, uniform double * uniform sin_result,
uniform double * uniform cos_result) {
if (__math_lib == __math_lib_ispc_fast) {
uniform float sr, cr;
sincos((uniform float)x, &sr, &cr);
*sin_result = sr;
*cos_result = cr;
if (__have_native_trigonometry)
{
__sincos_uniform_double(x,sin_result, cos_result);
}
else
__stdlib_sincos(x, sin_result, cos_result);
@@ -3849,12 +3947,14 @@ static inline void sincos(uniform double x, uniform double * uniform sin_result,
__declspec(safe)
static inline double tan(double x) {
if (__math_lib == __math_lib_svml)
if (__have_native_trigonometry)
{
return __tan_varying_double(x);
}
else if (__math_lib == __math_lib_svml)
{
return __svml_tand(x);
}
else if (__math_lib == __math_lib_ispc_fast)
return tan((float)x);
else {
double ret;
foreach_active (i) {
@@ -3867,16 +3967,20 @@ static inline double tan(double x) {
__declspec(safe)
static inline uniform double tan(uniform double x) {
if (__math_lib == __math_lib_ispc_fast)
return tan((float)x);
if (__have_native_trigonometry)
{
return __tan_uniform_double(x);
}
else
return __stdlib_tan(x);
}
__declspec(safe)
static inline double atan(double x) {
if (__math_lib == __math_lib_ispc_fast)
return atan((float)x);
if (__have_native_trigonometry)
{
return __atan_varying_double(x);
}
else {
double ret;
foreach_active (i) {
@@ -3889,20 +3993,24 @@ static inline double atan(double x) {
__declspec(safe)
static inline uniform double atan(uniform double x) {
if (__math_lib == __math_lib_ispc_fast)
return atan((float)x);
if (__have_native_trigonometry)
{
return __atan_uniform_double(x);
}
else
return __stdlib_atan(x);
}
__declspec(safe)
static inline double atan2(double y, double x) {
if (__math_lib == __math_lib_svml)
if (__have_native_trigonometry)
{
return __atan2_varying_double(y,x);
}
else if (__math_lib == __math_lib_svml)
{
return __svml_atan2d(y,x);
}
else if (__math_lib == __math_lib_ispc_fast)
return atan2((float)y, (float)x);
else {
double ret;
foreach_active (i) {
@@ -3915,20 +4023,23 @@ static inline double atan2(double y, double x) {
__declspec(safe)
static inline uniform double atan2(uniform double y, uniform double x) {
if (__math_lib == __math_lib_ispc_fast)
return atan2((float)y, (float)x);
if (__have_native_trigonometry)
{
return __atan2_uniform_double(y,x);
}
else
return __stdlib_atan2(y, x);
}
__declspec(safe)
static inline double exp(double x) {
if (__math_lib == __math_lib_svml)
if (__have_native_transcendentals) {
return __exp_varying_double(x);
}
else if (__math_lib == __math_lib_svml)
{
return __svml_expd(x);
}
else if (__math_lib == __math_lib_ispc_fast)
return exp((float)x);
else {
double ret;
foreach_active (i) {
@@ -3941,20 +4052,22 @@ static inline double exp(double x) {
__declspec(safe)
static inline uniform double exp(uniform double x) {
if (__math_lib == __math_lib_ispc_fast)
return exp((float)x);
if (__have_native_transcendentals) {
return __exp_uniform_double(x);
}
else
return __stdlib_exp(x);
}
__declspec(safe)
static inline double log(double x) {
if (__math_lib == __math_lib_svml)
if (__have_native_transcendentals) {
return __log_varying_double(x);
}
else if (__math_lib == __math_lib_svml)
{
return __svml_logd(x);
}
else if (__math_lib == __math_lib_ispc_fast)
return log((float)x);
else {
double ret;
foreach_active (i) {
@@ -3967,20 +4080,22 @@ static inline double log(double x) {
__declspec(safe)
static inline uniform double log(uniform double x) {
if (__math_lib == __math_lib_ispc_fast)
return log((float)x);
if (__have_native_transcendentals) {
return __log_uniform_double(x);
}
else
return __stdlib_log(x);
}
__declspec(safe)
static inline double pow(double a, double b) {
if (__math_lib == __math_lib_svml)
if (__have_native_transcendentals) {
return __pow_varying_double(a,b);
}
else if (__math_lib == __math_lib_svml)
{
return __svml_powd(a,b);
}
else if (__math_lib == __math_lib_ispc_fast)
return pow((float)a, (float)b);
else {
double ret;
foreach_active (i) {
@@ -3993,8 +4108,9 @@ static inline double pow(double a, double b) {
__declspec(safe)
static inline uniform double pow(uniform double a, uniform double b) {
if (__math_lib == __math_lib_ispc_fast)
return pow((float)a, (float)b);
if (__have_native_transcendentals) {
return __pow_uniform_double(a,b);
}
else
return __stdlib_pow(a, b);
}

View File

@@ -0,0 +1,23 @@
// Builds a varying double whose lanes repeat the pattern (a, b, c, d),
// written four lanes at a time starting from lane 0.
static double double4(uniform double a, uniform double b, uniform double c,
                      uniform double d) {
    double lanes = 0;
    uniform int base = 0;
    while (base < programCount) {
        lanes = insert(lanes, base, a);
        lanes = insert(lanes, base + 1, b);
        lanes = insert(lanes, base + 2, c);
        lanes = insert(lanes, base + 3, d);
        base += 4;
    }
    return lanes;
}

// Returns programCount.
export uniform int width() {
    return programCount;
}

// Per-lane check: true when the absolute error is below 1d-15 or the error
// relative to ref is below 1d-14.
bool ok(double x, double ref) {
    double delta = x - ref;
    return (abs(delta) < 1d-15) || abs(delta / ref) < 1d-14;
}

// Stores 1 in each lane's slot of RET when sin(input) matches the expected
// value for that lane, and 0 otherwise.
export void f_v(uniform float RET[]) {
    double input = double4((-3.141592741012573d0),(0.000000000000000d0),(3.141592741012573d0),(6.283185482025146d0));
    double expected = double4((8.742277955963554d-08),(0.000000000000000d0),(-8.742277955963554d-08),(1.7484555911927038d-07));
    bool pass = ok(sin(input), expected);
    RET[programIndex] = pass ? 1. : 0.;
}

// Stores 1 in every lane's slot of RET (presumably the reference output the
// test harness compares f_v against; the harness is not visible here).
export void result(uniform float RET[]) {
    RET[programIndex] = 1.;
}

View File

@@ -0,0 +1,23 @@
// Builds a varying double whose lanes repeat the pattern (a, b, c, d),
// written four lanes at a time starting from lane 0.
static double double4(uniform double a, uniform double b, uniform double c,
                      uniform double d) {
    double lanes = 0;
    uniform int base = 0;
    while (base < programCount) {
        lanes = insert(lanes, base, a);
        lanes = insert(lanes, base + 1, b);
        lanes = insert(lanes, base + 2, c);
        lanes = insert(lanes, base + 3, d);
        base += 4;
    }
    return lanes;
}

// Returns programCount.
export uniform int width() {
    return programCount;
}

// Per-lane check: true when the absolute error is below 1d-15 or the error
// relative to ref is below 1d-14.
bool ok(double x, double ref) {
    double delta = x - ref;
    return (abs(delta) < 1d-15) || abs(delta / ref) < 1d-14;
}

// Stores 1 in each lane's slot of RET when sin(input) matches the expected
// value for that lane, and 0 otherwise.
export void f_v(uniform float RET[]) {
    double input = double4((1.000000000000000d0),(-1.000000000000000d0),(-1.5707963705062866d0),(1.5707963705062866d0));
    double expected = double4((0.8414709848078965d0),(-0.8414709848078965d0),(-1.000000000000000d0),(1.000000000000000d0));
    bool pass = ok(sin(input), expected);
    RET[programIndex] = pass ? 1. : 0.;
}

// Stores 1 in every lane's slot of RET (presumably the reference output the
// test harness compares f_v against; the harness is not visible here).
export void result(uniform float RET[]) {
    RET[programIndex] = 1.;
}

View File

@@ -0,0 +1,28 @@
// Builds a varying double whose lanes repeat the pattern (a, b, c, d),
// written four lanes at a time starting from lane 0.
static double double4(uniform double a, uniform double b, uniform double c,
                      uniform double d) {
    double lanes = 0;
    uniform int base = 0;
    while (base < programCount) {
        lanes = insert(lanes, base, a);
        lanes = insert(lanes, base + 1, b);
        lanes = insert(lanes, base + 2, c);
        lanes = insert(lanes, base + 3, d);
        base += 4;
    }
    return lanes;
}

// Returns programCount.
export uniform int width() {
    return programCount;
}

// Per-lane check: true when the absolute error or the error relative to ref
// is below 1d-14.  Prints the computed and expected values if any lane fails.
bool ok(double x, double ref) {
    double delta = x - ref;
    bool matches = (abs(delta) < 1d-14) || abs(delta / ref) < 1d-14;
    if (any(!matches))
        print("mismatch got %, expected %\n", x, ref);
    return matches;
}

// Stores 1 in each lane's slot of RET when sin(input) matches the expected
// value for that lane, and 0 otherwise.
export void f_v(uniform float RET[]) {
    double input = double4((-9.424777984619141d0),(4.000000000000000d0),(10.000000000000000d0),(-10.000000000000000d0));
    double expected = double4((2.3849760909612067d-08),(-0.7568024953079282d0),(-0.5440211108893699d0),(0.5440211108893699d0));
    bool pass = ok(sin(input), expected);
    RET[programIndex] = pass ? 1. : 0.;
}

// Stores 1 in every lane's slot of RET (presumably the reference output the
// test harness compares f_v against; the harness is not visible here).
export void result(uniform float RET[]) {
    RET[programIndex] = 1.;
}

View File

@@ -0,0 +1,23 @@
// Builds a varying double whose lanes repeat the pattern (a, b, c, d),
// written four lanes at a time starting from lane 0.
static double double4(uniform double a, uniform double b, uniform double c,
                      uniform double d) {
    double lanes = 0;
    uniform int base = 0;
    while (base < programCount) {
        lanes = insert(lanes, base, a);
        lanes = insert(lanes, base + 1, b);
        lanes = insert(lanes, base + 2, c);
        lanes = insert(lanes, base + 3, d);
        base += 4;
    }
    return lanes;
}

// Returns programCount.
export uniform int width() {
    return programCount;
}

// Per-lane check: true when the absolute error is below 1d-15 or the error
// relative to ref is below 1d-14.
bool ok(double x, double ref) {
    double delta = x - ref;
    return (abs(delta) < 1d-15) || abs(delta / ref) < 1d-14;
}

// Stores 1 in each lane's slot of RET when sin(input) matches the expected
// value for that lane, and 0 otherwise.
export void f_v(uniform float RET[]) {
    double input = double4((14.300000190734863d0),(-6.699999809265137d0),(-21.200000762939453d0),(9.000000000000000d0));
    double expected = double4((0.9867719333537206d0),(-0.40484974621184605d0),(-0.7111606865372466d0),(0.4121184852417566d0));
    bool pass = ok(sin(input), expected);
    RET[programIndex] = pass ? 1. : 0.;
}

// Stores 1 in every lane's slot of RET (presumably the reference output the
// test harness compares f_v against; the harness is not visible here).
export void result(uniform float RET[]) {
    RET[programIndex] = 1.;
}

View File

@@ -0,0 +1,23 @@
// Builds a varying double whose lanes repeat the pattern (a, b, c, d),
// written four lanes at a time starting from lane 0.
static double double4(uniform double a, uniform double b, uniform double c,
                      uniform double d) {
    double lanes = 0;
    uniform int base = 0;
    while (base < programCount) {
        lanes = insert(lanes, base, a);
        lanes = insert(lanes, base + 1, b);
        lanes = insert(lanes, base + 2, c);
        lanes = insert(lanes, base + 3, d);
        base += 4;
    }
    return lanes;
}

// Returns programCount.
export uniform int width() {
    return programCount;
}

// Per-lane check: true when the absolute error is below 1d-15 or the error
// relative to ref is below 1d-14.
bool ok(double x, double ref) {
    double delta = x - ref;
    return (abs(delta) < 1d-15) || abs(delta / ref) < 1d-14;
}

// Stores 1 in each lane's slot of RET when cos(input) matches the expected
// value for that lane, and 0 otherwise.
export void f_v(uniform float RET[]) {
    double input = double4((-3.141592653589793d0),(0.000000000000000d0),(3.141592653589793d0),(6.283185307179586d0));
    double expected = double4((-1.000000000000000d0),(1.000000000000000d0),(-1.000000000000000d0),(1.000000000000000d0));
    bool pass = ok(cos(input), expected);
    RET[programIndex] = pass ? 1. : 0.;
}

// Stores 1 in every lane's slot of RET (presumably the reference output the
// test harness compares f_v against; the harness is not visible here).
export void result(uniform float RET[]) {
    RET[programIndex] = 1.;
}

View File

@@ -0,0 +1,23 @@
// Builds a varying double whose lanes repeat the pattern (a, b, c, d),
// written four lanes at a time starting from lane 0.
static double double4(uniform double a, uniform double b, uniform double c,
                      uniform double d) {
    double lanes = 0;
    uniform int base = 0;
    while (base < programCount) {
        lanes = insert(lanes, base, a);
        lanes = insert(lanes, base + 1, b);
        lanes = insert(lanes, base + 2, c);
        lanes = insert(lanes, base + 3, d);
        base += 4;
    }
    return lanes;
}

// Returns programCount.
export uniform int width() {
    return programCount;
}

// Per-lane check: true when the absolute error is below 1d-15 or the error
// relative to ref is below 1d-14.
bool ok(double x, double ref) {
    double delta = x - ref;
    return (abs(delta) < 1d-15) || abs(delta / ref) < 1d-14;
}

// Stores 1 in each lane's slot of RET when cos(input) matches the expected
// value for that lane, and 0 otherwise.
export void f_v(uniform float RET[]) {
    double input = double4((1.000000000000000d0),(-1.000000000000000d0),(-1.570796370506287d0),(1.570796370506287d0));
    double expected = double4((0.5403023058681398d0),(0.5403023058681398d0),(-4.371139044595162d-08),(-4.371139044595162d-08));
    bool pass = ok(cos(input), expected);
    RET[programIndex] = pass ? 1. : 0.;
}

// Stores 1 in every lane's slot of RET (presumably the reference output the
// test harness compares f_v against; the harness is not visible here).
export void result(uniform float RET[]) {
    RET[programIndex] = 1.;
}

View File

@@ -0,0 +1,23 @@
// Builds a varying double whose lanes repeat the pattern (a, b, c, d),
// written four lanes at a time starting from lane 0.
static double double4(uniform double a, uniform double b, uniform double c,
                      uniform double d) {
    double lanes = 0;
    uniform int base = 0;
    while (base < programCount) {
        lanes = insert(lanes, base, a);
        lanes = insert(lanes, base + 1, b);
        lanes = insert(lanes, base + 2, c);
        lanes = insert(lanes, base + 3, d);
        base += 4;
    }
    return lanes;
}

// Returns programCount.
export uniform int width() {
    return programCount;
}

// Per-lane check: true when the absolute error is below 1d-15 or the error
// relative to ref is below 1d-14.
bool ok(double x, double ref) {
    double delta = x - ref;
    return (abs(delta) < 1d-15) || abs(delta / ref) < 1d-14;
}

// Stores 1 in each lane's slot of RET when cos(input) matches the expected
// value for that lane, and 0 otherwise.
export void f_v(uniform float RET[]) {
    double input = double4((-9.42477796076938d0),(4.000000000000000d0),(10.000000000000000d0),(-10.000000000000000d0));
    double expected = double4((-1.000000000000000d0),(-0.6536436208636119d0),(-0.8390715290764524d0),(-0.8390715290764524d0));
    bool pass = ok(cos(input), expected);
    RET[programIndex] = pass ? 1. : 0.;
}

// Stores 1 in every lane's slot of RET (presumably the reference output the
// test harness compares f_v against; the harness is not visible here).
export void result(uniform float RET[]) {
    RET[programIndex] = 1.;
}

View File

@@ -0,0 +1,29 @@
// Builds a varying double whose lanes repeat the pattern (a, b, c, d),
// written four lanes at a time starting from lane 0.
static double double4(uniform double a, uniform double b, uniform double c,
                      uniform double d) {
    double lanes = 0;
    uniform int base = 0;
    while (base < programCount) {
        lanes = insert(lanes, base, a);
        lanes = insert(lanes, base + 1, b);
        lanes = insert(lanes, base + 2, c);
        lanes = insert(lanes, base + 3, d);
        base += 4;
    }
    return lanes;
}

// Returns programCount.
export uniform int width() {
    return programCount;
}

// Per-lane check: true when the absolute error or the error relative to ref
// is below 1d-14.  Prints the computed and expected values if any lane fails.
bool ok(double x, double ref) {
    double delta = x - ref;
    bool matches = (abs(delta) < 1d-14) || abs(delta / ref) < 1d-14;
    if (any(!matches))
        print("mismatch got %, expected %\n", x, ref);
    return matches;
}

// Stores 1 in each lane's slot of RET when cos(input) matches the expected
// value for that lane, and 0 otherwise.
export void f_v(uniform float RET[]) {
    double input = double4((14.300000190734863d0),(-6.699999809265137d0),(-21.200000762939453d0),(9.000000000000000d0));
    double expected = double4((-0.1621146247115303d0),(0.9143832254542971d0),(-0.7030295000381365d0),(-0.9111302618846769d0));
    bool pass = ok(cos(input), expected);
    RET[programIndex] = pass ? 1. : 0.;
}

// Stores 1 in every lane's slot of RET (presumably the reference output the
// test harness compares f_v against; the harness is not visible here).
export void result(uniform float RET[]) {
    RET[programIndex] = 1.;
}

View File

@@ -0,0 +1,23 @@
// Builds a varying double whose lanes repeat the pattern (a, b, c, d),
// written four lanes at a time starting from lane 0.
static double double4(uniform double a, uniform double b, uniform double c,
                      uniform double d) {
    double lanes = 0;
    uniform int base = 0;
    while (base < programCount) {
        lanes = insert(lanes, base, a);
        lanes = insert(lanes, base + 1, b);
        lanes = insert(lanes, base + 2, c);
        lanes = insert(lanes, base + 3, d);
        base += 4;
    }
    return lanes;
}

// Returns programCount.
export uniform int width() {
    return programCount;
}

// Per-lane check: true when the absolute error is below 1d-15 or the error
// relative to ref is below 1d-14.
bool ok(double x, double ref) {
    double delta = x - ref;
    return (abs(delta) < 1d-15) || abs(delta / ref) < 1d-14;
}

// Stores 1 in each lane's slot of RET when tan(input) matches the expected
// value for that lane, and 0 otherwise.
export void f_v(uniform float RET[]) {
    double input = double4((-3.141592741012573d0),(0.000000000000000d0),(3.141592741012573d0),(6.283185482025146d0));
    double expected = double4((-8.742277955963587d-08),(0.000000000000000d0),(8.742277955963587d-08),(1.7484555911927306d-07));
    bool pass = ok(tan(input), expected);
    RET[programIndex] = pass ? 1. : 0.;
}

// Stores 1 in every lane's slot of RET (presumably the reference output the
// test harness compares f_v against; the harness is not visible here).
export void result(uniform float RET[]) {
    RET[programIndex] = 1.;
}

View File

@@ -0,0 +1,23 @@
// Builds a varying double whose lanes repeat the pattern (a, b, c, d),
// written four lanes at a time starting from lane 0.
static double double4(uniform double a, uniform double b, uniform double c,
                      uniform double d) {
    double lanes = 0;
    uniform int base = 0;
    while (base < programCount) {
        lanes = insert(lanes, base, a);
        lanes = insert(lanes, base + 1, b);
        lanes = insert(lanes, base + 2, c);
        lanes = insert(lanes, base + 3, d);
        base += 4;
    }
    return lanes;
}

// Returns programCount.
export uniform int width() {
    return programCount;
}

// Per-lane check: true when the absolute error is below 1d-15 or the error
// relative to ref is below 1d-14.
bool ok(double x, double ref) {
    double delta = x - ref;
    return (abs(delta) < 1d-15) || abs(delta / ref) < 1d-14;
}

// Stores 1 in each lane's slot of RET when tan(input) matches the expected
// value for that lane, and 0 otherwise.
export void f_v(uniform float RET[]) {
    double input = double4((1.000000000000000d0),(-1.000000000000000d0),(0),(0));
    double expected = double4((1.557407724654902d0),(-1.557407724654902d0),(0),(0));
    bool pass = ok(tan(input), expected);
    RET[programIndex] = pass ? 1. : 0.;
}

// Stores 1 in every lane's slot of RET (presumably the reference output the
// test harness compares f_v against; the harness is not visible here).
export void result(uniform float RET[]) {
    RET[programIndex] = 1.;
}

View File

@@ -0,0 +1,23 @@
// Builds a varying double whose lanes repeat the pattern (a, b, c, d),
// written four lanes at a time starting from lane 0.
static double double4(uniform double a, uniform double b, uniform double c,
                      uniform double d) {
    double lanes = 0;
    uniform int base = 0;
    while (base < programCount) {
        lanes = insert(lanes, base, a);
        lanes = insert(lanes, base + 1, b);
        lanes = insert(lanes, base + 2, c);
        lanes = insert(lanes, base + 3, d);
        base += 4;
    }
    return lanes;
}

// Returns programCount.
export uniform int width() {
    return programCount;
}

// Per-lane check: true when the absolute error is below 1d-15 or the error
// relative to ref is below 1d-14.
bool ok(double x, double ref) {
    double delta = x - ref;
    return (abs(delta) < 1d-15) || abs(delta / ref) < 1d-14;
}

// Stores 1 in each lane's slot of RET when tan(input) matches the expected
// value for that lane, and 0 otherwise.
export void f_v(uniform float RET[]) {
    double input = double4((-9.424777984619141d0),(4.000000000000000d0),(10.000000000000000d0),(-10.000000000000000d0));
    double expected = double4((-2.3849760909612074d-08),(1.1578212823495775d0),(0.6483608274590867d0),(-0.6483608274590867d0));
    bool pass = ok(tan(input), expected);
    RET[programIndex] = pass ? 1. : 0.;
}

// Stores 1 in every lane's slot of RET (presumably the reference output the
// test harness compares f_v against; the harness is not visible here).
export void result(uniform float RET[]) {
    RET[programIndex] = 1.;
}

View File

@@ -0,0 +1,23 @@
// Builds a varying double whose lanes repeat the pattern (a, b, c, d),
// written four lanes at a time starting from lane 0.
static double double4(uniform double a, uniform double b, uniform double c,
                      uniform double d) {
    double lanes = 0;
    uniform int base = 0;
    while (base < programCount) {
        lanes = insert(lanes, base, a);
        lanes = insert(lanes, base + 1, b);
        lanes = insert(lanes, base + 2, c);
        lanes = insert(lanes, base + 3, d);
        base += 4;
    }
    return lanes;
}

// Returns programCount.
export uniform int width() {
    return programCount;
}

// Per-lane check: true when the absolute error is below 1d-15 or the error
// relative to ref is below 1d-14.
bool ok(double x, double ref) {
    double delta = x - ref;
    return (abs(delta) < 1d-15) || abs(delta / ref) < 1d-14;
}

// Stores 1 in each lane's slot of RET when tan(input) matches the expected
// value for that lane, and 0 otherwise.
export void f_v(uniform float RET[]) {
    double input = double4((14.300000190734863d0),(-6.699999809265137d0),(-21.200000762939453d0),(9.000000000000000d0));
    double expected = double4((-6.086877942749462d0),(-0.442757188607329d0),(1.0115659250410816d0),(-0.4523156594418099d0));
    bool pass = ok(tan(input), expected);
    RET[programIndex] = pass ? 1. : 0.;
}

// Stores 1 in every lane's slot of RET (presumably the reference output the
// test harness compares f_v against; the harness is not visible here).
export void result(uniform float RET[]) {
    RET[programIndex] = 1.;
}

View File

@@ -0,0 +1,23 @@
// Builds a varying double whose lanes repeat the pattern (a, b, c, d),
// written four lanes at a time starting from lane 0.
static double double4(uniform double a, uniform double b, uniform double c,
                      uniform double d) {
    double lanes = 0;
    uniform int base = 0;
    while (base < programCount) {
        lanes = insert(lanes, base, a);
        lanes = insert(lanes, base + 1, b);
        lanes = insert(lanes, base + 2, c);
        lanes = insert(lanes, base + 3, d);
        base += 4;
    }
    return lanes;
}

// Returns programCount.
export uniform int width() {
    return programCount;
}

// Per-lane check: true when the absolute error is below 1d-15 or the error
// relative to ref is below 1d-14.
bool ok(double x, double ref) {
    double delta = x - ref;
    return (abs(delta) < 1d-15) || abs(delta / ref) < 1d-14;
}

// Stores 1 in each lane's slot of RET when exp(input) matches the expected
// value for that lane, and 0 otherwise.
export void f_v(uniform float RET[]) {
    double input = double4((-3.141592741012573d0),(0.000000000000000d0),(3.141592741012573d0),(6.283185482025146d0));
    double expected = double4((0.04321391448589156d0),(1.000000000000000d0),(23.140694655803028d0),(535.4917491531108d0));
    bool pass = ok(exp(input), expected);
    RET[programIndex] = pass ? 1. : 0.;
}

// Stores 1 in every lane's slot of RET (presumably the reference output the
// test harness compares f_v against; the harness is not visible here).
export void result(uniform float RET[]) {
    RET[programIndex] = 1.;
}

View File

@@ -0,0 +1,23 @@
// Builds a varying double whose lanes repeat the pattern (a, b, c, d),
// written four lanes at a time starting from lane 0.
static double double4(uniform double a, uniform double b, uniform double c,
                      uniform double d) {
    double lanes = 0;
    uniform int base = 0;
    while (base < programCount) {
        lanes = insert(lanes, base, a);
        lanes = insert(lanes, base + 1, b);
        lanes = insert(lanes, base + 2, c);
        lanes = insert(lanes, base + 3, d);
        base += 4;
    }
    return lanes;
}

// Returns programCount.
export uniform int width() {
    return programCount;
}

// Per-lane check: true when the absolute error is below 1d-15 or the error
// relative to ref is below 1d-14.
bool ok(double x, double ref) {
    double delta = x - ref;
    return (abs(delta) < 1d-15) || abs(delta / ref) < 1d-14;
}

// Stores 1 in each lane's slot of RET when exp(input) matches the expected
// value for that lane, and 0 otherwise.
export void f_v(uniform float RET[]) {
    double input = double4((1.000000000000000d0),(-1.000000000000000d0),(-1.570796370506287d0),(1.570796370506287d0));
    double expected = double4((2.718281828459045d0),(0.36787944117144233d0),(0.2078795672640568d0),(4.810477591238011d0));
    bool pass = ok(exp(input), expected);
    RET[programIndex] = pass ? 1. : 0.;
}

// Stores 1 in every lane's slot of RET (presumably the reference output the
// test harness compares f_v against; the harness is not visible here).
export void result(uniform float RET[]) {
    RET[programIndex] = 1.;
}

View File

@@ -0,0 +1,23 @@
// Builds a varying double whose lanes repeat the pattern (a, b, c, d),
// written four lanes at a time starting from lane 0.
static double double4(uniform double a, uniform double b, uniform double c,
                      uniform double d) {
    double lanes = 0;
    uniform int base = 0;
    while (base < programCount) {
        lanes = insert(lanes, base, a);
        lanes = insert(lanes, base + 1, b);
        lanes = insert(lanes, base + 2, c);
        lanes = insert(lanes, base + 3, d);
        base += 4;
    }
    return lanes;
}

// Returns programCount.
export uniform int width() {
    return programCount;
}

// Per-lane check: true when the absolute error is below 1d-15 or the error
// relative to ref is below 1d-14.
bool ok(double x, double ref) {
    double delta = x - ref;
    return (abs(delta) < 1d-15) || abs(delta / ref) < 1d-14;
}

// Stores 1 in each lane's slot of RET when exp(input) matches the expected
// value for that lane, and 0 otherwise.
export void f_v(uniform float RET[]) {
    double input = double4((-9.424777984619141d0),(4.000000000000000d0),(10.000000000000000d0),(-10.000000000000000d0));
    double expected = double4((8.069951564564043d-05),(54.598150033144236d0),(22026.465794806718d0),(4.5399929762484854d-05));
    bool pass = ok(exp(input), expected);
    RET[programIndex] = pass ? 1. : 0.;
}

// Stores 1 in every lane's slot of RET (presumably the reference output the
// test harness compares f_v against; the harness is not visible here).
export void result(uniform float RET[]) {
    RET[programIndex] = 1.;
}

View File

@@ -0,0 +1,23 @@
// Builds a varying double whose lanes repeat the pattern (a, b, c, d),
// written four lanes at a time starting from lane 0.
static double double4(uniform double a, uniform double b, uniform double c,
                      uniform double d) {
    double lanes = 0;
    uniform int base = 0;
    while (base < programCount) {
        lanes = insert(lanes, base, a);
        lanes = insert(lanes, base + 1, b);
        lanes = insert(lanes, base + 2, c);
        lanes = insert(lanes, base + 3, d);
        base += 4;
    }
    return lanes;
}

// Returns programCount.
export uniform int width() {
    return programCount;
}

// Per-lane check: true when the absolute error is below 1d-15 or the error
// relative to ref is below 1d-14.
bool ok(double x, double ref) {
    double delta = x - ref;
    return (abs(delta) < 1d-15) || abs(delta / ref) < 1d-14;
}

// Stores 1 in each lane's slot of RET when exp(input) matches the expected
// value for that lane, and 0 otherwise.
export void f_v(uniform float RET[]) {
    double input = double4((14.300000190734863d0),(-6.699999809265137d0),(-21.200000762939453d0),(9.000000000000000d0));
    double expected = double4((1623346.2946371625d0),(0.001230912137451317d0),(6.208070673019769d-10),(8103.083927575384d0));
    bool pass = ok(exp(input), expected);
    RET[programIndex] = pass ? 1. : 0.;
}

// Stores 1 in every lane's slot of RET (presumably the reference output the
// test harness compares f_v against; the harness is not visible here).
export void result(uniform float RET[]) {
    RET[programIndex] = 1.;
}

View File

@@ -0,0 +1,23 @@
// Builds a varying double whose lanes repeat the pattern (a, b, c, d),
// written four lanes at a time starting from lane 0.
static double double4(uniform double a, uniform double b, uniform double c,
                      uniform double d) {
    double ret = 0;
    for (uniform int i = 0; i < programCount; i += 4) {
        ret = insert(ret, i + 0, a);
        ret = insert(ret, i + 1, b);
        ret = insert(ret, i + 2, c);
        ret = insert(ret, i + 3, d);
    }
    return ret;
}

// Returns programCount.
export uniform int width() { return programCount; }

// Per-lane check: true when the absolute error is below 1d-15 or the error
// relative to ref is below 1d-13.
bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-13; }

// Stores 1 in each lane's slot of RET when log(v) matches the expected value
// for that lane, and 0 otherwise.
export void f_v(uniform float RET[]) {
    // Inputs are spelled with the explicit "d0" exponent, matching every
    // other double literal in these tests (they previously used a bare "d"
    // suffix); the values themselves are unchanged.
    double v = double4((3.141602741012573d0),(0.000010000000000d0),(3.141602741012573d0),(6.283195482025146d0));
    double ref = double4((1.144733096770642d0),(-11.512925464970229d0),(1.144733096770642d0),(1.8378786857850002d0));
    RET[programIndex] = ok(log(v), ref) ? 1. : 0.;
}

// Stores 1 in every lane's slot of RET (presumably the reference output the
// test harness compares f_v against; the harness is not visible here).
export void result(uniform float RET[]) { RET[programIndex] = 1.; }

View File

@@ -0,0 +1,23 @@
// Builds a varying double whose lanes repeat the pattern (a, b, c, d),
// written four lanes at a time starting from lane 0.
static double double4(uniform double a, uniform double b, uniform double c,
                      uniform double d) {
    double lanes = 0;
    uniform int base = 0;
    while (base < programCount) {
        lanes = insert(lanes, base, a);
        lanes = insert(lanes, base + 1, b);
        lanes = insert(lanes, base + 2, c);
        lanes = insert(lanes, base + 3, d);
        base += 4;
    }
    return lanes;
}

// Returns programCount.
export uniform int width() {
    return programCount;
}

// Per-lane check: true when the absolute error is below 1d-15 or the error
// relative to ref is below 1d-14.
bool ok(double x, double ref) {
    double delta = x - ref;
    return (abs(delta) < 1d-15) || abs(delta / ref) < 1d-14;
}

// Stores 1 in each lane's slot of RET when log(input) matches the expected
// value for that lane, and 0 otherwise.
export void f_v(uniform float RET[]) {
    double input = double4((1.000010000000000d0),(1.000010000000000d0),(1.570806370506287d0),(1.570806370506287d0));
    double expected = double4((9.999950000398841d-06),(9.999950000398841d-06),(0.4515890992942722d0),(0.4515890992942722d0));
    bool pass = ok(log(input), expected);
    RET[programIndex] = pass ? 1. : 0.;
}

// Stores 1 in every lane's slot of RET (presumably the reference output the
// test harness compares f_v against; the harness is not visible here).
export void result(uniform float RET[]) {
    RET[programIndex] = 1.;
}

View File

@@ -0,0 +1,23 @@
// Builds a varying double whose lanes repeat the pattern (a, b, c, d),
// written four lanes at a time starting from lane 0.
static double double4(uniform double a, uniform double b, uniform double c,
                      uniform double d) {
    double lanes = 0;
    uniform int base = 0;
    while (base < programCount) {
        lanes = insert(lanes, base, a);
        lanes = insert(lanes, base + 1, b);
        lanes = insert(lanes, base + 2, c);
        lanes = insert(lanes, base + 3, d);
        base += 4;
    }
    return lanes;
}

// Returns programCount.
export uniform int width() {
    return programCount;
}

// Per-lane check: true when the absolute error is below 1d-15 or the error
// relative to ref is below 1d-14.
bool ok(double x, double ref) {
    double delta = x - ref;
    return (abs(delta) < 1d-15) || abs(delta / ref) < 1d-14;
}

// Stores 1 in each lane's slot of RET when log(input) matches the expected
// value for that lane, and 0 otherwise.
export void f_v(uniform float RET[]) {
    double input = double4((9.424787984619140d0),(4.000010000000000d0),(10.000010000000000d0),(10.000010000000000d0));
    double expected = double4((2.2433432380804366d0),(1.3862968611167654d0),(2.3025860929935456d0),(2.3025860929935456d0));
    bool pass = ok(log(input), expected);
    RET[programIndex] = pass ? 1. : 0.;
}

// Stores 1 in every lane's slot of RET (presumably the reference output the
// test harness compares f_v against; the harness is not visible here).
export void result(uniform float RET[]) {
    RET[programIndex] = 1.;
}

View File

@@ -0,0 +1,23 @@
// Builds a varying double whose lanes repeat the pattern (a, b, c, d),
// written four lanes at a time starting from lane 0.
static double double4(uniform double a, uniform double b, uniform double c,
                      uniform double d) {
    double ret = 0;
    for (uniform int i = 0; i < programCount; i += 4) {
        ret = insert(ret, i + 0, a);
        ret = insert(ret, i + 1, b);
        ret = insert(ret, i + 2, c);
        ret = insert(ret, i + 3, d);
    }
    return ret;
}

// Returns programCount.
export uniform int width() { return programCount; }

// Per-lane check: true when the absolute error is below 1d-15 or the error
// relative to ref is below 1d-14.
bool ok(double x, double ref) { return (abs(x - ref) < 1d-15) || abs((x-ref)/ref) < 1d-14; }

// Stores 1 in each lane's slot of RET when log(v) matches the expected value
// for that lane, and 0 otherwise.
export void f_v(uniform float RET[]) {
    double v = double4((14.300010190734863d0),(6.700009809265136d0),(21.200010762939453d0),(9.000010000000000d0));
    double ref = double4((2.6602602499044092d0),(1.902108990465272d0),(3.0540016893636612d0),(2.197225688446713d0));
    RET[programIndex] = ok(log(v), ref) ? 1. : 0.;
}

// Stores 1 in every lane's slot of RET (presumably the reference output the
// test harness compares f_v against; the harness is not visible here).
// RET is declared unsized, like f_v above: it is indexed by programIndex,
// so the previous fixed bound of 4 did not describe the elements written.
export void result(uniform float RET[]) { RET[programIndex] = 1.; }