@@ -1890,6 +1890,8 @@ entry:
|
||||
declare float @sinf(float) nounwind readnone
|
||||
declare float @cosf(float) nounwind readnone
|
||||
declare void @sincosf(float, float *, float *) nounwind readnone
|
||||
declare float @asinf(float) nounwind readnone
|
||||
declare float @acosf(float) nounwind readnone
|
||||
declare float @tanf(float) nounwind readnone
|
||||
declare float @atanf(float) nounwind readnone
|
||||
declare float @atan2f(float, float) nounwind readnone
|
||||
@@ -1912,6 +1914,16 @@ define void @__stdlib_sincosf(float, float *, float *) nounwind readnone alwaysi
|
||||
ret void
|
||||
}
|
||||
|
||||
define float @__stdlib_asinf(float) nounwind readnone alwaysinline {
|
||||
%r = call float @asinf(float %0)
|
||||
ret float %r
|
||||
}
|
||||
|
||||
define float @__stdlib_acosf(float) nounwind readnone alwaysinline {
|
||||
%r = call float @acosf(float %0)
|
||||
ret float %r
|
||||
}
|
||||
|
||||
define float @__stdlib_tanf(float) nounwind readnone alwaysinline {
|
||||
%r = call float @tanf(float %0)
|
||||
ret float %r
|
||||
|
||||
@@ -3013,13 +3013,17 @@ architectures.
|
||||
float tan(float x)
|
||||
uniform float tan(uniform float x)
|
||||
|
||||
Arctangent functions are also available:
|
||||
The corresponding inverse functions are also available:
|
||||
|
||||
::
|
||||
|
||||
float asin(float x)
|
||||
uniform float asin(uniformfloat x)
|
||||
float acos(float x)
|
||||
uniform float acos(uniform float x)
|
||||
float atan(float x)
|
||||
float atan2(float x, float y)
|
||||
uniform float atan(uniform float x)
|
||||
float atan2(float x, float y)
|
||||
uniform float atan2(uniform float x, uniform float y)
|
||||
|
||||
If both sine and cosine are needed, then the ``sincos()`` call computes
|
||||
|
||||
120
stdlib.ispc
120
stdlib.ispc
@@ -1785,6 +1785,116 @@ static inline uniform float sin(uniform float x_full) {
|
||||
}
|
||||
|
||||
|
||||
static inline float asin(float x) {
|
||||
bool isneg = x < 0;
|
||||
x = abs(x);
|
||||
|
||||
bool isnan = (x > 1);
|
||||
|
||||
float v;
|
||||
if (__math_lib == __math_lib_svml ||
|
||||
__math_lib == __math_lib_system) {
|
||||
float ret;
|
||||
uniform int mask = lanemask();
|
||||
for (uniform int i = 0; i < programCount; ++i) {
|
||||
if ((mask & (1 << i)) == 0)
|
||||
continue;
|
||||
uniform float r = __stdlib_asinf(extract(x, i));
|
||||
ret = insert(ret, i, r);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
else if (__math_lib == __math_lib_ispc)
|
||||
// sollya
|
||||
// fpminimax(((asin(x)-pi/2)/-sqrt(1-x)), [|0,1,2,3,4,5,6,7,8,9,10|],
|
||||
// [|single...|], [1e-20;.9999999999999999]);
|
||||
// avg error: 8.5716801e-09, max error: 2.1373853e-07
|
||||
v = 1.57079637050628662109375f +
|
||||
x * (-0.21460501849651336669921875f +
|
||||
x * (8.9116774499416351318359375e-2f +
|
||||
x * (-5.146093666553497314453125e-2f +
|
||||
x * (3.7269376218318939208984375e-2f +
|
||||
x * (-3.5882405936717987060546875e-2f +
|
||||
x * (4.14929799735546112060546875e-2f +
|
||||
x * (-4.25077490508556365966796875e-2f +
|
||||
x * (3.05023305118083953857421875e-2f +
|
||||
x * (-1.2897425331175327301025390625e-2f +
|
||||
x * 2.38926825113594532012939453125e-3f)))))))));
|
||||
else if (__math_lib == __math_lib_ispc_fast)
|
||||
// sollya
|
||||
// fpminimax(((asin(x)-pi/2)/-sqrt(1-x)), [|0,1,2,3,4,5|],[|single...|],
|
||||
// [1e-20;.9999999999999999]);
|
||||
// avg error: 1.1105439e-06, max error 1.3187528e-06
|
||||
v = 1.57079517841339111328125f +
|
||||
x * (-0.21450997889041900634765625f +
|
||||
x * (8.78556668758392333984375e-2f +
|
||||
x * (-4.489909112453460693359375e-2f +
|
||||
x * (1.928029954433441162109375e-2f +
|
||||
x * (-4.3095736764371395111083984375e-3f)))));
|
||||
|
||||
v *= -sqrt(1.f - x);
|
||||
v = v + 1.57079637050628662109375;
|
||||
if (v < 0) v = 0;
|
||||
// v = max(0, v);
|
||||
|
||||
if (isneg) v = -v;
|
||||
if (isnan) v = floatbits(0x7fc00000);
|
||||
|
||||
return v;
|
||||
}
|
||||
|
||||
|
||||
static inline uniform float asin(uniform float x) {
|
||||
uniform bool isneg = x < 0;
|
||||
x = abs(x);
|
||||
|
||||
uniform bool isnan = (x > 1);
|
||||
|
||||
uniform float v;
|
||||
if (__math_lib == __math_lib_svml ||
|
||||
__math_lib == __math_lib_system) {
|
||||
return __stdlib_asinf(x);
|
||||
}
|
||||
else if (__math_lib == __math_lib_ispc)
|
||||
// sollya
|
||||
// fpminimax(((asin(x)-pi/2)/-sqrt(1-x)), [|0,1,2,3,4,5,6,7,8,9,10|],
|
||||
// [|single...|], [1e-20;.9999999999999999]);
|
||||
// avg error: 8.5716801e-09, max error: 2.1373853e-07
|
||||
v = 1.57079637050628662109375f +
|
||||
x * (-0.21460501849651336669921875f +
|
||||
x * (8.9116774499416351318359375e-2f +
|
||||
x * (-5.146093666553497314453125e-2f +
|
||||
x * (3.7269376218318939208984375e-2f +
|
||||
x * (-3.5882405936717987060546875e-2f +
|
||||
x * (4.14929799735546112060546875e-2f +
|
||||
x * (-4.25077490508556365966796875e-2f +
|
||||
x * (3.05023305118083953857421875e-2f +
|
||||
x * (-1.2897425331175327301025390625e-2f +
|
||||
x * 2.38926825113594532012939453125e-3f)))))))));
|
||||
else if (__math_lib == __math_lib_ispc_fast)
|
||||
// sollya
|
||||
// fpminimax(((asin(x)-pi/2)/-sqrt(1-x)), [|0,1,2,3,4,5|],[|single...|],
|
||||
// [1e-20;.9999999999999999]);
|
||||
// avg error: 1.1105439e-06, max error 1.3187528e-06
|
||||
v = 1.57079517841339111328125f +
|
||||
x * (-0.21450997889041900634765625f +
|
||||
x * (8.78556668758392333984375e-2f +
|
||||
x * (-4.489909112453460693359375e-2f +
|
||||
x * (1.928029954433441162109375e-2f +
|
||||
x * (-4.3095736764371395111083984375e-3f)))));
|
||||
|
||||
v *= -sqrt(1.f - x);
|
||||
v = v + 1.57079637050628662109375;
|
||||
if (v < 0) v = 0;
|
||||
// v = max(0, v);
|
||||
|
||||
if (isneg) v = -v;
|
||||
if (isnan) v = floatbits(0x7fc00000);
|
||||
|
||||
return v;
|
||||
}
|
||||
|
||||
|
||||
static inline float cos(float x_full) {
|
||||
if (__math_lib == __math_lib_svml) {
|
||||
return __svml_cos(x_full);
|
||||
@@ -1912,6 +2022,16 @@ static inline uniform float cos(uniform float x_full) {
|
||||
}
|
||||
|
||||
|
||||
static inline float acos(float v) {
|
||||
return 1.57079637050628662109375 - asin(v);
|
||||
}
|
||||
|
||||
|
||||
static inline uniform float acos(uniform float v) {
|
||||
return 1.57079637050628662109375 - asin(v);
|
||||
}
|
||||
|
||||
|
||||
static inline void sincos(float x_full, varying float * uniform sin_result,
|
||||
varying float * uniform cos_result) {
|
||||
if (__math_lib == __math_lib_svml) {
|
||||
|
||||
23
tests/acos.ispc
Normal file
23
tests/acos.ispc
Normal file
@@ -0,0 +1,23 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
|
||||
bool ok(float x, float ref) { return (abs(x - ref) < 1e-6) || abs((x-ref)/ref) < 1e-6; }
|
||||
|
||||
export void f_v(uniform float RET[]) {
|
||||
uniform float vals[8] = { 0, 1, 0.5, -1, -.87, -.25, 1e-3, -.99999999 };
|
||||
uniform float r[8];
|
||||
foreach (i = 0 ... 8)
|
||||
r[i] = cos(acos(vals[i]));
|
||||
|
||||
int errors = 0;
|
||||
for (uniform int i = 0; i < 8; ++i) {
|
||||
if (ok(r[i], vals[i]) == false) {
|
||||
print("error @ %: got %, expected %\n", i, r[i], vals[i]);
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
RET[programIndex] = errors;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) { RET[programIndex] = 0; }
|
||||
23
tests/asin.ispc
Normal file
23
tests/asin.ispc
Normal file
@@ -0,0 +1,23 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
|
||||
bool ok(float x, float ref) { return (abs(x - ref) < 1e-6) || abs((x-ref)/ref) < 1e-6; }
|
||||
|
||||
export void f_v(uniform float RET[]) {
|
||||
uniform float vals[8] = { 0, 1, 0.5, -1, -.87, -.25, 1e-3, -.99999999 };
|
||||
uniform float r[8];
|
||||
foreach (i = 0 ... 8)
|
||||
r[i] = sin(asin(vals[i]));
|
||||
|
||||
int errors = 0;
|
||||
for (uniform int i = 0; i < 8; ++i) {
|
||||
if (ok(r[i], vals[i]) == false) {
|
||||
print("error @ %: got %, expected %\n", i, r[i], vals[i]);
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
RET[programIndex] = errors;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) { RET[programIndex] = 0; }
|
||||
Reference in New Issue
Block a user