Add single-precision asin() and acos() to stdlib.

Issue #184.
This commit is contained in:
Matt Pharr
2012-03-05 13:31:38 -08:00
parent f6cbaa78e8
commit c152ae3c32
5 changed files with 184 additions and 2 deletions

View File

@@ -1890,6 +1890,8 @@ entry:
declare float @sinf(float) nounwind readnone
declare float @cosf(float) nounwind readnone
declare void @sincosf(float, float *, float *) nounwind readnone
declare float @asinf(float) nounwind readnone
declare float @acosf(float) nounwind readnone
declare float @tanf(float) nounwind readnone
declare float @atanf(float) nounwind readnone
declare float @atan2f(float, float) nounwind readnone
@@ -1912,6 +1914,16 @@ define void @__stdlib_sincosf(float, float *, float *) nounwind readnone alwaysi
ret void
}
define float @__stdlib_asinf(float) nounwind readnone alwaysinline {
%r = call float @asinf(float %0)
ret float %r
}
define float @__stdlib_acosf(float) nounwind readnone alwaysinline {
%r = call float @acosf(float %0)
ret float %r
}
define float @__stdlib_tanf(float) nounwind readnone alwaysinline {
%r = call float @tanf(float %0)
ret float %r

View File

@@ -3013,13 +3013,17 @@ architectures.
float tan(float x)
uniform float tan(uniform float x)
Arctangent functions are also available:
The corresponding inverse functions are also available:
::
float asin(float x)
uniform float asin(uniformfloat x)
float acos(float x)
uniform float acos(uniform float x)
float atan(float x)
float atan2(float x, float y)
uniform float atan(uniform float x)
float atan2(float x, float y)
uniform float atan2(uniform float x, uniform float y)
If both sine and cosine are needed, then the ``sincos()`` call computes

View File

@@ -1785,6 +1785,116 @@ static inline uniform float sin(uniform float x_full) {
}
static inline float asin(float x) {
bool isneg = x < 0;
x = abs(x);
bool isnan = (x > 1);
float v;
if (__math_lib == __math_lib_svml ||
__math_lib == __math_lib_system) {
float ret;
uniform int mask = lanemask();
for (uniform int i = 0; i < programCount; ++i) {
if ((mask & (1 << i)) == 0)
continue;
uniform float r = __stdlib_asinf(extract(x, i));
ret = insert(ret, i, r);
}
return ret;
}
else if (__math_lib == __math_lib_ispc)
// sollya
// fpminimax(((asin(x)-pi/2)/-sqrt(1-x)), [|0,1,2,3,4,5,6,7,8,9,10|],
// [|single...|], [1e-20;.9999999999999999]);
// avg error: 8.5716801e-09, max error: 2.1373853e-07
v = 1.57079637050628662109375f +
x * (-0.21460501849651336669921875f +
x * (8.9116774499416351318359375e-2f +
x * (-5.146093666553497314453125e-2f +
x * (3.7269376218318939208984375e-2f +
x * (-3.5882405936717987060546875e-2f +
x * (4.14929799735546112060546875e-2f +
x * (-4.25077490508556365966796875e-2f +
x * (3.05023305118083953857421875e-2f +
x * (-1.2897425331175327301025390625e-2f +
x * 2.38926825113594532012939453125e-3f)))))))));
else if (__math_lib == __math_lib_ispc_fast)
// sollya
// fpminimax(((asin(x)-pi/2)/-sqrt(1-x)), [|0,1,2,3,4,5|],[|single...|],
// [1e-20;.9999999999999999]);
// avg error: 1.1105439e-06, max error 1.3187528e-06
v = 1.57079517841339111328125f +
x * (-0.21450997889041900634765625f +
x * (8.78556668758392333984375e-2f +
x * (-4.489909112453460693359375e-2f +
x * (1.928029954433441162109375e-2f +
x * (-4.3095736764371395111083984375e-3f)))));
v *= -sqrt(1.f - x);
v = v + 1.57079637050628662109375;
if (v < 0) v = 0;
// v = max(0, v);
if (isneg) v = -v;
if (isnan) v = floatbits(0x7fc00000);
return v;
}
static inline uniform float asin(uniform float x) {
uniform bool isneg = x < 0;
x = abs(x);
uniform bool isnan = (x > 1);
uniform float v;
if (__math_lib == __math_lib_svml ||
__math_lib == __math_lib_system) {
return __stdlib_asinf(x);
}
else if (__math_lib == __math_lib_ispc)
// sollya
// fpminimax(((asin(x)-pi/2)/-sqrt(1-x)), [|0,1,2,3,4,5,6,7,8,9,10|],
// [|single...|], [1e-20;.9999999999999999]);
// avg error: 8.5716801e-09, max error: 2.1373853e-07
v = 1.57079637050628662109375f +
x * (-0.21460501849651336669921875f +
x * (8.9116774499416351318359375e-2f +
x * (-5.146093666553497314453125e-2f +
x * (3.7269376218318939208984375e-2f +
x * (-3.5882405936717987060546875e-2f +
x * (4.14929799735546112060546875e-2f +
x * (-4.25077490508556365966796875e-2f +
x * (3.05023305118083953857421875e-2f +
x * (-1.2897425331175327301025390625e-2f +
x * 2.38926825113594532012939453125e-3f)))))))));
else if (__math_lib == __math_lib_ispc_fast)
// sollya
// fpminimax(((asin(x)-pi/2)/-sqrt(1-x)), [|0,1,2,3,4,5|],[|single...|],
// [1e-20;.9999999999999999]);
// avg error: 1.1105439e-06, max error 1.3187528e-06
v = 1.57079517841339111328125f +
x * (-0.21450997889041900634765625f +
x * (8.78556668758392333984375e-2f +
x * (-4.489909112453460693359375e-2f +
x * (1.928029954433441162109375e-2f +
x * (-4.3095736764371395111083984375e-3f)))));
v *= -sqrt(1.f - x);
v = v + 1.57079637050628662109375;
if (v < 0) v = 0;
// v = max(0, v);
if (isneg) v = -v;
if (isnan) v = floatbits(0x7fc00000);
return v;
}
static inline float cos(float x_full) {
if (__math_lib == __math_lib_svml) {
return __svml_cos(x_full);
@@ -1912,6 +2022,16 @@ static inline uniform float cos(uniform float x_full) {
}
static inline float acos(float v) {
return 1.57079637050628662109375 - asin(v);
}
static inline uniform float acos(uniform float v) {
return 1.57079637050628662109375 - asin(v);
}
static inline void sincos(float x_full, varying float * uniform sin_result,
varying float * uniform cos_result) {
if (__math_lib == __math_lib_svml) {

23
tests/acos.ispc Normal file
View File

@@ -0,0 +1,23 @@
export uniform int width() { return programCount; }
bool ok(float x, float ref) { return (abs(x - ref) < 1e-6) || abs((x-ref)/ref) < 1e-6; }
export void f_v(uniform float RET[]) {
uniform float vals[8] = { 0, 1, 0.5, -1, -.87, -.25, 1e-3, -.99999999 };
uniform float r[8];
foreach (i = 0 ... 8)
r[i] = cos(acos(vals[i]));
int errors = 0;
for (uniform int i = 0; i < 8; ++i) {
if (ok(r[i], vals[i]) == false) {
print("error @ %: got %, expected %\n", i, r[i], vals[i]);
++errors;
}
}
RET[programIndex] = errors;
}
export void result(uniform float RET[]) { RET[programIndex] = 0; }

23
tests/asin.ispc Normal file
View File

@@ -0,0 +1,23 @@
export uniform int width() { return programCount; }
bool ok(float x, float ref) { return (abs(x - ref) < 1e-6) || abs((x-ref)/ref) < 1e-6; }
export void f_v(uniform float RET[]) {
uniform float vals[8] = { 0, 1, 0.5, -1, -.87, -.25, 1e-3, -.99999999 };
uniform float r[8];
foreach (i = 0 ... 8)
r[i] = sin(asin(vals[i]));
int errors = 0;
for (uniform int i = 0; i < 8; ++i) {
if (ok(r[i], vals[i]) == false) {
print("error @ %: got %, expected %\n", i, r[i], vals[i]);
++errors;
}
}
RET[programIndex] = errors;
}
export void result(uniform float RET[]) { RET[programIndex] = 0; }