diff --git a/builtins/util.m4 b/builtins/util.m4 index 7c022e94..96f3544a 100644 --- a/builtins/util.m4 +++ b/builtins/util.m4 @@ -1890,6 +1890,8 @@ entry: declare float @sinf(float) nounwind readnone declare float @cosf(float) nounwind readnone declare void @sincosf(float, float *, float *) nounwind readnone +declare float @asinf(float) nounwind readnone +declare float @acosf(float) nounwind readnone declare float @tanf(float) nounwind readnone declare float @atanf(float) nounwind readnone declare float @atan2f(float, float) nounwind readnone @@ -1912,6 +1914,16 @@ define void @__stdlib_sincosf(float, float *, float *) nounwind readnone alwaysi ret void } +define float @__stdlib_asinf(float) nounwind readnone alwaysinline { + %r = call float @asinf(float %0) + ret float %r +} + +define float @__stdlib_acosf(float) nounwind readnone alwaysinline { + %r = call float @acosf(float %0) + ret float %r +} + define float @__stdlib_tanf(float) nounwind readnone alwaysinline { %r = call float @tanf(float %0) ret float %r diff --git a/docs/ispc.rst b/docs/ispc.rst index c6882fae..442c36bf 100644 --- a/docs/ispc.rst +++ b/docs/ispc.rst @@ -3013,13 +3013,17 @@ architectures. float tan(float x) uniform float tan(uniform float x) -Arctangent functions are also available: +The corresponding inverse functions are also available: :: + float asin(float x) + uniform float asin(uniformfloat x) + float acos(float x) + uniform float acos(uniform float x) float atan(float x) - float atan2(float x, float y) uniform float atan(uniform float x) + float atan2(float x, float y) uniform float atan2(uniform float x, uniform float y) If both sine and cosine are needed, then the ``sincos()`` call computes diff --git a/stdlib.ispc b/stdlib.ispc index 3c00b8ce..4727ec6e 100644 --- a/stdlib.ispc +++ b/stdlib.ispc @@ -1785,6 +1785,116 @@ static inline uniform float sin(uniform float x_full) { } +static inline float asin(float x) { + bool isneg = x < 0; + x = abs(x); + + bool isnan = (x > 1); + + float v; + if (__math_lib == __math_lib_svml || + __math_lib == __math_lib_system) { + float ret; + uniform int mask = lanemask(); + for (uniform int i = 0; i < programCount; ++i) { + if ((mask & (1 << i)) == 0) + continue; + uniform float r = __stdlib_asinf(extract(x, i)); + ret = insert(ret, i, r); + } + return ret; + } + else if (__math_lib == __math_lib_ispc) + // sollya + // fpminimax(((asin(x)-pi/2)/-sqrt(1-x)), [|0,1,2,3,4,5,6,7,8,9,10|], + // [|single...|], [1e-20;.9999999999999999]); + // avg error: 8.5716801e-09, max error: 2.1373853e-07 + v = 1.57079637050628662109375f + + x * (-0.21460501849651336669921875f + + x * (8.9116774499416351318359375e-2f + + x * (-5.146093666553497314453125e-2f + + x * (3.7269376218318939208984375e-2f + + x * (-3.5882405936717987060546875e-2f + + x * (4.14929799735546112060546875e-2f + + x * (-4.25077490508556365966796875e-2f + + x * (3.05023305118083953857421875e-2f + + x * (-1.2897425331175327301025390625e-2f + + x * 2.38926825113594532012939453125e-3f))))))))); + else if (__math_lib == __math_lib_ispc_fast) + // sollya + // fpminimax(((asin(x)-pi/2)/-sqrt(1-x)), [|0,1,2,3,4,5|],[|single...|], + // [1e-20;.9999999999999999]); + // avg error: 1.1105439e-06, max error 1.3187528e-06 + v = 1.57079517841339111328125f + + x * (-0.21450997889041900634765625f + + x * (8.78556668758392333984375e-2f + + x * (-4.489909112453460693359375e-2f + + x * (1.928029954433441162109375e-2f + + x * (-4.3095736764371395111083984375e-3f))))); + + v *= -sqrt(1.f - x); + v = v + 1.57079637050628662109375; + if (v < 0) v = 0; + // v = max(0, v); + + if (isneg) v = -v; + if (isnan) v = floatbits(0x7fc00000); + + return v; +} + + +static inline uniform float asin(uniform float x) { + uniform bool isneg = x < 0; + x = abs(x); + + uniform bool isnan = (x > 1); + + uniform float v; + if (__math_lib == __math_lib_svml || + __math_lib == __math_lib_system) { + return __stdlib_asinf(x); + } + else if (__math_lib == __math_lib_ispc) + // sollya + // fpminimax(((asin(x)-pi/2)/-sqrt(1-x)), [|0,1,2,3,4,5,6,7,8,9,10|], + // [|single...|], [1e-20;.9999999999999999]); + // avg error: 8.5716801e-09, max error: 2.1373853e-07 + v = 1.57079637050628662109375f + + x * (-0.21460501849651336669921875f + + x * (8.9116774499416351318359375e-2f + + x * (-5.146093666553497314453125e-2f + + x * (3.7269376218318939208984375e-2f + + x * (-3.5882405936717987060546875e-2f + + x * (4.14929799735546112060546875e-2f + + x * (-4.25077490508556365966796875e-2f + + x * (3.05023305118083953857421875e-2f + + x * (-1.2897425331175327301025390625e-2f + + x * 2.38926825113594532012939453125e-3f))))))))); + else if (__math_lib == __math_lib_ispc_fast) + // sollya + // fpminimax(((asin(x)-pi/2)/-sqrt(1-x)), [|0,1,2,3,4,5|],[|single...|], + // [1e-20;.9999999999999999]); + // avg error: 1.1105439e-06, max error 1.3187528e-06 + v = 1.57079517841339111328125f + + x * (-0.21450997889041900634765625f + + x * (8.78556668758392333984375e-2f + + x * (-4.489909112453460693359375e-2f + + x * (1.928029954433441162109375e-2f + + x * (-4.3095736764371395111083984375e-3f))))); + + v *= -sqrt(1.f - x); + v = v + 1.57079637050628662109375; + if (v < 0) v = 0; + // v = max(0, v); + + if (isneg) v = -v; + if (isnan) v = floatbits(0x7fc00000); + + return v; +} + + static inline float cos(float x_full) { if (__math_lib == __math_lib_svml) { return __svml_cos(x_full); @@ -1912,6 +2022,16 @@ static inline uniform float cos(uniform float x_full) { } +static inline float acos(float v) { + return 1.57079637050628662109375 - asin(v); +} + + +static inline uniform float acos(uniform float v) { + return 1.57079637050628662109375 - asin(v); +} + + static inline void sincos(float x_full, varying float * uniform sin_result, varying float * uniform cos_result) { if (__math_lib == __math_lib_svml) { diff --git a/tests/acos.ispc b/tests/acos.ispc new file mode 100644 index 00000000..45173782 --- /dev/null +++ b/tests/acos.ispc @@ -0,0 +1,23 @@ + +export uniform int width() { return programCount; } + + +bool ok(float x, float ref) { return (abs(x - ref) < 1e-6) || abs((x-ref)/ref) < 1e-6; } + +export void f_v(uniform float RET[]) { + uniform float vals[8] = { 0, 1, 0.5, -1, -.87, -.25, 1e-3, -.99999999 }; + uniform float r[8]; + foreach (i = 0 ... 8) + r[i] = cos(acos(vals[i])); + + int errors = 0; + for (uniform int i = 0; i < 8; ++i) { + if (ok(r[i], vals[i]) == false) { + print("error @ %: got %, expected %\n", i, r[i], vals[i]); + ++errors; + } + } + RET[programIndex] = errors; +} + +export void result(uniform float RET[]) { RET[programIndex] = 0; } diff --git a/tests/asin.ispc b/tests/asin.ispc new file mode 100644 index 00000000..a6839b09 --- /dev/null +++ b/tests/asin.ispc @@ -0,0 +1,23 @@ + +export uniform int width() { return programCount; } + + +bool ok(float x, float ref) { return (abs(x - ref) < 1e-6) || abs((x-ref)/ref) < 1e-6; } + +export void f_v(uniform float RET[]) { + uniform float vals[8] = { 0, 1, 0.5, -1, -.87, -.25, 1e-3, -.99999999 }; + uniform float r[8]; + foreach (i = 0 ... 8) + r[i] = sin(asin(vals[i])); + + int errors = 0; + for (uniform int i = 0; i < 8; ++i) { + if (ok(r[i], vals[i]) == false) { + print("error @ %: got %, expected %\n", i, r[i], vals[i]); + ++errors; + } + } + RET[programIndex] = errors; +} + +export void result(uniform float RET[]) { RET[programIndex] = 0; }