Fast acosf() using polynomial approximation

2025-07-14 14:36:37 +02:00 · 2017-06-27 11:29:44 -07:00 · 2017-06-27 11:29:44 -07:00 · dcdf07191b
commit dcdf07191b
parent 209a4f33b5
1 changed file with 42 additions and 0 deletions
--- a/libraries/shared/src/AudioHelpers.h
+++ b/libraries/shared/src/AudioHelpers.h
@@ -66,6 +66,48 @@ static inline float fastExp2f(float x) {
    return x * xi.f;
 }

+//
+// on x86/x64 architectures, assume that SSE2 is present (only SSE intrinsics are used here)
+//
+#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__x86_64__)
+
+#include <xmmintrin.h>
+// Returns sqrt(x) using the scalar SSE sqrtss instruction, inlined without requiring /fp:fast
+static inline float fastSqrtf(float x) {   // x86/x64 variant, built on SSE intrinsics
+    return _mm_cvtss_f32(_mm_sqrt_ss(_mm_set_ss(x)));   // load x into the low lane, sqrt it, read the low lane back
+}
+
+#else 
+
+static inline float fastSqrtf(float x) {   // fallback for targets not matched by the x86/x64 check
+    return sqrtf(x);   // defer to the C library's single-precision square root
+}
+
+#endif
+
+// Fast approximation of acosf(x): a cubic polynomial scaled by one square root.
+// for -1 <= x <= 1, returns acos(x)
+// otherwise, returns NaN (1-|x| is negative, so the square root yields NaN)
+// negative x is folded onto [0,1] using acos(-x) = pi - acos(x)
+// abs |error| < 7e-5, smooth
+//
+static inline float fastAcosf(float x) {
+
+    union { float f; int32_t i; } xi = { x };   // access the bit pattern of x as an integer
+
+    int32_t sign = xi.i & 0x80000000;   // isolate the sign bit (bit 31) of x
+    xi.i ^= sign;   // clear the sign bit, so xi.f = fabs(x)
+
+    // r = sqrt(1-|x|); it has no data dependency on the polynomial below, so both can be evaluated in parallel
+    float r = fastSqrtf(1.0f - xi.f);
+
+    // cubic in Horner form approximating acos(t)/sqrt(1-t) for t=|x| over [0,1]
+    xi.f = ((-0.0198439236f * xi.f + 0.0762021306f) * xi.f + -0.212940971f) * xi.f + 1.57079633f;
+
+    xi.f *= r;   // xi.f now approximates acos(|x|)
+    return (sign ? 3.141592654f - xi.f : xi.f);   // pi - acos(|x|) when the sign bit of x was set
+}
+
 //
 // Quantize a non-negative gain value to the nearest 0.5dB, and pack to a byte.
 //