Fast acosf() using polynomial approximation

This commit is contained in:
Ken Cooke 2017-06-27 11:29:44 -07:00
parent 209a4f33b5
commit dcdf07191b

View file

@ -66,6 +66,48 @@ static inline float fastExp2f(float x) {
return x * xi.f;
}
//
// on x86 architecture, assume that SSE2 is present
//
#if defined(__x86_64__) || defined(__i386__) || defined(_M_X64) || defined(_M_IX86)

#include <xmmintrin.h>

// Single-precision square root using the scalar SSE sqrt intrinsic, so the
// instruction is emitted inline without requiring /fp:fast.
static inline float fastSqrtf(float x) {
    __m128 v = _mm_set_ss(x);   // x in the low lane
    v = _mm_sqrt_ss(v);         // square root of the low lane
    return _mm_cvtss_f32(v);    // extract the low lane as a float
}

#else

// Non-x86 targets: defer to the C library.
static inline float fastSqrtf(float x) {
    float root = sqrtf(x);
    return root;
}

#endif
//
// for -1 <= x <= 1, returns acos(x)
// otherwise, returns NaN
//
// absolute error < 7e-5, smooth
//
static inline float fastAcosf(float x) {
    // View the float's bit pattern as an integer so the sign can be
    // handled with bitwise operations.
    union { float f; int32_t i; } bits = { x };

    // Isolate the top (sign) bit, then clear it, leaving |x| in bits.f.
    const int32_t signBit = bits.i & 0x80000000;
    bits.i ^= signBit;
    const float ax = bits.f;

    // sqrt(1 - |x|) does not depend on the polynomial, so it can overlap with it.
    const float root = fastSqrtf(1.0f - ax);

    // Cubic in Horner form approximating acos(t)/sqrt(1-t) for t in [0,1].
    float acosAbs = ((-0.0198439236f * ax + 0.0762021306f) * ax + -0.212940971f) * ax + 1.57079633f;

    // Scale by the square root to obtain acos(|x|).
    acosAbs *= root;

    // Negative input: use the reflection acos(-t) = pi - acos(t).
    if (signBit) {
        return 3.141592654f - acosAbs;
    }
    return acosAbs;
}
//
// Quantize a non-negative gain value to the nearest 0.5dB, and pack to a byte.
//