// // AudioLimiter.cpp // libraries/audio/src // // Created by Ken Cooke on 2/11/15. // Copyright 2016 High Fidelity, Inc. // #include #include #include "AudioLimiter.h" #ifndef MAX #define MAX(a,b) ((a) > (b) ? (a) : (b)) #endif #ifndef MIN #define MIN(a,b) ((a) < (b) ? (a) : (b)) #endif #ifdef _MSC_VER #include #define MUL64(a,b) __emul((a), (b)) #define MULHI(a,b) ((int)(MUL64(a, b) >> 32)) #define MULQ31(a,b) ((int)(MUL64(a, b) >> 31)) #else #define MUL64(a,b) ((long long)(a) * (b)) #define MULHI(a,b) ((int)(MUL64(a, b) >> 32)) #define MULQ31(a,b) ((int)(MUL64(a, b) >> 31)) #endif // _MSC_VER // // on x86 architecture, assume that SSE2 is present // #if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__x86_64__) #include // convert float to int using round-to-nearest static inline int32_t floatToInt(float x) { return _mm_cvt_ss2si(_mm_load_ss(&x)); } #else // convert float to int using round-to-nearest static inline int32_t floatToInt(float x) { x += (x < 0.0f ? -0.5f : 0.5f); // round return (int32_t)x; } #endif // _M_IX86 static const double FIXQ31 = 2147483648.0; // convert float to Q31 static const double DB_TO_LOG2 = 0.16609640474436813; // convert dB to log2 // convert dB to amplitude static double dBToGain(double dB) { return pow(10.0, dB / 20.0); } // convert milliseconds to first-order time constant static int32_t msToTc(double ms, double sampleRate) { double tc = exp(-1000.0 / (ms * sampleRate)); return (int32_t)(FIXQ31 * tc); // Q31 } // log2 domain values are Q26 static const int LOG2_INTBITS = 5; static const int LOG2_FRACBITS = 31 - LOG2_INTBITS; // log2 domain headroom bits above 0dB static const int LOG2_HEADROOM = 15; // log2 domain offsets so error < 0 static const int32_t LOG2_BIAS = 347; static const int32_t EXP2_BIAS = 64; // // P(x) = log2(1+x) for x=[0,1] // scaled by 1, 0.5, 0.25 // // |error| < 347 ulp, smooth // static const int LOG2_TABBITS = 4; static const int32_t log2Table[1 << LOG2_TABBITS][3] = { { 
-0x56dfe26d, 0x5c46daff, 0x00000000 }, { -0x4d397571, 0x5bae58e7, 0x00025a75 }, { -0x4518f84b, 0x5aabcac4, 0x000a62db }, { -0x3e3075ec, 0x596168c0, 0x0019d0e6 }, { -0x384486e9, 0x57e769c7, 0x00316109 }, { -0x332742ba, 0x564f1461, 0x00513776 }, { -0x2eb4bad4, 0x54a4cdfe, 0x00791de2 }, { -0x2ad07c6c, 0x52f18320, 0x00a8aa46 }, { -0x2763c4d6, 0x513ba123, 0x00df574c }, { -0x245c319b, 0x4f87c5c4, 0x011c9399 }, { -0x21aac79f, 0x4dd93bef, 0x015fcb52 }, { -0x1f433872, 0x4c325584, 0x01a86ddc }, { -0x1d1b54b4, 0x4a94ac6e, 0x01f5f13e }, { -0x1b2a9f81, 0x4901524f, 0x0247d3f2 }, { -0x1969fa57, 0x4778f3a7, 0x029d9dbf }, { -0x17d36370, 0x45fbf1e8, 0x02f6dfe8 }, }; // // P(x) = exp2(x) for x=[0,1] // scaled by 2, 1, 0.5 // Uses exp2(-x) = exp2(1-x)/2 // // |error| < 1387 ulp, smooth // static const int EXP2_TABBITS = 4; static const int32_t exp2Table[1 << EXP2_TABBITS][3] = { { 0x3ed838c8, 0x58b574b7, 0x40000000 }, { 0x41a0821c, 0x5888db8f, 0x4000b2b7 }, { 0x4488548d, 0x582bcbc6, 0x40039be1 }, { 0x4791158a, 0x579a1128, 0x400a71ae }, { 0x4abc3a53, 0x56cf3089, 0x4017212e }, { 0x4e0b48af, 0x55c66396, 0x402bd31b }, { 0x517fd7a7, 0x547a946d, 0x404af0ec }, { 0x551b9049, 0x52e658f9, 0x40772a57 }, { 0x58e02e75, 0x5103ee08, 0x40b37b31 }, { 0x5ccf81b1, 0x4ecd321f, 0x410331b5 }, { 0x60eb6e09, 0x4c3ba007, 0x4169f548 }, { 0x6535ecf9, 0x49484909, 0x41ebcdaf }, { 0x69b10e5b, 0x45ebcede, 0x428d2acd }, { 0x6e5ef96c, 0x421e5d48, 0x4352ece7 }, { 0x7341edcb, 0x3dd7a354, 0x44426d7b }, { 0x785c4499, 0x390ecc3a, 0x456188bd }, }; static const int IEEE754_FABS_MASK = 0x7fffffff; static const int IEEE754_MANT_BITS = 23; static const int IEEE754_EXPN_BIAS = 127; // // Peak detection and -log2(x) for float input (mono) // x < 2^(31-LOG2_HEADROOM) returns 0x7fffffff // x > 2^LOG2_HEADROOM undefined // static inline int32_t peaklog2(float* input) { // float as integer bits int32_t u = *(int32_t*)input; // absolute value int32_t peak = u & IEEE754_FABS_MASK; // split into e and x - 1.0 int32_t e = 
IEEE754_EXPN_BIAS - (peak >> IEEE754_MANT_BITS) + LOG2_HEADROOM; int32_t x = (peak << (31 - IEEE754_MANT_BITS)) & 0x7fffffff; // saturate if (e > 31) { return 0x7fffffff; } int k = x >> (31 - LOG2_TABBITS); // polynomial for log2(1+x) over x=[0,1] int32_t c0 = log2Table[k][0]; int32_t c1 = log2Table[k][1]; int32_t c2 = log2Table[k][2]; c1 += MULHI(c0, x); c2 += MULHI(c1, x); // reconstruct result in Q26 return (e << LOG2_FRACBITS) - (c2 >> 3); } // // Peak detection and -log2(x) for float input (stereo) // x < 2^(31-LOG2_HEADROOM) returns 0x7fffffff // x > 2^LOG2_HEADROOM undefined // static inline int32_t peaklog2(float* input0, float* input1) { // float as integer bits int32_t u0 = *(int32_t*)input0; int32_t u1 = *(int32_t*)input1; // max absolute value u0 &= IEEE754_FABS_MASK; u1 &= IEEE754_FABS_MASK; int32_t peak = MAX(u0, u1); // split into e and x - 1.0 int32_t e = IEEE754_EXPN_BIAS - (peak >> IEEE754_MANT_BITS) + LOG2_HEADROOM; int32_t x = (peak << (31 - IEEE754_MANT_BITS)) & 0x7fffffff; // saturate if (e > 31) { return 0x7fffffff; } int k = x >> (31 - LOG2_TABBITS); // polynomial for log2(1+x) over x=[0,1] int32_t c0 = log2Table[k][0]; int32_t c1 = log2Table[k][1]; int32_t c2 = log2Table[k][2]; c1 += MULHI(c0, x); c2 += MULHI(c1, x); // reconstruct result in Q26 return (e << LOG2_FRACBITS) - (c2 >> 3); } // // Compute exp2(-x) for x=[0,32] in Q26, result in Q31 // x < 0 undefined // static inline int32_t fixexp2(int32_t x) { // split into e and 1.0 - x int32_t e = x >> LOG2_FRACBITS; x = ~(x << LOG2_INTBITS) & 0x7fffffff; int k = x >> (31 - EXP2_TABBITS); // polynomial for exp2(x) int32_t c0 = exp2Table[k][0]; int32_t c1 = exp2Table[k][1]; int32_t c2 = exp2Table[k][2]; c1 += MULHI(c0, x); c2 += MULHI(c1, x); // reconstruct result in Q31 return c2 >> e; } // fast TPDF dither in [-1.0f, 1.0f] static inline float dither() { static uint32_t rz = 0; rz = rz * 69069 + 1; int32_t r0 = rz & 0xffff; int32_t r1 = rz >> 16; return (int32_t)(r0 - r1) * (1/65536.0f); } 
//
// Peak-hold lowpass filter
//
// Bandlimits the gain control signal to greatly reduce the modulation distortion,
// while still reaching the peak attenuation after exactly N-1 samples of delay.
// N completely determines the limiter attack time.
//
// N     peak-hold window length, must be a power of 2
// CIC1  length of the first CIC (boxcar) stage
// CIC2  length of the second CIC (boxcar) stage, with (CIC1-1) + (CIC2-1) == N-1
//
template<int N, int CIC1, int CIC2>
class PeakFilterT {

    static_assert((N & (N - 1)) == 0, "N must be a power of 2");
    static_assert((CIC1 - 1) + (CIC2 - 1) == (N - 1), "Total CIC delay must be N-1");

    int32_t _buffer[2*N] = {};  // shared FIFO
    int _index = 0;

    // The integrators accumulate without bound and depend on 32-bit wrap-around: each comb
    // takes the difference of two accumulator values, which is exact modulo 2^32.
    // They are unsigned because signed overflow is undefined behavior.
    uint32_t _acc1 = 0; // CIC1 integrator
    uint32_t _acc2 = 0; // CIC2 integrator

    // smaller of two values
    static int32_t minOf(int32_t a, int32_t b) {
        return (a < b) ? a : b;
    }

    // high 32 bits of the signed 64-bit product
    static int32_t mulHigh(int32_t a, int32_t b) {
        return (int32_t)(((int64_t)a * b) >> 32);
    }

public:
    PeakFilterT() {

        // fill history
        for (int n = 0; n < N-1; n++) {
            process(0x7fffffff);
        }
    }

    // Push one gain sample and return the filtered gain for this step.
    // Smaller values mean more attenuation, so the peak-hold stage is a running minimum.
    int32_t process(int32_t x) {

        const int MASK = 2*N - 1;   // buffer wrap
        int i = _index;

        // Fast peak-hold using a running-min filter. Each pass combines the current partial
        // minimum with the one stored n calls earlier, so log2(N) comparisons cover the
        // current sample and the previous N-1 samples.
        // Hold time of N-1 samples exactly cancels the step response of FIR filter.
        for (int n = 1; n < N; n <<= 1) {

            _buffer[i] = x;
            i = (i + n) & MASK;
            x = minOf(x, _buffer[i]);
        }

        // Fast FIR attack/lowpass filter using a 2-stage CIC filter.
        // The step response reaches final value after N-1 samples.

        const int32_t CICGAIN = 0xffffffff / (CIC1 * CIC2); // Q32
        x = mulHigh(x, CICGAIN);

        // uint32_t <-> int32_t conversions below only reinterpret the 32-bit pattern
        // so that accumulator values can share the int32_t FIFO

        _buffer[i] = (int32_t)_acc1;
        _acc1 += (uint32_t)x;                           // integrator
        i = (i + CIC1 - 1) & MASK;
        x = (int32_t)(_acc1 - (uint32_t)_buffer[i]);    // comb

        _buffer[i] = (int32_t)_acc2;
        _acc2 += (uint32_t)x;                           // integrator
        i = (i + CIC2 - 1) & MASK;
        x = (int32_t)(_acc2 - (uint32_t)_buffer[i]);    // comb

        _index = (i + 1) & MASK;    // skip unused tap
        return x;
    }
};

//
// Specializations that define the optimum lowpass filter for each length.
// template class PeakFilter; template<> class PeakFilter< 16> : public PeakFilterT< 16, 7, 10> {}; template<> class PeakFilter< 32> : public PeakFilterT< 32, 14, 19> {}; template<> class PeakFilter< 64> : public PeakFilterT< 64, 27, 38> {}; template<> class PeakFilter<128> : public PeakFilterT<128, 53, 76> {}; template<> class PeakFilter<256> : public PeakFilterT<256, 106, 151> {}; // // N-1 sample delay (mono) // template class MonoDelay { static_assert((N & (N - 1)) == 0, "N must be a power of 2"); float _buffer[N] = {}; int _index = 0; public: void process(float& x) { const int MASK = N - 1; // buffer wrap int i = _index; _buffer[i] = x; i = (i + (N - 1)) & MASK; x = _buffer[i]; _index = i; } }; // // N-1 sample delay (stereo) // template class StereoDelay { static_assert((N & (N - 1)) == 0, "N must be a power of 2"); float _buffer[2*N] = {}; int _index = 0; public: void process(float& x0, float& x1) { const int MASK = 2*N - 1; // buffer wrap int i = _index; _buffer[i+0] = x0; _buffer[i+1] = x1; i = (i + 2*(N - 1)) & MASK; x0 = _buffer[i+0]; x1 = _buffer[i+1]; _index = i; } }; // // Limiter (common) // class LimiterImpl { protected: static const int NARC = 64; int32_t _holdTable[NARC]; int32_t _releaseTable[NARC]; int32_t _rmsAttack = 0x7fffffff; int32_t _rmsRelease = 0x7fffffff; int32_t _arcRelease = 0x7fffffff; int32_t _threshold = 0; int32_t _attn = 0; int32_t _rms = 0; int32_t _arc = 0; int _sampleRate; float _outGain = 0.0f; public: LimiterImpl(int sampleRate); virtual ~LimiterImpl() {} void setThreshold(float threshold); void setRelease(float release); int32_t envelope(int32_t attn); virtual void process(float* input, int16_t* output, int numFrames) = 0; }; LimiterImpl::LimiterImpl(int sampleRate) { sampleRate = MAX(sampleRate, 8000); sampleRate = MIN(sampleRate, 96000); _sampleRate = sampleRate; // defaults setThreshold(0.0); setRelease(250.0); } // // Set the limiter threshold (dB) // Brickwall limiting will begin when the signal exceeds the threshold. 
// Makeup gain is applied, to reach but never exceed the output ceiling. // void LimiterImpl::setThreshold(float threshold) { const double OUT_CEILING = -0.3; const double Q31_TO_Q15 = 32768 / 2147483648.0; // limiter threshold = -48dB to 0dB threshold = MAX(threshold, -48.0f); threshold = MIN(threshold, 0.0f); // limiter threshold in log2 domain _threshold = (int32_t)(-(double)threshold * DB_TO_LOG2 * (1 << LOG2_FRACBITS)); _threshold += LOG2_BIAS + EXP2_BIAS; _threshold += LOG2_HEADROOM << LOG2_FRACBITS; // makeup gain and conversion to 16-bit _outGain = (float)(dBToGain(OUT_CEILING - (double)threshold) * Q31_TO_Q15); } // // Set the limiter release time (milliseconds) // This is a base value that scales the adaptive hold and release algorithms. // void LimiterImpl::setRelease(float release) { const double MAXHOLD = 0.100; // max hold = 100ms const double MINREL = 0.025; // min release = 0.025 * release const int NHOLD = 16; // adaptive hold to adaptive release transition // limiter release = 50 to 5000ms release = MAX(release, 50.0f); release = MIN(release, 5000.0f); int32_t maxRelease = msToTc((double)release, _sampleRate); _rmsAttack = msToTc(0.1 * (double)release, _sampleRate); _rmsRelease = maxRelease; // Compute ARC tables, working from low peak/rms to high peak/rms. 
// // At low peak/rms, release = max and hold is progressive to max // At high peak/rms, hold = 0 and release is progressive to min double x = MAXHOLD * _sampleRate; double xstep = x / NHOLD; // 1.0 to 1.0/NHOLD int i = 0; for (; i < NHOLD; i++) { // max release _releaseTable[i] = maxRelease; // progressive hold _holdTable[i] = (int32_t)((maxRelease - 0x7fffffff) / x); _holdTable[i] = MIN(_holdTable[i], -1); // prevent 0 on long releases x -= xstep; x = MAX(x, 1.0); } x = release; xstep = x * (1.0-MINREL) / (NARC-NHOLD-1); // 1.0 to MINREL for (; i < NARC; i++) { // progressive release _releaseTable[i] = msToTc(x, _sampleRate); // min hold _holdTable[i] = (_releaseTable[i] - 0x7fffffff); // 1 sample x -= xstep; } } // // Limiter envelope processing // zero attack, adaptive hold and release // int32_t LimiterImpl::envelope(int32_t attn) { // table of (1/attn) for 1dB to 6dB, rounded to prevent overflow static const int16_t invTable[64] = { 0x6000, 0x6000, 0x6000, 0x6000, 0x6000, 0x6000, 0x6000, 0x6000, 0x6000, 0x6000, 0x5d17, 0x5555, 0x4ec4, 0x4924, 0x4444, 0x4000, 0x3c3c, 0x38e3, 0x35e5, 0x3333, 0x30c3, 0x2e8b, 0x2c85, 0x2aaa, 0x28f5, 0x2762, 0x25ed, 0x2492, 0x234f, 0x2222, 0x2108, 0x2000, 0x1f07, 0x1e1e, 0x1d41, 0x1c71, 0x1bac, 0x1af2, 0x1a41, 0x1999, 0x18f9, 0x1861, 0x17d0, 0x1745, 0x16c1, 0x1642, 0x15c9, 0x1555, 0x14e5, 0x147a, 0x1414, 0x13b1, 0x1352, 0x12f6, 0x129e, 0x1249, 0x11f7, 0x11a7, 0x115b, 0x1111, 0x10c9, 0x1084, 0x1041, 0x1000, }; if (attn < _attn) { // RELEASE // update release before use, to implement hold = 0 _arcRelease += _holdTable[_arc]; // update progressive hold _arcRelease = MAX(_arcRelease, _releaseTable[_arc]); // saturate at final value attn += MULQ31((_attn - attn), _arcRelease); // apply release } else { // ATTACK // update ARC with normalized peak/rms // // arc = (attn-rms)*6/1 for attn < 1dB // arc = (attn-rms)*6/attn for attn = 1dB to 6dB // arc = (attn-rms)*6/6 for attn > 6dB int bits = MIN(attn >> 20, 0x3f); // saturate 1/attn at 
6dB _arc = MAX(attn - _rms, 0); // peak/rms = (attn-rms) _arc = MULHI(_arc, invTable[bits]); // normalized peak/rms = (attn-rms)/attn _arc = MIN(_arc, NARC - 1); // saturate at 6dB _arcRelease = 0x7fffffff; // reset release } _attn = attn; // Update the RMS estimate after release is applied. // The feedback loop with adaptive hold will damp any sustained modulation distortion. int32_t tc = (attn > _rms) ? _rmsAttack : _rmsRelease; _rms = attn + MULQ31((_rms - attn), tc); return attn; } // // Limiter (mono) // template class LimiterMono : public LimiterImpl { PeakFilter _filter; MonoDelay _delay; public: LimiterMono(int sampleRate) : LimiterImpl(sampleRate) {} void process(float* input, int16_t* output, int numFrames); }; template void LimiterMono::process(float* input, int16_t* output, int numFrames) { for (int n = 0; n < numFrames; n++) { // peak detect and convert to log2 domain int32_t peak = peaklog2(&input[n]); // compute limiter attenuation int32_t attn = MAX(_threshold - peak, 0); // apply envelope attn = envelope(attn); // convert from log2 domain attn = fixexp2(attn); // lowpass filter attn = _filter.process(attn); float gain = attn * _outGain; // delay audio float x = input[n]; _delay.process(x); // apply gain x *= gain; // apply dither x += dither(); // store 16-bit output output[n] = (int16_t)floatToInt(x); } } // // Limiter (stereo) // template class LimiterStereo : public LimiterImpl { PeakFilter _filter; StereoDelay _delay; public: LimiterStereo(int sampleRate) : LimiterImpl(sampleRate) {} // interleaved stereo input/output void process(float* input, int16_t* output, int numFrames); }; template void LimiterStereo::process(float* input, int16_t* output, int numFrames) { for (int n = 0; n < numFrames; n++) { // peak detect and convert to log2 domain int32_t peak = peaklog2(&input[2*n+0], &input[2*n+1]); // compute limiter attenuation int32_t attn = MAX(_threshold - peak, 0); // apply envelope attn = envelope(attn); // convert from log2 domain attn = 
fixexp2(attn); // lowpass filter attn = _filter.process(attn); float gain = attn * _outGain; // delay audio float x0 = input[2*n+0]; float x1 = input[2*n+1]; _delay.process(x0, x1); // apply gain x0 *= gain; x1 *= gain; // apply dither float d = dither(); x0 += d; x1 += d; // store 16-bit output output[2*n+0] = (int16_t)floatToInt(x0); output[2*n+1] = (int16_t)floatToInt(x1); } } // // Public API // AudioLimiter::AudioLimiter(int sampleRate, int numChannels) { if (numChannels == 1) { // ~1.5ms lookahead for all rates if (sampleRate < 16000) { _impl = new LimiterMono<16>(sampleRate); } else if (sampleRate < 32000) { _impl = new LimiterMono<32>(sampleRate); } else if (sampleRate < 64000) { _impl = new LimiterMono<64>(sampleRate); } else { _impl = new LimiterMono<128>(sampleRate); } } else if (numChannels == 2) { // ~1.5ms lookahead for all rates if (sampleRate < 16000) { _impl = new LimiterStereo<16>(sampleRate); } else if (sampleRate < 32000) { _impl = new LimiterStereo<32>(sampleRate); } else if (sampleRate < 64000) { _impl = new LimiterStereo<64>(sampleRate); } else { _impl = new LimiterStereo<128>(sampleRate); } } else { assert(0); // unsupported } } AudioLimiter::~AudioLimiter() { delete _impl; } void AudioLimiter::render(float* input, int16_t* output, int numFrames) { _impl->process(input, output, numFrames); } void AudioLimiter::setThreshold(float threshold) { _impl->setThreshold(threshold); } void AudioLimiter::setRelease(float release) { _impl->setRelease(release); }