Merge pull request #10502 from kencooke/audio-noisegate-new

Advanced noise gate
2025-04-08 07:02:25 +02:00 · 2017-05-25 08:21:49 -07:00 · 2017-05-25 08:21:49 -07:00 · 756e00e9a9
commit 756e00e9a9
parent a186d4f5e7 0fe404adbb
12 changed files with 1432 additions and 711 deletions
--- a/assignment-client/src/Agent.cpp
+++ b/assignment-client/src/Agent.cpp
@ -55,7 +55,8 @@ static const int RECEIVED_AUDIO_STREAM_CAPACITY_FRAMES = 10;

 Agent::Agent(ReceivedMessage& message) :
    ThreadedAssignment(message),
-    _receivedAudioStream(RECEIVED_AUDIO_STREAM_CAPACITY_FRAMES, RECEIVED_AUDIO_STREAM_CAPACITY_FRAMES)
+    _receivedAudioStream(RECEIVED_AUDIO_STREAM_CAPACITY_FRAMES, RECEIVED_AUDIO_STREAM_CAPACITY_FRAMES),
+    _audioGate(AudioConstants::SAMPLE_RATE, AudioConstants::MONO)
 {
    _entityEditSender.setPacketsPerSecond(DEFAULT_ENTITY_PPS_PER_SCRIPT);
    DependencyManager::get<EntityScriptingInterface>()->setPacketSender(&_entityEditSender);
@ -397,16 +398,23 @@ void Agent::executeScript() {
        QByteArray audio(frame->data);

        if (_isNoiseGateEnabled) {
-            static int numSamples = AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL;
-            _noiseGate.gateSamples(reinterpret_cast<int16_t*>(audio.data()), numSamples);
+            int16_t* samples = reinterpret_cast<int16_t*>(audio.data());
+            int numSamples = AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL;
+            _audioGate.render(samples, samples, numSamples);
        }

        computeLoudness(&audio, scriptedAvatar);

-        // the codec needs a flush frame before sending silent packets, so
-        // do not send one if the gate closed in this block (eventually this can be crossfaded).
+        // state machine to detect gate opening and closing
+        bool audioGateOpen = (scriptedAvatar->getAudioLoudness() != 0.0f);
+        bool openedInLastBlock = !_audioGateOpen && audioGateOpen;  // the gate just opened
+        bool closedInLastBlock = _audioGateOpen && !audioGateOpen;  // the gate just closed
+        _audioGateOpen = audioGateOpen;
+
+        // the codec must be flushed to silence before sending silent packets,
+        // so delay the transition to silent packets by one packet after becoming silent.
        auto packetType = PacketType::MicrophoneAudioNoEcho;
-        if (scriptedAvatar->getAudioLoudness() == 0.0f && !_noiseGate.closedInLastBlock()) {
+        if (!audioGateOpen && !closedInLastBlock) {
            packetType = PacketType::SilentAudioFrame;
        }

@ -620,19 +628,21 @@ void Agent::encodeFrameOfZeros(QByteArray& encodedZeros) {
 }

 void Agent::computeLoudness(const QByteArray* decodedBuffer, QSharedPointer<ScriptableAvatar> scriptableAvatar) {
-    float loudness = 0.0f;
+    float lastInputLoudness = 0.0f;
    if (decodedBuffer) {
-        auto soundData = reinterpret_cast<const int16_t*>(decodedBuffer->constData());
-        int numFrames = decodedBuffer->size() / sizeof(int16_t);
-        // now iterate and come up with average
-        if (numFrames > 0) {
-            for(int i = 0; i < numFrames; i++) {
-                loudness += (float) std::abs(soundData[i]);
+        auto samples = reinterpret_cast<const int16_t*>(decodedBuffer->constData());
+        int numSamples = decodedBuffer->size() / AudioConstants::SAMPLE_SIZE;
+
+        assert(numSamples < 65536); // int32_t loudness cannot overflow
+        if (numSamples > 0) {
+            int32_t loudness = 0;
+            for (int i = 0; i < numSamples; ++i) {
+                loudness += std::abs((int32_t)samples[i]);
            }
-            loudness /= numFrames;
+            lastInputLoudness = (float)loudness / numSamples;
        }
    }
-    scriptableAvatar->setAudioLoudness(loudness);
+    scriptableAvatar->setAudioLoudness(lastInputLoudness);
 }

 void Agent::processAgentAvatarAudio() {
--- a/assignment-client/src/Agent.h
+++ b/assignment-client/src/Agent.h
@ -29,7 +29,7 @@

 #include <plugins/CodecPlugin.h>

-#include "AudioNoiseGate.h"
+#include "AudioGate.h"
 #include "MixedAudioStream.h"
 #include "avatars/ScriptableAvatar.h"

@ -111,7 +111,8 @@ private:
    QTimer* _avatarIdentityTimer = nullptr;
    QHash<QUuid, quint16> _outgoingScriptAudioSequenceNumbers;

-    AudioNoiseGate _noiseGate;
+    AudioGate _audioGate;
+    bool _audioGateOpen { false };
    bool _isNoiseGateEnabled { false };

    CodecPluginPointer _codec;
--- a/libraries/audio-client/src/AudioClient.cpp
+++ b/libraries/audio-client/src/AudioClient.cpp
@ -1007,30 +1007,27 @@ void AudioClient::handleAudioInput(QByteArray& audioBuffer) {
        _timeSinceLastClip = 0.0f;
    } else {
        int16_t* samples = reinterpret_cast<int16_t*>(audioBuffer.data());
-        int numSamples = audioBuffer.size() / sizeof(AudioConstants::SAMPLE_SIZE);
-        bool didClip = false;
+        int numSamples = audioBuffer.size() / AudioConstants::SAMPLE_SIZE;
+        int numFrames = numSamples / (_isStereoInput ? AudioConstants::STEREO : AudioConstants::MONO);

-        bool shouldRemoveDCOffset = !_isPlayingBackRecording && !_isStereoInput;
-        if (shouldRemoveDCOffset) {
-            _noiseGate.removeDCOffset(samples, numSamples);
-        }
-
-        bool shouldNoiseGate = (_isPlayingBackRecording || !_isStereoInput) && _isNoiseGateEnabled;
-        if (shouldNoiseGate) {
-            _noiseGate.gateSamples(samples, numSamples);
-            _lastInputLoudness = _noiseGate.getLastLoudness();
-            didClip = _noiseGate.clippedInLastBlock();
+        if (_isNoiseGateEnabled) {
+            // The audio gate includes DC removal
+            _audioGate->render(samples, samples, numFrames);
        } else {
-            float loudness = 0.0f;
-            for (int i = 0; i < numSamples; ++i) {
-                int16_t sample = std::abs(samples[i]);
-                loudness += (float)sample;
-                didClip = didClip ||
-                    (sample > (AudioConstants::MAX_SAMPLE_VALUE * AudioNoiseGate::CLIPPING_THRESHOLD));
-            }
-            _lastInputLoudness = fabs(loudness / numSamples);
+            _audioGate->removeDC(samples, samples, numFrames);
        }

+        int32_t loudness = 0;
+        assert(numSamples < 65536); // int32_t loudness cannot overflow
+        bool didClip = false;
+        for (int i = 0; i < numSamples; ++i) {
+            const int32_t CLIPPING_THRESHOLD = (int32_t)(AudioConstants::MAX_SAMPLE_VALUE * 0.9f);
+            int32_t sample = std::abs((int32_t)samples[i]);
+            loudness += sample;
+            didClip |= (sample > CLIPPING_THRESHOLD);
+        }
+        _lastInputLoudness = (float)loudness / numSamples;
+
        if (didClip) {
            _timeSinceLastClip = 0.0f;
        } else if (_timeSinceLastClip >= 0.0f) {
@ -1038,19 +1035,24 @@ void AudioClient::handleAudioInput(QByteArray& audioBuffer) {
        }

        emit inputReceived(audioBuffer);
-
-        if (_noiseGate.openedInLastBlock()) {
-            emit noiseGateOpened();
-        } else if (_noiseGate.closedInLastBlock()) {
-            emit noiseGateClosed();
-        }
    }

-    // the codec needs a flush frame before sending silent packets, so
-    // do not send one if the gate closed in this block (eventually this can be crossfaded).
-    auto packetType = _shouldEchoToServer ?
-        PacketType::MicrophoneAudioWithEcho : PacketType::MicrophoneAudioNoEcho;
-    if (_lastInputLoudness == 0.0f && !_noiseGate.closedInLastBlock()) {
+    // state machine to detect gate opening and closing
+    bool audioGateOpen = (_lastInputLoudness != 0.0f);
+    bool openedInLastBlock = !_audioGateOpen && audioGateOpen;  // the gate just opened
+    bool closedInLastBlock = _audioGateOpen && !audioGateOpen;  // the gate just closed
+    _audioGateOpen = audioGateOpen;
+
+    if (openedInLastBlock) {
+        emit noiseGateOpened();
+    } else if (closedInLastBlock) {
+        emit noiseGateClosed();
+    }
+
+    // the codec must be flushed to silence before sending silent packets,
+    // so delay the transition to silent packets by one packet after becoming silent.
+    auto packetType = _shouldEchoToServer ? PacketType::MicrophoneAudioWithEcho : PacketType::MicrophoneAudioNoEcho;
+    if (!audioGateOpen && !closedInLastBlock) {
        packetType = PacketType::SilentAudioFrame;
        _silentOutbound.increment();
    } else {
@ -1415,6 +1417,10 @@ bool AudioClient::switchInputToAudioDevice(const QAudioDeviceInfo& inputDeviceIn
        delete _inputToNetworkResampler;
        _inputToNetworkResampler = NULL;
    }
+    if (_audioGate) {
+        delete _audioGate;
+        _audioGate = nullptr;
+    }

    if (!inputDeviceInfo.isNull()) {
        qCDebug(audioclient) << "The audio input device " << inputDeviceInfo.deviceName() << "is available.";
@ -1440,6 +1446,10 @@ bool AudioClient::switchInputToAudioDevice(const QAudioDeviceInfo& inputDeviceIn
                qCDebug(audioclient) << "No resampling required for audio input to match desired network format.";
            }

+            // the audio gate runs after the resampler
+            _audioGate = new AudioGate(_desiredInputFormat.sampleRate(), _desiredInputFormat.channelCount());
+            qCDebug(audioclient) << "Noise gate created with" << _desiredInputFormat.channelCount() << "channels.";
+
            // if the user wants stereo but this device can't provide then bail
            if (!_isStereoInput || _inputFormat.channelCount() == 2) {
                _audioInput = new QAudioInput(inputDeviceInfo, _inputFormat, this);
--- a/libraries/audio-client/src/AudioClient.h
+++ b/libraries/audio-client/src/AudioClient.h
@ -45,7 +45,7 @@
 #include <AudioReverb.h>
 #include <AudioLimiter.h>
 #include <AudioConstants.h>
-#include <AudioNoiseGate.h>
+#include <AudioGate.h>

 #include <shared/RateCounter.h>

@ -108,7 +108,7 @@ public:
    void selectAudioFormat(const QString& selectedCodecName);

    Q_INVOKABLE QString getSelectedAudioFormat() const { return _selectedCodecName; }
-    Q_INVOKABLE bool getNoiseGateOpen() const { return _noiseGate.isOpen(); }
+    Q_INVOKABLE bool getNoiseGateOpen() const { return _audioGateOpen; }
    Q_INVOKABLE float getSilentInboundPPS() const { return _silentInbound.rate(); }
    Q_INVOKABLE float getAudioInboundPPS() const { return _audioInbound.rate(); }
    Q_INVOKABLE float getSilentOutboundPPS() const { return _silentOutbound.rate(); }
@ -117,7 +117,7 @@ public:
    const MixedProcessedAudioStream& getReceivedAudioStream() const { return _receivedAudioStream; }
    MixedProcessedAudioStream& getReceivedAudioStream() { return _receivedAudioStream; }

-    float getLastInputLoudness() const { return glm::max(_lastInputLoudness - _noiseGate.getMeasuredFloor(), 0.0f); }
+    float getLastInputLoudness() const { return _lastInputLoudness; }   // TODO: relative to noise floor?

    float getTimeSinceLastClip() const { return _timeSinceLastClip; }
    float getAudioAverageInputLoudness() const { return _lastInputLoudness; }
@ -359,7 +359,8 @@ private:

    AudioIOStats _stats;

-    AudioNoiseGate _noiseGate;
+    AudioGate* _audioGate { nullptr };
+    bool _audioGateOpen { false };

    AudioPositionGetter _positionGetter;
    AudioOrientationGetter _orientationGetter;
--- a/libraries/audio/src/AudioDynamics.h
+++ b/libraries/audio/src/AudioDynamics.h
@ -0,0 +1,585 @@
+//
+//  AudioDynamics.h
+//  libraries/audio/src
+//
+//  Created by Ken Cooke on 5/5/17.
+//  Copyright 2017 High Fidelity, Inc.
+//
+
+//
+// Inline functions to implement audio dynamics processing
+//
+
+#include <math.h>
+#include <stdint.h>
+
+#ifndef MAX
+#define MAX(a,b) ((a) > (b) ? (a) : (b))    // function-like macro: the selected argument is evaluated twice
+#endif 
+#ifndef MIN
+#define MIN(a,b) ((a) < (b) ? (a) : (b))    // function-like macro: the selected argument is evaluated twice
+#endif
+
+#ifdef _MSC_VER
+#include <intrin.h>
+#define MUL64(a,b)  __emul((a), (b))                    // 64-bit product of two 32-bit values via the MSVC intrinsic
+#else
+#define MUL64(a,b)  ((int64_t)(a) * (int64_t)(b))       // widen both operands to 64 bits, then multiply
+#endif
+
+#define MULHI(a,b)      ((int32_t)(MUL64(a, b) >> 32))  // upper 32 bits of the 64-bit product
+#define MULQ31(a,b)     ((int32_t)(MUL64(a, b) >> 31))  // 64-bit product shifted right by 31 (Q31 x Q31 -> Q31)
+#define MULDIV64(a,b,c) (int32_t)(MUL64(a, b) / (c))    // (a * b) / c using the 64-bit intermediate product
+
+//
+// floatToInt(): on x86 architecture, assume that SSE2 is present and use the SSE conversion intrinsic
+//
+#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__x86_64__)
+
+#include <xmmintrin.h>
+// convert float to int; rounding follows the current SSE rounding mode (round-to-nearest-even by default)
+static inline int32_t floatToInt(float x) {
+    return _mm_cvt_ss2si(_mm_load_ss(&x));
+}
+
+#else 
+
+// convert float to int using round-to-nearest (ties round away from zero, unlike the SSE default)
+static inline int32_t floatToInt(float x) {
+    x += (x < 0.0f ? -0.5f : 0.5f); // bias by half a unit toward the sign of x so that truncation rounds
+    return (int32_t)x;              // truncates toward zero; values outside the int32_t range are not handled here
+}
+
+#endif  // _M_IX86
+
+static const double FIXQ31 = 2147483648.0;              // 2^31: scale factor that converts a float in [0,1) to Q31
+static const double DB_TO_LOG2 = 0.16609640474436813;   // log2(10)/20: converts a level in dB to log2 units
+
+// convert dB to linear amplitude gain, 10^(dB/20): 0 dB -> 1.0, -20 dB -> 0.1
+static inline double dBToGain(double dB) {
+    return pow(10.0, dB / 20.0);
+}
+
+// convert milliseconds to a first-order time constant coefficient exp(-1/n) in Q31, n = ms * sampleRate / 1000 (assumes sampleRate is in Hz)
+static inline int32_t msToTc(double ms, double sampleRate) {
+    double tc = exp(-1000.0 / (ms * sampleRate));   // in [0,1) for positive inputs; ms == 0 yields exp(-inf) = 0
+    return (int32_t)(FIXQ31 * tc);  // Q31; NOTE(review): would overflow if tc rounds to exactly 1.0 (extremely long times)
+}
+
+// log2 domain values are Q26: 5 integer bits and 26 fractional bits in an int32_t
+static const int LOG2_INTBITS = 5;
+static const int LOG2_FRACBITS = 31 - LOG2_INTBITS;     // = 26
+
+// log2 domain headroom bits above 0dB (added to the exponent term in peaklog2)
+static const int LOG2_HEADROOM = 15;
+
+// log2 domain offsets so error < 0 (LOG2_BIAS equals the stated log2 table error bound of 347 ulp)
+static const int32_t LOG2_BIAS = 347;
+static const int32_t EXP2_BIAS = 64;    // NOTE(review): not referenced in the visible code; confirm how callers apply it
+
+//
+// Piecewise quadratic P(x) = log2(1+x) for x=[0,1], one { c0, c1, c2 } row per segment
+// coefficients scaled by 1, 0.5, 0.25
+//
+// |error| < 347 ulp, smooth
+//
+static const int LOG2_TABBITS = 4;      // the top 4 bits of the Q31 fraction select one of 16 segments
+static const int32_t log2Table[1 << LOG2_TABBITS][3] = {    // evaluated by Horner's rule as (c0*x + c1)*x + c2
+    { -0x56dfe26d, 0x5c46daff, 0x00000000 },
+    { -0x4d397571, 0x5bae58e7, 0x00025a75 },
+    { -0x4518f84b, 0x5aabcac4, 0x000a62db },
+    { -0x3e3075ec, 0x596168c0, 0x0019d0e6 },
+    { -0x384486e9, 0x57e769c7, 0x00316109 },
+    { -0x332742ba, 0x564f1461, 0x00513776 },
+    { -0x2eb4bad4, 0x54a4cdfe, 0x00791de2 },
+    { -0x2ad07c6c, 0x52f18320, 0x00a8aa46 },
+    { -0x2763c4d6, 0x513ba123, 0x00df574c },
+    { -0x245c319b, 0x4f87c5c4, 0x011c9399 },
+    { -0x21aac79f, 0x4dd93bef, 0x015fcb52 },
+    { -0x1f433872, 0x4c325584, 0x01a86ddc },
+    { -0x1d1b54b4, 0x4a94ac6e, 0x01f5f13e },
+    { -0x1b2a9f81, 0x4901524f, 0x0247d3f2 },
+    { -0x1969fa57, 0x4778f3a7, 0x029d9dbf },
+    { -0x17d36370, 0x45fbf1e8, 0x02f6dfe8 },
+};
+
+//
+// Piecewise quadratic P(x) = exp2(x) for x=[0,1], one { c0, c1, c2 } row per segment
+// coefficients scaled by 2, 1, 0.5
+// Uses exp2(-x) = exp2(1-x)/2, so fixexp2 evaluates the polynomial at the complemented fraction
+//
+// |error| < 1387 ulp, smooth
+//
+static const int EXP2_TABBITS = 4;      // the top 4 bits of the Q31 fraction select one of 16 segments
+static const int32_t exp2Table[1 << EXP2_TABBITS][3] = {    // evaluated by Horner's rule as (c0*x + c1)*x + c2
+    { 0x3ed838c8, 0x58b574b7, 0x40000000 },
+    { 0x41a0821c, 0x5888db8f, 0x4000b2b7 },
+    { 0x4488548d, 0x582bcbc6, 0x40039be1 },
+    { 0x4791158a, 0x579a1128, 0x400a71ae },
+    { 0x4abc3a53, 0x56cf3089, 0x4017212e },
+    { 0x4e0b48af, 0x55c66396, 0x402bd31b },
+    { 0x517fd7a7, 0x547a946d, 0x404af0ec },
+    { 0x551b9049, 0x52e658f9, 0x40772a57 },
+    { 0x58e02e75, 0x5103ee08, 0x40b37b31 },
+    { 0x5ccf81b1, 0x4ecd321f, 0x410331b5 },
+    { 0x60eb6e09, 0x4c3ba007, 0x4169f548 },
+    { 0x6535ecf9, 0x49484909, 0x41ebcdaf },
+    { 0x69b10e5b, 0x45ebcede, 0x428d2acd },
+    { 0x6e5ef96c, 0x421e5d48, 0x4352ece7 },
+    { 0x7341edcb, 0x3dd7a354, 0x44426d7b },
+    { 0x785c4499, 0x390ecc3a, 0x456188bd },
+};
+
+static const int IEEE754_FABS_MASK = 0x7fffffff;    // clears the sign bit of a 32-bit float bit pattern
+static const int IEEE754_MANT_BITS = 23;            // width of the mantissa field of a 32-bit float
+static const int IEEE754_EXPN_BIAS = 127;           // exponent bias of a 32-bit float
+
+//
+// Peak detection and -log2(x) for float input (mono), result in Q26 offset by +LOG2_HEADROOM
+// |x| < 2^(LOG2_HEADROOM-31), including zero, returns 0x7fffffff
+// |x| > 2^LOG2_HEADROOM undefined
+//
+static inline int32_t peaklog2(float* input) {
+
+    // float as integer bits
+    int32_t u = *(int32_t*)input;   // NOTE(review): pointer-cast type pun; confirm the build tolerates this aliasing
+
+    // absolute value
+    int32_t peak = u & IEEE754_FABS_MASK;   // clear the sign bit
+
+    // split into e and x - 1.0
+    int32_t e = IEEE754_EXPN_BIAS - (peak >> IEEE754_MANT_BITS) + LOG2_HEADROOM;    // negated unbiased exponent, plus headroom
+    int32_t x = (peak << (31 - IEEE754_MANT_BITS)) & 0x7fffffff;                    // mantissa fraction moved up to Q31
+
+    // saturate
+    if (e > 31) {   // e would no longer fit the 5 integer bits of the Q26 result
+        return 0x7fffffff;
+    }
+
+    int k = x >> (31 - LOG2_TABBITS);   // top LOG2_TABBITS bits of the fraction select the table segment
+
+    // polynomial for log2(1+x) over x=[0,1]
+    int32_t c0 = log2Table[k][0];
+    int32_t c1 = log2Table[k][1];
+    int32_t c2 = log2Table[k][2];
+
+    c1 += MULHI(c0, x);     // Horner evaluation: (c0*x + c1)*x + c2
+    c2 += MULHI(c1, x);
+
+    // reconstruct result in Q26
+    return (e << LOG2_FRACBITS) - (c2 >> 3);    // integer part from the exponent minus the fractional log2 term
+}
+
+//
+// Peak detection and -log2(x) for float input (stereo), result in Q26 offset by +LOG2_HEADROOM
+// peak (larger magnitude of the two inputs) < 2^(LOG2_HEADROOM-31), including zero, returns 0x7fffffff
+// peak > 2^LOG2_HEADROOM undefined
+//
+static inline int32_t peaklog2(float* input0, float* input1) {
+
+    // float as integer bits
+    int32_t u0 = *(int32_t*)input0;     // NOTE(review): pointer-cast type pun; confirm the build tolerates this aliasing
+    int32_t u1 = *(int32_t*)input1;
+
+    // max absolute value
+    u0 &= IEEE754_FABS_MASK;
+    u1 &= IEEE754_FABS_MASK;
+    int32_t peak = MAX(u0, u1);     // with the sign bit cleared, the bit patterns are compared as integers
+
+    // split into e and x - 1.0
+    int32_t e = IEEE754_EXPN_BIAS - (peak >> IEEE754_MANT_BITS) + LOG2_HEADROOM;    // negated unbiased exponent, plus headroom
+    int32_t x = (peak << (31 - IEEE754_MANT_BITS)) & 0x7fffffff;                    // mantissa fraction moved up to Q31
+
+    // saturate
+    if (e > 31) {   // e would no longer fit the 5 integer bits of the Q26 result
+        return 0x7fffffff;
+    }
+
+    int k = x >> (31 - LOG2_TABBITS);   // top LOG2_TABBITS bits of the fraction select the table segment
+
+    // polynomial for log2(1+x) over x=[0,1]
+    int32_t c0 = log2Table[k][0];
+    int32_t c1 = log2Table[k][1];
+    int32_t c2 = log2Table[k][2];
+
+    c1 += MULHI(c0, x);     // Horner evaluation: (c0*x + c1)*x + c2
+    c2 += MULHI(c1, x);
+
+    // reconstruct result in Q26
+    return (e << LOG2_FRACBITS) - (c2 >> 3);    // integer part from the exponent minus the fractional log2 term
+}
+
+//
+// Peak detection and -log2(x) for float input (quad), result in Q26 offset by +LOG2_HEADROOM
+// peak (largest magnitude of the four inputs) < 2^(LOG2_HEADROOM-31), including zero, returns 0x7fffffff
+// peak > 2^LOG2_HEADROOM undefined
+//
+static inline int32_t peaklog2(float* input0, float* input1, float* input2, float* input3) {
+
+    // float as integer bits
+    int32_t u0 = *(int32_t*)input0;     // NOTE(review): pointer-cast type pun; confirm the build tolerates this aliasing
+    int32_t u1 = *(int32_t*)input1;
+    int32_t u2 = *(int32_t*)input2;
+    int32_t u3 = *(int32_t*)input3;
+
+    // max absolute value
+    u0 &= IEEE754_FABS_MASK;
+    u1 &= IEEE754_FABS_MASK;
+    u2 &= IEEE754_FABS_MASK;
+    u3 &= IEEE754_FABS_MASK;
+    int32_t peak = MAX(MAX(u0, u1), MAX(u2, u3));   // with the sign bit cleared, the bit patterns are compared as integers
+
+    // split into e and x - 1.0
+    int32_t e = IEEE754_EXPN_BIAS - (peak >> IEEE754_MANT_BITS) + LOG2_HEADROOM;    // negated unbiased exponent, plus headroom
+    int32_t x = (peak << (31 - IEEE754_MANT_BITS)) & 0x7fffffff;                    // mantissa fraction moved up to Q31
+
+    // saturate
+    if (e > 31) {   // e would no longer fit the 5 integer bits of the Q26 result
+        return 0x7fffffff;
+    }
+
+    int k = x >> (31 - LOG2_TABBITS);   // top LOG2_TABBITS bits of the fraction select the table segment
+
+    // polynomial for log2(1+x) over x=[0,1]
+    int32_t c0 = log2Table[k][0];
+    int32_t c1 = log2Table[k][1];
+    int32_t c2 = log2Table[k][2];
+
+    c1 += MULHI(c0, x);     // Horner evaluation: (c0*x + c1)*x + c2
+    c2 += MULHI(c1, x);
+
+    // reconstruct result in Q26
+    return (e << LOG2_FRACBITS) - (c2 >> 3);    // integer part from the exponent minus the fractional log2 term
+}
+
+//
+// Count Leading Zeros: number of zero bits above the most significant set bit of x (32 when x == 0)
+// Emulates the CLZ (ARM) and LZCNT (x86) instruction
+//
+static inline int CLZ(uint32_t x) {
+
+    if (x == 0) {
+        return 32;
+    }
+
+    // binary search: whenever the top 16/8/4/2 bits are all zero, shift them out and count them
+    int e = 0;
+    if (x < 0x00010000) {
+        x <<= 16;
+        e += 16;
+    }
+    if (x < 0x01000000) {
+        x <<= 8;
+        e += 8;
+    }
+    if (x < 0x10000000) {
+        x <<= 4;
+        e += 4;
+    }
+    if (x < 0x40000000) {
+        x <<= 2;
+        e += 2;
+    }
+    if (x < 0x80000000) {   // last step only tests the top bit, so no shift is needed
+        e += 1;
+    }
+    return e;
+}
+
+//
+// Compute -log2(x) for x=[0,1] in Q31, result in Q26 (same table and polynomial as peaklog2)
+// x = 0 returns 0x7fffffff
+// x < 0 undefined
+//
+static inline int32_t fixlog2(int32_t x) {
+
+    if (x == 0) {
+        return 0x7fffffff;
+    }
+
+    // split into e and x - 1.0
+    int e = CLZ((uint32_t)x);   // leading zero count becomes the integer part of the result
+    x <<= e;            // normalize to [0x80000000, 0xffffffff]
+    x &= 0x7fffffff;    // x - 1.0
+
+    int k = x >> (31 - LOG2_TABBITS);   // top LOG2_TABBITS bits of the fraction select the table segment
+
+    // polynomial for log2(1+x) over x=[0,1]
+    int32_t c0 = log2Table[k][0];
+    int32_t c1 = log2Table[k][1];
+    int32_t c2 = log2Table[k][2];
+
+    c1 += MULHI(c0, x);     // Horner evaluation: (c0*x + c1)*x + c2
+    c2 += MULHI(c1, x);
+
+    // reconstruct result in Q26
+    return (e << LOG2_FRACBITS) - (c2 >> 3);
+}
+
+//
+// Compute exp2(-x) for x=[0,32] in Q26, result in Q31
+// x < 0 undefined
+//
+static inline int32_t fixexp2(int32_t x) {
+
+    // split into e and 1.0 - x
+    int e = x >> LOG2_FRACBITS;             // integer part of x
+    x = ~(x << LOG2_INTBITS) & 0x7fffffff;  // fractional part moved up to Q31, then bit-complemented
+
+    int k = x >> (31 - EXP2_TABBITS);       // top EXP2_TABBITS bits select the table segment
+
+    // polynomial for exp2(x)
+    int32_t c0 = exp2Table[k][0];
+    int32_t c1 = exp2Table[k][1];
+    int32_t c2 = exp2Table[k][2];
+
+    c1 += MULHI(c0, x);     // Horner evaluation: (c0*x + c1)*x + c2
+    c2 += MULHI(c1, x);
+
+    // reconstruct result in Q31 (the integer part is applied as a right shift, so e must stay below 32)
+    return c2 >> e;
+}
+
+// fast TPDF dither in [-1.0f, 1.0f]: difference of the two 16-bit halves of a 32-bit LCG state, scaled by 2^-16
+static inline float dither() {
+    static uint32_t rz = 0;     // generator state; unsynchronized, and separate per translation unit (static inline function)
+    rz = rz * 69069 + 1;        // linear congruential update, wraps modulo 2^32
+    int32_t r0 = rz & 0xffff;   // low 16 bits
+    int32_t r1 = rz >> 16;      // high 16 bits
+    return (int32_t)(r0 - r1) * (1/65536.0f);
+}
+
+//
+// Min-hold lowpass filter: running minimum followed by a 2-stage CIC smoothing filter
+//
+// Bandlimits the gain control signal to greatly reduce the modulation distortion,
+// while still reaching the peak attenuation after exactly N-1 samples of delay.
+// N completely determines the attack time.
+// CIC1 and CIC2 are the lengths of the two CIC stages; see the MinFilter<N> specializations.
+template<int N, int CIC1, int CIC2>
+class MinFilterT {
+
+    static_assert((N & (N - 1)) == 0, "N must be a power of 2");
+    static_assert((CIC1 - 1) + (CIC2 - 1) == (N - 1), "Total CIC delay must be N-1");
+
+    int32_t _buffer[2*N] = {};  // shared FIFO used by the running-min stages and both CIC stages
+    size_t _index = 0;          // starting slot for the next process() call
+
+    int32_t _acc1 = 0;  // CIC1 integrator
+    int32_t _acc2 = 0;  // CIC2 integrator
+
+public:
+    MinFilterT() {
+
+        // fill history with the largest value so the zero-initialized buffer does not act as a minimum
+        for (size_t n = 0; n < N-1; n++) {
+            process(0x7fffffff);
+        }
+    }
+
+    int32_t process(int32_t x) {    // push one sample, return one filtered sample
+
+        const size_t MASK = 2*N - 1;    // buffer wrap
+        size_t i = _index;
+
+        // Fast min-hold using a running-min filter.  Finds the peak (min) value
+        // in the sliding window of N-1 samples, using only log2(N) comparisons.
+        // Hold time of N-1 samples exactly cancels the step response of FIR filter.
+
+        for (size_t n = 1; n < N; n <<= 1) {    // tap offsets 1, 2, 4, ... N/2
+
+            _buffer[i] = x;
+            i = (i + n) & MASK;
+            x = MIN(x, _buffer[i]);
+        }
+
+        // Fast FIR attack/lowpass filter using a 2-stage CIC filter.
+        // The step response reaches final value after N-1 samples.
+
+        const int32_t CICGAIN = 0xffffffff / (CIC1 * CIC2); // Q32 reciprocal of CIC1 * CIC2
+        x = MULHI(x, CICGAIN);      // pre-scale by about 1 / (CIC1 * CIC2)
+
+        _buffer[i] = _acc1;
+        _acc1 += x;                 // integrator
+        i = (i + CIC1 - 1) & MASK;
+        x = _acc1 - _buffer[i];     // comb
+
+        _buffer[i] = _acc2;
+        _acc2 += x;                 // integrator
+        i = (i + CIC2 - 1) & MASK;
+        x = _acc2 - _buffer[i];     // comb
+
+        _index = (i + 1) & MASK;    // skip unused tap; total advance is 2N-1, i.e. one slot back modulo 2N
+        return x;
+    }
+};
+
+//
+// Max-hold lowpass filter: running maximum followed by a 2-stage CIC smoothing filter
+//
+// Bandlimits the gain control signal to greatly reduce the modulation distortion,
+// while still reaching the peak attenuation after exactly N-1 samples of delay.
+// N completely determines the attack time.
+// CIC1 and CIC2 are the lengths of the two CIC stages; see the MaxFilter<N> specializations.
+template<int N, int CIC1, int CIC2>
+class MaxFilterT {
+
+    static_assert((N & (N - 1)) == 0, "N must be a power of 2");
+    static_assert((CIC1 - 1) + (CIC2 - 1) == (N - 1), "Total CIC delay must be N-1");
+
+    int32_t _buffer[2*N] = {};  // shared FIFO used by the running-max stages and both CIC stages
+    size_t _index = 0;          // starting slot for the next process() call
+
+    int32_t _acc1 = 0;  // CIC1 integrator
+    int32_t _acc2 = 0;  // CIC2 integrator
+
+public:
+    MaxFilterT() {
+
+        // fill history by running N-1 zero samples through the filter
+        for (size_t n = 0; n < N-1; n++) {
+            process(0);
+        }
+    }
+
+    int32_t process(int32_t x) {    // push one sample, return one filtered sample
+
+        const size_t MASK = 2*N - 1;    // buffer wrap
+        size_t i = _index;
+
+        // Fast max-hold using a running-max filter.  Finds the peak (max) value
+        // in the sliding window of N-1 samples, using only log2(N) comparisons.
+        // Hold time of N-1 samples exactly cancels the step response of FIR filter.
+
+        for (size_t n = 1; n < N; n <<= 1) {    // tap offsets 1, 2, 4, ... N/2
+
+            _buffer[i] = x;
+            i = (i + n) & MASK;
+            x = MAX(x, _buffer[i]);
+        }
+
+        // Fast FIR attack/lowpass filter using a 2-stage CIC filter.
+        // The step response reaches final value after N-1 samples.
+
+        const int32_t CICGAIN = 0xffffffff / (CIC1 * CIC2); // Q32 reciprocal of CIC1 * CIC2
+        x = MULHI(x, CICGAIN);      // pre-scale by about 1 / (CIC1 * CIC2)
+
+        _buffer[i] = _acc1;
+        _acc1 += x;                 // integrator
+        i = (i + CIC1 - 1) & MASK;
+        x = _acc1 - _buffer[i];     // comb
+
+        _buffer[i] = _acc2;
+        _acc2 += x;                 // integrator
+        i = (i + CIC2 - 1) & MASK;
+        x = _acc2 - _buffer[i];     // comb
+
+        _index = (i + 1) & MASK;    // skip unused tap; total advance is 2N-1, i.e. one slot back modulo 2N
+        return x;
+    }
+};
+
+//
+// Specializations that define the optimum lowpass filter for each length.
+// Each row is <N, CIC1, CIC2> with (CIC1 - 1) + (CIC2 - 1) == N - 1; only N = 16, 32, 64, 128, 256 are provided.
+template<int N> class MinFilter;    // declared only: any other N is an incomplete type
+template<> class MinFilter< 16> : public MinFilterT< 16,   7,  10> {};
+template<> class MinFilter< 32> : public MinFilterT< 32,  14,  19> {};
+template<> class MinFilter< 64> : public MinFilterT< 64,  27,  38> {};
+template<> class MinFilter<128> : public MinFilterT<128,  53,  76> {};
+template<> class MinFilter<256> : public MinFilterT<256, 106, 151> {};
+
+template<int N> class MaxFilter;    // declared only: any other N is an incomplete type
+template<> class MaxFilter< 16> : public MaxFilterT< 16,   7,  10> {};
+template<> class MaxFilter< 32> : public MaxFilterT< 32,  14,  19> {};
+template<> class MaxFilter< 64> : public MaxFilterT< 64,  27,  38> {};
+template<> class MaxFilter<128> : public MaxFilterT<128,  53,  76> {};
+template<> class MaxFilter<256> : public MaxFilterT<256, 106, 151> {};
+
+//
+// N-1 sample delay (mono): process() replaces x, in place, with the sample passed in N-1 calls earlier
+//
+template<int N, typename T = float>
+class MonoDelay {
+
+    static_assert((N & (N - 1)) == 0, "N must be a power of 2");
+
+    T _buffer[N] = {};      // circular history, value-initialized so the first N-1 outputs are T{}
+    size_t _index = 0;      // slot that the next call writes
+
+public:
+    void process(T& x) {
+
+        const size_t MASK = N - 1;  // buffer wrap
+        size_t i = _index;
+
+        _buffer[i] = x;             // store the newest sample
+
+        i = (i + (N - 1)) & MASK;   // step back one slot modulo N: this is the oldest sample
+
+        x = _buffer[i];
+
+        _index = i;                 // the slot just read is overwritten by the next call
+    }
+};
+
+//
+// N-1 sample delay (stereo): process() replaces x0/x1, in place, with the pair passed in N-1 calls earlier
+//
+template<int N, typename T = float>
+class StereoDelay {
+
+    static_assert((N & (N - 1)) == 0, "N must be a power of 2");
+
+    T _buffer[2*N] = {};    // circular history of interleaved pairs, value-initialized
+    size_t _index = 0;      // slot of the pair that the next call writes; always a multiple of 2
+
+public:
+    void process(T& x0, T& x1) {
+
+        const size_t MASK = 2*N - 1;    // buffer wrap
+        size_t i = _index;
+
+        _buffer[i+0] = x0;              // store the newest pair
+        _buffer[i+1] = x1;
+
+        i = (i + 2*(N - 1)) & MASK;     // step back one pair modulo 2N: this is the oldest pair
+
+        x0 = _buffer[i+0];
+        x1 = _buffer[i+1];
+
+        _index = i;                     // the pair just read is overwritten by the next call
+    }
+};
+
+//
+// N-1 sample delay (quad): process() replaces x0..x3, in place, with the group passed in N-1 calls earlier
+//
+template<int N, typename T = float>
+class QuadDelay {
+
+    static_assert((N & (N - 1)) == 0, "N must be a power of 2");
+
+    T _buffer[4*N] = {};    // circular history of interleaved groups of four, value-initialized
+    size_t _index = 0;      // slot of the group that the next call writes; always a multiple of 4
+
+public:
+    void process(T& x0, T& x1, T& x2, T& x3) {
+
+        const size_t MASK = 4*N - 1;    // buffer wrap
+        size_t i = _index;
+
+        _buffer[i+0] = x0;              // store the newest group
+        _buffer[i+1] = x1;
+        _buffer[i+2] = x2;
+        _buffer[i+3] = x3;
+
+        i = (i + 4*(N - 1)) & MASK;     // step back one group modulo 4N: this is the oldest group
+
+        x0 = _buffer[i+0];
+        x1 = _buffer[i+1];
+        x2 = _buffer[i+2];
+        x3 = _buffer[i+3];
+
+        _index = i;                     // the group just read is overwritten by the next call
+    }
+};
--- a/libraries/audio/src/AudioGate.cpp
+++ b/libraries/audio/src/AudioGate.cpp
@ -0,0 +1,735 @@
+//
+//  AudioGate.cpp
+//  libraries/audio/src
+//
+//  Created by Ken Cooke on 5/5/17.
+//  Copyright 2017 High Fidelity, Inc.
+//
+
+#include <string.h>
+#include <assert.h>
+
+#include "AudioDynamics.h"
+#include "AudioGate.h"
+
+// log2 domain headroom bits above 0dB (int32_t)
+static const int LOG2_HEADROOM_Q30 = 1;
+
+// convert Q30 to Q15 with saturation (the result always fits int16_t; callers narrow it)
+static inline int32_t saturateQ30(int32_t x) {
+    // add half an output LSB, then drop the 15 extra fraction bits
+    x = (x + (1 << 14)) >> 15;
+    x = MIN(MAX(x, -32768), 32767); // clamp to the int16_t range
+
+    return x;
+}
+
+//
+// First-order DC-blocking filter, with zero at 1.0 and pole at 0.99994
+//
+// -3dB @ 0.5 Hz (48KHz)
+// -3dB @ 0.2 Hz (24KHz)
+//
+// input in Q15, output in Q30
+// process() works in place: x arrives as a Q15 sample and leaves as the DC-free Q30 sample
+class MonoDCBlock {
+    // running estimate of the input's DC level, starts at 0
+    int32_t _dcOffset = {};     // Q30, cannot overflow
+
+public:
+    void process(int32_t& x) {
+        // NOTE(review): x <<= 15 on a negative x is undefined before C++20; this relies on two's-complement shifts
+        x <<= 15;               // scale to Q30
+        x -= _dcOffset;         // remove DC
+        _dcOffset += x >> 14;   // pole = (1.0 - 2^-14) = 0.99994
+    }
+};
+
+class StereoDCBlock {
+    // two-channel DC blocker: each channel keeps its own offset and gets the same three steps, in place
+    int32_t _dcOffset[2] = {};  // per-channel DC estimate, starts at 0
+
+public:
+    void process(int32_t& x0, int32_t& x1) {
+        // scale by 2^15 (NOTE(review): << on a negative value is undefined before C++20)
+        x0 <<= 15;
+        x1 <<= 15;
+        // subtract the current DC estimate
+        x0 -= _dcOffset[0];
+        x1 -= _dcOffset[1];
+        // move the estimate toward the input by 2^-14 of the residual
+        _dcOffset[0] += x0 >> 14;
+        _dcOffset[1] += x1 >> 14;
+    }
+};
+
+class QuadDCBlock {
+    // four-channel DC blocker: each channel keeps its own offset and gets the same three steps, in place
+    int32_t _dcOffset[4] = {};  // per-channel DC estimate, starts at 0
+
+public:
+    void process(int32_t& x0, int32_t& x1, int32_t& x2, int32_t& x3) {
+        // scale by 2^15 (NOTE(review): << on a negative value is undefined before C++20)
+        x0 <<= 15;
+        x1 <<= 15;
+        x2 <<= 15;
+        x3 <<= 15;
+        // subtract the current DC estimate
+        x0 -= _dcOffset[0];
+        x1 -= _dcOffset[1];
+        x2 -= _dcOffset[2];
+        x3 -= _dcOffset[3];
+        // move the estimate toward the input by 2^-14 of the residual
+        _dcOffset[0] += x0 >> 14;
+        _dcOffset[1] += x1 >> 14;
+        _dcOffset[2] += x2 >> 14;
+        _dcOffset[3] += x3 >> 14;
+    }
+};
+
+//
+// Gate (common)
+// Channel-independent detector state and parameters; subclasses supply the per-sample audio path.
+class GateImpl {
+protected:
+    // NOTE(review): log2-domain values appear to grow as the level drops (fixlog2 is defined elsewhere) - confirm
+    // histogram
+    static const int NHIST = 256;
+    int _update[NHIST] = {};        // counts for the current block (Q16), cleared by clearHistogram()
+    int _histogram[NHIST] = {};     // long-term counts, leaky-integrated in processHistogram()
+
+    // peakhold
+    int32_t _holdMin = 0x7fffffff;  // final release coefficient, set by setHold()
+    int32_t _holdInc = 0x7fffffff;  // per-sample step added to _holdRel (setHold() makes it <= -1)
+    uint32_t _holdMax = 0x7fffffff; // value _holdRel is reset to on attack
+    int32_t _holdRel = 0x7fffffff;  // current release state, compared as unsigned in peakhold()
+    int32_t _holdPeak = 0x7fffffff; // previous peakhold() output
+
+    // hysteresis
+    int32_t _hysteresis = 0;        // upper bound of _hystOffset, set by setHysteresis()
+    int32_t _hystOffset = 0;        // accumulated offset subtracted from the peak
+    int32_t _hystPeak = 0x7fffffff; // previous hysteresis() input
+
+    int32_t _release = 0x7fffffff;  // envelope release coefficient, set by setRelease()
+
+    int32_t _threshFixed = 0;       // threshold from setThreshold(), log2 domain
+    int32_t _threshAdapt = 0;       // threshold actually compared against, updated by processHistogram()
+    int32_t _attn = 0x7fffffff;     // previous envelope() output
+
+    int _sampleRate;                // set by the constructor, clamped to [8000, 96000]
+
+public:
+    GateImpl(int sampleRate);
+    virtual ~GateImpl() {}
+    // parameter setters (each clamps or converts its argument; see the definitions)
+    void setThreshold(float threshold);
+    void setHold(float hold);
+    void setHysteresis(float hysteresis);
+    void setRelease(float release);
+    // adaptive-threshold histogram
+    void clearHistogram() { memset(_update, 0, sizeof(_update)); }
+    void updateHistogram(int32_t value, int count);     // the default count = 1 is supplied on the definition
+    int partitionHistogram();
+    void processHistogram(int numFrames);
+    // per-sample detector stages, called in this order by the subclasses' process()
+    int32_t peakhold(int32_t peak);
+    int32_t hysteresis(int32_t peak);
+    int32_t envelope(int32_t attn);
+    // audio entry points; the input/output layout is defined by the subclass
+    virtual void process(int16_t* input, int16_t* output, int numFrames) = 0;
+    virtual void removeDC(int16_t* input, int16_t* output, int numFrames) = 0;
+};
+
+GateImpl::GateImpl(int sampleRate) {
+    // clamp the rate to [8000, 96000] before anything derives time constants from it
+    sampleRate = MAX(sampleRate, 8000);
+    sampleRate = MIN(sampleRate, 96000);
+    _sampleRate = sampleRate;
+
+    // defaults
+    setThreshold(-36.0);    // dB
+    setHold(20.0);          // milliseconds
+    setHysteresis(6.0);     // dB
+    setRelease(1000.0);     // milliseconds
+}
+
+//
+// Set the gate threshold (dB)
+// This is a base value that is modulated by the adaptive threshold algorithm.
+// The argument is clamped to [-96, 0]; calling this also resets the adaptive threshold to the new base.
+void GateImpl::setThreshold(float threshold) {
+
+    // gate threshold = -96dB to 0dB
+    threshold = MAX(threshold, -96.0f);
+    threshold = MIN(threshold, 0.0f);
+
+    // gate threshold in log2 domain
+    _threshFixed = (int32_t)(-(double)threshold * DB_TO_LOG2 * (1 << LOG2_FRACBITS));   // sign flipped: 0dB maps to 0
+    _threshFixed += LOG2_HEADROOM_Q30 << LOG2_FRACBITS;                                 // add the headroom bits
+
+    _threshAdapt = _threshFixed;
+}
+
+//
+// Set the detector hold time (milliseconds)
+//
+// hold is clamped to [1, 1000] ms. Three values used by peakhold() are derived:
+//   _holdMin  final release coefficient (100ms time constant)
+//   _holdInc  negative per-sample step that walks the coefficient from 0x7fffffff
+//             down to _holdMin over the 100ms progressive-hold stage
+//   _holdMax  start value on attack; it exceeds 0x7fffffff by (hold samples * |_holdInc|),
+//             so the coefficient stays saturated for the pure-hold time
+//
+void GateImpl::setHold(float hold) {
+
+    const double RELEASE = 100.0;   // release = 100ms
+    const double PROGHOLD = 0.100;  // progressive hold = 100ms (expressed in seconds)
+
+    // pure hold = 1 to 1000ms
+    hold = MAX(hold, 1.0f);
+    hold = MIN(hold, 1000.0f);
+
+    _holdMin = msToTc(RELEASE, _sampleRate);
+
+    _holdInc = (int32_t)((_holdMin - 0x7fffffff) / (PROGHOLD * _sampleRate));
+    _holdInc = MIN(_holdInc, -1); // prevent 0 on long releases
+
+    // _holdInc is negative, so convert the positive magnitude and add it.
+    // Casting the negative product straight to uint32_t is undefined behavior, and
+    // on targets that saturate the conversion to 0 the pure-hold stage would vanish.
+    const double holdSpan = -_holdInc * (double)hold/1000.0 * _sampleRate;
+    _holdMax = 0x7fffffffu + (uint32_t)holdSpan;
+}
+
+//
+// Set the detector hysteresis (dB)
+// NOTE(review): the argument is not range-clamped here; the only caller in this file passes 6.0
+void GateImpl::setHysteresis(float hysteresis) {
+
+    // gate hysteresis in log2 domain
+    _hysteresis = (int32_t)((double)hysteresis * DB_TO_LOG2 * (1 << LOG2_FRACBITS));
+}
+
+//
+// Set the gate release time (milliseconds)
+// The argument is clamped to [50, 5000] and stored as the coefficient used by envelope().
+void GateImpl::setRelease(float release) {
+
+    // gate release = 50 to 5000ms
+    release = MAX(release, 50.0f);
+    release = MIN(release, 5000.0f);
+
+    _release = msToTc((double)release, _sampleRate);
+}
+
+//
+// Update the histogram count of the bin which contains value
+// Adds count (default 1, supplied here rather than on the in-class declaration) to the current-block histogram.
+void GateImpl::updateHistogram(int32_t value, int count = 1) {
+
+    // quantize to LOG2 + 3 fraction bits (0.75dB steps)
+    int index = (NHIST-1) - (value >> (LOG2_FRACBITS - 3));    // larger values land in lower bins
+
+    assert(index >= 0);
+    assert(index < NHIST);
+
+    _update[index] += count << 16;  // Q16 for filtering
+
+    assert(_update[index] >= 0);    // debug-only check that the bin did not wrap
+}
+
+//
+// Partition the histogram
+//
+// The idea behind the adaptive threshold:
+//
+// When processing a gaussian mixture of signal and noise, separated by minimal SNR,
+// a bimodal distribution emerges in the histogram of preprocessed peak levels.
+// In this case, the threshold adapts toward the level that optimally partitions the distributions.
+// Partitioning is computed using Otsu's method.
+//
+// When only a single distribution is present, the threshold becomes level-dependent:
+// At levels below the fixed threshold, the threshold adapts toward the upper edge
+// of the distribution, presumed to be noise.
+// At levels above the fixed threshold, the threshold adapts toward the lower edge
+// of the distribution, presumed to be signal.
+// This is implemented by adding a hidden (bias) distribution at the fixed threshold.
+// Returns the last bin index of the lower class, or 0 when no split between two non-empty classes exists.
+int GateImpl::partitionHistogram() {
+
+    // initialize
+    int total = 0;      // sum of all bin weights
+    float sum = 0.0f;   // sum of (bin index * weight)
+    for (int i = 0; i < NHIST; i++) {
+        total += _histogram[i];
+        sum += (float)i * _histogram[i];
+    }
+    // running state for class 0 = bins [0, i]
+    int w0 = 0;
+    float sum0 = 0.0f;
+    float max = 0.0f;
+    int index = 0;
+
+    // find the index that maximizes the between-class variance
+    for (int i = 0 ; i < NHIST; i++) {
+
+        // update weights
+        w0 += _histogram[i];
+        int w1 = total - w0;    // class 1 = bins (i, NHIST)
+
+        if (w0 == 0) {
+            continue;   // skip leading zeros
+        }
+        if (w1 == 0) {
+            break;      // skip trailing zeros
+        }
+
+        // update means
+        sum0 += (float)i * _histogram[i];   // bins skipped above held zero weight (assuming non-negative counts)
+        float sum1 = sum - sum0;
+
+        float m0 = sum0 / (float)w0;
+        float m1 = sum1 / (float)w1;
+
+        // between-class variance
+        float variance = (float)w0 * (float)w1 * (m0 - m1) * (m0 - m1);    // not normalized; only the argmax matters
+
+        // update threshold
+        if (variance > max) {
+            max = variance;
+            index = i;
+        }
+    }
+    return index;
+}
+
+//
+// Process the histogram to update the adaptive threshold
+// Called once per block of numFrames frames, after the per-sample updateHistogram() calls.
+void GateImpl::processHistogram(int numFrames) {
+    // log2(e) in the log2 fixed-point format, rounded to nearest
+    const int32_t LOG2E_Q26 = (int32_t)(log2(exp(1.0)) * (1 << LOG2_FRACBITS) + 0.5);
+
+    // compute time constants, for sampleRate downsampled by numFrames
+    int32_t tcHistogram = fixexp2(MULDIV64(numFrames, LOG2E_Q26, _sampleRate * 10));    // 10 seconds
+    int32_t tcThreshold = fixexp2(MULDIV64(numFrames, LOG2E_Q26, _sampleRate * 1));     // 1 second
+    // the bias weighs as much as one eighth of the block's frames (rounded up)
+    // add bias at the fixed threshold
+    updateHistogram(_threshFixed, (numFrames+7)/8);
+
+    // leaky integrate into long-term histogram
+    for (int i = 0; i < NHIST; i++) {
+        _histogram[i] = _update[i] + MULQ31((_histogram[i] - _update[i]), tcHistogram);
+    }
+
+    // compute new threshold
+    int index = partitionHistogram();
+    int32_t threshold = ((NHIST-1) - index) << (LOG2_FRACBITS - 3);    // inverse of the bin mapping in updateHistogram()
+
+    // smooth threshold update
+    _threshAdapt = threshold + MULQ31((_threshAdapt - threshold), tcThreshold);
+
+    //printf("threshold = %0.1f\n", (_threshAdapt - (LOG2_HEADROOM_Q30 << LOG2_FRACBITS)) * -6.02f / (1 << LOG2_FRACBITS));
+}
+
+//
+// Gate detector peakhold
+// Follows a falling input value immediately; a rising input value is approached through a hold-then-release curve.
+int32_t GateImpl::peakhold(int32_t peak) {
+
+    if (peak > _holdPeak) {
+
+        // RELEASE
+        // 3-stage progressive hold
+        //
+        // (_holdRel > 0x7fffffff) pure hold
+        // (_holdRel > _holdMin) progressive hold
+        // (_holdRel = _holdMin) release
+
+        _holdRel += _holdInc;                                   // update progressive hold
+        _holdRel = MAX((uint32_t)_holdRel, (uint32_t)_holdMin); // saturate at final value
+        // _holdRel is read as unsigned so values above 0x7fffffff mean "still in pure hold"
+        int32_t tc = MIN((uint32_t)_holdRel, 0x7fffffff);
+        peak += MULQ31((_holdPeak - peak), tc);                 // apply release
+
+    } else {
+
+        // ATTACK
+        _holdRel = _holdMax;    // reset release
+    }
+    _holdPeak = peak;           // remember the output for the next call
+
+    return peak;
+}
+
+//
+// Gate hysteresis
+// Implemented as detector hysteresis instead of high/low thresholds, to simplify adaptive threshold.
+// Returns peak minus an offset that tracks the input's movement, clamped to [0, _hysteresis].
+int32_t GateImpl::hysteresis(int32_t peak) {
+
+    // by construction, cannot overflow/underflow
+    assert((double)_hystOffset + (peak - _hystPeak) <= +(double)0x7fffffff);
+    assert((double)_hystOffset + (peak - _hystPeak) >= -(double)0x80000000);
+
+    // accumulate the offset, with saturation
+    _hystOffset += peak - _hystPeak;                        // add the change since the previous call
+    _hystOffset = MIN(MAX(_hystOffset, 0), _hysteresis);
+
+    _hystPeak = peak;
+    peak -= _hystOffset;    // apply hysteresis
+
+    assert(peak >= 0);
+    return peak;
+}
+
+//
+// Gate envelope processing
+// zero attack, fixed release
+// A falling attn passes through unchanged; a rising attn is approached using the _release coefficient.
+int32_t GateImpl::envelope(int32_t attn) {
+
+    if (attn > _attn) {
+        attn += MULQ31((_attn - attn), _release);   // apply release
+    }
+    _attn = attn;   // remember the output for the next call
+
+    return attn;
+}
+
+//
+// Gate (mono)
+// N sizes the gain filter and the audio delay, so the output lags the input by N-1 samples.
+template<int N>
+class GateMono : public GateImpl {
+    // per-instance signal-path state
+    MonoDCBlock _dc;                // DC removal, Q15 in / Q30 out
+    MaxFilter<N> _filter;           // gain smoothing (MaxFilter is defined outside this file)
+    MonoDelay<N, int32_t> _delay;   // N-1 sample audio delay
+
+public:
+    GateMono(int sampleRate) : GateImpl(sampleRate) {}
+
+    // mono input/output (in-place is allowed)
+    void process(int16_t* input, int16_t* output, int numFrames) override;
+    void removeDC(int16_t* input, int16_t* output, int numFrames) override;
+};
+
+template<int N>
+void GateMono<N>::process(int16_t* input, int16_t* output, int numFrames) {
+    // gates numFrames mono samples; input[n] is read before output[n] is written, so in-place is safe
+    clearHistogram();
+
+    for (int n = 0; n < numFrames; n++) {
+
+        int32_t x = input[n];
+
+        // remove DC
+        _dc.process(x);         // x is now Q30
+
+        // peak detect
+        int32_t peak = abs(x);
+
+        // convert to log2 domain
+        peak = fixlog2(peak);
+
+        // apply peak hold
+        peak = peakhold(peak);
+
+        // count peak level
+        updateHistogram(peak);
+
+        // apply hysteresis
+        peak = hysteresis(peak);
+
+        // compute gate attenuation
+        int32_t attn = (peak > _threshAdapt) ? 0x7fffffff : 0;    // hard-knee, 1:inf ratio
+
+        // apply envelope
+        attn = envelope(attn);
+
+        // convert from log2 domain
+        attn = fixexp2(attn);
+
+        // lowpass filter
+        attn = _filter.process(attn);
+
+        // delay audio
+        _delay.process(x);      // x becomes the sample from N-1 frames ago
+
+        // apply gain
+        x = MULQ31(x, attn);
+
+        // store 16-bit output
+        output[n] = (int16_t)saturateQ30(x);
+    }
+
+    // update adaptive threshold
+    processHistogram(numFrames);
+}
+
+template<int N>
+void GateMono<N>::removeDC(int16_t* input, int16_t* output, int numFrames) {
+    // DC removal only: no gating and no delay; shares the _dc state with process()
+    for (int n = 0; n < numFrames; n++) {
+
+        int32_t x = input[n];
+
+        // remove DC
+        _dc.process(x);         // x is now Q30
+
+        // store 16-bit output
+        output[n] = (int16_t)saturateQ30(x);
+    }
+}
+
+//
+// Gate (stereo)
+// N sizes the gain filter and the audio delay, so the output lags the input by N-1 frames.
+template<int N>
+class GateStereo : public GateImpl {
+    // per-instance signal-path state
+    StereoDCBlock _dc;              // per-channel DC removal
+    MaxFilter<N> _filter;           // gain smoothing (MaxFilter is defined outside this file)
+    StereoDelay<N, int32_t> _delay; // N-1 frame audio delay
+
+public:
+    GateStereo(int sampleRate) : GateImpl(sampleRate) {}
+
+    // interleaved stereo input/output (in-place is allowed)
+    void process(int16_t* input, int16_t* output, int numFrames) override;
+    void removeDC(int16_t* input, int16_t* output, int numFrames) override;
+};
+
+template<int N>
+void GateStereo<N>::process(int16_t* input, int16_t* output, int numFrames) {
+    // gates numFrames interleaved stereo frames; one detector drives both channels with the same gain
+    clearHistogram();
+
+    for (int n = 0; n < numFrames; n++) {
+
+        int32_t x0 = input[2*n+0];
+        int32_t x1 = input[2*n+1];
+
+        // remove DC
+        _dc.process(x0, x1);    // x0/x1 are now Q30
+
+        // peak detect
+        int32_t peak = MAX(abs(x0), abs(x1));   // loudest channel of the frame
+
+        // convert to log2 domain
+        peak = fixlog2(peak);
+
+        // apply peak hold
+        peak = peakhold(peak);
+
+        // count peak level
+        updateHistogram(peak);
+
+        // apply hysteresis
+        peak = hysteresis(peak);
+
+        // compute gate attenuation
+        int32_t attn = (peak > _threshAdapt) ? 0x7fffffff : 0;    // hard-knee, 1:inf ratio
+
+        // apply envelope
+        attn = envelope(attn);
+
+        // convert from log2 domain
+        attn = fixexp2(attn);
+
+        // lowpass filter
+        attn = _filter.process(attn);
+
+        // delay audio
+        _delay.process(x0, x1); // x0/x1 become the frame from N-1 frames ago
+
+        // apply gain
+        x0 = MULQ31(x0, attn);
+        x1 = MULQ31(x1, attn);
+
+        // store 16-bit output
+        output[2*n+0] = (int16_t)saturateQ30(x0);
+        output[2*n+1] = (int16_t)saturateQ30(x1);
+    }
+
+    // update adaptive threshold
+    processHistogram(numFrames);
+}
+
+template<int N>
+void GateStereo<N>::removeDC(int16_t* input, int16_t* output, int numFrames) {
+    // DC removal only: no gating and no delay; shares the _dc state with process()
+    for (int n = 0; n < numFrames; n++) {
+
+        int32_t x0 = input[2*n+0];
+        int32_t x1 = input[2*n+1];
+
+        // remove DC
+        _dc.process(x0, x1);    // x0/x1 are now Q30
+
+        // store 16-bit output
+        output[2*n+0] = (int16_t)saturateQ30(x0);
+        output[2*n+1] = (int16_t)saturateQ30(x1);
+    }
+}
+
+//
+// Gate (quad)
+// N sizes the gain filter and the audio delay, so the output lags the input by N-1 frames.
+template<int N>
+class GateQuad : public GateImpl {
+    // per-instance signal-path state
+    QuadDCBlock _dc;                // per-channel DC removal
+    MaxFilter<N> _filter;           // gain smoothing (MaxFilter is defined outside this file)
+    QuadDelay<N, int32_t> _delay;   // N-1 frame audio delay
+
+public:
+    GateQuad(int sampleRate) : GateImpl(sampleRate) {}
+
+    // interleaved quad input/output (in-place is allowed)
+    void process(int16_t* input, int16_t* output, int numFrames) override;
+    void removeDC(int16_t* input, int16_t* output, int numFrames) override;
+};
+
+template<int N>
+void GateQuad<N>::process(int16_t* input, int16_t* output, int numFrames) {
+    // gates numFrames interleaved quad frames; one detector drives all four channels with the same gain
+    clearHistogram();
+
+    for (int n = 0; n < numFrames; n++) {
+
+        int32_t x0 = input[4*n+0];
+        int32_t x1 = input[4*n+1];
+        int32_t x2 = input[4*n+2];
+        int32_t x3 = input[4*n+3];
+
+        // remove DC
+        _dc.process(x0, x1, x2, x3);    // x0..x3 are now Q30
+
+        // peak detect
+        int32_t peak = MAX(MAX(abs(x0), abs(x1)), MAX(abs(x2), abs(x3)));   // loudest channel of the frame
+
+        // convert to log2 domain
+        peak = fixlog2(peak);
+
+        // apply peak hold
+        peak = peakhold(peak);
+
+        // count peak level
+        updateHistogram(peak);
+
+        // apply hysteresis
+        peak = hysteresis(peak);
+
+        // compute gate attenuation
+        int32_t attn = (peak > _threshAdapt) ? 0x7fffffff : 0;    // hard-knee, 1:inf ratio
+
+        // apply envelope
+        attn = envelope(attn);
+
+        // convert from log2 domain
+        attn = fixexp2(attn);
+
+        // lowpass filter
+        attn = _filter.process(attn);
+
+        // delay audio
+        _delay.process(x0, x1, x2, x3); // x0..x3 become the frame from N-1 frames ago
+
+        // apply gain
+        x0 = MULQ31(x0, attn);
+        x1 = MULQ31(x1, attn);
+        x2 = MULQ31(x2, attn);
+        x3 = MULQ31(x3, attn);
+
+        // store 16-bit output
+        output[4*n+0] = (int16_t)saturateQ30(x0);
+        output[4*n+1] = (int16_t)saturateQ30(x1);
+        output[4*n+2] = (int16_t)saturateQ30(x2);
+        output[4*n+3] = (int16_t)saturateQ30(x3);
+    }
+
+    // update adaptive threshold
+    processHistogram(numFrames);
+}
+
+template<int N>
+void GateQuad<N>::removeDC(int16_t* input, int16_t* output, int numFrames) {
+    // DC removal only: no gating and no delay; shares the _dc state with process()
+    for (int n = 0; n < numFrames; n++) {
+
+        int32_t x0 = input[4*n+0];
+        int32_t x1 = input[4*n+1];
+        int32_t x2 = input[4*n+2];
+        int32_t x3 = input[4*n+3];
+
+        // remove DC
+        _dc.process(x0, x1, x2, x3);    // x0..x3 are now Q30
+
+        // store 16-bit output
+        output[4*n+0] = (int16_t)saturateQ30(x0);
+        output[4*n+1] = (int16_t)saturateQ30(x1);
+        output[4*n+2] = (int16_t)saturateQ30(x2);
+        output[4*n+3] = (int16_t)saturateQ30(x3);
+    }
+}
+
+//
+// Public API
+//
+
+// Allocate the Gate<N> variant for sampleRate.
+// N doubles with each rate band so that the N-1 sample lookahead stays at ~3ms for all rates.
+template<template<int> class Gate>
+static GateImpl* createGate(int sampleRate) {
+
+    if (sampleRate < 16000) {
+        return new Gate<32>(sampleRate);
+    } else if (sampleRate < 32000) {
+        return new Gate<64>(sampleRate);
+    } else if (sampleRate < 64000) {
+        return new Gate<128>(sampleRate);
+    }
+    return new Gate<256>(sampleRate);
+}
+
+// Create a gate for interleaved int16_t audio.
+// numChannels must be 1, 2 or 4. Any other value asserts in debug builds; in builds
+// where assert() is compiled out, _impl stays null, so the destructor is still safe.
+AudioGate::AudioGate(int sampleRate, int numChannels) : _impl(nullptr) {
+
+    if (numChannels == 1) {
+        _impl = createGate<GateMono>(sampleRate);
+    } else if (numChannels == 2) {
+        _impl = createGate<GateStereo>(sampleRate);
+    } else if (numChannels == 4) {
+        _impl = createGate<GateQuad>(sampleRate);
+    } else {
+        assert(0); // unsupported
+    }
+}
+
+AudioGate::~AudioGate() {
+    delete _impl;   // releases the implementation allocated by the constructor
+}
+
+void AudioGate::render(int16_t* input, int16_t* output, int numFrames) {
+    _impl->process(input, output, numFrames);   // assumes the constructor selected an implementation (1, 2 or 4 channels)
+}
+
+void AudioGate::removeDC(int16_t* input, int16_t* output, int numFrames) {
+    _impl->removeDC(input, output, numFrames);  // DC removal only, no gating; same assumption on _impl as render()
+}
+
+void AudioGate::setThreshold(float threshold) {
+    _impl->setThreshold(threshold); // dB; GateImpl clamps it to [-96, 0]
+}
+
+void AudioGate::setRelease(float release) {
+    _impl->setRelease(release);     // milliseconds; GateImpl clamps it to [50, 5000]
+}
--- a/libraries/audio/src/AudioGate.h
+++ b/libraries/audio/src/AudioGate.h
@ -0,0 +1,32 @@
+//
+//  AudioGate.h
+//  libraries/audio/src
+//
+//  Created by Ken Cooke on 5/5/17.
+//  Copyright 2016 High Fidelity, Inc.
+//
+
+#ifndef hifi_AudioGate_h
+#define hifi_AudioGate_h
+
+#include <stdint.h>
+
+class GateImpl;
+
+// Noise gate for interleaved int16_t audio.
+// The processing is hidden behind GateImpl, which the constructor selects from
+// the sample rate and channel count.
+class AudioGate {
+public:
+    // numChannels must be 1, 2 or 4
+    AudioGate(int sampleRate, int numChannels);
+    ~AudioGate();
+
+    // _impl is held through a raw pointer that the destructor deletes,
+    // so a copy would delete it twice: copying is disabled
+    AudioGate(const AudioGate&) = delete;
+    AudioGate& operator=(const AudioGate&) = delete;
+
+    // interleaved int16_t input/output (in-place is allowed)
+    void render(int16_t* input, int16_t* output, int numFrames);    // gate the audio
+    void removeDC(int16_t* input, int16_t* output, int numFrames);  // remove DC only, no gating
+
+    void setThreshold(float threshold);     // dB
+    void setRelease(float release);         // milliseconds
+
+private:
+    GateImpl* _impl;    // owned
+};
+
+#endif // hifi_AudioGate_h
--- a/libraries/audio/src/AudioInjector.cpp
+++ b/libraries/audio/src/AudioInjector.cpp
@ -139,7 +139,7 @@ bool AudioInjector::inject(bool(AudioInjectorManager::*injection)(AudioInjector*
    if (_options.secondOffset > 0.0f) {
        int numChannels = _options.ambisonic ? 4 : (_options.stereo ? 2 : 1);
        byteOffset = (int)(AudioConstants::SAMPLE_RATE * _options.secondOffset * numChannels);
-        byteOffset *= sizeof(AudioConstants::SAMPLE_SIZE);
+        byteOffset *= AudioConstants::SAMPLE_SIZE;
    }
    _currentSendOffset = byteOffset;

--- a/libraries/audio/src/AudioLimiter.cpp
+++ b/libraries/audio/src/AudioLimiter.cpp
@ -6,452 +6,11 @@
 //  Copyright 2016 High Fidelity, Inc.
 //

-#include <math.h>
 #include <assert.h>

+#include "AudioDynamics.h"
 #include "AudioLimiter.h"

-#ifndef MAX
-#define MAX(a,b) ((a) > (b) ? (a) : (b))
-#endif 
-#ifndef MIN
-#define MIN(a,b) ((a) < (b) ? (a) : (b))
-#endif
-
-#ifdef _MSC_VER
-
-#include <intrin.h>
-#define MUL64(a,b)  __emul((a), (b))
-#define MULHI(a,b)  ((int)(MUL64(a, b) >> 32))
-#define MULQ31(a,b) ((int)(MUL64(a, b) >> 31))
-
-#else
-
-#define MUL64(a,b)  ((long long)(a) * (b))
-#define MULHI(a,b)  ((int)(MUL64(a, b) >> 32))
-#define MULQ31(a,b) ((int)(MUL64(a, b) >> 31))
-
-#endif  // _MSC_VER
-
-//
-// on x86 architecture, assume that SSE2 is present
-//
-#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__x86_64__)
-
-#include <xmmintrin.h>
-// convert float to int using round-to-nearest
-static inline int32_t floatToInt(float x) {
-    return _mm_cvt_ss2si(_mm_load_ss(&x));
-}
-
-#else 
-
-// convert float to int using round-to-nearest
-static inline int32_t floatToInt(float x) {
-    x += (x < 0.0f ? -0.5f : 0.5f); // round
-    return (int32_t)x;
-}
-
-#endif  // _M_IX86
-
-static const double FIXQ31 = 2147483648.0;              // convert float to Q31
-static const double DB_TO_LOG2 = 0.16609640474436813;   // convert dB to log2
-
-// convert dB to amplitude
-static double dBToGain(double dB) {
-    return pow(10.0, dB / 20.0);
-}
-
-// convert milliseconds to first-order time constant
-static int32_t msToTc(double ms, double sampleRate) {
-    double tc = exp(-1000.0 / (ms * sampleRate));
-    return (int32_t)(FIXQ31 * tc);  // Q31
-}
-
-// log2 domain values are Q26
-static const int LOG2_INTBITS = 5;
-static const int LOG2_FRACBITS = 31 - LOG2_INTBITS;
-
-// log2 domain headroom bits above 0dB
-static const int LOG2_HEADROOM = 15;
-
-// log2 domain offsets so error < 0
-static const int32_t LOG2_BIAS = 347;
-static const int32_t EXP2_BIAS = 64;
-
-//
-// P(x) = log2(1+x) for x=[0,1]
-// scaled by 1, 0.5, 0.25
-//
-// |error| < 347 ulp, smooth
-//
-static const int LOG2_TABBITS = 4;
-static const int32_t log2Table[1 << LOG2_TABBITS][3] = {
-    { -0x56dfe26d, 0x5c46daff, 0x00000000 },
-    { -0x4d397571, 0x5bae58e7, 0x00025a75 },
-    { -0x4518f84b, 0x5aabcac4, 0x000a62db },
-    { -0x3e3075ec, 0x596168c0, 0x0019d0e6 },
-    { -0x384486e9, 0x57e769c7, 0x00316109 },
-    { -0x332742ba, 0x564f1461, 0x00513776 },
-    { -0x2eb4bad4, 0x54a4cdfe, 0x00791de2 },
-    { -0x2ad07c6c, 0x52f18320, 0x00a8aa46 },
-    { -0x2763c4d6, 0x513ba123, 0x00df574c },
-    { -0x245c319b, 0x4f87c5c4, 0x011c9399 },
-    { -0x21aac79f, 0x4dd93bef, 0x015fcb52 },
-    { -0x1f433872, 0x4c325584, 0x01a86ddc },
-    { -0x1d1b54b4, 0x4a94ac6e, 0x01f5f13e },
-    { -0x1b2a9f81, 0x4901524f, 0x0247d3f2 },
-    { -0x1969fa57, 0x4778f3a7, 0x029d9dbf },
-    { -0x17d36370, 0x45fbf1e8, 0x02f6dfe8 },
-};
-
-//
-// P(x) = exp2(x) for x=[0,1]
-// scaled by 2, 1, 0.5
-// Uses exp2(-x) = exp2(1-x)/2
-//
-// |error| < 1387 ulp, smooth
-//
-static const int EXP2_TABBITS = 4;
-static const int32_t exp2Table[1 << EXP2_TABBITS][3] = {
-    { 0x3ed838c8, 0x58b574b7, 0x40000000 },
-    { 0x41a0821c, 0x5888db8f, 0x4000b2b7 },
-    { 0x4488548d, 0x582bcbc6, 0x40039be1 },
-    { 0x4791158a, 0x579a1128, 0x400a71ae },
-    { 0x4abc3a53, 0x56cf3089, 0x4017212e },
-    { 0x4e0b48af, 0x55c66396, 0x402bd31b },
-    { 0x517fd7a7, 0x547a946d, 0x404af0ec },
-    { 0x551b9049, 0x52e658f9, 0x40772a57 },
-    { 0x58e02e75, 0x5103ee08, 0x40b37b31 },
-    { 0x5ccf81b1, 0x4ecd321f, 0x410331b5 },
-    { 0x60eb6e09, 0x4c3ba007, 0x4169f548 },
-    { 0x6535ecf9, 0x49484909, 0x41ebcdaf },
-    { 0x69b10e5b, 0x45ebcede, 0x428d2acd },
-    { 0x6e5ef96c, 0x421e5d48, 0x4352ece7 },
-    { 0x7341edcb, 0x3dd7a354, 0x44426d7b },
-    { 0x785c4499, 0x390ecc3a, 0x456188bd },
-};
-
-static const int IEEE754_FABS_MASK = 0x7fffffff;
-static const int IEEE754_MANT_BITS = 23;
-static const int IEEE754_EXPN_BIAS = 127;
-
-//
-// Peak detection and -log2(x) for float input (mono)
-// x < 2^(31-LOG2_HEADROOM) returns 0x7fffffff
-// x > 2^LOG2_HEADROOM undefined
-//
-static inline int32_t peaklog2(float* input) {
-
-    // float as integer bits
-    int32_t u = *(int32_t*)input;
-
-    // absolute value
-    int32_t peak = u & IEEE754_FABS_MASK;
-
-    // split into e and x - 1.0
-    int32_t e = IEEE754_EXPN_BIAS - (peak >> IEEE754_MANT_BITS) + LOG2_HEADROOM;
-    int32_t x = (peak << (31 - IEEE754_MANT_BITS)) & 0x7fffffff;
-
-    // saturate
-    if (e > 31) {
-        return 0x7fffffff;
-    }
-
-    int k = x >> (31 - LOG2_TABBITS);
-
-    // polynomial for log2(1+x) over x=[0,1]
-    int32_t c0 = log2Table[k][0];
-    int32_t c1 = log2Table[k][1];
-    int32_t c2 = log2Table[k][2];
-
-    c1 += MULHI(c0, x);
-    c2 += MULHI(c1, x);
-
-    // reconstruct result in Q26
-    return (e << LOG2_FRACBITS) - (c2 >> 3);
-}
-
-//
-// Peak detection and -log2(x) for float input (stereo)
-// x < 2^(31-LOG2_HEADROOM) returns 0x7fffffff
-// x > 2^LOG2_HEADROOM undefined
-//
-static inline int32_t peaklog2(float* input0, float* input1) {
-
-    // float as integer bits
-    int32_t u0 = *(int32_t*)input0;
-    int32_t u1 = *(int32_t*)input1;
-
-    // max absolute value
-    u0 &= IEEE754_FABS_MASK;
-    u1 &= IEEE754_FABS_MASK;
-    int32_t peak = MAX(u0, u1);
-
-    // split into e and x - 1.0
-    int32_t e = IEEE754_EXPN_BIAS - (peak >> IEEE754_MANT_BITS) + LOG2_HEADROOM;
-    int32_t x = (peak << (31 - IEEE754_MANT_BITS)) & 0x7fffffff;
-
-    // saturate
-    if (e > 31) {
-        return 0x7fffffff;
-    }
-
-    int k = x >> (31 - LOG2_TABBITS);
-
-    // polynomial for log2(1+x) over x=[0,1]
-    int32_t c0 = log2Table[k][0];
-    int32_t c1 = log2Table[k][1];
-    int32_t c2 = log2Table[k][2];
-
-    c1 += MULHI(c0, x);
-    c2 += MULHI(c1, x);
-
-    // reconstruct result in Q26
-    return (e << LOG2_FRACBITS) - (c2 >> 3);
-}
-
-//
-// Peak detection and -log2(x) for float input (quad)
-// x < 2^(31-LOG2_HEADROOM) returns 0x7fffffff
-// x > 2^LOG2_HEADROOM undefined
-//
-static inline int32_t peaklog2(float* input0, float* input1, float* input2, float* input3) {
-
-    // float as integer bits
-    int32_t u0 = *(int32_t*)input0;
-    int32_t u1 = *(int32_t*)input1;
-    int32_t u2 = *(int32_t*)input2;
-    int32_t u3 = *(int32_t*)input3;
-
-    // max absolute value
-    u0 &= IEEE754_FABS_MASK;
-    u1 &= IEEE754_FABS_MASK;
-    u2 &= IEEE754_FABS_MASK;
-    u3 &= IEEE754_FABS_MASK;
-    int32_t peak = MAX(MAX(u0, u1), MAX(u2, u3));
-
-    // split into e and x - 1.0
-    int32_t e = IEEE754_EXPN_BIAS - (peak >> IEEE754_MANT_BITS) + LOG2_HEADROOM;
-    int32_t x = (peak << (31 - IEEE754_MANT_BITS)) & 0x7fffffff;
-
-    // saturate
-    if (e > 31) {
-        return 0x7fffffff;
-    }
-
-    int k = x >> (31 - LOG2_TABBITS);
-
-    // polynomial for log2(1+x) over x=[0,1]
-    int32_t c0 = log2Table[k][0];
-    int32_t c1 = log2Table[k][1];
-    int32_t c2 = log2Table[k][2];
-
-    c1 += MULHI(c0, x);
-    c2 += MULHI(c1, x);
-
-    // reconstruct result in Q26
-    return (e << LOG2_FRACBITS) - (c2 >> 3);
-}
-
-//
-// Compute exp2(-x) for x=[0,32] in Q26, result in Q31
-// x < 0 undefined
-//
-static inline int32_t fixexp2(int32_t x) {
-
-    // split into e and 1.0 - x
-    int32_t e = x >> LOG2_FRACBITS;
-    x = ~(x << LOG2_INTBITS) & 0x7fffffff;
-
-    int k = x >> (31 - EXP2_TABBITS);
-
-    // polynomial for exp2(x)
-    int32_t c0 = exp2Table[k][0];
-    int32_t c1 = exp2Table[k][1];
-    int32_t c2 = exp2Table[k][2];
-
-    c1 += MULHI(c0, x);
-    c2 += MULHI(c1, x);
-
-    // reconstruct result in Q31
-    return c2 >> e;
-}
-
-// fast TPDF dither in [-1.0f, 1.0f]
-static inline float dither() {
-    static uint32_t rz = 0;
-    rz = rz * 69069 + 1;
-    int32_t r0 = rz & 0xffff;
-    int32_t r1 = rz >> 16;
-    return (int32_t)(r0 - r1) * (1/65536.0f);
-}
-
-//
-// Peak-hold lowpass filter
-//
-// Bandlimits the gain control signal to greatly reduce the modulation distortion,
-// while still reaching the peak attenuation after exactly N-1 samples of delay.
-// N completely determines the limiter attack time.
-// 
-template<int N, int CIC1, int CIC2>
-class PeakFilterT {
-
-    static_assert((N & (N - 1)) == 0, "N must be a power of 2");
-    static_assert((CIC1 - 1) + (CIC2 - 1) == (N - 1), "Total CIC delay must be N-1");
-
-    int32_t _buffer[2*N] = {};  // shared FIFO
-    size_t _index = 0;
-
-    int32_t _acc1 = 0;  // CIC1 integrator
-    int32_t _acc2 = 0;  // CIC2 integrator
-
-public:
-    PeakFilterT() {
-
-        // fill history
-        for (size_t n = 0; n < N-1; n++) {
-            process(0x7fffffff);
-        }
-    }
-
-    int32_t process(int32_t x) {
-
-        const size_t MASK = 2*N - 1;    // buffer wrap
-        size_t i = _index;
-
-        // Fast peak-hold using a running-min filter.  Finds the peak (min) value
-        // in the sliding window of N-1 samples, using only log2(N) comparisons.
-        // Hold time of N-1 samples exactly cancels the step response of FIR filter.
-
-        for (size_t n = 1; n < N; n <<= 1) {
-
-            _buffer[i] = x;
-            i = (i + n) & MASK;
-            x = MIN(x, _buffer[i]);
-        }
-
-        // Fast FIR attack/lowpass filter using a 2-stage CIC filter.
-        // The step response reaches final value after N-1 samples.
-
-        const int32_t CICGAIN = 0xffffffff / (CIC1 * CIC2); // Q32
-        x = MULHI(x, CICGAIN);
-
-        _buffer[i] = _acc1;
-        _acc1 += x;                 // integrator
-        i = (i + CIC1 - 1) & MASK;
-        x = _acc1 - _buffer[i];     // comb
-
-        _buffer[i] = _acc2;
-        _acc2 += x;                 // integrator
-        i = (i + CIC2 - 1) & MASK;
-        x = _acc2 - _buffer[i];     // comb
-
-        _index = (i + 1) & MASK;    // skip unused tap
-        return x;
-    }
-};
-
-//
-// Specializations that define the optimum lowpass filter for each length.
-//
-template<int N> class PeakFilter;
-
-template<> class PeakFilter< 16> : public PeakFilterT< 16,   7,  10> {};
-template<> class PeakFilter< 32> : public PeakFilterT< 32,  14,  19> {};
-template<> class PeakFilter< 64> : public PeakFilterT< 64,  27,  38> {};
-template<> class PeakFilter<128> : public PeakFilterT<128,  53,  76> {};
-template<> class PeakFilter<256> : public PeakFilterT<256, 106, 151> {};
-
-//
-// N-1 sample delay (mono)
-//
-template<int N>
-class MonoDelay {
-
-    static_assert((N & (N - 1)) == 0, "N must be a power of 2");
-
-    float _buffer[N] = {};
-    size_t _index = 0;
-
-public:
-    void process(float& x) {
-
-        const size_t MASK = N - 1;  // buffer wrap
-        size_t i = _index;
-
-        _buffer[i] = x;
-
-        i = (i + (N - 1)) & MASK;
-
-        x = _buffer[i];
-
-        _index = i;
-    }
-};
-
-//
-// N-1 sample delay (stereo)
-//
-template<int N>
-class StereoDelay {
-
-    static_assert((N & (N - 1)) == 0, "N must be a power of 2");
-
-    float _buffer[2*N] = {};
-    size_t _index = 0;
-
-public:
-    void process(float& x0, float& x1) {
-
-        const size_t MASK = 2*N - 1;    // buffer wrap
-        size_t i = _index;
-
-        _buffer[i+0] = x0;
-        _buffer[i+1] = x1;
-
-        i = (i + 2*(N - 1)) & MASK;
-
-        x0 = _buffer[i+0];
-        x1 = _buffer[i+1];
-
-        _index = i;
-    }
-};
-
-//
-// N-1 sample delay (quad)
-//
-template<int N>
-class QuadDelay {
-
-    static_assert((N & (N - 1)) == 0, "N must be a power of 2");
-
-    float _buffer[4*N] = {};
-    size_t _index = 0;
-
-public:
-    void process(float& x0, float& x1, float& x2, float& x3) {
-
-        const size_t MASK = 4*N - 1;    // buffer wrap
-        size_t i = _index;
-
-        _buffer[i+0] = x0;
-        _buffer[i+1] = x1;
-        _buffer[i+2] = x2;
-        _buffer[i+3] = x3;
-
-        i = (i + 4*(N - 1)) & MASK;
-
-        x0 = _buffer[i+0];
-        x1 = _buffer[i+1];
-        x2 = _buffer[i+2];
-        x3 = _buffer[i+3];
-
-        _index = i;
-    }
-};
-
 //
 // Limiter (common)
 //
@ -637,7 +196,7 @@ int32_t LimiterImpl::envelope(int32_t attn) {
 template<int N>
 class LimiterMono : public LimiterImpl {

-    PeakFilter<N> _filter;
+    MinFilter<N> _filter;
    MonoDelay<N> _delay;

 public:
@ -688,7 +247,7 @@ void LimiterMono<N>::process(float* input, int16_t* output, int numFrames) {
 template<int N>
 class LimiterStereo : public LimiterImpl {

-    PeakFilter<N> _filter;
+    MinFilter<N> _filter;
    StereoDelay<N> _delay;

 public:
@ -745,7 +304,7 @@ void LimiterStereo<N>::process(float* input, int16_t* output, int numFrames) {
 template<int N>
 class LimiterQuad : public LimiterImpl {

-    PeakFilter<N> _filter;
+    MinFilter<N> _filter;
    QuadDelay<N> _delay;

 public:
--- a/libraries/audio/src/AudioLimiter.h
+++ b/libraries/audio/src/AudioLimiter.h
@ -9,7 +9,7 @@
 #ifndef hifi_AudioLimiter_h
 #define hifi_AudioLimiter_h

-#include "stdint.h"
+#include <stdint.h>

 class LimiterImpl;

--- a/libraries/audio/src/AudioNoiseGate.cpp
+++ b/libraries/audio/src/AudioNoiseGate.cpp
@ -1,164 +0,0 @@
-//
-//  AudioNoiseGate.cpp
-//  libraries/audio
-//
-//  Created by Stephen Birarda on 2014-12-16.
-//  Copyright 2014 High Fidelity, Inc.
-//
-//  Distributed under the Apache License, Version 2.0.
-//  See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html
-//
-
-#include "AudioNoiseGate.h"
-
-#include <cstdlib>
-#include <string.h>
-
-#include "AudioConstants.h"
-
-const float AudioNoiseGate::CLIPPING_THRESHOLD = 0.90f;
-
-AudioNoiseGate::AudioNoiseGate() :
-    _lastLoudness(0.0f),
-    _didClipInLastBlock(false),
-    _dcOffset(0.0f),
-    _measuredFloor(0.0f),
-    _sampleCounter(0),
-    _isOpen(false),
-    _blocksToClose(0) {}
-
-void AudioNoiseGate::removeDCOffset(int16_t* samples, int numSamples) {
-    //
-    //  DC Offset correction
-    //
-    //  Measure the DC offset over a trailing number of blocks, and remove it from the input signal.
-    //  This causes the noise background measurements and server muting to be more accurate.  Many off-board
-    //  ADC's have a noticeable DC offset.
-    //
-    const float DC_OFFSET_AVERAGING = 0.99f;
-    float measuredDcOffset = 0.0f;
-    //  Remove trailing DC offset from samples
-    for (int i = 0; i < numSamples; i++) {
-        measuredDcOffset += samples[i];
-        samples[i] -= (int16_t) _dcOffset;
-    }
-    // Update measured DC offset
-    measuredDcOffset /= numSamples;
-    if (_dcOffset == 0.0f) {
-        // On first block, copy over measured offset
-        _dcOffset = measuredDcOffset;
-    } else {
-        _dcOffset = DC_OFFSET_AVERAGING * _dcOffset + (1.0f - DC_OFFSET_AVERAGING) * measuredDcOffset;
-    }
-}
-
-void AudioNoiseGate::gateSamples(int16_t* samples, int numSamples) {
-    //
-    //  Impose Noise Gate
-    //
-    //  The Noise Gate is used to reject constant background noise by measuring the noise
-    //  floor observed at the microphone and then opening the 'gate' to allow microphone
-    //  signals to be transmitted when the microphone samples average level exceeds a multiple
-    //  of the noise floor.
-    //
-    //  NOISE_GATE_HEIGHT:  How loud you have to speak relative to noise background to open the gate.
-    //                      Make this value lower for more sensitivity and less rejection of noise.
-    //  NOISE_GATE_WIDTH:   The number of samples in an audio block for which the height must be exceeded
-    //                      to open the gate.
-    //  NOISE_GATE_CLOSE_BLOCK_DELAY:  Once the noise is below the gate height for the block, how many blocks
-    //                      will we wait before closing the gate.
-    //  NOISE_GATE_BLOCKS_TO_AVERAGE:  How many audio blocks should we average together to compute noise floor.
-    //                      More means better rejection but also can reject continuous things like singing.
-    // NUMBER_OF_NOISE_SAMPLE_BLOCKS:  How often should we re-evaluate the noise floor?
-
-    float loudness = 0;
-    int thisSample = 0;
-    int samplesOverNoiseGate = 0;
-
-    const float NOISE_GATE_HEIGHT = 7.0f;
-    const int NOISE_GATE_WIDTH = 5;
-    const int NOISE_GATE_CLOSE_BLOCK_DELAY = 5;
-    const int NOISE_GATE_BLOCKS_TO_AVERAGE = 5;
-
-    //  Check clipping, and check if should open noise gate
-    _didClipInLastBlock = false;
-
-    for (int i = 0; i < numSamples; i++) {
-        thisSample = std::abs(samples[i]);
-        if (thisSample >= ((float) AudioConstants::MAX_SAMPLE_VALUE * CLIPPING_THRESHOLD)) {
-            _didClipInLastBlock = true;
-        }
-
-        loudness += thisSample;
-        //  Noise Reduction:  Count peaks above the average loudness
-        if (thisSample > (_measuredFloor * NOISE_GATE_HEIGHT)) {
-            samplesOverNoiseGate++;
-        }
-    }
-
-    _lastLoudness = fabs(loudness / numSamples);
-
-    //  If Noise Gate is enabled, check and turn the gate on and off
-    float averageOfAllSampleBlocks = 0.0f;
-    _sampleBlocks[_sampleCounter++] = _lastLoudness;
-    if (_sampleCounter == NUMBER_OF_NOISE_SAMPLE_BLOCKS) {
-        float smallestSample = std::numeric_limits<float>::max();
-        for (int i = 0; i <= NUMBER_OF_NOISE_SAMPLE_BLOCKS - NOISE_GATE_BLOCKS_TO_AVERAGE; i += NOISE_GATE_BLOCKS_TO_AVERAGE) {
-            float thisAverage = 0.0f;
-            for (int j = i; j < i + NOISE_GATE_BLOCKS_TO_AVERAGE; j++) {
-                thisAverage += _sampleBlocks[j];
-                averageOfAllSampleBlocks += _sampleBlocks[j];
-            }
-            thisAverage /= NOISE_GATE_BLOCKS_TO_AVERAGE;
-
-            if (thisAverage < smallestSample) {
-                smallestSample = thisAverage;
-            }
-        }
-        averageOfAllSampleBlocks /= NUMBER_OF_NOISE_SAMPLE_BLOCKS;
-        _measuredFloor = smallestSample;
-        _sampleCounter = 0;
-
-    }
-
-    _closedInLastBlock = false;
-    _openedInLastBlock = false;
-
-    if (samplesOverNoiseGate > NOISE_GATE_WIDTH) {
-        _openedInLastBlock = !_isOpen;
-        _isOpen = true;
-        _blocksToClose = NOISE_GATE_CLOSE_BLOCK_DELAY;
-    } else {
-        if (--_blocksToClose == 0) {
-            _closedInLastBlock = _isOpen;
-            _isOpen = false;
-        }
-    }
-    if (!_isOpen) {
-        // First block after being closed gets faded to silence, we fade across
-        // the entire block on fading out. All subsequent blocks are muted by being slammed
-        // to zeros
-        if (_closedInLastBlock) {
-            float fadeSlope = (1.0f / numSamples);
-            for (int i = 0; i < numSamples; i++) {
-                float fadedSample = (1.0f - ((float)i * fadeSlope)) * (float)samples[i];
-                samples[i] = (int16_t)fadedSample;
-            }
-        } else {
-            memset(samples, 0, numSamples * sizeof(int16_t));
-        }
-        _lastLoudness = 0;
-    }
-
-    if (_openedInLastBlock) {
-        // would be nice to do a little crossfade from silence, but we only want to fade
-        // across the first 1/10th of the block, because we don't want to miss early
-        // transients.
-        int fadeSamples = numSamples / 10; // fade over 1/10th of the samples
-        float fadeSlope = (1.0f / fadeSamples);
-        for (int i = 0; i < fadeSamples; i++) {
-            float fadedSample = (float)i * fadeSlope * (float)samples[i];
-            samples[i] = (int16_t)fadedSample;
-        }
-    }
-}
--- a/libraries/audio/src/AudioNoiseGate.h
+++ b/libraries/audio/src/AudioNoiseGate.h
@ -1,48 +0,0 @@
-//
-//  AudioNoiseGate.h
-//  libraries/audio
-//
-//  Created by Stephen Birarda on 2014-12-16.
-//  Copyright 2014 High Fidelity, Inc.
-//
-//  Distributed under the Apache License, Version 2.0.
-//  See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html
-//
-
-#ifndef hifi_AudioNoiseGate_h
-#define hifi_AudioNoiseGate_h
-
-#include <stdint.h>
-
-const int NUMBER_OF_NOISE_SAMPLE_BLOCKS = 300;
-
-class AudioNoiseGate {
-public:
-    AudioNoiseGate();
-
-    void gateSamples(int16_t* samples, int numSamples);
-    void removeDCOffset(int16_t* samples, int numSamples);
-
-    bool clippedInLastBlock() const { return _didClipInLastBlock; }
-    bool closedInLastBlock() const { return _closedInLastBlock; }
-    bool openedInLastBlock() const { return _openedInLastBlock; }
-    bool isOpen() const { return _isOpen; }
-    float getMeasuredFloor() const { return _measuredFloor; }
-    float getLastLoudness() const { return _lastLoudness; }
-
-    static const float CLIPPING_THRESHOLD;
-
-private:
-    float _lastLoudness;
-    bool _didClipInLastBlock;
-    float _dcOffset;
-    float _measuredFloor;
-    float _sampleBlocks[NUMBER_OF_NOISE_SAMPLE_BLOCKS];
-    int _sampleCounter;
-    bool _isOpen;
-    bool _closedInLastBlock { false };
-    bool _openedInLastBlock { false };
-    int _blocksToClose;
-};
-
-#endif // hifi_AudioNoiseGate_h