Merge pull request #9897 from zzmp/record/noise-gate

Add AudioNoiseGate to recordings played through Interface
2025-04-25 17:14:59 +02:00 · 2017-03-21 00:21:20 -04:00 · 2017-03-21 00:21:20 -04:00 · 20ea13e00a
commit 20ea13e00a
parent 92e3f8768c 789bb7c855
5 changed files with 99 additions and 125 deletions
--- a/interface/resources/qml/Stats.qml
+++ b/interface/resources/qml/Stats.qml
@ -198,7 +198,7 @@ Item {
                    }
                    StatText {
                        visible: root.expanded;
-                        text: "Audio Out Mic: " + root.audioMicOutboundPPS + " pps, " +
+                        text: "Audio Out Mic: " + root.audioOutboundPPS + " pps, " +
                            "Silent: " + root.audioSilentOutboundPPS + " pps";
                    }
                    StatText {
--- a/interface/src/ui/Stats.cpp
+++ b/interface/src/ui/Stats.cpp
@ -220,10 +220,10 @@ void Stats::updateStats(bool force) {
            STAT_UPDATE(audioMixerInPps, roundf(bandwidthRecorder->getAverageInputPacketsPerSecond(NodeType::AudioMixer)));
            STAT_UPDATE(audioMixerOutKbps, roundf(bandwidthRecorder->getAverageOutputKilobitsPerSecond(NodeType::AudioMixer)));
            STAT_UPDATE(audioMixerOutPps, roundf(bandwidthRecorder->getAverageOutputPacketsPerSecond(NodeType::AudioMixer)));
-            STAT_UPDATE(audioMicOutboundPPS, audioClient->getMicAudioOutboundPPS());
-            STAT_UPDATE(audioSilentOutboundPPS, audioClient->getSilentOutboundPPS());
            STAT_UPDATE(audioAudioInboundPPS, audioClient->getAudioInboundPPS());
            STAT_UPDATE(audioSilentInboundPPS, audioClient->getSilentInboundPPS());
+            STAT_UPDATE(audioOutboundPPS, audioClient->getAudioOutboundPPS());
+            STAT_UPDATE(audioSilentOutboundPPS, audioClient->getSilentOutboundPPS());
        } else {
            STAT_UPDATE(audioMixerKbps, -1);
            STAT_UPDATE(audioMixerPps, -1);
@ -231,7 +231,7 @@ void Stats::updateStats(bool force) {
            STAT_UPDATE(audioMixerInPps, -1);
            STAT_UPDATE(audioMixerOutKbps, -1);
            STAT_UPDATE(audioMixerOutPps, -1);
-            STAT_UPDATE(audioMicOutboundPPS, -1);
+            STAT_UPDATE(audioOutboundPPS, -1);
            STAT_UPDATE(audioSilentOutboundPPS, -1);
            STAT_UPDATE(audioAudioInboundPPS, -1);
            STAT_UPDATE(audioSilentInboundPPS, -1);
--- a/interface/src/ui/Stats.h
+++ b/interface/src/ui/Stats.h
@ -77,7 +77,7 @@ class Stats : public QQuickItem {
    STATS_PROPERTY(int, audioMixerOutPps, 0)
    STATS_PROPERTY(int, audioMixerKbps, 0)
    STATS_PROPERTY(int, audioMixerPps, 0)
-    STATS_PROPERTY(int, audioMicOutboundPPS, 0)
+    STATS_PROPERTY(int, audioOutboundPPS, 0)
    STATS_PROPERTY(int, audioSilentOutboundPPS, 0)
    STATS_PROPERTY(int, audioAudioInboundPPS, 0)
    STATS_PROPERTY(int, audioSilentInboundPPS, 0)
@ -198,7 +198,7 @@ signals:
    void audioMixerOutPpsChanged();
    void audioMixerKbpsChanged();
    void audioMixerPpsChanged();
-    void audioMicOutboundPPSChanged();
+    void audioOutboundPPSChanged();
    void audioSilentOutboundPPSChanged();
    void audioAudioInboundPPSChanged();
    void audioSilentInboundPPSChanged();
--- a/libraries/audio-client/src/AudioClient.cpp
+++ b/libraries/audio-client/src/AudioClient.cpp
@ -184,7 +184,6 @@ AudioClient::AudioClient() :
    _outgoingAvatarAudioSequenceNumber(0),
    _audioOutputIODevice(_localInjectorsStream, _receivedAudioStream, this),
    _stats(&_receivedAudioStream),
-    _inputGate(),
    _positionGetter(DEFAULT_POSITION_GETTER),
    _orientationGetter(DEFAULT_ORIENTATION_GETTER) {
    // avoid putting a lock in the device callback
@ -971,14 +970,87 @@ void AudioClient::handleLocalEchoAndReverb(QByteArray& inputByteArray) {
    }
 }

-void AudioClient::handleAudioInput() {
+// Common path for mic and recorded frames: meter/gate the samples, then encode and send one packet.
+void AudioClient::handleAudioInput(QByteArray& audioBuffer) {
+    if (_muted) {
+        _lastInputLoudness = 0.0f;
+        _timeSinceLastClip = 0.0f;
+    } else {
+        int16_t* samples = reinterpret_cast<int16_t*>(audioBuffer.data());
+        // SAMPLE_SIZE is already the bytes-per-sample value; sizeof() of it measured the constant's type
+        int numSamples = audioBuffer.size() / AudioConstants::SAMPLE_SIZE;
+        bool didClip = false;
+
+        bool shouldRemoveDCOffset = !_isPlayingBackRecording && !_isStereoInput;
+        if (shouldRemoveDCOffset) {
+            _noiseGate.removeDCOffset(samples, numSamples);
+        }
+
+        bool shouldNoiseGate = (_isPlayingBackRecording || !_isStereoInput) && _isNoiseGateEnabled;
+        if (shouldNoiseGate) {
+            _noiseGate.gateSamples(samples, numSamples);
+            _lastInputLoudness = _noiseGate.getLastLoudness();
+            didClip = _noiseGate.clippedInLastBlock();
+        } else {
+            float loudness = 0.0f;
+            for (int i = 0; i < numSamples; ++i) {
+                // keep the magnitude in an int: abs(-32768) does not fit in int16_t
+                int sample = std::abs(samples[i]);
+                loudness += (float)sample;
+                didClip = didClip || (sample > (AudioConstants::MAX_SAMPLE_VALUE * AudioNoiseGate::CLIPPING_THRESHOLD));
+            }
+            _lastInputLoudness = fabs(loudness / numSamples);
+        }
+
+        if (didClip) {
+            _timeSinceLastClip = 0.0f;
+        } else if (_timeSinceLastClip >= 0.0f) {
+            _timeSinceLastClip += (float)numSamples / (float)AudioConstants::SAMPLE_RATE;
+        }
+
+        // hand listeners the whole processed frame: the length argument is in bytes, not samples
+        emit inputReceived({ audioBuffer.data(), audioBuffer.size() });
+        if (_noiseGate.openedInLastBlock()) {
+            emit noiseGateOpened();
+        } else if (_noiseGate.closedInLastBlock()) {
+            emit noiseGateClosed();
+        }
+    }
+
+    // the codec needs a flush frame before sending silent packets, so
+    // do not send one if the gate closed in this block (eventually this can be crossfaded).
+    auto packetType = _shouldEchoToServer ? PacketType::MicrophoneAudioWithEcho : PacketType::MicrophoneAudioNoEcho;
+    if (_lastInputLoudness == 0.0f && !_noiseGate.closedInLastBlock()) {
+        packetType = PacketType::SilentAudioFrame;
+        _silentOutbound.increment();
+    } else {
+        _audioOutbound.increment();
+    }
+
+    Transform audioTransform;
+    audioTransform.setTranslation(_positionGetter());
+    audioTransform.setRotation(_orientationGetter());
+    QByteArray encodedBuffer;
+    if (_encoder) {
+        _encoder->encode(audioBuffer, encodedBuffer);
+    } else {
+        encodedBuffer = audioBuffer;
+    }
+
+    emitAudioPacket(encodedBuffer.data(), encodedBuffer.size(), _outgoingAvatarAudioSequenceNumber,
+            audioTransform, avatarBoundingBoxCorner, avatarBoundingBoxScale,
+            packetType, _selectedCodecName);
+    _stats.sentPacket();
+}
+
+void AudioClient::handleMicAudioInput() {
    if (!_inputDevice || _isPlayingBackRecording) {
        return;
    }

    // input samples required to produce exactly NETWORK_FRAME_SAMPLES of output
-    const int inputSamplesRequired = (_inputToNetworkResampler ? 
-                                      _inputToNetworkResampler->getMinInput(AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL) : 
+    const int inputSamplesRequired = (_inputToNetworkResampler ?
+                                      _inputToNetworkResampler->getMinInput(AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL) :
                                      AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL) * _inputFormat.channelCount();

    const auto inputAudioSamples = std::unique_ptr<int16_t[]>(new int16_t[inputSamplesRequired]);
@ -1001,126 +1073,27 @@ void AudioClient::handleAudioInput() {
    static int16_t networkAudioSamples[AudioConstants::NETWORK_FRAME_SAMPLES_STEREO];

    while (_inputRingBuffer.samplesAvailable() >= inputSamplesRequired) {
-
-        if (!_muted) {
-
-
-            //  Increment the time since the last clip
-            if (_timeSinceLastClip >= 0.0f) {
-                _timeSinceLastClip += (float)numNetworkSamples / (float)AudioConstants::SAMPLE_RATE;
-            }
-
+        if (_muted) {
+            _inputRingBuffer.shiftReadPosition(inputSamplesRequired);
+        } else {
            _inputRingBuffer.readSamples(inputAudioSamples.get(), inputSamplesRequired);
            possibleResampling(_inputToNetworkResampler,
                inputAudioSamples.get(), networkAudioSamples,
                inputSamplesRequired, numNetworkSamples,
                _inputFormat.channelCount(), _desiredInputFormat.channelCount());
-
-            //  Remove DC offset
-            if (!_isStereoInput) {
-                _inputGate.removeDCOffset(networkAudioSamples, numNetworkSamples);
-            }
-
-            // only impose the noise gate and perform tone injection if we are sending mono audio
-            if (!_isStereoInput && _isNoiseGateEnabled) {
-                _inputGate.gateSamples(networkAudioSamples, numNetworkSamples);
-
-                // if we performed the noise gate we can get values from it instead of enumerating the samples again
-                _lastInputLoudness = _inputGate.getLastLoudness();
-
-                if (_inputGate.clippedInLastBlock()) {
-                    _timeSinceLastClip = 0.0f;
-                }
-
-            } else {
-                float loudness = 0.0f;
-
-                for (int i = 0; i < numNetworkSamples; i++) {
-                    int thisSample = std::abs(networkAudioSamples[i]);
-                    loudness += (float)thisSample;
-
-                    if (thisSample > (AudioConstants::MAX_SAMPLE_VALUE * AudioNoiseGate::CLIPPING_THRESHOLD)) {
-                        _timeSinceLastClip = 0.0f;
-                    }
-                }
-
-                _lastInputLoudness = fabs(loudness / numNetworkSamples);
-            }
-
-            emit inputReceived({ reinterpret_cast<char*>(networkAudioSamples), numNetworkBytes });
-
-            if (_inputGate.openedInLastBlock()) {
-                emit noiseGateOpened();
-            } else if (_inputGate.closedInLastBlock()) {
-                emit noiseGateClosed();
-            }
-
-        } else {
-            // our input loudness is 0, since we're muted
-            _lastInputLoudness = 0;
-            _timeSinceLastClip = 0.0f;
-
-            _inputRingBuffer.shiftReadPosition(inputSamplesRequired);
        }
-
-        auto packetType = _shouldEchoToServer ?
-            PacketType::MicrophoneAudioWithEcho : PacketType::MicrophoneAudioNoEcho;
-
-        // if the _inputGate closed in this last frame, then we don't actually want
-        // to send a silent packet, instead, we want to go ahead and encode and send
-        // the output from the input gate (eventually, this could be crossfaded)
-        // and allow the codec to properly encode down to silent/zero. If we still
-        // have _lastInputLoudness of 0 in our NEXT frame, we will send a silent packet
-        if (_lastInputLoudness == 0 && !_inputGate.closedInLastBlock()) {
-            packetType = PacketType::SilentAudioFrame;
-            _silentOutbound.increment();
-        } else {
-            _micAudioOutbound.increment();
-        }
-
-        Transform audioTransform;
-        audioTransform.setTranslation(_positionGetter());
-        audioTransform.setRotation(_orientationGetter());
-        // FIXME find a way to properly handle both playback audio and user audio concurrently
-
-        QByteArray decodedBuffer(reinterpret_cast<char*>(networkAudioSamples), numNetworkBytes);
-        QByteArray encodedBuffer;
-        if (_encoder) {
-            _encoder->encode(decodedBuffer, encodedBuffer);
-        } else {
-            encodedBuffer = decodedBuffer;
-        }
-
-        emitAudioPacket(encodedBuffer.constData(), encodedBuffer.size(), _outgoingAvatarAudioSequenceNumber,
-            audioTransform, avatarBoundingBoxCorner, avatarBoundingBoxScale,
-            packetType, _selectedCodecName);
-        _stats.sentPacket();
-
        int bytesInInputRingBuffer = _inputRingBuffer.samplesAvailable() * AudioConstants::SAMPLE_SIZE;
        float msecsInInputRingBuffer = bytesInInputRingBuffer / (float)(_inputFormat.bytesForDuration(USECS_PER_MSEC));
        _stats.updateInputMsUnplayed(msecsInInputRingBuffer);
+
+        QByteArray audioBuffer(reinterpret_cast<char*>(networkAudioSamples), numNetworkBytes);
+        handleAudioInput(audioBuffer);
    }
 }

-// FIXME - should this go through the noise gate and honor mute and echo?
 void AudioClient::handleRecordedAudioInput(const QByteArray& audio) {
-    Transform audioTransform;
-    audioTransform.setTranslation(_positionGetter());
-    audioTransform.setRotation(_orientationGetter());
-
-    QByteArray encodedBuffer;
-    if (_encoder) {
-        _encoder->encode(audio, encodedBuffer);
-    } else {
-        encodedBuffer = audio;
-    }
-
-    _micAudioOutbound.increment();
-
-    // FIXME check a flag to see if we should echo audio?
-    emitAudioPacket(encodedBuffer.data(), encodedBuffer.size(), _outgoingAvatarAudioSequenceNumber,
-                    audioTransform, avatarBoundingBoxCorner, avatarBoundingBoxScale,
-                    PacketType::MicrophoneAudioWithEcho, _selectedCodecName);
+    QByteArray audioBuffer(audio); // mutable copy: handleAudioInput() takes a non-const QByteArray&
+    handleAudioInput(audioBuffer);
 }

 void AudioClient::prepareLocalAudioInjectors() {
@ -1434,7 +1407,7 @@ bool AudioClient::switchInputToAudioDevice(const QAudioDeviceInfo& inputDeviceIn
                lock.unlock();

                if (_inputDevice) {
-                    connect(_inputDevice, SIGNAL(readyRead()), this, SLOT(handleAudioInput()));
+                    connect(_inputDevice, SIGNAL(readyRead()), this, SLOT(handleMicAudioInput()));
                    supportedFormat = true;
                } else {
                    qCDebug(audioclient) << "Error starting audio input -" <<  _audioInput->error();
--- a/libraries/audio-client/src/AudioClient.h
+++ b/libraries/audio-client/src/AudioClient.h
@ -124,16 +124,16 @@ public:
    void selectAudioFormat(const QString& selectedCodecName);

    Q_INVOKABLE QString getSelectedAudioFormat() const { return _selectedCodecName; }
-    Q_INVOKABLE bool getNoiseGateOpen() const { return _inputGate.isOpen(); }
-    Q_INVOKABLE float getSilentOutboundPPS() const { return _silentOutbound.rate(); }
-    Q_INVOKABLE float getMicAudioOutboundPPS() const { return _micAudioOutbound.rate(); }
+    Q_INVOKABLE bool getNoiseGateOpen() const { return _noiseGate.isOpen(); }
    Q_INVOKABLE float getSilentInboundPPS() const { return _silentInbound.rate(); }
    Q_INVOKABLE float getAudioInboundPPS() const { return _audioInbound.rate(); }
+    Q_INVOKABLE float getSilentOutboundPPS() const { return _silentOutbound.rate(); }
+    Q_INVOKABLE float getAudioOutboundPPS() const { return _audioOutbound.rate(); }

    const MixedProcessedAudioStream& getReceivedAudioStream() const { return _receivedAudioStream; }
    MixedProcessedAudioStream& getReceivedAudioStream() { return _receivedAudioStream; }

-    float getLastInputLoudness() const { return glm::max(_lastInputLoudness - _inputGate.getMeasuredFloor(), 0.0f); }
+    float getLastInputLoudness() const { return glm::max(_lastInputLoudness - _noiseGate.getMeasuredFloor(), 0.0f); }

    float getTimeSinceLastClip() const { return _timeSinceLastClip; }
    float getAudioAverageInputLoudness() const { return _lastInputLoudness; }
@ -180,7 +180,7 @@ public slots:
    void handleMismatchAudioFormat(SharedNodePointer node, const QString& currentCodec, const QString& recievedCodec);

    void sendDownstreamAudioStatsPacket() { _stats.publish(); }
-    void handleAudioInput();
+    void handleMicAudioInput();
    void handleRecordedAudioInput(const QByteArray& audio);
    void reset();
    void audioMixerKilled();
@ -250,6 +250,7 @@ protected:

 private:
    void outputFormatChanged();
+    void handleAudioInput(QByteArray& audioBuffer);
    bool mixLocalAudioInjectors(float* mixBuffer);
    float azimuthForSource(const glm::vec3& relativePosition);
    float gainForSource(float distance, float volume);
@ -371,7 +372,7 @@ private:

    AudioIOStats _stats;

-    AudioNoiseGate _inputGate;
+    AudioNoiseGate _noiseGate;

    AudioPositionGetter _positionGetter;
    AudioOrientationGetter _orientationGetter;
@ -395,7 +396,7 @@ private:
    QThread* _checkDevicesThread { nullptr };

    RateCounter<> _silentOutbound;
-    RateCounter<> _micAudioOutbound;
+    RateCounter<> _audioOutbound;
    RateCounter<> _silentInbound;
    RateCounter<> _audioInbound;
 };