Fixed repeated frame popping in AudioMixer: each audio stream is now popped once per mix pass instead of once per listening node

2025-07-22 19:46:00 +02:00 · 2014-07-25 12:58:10 -07:00 · 2014-07-25 12:58:10 -07:00 · a9d26b3934
commit a9d26b3934
parent 822ba4da48
7 changed files with 92 additions and 112 deletions
--- a/assignment-client/src/audio/AudioMixer.cpp
+++ b/assignment-client/src/audio/AudioMixer.cpp
@ -94,23 +94,7 @@ const float ATTENUATION_AMOUNT_PER_DOUBLING_IN_DISTANCE = 0.18f;
 const float ATTENUATION_EPSILON_DISTANCE = 0.1f;

 void AudioMixer::addBufferToMixForListeningNodeWithBuffer(PositionalAudioRingBuffer* bufferToAdd,
-                                                          AvatarAudioRingBuffer* listeningNodeBuffer,
-                                                          bool bufferToAddBelongsToListener) {
-
-    // if the buffer to be added belongs to the listener and it should not be echoed or
-    // if the buffer frame to be added is too soft, pop a frame from the buffer without mixing it.
-    if ((bufferToAddBelongsToListener && !bufferToAdd->shouldLoopbackForNode())
-        || bufferToAdd->getNextOutputTrailingLoudness() == 0.0f) {
-        bufferToAdd->popFrames(1);
-        return;
-    }
-
-    // get pointer to the frame to be mixed.  If the stream cannot provide a frame (is starved), bail
-    AudioRingBuffer::ConstIterator nextOutputStart;
-    if (!bufferToAdd->popFrames(&nextOutputStart, 1)) {
-        return;
-    }
-
+                                                          AvatarAudioRingBuffer* listeningNodeBuffer) {
    float bearingRelativeAngleToSource = 0.0f;
    float attenuationCoefficient = 1.0f;
    int numSamplesDelay = 0;
@ -219,7 +203,7 @@ void AudioMixer::addBufferToMixForListeningNodeWithBuffer(PositionalAudioRingBuf
        }
    }
    
-
+    AudioRingBuffer::ConstIterator bufferPopOutput = bufferToAdd->getLastPopOutput();
    
    if (!bufferToAdd->isStereo() && shouldAttenuate) {
        // this is a mono buffer, which means it gets full attenuation and spatialization
@ -236,8 +220,8 @@ void AudioMixer::addBufferToMixForListeningNodeWithBuffer(PositionalAudioRingBuf
        for (int s = 0; s < NETWORK_BUFFER_LENGTH_SAMPLES_STEREO; s += 4) {
            
            // setup the int16_t variables for the two sample sets
-            correctBufferSample[0] = nextOutputStart[s / 2] * attenuationCoefficient;
-            correctBufferSample[1] = nextOutputStart[(s / 2) + 1] * attenuationCoefficient;
+            correctBufferSample[0] = bufferPopOutput[s / 2] * attenuationCoefficient;
+            correctBufferSample[1] = bufferPopOutput[(s / 2) + 1] * attenuationCoefficient;
            
            delayedChannelIndex = s + (numSamplesDelay * 2) + delayedChannelOffset;
            
@ -254,15 +238,15 @@ void AudioMixer::addBufferToMixForListeningNodeWithBuffer(PositionalAudioRingBuf
            // if there was a sample delay for this buffer, we need to pull samples prior to the nextOutput
            // to stick at the beginning
            float attenuationAndWeakChannelRatio = attenuationCoefficient * weakChannelAmplitudeRatio;
-            AudioRingBuffer::ConstIterator delayNextOutputStart = nextOutputStart - numSamplesDelay;
+            AudioRingBuffer::ConstIterator delayBufferPopOutput = bufferPopOutput - numSamplesDelay;

-            // TODO: delayNextOutputStart may be inside the last frame written if the ringbuffer is completely full
+            // TODO: delayBufferPopOutput may be inside the last frame written if the ringbuffer is completely full
            // maybe make AudioRingBuffer have 1 extra frame in its buffer
            
            for (int i = 0; i < numSamplesDelay; i++) {
                int parentIndex = i * 2;
-                _clientSamples[parentIndex + delayedChannelOffset] += *delayNextOutputStart * attenuationAndWeakChannelRatio;
-                ++delayNextOutputStart;
+                _clientSamples[parentIndex + delayedChannelOffset] += *delayBufferPopOutput * attenuationAndWeakChannelRatio;
+                ++delayBufferPopOutput;
            }
        }
    } else {
@ -274,34 +258,9 @@ void AudioMixer::addBufferToMixForListeningNodeWithBuffer(PositionalAudioRingBuf
        }

        for (int s = 0; s < NETWORK_BUFFER_LENGTH_SAMPLES_STEREO; s++) {
-            _clientSamples[s] = glm::clamp(_clientSamples[s] + (int)(nextOutputStart[s / stereoDivider] * attenuationCoefficient),
+            _clientSamples[s] = glm::clamp(_clientSamples[s] + (int)(bufferPopOutput[s / stereoDivider] * attenuationCoefficient),
                                            MIN_SAMPLE_VALUE, MAX_SAMPLE_VALUE);
        }
-
-        /*for (int s = 0; s < NETWORK_BUFFER_LENGTH_SAMPLES_STEREO; s += 4) {
-
-            int stereoDivider = bufferToAdd->isStereo() ? 1 : 2;
-
-            if (!shouldAttenuate) {
-                attenuationCoefficient = 1.0f;
-            }
-
-            _clientSamples[s] = glm::clamp(_clientSamples[s]
-                + (int)(nextOutputStart[(s / stereoDivider)] * attenuationCoefficient),
-                MIN_SAMPLE_VALUE, MAX_SAMPLE_VALUE);
-            _clientSamples[s + 1] = glm::clamp(_clientSamples[s + 1]
-                + (int)(nextOutputStart[(s / stereoDivider) + (1 / stereoDivider)]
-                * attenuationCoefficient),
-                MIN_SAMPLE_VALUE, MAX_SAMPLE_VALUE);
-            _clientSamples[s + 2] = glm::clamp(_clientSamples[s + 2]
-                + (int)(nextOutputStart[(s / stereoDivider) + (2 / stereoDivider)]
-                * attenuationCoefficient),
-                MIN_SAMPLE_VALUE, MAX_SAMPLE_VALUE);
-            _clientSamples[s + 3] = glm::clamp(_clientSamples[s + 3]
-                + (int)(nextOutputStart[(s / stereoDivider) + (3 / stereoDivider)]
-                * attenuationCoefficient),
-                MIN_SAMPLE_VALUE, MAX_SAMPLE_VALUE);
-        }*/
    }
 }

@ -324,7 +283,12 @@ void AudioMixer::prepareMixForListeningNode(Node* node) {
            for (i = otherNodeRingBuffers.begin(); i != end; i++) {
                PositionalAudioRingBuffer* otherNodeBuffer = i.value();

-                addBufferToMixForListeningNodeWithBuffer(otherNodeBuffer, nodeRingBuffer, *otherNode == *node);
+                if ((*otherNode != *node || otherNodeBuffer->shouldLoopbackForNode())
+                    && otherNodeBuffer->lastPopSucceeded()
+                    && otherNodeBuffer->getNextOutputTrailingLoudness() > 0.0f) {
+
+                    addBufferToMixForListeningNodeWithBuffer(otherNodeBuffer, nodeRingBuffer);
+                }
            }
        }
    }
@ -581,36 +545,43 @@ void AudioMixer::run() {
        }

        foreach (const SharedNodePointer& node, nodeList->getNodeHash()) {
-            if (node->getType() == NodeType::Agent && node->getActiveSocket() && node->getLinkedData()
-                && ((AudioMixerClientData*) node->getLinkedData())->getAvatarAudioRingBuffer()) {
-
+            if (node->getActiveSocket() && node->getLinkedData()) {
+                
                AudioMixerClientData* nodeData = (AudioMixerClientData*)node->getLinkedData();

-                prepareMixForListeningNode(node.data());
-                
-                // pack header
-                int numBytesPacketHeader = populatePacketHeader(clientMixBuffer, PacketTypeMixedAudio);
-                char* dataAt = clientMixBuffer + numBytesPacketHeader;
+                // request a frame from each audio stream. a pointer to the popped data is stored as a member
+                // in InboundAudioStream.  That's how the popped audio data will be read for mixing
+                nodeData->audioStreamsPopFrameForMixing();

-                // pack sequence number
-                quint16 sequence = nodeData->getOutgoingSequenceNumber();
-                memcpy(dataAt, &sequence, sizeof(quint16));
-                dataAt += sizeof(quint16);
+                if (node->getType() == NodeType::Agent //&& node->getActiveSocket() && node->getLinkedData()
+                    && ((AudioMixerClientData*)node->getLinkedData())->getAvatarAudioRingBuffer()) {

-                // pack mixed audio samples
-                memcpy(dataAt, _clientSamples, NETWORK_BUFFER_LENGTH_BYTES_STEREO);
-                dataAt += NETWORK_BUFFER_LENGTH_BYTES_STEREO;
+                    prepareMixForListeningNode(node.data());

-                // send mixed audio packet
-                nodeList->writeDatagram(clientMixBuffer, dataAt - clientMixBuffer, node);
-                nodeData->incrementOutgoingMixedAudioSequenceNumber();
-                
-                // send an audio stream stats packet if it's time
-                if (sendAudioStreamStats) {
-                    nodeData->sendAudioStreamStatsPackets(node);
+                    // pack header
+                    int numBytesPacketHeader = populatePacketHeader(clientMixBuffer, PacketTypeMixedAudio);
+                    char* dataAt = clientMixBuffer + numBytesPacketHeader;
+
+                    // pack sequence number
+                    quint16 sequence = nodeData->getOutgoingSequenceNumber();
+                    memcpy(dataAt, &sequence, sizeof(quint16));
+                    dataAt += sizeof(quint16);
+
+                    // pack mixed audio samples
+                    memcpy(dataAt, _clientSamples, NETWORK_BUFFER_LENGTH_BYTES_STEREO);
+                    dataAt += NETWORK_BUFFER_LENGTH_BYTES_STEREO;
+
+                    // send mixed audio packet
+                    nodeList->writeDatagram(clientMixBuffer, dataAt - clientMixBuffer, node);
+                    nodeData->incrementOutgoingMixedAudioSequenceNumber();
+
+                    // send an audio stream stats packet if it's time
+                    if (sendAudioStreamStats) {
+                        nodeData->sendAudioStreamStatsPackets(node);
+                    }
+
+                    ++_sumListeners;
                }
-
-                ++_sumListeners;
            }
        }
        
--- a/assignment-client/src/audio/AudioMixer.h
+++ b/assignment-client/src/audio/AudioMixer.h
@ -42,8 +42,7 @@ public slots:
 private:
    /// adds one buffer to the mix for a listening node
    void addBufferToMixForListeningNodeWithBuffer(PositionalAudioRingBuffer* bufferToAdd,
-                                                  AvatarAudioRingBuffer* listeningNodeBuffer,
-                                                  bool bufferToAddBelongsToListener);
+                                                  AvatarAudioRingBuffer* listeningNodeBuffer);
    
    /// prepares and sends a mix to one Node
    void prepareMixForListeningNode(Node* node);
--- a/assignment-client/src/audio/AudioMixerClientData.cpp
+++ b/assignment-client/src/audio/AudioMixerClientData.cpp
@ -98,6 +98,13 @@ int AudioMixerClientData::parseData(const QByteArray& packet) {
    return 0;
 }

+void AudioMixerClientData::audioStreamsPopFrameForMixing() {
+    QHash<QUuid, PositionalAudioRingBuffer*>::ConstIterator i, end = _ringBuffers.constEnd();
+    for (i = _ringBuffers.constBegin(); i != end; i++) {
+        i.value()->popFrames(1);
+    }
+}
+
 void AudioMixerClientData::sendAudioStreamStatsPackets(const SharedNodePointer& destinationNode) {
    char packet[MAX_PACKET_SIZE];
    NodeList* nodeList = NodeList::getInstance();
--- a/assignment-client/src/audio/AudioMixerClientData.h
+++ b/assignment-client/src/audio/AudioMixerClientData.h
@ -27,6 +27,8 @@ public:
    
    int parseData(const QByteArray& packet);

+    void audioStreamsPopFrameForMixing();
+
    QString getAudioStreamStatsString() const;
    
    void sendAudioStreamStatsPackets(const SharedNodePointer& destinationNode);
--- a/interface/src/Audio.cpp
+++ b/interface/src/Audio.cpp
@ -925,8 +925,7 @@ void Audio::pushAudioToOutput() {

    // if there is data in the ring buffer and room in the audio output, decide what to do

-    AudioRingBuffer::ConstIterator ringBufferNextOutput;
-    if (numFramesToPush > 0 && _ringBuffer.popFrames(&ringBufferNextOutput, numFramesToPush, false)) {
+    if (numFramesToPush > 0 && _ringBuffer.popFrames(numFramesToPush, false)) {

        int numNetworkOutputSamples = numFramesToPush * NETWORK_BUFFER_LENGTH_SAMPLES_STEREO;
        int numDeviceOutputSamples = numNetworkOutputSamples / networkOutputToOutputRatio;
@ -934,6 +933,7 @@ void Audio::pushAudioToOutput() {
        QByteArray outputBuffer;
        outputBuffer.resize(numDeviceOutputSamples * sizeof(int16_t));

+        AudioRingBuffer::ConstIterator ringBufferPopOutput = _ringBuffer.getLastPopOutput();

        int16_t* ringBufferSamples = new int16_t[numNetworkOutputSamples];
        if (_processSpatialAudio) {
@ -941,7 +941,7 @@ void Audio::pushAudioToOutput() {
            QByteArray buffer;
            buffer.resize(numNetworkOutputSamples * sizeof(int16_t));

-            ringBufferNextOutput.readSamples((int16_t*)buffer.data(), numNetworkOutputSamples);
+            ringBufferPopOutput.readSamples((int16_t*)buffer.data(), numNetworkOutputSamples);

            // Accumulate direct transmission of audio from sender to receiver
            if (Menu::getInstance()->isOptionChecked(MenuOption::AudioSpatialProcessingIncludeOriginal)) {
@ -961,7 +961,7 @@ void Audio::pushAudioToOutput() {
        } else {
            // copy the samples we'll resample from the ring buffer - this also
            // pushes the read pointer of the ring buffer forwards
-            ringBufferNextOutput.readSamples(ringBufferSamples, numNetworkOutputSamples);
+            ringBufferPopOutput.readSamples(ringBufferSamples, numNetworkOutputSamples);
        }

        // copy the packet from the RB to the output
--- a/libraries/audio/src/InboundAudioStream.cpp
+++ b/libraries/audio/src/InboundAudioStream.cpp
@ -14,6 +14,8 @@

 InboundAudioStream::InboundAudioStream(int numFrameSamples, int numFramesCapacity, bool dynamicJitterBuffers) :
    _ringBuffer(numFrameSamples, false, numFramesCapacity),
+    _lastPopSucceeded(false),
+    _lastPopOutput(),
    _dynamicJitterBuffers(dynamicJitterBuffers),
    _desiredJitterBufferFrames(1),
    _isStarved(true),
@ -98,37 +100,30 @@ int InboundAudioStream::parseData(const QByteArray& packet) {
 }

 bool InboundAudioStream::popFrames(int numFrames, bool starveOnFail) {
-    bool popped;
    int numSamplesRequested = numFrames * _ringBuffer.getNumFrameSamples();
-    if (popped = shouldPop(numSamplesRequested, starveOnFail)) {
-        _ringBuffer.shiftReadPosition(numSamplesRequested);
+    if (_isStarved) {
+        // we're still refilling; don't pop
+        _consecutiveNotMixedCount++;
+        _lastPopSucceeded = false;
+    } else {
+        if (_ringBuffer.samplesAvailable() >= numSamplesRequested) {
+            // we have enough samples to pop, so we're good to mix
+            _lastPopOutput = _ringBuffer.nextOutput();
+            _ringBuffer.shiftReadPosition(numSamplesRequested);
+
+            _hasStarted = true;
+            _lastPopSucceeded = true;
+        } else {
+            // we don't have enough samples, so set this stream to starve
+            // if starveOnFail is true
+            if (starveOnFail) {
+                starved();
+                _consecutiveNotMixedCount++;
+            }
+            _lastPopSucceeded = false;
+        }
    }
-    _framesAvailableStats.update(_ringBuffer.framesAvailable());
-
-    return popped;
-}
-
-bool InboundAudioStream::popFrames(int16_t* dest, int numFrames, bool starveOnFail) {
-    bool popped;
-    int numSamplesRequested = numFrames * _ringBuffer.getNumFrameSamples();
-    if (popped = shouldPop(numSamplesRequested, starveOnFail)) {
-        _ringBuffer.readSamples(dest, numSamplesRequested);
-    }
-    _framesAvailableStats.update(_ringBuffer.framesAvailable());
-
-    return popped;
-}
-
-bool InboundAudioStream::popFrames(AudioRingBuffer::ConstIterator* nextOutput, int numFrames, bool starveOnFail) {
-    bool popped;
-    int numSamplesRequested = numFrames * _ringBuffer.getNumFrameSamples();
-    if (popped = shouldPop(numSamplesRequested, starveOnFail)) {
-        *nextOutput = _ringBuffer.nextOutput();
-        _ringBuffer.shiftReadPosition(numSamplesRequested);
-    }
-    _framesAvailableStats.update(_ringBuffer.framesAvailable());
-
-    return popped;
+    return _lastPopSucceeded;
 }

 bool InboundAudioStream::shouldPop(int numSamples, bool starveOnFail) {
--- a/libraries/audio/src/InboundAudioStream.h
+++ b/libraries/audio/src/InboundAudioStream.h
@ -53,9 +53,12 @@ public:

    virtual int parseData(const QByteArray& packet);

+
    bool popFrames(int numFrames, bool starveOnFail = true);
-    bool popFrames(int16_t* dest, int numFrames, bool starveOnFail = true);
-    bool popFrames(AudioRingBuffer::ConstIterator* nextOutput, int numFrames, bool starveOnFail = true);
+
+    bool lastPopSucceeded() const { return _lastPopSucceeded; };
+    const AudioRingBuffer::ConstIterator& getLastPopOutput() const { return _lastPopOutput; }
+

    void setToStarved();

@ -105,6 +108,9 @@ protected:

    AudioRingBuffer _ringBuffer;

+    bool _lastPopSucceeded;
+    AudioRingBuffer::ConstIterator _lastPopOutput;
+
    bool _dynamicJitterBuffers;
    int _desiredJitterBufferFrames;