From a9d26b3934f3e6e0e02ed352dc98c5298213a55b Mon Sep 17 00:00:00 2001
From: wangyix <wangyix@gmail.com>
Date: Fri, 25 Jul 2014 12:58:10 -0700
Subject: [PATCH] Fix repeated popping of audio stream frames in AudioMixer

---
 assignment-client/src/audio/AudioMixer.cpp    | 121 +++++++-----------
 assignment-client/src/audio/AudioMixer.h      |   3 +-
 .../src/audio/AudioMixerClientData.cpp        |   7 +
 .../src/audio/AudioMixerClientData.h          |   2 +
 interface/src/Audio.cpp                       |   8 +-
 libraries/audio/src/InboundAudioStream.cpp    |  53 ++++----
 libraries/audio/src/InboundAudioStream.h      |  10 +-
 7 files changed, 92 insertions(+), 112 deletions(-)

diff --git a/assignment-client/src/audio/AudioMixer.cpp b/assignment-client/src/audio/AudioMixer.cpp
index 069ba7476c..5e28ca05e1 100644
--- a/assignment-client/src/audio/AudioMixer.cpp
+++ b/assignment-client/src/audio/AudioMixer.cpp
@@ -94,23 +94,7 @@ const float ATTENUATION_AMOUNT_PER_DOUBLING_IN_DISTANCE = 0.18f;
 const float ATTENUATION_EPSILON_DISTANCE = 0.1f;
 
 void AudioMixer::addBufferToMixForListeningNodeWithBuffer(PositionalAudioRingBuffer* bufferToAdd,
-                                                          AvatarAudioRingBuffer* listeningNodeBuffer,
-                                                          bool bufferToAddBelongsToListener) {
-
-    // if the buffer to be added belongs to the listener and it should not be echoed or
-    // if the buffer frame to be added is too soft, pop a frame from the buffer without mixing it.
-    if ((bufferToAddBelongsToListener && !bufferToAdd->shouldLoopbackForNode())
-        || bufferToAdd->getNextOutputTrailingLoudness() == 0.0f) {
-        bufferToAdd->popFrames(1);
-        return;
-    }
-
-    // get pointer to the frame to be mixed.  If the stream cannot provide a frame (is starved), bail
-    AudioRingBuffer::ConstIterator nextOutputStart;
-    if (!bufferToAdd->popFrames(&nextOutputStart, 1)) {
-        return;
-    }
-
+                                                          AvatarAudioRingBuffer* listeningNodeBuffer) {
     float bearingRelativeAngleToSource = 0.0f;
     float attenuationCoefficient = 1.0f;
     int numSamplesDelay = 0;
@@ -219,7 +203,7 @@ void AudioMixer::addBufferToMixForListeningNodeWithBuffer(PositionalAudioRingBuf
         }
     }
     
-
+    AudioRingBuffer::ConstIterator bufferPopOutput = bufferToAdd->getLastPopOutput();
     
     if (!bufferToAdd->isStereo() && shouldAttenuate) {
         // this is a mono buffer, which means it gets full attenuation and spatialization
@@ -236,8 +220,8 @@ void AudioMixer::addBufferToMixForListeningNodeWithBuffer(PositionalAudioRingBuf
         for (int s = 0; s < NETWORK_BUFFER_LENGTH_SAMPLES_STEREO; s += 4) {
             
             // setup the int16_t variables for the two sample sets
-            correctBufferSample[0] = nextOutputStart[s / 2] * attenuationCoefficient;
-            correctBufferSample[1] = nextOutputStart[(s / 2) + 1] * attenuationCoefficient;
+            correctBufferSample[0] = bufferPopOutput[s / 2] * attenuationCoefficient;
+            correctBufferSample[1] = bufferPopOutput[(s / 2) + 1] * attenuationCoefficient;
             
             delayedChannelIndex = s + (numSamplesDelay * 2) + delayedChannelOffset;
             
@@ -254,15 +238,15 @@ void AudioMixer::addBufferToMixForListeningNodeWithBuffer(PositionalAudioRingBuf
             // if there was a sample delay for this buffer, we need to pull samples prior to the nextOutput
             // to stick at the beginning
             float attenuationAndWeakChannelRatio = attenuationCoefficient * weakChannelAmplitudeRatio;
-            AudioRingBuffer::ConstIterator delayNextOutputStart = nextOutputStart - numSamplesDelay;
+            AudioRingBuffer::ConstIterator delayBufferPopOutput = bufferPopOutput - numSamplesDelay;
 
-            // TODO: delayNextOutputStart may be inside the last frame written if the ringbuffer is completely full
+            // TODO: delayBufferPopOutput may be inside the last frame written if the ringbuffer is completely full
             // maybe make AudioRingBuffer have 1 extra frame in its buffer
             
             for (int i = 0; i < numSamplesDelay; i++) {
                 int parentIndex = i * 2;
-                _clientSamples[parentIndex + delayedChannelOffset] += *delayNextOutputStart * attenuationAndWeakChannelRatio;
-                ++delayNextOutputStart;
+                _clientSamples[parentIndex + delayedChannelOffset] += *delayBufferPopOutput * attenuationAndWeakChannelRatio;
+                ++delayBufferPopOutput;
             }
         }
     } else {
@@ -274,34 +258,9 @@ void AudioMixer::addBufferToMixForListeningNodeWithBuffer(PositionalAudioRingBuf
         }
 
         for (int s = 0; s < NETWORK_BUFFER_LENGTH_SAMPLES_STEREO; s++) {
-            _clientSamples[s] = glm::clamp(_clientSamples[s] + (int)(nextOutputStart[s / stereoDivider] * attenuationCoefficient),
+            _clientSamples[s] = glm::clamp(_clientSamples[s] + (int)(bufferPopOutput[s / stereoDivider] * attenuationCoefficient),
                                             MIN_SAMPLE_VALUE, MAX_SAMPLE_VALUE);
         }
-
-        /*for (int s = 0; s < NETWORK_BUFFER_LENGTH_SAMPLES_STEREO; s += 4) {
-
-            int stereoDivider = bufferToAdd->isStereo() ? 1 : 2;
-
-            if (!shouldAttenuate) {
-                attenuationCoefficient = 1.0f;
-            }
-
-            _clientSamples[s] = glm::clamp(_clientSamples[s]
-                + (int)(nextOutputStart[(s / stereoDivider)] * attenuationCoefficient),
-                MIN_SAMPLE_VALUE, MAX_SAMPLE_VALUE);
-            _clientSamples[s + 1] = glm::clamp(_clientSamples[s + 1]
-                + (int)(nextOutputStart[(s / stereoDivider) + (1 / stereoDivider)]
-                * attenuationCoefficient),
-                MIN_SAMPLE_VALUE, MAX_SAMPLE_VALUE);
-            _clientSamples[s + 2] = glm::clamp(_clientSamples[s + 2]
-                + (int)(nextOutputStart[(s / stereoDivider) + (2 / stereoDivider)]
-                * attenuationCoefficient),
-                MIN_SAMPLE_VALUE, MAX_SAMPLE_VALUE);
-            _clientSamples[s + 3] = glm::clamp(_clientSamples[s + 3]
-                + (int)(nextOutputStart[(s / stereoDivider) + (3 / stereoDivider)]
-                * attenuationCoefficient),
-                MIN_SAMPLE_VALUE, MAX_SAMPLE_VALUE);
-        }*/
     }
 }
 
@@ -324,7 +283,12 @@ void AudioMixer::prepareMixForListeningNode(Node* node) {
             for (i = otherNodeRingBuffers.begin(); i != end; i++) {
                 PositionalAudioRingBuffer* otherNodeBuffer = i.value();
 
-                addBufferToMixForListeningNodeWithBuffer(otherNodeBuffer, nodeRingBuffer, *otherNode == *node);
+                if ((*otherNode != *node || otherNodeBuffer->shouldLoopbackForNode())
+                    && otherNodeBuffer->lastPopSucceeded()
+                    && otherNodeBuffer->getNextOutputTrailingLoudness() > 0.0f) {
+
+                    addBufferToMixForListeningNodeWithBuffer(otherNodeBuffer, nodeRingBuffer);
+                }
             }
         }
     }
@@ -581,36 +545,43 @@ void AudioMixer::run() {
         }
 
         foreach (const SharedNodePointer& node, nodeList->getNodeHash()) {
-            if (node->getType() == NodeType::Agent && node->getActiveSocket() && node->getLinkedData()
-                && ((AudioMixerClientData*) node->getLinkedData())->getAvatarAudioRingBuffer()) {
-
+            if (node->getActiveSocket() && node->getLinkedData()) {
+                
                 AudioMixerClientData* nodeData = (AudioMixerClientData*)node->getLinkedData();
 
-                prepareMixForListeningNode(node.data());
-                
-                // pack header
-                int numBytesPacketHeader = populatePacketHeader(clientMixBuffer, PacketTypeMixedAudio);
-                char* dataAt = clientMixBuffer + numBytesPacketHeader;
+                // pop one frame from each of this node's audio streams. each InboundAudioStream keeps an iterator
+                // to the frame it just popped (see getLastPopOutput()), which the mixing code reads afterwards
+                nodeData->audioStreamsPopFrameForMixing();
 
-                // pack sequence number
-                quint16 sequence = nodeData->getOutgoingSequenceNumber();
-                memcpy(dataAt, &sequence, sizeof(quint16));
-                dataAt += sizeof(quint16);
+                if (node->getType() == NodeType::Agent // active socket and linked data are already checked by the enclosing if
+                    && ((AudioMixerClientData*)node->getLinkedData())->getAvatarAudioRingBuffer()) {
 
-                // pack mixed audio samples
-                memcpy(dataAt, _clientSamples, NETWORK_BUFFER_LENGTH_BYTES_STEREO);
-                dataAt += NETWORK_BUFFER_LENGTH_BYTES_STEREO;
+                    prepareMixForListeningNode(node.data());
 
-                // send mixed audio packet
-                nodeList->writeDatagram(clientMixBuffer, dataAt - clientMixBuffer, node);
-                nodeData->incrementOutgoingMixedAudioSequenceNumber();
-                
-                // send an audio stream stats packet if it's time
-                if (sendAudioStreamStats) {
-                    nodeData->sendAudioStreamStatsPackets(node);
+                    // pack header
+                    int numBytesPacketHeader = populatePacketHeader(clientMixBuffer, PacketTypeMixedAudio);
+                    char* dataAt = clientMixBuffer + numBytesPacketHeader;
+
+                    // pack sequence number
+                    quint16 sequence = nodeData->getOutgoingSequenceNumber();
+                    memcpy(dataAt, &sequence, sizeof(quint16));
+                    dataAt += sizeof(quint16);
+
+                    // pack mixed audio samples
+                    memcpy(dataAt, _clientSamples, NETWORK_BUFFER_LENGTH_BYTES_STEREO);
+                    dataAt += NETWORK_BUFFER_LENGTH_BYTES_STEREO;
+
+                    // send mixed audio packet
+                    nodeList->writeDatagram(clientMixBuffer, dataAt - clientMixBuffer, node);
+                    nodeData->incrementOutgoingMixedAudioSequenceNumber();
+
+                    // send an audio stream stats packet if it's time
+                    if (sendAudioStreamStats) {
+                        nodeData->sendAudioStreamStatsPackets(node);
+                    }
+
+                    ++_sumListeners;
                 }
-
-                ++_sumListeners;
             }
         }
         
diff --git a/assignment-client/src/audio/AudioMixer.h b/assignment-client/src/audio/AudioMixer.h
index beb2539057..afab7d47dc 100644
--- a/assignment-client/src/audio/AudioMixer.h
+++ b/assignment-client/src/audio/AudioMixer.h
@@ -42,8 +42,7 @@ public slots:
 private:
     /// adds one buffer to the mix for a listening node
     void addBufferToMixForListeningNodeWithBuffer(PositionalAudioRingBuffer* bufferToAdd,
-                                                  AvatarAudioRingBuffer* listeningNodeBuffer,
-                                                  bool bufferToAddBelongsToListener);
+                                                  AvatarAudioRingBuffer* listeningNodeBuffer);
     
     /// prepares and sends a mix to one Node
     void prepareMixForListeningNode(Node* node);
diff --git a/assignment-client/src/audio/AudioMixerClientData.cpp b/assignment-client/src/audio/AudioMixerClientData.cpp
index 9d5c1c6a74..c288a4f721 100644
--- a/assignment-client/src/audio/AudioMixerClientData.cpp
+++ b/assignment-client/src/audio/AudioMixerClientData.cpp
@@ -98,6 +98,13 @@ int AudioMixerClientData::parseData(const QByteArray& packet) {
     return 0;
 }
 
+void AudioMixerClientData::audioStreamsPopFrameForMixing() {
+    // pop a single frame from every stream held in _ringBuffers; each stream records its own result
+    foreach (PositionalAudioRingBuffer* stream, _ringBuffers) {
+        stream->popFrames(1);
+    }
+}
+
 void AudioMixerClientData::sendAudioStreamStatsPackets(const SharedNodePointer& destinationNode) {
     char packet[MAX_PACKET_SIZE];
     NodeList* nodeList = NodeList::getInstance();
diff --git a/assignment-client/src/audio/AudioMixerClientData.h b/assignment-client/src/audio/AudioMixerClientData.h
index 19592b1253..92dddab7e4 100644
--- a/assignment-client/src/audio/AudioMixerClientData.h
+++ b/assignment-client/src/audio/AudioMixerClientData.h
@@ -27,6 +27,8 @@ public:
     
     int parseData(const QByteArray& packet);
 
+    void audioStreamsPopFrameForMixing();
+
     QString getAudioStreamStatsString() const;
     
     void sendAudioStreamStatsPackets(const SharedNodePointer& destinationNode);
diff --git a/interface/src/Audio.cpp b/interface/src/Audio.cpp
index 0f79c0c6a1..808c076cb0 100644
--- a/interface/src/Audio.cpp
+++ b/interface/src/Audio.cpp
@@ -925,8 +925,7 @@ void Audio::pushAudioToOutput() {
 
     // if there is data in the ring buffer and room in the audio output, decide what to do
 
-    AudioRingBuffer::ConstIterator ringBufferNextOutput;
-    if (numFramesToPush > 0 && _ringBuffer.popFrames(&ringBufferNextOutput, numFramesToPush, false)) {
+    if (numFramesToPush > 0 && _ringBuffer.popFrames(numFramesToPush, false)) {
 
         int numNetworkOutputSamples = numFramesToPush * NETWORK_BUFFER_LENGTH_SAMPLES_STEREO;
         int numDeviceOutputSamples = numNetworkOutputSamples / networkOutputToOutputRatio;
@@ -934,6 +933,7 @@ void Audio::pushAudioToOutput() {
         QByteArray outputBuffer;
         outputBuffer.resize(numDeviceOutputSamples * sizeof(int16_t));
 
+        AudioRingBuffer::ConstIterator ringBufferPopOutput = _ringBuffer.getLastPopOutput();
 
         int16_t* ringBufferSamples = new int16_t[numNetworkOutputSamples];
         if (_processSpatialAudio) {
@@ -941,7 +941,7 @@ void Audio::pushAudioToOutput() {
             QByteArray buffer;
             buffer.resize(numNetworkOutputSamples * sizeof(int16_t));
 
-            ringBufferNextOutput.readSamples((int16_t*)buffer.data(), numNetworkOutputSamples);
+            ringBufferPopOutput.readSamples((int16_t*)buffer.data(), numNetworkOutputSamples);
 
             // Accumulate direct transmission of audio from sender to receiver
             if (Menu::getInstance()->isOptionChecked(MenuOption::AudioSpatialProcessingIncludeOriginal)) {
@@ -961,7 +961,7 @@ void Audio::pushAudioToOutput() {
         } else {
             // copy the samples we'll resample from the ring buffer - this also
             // pushes the read pointer of the ring buffer forwards
-            ringBufferNextOutput.readSamples(ringBufferSamples, numNetworkOutputSamples);
+            ringBufferPopOutput.readSamples(ringBufferSamples, numNetworkOutputSamples);
         }
 
         // copy the packet from the RB to the output
diff --git a/libraries/audio/src/InboundAudioStream.cpp b/libraries/audio/src/InboundAudioStream.cpp
index 591dde772c..501f898654 100644
--- a/libraries/audio/src/InboundAudioStream.cpp
+++ b/libraries/audio/src/InboundAudioStream.cpp
@@ -14,6 +14,8 @@
 
 InboundAudioStream::InboundAudioStream(int numFrameSamples, int numFramesCapacity, bool dynamicJitterBuffers) :
     _ringBuffer(numFrameSamples, false, numFramesCapacity),
+    _lastPopSucceeded(false),
+    _lastPopOutput(),
     _dynamicJitterBuffers(dynamicJitterBuffers),
     _desiredJitterBufferFrames(1),
     _isStarved(true),
@@ -98,37 +100,30 @@ int InboundAudioStream::parseData(const QByteArray& packet) {
 }
 
 bool InboundAudioStream::popFrames(int numFrames, bool starveOnFail) {
-    bool popped;
     int numSamplesRequested = numFrames * _ringBuffer.getNumFrameSamples();
-    if (popped = shouldPop(numSamplesRequested, starveOnFail)) {
-        _ringBuffer.shiftReadPosition(numSamplesRequested);
+    if (_isStarved) {
+        // the stream is starved and still refilling, so nothing is popped on this call
+        _consecutiveNotMixedCount++;
+        _lastPopSucceeded = false;
+    } else {
+        if (_ringBuffer.samplesAvailable() >= numSamplesRequested) {
+            // enough samples are buffered: record where the popped frames start, then advance past them
+            _lastPopOutput = _ringBuffer.nextOutput();
+            _ringBuffer.shiftReadPosition(numSamplesRequested);
+
+            _hasStarted = true;
+            _lastPopSucceeded = true;
+        } else {
+            // not enough samples are buffered: the pop fails, and the stream is
+            // marked as starved only when the caller passed starveOnFail
+            if (starveOnFail) {
+                starved();
+                _consecutiveNotMixedCount++;
+            }
+            _lastPopSucceeded = false;
+        }
     }
-    _framesAvailableStats.update(_ringBuffer.framesAvailable());
-
-    return popped;
-}
-
-bool InboundAudioStream::popFrames(int16_t* dest, int numFrames, bool starveOnFail) {
-    bool popped;
-    int numSamplesRequested = numFrames * _ringBuffer.getNumFrameSamples();
-    if (popped = shouldPop(numSamplesRequested, starveOnFail)) {
-        _ringBuffer.readSamples(dest, numSamplesRequested);
-    }
-    _framesAvailableStats.update(_ringBuffer.framesAvailable());
-
-    return popped;
-}
-
-bool InboundAudioStream::popFrames(AudioRingBuffer::ConstIterator* nextOutput, int numFrames, bool starveOnFail) {
-    bool popped;
-    int numSamplesRequested = numFrames * _ringBuffer.getNumFrameSamples();
-    if (popped = shouldPop(numSamplesRequested, starveOnFail)) {
-        *nextOutput = _ringBuffer.nextOutput();
-        _ringBuffer.shiftReadPosition(numSamplesRequested);
-    }
-    _framesAvailableStats.update(_ringBuffer.framesAvailable());
-
-    return popped;
+    return _lastPopSucceeded; // NOTE(review): _framesAvailableStats.update() is no longer called in this function - confirm intended
 }
 
 bool InboundAudioStream::shouldPop(int numSamples, bool starveOnFail) {
diff --git a/libraries/audio/src/InboundAudioStream.h b/libraries/audio/src/InboundAudioStream.h
index 375fccae9e..4eaf554ec7 100644
--- a/libraries/audio/src/InboundAudioStream.h
+++ b/libraries/audio/src/InboundAudioStream.h
@@ -53,9 +53,12 @@ public:
 
     virtual int parseData(const QByteArray& packet);
 
+
     bool popFrames(int numFrames, bool starveOnFail = true);
-    bool popFrames(int16_t* dest, int numFrames, bool starveOnFail = true);
-    bool popFrames(AudioRingBuffer::ConstIterator* nextOutput, int numFrames, bool starveOnFail = true);
+
+    bool lastPopSucceeded() const { return _lastPopSucceeded; } // true if the most recent popFrames() call popped frames
+    const AudioRingBuffer::ConstIterator& getLastPopOutput() const { return _lastPopOutput; } // start of the frames popped by the last successful popFrames()
+
 
     void setToStarved();
 
@@ -105,6 +108,9 @@ protected:
 
     AudioRingBuffer _ringBuffer;
 
+    bool _lastPopSucceeded;
+    AudioRingBuffer::ConstIterator _lastPopOutput;
+
     bool _dynamicJitterBuffers;
     int _desiredJitterBufferFrames;