From a9d26b3934f3e6e0e02ed352dc98c5298213a55b Mon Sep 17 00:00:00 2001 From: wangyix Date: Fri, 25 Jul 2014 12:58:10 -0700 Subject: [PATCH] fixed repeat-popping in audiomixer --- assignment-client/src/audio/AudioMixer.cpp | 121 +++++++----------- assignment-client/src/audio/AudioMixer.h | 3 +- .../src/audio/AudioMixerClientData.cpp | 7 + .../src/audio/AudioMixerClientData.h | 2 + interface/src/Audio.cpp | 8 +- libraries/audio/src/InboundAudioStream.cpp | 53 ++++---- libraries/audio/src/InboundAudioStream.h | 10 +- 7 files changed, 92 insertions(+), 112 deletions(-) diff --git a/assignment-client/src/audio/AudioMixer.cpp b/assignment-client/src/audio/AudioMixer.cpp index 069ba7476c..5e28ca05e1 100644 --- a/assignment-client/src/audio/AudioMixer.cpp +++ b/assignment-client/src/audio/AudioMixer.cpp @@ -94,23 +94,7 @@ const float ATTENUATION_AMOUNT_PER_DOUBLING_IN_DISTANCE = 0.18f; const float ATTENUATION_EPSILON_DISTANCE = 0.1f; void AudioMixer::addBufferToMixForListeningNodeWithBuffer(PositionalAudioRingBuffer* bufferToAdd, - AvatarAudioRingBuffer* listeningNodeBuffer, - bool bufferToAddBelongsToListener) { - - // if the buffer to be added belongs to the listener and it should not be echoed or - // if the buffer frame to be added is too soft, pop a frame from the buffer without mixing it. - if ((bufferToAddBelongsToListener && !bufferToAdd->shouldLoopbackForNode()) - || bufferToAdd->getNextOutputTrailingLoudness() == 0.0f) { - bufferToAdd->popFrames(1); - return; - } - - // get pointer to the frame to be mixed. If the stream cannot provide a frame (is starved), bail - AudioRingBuffer::ConstIterator nextOutputStart; - if (!bufferToAdd->popFrames(&nextOutputStart, 1)) { - return; - } - + AvatarAudioRingBuffer* listeningNodeBuffer) { float bearingRelativeAngleToSource = 0.0f; float attenuationCoefficient = 1.0f; int numSamplesDelay = 0; @@ -219,7 +203,7 @@ void AudioMixer::addBufferToMixForListeningNodeWithBuffer(PositionalAudioRingBuf } } - + AudioRingBuffer::ConstIterator bufferPopOutput = bufferToAdd->getLastPopOutput(); if (!bufferToAdd->isStereo() && shouldAttenuate) { // this is a mono buffer, which means it gets full attenuation and spatialization @@ -236,8 +220,8 @@ void AudioMixer::addBufferToMixForListeningNodeWithBuffer(PositionalAudioRingBuf for (int s = 0; s < NETWORK_BUFFER_LENGTH_SAMPLES_STEREO; s += 4) { // setup the int16_t variables for the two sample sets - correctBufferSample[0] = nextOutputStart[s / 2] * attenuationCoefficient; - correctBufferSample[1] = nextOutputStart[(s / 2) + 1] * attenuationCoefficient; + correctBufferSample[0] = bufferPopOutput[s / 2] * attenuationCoefficient; + correctBufferSample[1] = bufferPopOutput[(s / 2) + 1] * attenuationCoefficient; delayedChannelIndex = s + (numSamplesDelay * 2) + delayedChannelOffset; @@ -254,15 +238,15 @@ void AudioMixer::addBufferToMixForListeningNodeWithBuffer(PositionalAudioRingBuf // if there was a sample delay for this buffer, we need to pull samples prior to the nextOutput // to stick at the beginning float attenuationAndWeakChannelRatio = attenuationCoefficient * weakChannelAmplitudeRatio; - AudioRingBuffer::ConstIterator delayNextOutputStart = nextOutputStart - numSamplesDelay; + AudioRingBuffer::ConstIterator delayBufferPopOutput = bufferPopOutput - numSamplesDelay; - // TODO: delayNextOutputStart may be inside the last frame written if the ringbuffer is completely full + // TODO: delayBufferPopOutput may be inside the last frame written if the ringbuffer is completely full // maybe make AudioRingBuffer have 1 extra frame in its buffer for (int i = 0; i < numSamplesDelay; i++) { int parentIndex = i * 2; - _clientSamples[parentIndex + delayedChannelOffset] += *delayNextOutputStart * attenuationAndWeakChannelRatio; - ++delayNextOutputStart; + _clientSamples[parentIndex + delayedChannelOffset] += *delayBufferPopOutput * attenuationAndWeakChannelRatio; + ++delayBufferPopOutput; } } } else { @@ -274,34 +258,9 @@ void AudioMixer::addBufferToMixForListeningNodeWithBuffer(PositionalAudioRingBuf } for (int s = 0; s < NETWORK_BUFFER_LENGTH_SAMPLES_STEREO; s++) { - _clientSamples[s] = glm::clamp(_clientSamples[s] + (int)(nextOutputStart[s / stereoDivider] * attenuationCoefficient), + _clientSamples[s] = glm::clamp(_clientSamples[s] + (int)(bufferPopOutput[s / stereoDivider] * attenuationCoefficient), MIN_SAMPLE_VALUE, MAX_SAMPLE_VALUE); } - - /*for (int s = 0; s < NETWORK_BUFFER_LENGTH_SAMPLES_STEREO; s += 4) { - - int stereoDivider = bufferToAdd->isStereo() ? 1 : 2; - - if (!shouldAttenuate) { - attenuationCoefficient = 1.0f; - } - - _clientSamples[s] = glm::clamp(_clientSamples[s] - + (int)(nextOutputStart[(s / stereoDivider)] * attenuationCoefficient), - MIN_SAMPLE_VALUE, MAX_SAMPLE_VALUE); - _clientSamples[s + 1] = glm::clamp(_clientSamples[s + 1] - + (int)(nextOutputStart[(s / stereoDivider) + (1 / stereoDivider)] - * attenuationCoefficient), - MIN_SAMPLE_VALUE, MAX_SAMPLE_VALUE); - _clientSamples[s + 2] = glm::clamp(_clientSamples[s + 2] - + (int)(nextOutputStart[(s / stereoDivider) + (2 / stereoDivider)] - * attenuationCoefficient), - MIN_SAMPLE_VALUE, MAX_SAMPLE_VALUE); - _clientSamples[s + 3] = glm::clamp(_clientSamples[s + 3] - + (int)(nextOutputStart[(s / stereoDivider) + (3 / stereoDivider)] - * attenuationCoefficient), - MIN_SAMPLE_VALUE, MAX_SAMPLE_VALUE); - }*/ } } @@ -324,7 +283,12 @@ void AudioMixer::prepareMixForListeningNode(Node* node) { for (i = otherNodeRingBuffers.begin(); i != end; i++) { PositionalAudioRingBuffer* otherNodeBuffer = i.value(); - addBufferToMixForListeningNodeWithBuffer(otherNodeBuffer, nodeRingBuffer, *otherNode == *node); + if ((*otherNode != *node || otherNodeBuffer->shouldLoopbackForNode()) + && otherNodeBuffer->lastPopSucceeded() + && otherNodeBuffer->getNextOutputTrailingLoudness() > 0.0f) { + + addBufferToMixForListeningNodeWithBuffer(otherNodeBuffer, nodeRingBuffer); + } } } } @@ -581,36 +545,43 @@ void AudioMixer::run() { } foreach (const SharedNodePointer& node, nodeList->getNodeHash()) { - if (node->getType() == NodeType::Agent && node->getActiveSocket() && node->getLinkedData() - && ((AudioMixerClientData*) node->getLinkedData())->getAvatarAudioRingBuffer()) { - + if (node->getActiveSocket() && node->getLinkedData()) { + AudioMixerClientData* nodeData = (AudioMixerClientData*)node->getLinkedData(); - prepareMixForListeningNode(node.data()); - - // pack header - int numBytesPacketHeader = populatePacketHeader(clientMixBuffer, PacketTypeMixedAudio); - char* dataAt = clientMixBuffer + numBytesPacketHeader; + // request a frame from each audio stream. a pointer to the popped data is stored as a member + // in InboundAudioStream. That's how the popped audio data will be read for mixing + nodeData->audioStreamsPopFrameForMixing(); - // pack sequence number - quint16 sequence = nodeData->getOutgoingSequenceNumber(); - memcpy(dataAt, &sequence, sizeof(quint16)); - dataAt += sizeof(quint16); + if (node->getType() == NodeType::Agent //&& node->getActiveSocket() && node->getLinkedData() + && ((AudioMixerClientData*)node->getLinkedData())->getAvatarAudioRingBuffer()) { - // pack mixed audio samples - memcpy(dataAt, _clientSamples, NETWORK_BUFFER_LENGTH_BYTES_STEREO); - dataAt += NETWORK_BUFFER_LENGTH_BYTES_STEREO; + prepareMixForListeningNode(node.data()); - // send mixed audio packet - nodeList->writeDatagram(clientMixBuffer, dataAt - clientMixBuffer, node); - nodeData->incrementOutgoingMixedAudioSequenceNumber(); - - // send an audio stream stats packet if it's time - if (sendAudioStreamStats) { - nodeData->sendAudioStreamStatsPackets(node); + // pack header + int numBytesPacketHeader = populatePacketHeader(clientMixBuffer, PacketTypeMixedAudio); + char* dataAt = clientMixBuffer + numBytesPacketHeader; + + // pack sequence number + quint16 sequence = nodeData->getOutgoingSequenceNumber(); + memcpy(dataAt, &sequence, sizeof(quint16)); + dataAt += sizeof(quint16); + + // pack mixed audio samples + memcpy(dataAt, _clientSamples, NETWORK_BUFFER_LENGTH_BYTES_STEREO); + dataAt += NETWORK_BUFFER_LENGTH_BYTES_STEREO; + + // send mixed audio packet + nodeList->writeDatagram(clientMixBuffer, dataAt - clientMixBuffer, node); + nodeData->incrementOutgoingMixedAudioSequenceNumber(); + + // send an audio stream stats packet if it's time + if (sendAudioStreamStats) { + nodeData->sendAudioStreamStatsPackets(node); + } + + ++_sumListeners; } - - ++_sumListeners; } } diff --git a/assignment-client/src/audio/AudioMixer.h b/assignment-client/src/audio/AudioMixer.h index beb2539057..afab7d47dc 100644 --- a/assignment-client/src/audio/AudioMixer.h +++ b/assignment-client/src/audio/AudioMixer.h @@ -42,8 +42,7 @@ public slots: private: /// adds one buffer to the mix for a listening node void addBufferToMixForListeningNodeWithBuffer(PositionalAudioRingBuffer* bufferToAdd, - AvatarAudioRingBuffer* listeningNodeBuffer, - bool bufferToAddBelongsToListener); + AvatarAudioRingBuffer* listeningNodeBuffer); /// prepares and sends a mix to one Node void prepareMixForListeningNode(Node* node); diff --git a/assignment-client/src/audio/AudioMixerClientData.cpp b/assignment-client/src/audio/AudioMixerClientData.cpp index 9d5c1c6a74..c288a4f721 100644 --- a/assignment-client/src/audio/AudioMixerClientData.cpp +++ b/assignment-client/src/audio/AudioMixerClientData.cpp @@ -98,6 +98,13 @@ int AudioMixerClientData::parseData(const QByteArray& packet) { return 0; } +void AudioMixerClientData::audioStreamsPopFrameForMixing() { + QHash::ConstIterator i, end = _ringBuffers.constEnd(); + for (i = _ringBuffers.constBegin(); i != end; i++) { + i.value()->popFrames(1); + } +} + void AudioMixerClientData::sendAudioStreamStatsPackets(const SharedNodePointer& destinationNode) { char packet[MAX_PACKET_SIZE]; NodeList* nodeList = NodeList::getInstance(); diff --git a/assignment-client/src/audio/AudioMixerClientData.h b/assignment-client/src/audio/AudioMixerClientData.h index 19592b1253..92dddab7e4 100644 --- a/assignment-client/src/audio/AudioMixerClientData.h +++ b/assignment-client/src/audio/AudioMixerClientData.h @@ -27,6 +27,8 @@ public: int parseData(const QByteArray& packet); + void audioStreamsPopFrameForMixing(); + QString getAudioStreamStatsString() const; void sendAudioStreamStatsPackets(const SharedNodePointer& destinationNode); diff --git a/interface/src/Audio.cpp b/interface/src/Audio.cpp index 0f79c0c6a1..808c076cb0 100644 --- a/interface/src/Audio.cpp +++ b/interface/src/Audio.cpp @@ -925,8 +925,7 @@ void Audio::pushAudioToOutput() { // if there is data in the ring buffer and room in the audio output, decide what to do - AudioRingBuffer::ConstIterator ringBufferNextOutput; - if (numFramesToPush > 0 && _ringBuffer.popFrames(&ringBufferNextOutput, numFramesToPush, false)) { + if (numFramesToPush > 0 && _ringBuffer.popFrames(numFramesToPush, false)) { int numNetworkOutputSamples = numFramesToPush * NETWORK_BUFFER_LENGTH_SAMPLES_STEREO; int numDeviceOutputSamples = numNetworkOutputSamples / networkOutputToOutputRatio; @@ -934,6 +933,7 @@ void Audio::pushAudioToOutput() { QByteArray outputBuffer; outputBuffer.resize(numDeviceOutputSamples * sizeof(int16_t)); + AudioRingBuffer::ConstIterator ringBufferPopOutput = _ringBuffer.getLastPopOutput(); int16_t* ringBufferSamples = new int16_t[numNetworkOutputSamples]; if (_processSpatialAudio) { @@ -941,7 +941,7 @@ void Audio::pushAudioToOutput() { QByteArray buffer; buffer.resize(numNetworkOutputSamples * sizeof(int16_t)); - ringBufferNextOutput.readSamples((int16_t*)buffer.data(), numNetworkOutputSamples); + ringBufferPopOutput.readSamples((int16_t*)buffer.data(), numNetworkOutputSamples); // Accumulate direct transmission of audio from sender to receiver if (Menu::getInstance()->isOptionChecked(MenuOption::AudioSpatialProcessingIncludeOriginal)) { @@ -961,7 +961,7 @@ void Audio::pushAudioToOutput() { } else { // copy the samples we'll resample from the ring buffer - this also // pushes the read pointer of the ring buffer forwards - ringBufferNextOutput.readSamples(ringBufferSamples, numNetworkOutputSamples); + ringBufferPopOutput.readSamples(ringBufferSamples, numNetworkOutputSamples); } // copy the packet from the RB to the output diff --git a/libraries/audio/src/InboundAudioStream.cpp b/libraries/audio/src/InboundAudioStream.cpp index 591dde772c..501f898654 100644 --- a/libraries/audio/src/InboundAudioStream.cpp +++ b/libraries/audio/src/InboundAudioStream.cpp @@ -14,6 +14,8 @@ InboundAudioStream::InboundAudioStream(int numFrameSamples, int numFramesCapacity, bool dynamicJitterBuffers) : _ringBuffer(numFrameSamples, false, numFramesCapacity), + _lastPopSucceeded(false), + _lastPopOutput(), _dynamicJitterBuffers(dynamicJitterBuffers), _desiredJitterBufferFrames(1), _isStarved(true), @@ -98,37 +100,30 @@ int InboundAudioStream::parseData(const QByteArray& packet) { } bool InboundAudioStream::popFrames(int numFrames, bool starveOnFail) { - bool popped; int numSamplesRequested = numFrames * _ringBuffer.getNumFrameSamples(); - if (popped = shouldPop(numSamplesRequested, starveOnFail)) { - _ringBuffer.shiftReadPosition(numSamplesRequested); + if (_isStarved) { + // we're still refilling; don't pop + _consecutiveNotMixedCount++; + _lastPopSucceeded = false; + } else { + if (_ringBuffer.samplesAvailable() >= numSamplesRequested) { + // we have enough samples to pop, so we're good to mix + _lastPopOutput = _ringBuffer.nextOutput(); + _ringBuffer.shiftReadPosition(numSamplesRequested); + + _hasStarted = true; + _lastPopSucceeded = true; + } else { + // we don't have enough samples, so set this stream to starve + // if starveOnFail is true + if (starveOnFail) { + starved(); + _consecutiveNotMixedCount++; + } + _lastPopSucceeded = false; + } } - _framesAvailableStats.update(_ringBuffer.framesAvailable()); - - return popped; -} - -bool InboundAudioStream::popFrames(int16_t* dest, int numFrames, bool starveOnFail) { - bool popped; - int numSamplesRequested = numFrames * _ringBuffer.getNumFrameSamples(); - if (popped = shouldPop(numSamplesRequested, starveOnFail)) { - _ringBuffer.readSamples(dest, numSamplesRequested); - } - _framesAvailableStats.update(_ringBuffer.framesAvailable()); - - return popped; -} - -bool InboundAudioStream::popFrames(AudioRingBuffer::ConstIterator* nextOutput, int numFrames, bool starveOnFail) { - bool popped; - int numSamplesRequested = numFrames * _ringBuffer.getNumFrameSamples(); - if (popped = shouldPop(numSamplesRequested, starveOnFail)) { - *nextOutput = _ringBuffer.nextOutput(); - _ringBuffer.shiftReadPosition(numSamplesRequested); - } - _framesAvailableStats.update(_ringBuffer.framesAvailable()); - - return popped; + return _lastPopSucceeded; } bool InboundAudioStream::shouldPop(int numSamples, bool starveOnFail) { diff --git a/libraries/audio/src/InboundAudioStream.h b/libraries/audio/src/InboundAudioStream.h index 375fccae9e..4eaf554ec7 100644 --- a/libraries/audio/src/InboundAudioStream.h +++ b/libraries/audio/src/InboundAudioStream.h @@ -53,9 +53,12 @@ public: virtual int parseData(const QByteArray& packet); + bool popFrames(int numFrames, bool starveOnFail = true); - bool popFrames(int16_t* dest, int numFrames, bool starveOnFail = true); - bool popFrames(AudioRingBuffer::ConstIterator* nextOutput, int numFrames, bool starveOnFail = true); + + bool lastPopSucceeded() const { return _lastPopSucceeded; }; + const AudioRingBuffer::ConstIterator& getLastPopOutput() const { return _lastPopOutput; } + void setToStarved(); @@ -105,6 +108,9 @@ protected: AudioRingBuffer _ringBuffer; + bool _lastPopSucceeded; + AudioRingBuffer::ConstIterator _lastPopOutput; + bool _dynamicJitterBuffers; int _desiredJitterBufferFrames;