From a5457eb86b7e6d766b46d4106dfa098ba3a49a4d Mon Sep 17 00:00:00 2001
From: wangyix
Date: Fri, 20 Jun 2014 14:43:18 -0700
Subject: [PATCH] fixed bug in Audio.cpp where numSilentSamples was written to
 wrong place in packet

added more debug stuff, other minor changes and fixes
---
 .../src/audio/AudioMixerClientData.cpp       |  5 +-
 .../src/audio/AvatarAudioRingBuffer.cpp      |  2 +-
 interface/src/Audio.cpp                      | 12 ++--
 libraries/audio/src/AudioRingBuffer.cpp      |  1 +
 .../audio/src/InjectedAudioRingBuffer.cpp    |  2 +-
 .../audio/src/PositionalAudioRingBuffer.cpp  | 63 +++++++++++++------
 .../audio/src/PositionalAudioRingBuffer.h    | 10 ++-
 7 files changed, 63 insertions(+), 32 deletions(-)

diff --git a/assignment-client/src/audio/AudioMixerClientData.cpp b/assignment-client/src/audio/AudioMixerClientData.cpp
index 3ee571fc46..0c41cc70f9 100644
--- a/assignment-client/src/audio/AudioMixerClientData.cpp
+++ b/assignment-client/src/audio/AudioMixerClientData.cpp
@@ -119,15 +119,14 @@ void AudioMixerClientData::checkBuffersBeforeFrameSend(AABox* checkSourceZone, A
 }
 
 void AudioMixerClientData::pushBuffersAfterFrameSend() {
+
     QList<PositionalAudioRingBuffer*>::iterator i = _ringBuffers.begin();
     while (i != _ringBuffers.end()) {
         // this was a used buffer, push the output pointer forwards
         PositionalAudioRingBuffer* audioBuffer = *i;
 
         if (audioBuffer->willBeAddedToMix()) {
-            audioBuffer->shiftReadPosition(audioBuffer->isStereo()
-                ? NETWORK_BUFFER_LENGTH_SAMPLES_STEREO : NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL);
-
+            audioBuffer->shiftReadPosition(audioBuffer->getSamplesPerFrame());
+
             audioBuffer->setWillBeAddedToMix(false);
         } else if (audioBuffer->getType() == PositionalAudioRingBuffer::Injector
                 && audioBuffer->hasStarted() && audioBuffer->isStarved()) {
diff --git a/assignment-client/src/audio/AvatarAudioRingBuffer.cpp b/assignment-client/src/audio/AvatarAudioRingBuffer.cpp
index 6658e446cf..0df50f99db 100644
--- a/assignment-client/src/audio/AvatarAudioRingBuffer.cpp
+++ b/assignment-client/src/audio/AvatarAudioRingBuffer.cpp
@@ -20,7 +20,7 @@ AvatarAudioRingBuffer::AvatarAudioRingBuffer(bool isStereo) :
 
 int AvatarAudioRingBuffer::parseData(const QByteArray& packet) {
     _interframeTimeGapHistory.frameReceived();
-    updateDesiredJitterBufferNumSamples();
+    updateDesiredJitterBufferFrames();
 
     _shouldLoopbackForNode = (packetTypeForPacket(packet) == PacketTypeMicrophoneAudioWithEcho);
     return PositionalAudioRingBuffer::parseData(packet);
diff --git a/interface/src/Audio.cpp b/interface/src/Audio.cpp
index 271bcd5279..47e55ddc90 100644
--- a/interface/src/Audio.cpp
+++ b/interface/src/Audio.cpp
@@ -461,8 +461,8 @@ void Audio::handleAudioInput() {
     int16_t* inputAudioSamples = new int16_t[inputSamplesRequired];
     _inputRingBuffer.readSamples(inputAudioSamples, inputSamplesRequired);
 
-    int numNetworkBytes = _isStereoInput ? NETWORK_BUFFER_LENGTH_BYTES_STEREO : NETWORK_BUFFER_LENGTH_BYTES_PER_CHANNEL;
-    int numNetworkSamples = _isStereoInput ? NETWORK_BUFFER_LENGTH_SAMPLES_STEREO : NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL;
+    const int numNetworkBytes = _isStereoInput ? NETWORK_BUFFER_LENGTH_BYTES_STEREO : NETWORK_BUFFER_LENGTH_BYTES_PER_CHANNEL;
+    const int numNetworkSamples = _isStereoInput ? NETWORK_BUFFER_LENGTH_SAMPLES_STEREO : NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL;
 
     // zero out the monoAudioSamples array and the locally injected audio
     memset(networkAudioSamples, 0, numNetworkBytes);
@@ -622,6 +622,7 @@ void Audio::handleAudioInput() {
     SharedNodePointer audioMixer = nodeList->soloNodeOfType(NodeType::AudioMixer);
 
     if (audioMixer && audioMixer->getActiveSocket()) {
+
         MyAvatar* interfaceAvatar = Application::getInstance()->getAvatar();
         glm::vec3 headPosition = interfaceAvatar->getHead()->getPosition();
         glm::quat headOrientation = interfaceAvatar->getHead()->getFinalOrientationInWorldFrame();
@@ -634,12 +635,11 @@ void Audio::handleAudioInput() {
             packetType = PacketTypeSilentAudioFrame;
 
             // we need to indicate how many silent samples this is to the audio mixer
-            audioDataPacket[0] = _isStereoInput
-                ? NETWORK_BUFFER_LENGTH_SAMPLES_STEREO
-                : NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL;
+            networkAudioSamples[0] = numNetworkSamples;
             numAudioBytes = sizeof(int16_t);
         } else {
-            numAudioBytes = _isStereoInput ? NETWORK_BUFFER_LENGTH_BYTES_STEREO : NETWORK_BUFFER_LENGTH_BYTES_PER_CHANNEL;
+            numAudioBytes = numNetworkBytes;
+            //_isStereoInput ? NETWORK_BUFFER_LENGTH_BYTES_STEREO : NETWORK_BUFFER_LENGTH_BYTES_PER_CHANNEL;
 
             if (Menu::getInstance()->isOptionChecked(MenuOption::EchoServerAudio)) {
                 packetType = PacketTypeMicrophoneAudioWithEcho;
diff --git a/libraries/audio/src/AudioRingBuffer.cpp b/libraries/audio/src/AudioRingBuffer.cpp
index 2101fcb9cd..7b1e68ffbd 100644
--- a/libraries/audio/src/AudioRingBuffer.cpp
+++ b/libraries/audio/src/AudioRingBuffer.cpp
@@ -158,6 +158,7 @@ const int16_t& AudioRingBuffer::operator[] (const int index) const {
 
 void AudioRingBuffer::shiftReadPosition(unsigned int numSamples) {
     _nextOutput = shiftedPositionAccomodatingWrap(_nextOutput, numSamples);
+    printf("\n mixed. %d samples remaining\n", samplesAvailable());
 }
 
 unsigned int AudioRingBuffer::samplesAvailable() const {
diff --git a/libraries/audio/src/InjectedAudioRingBuffer.cpp b/libraries/audio/src/InjectedAudioRingBuffer.cpp
index ffe5876bfd..80bcda5acb 100644
--- a/libraries/audio/src/InjectedAudioRingBuffer.cpp
+++ b/libraries/audio/src/InjectedAudioRingBuffer.cpp
@@ -32,7 +32,7 @@ const uchar MAX_INJECTOR_VOLUME = 255;
 
 int InjectedAudioRingBuffer::parseData(const QByteArray& packet) {
     _interframeTimeGapHistory.frameReceived();
-    updateDesiredJitterBufferNumSamples();
+    updateDesiredJitterBufferFrames();
 
     // setup a data stream to read from this packet
     QDataStream packetStream(packet);
diff --git a/libraries/audio/src/PositionalAudioRingBuffer.cpp b/libraries/audio/src/PositionalAudioRingBuffer.cpp
index b537e5c6d6..813323ecb1 100644
--- a/libraries/audio/src/PositionalAudioRingBuffer.cpp
+++ b/libraries/audio/src/PositionalAudioRingBuffer.cpp
@@ -33,6 +33,10 @@ InterframeTimeGapHistory::InterframeTimeGapHistory()
 }
 
 void InterframeTimeGapHistory::frameReceived() {
+
+    static QQueue<quint64> gaps;
+    static quint64 gapsSum = 0;
+
     quint64 now = usecTimestampNow();
 
     // make sure this isn't the first time frameReceived() is called, meaning there's actually a gap to calculate.
@@ -87,9 +91,9 @@ PositionalAudioRingBuffer::PositionalAudioRingBuffer(PositionalAudioRingBuffer::
     _shouldOutputStarveDebug(true),
     _isStereo(isStereo),
     _listenerUnattenuatedZone(NULL),
-    _desiredJitterBufferNumSamples(getNumSamplesPerFrame())
+    _desiredJitterBufferFrames(1),
+    _currentJitterBufferFrames(0)
 {
-
 }
 
 int PositionalAudioRingBuffer::parseData(const QByteArray& packet) {
@@ -114,10 +118,15 @@ int PositionalAudioRingBuffer::parseData(const QByteArray& packet) {
         if (numSilentSamples > 0) {
             addSilentFrame(numSilentSamples);
         }
+        printf("\nparsed silent packet of %d samples\n", numSilentSamples);
     } else {
         // there is audio data to read
-        readBytes += writeData(packet.data() + readBytes, packet.size() - readBytes);
+        int dataBytes = writeData(packet.data() + readBytes, packet.size() - readBytes);
+        readBytes += dataBytes;
+
+        printf("\nparsed packet of %d data bytes\n", dataBytes);
     }
+    printf("%d samples available\n", samplesAvailable());
 
     return readBytes;
 }
@@ -166,40 +175,58 @@ void PositionalAudioRingBuffer::updateNextOutputTrailingLoudness() {
 }
 
 bool PositionalAudioRingBuffer::shouldBeAddedToMix() {
-    int samplesPerFrame = getNumSamplesPerFrame();
+    int samplesPerFrame = getSamplesPerFrame();
+    int currentJitterBufferSamples = 3 * samplesPerFrame; //_currentJitterBufferFrames * samplesPerFrame;
+
+//printf("\nsamples available: %d frames available: %d\n", samplesAvailable(), samplesAvailable() / samplesPerFrame);
+
+    if (!isNotStarvedOrHasMinimumSamples(samplesPerFrame + currentJitterBufferSamples)) {
 
-    if (!isNotStarvedOrHasMinimumSamples(samplesPerFrame + _desiredJitterBufferNumSamples)) {
+//printf("\nMIXING DELAYED! waiting for jitter buffer to fill after being starved\n");
+//printf("samples available: %d frames available: %d\n", samplesAvailable(), samplesAvailable() / samplesPerFrame);
+
         // if the buffer was starved and hasn't filled back up all the way, don't mix yet
         if (_shouldOutputStarveDebug) {
            _shouldOutputStarveDebug = false;
        }
-
-        return false;
+
+        return false;
+    } else if (samplesAvailable() < samplesPerFrame) {
+
+//printf("\nMIXING DELAYED! jitter buffer is starved!!!\n");
+//printf("samples available: %d frames available: %d\n", samplesAvailable(), samplesAvailable() / samplesPerFrame);
+
         // if the buffer doesn't have a full frame of samples for mixing, it is starved
         _isStarved = true;
 
         // reset our _shouldOutputStarveDebug to true so the next is printed
         _shouldOutputStarveDebug = true;
+
+        // if buffer was starved, we've effectively increased the jitter buffer by one frame
+        // by "holding back" this ring buffer's contents until the next client frame is prepared.
+        _currentJitterBufferFrames++;
+//printf("jbuffer size increased: new size: %d\n", _currentJitterBufferFrames);
 
         return false;
-    } else {
-        // good buffer, add this to the mix
-        _isStarved = false;
-
-        // since we've read data from ring buffer at least once - we've started
-        _hasStarted = true;
-
-        return true;
     }
+//printf("WILL MIX\n");
 
-    return false;
+    // good buffer, add this to the mix
+    _isStarved = false;
+
+    // since we've read data from ring buffer at least once - we've started
+    _hasStarted = true;
+
+    return true;
 }
 
-void PositionalAudioRingBuffer::updateDesiredJitterBufferNumSamples() {
+void PositionalAudioRingBuffer::updateDesiredJitterBufferFrames() {
     const float USECS_PER_FRAME = NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL * USECS_PER_SECOND / (float)SAMPLE_RATE;
 
     if (_interframeTimeGapHistory.hasNewWindowMaxGapAvailable()) {
-        int desiredJitterBufferNumFrames = (int)((float)_interframeTimeGapHistory.getWindowMaxGap() / USECS_PER_FRAME + 0.5f);
-        _desiredJitterBufferNumSamples = desiredJitterBufferNumFrames * getNumSamplesPerFrame();
+        _desiredJitterBufferFrames = ceilf((float)_interframeTimeGapHistory.getWindowMaxGap() / USECS_PER_FRAME);
+        if (_desiredJitterBufferFrames < 1) {
+            _desiredJitterBufferFrames = 1;
+        }
     }
 }
\ No newline at end of file
diff --git a/libraries/audio/src/PositionalAudioRingBuffer.h b/libraries/audio/src/PositionalAudioRingBuffer.h
index de731c6136..87a53038a3 100644
--- a/libraries/audio/src/PositionalAudioRingBuffer.h
+++ b/libraries/audio/src/PositionalAudioRingBuffer.h
@@ -29,6 +29,7 @@ public:
 
     void frameReceived();
     bool hasNewWindowMaxGapAvailable() const { return _newWindowMaxGapAvailable; }
+    quint64 peekWindowMaxGap() const { return _windowMaxGap; }
     quint64 getWindowMaxGap();
 
 private:
@@ -75,14 +76,14 @@ public:
     AABox* getListenerUnattenuatedZone() const { return _listenerUnattenuatedZone; }
     void setListenerUnattenuatedZone(AABox* listenerUnattenuatedZone) { _listenerUnattenuatedZone = listenerUnattenuatedZone; }
 
-    int getNumSamplesPerFrame() const { return _isStereo ? NETWORK_BUFFER_LENGTH_SAMPLES_STEREO : NETWORK_BUFFER_LENGTH_BYTES_PER_CHANNEL; }
+    int getSamplesPerFrame() const { return _isStereo ? NETWORK_BUFFER_LENGTH_SAMPLES_STEREO : NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL; }
 
 protected:
     // disallow copying of PositionalAudioRingBuffer objects
     PositionalAudioRingBuffer(const PositionalAudioRingBuffer&);
     PositionalAudioRingBuffer& operator= (const PositionalAudioRingBuffer&);
 
-    void updateDesiredJitterBufferNumSamples();
+    void updateDesiredJitterBufferFrames();
 
     PositionalAudioRingBuffer::Type _type;
     glm::vec3 _position;
@@ -96,7 +97,10 @@ protected:
     AABox* _listenerUnattenuatedZone;
 
     InterframeTimeGapHistory _interframeTimeGapHistory;
-    int _desiredJitterBufferNumSamples;
+    int _desiredJitterBufferFrames;
+    int _currentJitterBufferFrames;
+
+quint64 _lastMixTime;
 };
 
 #endif // hifi_PositionalAudioRingBuffer_h
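
Note on the frame-based jitter buffer sizing this patch switches to: updateDesiredJitterBufferFrames() now takes the ceiling of the largest recent inter-packet gap divided by the duration of one network frame and clamps the result to at least one frame. The standalone C++ sketch below (not part of the patch) illustrates that arithmetic; the constant values used here (512 samples per network frame, a 24 kHz sample rate) are illustrative assumptions, not necessarily the values defined in the repository.

// Standalone sketch of the jitter-buffer sizing rule introduced above.
// Constant values are illustrative assumptions.
#include <cmath>
#include <cstdint>
#include <cstdio>

static const int   NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL = 512; // assumed samples per network frame
static const int   SAMPLE_RATE = 24000;                             // assumed sample rate, Hz
static const float USECS_PER_SECOND = 1000000.0f;

// Mirrors the arithmetic of PositionalAudioRingBuffer::updateDesiredJitterBufferFrames():
// desired frames = ceil(window max gap / frame duration), clamped to a minimum of 1.
int desiredJitterBufferFrames(uint64_t windowMaxGapUsecs) {
    const float USECS_PER_FRAME = NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL * USECS_PER_SECOND / (float)SAMPLE_RATE;
    int frames = (int)std::ceil((float)windowMaxGapUsecs / USECS_PER_FRAME);
    return frames < 1 ? 1 : frames;
}

int main() {
    // With the assumed constants one frame spans about 21333 usecs, so a 50 ms worst-case gap
    // yields ceil(50000 / 21333.3) = 3 frames of desired jitter buffer.
    printf("desired jitter buffer frames for a 50 ms gap: %d\n", desiredJitterBufferFrames(50000));
    return 0;
}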