mirror of
https://github.com/lubosz/overte.git
synced 2025-08-07 20:31:29 +02:00
added InboundAudioStream class
This commit is contained in:
parent
a36c9e872e
commit
e2f957d6dc
4 changed files with 357 additions and 3 deletions
|
@ -152,9 +152,6 @@ void Audio::init(QGLWidget *parent) {
|
||||||
void Audio::reset() {
|
void Audio::reset() {
|
||||||
_ringBuffer.reset();
|
_ringBuffer.reset();
|
||||||
|
|
||||||
// we don't want to reset seq numbers when space-bar reset occurs.
|
|
||||||
//_outgoingAvatarAudioSequenceNumber = 0;
|
|
||||||
|
|
||||||
resetStats();
|
resetStats();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -47,6 +47,7 @@ public:
|
||||||
void resizeForFrameSize(int numFrameSamples);
|
void resizeForFrameSize(int numFrameSamples);
|
||||||
|
|
||||||
int getSampleCapacity() const { return _sampleCapacity; }
|
int getSampleCapacity() const { return _sampleCapacity; }
|
||||||
|
int getFrameCapacity() const { return _frameCapacity; }
|
||||||
|
|
||||||
int parseData(const QByteArray& packet);
|
int parseData(const QByteArray& packet);
|
||||||
|
|
||||||
|
|
232
libraries/audio/src/InboundAudioStream.cpp
Normal file
232
libraries/audio/src/InboundAudioStream.cpp
Normal file
|
@ -0,0 +1,232 @@
|
||||||
|
//
|
||||||
|
// InboundAudioStream.cpp
|
||||||
|
// libraries/audio/src
|
||||||
|
//
|
||||||
|
// Created by Yixin Wang on 7/17/2014
|
||||||
|
// Copyright 2013 High Fidelity, Inc.
|
||||||
|
//
|
||||||
|
// Distributed under the Apache License, Version 2.0.
|
||||||
|
// See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html
|
||||||
|
//
|
||||||
|
|
||||||
|
#include "InboundAudioStream.h"
|
||||||
|
#include "PacketHeaders.h"
|
||||||
|
|
||||||
|
// Constructs an inbound stream whose ring buffer holds numFramesCapacity
// frames of numFrameSamples samples each. The stream starts out "starved"
// (nothing may be mixed until the jitter buffer has filled to its desired
// length). dynamicJitterBuffers selects whether the desired jitter buffer
// length adapts to measured inter-frame gaps or stays pinned at one frame.
InboundAudioStream::InboundAudioStream(int numFrameSamples, int numFramesCapacity, bool dynamicJitterBuffers) :
    _ringBuffer(numFrameSamples, false, numFramesCapacity),
    _dynamicJitterBuffers(dynamicJitterBuffers),
    _desiredJitterBufferFrames(1),
    _isStarved(true),
    _hasStarted(false),
    _consecutiveNotMixedCount(0),
    _starveCount(0),
    _silentFramesDropped(0),
    _incomingSequenceNumberStats(INCOMING_SEQ_STATS_HISTORY_LENGTH_SECONDS),
    _lastFrameReceivedTime(0),
    _interframeTimeGapStatsForJitterCalc(TIME_GAPS_FOR_JITTER_CALC_INTERVAL_SAMPLES, TIME_GAPS_FOR_JITTER_CALC_WINDOW_INTERVALS),
    _interframeTimeGapStatsForStatsPacket(TIME_GAPS_FOR_STATS_PACKET_INTERVAL_SAMPLES, TIME_GAPS_FOR_STATS_PACKET_WINDOW_INTERVALS),
    _framesAvailableStats(FRAMES_AVAILABLE_STATS_INTERVAL_SAMPLES, FRAMES_AVAILABLE_STATS_WINDOW_INTERVALS)
{
}
|
||||||
|
|
||||||
|
// Returns the stream to its just-constructed state: empties the ring buffer,
// re-enters the starved state, and zeroes every counter and statistics window.
void InboundAudioStream::reset() {
    _ringBuffer.reset();
    _desiredJitterBufferFrames = 1;
    _isStarved = true;
    _hasStarted = false;

    // counters
    _consecutiveNotMixedCount = 0;
    _starveCount = 0;
    _silentFramesDropped = 0;

    // statistics windows
    _incomingSequenceNumberStats.reset();
    _lastFrameReceivedTime = 0;
    _interframeTimeGapStatsForJitterCalc.reset();
    _interframeTimeGapStatsForStatsPacket.reset();
    _framesAvailableStats.reset();
}
|
||||||
|
|
||||||
|
int InboundAudioStream::parseData(const QByteArray& packet) {
|
||||||
|
frameReceivedUpdateTimingStats();
|
||||||
|
|
||||||
|
PacketType packetType = packetTypeForPacket(packet);
|
||||||
|
QUuid senderUUID = uuidFromPacketHeader(packet);
|
||||||
|
|
||||||
|
// parse header
|
||||||
|
int numBytesHeader = numBytesForPacketHeader(packet);
|
||||||
|
const char* sequenceAt = packet.constData() + numBytesHeader;
|
||||||
|
int readBytes = numBytesHeader;
|
||||||
|
|
||||||
|
// parse sequence number and track it
|
||||||
|
quint16 sequence = *(reinterpret_cast<const quint16*>(sequenceAt));
|
||||||
|
readBytes += sizeof(quint16);
|
||||||
|
SequenceNumberStats::ArrivalInfo arrivalInfo = _incomingSequenceNumberStats.sequenceNumberReceived(sequence, senderUUID);
|
||||||
|
|
||||||
|
// TODO: handle generalized silent packet here?????
|
||||||
|
|
||||||
|
|
||||||
|
// parse the info after the seq number and before the audio data.(the stream properties)
|
||||||
|
int numAudioSamples;
|
||||||
|
readBytes += parseStreamProperties(packetType, packet.mid(readBytes), numAudioSamples);
|
||||||
|
|
||||||
|
// handle this packet based on its arrival status.
|
||||||
|
// For now, late packets are ignored. It may be good in the future to insert the late audio frame
|
||||||
|
// into the ring buffer to fill in the missing frame if it hasn't been mixed yet.
|
||||||
|
switch (arrivalInfo._status) {
|
||||||
|
case SequenceNumberStats::Early: {
|
||||||
|
int packetsDropped = arrivalInfo._seqDiffFromExpected;
|
||||||
|
writeSamplesForDroppedPackets(packetsDropped * numAudioSamples);
|
||||||
|
// fall through to OnTime case
|
||||||
|
}
|
||||||
|
case SequenceNumberStats::OnTime: {
|
||||||
|
readBytes += parseAudioData(packetType, packet.mid(readBytes), numAudioSamples);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default: {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (_isStarved && _ringBuffer.samplesAvailable() >= _desiredJitterBufferFrames * _ringBuffer.getNumFrameSamples()) {
|
||||||
|
_isStarved = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
_framesAvailableStats.update(_ringBuffer.framesAvailable());
|
||||||
|
|
||||||
|
return readBytes;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool InboundAudioStream::popFrames(int16_t* dest, int numFrames, bool starveOnFail) {
|
||||||
|
if (_isStarved) {
|
||||||
|
_consecutiveNotMixedCount++;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool framesPopped = false;
|
||||||
|
|
||||||
|
int numSamplesRequested = numFrames * _ringBuffer.getNumFrameSamples();
|
||||||
|
if (_ringBuffer.samplesAvailable >= numSamplesRequested) {
|
||||||
|
_ringBuffer.readSamples(dest, numSamplesRequested);
|
||||||
|
_hasStarted = true;
|
||||||
|
framesPopped = true;
|
||||||
|
} else {
|
||||||
|
if (starveOnFail) {
|
||||||
|
setToStarved();
|
||||||
|
_consecutiveNotMixedCount++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
_framesAvailableStats.update(_ringBuffer.framesAvailable());
|
||||||
|
|
||||||
|
return framesPopped;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Marks the stream starved: mixing stops until the jitter buffer refills.
// Resets the consecutive-not-mixed counter and records the starve event.
void InboundAudioStream::setToStarved() {
    _isStarved = true;
    _consecutiveNotMixedCount = 0;
    _starveCount++;
}
|
||||||
|
|
||||||
|
|
||||||
|
int InboundAudioStream::getCalculatedDesiredJitterBufferFrames() const {
|
||||||
|
const float USECS_PER_FRAME = NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL * USECS_PER_SECOND / (float)SAMPLE_RATE;
|
||||||
|
|
||||||
|
int calculatedDesiredJitterBufferFrames = ceilf((float)_interframeTimeGapStatsForJitterCalc.getWindowMax() / USECS_PER_FRAME);
|
||||||
|
if (calculatedDesiredJitterBufferFrames < 1) {
|
||||||
|
calculatedDesiredJitterBufferFrames = 1;
|
||||||
|
}
|
||||||
|
return calculatedDesiredJitterBufferFrames;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void InboundAudioStream::frameReceivedUpdateTimingStats() {
|
||||||
|
// update the two time gap stats we're keeping
|
||||||
|
quint64 now = usecTimestampNow();
|
||||||
|
if (_lastFrameReceivedTime != 0) {
|
||||||
|
quint64 gap = now - _lastFrameReceivedTime;
|
||||||
|
_interframeTimeGapStatsForJitterCalc.update(gap);
|
||||||
|
_interframeTimeGapStatsForStatsPacket.update(gap);
|
||||||
|
}
|
||||||
|
_lastFrameReceivedTime = now;
|
||||||
|
|
||||||
|
// recalculate the _desiredJitterBufferFrames if _interframeTimeGapStatsForJitterCalc has updated stats for us
|
||||||
|
if (_interframeTimeGapStatsForJitterCalc.getNewStatsAvailableFlag()) {
|
||||||
|
if (!_dynamicJitterBuffers) {
|
||||||
|
_desiredJitterBufferFrames = 1; // HACK to see if this fixes the audio silence
|
||||||
|
} else {
|
||||||
|
_desiredJitterBufferFrames = getCalculatedDesiredJitterBufferFrames();
|
||||||
|
|
||||||
|
const int maxDesired = _ringBuffer.getFrameCapacity() - 1;
|
||||||
|
if (_desiredJitterBufferFrames > maxDesired) {
|
||||||
|
_desiredJitterBufferFrames = maxDesired;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_interframeTimeGapStatsForJitterCalc.clearNewStatsAvailableFlag();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int InboundAudioStream::writeDroppableSilentSamples(int numSilentSamples) {
|
||||||
|
|
||||||
|
// This adds some number of frames to the desired jitter buffer frames target we use.
|
||||||
|
// The larger this value is, the less aggressive we are about reducing the jitter buffer length.
|
||||||
|
// Setting this to 0 will try to get the jitter buffer to be exactly _desiredJitterBufferFrames long,
|
||||||
|
// which could lead immediately to a starve.
|
||||||
|
const int DESIRED_JITTER_BUFFER_FRAMES_PADDING = 1;
|
||||||
|
|
||||||
|
// calculate how many silent frames we should drop. We only drop silent frames if
|
||||||
|
// the running avg num frames available has stabilized and it's more than
|
||||||
|
// our desired number of frames by the margin defined above.
|
||||||
|
int samplesPerFrame = _ringBuffer.getNumFrameSamples();
|
||||||
|
int numSilentFramesToDrop = 0;
|
||||||
|
if (_framesAvailableStats.getNewStatsAvailableFlag() && _framesAvailableStats.isWindowFilled()
|
||||||
|
&& numSilentSamples >= samplesPerFrame) {
|
||||||
|
_framesAvailableStats.clearNewStatsAvailableFlag();
|
||||||
|
int averageJitterBufferFrames = (int)getFramesAvailableAverage();
|
||||||
|
int desiredJitterBufferFramesPlusPadding = _desiredJitterBufferFrames + DESIRED_JITTER_BUFFER_FRAMES_PADDING;
|
||||||
|
|
||||||
|
if (averageJitterBufferFrames > desiredJitterBufferFramesPlusPadding) {
|
||||||
|
// our avg jitter buffer size exceeds its desired value, so ignore some silent
|
||||||
|
// frames to get that size as close to desired as possible
|
||||||
|
int numSilentFramesToDropDesired = averageJitterBufferFrames - desiredJitterBufferFramesPlusPadding;
|
||||||
|
int numSilentFramesReceived = numSilentSamples / samplesPerFrame;
|
||||||
|
numSilentFramesToDrop = std::min(numSilentFramesToDropDesired, numSilentFramesReceived);
|
||||||
|
|
||||||
|
// since we now have a new jitter buffer length, reset the frames available stats.
|
||||||
|
_framesAvailableStats.reset();
|
||||||
|
|
||||||
|
_silentFramesDropped += numSilentFramesToDrop;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return _ringBuffer.addSilentFrame(numSilentSamples - numSilentFramesToDrop * samplesPerFrame);
|
||||||
|
}
|
||||||
|
|
||||||
|
int InboundAudioStream::writeSamplesForDroppedPackets(int numSamples) {
|
||||||
|
return writeDroppableSilentSamples(numSamples);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Snapshots this stream's current statistics into an AudioStreamStats struct
// for reporting (e.g. in stats packets). Read-only; does not touch any window.
AudioStreamStats InboundAudioStream::getAudioStreamStats() const {
    AudioStreamStats streamStats;

    // inter-frame time gaps: lifetime and recent-window figures
    streamStats._timeGapMin = _interframeTimeGapStatsForStatsPacket.getMin();
    streamStats._timeGapMax = _interframeTimeGapStatsForStatsPacket.getMax();
    streamStats._timeGapAverage = _interframeTimeGapStatsForStatsPacket.getAverage();
    streamStats._timeGapWindowMin = _interframeTimeGapStatsForStatsPacket.getWindowMin();
    streamStats._timeGapWindowMax = _interframeTimeGapStatsForStatsPacket.getWindowMax();
    streamStats._timeGapWindowAverage = _interframeTimeGapStatsForStatsPacket.getWindowAverage();

    // ring buffer / jitter buffer state
    streamStats._ringBufferFramesAvailable = _ringBuffer.framesAvailable();
    streamStats._ringBufferFramesAvailableAverage = _framesAvailableStats.getWindowAverage();
    streamStats._ringBufferDesiredJitterBufferFrames = _desiredJitterBufferFrames;
    streamStats._ringBufferStarveCount = _starveCount;
    streamStats._ringBufferConsecutiveNotMixedCount = _consecutiveNotMixedCount;
    streamStats._ringBufferOverflowCount = _ringBuffer.getOverflowCount();
    streamStats._ringBufferSilentFramesDropped = _silentFramesDropped;

    // packet sequence statistics (lifetime and history window)
    streamStats._packetStreamStats = _incomingSequenceNumberStats.getStats();
    streamStats._packetStreamWindowStats = _incomingSequenceNumberStats.getStatsForHistoryWindow();

    return streamStats;
}
|
||||||
|
|
||||||
|
// Rolls the current sequence-number stats into the 30s history window, then
// returns a fresh stats snapshot. Intended to be called once per stats period.
AudioStreamStats InboundAudioStream::updateSeqHistoryAndGetAudioStreamStats() {
    _incomingSequenceNumberStats.pushStatsToHistory();
    return getAudioStreamStats();
}
|
124
libraries/audio/src/InboundAudioStream.h
Normal file
124
libraries/audio/src/InboundAudioStream.h
Normal file
|
@ -0,0 +1,124 @@
|
||||||
|
//
|
||||||
|
// InboundAudioStream.h
|
||||||
|
// libraries/audio/src
|
||||||
|
//
|
||||||
|
// Created by Yixin Wang on 7/17/2014.
|
||||||
|
// Copyright 2013 High Fidelity, Inc.
|
||||||
|
//
|
||||||
|
// Distributed under the Apache License, Version 2.0.
|
||||||
|
// See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html
|
||||||
|
//
|
||||||
|
|
||||||
|
#ifndef hifi_InboundAudioStream_h
|
||||||
|
#define hifi_InboundAudioStream_h
|
||||||
|
|
||||||
|
#include "NodeData.h"
|
||||||
|
#include "AudioRingBuffer.h"
|
||||||
|
#include "MovingMinMaxAvg.h"
|
||||||
|
#include "SequenceNumberStats.h"
|
||||||
|
#include "AudioStreamStats.h"
|
||||||
|
#include "PacketHeaders.h"
|
||||||
|
|
||||||
|
// the time gaps stats for _desiredJitterBufferFrames calculation
|
||||||
|
// will recalculate the max for the past 5000 samples every 500 samples
|
||||||
|
const int TIME_GAPS_FOR_JITTER_CALC_INTERVAL_SAMPLES = 500;
|
||||||
|
const int TIME_GAPS_FOR_JITTER_CALC_WINDOW_INTERVALS = 10;
|
||||||
|
|
||||||
|
// the time gap stats for constructing AudioStreamStats will
|
||||||
|
// recalculate min/max/avg every ~1 second for the past ~30 seconds of time gap data
|
||||||
|
const int TIME_GAPS_FOR_STATS_PACKET_INTERVAL_SAMPLES = USECS_PER_SECOND / BUFFER_SEND_INTERVAL_USECS;
|
||||||
|
const int TIME_GAPS_FOR_STATS_PACKET_WINDOW_INTERVALS = 30;
|
||||||
|
|
||||||
|
// the stats for calculating the average frames available will recalculate every ~1 second
|
||||||
|
// and will include data for the past ~2 seconds
|
||||||
|
const int FRAMES_AVAILABLE_STATS_INTERVAL_SAMPLES = USECS_PER_SECOND / BUFFER_SEND_INTERVAL_USECS;
|
||||||
|
const int FRAMES_AVAILABLE_STATS_WINDOW_INTERVALS = 2;
|
||||||
|
|
||||||
|
// the internal history buffer of the incoming seq stats will cover 30s to calculate
|
||||||
|
// packet loss % over last 30s
|
||||||
|
const int INCOMING_SEQ_STATS_HISTORY_LENGTH_SECONDS = 30;
|
||||||
|
|
||||||
|
const int INBOUND_RING_BUFFER_FRAME_CAPACITY = 100;
|
||||||
|
|
||||||
|
|
||||||
|
class InboundAudioStream : public NodeData {
|
||||||
|
Q_OBJECT
|
||||||
|
public:
|
||||||
|
InboundAudioStream(int numFrameSamples, int numFramesCapacity, bool dynamicJitterBuffers);
|
||||||
|
|
||||||
|
void reset();
|
||||||
|
void flushBuffer() { _ringBuffer.reset(); }
|
||||||
|
void resetSequenceNumberStats() { _incomingSequenceNumberStats.reset(); }
|
||||||
|
|
||||||
|
|
||||||
|
int parseData(const QByteArray& packet);
|
||||||
|
|
||||||
|
bool popFrames(int16_t* dest, int numFrames, bool starveOnFail = true);
|
||||||
|
|
||||||
|
|
||||||
|
void setToStarved();
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
AudioStreamStats updateSeqHistoryAndGetAudioStreamStats();
|
||||||
|
virtual AudioStreamStats getAudioStreamStats() const;
|
||||||
|
|
||||||
|
int getCalculatedDesiredJitterBufferFrames() const;
|
||||||
|
|
||||||
|
int getDesiredJitterBufferFrames() const { return _desiredJitterBufferFrames; }
|
||||||
|
int getNumFrameSamples() const { return _ringBuffer.getNumFrameSamples(); }
|
||||||
|
int getFramesAvailable() const { return _ringBuffer.framesAvailable(); }
|
||||||
|
double getFramesAvailableAverage() const { return _framesAvailableStats.getWindowAverage(); }
|
||||||
|
|
||||||
|
bool isStarved() const { return _isStarved; }
|
||||||
|
bool hasStarted() const { return _hasStarted; }
|
||||||
|
|
||||||
|
int getConsecutiveNotMixedCount() const { return _consecutiveNotMixedCount; }
|
||||||
|
int getStarveCount() const { return _starveCount; }
|
||||||
|
int getSilentFramesDropped() const { return _silentFramesDropped; }
|
||||||
|
int getOverflowCount() const { return _ringBuffer.getOverflowCount(); }
|
||||||
|
|
||||||
|
protected:
|
||||||
|
// disallow copying of InboundAudioStream objects
|
||||||
|
InboundAudioStream(const InboundAudioStream&);
|
||||||
|
InboundAudioStream& operator= (const InboundAudioStream&);
|
||||||
|
|
||||||
|
/// parses the info between the seq num and the audio data in the network packet and calculates
|
||||||
|
/// how many audio samples this packet contains
|
||||||
|
virtual int parseStreamProperties(PacketType type, const QByteArray& packetAfterSeqNum, int& numAudioSamples) = 0;
|
||||||
|
|
||||||
|
/// parses the audio data in the network packet
|
||||||
|
virtual int parseAudioData(PacketType type, const QByteArray& packetAfterStreamProperties, int numAudioSamples) = 0;
|
||||||
|
|
||||||
|
int writeDroppableSilentSamples(int numSilentSamples);
|
||||||
|
int writeSamplesForDroppedPackets(int numSamples);
|
||||||
|
void frameReceivedUpdateTimingStats();
|
||||||
|
|
||||||
|
protected:
|
||||||
|
|
||||||
|
AudioRingBuffer _ringBuffer;
|
||||||
|
|
||||||
|
bool _dynamicJitterBuffers;
|
||||||
|
int _desiredJitterBufferFrames;
|
||||||
|
|
||||||
|
bool _isStarved;
|
||||||
|
bool _hasStarted;
|
||||||
|
|
||||||
|
|
||||||
|
// stats
|
||||||
|
|
||||||
|
int _consecutiveNotMixedCount;
|
||||||
|
int _starveCount;
|
||||||
|
int _silentFramesDropped;
|
||||||
|
|
||||||
|
SequenceNumberStats _incomingSequenceNumberStats;
|
||||||
|
|
||||||
|
quint64 _lastFrameReceivedTime;
|
||||||
|
MovingMinMaxAvg<quint64> _interframeTimeGapStatsForJitterCalc;
|
||||||
|
MovingMinMaxAvg<quint64> _interframeTimeGapStatsForStatsPacket;
|
||||||
|
|
||||||
|
// TODO: change this to time-weighted moving avg
|
||||||
|
MovingMinMaxAvg<int> _framesAvailableStats;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif // hifi_InboundAudioStream_h
|
Loading…
Reference in a new issue