// // InboundAudioStream.h // libraries/audio/src // // Created by Yixin Wang on 7/17/2014. // Copyright 2013 High Fidelity, Inc. // // Distributed under the Apache License, Version 2.0. // See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html // #ifndef hifi_InboundAudioStream_h #define hifi_InboundAudioStream_h #include "NodeData.h" #include "AudioRingBuffer.h" #include "MovingMinMaxAvg.h" #include "SequenceNumberStats.h" #include "AudioStreamStats.h" #include "PacketHeaders.h" #include "StdDev.h" #include "TimeWeightedAvg.h" // This adds some number of frames to the desired jitter buffer frames target we use when we're dropping frames. // The larger this value is, the less frames we drop when attempting to reduce the jitter buffer length. // Setting this to 0 will try to get the jitter buffer to be exactly _desiredJitterBufferFrames when dropping frames, // which could lead to a starve soon after. const int DESIRED_JITTER_BUFFER_FRAMES_PADDING = 1; // this controls the length of the window for stats used in the stats packet (not the stats used in // _desiredJitterBufferFrames calculation) const int STATS_FOR_STATS_PACKET_WINDOW_SECONDS = 30; // this controls the window size of the time-weighted avg of frames available. Every time the window fills up, // _currentJitterBufferFrames is updated with the time-weighted avg and the running time-weighted avg is reset. const int FRAMES_AVAILABLE_STAT_WINDOW_USECS = 10 * USECS_PER_SECOND; // default values for members of the Settings struct const int DEFAULT_MAX_FRAMES_OVER_DESIRED = 10; const bool DEFAULT_DYNAMIC_JITTER_BUFFERS = true; const int DEFAULT_STATIC_DESIRED_JITTER_BUFFER_FRAMES = 1; const bool DEFAULT_USE_STDEV_FOR_JITTER_CALC = false; const int DEFAULT_WINDOW_STARVE_THRESHOLD = 3; const int DEFAULT_WINDOW_SECONDS_FOR_DESIRED_CALC_ON_TOO_MANY_STARVES = 50; const int DEFAULT_WINDOW_SECONDS_FOR_DESIRED_REDUCTION = 10; const bool DEFAULT_REPETITION_WITH_FADE = true; class InboundAudioStream : public NodeData { Q_OBJECT public: class Settings { public: Settings() : _maxFramesOverDesired(DEFAULT_MAX_FRAMES_OVER_DESIRED), _dynamicJitterBuffers(DEFAULT_DYNAMIC_JITTER_BUFFERS), _staticDesiredJitterBufferFrames(DEFAULT_STATIC_DESIRED_JITTER_BUFFER_FRAMES), _useStDevForJitterCalc(DEFAULT_USE_STDEV_FOR_JITTER_CALC), _windowStarveThreshold(DEFAULT_WINDOW_STARVE_THRESHOLD), _windowSecondsForDesiredCalcOnTooManyStarves(DEFAULT_WINDOW_SECONDS_FOR_DESIRED_CALC_ON_TOO_MANY_STARVES), _windowSecondsForDesiredReduction(DEFAULT_WINDOW_SECONDS_FOR_DESIRED_REDUCTION), _repetitionWithFade(DEFAULT_REPETITION_WITH_FADE) {} Settings(int maxFramesOverDesired, bool dynamicJitterBuffers, int staticDesiredJitterBufferFrames, bool useStDevForJitterCalc, int windowStarveThreshold, int windowSecondsForDesiredCalcOnTooManyStarves, int _windowSecondsForDesiredReduction, bool repetitionWithFade) : _maxFramesOverDesired(maxFramesOverDesired), _dynamicJitterBuffers(dynamicJitterBuffers), _staticDesiredJitterBufferFrames(staticDesiredJitterBufferFrames), _useStDevForJitterCalc(useStDevForJitterCalc), _windowStarveThreshold(windowStarveThreshold), _windowSecondsForDesiredCalcOnTooManyStarves(windowSecondsForDesiredCalcOnTooManyStarves), _windowSecondsForDesiredReduction(windowSecondsForDesiredCalcOnTooManyStarves), _repetitionWithFade(repetitionWithFade) {} // max number of frames over desired in the ringbuffer. int _maxFramesOverDesired; // if false, _desiredJitterBufferFrames will always be _staticDesiredJitterBufferFrames. Otherwise, // either fred or philip's method will be used to calculate _desiredJitterBufferFrames based on packet timegaps. bool _dynamicJitterBuffers; // settings for static jitter buffer mode int _staticDesiredJitterBufferFrames; // settings for dynamic jitter buffer mode bool _useStDevForJitterCalc; // if true, philip's method is used. otherwise, fred's method is used. int _windowStarveThreshold; int _windowSecondsForDesiredCalcOnTooManyStarves; int _windowSecondsForDesiredReduction; // if true, the prev frame will be repeated (fading to silence) for dropped frames. // otherwise, silence will be inserted. bool _repetitionWithFade; }; public: InboundAudioStream(int numFrameSamples, int numFramesCapacity, const Settings& settings); void reset(); void resetStats(); void clearBuffer(); virtual int parseData(const QByteArray& packet); int popFrames(int maxFrames, bool allOrNothing, bool starveIfNoFramesPopped = true); int popSamples(int maxSamples, bool allOrNothing, bool starveIfNoSamplesPopped = true); bool lastPopSucceeded() const { return _lastPopSucceeded; }; const AudioRingBuffer::ConstIterator& getLastPopOutput() const { return _lastPopOutput; } void setToStarved(); void setSettings(const Settings& settings); void setMaxFramesOverDesired(int maxFramesOverDesired) { _maxFramesOverDesired = maxFramesOverDesired; } void setDynamicJitterBuffers(bool setDynamicJitterBuffers); void setStaticDesiredJitterBufferFrames(int staticDesiredJitterBufferFrames); void setUseStDevForJitterCalc(bool useStDevForJitterCalc) { _useStDevForJitterCalc = useStDevForJitterCalc; } void setWindowStarveThreshold(int windowStarveThreshold) { _starveThreshold = windowStarveThreshold; } void setWindowSecondsForDesiredCalcOnTooManyStarves(int windowSecondsForDesiredCalcOnTooManyStarves); void setWindowSecondsForDesiredReduction(int windowSecondsForDesiredReduction); void setRepetitionWithFade(bool repetitionWithFade) { _repetitionWithFade = repetitionWithFade; } virtual AudioStreamStats getAudioStreamStats() const; /// returns the desired number of jitter buffer frames under the dyanmic jitter buffers scheme int getCalculatedJitterBufferFrames() const { return _useStDevForJitterCalc ? _calculatedJitterBufferFramesUsingStDev : _calculatedJitterBufferFramesUsingMaxGap; }; /// returns the desired number of jitter buffer frames using Philip's method int getCalculatedJitterBufferFramesUsingStDev() const { return _calculatedJitterBufferFramesUsingStDev; } /// returns the desired number of jitter buffer frames using Freddy's method int getCalculatedJitterBufferFramesUsingMaxGap() const { return _calculatedJitterBufferFramesUsingMaxGap; } float getLastPopOutputFrameLoudness() const; int getDesiredJitterBufferFrames() const { return _desiredJitterBufferFrames; } int getMaxFramesOverDesired() const { return _maxFramesOverDesired; } int getNumFrameSamples() const { return _ringBuffer.getNumFrameSamples(); } int getFrameCapacity() const { return _ringBuffer.getFrameCapacity(); } int getFramesAvailable() const { return _ringBuffer.framesAvailable(); } double getFramesAvailableAverage() const { return _framesAvailableStat.getAverage(); } bool isStarved() const { return _isStarved; } bool hasStarted() const { return _hasStarted; } int getConsecutiveNotMixedCount() const { return _consecutiveNotMixedCount; } int getStarveCount() const { return _starveCount; } int getSilentFramesDropped() const { return _silentFramesDropped; } int getOverflowCount() const { return _ringBuffer.getOverflowCount(); } int getPacketsReceived() const { return _incomingSequenceNumberStats.getReceived(); } public slots: /// This function should be called every second for all the stats to function properly. If dynamic jitter buffers /// is enabled, those stats are used to calculate _desiredJitterBufferFrames. /// If the stats are not used and dynamic jitter buffers is disabled, it's not necessary to call this function. void perSecondCallbackForUpdatingStats(); private: void packetReceivedUpdateTimingStats(); int clampDesiredJitterBufferFramesValue(int desired) const; int writeSamplesForDroppedPackets(int networkSamples); void popSamplesNoCheck(int samples); void framesAvailableChanged(); protected: // disallow copying of InboundAudioStream objects InboundAudioStream(const InboundAudioStream&); InboundAudioStream& operator= (const InboundAudioStream&); /// parses the info between the seq num and the audio data in the network packet and calculates /// how many audio samples this packet contains (used when filling in samples for dropped packets). /// default implementation assumes no stream properties and raw audio samples after stream propertiess virtual int parseStreamProperties(PacketType type, const QByteArray& packetAfterSeqNum, int& networkSamples); /// parses the audio data in the network packet. /// default implementation assumes packet contains raw audio samples after stream properties virtual int parseAudioData(PacketType type, const QByteArray& packetAfterStreamProperties, int networkSamples); /// writes silent samples to the buffer that may be dropped to reduce latency caused by the buffer virtual int writeDroppableSilentSamples(int silentSamples); /// writes the last written frame repeatedly, gradually fading to silence. /// used for writing samples for dropped packets. virtual int writeLastFrameRepeatedWithFade(int samples); protected: AudioRingBuffer _ringBuffer; bool _lastPopSucceeded; AudioRingBuffer::ConstIterator _lastPopOutput; bool _dynamicJitterBuffers; // if false, _desiredJitterBufferFrames is locked at 1 (old behavior) int _staticDesiredJitterBufferFrames; // if jitter buffer is dynamic, this determines what method of calculating _desiredJitterBufferFrames // if true, Philip's timegap std dev calculation is used. Otherwise, Freddy's max timegap calculation is used bool _useStDevForJitterCalc; int _desiredJitterBufferFrames; // if there are more than _desiredJitterBufferFrames + _maxFramesOverDesired frames, old ringbuffer frames // will be dropped to keep audio delay from building up int _maxFramesOverDesired; bool _isStarved; bool _hasStarted; // stats int _consecutiveNotMixedCount; int _starveCount; int _silentFramesDropped; int _oldFramesDropped; SequenceNumberStats _incomingSequenceNumberStats; quint64 _lastPacketReceivedTime; MovingMinMaxAvg _timeGapStatsForDesiredCalcOnTooManyStarves; // for Freddy's method int _calculatedJitterBufferFramesUsingMaxGap; StDev _stdevStatsForDesiredCalcOnTooManyStarves; // for Philip's method int _calculatedJitterBufferFramesUsingStDev; // the most recent desired frames calculated by Philip's method MovingMinMaxAvg _timeGapStatsForDesiredReduction; int _starveHistoryWindowSeconds; RingBufferHistory _starveHistory; int _starveThreshold; TimeWeightedAvg _framesAvailableStat; // this value is periodically updated with the time-weighted avg from _framesAvailableStat. it is only used for // dropping silent frames right now. int _currentJitterBufferFrames; MovingMinMaxAvg _timeGapStatsForStatsPacket; bool _repetitionWithFade; }; float calculateRepeatedFrameFadeFactor(int indexOfRepeat); #endif // hifi_InboundAudioStream_h