diff --git a/libraries/audio/src/InboundAudioStream.cpp b/libraries/audio/src/InboundAudioStream.cpp index 2686bd53c5..67259c5d99 100644 --- a/libraries/audio/src/InboundAudioStream.cpp +++ b/libraries/audio/src/InboundAudioStream.cpp @@ -104,13 +104,15 @@ int InboundAudioStream::parseData(const QByteArray& packet) { packetReceivedUpdateTimingStats(); - int numAudioSamples; + int networkSamples; if (packetType == PacketTypeSilentAudioFrame) { - readBytes += parseSilentPacketStreamProperties(packet.mid(readBytes), numAudioSamples); + quint16 numSilentSamples = *(reinterpret_cast(dataAt)); + readBytes += sizeof(quint16); + networkSamples = (int)numSilentSamples; } else { // parse the info after the seq number and before the audio data (the stream properties) - readBytes += parseStreamProperties(packetType, packet.mid(readBytes), numAudioSamples); + readBytes += parseStreamProperties(packetType, packet.mid(readBytes), networkSamples); } // handle this packet based on its arrival status. @@ -120,16 +122,16 @@ int InboundAudioStream::parseData(const QByteArray& packet) { // NOTE: we assume that each dropped packet contains the same number of samples // as the packet we just received. 
int packetsDropped = arrivalInfo._seqDiffFromExpected; - writeSamplesForDroppedPackets(packetsDropped * numAudioSamples); + writeSamplesForDroppedPackets(packetsDropped * networkSamples); // fall through to OnTime case } case SequenceNumberStats::OnTime: { // Packet is on time; parse its data to the ringbuffer if (packetType == PacketTypeSilentAudioFrame) { - writeDroppableSilentSamples(numAudioSamples); + writeDroppableSilentSamples(networkSamples); } else { - readBytes += parseAudioData(packetType, packet.mid(readBytes), numAudioSamples); + readBytes += parseAudioData(packetType, packet.mid(readBytes), networkSamples); } break; } @@ -162,15 +164,40 @@ int InboundAudioStream::parseData(const QByteArray& packet) { return readBytes; } +int InboundAudioStream::parseStreamProperties(PacketType type, const QByteArray& packetAfterSeqNum, int& numAudioSamples) { + // mixed audio packets do not have any info between the seq num and the audio data. + numAudioSamples = packetAfterSeqNum.size() / sizeof(int16_t); + return 0; +} + int InboundAudioStream::parseAudioData(PacketType type, const QByteArray& packetAfterStreamProperties, int numAudioSamples) { return _ringBuffer.writeData(packetAfterStreamProperties.data(), numAudioSamples * sizeof(int16_t)); } -int InboundAudioStream::parseSilentPacketStreamProperties(const QByteArray& packetAfterSeqNum, int& numAudioSamples) { - // this is a general silent packet; parse the number of silent samples - quint16 numSilentSamples = *(reinterpret_cast(packetAfterSeqNum.data())); - numAudioSamples = numSilentSamples; - return sizeof(quint16); +int InboundAudioStream::writeDroppableSilentSamples(int silentSamples) { + + // calculate how many silent frames we should drop. 
+ int samplesPerFrame = _ringBuffer.getNumFrameSamples(); + int desiredJitterBufferFramesPlusPadding = _desiredJitterBufferFrames + DESIRED_JITTER_BUFFER_FRAMES_PADDING; + int numSilentFramesToDrop = 0; + + if (silentSamples >= samplesPerFrame && _currentJitterBufferFrames > desiredJitterBufferFramesPlusPadding) { + + // our avg jitter buffer size exceeds its desired value, so ignore some silent + // frames to get that size as close to desired as possible + int numSilentFramesToDropDesired = _currentJitterBufferFrames - desiredJitterBufferFramesPlusPadding; + int numSilentFramesReceived = silentSamples / samplesPerFrame; + numSilentFramesToDrop = std::min(numSilentFramesToDropDesired, numSilentFramesReceived); + + // don't reset _currentJitterBufferFrames here; we want to be able to drop further silent frames + // without waiting for _framesAvailableStat to fill up to 10s of samples. + _currentJitterBufferFrames -= numSilentFramesToDrop; + _silentFramesDropped += numSilentFramesToDrop; + + _framesAvailableStat.reset(); + } + + return _ringBuffer.addSilentFrame(silentSamples - numSilentFramesToDrop * samplesPerFrame); } int InboundAudioStream::popSamples(int maxSamples, bool allOrNothing, bool starveIfNoSamplesPopped) { @@ -386,34 +413,8 @@ void InboundAudioStream::packetReceivedUpdateTimingStats() { _lastPacketReceivedTime = now; } -int InboundAudioStream::writeDroppableSilentSamples(int numSilentSamples) { - - // calculate how many silent frames we should drop. 
- int samplesPerFrame = _ringBuffer.getNumFrameSamples(); - int desiredJitterBufferFramesPlusPadding = _desiredJitterBufferFrames + DESIRED_JITTER_BUFFER_FRAMES_PADDING; - int numSilentFramesToDrop = 0; - - if (numSilentSamples >= samplesPerFrame && _currentJitterBufferFrames > desiredJitterBufferFramesPlusPadding) { - - // our avg jitter buffer size exceeds its desired value, so ignore some silent - // frames to get that size as close to desired as possible - int numSilentFramesToDropDesired = _currentJitterBufferFrames - desiredJitterBufferFramesPlusPadding; - int numSilentFramesReceived = numSilentSamples / samplesPerFrame; - numSilentFramesToDrop = std::min(numSilentFramesToDropDesired, numSilentFramesReceived); - - // dont reset _currentJitterBufferFrames here; we want to be able to drop further silent frames - // without waiting for _framesAvailableStat to fill up to 10s of samples. - _currentJitterBufferFrames -= numSilentFramesToDrop; - _silentFramesDropped += numSilentFramesToDrop; - - _framesAvailableStat.reset(); - } - - return _ringBuffer.addSilentFrame(numSilentSamples - numSilentFramesToDrop * samplesPerFrame); -} - -int InboundAudioStream::writeSamplesForDroppedPackets(int numSamples) { - return writeDroppableSilentSamples(numSamples); +int InboundAudioStream::writeSamplesForDroppedPackets(int networkSamples) { + return writeDroppableSilentSamples(networkSamples); } float InboundAudioStream::getLastPopOutputFrameLoudness() const { diff --git a/libraries/audio/src/InboundAudioStream.h b/libraries/audio/src/InboundAudioStream.h index a3d8729120..62a22b61ab 100644 --- a/libraries/audio/src/InboundAudioStream.h +++ b/libraries/audio/src/InboundAudioStream.h @@ -159,7 +159,7 @@ private: void packetReceivedUpdateTimingStats(); int clampDesiredJitterBufferFramesValue(int desired) const; - int writeSamplesForDroppedPackets(int numSamples); + int writeSamplesForDroppedPackets(int networkSamples); void popSamplesNoCheck(int samples); void 
framesAvailableChanged(); @@ -171,17 +171,15 @@ protected: /// parses the info between the seq num and the audio data in the network packet and calculates /// how many audio samples this packet contains (used when filling in samples for dropped packets). - virtual int parseStreamProperties(PacketType type, const QByteArray& packetAfterSeqNum, int& numAudioSamples) = 0; + /// default implementation assumes no stream properties and raw audio samples after stream properties + virtual int parseStreamProperties(PacketType type, const QByteArray& packetAfterSeqNum, int& networkSamples); - /// parses a silent packet after the seq. default implementation assumes the number of silent samples - /// is the only thing in packetAfterSeqNum and should work in most cases - virtual int parseSilentPacketStreamProperties(const QByteArray& packetAfterSeqNum, int& numAudioSamples); - /// parses the audio data in the network packet. /// default implementation assumes packet contains raw audio samples after stream properties - virtual int parseAudioData(PacketType type, const QByteArray& packetAfterStreamProperties, int numAudioSamples); + virtual int parseAudioData(PacketType type, const QByteArray& packetAfterStreamProperties, int networkSamples); - int writeDroppableSilentSamples(int numSilentSamples); + /// writes silent samples to the buffer that may be dropped to reduce latency caused by the buffer + virtual int writeDroppableSilentSamples(int silentSamples); protected: diff --git a/libraries/audio/src/MixedAudioStream.cpp b/libraries/audio/src/MixedAudioStream.cpp index 0041348d26..85bf71747a 100644 --- a/libraries/audio/src/MixedAudioStream.cpp +++ b/libraries/audio/src/MixedAudioStream.cpp @@ -15,9 +15,3 @@ MixedAudioStream::MixedAudioStream(int numFrameSamples, int numFramesCapacity, c : InboundAudioStream(numFrameSamples, numFramesCapacity, settings) { } - -int MixedAudioStream::parseStreamProperties(PacketType type, const QByteArray& packetAfterSeqNum, int& numAudioSamples) 
{ - // mixed audio packets do not have any info between the seq num and the audio data. - numAudioSamples = packetAfterSeqNum.size() / sizeof(int16_t); - return 0; -} diff --git a/libraries/audio/src/MixedAudioStream.h b/libraries/audio/src/MixedAudioStream.h index 0b1979003d..edb26c486f 100644 --- a/libraries/audio/src/MixedAudioStream.h +++ b/libraries/audio/src/MixedAudioStream.h @@ -20,9 +20,6 @@ public: MixedAudioStream(int numFrameSamples, int numFramesCapacity, const InboundAudioStream::Settings& settings); float getNextOutputFrameLoudness() const { return _ringBuffer.getNextOutputFrameLoudness(); } - -protected: - int parseStreamProperties(PacketType type, const QByteArray& packetAfterSeqNum, int& numAudioSamples); }; #endif // hifi_MixedAudioStream_h diff --git a/libraries/audio/src/MixedProcessedAudioStream.cpp b/libraries/audio/src/MixedProcessedAudioStream.cpp index fd3ecb3bc9..2922459140 100644 --- a/libraries/audio/src/MixedProcessedAudioStream.cpp +++ b/libraries/audio/src/MixedProcessedAudioStream.cpp @@ -22,29 +22,7 @@ void MixedProcessedAudioStream::outputFormatChanged(int outputFormatChannelCount _ringBuffer.resizeForFrameSize(deviceOutputFrameSize); } -int MixedProcessedAudioStream::parseStreamProperties(PacketType type, const QByteArray& packetAfterSeqNum, int& numAudioSamples) { - // mixed audio packets do not have any info between the seq num and the audio data. - int numNetworkSamples = packetAfterSeqNum.size() / sizeof(int16_t); - - // since numAudioSamples is used to know how many samples to add for each dropped packet before this one, - // we want to set it to the number of device audio samples since this stream contains device audio samples, not network samples. 
- numAudioSamples = networkToDeviceSamples(numNetworkSamples); - - return 0; -} - -int MixedProcessedAudioStream::parseSilentPacketStreamProperties(const QByteArray& packetAfterSeqNum, int& numAudioSamples) { - int numNetworkSamples; - int bytesRead = InboundAudioStream::parseSilentPacketStreamProperties(packetAfterSeqNum, numNetworkSamples); - - // since numAudioSamples is used to know how many samples to add for each dropped packet before this one, - // we want to set it to the number of device audio samples since this stream contains device audio samples, not network samples. - numAudioSamples = networkToDeviceSamples(numNetworkSamples); - - return bytesRead; -} - -int MixedProcessedAudioStream::parseAudioData(PacketType type, const QByteArray& packetAfterStreamProperties, int numAudioSamples) { +int MixedProcessedAudioStream::parseAudioData(PacketType type, const QByteArray& packetAfterStreamProperties, int networkSamples) { QByteArray outputBuffer; emit processSamples(packetAfterStreamProperties, outputBuffer); @@ -54,7 +32,11 @@ int MixedProcessedAudioStream::parseAudioData(PacketType type, const QByteArray& return packetAfterStreamProperties.size(); } +int MixedProcessedAudioStream::writeDroppableSilentSamples(int silentSamples) { + return InboundAudioStream::writeDroppableSilentSamples(networkToDeviceSamples(silentSamples)); +} + int MixedProcessedAudioStream::networkToDeviceSamples(int networkSamples) { const int STEREO_DIVIDER = 2; return networkSamples * _outputFormatChannelsTimesSampleRate / (STEREO_DIVIDER * SAMPLE_RATE); -} \ No newline at end of file +} diff --git a/libraries/audio/src/MixedProcessedAudioStream.h b/libraries/audio/src/MixedProcessedAudioStream.h index 7c89a5106d..ec65c8f712 100644 --- a/libraries/audio/src/MixedProcessedAudioStream.h +++ b/libraries/audio/src/MixedProcessedAudioStream.h @@ -27,9 +27,8 @@ public: void outputFormatChanged(int outputFormatChannelCountTimesSampleRate); protected: - int parseStreamProperties(PacketType 
type, const QByteArray& packetAfterSeqNum, int& numAudioSamples); - int parseSilentPacketStreamProperties(const QByteArray& packetAfterSeqNum, int& numAudioSamples); - int parseAudioData(PacketType type, const QByteArray& packetAfterStreamProperties, int numAudioSamples); + int parseAudioData(PacketType type, const QByteArray& packetAfterStreamProperties, int networkSamples); + int writeDroppableSilentSamples(int silentSamples); private: int networkToDeviceSamples(int networkSamples);