From 20824f038c67b63aedf38481c359a9bc668b7822 Mon Sep 17 00:00:00 2001 From: Brad Hefta-Gaub Date: Mon, 18 Jul 2016 21:59:44 -0700 Subject: [PATCH] include codec in audio stream packets so that each side can discard packets that don't match --- assignment-client/src/audio/AudioMixer.cpp | 17 ++++++++++--- .../src/audio/AudioMixerClientData.cpp | 2 +- .../src/audio/AudioMixerClientData.h | 3 +++ libraries/audio-client/src/AudioClient.cpp | 5 ++-- .../audio/src/AbstractAudioInterface.cpp | 18 ++++++++++--- libraries/audio/src/AbstractAudioInterface.h | 3 ++- libraries/audio/src/AudioInjector.cpp | 12 +++++++++ libraries/audio/src/InboundAudioStream.cpp | 25 ++++++++++++++++--- libraries/audio/src/InjectedAudioStream.cpp | 1 + libraries/networking/src/udt/BasePacket.cpp | 5 ++-- .../networking/src/udt/PacketHeaders.cpp | 7 ++++++ libraries/networking/src/udt/PacketHeaders.h | 5 ++++ 12 files changed, 84 insertions(+), 19 deletions(-) diff --git a/assignment-client/src/audio/AudioMixer.cpp b/assignment-client/src/audio/AudioMixer.cpp index 40e22f855a..f4b80f55b4 100644 --- a/assignment-client/src/audio/AudioMixer.cpp +++ b/assignment-client/src/audio/AudioMixer.cpp @@ -768,28 +768,39 @@ void AudioMixer::broadcastMixes() { std::unique_ptr mixPacket; + const int MAX_CODEC_NAME = 30; // way over estimate + if (mixHasAudio) { - int mixPacketBytes = sizeof(quint16) + AudioConstants::NETWORK_FRAME_BYTES_STEREO; + int mixPacketBytes = sizeof(quint16) + MAX_CODEC_NAME+ AudioConstants::NETWORK_FRAME_BYTES_STEREO; mixPacket = NLPacket::create(PacketType::MixedAudio, mixPacketBytes); // pack sequence number quint16 sequence = nodeData->getOutgoingSequenceNumber(); mixPacket->writePrimitive(sequence); + // write the codec + QString codecInPacket = nodeData->getCodecName(); + mixPacket->writeString(codecInPacket); + QByteArray decodedBuffer(reinterpret_cast(_clampedSamples), AudioConstants::NETWORK_FRAME_BYTES_STEREO); QByteArray encodedBuffer; nodeData->encode(decodedBuffer, encodedBuffer); // pack mixed audio samples mixPacket->write(encodedBuffer.constData(), encodedBuffer.size()); - } else { - int silentPacketBytes = sizeof(quint16) + sizeof(quint16); + } + else { + int silentPacketBytes = sizeof(quint16) + sizeof(quint16) + MAX_CODEC_NAME; mixPacket = NLPacket::create(PacketType::SilentAudioFrame, silentPacketBytes); // pack sequence number quint16 sequence = nodeData->getOutgoingSequenceNumber(); mixPacket->writePrimitive(sequence); + // write the codec + QString codecInPacket = nodeData->getCodecName(); + mixPacket->writeString(codecInPacket); + // pack number of silent audio samples quint16 numSilentSamples = AudioConstants::NETWORK_FRAME_SAMPLES_STEREO; mixPacket->writePrimitive(numSilentSamples); diff --git a/assignment-client/src/audio/AudioMixerClientData.cpp b/assignment-client/src/audio/AudioMixerClientData.cpp index 5c2ce8bf57..dc481e1c59 100644 --- a/assignment-client/src/audio/AudioMixerClientData.cpp +++ b/assignment-client/src/audio/AudioMixerClientData.cpp @@ -128,7 +128,6 @@ int AudioMixerClientData::parseData(ReceivedMessage& message) { isMicStream = true; } else if (packetType == PacketType::InjectAudio) { // this is injected audio - // grab the stream identifier for this injected audio message.seek(sizeof(quint16)); QUuid streamIdentifier = QUuid::fromRfc4122(message.readWithoutCopy(NUM_BYTES_RFC4122_UUID)); @@ -167,6 +166,7 @@ int AudioMixerClientData::parseData(ReceivedMessage& message) { // check the overflow count before we parse data auto overflowBefore = matchingStream->getOverflowCount(); + auto parseResult = matchingStream->parseData(message); if (matchingStream->getOverflowCount() > overflowBefore) { diff --git a/assignment-client/src/audio/AudioMixerClientData.h b/assignment-client/src/audio/AudioMixerClientData.h index da2bf8997c..85bf3fa3a1 100644 --- a/assignment-client/src/audio/AudioMixerClientData.h +++ b/assignment-client/src/audio/AudioMixerClientData.h @@ -78,6 +78,9 @@ public: } } + QString getCodecName() { return _selectedCodecName; } + + signals: void injectorStreamFinished(const QUuid& streamIdentifier); diff --git a/libraries/audio-client/src/AudioClient.cpp b/libraries/audio-client/src/AudioClient.cpp index 7cf8574529..ac42de903d 100644 --- a/libraries/audio-client/src/AudioClient.cpp +++ b/libraries/audio-client/src/AudioClient.cpp @@ -834,7 +834,7 @@ void AudioClient::handleAudioInput() { encodedBuffer = decocedBuffer; } - emitAudioPacket(encodedBuffer.constData(), encodedBuffer.size(), _outgoingAvatarAudioSequenceNumber, audioTransform, packetType); + emitAudioPacket(encodedBuffer.constData(), encodedBuffer.size(), _outgoingAvatarAudioSequenceNumber, audioTransform, packetType, _selectedCodecName); _stats.sentPacket(); } } @@ -852,7 +852,7 @@ void AudioClient::handleRecordedAudioInput(const QByteArray& audio) { } // FIXME check a flag to see if we should echo audio? - emitAudioPacket(encodedBuffer.data(), encodedBuffer.size(), _outgoingAvatarAudioSequenceNumber, audioTransform, PacketType::MicrophoneAudioWithEcho); + emitAudioPacket(encodedBuffer.data(), encodedBuffer.size(), _outgoingAvatarAudioSequenceNumber, audioTransform, PacketType::MicrophoneAudioWithEcho, _selectedCodecName); } void AudioClient::mixLocalAudioInjectors(int16_t* inputBuffer) { @@ -1015,7 +1015,6 @@ bool AudioClient::outputLocalInjector(bool isStereo, AudioInjector* injector) { // no reason to lock access to the vector of injectors. if (!_activeLocalAudioInjectors.contains(injector)) { qDebug() << "adding new injector"; - _activeLocalAudioInjectors.append(injector); } else { qDebug() << "injector exists in active list already"; diff --git a/libraries/audio/src/AbstractAudioInterface.cpp b/libraries/audio/src/AbstractAudioInterface.cpp index b347d57450..aa5c85283c 100644 --- a/libraries/audio/src/AbstractAudioInterface.cpp +++ b/libraries/audio/src/AbstractAudioInterface.cpp @@ -19,7 +19,8 @@ #include "AudioConstants.h" -void AbstractAudioInterface::emitAudioPacket(const void* audioData, size_t bytes, quint16& sequenceNumber, const Transform& transform, PacketType packetType) { +void AbstractAudioInterface::emitAudioPacket(const void* audioData, size_t bytes, quint16& sequenceNumber, + const Transform& transform, PacketType packetType, QString codecName) { static std::mutex _mutex; using Locker = std::unique_lock; auto nodeList = DependencyManager::get(); @@ -27,10 +28,19 @@ void AbstractAudioInterface::emitAudioPacket(const void* audioData, size_t bytes if (audioMixer && audioMixer->getActiveSocket()) { Locker lock(_mutex); auto audioPacket = NLPacket::create(packetType); + + // FIXME - this is not a good way to determine stereoness with codecs.... quint8 isStereo = bytes == AudioConstants::NETWORK_FRAME_BYTES_STEREO ? 1 : 0; // write sequence number - audioPacket->writePrimitive(sequenceNumber++); + auto sequence = sequenceNumber++; + audioPacket->writePrimitive(sequence); + + // write the codec - don't include this for injected audio + if (packetType != PacketType::InjectAudio) { + auto stringSize = audioPacket->writeString(codecName); + } + if (packetType == PacketType::SilentAudioFrame) { // pack num silent samples quint16 numSilentSamples = isStereo ? @@ -49,8 +59,8 @@ void AbstractAudioInterface::emitAudioPacket(const void* audioData, size_t bytes if (audioPacket->getType() != PacketType::SilentAudioFrame) { // audio samples have already been packed (written to networkAudioSamples) - audioPacket->setPayloadSize(audioPacket->getPayloadSize() + bytes); - static const int leadingBytes = sizeof(quint16) + sizeof(glm::vec3) + sizeof(glm::quat) + sizeof(quint8); + int leadingBytes = audioPacket->getPayloadSize(); + audioPacket->setPayloadSize(leadingBytes + bytes); memcpy(audioPacket->getPayload() + leadingBytes, audioData, bytes); } nodeList->flagTimeForConnectionStep(LimitedNodeList::ConnectionStep::SendAudioPacket); diff --git a/libraries/audio/src/AbstractAudioInterface.h b/libraries/audio/src/AbstractAudioInterface.h index ee52622d7e..223421a7ab 100644 --- a/libraries/audio/src/AbstractAudioInterface.h +++ b/libraries/audio/src/AbstractAudioInterface.h @@ -28,7 +28,8 @@ class AbstractAudioInterface : public QObject { public: AbstractAudioInterface(QObject* parent = 0) : QObject(parent) {}; - static void emitAudioPacket(const void* audioData, size_t bytes, quint16& sequenceNumber, const Transform& transform, PacketType packetType); + static void emitAudioPacket(const void* audioData, size_t bytes, quint16& sequenceNumber, const Transform& transform, + PacketType packetType, QString codecName = QString("")); public slots: virtual bool outputLocalInjector(bool isStereo, AudioInjector* injector) = 0; diff --git a/libraries/audio/src/AudioInjector.cpp b/libraries/audio/src/AudioInjector.cpp index 9c49ce66d8..5064686565 100644 --- a/libraries/audio/src/AudioInjector.cpp +++ b/libraries/audio/src/AudioInjector.cpp @@ -218,6 +218,14 @@ const uchar MAX_INJECTOR_VOLUME = 0xFF; static const int64_t NEXT_FRAME_DELTA_ERROR_OR_FINISHED = -1; static const int64_t NEXT_FRAME_DELTA_IMMEDIATELY = 0; +qint64 writeStringToStream(const QString& string, QDataStream& stream) { + QByteArray data = string.toUtf8(); + uint32_t length = data.length(); + stream << static_cast(length); + stream << data; + return length + sizeof(uint32_t); +} + int64_t AudioInjector::injectNextFrame() { if (stateHas(AudioInjectorState::NetworkInjectionFinished)) { qDebug() << "AudioInjector::injectNextFrame called but AudioInjector has finished and was not restarted. Returning."; @@ -264,6 +272,10 @@ int64_t AudioInjector::injectNextFrame() { // pack some placeholder sequence number for now audioPacketStream << (quint16) 0; + // pack some placeholder sequence number for now + //QString noCodecForInjectors(""); + //writeStringToStream(noCodecForInjectors, audioPacketStream); + // pack stream identifier (a generated UUID) audioPacketStream << QUuid::createUuid(); diff --git a/libraries/audio/src/InboundAudioStream.cpp b/libraries/audio/src/InboundAudioStream.cpp index c9b9363b1b..0a207acba0 100644 --- a/libraries/audio/src/InboundAudioStream.cpp +++ b/libraries/audio/src/InboundAudioStream.cpp @@ -58,6 +58,7 @@ void InboundAudioStream::reset() { _isStarved = true; _hasStarted = false; resetStats(); + //cleanupCodec(); // FIXME??? } void InboundAudioStream::resetStats() { @@ -99,12 +100,17 @@ void InboundAudioStream::perSecondCallbackForUpdatingStats() { } int InboundAudioStream::parseData(ReceivedMessage& message) { - + PacketType packetType = message.getType(); + // parse sequence number and track it quint16 sequence; message.readPrimitive(&sequence); SequenceNumberStats::ArrivalInfo arrivalInfo = _incomingSequenceNumberStats.sequenceNumberReceived(sequence, message.getSourceID()); + QString codecInPacket(""); + if (packetType != PacketType::InjectAudio) { + codecInPacket = message.readString(); + } packetReceivedUpdateTimingStats(); @@ -112,9 +118,10 @@ int InboundAudioStream::parseData(ReceivedMessage& message) { // parse the info after the seq number and before the audio data (the stream properties) int prePropertyPosition = message.getPosition(); - int propertyBytes = parseStreamProperties(message.getType(), message.readWithoutCopy(message.getBytesLeftToRead()), networkSamples); + auto afterHeader = message.readWithoutCopy(message.getBytesLeftToRead()); + int propertyBytes = parseStreamProperties(message.getType(), afterHeader, networkSamples); message.seek(prePropertyPosition + propertyBytes); - + // handle this packet based on its arrival status. switch (arrivalInfo._status) { case SequenceNumberStats::Early: { @@ -129,9 +136,19 @@ int InboundAudioStream::parseData(ReceivedMessage& message) { case SequenceNumberStats::OnTime: { // Packet is on time; parse its data to the ringbuffer if (message.getType() == PacketType::SilentAudioFrame) { + // FIXME - do some codecs need to know about these silen frames? writeDroppableSilentSamples(networkSamples); } else { - parseAudioData(message.getType(), message.readWithoutCopy(message.getBytesLeftToRead())); + // note: PCM and no codec are identical + bool selectedPCM = _selectedCodecName == "pcm" || _selectedCodecName == ""; + bool packetPCM = codecInPacket == "pcm" || codecInPacket == ""; + if (codecInPacket == _selectedCodecName || (packetPCM && selectedPCM)) { + auto afterProperties = message.readWithoutCopy(message.getBytesLeftToRead()); + parseAudioData(message.getType(), afterProperties); + } else { + qDebug() << __FUNCTION__ << "codec mismatch: expected" << _selectedCodecName << "got" << codecInPacket << "writing silence"; + writeDroppableSilentSamples(networkSamples); + } } break; } diff --git a/libraries/audio/src/InjectedAudioStream.cpp b/libraries/audio/src/InjectedAudioStream.cpp index 54e0f92bea..ccd581959f 100644 --- a/libraries/audio/src/InjectedAudioStream.cpp +++ b/libraries/audio/src/InjectedAudioStream.cpp @@ -33,6 +33,7 @@ const uchar MAX_INJECTOR_VOLUME = 255; int InjectedAudioStream::parseStreamProperties(PacketType type, const QByteArray& packetAfterSeqNum, int& numAudioSamples) { + // setup a data stream to read from this packet QDataStream packetStream(packetAfterSeqNum); diff --git a/libraries/networking/src/udt/BasePacket.cpp b/libraries/networking/src/udt/BasePacket.cpp index 18552ca966..c6501943c7 100644 --- a/libraries/networking/src/udt/BasePacket.cpp +++ b/libraries/networking/src/udt/BasePacket.cpp @@ -154,8 +154,8 @@ qint64 BasePacket::writeString(const QString& string) { QByteArray data = string.toUtf8(); uint32_t length = data.length(); writePrimitive(length); - writeData(data.constData(), data.length()); - seek(pos() + length); + write(data.constData(), data.length()); + //seek(pos() + length); return length + sizeof(uint32_t); } @@ -176,7 +176,6 @@ bool BasePacket::reset() { } qint64 BasePacket::writeData(const char* data, qint64 maxSize) { - Q_ASSERT_X(maxSize <= bytesAvailableForWrite(), "BasePacket::writeData", "not enough space for write"); // make sure we have the space required to write this block diff --git a/libraries/networking/src/udt/PacketHeaders.cpp b/libraries/networking/src/udt/PacketHeaders.cpp index fca006ae87..eb8739dd49 100644 --- a/libraries/networking/src/udt/PacketHeaders.cpp +++ b/libraries/networking/src/udt/PacketHeaders.cpp @@ -72,6 +72,13 @@ PacketVersion versionForPacketType(PacketType packetType) { case PacketType::DomainServerAddedNode: return static_cast(DomainServerAddedNodeVersion::PermissionsGrid); + case PacketType::MixedAudio: + case PacketType::SilentAudioFrame: + case PacketType::InjectAudio: + case PacketType::MicrophoneAudioNoEcho: + case PacketType::MicrophoneAudioWithEcho: + return static_cast(AudioVersion::CodecNameInAudioPackets); + default: return 17; } diff --git a/libraries/networking/src/udt/PacketHeaders.h b/libraries/networking/src/udt/PacketHeaders.h index 85030135a1..e723ea38eb 100644 --- a/libraries/networking/src/udt/PacketHeaders.h +++ b/libraries/networking/src/udt/PacketHeaders.h @@ -213,4 +213,9 @@ enum class DomainListVersion : PacketVersion { PermissionsGrid }; +enum class AudioVersion : PacketVersion { + HasCompressedAudio = 17, + CodecNameInAudioPackets +}; + #endif // hifi_PacketHeaders_h