diff --git a/assignment-client/src/audio/AudioMixer.cpp b/assignment-client/src/audio/AudioMixer.cpp index 40e22f855a..8f752e70d0 100644 --- a/assignment-client/src/audio/AudioMixer.cpp +++ b/assignment-client/src/audio/AudioMixer.cpp @@ -525,7 +525,6 @@ void AudioMixer::handleNegotiateAudioFormat(QSharedPointer mess } } - auto clientData = dynamic_cast(sendingNode->getLinkedData()); // FIXME - why would we not have client data at this point?? @@ -539,14 +538,7 @@ void AudioMixer::handleNegotiateAudioFormat(QSharedPointer mess clientData->setupCodec(selectedCodec, selectedCodecName); qDebug() << "selectedCodecName:" << selectedCodecName; - - auto replyPacket = NLPacket::create(PacketType::SelectedAudioFormat); - - // write them to our packet - replyPacket->writeString(selectedCodecName); - - auto nodeList = DependencyManager::get(); - nodeList->sendPacket(std::move(replyPacket), *sendingNode); + clientData->sendSelectAudioFormat(sendingNode, selectedCodecName); } void AudioMixer::handleNodeKilled(SharedNodePointer killedNode) { @@ -769,13 +761,18 @@ void AudioMixer::broadcastMixes() { std::unique_ptr mixPacket; if (mixHasAudio) { - int mixPacketBytes = sizeof(quint16) + AudioConstants::NETWORK_FRAME_BYTES_STEREO; + int mixPacketBytes = sizeof(quint16) + AudioConstants::MAX_CODEC_NAME_LENGTH_ON_WIRE + + AudioConstants::NETWORK_FRAME_BYTES_STEREO; mixPacket = NLPacket::create(PacketType::MixedAudio, mixPacketBytes); // pack sequence number quint16 sequence = nodeData->getOutgoingSequenceNumber(); mixPacket->writePrimitive(sequence); + // write the codec + QString codecInPacket = nodeData->getCodecName(); + mixPacket->writeString(codecInPacket); + QByteArray decodedBuffer(reinterpret_cast(_clampedSamples), AudioConstants::NETWORK_FRAME_BYTES_STEREO); QByteArray encodedBuffer; nodeData->encode(decodedBuffer, encodedBuffer); @@ -783,13 +780,17 @@ void AudioMixer::broadcastMixes() { // pack mixed audio samples mixPacket->write(encodedBuffer.constData(), encodedBuffer.size()); } else { - int silentPacketBytes = sizeof(quint16) + sizeof(quint16); + int silentPacketBytes = sizeof(quint16) + sizeof(quint16) + AudioConstants::MAX_CODEC_NAME_LENGTH_ON_WIRE; mixPacket = NLPacket::create(PacketType::SilentAudioFrame, silentPacketBytes); // pack sequence number quint16 sequence = nodeData->getOutgoingSequenceNumber(); mixPacket->writePrimitive(sequence); + // write the codec + QString codecInPacket = nodeData->getCodecName(); + mixPacket->writeString(codecInPacket); + // pack number of silent audio samples quint16 numSilentSamples = AudioConstants::NETWORK_FRAME_SAMPLES_STEREO; mixPacket->writePrimitive(numSilentSamples); diff --git a/assignment-client/src/audio/AudioMixerClientData.cpp b/assignment-client/src/audio/AudioMixerClientData.cpp index 5c2ce8bf57..85491537a2 100644 --- a/assignment-client/src/audio/AudioMixerClientData.cpp +++ b/assignment-client/src/audio/AudioMixerClientData.cpp @@ -113,6 +113,8 @@ int AudioMixerClientData::parseData(ReceivedMessage& message) { avatarAudioStream->setupCodec(_codec, _selectedCodecName, AudioConstants::MONO); qDebug() << "creating new AvatarAudioStream... codec:" << _selectedCodecName; + connect(avatarAudioStream, &InboundAudioStream::mismatchedAudioCodec, this, &AudioMixerClientData::sendSelectAudioFormat); + auto emplaced = _audioStreams.emplace( QUuid(), std::unique_ptr { avatarAudioStream } @@ -128,7 +130,6 @@ int AudioMixerClientData::parseData(ReceivedMessage& message) { isMicStream = true; } else if (packetType == PacketType::InjectAudio) { // this is injected audio - // grab the stream identifier for this injected audio message.seek(sizeof(quint16)); QUuid streamIdentifier = QUuid::fromRfc4122(message.readWithoutCopy(NUM_BYTES_RFC4122_UUID)); @@ -344,6 +345,14 @@ QJsonObject AudioMixerClientData::getAudioStreamStats() { return result; } +void AudioMixerClientData::sendSelectAudioFormat(SharedNodePointer node, const QString& selectedCodecName) { + auto replyPacket = NLPacket::create(PacketType::SelectedAudioFormat); + replyPacket->writeString(selectedCodecName); + auto nodeList = DependencyManager::get(); + nodeList->sendPacket(std::move(replyPacket), *node); +} + + void AudioMixerClientData::setupCodec(CodecPluginPointer codec, const QString& codecName) { cleanupCodec(); // cleanup any previously allocated coders first _codec = codec; diff --git a/assignment-client/src/audio/AudioMixerClientData.h b/assignment-client/src/audio/AudioMixerClientData.h index da2bf8997c..babfae3539 100644 --- a/assignment-client/src/audio/AudioMixerClientData.h +++ b/assignment-client/src/audio/AudioMixerClientData.h @@ -78,9 +78,14 @@ public: } } + QString getCodecName() { return _selectedCodecName; } + signals: void injectorStreamFinished(const QUuid& streamIdentifier); +public slots: + void sendSelectAudioFormat(SharedNodePointer node, const QString& selectedCodecName); + private: QReadWriteLock _streamsLock; AudioStreamMap _audioStreams; // microphone stream from avatar is stored under key of null UUID diff --git a/libraries/audio-client/src/AudioClient.cpp b/libraries/audio-client/src/AudioClient.cpp index 7cf8574529..ac42de903d 100644 --- a/libraries/audio-client/src/AudioClient.cpp +++ b/libraries/audio-client/src/AudioClient.cpp @@ -834,7 +834,7 @@ void AudioClient::handleAudioInput() { encodedBuffer = decocedBuffer; } - emitAudioPacket(encodedBuffer.constData(), encodedBuffer.size(), _outgoingAvatarAudioSequenceNumber, audioTransform, packetType); + emitAudioPacket(encodedBuffer.constData(), encodedBuffer.size(), _outgoingAvatarAudioSequenceNumber, audioTransform, packetType, _selectedCodecName); _stats.sentPacket(); } } @@ -852,7 +852,7 @@ void AudioClient::handleRecordedAudioInput(const QByteArray& audio) { } // FIXME check a flag to see if we should echo audio? - emitAudioPacket(encodedBuffer.data(), encodedBuffer.size(), _outgoingAvatarAudioSequenceNumber, audioTransform, PacketType::MicrophoneAudioWithEcho); + emitAudioPacket(encodedBuffer.data(), encodedBuffer.size(), _outgoingAvatarAudioSequenceNumber, audioTransform, PacketType::MicrophoneAudioWithEcho, _selectedCodecName); } void AudioClient::mixLocalAudioInjectors(int16_t* inputBuffer) { @@ -1015,7 +1015,6 @@ bool AudioClient::outputLocalInjector(bool isStereo, AudioInjector* injector) { // no reason to lock access to the vector of injectors. if (!_activeLocalAudioInjectors.contains(injector)) { qDebug() << "adding new injector"; - _activeLocalAudioInjectors.append(injector); } else { qDebug() << "injector exists in active list already"; diff --git a/libraries/audio/src/AbstractAudioInterface.cpp b/libraries/audio/src/AbstractAudioInterface.cpp index b347d57450..bf43c35cb9 100644 --- a/libraries/audio/src/AbstractAudioInterface.cpp +++ b/libraries/audio/src/AbstractAudioInterface.cpp @@ -19,7 +19,8 @@ #include "AudioConstants.h" -void AbstractAudioInterface::emitAudioPacket(const void* audioData, size_t bytes, quint16& sequenceNumber, const Transform& transform, PacketType packetType) { +void AbstractAudioInterface::emitAudioPacket(const void* audioData, size_t bytes, quint16& sequenceNumber, + const Transform& transform, PacketType packetType, QString codecName) { static std::mutex _mutex; using Locker = std::unique_lock; auto nodeList = DependencyManager::get(); @@ -27,10 +28,17 @@ void AbstractAudioInterface::emitAudioPacket(const void* audioData, size_t bytes if (audioMixer && audioMixer->getActiveSocket()) { Locker lock(_mutex); auto audioPacket = NLPacket::create(packetType); + + // FIXME - this is not a good way to determine stereoness with codecs.... quint8 isStereo = bytes == AudioConstants::NETWORK_FRAME_BYTES_STEREO ? 1 : 0; // write sequence number - audioPacket->writePrimitive(sequenceNumber++); + auto sequence = sequenceNumber++; + audioPacket->writePrimitive(sequence); + + // write the codec + audioPacket->writeString(codecName); + if (packetType == PacketType::SilentAudioFrame) { // pack num silent samples quint16 numSilentSamples = isStereo ? @@ -49,8 +57,8 @@ void AbstractAudioInterface::emitAudioPacket(const void* audioData, size_t bytes if (audioPacket->getType() != PacketType::SilentAudioFrame) { // audio samples have already been packed (written to networkAudioSamples) - audioPacket->setPayloadSize(audioPacket->getPayloadSize() + bytes); - static const int leadingBytes = sizeof(quint16) + sizeof(glm::vec3) + sizeof(glm::quat) + sizeof(quint8); + int leadingBytes = audioPacket->getPayloadSize(); + audioPacket->setPayloadSize(leadingBytes + bytes); memcpy(audioPacket->getPayload() + leadingBytes, audioData, bytes); } nodeList->flagTimeForConnectionStep(LimitedNodeList::ConnectionStep::SendAudioPacket); diff --git a/libraries/audio/src/AbstractAudioInterface.h b/libraries/audio/src/AbstractAudioInterface.h index ee52622d7e..223421a7ab 100644 --- a/libraries/audio/src/AbstractAudioInterface.h +++ b/libraries/audio/src/AbstractAudioInterface.h @@ -28,7 +28,8 @@ class AbstractAudioInterface : public QObject { public: AbstractAudioInterface(QObject* parent = 0) : QObject(parent) {}; - static void emitAudioPacket(const void* audioData, size_t bytes, quint16& sequenceNumber, const Transform& transform, PacketType packetType); + static void emitAudioPacket(const void* audioData, size_t bytes, quint16& sequenceNumber, const Transform& transform, + PacketType packetType, QString codecName = QString("")); public slots: virtual bool outputLocalInjector(bool isStereo, AudioInjector* injector) = 0; diff --git a/libraries/audio/src/AudioConstants.h b/libraries/audio/src/AudioConstants.h index dbbe434915..9271323498 100644 --- a/libraries/audio/src/AudioConstants.h +++ b/libraries/audio/src/AudioConstants.h @@ -26,6 +26,8 @@ namespace AudioConstants { inline const char* getAudioFrameName() { return "com.highfidelity.recording.Audio"; } + const int MAX_CODEC_NAME_LENGTH = 30; + const int MAX_CODEC_NAME_LENGTH_ON_WIRE = MAX_CODEC_NAME_LENGTH + sizeof(uint32_t); const int NETWORK_FRAME_BYTES_STEREO = 1024; const int NETWORK_FRAME_SAMPLES_STEREO = NETWORK_FRAME_BYTES_STEREO / sizeof(AudioSample); const int NETWORK_FRAME_BYTES_PER_CHANNEL = 512; diff --git a/libraries/audio/src/AudioInjector.cpp b/libraries/audio/src/AudioInjector.cpp index 58122fee3c..873e9b7923 100644 --- a/libraries/audio/src/AudioInjector.cpp +++ b/libraries/audio/src/AudioInjector.cpp @@ -214,6 +214,14 @@ const uchar MAX_INJECTOR_VOLUME = 0xFF; static const int64_t NEXT_FRAME_DELTA_ERROR_OR_FINISHED = -1; static const int64_t NEXT_FRAME_DELTA_IMMEDIATELY = 0; +qint64 writeStringToStream(const QString& string, QDataStream& stream) { + QByteArray data = string.toUtf8(); + uint32_t length = data.length(); + stream << static_cast(length); + stream << data; + return length + sizeof(uint32_t); +} + int64_t AudioInjector::injectNextFrame() { if (stateHas(AudioInjectorState::NetworkInjectionFinished)) { qDebug() << "AudioInjector::injectNextFrame called but AudioInjector has finished and was not restarted. Returning."; @@ -260,6 +268,10 @@ int64_t AudioInjector::injectNextFrame() { // pack some placeholder sequence number for now audioPacketStream << (quint16) 0; + // current injectors don't use codecs, so pack in the unknown codec name + QString noCodecForInjectors(""); + writeStringToStream(noCodecForInjectors, audioPacketStream); + // pack stream identifier (a generated UUID) audioPacketStream << QUuid::createUuid(); diff --git a/libraries/audio/src/InboundAudioStream.cpp b/libraries/audio/src/InboundAudioStream.cpp index c9b9363b1b..d781a1991b 100644 --- a/libraries/audio/src/InboundAudioStream.cpp +++ b/libraries/audio/src/InboundAudioStream.cpp @@ -13,6 +13,7 @@ #include #include +#include #include "InboundAudioStream.h" @@ -58,6 +59,7 @@ void InboundAudioStream::reset() { _isStarved = true; _hasStarted = false; resetStats(); + cleanupCodec(); } void InboundAudioStream::resetStats() { @@ -99,12 +101,12 @@ void InboundAudioStream::perSecondCallbackForUpdatingStats() { } int InboundAudioStream::parseData(ReceivedMessage& message) { - // parse sequence number and track it quint16 sequence; message.readPrimitive(&sequence); SequenceNumberStats::ArrivalInfo arrivalInfo = _incomingSequenceNumberStats.sequenceNumberReceived(sequence, message.getSourceID()); + QString codecInPacket = message.readString(); packetReceivedUpdateTimingStats(); @@ -114,7 +116,7 @@ int InboundAudioStream::parseData(ReceivedMessage& message) { int prePropertyPosition = message.getPosition(); int propertyBytes = parseStreamProperties(message.getType(), message.readWithoutCopy(message.getBytesLeftToRead()), networkSamples); message.seek(prePropertyPosition + propertyBytes); - + // handle this packet based on its arrival status. switch (arrivalInfo._status) { case SequenceNumberStats::Early: { @@ -129,9 +131,22 @@ int InboundAudioStream::parseData(ReceivedMessage& message) { case SequenceNumberStats::OnTime: { // Packet is on time; parse its data to the ringbuffer if (message.getType() == PacketType::SilentAudioFrame) { + // FIXME - Some codecs need to know about these silent frames... and can produce better output writeDroppableSilentSamples(networkSamples); } else { - parseAudioData(message.getType(), message.readWithoutCopy(message.getBytesLeftToRead())); + // note: PCM and no codec are identical + bool selectedPCM = _selectedCodecName == "pcm" || _selectedCodecName == ""; + bool packetPCM = codecInPacket == "pcm" || codecInPacket == ""; + if (codecInPacket == _selectedCodecName || (packetPCM && selectedPCM)) { + auto afterProperties = message.readWithoutCopy(message.getBytesLeftToRead()); + parseAudioData(message.getType(), afterProperties); + } else { + qDebug() << "Codec mismatch: expected" << _selectedCodecName << "got" << codecInPacket << "writing silence"; + writeDroppableSilentSamples(networkSamples); + // inform others of the mismatch + auto sendingNode = DependencyManager::get()->nodeWithUUID(message.getSourceID()); + emit mismatchedAudioCodec(sendingNode, _selectedCodecName); + } } break; } diff --git a/libraries/audio/src/InboundAudioStream.h b/libraries/audio/src/InboundAudioStream.h index 5da63f96c2..af79ff6164 100644 --- a/libraries/audio/src/InboundAudioStream.h +++ b/libraries/audio/src/InboundAudioStream.h @@ -12,6 +12,7 @@ #ifndef hifi_InboundAudioStream_h #define hifi_InboundAudioStream_h +#include #include #include #include @@ -180,6 +181,9 @@ public: void setupCodec(CodecPluginPointer codec, const QString& codecName, int numChannels); void cleanupCodec(); +signals: + void mismatchedAudioCodec(SharedNodePointer sendingNode, const QString& desiredCodec); + public slots: /// This function should be called every second for all the stats to function properly. If dynamic jitter buffers /// is enabled, those stats are used to calculate _desiredJitterBufferFrames. diff --git a/libraries/audio/src/InjectedAudioStream.cpp b/libraries/audio/src/InjectedAudioStream.cpp index 54e0f92bea..ccd581959f 100644 --- a/libraries/audio/src/InjectedAudioStream.cpp +++ b/libraries/audio/src/InjectedAudioStream.cpp @@ -33,6 +33,7 @@ const uchar MAX_INJECTOR_VOLUME = 255; int InjectedAudioStream::parseStreamProperties(PacketType type, const QByteArray& packetAfterSeqNum, int& numAudioSamples) { + // setup a data stream to read from this packet QDataStream packetStream(packetAfterSeqNum); diff --git a/libraries/networking/src/udt/BasePacket.cpp b/libraries/networking/src/udt/BasePacket.cpp index 18552ca966..8a4b98de87 100644 --- a/libraries/networking/src/udt/BasePacket.cpp +++ b/libraries/networking/src/udt/BasePacket.cpp @@ -154,8 +154,7 @@ qint64 BasePacket::writeString(const QString& string) { QByteArray data = string.toUtf8(); uint32_t length = data.length(); writePrimitive(length); - writeData(data.constData(), data.length()); - seek(pos() + length); + write(data.constData(), data.length()); return length + sizeof(uint32_t); } @@ -176,7 +175,6 @@ bool BasePacket::reset() { } qint64 BasePacket::writeData(const char* data, qint64 maxSize) { - Q_ASSERT_X(maxSize <= bytesAvailableForWrite(), "BasePacket::writeData", "not enough space for write"); // make sure we have the space required to write this block diff --git a/libraries/networking/src/udt/PacketHeaders.cpp b/libraries/networking/src/udt/PacketHeaders.cpp index fca006ae87..eb8739dd49 100644 --- a/libraries/networking/src/udt/PacketHeaders.cpp +++ b/libraries/networking/src/udt/PacketHeaders.cpp @@ -72,6 +72,13 @@ PacketVersion versionForPacketType(PacketType packetType) { case PacketType::DomainServerAddedNode: return static_cast(DomainServerAddedNodeVersion::PermissionsGrid); + case PacketType::MixedAudio: + case PacketType::SilentAudioFrame: + case PacketType::InjectAudio: + case PacketType::MicrophoneAudioNoEcho: + case PacketType::MicrophoneAudioWithEcho: + return static_cast(AudioVersion::CodecNameInAudioPackets); + default: return 17; } diff --git a/libraries/networking/src/udt/PacketHeaders.h b/libraries/networking/src/udt/PacketHeaders.h index 85030135a1..e723ea38eb 100644 --- a/libraries/networking/src/udt/PacketHeaders.h +++ b/libraries/networking/src/udt/PacketHeaders.h @@ -213,4 +213,9 @@ enum class DomainListVersion : PacketVersion { PermissionsGrid }; +enum class AudioVersion : PacketVersion { + HasCompressedAudio = 17, + CodecNameInAudioPackets +}; + #endif // hifi_PacketHeaders_h