From 192f4791d5e17f122be8fb06d1b8692ce306a2b7 Mon Sep 17 00:00:00 2001
From: Zach Pomerantz
Date: Tue, 22 Nov 2016 16:19:18 -0500
Subject: [PATCH] move mixing into AudioMixerSlave

---
 assignment-client/src/audio/AudioMixer.cpp | 900 +++++++++++----------
 assignment-client/src/audio/AudioMixer.h   | 117 +--
 2 files changed, 528 insertions(+), 489 deletions(-)

diff --git a/assignment-client/src/audio/AudioMixer.cpp b/assignment-client/src/audio/AudioMixer.cpp
index 7d1f4aba7c..c1402529fa 100644
--- a/assignment-client/src/audio/AudioMixer.cpp
+++ b/assignment-client/src/audio/AudioMixer.cpp
@@ -69,19 +69,17 @@ static const QString AUDIO_ENV_GROUP_KEY = "audio_env";
 static const QString AUDIO_BUFFER_GROUP_KEY = "audio_buffer";
 
 int AudioMixer::_numStaticJitterFrames{ -1 };
-
-bool AudioMixer::shouldMute(float quietestFrame) {
-    return (quietestFrame > _noiseMutingThreshold);
-}
+float AudioMixer::_noiseMutingThreshold{ DEFAULT_NOISE_MUTING_THRESHOLD };
+float AudioMixer::_attenuationPerDoublingInDistance{ DEFAULT_ATTENUATION_PER_DOUBLING_IN_DISTANCE };
+float AudioMixer::_trailingSleepRatio{ 1.0f };
+float AudioMixer::_performanceThrottlingRatio{ 0.0f };
+float AudioMixer::_minAudibilityThreshold{ LOUDNESS_TO_DISTANCE_RATIO / 2.0f };
+QHash<QString, AABox> AudioMixer::_audioZones;
+QVector<AudioMixer::ZoneSettings> AudioMixer::_zoneSettings;
+QVector<AudioMixer::ReverbSettings> AudioMixer::_zoneReverbSettings;
 
 AudioMixer::AudioMixer(ReceivedMessage& message) :
-    ThreadedAssignment(message),
-    _trailingSleepRatio(1.0f),
-    _minAudibilityThreshold(LOUDNESS_TO_DISTANCE_RATIO / 2.0f),
-    _performanceThrottlingRatio(0.0f),
-    _attenuationPerDoublingInDistance(DEFAULT_ATTENUATION_PER_DOUBLING_IN_DISTANCE),
-    _noiseMutingThreshold(DEFAULT_NOISE_MUTING_THRESHOLD)
-{
+    ThreadedAssignment(message) {
     auto nodeList = DependencyManager::get<NodeList>();
     auto& packetReceiver = nodeList->getPacketReceiver();
 
@@ -158,343 +156,6 @@ static inline float fastexp2(float x) {
     return x * xi.f;
 }
 
-float AudioMixer::gainForSource(const PositionalAudioStream& streamToAdd,
-                                const AvatarAudioStream& listeningNodeStream, const glm::vec3& relativePosition, bool isEcho) {
-    float gain = 1.0f;
-
-    float distanceBetween = glm::length(relativePosition);
-
-    if (distanceBetween < EPSILON) {
-        distanceBetween = EPSILON;
-    }
-
-    if (streamToAdd.getType() == PositionalAudioStream::Injector) {
-        gain *= reinterpret_cast<const InjectedAudioStream*>(&streamToAdd)->getAttenuationRatio();
-    }
-
-    if (!isEcho && (streamToAdd.getType() == PositionalAudioStream::Microphone)) {
-        // source is another avatar, apply fixed off-axis attenuation to make them quieter as they turn away from listener
-        glm::vec3 rotatedListenerPosition = glm::inverse(streamToAdd.getOrientation()) * relativePosition;
-
-        float angleOfDelivery = glm::angle(glm::vec3(0.0f, 0.0f, -1.0f),
-                                           glm::normalize(rotatedListenerPosition));
-
-        const float MAX_OFF_AXIS_ATTENUATION = 0.2f;
-        const float OFF_AXIS_ATTENUATION_FORMULA_STEP = (1 - MAX_OFF_AXIS_ATTENUATION) / 2.0f;
-
-        float offAxisCoefficient = MAX_OFF_AXIS_ATTENUATION +
-                                   (OFF_AXIS_ATTENUATION_FORMULA_STEP * (angleOfDelivery / PI_OVER_TWO));
-
-        // multiply the current attenuation coefficient by the calculated off axis coefficient
-        gain *= offAxisCoefficient;
-    }
-
-    float attenuationPerDoublingInDistance = _attenuationPerDoublingInDistance;
-    for (int i = 0; i < _zonesSettings.length(); ++i) {
-        if (_audioZones[_zonesSettings[i].source].contains(streamToAdd.getPosition()) &&
-            _audioZones[_zonesSettings[i].listener].contains(listeningNodeStream.getPosition())) {
-            attenuationPerDoublingInDistance =
_zonesSettings[i].coefficient; - break; - } - } - - if (distanceBetween >= ATTENUATION_BEGINS_AT_DISTANCE) { - - // translate the zone setting to gain per log2(distance) - float g = 1.0f - attenuationPerDoublingInDistance; - g = (g < EPSILON) ? EPSILON : g; - g = (g > 1.0f) ? 1.0f : g; - - // calculate the distance coefficient using the distance to this node - float distanceCoefficient = fastexp2(fastlog2(g) * fastlog2(distanceBetween/ATTENUATION_BEGINS_AT_DISTANCE)); - - // multiply the current attenuation coefficient by the distance coefficient - gain *= distanceCoefficient; - } - - return gain; -} - -float AudioMixer::azimuthForSource(const PositionalAudioStream& streamToAdd, const AvatarAudioStream& listeningNodeStream, - const glm::vec3& relativePosition) { - glm::quat inverseOrientation = glm::inverse(listeningNodeStream.getOrientation()); - - // Compute sample delay for the two ears to create phase panning - glm::vec3 rotatedSourcePosition = inverseOrientation * relativePosition; - - // project the rotated source position vector onto the XZ plane - rotatedSourcePosition.y = 0.0f; - - static const float SOURCE_DISTANCE_THRESHOLD = 1e-30f; - - if (glm::length2(rotatedSourcePosition) > SOURCE_DISTANCE_THRESHOLD) { - // produce an oriented angle about the y-axis - return glm::orientedAngle(glm::vec3(0.0f, 0.0f, -1.0f), glm::normalize(rotatedSourcePosition), glm::vec3(0.0f, -1.0f, 0.0f)); - } else { - // there is no distance between listener and source - return no azimuth - return 0; - } -} - -void AudioMixer::addStreamToMixForListeningNodeWithStream(AudioMixerClientData& listenerNodeData, - const PositionalAudioStream& streamToAdd, - const QUuid& sourceNodeID, - const AvatarAudioStream& listeningNodeStream) { - - - // to reduce artifacts we calculate the gain and azimuth for every source for this listener - // even if we are not going to end up mixing in this source - - ++_totalMixes; - - // this ensures that the tail of any previously mixed audio or the first block of new audio sounds correct - - // check if this is a server echo of a source back to itself - bool isEcho = (&streamToAdd == &listeningNodeStream); - - glm::vec3 relativePosition = streamToAdd.getPosition() - listeningNodeStream.getPosition(); - - // figure out the distance between source and listener - float distance = glm::max(glm::length(relativePosition), EPSILON); - - // figure out the gain for this source at the listener - float gain = gainForSource(streamToAdd, listeningNodeStream, relativePosition, isEcho); - - // figure out the azimuth to this source at the listener - float azimuth = isEcho ? 0.0f : azimuthForSource(streamToAdd, listeningNodeStream, relativePosition); - - float repeatedFrameFadeFactor = 1.0f; - - static const int HRTF_DATASET_INDEX = 1; - - if (!streamToAdd.lastPopSucceeded()) { - bool forceSilentBlock = true; - - if (!streamToAdd.getLastPopOutput().isNull()) { - bool isInjector = dynamic_cast(&streamToAdd); - - // in an injector, just go silent - the injector has likely ended - // in other inputs (microphone, &c.), repeat with fade to avoid the harsh jump to silence - - // we'll repeat the last block until it has a block to mix - // and we'll gradually fade that repeated block into silence. - - // calculate its fade factor, which depends on how many times it's already been repeated. 
- repeatedFrameFadeFactor = calculateRepeatedFrameFadeFactor(streamToAdd.getConsecutiveNotMixedCount() - 1); - if (!isInjector && repeatedFrameFadeFactor > 0.0f) { - // apply the repeatedFrameFadeFactor to the gain - gain *= repeatedFrameFadeFactor; - - forceSilentBlock = false; - } - } - - if (forceSilentBlock) { - // we're deciding not to repeat either since we've already done it enough times or repetition with fade is disabled - // in this case we will call renderSilent with a forced silent block - // this ensures the correct tail from the previously mixed block and the correct spatialization of first block - // of any upcoming audio - - if (!streamToAdd.isStereo() && !isEcho) { - // get the existing listener-source HRTF object, or create a new one - auto& hrtf = listenerNodeData.hrtfForStream(sourceNodeID, streamToAdd.getStreamIdentifier()); - - // this is not done for stereo streams since they do not go through the HRTF - static int16_t silentMonoBlock[AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL] = {}; - hrtf.renderSilent(silentMonoBlock, _mixedSamples, HRTF_DATASET_INDEX, azimuth, distance, gain, - AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL); - - ++_hrtfSilentRenders;; - } - - return; - } - } - - // grab the stream from the ring buffer - AudioRingBuffer::ConstIterator streamPopOutput = streamToAdd.getLastPopOutput(); - - if (streamToAdd.isStereo() || isEcho) { - // this is a stereo source or server echo so we do not pass it through the HRTF - // simply apply our calculated gain to each sample - if (streamToAdd.isStereo()) { - for (int i = 0; i < AudioConstants::NETWORK_FRAME_SAMPLES_STEREO; ++i) { - _mixedSamples[i] += float(streamPopOutput[i] * gain / AudioConstants::MAX_SAMPLE_VALUE); - } - - ++_manualStereoMixes; - } else { - for (int i = 0; i < AudioConstants::NETWORK_FRAME_SAMPLES_STEREO; i += 2) { - auto monoSample = float(streamPopOutput[i / 2] * gain / AudioConstants::MAX_SAMPLE_VALUE); - _mixedSamples[i] += monoSample; - _mixedSamples[i + 1] += monoSample; - } - - ++_manualEchoMixes; - } - - return; - } - - // get the existing listener-source HRTF object, or create a new one - auto& hrtf = listenerNodeData.hrtfForStream(sourceNodeID, streamToAdd.getStreamIdentifier()); - - static int16_t streamBlock[AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL]; - - streamPopOutput.readSamples(streamBlock, AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL); - - // if the frame we're about to mix is silent, simply call render silent and move on - if (streamToAdd.getLastPopOutputLoudness() == 0.0f) { - // silent frame from source - - // we still need to call renderSilent via the HRTF for mono source - hrtf.renderSilent(streamBlock, _mixedSamples, HRTF_DATASET_INDEX, azimuth, distance, gain, - AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL); - - ++_hrtfSilentRenders; - - return; - } - - if (_performanceThrottlingRatio > 0.0f - && streamToAdd.getLastPopOutputTrailingLoudness() / glm::length(relativePosition) <= _minAudibilityThreshold) { - // the mixer is struggling so we're going to drop off some streams - - // we call renderSilent via the HRTF with the actual frame data and a gain of 0.0 - hrtf.renderSilent(streamBlock, _mixedSamples, HRTF_DATASET_INDEX, azimuth, distance, 0.0f, - AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL); - - ++_hrtfStruggleRenders; - - return; - } - - ++_hrtfRenders; - - // mono stream, call the HRTF with our block and calculated azimuth and gain - hrtf.render(streamBlock, _mixedSamples, HRTF_DATASET_INDEX, azimuth, distance, gain, - 
AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL); -} - -bool AudioMixer::prepareMixForListeningNode(Node* node) { - AvatarAudioStream* nodeAudioStream = static_cast(node->getLinkedData())->getAvatarAudioStream(); - AudioMixerClientData* listenerNodeData = static_cast(node->getLinkedData()); - - // zero out the client mix for this node - memset(_mixedSamples, 0, sizeof(_mixedSamples)); - - // loop through all other nodes that have sufficient audio to mix - - DependencyManager::get()->eachNode([&](const SharedNodePointer& otherNode){ - // make sure that we have audio data for this other node - // and that it isn't being ignored by our listening node - // and that it isn't ignoring our listening node - if (otherNode->getLinkedData() - && !node->isIgnoringNodeWithID(otherNode->getUUID()) && !otherNode->isIgnoringNodeWithID(node->getUUID())) { - AudioMixerClientData* otherNodeClientData = (AudioMixerClientData*) otherNode->getLinkedData(); - - // check to see if we're ignoring in radius - bool insideIgnoreRadius = false; - if (node->isIgnoreRadiusEnabled() || otherNode->isIgnoreRadiusEnabled()) { - AudioMixerClientData* otherData = reinterpret_cast(otherNode->getLinkedData()); - AudioMixerClientData* nodeData = reinterpret_cast(node->getLinkedData()); - float ignoreRadius = glm::min(node->getIgnoreRadius(), otherNode->getIgnoreRadius()); - if (glm::distance(nodeData->getPosition(), otherData->getPosition()) < ignoreRadius) { - insideIgnoreRadius = true; - } - } - - if (!insideIgnoreRadius) { - // enumerate the ARBs attached to the otherNode and add all that should be added to mix - auto streamsCopy = otherNodeClientData->getAudioStreams(); - for (auto& streamPair : streamsCopy) { - auto otherNodeStream = streamPair.second; - if (*otherNode != *node || otherNodeStream->shouldLoopbackForNode()) { - addStreamToMixForListeningNodeWithStream(*listenerNodeData, *otherNodeStream, otherNode->getUUID(), - *nodeAudioStream); - } - } - } - } - }); - - // use the per listner AudioLimiter to render the mixed data... 
-    listenerNodeData->audioLimiter.render(_mixedSamples, _clampedSamples, AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);
-
-    // check for silent audio after the peak limitor has converted the samples
-    bool hasAudio = false;
-    for (int i = 0; i < AudioConstants::NETWORK_FRAME_SAMPLES_STEREO; ++i) {
-        if (_clampedSamples[i] != 0) {
-            hasAudio = true;
-            break;
-        }
-    }
-    return hasAudio;
-}
-
-void AudioMixer::sendAudioEnvironmentPacket(SharedNodePointer node) {
-    // Send stream properties
-    bool hasReverb = false;
-    float reverbTime, wetLevel;
-    // find reverb properties
-    for (int i = 0; i < _zoneReverbSettings.size(); ++i) {
-        AudioMixerClientData* data = static_cast<AudioMixerClientData*>(node->getLinkedData());
-        glm::vec3 streamPosition = data->getAvatarAudioStream()->getPosition();
-        AABox box = _audioZones[_zoneReverbSettings[i].zone];
-        if (box.contains(streamPosition)) {
-            hasReverb = true;
-            reverbTime = _zoneReverbSettings[i].reverbTime;
-            wetLevel = _zoneReverbSettings[i].wetLevel;
-
-            break;
-        }
-    }
-
-    AudioMixerClientData* nodeData = static_cast<AudioMixerClientData*>(node->getLinkedData());
-    AvatarAudioStream* stream = nodeData->getAvatarAudioStream();
-    bool dataChanged = (stream->hasReverb() != hasReverb) ||
-        (stream->hasReverb() && (stream->getRevebTime() != reverbTime ||
-                                 stream->getWetLevel() != wetLevel));
-    if (dataChanged) {
-        // Update stream
-        if (hasReverb) {
-            stream->setReverb(reverbTime, wetLevel);
-        } else {
-            stream->clearReverb();
-        }
-    }
-
-    // Send at change or every so often
-    float CHANCE_OF_SEND = 0.01f;
-    bool sendData = dataChanged || (randFloat() < CHANCE_OF_SEND);
-
-    if (sendData) {
-        auto nodeList = DependencyManager::get<NodeList>();
-
-        unsigned char bitset = 0;
-
-        int packetSize = sizeof(bitset);
-
-        if (hasReverb) {
-            packetSize += sizeof(reverbTime) + sizeof(wetLevel);
-        }
-
-        auto envPacket = NLPacket::create(PacketType::AudioEnvironment, packetSize);
-
-        if (hasReverb) {
-            setAtBit(bitset, HAS_REVERB_BIT);
-        }
-
-        envPacket->writePrimitive(bitset);
-
-        if (hasReverb) {
-            envPacket->writePrimitive(reverbTime);
-            envPacket->writePrimitive(wetLevel);
-        }
-        nodeList->sendPacket(std::move(envPacket), *node);
-    }
-}
-
 void AudioMixer::handleNodeAudioPacket(QSharedPointer<ReceivedMessage> message, SharedNodePointer sendingNode) {
     getOrCreateClientData(sendingNode.data());
     DependencyManager::get<NodeList>()->updateNodeWithDataFromPacket(message, sendingNode);
@@ -780,20 +441,25 @@ void AudioMixer::start() {
     auto frameTimestamp = p_high_resolution_clock::time_point::min();
     unsigned int framesSinceManagement = std::numeric_limits<unsigned int>::max();
 
-    // mixFrame state
+    // mix state
     unsigned int frame = 1;
 
     while (!_isFinished) {
         manageLoad(frameTimestamp, framesSinceManagement);
+
+        slave.resetStats();
+
         nodeList->eachNode([&](const SharedNodePointer& node) {
             _sumStreams += prepareFrame(node, frame);
         });
 
         nodeList->eachNode([&](const SharedNodePointer& node) {
-            if(mixFrame(node, frame)) {
+            if (slave.mix(node, frame)) {
                 ++_sumListeners;
             }
         });
 
+        slave.getStats();
+
         ++frame;
         ++_numStatFrames;
 
@@ -885,96 +551,6 @@ int AudioMixer::prepareFrame(const SharedNodePointer& node, unsigned int frame)
     return data->checkBuffersBeforeFrameSend();
 }
 
-bool AudioMixer::mixFrame(const SharedNodePointer& node, unsigned int frame) {
-    AudioMixerClientData* data = (AudioMixerClientData*)node->getLinkedData();
-    if (data == nullptr) {
-        return false;
-    }
-
-    auto avatarStream = data->getAvatarAudioStream();
-    if (avatarStream == nullptr) {
-        return false;
-    }
-
-    auto nodeList = DependencyManager::get<NodeList>();
-
-    // mute the avatar, if necessary
-    if (shouldMute(avatarStream->getQuietestFrameLoudness()) || data->shouldMuteClient()) {
-        auto mutePacket = NLPacket::create(PacketType::NoisyMute, 0);
-        nodeList->sendPacket(std::move(mutePacket), *node);
-
-        // probably now we just reset the flag, once should do it (?)
-        data->setShouldMuteClient(false);
-    }
-
-    // mix streams
-    if (node->getType() == NodeType::Agent && node->getActiveSocket()) {
-
-        bool mixHasAudio = prepareMixForListeningNode(node.data());
-
-        std::unique_ptr<NLPacket> mixPacket;
-
-        if (mixHasAudio || data->shouldFlushEncoder()) {
-
-            int mixPacketBytes = sizeof(quint16) + AudioConstants::MAX_CODEC_NAME_LENGTH_ON_WIRE
-                + AudioConstants::NETWORK_FRAME_BYTES_STEREO;
-            mixPacket = NLPacket::create(PacketType::MixedAudio, mixPacketBytes);
-
-            // pack sequence number
-            quint16 sequence = data->getOutgoingSequenceNumber();
-            mixPacket->writePrimitive(sequence);
-
-            // write the codec
-            QString codecInPacket = data->getCodecName();
-            mixPacket->writeString(codecInPacket);
-
-            QByteArray encodedBuffer;
-            if (mixHasAudio) {
-                QByteArray decodedBuffer(reinterpret_cast<char*>(_clampedSamples), AudioConstants::NETWORK_FRAME_BYTES_STEREO);
-                data->encode(decodedBuffer, encodedBuffer);
-            } else {
-                // time to flush, which resets the shouldFlush until next time we encode something
-                data->encodeFrameOfZeros(encodedBuffer);
-            }
-            // pack mixed audio samples
-            mixPacket->write(encodedBuffer.constData(), encodedBuffer.size());
-
-        } else {
-            int silentPacketBytes = sizeof(quint16) + sizeof(quint16) + AudioConstants::MAX_CODEC_NAME_LENGTH_ON_WIRE;
-            mixPacket = NLPacket::create(PacketType::SilentAudioFrame, silentPacketBytes);
-
-            // pack sequence number
-            quint16 sequence = data->getOutgoingSequenceNumber();
-            mixPacket->writePrimitive(sequence);
-
-            // write the codec
-            QString codecInPacket = data->getCodecName();
-            mixPacket->writeString(codecInPacket);
-
-            // pack number of silent audio samples
-            quint16 numSilentSamples = AudioConstants::NETWORK_FRAME_SAMPLES_STEREO;
-            mixPacket->writePrimitive(numSilentSamples);
-        }
-
-        // Send audio environment
-        sendAudioEnvironmentPacket(node);
-
-        // send mixed audio packet
-        nodeList->sendPacket(std::move(mixPacket), *node);
-        data->incrementOutgoingMixedAudioSequenceNumber();
-
-        // send an audio stream stats packet to the client approximately every second
-        static const unsigned int NUM_FRAMES_PER_SEC = (int) ceil(AudioConstants::NETWORK_FRAMES_PER_SEC);
-        if (data->shouldSendStats(frame % NUM_FRAMES_PER_SEC)) {
-            data->sendAudioStreamStatsPackets(node);
-        }
-
-        return true;
-    }
-
-    return false;
-}
-
 void AudioMixer::parseSettingsObject(const QJsonObject &settingsObject) {
     if (settingsObject.contains(AUDIO_BUFFER_GROUP_KEY)) {
         QJsonObject audioBufferGroupObject = settingsObject[AUDIO_BUFFER_GROUP_KEY].toObject();
@@ -1126,7 +702,7 @@ void AudioMixer::parseSettingsObject(const QJsonObject &settingsObject) {
                     coefficientObject.contains(LISTENER) &&
                     coefficientObject.contains(COEFFICIENT)) {
 
-                    ZonesSettings settings;
+                    ZoneSettings settings;
 
                     bool ok;
                     settings.source = coefficientObject.value(SOURCE).toString();
@@ -1136,7 +712,7 @@ void AudioMixer::parseSettingsObject(const QJsonObject &settingsObject) {
                     if (ok && settings.coefficient >= 0.0f && settings.coefficient <= 1.0f &&
                         _audioZones.contains(settings.source) && _audioZones.contains(settings.listener)) {
 
-                        _zonesSettings.push_back(settings);
+                        _zoneSettings.push_back(settings);
                         qDebug() << "Added Coefficient:" << settings.source << settings.listener << settings.coefficient;
                     }
                 }
@@ -1169,6 +745,7 @@ void
AudioMixer::parseSettingsObject(const QJsonObject &settingsObject) {
             settings.wetLevel = wetLevel;
 
             _zoneReverbSettings.push_back(settings);
+            qDebug() << "Added Reverb:" << zone << reverbTime << wetLevel;
         }
     }
@@ -1176,3 +753,442 @@ void AudioMixer::parseSettingsObject(const QJsonObject &settingsObject) {
         }
     }
 }
+
+bool AudioMixerSlave::mix(const SharedNodePointer& node, unsigned int frame) {
+    AudioMixerClientData* data = (AudioMixerClientData*)node->getLinkedData();
+    if (data == nullptr) {
+        return false;
+    }
+
+    auto avatarStream = data->getAvatarAudioStream();
+    if (avatarStream == nullptr) {
+        return false;
+    }
+
+    auto nodeList = DependencyManager::get<NodeList>();
+
+    // mute the avatar, if necessary
+    if (AudioMixer::shouldMute(avatarStream->getQuietestFrameLoudness()) || data->shouldMuteClient()) {
+        auto mutePacket = NLPacket::create(PacketType::NoisyMute, 0);
+        nodeList->sendPacket(std::move(mutePacket), *node);
+
+        // probably now we just reset the flag, once should do it (?)
+        data->setShouldMuteClient(false);
+    }
+
+    // generate and send audio packets
+    if (node->getType() == NodeType::Agent && node->getActiveSocket()) {
+
+        // mix streams
+        bool mixHasAudio = prepareMix(node);
+
+        // write the packet
+        std::unique_ptr<NLPacket> mixPacket;
+        if (mixHasAudio || data->shouldFlushEncoder()) {
+            // encode the audio
+            QByteArray encodedBuffer;
+            if (mixHasAudio) {
+                QByteArray decodedBuffer(reinterpret_cast<char*>(_clampedSamples), AudioConstants::NETWORK_FRAME_BYTES_STEREO);
+                data->encode(decodedBuffer, encodedBuffer);
+            } else {
+                // time to flush, which resets the shouldFlush until next time we encode something
+                data->encodeFrameOfZeros(encodedBuffer);
+            }
+
+            // write it to a packet
+            writeMixPacket(mixPacket, data, encodedBuffer);
+        } else {
+            writeSilentPacket(mixPacket, data);
+        }
+
+        // send audio environment packet
+        sendEnvironmentPacket(node);
+
+        // send mixed audio packet
+        nodeList->sendPacket(std::move(mixPacket), *node);
+        data->incrementOutgoingMixedAudioSequenceNumber();
+
+        // send an audio stream stats packet to the client approximately every second
+        static const unsigned int NUM_FRAMES_PER_SEC = (int) ceil(AudioConstants::NETWORK_FRAMES_PER_SEC);
+        if (data->shouldSendStats(frame % NUM_FRAMES_PER_SEC)) {
+            data->sendAudioStreamStatsPackets(node);
+        }
+
+        return true;
+    }
+
+    return false;
+}
+
+void AudioMixerSlave::writeMixPacket(std::unique_ptr<NLPacket>& mixPacket, AudioMixerClientData* data, QByteArray& buffer) {
+    int mixPacketBytes = sizeof(quint16) + AudioConstants::MAX_CODEC_NAME_LENGTH_ON_WIRE
+        + AudioConstants::NETWORK_FRAME_BYTES_STEREO;
+    mixPacket = NLPacket::create(PacketType::MixedAudio, mixPacketBytes);
+
+    // pack sequence number
+    quint16 sequence = data->getOutgoingSequenceNumber();
+    mixPacket->writePrimitive(sequence);
+
+    // write the codec
+    QString codecInPacket = data->getCodecName();
+    mixPacket->writeString(codecInPacket);
+
+    // pack mixed audio samples
+    mixPacket->write(buffer.constData(), buffer.size());
+}
+
+void AudioMixerSlave::writeSilentPacket(std::unique_ptr<NLPacket>& mixPacket, AudioMixerClientData* data) {
+    int silentPacketBytes = sizeof(quint16) + sizeof(quint16) + AudioConstants::MAX_CODEC_NAME_LENGTH_ON_WIRE;
+    mixPacket = NLPacket::create(PacketType::SilentAudioFrame, silentPacketBytes);
+
+    // pack sequence number
+    quint16 sequence = data->getOutgoingSequenceNumber();
+    mixPacket->writePrimitive(sequence);
+
+    // write the codec
+    QString codecInPacket = data->getCodecName();
+    mixPacket->writeString(codecInPacket);
+
+    // pack number of silent audio samples
+    quint16 numSilentSamples = AudioConstants::NETWORK_FRAME_SAMPLES_STEREO;
+    mixPacket->writePrimitive(numSilentSamples);
+}
+
+void AudioMixerSlave::sendEnvironmentPacket(const SharedNodePointer& node) {
+    // Send stream properties
+    bool hasReverb = false;
+    float reverbTime, wetLevel;
+
+    auto& reverbSettings = AudioMixer::getReverbSettings();
+    auto& audioZones = AudioMixer::getAudioZones();
+
+    // find reverb properties
+    for (int i = 0; i < reverbSettings.size(); ++i) {
+        AudioMixerClientData* data = static_cast<AudioMixerClientData*>(node->getLinkedData());
+        glm::vec3 streamPosition = data->getAvatarAudioStream()->getPosition();
+        AABox box = audioZones[reverbSettings[i].zone];
+        if (box.contains(streamPosition)) {
+            hasReverb = true;
+            reverbTime = reverbSettings[i].reverbTime;
+            wetLevel = reverbSettings[i].wetLevel;
+
+            break;
+        }
+    }
+
+    AudioMixerClientData* nodeData = static_cast<AudioMixerClientData*>(node->getLinkedData());
+    AvatarAudioStream* stream = nodeData->getAvatarAudioStream();
+    bool dataChanged = (stream->hasReverb() != hasReverb) ||
+        (stream->hasReverb() && (stream->getRevebTime() != reverbTime ||
+                                 stream->getWetLevel() != wetLevel));
+    if (dataChanged) {
+        // Update stream
+        if (hasReverb) {
+            stream->setReverb(reverbTime, wetLevel);
+        } else {
+            stream->clearReverb();
+        }
+    }
+
+    // Send at change or every so often
+    float CHANCE_OF_SEND = 0.01f;
+    bool sendData = dataChanged || (randFloat() < CHANCE_OF_SEND);
+
+    if (sendData) {
+        auto nodeList = DependencyManager::get<NodeList>();
+
+        unsigned char bitset = 0;
+
+        int packetSize = sizeof(bitset);
+
+        if (hasReverb) {
+            packetSize += sizeof(reverbTime) + sizeof(wetLevel);
+        }
+
+        auto envPacket = NLPacket::create(PacketType::AudioEnvironment, packetSize);
+
+        if (hasReverb) {
+            setAtBit(bitset, HAS_REVERB_BIT);
+        }
+
+        envPacket->writePrimitive(bitset);
+
+        if (hasReverb) {
+            envPacket->writePrimitive(reverbTime);
+            envPacket->writePrimitive(wetLevel);
+        }
+        nodeList->sendPacket(std::move(envPacket), *node);
+    }
+}
+
+bool AudioMixerSlave::prepareMix(const SharedNodePointer& node) {
+    AvatarAudioStream* nodeAudioStream = static_cast<AudioMixerClientData*>(node->getLinkedData())->getAvatarAudioStream();
+    AudioMixerClientData* listenerNodeData = static_cast<AudioMixerClientData*>(node->getLinkedData());
+
+    // zero out the client mix for this node
+    memset(_mixedSamples, 0, sizeof(_mixedSamples));
+
+    // loop through all other nodes that have sufficient audio to mix
+
+    DependencyManager::get<NodeList>()->eachNode([&](const SharedNodePointer& otherNode){
+        // make sure that we have audio data for this other node
+        // and that it isn't being ignored by our listening node
+        // and that it isn't ignoring our listening node
+        if (otherNode->getLinkedData()
+            && !node->isIgnoringNodeWithID(otherNode->getUUID()) && !otherNode->isIgnoringNodeWithID(node->getUUID())) {
+            AudioMixerClientData* otherNodeClientData = (AudioMixerClientData*) otherNode->getLinkedData();
+
+            // check to see if we're ignoring in radius
+            bool insideIgnoreRadius = false;
+            if (node->isIgnoreRadiusEnabled() || otherNode->isIgnoreRadiusEnabled()) {
+                AudioMixerClientData* otherData = reinterpret_cast<AudioMixerClientData*>(otherNode->getLinkedData());
+                AudioMixerClientData* nodeData = reinterpret_cast<AudioMixerClientData*>(node->getLinkedData());
+                float ignoreRadius = glm::min(node->getIgnoreRadius(), otherNode->getIgnoreRadius());
+                if (glm::distance(nodeData->getPosition(), otherData->getPosition()) < ignoreRadius) {
+                    insideIgnoreRadius = true;
+                }
+            }
+
+            if (!insideIgnoreRadius) {
+                // enumerate the ARBs attached to the otherNode and add all that should be added to mix
+                auto streamsCopy = otherNodeClientData->getAudioStreams();
+                for (auto& streamPair : streamsCopy) {
+                    auto otherNodeStream = streamPair.second;
+                    if (*otherNode != *node || otherNodeStream->shouldLoopbackForNode()) {
+                        addStreamToMix(*listenerNodeData, otherNode->getUUID(), *nodeAudioStream, *otherNodeStream);
+                    }
+                }
+            }
+        }
+    });
+
+    // use the per listener AudioLimiter to render the mixed data...
+    listenerNodeData->audioLimiter.render(_mixedSamples, _clampedSamples, AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);
+
+    // check for silent audio after the peak limiter has converted the samples
+    bool hasAudio = false;
+    for (int i = 0; i < AudioConstants::NETWORK_FRAME_SAMPLES_STEREO; ++i) {
+        if (_clampedSamples[i] != 0) {
+            hasAudio = true;
+            break;
+        }
+    }
+    return hasAudio;
+}
+
+void AudioMixerSlave::addStreamToMix(AudioMixerClientData& listenerNodeData, const QUuid& sourceNodeID,
+                                     const AvatarAudioStream& listeningNodeStream, const PositionalAudioStream& streamToAdd) {
+    // to reduce artifacts we calculate the gain and azimuth for every source for this listener
+    // even if we are not going to end up mixing in this source
+
+    ++_totalMixes;
+
+    // this ensures that the tail of any previously mixed audio or the first block of new audio sounds correct
+
+    // check if this is a server echo of a source back to itself
+    bool isEcho = (&streamToAdd == &listeningNodeStream);
+
+    glm::vec3 relativePosition = streamToAdd.getPosition() - listeningNodeStream.getPosition();
+
+    // figure out the distance between source and listener
+    float distance = glm::max(glm::length(relativePosition), EPSILON);
+
+    // figure out the gain for this source at the listener
+    float gain = gainForSource(listeningNodeStream, streamToAdd, relativePosition, isEcho);
+
+    // figure out the azimuth to this source at the listener
+    float azimuth = isEcho ? 0.0f : azimuthForSource(listeningNodeStream, streamToAdd, relativePosition);
+
+    float repeatedFrameFadeFactor = 1.0f;
+
+    static const int HRTF_DATASET_INDEX = 1;
+
+    if (!streamToAdd.lastPopSucceeded()) {
+        bool forceSilentBlock = true;
+
+        if (!streamToAdd.getLastPopOutput().isNull()) {
+            bool isInjector = dynamic_cast<const InjectedAudioStream*>(&streamToAdd);
+
+            // in an injector, just go silent - the injector has likely ended
+            // in other inputs (microphone, &c.), repeat with fade to avoid the harsh jump to silence
+
+            // we'll repeat the last block until it has a block to mix
+            // and we'll gradually fade that repeated block into silence.
+
+            // calculate its fade factor, which depends on how many times it's already been repeated.
+            repeatedFrameFadeFactor = calculateRepeatedFrameFadeFactor(streamToAdd.getConsecutiveNotMixedCount() - 1);
+            if (!isInjector && repeatedFrameFadeFactor > 0.0f) {
+                // apply the repeatedFrameFadeFactor to the gain
+                gain *= repeatedFrameFadeFactor;
+
+                forceSilentBlock = false;
+            }
+        }
+
+        if (forceSilentBlock) {
+            // we're deciding not to repeat either since we've already done it enough times or repetition with fade is disabled
+            // in this case we will call renderSilent with a forced silent block
+            // this ensures the correct tail from the previously mixed block and the correct spatialization of first block
+            // of any upcoming audio
+
+            if (!streamToAdd.isStereo() && !isEcho) {
+                // get the existing listener-source HRTF object, or create a new one
+                auto& hrtf = listenerNodeData.hrtfForStream(sourceNodeID, streamToAdd.getStreamIdentifier());
+
+                // this is not done for stereo streams since they do not go through the HRTF
+                static int16_t silentMonoBlock[AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL] = {};
+                hrtf.renderSilent(silentMonoBlock, _mixedSamples, HRTF_DATASET_INDEX, azimuth, distance, gain,
+                                  AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);
+
+                ++_hrtfSilentRenders;
+            }
+
+            return;
+        }
+    }
+
+    // grab the stream from the ring buffer
+    AudioRingBuffer::ConstIterator streamPopOutput = streamToAdd.getLastPopOutput();
+
+    if (streamToAdd.isStereo() || isEcho) {
+        // this is a stereo source or server echo so we do not pass it through the HRTF
+        // simply apply our calculated gain to each sample
+        if (streamToAdd.isStereo()) {
+            for (int i = 0; i < AudioConstants::NETWORK_FRAME_SAMPLES_STEREO; ++i) {
+                _mixedSamples[i] += float(streamPopOutput[i] * gain / AudioConstants::MAX_SAMPLE_VALUE);
+            }
+
+            ++_manualStereoMixes;
+        } else {
+            for (int i = 0; i < AudioConstants::NETWORK_FRAME_SAMPLES_STEREO; i += 2) {
+                auto monoSample = float(streamPopOutput[i / 2] * gain / AudioConstants::MAX_SAMPLE_VALUE);
+                _mixedSamples[i] += monoSample;
+                _mixedSamples[i + 1] += monoSample;
+            }
+
+            ++_manualEchoMixes;
+        }
+
+        return;
+    }
+
+    // get the existing listener-source HRTF object, or create a new one
+    auto& hrtf = listenerNodeData.hrtfForStream(sourceNodeID, streamToAdd.getStreamIdentifier());
+
+    static int16_t streamBlock[AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL];
+
+    streamPopOutput.readSamples(streamBlock, AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);
+
+    // if the frame we're about to mix is silent, simply call render silent and move on
+    if (streamToAdd.getLastPopOutputLoudness() == 0.0f) {
+        // silent frame from source
+
+        // we still need to call renderSilent via the HRTF for mono source
+        hrtf.renderSilent(streamBlock, _mixedSamples, HRTF_DATASET_INDEX, azimuth, distance, gain,
+                          AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);
+
+        ++_hrtfSilentRenders;
+
+        return;
+    }
+
+    float audibilityThreshold = AudioMixer::getMinimumAudibilityThreshold();
+    if (audibilityThreshold > 0.0f &&
+        streamToAdd.getLastPopOutputTrailingLoudness() / glm::length(relativePosition) <= audibilityThreshold) {
+        // the mixer is struggling so we're going to drop off some streams
+
+        // we call renderSilent via the HRTF with the actual frame data and a gain of 0.0
+        hrtf.renderSilent(streamBlock, _mixedSamples, HRTF_DATASET_INDEX, azimuth, distance, 0.0f,
+                          AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);
+
+        ++_hrtfStruggleRenders;
+
+        return;
+    }
+
+    ++_hrtfRenders;
+
+    // mono stream, call the HRTF with our block and calculated azimuth and gain
+    hrtf.render(streamBlock, _mixedSamples, HRTF_DATASET_INDEX, azimuth, distance, gain,
+                AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);
+}
+
+float AudioMixerSlave::gainForSource(const AvatarAudioStream& listeningNodeStream, const PositionalAudioStream& streamToAdd,
+                                     const glm::vec3& relativePosition, bool isEcho) {
+    float gain = 1.0f;
+
+    float distanceBetween = glm::length(relativePosition);
+
+    if (distanceBetween < EPSILON) {
+        distanceBetween = EPSILON;
+    }
+
+    if (streamToAdd.getType() == PositionalAudioStream::Injector) {
+        gain *= reinterpret_cast<const InjectedAudioStream*>(&streamToAdd)->getAttenuationRatio();
+    }
+
+    if (!isEcho && (streamToAdd.getType() == PositionalAudioStream::Microphone)) {
+        // source is another avatar, apply fixed off-axis attenuation to make them quieter as they turn away from listener
+        glm::vec3 rotatedListenerPosition = glm::inverse(streamToAdd.getOrientation()) * relativePosition;
+
+        float angleOfDelivery = glm::angle(glm::vec3(0.0f, 0.0f, -1.0f),
+                                           glm::normalize(rotatedListenerPosition));
+
+        const float MAX_OFF_AXIS_ATTENUATION = 0.2f;
+        const float OFF_AXIS_ATTENUATION_FORMULA_STEP = (1 - MAX_OFF_AXIS_ATTENUATION) / 2.0f;
+
+        float offAxisCoefficient = MAX_OFF_AXIS_ATTENUATION +
+                                   (OFF_AXIS_ATTENUATION_FORMULA_STEP * (angleOfDelivery / PI_OVER_TWO));
+
+        // multiply the current attenuation coefficient by the calculated off axis coefficient
+        gain *= offAxisCoefficient;
+    }
+
+    float attenuationPerDoublingInDistance = AudioMixer::getAttenuationPerDoublingInDistance();
+    auto& zoneSettings = AudioMixer::getZoneSettings();
+    auto& audioZones = AudioMixer::getAudioZones();
+    for (int i = 0; i < zoneSettings.length(); ++i) {
+        if (audioZones[zoneSettings[i].source].contains(streamToAdd.getPosition()) &&
+            audioZones[zoneSettings[i].listener].contains(listeningNodeStream.getPosition())) {
+            attenuationPerDoublingInDistance = zoneSettings[i].coefficient;
+            break;
+        }
+    }
+
+    if (distanceBetween >= ATTENUATION_BEGINS_AT_DISTANCE) {
+
+        // translate the zone setting to gain per log2(distance)
+        float g = 1.0f - attenuationPerDoublingInDistance;
+        g = (g < EPSILON) ? EPSILON : g;
+        g = (g > 1.0f) ? 1.0f : g;
+
+        // calculate the distance coefficient using the distance to this node
+        float distanceCoefficient = fastexp2(fastlog2(g) * fastlog2(distanceBetween/ATTENUATION_BEGINS_AT_DISTANCE));
+
+        // multiply the current attenuation coefficient by the distance coefficient
+        gain *= distanceCoefficient;
+    }
+
+    return gain;
+}
+
+float AudioMixerSlave::azimuthForSource(const AvatarAudioStream& listeningNodeStream, const PositionalAudioStream& streamToAdd,
+                                        const glm::vec3& relativePosition) {
+    glm::quat inverseOrientation = glm::inverse(listeningNodeStream.getOrientation());
+
+    // Compute sample delay for the two ears to create phase panning
+    glm::vec3 rotatedSourcePosition = inverseOrientation * relativePosition;
+
+    // project the rotated source position vector onto the XZ plane
+    rotatedSourcePosition.y = 0.0f;
+
+    static const float SOURCE_DISTANCE_THRESHOLD = 1e-30f;
+
+    if (glm::length2(rotatedSourcePosition) > SOURCE_DISTANCE_THRESHOLD) {
+        // produce an oriented angle about the y-axis
+        return glm::orientedAngle(glm::vec3(0.0f, 0.0f, -1.0f), glm::normalize(rotatedSourcePosition), glm::vec3(0.0f, -1.0f, 0.0f));
+    } else {
+        // there is no distance between listener and source - return no azimuth
+        return 0;
+    }
+}
diff --git a/assignment-client/src/audio/AudioMixer.h b/assignment-client/src/audio/AudioMixer.h
index 78e9e85e13..3011dda62d 100644
--- a/assignment-client/src/audio/AudioMixer.h
+++ b/assignment-client/src/audio/AudioMixer.h
@@ -23,18 +23,77 @@ class AvatarAudioStream;
 class AudioHRTF;
 class AudioMixerClientData;
 
+class AudioMixerSlave {
+public:
+    // mix and broadcast non-ignored streams to the node
+    // returns true if a listener mix was broadcast for the node
+    bool mix(const SharedNodePointer& node, unsigned int frame);
+
+    // reset statistics accumulated over mixes
+    void resetStats() { /* TODO */ };
+    // get statistics accumulated over mixes
+    void getStats() { /* TODO */ };
+
+private:
+    void writeMixPacket(std::unique_ptr<NLPacket>& mixPacket, AudioMixerClientData* data, QByteArray& buffer);
+    void writeSilentPacket(std::unique_ptr<NLPacket>& mixPacket, AudioMixerClientData* data);
+
+    void sendEnvironmentPacket(const SharedNodePointer& node);
+
+    // create mix, returns true if mix has audio
+    bool prepareMix(const SharedNodePointer& node);
+    // add a stream to the mix
+    void addStreamToMix(AudioMixerClientData& listenerData, const QUuid& streamerID,
+                        const AvatarAudioStream& listenerStream, const PositionalAudioStream& streamer);
+
+    float gainForSource(const AvatarAudioStream& listener, const PositionalAudioStream& streamer,
+                        const glm::vec3& relativePosition, bool isEcho);
+    float azimuthForSource(const AvatarAudioStream& listener, const PositionalAudioStream& streamer,
+                           const glm::vec3& relativePosition);
+
+    // mixing buffers
+    float _mixedSamples[AudioConstants::NETWORK_FRAME_SAMPLES_STEREO];
+    int16_t _clampedSamples[AudioConstants::NETWORK_FRAME_SAMPLES_STEREO];
+
+    // mixing statistics
+    unsigned int _sumListeners{ 0 };
+    unsigned int _totalMixes{ 0 };
+    unsigned int _hrtfRenders{ 0 };
+    unsigned int _hrtfStruggleRenders{ 0 };
+    unsigned int _hrtfSilentRenders{ 0 };
+    unsigned int _manualStereoMixes{ 0 };
+    unsigned int _manualEchoMixes{ 0 };
+};
+
 /// Handles assignments of type AudioMixer - mixing streams of audio and re-distributing to various clients.
 class AudioMixer : public ThreadedAssignment {
     Q_OBJECT
 public:
     AudioMixer(ReceivedMessage& message);
 
+    struct ZoneSettings {
+        QString source;
+        QString listener;
+        float coefficient;
+    };
+    struct ReverbSettings {
+        QString zone;
+        float reverbTime;
+        float wetLevel;
+    };
+
+    static int getStaticJitterFrames() { return _numStaticJitterFrames; }
+    static bool shouldMute(float quietestFrame) { return quietestFrame > _noiseMutingThreshold; }
+    static float getAttenuationPerDoublingInDistance() { return _attenuationPerDoublingInDistance; }
+    static float getMinimumAudibilityThreshold() { return _performanceThrottlingRatio > 0.0f ? _minAudibilityThreshold : 0.0f; }
+    static const QHash<QString, AABox>& getAudioZones() { return _audioZones; }
+    static const QVector<ZoneSettings>& getZoneSettings() { return _zoneSettings; }
+    static const QVector<ReverbSettings>& getReverbSettings() { return _zoneReverbSettings; }
+
 public slots:
     void run() override;
     void sendStatsPacket() override;
 
-    static int getStaticJitterFrames() { return _numStaticJitterFrames; }
-
 private slots:
     // packet handlers
     void handleNodeAudioPacket(QSharedPointer<ReceivedMessage> packet, SharedNodePointer sendingNode);
@@ -56,42 +115,13 @@ private:
     // pop a frame from any streams on the node
     // returns the number of available streams
     int prepareFrame(const SharedNodePointer& node, unsigned int frame);
-    // mix and broadcast non-ignored streams to the node
-    // returns true if a listener mix was broadcast for the node
-    bool mixFrame(const SharedNodePointer& node, unsigned int frame);
 
     AudioMixerClientData* getOrCreateClientData(Node* node);
 
-    /// adds one stream to the mix for a listening node
-    void addStreamToMixForListeningNodeWithStream(AudioMixerClientData& listenerNodeData,
-                                                  const PositionalAudioStream& streamToAdd,
-                                                  const QUuid& sourceNodeID,
-                                                  const AvatarAudioStream& listeningNodeStream);
-
-    float gainForSource(const PositionalAudioStream& streamToAdd, const AvatarAudioStream& listeningNodeStream,
-                        const glm::vec3& relativePosition, bool isEcho);
-    float azimuthForSource(const PositionalAudioStream& streamToAdd, const AvatarAudioStream& listeningNodeStream,
-                           const glm::vec3& relativePosition);
-
-    /// prepares and sends a mix to one Node
-    bool prepareMixForListeningNode(Node* node);
-
-    /// Send Audio Environment packet for a single node
-    void sendAudioEnvironmentPacket(SharedNodePointer node);
-
-    void perSecondActions();
-
     QString percentageForMixStats(int counter);
 
-    bool shouldMute(float quietestFrame);
-
     void parseSettingsObject(const QJsonObject& settingsObject);
 
-    float _trailingSleepRatio;
-    float _minAudibilityThreshold;
-    float _performanceThrottlingRatio;
-    float _attenuationPerDoublingInDistance;
-    float _noiseMutingThreshold;
     int _numStatFrames { 0 };
     int _sumStreams { 0 };
     int _sumListeners { 0 };
@@ -104,24 +134,17 @@ private:
 
     QString _codecPreferenceOrder;
 
-    float _mixedSamples[AudioConstants::NETWORK_FRAME_SAMPLES_STEREO];
-    int16_t _clampedSamples[AudioConstants::NETWORK_FRAME_SAMPLES_STEREO];
-
-    QHash<QString, AABox> _audioZones;
-    struct ZonesSettings {
-        QString source;
-        QString listener;
-        float coefficient;
-    };
-    QVector<ZonesSettings> _zonesSettings;
-    struct ReverbSettings {
-        QString zone;
-        float reverbTime;
-        float wetLevel;
-    };
-    QVector<ReverbSettings> _zoneReverbSettings;
+    AudioMixerSlave slave;
 
     static int _numStaticJitterFrames;    // -1 denotes dynamic jitter buffering
+    static float _noiseMutingThreshold;
+    static float _attenuationPerDoublingInDistance;
+    static float _trailingSleepRatio;
+    static float _performanceThrottlingRatio;
+    static float _minAudibilityThreshold;
+    static QHash<QString, AABox> _audioZones;
+    static QVector<ZoneSettings> _zoneSettings;
+    static QVector<ReverbSettings> _zoneReverbSettings;
 };
 
 #endif // hifi_AudioMixer_h
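
Reviewer note (not part of the diff): a minimal sketch of the per-frame flow after this patch, pieced together from the AudioMixer::start() hunk above. All names (nodeList, frame, slave, the counters) come from this patch; the surrounding ThreadedAssignment scaffolding is elided. It shows the intended division of labor: AudioMixer still drives the loop and pops frames, while the new AudioMixerSlave owns per-listener mixing and packet writing.

    // inside AudioMixer::start(), once per mix frame (sketch)
    slave.resetStats();                                  // stat plumbing is still a TODO stub

    // 1. pop a frame from every stream first, so every mix below reads consistent data
    nodeList->eachNode([&](const SharedNodePointer& node) {
        _sumStreams += prepareFrame(node, frame);
    });

    // 2. mix, encode, and broadcast one packet per listening agent
    nodeList->eachNode([&](const SharedNodePointer& node) {
        if (slave.mix(node, frame)) {                    // mute check, mix, environment + stats packets
            ++_sumListeners;
        }
    });

    slave.getStats();                                    // TODO: fold slave counters into sendStatsPacket()

The single `slave` member keeps behavior identical to the old mixFrame() path while carving out the seam where per-listener work could later fan out to several slaves, presumably the motivation for this refactor.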