// // AudioMixer.cpp // assignment-client/src/audio // // Created by Stephen Birarda on 8/22/13. // Copyright 2013 High Fidelity, Inc. // // Distributed under the Apache License, Version 2.0. // See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html // #include #include #include #include #include #include #include #include #include #include #ifdef _WIN32 #include #else #include #include #include #endif //_WIN32 #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "AudioRingBuffer.h" #include "AudioMixerClientData.h" #include "AudioMixerDatagramProcessor.h" #include "AvatarAudioStream.h" #include "InjectedAudioStream.h" #include "AudioMixer.h" const float LOUDNESS_TO_DISTANCE_RATIO = 0.00001f; const float DEFAULT_ATTENUATION_PER_DOUBLING_IN_DISTANCE = 0.18; const QString AUDIO_MIXER_LOGGING_TARGET_NAME = "audio-mixer"; void attachNewNodeDataToNode(Node *newNode) { if (!newNode->getLinkedData()) { newNode->setLinkedData(new AudioMixerClientData()); } } InboundAudioStream::Settings AudioMixer::_streamSettings; bool AudioMixer::_printStreamStats = false; bool AudioMixer::_enableFilter = true; AudioMixer::AudioMixer(const QByteArray& packet) : ThreadedAssignment(packet), _trailingSleepRatio(1.0f), _minAudibilityThreshold(LOUDNESS_TO_DISTANCE_RATIO / 2.0f), _performanceThrottlingRatio(0.0f), _attenuationPerDoublingInDistance(DEFAULT_ATTENUATION_PER_DOUBLING_IN_DISTANCE), _numStatFrames(0), _sumListeners(0), _sumMixes(0), _sourceUnattenuatedZone(NULL), _listenerUnattenuatedZone(NULL), _lastPerSecondCallbackTime(usecTimestampNow()), _sendAudioStreamStats(false), _datagramsReadPerCallStats(0, READ_DATAGRAMS_STATS_WINDOW_SECONDS), _timeSpentPerCallStats(0, READ_DATAGRAMS_STATS_WINDOW_SECONDS), _timeSpentPerHashMatchCallStats(0, READ_DATAGRAMS_STATS_WINDOW_SECONDS), _readPendingCallsPerSecondStats(1, READ_DATAGRAMS_STATS_WINDOW_SECONDS) { // constant defined in AudioMixer.h. However, we don't want to include this here // we will soon find a better common home for these audio-related constants } AudioMixer::~AudioMixer() { delete _sourceUnattenuatedZone; delete _listenerUnattenuatedZone; } const float ATTENUATION_BEGINS_AT_DISTANCE = 1.0f; const float RADIUS_OF_HEAD = 0.076f; int AudioMixer::addStreamToMixForListeningNodeWithStream(AudioMixerClientData* listenerNodeData, const QUuid& streamUUID, PositionalAudioStream* streamToAdd, AvatarAudioStream* listeningNodeStream) { // If repetition with fade is enabled: // If streamToAdd could not provide a frame (it was starved), then we'll mix its previously-mixed frame // This is preferable to not mixing it at all since that's equivalent to inserting silence. // Basically, we'll repeat that last frame until it has a frame to mix. Depending on how many times // we've repeated that frame in a row, we'll gradually fade that repeated frame into silence. // This improves the perceived quality of the audio slightly. bool showDebug = false; // (randFloat() < 0.05f); float repeatedFrameFadeFactor = 1.0f; if (!streamToAdd->lastPopSucceeded()) { if (_streamSettings._repetitionWithFade && !streamToAdd->getLastPopOutput().isNull()) { // reptition with fade is enabled, and we do have a valid previous frame to repeat. // calculate its fade factor, which depends on how many times it's already been repeated. repeatedFrameFadeFactor = calculateRepeatedFrameFadeFactor(streamToAdd->getConsecutiveNotMixedCount() - 1); if (repeatedFrameFadeFactor == 0.0f) { return 0; } } else { return 0; } } // at this point, we know streamToAdd's last pop output is valid // if the frame we're about to mix is silent, bail if (streamToAdd->getLastPopOutputLoudness() == 0.0f) { return 0; } float bearingRelativeAngleToSource = 0.0f; float attenuationCoefficient = 1.0f; int numSamplesDelay = 0; float weakChannelAmplitudeRatio = 1.0f; bool shouldDistanceAttenuate = true; // Is the source that I am mixing my own? bool sourceIsSelf = (streamToAdd == listeningNodeStream); glm::vec3 relativePosition = streamToAdd->getPosition() - listeningNodeStream->getPosition(); float distanceBetween = glm::length(relativePosition); if (distanceBetween < EPSILON) { distanceBetween = EPSILON; } if (streamToAdd->getLastPopOutputTrailingLoudness() / distanceBetween <= _minAudibilityThreshold) { // according to mixer performance we have decided this does not get to be mixed in // bail out return 0; } ++_sumMixes; if (streamToAdd->getListenerUnattenuatedZone()) { shouldDistanceAttenuate = !streamToAdd->getListenerUnattenuatedZone()->contains(listeningNodeStream->getPosition()); } if (streamToAdd->getType() == PositionalAudioStream::Injector) { attenuationCoefficient *= reinterpret_cast(streamToAdd)->getAttenuationRatio(); if (showDebug) { qDebug() << "AttenuationRatio: " << reinterpret_cast(streamToAdd)->getAttenuationRatio(); } } if (showDebug) { qDebug() << "distance: " << distanceBetween; } glm::quat inverseOrientation = glm::inverse(listeningNodeStream->getOrientation()); if (!sourceIsSelf && (streamToAdd->getType() == PositionalAudioStream::Microphone)) { // source is another avatar, apply fixed off-axis attenuation to make them quieter as they turn away from listener glm::vec3 rotatedListenerPosition = glm::inverse(streamToAdd->getOrientation()) * relativePosition; float angleOfDelivery = glm::angle(glm::vec3(0.0f, 0.0f, -1.0f), glm::normalize(rotatedListenerPosition)); const float MAX_OFF_AXIS_ATTENUATION = 0.2f; const float OFF_AXIS_ATTENUATION_FORMULA_STEP = (1 - MAX_OFF_AXIS_ATTENUATION) / 2.0f; float offAxisCoefficient = MAX_OFF_AXIS_ATTENUATION + (OFF_AXIS_ATTENUATION_FORMULA_STEP * (angleOfDelivery / PI_OVER_TWO)); if (showDebug) { qDebug() << "angleOfDelivery" << angleOfDelivery << "offAxisCoefficient: " << offAxisCoefficient; } // multiply the current attenuation coefficient by the calculated off axis coefficient attenuationCoefficient *= offAxisCoefficient; } if (shouldDistanceAttenuate && (distanceBetween >= ATTENUATION_BEGINS_AT_DISTANCE)) { // calculate the distance coefficient using the distance to this node float distanceCoefficient = 1 - (logf(distanceBetween / ATTENUATION_BEGINS_AT_DISTANCE) / logf(2.0f) * _attenuationPerDoublingInDistance); if (distanceCoefficient < 0) { distanceCoefficient = 0; } // multiply the current attenuation coefficient by the distance coefficient attenuationCoefficient *= distanceCoefficient; if (showDebug) { qDebug() << "distanceCoefficient: " << distanceCoefficient; } } if (!sourceIsSelf) { // Compute sample delay for the two ears to create phase panning glm::vec3 rotatedSourcePosition = inverseOrientation * relativePosition; // project the rotated source position vector onto the XZ plane rotatedSourcePosition.y = 0.0f; // produce an oriented angle about the y-axis bearingRelativeAngleToSource = glm::orientedAngle(glm::vec3(0.0f, 0.0f, -1.0f), glm::normalize(rotatedSourcePosition), glm::vec3(0.0f, 1.0f, 0.0f)); const float PHASE_AMPLITUDE_RATIO_AT_90 = 0.5; // figure out the number of samples of delay and the ratio of the amplitude // in the weak channel for audio spatialization float sinRatio = fabsf(sinf(bearingRelativeAngleToSource)); numSamplesDelay = SAMPLE_PHASE_DELAY_AT_90 * sinRatio; weakChannelAmplitudeRatio = 1 - (PHASE_AMPLITUDE_RATIO_AT_90 * sinRatio); if (distanceBetween < RADIUS_OF_HEAD) { // Diminish phase panning if source would be inside head numSamplesDelay *= distanceBetween / RADIUS_OF_HEAD; weakChannelAmplitudeRatio += (PHASE_AMPLITUDE_RATIO_AT_90 * sinRatio) * distanceBetween / RADIUS_OF_HEAD; } } if (showDebug) { qDebug() << "attenuation: " << attenuationCoefficient; qDebug() << "bearingRelativeAngleToSource: " << bearingRelativeAngleToSource << " numSamplesDelay: " << numSamplesDelay; } AudioRingBuffer::ConstIterator streamPopOutput = streamToAdd->getLastPopOutput(); if (!streamToAdd->isStereo()) { // this is a mono stream, which means it gets full attenuation and spatialization // we need to do several things in this process: // 1) convert from mono to stereo by copying each input sample into the left and right output samples // 2) // 2) apply an attenuation AND fade to all samples (left and right) // 3) based on the bearing relative angle to the source we will weaken and delay either the left or // right channel of the input into the output // 4) because one of these channels is delayed, we will need to use historical samples from // the input stream for that delayed channel // Mono input to stereo output (item 1 above) int OUTPUT_SAMPLES_PER_INPUT_SAMPLE = 2; int inputSampleCount = NETWORK_BUFFER_LENGTH_SAMPLES_STEREO / OUTPUT_SAMPLES_PER_INPUT_SAMPLE; int maxOutputIndex = NETWORK_BUFFER_LENGTH_SAMPLES_STEREO; // attenuation and fade applied to all samples (item 2 above) float attenuationAndFade = attenuationCoefficient * repeatedFrameFadeFactor; // determine which side is weak and delayed (item 3 above) bool rightSideWeakAndDelayed = (bearingRelativeAngleToSource > 0.0f); // since we're converting from mono to stereo, we'll use these two indices to step through // the output samples. we'll increment each index independently in the loop int leftDestinationIndex = 0; int rightDestinationIndex = 1; // One of our two channels will be delayed (determined below). We'll use this index to step // through filling in our output with the historical samples for the delayed channel. (item 4 above) int delayedChannelHistoricalAudioOutputIndex; // All samples will be attenuated by at least this much float leftSideAttenuation = attenuationAndFade; float rightSideAttenuation = attenuationAndFade; // The weak/delayed channel will be attenuated by this additional amount float attenuationAndWeakChannelRatioAndFade = attenuationAndFade * weakChannelAmplitudeRatio; // Now, based on the determination of which side is weak and delayed, set up our true starting point // for our indexes, as well as the appropriate attenuation for each channel if (rightSideWeakAndDelayed) { delayedChannelHistoricalAudioOutputIndex = rightDestinationIndex; rightSideAttenuation = attenuationAndWeakChannelRatioAndFade; rightDestinationIndex += (numSamplesDelay * OUTPUT_SAMPLES_PER_INPUT_SAMPLE); } else { delayedChannelHistoricalAudioOutputIndex = leftDestinationIndex; leftSideAttenuation = attenuationAndWeakChannelRatioAndFade; leftDestinationIndex += (numSamplesDelay * OUTPUT_SAMPLES_PER_INPUT_SAMPLE); } // If there was a sample delay for this stream, we need to pull samples prior to the official start of the input // and stick those samples at the beginning of the output. We only need to loop through this for the weak/delayed // side, since the normal side is fully handled below. (item 4 above) if (numSamplesDelay > 0) { // TODO: delayStreamSourceSamples may be inside the last frame written if the ringbuffer is completely full // maybe make AudioRingBuffer have 1 extra frame in its buffer AudioRingBuffer::ConstIterator delayStreamSourceSamples = streamPopOutput - numSamplesDelay; for (int i = 0; i < numSamplesDelay; i++) { int16_t originalHistoricalSample = *delayStreamSourceSamples; _preMixSamples[delayedChannelHistoricalAudioOutputIndex] += originalHistoricalSample * attenuationAndWeakChannelRatioAndFade; ++delayStreamSourceSamples; // move our input pointer delayedChannelHistoricalAudioOutputIndex += OUTPUT_SAMPLES_PER_INPUT_SAMPLE; // move our output sample } } // Here's where we copy the MONO input to the STEREO output, and account for delay and weak side attenuation for (int inputSample = 0; inputSample < inputSampleCount; inputSample++) { int16_t originalSample = streamPopOutput[inputSample]; int16_t leftSideSample = originalSample * leftSideAttenuation; int16_t rightSideSample = originalSample * rightSideAttenuation; // since we might be delayed, don't write beyond our maxOutputIndex if (leftDestinationIndex <= maxOutputIndex) { _preMixSamples[leftDestinationIndex] += leftSideSample; } if (rightDestinationIndex <= maxOutputIndex) { _preMixSamples[rightDestinationIndex] += rightSideSample; } leftDestinationIndex += OUTPUT_SAMPLES_PER_INPUT_SAMPLE; rightDestinationIndex += OUTPUT_SAMPLES_PER_INPUT_SAMPLE; } } else { int stereoDivider = streamToAdd->isStereo() ? 1 : 2; float attenuationAndFade = attenuationCoefficient * repeatedFrameFadeFactor; for (int s = 0; s < NETWORK_BUFFER_LENGTH_SAMPLES_STEREO; s++) { _preMixSamples[s] = glm::clamp(_preMixSamples[s] + (int)(streamPopOutput[s / stereoDivider] * attenuationAndFade), MIN_SAMPLE_VALUE, MAX_SAMPLE_VALUE); } } if (!sourceIsSelf && _enableFilter) { const float TWO_OVER_PI = 2.0f / PI; const float ZERO_DB = 1.0f; const float NEGATIVE_ONE_DB = 0.891f; const float NEGATIVE_THREE_DB = 0.708f; const float FILTER_GAIN_AT_0 = ZERO_DB; // source is in front const float FILTER_GAIN_AT_90 = NEGATIVE_ONE_DB; // source is incident to left or right ear const float FILTER_GAIN_AT_180 = NEGATIVE_THREE_DB; // source is behind const float FILTER_CUTOFF_FREQUENCY_HZ = 1000.0f; const float penumbraFilterFrequency = FILTER_CUTOFF_FREQUENCY_HZ; // constant frequency const float penumbraFilterSlope = NEGATIVE_THREE_DB; // constant slope float penumbraFilterGainL; float penumbraFilterGainR; // variable gain calculation broken down by quadrant if (-bearingRelativeAngleToSource < -PI_OVER_TWO && -bearingRelativeAngleToSource > -PI) { penumbraFilterGainL = TWO_OVER_PI * (FILTER_GAIN_AT_0 - FILTER_GAIN_AT_180) * (-bearingRelativeAngleToSource + PI_OVER_TWO) + FILTER_GAIN_AT_0; penumbraFilterGainR = TWO_OVER_PI * (FILTER_GAIN_AT_90 - FILTER_GAIN_AT_180) * (-bearingRelativeAngleToSource + PI_OVER_TWO) + FILTER_GAIN_AT_90; } else if (-bearingRelativeAngleToSource <= PI && -bearingRelativeAngleToSource > PI_OVER_TWO) { penumbraFilterGainL = TWO_OVER_PI * (FILTER_GAIN_AT_180 - FILTER_GAIN_AT_90) * (-bearingRelativeAngleToSource - PI) + FILTER_GAIN_AT_180; penumbraFilterGainR = TWO_OVER_PI * (FILTER_GAIN_AT_180 - FILTER_GAIN_AT_0) * (-bearingRelativeAngleToSource - PI) + FILTER_GAIN_AT_180; } else if (-bearingRelativeAngleToSource <= PI_OVER_TWO && -bearingRelativeAngleToSource > 0) { penumbraFilterGainL = TWO_OVER_PI * (FILTER_GAIN_AT_90 - FILTER_GAIN_AT_0) * (-bearingRelativeAngleToSource - PI_OVER_TWO) + FILTER_GAIN_AT_90; penumbraFilterGainR = FILTER_GAIN_AT_0; } else { penumbraFilterGainL = FILTER_GAIN_AT_0; penumbraFilterGainR = TWO_OVER_PI * (FILTER_GAIN_AT_0 - FILTER_GAIN_AT_90) * (-bearingRelativeAngleToSource) + FILTER_GAIN_AT_0; } if (distanceBetween < RADIUS_OF_HEAD) { // Diminish effect if source would be inside head penumbraFilterGainL += (1.f - penumbraFilterGainL) * (1.f - distanceBetween / RADIUS_OF_HEAD); penumbraFilterGainR += (1.f - penumbraFilterGainR) * (1.f - distanceBetween / RADIUS_OF_HEAD); } #if 0 qDebug() << "gainL=" << penumbraFilterGainL << "gainR=" << penumbraFilterGainR << "angle=" << -bearingRelativeAngleToSource; #endif // Get our per listener/source data so we can get our filter AudioFilterHSF1s& penumbraFilter = listenerNodeData->getListenerSourcePairData(streamUUID)->getPenumbraFilter(); // set the gain on both filter channels penumbraFilter.setParameters(0, 0, SAMPLE_RATE, penumbraFilterFrequency, penumbraFilterGainL, penumbraFilterSlope); penumbraFilter.setParameters(0, 1, SAMPLE_RATE, penumbraFilterFrequency, penumbraFilterGainR, penumbraFilterSlope); penumbraFilter.render(_preMixSamples, _preMixSamples, NETWORK_BUFFER_LENGTH_SAMPLES_STEREO / 2); } // Actually mix the _preMixSamples into the _mixSamples here. for (int s = 0; s < NETWORK_BUFFER_LENGTH_SAMPLES_STEREO; s++) { _mixSamples[s] = glm::clamp(_mixSamples[s] + _preMixSamples[s], MIN_SAMPLE_VALUE, MAX_SAMPLE_VALUE); } return 1; } int AudioMixer::prepareMixForListeningNode(Node* node) { AvatarAudioStream* nodeAudioStream = ((AudioMixerClientData*) node->getLinkedData())->getAvatarAudioStream(); AudioMixerClientData* listenerNodeData = (AudioMixerClientData*)node->getLinkedData(); // zero out the client mix for this node memset(_preMixSamples, 0, sizeof(_preMixSamples)); memset(_mixSamples, 0, sizeof(_mixSamples)); // loop through all other nodes that have sufficient audio to mix int streamsMixed = 0; foreach (const SharedNodePointer& otherNode, NodeList::getInstance()->getNodeHash()) { if (otherNode->getLinkedData()) { AudioMixerClientData* otherNodeClientData = (AudioMixerClientData*) otherNode->getLinkedData(); // enumerate the ARBs attached to the otherNode and add all that should be added to mix const QHash& otherNodeAudioStreams = otherNodeClientData->getAudioStreams(); QHash::ConstIterator i; for (i = otherNodeAudioStreams.constBegin(); i != otherNodeAudioStreams.constEnd(); i++) { PositionalAudioStream* otherNodeStream = i.value(); QUuid streamUUID = i.key(); if (otherNodeStream->getType() == PositionalAudioStream::Microphone) { streamUUID = otherNode->getUUID(); } if (*otherNode != *node || otherNodeStream->shouldLoopbackForNode()) { streamsMixed += addStreamToMixForListeningNodeWithStream(listenerNodeData, streamUUID, otherNodeStream, nodeAudioStream); } } } } return streamsMixed; } void AudioMixer::readPendingDatagram(const QByteArray& receivedPacket, const HifiSockAddr& senderSockAddr) { NodeList* nodeList = NodeList::getInstance(); if (nodeList->packetVersionAndHashMatch(receivedPacket)) { // pull any new audio data from nodes off of the network stack PacketType mixerPacketType = packetTypeForPacket(receivedPacket); if (mixerPacketType == PacketTypeMicrophoneAudioNoEcho || mixerPacketType == PacketTypeMicrophoneAudioWithEcho || mixerPacketType == PacketTypeInjectAudio || mixerPacketType == PacketTypeSilentAudioFrame || mixerPacketType == PacketTypeAudioStreamStats) { nodeList->findNodeAndUpdateWithDataFromPacket(receivedPacket); } else if (mixerPacketType == PacketTypeMuteEnvironment) { QByteArray packet = receivedPacket; populatePacketHeader(packet, PacketTypeMuteEnvironment); foreach (const SharedNodePointer& node, nodeList->getNodeHash()) { if (node->getType() == NodeType::Agent && node->getActiveSocket() && node->getLinkedData() && node != nodeList->sendingNodeForPacket(receivedPacket)) { nodeList->writeDatagram(packet, packet.size(), node); } } } else { // let processNodeData handle it. nodeList->processNodeData(senderSockAddr, receivedPacket); } } } void AudioMixer::sendStatsPacket() { static QJsonObject statsObject; statsObject["useDynamicJitterBuffers"] = _streamSettings._dynamicJitterBuffers; statsObject["trailing_sleep_percentage"] = _trailingSleepRatio * 100.0f; statsObject["performance_throttling_ratio"] = _performanceThrottlingRatio; statsObject["average_listeners_per_frame"] = (float) _sumListeners / (float) _numStatFrames; if (_sumListeners > 0) { statsObject["average_mixes_per_listener"] = (float) _sumMixes / (float) _sumListeners; } else { statsObject["average_mixes_per_listener"] = 0.0; } ThreadedAssignment::addPacketStatsAndSendStatsPacket(statsObject); _sumListeners = 0; _sumMixes = 0; _numStatFrames = 0; // NOTE: These stats can be too large to fit in an MTU, so we break it up into multiple packts... QJsonObject statsObject2; // add stats for each listerner bool somethingToSend = false; int sizeOfStats = 0; int TOO_BIG_FOR_MTU = 1200; // some extra space for JSONification QString property = "readPendingDatagram_calls_stats"; QString value = getReadPendingDatagramsCallsPerSecondsStatsString(); statsObject2[qPrintable(property)] = value; somethingToSend = true; sizeOfStats += property.size() + value.size(); property = "readPendingDatagram_packets_per_call_stats"; value = getReadPendingDatagramsPacketsPerCallStatsString(); statsObject2[qPrintable(property)] = value; somethingToSend = true; sizeOfStats += property.size() + value.size(); property = "readPendingDatagram_packets_time_per_call_stats"; value = getReadPendingDatagramsTimeStatsString(); statsObject2[qPrintable(property)] = value; somethingToSend = true; sizeOfStats += property.size() + value.size(); property = "readPendingDatagram_hashmatch_time_per_call_stats"; value = getReadPendingDatagramsHashMatchTimeStatsString(); statsObject2[qPrintable(property)] = value; somethingToSend = true; sizeOfStats += property.size() + value.size(); NodeList* nodeList = NodeList::getInstance(); int clientNumber = 0; foreach (const SharedNodePointer& node, nodeList->getNodeHash()) { // if we're too large, send the packet if (sizeOfStats > TOO_BIG_FOR_MTU) { nodeList->sendStatsToDomainServer(statsObject2); sizeOfStats = 0; statsObject2 = QJsonObject(); // clear it somethingToSend = false; } clientNumber++; AudioMixerClientData* clientData = static_cast(node->getLinkedData()); if (clientData) { QString property = "jitterStats." + node->getUUID().toString(); QString value = clientData->getAudioStreamStatsString(); statsObject2[qPrintable(property)] = value; somethingToSend = true; sizeOfStats += property.size() + value.size(); } } if (somethingToSend) { nodeList->sendStatsToDomainServer(statsObject2); } } void AudioMixer::run() { ThreadedAssignment::commonInit(AUDIO_MIXER_LOGGING_TARGET_NAME, NodeType::AudioMixer); NodeList* nodeList = NodeList::getInstance(); // we do not want this event loop to be the handler for UDP datagrams, so disconnect disconnect(&nodeList->getNodeSocket(), 0, this, 0); // setup a QThread with us as parent that will house the AudioMixerDatagramProcessor _datagramProcessingThread = new QThread(this); // create an AudioMixerDatagramProcessor and move it to that thread AudioMixerDatagramProcessor* datagramProcessor = new AudioMixerDatagramProcessor(nodeList->getNodeSocket(), thread()); datagramProcessor->moveToThread(_datagramProcessingThread); // remove the NodeList as the parent of the node socket nodeList->getNodeSocket().setParent(NULL); nodeList->getNodeSocket().moveToThread(_datagramProcessingThread); // let the datagram processor handle readyRead from node socket connect(&nodeList->getNodeSocket(), &QUdpSocket::readyRead, datagramProcessor, &AudioMixerDatagramProcessor::readPendingDatagrams); // connect to the datagram processing thread signal that tells us we have to handle a packet connect(datagramProcessor, &AudioMixerDatagramProcessor::packetRequiresProcessing, this, &AudioMixer::readPendingDatagram); // delete the datagram processor and the associated thread when the QThread quits connect(_datagramProcessingThread, &QThread::finished, datagramProcessor, &QObject::deleteLater); connect(datagramProcessor, &QObject::destroyed, _datagramProcessingThread, &QThread::deleteLater); // start the datagram processing thread _datagramProcessingThread->start(); nodeList->addNodeTypeToInterestSet(NodeType::Agent); nodeList->linkedDataCreateCallback = attachNewNodeDataToNode; // wait until we have the domain-server settings, otherwise we bail DomainHandler& domainHandler = nodeList->getDomainHandler(); qDebug() << "Waiting for domain settings from domain-server."; // block until we get the settingsRequestComplete signal QEventLoop loop; connect(&domainHandler, &DomainHandler::settingsReceived, &loop, &QEventLoop::quit); connect(&domainHandler, &DomainHandler::settingsReceiveFail, &loop, &QEventLoop::quit); domainHandler.requestDomainSettings(); loop.exec(); if (domainHandler.getSettingsObject().isEmpty()) { qDebug() << "Failed to retreive settings object from domain-server. Bailing on assignment."; setFinished(true); return; } const QJsonObject& settingsObject = domainHandler.getSettingsObject(); // check the settings object to see if we have anything we can parse out const QString AUDIO_GROUP_KEY = "audio"; if (settingsObject.contains(AUDIO_GROUP_KEY)) { QJsonObject audioGroupObject = settingsObject[AUDIO_GROUP_KEY].toObject(); // check the payload to see if we have asked for dynamicJitterBuffer support const QString DYNAMIC_JITTER_BUFFER_JSON_KEY = "dynamic_jitter_buffer"; _streamSettings._dynamicJitterBuffers = audioGroupObject[DYNAMIC_JITTER_BUFFER_JSON_KEY].toBool(); if (_streamSettings._dynamicJitterBuffers) { qDebug() << "Enable dynamic jitter buffers."; } else { qDebug() << "Dynamic jitter buffers disabled."; } bool ok; const QString DESIRED_JITTER_BUFFER_FRAMES_KEY = "static_desired_jitter_buffer_frames"; _streamSettings._staticDesiredJitterBufferFrames = audioGroupObject[DESIRED_JITTER_BUFFER_FRAMES_KEY].toString().toInt(&ok); if (!ok) { _streamSettings._staticDesiredJitterBufferFrames = DEFAULT_STATIC_DESIRED_JITTER_BUFFER_FRAMES; } qDebug() << "Static desired jitter buffer frames:" << _streamSettings._staticDesiredJitterBufferFrames; const QString MAX_FRAMES_OVER_DESIRED_JSON_KEY = "max_frames_over_desired"; _streamSettings._maxFramesOverDesired = audioGroupObject[MAX_FRAMES_OVER_DESIRED_JSON_KEY].toString().toInt(&ok); if (!ok) { _streamSettings._maxFramesOverDesired = DEFAULT_MAX_FRAMES_OVER_DESIRED; } qDebug() << "Max frames over desired:" << _streamSettings._maxFramesOverDesired; const QString USE_STDEV_FOR_DESIRED_CALC_JSON_KEY = "use_stdev_for_desired_calc"; _streamSettings._useStDevForJitterCalc = audioGroupObject[USE_STDEV_FOR_DESIRED_CALC_JSON_KEY].toBool(); if (_streamSettings._useStDevForJitterCalc) { qDebug() << "Using Philip's stdev method for jitter calc if dynamic jitter buffers enabled"; } else { qDebug() << "Using Fred's max-gap method for jitter calc if dynamic jitter buffers enabled"; } const QString WINDOW_STARVE_THRESHOLD_JSON_KEY = "window_starve_threshold"; _streamSettings._windowStarveThreshold = audioGroupObject[WINDOW_STARVE_THRESHOLD_JSON_KEY].toString().toInt(&ok); if (!ok) { _streamSettings._windowStarveThreshold = DEFAULT_WINDOW_STARVE_THRESHOLD; } qDebug() << "Window A starve threshold:" << _streamSettings._windowStarveThreshold; const QString WINDOW_SECONDS_FOR_DESIRED_CALC_ON_TOO_MANY_STARVES_JSON_KEY = "window_seconds_for_desired_calc_on_too_many_starves"; _streamSettings._windowSecondsForDesiredCalcOnTooManyStarves = audioGroupObject[WINDOW_SECONDS_FOR_DESIRED_CALC_ON_TOO_MANY_STARVES_JSON_KEY].toString().toInt(&ok); if (!ok) { _streamSettings._windowSecondsForDesiredCalcOnTooManyStarves = DEFAULT_WINDOW_SECONDS_FOR_DESIRED_CALC_ON_TOO_MANY_STARVES; } qDebug() << "Window A length:" << _streamSettings._windowSecondsForDesiredCalcOnTooManyStarves << "seconds"; const QString WINDOW_SECONDS_FOR_DESIRED_REDUCTION_JSON_KEY = "window_seconds_for_desired_reduction"; _streamSettings._windowSecondsForDesiredReduction = audioGroupObject[WINDOW_SECONDS_FOR_DESIRED_REDUCTION_JSON_KEY].toString().toInt(&ok); if (!ok) { _streamSettings._windowSecondsForDesiredReduction = DEFAULT_WINDOW_SECONDS_FOR_DESIRED_REDUCTION; } qDebug() << "Window B length:" << _streamSettings._windowSecondsForDesiredReduction << "seconds"; const QString REPETITION_WITH_FADE_JSON_KEY = "repetition_with_fade"; _streamSettings._repetitionWithFade = audioGroupObject[REPETITION_WITH_FADE_JSON_KEY].toBool(); if (_streamSettings._repetitionWithFade) { qDebug() << "Repetition with fade enabled"; } else { qDebug() << "Repetition with fade disabled"; } const QString PRINT_STREAM_STATS_JSON_KEY = "print_stream_stats"; _printStreamStats = audioGroupObject[PRINT_STREAM_STATS_JSON_KEY].toBool(); if (_printStreamStats) { qDebug() << "Stream stats will be printed to stdout"; } const QString FILTER_KEY = "enable_filter"; if (audioGroupObject[FILTER_KEY].isBool()) { _enableFilter = audioGroupObject[FILTER_KEY].toBool(); } if (_enableFilter) { qDebug() << "Filter enabled"; } const QString UNATTENUATED_ZONE_KEY = "unattenuated_zone"; QString unattenuatedZoneString = audioGroupObject[UNATTENUATED_ZONE_KEY].toString(); if (!unattenuatedZoneString.isEmpty()) { QStringList zoneStringList = unattenuatedZoneString.split(','); glm::vec3 sourceCorner(zoneStringList[0].toFloat(), zoneStringList[1].toFloat(), zoneStringList[2].toFloat()); glm::vec3 sourceDimensions(zoneStringList[3].toFloat(), zoneStringList[4].toFloat(), zoneStringList[5].toFloat()); glm::vec3 listenerCorner(zoneStringList[6].toFloat(), zoneStringList[7].toFloat(), zoneStringList[8].toFloat()); glm::vec3 listenerDimensions(zoneStringList[9].toFloat(), zoneStringList[10].toFloat(), zoneStringList[11].toFloat()); _sourceUnattenuatedZone = new AABox(sourceCorner, sourceDimensions); _listenerUnattenuatedZone = new AABox(listenerCorner, listenerDimensions); glm::vec3 sourceCenter = _sourceUnattenuatedZone->calcCenter(); glm::vec3 destinationCenter = _listenerUnattenuatedZone->calcCenter(); qDebug() << "There is an unattenuated zone with source center at" << QString("%1, %2, %3").arg(sourceCenter.x).arg(sourceCenter.y).arg(sourceCenter.z); qDebug() << "Buffers inside this zone will not be attenuated inside a box with center at" << QString("%1, %2, %3").arg(destinationCenter.x).arg(destinationCenter.y).arg(destinationCenter.z); } const QString ATTENATION_PER_DOULING_IN_DISTANCE = "attenuation_per_doubling_in_distance"; if (audioGroupObject[ATTENATION_PER_DOULING_IN_DISTANCE].isString()) { bool ok = false; float attenuation = audioGroupObject[ATTENATION_PER_DOULING_IN_DISTANCE].toString().toFloat(&ok); if (ok) { _attenuationPerDoublingInDistance = attenuation; qDebug() << "Attenuation per doubling in distance changed to" << _attenuationPerDoublingInDistance; } } } int nextFrame = 0; QElapsedTimer timer; timer.start(); char clientMixBuffer[MAX_PACKET_SIZE]; int usecToSleep = BUFFER_SEND_INTERVAL_USECS; const int TRAILING_AVERAGE_FRAMES = 100; int framesSinceCutoffEvent = TRAILING_AVERAGE_FRAMES; while (!_isFinished) { const float STRUGGLE_TRIGGER_SLEEP_PERCENTAGE_THRESHOLD = 0.10f; const float BACK_OFF_TRIGGER_SLEEP_PERCENTAGE_THRESHOLD = 0.20f; const float RATIO_BACK_OFF = 0.02f; const float CURRENT_FRAME_RATIO = 1.0f / TRAILING_AVERAGE_FRAMES; const float PREVIOUS_FRAMES_RATIO = 1.0f - CURRENT_FRAME_RATIO; if (usecToSleep < 0) { usecToSleep = 0; } _trailingSleepRatio = (PREVIOUS_FRAMES_RATIO * _trailingSleepRatio) + (usecToSleep * CURRENT_FRAME_RATIO / (float) BUFFER_SEND_INTERVAL_USECS); float lastCutoffRatio = _performanceThrottlingRatio; bool hasRatioChanged = false; if (framesSinceCutoffEvent >= TRAILING_AVERAGE_FRAMES) { if (_trailingSleepRatio <= STRUGGLE_TRIGGER_SLEEP_PERCENTAGE_THRESHOLD) { // we're struggling - change our min required loudness to reduce some load _performanceThrottlingRatio = _performanceThrottlingRatio + (0.5f * (1.0f - _performanceThrottlingRatio)); qDebug() << "Mixer is struggling, sleeping" << _trailingSleepRatio * 100 << "% of frame time. Old cutoff was" << lastCutoffRatio << "and is now" << _performanceThrottlingRatio; hasRatioChanged = true; } else if (_trailingSleepRatio >= BACK_OFF_TRIGGER_SLEEP_PERCENTAGE_THRESHOLD && _performanceThrottlingRatio != 0) { // we've recovered and can back off the required loudness _performanceThrottlingRatio = _performanceThrottlingRatio - RATIO_BACK_OFF; if (_performanceThrottlingRatio < 0) { _performanceThrottlingRatio = 0; } qDebug() << "Mixer is recovering, sleeping" << _trailingSleepRatio * 100 << "% of frame time. Old cutoff was" << lastCutoffRatio << "and is now" << _performanceThrottlingRatio; hasRatioChanged = true; } if (hasRatioChanged) { // set out min audability threshold from the new ratio _minAudibilityThreshold = LOUDNESS_TO_DISTANCE_RATIO / (2.0f * (1.0f - _performanceThrottlingRatio)); qDebug() << "Minimum audability required to be mixed is now" << _minAudibilityThreshold; framesSinceCutoffEvent = 0; } } if (!hasRatioChanged) { ++framesSinceCutoffEvent; } quint64 now = usecTimestampNow(); if (now - _lastPerSecondCallbackTime > USECS_PER_SECOND) { perSecondActions(); _lastPerSecondCallbackTime = now; } foreach (const SharedNodePointer& node, nodeList->getNodeHash()) { if (node->getLinkedData()) { AudioMixerClientData* nodeData = (AudioMixerClientData*)node->getLinkedData(); // this function will attempt to pop a frame from each audio stream. // a pointer to the popped data is stored as a member in InboundAudioStream. // That's how the popped audio data will be read for mixing (but only if the pop was successful) nodeData->checkBuffersBeforeFrameSend(_sourceUnattenuatedZone, _listenerUnattenuatedZone); if (node->getType() == NodeType::Agent && node->getActiveSocket() && nodeData->getAvatarAudioStream()) { int streamsMixed = prepareMixForListeningNode(node.data()); char* dataAt; if (streamsMixed > 0) { // pack header int numBytesPacketHeader = populatePacketHeader(clientMixBuffer, PacketTypeMixedAudio); dataAt = clientMixBuffer + numBytesPacketHeader; // pack sequence number quint16 sequence = nodeData->getOutgoingSequenceNumber(); memcpy(dataAt, &sequence, sizeof(quint16)); dataAt += sizeof(quint16); // pack mixed audio samples memcpy(dataAt, _mixSamples, NETWORK_BUFFER_LENGTH_BYTES_STEREO); dataAt += NETWORK_BUFFER_LENGTH_BYTES_STEREO; } else { // pack header int numBytesPacketHeader = populatePacketHeader(clientMixBuffer, PacketTypeSilentAudioFrame); dataAt = clientMixBuffer + numBytesPacketHeader; // pack sequence number quint16 sequence = nodeData->getOutgoingSequenceNumber(); memcpy(dataAt, &sequence, sizeof(quint16)); dataAt += sizeof(quint16); // pack number of silent audio samples quint16 numSilentSamples = NETWORK_BUFFER_LENGTH_SAMPLES_STEREO; memcpy(dataAt, &numSilentSamples, sizeof(quint16)); dataAt += sizeof(quint16); } // send mixed audio packet nodeList->writeDatagram(clientMixBuffer, dataAt - clientMixBuffer, node); nodeData->incrementOutgoingMixedAudioSequenceNumber(); // send an audio stream stats packet if it's time if (_sendAudioStreamStats) { nodeData->sendAudioStreamStatsPackets(node); _sendAudioStreamStats = false; } ++_sumListeners; } } } ++_numStatFrames; QCoreApplication::processEvents(); if (_isFinished) { break; } usecToSleep = (++nextFrame * BUFFER_SEND_INTERVAL_USECS) - timer.nsecsElapsed() / 1000; // ns to us if (usecToSleep > 0) { usleep(usecToSleep); } } } void AudioMixer::perSecondActions() { _sendAudioStreamStats = true; int callsLastSecond = _datagramsReadPerCallStats.getCurrentIntervalSamples(); _readPendingCallsPerSecondStats.update(callsLastSecond); if (_printStreamStats) { printf("\n================================================================================\n\n"); printf(" readPendingDatagram() calls per second | avg: %.2f, avg_30s: %.2f, last_second: %d\n", _readPendingCallsPerSecondStats.getAverage(), _readPendingCallsPerSecondStats.getWindowAverage(), callsLastSecond); printf(" Datagrams read per call | avg: %.2f, avg_30s: %.2f, last_second: %.2f\n", _datagramsReadPerCallStats.getAverage(), _datagramsReadPerCallStats.getWindowAverage(), _datagramsReadPerCallStats.getCurrentIntervalAverage()); printf(" Usecs spent per readPendingDatagram() call | avg: %.2f, avg_30s: %.2f, last_second: %.2f\n", _timeSpentPerCallStats.getAverage(), _timeSpentPerCallStats.getWindowAverage(), _timeSpentPerCallStats.getCurrentIntervalAverage()); printf(" Usecs spent per packetVersionAndHashMatch() call | avg: %.2f, avg_30s: %.2f, last_second: %.2f\n", _timeSpentPerHashMatchCallStats.getAverage(), _timeSpentPerHashMatchCallStats.getWindowAverage(), _timeSpentPerHashMatchCallStats.getCurrentIntervalAverage()); double WINDOW_LENGTH_USECS = READ_DATAGRAMS_STATS_WINDOW_SECONDS * USECS_PER_SECOND; printf(" %% time spent in readPendingDatagram() calls | avg_30s: %.6f%%, last_second: %.6f%%\n", _timeSpentPerCallStats.getWindowSum() / WINDOW_LENGTH_USECS * 100.0, _timeSpentPerCallStats.getCurrentIntervalSum() / USECS_PER_SECOND * 100.0); printf("%% time spent in packetVersionAndHashMatch() calls: | avg_30s: %.6f%%, last_second: %.6f%%\n", _timeSpentPerHashMatchCallStats.getWindowSum() / WINDOW_LENGTH_USECS * 100.0, _timeSpentPerHashMatchCallStats.getCurrentIntervalSum() / USECS_PER_SECOND * 100.0); foreach(const SharedNodePointer& node, NodeList::getInstance()->getNodeHash()) { if (node->getLinkedData()) { AudioMixerClientData* nodeData = (AudioMixerClientData*)node->getLinkedData(); if (node->getType() == NodeType::Agent && node->getActiveSocket()) { printf("\nStats for agent %s --------------------------------\n", node->getUUID().toString().toLatin1().data()); nodeData->printUpstreamDownstreamStats(); } } } } _datagramsReadPerCallStats.currentIntervalComplete(); _timeSpentPerCallStats.currentIntervalComplete(); _timeSpentPerHashMatchCallStats.currentIntervalComplete(); } QString AudioMixer::getReadPendingDatagramsCallsPerSecondsStatsString() const { QString result = "calls_per_sec_avg_30s: " + QString::number(_readPendingCallsPerSecondStats.getWindowAverage(), 'f', 2) + " calls_last_sec: " + QString::number(_readPendingCallsPerSecondStats.getLastCompleteIntervalStats().getSum() + 0.5, 'f', 0); return result; } QString AudioMixer::getReadPendingDatagramsPacketsPerCallStatsString() const { QString result = "pkts_per_call_avg_30s: " + QString::number(_datagramsReadPerCallStats.getWindowAverage(), 'f', 2) + " pkts_per_call_avg_1s: " + QString::number(_datagramsReadPerCallStats.getLastCompleteIntervalStats().getAverage(), 'f', 2); return result; } QString AudioMixer::getReadPendingDatagramsTimeStatsString() const { QString result = "usecs_per_call_avg_30s: " + QString::number(_timeSpentPerCallStats.getWindowAverage(), 'f', 2) + " usecs_per_call_avg_1s: " + QString::number(_timeSpentPerCallStats.getLastCompleteIntervalStats().getAverage(), 'f', 2) + " prct_time_in_call_30s: " + QString::number(_timeSpentPerCallStats.getWindowSum() / (READ_DATAGRAMS_STATS_WINDOW_SECONDS*USECS_PER_SECOND) * 100.0, 'f', 6) + "%" + " prct_time_in_call_1s: " + QString::number(_timeSpentPerCallStats.getLastCompleteIntervalStats().getSum() / USECS_PER_SECOND * 100.0, 'f', 6) + "%"; return result; } QString AudioMixer::getReadPendingDatagramsHashMatchTimeStatsString() const { QString result = "usecs_per_hashmatch_avg_30s: " + QString::number(_timeSpentPerHashMatchCallStats.getWindowAverage(), 'f', 2) + " usecs_per_hashmatch_avg_1s: " + QString::number(_timeSpentPerHashMatchCallStats.getLastCompleteIntervalStats().getAverage(), 'f', 2) + " prct_time_in_hashmatch_30s: " + QString::number(_timeSpentPerHashMatchCallStats.getWindowSum() / (READ_DATAGRAMS_STATS_WINDOW_SECONDS*USECS_PER_SECOND) * 100.0, 'f', 6) + "%" + " prct_time_in_hashmatch_1s: " + QString::number(_timeSpentPerHashMatchCallStats.getLastCompleteIntervalStats().getSum() / USECS_PER_SECOND * 100.0, 'f', 6) + "%"; return result; }