//
// AudioMixer.cpp
// assignment-client/src/audio
//
// Created by Stephen Birarda on 8/22/13.
// Copyright 2013 High Fidelity, Inc.
//
// Distributed under the Apache License, Version 2.0.
// See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html
//
#include <mmintrin.h>
#include <errno.h>
#include <fcntl.h>
#include <fstream>
#include <iostream>
#include <math.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef _WIN32
#include <math.h>
#else
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>
#endif //_WIN32
#include <glm/glm.hpp>
#include <glm/gtx/norm.hpp>
#include <glm/gtx/vector_angle.hpp>
#include <QtCore/QCoreApplication>
#include <QtCore/QJsonArray>
#include <QtCore/QJsonDocument>
#include <QtCore/QJsonObject>
#include <QtCore/QJsonValue>
#include <QtCore/QThread>
#include <QtCore/QTimer>
#include <QtNetwork/QNetworkRequest>
#include <QtNetwork/QNetworkReply>
#include <Logging.h>
#include <NetworkAccessManager.h>
#include <NodeList.h>
#include <Node.h>
#include <PacketHeaders.h>
#include <SharedUtil.h>
#include <StdDev.h>
#include <UUID.h>
#include "AudioRingBuffer.h"
#include "AudioMixerClientData.h"
#include "AudioMixerDatagramProcessor.h"
#include "AvatarAudioStream.h"
#include "InjectedAudioStream.h"
#include "AudioMixer.h"
const float LOUDNESS_TO_DISTANCE_RATIO = 0.00001f;
const float DEFAULT_ATTENUATION_PER_DOUBLING_IN_DISTANCE = 0.18f;
const QString AUDIO_MIXER_LOGGING_TARGET_NAME = "audio-mixer";
void attachNewNodeDataToNode(Node *newNode) {
if (!newNode->getLinkedData()) {
newNode->setLinkedData(new AudioMixerClientData());
}
}
InboundAudioStream::Settings AudioMixer::_streamSettings;
bool AudioMixer::_printStreamStats = false;
bool AudioMixer::_enableFilter = true;
AudioMixer::AudioMixer(const QByteArray& packet) :
ThreadedAssignment(packet),
_trailingSleepRatio(1.0f),
_minAudibilityThreshold(LOUDNESS_TO_DISTANCE_RATIO / 2.0f),
_performanceThrottlingRatio(0.0f),
_attenuationPerDoublingInDistance(DEFAULT_ATTENUATION_PER_DOUBLING_IN_DISTANCE),
_numStatFrames(0),
_sumListeners(0),
_sumMixes(0),
_sourceUnattenuatedZone(NULL),
_listenerUnattenuatedZone(NULL),
_lastPerSecondCallbackTime(usecTimestampNow()),
_sendAudioStreamStats(false),
_datagramsReadPerCallStats(0, READ_DATAGRAMS_STATS_WINDOW_SECONDS),
_timeSpentPerCallStats(0, READ_DATAGRAMS_STATS_WINDOW_SECONDS),
_timeSpentPerHashMatchCallStats(0, READ_DATAGRAMS_STATS_WINDOW_SECONDS),
_readPendingCallsPerSecondStats(1, READ_DATAGRAMS_STATS_WINDOW_SECONDS)
{
// READ_DATAGRAMS_STATS_WINDOW_SECONDS (used in the initializer list above) is defined in AudioMixer.h;
// we will soon find a better common home for these audio-related constants
}
AudioMixer::~AudioMixer() {
delete _sourceUnattenuatedZone;
delete _listenerUnattenuatedZone;
}
const float ATTENUATION_BEGINS_AT_DISTANCE = 1.0f;
const float RADIUS_OF_HEAD = 0.076f;
int AudioMixer::addStreamToMixForListeningNodeWithStream(AudioMixerClientData* listenerNodeData,
const QUuid& streamUUID,
PositionalAudioStream* streamToAdd,
AvatarAudioStream* listeningNodeStream) {
// If repetition with fade is enabled:
// If streamToAdd could not provide a frame (it was starved), then we'll mix its previously-mixed frame
// This is preferable to not mixing it at all since that's equivalent to inserting silence.
// Basically, we'll repeat that last frame until it has a frame to mix. Depending on how many times
// we've repeated that frame in a row, we'll gradually fade that repeated frame into silence.
// This improves the perceived quality of the audio slightly.
bool showDebug = false; // (randFloat() < 0.05f);
float repeatedFrameFadeFactor = 1.0f;
if (!streamToAdd->lastPopSucceeded()) {
if (_streamSettings._repetitionWithFade && !streamToAdd->getLastPopOutput().isNull()) {
// repetition with fade is enabled, and we do have a valid previous frame to repeat.
// calculate its fade factor, which depends on how many times it's already been repeated.
repeatedFrameFadeFactor = calculateRepeatedFrameFadeFactor(streamToAdd->getConsecutiveNotMixedCount() - 1);
if (repeatedFrameFadeFactor == 0.0f) {
return 0;
}
} else {
return 0;
}
}
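// (Judging by its use above, calculateRepeatedFrameFadeFactor() ramps from 1.0 toward 0.0
// as the same frame is repeated; once it returns 0.0 the repeat would be inaudible, so we
// bail out early rather than mix silence.)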
// at this point, we know streamToAdd's last pop output is valid
// if the frame we're about to mix is silent, bail
if (streamToAdd->getLastPopOutputLoudness() == 0.0f) {
return 0;
}
float bearingRelativeAngleToSource = 0.0f;
float attenuationCoefficient = 1.0f;
int numSamplesDelay = 0;
float weakChannelAmplitudeRatio = 1.0f;
bool shouldDistanceAttenuate = true;
// Is the source that I am mixing my own?
bool sourceIsSelf = (streamToAdd == listeningNodeStream);
glm::vec3 relativePosition = streamToAdd->getPosition() - listeningNodeStream->getPosition();
float distanceBetween = glm::length(relativePosition);
if (distanceBetween < EPSILON) {
distanceBetween = EPSILON;
}
if (streamToAdd->getLastPopOutputTrailingLoudness() / distanceBetween <= _minAudibilityThreshold) {
// the current performance-throttling threshold says this stream is too quiet
// at this distance to be mixed in, so bail out
return 0;
}
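// Illustrative numbers: with the default, unthrottled threshold of
// LOUDNESS_TO_DISTANCE_RATIO / 2.0f (0.000005f), a stream with a trailing
// loudness of 0.01 survives this cull out to 0.01 / 0.000005 = 2000 meters.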
++_sumMixes;
if (streamToAdd->getListenerUnattenuatedZone()) {
shouldDistanceAttenuate = !streamToAdd->getListenerUnattenuatedZone()->contains(listeningNodeStream->getPosition());
}
if (streamToAdd->getType() == PositionalAudioStream::Injector) {
attenuationCoefficient *= reinterpret_cast<InjectedAudioStream*>(streamToAdd)->getAttenuationRatio();
if (showDebug) {
qDebug() << "AttenuationRatio: " << reinterpret_cast<InjectedAudioStream*>(streamToAdd)->getAttenuationRatio();
}
}
if (showDebug) {
qDebug() << "distance: " << distanceBetween;
}
glm::quat inverseOrientation = glm::inverse(listeningNodeStream->getOrientation());
if (!sourceIsSelf && (streamToAdd->getType() == PositionalAudioStream::Microphone)) {
// source is another avatar, apply fixed off-axis attenuation to make them quieter as they turn away from listener
glm::vec3 rotatedListenerPosition = glm::inverse(streamToAdd->getOrientation()) * relativePosition;
float angleOfDelivery = glm::angle(glm::vec3(0.0f, 0.0f, -1.0f),
glm::normalize(rotatedListenerPosition));
const float MAX_OFF_AXIS_ATTENUATION = 0.2f;
const float OFF_AXIS_ATTENUATION_FORMULA_STEP = (1 - MAX_OFF_AXIS_ATTENUATION) / 2.0f;
float offAxisCoefficient = MAX_OFF_AXIS_ATTENUATION +
(OFF_AXIS_ATTENUATION_FORMULA_STEP * (angleOfDelivery / PI_OVER_TWO));
if (showDebug) {
qDebug() << "angleOfDelivery" << angleOfDelivery << "offAxisCoefficient: " << offAxisCoefficient;
}
// multiply the current attenuation coefficient by the calculated off axis coefficient
attenuationCoefficient *= offAxisCoefficient;
}
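// Working the formula through: a speaker facing directly away from the listener
// gives angleOfDelivery == 0, so offAxisCoefficient == MAX_OFF_AXIS_ATTENUATION
// (0.2, roughly -14 dB); a speaker facing the listener gives angleOfDelivery == PI,
// so offAxisCoefficient == 0.2 + 0.4 * (PI / PI_OVER_TWO) == 1.0 (no attenuation).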
if (shouldDistanceAttenuate && (distanceBetween >= ATTENUATION_BEGINS_AT_DISTANCE)) {
// calculate the distance coefficient using the distance to this node
float distanceCoefficient = 1 - (logf(distanceBetween / ATTENUATION_BEGINS_AT_DISTANCE) / logf(2.0f)
* _attenuationPerDoublingInDistance);
if (distanceCoefficient < 0) {
distanceCoefficient = 0;
}
// multiply the current attenuation coefficient by the distance coefficient
attenuationCoefficient *= distanceCoefficient;
if (showDebug) {
qDebug() << "distanceCoefficient: " << distanceCoefficient;
}
}
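// Worked example: with the default _attenuationPerDoublingInDistance of 0.18f,
// a source 4 meters away is log2(4 / 1) = 2 doublings out, so
// distanceCoefficient = 1 - 2 * 0.18 = 0.64; at 32 meters (5 doublings)
// the coefficient is down to 1 - 5 * 0.18 = 0.1.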
if (!sourceIsSelf) {
// Compute sample delay for the two ears to create phase panning
glm::vec3 rotatedSourcePosition = inverseOrientation * relativePosition;
// project the rotated source position vector onto the XZ plane
rotatedSourcePosition.y = 0.0f;
// produce an oriented angle about the y-axis
bearingRelativeAngleToSource = glm::orientedAngle(glm::vec3(0.0f, 0.0f, -1.0f),
glm::normalize(rotatedSourcePosition),
glm::vec3(0.0f, 1.0f, 0.0f));
const float PHASE_AMPLITUDE_RATIO_AT_90 = 0.5f;
// figure out the number of samples of delay and the ratio of the amplitude
// in the weak channel for audio spatialization
float sinRatio = fabsf(sinf(bearingRelativeAngleToSource));
numSamplesDelay = SAMPLE_PHASE_DELAY_AT_90 * sinRatio;
weakChannelAmplitudeRatio = 1 - (PHASE_AMPLITUDE_RATIO_AT_90 * sinRatio);
if (distanceBetween < RADIUS_OF_HEAD) {
// Diminish phase panning if source would be inside head
numSamplesDelay *= distanceBetween / RADIUS_OF_HEAD;
weakChannelAmplitudeRatio += (PHASE_AMPLITUDE_RATIO_AT_90 * sinRatio) * distanceBetween / RADIUS_OF_HEAD;
}
}
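// Worked example: a source directly to one side gives |bearing| == PI_OVER_TWO, so
// sinRatio == 1.0: the far ear is delayed by the full SAMPLE_PHASE_DELAY_AT_90
// samples (defined elsewhere) and attenuated to 1 - PHASE_AMPLITUDE_RATIO_AT_90 == 0.5
// of the near ear's amplitude. A source dead ahead or behind gives sinRatio == 0.0:
// no delay and equal amplitudes.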
if (showDebug) {
qDebug() << "attenuation: " << attenuationCoefficient;
qDebug() << "bearingRelativeAngleToSource: " << bearingRelativeAngleToSource << " numSamplesDelay: " << numSamplesDelay;
}
AudioRingBuffer::ConstIterator streamPopOutput = streamToAdd->getLastPopOutput();
if (!streamToAdd->isStereo()) {
// this is a mono stream, which means it gets full attenuation and spatialization
// we need to do several things in this process:
// 1) convert from mono to stereo by copying each input sample into the left and right output samples
// 2) apply an attenuation AND fade to all samples (left and right)
// 3) based on the bearing relative angle to the source we will weaken and delay either the left or
// right channel of the input into the output
// 4) because one of these channels is delayed, we will need to use historical samples from
// the input stream for that delayed channel
// Mono input to stereo output (item 1 above)
int OUTPUT_SAMPLES_PER_INPUT_SAMPLE = 2;
int inputSampleCount = NETWORK_BUFFER_LENGTH_SAMPLES_STEREO / OUTPUT_SAMPLES_PER_INPUT_SAMPLE;
int maxOutputIndex = NETWORK_BUFFER_LENGTH_SAMPLES_STEREO;
// attenuation and fade applied to all samples (item 2 above)
float attenuationAndFade = attenuationCoefficient * repeatedFrameFadeFactor;
// determine which side is weak and delayed (item 3 above)
bool rightSideWeakAndDelayed = (bearingRelativeAngleToSource > 0.0f);
// since we're converting from mono to stereo, we'll use these two indices to step through
// the output samples. we'll increment each index independently in the loop
int leftDestinationIndex = 0;
int rightDestinationIndex = 1;
// One of our two channels will be delayed (determined below). We'll use this index to step
// through filling in our output with the historical samples for the delayed channel. (item 4 above)
int delayedChannelHistoricalAudioOutputIndex;
// All samples will be attenuated by at least this much
float leftSideAttenuation = attenuationAndFade;
float rightSideAttenuation = attenuationAndFade;
// The weak/delayed channel will be attenuated by this additional amount
float attenuationAndWeakChannelRatioAndFade = attenuationAndFade * weakChannelAmplitudeRatio;
// Now, based on the determination of which side is weak and delayed, set up our true starting point
// for our indexes, as well as the appropriate attenuation for each channel
if (rightSideWeakAndDelayed) {
delayedChannelHistoricalAudioOutputIndex = rightDestinationIndex;
rightSideAttenuation = attenuationAndWeakChannelRatioAndFade;
rightDestinationIndex += (numSamplesDelay * OUTPUT_SAMPLES_PER_INPUT_SAMPLE);
} else {
delayedChannelHistoricalAudioOutputIndex = leftDestinationIndex;
leftSideAttenuation = attenuationAndWeakChannelRatioAndFade;
leftDestinationIndex += (numSamplesDelay * OUTPUT_SAMPLES_PER_INPUT_SAMPLE);
}
// If there was a sample delay for this stream, we need to pull samples prior to the official start of the input
// and stick those samples at the beginning of the output. We only need to loop through this for the weak/delayed
// side, since the normal side is fully handled below. (item 4 above)
if (numSamplesDelay > 0) {
// TODO: delayStreamSourceSamples may be inside the last frame written if the ringbuffer is completely full
// maybe make AudioRingBuffer have 1 extra frame in its buffer
AudioRingBuffer::ConstIterator delayStreamSourceSamples = streamPopOutput - numSamplesDelay;
for (int i = 0; i < numSamplesDelay; i++) {
int16_t originalHistoricalSample = *delayStreamSourceSamples;
_preMixSamples[delayedChannelHistoricalAudioOutputIndex] += originalHistoricalSample
* attenuationAndWeakChannelRatioAndFade;
++delayStreamSourceSamples; // move our input pointer
delayedChannelHistoricalAudioOutputIndex += OUTPUT_SAMPLES_PER_INPUT_SAMPLE; // move our output sample
}
}
// Here's where we copy the MONO input to the STEREO output, and account for delay and weak side attenuation
for (int inputSample = 0; inputSample < inputSampleCount; inputSample++) {
int16_t originalSample = streamPopOutput[inputSample];
int16_t leftSideSample = originalSample * leftSideAttenuation;
int16_t rightSideSample = originalSample * rightSideAttenuation;
// since we might be delayed, don't write beyond our maxOutputIndex
if (leftDestinationIndex <= maxOutputIndex) {
_preMixSamples[leftDestinationIndex] += leftSideSample;
}
if (rightDestinationIndex <= maxOutputIndex) {
_preMixSamples[rightDestinationIndex] += rightSideSample;
}
leftDestinationIndex += OUTPUT_SAMPLES_PER_INPUT_SAMPLE;
rightDestinationIndex += OUTPUT_SAMPLES_PER_INPUT_SAMPLE;
}
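// To visualize the interleaving above: left samples land at even output indices
// (0, 2, 4, ...) and right samples at odd indices (1, 3, 5, ...). With, say, a
// 3-sample delay on the right, the right channel's first live write moves from
// index 1 to 1 + 3 * OUTPUT_SAMPLES_PER_INPUT_SAMPLE = 7, and indices 1, 3, 5
// are filled from the historical samples in the delay loop above instead.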
} else {
int stereoDivider = streamToAdd->isStereo() ? 1 : 2;
float attenuationAndFade = attenuationCoefficient * repeatedFrameFadeFactor;
for (int s = 0; s < NETWORK_BUFFER_LENGTH_SAMPLES_STEREO; s++) {
_preMixSamples[s] = glm::clamp(_preMixSamples[s] + (int)(streamPopOutput[s / stereoDivider] * attenuationAndFade),
MIN_SAMPLE_VALUE, MAX_SAMPLE_VALUE);
}
}
if (!sourceIsSelf && _enableFilter) {
const float TWO_OVER_PI = 2.0f / PI;
const float ZERO_DB = 1.0f;
const float NEGATIVE_ONE_DB = 0.891f;
const float NEGATIVE_THREE_DB = 0.708f;
const float FILTER_GAIN_AT_0 = ZERO_DB; // source is in front
const float FILTER_GAIN_AT_90 = NEGATIVE_ONE_DB; // source is incident to left or right ear
const float FILTER_GAIN_AT_180 = NEGATIVE_THREE_DB; // source is behind
const float FILTER_CUTOFF_FREQUENCY_HZ = 1000.0f;
const float penumbraFilterFrequency = FILTER_CUTOFF_FREQUENCY_HZ; // constant frequency
const float penumbraFilterSlope = NEGATIVE_THREE_DB; // constant slope
float penumbraFilterGainL;
float penumbraFilterGainR;
// variable gain calculation broken down by quadrant
if (-bearingRelativeAngleToSource < -PI_OVER_TWO && -bearingRelativeAngleToSource > -PI) {
penumbraFilterGainL = TWO_OVER_PI *
(FILTER_GAIN_AT_0 - FILTER_GAIN_AT_180) * (-bearingRelativeAngleToSource + PI_OVER_TWO) + FILTER_GAIN_AT_0;
penumbraFilterGainR = TWO_OVER_PI *
(FILTER_GAIN_AT_90 - FILTER_GAIN_AT_180) * (-bearingRelativeAngleToSource + PI_OVER_TWO) + FILTER_GAIN_AT_90;
} else if (-bearingRelativeAngleToSource <= PI && -bearingRelativeAngleToSource > PI_OVER_TWO) {
penumbraFilterGainL = TWO_OVER_PI *
(FILTER_GAIN_AT_180 - FILTER_GAIN_AT_90) * (-bearingRelativeAngleToSource - PI) + FILTER_GAIN_AT_180;
penumbraFilterGainR = TWO_OVER_PI *
(FILTER_GAIN_AT_180 - FILTER_GAIN_AT_0) * (-bearingRelativeAngleToSource - PI) + FILTER_GAIN_AT_180;
} else if (-bearingRelativeAngleToSource <= PI_OVER_TWO && -bearingRelativeAngleToSource > 0) {
penumbraFilterGainL = TWO_OVER_PI *
(FILTER_GAIN_AT_90 - FILTER_GAIN_AT_0) * (-bearingRelativeAngleToSource - PI_OVER_TWO) + FILTER_GAIN_AT_90;
penumbraFilterGainR = FILTER_GAIN_AT_0;
} else {
penumbraFilterGainL = FILTER_GAIN_AT_0;
penumbraFilterGainR = TWO_OVER_PI *
(FILTER_GAIN_AT_0 - FILTER_GAIN_AT_90) * (-bearingRelativeAngleToSource) + FILTER_GAIN_AT_0;
}
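// Working these quadrants through: with the source dead ahead
// (bearingRelativeAngleToSource == 0) the else branch yields
// gainL == gainR == FILTER_GAIN_AT_0 (0 dB). At +/- 90 degrees the near ear
// stays at ZERO_DB while the far ear falls to FILTER_GAIN_AT_90 (about -1 dB),
// and directly behind both ears converge to FILTER_GAIN_AT_180 (about -3 dB).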
if (distanceBetween < RADIUS_OF_HEAD) {
// Diminish effect if source would be inside head
penumbraFilterGainL += (1.f - penumbraFilterGainL) * (1.f - distanceBetween / RADIUS_OF_HEAD);
penumbraFilterGainR += (1.f - penumbraFilterGainR) * (1.f - distanceBetween / RADIUS_OF_HEAD);
}
#if 0
qDebug() << "gainL="
<< penumbraFilterGainL
<< "gainR="
<< penumbraFilterGainR
<< "angle="
<< -bearingRelativeAngleToSource;
#endif
// Get our per listener/source data so we can get our filter
AudioFilterHSF1s& penumbraFilter = listenerNodeData->getListenerSourcePairData(streamUUID)->getPenumbraFilter();
// set the gain on both filter channels
penumbraFilter.setParameters(0, 0, SAMPLE_RATE, penumbraFilterFrequency, penumbraFilterGainL, penumbraFilterSlope);
penumbraFilter.setParameters(0, 1, SAMPLE_RATE, penumbraFilterFrequency, penumbraFilterGainR, penumbraFilterSlope);
penumbraFilter.render(_preMixSamples, _preMixSamples, NETWORK_BUFFER_LENGTH_SAMPLES_STEREO / 2);
}
// Actually mix the _preMixSamples into the _mixSamples here.
for (int s = 0; s < NETWORK_BUFFER_LENGTH_SAMPLES_STEREO; s++) {
_mixSamples[s] = glm::clamp(_mixSamples[s] + _preMixSamples[s], MIN_SAMPLE_VALUE, MAX_SAMPLE_VALUE);
}
return 1;
}
int AudioMixer::prepareMixForListeningNode(Node* node) {
AvatarAudioStream* nodeAudioStream = ((AudioMixerClientData*) node->getLinkedData())->getAvatarAudioStream();
AudioMixerClientData* listenerNodeData = (AudioMixerClientData*)node->getLinkedData();
// zero out the client mix for this node
memset(_preMixSamples, 0, sizeof(_preMixSamples));
memset(_mixSamples, 0, sizeof(_mixSamples));
// loop through all other nodes that have sufficient audio to mix
int streamsMixed = 0;
foreach (const SharedNodePointer& otherNode, NodeList::getInstance()->getNodeHash()) {
if (otherNode->getLinkedData()) {
AudioMixerClientData* otherNodeClientData = (AudioMixerClientData*) otherNode->getLinkedData();
// enumerate the ARBs attached to the otherNode and add all that should be added to mix
const QHash<QUuid, PositionalAudioStream*>& otherNodeAudioStreams = otherNodeClientData->getAudioStreams();
QHash<QUuid, PositionalAudioStream*>::ConstIterator i;
for (i = otherNodeAudioStreams.constBegin(); i != otherNodeAudioStreams.constEnd(); i++) {
PositionalAudioStream* otherNodeStream = i.value();
QUuid streamUUID = i.key();
if (otherNodeStream->getType() == PositionalAudioStream::Microphone) {
streamUUID = otherNode->getUUID();
}
if (*otherNode != *node || otherNodeStream->shouldLoopbackForNode()) {
streamsMixed += addStreamToMixForListeningNodeWithStream(listenerNodeData, streamUUID,
otherNodeStream, nodeAudioStream);
}
}
}
}
return streamsMixed;
}
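// Note on cost: this runs once per listener per frame, and the inner loops visit
// every positional stream, so total mixing work grows as roughly
// O(listeners * streams). The audibility cull and the performance throttling in
// run() exist to keep that product bounded under load.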
void AudioMixer::readPendingDatagram(const QByteArray& receivedPacket, const HifiSockAddr& senderSockAddr) {
NodeList* nodeList = NodeList::getInstance();
if (nodeList->packetVersionAndHashMatch(receivedPacket)) {
// pull any new audio data from nodes off of the network stack
PacketType mixerPacketType = packetTypeForPacket(receivedPacket);
if (mixerPacketType == PacketTypeMicrophoneAudioNoEcho
|| mixerPacketType == PacketTypeMicrophoneAudioWithEcho
|| mixerPacketType == PacketTypeInjectAudio
|| mixerPacketType == PacketTypeSilentAudioFrame
|| mixerPacketType == PacketTypeAudioStreamStats) {
nodeList->findNodeAndUpdateWithDataFromPacket(receivedPacket);
} else if (mixerPacketType == PacketTypeMuteEnvironment) {
QByteArray packet = receivedPacket;
populatePacketHeader(packet, PacketTypeMuteEnvironment);
foreach (const SharedNodePointer& node, nodeList->getNodeHash()) {
if (node->getType() == NodeType::Agent && node->getActiveSocket() && node->getLinkedData() && node != nodeList->sendingNodeForPacket(receivedPacket)) {
nodeList->writeDatagram(packet, packet.size(), node);
}
}
} else {
// let processNodeData handle it.
nodeList->processNodeData(senderSockAddr, receivedPacket);
}
}
}
void AudioMixer::sendStatsPacket() {
static QJsonObject statsObject;
statsObject["useDynamicJitterBuffers"] = _streamSettings._dynamicJitterBuffers;
statsObject["trailing_sleep_percentage"] = _trailingSleepRatio * 100.0f;
statsObject["performance_throttling_ratio"] = _performanceThrottlingRatio;
statsObject["average_listeners_per_frame"] = (float) _sumListeners / (float) _numStatFrames;
if (_sumListeners > 0) {
statsObject["average_mixes_per_listener"] = (float) _sumMixes / (float) _sumListeners;
} else {
statsObject["average_mixes_per_listener"] = 0.0;
}
ThreadedAssignment::addPacketStatsAndSendStatsPacket(statsObject);
_sumListeners = 0;
_sumMixes = 0;
_numStatFrames = 0;
// NOTE: These stats can be too large to fit in an MTU, so we break them up into multiple packets...
QJsonObject statsObject2;
// add stats for each listener
bool somethingToSend = false;
int sizeOfStats = 0;
int TOO_BIG_FOR_MTU = 1200; // some extra space for JSONification
QString property = "readPendingDatagram_calls_stats";
QString value = getReadPendingDatagramsCallsPerSecondsStatsString();
statsObject2[qPrintable(property)] = value;
somethingToSend = true;
sizeOfStats += property.size() + value.size();
property = "readPendingDatagram_packets_per_call_stats";
value = getReadPendingDatagramsPacketsPerCallStatsString();
statsObject2[qPrintable(property)] = value;
somethingToSend = true;
sizeOfStats += property.size() + value.size();
property = "readPendingDatagram_packets_time_per_call_stats";
value = getReadPendingDatagramsTimeStatsString();
statsObject2[qPrintable(property)] = value;
somethingToSend = true;
sizeOfStats += property.size() + value.size();
property = "readPendingDatagram_hashmatch_time_per_call_stats";
value = getReadPendingDatagramsHashMatchTimeStatsString();
statsObject2[qPrintable(property)] = value;
somethingToSend = true;
sizeOfStats += property.size() + value.size();
NodeList* nodeList = NodeList::getInstance();
int clientNumber = 0;
foreach (const SharedNodePointer& node, nodeList->getNodeHash()) {
// if we're too large, send the packet
if (sizeOfStats > TOO_BIG_FOR_MTU) {
nodeList->sendStatsToDomainServer(statsObject2);
sizeOfStats = 0;
statsObject2 = QJsonObject(); // clear it
somethingToSend = false;
}
clientNumber++;
AudioMixerClientData* clientData = static_cast<AudioMixerClientData*>(node->getLinkedData());
if (clientData) {
QString property = "jitterStats." + node->getUUID().toString();
QString value = clientData->getAudioStreamStatsString();
statsObject2[qPrintable(property)] = value;
somethingToSend = true;
sizeOfStats += property.size() + value.size();
}
}
if (somethingToSend) {
nodeList->sendStatsToDomainServer(statsObject2);
}
}
void AudioMixer::run() {
ThreadedAssignment::commonInit(AUDIO_MIXER_LOGGING_TARGET_NAME, NodeType::AudioMixer);
NodeList* nodeList = NodeList::getInstance();
// we do not want this event loop to be the handler for UDP datagrams, so disconnect
disconnect(&nodeList->getNodeSocket(), 0, this, 0);
// setup a QThread with us as parent that will house the AudioMixerDatagramProcessor
_datagramProcessingThread = new QThread(this);
// create an AudioMixerDatagramProcessor and move it to that thread
AudioMixerDatagramProcessor* datagramProcessor = new AudioMixerDatagramProcessor(nodeList->getNodeSocket(), thread());
datagramProcessor->moveToThread(_datagramProcessingThread);
// remove the NodeList as the parent of the node socket
nodeList->getNodeSocket().setParent(NULL);
nodeList->getNodeSocket().moveToThread(_datagramProcessingThread);
// let the datagram processor handle readyRead from node socket
connect(&nodeList->getNodeSocket(), &QUdpSocket::readyRead,
datagramProcessor, &AudioMixerDatagramProcessor::readPendingDatagrams);
// connect to the datagram processing thread signal that tells us we have to handle a packet
connect(datagramProcessor, &AudioMixerDatagramProcessor::packetRequiresProcessing, this, &AudioMixer::readPendingDatagram);
// delete the datagram processor and the associated thread when the QThread quits
connect(_datagramProcessingThread, &QThread::finished, datagramProcessor, &QObject::deleteLater);
connect(datagramProcessor, &QObject::destroyed, _datagramProcessingThread, &QThread::deleteLater);
// start the datagram processing thread
_datagramProcessingThread->start();
nodeList->addNodeTypeToInterestSet(NodeType::Agent);
nodeList->linkedDataCreateCallback = attachNewNodeDataToNode;
// wait until we have the domain-server settings, otherwise we bail
DomainHandler& domainHandler = nodeList->getDomainHandler();
qDebug() << "Waiting for domain settings from domain-server.";
// block until we get the settingsRequestComplete signal
QEventLoop loop;
connect(&domainHandler, &DomainHandler::settingsReceived, &loop, &QEventLoop::quit);
connect(&domainHandler, &DomainHandler::settingsReceiveFail, &loop, &QEventLoop::quit);
domainHandler.requestDomainSettings();
loop.exec();
if (domainHandler.getSettingsObject().isEmpty()) {
qDebug() << "Failed to retreive settings object from domain-server. Bailing on assignment.";
setFinished(true);
return;
}
const QJsonObject& settingsObject = domainHandler.getSettingsObject();
// check the settings object to see if we have anything we can parse out
const QString AUDIO_GROUP_KEY = "audio";
if (settingsObject.contains(AUDIO_GROUP_KEY)) {
QJsonObject audioGroupObject = settingsObject[AUDIO_GROUP_KEY].toObject();
// check the payload to see if we have asked for dynamicJitterBuffer support
const QString DYNAMIC_JITTER_BUFFER_JSON_KEY = "dynamic_jitter_buffer";
_streamSettings._dynamicJitterBuffers = audioGroupObject[DYNAMIC_JITTER_BUFFER_JSON_KEY].toBool();
if (_streamSettings._dynamicJitterBuffers) {
qDebug() << "Enable dynamic jitter buffers.";
} else {
qDebug() << "Dynamic jitter buffers disabled.";
}
bool ok;
const QString DESIRED_JITTER_BUFFER_FRAMES_KEY = "static_desired_jitter_buffer_frames";
_streamSettings._staticDesiredJitterBufferFrames = audioGroupObject[DESIRED_JITTER_BUFFER_FRAMES_KEY].toString().toInt(&ok);
if (!ok) {
_streamSettings._staticDesiredJitterBufferFrames = DEFAULT_STATIC_DESIRED_JITTER_BUFFER_FRAMES;
}
qDebug() << "Static desired jitter buffer frames:" << _streamSettings._staticDesiredJitterBufferFrames;
const QString MAX_FRAMES_OVER_DESIRED_JSON_KEY = "max_frames_over_desired";
_streamSettings._maxFramesOverDesired = audioGroupObject[MAX_FRAMES_OVER_DESIRED_JSON_KEY].toString().toInt(&ok);
if (!ok) {
_streamSettings._maxFramesOverDesired = DEFAULT_MAX_FRAMES_OVER_DESIRED;
}
qDebug() << "Max frames over desired:" << _streamSettings._maxFramesOverDesired;
const QString USE_STDEV_FOR_DESIRED_CALC_JSON_KEY = "use_stdev_for_desired_calc";
_streamSettings._useStDevForJitterCalc = audioGroupObject[USE_STDEV_FOR_DESIRED_CALC_JSON_KEY].toBool();
if (_streamSettings._useStDevForJitterCalc) {
qDebug() << "Using Philip's stdev method for jitter calc if dynamic jitter buffers enabled";
} else {
qDebug() << "Using Fred's max-gap method for jitter calc if dynamic jitter buffers enabled";
}
const QString WINDOW_STARVE_THRESHOLD_JSON_KEY = "window_starve_threshold";
_streamSettings._windowStarveThreshold = audioGroupObject[WINDOW_STARVE_THRESHOLD_JSON_KEY].toString().toInt(&ok);
if (!ok) {
_streamSettings._windowStarveThreshold = DEFAULT_WINDOW_STARVE_THRESHOLD;
}
qDebug() << "Window A starve threshold:" << _streamSettings._windowStarveThreshold;
const QString WINDOW_SECONDS_FOR_DESIRED_CALC_ON_TOO_MANY_STARVES_JSON_KEY = "window_seconds_for_desired_calc_on_too_many_starves";
_streamSettings._windowSecondsForDesiredCalcOnTooManyStarves = audioGroupObject[WINDOW_SECONDS_FOR_DESIRED_CALC_ON_TOO_MANY_STARVES_JSON_KEY].toString().toInt(&ok);
if (!ok) {
_streamSettings._windowSecondsForDesiredCalcOnTooManyStarves = DEFAULT_WINDOW_SECONDS_FOR_DESIRED_CALC_ON_TOO_MANY_STARVES;
}
qDebug() << "Window A length:" << _streamSettings._windowSecondsForDesiredCalcOnTooManyStarves << "seconds";
const QString WINDOW_SECONDS_FOR_DESIRED_REDUCTION_JSON_KEY = "window_seconds_for_desired_reduction";
_streamSettings._windowSecondsForDesiredReduction = audioGroupObject[WINDOW_SECONDS_FOR_DESIRED_REDUCTION_JSON_KEY].toString().toInt(&ok);
if (!ok) {
_streamSettings._windowSecondsForDesiredReduction = DEFAULT_WINDOW_SECONDS_FOR_DESIRED_REDUCTION;
}
qDebug() << "Window B length:" << _streamSettings._windowSecondsForDesiredReduction << "seconds";
const QString REPETITION_WITH_FADE_JSON_KEY = "repetition_with_fade";
_streamSettings._repetitionWithFade = audioGroupObject[REPETITION_WITH_FADE_JSON_KEY].toBool();
if (_streamSettings._repetitionWithFade) {
qDebug() << "Repetition with fade enabled";
} else {
qDebug() << "Repetition with fade disabled";
}
const QString PRINT_STREAM_STATS_JSON_KEY = "print_stream_stats";
_printStreamStats = audioGroupObject[PRINT_STREAM_STATS_JSON_KEY].toBool();
if (_printStreamStats) {
qDebug() << "Stream stats will be printed to stdout";
}
const QString FILTER_KEY = "enable_filter";
if (audioGroupObject[FILTER_KEY].isBool()) {
_enableFilter = audioGroupObject[FILTER_KEY].toBool();
}
if (_enableFilter) {
qDebug() << "Filter enabled";
}
const QString UNATTENUATED_ZONE_KEY = "unattenuated_zone";
QString unattenuatedZoneString = audioGroupObject[UNATTENUATED_ZONE_KEY].toString();
if (!unattenuatedZoneString.isEmpty()) {
QStringList zoneStringList = unattenuatedZoneString.split(',');
glm::vec3 sourceCorner(zoneStringList[0].toFloat(), zoneStringList[1].toFloat(), zoneStringList[2].toFloat());
glm::vec3 sourceDimensions(zoneStringList[3].toFloat(), zoneStringList[4].toFloat(), zoneStringList[5].toFloat());
glm::vec3 listenerCorner(zoneStringList[6].toFloat(), zoneStringList[7].toFloat(), zoneStringList[8].toFloat());
glm::vec3 listenerDimensions(zoneStringList[9].toFloat(), zoneStringList[10].toFloat(), zoneStringList[11].toFloat());
_sourceUnattenuatedZone = new AABox(sourceCorner, sourceDimensions);
_listenerUnattenuatedZone = new AABox(listenerCorner, listenerDimensions);
glm::vec3 sourceCenter = _sourceUnattenuatedZone->calcCenter();
glm::vec3 destinationCenter = _listenerUnattenuatedZone->calcCenter();
qDebug() << "There is an unattenuated zone with source center at"
<< QString("%1, %2, %3").arg(sourceCenter.x).arg(sourceCenter.y).arg(sourceCenter.z);
qDebug() << "Buffers inside this zone will not be attenuated inside a box with center at"
<< QString("%1, %2, %3").arg(destinationCenter.x).arg(destinationCenter.y).arg(destinationCenter.z);
}
const QString ATTENUATION_PER_DOUBLING_IN_DISTANCE_JSON_KEY = "attenuation_per_doubling_in_distance";
if (audioGroupObject[ATTENUATION_PER_DOUBLING_IN_DISTANCE_JSON_KEY].isString()) {
bool ok = false;
float attenuation = audioGroupObject[ATTENUATION_PER_DOUBLING_IN_DISTANCE_JSON_KEY].toString().toFloat(&ok);
if (ok) {
_attenuationPerDoublingInDistance = attenuation;
qDebug() << "Attenuation per doubling in distance changed to" << _attenuationPerDoublingInDistance;
}
}
}
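// For reference, a sketch of the "audio" settings group parsed above (key names
// match the constants used in this block; numeric values arrive as strings, and
// the values shown are illustrative, not authoritative defaults):
// {
//     "audio": {
//         "dynamic_jitter_buffer": true,
//         "static_desired_jitter_buffer_frames": "1",
//         "max_frames_over_desired": "10",
//         "use_stdev_for_desired_calc": false,
//         "window_starve_threshold": "3",
//         "window_seconds_for_desired_calc_on_too_many_starves": "50",
//         "window_seconds_for_desired_reduction": "10",
//         "repetition_with_fade": true,
//         "print_stream_stats": false,
//         "enable_filter": true,
//         "unattenuated_zone": "x,y,z,w,h,d,x,y,z,w,h,d",
//         "attenuation_per_doubling_in_distance": "0.18"
//     }
// }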
int nextFrame = 0;
QElapsedTimer timer;
timer.start();
char clientMixBuffer[MAX_PACKET_SIZE];
int usecToSleep = BUFFER_SEND_INTERVAL_USECS;
const int TRAILING_AVERAGE_FRAMES = 100;
int framesSinceCutoffEvent = TRAILING_AVERAGE_FRAMES;
while (!_isFinished) {
const float STRUGGLE_TRIGGER_SLEEP_PERCENTAGE_THRESHOLD = 0.10f;
const float BACK_OFF_TRIGGER_SLEEP_PERCENTAGE_THRESHOLD = 0.20f;
const float RATIO_BACK_OFF = 0.02f;
const float CURRENT_FRAME_RATIO = 1.0f / TRAILING_AVERAGE_FRAMES;
const float PREVIOUS_FRAMES_RATIO = 1.0f - CURRENT_FRAME_RATIO;
if (usecToSleep < 0) {
usecToSleep = 0;
}
_trailingSleepRatio = (PREVIOUS_FRAMES_RATIO * _trailingSleepRatio)
+ (usecToSleep * CURRENT_FRAME_RATIO / (float) BUFFER_SEND_INTERVAL_USECS);
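// This is an exponential moving average over TRAILING_AVERAGE_FRAMES: each frame
// contributes CURRENT_FRAME_RATIO (0.01) of its sleep fraction, so a fully-slept
// frame (usecToSleep == BUFFER_SEND_INTERVAL_USECS) nudges the ratio toward 1.0
// by one percent, while a frame with no sleep lets it decay by one percent of its
// current value.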
float lastCutoffRatio = _performanceThrottlingRatio;
bool hasRatioChanged = false;
if (framesSinceCutoffEvent >= TRAILING_AVERAGE_FRAMES) {
if (_trailingSleepRatio <= STRUGGLE_TRIGGER_SLEEP_PERCENTAGE_THRESHOLD) {
// we're struggling - change our min required loudness to reduce some load
_performanceThrottlingRatio = _performanceThrottlingRatio + (0.5f * (1.0f - _performanceThrottlingRatio));
qDebug() << "Mixer is struggling, sleeping" << _trailingSleepRatio * 100 << "% of frame time. Old cutoff was"
<< lastCutoffRatio << "and is now" << _performanceThrottlingRatio;
hasRatioChanged = true;
} else if (_trailingSleepRatio >= BACK_OFF_TRIGGER_SLEEP_PERCENTAGE_THRESHOLD && _performanceThrottlingRatio != 0) {
// we've recovered and can back off the required loudness
_performanceThrottlingRatio = _performanceThrottlingRatio - RATIO_BACK_OFF;
if (_performanceThrottlingRatio < 0) {
_performanceThrottlingRatio = 0;
}
qDebug() << "Mixer is recovering, sleeping" << _trailingSleepRatio * 100 << "% of frame time. Old cutoff was"
<< lastCutoffRatio << "and is now" << _performanceThrottlingRatio;
hasRatioChanged = true;
}
if (hasRatioChanged) {
// set our min audibility threshold from the new ratio
_minAudibilityThreshold = LOUDNESS_TO_DISTANCE_RATIO / (2.0f * (1.0f - _performanceThrottlingRatio));
qDebug() << "Minimum audibility required to be mixed is now" << _minAudibilityThreshold;
framesSinceCutoffEvent = 0;
}
}
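// Worked example of the throttling math: each "struggling" event moves
// _performanceThrottlingRatio halfway to 1.0 (0.0 -> 0.5 -> 0.75 -> ...), and
// _minAudibilityThreshold = LOUDNESS_TO_DISTANCE_RATIO / (2 * (1 - ratio))
// doubles at ratio 0.5 and quadruples at 0.75, halving and then quartering the
// distance at which a stream of a given loudness still gets mixed.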
if (!hasRatioChanged) {
++framesSinceCutoffEvent;
}
quint64 now = usecTimestampNow();
if (now - _lastPerSecondCallbackTime > USECS_PER_SECOND) {
perSecondActions();
_lastPerSecondCallbackTime = now;
}
foreach (const SharedNodePointer& node, nodeList->getNodeHash()) {
if (node->getLinkedData()) {
AudioMixerClientData* nodeData = (AudioMixerClientData*)node->getLinkedData();
// this function will attempt to pop a frame from each audio stream.
// a pointer to the popped data is stored as a member in InboundAudioStream.
// That's how the popped audio data will be read for mixing (but only if the pop was successful)
nodeData->checkBuffersBeforeFrameSend(_sourceUnattenuatedZone, _listenerUnattenuatedZone);
if (node->getType() == NodeType::Agent && node->getActiveSocket()
&& nodeData->getAvatarAudioStream()) {
int streamsMixed = prepareMixForListeningNode(node.data());
char* dataAt;
if (streamsMixed > 0) {
// pack header
int numBytesPacketHeader = populatePacketHeader(clientMixBuffer, PacketTypeMixedAudio);
dataAt = clientMixBuffer + numBytesPacketHeader;
// pack sequence number
quint16 sequence = nodeData->getOutgoingSequenceNumber();
memcpy(dataAt, &sequence, sizeof(quint16));
dataAt += sizeof(quint16);
// pack mixed audio samples
memcpy(dataAt, _mixSamples, NETWORK_BUFFER_LENGTH_BYTES_STEREO);
dataAt += NETWORK_BUFFER_LENGTH_BYTES_STEREO;
} else {
// pack header
int numBytesPacketHeader = populatePacketHeader(clientMixBuffer, PacketTypeSilentAudioFrame);
dataAt = clientMixBuffer + numBytesPacketHeader;
// pack sequence number
quint16 sequence = nodeData->getOutgoingSequenceNumber();
memcpy(dataAt, &sequence, sizeof(quint16));
dataAt += sizeof(quint16);
// pack number of silent audio samples
quint16 numSilentSamples = NETWORK_BUFFER_LENGTH_SAMPLES_STEREO;
memcpy(dataAt, &numSilentSamples, sizeof(quint16));
dataAt += sizeof(quint16);
}
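// Either way, the datagram assembled above is laid out as:
//     [ packet header ][ quint16 sequence ][ payload ]
// where the payload is NETWORK_BUFFER_LENGTH_BYTES_STEREO bytes of mixed samples
// for PacketTypeMixedAudio, or a single quint16 silent-sample count for
// PacketTypeSilentAudioFrame.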
// send mixed audio packet
nodeList->writeDatagram(clientMixBuffer, dataAt - clientMixBuffer, node);
nodeData->incrementOutgoingMixedAudioSequenceNumber();
// send an audio stream stats packet if it's time
if (_sendAudioStreamStats) {
nodeData->sendAudioStreamStatsPackets(node);
_sendAudioStreamStats = false;
}
++_sumListeners;
}
}
}
++_numStatFrames;
QCoreApplication::processEvents();
if (_isFinished) {
break;
}
usecToSleep = (++nextFrame * BUFFER_SEND_INTERVAL_USECS) - timer.nsecsElapsed() / 1000; // ns to us
if (usecToSleep > 0) {
usleep(usecToSleep);
}
}
}
void AudioMixer::perSecondActions() {
_sendAudioStreamStats = true;
int callsLastSecond = _datagramsReadPerCallStats.getCurrentIntervalSamples();
_readPendingCallsPerSecondStats.update(callsLastSecond);
if (_printStreamStats) {
printf("\n================================================================================\n\n");
printf(" readPendingDatagram() calls per second | avg: %.2f, avg_30s: %.2f, last_second: %d\n",
_readPendingCallsPerSecondStats.getAverage(),
_readPendingCallsPerSecondStats.getWindowAverage(),
callsLastSecond);
printf(" Datagrams read per call | avg: %.2f, avg_30s: %.2f, last_second: %.2f\n",
_datagramsReadPerCallStats.getAverage(),
_datagramsReadPerCallStats.getWindowAverage(),
_datagramsReadPerCallStats.getCurrentIntervalAverage());
printf(" Usecs spent per readPendingDatagram() call | avg: %.2f, avg_30s: %.2f, last_second: %.2f\n",
_timeSpentPerCallStats.getAverage(),
_timeSpentPerCallStats.getWindowAverage(),
_timeSpentPerCallStats.getCurrentIntervalAverage());
printf(" Usecs spent per packetVersionAndHashMatch() call | avg: %.2f, avg_30s: %.2f, last_second: %.2f\n",
_timeSpentPerHashMatchCallStats.getAverage(),
_timeSpentPerHashMatchCallStats.getWindowAverage(),
_timeSpentPerHashMatchCallStats.getCurrentIntervalAverage());
double WINDOW_LENGTH_USECS = READ_DATAGRAMS_STATS_WINDOW_SECONDS * USECS_PER_SECOND;
printf(" %% time spent in readPendingDatagram() calls | avg_30s: %.6f%%, last_second: %.6f%%\n",
_timeSpentPerCallStats.getWindowSum() / WINDOW_LENGTH_USECS * 100.0,
_timeSpentPerCallStats.getCurrentIntervalSum() / USECS_PER_SECOND * 100.0);
printf("%% time spent in packetVersionAndHashMatch() calls: | avg_30s: %.6f%%, last_second: %.6f%%\n",
_timeSpentPerHashMatchCallStats.getWindowSum() / WINDOW_LENGTH_USECS * 100.0,
_timeSpentPerHashMatchCallStats.getCurrentIntervalSum() / USECS_PER_SECOND * 100.0);
foreach(const SharedNodePointer& node, NodeList::getInstance()->getNodeHash()) {
if (node->getLinkedData()) {
AudioMixerClientData* nodeData = (AudioMixerClientData*)node->getLinkedData();
if (node->getType() == NodeType::Agent && node->getActiveSocket()) {
printf("\nStats for agent %s --------------------------------\n",
node->getUUID().toString().toLatin1().data());
nodeData->printUpstreamDownstreamStats();
}
}
}
}
_datagramsReadPerCallStats.currentIntervalComplete();
_timeSpentPerCallStats.currentIntervalComplete();
_timeSpentPerHashMatchCallStats.currentIntervalComplete();
}
QString AudioMixer::getReadPendingDatagramsCallsPerSecondsStatsString() const {
QString result = "calls_per_sec_avg_30s: " + QString::number(_readPendingCallsPerSecondStats.getWindowAverage(), 'f', 2)
+ " calls_last_sec: " + QString::number(_readPendingCallsPerSecondStats.getLastCompleteIntervalStats().getSum() + 0.5, 'f', 0);
return result;
}
QString AudioMixer::getReadPendingDatagramsPacketsPerCallStatsString() const {
QString result = "pkts_per_call_avg_30s: " + QString::number(_datagramsReadPerCallStats.getWindowAverage(), 'f', 2)
+ " pkts_per_call_avg_1s: " + QString::number(_datagramsReadPerCallStats.getLastCompleteIntervalStats().getAverage(), 'f', 2);
return result;
}
QString AudioMixer::getReadPendingDatagramsTimeStatsString() const {
QString result = "usecs_per_call_avg_30s: " + QString::number(_timeSpentPerCallStats.getWindowAverage(), 'f', 2)
+ " usecs_per_call_avg_1s: " + QString::number(_timeSpentPerCallStats.getLastCompleteIntervalStats().getAverage(), 'f', 2)
+ " prct_time_in_call_30s: " + QString::number(_timeSpentPerCallStats.getWindowSum() / (READ_DATAGRAMS_STATS_WINDOW_SECONDS*USECS_PER_SECOND) * 100.0, 'f', 6) + "%"
+ " prct_time_in_call_1s: " + QString::number(_timeSpentPerCallStats.getLastCompleteIntervalStats().getSum() / USECS_PER_SECOND * 100.0, 'f', 6) + "%";
return result;
}
QString AudioMixer::getReadPendingDatagramsHashMatchTimeStatsString() const {
QString result = "usecs_per_hashmatch_avg_30s: " + QString::number(_timeSpentPerHashMatchCallStats.getWindowAverage(), 'f', 2)
+ " usecs_per_hashmatch_avg_1s: " + QString::number(_timeSpentPerHashMatchCallStats.getLastCompleteIntervalStats().getAverage(), 'f', 2)
+ " prct_time_in_hashmatch_30s: " + QString::number(_timeSpentPerHashMatchCallStats.getWindowSum() / (READ_DATAGRAMS_STATS_WINDOW_SECONDS*USECS_PER_SECOND) * 100.0, 'f', 6) + "%"
+ " prct_time_in_hashmatch_1s: " + QString::number(_timeSpentPerHashMatchCallStats.getLastCompleteIntervalStats().getSum() / USECS_PER_SECOND * 100.0, 'f', 6) + "%";
return result;
}