move mixing into AudioMixerSlave

Zach Pomerantz 2016-11-22 16:19:18 -05:00
parent 891084e9db
commit 192f4791d5
2 changed files with 528 additions and 489 deletions

AudioMixer.cpp

@@ -69,19 +69,17 @@ static const QString AUDIO_ENV_GROUP_KEY = "audio_env";
static const QString AUDIO_BUFFER_GROUP_KEY = "audio_buffer";
int AudioMixer::_numStaticJitterFrames{ -1 };
bool AudioMixer::shouldMute(float quietestFrame) {
return (quietestFrame > _noiseMutingThreshold);
}
float AudioMixer::_noiseMutingThreshold{ DEFAULT_NOISE_MUTING_THRESHOLD };
float AudioMixer::_attenuationPerDoublingInDistance{ DEFAULT_ATTENUATION_PER_DOUBLING_IN_DISTANCE };
float AudioMixer::_trailingSleepRatio{ 1.0f };
float AudioMixer::_performanceThrottlingRatio{ 0.0f };
float AudioMixer::_minAudibilityThreshold{ LOUDNESS_TO_DISTANCE_RATIO / 2.0f };
QHash<QString, AABox> AudioMixer::_audioZones;
QVector<AudioMixer::ZoneSettings> AudioMixer::_zoneSettings;
QVector<AudioMixer::ReverbSettings> AudioMixer::_zoneReverbSettings;
AudioMixer::AudioMixer(ReceivedMessage& message) :
ThreadedAssignment(message),
_trailingSleepRatio(1.0f),
_minAudibilityThreshold(LOUDNESS_TO_DISTANCE_RATIO / 2.0f),
_performanceThrottlingRatio(0.0f),
_attenuationPerDoublingInDistance(DEFAULT_ATTENUATION_PER_DOUBLING_IN_DISTANCE),
_noiseMutingThreshold(DEFAULT_NOISE_MUTING_THRESHOLD)
{
ThreadedAssignment(message) {
auto nodeList = DependencyManager::get<NodeList>();
auto& packetReceiver = nodeList->getPacketReceiver();
@@ -158,343 +156,6 @@ static inline float fastexp2(float x) {
return x * xi.f;
}
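// For context: only the tail of fastexp2 appears in this hunk. fastlog2 and
// fastexp2 are typically the standard IEEE-754 bit-trick approximations
// sketched below - an assumption about the usual technique, with hypothetical
// names, not necessarily this repo's exact implementation:
#include <cstdint>
#include <cstring>
// approximate log2(x) for x > 0: the raw exponent bits give floor(log2(x))
// and the mantissa fills in the fractional part approximately linearly
static inline float approxLog2(float x) {
    uint32_t bits;
    std::memcpy(&bits, &x, sizeof(bits));
    return bits * (1.0f / (1 << 23)) - 127.0f;
}
// approximate exp2(x) by running the same bit mapping in reverse
static inline float approxExp2(float x) {
    uint32_t bits = uint32_t((x + 127.0f) * (1 << 23));
    float result;
    std::memcpy(&result, &bits, sizeof(result));
    return result;
}
// both stay within a few percent of the exact value, adequate for gain curves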
float AudioMixer::gainForSource(const PositionalAudioStream& streamToAdd,
const AvatarAudioStream& listeningNodeStream, const glm::vec3& relativePosition, bool isEcho) {
float gain = 1.0f;
float distanceBetween = glm::length(relativePosition);
if (distanceBetween < EPSILON) {
distanceBetween = EPSILON;
}
if (streamToAdd.getType() == PositionalAudioStream::Injector) {
gain *= reinterpret_cast<const InjectedAudioStream*>(&streamToAdd)->getAttenuationRatio();
}
if (!isEcho && (streamToAdd.getType() == PositionalAudioStream::Microphone)) {
// source is another avatar, apply fixed off-axis attenuation to make them quieter as they turn away from listener
glm::vec3 rotatedListenerPosition = glm::inverse(streamToAdd.getOrientation()) * relativePosition;
float angleOfDelivery = glm::angle(glm::vec3(0.0f, 0.0f, -1.0f),
glm::normalize(rotatedListenerPosition));
const float MAX_OFF_AXIS_ATTENUATION = 0.2f;
const float OFF_AXIS_ATTENUATION_FORMULA_STEP = (1 - MAX_OFF_AXIS_ATTENUATION) / 2.0f;
float offAxisCoefficient = MAX_OFF_AXIS_ATTENUATION +
(OFF_AXIS_ATTENUATION_FORMULA_STEP * (angleOfDelivery / PI_OVER_TWO));
// multiply the current attenuation coefficient by the calculated off axis coefficient
gain *= offAxisCoefficient;
}
float attenuationPerDoublingInDistance = _attenuationPerDoublingInDistance;
for (int i = 0; i < _zonesSettings.length(); ++i) {
if (_audioZones[_zonesSettings[i].source].contains(streamToAdd.getPosition()) &&
_audioZones[_zonesSettings[i].listener].contains(listeningNodeStream.getPosition())) {
attenuationPerDoublingInDistance = _zonesSettings[i].coefficient;
break;
}
}
if (distanceBetween >= ATTENUATION_BEGINS_AT_DISTANCE) {
// translate the zone setting to gain per log2(distance)
float g = 1.0f - attenuationPerDoublingInDistance;
g = (g < EPSILON) ? EPSILON : g;
g = (g > 1.0f) ? 1.0f : g;
// calculate the distance coefficient using the distance to this node
float distanceCoefficient = fastexp2(fastlog2(g) * fastlog2(distanceBetween/ATTENUATION_BEGINS_AT_DISTANCE));
// multiply the current attenuation coefficient by the distance coefficient
gain *= distanceCoefficient;
}
return gain;
}
float AudioMixer::azimuthForSource(const PositionalAudioStream& streamToAdd, const AvatarAudioStream& listeningNodeStream,
const glm::vec3& relativePosition) {
glm::quat inverseOrientation = glm::inverse(listeningNodeStream.getOrientation());
// Compute sample delay for the two ears to create phase panning
glm::vec3 rotatedSourcePosition = inverseOrientation * relativePosition;
// project the rotated source position vector onto the XZ plane
rotatedSourcePosition.y = 0.0f;
static const float SOURCE_DISTANCE_THRESHOLD = 1e-30f;
if (glm::length2(rotatedSourcePosition) > SOURCE_DISTANCE_THRESHOLD) {
// produce an oriented angle about the y-axis
return glm::orientedAngle(glm::vec3(0.0f, 0.0f, -1.0f), glm::normalize(rotatedSourcePosition), glm::vec3(0.0f, -1.0f, 0.0f));
} else {
// there is no distance between listener and source - return no azimuth
return 0;
}
}
void AudioMixer::addStreamToMixForListeningNodeWithStream(AudioMixerClientData& listenerNodeData,
const PositionalAudioStream& streamToAdd,
const QUuid& sourceNodeID,
const AvatarAudioStream& listeningNodeStream) {
// to reduce artifacts we calculate the gain and azimuth for every source for this listener
// even if we are not going to end up mixing in this source
++_totalMixes;
// this ensures that the tail of any previously mixed audio or the first block of new audio sounds correct
// check if this is a server echo of a source back to itself
bool isEcho = (&streamToAdd == &listeningNodeStream);
glm::vec3 relativePosition = streamToAdd.getPosition() - listeningNodeStream.getPosition();
// figure out the distance between source and listener
float distance = glm::max(glm::length(relativePosition), EPSILON);
// figure out the gain for this source at the listener
float gain = gainForSource(streamToAdd, listeningNodeStream, relativePosition, isEcho);
// figure out the azimuth to this source at the listener
float azimuth = isEcho ? 0.0f : azimuthForSource(streamToAdd, listeningNodeStream, relativePosition);
float repeatedFrameFadeFactor = 1.0f;
static const int HRTF_DATASET_INDEX = 1;
if (!streamToAdd.lastPopSucceeded()) {
bool forceSilentBlock = true;
if (!streamToAdd.getLastPopOutput().isNull()) {
bool isInjector = dynamic_cast<const InjectedAudioStream*>(&streamToAdd);
// in an injector, just go silent - the injector has likely ended
// in other inputs (microphone, &c.), repeat with fade to avoid the harsh jump to silence
// we'll repeat the last block until it has a block to mix
// and we'll gradually fade that repeated block into silence.
// calculate its fade factor, which depends on how many times it's already been repeated.
repeatedFrameFadeFactor = calculateRepeatedFrameFadeFactor(streamToAdd.getConsecutiveNotMixedCount() - 1);
if (!isInjector && repeatedFrameFadeFactor > 0.0f) {
// apply the repeatedFrameFadeFactor to the gain
gain *= repeatedFrameFadeFactor;
forceSilentBlock = false;
}
}
if (forceSilentBlock) {
// we're deciding not to repeat either since we've already done it enough times or repetition with fade is disabled
// in this case we will call renderSilent with a forced silent block
// this ensures the correct tail from the previously mixed block and the correct spatialization of first block
// of any upcoming audio
if (!streamToAdd.isStereo() && !isEcho) {
// get the existing listener-source HRTF object, or create a new one
auto& hrtf = listenerNodeData.hrtfForStream(sourceNodeID, streamToAdd.getStreamIdentifier());
// this is not done for stereo streams since they do not go through the HRTF
static int16_t silentMonoBlock[AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL] = {};
hrtf.renderSilent(silentMonoBlock, _mixedSamples, HRTF_DATASET_INDEX, azimuth, distance, gain,
AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);
++_hrtfSilentRenders;
}
return;
}
}
// grab the stream from the ring buffer
AudioRingBuffer::ConstIterator streamPopOutput = streamToAdd.getLastPopOutput();
if (streamToAdd.isStereo() || isEcho) {
// this is a stereo source or server echo so we do not pass it through the HRTF
// simply apply our calculated gain to each sample
if (streamToAdd.isStereo()) {
for (int i = 0; i < AudioConstants::NETWORK_FRAME_SAMPLES_STEREO; ++i) {
_mixedSamples[i] += float(streamPopOutput[i] * gain / AudioConstants::MAX_SAMPLE_VALUE);
}
++_manualStereoMixes;
} else {
for (int i = 0; i < AudioConstants::NETWORK_FRAME_SAMPLES_STEREO; i += 2) {
auto monoSample = float(streamPopOutput[i / 2] * gain / AudioConstants::MAX_SAMPLE_VALUE);
_mixedSamples[i] += monoSample;
_mixedSamples[i + 1] += monoSample;
}
++_manualEchoMixes;
}
return;
}
// get the existing listener-source HRTF object, or create a new one
auto& hrtf = listenerNodeData.hrtfForStream(sourceNodeID, streamToAdd.getStreamIdentifier());
static int16_t streamBlock[AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL];
streamPopOutput.readSamples(streamBlock, AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);
// if the frame we're about to mix is silent, simply call render silent and move on
if (streamToAdd.getLastPopOutputLoudness() == 0.0f) {
// silent frame from source
// we still need to call renderSilent via the HRTF for mono source
hrtf.renderSilent(streamBlock, _mixedSamples, HRTF_DATASET_INDEX, azimuth, distance, gain,
AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);
++_hrtfSilentRenders;
return;
}
if (_performanceThrottlingRatio > 0.0f
&& streamToAdd.getLastPopOutputTrailingLoudness() / glm::length(relativePosition) <= _minAudibilityThreshold) {
// the mixer is struggling so we're going to drop off some streams
// we call renderSilent via the HRTF with the actual frame data and a gain of 0.0
hrtf.renderSilent(streamBlock, _mixedSamples, HRTF_DATASET_INDEX, azimuth, distance, 0.0f,
AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);
++_hrtfStruggleRenders;
return;
}
++_hrtfRenders;
// mono stream, call the HRTF with our block and calculated azimuth and gain
hrtf.render(streamBlock, _mixedSamples, HRTF_DATASET_INDEX, azimuth, distance, gain,
AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);
}
bool AudioMixer::prepareMixForListeningNode(Node* node) {
AvatarAudioStream* nodeAudioStream = static_cast<AudioMixerClientData*>(node->getLinkedData())->getAvatarAudioStream();
AudioMixerClientData* listenerNodeData = static_cast<AudioMixerClientData*>(node->getLinkedData());
// zero out the client mix for this node
memset(_mixedSamples, 0, sizeof(_mixedSamples));
// loop through all other nodes that have sufficient audio to mix
DependencyManager::get<NodeList>()->eachNode([&](const SharedNodePointer& otherNode){
// make sure that we have audio data for this other node
// and that it isn't being ignored by our listening node
// and that it isn't ignoring our listening node
if (otherNode->getLinkedData()
&& !node->isIgnoringNodeWithID(otherNode->getUUID()) && !otherNode->isIgnoringNodeWithID(node->getUUID())) {
AudioMixerClientData* otherNodeClientData = (AudioMixerClientData*) otherNode->getLinkedData();
// check to see if we're ignoring in radius
bool insideIgnoreRadius = false;
if (node->isIgnoreRadiusEnabled() || otherNode->isIgnoreRadiusEnabled()) {
AudioMixerClientData* otherData = reinterpret_cast<AudioMixerClientData*>(otherNode->getLinkedData());
AudioMixerClientData* nodeData = reinterpret_cast<AudioMixerClientData*>(node->getLinkedData());
float ignoreRadius = glm::min(node->getIgnoreRadius(), otherNode->getIgnoreRadius());
if (glm::distance(nodeData->getPosition(), otherData->getPosition()) < ignoreRadius) {
insideIgnoreRadius = true;
}
}
if (!insideIgnoreRadius) {
// enumerate the ARBs attached to the otherNode and add all that should be added to mix
auto streamsCopy = otherNodeClientData->getAudioStreams();
for (auto& streamPair : streamsCopy) {
auto otherNodeStream = streamPair.second;
if (*otherNode != *node || otherNodeStream->shouldLoopbackForNode()) {
addStreamToMixForListeningNodeWithStream(*listenerNodeData, *otherNodeStream, otherNode->getUUID(),
*nodeAudioStream);
}
}
}
}
});
// use the per-listener AudioLimiter to render the mixed data...
listenerNodeData->audioLimiter.render(_mixedSamples, _clampedSamples, AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);
// check for silent audio after the peak limiter has converted the samples
bool hasAudio = false;
for (int i = 0; i < AudioConstants::NETWORK_FRAME_SAMPLES_STEREO; ++i) {
if (_clampedSamples[i] != 0) {
hasAudio = true;
break;
}
}
return hasAudio;
}
void AudioMixer::sendAudioEnvironmentPacket(SharedNodePointer node) {
// Send stream properties
bool hasReverb = false;
float reverbTime, wetLevel;
// find reverb properties
for (int i = 0; i < _zoneReverbSettings.size(); ++i) {
AudioMixerClientData* data = static_cast<AudioMixerClientData*>(node->getLinkedData());
glm::vec3 streamPosition = data->getAvatarAudioStream()->getPosition();
AABox box = _audioZones[_zoneReverbSettings[i].zone];
if (box.contains(streamPosition)) {
hasReverb = true;
reverbTime = _zoneReverbSettings[i].reverbTime;
wetLevel = _zoneReverbSettings[i].wetLevel;
break;
}
}
AudioMixerClientData* nodeData = static_cast<AudioMixerClientData*>(node->getLinkedData());
AvatarAudioStream* stream = nodeData->getAvatarAudioStream();
bool dataChanged = (stream->hasReverb() != hasReverb) ||
(stream->hasReverb() && (stream->getRevebTime() != reverbTime ||
stream->getWetLevel() != wetLevel));
if (dataChanged) {
// Update stream
if (hasReverb) {
stream->setReverb(reverbTime, wetLevel);
} else {
stream->clearReverb();
}
}
// Send at change or every so often
float CHANCE_OF_SEND = 0.01f;
bool sendData = dataChanged || (randFloat() < CHANCE_OF_SEND);
if (sendData) {
auto nodeList = DependencyManager::get<NodeList>();
unsigned char bitset = 0;
int packetSize = sizeof(bitset);
if (hasReverb) {
packetSize += sizeof(reverbTime) + sizeof(wetLevel);
}
auto envPacket = NLPacket::create(PacketType::AudioEnvironment, packetSize);
if (hasReverb) {
setAtBit(bitset, HAS_REVERB_BIT);
}
envPacket->writePrimitive(bitset);
if (hasReverb) {
envPacket->writePrimitive(reverbTime);
envPacket->writePrimitive(wetLevel);
}
nodeList->sendPacket(std::move(envPacket), *node);
}
}
void AudioMixer::handleNodeAudioPacket(QSharedPointer<ReceivedMessage> message, SharedNodePointer sendingNode) {
getOrCreateClientData(sendingNode.data());
DependencyManager::get<NodeList>()->updateNodeWithDataFromPacket(message, sendingNode);
@@ -780,20 +441,25 @@ void AudioMixer::start() {
auto frameTimestamp = p_high_resolution_clock::time_point::min();
unsigned int framesSinceManagement = std::numeric_limits<int>::max();
// mixFrame state
// mix state
unsigned int frame = 1;
while (!_isFinished) {
manageLoad(frameTimestamp, framesSinceManagement);
slave.resetStats();
nodeList->eachNode([&](const SharedNodePointer& node) {
_sumStreams += prepareFrame(node, frame);
});
nodeList->eachNode([&](const SharedNodePointer& node) {
if(mixFrame(node, frame)) {
if (slave.mix(node, frame)) {
++_sumListeners;
}
});
slave.getStats();
++frame;
++_numStatFrames;
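// Design note: the loop makes two passes per frame - prepareFrame pops a
// frame from every node's streams first, then slave.mix builds each
// listener's mix - so every listener mixes against the same popped frame
// from every source.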
@@ -885,96 +551,6 @@ int AudioMixer::prepareFrame(const SharedNodePointer& node, unsigned int frame)
return data->checkBuffersBeforeFrameSend();
}
bool AudioMixer::mixFrame(const SharedNodePointer& node, unsigned int frame) {
AudioMixerClientData* data = (AudioMixerClientData*)node->getLinkedData();
if (data == nullptr) {
return false;
}
auto avatarStream = data->getAvatarAudioStream();
if (avatarStream == nullptr) {
return false;
}
auto nodeList = DependencyManager::get<NodeList>();
// mute the avatar, if necessary
if (shouldMute(avatarStream->getQuietestFrameLoudness()) || data->shouldMuteClient()) {
auto mutePacket = NLPacket::create(PacketType::NoisyMute, 0);
nodeList->sendPacket(std::move(mutePacket), *node);
// probably now we just reset the flag, once should do it (?)
data->setShouldMuteClient(false);
}
// mix streams
if (node->getType() == NodeType::Agent && node->getActiveSocket()) {
bool mixHasAudio = prepareMixForListeningNode(node.data());
std::unique_ptr<NLPacket> mixPacket;
if (mixHasAudio || data->shouldFlushEncoder()) {
int mixPacketBytes = sizeof(quint16) + AudioConstants::MAX_CODEC_NAME_LENGTH_ON_WIRE
+ AudioConstants::NETWORK_FRAME_BYTES_STEREO;
mixPacket = NLPacket::create(PacketType::MixedAudio, mixPacketBytes);
// pack sequence number
quint16 sequence = data->getOutgoingSequenceNumber();
mixPacket->writePrimitive(sequence);
// write the codec
QString codecInPacket = data->getCodecName();
mixPacket->writeString(codecInPacket);
QByteArray encodedBuffer;
if (mixHasAudio) {
QByteArray decodedBuffer(reinterpret_cast<char*>(_clampedSamples), AudioConstants::NETWORK_FRAME_BYTES_STEREO);
data->encode(decodedBuffer, encodedBuffer);
} else {
// time to flush, which resets the shouldFlush until next time we encode something
data->encodeFrameOfZeros(encodedBuffer);
}
// pack mixed audio samples
mixPacket->write(encodedBuffer.constData(), encodedBuffer.size());
} else {
int silentPacketBytes = sizeof(quint16) + sizeof(quint16) + AudioConstants::MAX_CODEC_NAME_LENGTH_ON_WIRE;
mixPacket = NLPacket::create(PacketType::SilentAudioFrame, silentPacketBytes);
// pack sequence number
quint16 sequence = data->getOutgoingSequenceNumber();
mixPacket->writePrimitive(sequence);
// write the codec
QString codecInPacket = data->getCodecName();
mixPacket->writeString(codecInPacket);
// pack number of silent audio samples
quint16 numSilentSamples = AudioConstants::NETWORK_FRAME_SAMPLES_STEREO;
mixPacket->writePrimitive(numSilentSamples);
}
// Send audio environment
sendAudioEnvironmentPacket(node);
// send mixed audio packet
nodeList->sendPacket(std::move(mixPacket), *node);
data->incrementOutgoingMixedAudioSequenceNumber();
// send an audio stream stats packet to the client approximately every second
static const unsigned int NUM_FRAMES_PER_SEC = (int) ceil(AudioConstants::NETWORK_FRAMES_PER_SEC);
if (data->shouldSendStats(frame % NUM_FRAMES_PER_SEC)) {
data->sendAudioStreamStatsPackets(node);
}
return true;
}
return false;
}
void AudioMixer::parseSettingsObject(const QJsonObject &settingsObject) {
if (settingsObject.contains(AUDIO_BUFFER_GROUP_KEY)) {
QJsonObject audioBufferGroupObject = settingsObject[AUDIO_BUFFER_GROUP_KEY].toObject();
@@ -1126,7 +702,7 @@ void AudioMixer::parseSettingsObject(const QJsonObject &settingsObject) {
coefficientObject.contains(LISTENER) &&
coefficientObject.contains(COEFFICIENT)) {
ZonesSettings settings;
ZoneSettings settings;
bool ok;
settings.source = coefficientObject.value(SOURCE).toString();
@@ -1136,7 +712,7 @@ void AudioMixer::parseSettingsObject(const QJsonObject &settingsObject) {
if (ok && settings.coefficient >= 0.0f && settings.coefficient <= 1.0f &&
_audioZones.contains(settings.source) && _audioZones.contains(settings.listener)) {
_zonesSettings.push_back(settings);
_zoneSettings.push_back(settings);
qDebug() << "Added Coefficient:" << settings.source << settings.listener << settings.coefficient;
}
}
@@ -1169,6 +745,7 @@ void AudioMixer::parseSettingsObject(const QJsonObject &settingsObject) {
settings.wetLevel = wetLevel;
_zoneReverbSettings.push_back(settings);
qDebug() << "Added Reverb:" << zone << reverbTime << wetLevel;
}
}
@@ -1176,3 +753,442 @@ void AudioMixer::parseSettingsObject(const QJsonObject &settingsObject) {
}
}
}
bool AudioMixerSlave::mix(const SharedNodePointer& node, unsigned int frame) {
AudioMixerClientData* data = (AudioMixerClientData*)node->getLinkedData();
if (data == nullptr) {
return false;
}
auto avatarStream = data->getAvatarAudioStream();
if (avatarStream == nullptr) {
return false;
}
auto nodeList = DependencyManager::get<NodeList>();
// mute the avatar, if necessary
if (AudioMixer::shouldMute(avatarStream->getQuietestFrameLoudness()) || data->shouldMuteClient()) {
auto mutePacket = NLPacket::create(PacketType::NoisyMute, 0);
nodeList->sendPacket(std::move(mutePacket), *node);
// probably now we just reset the flag, once should do it (?)
data->setShouldMuteClient(false);
}
// generate and send audio packets
if (node->getType() == NodeType::Agent && node->getActiveSocket()) {
// mix streams
bool mixHasAudio = prepareMix(node);
// write the packet
std::unique_ptr<NLPacket> mixPacket;
if (mixHasAudio || data->shouldFlushEncoder()) {
// encode the audio
QByteArray encodedBuffer;
if (mixHasAudio) {
QByteArray decodedBuffer(reinterpret_cast<char*>(_clampedSamples), AudioConstants::NETWORK_FRAME_BYTES_STEREO);
data->encode(decodedBuffer, encodedBuffer);
} else {
// time to flush, which resets the shouldFlush until next time we encode something
data->encodeFrameOfZeros(encodedBuffer);
}
// write it to a packet
writeMixPacket(mixPacket, data, encodedBuffer);
} else {
writeSilentPacket(mixPacket, data);
}
// send audio environment packet
sendEnvironmentPacket(node);
// send mixed audio packet
nodeList->sendPacket(std::move(mixPacket), *node);
data->incrementOutgoingMixedAudioSequenceNumber();
// send an audio stream stats packet to the client approximately every second
static const unsigned int NUM_FRAMES_PER_SEC = (int) ceil(AudioConstants::NETWORK_FRAMES_PER_SEC);
if (data->shouldSendStats(frame % NUM_FRAMES_PER_SEC)) {
data->sendAudioStreamStatsPackets(node);
}
return true;
}
return false;
}
void AudioMixerSlave::writeMixPacket(std::unique_ptr<NLPacket>& mixPacket, AudioMixerClientData* data, QByteArray& buffer) {
int mixPacketBytes = sizeof(quint16) + AudioConstants::MAX_CODEC_NAME_LENGTH_ON_WIRE
+ AudioConstants::NETWORK_FRAME_BYTES_STEREO;
mixPacket = NLPacket::create(PacketType::MixedAudio, mixPacketBytes);
// pack sequence number
quint16 sequence = data->getOutgoingSequenceNumber();
mixPacket->writePrimitive(sequence);
// write the codec
QString codecInPacket = data->getCodecName();
mixPacket->writeString(codecInPacket);
// pack mixed audio samples
mixPacket->write(buffer.constData(), buffer.size());
}
void AudioMixerSlave::writeSilentPacket(std::unique_ptr<NLPacket>& mixPacket, AudioMixerClientData* data) {
int silentPacketBytes = sizeof(quint16) + sizeof(quint16) + AudioConstants::MAX_CODEC_NAME_LENGTH_ON_WIRE;
mixPacket = NLPacket::create(PacketType::SilentAudioFrame, silentPacketBytes);
// pack sequence number
quint16 sequence = data->getOutgoingSequenceNumber();
mixPacket->writePrimitive(sequence);
// write the codec
QString codecInPacket = data->getCodecName();
mixPacket->writeString(codecInPacket);
// pack number of silent audio samples
quint16 numSilentSamples = AudioConstants::NETWORK_FRAME_SAMPLES_STEREO;
mixPacket->writePrimitive(numSilentSamples);
}
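// For reference, the two layouts written above (sizes follow directly from
// the constants used in this diff):
//   MixedAudio:       quint16 sequence | codec name string | encoded stereo frame
//   SilentAudioFrame: quint16 sequence | codec name string | quint16 numSilentSamples
// silentPacketBytes reserves sizeof(quint16) twice - once for the sequence
// number and once for the silent-sample count.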
void AudioMixerSlave::sendEnvironmentPacket(const SharedNodePointer& node) {
// Send stream properties
bool hasReverb = false;
float reverbTime, wetLevel;
auto& reverbSettings = AudioMixer::getReverbSettings();
auto& audioZones = AudioMixer::getAudioZones();
// find reverb properties
for (int i = 0; i < reverbSettings.size(); ++i) {
AudioMixerClientData* data = static_cast<AudioMixerClientData*>(node->getLinkedData());
glm::vec3 streamPosition = data->getAvatarAudioStream()->getPosition();
AABox box = audioZones[reverbSettings[i].zone];
if (box.contains(streamPosition)) {
hasReverb = true;
reverbTime = reverbSettings[i].reverbTime;
wetLevel = reverbSettings[i].wetLevel;
break;
}
}
AudioMixerClientData* nodeData = static_cast<AudioMixerClientData*>(node->getLinkedData());
AvatarAudioStream* stream = nodeData->getAvatarAudioStream();
bool dataChanged = (stream->hasReverb() != hasReverb) ||
(stream->hasReverb() && (stream->getRevebTime() != reverbTime ||
stream->getWetLevel() != wetLevel));
if (dataChanged) {
// Update stream
if (hasReverb) {
stream->setReverb(reverbTime, wetLevel);
} else {
stream->clearReverb();
}
}
// Send at change or every so often
float CHANCE_OF_SEND = 0.01f;
bool sendData = dataChanged || (randFloat() < CHANCE_OF_SEND);
if (sendData) {
auto nodeList = DependencyManager::get<NodeList>();
unsigned char bitset = 0;
int packetSize = sizeof(bitset);
if (hasReverb) {
packetSize += sizeof(reverbTime) + sizeof(wetLevel);
}
auto envPacket = NLPacket::create(PacketType::AudioEnvironment, packetSize);
if (hasReverb) {
setAtBit(bitset, HAS_REVERB_BIT);
}
envPacket->writePrimitive(bitset);
if (hasReverb) {
envPacket->writePrimitive(reverbTime);
envPacket->writePrimitive(wetLevel);
}
nodeList->sendPacket(std::move(envPacket), *node);
}
}
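// A sketch of the matching read side, shown only to document the layout -
// the handler name is hypothetical, and readPrimitive/oneAtBit are assumed to
// mirror the write-side helpers used elsewhere in this codebase:
void handleAudioEnvironmentPacket(ReceivedMessage& message) {
    unsigned char bitset;
    message.readPrimitive(&bitset);
    if (oneAtBit(bitset, HAS_REVERB_BIT)) {
        float reverbTime, wetLevel;
        message.readPrimitive(&reverbTime);
        message.readPrimitive(&wetLevel);
        // hand reverbTime / wetLevel to the local audio pipeline here
    }
    // if the reverb bit is clear, playback reverts to dry
}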
bool AudioMixerSlave::prepareMix(const SharedNodePointer& node) {
AvatarAudioStream* nodeAudioStream = static_cast<AudioMixerClientData*>(node->getLinkedData())->getAvatarAudioStream();
AudioMixerClientData* listenerNodeData = static_cast<AudioMixerClientData*>(node->getLinkedData());
// zero out the client mix for this node
memset(_mixedSamples, 0, sizeof(_mixedSamples));
// loop through all other nodes that have sufficient audio to mix
DependencyManager::get<NodeList>()->eachNode([&](const SharedNodePointer& otherNode){
// make sure that we have audio data for this other node
// and that it isn't being ignored by our listening node
// and that it isn't ignoring our listening node
if (otherNode->getLinkedData()
&& !node->isIgnoringNodeWithID(otherNode->getUUID()) && !otherNode->isIgnoringNodeWithID(node->getUUID())) {
AudioMixerClientData* otherNodeClientData = (AudioMixerClientData*) otherNode->getLinkedData();
// check to see if we're ignoring in radius
bool insideIgnoreRadius = false;
if (node->isIgnoreRadiusEnabled() || otherNode->isIgnoreRadiusEnabled()) {
AudioMixerClientData* otherData = reinterpret_cast<AudioMixerClientData*>(otherNode->getLinkedData());
AudioMixerClientData* nodeData = reinterpret_cast<AudioMixerClientData*>(node->getLinkedData());
float ignoreRadius = glm::min(node->getIgnoreRadius(), otherNode->getIgnoreRadius());
if (glm::distance(nodeData->getPosition(), otherData->getPosition()) < ignoreRadius) {
insideIgnoreRadius = true;
}
}
if (!insideIgnoreRadius) {
// enumerate the ARBs attached to the otherNode and add all that should be added to mix
auto streamsCopy = otherNodeClientData->getAudioStreams();
for (auto& streamPair : streamsCopy) {
auto otherNodeStream = streamPair.second;
if (*otherNode != *node || otherNodeStream->shouldLoopbackForNode()) {
addStreamToMix(*listenerNodeData, otherNode->getUUID(), *nodeAudioStream, *otherNodeStream);
}
}
}
}
});
// use the per-listener AudioLimiter to render the mixed data...
listenerNodeData->audioLimiter.render(_mixedSamples, _clampedSamples, AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);
// check for silent audio after the peak limiter has converted the samples
bool hasAudio = false;
for (int i = 0; i < AudioConstants::NETWORK_FRAME_SAMPLES_STEREO; ++i) {
if (_clampedSamples[i] != 0) {
hasAudio = true;
break;
}
}
return hasAudio;
}
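// Design note: streams accumulate into the float _mixedSamples buffer
// (scaled toward [-1, 1] by MAX_SAMPLE_VALUE), and audioLimiter.render
// converts back to int16 _clampedSamples while limiting peaks; checking for
// silence after that conversion means "has audio" reflects the exact samples
// that would go out on the wire.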
void AudioMixerSlave::addStreamToMix(AudioMixerClientData& listenerNodeData, const QUuid& sourceNodeID,
const AvatarAudioStream& listeningNodeStream, const PositionalAudioStream& streamToAdd) {
// to reduce artifacts we calculate the gain and azimuth for every source for this listener
// even if we are not going to end up mixing in this source
++_totalMixes;
// this ensures that the tail of any previously mixed audio or the first block of new audio sounds correct
// check if this is a server echo of a source back to itself
bool isEcho = (&streamToAdd == &listeningNodeStream);
glm::vec3 relativePosition = streamToAdd.getPosition() - listeningNodeStream.getPosition();
// figure out the distance between source and listener
float distance = glm::max(glm::length(relativePosition), EPSILON);
// figure out the gain for this source at the listener
float gain = gainForSource(listeningNodeStream, streamToAdd, relativePosition, isEcho);
// figure out the azimuth to this source at the listener
float azimuth = isEcho ? 0.0f : azimuthForSource(listeningNodeStream, streamToAdd, relativePosition);
float repeatedFrameFadeFactor = 1.0f;
static const int HRTF_DATASET_INDEX = 1;
if (!streamToAdd.lastPopSucceeded()) {
bool forceSilentBlock = true;
if (!streamToAdd.getLastPopOutput().isNull()) {
bool isInjector = dynamic_cast<const InjectedAudioStream*>(&streamToAdd);
// in an injector, just go silent - the injector has likely ended
// in other inputs (microphone, &c.), repeat with fade to avoid the harsh jump to silence
// we'll repeat the last block until it has a block to mix
// and we'll gradually fade that repeated block into silence.
// calculate its fade factor, which depends on how many times it's already been repeated.
repeatedFrameFadeFactor = calculateRepeatedFrameFadeFactor(streamToAdd.getConsecutiveNotMixedCount() - 1);
if (!isInjector && repeatedFrameFadeFactor > 0.0f) {
// apply the repeatedFrameFadeFactor to the gain
gain *= repeatedFrameFadeFactor;
forceSilentBlock = false;
}
}
if (forceSilentBlock) {
// we're deciding not to repeat either since we've already done it enough times or repetition with fade is disabled
// in this case we will call renderSilent with a forced silent block
// this ensures the correct tail from the previously mixed block and the correct spatialization of first block
// of any upcoming audio
if (!streamToAdd.isStereo() && !isEcho) {
// get the existing listener-source HRTF object, or create a new one
auto& hrtf = listenerNodeData.hrtfForStream(sourceNodeID, streamToAdd.getStreamIdentifier());
// this is not done for stereo streams since they do not go through the HRTF
static int16_t silentMonoBlock[AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL] = {};
hrtf.renderSilent(silentMonoBlock, _mixedSamples, HRTF_DATASET_INDEX, azimuth, distance, gain,
AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);
++_hrtfSilentRenders;
}
return;
}
}
// grab the stream from the ring buffer
AudioRingBuffer::ConstIterator streamPopOutput = streamToAdd.getLastPopOutput();
if (streamToAdd.isStereo() || isEcho) {
// this is a stereo source or server echo so we do not pass it through the HRTF
// simply apply our calculated gain to each sample
if (streamToAdd.isStereo()) {
for (int i = 0; i < AudioConstants::NETWORK_FRAME_SAMPLES_STEREO; ++i) {
_mixedSamples[i] += float(streamPopOutput[i] * gain / AudioConstants::MAX_SAMPLE_VALUE);
}
++_manualStereoMixes;
} else {
for (int i = 0; i < AudioConstants::NETWORK_FRAME_SAMPLES_STEREO; i += 2) {
auto monoSample = float(streamPopOutput[i / 2] * gain / AudioConstants::MAX_SAMPLE_VALUE);
_mixedSamples[i] += monoSample;
_mixedSamples[i + 1] += monoSample;
}
++_manualEchoMixes;
}
return;
}
// get the existing listener-source HRTF object, or create a new one
auto& hrtf = listenerNodeData.hrtfForStream(sourceNodeID, streamToAdd.getStreamIdentifier());
static int16_t streamBlock[AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL];
streamPopOutput.readSamples(streamBlock, AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);
// if the frame we're about to mix is silent, simply call render silent and move on
if (streamToAdd.getLastPopOutputLoudness() == 0.0f) {
// silent frame from source
// we still need to call renderSilent via the HRTF for mono source
hrtf.renderSilent(streamBlock, _mixedSamples, HRTF_DATASET_INDEX, azimuth, distance, gain,
AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);
++_hrtfSilentRenders;
return;
}
float audibilityThreshold = AudioMixer::getMinimumAudibilityThreshold();
if (audibilityThreshold > 0.0f &&
streamToAdd.getLastPopOutputTrailingLoudness() / glm::length(relativePosition) <= audibilityThreshold) {
// the mixer is struggling so we're going to drop off some streams
// we call renderSilent via the HRTF with the actual frame data and a gain of 0.0
hrtf.renderSilent(streamBlock, _mixedSamples, HRTF_DATASET_INDEX, azimuth, distance, 0.0f,
AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);
++_hrtfStruggleRenders;
return;
}
++_hrtfRenders;
// mono stream, call the HRTF with our block and calculated azimuth and gain
hrtf.render(streamBlock, _mixedSamples, HRTF_DATASET_INDEX, azimuth, distance, gain,
AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);
}
float AudioMixerSlave::gainForSource(const AvatarAudioStream& listeningNodeStream, const PositionalAudioStream& streamToAdd,
const glm::vec3& relativePosition, bool isEcho) {
float gain = 1.0f;
float distanceBetween = glm::length(relativePosition);
if (distanceBetween < EPSILON) {
distanceBetween = EPSILON;
}
if (streamToAdd.getType() == PositionalAudioStream::Injector) {
gain *= reinterpret_cast<const InjectedAudioStream*>(&streamToAdd)->getAttenuationRatio();
}
if (!isEcho && (streamToAdd.getType() == PositionalAudioStream::Microphone)) {
// source is another avatar, apply fixed off-axis attenuation to make them quieter as they turn away from listener
glm::vec3 rotatedListenerPosition = glm::inverse(streamToAdd.getOrientation()) * relativePosition;
float angleOfDelivery = glm::angle(glm::vec3(0.0f, 0.0f, -1.0f),
glm::normalize(rotatedListenerPosition));
const float MAX_OFF_AXIS_ATTENUATION = 0.2f;
const float OFF_AXIS_ATTENUATION_FORMULA_STEP = (1 - MAX_OFF_AXIS_ATTENUATION) / 2.0f;
float offAxisCoefficient = MAX_OFF_AXIS_ATTENUATION +
(OFF_AXIS_ATTENUATION_FORMULA_STEP * (angleOfDelivery / PI_OVER_TWO));
// multiply the current attenuation coefficient by the calculated off axis coefficient
gain *= offAxisCoefficient;
}
float attenuationPerDoublingInDistance = AudioMixer::getAttenuationPerDoublingInDistance();
auto& zoneSettings = AudioMixer::getZoneSettings();
auto& audioZones = AudioMixer::getAudioZones();
for (int i = 0; i < zoneSettings.length(); ++i) {
if (audioZones[zoneSettings[i].source].contains(streamToAdd.getPosition()) &&
audioZones[zoneSettings[i].listener].contains(listeningNodeStream.getPosition())) {
attenuationPerDoublingInDistance = zoneSettings[i].coefficient;
break;
}
}
if (distanceBetween >= ATTENUATION_BEGINS_AT_DISTANCE) {
// translate the zone setting to gain per log2(distance)
float g = 1.0f - attenuationPerDoublingInDistance;
g = (g < EPSILON) ? EPSILON : g;
g = (g > 1.0f) ? 1.0f : g;
// calculate the distance coefficient using the distance to this node
float distanceCoefficient = fastexp2(fastlog2(g) * fastlog2(distanceBetween/ATTENUATION_BEGINS_AT_DISTANCE));
// multiply the current attenuation coefficient by the distance coefficient
gain *= distanceCoefficient;
}
return gain;
}
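// Worked numbers for the attenuation model above, derived from the code:
// - off-axis: offAxisCoefficient = 0.2 + 0.4 * (angleOfDelivery / (PI / 2));
//   because relativePosition points from listener to source, angleOfDelivery
//   is PI when the source avatar faces the listener (coefficient 1.0, no
//   attenuation) and 0 when it faces directly away (coefficient 0.2).
// - distance: fastexp2(fastlog2(g) * fastlog2(d / d0)) computes g^log2(d / d0),
//   i.e. the gain is multiplied by g = 1 - attenuationPerDoublingInDistance
//   for every doubling of distance past d0. With g = 0.5 and d = 4 * d0, the
//   distance coefficient is 0.5^2 = 0.25.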
float AudioMixerSlave::azimuthForSource(const AvatarAudioStream& listeningNodeStream, const PositionalAudioStream& streamToAdd,
const glm::vec3& relativePosition) {
glm::quat inverseOrientation = glm::inverse(listeningNodeStream.getOrientation());
// Compute sample delay for the two ears to create phase panning
glm::vec3 rotatedSourcePosition = inverseOrientation * relativePosition;
// project the rotated source position vector onto the XZ plane
rotatedSourcePosition.y = 0.0f;
static const float SOURCE_DISTANCE_THRESHOLD = 1e-30f;
if (glm::length2(rotatedSourcePosition) > SOURCE_DISTANCE_THRESHOLD) {
// produce an oriented angle about the y-axis
return glm::orientedAngle(glm::vec3(0.0f, 0.0f, -1.0f), glm::normalize(rotatedSourcePosition), glm::vec3(0.0f, -1.0f, 0.0f));
} else {
// there is no distance between listener and source - return no azimuth
return 0;
}
}
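// Worked example: with the listener at the origin facing world -Z (identity
// orientation) and a source one meter to the listener's right,
// relativePosition = (1, 0, 0); projecting onto the XZ plane leaves it
// unchanged, and orientedAngle about the (0, -1, 0) axis yields +PI/2. A
// source straight ahead yields 0 and one to the left yields -PI/2, so
// azimuth here is signed positive to the listener's right.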

AudioMixer.h

@@ -23,18 +23,77 @@ class AvatarAudioStream;
class AudioHRTF;
class AudioMixerClientData;
class AudioMixerSlave {
public:
// mix and broadcast non-ignored streams to the node
// returns true if a listener mix was broadcast for the node
bool mix(const SharedNodePointer& node, unsigned int frame);
// reset statistics accumulated over mixes
void resetStats() { /* TODO */ };
// get statistics accumulated over mixes
void getStats() { /* TODO */ };
private:
void writeMixPacket(std::unique_ptr<NLPacket>& mixPacket, AudioMixerClientData* data, QByteArray& buffer);
void writeSilentPacket(std::unique_ptr<NLPacket>& mixPacket, AudioMixerClientData* data);
void sendEnvironmentPacket(const SharedNodePointer& node);
// create mix, returns true if mix has audio
bool prepareMix(const SharedNodePointer& node);
// add a stream to the mix
void addStreamToMix(AudioMixerClientData& listenerData, const QUuid& streamerID,
const AvatarAudioStream& listenerStream, const PositionalAudioStream& streamer);
float gainForSource(const AvatarAudioStream& listener, const PositionalAudioStream& streamer,
const glm::vec3& relativePosition, bool isEcho);
float azimuthForSource(const AvatarAudioStream& listener, const PositionalAudioStream& streamer,
const glm::vec3& relativePosition);
// mixing buffers
float _mixedSamples[AudioConstants::NETWORK_FRAME_SAMPLES_STEREO];
int16_t _clampedSamples[AudioConstants::NETWORK_FRAME_SAMPLES_STEREO];
// mixing statistics
unsigned int _sumListeners{ 0 };
unsigned int _totalMixes{ 0 };
unsigned int _hrtfRenders{ 0 };
unsigned int _hrtfStruggleRenders{ 0 };
unsigned int _hrtfSilentRenders{ 0 };
unsigned int _manualStereoMixes{ 0 };
unsigned int _manualEchoMixes{ 0 };
};
/// Handles assignments of type AudioMixer - mixing streams of audio and re-distributing to various clients.
class AudioMixer : public ThreadedAssignment {
Q_OBJECT
public:
AudioMixer(ReceivedMessage& message);
struct ZoneSettings {
QString source;
QString listener;
float coefficient;
};
struct ReverbSettings {
QString zone;
float reverbTime;
float wetLevel;
};
static int getStaticJitterFrames() { return _numStaticJitterFrames; }
static bool shouldMute(float quietestFrame) { return quietestFrame > _noiseMutingThreshold; }
static float getAttenuationPerDoublingInDistance() { return _attenuationPerDoublingInDistance; }
static float getMinimumAudibilityThreshold() { return _performanceThrottlingRatio > 0.0f ? _minAudibilityThreshold : 0.0f; }
static const QHash<QString, AABox>& getAudioZones() { return _audioZones; }
static const QVector<ZoneSettings>& getZoneSettings() { return _zoneSettings; }
static const QVector<ReverbSettings>& getReverbSettings() { return _zoneReverbSettings; }
public slots:
void run() override;
void sendStatsPacket() override;
static int getStaticJitterFrames() { return _numStaticJitterFrames; }
private slots:
// packet handlers
void handleNodeAudioPacket(QSharedPointer<ReceivedMessage> packet, SharedNodePointer sendingNode);
@@ -56,42 +115,13 @@ private:
// pop a frame from any streams on the node
// returns the number of available streams
int prepareFrame(const SharedNodePointer& node, unsigned int frame);
// mix and broadcast non-ignored streams to the node
// returns true if a listener mix was broadcast for the node
bool mixFrame(const SharedNodePointer& node, unsigned int frame);
AudioMixerClientData* getOrCreateClientData(Node* node);
/// adds one stream to the mix for a listening node
void addStreamToMixForListeningNodeWithStream(AudioMixerClientData& listenerNodeData,
const PositionalAudioStream& streamToAdd,
const QUuid& sourceNodeID,
const AvatarAudioStream& listeningNodeStream);
float gainForSource(const PositionalAudioStream& streamToAdd, const AvatarAudioStream& listeningNodeStream,
const glm::vec3& relativePosition, bool isEcho);
float azimuthForSource(const PositionalAudioStream& streamToAdd, const AvatarAudioStream& listeningNodeStream,
const glm::vec3& relativePosition);
/// prepares and sends a mix to one Node
bool prepareMixForListeningNode(Node* node);
/// Send Audio Environment packet for a single node
void sendAudioEnvironmentPacket(SharedNodePointer node);
void perSecondActions();
QString percentageForMixStats(int counter);
bool shouldMute(float quietestFrame);
void parseSettingsObject(const QJsonObject& settingsObject);
float _trailingSleepRatio;
float _minAudibilityThreshold;
float _performanceThrottlingRatio;
float _attenuationPerDoublingInDistance;
float _noiseMutingThreshold;
int _numStatFrames { 0 };
int _sumStreams { 0 };
int _sumListeners { 0 };
@@ -104,24 +134,17 @@ private:
QString _codecPreferenceOrder;
float _mixedSamples[AudioConstants::NETWORK_FRAME_SAMPLES_STEREO];
int16_t _clampedSamples[AudioConstants::NETWORK_FRAME_SAMPLES_STEREO];
QHash<QString, AABox> _audioZones;
struct ZonesSettings {
QString source;
QString listener;
float coefficient;
};
QVector<ZonesSettings> _zonesSettings;
struct ReverbSettings {
QString zone;
float reverbTime;
float wetLevel;
};
QVector<ReverbSettings> _zoneReverbSettings;
AudioMixerSlave slave;
static int _numStaticJitterFrames; // -1 denotes dynamic jitter buffering
static float _noiseMutingThreshold;
static float _attenuationPerDoublingInDistance;
static float _trailingSleepRatio;
static float _performanceThrottlingRatio;
static float _minAudibilityThreshold;
static QHash<QString, AABox> _audioZones;
static QVector<ZoneSettings> _zoneSettings;
static QVector<ReverbSettings> _zoneReverbSettings;
};
#endif // hifi_AudioMixer_h