mirror of
https://github.com/HifiExperiments/overte.git
synced 2025-07-26 06:43:19 +02:00
Merge pull request #8598 from kencooke/optimized-audio-pipeline
Optimized audio pipeline
This commit is contained in:
commit
06b17995b1
3 changed files with 56 additions and 70 deletions
|
@ -146,9 +146,6 @@ AudioClient::AudioClient() :
|
||||||
_positionGetter(DEFAULT_POSITION_GETTER),
|
_positionGetter(DEFAULT_POSITION_GETTER),
|
||||||
_orientationGetter(DEFAULT_ORIENTATION_GETTER)
|
_orientationGetter(DEFAULT_ORIENTATION_GETTER)
|
||||||
{
|
{
|
||||||
// clear the array of locally injected samples
|
|
||||||
memset(_localProceduralSamples, 0, AudioConstants::NETWORK_FRAME_BYTES_PER_CHANNEL);
|
|
||||||
|
|
||||||
connect(&_receivedAudioStream, &MixedProcessedAudioStream::processSamples,
|
connect(&_receivedAudioStream, &MixedProcessedAudioStream::processSamples,
|
||||||
this, &AudioClient::processReceivedSamples, Qt::DirectConnection);
|
this, &AudioClient::processReceivedSamples, Qt::DirectConnection);
|
||||||
connect(this, &AudioClient::changeDevice, this, [=](const QAudioDeviceInfo& outputDeviceInfo) { switchOutputToAudioDevice(outputDeviceInfo); });
|
connect(this, &AudioClient::changeDevice, this, [=](const QAudioDeviceInfo& outputDeviceInfo) { switchOutputToAudioDevice(outputDeviceInfo); });
|
||||||
|
@ -374,7 +371,8 @@ bool adjustedFormatForAudioDevice(const QAudioDeviceInfo& audioDevice,
|
||||||
adjustedAudioFormat = desiredAudioFormat;
|
adjustedAudioFormat = desiredAudioFormat;
|
||||||
|
|
||||||
#ifdef Q_OS_ANDROID
|
#ifdef Q_OS_ANDROID
|
||||||
adjustedAudioFormat.setSampleRate(44100);
|
// FIXME: query the native sample rate of the device?
|
||||||
|
adjustedAudioFormat.setSampleRate(48000);
|
||||||
#else
|
#else
|
||||||
|
|
||||||
//
|
//
|
||||||
|
@ -683,8 +681,8 @@ bool AudioClient::switchOutputToAudioDevice(const QString& outputDeviceName) {
|
||||||
|
|
||||||
void AudioClient::configureReverb() {
|
void AudioClient::configureReverb() {
|
||||||
ReverbParameters p;
|
ReverbParameters p;
|
||||||
p.sampleRate = _outputFormat.sampleRate();
|
|
||||||
|
|
||||||
|
p.sampleRate = AudioConstants::SAMPLE_RATE;
|
||||||
p.bandwidth = _reverbOptions->getBandwidth();
|
p.bandwidth = _reverbOptions->getBandwidth();
|
||||||
p.preDelay = _reverbOptions->getPreDelay();
|
p.preDelay = _reverbOptions->getPreDelay();
|
||||||
p.lateDelay = _reverbOptions->getLateDelay();
|
p.lateDelay = _reverbOptions->getLateDelay();
|
||||||
|
@ -710,6 +708,7 @@ void AudioClient::configureReverb() {
|
||||||
_listenerReverb.setParameters(&p);
|
_listenerReverb.setParameters(&p);
|
||||||
|
|
||||||
// used only for adding self-reverb to loopback audio
|
// used only for adding self-reverb to loopback audio
|
||||||
|
p.sampleRate = _outputFormat.sampleRate();
|
||||||
p.wetDryMix = 100.0f;
|
p.wetDryMix = 100.0f;
|
||||||
p.preDelay = 0.0f;
|
p.preDelay = 0.0f;
|
||||||
p.earlyGain = -96.0f; // disable ER
|
p.earlyGain = -96.0f; // disable ER
|
||||||
|
@ -824,15 +823,14 @@ void AudioClient::handleLocalEchoAndReverb(QByteArray& inputByteArray) {
|
||||||
int16_t* inputSamples = reinterpret_cast<int16_t*>(inputByteArray.data());
|
int16_t* inputSamples = reinterpret_cast<int16_t*>(inputByteArray.data());
|
||||||
int16_t* loopbackSamples = reinterpret_cast<int16_t*>(loopBackByteArray.data());
|
int16_t* loopbackSamples = reinterpret_cast<int16_t*>(loopBackByteArray.data());
|
||||||
|
|
||||||
auto NO_RESAMPLER = nullptr;
|
// upmix mono to stereo
|
||||||
possibleResampling(NO_RESAMPLER,
|
if (!sampleChannelConversion(inputSamples, loopbackSamples, numInputSamples, _inputFormat, _outputFormat)) {
|
||||||
inputSamples, loopbackSamples,
|
// no conversion, just copy the samples
|
||||||
numInputSamples, numLoopbackSamples,
|
memcpy(loopbackSamples, inputSamples, numInputSamples * sizeof(int16_t));
|
||||||
_inputFormat, _outputFormat);
|
}
|
||||||
|
|
||||||
// apply stereo reverb at the source, to the loopback audio
|
// apply stereo reverb at the source, to the loopback audio
|
||||||
if (!_shouldEchoLocally && hasReverb) {
|
if (!_shouldEchoLocally && hasReverb) {
|
||||||
assert(_outputFormat.channelCount() == 2);
|
|
||||||
updateReverbOptions();
|
updateReverbOptions();
|
||||||
_sourceReverb.render(loopbackSamples, loopbackSamples, numLoopbackSamples/2);
|
_sourceReverb.render(loopbackSamples, loopbackSamples, numLoopbackSamples/2);
|
||||||
}
|
}
|
||||||
|
@ -841,8 +839,12 @@ void AudioClient::handleLocalEchoAndReverb(QByteArray& inputByteArray) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void AudioClient::handleAudioInput() {
|
void AudioClient::handleAudioInput() {
|
||||||
|
|
||||||
// input samples required to produce exactly NETWORK_FRAME_SAMPLES of output
|
// input samples required to produce exactly NETWORK_FRAME_SAMPLES of output
|
||||||
const int inputSamplesRequired = _inputFormat.channelCount() * _inputToNetworkResampler->getMinInput(AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);
|
const int inputSamplesRequired = (_inputToNetworkResampler ?
|
||||||
|
_inputToNetworkResampler->getMinInput(AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL) :
|
||||||
|
AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL) * _inputFormat.channelCount();
|
||||||
|
|
||||||
const auto inputAudioSamples = std::unique_ptr<int16_t[]>(new int16_t[inputSamplesRequired]);
|
const auto inputAudioSamples = std::unique_ptr<int16_t[]>(new int16_t[inputSamplesRequired]);
|
||||||
QByteArray inputByteArray = _inputDevice->readAll();
|
QByteArray inputByteArray = _inputDevice->readAll();
|
||||||
|
|
||||||
|
@ -929,12 +931,12 @@ void AudioClient::handleAudioInput() {
|
||||||
audioTransform.setRotation(_orientationGetter());
|
audioTransform.setRotation(_orientationGetter());
|
||||||
// FIXME find a way to properly handle both playback audio and user audio concurrently
|
// FIXME find a way to properly handle both playback audio and user audio concurrently
|
||||||
|
|
||||||
QByteArray decocedBuffer(reinterpret_cast<char*>(networkAudioSamples), numNetworkBytes);
|
QByteArray decodedBuffer(reinterpret_cast<char*>(networkAudioSamples), numNetworkBytes);
|
||||||
QByteArray encodedBuffer;
|
QByteArray encodedBuffer;
|
||||||
if (_encoder) {
|
if (_encoder) {
|
||||||
_encoder->encode(decocedBuffer, encodedBuffer);
|
_encoder->encode(decodedBuffer, encodedBuffer);
|
||||||
} else {
|
} else {
|
||||||
encodedBuffer = decocedBuffer;
|
encodedBuffer = decodedBuffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
emitAudioPacket(encodedBuffer.constData(), encodedBuffer.size(), _outgoingAvatarAudioSequenceNumber, audioTransform, packetType, _selectedCodecName);
|
emitAudioPacket(encodedBuffer.constData(), encodedBuffer.size(), _outgoingAvatarAudioSequenceNumber, audioTransform, packetType, _selectedCodecName);
|
||||||
|
@ -958,13 +960,9 @@ void AudioClient::handleRecordedAudioInput(const QByteArray& audio) {
|
||||||
emitAudioPacket(encodedBuffer.data(), encodedBuffer.size(), _outgoingAvatarAudioSequenceNumber, audioTransform, PacketType::MicrophoneAudioWithEcho, _selectedCodecName);
|
emitAudioPacket(encodedBuffer.data(), encodedBuffer.size(), _outgoingAvatarAudioSequenceNumber, audioTransform, PacketType::MicrophoneAudioWithEcho, _selectedCodecName);
|
||||||
}
|
}
|
||||||
|
|
||||||
void AudioClient::mixLocalAudioInjectors(int16_t* inputBuffer) {
|
void AudioClient::mixLocalAudioInjectors(float* mixBuffer) {
|
||||||
|
|
||||||
memset(_hrtfBuffer, 0, sizeof(_hrtfBuffer));
|
|
||||||
QVector<AudioInjector*> injectorsToRemove;
|
QVector<AudioInjector*> injectorsToRemove;
|
||||||
static const float INT16_TO_FLOAT_SCALE_FACTOR = 1/32768.0f;
|
|
||||||
|
|
||||||
bool injectorsHaveData = false;
|
|
||||||
|
|
||||||
// lock the injector vector
|
// lock the injector vector
|
||||||
Lock lock(_injectorsMutex);
|
Lock lock(_injectorsMutex);
|
||||||
|
@ -972,19 +970,17 @@ void AudioClient::mixLocalAudioInjectors(int16_t* inputBuffer) {
|
||||||
for (AudioInjector* injector : getActiveLocalAudioInjectors()) {
|
for (AudioInjector* injector : getActiveLocalAudioInjectors()) {
|
||||||
if (injector->getLocalBuffer()) {
|
if (injector->getLocalBuffer()) {
|
||||||
|
|
||||||
qint64 samplesToRead = injector->isStereo() ?
|
qint64 samplesToRead = injector->isStereo() ? AudioConstants::NETWORK_FRAME_BYTES_STEREO : AudioConstants::NETWORK_FRAME_BYTES_PER_CHANNEL;
|
||||||
AudioConstants::NETWORK_FRAME_BYTES_STEREO :
|
|
||||||
AudioConstants::NETWORK_FRAME_BYTES_PER_CHANNEL;
|
|
||||||
|
|
||||||
// get one frame from the injector (mono or stereo)
|
// get one frame from the injector (mono or stereo)
|
||||||
memset(_scratchBuffer, 0, sizeof(_scratchBuffer));
|
memset(_scratchBuffer, 0, sizeof(_scratchBuffer));
|
||||||
if (0 < injector->getLocalBuffer()->readData((char*)_scratchBuffer, samplesToRead)) {
|
if (0 < injector->getLocalBuffer()->readData((char*)_scratchBuffer, samplesToRead)) {
|
||||||
|
|
||||||
injectorsHaveData = true;
|
if (injector->isStereo()) {
|
||||||
|
|
||||||
if (injector->isStereo() ) {
|
// stereo gets directly mixed into mixBuffer
|
||||||
for(int i=0; i<AudioConstants::NETWORK_FRAME_SAMPLES_STEREO; i++) {
|
for (int i = 0; i < AudioConstants::NETWORK_FRAME_SAMPLES_STEREO; i++) {
|
||||||
_hrtfBuffer[i] += (float)(_scratchBuffer[i]) * INT16_TO_FLOAT_SCALE_FACTOR;
|
mixBuffer[i] += (float)_scratchBuffer[i] * (1/32768.0f);
|
||||||
}
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
@ -995,7 +991,8 @@ void AudioClient::mixLocalAudioInjectors(int16_t* inputBuffer) {
|
||||||
float gain = gainForSource(distance, injector->getVolume());
|
float gain = gainForSource(distance, injector->getVolume());
|
||||||
float azimuth = azimuthForSource(relativePosition);
|
float azimuth = azimuthForSource(relativePosition);
|
||||||
|
|
||||||
injector->getLocalHRTF().render(_scratchBuffer, _hrtfBuffer, 1, azimuth, distance, gain, AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);
|
// mono gets spatialized into mixBuffer
|
||||||
|
injector->getLocalHRTF().render(_scratchBuffer, mixBuffer, 1, azimuth, distance, gain, AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);
|
||||||
}
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
@ -1013,55 +1010,47 @@ void AudioClient::mixLocalAudioInjectors(int16_t* inputBuffer) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if(injectorsHaveData) {
|
for (AudioInjector* injector : injectorsToRemove) {
|
||||||
|
|
||||||
// mix network into the hrtfBuffer
|
|
||||||
for(int i=0; i<AudioConstants::NETWORK_FRAME_SAMPLES_STEREO; i++) {
|
|
||||||
_hrtfBuffer[i] += (float)(inputBuffer[i]) * INT16_TO_FLOAT_SCALE_FACTOR;
|
|
||||||
}
|
|
||||||
|
|
||||||
// now, use limiter to write back to the inputBuffer
|
|
||||||
_audioLimiter.render(_hrtfBuffer, inputBuffer, AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);
|
|
||||||
}
|
|
||||||
|
|
||||||
for(AudioInjector* injector : injectorsToRemove) {
|
|
||||||
qDebug() << "removing injector";
|
qDebug() << "removing injector";
|
||||||
getActiveLocalAudioInjectors().removeOne(injector);
|
getActiveLocalAudioInjectors().removeOne(injector);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void AudioClient::processReceivedSamples(const QByteArray& decodedBuffer, QByteArray& outputBuffer) {
|
void AudioClient::processReceivedSamples(const QByteArray& decodedBuffer, QByteArray& outputBuffer) {
|
||||||
const int numDecodecSamples = decodedBuffer.size() / sizeof(int16_t);
|
|
||||||
const int numDeviceOutputSamples = _outputFrameSize;
|
|
||||||
|
|
||||||
Q_ASSERT(_outputFrameSize == numDecodecSamples * (_outputFormat.sampleRate() * _outputFormat.channelCount())
|
const int16_t* decodedSamples = reinterpret_cast<const int16_t*>(decodedBuffer.data());
|
||||||
/ (_desiredOutputFormat.sampleRate() * _desiredOutputFormat.channelCount()));
|
|
||||||
|
|
||||||
outputBuffer.resize(numDeviceOutputSamples * sizeof(int16_t));
|
|
||||||
|
|
||||||
const int16_t* decodedSamples;
|
|
||||||
int16_t* outputSamples = reinterpret_cast<int16_t*>(outputBuffer.data());
|
|
||||||
QByteArray decodedBufferCopy = decodedBuffer;
|
|
||||||
assert(decodedBuffer.size() == AudioConstants::NETWORK_FRAME_BYTES_STEREO);
|
assert(decodedBuffer.size() == AudioConstants::NETWORK_FRAME_BYTES_STEREO);
|
||||||
|
|
||||||
if(getActiveLocalAudioInjectors().size() > 0) {
|
outputBuffer.resize(_outputFrameSize * sizeof(int16_t));
|
||||||
mixLocalAudioInjectors((int16_t*)decodedBufferCopy.data());
|
int16_t* outputSamples = reinterpret_cast<int16_t*>(outputBuffer.data());
|
||||||
decodedSamples = reinterpret_cast<const int16_t*>(decodedBufferCopy.data());
|
|
||||||
} else {
|
// convert network audio to float
|
||||||
decodedSamples = reinterpret_cast<const int16_t*>(decodedBuffer.data());
|
for (int i = 0; i < AudioConstants::NETWORK_FRAME_SAMPLES_STEREO; i++) {
|
||||||
|
_mixBuffer[i] = (float)decodedSamples[i] * (1/32768.0f);
|
||||||
|
}
|
||||||
|
|
||||||
|
// mix in active injectors
|
||||||
|
if (getActiveLocalAudioInjectors().size() > 0) {
|
||||||
|
mixLocalAudioInjectors(_mixBuffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
// copy the packet from the RB to the output
|
// apply stereo reverb
|
||||||
possibleResampling(_networkToOutputResampler, decodedSamples, outputSamples,
|
|
||||||
numDecodecSamples, numDeviceOutputSamples,
|
|
||||||
_desiredOutputFormat, _outputFormat);
|
|
||||||
|
|
||||||
// apply stereo reverb at the listener, to the received audio
|
|
||||||
bool hasReverb = _reverb || _receivedAudioStream.hasReverb();
|
bool hasReverb = _reverb || _receivedAudioStream.hasReverb();
|
||||||
if (hasReverb) {
|
if (hasReverb) {
|
||||||
assert(_outputFormat.channelCount() == 2);
|
|
||||||
updateReverbOptions();
|
updateReverbOptions();
|
||||||
_listenerReverb.render(outputSamples, outputSamples, numDeviceOutputSamples/2);
|
_listenerReverb.render(_mixBuffer, _mixBuffer, AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (_networkToOutputResampler) {
|
||||||
|
|
||||||
|
// resample to output sample rate
|
||||||
|
_audioLimiter.render(_mixBuffer, _scratchBuffer, AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);
|
||||||
|
_networkToOutputResampler->render(_scratchBuffer, outputSamples, AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);
|
||||||
|
|
||||||
|
} else {
|
||||||
|
|
||||||
|
// no resampling needed
|
||||||
|
_audioLimiter.render(_mixBuffer, outputSamples, AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -227,7 +227,7 @@ protected:
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void outputFormatChanged();
|
void outputFormatChanged();
|
||||||
void mixLocalAudioInjectors(int16_t* inputBuffer);
|
void mixLocalAudioInjectors(float* mixBuffer);
|
||||||
float azimuthForSource(const glm::vec3& relativePosition);
|
float azimuthForSource(const glm::vec3& relativePosition);
|
||||||
float gainForSource(float distance, float volume);
|
float gainForSource(float distance, float volume);
|
||||||
|
|
||||||
|
@ -253,18 +253,15 @@ private:
|
||||||
Gate _gate;
|
Gate _gate;
|
||||||
|
|
||||||
Mutex _injectorsMutex;
|
Mutex _injectorsMutex;
|
||||||
QByteArray firstInputFrame;
|
|
||||||
QAudioInput* _audioInput;
|
QAudioInput* _audioInput;
|
||||||
QAudioFormat _desiredInputFormat;
|
QAudioFormat _desiredInputFormat;
|
||||||
QAudioFormat _inputFormat;
|
QAudioFormat _inputFormat;
|
||||||
QIODevice* _inputDevice;
|
QIODevice* _inputDevice;
|
||||||
int _numInputCallbackBytes;
|
int _numInputCallbackBytes;
|
||||||
int16_t _localProceduralSamples[AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL];
|
|
||||||
QAudioOutput* _audioOutput;
|
QAudioOutput* _audioOutput;
|
||||||
QAudioFormat _desiredOutputFormat;
|
QAudioFormat _desiredOutputFormat;
|
||||||
QAudioFormat _outputFormat;
|
QAudioFormat _outputFormat;
|
||||||
int _outputFrameSize;
|
int _outputFrameSize;
|
||||||
int16_t _outputProcessingBuffer[AudioConstants::NETWORK_FRAME_SAMPLES_STEREO];
|
|
||||||
int _numOutputCallbackBytes;
|
int _numOutputCallbackBytes;
|
||||||
QAudioOutput* _loopbackAudioOutput;
|
QAudioOutput* _loopbackAudioOutput;
|
||||||
QIODevice* _loopbackOutputDevice;
|
QIODevice* _loopbackOutputDevice;
|
||||||
|
@ -309,7 +306,7 @@ private:
|
||||||
AudioSRC* _networkToOutputResampler;
|
AudioSRC* _networkToOutputResampler;
|
||||||
|
|
||||||
// for local hrtf-ing
|
// for local hrtf-ing
|
||||||
float _hrtfBuffer[AudioConstants::NETWORK_FRAME_SAMPLES_STEREO];
|
float _mixBuffer[AudioConstants::NETWORK_FRAME_SAMPLES_STEREO];
|
||||||
int16_t _scratchBuffer[AudioConstants::NETWORK_FRAME_SAMPLES_STEREO];
|
int16_t _scratchBuffer[AudioConstants::NETWORK_FRAME_SAMPLES_STEREO];
|
||||||
AudioLimiter _audioLimiter;
|
AudioLimiter _audioLimiter;
|
||||||
|
|
||||||
|
|
|
@ -26,7 +26,7 @@
|
||||||
#include "SoundCache.h"
|
#include "SoundCache.h"
|
||||||
#include "AudioSRC.h"
|
#include "AudioSRC.h"
|
||||||
|
|
||||||
//int audioInjectorPtrMetaTypeId = qRegisterMetaType<AudioInjector*>();
|
int audioInjectorPtrMetaTypeId = qRegisterMetaType<AudioInjector*>();
|
||||||
|
|
||||||
AudioInjectorState operator& (AudioInjectorState lhs, AudioInjectorState rhs) {
|
AudioInjectorState operator& (AudioInjectorState lhs, AudioInjectorState rhs) {
|
||||||
return static_cast<AudioInjectorState>(static_cast<uint8_t>(lhs) & static_cast<uint8_t>(rhs));
|
return static_cast<AudioInjectorState>(static_cast<uint8_t>(lhs) & static_cast<uint8_t>(rhs));
|
||||||
|
|
Loading…
Reference in a new issue