Merge pull request #8598 from kencooke/optimized-audio-pipeline

Optimized audio pipeline
Andrew Meadows 2016-09-14 12:18:40 -07:00 committed by GitHub
commit 06b17995b1
3 changed files with 56 additions and 70 deletions

AudioClient.cpp

@@ -146,9 +146,6 @@ AudioClient::AudioClient() :
_positionGetter(DEFAULT_POSITION_GETTER),
_orientationGetter(DEFAULT_ORIENTATION_GETTER)
{
// clear the array of locally injected samples
memset(_localProceduralSamples, 0, AudioConstants::NETWORK_FRAME_BYTES_PER_CHANNEL);
connect(&_receivedAudioStream, &MixedProcessedAudioStream::processSamples,
this, &AudioClient::processReceivedSamples, Qt::DirectConnection);
connect(this, &AudioClient::changeDevice, this, [=](const QAudioDeviceInfo& outputDeviceInfo) { switchOutputToAudioDevice(outputDeviceInfo); });
@@ -374,7 +371,8 @@ bool adjustedFormatForAudioDevice(const QAudioDeviceInfo& audioDevice,
adjustedAudioFormat = desiredAudioFormat;
#ifdef Q_OS_ANDROID
adjustedAudioFormat.setSampleRate(44100);
// FIXME: query the native sample rate of the device?
adjustedAudioFormat.setSampleRate(48000);
#else
//
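
The FIXME above points at a cleaner fix than swapping one hard-coded rate for another. A minimal sketch of that query using Qt's QAudioDeviceInfo::preferredFormat(), offered as an assumption about a possible follow-up rather than anything in this commit:

// Possible follow-up for the FIXME: ask Qt for the device's preferred
// (native) format instead of hard-coding 48000 Hz. Sketch only.
#include <QAudioDeviceInfo>

static int nativeSampleRate(const QAudioDeviceInfo& audioDevice) {
    int rate = audioDevice.preferredFormat().sampleRate();
    return (rate > 0) ? rate : 48000;   // fall back to the rate used above
}
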
@@ -683,8 +681,8 @@ bool AudioClient::switchOutputToAudioDevice(const QString& outputDeviceName) {
void AudioClient::configureReverb() {
ReverbParameters p;
p.sampleRate = _outputFormat.sampleRate();
p.sampleRate = AudioConstants::SAMPLE_RATE;
p.bandwidth = _reverbOptions->getBandwidth();
p.preDelay = _reverbOptions->getPreDelay();
p.lateDelay = _reverbOptions->getLateDelay();
@@ -710,6 +708,7 @@ void AudioClient::configureReverb() {
_listenerReverb.setParameters(&p);
// used only for adding self-reverb to loopback audio
p.sampleRate = _outputFormat.sampleRate();
p.wetDryMix = 100.0f;
p.preDelay = 0.0f;
p.earlyGain = -96.0f; // disable ER
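
Taken together, the two configureReverb() hunks split the reverb rates: the listener reverb now processes network-rate audio (it runs before the output resampler, see processReceivedSamples below), while the self-reverb applied to loopback audio stays at the device rate. A condensed sketch of the resulting tail of the function; the final _sourceReverb.setParameters() call is an assumption about code elided from this diff:

// Sketch: the listener reverb runs at the fixed network rate...
p.sampleRate = AudioConstants::SAMPLE_RATE;    // listener reverb: network rate
_listenerReverb.setParameters(&p);

// ...while the loopback self-reverb keeps the device output rate
p.sampleRate = _outputFormat.sampleRate();     // source reverb: device rate
p.wetDryMix = 100.0f;                          // loopback reverb is fully wet
p.preDelay = 0.0f;
p.earlyGain = -96.0f;                          // disable early reflections
_sourceReverb.setParameters(&p);               // assumed closing call
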
@@ -824,15 +823,14 @@ void AudioClient::handleLocalEchoAndReverb(QByteArray& inputByteArray) {
int16_t* inputSamples = reinterpret_cast<int16_t*>(inputByteArray.data());
int16_t* loopbackSamples = reinterpret_cast<int16_t*>(loopBackByteArray.data());
auto NO_RESAMPLER = nullptr;
possibleResampling(NO_RESAMPLER,
inputSamples, loopbackSamples,
numInputSamples, numLoopbackSamples,
_inputFormat, _outputFormat);
// upmix mono to stereo
if (!sampleChannelConversion(inputSamples, loopbackSamples, numInputSamples, _inputFormat, _outputFormat)) {
// no conversion, just copy the samples
memcpy(loopbackSamples, inputSamples, numInputSamples * sizeof(int16_t));
}
// apply stereo reverb at the source, to the loopback audio
if (!_shouldEchoLocally && hasReverb) {
assert(_outputFormat.channelCount() == 2);
updateReverbOptions();
_sourceReverb.render(loopbackSamples, loopbackSamples, numLoopbackSamples/2);
}
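
When the microphone is mono and the output is stereo, sampleChannelConversion() upmixes before the loopback reverb runs; when the channel counts already match, it returns false and the samples are copied through unchanged. A self-contained sketch of the upmix case, with a hypothetical helper name rather than the actual library code:

// Sketch of the mono -> stereo upmix performed for loopback audio:
// duplicate each mono sample into both output channels.
#include <cstdint>

static void upmixMonoToStereo(const int16_t* mono, int16_t* stereo, int numMonoSamples) {
    for (int i = 0; i < numMonoSamples; i++) {
        stereo[2 * i + 0] = mono[i];   // left
        stereo[2 * i + 1] = mono[i];   // right
    }
}
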
@@ -841,8 +839,12 @@ void AudioClient::handleLocalEchoAndReverb(QByteArray& inputByteArray) {
}
void AudioClient::handleAudioInput() {
// input samples required to produce exactly NETWORK_FRAME_SAMPLES of output
const int inputSamplesRequired = _inputFormat.channelCount() * _inputToNetworkResampler->getMinInput(AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);
const int inputSamplesRequired = (_inputToNetworkResampler ?
_inputToNetworkResampler->getMinInput(AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL) :
AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL) * _inputFormat.channelCount();
const auto inputAudioSamples = std::unique_ptr<int16_t[]>(new int16_t[inputSamplesRequired]);
QByteArray inputByteArray = _inputDevice->readAll();
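
The new inputSamplesRequired expression guards against a null resampler: when the input device already runs at the network sample rate there is nothing to resample, so exactly one network frame per channel is read. Restated as a standalone helper (hypothetical; names taken from the diff):

// Standalone restatement of the guard above. When no resampler exists, the
// input rate equals the network rate and one network frame per channel is
// needed as-is; otherwise ask the resampler how much input yields one frame.
static int inputSamplesRequiredFor(AudioSRC* inputToNetworkResampler, int inputChannelCount) {
    int samplesPerChannel = inputToNetworkResampler
        ? inputToNetworkResampler->getMinInput(AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL)
        : AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL;
    return samplesPerChannel * inputChannelCount;
}
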
@@ -929,12 +931,12 @@ void AudioClient::handleAudioInput() {
audioTransform.setRotation(_orientationGetter());
// FIXME find a way to properly handle both playback audio and user audio concurrently
QByteArray decocedBuffer(reinterpret_cast<char*>(networkAudioSamples), numNetworkBytes);
QByteArray decodedBuffer(reinterpret_cast<char*>(networkAudioSamples), numNetworkBytes);
QByteArray encodedBuffer;
if (_encoder) {
_encoder->encode(decocedBuffer, encodedBuffer);
_encoder->encode(decodedBuffer, encodedBuffer);
} else {
encodedBuffer = decocedBuffer;
encodedBuffer = decodedBuffer;
}
emitAudioPacket(encodedBuffer.constData(), encodedBuffer.size(), _outgoingAvatarAudioSequenceNumber, audioTransform, packetType, _selectedCodecName);
@@ -958,13 +960,9 @@ void AudioClient::handleRecordedAudioInput(const QByteArray& audio) {
emitAudioPacket(encodedBuffer.data(), encodedBuffer.size(), _outgoingAvatarAudioSequenceNumber, audioTransform, PacketType::MicrophoneAudioWithEcho, _selectedCodecName);
}
void AudioClient::mixLocalAudioInjectors(int16_t* inputBuffer) {
void AudioClient::mixLocalAudioInjectors(float* mixBuffer) {
memset(_hrtfBuffer, 0, sizeof(_hrtfBuffer));
QVector<AudioInjector*> injectorsToRemove;
static const float INT16_TO_FLOAT_SCALE_FACTOR = 1/32768.0f;
bool injectorsHaveData = false;
// lock the injector vector
Lock lock(_injectorsMutex);
@@ -972,19 +970,17 @@ void AudioClient::mixLocalAudioInjectors(int16_t* inputBuffer) {
for (AudioInjector* injector : getActiveLocalAudioInjectors()) {
if (injector->getLocalBuffer()) {
qint64 samplesToRead = injector->isStereo() ?
AudioConstants::NETWORK_FRAME_BYTES_STEREO :
AudioConstants::NETWORK_FRAME_BYTES_PER_CHANNEL;
qint64 samplesToRead = injector->isStereo() ? AudioConstants::NETWORK_FRAME_BYTES_STEREO : AudioConstants::NETWORK_FRAME_BYTES_PER_CHANNEL;
// get one frame from the injector (mono or stereo)
memset(_scratchBuffer, 0, sizeof(_scratchBuffer));
if (0 < injector->getLocalBuffer()->readData((char*)_scratchBuffer, samplesToRead)) {
injectorsHaveData = true;
if (injector->isStereo()) {
if (injector->isStereo() ) {
for(int i=0; i<AudioConstants::NETWORK_FRAME_SAMPLES_STEREO; i++) {
_hrtfBuffer[i] += (float)(_scratchBuffer[i]) * INT16_TO_FLOAT_SCALE_FACTOR;
// stereo gets directly mixed into mixBuffer
for (int i = 0; i < AudioConstants::NETWORK_FRAME_SAMPLES_STEREO; i++) {
mixBuffer[i] += (float)_scratchBuffer[i] * (1/32768.0f);
}
} else {
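
Stereo injector frames are already in network-frame layout, so they are accumulated straight into the float mix buffer; the 1/32768.0f factor maps int16 samples onto roughly [-1.0, 1.0). The accumulate step as a standalone sketch:

// Standalone sketch of the int16 -> float accumulate used for stereo
// injectors above: scale each sample by 1/32768 and add (not overwrite),
// since several injectors may mix into the same buffer.
#include <cstdint>

static void mixInt16IntoFloat(const int16_t* src, float* dst, int numSamples) {
    const float scale = 1.0f / 32768.0f;
    for (int i = 0; i < numSamples; i++) {
        dst[i] += (float)src[i] * scale;
    }
}
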
@@ -995,7 +991,8 @@ void AudioClient::mixLocalAudioInjectors(int16_t* inputBuffer) {
float gain = gainForSource(distance, injector->getVolume());
float azimuth = azimuthForSource(relativePosition);
injector->getLocalHRTF().render(_scratchBuffer, _hrtfBuffer, 1, azimuth, distance, gain, AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);
// mono gets spatialized into mixBuffer
injector->getLocalHRTF().render(_scratchBuffer, mixBuffer, 1, azimuth, distance, gain, AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);
}
} else {
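
Mono injectors instead go through the per-injector HRTF, which spatializes them into the stereo mix buffer from an azimuth, a distance, and a gain. One plausible way to derive the azimuth from the listener-relative position; the actual azimuthForSource() implementation may differ:

// Plausible sketch of deriving an azimuth from a listener-relative position:
// project onto the horizontal plane and measure the angle around "up".
#include <glm/glm.hpp>
#include <cmath>

static float azimuthFromRelativePosition(const glm::vec3& relativePosition) {
    return std::atan2(relativePosition.x, -relativePosition.z);
}
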
@@ -1013,55 +1010,47 @@ void AudioClient::mixLocalAudioInjectors(int16_t* inputBuffer) {
}
}
if(injectorsHaveData) {
// mix network into the hrtfBuffer
for(int i=0; i<AudioConstants::NETWORK_FRAME_SAMPLES_STEREO; i++) {
_hrtfBuffer[i] += (float)(inputBuffer[i]) * INT16_TO_FLOAT_SCALE_FACTOR;
}
// now, use limiter to write back to the inputBuffer
_audioLimiter.render(_hrtfBuffer, inputBuffer, AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);
}
for(AudioInjector* injector : injectorsToRemove) {
for (AudioInjector* injector : injectorsToRemove) {
qDebug() << "removing injector";
getActiveLocalAudioInjectors().removeOne(injector);
}
}
void AudioClient::processReceivedSamples(const QByteArray& decodedBuffer, QByteArray& outputBuffer) {
const int numDecodecSamples = decodedBuffer.size() / sizeof(int16_t);
const int numDeviceOutputSamples = _outputFrameSize;
Q_ASSERT(_outputFrameSize == numDecodecSamples * (_outputFormat.sampleRate() * _outputFormat.channelCount())
/ (_desiredOutputFormat.sampleRate() * _desiredOutputFormat.channelCount()));
outputBuffer.resize(numDeviceOutputSamples * sizeof(int16_t));
const int16_t* decodedSamples;
int16_t* outputSamples = reinterpret_cast<int16_t*>(outputBuffer.data());
QByteArray decodedBufferCopy = decodedBuffer;
const int16_t* decodedSamples = reinterpret_cast<const int16_t*>(decodedBuffer.data());
assert(decodedBuffer.size() == AudioConstants::NETWORK_FRAME_BYTES_STEREO);
if(getActiveLocalAudioInjectors().size() > 0) {
mixLocalAudioInjectors((int16_t*)decodedBufferCopy.data());
decodedSamples = reinterpret_cast<const int16_t*>(decodedBufferCopy.data());
} else {
decodedSamples = reinterpret_cast<const int16_t*>(decodedBuffer.data());
outputBuffer.resize(_outputFrameSize * sizeof(int16_t));
int16_t* outputSamples = reinterpret_cast<int16_t*>(outputBuffer.data());
// convert network audio to float
for (int i = 0; i < AudioConstants::NETWORK_FRAME_SAMPLES_STEREO; i++) {
_mixBuffer[i] = (float)decodedSamples[i] * (1/32768.0f);
}
// mix in active injectors
if (getActiveLocalAudioInjectors().size() > 0) {
mixLocalAudioInjectors(_mixBuffer);
}
// copy the packet from the RB to the output
possibleResampling(_networkToOutputResampler, decodedSamples, outputSamples,
numDecodecSamples, numDeviceOutputSamples,
_desiredOutputFormat, _outputFormat);
// apply stereo reverb at the listener, to the received audio
// apply stereo reverb
bool hasReverb = _reverb || _receivedAudioStream.hasReverb();
if (hasReverb) {
assert(_outputFormat.channelCount() == 2);
updateReverbOptions();
_listenerReverb.render(outputSamples, outputSamples, numDeviceOutputSamples/2);
_listenerReverb.render(_mixBuffer, _mixBuffer, AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);
}
if (_networkToOutputResampler) {
// resample to output sample rate
_audioLimiter.render(_mixBuffer, _scratchBuffer, AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);
_networkToOutputResampler->render(_scratchBuffer, outputSamples, AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);
} else {
// no resampling needed
_audioLimiter.render(_mixBuffer, outputSamples, AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);
}
}
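
One reason the limiter is the last float-domain stage: after the network frame, local injectors, and reverb are summed, samples can exceed [-1.0, 1.0), and the final int16 conversion has to handle that overshoot. A naive hard-clip sketch of that conversion, only to illustrate the problem AudioLimiter solves more gracefully (with gain reduction instead of clipping):

// Naive float -> int16 conversion: clamp before scaling back. AudioLimiter
// replaces this hard clip with smooth, lookahead gain reduction.
#include <cstdint>
#include <algorithm>

static int16_t floatToInt16(float x) {
    x = std::min(std::max(x, -1.0f), 1.0f);   // hard clip
    return (int16_t)(x * 32767.0f);
}
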

AudioClient.h

@@ -227,7 +227,7 @@ protected:
private:
void outputFormatChanged();
void mixLocalAudioInjectors(int16_t* inputBuffer);
void mixLocalAudioInjectors(float* mixBuffer);
float azimuthForSource(const glm::vec3& relativePosition);
float gainForSource(float distance, float volume);
@@ -253,18 +253,15 @@ private:
Gate _gate;
Mutex _injectorsMutex;
QByteArray firstInputFrame;
QAudioInput* _audioInput;
QAudioFormat _desiredInputFormat;
QAudioFormat _inputFormat;
QIODevice* _inputDevice;
int _numInputCallbackBytes;
int16_t _localProceduralSamples[AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL];
QAudioOutput* _audioOutput;
QAudioFormat _desiredOutputFormat;
QAudioFormat _outputFormat;
int _outputFrameSize;
int16_t _outputProcessingBuffer[AudioConstants::NETWORK_FRAME_SAMPLES_STEREO];
int _numOutputCallbackBytes;
QAudioOutput* _loopbackAudioOutput;
QIODevice* _loopbackOutputDevice;
@@ -309,7 +306,7 @@ private:
AudioSRC* _networkToOutputResampler;
// for local hrtf-ing
float _hrtfBuffer[AudioConstants::NETWORK_FRAME_SAMPLES_STEREO];
float _mixBuffer[AudioConstants::NETWORK_FRAME_SAMPLES_STEREO];
int16_t _scratchBuffer[AudioConstants::NETWORK_FRAME_SAMPLES_STEREO];
AudioLimiter _audioLimiter;

AudioInjector.cpp

@@ -26,7 +26,7 @@
#include "SoundCache.h"
#include "AudioSRC.h"
//int audioInjectorPtrMetaTypeId = qRegisterMetaType<AudioInjector*>();
int audioInjectorPtrMetaTypeId = qRegisterMetaType<AudioInjector*>();
AudioInjectorState operator& (AudioInjectorState lhs, AudioInjectorState rhs) {
return static_cast<AudioInjectorState>(static_cast<uint8_t>(lhs) & static_cast<uint8_t>(rhs));
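
AudioInjectorState is a uint8_t-backed enum class used as a flag set, which is why operator& has to be defined by hand. A typical usage sketch; it assumes no specific flag names beyond the operator shown above:

// Usage sketch for the operator above: test whether any bit of 'flag'
// is set in 'state'. Works for any uint8_t-backed flags enum.
static bool hasFlag(AudioInjectorState state, AudioInjectorState flag) {
    return static_cast<uint8_t>(state & flag) != 0;
}
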