Baseline AEC using WebRTC, always enabled.

Audio streams are hooked as close as possible to device input/output, re-buffering as needed.
Ken Cooke 2019-07-20 13:05:11 -07:00 committed by Seth Alves
parent c3b9e4806a
commit 569c76c8ba
2 changed files with 150 additions and 18 deletions
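For orientation, the snippet below is a minimal sketch (not part of the diff) of the WebRTC AudioProcessing call pattern this commit wires in: the mixed speaker output becomes the far-end reference via ProcessReverseStream(), and the microphone capture is cleaned up in place via ProcessStream(), both in 10 ms chunks. The names setupAec/onSpeakerChunk/onMicChunk are illustrative only; in the diff the equivalent work is done by configureWebrtc(), processWebrtcFarEnd() (which also rebuffers arbitrary device-callback sizes into 10 ms chunks), and processWebrtcNearEnd().

// Sketch only; the header path may differ depending on how WebRTC is packaged.
#include <modules/audio_processing/include/audio_processing.h>

static webrtc::AudioProcessing* apm = nullptr;

void setupAec() {
    apm = webrtc::AudioProcessingBuilder().Create();
    webrtc::AudioProcessing::Config config;
    config.echo_canceller.enabled = true;       // full (non-mobile) AEC
    config.echo_canceller.mobile_mode = false;
    apm->ApplyConfig(config);
}

// Far end: the final mixed output that the speakers will play.
void onSpeakerChunk(float* const* planar, int sampleRate, int numChannels) {
    webrtc::StreamConfig cfg(sampleRate, numChannels);  // num_frames() == 10 ms worth
    apm->ProcessReverseStream(planar, cfg, cfg, planar);
}

// Near end: captured microphone audio; echo is removed in place.
void onMicChunk(float* const* planar, int sampleRate, int numChannels) {
    webrtc::StreamConfig cfg(sampleRate, numChannels);
    apm->ProcessStream(planar, cfg, cfg, planar);
}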

AudioClient.cpp

@@ -303,12 +303,6 @@ AudioClient::AudioClient() :
#endif
    _orientationGetter(DEFAULT_ORIENTATION_GETTER) {
#if defined(WEBRTC_ENABLED)
    qDebug() << "QQQQ calling AudioProcessingBuilder";
    _apm = webrtc::AudioProcessingBuilder().Create();
    qDebug() << "QQQQ done calling AudioProcessingBuilder";
#endif
    // avoid putting a lock in the device callback
    assert(_localSamplesAvailable.is_lock_free());
@@ -360,6 +354,10 @@ AudioClient::AudioClient() :
    configureReverb();

#if defined(WEBRTC_ENABLED)
    configureWebrtc();
#endif

    auto nodeList = DependencyManager::get<NodeList>();
    auto& packetReceiver = nodeList->getPacketReceiver();
    packetReceiver.registerListener(PacketType::AudioStreamStats, &_stats, "processStreamStatsPacket");
@@ -1091,6 +1089,137 @@ void AudioClient::setReverbOptions(const AudioEffectOptions* options) {
    }
}

#if defined(WEBRTC_ENABLED)

static const int WEBRTC_FRAMES_MAX = webrtc::AudioProcessing::kChunkSizeMs * webrtc::AudioProcessing::kMaxNativeSampleRateHz / 1000;
static const int WEBRTC_CHANNELS_MAX = 2;
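// With kChunkSizeMs = 10 and kMaxNativeSampleRateHz = 48000, WEBRTC_FRAMES_MAX works out to
// 480 frames, i.e. one 10 ms chunk at the highest sample rate the APM handles natively.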

static void deinterleaveToFloat(const int16_t* src, float* const* dst, int numFrames, int numChannels) {
    for (int i = 0; i < numFrames; i++) {
        for (int ch = 0; ch < numChannels; ch++) {
            float f = *src++;
            f *= (1/32768.0f);  // scale
            dst[ch][i] = f;     // deinterleave
        }
    }
}

static void interleaveToInt16(const float* const* src, int16_t* dst, int numFrames, int numChannels) {
    for (int i = 0; i < numFrames; i++) {
        for (int ch = 0; ch < numChannels; ch++) {
            float f = src[ch][i];
            f *= 32768.0f;                                   // scale
            f += (f < 0.0f) ? -0.5f : 0.5f;                  // round
            f = std::max(std::min(f, 32767.0f), -32768.0f);  // saturate
            *dst++ = (int16_t)f;                             // interleave
        }
    }
}

void AudioClient::configureWebrtc() {
    _apm = webrtc::AudioProcessingBuilder().Create();

    webrtc::AudioProcessing::Config config;

    config.pre_amplifier.enabled = false;
    config.high_pass_filter.enabled = false;
    config.echo_canceller.enabled = true;
    config.echo_canceller.mobile_mode = false;
    config.echo_canceller.use_legacy_aec = false;
    config.noise_suppression.enabled = false;
    config.noise_suppression.level = webrtc::AudioProcessing::Config::NoiseSuppression::kModerate;
    config.voice_detection.enabled = false;
    config.gain_controller1.enabled = false;
    config.gain_controller2.enabled = false;
    config.gain_controller2.fixed_digital.gain_db = 0.0f;
    config.gain_controller2.adaptive_digital.enabled = false;
    config.residual_echo_detector.enabled = true;
    config.level_estimation.enabled = false;

    _apm->ApplyConfig(config);

    qCDebug(audioclient) << "WebRTC enabled for acoustic echo cancellation.";
}
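Of the modules available on the AudioProcessing object, only the echo canceller (the full non-mobile, non-legacy AEC) and the residual echo detector end up enabled here; the pre-amplifier, high-pass filter, noise suppression, voice detection, both gain controllers, and level estimation are all explicitly disabled.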

// rebuffer into 10ms chunks
void AudioClient::processWebrtcFarEnd(const int16_t* samples, int numFrames, int numChannels, int sampleRate) {

    // TODO: move to AudioClient.h
    static int16_t _fifo[WEBRTC_CHANNELS_MAX * WEBRTC_FRAMES_MAX];
    static int _numFifo = 0;  // number of frames currently buffered in the fifo

    const webrtc::StreamConfig streamConfig = webrtc::StreamConfig(sampleRate, numChannels);
    const int numChunk = (int)streamConfig.num_frames();

    if (sampleRate > webrtc::AudioProcessing::kMaxNativeSampleRateHz) {
        qCWarning(audioclient) << "WebRTC does not support" << sampleRate << "output sample rate.";
        return;
    }
    if (numChannels > WEBRTC_CHANNELS_MAX) {
        qCWarning(audioclient) << "WebRTC does not support" << numChannels << "output channels.";
        return;
    }

    while (numFrames > 0) {

        // number of frames to fill
        int numFill = std::min(numFrames, numChunk - _numFifo);

        // refill fifo (samples are interleaved, so index by frames * channels)
        memcpy(&_fifo[_numFifo * numChannels], samples, numFill * numChannels * sizeof(int16_t));
        samples += numFill * numChannels;
        numFrames -= numFill;
        _numFifo += numFill;

        if (_numFifo == numChunk) {

            // convert audio format
            float buffer[WEBRTC_CHANNELS_MAX][WEBRTC_FRAMES_MAX];
            float* const buffers[WEBRTC_CHANNELS_MAX] = { buffer[0], buffer[1] };
            deinterleaveToFloat(_fifo, buffers, numChunk, numChannels);

            // process one chunk
            if (_apm->kNoError != _apm->ProcessReverseStream(buffers, streamConfig, streamConfig, buffers)) {
                qCWarning(audioclient) << "WebRTC ProcessReverseStream() returned an ERROR.";
            }
            _numFifo = 0;
        }
    }
}

void AudioClient::processWebrtcNearEnd(int16_t* samples, int numFrames, int numChannels, int sampleRate) {

    const webrtc::StreamConfig streamConfig = webrtc::StreamConfig(sampleRate, numChannels);
    const int numChunk = (int)streamConfig.num_frames();

    if (sampleRate > webrtc::AudioProcessing::kMaxNativeSampleRateHz) {
        qCWarning(audioclient) << "WebRTC does not support" << sampleRate << "input sample rate.";
        return;
    }
    if (numChannels > WEBRTC_CHANNELS_MAX) {
        qCWarning(audioclient) << "WebRTC does not support" << numChannels << "input channels.";
        return;
    }
    if (numFrames != numChunk) {
        qCWarning(audioclient) << "WebRTC requires exactly 10ms of input.";
        return;
    }

    // convert audio format
    float buffer[WEBRTC_CHANNELS_MAX][WEBRTC_FRAMES_MAX];
    float* const buffers[WEBRTC_CHANNELS_MAX] = { buffer[0], buffer[1] };
    deinterleaveToFloat(samples, buffers, numFrames, numChannels);

    // process one chunk
    if (_apm->kNoError != _apm->ProcessStream(buffers, streamConfig, streamConfig, buffers)) {
        qCWarning(audioclient) << "WebRTC ProcessStream() returned an ERROR.";
    }

    // modify samples in-place
    interleaveToInt16(buffers, samples, numFrames, numChannels);
}

#endif // WEBRTC_ENABLED

void AudioClient::handleLocalEchoAndReverb(QByteArray& inputByteArray) {
    // If there is server echo, reverb will be applied to the received audio stream, so there is no need to apply it here.
    bool hasReverb = _reverb || _receivedAudioStream.hasReverb();
@@ -1269,6 +1398,11 @@ void AudioClient::handleMicAudioInput() {
        _inputRingBuffer.readSamples(inputAudioSamples.get(), inputSamplesRequired);

#if defined(WEBRTC_ENABLED)
        processWebrtcNearEnd(inputAudioSamples.get(), inputSamplesRequired / _inputFormat.channelCount(),
                             _inputFormat.channelCount(), _inputFormat.sampleRate());
#endif

        // detect loudness and clipping on the raw input
        bool isClipping = false;
        float loudness = computeLoudness(inputAudioSamples.get(), inputSamplesRequired, _inputFormat.channelCount(), isClipping);
@@ -2185,13 +2319,9 @@ qint64 AudioClient::AudioOutputIODevice::readData(char * data, qint64 maxSize) {
            // limit the audio
            _audio->_audioLimiter.render(mixBuffer, scratchBuffer, framesPopped);

            // TODO:
            // At this point, scratchBuffer contains the final (mixed, limited) output audio.
            // format = interleaved int16_t
            // samples = samplesPopped
            // channels = OUTPUT_CHANNEL_COUNT
            // sampleRate = _outputFormat.sampleRate()
            // This can be used as the far-end signal for AEC.
#if defined(WEBRTC_ENABLED)
            _audio->processWebrtcFarEnd(scratchBuffer, framesPopped, OUTPUT_CHANNEL_COUNT, _audio->_outputFormat.sampleRate());
#endif

            // if required, upmix or downmix to deviceChannelCount
            if (deviceChannelCount == OUTPUT_CHANNEL_COUNT) {

AudioClient.h

@@ -415,9 +415,15 @@ private:
    // Adds Reverb
    void configureReverb();
    void updateReverbOptions();
    void handleLocalEchoAndReverb(QByteArray& inputByteArray);

#if defined(WEBRTC_ENABLED)
    webrtc::AudioProcessing* _apm { nullptr };

    void configureWebrtc();
    void processWebrtcFarEnd(const int16_t* samples, int numFrames, int numChannels, int sampleRate);
    void processWebrtcNearEnd(int16_t* samples, int numFrames, int numChannels, int sampleRate);
#endif

    bool switchInputToAudioDevice(const QAudioDeviceInfo inputDeviceInfo, bool isShutdownRequest = false);
    bool switchOutputToAudioDevice(const QAudioDeviceInfo outputDeviceInfo, bool isShutdownRequest = false);
@@ -476,10 +482,6 @@ private:
    QTimer* _checkPeakValuesTimer { nullptr };

    bool _isRecording { false };

#if WEBRTC_ENABLED
    webrtc::AudioProcessing* _apm;
#endif
};