From 569c76c8ba51ff25a90ebd7b1ec85206f4b0184d Mon Sep 17 00:00:00 2001
From: Ken Cooke <ken@highfidelity.io>
Date: Sat, 20 Jul 2019 13:05:11 -0700
Subject: [PATCH] Baseline AEC using WebRTC, always enabled. Audio streams are
 hooked as close as possible to device input/output, re-buffering as needed.

---
 libraries/audio-client/src/AudioClient.cpp | 156 +++++++++++++++++++--
 libraries/audio-client/src/AudioClient.h   |  12 +-
 2 files changed, 150 insertions(+), 18 deletions(-)
diff --git a/libraries/audio-client/src/AudioClient.cpp b/libraries/audio-client/src/AudioClient.cpp
index 9bf7a24c56..b95dd96d06 100644
--- a/libraries/audio-client/src/AudioClient.cpp
+++ b/libraries/audio-client/src/AudioClient.cpp
@@ -303,12 +303,6 @@ AudioClient::AudioClient() :
 #endif
     _orientationGetter(DEFAULT_ORIENTATION_GETTER) {
 
-#if defined(WEBRTC_ENABLED)
-    qDebug() << "QQQQ calling AudioProcessingBuilder";
-    _apm = webrtc::AudioProcessingBuilder().Create();
-    qDebug() << "QQQQ done calling AudioProcessingBuilder";
-#endif
-
     // avoid putting a lock in the device callback
     assert(_localSamplesAvailable.is_lock_free());
 
@@ -360,6 +354,10 @@ AudioClient::AudioClient() :
 
     configureReverb();
 
+#if defined(WEBRTC_ENABLED)
+    configureWebrtc();
+#endif
+
     auto nodeList = DependencyManager::get<NodeList>();
     auto& packetReceiver = nodeList->getPacketReceiver();
     packetReceiver.registerListener(PacketType::AudioStreamStats, &_stats, "processStreamStatsPacket");
@@ -1091,6 +1089,137 @@ void AudioClient::setReverbOptions(const AudioEffectOptions* options) {
     }
 }
 
+#if defined(WEBRTC_ENABLED)
+
+static const int WEBRTC_FRAMES_MAX = webrtc::AudioProcessing::kChunkSizeMs * webrtc::AudioProcessing::kMaxNativeSampleRateHz / 1000;
+static const int WEBRTC_CHANNELS_MAX = 2;
+
+static void deinterleaveToFloat(const int16_t* src, float* const* dst, int numFrames, int numChannels) {
+    // src is interleaved (numChannels samples per frame); dst holds one output buffer per channel
+    const float toFloat = 1/32768.0f;
+    for (int frame = 0; frame < numFrames; frame++) {
+        for (int ch = 0; ch < numChannels; ch++) {
+            dst[ch][frame] = src[frame * numChannels + ch] * toFloat;
+        }
+    }
+}
+
+static void interleaveToInt16(const float* const* src, int16_t* dst, int numFrames, int numChannels) {
+    for (int frame = 0; frame < numFrames; frame++) {
+        int16_t* out = dst + frame * numChannels;               // interleaved output slot for this frame
+        for (int ch = 0; ch < numChannels; ch++) {
+            float scaled = src[ch][frame] * 32768.0f;           // float to int16 units
+            scaled += (scaled < 0.0f) ? -0.5f : 0.5f;           // bias so the truncating cast rounds to nearest
+            scaled = std::max(std::min(scaled, 32767.0f), -32768.0f);   // saturate to the int16 range
+            out[ch] = (int16_t)scaled;
+        }
+    }
+}
+
+void AudioClient::configureWebrtc() {
+    // Create the WebRTC audio processor and apply a configuration with only echo handling turned on.
+    _apm = webrtc::AudioProcessingBuilder().Create();
+    webrtc::AudioProcessing::Config apmConfig;
+
+    // enabled: echo canceller (non-mobile, non-legacy) and the residual echo detector
+    apmConfig.echo_canceller.enabled = true;
+    apmConfig.echo_canceller.mobile_mode = false;
+    apmConfig.echo_canceller.use_legacy_aec = false;
+    apmConfig.residual_echo_detector.enabled = true;
+    // disabled: all remaining modules (their level/gain settings are still set explicitly)
+    apmConfig.pre_amplifier.enabled = false;
+    apmConfig.high_pass_filter.enabled = false;
+    apmConfig.noise_suppression.enabled = false;
+    apmConfig.noise_suppression.level = webrtc::AudioProcessing::Config::NoiseSuppression::kModerate;
+    apmConfig.voice_detection.enabled = false;
+    apmConfig.gain_controller1.enabled = false;
+    apmConfig.gain_controller2.enabled = false;
+    apmConfig.gain_controller2.fixed_digital.gain_db = 0.0f;
+    apmConfig.gain_controller2.adaptive_digital.enabled = false;
+    apmConfig.level_estimation.enabled = false;
+    _apm->ApplyConfig(apmConfig);
+    qCDebug(audioclient) << "WebRTC enabled for acoustic echo cancellation.";
+}
+
+// Feeds output (far-end) audio to the WebRTC processor as the AEC reference, rebuffering into 10ms chunks.
+void AudioClient::processWebrtcFarEnd(const int16_t* samples, int numFrames, int numChannels, int sampleRate) {
+    // samples is interleaved int16: numFrames frames of numChannels samples; leftover frames wait in the fifo
+    // TODO: move to AudioClient.h
+    static int16_t _fifo[WEBRTC_CHANNELS_MAX * WEBRTC_FRAMES_MAX];
+    static int _numFifo = 0;    // numFrames saved in fifo
+
+    const webrtc::StreamConfig streamConfig = webrtc::StreamConfig(sampleRate, numChannels);
+    const int numChunk = (int)streamConfig.num_frames();
+
+    if (sampleRate > webrtc::AudioProcessing::kMaxNativeSampleRateHz) {
+        qCWarning(audioclient) << "WebRTC does not support" << sampleRate << "output sample rate.";
+        return;
+    }
+    if (numChannels > WEBRTC_CHANNELS_MAX) {
+        qCWarning(audioclient) << "WebRTC does not support" << numChannels << "output channels.";
+        return;
+    }
+    // if the chunk size is no larger than what an earlier call queued, numFill below would go negative
+    if (_numFifo >= numChunk) {
+        _numFifo = 0;   // drop the stale partial chunk
+    }
+    while (numFrames > 0) {
+        // number of frames to fill
+        int numFill = std::min(numFrames, numChunk - _numFifo);
+        // refill fifo: _numFifo counts frames, so the write offset into the interleaved fifo is in samples
+        memcpy(&_fifo[_numFifo * numChannels], samples, numFill * numChannels * sizeof(int16_t));
+        samples += numFill * numChannels;
+        numFrames -= numFill;
+        _numFifo += numFill;
+
+        if (_numFifo == numChunk) {
+            // convert audio format
+            float buffer[WEBRTC_CHANNELS_MAX][WEBRTC_FRAMES_MAX];
+            float* const buffers[WEBRTC_CHANNELS_MAX] = { buffer[0], buffer[1] };
+            deinterleaveToFloat(_fifo, buffers, numChunk, numChannels);
+
+            // process one chunk
+            if (_apm->kNoError != _apm->ProcessReverseStream(buffers, streamConfig, streamConfig, buffers)) {
+                qCWarning(audioclient) << "WebRTC ProcessReverseStream() returned an ERROR.";
+            }
+            _numFifo = 0;
+        }
+    }
+}
+
+void AudioClient::processWebrtcNearEnd(int16_t* samples, int numFrames, int numChannels, int sampleRate) {
+    // Runs one chunk of input audio through the WebRTC processor and writes the result back into samples.
+    const webrtc::StreamConfig format(sampleRate, numChannels);
+    const int framesPerChunk = (int)format.num_frames();
+
+    if (sampleRate > webrtc::AudioProcessing::kMaxNativeSampleRateHz) {
+        qCWarning(audioclient) << "WebRTC does not support" << sampleRate << "input sample rate.";
+        return;
+    }
+    if (numChannels > WEBRTC_CHANNELS_MAX) {
+        qCWarning(audioclient) << "WebRTC does not support" << numChannels << "input channels.";
+        return;
+    }
+    if (numFrames != framesPerChunk) {
+        qCWarning(audioclient) << "WebRTC requires exactly 10ms of input.";
+        return;
+    }
+
+    // split the interleaved int16 input into one float buffer per channel
+    float planar[WEBRTC_CHANNELS_MAX][WEBRTC_FRAMES_MAX];
+    float* const channels[WEBRTC_CHANNELS_MAX] = { planar[0], planar[1] };
+    deinterleaveToFloat(samples, channels, numFrames, numChannels);
+
+    // the same buffers are passed as both source and destination of the processing call
+    const int status = _apm->ProcessStream(channels, format, format, channels);
+    if (status != webrtc::AudioProcessing::kNoError) {
+        qCWarning(audioclient) << "WebRTC ProcessStream() returned an ERROR.";
+    }
+    interleaveToInt16(channels, samples, numFrames, numChannels);   // overwrite the caller's samples in place
+}
+
+#endif // WEBRTC_ENABLED
+
 void AudioClient::handleLocalEchoAndReverb(QByteArray& inputByteArray) {
     // If there is server echo, reverb will be applied to the recieved audio stream so no need to have it here.
     bool hasReverb = _reverb || _receivedAudioStream.hasReverb();
@@ -1269,6 +1398,11 @@ void AudioClient::handleMicAudioInput() {
 
         _inputRingBuffer.readSamples(inputAudioSamples.get(), inputSamplesRequired);
 
+#if defined(WEBRTC_ENABLED)
+        processWebrtcNearEnd(inputAudioSamples.get(), inputSamplesRequired / _inputFormat.channelCount(),
+                             _inputFormat.channelCount(), _inputFormat.sampleRate());
+#endif
+
         // detect loudness and clipping on the raw input
         bool isClipping = false;
         float loudness = computeLoudness(inputAudioSamples.get(), inputSamplesRequired, _inputFormat.channelCount(), isClipping);
@@ -2185,13 +2319,9 @@ qint64 AudioClient::AudioOutputIODevice::readData(char * data, qint64 maxSize) {
     // limit the audio
     _audio->_audioLimiter.render(mixBuffer, scratchBuffer, framesPopped);
 
-    // TODO:
-    // At this point, scratchBuffer contains the final (mixed, limited) output audio.
-    // format = interleaved int16_t
-    // samples = samplesPopped
-    // channels = OUTPUT_CHANNEL_COUNT
-    // sampleRate = _outputFormat.sampleRate()
-    // This can be used as the far-end signal for AEC.
+#if defined(WEBRTC_ENABLED)
+    _audio->processWebrtcFarEnd(scratchBuffer, framesPopped, OUTPUT_CHANNEL_COUNT, _audio->_outputFormat.sampleRate());
+#endif
 
     // if required, upmix or downmix to deviceChannelCount
     if (deviceChannelCount == OUTPUT_CHANNEL_COUNT) {
diff --git a/libraries/audio-client/src/AudioClient.h b/libraries/audio-client/src/AudioClient.h
index 67a064929c..8cf54c1a0d 100644
--- a/libraries/audio-client/src/AudioClient.h
+++ b/libraries/audio-client/src/AudioClient.h
@@ -415,9 +415,15 @@ private:
     // Adds Reverb
     void configureReverb();
     void updateReverbOptions();
-
     void handleLocalEchoAndReverb(QByteArray& inputByteArray);
 
+#if defined(WEBRTC_ENABLED)
+    webrtc::AudioProcessing* _apm { nullptr };
+    void configureWebrtc();
+    void processWebrtcFarEnd(const int16_t* samples, int numFrames, int numChannels, int sampleRate);
+    void processWebrtcNearEnd(int16_t* samples, int numFrames, int numChannels, int sampleRate);
+#endif
+
     bool switchInputToAudioDevice(const QAudioDeviceInfo inputDeviceInfo, bool isShutdownRequest = false);
     bool switchOutputToAudioDevice(const QAudioDeviceInfo outputDeviceInfo, bool isShutdownRequest = false);
 
@@ -476,10 +482,6 @@ private:
     QTimer* _checkPeakValuesTimer { nullptr };
 
     bool _isRecording { false };
-
-#if WEBRTC_ENABLED
-    webrtc::AudioProcessing* _apm;
-#endif
 };