libspeexdsp-based echo cancellation (untested)

This commit is contained in:
tosh 2013-06-18 22:18:27 +02:00
parent e3da925e46
commit 682a7c9cdf
4 changed files with 191 additions and 71 deletions

View file

@ -0,0 +1,79 @@
# Copyright (c) 2009, Whispersoft s.r.l.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
# * Neither the name of Whispersoft s.r.l. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Finds the SpeexDSP library (the echo-cancellation / preprocessing part of
# Speex).
#
# Result variables:
#   SPEEXDSP_INCLUDE_DIR - where to find speex/speex_echo.h, etc.
#   SPEEXDSP_LIBRARIES   - List of libraries when using SPEEXDSP.
#   SPEEXDSP_FOUND       - True if SPEEXDSP found.
#
# Cache variables (marked as advanced):
#   SPEEXDSP_INCLUDE_DIR, SPEEXDSP_LIBRARY
#

# find_package() names its control variables after the package name exactly
# as the caller spelled it (e.g. find_package(SpeexDSP REQUIRED) sets
# SpeexDSP_FIND_REQUIRED), and CMake variable names are case-sensitive.
# Map that spelling onto the upper-case names used below so that REQUIRED and
# QUIET are honoured; the upper-case spelling keeps working as before.
if (SpeexDSP_FIND_REQUIRED)
  set(SPEEXDSP_FIND_REQUIRED TRUE)
endif ()
if (SpeexDSP_FIND_QUIETLY)
  set(SPEEXDSP_FIND_QUIETLY TRUE)
endif ()

if (SPEEXDSP_INCLUDE_DIR)
  # Already in cache, be silent
  set(SPEEXDSP_FIND_QUIETLY TRUE)
endif ()

# Probe for a header that belongs to SpeexDSP itself (the one the sources
# include) rather than the codec header speex/speex.h.
find_path(SPEEXDSP_INCLUDE_DIR
  NAMES speex/speex_echo.h
  PATHS
    /opt/local/include
    /usr/local/include
    /usr/include
)

set(SPEEXDSP_NAMES speexdsp)
find_library(SPEEXDSP_LIBRARY
  NAMES ${SPEEXDSP_NAMES}
  PATHS /usr/lib /usr/local/lib /opt/local/lib
)

# The package counts as found only when both the headers and the library
# were located.
if (SPEEXDSP_INCLUDE_DIR AND SPEEXDSP_LIBRARY)
  set(SPEEXDSP_FOUND TRUE)
  set(SPEEXDSP_LIBRARIES ${SPEEXDSP_LIBRARY})
else ()
  set(SPEEXDSP_FOUND FALSE)
  set(SPEEXDSP_LIBRARIES)
endif ()

if (SPEEXDSP_FOUND)
  if (NOT SPEEXDSP_FIND_QUIETLY)
    message(STATUS "Found SpeexDSP: ${SPEEXDSP_LIBRARY}")
  endif ()
else ()
  if (SPEEXDSP_FIND_REQUIRED)
    # Print what was (not) detected before aborting the configure step.
    message(STATUS "Looked for SpeexDSP libraries named ${SPEEXDSP_NAMES}.")
    message(STATUS "Include file detected: [${SPEEXDSP_INCLUDE_DIR}].")
    message(STATUS "Lib file detected: [${SPEEXDSP_LIBRARY}].")
    message(FATAL_ERROR "=========> Could NOT find SpeexDSP library")
  endif ()
endif ()

mark_as_advanced(
  SPEEXDSP_LIBRARY
  SPEEXDSP_INCLUDE_DIR
)

View file

@ -86,6 +86,7 @@ link_hifi_library(audio ${TARGET_NAME} ${ROOT_DIR})
find_package(GLM REQUIRED)
find_package(LodePNG REQUIRED)
find_package(LibOVR)
find_package(SpeexDSP REQUIRED)
# include headers for interface and InterfaceConfig.
include_directories(
@ -102,7 +103,7 @@ include_directories(
${LIBOVR_INCLUDE_DIRS}
)
target_link_libraries(${TARGET_NAME} ${QT_LIBRARIES})
target_link_libraries(${TARGET_NAME} ${QT_LIBRARIES} ${SPEEXDSP_LIBRARIES})
if (APPLE)
# link in required OS X frameworks and include the right GL headers

View file

@ -52,18 +52,24 @@ static const float AUDIO_CALLBACK_MSECS = (float)BUFFER_LENGTH_SAMPLES_PER_CHANN
static const int AGENT_LOOPBACK_MODIFIER = 307;
static const int AEC_N_CHANNELS_MIC = 1; // Number of microphone channels
static const int AEC_N_CHANNELS_PLAY = 2; // Number of speaker channels
static const int AEC_FILTER_LENGTH = BUFFER_LENGTH_SAMPLES_PER_CHANNEL * 10; // Width of the filter
static const int AEC_BUFFERED_FRAMES = 6; // Maximum number of frames to buffer
static const int AEC_BUFFERED_SAMPLES_PER_CHANNEL = BUFFER_LENGTH_SAMPLES_PER_CHANNEL * AEC_BUFFERED_FRAMES;
static const int AEC_BUFFERED_SAMPLES = AEC_BUFFERED_SAMPLES_PER_CHANNEL * AEC_N_CHANNELS_PLAY;
static const int AEC_TMP_BUFFER_SIZE = (AEC_N_CHANNELS_MIC + // Temporary space for processing a
AEC_N_CHANNELS_PLAY) * BUFFER_LENGTH_SAMPLES_PER_CHANNEL; // single frame
static const int ECHO_INPUT_FRAMES = 6; // Frames to buffer for echo cancellation
static const int ECHO_PING_RETRY = 3; // Number of retries for EC calibration
static const float ECHO_PING_PITCH = 16.f; // Ping wavelength, # samples / radian
static const float ECHO_PING_VOLUME = 32000.f; // Signal peak amplitude
static const float ECHO_PING_VOLUME = 32000.f; // Ping peak amplitude
static const int ECHO_PING_RETRY = 3; // Number of retries for EC calibration
static const int ECHO_PING_MIN_AMPLI = 225; // Minimum amplitude for EC calibration
static const int ECHO_PING_MAX_PERIOD_DIFFERENCE = 15; // Maximum # samples from expected period
static const int ECHO_CANCEL_ADJUST = -9; // Some samples to fine-tune delay
static const int ECHO_PING_PERIOD = int(Radians::twicePi() * ECHO_PING_PITCH); // Sine period based on the given pitch
static const int ECHO_PING_HALF_PERIOD = int(Radians::pi() * ECHO_PING_PITCH); // Distance between extrema
static const int ECHO_PING_BUFFER_OFFSET = PACKET_LENGTH_SAMPLES_PER_CHANNEL - ECHO_PING_PERIOD * 2.0f; // Signal start
static const int ECHO_INPUT_LENGTH_SAMPLES = PACKET_LENGTH_SAMPLES_PER_CHANNEL * ECHO_INPUT_FRAMES; // Total buffer size
static const int ECHO_PING_BUFFER_OFFSET = BUFFER_LENGTH_SAMPLES_PER_CHANNEL - ECHO_PING_PERIOD * 2.0f; // Signal start
inline void Audio::performIO(int16_t* inputLeft, int16_t* outputLeft, int16_t* outputRight) {
@ -74,7 +80,7 @@ inline void Audio::performIO(int16_t* inputLeft, int16_t* outputLeft, int16_t* o
eventuallyCancelEcho(inputLeft);
// Add Procedural effects to input samples
//addProceduralSounds(inputLeft, BUFFER_LENGTH_SAMPLES_PER_CHANNEL);
addProceduralSounds(inputLeft, BUFFER_LENGTH_SAMPLES_PER_CHANNEL);
if (agentList && inputLeft) {
@ -234,8 +240,8 @@ inline void Audio::performIO(int16_t* inputLeft, int16_t* outputLeft, int16_t* o
// add output (@speakers) data just written to the scope
#ifndef DEBUG_ECHO_CANCELLATION
_scope->addSamples(1, outputLeft, PACKET_LENGTH_SAMPLES_PER_CHANNEL);
_scope->addSamples(2, outputRight, PACKET_LENGTH_SAMPLES_PER_CHANNEL);
_scope->addSamples(1, outputLeft, BUFFER_LENGTH_SAMPLES_PER_CHANNEL);
_scope->addSamples(2, outputRight, BUFFER_LENGTH_SAMPLES_PER_CHANNEL);
#endif
gettimeofday(&_lastCallbackTime, NULL);
@ -272,6 +278,8 @@ static void outputPortAudioError(PaError error) {
Audio::Audio(Oscilloscope* scope) :
_stream(NULL),
_speexEchoState(NULL),
_speexPreprocessState(NULL),
_ringBuffer(true),
_scope(scope),
_averagedLatency(0.0),
@ -306,13 +314,36 @@ Audio::Audio(Oscilloscope* scope) :
BUFFER_LENGTH_SAMPLES_PER_CHANNEL,
audioCallback,
(void*) this));
if (! _stream) {
return;
}
_echoSamplesLeft = new int16_t[AEC_BUFFERED_SAMPLES + AEC_TMP_BUFFER_SIZE];
if (! _echoSamplesLeft) {
return;
}
memset(_echoSamplesLeft, 0, AEC_BUFFERED_SAMPLES * sizeof(int16_t));
_echoSamplesRight = _echoSamplesLeft + AEC_BUFFERED_SAMPLES_PER_CHANNEL;
_speexTmpBuf = _echoSamplesRight + AEC_BUFFERED_SAMPLES_PER_CHANNEL;
_speexPreprocessState = speex_preprocess_state_init(BUFFER_LENGTH_SAMPLES_PER_CHANNEL, SAMPLE_RATE);
if (_speexPreprocessState) {
_speexEchoState = speex_echo_state_init_mc(BUFFER_LENGTH_SAMPLES_PER_CHANNEL,
AEC_FILTER_LENGTH, AEC_N_CHANNELS_MIC, AEC_N_CHANNELS_PLAY);
if (_speexEchoState) {
int sampleRate = SAMPLE_RATE;
speex_echo_ctl(_speexEchoState, SPEEX_ECHO_SET_SAMPLING_RATE, &sampleRate);
speex_preprocess_ctl(_speexPreprocessState, SPEEX_PREPROCESS_SET_ECHO_STATE, _speexEchoState);
} else {
speex_preprocess_state_destroy(_speexPreprocessState);
_speexPreprocessState = NULL;
}
}
// start the stream now that sources are good to go
outputPortAudioError(Pa_StartStream(_stream));
_echoInputSamples = new int16_t[ECHO_INPUT_LENGTH_SAMPLES];
memset(_echoInputSamples, 0, BUFFER_LENGTH_SAMPLES_PER_CHANNEL * sizeof(int));
gettimeofday(&_lastReceiveTime, NULL);
}
@ -321,6 +352,11 @@ Audio::~Audio() {
outputPortAudioError(Pa_CloseStream(_stream));
outputPortAudioError(Pa_Terminate());
}
if (_speexEchoState) {
speex_preprocess_state_destroy(_speexPreprocessState);
speex_echo_state_destroy(_speexEchoState);
}
delete[] _echoSamplesLeft;
}
void Audio::addReceivedAudioToBuffer(unsigned char* receivedData, int receivedBytes) {
@ -479,32 +515,36 @@ inline void Audio::eventuallyCancelEcho(int16_t* inputLeft) {
return;
}
// Determine echo buffer range
unsigned n = PACKET_LENGTH_SAMPLES_PER_CHANNEL, n2 = 0;
unsigned readPos = (_echoWritePos + ECHO_INPUT_LENGTH_SAMPLES - _echoDelay) % ECHO_INPUT_LENGTH_SAMPLES;
// Construct an artificial frame from the captured playback
// that contains the appropriately delayed output to cancel
unsigned n = BUFFER_LENGTH_SAMPLES_PER_CHANNEL, n2 = 0;
unsigned readPos = (_echoWritePos + AEC_BUFFERED_SAMPLES_PER_CHANNEL - _echoDelay) % AEC_BUFFERED_SAMPLES_PER_CHANNEL;
unsigned readEnd = readPos + n;
if (readEnd >= ECHO_INPUT_LENGTH_SAMPLES) {
n2 = (readEnd -= ECHO_INPUT_LENGTH_SAMPLES);
if (readEnd >= AEC_BUFFERED_SAMPLES_PER_CHANNEL) {
n2 = (readEnd -= AEC_BUFFERED_SAMPLES_PER_CHANNEL);
n -= n2;
}
// Use two subsequent buffers for the two stereo channels
int16_t* playBufferLeft = _speexTmpBuf + BUFFER_LENGTH_SAMPLES_PER_CHANNEL;
memcpy(playBufferLeft, _echoSamplesLeft + readPos, n * sizeof(int16_t));
memcpy(playBufferLeft + n, _echoSamplesLeft, n2 * sizeof(int16_t));
int16_t* playBufferRight = playBufferLeft + BUFFER_LENGTH_SAMPLES_PER_CHANNEL;
memcpy(playBufferRight, _echoSamplesRight + readPos, n * sizeof(int16_t));
memcpy(playBufferRight + n, _echoSamplesLeft, n2 * sizeof(int16_t));
#ifdef DEBUG_ECHO_CANCELLATION
// Visualization
static short dbgBuf[BUFFER_LENGTH_SAMPLES_PER_CHANNEL];
memset(dbgBuf, 0, BUFFER_LENGTH_BYTES_PER_CHANNEL);
subScaled(dbgBuf, _echoInputSamples + readPos, n, _echoAmplitude);
subScaled(dbgBuf + n, _echoInputSamples, n2, _echoAmplitude);
// Visualize the input
_scope->addSamples(0, inputLeft, BUFFER_LENGTH_SAMPLES_PER_CHANNEL);
_scope->addSamples(1, playBufferLeft, BUFFER_LENGTH_SAMPLES_PER_CHANNEL);
#endif
// Subtract echo from input
subScaled(inputLeft, _echoInputSamples + readPos, n, _echoAmplitude);
subScaled(inputLeft + n, _echoInputSamples, n2, _echoAmplitude);
// Have Speex perform echo cancellation
speex_echo_cancellation(_speexEchoState, inputLeft, playBufferLeft, _speexTmpBuf);
memcpy(inputLeft, _speexTmpBuf, BUFFER_LENGTH_BYTES_PER_CHANNEL);
speex_preprocess_run(_speexPreprocessState, inputLeft);
#ifdef DEBUG_ECHO_CANCELLATION
// Visualization
_scope->addSamples(1, dbgBuf, BUFFER_LENGTH_SAMPLES_PER_CHANNEL);
// Visualize the result
_scope->addSamples(2, inputLeft, BUFFER_LENGTH_SAMPLES_PER_CHANNEL);
#endif
}
@ -514,15 +554,17 @@ inline void Audio::eventuallyRecordEcho(int16_t* outputLeft, int16_t* outputRigh
return;
}
// Copy data to circular buffer
unsigned n = PACKET_LENGTH_SAMPLES_PER_CHANNEL, n2 = 0;
// Copy playback data to circular buffers
unsigned n = BUFFER_LENGTH_SAMPLES_PER_CHANNEL, n2 = 0;
unsigned writeEnd = _echoWritePos + n;
if (writeEnd >= ECHO_INPUT_LENGTH_SAMPLES) {
n2 = (writeEnd -= ECHO_INPUT_LENGTH_SAMPLES);
if (writeEnd >= AEC_BUFFERED_SAMPLES_PER_CHANNEL) {
n2 = (writeEnd -= AEC_BUFFERED_SAMPLES_PER_CHANNEL);
n -= n2;
}
memcpy(_echoInputSamples + _echoWritePos, outputLeft, n * sizeof(int16_t));
memcpy(_echoInputSamples, outputLeft + n, n2 * sizeof(int16_t));
memcpy(_echoSamplesLeft + _echoWritePos, outputLeft, n * sizeof(int16_t));
memcpy(_echoSamplesLeft, outputLeft + n, n2 * sizeof(int16_t));
memcpy(_echoSamplesRight + _echoWritePos, outputRight, n * sizeof(int16_t));
memcpy(_echoSamplesRight, outputRight + n, n2 * sizeof(int16_t));
_echoWritePos = writeEnd;
}
@ -535,7 +577,7 @@ void Audio::setIsCancellingEcho(bool enabled) {
// Request recalibration
_echoPingRetries = ECHO_PING_RETRY;
_echoInputFramesToRecord = ECHO_INPUT_FRAMES;
_echoInputFramesToRecord = AEC_BUFFERED_FRAMES;
_isSendingEchoPing = true;
// _scope->setDownsampleRatio(8); // DEBUG
@ -555,13 +597,13 @@ inline void Audio::eventuallySendRecvPing(int16_t* inputLeft, int16_t* outputLef
if (Application::getInstance()->shouldEchoAudio()) {
enum { bufs = 32 };
static int16_t buf[bufs][PACKET_LENGTH_SAMPLES_PER_CHANNEL];
static int16_t buf[bufs][BUFFER_LENGTH_SAMPLES_PER_CHANNEL];
static int bufIdx = 0;
int wBuf = bufIdx;
bufIdx = (bufIdx + 1) % bufs;
memcpy(buf[wBuf], inputLeft, PACKET_LENGTH_BYTES_PER_CHANNEL);
subScaled(outputLeft, buf[bufIdx], PACKET_LENGTH_SAMPLES_PER_CHANNEL, -0x7000);
memcpy(buf[wBuf], inputLeft, BUFFER_LENGTH_BYTES_PER_CHANNEL);
subScaled(outputLeft, buf[bufIdx], BUFFER_LENGTH_SAMPLES_PER_CHANNEL, -0x7000);
}
*/
// Calibration of echo cancellation
@ -582,21 +624,21 @@ inline void Audio::eventuallySendRecvPing(int16_t* inputLeft, int16_t* outputLef
// As of the next frame, we'll be recording _echoInputFramesToRecord from the mic
_isSendingEchoPing = false;
printLog("Send echo ping\n");
printLog("Send audio ping\n");
} else if (_echoInputFramesToRecord > 0) {
// Store input samples
int offset = PACKET_LENGTH_SAMPLES_PER_CHANNEL * (
ECHO_INPUT_FRAMES - _echoInputFramesToRecord);
memcpy(_echoInputSamples + offset,
inputLeft, PACKET_LENGTH_SAMPLES_PER_CHANNEL * sizeof(int16_t));
int offset = BUFFER_LENGTH_SAMPLES_PER_CHANNEL * (
AEC_BUFFERED_FRAMES - _echoInputFramesToRecord);
memcpy(_echoSamplesLeft + offset,
inputLeft, BUFFER_LENGTH_SAMPLES_PER_CHANNEL * sizeof(int16_t));
--_echoInputFramesToRecord;
if (_echoInputFramesToRecord == 0) {
_echoAnalysisPending = true;
printLog("got input\n");
printLog("Received ping echo\n");
}
}
}
@ -618,50 +660,45 @@ static int findExtremum(int16_t const* samples, int length, int sign) {
bool Audio::calibrateEchoCancellation() {
// Analyze received signal
int botAt = findExtremum(_echoInputSamples, ECHO_INPUT_LENGTH_SAMPLES, -1);
int botAt = findExtremum(_echoSamplesLeft, AEC_BUFFERED_SAMPLES_PER_CHANNEL, -1);
if (botAt == -1) {
printLog("Minimum not found.\n");
printLog("AEC: Minimum not found.\n");
return false;
}
int topAt = findExtremum(_echoInputSamples, ECHO_INPUT_LENGTH_SAMPLES, 1);
int topAt = findExtremum(_echoSamplesLeft, AEC_BUFFERED_SAMPLES_PER_CHANNEL, 1);
if (topAt == -1) {
printLog("Maximum not found.\n");
printLog("AEC: Maximum not found.\n");
return false;
}
// Determine peak amplitude
int ampli = (_echoInputSamples[topAt] - _echoInputSamples[botAt]) / 2;
int ampli = (_echoSamplesLeft[topAt] - _echoSamplesLeft[botAt]) / 2;
if (ampli < ECHO_PING_MIN_AMPLI) {
// We can't reliably calibrate and probably won't hear it, anyways.
printLog("Low amplitude %d.\n", ampli);
printLog("AEC: Amplitude too low %d.\n", ampli);
return false;
}
// Determine period
int halfPeriod = topAt - botAt;
if (halfPeriod < 0) {
printLog("Min/max inverted.\n");
printLog("AEC: Min/max inverted.\n");
halfPeriod = -halfPeriod;
topAt -= ECHO_PING_PERIOD;
ampli = -ampli;
}
if (abs(halfPeriod-ECHO_PING_HALF_PERIOD) > ECHO_PING_MAX_PERIOD_DIFFERENCE) {
// Probably not our signal
printLog("Unexpected period %d vs. %d\n", halfPeriod, ECHO_PING_HALF_PERIOD);
printLog("AEC: Unexpected period %d vs. %d\n", halfPeriod, ECHO_PING_HALF_PERIOD);
return false;
}
// Determine delay based on the characteristic center of the signal we found
int delay = (botAt + topAt) / 2 + PACKET_LENGTH_SAMPLES_PER_CHANNEL - ECHO_PING_PERIOD + ECHO_CANCEL_ADJUST;
// (this value is too small by one packet minus ping length and it's good that
// way as the initial movement will be before the peak)
_echoDelay = (botAt + topAt) / 2;
// Scale amplitude to fraction in 16-bit fixpoint, relative to the volume of the original signal
ampli = (ampli << 16) / ECHO_PING_VOLUME;
// Set state
_echoDelay = delay;
_echoAmplitude = ampli;
printLog("delay = %d\namp = %d\ntopAt = %d\nbotAt = %d\n", delay, ampli, topAt, botAt);
printLog("AEC:\ndelay = %d\namp = %d\ntopAt = %d\nbotAt = %d\n", _echoDelay, ampli, topAt, botAt);
return true;
}
@ -676,13 +713,13 @@ bool Audio::eventuallyCalibrateEchoCancellation() {
if (calibrateEchoCancellation()) {
// Success! Enable echo cancellation.
_echoWritePos = 0;
memset(_echoInputSamples, 0, ECHO_INPUT_LENGTH_SAMPLES * sizeof(int16_t));
memset(_echoSamplesLeft, 0, AEC_BUFFERED_SAMPLES * sizeof(int16_t));
_isCancellingEcho = true;
}
else if (--_echoPingRetries >= 0) {
// Retry - better luck next time.
_isSendingEchoPing = true;
_echoInputFramesToRecord = ECHO_INPUT_FRAMES;
_echoInputFramesToRecord = AEC_BUFFERED_FRAMES;
// _scope->inputPaused = false; // DEBUG
return false;
}
@ -690,7 +727,4 @@ bool Audio::eventuallyCalibrateEchoCancellation() {
return true;
}
#endif

View file

@ -11,6 +11,9 @@
#include <portaudio.h>
#include <speex/speex_echo.h>
#include <speex/speex_preprocess.h>
#include <AudioRingBuffer.h>
#include <StdDev.h>
@ -49,6 +52,9 @@ public:
private:
PaStream* _stream;
SpeexEchoState* _speexEchoState;
SpeexPreprocessState* _speexPreprocessState;
int16_t* _speexTmpBuf;
AudioRingBuffer _ringBuffer;
Oscilloscope* _scope;
StDev _stdev;
@ -73,9 +79,9 @@ private:
int _echoPingRetries;
unsigned _echoWritePos;
unsigned _echoDelay;
int _echoAmplitude;
int _echoInputFramesToRecord;
int16_t* _echoInputSamples;
int16_t* _echoSamplesLeft;
int16_t* _echoSamplesRight;
// Flange effect
int _samplesLeftForFlange;
int _lastYawMeasuredMaximum;