Merge pull request #5872 from kencooke/audio-src

New low-latency sample rate conversion library
This commit is contained in:
Philip Rosedale 2015-09-23 15:42:53 -07:00
commit 75a9dd13be
5 changed files with 4650 additions and 120 deletions

View file

@ -47,8 +47,6 @@ extern "C" {
#pragma GCC diagnostic pop
#endif
#include <soxr.h>
#include <NodeList.h>
#include <udt/PacketHeaders.h>
#include <PositionalAudioStream.h>
@ -189,18 +187,6 @@ QAudioDeviceInfo getNamedAudioDeviceForMode(QAudio::Mode mode, const QString& de
return result;
}
soxr_datatype_t soxrDataTypeFromQAudioFormat(const QAudioFormat& audioFormat) {
if (audioFormat.sampleType() == QAudioFormat::Float) {
return SOXR_FLOAT32_I;
} else {
if (audioFormat.sampleSize() == 16) {
return SOXR_INT16_I;
} else {
return SOXR_INT32_I;
}
}
}
int numDestinationSamplesRequired(const QAudioFormat& sourceFormat, const QAudioFormat& destinationFormat,
int numSourceSamples) {
float ratio = (float) destinationFormat.channelCount() / sourceFormat.channelCount();
@ -350,7 +336,7 @@ bool adjustedFormatForAudioDevice(const QAudioDeviceInfo& audioDevice,
// use 22050, resample but closer to 24
adjustedAudioFormat.setSampleRate(HALF_FORTY_FOUR);
} else if (audioDevice.supportedSampleRates().contains(FORTY_FOUR)) {
// use 48000, libsoxr will resample
// use 48000, resample
adjustedAudioFormat.setSampleRate(FORTY_FOUR);
}
#endif
@ -391,10 +377,10 @@ bool sampleChannelConversion(const int16_t* sourceSamples, int16_t* destinationS
return false;
}
soxr_error_t possibleResampling(soxr_t resampler,
const int16_t* sourceSamples, int16_t* destinationSamples,
unsigned int numSourceSamples, unsigned int numDestinationSamples,
const QAudioFormat& sourceAudioFormat, const QAudioFormat& destinationAudioFormat) {
void possibleResampling(AudioSRC* resampler,
const int16_t* sourceSamples, int16_t* destinationSamples,
unsigned int numSourceSamples, unsigned int numDestinationSamples,
const QAudioFormat& sourceAudioFormat, const QAudioFormat& destinationAudioFormat) {
if (numSourceSamples > 0) {
if (!resampler) {
@ -403,32 +389,19 @@ soxr_error_t possibleResampling(soxr_t resampler,
// no conversion, we can copy the samples directly across
memcpy(destinationSamples, sourceSamples, numSourceSamples * sizeof(int16_t));
}
return 0;
} else {
soxr_error_t resampleError = 0;
if (sourceAudioFormat.channelCount() != destinationAudioFormat.channelCount()) {
float channelCountRatio = (float) destinationAudioFormat.channelCount() / sourceAudioFormat.channelCount();
float channelCountRatio = (float)destinationAudioFormat.channelCount() / sourceAudioFormat.channelCount();
int numChannelCoversionSamples = (int) (numSourceSamples * channelCountRatio);
int numChannelCoversionSamples = (int)(numSourceSamples * channelCountRatio);
int16_t* channelConversionSamples = new int16_t[numChannelCoversionSamples];
sampleChannelConversion(sourceSamples, channelConversionSamples,
numSourceSamples,
sourceAudioFormat, destinationAudioFormat);
size_t numDestinationSamplesActual = 0;
resampleError = soxr_process(resampler,
channelConversionSamples, numChannelCoversionSamples, NULL,
destinationSamples, numDestinationSamples, &numDestinationSamplesActual);
// return silence instead of playing garbage samples
if (numDestinationSamplesActual < numDestinationSamples) {
unsigned int nBytes = (numDestinationSamples - numDestinationSamplesActual) * destinationAudioFormat.channelCount() * sizeof(int16_t);
memset(&destinationSamples[numDestinationSamplesActual * destinationAudioFormat.channelCount()], 0, nBytes);
qCDebug(audioclient) << "SOXR: padded with" << nBytes << "bytes of silence";
}
resampler->render(channelConversionSamples, destinationSamples, numChannelCoversionSamples);
delete[] channelConversionSamples;
} else {
@ -441,56 +414,12 @@ soxr_error_t possibleResampling(soxr_t resampler,
numAdjustedDestinationSamples /= 2;
}
size_t numAdjustedDestinationSamplesActual = 0;
resampleError = soxr_process(resampler,
sourceSamples, numAdjustedSourceSamples, NULL,
destinationSamples, numAdjustedDestinationSamples, &numAdjustedDestinationSamplesActual);
// return silence instead of playing garbage samples
if (numAdjustedDestinationSamplesActual < numAdjustedDestinationSamples) {
unsigned int nBytes = (numAdjustedDestinationSamples - numAdjustedDestinationSamplesActual) * destinationAudioFormat.channelCount() * sizeof(int16_t);
memset(&destinationSamples[numAdjustedDestinationSamplesActual * destinationAudioFormat.channelCount()], 0, nBytes);
qCDebug(audioclient) << "SOXR: padded with" << nBytes << "bytes of silence";
}
resampler->render(sourceSamples, destinationSamples, numAdjustedSourceSamples);
}
return resampleError;
}
} else {
return 0;
}
}
soxr_t soxrResamplerFromInputFormatToOutputFormat(const QAudioFormat& sourceAudioFormat,
const QAudioFormat& destinationAudioFormat) {
soxr_error_t soxrError;
// setup soxr_io_spec_t for input and output
soxr_io_spec_t inputToNetworkSpec = soxr_io_spec(soxrDataTypeFromQAudioFormat(sourceAudioFormat),
soxrDataTypeFromQAudioFormat(destinationAudioFormat));
// setup soxr_quality_spec_t for quality options
soxr_quality_spec_t qualitySpec = soxr_quality_spec(SOXR_MQ, 0);
int channelCount = (sourceAudioFormat.channelCount() == 2 && destinationAudioFormat.channelCount() == 2)
? 2 : 1;
soxr_t newResampler = soxr_create(sourceAudioFormat.sampleRate(),
destinationAudioFormat.sampleRate(),
channelCount,
&soxrError, &inputToNetworkSpec, &qualitySpec, 0);
if (soxrError) {
qCDebug(audioclient) << "There was an error setting up the soxr resampler -" << "soxr error code was " << soxrError;
soxr_delete(newResampler);
return NULL;
}
return newResampler;
}
void AudioClient::start() {
// set up the desired audio format
@ -546,7 +475,7 @@ void AudioClient::stop() {
switchOutputToAudioDevice(QAudioDeviceInfo());
if (_loopbackResampler) {
soxr_delete(_loopbackResampler);
delete _loopbackResampler;
_loopbackResampler = NULL;
}
}
@ -767,11 +696,12 @@ void AudioClient::handleLocalEchoAndReverb(QByteArray& inputByteArray) {
// do we need to setup a resampler?
if (_inputFormat.sampleRate() != _outputFormat.sampleRate() && !_loopbackResampler) {
qCDebug(audioclient) << "Attemping to create a resampler for input format to output format for audio loopback.";
_loopbackResampler = soxrResamplerFromInputFormatToOutputFormat(_inputFormat, _outputFormat);
if (!_loopbackResampler) {
return;
}
assert(_inputFormat.sampleSize() == 16);
assert(_outputFormat.sampleSize() == 16);
int channelCount = (_inputFormat.channelCount() == 2 && _outputFormat.channelCount() == 2) ? 2 : 1;
_loopbackResampler = new AudioSRC(_inputFormat.sampleRate(), _outputFormat.sampleRate(), channelCount);
}
static QByteArray reverbAlone; // Intermediary for local reverb with no echo
@ -1099,7 +1029,7 @@ bool AudioClient::switchInputToAudioDevice(const QAudioDeviceInfo& inputDeviceIn
if (_inputToNetworkResampler) {
// if we were using an input to network resampler, delete it here
soxr_delete(_inputToNetworkResampler);
delete _inputToNetworkResampler;
_inputToNetworkResampler = NULL;
}
@ -1111,15 +1041,17 @@ bool AudioClient::switchInputToAudioDevice(const QAudioDeviceInfo& inputDeviceIn
qCDebug(audioclient) << "The format to be used for audio input is" << _inputFormat;
// we've got the best we can get for input
// if required, setup a soxr resampler for this input to our desired network format
// if required, setup a resampler for this input to our desired network format
if (_inputFormat != _desiredInputFormat
&& _inputFormat.sampleRate() != _desiredInputFormat.sampleRate()) {
qCDebug(audioclient) << "Attemping to create a soxr resampler for input format to network format.";
_inputToNetworkResampler = soxrResamplerFromInputFormatToOutputFormat(_inputFormat, _desiredInputFormat);
qCDebug(audioclient) << "Attemping to create a resampler for input format to network format.";
assert(_inputFormat.sampleSize() == 16);
assert(_desiredInputFormat.sampleSize() == 16);
int channelCount = (_inputFormat.channelCount() == 2 && _desiredInputFormat.channelCount() == 2) ? 2 : 1;
_inputToNetworkResampler = new AudioSRC(_inputFormat.sampleRate(), _desiredInputFormat.sampleRate(), channelCount);
if (!_inputToNetworkResampler) {
return false;
}
} else {
qCDebug(audioclient) << "No resampling required for audio input to match desired network format.";
}
@ -1194,13 +1126,13 @@ bool AudioClient::switchOutputToAudioDevice(const QAudioDeviceInfo& outputDevice
if (_networkToOutputResampler) {
// if we were using an input to network resampler, delete it here
soxr_delete(_networkToOutputResampler);
delete _networkToOutputResampler;
_networkToOutputResampler = NULL;
}
if (_loopbackResampler) {
// if we were using an input to output resample, delete it here
soxr_delete(_loopbackResampler);
delete _loopbackResampler;
_loopbackResampler = NULL;
}
@ -1212,15 +1144,17 @@ bool AudioClient::switchOutputToAudioDevice(const QAudioDeviceInfo& outputDevice
qCDebug(audioclient) << "The format to be used for audio output is" << _outputFormat;
// we've got the best we can get for input
// if required, setup a soxr resampler for this input to our desired network format
// if required, setup a resampler for this input to our desired network format
if (_desiredOutputFormat != _outputFormat
&& _desiredOutputFormat.sampleRate() != _outputFormat.sampleRate()) {
qCDebug(audioclient) << "Attemping to create a resampler for network format to output format.";
_networkToOutputResampler = soxrResamplerFromInputFormatToOutputFormat(_desiredOutputFormat, _outputFormat);
if (!_networkToOutputResampler) {
return false;
}
assert(_desiredOutputFormat.sampleSize() == 16);
assert(_outputFormat.sampleSize() == 16);
int channelCount = (_desiredOutputFormat.channelCount() == 2 && _outputFormat.channelCount() == 2) ? 2 : 1;
_networkToOutputResampler = new AudioSRC(_desiredOutputFormat.sampleRate(), _outputFormat.sampleRate(), channelCount);
} else {
qCDebug(audioclient) << "No resampling required for network output to match actual output format.";
}

View file

@ -45,6 +45,7 @@
#include "AudioIOStats.h"
#include "AudioNoiseGate.h"
#include "AudioSRC.h"
#ifdef _WIN32
#pragma warning( push )
@ -72,7 +73,7 @@ static const quint64 DEFAULT_AUDIO_OUTPUT_STARVE_DETECTION_PERIOD = 10 * 1000; /
class QAudioInput;
class QAudioOutput;
class QIODevice;
struct soxr;
typedef struct ty_gverb ty_gverb;
typedef glm::vec3 (*AudioPositionGetter)();
@ -262,10 +263,10 @@ private:
AudioEffectOptions* _reverbOptions;
ty_gverb* _gverb;
// possible soxr streams needed for resample
soxr* _inputToNetworkResampler;
soxr* _networkToOutputResampler;
soxr* _loopbackResampler;
// possible streams needed for resample
AudioSRC* _inputToNetworkResampler;
AudioSRC* _networkToOutputResampler;
AudioSRC* _loopbackResampler;
// Adds Reverb
ty_gverb* createGverbFilter();

View file

@ -16,12 +16,12 @@
#include <udt/PacketHeaders.h>
#include <SharedUtil.h>
#include <UUID.h>
#include <soxr.h>
#include "AbstractAudioInterface.h"
#include "AudioRingBuffer.h"
#include "AudioLogging.h"
#include "SoundCache.h"
#include "AudioSRC.h"
#include "AudioInjector.h"
@ -316,23 +316,20 @@ AudioInjector* AudioInjector::playSound(const QString& soundUrl, const float vol
return playSoundAndDelete(samples, options, NULL);
}
soxr_io_spec_t spec = soxr_io_spec(SOXR_INT16_I, SOXR_INT16_I);
soxr_quality_spec_t qualitySpec = soxr_quality_spec(SOXR_MQ, 0);
const int channelCount = sound->isStereo() ? 2 : 1;
const int standardRate = AudioConstants::SAMPLE_RATE;
const int resampledRate = standardRate * stretchFactor;
const int nInputSamples = samples.size() / sizeof(int16_t);
const int nOutputSamples = nInputSamples * stretchFactor;
QByteArray resampled(nOutputSamples * sizeof(int16_t), '\0');
const int16_t* receivedSamples = reinterpret_cast<const int16_t*>(samples.data());
soxr_error_t soxError = soxr_oneshot(standardRate, resampledRate, channelCount,
receivedSamples, nInputSamples, NULL,
reinterpret_cast<int16_t*>(resampled.data()), nOutputSamples, NULL,
&spec, &qualitySpec, 0);
if (soxError) {
qCDebug(audio) << "Unable to resample" << soundUrl << "from" << nInputSamples << "@" << standardRate << "to" << nOutputSamples << "@" << resampledRate;
resampled = samples;
}
const int channelCount = sound->isStereo() ? 2 : 1;
AudioSRC resampler(standardRate, resampledRate, channelCount);
const int nInputFrames = samples.size() / (channelCount * sizeof(int16_t));
const int maxOutputFrames = resampler.getMaxOutput(nInputFrames);
QByteArray resampled(maxOutputFrames * channelCount * sizeof(int16_t), '\0');
int nOutputFrames = resampler.render(reinterpret_cast<const int16_t*>(samples.data()),
reinterpret_cast<int16_t*>(resampled.data()),
nInputFrames);
return playSoundAndDelete(resampled, options, NULL);
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,61 @@
//
// AudioSRC.h
// libraries/audio/src
//
// Created by Ken Cooke on 9/18/15.
// Copyright 2015 High Fidelity, Inc.
//
// Distributed under the Apache License, Version 2.0.
// See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html
//
#ifndef hifi_AudioSRC_h
#define hifi_AudioSRC_h
#include "stdint.h"
//
// Sample rate converter operating on 16-bit integer audio.
//
// An instance is configured once with an input rate, an output rate and a
// channel count, and is then fed audio through render().
//
// NOTE(review): the implementation file is not visible from this header, so
// the notes on internals below are inferred from member names and from the
// call sites in this change - confirm against AudioSRC.cpp.
//
class AudioSRC {

public:
    // Upper bound on numChannels; also sizes the per-channel pointer arrays below.
    static const int MAX_CHANNELS = 2;

    // inputSampleRate / outputSampleRate: rates to convert from / to
    //   (presumably in Hz - callers pass QAudioFormat::sampleRate()).
    // numChannels: number of channels; callers in this change pass 1 or 2.
    AudioSRC(int inputSampleRate, int outputSampleRate, int numChannels);
    ~AudioSRC();

    // The object holds raw buffer pointers and has a user-declared destructor,
    // so a member-wise copy would leave two objects referring to the same
    // buffers. Copying is therefore disallowed; share an instance by pointer
    // or reference instead.
    AudioSRC(const AudioSRC&) = delete;
    AudioSRC& operator=(const AudioSRC&) = delete;

    // Converts inputFrames frames read from input and writes the result to output.
    // Returns an int that callers treat as the number of output frames produced.
    // Callers size the output buffer as getMaxOutput(inputFrames) * numChannels
    // int16_t samples (presumably interleaved for stereo - TODO confirm).
    int render(const int16_t* input, int16_t* output, int inputFrames);

    // Bounds on the number of output frames for a given number of input frames.
    int getMinOutput(int inputFrames);
    int getMaxOutput(int inputFrames);

    // Bounds on the number of input frames for a given number of output frames
    // (presumably the inverse of the two queries above - TODO confirm).
    int getMinInput(int outputFrames);
    int getMaxInput(int outputFrames);

private:
    // Filter coefficients (presumably built by createPolyphaseFilter()).
    float* _polyphaseFilter;

    // Per-channel working buffers, one slot per possible channel.
    float* _history[MAX_CHANNELS];
    float* _inputs[MAX_CHANNELS];
    float* _outputs[MAX_CHANNELS];

    // Construction parameters.
    int _inputSampleRate;
    int _outputSampleRate;
    int _numChannels;

    // Internal filter/processing state - see AudioSRC.cpp for exact meaning.
    int _inputBlock;
    int _upFactor;
    int _downFactor;
    int _numTaps;
    int _numHistory;

    int64_t _offset;
    int64_t _step;

    int createPolyphaseFilter(int upFactor, int downFactor, float gain);

    // Filtering kernels: the "1" variant takes one input/output stream,
    // the "2" variant takes two.
    int multirateFilter1(const float* input0, float* output0, int inputFrames);
    int multirateFilter2(const float* input0, const float* input1, float* output0, float* output1, int inputFrames);

    // Conversion between the int16_t samples of the public API and the
    // per-channel float buffers used internally.
    void convertInputFromInt16(const int16_t* input, float** outputs, int numFrames);
    void convertOutputToInt16(float** inputs, int16_t* output, int numFrames);

    int processFloat(float** inputs, float** outputs, int inputFrames);
};
#endif // hifi_AudioSRC_h