Merge pull request #15307 from kencooke/audio-gain-interpolation

Case 22019: Audio clicks/pops when gain is rapidly changing
This commit is contained in:
Sam Gateau 2019-04-10 12:26:59 -07:00 committed by GitHub
commit 6f4f7335dc
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 205 additions and 93 deletions

View file

@ -549,38 +549,28 @@ void AudioMixerSlave::addStream(AudioMixerClientData::MixableStream& mixableStre
// grab the stream from the ring buffer
AudioRingBuffer::ConstIterator streamPopOutput = streamToAdd->getLastPopOutput();
// stereo sources are not passed through HRTF
if (streamToAdd->isStereo()) {
// apply the avatar gain adjustment
gain *= mixableStream.hrtf->getGainAdjustment();
streamPopOutput.readSamples(_bufferSamples, AudioConstants::NETWORK_FRAME_SAMPLES_STEREO);
const float scale = 1 / 32768.0f; // int16_t to float
for (int i = 0; i < AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL; i++) {
_mixSamples[2*i+0] += (float)streamPopOutput[2*i+0] * gain * scale;
_mixSamples[2*i+1] += (float)streamPopOutput[2*i+1] * gain * scale;
}
// stereo sources are not passed through HRTF
mixableStream.hrtf->mixStereo(_bufferSamples, _mixSamples, gain, AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);
++stats.manualStereoMixes;
} else if (isEcho) {
streamPopOutput.readSamples(_bufferSamples, AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);
// echo sources are not passed through HRTF
const float scale = 1/32768.0f; // int16_t to float
for (int i = 0; i < AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL; i++) {
float sample = (float)streamPopOutput[i] * gain * scale;
_mixSamples[2*i+0] += sample;
_mixSamples[2*i+1] += sample;
}
mixableStream.hrtf->mixMono(_bufferSamples, _mixSamples, gain, AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);
++stats.manualEchoMixes;
} else {
streamPopOutput.readSamples(_bufferSamples, AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);
mixableStream.hrtf->render(_bufferSamples, _mixSamples, HRTF_DATASET_INDEX, azimuth, distance, gain,
AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);
++stats.hrtfRenders;
}
}

View file

@ -1397,7 +1397,6 @@ bool AudioClient::mixLocalAudioInjectors(float* mixBuffer) {
// spatialize into mixBuffer
injector->getLocalFOA().render(_localScratchBuffer, mixBuffer, HRTF_DATASET_INDEX,
qw, qx, qy, qz, gain, AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);
} else if (options.stereo) {
if (options.positionSet) {
@ -1409,11 +1408,8 @@ bool AudioClient::mixLocalAudioInjectors(float* mixBuffer) {
}
// direct mix into mixBuffer
for (int i = 0; i < AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL; i++) {
mixBuffer[2*i+0] += convertToFloat(_localScratchBuffer[2*i+0]) * gain;
mixBuffer[2*i+1] += convertToFloat(_localScratchBuffer[2*i+1]) * gain;
}
injector->getLocalHRTF().mixStereo(_localScratchBuffer, mixBuffer, gain,
AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);
} else { // injector is mono
if (options.positionSet) {
@ -1431,11 +1427,8 @@ bool AudioClient::mixLocalAudioInjectors(float* mixBuffer) {
} else {
// direct mix into mixBuffer
for (int i = 0; i < AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL; i++) {
float sample = convertToFloat(_localScratchBuffer[i]) * gain;
mixBuffer[2*i+0] += sample;
mixBuffer[2*i+1] += sample;
}
injector->getLocalHRTF().mixMono(_localScratchBuffer, mixBuffer, gain,
AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);
}
}

View file

@ -882,14 +882,16 @@ static void convertInput_ref(int16_t* src, float *dst[4], float gain, int numFra
#endif
// in-place rotation of the soundfield
// crossfade between old and new rotation, to prevent artifacts
static void rotate_3x3_ref(float* buf[4], const float m0[3][3], const float m1[3][3], const float* win, int numFrames) {
// in-place rotation and scaling of the soundfield
// crossfade between old and new matrix, to prevent artifacts
static void rotate_4x4_ref(float* buf[4], const float m0[4][4], const float m1[4][4], const float* win, int numFrames) {
const float md[3][3] = {
{ m0[0][0] - m1[0][0], m0[0][1] - m1[0][1], m0[0][2] - m1[0][2] },
{ m0[1][0] - m1[1][0], m0[1][1] - m1[1][1], m0[1][2] - m1[1][2] },
{ m0[2][0] - m1[2][0], m0[2][1] - m1[2][1], m0[2][2] - m1[2][2] },
// matrix difference
const float md[4][4] = {
{ m0[0][0] - m1[0][0], m0[0][1] - m1[0][1], m0[0][2] - m1[0][2], m0[0][3] - m1[0][3] },
{ m0[1][0] - m1[1][0], m0[1][1] - m1[1][1], m0[1][2] - m1[1][2], m0[1][3] - m1[1][3] },
{ m0[2][0] - m1[2][0], m0[2][1] - m1[2][1], m0[2][2] - m1[2][2], m0[2][3] - m1[2][3] },
{ m0[3][0] - m1[3][0], m0[3][1] - m1[3][1], m0[3][2] - m1[3][2], m0[3][3] - m1[3][3] },
};
for (int i = 0; i < numFrames; i++) {
@ -898,22 +900,27 @@ static void rotate_3x3_ref(float* buf[4], const float m0[3][3], const float m1[3
// interpolate the matrix
float m00 = m1[0][0] + frac * md[0][0];
float m10 = m1[1][0] + frac * md[1][0];
float m20 = m1[2][0] + frac * md[2][0];
float m01 = m1[0][1] + frac * md[0][1];
float m11 = m1[1][1] + frac * md[1][1];
float m21 = m1[2][1] + frac * md[2][1];
float m31 = m1[3][1] + frac * md[3][1];
float m02 = m1[0][2] + frac * md[0][2];
float m12 = m1[1][2] + frac * md[1][2];
float m22 = m1[2][2] + frac * md[2][2];
float m32 = m1[3][2] + frac * md[3][2];
float m13 = m1[1][3] + frac * md[1][3];
float m23 = m1[2][3] + frac * md[2][3];
float m33 = m1[3][3] + frac * md[3][3];
// matrix multiply
float x = m00 * buf[1][i] + m01 * buf[2][i] + m02 * buf[3][i];
float y = m10 * buf[1][i] + m11 * buf[2][i] + m12 * buf[3][i];
float z = m20 * buf[1][i] + m21 * buf[2][i] + m22 * buf[3][i];
float w = m00 * buf[0][i];
float x = m11 * buf[1][i] + m12 * buf[2][i] + m13 * buf[3][i];
float y = m21 * buf[1][i] + m22 * buf[2][i] + m23 * buf[3][i];
float z = m31 * buf[1][i] + m32 * buf[2][i] + m33 * buf[3][i];
buf[0][i] = w;
buf[1][i] = x;
buf[2][i] = y;
buf[3][i] = z;
@ -932,7 +939,7 @@ void rfft512_AVX2(float buf[512]);
void rifft512_AVX2(float buf[512]);
void rfft512_cmadd_1X2_AVX2(const float src[512], const float coef0[512], const float coef1[512], float dst0[512], float dst1[512]);
void convertInput_AVX2(int16_t* src, float *dst[4], float gain, int numFrames);
void rotate_3x3_AVX2(float* buf[4], const float m0[3][3], const float m1[3][3], const float* win, int numFrames);
void rotate_4x4_AVX2(float* buf[4], const float m0[4][4], const float m1[4][4], const float* win, int numFrames);
static void rfft512(float buf[512]) {
static auto f = cpuSupportsAVX2() ? rfft512_AVX2 : rfft512_ref;
@ -954,8 +961,8 @@ static void convertInput(int16_t* src, float *dst[4], float gain, int numFrames)
(*f)(src, dst, gain, numFrames); // dispatch
}
static void rotate_3x3(float* buf[4], const float m0[3][3], const float m1[3][3], const float* win, int numFrames) {
static auto f = cpuSupportsAVX2() ? rotate_3x3_AVX2 : rotate_3x3_ref;
// Runtime dispatcher for the in-place 4x4 soundfield transform.
// The implementation is chosen when the function-local static is initialized:
// rotate_4x4_AVX2 if cpuSupportsAVX2() reports true, otherwise rotate_4x4_ref.
// All arguments are forwarded unchanged to the selected implementation.
static void rotate_4x4(float* buf[4], const float m0[4][4], const float m1[4][4], const float* win, int numFrames) {
// function pointer to the selected implementation, initialized once
static auto f = cpuSupportsAVX2() ? rotate_4x4_AVX2 : rotate_4x4_ref;
(*f)(buf, m0, m1, win, numFrames); // dispatch
}
@ -965,7 +972,7 @@ static auto& rfft512 = rfft512_ref;
static auto& rifft512 = rifft512_ref;
static auto& rfft512_cmadd_1X2 = rfft512_cmadd_1X2_ref;
static auto& convertInput = convertInput_ref;
static auto& rotate_3x3 = rotate_3x3_ref;
static auto& rotate_4x4 = rotate_4x4_ref;
#endif
@ -1007,8 +1014,8 @@ ALIGN32 static const float crossfadeTable[FOA_BLOCK] = {
0.0020975362f, 0.0015413331f, 0.0010705384f, 0.0006852326f, 0.0003854819f, 0.0001713375f, 0.0000428362f, 0.0000000000f,
};
// convert quaternion to a column-major 3x3 rotation matrix
static void quatToMatrix_3x3(float w, float x, float y, float z, float m[3][3]) {
// convert quaternion to a column-major 4x4 rotation matrix
static void quatToMatrix_4x4(float w, float x, float y, float z, float m[4][4]) {
float xx = x * (x + x);
float xy = x * (y + y);
@ -1022,17 +1029,33 @@ static void quatToMatrix_3x3(float w, float x, float y, float z, float m[3][3])
float wy = w * (y + y);
float wz = w * (z + z);
m[0][0] = 1.0f - (yy + zz);
m[0][1] = xy - wz;
m[0][2] = xz + wy;
m[0][0] = 1.0f;
m[0][1] = 0.0f;
m[0][2] = 0.0f;
m[0][3] = 0.0f;
m[1][0] = xy + wz;
m[1][1] = 1.0f - (xx + zz);
m[1][2] = yz - wx;
m[1][0] = 0.0f;
m[1][1] = 1.0f - (yy + zz);
m[1][2] = xy - wz;
m[1][3] = xz + wy;
m[2][0] = xz - wy;
m[2][1] = yz + wx;
m[2][2] = 1.0f - (xx + yy);
m[2][0] = 0.0f;
m[2][1] = xy + wz;
m[2][2] = 1.0f - (xx + zz);
m[2][3] = yz - wx;
m[3][0] = 0.0f;
m[3][1] = xz - wy;
m[3][2] = yz + wx;
m[3][3] = 1.0f - (xx + yy);
}
// multiply every element of a 4x4 matrix by a scalar, in place
static void scaleMatrix_4x4(float scale, float m[4][4]) {

    for (int row = 0; row < 4; ++row) {
        for (float& element : m[row]) {
            element *= scale;
        }
    }
}
// Ambisonic to binaural render
@ -1047,18 +1070,26 @@ void AudioFOA::render(int16_t* input, float* output, int index, float qw, float
ALIGN32 float inBuffer[4][FOA_BLOCK]; // deinterleaved input buffers
float* in[4] = { inBuffer[0], inBuffer[1], inBuffer[2], inBuffer[3] };
float rotation[3][3];
float rotation[4][4];
// convert input to deinterleaved float
convertInput(input, in, FOA_GAIN * gain, FOA_BLOCK);
convertInput(input, in, FOA_GAIN, FOA_BLOCK);
// convert quaternion to 3x3 rotation
quatToMatrix_3x3(qw, qx, qy, qz, rotation);
// convert quaternion to 4x4 rotation
quatToMatrix_4x4(qw, qx, qy, qz, rotation);
// rotate the soundfield
rotate_3x3(in, _rotationState, rotation, crossfadeTable, FOA_BLOCK);
// apply gain as uniform scale
scaleMatrix_4x4(gain, rotation);
// rotation history update
// disable interpolation from reset state
if (_resetState) {
memcpy(_rotationState, rotation, sizeof(_rotationState));
}
// rotate and scale the soundfield
rotate_4x4(in, _rotationState, rotation, crossfadeTable, FOA_BLOCK);
// new parameters become old
memcpy(_rotationState, rotation, sizeof(_rotationState));
//
@ -1093,4 +1124,6 @@ void AudioFOA::render(int16_t* input, float* output, int index, float qw, float
output[2*i+0] += accBuffer[0][i + FOA_OVERLAP];
output[2*i+1] += accBuffer[1][i + FOA_OVERLAP];
}
_resetState = false;
}

View file

@ -28,12 +28,7 @@ static_assert((FOA_BLOCK + FOA_OVERLAP) == FOA_NFFT, "FFT convolution requires L
class AudioFOA {
public:
AudioFOA() {
// identity matrix
_rotationState[0][0] = 1.0f;
_rotationState[1][1] = 1.0f;
_rotationState[2][2] = 1.0f;
};
AudioFOA() {};
//
// input: interleaved First-Order Ambisonic source
@ -55,8 +50,10 @@ private:
// input history, for overlap-save
float _fftState[4][FOA_OVERLAP] = {};
// orientation history
float _rotationState[3][3] = {};
// orientation and gain history
float _rotationState[4][4] = {};
bool _resetState = true;
};
#endif // AudioFOA_h

View file

@ -750,6 +750,43 @@ static void interpolate(const float* src0, const float* src1, float* dst, float
#endif
// mono input to interleaved stereo output, with gain crossfade and accumulation
//
// For each frame i, the gain is gain1 + win[i] * (gain0 - gain1), so it equals gain0
// where win[i] is 1 and gain1 where win[i] is 0. The scaled sample is ADDED to both
// channels of dst (dst[2*i+0] and dst[2*i+1]); dst is never cleared here.
static void gainfade_1x2(int16_t* src, float* dst, const float* win, float gain0, float gain1, int numFrames) {

    // fold the int16_t to float conversion into both gain endpoints
    const float toFloat = 1/32768.0f;
    const float scaled0 = gain0 * toFloat;
    const float scaled1 = gain1 * toFloat;

    float* out = dst;
    for (int n = 0; n < numFrames; n++, out += 2) {

        // per-frame interpolated gain
        const float blended = scaled1 + win[n] * (scaled0 - scaled1);
        const float mixed = (float)src[n] * blended;

        // same sample into left and right
        out[0] += mixed;
        out[1] += mixed;
    }
}
// interleaved stereo input to interleaved stereo output, with gain crossfade and accumulation
//
// For each frame i, the gain is gain1 + win[i] * (gain0 - gain1), so it equals gain0
// where win[i] is 1 and gain1 where win[i] is 0. The same gain is applied to both
// channels of the frame, and the result is ADDED to dst; dst is never cleared here.
static void gainfade_2x2(int16_t* src, float* dst, const float* win, float gain0, float gain1, int numFrames) {

    // fold the int16_t to float conversion into both gain endpoints
    const float toFloat = 1/32768.0f;
    const float scaled0 = gain0 * toFloat;
    const float scaled1 = gain1 * toFloat;

    for (int n = 0; n < numFrames; n++) {

        const int16_t* in = src + 2*n;
        float* out = dst + 2*n;

        // per-frame interpolated gain
        const float blended = scaled1 + win[n] * (scaled0 - scaled1);

        const float left = (float)in[0] * blended;
        const float right = (float)in[1] * blended;

        out[0] += left;
        out[1] += right;
    }
}
// design a 2nd order Thiran allpass
static void ThiranBiquad(float f, float& b0, float& b1, float& b2, float& a1, float& a2) {
@ -1104,6 +1141,13 @@ void AudioHRTF::render(int16_t* input, float* output, int index, float azimuth,
// apply global and local gain adjustment
gain *= _gainAdjust;
// disable interpolation from reset state
if (_resetState) {
_azimuthState = azimuth;
_distanceState = distance;
_gainState = gain;
}
// to avoid polluting the cache, old filters are recomputed instead of stored
setFilters(firCoef, bqCoef, delay, index, _azimuthState, _distanceState, _gainState, L0);
@ -1175,3 +1219,45 @@ void AudioHRTF::render(int16_t* input, float* output, int index, float azimuth,
_resetState = false;
}
// Direct mix of a mono block, accumulated into output via gainfade_1x2.
// The gain is crossfaded from the value stored by the previous call to the value
// requested now; exactly HRTF_BLOCK frames are processed.
void AudioHRTF::mixMono(int16_t* input, float* output, float gain, int numFrames) {

    assert(numFrames == HRTF_BLOCK);

    // apply global and local gain adjustment
    const float targetGain = gain * _gainAdjust;

    // from the reset state there is no usable history, so start at the target (no fade)
    const float startGain = _resetState ? targetGain : _gainState;

    // crossfade gain and accumulate
    gainfade_1x2(input, output, crossfadeTable, startGain, targetGain, HRTF_BLOCK);

    // the target becomes the starting gain of the next call
    _gainState = targetGain;
    _resetState = false;
}
// Direct mix of an interleaved stereo block, accumulated into output via gainfade_2x2.
// The gain is crossfaded from the value stored by the previous call to the value
// requested now; exactly HRTF_BLOCK frames are processed.
void AudioHRTF::mixStereo(int16_t* input, float* output, float gain, int numFrames) {

    assert(numFrames == HRTF_BLOCK);

    // apply global and local gain adjustment
    const float targetGain = gain * _gainAdjust;

    // from the reset state there is no usable history, so start at the target (no fade)
    const float startGain = _resetState ? targetGain : _gainState;

    // crossfade gain and accumulate
    gainfade_2x2(input, output, crossfadeTable, startGain, targetGain, HRTF_BLOCK);

    // the target becomes the starting gain of the next call
    _gainState = targetGain;
    _resetState = false;
}

View file

@ -50,6 +50,12 @@ public:
//
void render(int16_t* input, float* output, int index, float azimuth, float distance, float gain, int numFrames);
//
// Non-spatialized direct mix (accumulates into existing output)
//
void mixMono(int16_t* input, float* output, float gain, int numFrames);
void mixStereo(int16_t* input, float* output, float gain, int numFrames);
//
// Fast path when input is known to be silent and state has been flushed
//

View file

@ -1289,14 +1289,16 @@ void convertInput_AVX2(int16_t* src, float *dst[4], float gain, int numFrames) {
#endif
// in-place rotation of the soundfield
// crossfade between old and new rotation, to prevent artifacts
void rotate_3x3_AVX2(float* buf[4], const float m0[3][3], const float m1[3][3], const float* win, int numFrames) {
// in-place rotation and scaling of the soundfield
// crossfade between old and new matrix, to prevent artifacts
void rotate_4x4_AVX2(float* buf[4], const float m0[4][4], const float m1[4][4], const float* win, int numFrames) {
const float md[3][3] = {
{ m0[0][0] - m1[0][0], m0[0][1] - m1[0][1], m0[0][2] - m1[0][2] },
{ m0[1][0] - m1[1][0], m0[1][1] - m1[1][1], m0[1][2] - m1[1][2] },
{ m0[2][0] - m1[2][0], m0[2][1] - m1[2][1], m0[2][2] - m1[2][2] },
// matrix difference
const float md[4][4] = {
{ m0[0][0] - m1[0][0], m0[0][1] - m1[0][1], m0[0][2] - m1[0][2], m0[0][3] - m1[0][3] },
{ m0[1][0] - m1[1][0], m0[1][1] - m1[1][1], m0[1][2] - m1[1][2], m0[1][3] - m1[1][3] },
{ m0[2][0] - m1[2][0], m0[2][1] - m1[2][1], m0[2][2] - m1[2][2], m0[2][3] - m1[2][3] },
{ m0[3][0] - m1[3][0], m0[3][1] - m1[3][1], m0[3][2] - m1[3][2], m0[3][3] - m1[3][3] },
};
assert(numFrames % 8 == 0);
@ -1307,30 +1309,35 @@ void rotate_3x3_AVX2(float* buf[4], const float m0[3][3], const float m1[3][3],
// interpolate the matrix
__m256 m00 = _mm256_fmadd_ps(frac, _mm256_broadcast_ss(&md[0][0]), _mm256_broadcast_ss(&m1[0][0]));
__m256 m10 = _mm256_fmadd_ps(frac, _mm256_broadcast_ss(&md[1][0]), _mm256_broadcast_ss(&m1[1][0]));
__m256 m20 = _mm256_fmadd_ps(frac, _mm256_broadcast_ss(&md[2][0]), _mm256_broadcast_ss(&m1[2][0]));
__m256 m01 = _mm256_fmadd_ps(frac, _mm256_broadcast_ss(&md[0][1]), _mm256_broadcast_ss(&m1[0][1]));
__m256 m11 = _mm256_fmadd_ps(frac, _mm256_broadcast_ss(&md[1][1]), _mm256_broadcast_ss(&m1[1][1]));
__m256 m21 = _mm256_fmadd_ps(frac, _mm256_broadcast_ss(&md[2][1]), _mm256_broadcast_ss(&m1[2][1]));
__m256 m31 = _mm256_fmadd_ps(frac, _mm256_broadcast_ss(&md[3][1]), _mm256_broadcast_ss(&m1[3][1]));
__m256 m02 = _mm256_fmadd_ps(frac, _mm256_broadcast_ss(&md[0][2]), _mm256_broadcast_ss(&m1[0][2]));
__m256 m12 = _mm256_fmadd_ps(frac, _mm256_broadcast_ss(&md[1][2]), _mm256_broadcast_ss(&m1[1][2]));
__m256 m22 = _mm256_fmadd_ps(frac, _mm256_broadcast_ss(&md[2][2]), _mm256_broadcast_ss(&m1[2][2]));
__m256 m32 = _mm256_fmadd_ps(frac, _mm256_broadcast_ss(&md[3][2]), _mm256_broadcast_ss(&m1[3][2]));
__m256 m13 = _mm256_fmadd_ps(frac, _mm256_broadcast_ss(&md[1][3]), _mm256_broadcast_ss(&m1[1][3]));
__m256 m23 = _mm256_fmadd_ps(frac, _mm256_broadcast_ss(&md[2][3]), _mm256_broadcast_ss(&m1[2][3]));
__m256 m33 = _mm256_fmadd_ps(frac, _mm256_broadcast_ss(&md[3][3]), _mm256_broadcast_ss(&m1[3][3]));
// matrix multiply
__m256 x = _mm256_mul_ps(m00, _mm256_loadu_ps(&buf[1][i]));
__m256 y = _mm256_mul_ps(m10, _mm256_loadu_ps(&buf[1][i]));
__m256 z = _mm256_mul_ps(m20, _mm256_loadu_ps(&buf[1][i]));
__m256 w = _mm256_mul_ps(m00, _mm256_loadu_ps(&buf[0][i]));
x = _mm256_fmadd_ps(m01, _mm256_loadu_ps(&buf[2][i]), x);
y = _mm256_fmadd_ps(m11, _mm256_loadu_ps(&buf[2][i]), y);
z = _mm256_fmadd_ps(m21, _mm256_loadu_ps(&buf[2][i]), z);
__m256 x = _mm256_mul_ps(m11, _mm256_loadu_ps(&buf[1][i]));
__m256 y = _mm256_mul_ps(m21, _mm256_loadu_ps(&buf[1][i]));
__m256 z = _mm256_mul_ps(m31, _mm256_loadu_ps(&buf[1][i]));
x = _mm256_fmadd_ps(m02, _mm256_loadu_ps(&buf[3][i]), x);
y = _mm256_fmadd_ps(m12, _mm256_loadu_ps(&buf[3][i]), y);
z = _mm256_fmadd_ps(m22, _mm256_loadu_ps(&buf[3][i]), z);
x = _mm256_fmadd_ps(m12, _mm256_loadu_ps(&buf[2][i]), x);
y = _mm256_fmadd_ps(m22, _mm256_loadu_ps(&buf[2][i]), y);
z = _mm256_fmadd_ps(m32, _mm256_loadu_ps(&buf[2][i]), z);
x = _mm256_fmadd_ps(m13, _mm256_loadu_ps(&buf[3][i]), x);
y = _mm256_fmadd_ps(m23, _mm256_loadu_ps(&buf[3][i]), y);
z = _mm256_fmadd_ps(m33, _mm256_loadu_ps(&buf[3][i]), z);
_mm256_storeu_ps(&buf[0][i], w);
_mm256_storeu_ps(&buf[1][i], x);
_mm256_storeu_ps(&buf[2][i], y);
_mm256_storeu_ps(&buf[3][i], z);