Merge pull request #8733 from zzmp/fix/audio-channel-swap

fix intermittent channel swapping
commit bc6797fc3a
Chris Collins, 2016-10-05 15:54:25 -07:00 (committed by GitHub)
10 changed files with 79 additions and 78 deletions
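
The samples-to-frames rename that runs through this diff is the substance of the fix: network audio is interleaved (L R L R ... for stereo), so any write accounted in raw samples can end on a half frame, after which every read pairs a right sample with the following frame's left sample and the two channels trade places. A minimal standalone sketch of that failure mode (illustrative only, with toy values; not code from this repository):

    // Illustrative sketch: an odd sample-count write misaligns an interleaved
    // stereo stream, swapping channels for everything written afterwards.
    #include <cstdio>
    #include <vector>

    int main() {
        std::vector<short> stream; // interleaved stereo: L R L R ...

        // Old-style accounting: a silent gap measured in samples may be odd.
        int silentSamples = 3;
        stream.insert(stream.end(), silentSamples, 0);

        // The next real frame now starts on an R slot instead of an L slot.
        stream.push_back(+1000); // intended left
        stream.push_back(-1000); // intended right

        for (size_t i = 0; i + 1 < stream.size(); i += 2) {
            std::printf("L=%6d R=%6d\n", stream[i], stream[i + 1]);
        }
        // Prints "L=0 R=0" then "L=0 R=1000": the intended left sample landed
        // in the right channel. Counting in frames (samples / channels), as
        // this PR does, makes every write a whole number of L/R pairs.
        return 0;
    }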

Agent.cpp

@@ -48,8 +48,7 @@ static const int RECEIVED_AUDIO_STREAM_CAPACITY_FRAMES = 10;
 Agent::Agent(ReceivedMessage& message) :
     ThreadedAssignment(message),
     _entityEditSender(),
-    _receivedAudioStream(AudioConstants::NETWORK_FRAME_SAMPLES_STEREO,
-        RECEIVED_AUDIO_STREAM_CAPACITY_FRAMES, RECEIVED_AUDIO_STREAM_CAPACITY_FRAMES) {
+    _receivedAudioStream(RECEIVED_AUDIO_STREAM_CAPACITY_FRAMES, RECEIVED_AUDIO_STREAM_CAPACITY_FRAMES) {
     DependencyManager::get<EntityScriptingInterface>()->setPacketSender(&_entityEditSender);
     ResourceManager::init();

AudioScope.cpp

@@ -250,8 +250,6 @@ int AudioScope::addSilenceToScope(QByteArray* byteArray, int frameOffset, int si
 }
 
-const int STEREO_FACTOR = 2;
-
 void AudioScope::addStereoSilenceToScope(int silentSamplesPerChannel) {
     if (!_isEnabled || _isPaused) {
         return;
@@ -265,10 +263,10 @@ void AudioScope::addStereoSamplesToScope(const QByteArray& samples) {
         return;
     }
 
     const int16_t* samplesData = reinterpret_cast<const int16_t*>(samples.data());
-    int samplesPerChannel = samples.size() / sizeof(int16_t) / STEREO_FACTOR;
-    addBufferToScope(_scopeOutputLeft, _scopeOutputOffset, samplesData, samplesPerChannel, 0, STEREO_FACTOR);
-    _scopeOutputOffset = addBufferToScope(_scopeOutputRight, _scopeOutputOffset, samplesData, samplesPerChannel, 1, STEREO_FACTOR);
+    int samplesPerChannel = samples.size() / sizeof(int16_t) / AudioConstants::STEREO;
+    addBufferToScope(_scopeOutputLeft, _scopeOutputOffset, samplesData, samplesPerChannel, 0, AudioConstants::STEREO);
+    _scopeOutputOffset = addBufferToScope(_scopeOutputRight, _scopeOutputOffset, samplesData, samplesPerChannel, 1, AudioConstants::STEREO);
 
     _scopeLastFrame = samples.right(AudioConstants::NETWORK_FRAME_BYTES_STEREO);
 }
@@ -282,9 +280,9 @@ void AudioScope::addLastFrameRepeatedWithFadeToScope(int samplesPerChannel) {
         int samplesToWriteThisIteration = std::min(samplesRemaining, (int) AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);
         float fade = calculateRepeatedFrameFadeFactor(indexOfRepeat);
         addBufferToScope(_scopeOutputLeft, _scopeOutputOffset, lastFrameData,
-            samplesToWriteThisIteration, 0, STEREO_FACTOR, fade);
+            samplesToWriteThisIteration, 0, AudioConstants::STEREO, fade);
         _scopeOutputOffset = addBufferToScope(_scopeOutputRight, _scopeOutputOffset,
-            lastFrameData, samplesToWriteThisIteration, 1, STEREO_FACTOR, fade);
+            lastFrameData, samplesToWriteThisIteration, 1, AudioConstants::STEREO, fade);
         samplesRemaining -= samplesToWriteThisIteration;
         indexOfRepeat++;

AudioClient.cpp

@@ -115,7 +115,7 @@ AudioClient::AudioClient() :
     _loopbackAudioOutput(NULL),
     _loopbackOutputDevice(NULL),
     _inputRingBuffer(0),
-    _receivedAudioStream(0, RECEIVED_AUDIO_STREAM_CAPACITY_FRAMES),
+    _receivedAudioStream(RECEIVED_AUDIO_STREAM_CAPACITY_FRAMES),
     _isStereoInput(false),
     _outputStarveDetectionStartTimeMsec(0),
     _outputStarveDetectionCount(0),
@@ -1152,9 +1152,9 @@ bool AudioClient::outputLocalInjector(bool isStereo, AudioInjector* injector) {
 }
 
 void AudioClient::outputFormatChanged() {
-    int outputFormatChannelCountTimesSampleRate = _outputFormat.channelCount() * _outputFormat.sampleRate();
-    _outputFrameSize = AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL * outputFormatChannelCountTimesSampleRate / _desiredOutputFormat.sampleRate();
-    _receivedAudioStream.outputFormatChanged(outputFormatChannelCountTimesSampleRate);
+    _outputFrameSize = (AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL * _outputFormat.channelCount() * _outputFormat.sampleRate()) /
+        _desiredOutputFormat.sampleRate();
+    _receivedAudioStream.outputFormatChanged(_outputFormat.sampleRate(), _outputFormat.channelCount());
 }
 
 bool AudioClient::switchInputToAudioDevice(const QAudioDeviceInfo& inputDeviceInfo) {

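For a concrete feel for the rewritten _outputFrameSize arithmetic above, here is a quick check using assumed values for the constants involved (240 samples per channel per 10 ms network frame at 24000 Hz; treat both numbers as assumptions, not facts stated by this diff):

    // Sanity check of the new _outputFrameSize formula, with assumed
    // AudioConstants values (240 samples/channel/frame, 24000 Hz network rate).
    #include <cassert>

    int main() {
        const int NETWORK_FRAME_SAMPLES_PER_CHANNEL = 240; // assumed constant
        const int NETWORK_SAMPLE_RATE = 24000;             // assumed constant

        // A 48 kHz stereo output device:
        int channelCount = 2;
        int sampleRate = 48000;

        int outputFrameSize = (NETWORK_FRAME_SAMPLES_PER_CHANNEL * channelCount * sampleRate) /
            NETWORK_SAMPLE_RATE;

        // One 10 ms network frame becomes 960 interleaved device samples.
        assert(outputFrameSize == 960);
        return 0;
    }
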
InboundAudioStream.cpp

@@ -46,10 +46,11 @@ static const int STATS_FOR_STATS_PACKET_WINDOW_SECONDS = 30;
 // _currentJitterBufferFrames is updated with the time-weighted avg and the running time-weighted avg is reset.
 static const quint64 FRAMES_AVAILABLE_STAT_WINDOW_USECS = 10 * USECS_PER_SECOND;
 
-InboundAudioStream::InboundAudioStream(int numFrameSamples, int numFramesCapacity, int numStaticJitterFrames) :
-    _ringBuffer(numFrameSamples, numFramesCapacity),
-    _dynamicJitterBufferEnabled(numStaticJitterFrames == -1),
-    _staticJitterBufferFrames(std::max(numStaticJitterFrames, DEFAULT_STATIC_JITTER_FRAMES)),
+InboundAudioStream::InboundAudioStream(int numChannels, int numFrames, int numBlocks, int numStaticJitterBlocks) :
+    _ringBuffer(numChannels * numFrames, numBlocks),
+    _numChannels(numChannels),
+    _dynamicJitterBufferEnabled(numStaticJitterBlocks == -1),
+    _staticJitterBufferFrames(std::max(numStaticJitterBlocks, DEFAULT_STATIC_JITTER_FRAMES)),
     _desiredJitterBufferFrames(_dynamicJitterBufferEnabled ? 1 : _staticJitterBufferFrames),
     _incomingSequenceNumberStats(STATS_FOR_STATS_PACKET_WINDOW_SECONDS),
     _starveHistory(STARVE_HISTORY_CAPACITY),
@@ -121,11 +122,11 @@ int InboundAudioStream::parseData(ReceivedMessage& message) {
     packetReceivedUpdateTimingStats();
 
-    int networkSamples;
+    int networkFrames;
 
     // parse the info after the seq number and before the audio data (the stream properties)
     int prePropertyPosition = message.getPosition();
-    int propertyBytes = parseStreamProperties(message.getType(), message.readWithoutCopy(message.getBytesLeftToRead()), networkSamples);
+    int propertyBytes = parseStreamProperties(message.getType(), message.readWithoutCopy(message.getBytesLeftToRead()), networkFrames);
     message.seek(prePropertyPosition + propertyBytes);
 
     // handle this packet based on its arrival status.
@@ -135,7 +136,7 @@ int InboundAudioStream::parseData(ReceivedMessage& message) {
             // NOTE: we assume that each dropped packet contains the same number of samples
             // as the packet we just received.
             int packetsDropped = arrivalInfo._seqDiffFromExpected;
-            writeSamplesForDroppedPackets(packetsDropped * networkSamples);
+            writeFramesForDroppedPackets(packetsDropped * networkFrames);
 
             // fall through to OnTime case
         }
@@ -143,7 +144,7 @@ int InboundAudioStream::parseData(ReceivedMessage& message) {
             // Packet is on time; parse its data to the ringbuffer
             if (message.getType() == PacketType::SilentAudioFrame) {
                 // FIXME - Some codecs need to know about these silent frames... and can produce better output
-                writeDroppableSilentSamples(networkSamples);
+                writeDroppableSilentFrames(networkFrames);
             } else {
                 // note: PCM and no codec are identical
                 bool selectedPCM = _selectedCodecName == "pcm" || _selectedCodecName == "";
@@ -153,7 +154,7 @@ int InboundAudioStream::parseData(ReceivedMessage& message) {
                     parseAudioData(message.getType(), afterProperties);
                 } else {
                     qDebug() << "Codec mismatch: expected" << _selectedCodecName << "got" << codecInPacket << "writing silence";
-                    writeDroppableSilentSamples(networkSamples);
+                    writeDroppableSilentFrames(networkFrames);
 
                     // inform others of the mismatch
                     auto sendingNode = DependencyManager::get<NodeList>()->nodeWithUUID(message.getSourceID());
                     emit mismatchedAudioCodec(sendingNode, _selectedCodecName, codecInPacket);
@@ -218,12 +219,13 @@ int InboundAudioStream::parseAudioData(PacketType type, const QByteArray& packet
     return _ringBuffer.writeData(decodedBuffer.data(), actualSize);
 }
 
-int InboundAudioStream::writeDroppableSilentSamples(int silentSamples) {
+int InboundAudioStream::writeDroppableSilentFrames(int silentFrames) {
     if (_decoder) {
-        _decoder->trackLostFrames(silentSamples);
+        _decoder->trackLostFrames(silentFrames);
     }
 
     // calculate how many silent frames we should drop.
+    int silentSamples = silentFrames * _numChannels;
     int samplesPerFrame = _ringBuffer.getNumFrameSamples();
     int desiredJitterBufferFramesPlusPadding = _desiredJitterBufferFrames + DESIRED_JITTER_BUFFER_FRAMES_PADDING;
     int numSilentFramesToDrop = 0;
@@ -414,14 +416,14 @@ void InboundAudioStream::packetReceivedUpdateTimingStats() {
     _lastPacketReceivedTime = now;
 }
 
-int InboundAudioStream::writeSamplesForDroppedPackets(int networkSamples) {
-    return writeLastFrameRepeatedWithFade(networkSamples);
+int InboundAudioStream::writeFramesForDroppedPackets(int networkFrames) {
+    return writeLastFrameRepeatedWithFade(networkFrames);
 }
 
-int InboundAudioStream::writeLastFrameRepeatedWithFade(int samples) {
+int InboundAudioStream::writeLastFrameRepeatedWithFade(int frames) {
     AudioRingBuffer::ConstIterator frameToRepeat = _ringBuffer.lastFrameWritten();
     int frameSize = _ringBuffer.getNumFrameSamples();
-    int samplesToWrite = samples;
+    int samplesToWrite = frames * _numChannels;
     int indexOfRepeat = 0;
     do {
         int samplesToWriteThisIteration = std::min(samplesToWrite, frameSize);
@@ -434,7 +436,7 @@ int InboundAudioStream::writeLastFrameRepeatedWithFade(int samples) {
         indexOfRepeat++;
     } while (samplesToWrite > 0);
 
-    return samples;
+    return frames;
 }
 
 AudioStreamStats InboundAudioStream::getAudioStreamStats() const {

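The two writers above illustrate the invariant the refactor enforces: callers and packet headers count in frames, and the stream converts to interleaved samples in exactly one place, using its own _numChannels. A condensed sketch of that contract with toy types (not the real classes):

    // Condensed sketch of the frames-in, samples-internally invariant that
    // writeDroppableSilentFrames and writeLastFrameRepeatedWithFade now follow.
    // Toy stand-in for the real class; only the unit handling is the point.
    #include <algorithm>
    #include <cassert>

    struct StreamSketch {
        int numChannels;      // 1 mono, 2 stereo (fixed at construction)
        int frameSizeSamples; // ring-buffer write granularity, in samples
        int samplesWritten = 0;

        int writeSilentFrames(int frames) {
            samplesWritten += frames * numChannels; // the one conversion point
            return frames;                          // report back in frames
        }

        int repeatLastFrameWithFade(int frames) {
            int samplesToWrite = frames * numChannels;
            while (samplesToWrite > 0) {
                int chunk = std::min(samplesToWrite, frameSizeSamples);
                samplesWritten += chunk; // real code also applies a fade here
                samplesToWrite -= chunk;
            }
            return frames;
        }
    };

    int main() {
        StreamSketch stereo{2, 480};
        stereo.writeSilentFrames(240);
        stereo.repeatLastFrameWithFade(240);
        assert(stereo.samplesWritten == 960); // always a multiple of numChannels
        return 0;
    }
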
InboundAudioStream.h

@@ -47,7 +47,7 @@ public:
     static const bool REPETITION_WITH_FADE;
 
     InboundAudioStream() = delete;
-    InboundAudioStream(int numFrameSamples, int numFramesCapacity, int numStaticJitterFrames = -1);
+    InboundAudioStream(int numChannels, int numFrames, int numBlocks, int numStaticJitterBlocks);
     ~InboundAudioStream();
 
     void reset();
@@ -115,7 +115,7 @@ public slots:
 private:
     void packetReceivedUpdateTimingStats();
 
-    int writeSamplesForDroppedPackets(int networkSamples);
+    int writeFramesForDroppedPackets(int networkFrames);
 
     void popSamplesNoCheck(int samples);
     void framesAvailableChanged();
@@ -134,16 +134,17 @@ protected:
     /// default implementation assumes packet contains raw audio samples after stream properties
     virtual int parseAudioData(PacketType type, const QByteArray& packetAfterStreamProperties);
 
-    /// writes silent samples to the buffer that may be dropped to reduce latency caused by the buffer
-    virtual int writeDroppableSilentSamples(int silentSamples);
+    /// writes silent frames to the buffer that may be dropped to reduce latency caused by the buffer
+    virtual int writeDroppableSilentFrames(int silentFrames);
 
     /// writes the last written frame repeatedly, gradually fading to silence.
     /// used for writing samples for dropped packets.
-    virtual int writeLastFrameRepeatedWithFade(int samples);
+    virtual int writeLastFrameRepeatedWithFade(int frames);
 
 protected:
     AudioRingBuffer _ringBuffer;
+    int _numChannels;
 
     bool _lastPopSucceeded { false };
     AudioRingBuffer::ConstIterator _lastPopOutput;

MixedAudioStream.cpp

@@ -11,5 +11,8 @@
 
 #include "MixedAudioStream.h"
 
-MixedAudioStream::MixedAudioStream(int numFrameSamples, int numFramesCapacity, int numStaticJitterFrames) :
-    InboundAudioStream(numFrameSamples, numFramesCapacity, numStaticJitterFrames) {}
+#include "AudioConstants.h"
+
+MixedAudioStream::MixedAudioStream(int numFramesCapacity, int numStaticJitterFrames) :
+    InboundAudioStream(AudioConstants::STEREO, AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL,
+        numFramesCapacity, numStaticJitterFrames) {}

MixedAudioStream.h

@@ -16,7 +16,7 @@
 class MixedAudioStream : public InboundAudioStream {
 public:
-    MixedAudioStream(int numFrameSamples, int numFramesCapacity, int numStaticJitterFrames = -1);
+    MixedAudioStream(int numFramesCapacity, int numStaticJitterFrames = -1);
 
     float getNextOutputFrameLoudness() const { return _ringBuffer.getNextOutputFrameLoudness(); }
 };

MixedProcessedAudioStream.cpp

@@ -12,33 +12,30 @@
 #include "MixedProcessedAudioStream.h"
 #include "AudioLogging.h"
 
-static const int STEREO_FACTOR = 2;
-
-MixedProcessedAudioStream::MixedProcessedAudioStream(int numFrameSamples, int numFramesCapacity, int numStaticJitterFrames)
-    : InboundAudioStream(numFrameSamples, numFramesCapacity, numStaticJitterFrames) {}
+MixedProcessedAudioStream::MixedProcessedAudioStream(int numFramesCapacity, int numStaticJitterFrames)
+    : InboundAudioStream(AudioConstants::STEREO, AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL,
+          numFramesCapacity, numStaticJitterFrames) {}
 
-void MixedProcessedAudioStream::outputFormatChanged(int outputFormatChannelCountTimesSampleRate) {
-    _outputFormatChannelsTimesSampleRate = outputFormatChannelCountTimesSampleRate;
-    int deviceOutputFrameSize = networkToDeviceSamples(AudioConstants::NETWORK_FRAME_SAMPLES_STEREO);
-    _ringBuffer.resizeForFrameSize(deviceOutputFrameSize);
+void MixedProcessedAudioStream::outputFormatChanged(int sampleRate, int channelCount) {
+    _outputSampleRate = sampleRate;
+    _outputChannelCount = channelCount;
+    int deviceOutputFrameFrames = networkToDeviceFrames(AudioConstants::NETWORK_FRAME_SAMPLES_STEREO / AudioConstants::STEREO);
+    int deviceOutputFrameSamples = deviceOutputFrameFrames * AudioConstants::STEREO;
+    _ringBuffer.resizeForFrameSize(deviceOutputFrameSamples);
 }
 
-int MixedProcessedAudioStream::writeDroppableSilentSamples(int silentSamples) {
-    int deviceSilentSamplesWritten = InboundAudioStream::writeDroppableSilentSamples(networkToDeviceSamples(silentSamples));
-    emit addedSilence(deviceToNetworkSamples(deviceSilentSamplesWritten) / STEREO_FACTOR);
-    return deviceSilentSamplesWritten;
+int MixedProcessedAudioStream::writeDroppableSilentFrames(int silentFrames) {
+    int deviceSilentFrames = networkToDeviceFrames(silentFrames);
+    int deviceSilentFramesWritten = InboundAudioStream::writeDroppableSilentFrames(deviceSilentFrames);
+    emit addedSilence(deviceToNetworkFrames(deviceSilentFramesWritten));
+    return deviceSilentFramesWritten;
 }
 
-int MixedProcessedAudioStream::writeLastFrameRepeatedWithFade(int samples) {
-    int deviceSamplesWritten = InboundAudioStream::writeLastFrameRepeatedWithFade(networkToDeviceSamples(samples));
-    emit addedLastFrameRepeatedWithFade(deviceToNetworkSamples(deviceSamplesWritten) / STEREO_FACTOR);
-    return deviceSamplesWritten;
+int MixedProcessedAudioStream::writeLastFrameRepeatedWithFade(int frames) {
+    int deviceFrames = networkToDeviceFrames(frames);
+    int deviceFramesWritten = InboundAudioStream::writeLastFrameRepeatedWithFade(deviceFrames);
+    emit addedLastFrameRepeatedWithFade(deviceToNetworkFrames(deviceFramesWritten));
+    return deviceFramesWritten;
 }
 
 int MixedProcessedAudioStream::parseAudioData(PacketType type, const QByteArray& packetAfterStreamProperties) {
@@ -56,16 +53,16 @@ int MixedProcessedAudioStream::parseAudioData(PacketType type, const QByteArray&
     _ringBuffer.writeData(outputBuffer.data(), outputBuffer.size());
     qCDebug(audiostream, "Wrote %d samples to buffer (%d available)", outputBuffer.size() / (int)sizeof(int16_t), getSamplesAvailable());
     return packetAfterStreamProperties.size();
 }
 
-int MixedProcessedAudioStream::networkToDeviceSamples(int networkSamples) {
-    return (quint64)networkSamples * (quint64)_outputFormatChannelsTimesSampleRate / (quint64)(STEREO_FACTOR
-        * AudioConstants::SAMPLE_RATE);
+int MixedProcessedAudioStream::networkToDeviceFrames(int networkFrames) {
+    return ((quint64)networkFrames * _outputChannelCount * _outputSampleRate) /
+        (quint64)(AudioConstants::STEREO * AudioConstants::SAMPLE_RATE);
 }
 
-int MixedProcessedAudioStream::deviceToNetworkSamples(int deviceSamples) {
-    return (quint64)deviceSamples * (quint64)(STEREO_FACTOR * AudioConstants::SAMPLE_RATE)
-        / (quint64)_outputFormatChannelsTimesSampleRate;
+int MixedProcessedAudioStream::deviceToNetworkFrames(int deviceFrames) {
+    return (quint64)deviceFrames * (quint64)(AudioConstants::STEREO * AudioConstants::SAMPLE_RATE) /
+        (_outputSampleRate * _outputChannelCount);
 }

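The two helpers above rescale frame counts by both sample rate and channel count relative to the stereo network format. A worked check under the same assumed constants (stereo network audio at 24000 Hz; the real values live in AudioConstants):

    // Worked example of networkToDeviceFrames/deviceToNetworkFrames, with the
    // network side assumed to be stereo at 24000 Hz.
    #include <cassert>
    #include <cstdint>

    int main() {
        const uint64_t NETWORK_CHANNELS = 2;        // AudioConstants::STEREO
        const uint64_t NETWORK_SAMPLE_RATE = 24000; // assumed

        uint64_t outputChannelCount = 2;   // 48 kHz stereo device
        uint64_t outputSampleRate = 48000;

        auto networkToDeviceFrames = [&](uint64_t networkFrames) {
            return networkFrames * outputChannelCount * outputSampleRate /
                (NETWORK_CHANNELS * NETWORK_SAMPLE_RATE);
        };
        auto deviceToNetworkFrames = [&](uint64_t deviceFrames) {
            return deviceFrames * NETWORK_CHANNELS * NETWORK_SAMPLE_RATE /
                (outputChannelCount * outputSampleRate);
        };

        // 240 network frames (10 ms) map to 480 device frames at 48 kHz,
        // and the round trip is exact at this rate ratio.
        assert(networkToDeviceFrames(240) == 480);
        assert(deviceToNetworkFrames(480) == 240);
        return 0;
    }
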
MixedProcessedAudioStream.h

@@ -19,7 +19,7 @@ class AudioClient;
 class MixedProcessedAudioStream : public InboundAudioStream {
     Q_OBJECT
 public:
-    MixedProcessedAudioStream(int numFrameSamples, int numFramesCapacity, int numStaticJitterFrames = -1);
+    MixedProcessedAudioStream(int numFramesCapacity, int numStaticJitterFrames = -1);
 
 signals:
@@ -30,19 +30,20 @@ signals:
     void processSamples(const QByteArray& inputBuffer, QByteArray& outputBuffer);
 
 public:
-    void outputFormatChanged(int outputFormatChannelCountTimesSampleRate);
+    void outputFormatChanged(int sampleRate, int channelCount);
 
 protected:
-    int writeDroppableSilentSamples(int silentSamples) override;
-    int writeLastFrameRepeatedWithFade(int samples) override;
+    int writeDroppableSilentFrames(int silentFrames) override;
+    int writeLastFrameRepeatedWithFade(int frames) override;
     int parseAudioData(PacketType type, const QByteArray& packetAfterStreamProperties) override;
 
 private:
-    int networkToDeviceSamples(int networkSamples);
-    int deviceToNetworkSamples(int deviceSamples);
+    int networkToDeviceFrames(int networkFrames);
+    int deviceToNetworkFrames(int deviceFrames);
 
 private:
-    int _outputFormatChannelsTimesSampleRate;
+    quint64 _outputSampleRate;
+    quint64 _outputChannelCount;
 };
 
 #endif // hifi_MixedProcessedAudioStream_h

PositionalAudioStream.cpp

@@ -22,10 +22,10 @@
 #include <UUID.h>
 
 PositionalAudioStream::PositionalAudioStream(PositionalAudioStream::Type type, bool isStereo, int numStaticJitterFrames) :
-    InboundAudioStream(isStereo
-        ? AudioConstants::NETWORK_FRAME_SAMPLES_STEREO
-        : AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL,
-        AUDIOMIXER_INBOUND_RING_BUFFER_FRAME_CAPACITY, numStaticJitterFrames),
+    InboundAudioStream(isStereo ? AudioConstants::STEREO : AudioConstants::MONO,
+        AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL,
+        AUDIOMIXER_INBOUND_RING_BUFFER_FRAME_CAPACITY,
+        numStaticJitterFrames),
     _type(type),
     _position(0.0f, 0.0f, 0.0f),
     _orientation(0.0f, 0.0f, 0.0f, 0.0f),
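
Taken together, every stream subclass now hands the base class an explicit channel count and a per-channel frame size, and the ring buffer is sized as numChannels * numFrames per block. A toy illustration of that sizing rule (illustrative types only, not the real AudioRingBuffer):

    // Toy illustration of the new base-constructor sizing rule from this diff:
    // _ringBuffer(numChannels * numFrames, numBlocks).
    #include <cassert>

    struct RingBufferSketch {
        int frameSamples;   // samples per block
        int capacityBlocks; // how many blocks the buffer holds
    };

    struct InboundSketch {
        RingBufferSketch ring;
        int numChannels;
        InboundSketch(int numChannels, int numFrames, int numBlocks)
            : ring{numChannels * numFrames, numBlocks}, numChannels(numChannels) {}
    };

    int main() {
        // Stereo mixed stream: 2 channels * 240 frames = 480 samples per block.
        InboundSketch stereo(2, 240, 100);
        assert(stereo.ring.frameSamples == 480);

        // Mono positional stream keeps the same per-channel frame count.
        InboundSketch mono(1, 240, 100);
        assert(mono.ring.frameSamples == 240);
        return 0;
    }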