Merge pull request #9842 from ZappoMan/tellCodecAboutSilence

improved noise gate
Brad Hefta-Gaub 2017-03-09 11:04:22 -08:00 committed by GitHub
commit 45efb235e3
4 changed files with 42 additions and 9 deletions

AudioClient.cpp

@@ -1052,7 +1052,12 @@ void AudioClient::handleAudioInput() {
auto packetType = _shouldEchoToServer ?
PacketType::MicrophoneAudioWithEcho : PacketType::MicrophoneAudioNoEcho;
if (_lastInputLoudness == 0) {
// if the _inputGate closed in this last frame, then we don't actually want
// to send a silent packet; instead, we want to go ahead and encode and send
// the output from the input gate (eventually, this could be crossfaded)
// and allow the codec to properly encode down to silent/zero. If we still
// have a _lastInputLoudness of 0 in our NEXT frame, we will send a silent packet.
if (_lastInputLoudness == 0 && !_inputGate.closedInLastFrame()) {
packetType = PacketType::SilentAudioFrame;
}
Transform audioTransform;
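
In effect, the sender now waits one extra frame before switching to silent packets. A minimal sketch of that decision as a free function (the free-function shape is hypothetical; the real logic is inline in AudioClient::handleAudioInput(), and the names come from the diff):

```cpp
// Sketch only: mirrors the packet-type decision above.
PacketType choosePacketType(bool shouldEchoToServer, float lastInputLoudness,
                            const AudioNoiseGate& inputGate) {
    PacketType packetType = shouldEchoToServer
        ? PacketType::MicrophoneAudioWithEcho
        : PacketType::MicrophoneAudioNoEcho;
    // On the frame in which the gate closes, still send encoded audio so the
    // codec can ramp its internal state down to silence; only frames after
    // that are flagged as silent.
    if (lastInputLoudness == 0 && !inputGate.closedInLastFrame()) {
        packetType = PacketType::SilentAudioFrame;
    }
    return packetType;
}
```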

AudioNoiseGate.cpp

@@ -58,7 +58,6 @@ void AudioNoiseGate::removeDCOffset(int16_t* samples, int numSamples) {
}
}
void AudioNoiseGate::gateSamples(int16_t* samples, int numSamples) {
//
// Impose Noise Gate
@@ -77,8 +76,7 @@ void AudioNoiseGate::gateSamples(int16_t* samples, int numSamples) {
// NOISE_GATE_FRAMES_TO_AVERAGE: How many audio frames should we average together to compute noise floor.
// More means better rejection but also can reject continuous things like singing.
// NUMBER_OF_NOISE_SAMPLE_FRAMES: How often should we re-evaluate the noise floor?
float loudness = 0;
int thisSample = 0;
int samplesOverNoiseGate = 0;
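
For context, the noise floor these constants describe comes from averaging the quietest of the recently sampled frames. A simplified sketch of that idea (an approximation of the averaging described above, not the verbatim implementation):

```cpp
#include <algorithm>
#include <vector>

// Approximate the noise floor as the average of the quietest recent frames,
// so sustained loud input (e.g. singing) does not drag the floor upward.
float estimateNoiseFloor(const std::vector<float>& frameLoudness, int framesToAverage) {
    std::vector<float> sorted(frameLoudness);
    std::sort(sorted.begin(), sorted.end());
    int count = std::min<int>(framesToAverage, static_cast<int>(sorted.size()));
    float sum = 0.0f;
    for (int i = 0; i < count; i++) {
        sum += sorted[i];   // accumulate only the quietest frames
    }
    return count > 0 ? sum / count : 0.0f;
}
```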
@@ -142,11 +140,13 @@ void AudioNoiseGate::gateSamples(int16_t* samples, int numSamples) {
_sampleCounter = 0;
}
if (samplesOverNoiseGate > NOISE_GATE_WIDTH) {
_isOpen = true;
_framesToClose = NOISE_GATE_CLOSE_FRAME_DELAY;
} else {
if (--_framesToClose == 0) {
_closedInLastFrame = !_isOpen;
_isOpen = false;
}
}
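
The open/close bookkeeping uses hysteresis: the gate opens as soon as enough samples exceed the floor, but closes only after NOISE_GATE_CLOSE_FRAME_DELAY quiet frames. A condensed sketch of the behavior the new accessor's name implies, assuming gateSamples() runs once per frame (member names match the class; the edge-latching expression here follows the accessor's intent rather than quoting the diff line verbatim):

```cpp
// Sketch of the per-frame gate state update. _closedInLastFrame latches the
// open -> closed transition so AudioClient can observe it for exactly one frame.
void updateGate(bool samplesOverGate) {
    _closedInLastFrame = false;
    if (samplesOverGate) {
        _isOpen = true;
        _framesToClose = NOISE_GATE_CLOSE_FRAME_DELAY;   // re-arm the close delay
    } else if (--_framesToClose == 0) {
        _closedInLastFrame = _isOpen;   // true only if the gate was open until now
        _isOpen = false;
    }
}
```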

AudioNoiseGate.h

@@ -24,6 +24,7 @@ public:
void removeDCOffset(int16_t* samples, int numSamples);
bool clippedInLastFrame() const { return _didClipInLastFrame; }
bool closedInLastFrame() const { return _closedInLastFrame; }
float getMeasuredFloor() const { return _measuredFloor; }
float getLastLoudness() const { return _lastLoudness; }
@@ -40,6 +41,7 @@ private:
float _sampleFrames[NUMBER_OF_NOISE_SAMPLE_FRAMES];
int _sampleCounter;
bool _isOpen;
bool _closedInLastFrame { false };
int _framesToClose;
};

InboundAudioStream.cpp

@@ -136,9 +136,10 @@ int InboundAudioStream::parseData(ReceivedMessage& message) {
break;
}
case SequenceNumberStats::Early: {
// Packet is early; write droppable silent samples for each of the skipped packets.
// NOTE: we assume that each dropped packet contains the same number of samples
// as the packet we just received.
// Packet is early. Treat it as if all the packets between the last
// on-time packet and this one were lost. If we're using a codec, this
// also lets the codec interpolate the lost data. Then fall through to
// the "on time" logic to actually handle this packet.
int packetsDropped = arrivalInfo._seqDiffFromExpected;
lostAudioData(packetsDropped);
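
The branch above leans on lostAudioData(), added elsewhere in this file, to feed the gap to the codec. A hedged sketch of what that helper plausibly does based on the comments in this diff (the return type, AudioConstants name, and ring-buffer write are assumptions):

```cpp
// Sketch: synthesize each missing network frame. With a codec, ask the decoder
// to interpolate (fading toward silence); without one, queue true PCM silence.
void InboundAudioStream::lostAudioData(int numPackets) {
    QByteArray decodedBuffer;
    while (numPackets--) {
        if (_decoder) {
            _decoder->lostFrame(decodedBuffer);    // codec fills in the gap
        } else {
            decodedBuffer.resize(AudioConstants::NETWORK_FRAME_BYTES_STEREO);
            decodedBuffer.fill(0);                 // raw PCM: exact silence
        }
        _ringBuffer.writeData(decodedBuffer.data(), decodedBuffer.size());
    }
}
```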
@@ -147,7 +148,8 @@
case SequenceNumberStats::OnTime: {
// Packet is on time; parse its data to the ringbuffer
if (message.getType() == PacketType::SilentAudioFrame) {
// FIXME - Some codecs need to know about these silent frames... and can produce better output
// If we received a SilentAudioFrame from our sender, we might want to drop
// some of the samples in order to catch up to our desired jitter buffer size.
writeDroppableSilentFrames(networkFrames);
} else {
// note: PCM and no codec are identical
@@ -158,7 +160,12 @@
parseAudioData(message.getType(), afterProperties);
} else {
qDebug(audio) << "Codec mismatch: expected" << _selectedCodecName << "got" << codecInPacket << "writing silence";
writeDroppableSilentFrames(networkFrames);
// Since the data in the stream uses a codec that we aren't prepared for,
// we need to let the codec know that we don't have data for it; this will
// allow the codec to interpolate the missing data and produce a fade to silence.
lostAudioData(1);
// inform others of the mismatch
auto sendingNode = DependencyManager::get<NodeList>()->nodeWithUUID(message.getSourceID());
emit mismatchedAudioCodec(sendingNode, _selectedCodecName, codecInPacket);
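
For completeness, a hypothetical listener for the mismatch signal emitted above; the connection site and slot body are illustrative only:

```cpp
// Hypothetical hookup: whoever owns the stream can react to the mismatch,
// e.g. by logging it or re-negotiating a codec with the sending node.
QObject::connect(stream, &InboundAudioStream::mismatchedAudioCodec,
                 [](SharedNodePointer sendingNode, const QString& currentCodec,
                    const QString& receivedCodec) {
    qDebug() << "Codec mismatch from node" << sendingNode->getUUID()
             << ": have" << currentCodec << ", packet used" << receivedCodec;
});
```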
@@ -240,6 +247,25 @@ int InboundAudioStream::parseAudioData(PacketType type, const QByteArray& packet
int InboundAudioStream::writeDroppableSilentFrames(int silentFrames) {
// We can't guarantee that all clients have faded the stream down
// to silence and encoded that silence before sending us a
// SilentAudioFrame. If the encoder has truncated the stream, it will
// leave the decoder holding some unknown loud state. To handle this
// case we will call the decoder's lostFrame() method, which indicates
// that it should interpolate from its last known state down toward
// silence.
if (_decoder) {
// FIXME - We could potentially use the output from the codec, in which
// case we might get a cleaner fade toward silence. NOTE: The below logic
// attempts to catch up in the event that the jitter buffers have grown.
// The better long-term fix is to use the output from the decoder, detect
// when it actually reaches silence, and then delete the silent portions
// of the jitter buffers. Or potentially do a crossfade from the decoder
// output to silence.
QByteArray decodedBuffer;
_decoder->lostFrame(decodedBuffer);
}
// calculate how many silent frames we should drop.
int silentSamples = silentFrames * _numChannels;
int samplesPerFrame = _ringBuffer.getNumFrameSamples();
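
The excerpt cuts off here, but the values just computed set up the catch-up step the earlier comment describes: when the jitter buffer has grown past its target, some of the incoming silence is dropped rather than queued. A hedged sketch of that step with illustrative names (not the verbatim continuation of the function):

```cpp
#include <algorithm>

// Sketch: decide how many of the received silent frames to actually queue.
// Dropping the excess shrinks an over-grown jitter buffer without audible
// artifacts, since the dropped content is silence anyway.
int silentFramesToWrite(int framesReceived, int currentJitterFrames,
                        int desiredJitterFrames) {
    int excess = std::max(0, currentJitterFrames - desiredJitterFrames);
    int framesToDrop = std::min(framesReceived, excess);
    return framesReceived - framesToDrop;
}
```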