From 1bdad89cbe2bd95b1af910ee6d4c10d232c86012 Mon Sep 17 00:00:00 2001
From: Brad Hefta-Gaub <brad@highfidelity.io>
Date: Wed, 8 Mar 2017 17:27:46 -0800
Subject: [PATCH 1/5] properly handle silent packet transitions

---
 libraries/audio-client/src/AudioClient.cpp    |  7 +++-
 libraries/audio-client/src/AudioNoiseGate.cpp |  7 +++-
 libraries/audio-client/src/AudioNoiseGate.h   |  5 +++
 libraries/audio/src/InboundAudioStream.cpp    | 34 ++++++++++++++++---
 4 files changed, 46 insertions(+), 7 deletions(-)
diff --git a/libraries/audio-client/src/AudioClient.cpp b/libraries/audio-client/src/AudioClient.cpp
index bd141cfb12..9cd87d2e70 100644
--- a/libraries/audio-client/src/AudioClient.cpp
+++ b/libraries/audio-client/src/AudioClient.cpp
@@ -1052,7 +1052,12 @@ void AudioClient::handleAudioInput() {
         auto packetType = _shouldEchoToServer ?
             PacketType::MicrophoneAudioWithEcho : PacketType::MicrophoneAudioNoEcho;
 
-        if (_lastInputLoudness == 0) {
+        // if the _inputGate closed in this last frame, then we don't actually want
+        // to send a silent packet, instead, we want to go ahead and encode and send
+        // the output from the input gate (eventually, this could be crossfaded)
+        // and allow the codec to properly encode down to silent/zero. If we still
+        // have _lastInputLoudness of 0 in our NEXT frame, we will send a silent packet
+        if (_lastInputLoudness == 0 && !_inputGate.closedInLastFrame()) {
             packetType = PacketType::SilentAudioFrame;
         }
         Transform audioTransform;
diff --git a/libraries/audio-client/src/AudioNoiseGate.cpp b/libraries/audio-client/src/AudioNoiseGate.cpp
index 8766a20cdf..9458f47d8c 100644
--- a/libraries/audio-client/src/AudioNoiseGate.cpp
+++ b/libraries/audio-client/src/AudioNoiseGate.cpp
@@ -58,6 +58,7 @@ void AudioNoiseGate::removeDCOffset(int16_t* samples, int numSamples) {
     }
 }
 
+#include <QDebug>
 
 void AudioNoiseGate::gateSamples(int16_t* samples, int numSamples) {
     //
@@ -77,7 +78,8 @@ void AudioNoiseGate::gateSamples(int16_t* samples, int numSamples) {
     //  NOISE_GATE_FRAMES_TO_AVERAGE:  How many audio frames should we average together to compute noise floor.
     //                      More means better rejection but also can reject continuous things like singing.
     // NUMBER_OF_NOISE_SAMPLE_FRAMES:  How often should we re-evaluate the noise floor?
-    
+
+    _closedInLastFrame = false;
     
     float loudness = 0;
     int thisSample = 0;
@@ -147,6 +149,9 @@ void AudioNoiseGate::gateSamples(int16_t* samples, int numSamples) {
         _framesToClose = NOISE_GATE_CLOSE_FRAME_DELAY;
     } else {
         if (--_framesToClose == 0) {
+            if (_isOpen) {
+                _closedInLastFrame = true;
+            }
             _isOpen = false;
         }
     }
diff --git a/libraries/audio-client/src/AudioNoiseGate.h b/libraries/audio-client/src/AudioNoiseGate.h
index 8cb1155938..18b5a77056 100644
--- a/libraries/audio-client/src/AudioNoiseGate.h
+++ b/libraries/audio-client/src/AudioNoiseGate.h
@@ -24,6 +24,10 @@ public:
     void removeDCOffset(int16_t* samples, int numSamples);
     
     bool clippedInLastFrame() const { return _didClipInLastFrame; }
+
+    bool closedInLastFrame() const { return _closedInLastFrame; }
+
+    bool isOpen() const { return _isOpen; }
     float getMeasuredFloor() const { return _measuredFloor; }
     float getLastLoudness() const { return _lastLoudness; }
     
@@ -40,6 +44,7 @@ private:
     float _sampleFrames[NUMBER_OF_NOISE_SAMPLE_FRAMES];
     int _sampleCounter;
     bool _isOpen;
+    bool _closedInLastFrame { false };
     int _framesToClose;
 };
 
diff --git a/libraries/audio/src/InboundAudioStream.cpp b/libraries/audio/src/InboundAudioStream.cpp
index 57c344adaf..12181cb8e2 100644
--- a/libraries/audio/src/InboundAudioStream.cpp
+++ b/libraries/audio/src/InboundAudioStream.cpp
@@ -136,9 +136,10 @@ int InboundAudioStream::parseData(ReceivedMessage& message) {
             break;
         }
         case SequenceNumberStats::Early: {
-            // Packet is early; write droppable silent samples for each of the skipped packets.
-            // NOTE: we assume that each dropped packet contains the same number of samples
-            // as the packet we just received.
+            // Packet is early treat the packets as if all the packets between the last
+            // OnTime packet and this packet was lost. If we're using a codec this will 
+            // also result in allowing the codec to flush its internal state. Then
+            // fall through to the "on time" logic to actually handle this packet
             int packetsDropped = arrivalInfo._seqDiffFromExpected;
             lostAudioData(packetsDropped);
 
@@ -147,7 +148,10 @@ int InboundAudioStream::parseData(ReceivedMessage& message) {
         case SequenceNumberStats::OnTime: {
             // Packet is on time; parse its data to the ringbuffer
             if (message.getType() == PacketType::SilentAudioFrame) {
-                // FIXME - Some codecs need to know about these silent frames... and can produce better output
+                // If we recieved a SilentAudioFrame from our sender, we might want to drop
+                // some of the samples in order to catch up to our desired jitter buffer size.
+                // NOTE: If we're using a codec we will be calling the codec's lostFrame()
+                // method to allow the codec to flush its internal state.
                 writeDroppableSilentFrames(networkFrames);
             } else {
                 // note: PCM and no codec are identical
@@ -158,7 +162,12 @@ int InboundAudioStream::parseData(ReceivedMessage& message) {
                     parseAudioData(message.getType(), afterProperties);
                 } else {
                     qDebug(audio) << "Codec mismatch: expected" << _selectedCodecName << "got" << codecInPacket << "writing silence";
-                    writeDroppableSilentFrames(networkFrames);
+
+                    // Since the data in the stream is using a codec that we're not prepared for,
+                    // we need to let the codec know that we don't have data for it, this will
+                    // flush any internal codec state and produce fade to silence.
+                    lostAudioData(1);
+
                     // inform others of the mismatch
                     auto sendingNode = DependencyManager::get<NodeList>()->nodeWithUUID(message.getSourceID());
                     emit mismatchedAudioCodec(sendingNode, _selectedCodecName, codecInPacket);
@@ -240,6 +249,21 @@ int InboundAudioStream::parseAudioData(PacketType type, const QByteArray& packet
 
 int InboundAudioStream::writeDroppableSilentFrames(int silentFrames) {
 
+    // if we have a decoder, we still want to tell the decoder about our
+    // lost frame. this will flush the internal state of the decoder
+    // we can safely ignore the output of the codec in this case, because
+    // we've enforced that on the sending side, the encoder ran at least
+    // one frame of truly silent audio before we sent the "droppable" silent
+    // frame. Technically we could leave this out, if we know for certain
+    // that the sender has really sent us an encoded packet of zeros, but
+    // since we can trust all encoders to always encode at least one silent
+    // frame (open source, someone code modify it), we will go ahead and
+    // tell our decoder about the lost frame.
+    if (_decoder) {
+        QByteArray decodedBuffer;
+        _decoder->lostFrame(decodedBuffer);
+    }
+
     // calculate how many silent frames we should drop.
     int silentSamples = silentFrames * _numChannels;
     int samplesPerFrame = _ringBuffer.getNumFrameSamples();

From 60daaba52511e79eac826685df77893c50c1eb2d Mon Sep 17 00:00:00 2001
From: Brad Hefta-Gaub <brad@highfidelity.io>
Date: Wed, 8 Mar 2017 17:30:44 -0800
Subject: [PATCH 2/5] small cleanup

---
 libraries/audio-client/src/AudioNoiseGate.cpp | 2 --
 libraries/audio-client/src/AudioNoiseGate.h   | 3 ---
 2 files changed, 5 deletions(-)

diff --git a/libraries/audio-client/src/AudioNoiseGate.cpp b/libraries/audio-client/src/AudioNoiseGate.cpp
index 9458f47d8c..ac45f98f0b 100644
--- a/libraries/audio-client/src/AudioNoiseGate.cpp
+++ b/libraries/audio-client/src/AudioNoiseGate.cpp
@@ -58,8 +58,6 @@ void AudioNoiseGate::removeDCOffset(int16_t* samples, int numSamples) {
     }
 }
 
-#include <QDebug>
-
 void AudioNoiseGate::gateSamples(int16_t* samples, int numSamples) {
     //
     //  Impose Noise Gate
diff --git a/libraries/audio-client/src/AudioNoiseGate.h b/libraries/audio-client/src/AudioNoiseGate.h
index 18b5a77056..774a4157bb 100644
--- a/libraries/audio-client/src/AudioNoiseGate.h
+++ b/libraries/audio-client/src/AudioNoiseGate.h
@@ -24,10 +24,7 @@ public:
     void removeDCOffset(int16_t* samples, int numSamples);
     
     bool clippedInLastFrame() const { return _didClipInLastFrame; }
-
     bool closedInLastFrame() const { return _closedInLastFrame; }
-
-    bool isOpen() const { return _isOpen; }
     float getMeasuredFloor() const { return _measuredFloor; }
     float getLastLoudness() const { return _lastLoudness; }
     

From 2d595c1f879ccc75709b164567b7e4c4c00c7010 Mon Sep 17 00:00:00 2001
From: Brad Hefta-Gaub <brad@highfidelity.io>
Date: Wed, 8 Mar 2017 20:29:32 -0800
Subject: [PATCH 3/5] CR feedback

---
 libraries/audio-client/src/AudioNoiseGate.cpp |  7 ++---
 libraries/audio/src/InboundAudioStream.cpp    | 30 ++++++++++---------
 2 files changed, 18 insertions(+), 19 deletions(-)

diff --git a/libraries/audio-client/src/AudioNoiseGate.cpp b/libraries/audio-client/src/AudioNoiseGate.cpp
index ac45f98f0b..98ce8cc9e8 100644
--- a/libraries/audio-client/src/AudioNoiseGate.cpp
+++ b/libraries/audio-client/src/AudioNoiseGate.cpp
@@ -77,8 +77,6 @@ void AudioNoiseGate::gateSamples(int16_t* samples, int numSamples) {
     //                      More means better rejection but also can reject continuous things like singing.
     // NUMBER_OF_NOISE_SAMPLE_FRAMES:  How often should we re-evaluate the noise floor?
 
-    _closedInLastFrame = false;
-    
     float loudness = 0;
     int thisSample = 0;
     int samplesOverNoiseGate = 0;
@@ -142,14 +140,13 @@ void AudioNoiseGate::gateSamples(int16_t* samples, int numSamples) {
         _sampleCounter = 0;
         
     }
+
     if (samplesOverNoiseGate > NOISE_GATE_WIDTH) {
         _isOpen = true;
         _framesToClose = NOISE_GATE_CLOSE_FRAME_DELAY;
     } else {
         if (--_framesToClose == 0) {
-            if (_isOpen) {
-                _closedInLastFrame = true;
-            }
+            _closedInLastFrame = !_isOpen;
             _isOpen = false;
         }
     }
diff --git a/libraries/audio/src/InboundAudioStream.cpp b/libraries/audio/src/InboundAudioStream.cpp
index 12181cb8e2..3a5829e8fb 100644
--- a/libraries/audio/src/InboundAudioStream.cpp
+++ b/libraries/audio/src/InboundAudioStream.cpp
@@ -136,9 +136,9 @@ int InboundAudioStream::parseData(ReceivedMessage& message) {
             break;
         }
         case SequenceNumberStats::Early: {
-            // Packet is early treat the packets as if all the packets between the last
+            // Packet is early. Treat the packets as if all the packets between the last
             // OnTime packet and this packet was lost. If we're using a codec this will 
-            // also result in allowing the codec to flush its internal state. Then
+            // also result in allowing the codec to interpolate lost data. Then
             // fall through to the "on time" logic to actually handle this packet
             int packetsDropped = arrivalInfo._seqDiffFromExpected;
             lostAudioData(packetsDropped);
@@ -150,8 +150,6 @@ int InboundAudioStream::parseData(ReceivedMessage& message) {
             if (message.getType() == PacketType::SilentAudioFrame) {
                 // If we recieved a SilentAudioFrame from our sender, we might want to drop
                 // some of the samples in order to catch up to our desired jitter buffer size.
-                // NOTE: If we're using a codec we will be calling the codec's lostFrame()
-                // method to allow the codec to flush its internal state.
                 writeDroppableSilentFrames(networkFrames);
             } else {
                 // note: PCM and no codec are identical
@@ -249,17 +247,21 @@ int InboundAudioStream::parseAudioData(PacketType type, const QByteArray& packet
 
 int InboundAudioStream::writeDroppableSilentFrames(int silentFrames) {
 
-    // if we have a decoder, we still want to tell the decoder about our
-    // lost frame. this will flush the internal state of the decoder
-    // we can safely ignore the output of the codec in this case, because
-    // we've enforced that on the sending side, the encoder ran at least
-    // one frame of truly silent audio before we sent the "droppable" silent
-    // frame. Technically we could leave this out, if we know for certain
-    // that the sender has really sent us an encoded packet of zeros, but
-    // since we can trust all encoders to always encode at least one silent
-    // frame (open source, someone code modify it), we will go ahead and
-    // tell our decoder about the lost frame.
+    // We can't guarentee that all clients have faded the stream down
+    // to silence and encode that silence before sending us a 
+    // SilentAudioFrame. The encoder may have truncated the stream and 
+    // left the decoder holding some loud state. To handle this case
+    // we will call the decoder's lostFrame() method, which indicates
+    // that it should interpolate from it's last known state (which may
+    // be some unknown loud state) down toward silence.
     if (_decoder) {
+        // FIXME - We could potentially use the output from the codec, in which 
+        // case we might get a cleaner fade toward silence. NOTE: The below logic 
+        // attempts to catch up in the event that the jitter buffers have grown. 
+        // The better long term fix is to use the output from the decode, detect
+        // when it actually reaches silence, and then delete the silent portions
+        // of the jitter buffers. Or petentially do a cross fade from the decode
+        // output to silence.
         QByteArray decodedBuffer;
         _decoder->lostFrame(decodedBuffer);
     }

From 79d8d90581103152926775f5cdd312bc1e2d0ecd Mon Sep 17 00:00:00 2001
From: Brad Hefta-Gaub <brad@highfidelity.io>
Date: Wed, 8 Mar 2017 20:35:24 -0800
Subject: [PATCH 4/5] CR feedback

---
 libraries/audio/src/InboundAudioStream.cpp | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/libraries/audio/src/InboundAudioStream.cpp b/libraries/audio/src/InboundAudioStream.cpp
index 3a5829e8fb..70884c9fd1 100644
--- a/libraries/audio/src/InboundAudioStream.cpp
+++ b/libraries/audio/src/InboundAudioStream.cpp
@@ -137,7 +137,7 @@ int InboundAudioStream::parseData(ReceivedMessage& message) {
         }
         case SequenceNumberStats::Early: {
             // Packet is early. Treat the packets as if all the packets between the last
-            // OnTime packet and this packet was lost. If we're using a codec this will 
+            // OnTime packet and this packet were lost. If we're using a codec this will 
             // also result in allowing the codec to interpolate lost data. Then
             // fall through to the "on time" logic to actually handle this packet
             int packetsDropped = arrivalInfo._seqDiffFromExpected;
@@ -161,9 +161,9 @@ int InboundAudioStream::parseData(ReceivedMessage& message) {
                 } else {
                     qDebug(audio) << "Codec mismatch: expected" << _selectedCodecName << "got" << codecInPacket << "writing silence";
 
-                    // Since the data in the stream is using a codec that we're not prepared for,
+                    // Since the data in the stream is using a codec that we aren't prepared for,
                     // we need to let the codec know that we don't have data for it, this will
-                    // flush any internal codec state and produce fade to silence.
+                    // allowe the codec to interpolate missing data and produce a fade to silence.
                     lostAudioData(1);
 
                     // inform others of the mismatch
@@ -248,12 +248,12 @@ int InboundAudioStream::parseAudioData(PacketType type, const QByteArray& packet
 int InboundAudioStream::writeDroppableSilentFrames(int silentFrames) {
 
     // We can't guarentee that all clients have faded the stream down
-    // to silence and encode that silence before sending us a 
-    // SilentAudioFrame. The encoder may have truncated the stream and 
-    // left the decoder holding some loud state. To handle this case
-    // we will call the decoder's lostFrame() method, which indicates
-    // that it should interpolate from it's last known state (which may
-    // be some unknown loud state) down toward silence.
+    // to silence and encoding that silence before sending us a 
+    // SilentAudioFrame. If the encoder has truncated the stream it will
+    // leave the decoder holding some unknown loud state. To handle this 
+    // case we will call the decoder's lostFrame() method, which indicates
+    // that it should interpolate from its last known state down toward 
+    // silence.
     if (_decoder) {
         // FIXME - We could potentially use the output from the codec, in which 
         // case we might get a cleaner fade toward silence. NOTE: The below logic 

From bdab24d9bfc731b81db02ffdf409a7768881adb7 Mon Sep 17 00:00:00 2001
From: Brad Hefta-Gaub <brad@highfidelity.io>
Date: Thu, 9 Mar 2017 08:25:56 -0800
Subject: [PATCH 5/5] fix spelling

---
 libraries/audio/src/InboundAudioStream.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libraries/audio/src/InboundAudioStream.cpp b/libraries/audio/src/InboundAudioStream.cpp
index 70884c9fd1..88ec7e7bc0 100644
--- a/libraries/audio/src/InboundAudioStream.cpp
+++ b/libraries/audio/src/InboundAudioStream.cpp
@@ -163,7 +163,7 @@ int InboundAudioStream::parseData(ReceivedMessage& message) {
 
                     // Since the data in the stream is using a codec that we aren't prepared for,
                     // we need to let the codec know that we don't have data for it, this will
-                    // allowe the codec to interpolate missing data and produce a fade to silence.
+                    // allow the codec to interpolate missing data and produce a fade to silence.
                     lostAudioData(1);
 
                     // inform others of the mismatch
@@ -248,7 +248,7 @@ int InboundAudioStream::parseAudioData(PacketType type, const QByteArray& packet
 int InboundAudioStream::writeDroppableSilentFrames(int silentFrames) {
 
     // We can't guarentee that all clients have faded the stream down
-    // to silence and encoding that silence before sending us a 
+    // to silence and encoded that silence before sending us a 
     // SilentAudioFrame. If the encoder has truncated the stream it will
     // leave the decoder holding some unknown loud state. To handle this 
     // case we will call the decoder's lostFrame() method, which indicates