From 7da091d2b41010dcb8d81189ecf2df0caa123f64 Mon Sep 17 00:00:00 2001
From: Stephen Birarda <commit@birarda.com>
Date: Fri, 6 Jun 2014 10:15:09 -0700
Subject: [PATCH 1/4] initial hooks for stereo audio

---
 interface/src/Audio.cpp                       | 283 ++++++++++--------
 interface/src/Audio.h                         |   2 +
 interface/src/Menu.cpp                        |   2 +
 interface/src/Menu.h                          |   1 +
 .../audio/src/PositionalAudioRingBuffer.cpp   |   7 +-
 .../audio/src/PositionalAudioRingBuffer.h     |   3 +-
 libraries/networking/src/PacketHeaders.cpp    |   3 +
 7 files changed, 169 insertions(+), 132 deletions(-)

diff --git a/interface/src/Audio.cpp b/interface/src/Audio.cpp
index 50ab720450..6d672b736d 100644
--- a/interface/src/Audio.cpp
+++ b/interface/src/Audio.cpp
@@ -68,6 +68,7 @@ Audio::Audio(int16_t initialJitterBufferSamples, QObject* parent) :
     _proceduralOutputDevice(NULL),
     _inputRingBuffer(0),
     _ringBuffer(NETWORK_BUFFER_LENGTH_BYTES_PER_CHANNEL),
+    _isStereoInput(false),
     _averagedLatency(0.0),
     _measuredJitter(0),
     _jitterBufferSamples(initialJitterBufferSamples),
@@ -405,12 +406,12 @@ bool Audio::switchOutputToAudioDevice(const QString& outputDeviceName) {
 }
 
 void Audio::handleAudioInput() {
-    static char monoAudioDataPacket[MAX_PACKET_SIZE];
+    static char audioDataPacket[MAX_PACKET_SIZE];
 
     static int numBytesPacketHeader = numBytesForPacketHeaderGivenPacketType(PacketTypeMicrophoneAudioNoEcho);
     static int leadingBytes = numBytesPacketHeader + sizeof(glm::vec3) + sizeof(glm::quat);
 
-    static int16_t* monoAudioSamples = (int16_t*) (monoAudioDataPacket + leadingBytes);
+    static int16_t* networkAudioSamples = (int16_t*) (audioDataPacket + leadingBytes);
 
     float inputToNetworkInputRatio = calculateDeviceToNetworkInputRatio(_numInputCallbackBytes);
 
@@ -452,125 +453,130 @@ void Audio::handleAudioInput() {
 
         int16_t* inputAudioSamples = new int16_t[inputSamplesRequired];
         _inputRingBuffer.readSamples(inputAudioSamples, inputSamplesRequired);
+        
+        int numNetworkBytes = _isStereoInput ? NETWORK_BUFFER_LENGTH_BYTES_STEREO : NETWORK_BUFFER_LENGTH_BYTES_PER_CHANNEL;
+        int numNetworkSamples = _isStereoInput ? NETWORK_BUFFER_LENGTH_SAMPLES_STEREO : NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL;
 
         // zero out the monoAudioSamples array and the locally injected audio
-        memset(monoAudioSamples, 0, NETWORK_BUFFER_LENGTH_BYTES_PER_CHANNEL);
+        memset(networkAudioSamples, 0, numNetworkBytes);
 
         if (!_muted) {
             // we aren't muted, downsample the input audio
-            linearResampling((int16_t*) inputAudioSamples,
-                             monoAudioSamples,
-                             inputSamplesRequired,
-                             NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL,
+            linearResampling((int16_t*) inputAudioSamples, networkAudioSamples,
+                             inputSamplesRequired,  numNetworkSamples,
                              _inputFormat, _desiredInputFormat);
             
-            //
-            //  Impose Noise Gate
-            //
-            //  The Noise Gate is used to reject constant background noise by measuring the noise
-            //  floor observed at the microphone and then opening the 'gate' to allow microphone
-            //  signals to be transmitted when the microphone samples average level exceeds a multiple
-            //  of the noise floor.
-            //
-            //  NOISE_GATE_HEIGHT:  How loud you have to speak relative to noise background to open the gate.
-            //                      Make this value lower for more sensitivity and less rejection of noise.
-            //  NOISE_GATE_WIDTH:   The number of samples in an audio frame for which the height must be exceeded
-            //                      to open the gate.
-            //  NOISE_GATE_CLOSE_FRAME_DELAY:  Once the noise is below the gate height for the frame, how many frames
-            //                      will we wait before closing the gate.
-            //  NOISE_GATE_FRAMES_TO_AVERAGE:  How many audio frames should we average together to compute noise floor.
-            //                      More means better rejection but also can reject continuous things like singing.
-            // NUMBER_OF_NOISE_SAMPLE_FRAMES:  How often should we re-evaluate the noise floor?
-            
-
-            float loudness = 0;
-            float thisSample = 0;
-            int samplesOverNoiseGate = 0;
-            
-            const float NOISE_GATE_HEIGHT = 7.0f;
-            const int NOISE_GATE_WIDTH = 5;
-            const int NOISE_GATE_CLOSE_FRAME_DELAY = 5;
-            const int NOISE_GATE_FRAMES_TO_AVERAGE = 5;
-            const float DC_OFFSET_AVERAGING = 0.99f;
-            const float CLIPPING_THRESHOLD = 0.90f;
-            
-            //
-            //  Check clipping, adjust DC offset, and check if should open noise gate
-            //
-            float measuredDcOffset = 0.0f;
-            //  Increment the time since the last clip
-            if (_timeSinceLastClip >= 0.0f) {
-                _timeSinceLastClip += (float) NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL / (float) SAMPLE_RATE;
-            }
-           
-            for (int i = 0; i < NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL; i++) {
-                measuredDcOffset += monoAudioSamples[i];
-                monoAudioSamples[i] -= (int16_t) _dcOffset;
-                thisSample = fabsf(monoAudioSamples[i]);
-                if (thisSample >= (32767.0f * CLIPPING_THRESHOLD)) {
-                    _timeSinceLastClip = 0.0f;
+            // only impose the noise gate and perform tone injection if we are sending mono audio
+            if (!_isStereoInput) {
+                
+                //
+                //  Impose Noise Gate
+                //
+                //  The Noise Gate is used to reject constant background noise by measuring the noise
+                //  floor observed at the microphone and then opening the 'gate' to allow microphone
+                //  signals to be transmitted when the microphone samples average level exceeds a multiple
+                //  of the noise floor.
+                //
+                //  NOISE_GATE_HEIGHT:  How loud you have to speak relative to noise background to open the gate.
+                //                      Make this value lower for more sensitivity and less rejection of noise.
+                //  NOISE_GATE_WIDTH:   The number of samples in an audio frame for which the height must be exceeded
+                //                      to open the gate.
+                //  NOISE_GATE_CLOSE_FRAME_DELAY:  Once the noise is below the gate height for the frame, how many frames
+                //                      will we wait before closing the gate.
+                //  NOISE_GATE_FRAMES_TO_AVERAGE:  How many audio frames should we average together to compute noise floor.
+                //                      More means better rejection but also can reject continuous things like singing.
+                // NUMBER_OF_NOISE_SAMPLE_FRAMES:  How often should we re-evaluate the noise floor?
+                
+                
+                float loudness = 0;
+                float thisSample = 0;
+                int samplesOverNoiseGate = 0;
+                
+                const float NOISE_GATE_HEIGHT = 7.0f;
+                const int NOISE_GATE_WIDTH = 5;
+                const int NOISE_GATE_CLOSE_FRAME_DELAY = 5;
+                const int NOISE_GATE_FRAMES_TO_AVERAGE = 5;
+                const float DC_OFFSET_AVERAGING = 0.99f;
+                const float CLIPPING_THRESHOLD = 0.90f;
+                
+                //
+                //  Check clipping, adjust DC offset, and check if should open noise gate
+                //
+                float measuredDcOffset = 0.0f;
+                //  Increment the time since the last clip
+                if (_timeSinceLastClip >= 0.0f) {
+                    _timeSinceLastClip += (float) NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL / (float) SAMPLE_RATE;
                 }
-                loudness += thisSample;
-                //  Noise Reduction:  Count peaks above the average loudness
-                if (_noiseGateEnabled && (thisSample > (_noiseGateMeasuredFloor * NOISE_GATE_HEIGHT))) {
-                    samplesOverNoiseGate++;
-                }
-            }
-            
-            measuredDcOffset /= NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL;
-            if (_dcOffset == 0.0f) {
-                // On first frame, copy over measured offset
-                _dcOffset = measuredDcOffset;
-            } else {
-                _dcOffset = DC_OFFSET_AVERAGING * _dcOffset + (1.0f - DC_OFFSET_AVERAGING) * measuredDcOffset;
-            }
-            
-            //  Add tone injection if enabled
-            const float TONE_FREQ = 220.0f / SAMPLE_RATE * TWO_PI;
-            const float QUARTER_VOLUME = 8192.0f;
-            if (_toneInjectionEnabled) {
-                loudness = 0.0f;
+                
                 for (int i = 0; i < NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL; i++) {
-                    monoAudioSamples[i] = QUARTER_VOLUME * sinf(TONE_FREQ * (float)(i + _proceduralEffectSample));
-                    loudness += fabsf(monoAudioSamples[i]);
-                }
-            }
-            _lastInputLoudness = fabs(loudness / NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL);
-
-            //  If Noise Gate is enabled, check and turn the gate on and off
-            if (!_toneInjectionEnabled && _noiseGateEnabled) {
-                float averageOfAllSampleFrames = 0.0f;
-                _noiseSampleFrames[_noiseGateSampleCounter++] = _lastInputLoudness;
-                if (_noiseGateSampleCounter == NUMBER_OF_NOISE_SAMPLE_FRAMES) {
-                    float smallestSample = FLT_MAX;
-                    for (int i = 0; i <= NUMBER_OF_NOISE_SAMPLE_FRAMES - NOISE_GATE_FRAMES_TO_AVERAGE; i += NOISE_GATE_FRAMES_TO_AVERAGE) {
-                        float thisAverage = 0.0f;
-                        for (int j = i; j < i + NOISE_GATE_FRAMES_TO_AVERAGE; j++) {
-                            thisAverage += _noiseSampleFrames[j];
-                            averageOfAllSampleFrames += _noiseSampleFrames[j];
-                        }
-                        thisAverage /= NOISE_GATE_FRAMES_TO_AVERAGE;
-                        
-                        if (thisAverage < smallestSample) {
-                            smallestSample = thisAverage;
-                        }
+                    measuredDcOffset += networkAudioSamples[i];
+                    networkAudioSamples[i] -= (int16_t) _dcOffset;
+                    thisSample = fabsf(networkAudioSamples[i]);
+                    if (thisSample >= (32767.0f * CLIPPING_THRESHOLD)) {
+                        _timeSinceLastClip = 0.0f;
+                    }
+                    loudness += thisSample;
+                    //  Noise Reduction:  Count peaks above the average loudness
+                    if (_noiseGateEnabled && (thisSample > (_noiseGateMeasuredFloor * NOISE_GATE_HEIGHT))) {
+                        samplesOverNoiseGate++;
                     }
-                    averageOfAllSampleFrames /= NUMBER_OF_NOISE_SAMPLE_FRAMES;
-                    _noiseGateMeasuredFloor = smallestSample;
-                    _noiseGateSampleCounter = 0;
-
                 }
-                if (samplesOverNoiseGate > NOISE_GATE_WIDTH) {
-                    _noiseGateOpen = true;
-                    _noiseGateFramesToClose = NOISE_GATE_CLOSE_FRAME_DELAY;
+                
+                measuredDcOffset /= NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL;
+                if (_dcOffset == 0.0f) {
+                    // On first frame, copy over measured offset
+                    _dcOffset = measuredDcOffset;
                 } else {
-                    if (--_noiseGateFramesToClose == 0) {
-                        _noiseGateOpen = false;
+                    _dcOffset = DC_OFFSET_AVERAGING * _dcOffset + (1.0f - DC_OFFSET_AVERAGING) * measuredDcOffset;
+                }
+                
+                //  Add tone injection if enabled
+                const float TONE_FREQ = 220.0f / SAMPLE_RATE * TWO_PI;
+                const float QUARTER_VOLUME = 8192.0f;
+                if (_toneInjectionEnabled) {
+                    loudness = 0.0f;
+                    for (int i = 0; i < NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL; i++) {
+                        networkAudioSamples[i] = QUARTER_VOLUME * sinf(TONE_FREQ * (float)(i + _proceduralEffectSample));
+                        loudness += fabsf(networkAudioSamples[i]);
                     }
                 }
-                if (!_noiseGateOpen) {
-                    memset(monoAudioSamples, 0, NETWORK_BUFFER_LENGTH_BYTES_PER_CHANNEL);
-                    _lastInputLoudness = 0;
+                _lastInputLoudness = fabs(loudness / NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL);
+                
+                //  If Noise Gate is enabled, check and turn the gate on and off
+                if (!_toneInjectionEnabled && _noiseGateEnabled) {
+                    float averageOfAllSampleFrames = 0.0f;
+                    _noiseSampleFrames[_noiseGateSampleCounter++] = _lastInputLoudness;
+                    if (_noiseGateSampleCounter == NUMBER_OF_NOISE_SAMPLE_FRAMES) {
+                        float smallestSample = FLT_MAX;
+                        for (int i = 0; i <= NUMBER_OF_NOISE_SAMPLE_FRAMES - NOISE_GATE_FRAMES_TO_AVERAGE; i += NOISE_GATE_FRAMES_TO_AVERAGE) {
+                            float thisAverage = 0.0f;
+                            for (int j = i; j < i + NOISE_GATE_FRAMES_TO_AVERAGE; j++) {
+                                thisAverage += _noiseSampleFrames[j];
+                                averageOfAllSampleFrames += _noiseSampleFrames[j];
+                            }
+                            thisAverage /= NOISE_GATE_FRAMES_TO_AVERAGE;
+                            
+                            if (thisAverage < smallestSample) {
+                                smallestSample = thisAverage;
+                            }
+                        }
+                        averageOfAllSampleFrames /= NUMBER_OF_NOISE_SAMPLE_FRAMES;
+                        _noiseGateMeasuredFloor = smallestSample;
+                        _noiseGateSampleCounter = 0;
+                        
+                    }
+                    if (samplesOverNoiseGate > NOISE_GATE_WIDTH) {
+                        _noiseGateOpen = true;
+                        _noiseGateFramesToClose = NOISE_GATE_CLOSE_FRAME_DELAY;
+                    } else {
+                        if (--_noiseGateFramesToClose == 0) {
+                            _noiseGateOpen = false;
+                        }
+                    }
+                    if (!_noiseGateOpen) {
+                        memset(networkAudioSamples, 0, NETWORK_BUFFER_LENGTH_BYTES_PER_CHANNEL);
+                        _lastInputLoudness = 0;
+                    }
                 }
             }
         } else {
@@ -580,19 +586,19 @@ void Audio::handleAudioInput() {
         
         // at this point we have clean monoAudioSamples, which match our target output... 
         // this is what we should send to our interested listeners
-        if (_processSpatialAudio && !_muted && _audioOutput) {
-            QByteArray monoInputData((char*)monoAudioSamples, NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL * sizeof(int16_t));
+        if (_processSpatialAudio && !_muted && !_isStereoInput && _audioOutput) {
+            QByteArray monoInputData((char*)networkAudioSamples, NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL * sizeof(int16_t));
             emit processLocalAudio(_spatialAudioStart, monoInputData, _desiredInputFormat);
         }
         
-        if (_proceduralAudioOutput) {
-            processProceduralAudio(monoAudioSamples, NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL);
+        if (!_isStereoInput && _proceduralAudioOutput) {
+            processProceduralAudio(networkAudioSamples, NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL);
         }
 
-        if (_scopeEnabled && !_scopeEnabledPause) {
+        if (!_isStereoInput && _scopeEnabled && !_scopeEnabledPause) {
             unsigned int numMonoAudioChannels = 1;
             unsigned int monoAudioChannel = 0;
-            addBufferToScope(_scopeInput, _scopeInputOffset, monoAudioSamples, monoAudioChannel, numMonoAudioChannels); 
+            addBufferToScope(_scopeInput, _scopeInputOffset, networkAudioSamples, monoAudioChannel, numMonoAudioChannels);
             _scopeInputOffset += NETWORK_SAMPLES_PER_FRAME;
             _scopeInputOffset %= _samplesPerScope;
         }
@@ -615,7 +621,7 @@ void Audio::handleAudioInput() {
                 packetType = PacketTypeSilentAudioFrame;
                 
                 // we need to indicate how many silent samples this is to the audio mixer
-                monoAudioSamples[0] = NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL;
+                networkAudioSamples[0] = NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL;
                 numAudioBytes = sizeof(int16_t);
                 
             } else {
@@ -628,7 +634,7 @@ void Audio::handleAudioInput() {
                 }
             }
 
-            char* currentPacketPtr = monoAudioDataPacket + populatePacketHeader(monoAudioDataPacket, packetType);
+            char* currentPacketPtr = audioDataPacket + populatePacketHeader(audioDataPacket, packetType);
 
             // memcpy the three float positions
             memcpy(currentPacketPtr, &headPosition, sizeof(headPosition));
@@ -638,7 +644,7 @@ void Audio::handleAudioInput() {
             memcpy(currentPacketPtr, &headOrientation, sizeof(headOrientation));
             currentPacketPtr += sizeof(headOrientation);
             
-            nodeList->writeDatagram(monoAudioDataPacket, numAudioBytes + leadingBytes, audioMixer);
+            nodeList->writeDatagram(audioDataPacket, numAudioBytes + leadingBytes, audioMixer);
 
             Application::getInstance()->getBandwidthMeter()->outputStream(BandwidthMeter::AUDIO)
                 .updateValue(numAudioBytes + leadingBytes);
@@ -761,6 +767,24 @@ void Audio::toggleAudioNoiseReduction() {
     _noiseGateEnabled = !_noiseGateEnabled;
 }
 
+void Audio::toggleStereoInput() {
+    int oldChannelCount = _desiredInputFormat.channelCount();
+    QAction* stereoAudioOption = Menu::getInstance()->getActionForOption(MenuOption::StereoAudio);
+
+    if (stereoAudioOption->isChecked()) {
+        _desiredInputFormat.setChannelCount(2);
+        _isStereoInput = true;
+    } else {
+        _desiredInputFormat.setChannelCount(1);
+        _isStereoInput = false;
+    }
+    
+    if (oldChannelCount != _desiredInputFormat.channelCount()) {
+        // change in channel count for desired input format, restart the input device
+        switchInputToAudioDevice(_inputAudioDeviceName);
+    }
+}
+
 void Audio::processReceivedAudio(const QByteArray& audioByteArray) {
     _ringBuffer.parseData(audioByteArray);
     
@@ -1300,18 +1324,21 @@ bool Audio::switchInputToAudioDevice(const QAudioDeviceInfo& inputDeviceInfo) {
     
         if (adjustedFormatForAudioDevice(inputDeviceInfo, _desiredInputFormat, _inputFormat)) {
             qDebug() << "The format to be used for audio input is" << _inputFormat;
-        
-            _audioInput = new QAudioInput(inputDeviceInfo, _inputFormat, this);
-            _numInputCallbackBytes = calculateNumberOfInputCallbackBytes(_inputFormat);
-            _audioInput->setBufferSize(_numInputCallbackBytes);
-
-            // how do we want to handle input working, but output not working?
-            int numFrameSamples = calculateNumberOfFrameSamples(_numInputCallbackBytes);
-            _inputRingBuffer.resizeForFrameSize(numFrameSamples);
-            _inputDevice = _audioInput->start();
-            connect(_inputDevice, SIGNAL(readyRead()), this, SLOT(handleAudioInput()));
-
-            supportedFormat = true;
+            
+            // if the user wants stereo but this device can't provide then bail
+            if (!_isStereoInput || _inputFormat.channelCount() == 2) {
+                _audioInput = new QAudioInput(inputDeviceInfo, _inputFormat, this);
+                _numInputCallbackBytes = calculateNumberOfInputCallbackBytes(_inputFormat);
+                _audioInput->setBufferSize(_numInputCallbackBytes);
+                
+                // how do we want to handle input working, but output not working?
+                int numFrameSamples = calculateNumberOfFrameSamples(_numInputCallbackBytes);
+                _inputRingBuffer.resizeForFrameSize(numFrameSamples);
+                _inputDevice = _audioInput->start();
+                connect(_inputDevice, SIGNAL(readyRead()), this, SLOT(handleAudioInput()));
+                
+                supportedFormat = true;
+            }
         }
     }
     return supportedFormat;
diff --git a/interface/src/Audio.h b/interface/src/Audio.h
index 79f0f84ff5..74fc373cb0 100644
--- a/interface/src/Audio.h
+++ b/interface/src/Audio.h
@@ -85,6 +85,7 @@ public slots:
     void toggleScope();
     void toggleScopePause();
     void toggleAudioSpatialProcessing();
+    void toggleStereoInput();
     void selectAudioScopeFiveFrames();
     void selectAudioScopeTwentyFrames();
     void selectAudioScopeFiftyFrames();
@@ -127,6 +128,7 @@ private:
     QIODevice* _proceduralOutputDevice;
     AudioRingBuffer _inputRingBuffer;
     AudioRingBuffer _ringBuffer;
+    bool _isStereoInput;
 
     QString _inputAudioDeviceName;
     QString _outputAudioDeviceName;
diff --git a/interface/src/Menu.cpp b/interface/src/Menu.cpp
index 5d9cd1f1c4..9ab47cdeda 100644
--- a/interface/src/Menu.cpp
+++ b/interface/src/Menu.cpp
@@ -432,6 +432,8 @@ Menu::Menu() :
                                            SLOT(toggleAudioNoiseReduction()));
     addCheckableActionToQMenuAndActionHash(audioDebugMenu, MenuOption::EchoServerAudio);
     addCheckableActionToQMenuAndActionHash(audioDebugMenu, MenuOption::EchoLocalAudio);
+    addCheckableActionToQMenuAndActionHash(audioDebugMenu, MenuOption::StereoAudio, 0, false,
+                                           appInstance->getAudio(), SLOT(toggleStereoInput()));
     addCheckableActionToQMenuAndActionHash(audioDebugMenu, MenuOption::MuteAudio,
                                            Qt::CTRL | Qt::Key_M,
                                            false,
diff --git a/interface/src/Menu.h b/interface/src/Menu.h
index 6bc9adef05..f9af80119b 100644
--- a/interface/src/Menu.h
+++ b/interface/src/Menu.h
@@ -402,6 +402,7 @@ namespace MenuOption {
     const QString StandOnNearbyFloors = "Stand on nearby floors";
     const QString Stars = "Stars";
     const QString Stats = "Stats";
+    const QString StereoAudio = "Stereo Audio";
     const QString StopAllScripts = "Stop All Scripts";
     const QString SuppressShortTimings = "Suppress Timings Less than 10ms";
     const QString TestPing = "Test Ping";
diff --git a/libraries/audio/src/PositionalAudioRingBuffer.cpp b/libraries/audio/src/PositionalAudioRingBuffer.cpp
index 6fc16c57a9..0a3d2d0c16 100644
--- a/libraries/audio/src/PositionalAudioRingBuffer.cpp
+++ b/libraries/audio/src/PositionalAudioRingBuffer.cpp
@@ -20,14 +20,15 @@
 
 #include "PositionalAudioRingBuffer.h"
 
-PositionalAudioRingBuffer::PositionalAudioRingBuffer(PositionalAudioRingBuffer::Type type) :
-    AudioRingBuffer(NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL),
+PositionalAudioRingBuffer::PositionalAudioRingBuffer(PositionalAudioRingBuffer::Type type, bool isStereo) :
+    AudioRingBuffer(isStereo ? NETWORK_BUFFER_LENGTH_SAMPLES_STEREO : NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL),
     _type(type),
     _position(0.0f, 0.0f, 0.0f),
     _orientation(0.0f, 0.0f, 0.0f, 0.0f),
     _willBeAddedToMix(false),
     _shouldLoopbackForNode(false),
-    _shouldOutputStarveDebug(true)
+    _shouldOutputStarveDebug(true),
+    _isStereo(isStereo)
 {
 
 }
diff --git a/libraries/audio/src/PositionalAudioRingBuffer.h b/libraries/audio/src/PositionalAudioRingBuffer.h
index b130a9b216..1864271d5f 100644
--- a/libraries/audio/src/PositionalAudioRingBuffer.h
+++ b/libraries/audio/src/PositionalAudioRingBuffer.h
@@ -24,7 +24,7 @@ public:
         Injector
     };
     
-    PositionalAudioRingBuffer(PositionalAudioRingBuffer::Type type);
+    PositionalAudioRingBuffer(PositionalAudioRingBuffer::Type type, bool isStereo = false);
     ~PositionalAudioRingBuffer();
     
     int parseData(const QByteArray& packet);
@@ -56,6 +56,7 @@ protected:
     bool _willBeAddedToMix;
     bool _shouldLoopbackForNode;
     bool _shouldOutputStarveDebug;
+    bool _isStereo;
     
     float _nextOutputTrailingLoudness;
 };
diff --git a/libraries/networking/src/PacketHeaders.cpp b/libraries/networking/src/PacketHeaders.cpp
index c56dba9cf1..751c6f45b2 100644
--- a/libraries/networking/src/PacketHeaders.cpp
+++ b/libraries/networking/src/PacketHeaders.cpp
@@ -47,6 +47,9 @@ int packArithmeticallyCodedValue(int value, char* destination) {
 
 PacketVersion versionForPacketType(PacketType type) {
     switch (type) {
+        case PacketTypeMicrophoneAudioNoEcho:
+        case PacketTypeMicrophoneAudioWithEcho:
+            return 1;
         case PacketTypeAvatarData:
             return 3;
         case PacketTypeAvatarIdentity:

From 681ce247d6230ffe561dcfe8cda2eaefa3c884c0 Mon Sep 17 00:00:00 2001
From: Stephen Birarda <commit@birarda.com>
Date: Fri, 6 Jun 2014 10:27:59 -0700
Subject: [PATCH 2/4] tweak resample to handle 48 to 24 stereo, add stereo/mono
 byte

---
 interface/src/Audio.cpp | 32 ++++++++++++++++++++------------
 1 file changed, 20 insertions(+), 12 deletions(-)

diff --git a/interface/src/Audio.cpp b/interface/src/Audio.cpp
index 6d672b736d..1575aa524f 100644
--- a/interface/src/Audio.cpp
+++ b/interface/src/Audio.cpp
@@ -290,20 +290,27 @@ void linearResampling(int16_t* sourceSamples, int16_t* destinationSamples,
         if (sourceToDestinationFactor >= 2) {
             // we need to downsample from 48 to 24
             // for now this only supports a mono output - this would be the case for audio input
-
-            for (unsigned int i = sourceAudioFormat.channelCount(); i < numSourceSamples; i += 2 * sourceAudioFormat.channelCount()) {
-                if (i + (sourceAudioFormat.channelCount()) >= numSourceSamples) {
-                    destinationSamples[(i - sourceAudioFormat.channelCount()) / (int) sourceToDestinationFactor] =
+            if (destinationAudioFormat.channelCount() == 1) {
+                for (unsigned int i = sourceAudioFormat.channelCount(); i < numSourceSamples; i += 2 * sourceAudioFormat.channelCount()) {
+                    if (i + (sourceAudioFormat.channelCount()) >= numSourceSamples) {
+                        destinationSamples[(i - sourceAudioFormat.channelCount()) / (int) sourceToDestinationFactor] =
                         (sourceSamples[i - sourceAudioFormat.channelCount()] / 2)
                         + (sourceSamples[i] / 2);
-                } else {
-                    destinationSamples[(i - sourceAudioFormat.channelCount()) / (int) sourceToDestinationFactor] =
+                    } else {
+                        destinationSamples[(i - sourceAudioFormat.channelCount()) / (int) sourceToDestinationFactor] =
                         (sourceSamples[i - sourceAudioFormat.channelCount()] / 4)
                         + (sourceSamples[i] / 2)
                         + (sourceSamples[i + sourceAudioFormat.channelCount()] / 4);
+                    }
+                }
+            } else {
+                // 48 to 24 resampling where source and destination are both interleaved stereo (L R L R ...):
+                // average each pair of source frames (4 samples) per channel into one destination frame (2 samples)
+                for (unsigned int i = 0; i + 3 < numSourceSamples; i += 4) {
+                    destinationSamples[i / 2] = (sourceSamples[i] / 2) + (sourceSamples[i + 2] / 2);
+                    destinationSamples[(i / 2) + 1] = (sourceSamples[i + 1] / 2) + (sourceSamples[i + 3] / 2);
                 }
             }
-
         } else {
             if (sourceAudioFormat.sampleRate() == destinationAudioFormat.sampleRate()) {
                 // mono to stereo, same sample rate
@@ -409,7 +416,7 @@ void Audio::handleAudioInput() {
     static char audioDataPacket[MAX_PACKET_SIZE];
 
     static int numBytesPacketHeader = numBytesForPacketHeaderGivenPacketType(PacketTypeMicrophoneAudioNoEcho);
-    static int leadingBytes = numBytesPacketHeader + sizeof(glm::vec3) + sizeof(glm::quat);
+    static int leadingBytes = numBytesPacketHeader + sizeof(glm::vec3) + sizeof(glm::quat) + sizeof(quint8);
 
     static int16_t* networkAudioSamples = (int16_t*) (audioDataPacket + leadingBytes);
 
@@ -610,9 +617,7 @@ void Audio::handleAudioInput() {
             MyAvatar* interfaceAvatar = Application::getInstance()->getAvatar();
             glm::vec3 headPosition = interfaceAvatar->getHead()->getPosition();
             glm::quat headOrientation = interfaceAvatar->getHead()->getFinalOrientation();
-
-            // we need the amount of bytes in the buffer + 1 for type
-            // + 12 for 3 floats for position + float for bearing + 1 attenuation byte
+            quint8 isStereo = _isStereoInput ? 1 : 0;
             
             int numAudioBytes = 0;
             
@@ -621,7 +626,7 @@ void Audio::handleAudioInput() {
                 packetType = PacketTypeSilentAudioFrame;
                 
                 // we need to indicate how many silent samples this is to the audio mixer
-                networkAudioSamples[0] = NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL;
+                networkAudioSamples[0] = NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL; // payload slot, not header byte 0
                 numAudioBytes = sizeof(int16_t);
                 
             } else {
@@ -644,6 +649,9 @@ void Audio::handleAudioInput() {
             memcpy(currentPacketPtr, &headOrientation, sizeof(headOrientation));
             currentPacketPtr += sizeof(headOrientation);
             
+            // set the mono/stereo byte
+            *currentPacketPtr++ = isStereo;
+            
             nodeList->writeDatagram(audioDataPacket, numAudioBytes + leadingBytes, audioMixer);
 
             Application::getInstance()->getBandwidthMeter()->outputStream(BandwidthMeter::AUDIO)

From ae2f6a3cb676294457beae372a0bc7bb9dc1728f Mon Sep 17 00:00:00 2001
From: Stephen Birarda <commit@birarda.com>
Date: Fri, 6 Jun 2014 10:55:04 -0700
Subject: [PATCH 3/4] complete piping of stereo audio through mixer

---
 assignment-client/src/audio/AudioMixer.cpp    | 242 ++++++++++--------
 .../src/audio/AudioMixerClientData.cpp        |  14 +-
 .../src/audio/AudioMixerClientData.h          |   4 +-
 .../src/audio/AvatarAudioRingBuffer.cpp       |   4 +-
 .../src/audio/AvatarAudioRingBuffer.h         |   2 +-
 interface/src/Audio.cpp                       |   6 +-
 .../audio/src/PositionalAudioRingBuffer.cpp   |   3 +
 .../audio/src/PositionalAudioRingBuffer.h     |   2 +
 8 files changed, 160 insertions(+), 117 deletions(-)

diff --git a/assignment-client/src/audio/AudioMixer.cpp b/assignment-client/src/audio/AudioMixer.cpp
index 3acd783bb0..f8cfb3140c 100644
--- a/assignment-client/src/audio/AudioMixer.cpp
+++ b/assignment-client/src/audio/AudioMixer.cpp
@@ -173,134 +173,160 @@ void AudioMixer::addBufferToMixForListeningNodeWithBuffer(PositionalAudioRingBuf
             weakChannelAmplitudeRatio = 1 - (PHASE_AMPLITUDE_RATIO_AT_90 * sinRatio);
         }
     }
-
-    // if the bearing relative angle to source is > 0 then the delayed channel is the right one
-    int delayedChannelOffset = (bearingRelativeAngleToSource > 0.0f) ? 1 : 0;
-    int goodChannelOffset = delayedChannelOffset == 0 ? 1 : 0;
     
     const int16_t* nextOutputStart = bufferToAdd->getNextOutput();
-   
-    const int16_t* bufferStart = bufferToAdd->getBuffer();
-    int ringBufferSampleCapacity = bufferToAdd->getSampleCapacity();
-
-    int16_t correctBufferSample[2], delayBufferSample[2];
-    int delayedChannelIndex = 0;
     
-    const int SINGLE_STEREO_OFFSET = 2;
-    
-    for (int s = 0; s < NETWORK_BUFFER_LENGTH_SAMPLES_STEREO; s += 4) {
+    if (!bufferToAdd->isStereo()) {
+        // this is a mono buffer, which means it gets full attenuation and spatialization
         
-        // setup the int16_t variables for the two sample sets
-        correctBufferSample[0] = nextOutputStart[s / 2] * attenuationCoefficient;
-        correctBufferSample[1] = nextOutputStart[(s / 2) + 1] * attenuationCoefficient;
+        // if the bearing relative angle to source is > 0 then the delayed channel is the right one
+        int delayedChannelOffset = (bearingRelativeAngleToSource > 0.0f) ? 1 : 0;
+        int goodChannelOffset = delayedChannelOffset == 0 ? 1 : 0;
         
-        delayedChannelIndex = s + (numSamplesDelay * 2) + delayedChannelOffset;
+        const int16_t* bufferStart = bufferToAdd->getBuffer();
+        int ringBufferSampleCapacity = bufferToAdd->getSampleCapacity();
         
-        delayBufferSample[0] = correctBufferSample[0] * weakChannelAmplitudeRatio;
-        delayBufferSample[1] = correctBufferSample[1] * weakChannelAmplitudeRatio;
+        int16_t correctBufferSample[2], delayBufferSample[2];
+        int delayedChannelIndex = 0;
         
-        __m64 bufferSamples = _mm_set_pi16(_clientSamples[s + goodChannelOffset],
-                                           _clientSamples[s + goodChannelOffset + SINGLE_STEREO_OFFSET],
-                                           _clientSamples[delayedChannelIndex],
-                                           _clientSamples[delayedChannelIndex + SINGLE_STEREO_OFFSET]);
-        __m64 addedSamples = _mm_set_pi16(correctBufferSample[0], correctBufferSample[1],
-                                         delayBufferSample[0], delayBufferSample[1]);
+        const int SINGLE_STEREO_OFFSET = 2;
         
-        // perform the MMX add (with saturation) of two correct and delayed samples
-        __m64 mmxResult = _mm_adds_pi16(bufferSamples, addedSamples);
-        int16_t* shortResults = reinterpret_cast<int16_t*>(&mmxResult);
-        
-        // assign the results from the result of the mmx arithmetic
-        _clientSamples[s + goodChannelOffset] = shortResults[3];
-        _clientSamples[s + goodChannelOffset + SINGLE_STEREO_OFFSET] = shortResults[2];
-        _clientSamples[delayedChannelIndex] = shortResults[1];
-        _clientSamples[delayedChannelIndex + SINGLE_STEREO_OFFSET] = shortResults[0];
-    }
-    
-    // The following code is pretty gross and redundant, but AFAIK it's the best way to avoid
-    // too many conditionals in handling the delay samples at the beginning of _clientSamples.
-    // Basically we try to take the samples in batches of four, and then handle the remainder
-    // conditionally to get rid of the rest.
-    
-    const int DOUBLE_STEREO_OFFSET = 4;
-    const int TRIPLE_STEREO_OFFSET = 6;
-    
-    if (numSamplesDelay > 0) {
-        // if there was a sample delay for this buffer, we need to pull samples prior to the nextOutput
-        // to stick at the beginning
-        float attenuationAndWeakChannelRatio = attenuationCoefficient * weakChannelAmplitudeRatio;
-        const int16_t* delayNextOutputStart = nextOutputStart - numSamplesDelay;
-        if (delayNextOutputStart < bufferStart) {
-            delayNextOutputStart = bufferStart + ringBufferSampleCapacity - numSamplesDelay;
+        for (int s = 0; s < NETWORK_BUFFER_LENGTH_SAMPLES_STEREO; s += 4) {
+            
+            // setup the int16_t variables for the two sample sets
+            correctBufferSample[0] = nextOutputStart[s / 2] * attenuationCoefficient;
+            correctBufferSample[1] = nextOutputStart[(s / 2) + 1] * attenuationCoefficient;
+            
+            delayedChannelIndex = s + (numSamplesDelay * 2) + delayedChannelOffset;
+            
+            delayBufferSample[0] = correctBufferSample[0] * weakChannelAmplitudeRatio;
+            delayBufferSample[1] = correctBufferSample[1] * weakChannelAmplitudeRatio;
+            
+            __m64 bufferSamples = _mm_set_pi16(_clientSamples[s + goodChannelOffset],
+                                               _clientSamples[s + goodChannelOffset + SINGLE_STEREO_OFFSET],
+                                               _clientSamples[delayedChannelIndex],
+                                               _clientSamples[delayedChannelIndex + SINGLE_STEREO_OFFSET]);
+            __m64 addedSamples = _mm_set_pi16(correctBufferSample[0], correctBufferSample[1],
+                                              delayBufferSample[0], delayBufferSample[1]);
+            
+            // perform the MMX add (with saturation) of two correct and delayed samples
+            __m64 mmxResult = _mm_adds_pi16(bufferSamples, addedSamples);
+            int16_t* shortResults = reinterpret_cast<int16_t*>(&mmxResult);
+            
+            // assign the results from the result of the mmx arithmetic
+            _clientSamples[s + goodChannelOffset] = shortResults[3];
+            _clientSamples[s + goodChannelOffset + SINGLE_STEREO_OFFSET] = shortResults[2];
+            _clientSamples[delayedChannelIndex] = shortResults[1];
+            _clientSamples[delayedChannelIndex + SINGLE_STEREO_OFFSET] = shortResults[0];
         }
         
-        int i = 0;
+        // The following code is pretty gross and redundant, but AFAIK it's the best way to avoid
+        // too many conditionals in handling the delay samples at the beginning of _clientSamples.
+        // Basically we try to take the samples in batches of four, and then handle the remainder
+        // conditionally to get rid of the rest.
         
-        while (i + 3 < numSamplesDelay) {
-            // handle the first cases where we can MMX add four samples at once
+        const int DOUBLE_STEREO_OFFSET = 4;
+        const int TRIPLE_STEREO_OFFSET = 6;
+        
+        if (numSamplesDelay > 0) {
+            // if there was a sample delay for this buffer, we need to pull samples prior to the nextOutput
+            // to stick at the beginning
+            float attenuationAndWeakChannelRatio = attenuationCoefficient * weakChannelAmplitudeRatio;
+            const int16_t* delayNextOutputStart = nextOutputStart - numSamplesDelay;
+            if (delayNextOutputStart < bufferStart) {
+                delayNextOutputStart = bufferStart + ringBufferSampleCapacity - numSamplesDelay;
+            }
+            
+            int i = 0;
+            
+            while (i + 3 < numSamplesDelay) {
+                // handle the first cases where we can MMX add four samples at once
+                int parentIndex = i * 2;
+                __m64 bufferSamples = _mm_set_pi16(_clientSamples[parentIndex + delayedChannelOffset],
+                                                   _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset],
+                                                   _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset],
+                                                   _clientSamples[parentIndex + TRIPLE_STEREO_OFFSET + delayedChannelOffset]);
+                __m64 addSamples = _mm_set_pi16(delayNextOutputStart[i] * attenuationAndWeakChannelRatio,
+                                                delayNextOutputStart[i + 1] * attenuationAndWeakChannelRatio,
+                                                delayNextOutputStart[i + 2] * attenuationAndWeakChannelRatio,
+                                                delayNextOutputStart[i + 3] * attenuationAndWeakChannelRatio);
+                __m64 mmxResult = _mm_adds_pi16(bufferSamples, addSamples);
+                int16_t* shortResults = reinterpret_cast<int16_t*>(&mmxResult);
+                
+                _clientSamples[parentIndex + delayedChannelOffset] = shortResults[3];
+                _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[2];
+                _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[1];
+                _clientSamples[parentIndex + TRIPLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[0];
+                
+                // push the index
+                i += 4;
+            }
+            
             int parentIndex = i * 2;
-            __m64 bufferSamples = _mm_set_pi16(_clientSamples[parentIndex + delayedChannelOffset],
-                                               _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset],
-                                               _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset],
-                                               _clientSamples[parentIndex + TRIPLE_STEREO_OFFSET + delayedChannelOffset]);
-            __m64 addSamples = _mm_set_pi16(delayNextOutputStart[i] * attenuationAndWeakChannelRatio,
-                                            delayNextOutputStart[i + 1] * attenuationAndWeakChannelRatio,
-                                            delayNextOutputStart[i + 2] * attenuationAndWeakChannelRatio,
-                                            delayNextOutputStart[i + 3] * attenuationAndWeakChannelRatio);
-            __m64 mmxResult = _mm_adds_pi16(bufferSamples, addSamples);
-            int16_t* shortResults = reinterpret_cast<int16_t*>(&mmxResult);
             
-            _clientSamples[parentIndex + delayedChannelOffset] = shortResults[3];
-            _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[2];
-            _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[1];
-            _clientSamples[parentIndex + TRIPLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[0];
-            
-            // push the index
-            i += 4;
+            if (i + 2 < numSamplesDelay) {
+                // MMX add only three delayed samples
+                
+                __m64 bufferSamples = _mm_set_pi16(_clientSamples[parentIndex + delayedChannelOffset],
+                                                   _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset],
+                                                   _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset],
+                                                   0);
+                __m64 addSamples = _mm_set_pi16(delayNextOutputStart[i] * attenuationAndWeakChannelRatio,
+                                                delayNextOutputStart[i + 1] * attenuationAndWeakChannelRatio,
+                                                delayNextOutputStart[i + 2] * attenuationAndWeakChannelRatio,
+                                                0);
+                __m64 mmxResult = _mm_adds_pi16(bufferSamples, addSamples);
+                int16_t* shortResults = reinterpret_cast<int16_t*>(&mmxResult);
+                
+                _clientSamples[parentIndex + delayedChannelOffset] = shortResults[3];
+                _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[2];
+                _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[1];
+                
+            } else if (i + 1 < numSamplesDelay) {
+                // MMX add two delayed samples
+                __m64 bufferSamples = _mm_set_pi16(_clientSamples[parentIndex + delayedChannelOffset],
+                                                   _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset], 0, 0);
+                __m64 addSamples = _mm_set_pi16(delayNextOutputStart[i] * attenuationAndWeakChannelRatio,
+                                                delayNextOutputStart[i + 1] * attenuationAndWeakChannelRatio, 0, 0);
+                
+                __m64 mmxResult = _mm_adds_pi16(bufferSamples, addSamples);
+                int16_t* shortResults = reinterpret_cast<int16_t*>(&mmxResult);
+                
+                _clientSamples[parentIndex + delayedChannelOffset] = shortResults[3];
+                _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[2];
+                
+            } else if (i < numSamplesDelay) {
+                // MMX add a single delayed sample
+                __m64 bufferSamples = _mm_set_pi16(_clientSamples[parentIndex + delayedChannelOffset], 0, 0, 0);
+                __m64 addSamples = _mm_set_pi16(delayNextOutputStart[i] * attenuationAndWeakChannelRatio, 0, 0, 0);
+                
+                __m64 mmxResult = _mm_adds_pi16(bufferSamples, addSamples);
+                int16_t* shortResults = reinterpret_cast<int16_t*>(&mmxResult);
+                
+                _clientSamples[parentIndex + delayedChannelOffset] = shortResults[3];
+            }
         }
+    } else {
+        // stereo buffer - do attenuation but no sample delay for spatialization
+        qDebug() << "Adding a stereo buffer";
         
-        int parentIndex = i * 2;
-        
-        if (i + 2 < numSamplesDelay) {
-            // MMX add only three delayed samples
+        for (int s = 0; s < NETWORK_BUFFER_LENGTH_SAMPLES_STEREO; s += 4) {
+            // use MMX to clamp four additions at a time
             
-            __m64 bufferSamples = _mm_set_pi16(_clientSamples[parentIndex + delayedChannelOffset],
-                                               _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset],
-                                               _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset],
-                                               0);
-            __m64 addSamples = _mm_set_pi16(delayNextOutputStart[i] * attenuationAndWeakChannelRatio,
-                                            delayNextOutputStart[i + 1] * attenuationAndWeakChannelRatio,
-                                            delayNextOutputStart[i + 2] * attenuationAndWeakChannelRatio,
-                                            0);
-            __m64 mmxResult = _mm_adds_pi16(bufferSamples, addSamples);
-            int16_t* shortResults = reinterpret_cast<int16_t*>(&mmxResult);
-            
-            _clientSamples[parentIndex + delayedChannelOffset] = shortResults[3];
-            _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[2];
-            _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[1];
-            
-        } else if (i + 1 < numSamplesDelay) {
-            // MMX add two delayed samples
-            __m64 bufferSamples = _mm_set_pi16(_clientSamples[parentIndex + delayedChannelOffset],
-                                               _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset], 0, 0);
-            __m64 addSamples = _mm_set_pi16(delayNextOutputStart[i] * attenuationAndWeakChannelRatio,
-                                            delayNextOutputStart[i + 1] * attenuationAndWeakChannelRatio, 0, 0);
+            __m64 bufferSamples = _mm_set_pi16(_clientSamples[s], _clientSamples[s + 1],
+                                               _clientSamples[s + 2], _clientSamples[s + 3]);
+            __m64 addSamples = _mm_set_pi16(nextOutputStart[s] * attenuationCoefficient,
+                                            nextOutputStart[s + 1] * attenuationCoefficient,
+                                            nextOutputStart[s + 2] * attenuationCoefficient,
+                                            nextOutputStart[s + 3] * attenuationCoefficient);
             
             __m64 mmxResult = _mm_adds_pi16(bufferSamples, addSamples);
             int16_t* shortResults = reinterpret_cast<int16_t*>(&mmxResult);
             
-            _clientSamples[parentIndex + delayedChannelOffset] = shortResults[3];
-            _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[2];
-            
-        } else if (i < numSamplesDelay) {
-            // MMX add a single delayed sample
-            __m64 bufferSamples = _mm_set_pi16(_clientSamples[parentIndex + delayedChannelOffset], 0, 0, 0);
-            __m64 addSamples = _mm_set_pi16(delayNextOutputStart[i] * attenuationAndWeakChannelRatio, 0, 0, 0);
-            
-            __m64 mmxResult = _mm_adds_pi16(bufferSamples, addSamples);
-            int16_t* shortResults = reinterpret_cast<int16_t*>(&mmxResult);
-            
-            _clientSamples[parentIndex + delayedChannelOffset] = shortResults[3];
+            _clientSamples[s] = shortResults[3];
+            _clientSamples[s + 1] = shortResults[2];
+            _clientSamples[s + 2] = shortResults[1];
+            _clientSamples[s + 3] = shortResults[0];
         }
     }
 }
diff --git a/assignment-client/src/audio/AudioMixerClientData.cpp b/assignment-client/src/audio/AudioMixerClientData.cpp
index 85b8dde690..e21fadbd16 100644
--- a/assignment-client/src/audio/AudioMixerClientData.cpp
+++ b/assignment-client/src/audio/AudioMixerClientData.cpp
@@ -50,10 +50,22 @@ int AudioMixerClientData::parseData(const QByteArray& packet) {
 
         // grab the AvatarAudioRingBuffer from the vector (or create it if it doesn't exist)
         AvatarAudioRingBuffer* avatarRingBuffer = getAvatarAudioRingBuffer();
+        
+        // read the first byte after the header to see if this is a stereo or mono buffer (a packet too short to hold it is mono)
+        int channelFlagIndex = numBytesForPacketHeader(packet);
+        bool isStereo = channelFlagIndex < packet.size() && packet.at(channelFlagIndex) == 1;
+        
+        if (avatarRingBuffer && avatarRingBuffer->isStereo() != isStereo) {
+            // there's a mismatch in the buffer channels for the incoming and current buffer
+            // so delete our current buffer and create a new one
+            _ringBuffers.removeOne(avatarRingBuffer);
+            avatarRingBuffer->deleteLater();
+            avatarRingBuffer = NULL;
+        }
 
         if (!avatarRingBuffer) {
             // we don't have an AvatarAudioRingBuffer yet, so add it
-            avatarRingBuffer = new AvatarAudioRingBuffer();
+            avatarRingBuffer = new AvatarAudioRingBuffer(isStereo);
             _ringBuffers.push_back(avatarRingBuffer);
         }
 
diff --git a/assignment-client/src/audio/AudioMixerClientData.h b/assignment-client/src/audio/AudioMixerClientData.h
index a5f03ebd15..70b653301b 100644
--- a/assignment-client/src/audio/AudioMixerClientData.h
+++ b/assignment-client/src/audio/AudioMixerClientData.h
@@ -24,14 +24,14 @@ public:
     AudioMixerClientData();
     ~AudioMixerClientData();
     
-    const std::vector<PositionalAudioRingBuffer*> getRingBuffers() const { return _ringBuffers; }
+    const QList<PositionalAudioRingBuffer*> getRingBuffers() const { return _ringBuffers; }
     AvatarAudioRingBuffer* getAvatarAudioRingBuffer() const;
     
     int parseData(const QByteArray& packet);
     void checkBuffersBeforeFrameSend(int jitterBufferLengthSamples);
     void pushBuffersAfterFrameSend();
 private:
-    std::vector<PositionalAudioRingBuffer*> _ringBuffers;
+    QList<PositionalAudioRingBuffer*> _ringBuffers;
 };
 
 #endif // hifi_AudioMixerClientData_h
diff --git a/assignment-client/src/audio/AvatarAudioRingBuffer.cpp b/assignment-client/src/audio/AvatarAudioRingBuffer.cpp
index 9a7c2839d8..5613a64cc4 100644
--- a/assignment-client/src/audio/AvatarAudioRingBuffer.cpp
+++ b/assignment-client/src/audio/AvatarAudioRingBuffer.cpp
@@ -13,8 +13,8 @@
 
 #include "AvatarAudioRingBuffer.h"
 
-AvatarAudioRingBuffer::AvatarAudioRingBuffer() :
-    PositionalAudioRingBuffer(PositionalAudioRingBuffer::Microphone) {
+AvatarAudioRingBuffer::AvatarAudioRingBuffer(bool isStereo) :
+    PositionalAudioRingBuffer(PositionalAudioRingBuffer::Microphone, isStereo) {
     
 }
 
diff --git a/assignment-client/src/audio/AvatarAudioRingBuffer.h b/assignment-client/src/audio/AvatarAudioRingBuffer.h
index 1e61a82f68..f842c2aa33 100644
--- a/assignment-client/src/audio/AvatarAudioRingBuffer.h
+++ b/assignment-client/src/audio/AvatarAudioRingBuffer.h
@@ -18,7 +18,7 @@
 
 class AvatarAudioRingBuffer : public PositionalAudioRingBuffer {
 public:
-    AvatarAudioRingBuffer();
+    AvatarAudioRingBuffer(bool isStereo = false);
     
     int parseData(const QByteArray& packet);
 private:
diff --git a/interface/src/Audio.cpp b/interface/src/Audio.cpp
index 1575aa524f..39392a5361 100644
--- a/interface/src/Audio.cpp
+++ b/interface/src/Audio.cpp
@@ -640,6 +640,9 @@ void Audio::handleAudioInput() {
             }
 
             char* currentPacketPtr = audioDataPacket + populatePacketHeader(audioDataPacket, packetType);
+            
+            // set the mono/stereo byte
+            *currentPacketPtr++ = isStereo;
 
             // memcpy the three float positions
             memcpy(currentPacketPtr, &headPosition, sizeof(headPosition));
@@ -649,9 +652,6 @@ void Audio::handleAudioInput() {
             memcpy(currentPacketPtr, &headOrientation, sizeof(headOrientation));
             currentPacketPtr += sizeof(headOrientation);
             
-            // set the mono/stereo byte
-            *currentPacketPtr++ = isStereo;
-            
             nodeList->writeDatagram(audioDataPacket, numAudioBytes + leadingBytes, audioMixer);
 
             Application::getInstance()->getBandwidthMeter()->outputStream(BandwidthMeter::AUDIO)
diff --git a/libraries/audio/src/PositionalAudioRingBuffer.cpp b/libraries/audio/src/PositionalAudioRingBuffer.cpp
index 0a3d2d0c16..94a88897e3 100644
--- a/libraries/audio/src/PositionalAudioRingBuffer.cpp
+++ b/libraries/audio/src/PositionalAudioRingBuffer.cpp
@@ -41,6 +41,9 @@ int PositionalAudioRingBuffer::parseData(const QByteArray& packet) {
     // skip the packet header (includes the source UUID)
     int readBytes = numBytesForPacketHeader(packet);
     
+    // hop over the channel flag that has already been read in AudioMixerClientData
+    readBytes += sizeof(quint8);
+    // read the positional data
     readBytes += parsePositionalData(packet.mid(readBytes));
    
     if (packetTypeForPacket(packet) == PacketTypeSilentAudioFrame) {
diff --git a/libraries/audio/src/PositionalAudioRingBuffer.h b/libraries/audio/src/PositionalAudioRingBuffer.h
index 1864271d5f..17a663d5f6 100644
--- a/libraries/audio/src/PositionalAudioRingBuffer.h
+++ b/libraries/audio/src/PositionalAudioRingBuffer.h
@@ -41,6 +41,8 @@ public:
     
     bool shouldLoopbackForNode() const { return _shouldLoopbackForNode; }
     
+    bool isStereo() const { return _isStereo; }
+    
     PositionalAudioRingBuffer::Type getType() const { return _type; }
     const glm::vec3& getPosition() const { return _position; }
     const glm::quat& getOrientation() const { return _orientation; }

From f6011b65d7604ef303ee94f4ba2884e4c9ebfb54 Mon Sep 17 00:00:00 2001
From: Stephen Birarda <commit@birarda.com>
Date: Fri, 6 Jun 2014 11:26:04 -0700
Subject: [PATCH 4/4] fix input loudness for a stereo source

---
 assignment-client/src/audio/AudioMixer.cpp      |  4 +---
 .../src/audio/AudioMixerClientData.cpp          |  3 ++-
 interface/src/Audio.cpp                         | 17 +++++++++++++----
 3 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/assignment-client/src/audio/AudioMixer.cpp b/assignment-client/src/audio/AudioMixer.cpp
index f8cfb3140c..61dee6c82b 100644
--- a/assignment-client/src/audio/AudioMixer.cpp
+++ b/assignment-client/src/audio/AudioMixer.cpp
@@ -307,9 +307,7 @@ void AudioMixer::addBufferToMixForListeningNodeWithBuffer(PositionalAudioRingBuf
             }
         }
     } else {
-        // stereo buffer - do attenuation but no sample delay for spatialization
-        qDebug() << "Adding a stereo buffer";
-        
+        // stereo buffer - do attenuation but no sample delay for spatialization        
         for (int s = 0; s < NETWORK_BUFFER_LENGTH_SAMPLES_STEREO; s += 4) {
             // use MMX to clamp four additions at a time
             
diff --git a/assignment-client/src/audio/AudioMixerClientData.cpp b/assignment-client/src/audio/AudioMixerClientData.cpp
index e21fadbd16..7fb2a7dcab 100644
--- a/assignment-client/src/audio/AudioMixerClientData.cpp
+++ b/assignment-client/src/audio/AudioMixerClientData.cpp
@@ -118,7 +118,8 @@ void AudioMixerClientData::pushBuffersAfterFrameSend() {
         PositionalAudioRingBuffer* audioBuffer = _ringBuffers[i];
 
         if (audioBuffer->willBeAddedToMix()) {
-            audioBuffer->shiftReadPosition(NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL);
+            audioBuffer->shiftReadPosition(audioBuffer->isStereo()
+                                           ? NETWORK_BUFFER_LENGTH_SAMPLES_STEREO : NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL);
 
             audioBuffer->setWillBeAddedToMix(false);
         } else if (audioBuffer->getType() == PositionalAudioRingBuffer::Injector
diff --git a/interface/src/Audio.cpp b/interface/src/Audio.cpp
index 39392a5361..b012daa017 100644
--- a/interface/src/Audio.cpp
+++ b/interface/src/Audio.cpp
@@ -306,7 +306,7 @@ void linearResampling(int16_t* sourceSamples, int16_t* destinationSamples,
             } else {
                 // this is a 48 to 24 resampling but both source and destination are two channels
                 // squish two samples into one in each channel
-                for (int i = 0; i < numSourceSamples; i += 2) {
+                for (int i = 0; i < numSourceSamples; i += 4) {
                     destinationSamples[i / 2] = (sourceSamples[i] / 2) + (sourceSamples[i + 2] / 2);
                     destinationSamples[(i / 2) + 1] = (sourceSamples[i + 1] / 2) + (sourceSamples[i + 3] / 2);
                 }
@@ -585,6 +585,14 @@ void Audio::handleAudioInput() {
                         _lastInputLoudness = 0;
                     }
                 }
+            } else {
+                // average the absolute amplitude over every sample summed below, not over half of them
+                float loudness = 0.0f;
+                
+                for (int i = 0; i < NETWORK_BUFFER_LENGTH_SAMPLES_STEREO; i++) {
+                    loudness += fabsf(networkAudioSamples[i]);
+                }
+                _lastInputLoudness = loudness / NETWORK_BUFFER_LENGTH_SAMPLES_STEREO;
             }
         } else {
             // our input loudness is 0, since we're muted
@@ -626,11 +634,12 @@ void Audio::handleAudioInput() {
                 packetType = PacketTypeSilentAudioFrame;
                 
                 // we need to indicate how many silent samples this is to the audio mixer
-                audioDataPacket[0] = NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL;
+                // store the count in the int16_t payload slot; audioDataPacket[0] is a char that the header write clobbers
+                networkAudioSamples[0] = _isStereoInput
+                    ? NETWORK_BUFFER_LENGTH_SAMPLES_STEREO : NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL;
                 numAudioBytes = sizeof(int16_t);
-                
             } else {
-                numAudioBytes = NETWORK_BUFFER_LENGTH_BYTES_PER_CHANNEL;
+                numAudioBytes = _isStereoInput ? NETWORK_BUFFER_LENGTH_BYTES_STEREO : NETWORK_BUFFER_LENGTH_BYTES_PER_CHANNEL;
                 
                 if (Menu::getInstance()->isOptionChecked(MenuOption::EchoServerAudio)) {
                     packetType = PacketTypeMicrophoneAudioWithEcho;