From a1b2bf59e9ebcb5b367fb808ff63f3aad130d9f5 Mon Sep 17 00:00:00 2001
From: wangyix <wangyix@gmail.com>
Date: Tue, 15 Jul 2014 12:23:02 -0700
Subject: [PATCH 1/5] Disable SIMD adds in AudioMixer on Windows

---
 assignment-client/src/audio/AudioMixer.cpp    | 35 +++++++++++++++++--
 .../audio/src/PositionalAudioRingBuffer.cpp   |  1 +
 2 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/assignment-client/src/audio/AudioMixer.cpp b/assignment-client/src/audio/AudioMixer.cpp
index c86d37e283..60f905f20a 100644
--- a/assignment-client/src/audio/AudioMixer.cpp
+++ b/assignment-client/src/audio/AudioMixer.cpp
@@ -231,6 +231,12 @@ void AudioMixer::addBufferToMixForListeningNodeWithBuffer(PositionalAudioRingBuf
             delayBufferSample[0] = correctBufferSample[0] * weakChannelAmplitudeRatio;
             delayBufferSample[1] = correctBufferSample[1] * weakChannelAmplitudeRatio;
             
+#ifdef _WIN32
+            _clientSamples[s + goodChannelOffset] += correctBufferSample[0];
+            _clientSamples[s + goodChannelOffset + SINGLE_STEREO_OFFSET] += correctBufferSample[1];
+            _clientSamples[delayedChannelIndex] += delayBufferSample[0];
+            _clientSamples[delayedChannelIndex + SINGLE_STEREO_OFFSET] += delayBufferSample[1];
+#else
             __m64 bufferSamples = _mm_set_pi16(_clientSamples[s + goodChannelOffset],
                                                _clientSamples[s + goodChannelOffset + SINGLE_STEREO_OFFSET],
                                                _clientSamples[delayedChannelIndex],
@@ -247,6 +253,7 @@ void AudioMixer::addBufferToMixForListeningNodeWithBuffer(PositionalAudioRingBuf
             _clientSamples[s + goodChannelOffset + SINGLE_STEREO_OFFSET] = shortResults[2];
             _clientSamples[delayedChannelIndex] = shortResults[1];
             _clientSamples[delayedChannelIndex + SINGLE_STEREO_OFFSET] = shortResults[0];
+#endif
         }
         
         // The following code is pretty gross and redundant, but AFAIK it's the best way to avoid
@@ -271,6 +278,13 @@ void AudioMixer::addBufferToMixForListeningNodeWithBuffer(PositionalAudioRingBuf
             while (i + 3 < numSamplesDelay) {
                 // handle the first cases where we can MMX add four samples at once
                 int parentIndex = i * 2;
+
+#ifdef _WIN32
+                _clientSamples[parentIndex + delayedChannelOffset] += delayNextOutputStart[i] * attenuationAndWeakChannelRatio;
+                _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset] += delayNextOutputStart[i + 1] * attenuationAndWeakChannelRatio;
+                _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset] += delayNextOutputStart[i + 2] * attenuationAndWeakChannelRatio;
+                _clientSamples[parentIndex + TRIPLE_STEREO_OFFSET + delayedChannelOffset] += delayNextOutputStart[i + 3] * attenuationAndWeakChannelRatio;
+#else
                 __m64 bufferSamples = _mm_set_pi16(_clientSamples[parentIndex + delayedChannelOffset],
                                                    _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset],
                                                    _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset],
@@ -286,7 +300,8 @@ void AudioMixer::addBufferToMixForListeningNodeWithBuffer(PositionalAudioRingBuf
                 _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[2];
                 _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[1];
                 _clientSamples[parentIndex + TRIPLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[0];
-                
+#endif
+
                 // push the index
                 i += 4;
             }
@@ -296,6 +311,11 @@ void AudioMixer::addBufferToMixForListeningNodeWithBuffer(PositionalAudioRingBuf
             if (i + 2 < numSamplesDelay) {
                 // MMX add only three delayed samples
                 
+#ifdef _WIN32
+                _clientSamples[parentIndex + delayedChannelOffset] += delayNextOutputStart[i] * attenuationAndWeakChannelRatio;
+                _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset] += delayNextOutputStart[i + 1] * attenuationAndWeakChannelRatio;
+                _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset] += delayNextOutputStart[i + 2] * attenuationAndWeakChannelRatio;
+#else
                 __m64 bufferSamples = _mm_set_pi16(_clientSamples[parentIndex + delayedChannelOffset],
                                                    _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset],
                                                    _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset],
@@ -310,8 +330,15 @@ void AudioMixer::addBufferToMixForListeningNodeWithBuffer(PositionalAudioRingBuf
                 _clientSamples[parentIndex + delayedChannelOffset] = shortResults[3];
                 _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[2];
                 _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[1];
+#endif
+
                 
             } else if (i + 1 < numSamplesDelay) {
+
+#ifdef _WIN32
+                _clientSamples[parentIndex + delayedChannelOffset] += delayNextOutputStart[i] * attenuationAndWeakChannelRatio;
+                _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset] += delayNextOutputStart[i + 1] * attenuationAndWeakChannelRatio;
+#else
                 // MMX add two delayed samples
                 __m64 bufferSamples = _mm_set_pi16(_clientSamples[parentIndex + delayedChannelOffset],
                                                    _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset],
@@ -324,9 +351,12 @@ void AudioMixer::addBufferToMixForListeningNodeWithBuffer(PositionalAudioRingBuf
                 
                 _clientSamples[parentIndex + delayedChannelOffset] = shortResults[3];
                 _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[2];
-                
+#endif
             } else if (i < numSamplesDelay) {
                 // MMX add a single delayed sample
+#ifdef _WIN32
+                _clientSamples[parentIndex + delayedChannelOffset] += delayNextOutputStart[i] * attenuationAndWeakChannelRatio;
+#else
                 __m64 bufferSamples = _mm_set_pi16(_clientSamples[parentIndex + delayedChannelOffset], 0, 0, 0);
                 __m64 addSamples = _mm_set_pi16(delayNextOutputStart[i] * attenuationAndWeakChannelRatio, 0, 0, 0);
                 
@@ -334,6 +364,7 @@ void AudioMixer::addBufferToMixForListeningNodeWithBuffer(PositionalAudioRingBuf
                 int16_t* shortResults = reinterpret_cast<int16_t*>(&mmxResult);
                 
                 _clientSamples[parentIndex + delayedChannelOffset] = shortResults[3];
+#endif
             }
         }
     } else {
diff --git a/libraries/audio/src/PositionalAudioRingBuffer.cpp b/libraries/audio/src/PositionalAudioRingBuffer.cpp
index 8cba6d72b0..c9a58b2210 100644
--- a/libraries/audio/src/PositionalAudioRingBuffer.cpp
+++ b/libraries/audio/src/PositionalAudioRingBuffer.cpp
@@ -149,6 +149,7 @@ void PositionalAudioRingBuffer::updateNextOutputTrailingLoudness() {
     // fixes bug on Windows where _nextOutputTrailingLoudness sometimes becomes NaN.  In that case,
     // revert _nextOutputTrailingLoudness to its previous value
     if (isNaN(_nextOutputTrailingLoudness)) {
+        printf("next output trailling loudness NaN!! --------------------------------------\n");
         _nextOutputTrailingLoudness = oldNextOutputTrailingLoudness;
     }
 }

From 828410b7d6b80f930de864aaace331437e7669a3 Mon Sep 17 00:00:00 2001
From: wangyix <wangyix@gmail.com>
Date: Tue, 15 Jul 2014 12:23:54 -0700
Subject: [PATCH 2/5] Remove isNaN check for _nextOutputTrailingLoudness

---
 libraries/audio/src/PositionalAudioRingBuffer.cpp | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/libraries/audio/src/PositionalAudioRingBuffer.cpp b/libraries/audio/src/PositionalAudioRingBuffer.cpp
index c9a58b2210..a0ce74dbaf 100644
--- a/libraries/audio/src/PositionalAudioRingBuffer.cpp
+++ b/libraries/audio/src/PositionalAudioRingBuffer.cpp
@@ -145,13 +145,6 @@ void PositionalAudioRingBuffer::updateNextOutputTrailingLoudness() {
             _nextOutputTrailingLoudness = 0;
         }
     }
-    
-    // fixes bug on Windows where _nextOutputTrailingLoudness sometimes becomes NaN.  In that case,
-    // revert _nextOutputTrailingLoudness to its previous value
-    if (isNaN(_nextOutputTrailingLoudness)) {
-        printf("next output trailling loudness NaN!! --------------------------------------\n");
-        _nextOutputTrailingLoudness = oldNextOutputTrailingLoudness;
-    }
 }
 
 bool PositionalAudioRingBuffer::shouldBeAddedToMix() {

From 871aa2790a6f380e6b256e6bafe22455d208db36 Mon Sep 17 00:00:00 2001
From: wangyix <wangyix@gmail.com>
Date: Tue, 15 Jul 2014 12:24:28 -0700
Subject: [PATCH 3/5] Remove unused variable oldNextOutputTrailingLoudness

---
 libraries/audio/src/PositionalAudioRingBuffer.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/libraries/audio/src/PositionalAudioRingBuffer.cpp b/libraries/audio/src/PositionalAudioRingBuffer.cpp
index a0ce74dbaf..6c7ecd1cad 100644
--- a/libraries/audio/src/PositionalAudioRingBuffer.cpp
+++ b/libraries/audio/src/PositionalAudioRingBuffer.cpp
@@ -135,7 +135,6 @@ void PositionalAudioRingBuffer::updateNextOutputTrailingLoudness() {
     const float PREVIOUS_FRAMES_RATIO = 1.0f - CURRENT_FRAME_RATIO;
     const float LOUDNESS_EPSILON = 0.000001f;
     
-    float oldNextOutputTrailingLoudness = _nextOutputTrailingLoudness;
     if (nextLoudness >= _nextOutputTrailingLoudness) {
         _nextOutputTrailingLoudness = nextLoudness;
     } else {

From d5a30ff6eedbb00bf0d850f903a68686b776b7c9 Mon Sep 17 00:00:00 2001
From: wangyix <wangyix@gmail.com>
Date: Tue, 15 Jul 2014 14:43:08 -0700
Subject: [PATCH 4/5] removed SIMD stuff from AudioMixer for all builds

---
 assignment-client/src/audio/AudioMixer.cpp | 111 +--------------------
 1 file changed, 2 insertions(+), 109 deletions(-)

diff --git a/assignment-client/src/audio/AudioMixer.cpp b/assignment-client/src/audio/AudioMixer.cpp
index 60f905f20a..076769d989 100644
--- a/assignment-client/src/audio/AudioMixer.cpp
+++ b/assignment-client/src/audio/AudioMixer.cpp
@@ -231,29 +231,10 @@ void AudioMixer::addBufferToMixForListeningNodeWithBuffer(PositionalAudioRingBuf
             delayBufferSample[0] = correctBufferSample[0] * weakChannelAmplitudeRatio;
             delayBufferSample[1] = correctBufferSample[1] * weakChannelAmplitudeRatio;
             
-#ifdef _WIN32
             _clientSamples[s + goodChannelOffset] += correctBufferSample[0];
             _clientSamples[s + goodChannelOffset + SINGLE_STEREO_OFFSET] += correctBufferSample[1];
             _clientSamples[delayedChannelIndex] += delayBufferSample[0];
             _clientSamples[delayedChannelIndex + SINGLE_STEREO_OFFSET] += delayBufferSample[1];
-#else
-            __m64 bufferSamples = _mm_set_pi16(_clientSamples[s + goodChannelOffset],
-                                               _clientSamples[s + goodChannelOffset + SINGLE_STEREO_OFFSET],
-                                               _clientSamples[delayedChannelIndex],
-                                               _clientSamples[delayedChannelIndex + SINGLE_STEREO_OFFSET]);
-            __m64 addedSamples = _mm_set_pi16(correctBufferSample[0], correctBufferSample[1],
-                                              delayBufferSample[0], delayBufferSample[1]);
-            
-            // perform the MMX add (with saturation) of two correct and delayed samples
-            __m64 mmxResult = _mm_adds_pi16(bufferSamples, addedSamples);
-            int16_t* shortResults = reinterpret_cast<int16_t*>(&mmxResult);
-            
-            // assign the results from the result of the mmx arithmetic
-            _clientSamples[s + goodChannelOffset] = shortResults[3];
-            _clientSamples[s + goodChannelOffset + SINGLE_STEREO_OFFSET] = shortResults[2];
-            _clientSamples[delayedChannelIndex] = shortResults[1];
-            _clientSamples[delayedChannelIndex + SINGLE_STEREO_OFFSET] = shortResults[0];
-#endif
         }
         
         // The following code is pretty gross and redundant, but AFAIK it's the best way to avoid
@@ -273,98 +254,10 @@ void AudioMixer::addBufferToMixForListeningNodeWithBuffer(PositionalAudioRingBuf
                 delayNextOutputStart = bufferStart + ringBufferSampleCapacity - numSamplesDelay;
             }
             
-            int i = 0;
-            
-            while (i + 3 < numSamplesDelay) {
-                // handle the first cases where we can MMX add four samples at once
+
+            for (int i = 0; i < numSamplesDelay; i++) {
                 int parentIndex = i * 2;
-
-#ifdef _WIN32
                 _clientSamples[parentIndex + delayedChannelOffset] += delayNextOutputStart[i] * attenuationAndWeakChannelRatio;
-                _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset] += delayNextOutputStart[i + 1] * attenuationAndWeakChannelRatio;
-                _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset] += delayNextOutputStart[i + 2] * attenuationAndWeakChannelRatio;
-                _clientSamples[parentIndex + TRIPLE_STEREO_OFFSET + delayedChannelOffset] += delayNextOutputStart[i + 3] * attenuationAndWeakChannelRatio;
-#else
-                __m64 bufferSamples = _mm_set_pi16(_clientSamples[parentIndex + delayedChannelOffset],
-                                                   _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset],
-                                                   _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset],
-                                                   _clientSamples[parentIndex + TRIPLE_STEREO_OFFSET + delayedChannelOffset]);
-                __m64 addSamples = _mm_set_pi16(delayNextOutputStart[i] * attenuationAndWeakChannelRatio,
-                                                delayNextOutputStart[i + 1] * attenuationAndWeakChannelRatio,
-                                                delayNextOutputStart[i + 2] * attenuationAndWeakChannelRatio,
-                                                delayNextOutputStart[i + 3] * attenuationAndWeakChannelRatio);
-                __m64 mmxResult = _mm_adds_pi16(bufferSamples, addSamples);
-                int16_t* shortResults = reinterpret_cast<int16_t*>(&mmxResult);
-                
-                _clientSamples[parentIndex + delayedChannelOffset] = shortResults[3];
-                _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[2];
-                _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[1];
-                _clientSamples[parentIndex + TRIPLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[0];
-#endif
-
-                // push the index
-                i += 4;
-            }
-            
-            int parentIndex = i * 2;
-            
-            if (i + 2 < numSamplesDelay) {
-                // MMX add only three delayed samples
-                
-#ifdef _WIN32
-                _clientSamples[parentIndex + delayedChannelOffset] += delayNextOutputStart[i] * attenuationAndWeakChannelRatio;
-                _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset] += delayNextOutputStart[i + 1] * attenuationAndWeakChannelRatio;
-                _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset] += delayNextOutputStart[i + 2] * attenuationAndWeakChannelRatio;
-#else
-                __m64 bufferSamples = _mm_set_pi16(_clientSamples[parentIndex + delayedChannelOffset],
-                                                   _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset],
-                                                   _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset],
-                                                   0);
-                __m64 addSamples = _mm_set_pi16(delayNextOutputStart[i] * attenuationAndWeakChannelRatio,
-                                                delayNextOutputStart[i + 1] * attenuationAndWeakChannelRatio,
-                                                delayNextOutputStart[i + 2] * attenuationAndWeakChannelRatio,
-                                                0);
-                __m64 mmxResult = _mm_adds_pi16(bufferSamples, addSamples);
-                int16_t* shortResults = reinterpret_cast<int16_t*>(&mmxResult);
-                
-                _clientSamples[parentIndex + delayedChannelOffset] = shortResults[3];
-                _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[2];
-                _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[1];
-#endif
-
-                
-            } else if (i + 1 < numSamplesDelay) {
-
-#ifdef _WIN32
-                _clientSamples[parentIndex + delayedChannelOffset] += delayNextOutputStart[i] * attenuationAndWeakChannelRatio;
-                _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset] += delayNextOutputStart[i + 1] * attenuationAndWeakChannelRatio;
-#else
-                // MMX add two delayed samples
-                __m64 bufferSamples = _mm_set_pi16(_clientSamples[parentIndex + delayedChannelOffset],
-                                                   _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset],
-                                                   0, 0);
-                __m64 addSamples = _mm_set_pi16(delayNextOutputStart[i] * attenuationAndWeakChannelRatio,
-                                                delayNextOutputStart[i + 1] * attenuationAndWeakChannelRatio, 0, 0);
-                
-                __m64 mmxResult = _mm_adds_pi16(bufferSamples, addSamples);
-                int16_t* shortResults = reinterpret_cast<int16_t*>(&mmxResult);
-                
-                _clientSamples[parentIndex + delayedChannelOffset] = shortResults[3];
-                _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[2];
-#endif
-            } else if (i < numSamplesDelay) {
-                // MMX add a single delayed sample
-#ifdef _WIN32
-                _clientSamples[parentIndex + delayedChannelOffset] += delayNextOutputStart[i] * attenuationAndWeakChannelRatio;
-#else
-                __m64 bufferSamples = _mm_set_pi16(_clientSamples[parentIndex + delayedChannelOffset], 0, 0, 0);
-                __m64 addSamples = _mm_set_pi16(delayNextOutputStart[i] * attenuationAndWeakChannelRatio, 0, 0, 0);
-                
-                __m64 mmxResult = _mm_adds_pi16(bufferSamples, addSamples);
-                int16_t* shortResults = reinterpret_cast<int16_t*>(&mmxResult);
-                
-                _clientSamples[parentIndex + delayedChannelOffset] = shortResults[3];
-#endif
             }
         }
     } else {

From abca4661f4e6dfdd863dcd6d2964d9d3972ed0ef Mon Sep 17 00:00:00 2001
From: wangyix <wangyix@gmail.com>
Date: Tue, 15 Jul 2014 14:48:26 -0700
Subject: [PATCH 5/5] Remove stray blank line in AudioMixer.cpp

---
 assignment-client/src/audio/AudioMixer.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/assignment-client/src/audio/AudioMixer.cpp b/assignment-client/src/audio/AudioMixer.cpp
index 076769d989..b008a20aa7 100644
--- a/assignment-client/src/audio/AudioMixer.cpp
+++ b/assignment-client/src/audio/AudioMixer.cpp
@@ -254,7 +254,6 @@ void AudioMixer::addBufferToMixForListeningNodeWithBuffer(PositionalAudioRingBuf
                 delayNextOutputStart = bufferStart + ringBufferSampleCapacity - numSamplesDelay;
             }
             
-
             for (int i = 0; i < numSamplesDelay; i++) {
                 int parentIndex = i * 2;
                 _clientSamples[parentIndex + delayedChannelOffset] += delayNextOutputStart[i] * attenuationAndWeakChannelRatio;