From fc68b31b712f0506e3f6b4e6eb225f3153350f1b Mon Sep 17 00:00:00 2001
From: Seth Alves <seth.alves@gmail.com>
Date: Sat, 12 Sep 2015 08:23:50 -0700
Subject: [PATCH 01/10] Revert "Revert "edit.js adjustment""

---
 examples/edit.js                 | 7 ++-----
 examples/libraries/entityList.js | 3 ++-
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/examples/edit.js b/examples/edit.js
index d778ff324d..55b745a4e1 100644
--- a/examples/edit.js
+++ b/examples/edit.js
@@ -260,7 +260,6 @@ var toolBar = (function () {
                     cameraManager.disable();
                 } else {
                     hasShownPropertiesTool = false;
-                    cameraManager.enable();
                     entityListTool.setVisible(true);
                     gridTool.setVisible(true);
                     grid.setEnabled(true);
@@ -670,15 +669,11 @@ function mouseMove(event) {
 
     lastMousePosition = { x: event.x, y: event.y };
 
-    highlightEntityUnderCursor(lastMousePosition, false);
     idleMouseTimerId = Script.setTimeout(handleIdleMouse, IDLE_MOUSE_TIMEOUT);
 }
 
 function handleIdleMouse() {
     idleMouseTimerId = null;
-    if (isActive) {
-        highlightEntityUnderCursor(lastMousePosition, true);
-    }
 }
 
 function highlightEntityUnderCursor(position, accurateRay) {
@@ -802,6 +797,7 @@ function mouseClickEvent(event) {
                 selectionDisplay.select(selectedEntityID, event);
 
                 if (Menu.isOptionChecked(MENU_AUTO_FOCUS_ON_SELECT)) {
+                    cameraManager.enable();
                     cameraManager.focus(selectionManager.worldPosition,
                                         selectionManager.worldDimensions,
                                         Menu.isOptionChecked(MENU_EASE_ON_FOCUS));
@@ -1142,6 +1138,7 @@ Controller.keyReleaseEvent.connect(function (event) {
     } else if (event.text == "f") {
         if (isActive) {
             if (selectionManager.hasSelection()) {
+                cameraManager.enable();
                 cameraManager.focus(selectionManager.worldPosition,
                                     selectionManager.worldDimensions,
                                     Menu.isOptionChecked(MENU_EASE_ON_FOCUS));
diff --git a/examples/libraries/entityList.js b/examples/libraries/entityList.js
index 66dc9f336f..3d6bf4d14f 100644
--- a/examples/libraries/entityList.js
+++ b/examples/libraries/entityList.js
@@ -49,7 +49,7 @@ EntityListTool = function(opts) {
 
         var selectedIDs = [];
         for (var i = 0; i < selectionManager.selections.length; i++) {
-            selectedIDs.push(selectionManager.selections[i].id); // ?
+            selectedIDs.push(selectionManager.selections[i].id);
         }
 
         var data = {
@@ -70,6 +70,7 @@ EntityListTool = function(opts) {
             }
             selectionManager.setSelections(entityIDs);
             if (data.focus) {
+                cameraManager.enable();
                 cameraManager.focus(selectionManager.worldPosition,
                                     selectionManager.worldDimensions,
                                     Menu.isOptionChecked(MENU_EASE_ON_FOCUS));

From 4e29d8382dc4a026b60fccd81c050776c34d17bc Mon Sep 17 00:00:00 2001
From: Ken Cooke <ken@highfidelity.io>
Date: Thu, 1 Oct 2015 03:24:10 -0700
Subject: [PATCH 02/10] SSE2 optimization of new resampler. 3.5x faster for all
 modes.  Dither is always enabled.

---
 libraries/audio/src/AudioSRC.cpp | 400 ++++++++++++++++++++++++++-----
 1 file changed, 343 insertions(+), 57 deletions(-)

diff --git a/libraries/audio/src/AudioSRC.cpp b/libraries/audio/src/AudioSRC.cpp
index 736b098def..e33d399213 100644
--- a/libraries/audio/src/AudioSRC.cpp
+++ b/libraries/audio/src/AudioSRC.cpp
@@ -558,30 +558,8 @@ static const float Q32_TO_FLOAT = 1.0f / (1ULL << 32);
 // blocking size in frames, chosen so block processing fits in L1 cache
 static const int SRC_BLOCK = 1024;
 
-//#define SRC_DITHER
-#define RAND16(r) (((r) = (r) * 69069u + 1u) >> 16)
-
-// these are performance sensitive
-#define lo32(a)   (((uint32_t* )&(a))[0])
-#define hi32(a)   (((int32_t* )&(a))[1])
-
-//#define lo32(a)   ((uint32_t)(a))
-//#define hi32(a)   ((int32_t)((a) >> 32))
-
-//static inline uint32_t lo32(int64_t a) {
-//    union { 
-//        int64_t val; 
-//        struct { uint32_t lo; int32_t hi; } reg; 
-//    } b = { a };
-//    return b.reg.lo;
-//}
-//static inline int32_t hi32(int64_t a) {
-//    union { 
-//        int64_t val; 
-//        struct { uint32_t lo; int32_t hi; } reg; 
-//    } b = { a };
-//    return b.reg.hi;
-//}
+#define lo32(a)   ((uint32_t)(a))
+#define hi32(a)   ((int32_t)((a) >> 32))
 
 //
 // Portable aligned malloc/free
@@ -671,6 +649,7 @@ int AudioSRC::createRationalFilter(int upFactor, int downFactor, float gain) {
         numTaps = (numCoefs + upFactor - 1) / upFactor;
         gain *= (float)oldCoefs / numCoefs;
     }
+    numTaps = (numTaps + 3) & ~3;   // SIMD4
 
     // interpolate the coefficients of the prototype filter
     float* tempFilter = new float[numTaps * numPhases];
@@ -679,7 +658,7 @@ int AudioSRC::createRationalFilter(int upFactor, int downFactor, float gain) {
     cubicInterpolation(prototypeFilter, tempFilter, prototypeCoefs, numCoefs, gain);
 
     // create the polyphase filter
-    _polyphaseFilter = (float*)aligned_malloc(numTaps * numPhases * sizeof(float), 32);
+    _polyphaseFilter = (float*)aligned_malloc(numTaps * numPhases * sizeof(float), 16); // SIMD4
 
     // rearrange into polyphase form, ordered by use
     for (int i = 0; i < numPhases; i++) {
@@ -720,6 +699,7 @@ int AudioSRC::createIrrationalFilter(int upFactor, int downFactor, float gain) {
         numTaps = (numCoefs + upFactor - 1) / upFactor;
         gain *= (float)oldCoefs / numCoefs;
     }
+    numTaps = (numTaps + 3) & ~3;   // SIMD4
 
     // interpolate the coefficients of the prototype filter
     float* tempFilter = new float[numTaps * numPhases];
@@ -728,7 +708,7 @@ int AudioSRC::createIrrationalFilter(int upFactor, int downFactor, float gain) {
     cubicInterpolation(prototypeFilter, tempFilter, prototypeCoefs, numCoefs, gain);
 
     // create the polyphase filter, with extra phase at the end to simplify coef interpolation
-    _polyphaseFilter = (float*)aligned_malloc(numTaps * (numPhases + 1) * sizeof(float), 32);
+    _polyphaseFilter = (float*)aligned_malloc(numTaps * (numPhases + 1) * sizeof(float), 16);   // SIMD4
 
     // rearrange into polyphase form, ordered by fractional delay
     for (int phase = 0; phase < numPhases; phase++) {
@@ -754,6 +734,284 @@ int AudioSRC::createIrrationalFilter(int upFactor, int downFactor, float gain) {
     return numTaps;
 }
 
+//
+// on x86 architecture, assume that SSE2 is present
+//
+#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__x86_64__)
+
+#include <emmintrin.h>
+
+int AudioSRC::multirateFilter1(const float* input0, float* output0, int inputFrames) {
+    int outputFrames = 0;
+
+    assert((_numTaps & 0x3) == 0);  // SIMD4
+
+    if (_step == 0) {   // rational
+
+        int32_t i = hi32(_offset);
+
+        while (i < inputFrames) {
+
+            const float* c0 = &_polyphaseFilter[_numTaps * _phase];
+
+            __m128 acc0 = _mm_setzero_ps();
+
+            for (int j = 0; j < _numTaps; j += 4) {
+
+                //float coef = c0[j];
+                __m128 coef0 = _mm_loadu_ps(&c0[j]);
+
+                //acc0 += input0[i + j] * coef;
+                acc0 = _mm_add_ps(_mm_mul_ps(_mm_loadu_ps(&input0[i + j]), coef0), acc0);
+            }
+
+            // horizontal sum
+            acc0 = _mm_add_ps(acc0, _mm_movehl_ps(acc0, acc0));
+            acc0 = _mm_add_ss(acc0, _mm_shuffle_ps(acc0, acc0, _MM_SHUFFLE(0,0,0,1)));
+
+            _mm_store_ss(&output0[outputFrames], acc0);
+            outputFrames += 1;
+
+            i += _stepTable[_phase];
+            if (++_phase == _upFactor) {
+                _phase = 0;
+            }
+        }
+        _offset = (int64_t)(i - inputFrames) << 32;
+
+    } else {    // irrational
+
+        while (hi32(_offset) < inputFrames) {
+
+            int32_t i = hi32(_offset);
+            uint32_t f = lo32(_offset);
+
+            uint32_t phase = f >> SRC_FRACBITS;
+            __m128 frac = _mm_set1_ps((f & SRC_FRACMASK) * QFRAC_TO_FLOAT);
+
+            const float* c0 = &_polyphaseFilter[_numTaps * (phase + 0)];
+            const float* c1 = &_polyphaseFilter[_numTaps * (phase + 1)];
+
+            __m128 acc0 = _mm_setzero_ps();
+
+            for (int j = 0; j < _numTaps; j += 4) {
+
+                //float coef = c0[j] + frac * (c1[j] - c0[j]);
+                __m128 coef0 = _mm_loadu_ps(&c0[j]);
+                __m128 coef1 = _mm_loadu_ps(&c1[j]);
+                coef1 = _mm_sub_ps(coef1, coef0);
+                coef0 = _mm_add_ps(_mm_mul_ps(coef1, frac), coef0);
+
+                //acc0 += input0[i + j] * coef;
+                acc0 = _mm_add_ps(_mm_mul_ps(_mm_loadu_ps(&input0[i + j]), coef0), acc0);
+            }
+
+            // horizontal sum
+            acc0 = _mm_add_ps(acc0, _mm_movehl_ps(acc0, acc0));
+            acc0 = _mm_add_ss(acc0, _mm_shuffle_ps(acc0, acc0, _MM_SHUFFLE(0,0,0,1)));
+
+            _mm_store_ss(&output0[outputFrames], acc0);
+            outputFrames += 1;
+
+            _offset += _step;
+        }
+        _offset -= (int64_t)inputFrames << 32;
+    }
+
+    return outputFrames;
+}
+
+int AudioSRC::multirateFilter2(const float* input0, const float* input1, float* output0, float* output1, int inputFrames) {
+    int outputFrames = 0;
+
+    assert((_numTaps & 0x3) == 0);  // SIMD4
+
+    if (_step == 0) {   // rational
+
+        int32_t i = hi32(_offset);
+
+        while (i < inputFrames) {
+
+            const float* c0 = &_polyphaseFilter[_numTaps * _phase];
+
+            __m128 acc0 = _mm_setzero_ps();
+            __m128 acc1 = _mm_setzero_ps();
+
+            for (int j = 0; j < _numTaps; j += 4) {
+
+                //float coef = c0[j];
+                __m128 coef0 = _mm_loadu_ps(&c0[j]);
+
+                //acc0 += input0[i + j] * coef;
+                acc0 = _mm_add_ps(_mm_mul_ps(_mm_loadu_ps(&input0[i + j]), coef0), acc0);
+                acc1 = _mm_add_ps(_mm_mul_ps(_mm_loadu_ps(&input1[i + j]), coef0), acc1);
+            }
+
+            // horizontal sum
+            acc0 = _mm_add_ps(acc0, _mm_movehl_ps(acc0, acc0));
+            acc1 = _mm_add_ps(acc1, _mm_movehl_ps(acc1, acc1));
+            acc0 = _mm_add_ss(acc0, _mm_shuffle_ps(acc0, acc0, _MM_SHUFFLE(0,0,0,1)));
+            acc1 = _mm_add_ss(acc1, _mm_shuffle_ps(acc1, acc1, _MM_SHUFFLE(0,0,0,1)));
+
+            _mm_store_ss(&output0[outputFrames], acc0);
+            _mm_store_ss(&output1[outputFrames], acc1);
+            outputFrames += 1;
+
+            i += _stepTable[_phase];
+            if (++_phase == _upFactor) {
+                _phase = 0;
+            }
+        }
+        _offset = (int64_t)(i - inputFrames) << 32;
+
+    } else {    // irrational
+
+        while (hi32(_offset) < inputFrames) {
+
+            int32_t i = hi32(_offset);
+            uint32_t f = lo32(_offset);
+
+            uint32_t phase = f >> SRC_FRACBITS;
+            __m128 frac = _mm_set1_ps((f & SRC_FRACMASK) * QFRAC_TO_FLOAT);
+
+            const float* c0 = &_polyphaseFilter[_numTaps * (phase + 0)];
+            const float* c1 = &_polyphaseFilter[_numTaps * (phase + 1)];
+
+            __m128 acc0 = _mm_setzero_ps();
+            __m128 acc1 = _mm_setzero_ps();
+
+            for (int j = 0; j < _numTaps; j += 4) {
+
+                //float coef = c0[j] + frac * (c1[j] - c0[j]);
+                __m128 coef0 = _mm_loadu_ps(&c0[j]);
+                __m128 coef1 = _mm_loadu_ps(&c1[j]);
+                coef1 = _mm_sub_ps(coef1, coef0);
+                coef0 = _mm_add_ps(_mm_mul_ps(coef1, frac), coef0);
+
+                //acc0 += input0[i + j] * coef;
+                acc0 = _mm_add_ps(_mm_mul_ps(_mm_loadu_ps(&input0[i + j]), coef0), acc0);
+                acc1 = _mm_add_ps(_mm_mul_ps(_mm_loadu_ps(&input1[i + j]), coef0), acc1);
+            }
+
+            // horizontal sum
+            acc0 = _mm_add_ps(acc0, _mm_movehl_ps(acc0, acc0));
+            acc1 = _mm_add_ps(acc1, _mm_movehl_ps(acc1, acc1));
+            acc0 = _mm_add_ss(acc0, _mm_shuffle_ps(acc0, acc0, _MM_SHUFFLE(0,0,0,1)));
+            acc1 = _mm_add_ss(acc1, _mm_shuffle_ps(acc1, acc1, _MM_SHUFFLE(0,0,0,1)));
+
+            _mm_store_ss(&output0[outputFrames], acc0);
+            _mm_store_ss(&output1[outputFrames], acc1);
+            outputFrames += 1;
+
+            _offset += _step;
+        }
+        _offset -= (int64_t)inputFrames << 32;
+    }
+
+    return outputFrames;
+}
+
+// convert int16_t to float, deinterleave stereo
+void AudioSRC::convertInputFromInt16(const int16_t* input, float** outputs, int numFrames) {
+    __m128 scale = _mm_set1_ps(1/32768.0f);
+
+    numFrames = (numFrames + 3) & ~3;   // SIMD4 can overcompute
+    assert(numFrames <= SRC_BLOCK);  
+
+    if (_numChannels == 1) {
+        for (int i = 0; i < numFrames; i += 4) {
+
+            __m128i a0 = _mm_loadl_epi64((__m128i*)&input[i]);
+
+            // sign-extend
+            a0 = _mm_srai_epi32(_mm_unpacklo_epi16(a0, a0), 16);
+
+            __m128 f0 = _mm_mul_ps(_mm_cvtepi32_ps(a0), scale);
+
+            _mm_storeu_ps(&outputs[0][i], f0);
+        }
+    } else if (_numChannels == 2) {
+        for (int i = 0; i < numFrames; i += 4) {
+
+            __m128i a0 = _mm_loadu_si128((__m128i*)&input[2*i]);
+            __m128i a1 = a0;
+
+            // deinterleave and sign-extend
+            a0 = _mm_madd_epi16(a0, _mm_set1_epi32(0x00000001));
+            a1 = _mm_madd_epi16(a1, _mm_set1_epi32(0x00010000));
+
+            __m128 f0 = _mm_mul_ps(_mm_cvtepi32_ps(a0), scale);
+            __m128 f1 = _mm_mul_ps(_mm_cvtepi32_ps(a1), scale);
+
+            _mm_storeu_ps(&outputs[0][i], f0);
+            _mm_storeu_ps(&outputs[1][i], f1);
+        }
+    }
+}
+
+// fast TPDF dither in [-1.0f, 1.0f]
+static inline __m128 dither4() {
+    static __m128i rz = _mm_set_epi16(0, -12285, 8251, 22985, -4297, 14758, -19785, -26093);
+
+    // update the parallel LCGs
+    rz = _mm_mullo_epi16(rz, _mm_set1_epi16(25173));
+    rz = _mm_add_epi16(rz, _mm_set1_epi16(13849));
+
+    // promote to 32-bit
+    __m128i r0 = _mm_unpacklo_epi16(rz, _mm_setzero_si128());
+    __m128i r1 = _mm_unpackhi_epi16(rz, _mm_setzero_si128());
+
+    // return (r0 - r1) * (1/65536.0f);
+    __m128 d0 = _mm_cvtepi32_ps(_mm_sub_epi32(r0, r1));
+    return _mm_mul_ps(d0, _mm_set1_ps(1/65536.0f));
+}
+
+// convert float to int16_t, interleave stereo
+void AudioSRC::convertOutputToInt16(float** inputs, int16_t* output, int numFrames) {
+    __m128 scale = _mm_set1_ps(32768.0f);
+
+    numFrames = (numFrames + 3) & ~3;   // SIMD4 can overcompute
+    assert(numFrames <= SRC_BLOCK);  
+
+    if (_numChannels == 1) {
+        for (int i = 0; i < numFrames; i += 4) {
+
+            __m128 f0 = _mm_mul_ps(_mm_loadu_ps(&inputs[0][i]), scale);
+
+            f0 = _mm_add_ps(f0, dither4());
+
+            // round and saturate
+            __m128i a0 = _mm_cvtps_epi32(f0);
+            a0 = _mm_packs_epi32(a0, a0);
+
+            _mm_storel_epi64((__m128i*)&output[i], a0);
+        }
+    } else if (_numChannels == 2) {
+        for (int i = 0; i < numFrames; i += 4) {
+
+            __m128 f0 = _mm_mul_ps(_mm_loadu_ps(&inputs[0][i]), scale);
+            __m128 f1 = _mm_mul_ps(_mm_loadu_ps(&inputs[1][i]), scale);
+
+            __m128 d0 = dither4();
+            f0 = _mm_add_ps(f0, d0);
+            f1 = _mm_add_ps(f1, d0);
+
+            // round and saturate
+            __m128i a0 = _mm_cvtps_epi32(f0);
+            __m128i a1 = _mm_cvtps_epi32(f1);
+            a0 = _mm_packs_epi32(a0, a0);
+            a1 = _mm_packs_epi32(a1, a1);
+
+            // interleave
+            a0 = _mm_unpacklo_epi16(a0, a1);
+
+            _mm_storeu_si128((__m128i*)&output[2*i], a0);
+        }
+    }
+}
+
+#else
+
 int AudioSRC::multirateFilter1(const float* input0, float* output0, int inputFrames) {
     int outputFrames = 0;
 
@@ -886,45 +1144,73 @@ int AudioSRC::multirateFilter2(const float* input0, const float* input1, float*
     return outputFrames;
 }
 
-// convert int16_t to float
-// deinterleave stereo samples
+// convert int16_t to float, deinterleave stereo
 void AudioSRC::convertInputFromInt16(const int16_t* input, float** outputs, int numFrames) {
-    for (int i = 0; i < numFrames; i++) {
-        for (int j = 0; j < _numChannels; j++) {
+    const float scale = 1/32768.0f;
 
-            float f = (float)input[_numChannels*i + j];
-            outputs[j][i] = f * (1.0f/32768.0f);
+    if (_numChannels == 1) {
+        for (int i = 0; i < numFrames; i++) {
+            outputs[0][i] = (float)input[i] * scale;
+        }
+    } else if (_numChannels == 2) {
+        for (int i = 0; i < numFrames; i++) {
+            outputs[0][i] = (float)input[2*i + 0] * scale;
+            outputs[1][i] = (float)input[2*i + 1] * scale;
         }
     }
 }
 
-// convert float to int16_t
-// interleave stereo samples
+// fast TPDF dither in [-1.0f, 1.0f]
+static inline float dither() {
+    static uint32_t rz = 0;
+    rz = rz * 69069 + 1;
+    int32_t r0 = rz & 0xffff;
+    int32_t r1 = rz >> 16;
+    return (r0 - r1) * (1/65536.0f);
+}
+
+// convert float to int16_t, interleave stereo
 void AudioSRC::convertOutputToInt16(float** inputs, int16_t* output, int numFrames) {
-    for (int i = 0; i < numFrames; i++) {
-        for (int j = 0; j < _numChannels; j++) {
+    const float scale = 32768.0f;
 
-            float f = inputs[j][i] * 32768.0f;
+    if (_numChannels == 1) {
+        for (int i = 0; i < numFrames; i++) {
 
-#ifdef SRC_DITHER
-            // TPDF dither in [-1.0f, 1.0f]
-            static uint32_t rz = 1;
-            int r0 = RAND16(rz);
-            int r1 = RAND16(rz);
-            f += (r0 - r1) * (1.0f/65536.0f);
+            float f = inputs[0][i] * scale;
 
-            // round
+            f += dither();
+
+            // round and saturate
             f += (f < 0.0f ? -0.5f : +0.5f);
-#endif
-            // saturate
-            f = std::min(f, 32767.0f);
-            f = std::max(f, -32768.0f);
+            f = std::max(std::min(f, 32767.0f), -32768.0f);
 
-            output[_numChannels * i + j] = (int16_t)f;
+            output[i] = (int16_t)f;
+        }
+    } else if (_numChannels == 2) {
+        for (int i = 0; i < numFrames; i++) {
+
+            float f0 = inputs[0][i] * scale;
+            float f1 = inputs[1][i] * scale;
+
+            float d = dither();
+            f0 += d;
+            f1 += d;
+
+            // round and saturate
+            f0 += (f0 < 0.0f ? -0.5f : +0.5f);
+            f1 += (f1 < 0.0f ? -0.5f : +0.5f);
+            f0 = std::max(std::min(f0, 32767.0f), -32768.0f);
+            f1 = std::max(std::min(f1, 32767.0f), -32768.0f);
+
+            // interleave
+            output[2*i + 0] = (int16_t)f0;
+            output[2*i + 1] = (int16_t)f1;
         }
     }
 }
 
+#endif
+
 int AudioSRC::processFloat(float** inputs, float** outputs, int inputFrames) {
     int outputFrames = 0;
 
@@ -1019,10 +1305,10 @@ AudioSRC::AudioSRC(int inputSampleRate, int outputSampleRate, int numChannels) {
     _history[1] = new float[2 * _numHistory];
 
     // format conversion buffers
-    _inputs[0] = new float[SRC_BLOCK];
-    _inputs[1] = new float[SRC_BLOCK];
-    _outputs[0] = new float[SRC_BLOCK];
-    _outputs[1] = new float[SRC_BLOCK];
+    _inputs[0] = (float*)aligned_malloc(SRC_BLOCK * sizeof(float), 16); // SIMD4
+    _inputs[1] = (float*)aligned_malloc(SRC_BLOCK * sizeof(float), 16);
+    _outputs[0] = (float*)aligned_malloc(SRC_BLOCK * sizeof(float), 16);
+    _outputs[1] = (float*)aligned_malloc(SRC_BLOCK * sizeof(float), 16);
 
     // input blocking size, such that input and output are both guaranteed not to exceed SRC_BLOCK frames
     _inputBlock = std::min(SRC_BLOCK, getMaxInput(SRC_BLOCK));
@@ -1041,10 +1327,10 @@ AudioSRC::~AudioSRC() {
     delete[] _history[0];
     delete[] _history[1];
 
-    delete[] _inputs[0];
-    delete[] _inputs[1];
-    delete[] _outputs[0];
-    delete[] _outputs[1];
+    aligned_free(_inputs[0]);
+    aligned_free(_inputs[1]);
+    aligned_free(_outputs[0]);
+    aligned_free(_outputs[1]);
 }
 
 //

From ef7e908675e7fdaebc6e73d8aec2f1468e217f6e Mon Sep 17 00:00:00 2001
From: Andrew Meadows <andrew@highfidelity.io>
Date: Wed, 30 Sep 2015 18:03:15 -0700
Subject: [PATCH 03/10] allow dynamic objects to be set collisionless

---
 libraries/entities/src/BoxEntityItem.h      | 1 +
 libraries/entities/src/EntityItem.h         | 2 +-
 libraries/entities/src/SphereEntityItem.h   | 1 +
 libraries/physics/src/EntityMotionState.cpp | 6 ++++++
 libraries/physics/src/ObjectMotionState.h   | 8 ++++----
 5 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/libraries/entities/src/BoxEntityItem.h b/libraries/entities/src/BoxEntityItem.h
index fdc66dd3e5..cc3bba4823 100644
--- a/libraries/entities/src/BoxEntityItem.h
+++ b/libraries/entities/src/BoxEntityItem.h
@@ -52,6 +52,7 @@ public:
     }
     
     virtual ShapeType getShapeType() const { return SHAPE_TYPE_BOX; }
+    virtual bool shouldBePhysical() const { return true; }
 
     virtual void debugDump() const;
 
diff --git a/libraries/entities/src/EntityItem.h b/libraries/entities/src/EntityItem.h
index 62b436b498..f070e2c1e4 100644
--- a/libraries/entities/src/EntityItem.h
+++ b/libraries/entities/src/EntityItem.h
@@ -334,7 +334,7 @@ public:
     bool getCollisionsWillMove() const { return _collisionsWillMove; }
     void setCollisionsWillMove(bool value) { _collisionsWillMove = value; }
 
-    virtual bool shouldBePhysical() const { return !_ignoreForCollisions; }
+    virtual bool shouldBePhysical() const { return false; }
 
     bool getLocked() const { return _locked; }
     void setLocked(bool value) { _locked = value; }
diff --git a/libraries/entities/src/SphereEntityItem.h b/libraries/entities/src/SphereEntityItem.h
index 19ea5d06f9..81a6cf704c 100644
--- a/libraries/entities/src/SphereEntityItem.h
+++ b/libraries/entities/src/SphereEntityItem.h
@@ -51,6 +51,7 @@ public:
     }
 
     virtual ShapeType getShapeType() const { return SHAPE_TYPE_SPHERE; }
+    virtual bool shouldBePhysical() const { return true; }
     
     virtual bool supportsDetailedRayIntersection() const { return true; }
     virtual bool findDetailedRayIntersection(const glm::vec3& origin, const glm::vec3& direction,
diff --git a/libraries/physics/src/EntityMotionState.cpp b/libraries/physics/src/EntityMotionState.cpp
index ff546dbb0b..9cbe89dda6 100644
--- a/libraries/physics/src/EntityMotionState.cpp
+++ b/libraries/physics/src/EntityMotionState.cpp
@@ -596,6 +596,12 @@ QString EntityMotionState::getName() {
 
 // virtual
 int16_t EntityMotionState::computeCollisionGroup() {
+    if (!_entity) {
+        return COLLISION_GROUP_STATIC;
+    }
+    if (_entity->getIgnoreForCollisions()) {
+        return COLLISION_GROUP_COLLISIONLESS;
+    }
     switch (computeObjectMotionType()){
         case MOTION_TYPE_STATIC:
             return COLLISION_GROUP_STATIC;
diff --git a/libraries/physics/src/ObjectMotionState.h b/libraries/physics/src/ObjectMotionState.h
index 1bdf8b6372..61254e49bd 100644
--- a/libraries/physics/src/ObjectMotionState.h
+++ b/libraries/physics/src/ObjectMotionState.h
@@ -37,11 +37,11 @@ enum MotionStateType {
 
 // The update flags trigger two varieties of updates: "hard" which require the body to be pulled 
 // and re-added to the physics engine and "easy" which just updates the body properties.
-const uint32_t HARD_DIRTY_PHYSICS_FLAGS = (uint32_t)(EntityItem::DIRTY_MOTION_TYPE | EntityItem::DIRTY_SHAPE);
+const uint32_t HARD_DIRTY_PHYSICS_FLAGS = (uint32_t)(EntityItem::DIRTY_MOTION_TYPE | EntityItem::DIRTY_SHAPE | 
+                                                     EntityItem::DIRTY_COLLISION_GROUP);
 const uint32_t EASY_DIRTY_PHYSICS_FLAGS = (uint32_t)(EntityItem::DIRTY_TRANSFORM | EntityItem::DIRTY_VELOCITIES |
-                                                     EntityItem::DIRTY_MASS | EntityItem::DIRTY_COLLISION_GROUP |
-                                                     EntityItem::DIRTY_MATERIAL | EntityItem::DIRTY_SIMULATOR_ID | 
-                                                     EntityItem::DIRTY_SIMULATOR_OWNERSHIP);
+                                                     EntityItem::DIRTY_MASS | EntityItem::DIRTY_MATERIAL | 
+                                                     EntityItem::DIRTY_SIMULATOR_ID | EntityItem::DIRTY_SIMULATOR_OWNERSHIP);
 
 // These are the set of incoming flags that the PhysicsEngine needs to hear about:
 const uint32_t DIRTY_PHYSICS_FLAGS = (uint32_t)(HARD_DIRTY_PHYSICS_FLAGS | EASY_DIRTY_PHYSICS_FLAGS |

From 22b66077604e699d1725cc4c44e1a9594993b080 Mon Sep 17 00:00:00 2001
From: Andrew Meadows <andrew@highfidelity.io>
Date: Wed, 30 Sep 2015 18:15:34 -0700
Subject: [PATCH 04/10] add polyvox entities to physics engine

---
 libraries/entities-renderer/src/RenderablePolyVoxEntityItem.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libraries/entities-renderer/src/RenderablePolyVoxEntityItem.h b/libraries/entities-renderer/src/RenderablePolyVoxEntityItem.h
index e1e042f3d0..ef44ba5ab0 100644
--- a/libraries/entities-renderer/src/RenderablePolyVoxEntityItem.h
+++ b/libraries/entities-renderer/src/RenderablePolyVoxEntityItem.h
@@ -79,6 +79,7 @@ public:
     glm::mat4 localToVoxelMatrix() const;
 
     virtual ShapeType getShapeType() const;
+    virtual bool shouldBePhysical() const { return true; }
     virtual bool isReadyToComputeShape();
     virtual void computeShapeInfo(ShapeInfo& info);
 

From 381c98c4fa268b09ec81c85d38ad4a8676a58db0 Mon Sep 17 00:00:00 2001
From: Andrew Meadows <andrew@highfidelity.io>
Date: Wed, 30 Sep 2015 18:32:08 -0700
Subject: [PATCH 05/10] fix for ModelEntityItem::shouldBePhysical()

---
 libraries/entities/src/ModelEntityItem.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libraries/entities/src/ModelEntityItem.cpp b/libraries/entities/src/ModelEntityItem.cpp
index 70747937d8..cd9e3cd3e1 100644
--- a/libraries/entities/src/ModelEntityItem.cpp
+++ b/libraries/entities/src/ModelEntityItem.cpp
@@ -457,5 +457,5 @@ QString ModelEntityItem::getAnimationSettings() const {
 
 // virtual
 bool ModelEntityItem::shouldBePhysical() const {
-    return EntityItem::shouldBePhysical() && getShapeType() != SHAPE_TYPE_NONE;
+    return getShapeType() != SHAPE_TYPE_NONE;
 }

From 2fbb436bc55b50ee3d12d2ccebab01912f0446fd Mon Sep 17 00:00:00 2001
From: David Rowe <david@ctrlaltstudio.com>
Date: Thu, 1 Oct 2015 12:42:23 -0700
Subject: [PATCH 06/10] Fix Reload Content menu item

---
 interface/src/Application.cpp | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/interface/src/Application.cpp b/interface/src/Application.cpp
index 2a8f7cf8b5..e386cd3a81 100644
--- a/interface/src/Application.cpp
+++ b/interface/src/Application.cpp
@@ -2741,12 +2741,19 @@ void Application::cameraMenuChanged() {
 }
 
 void Application::reloadResourceCaches() {
+    // Clear entities out of view frustum
+    _viewFrustum.setPosition(glm::vec3(0.0f, 0.0f, TREE_SCALE));
+    _viewFrustum.setOrientation(glm::quat());
+    queryOctree(NodeType::EntityServer, PacketType::EntityQuery, _entityServerJurisdictions);
+
     emptyLocalCache();
-    
+
     DependencyManager::get<AnimationCache>()->refreshAll();
     DependencyManager::get<ModelCache>()->refreshAll();
     DependencyManager::get<SoundCache>()->refreshAll();
     DependencyManager::get<TextureCache>()->refreshAll();
+
+    DependencyManager::get<NodeList>()->reset();  // Force redownload of .fst models
 }
 
 void Application::rotationModeChanged() {

From e1e34b2ce5cbd20a972947977ca4e79a02e912da Mon Sep 17 00:00:00 2001
From: Ken Cooke <ken@highfidelity.io>
Date: Thu, 1 Oct 2015 20:17:04 -0700
Subject: [PATCH 07/10] Fix to allow arbitrary input/output buffers from
 caller.  SIMD padding is no longer required.

---
 libraries/audio/src/AudioSRC.cpp | 78 ++++++++++++++++++++++++++------
 1 file changed, 65 insertions(+), 13 deletions(-)

diff --git a/libraries/audio/src/AudioSRC.cpp b/libraries/audio/src/AudioSRC.cpp
index e33d399213..99760fc42f 100644
--- a/libraries/audio/src/AudioSRC.cpp
+++ b/libraries/audio/src/AudioSRC.cpp
@@ -915,12 +915,10 @@ int AudioSRC::multirateFilter2(const float* input0, const float* input1, float*
 void AudioSRC::convertInputFromInt16(const int16_t* input, float** outputs, int numFrames) {
     __m128 scale = _mm_set1_ps(1/32768.0f);
 
-    numFrames = (numFrames + 3) & ~3;   // SIMD4 can overcompute
-    assert(numFrames <= SRC_BLOCK);  
-
     if (_numChannels == 1) {
-        for (int i = 0; i < numFrames; i += 4) {
 
+        int i = 0;
+        for (; i < numFrames - 3; i += 4) {
             __m128i a0 = _mm_loadl_epi64((__m128i*)&input[i]);
 
             // sign-extend
@@ -930,9 +928,21 @@ void AudioSRC::convertInputFromInt16(const int16_t* input, float** outputs, int
 
             _mm_storeu_ps(&outputs[0][i], f0);
         }
-    } else if (_numChannels == 2) {
-        for (int i = 0; i < numFrames; i += 4) {
+        for (; i < numFrames; i++) {
+            __m128i a0 = _mm_insert_epi16(_mm_setzero_si128(), input[i], 0);
 
+            // sign-extend
+            a0 = _mm_srai_epi32(_mm_unpacklo_epi16(a0, a0), 16);
+
+            __m128 f0 = _mm_mul_ps(_mm_cvtepi32_ps(a0), scale);
+
+            _mm_store_ss(&outputs[0][i], f0);
+        }
+
+    } else if (_numChannels == 2) {
+
+        int i = 0;
+        for (; i < numFrames - 3; i += 4) {
             __m128i a0 = _mm_loadu_si128((__m128i*)&input[2*i]);
             __m128i a1 = a0;
 
@@ -946,6 +956,20 @@ void AudioSRC::convertInputFromInt16(const int16_t* input, float** outputs, int
             _mm_storeu_ps(&outputs[0][i], f0);
             _mm_storeu_ps(&outputs[1][i], f1);
         }
+        for (; i < numFrames; i++) {
+            __m128i a0 = _mm_cvtsi32_si128(*(int32_t*)&input[2*i]);
+            __m128i a1 = a0;
+
+            // deinterleave and sign-extend
+            a0 = _mm_madd_epi16(a0, _mm_set1_epi32(0x00000001));
+            a1 = _mm_madd_epi16(a1, _mm_set1_epi32(0x00010000));
+
+            __m128 f0 = _mm_mul_ps(_mm_cvtepi32_ps(a0), scale);
+            __m128 f1 = _mm_mul_ps(_mm_cvtepi32_ps(a1), scale);
+
+            _mm_store_ss(&outputs[0][i], f0);
+            _mm_store_ss(&outputs[1][i], f1);
+        }
     }
 }
 
@@ -970,12 +994,10 @@ static inline __m128 dither4() {
 void AudioSRC::convertOutputToInt16(float** inputs, int16_t* output, int numFrames) {
     __m128 scale = _mm_set1_ps(32768.0f);
 
-    numFrames = (numFrames + 3) & ~3;   // SIMD4 can overcompute
-    assert(numFrames <= SRC_BLOCK);  
-
     if (_numChannels == 1) {
-        for (int i = 0; i < numFrames; i += 4) {
 
+        int i = 0;
+        for (; i < numFrames - 3; i += 4) {
             __m128 f0 = _mm_mul_ps(_mm_loadu_ps(&inputs[0][i]), scale);
 
             f0 = _mm_add_ps(f0, dither4());
@@ -986,9 +1008,22 @@ void AudioSRC::convertOutputToInt16(float** inputs, int16_t* output, int numFram
 
             _mm_storel_epi64((__m128i*)&output[i], a0);
         }
-    } else if (_numChannels == 2) {
-        for (int i = 0; i < numFrames; i += 4) {
+        for (; i < numFrames; i++) {
+            __m128 f0 = _mm_mul_ps(_mm_load_ss(&inputs[0][i]), scale);
 
+            f0 = _mm_add_ps(f0, dither4());
+
+            // round and saturate
+            __m128i a0 = _mm_cvtps_epi32(f0);
+            a0 = _mm_packs_epi32(a0, a0);
+
+            output[i] = (int16_t)_mm_extract_epi16(a0, 0);
+        }
+
+    } else if (_numChannels == 2) {
+
+        int i = 0;
+        for (; i < numFrames - 3; i += 4) {
             __m128 f0 = _mm_mul_ps(_mm_loadu_ps(&inputs[0][i]), scale);
             __m128 f1 = _mm_mul_ps(_mm_loadu_ps(&inputs[1][i]), scale);
 
@@ -1004,9 +1039,26 @@ void AudioSRC::convertOutputToInt16(float** inputs, int16_t* output, int numFram
 
             // interleave
             a0 = _mm_unpacklo_epi16(a0, a1);
-
             _mm_storeu_si128((__m128i*)&output[2*i], a0);
         }
+        for (; i < numFrames; i++) {
+            __m128 f0 = _mm_mul_ps(_mm_load_ss(&inputs[0][i]), scale);
+            __m128 f1 = _mm_mul_ps(_mm_load_ss(&inputs[1][i]), scale);
+
+            __m128 d0 = dither4();
+            f0 = _mm_add_ps(f0, d0);
+            f1 = _mm_add_ps(f1, d0);
+
+            // round and saturate
+            __m128i a0 = _mm_cvtps_epi32(f0);
+            __m128i a1 = _mm_cvtps_epi32(f1);
+            a0 = _mm_packs_epi32(a0, a0);
+            a1 = _mm_packs_epi32(a1, a1);
+
+            // interleave
+            a0 = _mm_unpacklo_epi16(a0, a1);
+            *(int32_t*)&output[2*i] = _mm_cvtsi128_si32(a0);
+        }
     }
 }
 

From 3555e90cf60360273f3fac159c05009b9528c223 Mon Sep 17 00:00:00 2001
From: Ken Cooke <ken@highfidelity.io>
Date: Thu, 1 Oct 2015 20:32:48 -0700
Subject: [PATCH 08/10] Improved random generator for SIMD dither

---
 libraries/audio/src/AudioSRC.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/libraries/audio/src/AudioSRC.cpp b/libraries/audio/src/AudioSRC.cpp
index 99760fc42f..59fe29df36 100644
--- a/libraries/audio/src/AudioSRC.cpp
+++ b/libraries/audio/src/AudioSRC.cpp
@@ -975,11 +975,11 @@ void AudioSRC::convertInputFromInt16(const int16_t* input, float** outputs, int
 
 // fast TPDF dither in [-1.0f, 1.0f]
 static inline __m128 dither4() {
-    static __m128i rz = _mm_set_epi16(0, -12285, 8251, 22985, -4297, 14758, -19785, -26093);
+    static __m128i rz;
 
-    // update the parallel LCGs
-    rz = _mm_mullo_epi16(rz, _mm_set1_epi16(25173));
-    rz = _mm_add_epi16(rz, _mm_set1_epi16(13849));
+    // advance 8 independent 16-bit LCGs, one per lane; multipliers == 1 (mod 4) and odd increments give each the full 2^16 period
+    rz = _mm_mullo_epi16(rz, _mm_set_epi16(25173, -25511, -5975, -23279, 19445, -27591, 30185, -3495));
+    rz = _mm_add_epi16(rz, _mm_set_epi16(13849, -32767, 105, -19675, -7701, -32679, -13225, 28013));
 
     // promote to 32-bit
     __m128i r0 = _mm_unpacklo_epi16(rz, _mm_setzero_si128());

From 0497de47ea41f540d3364955980a089551d46d7e Mon Sep 17 00:00:00 2001
From: ericrius1 <ericrius1>
Date: Fri, 2 Oct 2015 12:40:14 -0700
Subject: [PATCH 09/10] No longer removing non-existent entities from spray
 paint can; color tweaks to paint stream

---
 examples/toys/sprayPaintCan.js | 32 +++++++++++++++++---------------
 1 file changed, 17 insertions(+), 15 deletions(-)

diff --git a/examples/toys/sprayPaintCan.js b/examples/toys/sprayPaintCan.js
index e0aeb19995..251827ead7 100644
--- a/examples/toys/sprayPaintCan.js
+++ b/examples/toys/sprayPaintCan.js
@@ -9,7 +9,7 @@
 //
 
 
-(function() {
+(function () {
     // Script.include("../libraries/utils.js");
     //Need absolute path for now, for testing before PR merge and s3 cloning. Will change post-merge
 
@@ -33,19 +33,19 @@
     var MIN_POINT_DISTANCE = 0.01;
     var STROKE_WIDTH = 0.02;
 
-    this.setRightHand = function() {
+    this.setRightHand = function () {
         this.hand = 'RIGHT';
     }
 
-    this.setLeftHand = function() {
+    this.setLeftHand = function () {
         this.hand = 'LEFT';
     }
 
-    this.startNearGrab = function() {
+    this.startNearGrab = function () {
         this.whichHand = this.hand;
     }
 
-    this.toggleWithTriggerPressure = function() {
+    this.toggleWithTriggerPressure = function () {
         var handClickString = this.whichHand + "_HAND_CLICK";
 
         var handClick = Controller.findAction(handClickString);
@@ -60,7 +60,7 @@
         }
     }
 
-    this.enableStream = function() {
+    this.enableStream = function () {
         var position = Entities.getEntityProperties(this.entityId, "position").position;
         var animationSettings = JSON.stringify({
             fps: 30,
@@ -85,6 +85,11 @@
             particleRadius: 0.01,
             radiusSpread: 0.005,
             polarFinish: 0.05,
+            colorStart: {
+                red: 50,
+                green: 10,
+                blue: 150
+            },
             color: {
                 red: 170,
                 green: 20,
@@ -105,11 +110,11 @@
 
     }
 
-    this.releaseGrab = function() {
+    this.releaseGrab = function () {
         this.disableStream();
     }
 
-    this.disableStream = function() {
+    this.disableStream = function () {
         Entities.deleteEntity(this.paintStream);
         this.paintStream = null;
         this.spraying = false;
@@ -117,7 +122,7 @@
     }
 
 
-    this.continueNearGrab = function() {
+    this.continueNearGrab = function () {
 
         this.toggleWithTriggerPressure();
 
@@ -143,7 +148,7 @@
         });
     }
 
-    this.preload = function(entityId) {
+    this.preload = function (entityId) {
         this.sprayVolume = 0.1;
         this.spraying = false;
         this.entityId = entityId;
@@ -151,12 +156,9 @@
     }
 
 
-    this.unload = function() {
+    this.unload = function () {
         if (this.paintStream) {
             Entities.deleteEntity(this.paintStream);
         }
-        this.strokes.forEach(function(stroke) {
-            Entities.deleteEntity(stroke);
-        });
     }
-});
+});
\ No newline at end of file

From 255689e24f3436a2ea49228cb590adf7de413187 Mon Sep 17 00:00:00 2001
From: Brad Hefta-Gaub <brad@highfidelity.io>
Date: Fri, 2 Oct 2015 15:08:04 -0700
Subject: [PATCH 10/10] add GLEW_STATIC to fix link warnings

---
 interface/CMakeLists.txt                 | 2 ++
 libraries/display-plugins/CMakeLists.txt | 2 ++
 libraries/render-utils/CMakeLists.txt    | 2 ++
 3 files changed, 6 insertions(+)

diff --git a/interface/CMakeLists.txt b/interface/CMakeLists.txt
index e3118c0048..63d1445496 100644
--- a/interface/CMakeLists.txt
+++ b/interface/CMakeLists.txt
@@ -1,6 +1,8 @@
 set(TARGET_NAME interface)
 project(${TARGET_NAME})
 
+add_definitions(-DGLEW_STATIC)
+
 # set a default root dir for each of our optional externals if it was not passed
 set(OPTIONAL_EXTERNALS "Faceshift" "LeapMotion" "RtMidi" "RSSDK" "3DConnexionClient" "iViewHMD")
 foreach(EXTERNAL ${OPTIONAL_EXTERNALS})
diff --git a/libraries/display-plugins/CMakeLists.txt b/libraries/display-plugins/CMakeLists.txt
index 384fa57b62..b602327f4c 100644
--- a/libraries/display-plugins/CMakeLists.txt
+++ b/libraries/display-plugins/CMakeLists.txt
@@ -1,5 +1,7 @@
 set(TARGET_NAME display-plugins)
 
+add_definitions(-DGLEW_STATIC)
+
 # use setup_hifi_library macro to setup our project and link appropriate Qt modules
 setup_hifi_library(OpenGL)
 
diff --git a/libraries/render-utils/CMakeLists.txt b/libraries/render-utils/CMakeLists.txt
index 6a0f69dd8d..fa6cc6fe77 100644
--- a/libraries/render-utils/CMakeLists.txt
+++ b/libraries/render-utils/CMakeLists.txt
@@ -20,6 +20,8 @@ add_dependency_external_projects(oglplus)
 find_package(OGLPLUS REQUIRED)
 target_include_directories(${TARGET_NAME} PUBLIC ${OGLPLUS_INCLUDE_DIRS})
 
+add_definitions(-DGLEW_STATIC)
+
 if (WIN32)
   if (USE_NSIGHT)
     # try to find the Nsight package and add it to the build if we find it