From a1b2bf59e9ebcb5b367fb808ff63f3aad130d9f5 Mon Sep 17 00:00:00 2001 From: wangyix Date: Tue, 15 Jul 2014 12:23:02 -0700 Subject: [PATCH 1/5] disabled SIMD adds in AudioMixer for windows --- assignment-client/src/audio/AudioMixer.cpp | 35 +++++++++++++++++-- .../audio/src/PositionalAudioRingBuffer.cpp | 1 + 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/assignment-client/src/audio/AudioMixer.cpp b/assignment-client/src/audio/AudioMixer.cpp index c86d37e283..60f905f20a 100644 --- a/assignment-client/src/audio/AudioMixer.cpp +++ b/assignment-client/src/audio/AudioMixer.cpp @@ -231,6 +231,12 @@ void AudioMixer::addBufferToMixForListeningNodeWithBuffer(PositionalAudioRingBuf delayBufferSample[0] = correctBufferSample[0] * weakChannelAmplitudeRatio; delayBufferSample[1] = correctBufferSample[1] * weakChannelAmplitudeRatio; +#ifdef _WIN32 + _clientSamples[s + goodChannelOffset] += correctBufferSample[0]; + _clientSamples[s + goodChannelOffset + SINGLE_STEREO_OFFSET] += correctBufferSample[1]; + _clientSamples[delayedChannelIndex] += delayBufferSample[0]; + _clientSamples[delayedChannelIndex + SINGLE_STEREO_OFFSET] += delayBufferSample[1]; +#else __m64 bufferSamples = _mm_set_pi16(_clientSamples[s + goodChannelOffset], _clientSamples[s + goodChannelOffset + SINGLE_STEREO_OFFSET], _clientSamples[delayedChannelIndex], @@ -247,6 +253,7 @@ void AudioMixer::addBufferToMixForListeningNodeWithBuffer(PositionalAudioRingBuf _clientSamples[s + goodChannelOffset + SINGLE_STEREO_OFFSET] = shortResults[2]; _clientSamples[delayedChannelIndex] = shortResults[1]; _clientSamples[delayedChannelIndex + SINGLE_STEREO_OFFSET] = shortResults[0]; +#endif } // The following code is pretty gross and redundant, but AFAIK it's the best way to avoid @@ -271,6 +278,13 @@ void AudioMixer::addBufferToMixForListeningNodeWithBuffer(PositionalAudioRingBuf while (i + 3 < numSamplesDelay) { // handle the first cases where we can MMX add four samples at once int parentIndex = i * 2; + +#ifdef _WIN32 + _clientSamples[parentIndex + delayedChannelOffset] += delayNextOutputStart[i] * attenuationAndWeakChannelRatio; + _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset] += delayNextOutputStart[i + 1] * attenuationAndWeakChannelRatio; + _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset] += delayNextOutputStart[i + 2] * attenuationAndWeakChannelRatio; + _clientSamples[parentIndex + TRIPLE_STEREO_OFFSET + delayedChannelOffset] += delayNextOutputStart[i + 3] * attenuationAndWeakChannelRatio; +#else __m64 bufferSamples = _mm_set_pi16(_clientSamples[parentIndex + delayedChannelOffset], _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset], _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset], @@ -286,7 +300,8 @@ void AudioMixer::addBufferToMixForListeningNodeWithBuffer(PositionalAudioRingBuf _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[2]; _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[1]; _clientSamples[parentIndex + TRIPLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[0]; - +#endif + // push the index i += 4; } @@ -296,6 +311,11 @@ void AudioMixer::addBufferToMixForListeningNodeWithBuffer(PositionalAudioRingBuf if (i + 2 < numSamplesDelay) { // MMX add only three delayed samples +#ifdef _WIN32 + _clientSamples[parentIndex + delayedChannelOffset] += delayNextOutputStart[i] * attenuationAndWeakChannelRatio; + _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset] += delayNextOutputStart[i + 1] * attenuationAndWeakChannelRatio; + _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset] += delayNextOutputStart[i + 2] * attenuationAndWeakChannelRatio; +#else __m64 bufferSamples = _mm_set_pi16(_clientSamples[parentIndex + delayedChannelOffset], _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset], _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset], @@ -310,8 +330,15 @@ void AudioMixer::addBufferToMixForListeningNodeWithBuffer(PositionalAudioRingBuf _clientSamples[parentIndex + delayedChannelOffset] = shortResults[3]; _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[2]; _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[1]; +#endif + } else if (i + 1 < numSamplesDelay) { + +#ifdef _WIN32 + _clientSamples[parentIndex + delayedChannelOffset] += delayNextOutputStart[i] * attenuationAndWeakChannelRatio; + _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset] += delayNextOutputStart[i + 1] * attenuationAndWeakChannelRatio; +#else // MMX add two delayed samples __m64 bufferSamples = _mm_set_pi16(_clientSamples[parentIndex + delayedChannelOffset], _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset], @@ -324,9 +351,12 @@ void AudioMixer::addBufferToMixForListeningNodeWithBuffer(PositionalAudioRingBuf _clientSamples[parentIndex + delayedChannelOffset] = shortResults[3]; _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[2]; - +#endif } else if (i < numSamplesDelay) { // MMX add a single delayed sample +#ifdef _WIN32 + _clientSamples[parentIndex + delayedChannelOffset] += delayNextOutputStart[i] * attenuationAndWeakChannelRatio; +#else __m64 bufferSamples = _mm_set_pi16(_clientSamples[parentIndex + delayedChannelOffset], 0, 0, 0); __m64 addSamples = _mm_set_pi16(delayNextOutputStart[i] * attenuationAndWeakChannelRatio, 0, 0, 0); @@ -334,6 +364,7 @@ void AudioMixer::addBufferToMixForListeningNodeWithBuffer(PositionalAudioRingBuf int16_t* shortResults = reinterpret_cast(&mmxResult); _clientSamples[parentIndex + delayedChannelOffset] = shortResults[3]; +#endif } } } else { diff --git a/libraries/audio/src/PositionalAudioRingBuffer.cpp b/libraries/audio/src/PositionalAudioRingBuffer.cpp index 8cba6d72b0..c9a58b2210 100644 --- a/libraries/audio/src/PositionalAudioRingBuffer.cpp +++ b/libraries/audio/src/PositionalAudioRingBuffer.cpp @@ -149,6 +149,7 @@ void PositionalAudioRingBuffer::updateNextOutputTrailingLoudness() { // fixes bug on Windows where _nextOutputTrailingLoudness sometimes becomes NaN. In that case, // revert _nextOutputTrailingLoudness to its previous value if (isNaN(_nextOutputTrailingLoudness)) { + printf("next output trailling loudness NaN!! --------------------------------------\n"); _nextOutputTrailingLoudness = oldNextOutputTrailingLoudness; } } From 828410b7d6b80f930de864aaace331437e7669a3 Mon Sep 17 00:00:00 2001 From: wangyix Date: Tue, 15 Jul 2014 12:23:54 -0700 Subject: [PATCH 2/5] removed isNan check for _nextOutputTrailingLoudness --- libraries/audio/src/PositionalAudioRingBuffer.cpp | 7 ------- 1 file changed, 7 deletions(-) diff --git a/libraries/audio/src/PositionalAudioRingBuffer.cpp b/libraries/audio/src/PositionalAudioRingBuffer.cpp index c9a58b2210..a0ce74dbaf 100644 --- a/libraries/audio/src/PositionalAudioRingBuffer.cpp +++ b/libraries/audio/src/PositionalAudioRingBuffer.cpp @@ -145,13 +145,6 @@ void PositionalAudioRingBuffer::updateNextOutputTrailingLoudness() { _nextOutputTrailingLoudness = 0; } } - - // fixes bug on Windows where _nextOutputTrailingLoudness sometimes becomes NaN. In that case, - // revert _nextOutputTrailingLoudness to its previous value - if (isNaN(_nextOutputTrailingLoudness)) { - printf("next output trailling loudness NaN!! --------------------------------------\n"); - _nextOutputTrailingLoudness = oldNextOutputTrailingLoudness; - } } bool PositionalAudioRingBuffer::shouldBeAddedToMix() { From 871aa2790a6f380e6b256e6bafe22455d208db36 Mon Sep 17 00:00:00 2001 From: wangyix Date: Tue, 15 Jul 2014 12:24:28 -0700 Subject: [PATCH 3/5] forgot unused variable --- libraries/audio/src/PositionalAudioRingBuffer.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/libraries/audio/src/PositionalAudioRingBuffer.cpp b/libraries/audio/src/PositionalAudioRingBuffer.cpp index a0ce74dbaf..6c7ecd1cad 100644 --- a/libraries/audio/src/PositionalAudioRingBuffer.cpp +++ b/libraries/audio/src/PositionalAudioRingBuffer.cpp @@ -135,7 +135,6 @@ void PositionalAudioRingBuffer::updateNextOutputTrailingLoudness() { const float PREVIOUS_FRAMES_RATIO = 1.0f - CURRENT_FRAME_RATIO; const float LOUDNESS_EPSILON = 0.000001f; - float oldNextOutputTrailingLoudness = _nextOutputTrailingLoudness; if (nextLoudness >= _nextOutputTrailingLoudness) { _nextOutputTrailingLoudness = nextLoudness; } else { From d5a30ff6eedbb00bf0d850f903a68686b776b7c9 Mon Sep 17 00:00:00 2001 From: wangyix Date: Tue, 15 Jul 2014 14:43:08 -0700 Subject: [PATCH 4/5] removed SIMD stuff from AudioMixer for all builds --- assignment-client/src/audio/AudioMixer.cpp | 111 +-------------------- 1 file changed, 2 insertions(+), 109 deletions(-) diff --git a/assignment-client/src/audio/AudioMixer.cpp b/assignment-client/src/audio/AudioMixer.cpp index 60f905f20a..076769d989 100644 --- a/assignment-client/src/audio/AudioMixer.cpp +++ b/assignment-client/src/audio/AudioMixer.cpp @@ -231,29 +231,10 @@ void AudioMixer::addBufferToMixForListeningNodeWithBuffer(PositionalAudioRingBuf delayBufferSample[0] = correctBufferSample[0] * weakChannelAmplitudeRatio; delayBufferSample[1] = correctBufferSample[1] * weakChannelAmplitudeRatio; -#ifdef _WIN32 _clientSamples[s + goodChannelOffset] += correctBufferSample[0]; _clientSamples[s + goodChannelOffset + SINGLE_STEREO_OFFSET] += correctBufferSample[1]; _clientSamples[delayedChannelIndex] += delayBufferSample[0]; _clientSamples[delayedChannelIndex + SINGLE_STEREO_OFFSET] += delayBufferSample[1]; -#else - __m64 bufferSamples = _mm_set_pi16(_clientSamples[s + goodChannelOffset], - _clientSamples[s + goodChannelOffset + SINGLE_STEREO_OFFSET], - _clientSamples[delayedChannelIndex], - _clientSamples[delayedChannelIndex + SINGLE_STEREO_OFFSET]); - __m64 addedSamples = _mm_set_pi16(correctBufferSample[0], correctBufferSample[1], - delayBufferSample[0], delayBufferSample[1]); - - // perform the MMX add (with saturation) of two correct and delayed samples - __m64 mmxResult = _mm_adds_pi16(bufferSamples, addedSamples); - int16_t* shortResults = reinterpret_cast(&mmxResult); - - // assign the results from the result of the mmx arithmetic - _clientSamples[s + goodChannelOffset] = shortResults[3]; - _clientSamples[s + goodChannelOffset + SINGLE_STEREO_OFFSET] = shortResults[2]; - _clientSamples[delayedChannelIndex] = shortResults[1]; - _clientSamples[delayedChannelIndex + SINGLE_STEREO_OFFSET] = shortResults[0]; -#endif } // The following code is pretty gross and redundant, but AFAIK it's the best way to avoid @@ -273,98 +254,10 @@ void AudioMixer::addBufferToMixForListeningNodeWithBuffer(PositionalAudioRingBuf delayNextOutputStart = bufferStart + ringBufferSampleCapacity - numSamplesDelay; } - int i = 0; - - while (i + 3 < numSamplesDelay) { - // handle the first cases where we can MMX add four samples at once + + for (int i = 0; i < numSamplesDelay; i++) { int parentIndex = i * 2; - -#ifdef _WIN32 _clientSamples[parentIndex + delayedChannelOffset] += delayNextOutputStart[i] * attenuationAndWeakChannelRatio; - _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset] += delayNextOutputStart[i + 1] * attenuationAndWeakChannelRatio; - _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset] += delayNextOutputStart[i + 2] * attenuationAndWeakChannelRatio; - _clientSamples[parentIndex + TRIPLE_STEREO_OFFSET + delayedChannelOffset] += delayNextOutputStart[i + 3] * attenuationAndWeakChannelRatio; -#else - __m64 bufferSamples = _mm_set_pi16(_clientSamples[parentIndex + delayedChannelOffset], - _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset], - _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset], - _clientSamples[parentIndex + TRIPLE_STEREO_OFFSET + delayedChannelOffset]); - __m64 addSamples = _mm_set_pi16(delayNextOutputStart[i] * attenuationAndWeakChannelRatio, - delayNextOutputStart[i + 1] * attenuationAndWeakChannelRatio, - delayNextOutputStart[i + 2] * attenuationAndWeakChannelRatio, - delayNextOutputStart[i + 3] * attenuationAndWeakChannelRatio); - __m64 mmxResult = _mm_adds_pi16(bufferSamples, addSamples); - int16_t* shortResults = reinterpret_cast(&mmxResult); - - _clientSamples[parentIndex + delayedChannelOffset] = shortResults[3]; - _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[2]; - _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[1]; - _clientSamples[parentIndex + TRIPLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[0]; -#endif - - // push the index - i += 4; - } - - int parentIndex = i * 2; - - if (i + 2 < numSamplesDelay) { - // MMX add only three delayed samples - -#ifdef _WIN32 - _clientSamples[parentIndex + delayedChannelOffset] += delayNextOutputStart[i] * attenuationAndWeakChannelRatio; - _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset] += delayNextOutputStart[i + 1] * attenuationAndWeakChannelRatio; - _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset] += delayNextOutputStart[i + 2] * attenuationAndWeakChannelRatio; -#else - __m64 bufferSamples = _mm_set_pi16(_clientSamples[parentIndex + delayedChannelOffset], - _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset], - _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset], - 0); - __m64 addSamples = _mm_set_pi16(delayNextOutputStart[i] * attenuationAndWeakChannelRatio, - delayNextOutputStart[i + 1] * attenuationAndWeakChannelRatio, - delayNextOutputStart[i + 2] * attenuationAndWeakChannelRatio, - 0); - __m64 mmxResult = _mm_adds_pi16(bufferSamples, addSamples); - int16_t* shortResults = reinterpret_cast(&mmxResult); - - _clientSamples[parentIndex + delayedChannelOffset] = shortResults[3]; - _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[2]; - _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[1]; -#endif - - - } else if (i + 1 < numSamplesDelay) { - -#ifdef _WIN32 - _clientSamples[parentIndex + delayedChannelOffset] += delayNextOutputStart[i] * attenuationAndWeakChannelRatio; - _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset] += delayNextOutputStart[i + 1] * attenuationAndWeakChannelRatio; -#else - // MMX add two delayed samples - __m64 bufferSamples = _mm_set_pi16(_clientSamples[parentIndex + delayedChannelOffset], - _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset], - 0, 0); - __m64 addSamples = _mm_set_pi16(delayNextOutputStart[i] * attenuationAndWeakChannelRatio, - delayNextOutputStart[i + 1] * attenuationAndWeakChannelRatio, 0, 0); - - __m64 mmxResult = _mm_adds_pi16(bufferSamples, addSamples); - int16_t* shortResults = reinterpret_cast(&mmxResult); - - _clientSamples[parentIndex + delayedChannelOffset] = shortResults[3]; - _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[2]; -#endif - } else if (i < numSamplesDelay) { - // MMX add a single delayed sample -#ifdef _WIN32 - _clientSamples[parentIndex + delayedChannelOffset] += delayNextOutputStart[i] * attenuationAndWeakChannelRatio; -#else - __m64 bufferSamples = _mm_set_pi16(_clientSamples[parentIndex + delayedChannelOffset], 0, 0, 0); - __m64 addSamples = _mm_set_pi16(delayNextOutputStart[i] * attenuationAndWeakChannelRatio, 0, 0, 0); - - __m64 mmxResult = _mm_adds_pi16(bufferSamples, addSamples); - int16_t* shortResults = reinterpret_cast(&mmxResult); - - _clientSamples[parentIndex + delayedChannelOffset] = shortResults[3]; -#endif } } } else { From abca4661f4e6dfdd863dcd6d2964d9d3972ed0ef Mon Sep 17 00:00:00 2001 From: wangyix Date: Tue, 15 Jul 2014 14:48:26 -0700 Subject: [PATCH 5/5] removed space --- assignment-client/src/audio/AudioMixer.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/assignment-client/src/audio/AudioMixer.cpp b/assignment-client/src/audio/AudioMixer.cpp index 076769d989..b008a20aa7 100644 --- a/assignment-client/src/audio/AudioMixer.cpp +++ b/assignment-client/src/audio/AudioMixer.cpp @@ -254,7 +254,6 @@ void AudioMixer::addBufferToMixForListeningNodeWithBuffer(PositionalAudioRingBuf delayNextOutputStart = bufferStart + ringBufferSampleCapacity - numSamplesDelay; } - for (int i = 0; i < numSamplesDelay; i++) { int parentIndex = i * 2; _clientSamples[parentIndex + delayedChannelOffset] += delayNextOutputStart[i] * attenuationAndWeakChannelRatio;