complete piping of stereo audio through mixer

2025-04-23 01:04:06 +02:00 · 2014-06-06 10:55:04 -07:00 · 2014-06-06 10:55:04 -07:00 · ae2f6a3cb6
commit ae2f6a3cb6
parent 681ce247d6
8 changed files with 160 additions and 117 deletions
--- a/assignment-client/src/audio/AudioMixer.cpp
+++ b/assignment-client/src/audio/AudioMixer.cpp
@ -173,134 +173,160 @@ void AudioMixer::addBufferToMixForListeningNodeWithBuffer(PositionalAudioRingBuf
            weakChannelAmplitudeRatio = 1 - (PHASE_AMPLITUDE_RATIO_AT_90 * sinRatio);
        }
    }
-
-    // if the bearing relative angle to source is > 0 then the delayed channel is the right one
-    int delayedChannelOffset = (bearingRelativeAngleToSource > 0.0f) ? 1 : 0;
-    int goodChannelOffset = delayedChannelOffset == 0 ? 1 : 0;
    
    const int16_t* nextOutputStart = bufferToAdd->getNextOutput();
-   
-    const int16_t* bufferStart = bufferToAdd->getBuffer();
-    int ringBufferSampleCapacity = bufferToAdd->getSampleCapacity();
-
-    int16_t correctBufferSample[2], delayBufferSample[2];
-    int delayedChannelIndex = 0;
    
-    const int SINGLE_STEREO_OFFSET = 2;
-    
-    for (int s = 0; s < NETWORK_BUFFER_LENGTH_SAMPLES_STEREO; s += 4) {
+    if (!bufferToAdd->isStereo()) {
+        // this is a mono buffer, which means it gets full attenuation and spatialization
        
-        // setup the int16_t variables for the two sample sets
-        correctBufferSample[0] = nextOutputStart[s / 2] * attenuationCoefficient;
-        correctBufferSample[1] = nextOutputStart[(s / 2) + 1] * attenuationCoefficient;
+        // if the bearing relative angle to source is > 0 then the delayed channel is the right one
+        int delayedChannelOffset = (bearingRelativeAngleToSource > 0.0f) ? 1 : 0;
+        int goodChannelOffset = delayedChannelOffset == 0 ? 1 : 0;
        
-        delayedChannelIndex = s + (numSamplesDelay * 2) + delayedChannelOffset;
+        const int16_t* bufferStart = bufferToAdd->getBuffer();
+        int ringBufferSampleCapacity = bufferToAdd->getSampleCapacity();
        
-        delayBufferSample[0] = correctBufferSample[0] * weakChannelAmplitudeRatio;
-        delayBufferSample[1] = correctBufferSample[1] * weakChannelAmplitudeRatio;
+        int16_t correctBufferSample[2], delayBufferSample[2];
+        int delayedChannelIndex = 0;
        
-        __m64 bufferSamples = _mm_set_pi16(_clientSamples[s + goodChannelOffset],
-                                           _clientSamples[s + goodChannelOffset + SINGLE_STEREO_OFFSET],
-                                           _clientSamples[delayedChannelIndex],
-                                           _clientSamples[delayedChannelIndex + SINGLE_STEREO_OFFSET]);
-        __m64 addedSamples = _mm_set_pi16(correctBufferSample[0], correctBufferSample[1],
-                                         delayBufferSample[0], delayBufferSample[1]);
+        const int SINGLE_STEREO_OFFSET = 2;
        
-        // perform the MMX add (with saturation) of two correct and delayed samples
-        __m64 mmxResult = _mm_adds_pi16(bufferSamples, addedSamples);
-        int16_t* shortResults = reinterpret_cast<int16_t*>(&mmxResult);
-        
-        // assign the results from the result of the mmx arithmetic
-        _clientSamples[s + goodChannelOffset] = shortResults[3];
-        _clientSamples[s + goodChannelOffset + SINGLE_STEREO_OFFSET] = shortResults[2];
-        _clientSamples[delayedChannelIndex] = shortResults[1];
-        _clientSamples[delayedChannelIndex + SINGLE_STEREO_OFFSET] = shortResults[0];
-    }
-    
-    // The following code is pretty gross and redundant, but AFAIK it's the best way to avoid
-    // too many conditionals in handling the delay samples at the beginning of _clientSamples.
-    // Basically we try to take the samples in batches of four, and then handle the remainder
-    // conditionally to get rid of the rest.
-    
-    const int DOUBLE_STEREO_OFFSET = 4;
-    const int TRIPLE_STEREO_OFFSET = 6;
-    
-    if (numSamplesDelay > 0) {
-        // if there was a sample delay for this buffer, we need to pull samples prior to the nextOutput
-        // to stick at the beginning
-        float attenuationAndWeakChannelRatio = attenuationCoefficient * weakChannelAmplitudeRatio;
-        const int16_t* delayNextOutputStart = nextOutputStart - numSamplesDelay;
-        if (delayNextOutputStart < bufferStart) {
-            delayNextOutputStart = bufferStart + ringBufferSampleCapacity - numSamplesDelay;
+        for (int s = 0; s < NETWORK_BUFFER_LENGTH_SAMPLES_STEREO; s += 4) {
+            
+            // setup the int16_t variables for the two sample sets
+            correctBufferSample[0] = nextOutputStart[s / 2] * attenuationCoefficient;
+            correctBufferSample[1] = nextOutputStart[(s / 2) + 1] * attenuationCoefficient;
+            
+            delayedChannelIndex = s + (numSamplesDelay * 2) + delayedChannelOffset;
+            
+            delayBufferSample[0] = correctBufferSample[0] * weakChannelAmplitudeRatio;
+            delayBufferSample[1] = correctBufferSample[1] * weakChannelAmplitudeRatio;
+            
+            __m64 bufferSamples = _mm_set_pi16(_clientSamples[s + goodChannelOffset],
+                                               _clientSamples[s + goodChannelOffset + SINGLE_STEREO_OFFSET],
+                                               _clientSamples[delayedChannelIndex],
+                                               _clientSamples[delayedChannelIndex + SINGLE_STEREO_OFFSET]);
+            __m64 addedSamples = _mm_set_pi16(correctBufferSample[0], correctBufferSample[1],
+                                              delayBufferSample[0], delayBufferSample[1]);
+            
+            // perform the MMX add (with saturation) of two correct and delayed samples
+            __m64 mmxResult = _mm_adds_pi16(bufferSamples, addedSamples);
+            int16_t* shortResults = reinterpret_cast<int16_t*>(&mmxResult);
+            
+            // assign the results from the result of the mmx arithmetic
+            _clientSamples[s + goodChannelOffset] = shortResults[3];
+            _clientSamples[s + goodChannelOffset + SINGLE_STEREO_OFFSET] = shortResults[2];
+            _clientSamples[delayedChannelIndex] = shortResults[1];
+            _clientSamples[delayedChannelIndex + SINGLE_STEREO_OFFSET] = shortResults[0];
        }
        
-        int i = 0;
+        // The following code is pretty gross and redundant, but AFAIK it's the best way to avoid
+        // too many conditionals in handling the delay samples at the beginning of _clientSamples.
+        // Basically we try to take the samples in batches of four, and then handle the remainder
+        // conditionally to get rid of the rest.
        
-        while (i + 3 < numSamplesDelay) {
-            // handle the first cases where we can MMX add four samples at once
+        const int DOUBLE_STEREO_OFFSET = 4;
+        const int TRIPLE_STEREO_OFFSET = 6;
+        
+        if (numSamplesDelay > 0) {
+            // if there was a sample delay for this buffer, we need to pull samples prior to the nextOutput
+            // to stick at the beginning
+            float attenuationAndWeakChannelRatio = attenuationCoefficient * weakChannelAmplitudeRatio;
+            const int16_t* delayNextOutputStart = nextOutputStart - numSamplesDelay;
+            if (delayNextOutputStart < bufferStart) {
+                delayNextOutputStart = bufferStart + ringBufferSampleCapacity - numSamplesDelay;
+            }
+            
+            int i = 0;
+            
+            while (i + 3 < numSamplesDelay) {
+                // handle the first cases where we can MMX add four samples at once
+                int parentIndex = i * 2;
+                __m64 bufferSamples = _mm_set_pi16(_clientSamples[parentIndex + delayedChannelOffset],
+                                                   _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset],
+                                                   _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset],
+                                                   _clientSamples[parentIndex + TRIPLE_STEREO_OFFSET + delayedChannelOffset]);
+                __m64 addSamples = _mm_set_pi16(delayNextOutputStart[i] * attenuationAndWeakChannelRatio,
+                                                delayNextOutputStart[i + 1] * attenuationAndWeakChannelRatio,
+                                                delayNextOutputStart[i + 2] * attenuationAndWeakChannelRatio,
+                                                delayNextOutputStart[i + 3] * attenuationAndWeakChannelRatio);
+                __m64 mmxResult = _mm_adds_pi16(bufferSamples, addSamples);
+                int16_t* shortResults = reinterpret_cast<int16_t*>(&mmxResult);
+                
+                _clientSamples[parentIndex + delayedChannelOffset] = shortResults[3];
+                _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[2];
+                _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[1];
+                _clientSamples[parentIndex + TRIPLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[0];
+                
+                // push the index
+                i += 4;
+            }
+            
            int parentIndex = i * 2;
-            __m64 bufferSamples = _mm_set_pi16(_clientSamples[parentIndex + delayedChannelOffset],
-                                               _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset],
-                                               _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset],
-                                               _clientSamples[parentIndex + TRIPLE_STEREO_OFFSET + delayedChannelOffset]);
-            __m64 addSamples = _mm_set_pi16(delayNextOutputStart[i] * attenuationAndWeakChannelRatio,
-                                            delayNextOutputStart[i + 1] * attenuationAndWeakChannelRatio,
-                                            delayNextOutputStart[i + 2] * attenuationAndWeakChannelRatio,
-                                            delayNextOutputStart[i + 3] * attenuationAndWeakChannelRatio);
-            __m64 mmxResult = _mm_adds_pi16(bufferSamples, addSamples);
-            int16_t* shortResults = reinterpret_cast<int16_t*>(&mmxResult);
            
-            _clientSamples[parentIndex + delayedChannelOffset] = shortResults[3];
-            _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[2];
-            _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[1];
-            _clientSamples[parentIndex + TRIPLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[0];
-            
-            // push the index
-            i += 4;
+            if (i + 2 < numSamplesDelay) {
+                // MMX add only three delayed samples
+                
+                __m64 bufferSamples = _mm_set_pi16(_clientSamples[parentIndex + delayedChannelOffset],
+                                                   _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset],
+                                                   _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset],
+                                                   0);
+                __m64 addSamples = _mm_set_pi16(delayNextOutputStart[i] * attenuationAndWeakChannelRatio,
+                                                delayNextOutputStart[i + 1] * attenuationAndWeakChannelRatio,
+                                                delayNextOutputStart[i + 2] * attenuationAndWeakChannelRatio,
+                                                0);
+                __m64 mmxResult = _mm_adds_pi16(bufferSamples, addSamples);
+                int16_t* shortResults = reinterpret_cast<int16_t*>(&mmxResult);
+                
+                _clientSamples[parentIndex + delayedChannelOffset] = shortResults[3];
+                _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[2];
+                _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[1];
+                
+            } else if (i + 1 < numSamplesDelay) {
+                // MMX add two delayed samples
+                __m64 bufferSamples = _mm_set_pi16(_clientSamples[parentIndex + delayedChannelOffset],
+                                                   _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset], 0, 0);
+                __m64 addSamples = _mm_set_pi16(delayNextOutputStart[i] * attenuationAndWeakChannelRatio,
+                                                delayNextOutputStart[i + 1] * attenuationAndWeakChannelRatio, 0, 0);
+                
+                __m64 mmxResult = _mm_adds_pi16(bufferSamples, addSamples);
+                int16_t* shortResults = reinterpret_cast<int16_t*>(&mmxResult);
+                
+                _clientSamples[parentIndex + delayedChannelOffset] = shortResults[3];
+                _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[2];
+                
+            } else if (i < numSamplesDelay) {
+                // MMX add a single delayed sample
+                __m64 bufferSamples = _mm_set_pi16(_clientSamples[parentIndex + delayedChannelOffset], 0, 0, 0);
+                __m64 addSamples = _mm_set_pi16(delayNextOutputStart[i] * attenuationAndWeakChannelRatio, 0, 0, 0);
+                
+                __m64 mmxResult = _mm_adds_pi16(bufferSamples, addSamples);
+                int16_t* shortResults = reinterpret_cast<int16_t*>(&mmxResult);
+                
+                _clientSamples[parentIndex + delayedChannelOffset] = shortResults[3];
+            }
        }
+    } else {
+        // stereo buffer - do attenuation but no sample delay for spatialization
+        qDebug() << "Adding a stereo buffer";
        
-        int parentIndex = i * 2;
-        
-        if (i + 2 < numSamplesDelay) {
-            // MMX add only three delayed samples
+        for (int s = 0; s < NETWORK_BUFFER_LENGTH_SAMPLES_STEREO; s += 4) {
+            // use MMX to clamp four additions at a time
            
-            __m64 bufferSamples = _mm_set_pi16(_clientSamples[parentIndex + delayedChannelOffset],
-                                               _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset],
-                                               _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset],
-                                               0);
-            __m64 addSamples = _mm_set_pi16(delayNextOutputStart[i] * attenuationAndWeakChannelRatio,
-                                            delayNextOutputStart[i + 1] * attenuationAndWeakChannelRatio,
-                                            delayNextOutputStart[i + 2] * attenuationAndWeakChannelRatio,
-                                            0);
-            __m64 mmxResult = _mm_adds_pi16(bufferSamples, addSamples);
-            int16_t* shortResults = reinterpret_cast<int16_t*>(&mmxResult);
-            
-            _clientSamples[parentIndex + delayedChannelOffset] = shortResults[3];
-            _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[2];
-            _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[1];
-            
-        } else if (i + 1 < numSamplesDelay) {
-            // MMX add two delayed samples
-            __m64 bufferSamples = _mm_set_pi16(_clientSamples[parentIndex + delayedChannelOffset],
-                                               _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset], 0, 0);
-            __m64 addSamples = _mm_set_pi16(delayNextOutputStart[i] * attenuationAndWeakChannelRatio,
-                                            delayNextOutputStart[i + 1] * attenuationAndWeakChannelRatio, 0, 0);
+            __m64 bufferSamples = _mm_set_pi16(_clientSamples[s], _clientSamples[s + 1],
+                                               _clientSamples[s + 2], _clientSamples[s + 3]);
+            __m64 addSamples = _mm_set_pi16(nextOutputStart[s] * attenuationCoefficient,
+                                            nextOutputStart[s + 1] * attenuationCoefficient,
+                                            nextOutputStart[s + 2] * attenuationCoefficient,
+                                            nextOutputStart[s + 3] * attenuationCoefficient);
            
            __m64 mmxResult = _mm_adds_pi16(bufferSamples, addSamples);
            int16_t* shortResults = reinterpret_cast<int16_t*>(&mmxResult);
            
-            _clientSamples[parentIndex + delayedChannelOffset] = shortResults[3];
-            _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[2];
-            
-        } else if (i < numSamplesDelay) {
-            // MMX add a single delayed sample
-            __m64 bufferSamples = _mm_set_pi16(_clientSamples[parentIndex + delayedChannelOffset], 0, 0, 0);
-            __m64 addSamples = _mm_set_pi16(delayNextOutputStart[i] * attenuationAndWeakChannelRatio, 0, 0, 0);
-            
-            __m64 mmxResult = _mm_adds_pi16(bufferSamples, addSamples);
-            int16_t* shortResults = reinterpret_cast<int16_t*>(&mmxResult);
-            
-            _clientSamples[parentIndex + delayedChannelOffset] = shortResults[3];
+            _clientSamples[s] = shortResults[3];
+            _clientSamples[s + 1] = shortResults[2];
+            _clientSamples[s + 2] = shortResults[1];
+            _clientSamples[s + 3] = shortResults[0];
        }
    }
 }
--- a/assignment-client/src/audio/AudioMixerClientData.cpp
+++ b/assignment-client/src/audio/AudioMixerClientData.cpp
@ -50,10 +50,22 @@ int AudioMixerClientData::parseData(const QByteArray& packet) {

        // grab the AvatarAudioRingBuffer from the vector (or create it if it doesn't exist)
        AvatarAudioRingBuffer* avatarRingBuffer = getAvatarAudioRingBuffer();
+        
+        // the byte after the header is the channel flag (1 == stereo); a truncated packet is treated as mono
+        const int channelFlagIndex = numBytesForPacketHeader(packet);
+        bool isStereo = channelFlagIndex < packet.size() && packet.at(channelFlagIndex) == 1;
+        
+        if (avatarRingBuffer && avatarRingBuffer->isStereo() != isStereo) {
+            // the incoming packet's channel layout differs from the buffer we currently hold,
+            // so drop that buffer here and let the code below create one with the right layout
+            _ringBuffers.removeOne(avatarRingBuffer);
+            avatarRingBuffer->deleteLater();
+            avatarRingBuffer = NULL;
+        }

        if (!avatarRingBuffer) {
            // we don't have an AvatarAudioRingBuffer yet, so add it
-            avatarRingBuffer = new AvatarAudioRingBuffer();
+            avatarRingBuffer = new AvatarAudioRingBuffer(isStereo);
            _ringBuffers.push_back(avatarRingBuffer);
        }

--- a/assignment-client/src/audio/AudioMixerClientData.h
+++ b/assignment-client/src/audio/AudioMixerClientData.h
@ -24,14 +24,14 @@ public:
    AudioMixerClientData();
    ~AudioMixerClientData();
    
-    const std::vector<PositionalAudioRingBuffer*> getRingBuffers() const { return _ringBuffers; }
+    const QList<PositionalAudioRingBuffer*> getRingBuffers() const { return _ringBuffers; } // returned by value, not by reference
    AvatarAudioRingBuffer* getAvatarAudioRingBuffer() const;
    
    int parseData(const QByteArray& packet);
    void checkBuffersBeforeFrameSend(int jitterBufferLengthSamples);
    void pushBuffersAfterFrameSend();
 private:
-    std::vector<PositionalAudioRingBuffer*> _ringBuffers;
+    QList<PositionalAudioRingBuffer*> _ringBuffers;
 };

 #endif // hifi_AudioMixerClientData_h
--- a/assignment-client/src/audio/AvatarAudioRingBuffer.cpp
+++ b/assignment-client/src/audio/AvatarAudioRingBuffer.cpp
@ -13,8 +13,8 @@

 #include "AvatarAudioRingBuffer.h"

-AvatarAudioRingBuffer::AvatarAudioRingBuffer() :
-    PositionalAudioRingBuffer(PositionalAudioRingBuffer::Microphone) {
+AvatarAudioRingBuffer::AvatarAudioRingBuffer(bool isStereo) :
+    PositionalAudioRingBuffer(PositionalAudioRingBuffer::Microphone, isStereo) { // type is fixed to Microphone; isStereo is forwarded
    
 }

--- a/assignment-client/src/audio/AvatarAudioRingBuffer.h
+++ b/assignment-client/src/audio/AvatarAudioRingBuffer.h
@ -18,7 +18,7 @@

 class AvatarAudioRingBuffer : public PositionalAudioRingBuffer {
 public:
-    AvatarAudioRingBuffer();
+    AvatarAudioRingBuffer(bool isStereo = false); // isStereo defaults to false when omitted
    
    int parseData(const QByteArray& packet);
 private:
--- a/interface/src/Audio.cpp
+++ b/interface/src/Audio.cpp
@ -640,6 +640,9 @@ void Audio::handleAudioInput() {
            }

            char* currentPacketPtr = audioDataPacket + populatePacketHeader(audioDataPacket, packetType);
+            
+            // set the mono/stereo byte
+            *currentPacketPtr++ = isStereo;

            // memcpy the three float positions
            memcpy(currentPacketPtr, &headPosition, sizeof(headPosition));
@ -649,9 +652,6 @@ void Audio::handleAudioInput() {
            memcpy(currentPacketPtr, &headOrientation, sizeof(headOrientation));
            currentPacketPtr += sizeof(headOrientation);
            
-            // set the mono/stereo byte
-            *currentPacketPtr++ = isStereo;
-            
            nodeList->writeDatagram(audioDataPacket, numAudioBytes + leadingBytes, audioMixer);

            Application::getInstance()->getBandwidthMeter()->outputStream(BandwidthMeter::AUDIO)
--- a/libraries/audio/src/PositionalAudioRingBuffer.cpp
+++ b/libraries/audio/src/PositionalAudioRingBuffer.cpp
@ -41,6 +41,9 @@ int PositionalAudioRingBuffer::parseData(const QByteArray& packet) {
    // skip the packet header (includes the source UUID)
    int readBytes = numBytesForPacketHeader(packet);
    
+    // hop over the channel flag that has already been read in AudioMixerClientData
+    readBytes += sizeof(quint8);
+    // read the positional data
    readBytes += parsePositionalData(packet.mid(readBytes));
   
    if (packetTypeForPacket(packet) == PacketTypeSilentAudioFrame) {
--- a/libraries/audio/src/PositionalAudioRingBuffer.h
+++ b/libraries/audio/src/PositionalAudioRingBuffer.h
@ -41,6 +41,8 @@ public:
    
    bool shouldLoopbackForNode() const { return _shouldLoopbackForNode; }
    
+    bool isStereo() const { return _isStereo; } // the mixer checks this to choose between its mono and stereo mix paths
+    
    PositionalAudioRingBuffer::Type getType() const { return _type; }
    const glm::vec3& getPosition() const { return _position; }
    const glm::quat& getOrientation() const { return _orientation; }