From ae2f6a3cb676294457beae372a0bc7bb9dc1728f Mon Sep 17 00:00:00 2001
From: Stephen Birarda
Date: Fri, 6 Jun 2014 10:55:04 -0700
Subject: [PATCH] complete piping of stereo audio through mixer

---
 assignment-client/src/audio/AudioMixer.cpp    | 242 ++++++++++--------
 .../src/audio/AudioMixerClientData.cpp        |  14 +-
 .../src/audio/AudioMixerClientData.h          |   4 +-
 .../src/audio/AvatarAudioRingBuffer.cpp       |   4 +-
 .../src/audio/AvatarAudioRingBuffer.h         |   2 +-
 interface/src/Audio.cpp                       |   6 +-
 .../audio/src/PositionalAudioRingBuffer.cpp   |   3 +
 .../audio/src/PositionalAudioRingBuffer.h     |   2 +
 8 files changed, 160 insertions(+), 117 deletions(-)

diff --git a/assignment-client/src/audio/AudioMixer.cpp b/assignment-client/src/audio/AudioMixer.cpp
index 3acd783bb0..f8cfb3140c 100644
--- a/assignment-client/src/audio/AudioMixer.cpp
+++ b/assignment-client/src/audio/AudioMixer.cpp
@@ -173,134 +173,160 @@ void AudioMixer::addBufferToMixForListeningNodeWithBuffer(PositionalAudioRingBuf
             weakChannelAmplitudeRatio = 1 - (PHASE_AMPLITUDE_RATIO_AT_90 * sinRatio);
         }
     }
-
-    // if the bearing relative angle to source is > 0 then the delayed channel is the right one
-    int delayedChannelOffset = (bearingRelativeAngleToSource > 0.0f) ? 1 : 0;
-    int goodChannelOffset = delayedChannelOffset == 0 ? 1 : 0;
     const int16_t* nextOutputStart = bufferToAdd->getNextOutput();
-
-    const int16_t* bufferStart = bufferToAdd->getBuffer();
-    int ringBufferSampleCapacity = bufferToAdd->getSampleCapacity();
-
-    int16_t correctBufferSample[2], delayBufferSample[2];
-    int delayedChannelIndex = 0;
-    const int SINGLE_STEREO_OFFSET = 2;
-
-    for (int s = 0; s < NETWORK_BUFFER_LENGTH_SAMPLES_STEREO; s += 4) {
+    if (!bufferToAdd->isStereo()) {
+        // this is a mono buffer, which means it gets full attenuation and spatialization
-        // setup the int16_t variables for the two sample sets
-        correctBufferSample[0] = nextOutputStart[s / 2] * attenuationCoefficient;
-        correctBufferSample[1] = nextOutputStart[(s / 2) + 1] * attenuationCoefficient;
+        // if the bearing relative angle to source is > 0 then the delayed channel is the right one
+        int delayedChannelOffset = (bearingRelativeAngleToSource > 0.0f) ? 1 : 0;
+        int goodChannelOffset = delayedChannelOffset == 0 ? 1 : 0;
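
For reference, the mono channel selection above reduces to the following sketch (names mirror the patch; an interleaved stereo mix layout of left, right, left, right, ... is assumed):

    // Sketch only: pick which interleaved slot receives the delayed, attenuated copy of the
    // mono signal. Per the comment above, a bearing angle > 0 makes the right channel (offset 1)
    // the delayed one; the other slot is the "good" full-strength channel.
    inline void pickChannelOffsets(float bearingRelativeAngleToSource,
                                   int& delayedChannelOffset, int& goodChannelOffset) {
        delayedChannelOffset = (bearingRelativeAngleToSource > 0.0f) ? 1 : 0;
        goodChannelOffset = (delayedChannelOffset == 0) ? 1 : 0;
    }
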
-        delayedChannelIndex = s + (numSamplesDelay * 2) + delayedChannelOffset;
+        const int16_t* bufferStart = bufferToAdd->getBuffer();
+        int ringBufferSampleCapacity = bufferToAdd->getSampleCapacity();
-        delayBufferSample[0] = correctBufferSample[0] * weakChannelAmplitudeRatio;
-        delayBufferSample[1] = correctBufferSample[1] * weakChannelAmplitudeRatio;
+        int16_t correctBufferSample[2], delayBufferSample[2];
+        int delayedChannelIndex = 0;
-        __m64 bufferSamples = _mm_set_pi16(_clientSamples[s + goodChannelOffset],
-                                           _clientSamples[s + goodChannelOffset + SINGLE_STEREO_OFFSET],
-                                           _clientSamples[delayedChannelIndex],
-                                           _clientSamples[delayedChannelIndex + SINGLE_STEREO_OFFSET]);
-        __m64 addedSamples = _mm_set_pi16(correctBufferSample[0], correctBufferSample[1],
-                                          delayBufferSample[0], delayBufferSample[1]);
+        const int SINGLE_STEREO_OFFSET = 2;
-        // perform the MMX add (with saturation) of two correct and delayed samples
-        __m64 mmxResult = _mm_adds_pi16(bufferSamples, addedSamples);
-        int16_t* shortResults = reinterpret_cast<int16_t*>(&mmxResult);
-
-        // assign the results from the result of the mmx arithmetic
-        _clientSamples[s + goodChannelOffset] = shortResults[3];
-        _clientSamples[s + goodChannelOffset + SINGLE_STEREO_OFFSET] = shortResults[2];
-        _clientSamples[delayedChannelIndex] = shortResults[1];
-        _clientSamples[delayedChannelIndex + SINGLE_STEREO_OFFSET] = shortResults[0];
-    }
-
-    // The following code is pretty gross and redundant, but AFAIK it's the best way to avoid
-    // too many conditionals in handling the delay samples at the beginning of _clientSamples.
-    // Basically we try to take the samples in batches of four, and then handle the remainder
-    // conditionally to get rid of the rest.
-
-    const int DOUBLE_STEREO_OFFSET = 4;
-    const int TRIPLE_STEREO_OFFSET = 6;
-
-    if (numSamplesDelay > 0) {
-        // if there was a sample delay for this buffer, we need to pull samples prior to the nextOutput
-        // to stick at the beginning
-        float attenuationAndWeakChannelRatio = attenuationCoefficient * weakChannelAmplitudeRatio;
-        const int16_t* delayNextOutputStart = nextOutputStart - numSamplesDelay;
-        if (delayNextOutputStart < bufferStart) {
-            delayNextOutputStart = bufferStart + ringBufferSampleCapacity - numSamplesDelay;
+        for (int s = 0; s < NETWORK_BUFFER_LENGTH_SAMPLES_STEREO; s += 4) {
+
+            // setup the int16_t variables for the two sample sets
+            correctBufferSample[0] = nextOutputStart[s / 2] * attenuationCoefficient;
+            correctBufferSample[1] = nextOutputStart[(s / 2) + 1] * attenuationCoefficient;
+
+            delayedChannelIndex = s + (numSamplesDelay * 2) + delayedChannelOffset;
+
+            delayBufferSample[0] = correctBufferSample[0] * weakChannelAmplitudeRatio;
+            delayBufferSample[1] = correctBufferSample[1] * weakChannelAmplitudeRatio;
+
+            __m64 bufferSamples = _mm_set_pi16(_clientSamples[s + goodChannelOffset],
+                                               _clientSamples[s + goodChannelOffset + SINGLE_STEREO_OFFSET],
+                                               _clientSamples[delayedChannelIndex],
+                                               _clientSamples[delayedChannelIndex + SINGLE_STEREO_OFFSET]);
+            __m64 addedSamples = _mm_set_pi16(correctBufferSample[0], correctBufferSample[1],
+                                              delayBufferSample[0], delayBufferSample[1]);
+
+            // perform the MMX add (with saturation) of two correct and delayed samples
+            __m64 mmxResult = _mm_adds_pi16(bufferSamples, addedSamples);
+            int16_t* shortResults = reinterpret_cast<int16_t*>(&mmxResult);
+
+            // assign the results from the result of the mmx arithmetic
+            _clientSamples[s + goodChannelOffset] = shortResults[3];
+            _clientSamples[s + goodChannelOffset + SINGLE_STEREO_OFFSET] = shortResults[2];
+            _clientSamples[delayedChannelIndex] = shortResults[1];
+            _clientSamples[delayedChannelIndex + SINGLE_STEREO_OFFSET] = shortResults[0];
         }
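
The MMX work in the loop above is just a clamped 16-bit add; a scalar sketch of the same operation, assuming only the standard library:

    #include <cstdint>
    #include <algorithm>

    // What _mm_adds_pi16 does on each of its four lanes: add and saturate to the int16_t
    // range instead of letting the mix wrap around on overflow.
    inline int16_t addWithSaturation(int16_t mixSample, int16_t sampleToAdd) {
        int32_t sum = int32_t(mixSample) + int32_t(sampleToAdd);
        sum = std::max<int32_t>(INT16_MIN, std::min<int32_t>(INT16_MAX, sum));
        return static_cast<int16_t>(sum);
    }

Note that _mm_set_pi16 packs its first argument into the highest-order lane, which is why the mixed results are read back from shortResults[3] down to shortResults[0].
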
-        int i = 0;
+        // The following code is pretty gross and redundant, but AFAIK it's the best way to avoid
+        // too many conditionals in handling the delay samples at the beginning of _clientSamples.
+        // Basically we try to take the samples in batches of four, and then handle the remainder
+        // conditionally to get rid of the rest.
-        while (i + 3 < numSamplesDelay) {
-            // handle the first cases where we can MMX add four samples at once
+        const int DOUBLE_STEREO_OFFSET = 4;
+        const int TRIPLE_STEREO_OFFSET = 6;
+
+        if (numSamplesDelay > 0) {
+            // if there was a sample delay for this buffer, we need to pull samples prior to the nextOutput
+            // to stick at the beginning
+            float attenuationAndWeakChannelRatio = attenuationCoefficient * weakChannelAmplitudeRatio;
+            const int16_t* delayNextOutputStart = nextOutputStart - numSamplesDelay;
+            if (delayNextOutputStart < bufferStart) {
+                delayNextOutputStart = bufferStart + ringBufferSampleCapacity - numSamplesDelay;
+            }
+
+            int i = 0;
+
+            while (i + 3 < numSamplesDelay) {
+                // handle the first cases where we can MMX add four samples at once
+                int parentIndex = i * 2;
+                __m64 bufferSamples = _mm_set_pi16(_clientSamples[parentIndex + delayedChannelOffset],
+                                                   _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset],
+                                                   _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset],
+                                                   _clientSamples[parentIndex + TRIPLE_STEREO_OFFSET + delayedChannelOffset]);
+                __m64 addSamples = _mm_set_pi16(delayNextOutputStart[i] * attenuationAndWeakChannelRatio,
+                                                delayNextOutputStart[i + 1] * attenuationAndWeakChannelRatio,
+                                                delayNextOutputStart[i + 2] * attenuationAndWeakChannelRatio,
+                                                delayNextOutputStart[i + 3] * attenuationAndWeakChannelRatio);
+                __m64 mmxResult = _mm_adds_pi16(bufferSamples, addSamples);
+                int16_t* shortResults = reinterpret_cast<int16_t*>(&mmxResult);
+
+                _clientSamples[parentIndex + delayedChannelOffset] = shortResults[3];
+                _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[2];
+                _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[1];
+                _clientSamples[parentIndex + TRIPLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[0];
+
+                // push the index
+                i += 4;
+            }
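
The batch-of-four loop above and the three/two/one-sample cases that follow are an unrolled form of the scalar tail below (a sketch under the patch's naming; delayNextOutputStart is assumed to already point at the wrapped-around position in the ring buffer):

    #include <cstdint>
    #include <algorithm>

    // Mix the delayed, weaker copy of the first numSamplesDelay samples into the delayed channel.
    inline void mixDelayTail(int16_t* clientSamples, const int16_t* delayNextOutputStart,
                             int numSamplesDelay, int delayedChannelOffset,
                             float attenuationAndWeakChannelRatio) {
        for (int i = 0; i < numSamplesDelay; i++) {
            int index = (i * 2) + delayedChannelOffset;
            int32_t mixed = int32_t(clientSamples[index])
                + int32_t(int16_t(delayNextOutputStart[i] * attenuationAndWeakChannelRatio));
            mixed = std::max<int32_t>(INT16_MIN, std::min<int32_t>(INT16_MAX, mixed));
            clientSamples[index] = int16_t(mixed);
        }
    }
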
+
             int parentIndex = i * 2;
-            __m64 bufferSamples = _mm_set_pi16(_clientSamples[parentIndex + delayedChannelOffset],
-                                               _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset],
-                                               _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset],
-                                               _clientSamples[parentIndex + TRIPLE_STEREO_OFFSET + delayedChannelOffset]);
-            __m64 addSamples = _mm_set_pi16(delayNextOutputStart[i] * attenuationAndWeakChannelRatio,
-                                            delayNextOutputStart[i + 1] * attenuationAndWeakChannelRatio,
-                                            delayNextOutputStart[i + 2] * attenuationAndWeakChannelRatio,
-                                            delayNextOutputStart[i + 3] * attenuationAndWeakChannelRatio);
-            __m64 mmxResult = _mm_adds_pi16(bufferSamples, addSamples);
-            int16_t* shortResults = reinterpret_cast<int16_t*>(&mmxResult);
-            _clientSamples[parentIndex + delayedChannelOffset] = shortResults[3];
-            _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[2];
-            _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[1];
-            _clientSamples[parentIndex + TRIPLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[0];
-
-            // push the index
-            i += 4;
+            if (i + 2 < numSamplesDelay) {
+                // MMX add only three delayed samples
+
+                __m64 bufferSamples = _mm_set_pi16(_clientSamples[parentIndex + delayedChannelOffset],
+                                                   _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset],
+                                                   _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset],
+                                                   0);
+                __m64 addSamples = _mm_set_pi16(delayNextOutputStart[i] * attenuationAndWeakChannelRatio,
+                                                delayNextOutputStart[i + 1] * attenuationAndWeakChannelRatio,
+                                                delayNextOutputStart[i + 2] * attenuationAndWeakChannelRatio,
+                                                0);
+                __m64 mmxResult = _mm_adds_pi16(bufferSamples, addSamples);
+                int16_t* shortResults = reinterpret_cast<int16_t*>(&mmxResult);
+
+                _clientSamples[parentIndex + delayedChannelOffset] = shortResults[3];
+                _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[2];
+                _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[1];
+
+            } else if (i + 1 < numSamplesDelay) {
+                // MMX add two delayed samples
+                __m64 bufferSamples = _mm_set_pi16(_clientSamples[parentIndex + delayedChannelOffset],
+                                                   _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset], 0, 0);
+                __m64 addSamples = _mm_set_pi16(delayNextOutputStart[i] * attenuationAndWeakChannelRatio,
+                                                delayNextOutputStart[i + 1] * attenuationAndWeakChannelRatio, 0, 0);
+
+                __m64 mmxResult = _mm_adds_pi16(bufferSamples, addSamples);
+                int16_t* shortResults = reinterpret_cast<int16_t*>(&mmxResult);
+
+                _clientSamples[parentIndex + delayedChannelOffset] = shortResults[3];
+                _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[2];
+
+            } else if (i < numSamplesDelay) {
+                // MMX add a single delayed sample
+                __m64 bufferSamples = _mm_set_pi16(_clientSamples[parentIndex + delayedChannelOffset], 0, 0, 0);
+                __m64 addSamples = _mm_set_pi16(delayNextOutputStart[i] * attenuationAndWeakChannelRatio, 0, 0, 0);
+
+                __m64 mmxResult = _mm_adds_pi16(bufferSamples, addSamples);
+                int16_t* shortResults = reinterpret_cast<int16_t*>(&mmxResult);
+
+                _clientSamples[parentIndex + delayedChannelOffset] = shortResults[3];
+            }
         }
+    } else {
+        // stereo buffer - do attenuation but no sample delay for spatialization
+        qDebug() << "Adding a stereo buffer";
-        int parentIndex = i * 2;
-
-        if (i + 2 < numSamplesDelay) {
-            // MMX add only three delayed samples
+        for (int s = 0; s < NETWORK_BUFFER_LENGTH_SAMPLES_STEREO; s += 4) {
+            // use MMX to clamp four additions at a time
-            __m64 bufferSamples = _mm_set_pi16(_clientSamples[parentIndex + delayedChannelOffset],
-                                               _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset],
-                                               _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset],
-                                               0);
-            __m64 addSamples = _mm_set_pi16(delayNextOutputStart[i] * attenuationAndWeakChannelRatio,
-                                            delayNextOutputStart[i + 1] * attenuationAndWeakChannelRatio,
-                                            delayNextOutputStart[i + 2] * attenuationAndWeakChannelRatio,
-                                            0);
-            __m64 mmxResult = _mm_adds_pi16(bufferSamples, addSamples);
-            int16_t* shortResults = reinterpret_cast<int16_t*>(&mmxResult);
-
-            _clientSamples[parentIndex + delayedChannelOffset] = shortResults[3];
-            _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[2];
-            _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[1];
-
-        } else if (i + 1 < numSamplesDelay) {
-            // MMX add two delayed samples
-            __m64 bufferSamples = _mm_set_pi16(_clientSamples[parentIndex + delayedChannelOffset],
-                                               _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset], 0, 0);
-            __m64 addSamples = _mm_set_pi16(delayNextOutputStart[i] * attenuationAndWeakChannelRatio,
-                                            delayNextOutputStart[i + 1] * attenuationAndWeakChannelRatio, 0, 0);
+            __m64 bufferSamples = _mm_set_pi16(_clientSamples[s], _clientSamples[s + 1],
+                                               _clientSamples[s + 2], _clientSamples[s + 3]);
+            __m64 addSamples = _mm_set_pi16(nextOutputStart[s] * attenuationCoefficient,
+                                            nextOutputStart[s + 1] * attenuationCoefficient,
+                                            nextOutputStart[s + 2] * attenuationCoefficient,
+                                            nextOutputStart[s + 3] * attenuationCoefficient);
             __m64 mmxResult = _mm_adds_pi16(bufferSamples, addSamples);
             int16_t* shortResults = reinterpret_cast<int16_t*>(&mmxResult);
-            _clientSamples[parentIndex + delayedChannelOffset] = shortResults[3];
-            _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[2];
-
-        } else if (i < numSamplesDelay) {
-            // MMX add a single delayed sample
-            __m64 bufferSamples = _mm_set_pi16(_clientSamples[parentIndex + delayedChannelOffset], 0, 0, 0);
-            __m64 addSamples = _mm_set_pi16(delayNextOutputStart[i] * attenuationAndWeakChannelRatio, 0, 0, 0);
-
-            __m64 mmxResult = _mm_adds_pi16(bufferSamples, addSamples);
-            int16_t* shortResults = reinterpret_cast<int16_t*>(&mmxResult);
-
-            _clientSamples[parentIndex + delayedChannelOffset] = shortResults[3];
+            _clientSamples[s] = shortResults[3];
+            _clientSamples[s + 1] = shortResults[2];
+            _clientSamples[s + 2] = shortResults[1];
+            _clientSamples[s + 3] = shortResults[0];
         }
     }
 }
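
In scalar terms, the new stereo branch above does nothing more than attenuate every interleaved sample and clamp it into the mix; a sketch under the same assumptions as the earlier ones:

    #include <cstdint>
    #include <algorithm>

    // Stereo buffers arrive already two-channel, so there is no delay and no per-channel
    // weighting - just distance attenuation into the existing mix.
    inline void mixStereoBuffer(int16_t* clientSamples, const int16_t* nextOutputStart,
                                int numInterleavedSamples, float attenuationCoefficient) {
        for (int s = 0; s < numInterleavedSamples; s++) {
            int32_t mixed = int32_t(clientSamples[s])
                + int32_t(int16_t(nextOutputStart[s] * attenuationCoefficient));
            mixed = std::max<int32_t>(INT16_MIN, std::min<int32_t>(INT16_MAX, mixed));
            clientSamples[s] = int16_t(mixed);
        }
    }

Here numInterleavedSamples corresponds to NETWORK_BUFFER_LENGTH_SAMPLES_STEREO in the loop above.
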
diff --git a/assignment-client/src/audio/AudioMixerClientData.cpp b/assignment-client/src/audio/AudioMixerClientData.cpp
index 85b8dde690..e21fadbd16 100644
--- a/assignment-client/src/audio/AudioMixerClientData.cpp
+++ b/assignment-client/src/audio/AudioMixerClientData.cpp
@@ -50,10 +50,22 @@ int AudioMixerClientData::parseData(const QByteArray& packet) {
     // grab the AvatarAudioRingBuffer from the vector (or create it if it doesn't exist)
     AvatarAudioRingBuffer* avatarRingBuffer = getAvatarAudioRingBuffer();
+
+    // read the first byte after the header to see if this is a stereo or mono buffer
+    quint8 channelFlag = packet.at(numBytesForPacketHeader(packet));
+    bool isStereo = channelFlag == 1;
+
+    if (avatarRingBuffer && avatarRingBuffer->isStereo() != isStereo) {
+        // there's a mismatch in the buffer channels for the incoming and current buffer
+        // so delete our current buffer and create a new one
+        _ringBuffers.removeOne(avatarRingBuffer);
+        avatarRingBuffer->deleteLater();
+        avatarRingBuffer = NULL;
+    }
 
     if (!avatarRingBuffer) {
         // we don't have an AvatarAudioRingBuffer yet, so add it
-        avatarRingBuffer = new AvatarAudioRingBuffer();
+        avatarRingBuffer = new AvatarAudioRingBuffer(isStereo);
         _ringBuffers.push_back(avatarRingBuffer);
     }
 
diff --git a/assignment-client/src/audio/AudioMixerClientData.h b/assignment-client/src/audio/AudioMixerClientData.h
index a5f03ebd15..70b653301b 100644
--- a/assignment-client/src/audio/AudioMixerClientData.h
+++ b/assignment-client/src/audio/AudioMixerClientData.h
@@ -24,14 +24,14 @@ public:
     AudioMixerClientData();
     ~AudioMixerClientData();
 
-    const std::vector<PositionalAudioRingBuffer*> getRingBuffers() const { return _ringBuffers; }
+    const QList<PositionalAudioRingBuffer*> getRingBuffers() const { return _ringBuffers; }
     AvatarAudioRingBuffer* getAvatarAudioRingBuffer() const;
 
     int parseData(const QByteArray& packet);
     void checkBuffersBeforeFrameSend(int jitterBufferLengthSamples);
     void pushBuffersAfterFrameSend();
 private:
-    std::vector<PositionalAudioRingBuffer*> _ringBuffers;
+    QList<PositionalAudioRingBuffer*> _ringBuffers;
 };
 
 #endif // hifi_AudioMixerClientData_h
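
One likely reason for the std::vector to QList switch above is QList::removeOne(), which parseData now uses to drop a ring buffer whose channel count no longer matches the incoming stream; a minimal sketch of that pattern (RingBufferStub stands in for PositionalAudioRingBuffer):

    #include <QList>

    class RingBufferStub { };

    // Remove a stale buffer from the client's list; the patch defers the actual delete
    // with deleteLater() since these objects live alongside a Qt event loop.
    inline void dropStaleBuffer(QList<RingBufferStub*>& ringBuffers, RingBufferStub* stale) {
        ringBuffers.removeOne(stale);
        delete stale;
    }
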
diff --git a/assignment-client/src/audio/AvatarAudioRingBuffer.cpp b/assignment-client/src/audio/AvatarAudioRingBuffer.cpp
index 9a7c2839d8..5613a64cc4 100644
--- a/assignment-client/src/audio/AvatarAudioRingBuffer.cpp
+++ b/assignment-client/src/audio/AvatarAudioRingBuffer.cpp
@@ -13,8 +13,8 @@
 
 #include "AvatarAudioRingBuffer.h"
 
-AvatarAudioRingBuffer::AvatarAudioRingBuffer() :
-    PositionalAudioRingBuffer(PositionalAudioRingBuffer::Microphone) {
+AvatarAudioRingBuffer::AvatarAudioRingBuffer(bool isStereo) :
+    PositionalAudioRingBuffer(PositionalAudioRingBuffer::Microphone, isStereo) {
 
 }
 
diff --git a/assignment-client/src/audio/AvatarAudioRingBuffer.h b/assignment-client/src/audio/AvatarAudioRingBuffer.h
index 1e61a82f68..f842c2aa33 100644
--- a/assignment-client/src/audio/AvatarAudioRingBuffer.h
+++ b/assignment-client/src/audio/AvatarAudioRingBuffer.h
@@ -18,7 +18,7 @@ class AvatarAudioRingBuffer : public PositionalAudioRingBuffer {
 public:
-    AvatarAudioRingBuffer();
+    AvatarAudioRingBuffer(bool isStereo = false);
 
     int parseData(const QByteArray& packet);
 
 private:
 
diff --git a/interface/src/Audio.cpp b/interface/src/Audio.cpp
index 1575aa524f..39392a5361 100644
--- a/interface/src/Audio.cpp
+++ b/interface/src/Audio.cpp
@@ -640,6 +640,9 @@ void Audio::handleAudioInput() {
         }
 
         char* currentPacketPtr = audioDataPacket + populatePacketHeader(audioDataPacket, packetType);
+
+        // set the mono/stereo byte
+        *currentPacketPtr++ = isStereo;
 
         // memcpy the three float positions
         memcpy(currentPacketPtr, &headPosition, sizeof(headPosition));
@@ -649,9 +652,6 @@ void Audio::handleAudioInput() {
         memcpy(currentPacketPtr, &headOrientation, sizeof(headOrientation));
         currentPacketPtr += sizeof(headOrientation);
 
-        // set the mono/stereo byte
-        *currentPacketPtr++ = isStereo;
-
         nodeList->writeDatagram(audioDataPacket, numAudioBytes + leadingBytes, audioMixer);
 
         Application::getInstance()->getBandwidthMeter()->outputStream(BandwidthMeter::AUDIO)
 
diff --git a/libraries/audio/src/PositionalAudioRingBuffer.cpp b/libraries/audio/src/PositionalAudioRingBuffer.cpp
index 0a3d2d0c16..94a88897e3 100644
--- a/libraries/audio/src/PositionalAudioRingBuffer.cpp
+++ b/libraries/audio/src/PositionalAudioRingBuffer.cpp
@@ -41,6 +41,9 @@ int PositionalAudioRingBuffer::parseData(const QByteArray& packet) {
     // skip the packet header (includes the source UUID)
     int readBytes = numBytesForPacketHeader(packet);
 
+    // hop over the channel flag that has already been read in AudioMixerClientData
+    readBytes += sizeof(quint8);
+
     // read the positional data
     readBytes += parsePositionalData(packet.mid(readBytes));
 
     if (packetTypeForPacket(packet) == PacketTypeSilentAudioFrame) {
 
diff --git a/libraries/audio/src/PositionalAudioRingBuffer.h b/libraries/audio/src/PositionalAudioRingBuffer.h
index 1864271d5f..17a663d5f6 100644
--- a/libraries/audio/src/PositionalAudioRingBuffer.h
+++ b/libraries/audio/src/PositionalAudioRingBuffer.h
@@ -41,6 +41,8 @@ public:
     bool shouldLoopbackForNode() const { return _shouldLoopbackForNode; }
 
+    bool isStereo() const { return _isStereo; }
+
     PositionalAudioRingBuffer::Type getType() const { return _type; }
     const glm::vec3& getPosition() const { return _position; }
     const glm::quat& getOrientation() const { return _orientation; }
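
Taken together, the interface and library changes above settle on a payload layout where the channel flag sits immediately after the packet header, ahead of the positional data - which is what lets AudioMixerClientData peek at a single byte and PositionalAudioRingBuffer::parseData skip exactly one quint8. A sketch of the writer side, mirroring the reordered Audio::handleAudioInput() (the real header length comes from populatePacketHeader / numBytesForPacketHeader):

    #include <cstring>
    #include <glm/glm.hpp>
    #include <glm/gtc/quaternion.hpp>

    // Assumed layout: [ header ][ quint8 channel flag ][ vec3 position ][ quat orientation ][ samples ... ]
    inline char* writeAudioPayloadPrefix(char* cursor, bool isStereo,
                                         const glm::vec3& headPosition, const glm::quat& headOrientation) {
        *cursor++ = isStereo;                                       // mono/stereo flag first
        memcpy(cursor, &headPosition, sizeof(headPosition));        // then the three float positions
        cursor += sizeof(headPosition);
        memcpy(cursor, &headOrientation, sizeof(headOrientation));  // then the orientation quaternion
        cursor += sizeof(headOrientation);
        return cursor;
    }
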