From 7da091d2b41010dcb8d81189ecf2df0caa123f64 Mon Sep 17 00:00:00 2001 From: Stephen Birarda Date: Fri, 6 Jun 2014 10:15:09 -0700 Subject: [PATCH 1/4] initial hooks for stereo audio --- interface/src/Audio.cpp | 283 ++++++++++-------- interface/src/Audio.h | 2 + interface/src/Menu.cpp | 2 + interface/src/Menu.h | 1 + .../audio/src/PositionalAudioRingBuffer.cpp | 7 +- .../audio/src/PositionalAudioRingBuffer.h | 3 +- libraries/networking/src/PacketHeaders.cpp | 3 + 7 files changed, 169 insertions(+), 132 deletions(-) diff --git a/interface/src/Audio.cpp b/interface/src/Audio.cpp index 50ab720450..6d672b736d 100644 --- a/interface/src/Audio.cpp +++ b/interface/src/Audio.cpp @@ -68,6 +68,7 @@ Audio::Audio(int16_t initialJitterBufferSamples, QObject* parent) : _proceduralOutputDevice(NULL), _inputRingBuffer(0), _ringBuffer(NETWORK_BUFFER_LENGTH_BYTES_PER_CHANNEL), + _isStereoInput(false), _averagedLatency(0.0), _measuredJitter(0), _jitterBufferSamples(initialJitterBufferSamples), @@ -405,12 +406,12 @@ bool Audio::switchOutputToAudioDevice(const QString& outputDeviceName) { } void Audio::handleAudioInput() { - static char monoAudioDataPacket[MAX_PACKET_SIZE]; + static char audioDataPacket[MAX_PACKET_SIZE]; static int numBytesPacketHeader = numBytesForPacketHeaderGivenPacketType(PacketTypeMicrophoneAudioNoEcho); static int leadingBytes = numBytesPacketHeader + sizeof(glm::vec3) + sizeof(glm::quat); - static int16_t* monoAudioSamples = (int16_t*) (monoAudioDataPacket + leadingBytes); + static int16_t* networkAudioSamples = (int16_t*) (audioDataPacket + leadingBytes); float inputToNetworkInputRatio = calculateDeviceToNetworkInputRatio(_numInputCallbackBytes); @@ -452,125 +453,130 @@ void Audio::handleAudioInput() { int16_t* inputAudioSamples = new int16_t[inputSamplesRequired]; _inputRingBuffer.readSamples(inputAudioSamples, inputSamplesRequired); + + int numNetworkBytes = _isStereoInput ? NETWORK_BUFFER_LENGTH_BYTES_STEREO : NETWORK_BUFFER_LENGTH_BYTES_PER_CHANNEL; + int numNetworkSamples = _isStereoInput ? NETWORK_BUFFER_LENGTH_SAMPLES_STEREO : NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL; // zero out the monoAudioSamples array and the locally injected audio - memset(monoAudioSamples, 0, NETWORK_BUFFER_LENGTH_BYTES_PER_CHANNEL); + memset(networkAudioSamples, 0, numNetworkBytes); if (!_muted) { // we aren't muted, downsample the input audio - linearResampling((int16_t*) inputAudioSamples, - monoAudioSamples, - inputSamplesRequired, - NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL, + linearResampling((int16_t*) inputAudioSamples, networkAudioSamples, + inputSamplesRequired, numNetworkSamples, _inputFormat, _desiredInputFormat); - // - // Impose Noise Gate - // - // The Noise Gate is used to reject constant background noise by measuring the noise - // floor observed at the microphone and then opening the 'gate' to allow microphone - // signals to be transmitted when the microphone samples average level exceeds a multiple - // of the noise floor. - // - // NOISE_GATE_HEIGHT: How loud you have to speak relative to noise background to open the gate. - // Make this value lower for more sensitivity and less rejection of noise. - // NOISE_GATE_WIDTH: The number of samples in an audio frame for which the height must be exceeded - // to open the gate. - // NOISE_GATE_CLOSE_FRAME_DELAY: Once the noise is below the gate height for the frame, how many frames - // will we wait before closing the gate. - // NOISE_GATE_FRAMES_TO_AVERAGE: How many audio frames should we average together to compute noise floor. - // More means better rejection but also can reject continuous things like singing. - // NUMBER_OF_NOISE_SAMPLE_FRAMES: How often should we re-evaluate the noise floor? - - - float loudness = 0; - float thisSample = 0; - int samplesOverNoiseGate = 0; - - const float NOISE_GATE_HEIGHT = 7.0f; - const int NOISE_GATE_WIDTH = 5; - const int NOISE_GATE_CLOSE_FRAME_DELAY = 5; - const int NOISE_GATE_FRAMES_TO_AVERAGE = 5; - const float DC_OFFSET_AVERAGING = 0.99f; - const float CLIPPING_THRESHOLD = 0.90f; - - // - // Check clipping, adjust DC offset, and check if should open noise gate - // - float measuredDcOffset = 0.0f; - // Increment the time since the last clip - if (_timeSinceLastClip >= 0.0f) { - _timeSinceLastClip += (float) NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL / (float) SAMPLE_RATE; - } - - for (int i = 0; i < NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL; i++) { - measuredDcOffset += monoAudioSamples[i]; - monoAudioSamples[i] -= (int16_t) _dcOffset; - thisSample = fabsf(monoAudioSamples[i]); - if (thisSample >= (32767.0f * CLIPPING_THRESHOLD)) { - _timeSinceLastClip = 0.0f; + // only impose the noise gate and perform tone injection if we sending mono audio + if (!_isStereoInput) { + + // + // Impose Noise Gate + // + // The Noise Gate is used to reject constant background noise by measuring the noise + // floor observed at the microphone and then opening the 'gate' to allow microphone + // signals to be transmitted when the microphone samples average level exceeds a multiple + // of the noise floor. + // + // NOISE_GATE_HEIGHT: How loud you have to speak relative to noise background to open the gate. + // Make this value lower for more sensitivity and less rejection of noise. + // NOISE_GATE_WIDTH: The number of samples in an audio frame for which the height must be exceeded + // to open the gate. + // NOISE_GATE_CLOSE_FRAME_DELAY: Once the noise is below the gate height for the frame, how many frames + // will we wait before closing the gate. + // NOISE_GATE_FRAMES_TO_AVERAGE: How many audio frames should we average together to compute noise floor. + // More means better rejection but also can reject continuous things like singing. + // NUMBER_OF_NOISE_SAMPLE_FRAMES: How often should we re-evaluate the noise floor? + + + float loudness = 0; + float thisSample = 0; + int samplesOverNoiseGate = 0; + + const float NOISE_GATE_HEIGHT = 7.0f; + const int NOISE_GATE_WIDTH = 5; + const int NOISE_GATE_CLOSE_FRAME_DELAY = 5; + const int NOISE_GATE_FRAMES_TO_AVERAGE = 5; + const float DC_OFFSET_AVERAGING = 0.99f; + const float CLIPPING_THRESHOLD = 0.90f; + + // + // Check clipping, adjust DC offset, and check if should open noise gate + // + float measuredDcOffset = 0.0f; + // Increment the time since the last clip + if (_timeSinceLastClip >= 0.0f) { + _timeSinceLastClip += (float) NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL / (float) SAMPLE_RATE; } - loudness += thisSample; - // Noise Reduction: Count peaks above the average loudness - if (_noiseGateEnabled && (thisSample > (_noiseGateMeasuredFloor * NOISE_GATE_HEIGHT))) { - samplesOverNoiseGate++; - } - } - - measuredDcOffset /= NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL; - if (_dcOffset == 0.0f) { - // On first frame, copy over measured offset - _dcOffset = measuredDcOffset; - } else { - _dcOffset = DC_OFFSET_AVERAGING * _dcOffset + (1.0f - DC_OFFSET_AVERAGING) * measuredDcOffset; - } - - // Add tone injection if enabled - const float TONE_FREQ = 220.0f / SAMPLE_RATE * TWO_PI; - const float QUARTER_VOLUME = 8192.0f; - if (_toneInjectionEnabled) { - loudness = 0.0f; + for (int i = 0; i < NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL; i++) { - monoAudioSamples[i] = QUARTER_VOLUME * sinf(TONE_FREQ * (float)(i + _proceduralEffectSample)); - loudness += fabsf(monoAudioSamples[i]); - } - } - _lastInputLoudness = fabs(loudness / NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL); - - // If Noise Gate is enabled, check and turn the gate on and off - if (!_toneInjectionEnabled && _noiseGateEnabled) { - float averageOfAllSampleFrames = 0.0f; - _noiseSampleFrames[_noiseGateSampleCounter++] = _lastInputLoudness; - if (_noiseGateSampleCounter == NUMBER_OF_NOISE_SAMPLE_FRAMES) { - float smallestSample = FLT_MAX; - for (int i = 0; i <= NUMBER_OF_NOISE_SAMPLE_FRAMES - NOISE_GATE_FRAMES_TO_AVERAGE; i += NOISE_GATE_FRAMES_TO_AVERAGE) { - float thisAverage = 0.0f; - for (int j = i; j < i + NOISE_GATE_FRAMES_TO_AVERAGE; j++) { - thisAverage += _noiseSampleFrames[j]; - averageOfAllSampleFrames += _noiseSampleFrames[j]; - } - thisAverage /= NOISE_GATE_FRAMES_TO_AVERAGE; - - if (thisAverage < smallestSample) { - smallestSample = thisAverage; - } + measuredDcOffset += networkAudioSamples[i]; + networkAudioSamples[i] -= (int16_t) _dcOffset; + thisSample = fabsf(networkAudioSamples[i]); + if (thisSample >= (32767.0f * CLIPPING_THRESHOLD)) { + _timeSinceLastClip = 0.0f; + } + loudness += thisSample; + // Noise Reduction: Count peaks above the average loudness + if (_noiseGateEnabled && (thisSample > (_noiseGateMeasuredFloor * NOISE_GATE_HEIGHT))) { + samplesOverNoiseGate++; } - averageOfAllSampleFrames /= NUMBER_OF_NOISE_SAMPLE_FRAMES; - _noiseGateMeasuredFloor = smallestSample; - _noiseGateSampleCounter = 0; - } - if (samplesOverNoiseGate > NOISE_GATE_WIDTH) { - _noiseGateOpen = true; - _noiseGateFramesToClose = NOISE_GATE_CLOSE_FRAME_DELAY; + + measuredDcOffset /= NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL; + if (_dcOffset == 0.0f) { + // On first frame, copy over measured offset + _dcOffset = measuredDcOffset; } else { - if (--_noiseGateFramesToClose == 0) { - _noiseGateOpen = false; + _dcOffset = DC_OFFSET_AVERAGING * _dcOffset + (1.0f - DC_OFFSET_AVERAGING) * measuredDcOffset; + } + + // Add tone injection if enabled + const float TONE_FREQ = 220.0f / SAMPLE_RATE * TWO_PI; + const float QUARTER_VOLUME = 8192.0f; + if (_toneInjectionEnabled) { + loudness = 0.0f; + for (int i = 0; i < NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL; i++) { + networkAudioSamples[i] = QUARTER_VOLUME * sinf(TONE_FREQ * (float)(i + _proceduralEffectSample)); + loudness += fabsf(networkAudioSamples[i]); } } - if (!_noiseGateOpen) { - memset(monoAudioSamples, 0, NETWORK_BUFFER_LENGTH_BYTES_PER_CHANNEL); - _lastInputLoudness = 0; + _lastInputLoudness = fabs(loudness / NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL); + + // If Noise Gate is enabled, check and turn the gate on and off + if (!_toneInjectionEnabled && _noiseGateEnabled) { + float averageOfAllSampleFrames = 0.0f; + _noiseSampleFrames[_noiseGateSampleCounter++] = _lastInputLoudness; + if (_noiseGateSampleCounter == NUMBER_OF_NOISE_SAMPLE_FRAMES) { + float smallestSample = FLT_MAX; + for (int i = 0; i <= NUMBER_OF_NOISE_SAMPLE_FRAMES - NOISE_GATE_FRAMES_TO_AVERAGE; i += NOISE_GATE_FRAMES_TO_AVERAGE) { + float thisAverage = 0.0f; + for (int j = i; j < i + NOISE_GATE_FRAMES_TO_AVERAGE; j++) { + thisAverage += _noiseSampleFrames[j]; + averageOfAllSampleFrames += _noiseSampleFrames[j]; + } + thisAverage /= NOISE_GATE_FRAMES_TO_AVERAGE; + + if (thisAverage < smallestSample) { + smallestSample = thisAverage; + } + } + averageOfAllSampleFrames /= NUMBER_OF_NOISE_SAMPLE_FRAMES; + _noiseGateMeasuredFloor = smallestSample; + _noiseGateSampleCounter = 0; + + } + if (samplesOverNoiseGate > NOISE_GATE_WIDTH) { + _noiseGateOpen = true; + _noiseGateFramesToClose = NOISE_GATE_CLOSE_FRAME_DELAY; + } else { + if (--_noiseGateFramesToClose == 0) { + _noiseGateOpen = false; + } + } + if (!_noiseGateOpen) { + memset(networkAudioSamples, 0, NETWORK_BUFFER_LENGTH_BYTES_PER_CHANNEL); + _lastInputLoudness = 0; + } } } } else { @@ -580,19 +586,19 @@ void Audio::handleAudioInput() { // at this point we have clean monoAudioSamples, which match our target output... // this is what we should send to our interested listeners - if (_processSpatialAudio && !_muted && _audioOutput) { - QByteArray monoInputData((char*)monoAudioSamples, NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL * sizeof(int16_t)); + if (_processSpatialAudio && !_muted && !_isStereoInput && _audioOutput) { + QByteArray monoInputData((char*)networkAudioSamples, NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL * sizeof(int16_t)); emit processLocalAudio(_spatialAudioStart, monoInputData, _desiredInputFormat); } - if (_proceduralAudioOutput) { - processProceduralAudio(monoAudioSamples, NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL); + if (!_isStereoInput && _proceduralAudioOutput) { + processProceduralAudio(networkAudioSamples, NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL); } - if (_scopeEnabled && !_scopeEnabledPause) { + if (!_isStereoInput && _scopeEnabled && !_scopeEnabledPause) { unsigned int numMonoAudioChannels = 1; unsigned int monoAudioChannel = 0; - addBufferToScope(_scopeInput, _scopeInputOffset, monoAudioSamples, monoAudioChannel, numMonoAudioChannels); + addBufferToScope(_scopeInput, _scopeInputOffset, networkAudioSamples, monoAudioChannel, numMonoAudioChannels); _scopeInputOffset += NETWORK_SAMPLES_PER_FRAME; _scopeInputOffset %= _samplesPerScope; } @@ -615,7 +621,7 @@ void Audio::handleAudioInput() { packetType = PacketTypeSilentAudioFrame; // we need to indicate how many silent samples this is to the audio mixer - monoAudioSamples[0] = NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL; + networkAudioSamples[0] = NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL; numAudioBytes = sizeof(int16_t); } else { @@ -628,7 +634,7 @@ void Audio::handleAudioInput() { } } - char* currentPacketPtr = monoAudioDataPacket + populatePacketHeader(monoAudioDataPacket, packetType); + char* currentPacketPtr = audioDataPacket + populatePacketHeader(audioDataPacket, packetType); // memcpy the three float positions memcpy(currentPacketPtr, &headPosition, sizeof(headPosition)); @@ -638,7 +644,7 @@ void Audio::handleAudioInput() { memcpy(currentPacketPtr, &headOrientation, sizeof(headOrientation)); currentPacketPtr += sizeof(headOrientation); - nodeList->writeDatagram(monoAudioDataPacket, numAudioBytes + leadingBytes, audioMixer); + nodeList->writeDatagram(audioDataPacket, numAudioBytes + leadingBytes, audioMixer); Application::getInstance()->getBandwidthMeter()->outputStream(BandwidthMeter::AUDIO) .updateValue(numAudioBytes + leadingBytes); @@ -761,6 +767,24 @@ void Audio::toggleAudioNoiseReduction() { _noiseGateEnabled = !_noiseGateEnabled; } +void Audio::toggleStereoInput() { + int oldChannelCount = _desiredInputFormat.channelCount(); + QAction* stereoAudioOption = Menu::getInstance()->getActionForOption(MenuOption::StereoAudio); + + if (stereoAudioOption->isChecked()) { + _desiredInputFormat.setChannelCount(2); + _isStereoInput = true; + } else { + _desiredInputFormat.setChannelCount(1); + _isStereoInput = false; + } + + if (oldChannelCount != _desiredInputFormat.channelCount()) { + // change in channel count for desired input format, restart the input device + switchInputToAudioDevice(_inputAudioDeviceName); + } +} + void Audio::processReceivedAudio(const QByteArray& audioByteArray) { _ringBuffer.parseData(audioByteArray); @@ -1300,18 +1324,21 @@ bool Audio::switchInputToAudioDevice(const QAudioDeviceInfo& inputDeviceInfo) { if (adjustedFormatForAudioDevice(inputDeviceInfo, _desiredInputFormat, _inputFormat)) { qDebug() << "The format to be used for audio input is" << _inputFormat; - - _audioInput = new QAudioInput(inputDeviceInfo, _inputFormat, this); - _numInputCallbackBytes = calculateNumberOfInputCallbackBytes(_inputFormat); - _audioInput->setBufferSize(_numInputCallbackBytes); - - // how do we want to handle input working, but output not working? - int numFrameSamples = calculateNumberOfFrameSamples(_numInputCallbackBytes); - _inputRingBuffer.resizeForFrameSize(numFrameSamples); - _inputDevice = _audioInput->start(); - connect(_inputDevice, SIGNAL(readyRead()), this, SLOT(handleAudioInput())); - - supportedFormat = true; + + // if the user wants stereo but this device can't provide then bail + if (!_isStereoInput || _inputFormat.channelCount() == 2) { + _audioInput = new QAudioInput(inputDeviceInfo, _inputFormat, this); + _numInputCallbackBytes = calculateNumberOfInputCallbackBytes(_inputFormat); + _audioInput->setBufferSize(_numInputCallbackBytes); + + // how do we want to handle input working, but output not working? + int numFrameSamples = calculateNumberOfFrameSamples(_numInputCallbackBytes); + _inputRingBuffer.resizeForFrameSize(numFrameSamples); + _inputDevice = _audioInput->start(); + connect(_inputDevice, SIGNAL(readyRead()), this, SLOT(handleAudioInput())); + + supportedFormat = true; + } } } return supportedFormat; diff --git a/interface/src/Audio.h b/interface/src/Audio.h index 79f0f84ff5..74fc373cb0 100644 --- a/interface/src/Audio.h +++ b/interface/src/Audio.h @@ -85,6 +85,7 @@ public slots: void toggleScope(); void toggleScopePause(); void toggleAudioSpatialProcessing(); + void toggleStereoInput(); void selectAudioScopeFiveFrames(); void selectAudioScopeTwentyFrames(); void selectAudioScopeFiftyFrames(); @@ -127,6 +128,7 @@ private: QIODevice* _proceduralOutputDevice; AudioRingBuffer _inputRingBuffer; AudioRingBuffer _ringBuffer; + bool _isStereoInput; QString _inputAudioDeviceName; QString _outputAudioDeviceName; diff --git a/interface/src/Menu.cpp b/interface/src/Menu.cpp index 5d9cd1f1c4..9ab47cdeda 100644 --- a/interface/src/Menu.cpp +++ b/interface/src/Menu.cpp @@ -432,6 +432,8 @@ Menu::Menu() : SLOT(toggleAudioNoiseReduction())); addCheckableActionToQMenuAndActionHash(audioDebugMenu, MenuOption::EchoServerAudio); addCheckableActionToQMenuAndActionHash(audioDebugMenu, MenuOption::EchoLocalAudio); + addCheckableActionToQMenuAndActionHash(audioDebugMenu, MenuOption::StereoAudio, 0, false, + appInstance->getAudio(), SLOT(toggleStereoInput())); addCheckableActionToQMenuAndActionHash(audioDebugMenu, MenuOption::MuteAudio, Qt::CTRL | Qt::Key_M, false, diff --git a/interface/src/Menu.h b/interface/src/Menu.h index 6bc9adef05..f9af80119b 100644 --- a/interface/src/Menu.h +++ b/interface/src/Menu.h @@ -402,6 +402,7 @@ namespace MenuOption { const QString StandOnNearbyFloors = "Stand on nearby floors"; const QString Stars = "Stars"; const QString Stats = "Stats"; + const QString StereoAudio = "Stereo Audio"; const QString StopAllScripts = "Stop All Scripts"; const QString SuppressShortTimings = "Suppress Timings Less than 10ms"; const QString TestPing = "Test Ping"; diff --git a/libraries/audio/src/PositionalAudioRingBuffer.cpp b/libraries/audio/src/PositionalAudioRingBuffer.cpp index 6fc16c57a9..0a3d2d0c16 100644 --- a/libraries/audio/src/PositionalAudioRingBuffer.cpp +++ b/libraries/audio/src/PositionalAudioRingBuffer.cpp @@ -20,14 +20,15 @@ #include "PositionalAudioRingBuffer.h" -PositionalAudioRingBuffer::PositionalAudioRingBuffer(PositionalAudioRingBuffer::Type type) : - AudioRingBuffer(NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL), +PositionalAudioRingBuffer::PositionalAudioRingBuffer(PositionalAudioRingBuffer::Type type, bool isStereo) : + AudioRingBuffer(isStereo ? NETWORK_BUFFER_LENGTH_SAMPLES_STEREO : NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL), _type(type), _position(0.0f, 0.0f, 0.0f), _orientation(0.0f, 0.0f, 0.0f, 0.0f), _willBeAddedToMix(false), _shouldLoopbackForNode(false), - _shouldOutputStarveDebug(true) + _shouldOutputStarveDebug(true), + _isStereo(isStereo) { } diff --git a/libraries/audio/src/PositionalAudioRingBuffer.h b/libraries/audio/src/PositionalAudioRingBuffer.h index b130a9b216..1864271d5f 100644 --- a/libraries/audio/src/PositionalAudioRingBuffer.h +++ b/libraries/audio/src/PositionalAudioRingBuffer.h @@ -24,7 +24,7 @@ public: Injector }; - PositionalAudioRingBuffer(PositionalAudioRingBuffer::Type type); + PositionalAudioRingBuffer(PositionalAudioRingBuffer::Type type, bool isStereo = false); ~PositionalAudioRingBuffer(); int parseData(const QByteArray& packet); @@ -56,6 +56,7 @@ protected: bool _willBeAddedToMix; bool _shouldLoopbackForNode; bool _shouldOutputStarveDebug; + bool _isStereo; float _nextOutputTrailingLoudness; }; diff --git a/libraries/networking/src/PacketHeaders.cpp b/libraries/networking/src/PacketHeaders.cpp index c56dba9cf1..751c6f45b2 100644 --- a/libraries/networking/src/PacketHeaders.cpp +++ b/libraries/networking/src/PacketHeaders.cpp @@ -47,6 +47,9 @@ int packArithmeticallyCodedValue(int value, char* destination) { PacketVersion versionForPacketType(PacketType type) { switch (type) { + case PacketTypeMicrophoneAudioNoEcho: + case PacketTypeMicrophoneAudioWithEcho: + return 1; case PacketTypeAvatarData: return 3; case PacketTypeAvatarIdentity: From 681ce247d6230ffe561dcfe8cda2eaefa3c884c0 Mon Sep 17 00:00:00 2001 From: Stephen Birarda Date: Fri, 6 Jun 2014 10:27:59 -0700 Subject: [PATCH 2/4] tweak resample to handle 48 to 24 stereo, add stereo/mono byte --- interface/src/Audio.cpp | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/interface/src/Audio.cpp b/interface/src/Audio.cpp index 6d672b736d..1575aa524f 100644 --- a/interface/src/Audio.cpp +++ b/interface/src/Audio.cpp @@ -290,20 +290,27 @@ void linearResampling(int16_t* sourceSamples, int16_t* destinationSamples, if (sourceToDestinationFactor >= 2) { // we need to downsample from 48 to 24 // for now this only supports a mono output - this would be the case for audio input - - for (unsigned int i = sourceAudioFormat.channelCount(); i < numSourceSamples; i += 2 * sourceAudioFormat.channelCount()) { - if (i + (sourceAudioFormat.channelCount()) >= numSourceSamples) { - destinationSamples[(i - sourceAudioFormat.channelCount()) / (int) sourceToDestinationFactor] = + if (destinationAudioFormat.channelCount() == 1) { + for (unsigned int i = sourceAudioFormat.channelCount(); i < numSourceSamples; i += 2 * sourceAudioFormat.channelCount()) { + if (i + (sourceAudioFormat.channelCount()) >= numSourceSamples) { + destinationSamples[(i - sourceAudioFormat.channelCount()) / (int) sourceToDestinationFactor] = (sourceSamples[i - sourceAudioFormat.channelCount()] / 2) + (sourceSamples[i] / 2); - } else { - destinationSamples[(i - sourceAudioFormat.channelCount()) / (int) sourceToDestinationFactor] = + } else { + destinationSamples[(i - sourceAudioFormat.channelCount()) / (int) sourceToDestinationFactor] = (sourceSamples[i - sourceAudioFormat.channelCount()] / 4) + (sourceSamples[i] / 2) + (sourceSamples[i + sourceAudioFormat.channelCount()] / 4); + } + } + } else { + // this is a 48 to 24 resampling but both source and destination are two channels + // squish two samples into one in each channel + for (int i = 0; i < numSourceSamples; i += 2) { + destinationSamples[i / 2] = (sourceSamples[i] / 2) + (sourceSamples[i + 2] / 2); + destinationSamples[(i / 2) + 1] = (sourceSamples[i + 1] / 2) + (sourceSamples[i + 3] / 2); } } - } else { if (sourceAudioFormat.sampleRate() == destinationAudioFormat.sampleRate()) { // mono to stereo, same sample rate @@ -409,7 +416,7 @@ void Audio::handleAudioInput() { static char audioDataPacket[MAX_PACKET_SIZE]; static int numBytesPacketHeader = numBytesForPacketHeaderGivenPacketType(PacketTypeMicrophoneAudioNoEcho); - static int leadingBytes = numBytesPacketHeader + sizeof(glm::vec3) + sizeof(glm::quat); + static int leadingBytes = numBytesPacketHeader + sizeof(glm::vec3) + sizeof(glm::quat) + sizeof(quint8); static int16_t* networkAudioSamples = (int16_t*) (audioDataPacket + leadingBytes); @@ -610,9 +617,7 @@ void Audio::handleAudioInput() { MyAvatar* interfaceAvatar = Application::getInstance()->getAvatar(); glm::vec3 headPosition = interfaceAvatar->getHead()->getPosition(); glm::quat headOrientation = interfaceAvatar->getHead()->getFinalOrientation(); - - // we need the amount of bytes in the buffer + 1 for type - // + 12 for 3 floats for position + float for bearing + 1 attenuation byte + quint8 isStereo = _isStereoInput ? 1 : 0; int numAudioBytes = 0; @@ -621,7 +626,7 @@ void Audio::handleAudioInput() { packetType = PacketTypeSilentAudioFrame; // we need to indicate how many silent samples this is to the audio mixer - networkAudioSamples[0] = NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL; + audioDataPacket[0] = NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL; numAudioBytes = sizeof(int16_t); } else { @@ -644,6 +649,9 @@ void Audio::handleAudioInput() { memcpy(currentPacketPtr, &headOrientation, sizeof(headOrientation)); currentPacketPtr += sizeof(headOrientation); + // set the mono/stereo byte + *currentPacketPtr++ = isStereo; + nodeList->writeDatagram(audioDataPacket, numAudioBytes + leadingBytes, audioMixer); Application::getInstance()->getBandwidthMeter()->outputStream(BandwidthMeter::AUDIO) From ae2f6a3cb676294457beae372a0bc7bb9dc1728f Mon Sep 17 00:00:00 2001 From: Stephen Birarda Date: Fri, 6 Jun 2014 10:55:04 -0700 Subject: [PATCH 3/4] complete piping of stereo audio through mixer --- assignment-client/src/audio/AudioMixer.cpp | 242 ++++++++++-------- .../src/audio/AudioMixerClientData.cpp | 14 +- .../src/audio/AudioMixerClientData.h | 4 +- .../src/audio/AvatarAudioRingBuffer.cpp | 4 +- .../src/audio/AvatarAudioRingBuffer.h | 2 +- interface/src/Audio.cpp | 6 +- .../audio/src/PositionalAudioRingBuffer.cpp | 3 + .../audio/src/PositionalAudioRingBuffer.h | 2 + 8 files changed, 160 insertions(+), 117 deletions(-) diff --git a/assignment-client/src/audio/AudioMixer.cpp b/assignment-client/src/audio/AudioMixer.cpp index 3acd783bb0..f8cfb3140c 100644 --- a/assignment-client/src/audio/AudioMixer.cpp +++ b/assignment-client/src/audio/AudioMixer.cpp @@ -173,134 +173,160 @@ void AudioMixer::addBufferToMixForListeningNodeWithBuffer(PositionalAudioRingBuf weakChannelAmplitudeRatio = 1 - (PHASE_AMPLITUDE_RATIO_AT_90 * sinRatio); } } - - // if the bearing relative angle to source is > 0 then the delayed channel is the right one - int delayedChannelOffset = (bearingRelativeAngleToSource > 0.0f) ? 1 : 0; - int goodChannelOffset = delayedChannelOffset == 0 ? 1 : 0; const int16_t* nextOutputStart = bufferToAdd->getNextOutput(); - - const int16_t* bufferStart = bufferToAdd->getBuffer(); - int ringBufferSampleCapacity = bufferToAdd->getSampleCapacity(); - - int16_t correctBufferSample[2], delayBufferSample[2]; - int delayedChannelIndex = 0; - const int SINGLE_STEREO_OFFSET = 2; - - for (int s = 0; s < NETWORK_BUFFER_LENGTH_SAMPLES_STEREO; s += 4) { + if (!bufferToAdd->isStereo()) { + // this is a mono buffer, which means it gets full attenuation and spatialization - // setup the int16_t variables for the two sample sets - correctBufferSample[0] = nextOutputStart[s / 2] * attenuationCoefficient; - correctBufferSample[1] = nextOutputStart[(s / 2) + 1] * attenuationCoefficient; + // if the bearing relative angle to source is > 0 then the delayed channel is the right one + int delayedChannelOffset = (bearingRelativeAngleToSource > 0.0f) ? 1 : 0; + int goodChannelOffset = delayedChannelOffset == 0 ? 1 : 0; - delayedChannelIndex = s + (numSamplesDelay * 2) + delayedChannelOffset; + const int16_t* bufferStart = bufferToAdd->getBuffer(); + int ringBufferSampleCapacity = bufferToAdd->getSampleCapacity(); - delayBufferSample[0] = correctBufferSample[0] * weakChannelAmplitudeRatio; - delayBufferSample[1] = correctBufferSample[1] * weakChannelAmplitudeRatio; + int16_t correctBufferSample[2], delayBufferSample[2]; + int delayedChannelIndex = 0; - __m64 bufferSamples = _mm_set_pi16(_clientSamples[s + goodChannelOffset], - _clientSamples[s + goodChannelOffset + SINGLE_STEREO_OFFSET], - _clientSamples[delayedChannelIndex], - _clientSamples[delayedChannelIndex + SINGLE_STEREO_OFFSET]); - __m64 addedSamples = _mm_set_pi16(correctBufferSample[0], correctBufferSample[1], - delayBufferSample[0], delayBufferSample[1]); + const int SINGLE_STEREO_OFFSET = 2; - // perform the MMX add (with saturation) of two correct and delayed samples - __m64 mmxResult = _mm_adds_pi16(bufferSamples, addedSamples); - int16_t* shortResults = reinterpret_cast(&mmxResult); - - // assign the results from the result of the mmx arithmetic - _clientSamples[s + goodChannelOffset] = shortResults[3]; - _clientSamples[s + goodChannelOffset + SINGLE_STEREO_OFFSET] = shortResults[2]; - _clientSamples[delayedChannelIndex] = shortResults[1]; - _clientSamples[delayedChannelIndex + SINGLE_STEREO_OFFSET] = shortResults[0]; - } - - // The following code is pretty gross and redundant, but AFAIK it's the best way to avoid - // too many conditionals in handling the delay samples at the beginning of _clientSamples. - // Basically we try to take the samples in batches of four, and then handle the remainder - // conditionally to get rid of the rest. - - const int DOUBLE_STEREO_OFFSET = 4; - const int TRIPLE_STEREO_OFFSET = 6; - - if (numSamplesDelay > 0) { - // if there was a sample delay for this buffer, we need to pull samples prior to the nextOutput - // to stick at the beginning - float attenuationAndWeakChannelRatio = attenuationCoefficient * weakChannelAmplitudeRatio; - const int16_t* delayNextOutputStart = nextOutputStart - numSamplesDelay; - if (delayNextOutputStart < bufferStart) { - delayNextOutputStart = bufferStart + ringBufferSampleCapacity - numSamplesDelay; + for (int s = 0; s < NETWORK_BUFFER_LENGTH_SAMPLES_STEREO; s += 4) { + + // setup the int16_t variables for the two sample sets + correctBufferSample[0] = nextOutputStart[s / 2] * attenuationCoefficient; + correctBufferSample[1] = nextOutputStart[(s / 2) + 1] * attenuationCoefficient; + + delayedChannelIndex = s + (numSamplesDelay * 2) + delayedChannelOffset; + + delayBufferSample[0] = correctBufferSample[0] * weakChannelAmplitudeRatio; + delayBufferSample[1] = correctBufferSample[1] * weakChannelAmplitudeRatio; + + __m64 bufferSamples = _mm_set_pi16(_clientSamples[s + goodChannelOffset], + _clientSamples[s + goodChannelOffset + SINGLE_STEREO_OFFSET], + _clientSamples[delayedChannelIndex], + _clientSamples[delayedChannelIndex + SINGLE_STEREO_OFFSET]); + __m64 addedSamples = _mm_set_pi16(correctBufferSample[0], correctBufferSample[1], + delayBufferSample[0], delayBufferSample[1]); + + // perform the MMX add (with saturation) of two correct and delayed samples + __m64 mmxResult = _mm_adds_pi16(bufferSamples, addedSamples); + int16_t* shortResults = reinterpret_cast(&mmxResult); + + // assign the results from the result of the mmx arithmetic + _clientSamples[s + goodChannelOffset] = shortResults[3]; + _clientSamples[s + goodChannelOffset + SINGLE_STEREO_OFFSET] = shortResults[2]; + _clientSamples[delayedChannelIndex] = shortResults[1]; + _clientSamples[delayedChannelIndex + SINGLE_STEREO_OFFSET] = shortResults[0]; } - int i = 0; + // The following code is pretty gross and redundant, but AFAIK it's the best way to avoid + // too many conditionals in handling the delay samples at the beginning of _clientSamples. + // Basically we try to take the samples in batches of four, and then handle the remainder + // conditionally to get rid of the rest. - while (i + 3 < numSamplesDelay) { - // handle the first cases where we can MMX add four samples at once + const int DOUBLE_STEREO_OFFSET = 4; + const int TRIPLE_STEREO_OFFSET = 6; + + if (numSamplesDelay > 0) { + // if there was a sample delay for this buffer, we need to pull samples prior to the nextOutput + // to stick at the beginning + float attenuationAndWeakChannelRatio = attenuationCoefficient * weakChannelAmplitudeRatio; + const int16_t* delayNextOutputStart = nextOutputStart - numSamplesDelay; + if (delayNextOutputStart < bufferStart) { + delayNextOutputStart = bufferStart + ringBufferSampleCapacity - numSamplesDelay; + } + + int i = 0; + + while (i + 3 < numSamplesDelay) { + // handle the first cases where we can MMX add four samples at once + int parentIndex = i * 2; + __m64 bufferSamples = _mm_set_pi16(_clientSamples[parentIndex + delayedChannelOffset], + _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset], + _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset], + _clientSamples[parentIndex + TRIPLE_STEREO_OFFSET + delayedChannelOffset]); + __m64 addSamples = _mm_set_pi16(delayNextOutputStart[i] * attenuationAndWeakChannelRatio, + delayNextOutputStart[i + 1] * attenuationAndWeakChannelRatio, + delayNextOutputStart[i + 2] * attenuationAndWeakChannelRatio, + delayNextOutputStart[i + 3] * attenuationAndWeakChannelRatio); + __m64 mmxResult = _mm_adds_pi16(bufferSamples, addSamples); + int16_t* shortResults = reinterpret_cast(&mmxResult); + + _clientSamples[parentIndex + delayedChannelOffset] = shortResults[3]; + _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[2]; + _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[1]; + _clientSamples[parentIndex + TRIPLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[0]; + + // push the index + i += 4; + } + int parentIndex = i * 2; - __m64 bufferSamples = _mm_set_pi16(_clientSamples[parentIndex + delayedChannelOffset], - _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset], - _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset], - _clientSamples[parentIndex + TRIPLE_STEREO_OFFSET + delayedChannelOffset]); - __m64 addSamples = _mm_set_pi16(delayNextOutputStart[i] * attenuationAndWeakChannelRatio, - delayNextOutputStart[i + 1] * attenuationAndWeakChannelRatio, - delayNextOutputStart[i + 2] * attenuationAndWeakChannelRatio, - delayNextOutputStart[i + 3] * attenuationAndWeakChannelRatio); - __m64 mmxResult = _mm_adds_pi16(bufferSamples, addSamples); - int16_t* shortResults = reinterpret_cast(&mmxResult); - _clientSamples[parentIndex + delayedChannelOffset] = shortResults[3]; - _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[2]; - _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[1]; - _clientSamples[parentIndex + TRIPLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[0]; - - // push the index - i += 4; + if (i + 2 < numSamplesDelay) { + // MMX add only three delayed samples + + __m64 bufferSamples = _mm_set_pi16(_clientSamples[parentIndex + delayedChannelOffset], + _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset], + _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset], + 0); + __m64 addSamples = _mm_set_pi16(delayNextOutputStart[i] * attenuationAndWeakChannelRatio, + delayNextOutputStart[i + 1] * attenuationAndWeakChannelRatio, + delayNextOutputStart[i + 2] * attenuationAndWeakChannelRatio, + 0); + __m64 mmxResult = _mm_adds_pi16(bufferSamples, addSamples); + int16_t* shortResults = reinterpret_cast(&mmxResult); + + _clientSamples[parentIndex + delayedChannelOffset] = shortResults[3]; + _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[2]; + _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[1]; + + } else if (i + 1 < numSamplesDelay) { + // MMX add two delayed samples + __m64 bufferSamples = _mm_set_pi16(_clientSamples[parentIndex + delayedChannelOffset], + _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset], 0, 0); + __m64 addSamples = _mm_set_pi16(delayNextOutputStart[i] * attenuationAndWeakChannelRatio, + delayNextOutputStart[i + 1] * attenuationAndWeakChannelRatio, 0, 0); + + __m64 mmxResult = _mm_adds_pi16(bufferSamples, addSamples); + int16_t* shortResults = reinterpret_cast(&mmxResult); + + _clientSamples[parentIndex + delayedChannelOffset] = shortResults[3]; + _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[2]; + + } else if (i < numSamplesDelay) { + // MMX add a single delayed sample + __m64 bufferSamples = _mm_set_pi16(_clientSamples[parentIndex + delayedChannelOffset], 0, 0, 0); + __m64 addSamples = _mm_set_pi16(delayNextOutputStart[i] * attenuationAndWeakChannelRatio, 0, 0, 0); + + __m64 mmxResult = _mm_adds_pi16(bufferSamples, addSamples); + int16_t* shortResults = reinterpret_cast(&mmxResult); + + _clientSamples[parentIndex + delayedChannelOffset] = shortResults[3]; + } } + } else { + // stereo buffer - do attenuation but no sample delay for spatialization + qDebug() << "Adding a stereo buffer"; - int parentIndex = i * 2; - - if (i + 2 < numSamplesDelay) { - // MMX add only three delayed samples + for (int s = 0; s < NETWORK_BUFFER_LENGTH_SAMPLES_STEREO; s += 4) { + // use MMX to clamp four additions at a time - __m64 bufferSamples = _mm_set_pi16(_clientSamples[parentIndex + delayedChannelOffset], - _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset], - _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset], - 0); - __m64 addSamples = _mm_set_pi16(delayNextOutputStart[i] * attenuationAndWeakChannelRatio, - delayNextOutputStart[i + 1] * attenuationAndWeakChannelRatio, - delayNextOutputStart[i + 2] * attenuationAndWeakChannelRatio, - 0); - __m64 mmxResult = _mm_adds_pi16(bufferSamples, addSamples); - int16_t* shortResults = reinterpret_cast(&mmxResult); - - _clientSamples[parentIndex + delayedChannelOffset] = shortResults[3]; - _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[2]; - _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[1]; - - } else if (i + 1 < numSamplesDelay) { - // MMX add two delayed samples - __m64 bufferSamples = _mm_set_pi16(_clientSamples[parentIndex + delayedChannelOffset], - _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset], 0, 0); - __m64 addSamples = _mm_set_pi16(delayNextOutputStart[i] * attenuationAndWeakChannelRatio, - delayNextOutputStart[i + 1] * attenuationAndWeakChannelRatio, 0, 0); + __m64 bufferSamples = _mm_set_pi16(_clientSamples[s], _clientSamples[s + 1], + _clientSamples[s + 2], _clientSamples[s + 3]); + __m64 addSamples = _mm_set_pi16(nextOutputStart[s] * attenuationCoefficient, + nextOutputStart[s + 1] * attenuationCoefficient, + nextOutputStart[s + 2] * attenuationCoefficient, + nextOutputStart[s + 3] * attenuationCoefficient); __m64 mmxResult = _mm_adds_pi16(bufferSamples, addSamples); int16_t* shortResults = reinterpret_cast(&mmxResult); - _clientSamples[parentIndex + delayedChannelOffset] = shortResults[3]; - _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[2]; - - } else if (i < numSamplesDelay) { - // MMX add a single delayed sample - __m64 bufferSamples = _mm_set_pi16(_clientSamples[parentIndex + delayedChannelOffset], 0, 0, 0); - __m64 addSamples = _mm_set_pi16(delayNextOutputStart[i] * attenuationAndWeakChannelRatio, 0, 0, 0); - - __m64 mmxResult = _mm_adds_pi16(bufferSamples, addSamples); - int16_t* shortResults = reinterpret_cast(&mmxResult); - - _clientSamples[parentIndex + delayedChannelOffset] = shortResults[3]; + _clientSamples[s] = shortResults[3]; + _clientSamples[s + 1] = shortResults[2]; + _clientSamples[s + 2] = shortResults[1]; + _clientSamples[s + 3] = shortResults[0]; } } } diff --git a/assignment-client/src/audio/AudioMixerClientData.cpp b/assignment-client/src/audio/AudioMixerClientData.cpp index 85b8dde690..e21fadbd16 100644 --- a/assignment-client/src/audio/AudioMixerClientData.cpp +++ b/assignment-client/src/audio/AudioMixerClientData.cpp @@ -50,10 +50,22 @@ int AudioMixerClientData::parseData(const QByteArray& packet) { // grab the AvatarAudioRingBuffer from the vector (or create it if it doesn't exist) AvatarAudioRingBuffer* avatarRingBuffer = getAvatarAudioRingBuffer(); + + // read the first byte after the header to see if this is a stereo or mono buffer + quint8 channelFlag = packet.at(numBytesForPacketHeader(packet)); + bool isStereo = channelFlag == 1; + + if (avatarRingBuffer && avatarRingBuffer->isStereo() != isStereo) { + // there's a mismatch in the buffer channels for the incoming and current buffer + // so delete our current buffer and create a new one + _ringBuffers.removeOne(avatarRingBuffer); + avatarRingBuffer->deleteLater(); + avatarRingBuffer = NULL; + } if (!avatarRingBuffer) { // we don't have an AvatarAudioRingBuffer yet, so add it - avatarRingBuffer = new AvatarAudioRingBuffer(); + avatarRingBuffer = new AvatarAudioRingBuffer(isStereo); _ringBuffers.push_back(avatarRingBuffer); } diff --git a/assignment-client/src/audio/AudioMixerClientData.h b/assignment-client/src/audio/AudioMixerClientData.h index a5f03ebd15..70b653301b 100644 --- a/assignment-client/src/audio/AudioMixerClientData.h +++ b/assignment-client/src/audio/AudioMixerClientData.h @@ -24,14 +24,14 @@ public: AudioMixerClientData(); ~AudioMixerClientData(); - const std::vector getRingBuffers() const { return _ringBuffers; } + const QList getRingBuffers() const { return _ringBuffers; } AvatarAudioRingBuffer* getAvatarAudioRingBuffer() const; int parseData(const QByteArray& packet); void checkBuffersBeforeFrameSend(int jitterBufferLengthSamples); void pushBuffersAfterFrameSend(); private: - std::vector _ringBuffers; + QList _ringBuffers; }; #endif // hifi_AudioMixerClientData_h diff --git a/assignment-client/src/audio/AvatarAudioRingBuffer.cpp b/assignment-client/src/audio/AvatarAudioRingBuffer.cpp index 9a7c2839d8..5613a64cc4 100644 --- a/assignment-client/src/audio/AvatarAudioRingBuffer.cpp +++ b/assignment-client/src/audio/AvatarAudioRingBuffer.cpp @@ -13,8 +13,8 @@ #include "AvatarAudioRingBuffer.h" -AvatarAudioRingBuffer::AvatarAudioRingBuffer() : - PositionalAudioRingBuffer(PositionalAudioRingBuffer::Microphone) { +AvatarAudioRingBuffer::AvatarAudioRingBuffer(bool isStereo) : + PositionalAudioRingBuffer(PositionalAudioRingBuffer::Microphone, isStereo) { } diff --git a/assignment-client/src/audio/AvatarAudioRingBuffer.h b/assignment-client/src/audio/AvatarAudioRingBuffer.h index 1e61a82f68..f842c2aa33 100644 --- a/assignment-client/src/audio/AvatarAudioRingBuffer.h +++ b/assignment-client/src/audio/AvatarAudioRingBuffer.h @@ -18,7 +18,7 @@ class AvatarAudioRingBuffer : public PositionalAudioRingBuffer { public: - AvatarAudioRingBuffer(); + AvatarAudioRingBuffer(bool isStereo = false); int parseData(const QByteArray& packet); private: diff --git a/interface/src/Audio.cpp b/interface/src/Audio.cpp index 1575aa524f..39392a5361 100644 --- a/interface/src/Audio.cpp +++ b/interface/src/Audio.cpp @@ -640,6 +640,9 @@ void Audio::handleAudioInput() { } char* currentPacketPtr = audioDataPacket + populatePacketHeader(audioDataPacket, packetType); + + // set the mono/stereo byte + *currentPacketPtr++ = isStereo; // memcpy the three float positions memcpy(currentPacketPtr, &headPosition, sizeof(headPosition)); @@ -649,9 +652,6 @@ void Audio::handleAudioInput() { memcpy(currentPacketPtr, &headOrientation, sizeof(headOrientation)); currentPacketPtr += sizeof(headOrientation); - // set the mono/stereo byte - *currentPacketPtr++ = isStereo; - nodeList->writeDatagram(audioDataPacket, numAudioBytes + leadingBytes, audioMixer); Application::getInstance()->getBandwidthMeter()->outputStream(BandwidthMeter::AUDIO) diff --git a/libraries/audio/src/PositionalAudioRingBuffer.cpp b/libraries/audio/src/PositionalAudioRingBuffer.cpp index 0a3d2d0c16..94a88897e3 100644 --- a/libraries/audio/src/PositionalAudioRingBuffer.cpp +++ b/libraries/audio/src/PositionalAudioRingBuffer.cpp @@ -41,6 +41,9 @@ int PositionalAudioRingBuffer::parseData(const QByteArray& packet) { // skip the packet header (includes the source UUID) int readBytes = numBytesForPacketHeader(packet); + // hop over the channel flag that has already been read in AudioMixerClientData + readBytes += sizeof(quint8); + // read the positional data readBytes += parsePositionalData(packet.mid(readBytes)); if (packetTypeForPacket(packet) == PacketTypeSilentAudioFrame) { diff --git a/libraries/audio/src/PositionalAudioRingBuffer.h b/libraries/audio/src/PositionalAudioRingBuffer.h index 1864271d5f..17a663d5f6 100644 --- a/libraries/audio/src/PositionalAudioRingBuffer.h +++ b/libraries/audio/src/PositionalAudioRingBuffer.h @@ -41,6 +41,8 @@ public: bool shouldLoopbackForNode() const { return _shouldLoopbackForNode; } + bool isStereo() const { return _isStereo; } + PositionalAudioRingBuffer::Type getType() const { return _type; } const glm::vec3& getPosition() const { return _position; } const glm::quat& getOrientation() const { return _orientation; } From f6011b65d7604ef303ee94f4ba2884e4c9ebfb54 Mon Sep 17 00:00:00 2001 From: Stephen Birarda Date: Fri, 6 Jun 2014 11:26:04 -0700 Subject: [PATCH 4/4] fix input loudness for a stereo source --- assignment-client/src/audio/AudioMixer.cpp | 4 +--- .../src/audio/AudioMixerClientData.cpp | 3 ++- interface/src/Audio.cpp | 17 +++++++++++++---- 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/assignment-client/src/audio/AudioMixer.cpp b/assignment-client/src/audio/AudioMixer.cpp index f8cfb3140c..61dee6c82b 100644 --- a/assignment-client/src/audio/AudioMixer.cpp +++ b/assignment-client/src/audio/AudioMixer.cpp @@ -307,9 +307,7 @@ void AudioMixer::addBufferToMixForListeningNodeWithBuffer(PositionalAudioRingBuf } } } else { - // stereo buffer - do attenuation but no sample delay for spatialization - qDebug() << "Adding a stereo buffer"; - + // stereo buffer - do attenuation but no sample delay for spatialization for (int s = 0; s < NETWORK_BUFFER_LENGTH_SAMPLES_STEREO; s += 4) { // use MMX to clamp four additions at a time diff --git a/assignment-client/src/audio/AudioMixerClientData.cpp b/assignment-client/src/audio/AudioMixerClientData.cpp index e21fadbd16..7fb2a7dcab 100644 --- a/assignment-client/src/audio/AudioMixerClientData.cpp +++ b/assignment-client/src/audio/AudioMixerClientData.cpp @@ -118,7 +118,8 @@ void AudioMixerClientData::pushBuffersAfterFrameSend() { PositionalAudioRingBuffer* audioBuffer = _ringBuffers[i]; if (audioBuffer->willBeAddedToMix()) { - audioBuffer->shiftReadPosition(NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL); + audioBuffer->shiftReadPosition(audioBuffer->isStereo() + ? NETWORK_BUFFER_LENGTH_SAMPLES_STEREO : NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL); audioBuffer->setWillBeAddedToMix(false); } else if (audioBuffer->getType() == PositionalAudioRingBuffer::Injector diff --git a/interface/src/Audio.cpp b/interface/src/Audio.cpp index 39392a5361..b012daa017 100644 --- a/interface/src/Audio.cpp +++ b/interface/src/Audio.cpp @@ -306,7 +306,7 @@ void linearResampling(int16_t* sourceSamples, int16_t* destinationSamples, } else { // this is a 48 to 24 resampling but both source and destination are two channels // squish two samples into one in each channel - for (int i = 0; i < numSourceSamples; i += 2) { + for (int i = 0; i < numSourceSamples; i += 4) { destinationSamples[i / 2] = (sourceSamples[i] / 2) + (sourceSamples[i + 2] / 2); destinationSamples[(i / 2) + 1] = (sourceSamples[i + 1] / 2) + (sourceSamples[i + 3] / 2); } @@ -585,6 +585,14 @@ void Audio::handleAudioInput() { _lastInputLoudness = 0; } } + } else { + float loudness = 0.0f; + + for (int i = 0; i < NETWORK_BUFFER_LENGTH_SAMPLES_STEREO; i++) { + loudness += fabsf(networkAudioSamples[i]); + } + + _lastInputLoudness = fabs(loudness / NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL); } } else { // our input loudness is 0, since we're muted @@ -626,11 +634,12 @@ void Audio::handleAudioInput() { packetType = PacketTypeSilentAudioFrame; // we need to indicate how many silent samples this is to the audio mixer - audioDataPacket[0] = NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL; + audioDataPacket[0] = _isStereoInput + ? NETWORK_BUFFER_LENGTH_SAMPLES_STEREO + : NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL; numAudioBytes = sizeof(int16_t); - } else { - numAudioBytes = NETWORK_BUFFER_LENGTH_BYTES_PER_CHANNEL; + numAudioBytes = _isStereoInput ? NETWORK_BUFFER_LENGTH_BYTES_STEREO : NETWORK_BUFFER_LENGTH_BYTES_PER_CHANNEL; if (Menu::getInstance()->isOptionChecked(MenuOption::EchoServerAudio)) { packetType = PacketTypeMicrophoneAudioWithEcho;