diff --git a/assignment-client/src/audio/AudioMixer.cpp b/assignment-client/src/audio/AudioMixer.cpp index b511c255b9..700642bf59 100644 --- a/assignment-client/src/audio/AudioMixer.cpp +++ b/assignment-client/src/audio/AudioMixer.cpp @@ -10,7 +10,6 @@ #include <fcntl.h> #include <fstream> #include <iostream> -#include <limits> #include <math.h> #include <signal.h> #include <stdio.h> @@ -56,9 +55,6 @@ const short JITTER_BUFFER_SAMPLES = JITTER_BUFFER_MSECS * (SAMPLE_RATE / 1000.0) const unsigned int BUFFER_SEND_INTERVAL_USECS = floorf((NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL / (float) SAMPLE_RATE) * 1000 * 1000); -const int MAX_SAMPLE_VALUE = std::numeric_limits<int16_t>::max(); -const int MIN_SAMPLE_VALUE = std::numeric_limits<int16_t>::min(); - const char AUDIO_MIXER_LOGGING_TARGET_NAME[] = "audio-mixer"; void attachNewBufferToNode(Node *newNode) { diff --git a/interface/src/Audio.cpp b/interface/src/Audio.cpp index e5f5df2311..bd7da88aef 100644 --- a/interface/src/Audio.cpp +++ b/interface/src/Audio.cpp @@ -47,6 +47,7 @@ Audio::Audio(Oscilloscope* scope, int16_t initialJitterBufferSamples, QObject* p _desiredInputFormat(), _inputFormat(), _inputBuffer(), + _monoAudioSamples(NULL), _numInputCallbackBytes(0), _audioOutput(NULL), _desiredOutputFormat(), @@ -172,8 +173,9 @@ void linearResampling(int16_t* sourceSamples, int16_t* destinationSamples, if (sourceAudioFormat == destinationAudioFormat) { memcpy(destinationSamples, sourceSamples, numSourceSamples * sizeof(int16_t)); } else { + int destinationChannels = (destinationAudioFormat.channelCount() >= 2) ? 2 : destinationAudioFormat.channelCount(); float sourceToDestinationFactor = (sourceAudioFormat.sampleRate() / (float) destinationAudioFormat.sampleRate()) - * (sourceAudioFormat.channelCount() / (float) destinationAudioFormat.channelCount()) ; + * (sourceAudioFormat.channelCount() / (float) destinationChannels); // take into account the number of channels in source and destination // accomodate for the case where have an output with > 2 channels @@ -184,40 +186,36 @@ void linearResampling(int16_t* sourceSamples, int16_t* destinationSamples, // for now this only supports a mono output - this would be the case for audio input for (int i = 2; i < numSourceSamples; i += 4) { - if (i + 2 >= numSourceSamples) { destinationSamples[(i - 2) / 4] = (sourceSamples[i - 2] / 2) - + (sourceSamples[i] / 2); + + (sourceSamples[i] / 2); } else { destinationSamples[(i - 2) / 4] = (sourceSamples[i - 2] / 4) - + (sourceSamples[i] / 2) - + (sourceSamples[i + 2] / 4); + + (sourceSamples[i] / 2) + + (sourceSamples[i + 2] / 4); } } } else { - int sourceIndex = 0; - // upsample from 24 to 48 - for (int i = 0; i < numDestinationSamples; i += destinationAudioFormat.channelCount()) { - sourceIndex = i * sourceToDestinationFactor; + // for now this only supports a stereo to stereo conversion - this is our case for network audio to output + int sourceIndex = 0; + int destinationToSourceFactor = (1 / sourceToDestinationFactor); + + for (int i = 0; i < numDestinationSamples; i += destinationAudioFormat.channelCount() * destinationToSourceFactor) { + sourceIndex = (i / destinationToSourceFactor); - if (sourceIndex >= numSourceSamples) { - sourceIndex -= destinationAudioFormat.channelCount(); - } - - destinationSamples[i] = sourceSamples[sourceIndex]; - - if (sourceAudioFormat.channelCount() == 1) { - destinationSamples[i + 1] = sourceSamples[sourceIndex]; - } else { - destinationSamples[i + 1] = sourceSamples[(sourceIndex) + 1]; - - if (destinationAudioFormat.channelCount() > 2) { - // fill the rest of the channels with silence - for (int j = 2; j < destinationAudioFormat.channelCount(); j++) { - destinationSamples[i] = 0; - } + // fill the L/R channels and make the rest silent + for (int j = i; j < i + (destinationToSourceFactor * destinationAudioFormat.channelCount()); j++) { + if (j % destinationAudioFormat.channelCount() == 0) { + // left channel + destinationSamples[j] = sourceSamples[sourceIndex]; + } else if (j % destinationAudioFormat.channelCount() == 1) { + // right channel + destinationSamples[j] = sourceSamples[sourceIndex + 1]; + } else { + // channels above 2, fill with silence + destinationSamples[j] = 0; } } } @@ -281,7 +279,8 @@ void Audio::handleAudioInput() { static int numBytesPacketHeader = numBytesForPacketHeader((unsigned char*) &PACKET_TYPE_MICROPHONE_AUDIO_NO_ECHO); static int leadingBytes = numBytesPacketHeader + sizeof(glm::vec3) + sizeof(glm::quat) + NUM_BYTES_RFC4122_UUID; - static int16_t* monoAudioSamples = (int16_t*) (monoAudioDataPacket + leadingBytes); + + _monoAudioSamples = (int16_t*) (monoAudioDataPacket + leadingBytes); static float inputToNetworkInputRatio = _numInputCallbackBytes * CALLBACK_ACCELERATOR_RATIO / NETWORK_BUFFER_LENGTH_BYTES_PER_CHANNEL; @@ -298,37 +297,23 @@ void Audio::handleAudioInput() { _inputRingBuffer.readSamples(inputAudioSamples, inputSamplesRequired); // zero out the monoAudioSamples array - memset(monoAudioSamples, 0, NETWORK_BUFFER_LENGTH_BYTES_PER_CHANNEL); + memset(_monoAudioSamples, 0, NETWORK_BUFFER_LENGTH_BYTES_PER_CHANNEL); if (!_muted) { // we aren't muted, downsample the input audio linearResampling((int16_t*) inputAudioSamples, - monoAudioSamples, + _monoAudioSamples, inputSamplesRequired, NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL, _inputFormat, _desiredInputFormat); // add input data just written to the scope QMetaObject::invokeMethod(_scope, "addSamples", Qt::QueuedConnection, - Q_ARG(QByteArray, QByteArray((char*) monoAudioSamples, + Q_ARG(QByteArray, QByteArray((char*) _monoAudioSamples, NETWORK_BUFFER_LENGTH_BYTES_PER_CHANNEL)), Q_ARG(bool, false), Q_ARG(bool, true)); } -// if (Menu::getInstance()->isOptionChecked(MenuOption::EchoLocalAudio)) { -// // if local loopback enabled, copy input to output -// QByteArray samplesForOutput; -// samplesForOutput.resize(inputSamplesRequired * outputToInputRatio * sizeof(int16_t)); -// -// linearResampling(monoAudioSamples, (int16_t*) samplesForOutput.data(), -// NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL, -// inputSamplesRequired, -// _desiredInputFormat, _outputFormat); -// -// _outputDevice->write(samplesForOutput); -// } - - // add procedural effects to the appropriate input samples // addProceduralSounds(monoAudioSamples + (_isBufferSendCallback // ? BUFFER_LENGTH_SAMPLES_PER_CHANNEL / CALLBACK_ACCELERATOR_RATIO : 0), @@ -440,6 +425,17 @@ void Audio::addReceivedAudioToBuffer(const QByteArray& audioByteArray) { int16_t ringBufferSamples[NETWORK_BUFFER_LENGTH_SAMPLES_STEREO]; _ringBuffer.readSamples(ringBufferSamples, NETWORK_BUFFER_LENGTH_SAMPLES_STEREO); + if (!_muted && Menu::getInstance()->isOptionChecked(MenuOption::EchoLocalAudio)) { + // copy whatever is pointed to at _monoAudioSamples into our ringBufferSamples + // so that local audio is echoed back + + for (int i = 0; i < NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL; i++) { + ringBufferSamples[i * 2] = glm::clamp(ringBufferSamples[i * 2] + _monoAudioSamples[i], + MIN_SAMPLE_VALUE, MAX_SAMPLE_VALUE); + ringBufferSamples[(i * 2) + 1] = ringBufferSamples[i * 2]; + } + } + // copy the packet from the RB to the output linearResampling(ringBufferSamples, (int16_t*) outputBuffer.data(), @@ -460,6 +456,7 @@ void Audio::addReceivedAudioToBuffer(const QByteArray& audioByteArray) { } else if (_audioOutput->bytesFree() == _audioOutput->bufferSize()) { // we don't have any audio data left in the output buffer, and the ring buffer from // the network has nothing in it either - we just starved + qDebug() << "Audio output just starved.\n"; _ringBuffer.setIsStarved(true); _numFramesDisplayStarve = 10; } diff --git a/interface/src/Audio.h b/interface/src/Audio.h index 8557120ab7..9a051b246b 100644 --- a/interface/src/Audio.h +++ b/interface/src/Audio.h @@ -76,6 +76,7 @@ private: QAudioFormat _inputFormat; QIODevice* _inputDevice; QByteArray _inputBuffer; + int16_t* _monoAudioSamples; int _numInputCallbackBytes; QAudioOutput* _audioOutput; QAudioFormat _desiredOutputFormat; diff --git a/libraries/audio/src/AudioRingBuffer.h b/libraries/audio/src/AudioRingBuffer.h index d715e63a97..3e6917456f 100644 --- a/libraries/audio/src/AudioRingBuffer.h +++ b/libraries/audio/src/AudioRingBuffer.h @@ -9,6 +9,7 @@ #ifndef __interface__AudioRingBuffer__ #define __interface__AudioRingBuffer__ +#include <limits> #include <stdint.h> #include <glm/glm.hpp> @@ -24,6 +25,9 @@ const int NETWORK_BUFFER_LENGTH_SAMPLES_STEREO = NETWORK_BUFFER_LENGTH_BYTES_STE const int NETWORK_BUFFER_LENGTH_BYTES_PER_CHANNEL = 512; const int NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL = NETWORK_BUFFER_LENGTH_BYTES_PER_CHANNEL / sizeof(int16_t); +const int MAX_SAMPLE_VALUE = std::numeric_limits<int16_t>::max(); +const int MIN_SAMPLE_VALUE = std::numeric_limits<int16_t>::min(); + class AudioRingBuffer : public NodeData { Q_OBJECT public: