From e93e1a7c4fbc64898a0bfc00f80233a2239c4d9b Mon Sep 17 00:00:00 2001 From: Ken Cooke Date: Fri, 2 Dec 2016 18:20:57 -0800 Subject: [PATCH] Preliminary support for Ambisonic audio injectors. Supports 4-channel WAV files, presumed to be B-format (FuMa) first-order Ambisonic. Supports WAV with arbitrary sample rate (needs optimization). Supports soundfield volume and orientation set via script. Supports localOnly client-side injection using simple (non-spatialized) test renderer. --- libraries/audio-client/src/AudioClient.cpp | 104 ++++++++++++++++-- libraries/audio-client/src/AudioClient.h | 4 +- libraries/audio/src/AbstractAudioInterface.h | 2 +- libraries/audio/src/AudioConstants.h | 1 + libraries/audio/src/AudioInjector.cpp | 7 +- libraries/audio/src/AudioInjector.h | 2 + libraries/audio/src/AudioInjectorOptions.cpp | 1 + libraries/audio/src/AudioInjectorOptions.h | 1 + libraries/audio/src/Sound.cpp | 56 +++++++++- libraries/audio/src/Sound.h | 4 +- .../src/AudioScriptingInterface.cpp | 3 + 11 files changed, 168 insertions(+), 17 deletions(-) diff --git a/libraries/audio-client/src/AudioClient.cpp b/libraries/audio-client/src/AudioClient.cpp index 84c64398d3..52f56199e3 100644 --- a/libraries/audio-client/src/AudioClient.cpp +++ b/libraries/audio-client/src/AudioClient.cpp @@ -1098,13 +1098,104 @@ void AudioClient::mixLocalAudioInjectors(float* mixBuffer) { for (AudioInjector* injector : getActiveLocalAudioInjectors()) { if (injector->getLocalBuffer()) { - qint64 samplesToRead = injector->isStereo() ? AudioConstants::NETWORK_FRAME_BYTES_STEREO : AudioConstants::NETWORK_FRAME_BYTES_PER_CHANNEL; + int numChannels = injector->isAmbisonic() ? 4 : (injector->isStereo() ? 2 : 1); + qint64 bytesToRead = numChannels * AudioConstants::NETWORK_FRAME_BYTES_PER_CHANNEL; - // get one frame from the injector (mono or stereo) - memset(_scratchBuffer, 0, sizeof(_scratchBuffer)); - if (0 < injector->getLocalBuffer()->readData((char*)_scratchBuffer, samplesToRead)) { + // get one frame from the injector + memset(_scratchBuffer, 0, bytesToRead); + if (0 < injector->getLocalBuffer()->readData((char*)_scratchBuffer, bytesToRead)) { - if (injector->isStereo()) { + if (injector->isAmbisonic()) { + + float gain = injector->getVolume(); + + // injector orientation can be used to align a recording to our world coordinates + glm::quat relativeOrientation = injector->getOrientation() * glm::inverse(_orientationGetter()); + + ////////////// debug ////////////////// + { + float x = relativeOrientation.x; + float y = relativeOrientation.y; + float z = relativeOrientation.z; + float w = relativeOrientation.w; + + float azimuth = atan2f(2.0f*y*w - 2.0f*x*z, 1.0f - 2.0f*y*y - 2.0f*z*z) * (180/3.141592654f); + + // log only once per second + static int counter; + if (counter++ % 100 == 0) { + qCDebug(audioclient) << "Azimuth:" << azimuth; + } + } + ////////////// debug ////////////////// + + // convert quaternion to 3x3 rotation + float m[3][3]; + { + // convert from Y-up (OpenGL) to Z-up (Ambisonic) coordinate system + float x = -relativeOrientation.z; + float y = -relativeOrientation.x; + float z = relativeOrientation.y; + float w = relativeOrientation.w; + + float xx = x * (x + x); + float xy = x * (y + y); + float xz = x * (z + z); + + float yy = y * (y + y); + float yz = y * (z + z); + float zz = z * (z + z); + + float wx = w * (x + x); + float wy = w * (y + y); + float wz = w * (z + z); + + m[0][0] = 1.0f - (yy + zz); + m[0][1] = xy - wz; + m[0][2] = xz + wy; + + m[1][0] = xy + wz; + m[1][1] = 1.0f - (xx + zz); + m[1][2] = yz - wx; + + m[2][0] = xz - wy; + m[2][1] = yz + wx; + m[2][2] = 1.0f - (xx + yy); + } + + // convert to deinterleaved float + float buffer[4][AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL]; + for (int i = 0; i < AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL; i++) { + buffer[0][i] = (float)_scratchBuffer[4*i+0] * (1/32768.0f); + buffer[1][i] = (float)_scratchBuffer[4*i+1] * (1/32768.0f); + buffer[2][i] = (float)_scratchBuffer[4*i+2] * (1/32768.0f); + buffer[3][i] = (float)_scratchBuffer[4*i+3] * (1/32768.0f); + } + + // rotate the soundfield + for (int i = 0; i < AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL; i++) { + + float x = m[0][0] * buffer[1][i] + m[0][1] * buffer[2][i] + m[0][2] * buffer[3][i]; + float y = m[1][0] * buffer[1][i] + m[1][1] * buffer[2][i] + m[1][2] * buffer[3][i]; + float z = m[2][0] * buffer[1][i] + m[2][1] * buffer[2][i] + m[2][2] * buffer[3][i]; + + buffer[1][i] = x; + buffer[2][i] = y; + buffer[3][i] = z; + } + + // + // Ambisonic to simple (non-spatialized) stereo decoder, + // using virtual cardioid microphones facing +y and -y. + // + float wCoef = gain * 0.7071f; + float yCoef = gain * 0.5000f; + for (int i = 0; i < AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL; i++) { + mixBuffer[2*i+0] += wCoef * buffer[0][i] + yCoef * buffer[2][i]; + mixBuffer[2*i+1] += wCoef * buffer[0][i] - yCoef * buffer[2][i]; + } + + } else if (injector->isStereo()) { // stereo gets directly mixed into mixBuffer for (int i = 0; i < AudioConstants::NETWORK_FRAME_SAMPLES_STEREO; i++) { @@ -1225,8 +1316,7 @@ void AudioClient::setIsStereoInput(bool isStereoInput) { } } - -bool AudioClient::outputLocalInjector(bool isStereo, AudioInjector* injector) { +bool AudioClient::outputLocalInjector(AudioInjector* injector) { Lock lock(_injectorsMutex); if (injector->getLocalBuffer() && _audioInput ) { // just add it to the vector of active local injectors, if diff --git a/libraries/audio-client/src/AudioClient.h b/libraries/audio-client/src/AudioClient.h index d6f111cafc..8c7c5f3018 100644 --- a/libraries/audio-client/src/AudioClient.h +++ b/libraries/audio-client/src/AudioClient.h @@ -169,7 +169,7 @@ public slots: int setOutputBufferSize(int numFrames, bool persist = true); - bool outputLocalInjector(bool isStereo, AudioInjector* injector) override; + bool outputLocalInjector(AudioInjector* injector) override; bool shouldLoopbackInjectors() override { return _shouldEchoToServer; } bool switchInputToAudioDevice(const QString& inputDeviceName); @@ -297,7 +297,7 @@ private: // for local hrtf-ing float _mixBuffer[AudioConstants::NETWORK_FRAME_SAMPLES_STEREO]; - int16_t _scratchBuffer[AudioConstants::NETWORK_FRAME_SAMPLES_STEREO]; + int16_t _scratchBuffer[AudioConstants::NETWORK_FRAME_SAMPLES_AMBISONIC]; AudioLimiter _audioLimiter; // Adds Reverb diff --git a/libraries/audio/src/AbstractAudioInterface.h b/libraries/audio/src/AbstractAudioInterface.h index ec96462e73..d374249a09 100644 --- a/libraries/audio/src/AbstractAudioInterface.h +++ b/libraries/audio/src/AbstractAudioInterface.h @@ -32,7 +32,7 @@ public: PacketType packetType, QString codecName = QString("")); public slots: - virtual bool outputLocalInjector(bool isStereo, AudioInjector* injector) = 0; + virtual bool outputLocalInjector(AudioInjector* injector) = 0; virtual bool shouldLoopbackInjectors() { return false; } virtual void setIsStereoInput(bool stereo) = 0; diff --git a/libraries/audio/src/AudioConstants.h b/libraries/audio/src/AudioConstants.h index 353d9ddd9d..9172b8e859 100644 --- a/libraries/audio/src/AudioConstants.h +++ b/libraries/audio/src/AudioConstants.h @@ -33,6 +33,7 @@ namespace AudioConstants { const int NETWORK_FRAME_SAMPLES_STEREO = NETWORK_FRAME_BYTES_STEREO / SAMPLE_SIZE; const int NETWORK_FRAME_BYTES_PER_CHANNEL = NETWORK_FRAME_BYTES_STEREO / 2; const int NETWORK_FRAME_SAMPLES_PER_CHANNEL = NETWORK_FRAME_BYTES_PER_CHANNEL / SAMPLE_SIZE; + const int NETWORK_FRAME_SAMPLES_AMBISONIC = NETWORK_FRAME_SAMPLES_PER_CHANNEL * 4; const float NETWORK_FRAME_SECS = (AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL / float(AudioConstants::SAMPLE_RATE)); const float NETWORK_FRAME_MSECS = NETWORK_FRAME_SECS * 1000.0f; const float NETWORK_FRAMES_PER_SEC = 1.0f / NETWORK_FRAME_SECS; diff --git a/libraries/audio/src/AudioInjector.cpp b/libraries/audio/src/AudioInjector.cpp index 6f6534e2d2..f996361690 100644 --- a/libraries/audio/src/AudioInjector.cpp +++ b/libraries/audio/src/AudioInjector.cpp @@ -58,8 +58,10 @@ void AudioInjector::setOptions(const AudioInjectorOptions& options) { // since options.stereo is computed from the audio stream, // we need to copy it from existing options just in case. bool currentlyStereo = _options.stereo; + bool currentlyAmbisonic = _options.ambisonic; _options = options; _options.stereo = currentlyStereo; + _options.ambisonic = currentlyAmbisonic; } void AudioInjector::finishNetworkInjection() { @@ -134,7 +136,8 @@ bool AudioInjector::inject(bool(AudioInjectorManager::*injection)(AudioInjector* int byteOffset = 0; if (_options.secondOffset > 0.0f) { - byteOffset = (int)floorf(AudioConstants::SAMPLE_RATE * _options.secondOffset * (_options.stereo ? 2.0f : 1.0f)); + int numChannels = _options.ambisonic ? 4 : (_options.stereo ? 2 : 1); + byteOffset = (int)(AudioConstants::SAMPLE_RATE * _options.secondOffset * numChannels); byteOffset *= sizeof(AudioConstants::SAMPLE_SIZE); } _currentSendOffset = byteOffset; @@ -169,7 +172,7 @@ bool AudioInjector::injectLocally() { _localBuffer->setCurrentOffset(_currentSendOffset); // call this function on the AudioClient's thread - success = QMetaObject::invokeMethod(_localAudioInterface, "outputLocalInjector", Q_ARG(bool, _options.stereo), Q_ARG(AudioInjector*, this)); + success = QMetaObject::invokeMethod(_localAudioInterface, "outputLocalInjector", Q_ARG(AudioInjector*, this)); if (!success) { qCDebug(audio) << "AudioInjector::injectLocally could not output locally via _localAudioInterface"; diff --git a/libraries/audio/src/AudioInjector.h b/libraries/audio/src/AudioInjector.h index 7ee15a1b45..3f8e754134 100644 --- a/libraries/audio/src/AudioInjector.h +++ b/libraries/audio/src/AudioInjector.h @@ -63,7 +63,9 @@ public: bool isLocalOnly() const { return _options.localOnly; } float getVolume() const { return _options.volume; } glm::vec3 getPosition() const { return _options.position; } + glm::quat getOrientation() const { return _options.orientation; } bool isStereo() const { return _options.stereo; } + bool isAmbisonic() const { return _options.ambisonic; } bool stateHas(AudioInjectorState state) const ; static void setLocalAudioInterface(AbstractAudioInterface* audioInterface) { _localAudioInterface = audioInterface; } diff --git a/libraries/audio/src/AudioInjectorOptions.cpp b/libraries/audio/src/AudioInjectorOptions.cpp index 824a816382..eca1a4197d 100644 --- a/libraries/audio/src/AudioInjectorOptions.cpp +++ b/libraries/audio/src/AudioInjectorOptions.cpp @@ -19,6 +19,7 @@ AudioInjectorOptions::AudioInjectorOptions() : loop(false), orientation(glm::vec3(0.0f, 0.0f, 0.0f)), stereo(false), + ambisonic(false), ignorePenumbra(false), localOnly(false), secondOffset(0.0) diff --git a/libraries/audio/src/AudioInjectorOptions.h b/libraries/audio/src/AudioInjectorOptions.h index d06dc9eb63..40a3f343bd 100644 --- a/libraries/audio/src/AudioInjectorOptions.h +++ b/libraries/audio/src/AudioInjectorOptions.h @@ -25,6 +25,7 @@ public: bool loop; glm::quat orientation; bool stereo; + bool ambisonic; bool ignorePenumbra; bool localOnly; float secondOffset; diff --git a/libraries/audio/src/Sound.cpp b/libraries/audio/src/Sound.cpp index 73596f39c7..101c790aab 100644 --- a/libraries/audio/src/Sound.cpp +++ b/libraries/audio/src/Sound.cpp @@ -43,9 +43,10 @@ SoundScriptingInterface::SoundScriptingInterface(SharedSoundPointer sound) : _so QObject::connect(sound.data(), &Sound::ready, this, &SoundScriptingInterface::ready); } -Sound::Sound(const QUrl& url, bool isStereo) : +Sound::Sound(const QUrl& url, bool isStereo, bool isAmbisonic) : Resource(url), _isStereo(isStereo), + _isAmbisonic(isAmbisonic), _isReady(false) { @@ -74,7 +75,7 @@ void Sound::downloadFinished(const QByteArray& data) { qCDebug(audio) << "Processing sound of" << rawAudioByteArray.size() << "bytes from" << getURL() << "as stereo audio file."; } - // Process as RAW file + // Process as 48khz RAW file downSample(rawAudioByteArray, 48000); } else { qCDebug(audio) << "Unknown sound file type"; @@ -96,6 +97,51 @@ void Sound::downSample(const QByteArray& rawAudioByteArray, int sampleRate) { // no resampling needed _byteArray = rawAudioByteArray; + } else if (_isAmbisonic) { + + // FIXME: add a proper Ambisonic resampler! + int numChannels = 4; + AudioSRC resampler[4] { {sampleRate, AudioConstants::SAMPLE_RATE, 1}, + {sampleRate, AudioConstants::SAMPLE_RATE, 1}, + {sampleRate, AudioConstants::SAMPLE_RATE, 1}, + {sampleRate, AudioConstants::SAMPLE_RATE, 1} }; + + // resize to max possible output + int numSourceFrames = rawAudioByteArray.size() / (numChannels * sizeof(AudioConstants::AudioSample)); + int maxDestinationFrames = resampler[0].getMaxOutput(numSourceFrames); + int maxDestinationBytes = maxDestinationFrames * numChannels * sizeof(AudioConstants::AudioSample); + _byteArray.resize(maxDestinationBytes); + + int numDestinationFrames = 0; + + // iterate over channels + int16_t* srcBuffer = new int16_t[numSourceFrames]; + int16_t* dstBuffer = new int16_t[maxDestinationFrames]; + for (int ch = 0; ch < 4; ch++) { + + int16_t* src = (int16_t*)rawAudioByteArray.data(); + int16_t* dst = (int16_t*)_byteArray.data(); + + // deinterleave samples + for (int i = 0; i < numSourceFrames; i++) { + srcBuffer[i] = src[4*i + ch]; + } + + // resample one channel + numDestinationFrames = resampler[ch].render(srcBuffer, dstBuffer, numSourceFrames); + + // reinterleave samples + for (int i = 0; i < numDestinationFrames; i++) { + dst[4*i + ch] = dstBuffer[i]; + } + } + delete[] srcBuffer; + delete[] dstBuffer; + + // truncate to actual output + int numDestinationBytes = numDestinationFrames * numChannels * sizeof(AudioConstants::AudioSample); + _byteArray.resize(numDestinationBytes); + } else { int numChannels = _isStereo ? 2 : 1; @@ -200,8 +246,10 @@ int Sound::interpretAsWav(const QByteArray& inputAudioByteArray, QByteArray& out } if (qFromLittleEndian(fileHeader.wave.numChannels) == 2) { _isStereo = true; - } else if (qFromLittleEndian(fileHeader.wave.numChannels) > 2) { - qCDebug(audio) << "Currently not support audio files with more than 2 channels."; + } else if (qFromLittleEndian(fileHeader.wave.numChannels) == 4) { + _isAmbisonic = true; + } else if (qFromLittleEndian(fileHeader.wave.numChannels) != 1) { + qCDebug(audio) << "Currently not support audio files with other than 1/2/4 channels."; return 0; } diff --git a/libraries/audio/src/Sound.h b/libraries/audio/src/Sound.h index 73d5ecd03d..37d5b40e95 100644 --- a/libraries/audio/src/Sound.h +++ b/libraries/audio/src/Sound.h @@ -22,9 +22,10 @@ class Sound : public Resource { Q_OBJECT public: - Sound(const QUrl& url, bool isStereo = false); + Sound(const QUrl& url, bool isStereo = false, bool isAmbisonic = false); bool isStereo() const { return _isStereo; } + bool isAmbisonic() const { return _isAmbisonic; } bool isReady() const { return _isReady; } float getDuration() const { return _duration; } @@ -37,6 +38,7 @@ signals: private: QByteArray _byteArray; bool _isStereo; + bool _isAmbisonic; bool _isReady; float _duration; // In seconds diff --git a/libraries/script-engine/src/AudioScriptingInterface.cpp b/libraries/script-engine/src/AudioScriptingInterface.cpp index ddb4c2117e..54bc00a6bd 100644 --- a/libraries/script-engine/src/AudioScriptingInterface.cpp +++ b/libraries/script-engine/src/AudioScriptingInterface.cpp @@ -45,6 +45,9 @@ ScriptAudioInjector* AudioScriptingInterface::playSound(SharedSoundPointer sound // stereo option isn't set from script, this comes from sound metadata or filename AudioInjectorOptions optionsCopy = injectorOptions; optionsCopy.stereo = sound->isStereo(); + optionsCopy.ambisonic = sound->isAmbisonic(); + optionsCopy.localOnly = sound->isAmbisonic(); // always localOnly for Ambisonic + auto injector = AudioInjector::playSound(sound->getByteArray(), optionsCopy); if (!injector) { return NULL;