From 7de784ce27d6c9d41e3847e74caf778d90b71c87 Mon Sep 17 00:00:00 2001 From: Zach Fox Date: Wed, 10 Oct 2018 12:32:55 -0700 Subject: [PATCH 01/18] First steps (definitely not working) --- assignment-client/src/Agent.cpp | 2 +- interface/src/Application.cpp | 14 ++- interface/src/assets/ATPAssetMigrator.cpp | 4 +- .../scripting/SpeechScriptingInterface.cpp | 96 +++++++++++++++++++ .../src/scripting/SpeechScriptingInterface.h | 76 +++++++++++++++ .../src/RenderableWebEntityItem.cpp | 2 +- libraries/entities/src/EntityEditFilters.cpp | 2 +- .../src/model-networking/TextureCache.cpp | 4 +- libraries/networking/src/AddressManager.cpp | 10 +- libraries/networking/src/DomainHandler.cpp | 2 +- .../networking/src/NetworkingConstants.h | 10 +- libraries/networking/src/ResourceCache.cpp | 2 +- libraries/networking/src/ResourceManager.cpp | 18 ++-- 13 files changed, 209 insertions(+), 33 deletions(-) create mode 100644 interface/src/scripting/SpeechScriptingInterface.cpp create mode 100644 interface/src/scripting/SpeechScriptingInterface.h diff --git a/assignment-client/src/Agent.cpp b/assignment-client/src/Agent.cpp index 639e9f924b..ee21fff8c0 100644 --- a/assignment-client/src/Agent.cpp +++ b/assignment-client/src/Agent.cpp @@ -216,7 +216,7 @@ void Agent::requestScript() { } // make sure this is not a script request for the file scheme - if (scriptURL.scheme() == URL_SCHEME_FILE) { + if (scriptURL.scheme() == HIFI_URL_SCHEME_FILE) { qWarning() << "Cannot load script for Agent from local filesystem."; scriptRequestFinished(); return; diff --git a/interface/src/Application.cpp b/interface/src/Application.cpp index aa2b382c58..74532ef53a 100644 --- a/interface/src/Application.cpp +++ b/interface/src/Application.cpp @@ -182,6 +182,7 @@ #include "scripting/RatesScriptingInterface.h" #include "scripting/SelectionScriptingInterface.h" #include "scripting/WalletScriptingInterface.h" +#include "scripting/SpeechScriptingInterface.h" #if defined(Q_OS_MAC) || defined(Q_OS_WIN) #include "SpeechRecognizer.h" #endif @@ -528,11 +529,11 @@ bool isDomainURL(QUrl url) { if (url.scheme() == URL_SCHEME_HIFI) { return true; } - if (url.scheme() != URL_SCHEME_FILE) { + if (url.scheme() != HIFI_URL_SCHEME_FILE) { // TODO -- once Octree::readFromURL no-longer takes over the main event-loop, serverless-domain urls can // be loaded over http(s) - // && url.scheme() != URL_SCHEME_HTTP && - // url.scheme() != URL_SCHEME_HTTPS + // && url.scheme() != HIFI_URL_SCHEME_HTTP && + // url.scheme() != HIFI_URL_SCHEME_HTTPS return false; } if (url.path().endsWith(".json", Qt::CaseInsensitive) || @@ -943,6 +944,7 @@ bool setupEssentials(int& argc, char** argv, bool runningMarkerExisted) { DependencyManager::set(); DependencyManager::set(); DependencyManager::set(); + DependencyManager::set(); DependencyManager::set(); @@ -1024,8 +1026,8 @@ Application::Application(int& argc, char** argv, QElapsedTimer& startupTimer, bo // If the URL scheme is http(s) or ftp, then use as is, else - treat it as a local file // This is done so as not break previous command line scripts - if (testScriptPath.left(URL_SCHEME_HTTP.length()) == URL_SCHEME_HTTP || - testScriptPath.left(URL_SCHEME_FTP.length()) == URL_SCHEME_FTP) { + if (testScriptPath.left(HIFI_URL_SCHEME_HTTP.length()) == HIFI_URL_SCHEME_HTTP || + testScriptPath.left(HIFI_URL_SCHEME_FTP.length()) == HIFI_URL_SCHEME_FTP) { setProperty(hifi::properties::TEST, QUrl::fromUserInput(testScriptPath)); } else if (QFileInfo(testScriptPath).exists()) { @@ -3127,6 +3129,7 @@ void Application::onDesktopRootContextCreated(QQmlContext* surfaceContext) { surfaceContext->setContextProperty("ContextOverlay", DependencyManager::get().data()); surfaceContext->setContextProperty("Wallet", DependencyManager::get().data()); surfaceContext->setContextProperty("HiFiAbout", AboutUtil::getInstance()); + surfaceContext->setContextProperty("Speech", DependencyManager::get().data()); if (auto steamClient = PluginManager::getInstance()->getSteamClientPlugin()) { surfaceContext->setContextProperty("Steam", new SteamScriptingInterface(engine, steamClient.get())); @@ -6797,6 +6800,7 @@ void Application::registerScriptEngineWithApplicationServices(ScriptEnginePointe scriptEngine->registerGlobalObject("Wallet", DependencyManager::get().data()); scriptEngine->registerGlobalObject("AddressManager", DependencyManager::get().data()); scriptEngine->registerGlobalObject("HifiAbout", AboutUtil::getInstance()); + scriptEngine->registerGlobalObject("Speech", DependencyManager::get().data()); qScriptRegisterMetaType(scriptEngine.data(), OverlayIDtoScriptValue, OverlayIDfromScriptValue); diff --git a/interface/src/assets/ATPAssetMigrator.cpp b/interface/src/assets/ATPAssetMigrator.cpp index 45ac80b054..423a4b8509 100644 --- a/interface/src/assets/ATPAssetMigrator.cpp +++ b/interface/src/assets/ATPAssetMigrator.cpp @@ -121,8 +121,8 @@ void ATPAssetMigrator::loadEntityServerFile() { QUrl migrationURL = QUrl(migrationURLString); if (!_ignoredUrls.contains(migrationURL) - && (migrationURL.scheme() == URL_SCHEME_HTTP || migrationURL.scheme() == URL_SCHEME_HTTPS - || migrationURL.scheme() == URL_SCHEME_FILE || migrationURL.scheme() == URL_SCHEME_FTP)) { + && (migrationURL.scheme() == HIFI_URL_SCHEME_HTTP || migrationURL.scheme() == HIFI_URL_SCHEME_HTTPS + || migrationURL.scheme() == HIFI_URL_SCHEME_FILE || migrationURL.scheme() == HIFI_URL_SCHEME_FTP)) { if (_pendingReplacements.contains(migrationURL)) { // we already have a request out for this asset, just store the QJsonValueRef diff --git a/interface/src/scripting/SpeechScriptingInterface.cpp b/interface/src/scripting/SpeechScriptingInterface.cpp new file mode 100644 index 0000000000..a38e1aa824 --- /dev/null +++ b/interface/src/scripting/SpeechScriptingInterface.cpp @@ -0,0 +1,96 @@ +// +// SpeechScriptingInterface.cpp +// interface/src/scripting +// +// Created by Zach Fox on 2018-10-10. +// Copyright 2018 High Fidelity, Inc. +// +// Distributed under the Apache License, Version 2.0. +// See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html +// + +#include "SpeechScriptingInterface.h" +#include "avatar/AvatarManager.h" +#include + +SpeechScriptingInterface::SpeechScriptingInterface() { + // + // Create text to speech engine + // + HRESULT hr = m_tts.CoCreateInstance(CLSID_SpVoice); + if (FAILED(hr)) { + ATLTRACE(TEXT("Text-to-speech creation failed.\n")); + AtlThrow(hr); + } + + // + // Get token corresponding to default voice + // + hr = SpGetDefaultTokenFromCategoryId(SPCAT_VOICES, &m_voiceToken, FALSE); + if (FAILED(hr)) { + ATLTRACE(TEXT("Can't get default voice token.\n")); + AtlThrow(hr); + } + + // + // Set default voice + // + hr = m_tts->SetVoice(m_voiceToken); + if (FAILED(hr)) { + ATLTRACE(TEXT("Can't set default voice.\n")); + AtlThrow(hr); + } + + WAVEFORMATEX fmt; + fmt.wFormatTag = WAVE_FORMAT_PCM; + fmt.nSamplesPerSec = 48000; + fmt.wBitsPerSample = 16; + fmt.nChannels = 1; + fmt.nBlockAlign = fmt.nChannels * fmt.wBitsPerSample / 8; + fmt.nAvgBytesPerSec = fmt.nSamplesPerSec * fmt.nBlockAlign; + fmt.cbSize = 0; + + BYTE* pcontent = new BYTE[1024 * 1000]; + + cpIStream = SHCreateMemStream(NULL, 0); + hr = outputStream->SetBaseStream(cpIStream, SPDFID_WaveFormatEx, &fmt); + + hr = m_tts->SetOutput(outputStream, true); + if (FAILED(hr)) { + ATLTRACE(TEXT("Can't set output stream.\n")); + AtlThrow(hr); + } +} + +SpeechScriptingInterface::~SpeechScriptingInterface() { + +} + +void SpeechScriptingInterface::speakText(const QString& textToSpeak) { + ULONG streamNumber; + HRESULT hr = m_tts->Speak(reinterpret_cast(textToSpeak.utf16()), + SPF_IS_NOT_XML | SPF_ASYNC | SPF_PURGEBEFORESPEAK, + &streamNumber); + if (FAILED(hr)) { + ATLTRACE(TEXT("Speak failed.\n")); + AtlThrow(hr); + } + + m_tts->WaitUntilDone(-1); + + outputStream->GetBaseStream(&cpIStream); + ULARGE_INTEGER StreamSize; + StreamSize.LowPart = 0; + hr = IStream_Size(cpIStream, &StreamSize); + + DWORD dwSize = StreamSize.QuadPart; + char* buf1 = new char[dwSize + 1]; + hr = IStream_Read(cpIStream, buf1, dwSize); + + QByteArray byteArray = QByteArray::QByteArray(buf1, (int)dwSize); + AudioInjectorOptions options; + + options.position = DependencyManager::get()->getMyAvatarPosition(); + + AudioInjector::playSound(byteArray, options); +} diff --git a/interface/src/scripting/SpeechScriptingInterface.h b/interface/src/scripting/SpeechScriptingInterface.h new file mode 100644 index 0000000000..311bd80605 --- /dev/null +++ b/interface/src/scripting/SpeechScriptingInterface.h @@ -0,0 +1,76 @@ +// SpeechScriptingInterface.h +// interface/src/scripting +// +// Created by Zach Fox on 2018-10-10. +// Copyright 2018 High Fidelity, Inc. +// +// Distributed under the Apache License, Version 2.0. +// See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html +// + +#ifndef hifi_SpeechScriptingInterface_h +#define hifi_SpeechScriptingInterface_h + +#include +#include + +#include // SAPI +#include // SAPI Helper + +class SpeechScriptingInterface : public QObject, public Dependency { + Q_OBJECT + +public: + SpeechScriptingInterface(); + ~SpeechScriptingInterface(); + + Q_INVOKABLE void speakText(const QString& textToSpeak); + +private: + + class CComAutoInit { + public: + // Initializes COM using CoInitialize. + // On failure, signals error using AtlThrow. + CComAutoInit() { + HRESULT hr = ::CoInitialize(NULL); + if (FAILED(hr)) { + ATLTRACE(TEXT("CoInitialize() failed in CComAutoInit constructor (hr=0x%08X).\n"), hr); + AtlThrow(hr); + } + } + + // Initializes COM using CoInitializeEx. + // On failure, signals error using AtlThrow. + explicit CComAutoInit(__in DWORD dwCoInit) { + HRESULT hr = ::CoInitializeEx(NULL, dwCoInit); + if (FAILED(hr)) { + ATLTRACE(TEXT("CoInitializeEx() failed in CComAutoInit constructor (hr=0x%08X).\n"), hr); + AtlThrow(hr); + } + } + + // Uninitializes COM using CoUninitialize. + ~CComAutoInit() { ::CoUninitialize(); } + + // + // Ban copy + // + private: + CComAutoInit(const CComAutoInit&); + }; + + // COM initialization and cleanup (must precede other COM related data members) + CComAutoInit m_comInit; + + // Text to speech engine + CComPtr m_tts; + + // Default voice token + CComPtr m_voiceToken; + + CComPtr outputStream; + CComPtr cpIStream; +}; + +#endif // hifi_SpeechScriptingInterface_h diff --git a/libraries/entities-renderer/src/RenderableWebEntityItem.cpp b/libraries/entities-renderer/src/RenderableWebEntityItem.cpp index bc9ac84c91..ac5e43e558 100644 --- a/libraries/entities-renderer/src/RenderableWebEntityItem.cpp +++ b/libraries/entities-renderer/src/RenderableWebEntityItem.cpp @@ -54,7 +54,7 @@ WebEntityRenderer::ContentType WebEntityRenderer::getContentType(const QString& const QUrl url(urlString); auto scheme = url.scheme(); - if (scheme == URL_SCHEME_ABOUT || scheme == URL_SCHEME_HTTP || scheme == URL_SCHEME_HTTPS || + if (scheme == HIFI_URL_SCHEME_ABOUT || scheme == HIFI_URL_SCHEME_HTTP || scheme == HIFI_URL_SCHEME_HTTPS || urlString.toLower().endsWith(".htm") || urlString.toLower().endsWith(".html")) { return ContentType::HtmlContent; } diff --git a/libraries/entities/src/EntityEditFilters.cpp b/libraries/entities/src/EntityEditFilters.cpp index 94df7eb465..9a3f056e04 100644 --- a/libraries/entities/src/EntityEditFilters.cpp +++ b/libraries/entities/src/EntityEditFilters.cpp @@ -183,7 +183,7 @@ void EntityEditFilters::addFilter(EntityItemID entityID, QString filterURL) { } // The following should be abstracted out for use in Agent.cpp (and maybe later AvatarMixer.cpp) - if (scriptURL.scheme().isEmpty() || (scriptURL.scheme() == URL_SCHEME_FILE)) { + if (scriptURL.scheme().isEmpty() || (scriptURL.scheme() == HIFI_URL_SCHEME_FILE)) { qWarning() << "Cannot load script from local filesystem, because assignment may be on a different computer."; scriptRequestFinished(entityID); return; diff --git a/libraries/model-networking/src/model-networking/TextureCache.cpp b/libraries/model-networking/src/model-networking/TextureCache.cpp index e8aec5e60e..11a5b2f167 100644 --- a/libraries/model-networking/src/model-networking/TextureCache.cpp +++ b/libraries/model-networking/src/model-networking/TextureCache.cpp @@ -329,7 +329,7 @@ _maxNumPixels(100) static bool isLocalUrl(const QUrl& url) { auto scheme = url.scheme(); - return (scheme == URL_SCHEME_FILE || scheme == URL_SCHEME_QRC || scheme == RESOURCE_SCHEME); + return (scheme == HIFI_URL_SCHEME_FILE || scheme == URL_SCHEME_QRC || scheme == RESOURCE_SCHEME); } NetworkTexture::NetworkTexture(const QUrl& url, image::TextureUsage::Type type, const QByteArray& content, int maxNumPixels) : @@ -502,7 +502,7 @@ void NetworkTexture::handleLocalRequestCompleted() { void NetworkTexture::makeLocalRequest() { const QString scheme = _activeUrl.scheme(); QString path; - if (scheme == URL_SCHEME_FILE) { + if (scheme == HIFI_URL_SCHEME_FILE) { path = PathUtils::expandToLocalDataAbsolutePath(_activeUrl).toLocalFile(); } else { path = ":" + _activeUrl.path(); diff --git a/libraries/networking/src/AddressManager.cpp b/libraries/networking/src/AddressManager.cpp index f8ab8ceaec..e6957728e8 100644 --- a/libraries/networking/src/AddressManager.cpp +++ b/libraries/networking/src/AddressManager.cpp @@ -155,12 +155,12 @@ void AddressManager::goForward() { void AddressManager::storeCurrentAddress() { auto url = currentAddress(); - if (url.scheme() == URL_SCHEME_FILE || + if (url.scheme() == HIFI_URL_SCHEME_FILE || (url.scheme() == URL_SCHEME_HIFI && !url.host().isEmpty())) { // TODO -- once Octree::readFromURL no-longer takes over the main event-loop, serverless-domain urls can // be loaded over http(s) - // url.scheme() == URL_SCHEME_HTTP || - // url.scheme() == URL_SCHEME_HTTPS || + // url.scheme() == HIFI_URL_SCHEME_HTTP || + // url.scheme() == HIFI_URL_SCHEME_HTTPS || bool isInErrorState = DependencyManager::get()->getDomainHandler().isInErrorState(); if (isConnected()) { if (isInErrorState) { @@ -331,11 +331,11 @@ bool AddressManager::handleUrl(const QUrl& lookupUrl, LookupTrigger trigger) { emit lookupResultsFinished(); return true; - } else if (lookupUrl.scheme() == URL_SCHEME_FILE) { + } else if (lookupUrl.scheme() == HIFI_URL_SCHEME_FILE) { // TODO -- once Octree::readFromURL no-longer takes over the main event-loop, serverless-domain urls can // be loaded over http(s) // lookupUrl.scheme() == URL_SCHEME_HTTP || - // lookupUrl.scheme() == URL_SCHEME_HTTPS || + // lookupUrl.scheme() == HIFI_URL_SCHEME_HTTPS || // TODO once a file can return a connection refusal if there were to be some kind of load error, we'd // need to store the previous domain tried in _lastVisitedURL. For now , do not store it. diff --git a/libraries/networking/src/DomainHandler.cpp b/libraries/networking/src/DomainHandler.cpp index 615546b410..3dda182989 100644 --- a/libraries/networking/src/DomainHandler.cpp +++ b/libraries/networking/src/DomainHandler.cpp @@ -194,7 +194,7 @@ void DomainHandler::setURLAndID(QUrl domainURL, QUuid domainID) { _sockAddr.clear(); // if this is a file URL we need to see if it has a ~ for us to expand - if (domainURL.scheme() == URL_SCHEME_FILE) { + if (domainURL.scheme() == HIFI_URL_SCHEME_FILE) { domainURL = PathUtils::expandToLocalDataAbsolutePath(domainURL); } } diff --git a/libraries/networking/src/NetworkingConstants.h b/libraries/networking/src/NetworkingConstants.h index 839e269fd4..302e0efa02 100644 --- a/libraries/networking/src/NetworkingConstants.h +++ b/libraries/networking/src/NetworkingConstants.h @@ -30,14 +30,14 @@ namespace NetworkingConstants { QUrl METAVERSE_SERVER_URL(); } -const QString URL_SCHEME_ABOUT = "about"; +const QString HIFI_URL_SCHEME_ABOUT = "about"; const QString URL_SCHEME_HIFI = "hifi"; const QString URL_SCHEME_HIFIAPP = "hifiapp"; const QString URL_SCHEME_QRC = "qrc"; -const QString URL_SCHEME_FILE = "file"; -const QString URL_SCHEME_HTTP = "http"; -const QString URL_SCHEME_HTTPS = "https"; -const QString URL_SCHEME_FTP = "ftp"; +const QString HIFI_URL_SCHEME_FILE = "file"; +const QString HIFI_URL_SCHEME_HTTP = "http"; +const QString HIFI_URL_SCHEME_HTTPS = "https"; +const QString HIFI_URL_SCHEME_FTP = "ftp"; const QString URL_SCHEME_ATP = "atp"; #endif // hifi_NetworkingConstants_h diff --git a/libraries/networking/src/ResourceCache.cpp b/libraries/networking/src/ResourceCache.cpp index aed9f3b0e5..1328606be4 100644 --- a/libraries/networking/src/ResourceCache.cpp +++ b/libraries/networking/src/ResourceCache.cpp @@ -114,7 +114,7 @@ QSharedPointer ResourceCacheSharedItems::getHighestPendingRequest() { // Check load priority float priority = resource->getLoadPriority(); - bool isFile = resource->getURL().scheme() == URL_SCHEME_FILE; + bool isFile = resource->getURL().scheme() == HIFI_URL_SCHEME_FILE; if (priority >= highestPriority && (isFile || !currentHighestIsFile)) { highestPriority = priority; highestIndex = i; diff --git a/libraries/networking/src/ResourceManager.cpp b/libraries/networking/src/ResourceManager.cpp index 553f0d0a61..40d6570f48 100644 --- a/libraries/networking/src/ResourceManager.cpp +++ b/libraries/networking/src/ResourceManager.cpp @@ -82,10 +82,10 @@ const QSet& getKnownUrls() { static std::once_flag once; std::call_once(once, [] { knownUrls.insert(URL_SCHEME_QRC); - knownUrls.insert(URL_SCHEME_FILE); - knownUrls.insert(URL_SCHEME_HTTP); - knownUrls.insert(URL_SCHEME_HTTPS); - knownUrls.insert(URL_SCHEME_FTP); + knownUrls.insert(HIFI_URL_SCHEME_FILE); + knownUrls.insert(HIFI_URL_SCHEME_HTTP); + knownUrls.insert(HIFI_URL_SCHEME_HTTPS); + knownUrls.insert(HIFI_URL_SCHEME_FTP); knownUrls.insert(URL_SCHEME_ATP); }); return knownUrls; @@ -97,7 +97,7 @@ QUrl ResourceManager::normalizeURL(const QUrl& originalUrl) { if (!getKnownUrls().contains(scheme)) { // check the degenerative file case: on windows we can often have urls of the form c:/filename // this checks for and works around that case. - QUrl urlWithFileScheme{ URL_SCHEME_FILE + ":///" + url.toString() }; + QUrl urlWithFileScheme{ HIFI_URL_SCHEME_FILE + ":///" + url.toString() }; if (!urlWithFileScheme.toLocalFile().isEmpty()) { return urlWithFileScheme; } @@ -118,9 +118,9 @@ ResourceRequest* ResourceManager::createResourceRequest(QObject* parent, const Q ResourceRequest* request = nullptr; - if (scheme == URL_SCHEME_FILE || scheme == URL_SCHEME_QRC) { + if (scheme == HIFI_URL_SCHEME_FILE || scheme == URL_SCHEME_QRC) { request = new FileResourceRequest(normalizedURL); - } else if (scheme == URL_SCHEME_HTTP || scheme == URL_SCHEME_HTTPS || scheme == URL_SCHEME_FTP) { + } else if (scheme == HIFI_URL_SCHEME_HTTP || scheme == HIFI_URL_SCHEME_HTTPS || scheme == HIFI_URL_SCHEME_FTP) { request = new HTTPResourceRequest(normalizedURL); } else if (scheme == URL_SCHEME_ATP) { if (!_atpSupportEnabled) { @@ -143,10 +143,10 @@ ResourceRequest* ResourceManager::createResourceRequest(QObject* parent, const Q bool ResourceManager::resourceExists(const QUrl& url) { auto scheme = url.scheme(); - if (scheme == URL_SCHEME_FILE) { + if (scheme == HIFI_URL_SCHEME_FILE) { QFileInfo file{ url.toString() }; return file.exists(); - } else if (scheme == URL_SCHEME_HTTP || scheme == URL_SCHEME_HTTPS || scheme == URL_SCHEME_FTP) { + } else if (scheme == HIFI_URL_SCHEME_HTTP || scheme == HIFI_URL_SCHEME_HTTPS || scheme == HIFI_URL_SCHEME_FTP) { auto& networkAccessManager = NetworkAccessManager::getInstance(); QNetworkRequest request{ url }; From f1446532d02eca2ee160e83bb5d2b122a1b48d38 Mon Sep 17 00:00:00 2001 From: Zach Fox Date: Wed, 10 Oct 2018 16:13:23 -0700 Subject: [PATCH 02/18] Almost working --- .../scripting/SpeechScriptingInterface.cpp | 106 ++++++++++++------ .../src/scripting/SpeechScriptingInterface.h | 7 +- 2 files changed, 75 insertions(+), 38 deletions(-) diff --git a/interface/src/scripting/SpeechScriptingInterface.cpp b/interface/src/scripting/SpeechScriptingInterface.cpp index a38e1aa824..b9c7718075 100644 --- a/interface/src/scripting/SpeechScriptingInterface.cpp +++ b/interface/src/scripting/SpeechScriptingInterface.cpp @@ -19,8 +19,7 @@ SpeechScriptingInterface::SpeechScriptingInterface() { // HRESULT hr = m_tts.CoCreateInstance(CLSID_SpVoice); if (FAILED(hr)) { - ATLTRACE(TEXT("Text-to-speech creation failed.\n")); - AtlThrow(hr); + qDebug() << "Text-to-speech engine creation failed."; } // @@ -28,8 +27,7 @@ SpeechScriptingInterface::SpeechScriptingInterface() { // hr = SpGetDefaultTokenFromCategoryId(SPCAT_VOICES, &m_voiceToken, FALSE); if (FAILED(hr)) { - ATLTRACE(TEXT("Can't get default voice token.\n")); - AtlThrow(hr); + qDebug() << "Can't get default voice token."; } // @@ -37,28 +35,7 @@ SpeechScriptingInterface::SpeechScriptingInterface() { // hr = m_tts->SetVoice(m_voiceToken); if (FAILED(hr)) { - ATLTRACE(TEXT("Can't set default voice.\n")); - AtlThrow(hr); - } - - WAVEFORMATEX fmt; - fmt.wFormatTag = WAVE_FORMAT_PCM; - fmt.nSamplesPerSec = 48000; - fmt.wBitsPerSample = 16; - fmt.nChannels = 1; - fmt.nBlockAlign = fmt.nChannels * fmt.wBitsPerSample / 8; - fmt.nAvgBytesPerSec = fmt.nSamplesPerSec * fmt.nBlockAlign; - fmt.cbSize = 0; - - BYTE* pcontent = new BYTE[1024 * 1000]; - - cpIStream = SHCreateMemStream(NULL, 0); - hr = outputStream->SetBaseStream(cpIStream, SPDFID_WaveFormatEx, &fmt); - - hr = m_tts->SetOutput(outputStream, true); - if (FAILED(hr)) { - ATLTRACE(TEXT("Can't set output stream.\n")); - AtlThrow(hr); + qDebug() << "Can't set default voice."; } } @@ -66,30 +43,91 @@ SpeechScriptingInterface::~SpeechScriptingInterface() { } +class ReleaseOnExit { +public: + ReleaseOnExit(IUnknown* p) : m_p(p) {} + ~ReleaseOnExit() { + if (m_p) { + m_p->Release(); + } + } + +private: + IUnknown* m_p; +}; + void SpeechScriptingInterface::speakText(const QString& textToSpeak) { + WAVEFORMATEX fmt; + fmt.wFormatTag = WAVE_FORMAT_PCM; + fmt.nSamplesPerSec = 44100; + fmt.wBitsPerSample = 16; + fmt.nChannels = 1; + fmt.nBlockAlign = fmt.nChannels * fmt.wBitsPerSample / 8; + fmt.nAvgBytesPerSec = fmt.nSamplesPerSec * fmt.nBlockAlign; + fmt.cbSize = 0; + + IStream* pStream = NULL; + + ISpStream* pSpStream = nullptr; + HRESULT hr = CoCreateInstance(CLSID_SpStream, nullptr, CLSCTX_ALL, __uuidof(ISpStream), (void**)&pSpStream); + if (FAILED(hr)) { + qDebug() << "CoCreateInstance failed."; + } + ReleaseOnExit rSpStream(pSpStream); + + pStream = SHCreateMemStream(NULL, 0); + if (nullptr == pStream) { + qDebug() << "SHCreateMemStream failed."; + } + + hr = pSpStream->SetBaseStream(pStream, SPDFID_WaveFormatEx, &fmt); + if (FAILED(hr)) { + qDebug() << "Can't set base stream."; + } + + hr = m_tts->SetOutput(pSpStream, true); + if (FAILED(hr)) { + qDebug() << "Can't set output stream."; + } + + ReleaseOnExit rStream(pStream); + ULONG streamNumber; - HRESULT hr = m_tts->Speak(reinterpret_cast(textToSpeak.utf16()), + hr = m_tts->Speak(reinterpret_cast(textToSpeak.utf16()), SPF_IS_NOT_XML | SPF_ASYNC | SPF_PURGEBEFORESPEAK, &streamNumber); if (FAILED(hr)) { - ATLTRACE(TEXT("Speak failed.\n")); - AtlThrow(hr); + qDebug() << "Speak failed."; } m_tts->WaitUntilDone(-1); - outputStream->GetBaseStream(&cpIStream); + hr = pSpStream->GetBaseStream(&pStream); + if (FAILED(hr)) { + qDebug() << "Couldn't get base stream."; + } + + hr = IStream_Reset(pStream); + if (FAILED(hr)) { + qDebug() << "Couldn't reset stream."; + } + ULARGE_INTEGER StreamSize; StreamSize.LowPart = 0; - hr = IStream_Size(cpIStream, &StreamSize); + hr = IStream_Size(pStream, &StreamSize); DWORD dwSize = StreamSize.QuadPart; char* buf1 = new char[dwSize + 1]; - hr = IStream_Read(cpIStream, buf1, dwSize); + memset(buf1, 0, dwSize + 1); + + hr = IStream_Read(pStream, buf1, dwSize); + if (FAILED(hr)) { + qDebug() << "Couldn't read from stream."; + } + + QByteArray byteArray = QByteArray::QByteArray(buf1, dwSize); - QByteArray byteArray = QByteArray::QByteArray(buf1, (int)dwSize); AudioInjectorOptions options; - options.position = DependencyManager::get()->getMyAvatarPosition(); AudioInjector::playSound(byteArray, options); diff --git a/interface/src/scripting/SpeechScriptingInterface.h b/interface/src/scripting/SpeechScriptingInterface.h index 311bd80605..ad6777e339 100644 --- a/interface/src/scripting/SpeechScriptingInterface.h +++ b/interface/src/scripting/SpeechScriptingInterface.h @@ -13,7 +13,9 @@ #include #include - +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#endif #include // SAPI #include // SAPI Helper @@ -68,9 +70,6 @@ private: // Default voice token CComPtr m_voiceToken; - - CComPtr outputStream; - CComPtr cpIStream; }; #endif // hifi_SpeechScriptingInterface_h From d8c9712dd2cfb404878eb830f7060015fbfc37c6 Mon Sep 17 00:00:00 2001 From: Zach Fox Date: Wed, 10 Oct 2018 16:19:11 -0700 Subject: [PATCH 03/18] It's working! --- interface/src/scripting/SpeechScriptingInterface.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/interface/src/scripting/SpeechScriptingInterface.cpp b/interface/src/scripting/SpeechScriptingInterface.cpp index b9c7718075..3b3ecf728d 100644 --- a/interface/src/scripting/SpeechScriptingInterface.cpp +++ b/interface/src/scripting/SpeechScriptingInterface.cpp @@ -59,7 +59,7 @@ private: void SpeechScriptingInterface::speakText(const QString& textToSpeak) { WAVEFORMATEX fmt; fmt.wFormatTag = WAVE_FORMAT_PCM; - fmt.nSamplesPerSec = 44100; + fmt.nSamplesPerSec = 24000; fmt.wBitsPerSample = 16; fmt.nChannels = 1; fmt.nBlockAlign = fmt.nChannels * fmt.wBitsPerSample / 8; From 5d4de3d3b0130aa7c6b00c5aa83fe12af5d808af Mon Sep 17 00:00:00 2001 From: Zach Fox Date: Wed, 10 Oct 2018 17:36:38 -0700 Subject: [PATCH 04/18] I love it. --- interface/src/scripting/SpeechScriptingInterface.cpp | 9 ++++++--- interface/src/scripting/SpeechScriptingInterface.h | 5 ++++- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/interface/src/scripting/SpeechScriptingInterface.cpp b/interface/src/scripting/SpeechScriptingInterface.cpp index 3b3ecf728d..b8e0f5c3e8 100644 --- a/interface/src/scripting/SpeechScriptingInterface.cpp +++ b/interface/src/scripting/SpeechScriptingInterface.cpp @@ -11,7 +11,6 @@ #include "SpeechScriptingInterface.h" #include "avatar/AvatarManager.h" -#include SpeechScriptingInterface::SpeechScriptingInterface() { // @@ -94,7 +93,7 @@ void SpeechScriptingInterface::speakText(const QString& textToSpeak) { ULONG streamNumber; hr = m_tts->Speak(reinterpret_cast(textToSpeak.utf16()), - SPF_IS_NOT_XML | SPF_ASYNC | SPF_PURGEBEFORESPEAK, + SPF_IS_XML | SPF_ASYNC | SPF_PURGEBEFORESPEAK, &streamNumber); if (FAILED(hr)) { qDebug() << "Speak failed."; @@ -130,5 +129,9 @@ void SpeechScriptingInterface::speakText(const QString& textToSpeak) { AudioInjectorOptions options; options.position = DependencyManager::get()->getMyAvatarPosition(); - AudioInjector::playSound(byteArray, options); + lastSound = AudioInjector::playSound(byteArray, options); +} + +void SpeechScriptingInterface::stopLastSpeech() { + lastSound->stop(); } diff --git a/interface/src/scripting/SpeechScriptingInterface.h b/interface/src/scripting/SpeechScriptingInterface.h index ad6777e339..c683a1a3c6 100644 --- a/interface/src/scripting/SpeechScriptingInterface.h +++ b/interface/src/scripting/SpeechScriptingInterface.h @@ -18,6 +18,7 @@ #endif #include // SAPI #include // SAPI Helper +#include class SpeechScriptingInterface : public QObject, public Dependency { Q_OBJECT @@ -27,9 +28,9 @@ public: ~SpeechScriptingInterface(); Q_INVOKABLE void speakText(const QString& textToSpeak); + Q_INVOKABLE void stopLastSpeech(); private: - class CComAutoInit { public: // Initializes COM using CoInitialize. @@ -70,6 +71,8 @@ private: // Default voice token CComPtr m_voiceToken; + + AudioInjectorPointer lastSound; }; #endif // hifi_SpeechScriptingInterface_h From daeedc6ef1ee64472d66ef52decf4d6380c210f9 Mon Sep 17 00:00:00 2001 From: Zach Fox Date: Thu, 11 Oct 2018 17:10:14 -0700 Subject: [PATCH 05/18] Lots of progress today --- interface/src/Application.cpp | 11 ++- ...nterface.cpp => TTSScriptingInterface.cpp} | 58 ++++++++---- ...ingInterface.h => TTSScriptingInterface.h} | 19 ++-- libraries/audio-client/src/AudioClient.cpp | 88 ++++++++++--------- libraries/audio-client/src/AudioClient.h | 3 + 5 files changed, 111 insertions(+), 68 deletions(-) rename interface/src/scripting/{SpeechScriptingInterface.cpp => TTSScriptingInterface.cpp} (64%) rename interface/src/scripting/{SpeechScriptingInterface.h => TTSScriptingInterface.h} (79%) diff --git a/interface/src/Application.cpp b/interface/src/Application.cpp index 74532ef53a..728fea8c10 100644 --- a/interface/src/Application.cpp +++ b/interface/src/Application.cpp @@ -182,7 +182,7 @@ #include "scripting/RatesScriptingInterface.h" #include "scripting/SelectionScriptingInterface.h" #include "scripting/WalletScriptingInterface.h" -#include "scripting/SpeechScriptingInterface.h" +#include "scripting/TTSScriptingInterface.h" #if defined(Q_OS_MAC) || defined(Q_OS_WIN) #include "SpeechRecognizer.h" #endif @@ -944,7 +944,7 @@ bool setupEssentials(int& argc, char** argv, bool runningMarkerExisted) { DependencyManager::set(); DependencyManager::set(); DependencyManager::set(); - DependencyManager::set(); + DependencyManager::set(); DependencyManager::set(); @@ -1179,6 +1179,9 @@ Application::Application(int& argc, char** argv, QElapsedTimer& startupTimer, bo recording::Frame::registerFrameHandler(AudioConstants::getAudioFrameName(), [&audioIO](recording::Frame::ConstPointer frame) { audioIO->handleRecordedAudioInput(frame->data); }); + + auto TTS = DependencyManager::get().data(); + connect(TTS, &TTSScriptingInterface::ttsSampleCreated, audioIO, &AudioClient::handleTTSAudioInput); connect(audioIO, &AudioClient::inputReceived, [](const QByteArray& audio) { static auto recorder = DependencyManager::get(); @@ -3129,7 +3132,7 @@ void Application::onDesktopRootContextCreated(QQmlContext* surfaceContext) { surfaceContext->setContextProperty("ContextOverlay", DependencyManager::get().data()); surfaceContext->setContextProperty("Wallet", DependencyManager::get().data()); surfaceContext->setContextProperty("HiFiAbout", AboutUtil::getInstance()); - surfaceContext->setContextProperty("Speech", DependencyManager::get().data()); + surfaceContext->setContextProperty("TextToSpeech", DependencyManager::get().data()); if (auto steamClient = PluginManager::getInstance()->getSteamClientPlugin()) { surfaceContext->setContextProperty("Steam", new SteamScriptingInterface(engine, steamClient.get())); @@ -6800,7 +6803,7 @@ void Application::registerScriptEngineWithApplicationServices(ScriptEnginePointe scriptEngine->registerGlobalObject("Wallet", DependencyManager::get().data()); scriptEngine->registerGlobalObject("AddressManager", DependencyManager::get().data()); scriptEngine->registerGlobalObject("HifiAbout", AboutUtil::getInstance()); - scriptEngine->registerGlobalObject("Speech", DependencyManager::get().data()); + scriptEngine->registerGlobalObject("TextToSpeech", DependencyManager::get().data()); qScriptRegisterMetaType(scriptEngine.data(), OverlayIDtoScriptValue, OverlayIDfromScriptValue); diff --git a/interface/src/scripting/SpeechScriptingInterface.cpp b/interface/src/scripting/TTSScriptingInterface.cpp similarity index 64% rename from interface/src/scripting/SpeechScriptingInterface.cpp rename to interface/src/scripting/TTSScriptingInterface.cpp index b8e0f5c3e8..fdbb37e586 100644 --- a/interface/src/scripting/SpeechScriptingInterface.cpp +++ b/interface/src/scripting/TTSScriptingInterface.cpp @@ -1,6 +1,6 @@ // -// SpeechScriptingInterface.cpp -// interface/src/scripting +// TTSScriptingInterface.cpp +// libraries/audio-client/src/scripting // // Created by Zach Fox on 2018-10-10. // Copyright 2018 High Fidelity, Inc. @@ -9,10 +9,10 @@ // See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html // -#include "SpeechScriptingInterface.h" +#include "TTSScriptingInterface.h" #include "avatar/AvatarManager.h" -SpeechScriptingInterface::SpeechScriptingInterface() { +TTSScriptingInterface::TTSScriptingInterface() { // // Create text to speech engine // @@ -38,8 +38,7 @@ SpeechScriptingInterface::SpeechScriptingInterface() { } } -SpeechScriptingInterface::~SpeechScriptingInterface() { - +TTSScriptingInterface::~TTSScriptingInterface() { } class ReleaseOnExit { @@ -55,7 +54,28 @@ private: IUnknown* m_p; }; -void SpeechScriptingInterface::speakText(const QString& textToSpeak) { +void TTSScriptingInterface::testTone(const bool& alsoInject) { + QByteArray byteArray(480000, 0); + _lastSoundByteArray.resize(0); + _lastSoundByteArray.resize(480000); + + int32_t a = 0; + int16_t* samples = reinterpret_cast(byteArray.data()); + for (a = 0; a < 240000; a++) { + int16_t temp = (glm::sin(glm::radians((float)a))) * 32768; + samples[a] = temp; + } + emit ttsSampleCreated(_lastSoundByteArray); + + if (alsoInject) { + AudioInjectorOptions options; + options.position = DependencyManager::get()->getMyAvatarPosition(); + + _lastSoundAudioInjector = AudioInjector::playSound(_lastSoundByteArray, options); + } +} + +void TTSScriptingInterface::speakText(const QString& textToSpeak, const bool& alsoInject) { WAVEFORMATEX fmt; fmt.wFormatTag = WAVE_FORMAT_PCM; fmt.nSamplesPerSec = 24000; @@ -92,9 +112,8 @@ void SpeechScriptingInterface::speakText(const QString& textToSpeak) { ReleaseOnExit rStream(pStream); ULONG streamNumber; - hr = m_tts->Speak(reinterpret_cast(textToSpeak.utf16()), - SPF_IS_XML | SPF_ASYNC | SPF_PURGEBEFORESPEAK, - &streamNumber); + hr = m_tts->Speak(reinterpret_cast(textToSpeak.utf16()), SPF_IS_XML | SPF_ASYNC | SPF_PURGEBEFORESPEAK, + &streamNumber); if (FAILED(hr)) { qDebug() << "Speak failed."; } @@ -124,14 +143,21 @@ void SpeechScriptingInterface::speakText(const QString& textToSpeak) { qDebug() << "Couldn't read from stream."; } - QByteArray byteArray = QByteArray::QByteArray(buf1, dwSize); + _lastSoundByteArray.resize(0); + _lastSoundByteArray.append(buf1, dwSize); - AudioInjectorOptions options; - options.position = DependencyManager::get()->getMyAvatarPosition(); + emit ttsSampleCreated(_lastSoundByteArray); - lastSound = AudioInjector::playSound(byteArray, options); + if (alsoInject) { + AudioInjectorOptions options; + options.position = DependencyManager::get()->getMyAvatarPosition(); + + _lastSoundAudioInjector = AudioInjector::playSound(_lastSoundByteArray, options); + } } -void SpeechScriptingInterface::stopLastSpeech() { - lastSound->stop(); +void TTSScriptingInterface::stopLastSpeech() { + if (_lastSoundAudioInjector) { + _lastSoundAudioInjector->stop(); + } } diff --git a/interface/src/scripting/SpeechScriptingInterface.h b/interface/src/scripting/TTSScriptingInterface.h similarity index 79% rename from interface/src/scripting/SpeechScriptingInterface.h rename to interface/src/scripting/TTSScriptingInterface.h index c683a1a3c6..cb9c6c8c3e 100644 --- a/interface/src/scripting/SpeechScriptingInterface.h +++ b/interface/src/scripting/TTSScriptingInterface.h @@ -1,5 +1,5 @@ -// SpeechScriptingInterface.h -// interface/src/scripting +// TTSScriptingInterface.h +// libraries/audio-client/src/scripting // // Created by Zach Fox on 2018-10-10. // Copyright 2018 High Fidelity, Inc. @@ -20,16 +20,20 @@ #include // SAPI Helper #include -class SpeechScriptingInterface : public QObject, public Dependency { +class TTSScriptingInterface : public QObject, public Dependency { Q_OBJECT public: - SpeechScriptingInterface(); - ~SpeechScriptingInterface(); + TTSScriptingInterface(); + ~TTSScriptingInterface(); - Q_INVOKABLE void speakText(const QString& textToSpeak); + Q_INVOKABLE void testTone(const bool& alsoInject = false); + Q_INVOKABLE void speakText(const QString& textToSpeak, const bool& alsoInject = false); Q_INVOKABLE void stopLastSpeech(); +signals: + void ttsSampleCreated(QByteArray outputArray); + private: class CComAutoInit { public: @@ -72,7 +76,8 @@ private: // Default voice token CComPtr m_voiceToken; - AudioInjectorPointer lastSound; + QByteArray _lastSoundByteArray; + AudioInjectorPointer _lastSoundByteArray; }; #endif // hifi_SpeechScriptingInterface_h diff --git a/libraries/audio-client/src/AudioClient.cpp b/libraries/audio-client/src/AudioClient.cpp index d00bc29054..96f1c97878 100644 --- a/libraries/audio-client/src/AudioClient.cpp +++ b/libraries/audio-client/src/AudioClient.cpp @@ -1135,6 +1135,46 @@ void AudioClient::handleAudioInput(QByteArray& audioBuffer) { } } +void AudioClient::processAudioAndAddToRingBuffer(QByteArray& inputByteArray, const uchar& channelCount, const qint32& bytesForDuration) { + // input samples required to produce exactly NETWORK_FRAME_SAMPLES of output + const int inputSamplesRequired = + (_inputToNetworkResampler ? _inputToNetworkResampler->getMinInput(AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL) + : AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL) * + channelCount; + + const auto inputAudioSamples = std::unique_ptr(new int16_t[inputSamplesRequired]); + + handleLocalEchoAndReverb(inputByteArray); + + _inputRingBuffer.writeData(inputByteArray.data(), inputByteArray.size()); + + float audioInputMsecsRead = inputByteArray.size() / (float)(bytesForDuration); + _stats.updateInputMsRead(audioInputMsecsRead); + + const int numNetworkBytes = + _isStereoInput ? AudioConstants::NETWORK_FRAME_BYTES_STEREO : AudioConstants::NETWORK_FRAME_BYTES_PER_CHANNEL; + const int numNetworkSamples = + _isStereoInput ? AudioConstants::NETWORK_FRAME_SAMPLES_STEREO : AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL; + + static int16_t networkAudioSamples[AudioConstants::NETWORK_FRAME_SAMPLES_STEREO]; + + while (_inputRingBuffer.samplesAvailable() >= inputSamplesRequired) { + if (_muted) { + _inputRingBuffer.shiftReadPosition(inputSamplesRequired); + } else { + _inputRingBuffer.readSamples(inputAudioSamples.get(), inputSamplesRequired); + possibleResampling(_inputToNetworkResampler, inputAudioSamples.get(), networkAudioSamples, inputSamplesRequired, + numNetworkSamples, channelCount, _desiredInputFormat.channelCount()); + } + int bytesInInputRingBuffer = _inputRingBuffer.samplesAvailable() * AudioConstants::SAMPLE_SIZE; + float msecsInInputRingBuffer = bytesInInputRingBuffer / (float)(_inputFormat.bytesForDuration(USECS_PER_MSEC)); + _stats.updateInputMsUnplayed(msecsInInputRingBuffer); + + QByteArray audioBuffer(reinterpret_cast(networkAudioSamples), numNetworkBytes); + handleAudioInput(audioBuffer); + } +} + void AudioClient::handleMicAudioInput() { if (!_inputDevice || _isPlayingBackRecording) { return; @@ -1144,47 +1184,8 @@ void AudioClient::handleMicAudioInput() { _inputReadsSinceLastCheck++; #endif - // input samples required to produce exactly NETWORK_FRAME_SAMPLES of output - const int inputSamplesRequired = (_inputToNetworkResampler ? - _inputToNetworkResampler->getMinInput(AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL) : - AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL) * _inputFormat.channelCount(); - - const auto inputAudioSamples = std::unique_ptr(new int16_t[inputSamplesRequired]); - QByteArray inputByteArray = _inputDevice->readAll(); - - handleLocalEchoAndReverb(inputByteArray); - - _inputRingBuffer.writeData(inputByteArray.data(), inputByteArray.size()); - - float audioInputMsecsRead = inputByteArray.size() / (float)(_inputFormat.bytesForDuration(USECS_PER_MSEC)); - _stats.updateInputMsRead(audioInputMsecsRead); - - const int numNetworkBytes = _isStereoInput - ? AudioConstants::NETWORK_FRAME_BYTES_STEREO - : AudioConstants::NETWORK_FRAME_BYTES_PER_CHANNEL; - const int numNetworkSamples = _isStereoInput - ? AudioConstants::NETWORK_FRAME_SAMPLES_STEREO - : AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL; - - static int16_t networkAudioSamples[AudioConstants::NETWORK_FRAME_SAMPLES_STEREO]; - - while (_inputRingBuffer.samplesAvailable() >= inputSamplesRequired) { - if (_muted) { - _inputRingBuffer.shiftReadPosition(inputSamplesRequired); - } else { - _inputRingBuffer.readSamples(inputAudioSamples.get(), inputSamplesRequired); - possibleResampling(_inputToNetworkResampler, - inputAudioSamples.get(), networkAudioSamples, - inputSamplesRequired, numNetworkSamples, - _inputFormat.channelCount(), _desiredInputFormat.channelCount()); - } - int bytesInInputRingBuffer = _inputRingBuffer.samplesAvailable() * AudioConstants::SAMPLE_SIZE; - float msecsInInputRingBuffer = bytesInInputRingBuffer / (float)(_inputFormat.bytesForDuration(USECS_PER_MSEC)); - _stats.updateInputMsUnplayed(msecsInInputRingBuffer); - - QByteArray audioBuffer(reinterpret_cast(networkAudioSamples), numNetworkBytes); - handleAudioInput(audioBuffer); - } + processAudioAndAddToRingBuffer(_inputDevice->readAll(), _inputFormat.channelCount(), + _inputFormat.bytesForDuration(USECS_PER_MSEC)); } void AudioClient::handleDummyAudioInput() { @@ -1201,6 +1202,11 @@ void AudioClient::handleRecordedAudioInput(const QByteArray& audio) { handleAudioInput(audioBuffer); } +void AudioClient::handleTTSAudioInput(const QByteArray& audio) { + QByteArray audioBuffer(audio); + processAudioAndAddToRingBuffer(audioBuffer, 1, 48); +} + void AudioClient::prepareLocalAudioInjectors(std::unique_ptr localAudioLock) { bool doSynchronously = localAudioLock.operator bool(); if (!localAudioLock) { diff --git a/libraries/audio-client/src/AudioClient.h b/libraries/audio-client/src/AudioClient.h index 5e7f1fb8a0..170a355abe 100644 --- a/libraries/audio-client/src/AudioClient.h +++ b/libraries/audio-client/src/AudioClient.h @@ -197,6 +197,7 @@ public slots: void checkInputTimeout(); void handleDummyAudioInput(); void handleRecordedAudioInput(const QByteArray& audio); + void handleTTSAudioInput(const QByteArray& audio); void reset(); void audioMixerKilled(); @@ -289,6 +290,8 @@ private: float azimuthForSource(const glm::vec3& relativePosition); float gainForSource(float distance, float volume); + void processAudioAndAddToRingBuffer(QByteArray& inputByteArray, const uchar& channelCount, const qint32& bytesForDuration); + #ifdef Q_OS_ANDROID QTimer _checkInputTimer; long _inputReadsSinceLastCheck = 0l; From 53226e7924d109be6a1a763da0793d721bbe32be Mon Sep 17 00:00:00 2001 From: Zach Fox Date: Fri, 12 Oct 2018 11:19:52 -0700 Subject: [PATCH 06/18] Prevent overflows; still not working --- interface/src/scripting/TTSScriptingInterface.h | 2 +- libraries/audio-client/src/AudioClient.cpp | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/interface/src/scripting/TTSScriptingInterface.h b/interface/src/scripting/TTSScriptingInterface.h index cb9c6c8c3e..c1fffe67d1 100644 --- a/interface/src/scripting/TTSScriptingInterface.h +++ b/interface/src/scripting/TTSScriptingInterface.h @@ -77,7 +77,7 @@ private: CComPtr m_voiceToken; QByteArray _lastSoundByteArray; - AudioInjectorPointer _lastSoundByteArray; + AudioInjectorPointer _lastSoundAudioInjector; }; #endif // hifi_SpeechScriptingInterface_h diff --git a/libraries/audio-client/src/AudioClient.cpp b/libraries/audio-client/src/AudioClient.cpp index 96f1c97878..12da7ea3be 100644 --- a/libraries/audio-client/src/AudioClient.cpp +++ b/libraries/audio-client/src/AudioClient.cpp @@ -1167,7 +1167,7 @@ void AudioClient::processAudioAndAddToRingBuffer(QByteArray& inputByteArray, con numNetworkSamples, channelCount, _desiredInputFormat.channelCount()); } int bytesInInputRingBuffer = _inputRingBuffer.samplesAvailable() * AudioConstants::SAMPLE_SIZE; - float msecsInInputRingBuffer = bytesInInputRingBuffer / (float)(_inputFormat.bytesForDuration(USECS_PER_MSEC)); + float msecsInInputRingBuffer = bytesInInputRingBuffer / (float)(bytesForDuration); _stats.updateInputMsUnplayed(msecsInInputRingBuffer); QByteArray audioBuffer(reinterpret_cast(networkAudioSamples), numNetworkBytes); @@ -1204,7 +1204,12 @@ void AudioClient::handleRecordedAudioInput(const QByteArray& audio) { void AudioClient::handleTTSAudioInput(const QByteArray& audio) { QByteArray audioBuffer(audio); - processAudioAndAddToRingBuffer(audioBuffer, 1, 48); + while (audioBuffer.size() > 0) { + QByteArray part; + part.append(audioBuffer.data(), AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL); + audioBuffer.remove(0, AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL); + processAudioAndAddToRingBuffer(part, 1, 48); + } } void AudioClient::prepareLocalAudioInjectors(std::unique_ptr localAudioLock) { From 34befd4a52e085bbf548a200e507cc34afe07b3c Mon Sep 17 00:00:00 2001 From: Zach Fox Date: Fri, 12 Oct 2018 12:14:51 -0700 Subject: [PATCH 07/18] Just to make sure, writes data back to a WAV file --- libraries/audio-client/src/AudioClient.cpp | 378 ++++++++++----------- 1 file changed, 188 insertions(+), 190 deletions(-) diff --git a/libraries/audio-client/src/AudioClient.cpp b/libraries/audio-client/src/AudioClient.cpp index 12da7ea3be..858f6e738c 100644 --- a/libraries/audio-client/src/AudioClient.cpp +++ b/libraries/audio-client/src/AudioClient.cpp @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -67,7 +68,7 @@ static const int CHECK_INPUT_READS_MSECS = 2000; static const int MIN_READS_TO_CONSIDER_INPUT_ALIVE = 10; #endif -static const auto DEFAULT_POSITION_GETTER = []{ return Vectors::ZERO; }; +static const auto DEFAULT_POSITION_GETTER = [] { return Vectors::ZERO; }; static const auto DEFAULT_ORIENTATION_GETTER = [] { return Quaternions::IDENTITY; }; static const int DEFAULT_BUFFER_FRAMES = 1; @@ -78,12 +79,11 @@ static const int OUTPUT_CHANNEL_COUNT = 2; static const bool DEFAULT_STARVE_DETECTION_ENABLED = true; static const int STARVE_DETECTION_THRESHOLD = 3; -static const int STARVE_DETECTION_PERIOD = 10 * 1000; // 10 Seconds +static const int STARVE_DETECTION_PERIOD = 10 * 1000; // 10 Seconds Setting::Handle dynamicJitterBufferEnabled("dynamicJitterBuffersEnabled", - InboundAudioStream::DEFAULT_DYNAMIC_JITTER_BUFFER_ENABLED); -Setting::Handle staticJitterBufferFrames("staticJitterBufferFrames", - InboundAudioStream::DEFAULT_STATIC_JITTER_FRAMES); + InboundAudioStream::DEFAULT_DYNAMIC_JITTER_BUFFER_ENABLED); +Setting::Handle staticJitterBufferFrames("staticJitterBufferFrames", InboundAudioStream::DEFAULT_STATIC_JITTER_FRAMES); // protect the Qt internal device list using Mutex = std::mutex; @@ -127,7 +127,7 @@ QAudioDeviceInfo AudioClient::getActiveAudioDevice(QAudio::Mode mode) const { if (mode == QAudio::AudioInput) { return _inputDeviceInfo; - } else { // if (mode == QAudio::AudioOutput) + } else { // if (mode == QAudio::AudioOutput) return _outputDeviceInfo; } } @@ -137,14 +137,13 @@ QList AudioClient::getAudioDevices(QAudio::Mode mode) const { if (mode == QAudio::AudioInput) { return _inputDevices; - } else { // if (mode == QAudio::AudioOutput) + } else { // if (mode == QAudio::AudioOutput) return _outputDevices; } } static void channelUpmix(int16_t* source, int16_t* dest, int numSamples, int numExtraChannels) { - for (int i = 0; i < numSamples/2; i++) { - + for (int i = 0; i < numSamples / 2; i++) { // read 2 samples int16_t left = *source++; int16_t right = *source++; @@ -159,8 +158,7 @@ static void channelUpmix(int16_t* source, int16_t* dest, int numSamples, int num } static void channelDownmix(int16_t* source, int16_t* dest, int numSamples) { - for (int i = 0; i < numSamples/2; i++) { - + for (int i = 0; i < numSamples / 2; i++) { // read 2 samples int16_t left = *source++; int16_t right = *source++; @@ -175,48 +173,22 @@ static inline float convertToFloat(int16_t sample) { } AudioClient::AudioClient() : - AbstractAudioInterface(), - _gate(this), - _audioInput(NULL), - _dummyAudioInput(NULL), - _desiredInputFormat(), - _inputFormat(), - _numInputCallbackBytes(0), - _audioOutput(NULL), - _desiredOutputFormat(), - _outputFormat(), - _outputFrameSize(0), - _numOutputCallbackBytes(0), - _loopbackAudioOutput(NULL), - _loopbackOutputDevice(NULL), - _inputRingBuffer(0), - _localInjectorsStream(0, 1), - _receivedAudioStream(RECEIVED_AUDIO_STREAM_CAPACITY_FRAMES), - _isStereoInput(false), - _outputStarveDetectionStartTimeMsec(0), - _outputStarveDetectionCount(0), + AbstractAudioInterface(), _gate(this), _audioInput(NULL), _dummyAudioInput(NULL), _desiredInputFormat(), _inputFormat(), + _numInputCallbackBytes(0), _audioOutput(NULL), _desiredOutputFormat(), _outputFormat(), _outputFrameSize(0), + _numOutputCallbackBytes(0), _loopbackAudioOutput(NULL), _loopbackOutputDevice(NULL), _inputRingBuffer(0), + _localInjectorsStream(0, 1), _receivedAudioStream(RECEIVED_AUDIO_STREAM_CAPACITY_FRAMES), _isStereoInput(false), + _outputStarveDetectionStartTimeMsec(0), _outputStarveDetectionCount(0), _outputBufferSizeFrames("audioOutputBufferFrames", DEFAULT_BUFFER_FRAMES), _sessionOutputBufferSizeFrames(_outputBufferSizeFrames.get()), _outputStarveDetectionEnabled("audioOutputStarveDetectionEnabled", DEFAULT_STARVE_DETECTION_ENABLED), - _lastInputLoudness(0.0f), - _timeSinceLastClip(-1.0f), - _muted(false), - _shouldEchoLocally(false), - _shouldEchoToServer(false), - _isNoiseGateEnabled(true), - _reverb(false), - _reverbOptions(&_scriptReverbOptions), - _inputToNetworkResampler(NULL), - _networkToOutputResampler(NULL), - _localToOutputResampler(NULL), - _audioLimiter(AudioConstants::SAMPLE_RATE, OUTPUT_CHANNEL_COUNT), - _outgoingAvatarAudioSequenceNumber(0), - _audioOutputIODevice(_localInjectorsStream, _receivedAudioStream, this), - _stats(&_receivedAudioStream), + _lastInputLoudness(0.0f), _timeSinceLastClip(-1.0f), _muted(false), _shouldEchoLocally(false), _shouldEchoToServer(false), + _isNoiseGateEnabled(true), _reverb(false), _reverbOptions(&_scriptReverbOptions), _inputToNetworkResampler(NULL), + _networkToOutputResampler(NULL), _localToOutputResampler(NULL), + _audioLimiter(AudioConstants::SAMPLE_RATE, OUTPUT_CHANNEL_COUNT), _outgoingAvatarAudioSequenceNumber(0), + _audioOutputIODevice(_localInjectorsStream, _receivedAudioStream, this), _stats(&_receivedAudioStream), _positionGetter(DEFAULT_POSITION_GETTER), #if defined(Q_OS_ANDROID) - _checkInputTimer(this), - _isHeadsetPluggedIn(false), + _checkInputTimer(this), _isHeadsetPluggedIn(false), #endif _orientationGetter(DEFAULT_ORIENTATION_GETTER) { // avoid putting a lock in the device callback @@ -226,16 +198,20 @@ AudioClient::AudioClient() : { Setting::Handle::Deprecated("maxFramesOverDesired", InboundAudioStream::MAX_FRAMES_OVER_DESIRED); Setting::Handle::Deprecated("windowStarveThreshold", InboundAudioStream::WINDOW_STARVE_THRESHOLD); - Setting::Handle::Deprecated("windowSecondsForDesiredCalcOnTooManyStarves", InboundAudioStream::WINDOW_SECONDS_FOR_DESIRED_CALC_ON_TOO_MANY_STARVES); - Setting::Handle::Deprecated("windowSecondsForDesiredReduction", InboundAudioStream::WINDOW_SECONDS_FOR_DESIRED_REDUCTION); + Setting::Handle::Deprecated("windowSecondsForDesiredCalcOnTooManyStarves", + InboundAudioStream::WINDOW_SECONDS_FOR_DESIRED_CALC_ON_TOO_MANY_STARVES); + Setting::Handle::Deprecated("windowSecondsForDesiredReduction", + InboundAudioStream::WINDOW_SECONDS_FOR_DESIRED_REDUCTION); Setting::Handle::Deprecated("useStDevForJitterCalc", InboundAudioStream::USE_STDEV_FOR_JITTER); Setting::Handle::Deprecated("repetitionWithFade", InboundAudioStream::REPETITION_WITH_FADE); } - connect(&_receivedAudioStream, &MixedProcessedAudioStream::processSamples, - this, &AudioClient::processReceivedSamples, Qt::DirectConnection); + connect(&_receivedAudioStream, &MixedProcessedAudioStream::processSamples, this, &AudioClient::processReceivedSamples, + Qt::DirectConnection); connect(this, &AudioClient::changeDevice, this, [=](const QAudioDeviceInfo& outputDeviceInfo) { - qCDebug(audioclient) << "got AudioClient::changeDevice signal, about to call switchOutputToAudioDevice() outputDeviceInfo: [" << outputDeviceInfo.deviceName() << "]"; + qCDebug(audioclient) + << "got AudioClient::changeDevice signal, about to call switchOutputToAudioDevice() outputDeviceInfo: [" + << outputDeviceInfo.deviceName() << "]"; switchOutputToAudioDevice(outputDeviceInfo); }); @@ -244,20 +220,18 @@ AudioClient::AudioClient() : // initialize wasapi; if getAvailableDevices is called from the CheckDevicesThread before this, it will crash getAvailableDevices(QAudio::AudioInput); getAvailableDevices(QAudio::AudioOutput); - + // start a thread to detect any device changes _checkDevicesTimer = new QTimer(this); - connect(_checkDevicesTimer, &QTimer::timeout, this, [this] { - QtConcurrent::run(QThreadPool::globalInstance(), [this] { checkDevices(); }); - }); + connect(_checkDevicesTimer, &QTimer::timeout, this, + [this] { QtConcurrent::run(QThreadPool::globalInstance(), [this] { checkDevices(); }); }); const unsigned long DEVICE_CHECK_INTERVAL_MSECS = 2 * 1000; _checkDevicesTimer->start(DEVICE_CHECK_INTERVAL_MSECS); // start a thread to detect peak value changes _checkPeakValuesTimer = new QTimer(this); - connect(_checkPeakValuesTimer, &QTimer::timeout, this, [this] { - QtConcurrent::run(QThreadPool::globalInstance(), [this] { checkPeakValues(); }); - }); + connect(_checkPeakValuesTimer, &QTimer::timeout, this, + [this] { QtConcurrent::run(QThreadPool::globalInstance(), [this] { checkPeakValues(); }); }); const unsigned long PEAK_VALUES_CHECK_INTERVAL_MSECS = 50; _checkPeakValuesTimer->start(PEAK_VALUES_CHECK_INTERVAL_MSECS); @@ -289,11 +263,11 @@ void AudioClient::customDeleter() { } void AudioClient::handleMismatchAudioFormat(SharedNodePointer node, const QString& currentCodec, const QString& recievedCodec) { - qCDebug(audioclient) << __FUNCTION__ << "sendingNode:" << *node << "currentCodec:" << currentCodec << "recievedCodec:" << recievedCodec; + qCDebug(audioclient) << __FUNCTION__ << "sendingNode:" << *node << "currentCodec:" << currentCodec + << "recievedCodec:" << recievedCodec; selectAudioFormat(recievedCodec); } - void AudioClient::reset() { _receivedAudioStream.reset(); _stats.reset(); @@ -321,7 +295,7 @@ void AudioClient::setAudioPaused(bool pause) { QAudioDeviceInfo getNamedAudioDeviceForMode(QAudio::Mode mode, const QString& deviceName) { QAudioDeviceInfo result; - foreach(QAudioDeviceInfo audioDevice, getAvailableDevices(mode)) { + foreach (QAudioDeviceInfo audioDevice, getAvailableDevices(mode)) { if (audioDevice.deviceName().trimmed() == deviceName.trimmed()) { result = audioDevice; break; @@ -356,7 +330,8 @@ QString AudioClient::getWinDeviceName(wchar_t* guid) { HRESULT hr = S_OK; CoInitialize(nullptr); IMMDeviceEnumerator* pMMDeviceEnumerator = nullptr; - CoCreateInstance(__uuidof(MMDeviceEnumerator), nullptr, CLSCTX_ALL, __uuidof(IMMDeviceEnumerator), (void**)&pMMDeviceEnumerator); + CoCreateInstance(__uuidof(MMDeviceEnumerator), nullptr, CLSCTX_ALL, __uuidof(IMMDeviceEnumerator), + (void**)&pMMDeviceEnumerator); IMMDevice* pEndpoint; hr = pMMDeviceEnumerator->GetDevice(guid, &pEndpoint); if (hr == E_NOTFOUND) { @@ -380,34 +355,26 @@ QAudioDeviceInfo defaultAudioDeviceForMode(QAudio::Mode mode) { if (getAvailableDevices(mode).size() > 1) { AudioDeviceID defaultDeviceID = 0; uint32_t propertySize = sizeof(AudioDeviceID); - AudioObjectPropertyAddress propertyAddress = { - kAudioHardwarePropertyDefaultInputDevice, - kAudioObjectPropertyScopeGlobal, - kAudioObjectPropertyElementMaster - }; + AudioObjectPropertyAddress propertyAddress = { kAudioHardwarePropertyDefaultInputDevice, + kAudioObjectPropertyScopeGlobal, kAudioObjectPropertyElementMaster }; if (mode == QAudio::AudioOutput) { propertyAddress.mSelector = kAudioHardwarePropertyDefaultOutputDevice; } - - OSStatus getPropertyError = AudioObjectGetPropertyData(kAudioObjectSystemObject, - &propertyAddress, - 0, - NULL, - &propertySize, - &defaultDeviceID); + OSStatus getPropertyError = + AudioObjectGetPropertyData(kAudioObjectSystemObject, &propertyAddress, 0, NULL, &propertySize, &defaultDeviceID); if (!getPropertyError && propertySize) { CFStringRef deviceName = NULL; propertySize = sizeof(deviceName); propertyAddress.mSelector = kAudioDevicePropertyDeviceNameCFString; - getPropertyError = AudioObjectGetPropertyData(defaultDeviceID, &propertyAddress, 0, - NULL, &propertySize, &deviceName); + getPropertyError = + AudioObjectGetPropertyData(defaultDeviceID, &propertyAddress, 0, NULL, &propertySize, &deviceName); if (!getPropertyError && propertySize) { // find a device in the list that matches the name we have and return it - foreach(QAudioDeviceInfo audioDevice, getAvailableDevices(mode)) { + foreach (QAudioDeviceInfo audioDevice, getAvailableDevices(mode)) { if (audioDevice.deviceName() == CFStringGetCStringPtr(deviceName, kCFStringEncodingMacRoman)) { return audioDevice; } @@ -419,7 +386,7 @@ QAudioDeviceInfo defaultAudioDeviceForMode(QAudio::Mode mode) { #ifdef WIN32 QString deviceName; //Check for Windows Vista or higher, IMMDeviceEnumerator doesn't work below that. - if (!IsWindowsVistaOrGreater()) { // lower then vista + if (!IsWindowsVistaOrGreater()) { // lower then vista if (mode == QAudio::AudioInput) { WAVEINCAPS wic; // first use WAVE_MAPPER to get the default devices manufacturer ID @@ -441,9 +408,11 @@ QAudioDeviceInfo defaultAudioDeviceForMode(QAudio::Mode mode) { HRESULT hr = S_OK; CoInitialize(NULL); IMMDeviceEnumerator* pMMDeviceEnumerator = NULL; - CoCreateInstance(__uuidof(MMDeviceEnumerator), NULL, CLSCTX_ALL, __uuidof(IMMDeviceEnumerator), (void**)&pMMDeviceEnumerator); + CoCreateInstance(__uuidof(MMDeviceEnumerator), NULL, CLSCTX_ALL, __uuidof(IMMDeviceEnumerator), + (void**)&pMMDeviceEnumerator); IMMDevice* pEndpoint; - hr = pMMDeviceEnumerator->GetDefaultAudioEndpoint(mode == QAudio::AudioOutput ? eRender : eCapture, eMultimedia, &pEndpoint); + hr = pMMDeviceEnumerator->GetDefaultAudioEndpoint(mode == QAudio::AudioOutput ? eRender : eCapture, eMultimedia, + &pEndpoint); if (hr == E_NOTFOUND) { printf("Audio Error: device not found\n"); deviceName = QString("NONE"); @@ -457,22 +426,22 @@ QAudioDeviceInfo defaultAudioDeviceForMode(QAudio::Mode mode) { CoUninitialize(); } - qCDebug(audioclient) << "defaultAudioDeviceForMode mode: " << (mode == QAudio::AudioOutput ? "Output" : "Input") - << " [" << deviceName << "] [" << getNamedAudioDeviceForMode(mode, deviceName).deviceName() << "]"; + qCDebug(audioclient) << "defaultAudioDeviceForMode mode: " << (mode == QAudio::AudioOutput ? "Output" : "Input") << " [" + << deviceName << "] [" << getNamedAudioDeviceForMode(mode, deviceName).deviceName() << "]"; return getNamedAudioDeviceForMode(mode, deviceName); #endif -#if defined (Q_OS_ANDROID) +#if defined(Q_OS_ANDROID) if (mode == QAudio::AudioInput) { Setting::Handle enableAEC(SETTING_AEC_KEY, false); bool aecEnabled = enableAEC.get(); auto audioClient = DependencyManager::get(); - bool headsetOn = audioClient? audioClient->isHeadsetPluggedIn() : false; + bool headsetOn = audioClient ? audioClient->isHeadsetPluggedIn() : false; auto inputDevices = QAudioDeviceInfo::availableDevices(QAudio::AudioInput); for (auto inputDevice : inputDevices) { if (((headsetOn || !aecEnabled) && inputDevice.deviceName() == VOICE_RECOGNITION) || - ((!headsetOn && aecEnabled) && inputDevice.deviceName() == VOICE_COMMUNICATION)) { + ((!headsetOn && aecEnabled) && inputDevice.deviceName() == VOICE_COMMUNICATION)) { return inputDevice; } } @@ -486,11 +455,8 @@ bool AudioClient::getNamedAudioDeviceForModeExists(QAudio::Mode mode, const QStr return (getNamedAudioDeviceForMode(mode, deviceName).deviceName() == deviceName); } - // attempt to use the native sample rate and channel count -bool nativeFormatForAudioDevice(const QAudioDeviceInfo& audioDevice, - QAudioFormat& audioFormat) { - +bool nativeFormatForAudioDevice(const QAudioDeviceInfo& audioDevice, QAudioFormat& audioFormat) { audioFormat = audioDevice.preferredFormat(); audioFormat.setCodec("audio/pcm"); @@ -513,7 +479,6 @@ bool nativeFormatForAudioDevice(const QAudioDeviceInfo& audioDevice, bool adjustedFormatForAudioDevice(const QAudioDeviceInfo& audioDevice, const QAudioFormat& desiredAudioFormat, QAudioFormat& adjustedAudioFormat) { - qCDebug(audioclient) << "The desired format for audio I/O is" << desiredAudioFormat; #if defined(Q_OS_ANDROID) || defined(Q_OS_OSX) @@ -539,12 +504,11 @@ bool adjustedFormatForAudioDevice(const QAudioDeviceInfo& audioDevice, // Attempt the device sample rate and channel count in decreasing order of preference. // const int sampleRates[] = { 48000, 44100, 32000, 24000, 16000, 96000, 192000, 88200, 176400 }; - const int inputChannels[] = { 1, 2, 4, 6, 8 }; // prefer mono - const int outputChannels[] = { 2, 4, 6, 8, 1 }; // prefer stereo, downmix as last resort + const int inputChannels[] = { 1, 2, 4, 6, 8 }; // prefer mono + const int outputChannels[] = { 2, 4, 6, 8, 1 }; // prefer stereo, downmix as last resort for (int channelCount : (desiredAudioFormat.channelCount() == 1 ? inputChannels : outputChannels)) { for (int sampleRate : sampleRates) { - adjustedAudioFormat.setChannelCount(channelCount); adjustedAudioFormat.setSampleRate(sampleRate); @@ -554,11 +518,14 @@ bool adjustedFormatForAudioDevice(const QAudioDeviceInfo& audioDevice, } } - return false; // a supported format could not be found + return false; // a supported format could not be found } -bool sampleChannelConversion(const int16_t* sourceSamples, int16_t* destinationSamples, unsigned int numSourceSamples, - const int sourceChannelCount, const int destinationChannelCount) { +bool sampleChannelConversion(const int16_t* sourceSamples, + int16_t* destinationSamples, + unsigned int numSourceSamples, + const int sourceChannelCount, + const int destinationChannelCount) { if (sourceChannelCount == 2 && destinationChannelCount == 1) { // loop through the stereo input audio samples and average every two samples for (uint i = 0; i < numSourceSamples; i += 2) { @@ -567,7 +534,6 @@ bool sampleChannelConversion(const int16_t* sourceSamples, int16_t* destinationS return true; } else if (sourceChannelCount == 1 && destinationChannelCount == 2) { - // loop through the mono input audio and repeat each sample twice for (uint i = 0; i < numSourceSamples; ++i) { destinationSamples[i * 2] = destinationSamples[(i * 2) + 1] = sourceSamples[i]; @@ -580,32 +546,31 @@ bool sampleChannelConversion(const int16_t* sourceSamples, int16_t* destinationS } void possibleResampling(AudioSRC* resampler, - const int16_t* sourceSamples, int16_t* destinationSamples, - unsigned int numSourceSamples, unsigned int numDestinationSamples, - const int sourceChannelCount, const int destinationChannelCount) { - + const int16_t* sourceSamples, + int16_t* destinationSamples, + unsigned int numSourceSamples, + unsigned int numDestinationSamples, + const int sourceChannelCount, + const int destinationChannelCount) { if (numSourceSamples > 0) { if (!resampler) { - if (!sampleChannelConversion(sourceSamples, destinationSamples, numSourceSamples, - sourceChannelCount, destinationChannelCount)) { + if (!sampleChannelConversion(sourceSamples, destinationSamples, numSourceSamples, sourceChannelCount, + destinationChannelCount)) { // no conversion, we can copy the samples directly across memcpy(destinationSamples, sourceSamples, numSourceSamples * AudioConstants::SAMPLE_SIZE); } } else { - if (sourceChannelCount != destinationChannelCount) { - int numChannelCoversionSamples = (numSourceSamples * destinationChannelCount) / sourceChannelCount; int16_t* channelConversionSamples = new int16_t[numChannelCoversionSamples]; - sampleChannelConversion(sourceSamples, channelConversionSamples, numSourceSamples, - sourceChannelCount, destinationChannelCount); + sampleChannelConversion(sourceSamples, channelConversionSamples, numSourceSamples, sourceChannelCount, + destinationChannelCount); resampler->render(channelConversionSamples, destinationSamples, numChannelCoversionSamples); delete[] channelConversionSamples; } else { - unsigned int numAdjustedSourceSamples = numSourceSamples; unsigned int numAdjustedDestinationSamples = numDestinationSamples; @@ -621,7 +586,6 @@ void possibleResampling(AudioSRC* resampler, } void AudioClient::start() { - // set up the desired audio format _desiredInputFormat.setSampleRate(AudioConstants::SAMPLE_RATE); _desiredInputFormat.setSampleSize(16); @@ -710,7 +674,6 @@ void AudioClient::handleAudioDataPacket(QSharedPointer message) nodeList->flagTimeForConnectionStep(LimitedNodeList::ConnectionStep::ReceiveFirstAudioPacket); if (_audioOutput) { - if (!_hasReceivedFirstPacket) { _hasReceivedFirstPacket = true; @@ -727,8 +690,8 @@ void AudioClient::handleAudioDataPacket(QSharedPointer message) } } -AudioClient::Gate::Gate(AudioClient* audioClient) : - _audioClient(audioClient) {} +AudioClient::Gate::Gate(AudioClient* audioClient) : _audioClient(audioClient) { +} void AudioClient::Gate::setIsSimulatingJitter(bool enable) { std::lock_guard lock(_mutex); @@ -781,7 +744,6 @@ void AudioClient::Gate::flush() { _index = 0; } - void AudioClient::handleNoisyMutePacket(QSharedPointer message) { if (!_muted) { setMuted(true); @@ -827,7 +789,6 @@ void AudioClient::handleSelectedAudioFormat(QSharedPointer mess } void AudioClient::selectAudioFormat(const QString& selectedCodecName) { - _selectedCodecName = selectedCodecName; qCDebug(audioclient) << "Selected Codec:" << _selectedCodecName << "isStereoInput:" << _isStereoInput; @@ -845,12 +806,12 @@ void AudioClient::selectAudioFormat(const QString& selectedCodecName) { if (_selectedCodecName == plugin->getName()) { _codec = plugin; _receivedAudioStream.setupCodec(plugin, _selectedCodecName, AudioConstants::STEREO); - _encoder = plugin->createEncoder(AudioConstants::SAMPLE_RATE, _isStereoInput ? AudioConstants::STEREO : AudioConstants::MONO); + _encoder = plugin->createEncoder(AudioConstants::SAMPLE_RATE, + _isStereoInput ? AudioConstants::STEREO : AudioConstants::MONO); qCDebug(audioclient) << "Selected Codec Plugin:" << _codec.get(); break; } } - } bool AudioClient::switchAudioDevice(QAudio::Mode mode, const QAudioDeviceInfo& deviceInfo) { @@ -862,7 +823,7 @@ bool AudioClient::switchAudioDevice(QAudio::Mode mode, const QAudioDeviceInfo& d if (mode == QAudio::AudioInput) { return switchInputToAudioDevice(device); - } else { // if (mode == QAudio::AudioOutput) + } else { // if (mode == QAudio::AudioOutput) return switchOutputToAudioDevice(device); } } @@ -904,8 +865,8 @@ void AudioClient::configureReverb() { p.sampleRate = _outputFormat.sampleRate(); p.wetDryMix = 100.0f; p.preDelay = 0.0f; - p.earlyGain = -96.0f; // disable ER - p.lateGain += _reverbOptions->getWetDryMix() * (24.0f/100.0f) - 24.0f; // -0dB to -24dB, based on wetDryMix + p.earlyGain = -96.0f; // disable ER + p.lateGain += _reverbOptions->getWetDryMix() * (24.0f / 100.0f) - 24.0f; // -0dB to -24dB, based on wetDryMix p.lateMixLeft = 0.0f; p.lateMixRight = 0.0f; @@ -915,7 +876,6 @@ void AudioClient::configureReverb() { void AudioClient::updateReverbOptions() { bool reverbChanged = false; if (_receivedAudioStream.hasReverb()) { - if (_zoneReverbOptions.getReverbTime() != _receivedAudioStream.getRevebTime()) { _zoneReverbOptions.setReverbTime(_receivedAudioStream.getRevebTime()); reverbChanged = true; @@ -1020,7 +980,8 @@ void AudioClient::handleLocalEchoAndReverb(QByteArray& inputByteArray) { int16_t* loopbackSamples = reinterpret_cast(loopBackByteArray.data()); // upmix mono to stereo - if (!sampleChannelConversion(inputSamples, loopbackSamples, numInputSamples, _inputFormat.channelCount(), OUTPUT_CHANNEL_COUNT)) { + if (!sampleChannelConversion(inputSamples, loopbackSamples, numInputSamples, _inputFormat.channelCount(), + OUTPUT_CHANNEL_COUNT)) { // no conversion, just copy the samples memcpy(loopbackSamples, inputSamples, numInputSamples * AudioConstants::SAMPLE_SIZE); } @@ -1028,17 +989,15 @@ void AudioClient::handleLocalEchoAndReverb(QByteArray& inputByteArray) { // apply stereo reverb at the source, to the loopback audio if (!_shouldEchoLocally && hasReverb) { updateReverbOptions(); - _sourceReverb.render(loopbackSamples, loopbackSamples, numLoopbackSamples/2); + _sourceReverb.render(loopbackSamples, loopbackSamples, numLoopbackSamples / 2); } // if required, upmix or downmix to deviceChannelCount int deviceChannelCount = _outputFormat.channelCount(); if (deviceChannelCount == OUTPUT_CHANNEL_COUNT) { - _loopbackOutputDevice->write(loopBackByteArray); } else { - static QByteArray deviceByteArray; int numDeviceSamples = (numLoopbackSamples * deviceChannelCount) / OUTPUT_CHANNEL_COUNT; @@ -1074,7 +1033,7 @@ void AudioClient::handleAudioInput(QByteArray& audioBuffer) { } int32_t loudness = 0; - assert(numSamples < 65536); // int32_t loudness cannot overflow + assert(numSamples < 65536); // int32_t loudness cannot overflow bool didClip = false; for (int i = 0; i < numSamples; ++i) { const int32_t CLIPPING_THRESHOLD = (int32_t)(AudioConstants::MAX_SAMPLE_VALUE * 0.9f); @@ -1129,13 +1088,14 @@ void AudioClient::handleAudioInput(QByteArray& audioBuffer) { } emitAudioPacket(encodedBuffer.data(), encodedBuffer.size(), _outgoingAvatarAudioSequenceNumber, _isStereoInput, - audioTransform, avatarBoundingBoxCorner, avatarBoundingBoxScale, - packetType, _selectedCodecName); + audioTransform, avatarBoundingBoxCorner, avatarBoundingBoxScale, packetType, _selectedCodecName); _stats.sentPacket(); } } -void AudioClient::processAudioAndAddToRingBuffer(QByteArray& inputByteArray, const uchar& channelCount, const qint32& bytesForDuration) { +void AudioClient::processAudioAndAddToRingBuffer(QByteArray& inputByteArray, + const uchar& channelCount, + const qint32& bytesForDuration) { // input samples required to produce exactly NETWORK_FRAME_SAMPLES of output const int inputSamplesRequired = (_inputToNetworkResampler ? _inputToNetworkResampler->getMinInput(AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL) @@ -1189,11 +1149,10 @@ void AudioClient::handleMicAudioInput() { } void AudioClient::handleDummyAudioInput() { - const int numNetworkBytes = _isStereoInput - ? AudioConstants::NETWORK_FRAME_BYTES_STEREO - : AudioConstants::NETWORK_FRAME_BYTES_PER_CHANNEL; + const int numNetworkBytes = + _isStereoInput ? AudioConstants::NETWORK_FRAME_BYTES_STEREO : AudioConstants::NETWORK_FRAME_BYTES_PER_CHANNEL; - QByteArray audioBuffer(numNetworkBytes, 0); // silent + QByteArray audioBuffer(numNetworkBytes, 0); // silent handleAudioInput(audioBuffer); } @@ -1202,13 +1161,59 @@ void AudioClient::handleRecordedAudioInput(const QByteArray& audio) { handleAudioInput(audioBuffer); } + int rawToWav(const char* rawData, const int& rawLength, const char* wavfn, long frequency) { + long chunksize = 0x10; + + struct { + unsigned short wFormatTag; + unsigned short wChannels; + unsigned long dwSamplesPerSec; + unsigned long dwAvgBytesPerSec; + unsigned short wBlockAlign; + unsigned short wBitsPerSample; + } fmt; + + long samplecount = rawLength / 2; + long riffsize = samplecount * 2 + 0x24; + long datasize = samplecount * 2; + + FILE* wav = fopen(wavfn, "wb"); + if (!wav) { + return -3; + } + + fwrite("RIFF", 1, 4, wav); + fwrite(&riffsize, 4, 1, wav); + fwrite("WAVEfmt ", 1, 8, wav); + fwrite(&chunksize, 4, 1, wav); + + fmt.wFormatTag = 1; // PCM + fmt.wChannels = 1; // MONO + fmt.dwSamplesPerSec = frequency * 1; + fmt.dwAvgBytesPerSec = frequency * 1 * 2; // 16 bit + fmt.wBlockAlign = 2; + fmt.wBitsPerSample = 16; + + fwrite(&fmt, sizeof(fmt), 1, wav); + fwrite("data", 1, 4, wav); + fwrite(&datasize, 4, 1, wav); + fwrite(rawData, 1, rawLength, wav); + fclose(wav); +} + void AudioClient::handleTTSAudioInput(const QByteArray& audio) { QByteArray audioBuffer(audio); + QVector audioBufferReal; + + QString filename = QString::number(usecTimestampNow()); + QString path = PathUtils::getAppDataPath() + "Audio/" + filename + ".wav"; + rawToWav(audioBuffer.data(), audioBuffer.size(), path.toLocal8Bit(), 24000); + while (audioBuffer.size() > 0) { QByteArray part; part.append(audioBuffer.data(), AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL); audioBuffer.remove(0, AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL); - processAudioAndAddToRingBuffer(part, 1, 48); + processAudioAndAddToRingBuffer(part, 1, 48); } } @@ -1234,9 +1239,8 @@ void AudioClient::prepareLocalAudioInjectors(std::unique_ptr localAudioLoc int bufferCapacity = _localInjectorsStream.getSampleCapacity(); int maxOutputSamples = AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL * AudioConstants::STEREO; if (_localToOutputResampler) { - maxOutputSamples = - _localToOutputResampler->getMaxOutput(AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL) * - AudioConstants::STEREO; + maxOutputSamples = _localToOutputResampler->getMaxOutput(AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL) * + AudioConstants::STEREO; } samplesNeeded = bufferCapacity - _localSamplesAvailable.load(std::memory_order_relaxed); @@ -1259,7 +1263,7 @@ void AudioClient::prepareLocalAudioInjectors(std::unique_ptr localAudioLoc if (_localToOutputResampler) { // resample to output sample rate int frames = _localToOutputResampler->render(_localMixBuffer, _localOutputMixBuffer, - AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL); + AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL); // write to local injectors' ring buffer samples = frames * AudioConstants::STEREO; @@ -1268,8 +1272,7 @@ void AudioClient::prepareLocalAudioInjectors(std::unique_ptr localAudioLoc } else { // write to local injectors' ring buffer samples = AudioConstants::NETWORK_FRAME_SAMPLES_STEREO; - _localInjectorsStream.writeSamples(_localMixBuffer, - AudioConstants::NETWORK_FRAME_SAMPLES_STEREO); + _localInjectorsStream.writeSamples(_localMixBuffer, AudioConstants::NETWORK_FRAME_SAMPLES_STEREO); } _localSamplesAvailable.fetch_add(samples, std::memory_order_release); @@ -1294,18 +1297,16 @@ bool AudioClient::mixLocalAudioInjectors(float* mixBuffer) { // the lock guarantees that injectorBuffer, if found, is invariant AudioInjectorLocalBuffer* injectorBuffer = injector->getLocalBuffer(); if (injectorBuffer) { - static const int HRTF_DATASET_INDEX = 1; - int numChannels = injector->isAmbisonic() ? AudioConstants::AMBISONIC : (injector->isStereo() ? AudioConstants::STEREO : AudioConstants::MONO); + int numChannels = injector->isAmbisonic() ? AudioConstants::AMBISONIC + : (injector->isStereo() ? AudioConstants::STEREO : AudioConstants::MONO); size_t bytesToRead = numChannels * AudioConstants::NETWORK_FRAME_BYTES_PER_CHANNEL; // get one frame from the injector memset(_localScratchBuffer, 0, bytesToRead); if (0 < injectorBuffer->readData((char*)_localScratchBuffer, bytesToRead)) { - if (injector->isAmbisonic()) { - // no distance attenuation float gain = injector->getVolume(); @@ -1322,11 +1323,10 @@ bool AudioClient::mixLocalAudioInjectors(float* mixBuffer) { float qz = relativeOrientation.y; // Ambisonic gets spatialized into mixBuffer - injector->getLocalFOA().render(_localScratchBuffer, mixBuffer, HRTF_DATASET_INDEX, - qw, qx, qy, qz, gain, AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL); + injector->getLocalFOA().render(_localScratchBuffer, mixBuffer, HRTF_DATASET_INDEX, qw, qx, qy, qz, gain, + AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL); } else if (injector->isStereo()) { - // stereo gets directly mixed into mixBuffer float gain = injector->getVolume(); for (int i = 0; i < AudioConstants::NETWORK_FRAME_SAMPLES_STEREO; i++) { @@ -1334,7 +1334,6 @@ bool AudioClient::mixLocalAudioInjectors(float* mixBuffer) { } } else { - // calculate distance, gain and azimuth for hrtf glm::vec3 relativePosition = injector->getPosition() - _positionGetter(); float distance = glm::max(glm::length(relativePosition), EPSILON); @@ -1342,19 +1341,17 @@ bool AudioClient::mixLocalAudioInjectors(float* mixBuffer) { float azimuth = azimuthForSource(relativePosition); // mono gets spatialized into mixBuffer - injector->getLocalHRTF().render(_localScratchBuffer, mixBuffer, HRTF_DATASET_INDEX, - azimuth, distance, gain, AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL); + injector->getLocalHRTF().render(_localScratchBuffer, mixBuffer, HRTF_DATASET_INDEX, azimuth, distance, gain, + AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL); } } else { - qCDebug(audioclient) << "injector has no more data, marking finished for removal"; injector->finishLocalInjection(); injectorsToRemove.append(injector); } } else { - qCDebug(audioclient) << "injector has no local buffer, marking as finished for removal"; injector->finishLocalInjection(); injectorsToRemove.append(injector); @@ -1373,7 +1370,6 @@ bool AudioClient::mixLocalAudioInjectors(float* mixBuffer) { } void AudioClient::processReceivedSamples(const QByteArray& decodedBuffer, QByteArray& outputBuffer) { - const int16_t* decodedSamples = reinterpret_cast(decodedBuffer.data()); assert(decodedBuffer.size() == AudioConstants::NETWORK_FRAME_BYTES_STEREO); @@ -1442,7 +1438,6 @@ void AudioClient::setNoiseReduction(bool enable, bool emitSignal) { } } - bool AudioClient::setIsStereoInput(bool isStereoInput) { bool stereoInputChanged = false; if (isStereoInput != _isStereoInput && _inputDeviceInfo.supportedChannelCounts().contains(2)) { @@ -1460,7 +1455,8 @@ bool AudioClient::setIsStereoInput(bool isStereoInput) { if (_encoder) { _codec->releaseEncoder(_encoder); } - _encoder = _codec->createEncoder(AudioConstants::SAMPLE_RATE, _isStereoInput ? AudioConstants::STEREO : AudioConstants::MONO); + _encoder = _codec->createEncoder(AudioConstants::SAMPLE_RATE, + _isStereoInput ? AudioConstants::STEREO : AudioConstants::MONO); } qCDebug(audioclient) << "Reset Codec:" << _selectedCodecName << "isStereoInput:" << _isStereoInput; @@ -1500,7 +1496,7 @@ bool AudioClient::outputLocalInjector(const AudioInjectorPointer& injector) { void AudioClient::outputFormatChanged() { _outputFrameSize = (AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL * OUTPUT_CHANNEL_COUNT * _outputFormat.sampleRate()) / - _desiredOutputFormat.sampleRate(); + _desiredOutputFormat.sampleRate(); _receivedAudioStream.outputFormatChanged(_outputFormat.sampleRate(), OUTPUT_CHANNEL_COUNT); } @@ -1514,7 +1510,7 @@ bool AudioClient::switchInputToAudioDevice(const QAudioDeviceInfo inputDeviceInf Lock lock(_deviceMutex); #if defined(Q_OS_ANDROID) - _shouldRestartInputSetup = false; // avoid a double call to _audioInput->start() from audioInputStateChanged + _shouldRestartInputSetup = false; // avoid a double call to _audioInput->start() from audioInputStateChanged #endif // cleanup any previously initialized device @@ -1565,15 +1561,15 @@ bool AudioClient::switchInputToAudioDevice(const QAudioDeviceInfo inputDeviceInf // we've got the best we can get for input // if required, setup a resampler for this input to our desired network format - if (_inputFormat != _desiredInputFormat - && _inputFormat.sampleRate() != _desiredInputFormat.sampleRate()) { + if (_inputFormat != _desiredInputFormat && _inputFormat.sampleRate() != _desiredInputFormat.sampleRate()) { qCDebug(audioclient) << "Attemping to create a resampler for input format to network format."; assert(_inputFormat.sampleSize() == 16); assert(_desiredInputFormat.sampleSize() == 16); int channelCount = (_inputFormat.channelCount() == 2 && _desiredInputFormat.channelCount() == 2) ? 2 : 1; - _inputToNetworkResampler = new AudioSRC(_inputFormat.sampleRate(), _desiredInputFormat.sampleRate(), channelCount); + _inputToNetworkResampler = + new AudioSRC(_inputFormat.sampleRate(), _desiredInputFormat.sampleRate(), channelCount); } else { qCDebug(audioclient) << "No resampling required for audio input to match desired network format."; @@ -1607,7 +1603,7 @@ bool AudioClient::switchInputToAudioDevice(const QAudioDeviceInfo inputDeviceInf connect(_inputDevice, SIGNAL(readyRead()), this, SLOT(handleMicAudioInput())); supportedFormat = true; } else { - qCDebug(audioclient) << "Error starting audio input -" << _audioInput->error(); + qCDebug(audioclient) << "Error starting audio input -" << _audioInput->error(); _audioInput->deleteLater(); _audioInput = NULL; } @@ -1677,7 +1673,7 @@ void AudioClient::checkInputTimeout() { void AudioClient::setHeadsetPluggedIn(bool pluggedIn) { #if defined(Q_OS_ANDROID) if (pluggedIn == !_isHeadsetPluggedIn && !_inputDeviceInfo.isNull()) { - QAndroidJniObject brand = QAndroidJniObject::getStaticObjectField("android/os/Build", "BRAND"); + QAndroidJniObject brand = QAndroidJniObject::getStaticObjectField("android/os/Build", "BRAND"); // some samsung phones needs more time to shutdown the previous input device if (brand.toString().contains("samsung", Qt::CaseInsensitive)) { switchInputToAudioDevice(QAudioDeviceInfo(), true); @@ -1715,8 +1711,8 @@ void AudioClient::outputNotify() { int newOutputBufferSizeFrames = setOutputBufferSize(oldOutputBufferSizeFrames + 1, false); if (newOutputBufferSizeFrames > oldOutputBufferSizeFrames) { - qCDebug(audioclient, - "Starve threshold surpassed (%d starves in %d ms)", _outputStarveDetectionCount, dt); + qCDebug(audioclient, "Starve threshold surpassed (%d starves in %d ms)", _outputStarveDetectionCount, + dt); } _outputStarveDetectionStartTimeMsec = now; @@ -1730,7 +1726,8 @@ void AudioClient::outputNotify() { bool AudioClient::switchOutputToAudioDevice(const QAudioDeviceInfo outputDeviceInfo, bool isShutdownRequest) { Q_ASSERT_X(QThread::currentThread() == thread(), Q_FUNC_INFO, "Function invoked on wrong thread"); - qCDebug(audioclient) << "AudioClient::switchOutputToAudioDevice() outputDeviceInfo: [" << outputDeviceInfo.deviceName() << "]"; + qCDebug(audioclient) << "AudioClient::switchOutputToAudioDevice() outputDeviceInfo: [" << outputDeviceInfo.deviceName() + << "]"; bool supportedFormat = false; // NOTE: device start() uses the Qt internal device list @@ -1789,15 +1786,16 @@ bool AudioClient::switchOutputToAudioDevice(const QAudioDeviceInfo outputDeviceI // we've got the best we can get for input // if required, setup a resampler for this input to our desired network format - if (_desiredOutputFormat != _outputFormat - && _desiredOutputFormat.sampleRate() != _outputFormat.sampleRate()) { + if (_desiredOutputFormat != _outputFormat && _desiredOutputFormat.sampleRate() != _outputFormat.sampleRate()) { qCDebug(audioclient) << "Attemping to create a resampler for network format to output format."; assert(_desiredOutputFormat.sampleSize() == 16); assert(_outputFormat.sampleSize() == 16); - _networkToOutputResampler = new AudioSRC(_desiredOutputFormat.sampleRate(), _outputFormat.sampleRate(), OUTPUT_CHANNEL_COUNT); - _localToOutputResampler = new AudioSRC(_desiredOutputFormat.sampleRate(), _outputFormat.sampleRate(), OUTPUT_CHANNEL_COUNT); + _networkToOutputResampler = + new AudioSRC(_desiredOutputFormat.sampleRate(), _outputFormat.sampleRate(), OUTPUT_CHANNEL_COUNT); + _localToOutputResampler = + new AudioSRC(_desiredOutputFormat.sampleRate(), _outputFormat.sampleRate(), OUTPUT_CHANNEL_COUNT); } else { qCDebug(audioclient) << "No resampling required for network output to match actual output format."; @@ -1809,7 +1807,9 @@ bool AudioClient::switchOutputToAudioDevice(const QAudioDeviceInfo outputDeviceI _audioOutput = new QAudioOutput(outputDeviceInfo, _outputFormat, this); int deviceChannelCount = _outputFormat.channelCount(); - int frameSize = (AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL * deviceChannelCount * _outputFormat.sampleRate()) / _desiredOutputFormat.sampleRate(); + int frameSize = + (AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL * deviceChannelCount * _outputFormat.sampleRate()) / + _desiredOutputFormat.sampleRate(); int requestedSize = _sessionOutputBufferSizeFrames * frameSize * AudioConstants::SAMPLE_SIZE; _audioOutput->setBufferSize(requestedSize); @@ -1825,7 +1825,10 @@ bool AudioClient::switchOutputToAudioDevice(const QAudioDeviceInfo outputDeviceI _outputScratchBuffer = new int16_t[_outputPeriod]; // size local output mix buffer based on resampled network frame size - int networkPeriod = _localToOutputResampler ? _localToOutputResampler->getMaxOutput(AudioConstants::NETWORK_FRAME_SAMPLES_STEREO) : AudioConstants::NETWORK_FRAME_SAMPLES_STEREO; + int networkPeriod = + _localToOutputResampler + ? _localToOutputResampler->getMaxOutput(AudioConstants::NETWORK_FRAME_SAMPLES_STEREO) + : AudioConstants::NETWORK_FRAME_SAMPLES_STEREO; _localOutputMixBuffer = new float[networkPeriod]; // local period should be at least twice the output period, @@ -1875,7 +1878,8 @@ int AudioClient::setOutputBufferSize(int numFrames, bool persist) { qCDebug(audioclient) << __FUNCTION__ << "numFrames:" << numFrames << "persist:" << persist; numFrames = std::min(std::max(numFrames, MIN_BUFFER_FRAMES), MAX_BUFFER_FRAMES); - qCDebug(audioclient) << __FUNCTION__ << "clamped numFrames:" << numFrames << "_sessionOutputBufferSizeFrames:" << _sessionOutputBufferSizeFrames; + qCDebug(audioclient) << __FUNCTION__ << "clamped numFrames:" << numFrames + << "_sessionOutputBufferSizeFrames:" << _sessionOutputBufferSizeFrames; if (numFrames != _sessionOutputBufferSizeFrames) { qCInfo(audioclient, "Audio output buffer set to %d frames", numFrames); @@ -1906,10 +1910,10 @@ const float AudioClient::CALLBACK_ACCELERATOR_RATIO = 2.0f; #endif int AudioClient::calculateNumberOfInputCallbackBytes(const QAudioFormat& format) const { - int numInputCallbackBytes = (int)(((AudioConstants::NETWORK_FRAME_BYTES_PER_CHANNEL - * format.channelCount() - * ((float) format.sampleRate() / AudioConstants::SAMPLE_RATE)) - / CALLBACK_ACCELERATOR_RATIO) + 0.5f); + int numInputCallbackBytes = (int)(((AudioConstants::NETWORK_FRAME_BYTES_PER_CHANNEL * format.channelCount() * + ((float)format.sampleRate() / AudioConstants::SAMPLE_RATE)) / + CALLBACK_ACCELERATOR_RATIO) + + 0.5f); return numInputCallbackBytes; } @@ -1931,10 +1935,9 @@ float AudioClient::azimuthForSource(const glm::vec3& relativePosition) { float rotatedSourcePositionLength2 = glm::length2(rotatedSourcePosition); if (rotatedSourcePositionLength2 > SOURCE_DISTANCE_THRESHOLD) { - // produce an oriented angle about the y-axis glm::vec3 direction = rotatedSourcePosition * (1.0f / fastSqrtf(rotatedSourcePositionLength2)); - float angle = fastAcosf(glm::clamp(-direction.z, -1.0f, 1.0f)); // UNIT_NEG_Z is "forward" + float angle = fastAcosf(glm::clamp(-direction.z, -1.0f, 1.0f)); // UNIT_NEG_Z is "forward" return (direction.x < 0.0f) ? -angle : angle; } else { @@ -1944,7 +1947,6 @@ float AudioClient::azimuthForSource(const glm::vec3& relativePosition) { } float AudioClient::gainForSource(float distance, float volume) { - // attenuation = -6dB * log2(distance) // reference attenuation of 0dB at distance = 1.0m float gain = volume / std::max(distance, HRTF_NEARFIELD_MIN); @@ -1952,8 +1954,7 @@ float AudioClient::gainForSource(float distance, float volume) { return gain; } -qint64 AudioClient::AudioOutputIODevice::readData(char * data, qint64 maxSize) { - +qint64 AudioClient::AudioOutputIODevice::readData(char* data, qint64 maxSize) { // samples requested from OUTPUT_CHANNEL_COUNT int deviceChannelCount = _audio->_outputFormat.channelCount(); int samplesRequested = (int)(maxSize / AudioConstants::SAMPLE_SIZE) * OUTPUT_CHANNEL_COUNT / deviceChannelCount; @@ -1965,7 +1966,8 @@ qint64 AudioClient::AudioOutputIODevice::readData(char * data, qint64 maxSize) { int networkSamplesPopped; if ((networkSamplesPopped = _receivedAudioStream.popSamples(samplesRequested, false)) > 0) { - qCDebug(audiostream, "Read %d samples from buffer (%d available, %d requested)", networkSamplesPopped, _receivedAudioStream.getSamplesAvailable(), samplesRequested); + qCDebug(audiostream, "Read %d samples from buffer (%d available, %d requested)", networkSamplesPopped, + _receivedAudioStream.getSamplesAvailable(), samplesRequested); AudioRingBuffer::ConstIterator lastPopOutput = _receivedAudioStream.getLastPopOutput(); lastPopOutput.readSamples(scratchBuffer, networkSamplesPopped); for (int i = 0; i < networkSamplesPopped; i++) { @@ -1997,14 +1999,13 @@ qint64 AudioClient::AudioOutputIODevice::readData(char * data, qint64 maxSize) { samplesRequested = std::min(samplesRequested, samplesAvailable); if ((injectorSamplesPopped = _localInjectorsStream.appendSamples(mixBuffer, samplesRequested, append)) > 0) { _audio->_localSamplesAvailable.fetch_sub(injectorSamplesPopped, std::memory_order_release); - qCDebug(audiostream, "Read %d samples from injectors (%d available, %d requested)", injectorSamplesPopped, _localInjectorsStream.samplesAvailable(), samplesRequested); + qCDebug(audiostream, "Read %d samples from injectors (%d available, %d requested)", injectorSamplesPopped, + _localInjectorsStream.samplesAvailable(), samplesRequested); } } // prepare injectors for the next callback - QtConcurrent::run(QThreadPool::globalInstance(), [this] { - _audio->prepareLocalAudioInjectors(); - }); + QtConcurrent::run(QThreadPool::globalInstance(), [this] { _audio->prepareLocalAudioInjectors(); }); int samplesPopped = std::max(networkSamplesPopped, injectorSamplesPopped); int framesPopped = samplesPopped / AudioConstants::STEREO; @@ -2038,7 +2039,6 @@ qint64 AudioClient::AudioOutputIODevice::readData(char * data, qint64 maxSize) { _audio->_audioFileWav.addRawAudioChunk(reinterpret_cast(scratchBuffer), bytesWritten); } - int bytesAudioOutputUnplayed = _audio->_audioOutput->bufferSize() - _audio->_audioOutput->bytesFree(); float msecsAudioOutputUnplayed = bytesAudioOutputUnplayed / (float)_audio->_outputFormat.bytesForDuration(USECS_PER_MSEC); _audio->_stats.updateOutputMsUnplayed(msecsAudioOutputUnplayed); @@ -2075,7 +2075,6 @@ void AudioClient::loadSettings() { for (auto& plugin : codecPlugins) { qCDebug(audioclient) << "Codec available:" << plugin->getName(); } - } void AudioClient::saveSettings() { @@ -2088,7 +2087,6 @@ void AudioClient::setAvatarBoundingBoxParameters(glm::vec3 corner, glm::vec3 sca avatarBoundingBoxScale = scale; } - void AudioClient::startThread() { moveToNewNamedThread(this, "Audio Thread", [this] { start(); }, QThread::TimeCriticalPriority); } From d9873d363322e5b15cb0e2c8e976e8595f3505ea Mon Sep 17 00:00:00 2001 From: Zach Fox Date: Mon, 15 Oct 2018 11:33:42 -0700 Subject: [PATCH 08/18] Adding some debug stuff... --- libraries/audio-client/src/AudioClient.cpp | 48 ++++++++++++++-------- libraries/audio-client/src/AudioClient.h | 5 ++- 2 files changed, 35 insertions(+), 18 deletions(-) diff --git a/libraries/audio-client/src/AudioClient.cpp b/libraries/audio-client/src/AudioClient.cpp index 858f6e738c..c2b066b716 100644 --- a/libraries/audio-client/src/AudioClient.cpp +++ b/libraries/audio-client/src/AudioClient.cpp @@ -1095,7 +1095,8 @@ void AudioClient::handleAudioInput(QByteArray& audioBuffer) { void AudioClient::processAudioAndAddToRingBuffer(QByteArray& inputByteArray, const uchar& channelCount, - const qint32& bytesForDuration) { + const qint32& bytesForDuration, + QByteArray& rollingBuffer) { // input samples required to produce exactly NETWORK_FRAME_SAMPLES of output const int inputSamplesRequired = (_inputToNetworkResampler ? _inputToNetworkResampler->getMinInput(AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL) @@ -1131,6 +1132,7 @@ void AudioClient::processAudioAndAddToRingBuffer(QByteArray& inputByteArray, _stats.updateInputMsUnplayed(msecsInInputRingBuffer); QByteArray audioBuffer(reinterpret_cast(networkAudioSamples), numNetworkBytes); + rollingBuffer.append(audioBuffer); handleAudioInput(audioBuffer); } } @@ -1144,8 +1146,10 @@ void AudioClient::handleMicAudioInput() { _inputReadsSinceLastCheck++; #endif + QByteArray temp; + processAudioAndAddToRingBuffer(_inputDevice->readAll(), _inputFormat.channelCount(), - _inputFormat.bytesForDuration(USECS_PER_MSEC)); + _inputFormat.bytesForDuration(USECS_PER_MSEC), temp); } void AudioClient::handleDummyAudioInput() { @@ -1161,9 +1165,7 @@ void AudioClient::handleRecordedAudioInput(const QByteArray& audio) { handleAudioInput(audioBuffer); } - int rawToWav(const char* rawData, const int& rawLength, const char* wavfn, long frequency) { - long chunksize = 0x10; - +int rawToWav(const char* rawData, const int& rawLength, const char* wavfn, long frequency, unsigned short channels) { struct { unsigned short wFormatTag; unsigned short wChannels; @@ -1174,47 +1176,59 @@ void AudioClient::handleRecordedAudioInput(const QByteArray& audio) { } fmt; long samplecount = rawLength / 2; - long riffsize = samplecount * 2 + 0x24; - long datasize = samplecount * 2; FILE* wav = fopen(wavfn, "wb"); if (!wav) { - return -3; + return -1; } fwrite("RIFF", 1, 4, wav); + + long riffsize = samplecount * 2 + 0x24; fwrite(&riffsize, 4, 1, wav); + fwrite("WAVEfmt ", 1, 8, wav); + + long chunksize = 0x10; fwrite(&chunksize, 4, 1, wav); - fmt.wFormatTag = 1; // PCM - fmt.wChannels = 1; // MONO + fmt.wFormatTag = 1; // WAVE_FORMAT_PCM + fmt.wChannels = channels; fmt.dwSamplesPerSec = frequency * 1; - fmt.dwAvgBytesPerSec = frequency * 1 * 2; // 16 bit - fmt.wBlockAlign = 2; fmt.wBitsPerSample = 16; - + fmt.wBlockAlign = fmt.wChannels * fmt.wBitsPerSample / 8; + fmt.dwAvgBytesPerSec = fmt.dwSamplesPerSec * fmt.wBlockAlign; fwrite(&fmt, sizeof(fmt), 1, wav); + fwrite("data", 1, 4, wav); + long datasize = samplecount * 2; fwrite(&datasize, 4, 1, wav); fwrite(rawData, 1, rawLength, wav); + fclose(wav); + + return 0; } void AudioClient::handleTTSAudioInput(const QByteArray& audio) { QByteArray audioBuffer(audio); - QVector audioBufferReal; QString filename = QString::number(usecTimestampNow()); - QString path = PathUtils::getAppDataPath() + "Audio/" + filename + ".wav"; - rawToWav(audioBuffer.data(), audioBuffer.size(), path.toLocal8Bit(), 24000); + QString path = PathUtils::getAppDataPath() + "Audio/" + filename + "-before.wav"; + rawToWav(audioBuffer.data(), audioBuffer.size(), path.toLocal8Bit(), 24000, 1); + + QByteArray temp; while (audioBuffer.size() > 0) { QByteArray part; part.append(audioBuffer.data(), AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL); audioBuffer.remove(0, AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL); - processAudioAndAddToRingBuffer(part, 1, 48); + processAudioAndAddToRingBuffer(part, 1, 48, temp); } + + filename = QString::number(usecTimestampNow()); + path = PathUtils::getAppDataPath() + "Audio/" + filename + "-after.wav"; + rawToWav(temp.data(), temp.size(), path.toLocal8Bit(), 12000, 1); } void AudioClient::prepareLocalAudioInjectors(std::unique_ptr localAudioLock) { diff --git a/libraries/audio-client/src/AudioClient.h b/libraries/audio-client/src/AudioClient.h index 170a355abe..1ca7cac6ca 100644 --- a/libraries/audio-client/src/AudioClient.h +++ b/libraries/audio-client/src/AudioClient.h @@ -290,7 +290,10 @@ private: float azimuthForSource(const glm::vec3& relativePosition); float gainForSource(float distance, float volume); - void processAudioAndAddToRingBuffer(QByteArray& inputByteArray, const uchar& channelCount, const qint32& bytesForDuration); + void processAudioAndAddToRingBuffer(QByteArray& inputByteArray, + const uchar& channelCount, + const qint32& bytesForDuration, + QByteArray& rollingBuffer); #ifdef Q_OS_ANDROID QTimer _checkInputTimer; From 26e388b139bb040ae2260042e7c8ff327ca18e1f Mon Sep 17 00:00:00 2001 From: Zach Fox Date: Tue, 16 Oct 2018 17:34:48 -0700 Subject: [PATCH 09/18] Some experimentation yields promising results... --- interface/src/Application.cpp | 1 + .../src/scripting/TTSScriptingInterface.cpp | 17 +- .../src/scripting/TTSScriptingInterface.h | 11 +- libraries/audio-client/src/AudioClient.cpp | 1607 +++++++++-------- libraries/audio-client/src/AudioClient.h | 19 +- 5 files changed, 844 insertions(+), 811 deletions(-) diff --git a/interface/src/Application.cpp b/interface/src/Application.cpp index 728fea8c10..2991fab5f7 100644 --- a/interface/src/Application.cpp +++ b/interface/src/Application.cpp @@ -1182,6 +1182,7 @@ Application::Application(int& argc, char** argv, QElapsedTimer& startupTimer, bo auto TTS = DependencyManager::get().data(); connect(TTS, &TTSScriptingInterface::ttsSampleCreated, audioIO, &AudioClient::handleTTSAudioInput); + connect(TTS, &TTSScriptingInterface::clearTTSBuffer, audioIO, &AudioClient::clearTTSBuffer); connect(audioIO, &AudioClient::inputReceived, [](const QByteArray& audio) { static auto recorder = DependencyManager::get(); diff --git a/interface/src/scripting/TTSScriptingInterface.cpp b/interface/src/scripting/TTSScriptingInterface.cpp index fdbb37e586..5fb47a73c3 100644 --- a/interface/src/scripting/TTSScriptingInterface.cpp +++ b/interface/src/scripting/TTSScriptingInterface.cpp @@ -65,7 +65,7 @@ void TTSScriptingInterface::testTone(const bool& alsoInject) { int16_t temp = (glm::sin(glm::radians((float)a))) * 32768; samples[a] = temp; } - emit ttsSampleCreated(_lastSoundByteArray); + emit ttsSampleCreated(_lastSoundByteArray, AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL * 50, 96); if (alsoInject) { AudioInjectorOptions options; @@ -75,11 +75,16 @@ void TTSScriptingInterface::testTone(const bool& alsoInject) { } } -void TTSScriptingInterface::speakText(const QString& textToSpeak, const bool& alsoInject) { +void TTSScriptingInterface::speakText(const QString& textToSpeak, + const int& newChunkSize, + const int& timerInterval, + const int& sampleRate, + const int& bitsPerSample, + const bool& alsoInject) { WAVEFORMATEX fmt; fmt.wFormatTag = WAVE_FORMAT_PCM; - fmt.nSamplesPerSec = 24000; - fmt.wBitsPerSample = 16; + fmt.nSamplesPerSec = sampleRate; + fmt.wBitsPerSample = bitsPerSample; fmt.nChannels = 1; fmt.nBlockAlign = fmt.nChannels * fmt.wBitsPerSample / 8; fmt.nAvgBytesPerSec = fmt.nSamplesPerSec * fmt.nBlockAlign; @@ -146,7 +151,7 @@ void TTSScriptingInterface::speakText(const QString& textToSpeak, const bool& al _lastSoundByteArray.resize(0); _lastSoundByteArray.append(buf1, dwSize); - emit ttsSampleCreated(_lastSoundByteArray); + emit ttsSampleCreated(_lastSoundByteArray, newChunkSize, timerInterval); if (alsoInject) { AudioInjectorOptions options; @@ -160,4 +165,6 @@ void TTSScriptingInterface::stopLastSpeech() { if (_lastSoundAudioInjector) { _lastSoundAudioInjector->stop(); } + + emit clearTTSBuffer(); } diff --git a/interface/src/scripting/TTSScriptingInterface.h b/interface/src/scripting/TTSScriptingInterface.h index c1fffe67d1..f6eca081ab 100644 --- a/interface/src/scripting/TTSScriptingInterface.h +++ b/interface/src/scripting/TTSScriptingInterface.h @@ -19,6 +19,7 @@ #include // SAPI #include // SAPI Helper #include +#include class TTSScriptingInterface : public QObject, public Dependency { Q_OBJECT @@ -28,11 +29,17 @@ public: ~TTSScriptingInterface(); Q_INVOKABLE void testTone(const bool& alsoInject = false); - Q_INVOKABLE void speakText(const QString& textToSpeak, const bool& alsoInject = false); + Q_INVOKABLE void speakText(const QString& textToSpeak, + const int& newChunkSize = (AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL * 50), + const int& timerInterval = 96, + const int& sampleRate = 24000, + const int& bitsPerSample = 16, + const bool& alsoInject = false); Q_INVOKABLE void stopLastSpeech(); signals: - void ttsSampleCreated(QByteArray outputArray); + void ttsSampleCreated(QByteArray outputArray, const int& newChunkSize, const int& timerInterval); + void clearTTSBuffer(); private: class CComAutoInit { diff --git a/libraries/audio-client/src/AudioClient.cpp b/libraries/audio-client/src/AudioClient.cpp index c2b066b716..606763e4ab 100644 --- a/libraries/audio-client/src/AudioClient.cpp +++ b/libraries/audio-client/src/AudioClient.cpp @@ -186,7 +186,7 @@ AudioClient::AudioClient() : _networkToOutputResampler(NULL), _localToOutputResampler(NULL), _audioLimiter(AudioConstants::SAMPLE_RATE, OUTPUT_CHANNEL_COUNT), _outgoingAvatarAudioSequenceNumber(0), _audioOutputIODevice(_localInjectorsStream, _receivedAudioStream, this), _stats(&_receivedAudioStream), - _positionGetter(DEFAULT_POSITION_GETTER), + _positionGetter(DEFAULT_POSITION_GETTER), _TTSTimer(this), #if defined(Q_OS_ANDROID) _checkInputTimer(this), _isHeadsetPluggedIn(false), #endif @@ -245,6 +245,8 @@ AudioClient::AudioClient() : packetReceiver.registerListener(PacketType::NoisyMute, this, "handleNoisyMutePacket"); packetReceiver.registerListener(PacketType::MuteEnvironment, this, "handleMuteEnvironmentPacket"); packetReceiver.registerListener(PacketType::SelectedAudioFormat, this, "handleSelectedAudioFormat"); + + connect(&_TTSTimer, &QTimer::timeout, this, &AudioClient::processTTSBuffer); } AudioClient::~AudioClient() { @@ -939,7 +941,7 @@ void AudioClient::setReverbOptions(const AudioEffectOptions* options) { } } -void AudioClient::handleLocalEchoAndReverb(QByteArray& inputByteArray) { +void AudioClient::handleLocalEchoAndReverb(QByteArray& inputByteArray, const int& sampleRate, const int& channelCount) { // If there is server echo, reverb will be applied to the recieved audio stream so no need to have it here. bool hasReverb = _reverb || _receivedAudioStream.hasReverb(); if (_muted || !_audioOutput || (!_shouldEchoLocally && !hasReverb)) { @@ -949,7 +951,7 @@ void AudioClient::handleLocalEchoAndReverb(QByteArray& inputByteArray) { // NOTE: we assume the inputFormat and the outputFormat are the same, since on any modern // multimedia OS they should be. If there is a device that this is not true for, we can // add back support to do resampling. - if (_inputFormat.sampleRate() != _outputFormat.sampleRate()) { + if (sampleRate != _outputFormat.sampleRate()) { return; } @@ -972,7 +974,7 @@ void AudioClient::handleLocalEchoAndReverb(QByteArray& inputByteArray) { static QByteArray loopBackByteArray; int numInputSamples = inputByteArray.size() / AudioConstants::SAMPLE_SIZE; - int numLoopbackSamples = (numInputSamples * OUTPUT_CHANNEL_COUNT) / _inputFormat.channelCount(); + int numLoopbackSamples = (numInputSamples * OUTPUT_CHANNEL_COUNT) / channelCount; loopBackByteArray.resize(numLoopbackSamples * AudioConstants::SAMPLE_SIZE); @@ -980,7 +982,7 @@ void AudioClient::handleLocalEchoAndReverb(QByteArray& inputByteArray) { int16_t* loopbackSamples = reinterpret_cast(loopBackByteArray.data()); // upmix mono to stereo - if (!sampleChannelConversion(inputSamples, loopbackSamples, numInputSamples, _inputFormat.channelCount(), + if (!sampleChannelConversion(inputSamples, loopbackSamples, numInputSamples, channelCount, OUTPUT_CHANNEL_COUNT)) { // no conversion, just copy the samples memcpy(loopbackSamples, inputSamples, numInputSamples * AudioConstants::SAMPLE_SIZE); @@ -1093,23 +1095,29 @@ void AudioClient::handleAudioInput(QByteArray& audioBuffer) { } } -void AudioClient::processAudioAndAddToRingBuffer(QByteArray& inputByteArray, - const uchar& channelCount, - const qint32& bytesForDuration, - QByteArray& rollingBuffer) { +void AudioClient::handleMicAudioInput() { + if (!_inputDevice || _isPlayingBackRecording) { + return; + } + +#if defined(Q_OS_ANDROID) + _inputReadsSinceLastCheck++; +#endif + // input samples required to produce exactly NETWORK_FRAME_SAMPLES of output const int inputSamplesRequired = (_inputToNetworkResampler ? _inputToNetworkResampler->getMinInput(AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL) : AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL) * - channelCount; + _inputFormat.channelCount(); const auto inputAudioSamples = std::unique_ptr(new int16_t[inputSamplesRequired]); + QByteArray inputByteArray = _inputDevice->readAll(); - handleLocalEchoAndReverb(inputByteArray); + handleLocalEchoAndReverb(inputByteArray, _inputFormat.sampleRate(), _inputFormat.channelCount()); _inputRingBuffer.writeData(inputByteArray.data(), inputByteArray.size()); - float audioInputMsecsRead = inputByteArray.size() / (float)(bytesForDuration); + float audioInputMsecsRead = inputByteArray.size() / (float)(_inputFormat.bytesForDuration(USECS_PER_MSEC)); _stats.updateInputMsRead(audioInputMsecsRead); const int numNetworkBytes = @@ -1125,33 +1133,17 @@ void AudioClient::processAudioAndAddToRingBuffer(QByteArray& inputByteArray, } else { _inputRingBuffer.readSamples(inputAudioSamples.get(), inputSamplesRequired); possibleResampling(_inputToNetworkResampler, inputAudioSamples.get(), networkAudioSamples, inputSamplesRequired, - numNetworkSamples, channelCount, _desiredInputFormat.channelCount()); + numNetworkSamples, _inputFormat.channelCount(), _desiredInputFormat.channelCount()); } int bytesInInputRingBuffer = _inputRingBuffer.samplesAvailable() * AudioConstants::SAMPLE_SIZE; - float msecsInInputRingBuffer = bytesInInputRingBuffer / (float)(bytesForDuration); + float msecsInInputRingBuffer = bytesInInputRingBuffer / (float)(_inputFormat.bytesForDuration(USECS_PER_MSEC)); _stats.updateInputMsUnplayed(msecsInInputRingBuffer); QByteArray audioBuffer(reinterpret_cast(networkAudioSamples), numNetworkBytes); - rollingBuffer.append(audioBuffer); handleAudioInput(audioBuffer); } } -void AudioClient::handleMicAudioInput() { - if (!_inputDevice || _isPlayingBackRecording) { - return; - } - -#if defined(Q_OS_ANDROID) - _inputReadsSinceLastCheck++; -#endif - - QByteArray temp; - - processAudioAndAddToRingBuffer(_inputDevice->readAll(), _inputFormat.channelCount(), - _inputFormat.bytesForDuration(USECS_PER_MSEC), temp); -} - void AudioClient::handleDummyAudioInput() { const int numNetworkBytes = _isStereoInput ? AudioConstants::NETWORK_FRAME_BYTES_STEREO : AudioConstants::NETWORK_FRAME_BYTES_PER_CHANNEL; @@ -1192,7 +1184,7 @@ int rawToWav(const char* rawData, const int& rawLength, const char* wavfn, long long chunksize = 0x10; fwrite(&chunksize, 4, 1, wav); - fmt.wFormatTag = 1; // WAVE_FORMAT_PCM + fmt.wFormatTag = 1; // WAVE_FORMAT_PCM fmt.wChannels = channels; fmt.dwSamplesPerSec = frequency * 1; fmt.wBitsPerSample = 16; @@ -1210,906 +1202,927 @@ int rawToWav(const char* rawData, const int& rawLength, const char* wavfn, long return 0; } -void AudioClient::handleTTSAudioInput(const QByteArray& audio) { - QByteArray audioBuffer(audio); - - QString filename = QString::number(usecTimestampNow()); - QString path = PathUtils::getAppDataPath() + "Audio/" + filename + "-before.wav"; - rawToWav(audioBuffer.data(), audioBuffer.size(), path.toLocal8Bit(), 24000, 1); - - QByteArray temp; - - while (audioBuffer.size() > 0) { +void AudioClient::processTTSBuffer() { + Lock lock(_TTSMutex); + if (_TTSAudioBuffer.size() > 0) { QByteArray part; - part.append(audioBuffer.data(), AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL); - audioBuffer.remove(0, AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL); - processAudioAndAddToRingBuffer(part, 1, 48, temp); + part.append(_TTSAudioBuffer.data(), _TTSChunkSize); + _TTSAudioBuffer.remove(0, _TTSChunkSize); + handleAudioInput(part); + } else { + _isProcessingTTS = false; + _TTSTimer.stop(); } +} - filename = QString::number(usecTimestampNow()); - path = PathUtils::getAppDataPath() + "Audio/" + filename + "-after.wav"; - rawToWav(temp.data(), temp.size(), path.toLocal8Bit(), 12000, 1); +void AudioClient::handleTTSAudioInput(const QByteArray& audio, const int& newChunkSize, const int& timerInterval) { + _TTSChunkSize = newChunkSize; + _TTSAudioBuffer.append(audio); + + handleLocalEchoAndReverb(_TTSAudioBuffer, 48000, 1); + + //QString filename = QString::number(usecTimestampNow()); + //QString path = PathUtils::getAppDataPath() + "Audio/" + filename + "-before.wav"; + //rawToWav(_TTSAudioBuffer.data(), _TTSAudioBuffer.size(), path.toLocal8Bit(), 24000, 1); + + //QByteArray temp; + + _isProcessingTTS = true; + _TTSTimer.start(timerInterval); + + //filename = QString::number(usecTimestampNow()); + //path = PathUtils::getAppDataPath() + "Audio/" + filename + "-after.wav"; + //rawToWav(temp.data(), temp.size(), path.toLocal8Bit(), 12000, 1); +} + +void AudioClient::clearTTSBuffer() { + _TTSAudioBuffer.resize(0); + _isProcessingTTS = false; + _TTSTimer.stop(); } void AudioClient::prepareLocalAudioInjectors(std::unique_ptr localAudioLock) { - bool doSynchronously = localAudioLock.operator bool(); - if (!localAudioLock) { - localAudioLock.reset(new Lock(_localAudioMutex)); + bool doSynchronously = localAudioLock.operator bool(); + if (!localAudioLock) { + localAudioLock.reset(new Lock(_localAudioMutex)); + } + + int samplesNeeded = std::numeric_limits::max(); + while (samplesNeeded > 0) { + if (!doSynchronously) { + // unlock between every write to allow device switching + localAudioLock->unlock(); + localAudioLock->lock(); + } + + // in case of a device switch, consider bufferCapacity volatile across iterations + if (_outputPeriod == 0) { + return; + } + + int bufferCapacity = _localInjectorsStream.getSampleCapacity(); + int maxOutputSamples = AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL * AudioConstants::STEREO; + if (_localToOutputResampler) { + maxOutputSamples = _localToOutputResampler->getMaxOutput(AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL) * + AudioConstants::STEREO; + } + + samplesNeeded = bufferCapacity - _localSamplesAvailable.load(std::memory_order_relaxed); + if (samplesNeeded < maxOutputSamples) { + // avoid overwriting the buffer to prevent losing frames + break; + } + + // get a network frame of local injectors' audio + if (!mixLocalAudioInjectors(_localMixBuffer)) { + break; + } + + // reverb + if (_reverb) { + _localReverb.render(_localMixBuffer, _localMixBuffer, AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL); + } + + int samples; + if (_localToOutputResampler) { + // resample to output sample rate + int frames = _localToOutputResampler->render(_localMixBuffer, _localOutputMixBuffer, + AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL); + + // write to local injectors' ring buffer + samples = frames * AudioConstants::STEREO; + _localInjectorsStream.writeSamples(_localOutputMixBuffer, samples); + + } else { + // write to local injectors' ring buffer + samples = AudioConstants::NETWORK_FRAME_SAMPLES_STEREO; + _localInjectorsStream.writeSamples(_localMixBuffer, AudioConstants::NETWORK_FRAME_SAMPLES_STEREO); + } + + _localSamplesAvailable.fetch_add(samples, std::memory_order_release); + samplesNeeded -= samples; + } } - int samplesNeeded = std::numeric_limits::max(); - while (samplesNeeded > 0) { - if (!doSynchronously) { - // unlock between every write to allow device switching - localAudioLock->unlock(); - localAudioLock->lock(); + bool AudioClient::mixLocalAudioInjectors(float* mixBuffer) { + // check the flag for injectors before attempting to lock + if (!_localInjectorsAvailable.load(std::memory_order_acquire)) { + return false; } - // in case of a device switch, consider bufferCapacity volatile across iterations - if (_outputPeriod == 0) { - return; - } + // lock the injectors + Lock lock(_injectorsMutex); - int bufferCapacity = _localInjectorsStream.getSampleCapacity(); - int maxOutputSamples = AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL * AudioConstants::STEREO; - if (_localToOutputResampler) { - maxOutputSamples = _localToOutputResampler->getMaxOutput(AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL) * - AudioConstants::STEREO; - } + QVector injectorsToRemove; - samplesNeeded = bufferCapacity - _localSamplesAvailable.load(std::memory_order_relaxed); - if (samplesNeeded < maxOutputSamples) { - // avoid overwriting the buffer to prevent losing frames - break; - } + memset(mixBuffer, 0, AudioConstants::NETWORK_FRAME_SAMPLES_STEREO * sizeof(float)); - // get a network frame of local injectors' audio - if (!mixLocalAudioInjectors(_localMixBuffer)) { - break; - } + for (const AudioInjectorPointer& injector : _activeLocalAudioInjectors) { + // the lock guarantees that injectorBuffer, if found, is invariant + AudioInjectorLocalBuffer* injectorBuffer = injector->getLocalBuffer(); + if (injectorBuffer) { + static const int HRTF_DATASET_INDEX = 1; - // reverb - if (_reverb) { - _localReverb.render(_localMixBuffer, _localMixBuffer, AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL); - } + int numChannels = injector->isAmbisonic() + ? AudioConstants::AMBISONIC + : (injector->isStereo() ? AudioConstants::STEREO : AudioConstants::MONO); + size_t bytesToRead = numChannels * AudioConstants::NETWORK_FRAME_BYTES_PER_CHANNEL; - int samples; - if (_localToOutputResampler) { - // resample to output sample rate - int frames = _localToOutputResampler->render(_localMixBuffer, _localOutputMixBuffer, - AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL); + // get one frame from the injector + memset(_localScratchBuffer, 0, bytesToRead); + if (0 < injectorBuffer->readData((char*)_localScratchBuffer, bytesToRead)) { + if (injector->isAmbisonic()) { + // no distance attenuation + float gain = injector->getVolume(); - // write to local injectors' ring buffer - samples = frames * AudioConstants::STEREO; - _localInjectorsStream.writeSamples(_localOutputMixBuffer, samples); + // + // Calculate the soundfield orientation relative to the listener. + // Injector orientation can be used to align a recording to our world coordinates. + // + glm::quat relativeOrientation = injector->getOrientation() * glm::inverse(_orientationGetter()); - } else { - // write to local injectors' ring buffer - samples = AudioConstants::NETWORK_FRAME_SAMPLES_STEREO; - _localInjectorsStream.writeSamples(_localMixBuffer, AudioConstants::NETWORK_FRAME_SAMPLES_STEREO); - } + // convert from Y-up (OpenGL) to Z-up (Ambisonic) coordinate system + float qw = relativeOrientation.w; + float qx = -relativeOrientation.z; + float qy = -relativeOrientation.x; + float qz = relativeOrientation.y; - _localSamplesAvailable.fetch_add(samples, std::memory_order_release); - samplesNeeded -= samples; - } -} + // Ambisonic gets spatialized into mixBuffer + injector->getLocalFOA().render(_localScratchBuffer, mixBuffer, HRTF_DATASET_INDEX, qw, qx, qy, qz, gain, + AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL); -bool AudioClient::mixLocalAudioInjectors(float* mixBuffer) { - // check the flag for injectors before attempting to lock - if (!_localInjectorsAvailable.load(std::memory_order_acquire)) { - return false; - } + } else if (injector->isStereo()) { + // stereo gets directly mixed into mixBuffer + float gain = injector->getVolume(); + for (int i = 0; i < AudioConstants::NETWORK_FRAME_SAMPLES_STEREO; i++) { + mixBuffer[i] += convertToFloat(_localScratchBuffer[i]) * gain; + } - // lock the injectors - Lock lock(_injectorsMutex); + } else { + // calculate distance, gain and azimuth for hrtf + glm::vec3 relativePosition = injector->getPosition() - _positionGetter(); + float distance = glm::max(glm::length(relativePosition), EPSILON); + float gain = gainForSource(distance, injector->getVolume()); + float azimuth = azimuthForSource(relativePosition); - QVector injectorsToRemove; - - memset(mixBuffer, 0, AudioConstants::NETWORK_FRAME_SAMPLES_STEREO * sizeof(float)); - - for (const AudioInjectorPointer& injector : _activeLocalAudioInjectors) { - // the lock guarantees that injectorBuffer, if found, is invariant - AudioInjectorLocalBuffer* injectorBuffer = injector->getLocalBuffer(); - if (injectorBuffer) { - static const int HRTF_DATASET_INDEX = 1; - - int numChannels = injector->isAmbisonic() ? AudioConstants::AMBISONIC - : (injector->isStereo() ? AudioConstants::STEREO : AudioConstants::MONO); - size_t bytesToRead = numChannels * AudioConstants::NETWORK_FRAME_BYTES_PER_CHANNEL; - - // get one frame from the injector - memset(_localScratchBuffer, 0, bytesToRead); - if (0 < injectorBuffer->readData((char*)_localScratchBuffer, bytesToRead)) { - if (injector->isAmbisonic()) { - // no distance attenuation - float gain = injector->getVolume(); - - // - // Calculate the soundfield orientation relative to the listener. - // Injector orientation can be used to align a recording to our world coordinates. - // - glm::quat relativeOrientation = injector->getOrientation() * glm::inverse(_orientationGetter()); - - // convert from Y-up (OpenGL) to Z-up (Ambisonic) coordinate system - float qw = relativeOrientation.w; - float qx = -relativeOrientation.z; - float qy = -relativeOrientation.x; - float qz = relativeOrientation.y; - - // Ambisonic gets spatialized into mixBuffer - injector->getLocalFOA().render(_localScratchBuffer, mixBuffer, HRTF_DATASET_INDEX, qw, qx, qy, qz, gain, - AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL); - - } else if (injector->isStereo()) { - // stereo gets directly mixed into mixBuffer - float gain = injector->getVolume(); - for (int i = 0; i < AudioConstants::NETWORK_FRAME_SAMPLES_STEREO; i++) { - mixBuffer[i] += convertToFloat(_localScratchBuffer[i]) * gain; + // mono gets spatialized into mixBuffer + injector->getLocalHRTF().render(_localScratchBuffer, mixBuffer, HRTF_DATASET_INDEX, azimuth, distance, + gain, AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL); } } else { - // calculate distance, gain and azimuth for hrtf - glm::vec3 relativePosition = injector->getPosition() - _positionGetter(); - float distance = glm::max(glm::length(relativePosition), EPSILON); - float gain = gainForSource(distance, injector->getVolume()); - float azimuth = azimuthForSource(relativePosition); - - // mono gets spatialized into mixBuffer - injector->getLocalHRTF().render(_localScratchBuffer, mixBuffer, HRTF_DATASET_INDEX, azimuth, distance, gain, - AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL); + qCDebug(audioclient) << "injector has no more data, marking finished for removal"; + injector->finishLocalInjection(); + injectorsToRemove.append(injector); } } else { - qCDebug(audioclient) << "injector has no more data, marking finished for removal"; + qCDebug(audioclient) << "injector has no local buffer, marking as finished for removal"; injector->finishLocalInjection(); injectorsToRemove.append(injector); } - - } else { - qCDebug(audioclient) << "injector has no local buffer, marking as finished for removal"; - injector->finishLocalInjection(); - injectorsToRemove.append(injector); - } - } - - for (const AudioInjectorPointer& injector : injectorsToRemove) { - qCDebug(audioclient) << "removing injector"; - _activeLocalAudioInjectors.removeOne(injector); - } - - // update the flag - _localInjectorsAvailable.exchange(!_activeLocalAudioInjectors.empty(), std::memory_order_release); - - return true; -} - -void AudioClient::processReceivedSamples(const QByteArray& decodedBuffer, QByteArray& outputBuffer) { - const int16_t* decodedSamples = reinterpret_cast(decodedBuffer.data()); - assert(decodedBuffer.size() == AudioConstants::NETWORK_FRAME_BYTES_STEREO); - - outputBuffer.resize(_outputFrameSize * AudioConstants::SAMPLE_SIZE); - int16_t* outputSamples = reinterpret_cast(outputBuffer.data()); - - bool hasReverb = _reverb || _receivedAudioStream.hasReverb(); - - // apply stereo reverb - if (hasReverb) { - updateReverbOptions(); - int16_t* reverbSamples = _networkToOutputResampler ? _networkScratchBuffer : outputSamples; - _listenerReverb.render(decodedSamples, reverbSamples, AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL); - } - - // resample to output sample rate - if (_networkToOutputResampler) { - const int16_t* inputSamples = hasReverb ? _networkScratchBuffer : decodedSamples; - _networkToOutputResampler->render(inputSamples, outputSamples, AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL); - } - - // if no transformations were applied, we still need to copy the buffer - if (!hasReverb && !_networkToOutputResampler) { - memcpy(outputSamples, decodedSamples, decodedBuffer.size()); - } -} - -void AudioClient::sendMuteEnvironmentPacket() { - auto nodeList = DependencyManager::get(); - - int dataSize = sizeof(glm::vec3) + sizeof(float); - - auto mutePacket = NLPacket::create(PacketType::MuteEnvironment, dataSize); - - const float MUTE_RADIUS = 50; - - glm::vec3 currentSourcePosition = _positionGetter(); - - mutePacket->writePrimitive(currentSourcePosition); - mutePacket->writePrimitive(MUTE_RADIUS); - - // grab our audio mixer from the NodeList, if it exists - SharedNodePointer audioMixer = nodeList->soloNodeOfType(NodeType::AudioMixer); - - if (audioMixer) { - // send off this mute packet - nodeList->sendPacket(std::move(mutePacket), *audioMixer); - } -} - -void AudioClient::setMuted(bool muted, bool emitSignal) { - if (_muted != muted) { - _muted = muted; - if (emitSignal) { - emit muteToggled(_muted); - } - } -} - -void AudioClient::setNoiseReduction(bool enable, bool emitSignal) { - if (_isNoiseGateEnabled != enable) { - _isNoiseGateEnabled = enable; - if (emitSignal) { - emit noiseReductionChanged(_isNoiseGateEnabled); - } - } -} - -bool AudioClient::setIsStereoInput(bool isStereoInput) { - bool stereoInputChanged = false; - if (isStereoInput != _isStereoInput && _inputDeviceInfo.supportedChannelCounts().contains(2)) { - _isStereoInput = isStereoInput; - stereoInputChanged = true; - - if (_isStereoInput) { - _desiredInputFormat.setChannelCount(2); - } else { - _desiredInputFormat.setChannelCount(1); } - // restart the codec - if (_codec) { - if (_encoder) { - _codec->releaseEncoder(_encoder); - } - _encoder = _codec->createEncoder(AudioConstants::SAMPLE_RATE, - _isStereoInput ? AudioConstants::STEREO : AudioConstants::MONO); + for (const AudioInjectorPointer& injector : injectorsToRemove) { + qCDebug(audioclient) << "removing injector"; + _activeLocalAudioInjectors.removeOne(injector); } - qCDebug(audioclient) << "Reset Codec:" << _selectedCodecName << "isStereoInput:" << _isStereoInput; - // restart the input device - switchInputToAudioDevice(_inputDeviceInfo); - - emit isStereoInputChanged(_isStereoInput); - } - - return stereoInputChanged; -} - -bool AudioClient::outputLocalInjector(const AudioInjectorPointer& injector) { - AudioInjectorLocalBuffer* injectorBuffer = injector->getLocalBuffer(); - if (injectorBuffer) { - // local injectors are on the AudioInjectorsThread, so we must guard access - Lock lock(_injectorsMutex); - if (!_activeLocalAudioInjectors.contains(injector)) { - qCDebug(audioclient) << "adding new injector"; - _activeLocalAudioInjectors.append(injector); - // move local buffer to the LocalAudioThread to avoid dataraces with AudioInjector (like stop()) - injectorBuffer->setParent(nullptr); - - // update the flag - _localInjectorsAvailable.exchange(true, std::memory_order_release); - } else { - qCDebug(audioclient) << "injector exists in active list already"; - } + // update the flag + _localInjectorsAvailable.exchange(!_activeLocalAudioInjectors.empty(), std::memory_order_release); return true; - - } else { - // no local buffer - return false; } -} -void AudioClient::outputFormatChanged() { - _outputFrameSize = (AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL * OUTPUT_CHANNEL_COUNT * _outputFormat.sampleRate()) / - _desiredOutputFormat.sampleRate(); - _receivedAudioStream.outputFormatChanged(_outputFormat.sampleRate(), OUTPUT_CHANNEL_COUNT); -} + void AudioClient::processReceivedSamples(const QByteArray& decodedBuffer, QByteArray& outputBuffer) { + const int16_t* decodedSamples = reinterpret_cast(decodedBuffer.data()); + assert(decodedBuffer.size() == AudioConstants::NETWORK_FRAME_BYTES_STEREO); -bool AudioClient::switchInputToAudioDevice(const QAudioDeviceInfo inputDeviceInfo, bool isShutdownRequest) { - Q_ASSERT_X(QThread::currentThread() == thread(), Q_FUNC_INFO, "Function invoked on wrong thread"); + outputBuffer.resize(_outputFrameSize * AudioConstants::SAMPLE_SIZE); + int16_t* outputSamples = reinterpret_cast(outputBuffer.data()); - qCDebug(audioclient) << __FUNCTION__ << "inputDeviceInfo: [" << inputDeviceInfo.deviceName() << "]"; - bool supportedFormat = false; + bool hasReverb = _reverb || _receivedAudioStream.hasReverb(); - // NOTE: device start() uses the Qt internal device list - Lock lock(_deviceMutex); + // apply stereo reverb + if (hasReverb) { + updateReverbOptions(); + int16_t* reverbSamples = _networkToOutputResampler ? _networkScratchBuffer : outputSamples; + _listenerReverb.render(decodedSamples, reverbSamples, AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL); + } + + // resample to output sample rate + if (_networkToOutputResampler) { + const int16_t* inputSamples = hasReverb ? _networkScratchBuffer : decodedSamples; + _networkToOutputResampler->render(inputSamples, outputSamples, AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL); + } + + // if no transformations were applied, we still need to copy the buffer + if (!hasReverb && !_networkToOutputResampler) { + memcpy(outputSamples, decodedSamples, decodedBuffer.size()); + } + } + + void AudioClient::sendMuteEnvironmentPacket() { + auto nodeList = DependencyManager::get(); + + int dataSize = sizeof(glm::vec3) + sizeof(float); + + auto mutePacket = NLPacket::create(PacketType::MuteEnvironment, dataSize); + + const float MUTE_RADIUS = 50; + + glm::vec3 currentSourcePosition = _positionGetter(); + + mutePacket->writePrimitive(currentSourcePosition); + mutePacket->writePrimitive(MUTE_RADIUS); + + // grab our audio mixer from the NodeList, if it exists + SharedNodePointer audioMixer = nodeList->soloNodeOfType(NodeType::AudioMixer); + + if (audioMixer) { + // send off this mute packet + nodeList->sendPacket(std::move(mutePacket), *audioMixer); + } + } + + void AudioClient::setMuted(bool muted, bool emitSignal) { + if (_muted != muted) { + _muted = muted; + if (emitSignal) { + emit muteToggled(_muted); + } + } + } + + void AudioClient::setNoiseReduction(bool enable, bool emitSignal) { + if (_isNoiseGateEnabled != enable) { + _isNoiseGateEnabled = enable; + if (emitSignal) { + emit noiseReductionChanged(_isNoiseGateEnabled); + } + } + } + + bool AudioClient::setIsStereoInput(bool isStereoInput) { + bool stereoInputChanged = false; + if (isStereoInput != _isStereoInput && _inputDeviceInfo.supportedChannelCounts().contains(2)) { + _isStereoInput = isStereoInput; + stereoInputChanged = true; + + if (_isStereoInput) { + _desiredInputFormat.setChannelCount(2); + } else { + _desiredInputFormat.setChannelCount(1); + } + + // restart the codec + if (_codec) { + if (_encoder) { + _codec->releaseEncoder(_encoder); + } + _encoder = _codec->createEncoder(AudioConstants::SAMPLE_RATE, + _isStereoInput ? AudioConstants::STEREO : AudioConstants::MONO); + } + qCDebug(audioclient) << "Reset Codec:" << _selectedCodecName << "isStereoInput:" << _isStereoInput; + + // restart the input device + switchInputToAudioDevice(_inputDeviceInfo); + + emit isStereoInputChanged(_isStereoInput); + } + + return stereoInputChanged; + } + + bool AudioClient::outputLocalInjector(const AudioInjectorPointer& injector) { + AudioInjectorLocalBuffer* injectorBuffer = injector->getLocalBuffer(); + if (injectorBuffer) { + // local injectors are on the AudioInjectorsThread, so we must guard access + Lock lock(_injectorsMutex); + if (!_activeLocalAudioInjectors.contains(injector)) { + qCDebug(audioclient) << "adding new injector"; + _activeLocalAudioInjectors.append(injector); + // move local buffer to the LocalAudioThread to avoid dataraces with AudioInjector (like stop()) + injectorBuffer->setParent(nullptr); + + // update the flag + _localInjectorsAvailable.exchange(true, std::memory_order_release); + } else { + qCDebug(audioclient) << "injector exists in active list already"; + } + + return true; + + } else { + // no local buffer + return false; + } + } + + void AudioClient::outputFormatChanged() { + _outputFrameSize = + (AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL * OUTPUT_CHANNEL_COUNT * _outputFormat.sampleRate()) / + _desiredOutputFormat.sampleRate(); + _receivedAudioStream.outputFormatChanged(_outputFormat.sampleRate(), OUTPUT_CHANNEL_COUNT); + } + + bool AudioClient::switchInputToAudioDevice(const QAudioDeviceInfo inputDeviceInfo, bool isShutdownRequest) { + Q_ASSERT_X(QThread::currentThread() == thread(), Q_FUNC_INFO, "Function invoked on wrong thread"); + + qCDebug(audioclient) << __FUNCTION__ << "inputDeviceInfo: [" << inputDeviceInfo.deviceName() << "]"; + bool supportedFormat = false; + + // NOTE: device start() uses the Qt internal device list + Lock lock(_deviceMutex); #if defined(Q_OS_ANDROID) - _shouldRestartInputSetup = false; // avoid a double call to _audioInput->start() from audioInputStateChanged + _shouldRestartInputSetup = false; // avoid a double call to _audioInput->start() from audioInputStateChanged #endif - // cleanup any previously initialized device - if (_audioInput) { - // The call to stop() causes _inputDevice to be destructed. - // That in turn causes it to be disconnected (see for example - // http://stackoverflow.com/questions/9264750/qt-signals-and-slots-object-disconnect). - _audioInput->stop(); - _inputDevice = NULL; + // cleanup any previously initialized device + if (_audioInput) { + // The call to stop() causes _inputDevice to be destructed. + // That in turn causes it to be disconnected (see for example + // http://stackoverflow.com/questions/9264750/qt-signals-and-slots-object-disconnect). + _audioInput->stop(); + _inputDevice = NULL; - _audioInput->deleteLater(); - _audioInput = NULL; - _numInputCallbackBytes = 0; + _audioInput->deleteLater(); + _audioInput = NULL; + _numInputCallbackBytes = 0; - _inputDeviceInfo = QAudioDeviceInfo(); - } + _inputDeviceInfo = QAudioDeviceInfo(); + } - if (_dummyAudioInput) { - _dummyAudioInput->stop(); + if (_dummyAudioInput) { + _dummyAudioInput->stop(); - _dummyAudioInput->deleteLater(); - _dummyAudioInput = NULL; - } + _dummyAudioInput->deleteLater(); + _dummyAudioInput = NULL; + } - if (_inputToNetworkResampler) { - // if we were using an input to network resampler, delete it here - delete _inputToNetworkResampler; - _inputToNetworkResampler = NULL; - } + if (_inputToNetworkResampler) { + // if we were using an input to network resampler, delete it here + delete _inputToNetworkResampler; + _inputToNetworkResampler = NULL; + } - if (_audioGate) { - delete _audioGate; - _audioGate = nullptr; - } + if (_audioGate) { + delete _audioGate; + _audioGate = nullptr; + } - if (isShutdownRequest) { - qCDebug(audioclient) << "The audio input device has shut down."; - return true; - } + if (isShutdownRequest) { + qCDebug(audioclient) << "The audio input device has shut down."; + return true; + } - if (!inputDeviceInfo.isNull()) { - qCDebug(audioclient) << "The audio input device " << inputDeviceInfo.deviceName() << "is available."; - _inputDeviceInfo = inputDeviceInfo; - emit deviceChanged(QAudio::AudioInput, inputDeviceInfo); + if (!inputDeviceInfo.isNull()) { + qCDebug(audioclient) << "The audio input device " << inputDeviceInfo.deviceName() << "is available."; + _inputDeviceInfo = inputDeviceInfo; + emit deviceChanged(QAudio::AudioInput, inputDeviceInfo); - if (adjustedFormatForAudioDevice(inputDeviceInfo, _desiredInputFormat, _inputFormat)) { - qCDebug(audioclient) << "The format to be used for audio input is" << _inputFormat; + if (adjustedFormatForAudioDevice(inputDeviceInfo, _desiredInputFormat, _inputFormat)) { + qCDebug(audioclient) << "The format to be used for audio input is" << _inputFormat; - // we've got the best we can get for input - // if required, setup a resampler for this input to our desired network format - if (_inputFormat != _desiredInputFormat && _inputFormat.sampleRate() != _desiredInputFormat.sampleRate()) { - qCDebug(audioclient) << "Attemping to create a resampler for input format to network format."; + // we've got the best we can get for input + // if required, setup a resampler for this input to our desired network format + if (_inputFormat != _desiredInputFormat && _inputFormat.sampleRate() != _desiredInputFormat.sampleRate()) { + qCDebug(audioclient) << "Attemping to create a resampler for input format to network format."; - assert(_inputFormat.sampleSize() == 16); - assert(_desiredInputFormat.sampleSize() == 16); - int channelCount = (_inputFormat.channelCount() == 2 && _desiredInputFormat.channelCount() == 2) ? 2 : 1; + assert(_inputFormat.sampleSize() == 16); + assert(_desiredInputFormat.sampleSize() == 16); + int channelCount = (_inputFormat.channelCount() == 2 && _desiredInputFormat.channelCount() == 2) ? 2 : 1; - _inputToNetworkResampler = - new AudioSRC(_inputFormat.sampleRate(), _desiredInputFormat.sampleRate(), channelCount); + _inputToNetworkResampler = + new AudioSRC(_inputFormat.sampleRate(), _desiredInputFormat.sampleRate(), channelCount); - } else { - qCDebug(audioclient) << "No resampling required for audio input to match desired network format."; + } else { + qCDebug(audioclient) << "No resampling required for audio input to match desired network format."; + } + + // the audio gate runs after the resampler + _audioGate = new AudioGate(_desiredInputFormat.sampleRate(), _desiredInputFormat.channelCount()); + qCDebug(audioclient) << "Noise gate created with" << _desiredInputFormat.channelCount() << "channels."; + + // if the user wants stereo but this device can't provide then bail + if (!_isStereoInput || _inputFormat.channelCount() == 2) { + _audioInput = new QAudioInput(inputDeviceInfo, _inputFormat, this); + _numInputCallbackBytes = calculateNumberOfInputCallbackBytes(_inputFormat); + _audioInput->setBufferSize(_numInputCallbackBytes); + // different audio input devices may have different volumes + emit inputVolumeChanged(_audioInput->volume()); + + // how do we want to handle input working, but output not working? + int numFrameSamples = calculateNumberOfFrameSamples(_numInputCallbackBytes); + _inputRingBuffer.resizeForFrameSize(numFrameSamples); + +#if defined(Q_OS_ANDROID) + if (_audioInput) { + _shouldRestartInputSetup = true; + connect(_audioInput, &QAudioInput::stateChanged, this, &AudioClient::audioInputStateChanged); + } +#endif + _inputDevice = _audioInput->start(); + + if (_inputDevice) { + connect(_inputDevice, SIGNAL(readyRead()), this, SLOT(handleMicAudioInput())); + supportedFormat = true; + } else { + qCDebug(audioclient) << "Error starting audio input -" << _audioInput->error(); + _audioInput->deleteLater(); + _audioInput = NULL; + } + } } + } + + // If there is no working input device, use the dummy input device. + // It generates audio callbacks on a timer to simulate a mic stream of silent packets. + // This enables clients without a mic to still receive an audio stream from the mixer. + if (!_audioInput) { + qCDebug(audioclient) << "Audio input device is not available, using dummy input."; + _inputDeviceInfo = QAudioDeviceInfo(); + emit deviceChanged(QAudio::AudioInput, _inputDeviceInfo); + + _inputFormat = _desiredInputFormat; + qCDebug(audioclient) << "The format to be used for audio input is" << _inputFormat; + qCDebug(audioclient) << "No resampling required for audio input to match desired network format."; - // the audio gate runs after the resampler _audioGate = new AudioGate(_desiredInputFormat.sampleRate(), _desiredInputFormat.channelCount()); qCDebug(audioclient) << "Noise gate created with" << _desiredInputFormat.channelCount() << "channels."; - // if the user wants stereo but this device can't provide then bail - if (!_isStereoInput || _inputFormat.channelCount() == 2) { - _audioInput = new QAudioInput(inputDeviceInfo, _inputFormat, this); - _numInputCallbackBytes = calculateNumberOfInputCallbackBytes(_inputFormat); - _audioInput->setBufferSize(_numInputCallbackBytes); - // different audio input devices may have different volumes - emit inputVolumeChanged(_audioInput->volume()); - - // how do we want to handle input working, but output not working? - int numFrameSamples = calculateNumberOfFrameSamples(_numInputCallbackBytes); - _inputRingBuffer.resizeForFrameSize(numFrameSamples); - -#if defined(Q_OS_ANDROID) - if (_audioInput) { - _shouldRestartInputSetup = true; - connect(_audioInput, &QAudioInput::stateChanged, this, &AudioClient::audioInputStateChanged); - } -#endif - _inputDevice = _audioInput->start(); - - if (_inputDevice) { - connect(_inputDevice, SIGNAL(readyRead()), this, SLOT(handleMicAudioInput())); - supportedFormat = true; - } else { - qCDebug(audioclient) << "Error starting audio input -" << _audioInput->error(); - _audioInput->deleteLater(); - _audioInput = NULL; - } - } - } - } - - // If there is no working input device, use the dummy input device. - // It generates audio callbacks on a timer to simulate a mic stream of silent packets. - // This enables clients without a mic to still receive an audio stream from the mixer. - if (!_audioInput) { - qCDebug(audioclient) << "Audio input device is not available, using dummy input."; - _inputDeviceInfo = QAudioDeviceInfo(); - emit deviceChanged(QAudio::AudioInput, _inputDeviceInfo); - - _inputFormat = _desiredInputFormat; - qCDebug(audioclient) << "The format to be used for audio input is" << _inputFormat; - qCDebug(audioclient) << "No resampling required for audio input to match desired network format."; - - _audioGate = new AudioGate(_desiredInputFormat.sampleRate(), _desiredInputFormat.channelCount()); - qCDebug(audioclient) << "Noise gate created with" << _desiredInputFormat.channelCount() << "channels."; - - // generate audio callbacks at the network sample rate - _dummyAudioInput = new QTimer(this); - connect(_dummyAudioInput, SIGNAL(timeout()), this, SLOT(handleDummyAudioInput())); - _dummyAudioInput->start((int)(AudioConstants::NETWORK_FRAME_MSECS + 0.5f)); - } - - return supportedFormat; -} - -void AudioClient::audioInputStateChanged(QAudio::State state) { -#if defined(Q_OS_ANDROID) - switch (state) { - case QAudio::StoppedState: - if (!_audioInput) { - break; - } - // Stopped on purpose - if (_shouldRestartInputSetup) { - Lock lock(_deviceMutex); - _inputDevice = _audioInput->start(); - lock.unlock(); - if (_inputDevice) { - connect(_inputDevice, SIGNAL(readyRead()), this, SLOT(handleMicAudioInput())); - } - } - break; - case QAudio::ActiveState: - break; - default: - break; - } -#endif -} - -void AudioClient::checkInputTimeout() { -#if defined(Q_OS_ANDROID) - if (_audioInput && _inputReadsSinceLastCheck < MIN_READS_TO_CONSIDER_INPUT_ALIVE) { - _audioInput->stop(); - } else { - _inputReadsSinceLastCheck = 0; - } -#endif -} - -void AudioClient::setHeadsetPluggedIn(bool pluggedIn) { -#if defined(Q_OS_ANDROID) - if (pluggedIn == !_isHeadsetPluggedIn && !_inputDeviceInfo.isNull()) { - QAndroidJniObject brand = QAndroidJniObject::getStaticObjectField("android/os/Build", "BRAND"); - // some samsung phones needs more time to shutdown the previous input device - if (brand.toString().contains("samsung", Qt::CaseInsensitive)) { - switchInputToAudioDevice(QAudioDeviceInfo(), true); - QThread::msleep(200); + // generate audio callbacks at the network sample rate + _dummyAudioInput = new QTimer(this); + connect(_dummyAudioInput, SIGNAL(timeout()), this, SLOT(handleDummyAudioInput())); + _dummyAudioInput->start((int)(AudioConstants::NETWORK_FRAME_MSECS + 0.5f)); } - Setting::Handle enableAEC(SETTING_AEC_KEY, false); - bool aecEnabled = enableAEC.get(); - - if ((pluggedIn || !aecEnabled) && _inputDeviceInfo.deviceName() != VOICE_RECOGNITION) { - switchAudioDevice(QAudio::AudioInput, VOICE_RECOGNITION); - } else if (!pluggedIn && aecEnabled && _inputDeviceInfo.deviceName() != VOICE_COMMUNICATION) { - switchAudioDevice(QAudio::AudioInput, VOICE_COMMUNICATION); - } + return supportedFormat; } - _isHeadsetPluggedIn = pluggedIn; -#endif -} -void AudioClient::outputNotify() { - int recentUnfulfilled = _audioOutputIODevice.getRecentUnfulfilledReads(); - if (recentUnfulfilled > 0) { - qCDebug(audioclient, "Starve detected, %d new unfulfilled reads", recentUnfulfilled); - - if (_outputStarveDetectionEnabled.get()) { - quint64 now = usecTimestampNow() / 1000; - int dt = (int)(now - _outputStarveDetectionStartTimeMsec); - if (dt > STARVE_DETECTION_PERIOD) { - _outputStarveDetectionStartTimeMsec = now; - _outputStarveDetectionCount = 0; - } else { - _outputStarveDetectionCount += recentUnfulfilled; - if (_outputStarveDetectionCount > STARVE_DETECTION_THRESHOLD) { - int oldOutputBufferSizeFrames = _sessionOutputBufferSizeFrames; - int newOutputBufferSizeFrames = setOutputBufferSize(oldOutputBufferSizeFrames + 1, false); - - if (newOutputBufferSizeFrames > oldOutputBufferSizeFrames) { - qCDebug(audioclient, "Starve threshold surpassed (%d starves in %d ms)", _outputStarveDetectionCount, - dt); + void AudioClient::audioInputStateChanged(QAudio::State state) { +#if defined(Q_OS_ANDROID) + switch (state) { + case QAudio::StoppedState: + if (!_audioInput) { + break; + } + // Stopped on purpose + if (_shouldRestartInputSetup) { + Lock lock(_deviceMutex); + _inputDevice = _audioInput->start(); + lock.unlock(); + if (_inputDevice) { + connect(_inputDevice, SIGNAL(readyRead()), this, SLOT(handleMicAudioInput())); } + } + break; + case QAudio::ActiveState: + break; + default: + break; + } +#endif + } + void AudioClient::checkInputTimeout() { +#if defined(Q_OS_ANDROID) + if (_audioInput && _inputReadsSinceLastCheck < MIN_READS_TO_CONSIDER_INPUT_ALIVE) { + _audioInput->stop(); + } else { + _inputReadsSinceLastCheck = 0; + } +#endif + } + + void AudioClient::setHeadsetPluggedIn(bool pluggedIn) { +#if defined(Q_OS_ANDROID) + if (pluggedIn == !_isHeadsetPluggedIn && !_inputDeviceInfo.isNull()) { + QAndroidJniObject brand = QAndroidJniObject::getStaticObjectField("android/os/Build", "BRAND"); + // some samsung phones needs more time to shutdown the previous input device + if (brand.toString().contains("samsung", Qt::CaseInsensitive)) { + switchInputToAudioDevice(QAudioDeviceInfo(), true); + QThread::msleep(200); + } + + Setting::Handle enableAEC(SETTING_AEC_KEY, false); + bool aecEnabled = enableAEC.get(); + + if ((pluggedIn || !aecEnabled) && _inputDeviceInfo.deviceName() != VOICE_RECOGNITION) { + switchAudioDevice(QAudio::AudioInput, VOICE_RECOGNITION); + } else if (!pluggedIn && aecEnabled && _inputDeviceInfo.deviceName() != VOICE_COMMUNICATION) { + switchAudioDevice(QAudio::AudioInput, VOICE_COMMUNICATION); + } + } + _isHeadsetPluggedIn = pluggedIn; +#endif + } + + void AudioClient::outputNotify() { + int recentUnfulfilled = _audioOutputIODevice.getRecentUnfulfilledReads(); + if (recentUnfulfilled > 0) { + qCDebug(audioclient, "Starve detected, %d new unfulfilled reads", recentUnfulfilled); + + if (_outputStarveDetectionEnabled.get()) { + quint64 now = usecTimestampNow() / 1000; + int dt = (int)(now - _outputStarveDetectionStartTimeMsec); + if (dt > STARVE_DETECTION_PERIOD) { _outputStarveDetectionStartTimeMsec = now; _outputStarveDetectionCount = 0; + } else { + _outputStarveDetectionCount += recentUnfulfilled; + if (_outputStarveDetectionCount > STARVE_DETECTION_THRESHOLD) { + int oldOutputBufferSizeFrames = _sessionOutputBufferSizeFrames; + int newOutputBufferSizeFrames = setOutputBufferSize(oldOutputBufferSizeFrames + 1, false); + + if (newOutputBufferSizeFrames > oldOutputBufferSizeFrames) { + qCDebug(audioclient, "Starve threshold surpassed (%d starves in %d ms)", + _outputStarveDetectionCount, dt); + } + + _outputStarveDetectionStartTimeMsec = now; + _outputStarveDetectionCount = 0; + } } } } } -} -bool AudioClient::switchOutputToAudioDevice(const QAudioDeviceInfo outputDeviceInfo, bool isShutdownRequest) { - Q_ASSERT_X(QThread::currentThread() == thread(), Q_FUNC_INFO, "Function invoked on wrong thread"); + bool AudioClient::switchOutputToAudioDevice(const QAudioDeviceInfo outputDeviceInfo, bool isShutdownRequest) { + Q_ASSERT_X(QThread::currentThread() == thread(), Q_FUNC_INFO, "Function invoked on wrong thread"); - qCDebug(audioclient) << "AudioClient::switchOutputToAudioDevice() outputDeviceInfo: [" << outputDeviceInfo.deviceName() - << "]"; - bool supportedFormat = false; + qCDebug(audioclient) << "AudioClient::switchOutputToAudioDevice() outputDeviceInfo: [" << outputDeviceInfo.deviceName() + << "]"; + bool supportedFormat = false; - // NOTE: device start() uses the Qt internal device list - Lock lock(_deviceMutex); + // NOTE: device start() uses the Qt internal device list + Lock lock(_deviceMutex); - Lock localAudioLock(_localAudioMutex); - _localSamplesAvailable.exchange(0, std::memory_order_release); + Lock localAudioLock(_localAudioMutex); + _localSamplesAvailable.exchange(0, std::memory_order_release); - // cleanup any previously initialized device - if (_audioOutput) { - _audioOutputIODevice.close(); - _audioOutput->stop(); + // cleanup any previously initialized device + if (_audioOutput) { + _audioOutputIODevice.close(); + _audioOutput->stop(); - //must be deleted in next eventloop cycle when its called from notify() - _audioOutput->deleteLater(); - _audioOutput = NULL; + //must be deleted in next eventloop cycle when its called from notify() + _audioOutput->deleteLater(); + _audioOutput = NULL; - _loopbackOutputDevice = NULL; - //must be deleted in next eventloop cycle when its called from notify() - _loopbackAudioOutput->deleteLater(); - _loopbackAudioOutput = NULL; + _loopbackOutputDevice = NULL; + //must be deleted in next eventloop cycle when its called from notify() + _loopbackAudioOutput->deleteLater(); + _loopbackAudioOutput = NULL; - delete[] _outputMixBuffer; - _outputMixBuffer = NULL; + delete[] _outputMixBuffer; + _outputMixBuffer = NULL; - delete[] _outputScratchBuffer; - _outputScratchBuffer = NULL; + delete[] _outputScratchBuffer; + _outputScratchBuffer = NULL; - delete[] _localOutputMixBuffer; - _localOutputMixBuffer = NULL; + delete[] _localOutputMixBuffer; + _localOutputMixBuffer = NULL; - _outputDeviceInfo = QAudioDeviceInfo(); - } + _outputDeviceInfo = QAudioDeviceInfo(); + } - if (_networkToOutputResampler) { - // if we were using an input to network resampler, delete it here - delete _networkToOutputResampler; - _networkToOutputResampler = NULL; + if (_networkToOutputResampler) { + // if we were using an input to network resampler, delete it here + delete _networkToOutputResampler; + _networkToOutputResampler = NULL; - delete _localToOutputResampler; - _localToOutputResampler = NULL; - } + delete _localToOutputResampler; + _localToOutputResampler = NULL; + } - if (isShutdownRequest) { - qCDebug(audioclient) << "The audio output device has shut down."; - return true; - } + if (isShutdownRequest) { + qCDebug(audioclient) << "The audio output device has shut down."; + return true; + } - if (!outputDeviceInfo.isNull()) { - qCDebug(audioclient) << "The audio output device " << outputDeviceInfo.deviceName() << "is available."; - _outputDeviceInfo = outputDeviceInfo; - emit deviceChanged(QAudio::AudioOutput, outputDeviceInfo); + if (!outputDeviceInfo.isNull()) { + qCDebug(audioclient) << "The audio output device " << outputDeviceInfo.deviceName() << "is available."; + _outputDeviceInfo = outputDeviceInfo; + emit deviceChanged(QAudio::AudioOutput, outputDeviceInfo); - if (adjustedFormatForAudioDevice(outputDeviceInfo, _desiredOutputFormat, _outputFormat)) { - qCDebug(audioclient) << "The format to be used for audio output is" << _outputFormat; + if (adjustedFormatForAudioDevice(outputDeviceInfo, _desiredOutputFormat, _outputFormat)) { + qCDebug(audioclient) << "The format to be used for audio output is" << _outputFormat; - // we've got the best we can get for input - // if required, setup a resampler for this input to our desired network format - if (_desiredOutputFormat != _outputFormat && _desiredOutputFormat.sampleRate() != _outputFormat.sampleRate()) { - qCDebug(audioclient) << "Attemping to create a resampler for network format to output format."; + // we've got the best we can get for input + // if required, setup a resampler for this input to our desired network format + if (_desiredOutputFormat != _outputFormat && _desiredOutputFormat.sampleRate() != _outputFormat.sampleRate()) { + qCDebug(audioclient) << "Attemping to create a resampler for network format to output format."; - assert(_desiredOutputFormat.sampleSize() == 16); - assert(_outputFormat.sampleSize() == 16); + assert(_desiredOutputFormat.sampleSize() == 16); + assert(_outputFormat.sampleSize() == 16); - _networkToOutputResampler = - new AudioSRC(_desiredOutputFormat.sampleRate(), _outputFormat.sampleRate(), OUTPUT_CHANNEL_COUNT); - _localToOutputResampler = - new AudioSRC(_desiredOutputFormat.sampleRate(), _outputFormat.sampleRate(), OUTPUT_CHANNEL_COUNT); + _networkToOutputResampler = + new AudioSRC(_desiredOutputFormat.sampleRate(), _outputFormat.sampleRate(), OUTPUT_CHANNEL_COUNT); + _localToOutputResampler = + new AudioSRC(_desiredOutputFormat.sampleRate(), _outputFormat.sampleRate(), OUTPUT_CHANNEL_COUNT); - } else { - qCDebug(audioclient) << "No resampling required for network output to match actual output format."; - } - - outputFormatChanged(); - - // setup our general output device for audio-mixer audio - _audioOutput = new QAudioOutput(outputDeviceInfo, _outputFormat, this); - - int deviceChannelCount = _outputFormat.channelCount(); - int frameSize = - (AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL * deviceChannelCount * _outputFormat.sampleRate()) / - _desiredOutputFormat.sampleRate(); - int requestedSize = _sessionOutputBufferSizeFrames * frameSize * AudioConstants::SAMPLE_SIZE; - _audioOutput->setBufferSize(requestedSize); - - // initialize mix buffers on the _audioOutput thread to avoid races - connect(_audioOutput, &QAudioOutput::stateChanged, [&, frameSize, requestedSize](QAudio::State state) { - if (state == QAudio::ActiveState) { - // restrict device callback to _outputPeriod samples - _outputPeriod = _audioOutput->periodSize() / AudioConstants::SAMPLE_SIZE; - // device callback may exceed reported period, so double it to avoid stutter - _outputPeriod *= 2; - - _outputMixBuffer = new float[_outputPeriod]; - _outputScratchBuffer = new int16_t[_outputPeriod]; - - // size local output mix buffer based on resampled network frame size - int networkPeriod = - _localToOutputResampler - ? _localToOutputResampler->getMaxOutput(AudioConstants::NETWORK_FRAME_SAMPLES_STEREO) - : AudioConstants::NETWORK_FRAME_SAMPLES_STEREO; - _localOutputMixBuffer = new float[networkPeriod]; - - // local period should be at least twice the output period, - // in case two device reads happen before more data can be read (worst case) - int localPeriod = _outputPeriod * 2; - // round up to an exact multiple of networkPeriod - localPeriod = ((localPeriod + networkPeriod - 1) / networkPeriod) * networkPeriod; - // this ensures lowest latency without stutter from underrun - _localInjectorsStream.resizeForFrameSize(localPeriod); - - int bufferSize = _audioOutput->bufferSize(); - int bufferSamples = bufferSize / AudioConstants::SAMPLE_SIZE; - int bufferFrames = bufferSamples / (float)frameSize; - qCDebug(audioclient) << "frame (samples):" << frameSize; - qCDebug(audioclient) << "buffer (frames):" << bufferFrames; - qCDebug(audioclient) << "buffer (samples):" << bufferSamples; - qCDebug(audioclient) << "buffer (bytes):" << bufferSize; - qCDebug(audioclient) << "requested (bytes):" << requestedSize; - qCDebug(audioclient) << "period (samples):" << _outputPeriod; - qCDebug(audioclient) << "local buffer (samples):" << localPeriod; - - disconnect(_audioOutput, &QAudioOutput::stateChanged, 0, 0); - - // unlock to avoid a deadlock with the device callback (which always succeeds this initialization) - localAudioLock.unlock(); + } else { + qCDebug(audioclient) << "No resampling required for network output to match actual output format."; } - }); - connect(_audioOutput, &QAudioOutput::notify, this, &AudioClient::outputNotify); - _audioOutputIODevice.start(); + outputFormatChanged(); - _audioOutput->start(&_audioOutputIODevice); + // setup our general output device for audio-mixer audio + _audioOutput = new QAudioOutput(outputDeviceInfo, _outputFormat, this); - // setup a loopback audio output device - _loopbackAudioOutput = new QAudioOutput(outputDeviceInfo, _outputFormat, this); + int deviceChannelCount = _outputFormat.channelCount(); + int frameSize = + (AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL * deviceChannelCount * _outputFormat.sampleRate()) / + _desiredOutputFormat.sampleRate(); + int requestedSize = _sessionOutputBufferSizeFrames * frameSize * AudioConstants::SAMPLE_SIZE; + _audioOutput->setBufferSize(requestedSize); - _timeSinceLastReceived.start(); + // initialize mix buffers on the _audioOutput thread to avoid races + connect(_audioOutput, &QAudioOutput::stateChanged, [&, frameSize, requestedSize](QAudio::State state) { + if (state == QAudio::ActiveState) { + // restrict device callback to _outputPeriod samples + _outputPeriod = _audioOutput->periodSize() / AudioConstants::SAMPLE_SIZE; + // device callback may exceed reported period, so double it to avoid stutter + _outputPeriod *= 2; - supportedFormat = true; + _outputMixBuffer = new float[_outputPeriod]; + _outputScratchBuffer = new int16_t[_outputPeriod]; + + // size local output mix buffer based on resampled network frame size + int networkPeriod = + _localToOutputResampler + ? _localToOutputResampler->getMaxOutput(AudioConstants::NETWORK_FRAME_SAMPLES_STEREO) + : AudioConstants::NETWORK_FRAME_SAMPLES_STEREO; + _localOutputMixBuffer = new float[networkPeriod]; + + // local period should be at least twice the output period, + // in case two device reads happen before more data can be read (worst case) + int localPeriod = _outputPeriod * 2; + // round up to an exact multiple of networkPeriod + localPeriod = ((localPeriod + networkPeriod - 1) / networkPeriod) * networkPeriod; + // this ensures lowest latency without stutter from underrun + _localInjectorsStream.resizeForFrameSize(localPeriod); + + int bufferSize = _audioOutput->bufferSize(); + int bufferSamples = bufferSize / AudioConstants::SAMPLE_SIZE; + int bufferFrames = bufferSamples / (float)frameSize; + qCDebug(audioclient) << "frame (samples):" << frameSize; + qCDebug(audioclient) << "buffer (frames):" << bufferFrames; + qCDebug(audioclient) << "buffer (samples):" << bufferSamples; + qCDebug(audioclient) << "buffer (bytes):" << bufferSize; + qCDebug(audioclient) << "requested (bytes):" << requestedSize; + qCDebug(audioclient) << "period (samples):" << _outputPeriod; + qCDebug(audioclient) << "local buffer (samples):" << localPeriod; + + disconnect(_audioOutput, &QAudioOutput::stateChanged, 0, 0); + + // unlock to avoid a deadlock with the device callback (which always succeeds this initialization) + localAudioLock.unlock(); + } + }); + connect(_audioOutput, &QAudioOutput::notify, this, &AudioClient::outputNotify); + + _audioOutputIODevice.start(); + + _audioOutput->start(&_audioOutputIODevice); + + // setup a loopback audio output device + _loopbackAudioOutput = new QAudioOutput(outputDeviceInfo, _outputFormat, this); + + _timeSinceLastReceived.start(); + + supportedFormat = true; + } } + + return supportedFormat; } - return supportedFormat; -} + int AudioClient::setOutputBufferSize(int numFrames, bool persist) { + qCDebug(audioclient) << __FUNCTION__ << "numFrames:" << numFrames << "persist:" << persist; -int AudioClient::setOutputBufferSize(int numFrames, bool persist) { - qCDebug(audioclient) << __FUNCTION__ << "numFrames:" << numFrames << "persist:" << persist; + numFrames = std::min(std::max(numFrames, MIN_BUFFER_FRAMES), MAX_BUFFER_FRAMES); + qCDebug(audioclient) << __FUNCTION__ << "clamped numFrames:" << numFrames + << "_sessionOutputBufferSizeFrames:" << _sessionOutputBufferSizeFrames; - numFrames = std::min(std::max(numFrames, MIN_BUFFER_FRAMES), MAX_BUFFER_FRAMES); - qCDebug(audioclient) << __FUNCTION__ << "clamped numFrames:" << numFrames - << "_sessionOutputBufferSizeFrames:" << _sessionOutputBufferSizeFrames; - - if (numFrames != _sessionOutputBufferSizeFrames) { - qCInfo(audioclient, "Audio output buffer set to %d frames", numFrames); - _sessionOutputBufferSizeFrames = numFrames; - if (persist) { - _outputBufferSizeFrames.set(numFrames); + if (numFrames != _sessionOutputBufferSizeFrames) { + qCInfo(audioclient, "Audio output buffer set to %d frames", numFrames); + _sessionOutputBufferSizeFrames = numFrames; + if (persist) { + _outputBufferSizeFrames.set(numFrames); + } } + return numFrames; } - return numFrames; -} -// The following constant is operating system dependent due to differences in -// the way input audio is handled. The audio input buffer size is inversely -// proportional to the accelerator ratio. + // The following constant is operating system dependent due to differences in + // the way input audio is handled. The audio input buffer size is inversely + // proportional to the accelerator ratio. #ifdef Q_OS_WIN -const float AudioClient::CALLBACK_ACCELERATOR_RATIO = IsWindows8OrGreater() ? 1.0f : 0.25f; + const float AudioClient::CALLBACK_ACCELERATOR_RATIO = IsWindows8OrGreater() ? 1.0f : 0.25f; #endif #ifdef Q_OS_MAC -const float AudioClient::CALLBACK_ACCELERATOR_RATIO = 2.0f; + const float AudioClient::CALLBACK_ACCELERATOR_RATIO = 2.0f; #endif #ifdef Q_OS_ANDROID -const float AudioClient::CALLBACK_ACCELERATOR_RATIO = 0.5f; + const float AudioClient::CALLBACK_ACCELERATOR_RATIO = 0.5f; #elif defined(Q_OS_LINUX) -const float AudioClient::CALLBACK_ACCELERATOR_RATIO = 2.0f; + const float AudioClient::CALLBACK_ACCELERATOR_RATIO = 2.0f; #endif -int AudioClient::calculateNumberOfInputCallbackBytes(const QAudioFormat& format) const { - int numInputCallbackBytes = (int)(((AudioConstants::NETWORK_FRAME_BYTES_PER_CHANNEL * format.channelCount() * - ((float)format.sampleRate() / AudioConstants::SAMPLE_RATE)) / - CALLBACK_ACCELERATOR_RATIO) + - 0.5f); + int AudioClient::calculateNumberOfInputCallbackBytes(const QAudioFormat& format) const { + int numInputCallbackBytes = (int)(((AudioConstants::NETWORK_FRAME_BYTES_PER_CHANNEL * format.channelCount() * + ((float)format.sampleRate() / AudioConstants::SAMPLE_RATE)) / + CALLBACK_ACCELERATOR_RATIO) + + 0.5f); - return numInputCallbackBytes; -} - -int AudioClient::calculateNumberOfFrameSamples(int numBytes) const { - int frameSamples = (int)(numBytes * CALLBACK_ACCELERATOR_RATIO + 0.5f) / AudioConstants::SAMPLE_SIZE; - return frameSamples; -} - -float AudioClient::azimuthForSource(const glm::vec3& relativePosition) { - glm::quat inverseOrientation = glm::inverse(_orientationGetter()); - - glm::vec3 rotatedSourcePosition = inverseOrientation * relativePosition; - - // project the rotated source position vector onto the XZ plane - rotatedSourcePosition.y = 0.0f; - - static const float SOURCE_DISTANCE_THRESHOLD = 1e-30f; - - float rotatedSourcePositionLength2 = glm::length2(rotatedSourcePosition); - if (rotatedSourcePositionLength2 > SOURCE_DISTANCE_THRESHOLD) { - // produce an oriented angle about the y-axis - glm::vec3 direction = rotatedSourcePosition * (1.0f / fastSqrtf(rotatedSourcePositionLength2)); - float angle = fastAcosf(glm::clamp(-direction.z, -1.0f, 1.0f)); // UNIT_NEG_Z is "forward" - return (direction.x < 0.0f) ? -angle : angle; - - } else { - // no azimuth if they are in same spot - return 0.0f; - } -} - -float AudioClient::gainForSource(float distance, float volume) { - // attenuation = -6dB * log2(distance) - // reference attenuation of 0dB at distance = 1.0m - float gain = volume / std::max(distance, HRTF_NEARFIELD_MIN); - - return gain; -} - -qint64 AudioClient::AudioOutputIODevice::readData(char* data, qint64 maxSize) { - // samples requested from OUTPUT_CHANNEL_COUNT - int deviceChannelCount = _audio->_outputFormat.channelCount(); - int samplesRequested = (int)(maxSize / AudioConstants::SAMPLE_SIZE) * OUTPUT_CHANNEL_COUNT / deviceChannelCount; - // restrict samplesRequested to the size of our mix/scratch buffers - samplesRequested = std::min(samplesRequested, _audio->_outputPeriod); - - int16_t* scratchBuffer = _audio->_outputScratchBuffer; - float* mixBuffer = _audio->_outputMixBuffer; - - int networkSamplesPopped; - if ((networkSamplesPopped = _receivedAudioStream.popSamples(samplesRequested, false)) > 0) { - qCDebug(audiostream, "Read %d samples from buffer (%d available, %d requested)", networkSamplesPopped, - _receivedAudioStream.getSamplesAvailable(), samplesRequested); - AudioRingBuffer::ConstIterator lastPopOutput = _receivedAudioStream.getLastPopOutput(); - lastPopOutput.readSamples(scratchBuffer, networkSamplesPopped); - for (int i = 0; i < networkSamplesPopped; i++) { - mixBuffer[i] = convertToFloat(scratchBuffer[i]); - } - samplesRequested = networkSamplesPopped; + return numInputCallbackBytes; } - int injectorSamplesPopped = 0; - { - bool append = networkSamplesPopped > 0; - // check the samples we have available locklessly; this is possible because only two functions add to the count: - // - prepareLocalAudioInjectors will only increase samples count - // - switchOutputToAudioDevice will zero samples count, - // stop the device - so that readData will exhaust the existing buffer or see a zeroed samples count, - // and start the device - which can then only see a zeroed samples count - int samplesAvailable = _audio->_localSamplesAvailable.load(std::memory_order_acquire); - - // if we do not have enough samples buffered despite having injectors, buffer them synchronously - if (samplesAvailable < samplesRequested && _audio->_localInjectorsAvailable.load(std::memory_order_acquire)) { - // try_to_lock, in case the device is being shut down already - std::unique_ptr localAudioLock(new Lock(_audio->_localAudioMutex, std::try_to_lock)); - if (localAudioLock->owns_lock()) { - _audio->prepareLocalAudioInjectors(std::move(localAudioLock)); - samplesAvailable = _audio->_localSamplesAvailable.load(std::memory_order_acquire); - } - } - - samplesRequested = std::min(samplesRequested, samplesAvailable); - if ((injectorSamplesPopped = _localInjectorsStream.appendSamples(mixBuffer, samplesRequested, append)) > 0) { - _audio->_localSamplesAvailable.fetch_sub(injectorSamplesPopped, std::memory_order_release); - qCDebug(audiostream, "Read %d samples from injectors (%d available, %d requested)", injectorSamplesPopped, - _localInjectorsStream.samplesAvailable(), samplesRequested); - } + int AudioClient::calculateNumberOfFrameSamples(int numBytes) const { + int frameSamples = (int)(numBytes * CALLBACK_ACCELERATOR_RATIO + 0.5f) / AudioConstants::SAMPLE_SIZE; + return frameSamples; } - // prepare injectors for the next callback - QtConcurrent::run(QThreadPool::globalInstance(), [this] { _audio->prepareLocalAudioInjectors(); }); + float AudioClient::azimuthForSource(const glm::vec3& relativePosition) { + glm::quat inverseOrientation = glm::inverse(_orientationGetter()); + + glm::vec3 rotatedSourcePosition = inverseOrientation * relativePosition; + + // project the rotated source position vector onto the XZ plane + rotatedSourcePosition.y = 0.0f; + + static const float SOURCE_DISTANCE_THRESHOLD = 1e-30f; + + float rotatedSourcePositionLength2 = glm::length2(rotatedSourcePosition); + if (rotatedSourcePositionLength2 > SOURCE_DISTANCE_THRESHOLD) { + // produce an oriented angle about the y-axis + glm::vec3 direction = rotatedSourcePosition * (1.0f / fastSqrtf(rotatedSourcePositionLength2)); + float angle = fastAcosf(glm::clamp(-direction.z, -1.0f, 1.0f)); // UNIT_NEG_Z is "forward" + return (direction.x < 0.0f) ? -angle : angle; - int samplesPopped = std::max(networkSamplesPopped, injectorSamplesPopped); - int framesPopped = samplesPopped / AudioConstants::STEREO; - int bytesWritten; - if (samplesPopped > 0) { - if (deviceChannelCount == OUTPUT_CHANNEL_COUNT) { - // limit the audio - _audio->_audioLimiter.render(mixBuffer, (int16_t*)data, framesPopped); } else { - _audio->_audioLimiter.render(mixBuffer, scratchBuffer, framesPopped); + // no azimuth if they are in same spot + return 0.0f; + } + } - // upmix or downmix to deviceChannelCount - if (deviceChannelCount > OUTPUT_CHANNEL_COUNT) { - int extraChannels = deviceChannelCount - OUTPUT_CHANNEL_COUNT; - channelUpmix(scratchBuffer, (int16_t*)data, samplesPopped, extraChannels); - } else { - channelDownmix(scratchBuffer, (int16_t*)data, samplesPopped); + float AudioClient::gainForSource(float distance, float volume) { + // attenuation = -6dB * log2(distance) + // reference attenuation of 0dB at distance = 1.0m + float gain = volume / std::max(distance, HRTF_NEARFIELD_MIN); + + return gain; + } + + qint64 AudioClient::AudioOutputIODevice::readData(char* data, qint64 maxSize) { + // samples requested from OUTPUT_CHANNEL_COUNT + int deviceChannelCount = _audio->_outputFormat.channelCount(); + int samplesRequested = (int)(maxSize / AudioConstants::SAMPLE_SIZE) * OUTPUT_CHANNEL_COUNT / deviceChannelCount; + // restrict samplesRequested to the size of our mix/scratch buffers + samplesRequested = std::min(samplesRequested, _audio->_outputPeriod); + + int16_t* scratchBuffer = _audio->_outputScratchBuffer; + float* mixBuffer = _audio->_outputMixBuffer; + + int networkSamplesPopped; + if ((networkSamplesPopped = _receivedAudioStream.popSamples(samplesRequested, false)) > 0) { + qCDebug(audiostream, "Read %d samples from buffer (%d available, %d requested)", networkSamplesPopped, + _receivedAudioStream.getSamplesAvailable(), samplesRequested); + AudioRingBuffer::ConstIterator lastPopOutput = _receivedAudioStream.getLastPopOutput(); + lastPopOutput.readSamples(scratchBuffer, networkSamplesPopped); + for (int i = 0; i < networkSamplesPopped; i++) { + mixBuffer[i] = convertToFloat(scratchBuffer[i]); + } + samplesRequested = networkSamplesPopped; + } + + int injectorSamplesPopped = 0; + { + bool append = networkSamplesPopped > 0; + // check the samples we have available locklessly; this is possible because only two functions add to the count: + // - prepareLocalAudioInjectors will only increase samples count + // - switchOutputToAudioDevice will zero samples count, + // stop the device - so that readData will exhaust the existing buffer or see a zeroed samples count, + // and start the device - which can then only see a zeroed samples count + int samplesAvailable = _audio->_localSamplesAvailable.load(std::memory_order_acquire); + + // if we do not have enough samples buffered despite having injectors, buffer them synchronously + if (samplesAvailable < samplesRequested && _audio->_localInjectorsAvailable.load(std::memory_order_acquire)) { + // try_to_lock, in case the device is being shut down already + std::unique_ptr localAudioLock(new Lock(_audio->_localAudioMutex, std::try_to_lock)); + if (localAudioLock->owns_lock()) { + _audio->prepareLocalAudioInjectors(std::move(localAudioLock)); + samplesAvailable = _audio->_localSamplesAvailable.load(std::memory_order_acquire); + } + } + + samplesRequested = std::min(samplesRequested, samplesAvailable); + if ((injectorSamplesPopped = _localInjectorsStream.appendSamples(mixBuffer, samplesRequested, append)) > 0) { + _audio->_localSamplesAvailable.fetch_sub(injectorSamplesPopped, std::memory_order_release); + qCDebug(audiostream, "Read %d samples from injectors (%d available, %d requested)", injectorSamplesPopped, + _localInjectorsStream.samplesAvailable(), samplesRequested); } } - bytesWritten = framesPopped * AudioConstants::SAMPLE_SIZE * deviceChannelCount; - } else { - // nothing on network, don't grab anything from injectors, and just return 0s - memset(data, 0, maxSize); - bytesWritten = maxSize; + // prepare injectors for the next callback + QtConcurrent::run(QThreadPool::globalInstance(), [this] { _audio->prepareLocalAudioInjectors(); }); + + int samplesPopped = std::max(networkSamplesPopped, injectorSamplesPopped); + int framesPopped = samplesPopped / AudioConstants::STEREO; + int bytesWritten; + if (samplesPopped > 0) { + if (deviceChannelCount == OUTPUT_CHANNEL_COUNT) { + // limit the audio + _audio->_audioLimiter.render(mixBuffer, (int16_t*)data, framesPopped); + } else { + _audio->_audioLimiter.render(mixBuffer, scratchBuffer, framesPopped); + + // upmix or downmix to deviceChannelCount + if (deviceChannelCount > OUTPUT_CHANNEL_COUNT) { + int extraChannels = deviceChannelCount - OUTPUT_CHANNEL_COUNT; + channelUpmix(scratchBuffer, (int16_t*)data, samplesPopped, extraChannels); + } else { + channelDownmix(scratchBuffer, (int16_t*)data, samplesPopped); + } + } + + bytesWritten = framesPopped * AudioConstants::SAMPLE_SIZE * deviceChannelCount; + } else { + // nothing on network, don't grab anything from injectors, and just return 0s + memset(data, 0, maxSize); + bytesWritten = maxSize; + } + + // send output buffer for recording + if (_audio->_isRecording) { + Lock lock(_recordMutex); + _audio->_audioFileWav.addRawAudioChunk(reinterpret_cast(scratchBuffer), bytesWritten); + } + + int bytesAudioOutputUnplayed = _audio->_audioOutput->bufferSize() - _audio->_audioOutput->bytesFree(); + float msecsAudioOutputUnplayed = + bytesAudioOutputUnplayed / (float)_audio->_outputFormat.bytesForDuration(USECS_PER_MSEC); + _audio->_stats.updateOutputMsUnplayed(msecsAudioOutputUnplayed); + + if (bytesAudioOutputUnplayed == 0) { + _unfulfilledReads++; + } + + return bytesWritten; } - // send output buffer for recording - if (_audio->_isRecording) { - Lock lock(_recordMutex); - _audio->_audioFileWav.addRawAudioChunk(reinterpret_cast(scratchBuffer), bytesWritten); + bool AudioClient::startRecording(const QString& filepath) { + if (!_audioFileWav.create(_outputFormat, filepath)) { + qDebug() << "Error creating audio file: " + filepath; + return false; + } + _isRecording = true; + return true; } - int bytesAudioOutputUnplayed = _audio->_audioOutput->bufferSize() - _audio->_audioOutput->bytesFree(); - float msecsAudioOutputUnplayed = bytesAudioOutputUnplayed / (float)_audio->_outputFormat.bytesForDuration(USECS_PER_MSEC); - _audio->_stats.updateOutputMsUnplayed(msecsAudioOutputUnplayed); - - if (bytesAudioOutputUnplayed == 0) { - _unfulfilledReads++; - } - - return bytesWritten; -} - -bool AudioClient::startRecording(const QString& filepath) { - if (!_audioFileWav.create(_outputFormat, filepath)) { - qDebug() << "Error creating audio file: " + filepath; - return false; - } - _isRecording = true; - return true; -} - -void AudioClient::stopRecording() { - if (_isRecording) { - _isRecording = false; - _audioFileWav.close(); - } -} - -void AudioClient::loadSettings() { - _receivedAudioStream.setDynamicJitterBufferEnabled(dynamicJitterBufferEnabled.get()); - _receivedAudioStream.setStaticJitterBufferFrames(staticJitterBufferFrames.get()); - - qCDebug(audioclient) << "---- Initializing Audio Client ----"; - auto codecPlugins = PluginManager::getInstance()->getCodecPlugins(); - for (auto& plugin : codecPlugins) { - qCDebug(audioclient) << "Codec available:" << plugin->getName(); - } -} - -void AudioClient::saveSettings() { - dynamicJitterBufferEnabled.set(_receivedAudioStream.dynamicJitterBufferEnabled()); - staticJitterBufferFrames.set(_receivedAudioStream.getStaticJitterBufferFrames()); -} - -void AudioClient::setAvatarBoundingBoxParameters(glm::vec3 corner, glm::vec3 scale) { - avatarBoundingBoxCorner = corner; - avatarBoundingBoxScale = scale; -} - -void AudioClient::startThread() { - moveToNewNamedThread(this, "Audio Thread", [this] { start(); }, QThread::TimeCriticalPriority); -} - -void AudioClient::setInputVolume(float volume, bool emitSignal) { - if (_audioInput && volume != (float)_audioInput->volume()) { - _audioInput->setVolume(volume); - if (emitSignal) { - emit inputVolumeChanged(_audioInput->volume()); + void AudioClient::stopRecording() { + if (_isRecording) { + _isRecording = false; + _audioFileWav.close(); + } + } + + void AudioClient::loadSettings() { + _receivedAudioStream.setDynamicJitterBufferEnabled(dynamicJitterBufferEnabled.get()); + _receivedAudioStream.setStaticJitterBufferFrames(staticJitterBufferFrames.get()); + + qCDebug(audioclient) << "---- Initializing Audio Client ----"; + auto codecPlugins = PluginManager::getInstance()->getCodecPlugins(); + for (auto& plugin : codecPlugins) { + qCDebug(audioclient) << "Codec available:" << plugin->getName(); + } + } + + void AudioClient::saveSettings() { + dynamicJitterBufferEnabled.set(_receivedAudioStream.dynamicJitterBufferEnabled()); + staticJitterBufferFrames.set(_receivedAudioStream.getStaticJitterBufferFrames()); + } + + void AudioClient::setAvatarBoundingBoxParameters(glm::vec3 corner, glm::vec3 scale) { + avatarBoundingBoxCorner = corner; + avatarBoundingBoxScale = scale; + } + + void AudioClient::startThread() { + moveToNewNamedThread(this, "Audio Thread", [this] { start(); }, QThread::TimeCriticalPriority); + } + + void AudioClient::setInputVolume(float volume, bool emitSignal) { + if (_audioInput && volume != (float)_audioInput->volume()) { + _audioInput->setVolume(volume); + if (emitSignal) { + emit inputVolumeChanged(_audioInput->volume()); + } } } -} diff --git a/libraries/audio-client/src/AudioClient.h b/libraries/audio-client/src/AudioClient.h index 1ca7cac6ca..2e5ef65473 100644 --- a/libraries/audio-client/src/AudioClient.h +++ b/libraries/audio-client/src/AudioClient.h @@ -197,7 +197,11 @@ public slots: void checkInputTimeout(); void handleDummyAudioInput(); void handleRecordedAudioInput(const QByteArray& audio); - void handleTTSAudioInput(const QByteArray& audio); + void handleTTSAudioInput(const QByteArray& audio, + const int& newChunkSize, + const int& timerInterval); + void clearTTSBuffer(); + void processTTSBuffer(); void reset(); void audioMixerKilled(); @@ -289,11 +293,12 @@ private: bool mixLocalAudioInjectors(float* mixBuffer); float azimuthForSource(const glm::vec3& relativePosition); float gainForSource(float distance, float volume); - - void processAudioAndAddToRingBuffer(QByteArray& inputByteArray, - const uchar& channelCount, - const qint32& bytesForDuration, - QByteArray& rollingBuffer); + + Mutex _TTSMutex; + QTimer _TTSTimer; + bool _isProcessingTTS {false}; + QByteArray _TTSAudioBuffer; + int _TTSChunkSize = AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL * 50; #ifdef Q_OS_ANDROID QTimer _checkInputTimer; @@ -401,7 +406,7 @@ private: void configureReverb(); void updateReverbOptions(); - void handleLocalEchoAndReverb(QByteArray& inputByteArray); + void handleLocalEchoAndReverb(QByteArray& inputByteArray, const int& sampleRate, const int& channelCount); bool switchInputToAudioDevice(const QAudioDeviceInfo inputDeviceInfo, bool isShutdownRequest = false); bool switchOutputToAudioDevice(const QAudioDeviceInfo outputDeviceInfo, bool isShutdownRequest = false); From 1d8994993c0e640ecaeadd8557fe09ea9a6f5fe2 Mon Sep 17 00:00:00 2001 From: Zach Fox Date: Mon, 22 Oct 2018 17:05:31 -0700 Subject: [PATCH 10/18] Whitelist TTS scripting interface to TTS app, which is now a default script --- interface/resources/qml/hifi/tts/TTS.qml | 304 ++++++++++++++++++ interface/src/Application.cpp | 12 +- .../src/scripting/TTSScriptingInterface.cpp | 4 +- scripts/defaultScripts.js | 3 +- scripts/system/tts/TTS.js | 28 ++ scripts/system/tts/tts-a.svg | 9 + scripts/system/tts/tts-i.svg | 9 + 7 files changed, 363 insertions(+), 6 deletions(-) create mode 100644 interface/resources/qml/hifi/tts/TTS.qml create mode 100644 scripts/system/tts/TTS.js create mode 100644 scripts/system/tts/tts-a.svg create mode 100644 scripts/system/tts/tts-i.svg diff --git a/interface/resources/qml/hifi/tts/TTS.qml b/interface/resources/qml/hifi/tts/TTS.qml new file mode 100644 index 0000000000..114efd0cca --- /dev/null +++ b/interface/resources/qml/hifi/tts/TTS.qml @@ -0,0 +1,304 @@ +// +// TTS.qml +// +// TTS App +// +// Created by Zach Fox on 2018-10-10 +// Copyright 2018 High Fidelity, Inc. +// +// Distributed under the Apache License, Version 2.0. +// See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html +// + +import Hifi 1.0 as Hifi +import QtQuick 2.10 +import QtQuick.Controls 2.3 +import "qrc:////qml//styles-uit" as HifiStylesUit +import "qrc:////qml//controls-uit" as HifiControlsUit +import "qrc:////qml//controls" as HifiControls + +Rectangle { + HifiStylesUit.HifiConstants { id: hifi; } + + id: root; + // Style + color: hifi.colors.darkGray; + property bool keyboardRaised: false; + + // + // TITLE BAR START + // + Item { + id: titleBarContainer; + // Size + width: root.width; + height: 50; + // Anchors + anchors.left: parent.left; + anchors.top: parent.top; + + // Title bar text + HifiStylesUit.RalewaySemiBold { + id: titleBarText; + text: "Text-to-Speech"; + // Text size + size: hifi.fontSizes.overlayTitle; + // Anchors + anchors.top: parent.top; + anchors.bottom: parent.bottom; + anchors.left: parent.left; + anchors.leftMargin: 16; + width: paintedWidth; + // Style + color: hifi.colors.lightGrayText; + // Alignment + horizontalAlignment: Text.AlignHLeft; + verticalAlignment: Text.AlignVCenter; + } + + // Separator + HifiControlsUit.Separator { + anchors.left: parent.left; + anchors.right: parent.right; + anchors.bottom: parent.bottom; + } + } + // + // TITLE BAR END + // + + + Item { + id: tagButtonContainer; + anchors.top: titleBarContainer.bottom; + anchors.topMargin: 2; + anchors.left: parent.left; + anchors.right: parent.right; + height: 70; + + HifiStylesUit.RalewaySemiBold { + id: tagButtonTitle; + text: "Insert Tag:"; + // Text size + size: 18; + // Anchors + anchors.top: parent.top; + anchors.left: parent.left; + anchors.right: parent.right; + height: 35; + // Style + color: hifi.colors.lightGrayText; + // Alignment + horizontalAlignment: Text.AlignHCenter; + verticalAlignment: Text.AlignVCenter; + } + + HifiControlsUit.Button { + id: pitch10Button; + focusPolicy: Qt.NoFocus; + color: hifi.buttons.none; + colorScheme: hifi.colorSchemes.dark; + anchors.top: tagButtonTitle.bottom; + anchors.left: parent.left; + anchors.leftMargin: 3; + width: parent.width/6 - 6; + height: 30; + text: "Pitch 10"; + onClicked: { + messageToSpeak.insert(messageToSpeak.cursorPosition, ""); + } + } + + HifiControlsUit.Button { + id: pitch0Button; + focusPolicy: Qt.NoFocus; + color: hifi.buttons.none; + colorScheme: hifi.colorSchemes.dark; + anchors.top: tagButtonTitle.bottom; + anchors.left: pitch10Button.right; + anchors.leftMargin: 6; + width: parent.width/6 - anchors.leftMargin; + height: 30; + text: "Pitch 0"; + onClicked: { + messageToSpeak.insert(messageToSpeak.cursorPosition, ""); + } + } + + HifiControlsUit.Button { + id: pitchNeg10Button; + focusPolicy: Qt.NoFocus; + color: hifi.buttons.none; + colorScheme: hifi.colorSchemes.dark; + anchors.top: tagButtonTitle.bottom; + anchors.left: pitch0Button.right; + anchors.leftMargin: 6; + width: parent.width/6 - anchors.leftMargin; + height: 30; + text: "Pitch -10"; + onClicked: { + messageToSpeak.insert(messageToSpeak.cursorPosition, ""); + } + } + + HifiControlsUit.Button { + id: speed5Button; + focusPolicy: Qt.NoFocus; + color: hifi.buttons.none; + colorScheme: hifi.colorSchemes.dark; + anchors.top: tagButtonTitle.bottom; + anchors.left: pitchNeg10Button.right; + anchors.leftMargin: 6; + width: parent.width/6 - anchors.leftMargin; + height: 30; + text: "Speed 5"; + onClicked: { + messageToSpeak.insert(messageToSpeak.cursorPosition, ""); + } + } + + HifiControlsUit.Button { + id: speed0Button; + focusPolicy: Qt.NoFocus; + color: hifi.buttons.none; + colorScheme: hifi.colorSchemes.dark; + anchors.top: tagButtonTitle.bottom; + anchors.left: speed5Button.right; + anchors.leftMargin: 6; + width: parent.width/6 - anchors.leftMargin; + height: 30; + text: "Speed 0"; + onClicked: { + messageToSpeak.insert(messageToSpeak.cursorPosition, ""); + } + } + + HifiControlsUit.Button { + id: speedNeg10Button; + focusPolicy: Qt.NoFocus; + color: hifi.buttons.none; + colorScheme: hifi.colorSchemes.dark; + anchors.top: tagButtonTitle.bottom; + anchors.left: speed0Button.right; + anchors.leftMargin: 6; + width: parent.width/6 - anchors.leftMargin; + height: 30; + text: "Speed -10"; + onClicked: { + messageToSpeak.insert(messageToSpeak.cursorPosition, ""); + } + } + } + + Item { + anchors.top: tagButtonContainer.bottom; + anchors.topMargin: 8; + anchors.bottom: keyboardContainer.top; + anchors.bottomMargin: 16; + anchors.left: parent.left; + anchors.leftMargin: 16; + anchors.right: parent.right; + anchors.rightMargin: 16; + + TextArea { + id: messageToSpeak; + placeholderText: "Message to Speak"; + font.family: "Fira Sans SemiBold"; + font.pixelSize: 20; + // Anchors + anchors.top: parent.top; + anchors.left: parent.left; + anchors.right: parent.right; + anchors.bottom: speakButton.top; + anchors.bottomMargin: 8; + // Style + background: Rectangle { + anchors.fill: parent; + color: parent.activeFocus ? hifi.colors.black : hifi.colors.baseGrayShadow; + border.width: parent.activeFocus ? 1 : 0; + border.color: parent.activeFocus ? hifi.colors.primaryHighlight : hifi.colors.textFieldLightBackground; + } + color: hifi.colors.white; + textFormat: TextEdit.PlainText; + wrapMode: TextEdit.Wrap; + activeFocusOnPress: true; + activeFocusOnTab: true; + Keys.onPressed: { + if (event.key == Qt.Key_Return || event.key == Qt.Key_Enter) { + TextToSpeech.speakText(messageToSpeak.text, 480, 10, 24000, 16, true); + event.accepted = true; + } + } + } + + HifiControlsUit.Button { + id: speakButton; + focusPolicy: Qt.NoFocus; + color: hifi.buttons.blue; + colorScheme: hifi.colorSchemes.dark; + anchors.right: parent.right; + anchors.bottom: parent.bottom; + width: 215; + height: 40; + text: "Speak"; + onClicked: { + TextToSpeech.speakText(messageToSpeak.text, 480, 10, 24000, 16, true); + } + } + + HifiControlsUit.Button { + id: clearButton; + focusPolicy: Qt.NoFocus; + color: hifi.buttons.white; + colorScheme: hifi.colorSchemes.dark; + anchors.right: speakButton.left; + anchors.rightMargin: 16; + anchors.bottom: parent.bottom; + width: 100; + height: 40; + text: "Clear"; + onClicked: { + messageToSpeak.text = ""; + } + } + + HifiControlsUit.Button { + id: stopButton; + focusPolicy: Qt.NoFocus; + color: hifi.buttons.red; + colorScheme: hifi.colorSchemes.dark; + anchors.right: clearButton.left; + anchors.rightMargin: 16; + anchors.bottom: parent.bottom; + width: 100; + height: 40; + text: "Stop Last"; + onClicked: { + TextToSpeech.stopLastSpeech(); + } + } + } + + Item { + id: keyboardContainer; + z: 998; + visible: keyboard.raised; + property bool punctuationMode: false; + anchors { + bottom: parent.bottom; + left: parent.left; + right: parent.right; + } + + HifiControlsUit.Keyboard { + id: keyboard; + raised: HMD.mounted && root.keyboardRaised; + numeric: parent.punctuationMode; + anchors { + bottom: parent.bottom; + left: parent.left; + right: parent.right; + } + } + } +} diff --git a/interface/src/Application.cpp b/interface/src/Application.cpp index 836e12e60a..9cfdc8a9bb 100644 --- a/interface/src/Application.cpp +++ b/interface/src/Application.cpp @@ -2906,7 +2906,7 @@ void Application::initializeUi() { LoginDialog::registerType(); Tooltip::registerType(); UpdateDialog::registerType(); - QmlContextCallback callback = [](QQmlContext* context) { + QmlContextCallback commerceCallback = [](QQmlContext* context) { context->setContextProperty("Commerce", new QmlCommerce()); }; OffscreenQmlSurface::addWhitelistContextHandler({ @@ -2932,7 +2932,13 @@ void Application::initializeUi() { QUrl{ "hifi/commerce/wallet/Wallet.qml" }, QUrl{ "hifi/commerce/wallet/WalletHome.qml" }, QUrl{ "hifi/commerce/wallet/WalletSetup.qml" }, - }, callback); + }, commerceCallback); + QmlContextCallback ttsCallback = [](QQmlContext* context) { + context->setContextProperty("TextToSpeech", DependencyManager::get().data()); + }; + OffscreenQmlSurface::addWhitelistContextHandler({ + QUrl{ "hifi/tts/TTS.qml" } + }, ttsCallback); qmlRegisterType("Hifi", 1, 0, "ResourceImageItem"); qmlRegisterType("Hifi", 1, 0, "Preference"); qmlRegisterType("HifiWeb", 1, 0, "WebBrowserSuggestionsEngine"); @@ -3135,7 +3141,6 @@ void Application::onDesktopRootContextCreated(QQmlContext* surfaceContext) { surfaceContext->setContextProperty("ContextOverlay", DependencyManager::get().data()); surfaceContext->setContextProperty("Wallet", DependencyManager::get().data()); surfaceContext->setContextProperty("HiFiAbout", AboutUtil::getInstance()); - surfaceContext->setContextProperty("TextToSpeech", DependencyManager::get().data()); if (auto steamClient = PluginManager::getInstance()->getSteamClientPlugin()) { surfaceContext->setContextProperty("Steam", new SteamScriptingInterface(engine, steamClient.get())); @@ -6818,7 +6823,6 @@ void Application::registerScriptEngineWithApplicationServices(ScriptEnginePointe scriptEngine->registerGlobalObject("Wallet", DependencyManager::get().data()); scriptEngine->registerGlobalObject("AddressManager", DependencyManager::get().data()); scriptEngine->registerGlobalObject("HifiAbout", AboutUtil::getInstance()); - scriptEngine->registerGlobalObject("TextToSpeech", DependencyManager::get().data()); qScriptRegisterMetaType(scriptEngine.data(), OverlayIDtoScriptValue, OverlayIDfromScriptValue); diff --git a/interface/src/scripting/TTSScriptingInterface.cpp b/interface/src/scripting/TTSScriptingInterface.cpp index 5fb47a73c3..0cdb24e15d 100644 --- a/interface/src/scripting/TTSScriptingInterface.cpp +++ b/interface/src/scripting/TTSScriptingInterface.cpp @@ -151,7 +151,9 @@ void TTSScriptingInterface::speakText(const QString& textToSpeak, _lastSoundByteArray.resize(0); _lastSoundByteArray.append(buf1, dwSize); - emit ttsSampleCreated(_lastSoundByteArray, newChunkSize, timerInterval); + // Commented out because this doesn't work completely :) + // Obviously, commenting this out isn't fit for production, but it's fine for a test PR + //emit ttsSampleCreated(_lastSoundByteArray, newChunkSize, timerInterval); if (alsoInject) { AudioInjectorOptions options; diff --git a/scripts/defaultScripts.js b/scripts/defaultScripts.js index 9efb040624..2398973dfd 100644 --- a/scripts/defaultScripts.js +++ b/scripts/defaultScripts.js @@ -32,7 +32,8 @@ var DEFAULT_SCRIPTS_COMBINED = [ "system/firstPersonHMD.js", "system/tablet-ui/tabletUI.js", "system/emote.js", - "system/miniTablet.js" + "system/miniTablet.js", + "system/tts/TTS.js" ]; var DEFAULT_SCRIPTS_SEPARATE = [ "system/controllers/controllerScripts.js", diff --git a/scripts/system/tts/TTS.js b/scripts/system/tts/TTS.js new file mode 100644 index 0000000000..36259cfda0 --- /dev/null +++ b/scripts/system/tts/TTS.js @@ -0,0 +1,28 @@ +"use strict"; +/*jslint vars:true, plusplus:true, forin:true*/ +/*global Tablet, Script, */ +/* eslint indent: ["error", 4, { "outerIIFEBody": 0 }] */ +// +// TTS.js +// +// Created by Zach Fox on 2018-10-10 +// Copyright 2018 High Fidelity, Inc +// +// Distributed under the Apache License, Version 2.0 +// See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html +// + +(function () { // BEGIN LOCAL_SCOPE +var AppUi = Script.require('appUi'); + +var ui; +function startup() { + ui = new AppUi({ + buttonName: "TTS", + //home: Script.resolvePath("TTS.qml") + home: "hifi/tts/TTS.qml", + graphicsDirectory: Script.resolvePath("./") // speech by Danil Polshin from the Noun Project + }); +} +startup(); +}()); // END LOCAL_SCOPE diff --git a/scripts/system/tts/tts-a.svg b/scripts/system/tts/tts-a.svg new file mode 100644 index 0000000000..9dac3a2d53 --- /dev/null +++ b/scripts/system/tts/tts-a.svg @@ -0,0 +1,9 @@ + + + + + + + + + diff --git a/scripts/system/tts/tts-i.svg b/scripts/system/tts/tts-i.svg new file mode 100644 index 0000000000..1c52ec3193 --- /dev/null +++ b/scripts/system/tts/tts-i.svg @@ -0,0 +1,9 @@ + + + + + + + + + From 947391e49eb28140d58047d3d2194ac3a6701348 Mon Sep 17 00:00:00 2001 From: Zach Fox Date: Tue, 23 Oct 2018 11:44:16 -0700 Subject: [PATCH 11/18] Fix build errors --- interface/src/scripting/TTSScriptingInterface.cpp | 8 ++++++++ interface/src/scripting/TTSScriptingInterface.h | 5 +++++ 2 files changed, 13 insertions(+) diff --git a/interface/src/scripting/TTSScriptingInterface.cpp b/interface/src/scripting/TTSScriptingInterface.cpp index 0cdb24e15d..f51a638471 100644 --- a/interface/src/scripting/TTSScriptingInterface.cpp +++ b/interface/src/scripting/TTSScriptingInterface.cpp @@ -13,6 +13,7 @@ #include "avatar/AvatarManager.h" TTSScriptingInterface::TTSScriptingInterface() { +#ifdef WIN32 // // Create text to speech engine // @@ -36,11 +37,13 @@ TTSScriptingInterface::TTSScriptingInterface() { if (FAILED(hr)) { qDebug() << "Can't set default voice."; } +#endif } TTSScriptingInterface::~TTSScriptingInterface() { } +#ifdef WIN32 class ReleaseOnExit { public: ReleaseOnExit(IUnknown* p) : m_p(p) {} @@ -53,6 +56,7 @@ public: private: IUnknown* m_p; }; +#endif void TTSScriptingInterface::testTone(const bool& alsoInject) { QByteArray byteArray(480000, 0); @@ -81,6 +85,7 @@ void TTSScriptingInterface::speakText(const QString& textToSpeak, const int& sampleRate, const int& bitsPerSample, const bool& alsoInject) { +#ifdef WIN32 WAVEFORMATEX fmt; fmt.wFormatTag = WAVE_FORMAT_PCM; fmt.nSamplesPerSec = sampleRate; @@ -161,6 +166,9 @@ void TTSScriptingInterface::speakText(const QString& textToSpeak, _lastSoundAudioInjector = AudioInjector::playSound(_lastSoundByteArray, options); } +#else + qDebug() << "Text-to-Speech isn't currently supported on non-Windows platforms."; +#endif } void TTSScriptingInterface::stopLastSpeech() { diff --git a/interface/src/scripting/TTSScriptingInterface.h b/interface/src/scripting/TTSScriptingInterface.h index f6eca081ab..7e4f3afa9d 100644 --- a/interface/src/scripting/TTSScriptingInterface.h +++ b/interface/src/scripting/TTSScriptingInterface.h @@ -13,11 +13,14 @@ #include #include +#ifdef WIN32 +#pragma warning(disable : 4996) #ifndef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN #endif #include // SAPI #include // SAPI Helper +#endif #include #include @@ -42,6 +45,7 @@ signals: void clearTTSBuffer(); private: +#ifdef WIN32 class CComAutoInit { public: // Initializes COM using CoInitialize. @@ -82,6 +86,7 @@ private: // Default voice token CComPtr m_voiceToken; +#endif QByteArray _lastSoundByteArray; AudioInjectorPointer _lastSoundAudioInjector; From 83951328300acdb409d6f4c08e74f11f8b1ee880 Mon Sep 17 00:00:00 2001 From: Zach Fox Date: Tue, 23 Oct 2018 15:11:02 -0700 Subject: [PATCH 12/18] Attempt to resolve final warnings --- libraries/audio-client/src/AudioClient.cpp | 4 ++-- libraries/audio-client/src/AudioClient.h | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/libraries/audio-client/src/AudioClient.cpp b/libraries/audio-client/src/AudioClient.cpp index 606763e4ab..1ce6c15951 100644 --- a/libraries/audio-client/src/AudioClient.cpp +++ b/libraries/audio-client/src/AudioClient.cpp @@ -186,11 +186,11 @@ AudioClient::AudioClient() : _networkToOutputResampler(NULL), _localToOutputResampler(NULL), _audioLimiter(AudioConstants::SAMPLE_RATE, OUTPUT_CHANNEL_COUNT), _outgoingAvatarAudioSequenceNumber(0), _audioOutputIODevice(_localInjectorsStream, _receivedAudioStream, this), _stats(&_receivedAudioStream), - _positionGetter(DEFAULT_POSITION_GETTER), _TTSTimer(this), + _positionGetter(DEFAULT_POSITION_GETTER), _orientationGetter(DEFAULT_ORIENTATION_GETTER), #if defined(Q_OS_ANDROID) _checkInputTimer(this), _isHeadsetPluggedIn(false), #endif - _orientationGetter(DEFAULT_ORIENTATION_GETTER) { + _TTSTimer(this) { // avoid putting a lock in the device callback assert(_localSamplesAvailable.is_lock_free()); diff --git a/libraries/audio-client/src/AudioClient.h b/libraries/audio-client/src/AudioClient.h index 2e5ef65473..9b50d3eccb 100644 --- a/libraries/audio-client/src/AudioClient.h +++ b/libraries/audio-client/src/AudioClient.h @@ -293,12 +293,6 @@ private: bool mixLocalAudioInjectors(float* mixBuffer); float azimuthForSource(const glm::vec3& relativePosition); float gainForSource(float distance, float volume); - - Mutex _TTSMutex; - QTimer _TTSTimer; - bool _isProcessingTTS {false}; - QByteArray _TTSAudioBuffer; - int _TTSChunkSize = AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL * 50; #ifdef Q_OS_ANDROID QTimer _checkInputTimer; @@ -464,6 +458,12 @@ private: QTimer* _checkPeakValuesTimer { nullptr }; bool _isRecording { false }; + + Mutex _TTSMutex; + bool _isProcessingTTS { false }; + QByteArray _TTSAudioBuffer; + int _TTSChunkSize = AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL * 50; + QTimer _TTSTimer; }; From bccb3f1de9ea68e6d722d77924be490854ca04e6 Mon Sep 17 00:00:00 2001 From: Zach Fox Date: Thu, 1 Nov 2018 14:53:10 -0700 Subject: [PATCH 13/18] Prepare for further work --- scripts/defaultScripts.js | 3 +-- scripts/system/tts/TTS.js | 28 ---------------------------- scripts/system/tts/tts-a.svg | 9 --------- scripts/system/tts/tts-i.svg | 9 --------- 4 files changed, 1 insertion(+), 48 deletions(-) delete mode 100644 scripts/system/tts/TTS.js delete mode 100644 scripts/system/tts/tts-a.svg delete mode 100644 scripts/system/tts/tts-i.svg diff --git a/scripts/defaultScripts.js b/scripts/defaultScripts.js index 5ed74fd833..5df1b3e511 100644 --- a/scripts/defaultScripts.js +++ b/scripts/defaultScripts.js @@ -32,8 +32,7 @@ var DEFAULT_SCRIPTS_COMBINED = [ "system/firstPersonHMD.js", "system/tablet-ui/tabletUI.js", "system/emote.js", - "system/miniTablet.js", - "system/tts/TTS.js" + "system/miniTablet.js" ]; var DEFAULT_SCRIPTS_SEPARATE = [ "system/controllers/controllerScripts.js", diff --git a/scripts/system/tts/TTS.js b/scripts/system/tts/TTS.js deleted file mode 100644 index 36259cfda0..0000000000 --- a/scripts/system/tts/TTS.js +++ /dev/null @@ -1,28 +0,0 @@ -"use strict"; -/*jslint vars:true, plusplus:true, forin:true*/ -/*global Tablet, Script, */ -/* eslint indent: ["error", 4, { "outerIIFEBody": 0 }] */ -// -// TTS.js -// -// Created by Zach Fox on 2018-10-10 -// Copyright 2018 High Fidelity, Inc -// -// Distributed under the Apache License, Version 2.0 -// See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html -// - -(function () { // BEGIN LOCAL_SCOPE -var AppUi = Script.require('appUi'); - -var ui; -function startup() { - ui = new AppUi({ - buttonName: "TTS", - //home: Script.resolvePath("TTS.qml") - home: "hifi/tts/TTS.qml", - graphicsDirectory: Script.resolvePath("./") // speech by Danil Polshin from the Noun Project - }); -} -startup(); -}()); // END LOCAL_SCOPE diff --git a/scripts/system/tts/tts-a.svg b/scripts/system/tts/tts-a.svg deleted file mode 100644 index 9dac3a2d53..0000000000 --- a/scripts/system/tts/tts-a.svg +++ /dev/null @@ -1,9 +0,0 @@ - - - - - - - - - diff --git a/scripts/system/tts/tts-i.svg b/scripts/system/tts/tts-i.svg deleted file mode 100644 index 1c52ec3193..0000000000 --- a/scripts/system/tts/tts-i.svg +++ /dev/null @@ -1,9 +0,0 @@ - - - - - - - - - From 20cd1df22cf1f48bbf3c27a7347518c1022afae7 Mon Sep 17 00:00:00 2001 From: Zach Fox Date: Thu, 1 Nov 2018 16:48:08 -0700 Subject: [PATCH 14/18] Lotsa changes --- interface/resources/qml/hifi/tts/TTS.qml | 12 +++- interface/src/Application.cpp | 4 -- .../src/scripting/TTSScriptingInterface.cpp | 55 +++++++------------ .../src/scripting/TTSScriptingInterface.h | 15 ++--- libraries/audio-client/src/AudioClient.cpp | 41 -------------- libraries/audio-client/src/AudioClient.h | 5 -- 6 files changed, 36 insertions(+), 96 deletions(-) diff --git a/interface/resources/qml/hifi/tts/TTS.qml b/interface/resources/qml/hifi/tts/TTS.qml index 114efd0cca..d9507f6084 100644 --- a/interface/resources/qml/hifi/tts/TTS.qml +++ b/interface/resources/qml/hifi/tts/TTS.qml @@ -202,7 +202,6 @@ Rectangle { TextArea { id: messageToSpeak; - placeholderText: "Message to Speak"; font.family: "Fira Sans SemiBold"; font.pixelSize: 20; // Anchors @@ -229,6 +228,17 @@ Rectangle { event.accepted = true; } } + + HifiStylesUit.FiraSansRegular { + text: "Input Text to Speak..."; + size: 20; + anchors.fill: parent; + anchors.topMargin: 4; + anchors.leftMargin: 4; + color: hifi.colors.lightGrayText; + visible: !parent.activeFocus && messageToSpeak.text === ""; + verticalAlignment: Text.AlignTop; + } } HifiControlsUit.Button { diff --git a/interface/src/Application.cpp b/interface/src/Application.cpp index dfb4ef1c32..967a31135c 100644 --- a/interface/src/Application.cpp +++ b/interface/src/Application.cpp @@ -1187,10 +1187,6 @@ Application::Application(int& argc, char** argv, QElapsedTimer& startupTimer, bo recording::Frame::registerFrameHandler(AudioConstants::getAudioFrameName(), [&audioIO](recording::Frame::ConstPointer frame) { audioIO->handleRecordedAudioInput(frame->data); }); - - auto TTS = DependencyManager::get().data(); - connect(TTS, &TTSScriptingInterface::ttsSampleCreated, audioIO, &AudioClient::handleTTSAudioInput); - connect(TTS, &TTSScriptingInterface::clearTTSBuffer, audioIO, &AudioClient::clearTTSBuffer); connect(audioIO, &AudioClient::inputReceived, [](const QByteArray& audio) { static auto recorder = DependencyManager::get(); diff --git a/interface/src/scripting/TTSScriptingInterface.cpp b/interface/src/scripting/TTSScriptingInterface.cpp index f51a638471..6a5b72ea5f 100644 --- a/interface/src/scripting/TTSScriptingInterface.cpp +++ b/interface/src/scripting/TTSScriptingInterface.cpp @@ -37,6 +37,9 @@ TTSScriptingInterface::TTSScriptingInterface() { if (FAILED(hr)) { qDebug() << "Can't set default voice."; } + + _lastSoundAudioInjectorUpdateTimer.setSingleShot(true); + connect(&_lastSoundAudioInjectorUpdateTimer, &QTimer::timeout, this, &TTSScriptingInterface::updateLastSoundAudioInjector); #endif } @@ -58,38 +61,22 @@ private: }; #endif -void TTSScriptingInterface::testTone(const bool& alsoInject) { - QByteArray byteArray(480000, 0); - _lastSoundByteArray.resize(0); - _lastSoundByteArray.resize(480000); - - int32_t a = 0; - int16_t* samples = reinterpret_cast(byteArray.data()); - for (a = 0; a < 240000; a++) { - int16_t temp = (glm::sin(glm::radians((float)a))) * 32768; - samples[a] = temp; - } - emit ttsSampleCreated(_lastSoundByteArray, AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL * 50, 96); - - if (alsoInject) { +const std::chrono::milliseconds INJECTOR_INTERVAL_MS = std::chrono::milliseconds(100); +void TTSScriptingInterface::updateLastSoundAudioInjector() { + if (_lastSoundAudioInjector) { AudioInjectorOptions options; options.position = DependencyManager::get()->getMyAvatarPosition(); - - _lastSoundAudioInjector = AudioInjector::playSound(_lastSoundByteArray, options); + _lastSoundAudioInjector->setOptions(options); + _lastSoundAudioInjectorUpdateTimer.start(INJECTOR_INTERVAL_MS); } } -void TTSScriptingInterface::speakText(const QString& textToSpeak, - const int& newChunkSize, - const int& timerInterval, - const int& sampleRate, - const int& bitsPerSample, - const bool& alsoInject) { +void TTSScriptingInterface::speakText(const QString& textToSpeak) { #ifdef WIN32 WAVEFORMATEX fmt; fmt.wFormatTag = WAVE_FORMAT_PCM; - fmt.nSamplesPerSec = sampleRate; - fmt.wBitsPerSample = bitsPerSample; + fmt.nSamplesPerSec = 24000; + fmt.wBitsPerSample = 16; fmt.nChannels = 1; fmt.nBlockAlign = fmt.nChannels * fmt.wBitsPerSample / 8; fmt.nAvgBytesPerSec = fmt.nSamplesPerSec * fmt.nBlockAlign; @@ -156,16 +143,17 @@ void TTSScriptingInterface::speakText(const QString& textToSpeak, _lastSoundByteArray.resize(0); _lastSoundByteArray.append(buf1, dwSize); - // Commented out because this doesn't work completely :) - // Obviously, commenting this out isn't fit for production, but it's fine for a test PR - //emit ttsSampleCreated(_lastSoundByteArray, newChunkSize, timerInterval); + AudioInjectorOptions options; + options.position = DependencyManager::get()->getMyAvatarPosition(); - if (alsoInject) { - AudioInjectorOptions options; - options.position = DependencyManager::get()->getMyAvatarPosition(); - - _lastSoundAudioInjector = AudioInjector::playSound(_lastSoundByteArray, options); + if (_lastSoundAudioInjector) { + _lastSoundAudioInjector->stop(); + _lastSoundAudioInjectorUpdateTimer.stop(); } + + _lastSoundAudioInjector = AudioInjector::playSoundAndDelete(_lastSoundByteArray, options); + + _lastSoundAudioInjectorUpdateTimer.start(INJECTOR_INTERVAL_MS); #else qDebug() << "Text-to-Speech isn't currently supported on non-Windows platforms."; #endif @@ -174,7 +162,6 @@ void TTSScriptingInterface::speakText(const QString& textToSpeak, void TTSScriptingInterface::stopLastSpeech() { if (_lastSoundAudioInjector) { _lastSoundAudioInjector->stop(); + _lastSoundAudioInjector = NULL; } - - emit clearTTSBuffer(); } diff --git a/interface/src/scripting/TTSScriptingInterface.h b/interface/src/scripting/TTSScriptingInterface.h index 7e4f3afa9d..0f1e723885 100644 --- a/interface/src/scripting/TTSScriptingInterface.h +++ b/interface/src/scripting/TTSScriptingInterface.h @@ -12,6 +12,7 @@ #define hifi_SpeechScriptingInterface_h #include +#include #include #ifdef WIN32 #pragma warning(disable : 4996) @@ -31,19 +32,9 @@ public: TTSScriptingInterface(); ~TTSScriptingInterface(); - Q_INVOKABLE void testTone(const bool& alsoInject = false); - Q_INVOKABLE void speakText(const QString& textToSpeak, - const int& newChunkSize = (AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL * 50), - const int& timerInterval = 96, - const int& sampleRate = 24000, - const int& bitsPerSample = 16, - const bool& alsoInject = false); + Q_INVOKABLE void speakText(const QString& textToSpeak); Q_INVOKABLE void stopLastSpeech(); -signals: - void ttsSampleCreated(QByteArray outputArray, const int& newChunkSize, const int& timerInterval); - void clearTTSBuffer(); - private: #ifdef WIN32 class CComAutoInit { @@ -90,6 +81,8 @@ private: QByteArray _lastSoundByteArray; AudioInjectorPointer _lastSoundAudioInjector; + QTimer _lastSoundAudioInjectorUpdateTimer; + void updateLastSoundAudioInjector(); }; #endif // hifi_SpeechScriptingInterface_h diff --git a/libraries/audio-client/src/AudioClient.cpp b/libraries/audio-client/src/AudioClient.cpp index 1ce6c15951..b7557681a5 100644 --- a/libraries/audio-client/src/AudioClient.cpp +++ b/libraries/audio-client/src/AudioClient.cpp @@ -245,8 +245,6 @@ AudioClient::AudioClient() : packetReceiver.registerListener(PacketType::NoisyMute, this, "handleNoisyMutePacket"); packetReceiver.registerListener(PacketType::MuteEnvironment, this, "handleMuteEnvironmentPacket"); packetReceiver.registerListener(PacketType::SelectedAudioFormat, this, "handleSelectedAudioFormat"); - - connect(&_TTSTimer, &QTimer::timeout, this, &AudioClient::processTTSBuffer); } AudioClient::~AudioClient() { @@ -1202,45 +1200,6 @@ int rawToWav(const char* rawData, const int& rawLength, const char* wavfn, long return 0; } -void AudioClient::processTTSBuffer() { - Lock lock(_TTSMutex); - if (_TTSAudioBuffer.size() > 0) { - QByteArray part; - part.append(_TTSAudioBuffer.data(), _TTSChunkSize); - _TTSAudioBuffer.remove(0, _TTSChunkSize); - handleAudioInput(part); - } else { - _isProcessingTTS = false; - _TTSTimer.stop(); - } -} - -void AudioClient::handleTTSAudioInput(const QByteArray& audio, const int& newChunkSize, const int& timerInterval) { - _TTSChunkSize = newChunkSize; - _TTSAudioBuffer.append(audio); - - handleLocalEchoAndReverb(_TTSAudioBuffer, 48000, 1); - - //QString filename = QString::number(usecTimestampNow()); - //QString path = PathUtils::getAppDataPath() + "Audio/" + filename + "-before.wav"; - //rawToWav(_TTSAudioBuffer.data(), _TTSAudioBuffer.size(), path.toLocal8Bit(), 24000, 1); - - //QByteArray temp; - - _isProcessingTTS = true; - _TTSTimer.start(timerInterval); - - //filename = QString::number(usecTimestampNow()); - //path = PathUtils::getAppDataPath() + "Audio/" + filename + "-after.wav"; - //rawToWav(temp.data(), temp.size(), path.toLocal8Bit(), 12000, 1); -} - -void AudioClient::clearTTSBuffer() { - _TTSAudioBuffer.resize(0); - _isProcessingTTS = false; - _TTSTimer.stop(); -} - void AudioClient::prepareLocalAudioInjectors(std::unique_ptr localAudioLock) { bool doSynchronously = localAudioLock.operator bool(); if (!localAudioLock) { diff --git a/libraries/audio-client/src/AudioClient.h b/libraries/audio-client/src/AudioClient.h index 9b50d3eccb..788b764903 100644 --- a/libraries/audio-client/src/AudioClient.h +++ b/libraries/audio-client/src/AudioClient.h @@ -197,11 +197,6 @@ public slots: void checkInputTimeout(); void handleDummyAudioInput(); void handleRecordedAudioInput(const QByteArray& audio); - void handleTTSAudioInput(const QByteArray& audio, - const int& newChunkSize, - const int& timerInterval); - void clearTTSBuffer(); - void processTTSBuffer(); void reset(); void audioMixerKilled(); From 53742e90f57d096f8aa73eec369e108f636e2927 Mon Sep 17 00:00:00 2001 From: Zach Fox Date: Tue, 6 Nov 2018 10:30:10 -0800 Subject: [PATCH 15/18] Cleanup after merge --- libraries/audio-client/src/AudioClient.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/libraries/audio-client/src/AudioClient.h b/libraries/audio-client/src/AudioClient.h index 788b764903..5e7f1fb8a0 100644 --- a/libraries/audio-client/src/AudioClient.h +++ b/libraries/audio-client/src/AudioClient.h @@ -395,7 +395,7 @@ private: void configureReverb(); void updateReverbOptions(); - void handleLocalEchoAndReverb(QByteArray& inputByteArray, const int& sampleRate, const int& channelCount); + void handleLocalEchoAndReverb(QByteArray& inputByteArray); bool switchInputToAudioDevice(const QAudioDeviceInfo inputDeviceInfo, bool isShutdownRequest = false); bool switchOutputToAudioDevice(const QAudioDeviceInfo outputDeviceInfo, bool isShutdownRequest = false); @@ -453,12 +453,6 @@ private: QTimer* _checkPeakValuesTimer { nullptr }; bool _isRecording { false }; - - Mutex _TTSMutex; - bool _isProcessingTTS { false }; - QByteArray _TTSAudioBuffer; - int _TTSChunkSize = AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL * 50; - QTimer _TTSTimer; }; From c33f9b6ea311e8a88b7ad69e6a9c37da9fe56572 Mon Sep 17 00:00:00 2001 From: Zach Fox Date: Tue, 6 Nov 2018 11:26:09 -0800 Subject: [PATCH 16/18] Fix build error --- interface/src/Application.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/interface/src/Application.cpp b/interface/src/Application.cpp index 07705c8cd5..bd126048dd 100644 --- a/interface/src/Application.cpp +++ b/interface/src/Application.cpp @@ -2955,7 +2955,7 @@ void Application::initializeUi() { QUrl{ "hifi/dialogs/security/SecurityImageChange.qml" }, QUrl{ "hifi/dialogs/security/SecurityImageModel.qml" }, QUrl{ "hifi/dialogs/security/SecurityImageSelection.qml" }, - }, callback); + }, commerceCallback); QmlContextCallback ttsCallback = [](QQmlContext* context) { context->setContextProperty("TextToSpeech", DependencyManager::get().data()); }; From 378bf911d447d02611b1f65dabc621a93b4fd774 Mon Sep 17 00:00:00 2001 From: Zach Fox Date: Tue, 6 Nov 2018 12:48:17 -0800 Subject: [PATCH 17/18] Fix another build error --- interface/src/scripting/TTSScriptingInterface.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/interface/src/scripting/TTSScriptingInterface.cpp b/interface/src/scripting/TTSScriptingInterface.cpp index 6a5b72ea5f..b41f22759c 100644 --- a/interface/src/scripting/TTSScriptingInterface.cpp +++ b/interface/src/scripting/TTSScriptingInterface.cpp @@ -61,7 +61,7 @@ private: }; #endif -const std::chrono::milliseconds INJECTOR_INTERVAL_MS = std::chrono::milliseconds(100); +const int INJECTOR_INTERVAL_MS = 100; void TTSScriptingInterface::updateLastSoundAudioInjector() { if (_lastSoundAudioInjector) { AudioInjectorOptions options; From cee1454f6e963598fe6409224bd3c7f070760a57 Mon Sep 17 00:00:00 2001 From: Zach Fox Date: Wed, 7 Nov 2018 15:09:53 -0800 Subject: [PATCH 18/18] CR feedback - thanks Ken! --- interface/src/scripting/TTSScriptingInterface.cpp | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/interface/src/scripting/TTSScriptingInterface.cpp b/interface/src/scripting/TTSScriptingInterface.cpp index b41f22759c..6b1677aecb 100644 --- a/interface/src/scripting/TTSScriptingInterface.cpp +++ b/interface/src/scripting/TTSScriptingInterface.cpp @@ -75,7 +75,7 @@ void TTSScriptingInterface::speakText(const QString& textToSpeak) { #ifdef WIN32 WAVEFORMATEX fmt; fmt.wFormatTag = WAVE_FORMAT_PCM; - fmt.nSamplesPerSec = 24000; + fmt.nSamplesPerSec = AudioConstants::SAMPLE_RATE; fmt.wBitsPerSample = 16; fmt.nChannels = 1; fmt.nBlockAlign = fmt.nChannels * fmt.wBitsPerSample / 8; @@ -132,17 +132,13 @@ void TTSScriptingInterface::speakText(const QString& textToSpeak) { hr = IStream_Size(pStream, &StreamSize); DWORD dwSize = StreamSize.QuadPart; - char* buf1 = new char[dwSize + 1]; - memset(buf1, 0, dwSize + 1); + _lastSoundByteArray.resize(dwSize); - hr = IStream_Read(pStream, buf1, dwSize); + hr = IStream_Read(pStream, _lastSoundByteArray.data(), dwSize); if (FAILED(hr)) { qDebug() << "Couldn't read from stream."; } - _lastSoundByteArray.resize(0); - _lastSoundByteArray.append(buf1, dwSize); - AudioInjectorOptions options; options.position = DependencyManager::get()->getMyAvatarPosition();