mirror of
https://github.com/HifiExperiments/overte.git
synced 2025-08-09 10:09:05 +02:00
Lots of progress today
This commit is contained in:
parent
5d4de3d3b0
commit
daeedc6ef1
5 changed files with 111 additions and 68 deletions
|
@ -182,7 +182,7 @@
|
||||||
#include "scripting/RatesScriptingInterface.h"
|
#include "scripting/RatesScriptingInterface.h"
|
||||||
#include "scripting/SelectionScriptingInterface.h"
|
#include "scripting/SelectionScriptingInterface.h"
|
||||||
#include "scripting/WalletScriptingInterface.h"
|
#include "scripting/WalletScriptingInterface.h"
|
||||||
#include "scripting/SpeechScriptingInterface.h"
|
#include "scripting/TTSScriptingInterface.h"
|
||||||
#if defined(Q_OS_MAC) || defined(Q_OS_WIN)
|
#if defined(Q_OS_MAC) || defined(Q_OS_WIN)
|
||||||
#include "SpeechRecognizer.h"
|
#include "SpeechRecognizer.h"
|
||||||
#endif
|
#endif
|
||||||
|
@ -944,7 +944,7 @@ bool setupEssentials(int& argc, char** argv, bool runningMarkerExisted) {
|
||||||
DependencyManager::set<Ledger>();
|
DependencyManager::set<Ledger>();
|
||||||
DependencyManager::set<Wallet>();
|
DependencyManager::set<Wallet>();
|
||||||
DependencyManager::set<WalletScriptingInterface>();
|
DependencyManager::set<WalletScriptingInterface>();
|
||||||
DependencyManager::set<SpeechScriptingInterface>();
|
DependencyManager::set<TTSScriptingInterface>();
|
||||||
|
|
||||||
DependencyManager::set<FadeEffect>();
|
DependencyManager::set<FadeEffect>();
|
||||||
|
|
||||||
|
@ -1179,6 +1179,9 @@ Application::Application(int& argc, char** argv, QElapsedTimer& startupTimer, bo
|
||||||
recording::Frame::registerFrameHandler(AudioConstants::getAudioFrameName(), [&audioIO](recording::Frame::ConstPointer frame) {
|
recording::Frame::registerFrameHandler(AudioConstants::getAudioFrameName(), [&audioIO](recording::Frame::ConstPointer frame) {
|
||||||
audioIO->handleRecordedAudioInput(frame->data);
|
audioIO->handleRecordedAudioInput(frame->data);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
auto TTS = DependencyManager::get<TTSScriptingInterface>().data();
|
||||||
|
connect(TTS, &TTSScriptingInterface::ttsSampleCreated, audioIO, &AudioClient::handleTTSAudioInput);
|
||||||
|
|
||||||
connect(audioIO, &AudioClient::inputReceived, [](const QByteArray& audio) {
|
connect(audioIO, &AudioClient::inputReceived, [](const QByteArray& audio) {
|
||||||
static auto recorder = DependencyManager::get<recording::Recorder>();
|
static auto recorder = DependencyManager::get<recording::Recorder>();
|
||||||
|
@ -3129,7 +3132,7 @@ void Application::onDesktopRootContextCreated(QQmlContext* surfaceContext) {
|
||||||
surfaceContext->setContextProperty("ContextOverlay", DependencyManager::get<ContextOverlayInterface>().data());
|
surfaceContext->setContextProperty("ContextOverlay", DependencyManager::get<ContextOverlayInterface>().data());
|
||||||
surfaceContext->setContextProperty("Wallet", DependencyManager::get<WalletScriptingInterface>().data());
|
surfaceContext->setContextProperty("Wallet", DependencyManager::get<WalletScriptingInterface>().data());
|
||||||
surfaceContext->setContextProperty("HiFiAbout", AboutUtil::getInstance());
|
surfaceContext->setContextProperty("HiFiAbout", AboutUtil::getInstance());
|
||||||
surfaceContext->setContextProperty("Speech", DependencyManager::get<SpeechScriptingInterface>().data());
|
surfaceContext->setContextProperty("TextToSpeech", DependencyManager::get<TTSScriptingInterface>().data());
|
||||||
|
|
||||||
if (auto steamClient = PluginManager::getInstance()->getSteamClientPlugin()) {
|
if (auto steamClient = PluginManager::getInstance()->getSteamClientPlugin()) {
|
||||||
surfaceContext->setContextProperty("Steam", new SteamScriptingInterface(engine, steamClient.get()));
|
surfaceContext->setContextProperty("Steam", new SteamScriptingInterface(engine, steamClient.get()));
|
||||||
|
@ -6800,7 +6803,7 @@ void Application::registerScriptEngineWithApplicationServices(ScriptEnginePointe
|
||||||
scriptEngine->registerGlobalObject("Wallet", DependencyManager::get<WalletScriptingInterface>().data());
|
scriptEngine->registerGlobalObject("Wallet", DependencyManager::get<WalletScriptingInterface>().data());
|
||||||
scriptEngine->registerGlobalObject("AddressManager", DependencyManager::get<AddressManager>().data());
|
scriptEngine->registerGlobalObject("AddressManager", DependencyManager::get<AddressManager>().data());
|
||||||
scriptEngine->registerGlobalObject("HifiAbout", AboutUtil::getInstance());
|
scriptEngine->registerGlobalObject("HifiAbout", AboutUtil::getInstance());
|
||||||
scriptEngine->registerGlobalObject("Speech", DependencyManager::get<SpeechScriptingInterface>().data());
|
scriptEngine->registerGlobalObject("TextToSpeech", DependencyManager::get<TTSScriptingInterface>().data());
|
||||||
|
|
||||||
qScriptRegisterMetaType(scriptEngine.data(), OverlayIDtoScriptValue, OverlayIDfromScriptValue);
|
qScriptRegisterMetaType(scriptEngine.data(), OverlayIDtoScriptValue, OverlayIDfromScriptValue);
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
//
|
//
|
||||||
// SpeechScriptingInterface.cpp
|
// TTSScriptingInterface.cpp
|
||||||
// interface/src/scripting
|
// libraries/audio-client/src/scripting
|
||||||
//
|
//
|
||||||
// Created by Zach Fox on 2018-10-10.
|
// Created by Zach Fox on 2018-10-10.
|
||||||
// Copyright 2018 High Fidelity, Inc.
|
// Copyright 2018 High Fidelity, Inc.
|
||||||
|
@ -9,10 +9,10 @@
|
||||||
// See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html
|
// See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html
|
||||||
//
|
//
|
||||||
|
|
||||||
#include "SpeechScriptingInterface.h"
|
#include "TTSScriptingInterface.h"
|
||||||
#include "avatar/AvatarManager.h"
|
#include "avatar/AvatarManager.h"
|
||||||
|
|
||||||
SpeechScriptingInterface::SpeechScriptingInterface() {
|
TTSScriptingInterface::TTSScriptingInterface() {
|
||||||
//
|
//
|
||||||
// Create text to speech engine
|
// Create text to speech engine
|
||||||
//
|
//
|
||||||
|
@ -38,8 +38,7 @@ SpeechScriptingInterface::SpeechScriptingInterface() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
SpeechScriptingInterface::~SpeechScriptingInterface() {
|
// Nothing is torn down by hand here; the destructor has no statements of its own.
TTSScriptingInterface::~TTSScriptingInterface() = default;
|
||||||
|
|
||||||
class ReleaseOnExit {
|
class ReleaseOnExit {
|
||||||
|
@ -55,7 +54,28 @@ private:
|
||||||
IUnknown* m_p;
|
IUnknown* m_p;
|
||||||
};
|
};
|
||||||
|
|
||||||
void SpeechScriptingInterface::speakText(const QString& textToSpeak) {
|
void TTSScriptingInterface::testTone(const bool& alsoInject) {
|
||||||
|
QByteArray byteArray(480000, 0);
|
||||||
|
_lastSoundByteArray.resize(0);
|
||||||
|
_lastSoundByteArray.resize(480000);
|
||||||
|
|
||||||
|
int32_t a = 0;
|
||||||
|
int16_t* samples = reinterpret_cast<int16_t*>(byteArray.data());
|
||||||
|
for (a = 0; a < 240000; a++) {
|
||||||
|
int16_t temp = (glm::sin(glm::radians((float)a))) * 32768;
|
||||||
|
samples[a] = temp;
|
||||||
|
}
|
||||||
|
emit ttsSampleCreated(_lastSoundByteArray);
|
||||||
|
|
||||||
|
if (alsoInject) {
|
||||||
|
AudioInjectorOptions options;
|
||||||
|
options.position = DependencyManager::get<AvatarManager>()->getMyAvatarPosition();
|
||||||
|
|
||||||
|
_lastSoundAudioInjector = AudioInjector::playSound(_lastSoundByteArray, options);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void TTSScriptingInterface::speakText(const QString& textToSpeak, const bool& alsoInject) {
|
||||||
WAVEFORMATEX fmt;
|
WAVEFORMATEX fmt;
|
||||||
fmt.wFormatTag = WAVE_FORMAT_PCM;
|
fmt.wFormatTag = WAVE_FORMAT_PCM;
|
||||||
fmt.nSamplesPerSec = 24000;
|
fmt.nSamplesPerSec = 24000;
|
||||||
|
@ -92,9 +112,8 @@ void SpeechScriptingInterface::speakText(const QString& textToSpeak) {
|
||||||
ReleaseOnExit rStream(pStream);
|
ReleaseOnExit rStream(pStream);
|
||||||
|
|
||||||
ULONG streamNumber;
|
ULONG streamNumber;
|
||||||
hr = m_tts->Speak(reinterpret_cast<LPCWSTR>(textToSpeak.utf16()),
|
hr = m_tts->Speak(reinterpret_cast<LPCWSTR>(textToSpeak.utf16()), SPF_IS_XML | SPF_ASYNC | SPF_PURGEBEFORESPEAK,
|
||||||
SPF_IS_XML | SPF_ASYNC | SPF_PURGEBEFORESPEAK,
|
&streamNumber);
|
||||||
&streamNumber);
|
|
||||||
if (FAILED(hr)) {
|
if (FAILED(hr)) {
|
||||||
qDebug() << "Speak failed.";
|
qDebug() << "Speak failed.";
|
||||||
}
|
}
|
||||||
|
@ -124,14 +143,21 @@ void SpeechScriptingInterface::speakText(const QString& textToSpeak) {
|
||||||
qDebug() << "Couldn't read from stream.";
|
qDebug() << "Couldn't read from stream.";
|
||||||
}
|
}
|
||||||
|
|
||||||
QByteArray byteArray = QByteArray::QByteArray(buf1, dwSize);
|
_lastSoundByteArray.resize(0);
|
||||||
|
_lastSoundByteArray.append(buf1, dwSize);
|
||||||
|
|
||||||
AudioInjectorOptions options;
|
emit ttsSampleCreated(_lastSoundByteArray);
|
||||||
options.position = DependencyManager::get<AvatarManager>()->getMyAvatarPosition();
|
|
||||||
|
|
||||||
lastSound = AudioInjector::playSound(byteArray, options);
|
if (alsoInject) {
|
||||||
|
AudioInjectorOptions options;
|
||||||
|
options.position = DependencyManager::get<AvatarManager>()->getMyAvatarPosition();
|
||||||
|
|
||||||
|
_lastSoundAudioInjector = AudioInjector::playSound(_lastSoundByteArray, options);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void SpeechScriptingInterface::stopLastSpeech() {
|
void TTSScriptingInterface::stopLastSpeech() {
|
||||||
lastSound->stop();
|
if (_lastSoundAudioInjector) {
|
||||||
|
_lastSoundAudioInjector->stop();
|
||||||
|
}
|
||||||
}
|
}
|
|
@ -1,5 +1,5 @@
|
||||||
// SpeechScriptingInterface.h
|
// TTSScriptingInterface.h
|
||||||
// interface/src/scripting
|
// libraries/audio-client/src/scripting
|
||||||
//
|
//
|
||||||
// Created by Zach Fox on 2018-10-10.
|
// Created by Zach Fox on 2018-10-10.
|
||||||
// Copyright 2018 High Fidelity, Inc.
|
// Copyright 2018 High Fidelity, Inc.
|
||||||
|
@ -20,16 +20,20 @@
|
||||||
#include <sphelper.h> // SAPI Helper
|
#include <sphelper.h> // SAPI Helper
|
||||||
#include <AudioInjector.h>
|
#include <AudioInjector.h>
|
||||||
|
|
||||||
class SpeechScriptingInterface : public QObject, public Dependency {
|
class TTSScriptingInterface : public QObject, public Dependency {
|
||||||
Q_OBJECT
|
Q_OBJECT
|
||||||
|
|
||||||
public:
|
public:
|
||||||
SpeechScriptingInterface();
|
TTSScriptingInterface();
|
||||||
~SpeechScriptingInterface();
|
~TTSScriptingInterface();
|
||||||
|
|
||||||
Q_INVOKABLE void speakText(const QString& textToSpeak);
|
Q_INVOKABLE void testTone(const bool& alsoInject = false);
|
||||||
|
Q_INVOKABLE void speakText(const QString& textToSpeak, const bool& alsoInject = false);
|
||||||
Q_INVOKABLE void stopLastSpeech();
|
Q_INVOKABLE void stopLastSpeech();
|
||||||
|
|
||||||
|
signals:
|
||||||
|
void ttsSampleCreated(QByteArray outputArray);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
class CComAutoInit {
|
class CComAutoInit {
|
||||||
public:
|
public:
|
||||||
|
@ -72,7 +76,8 @@ private:
|
||||||
// Default voice token
|
// Default voice token
|
||||||
CComPtr<ISpObjectToken> m_voiceToken;
|
CComPtr<ISpObjectToken> m_voiceToken;
|
||||||
|
|
||||||
AudioInjectorPointer lastSound;
|
QByteArray _lastSoundByteArray;
|
||||||
|
AudioInjectorPointer _lastSoundByteArray;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // hifi_SpeechScriptingInterface_h
|
#endif // hifi_SpeechScriptingInterface_h
|
|
@ -1135,6 +1135,46 @@ void AudioClient::handleAudioInput(QByteArray& audioBuffer) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Runs a chunk of input audio through handleLocalEchoAndReverb(), queues it on
// _inputRingBuffer, then drains the ring buffer one network frame at a time: each
// frame is resampled (unless muted) and handed to handleAudioInput().
//
// inputByteArray   - raw input audio; passed to handleLocalEchoAndReverb() and then
//                    written to the ring buffer.
// channelCount     - channel count of inputByteArray.
// bytesForDuration - bytes of this input per millisecond (the mic path passes
//                    _inputFormat.bytesForDuration(USECS_PER_MSEC)); used for the
//                    input stats only.
void AudioClient::processAudioAndAddToRingBuffer(QByteArray& inputByteArray, const uchar& channelCount, const qint32& bytesForDuration) {
    // input samples required to produce exactly NETWORK_FRAME_SAMPLES of output
    const int inputSamplesRequired =
        (_inputToNetworkResampler ? _inputToNetworkResampler->getMinInput(AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL)
                                  : AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL) *
        channelCount;

    // With nothing to consume per frame the drain loop below would never terminate.
    if (inputSamplesRequired <= 0) {
        return;
    }

    const auto inputAudioSamples = std::unique_ptr<int16_t[]>(new int16_t[inputSamplesRequired]);

    handleLocalEchoAndReverb(inputByteArray);

    _inputRingBuffer.writeData(inputByteArray.data(), inputByteArray.size());

    float audioInputMsecsRead = inputByteArray.size() / (float)(bytesForDuration);
    _stats.updateInputMsRead(audioInputMsecsRead);

    const int numNetworkBytes =
        _isStereoInput ? AudioConstants::NETWORK_FRAME_BYTES_STEREO : AudioConstants::NETWORK_FRAME_BYTES_PER_CHANNEL;
    const int numNetworkSamples =
        _isStereoInput ? AudioConstants::NETWORK_FRAME_SAMPLES_STEREO : AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL;

    // Sized for the stereo case, so it holds either frame layout.
    static int16_t networkAudioSamples[AudioConstants::NETWORK_FRAME_SAMPLES_STEREO];

    while (_inputRingBuffer.samplesAvailable() >= inputSamplesRequired) {
        if (_muted) {
            // Discard the frame's worth of input without resampling it.
            _inputRingBuffer.shiftReadPosition(inputSamplesRequired);
        } else {
            _inputRingBuffer.readSamples(inputAudioSamples.get(), inputSamplesRequired);
            possibleResampling(_inputToNetworkResampler, inputAudioSamples.get(), networkAudioSamples, inputSamplesRequired,
                               numNetworkSamples, channelCount, _desiredInputFormat.channelCount());
        }
        int bytesInInputRingBuffer = _inputRingBuffer.samplesAvailable() * AudioConstants::SAMPLE_SIZE;
        // Use the caller-supplied rate so this stat matches the "ms read" stat above
        // for every input source, not only the microphone.
        float msecsInInputRingBuffer = bytesInInputRingBuffer / (float)(bytesForDuration);
        _stats.updateInputMsUnplayed(msecsInInputRingBuffer);

        QByteArray audioBuffer(reinterpret_cast<char*>(networkAudioSamples), numNetworkBytes);
        handleAudioInput(audioBuffer);
    }
}
|
||||||
|
|
||||||
void AudioClient::handleMicAudioInput() {
|
void AudioClient::handleMicAudioInput() {
|
||||||
if (!_inputDevice || _isPlayingBackRecording) {
|
if (!_inputDevice || _isPlayingBackRecording) {
|
||||||
return;
|
return;
|
||||||
|
@ -1144,47 +1184,8 @@ void AudioClient::handleMicAudioInput() {
|
||||||
_inputReadsSinceLastCheck++;
|
_inputReadsSinceLastCheck++;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// input samples required to produce exactly NETWORK_FRAME_SAMPLES of output
|
processAudioAndAddToRingBuffer(_inputDevice->readAll(), _inputFormat.channelCount(),
|
||||||
const int inputSamplesRequired = (_inputToNetworkResampler ?
|
_inputFormat.bytesForDuration(USECS_PER_MSEC));
|
||||||
_inputToNetworkResampler->getMinInput(AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL) :
|
|
||||||
AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL) * _inputFormat.channelCount();
|
|
||||||
|
|
||||||
const auto inputAudioSamples = std::unique_ptr<int16_t[]>(new int16_t[inputSamplesRequired]);
|
|
||||||
QByteArray inputByteArray = _inputDevice->readAll();
|
|
||||||
|
|
||||||
handleLocalEchoAndReverb(inputByteArray);
|
|
||||||
|
|
||||||
_inputRingBuffer.writeData(inputByteArray.data(), inputByteArray.size());
|
|
||||||
|
|
||||||
float audioInputMsecsRead = inputByteArray.size() / (float)(_inputFormat.bytesForDuration(USECS_PER_MSEC));
|
|
||||||
_stats.updateInputMsRead(audioInputMsecsRead);
|
|
||||||
|
|
||||||
const int numNetworkBytes = _isStereoInput
|
|
||||||
? AudioConstants::NETWORK_FRAME_BYTES_STEREO
|
|
||||||
: AudioConstants::NETWORK_FRAME_BYTES_PER_CHANNEL;
|
|
||||||
const int numNetworkSamples = _isStereoInput
|
|
||||||
? AudioConstants::NETWORK_FRAME_SAMPLES_STEREO
|
|
||||||
: AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL;
|
|
||||||
|
|
||||||
static int16_t networkAudioSamples[AudioConstants::NETWORK_FRAME_SAMPLES_STEREO];
|
|
||||||
|
|
||||||
while (_inputRingBuffer.samplesAvailable() >= inputSamplesRequired) {
|
|
||||||
if (_muted) {
|
|
||||||
_inputRingBuffer.shiftReadPosition(inputSamplesRequired);
|
|
||||||
} else {
|
|
||||||
_inputRingBuffer.readSamples(inputAudioSamples.get(), inputSamplesRequired);
|
|
||||||
possibleResampling(_inputToNetworkResampler,
|
|
||||||
inputAudioSamples.get(), networkAudioSamples,
|
|
||||||
inputSamplesRequired, numNetworkSamples,
|
|
||||||
_inputFormat.channelCount(), _desiredInputFormat.channelCount());
|
|
||||||
}
|
|
||||||
int bytesInInputRingBuffer = _inputRingBuffer.samplesAvailable() * AudioConstants::SAMPLE_SIZE;
|
|
||||||
float msecsInInputRingBuffer = bytesInInputRingBuffer / (float)(_inputFormat.bytesForDuration(USECS_PER_MSEC));
|
|
||||||
_stats.updateInputMsUnplayed(msecsInInputRingBuffer);
|
|
||||||
|
|
||||||
QByteArray audioBuffer(reinterpret_cast<char*>(networkAudioSamples), numNetworkBytes);
|
|
||||||
handleAudioInput(audioBuffer);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void AudioClient::handleDummyAudioInput() {
|
void AudioClient::handleDummyAudioInput() {
|
||||||
|
@ -1201,6 +1202,11 @@ void AudioClient::handleRecordedAudioInput(const QByteArray& audio) {
|
||||||
handleAudioInput(audioBuffer);
|
handleAudioInput(audioBuffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void AudioClient::handleTTSAudioInput(const QByteArray& audio) {
|
||||||
|
QByteArray audioBuffer(audio);
|
||||||
|
processAudioAndAddToRingBuffer(audioBuffer, 1, 48);
|
||||||
|
}
|
||||||
|
|
||||||
void AudioClient::prepareLocalAudioInjectors(std::unique_ptr<Lock> localAudioLock) {
|
void AudioClient::prepareLocalAudioInjectors(std::unique_ptr<Lock> localAudioLock) {
|
||||||
bool doSynchronously = localAudioLock.operator bool();
|
bool doSynchronously = localAudioLock.operator bool();
|
||||||
if (!localAudioLock) {
|
if (!localAudioLock) {
|
||||||
|
|
|
@ -197,6 +197,7 @@ public slots:
|
||||||
void checkInputTimeout();
|
void checkInputTimeout();
|
||||||
void handleDummyAudioInput();
|
void handleDummyAudioInput();
|
||||||
void handleRecordedAudioInput(const QByteArray& audio);
|
void handleRecordedAudioInput(const QByteArray& audio);
|
||||||
|
void handleTTSAudioInput(const QByteArray& audio);
|
||||||
void reset();
|
void reset();
|
||||||
void audioMixerKilled();
|
void audioMixerKilled();
|
||||||
|
|
||||||
|
@ -289,6 +290,8 @@ private:
|
||||||
float azimuthForSource(const glm::vec3& relativePosition);
|
float azimuthForSource(const glm::vec3& relativePosition);
|
||||||
float gainForSource(float distance, float volume);
|
float gainForSource(float distance, float volume);
|
||||||
|
|
||||||
|
void processAudioAndAddToRingBuffer(QByteArray& inputByteArray, const uchar& channelCount, const qint32& bytesForDuration);
|
||||||
|
|
||||||
#ifdef Q_OS_ANDROID
|
#ifdef Q_OS_ANDROID
|
||||||
QTimer _checkInputTimer;
|
QTimer _checkInputTimer;
|
||||||
long _inputReadsSinceLastCheck = 0l;
|
long _inputReadsSinceLastCheck = 0l;
|
||||||
|
|
Loading…
Reference in a new issue