Lotsa changes

2025-08-10 16:23:17 +02:00 · 2018-11-01 16:48:08 -07:00 · 2018-11-01 16:48:08 -07:00 · 20cd1df22c
commit 20cd1df22c
parent bccb3f1de9
6 changed files with 36 additions and 96 deletions
--- a/interface/resources/qml/hifi/tts/TTS.qml
+++ b/interface/resources/qml/hifi/tts/TTS.qml
@ -202,7 +202,6 @@ Rectangle {
        TextArea {
            id: messageToSpeak;
            placeholderText: "<i>Message to Speak</i>";
            font.family: "Fira Sans SemiBold";
            font.pixelSize: 20;
            // Anchors
@ -229,6 +228,17 @@ Rectangle {
                    event.accepted = true;
                }
            }
            // Hint text overlaid on the enclosing item. It is shown only while
            // the parent lacks active focus and messageToSpeak has no text.
            HifiStylesUit.FiraSansRegular {
                text: "<i>Input Text to Speak...</i>";
                size: 20;
                color: hifi.colors.lightGrayText;
                verticalAlignment: Text.AlignTop;
                visible: !parent.activeFocus && messageToSpeak.text === "";
                // Fill the parent, inset slightly from its top-left corner.
                anchors {
                    fill: parent;
                    topMargin: 4;
                    leftMargin: 4;
                }
            }
        }
 		HifiControlsUit.Button {
--- a/interface/src/Application.cpp
+++ b/interface/src/Application.cpp
@ -1187,10 +1187,6 @@ Application::Application(int& argc, char** argv, QElapsedTimer& startupTimer, bo
        recording::Frame::registerFrameHandler(AudioConstants::getAudioFrameName(), [&audioIO](recording::Frame::ConstPointer frame) {
            audioIO->handleRecordedAudioInput(frame->data);
        });
        auto TTS = DependencyManager::get<TTSScriptingInterface>().data();
        connect(TTS, &TTSScriptingInterface::ttsSampleCreated, audioIO, &AudioClient::handleTTSAudioInput);
        connect(TTS, &TTSScriptingInterface::clearTTSBuffer, audioIO, &AudioClient::clearTTSBuffer);
        connect(audioIO, &AudioClient::inputReceived, [](const QByteArray& audio) {
            static auto recorder = DependencyManager::get<recording::Recorder>();
--- a/interface/src/scripting/TTSScriptingInterface.cpp
+++ b/interface/src/scripting/TTSScriptingInterface.cpp
@ -37,6 +37,9 @@ TTSScriptingInterface::TTSScriptingInterface() {
    if (FAILED(hr)) {
        qDebug() << "Can't set default voice.";
    }
    _lastSoundAudioInjectorUpdateTimer.setSingleShot(true);
    connect(&_lastSoundAudioInjectorUpdateTimer, &QTimer::timeout, this, &TTSScriptingInterface::updateLastSoundAudioInjector);
 #endif
 }
@ -58,38 +61,22 @@ private:
 };
 #endif
-void TTSScriptingInterface::testTone(const bool& alsoInject) {
+const std::chrono::milliseconds INJECTOR_INTERVAL_MS = std::chrono::milliseconds(100);
-    QByteArray byteArray(480000, 0);
+void TTSScriptingInterface::updateLastSoundAudioInjector() {
-    _lastSoundByteArray.resize(0);
+    if (_lastSoundAudioInjector) {
    _lastSoundByteArray.resize(480000);
    int32_t a = 0;
    int16_t* samples = reinterpret_cast<int16_t*>(byteArray.data());
    for (a = 0; a < 240000; a++) {
        int16_t temp = (glm::sin(glm::radians((float)a))) * 32768;
        samples[a] = temp;
    }
    emit ttsSampleCreated(_lastSoundByteArray, AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL * 50, 96);
    if (alsoInject) {
        AudioInjectorOptions options;
        options.position = DependencyManager::get<AvatarManager>()->getMyAvatarPosition();
-
+        _lastSoundAudioInjector->setOptions(options);
-        _lastSoundAudioInjector = AudioInjector::playSound(_lastSoundByteArray, options);
+        _lastSoundAudioInjectorUpdateTimer.start(INJECTOR_INTERVAL_MS);
    }
 }
-void TTSScriptingInterface::speakText(const QString& textToSpeak,
+void TTSScriptingInterface::speakText(const QString& textToSpeak) {
                                      const int& newChunkSize,
                                      const int& timerInterval,
                                      const int& sampleRate,
                                      const int& bitsPerSample,
                                      const bool& alsoInject) {
 #ifdef WIN32
    WAVEFORMATEX fmt;
    fmt.wFormatTag = WAVE_FORMAT_PCM;
-    fmt.nSamplesPerSec = sampleRate;
+    fmt.nSamplesPerSec = 24000;
-    fmt.wBitsPerSample = bitsPerSample;
+    fmt.wBitsPerSample = 16;
    fmt.nChannels = 1;
    fmt.nBlockAlign = fmt.nChannels * fmt.wBitsPerSample / 8;
    fmt.nAvgBytesPerSec = fmt.nSamplesPerSec * fmt.nBlockAlign;
@ -156,16 +143,17 @@ void TTSScriptingInterface::speakText(const QString& textToSpeak,
    _lastSoundByteArray.resize(0);
    _lastSoundByteArray.append(buf1, dwSize);
-    // Commented out because this doesn't work completely :)
+    AudioInjectorOptions options;
-    // Obviously, commenting this out isn't fit for production, but it's fine for a test PR
+    options.position = DependencyManager::get<AvatarManager>()->getMyAvatarPosition();
    //emit ttsSampleCreated(_lastSoundByteArray, newChunkSize, timerInterval);
-    if (alsoInject) {
+    if (_lastSoundAudioInjector) {
-        AudioInjectorOptions options;
+        _lastSoundAudioInjector->stop();
-        options.position = DependencyManager::get<AvatarManager>()->getMyAvatarPosition();
+        _lastSoundAudioInjectorUpdateTimer.stop();
        _lastSoundAudioInjector = AudioInjector::playSound(_lastSoundByteArray, options);
    }
    _lastSoundAudioInjector = AudioInjector::playSoundAndDelete(_lastSoundByteArray, options);
    _lastSoundAudioInjectorUpdateTimer.start(INJECTOR_INTERVAL_MS);
 #else
    qDebug() << "Text-to-Speech isn't currently supported on non-Windows platforms.";
 #endif
@ -174,7 +162,6 @@ void TTSScriptingInterface::speakText(const QString& textToSpeak,
 /// Stops the most recently started speech injector, if there is one, and
 /// emits clearTTSBuffer() so connected receivers can discard pending TTS audio.
 void TTSScriptingInterface::stopLastSpeech() {
    if (_lastSoundAudioInjector) {
        _lastSoundAudioInjector->stop();
        // Drop our handle so later calls see that nothing is playing.
        _lastSoundAudioInjector = nullptr;
    }

    // Emitted unconditionally, even when no injector was active.
    emit clearTTSBuffer();
 }
--- a/interface/src/scripting/TTSScriptingInterface.h
+++ b/interface/src/scripting/TTSScriptingInterface.h
@ -12,6 +12,7 @@
 #define hifi_SpeechScriptingInterface_h
 #include <QtCore/QObject>
 #include <QTimer>
 #include <DependencyManager.h>
 #ifdef WIN32
 #pragma warning(disable : 4996)
@ -31,19 +32,9 @@ public:
    TTSScriptingInterface();
    ~TTSScriptingInterface();
-    Q_INVOKABLE void testTone(const bool& alsoInject = false);
+    Q_INVOKABLE void speakText(const QString& textToSpeak);
    Q_INVOKABLE void speakText(const QString& textToSpeak,
                               const int& newChunkSize = (AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL * 50),
                               const int& timerInterval = 96,
                               const int& sampleRate = 24000,
                               const int& bitsPerSample = 16,
                               const bool& alsoInject = false);
    Q_INVOKABLE void stopLastSpeech();
 signals:
    void ttsSampleCreated(QByteArray outputArray, const int& newChunkSize, const int& timerInterval);
    void clearTTSBuffer();
 private:
 #ifdef WIN32
    class CComAutoInit {
@ -90,6 +81,8 @@ private:
    QByteArray _lastSoundByteArray;
    AudioInjectorPointer _lastSoundAudioInjector;
    QTimer _lastSoundAudioInjectorUpdateTimer;
    void updateLastSoundAudioInjector();
 };
 #endif // hifi_SpeechScriptingInterface_h
--- a/libraries/audio-client/src/AudioClient.cpp
+++ b/libraries/audio-client/src/AudioClient.cpp
@ -245,8 +245,6 @@ AudioClient::AudioClient() :
    packetReceiver.registerListener(PacketType::NoisyMute, this, "handleNoisyMutePacket");
    packetReceiver.registerListener(PacketType::MuteEnvironment, this, "handleMuteEnvironmentPacket");
    packetReceiver.registerListener(PacketType::SelectedAudioFormat, this, "handleSelectedAudioFormat");
    connect(&_TTSTimer, &QTimer::timeout, this, &AudioClient::processTTSBuffer);
 }
 AudioClient::~AudioClient() {
@ -1202,45 +1200,6 @@ int rawToWav(const char* rawData, const int& rawLength, const char* wavfn, long
    return 0;
 }
 /// Pump for queued TTS audio, invoked on each _TTSTimer timeout: every call
 /// hands one _TTSChunkSize-byte chunk to handleAudioInput(), and stops the
 /// timer once the queue is empty.
 void AudioClient::processTTSBuffer() {
    Lock lock(_TTSMutex);
    if (_TTSAudioBuffer.size() > 0) {
        // Take one chunk from the front of the queue. The last chunk may hold
        // fewer than _TTSChunkSize bytes: left() clamps to the bytes that
        // actually exist (copying _TTSChunkSize bytes from the raw data
        // pointer would read past the end of the buffer), and leftJustified()
        // pads the tail with zero bytes so the chunk passed downstream keeps
        // its usual size.
        QByteArray part = _TTSAudioBuffer.left(_TTSChunkSize).leftJustified(_TTSChunkSize, '\0');
        // remove() truncates at position 0 when fewer bytes than requested remain.
        _TTSAudioBuffer.remove(0, _TTSChunkSize);
        handleAudioInput(part);
    } else {
        // Queue drained: clear the processing flag and stop polling.
        _isProcessingTTS = false;
        _TTSTimer.stop();
    }
 }
 /// Queues synthesized speech and starts the timer that feeds it, chunk by
 /// chunk, through processTTSBuffer().
 ///
 /// @param audio          bytes appended to the pending TTS buffer
 /// @param newChunkSize   number of bytes processTTSBuffer() consumes per timer tick
 /// @param timerInterval  interval passed to _TTSTimer.start()
 void AudioClient::handleTTSAudioInput(const QByteArray& audio, const int& newChunkSize, const int& timerInterval) {
    _TTSChunkSize = newChunkSize;
    // NOTE(review): processTTSBuffer() takes _TTSMutex before touching
    // _TTSAudioBuffer; this function does not — confirm both always run on the
    // same thread.
    _TTSAudioBuffer.append(audio);
    // NOTE(review): this is applied to the whole accumulated buffer, not just
    // the bytes appended above — confirm that is intended when audio is already
    // queued. The arguments 48000 and 1 are presumably sample rate and channel
    // count — TODO confirm against handleLocalEchoAndReverb().
    handleLocalEchoAndReverb(_TTSAudioBuffer, 48000, 1);
    //QString filename = QString::number(usecTimestampNow());
    //QString path = PathUtils::getAppDataPath() + "Audio/" + filename + "-before.wav";
    //rawToWav(_TTSAudioBuffer.data(), _TTSAudioBuffer.size(), path.toLocal8Bit(), 24000, 1);
    //QByteArray temp;
    _isProcessingTTS = true;
    // (Re)start the pump; each timeout invokes processTTSBuffer().
    _TTSTimer.start(timerInterval);
    //filename = QString::number(usecTimestampNow());
    //path = PathUtils::getAppDataPath() + "Audio/" + filename + "-after.wav";
    //rawToWav(temp.data(), temp.size(), path.toLocal8Bit(), 12000, 1);
 }
 /// Abandons any TTS playback in progress: halts the pump timer, clears the
 /// processing flag, and empties the pending audio buffer.
 void AudioClient::clearTTSBuffer() {
    _TTSTimer.stop();
    _isProcessingTTS = false;
    _TTSAudioBuffer.resize(0);
 }
 void AudioClient::prepareLocalAudioInjectors(std::unique_ptr<Lock> localAudioLock) {
        bool doSynchronously = localAudioLock.operator bool();
        if (!localAudioLock) {
--- a/libraries/audio-client/src/AudioClient.h
+++ b/libraries/audio-client/src/AudioClient.h
@ -197,11 +197,6 @@ public slots:
    void checkInputTimeout();
    void handleDummyAudioInput();
    void handleRecordedAudioInput(const QByteArray& audio);
    void handleTTSAudioInput(const QByteArray& audio,
                             const int& newChunkSize,
                             const int& timerInterval);
    void clearTTSBuffer();
    void processTTSBuffer();
    void reset();
    void audioMixerKilled();