Some experimentation yields promising results...

This commit is contained in:
Zach Fox 2018-10-16 17:34:48 -07:00
parent 79f30eb05d
commit 26e388b139
5 changed files with 844 additions and 811 deletions

View file

@ -1182,6 +1182,7 @@ Application::Application(int& argc, char** argv, QElapsedTimer& startupTimer, bo
auto TTS = DependencyManager::get<TTSScriptingInterface>().data();
connect(TTS, &TTSScriptingInterface::ttsSampleCreated, audioIO, &AudioClient::handleTTSAudioInput);
connect(TTS, &TTSScriptingInterface::clearTTSBuffer, audioIO, &AudioClient::clearTTSBuffer);
connect(audioIO, &AudioClient::inputReceived, [](const QByteArray& audio) {
static auto recorder = DependencyManager::get<recording::Recorder>();

View file

@ -65,7 +65,7 @@ void TTSScriptingInterface::testTone(const bool& alsoInject) {
int16_t temp = (glm::sin(glm::radians((float)a))) * 32768;
samples[a] = temp;
}
emit ttsSampleCreated(_lastSoundByteArray);
emit ttsSampleCreated(_lastSoundByteArray, AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL * 50, 96);
if (alsoInject) {
AudioInjectorOptions options;
@ -75,11 +75,16 @@ void TTSScriptingInterface::testTone(const bool& alsoInject) {
}
}
void TTSScriptingInterface::speakText(const QString& textToSpeak, const bool& alsoInject) {
void TTSScriptingInterface::speakText(const QString& textToSpeak,
const int& newChunkSize,
const int& timerInterval,
const int& sampleRate,
const int& bitsPerSample,
const bool& alsoInject) {
WAVEFORMATEX fmt;
fmt.wFormatTag = WAVE_FORMAT_PCM;
fmt.nSamplesPerSec = 24000;
fmt.wBitsPerSample = 16;
fmt.nSamplesPerSec = sampleRate;
fmt.wBitsPerSample = bitsPerSample;
fmt.nChannels = 1;
fmt.nBlockAlign = fmt.nChannels * fmt.wBitsPerSample / 8;
fmt.nAvgBytesPerSec = fmt.nSamplesPerSec * fmt.nBlockAlign;
@ -146,7 +151,7 @@ void TTSScriptingInterface::speakText(const QString& textToSpeak, const bool& al
_lastSoundByteArray.resize(0);
_lastSoundByteArray.append(buf1, dwSize);
emit ttsSampleCreated(_lastSoundByteArray);
emit ttsSampleCreated(_lastSoundByteArray, newChunkSize, timerInterval);
if (alsoInject) {
AudioInjectorOptions options;
@ -160,4 +165,6 @@ void TTSScriptingInterface::stopLastSpeech() {
if (_lastSoundAudioInjector) {
_lastSoundAudioInjector->stop();
}
emit clearTTSBuffer();
}

View file

@ -19,6 +19,7 @@
#include <sapi.h> // SAPI
#include <sphelper.h> // SAPI Helper
#include <AudioInjector.h>
#include <AudioConstants.h>
class TTSScriptingInterface : public QObject, public Dependency {
Q_OBJECT
@ -28,11 +29,17 @@ public:
~TTSScriptingInterface();
Q_INVOKABLE void testTone(const bool& alsoInject = false);
Q_INVOKABLE void speakText(const QString& textToSpeak, const bool& alsoInject = false);
Q_INVOKABLE void speakText(const QString& textToSpeak,
const int& newChunkSize = (AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL * 50),
const int& timerInterval = 96,
const int& sampleRate = 24000,
const int& bitsPerSample = 16,
const bool& alsoInject = false);
Q_INVOKABLE void stopLastSpeech();
signals:
void ttsSampleCreated(QByteArray outputArray);
void ttsSampleCreated(QByteArray outputArray, const int& newChunkSize, const int& timerInterval);
void clearTTSBuffer();
private:
class CComAutoInit {

File diff suppressed because it is too large. Load diff

View file

@ -197,7 +197,11 @@ public slots:
void checkInputTimeout();
void handleDummyAudioInput();
void handleRecordedAudioInput(const QByteArray& audio);
void handleTTSAudioInput(const QByteArray& audio);
void handleTTSAudioInput(const QByteArray& audio,
const int& newChunkSize,
const int& timerInterval);
void clearTTSBuffer();
void processTTSBuffer();
void reset();
void audioMixerKilled();
@ -289,11 +293,12 @@ private:
bool mixLocalAudioInjectors(float* mixBuffer);
float azimuthForSource(const glm::vec3& relativePosition);
float gainForSource(float distance, float volume);
void processAudioAndAddToRingBuffer(QByteArray& inputByteArray,
const uchar& channelCount,
const qint32& bytesForDuration,
QByteArray& rollingBuffer);
Mutex _TTSMutex;
QTimer _TTSTimer;
bool _isProcessingTTS {false};
QByteArray _TTSAudioBuffer;
int _TTSChunkSize = AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL * 50;
#ifdef Q_OS_ANDROID
QTimer _checkInputTimer;
@ -401,7 +406,7 @@ private:
void configureReverb();
void updateReverbOptions();
void handleLocalEchoAndReverb(QByteArray& inputByteArray);
void handleLocalEchoAndReverb(QByteArray& inputByteArray, const int& sampleRate, const int& channelCount);
bool switchInputToAudioDevice(const QAudioDeviceInfo inputDeviceInfo, bool isShutdownRequest = false);
bool switchOutputToAudioDevice(const QAudioDeviceInfo outputDeviceInfo, bool isShutdownRequest = false);