diff --git a/interface/src/scripting/SpeechScriptingInterface.cpp b/interface/src/scripting/SpeechScriptingInterface.cpp index a38e1aa824..b9c7718075 100644 --- a/interface/src/scripting/SpeechScriptingInterface.cpp +++ b/interface/src/scripting/SpeechScriptingInterface.cpp @@ -19,8 +19,7 @@ SpeechScriptingInterface::SpeechScriptingInterface() { // HRESULT hr = m_tts.CoCreateInstance(CLSID_SpVoice); if (FAILED(hr)) { - ATLTRACE(TEXT("Text-to-speech creation failed.\n")); - AtlThrow(hr); + qDebug() << "Text-to-speech engine creation failed."; } // @@ -28,8 +27,7 @@ SpeechScriptingInterface::SpeechScriptingInterface() { // hr = SpGetDefaultTokenFromCategoryId(SPCAT_VOICES, &m_voiceToken, FALSE); if (FAILED(hr)) { - ATLTRACE(TEXT("Can't get default voice token.\n")); - AtlThrow(hr); + qDebug() << "Can't get default voice token."; } // @@ -37,28 +35,7 @@ SpeechScriptingInterface::SpeechScriptingInterface() { // hr = m_tts->SetVoice(m_voiceToken); if (FAILED(hr)) { - ATLTRACE(TEXT("Can't set default voice.\n")); - AtlThrow(hr); - } - - WAVEFORMATEX fmt; - fmt.wFormatTag = WAVE_FORMAT_PCM; - fmt.nSamplesPerSec = 48000; - fmt.wBitsPerSample = 16; - fmt.nChannels = 1; - fmt.nBlockAlign = fmt.nChannels * fmt.wBitsPerSample / 8; - fmt.nAvgBytesPerSec = fmt.nSamplesPerSec * fmt.nBlockAlign; - fmt.cbSize = 0; - - BYTE* pcontent = new BYTE[1024 * 1000]; - - cpIStream = SHCreateMemStream(NULL, 0); - hr = outputStream->SetBaseStream(cpIStream, SPDFID_WaveFormatEx, &fmt); - - hr = m_tts->SetOutput(outputStream, true); - if (FAILED(hr)) { - ATLTRACE(TEXT("Can't set output stream.\n")); - AtlThrow(hr); + qDebug() << "Can't set default voice."; } } @@ -66,30 +43,91 @@ SpeechScriptingInterface::~SpeechScriptingInterface() { } +class ReleaseOnExit { +public: + ReleaseOnExit(IUnknown* p) : m_p(p) {} + ~ReleaseOnExit() { + if (m_p) { + m_p->Release(); + } + } + +private: + IUnknown* m_p; +}; + void SpeechScriptingInterface::speakText(const QString& textToSpeak) { + WAVEFORMATEX fmt; + fmt.wFormatTag = WAVE_FORMAT_PCM; + fmt.nSamplesPerSec = 44100; + fmt.wBitsPerSample = 16; + fmt.nChannels = 1; + fmt.nBlockAlign = fmt.nChannels * fmt.wBitsPerSample / 8; + fmt.nAvgBytesPerSec = fmt.nSamplesPerSec * fmt.nBlockAlign; + fmt.cbSize = 0; + + IStream* pStream = NULL; + + ISpStream* pSpStream = nullptr; + HRESULT hr = CoCreateInstance(CLSID_SpStream, nullptr, CLSCTX_ALL, __uuidof(ISpStream), (void**)&pSpStream); + if (FAILED(hr)) { + qDebug() << "CoCreateInstance failed."; + } + ReleaseOnExit rSpStream(pSpStream); + + pStream = SHCreateMemStream(NULL, 0); + if (nullptr == pStream) { + qDebug() << "SHCreateMemStream failed."; + } + + hr = pSpStream->SetBaseStream(pStream, SPDFID_WaveFormatEx, &fmt); + if (FAILED(hr)) { + qDebug() << "Can't set base stream."; + } + + hr = m_tts->SetOutput(pSpStream, true); + if (FAILED(hr)) { + qDebug() << "Can't set output stream."; + } + + ReleaseOnExit rStream(pStream); + ULONG streamNumber; - HRESULT hr = m_tts->Speak(reinterpret_cast(textToSpeak.utf16()), + hr = m_tts->Speak(reinterpret_cast(textToSpeak.utf16()), SPF_IS_NOT_XML | SPF_ASYNC | SPF_PURGEBEFORESPEAK, &streamNumber); if (FAILED(hr)) { - ATLTRACE(TEXT("Speak failed.\n")); - AtlThrow(hr); + qDebug() << "Speak failed."; } m_tts->WaitUntilDone(-1); - outputStream->GetBaseStream(&cpIStream); + hr = pSpStream->GetBaseStream(&pStream); + if (FAILED(hr)) { + qDebug() << "Couldn't get base stream."; + } + + hr = IStream_Reset(pStream); + if (FAILED(hr)) { + qDebug() << "Couldn't reset stream."; + } + ULARGE_INTEGER StreamSize; StreamSize.LowPart = 0; - hr = IStream_Size(cpIStream, &StreamSize); + hr = IStream_Size(pStream, &StreamSize); DWORD dwSize = StreamSize.QuadPart; char* buf1 = new char[dwSize + 1]; - hr = IStream_Read(cpIStream, buf1, dwSize); + memset(buf1, 0, dwSize + 1); + + hr = IStream_Read(pStream, buf1, dwSize); + if (FAILED(hr)) { + qDebug() << "Couldn't read from stream."; + } + + QByteArray byteArray = QByteArray::QByteArray(buf1, dwSize); - QByteArray byteArray = QByteArray::QByteArray(buf1, (int)dwSize); AudioInjectorOptions options; - options.position = DependencyManager::get()->getMyAvatarPosition(); AudioInjector::playSound(byteArray, options); diff --git a/interface/src/scripting/SpeechScriptingInterface.h b/interface/src/scripting/SpeechScriptingInterface.h index 311bd80605..ad6777e339 100644 --- a/interface/src/scripting/SpeechScriptingInterface.h +++ b/interface/src/scripting/SpeechScriptingInterface.h @@ -13,7 +13,9 @@ #include #include - +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#endif #include // SAPI #include // SAPI Helper @@ -68,9 +70,6 @@ private: // Default voice token CComPtr m_voiceToken; - - CComPtr outputStream; - CComPtr cpIStream; }; #endif // hifi_SpeechScriptingInterface_h