mirror of
https://github.com/overte-org/overte.git
synced 2025-08-06 18:25:52 +02:00
Almost working
This commit is contained in:
parent
7de784ce27
commit
f1446532d0
2 changed files with 75 additions and 38 deletions
|
@ -19,8 +19,7 @@ SpeechScriptingInterface::SpeechScriptingInterface() {
|
||||||
//
|
//
|
||||||
HRESULT hr = m_tts.CoCreateInstance(CLSID_SpVoice);
|
HRESULT hr = m_tts.CoCreateInstance(CLSID_SpVoice);
|
||||||
if (FAILED(hr)) {
|
if (FAILED(hr)) {
|
||||||
ATLTRACE(TEXT("Text-to-speech creation failed.\n"));
|
qDebug() << "Text-to-speech engine creation failed.";
|
||||||
AtlThrow(hr);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
|
@ -28,8 +27,7 @@ SpeechScriptingInterface::SpeechScriptingInterface() {
|
||||||
//
|
//
|
||||||
hr = SpGetDefaultTokenFromCategoryId(SPCAT_VOICES, &m_voiceToken, FALSE);
|
hr = SpGetDefaultTokenFromCategoryId(SPCAT_VOICES, &m_voiceToken, FALSE);
|
||||||
if (FAILED(hr)) {
|
if (FAILED(hr)) {
|
||||||
ATLTRACE(TEXT("Can't get default voice token.\n"));
|
qDebug() << "Can't get default voice token.";
|
||||||
AtlThrow(hr);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
|
@ -37,28 +35,7 @@ SpeechScriptingInterface::SpeechScriptingInterface() {
|
||||||
//
|
//
|
||||||
hr = m_tts->SetVoice(m_voiceToken);
|
hr = m_tts->SetVoice(m_voiceToken);
|
||||||
if (FAILED(hr)) {
|
if (FAILED(hr)) {
|
||||||
ATLTRACE(TEXT("Can't set default voice.\n"));
|
qDebug() << "Can't set default voice.";
|
||||||
AtlThrow(hr);
|
|
||||||
}
|
|
||||||
|
|
||||||
WAVEFORMATEX fmt;
|
|
||||||
fmt.wFormatTag = WAVE_FORMAT_PCM;
|
|
||||||
fmt.nSamplesPerSec = 48000;
|
|
||||||
fmt.wBitsPerSample = 16;
|
|
||||||
fmt.nChannels = 1;
|
|
||||||
fmt.nBlockAlign = fmt.nChannels * fmt.wBitsPerSample / 8;
|
|
||||||
fmt.nAvgBytesPerSec = fmt.nSamplesPerSec * fmt.nBlockAlign;
|
|
||||||
fmt.cbSize = 0;
|
|
||||||
|
|
||||||
BYTE* pcontent = new BYTE[1024 * 1000];
|
|
||||||
|
|
||||||
cpIStream = SHCreateMemStream(NULL, 0);
|
|
||||||
hr = outputStream->SetBaseStream(cpIStream, SPDFID_WaveFormatEx, &fmt);
|
|
||||||
|
|
||||||
hr = m_tts->SetOutput(outputStream, true);
|
|
||||||
if (FAILED(hr)) {
|
|
||||||
ATLTRACE(TEXT("Can't set output stream.\n"));
|
|
||||||
AtlThrow(hr);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -66,30 +43,91 @@ SpeechScriptingInterface::~SpeechScriptingInterface() {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
class ReleaseOnExit {
|
||||||
|
public:
|
||||||
|
ReleaseOnExit(IUnknown* p) : m_p(p) {}
|
||||||
|
~ReleaseOnExit() {
|
||||||
|
if (m_p) {
|
||||||
|
m_p->Release();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
IUnknown* m_p;
|
||||||
|
};
|
||||||
|
|
||||||
void SpeechScriptingInterface::speakText(const QString& textToSpeak) {
|
void SpeechScriptingInterface::speakText(const QString& textToSpeak) {
|
||||||
|
WAVEFORMATEX fmt;
|
||||||
|
fmt.wFormatTag = WAVE_FORMAT_PCM;
|
||||||
|
fmt.nSamplesPerSec = 44100;
|
||||||
|
fmt.wBitsPerSample = 16;
|
||||||
|
fmt.nChannels = 1;
|
||||||
|
fmt.nBlockAlign = fmt.nChannels * fmt.wBitsPerSample / 8;
|
||||||
|
fmt.nAvgBytesPerSec = fmt.nSamplesPerSec * fmt.nBlockAlign;
|
||||||
|
fmt.cbSize = 0;
|
||||||
|
|
||||||
|
IStream* pStream = NULL;
|
||||||
|
|
||||||
|
ISpStream* pSpStream = nullptr;
|
||||||
|
HRESULT hr = CoCreateInstance(CLSID_SpStream, nullptr, CLSCTX_ALL, __uuidof(ISpStream), (void**)&pSpStream);
|
||||||
|
if (FAILED(hr)) {
|
||||||
|
qDebug() << "CoCreateInstance failed.";
|
||||||
|
}
|
||||||
|
ReleaseOnExit rSpStream(pSpStream);
|
||||||
|
|
||||||
|
pStream = SHCreateMemStream(NULL, 0);
|
||||||
|
if (nullptr == pStream) {
|
||||||
|
qDebug() << "SHCreateMemStream failed.";
|
||||||
|
}
|
||||||
|
|
||||||
|
hr = pSpStream->SetBaseStream(pStream, SPDFID_WaveFormatEx, &fmt);
|
||||||
|
if (FAILED(hr)) {
|
||||||
|
qDebug() << "Can't set base stream.";
|
||||||
|
}
|
||||||
|
|
||||||
|
hr = m_tts->SetOutput(pSpStream, true);
|
||||||
|
if (FAILED(hr)) {
|
||||||
|
qDebug() << "Can't set output stream.";
|
||||||
|
}
|
||||||
|
|
||||||
|
ReleaseOnExit rStream(pStream);
|
||||||
|
|
||||||
ULONG streamNumber;
|
ULONG streamNumber;
|
||||||
HRESULT hr = m_tts->Speak(reinterpret_cast<LPCWSTR>(textToSpeak.utf16()),
|
hr = m_tts->Speak(reinterpret_cast<LPCWSTR>(textToSpeak.utf16()),
|
||||||
SPF_IS_NOT_XML | SPF_ASYNC | SPF_PURGEBEFORESPEAK,
|
SPF_IS_NOT_XML | SPF_ASYNC | SPF_PURGEBEFORESPEAK,
|
||||||
&streamNumber);
|
&streamNumber);
|
||||||
if (FAILED(hr)) {
|
if (FAILED(hr)) {
|
||||||
ATLTRACE(TEXT("Speak failed.\n"));
|
qDebug() << "Speak failed.";
|
||||||
AtlThrow(hr);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
m_tts->WaitUntilDone(-1);
|
m_tts->WaitUntilDone(-1);
|
||||||
|
|
||||||
outputStream->GetBaseStream(&cpIStream);
|
hr = pSpStream->GetBaseStream(&pStream);
|
||||||
|
if (FAILED(hr)) {
|
||||||
|
qDebug() << "Couldn't get base stream.";
|
||||||
|
}
|
||||||
|
|
||||||
|
hr = IStream_Reset(pStream);
|
||||||
|
if (FAILED(hr)) {
|
||||||
|
qDebug() << "Couldn't reset stream.";
|
||||||
|
}
|
||||||
|
|
||||||
ULARGE_INTEGER StreamSize;
|
ULARGE_INTEGER StreamSize;
|
||||||
StreamSize.LowPart = 0;
|
StreamSize.LowPart = 0;
|
||||||
hr = IStream_Size(cpIStream, &StreamSize);
|
hr = IStream_Size(pStream, &StreamSize);
|
||||||
|
|
||||||
DWORD dwSize = StreamSize.QuadPart;
|
DWORD dwSize = StreamSize.QuadPart;
|
||||||
char* buf1 = new char[dwSize + 1];
|
char* buf1 = new char[dwSize + 1];
|
||||||
hr = IStream_Read(cpIStream, buf1, dwSize);
|
memset(buf1, 0, dwSize + 1);
|
||||||
|
|
||||||
|
hr = IStream_Read(pStream, buf1, dwSize);
|
||||||
|
if (FAILED(hr)) {
|
||||||
|
qDebug() << "Couldn't read from stream.";
|
||||||
|
}
|
||||||
|
|
||||||
|
QByteArray byteArray = QByteArray::QByteArray(buf1, dwSize);
|
||||||
|
|
||||||
QByteArray byteArray = QByteArray::QByteArray(buf1, (int)dwSize);
|
|
||||||
AudioInjectorOptions options;
|
AudioInjectorOptions options;
|
||||||
|
|
||||||
options.position = DependencyManager::get<AvatarManager>()->getMyAvatarPosition();
|
options.position = DependencyManager::get<AvatarManager>()->getMyAvatarPosition();
|
||||||
|
|
||||||
AudioInjector::playSound(byteArray, options);
|
AudioInjector::playSound(byteArray, options);
|
||||||
|
|
|
@ -13,7 +13,9 @@
|
||||||
|
|
||||||
#include <QtCore/QObject>
|
#include <QtCore/QObject>
|
||||||
#include <DependencyManager.h>
|
#include <DependencyManager.h>
|
||||||
|
#ifndef WIN32_LEAN_AND_MEAN
|
||||||
|
#define WIN32_LEAN_AND_MEAN
|
||||||
|
#endif
|
||||||
#include <sapi.h> // SAPI
|
#include <sapi.h> // SAPI
|
||||||
#include <sphelper.h> // SAPI Helper
|
#include <sphelper.h> // SAPI Helper
|
||||||
|
|
||||||
|
@ -68,9 +70,6 @@ private:
|
||||||
|
|
||||||
// Default voice token
|
// Default voice token
|
||||||
CComPtr<ISpObjectToken> m_voiceToken;
|
CComPtr<ISpObjectToken> m_voiceToken;
|
||||||
|
|
||||||
CComPtr<ISpStream> outputStream;
|
|
||||||
CComPtr<IStream> cpIStream;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // hifi_SpeechScriptingInterface_h
|
#endif // hifi_SpeechScriptingInterface_h
|
||||||
|
|
Loading…
Reference in a new issue