Add SpeechRecognizer

This commit is contained in:
Ryan Huffman 2014-08-01 13:44:06 -07:00
parent d4d4069f11
commit ffb178cb43
7 changed files with 152 additions and 1 deletion

View file

@ -46,6 +46,7 @@ configure_file(InterfaceConfig.h.in "${PROJECT_BINARY_DIR}/includes/InterfaceCon
configure_file(InterfaceVersion.h.in "${PROJECT_BINARY_DIR}/includes/InterfaceVersion.h")
# grab the implementation and header files from src dirs
file(GLOB INTERFACE_OBJCPP_SRCS src/*.mm)
file(GLOB INTERFACE_SRCS src/*.cpp src/*.h)
foreach(SUBDIR avatar devices renderer ui starfield location scripting voxels particles models)
file(GLOB_RECURSE SUBDIR_SRCS src/${SUBDIR}/*.cpp src/${SUBDIR}/*.h)
@ -95,7 +96,7 @@ if (APPLE)
endif()
# create the executable, make it a bundle on OS X
add_executable(${TARGET_NAME} MACOSX_BUNDLE ${INTERFACE_SRCS} ${QM})
add_executable(${TARGET_NAME} MACOSX_BUNDLE ${INTERFACE_SRCS} ${INTERFACE_OBJCPP_SRCS} ${QM})
# link in the hifi shared library
include(${MACRO_DIR}/LinkHifiLibrary.cmake)

View file

@ -170,6 +170,7 @@ Application::Application(int& argc, char** argv, QElapsedTimer &startup_time) :
_runningScriptsWidget(NULL),
_runningScriptsWidgetWasVisible(false),
_trayIcon(new QSystemTrayIcon(_window)),
_speechRecognizer(),
_lastNackTime(usecTimestampNow()),
_lastSendDownstreamAudioStats(usecTimestampNow())
{
@ -1443,6 +1444,10 @@ void Application::setLowVelocityFilter(bool lowVelocityFilter) {
getSixenseManager()->setLowVelocityFilter(lowVelocityFilter);
}
// Slot: enable/disable voice-command recognition by forwarding the state to
// the embedded SpeechRecognizer member. Wired to the "Control With Speech"
// menu checkbox and invoked when loading saved settings.
void Application::setSpeechRecognitionEnabled(bool enabled) {
_speechRecognizer.setEnabled(enabled);
}
// Slot: request deletion of the local voxel tree by raising a flag.
// The flag is presumably polled and acted on elsewhere in the main loop —
// the consumer is not visible in this chunk.
void Application::doKillLocalVoxels() {
_wantToKillLocalVoxels = true;
}

View file

@ -53,6 +53,7 @@
#include "Menu.h"
#include "MetavoxelSystem.h"
#include "PacketHeaders.h"
#include "SpeechRecognizer.h"
#include "Stars.h"
#include "avatar/Avatar.h"
#include "avatar/AvatarManager.h"
@ -324,6 +325,8 @@ public slots:
void setRenderVoxels(bool renderVoxels);
void setLowVelocityFilter(bool lowVelocityFilter);
bool getSpeechRecognitionEnabled() { return _speechRecognizer.getEnabled(); }
void setSpeechRecognitionEnabled(bool enabled);
void doKillLocalVoxels();
void loadDialog();
void loadScriptURLDialog();
@ -593,6 +596,8 @@ private:
QSystemTrayIcon* _trayIcon;
SpeechRecognizer _speechRecognizer;
quint64 _lastNackTime;
quint64 _lastSendDownstreamAudioStats;
};

View file

@ -233,6 +233,8 @@ Menu::Menu() :
QMenu* toolsMenu = addMenu("Tools");
addActionToQMenuAndActionHash(toolsMenu, MenuOption::MetavoxelEditor, 0, this, SLOT(showMetavoxelEditor()));
addActionToQMenuAndActionHash(toolsMenu, MenuOption::ScriptEditor, Qt::ALT | Qt::Key_S, this, SLOT(showScriptEditor()));
addCheckableActionToQMenuAndActionHash(toolsMenu, MenuOption::ControlWithSpeech, Qt::CTRL | Qt::SHIFT | Qt::Key_C, true,
Application::getInstance(), SLOT(setSpeechRecognitionEnabled(bool)));
#ifdef HAVE_QXMPP
_chatAction = addActionToQMenuAndActionHash(toolsMenu,
@ -651,6 +653,7 @@ void Menu::loadSettings(QSettings* settings) {
_snapshotsLocation = settings->value("snapshotsLocation",
QStandardPaths::writableLocation(QStandardPaths::DesktopLocation)).toString();
setScriptsLocation(settings->value("scriptsLocation", QString()).toString());
Application::getInstance()->setSpeechRecognitionEnabled(settings->value("speechRecognitionEnabled", false).toBool());
settings->beginGroup("View Frustum Offset Camera");
// in case settings is corrupt or missing loadSetting() will check for NaN
@ -699,6 +702,7 @@ void Menu::saveSettings(QSettings* settings) {
settings->setValue("boundaryLevelAdjust", _boundaryLevelAdjust);
settings->setValue("snapshotsLocation", _snapshotsLocation);
settings->setValue("scriptsLocation", _scriptsLocation);
settings->setValue("speechRecognitionEnabled", Application::getInstance()->getSpeechRecognitionEnabled());
settings->beginGroup("View Frustum Offset Camera");
settings->setValue("viewFrustumOffsetYaw", _viewFrustumOffset.yaw);
settings->setValue("viewFrustumOffsetPitch", _viewFrustumOffset.pitch);

View file

@ -348,6 +348,7 @@ namespace MenuOption {
const QString CollideWithVoxels = "Collide With Voxels";
const QString Collisions = "Collisions";
const QString Console = "Console...";
const QString ControlWithSpeech = "Control With Speech";
const QString DecreaseAvatarSize = "Decrease Avatar Size";
const QString DecreaseVoxelSize = "Decrease Voxel Size";
const QString DisableActivityLogger = "Disable Activity Logger";

View file

@ -0,0 +1,41 @@
//
// SpeechRecognizer.h
// interface/src
//
// Created by Ryan Huffman on 07/31/14.
// Copyright 2014 High Fidelity, Inc.
//
// Distributed under the Apache License, Version 2.0.
// See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html
//
#ifndef hifi_SpeechRecognizer_h
#define hifi_SpeechRecognizer_h
#include <QObject>
/// Qt wrapper around the platform speech-recognition facility (backed by
/// NSSpeechRecognizer on OS X — see SpeechRecognizer.mm). Recognized voice
/// commands are re-emitted as the commandRecognized() Qt signal.
class SpeechRecognizer : public QObject {
Q_OBJECT
public:
SpeechRecognizer();
~SpeechRecognizer();
// Allocates the native recognizer and its delegate; called by the constructor.
void init();
// Called from the native delegate when a registered command is heard;
// re-emits it as commandRecognized(). `command` is a UTF-8 C string.
void handleCommandRecognized(const char* command);
bool getEnabled() { return _enabled; }
// Starts or stops listening on the native recognizer and records the state.
void setEnabled(bool enabled);
public slots:
// Adds `command` to the set of phrases the recognizer listens for.
void addCommand(const QString& command);
// Removes `command` from the set of recognized phrases.
void removeCommand(const QString& command);
signals:
// Emitted whenever the native recognizer reports a recognized command.
void commandRecognized(const QString& command);
private:
bool _enabled;
// Opaque pointers to Objective-C objects (the delegate bridge and the
// NSSpeechRecognizer), stored as void* so this header stays pure C++
// and can be included from .cpp translation units.
void* _speechRecognizerDelegate;
void* _speechRecognizer;
};
#endif // hifi_SpeechRecognizer_h

View file

@ -0,0 +1,94 @@
//
// SpeechRecognizer.mm
// interface/src
//
// Created by Ryan Huffman on 07/31/14.
// Copyright 2014 High Fidelity, Inc.
//
// Distributed under the Apache License, Version 2.0.
// See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html
//
#import <Foundation/Foundation.h>
#import <AppKit/NSSpeechRecognizer.h>
#include <QDebug>
#include "SpeechRecognizer.h"
// Objective-C delegate that bridges NSSpeechRecognizer callbacks back into
// the owning C++ SpeechRecognizer instance (the "listener").
@interface SpeechRecognizerDelegate : NSObject <NSSpeechRecognizerDelegate> {
SpeechRecognizer* _listener;
}
- (void)setListener:(SpeechRecognizer*)listener;
- (void)speechRecognizer:(NSSpeechRecognizer*)sender didRecognizeCommand:(id)command;
@end
@implementation SpeechRecognizerDelegate
- (void)setListener:(SpeechRecognizer*)listener {
_listener = listener;
}
// NSSpeechRecognizerDelegate callback: forward the recognized command to the
// C++ listener as a UTF-8 C string. `command` is one of the NSStrings that
// was registered via -setCommands:.
- (void)speechRecognizer:(NSSpeechRecognizer*)sender didRecognizeCommand:(id)command {
_listener->handleCommandRecognized(((NSString*)command).UTF8String);
}
@end
// Construct disabled by default; init() allocates the native recognizer and
// delegate immediately in the constructor.
SpeechRecognizer::SpeechRecognizer() :
QObject(),
_enabled(false),
_speechRecognizerDelegate(NULL),
_speechRecognizer(NULL) {
init();
}
// Tear down the native recognizer and its delegate.
//
// Fix: the original called -dealloc directly, which bypasses reference
// counting and the runtime's teardown bookkeeping and is never correct.
// This file is compiled with manual reference counting (-dealloc calls do
// not even compile under ARC), so balance the alloc/init in init() with
// -release instead.
SpeechRecognizer::~SpeechRecognizer() {
    if (_speechRecognizer) {
        // Stop listening and detach the delegate first so no recognition
        // callback can arrive on a dangling listener during teardown.
        [(id)_speechRecognizer stopListening];
        [(id)_speechRecognizer setDelegate:nil];
        [(id)_speechRecognizer release];
        _speechRecognizer = NULL;
    }
    if (_speechRecognizerDelegate) {
        [(id)_speechRecognizerDelegate release];
        _speechRecognizerDelegate = NULL;
    }
}
// Allocate the delegate bridge and the native NSSpeechRecognizer, start with
// an empty command list, and apply the current enabled state. At construction
// _enabled is false, so this initially issues stopListening — a harmless
// no-op on a fresh recognizer.
void SpeechRecognizer::init() {
_speechRecognizerDelegate = [[SpeechRecognizerDelegate alloc] init];
[(id)_speechRecognizerDelegate setListener:this];
_speechRecognizer = [[NSSpeechRecognizer alloc] init];
[(id)_speechRecognizer setCommands:[NSArray array]];
[(id)_speechRecognizer setDelegate:(id)_speechRecognizerDelegate];
setEnabled(_enabled);
}
// Called from the Objective-C delegate when a registered phrase is heard;
// re-emits the command as a Qt signal.
//
// Fix: `command` is a UTF-8 C string (it originates from
// -[NSString UTF8String] in the delegate), so decode it explicitly with
// QString::fromUtf8 instead of the implicit QString(const char*) conversion,
// which is codec-dependent across Qt configurations and unavailable when
// QT_NO_CAST_FROM_ASCII is defined.
void SpeechRecognizer::handleCommandRecognized(const char* command) {
    qDebug() << "Got command: " << command;
    emit commandRecognized(QString::fromUtf8(command));
}
// Record the desired state and start/stop the native recognizer accordingly.
// Note: messaging a nil _speechRecognizer is a safe no-op in Objective-C,
// so this is harmless even if called before init() completes.
void SpeechRecognizer::setEnabled(bool enabled) {
_enabled = enabled;
if (enabled) {
[(id)_speechRecognizer startListening];
} else {
[(id)_speechRecognizer stopListening];
}
}
// Slot: register `command` as a phrase the native recognizer listens for.
// NSSpeechRecognizer's command list is replaced wholesale, so append to a
// copy of the current array.
//
// Fix: encode the QString as UTF-8 to match stringWithUTF8String:. The
// original used toLocal8Bit(), whose codec is not guaranteed to be UTF-8
// and would mangle non-ASCII commands (or produce nil from
// stringWithUTF8String: on invalid UTF-8).
void SpeechRecognizer::addCommand(const QString& command) {
    NSArray* commands = [(id)_speechRecognizer commands];
    NSString* commandString = [NSString stringWithUTF8String:command.toUtf8().constData()];
    [(id)_speechRecognizer setCommands:[commands arrayByAddingObject:commandString]];
}
// Slot: unregister `command` from the native recognizer's phrase list.
//
// Fix: encode the QString as UTF-8 (matching stringWithUTF8String: and the
// encoding used when the command was added) instead of toLocal8Bit(), whose
// codec is platform-dependent; a mismatched encoding would make removeObject:
// fail to find non-ASCII commands.
void SpeechRecognizer::removeCommand(const QString& command) {
    NSMutableArray* commands = [NSMutableArray arrayWithArray:[(id)_speechRecognizer commands]];
    [commands removeObject:[NSString stringWithUTF8String:command.toUtf8().constData()]];
    [(id)_speechRecognizer setCommands:commands];
}