Track features on face, add a mode to render just key points.

This commit is contained in:
Andrzej Kapolka 2013-07-16 11:54:19 -07:00
parent d8d9f5e315
commit 905bd9b011
5 changed files with 92 additions and 22 deletions

View file

@ -26,7 +26,8 @@ using namespace xn;
#endif
// register types with Qt metatype system
int jointVectorMetaType = qRegisterMetaType&lt;JointVector&gt;("JointVector");
int keyPointVectorMetaType = qRegisterMetaType<KeyPointVector>("KeyPointVector");
int matMetaType = qRegisterMetaType<Mat>("cv::Mat");
int rotatedRectMetaType = qRegisterMetaType<RotatedRect>("cv::RotatedRect");
@ -132,8 +133,18 @@ void Webcam::renderPreview(int screenWidth, int screenHeight) {
glVertex2f(left + facePoints[3].x * xScale, top + facePoints[3].y * yScale);
glEnd();
char fps[20];
sprintf(fps, "FPS: %d", (int)(roundf(_frameCount * 1000000.0f / (usecTimestampNow() - _startTimestamp))));
if (!_keyPoints.empty()) {
glColor3f(0.0f, 1.0f, 0.0f);
glBegin(GL_POINTS);
for (KeyPointVector::iterator it = _keyPoints.begin(); it != _keyPoints.end(); it++) {
glVertex2f(left + it->pt.x * xScale, top + it->pt.y * yScale);
}
glEnd();
}
char fps[30];
sprintf(fps, "FPS: %d, Points: %d", (int)(roundf(_frameCount * 1000000.0f / (usecTimestampNow() - _startTimestamp))),
(int)_keyPoints.size());
drawtext(left, top + PREVIEW_HEIGHT + 20, 0.10, 0, 1, 0, fps);
}
}
@ -147,7 +158,7 @@ Webcam::~Webcam() {
}
void Webcam::setFrame(const Mat& color, int format, const Mat& depth, const Mat& depthPreview,
const RotatedRect& faceRect, const JointVector& joints) {
const RotatedRect& faceRect, const KeyPointVector& keyPoints, const JointVector& joints) {
IplImage colorImage = color;
glPixelStorei(GL_UNPACK_ROW_LENGTH, colorImage.widthStep / 3);
if (_colorTextureID == 0) {
@ -186,6 +197,7 @@ void Webcam::setFrame(const Mat& color, int format, const Mat& depth, const Mat&
// store our face rect and joints, update our frame count for fps computation
_faceRect = faceRect;
_joints = joints;
_keyPoints = keyPoints;
_frameCount++;
const int MAX_FPS = 60;
@ -276,7 +288,7 @@ void Webcam::setFrame(const Mat& color, int format, const Mat& depth, const Mat&
QTimer::singleShot(qMax((int)remaining / 1000, 0), _grabber, SLOT(grabFrame()));
}
FrameGrabber::FrameGrabber() : _initialized(false), _capture(0), _searchWindow(0, 0, 0, 0) {
FrameGrabber::FrameGrabber() : _initialized(false), _capture(0), _searchWindow(0, 0, 0, 0), _depthOffset(-512.0) {
}
FrameGrabber::~FrameGrabber() {
@ -389,6 +401,11 @@ void FrameGrabber::shutdown() {
thread()->quit();
}
// Clamps a point so it lies within the given rectangle, treating the
// rectangle's extent as inclusive pixel bounds [x, x + width - 1] and
// [y, y + height - 1]. Used below to keep the CamShift face search window
// inside the depth image before slicing Mats with it.
static Point clip(const Point& point, const Rect& bounds) {
return Point(glm::clamp(point.x, bounds.x, bounds.x + bounds.width - 1),
glm::clamp(point.y, bounds.y, bounds.y + bounds.height - 1));
}
void FrameGrabber::grabFrame() {
if (!(_initialized || init())) {
return;
@ -404,9 +421,6 @@ void FrameGrabber::grabFrame() {
format = GL_RGB;
depth = Mat(_depthMetaData.YRes(), _depthMetaData.XRes(), CV_16UC1, (void*)_depthGenerator.GetDepthMap());
const double EIGHT_BIT_MAX = 255;
const double ELEVEN_BIT_MAX = 2047;
depth.convertTo(_grayDepthFrame, CV_8UC1, 1.0, -512);
_userID = 0;
XnUInt16 userCount = 1;
@ -463,7 +477,7 @@ void FrameGrabber::grabFrame() {
float ranges[] = { 0, 180 };
const float* range = ranges;
if (_searchWindow.area() == 0) {
vector<cv::Rect> faces;
vector<Rect> faces;
_faceCascade.detectMultiScale(color, faces, 1.1, 6);
if (!faces.empty()) {
_searchWindow = faces.front();
@ -486,11 +500,40 @@ void FrameGrabber::grabFrame() {
bitwise_and(_backProject, _mask, _backProject);
faceRect = CamShift(_backProject, _searchWindow, TermCriteria(CV_TERMCRIT_EPS | CV_TERMCRIT_ITER, 10, 1));
_searchWindow = faceRect.boundingRect();
}
Rect faceBounds = faceRect.boundingRect();
Rect imageBounds(0, 0, depth.cols, depth.rows);
_searchWindow = Rect(clip(faceBounds.tl(), imageBounds), clip(faceBounds.br(), imageBounds));
}
#ifdef HAVE_OPENNI
if (_depthGenerator.IsValid()) {
if (_searchWindow.area() > 0) {
const double DEPTH_OFFSET_SMOOTHING = 0.95;
_depthOffset = glm::mix(128.0 - mean(depth(_searchWindow))[0], _depthOffset, DEPTH_OFFSET_SMOOTHING);
}
depth.convertTo(_grayDepthFrame, CV_8UC1, 1.0, _depthOffset);
}
#endif
KeyPointVector keyPoints;
if (!_hsvFrame.empty()) {
_grayFrame.create(_hsvFrame.rows, _hsvFrame.cols, CV_8UC1);
int fromTo[] = { 2, 0 };
Mat hsvInner = _hsvFrame(_searchWindow);
Mat grayInner = _grayFrame(_searchWindow);
mixChannels(&hsvInner, 1, &grayInner, 1, fromTo, 1);
FAST(grayInner, keyPoints, 4);
// offset the detected points
for (KeyPointVector::iterator it = keyPoints.begin(); it != keyPoints.end(); it++) {
it->pt.x += _searchWindow.x;
it->pt.y += _searchWindow.y;
}
}
QMetaObject::invokeMethod(Application::getInstance()->getWebcam(), "setFrame",
Q_ARG(cv::Mat, color), Q_ARG(int, format), Q_ARG(cv::Mat, depth), Q_ARG(cv::Mat, _grayDepthFrame),
Q_ARG(cv::RotatedRect, faceRect), Q_ARG(JointVector, joints));
Q_ARG(cv::RotatedRect, faceRect), Q_ARG(KeyPointVector, keyPoints), Q_ARG(JointVector, joints));
}
bool FrameGrabber::init() {

View file

@ -33,6 +33,7 @@ class FrameGrabber;
class Joint;
typedef QVector<Joint> JointVector;
typedef std::vector<cv::KeyPoint> KeyPointVector;
class Webcam : public QObject {
Q_OBJECT
@ -47,6 +48,7 @@ public:
GLuint getColorTextureID() const { return _colorTextureID; }
GLuint getDepthTextureID() const { return _depthTextureID; }
const cv::Size2f& getTextureSize() const { return _textureSize; }
const KeyPointVector& getKeyPoints() const { return _keyPoints; }
const cv::RotatedRect& getEstimatedFaceRect() const { return _estimatedFaceRect; }
const glm::vec3& getEstimatedPosition() const { return _estimatedPosition; }
@ -59,8 +61,8 @@ public:
public slots:
void setEnabled(bool enabled);
void setFrame(const cv::Mat& color, int format, const cv::Mat& depth,
const cv::Mat& depthPreview, const cv::RotatedRect& faceRect, const JointVector& joints);
void setFrame(const cv::Mat& color, int format, const cv::Mat& depth, const cv::Mat& depthPreview,
const cv::RotatedRect& faceRect, const KeyPointVector& keyPoints, const JointVector& joints);
private:
@ -74,6 +76,7 @@ private:
cv::Size2f _textureSize;
cv::RotatedRect _faceRect;
cv::RotatedRect _initialFaceRect;
KeyPointVector _keyPoints;
JointVector _joints;
uint64_t _startTimestamp;
@ -110,12 +113,14 @@ private:
CvCapture* _capture;
cv::CascadeClassifier _faceCascade;
cv::Mat _hsvFrame;
cv::Mat _grayFrame;
cv::Mat _mask;
cv::SparseMat _histogram;
cv::Mat _backProject;
cv::Rect _searchWindow;
cv::Mat _grayDepthFrame;
double _depthOffset;
#ifdef HAVE_OPENNI
xn::Context _xnContext;
xn::DepthGenerator _depthGenerator;
@ -140,6 +145,7 @@ public:
};
Q_DECLARE_METATYPE(JointVector)
Q_DECLARE_METATYPE(KeyPointVector)
Q_DECLARE_METATYPE(cv::Mat)
Q_DECLARE_METATYPE(cv::RotatedRect)

View file

@ -316,6 +316,7 @@ void Avatar::updateFromGyrosAndOrWebcam(bool gyroLook,
_head.getFace().setDepthTextureID(webcam->getDepthTextureID());
_head.getFace().setTextureSize(webcam->getTextureSize());
_head.getFace().setTextureRect(webcam->getEstimatedFaceRect());
_head.getFace().setKeyPoints(webcam->getKeyPoints());
// compute and store the joint rotations
const JointVector& joints = webcam->getEstimatedJoints();

View file

@ -22,7 +22,7 @@ int Face::_texCoordUpLocation;
GLuint Face::_vboID;
GLuint Face::_iboID;
Face::Face(Head* owningHead) : _owningHead(owningHead), _renderMode(MESH), _colorTextureID(0), _depthTextureID(0) {
Face::Face(Head* owningHead) : _owningHead(owningHead), _renderMode(POINTS), _colorTextureID(0), _depthTextureID(0) {
}
bool Face::render(float alpha) {
@ -126,14 +126,31 @@ bool Face::render(float alpha) {
glEnable(GL_ALPHA_TEST);
glAlphaFunc(GL_EQUAL, 1.0f);
if (_renderMode == POINTS) {
glPointSize(3.0f);
if (_renderMode == MESH) {
glDrawRangeElementsEXT(GL_TRIANGLES, 0, VERTEX_COUNT - 1, INDEX_COUNT, GL_UNSIGNED_INT, 0);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
} else if (_renderMode == POINTS) {
glPointSize(5.0f);
glDrawArrays(GL_POINTS, 0, VERTEX_COUNT);
glPointSize(1.0f);
} else {
glDrawRangeElementsEXT(GL_TRIANGLES, 0, VERTEX_COUNT - 1, INDEX_COUNT, GL_UNSIGNED_INT, 0);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
} else if (_renderMode == KEY_POINTS) {
glPointSize(10.0f);
float widthSquared = _textureRect.size.width * _textureRect.size.width;
float heightSquared = _textureRect.size.height * _textureRect.size.height;
float rightX = (points[3].x - points[0].x) / widthSquared;
float rightY = (points[3].y - points[0].y) / widthSquared;
float upX = (points[1].x - points[0].x) / heightSquared;
float upY = (points[1].y - points[0].y) / heightSquared;
glBegin(GL_POINTS);
for (KeyPointVector::iterator it = _keyPoints.begin(); it != _keyPoints.end(); it++) {
float relativeX = it->pt.x - points[0].x;
float relativeY = it->pt.y - points[0].y;
glVertex2f(relativeX * rightX + relativeY * rightY, relativeX * upX + relativeY * upY);
}
glEnd();
glPointSize(1.0f);
}
glDisable(GL_ALPHA_TEST);

View file

@ -14,6 +14,7 @@
#include <opencv2/opencv.hpp>
#include "InterfaceConfig.h"
#include "Webcam.h"
class Head;
class ProgramObject;
@ -29,6 +30,7 @@ public:
void setDepthTextureID(GLuint depthTextureID) { _depthTextureID = depthTextureID; }
void setTextureSize(const cv::Size2f& textureSize) { _textureSize = textureSize; }
void setTextureRect(const cv::RotatedRect& textureRect) { _textureRect = textureRect; }
void setKeyPoints(const KeyPointVector& keyPoints) { _keyPoints = keyPoints; }
bool render(float alpha);
@ -38,7 +40,7 @@ public slots:
private:
enum RenderMode { POINTS, MESH, RENDER_MODE_COUNT };
enum RenderMode { MESH, POINTS, KEY_POINTS, RENDER_MODE_COUNT };
Head* _owningHead;
RenderMode _renderMode;
@ -46,6 +48,7 @@ private:
GLuint _depthTextureID;
cv::Size2f _textureSize;
cv::RotatedRect _textureRect;
KeyPointVector _keyPoints;
static ProgramObject* _program;
static int _texCoordCornerLocation;