From 905bd9b011116772b345e2509c4398cce7ee87a5 Mon Sep 17 00:00:00 2001
From: Andrzej Kapolka <drzej.k@gmail.com>
Date: Tue, 16 Jul 2013 11:54:19 -0700
Subject: [PATCH] Track features on face, add a mode to render just key points.

---
 interface/src/Webcam.cpp        | 67 +++++++++++++++++++++++++++------
 interface/src/Webcam.h          | 12 ++++--
 interface/src/avatar/Avatar.cpp |  1 +
 interface/src/avatar/Face.cpp   | 29 +++++++++++---
 interface/src/avatar/Face.h     |  5 ++-
 5 files changed, 92 insertions(+), 22 deletions(-)
diff --git a/interface/src/Webcam.cpp b/interface/src/Webcam.cpp
index 853546c276..a053f7e3e4 100644
--- a/interface/src/Webcam.cpp
+++ b/interface/src/Webcam.cpp
@@ -26,7 +26,8 @@ using namespace xn;
 #endif
 
 // register types with Qt metatype system
-int jointVectorMetaType = qRegisterMetaType<JointVector>("JointVector"); 
+int jointVectorMetaType = qRegisterMetaType<JointVector>("JointVector");
+int keyPointVectorMetaType = qRegisterMetaType<KeyPointVector>("KeyPointVector"); 
 int matMetaType = qRegisterMetaType<Mat>("cv::Mat");
 int rotatedRectMetaType = qRegisterMetaType<RotatedRect>("cv::RotatedRect");
 
@@ -132,8 +133,18 @@ void Webcam::renderPreview(int screenWidth, int screenHeight) {
             glVertex2f(left + facePoints[3].x * xScale, top + facePoints[3].y * yScale);
         glEnd();
         
-        char fps[20];
-        sprintf(fps, "FPS: %d", (int)(roundf(_frameCount * 1000000.0f / (usecTimestampNow() - _startTimestamp))));
+        if (!_keyPoints.empty()) {
+            glColor3f(0.0f, 1.0f, 0.0f);
+            glBegin(GL_POINTS);
+            for (KeyPointVector::iterator it = _keyPoints.begin(); it != _keyPoints.end(); it++) {
+                glVertex2f(left + it->pt.x * xScale, top + it->pt.y * yScale);
+            }
+            glEnd();
+        }
+        
+        char fps[48]; // "FPS: %d, Points: %d" needs up to 38 bytes (two 11-char ints + NUL); 30 could overflow
+        sprintf(fps, "FPS: %d, Points: %d", (int)(roundf(_frameCount * 1000000.0f / (usecTimestampNow() - _startTimestamp))),
+            (int)_keyPoints.size());
         drawtext(left, top + PREVIEW_HEIGHT + 20, 0.10, 0, 1, 0, fps);
     }
 }
@@ -147,7 +158,7 @@ Webcam::~Webcam() {
 }
 
 void Webcam::setFrame(const Mat& color, int format, const Mat& depth, const Mat& depthPreview,
-        const RotatedRect& faceRect, const JointVector& joints) {
+        const RotatedRect& faceRect, const KeyPointVector& keyPoints, const JointVector& joints) {
     IplImage colorImage = color;
     glPixelStorei(GL_UNPACK_ROW_LENGTH, colorImage.widthStep / 3);
     if (_colorTextureID == 0) {
@@ -186,6 +197,7 @@ void Webcam::setFrame(const Mat& color, int format, const Mat& depth, const Mat&
     // store our face rect and joints, update our frame count for fps computation
     _faceRect = faceRect;
     _joints = joints;
+    _keyPoints = keyPoints;
     _frameCount++;
     
     const int MAX_FPS = 60;
@@ -276,7 +288,7 @@ void Webcam::setFrame(const Mat& color, int format, const Mat& depth, const Mat&
     QTimer::singleShot(qMax((int)remaining / 1000, 0), _grabber, SLOT(grabFrame()));
 }
 
-FrameGrabber::FrameGrabber() : _initialized(false), _capture(0), _searchWindow(0, 0, 0, 0) {
+FrameGrabber::FrameGrabber() : _initialized(false), _capture(0), _searchWindow(0, 0, 0, 0), _depthOffset(-512.0) { // _depthOffset: starting offset for the gray depth preview; grabFrame re-estimates it from the search window
 }
 
 FrameGrabber::~FrameGrabber() {
@@ -389,6 +401,11 @@ void FrameGrabber::shutdown() {
     thread()->quit();
 }
 
+static Point clip(const Point& point, const Rect& bounds) { // clamps point to the inclusive pixel range covered by bounds
+    return Point(glm::clamp(point.x, bounds.x, qMax(bounds.x, bounds.x + bounds.width - 1)), // qMax keeps min <= max, so
+        glm::clamp(point.y, bounds.y, qMax(bounds.y, bounds.y + bounds.height - 1))); // empty bounds yield the bounds origin
+}
+
 void FrameGrabber::grabFrame() {
     if (!(_initialized || init())) {
         return;
@@ -404,9 +421,6 @@ void FrameGrabber::grabFrame() {
         format = GL_RGB;
         
         depth = Mat(_depthMetaData.YRes(), _depthMetaData.XRes(), CV_16UC1, (void*)_depthGenerator.GetDepthMap());
-        const double EIGHT_BIT_MAX = 255;
-        const double ELEVEN_BIT_MAX = 2047;
-        depth.convertTo(_grayDepthFrame, CV_8UC1, 1.0, -512);
         
         _userID = 0;
         XnUInt16 userCount = 1; 
@@ -463,7 +477,7 @@ void FrameGrabber::grabFrame() {
     float ranges[] = { 0, 180 };
     const float* range = ranges;
     if (_searchWindow.area() == 0) {
-        vector<cv::Rect> faces;
+        vector<Rect> faces;
         _faceCascade.detectMultiScale(color, faces, 1.1, 6);
         if (!faces.empty()) {
             _searchWindow = faces.front();
@@ -486,11 +500,40 @@ void FrameGrabber::grabFrame() {
         bitwise_and(_backProject, _mask, _backProject);
         
         faceRect = CamShift(_backProject, _searchWindow, TermCriteria(CV_TERMCRIT_EPS | CV_TERMCRIT_ITER, 10, 1));
-        _searchWindow = faceRect.boundingRect();
-    }   
+        Rect faceBounds = faceRect.boundingRect(); // clipped to the frame below; depth may be empty, so fall back to color
+        Rect imageBounds(0, 0, depth.empty() ? color.cols : depth.cols, depth.empty() ? color.rows : depth.rows);
+        _searchWindow = Rect(clip(faceBounds.tl(), imageBounds), clip(faceBounds.br(), imageBounds));
+    }
+
+#ifdef HAVE_OPENNI
+    if (_depthGenerator.IsValid()) {
+        if (_searchWindow.area() > 0) {
+            const double DEPTH_OFFSET_SMOOTHING = 0.95;
+            _depthOffset = glm::mix(128.0 - mean(depth(_searchWindow))[0], _depthOffset, DEPTH_OFFSET_SMOOTHING);
+        }
+        depth.convertTo(_grayDepthFrame, CV_8UC1, 1.0, _depthOffset);
+    }
+#endif
+
+    KeyPointVector keyPoints;
+    if (!_hsvFrame.empty()) {
+        _grayFrame.create(_hsvFrame.rows, _hsvFrame.cols, CV_8UC1);
+        int fromTo[] = { 2, 0 };
+        Mat hsvInner = _hsvFrame(_searchWindow);
+        Mat grayInner = _grayFrame(_searchWindow);
+        mixChannels(&hsvInner, 1, &grayInner, 1, fromTo, 1);
+        FAST(grayInner, keyPoints, 4);
+    
+        // offset the detected points
+        for (KeyPointVector::iterator it = keyPoints.begin(); it != keyPoints.end(); it++) {
+            it->pt.x += _searchWindow.x;
+            it->pt.y += _searchWindow.y;
+        }
+    }
+    
     QMetaObject::invokeMethod(Application::getInstance()->getWebcam(), "setFrame",
         Q_ARG(cv::Mat, color), Q_ARG(int, format), Q_ARG(cv::Mat, depth), Q_ARG(cv::Mat, _grayDepthFrame),
-        Q_ARG(cv::RotatedRect, faceRect), Q_ARG(JointVector, joints));
+        Q_ARG(cv::RotatedRect, faceRect), Q_ARG(KeyPointVector, keyPoints), Q_ARG(JointVector, joints));
 }
 
 bool FrameGrabber::init() {
diff --git a/interface/src/Webcam.h b/interface/src/Webcam.h
index 3218f11eca..cc22ecf602 100644
--- a/interface/src/Webcam.h
+++ b/interface/src/Webcam.h
@@ -33,6 +33,7 @@ class FrameGrabber;
 class Joint;
 
 typedef QVector<Joint> JointVector;
+typedef std::vector<cv::KeyPoint> KeyPointVector;
 
 class Webcam : public QObject {
     Q_OBJECT
@@ -47,6 +48,7 @@ public:
     GLuint getColorTextureID() const { return _colorTextureID; }
     GLuint getDepthTextureID() const { return _depthTextureID; }
     const cv::Size2f& getTextureSize() const { return _textureSize; }
+    const KeyPointVector& getKeyPoints() const { return _keyPoints; }
     
     const cv::RotatedRect& getEstimatedFaceRect() const { return _estimatedFaceRect; }
     const glm::vec3& getEstimatedPosition() const { return _estimatedPosition; }
@@ -59,8 +61,8 @@ public:
 public slots:
     
     void setEnabled(bool enabled);
-    void setFrame(const cv::Mat& color, int format, const cv::Mat& depth,
-        const cv::Mat& depthPreview, const cv::RotatedRect& faceRect, const JointVector& joints);
+    void setFrame(const cv::Mat& color, int format, const cv::Mat& depth, const cv::Mat& depthPreview,
+        const cv::RotatedRect& faceRect, const KeyPointVector& keyPoints, const JointVector& joints);
     
 private:
     
@@ -74,6 +76,7 @@ private:
     cv::Size2f _textureSize;
     cv::RotatedRect _faceRect;
     cv::RotatedRect _initialFaceRect;
+    KeyPointVector _keyPoints;
     JointVector _joints;
     
     uint64_t _startTimestamp;
@@ -110,12 +113,14 @@ private:
     CvCapture* _capture;
     cv::CascadeClassifier _faceCascade;
     cv::Mat _hsvFrame;
+    cv::Mat _grayFrame;
     cv::Mat _mask;
     cv::SparseMat _histogram;
     cv::Mat _backProject;
     cv::Rect _searchWindow;
     cv::Mat _grayDepthFrame;
-
+    double _depthOffset;
+    
 #ifdef HAVE_OPENNI
     xn::Context _xnContext;
     xn::DepthGenerator _depthGenerator;
@@ -140,6 +145,7 @@ public:
 };
 
 Q_DECLARE_METATYPE(JointVector)
+Q_DECLARE_METATYPE(KeyPointVector)
 Q_DECLARE_METATYPE(cv::Mat)
 Q_DECLARE_METATYPE(cv::RotatedRect)
 
diff --git a/interface/src/avatar/Avatar.cpp b/interface/src/avatar/Avatar.cpp
index d13a152676..0a6f9b741b 100755
--- a/interface/src/avatar/Avatar.cpp
+++ b/interface/src/avatar/Avatar.cpp
@@ -316,6 +316,7 @@ void Avatar::updateFromGyrosAndOrWebcam(bool gyroLook,
         _head.getFace().setDepthTextureID(webcam->getDepthTextureID());
         _head.getFace().setTextureSize(webcam->getTextureSize());
         _head.getFace().setTextureRect(webcam->getEstimatedFaceRect());
+        _head.getFace().setKeyPoints(webcam->getKeyPoints());
         
         // compute and store the joint rotations
         const JointVector& joints = webcam->getEstimatedJoints();
diff --git a/interface/src/avatar/Face.cpp b/interface/src/avatar/Face.cpp
index 775f015bb6..b2d97616f1 100644
--- a/interface/src/avatar/Face.cpp
+++ b/interface/src/avatar/Face.cpp
@@ -22,7 +22,7 @@ int Face::_texCoordUpLocation;
 GLuint Face::_vboID;
 GLuint Face::_iboID;
 
-Face::Face(Head* owningHead) : _owningHead(owningHead), _renderMode(MESH), _colorTextureID(0), _depthTextureID(0) {
+Face::Face(Head* owningHead) : _owningHead(owningHead), _renderMode(POINTS), _colorTextureID(0), _depthTextureID(0) { // NOTE(review): default mode is now POINTS (was MESH) - confirm intended
 }
 
 bool Face::render(float alpha) {
@@ -126,14 +126,31 @@ bool Face::render(float alpha) {
         glEnable(GL_ALPHA_TEST);
         glAlphaFunc(GL_EQUAL, 1.0f);
         
-        if (_renderMode == POINTS) {
-            glPointSize(3.0f);
+        if (_renderMode == MESH) {
+            glDrawRangeElementsEXT(GL_TRIANGLES, 0, VERTEX_COUNT - 1, INDEX_COUNT, GL_UNSIGNED_INT, 0);
+            glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
+        
+        } else if (_renderMode == POINTS) {
+            glPointSize(5.0f);
             glDrawArrays(GL_POINTS, 0, VERTEX_COUNT);
             glPointSize(1.0f);
         
-        } else {
-            glDrawRangeElementsEXT(GL_TRIANGLES, 0, VERTEX_COUNT - 1, INDEX_COUNT, GL_UNSIGNED_INT, 0);
-            glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
+        } else if (_renderMode == KEY_POINTS) {
+            glPointSize(10.0f);
+            float widthSquared = _textureRect.size.width * _textureRect.size.width;
+            float heightSquared = _textureRect.size.height * _textureRect.size.height;
+            float rightX = (points[3].x - points[0].x) / widthSquared;
+            float rightY = (points[3].y - points[0].y) / widthSquared;
+            float upX = (points[1].x - points[0].x) / heightSquared;
+            float upY = (points[1].y - points[0].y) / heightSquared;
+            glBegin(GL_POINTS);
+            for (KeyPointVector::iterator it = _keyPoints.begin(); it != _keyPoints.end(); it++) { 
+                float relativeX = it->pt.x - points[0].x;
+                float relativeY = it->pt.y - points[0].y;
+                glVertex2f(relativeX * rightX + relativeY * rightY, relativeX * upX + relativeY * upY);
+            }
+            glEnd();
+            glPointSize(1.0f);    
         }
         
         glDisable(GL_ALPHA_TEST);
diff --git a/interface/src/avatar/Face.h b/interface/src/avatar/Face.h
index 0911894bbb..326b447648 100644
--- a/interface/src/avatar/Face.h
+++ b/interface/src/avatar/Face.h
@@ -14,6 +14,7 @@
 #include <opencv2/opencv.hpp>
 
 #include "InterfaceConfig.h"
+#include "Webcam.h"
 
 class Head;
 class ProgramObject;
@@ -29,6 +30,7 @@ public:
     void setDepthTextureID(GLuint depthTextureID) { _depthTextureID = depthTextureID; }
     void setTextureSize(const cv::Size2f& textureSize) { _textureSize = textureSize; }
     void setTextureRect(const cv::RotatedRect& textureRect) { _textureRect = textureRect; }
+    void setKeyPoints(const KeyPointVector& keyPoints) { _keyPoints = keyPoints; }
     
     bool render(float alpha);
     
@@ -38,7 +40,7 @@ public slots:
     
 private:
 
-    enum RenderMode { POINTS, MESH, RENDER_MODE_COUNT };
+    enum RenderMode { MESH, POINTS, KEY_POINTS, RENDER_MODE_COUNT };
 
     Head* _owningHead;
     RenderMode _renderMode;
@@ -46,6 +48,7 @@ private:
     GLuint _depthTextureID;
     cv::Size2f _textureSize;
     cv::RotatedRect _textureRect;
+    KeyPointVector _keyPoints;
     
     static ProgramObject* _program;
     static int _texCoordCornerLocation;