From 3c437076a7e5d30c60b9c01c5337fee164e3a387 Mon Sep 17 00:00:00 2001
From: Andrzej Kapolka
Date: Tue, 30 Jul 2013 11:07:33 -0700
Subject: [PATCH] When we have a mean face depth, use that rather than the face
 size to estimate the z coordinate.

---
 interface/src/Webcam.cpp | 47 +++++++++++++++++++++++++---------------
 interface/src/Webcam.h   |  5 +++--
 2 files changed, 32 insertions(+), 20 deletions(-)

diff --git a/interface/src/Webcam.cpp b/interface/src/Webcam.cpp
index 2efb12aed2..84654b8f96 100644
--- a/interface/src/Webcam.cpp
+++ b/interface/src/Webcam.cpp
@@ -57,8 +57,11 @@ void Webcam::setEnabled(bool enabled) {
     }
 }
 
+const float UNINITIALIZED_FACE_DEPTH = 0.0f;
+
 void Webcam::reset() {
     _initialFaceRect = RotatedRect();
+    _initialFaceDepth = UNINITIALIZED_FACE_DEPTH;
 
     if (_enabled) {
         // send a message to the grabber
@@ -149,7 +152,8 @@ Webcam::~Webcam() {
     delete _grabber;
 }
 
-void Webcam::setFrame(const Mat& color, int format, const Mat& depth, const RotatedRect& faceRect, const JointVector& joints) {
+void Webcam::setFrame(const Mat& color, int format, const Mat& depth, float meanFaceDepth,
+        const RotatedRect& faceRect, const JointVector& joints) {
     IplImage colorImage = color;
     glPixelStorei(GL_UNPACK_ROW_LENGTH, colorImage.widthStep / 3);
     if (_colorTextureID == 0) {
@@ -232,22 +236,28 @@ void Webcam::setFrame(const Mat& color, int format, const Mat& depth, const Rota
         const float ROTATION_SMOOTHING = 0.95f;
         _estimatedRotation.z = glm::mix(_faceRect.angle, _estimatedRotation.z, ROTATION_SMOOTHING);
 
-        // determine position based on translation and scaling of the face rect
+        // determine position based on translation and scaling of the face rect/mean face depth
         if (_initialFaceRect.size.area() == 0) {
             _initialFaceRect = _faceRect;
             _estimatedPosition = glm::vec3();
+            _initialFaceDepth = meanFaceDepth;
 
         } else {
-            float proportion = sqrtf(_initialFaceRect.size.area() / (float)_faceRect.size.area());
-            const float DISTANCE_TO_CAMERA = 0.333f;
+            float proportion, z;
+            if (meanFaceDepth == UNINITIALIZED_FACE_DEPTH) {
+                proportion = sqrtf(_initialFaceRect.size.area() / (float)_faceRect.size.area());
+                const float INITIAL_DISTANCE_TO_CAMERA = 0.333f;
+                z = INITIAL_DISTANCE_TO_CAMERA * proportion - INITIAL_DISTANCE_TO_CAMERA;
+
+            } else {
+                z = (meanFaceDepth - _initialFaceDepth) / 1000.0f;
+                proportion = meanFaceDepth / _initialFaceDepth;
+            }
             const float POSITION_SCALE = 0.5f;
-            float z = DISTANCE_TO_CAMERA * proportion - DISTANCE_TO_CAMERA;
-            glm::vec3 position = glm::vec3(
+            _estimatedPosition = glm::vec3(
                 (_faceRect.center.x - _initialFaceRect.center.x) * proportion * POSITION_SCALE / _textureSize.width,
                 (_faceRect.center.y - _initialFaceRect.center.y) * proportion * POSITION_SCALE / _textureSize.width,
                 z);
-            const float POSITION_SMOOTHING = 0.95f;
-            _estimatedPosition = glm::mix(position, _estimatedPosition, POSITION_SMOOTHING);
         }
     }
 
@@ -259,7 +269,7 @@ void Webcam::setFrame(const Mat& color, int format, const Mat& depth, const Rota
 }
 
 FrameGrabber::FrameGrabber() : _initialized(false), _capture(0), _searchWindow(0, 0, 0, 0),
-    _depthOffset(0.0), _codec(), _frameCount(0) {
+    _smoothedMeanFaceDepth(UNINITIALIZED_FACE_DEPTH), _codec(), _frameCount(0) {
 }
 
 FrameGrabber::~FrameGrabber() {
@@ -598,16 +608,17 @@ void FrameGrabber::grabFrame() {
                 }
             }
         }
-        double mean = (depthSamples == 0) ? 0.0 : depthTotal / (double)depthSamples;
+        float mean = (depthSamples == 0) ? UNINITIALIZED_FACE_DEPTH : depthTotal / (float)depthSamples;
 
-        // update the depth offset based on the mean
-        const double DEPTH_OFFSET_SMOOTHING = 0.95;
-        const double EIGHT_BIT_MIDPOINT = 128.0;
-        double meanOffset = EIGHT_BIT_MIDPOINT - mean;
-        _depthOffset = (_depthOffset == 0.0) ? meanOffset : glm::mix(meanOffset, _depthOffset, DEPTH_OFFSET_SMOOTHING);
+        // smooth the mean over time
+        const float DEPTH_OFFSET_SMOOTHING = 0.95f;
+        _smoothedMeanFaceDepth = (_smoothedMeanFaceDepth == UNINITIALIZED_FACE_DEPTH) ? mean :
+            glm::mix(mean, _smoothedMeanFaceDepth, DEPTH_OFFSET_SMOOTHING);
 
         // convert from 11 to 8 bits for preview/local display
-        depth.convertTo(_grayDepthFrame, CV_8UC1, 1.0, _depthOffset);
+        const double EIGHT_BIT_MIDPOINT = 128.0;
+        double depthOffset = EIGHT_BIT_MIDPOINT - _smoothedMeanFaceDepth;
+        depth.convertTo(_grayDepthFrame, CV_8UC1, 1.0, depthOffset);
 
         // likewise for the encoded representation
         uchar* yline = (uchar*)_encodedFace.data() + vpxImage.stride[0] * ENCODED_FACE_HEIGHT;
@@ -621,7 +632,7 @@ void FrameGrabber::grabFrame() {
                     *ydest++ = EIGHT_BIT_MAXIMUM;
 
                 } else {
-                    *ydest++ = saturate_cast<uchar>(depth + _depthOffset);
+                    *ydest++ = saturate_cast<uchar>(depth + depthOffset);
                 }
             }
             yline += vpxImage.stride[0];
@@ -646,7 +657,7 @@ void FrameGrabber::grabFrame() {
     }
 
     QMetaObject::invokeMethod(Application::getInstance()->getWebcam(), "setFrame",
-        Q_ARG(cv::Mat, color), Q_ARG(int, format), Q_ARG(cv::Mat, _grayDepthFrame),
+        Q_ARG(cv::Mat, color), Q_ARG(int, format), Q_ARG(cv::Mat, _grayDepthFrame), Q_ARG(float, _smoothedMeanFaceDepth),
         Q_ARG(cv::RotatedRect, _smoothedFaceRect), Q_ARG(JointVector, joints));
 }
 
diff --git a/interface/src/Webcam.h b/interface/src/Webcam.h
index 260eda0897..16cf3a33a1 100644
--- a/interface/src/Webcam.h
+++ b/interface/src/Webcam.h
@@ -62,7 +62,7 @@ public:
 public slots:
 
     void setEnabled(bool enabled);
-    void setFrame(const cv::Mat& color, int format, const cv::Mat& depth,
+    void setFrame(const cv::Mat& color, int format, const cv::Mat& depth, float meanFaceDepth,
         const cv::RotatedRect& faceRect, const JointVector& joints);
 
 private:
@@ -77,6 +77,7 @@
     cv::Size2f _textureSize;
     cv::RotatedRect _faceRect;
     cv::RotatedRect _initialFaceRect;
+    float _initialFaceDepth;
     JointVector _joints;
 
     uint64_t _startTimestamp;
@@ -117,7 +118,7 @@ private:
     cv::Mat _backProject;
     cv::Rect _searchWindow;
     cv::Mat _grayDepthFrame;
-    double _depthOffset;
+    float _smoothedMeanFaceDepth;
     vpx_codec_ctx_t _codec;
     int _frameCount;
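
Illustrative note (not part of the patch): the sketch below condenses the z-estimation
logic that the patched Webcam::setFrame now follows. estimateZ and its parameters are
hypothetical names introduced here for clarity; the UNINITIALIZED_FACE_DEPTH sentinel,
the 0.333f initial camera distance, and the division by 1000.0f (which suggests the
grabber reports depth in millimeters) are taken directly from the diff above.

    #include <cmath>

    // Hypothetical standalone helper mirroring the patched logic: estimate the head's
    // z offset relative to its starting position, in meters.
    float estimateZ(float meanFaceDepth, float initialFaceDepth,
                    float initialFaceArea, float currentFaceArea) {
        const float UNINITIALIZED_FACE_DEPTH = 0.0f;
        if (meanFaceDepth == UNINITIALIZED_FACE_DEPTH) {
            // No depth camera data: fall back to the old behavior and infer distance
            // from how much the face rectangle has grown or shrunk.
            float proportion = std::sqrt(initialFaceArea / currentFaceArea);
            const float INITIAL_DISTANCE_TO_CAMERA = 0.333f;
            return INITIAL_DISTANCE_TO_CAMERA * proportion - INITIAL_DISTANCE_TO_CAMERA;
        }
        // Depth data available: use the change in mean face depth directly,
        // converting from the sensor's units to meters.
        return (meanFaceDepth - initialFaceDepth) / 1000.0f;
    }

The same branch also selects the proportion used to scale the x/y translation (rect-area
ratio without depth, depth ratio with it), and the grabber smooths the raw per-frame mean
(glm::mix with a 0.95 weight) before passing it to setFrame, so a single noisy depth frame
does not jerk the estimated position.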