From 3c437076a7e5d30c60b9c01c5337fee164e3a387 Mon Sep 17 00:00:00 2001
From: Andrzej Kapolka
Date: Tue, 30 Jul 2013 11:07:33 -0700
Subject: [PATCH] When we have a mean face depth, use that rather than the face
 size to estimate the z coordinate.

---
 interface/src/Webcam.cpp | 47 +++++++++++++++++++++++++---------------
 interface/src/Webcam.h   |  5 +++--
 2 files changed, 32 insertions(+), 20 deletions(-)

diff --git a/interface/src/Webcam.cpp b/interface/src/Webcam.cpp
index 2efb12aed2..84654b8f96 100644
--- a/interface/src/Webcam.cpp
+++ b/interface/src/Webcam.cpp
@@ -57,8 +57,11 @@ void Webcam::setEnabled(bool enabled) {
     }
 }
 
+const float UNINITIALIZED_FACE_DEPTH = 0.0f;
+
 void Webcam::reset() {
     _initialFaceRect = RotatedRect();
+    _initialFaceDepth = UNINITIALIZED_FACE_DEPTH;
 
     if (_enabled) {
         // send a message to the grabber
@@ -149,7 +152,8 @@ Webcam::~Webcam() {
     delete _grabber;
 }
 
-void Webcam::setFrame(const Mat& color, int format, const Mat& depth, const RotatedRect& faceRect, const JointVector& joints) {
+void Webcam::setFrame(const Mat& color, int format, const Mat& depth, float meanFaceDepth,
+        const RotatedRect& faceRect, const JointVector& joints) {
     IplImage colorImage = color;
     glPixelStorei(GL_UNPACK_ROW_LENGTH, colorImage.widthStep / 3);
     if (_colorTextureID == 0) {
@@ -232,22 +236,28 @@ void Webcam::setFrame(const Mat& color, int format, const Mat& depth, const Rota
         const float ROTATION_SMOOTHING = 0.95f;
         _estimatedRotation.z = glm::mix(_faceRect.angle, _estimatedRotation.z, ROTATION_SMOOTHING);
 
-        // determine position based on translation and scaling of the face rect
+        // determine position based on translation and scaling of the face rect/mean face depth
         if (_initialFaceRect.size.area() == 0) {
             _initialFaceRect = _faceRect;
             _estimatedPosition = glm::vec3();
+            _initialFaceDepth = meanFaceDepth;
 
         } else {
-            float proportion = sqrtf(_initialFaceRect.size.area() / (float)_faceRect.size.area());
-            const float DISTANCE_TO_CAMERA = 0.333f;
+            float proportion, z;
+            if (meanFaceDepth == UNINITIALIZED_FACE_DEPTH) {
+                proportion = sqrtf(_initialFaceRect.size.area() / (float)_faceRect.size.area());
+                const float INITIAL_DISTANCE_TO_CAMERA = 0.333f;
+                z = INITIAL_DISTANCE_TO_CAMERA * proportion - INITIAL_DISTANCE_TO_CAMERA;
+
+            } else {
+                z = (meanFaceDepth - _initialFaceDepth) / 1000.0f;
+                proportion = meanFaceDepth / _initialFaceDepth;
+            }
             const float POSITION_SCALE = 0.5f;
-            float z = DISTANCE_TO_CAMERA * proportion - DISTANCE_TO_CAMERA;
-            glm::vec3 position = glm::vec3(
+            _estimatedPosition = glm::vec3(
                 (_faceRect.center.x - _initialFaceRect.center.x) * proportion * POSITION_SCALE / _textureSize.width,
                 (_faceRect.center.y - _initialFaceRect.center.y) * proportion * POSITION_SCALE / _textureSize.width,
                 z);
-            const float POSITION_SMOOTHING = 0.95f;
-            _estimatedPosition = glm::mix(position, _estimatedPosition, POSITION_SMOOTHING);
         }
     }
 
@@ -259,7 +269,7 @@ void Webcam::setFrame(const Mat& color, int format, const Mat& depth, const Rota
 }
 
 FrameGrabber::FrameGrabber() : _initialized(false), _capture(0), _searchWindow(0, 0, 0, 0),
-    _depthOffset(0.0), _codec(), _frameCount(0) {
+    _smoothedMeanFaceDepth(UNINITIALIZED_FACE_DEPTH), _codec(), _frameCount(0) {
 }
 
 FrameGrabber::~FrameGrabber() {
@@ -598,16 +608,17 @@ void FrameGrabber::grabFrame() {
                 }
             }
         }
-        double mean = (depthSamples == 0) ? 0.0 : depthTotal / (double)depthSamples;
+        float mean = (depthSamples == 0) ? UNINITIALIZED_FACE_DEPTH : depthTotal / (float)depthSamples;
 
-        // update the depth offset based on the mean
-        const double DEPTH_OFFSET_SMOOTHING = 0.95;
-        const double EIGHT_BIT_MIDPOINT = 128.0;
-        double meanOffset = EIGHT_BIT_MIDPOINT - mean;
-        _depthOffset = (_depthOffset == 0.0) ? meanOffset : glm::mix(meanOffset, _depthOffset, DEPTH_OFFSET_SMOOTHING);
+        // smooth the mean over time
+        const float DEPTH_OFFSET_SMOOTHING = 0.95f;
+        _smoothedMeanFaceDepth = (_smoothedMeanFaceDepth == UNINITIALIZED_FACE_DEPTH) ? mean :
+            glm::mix(mean, _smoothedMeanFaceDepth, DEPTH_OFFSET_SMOOTHING);
 
         // convert from 11 to 8 bits for preview/local display
-        depth.convertTo(_grayDepthFrame, CV_8UC1, 1.0, _depthOffset);
+        const double EIGHT_BIT_MIDPOINT = 128.0;
+        double depthOffset = EIGHT_BIT_MIDPOINT - _smoothedMeanFaceDepth;
+        depth.convertTo(_grayDepthFrame, CV_8UC1, 1.0, depthOffset);
 
         // likewise for the encoded representation
         uchar* yline = (uchar*)_encodedFace.data() + vpxImage.stride[0] * ENCODED_FACE_HEIGHT;
@@ -621,7 +632,7 @@ void FrameGrabber::grabFrame() {
                     *ydest++ = EIGHT_BIT_MAXIMUM;
 
                 } else {
-                    *ydest++ = saturate_cast<uchar>(depth + _depthOffset);
+                    *ydest++ = saturate_cast<uchar>(depth + depthOffset);
                 }
             }
             yline += vpxImage.stride[0];
@@ -646,7 +657,7 @@ void FrameGrabber::grabFrame() {
     }
 
     QMetaObject::invokeMethod(Application::getInstance()->getWebcam(), "setFrame",
-        Q_ARG(cv::Mat, color), Q_ARG(int, format), Q_ARG(cv::Mat, _grayDepthFrame),
+        Q_ARG(cv::Mat, color), Q_ARG(int, format), Q_ARG(cv::Mat, _grayDepthFrame), Q_ARG(float, _smoothedMeanFaceDepth),
         Q_ARG(cv::RotatedRect, _smoothedFaceRect), Q_ARG(JointVector, joints));
 }
 
diff --git a/interface/src/Webcam.h b/interface/src/Webcam.h
index 260eda0897..16cf3a33a1 100644
--- a/interface/src/Webcam.h
+++ b/interface/src/Webcam.h
@@ -62,7 +62,7 @@ public:
 public slots:
 
     void setEnabled(bool enabled);
-    void setFrame(const cv::Mat& color, int format, const cv::Mat& depth,
+    void setFrame(const cv::Mat& color, int format, const cv::Mat& depth, float meanFaceDepth,
         const cv::RotatedRect& faceRect, const JointVector& joints);
 
 private:
@@ -77,6 +77,7 @@
     cv::Size2f _textureSize;
     cv::RotatedRect _faceRect;
     cv::RotatedRect _initialFaceRect;
+    float _initialFaceDepth;
     JointVector _joints;
 
     uint64_t _startTimestamp;
@@ -117,7 +118,7 @@ private:
     cv::Mat _backProject;
     cv::Rect _searchWindow;
     cv::Mat _grayDepthFrame;
-    double _depthOffset;
+    float _smoothedMeanFaceDepth;
     vpx_codec_ctx_t _codec;
     int _frameCount;
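
Illustrative note (not part of the patch): the sketch below condenses the z-estimation
logic that the patched Webcam::setFrame now follows. estimateZ and its parameters are
hypothetical names introduced here for clarity; the UNINITIALIZED_FACE_DEPTH sentinel,
the 0.333f initial camera distance, and the division by 1000.0f (which suggests the
grabber reports depth in millimeters) are taken directly from the diff above.

    #include <cmath>

    // Hypothetical standalone helper mirroring the patched logic: estimate the head's
    // z offset relative to its starting position, in meters.
    float estimateZ(float meanFaceDepth, float initialFaceDepth,
                    float initialFaceArea, float currentFaceArea) {
        const float UNINITIALIZED_FACE_DEPTH = 0.0f;
        if (meanFaceDepth == UNINITIALIZED_FACE_DEPTH) {
            // No depth camera data: fall back to the old behavior and infer distance
            // from how much the face rectangle has grown or shrunk.
            float proportion = std::sqrt(initialFaceArea / currentFaceArea);
            const float INITIAL_DISTANCE_TO_CAMERA = 0.333f;
            return INITIAL_DISTANCE_TO_CAMERA * proportion - INITIAL_DISTANCE_TO_CAMERA;
        }
        // Depth data available: use the change in mean face depth directly,
        // converting from the sensor's units to meters.
        return (meanFaceDepth - initialFaceDepth) / 1000.0f;
    }

The same branch also selects the proportion used to scale the x/y translation (rect-area
ratio without depth, depth ratio with it), and the grabber smooths the raw per-frame mean
(glm::mix with a 0.95 weight) before passing it to setFrame, so a single noisy depth frame
does not jerk the estimated position.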