From bceb416d8a675bf5ad5b4227524fcdd679349b8d Mon Sep 17 00:00:00 2001
From: atlante45 <clement.brisset@gmail.com>
Date: Fri, 2 Aug 2013 17:18:35 -0700
Subject: [PATCH] merge

---
 interface/src/Webcam.cpp | 502 ++++++++++++++-------------------------
 1 file changed, 182 insertions(+), 320 deletions(-)
diff --git a/interface/src/Webcam.cpp b/interface/src/Webcam.cpp
index 3965bf887b..e37ca6c70e 100644
--- a/interface/src/Webcam.cpp
+++ b/interface/src/Webcam.cpp
@@ -19,7 +19,6 @@
 
 #include "Application.h"
 #include "Webcam.h"
-#include "avatar/Face.h"
 
 using namespace cv;
 using namespace std;
@@ -33,7 +32,7 @@ int jointVectorMetaType = qRegisterMetaType<JointVector>("JointVector");
 int matMetaType = qRegisterMetaType<Mat>("cv::Mat");
 int rotatedRectMetaType = qRegisterMetaType<RotatedRect>("cv::RotatedRect");
 
-Webcam::Webcam() : _enabled(false), _active(false), _colorTextureID(0), _depthTextureID(0), _skeletonTrackingOn(false) {
+Webcam::Webcam() : _enabled(false), _active(false), _colorTextureID(0), _depthTextureID(0) {
     // the grabber simply runs as fast as possible
     _grabber = new FrameGrabber();
     _grabber->moveToThread(&_grabberThread);
@@ -58,11 +57,8 @@ void Webcam::setEnabled(bool enabled) {
     }
 }
 
-const float UNINITIALIZED_FACE_DEPTH = 0.0f;
-
 void Webcam::reset() {
     _initialFaceRect = RotatedRect();
-    _initialFaceDepth = UNINITIALIZED_FACE_DEPTH;
     
     if (_enabled) {
         // send a message to the grabber
@@ -153,10 +149,7 @@ Webcam::~Webcam() {
     delete _grabber;
 }
 
-const float METERS_PER_MM = 1.0f / 1000.0f;
-
-void Webcam::setFrame(const Mat& color, int format, const Mat& depth, float midFaceDepth,
-        float aspectRatio, const RotatedRect& faceRect, bool sending, const JointVector& joints) {
+void Webcam::setFrame(const Mat& color, int format, const Mat& depth, const RotatedRect& faceRect, const JointVector& joints) {
     IplImage colorImage = color;
     glPixelStorei(GL_UNPACK_ROW_LENGTH, colorImage.widthStep / 3);
     if (_colorTextureID == 0) {
@@ -193,11 +186,9 @@ void Webcam::setFrame(const Mat& color, int format, const Mat& depth, float midF
     glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
     glBindTexture(GL_TEXTURE_2D, 0);
     
-    // store our various data, update our frame count for fps computation
-    _aspectRatio = aspectRatio;
+    // store our face rect and joints, update our frame count for fps computation
     _faceRect = faceRect;
-    _sending = sending;
-    _joints = _skeletonTrackingOn ? joints : JointVector();
+    _joints = joints;
     _frameCount++;
     
     const int MAX_FPS = 60;
@@ -241,28 +232,22 @@ void Webcam::setFrame(const Mat& color, int format, const Mat& depth, float midF
         const float ROTATION_SMOOTHING = 0.95f;
         _estimatedRotation.z = glm::mix(_faceRect.angle, _estimatedRotation.z, ROTATION_SMOOTHING);
         
-        // determine position based on translation and scaling of the face rect/mean face depth
+        // determine position based on translation and scaling of the face rect
         if (_initialFaceRect.size.area() == 0) {
             _initialFaceRect = _faceRect;
             _estimatedPosition = glm::vec3();
-            _initialFaceDepth = midFaceDepth;
         
         } else {
-            float proportion, z;
-            if (midFaceDepth == UNINITIALIZED_FACE_DEPTH) {
-                proportion = sqrtf(_initialFaceRect.size.area() / (float)_faceRect.size.area());
-                const float INITIAL_DISTANCE_TO_CAMERA = 0.333f;
-                z = INITIAL_DISTANCE_TO_CAMERA * proportion - INITIAL_DISTANCE_TO_CAMERA;           
-                   
-            } else {
-                z = (midFaceDepth - _initialFaceDepth) * METERS_PER_MM;    
-                proportion = midFaceDepth / _initialFaceDepth;
-            }
+            float proportion = sqrtf(_initialFaceRect.size.area() / (float)_faceRect.size.area());
+            const float DISTANCE_TO_CAMERA = 0.333f;
             const float POSITION_SCALE = 0.5f;
-            _estimatedPosition = glm::vec3(
+            float z = DISTANCE_TO_CAMERA * proportion - DISTANCE_TO_CAMERA;
+            glm::vec3 position = glm::vec3(
                 (_faceRect.center.x - _initialFaceRect.center.x) * proportion * POSITION_SCALE / _textureSize.width,
                 (_faceRect.center.y - _initialFaceRect.center.y) * proportion * POSITION_SCALE / _textureSize.width,
                 z);
+            const float POSITION_SMOOTHING = 0.95f;
+            _estimatedPosition = glm::mix(position, _estimatedPosition, POSITION_SMOOTHING);
         }
     }
     
@@ -273,8 +258,8 @@ void Webcam::setFrame(const Mat& color, int format, const Mat& depth, float midF
     QTimer::singleShot(qMax((int)remaining / 1000, 0), _grabber, SLOT(grabFrame()));
 }
 
-FrameGrabber::FrameGrabber() : _initialized(false), _videoSendMode(FULL_FRAME_VIDEO), _capture(0), _searchWindow(0, 0, 0, 0),
-    _smoothedMidFaceDepth(UNINITIALIZED_FACE_DEPTH), _colorCodec(), _depthCodec(), _frameCount(0) {
+FrameGrabber::FrameGrabber() : _initialized(false), _capture(0), _searchWindow(0, 0, 0, 0),
+    _depthOffset(0.0), _codec(), _frameCount(0) { // grabFrame()/shutdown() treat _codec.name == 0 as "encoder not created yet"
 }
 
 FrameGrabber::~FrameGrabber() {
@@ -367,13 +352,6 @@ static void XN_CALLBACK_TYPE calibrationCompleted(SkeletonCapability& capability
 }
 #endif
 
-void FrameGrabber::cycleVideoSendMode() {
-    _videoSendMode = (VideoSendMode)((_videoSendMode + 1) % VIDEO_SEND_MODE_COUNT);
-    _searchWindow = cv::Rect(0, 0, 0, 0);
-    
-    destroyCodecs();
-}
-
 void FrameGrabber::reset() {
     _searchWindow = cv::Rect(0, 0, 0, 0);
 
@@ -389,7 +367,10 @@ void FrameGrabber::shutdown() {
         cvReleaseCapture(&_capture);
         _capture = 0;
     }
-    destroyCodecs();
+    if (_codec.name != 0) {
+        vpx_codec_destroy(&_codec);
+        _codec.name = 0;
+    }
     _initialized = false;
     
     thread()->quit();
@@ -442,6 +423,7 @@ void FrameGrabber::grabFrame() {
                         _userID, (XnSkeletonJoint)parentJoint, parentOrientation);
                     rotation = glm::inverse(xnToGLM(parentOrientation.orientation)) * rotation;
                 }
+                const float METERS_PER_MM = 1.0f / 1000.0f;
                 joints[avatarJoint] = Joint(xnToGLM(transform.position.position, true) * METERS_PER_MM,
                     rotation, xnToGLM(projected));
             }
@@ -465,290 +447,181 @@ void FrameGrabber::grabFrame() {
         color = image;
     }
     
+    // if we don't have a search window (yet), try using the face cascade
+    int channels = 0;
+    float ranges[] = { 0, 180 };
+    const float* range = ranges;
+    if (_searchWindow.area() == 0) {
+        vector<Rect> faces;
+        _faceCascade.detectMultiScale(color, faces, 1.1, 6);
+        if (!faces.empty()) {
+            _searchWindow = faces.front();
+            updateHSVFrame(color, format);
+        
+            Mat faceHsv(_hsvFrame, _searchWindow);
+            Mat faceMask(_mask, _searchWindow);
+            int sizes = 30;
+            calcHist(&faceHsv, 1, &channels, faceMask, _histogram, 1, &sizes, &range);
+            double min, max;
+            minMaxLoc(_histogram, &min, &max);
+            _histogram.convertTo(_histogram, -1, (max == 0.0) ? 0.0 : 255.0 / max);
+        }
+    }
+    RotatedRect faceRect;
+    if (_searchWindow.area() > 0) {
+        updateHSVFrame(color, format);
+        
+        calcBackProject(&_hsvFrame, 1, &channels, _histogram, _backProject, &range);
+        bitwise_and(_backProject, _mask, _backProject);
+        
+        faceRect = CamShift(_backProject, _searchWindow, TermCriteria(CV_TERMCRIT_EPS | CV_TERMCRIT_ITER, 10, 1));
+        Rect faceBounds = faceRect.boundingRect();
+        Rect imageBounds(0, 0, color.cols, color.rows);
+        _searchWindow = Rect(clip(faceBounds.tl(), imageBounds), clip(faceBounds.br(), imageBounds));
+    }
+
+#ifdef HAVE_OPENNI
+    if (_depthGenerator.IsValid()) {
+        // convert from 11 to 8 bits, centered about the mean face depth (if possible)
+        if (_searchWindow.area() > 0) {
+            const double DEPTH_OFFSET_SMOOTHING = 0.95;
+            const double EIGHT_BIT_MIDPOINT = 128.0;
+            double meanOffset = EIGHT_BIT_MIDPOINT - mean(depth(_searchWindow))[0];
+            _depthOffset = (_depthOffset == 0.0) ? meanOffset : glm::mix(meanOffset, _depthOffset, DEPTH_OFFSET_SMOOTHING);
+        }
+        depth.convertTo(_grayDepthFrame, CV_8UC1, 1.0, _depthOffset);
+    }
+#endif
+
     const int ENCODED_FACE_WIDTH = 128;
     const int ENCODED_FACE_HEIGHT = 128;
-    int encodedWidth;
-    int encodedHeight;
-    float colorBitrateMultiplier = 1.0f;
-    float depthBitrateMultiplier = 1.0f;
-    Mat faceTransform;
-    float aspectRatio;
-    if (_videoSendMode == FULL_FRAME_VIDEO) {
-        // no need to find the face if we're sending full frame video
-        _smoothedFaceRect = RotatedRect(Point2f(color.cols / 2.0f, color.rows / 2.0f), Size2f(color.cols, color.rows), 0.0f);
-        encodedWidth = color.cols;
-        encodedHeight = color.rows;
-        aspectRatio = FULL_FRAME_ASPECT;
-        colorBitrateMultiplier = 4.0f;
+    int combinedFaceHeight = ENCODED_FACE_HEIGHT * (depth.empty() ? 1 : 2);
+    if (_codec.name == 0) {
+        // initialize encoder context
+        vpx_codec_enc_cfg_t codecConfig;
+        vpx_codec_enc_config_default(vpx_codec_vp8_cx(), &codecConfig, 0);
+        codecConfig.rc_target_bitrate = ENCODED_FACE_WIDTH * combinedFaceHeight * codecConfig.rc_target_bitrate /
+            codecConfig.g_w / codecConfig.g_h;
+        codecConfig.g_w = ENCODED_FACE_WIDTH;
+        codecConfig.g_h = combinedFaceHeight;
+        vpx_codec_enc_init(&_codec, vpx_codec_vp8_cx(), &codecConfig, 0); 
+    }
+    
+    // correct for 180 degree rotations
+    if (faceRect.angle < -90.0f) {
+        faceRect.angle += 180.0f;
+        
+    } else if (faceRect.angle > 90.0f) {
+        faceRect.angle -= 180.0f;
+    }
+    
+    // compute the smoothed face rect
+    if (_smoothedFaceRect.size.area() == 0) {
+        _smoothedFaceRect = faceRect;
         
     } else {
-        // if we don't have a search window (yet), try using the face cascade
-        int channels = 0;
-        float ranges[] = { 0, 180 };
-        const float* range = ranges;
-        if (_searchWindow.area() == 0) {
-            vector<Rect> faces;
-            _faceCascade.detectMultiScale(color, faces, 1.1, 6);
-            if (!faces.empty()) {
-                _searchWindow = faces.front();
-                updateHSVFrame(color, format);
-            
-                Mat faceHsv(_hsvFrame, _searchWindow);
-                Mat faceMask(_mask, _searchWindow);
-                int sizes = 30;
-                calcHist(&faceHsv, 1, &channels, faceMask, _histogram, 1, &sizes, &range);
-                double min, max;
-                minMaxLoc(_histogram, &min, &max);
-                _histogram.convertTo(_histogram, -1, (max == 0.0) ? 0.0 : 255.0 / max);
-            }
-        }
-        RotatedRect faceRect;
-        if (_searchWindow.area() > 0) {
-            updateHSVFrame(color, format);
-            
-            calcBackProject(&_hsvFrame, 1, &channels, _histogram, _backProject, &range);
-            bitwise_and(_backProject, _mask, _backProject);
-            
-            faceRect = CamShift(_backProject, _searchWindow, TermCriteria(CV_TERMCRIT_EPS | CV_TERMCRIT_ITER, 10, 1));
-            Rect faceBounds = faceRect.boundingRect();
-            Rect imageBounds(0, 0, color.cols, color.rows);
-            _searchWindow = Rect(clip(faceBounds.tl(), imageBounds), clip(faceBounds.br(), imageBounds));
-        }
-        encodedWidth = ENCODED_FACE_WIDTH;
-        encodedHeight = ENCODED_FACE_HEIGHT;
-        depthBitrateMultiplier = 2.0f;
-        
-        // correct for 180 degree rotations
-        if (faceRect.angle < -90.0f) {
-            faceRect.angle += 180.0f;
-            
-        } else if (faceRect.angle > 90.0f) {
-            faceRect.angle -= 180.0f;
-        }
-        
-        // compute the smoothed face rect
-        if (_smoothedFaceRect.size.area() == 0) {
-            _smoothedFaceRect = faceRect;
-            
-        } else {
-            const float FACE_RECT_SMOOTHING = 0.9f;
-            _smoothedFaceRect.center.x = glm::mix(faceRect.center.x, _smoothedFaceRect.center.x, FACE_RECT_SMOOTHING);
-            _smoothedFaceRect.center.y = glm::mix(faceRect.center.y, _smoothedFaceRect.center.y, FACE_RECT_SMOOTHING);
-            _smoothedFaceRect.size.width = glm::mix(faceRect.size.width, _smoothedFaceRect.size.width, FACE_RECT_SMOOTHING);
-            _smoothedFaceRect.size.height = glm::mix(faceRect.size.height, _smoothedFaceRect.size.height, FACE_RECT_SMOOTHING); 
-            _smoothedFaceRect.angle = glm::mix(faceRect.angle, _smoothedFaceRect.angle, FACE_RECT_SMOOTHING);
-        }
-    
-        // use the face rect to compute the face transform, aspect ratio
-        Point2f sourcePoints[4];
-        _smoothedFaceRect.points(sourcePoints);
-        Point2f destPoints[] = { Point2f(0, encodedHeight), Point2f(0, 0), Point2f(encodedWidth, 0) };
-        faceTransform = getAffineTransform(sourcePoints, destPoints);
-        aspectRatio = _smoothedFaceRect.size.width / _smoothedFaceRect.size.height;
+        const float FACE_RECT_SMOOTHING = 0.9f;
+        _smoothedFaceRect.center.x = glm::mix(faceRect.center.x, _smoothedFaceRect.center.x, FACE_RECT_SMOOTHING);
+        _smoothedFaceRect.center.y = glm::mix(faceRect.center.y, _smoothedFaceRect.center.y, FACE_RECT_SMOOTHING);
+        _smoothedFaceRect.size.width = glm::mix(faceRect.size.width, _smoothedFaceRect.size.width, FACE_RECT_SMOOTHING);
+        _smoothedFaceRect.size.height = glm::mix(faceRect.size.height, _smoothedFaceRect.size.height, FACE_RECT_SMOOTHING); 
+        _smoothedFaceRect.angle = glm::mix(faceRect.angle, _smoothedFaceRect.angle, FACE_RECT_SMOOTHING);
     }
-        
-    const ushort ELEVEN_BIT_MINIMUM = 0;
-    const uchar EIGHT_BIT_MIDPOINT = 128;
-    double depthOffset;
+    
+    // resize/rotate face into encoding rectangle
+    _faceColor.create(ENCODED_FACE_WIDTH, ENCODED_FACE_HEIGHT, CV_8UC3);
+    Point2f sourcePoints[4];
+    _smoothedFaceRect.points(sourcePoints);
+    Point2f destPoints[] = { Point2f(0, ENCODED_FACE_HEIGHT), Point2f(0, 0), Point2f(ENCODED_FACE_WIDTH, 0) };
+    Mat transform = getAffineTransform(sourcePoints, destPoints);
+    warpAffine(color, _faceColor, transform, _faceColor.size());
+    
+    // convert from RGB to YV12
+    const int ENCODED_BITS_PER_Y = 8;
+    const int ENCODED_BITS_PER_VU = 2;
+    const int ENCODED_BITS_PER_PIXEL = ENCODED_BITS_PER_Y + 2 * ENCODED_BITS_PER_VU;
+    const int BITS_PER_BYTE = 8;
+    _encodedFace.fill(128, ENCODED_FACE_WIDTH * combinedFaceHeight * ENCODED_BITS_PER_PIXEL / BITS_PER_BYTE);
+    vpx_image_t vpxImage;
+    vpx_img_wrap(&vpxImage, VPX_IMG_FMT_YV12, ENCODED_FACE_WIDTH, combinedFaceHeight, 1, (unsigned char*)_encodedFace.data());
+    uchar* yline = vpxImage.planes[0];
+    uchar* vline = vpxImage.planes[1];
+    uchar* uline = vpxImage.planes[2];
+    const int Y_RED_WEIGHT = (int)(0.299 * 256);
+    const int Y_GREEN_WEIGHT = (int)(0.587 * 256);
+    const int Y_BLUE_WEIGHT = (int)(0.114 * 256);
+    const int V_RED_WEIGHT = (int)(0.713 * 256);
+    const int U_BLUE_WEIGHT = (int)(0.564 * 256);
+    int redIndex = 0;
+    int greenIndex = 1;
+    int blueIndex = 2;
+    if (format == GL_BGR) {
+        redIndex = 2;
+        blueIndex = 0;
+    }
+    for (int i = 0; i < ENCODED_FACE_HEIGHT; i += 2) {
+        uchar* ydest = yline;
+        uchar* vdest = vline;
+        uchar* udest = uline;
+        for (int j = 0; j < ENCODED_FACE_WIDTH; j += 2) {
+            uchar* tl = _faceColor.ptr(i, j);
+            uchar* tr = _faceColor.ptr(i, j + 1);
+            uchar* bl = _faceColor.ptr(i + 1, j);
+            uchar* br = _faceColor.ptr(i + 1, j + 1);
+            
+            ydest[0] = (tl[redIndex] * Y_RED_WEIGHT + tl[1] * Y_GREEN_WEIGHT + tl[blueIndex] * Y_BLUE_WEIGHT) >> 8;
+            ydest[1] = (tr[redIndex] * Y_RED_WEIGHT + tr[1] * Y_GREEN_WEIGHT + tr[blueIndex] * Y_BLUE_WEIGHT) >> 8;
+            ydest[ENCODED_FACE_WIDTH] = (bl[redIndex] * Y_RED_WEIGHT + bl[greenIndex] *
+                Y_GREEN_WEIGHT + bl[blueIndex] * Y_BLUE_WEIGHT) >> 8;
+            ydest[ENCODED_FACE_WIDTH + 1] = (br[redIndex] * Y_RED_WEIGHT + br[greenIndex] *
+                Y_GREEN_WEIGHT + br[blueIndex] * Y_BLUE_WEIGHT) >> 8;
+            ydest += 2;
+            
+            int totalRed = tl[redIndex] + tr[redIndex] + bl[redIndex] + br[redIndex];
+            int totalGreen = tl[greenIndex] + tr[greenIndex] + bl[greenIndex] + br[greenIndex];
+            int totalBlue = tl[blueIndex] + tr[blueIndex] + bl[blueIndex] + br[blueIndex];
+            int totalY = (totalRed * Y_RED_WEIGHT + totalGreen * Y_GREEN_WEIGHT + totalBlue * Y_BLUE_WEIGHT) >> 8;
+            
+            *vdest++ = (((totalRed - totalY) * V_RED_WEIGHT) >> 10) + 128;
+            *udest++ = (((totalBlue - totalY) * U_BLUE_WEIGHT) >> 10) + 128;
+        }
+        yline += vpxImage.stride[0] * 2;
+        vline += vpxImage.stride[1];
+        uline += vpxImage.stride[2];
+    }
+    
+    // if we have depth data, warp that and just copy it in
     if (!depth.empty()) {
-        if (_videoSendMode == FACE_VIDEO) {
-            // warp the face depth without interpolation (because it will contain invalid zero values)
-            _faceDepth.create(encodedHeight, encodedWidth, CV_16UC1);
-            warpAffine(depth, _faceDepth, faceTransform, _faceDepth.size(), INTER_NEAREST);
+        _faceDepth.create(ENCODED_FACE_WIDTH, ENCODED_FACE_HEIGHT, CV_8UC1);
+        warpAffine(_grayDepthFrame, _faceDepth, transform, _faceDepth.size());
         
-        } else {
-            _faceDepth = depth;
-        }
-        _smoothedFaceDepth.create(encodedHeight, encodedWidth, CV_16UC1);
-            
-        // smooth the depth over time
-        const ushort ELEVEN_BIT_MAXIMUM = 2047;
-        const float DEPTH_SMOOTHING = 0.25f;
-        ushort* src = _faceDepth.ptr<ushort>();
-        ushort* dest = _smoothedFaceDepth.ptr<ushort>();
-        ushort minimumDepth = numeric_limits<ushort>::max();
-        for (int i = 0; i < encodedHeight; i++) {
-            for (int j = 0; j < encodedWidth; j++) {
-                ushort depth = *src++;
-                if (depth != ELEVEN_BIT_MINIMUM && depth != ELEVEN_BIT_MAXIMUM) {
-                    minimumDepth = min(minimumDepth, depth);
-                    *dest = (*dest == ELEVEN_BIT_MINIMUM) ? depth : (ushort)glm::mix(depth, *dest, DEPTH_SMOOTHING);
-                }
-                dest++;
-            }
-        }
-        const ushort MINIMUM_DEPTH_OFFSET = 64;
-        const float FIXED_MID_DEPTH = 640.0f;
-        float midFaceDepth = (_videoSendMode == FACE_VIDEO) ? (minimumDepth + MINIMUM_DEPTH_OFFSET) : FIXED_MID_DEPTH;
-        
-        // smooth the mid face depth over time
-        const float MID_FACE_DEPTH_SMOOTHING = 0.5f;
-        _smoothedMidFaceDepth = (_smoothedMidFaceDepth == UNINITIALIZED_FACE_DEPTH) ? midFaceDepth :
-            glm::mix(midFaceDepth, _smoothedMidFaceDepth, MID_FACE_DEPTH_SMOOTHING);
-
-        // convert from 11 to 8 bits for preview/local display
-        depthOffset = EIGHT_BIT_MIDPOINT - _smoothedMidFaceDepth;
-        depth.convertTo(_grayDepthFrame, CV_8UC1, 1.0, depthOffset);
-    }
-    
-    QByteArray payload;
-    if (_videoSendMode != NO_VIDEO) {
-        if (_colorCodec.name == 0) {
-            // initialize encoder context(s)
-            vpx_codec_enc_cfg_t codecConfig;
-            vpx_codec_enc_config_default(vpx_codec_vp8_cx(), &codecConfig, 0);
-            codecConfig.rc_target_bitrate = ENCODED_FACE_WIDTH * ENCODED_FACE_HEIGHT * colorBitrateMultiplier * 
-                codecConfig.rc_target_bitrate / codecConfig.g_w / codecConfig.g_h;
-            codecConfig.g_w = encodedWidth;
-            codecConfig.g_h = encodedHeight;
-            vpx_codec_enc_init(&_colorCodec, vpx_codec_vp8_cx(), &codecConfig, 0);
-            
-            if (!depth.empty()) {
-                codecConfig.rc_target_bitrate *= depthBitrateMultiplier;
-                vpx_codec_enc_init(&_depthCodec, vpx_codec_vp8_cx(), &codecConfig, 0);
-            }
-        }
-    
-        Mat transform;
-        if (_videoSendMode == FACE_VIDEO) {
-            // resize/rotate face into encoding rectangle
-            _faceColor.create(encodedHeight, encodedWidth, CV_8UC3);
-            warpAffine(color, _faceColor, faceTransform, _faceColor.size());
-        
-        } else {
-            _faceColor = color;
-        }
-        
-        // convert from RGB to YV12: see http://www.fourcc.org/yuv.php and
-        // http://docs.opencv.org/modules/imgproc/doc/miscellaneous_transformations.html#cvtcolor
-        const int ENCODED_BITS_PER_Y = 8;
-        const int ENCODED_BITS_PER_VU = 2;
-        const int ENCODED_BITS_PER_PIXEL = ENCODED_BITS_PER_Y + 2 * ENCODED_BITS_PER_VU;
-        const int BITS_PER_BYTE = 8;
-        _encodedFace.resize(encodedWidth * encodedHeight * ENCODED_BITS_PER_PIXEL / BITS_PER_BYTE);
-        vpx_image_t vpxImage;
-        vpx_img_wrap(&vpxImage, VPX_IMG_FMT_YV12, encodedWidth, encodedHeight, 1,
-            (unsigned char*)_encodedFace.data());
-        uchar* yline = vpxImage.planes[0];
-        uchar* vline = vpxImage.planes[1];
-        uchar* uline = vpxImage.planes[2];
-        const int Y_RED_WEIGHT = (int)(0.299 * 256);
-        const int Y_GREEN_WEIGHT = (int)(0.587 * 256);
-        const int Y_BLUE_WEIGHT = (int)(0.114 * 256);
-        const int V_RED_WEIGHT = (int)(0.713 * 256);
-        const int U_BLUE_WEIGHT = (int)(0.564 * 256);
-        int redIndex = 0;
-        int greenIndex = 1;
-        int blueIndex = 2;
-        if (format == GL_BGR) {
-            redIndex = 2;
-            blueIndex = 0;
-        }
-        for (int i = 0; i < encodedHeight; i += 2) {
-            uchar* ydest = yline;
-            uchar* vdest = vline;
-            uchar* udest = uline;
-            for (int j = 0; j < encodedWidth; j += 2) {
-                uchar* tl = _faceColor.ptr(i, j);
-                uchar* tr = _faceColor.ptr(i, j + 1);
-                uchar* bl = _faceColor.ptr(i + 1, j);
-                uchar* br = _faceColor.ptr(i + 1, j + 1);
-                
-                ydest[0] = (tl[redIndex] * Y_RED_WEIGHT + tl[1] * Y_GREEN_WEIGHT + tl[blueIndex] * Y_BLUE_WEIGHT) >> 8;
-                ydest[1] = (tr[redIndex] * Y_RED_WEIGHT + tr[1] * Y_GREEN_WEIGHT + tr[blueIndex] * Y_BLUE_WEIGHT) >> 8;
-                ydest[vpxImage.stride[0]] = (bl[redIndex] * Y_RED_WEIGHT + bl[greenIndex] *
-                    Y_GREEN_WEIGHT + bl[blueIndex] * Y_BLUE_WEIGHT) >> 8;
-                ydest[vpxImage.stride[0] + 1] = (br[redIndex] * Y_RED_WEIGHT + br[greenIndex] *
-                    Y_GREEN_WEIGHT + br[blueIndex] * Y_BLUE_WEIGHT) >> 8;
-                ydest += 2;
-                
-                int totalRed = tl[redIndex] + tr[redIndex] + bl[redIndex] + br[redIndex];
-                int totalGreen = tl[greenIndex] + tr[greenIndex] + bl[greenIndex] + br[greenIndex];
-                int totalBlue = tl[blueIndex] + tr[blueIndex] + bl[blueIndex] + br[blueIndex];
-                int totalY = (totalRed * Y_RED_WEIGHT + totalGreen * Y_GREEN_WEIGHT + totalBlue * Y_BLUE_WEIGHT) >> 8;
-                
-                *vdest++ = (((totalRed - totalY) * V_RED_WEIGHT) >> 10) + 128;
-                *udest++ = (((totalBlue - totalY) * U_BLUE_WEIGHT) >> 10) + 128;
-            }
-            yline += vpxImage.stride[0] * 2;
-            vline += vpxImage.stride[1];
-            uline += vpxImage.stride[2];
-        }
-        
-        // encode the frame
-        vpx_codec_encode(&_colorCodec, &vpxImage, ++_frameCount, 1, 0, VPX_DL_REALTIME);
-
-        // start the payload off with the aspect ratio (zero for full frame)
-        payload.append((const char*)&aspectRatio, sizeof(float));
-
-        // extract the encoded frame
-        vpx_codec_iter_t iterator = 0;
-        const vpx_codec_cx_pkt_t* packet;
-        while ((packet = vpx_codec_get_cx_data(&_colorCodec, &iterator)) != 0) {
-            if (packet->kind == VPX_CODEC_CX_FRAME_PKT) {
-                // prepend the length, which will indicate whether there's a depth frame too
-                payload.append((const char*)&packet->data.frame.sz, sizeof(packet->data.frame.sz));
-                payload.append((const char*)packet->data.frame.buf, packet->data.frame.sz);
-            }
-        }
-        
-        if (!depth.empty()) {
-            // convert with mask
-            uchar* yline = vpxImage.planes[0];
-            uchar* vline = vpxImage.planes[1];
-            uchar* uline = vpxImage.planes[2];
-            const uchar EIGHT_BIT_MAXIMUM = 255;
-            for (int i = 0; i < encodedHeight; i += 2) {
-                uchar* ydest = yline;
-                uchar* vdest = vline;
-                uchar* udest = uline;
-                for (int j = 0; j < encodedWidth; j += 2) {
-                    ushort tl = *_smoothedFaceDepth.ptr<ushort>(i, j);
-                    ushort tr = *_smoothedFaceDepth.ptr<ushort>(i, j + 1);
-                    ushort bl = *_smoothedFaceDepth.ptr<ushort>(i + 1, j);
-                    ushort br = *_smoothedFaceDepth.ptr<ushort>(i + 1, j + 1);
-                
-                    uchar mask = EIGHT_BIT_MAXIMUM;
-                    
-                    ydest[0] = (tl == ELEVEN_BIT_MINIMUM) ? (mask = EIGHT_BIT_MIDPOINT) :
-                        saturate_cast<uchar>(tl + depthOffset);
-                    ydest[1] = (tr == ELEVEN_BIT_MINIMUM) ? (mask = EIGHT_BIT_MIDPOINT) :
-                        saturate_cast<uchar>(tr + depthOffset);
-                    ydest[vpxImage.stride[0]] = (bl == ELEVEN_BIT_MINIMUM) ? (mask = EIGHT_BIT_MIDPOINT) :
-                        saturate_cast<uchar>(bl + depthOffset);
-                    ydest[vpxImage.stride[0] + 1] = (br == ELEVEN_BIT_MINIMUM) ? (mask = EIGHT_BIT_MIDPOINT) :
-                        saturate_cast<uchar>(br + depthOffset);
-                    ydest += 2;
-                
-                    *vdest++ = mask;
-                    *udest++ = EIGHT_BIT_MIDPOINT;
-                }
-                yline += vpxImage.stride[0] * 2;
-                vline += vpxImage.stride[1];
-                uline += vpxImage.stride[2];
-            }
-            
-            // encode the frame
-            vpx_codec_encode(&_depthCodec, &vpxImage, _frameCount, 1, 0, VPX_DL_REALTIME);
-
-            // extract the encoded frame
-            vpx_codec_iter_t iterator = 0;
-            const vpx_codec_cx_pkt_t* packet;
-            while ((packet = vpx_codec_get_cx_data(&_depthCodec, &iterator)) != 0) {
-                if (packet->kind == VPX_CODEC_CX_FRAME_PKT) {
-                    payload.append((const char*)packet->data.frame.buf, packet->data.frame.sz);
-                }
-            }
+        uchar* dest = (uchar*)_encodedFace.data() + vpxImage.stride[0] * ENCODED_FACE_HEIGHT;
+        for (int i = 0; i < ENCODED_FACE_HEIGHT; i++) {
+            memcpy(dest, _faceDepth.ptr(i), ENCODED_FACE_WIDTH);
+            dest += vpxImage.stride[0];
         }
     }
     
-    QMetaObject::invokeMethod(Application::getInstance(), "sendAvatarFaceVideoMessage",
-            Q_ARG(int, _frameCount), Q_ARG(QByteArray, payload));
-            
+    // encode the frame
+    vpx_codec_encode(&_codec, &vpxImage, ++_frameCount, 1, 0, VPX_DL_REALTIME);
+
+    // extract the encoded frame
+    vpx_codec_iter_t iterator = 0;
+    const vpx_codec_cx_pkt_t* packet;
+    while ((packet = vpx_codec_get_cx_data(&_codec, &iterator)) != 0) {
+        if (packet->kind == VPX_CODEC_CX_FRAME_PKT) {
+            // prepend the aspect ratio
+            QByteArray payload(sizeof(float), 0);
+            *(float*)payload.data() = _smoothedFaceRect.size.width / _smoothedFaceRect.size.height;
+            payload.append((const char*)packet->data.frame.buf, packet->data.frame.sz);
+            QMetaObject::invokeMethod(Application::getInstance(), "sendAvatarFaceVideoMessage", Q_ARG(int, _frameCount),
+                Q_ARG(QByteArray, payload));
+        }
+    }
+
     QMetaObject::invokeMethod(Application::getInstance()->getWebcam(), "setFrame",
-        Q_ARG(cv::Mat, color), Q_ARG(int, format), Q_ARG(cv::Mat, _grayDepthFrame), Q_ARG(float, _smoothedMidFaceDepth),
-        Q_ARG(float, aspectRatio), Q_ARG(cv::RotatedRect, _smoothedFaceRect), Q_ARG(bool, !payload.isEmpty()),
-        Q_ARG(JointVector, joints));
+        Q_ARG(cv::Mat, color), Q_ARG(int, format), Q_ARG(cv::Mat, _grayDepthFrame),
+        Q_ARG(cv::RotatedRect, _smoothedFaceRect), Q_ARG(JointVector, joints));
 }
 
 bool FrameGrabber::init() {
@@ -817,17 +690,6 @@ void FrameGrabber::updateHSVFrame(const Mat& frame, int format) {
     inRange(_hsvFrame, Scalar(0, 55, 65), Scalar(180, 256, 256), _mask);
 }
 
-void FrameGrabber::destroyCodecs() {
-    if (_colorCodec.name != 0) {
-        vpx_codec_destroy(&_colorCodec);
-        _colorCodec.name = 0;
-    }
-    if (_depthCodec.name != 0) {
-        vpx_codec_destroy(&_depthCodec);
-        _depthCodec.name = 0;
-    }
-}
-
 Joint::Joint(const glm::vec3& position, const glm::quat& rotation, const glm::vec3& projected) :
     isValid(true), position(position), rotation(rotation), projected(projected) {
 }