Most of the way towards working video encoding.

This commit is contained in:
Andrzej Kapolka 2013-07-18 14:14:44 -07:00
parent 8863645dc9
commit 7ce04444fc
5 changed files with 166 additions and 35 deletions

View file

@ -202,27 +202,6 @@ void Webcam::setFrame(const Mat& color, int format, const Mat& depth, const Rota
}
_lastFrameTimestamp = now;
// correct for 180 degree rotations
if (_faceRect.angle < -90.0f) {
_faceRect.angle += 180.0f;
} else if (_faceRect.angle > 90.0f) {
_faceRect.angle -= 180.0f;
}
// compute the smoothed face rect
if (_estimatedFaceRect.size.area() == 0) {
_estimatedFaceRect = _faceRect;
} else {
const float FACE_RECT_SMOOTHING = 0.9f;
_estimatedFaceRect.center.x = glm::mix(_faceRect.center.x, _estimatedFaceRect.center.x, FACE_RECT_SMOOTHING);
_estimatedFaceRect.center.y = glm::mix(_faceRect.center.y, _estimatedFaceRect.center.y, FACE_RECT_SMOOTHING);
_estimatedFaceRect.size.width = glm::mix(_faceRect.size.width, _estimatedFaceRect.size.width, FACE_RECT_SMOOTHING);
_estimatedFaceRect.size.height = glm::mix(_faceRect.size.height, _estimatedFaceRect.size.height, FACE_RECT_SMOOTHING);
_estimatedFaceRect.angle = glm::mix(_faceRect.angle, _estimatedFaceRect.angle, FACE_RECT_SMOOTHING);
}
// see if we have joint data
if (!_joints.isEmpty()) {
_estimatedJoints.resize(NUM_AVATAR_JOINTS);
@ -503,6 +482,7 @@ void FrameGrabber::grabFrame() {
#ifdef HAVE_OPENNI
if (_depthGenerator.IsValid()) {
// convert from 11 to 8 bits, centered about the mean face depth (if possible)
if (_searchWindow.area() > 0) {
const double DEPTH_OFFSET_SMOOTHING = 0.95;
double meanOffset = 128.0 - mean(depth(_searchWindow))[0];
@ -512,20 +492,91 @@ void FrameGrabber::grabFrame() {
}
#endif
const int ENCODED_FACE_WIDTH = 192;
const int ENCODED_FACE_HEIGHT = 192;
if (_codec.name == 0) {
// initialize encoder context
vpx_codec_enc_cfg_t codecConfig;
vpx_codec_enc_config_default(vpx_codec_vp8_cx(), &codecConfig, 0);
codecConfig.rc_target_bitrate = color.cols * color.rows * codecConfig.rc_target_bitrate /
codecConfig.rc_target_bitrate = ENCODED_FACE_WIDTH * ENCODED_FACE_HEIGHT * codecConfig.rc_target_bitrate /
codecConfig.g_w / codecConfig.g_h;
codecConfig.g_w = color.cols;
codecConfig.g_h = color.rows;
codecConfig.g_w = ENCODED_FACE_WIDTH;
codecConfig.g_h = ENCODED_FACE_HEIGHT;
vpx_codec_enc_init(&_codec, vpx_codec_vp8_cx(), &codecConfig, 0);
}
// correct for 180 degree rotations
if (faceRect.angle < -90.0f) {
faceRect.angle += 180.0f;
} else if (faceRect.angle > 90.0f) {
faceRect.angle -= 180.0f;
}
// compute the smoothed face rect
if (_smoothedFaceRect.size.area() == 0) {
_smoothedFaceRect = faceRect;
} else {
const float FACE_RECT_SMOOTHING = 0.9f;
_smoothedFaceRect.center.x = glm::mix(faceRect.center.x, _smoothedFaceRect.center.x, FACE_RECT_SMOOTHING);
_smoothedFaceRect.center.y = glm::mix(faceRect.center.y, _smoothedFaceRect.center.y, FACE_RECT_SMOOTHING);
_smoothedFaceRect.size.width = glm::mix(faceRect.size.width, _smoothedFaceRect.size.width, FACE_RECT_SMOOTHING);
_smoothedFaceRect.size.height = glm::mix(faceRect.size.height, _smoothedFaceRect.size.height, FACE_RECT_SMOOTHING);
_smoothedFaceRect.angle = glm::mix(faceRect.angle, _smoothedFaceRect.angle, FACE_RECT_SMOOTHING);
}
// resize/rotate face into encoding rectangle
_faceFrame.create(ENCODED_FACE_WIDTH, ENCODED_FACE_HEIGHT, CV_8UC3);
Point2f sourcePoints[4];
_smoothedFaceRect.points(sourcePoints);
Point2f destPoints[] = { Point2f(0, ENCODED_FACE_HEIGHT), Point2f(0, 0), Point2f(ENCODED_FACE_WIDTH, 0) };
warpAffine(color, _faceFrame, getAffineTransform(sourcePoints, destPoints), _faceFrame.size());
// convert from RGB to YV12
const int ENCODED_BITS_PER_Y = 8;
const int ENCODED_BITS_PER_VU = 2;
const int ENCODED_BITS_PER_PIXEL = ENCODED_BITS_PER_Y + 2 * ENCODED_BITS_PER_VU;
const int BITS_PER_BYTE = 8;
_encodedFace.resize(ENCODED_FACE_WIDTH * ENCODED_FACE_HEIGHT * ENCODED_BITS_PER_PIXEL / BITS_PER_BYTE);
uchar* ydest = (uchar*)_encodedFace.data();
uchar* vdest = ydest + ENCODED_FACE_WIDTH * ENCODED_FACE_HEIGHT * ENCODED_BITS_PER_Y / BITS_PER_BYTE;
uchar* udest = vdest + ENCODED_FACE_WIDTH * ENCODED_FACE_HEIGHT * ENCODED_BITS_PER_VU / BITS_PER_BYTE;
const int Y_RED_WEIGHT = (int)(0.299 * 256);
const int Y_GREEN_WEIGHT = (int)(0.587 * 256);
const int Y_BLUE_WEIGHT = (int)(0.114 * 256);
const int V_RED_WEIGHT = (int)(0.713 * 256);
const int U_BLUE_WEIGHT = (int)(0.564 * 256);
for (int i = 0; i < ENCODED_FACE_HEIGHT; i += 2) {
for (int j = 0; j < ENCODED_FACE_WIDTH; j += 2) {
uchar* tl = _faceFrame.ptr(i, j);
uchar* tr = _faceFrame.ptr(i, j + 1);
uchar* bl = _faceFrame.ptr(i + 1, j);
uchar* br = _faceFrame.ptr(i + 1, j + 1);
ydest[0] = (tl[0] * Y_RED_WEIGHT + tl[1] * Y_GREEN_WEIGHT + tl[2] * Y_BLUE_WEIGHT) >> 8;
ydest[1] = (tr[0] * Y_RED_WEIGHT + tr[1] * Y_GREEN_WEIGHT + tr[2] * Y_BLUE_WEIGHT) >> 8;
ydest[ENCODED_FACE_WIDTH] = (bl[0] * Y_RED_WEIGHT + bl[1] * Y_GREEN_WEIGHT + bl[2] * Y_BLUE_WEIGHT) >> 8;
ydest[ENCODED_FACE_WIDTH + 1] = (br[0] * Y_RED_WEIGHT + br[1] * Y_GREEN_WEIGHT + br[2] * Y_BLUE_WEIGHT) >> 8;
ydest += 2;
int totalBlue = tl[0] + tr[0] + bl[0] + br[0];
int totalGreen = tl[1] + tr[1] + bl[1] + br[1];
int totalRed = tl[2] + tr[2] + bl[2] + br[2];
int totalY = (totalRed * Y_RED_WEIGHT + totalGreen * Y_GREEN_WEIGHT + totalBlue * Y_BLUE_WEIGHT) >> 8;
*vdest++ = (((totalRed - totalY) * V_RED_WEIGHT) >> 10) + 128;
*udest++ = (((totalBlue - totalY) * U_BLUE_WEIGHT) >> 10) + 128;
}
ydest += ENCODED_FACE_WIDTH;
}
// encode the frame
vpx_image_t vpxImage;
vpx_img_wrap(&vpxImage, VPX_IMG_FMT_YV12, color.cols, color.rows, 1, color.ptr());
vpx_codec_encode(&_codec, &vpxImage, ++_frameCount, 1, 0, VPX_DL_REALTIME);
vpx_img_wrap(&vpxImage, VPX_IMG_FMT_YV12, ENCODED_FACE_WIDTH, ENCODED_FACE_HEIGHT, 1, (unsigned char*)_encodedFace.data());
int result = vpx_codec_encode(&_codec, &vpxImage, ++_frameCount, 1, 0, VPX_DL_REALTIME);
// extract the encoded frame
vpx_codec_iter_t iterator = 0;
const vpx_codec_cx_pkt_t* packet;
while ((packet = vpx_codec_get_cx_data(&_codec, &iterator)) != 0) {
@ -537,7 +588,7 @@ void FrameGrabber::grabFrame() {
QMetaObject::invokeMethod(Application::getInstance()->getWebcam(), "setFrame",
Q_ARG(cv::Mat, color), Q_ARG(int, format), Q_ARG(cv::Mat, _grayDepthFrame),
Q_ARG(cv::RotatedRect, faceRect), Q_ARG(JointVector, joints));
Q_ARG(cv::RotatedRect, _smoothedFaceRect), Q_ARG(JointVector, joints));
}
bool FrameGrabber::init() {

View file

@ -50,7 +50,8 @@ public:
GLuint getDepthTextureID() const { return _depthTextureID; }
const cv::Size2f& getTextureSize() const { return _textureSize; }
const cv::RotatedRect& getEstimatedFaceRect() const { return _estimatedFaceRect; }
const cv::RotatedRect& getFaceRect() const { return _faceRect; }
const glm::vec3& getEstimatedPosition() const { return _estimatedPosition; }
const glm::vec3& getEstimatedRotation() const { return _estimatedRotation; }
const JointVector& getEstimatedJoints() const { return _estimatedJoints; }
@ -63,7 +64,7 @@ public slots:
void setEnabled(bool enabled);
void setFrame(const cv::Mat& color, int format, const cv::Mat& depth,
const cv::RotatedRect& faceRect, const JointVector& joints);
private:
QThread _grabberThread;
@ -83,7 +84,6 @@ private:
uint64_t _lastFrameTimestamp;
cv::RotatedRect _estimatedFaceRect;
glm::vec3 _estimatedPosition;
glm::vec3 _estimatedRotation;
JointVector _estimatedJoints;
@ -121,6 +121,9 @@ private:
vpx_codec_ctx_t _codec;
int _frameCount;
cv::Mat _faceFrame;
QByteArray _encodedFace;
cv::RotatedRect _smoothedFaceRect;
#ifdef HAVE_OPENNI
xn::Context _xnContext;

View file

@ -315,7 +315,7 @@ void Avatar::updateFromGyrosAndOrWebcam(bool gyroLook,
_head.getFace().setColorTextureID(webcam->getColorTextureID());
_head.getFace().setDepthTextureID(webcam->getDepthTextureID());
_head.getFace().setTextureSize(webcam->getTextureSize());
_head.getFace().setTextureRect(webcam->getEstimatedFaceRect());
_head.getFace().setTextureRect(webcam->getFaceRect());
// compute and store the joint rotations
const JointVector& joints = webcam->getEstimatedJoints();

View file

@ -13,6 +13,7 @@
#include <PacketHeaders.h>
#include "Application.h"
#include "Avatar.h"
#include "Head.h"
#include "Face.h"
@ -28,13 +29,22 @@ GLuint Face::_vboID;
GLuint Face::_iboID;
// Constructor: initializes texture IDs to 0 (not yet allocated), a zeroed
// codec context, and the frame counter.
// NOTE(review): the two identical initializer-list lines below are the
// removed/added sides of a whitespace-only diff hunk in this rendered view,
// not genuine duplication — confirm against the actual post-commit file.
Face::Face(Head* owningHead) : _owningHead(owningHead), _renderMode(MESH),
_colorTextureID(0), _depthTextureID(0), _codec(), _frameCount(0) {
_colorTextureID(0), _depthTextureID(0), _codec(), _frameCount(0) {
// we may have been created in the network thread, but we live in the main thread
moveToThread(Application::getInstance()->thread());
}
// Destructor: tears down the VP8 decoder context (if it was ever initialized;
// a non-null _codec.name is used as the "initialized" flag) and releases the
// GL textures this Face owns.
// NOTE(review): in this diff-merged view the texture deletion appears nested
// inside the codec-name check, which would leak the textures when the codec
// was never initialized — the removed/added hunk lines are likely interleaved
// here; confirm the intended nesting against the post-commit file.
Face::~Face() {
if (_codec.name != 0) {
vpx_codec_destroy(&_codec);
// mark the context as uninitialized so later checks skip it
_codec.name = 0;
// delete our textures, since we know that we own them
if (_colorTextureID != 0) {
glDeleteTextures(1, &_colorTextureID);
}
if (_depthTextureID != 0) {
glDeleteTextures(1, &_depthTextureID);
}
}
}
@ -72,7 +82,54 @@ int Face::processVideoMessage(unsigned char* packetData, size_t dataBytes) {
vpx_codec_iter_t iterator = 0;
vpx_image_t* image;
while ((image = vpx_codec_get_frame(&_codec, &iterator)) != 0) {
// convert from YV12 to RGB
Mat frame(image->h, image->w, CV_8UC3);
uchar* ysrc = image->planes[0];
uchar* vsrc = image->planes[1];
uchar* usrc = image->planes[2];
const int RED_V_WEIGHT = (int)(1.403 * 256);
const int GREEN_V_WEIGHT = (int)(0.714 * 256);
const int GREEN_U_WEIGHT = (int)(0.344 * 256);
const int BLUE_U_WEIGHT = (int)(1.773 * 256);
for (int i = 0; i < image->h; i += 2) {
for (int j = 0; j < image->w; j += 2) {
uchar* tl = frame.ptr(i, j);
uchar* tr = frame.ptr(i, j + 1);
uchar* bl = frame.ptr(i + 1, j);
uchar* br = frame.ptr(i + 1, j + 1);
int v = *vsrc++ - 128;
int u = *usrc++ - 128;
int redOffset = (RED_V_WEIGHT * v) >> 8;
int greenOffset = (GREEN_V_WEIGHT * v + GREEN_U_WEIGHT * u) >> 8;
int blueOffset = (BLUE_U_WEIGHT * u) >> 8;
int ytl = ysrc[0];
int ytr = ysrc[1];
int ybl = ysrc[image->w];
int ybr = ysrc[image->w + 1];
ysrc += 2;
tl[0] = ytl + redOffset;
tl[1] = ytl - greenOffset;
tl[2] = ytl + blueOffset;
tr[0] = ytr + redOffset;
tr[1] = ytr - greenOffset;
tr[2] = ytr + blueOffset;
bl[0] = ybl + redOffset;
bl[1] = ybl - greenOffset;
bl[2] = ybl + blueOffset;
br[0] = ybr + redOffset;
br[1] = ybr - greenOffset;
br[2] = ybr + blueOffset;
}
ysrc += image->w;
}
QMetaObject::invokeMethod(this, "setFrame", Q_ARG(cv::Mat, frame));
}
}
@ -228,3 +285,19 @@ void Face::cycleRenderMode() {
_renderMode = (RenderMode)((_renderMode + 1) % RENDER_MODE_COUNT);
}
// Uploads a decoded video frame into this Face's color texture.
// On the first frame the texture is created (glTexImage2D) and the cached
// texture size/rect are initialized from the frame dimensions; subsequent
// frames are uploaded in place with glTexSubImage2D, which assumes the
// frame dimensions never change after the first call.
// NOTE(review): the data is uploaded as GL_RGB, so `color` is presumed to be
// a 3-channel 8-bit Mat in RGB channel order — confirm against the decoder.
void Face::setFrame(const cv::Mat& color) {
if (_colorTextureID == 0) {
// first frame: allocate the texture and record its geometry
glGenTextures(1, &_colorTextureID);
glBindTexture(GL_TEXTURE_2D, _colorTextureID);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, color.cols, color.rows, 0, GL_RGB, GL_UNSIGNED_BYTE, color.ptr());
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
_textureSize = color.size();
// unrotated rect centered on the frame, covering the whole texture
_textureRect = RotatedRect(Point2f(color.cols * 0.5f, color.rows * 0.5f), _textureSize, 0.0f);
} else {
// later frames: update the existing texture without reallocating
glBindTexture(GL_TEXTURE_2D, _colorTextureID);
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, color.cols, color.rows, GL_RGB, GL_UNSIGNED_BYTE, color.ptr());
}
// leave no texture bound
glBindTexture(GL_TEXTURE_2D, 0);
}

View file

@ -40,7 +40,11 @@ public:
public slots:
void cycleRenderMode();
private slots:
void setFrame(const cv::Mat& color);
private:
enum RenderMode { MESH, POINTS, RENDER_MODE_COUNT };