From e9a53ed9b4fa4167bf92da116232ec86890d6769 Mon Sep 17 00:00:00 2001
From: atlante45
Date: Fri, 2 Aug 2013 17:14:57 -0700
Subject: [PATCH 1/4] Changed avatar resizing shortcuts to plus and minus.

---
 interface/src/Application.cpp | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/interface/src/Application.cpp b/interface/src/Application.cpp
index 850dd9871d..beb7ab6412 100644
--- a/interface/src/Application.cpp
+++ b/interface/src/Application.cpp
@@ -571,7 +571,6 @@ void Application::keyPressEvent(QKeyEvent* event) {
         _myAvatar.getHand().setRaveGloveEffectsMode((QKeyEvent*)event);
     }
 
-    bool alt = event->modifiers().testFlag(Qt::AltModifier);
     bool meta = event->modifiers().testFlag(Qt::MetaModifier);
     bool shifted = event->modifiers().testFlag(Qt::ShiftModifier);
     switch (event->key()) {
@@ -825,14 +824,10 @@ void Application::keyPressEvent(QKeyEvent* event) {
             }
             break;
         case Qt::Key_Plus:
-            if (alt) {
-                increaseAvatarSize();
-            }
+            increaseAvatarSize();
             break;
         case Qt::Key_Minus:
-            if (alt) {
-                decreaseAvatarSize();
-            }
+            decreaseAvatarSize();
             break;
 
         case Qt::Key_1:
@@ -1926,8 +1921,8 @@ void Application::initMenu() {
         "First Person", this, SLOT(setRenderFirstPerson(bool)), Qt::Key_P))->setCheckable(true);
     (_manualThirdPerson = renderMenu->addAction(
         "Third Person", this, SLOT(setRenderThirdPerson(bool))))->setCheckable(true);
-    renderMenu->addAction("Increase Avatar Size", this, SLOT(increaseAvatarSize()), Qt::ALT | Qt::Key_Plus);
-    renderMenu->addAction("Decrease Avatar Size", this, SLOT(decreaseAvatarSize()), Qt::ALT | Qt::Key_Minus);
+    renderMenu->addAction("Increase Avatar Size", this, SLOT(increaseAvatarSize()), Qt::Key_Plus);
+    renderMenu->addAction("Decrease Avatar Size", this, SLOT(decreaseAvatarSize()), Qt::Key_Minus);
 
     QMenu* toolsMenu = menuBar->addMenu("Tools");

From bceb416d8a675bf5ad5b4227524fcdd679349b8d Mon Sep 17 00:00:00 2001
From: atlante45
Date: Fri, 2 Aug 2013 17:18:35 -0700
Subject: [PATCH 2/4] merge

---
 interface/src/Webcam.cpp | 502 ++++++++++++++------------------------
 1 file changed, 182 insertions(+), 320 deletions(-)

diff --git a/interface/src/Webcam.cpp b/interface/src/Webcam.cpp
index 3965bf887b..e37ca6c70e 100644
--- a/interface/src/Webcam.cpp
+++ b/interface/src/Webcam.cpp
@@ -19,7 +19,6 @@
 
 #include "Application.h"
 #include "Webcam.h"
-#include "avatar/Face.h"
 
 using namespace cv;
 using namespace std;
@@ -33,7 +32,7 @@ int jointVectorMetaType = qRegisterMetaType<JointVector>("JointVector");
 int matMetaType = qRegisterMetaType<cv::Mat>("cv::Mat");
 int rotatedRectMetaType = qRegisterMetaType<cv::RotatedRect>("cv::RotatedRect");
 
-Webcam::Webcam() : _enabled(false), _active(false), _colorTextureID(0), _depthTextureID(0), _skeletonTrackingOn(false) {
+Webcam::Webcam() : _enabled(false), _active(false), _colorTextureID(0), _depthTextureID(0) {
     // the grabber simply runs as fast as possible
     _grabber = new FrameGrabber();
     _grabber->moveToThread(&_grabberThread);
@@ -58,11 +57,8 @@ void Webcam::setEnabled(bool enabled) {
     }
 }
 
-const float UNINITIALIZED_FACE_DEPTH = 0.0f;
-
 void Webcam::reset() {
     _initialFaceRect = RotatedRect();
-    _initialFaceDepth = UNINITIALIZED_FACE_DEPTH;
 
     if (_enabled) {
         // send a message to the grabber
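Patch 2 leans on the qRegisterMetaType calls shown above: the grabber thread hands frames to the UI thread with QMetaObject::invokeMethod, which queues a copy of each argument and therefore needs the argument types registered with Qt's meta-type system. A minimal sketch of that pattern, with a hypothetical Worker class standing in for FrameGrabber (not code from the patch; a Q_OBJECT class needs moc as usual):

    #include <QObject>
    #include <QThread>

    struct Frame { int number; };                 // custom cross-thread payload
    Q_DECLARE_METATYPE(Frame)

    class Worker : public QObject {
        Q_OBJECT
    public slots:
        void process(Frame frame) { /* runs on the worker's thread */ }
    };

    // usage, once the worker lives on its own thread:
    //     qRegisterMetaType<Frame>("Frame");     // once, before the first queued call
    //     worker->moveToThread(&workerThread);
    //     QMetaObject::invokeMethod(worker, "process", Q_ARG(Frame, Frame{42}));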
@@ -153,10 +149,7 @@ Webcam::~Webcam() {
     delete _grabber;
 }
 
-const float METERS_PER_MM = 1.0f / 1000.0f;
-
-void Webcam::setFrame(const Mat& color, int format, const Mat& depth, float midFaceDepth,
-    float aspectRatio, const RotatedRect& faceRect, bool sending, const JointVector& joints) {
+void Webcam::setFrame(const Mat& color, int format, const Mat& depth,
+    const RotatedRect& faceRect, const JointVector& joints) {
     IplImage colorImage = color;
     glPixelStorei(GL_UNPACK_ROW_LENGTH, colorImage.widthStep / 3);
     if (_colorTextureID == 0) {
@@ -193,11 +186,9 @@ void Webcam::setFrame(const Mat& color, int format, const Mat& depth, float midF
     glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
     glBindTexture(GL_TEXTURE_2D, 0);
 
-    // store our various data, update our frame count for fps computation
-    _aspectRatio = aspectRatio;
+    // store our face rect and joints, update our frame count for fps computation
     _faceRect = faceRect;
-    _sending = sending;
-    _joints = _skeletonTrackingOn ? joints : JointVector();
+    _joints = joints;
     _frameCount++;
 
     const int MAX_FPS = 60;
@@ -241,28 +232,22 @@ void Webcam::setFrame(const Mat& color, int format, const Mat& depth, float midF
         const float ROTATION_SMOOTHING = 0.95f;
         _estimatedRotation.z = glm::mix(_faceRect.angle, _estimatedRotation.z, ROTATION_SMOOTHING);
 
-        // determine position based on translation and scaling of the face rect/mean face depth
+        // determine position based on translation and scaling of the face rect
         if (_initialFaceRect.size.area() == 0) {
             _initialFaceRect = _faceRect;
             _estimatedPosition = glm::vec3();
-            _initialFaceDepth = midFaceDepth;
 
         } else {
-            float proportion, z;
-            if (midFaceDepth == UNINITIALIZED_FACE_DEPTH) {
-                proportion = sqrtf(_initialFaceRect.size.area() / (float)_faceRect.size.area());
-                const float INITIAL_DISTANCE_TO_CAMERA = 0.333f;
-                z = INITIAL_DISTANCE_TO_CAMERA * proportion - INITIAL_DISTANCE_TO_CAMERA;
-
-            } else {
-                z = (midFaceDepth - _initialFaceDepth) * METERS_PER_MM;
-                proportion = midFaceDepth / _initialFaceDepth;
-            }
+            float proportion = sqrtf(_initialFaceRect.size.area() / (float)_faceRect.size.area());
+            const float DISTANCE_TO_CAMERA = 0.333f;
             const float POSITION_SCALE = 0.5f;
-            _estimatedPosition = glm::vec3(
+            float z = DISTANCE_TO_CAMERA * proportion - DISTANCE_TO_CAMERA;
+            glm::vec3 position = glm::vec3(
                 (_faceRect.center.x - _initialFaceRect.center.x) * proportion * POSITION_SCALE / _textureSize.width,
                 (_faceRect.center.y - _initialFaceRect.center.y) * proportion * POSITION_SCALE / _textureSize.width,
                 z);
+            const float POSITION_SMOOTHING = 0.95f;
+            _estimatedPosition = glm::mix(position, _estimatedPosition, POSITION_SMOOTHING);
         }
     }
 
@@ -273,8 +258,8 @@ void Webcam::setFrame(const Mat& color, int format, const Mat& depth, float midF
     QTimer::singleShot(qMax((int)remaining / 1000, 0), _grabber, SLOT(grabFrame()));
 }
 
-FrameGrabber::FrameGrabber() : _initialized(false), _videoSendMode(FULL_FRAME_VIDEO), _capture(0), _searchWindow(0, 0, 0, 0),
-    _smoothedMidFaceDepth(UNINITIALIZED_FACE_DEPTH), _colorCodec(), _depthCodec(), _frameCount(0) {
+FrameGrabber::FrameGrabber() : _initialized(false), _capture(0), _searchWindow(0, 0, 0, 0),
+    _depthOffset(0.0), _codec(), _frameCount(0) {
 }
 
 FrameGrabber::~FrameGrabber() {
@@ -367,13 +352,6 @@ static void XN_CALLBACK_TYPE calibrationCompleted(SkeletonCapability& capability
 }
 #endif
 
-void FrameGrabber::cycleVideoSendMode() {
-    _videoSendMode = (VideoSendMode)((_videoSendMode + 1) % VIDEO_SEND_MODE_COUNT);
-    _searchWindow = cv::Rect(0, 0, 0, 0);
-
-    destroyCodecs();
-}
-
 void FrameGrabber::reset() {
     _searchWindow = cv::Rect(0, 0, 0, 0);
 
@@ -389,7 +367,10 @@ void FrameGrabber::shutdown() {
         cvReleaseCapture(&_capture);
         _capture = 0;
     }
-    destroyCodecs();
+    if (_codec.name != 0) {
+        vpx_codec_destroy(&_codec);
+        _codec.name = 0;
+    }
     _initialized = false;
 
     thread()->quit();
 }
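Several changes above use the same smoothing idiom: smoothed = glm::mix(newValue, smoothed, k). Since glm::mix(a, b, t) = a * (1 - t) + b * t, this is an exponential moving average where k (0.9 to 0.95 here) is the weight given to history. A standalone illustration (the Smoother wrapper is ours, not the patch's):

    #include <glm/glm.hpp>

    struct Smoother {
        glm::vec3 value;
        bool initialized = false;
        void update(const glm::vec3& sample, float smoothing) {
            // first sample initializes; later samples are blended in
            value = initialized ? glm::mix(sample, value, smoothing) : sample;
            initialized = true;
        }
    };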
@@ -442,6 +423,7 @@ void FrameGrabber::grabFrame() {
                         _userID, (XnSkeletonJoint)parentJoint, parentOrientation);
                     rotation = glm::inverse(xnToGLM(parentOrientation.orientation)) * rotation;
                 }
+                const float METERS_PER_MM = 1.0f / 1000.0f;
                 joints[avatarJoint] = Joint(xnToGLM(transform.position.position, true) * METERS_PER_MM,
                     rotation, xnToGLM(projected));
             }
@@ -465,290 +447,181 @@ void FrameGrabber::grabFrame() {
         }
         color = image;
     }
 
+    // if we don't have a search window (yet), try using the face cascade
+    int channels = 0;
+    float ranges[] = { 0, 180 };
+    const float* range = ranges;
+    if (_searchWindow.area() == 0) {
+        vector<Rect> faces;
+        _faceCascade.detectMultiScale(color, faces, 1.1, 6);
+        if (!faces.empty()) {
+            _searchWindow = faces.front();
+            updateHSVFrame(color, format);
+
+            Mat faceHsv(_hsvFrame, _searchWindow);
+            Mat faceMask(_mask, _searchWindow);
+            int sizes = 30;
+            calcHist(&faceHsv, 1, &channels, faceMask, _histogram, 1, &sizes, &range);
+            double min, max;
+            minMaxLoc(_histogram, &min, &max);
+            _histogram.convertTo(_histogram, -1, (max == 0.0) ? 0.0 : 255.0 / max);
+        }
+    }
+    RotatedRect faceRect;
+    if (_searchWindow.area() > 0) {
+        updateHSVFrame(color, format);
+
+        calcBackProject(&_hsvFrame, 1, &channels, _histogram, _backProject, &range);
+        bitwise_and(_backProject, _mask, _backProject);
+
+        faceRect = CamShift(_backProject, _searchWindow, TermCriteria(CV_TERMCRIT_EPS | CV_TERMCRIT_ITER, 10, 1));
+        Rect faceBounds = faceRect.boundingRect();
+        Rect imageBounds(0, 0, color.cols, color.rows);
+        _searchWindow = Rect(clip(faceBounds.tl(), imageBounds), clip(faceBounds.br(), imageBounds));
+    }
+
+#ifdef HAVE_OPENNI
+    if (_depthGenerator.IsValid()) {
+        // convert from 11 to 8 bits, centered about the mean face depth (if possible)
+        if (_searchWindow.area() > 0) {
+            const double DEPTH_OFFSET_SMOOTHING = 0.95;
+            const double EIGHT_BIT_MIDPOINT = 128.0;
+            double meanOffset = EIGHT_BIT_MIDPOINT - mean(depth(_searchWindow))[0];
+            _depthOffset = (_depthOffset == 0.0) ? meanOffset : glm::mix(meanOffset, _depthOffset, DEPTH_OFFSET_SMOOTHING);
+        }
+        depth.convertTo(_grayDepthFrame, CV_8UC1, 1.0, _depthOffset);
+    }
+#endif
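The block added above is the classic OpenCV CamShift loop: seed a window with a Haar cascade, build a hue histogram of the face region once, then per frame back-project the histogram and let CamShift re-center the window. Condensed to its skeleton (our own minimal arrangement of the same calls, assuming a seeded searchWindow, a hue plane in hue, and a normalized histogram hist):

    // one CamShift tracking step
    cv::Mat backProject;
    int channels = 0;
    float ranges[] = { 0, 180 };          // hue range in OpenCV is 0..180
    const float* range = ranges;
    cv::calcBackProject(&hue, 1, &channels, hist, backProject, &range);
    cv::RotatedRect track = cv::CamShift(backProject, searchWindow,
        cv::TermCriteria(cv::TermCriteria::EPS | cv::TermCriteria::COUNT, 10, 1));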
 
     const int ENCODED_FACE_WIDTH = 128;
     const int ENCODED_FACE_HEIGHT = 128;
-    int encodedWidth;
-    int encodedHeight;
-    float colorBitrateMultiplier = 1.0f;
-    float depthBitrateMultiplier = 1.0f;
-    Mat faceTransform;
-    float aspectRatio;
-    if (_videoSendMode == FULL_FRAME_VIDEO) {
-        // no need to find the face if we're sending full frame video
-        _smoothedFaceRect = RotatedRect(Point2f(color.cols / 2.0f, color.rows / 2.0f), Size2f(color.cols, color.rows), 0.0f);
-        encodedWidth = color.cols;
-        encodedHeight = color.rows;
-        aspectRatio = FULL_FRAME_ASPECT;
-        colorBitrateMultiplier = 4.0f;
+    int combinedFaceHeight = ENCODED_FACE_HEIGHT * (depth.empty() ? 1 : 2);
+    if (_codec.name == 0) {
+        // initialize encoder context
+        vpx_codec_enc_cfg_t codecConfig;
+        vpx_codec_enc_config_default(vpx_codec_vp8_cx(), &codecConfig, 0);
+        codecConfig.rc_target_bitrate = ENCODED_FACE_WIDTH * combinedFaceHeight * codecConfig.rc_target_bitrate /
+            codecConfig.g_w / codecConfig.g_h;
+        codecConfig.g_w = ENCODED_FACE_WIDTH;
+        codecConfig.g_h = combinedFaceHeight;
+        vpx_codec_enc_init(&_codec, vpx_codec_vp8_cx(), &codecConfig, 0);
+    }
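The encoder setup above scales libvpx's default target bitrate by the ratio of the encoded area to the default frame area before overwriting g_w/g_h, so bits per pixel stay roughly at the library default. Note also the combined height: with depth data present, the frame is twice as tall, color face on top and depth below, so one VP8 stream carries both. The same configuration steps in isolation (error handling is ours; the patch omits it):

    #include <vpx/vpx_encoder.h>
    #include <vpx/vp8cx.h>

    bool initEncoder(vpx_codec_ctx_t& codec, int width, int height) {
        vpx_codec_enc_cfg_t config;
        if (vpx_codec_enc_config_default(vpx_codec_vp8_cx(), &config, 0) != VPX_CODEC_OK) {
            return false;
        }
        // rescale the default bitrate to the new resolution
        config.rc_target_bitrate = width * height * config.rc_target_bitrate / config.g_w / config.g_h;
        config.g_w = width;
        config.g_h = height;
        return vpx_codec_enc_init(&codec, vpx_codec_vp8_cx(), &config, 0) == VPX_CODEC_OK;
    }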
+
+    // correct for 180 degree rotations
+    if (faceRect.angle < -90.0f) {
+        faceRect.angle += 180.0f;
+
+    } else if (faceRect.angle > 90.0f) {
+        faceRect.angle -= 180.0f;
+    }
+
+    // compute the smoothed face rect
+    if (_smoothedFaceRect.size.area() == 0) {
+        _smoothedFaceRect = faceRect;
 
     } else {
-        // if we don't have a search window (yet), try using the face cascade
-        int channels = 0;
-        float ranges[] = { 0, 180 };
-        const float* range = ranges;
-        if (_searchWindow.area() == 0) {
-            vector<Rect> faces;
-            _faceCascade.detectMultiScale(color, faces, 1.1, 6);
-            if (!faces.empty()) {
-                _searchWindow = faces.front();
-                updateHSVFrame(color, format);
-
-                Mat faceHsv(_hsvFrame, _searchWindow);
-                Mat faceMask(_mask, _searchWindow);
-                int sizes = 30;
-                calcHist(&faceHsv, 1, &channels, faceMask, _histogram, 1, &sizes, &range);
-                double min, max;
-                minMaxLoc(_histogram, &min, &max);
-                _histogram.convertTo(_histogram, -1, (max == 0.0) ? 0.0 : 255.0 / max);
-            }
-        }
-        RotatedRect faceRect;
-        if (_searchWindow.area() > 0) {
-            updateHSVFrame(color, format);
-
-            calcBackProject(&_hsvFrame, 1, &channels, _histogram, _backProject, &range);
-            bitwise_and(_backProject, _mask, _backProject);
-
-            faceRect = CamShift(_backProject, _searchWindow, TermCriteria(CV_TERMCRIT_EPS | CV_TERMCRIT_ITER, 10, 1));
-            Rect faceBounds = faceRect.boundingRect();
-            Rect imageBounds(0, 0, color.cols, color.rows);
-            _searchWindow = Rect(clip(faceBounds.tl(), imageBounds), clip(faceBounds.br(), imageBounds));
-        }
-        encodedWidth = ENCODED_FACE_WIDTH;
-        encodedHeight = ENCODED_FACE_HEIGHT;
-        depthBitrateMultiplier = 2.0f;
-
-        // correct for 180 degree rotations
-        if (faceRect.angle < -90.0f) {
-            faceRect.angle += 180.0f;
-
-        } else if (faceRect.angle > 90.0f) {
-            faceRect.angle -= 180.0f;
-        }
-
-        // compute the smoothed face rect
-        if (_smoothedFaceRect.size.area() == 0) {
-            _smoothedFaceRect = faceRect;
-
-        } else {
-            const float FACE_RECT_SMOOTHING = 0.9f;
-            _smoothedFaceRect.center.x = glm::mix(faceRect.center.x, _smoothedFaceRect.center.x, FACE_RECT_SMOOTHING);
-            _smoothedFaceRect.center.y = glm::mix(faceRect.center.y, _smoothedFaceRect.center.y, FACE_RECT_SMOOTHING);
-            _smoothedFaceRect.size.width = glm::mix(faceRect.size.width, _smoothedFaceRect.size.width, FACE_RECT_SMOOTHING);
-            _smoothedFaceRect.size.height = glm::mix(faceRect.size.height, _smoothedFaceRect.size.height, FACE_RECT_SMOOTHING);
-            _smoothedFaceRect.angle = glm::mix(faceRect.angle, _smoothedFaceRect.angle, FACE_RECT_SMOOTHING);
-        }
-
-        // use the face rect to compute the face transform, aspect ratio
-        Point2f sourcePoints[4];
-        _smoothedFaceRect.points(sourcePoints);
-        Point2f destPoints[] = { Point2f(0, encodedHeight), Point2f(0, 0), Point2f(encodedWidth, 0) };
-        faceTransform = getAffineTransform(sourcePoints, destPoints);
-        aspectRatio = _smoothedFaceRect.size.width / _smoothedFaceRect.size.height;
+        const float FACE_RECT_SMOOTHING = 0.9f;
+        _smoothedFaceRect.center.x = glm::mix(faceRect.center.x, _smoothedFaceRect.center.x, FACE_RECT_SMOOTHING);
+        _smoothedFaceRect.center.y = glm::mix(faceRect.center.y, _smoothedFaceRect.center.y, FACE_RECT_SMOOTHING);
+        _smoothedFaceRect.size.width = glm::mix(faceRect.size.width, _smoothedFaceRect.size.width, FACE_RECT_SMOOTHING);
+        _smoothedFaceRect.size.height = glm::mix(faceRect.size.height, _smoothedFaceRect.size.height, FACE_RECT_SMOOTHING);
+        _smoothedFaceRect.angle = glm::mix(faceRect.angle, _smoothedFaceRect.angle, FACE_RECT_SMOOTHING);
     }
-
-    const ushort ELEVEN_BIT_MINIMUM = 0;
-    const uchar EIGHT_BIT_MIDPOINT = 128;
-    double depthOffset;
+
+    // resize/rotate face into encoding rectangle
+    _faceColor.create(ENCODED_FACE_WIDTH, ENCODED_FACE_HEIGHT, CV_8UC3);
+    Point2f sourcePoints[4];
+    _smoothedFaceRect.points(sourcePoints);
+    Point2f destPoints[] = { Point2f(0, ENCODED_FACE_HEIGHT), Point2f(0, 0), Point2f(ENCODED_FACE_WIDTH, 0) };
+    Mat transform = getAffineTransform(sourcePoints, destPoints);
+    warpAffine(color, _faceColor, transform, _faceColor.size());
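getAffineTransform above needs only three point correspondences: three corners of the smoothed RotatedRect are mapped to three corners of the upright 128x128 target, which rotates and scales the face into the encoding rectangle in a single warp. A compact restatement (our own variable names, assuming OpenCV's corner order of bottom-left, top-left, top-right, bottom-right):

    // map a rotated rect onto an upright w x h image with one affine warp
    cv::Point2f corners[4];
    faceRect.points(corners);
    cv::Point2f dest[] = { {0.0f, (float)h}, {0.0f, 0.0f}, {(float)w, 0.0f} };
    cv::Mat warp = cv::getAffineTransform(corners, dest);   // 3 correspondences suffice
    cv::warpAffine(src, out, warp, cv::Size(w, h));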
+
+    // convert from RGB to YV12
+    const int ENCODED_BITS_PER_Y = 8;
+    const int ENCODED_BITS_PER_VU = 2;
+    const int ENCODED_BITS_PER_PIXEL = ENCODED_BITS_PER_Y + 2 * ENCODED_BITS_PER_VU;
+    const int BITS_PER_BYTE = 8;
+    _encodedFace.fill(128, ENCODED_FACE_WIDTH * combinedFaceHeight * ENCODED_BITS_PER_PIXEL / BITS_PER_BYTE);
+    vpx_image_t vpxImage;
+    vpx_img_wrap(&vpxImage, VPX_IMG_FMT_YV12, ENCODED_FACE_WIDTH, combinedFaceHeight, 1, (unsigned char*)_encodedFace.data());
+    uchar* yline = vpxImage.planes[0];
+    uchar* vline = vpxImage.planes[1];
+    uchar* uline = vpxImage.planes[2];
+    const int Y_RED_WEIGHT = (int)(0.299 * 256);
+    const int Y_GREEN_WEIGHT = (int)(0.587 * 256);
+    const int Y_BLUE_WEIGHT = (int)(0.114 * 256);
+    const int V_RED_WEIGHT = (int)(0.713 * 256);
+    const int U_BLUE_WEIGHT = (int)(0.564 * 256);
+    int redIndex = 0;
+    int greenIndex = 1;
+    int blueIndex = 2;
+    if (format == GL_BGR) {
+        redIndex = 2;
+        blueIndex = 0;
+    }
+    for (int i = 0; i < ENCODED_FACE_HEIGHT; i += 2) {
+        uchar* ydest = yline;
+        uchar* vdest = vline;
+        uchar* udest = uline;
+        for (int j = 0; j < ENCODED_FACE_WIDTH; j += 2) {
+            uchar* tl = _faceColor.ptr(i, j);
+            uchar* tr = _faceColor.ptr(i, j + 1);
+            uchar* bl = _faceColor.ptr(i + 1, j);
+            uchar* br = _faceColor.ptr(i + 1, j + 1);
+
+            ydest[0] = (tl[redIndex] * Y_RED_WEIGHT + tl[1] * Y_GREEN_WEIGHT + tl[blueIndex] * Y_BLUE_WEIGHT) >> 8;
+            ydest[1] = (tr[redIndex] * Y_RED_WEIGHT + tr[1] * Y_GREEN_WEIGHT + tr[blueIndex] * Y_BLUE_WEIGHT) >> 8;
+            ydest[ENCODED_FACE_WIDTH] = (bl[redIndex] * Y_RED_WEIGHT + bl[greenIndex] *
+                Y_GREEN_WEIGHT + bl[blueIndex] * Y_BLUE_WEIGHT) >> 8;
+            ydest[ENCODED_FACE_WIDTH + 1] = (br[redIndex] * Y_RED_WEIGHT + br[greenIndex] *
+                Y_GREEN_WEIGHT + br[blueIndex] * Y_BLUE_WEIGHT) >> 8;
+            ydest += 2;
+
+            int totalRed = tl[redIndex] + tr[redIndex] + bl[redIndex] + br[redIndex];
+            int totalGreen = tl[greenIndex] + tr[greenIndex] + bl[greenIndex] + br[greenIndex];
+            int totalBlue = tl[blueIndex] + tr[blueIndex] + bl[blueIndex] + br[blueIndex];
+            int totalY = (totalRed * Y_RED_WEIGHT + totalGreen * Y_GREEN_WEIGHT + totalBlue * Y_BLUE_WEIGHT) >> 8;
+
+            *vdest++ = (((totalRed - totalY) * V_RED_WEIGHT) >> 10) + 128;
+            *udest++ = (((totalBlue - totalY) * U_BLUE_WEIGHT) >> 10) + 128;
+        }
+        yline += vpxImage.stride[0] * 2;
+        vline += vpxImage.stride[1];
+        uline += vpxImage.stride[2];
+    }
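The hand-rolled loop above produces planar YV12 (4:2:0 chroma subsampling): one 8-bit luma sample per pixel plus one V and one U sample per 2x2 block, with luma weights 0.299/0.587/0.114 in 8.8 fixed point. That is where the 12-bits-per-pixel buffer size comes from; swapping redIndex/blueIndex handles BGR input without a separate conversion pass. The buffer math in miniature:

    // YV12 buffer size for a w x h frame: 8 bits of Y per pixel plus
    // 2 bits each of V and U (one byte per 2x2 block per chroma plane)
    int yBytes  = w * h;
    int vuBytes = 2 * (w / 2) * (h / 2);   // == w * h / 2
    int total   = yBytes + vuBytes;        // == w * h * 12 / 8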
+
+    // if we have depth data, warp that and just copy it in
     if (!depth.empty()) {
-        if (_videoSendMode == FACE_VIDEO) {
-            // warp the face depth without interpolation (because it will contain invalid zero values)
-            _faceDepth.create(encodedHeight, encodedWidth, CV_16UC1);
-            warpAffine(depth, _faceDepth, faceTransform, _faceDepth.size(), INTER_NEAREST);
+        _faceDepth.create(ENCODED_FACE_WIDTH, ENCODED_FACE_HEIGHT, CV_8UC1);
+        warpAffine(_grayDepthFrame, _faceDepth, transform, _faceDepth.size());
 
-        } else {
-            _faceDepth = depth;
-        }
-        _smoothedFaceDepth.create(encodedHeight, encodedWidth, CV_16UC1);
-
-        // smooth the depth over time
-        const ushort ELEVEN_BIT_MAXIMUM = 2047;
-        const float DEPTH_SMOOTHING = 0.25f;
-        ushort* src = _faceDepth.ptr<ushort>();
-        ushort* dest = _smoothedFaceDepth.ptr<ushort>();
-        ushort minimumDepth = numeric_limits<ushort>::max();
-        for (int i = 0; i < encodedHeight; i++) {
-            for (int j = 0; j < encodedWidth; j++) {
-                ushort depth = *src++;
-                if (depth != ELEVEN_BIT_MINIMUM && depth != ELEVEN_BIT_MAXIMUM) {
-                    minimumDepth = min(minimumDepth, depth);
-                    *dest = (*dest == ELEVEN_BIT_MINIMUM) ? depth : (ushort)glm::mix(depth, *dest, DEPTH_SMOOTHING);
-                }
-                dest++;
-            }
-        }
-        const ushort MINIMUM_DEPTH_OFFSET = 64;
-        const float FIXED_MID_DEPTH = 640.0f;
-        float midFaceDepth = (_videoSendMode == FACE_VIDEO) ? (minimumDepth + MINIMUM_DEPTH_OFFSET) : FIXED_MID_DEPTH;
-
-        // smooth the mid face depth over time
-        const float MID_FACE_DEPTH_SMOOTHING = 0.5f;
-        _smoothedMidFaceDepth = (_smoothedMidFaceDepth == UNINITIALIZED_FACE_DEPTH) ? midFaceDepth :
-            glm::mix(midFaceDepth, _smoothedMidFaceDepth, MID_FACE_DEPTH_SMOOTHING);
-
-        // convert from 11 to 8 bits for preview/local display
-        depthOffset = EIGHT_BIT_MIDPOINT - _smoothedMidFaceDepth;
-        depth.convertTo(_grayDepthFrame, CV_8UC1, 1.0, depthOffset);
-    }
-
-    QByteArray payload;
-    if (_videoSendMode != NO_VIDEO) {
-        if (_colorCodec.name == 0) {
-            // initialize encoder context(s)
-            vpx_codec_enc_cfg_t codecConfig;
-            vpx_codec_enc_config_default(vpx_codec_vp8_cx(), &codecConfig, 0);
-            codecConfig.rc_target_bitrate = ENCODED_FACE_WIDTH * ENCODED_FACE_HEIGHT * colorBitrateMultiplier *
-                codecConfig.rc_target_bitrate / codecConfig.g_w / codecConfig.g_h;
-            codecConfig.g_w = encodedWidth;
-            codecConfig.g_h = encodedHeight;
-            vpx_codec_enc_init(&_colorCodec, vpx_codec_vp8_cx(), &codecConfig, 0);
-
-            if (!depth.empty()) {
-                codecConfig.rc_target_bitrate *= depthBitrateMultiplier;
-                vpx_codec_enc_init(&_depthCodec, vpx_codec_vp8_cx(), &codecConfig, 0);
-            }
-        }
-
-        Mat transform;
-        if (_videoSendMode == FACE_VIDEO) {
-            // resize/rotate face into encoding rectangle
-            _faceColor.create(encodedHeight, encodedWidth, CV_8UC3);
-            warpAffine(color, _faceColor, faceTransform, _faceColor.size());
-
-        } else {
-            _faceColor = color;
-        }
-
-        // convert from RGB to YV12: see http://www.fourcc.org/yuv.php and
-        // http://docs.opencv.org/modules/imgproc/doc/miscellaneous_transformations.html#cvtcolor
-        const int ENCODED_BITS_PER_Y = 8;
-        const int ENCODED_BITS_PER_VU = 2;
-        const int ENCODED_BITS_PER_PIXEL = ENCODED_BITS_PER_Y + 2 * ENCODED_BITS_PER_VU;
-        const int BITS_PER_BYTE = 8;
-        _encodedFace.resize(encodedWidth * encodedHeight * ENCODED_BITS_PER_PIXEL / BITS_PER_BYTE);
-        vpx_image_t vpxImage;
-        vpx_img_wrap(&vpxImage, VPX_IMG_FMT_YV12, encodedWidth, encodedHeight, 1,
-            (unsigned char*)_encodedFace.data());
-        uchar* yline = vpxImage.planes[0];
-        uchar* vline = vpxImage.planes[1];
-        uchar* uline = vpxImage.planes[2];
-        const int Y_RED_WEIGHT = (int)(0.299 * 256);
-        const int Y_GREEN_WEIGHT = (int)(0.587 * 256);
-        const int Y_BLUE_WEIGHT = (int)(0.114 * 256);
-        const int V_RED_WEIGHT = (int)(0.713 * 256);
-        const int U_BLUE_WEIGHT = (int)(0.564 * 256);
-        int redIndex = 0;
-        int greenIndex = 1;
-        int blueIndex = 2;
-        if (format == GL_BGR) {
-            redIndex = 2;
-            blueIndex = 0;
-        }
-        for (int i = 0; i < encodedHeight; i += 2) {
-            uchar* ydest = yline;
-            uchar* vdest = vline;
-            uchar* udest = uline;
-            for (int j = 0; j < encodedWidth; j += 2) {
-                uchar* tl = _faceColor.ptr(i, j);
-                uchar* tr = _faceColor.ptr(i, j + 1);
-                uchar* bl = _faceColor.ptr(i + 1, j);
-                uchar* br = _faceColor.ptr(i + 1, j + 1);
-
-                ydest[0] = (tl[redIndex] * Y_RED_WEIGHT + tl[1] * Y_GREEN_WEIGHT + tl[blueIndex] * Y_BLUE_WEIGHT) >> 8;
-                ydest[1] = (tr[redIndex] * Y_RED_WEIGHT + tr[1] * Y_GREEN_WEIGHT + tr[blueIndex] * Y_BLUE_WEIGHT) >> 8;
-                ydest[vpxImage.stride[0]] = (bl[redIndex] * Y_RED_WEIGHT + bl[greenIndex] *
-                    Y_GREEN_WEIGHT + bl[blueIndex] * Y_BLUE_WEIGHT) >> 8;
-                ydest[vpxImage.stride[0] + 1] = (br[redIndex] * Y_RED_WEIGHT + br[greenIndex] *
-                    Y_GREEN_WEIGHT + br[blueIndex] * Y_BLUE_WEIGHT) >> 8;
-                ydest += 2;
-
-                int totalRed = tl[redIndex] + tr[redIndex] + bl[redIndex] + br[redIndex];
-                int totalGreen = tl[greenIndex] + tr[greenIndex] + bl[greenIndex] + br[greenIndex];
-                int totalBlue = tl[blueIndex] + tr[blueIndex] + bl[blueIndex] + br[blueIndex];
-                int totalY = (totalRed * Y_RED_WEIGHT + totalGreen * Y_GREEN_WEIGHT + totalBlue * Y_BLUE_WEIGHT) >> 8;
-
-                *vdest++ = (((totalRed - totalY) * V_RED_WEIGHT) >> 10) + 128;
-                *udest++ = (((totalBlue - totalY) * U_BLUE_WEIGHT) >> 10) + 128;
-            }
-            yline += vpxImage.stride[0] * 2;
-            vline += vpxImage.stride[1];
-            uline += vpxImage.stride[2];
-        }
-
-        // encode the frame
-        vpx_codec_encode(&_colorCodec, &vpxImage, ++_frameCount, 1, 0, VPX_DL_REALTIME);
-
-        // start the payload off with the aspect ratio (zero for full frame)
-        payload.append((const char*)&aspectRatio, sizeof(float));
-
-        // extract the encoded frame
-        vpx_codec_iter_t iterator = 0;
-        const vpx_codec_cx_pkt_t* packet;
-        while ((packet = vpx_codec_get_cx_data(&_colorCodec, &iterator)) != 0) {
-            if (packet->kind == VPX_CODEC_CX_FRAME_PKT) {
-                // prepend the length, which will indicate whether there's a depth frame too
-                payload.append((const char*)&packet->data.frame.sz, sizeof(packet->data.frame.sz));
-                payload.append((const char*)packet->data.frame.buf, packet->data.frame.sz);
-            }
-        }
-
-        if (!depth.empty()) {
-            // convert with mask
-            uchar* yline = vpxImage.planes[0];
-            uchar* vline = vpxImage.planes[1];
-            uchar* uline = vpxImage.planes[2];
-            const uchar EIGHT_BIT_MAXIMUM = 255;
-            for (int i = 0; i < encodedHeight; i += 2) {
-                uchar* ydest = yline;
-                uchar* vdest = vline;
-                uchar* udest = uline;
-                for (int j = 0; j < encodedWidth; j += 2) {
-                    ushort tl = *_smoothedFaceDepth.ptr<ushort>(i, j);
-                    ushort tr = *_smoothedFaceDepth.ptr<ushort>(i, j + 1);
-                    ushort bl = *_smoothedFaceDepth.ptr<ushort>(i + 1, j);
-                    ushort br = *_smoothedFaceDepth.ptr<ushort>(i + 1, j + 1);
-
-                    uchar mask = EIGHT_BIT_MAXIMUM;
-
-                    ydest[0] = (tl == ELEVEN_BIT_MINIMUM) ? (mask = EIGHT_BIT_MIDPOINT) :
-                        saturate_cast<uchar>(tl + depthOffset);
-                    ydest[1] = (tr == ELEVEN_BIT_MINIMUM) ? (mask = EIGHT_BIT_MIDPOINT) :
-                        saturate_cast<uchar>(tr + depthOffset);
-                    ydest[vpxImage.stride[0]] = (bl == ELEVEN_BIT_MINIMUM) ? (mask = EIGHT_BIT_MIDPOINT) :
-                        saturate_cast<uchar>(bl + depthOffset);
-                    ydest[vpxImage.stride[0] + 1] = (br == ELEVEN_BIT_MINIMUM) ? (mask = EIGHT_BIT_MIDPOINT) :
-                        saturate_cast<uchar>(br + depthOffset);
-                    ydest += 2;
-
-                    *vdest++ = mask;
-                    *udest++ = EIGHT_BIT_MIDPOINT;
-                }
-                yline += vpxImage.stride[0] * 2;
-                vline += vpxImage.stride[1];
-                uline += vpxImage.stride[2];
-            }
-
-            // encode the frame
-            vpx_codec_encode(&_depthCodec, &vpxImage, _frameCount, 1, 0, VPX_DL_REALTIME);
-
-            // extract the encoded frame
-            vpx_codec_iter_t iterator = 0;
-            const vpx_codec_cx_pkt_t* packet;
-            while ((packet = vpx_codec_get_cx_data(&_depthCodec, &iterator)) != 0) {
-                if (packet->kind == VPX_CODEC_CX_FRAME_PKT) {
-                    payload.append((const char*)packet->data.frame.buf, packet->data.frame.sz);
-                }
-            }
-        }
+        uchar* dest = (uchar*)_encodedFace.data() + vpxImage.stride[0] * ENCODED_FACE_HEIGHT;
+        for (int i = 0; i < ENCODED_FACE_HEIGHT; i++) {
+            memcpy(dest, _faceDepth.ptr(i), ENCODED_FACE_WIDTH);
+            dest += vpxImage.stride[0];
+        }
     }
 
-    QMetaObject::invokeMethod(Application::getInstance(), "sendAvatarFaceVideoMessage",
-        Q_ARG(int, _frameCount), Q_ARG(QByteArray, payload));
-
+    // encode the frame
+    vpx_codec_encode(&_codec, &vpxImage, ++_frameCount, 1, 0, VPX_DL_REALTIME);
+
+    // extract the encoded frame
+    vpx_codec_iter_t iterator = 0;
+    const vpx_codec_cx_pkt_t* packet;
+    while ((packet = vpx_codec_get_cx_data(&_codec, &iterator)) != 0) {
+        if (packet->kind == VPX_CODEC_CX_FRAME_PKT) {
+            // prepend the aspect ratio
+            QByteArray payload(sizeof(float), 0);
+            *(float*)payload.data() = _smoothedFaceRect.size.width / _smoothedFaceRect.size.height;
+            payload.append((const char*)packet->data.frame.buf, packet->data.frame.sz);
+            QMetaObject::invokeMethod(Application::getInstance(), "sendAvatarFaceVideoMessage", Q_ARG(int, _frameCount),
                Q_ARG(QByteArray, payload));
+        }
+    }
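As the added code above shows, vpx_codec_encode does not return the compressed frame directly; finished packets are pulled out with vpx_codec_get_cx_data, and only VPX_CODEC_CX_FRAME_PKT packets carry frame bytes. The retrieval idiom on its own (buffer handling is ours):

    // encode one frame and collect the compressed bytes
    vpx_codec_encode(&codec, &image, frameCount, 1, 0, VPX_DL_REALTIME);
    vpx_codec_iter_t iterator = 0;
    const vpx_codec_cx_pkt_t* packet;
    while ((packet = vpx_codec_get_cx_data(&codec, &iterator)) != 0) {
        if (packet->kind == VPX_CODEC_CX_FRAME_PKT) {
            output.append((const char*)packet->data.frame.buf, packet->data.frame.sz);
        }
    }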
     QMetaObject::invokeMethod(Application::getInstance()->getWebcam(), "setFrame",
-        Q_ARG(cv::Mat, color), Q_ARG(int, format), Q_ARG(cv::Mat, _grayDepthFrame), Q_ARG(float, _smoothedMidFaceDepth),
-        Q_ARG(float, aspectRatio), Q_ARG(cv::RotatedRect, _smoothedFaceRect), Q_ARG(bool, !payload.isEmpty()),
-        Q_ARG(JointVector, joints));
+        Q_ARG(cv::Mat, color), Q_ARG(int, format), Q_ARG(cv::Mat, _grayDepthFrame),
+        Q_ARG(cv::RotatedRect, _smoothedFaceRect), Q_ARG(JointVector, joints));
 }
 
 bool FrameGrabber::init() {
@@ -817,17 +690,6 @@ void FrameGrabber::updateHSVFrame(const Mat& frame, int format) {
     inRange(_hsvFrame, Scalar(0, 55, 65), Scalar(180, 256, 256), _mask);
 }
 
-void FrameGrabber::destroyCodecs() {
-    if (_colorCodec.name != 0) {
-        vpx_codec_destroy(&_colorCodec);
-        _colorCodec.name = 0;
-    }
-    if (_depthCodec.name != 0) {
-        vpx_codec_destroy(&_depthCodec);
-        _depthCodec.name = 0;
-    }
-}
-
 Joint::Joint(const glm::vec3& position, const glm::quat& rotation, const glm::vec3& projected) :
     isValid(true), position(position), rotation(rotation), projected(projected) {
 }

From 84eae284ea2d24ba3abb5f985f6c1ff7422bccc8 Mon Sep 17 00:00:00 2001
From: atlante45
Date: Fri, 2 Aug 2013 17:44:21 -0700
Subject: [PATCH 3/4] Changed shifted and meta variable names

---
 interface/src/Application.cpp | 42 +++++++++++++++++------------------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/interface/src/Application.cpp b/interface/src/Application.cpp
index e7ff59a6f4..d1ac237146 100644
--- a/interface/src/Application.cpp
+++ b/interface/src/Application.cpp
@@ -571,8 +571,8 @@ void Application::keyPressEvent(QKeyEvent* event) {
         _myAvatar.getHand().setRaveGloveEffectsMode((QKeyEvent*)event);
     }
 
-    bool meta = event->modifiers().testFlag(Qt::MetaModifier);
-    bool shifted = event->modifiers().testFlag(Qt::ShiftModifier);
+    bool isMeta = event->modifiers().testFlag(Qt::MetaModifier);
+    bool isShifted = event->modifiers().testFlag(Qt::ShiftModifier);
     switch (event->key()) {
         case Qt::Key_BracketLeft:
             _viewFrustumOffsetYaw -= 0.5;
@@ -645,9 +645,9 @@ void Application::keyPressEvent(QKeyEvent* event) {
             break;
 
         case Qt::Key_C:
-            if (shifted) {
+            if (isShifted) {
                 _occlusionCulling->trigger();
-            } else if (meta) {
+            } else if (isMeta) {
                 chooseVoxelPaintColor();
             } else {
                 _myAvatar.setDriveKeys(DOWN, 1);
@@ -659,7 +659,7 @@ void Application::keyPressEvent(QKeyEvent* event) {
             break;
 
         case Qt::Key_S:
-            if (shifted) {
+            if (isShifted) {
                 doTreeStats();
             } else {
                 _myAvatar.setDriveKeys(BACK, 1);
@@ -672,7 +672,7 @@ void Application::keyPressEvent(QKeyEvent* event) {
             break;
 
        case Qt::Key_G:
-            if (shifted) {
+            if (isShifted) {
                 _gravityUse->trigger();
             } else {
                 _eyedropperMode->trigger();
             }
             break;
 
         case Qt::Key_A:
-            if (shifted) {
+            if (isShifted) {
                 _renderAtmosphereOn->trigger();
             } else {
                 _myAvatar.setDriveKeys(ROT_LEFT, 1);
@@ -700,23 +700,23 @@ void Application::keyPressEvent(QKeyEvent* event) {
             break;
 
         case Qt::Key_Up:
-            _myAvatar.setDriveKeys(shifted ? UP : FWD, 1);
+            _myAvatar.setDriveKeys(isShifted ? UP : FWD, 1);
             break;
 
         case Qt::Key_Down:
-            _myAvatar.setDriveKeys(shifted ? DOWN : BACK, 1);
+            _myAvatar.setDriveKeys(isShifted ? DOWN : BACK, 1);
             break;
 
         case Qt::Key_Left:
-            _myAvatar.setDriveKeys(shifted ? LEFT : ROT_LEFT, 1);
+            _myAvatar.setDriveKeys(isShifted ? LEFT : ROT_LEFT, 1);
             break;
 
         case Qt::Key_Right:
-            _myAvatar.setDriveKeys(shifted ? RIGHT : ROT_RIGHT, 1);
+            _myAvatar.setDriveKeys(isShifted ? RIGHT : ROT_RIGHT, 1);
             break;
 
         case Qt::Key_I:
-            if (shifted) {
+            if (isShifted) {
                 _myCamera.setEyeOffsetOrientation(glm::normalize(
                     glm::quat(glm::vec3(0.002f, 0, 0)) * _myCamera.getEyeOffsetOrientation()));
             } else {
@@ -726,7 +726,7 @@ void Application::keyPressEvent(QKeyEvent* event) {
             break;
 
         case Qt::Key_K:
-            if (shifted) {
+            if (isShifted) {
                 _myCamera.setEyeOffsetOrientation(glm::normalize(
                     glm::quat(glm::vec3(-0.002f, 0, 0)) * _myCamera.getEyeOffsetOrientation()));
             } else {
@@ -736,7 +736,7 @@ void Application::keyPressEvent(QKeyEvent* event) {
             break;
 
         case Qt::Key_J:
-            if (shifted) {
+            if (isShifted) {
                 _myCamera.setEyeOffsetOrientation(glm::normalize(
                     glm::quat(glm::vec3(0, 0.002f, 0)) * _myCamera.getEyeOffsetOrientation()));
             } else {
@@ -746,7 +746,7 @@ void Application::keyPressEvent(QKeyEvent* event) {
             break;
 
         case Qt::Key_M:
-            if (shifted) {
+            if (isShifted) {
                 _myCamera.setEyeOffsetOrientation(glm::normalize(
                     glm::quat(glm::vec3(0, -0.002f, 0)) * _myCamera.getEyeOffsetOrientation()));
             } else {
@@ -756,7 +756,7 @@ void Application::keyPressEvent(QKeyEvent* event) {
             break;
 
         case Qt::Key_U:
-            if (shifted) {
+            if (isShifted) {
                 _myCamera.setEyeOffsetOrientation(glm::normalize(
                     glm::quat(glm::vec3(0, 0, -0.002f)) * _myCamera.getEyeOffsetOrientation()));
             } else {
@@ -766,7 +766,7 @@ void Application::keyPressEvent(QKeyEvent* event) {
             break;
 
         case Qt::Key_Y:
-            if (shifted) {
+            if (isShifted) {
                 _myCamera.setEyeOffsetOrientation(glm::normalize(
                     glm::quat(glm::vec3(0, 0, 0.002f)) * _myCamera.getEyeOffsetOrientation()));
             } else {
@@ -781,14 +781,14 @@ void Application::keyPressEvent(QKeyEvent* event) {
             _lookingInMirror->trigger();
             break;
         case Qt::Key_F:
-            if (shifted) {
+            if (isShifted) {
                 _frustumOn->trigger();
             } else {
                 _fullScreenMode->trigger();
             }
             break;
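The I/K/J/M/U/Y bindings above nudge the camera by composing a small fixed rotation into the current eye-offset orientation on each key press; glm::normalize keeps the accumulated quaternion from drifting off unit length. The pattern in isolation (function name ours):

    #include <glm/glm.hpp>
    #include <glm/gtc/quaternion.hpp>

    glm::quat nudgePitch(const glm::quat& orientation, float radians) {
        // quat(eulerAngles) builds the small increment; renormalize after composing
        return glm::normalize(glm::quat(glm::vec3(radians, 0.0f, 0.0f)) * orientation);
    }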
         case Qt::Key_V:
-            if (shifted) {
+            if (isShifted) {
                 _renderVoxels->trigger();
             } else {
                 _addVoxelMode->trigger();
             }
             break;
         case Qt::Key_P:
             _manualFirstPerson->trigger();
             break;
         case Qt::Key_R:
-            if (shifted) {
+            if (isShifted) {
                 _frustumRenderModeAction->trigger();
             } else {
                 _deleteVoxelMode->trigger();
@@ -808,7 +808,7 @@ void Application::keyPressEvent(QKeyEvent* event) {
             _colorVoxelMode->trigger();
             break;
         case Qt::Key_O:
-            if (shifted) {
+            if (isShifted) {
                 _viewFrustumFromOffset->trigger();
             } else {
                 _selectVoxelMode->trigger();

From be83bf5635157d59139aa1d5a827fc3496446a37 Mon Sep 17 00:00:00 2001
From: atlante45
Date: Fri, 2 Aug 2013 17:57:53 -0700
Subject: [PATCH 4/4] Fixed botched merge

---
 interface/src/Webcam.cpp | 598 ++++++++++++++++++++++++---------------
 interface/src/Webcam.h   |  48 ++--
 2 files changed, 392 insertions(+), 254 deletions(-)

diff --git a/interface/src/Webcam.cpp b/interface/src/Webcam.cpp
index e37ca6c70e..4d2edcac37 100644
--- a/interface/src/Webcam.cpp
+++ b/interface/src/Webcam.cpp
@@ -19,6 +19,7 @@
 
 #include "Application.h"
 #include "Webcam.h"
+#include "avatar/Face.h"
 
 using namespace cv;
 using namespace std;
@@ -32,7 +33,7 @@ int jointVectorMetaType = qRegisterMetaType<JointVector>("JointVector");
 int matMetaType = qRegisterMetaType<cv::Mat>("cv::Mat");
 int rotatedRectMetaType = qRegisterMetaType<cv::RotatedRect>("cv::RotatedRect");
 
-Webcam::Webcam() : _enabled(false), _active(false), _colorTextureID(0), _depthTextureID(0) {
+Webcam::Webcam() : _enabled(false), _active(false), _colorTextureID(0), _depthTextureID(0), _skeletonTrackingOn(false) {
     // the grabber simply runs as fast as possible
     _grabber = new FrameGrabber();
     _grabber->moveToThread(&_grabberThread);
@@ -46,20 +47,23 @@ void Webcam::setEnabled(bool enabled) {
         _grabberThread.start();
         _startTimestamp = 0;
         _frameCount = 0;
-        
+
         // let the grabber know we're ready for the first frame
         QMetaObject::invokeMethod(_grabber, "reset");
         QMetaObject::invokeMethod(_grabber, "grabFrame");
-        
+
     } else {
         QMetaObject::invokeMethod(_grabber, "shutdown");
         _active = false;
     }
 }
 
+const float UNINITIALIZED_FACE_DEPTH = 0.0f;
+
 void Webcam::reset() {
     _initialFaceRect = RotatedRect();
-    
+    _initialFaceDepth = UNINITIALIZED_FACE_DEPTH;
+
     if (_enabled) {
         // send a message to the grabber
         QMetaObject::invokeMethod(_grabber, "reset");
@@ -76,7 +80,7 @@ void Webcam::renderPreview(int screenWidth, int screenHeight) {
         int previewWidth = _textureSize.width * PREVIEW_HEIGHT / _textureSize.height;
         int top = screenHeight - 600;
         int left = screenWidth - previewWidth - 10;
-        
+
         glTexCoord2f(0, 0);
         glVertex2f(left, top);
         glTexCoord2f(1, 0);
@@ -86,7 +90,7 @@
         glTexCoord2f(0, 1);
         glVertex2f(left, top + PREVIEW_HEIGHT);
         glEnd();
-        
+
         if (_depthTextureID != 0) {
             glBindTexture(GL_TEXTURE_2D, _depthTextureID);
             glBegin(GL_QUADS);
@@ -99,10 +103,10 @@
             glTexCoord2f(0, 1);
             glVertex2f(left, top);
             glEnd();
-            
+
             glBindTexture(GL_TEXTURE_2D, 0);
             glDisable(GL_TEXTURE_2D);
-            
+
             if (!_joints.isEmpty()) {
                 glColor3f(1.0f, 0.0f, 0.0f);
                 glPointSize(4.0f);
@@ -121,7 +125,7 @@
             glBindTexture(GL_TEXTURE_2D, 0);
             glDisable(GL_TEXTURE_2D);
         }
-        
+
         glColor3f(1.0f, 1.0f, 1.0f);
         glBegin(GL_LINE_LOOP);
         Point2f facePoints[4];
@@ -133,7 +137,7 @@
         glVertex2f(left + facePoints[2].x * xScale, top + facePoints[2].y * yScale);
         glVertex2f(left + facePoints[3].x * xScale, top + facePoints[3].y * yScale);
         glEnd();
-        
+
         const int MAX_FPS_CHARACTERS = 30;
         char fps[MAX_FPS_CHARACTERS];
         sprintf(fps, "FPS: %d", (int)(roundf(_frameCount * 1000000.0f / (usecTimestampNow() - _startTimestamp))));
@@ -145,11 +149,14 @@ Webcam::~Webcam() {
     // stop the grabber thread
     _grabberThread.quit();
     _grabberThread.wait();
-    
+
     delete _grabber;
 }
 
+const float METERS_PER_MM = 1.0f / 1000.0f;
+
+void Webcam::setFrame(const Mat& color, int format, const Mat& depth, float midFaceDepth,
+    float aspectRatio, const RotatedRect& faceRect, bool sending, const JointVector& joints) {
     IplImage colorImage = color;
     glPixelStorei(GL_UNPACK_ROW_LENGTH, colorImage.widthStep / 3);
     if (_colorTextureID == 0) {
@@ -159,13 +166,13 @@
             0, format, GL_UNSIGNED_BYTE, colorImage.imageData);
         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
         qDebug("Capturing video at %gx%g.\n", _textureSize.width, _textureSize.height);
-        
+
     } else {
         glBindTexture(GL_TEXTURE_2D, _colorTextureID);
        glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, _textureSize.width, _textureSize.height, format,
             GL_UNSIGNED_BYTE, colorImage.imageData);
     }
-    
+
     if (!depth.empty()) {
         IplImage depthImage = depth;
         glPixelStorei(GL_UNPACK_ROW_LENGTH, depthImage.widthStep);
@@ -176,21 +183,23 @@
                 GL_LUMINANCE, GL_UNSIGNED_BYTE, depthImage.imageData);
             glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
             glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
-            
+
         } else {
             glBindTexture(GL_TEXTURE_2D, _depthTextureID);
             glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, _textureSize.width, _textureSize.height, GL_LUMINANCE,
-            GL_UNSIGNED_BYTE, depthImage.imageData);
+                GL_UNSIGNED_BYTE, depthImage.imageData);
         }
     }
     glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
     glBindTexture(GL_TEXTURE_2D, 0);
-    
-    // store our face rect and joints, update our frame count for fps computation
+
+    // store our various data, update our frame count for fps computation
+    _aspectRatio = aspectRatio;
     _faceRect = faceRect;
-    _joints = joints;
+    _sending = sending;
+    _joints = _skeletonTrackingOn ? joints : JointVector();
     _frameCount++;
-    
+
     const int MAX_FPS = 60;
     const int MIN_FRAME_DELAY = 1000000 / MAX_FPS;
     uint64_t now = usecTimestampNow();
@@ -201,14 +210,14 @@
         remaining -= (now - _lastFrameTimestamp);
     }
     _lastFrameTimestamp = now;
-    
+
     // see if we have joint data
     if (!_joints.isEmpty()) {
         _estimatedJoints.resize(NUM_AVATAR_JOINTS);
         glm::vec3 origin;
         if (_joints[AVATAR_JOINT_LEFT_HIP].isValid && _joints[AVATAR_JOINT_RIGHT_HIP].isValid) {
             origin = glm::mix(_joints[AVATAR_JOINT_LEFT_HIP].position, _joints[AVATAR_JOINT_RIGHT_HIP].position, 0.5f);
-            
+
         } else if (_joints[AVATAR_JOINT_TORSO].isValid) {
             const glm::vec3 TORSO_TO_PELVIS = glm::vec3(0.0f, -0.09f, -0.01f);
             origin = _joints[AVATAR_JOINT_TORSO].position + TORSO_TO_PELVIS;
@@ -226,40 +235,46 @@
         }
         _estimatedRotation = safeEulerAngles(_estimatedJoints[AVATAR_JOINT_HEAD_BASE].rotation);
         _estimatedPosition = _estimatedJoints[AVATAR_JOINT_HEAD_BASE].position;
-        
+
     } else {
         // roll is just the angle of the face rect
         const float ROTATION_SMOOTHING = 0.95f;
         _estimatedRotation.z = glm::mix(_faceRect.angle, _estimatedRotation.z, ROTATION_SMOOTHING);
-        
-        // determine position based on translation and scaling of the face rect
+
+        // determine position based on translation and scaling of the face rect/mean face depth
         if (_initialFaceRect.size.area() == 0) {
             _initialFaceRect = _faceRect;
             _estimatedPosition = glm::vec3();
-            
+            _initialFaceDepth = midFaceDepth;
+
         } else {
-            float proportion = sqrtf(_initialFaceRect.size.area() / (float)_faceRect.size.area());
-            const float DISTANCE_TO_CAMERA = 0.333f;
+            float proportion, z;
+            if (midFaceDepth == UNINITIALIZED_FACE_DEPTH) {
+                proportion = sqrtf(_initialFaceRect.size.area() / (float)_faceRect.size.area());
+                const float INITIAL_DISTANCE_TO_CAMERA = 0.333f;
+                z = INITIAL_DISTANCE_TO_CAMERA * proportion - INITIAL_DISTANCE_TO_CAMERA;
+
+            } else {
+                z = (midFaceDepth - _initialFaceDepth) * METERS_PER_MM;
+                proportion = midFaceDepth / _initialFaceDepth;
+            }
             const float POSITION_SCALE = 0.5f;
-            float z = DISTANCE_TO_CAMERA * proportion - DISTANCE_TO_CAMERA;
-            glm::vec3 position = glm::vec3(
+            _estimatedPosition = glm::vec3(
                 (_faceRect.center.x - _initialFaceRect.center.x) * proportion * POSITION_SCALE / _textureSize.width,
                 (_faceRect.center.y - _initialFaceRect.center.y) * proportion * POSITION_SCALE / _textureSize.width,
                 z);
-            const float POSITION_SMOOTHING = 0.95f;
-            _estimatedPosition = glm::mix(position, _estimatedPosition, POSITION_SMOOTHING);
         }
     }
-    
+
     // note that we have data
     _active = true;
-    
+
     // let the grabber know we're ready for the next frame
     QTimer::singleShot(qMax((int)remaining / 1000, 0), _grabber, SLOT(grabFrame()));
 }
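With a depth camera present, the restored path above derives z directly from the change in mean face depth (converted from millimeters to meters) and scales the x/y translation by the depth ratio rather than by apparent face size. A worked example with values of our own choosing: if the face started at 600 mm and is now at 700 mm, z = (700 - 600) / 1000 = 0.1 m away from the camera, and proportion = 700 / 600, roughly 1.17, enlarges the x/y offsets to compensate for the face covering fewer pixels at the greater distance.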
 
-FrameGrabber::FrameGrabber() : _initialized(false), _capture(0), _searchWindow(0, 0, 0, 0),
-    _depthOffset(0.0), _codec(), _frameCount(0) {
+FrameGrabber::FrameGrabber() : _initialized(false), _videoSendMode(FULL_FRAME_VIDEO), _capture(0), _searchWindow(0, 0, 0, 0),
+    _smoothedMidFaceDepth(UNINITIALIZED_FACE_DEPTH), _colorCodec(), _depthCodec(), _frameCount(0) {
 }
 
 FrameGrabber::~FrameGrabber() {
@@ -274,21 +289,21 @@ static AvatarJointID xnToAvatarJoint(XnSkeletonJoint joint) {
         case XN_SKEL_HEAD: return AVATAR_JOINT_HEAD_TOP;
         case XN_SKEL_NECK: return AVATAR_JOINT_HEAD_BASE;
         case XN_SKEL_TORSO: return AVATAR_JOINT_CHEST;
-        
+
         case XN_SKEL_LEFT_SHOULDER: return AVATAR_JOINT_RIGHT_ELBOW;
         case XN_SKEL_LEFT_ELBOW: return AVATAR_JOINT_RIGHT_WRIST;
-        
+
         case XN_SKEL_RIGHT_SHOULDER: return AVATAR_JOINT_LEFT_ELBOW;
         case XN_SKEL_RIGHT_ELBOW: return AVATAR_JOINT_LEFT_WRIST;
-        
+
         case XN_SKEL_LEFT_HIP: return AVATAR_JOINT_RIGHT_KNEE;
         case XN_SKEL_LEFT_KNEE: return AVATAR_JOINT_RIGHT_HEEL;
         case XN_SKEL_LEFT_FOOT: return AVATAR_JOINT_RIGHT_TOES;
-        
+
         case XN_SKEL_RIGHT_HIP: return AVATAR_JOINT_LEFT_KNEE;
         case XN_SKEL_RIGHT_KNEE: return AVATAR_JOINT_LEFT_HEEL;
         case XN_SKEL_RIGHT_FOOT: return AVATAR_JOINT_LEFT_TOES;
-        
+
         default: return AVATAR_JOINT_NULL;
     }
 }
@@ -297,19 +312,19 @@ static int getParentJoint(XnSkeletonJoint joint) {
     switch (joint) {
         case XN_SKEL_HEAD: return XN_SKEL_NECK;
         case XN_SKEL_TORSO: return -1;
-        
+
         case XN_SKEL_LEFT_ELBOW: return XN_SKEL_LEFT_SHOULDER;
         case XN_SKEL_LEFT_HAND: return XN_SKEL_LEFT_ELBOW;
-        
+
         case XN_SKEL_RIGHT_ELBOW: return XN_SKEL_RIGHT_SHOULDER;
         case XN_SKEL_RIGHT_HAND: return XN_SKEL_RIGHT_ELBOW;
-        
+
         case XN_SKEL_LEFT_KNEE: return XN_SKEL_LEFT_HIP;
         case XN_SKEL_LEFT_FOOT: return XN_SKEL_LEFT_KNEE;
-        
+
        case XN_SKEL_RIGHT_KNEE: return XN_SKEL_RIGHT_HIP;
         case XN_SKEL_RIGHT_FOOT: return XN_SKEL_RIGHT_KNEE;
-        
+
         default: return XN_SKEL_TORSO;
     }
 }
@@ -344,7 +359,7 @@ static void XN_CALLBACK_TYPE calibrationCompleted(SkeletonCapability& capability
     if (status == XN_CALIBRATION_STATUS_OK) {
         qDebug("Calibration completed for user %d.\n", id);
         capability.StartTracking(id);
-        
+
     } else {
         qDebug("Calibration failed for user %d.\n", id);
         capability.RequestCalibration(id, true);
@@ -352,6 +367,13 @@ static void XN_CALLBACK_TYPE calibrationCompleted(SkeletonCapability& capability
 }
 #endif
 
+void FrameGrabber::cycleVideoSendMode() {
+    _videoSendMode = (VideoSendMode)((_videoSendMode + 1) % VIDEO_SEND_MODE_COUNT);
+    _searchWindow = cv::Rect(0, 0, 0, 0);
+
+    destroyCodecs();
+}
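Two details worth noting in the restored code above. First, the joint tables map OpenNI "left" joints to avatar "right" joints and vice versa, because the camera faces the user and mirrors the skeleton. Second, cycleVideoSendMode steps through the mode enum with a modulo increment and must reset the search window and codecs because frame dimensions and bitrates differ per mode. The enum-cycling idiom by itself (the member order shown is our assumption; the patch only names the members):

    enum VideoSendMode { NO_VIDEO, FACE_VIDEO, FULL_FRAME_VIDEO, VIDEO_SEND_MODE_COUNT };

    VideoSendMode next(VideoSendMode mode) {
        // relies on the trailing _COUNT member to wrap around
        return (VideoSendMode)((mode + 1) % VIDEO_SEND_MODE_COUNT);
    }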
+
 void FrameGrabber::reset() {
     _searchWindow = cv::Rect(0, 0, 0, 0);
 
@@ -367,12 +389,9 @@ void FrameGrabber::shutdown() {
         cvReleaseCapture(&_capture);
         _capture = 0;
     }
-    if (_codec.name != 0) {
-        vpx_codec_destroy(&_codec);
-        _codec.name = 0;
-    }
+    destroyCodecs();
     _initialized = false;
-    
+
     thread()->quit();
 }
 
@@ -388,17 +407,17 @@ void FrameGrabber::grabFrame() {
     int format = GL_BGR;
     Mat color, depth;
     JointVector joints;
-    
+
 #ifdef HAVE_OPENNI
     if (_depthGenerator.IsValid()) {
         _xnContext.WaitAnyUpdateAll();
         color = Mat(_imageMetaData.YRes(), _imageMetaData.XRes(), CV_8UC3, (void*)_imageGenerator.GetImageMap());
         format = GL_RGB;
-        
+
         depth = Mat(_depthMetaData.YRes(), _depthMetaData.XRes(), CV_16UC1, (void*)_depthGenerator.GetDepthMap());
-        
+
         _userID = 0;
-        XnUInt16 userCount = 1; 
+        XnUInt16 userCount = 1;
         _userGenerator.GetUsers(&_userID, userCount);
         if (userCount > 0 && _userGenerator.GetSkeletonCap().IsTracking(_userID)) {
             joints.resize(NUM_AVATAR_JOINTS);
@@ -423,7 +442,6 @@ void FrameGrabber::grabFrame() {
                         _userID, (XnSkeletonJoint)parentJoint, parentOrientation);
                     rotation = glm::inverse(xnToGLM(parentOrientation.orientation)) * rotation;
                 }
-                const float METERS_PER_MM = 1.0f / 1000.0f;
                 joints[avatarJoint] = Joint(xnToGLM(transform.position.position, true) * METERS_PER_MM,
                     rotation, xnToGLM(projected));
             }
@@ -446,182 +464,291 @@ void FrameGrabber::grabFrame() {
         }
         color = image;
     }
-    
-    // if we don't have a search window (yet), try using the face cascade
-    int channels = 0;
-    float ranges[] = { 0, 180 };
-    const float* range = ranges;
-    if (_searchWindow.area() == 0) {
-        vector<Rect> faces;
-        _faceCascade.detectMultiScale(color, faces, 1.1, 6);
-        if (!faces.empty()) {
-            _searchWindow = faces.front();
-            updateHSVFrame(color, format);
-
-            Mat faceHsv(_hsvFrame, _searchWindow);
-            Mat faceMask(_mask, _searchWindow);
-            int sizes = 30;
-            calcHist(&faceHsv, 1, &channels, faceMask, _histogram, 1, &sizes, &range);
-            double min, max;
-            minMaxLoc(_histogram, &min, &max);
-            _histogram.convertTo(_histogram, -1, (max == 0.0) ? 0.0 : 255.0 / max);
-        }
-    }
-    RotatedRect faceRect;
-    if (_searchWindow.area() > 0) {
-        updateHSVFrame(color, format);
-
-        calcBackProject(&_hsvFrame, 1, &channels, _histogram, _backProject, &range);
-        bitwise_and(_backProject, _mask, _backProject);
-
-        faceRect = CamShift(_backProject, _searchWindow, TermCriteria(CV_TERMCRIT_EPS | CV_TERMCRIT_ITER, 10, 1));
-        Rect faceBounds = faceRect.boundingRect();
-        Rect imageBounds(0, 0, color.cols, color.rows);
-        _searchWindow = Rect(clip(faceBounds.tl(), imageBounds), clip(faceBounds.br(), imageBounds));
-    }
-    
-#ifdef HAVE_OPENNI
-    if (_depthGenerator.IsValid()) {
-        // convert from 11 to 8 bits, centered about the mean face depth (if possible)
-        if (_searchWindow.area() > 0) {
-            const double DEPTH_OFFSET_SMOOTHING = 0.95;
-            const double EIGHT_BIT_MIDPOINT = 128.0;
-            double meanOffset = EIGHT_BIT_MIDPOINT - mean(depth(_searchWindow))[0];
-            _depthOffset = (_depthOffset == 0.0) ? meanOffset : glm::mix(meanOffset, _depthOffset, DEPTH_OFFSET_SMOOTHING);
-        }
-        depth.convertTo(_grayDepthFrame, CV_8UC1, 1.0, _depthOffset);
-    }
-#endif
 
     const int ENCODED_FACE_WIDTH = 128;
     const int ENCODED_FACE_HEIGHT = 128;
-    int combinedFaceHeight = ENCODED_FACE_HEIGHT * (depth.empty() ? 1 : 2);
-    if (_codec.name == 0) {
-        // initialize encoder context
-        vpx_codec_enc_cfg_t codecConfig;
-        vpx_codec_enc_config_default(vpx_codec_vp8_cx(), &codecConfig, 0);
-        codecConfig.rc_target_bitrate = ENCODED_FACE_WIDTH * combinedFaceHeight * codecConfig.rc_target_bitrate /
-            codecConfig.g_w / codecConfig.g_h;
-        codecConfig.g_w = ENCODED_FACE_WIDTH;
-        codecConfig.g_h = combinedFaceHeight;
-        vpx_codec_enc_init(&_codec, vpx_codec_vp8_cx(), &codecConfig, 0);
-    }
-    
-    // correct for 180 degree rotations
-    if (faceRect.angle < -90.0f) {
-        faceRect.angle += 180.0f;
-    
-    } else if (faceRect.angle > 90.0f) {
-        faceRect.angle -= 180.0f;
-    }
-    
-    // compute the smoothed face rect
-    if (_smoothedFaceRect.size.area() == 0) {
-        _smoothedFaceRect = faceRect;
-    } else {
-        const float FACE_RECT_SMOOTHING = 0.9f;
-        _smoothedFaceRect.center.x = glm::mix(faceRect.center.x, _smoothedFaceRect.center.x, FACE_RECT_SMOOTHING);
-        _smoothedFaceRect.center.y = glm::mix(faceRect.center.y, _smoothedFaceRect.center.y, FACE_RECT_SMOOTHING);
-        _smoothedFaceRect.size.width = glm::mix(faceRect.size.width, _smoothedFaceRect.size.width, FACE_RECT_SMOOTHING);
-        _smoothedFaceRect.size.height = glm::mix(faceRect.size.height, _smoothedFaceRect.size.height, FACE_RECT_SMOOTHING);
-        _smoothedFaceRect.angle = glm::mix(faceRect.angle, _smoothedFaceRect.angle, FACE_RECT_SMOOTHING);
-    }
-    
-    // resize/rotate face into encoding rectangle
-    _faceColor.create(ENCODED_FACE_WIDTH, ENCODED_FACE_HEIGHT, CV_8UC3);
-    Point2f sourcePoints[4];
-    _smoothedFaceRect.points(sourcePoints);
-    Point2f destPoints[] = { Point2f(0, ENCODED_FACE_HEIGHT), Point2f(0, 0), Point2f(ENCODED_FACE_WIDTH, 0) };
-    Mat transform = getAffineTransform(sourcePoints, destPoints);
-    warpAffine(color, _faceColor, transform, _faceColor.size());
-    
-    // convert from RGB to YV12
-    const int ENCODED_BITS_PER_Y = 8;
-    const int ENCODED_BITS_PER_VU = 2;
-    const int ENCODED_BITS_PER_PIXEL = ENCODED_BITS_PER_Y + 2 * ENCODED_BITS_PER_VU;
-    const int BITS_PER_BYTE = 8;
-    _encodedFace.fill(128, ENCODED_FACE_WIDTH * combinedFaceHeight * ENCODED_BITS_PER_PIXEL / BITS_PER_BYTE);
-    vpx_image_t vpxImage;
-    vpx_img_wrap(&vpxImage, VPX_IMG_FMT_YV12, ENCODED_FACE_WIDTH, combinedFaceHeight, 1, (unsigned char*)_encodedFace.data());
-    uchar* yline = vpxImage.planes[0];
-    uchar* vline = vpxImage.planes[1];
-    uchar* uline = vpxImage.planes[2];
-    const int Y_RED_WEIGHT = (int)(0.299 * 256);
-    const int Y_GREEN_WEIGHT = (int)(0.587 * 256);
-    const int Y_BLUE_WEIGHT = (int)(0.114 * 256);
-    const int V_RED_WEIGHT = (int)(0.713 * 256);
-    const int U_BLUE_WEIGHT = (int)(0.564 * 256);
-    int redIndex = 0;
-    int greenIndex = 1;
-    int blueIndex = 2;
-    if (format == GL_BGR) {
-        redIndex = 2;
-        blueIndex = 0;
-    }
-    for (int i = 0; i < ENCODED_FACE_HEIGHT; i += 2) {
-        uchar* ydest = yline;
-        uchar* vdest = vline;
-        uchar* udest = uline;
-        for (int j = 0; j < ENCODED_FACE_WIDTH; j += 2) {
-            uchar* tl = _faceColor.ptr(i, j);
-            uchar* tr = _faceColor.ptr(i, j + 1);
-            uchar* bl = _faceColor.ptr(i + 1, j);
-            uchar* br = _faceColor.ptr(i + 1, j + 1);
-            
-            ydest[0] = (tl[redIndex] * Y_RED_WEIGHT + tl[1] * Y_GREEN_WEIGHT + tl[blueIndex] * Y_BLUE_WEIGHT) >> 8;
-            ydest[1] = (tr[redIndex] * Y_RED_WEIGHT + tr[1] * Y_GREEN_WEIGHT + tr[blueIndex] * Y_BLUE_WEIGHT) >> 8;
-            ydest[ENCODED_FACE_WIDTH] = (bl[redIndex] * Y_RED_WEIGHT + bl[greenIndex] *
                Y_GREEN_WEIGHT + bl[blueIndex] * Y_BLUE_WEIGHT) >> 8;
-            ydest[ENCODED_FACE_WIDTH + 1] = (br[redIndex] * Y_RED_WEIGHT + br[greenIndex] *
                Y_GREEN_WEIGHT + br[blueIndex] * Y_BLUE_WEIGHT) >> 8;
-            ydest += 2;
-            
-            int totalRed = tl[redIndex] + tr[redIndex] + bl[redIndex] + br[redIndex];
-            int totalGreen = tl[greenIndex] + tr[greenIndex] + bl[greenIndex] + br[greenIndex];
-            int totalBlue = tl[blueIndex] + tr[blueIndex] + bl[blueIndex] + br[blueIndex];
-            int totalY = (totalRed * Y_RED_WEIGHT + totalGreen * Y_GREEN_WEIGHT + totalBlue * Y_BLUE_WEIGHT) >> 8;
-            
-            *vdest++ = (((totalRed - totalY) * V_RED_WEIGHT) >> 10) + 128;
-            *udest++ = (((totalBlue - totalY) * U_BLUE_WEIGHT) >> 10) + 128;
-        }
-        yline += vpxImage.stride[0] * 2;
-        vline += vpxImage.stride[1];
-        uline += vpxImage.stride[2];
-    }
-    
-    // if we have depth data, warp that and just copy it in
-    if (!depth.empty()) {
-        _faceDepth.create(ENCODED_FACE_WIDTH, ENCODED_FACE_HEIGHT, CV_8UC1);
-        warpAffine(_grayDepthFrame, _faceDepth, transform, _faceDepth.size());
-        
-        uchar* dest = (uchar*)_encodedFace.data() + vpxImage.stride[0] * ENCODED_FACE_HEIGHT;
-        for (int i = 0; i < ENCODED_FACE_HEIGHT; i++) {
-            memcpy(dest, _faceDepth.ptr(i), ENCODED_FACE_WIDTH);
-            dest += vpxImage.stride[0];
-        }
-    }
-    
-    // encode the frame
-    vpx_codec_encode(&_codec, &vpxImage, ++_frameCount, 1, 0, VPX_DL_REALTIME);
-    
-    // extract the encoded frame
-    vpx_codec_iter_t iterator = 0;
-    const vpx_codec_cx_pkt_t* packet;
-    while ((packet = vpx_codec_get_cx_data(&_codec, &iterator)) != 0) {
-        if (packet->kind == VPX_CODEC_CX_FRAME_PKT) {
-            // prepend the aspect ratio
-            QByteArray payload(sizeof(float), 0);
-            *(float*)payload.data() = _smoothedFaceRect.size.width / _smoothedFaceRect.size.height;
-            payload.append((const char*)packet->data.frame.buf, packet->data.frame.sz);
-            QMetaObject::invokeMethod(Application::getInstance(), "sendAvatarFaceVideoMessage", Q_ARG(int, _frameCount),
-                Q_ARG(QByteArray, payload));
-        }
-    }
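Between patches 2 and 4, the wire format changes: the removed code above sent one aspect-ratio float followed by a single combined VP8 frame per message, while the code restored below frames the payload as aspect ratio, then the color frame's size and bytes, then optionally the depth frame's bytes, letting the receiver detect a depth frame from the leftover length. Sketched as a layout (our notation, inferred from the append order and comments in the surrounding code):

    // payload layout restored by patch 4:
    //   float  aspectRatio        (zero signals full-frame video)
    //   size_t colorFrameSize     (prepended length)
    //   byte   colorFrame[colorFrameSize]
    //   byte   depthFrame[...]    (present iff bytes remain after the color frame)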
+    int encodedWidth;
+    int encodedHeight;
+    float colorBitrateMultiplier = 1.0f;
+    float depthBitrateMultiplier = 1.0f;
+    Mat faceTransform;
+    float aspectRatio;
+    if (_videoSendMode == FULL_FRAME_VIDEO) {
+        // no need to find the face if we're sending full frame video
+        _smoothedFaceRect = RotatedRect(Point2f(color.cols / 2.0f, color.rows / 2.0f), Size2f(color.cols, color.rows), 0.0f);
+        encodedWidth = color.cols;
+        encodedHeight = color.rows;
+        aspectRatio = FULL_FRAME_ASPECT;
+        colorBitrateMultiplier = 4.0f;
 
+    } else {
+        // if we don't have a search window (yet), try using the face cascade
+        int channels = 0;
+        float ranges[] = { 0, 180 };
+        const float* range = ranges;
+        if (_searchWindow.area() == 0) {
+            vector<Rect> faces;
+            _faceCascade.detectMultiScale(color, faces, 1.1, 6);
+            if (!faces.empty()) {
+                _searchWindow = faces.front();
+                updateHSVFrame(color, format);
+
+                Mat faceHsv(_hsvFrame, _searchWindow);
+                Mat faceMask(_mask, _searchWindow);
+                int sizes = 30;
+                calcHist(&faceHsv, 1, &channels, faceMask, _histogram, 1, &sizes, &range);
+                double min, max;
+                minMaxLoc(_histogram, &min, &max);
+                _histogram.convertTo(_histogram, -1, (max == 0.0) ? 0.0 : 255.0 / max);
+            }
+        }
+        RotatedRect faceRect;
+        if (_searchWindow.area() > 0) {
+            updateHSVFrame(color, format);
+
+            calcBackProject(&_hsvFrame, 1, &channels, _histogram, _backProject, &range);
+            bitwise_and(_backProject, _mask, _backProject);
+
+            faceRect = CamShift(_backProject, _searchWindow, TermCriteria(CV_TERMCRIT_EPS | CV_TERMCRIT_ITER, 10, 1));
+            Rect faceBounds = faceRect.boundingRect();
+            Rect imageBounds(0, 0, color.cols, color.rows);
+            _searchWindow = Rect(clip(faceBounds.tl(), imageBounds), clip(faceBounds.br(), imageBounds));
+        }
+        encodedWidth = ENCODED_FACE_WIDTH;
+        encodedHeight = ENCODED_FACE_HEIGHT;
+        depthBitrateMultiplier = 2.0f;
+
+        // correct for 180 degree rotations
+        if (faceRect.angle < -90.0f) {
+            faceRect.angle += 180.0f;
+
+        } else if (faceRect.angle > 90.0f) {
+            faceRect.angle -= 180.0f;
+        }
+
+        // compute the smoothed face rect
+        if (_smoothedFaceRect.size.area() == 0) {
+            _smoothedFaceRect = faceRect;
+
+        } else {
+            const float FACE_RECT_SMOOTHING = 0.9f;
+            _smoothedFaceRect.center.x = glm::mix(faceRect.center.x, _smoothedFaceRect.center.x, FACE_RECT_SMOOTHING);
+            _smoothedFaceRect.center.y = glm::mix(faceRect.center.y, _smoothedFaceRect.center.y, FACE_RECT_SMOOTHING);
+            _smoothedFaceRect.size.width = glm::mix(faceRect.size.width, _smoothedFaceRect.size.width, FACE_RECT_SMOOTHING);
+            _smoothedFaceRect.size.height = glm::mix(faceRect.size.height, _smoothedFaceRect.size.height, FACE_RECT_SMOOTHING);
+            _smoothedFaceRect.angle = glm::mix(faceRect.angle, _smoothedFaceRect.angle, FACE_RECT_SMOOTHING);
+        }
+
+        // use the face rect to compute the face transform, aspect ratio
+        Point2f sourcePoints[4];
+        _smoothedFaceRect.points(sourcePoints);
+        Point2f destPoints[] = { Point2f(0, encodedHeight), Point2f(0, 0), Point2f(encodedWidth, 0) };
+        faceTransform = getAffineTransform(sourcePoints, destPoints);
+        aspectRatio = _smoothedFaceRect.size.width / _smoothedFaceRect.size.height;
+    }
+
+    const ushort ELEVEN_BIT_MINIMUM = 0;
+    const uchar EIGHT_BIT_MIDPOINT = 128;
+    double depthOffset;
     if (!depth.empty()) {
-        _faceDepth.create(ENCODED_FACE_WIDTH, ENCODED_FACE_HEIGHT, CV_8UC1);
-        warpAffine(_grayDepthFrame, _faceDepth, transform, _faceDepth.size());
+        if (_videoSendMode == FACE_VIDEO) {
+            // warp the face depth without interpolation (because it will contain invalid zero values)
+            _faceDepth.create(encodedHeight, encodedWidth, CV_16UC1);
+            warpAffine(depth, _faceDepth, faceTransform, _faceDepth.size(), INTER_NEAREST);
 
-        uchar* dest = (uchar*)_encodedFace.data() + vpxImage.stride[0] * ENCODED_FACE_HEIGHT;
-        for (int i = 0; i < ENCODED_FACE_HEIGHT; i++) {
-            memcpy(dest, _faceDepth.ptr(i), ENCODED_FACE_WIDTH);
-            dest += vpxImage.stride[0];
+        } else {
+            _faceDepth = depth;
         }
-    }
+        _smoothedFaceDepth.create(encodedHeight, encodedWidth, CV_16UC1);
+
+        // smooth the depth over time
+        const ushort ELEVEN_BIT_MAXIMUM = 2047;
+        const float DEPTH_SMOOTHING = 0.25f;
+        ushort* src = _faceDepth.ptr<ushort>();
+        ushort* dest = _smoothedFaceDepth.ptr<ushort>();
+        ushort minimumDepth = numeric_limits<ushort>::max();
+        for (int i = 0; i < encodedHeight; i++) {
+            for (int j = 0; j < encodedWidth; j++) {
+                ushort depth = *src++;
+                if (depth != ELEVEN_BIT_MINIMUM && depth != ELEVEN_BIT_MAXIMUM) {
+                    minimumDepth = min(minimumDepth, depth);
+                    *dest = (*dest == ELEVEN_BIT_MINIMUM) ? depth : (ushort)glm::mix(depth, *dest, DEPTH_SMOOTHING);
+                }
+                dest++;
+            }
+        }
+        const ushort MINIMUM_DEPTH_OFFSET = 64;
+        const float FIXED_MID_DEPTH = 640.0f;
+        float midFaceDepth = (_videoSendMode == FACE_VIDEO) ? (minimumDepth + MINIMUM_DEPTH_OFFSET) : FIXED_MID_DEPTH;
+
+        // smooth the mid face depth over time
+        const float MID_FACE_DEPTH_SMOOTHING = 0.5f;
+        _smoothedMidFaceDepth = (_smoothedMidFaceDepth == UNINITIALIZED_FACE_DEPTH) ? midFaceDepth :
+            glm::mix(midFaceDepth, _smoothedMidFaceDepth, MID_FACE_DEPTH_SMOOTHING);
+
+        // convert from 11 to 8 bits for preview/local display
+        depthOffset = EIGHT_BIT_MIDPOINT - _smoothedMidFaceDepth;
+        depth.convertTo(_grayDepthFrame, CV_8UC1, 1.0, depthOffset);
+    }
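The restored loop above blends each new 11-bit depth sample into the previous frame's value with DEPTH_SMOOTHING = 0.25, but only when the sample is valid: 0 and 2047 are the sensor's no-reading sentinels, so they update neither the running value nor the minimum. Reduced to a single sample (names ours; assumes glm for the mix):

    // temporal smoothing of one 11-bit depth sample; sentinel readings pass through
    const unsigned short INVALID_MIN = 0, INVALID_MAX = 2047;

    unsigned short smoothSample(unsigned short sample, unsigned short previous, float smoothing) {
        if (sample == INVALID_MIN || sample == INVALID_MAX) {
            return previous;                       // ignore no-reading sentinels
        }
        return (previous == INVALID_MIN) ? sample  // first valid sample wins
            : (unsigned short)glm::mix((float)sample, (float)previous, smoothing);
    }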
+
+    QByteArray payload;
+    if (_videoSendMode != NO_VIDEO) {
+        if (_colorCodec.name == 0) {
+            // initialize encoder context(s)
+            vpx_codec_enc_cfg_t codecConfig;
+            vpx_codec_enc_config_default(vpx_codec_vp8_cx(), &codecConfig, 0);
+            codecConfig.rc_target_bitrate = ENCODED_FACE_WIDTH * ENCODED_FACE_HEIGHT * colorBitrateMultiplier *
+                codecConfig.rc_target_bitrate / codecConfig.g_w / codecConfig.g_h;
+            codecConfig.g_w = encodedWidth;
+            codecConfig.g_h = encodedHeight;
+            vpx_codec_enc_init(&_colorCodec, vpx_codec_vp8_cx(), &codecConfig, 0);
+
+            if (!depth.empty()) {
+                codecConfig.rc_target_bitrate *= depthBitrateMultiplier;
+                vpx_codec_enc_init(&_depthCodec, vpx_codec_vp8_cx(), &codecConfig, 0);
+            }
+        }
+
+        Mat transform;
+        if (_videoSendMode == FACE_VIDEO) {
+            // resize/rotate face into encoding rectangle
+            _faceColor.create(encodedHeight, encodedWidth, CV_8UC3);
+            warpAffine(color, _faceColor, faceTransform, _faceColor.size());
+
+        } else {
+            _faceColor = color;
+        }
+
+        // convert from RGB to YV12: see http://www.fourcc.org/yuv.php and
+        // http://docs.opencv.org/modules/imgproc/doc/miscellaneous_transformations.html#cvtcolor
+        const int ENCODED_BITS_PER_Y = 8;
+        const int ENCODED_BITS_PER_VU = 2;
+        const int ENCODED_BITS_PER_PIXEL = ENCODED_BITS_PER_Y + 2 * ENCODED_BITS_PER_VU;
+        const int BITS_PER_BYTE = 8;
+        _encodedFace.resize(encodedWidth * encodedHeight * ENCODED_BITS_PER_PIXEL / BITS_PER_BYTE);
+        vpx_image_t vpxImage;
+        vpx_img_wrap(&vpxImage, VPX_IMG_FMT_YV12, encodedWidth, encodedHeight, 1,
+            (unsigned char*)_encodedFace.data());
+        uchar* yline = vpxImage.planes[0];
+        uchar* vline = vpxImage.planes[1];
+        uchar* uline = vpxImage.planes[2];
+        const int Y_RED_WEIGHT = (int)(0.299 * 256);
+        const int Y_GREEN_WEIGHT = (int)(0.587 * 256);
+        const int Y_BLUE_WEIGHT = (int)(0.114 * 256);
+        const int V_RED_WEIGHT = (int)(0.713 * 256);
+        const int U_BLUE_WEIGHT = (int)(0.564 * 256);
+        int redIndex = 0;
+        int greenIndex = 1;
+        int blueIndex = 2;
+        if (format == GL_BGR) {
+            redIndex = 2;
+            blueIndex = 0;
+        }
+        for (int i = 0; i < encodedHeight; i += 2) {
+            uchar* ydest = yline;
+            uchar* vdest = vline;
+            uchar* udest = uline;
+            for (int j = 0; j < encodedWidth; j += 2) {
+                uchar* tl = _faceColor.ptr(i, j);
+                uchar* tr = _faceColor.ptr(i, j + 1);
+                uchar* bl = _faceColor.ptr(i + 1, j);
+                uchar* br = _faceColor.ptr(i + 1, j + 1);
+
+                ydest[0] = (tl[redIndex] * Y_RED_WEIGHT + tl[1] * Y_GREEN_WEIGHT + tl[blueIndex] * Y_BLUE_WEIGHT) >> 8;
+                ydest[1] = (tr[redIndex] * Y_RED_WEIGHT + tr[1] * Y_GREEN_WEIGHT + tr[blueIndex] * Y_BLUE_WEIGHT) >> 8;
+                ydest[vpxImage.stride[0]] = (bl[redIndex] * Y_RED_WEIGHT + bl[greenIndex] *
+                    Y_GREEN_WEIGHT + bl[blueIndex] * Y_BLUE_WEIGHT) >> 8;
+                ydest[vpxImage.stride[0] + 1] = (br[redIndex] * Y_RED_WEIGHT + br[greenIndex] *
+                    Y_GREEN_WEIGHT + br[blueIndex] * Y_BLUE_WEIGHT) >> 8;
+                ydest += 2;
+
+                int totalRed = tl[redIndex] + tr[redIndex] + bl[redIndex] + br[redIndex];
+                int totalGreen = tl[greenIndex] + tr[greenIndex] + bl[greenIndex] + br[greenIndex];
+                int totalBlue = tl[blueIndex] + tr[blueIndex] + bl[blueIndex] + br[blueIndex];
+                int totalY = (totalRed * Y_RED_WEIGHT + totalGreen * Y_GREEN_WEIGHT + totalBlue * Y_BLUE_WEIGHT) >> 8;
+
+                *vdest++ = (((totalRed - totalY) * V_RED_WEIGHT) >> 10) + 128;
+                *udest++ = (((totalBlue - totalY) * U_BLUE_WEIGHT) >> 10) + 128;
+            }
+            yline += vpxImage.stride[0] * 2;
+            vline += vpxImage.stride[1];
+            uline += vpxImage.stride[2];
+        }
+
+        // encode the frame
+        vpx_codec_encode(&_colorCodec, &vpxImage, ++_frameCount, 1, 0, VPX_DL_REALTIME);
+
+        // start the payload off with the aspect ratio (zero for full frame)
+        payload.append((const char*)&aspectRatio, sizeof(float));
+
+        // extract the encoded frame
+        vpx_codec_iter_t iterator = 0;
+        const vpx_codec_cx_pkt_t* packet;
+        while ((packet = vpx_codec_get_cx_data(&_colorCodec, &iterator)) != 0) {
+            if (packet->kind == VPX_CODEC_CX_FRAME_PKT) {
+                // prepend the length, which will indicate whether there's a depth frame too
+                payload.append((const char*)&packet->data.frame.sz, sizeof(packet->data.frame.sz));
+                payload.append((const char*)packet->data.frame.buf, packet->data.frame.sz);
+            }
+        }
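The fixed-point arithmetic in the conversion loop above is easy to misread. The weights are the BT.601 coefficients scaled by 256, and the chroma samples are computed from 2x2 pixel sums, which carry an extra factor of four; the combined scale of 256 * 4 = 1024 is what the right-shift by 10 removes. A per-pixel reference version of the same math (names are my own, not from the patch):

typedef unsigned char uchar;

// BT.601-style luma from one RGB pixel, weights 0.299/0.587/0.114 scaled by 256.
uchar rgbToY(int r, int g, int b) {
    return (uchar)((r * 76 + g * 150 + b * 29) >> 8);
}

// Chroma relative to luma, weights 0.713 and 0.564 scaled by 256, shifted back
// by 8 bits and re-centered on 128. The loop above operates on 2x2 sums, so it
// shifts by 10 bits instead: 8 for the weight scale plus 2 to average four samples.
uchar rgbToV(int r, int y) { return (uchar)((((r - y) * 182) >> 8) + 128); }
uchar rgbToU(int b, int y) { return (uchar)((((b - y) * 144) >> 8) + 128); }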
+
+        if (!depth.empty()) {
+            // convert with mask
+            uchar* yline = vpxImage.planes[0];
+            uchar* vline = vpxImage.planes[1];
+            uchar* uline = vpxImage.planes[2];
+            const uchar EIGHT_BIT_MAXIMUM = 255;
+            for (int i = 0; i < encodedHeight; i += 2) {
+                uchar* ydest = yline;
+                uchar* vdest = vline;
+                uchar* udest = uline;
+                for (int j = 0; j < encodedWidth; j += 2) {
+                    ushort tl = *_smoothedFaceDepth.ptr<ushort>(i, j);
+                    ushort tr = *_smoothedFaceDepth.ptr<ushort>(i, j + 1);
+                    ushort bl = *_smoothedFaceDepth.ptr<ushort>(i + 1, j);
+                    ushort br = *_smoothedFaceDepth.ptr<ushort>(i + 1, j + 1);
+
+                    uchar mask = EIGHT_BIT_MAXIMUM;
+
+                    ydest[0] = (tl == ELEVEN_BIT_MINIMUM) ? (mask = EIGHT_BIT_MIDPOINT) :
+                        saturate_cast<uchar>(tl + depthOffset);
+                    ydest[1] = (tr == ELEVEN_BIT_MINIMUM) ? (mask = EIGHT_BIT_MIDPOINT) :
+                        saturate_cast<uchar>(tr + depthOffset);
+                    ydest[vpxImage.stride[0]] = (bl == ELEVEN_BIT_MINIMUM) ? (mask = EIGHT_BIT_MIDPOINT) :
+                        saturate_cast<uchar>(bl + depthOffset);
+                    ydest[vpxImage.stride[0] + 1] = (br == ELEVEN_BIT_MINIMUM) ? (mask = EIGHT_BIT_MIDPOINT) :
+                        saturate_cast<uchar>(br + depthOffset);
+                    ydest += 2;
+
+                    *vdest++ = mask;
+                    *udest++ = EIGHT_BIT_MIDPOINT;
+                }
+                yline += vpxImage.stride[0] * 2;
+                vline += vpxImage.stride[1];
+                uline += vpxImage.stride[2];
+            }
+
+            // encode the frame
+            vpx_codec_encode(&_depthCodec, &vpxImage, _frameCount, 1, 0, VPX_DL_REALTIME);
+
+            // extract the encoded frame
+            vpx_codec_iter_t iterator = 0;
+            const vpx_codec_cx_pkt_t* packet;
+            while ((packet = vpx_codec_get_cx_data(&_depthCodec, &iterator)) != 0) {
+                if (packet->kind == VPX_CODEC_CX_FRAME_PKT) {
+                    payload.append((const char*)packet->data.frame.buf, packet->data.frame.sz);
+                }
+            }
+        }
+    }
+    QMetaObject::invokeMethod(Application::getInstance(), "sendAvatarFaceVideoMessage",
+        Q_ARG(int, _frameCount), Q_ARG(QByteArray, payload));
+
     QMetaObject::invokeMethod(Application::getInstance()->getWebcam(), "setFrame",
-        Q_ARG(cv::Mat, color), Q_ARG(int, format), Q_ARG(cv::Mat, _grayDepthFrame),
-        Q_ARG(cv::RotatedRect, _smoothedFaceRect), Q_ARG(JointVector, joints));
+        Q_ARG(cv::Mat, color), Q_ARG(int, format), Q_ARG(cv::Mat, _grayDepthFrame), Q_ARG(float, _smoothedMidFaceDepth),
+        Q_ARG(float, aspectRatio), Q_ARG(cv::RotatedRect, _smoothedFaceRect), Q_ARG(bool, !payload.isEmpty()),
+        Q_ARG(JointVector, joints));
 }
 
 bool FrameGrabber::init() {
@@ -643,19 +770,19 @@ bool FrameGrabber::init() {
         _depthGenerator.GetMetaData(_depthMetaData);
         _imageGenerator.SetPixelFormat(XN_PIXEL_FORMAT_RGB24);
         _imageGenerator.GetMetaData(_imageMetaData);
-        
+
         XnCallbackHandle userCallbacks, calibrationStartCallback, calibrationCompleteCallback;
         _userGenerator.RegisterUserCallbacks(newUser, lostUser, 0, userCallbacks);
         _userGenerator.GetSkeletonCap().RegisterToCalibrationStart(calibrationStarted, 0, calibrationStartCallback);
         _userGenerator.GetSkeletonCap().RegisterToCalibrationComplete(calibrationCompleted, 0, calibrationCompleteCallback);
-        
+
         _userGenerator.GetSkeletonCap().SetSkeletonProfile(XN_SKEL_PROFILE_UPPER);
-        
+
         // make the depth viewpoint match that of the video image
         if (_depthGenerator.IsCapabilitySupported(XN_CAPABILITY_ALTERNATIVE_VIEW_POINT)) {
            _depthGenerator.GetAlternativeViewPointCap().SetViewPoint(_imageGenerator);
        }
-        
+
        _xnContext.StartGeneratingAll();
        return true;
    }
@@ -670,7 +797,7 @@ bool FrameGrabber::init() {
     const int IDEAL_FRAME_HEIGHT = 240;
     cvSetCaptureProperty(_capture, CV_CAP_PROP_FRAME_WIDTH, IDEAL_FRAME_WIDTH);
     cvSetCaptureProperty(_capture, CV_CAP_PROP_FRAME_HEIGHT, IDEAL_FRAME_HEIGHT);
-    
+
 #ifdef __APPLE__
     configureCamera(0x5ac, 0x8510, false, 0.975, 0.5, 1.0, 0.5, true, 0.5);
 #else
@@ -690,6 +817,17 @@ void FrameGrabber::updateHSVFrame(const Mat& frame, int format) {
     inRange(_hsvFrame, Scalar(0, 55, 65), Scalar(180, 256, 256), _mask);
 }
 
+void FrameGrabber::destroyCodecs() {
+    if (_colorCodec.name != 0) {
+        vpx_codec_destroy(&_colorCodec);
+        _colorCodec.name = 0;
+    }
+    if (_depthCodec.name != 0) {
+        vpx_codec_destroy(&_depthCodec);
+        _depthCodec.name = 0;
+    }
+}
+
 Joint::Joint(const glm::vec3& position, const glm::quat& rotation, const glm::vec3& projected) :
     isValid(true), position(position), rotation(rotation), projected(projected) {
 }
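The destroyCodecs() helper added above leans on a detail of the libvpx API worth noting: the name field of vpx_codec_ctx_t is null until a successful vpx_codec_enc_init(), so the patch uses it as an "initialized" sentinel that can be cleared to permit re-initialization. A standalone sketch of that lifecycle (the wrapper name is mine, not from the patch):

#include <vpx/vpx_encoder.h>
#include <vpx/vp8cx.h>

// Destroy a codec context only if it was actually initialized, and reset the
// sentinel so the context can be initialized again later (for example, when
// the encoded frame size changes).
static void destroyCodec(vpx_codec_ctx_t* codec) {
    if (codec->name != 0) { // name is set by a successful vpx_codec_enc_init()
        vpx_codec_destroy(codec);
        codec->name = 0;
    }
}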
diff --git a/interface/src/Webcam.h b/interface/src/Webcam.h
index 35c85cc63a..aee87e1bc7 100644
--- a/interface/src/Webcam.h
+++ b/interface/src/Webcam.h
@@ -38,9 +38,9 @@ typedef QVector<Joint> JointVector;
 
 class Webcam : public QObject {
     Q_OBJECT
-    
+
 public:
-    
+
     Webcam();
     ~Webcam();
 
@@ -49,34 +49,34 @@ public:
     bool isActive() const { return _active; }
 
     bool isSending() const { return _sending; }
-    
+
     GLuint getColorTextureID() const { return _colorTextureID; }
     GLuint getDepthTextureID() const { return _depthTextureID; }
     const cv::Size2f& getTextureSize() const { return _textureSize; }
-    
+
     float getAspectRatio() const { return _aspectRatio; }
-    
+
     const cv::RotatedRect& getFaceRect() const { return _faceRect; }
-    
+
     const glm::vec3& getEstimatedPosition() const { return _estimatedPosition; }
     const glm::vec3& getEstimatedRotation() const { return _estimatedRotation; }
     const JointVector& getEstimatedJoints() const { return _estimatedJoints; }
 
     void reset();
-    void renderPreview(int screenWidth, int screenHeight); 
+    void renderPreview(int screenWidth, int screenHeight);
 
 public slots:
-    
+
     void setEnabled(bool enabled);
     void setFrame(const cv::Mat& color, int format, const cv::Mat& depth, float midFaceDepth,
         float aspectRatio, const cv::RotatedRect& faceRect, bool sending, const JointVector& joints);
     void setSkeletonTrackingOn(bool toggle) { _skeletonTrackingOn = toggle; };
 
 private:
-    
+
     QThread _grabberThread;
     FrameGrabber* _grabber;
-    
+
     bool _enabled;
     bool _active;
     bool _sending;
@@ -88,12 +88,12 @@ private:
     cv::RotatedRect _initialFaceRect;
     float _initialFaceDepth;
     JointVector _joints;
-    
+
     uint64_t _startTimestamp;
     int _frameCount;
-    
+
     uint64_t _lastFrameTimestamp;
-    
+
     glm::vec3 _estimatedPosition;
     glm::vec3 _estimatedRotation;
     JointVector _estimatedJoints;
@@ -103,27 +103,27 @@ class FrameGrabber : public QObject {
     Q_OBJECT
-    
+
 public:
-    
+
     FrameGrabber();
     virtual ~FrameGrabber();
 
 public slots:
-    
+
     void cycleVideoSendMode();
     void reset();
     void shutdown();
     void grabFrame();
-    
+
 private:
-    
+
     enum VideoSendMode { NO_VIDEO, FACE_VIDEO, FULL_FRAME_VIDEO, VIDEO_SEND_MODE_COUNT };
-    
+
     bool init();
     void updateHSVFrame(const cv::Mat& frame, int format);
     void destroyCodecs();
-    
+
     bool _initialized;
     VideoSendMode _videoSendMode;
     CvCapture* _capture;
@@ -135,7 +135,7 @@ private:
     cv::Rect _searchWindow;
     cv::Mat _grayDepthFrame;
     float _smoothedMidFaceDepth;
-    
+
     vpx_codec_ctx_t _colorCodec;
     vpx_codec_ctx_t _depthCodec;
     int _frameCount;
@@ -144,7 +144,7 @@ private:
     cv::Mat _smoothedFaceDepth;
     QByteArray _encodedFace;
     cv::RotatedRect _smoothedFaceRect;
-    
+
 #ifdef HAVE_OPENNI
     xn::Context _xnContext;
     xn::DepthGenerator _depthGenerator;
@@ -158,10 +158,10 @@ class Joint {
 public:
-    
+
     Joint(const glm::vec3& position, const glm::quat& rotation, const glm::vec3& projected);
     Joint();
-    
+
     bool isValid;
     glm::vec3 position;
     glm::quat rotation;