From 729158f8824c8002921f23fd2e5584e45f66b968 Mon Sep 17 00:00:00 2001 From: Andrzej Kapolka Date: Wed, 31 Jul 2013 12:19:14 -0700 Subject: [PATCH 1/8] Added depth smoothing, experimenting with using depth minimum rather than mean. --- interface/src/Webcam.cpp | 30 ++++++++++++++++++++---------- interface/src/Webcam.h | 1 + 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/interface/src/Webcam.cpp b/interface/src/Webcam.cpp index d4fa015ba0..032b483082 100644 --- a/interface/src/Webcam.cpp +++ b/interface/src/Webcam.cpp @@ -620,25 +620,35 @@ void FrameGrabber::grabFrame() { _faceDepth.create(ENCODED_FACE_WIDTH, ENCODED_FACE_HEIGHT, CV_16UC1); warpAffine(depth, _faceDepth, transform, _faceDepth.size(), INTER_NEAREST); - // find the mean of the valid values - qint64 depthTotal = 0; - qint64 depthSamples = 0; - ushort* src = _faceDepth.ptr(); + _smoothedFaceDepth.create(ENCODED_FACE_WIDTH, ENCODED_FACE_HEIGHT, CV_16UC1); + + // smooth and find the minimum/mean of the valid values const ushort ELEVEN_BIT_MINIMUM = 0; const ushort ELEVEN_BIT_MAXIMUM = 2047; + const float DEPTH_SMOOTHING = 0.25f; + qint64 depthTotal = 0; + qint64 depthSamples = 0; + ushort depthMinimum = ELEVEN_BIT_MAXIMUM; + ushort* src = _faceDepth.ptr(); + ushort* dest = _smoothedFaceDepth.ptr(); for (int i = 0; i < ENCODED_FACE_HEIGHT; i++) { for (int j = 0; j < ENCODED_FACE_WIDTH; j++) { ushort depth = *src++; if (depth != ELEVEN_BIT_MINIMUM && depth != ELEVEN_BIT_MAXIMUM) { depthTotal += depth; + depthMinimum = min(depthMinimum, depth); depthSamples++; + + *dest = (*dest == ELEVEN_BIT_MINIMUM) ? depth : (ushort)glm::mix(depth, *dest, DEPTH_SMOOTHING); } + dest++; } } - float mean = (depthSamples == 0) ? UNINITIALIZED_FACE_DEPTH : depthTotal / (float)depthSamples; + const ushort DEPTH_MINIMUM_OFFSET = 64; + float mean = (depthSamples == 0) ? UNINITIALIZED_FACE_DEPTH : depthMinimum + DEPTH_MINIMUM_OFFSET; // smooth the mean over time - const float DEPTH_OFFSET_SMOOTHING = 0.95f; + const float DEPTH_OFFSET_SMOOTHING = 0.5f; _smoothedMeanFaceDepth = (_smoothedMeanFaceDepth == UNINITIALIZED_FACE_DEPTH) ? mean : glm::mix(mean, _smoothedMeanFaceDepth, DEPTH_OFFSET_SMOOTHING); @@ -657,10 +667,10 @@ void FrameGrabber::grabFrame() { uchar* vdest = vline; uchar* udest = uline; for (int j = 0; j < ENCODED_FACE_WIDTH; j += 2) { - ushort tl = *_faceDepth.ptr(i, j); - ushort tr = *_faceDepth.ptr(i, j + 1); - ushort bl = *_faceDepth.ptr(i + 1, j); - ushort br = *_faceDepth.ptr(i + 1, j + 1); + ushort tl = *_smoothedFaceDepth.ptr(i, j); + ushort tr = *_smoothedFaceDepth.ptr(i, j + 1); + ushort bl = *_smoothedFaceDepth.ptr(i + 1, j); + ushort br = *_smoothedFaceDepth.ptr(i + 1, j + 1); uchar mask = EIGHT_BIT_MAXIMUM; diff --git a/interface/src/Webcam.h b/interface/src/Webcam.h index 6c6d250897..077e9eed98 100644 --- a/interface/src/Webcam.h +++ b/interface/src/Webcam.h @@ -125,6 +125,7 @@ private: int _frameCount; cv::Mat _faceColor; cv::Mat _faceDepth; + cv::Mat _smoothedFaceDepth; QByteArray _encodedFace; cv::RotatedRect _smoothedFaceRect; From 415949cc26551be3b2d7127c2bb0c6b471c8bb4c Mon Sep 17 00:00:00 2001 From: Andrzej Kapolka Date: Wed, 31 Jul 2013 14:40:33 -0700 Subject: [PATCH 2/8] Terminology change to reflect the fact that we're no longer using the mean. --- interface/src/Webcam.cpp | 41 ++++++++++++++++++---------------------- interface/src/Webcam.h | 4 ++-- 2 files changed, 20 insertions(+), 25 deletions(-) diff --git a/interface/src/Webcam.cpp b/interface/src/Webcam.cpp index 032b483082..3457064733 100644 --- a/interface/src/Webcam.cpp +++ b/interface/src/Webcam.cpp @@ -154,7 +154,7 @@ Webcam::~Webcam() { const float METERS_PER_MM = 1.0f / 1000.0f; -void Webcam::setFrame(const Mat& color, int format, const Mat& depth, float meanFaceDepth, +void Webcam::setFrame(const Mat& color, int format, const Mat& depth, float midFaceDepth, const RotatedRect& faceRect, const JointVector& joints) { IplImage colorImage = color; glPixelStorei(GL_UNPACK_ROW_LENGTH, colorImage.widthStep / 3); @@ -242,18 +242,18 @@ void Webcam::setFrame(const Mat& color, int format, const Mat& depth, float mean if (_initialFaceRect.size.area() == 0) { _initialFaceRect = _faceRect; _estimatedPosition = glm::vec3(); - _initialFaceDepth = meanFaceDepth; + _initialFaceDepth = midFaceDepth; } else { float proportion, z; - if (meanFaceDepth == UNINITIALIZED_FACE_DEPTH) { + if (midFaceDepth == UNINITIALIZED_FACE_DEPTH) { proportion = sqrtf(_initialFaceRect.size.area() / (float)_faceRect.size.area()); const float INITIAL_DISTANCE_TO_CAMERA = 0.333f; z = INITIAL_DISTANCE_TO_CAMERA * proportion - INITIAL_DISTANCE_TO_CAMERA; } else { - z = (meanFaceDepth - _initialFaceDepth) * METERS_PER_MM; - proportion = meanFaceDepth / _initialFaceDepth; + z = (midFaceDepth - _initialFaceDepth) * METERS_PER_MM; + proportion = midFaceDepth / _initialFaceDepth; } const float POSITION_SCALE = 0.5f; _estimatedPosition = glm::vec3( @@ -271,7 +271,7 @@ void Webcam::setFrame(const Mat& color, int format, const Mat& depth, float mean } FrameGrabber::FrameGrabber() : _initialized(false), _capture(0), _searchWindow(0, 0, 0, 0), - _smoothedMeanFaceDepth(UNINITIALIZED_FACE_DEPTH), _colorCodec(), _depthCodec(), _frameCount(0) { + _smoothedMidFaceDepth(UNINITIALIZED_FACE_DEPTH), _colorCodec(), _depthCodec(), _frameCount(0) { } FrameGrabber::~FrameGrabber() { @@ -494,7 +494,7 @@ void FrameGrabber::grabFrame() { Rect imageBounds(0, 0, color.cols, color.rows); _searchWindow = Rect(clip(faceBounds.tl(), imageBounds), clip(faceBounds.br(), imageBounds)); } - + const int ENCODED_FACE_WIDTH = 128; const int ENCODED_FACE_HEIGHT = 128; if (_colorCodec.name == 0) { @@ -622,39 +622,34 @@ void FrameGrabber::grabFrame() { _smoothedFaceDepth.create(ENCODED_FACE_WIDTH, ENCODED_FACE_HEIGHT, CV_16UC1); - // smooth and find the minimum/mean of the valid values + // smooth the depth over time const ushort ELEVEN_BIT_MINIMUM = 0; const ushort ELEVEN_BIT_MAXIMUM = 2047; const float DEPTH_SMOOTHING = 0.25f; - qint64 depthTotal = 0; - qint64 depthSamples = 0; - ushort depthMinimum = ELEVEN_BIT_MAXIMUM; ushort* src = _faceDepth.ptr(); ushort* dest = _smoothedFaceDepth.ptr(); + ushort minimumDepth = numeric_limits::max(); for (int i = 0; i < ENCODED_FACE_HEIGHT; i++) { for (int j = 0; j < ENCODED_FACE_WIDTH; j++) { ushort depth = *src++; if (depth != ELEVEN_BIT_MINIMUM && depth != ELEVEN_BIT_MAXIMUM) { - depthTotal += depth; - depthMinimum = min(depthMinimum, depth); - depthSamples++; - + minimumDepth = min(minimumDepth, depth); *dest = (*dest == ELEVEN_BIT_MINIMUM) ? depth : (ushort)glm::mix(depth, *dest, DEPTH_SMOOTHING); } dest++; } } - const ushort DEPTH_MINIMUM_OFFSET = 64; - float mean = (depthSamples == 0) ? UNINITIALIZED_FACE_DEPTH : depthMinimum + DEPTH_MINIMUM_OFFSET; + const ushort MINIMUM_DEPTH_OFFSET = 64; + float midFaceDepth = minimumDepth + MINIMUM_DEPTH_OFFSET; - // smooth the mean over time - const float DEPTH_OFFSET_SMOOTHING = 0.5f; - _smoothedMeanFaceDepth = (_smoothedMeanFaceDepth == UNINITIALIZED_FACE_DEPTH) ? mean : - glm::mix(mean, _smoothedMeanFaceDepth, DEPTH_OFFSET_SMOOTHING); + // smooth the mid face depth over time + const float MID_FACE_DEPTH_SMOOTHING = 0.5f; + _smoothedMidFaceDepth = (_smoothedMidFaceDepth == UNINITIALIZED_FACE_DEPTH) ? midFaceDepth : + glm::mix(midFaceDepth, _smoothedMidFaceDepth, MID_FACE_DEPTH_SMOOTHING); // convert from 11 to 8 bits for preview/local display const uchar EIGHT_BIT_MIDPOINT = 128; - double depthOffset = EIGHT_BIT_MIDPOINT - _smoothedMeanFaceDepth; + double depthOffset = EIGHT_BIT_MIDPOINT - _smoothedMidFaceDepth; depth.convertTo(_grayDepthFrame, CV_8UC1, 1.0, depthOffset); // likewise for the encoded representation @@ -707,7 +702,7 @@ void FrameGrabber::grabFrame() { Q_ARG(int, _frameCount), Q_ARG(QByteArray, payload)); QMetaObject::invokeMethod(Application::getInstance()->getWebcam(), "setFrame", - Q_ARG(cv::Mat, color), Q_ARG(int, format), Q_ARG(cv::Mat, _grayDepthFrame), Q_ARG(float, _smoothedMeanFaceDepth), + Q_ARG(cv::Mat, color), Q_ARG(int, format), Q_ARG(cv::Mat, _grayDepthFrame), Q_ARG(float, _smoothedMidFaceDepth), Q_ARG(cv::RotatedRect, _smoothedFaceRect), Q_ARG(JointVector, joints)); } diff --git a/interface/src/Webcam.h b/interface/src/Webcam.h index 077e9eed98..021ef3bc15 100644 --- a/interface/src/Webcam.h +++ b/interface/src/Webcam.h @@ -62,7 +62,7 @@ public: public slots: void setEnabled(bool enabled); - void setFrame(const cv::Mat& color, int format, const cv::Mat& depth, float meanFaceDepth, + void setFrame(const cv::Mat& color, int format, const cv::Mat& depth, float midFaceDepth, const cv::RotatedRect& faceRect, const JointVector& joints); private: @@ -118,7 +118,7 @@ private: cv::Mat _backProject; cv::Rect _searchWindow; cv::Mat _grayDepthFrame; - float _smoothedMeanFaceDepth; + float _smoothedMidFaceDepth; vpx_codec_ctx_t _colorCodec; vpx_codec_ctx_t _depthCodec; From d18a9dc49905eeffa899c370a548684e15bb3bba Mon Sep 17 00:00:00 2001 From: Andrzej Kapolka Date: Wed, 31 Jul 2013 17:06:09 -0700 Subject: [PATCH 3/8] Working on full frame video support. --- interface/src/Webcam.cpp | 405 +++++++++++++++++++++------------------ interface/src/Webcam.h | 4 + 2 files changed, 224 insertions(+), 185 deletions(-) diff --git a/interface/src/Webcam.cpp b/interface/src/Webcam.cpp index 3457064733..b11f134fc5 100644 --- a/interface/src/Webcam.cpp +++ b/interface/src/Webcam.cpp @@ -270,7 +270,7 @@ void Webcam::setFrame(const Mat& color, int format, const Mat& depth, float midF QTimer::singleShot(qMax((int)remaining / 1000, 0), _grabber, SLOT(grabFrame())); } -FrameGrabber::FrameGrabber() : _initialized(false), _capture(0), _searchWindow(0, 0, 0, 0), +FrameGrabber::FrameGrabber() : _initialized(false), _videoSendMode(FULL_FRAME_VIDEO), _capture(0), _searchWindow(0, 0, 0, 0), _smoothedMidFaceDepth(UNINITIALIZED_FACE_DEPTH), _colorCodec(), _depthCodec(), _frameCount(0) { } @@ -364,6 +364,11 @@ static void XN_CALLBACK_TYPE calibrationCompleted(SkeletonCapability& capability } #endif +void FrameGrabber::cycleVideoSendMode() { + _videoSendMode = (VideoSendMode)((_videoSendMode + 1) % VIDEO_SEND_MODE_COUNT); + _searchWindow = cv::Rect(0, 0, 0, 0); +} + void FrameGrabber::reset() { _searchWindow = cv::Rect(0, 0, 0, 0); @@ -462,56 +467,54 @@ void FrameGrabber::grabFrame() { color = image; } - // if we don't have a search window (yet), try using the face cascade - int channels = 0; - float ranges[] = { 0, 180 }; - const float* range = ranges; - if (_searchWindow.area() == 0) { - vector faces; - _faceCascade.detectMultiScale(color, faces, 1.1, 6); - if (!faces.empty()) { - _searchWindow = faces.front(); - updateHSVFrame(color, format); - - Mat faceHsv(_hsvFrame, _searchWindow); - Mat faceMask(_mask, _searchWindow); - int sizes = 30; - calcHist(&faceHsv, 1, &channels, faceMask, _histogram, 1, &sizes, &range); - double min, max; - minMaxLoc(_histogram, &min, &max); - _histogram.convertTo(_histogram, -1, (max == 0.0) ? 0.0 : 255.0 / max); - } - } RotatedRect faceRect; - if (_searchWindow.area() > 0) { - updateHSVFrame(color, format); + int encodedWidth; + int encodedHeight; + int depthBitrateMultiplier = 1; + if (_videoSendMode == FULL_FRAME_VIDEO) { + // no need to find the face if we're sending full frame video + faceRect.center = Point2f(color.cols / 2.0f, color.rows / 2.0f); + faceRect.size = Size2f(color.cols, color.rows); + encodedWidth = color.cols; + encodedHeight = color.rows; - calcBackProject(&_hsvFrame, 1, &channels, _histogram, _backProject, &range); - bitwise_and(_backProject, _mask, _backProject); - - faceRect = CamShift(_backProject, _searchWindow, TermCriteria(CV_TERMCRIT_EPS | CV_TERMCRIT_ITER, 10, 1)); - Rect faceBounds = faceRect.boundingRect(); - Rect imageBounds(0, 0, color.cols, color.rows); - _searchWindow = Rect(clip(faceBounds.tl(), imageBounds), clip(faceBounds.br(), imageBounds)); - } - - const int ENCODED_FACE_WIDTH = 128; - const int ENCODED_FACE_HEIGHT = 128; - if (_colorCodec.name == 0) { - // initialize encoder context(s) - vpx_codec_enc_cfg_t codecConfig; - vpx_codec_enc_config_default(vpx_codec_vp8_cx(), &codecConfig, 0); - codecConfig.rc_target_bitrate = ENCODED_FACE_WIDTH * ENCODED_FACE_HEIGHT * - codecConfig.rc_target_bitrate / codecConfig.g_w / codecConfig.g_h; - codecConfig.g_w = ENCODED_FACE_WIDTH; - codecConfig.g_h = ENCODED_FACE_HEIGHT; - vpx_codec_enc_init(&_colorCodec, vpx_codec_vp8_cx(), &codecConfig, 0); - - if (!depth.empty()) { - int DEPTH_BITRATE_MULTIPLIER = 2; - codecConfig.rc_target_bitrate *= 2; - vpx_codec_enc_init(&_depthCodec, vpx_codec_vp8_cx(), &codecConfig, 0); + } else { + // if we don't have a search window (yet), try using the face cascade + int channels = 0; + float ranges[] = { 0, 180 }; + const float* range = ranges; + if (_searchWindow.area() == 0) { + vector faces; + _faceCascade.detectMultiScale(color, faces, 1.1, 6); + if (!faces.empty()) { + _searchWindow = faces.front(); + updateHSVFrame(color, format); + + Mat faceHsv(_hsvFrame, _searchWindow); + Mat faceMask(_mask, _searchWindow); + int sizes = 30; + calcHist(&faceHsv, 1, &channels, faceMask, _histogram, 1, &sizes, &range); + double min, max; + minMaxLoc(_histogram, &min, &max); + _histogram.convertTo(_histogram, -1, (max == 0.0) ? 0.0 : 255.0 / max); + } } + if (_searchWindow.area() > 0) { + updateHSVFrame(color, format); + + calcBackProject(&_hsvFrame, 1, &channels, _histogram, _backProject, &range); + bitwise_and(_backProject, _mask, _backProject); + + faceRect = CamShift(_backProject, _searchWindow, TermCriteria(CV_TERMCRIT_EPS | CV_TERMCRIT_ITER, 10, 1)); + Rect faceBounds = faceRect.boundingRect(); + Rect imageBounds(0, 0, color.cols, color.rows); + _searchWindow = Rect(clip(faceBounds.tl(), imageBounds), clip(faceBounds.br(), imageBounds)); + } + const int ENCODED_FACE_WIDTH = 128; + const int ENCODED_FACE_HEIGHT = 128; + encodedWidth = ENCODED_FACE_WIDTH; + encodedHeight = ENCODED_FACE_HEIGHT; + depthBitrateMultiplier = 2; } // correct for 180 degree rotations @@ -535,150 +538,86 @@ void FrameGrabber::grabFrame() { _smoothedFaceRect.angle = glm::mix(faceRect.angle, _smoothedFaceRect.angle, FACE_RECT_SMOOTHING); } - // resize/rotate face into encoding rectangle - _faceColor.create(ENCODED_FACE_WIDTH, ENCODED_FACE_HEIGHT, CV_8UC3); - Point2f sourcePoints[4]; - _smoothedFaceRect.points(sourcePoints); - Point2f destPoints[] = { Point2f(0, ENCODED_FACE_HEIGHT), Point2f(0, 0), Point2f(ENCODED_FACE_WIDTH, 0) }; - Mat transform = getAffineTransform(sourcePoints, destPoints); - warpAffine(color, _faceColor, transform, _faceColor.size()); - - // convert from RGB to YV12 - const int ENCODED_BITS_PER_Y = 8; - const int ENCODED_BITS_PER_VU = 2; - const int ENCODED_BITS_PER_PIXEL = ENCODED_BITS_PER_Y + 2 * ENCODED_BITS_PER_VU; - const int BITS_PER_BYTE = 8; - _encodedFace.resize(ENCODED_FACE_WIDTH * ENCODED_FACE_HEIGHT * ENCODED_BITS_PER_PIXEL / BITS_PER_BYTE); - vpx_image_t vpxImage; - vpx_img_wrap(&vpxImage, VPX_IMG_FMT_YV12, ENCODED_FACE_WIDTH, ENCODED_FACE_HEIGHT, 1, (unsigned char*)_encodedFace.data()); - uchar* yline = vpxImage.planes[0]; - uchar* vline = vpxImage.planes[1]; - uchar* uline = vpxImage.planes[2]; - const int Y_RED_WEIGHT = (int)(0.299 * 256); - const int Y_GREEN_WEIGHT = (int)(0.587 * 256); - const int Y_BLUE_WEIGHT = (int)(0.114 * 256); - const int V_RED_WEIGHT = (int)(0.713 * 256); - const int U_BLUE_WEIGHT = (int)(0.564 * 256); - int redIndex = 0; - int greenIndex = 1; - int blueIndex = 2; - if (format == GL_BGR) { - redIndex = 2; - blueIndex = 0; - } - for (int i = 0; i < ENCODED_FACE_HEIGHT; i += 2) { - uchar* ydest = yline; - uchar* vdest = vline; - uchar* udest = uline; - for (int j = 0; j < ENCODED_FACE_WIDTH; j += 2) { - uchar* tl = _faceColor.ptr(i, j); - uchar* tr = _faceColor.ptr(i, j + 1); - uchar* bl = _faceColor.ptr(i + 1, j); - uchar* br = _faceColor.ptr(i + 1, j + 1); + if (_videoSendMode != NO_VIDEO) { + if (_colorCodec.name == 0) { + // initialize encoder context(s) + vpx_codec_enc_cfg_t codecConfig; + vpx_codec_enc_config_default(vpx_codec_vp8_cx(), &codecConfig, 0); + codecConfig.rc_target_bitrate = encodedWidth * encodedHeight * + codecConfig.rc_target_bitrate / codecConfig.g_w / codecConfig.g_h; + codecConfig.g_w = encodedWidth; + codecConfig.g_h = encodedHeight; + vpx_codec_enc_init(&_colorCodec, vpx_codec_vp8_cx(), &codecConfig, 0); - ydest[0] = (tl[redIndex] * Y_RED_WEIGHT + tl[1] * Y_GREEN_WEIGHT + tl[blueIndex] * Y_BLUE_WEIGHT) >> 8; - ydest[1] = (tr[redIndex] * Y_RED_WEIGHT + tr[1] * Y_GREEN_WEIGHT + tr[blueIndex] * Y_BLUE_WEIGHT) >> 8; - ydest[vpxImage.stride[0]] = (bl[redIndex] * Y_RED_WEIGHT + bl[greenIndex] * - Y_GREEN_WEIGHT + bl[blueIndex] * Y_BLUE_WEIGHT) >> 8; - ydest[vpxImage.stride[0] + 1] = (br[redIndex] * Y_RED_WEIGHT + br[greenIndex] * - Y_GREEN_WEIGHT + br[blueIndex] * Y_BLUE_WEIGHT) >> 8; - ydest += 2; - - int totalRed = tl[redIndex] + tr[redIndex] + bl[redIndex] + br[redIndex]; - int totalGreen = tl[greenIndex] + tr[greenIndex] + bl[greenIndex] + br[greenIndex]; - int totalBlue = tl[blueIndex] + tr[blueIndex] + bl[blueIndex] + br[blueIndex]; - int totalY = (totalRed * Y_RED_WEIGHT + totalGreen * Y_GREEN_WEIGHT + totalBlue * Y_BLUE_WEIGHT) >> 8; - - *vdest++ = (((totalRed - totalY) * V_RED_WEIGHT) >> 10) + 128; - *udest++ = (((totalBlue - totalY) * U_BLUE_WEIGHT) >> 10) + 128; - } - yline += vpxImage.stride[0] * 2; - vline += vpxImage.stride[1]; - uline += vpxImage.stride[2]; - } - - // encode the frame - vpx_codec_encode(&_colorCodec, &vpxImage, ++_frameCount, 1, 0, VPX_DL_REALTIME); - - // start the payload off with the aspect ratio - QByteArray payload(sizeof(float), 0); - *(float*)payload.data() = _smoothedFaceRect.size.width / _smoothedFaceRect.size.height; - - // extract the encoded frame - vpx_codec_iter_t iterator = 0; - const vpx_codec_cx_pkt_t* packet; - while ((packet = vpx_codec_get_cx_data(&_colorCodec, &iterator)) != 0) { - if (packet->kind == VPX_CODEC_CX_FRAME_PKT) { - // prepend the length, which will indicate whether there's a depth frame too - payload.append((const char*)&packet->data.frame.sz, sizeof(packet->data.frame.sz)); - payload.append((const char*)packet->data.frame.buf, packet->data.frame.sz); - } - } - - if (!depth.empty()) { - // warp the face depth without interpolation (because it will contain invalid zero values) - _faceDepth.create(ENCODED_FACE_WIDTH, ENCODED_FACE_HEIGHT, CV_16UC1); - warpAffine(depth, _faceDepth, transform, _faceDepth.size(), INTER_NEAREST); - - _smoothedFaceDepth.create(ENCODED_FACE_WIDTH, ENCODED_FACE_HEIGHT, CV_16UC1); - - // smooth the depth over time - const ushort ELEVEN_BIT_MINIMUM = 0; - const ushort ELEVEN_BIT_MAXIMUM = 2047; - const float DEPTH_SMOOTHING = 0.25f; - ushort* src = _faceDepth.ptr(); - ushort* dest = _smoothedFaceDepth.ptr(); - ushort minimumDepth = numeric_limits::max(); - for (int i = 0; i < ENCODED_FACE_HEIGHT; i++) { - for (int j = 0; j < ENCODED_FACE_WIDTH; j++) { - ushort depth = *src++; - if (depth != ELEVEN_BIT_MINIMUM && depth != ELEVEN_BIT_MAXIMUM) { - minimumDepth = min(minimumDepth, depth); - *dest = (*dest == ELEVEN_BIT_MINIMUM) ? depth : (ushort)glm::mix(depth, *dest, DEPTH_SMOOTHING); - } - dest++; + if (!depth.empty()) { + codecConfig.rc_target_bitrate *= depthBitrateMultiplier; + vpx_codec_enc_init(&_depthCodec, vpx_codec_vp8_cx(), &codecConfig, 0); } } - const ushort MINIMUM_DEPTH_OFFSET = 64; - float midFaceDepth = minimumDepth + MINIMUM_DEPTH_OFFSET; + + Mat transform; + if (_videoSendMode == FACE_VIDEO) { + // resize/rotate face into encoding rectangle + _faceColor.create(encodedHeight, encodedWidth, CV_8UC3); + Point2f sourcePoints[4]; + _smoothedFaceRect.points(sourcePoints); + Point2f destPoints[] = { Point2f(0, encodedHeight), Point2f(0, 0), Point2f(encodedWidth, 0) }; + transform = getAffineTransform(sourcePoints, destPoints); + warpAffine(color, _faceColor, transform, _faceColor.size()); - // smooth the mid face depth over time - const float MID_FACE_DEPTH_SMOOTHING = 0.5f; - _smoothedMidFaceDepth = (_smoothedMidFaceDepth == UNINITIALIZED_FACE_DEPTH) ? midFaceDepth : - glm::mix(midFaceDepth, _smoothedMidFaceDepth, MID_FACE_DEPTH_SMOOTHING); - - // convert from 11 to 8 bits for preview/local display - const uchar EIGHT_BIT_MIDPOINT = 128; - double depthOffset = EIGHT_BIT_MIDPOINT - _smoothedMidFaceDepth; - depth.convertTo(_grayDepthFrame, CV_8UC1, 1.0, depthOffset); - - // likewise for the encoded representation + } else { + _faceColor = color; + } + + // convert from RGB to YV12 + const int ENCODED_BITS_PER_Y = 8; + const int ENCODED_BITS_PER_VU = 2; + const int ENCODED_BITS_PER_PIXEL = ENCODED_BITS_PER_Y + 2 * ENCODED_BITS_PER_VU; + const int BITS_PER_BYTE = 8; + _encodedFace.resize(encodedWidth * encodedHeight * ENCODED_BITS_PER_PIXEL / BITS_PER_BYTE); + vpx_image_t vpxImage; + vpx_img_wrap(&vpxImage, VPX_IMG_FMT_YV12, encodedWidth, encodedHeight, 1, + (unsigned char*)_encodedFace.data()); uchar* yline = vpxImage.planes[0]; uchar* vline = vpxImage.planes[1]; uchar* uline = vpxImage.planes[2]; - const uchar EIGHT_BIT_MAXIMUM = 255; - for (int i = 0; i < ENCODED_FACE_HEIGHT; i += 2) { + const int Y_RED_WEIGHT = (int)(0.299 * 256); + const int Y_GREEN_WEIGHT = (int)(0.587 * 256); + const int Y_BLUE_WEIGHT = (int)(0.114 * 256); + const int V_RED_WEIGHT = (int)(0.713 * 256); + const int U_BLUE_WEIGHT = (int)(0.564 * 256); + int redIndex = 0; + int greenIndex = 1; + int blueIndex = 2; + if (format == GL_BGR) { + redIndex = 2; + blueIndex = 0; + } + for (int i = 0; i < encodedHeight; i += 2) { uchar* ydest = yline; uchar* vdest = vline; uchar* udest = uline; - for (int j = 0; j < ENCODED_FACE_WIDTH; j += 2) { - ushort tl = *_smoothedFaceDepth.ptr(i, j); - ushort tr = *_smoothedFaceDepth.ptr(i, j + 1); - ushort bl = *_smoothedFaceDepth.ptr(i + 1, j); - ushort br = *_smoothedFaceDepth.ptr(i + 1, j + 1); - - uchar mask = EIGHT_BIT_MAXIMUM; + for (int j = 0; j < encodedWidth; j += 2) { + uchar* tl = _faceColor.ptr(i, j); + uchar* tr = _faceColor.ptr(i, j + 1); + uchar* bl = _faceColor.ptr(i + 1, j); + uchar* br = _faceColor.ptr(i + 1, j + 1); - ydest[0] = (tl == ELEVEN_BIT_MINIMUM) ? (mask = EIGHT_BIT_MIDPOINT) : saturate_cast(tl + depthOffset); - ydest[1] = (tr == ELEVEN_BIT_MINIMUM) ? (mask = EIGHT_BIT_MIDPOINT) : saturate_cast(tr + depthOffset); - ydest[vpxImage.stride[0]] = (bl == ELEVEN_BIT_MINIMUM) ? - (mask = EIGHT_BIT_MIDPOINT) : saturate_cast(bl + depthOffset); - ydest[vpxImage.stride[0] + 1] = (br == ELEVEN_BIT_MINIMUM) ? - (mask = EIGHT_BIT_MIDPOINT) : saturate_cast(br + depthOffset); + ydest[0] = (tl[redIndex] * Y_RED_WEIGHT + tl[1] * Y_GREEN_WEIGHT + tl[blueIndex] * Y_BLUE_WEIGHT) >> 8; + ydest[1] = (tr[redIndex] * Y_RED_WEIGHT + tr[1] * Y_GREEN_WEIGHT + tr[blueIndex] * Y_BLUE_WEIGHT) >> 8; + ydest[vpxImage.stride[0]] = (bl[redIndex] * Y_RED_WEIGHT + bl[greenIndex] * + Y_GREEN_WEIGHT + bl[blueIndex] * Y_BLUE_WEIGHT) >> 8; + ydest[vpxImage.stride[0] + 1] = (br[redIndex] * Y_RED_WEIGHT + br[greenIndex] * + Y_GREEN_WEIGHT + br[blueIndex] * Y_BLUE_WEIGHT) >> 8; ydest += 2; - - *vdest++ = mask; - *udest++ = EIGHT_BIT_MIDPOINT; + + int totalRed = tl[redIndex] + tr[redIndex] + bl[redIndex] + br[redIndex]; + int totalGreen = tl[greenIndex] + tr[greenIndex] + bl[greenIndex] + br[greenIndex]; + int totalBlue = tl[blueIndex] + tr[blueIndex] + bl[blueIndex] + br[blueIndex]; + int totalY = (totalRed * Y_RED_WEIGHT + totalGreen * Y_GREEN_WEIGHT + totalBlue * Y_BLUE_WEIGHT) >> 8; + + *vdest++ = (((totalRed - totalY) * V_RED_WEIGHT) >> 10) + 128; + *udest++ = (((totalBlue - totalY) * U_BLUE_WEIGHT) >> 10) + 128; } yline += vpxImage.stride[0] * 2; vline += vpxImage.stride[1]; @@ -686,21 +625,117 @@ void FrameGrabber::grabFrame() { } // encode the frame - vpx_codec_encode(&_depthCodec, &vpxImage, _frameCount, 1, 0, VPX_DL_REALTIME); + vpx_codec_encode(&_colorCodec, &vpxImage, ++_frameCount, 1, 0, VPX_DL_REALTIME); + + // start the payload off with the aspect ratio + QByteArray payload(sizeof(float), 0); + *(float*)payload.data() = _smoothedFaceRect.size.width / _smoothedFaceRect.size.height; // extract the encoded frame vpx_codec_iter_t iterator = 0; const vpx_codec_cx_pkt_t* packet; - while ((packet = vpx_codec_get_cx_data(&_depthCodec, &iterator)) != 0) { + while ((packet = vpx_codec_get_cx_data(&_colorCodec, &iterator)) != 0) { if (packet->kind == VPX_CODEC_CX_FRAME_PKT) { + // prepend the length, which will indicate whether there's a depth frame too + payload.append((const char*)&packet->data.frame.sz, sizeof(packet->data.frame.sz)); payload.append((const char*)packet->data.frame.buf, packet->data.frame.sz); } } + + if (!depth.empty()) { + if (_videoSendMode == FACE_VIDEO) { + // warp the face depth without interpolation (because it will contain invalid zero values) + _faceDepth.create(encodedHeight, encodedWidth, CV_16UC1); + warpAffine(depth, _faceDepth, transform, _faceDepth.size(), INTER_NEAREST); + + } else { + _faceDepth = depth; + } + _smoothedFaceDepth.create(encodedHeight, encodedWidth, CV_16UC1); + + // smooth the depth over time + const ushort ELEVEN_BIT_MINIMUM = 0; + const ushort ELEVEN_BIT_MAXIMUM = 2047; + const float DEPTH_SMOOTHING = 0.25f; + ushort* src = _faceDepth.ptr(); + ushort* dest = _smoothedFaceDepth.ptr(); + ushort minimumDepth = numeric_limits::max(); + for (int i = 0; i < encodedHeight; i++) { + for (int j = 0; j < encodedWidth; j++) { + ushort depth = *src++; + if (depth != ELEVEN_BIT_MINIMUM && depth != ELEVEN_BIT_MAXIMUM) { + minimumDepth = min(minimumDepth, depth); + *dest = (*dest == ELEVEN_BIT_MINIMUM) ? depth : (ushort)glm::mix(depth, *dest, DEPTH_SMOOTHING); + } + dest++; + } + } + const ushort MINIMUM_DEPTH_OFFSET = 64; + const float FIXED_MID_DEPTH = 640.0f; + float midFaceDepth = (_videoSendMode == FACE_VIDEO) ? (minimumDepth + MINIMUM_DEPTH_OFFSET) : FIXED_MID_DEPTH; + + // smooth the mid face depth over time + const float MID_FACE_DEPTH_SMOOTHING = 0.5f; + _smoothedMidFaceDepth = (_smoothedMidFaceDepth == UNINITIALIZED_FACE_DEPTH) ? midFaceDepth : + glm::mix(midFaceDepth, _smoothedMidFaceDepth, MID_FACE_DEPTH_SMOOTHING); + + // convert from 11 to 8 bits for preview/local display + const uchar EIGHT_BIT_MIDPOINT = 128; + double depthOffset = EIGHT_BIT_MIDPOINT - _smoothedMidFaceDepth; + depth.convertTo(_grayDepthFrame, CV_8UC1, 1.0, depthOffset); + + // likewise for the encoded representation + uchar* yline = vpxImage.planes[0]; + uchar* vline = vpxImage.planes[1]; + uchar* uline = vpxImage.planes[2]; + const uchar EIGHT_BIT_MAXIMUM = 255; + for (int i = 0; i < encodedHeight; i += 2) { + uchar* ydest = yline; + uchar* vdest = vline; + uchar* udest = uline; + for (int j = 0; j < encodedWidth; j += 2) { + ushort tl = *_smoothedFaceDepth.ptr(i, j); + ushort tr = *_smoothedFaceDepth.ptr(i, j + 1); + ushort bl = *_smoothedFaceDepth.ptr(i + 1, j); + ushort br = *_smoothedFaceDepth.ptr(i + 1, j + 1); + + uchar mask = EIGHT_BIT_MAXIMUM; + + ydest[0] = (tl == ELEVEN_BIT_MINIMUM) ? (mask = EIGHT_BIT_MIDPOINT) : + saturate_cast(tl + depthOffset); + ydest[1] = (tr == ELEVEN_BIT_MINIMUM) ? (mask = EIGHT_BIT_MIDPOINT) : + saturate_cast(tr + depthOffset); + ydest[vpxImage.stride[0]] = (bl == ELEVEN_BIT_MINIMUM) ? (mask = EIGHT_BIT_MIDPOINT) : + saturate_cast(bl + depthOffset); + ydest[vpxImage.stride[0] + 1] = (br == ELEVEN_BIT_MINIMUM) ? (mask = EIGHT_BIT_MIDPOINT) : + saturate_cast(br + depthOffset); + ydest += 2; + + *vdest++ = mask; + *udest++ = EIGHT_BIT_MIDPOINT; + } + yline += vpxImage.stride[0] * 2; + vline += vpxImage.stride[1]; + uline += vpxImage.stride[2]; + } + + // encode the frame + vpx_codec_encode(&_depthCodec, &vpxImage, _frameCount, 1, 0, VPX_DL_REALTIME); + + // extract the encoded frame + vpx_codec_iter_t iterator = 0; + const vpx_codec_cx_pkt_t* packet; + while ((packet = vpx_codec_get_cx_data(&_depthCodec, &iterator)) != 0) { + if (packet->kind == VPX_CODEC_CX_FRAME_PKT) { + payload.append((const char*)packet->data.frame.buf, packet->data.frame.sz); + } + } + } + + QMetaObject::invokeMethod(Application::getInstance(), "sendAvatarFaceVideoMessage", + Q_ARG(int, _frameCount), Q_ARG(QByteArray, payload)); } - QMetaObject::invokeMethod(Application::getInstance(), "sendAvatarFaceVideoMessage", - Q_ARG(int, _frameCount), Q_ARG(QByteArray, payload)); - QMetaObject::invokeMethod(Application::getInstance()->getWebcam(), "setFrame", Q_ARG(cv::Mat, color), Q_ARG(int, format), Q_ARG(cv::Mat, _grayDepthFrame), Q_ARG(float, _smoothedMidFaceDepth), Q_ARG(cv::RotatedRect, _smoothedFaceRect), Q_ARG(JointVector, joints)); diff --git a/interface/src/Webcam.h b/interface/src/Webcam.h index 021ef3bc15..b5a26d9b13 100644 --- a/interface/src/Webcam.h +++ b/interface/src/Webcam.h @@ -100,16 +100,20 @@ public: public slots: + void cycleVideoSendMode(); void reset(); void shutdown(); void grabFrame(); private: + enum VideoSendMode { NO_VIDEO, FACE_VIDEO, FULL_FRAME_VIDEO, VIDEO_SEND_MODE_COUNT }; + bool init(); void updateHSVFrame(const cv::Mat& frame, int format); bool _initialized; + VideoSendMode _videoSendMode; CvCapture* _capture; cv::CascadeClassifier _faceCascade; cv::Mat _hsvFrame; From a86ceb275961c259e3ac8f1ff03b3da059d98f62 Mon Sep 17 00:00:00 2001 From: Andrzej Kapolka Date: Wed, 31 Jul 2013 18:18:37 -0700 Subject: [PATCH 4/8] More work on full frame video. --- interface/src/Webcam.cpp | 8 +++++--- interface/src/avatar/Face.cpp | 27 +++++++++++++++++++-------- 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/interface/src/Webcam.cpp b/interface/src/Webcam.cpp index b11f134fc5..3b1f1ec9ee 100644 --- a/interface/src/Webcam.cpp +++ b/interface/src/Webcam.cpp @@ -569,7 +569,8 @@ void FrameGrabber::grabFrame() { _faceColor = color; } - // convert from RGB to YV12 + // convert from RGB to YV12: see http://www.fourcc.org/yuv.php and + // http://docs.opencv.org/modules/imgproc/doc/miscellaneous_transformations.html#cvtcolor const int ENCODED_BITS_PER_Y = 8; const int ENCODED_BITS_PER_VU = 2; const int ENCODED_BITS_PER_PIXEL = ENCODED_BITS_PER_Y + 2 * ENCODED_BITS_PER_VU; @@ -627,9 +628,10 @@ void FrameGrabber::grabFrame() { // encode the frame vpx_codec_encode(&_colorCodec, &vpxImage, ++_frameCount, 1, 0, VPX_DL_REALTIME); - // start the payload off with the aspect ratio + // start the payload off with the aspect ratio (zero for no face) QByteArray payload(sizeof(float), 0); - *(float*)payload.data() = _smoothedFaceRect.size.width / _smoothedFaceRect.size.height; + *(float*)payload.data() = (_videoSendMode == FACE_VIDEO) ? + (_smoothedFaceRect.size.width / _smoothedFaceRect.size.height) : 0.0f; // extract the encoded frame vpx_codec_iter_t iterator = 0; diff --git a/interface/src/avatar/Face.cpp b/interface/src/avatar/Face.cpp index ff31241c54..72d1b5d5c2 100644 --- a/interface/src/avatar/Face.cpp +++ b/interface/src/avatar/Face.cpp @@ -97,7 +97,8 @@ int Face::processVideoMessage(unsigned char* packetData, size_t dataBytes) { vpx_codec_iter_t iterator = 0; vpx_image_t* image; while ((image = vpx_codec_get_frame(&_colorCodec, &iterator)) != 0) { - // convert from YV12 to RGB + // convert from YV12 to RGB: see http://www.fourcc.org/yuv.php and + // http://docs.opencv.org/modules/imgproc/doc/miscellaneous_transformations.html#cvtcolor Mat color(image->d_h, image->d_w, CV_8UC3); uchar* yline = image->planes[0]; uchar* vline = image->planes[1]; @@ -208,9 +209,19 @@ bool Face::render(float alpha) { glm::quat orientation = _owningHead->getOrientation(); glm::vec3 axis = glm::axis(orientation); glRotatef(glm::angle(orientation), axis.x, axis.y, axis.z); - float scale = BODY_BALL_RADIUS_HEAD_BASE * _owningHead->getScale(); + + float aspect, scale; + if (_aspectRatio == 0.0f) { + aspect = _textureSize.width / _textureSize.height; + const float FULL_FRAME_SCALE = 0.5f; + scale = FULL_FRAME_SCALE * _owningHead->getScale(); + + } else { + aspect = _aspectRatio; + scale = BODY_BALL_RADIUS_HEAD_BASE * _owningHead->getScale(); + } glScalef(scale, scale, scale); - + glColor4f(1.0f, 1.0f, 1.0f, alpha); Point2f points[4]; @@ -292,7 +303,7 @@ bool Face::render(float alpha) { (points[3].x - points[0].x) / _textureSize.width, (points[3].y - points[0].y) / _textureSize.height); _program->setUniformValue(_texCoordUpLocation, (points[1].x - points[0].x) / _textureSize.width, (points[1].y - points[0].y) / _textureSize.height); - _program->setUniformValue(_aspectRatioLocation, _aspectRatio); + _program->setUniformValue(_aspectRatioLocation, aspect); glEnableClientState(GL_VERTEX_ARRAY); glVertexPointer(2, GL_FLOAT, 0, 0); @@ -324,13 +335,13 @@ bool Face::render(float alpha) { glBegin(GL_QUADS); glTexCoord2f(points[0].x / _textureSize.width, points[0].y / _textureSize.height); - glVertex3f(0.5f, -0.5f / _aspectRatio, -0.5f); + glVertex3f(0.5f, -0.5f / aspect, -0.5f); glTexCoord2f(points[1].x / _textureSize.width, points[1].y / _textureSize.height); - glVertex3f(0.5f, 0.5f / _aspectRatio, -0.5f); + glVertex3f(0.5f, 0.5f / aspect, -0.5f); glTexCoord2f(points[2].x / _textureSize.width, points[2].y / _textureSize.height); - glVertex3f(-0.5f, 0.5f / _aspectRatio, -0.5f); + glVertex3f(-0.5f, 0.5f / aspect, -0.5f); glTexCoord2f(points[3].x / _textureSize.width, points[3].y / _textureSize.height); - glVertex3f(-0.5f, -0.5f / _aspectRatio, -0.5f); + glVertex3f(-0.5f, -0.5f / aspect, -0.5f); glEnd(); glDisable(GL_TEXTURE_2D); From ba9b5888d25543267ee050f6d310a4ad7f550ab5 Mon Sep 17 00:00:00 2001 From: Andrzej Kapolka Date: Thu, 1 Aug 2013 13:00:01 -0700 Subject: [PATCH 5/8] More work on full frame sending. --- interface/resources/shaders/face.vert | 6 +---- interface/src/Webcam.cpp | 17 +++++++----- interface/src/Webcam.h | 5 +++- interface/src/avatar/Avatar.cpp | 19 +++++++------ interface/src/avatar/Face.cpp | 39 ++++++++++++++++----------- interface/src/avatar/Face.h | 11 ++++---- 6 files changed, 57 insertions(+), 40 deletions(-) diff --git a/interface/resources/shaders/face.vert b/interface/resources/shaders/face.vert index 358a8902fe..018a85f524 100644 --- a/interface/resources/shaders/face.vert +++ b/interface/resources/shaders/face.vert @@ -17,9 +17,6 @@ uniform vec2 texCoordRight; // the texture coordinate vector from bottom to the top uniform vec2 texCoordUp; -// the aspect ratio of the image -uniform float aspectRatio; - // the depth texture uniform sampler2D depthTexture; @@ -31,6 +28,5 @@ void main(void) { const float MIN_VISIBLE_DEPTH = 1.0 / 255.0; const float MAX_VISIBLE_DEPTH = 254.0 / 255.0; gl_FrontColor = vec4(1.0, 1.0, 1.0, step(MIN_VISIBLE_DEPTH, depth) * (1.0 - step(MAX_VISIBLE_DEPTH, depth))); - gl_Position = gl_ModelViewProjectionMatrix * vec4(0.5 - gl_Vertex.x, - (gl_Vertex.y - 0.5) / aspectRatio, depth * 2.0 - 2.0, 1.0); + gl_Position = gl_ModelViewProjectionMatrix * vec4(0.5 - gl_Vertex.x, gl_Vertex.y - 0.5, depth - 0.5, 1.0); } diff --git a/interface/src/Webcam.cpp b/interface/src/Webcam.cpp index 3b1f1ec9ee..8340e8ba06 100644 --- a/interface/src/Webcam.cpp +++ b/interface/src/Webcam.cpp @@ -19,6 +19,7 @@ #include "Application.h" #include "Webcam.h" +#include "avatar/Face.h" using namespace cv; using namespace std; @@ -155,7 +156,7 @@ Webcam::~Webcam() { const float METERS_PER_MM = 1.0f / 1000.0f; void Webcam::setFrame(const Mat& color, int format, const Mat& depth, float midFaceDepth, - const RotatedRect& faceRect, const JointVector& joints) { + float aspectRatio, const RotatedRect& faceRect, const JointVector& joints) { IplImage colorImage = color; glPixelStorei(GL_UNPACK_ROW_LENGTH, colorImage.widthStep / 3); if (_colorTextureID == 0) { @@ -192,7 +193,8 @@ void Webcam::setFrame(const Mat& color, int format, const Mat& depth, float midF glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); glBindTexture(GL_TEXTURE_2D, 0); - // store our face rect and joints, update our frame count for fps computation + // store our various data, update our frame count for fps computation + _aspectRatio = aspectRatio; _faceRect = faceRect; _joints = joints; _frameCount++; @@ -538,6 +540,10 @@ void FrameGrabber::grabFrame() { _smoothedFaceRect.angle = glm::mix(faceRect.angle, _smoothedFaceRect.angle, FACE_RECT_SMOOTHING); } + // the aspect ratio is derived from the face rect dimensions unless we're full-frame + float aspectRatio = (_videoSendMode == FULL_FRAME_VIDEO) ? FULL_FRAME_ASPECT : + (_smoothedFaceRect.size.width / _smoothedFaceRect.size.height); + if (_videoSendMode != NO_VIDEO) { if (_colorCodec.name == 0) { // initialize encoder context(s) @@ -628,10 +634,9 @@ void FrameGrabber::grabFrame() { // encode the frame vpx_codec_encode(&_colorCodec, &vpxImage, ++_frameCount, 1, 0, VPX_DL_REALTIME); - // start the payload off with the aspect ratio (zero for no face) + // start the payload off with the aspect ratio (zero for full frame) QByteArray payload(sizeof(float), 0); - *(float*)payload.data() = (_videoSendMode == FACE_VIDEO) ? - (_smoothedFaceRect.size.width / _smoothedFaceRect.size.height) : 0.0f; + *(float*)payload.data() = aspectRatio; // extract the encoded frame vpx_codec_iter_t iterator = 0; @@ -740,7 +745,7 @@ void FrameGrabber::grabFrame() { QMetaObject::invokeMethod(Application::getInstance()->getWebcam(), "setFrame", Q_ARG(cv::Mat, color), Q_ARG(int, format), Q_ARG(cv::Mat, _grayDepthFrame), Q_ARG(float, _smoothedMidFaceDepth), - Q_ARG(cv::RotatedRect, _smoothedFaceRect), Q_ARG(JointVector, joints)); + Q_ARG(float, aspectRatio), Q_ARG(cv::RotatedRect, _smoothedFaceRect), Q_ARG(JointVector, joints)); } bool FrameGrabber::init() { diff --git a/interface/src/Webcam.h b/interface/src/Webcam.h index b5a26d9b13..cc5c313ece 100644 --- a/interface/src/Webcam.h +++ b/interface/src/Webcam.h @@ -50,6 +50,8 @@ public: GLuint getDepthTextureID() const { return _depthTextureID; } const cv::Size2f& getTextureSize() const { return _textureSize; } + float getAspectRatio() const { return _aspectRatio; } + const cv::RotatedRect& getFaceRect() const { return _faceRect; } const glm::vec3& getEstimatedPosition() const { return _estimatedPosition; } @@ -63,7 +65,7 @@ public slots: void setEnabled(bool enabled); void setFrame(const cv::Mat& color, int format, const cv::Mat& depth, float midFaceDepth, - const cv::RotatedRect& faceRect, const JointVector& joints); + float aspectRatio, const cv::RotatedRect& faceRect, const JointVector& joints); private: @@ -75,6 +77,7 @@ private: GLuint _colorTextureID; GLuint _depthTextureID; cv::Size2f _textureSize; + float _aspectRatio; cv::RotatedRect _faceRect; cv::RotatedRect _initialFaceRect; float _initialFaceDepth; diff --git a/interface/src/avatar/Avatar.cpp b/interface/src/avatar/Avatar.cpp index 0bf2c94b10..bddc5d1df1 100755 --- a/interface/src/avatar/Avatar.cpp +++ b/interface/src/avatar/Avatar.cpp @@ -314,10 +314,7 @@ void Avatar::updateFromGyrosAndOrWebcam(bool gyroLook, estimatedPosition = webcam->getEstimatedPosition(); // apply face data - _head.getFace().setColorTextureID(webcam->getColorTextureID()); - _head.getFace().setDepthTextureID(webcam->getDepthTextureID()); - _head.getFace().setTextureSize(webcam->getTextureSize()); - _head.getFace().setTextureRect(webcam->getFaceRect()); + _head.getFace().setFrameFromWebcam(); // compute and store the joint rotations const JointVector& joints = webcam->getEstimatedJoints(); @@ -334,7 +331,7 @@ void Avatar::updateFromGyrosAndOrWebcam(bool gyroLook, } } } else { - _head.getFace().setColorTextureID(0); + _head.getFace().clearFrame(); } _head.setPitch(estimatedRotation.x * amplifyAngle.x + pitchFromTouch); _head.setYaw(estimatedRotation.y * amplifyAngle.y + yawFromTouch); @@ -1300,9 +1297,15 @@ float Avatar::getBallRenderAlpha(int ball, bool lookingInMirror) const { } void Avatar::renderBody(bool lookingInMirror, bool renderAvatarBalls) { - - // Render the body as balls and cones - if (renderAvatarBalls || !_voxels.getVoxelURL().isValid()) { + + if (_head.getFace().isFullFrame()) { + // Render the full-frame video + float alpha = getBallRenderAlpha(BODY_BALL_HEAD_BASE, lookingInMirror); + if (alpha > 0.0f) { + _head.getFace().render(1.0f); + } + } else if (renderAvatarBalls || !_voxels.getVoxelURL().isValid()) { + // Render the body as balls and cones for (int b = 0; b < NUM_AVATAR_BODY_BALLS; b++) { float alpha = getBallRenderAlpha(b, lookingInMirror); diff --git a/interface/src/avatar/Face.cpp b/interface/src/avatar/Face.cpp index 72d1b5d5c2..d982d1a4a5 100644 --- a/interface/src/avatar/Face.cpp +++ b/interface/src/avatar/Face.cpp @@ -17,6 +17,7 @@ #include "Avatar.h" #include "Head.h" #include "Face.h" +#include "Webcam.h" #include "renderer/ProgramObject.h" using namespace cv; @@ -25,7 +26,6 @@ ProgramObject* Face::_program = 0; int Face::_texCoordCornerLocation; int Face::_texCoordRightLocation; int Face::_texCoordUpLocation; -int Face::_aspectRatioLocation; GLuint Face::_vboID; GLuint Face::_iboID; @@ -55,11 +55,19 @@ Face::~Face() { } } -void Face::setTextureRect(const cv::RotatedRect& textureRect) { - _textureRect = textureRect; - _aspectRatio = _textureRect.size.width / _textureRect.size.height; +void Face::setFrameFromWebcam() { + Webcam* webcam = Application::getInstance()->getWebcam(); + _colorTextureID = webcam->getColorTextureID(); + _depthTextureID = webcam->getDepthTextureID(); + _textureSize = webcam->getTextureSize(); + _textureRect = webcam->getFaceRect(); + _aspectRatio = webcam->getAspectRatio(); } +void Face::clearFrame() { + _colorTextureID = 0; +} + int Face::processVideoMessage(unsigned char* packetData, size_t dataBytes) { if (_colorCodec.name == 0) { // initialize decoder context @@ -210,17 +218,20 @@ bool Face::render(float alpha) { glm::vec3 axis = glm::axis(orientation); glRotatef(glm::angle(orientation), axis.x, axis.y, axis.z); - float aspect, scale; - if (_aspectRatio == 0.0f) { + float aspect, xScale, zScale; + if (_aspectRatio == FULL_FRAME_ASPECT) { aspect = _textureSize.width / _textureSize.height; const float FULL_FRAME_SCALE = 0.5f; - scale = FULL_FRAME_SCALE * _owningHead->getScale(); + xScale = FULL_FRAME_SCALE * _owningHead->getScale(); + zScale = xScale * 0.3f; } else { aspect = _aspectRatio; - scale = BODY_BALL_RADIUS_HEAD_BASE * _owningHead->getScale(); + xScale = BODY_BALL_RADIUS_HEAD_BASE * _owningHead->getScale(); + zScale = xScale * 1.5f; + glTranslatef(0.0f, -xScale * 0.75f, -xScale); } - glScalef(scale, scale, scale); + glScalef(xScale, xScale / aspect, zScale); glColor4f(1.0f, 1.0f, 1.0f, alpha); @@ -254,7 +265,6 @@ bool Face::render(float alpha) { _texCoordCornerLocation = _program->uniformLocation("texCoordCorner"); _texCoordRightLocation = _program->uniformLocation("texCoordRight"); _texCoordUpLocation = _program->uniformLocation("texCoordUp"); - _aspectRatioLocation = _program->uniformLocation("aspectRatio"); glGenBuffers(1, &_vboID); glBindBuffer(GL_ARRAY_BUFFER, _vboID); @@ -303,7 +313,6 @@ bool Face::render(float alpha) { (points[3].x - points[0].x) / _textureSize.width, (points[3].y - points[0].y) / _textureSize.height); _program->setUniformValue(_texCoordUpLocation, (points[1].x - points[0].x) / _textureSize.width, (points[1].y - points[0].y) / _textureSize.height); - _program->setUniformValue(_aspectRatioLocation, aspect); glEnableClientState(GL_VERTEX_ARRAY); glVertexPointer(2, GL_FLOAT, 0, 0); @@ -335,13 +344,13 @@ bool Face::render(float alpha) { glBegin(GL_QUADS); glTexCoord2f(points[0].x / _textureSize.width, points[0].y / _textureSize.height); - glVertex3f(0.5f, -0.5f / aspect, -0.5f); + glVertex3f(0.5f, -0.5f, 0.0f); glTexCoord2f(points[1].x / _textureSize.width, points[1].y / _textureSize.height); - glVertex3f(0.5f, 0.5f / aspect, -0.5f); + glVertex3f(0.5f, 0.5f, 0.0f); glTexCoord2f(points[2].x / _textureSize.width, points[2].y / _textureSize.height); - glVertex3f(-0.5f, 0.5f / aspect, -0.5f); + glVertex3f(-0.5f, 0.5f, 0.0f); glTexCoord2f(points[3].x / _textureSize.width, points[3].y / _textureSize.height); - glVertex3f(-0.5f, -0.5f / aspect, -0.5f); + glVertex3f(-0.5f, -0.5f, 0.0f); glEnd(); glDisable(GL_TEXTURE_2D); diff --git a/interface/src/avatar/Face.h b/interface/src/avatar/Face.h index d4812fecfb..a40111dcd3 100644 --- a/interface/src/avatar/Face.h +++ b/interface/src/avatar/Face.h @@ -20,6 +20,8 @@ class Head; class ProgramObject; +const float FULL_FRAME_ASPECT = 0.0f; + class Face : public QObject { Q_OBJECT @@ -28,10 +30,10 @@ public: Face(Head* owningHead); ~Face(); - void setColorTextureID(GLuint colorTextureID) { _colorTextureID = colorTextureID; } - void setDepthTextureID(GLuint depthTextureID) { _depthTextureID = depthTextureID; } - void setTextureSize(const cv::Size2f& textureSize) { _textureSize = textureSize; } - void setTextureRect(const cv::RotatedRect& textureRect); + bool isFullFrame() const { return _colorTextureID != 0 && _aspectRatio == FULL_FRAME_ASPECT; } + + void setFrameFromWebcam(); + void clearFrame(); int processVideoMessage(unsigned char* packetData, size_t dataBytes); @@ -68,7 +70,6 @@ private: static int _texCoordCornerLocation; static int _texCoordRightLocation; static int _texCoordUpLocation; - static int _aspectRatioLocation; static GLuint _vboID; static GLuint _iboID; }; From 6a6c51f80ec919cc84e66a00559e1bc04f746ca5 Mon Sep 17 00:00:00 2001 From: Mark Peng Date: Thu, 1 Aug 2013 15:04:58 -0700 Subject: [PATCH 6/8] Add skeleton tracking toggle in options menu in interface. --- interface/src/Application.cpp | 1 + interface/src/Webcam.cpp | 4 ++-- interface/src/Webcam.h | 7 +++++-- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/interface/src/Application.cpp b/interface/src/Application.cpp index 7fc9bd7ac8..0de45b3dcf 100644 --- a/interface/src/Application.cpp +++ b/interface/src/Application.cpp @@ -1728,6 +1728,7 @@ void Application::initMenu() { _testPing->setChecked(true); (_fullScreenMode = optionsMenu->addAction("Fullscreen", this, SLOT(setFullscreen(bool)), Qt::Key_F))->setCheckable(true); optionsMenu->addAction("Webcam", &_webcam, SLOT(setEnabled(bool)))->setCheckable(true); + optionsMenu->addAction("Toggle Skeleton Tracking", &_webcam, SLOT(toggleSkeletonTracking(bool)))->setCheckable(true); optionsMenu->addAction("Go Home", this, SLOT(goHome())); QMenu* renderMenu = menuBar->addMenu("Render"); diff --git a/interface/src/Webcam.cpp b/interface/src/Webcam.cpp index d4fa015ba0..921f4f427b 100644 --- a/interface/src/Webcam.cpp +++ b/interface/src/Webcam.cpp @@ -32,7 +32,7 @@ int jointVectorMetaType = qRegisterMetaType("JointVector"); int matMetaType = qRegisterMetaType("cv::Mat"); int rotatedRectMetaType = qRegisterMetaType("cv::RotatedRect"); -Webcam::Webcam() : _enabled(false), _active(false), _colorTextureID(0), _depthTextureID(0) { +Webcam::Webcam() : _enabled(false), _active(false), _colorTextureID(0), _depthTextureID(0), _skeletonTrackingOn(false) { // the grabber simply runs as fast as possible _grabber = new FrameGrabber(); _grabber->moveToThread(&_grabberThread); @@ -194,7 +194,7 @@ void Webcam::setFrame(const Mat& color, int format, const Mat& depth, float mean // store our face rect and joints, update our frame count for fps computation _faceRect = faceRect; - _joints = joints; + if (_skeletonTrackingOn) _joints = joints; _frameCount++; const int MAX_FPS = 60; diff --git a/interface/src/Webcam.h b/interface/src/Webcam.h index 3910bb4a19..7e27fba189 100644 --- a/interface/src/Webcam.h +++ b/interface/src/Webcam.h @@ -45,7 +45,7 @@ public: ~Webcam(); bool isActive() const { return _active; } - + GLuint getColorTextureID() const { return _colorTextureID; } GLuint getDepthTextureID() const { return _depthTextureID; } const cv::Size2f& getTextureSize() const { return _textureSize; } @@ -57,13 +57,14 @@ public: const JointVector& getEstimatedJoints() const { return _estimatedJoints; } void reset(); - void renderPreview(int screenWidth, int screenHeight); + void renderPreview(int screenWidth, int screenHeight); public slots: void setEnabled(bool enabled); void setFrame(const cv::Mat& color, int format, const cv::Mat& depth, float meanFaceDepth, const cv::RotatedRect& faceRect, const JointVector& joints); + void toggleSkeletonTracking(bool toggle) { _skeletonTrackingOn = toggle; }; private: @@ -88,6 +89,8 @@ private: glm::vec3 _estimatedPosition; glm::vec3 _estimatedRotation; JointVector _estimatedJoints; + + bool _skeletonTrackingOn; }; class FrameGrabber : public QObject { From 12f42358d58df85b63ab54ac5a22f850c093e870 Mon Sep 17 00:00:00 2001 From: Andrzej Kapolka Date: Thu, 1 Aug 2013 15:39:53 -0700 Subject: [PATCH 7/8] Allow cycling between video send modes. --- interface/src/Application.cpp | 8 +- interface/src/Webcam.cpp | 192 ++++++++++++----------- interface/src/Webcam.h | 7 +- interface/src/avatar/Face.cpp | 285 ++++++++++++++++++++-------------- interface/src/avatar/Face.h | 3 + 5 files changed, 284 insertions(+), 211 deletions(-) diff --git a/interface/src/Application.cpp b/interface/src/Application.cpp index f331ee9407..ad40edd0db 100644 --- a/interface/src/Application.cpp +++ b/interface/src/Application.cpp @@ -960,14 +960,15 @@ void Application::sendAvatarFaceVideoMessage(int frameCount, const QByteArray& d int headerSize = packetPosition - packet; - // break the data up into submessages of the maximum size + // break the data up into submessages of the maximum size (at least one, for zero-length packets) *offsetPosition = 0; - while (*offsetPosition < data.size()) { + do { int payloadSize = min(data.size() - (int)*offsetPosition, MAX_PACKET_SIZE - headerSize); memcpy(packetPosition, data.constData() + *offsetPosition, payloadSize); getInstance()->controlledBroadcastToNodes(packet, headerSize + payloadSize, &NODE_TYPE_AVATAR_MIXER, 1); *offsetPosition += payloadSize; - } + + } while (*offsetPosition < data.size()); } // Every second, check the frame rates and other stuff @@ -1723,6 +1724,7 @@ void Application::initMenu() { _testPing->setChecked(true); (_fullScreenMode = optionsMenu->addAction("Fullscreen", this, SLOT(setFullscreen(bool)), Qt::Key_F))->setCheckable(true); optionsMenu->addAction("Webcam", &_webcam, SLOT(setEnabled(bool)))->setCheckable(true); + optionsMenu->addAction("Cycle Webcam Send Mode", _webcam.getGrabber(), SLOT(cycleVideoSendMode())); optionsMenu->addAction("Go Home", this, SLOT(goHome())); QMenu* renderMenu = menuBar->addMenu("Render"); diff --git a/interface/src/Webcam.cpp b/interface/src/Webcam.cpp index 8340e8ba06..b30d72aec6 100644 --- a/interface/src/Webcam.cpp +++ b/interface/src/Webcam.cpp @@ -156,7 +156,7 @@ Webcam::~Webcam() { const float METERS_PER_MM = 1.0f / 1000.0f; void Webcam::setFrame(const Mat& color, int format, const Mat& depth, float midFaceDepth, - float aspectRatio, const RotatedRect& faceRect, const JointVector& joints) { + float aspectRatio, const RotatedRect& faceRect, bool sending, const JointVector& joints) { IplImage colorImage = color; glPixelStorei(GL_UNPACK_ROW_LENGTH, colorImage.widthStep / 3); if (_colorTextureID == 0) { @@ -196,6 +196,7 @@ void Webcam::setFrame(const Mat& color, int format, const Mat& depth, float midF // store our various data, update our frame count for fps computation _aspectRatio = aspectRatio; _faceRect = faceRect; + _sending = sending; _joints = joints; _frameCount++; @@ -369,6 +370,8 @@ static void XN_CALLBACK_TYPE calibrationCompleted(SkeletonCapability& capability void FrameGrabber::cycleVideoSendMode() { _videoSendMode = (VideoSendMode)((_videoSendMode + 1) % VIDEO_SEND_MODE_COUNT); _searchWindow = cv::Rect(0, 0, 0, 0); + + destroyCodecs(); } void FrameGrabber::reset() { @@ -386,14 +389,7 @@ void FrameGrabber::shutdown() { cvReleaseCapture(&_capture); _capture = 0; } - if (_colorCodec.name != 0) { - vpx_codec_destroy(&_colorCodec); - _colorCodec.name = 0; - } - if (_depthCodec.name != 0) { - vpx_codec_destroy(&_depthCodec); - _depthCodec.name = 0; - } + destroyCodecs(); _initialized = false; thread()->quit(); @@ -469,16 +465,17 @@ void FrameGrabber::grabFrame() { color = image; } - RotatedRect faceRect; int encodedWidth; int encodedHeight; int depthBitrateMultiplier = 1; + Mat faceTransform; + float aspectRatio; if (_videoSendMode == FULL_FRAME_VIDEO) { // no need to find the face if we're sending full frame video - faceRect.center = Point2f(color.cols / 2.0f, color.rows / 2.0f); - faceRect.size = Size2f(color.cols, color.rows); + _smoothedFaceRect = RotatedRect(Point2f(color.cols / 2.0f, color.rows / 2.0f), Size2f(color.cols, color.rows), 0.0f); encodedWidth = color.cols; encodedHeight = color.rows; + aspectRatio = FULL_FRAME_ASPECT; } else { // if we don't have a search window (yet), try using the face cascade @@ -501,6 +498,7 @@ void FrameGrabber::grabFrame() { _histogram.convertTo(_histogram, -1, (max == 0.0) ? 0.0 : 255.0 / max); } } + RotatedRect faceRect; if (_searchWindow.area() > 0) { updateHSVFrame(color, format); @@ -517,33 +515,81 @@ void FrameGrabber::grabFrame() { encodedWidth = ENCODED_FACE_WIDTH; encodedHeight = ENCODED_FACE_HEIGHT; depthBitrateMultiplier = 2; + + // correct for 180 degree rotations + if (faceRect.angle < -90.0f) { + faceRect.angle += 180.0f; + + } else if (faceRect.angle > 90.0f) { + faceRect.angle -= 180.0f; + } + + // compute the smoothed face rect + if (_smoothedFaceRect.size.area() == 0) { + _smoothedFaceRect = faceRect; + + } else { + const float FACE_RECT_SMOOTHING = 0.9f; + _smoothedFaceRect.center.x = glm::mix(faceRect.center.x, _smoothedFaceRect.center.x, FACE_RECT_SMOOTHING); + _smoothedFaceRect.center.y = glm::mix(faceRect.center.y, _smoothedFaceRect.center.y, FACE_RECT_SMOOTHING); + _smoothedFaceRect.size.width = glm::mix(faceRect.size.width, _smoothedFaceRect.size.width, FACE_RECT_SMOOTHING); + _smoothedFaceRect.size.height = glm::mix(faceRect.size.height, _smoothedFaceRect.size.height, FACE_RECT_SMOOTHING); + _smoothedFaceRect.angle = glm::mix(faceRect.angle, _smoothedFaceRect.angle, FACE_RECT_SMOOTHING); + } + + // use the face rect to compute the face transform, aspect ratio + Point2f sourcePoints[4]; + _smoothedFaceRect.points(sourcePoints); + Point2f destPoints[] = { Point2f(0, encodedHeight), Point2f(0, 0), Point2f(encodedWidth, 0) }; + faceTransform = getAffineTransform(sourcePoints, destPoints); + aspectRatio = _smoothedFaceRect.size.width / _smoothedFaceRect.size.height; + } + + const ushort ELEVEN_BIT_MINIMUM = 0; + const uchar EIGHT_BIT_MIDPOINT = 128; + double depthOffset; + if (!depth.empty()) { + if (_videoSendMode == FACE_VIDEO) { + // warp the face depth without interpolation (because it will contain invalid zero values) + _faceDepth.create(encodedHeight, encodedWidth, CV_16UC1); + warpAffine(depth, _faceDepth, faceTransform, _faceDepth.size(), INTER_NEAREST); + + } else { + _faceDepth = depth; + } + _smoothedFaceDepth.create(encodedHeight, encodedWidth, CV_16UC1); + + // smooth the depth over time + const ushort ELEVEN_BIT_MAXIMUM = 2047; + const float DEPTH_SMOOTHING = 0.25f; + ushort* src = _faceDepth.ptr(); + ushort* dest = _smoothedFaceDepth.ptr(); + ushort minimumDepth = numeric_limits::max(); + for (int i = 0; i < encodedHeight; i++) { + for (int j = 0; j < encodedWidth; j++) { + ushort depth = *src++; + if (depth != ELEVEN_BIT_MINIMUM && depth != ELEVEN_BIT_MAXIMUM) { + minimumDepth = min(minimumDepth, depth); + *dest = (*dest == ELEVEN_BIT_MINIMUM) ? depth : (ushort)glm::mix(depth, *dest, DEPTH_SMOOTHING); + } + dest++; + } + } + const ushort MINIMUM_DEPTH_OFFSET = 64; + const float FIXED_MID_DEPTH = 640.0f; + float midFaceDepth = (_videoSendMode == FACE_VIDEO) ? (minimumDepth + MINIMUM_DEPTH_OFFSET) : FIXED_MID_DEPTH; + + // smooth the mid face depth over time + const float MID_FACE_DEPTH_SMOOTHING = 0.5f; + _smoothedMidFaceDepth = (_smoothedMidFaceDepth == UNINITIALIZED_FACE_DEPTH) ? midFaceDepth : + glm::mix(midFaceDepth, _smoothedMidFaceDepth, MID_FACE_DEPTH_SMOOTHING); + + // convert from 11 to 8 bits for preview/local display + depthOffset = EIGHT_BIT_MIDPOINT - _smoothedMidFaceDepth; + depth.convertTo(_grayDepthFrame, CV_8UC1, 1.0, depthOffset); } - // correct for 180 degree rotations - if (faceRect.angle < -90.0f) { - faceRect.angle += 180.0f; - - } else if (faceRect.angle > 90.0f) { - faceRect.angle -= 180.0f; - } - - // compute the smoothed face rect - if (_smoothedFaceRect.size.area() == 0) { - _smoothedFaceRect = faceRect; - - } else { - const float FACE_RECT_SMOOTHING = 0.9f; - _smoothedFaceRect.center.x = glm::mix(faceRect.center.x, _smoothedFaceRect.center.x, FACE_RECT_SMOOTHING); - _smoothedFaceRect.center.y = glm::mix(faceRect.center.y, _smoothedFaceRect.center.y, FACE_RECT_SMOOTHING); - _smoothedFaceRect.size.width = glm::mix(faceRect.size.width, _smoothedFaceRect.size.width, FACE_RECT_SMOOTHING); - _smoothedFaceRect.size.height = glm::mix(faceRect.size.height, _smoothedFaceRect.size.height, FACE_RECT_SMOOTHING); - _smoothedFaceRect.angle = glm::mix(faceRect.angle, _smoothedFaceRect.angle, FACE_RECT_SMOOTHING); - } - - // the aspect ratio is derived from the face rect dimensions unless we're full-frame - float aspectRatio = (_videoSendMode == FULL_FRAME_VIDEO) ? FULL_FRAME_ASPECT : - (_smoothedFaceRect.size.width / _smoothedFaceRect.size.height); - + QByteArray payload; if (_videoSendMode != NO_VIDEO) { if (_colorCodec.name == 0) { // initialize encoder context(s) @@ -565,11 +611,7 @@ void FrameGrabber::grabFrame() { if (_videoSendMode == FACE_VIDEO) { // resize/rotate face into encoding rectangle _faceColor.create(encodedHeight, encodedWidth, CV_8UC3); - Point2f sourcePoints[4]; - _smoothedFaceRect.points(sourcePoints); - Point2f destPoints[] = { Point2f(0, encodedHeight), Point2f(0, 0), Point2f(encodedWidth, 0) }; - transform = getAffineTransform(sourcePoints, destPoints); - warpAffine(color, _faceColor, transform, _faceColor.size()); + warpAffine(color, _faceColor, faceTransform, _faceColor.size()); } else { _faceColor = color; @@ -635,8 +677,7 @@ void FrameGrabber::grabFrame() { vpx_codec_encode(&_colorCodec, &vpxImage, ++_frameCount, 1, 0, VPX_DL_REALTIME); // start the payload off with the aspect ratio (zero for full frame) - QByteArray payload(sizeof(float), 0); - *(float*)payload.data() = aspectRatio; + payload.append((const char*)&aspectRatio, sizeof(float)); // extract the encoded frame vpx_codec_iter_t iterator = 0; @@ -650,48 +691,7 @@ void FrameGrabber::grabFrame() { } if (!depth.empty()) { - if (_videoSendMode == FACE_VIDEO) { - // warp the face depth without interpolation (because it will contain invalid zero values) - _faceDepth.create(encodedHeight, encodedWidth, CV_16UC1); - warpAffine(depth, _faceDepth, transform, _faceDepth.size(), INTER_NEAREST); - - } else { - _faceDepth = depth; - } - _smoothedFaceDepth.create(encodedHeight, encodedWidth, CV_16UC1); - - // smooth the depth over time - const ushort ELEVEN_BIT_MINIMUM = 0; - const ushort ELEVEN_BIT_MAXIMUM = 2047; - const float DEPTH_SMOOTHING = 0.25f; - ushort* src = _faceDepth.ptr(); - ushort* dest = _smoothedFaceDepth.ptr(); - ushort minimumDepth = numeric_limits::max(); - for (int i = 0; i < encodedHeight; i++) { - for (int j = 0; j < encodedWidth; j++) { - ushort depth = *src++; - if (depth != ELEVEN_BIT_MINIMUM && depth != ELEVEN_BIT_MAXIMUM) { - minimumDepth = min(minimumDepth, depth); - *dest = (*dest == ELEVEN_BIT_MINIMUM) ? depth : (ushort)glm::mix(depth, *dest, DEPTH_SMOOTHING); - } - dest++; - } - } - const ushort MINIMUM_DEPTH_OFFSET = 64; - const float FIXED_MID_DEPTH = 640.0f; - float midFaceDepth = (_videoSendMode == FACE_VIDEO) ? (minimumDepth + MINIMUM_DEPTH_OFFSET) : FIXED_MID_DEPTH; - - // smooth the mid face depth over time - const float MID_FACE_DEPTH_SMOOTHING = 0.5f; - _smoothedMidFaceDepth = (_smoothedMidFaceDepth == UNINITIALIZED_FACE_DEPTH) ? midFaceDepth : - glm::mix(midFaceDepth, _smoothedMidFaceDepth, MID_FACE_DEPTH_SMOOTHING); - - // convert from 11 to 8 bits for preview/local display - const uchar EIGHT_BIT_MIDPOINT = 128; - double depthOffset = EIGHT_BIT_MIDPOINT - _smoothedMidFaceDepth; - depth.convertTo(_grayDepthFrame, CV_8UC1, 1.0, depthOffset); - - // likewise for the encoded representation + // convert with mask uchar* yline = vpxImage.planes[0]; uchar* vline = vpxImage.planes[1]; uchar* uline = vpxImage.planes[2]; @@ -738,14 +738,15 @@ void FrameGrabber::grabFrame() { } } } - - QMetaObject::invokeMethod(Application::getInstance(), "sendAvatarFaceVideoMessage", - Q_ARG(int, _frameCount), Q_ARG(QByteArray, payload)); } + QMetaObject::invokeMethod(Application::getInstance(), "sendAvatarFaceVideoMessage", + Q_ARG(int, _frameCount), Q_ARG(QByteArray, payload)); + QMetaObject::invokeMethod(Application::getInstance()->getWebcam(), "setFrame", Q_ARG(cv::Mat, color), Q_ARG(int, format), Q_ARG(cv::Mat, _grayDepthFrame), Q_ARG(float, _smoothedMidFaceDepth), - Q_ARG(float, aspectRatio), Q_ARG(cv::RotatedRect, _smoothedFaceRect), Q_ARG(JointVector, joints)); + Q_ARG(float, aspectRatio), Q_ARG(cv::RotatedRect, _smoothedFaceRect), Q_ARG(bool, !payload.isEmpty()), + Q_ARG(JointVector, joints)); } bool FrameGrabber::init() { @@ -814,6 +815,17 @@ void FrameGrabber::updateHSVFrame(const Mat& frame, int format) { inRange(_hsvFrame, Scalar(0, 55, 65), Scalar(180, 256, 256), _mask); } +void FrameGrabber::destroyCodecs() { + if (_colorCodec.name != 0) { + vpx_codec_destroy(&_colorCodec); + _colorCodec.name = 0; + } + if (_depthCodec.name != 0) { + vpx_codec_destroy(&_depthCodec); + _depthCodec.name = 0; + } +} + Joint::Joint(const glm::vec3& position, const glm::quat& rotation, const glm::vec3& projected) : isValid(true), position(position), rotation(rotation), projected(projected) { } diff --git a/interface/src/Webcam.h b/interface/src/Webcam.h index cc5c313ece..12ef2558db 100644 --- a/interface/src/Webcam.h +++ b/interface/src/Webcam.h @@ -44,7 +44,10 @@ public: Webcam(); ~Webcam(); + FrameGrabber* getGrabber() { return _grabber; } + bool isActive() const { return _active; } + bool isSending() const { return _sending; } GLuint getColorTextureID() const { return _colorTextureID; } GLuint getDepthTextureID() const { return _depthTextureID; } @@ -65,7 +68,7 @@ public slots: void setEnabled(bool enabled); void setFrame(const cv::Mat& color, int format, const cv::Mat& depth, float midFaceDepth, - float aspectRatio, const cv::RotatedRect& faceRect, const JointVector& joints); + float aspectRatio, const cv::RotatedRect& faceRect, bool sending, const JointVector& joints); private: @@ -74,6 +77,7 @@ private: bool _enabled; bool _active; + bool _sending; GLuint _colorTextureID; GLuint _depthTextureID; cv::Size2f _textureSize; @@ -114,6 +118,7 @@ private: bool init(); void updateHSVFrame(const cv::Mat& frame, int format); + void destroyCodecs(); bool _initialized; VideoSendMode _videoSendMode; diff --git a/interface/src/avatar/Face.cpp b/interface/src/avatar/Face.cpp index d982d1a4a5..0541b3c21c 100644 --- a/interface/src/avatar/Face.cpp +++ b/interface/src/avatar/Face.cpp @@ -57,11 +57,16 @@ Face::~Face() { void Face::setFrameFromWebcam() { Webcam* webcam = Application::getInstance()->getWebcam(); - _colorTextureID = webcam->getColorTextureID(); - _depthTextureID = webcam->getDepthTextureID(); - _textureSize = webcam->getTextureSize(); - _textureRect = webcam->getFaceRect(); - _aspectRatio = webcam->getAspectRatio(); + if (webcam->isSending()) { + _colorTextureID = webcam->getColorTextureID(); + _depthTextureID = webcam->getDepthTextureID(); + _textureSize = webcam->getTextureSize(); + _textureRect = webcam->getFaceRect(); + _aspectRatio = webcam->getAspectRatio(); + + } else { + clearFrame(); + } } void Face::clearFrame() { @@ -69,11 +74,6 @@ void Face::clearFrame() { } int Face::processVideoMessage(unsigned char* packetData, size_t dataBytes) { - if (_colorCodec.name == 0) { - // initialize decoder context - vpx_codec_dec_init(&_colorCodec, vpx_codec_vp8_dx(), 0, 0); - } - // skip the header unsigned char* packetPosition = packetData; int frameCount = *(uint32_t*)packetPosition; @@ -97,111 +97,135 @@ int Face::processVideoMessage(unsigned char* packetData, size_t dataBytes) { int payloadSize = dataBytes - (packetPosition - packetData); memcpy(_arrivingFrame.data() + frameOffset, packetPosition, payloadSize); - if ((_frameBytesRemaining -= payloadSize) <= 0) { - float aspectRatio = *(const float*)_arrivingFrame.constData(); - size_t colorSize = *(const size_t*)(_arrivingFrame.constData() + sizeof(float)); - const uint8_t* colorData = (const uint8_t*)(_arrivingFrame.constData() + sizeof(float) + sizeof(size_t)); - vpx_codec_decode(&_colorCodec, colorData, colorSize, 0, 0); - vpx_codec_iter_t iterator = 0; - vpx_image_t* image; - while ((image = vpx_codec_get_frame(&_colorCodec, &iterator)) != 0) { - // convert from YV12 to RGB: see http://www.fourcc.org/yuv.php and - // http://docs.opencv.org/modules/imgproc/doc/miscellaneous_transformations.html#cvtcolor - Mat color(image->d_h, image->d_w, CV_8UC3); - uchar* yline = image->planes[0]; - uchar* vline = image->planes[1]; - uchar* uline = image->planes[2]; - const int RED_V_WEIGHT = (int)(1.403 * 256); - const int GREEN_V_WEIGHT = (int)(0.714 * 256); - const int GREEN_U_WEIGHT = (int)(0.344 * 256); - const int BLUE_U_WEIGHT = (int)(1.773 * 256); - for (int i = 0; i < image->d_h; i += 2) { - uchar* ysrc = yline; - uchar* vsrc = vline; - uchar* usrc = uline; - for (int j = 0; j < image->d_w; j += 2) { - uchar* tl = color.ptr(i, j); - uchar* tr = color.ptr(i, j + 1); - uchar* bl = color.ptr(i + 1, j); - uchar* br = color.ptr(i + 1, j + 1); - - int v = *vsrc++ - 128; - int u = *usrc++ - 128; - - int redOffset = (RED_V_WEIGHT * v) >> 8; - int greenOffset = (GREEN_V_WEIGHT * v + GREEN_U_WEIGHT * u) >> 8; - int blueOffset = (BLUE_U_WEIGHT * u) >> 8; - - int ytl = ysrc[0]; - int ytr = ysrc[1]; - int ybl = ysrc[image->w]; - int ybr = ysrc[image->w + 1]; - ysrc += 2; - - tl[0] = ytl + redOffset; - tl[1] = ytl - greenOffset; - tl[2] = ytl + blueOffset; - - tr[0] = ytr + redOffset; - tr[1] = ytr - greenOffset; - tr[2] = ytr + blueOffset; - - bl[0] = ybl + redOffset; - bl[1] = ybl - greenOffset; - bl[2] = ybl + blueOffset; - - br[0] = ybr + redOffset; - br[1] = ybr - greenOffset; - br[2] = ybr + blueOffset; - } - yline += image->stride[0] * 2; - vline += image->stride[1]; - uline += image->stride[2]; + if ((_frameBytesRemaining -= payloadSize) > 0) { + return dataBytes; // wait for the rest of the frame + } + + if (frameSize == 0) { + // destroy the codecs, if we have any + destroyCodecs(); + + // disables video data + QMetaObject::invokeMethod(this, "setFrame", Q_ARG(cv::Mat, Mat()), + Q_ARG(cv::Mat, Mat()), Q_ARG(float, 0.0f)); + return dataBytes; + } + + // the switch from full frame to not (or vice versa) requires us to reinit the codecs + float aspectRatio = *(const float*)_arrivingFrame.constData(); + bool fullFrame = (aspectRatio == FULL_FRAME_ASPECT); + if (fullFrame != _lastFullFrame) { + destroyCodecs(); + _lastFullFrame = fullFrame; + } + + if (_colorCodec.name == 0) { + // initialize decoder context + vpx_codec_dec_init(&_colorCodec, vpx_codec_vp8_dx(), 0, 0); + } + + size_t colorSize = *(const size_t*)(_arrivingFrame.constData() + sizeof(float)); + const uint8_t* colorData = (const uint8_t*)(_arrivingFrame.constData() + sizeof(float) + sizeof(size_t)); + vpx_codec_decode(&_colorCodec, colorData, colorSize, 0, 0); + vpx_codec_iter_t iterator = 0; + vpx_image_t* image; + while ((image = vpx_codec_get_frame(&_colorCodec, &iterator)) != 0) { + // convert from YV12 to RGB: see http://www.fourcc.org/yuv.php and + // http://docs.opencv.org/modules/imgproc/doc/miscellaneous_transformations.html#cvtcolor + Mat color(image->d_h, image->d_w, CV_8UC3); + uchar* yline = image->planes[0]; + uchar* vline = image->planes[1]; + uchar* uline = image->planes[2]; + const int RED_V_WEIGHT = (int)(1.403 * 256); + const int GREEN_V_WEIGHT = (int)(0.714 * 256); + const int GREEN_U_WEIGHT = (int)(0.344 * 256); + const int BLUE_U_WEIGHT = (int)(1.773 * 256); + for (int i = 0; i < image->d_h; i += 2) { + uchar* ysrc = yline; + uchar* vsrc = vline; + uchar* usrc = uline; + for (int j = 0; j < image->d_w; j += 2) { + uchar* tl = color.ptr(i, j); + uchar* tr = color.ptr(i, j + 1); + uchar* bl = color.ptr(i + 1, j); + uchar* br = color.ptr(i + 1, j + 1); + + int v = *vsrc++ - 128; + int u = *usrc++ - 128; + + int redOffset = (RED_V_WEIGHT * v) >> 8; + int greenOffset = (GREEN_V_WEIGHT * v + GREEN_U_WEIGHT * u) >> 8; + int blueOffset = (BLUE_U_WEIGHT * u) >> 8; + + int ytl = ysrc[0]; + int ytr = ysrc[1]; + int ybl = ysrc[image->w]; + int ybr = ysrc[image->w + 1]; + ysrc += 2; + + tl[0] = ytl + redOffset; + tl[1] = ytl - greenOffset; + tl[2] = ytl + blueOffset; + + tr[0] = ytr + redOffset; + tr[1] = ytr - greenOffset; + tr[2] = ytr + blueOffset; + + bl[0] = ybl + redOffset; + bl[1] = ybl - greenOffset; + bl[2] = ybl + blueOffset; + + br[0] = ybr + redOffset; + br[1] = ybr - greenOffset; + br[2] = ybr + blueOffset; } - Mat depth; - - const uint8_t* depthData = colorData + colorSize; - int depthSize = _arrivingFrame.size() - ((const char*)depthData - _arrivingFrame.constData()); - if (depthSize > 0) { - if (_depthCodec.name == 0) { - // initialize decoder context - vpx_codec_dec_init(&_depthCodec, vpx_codec_vp8_dx(), 0, 0); - } - vpx_codec_decode(&_depthCodec, depthData, depthSize, 0, 0); - vpx_codec_iter_t iterator = 0; - vpx_image_t* image; - while ((image = vpx_codec_get_frame(&_depthCodec, &iterator)) != 0) { - depth.create(image->d_h, image->d_w, CV_8UC1); - uchar* yline = image->planes[0]; - uchar* vline = image->planes[1]; - const uchar EIGHT_BIT_MAXIMUM = 255; - const uchar MASK_THRESHOLD = 192; - for (int i = 0; i < image->d_h; i += 2) { - uchar* ysrc = yline; - uchar* vsrc = vline; - for (int j = 0; j < image->d_w; j += 2) { - if (*vsrc++ < MASK_THRESHOLD) { - *depth.ptr(i, j) = EIGHT_BIT_MAXIMUM; - *depth.ptr(i, j + 1) = EIGHT_BIT_MAXIMUM; - *depth.ptr(i + 1, j) = EIGHT_BIT_MAXIMUM; - *depth.ptr(i + 1, j + 1) = EIGHT_BIT_MAXIMUM; - - } else { - *depth.ptr(i, j) = ysrc[0]; - *depth.ptr(i, j + 1) = ysrc[1]; - *depth.ptr(i + 1, j) = ysrc[image->stride[0]]; - *depth.ptr(i + 1, j + 1) = ysrc[image->stride[0] + 1]; - } - ysrc += 2; - } - yline += image->stride[0] * 2; - vline += image->stride[1]; - } - } - } - QMetaObject::invokeMethod(this, "setFrame", Q_ARG(cv::Mat, color), - Q_ARG(cv::Mat, depth), Q_ARG(float, aspectRatio)); + yline += image->stride[0] * 2; + vline += image->stride[1]; + uline += image->stride[2]; } + Mat depth; + + const uint8_t* depthData = colorData + colorSize; + int depthSize = _arrivingFrame.size() - ((const char*)depthData - _arrivingFrame.constData()); + if (depthSize > 0) { + if (_depthCodec.name == 0) { + // initialize decoder context + vpx_codec_dec_init(&_depthCodec, vpx_codec_vp8_dx(), 0, 0); + } + vpx_codec_decode(&_depthCodec, depthData, depthSize, 0, 0); + vpx_codec_iter_t iterator = 0; + vpx_image_t* image; + while ((image = vpx_codec_get_frame(&_depthCodec, &iterator)) != 0) { + depth.create(image->d_h, image->d_w, CV_8UC1); + uchar* yline = image->planes[0]; + uchar* vline = image->planes[1]; + const uchar EIGHT_BIT_MAXIMUM = 255; + const uchar MASK_THRESHOLD = 192; + for (int i = 0; i < image->d_h; i += 2) { + uchar* ysrc = yline; + uchar* vsrc = vline; + for (int j = 0; j < image->d_w; j += 2) { + if (*vsrc++ < MASK_THRESHOLD) { + *depth.ptr(i, j) = EIGHT_BIT_MAXIMUM; + *depth.ptr(i, j + 1) = EIGHT_BIT_MAXIMUM; + *depth.ptr(i + 1, j) = EIGHT_BIT_MAXIMUM; + *depth.ptr(i + 1, j + 1) = EIGHT_BIT_MAXIMUM; + + } else { + *depth.ptr(i, j) = ysrc[0]; + *depth.ptr(i, j + 1) = ysrc[1]; + *depth.ptr(i + 1, j) = ysrc[image->stride[0]]; + *depth.ptr(i + 1, j + 1) = ysrc[image->stride[0] + 1]; + } + ysrc += 2; + } + yline += image->stride[0] * 2; + vline += image->stride[1]; + } + } + } + QMetaObject::invokeMethod(this, "setFrame", Q_ARG(cv::Mat, color), + Q_ARG(cv::Mat, depth), Q_ARG(float, aspectRatio)); } return dataBytes; @@ -368,23 +392,40 @@ void Face::cycleRenderMode() { } void Face::setFrame(const cv::Mat& color, const cv::Mat& depth, float aspectRatio) { + if (color.empty()) { + // release our textures, if any; there's no more video + if (_colorTextureID != 0) { + glDeleteTextures(1, &_colorTextureID); + _colorTextureID = 0; + } + if (_depthTextureID != 0) { + glDeleteTextures(1, &_depthTextureID); + _depthTextureID = 0; + } + return; + } + if (_colorTextureID == 0) { glGenTextures(1, &_colorTextureID); - glBindTexture(GL_TEXTURE_2D, _colorTextureID); + } + glBindTexture(GL_TEXTURE_2D, _colorTextureID); + bool recreateTextures = (_textureSize.width != color.cols || _textureSize.height != color.rows); + if (recreateTextures) { glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, color.cols, color.rows, 0, GL_RGB, GL_UNSIGNED_BYTE, color.ptr()); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); _textureSize = color.size(); _textureRect = RotatedRect(Point2f(color.cols * 0.5f, color.rows * 0.5f), _textureSize, 0.0f); - + } else { - glBindTexture(GL_TEXTURE_2D, _colorTextureID); glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, color.cols, color.rows, GL_RGB, GL_UNSIGNED_BYTE, color.ptr()); } if (!depth.empty()) { if (_depthTextureID == 0) { glGenTextures(1, &_depthTextureID); - glBindTexture(GL_TEXTURE_2D, _depthTextureID); + } + glBindTexture(GL_TEXTURE_2D, _depthTextureID); + if (recreateTextures) { glTexImage2D(GL_TEXTURE_2D, 0, GL_LUMINANCE, depth.cols, depth.rows, 0, GL_LUMINANCE, GL_UNSIGNED_BYTE, depth.ptr()); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); @@ -400,3 +441,13 @@ void Face::setFrame(const cv::Mat& color, const cv::Mat& depth, float aspectRati _aspectRatio = aspectRatio; } +void Face::destroyCodecs() { + if (_colorCodec.name != 0) { + vpx_codec_destroy(&_colorCodec); + _colorCodec.name = 0; + } + if (_depthCodec.name != 0) { + vpx_codec_destroy(&_depthCodec); + _depthCodec.name = 0; + } +} diff --git a/interface/src/avatar/Face.h b/interface/src/avatar/Face.h index a40111dcd3..893318f186 100644 --- a/interface/src/avatar/Face.h +++ b/interface/src/avatar/Face.h @@ -51,6 +51,8 @@ private: enum RenderMode { MESH, POINTS, RENDER_MODE_COUNT }; + void destroyCodecs(); + Head* _owningHead; RenderMode _renderMode; GLuint _colorTextureID; @@ -61,6 +63,7 @@ private: vpx_codec_ctx_t _colorCodec; vpx_codec_ctx_t _depthCodec; + bool _lastFullFrame; QByteArray _arrivingFrame; int _frameCount; From 11664f94463a0bc4e73aa634a92a5af8930194a3 Mon Sep 17 00:00:00 2001 From: Andrzej Kapolka Date: Fri, 2 Aug 2013 10:12:21 -0700 Subject: [PATCH 8/8] Have the particle system default to off. --- interface/src/Application.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/interface/src/Application.cpp b/interface/src/Application.cpp index f37241a720..0f87e2f063 100644 --- a/interface/src/Application.cpp +++ b/interface/src/Application.cpp @@ -1816,7 +1816,6 @@ void Application::initMenu() { (_renderLookatIndicatorOn = renderMenu->addAction("Lookat Indicator"))->setCheckable(true); _renderLookatIndicatorOn->setChecked(true); (_renderParticleSystemOn = renderMenu->addAction("Particle System"))->setCheckable(true); - _renderParticleSystemOn->setChecked(true); (_manualFirstPerson = renderMenu->addAction( "First Person", this, SLOT(setRenderFirstPerson(bool)), Qt::Key_P))->setCheckable(true); (_manualThirdPerson = renderMenu->addAction(