diff --git a/interface/resources/shaders/face.vert b/interface/resources/shaders/face.vert index 358a8902fe..018a85f524 100644 --- a/interface/resources/shaders/face.vert +++ b/interface/resources/shaders/face.vert @@ -17,9 +17,6 @@ uniform vec2 texCoordRight; // the texture coordinate vector from bottom to the top uniform vec2 texCoordUp; -// the aspect ratio of the image -uniform float aspectRatio; - // the depth texture uniform sampler2D depthTexture; @@ -31,6 +28,5 @@ void main(void) { const float MIN_VISIBLE_DEPTH = 1.0 / 255.0; const float MAX_VISIBLE_DEPTH = 254.0 / 255.0; gl_FrontColor = vec4(1.0, 1.0, 1.0, step(MIN_VISIBLE_DEPTH, depth) * (1.0 - step(MAX_VISIBLE_DEPTH, depth))); - gl_Position = gl_ModelViewProjectionMatrix * vec4(0.5 - gl_Vertex.x, - (gl_Vertex.y - 0.5) / aspectRatio, depth * 2.0 - 2.0, 1.0); + gl_Position = gl_ModelViewProjectionMatrix * vec4(0.5 - gl_Vertex.x, gl_Vertex.y - 0.5, depth - 0.5, 1.0); } diff --git a/interface/src/Application.cpp b/interface/src/Application.cpp index 031d434b19..f37241a720 100644 --- a/interface/src/Application.cpp +++ b/interface/src/Application.cpp @@ -965,14 +965,15 @@ void Application::sendAvatarFaceVideoMessage(int frameCount, const QByteArray& d int headerSize = packetPosition - packet; - // break the data up into submessages of the maximum size + // break the data up into submessages of the maximum size (at least one, for zero-length packets) *offsetPosition = 0; - while (*offsetPosition < data.size()) { + do { int payloadSize = min(data.size() - (int)*offsetPosition, MAX_PACKET_SIZE - headerSize); memcpy(packetPosition, data.constData() + *offsetPosition, payloadSize); getInstance()->controlledBroadcastToNodes(packet, headerSize + payloadSize, &NODE_TYPE_AVATAR_MIXER, 1); *offsetPosition += payloadSize; - } + + } while (*offsetPosition < data.size()); } // Every second, check the frame rates and other stuff @@ -1787,6 +1788,7 @@ void Application::initMenu() { _testPing->setChecked(true); (_fullScreenMode = optionsMenu->addAction("Fullscreen", this, SLOT(setFullscreen(bool)), Qt::Key_F))->setCheckable(true); optionsMenu->addAction("Webcam", &_webcam, SLOT(setEnabled(bool)))->setCheckable(true); + optionsMenu->addAction("Cycle Webcam Send Mode", _webcam.getGrabber(), SLOT(cycleVideoSendMode())); optionsMenu->addAction("Go Home", this, SLOT(goHome())); QMenu* renderMenu = menuBar->addMenu("Render"); diff --git a/interface/src/Webcam.cpp b/interface/src/Webcam.cpp index d4fa015ba0..b30d72aec6 100644 --- a/interface/src/Webcam.cpp +++ b/interface/src/Webcam.cpp @@ -19,6 +19,7 @@ #include "Application.h" #include "Webcam.h" +#include "avatar/Face.h" using namespace cv; using namespace std; @@ -154,8 +155,8 @@ Webcam::~Webcam() { const float METERS_PER_MM = 1.0f / 1000.0f; -void Webcam::setFrame(const Mat& color, int format, const Mat& depth, float meanFaceDepth, - const RotatedRect& faceRect, const JointVector& joints) { +void Webcam::setFrame(const Mat& color, int format, const Mat& depth, float midFaceDepth, + float aspectRatio, const RotatedRect& faceRect, bool sending, const JointVector& joints) { IplImage colorImage = color; glPixelStorei(GL_UNPACK_ROW_LENGTH, colorImage.widthStep / 3); if (_colorTextureID == 0) { @@ -192,8 +193,10 @@ void Webcam::setFrame(const Mat& color, int format, const Mat& depth, float mean glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); glBindTexture(GL_TEXTURE_2D, 0); - // store our face rect and joints, update our frame count for fps computation + // store our various data, update our frame count for fps computation + _aspectRatio = aspectRatio; _faceRect = faceRect; + _sending = sending; _joints = joints; _frameCount++; @@ -242,18 +245,18 @@ void Webcam::setFrame(const Mat& color, int format, const Mat& depth, float mean if (_initialFaceRect.size.area() == 0) { _initialFaceRect = _faceRect; _estimatedPosition = glm::vec3(); - _initialFaceDepth = meanFaceDepth; + _initialFaceDepth = midFaceDepth; } else { float proportion, z; - if (meanFaceDepth == UNINITIALIZED_FACE_DEPTH) { + if (midFaceDepth == UNINITIALIZED_FACE_DEPTH) { proportion = sqrtf(_initialFaceRect.size.area() / (float)_faceRect.size.area()); const float INITIAL_DISTANCE_TO_CAMERA = 0.333f; z = INITIAL_DISTANCE_TO_CAMERA * proportion - INITIAL_DISTANCE_TO_CAMERA; } else { - z = (meanFaceDepth - _initialFaceDepth) * METERS_PER_MM; - proportion = meanFaceDepth / _initialFaceDepth; + z = (midFaceDepth - _initialFaceDepth) * METERS_PER_MM; + proportion = midFaceDepth / _initialFaceDepth; } const float POSITION_SCALE = 0.5f; _estimatedPosition = glm::vec3( @@ -270,8 +273,8 @@ void Webcam::setFrame(const Mat& color, int format, const Mat& depth, float mean QTimer::singleShot(qMax((int)remaining / 1000, 0), _grabber, SLOT(grabFrame())); } -FrameGrabber::FrameGrabber() : _initialized(false), _capture(0), _searchWindow(0, 0, 0, 0), - _smoothedMeanFaceDepth(UNINITIALIZED_FACE_DEPTH), _colorCodec(), _depthCodec(), _frameCount(0) { +FrameGrabber::FrameGrabber() : _initialized(false), _videoSendMode(FULL_FRAME_VIDEO), _capture(0), _searchWindow(0, 0, 0, 0), + _smoothedMidFaceDepth(UNINITIALIZED_FACE_DEPTH), _colorCodec(), _depthCodec(), _frameCount(0) { } FrameGrabber::~FrameGrabber() { @@ -364,6 +367,13 @@ static void XN_CALLBACK_TYPE calibrationCompleted(SkeletonCapability& capability } #endif +void FrameGrabber::cycleVideoSendMode() { + _videoSendMode = (VideoSendMode)((_videoSendMode + 1) % VIDEO_SEND_MODE_COUNT); + _searchWindow = cv::Rect(0, 0, 0, 0); + + destroyCodecs(); +} + void FrameGrabber::reset() { _searchWindow = cv::Rect(0, 0, 0, 0); @@ -379,14 +389,7 @@ void FrameGrabber::shutdown() { cvReleaseCapture(&_capture); _capture = 0; } - if (_colorCodec.name != 0) { - vpx_codec_destroy(&_colorCodec); - _colorCodec.name = 0; - } - if (_depthCodec.name != 0) { - vpx_codec_destroy(&_depthCodec); - _depthCodec.name = 0; - } + destroyCodecs(); _initialized = false; thread()->quit(); @@ -462,218 +465,208 @@ void FrameGrabber::grabFrame() { color = image; } - // if we don't have a search window (yet), try using the face cascade - int channels = 0; - float ranges[] = { 0, 180 }; - const float* range = ranges; - if (_searchWindow.area() == 0) { - vector faces; - _faceCascade.detectMultiScale(color, faces, 1.1, 6); - if (!faces.empty()) { - _searchWindow = faces.front(); - updateHSVFrame(color, format); - - Mat faceHsv(_hsvFrame, _searchWindow); - Mat faceMask(_mask, _searchWindow); - int sizes = 30; - calcHist(&faceHsv, 1, &channels, faceMask, _histogram, 1, &sizes, &range); - double min, max; - minMaxLoc(_histogram, &min, &max); - _histogram.convertTo(_histogram, -1, (max == 0.0) ? 0.0 : 255.0 / max); - } - } - RotatedRect faceRect; - if (_searchWindow.area() > 0) { - updateHSVFrame(color, format); - - calcBackProject(&_hsvFrame, 1, &channels, _histogram, _backProject, &range); - bitwise_and(_backProject, _mask, _backProject); - - faceRect = CamShift(_backProject, _searchWindow, TermCriteria(CV_TERMCRIT_EPS | CV_TERMCRIT_ITER, 10, 1)); - Rect faceBounds = faceRect.boundingRect(); - Rect imageBounds(0, 0, color.cols, color.rows); - _searchWindow = Rect(clip(faceBounds.tl(), imageBounds), clip(faceBounds.br(), imageBounds)); - } - - const int ENCODED_FACE_WIDTH = 128; - const int ENCODED_FACE_HEIGHT = 128; - if (_colorCodec.name == 0) { - // initialize encoder context(s) - vpx_codec_enc_cfg_t codecConfig; - vpx_codec_enc_config_default(vpx_codec_vp8_cx(), &codecConfig, 0); - codecConfig.rc_target_bitrate = ENCODED_FACE_WIDTH * ENCODED_FACE_HEIGHT * - codecConfig.rc_target_bitrate / codecConfig.g_w / codecConfig.g_h; - codecConfig.g_w = ENCODED_FACE_WIDTH; - codecConfig.g_h = ENCODED_FACE_HEIGHT; - vpx_codec_enc_init(&_colorCodec, vpx_codec_vp8_cx(), &codecConfig, 0); - - if (!depth.empty()) { - int DEPTH_BITRATE_MULTIPLIER = 2; - codecConfig.rc_target_bitrate *= 2; - vpx_codec_enc_init(&_depthCodec, vpx_codec_vp8_cx(), &codecConfig, 0); - } - } - - // correct for 180 degree rotations - if (faceRect.angle < -90.0f) { - faceRect.angle += 180.0f; - - } else if (faceRect.angle > 90.0f) { - faceRect.angle -= 180.0f; - } - - // compute the smoothed face rect - if (_smoothedFaceRect.size.area() == 0) { - _smoothedFaceRect = faceRect; + int encodedWidth; + int encodedHeight; + int depthBitrateMultiplier = 1; + Mat faceTransform; + float aspectRatio; + if (_videoSendMode == FULL_FRAME_VIDEO) { + // no need to find the face if we're sending full frame video + _smoothedFaceRect = RotatedRect(Point2f(color.cols / 2.0f, color.rows / 2.0f), Size2f(color.cols, color.rows), 0.0f); + encodedWidth = color.cols; + encodedHeight = color.rows; + aspectRatio = FULL_FRAME_ASPECT; } else { - const float FACE_RECT_SMOOTHING = 0.9f; - _smoothedFaceRect.center.x = glm::mix(faceRect.center.x, _smoothedFaceRect.center.x, FACE_RECT_SMOOTHING); - _smoothedFaceRect.center.y = glm::mix(faceRect.center.y, _smoothedFaceRect.center.y, FACE_RECT_SMOOTHING); - _smoothedFaceRect.size.width = glm::mix(faceRect.size.width, _smoothedFaceRect.size.width, FACE_RECT_SMOOTHING); - _smoothedFaceRect.size.height = glm::mix(faceRect.size.height, _smoothedFaceRect.size.height, FACE_RECT_SMOOTHING); - _smoothedFaceRect.angle = glm::mix(faceRect.angle, _smoothedFaceRect.angle, FACE_RECT_SMOOTHING); - } - - // resize/rotate face into encoding rectangle - _faceColor.create(ENCODED_FACE_WIDTH, ENCODED_FACE_HEIGHT, CV_8UC3); - Point2f sourcePoints[4]; - _smoothedFaceRect.points(sourcePoints); - Point2f destPoints[] = { Point2f(0, ENCODED_FACE_HEIGHT), Point2f(0, 0), Point2f(ENCODED_FACE_WIDTH, 0) }; - Mat transform = getAffineTransform(sourcePoints, destPoints); - warpAffine(color, _faceColor, transform, _faceColor.size()); - - // convert from RGB to YV12 - const int ENCODED_BITS_PER_Y = 8; - const int ENCODED_BITS_PER_VU = 2; - const int ENCODED_BITS_PER_PIXEL = ENCODED_BITS_PER_Y + 2 * ENCODED_BITS_PER_VU; - const int BITS_PER_BYTE = 8; - _encodedFace.resize(ENCODED_FACE_WIDTH * ENCODED_FACE_HEIGHT * ENCODED_BITS_PER_PIXEL / BITS_PER_BYTE); - vpx_image_t vpxImage; - vpx_img_wrap(&vpxImage, VPX_IMG_FMT_YV12, ENCODED_FACE_WIDTH, ENCODED_FACE_HEIGHT, 1, (unsigned char*)_encodedFace.data()); - uchar* yline = vpxImage.planes[0]; - uchar* vline = vpxImage.planes[1]; - uchar* uline = vpxImage.planes[2]; - const int Y_RED_WEIGHT = (int)(0.299 * 256); - const int Y_GREEN_WEIGHT = (int)(0.587 * 256); - const int Y_BLUE_WEIGHT = (int)(0.114 * 256); - const int V_RED_WEIGHT = (int)(0.713 * 256); - const int U_BLUE_WEIGHT = (int)(0.564 * 256); - int redIndex = 0; - int greenIndex = 1; - int blueIndex = 2; - if (format == GL_BGR) { - redIndex = 2; - blueIndex = 0; - } - for (int i = 0; i < ENCODED_FACE_HEIGHT; i += 2) { - uchar* ydest = yline; - uchar* vdest = vline; - uchar* udest = uline; - for (int j = 0; j < ENCODED_FACE_WIDTH; j += 2) { - uchar* tl = _faceColor.ptr(i, j); - uchar* tr = _faceColor.ptr(i, j + 1); - uchar* bl = _faceColor.ptr(i + 1, j); - uchar* br = _faceColor.ptr(i + 1, j + 1); + // if we don't have a search window (yet), try using the face cascade + int channels = 0; + float ranges[] = { 0, 180 }; + const float* range = ranges; + if (_searchWindow.area() == 0) { + vector faces; + _faceCascade.detectMultiScale(color, faces, 1.1, 6); + if (!faces.empty()) { + _searchWindow = faces.front(); + updateHSVFrame(color, format); - ydest[0] = (tl[redIndex] * Y_RED_WEIGHT + tl[1] * Y_GREEN_WEIGHT + tl[blueIndex] * Y_BLUE_WEIGHT) >> 8; - ydest[1] = (tr[redIndex] * Y_RED_WEIGHT + tr[1] * Y_GREEN_WEIGHT + tr[blueIndex] * Y_BLUE_WEIGHT) >> 8; - ydest[vpxImage.stride[0]] = (bl[redIndex] * Y_RED_WEIGHT + bl[greenIndex] * - Y_GREEN_WEIGHT + bl[blueIndex] * Y_BLUE_WEIGHT) >> 8; - ydest[vpxImage.stride[0] + 1] = (br[redIndex] * Y_RED_WEIGHT + br[greenIndex] * - Y_GREEN_WEIGHT + br[blueIndex] * Y_BLUE_WEIGHT) >> 8; - ydest += 2; - - int totalRed = tl[redIndex] + tr[redIndex] + bl[redIndex] + br[redIndex]; - int totalGreen = tl[greenIndex] + tr[greenIndex] + bl[greenIndex] + br[greenIndex]; - int totalBlue = tl[blueIndex] + tr[blueIndex] + bl[blueIndex] + br[blueIndex]; - int totalY = (totalRed * Y_RED_WEIGHT + totalGreen * Y_GREEN_WEIGHT + totalBlue * Y_BLUE_WEIGHT) >> 8; - - *vdest++ = (((totalRed - totalY) * V_RED_WEIGHT) >> 10) + 128; - *udest++ = (((totalBlue - totalY) * U_BLUE_WEIGHT) >> 10) + 128; - } - yline += vpxImage.stride[0] * 2; - vline += vpxImage.stride[1]; - uline += vpxImage.stride[2]; - } - - // encode the frame - vpx_codec_encode(&_colorCodec, &vpxImage, ++_frameCount, 1, 0, VPX_DL_REALTIME); - - // start the payload off with the aspect ratio - QByteArray payload(sizeof(float), 0); - *(float*)payload.data() = _smoothedFaceRect.size.width / _smoothedFaceRect.size.height; - - // extract the encoded frame - vpx_codec_iter_t iterator = 0; - const vpx_codec_cx_pkt_t* packet; - while ((packet = vpx_codec_get_cx_data(&_colorCodec, &iterator)) != 0) { - if (packet->kind == VPX_CODEC_CX_FRAME_PKT) { - // prepend the length, which will indicate whether there's a depth frame too - payload.append((const char*)&packet->data.frame.sz, sizeof(packet->data.frame.sz)); - payload.append((const char*)packet->data.frame.buf, packet->data.frame.sz); - } - } - - if (!depth.empty()) { - // warp the face depth without interpolation (because it will contain invalid zero values) - _faceDepth.create(ENCODED_FACE_WIDTH, ENCODED_FACE_HEIGHT, CV_16UC1); - warpAffine(depth, _faceDepth, transform, _faceDepth.size(), INTER_NEAREST); - - // find the mean of the valid values - qint64 depthTotal = 0; - qint64 depthSamples = 0; - ushort* src = _faceDepth.ptr(); - const ushort ELEVEN_BIT_MINIMUM = 0; - const ushort ELEVEN_BIT_MAXIMUM = 2047; - for (int i = 0; i < ENCODED_FACE_HEIGHT; i++) { - for (int j = 0; j < ENCODED_FACE_WIDTH; j++) { - ushort depth = *src++; - if (depth != ELEVEN_BIT_MINIMUM && depth != ELEVEN_BIT_MAXIMUM) { - depthTotal += depth; - depthSamples++; - } + Mat faceHsv(_hsvFrame, _searchWindow); + Mat faceMask(_mask, _searchWindow); + int sizes = 30; + calcHist(&faceHsv, 1, &channels, faceMask, _histogram, 1, &sizes, &range); + double min, max; + minMaxLoc(_histogram, &min, &max); + _histogram.convertTo(_histogram, -1, (max == 0.0) ? 0.0 : 255.0 / max); } } - float mean = (depthSamples == 0) ? UNINITIALIZED_FACE_DEPTH : depthTotal / (float)depthSamples; + RotatedRect faceRect; + if (_searchWindow.area() > 0) { + updateHSVFrame(color, format); + + calcBackProject(&_hsvFrame, 1, &channels, _histogram, _backProject, &range); + bitwise_and(_backProject, _mask, _backProject); + + faceRect = CamShift(_backProject, _searchWindow, TermCriteria(CV_TERMCRIT_EPS | CV_TERMCRIT_ITER, 10, 1)); + Rect faceBounds = faceRect.boundingRect(); + Rect imageBounds(0, 0, color.cols, color.rows); + _searchWindow = Rect(clip(faceBounds.tl(), imageBounds), clip(faceBounds.br(), imageBounds)); + } + const int ENCODED_FACE_WIDTH = 128; + const int ENCODED_FACE_HEIGHT = 128; + encodedWidth = ENCODED_FACE_WIDTH; + encodedHeight = ENCODED_FACE_HEIGHT; + depthBitrateMultiplier = 2; - // smooth the mean over time - const float DEPTH_OFFSET_SMOOTHING = 0.95f; - _smoothedMeanFaceDepth = (_smoothedMeanFaceDepth == UNINITIALIZED_FACE_DEPTH) ? mean : - glm::mix(mean, _smoothedMeanFaceDepth, DEPTH_OFFSET_SMOOTHING); + // correct for 180 degree rotations + if (faceRect.angle < -90.0f) { + faceRect.angle += 180.0f; + + } else if (faceRect.angle > 90.0f) { + faceRect.angle -= 180.0f; + } + + // compute the smoothed face rect + if (_smoothedFaceRect.size.area() == 0) { + _smoothedFaceRect = faceRect; + + } else { + const float FACE_RECT_SMOOTHING = 0.9f; + _smoothedFaceRect.center.x = glm::mix(faceRect.center.x, _smoothedFaceRect.center.x, FACE_RECT_SMOOTHING); + _smoothedFaceRect.center.y = glm::mix(faceRect.center.y, _smoothedFaceRect.center.y, FACE_RECT_SMOOTHING); + _smoothedFaceRect.size.width = glm::mix(faceRect.size.width, _smoothedFaceRect.size.width, FACE_RECT_SMOOTHING); + _smoothedFaceRect.size.height = glm::mix(faceRect.size.height, _smoothedFaceRect.size.height, FACE_RECT_SMOOTHING); + _smoothedFaceRect.angle = glm::mix(faceRect.angle, _smoothedFaceRect.angle, FACE_RECT_SMOOTHING); + } + + // use the face rect to compute the face transform, aspect ratio + Point2f sourcePoints[4]; + _smoothedFaceRect.points(sourcePoints); + Point2f destPoints[] = { Point2f(0, encodedHeight), Point2f(0, 0), Point2f(encodedWidth, 0) }; + faceTransform = getAffineTransform(sourcePoints, destPoints); + aspectRatio = _smoothedFaceRect.size.width / _smoothedFaceRect.size.height; + } + + const ushort ELEVEN_BIT_MINIMUM = 0; + const uchar EIGHT_BIT_MIDPOINT = 128; + double depthOffset; + if (!depth.empty()) { + if (_videoSendMode == FACE_VIDEO) { + // warp the face depth without interpolation (because it will contain invalid zero values) + _faceDepth.create(encodedHeight, encodedWidth, CV_16UC1); + warpAffine(depth, _faceDepth, faceTransform, _faceDepth.size(), INTER_NEAREST); + + } else { + _faceDepth = depth; + } + _smoothedFaceDepth.create(encodedHeight, encodedWidth, CV_16UC1); + + // smooth the depth over time + const ushort ELEVEN_BIT_MAXIMUM = 2047; + const float DEPTH_SMOOTHING = 0.25f; + ushort* src = _faceDepth.ptr(); + ushort* dest = _smoothedFaceDepth.ptr(); + ushort minimumDepth = numeric_limits::max(); + for (int i = 0; i < encodedHeight; i++) { + for (int j = 0; j < encodedWidth; j++) { + ushort depth = *src++; + if (depth != ELEVEN_BIT_MINIMUM && depth != ELEVEN_BIT_MAXIMUM) { + minimumDepth = min(minimumDepth, depth); + *dest = (*dest == ELEVEN_BIT_MINIMUM) ? depth : (ushort)glm::mix(depth, *dest, DEPTH_SMOOTHING); + } + dest++; + } + } + const ushort MINIMUM_DEPTH_OFFSET = 64; + const float FIXED_MID_DEPTH = 640.0f; + float midFaceDepth = (_videoSendMode == FACE_VIDEO) ? (minimumDepth + MINIMUM_DEPTH_OFFSET) : FIXED_MID_DEPTH; + + // smooth the mid face depth over time + const float MID_FACE_DEPTH_SMOOTHING = 0.5f; + _smoothedMidFaceDepth = (_smoothedMidFaceDepth == UNINITIALIZED_FACE_DEPTH) ? midFaceDepth : + glm::mix(midFaceDepth, _smoothedMidFaceDepth, MID_FACE_DEPTH_SMOOTHING); // convert from 11 to 8 bits for preview/local display - const uchar EIGHT_BIT_MIDPOINT = 128; - double depthOffset = EIGHT_BIT_MIDPOINT - _smoothedMeanFaceDepth; + depthOffset = EIGHT_BIT_MIDPOINT - _smoothedMidFaceDepth; depth.convertTo(_grayDepthFrame, CV_8UC1, 1.0, depthOffset); - - // likewise for the encoded representation + } + + QByteArray payload; + if (_videoSendMode != NO_VIDEO) { + if (_colorCodec.name == 0) { + // initialize encoder context(s) + vpx_codec_enc_cfg_t codecConfig; + vpx_codec_enc_config_default(vpx_codec_vp8_cx(), &codecConfig, 0); + codecConfig.rc_target_bitrate = encodedWidth * encodedHeight * + codecConfig.rc_target_bitrate / codecConfig.g_w / codecConfig.g_h; + codecConfig.g_w = encodedWidth; + codecConfig.g_h = encodedHeight; + vpx_codec_enc_init(&_colorCodec, vpx_codec_vp8_cx(), &codecConfig, 0); + + if (!depth.empty()) { + codecConfig.rc_target_bitrate *= depthBitrateMultiplier; + vpx_codec_enc_init(&_depthCodec, vpx_codec_vp8_cx(), &codecConfig, 0); + } + } + + Mat transform; + if (_videoSendMode == FACE_VIDEO) { + // resize/rotate face into encoding rectangle + _faceColor.create(encodedHeight, encodedWidth, CV_8UC3); + warpAffine(color, _faceColor, faceTransform, _faceColor.size()); + + } else { + _faceColor = color; + } + + // convert from RGB to YV12: see http://www.fourcc.org/yuv.php and + // http://docs.opencv.org/modules/imgproc/doc/miscellaneous_transformations.html#cvtcolor + const int ENCODED_BITS_PER_Y = 8; + const int ENCODED_BITS_PER_VU = 2; + const int ENCODED_BITS_PER_PIXEL = ENCODED_BITS_PER_Y + 2 * ENCODED_BITS_PER_VU; + const int BITS_PER_BYTE = 8; + _encodedFace.resize(encodedWidth * encodedHeight * ENCODED_BITS_PER_PIXEL / BITS_PER_BYTE); + vpx_image_t vpxImage; + vpx_img_wrap(&vpxImage, VPX_IMG_FMT_YV12, encodedWidth, encodedHeight, 1, + (unsigned char*)_encodedFace.data()); uchar* yline = vpxImage.planes[0]; uchar* vline = vpxImage.planes[1]; uchar* uline = vpxImage.planes[2]; - const uchar EIGHT_BIT_MAXIMUM = 255; - for (int i = 0; i < ENCODED_FACE_HEIGHT; i += 2) { + const int Y_RED_WEIGHT = (int)(0.299 * 256); + const int Y_GREEN_WEIGHT = (int)(0.587 * 256); + const int Y_BLUE_WEIGHT = (int)(0.114 * 256); + const int V_RED_WEIGHT = (int)(0.713 * 256); + const int U_BLUE_WEIGHT = (int)(0.564 * 256); + int redIndex = 0; + int greenIndex = 1; + int blueIndex = 2; + if (format == GL_BGR) { + redIndex = 2; + blueIndex = 0; + } + for (int i = 0; i < encodedHeight; i += 2) { uchar* ydest = yline; uchar* vdest = vline; uchar* udest = uline; - for (int j = 0; j < ENCODED_FACE_WIDTH; j += 2) { - ushort tl = *_faceDepth.ptr(i, j); - ushort tr = *_faceDepth.ptr(i, j + 1); - ushort bl = *_faceDepth.ptr(i + 1, j); - ushort br = *_faceDepth.ptr(i + 1, j + 1); - - uchar mask = EIGHT_BIT_MAXIMUM; + for (int j = 0; j < encodedWidth; j += 2) { + uchar* tl = _faceColor.ptr(i, j); + uchar* tr = _faceColor.ptr(i, j + 1); + uchar* bl = _faceColor.ptr(i + 1, j); + uchar* br = _faceColor.ptr(i + 1, j + 1); - ydest[0] = (tl == ELEVEN_BIT_MINIMUM) ? (mask = EIGHT_BIT_MIDPOINT) : saturate_cast(tl + depthOffset); - ydest[1] = (tr == ELEVEN_BIT_MINIMUM) ? (mask = EIGHT_BIT_MIDPOINT) : saturate_cast(tr + depthOffset); - ydest[vpxImage.stride[0]] = (bl == ELEVEN_BIT_MINIMUM) ? - (mask = EIGHT_BIT_MIDPOINT) : saturate_cast(bl + depthOffset); - ydest[vpxImage.stride[0] + 1] = (br == ELEVEN_BIT_MINIMUM) ? - (mask = EIGHT_BIT_MIDPOINT) : saturate_cast(br + depthOffset); + ydest[0] = (tl[redIndex] * Y_RED_WEIGHT + tl[1] * Y_GREEN_WEIGHT + tl[blueIndex] * Y_BLUE_WEIGHT) >> 8; + ydest[1] = (tr[redIndex] * Y_RED_WEIGHT + tr[1] * Y_GREEN_WEIGHT + tr[blueIndex] * Y_BLUE_WEIGHT) >> 8; + ydest[vpxImage.stride[0]] = (bl[redIndex] * Y_RED_WEIGHT + bl[greenIndex] * + Y_GREEN_WEIGHT + bl[blueIndex] * Y_BLUE_WEIGHT) >> 8; + ydest[vpxImage.stride[0] + 1] = (br[redIndex] * Y_RED_WEIGHT + br[greenIndex] * + Y_GREEN_WEIGHT + br[blueIndex] * Y_BLUE_WEIGHT) >> 8; ydest += 2; - - *vdest++ = mask; - *udest++ = EIGHT_BIT_MIDPOINT; + + int totalRed = tl[redIndex] + tr[redIndex] + bl[redIndex] + br[redIndex]; + int totalGreen = tl[greenIndex] + tr[greenIndex] + bl[greenIndex] + br[greenIndex]; + int totalBlue = tl[blueIndex] + tr[blueIndex] + bl[blueIndex] + br[blueIndex]; + int totalY = (totalRed * Y_RED_WEIGHT + totalGreen * Y_GREEN_WEIGHT + totalBlue * Y_BLUE_WEIGHT) >> 8; + + *vdest++ = (((totalRed - totalY) * V_RED_WEIGHT) >> 10) + 128; + *udest++ = (((totalBlue - totalY) * U_BLUE_WEIGHT) >> 10) + 128; } yline += vpxImage.stride[0] * 2; vline += vpxImage.stride[1]; @@ -681,24 +674,79 @@ void FrameGrabber::grabFrame() { } // encode the frame - vpx_codec_encode(&_depthCodec, &vpxImage, _frameCount, 1, 0, VPX_DL_REALTIME); + vpx_codec_encode(&_colorCodec, &vpxImage, ++_frameCount, 1, 0, VPX_DL_REALTIME); + + // start the payload off with the aspect ratio (zero for full frame) + payload.append((const char*)&aspectRatio, sizeof(float)); // extract the encoded frame vpx_codec_iter_t iterator = 0; const vpx_codec_cx_pkt_t* packet; - while ((packet = vpx_codec_get_cx_data(&_depthCodec, &iterator)) != 0) { + while ((packet = vpx_codec_get_cx_data(&_colorCodec, &iterator)) != 0) { if (packet->kind == VPX_CODEC_CX_FRAME_PKT) { + // prepend the length, which will indicate whether there's a depth frame too + payload.append((const char*)&packet->data.frame.sz, sizeof(packet->data.frame.sz)); payload.append((const char*)packet->data.frame.buf, packet->data.frame.sz); } } + + if (!depth.empty()) { + // convert with mask + uchar* yline = vpxImage.planes[0]; + uchar* vline = vpxImage.planes[1]; + uchar* uline = vpxImage.planes[2]; + const uchar EIGHT_BIT_MAXIMUM = 255; + for (int i = 0; i < encodedHeight; i += 2) { + uchar* ydest = yline; + uchar* vdest = vline; + uchar* udest = uline; + for (int j = 0; j < encodedWidth; j += 2) { + ushort tl = *_smoothedFaceDepth.ptr(i, j); + ushort tr = *_smoothedFaceDepth.ptr(i, j + 1); + ushort bl = *_smoothedFaceDepth.ptr(i + 1, j); + ushort br = *_smoothedFaceDepth.ptr(i + 1, j + 1); + + uchar mask = EIGHT_BIT_MAXIMUM; + + ydest[0] = (tl == ELEVEN_BIT_MINIMUM) ? (mask = EIGHT_BIT_MIDPOINT) : + saturate_cast(tl + depthOffset); + ydest[1] = (tr == ELEVEN_BIT_MINIMUM) ? (mask = EIGHT_BIT_MIDPOINT) : + saturate_cast(tr + depthOffset); + ydest[vpxImage.stride[0]] = (bl == ELEVEN_BIT_MINIMUM) ? (mask = EIGHT_BIT_MIDPOINT) : + saturate_cast(bl + depthOffset); + ydest[vpxImage.stride[0] + 1] = (br == ELEVEN_BIT_MINIMUM) ? (mask = EIGHT_BIT_MIDPOINT) : + saturate_cast(br + depthOffset); + ydest += 2; + + *vdest++ = mask; + *udest++ = EIGHT_BIT_MIDPOINT; + } + yline += vpxImage.stride[0] * 2; + vline += vpxImage.stride[1]; + uline += vpxImage.stride[2]; + } + + // encode the frame + vpx_codec_encode(&_depthCodec, &vpxImage, _frameCount, 1, 0, VPX_DL_REALTIME); + + // extract the encoded frame + vpx_codec_iter_t iterator = 0; + const vpx_codec_cx_pkt_t* packet; + while ((packet = vpx_codec_get_cx_data(&_depthCodec, &iterator)) != 0) { + if (packet->kind == VPX_CODEC_CX_FRAME_PKT) { + payload.append((const char*)packet->data.frame.buf, packet->data.frame.sz); + } + } + } } QMetaObject::invokeMethod(Application::getInstance(), "sendAvatarFaceVideoMessage", - Q_ARG(int, _frameCount), Q_ARG(QByteArray, payload)); - + Q_ARG(int, _frameCount), Q_ARG(QByteArray, payload)); + QMetaObject::invokeMethod(Application::getInstance()->getWebcam(), "setFrame", - Q_ARG(cv::Mat, color), Q_ARG(int, format), Q_ARG(cv::Mat, _grayDepthFrame), Q_ARG(float, _smoothedMeanFaceDepth), - Q_ARG(cv::RotatedRect, _smoothedFaceRect), Q_ARG(JointVector, joints)); + Q_ARG(cv::Mat, color), Q_ARG(int, format), Q_ARG(cv::Mat, _grayDepthFrame), Q_ARG(float, _smoothedMidFaceDepth), + Q_ARG(float, aspectRatio), Q_ARG(cv::RotatedRect, _smoothedFaceRect), Q_ARG(bool, !payload.isEmpty()), + Q_ARG(JointVector, joints)); } bool FrameGrabber::init() { @@ -767,6 +815,17 @@ void FrameGrabber::updateHSVFrame(const Mat& frame, int format) { inRange(_hsvFrame, Scalar(0, 55, 65), Scalar(180, 256, 256), _mask); } +void FrameGrabber::destroyCodecs() { + if (_colorCodec.name != 0) { + vpx_codec_destroy(&_colorCodec); + _colorCodec.name = 0; + } + if (_depthCodec.name != 0) { + vpx_codec_destroy(&_depthCodec); + _depthCodec.name = 0; + } +} + Joint::Joint(const glm::vec3& position, const glm::quat& rotation, const glm::vec3& projected) : isValid(true), position(position), rotation(rotation), projected(projected) { } diff --git a/interface/src/Webcam.h b/interface/src/Webcam.h index 3910bb4a19..1df6a846a8 100644 --- a/interface/src/Webcam.h +++ b/interface/src/Webcam.h @@ -44,12 +44,17 @@ public: Webcam(); ~Webcam(); + FrameGrabber* getGrabber() { return _grabber; } + bool isActive() const { return _active; } + bool isSending() const { return _sending; } GLuint getColorTextureID() const { return _colorTextureID; } GLuint getDepthTextureID() const { return _depthTextureID; } const cv::Size2f& getTextureSize() const { return _textureSize; } + float getAspectRatio() const { return _aspectRatio; } + const cv::RotatedRect& getFaceRect() const { return _faceRect; } const glm::vec3& getEstimatedPosition() const { return _estimatedPosition; } @@ -62,8 +67,8 @@ public: public slots: void setEnabled(bool enabled); - void setFrame(const cv::Mat& color, int format, const cv::Mat& depth, float meanFaceDepth, - const cv::RotatedRect& faceRect, const JointVector& joints); + void setFrame(const cv::Mat& color, int format, const cv::Mat& depth, float midFaceDepth, + float aspectRatio, const cv::RotatedRect& faceRect, bool sending, const JointVector& joints); private: @@ -72,9 +77,11 @@ private: bool _enabled; bool _active; + bool _sending; GLuint _colorTextureID; GLuint _depthTextureID; cv::Size2f _textureSize; + float _aspectRatio; cv::RotatedRect _faceRect; cv::RotatedRect _initialFaceRect; float _initialFaceDepth; @@ -100,16 +107,21 @@ public: public slots: + void cycleVideoSendMode(); void reset(); void shutdown(); void grabFrame(); private: + enum VideoSendMode { NO_VIDEO, FACE_VIDEO, FULL_FRAME_VIDEO, VIDEO_SEND_MODE_COUNT }; + bool init(); void updateHSVFrame(const cv::Mat& frame, int format); + void destroyCodecs(); bool _initialized; + VideoSendMode _videoSendMode; CvCapture* _capture; cv::CascadeClassifier _faceCascade; cv::Mat _hsvFrame; @@ -118,13 +130,14 @@ private: cv::Mat _backProject; cv::Rect _searchWindow; cv::Mat _grayDepthFrame; - float _smoothedMeanFaceDepth; + float _smoothedMidFaceDepth; vpx_codec_ctx_t _colorCodec; vpx_codec_ctx_t _depthCodec; int _frameCount; cv::Mat _faceColor; cv::Mat _faceDepth; + cv::Mat _smoothedFaceDepth; QByteArray _encodedFace; cv::RotatedRect _smoothedFaceRect; diff --git a/interface/src/avatar/Avatar.cpp b/interface/src/avatar/Avatar.cpp index 1af82083ae..6c43088876 100755 --- a/interface/src/avatar/Avatar.cpp +++ b/interface/src/avatar/Avatar.cpp @@ -314,10 +314,7 @@ void Avatar::updateFromGyrosAndOrWebcam(bool gyroLook, estimatedPosition = webcam->getEstimatedPosition(); // apply face data - _head.getFace().setColorTextureID(webcam->getColorTextureID()); - _head.getFace().setDepthTextureID(webcam->getDepthTextureID()); - _head.getFace().setTextureSize(webcam->getTextureSize()); - _head.getFace().setTextureRect(webcam->getFaceRect()); + _head.getFace().setFrameFromWebcam(); // compute and store the joint rotations const JointVector& joints = webcam->getEstimatedJoints(); @@ -334,7 +331,7 @@ void Avatar::updateFromGyrosAndOrWebcam(bool gyroLook, } } } else { - _head.getFace().setColorTextureID(0); + _head.getFace().clearFrame(); } _head.setPitch(estimatedRotation.x * amplifyAngle.x + pitchFromTouch); _head.setYaw(estimatedRotation.y * amplifyAngle.y + yawFromTouch); @@ -1300,9 +1297,15 @@ float Avatar::getBallRenderAlpha(int ball, bool lookingInMirror) const { } void Avatar::renderBody(bool lookingInMirror, bool renderAvatarBalls) { - - // Render the body as balls and cones - if (renderAvatarBalls || !_voxels.getVoxelURL().isValid()) { + + if (_head.getFace().isFullFrame()) { + // Render the full-frame video + float alpha = getBallRenderAlpha(BODY_BALL_HEAD_BASE, lookingInMirror); + if (alpha > 0.0f) { + _head.getFace().render(1.0f); + } + } else if (renderAvatarBalls || !_voxels.getVoxelURL().isValid()) { + // Render the body as balls and cones for (int b = 0; b < NUM_AVATAR_BODY_BALLS; b++) { float alpha = getBallRenderAlpha(b, lookingInMirror); diff --git a/interface/src/avatar/Face.cpp b/interface/src/avatar/Face.cpp index ff31241c54..0541b3c21c 100644 --- a/interface/src/avatar/Face.cpp +++ b/interface/src/avatar/Face.cpp @@ -17,6 +17,7 @@ #include "Avatar.h" #include "Head.h" #include "Face.h" +#include "Webcam.h" #include "renderer/ProgramObject.h" using namespace cv; @@ -25,7 +26,6 @@ ProgramObject* Face::_program = 0; int Face::_texCoordCornerLocation; int Face::_texCoordRightLocation; int Face::_texCoordUpLocation; -int Face::_aspectRatioLocation; GLuint Face::_vboID; GLuint Face::_iboID; @@ -55,17 +55,25 @@ Face::~Face() { } } -void Face::setTextureRect(const cv::RotatedRect& textureRect) { - _textureRect = textureRect; - _aspectRatio = _textureRect.size.width / _textureRect.size.height; +void Face::setFrameFromWebcam() { + Webcam* webcam = Application::getInstance()->getWebcam(); + if (webcam->isSending()) { + _colorTextureID = webcam->getColorTextureID(); + _depthTextureID = webcam->getDepthTextureID(); + _textureSize = webcam->getTextureSize(); + _textureRect = webcam->getFaceRect(); + _aspectRatio = webcam->getAspectRatio(); + + } else { + clearFrame(); + } } +void Face::clearFrame() { + _colorTextureID = 0; +} + int Face::processVideoMessage(unsigned char* packetData, size_t dataBytes) { - if (_colorCodec.name == 0) { - // initialize decoder context - vpx_codec_dec_init(&_colorCodec, vpx_codec_vp8_dx(), 0, 0); - } - // skip the header unsigned char* packetPosition = packetData; int frameCount = *(uint32_t*)packetPosition; @@ -89,110 +97,135 @@ int Face::processVideoMessage(unsigned char* packetData, size_t dataBytes) { int payloadSize = dataBytes - (packetPosition - packetData); memcpy(_arrivingFrame.data() + frameOffset, packetPosition, payloadSize); - if ((_frameBytesRemaining -= payloadSize) <= 0) { - float aspectRatio = *(const float*)_arrivingFrame.constData(); - size_t colorSize = *(const size_t*)(_arrivingFrame.constData() + sizeof(float)); - const uint8_t* colorData = (const uint8_t*)(_arrivingFrame.constData() + sizeof(float) + sizeof(size_t)); - vpx_codec_decode(&_colorCodec, colorData, colorSize, 0, 0); - vpx_codec_iter_t iterator = 0; - vpx_image_t* image; - while ((image = vpx_codec_get_frame(&_colorCodec, &iterator)) != 0) { - // convert from YV12 to RGB - Mat color(image->d_h, image->d_w, CV_8UC3); - uchar* yline = image->planes[0]; - uchar* vline = image->planes[1]; - uchar* uline = image->planes[2]; - const int RED_V_WEIGHT = (int)(1.403 * 256); - const int GREEN_V_WEIGHT = (int)(0.714 * 256); - const int GREEN_U_WEIGHT = (int)(0.344 * 256); - const int BLUE_U_WEIGHT = (int)(1.773 * 256); - for (int i = 0; i < image->d_h; i += 2) { - uchar* ysrc = yline; - uchar* vsrc = vline; - uchar* usrc = uline; - for (int j = 0; j < image->d_w; j += 2) { - uchar* tl = color.ptr(i, j); - uchar* tr = color.ptr(i, j + 1); - uchar* bl = color.ptr(i + 1, j); - uchar* br = color.ptr(i + 1, j + 1); - - int v = *vsrc++ - 128; - int u = *usrc++ - 128; - - int redOffset = (RED_V_WEIGHT * v) >> 8; - int greenOffset = (GREEN_V_WEIGHT * v + GREEN_U_WEIGHT * u) >> 8; - int blueOffset = (BLUE_U_WEIGHT * u) >> 8; - - int ytl = ysrc[0]; - int ytr = ysrc[1]; - int ybl = ysrc[image->w]; - int ybr = ysrc[image->w + 1]; - ysrc += 2; - - tl[0] = ytl + redOffset; - tl[1] = ytl - greenOffset; - tl[2] = ytl + blueOffset; - - tr[0] = ytr + redOffset; - tr[1] = ytr - greenOffset; - tr[2] = ytr + blueOffset; - - bl[0] = ybl + redOffset; - bl[1] = ybl - greenOffset; - bl[2] = ybl + blueOffset; - - br[0] = ybr + redOffset; - br[1] = ybr - greenOffset; - br[2] = ybr + blueOffset; - } - yline += image->stride[0] * 2; - vline += image->stride[1]; - uline += image->stride[2]; + if ((_frameBytesRemaining -= payloadSize) > 0) { + return dataBytes; // wait for the rest of the frame + } + + if (frameSize == 0) { + // destroy the codecs, if we have any + destroyCodecs(); + + // disables video data + QMetaObject::invokeMethod(this, "setFrame", Q_ARG(cv::Mat, Mat()), + Q_ARG(cv::Mat, Mat()), Q_ARG(float, 0.0f)); + return dataBytes; + } + + // the switch from full frame to not (or vice versa) requires us to reinit the codecs + float aspectRatio = *(const float*)_arrivingFrame.constData(); + bool fullFrame = (aspectRatio == FULL_FRAME_ASPECT); + if (fullFrame != _lastFullFrame) { + destroyCodecs(); + _lastFullFrame = fullFrame; + } + + if (_colorCodec.name == 0) { + // initialize decoder context + vpx_codec_dec_init(&_colorCodec, vpx_codec_vp8_dx(), 0, 0); + } + + size_t colorSize = *(const size_t*)(_arrivingFrame.constData() + sizeof(float)); + const uint8_t* colorData = (const uint8_t*)(_arrivingFrame.constData() + sizeof(float) + sizeof(size_t)); + vpx_codec_decode(&_colorCodec, colorData, colorSize, 0, 0); + vpx_codec_iter_t iterator = 0; + vpx_image_t* image; + while ((image = vpx_codec_get_frame(&_colorCodec, &iterator)) != 0) { + // convert from YV12 to RGB: see http://www.fourcc.org/yuv.php and + // http://docs.opencv.org/modules/imgproc/doc/miscellaneous_transformations.html#cvtcolor + Mat color(image->d_h, image->d_w, CV_8UC3); + uchar* yline = image->planes[0]; + uchar* vline = image->planes[1]; + uchar* uline = image->planes[2]; + const int RED_V_WEIGHT = (int)(1.403 * 256); + const int GREEN_V_WEIGHT = (int)(0.714 * 256); + const int GREEN_U_WEIGHT = (int)(0.344 * 256); + const int BLUE_U_WEIGHT = (int)(1.773 * 256); + for (int i = 0; i < image->d_h; i += 2) { + uchar* ysrc = yline; + uchar* vsrc = vline; + uchar* usrc = uline; + for (int j = 0; j < image->d_w; j += 2) { + uchar* tl = color.ptr(i, j); + uchar* tr = color.ptr(i, j + 1); + uchar* bl = color.ptr(i + 1, j); + uchar* br = color.ptr(i + 1, j + 1); + + int v = *vsrc++ - 128; + int u = *usrc++ - 128; + + int redOffset = (RED_V_WEIGHT * v) >> 8; + int greenOffset = (GREEN_V_WEIGHT * v + GREEN_U_WEIGHT * u) >> 8; + int blueOffset = (BLUE_U_WEIGHT * u) >> 8; + + int ytl = ysrc[0]; + int ytr = ysrc[1]; + int ybl = ysrc[image->w]; + int ybr = ysrc[image->w + 1]; + ysrc += 2; + + tl[0] = ytl + redOffset; + tl[1] = ytl - greenOffset; + tl[2] = ytl + blueOffset; + + tr[0] = ytr + redOffset; + tr[1] = ytr - greenOffset; + tr[2] = ytr + blueOffset; + + bl[0] = ybl + redOffset; + bl[1] = ybl - greenOffset; + bl[2] = ybl + blueOffset; + + br[0] = ybr + redOffset; + br[1] = ybr - greenOffset; + br[2] = ybr + blueOffset; } - Mat depth; - - const uint8_t* depthData = colorData + colorSize; - int depthSize = _arrivingFrame.size() - ((const char*)depthData - _arrivingFrame.constData()); - if (depthSize > 0) { - if (_depthCodec.name == 0) { - // initialize decoder context - vpx_codec_dec_init(&_depthCodec, vpx_codec_vp8_dx(), 0, 0); - } - vpx_codec_decode(&_depthCodec, depthData, depthSize, 0, 0); - vpx_codec_iter_t iterator = 0; - vpx_image_t* image; - while ((image = vpx_codec_get_frame(&_depthCodec, &iterator)) != 0) { - depth.create(image->d_h, image->d_w, CV_8UC1); - uchar* yline = image->planes[0]; - uchar* vline = image->planes[1]; - const uchar EIGHT_BIT_MAXIMUM = 255; - const uchar MASK_THRESHOLD = 192; - for (int i = 0; i < image->d_h; i += 2) { - uchar* ysrc = yline; - uchar* vsrc = vline; - for (int j = 0; j < image->d_w; j += 2) { - if (*vsrc++ < MASK_THRESHOLD) { - *depth.ptr(i, j) = EIGHT_BIT_MAXIMUM; - *depth.ptr(i, j + 1) = EIGHT_BIT_MAXIMUM; - *depth.ptr(i + 1, j) = EIGHT_BIT_MAXIMUM; - *depth.ptr(i + 1, j + 1) = EIGHT_BIT_MAXIMUM; - - } else { - *depth.ptr(i, j) = ysrc[0]; - *depth.ptr(i, j + 1) = ysrc[1]; - *depth.ptr(i + 1, j) = ysrc[image->stride[0]]; - *depth.ptr(i + 1, j + 1) = ysrc[image->stride[0] + 1]; - } - ysrc += 2; - } - yline += image->stride[0] * 2; - vline += image->stride[1]; - } - } - } - QMetaObject::invokeMethod(this, "setFrame", Q_ARG(cv::Mat, color), - Q_ARG(cv::Mat, depth), Q_ARG(float, aspectRatio)); + yline += image->stride[0] * 2; + vline += image->stride[1]; + uline += image->stride[2]; } + Mat depth; + + const uint8_t* depthData = colorData + colorSize; + int depthSize = _arrivingFrame.size() - ((const char*)depthData - _arrivingFrame.constData()); + if (depthSize > 0) { + if (_depthCodec.name == 0) { + // initialize decoder context + vpx_codec_dec_init(&_depthCodec, vpx_codec_vp8_dx(), 0, 0); + } + vpx_codec_decode(&_depthCodec, depthData, depthSize, 0, 0); + vpx_codec_iter_t iterator = 0; + vpx_image_t* image; + while ((image = vpx_codec_get_frame(&_depthCodec, &iterator)) != 0) { + depth.create(image->d_h, image->d_w, CV_8UC1); + uchar* yline = image->planes[0]; + uchar* vline = image->planes[1]; + const uchar EIGHT_BIT_MAXIMUM = 255; + const uchar MASK_THRESHOLD = 192; + for (int i = 0; i < image->d_h; i += 2) { + uchar* ysrc = yline; + uchar* vsrc = vline; + for (int j = 0; j < image->d_w; j += 2) { + if (*vsrc++ < MASK_THRESHOLD) { + *depth.ptr(i, j) = EIGHT_BIT_MAXIMUM; + *depth.ptr(i, j + 1) = EIGHT_BIT_MAXIMUM; + *depth.ptr(i + 1, j) = EIGHT_BIT_MAXIMUM; + *depth.ptr(i + 1, j + 1) = EIGHT_BIT_MAXIMUM; + + } else { + *depth.ptr(i, j) = ysrc[0]; + *depth.ptr(i, j + 1) = ysrc[1]; + *depth.ptr(i + 1, j) = ysrc[image->stride[0]]; + *depth.ptr(i + 1, j + 1) = ysrc[image->stride[0] + 1]; + } + ysrc += 2; + } + yline += image->stride[0] * 2; + vline += image->stride[1]; + } + } + } + QMetaObject::invokeMethod(this, "setFrame", Q_ARG(cv::Mat, color), + Q_ARG(cv::Mat, depth), Q_ARG(float, aspectRatio)); } return dataBytes; @@ -208,9 +241,22 @@ bool Face::render(float alpha) { glm::quat orientation = _owningHead->getOrientation(); glm::vec3 axis = glm::axis(orientation); glRotatef(glm::angle(orientation), axis.x, axis.y, axis.z); - float scale = BODY_BALL_RADIUS_HEAD_BASE * _owningHead->getScale(); - glScalef(scale, scale, scale); - + + float aspect, xScale, zScale; + if (_aspectRatio == FULL_FRAME_ASPECT) { + aspect = _textureSize.width / _textureSize.height; + const float FULL_FRAME_SCALE = 0.5f; + xScale = FULL_FRAME_SCALE * _owningHead->getScale(); + zScale = xScale * 0.3f; + + } else { + aspect = _aspectRatio; + xScale = BODY_BALL_RADIUS_HEAD_BASE * _owningHead->getScale(); + zScale = xScale * 1.5f; + glTranslatef(0.0f, -xScale * 0.75f, -xScale); + } + glScalef(xScale, xScale / aspect, zScale); + glColor4f(1.0f, 1.0f, 1.0f, alpha); Point2f points[4]; @@ -243,7 +289,6 @@ bool Face::render(float alpha) { _texCoordCornerLocation = _program->uniformLocation("texCoordCorner"); _texCoordRightLocation = _program->uniformLocation("texCoordRight"); _texCoordUpLocation = _program->uniformLocation("texCoordUp"); - _aspectRatioLocation = _program->uniformLocation("aspectRatio"); glGenBuffers(1, &_vboID); glBindBuffer(GL_ARRAY_BUFFER, _vboID); @@ -292,7 +337,6 @@ bool Face::render(float alpha) { (points[3].x - points[0].x) / _textureSize.width, (points[3].y - points[0].y) / _textureSize.height); _program->setUniformValue(_texCoordUpLocation, (points[1].x - points[0].x) / _textureSize.width, (points[1].y - points[0].y) / _textureSize.height); - _program->setUniformValue(_aspectRatioLocation, _aspectRatio); glEnableClientState(GL_VERTEX_ARRAY); glVertexPointer(2, GL_FLOAT, 0, 0); @@ -324,13 +368,13 @@ bool Face::render(float alpha) { glBegin(GL_QUADS); glTexCoord2f(points[0].x / _textureSize.width, points[0].y / _textureSize.height); - glVertex3f(0.5f, -0.5f / _aspectRatio, -0.5f); + glVertex3f(0.5f, -0.5f, 0.0f); glTexCoord2f(points[1].x / _textureSize.width, points[1].y / _textureSize.height); - glVertex3f(0.5f, 0.5f / _aspectRatio, -0.5f); + glVertex3f(0.5f, 0.5f, 0.0f); glTexCoord2f(points[2].x / _textureSize.width, points[2].y / _textureSize.height); - glVertex3f(-0.5f, 0.5f / _aspectRatio, -0.5f); + glVertex3f(-0.5f, 0.5f, 0.0f); glTexCoord2f(points[3].x / _textureSize.width, points[3].y / _textureSize.height); - glVertex3f(-0.5f, -0.5f / _aspectRatio, -0.5f); + glVertex3f(-0.5f, -0.5f, 0.0f); glEnd(); glDisable(GL_TEXTURE_2D); @@ -348,23 +392,40 @@ void Face::cycleRenderMode() { } void Face::setFrame(const cv::Mat& color, const cv::Mat& depth, float aspectRatio) { + if (color.empty()) { + // release our textures, if any; there's no more video + if (_colorTextureID != 0) { + glDeleteTextures(1, &_colorTextureID); + _colorTextureID = 0; + } + if (_depthTextureID != 0) { + glDeleteTextures(1, &_depthTextureID); + _depthTextureID = 0; + } + return; + } + if (_colorTextureID == 0) { glGenTextures(1, &_colorTextureID); - glBindTexture(GL_TEXTURE_2D, _colorTextureID); + } + glBindTexture(GL_TEXTURE_2D, _colorTextureID); + bool recreateTextures = (_textureSize.width != color.cols || _textureSize.height != color.rows); + if (recreateTextures) { glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, color.cols, color.rows, 0, GL_RGB, GL_UNSIGNED_BYTE, color.ptr()); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); _textureSize = color.size(); _textureRect = RotatedRect(Point2f(color.cols * 0.5f, color.rows * 0.5f), _textureSize, 0.0f); - + } else { - glBindTexture(GL_TEXTURE_2D, _colorTextureID); glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, color.cols, color.rows, GL_RGB, GL_UNSIGNED_BYTE, color.ptr()); } if (!depth.empty()) { if (_depthTextureID == 0) { glGenTextures(1, &_depthTextureID); - glBindTexture(GL_TEXTURE_2D, _depthTextureID); + } + glBindTexture(GL_TEXTURE_2D, _depthTextureID); + if (recreateTextures) { glTexImage2D(GL_TEXTURE_2D, 0, GL_LUMINANCE, depth.cols, depth.rows, 0, GL_LUMINANCE, GL_UNSIGNED_BYTE, depth.ptr()); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); @@ -380,3 +441,13 @@ void Face::setFrame(const cv::Mat& color, const cv::Mat& depth, float aspectRati _aspectRatio = aspectRatio; } +void Face::destroyCodecs() { + if (_colorCodec.name != 0) { + vpx_codec_destroy(&_colorCodec); + _colorCodec.name = 0; + } + if (_depthCodec.name != 0) { + vpx_codec_destroy(&_depthCodec); + _depthCodec.name = 0; + } +} diff --git a/interface/src/avatar/Face.h b/interface/src/avatar/Face.h index d4812fecfb..893318f186 100644 --- a/interface/src/avatar/Face.h +++ b/interface/src/avatar/Face.h @@ -20,6 +20,8 @@ class Head; class ProgramObject; +const float FULL_FRAME_ASPECT = 0.0f; + class Face : public QObject { Q_OBJECT @@ -28,10 +30,10 @@ public: Face(Head* owningHead); ~Face(); - void setColorTextureID(GLuint colorTextureID) { _colorTextureID = colorTextureID; } - void setDepthTextureID(GLuint depthTextureID) { _depthTextureID = depthTextureID; } - void setTextureSize(const cv::Size2f& textureSize) { _textureSize = textureSize; } - void setTextureRect(const cv::RotatedRect& textureRect); + bool isFullFrame() const { return _colorTextureID != 0 && _aspectRatio == FULL_FRAME_ASPECT; } + + void setFrameFromWebcam(); + void clearFrame(); int processVideoMessage(unsigned char* packetData, size_t dataBytes); @@ -49,6 +51,8 @@ private: enum RenderMode { MESH, POINTS, RENDER_MODE_COUNT }; + void destroyCodecs(); + Head* _owningHead; RenderMode _renderMode; GLuint _colorTextureID; @@ -59,6 +63,7 @@ private: vpx_codec_ctx_t _colorCodec; vpx_codec_ctx_t _depthCodec; + bool _lastFullFrame; QByteArray _arrivingFrame; int _frameCount; @@ -68,7 +73,6 @@ private: static int _texCoordCornerLocation; static int _texCoordRightLocation; static int _texCoordUpLocation; - static int _aspectRatioLocation; static GLuint _vboID; static GLuint _iboID; };