From c787781efbc572442946d4f95da6ce375d9f63dc Mon Sep 17 00:00:00 2001
From: Andrzej Kapolka
Date: Tue, 30 Jul 2013 15:11:32 -0700
Subject: [PATCH] Send color and depth as separate streams (rather than one on
 top of the other) so that we can control their bitrates separately.

---
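Notes: with this change, the payload of a PACKET_TYPE_AVATAR_FACE_VIDEO message
carries two independently encoded VP8 streams instead of a single double-height
image: a float aspect ratio, the byte length of the color frame, the color frame
itself, and (optionally) a depth frame occupying the remainder. The sketch below
shows how a receiver splits such a payload, mirroring the decode path in
Face::processVideoMessage; the FaceVideoPayload and parseFaceVideoPayload names
are illustrative only and are not part of this patch:

    #include <cstring>
    #include <QByteArray>

    struct FaceVideoPayload {
        float aspectRatio;     // smoothed face rect width / height
        QByteArray colorFrame; // VP8 color frame
        QByteArray depthFrame; // VP8 depth frame; empty if none was sent
    };

    FaceVideoPayload parseFaceVideoPayload(const QByteArray& payload) {
        FaceVideoPayload result;
        const char* data = payload.constData();
        // leading aspect ratio, then the color frame length; the length is
        // written as a raw size_t, so sender and receiver must agree on its
        // width (an assumption the patch itself makes)
        memcpy(&result.aspectRatio, data, sizeof(float));
        size_t colorSize;
        memcpy(&colorSize, data + sizeof(float), sizeof(size_t));
        const char* colorData = data + sizeof(float) + sizeof(size_t);
        result.colorFrame = QByteArray(colorData, (int)colorSize);
        // any bytes left over are the optional depth frame
        int depthSize = payload.size() - (int)(sizeof(float) + sizeof(size_t) + colorSize);
        if (depthSize > 0) {
            result.depthFrame = QByteArray(colorData + colorSize, depthSize);
        }
        return result;
    }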
 interface/src/Webcam.cpp               | 88 +++++++++++++++---------
 interface/src/Webcam.h                 |  3 +-
 interface/src/avatar/Face.cpp          | 93 +++++++++++++++-----------
 interface/src/avatar/Face.h            |  3 +-
 libraries/shared/src/PacketHeaders.cpp |  7 +-
 5 files changed, 120 insertions(+), 74 deletions(-)

diff --git a/interface/src/Webcam.cpp b/interface/src/Webcam.cpp
index b73537f6ae..2a58d51dae 100644
--- a/interface/src/Webcam.cpp
+++ b/interface/src/Webcam.cpp
@@ -269,7 +269,7 @@ void Webcam::setFrame(const Mat& color, int format, const Mat& depth, float mean
 }
 
 FrameGrabber::FrameGrabber() : _initialized(false), _capture(0), _searchWindow(0, 0, 0, 0),
-    _smoothedMeanFaceDepth(UNINITIALIZED_FACE_DEPTH), _codec(), _frameCount(0) {
+    _smoothedMeanFaceDepth(UNINITIALIZED_FACE_DEPTH), _colorCodec(), _depthCodec(), _frameCount(0) {
 }
 
 FrameGrabber::~FrameGrabber() {
@@ -377,9 +377,13 @@ void FrameGrabber::shutdown() {
         cvReleaseCapture(&_capture);
         _capture = 0;
     }
-    if (_codec.name != 0) {
-        vpx_codec_destroy(&_codec);
-        _codec.name = 0;
+    if (_colorCodec.name != 0) {
+        vpx_codec_destroy(&_colorCodec);
+        _colorCodec.name = 0;
+    }
+    if (_depthCodec.name != 0) {
+        vpx_codec_destroy(&_depthCodec);
+        _depthCodec.name = 0;
     }
     _initialized = false;
 
@@ -492,17 +496,19 @@ void FrameGrabber::grabFrame() {
 
     const int ENCODED_FACE_WIDTH = 128;
     const int ENCODED_FACE_HEIGHT = 128;
-    int combinedFaceHeight = ENCODED_FACE_HEIGHT * (depth.empty() ? 1 : 2);
-    if (_codec.name == 0) {
-        // initialize encoder context
+    if (_colorCodec.name == 0) {
+        // initialize encoder context(s)
         vpx_codec_enc_cfg_t codecConfig;
         vpx_codec_enc_config_default(vpx_codec_vp8_cx(), &codecConfig, 0);
-        const int QUALITY_MULTIPLIER = 2;
-        codecConfig.rc_target_bitrate = QUALITY_MULTIPLIER * ENCODED_FACE_WIDTH * combinedFaceHeight *
+        codecConfig.rc_target_bitrate = ENCODED_FACE_WIDTH * ENCODED_FACE_HEIGHT *
             codecConfig.rc_target_bitrate / codecConfig.g_w / codecConfig.g_h;
         codecConfig.g_w = ENCODED_FACE_WIDTH;
-        codecConfig.g_h = combinedFaceHeight;
-        vpx_codec_enc_init(&_codec, vpx_codec_vp8_cx(), &codecConfig, 0);
+        codecConfig.g_h = ENCODED_FACE_HEIGHT;
+        vpx_codec_enc_init(&_colorCodec, vpx_codec_vp8_cx(), &codecConfig, 0);
+
+        if (!depth.empty()) {
+            vpx_codec_enc_init(&_depthCodec, vpx_codec_vp8_cx(), &codecConfig, 0);
+        }
     }
 
     // correct for 180 degree rotations
@@ -539,9 +545,9 @@ void FrameGrabber::grabFrame() {
     const int ENCODED_BITS_PER_VU = 2;
     const int ENCODED_BITS_PER_PIXEL = ENCODED_BITS_PER_Y + 2 * ENCODED_BITS_PER_VU;
     const int BITS_PER_BYTE = 8;
-    _encodedFace.fill(128, ENCODED_FACE_WIDTH * combinedFaceHeight * ENCODED_BITS_PER_PIXEL / BITS_PER_BYTE);
+    _encodedFace.resize(ENCODED_FACE_WIDTH * ENCODED_FACE_HEIGHT * ENCODED_BITS_PER_PIXEL / BITS_PER_BYTE);
     vpx_image_t vpxImage;
-    vpx_img_wrap(&vpxImage, VPX_IMG_FMT_YV12, ENCODED_FACE_WIDTH, combinedFaceHeight, 1, (unsigned char*)_encodedFace.data());
+    vpx_img_wrap(&vpxImage, VPX_IMG_FMT_YV12, ENCODED_FACE_WIDTH, ENCODED_FACE_HEIGHT, 1, (unsigned char*)_encodedFace.data());
     uchar* yline = vpxImage.planes[0];
     uchar* vline = vpxImage.planes[1];
     uchar* uline = vpxImage.planes[2];
@@ -588,6 +594,24 @@ void FrameGrabber::grabFrame() {
         uline += vpxImage.stride[2];
     }
 
+    // encode the frame
+    vpx_codec_encode(&_colorCodec, &vpxImage, ++_frameCount, 1, 0, VPX_DL_REALTIME);
+
+    // start the payload off with the aspect ratio
+    QByteArray payload(sizeof(float), 0);
+    *(float*)payload.data() = _smoothedFaceRect.size.width / _smoothedFaceRect.size.height;
+
+    // extract the encoded frame
+    vpx_codec_iter_t iterator = 0;
+    const vpx_codec_cx_pkt_t* packet;
+    while ((packet = vpx_codec_get_cx_data(&_colorCodec, &iterator)) != 0) {
+        if (packet->kind == VPX_CODEC_CX_FRAME_PKT) {
+            // prepend the length, which will indicate whether there's a depth frame too
+            payload.append((const char*)&packet->data.frame.sz, sizeof(packet->data.frame.sz));
+            payload.append((const char*)packet->data.frame.buf, packet->data.frame.sz);
+        }
+    }
+
     if (!depth.empty()) {
         // warp the face depth without interpolation (because it will contain invalid zero values)
         _faceDepth.create(ENCODED_FACE_WIDTH, ENCODED_FACE_HEIGHT, CV_16UC1);
@@ -621,12 +645,14 @@ void FrameGrabber::grabFrame() {
         depth.convertTo(_grayDepthFrame, CV_8UC1, 1.0, depthOffset);
 
         // likewise for the encoded representation
-        uchar* yline = vpxImage.planes[0] + vpxImage.stride[0] * ENCODED_FACE_HEIGHT;
-        uchar* vline = vpxImage.planes[1] + vpxImage.stride[1] * (ENCODED_FACE_HEIGHT / 2);
+        uchar* yline = vpxImage.planes[0];
+        uchar* vline = vpxImage.planes[1];
+        uchar* uline = vpxImage.planes[2];
        const uchar EIGHT_BIT_MAXIMUM = 255;
        for (int i = 0; i < ENCODED_FACE_HEIGHT; i += 2) {
            uchar* ydest = yline;
            uchar* vdest = vline;
+            uchar* udest = uline;
            for (int j = 0; j < ENCODED_FACE_WIDTH; j += 2) {
                ushort tl = *_faceDepth.ptr(i, j);
                ushort tr = *_faceDepth.ptr(i, j + 1);
@@ -644,28 +670,28 @@ void FrameGrabber::grabFrame() {
                 ydest += 2;
 
                 *vdest++ = mask;
+                *udest++ = EIGHT_BIT_MIDPOINT;
             }
             yline += vpxImage.stride[0] * 2;
             vline += vpxImage.stride[1];
+            uline += vpxImage.stride[2];
+        }
+
+        // encode the frame
+        vpx_codec_encode(&_depthCodec, &vpxImage, _frameCount, 1, 0, VPX_DL_REALTIME);
+
+        // extract the encoded frame
+        vpx_codec_iter_t iterator = 0;
+        const vpx_codec_cx_pkt_t* packet;
+        while ((packet = vpx_codec_get_cx_data(&_depthCodec, &iterator)) != 0) {
+            if (packet->kind == VPX_CODEC_CX_FRAME_PKT) {
+                payload.append((const char*)packet->data.frame.buf, packet->data.frame.sz);
+            }
         }
     }
 
-    // encode the frame
-    vpx_codec_encode(&_codec, &vpxImage, ++_frameCount, 1, 0, VPX_DL_REALTIME);
-
-    // extract the encoded frame
-    vpx_codec_iter_t iterator = 0;
-    const vpx_codec_cx_pkt_t* packet;
-    while ((packet = vpx_codec_get_cx_data(&_codec, &iterator)) != 0) {
-        if (packet->kind == VPX_CODEC_CX_FRAME_PKT) {
-            // prepend the aspect ratio
-            QByteArray payload(sizeof(float), 0);
-            *(float*)payload.data() = _smoothedFaceRect.size.width / _smoothedFaceRect.size.height;
-            payload.append((const char*)packet->data.frame.buf, packet->data.frame.sz);
-            QMetaObject::invokeMethod(Application::getInstance(), "sendAvatarFaceVideoMessage", Q_ARG(int, _frameCount),
-                Q_ARG(QByteArray, payload));
-        }
-    }
+    QMetaObject::invokeMethod(Application::getInstance(), "sendAvatarFaceVideoMessage",
+        Q_ARG(int, _frameCount), Q_ARG(QByteArray, payload));
 
     QMetaObject::invokeMethod(Application::getInstance()->getWebcam(), "setFrame", Q_ARG(cv::Mat, color),
         Q_ARG(int, format), Q_ARG(cv::Mat, _grayDepthFrame), Q_ARG(float, _smoothedMeanFaceDepth),
diff --git a/interface/src/Webcam.h b/interface/src/Webcam.h
index 16cf3a33a1..6c6d250897 100644
--- a/interface/src/Webcam.h
+++ b/interface/src/Webcam.h
@@ -120,7 +120,8 @@ private:
     cv::Mat _grayDepthFrame;
     float _smoothedMeanFaceDepth;
 
-    vpx_codec_ctx_t _codec;
+    vpx_codec_ctx_t _colorCodec;
+    vpx_codec_ctx_t _depthCodec;
     int _frameCount;
     cv::Mat _faceColor;
     cv::Mat _faceDepth;
diff --git a/interface/src/avatar/Face.cpp b/interface/src/avatar/Face.cpp
index 5d6c4984bd..f25426a5be 100644
--- a/interface/src/avatar/Face.cpp
+++ b/interface/src/avatar/Face.cpp
@@ -30,19 +30,25 @@ GLuint Face::_vboID;
 GLuint Face::_iboID;
 
 Face::Face(Head* owningHead) : _owningHead(owningHead), _renderMode(MESH),
-    _colorTextureID(0), _depthTextureID(0), _codec(), _frameCount(0) {
+    _colorTextureID(0), _depthTextureID(0), _colorCodec(), _depthCodec(), _frameCount(0) {
     // we may have been created in the network thread, but we live in the main thread
     moveToThread(Application::getInstance()->thread());
 }
 
 Face::~Face() {
-    if (_codec.name != 0) {
-        vpx_codec_destroy(&_codec);
+    if (_colorCodec.name != 0) {
+        vpx_codec_destroy(&_colorCodec);
 
-        // delete our textures, since we know that we own them
+        // delete our texture, since we know that we own it
         if (_colorTextureID != 0) {
             glDeleteTextures(1, &_colorTextureID);
         }
+
+    }
+    if (_depthCodec.name != 0) {
+        vpx_codec_destroy(&_depthCodec);
+
+        // delete our texture, since we know that we own it
         if (_depthTextureID != 0) {
             glDeleteTextures(1, &_depthTextureID);
         }
@@ -55,9 +61,9 @@ void Face::setTextureRect(const cv::RotatedRect& textureRect) {
 }
 
 int Face::processVideoMessage(unsigned char* packetData, size_t dataBytes) {
-    if (_codec.name == 0) {
+    if (_colorCodec.name == 0) {
         // initialize decoder context
-        vpx_codec_dec_init(&_codec, vpx_codec_vp8_dx(), 0, 0);
+        vpx_codec_dec_init(&_colorCodec, vpx_codec_vp8_dx(), 0, 0);
     }
     // skip the header
     unsigned char* packetPosition = packetData;
@@ -85,14 +91,14 @@ int Face::processVideoMessage(unsigned char* packetData, size_t dataBytes) {
 
     if ((_frameBytesRemaining -= payloadSize) <= 0) {
         float aspectRatio = *(const float*)_arrivingFrame.constData();
-        vpx_codec_decode(&_codec, (const uint8_t*)_arrivingFrame.constData() + sizeof(float),
-            _arrivingFrame.size() - sizeof(float), 0, 0);
+        size_t colorSize = *(const size_t*)(_arrivingFrame.constData() + sizeof(float));
+        const uint8_t* colorData = (const uint8_t*)(_arrivingFrame.constData() + sizeof(float) + sizeof(size_t));
+        vpx_codec_decode(&_colorCodec, colorData, colorSize, 0, 0);
         vpx_codec_iter_t iterator = 0;
         vpx_image_t* image;
-        while ((image = vpx_codec_get_frame(&_codec, &iterator)) != 0) {
+        while ((image = vpx_codec_get_frame(&_colorCodec, &iterator)) != 0) {
             // convert from YV12 to RGB
-            const int imageHeight = image->d_w;
-            Mat color(imageHeight, image->d_w, CV_8UC3);
+            Mat color(image->d_h, image->d_w, CV_8UC3);
             uchar* yline = image->planes[0];
             uchar* vline = image->planes[1];
             uchar* uline = image->planes[2];
@@ -100,7 +106,7 @@ int Face::processVideoMessage(unsigned char* packetData, size_t dataBytes) {
             const int GREEN_V_WEIGHT = (int)(0.714 * 256);
             const int GREEN_U_WEIGHT = (int)(0.344 * 256);
             const int BLUE_U_WEIGHT = (int)(1.773 * 256);
-            for (int i = 0; i < imageHeight; i += 2) {
+            for (int i = 0; i < image->d_h; i += 2) {
                 uchar* ysrc = yline;
                 uchar* vsrc = vline;
                 uchar* usrc = uline;
@@ -144,34 +150,45 @@ int Face::processVideoMessage(unsigned char* packetData, size_t dataBytes) {
                 uline += image->stride[2];
             }
             Mat depth;
-            if (image->d_h > imageHeight) {
-                // if the height is greater than the width, we have depth data
-                depth.create(imageHeight, image->d_w, CV_8UC1);
-                uchar* yline = image->planes[0] + image->stride[0] * imageHeight;
-                uchar* vline = image->planes[1] + image->stride[1] * (imageHeight / 2);
-                const uchar EIGHT_BIT_MAXIMUM = 255;
-                const uchar MASK_THRESHOLD = 192;
-                for (int i = 0; i < imageHeight; i += 2) {
-                    uchar* ysrc = yline;
-                    uchar* vsrc = vline;
-                    for (int j = 0; j < image->d_w; j += 2) {
-
-                        if (*vsrc++ >= MASK_THRESHOLD) {
-                            *depth.ptr(i, j) = EIGHT_BIT_MAXIMUM;
-                            *depth.ptr(i, j + 1) = EIGHT_BIT_MAXIMUM;
-                            *depth.ptr(i + 1, j) = EIGHT_BIT_MAXIMUM;
-                            *depth.ptr(i + 1, j + 1) = EIGHT_BIT_MAXIMUM;
-
-                        } else {
-                            *depth.ptr(i, j) = ysrc[0];
-                            *depth.ptr(i, j + 1) = ysrc[1];
-                            *depth.ptr(i + 1, j) = ysrc[image->stride[0]];
-                            *depth.ptr(i + 1, j + 1) = ysrc[image->stride[0] + 1];
+
+            const uint8_t* depthData = colorData + colorSize;
+            int depthSize = _arrivingFrame.size() - ((const char*)depthData - _arrivingFrame.constData());
+            if (depthSize > 0) {
+                if (_depthCodec.name == 0) {
+                    // initialize decoder context
+                    vpx_codec_dec_init(&_depthCodec, vpx_codec_vp8_dx(), 0, 0);
+                }
+                vpx_codec_decode(&_depthCodec, depthData, depthSize, 0, 0);
+                vpx_codec_iter_t iterator = 0;
+                vpx_image_t* image;
+                while ((image = vpx_codec_get_frame(&_depthCodec, &iterator)) != 0) {
+                    depth.create(image->d_h, image->d_w, CV_8UC1);
+                    uchar* yline = image->planes[0];
+                    uchar* vline = image->planes[1];
+                    const uchar EIGHT_BIT_MAXIMUM = 255;
+                    const uchar MASK_THRESHOLD = 192;
+                    for (int i = 0; i < image->d_h; i += 2) {
+                        uchar* ysrc = yline;
+                        uchar* vsrc = vline;
+                        for (int j = 0; j < image->d_w; j += 2) {
+
+                            if (*vsrc++ >= MASK_THRESHOLD) {
+                                *depth.ptr(i, j) = EIGHT_BIT_MAXIMUM;
+                                *depth.ptr(i, j + 1) = EIGHT_BIT_MAXIMUM;
+                                *depth.ptr(i + 1, j) = EIGHT_BIT_MAXIMUM;
+                                *depth.ptr(i + 1, j + 1) = EIGHT_BIT_MAXIMUM;
+
+                            } else {
+                                *depth.ptr(i, j) = ysrc[0];
+                                *depth.ptr(i, j + 1) = ysrc[1];
+                                *depth.ptr(i + 1, j) = ysrc[image->stride[0]];
+                                *depth.ptr(i + 1, j + 1) = ysrc[image->stride[0] + 1];
+                            }
+                            ysrc += 2;
                         }
-                        ysrc += 2;
+                        yline += image->stride[0] * 2;
+                        vline += image->stride[1];
                     }
-                    yline += image->stride[0] * 2;
-                    vline += image->stride[1];
                 }
             }
             QMetaObject::invokeMethod(this, "setFrame", Q_ARG(cv::Mat, color),
diff --git a/interface/src/avatar/Face.h b/interface/src/avatar/Face.h
index 1f9d41a1b3..d4812fecfb 100644
--- a/interface/src/avatar/Face.h
+++ b/interface/src/avatar/Face.h
@@ -57,7 +57,8 @@ private:
     cv::RotatedRect _textureRect;
     float _aspectRatio;
 
-    vpx_codec_ctx_t _codec;
+    vpx_codec_ctx_t _colorCodec;
+    vpx_codec_ctx_t _depthCodec;
     QByteArray _arrivingFrame;
     int _frameCount;
 
diff --git a/libraries/shared/src/PacketHeaders.cpp b/libraries/shared/src/PacketHeaders.cpp
index 292c4bbc0a..2e7b95c7f7 100644
--- a/libraries/shared/src/PacketHeaders.cpp
+++ b/libraries/shared/src/PacketHeaders.cpp
@@ -18,14 +18,15 @@ PACKET_VERSION versionForPacketType(PACKET_TYPE type) {
         case PACKET_TYPE_MICROPHONE_AUDIO_NO_ECHO:
         case PACKET_TYPE_MICROPHONE_AUDIO_WITH_ECHO:
             return 1;
-            break;
 
         case PACKET_TYPE_HEAD_DATA:
            return 2;
-            break;
+
+        case PACKET_TYPE_AVATAR_FACE_VIDEO:
+            return 1;
+
        default:
            return 0;
-            break;
     }
 }
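Note on bitrate control: as committed, both encoders are still initialized from
the same codecConfig, so the color and depth bitrates remain equal; the separate
contexts are what make independent tuning possible. A hypothetical follow-up
(not part of this commit) could give the depth stream its own, lower target:

    // sketch only: derive a cheaper configuration for the depth encoder
    vpx_codec_enc_cfg_t depthConfig = codecConfig;  // copy the color settings
    const int DEPTH_BITRATE_DIVISOR = 2;            // assumed ratio, for illustration
    depthConfig.rc_target_bitrate = codecConfig.rc_target_bitrate / DEPTH_BITRATE_DIVISOR;
    vpx_codec_enc_init(&_depthCodec, vpx_codec_vp8_cx(), &depthConfig, 0);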