Send color and depth as separate streams (rather than one on top of the other)

This lets us control their bitrates separately.
This commit is contained in:
Andrzej Kapolka 2013-07-30 15:11:32 -07:00
parent f8ba1c4be1
commit c787781efb
5 changed files with 120 additions and 74 deletions

View file

@ -269,7 +269,7 @@ void Webcam::setFrame(const Mat& color, int format, const Mat& depth, float mean
} }
FrameGrabber::FrameGrabber() : _initialized(false), _capture(0), _searchWindow(0, 0, 0, 0), FrameGrabber::FrameGrabber() : _initialized(false), _capture(0), _searchWindow(0, 0, 0, 0),
_smoothedMeanFaceDepth(UNINITIALIZED_FACE_DEPTH), _codec(), _frameCount(0) { _smoothedMeanFaceDepth(UNINITIALIZED_FACE_DEPTH), _colorCodec(), _depthCodec(), _frameCount(0) {
} }
FrameGrabber::~FrameGrabber() { FrameGrabber::~FrameGrabber() {
@ -377,9 +377,13 @@ void FrameGrabber::shutdown() {
cvReleaseCapture(&_capture); cvReleaseCapture(&_capture);
_capture = 0; _capture = 0;
} }
if (_codec.name != 0) { if (_colorCodec.name != 0) {
vpx_codec_destroy(&_codec); vpx_codec_destroy(&_colorCodec);
_codec.name = 0; _colorCodec.name = 0;
}
if (_depthCodec.name != 0) {
vpx_codec_destroy(&_depthCodec);
_depthCodec.name = 0;
} }
_initialized = false; _initialized = false;
@ -492,17 +496,19 @@ void FrameGrabber::grabFrame() {
const int ENCODED_FACE_WIDTH = 128; const int ENCODED_FACE_WIDTH = 128;
const int ENCODED_FACE_HEIGHT = 128; const int ENCODED_FACE_HEIGHT = 128;
int combinedFaceHeight = ENCODED_FACE_HEIGHT * (depth.empty() ? 1 : 2); if (_colorCodec.name == 0) {
if (_codec.name == 0) { // initialize encoder context(s)
// initialize encoder context
vpx_codec_enc_cfg_t codecConfig; vpx_codec_enc_cfg_t codecConfig;
vpx_codec_enc_config_default(vpx_codec_vp8_cx(), &codecConfig, 0); vpx_codec_enc_config_default(vpx_codec_vp8_cx(), &codecConfig, 0);
const int QUALITY_MULTIPLIER = 2; codecConfig.rc_target_bitrate = ENCODED_FACE_WIDTH * ENCODED_FACE_HEIGHT *
codecConfig.rc_target_bitrate = QUALITY_MULTIPLIER * ENCODED_FACE_WIDTH * combinedFaceHeight *
codecConfig.rc_target_bitrate / codecConfig.g_w / codecConfig.g_h; codecConfig.rc_target_bitrate / codecConfig.g_w / codecConfig.g_h;
codecConfig.g_w = ENCODED_FACE_WIDTH; codecConfig.g_w = ENCODED_FACE_WIDTH;
codecConfig.g_h = combinedFaceHeight; codecConfig.g_h = ENCODED_FACE_HEIGHT;
vpx_codec_enc_init(&_codec, vpx_codec_vp8_cx(), &codecConfig, 0); vpx_codec_enc_init(&_colorCodec, vpx_codec_vp8_cx(), &codecConfig, 0);
if (!depth.empty()) {
vpx_codec_enc_init(&_depthCodec, vpx_codec_vp8_cx(), &codecConfig, 0);
}
} }
// correct for 180 degree rotations // correct for 180 degree rotations
@ -539,9 +545,9 @@ void FrameGrabber::grabFrame() {
const int ENCODED_BITS_PER_VU = 2; const int ENCODED_BITS_PER_VU = 2;
const int ENCODED_BITS_PER_PIXEL = ENCODED_BITS_PER_Y + 2 * ENCODED_BITS_PER_VU; const int ENCODED_BITS_PER_PIXEL = ENCODED_BITS_PER_Y + 2 * ENCODED_BITS_PER_VU;
const int BITS_PER_BYTE = 8; const int BITS_PER_BYTE = 8;
_encodedFace.fill(128, ENCODED_FACE_WIDTH * combinedFaceHeight * ENCODED_BITS_PER_PIXEL / BITS_PER_BYTE); _encodedFace.resize(ENCODED_FACE_WIDTH * ENCODED_FACE_HEIGHT * ENCODED_BITS_PER_PIXEL / BITS_PER_BYTE);
vpx_image_t vpxImage; vpx_image_t vpxImage;
vpx_img_wrap(&vpxImage, VPX_IMG_FMT_YV12, ENCODED_FACE_WIDTH, combinedFaceHeight, 1, (unsigned char*)_encodedFace.data()); vpx_img_wrap(&vpxImage, VPX_IMG_FMT_YV12, ENCODED_FACE_WIDTH, ENCODED_FACE_HEIGHT, 1, (unsigned char*)_encodedFace.data());
uchar* yline = vpxImage.planes[0]; uchar* yline = vpxImage.planes[0];
uchar* vline = vpxImage.planes[1]; uchar* vline = vpxImage.planes[1];
uchar* uline = vpxImage.planes[2]; uchar* uline = vpxImage.planes[2];
@ -588,6 +594,24 @@ void FrameGrabber::grabFrame() {
uline += vpxImage.stride[2]; uline += vpxImage.stride[2];
} }
// encode the frame
vpx_codec_encode(&_colorCodec, &vpxImage, ++_frameCount, 1, 0, VPX_DL_REALTIME);
// start the payload off with the aspect ratio
QByteArray payload(sizeof(float), 0);
*(float*)payload.data() = _smoothedFaceRect.size.width / _smoothedFaceRect.size.height;
// extract the encoded frame
vpx_codec_iter_t iterator = 0;
const vpx_codec_cx_pkt_t* packet;
while ((packet = vpx_codec_get_cx_data(&_colorCodec, &iterator)) != 0) {
if (packet->kind == VPX_CODEC_CX_FRAME_PKT) {
// prepend the length, which will indicate whether there's a depth frame too
payload.append((const char*)&packet->data.frame.sz, sizeof(packet->data.frame.sz));
payload.append((const char*)packet->data.frame.buf, packet->data.frame.sz);
}
}
if (!depth.empty()) { if (!depth.empty()) {
// warp the face depth without interpolation (because it will contain invalid zero values) // warp the face depth without interpolation (because it will contain invalid zero values)
_faceDepth.create(ENCODED_FACE_WIDTH, ENCODED_FACE_HEIGHT, CV_16UC1); _faceDepth.create(ENCODED_FACE_WIDTH, ENCODED_FACE_HEIGHT, CV_16UC1);
@ -621,12 +645,14 @@ void FrameGrabber::grabFrame() {
depth.convertTo(_grayDepthFrame, CV_8UC1, 1.0, depthOffset); depth.convertTo(_grayDepthFrame, CV_8UC1, 1.0, depthOffset);
// likewise for the encoded representation // likewise for the encoded representation
uchar* yline = vpxImage.planes[0] + vpxImage.stride[0] * ENCODED_FACE_HEIGHT; uchar* yline = vpxImage.planes[0];
uchar* vline = vpxImage.planes[1] + vpxImage.stride[1] * (ENCODED_FACE_HEIGHT / 2); uchar* vline = vpxImage.planes[1];
uchar* uline = vpxImage.planes[2];
const uchar EIGHT_BIT_MAXIMUM = 255; const uchar EIGHT_BIT_MAXIMUM = 255;
for (int i = 0; i < ENCODED_FACE_HEIGHT; i += 2) { for (int i = 0; i < ENCODED_FACE_HEIGHT; i += 2) {
uchar* ydest = yline; uchar* ydest = yline;
uchar* vdest = vline; uchar* vdest = vline;
uchar* udest = uline;
for (int j = 0; j < ENCODED_FACE_WIDTH; j += 2) { for (int j = 0; j < ENCODED_FACE_WIDTH; j += 2) {
ushort tl = *_faceDepth.ptr<ushort>(i, j); ushort tl = *_faceDepth.ptr<ushort>(i, j);
ushort tr = *_faceDepth.ptr<ushort>(i, j + 1); ushort tr = *_faceDepth.ptr<ushort>(i, j + 1);
@ -644,28 +670,28 @@ void FrameGrabber::grabFrame() {
ydest += 2; ydest += 2;
*vdest++ = mask; *vdest++ = mask;
*udest++ = EIGHT_BIT_MIDPOINT;
} }
yline += vpxImage.stride[0] * 2; yline += vpxImage.stride[0] * 2;
vline += vpxImage.stride[1]; vline += vpxImage.stride[1];
} uline += vpxImage.stride[2];
} }
// encode the frame // encode the frame
vpx_codec_encode(&_codec, &vpxImage, ++_frameCount, 1, 0, VPX_DL_REALTIME); vpx_codec_encode(&_depthCodec, &vpxImage, _frameCount, 1, 0, VPX_DL_REALTIME);
// extract the encoded frame // extract the encoded frame
vpx_codec_iter_t iterator = 0; vpx_codec_iter_t iterator = 0;
const vpx_codec_cx_pkt_t* packet; const vpx_codec_cx_pkt_t* packet;
while ((packet = vpx_codec_get_cx_data(&_codec, &iterator)) != 0) { while ((packet = vpx_codec_get_cx_data(&_depthCodec, &iterator)) != 0) {
if (packet->kind == VPX_CODEC_CX_FRAME_PKT) { if (packet->kind == VPX_CODEC_CX_FRAME_PKT) {
// prepend the aspect ratio
QByteArray payload(sizeof(float), 0);
*(float*)payload.data() = _smoothedFaceRect.size.width / _smoothedFaceRect.size.height;
payload.append((const char*)packet->data.frame.buf, packet->data.frame.sz); payload.append((const char*)packet->data.frame.buf, packet->data.frame.sz);
QMetaObject::invokeMethod(Application::getInstance(), "sendAvatarFaceVideoMessage", Q_ARG(int, _frameCount),
Q_ARG(QByteArray, payload));
} }
} }
}
QMetaObject::invokeMethod(Application::getInstance(), "sendAvatarFaceVideoMessage",
Q_ARG(int, _frameCount), Q_ARG(QByteArray, payload));
QMetaObject::invokeMethod(Application::getInstance()->getWebcam(), "setFrame", QMetaObject::invokeMethod(Application::getInstance()->getWebcam(), "setFrame",
Q_ARG(cv::Mat, color), Q_ARG(int, format), Q_ARG(cv::Mat, _grayDepthFrame), Q_ARG(float, _smoothedMeanFaceDepth), Q_ARG(cv::Mat, color), Q_ARG(int, format), Q_ARG(cv::Mat, _grayDepthFrame), Q_ARG(float, _smoothedMeanFaceDepth),

View file

@ -120,7 +120,8 @@ private:
cv::Mat _grayDepthFrame; cv::Mat _grayDepthFrame;
float _smoothedMeanFaceDepth; float _smoothedMeanFaceDepth;
vpx_codec_ctx_t _codec; vpx_codec_ctx_t _colorCodec;
vpx_codec_ctx_t _depthCodec;
int _frameCount; int _frameCount;
cv::Mat _faceColor; cv::Mat _faceColor;
cv::Mat _faceDepth; cv::Mat _faceDepth;

View file

@ -30,19 +30,25 @@ GLuint Face::_vboID;
GLuint Face::_iboID; GLuint Face::_iboID;
Face::Face(Head* owningHead) : _owningHead(owningHead), _renderMode(MESH), Face::Face(Head* owningHead) : _owningHead(owningHead), _renderMode(MESH),
_colorTextureID(0), _depthTextureID(0), _codec(), _frameCount(0) { _colorTextureID(0), _depthTextureID(0), _colorCodec(), _depthCodec(), _frameCount(0) {
// we may have been created in the network thread, but we live in the main thread // we may have been created in the network thread, but we live in the main thread
moveToThread(Application::getInstance()->thread()); moveToThread(Application::getInstance()->thread());
} }
Face::~Face() { Face::~Face() {
if (_codec.name != 0) { if (_colorCodec.name != 0) {
vpx_codec_destroy(&_codec); vpx_codec_destroy(&_colorCodec);
// delete our textures, since we know that we own them // delete our texture, since we know that we own it
if (_colorTextureID != 0) { if (_colorTextureID != 0) {
glDeleteTextures(1, &_colorTextureID); glDeleteTextures(1, &_colorTextureID);
} }
}
if (_depthCodec.name != 0) {
vpx_codec_destroy(&_depthCodec);
// delete our texture, since we know that we own it
if (_depthTextureID != 0) { if (_depthTextureID != 0) {
glDeleteTextures(1, &_depthTextureID); glDeleteTextures(1, &_depthTextureID);
} }
@ -55,9 +61,9 @@ void Face::setTextureRect(const cv::RotatedRect& textureRect) {
} }
int Face::processVideoMessage(unsigned char* packetData, size_t dataBytes) { int Face::processVideoMessage(unsigned char* packetData, size_t dataBytes) {
if (_codec.name == 0) { if (_colorCodec.name == 0) {
// initialize decoder context // initialize decoder context
vpx_codec_dec_init(&_codec, vpx_codec_vp8_dx(), 0, 0); vpx_codec_dec_init(&_colorCodec, vpx_codec_vp8_dx(), 0, 0);
} }
// skip the header // skip the header
unsigned char* packetPosition = packetData; unsigned char* packetPosition = packetData;
@ -85,14 +91,14 @@ int Face::processVideoMessage(unsigned char* packetData, size_t dataBytes) {
if ((_frameBytesRemaining -= payloadSize) <= 0) { if ((_frameBytesRemaining -= payloadSize) <= 0) {
float aspectRatio = *(const float*)_arrivingFrame.constData(); float aspectRatio = *(const float*)_arrivingFrame.constData();
vpx_codec_decode(&_codec, (const uint8_t*)_arrivingFrame.constData() + sizeof(float), size_t colorSize = *(const size_t*)(_arrivingFrame.constData() + sizeof(float));
_arrivingFrame.size() - sizeof(float), 0, 0); const uint8_t* colorData = (const uint8_t*)(_arrivingFrame.constData() + sizeof(float) + sizeof(size_t));
vpx_codec_decode(&_colorCodec, colorData, colorSize, 0, 0);
vpx_codec_iter_t iterator = 0; vpx_codec_iter_t iterator = 0;
vpx_image_t* image; vpx_image_t* image;
while ((image = vpx_codec_get_frame(&_codec, &iterator)) != 0) { while ((image = vpx_codec_get_frame(&_colorCodec, &iterator)) != 0) {
// convert from YV12 to RGB // convert from YV12 to RGB
const int imageHeight = image->d_w; Mat color(image->d_h, image->d_w, CV_8UC3);
Mat color(imageHeight, image->d_w, CV_8UC3);
uchar* yline = image->planes[0]; uchar* yline = image->planes[0];
uchar* vline = image->planes[1]; uchar* vline = image->planes[1];
uchar* uline = image->planes[2]; uchar* uline = image->planes[2];
@ -100,7 +106,7 @@ int Face::processVideoMessage(unsigned char* packetData, size_t dataBytes) {
const int GREEN_V_WEIGHT = (int)(0.714 * 256); const int GREEN_V_WEIGHT = (int)(0.714 * 256);
const int GREEN_U_WEIGHT = (int)(0.344 * 256); const int GREEN_U_WEIGHT = (int)(0.344 * 256);
const int BLUE_U_WEIGHT = (int)(1.773 * 256); const int BLUE_U_WEIGHT = (int)(1.773 * 256);
for (int i = 0; i < imageHeight; i += 2) { for (int i = 0; i < image->d_h; i += 2) {
uchar* ysrc = yline; uchar* ysrc = yline;
uchar* vsrc = vline; uchar* vsrc = vline;
uchar* usrc = uline; uchar* usrc = uline;
@ -144,14 +150,24 @@ int Face::processVideoMessage(unsigned char* packetData, size_t dataBytes) {
uline += image->stride[2]; uline += image->stride[2];
} }
Mat depth; Mat depth;
if (image->d_h > imageHeight) {
// if the height is greater than the width, we have depth data const uint8_t* depthData = colorData + colorSize;
depth.create(imageHeight, image->d_w, CV_8UC1); int depthSize = _arrivingFrame.size() - ((const char*)depthData - _arrivingFrame.constData());
uchar* yline = image->planes[0] + image->stride[0] * imageHeight; if (depthSize > 0) {
uchar* vline = image->planes[1] + image->stride[1] * (imageHeight / 2); if (_depthCodec.name == 0) {
// initialize decoder context
vpx_codec_dec_init(&_depthCodec, vpx_codec_vp8_dx(), 0, 0);
}
vpx_codec_decode(&_depthCodec, depthData, depthSize, 0, 0);
vpx_codec_iter_t iterator = 0;
vpx_image_t* image;
while ((image = vpx_codec_get_frame(&_depthCodec, &iterator)) != 0) {
depth.create(image->d_h, image->d_w, CV_8UC1);
uchar* yline = image->planes[0];
uchar* vline = image->planes[1];
const uchar EIGHT_BIT_MAXIMUM = 255; const uchar EIGHT_BIT_MAXIMUM = 255;
const uchar MASK_THRESHOLD = 192; const uchar MASK_THRESHOLD = 192;
for (int i = 0; i < imageHeight; i += 2) { for (int i = 0; i < image->d_h; i += 2) {
uchar* ysrc = yline; uchar* ysrc = yline;
uchar* vsrc = vline; uchar* vsrc = vline;
for (int j = 0; j < image->d_w; j += 2) { for (int j = 0; j < image->d_w; j += 2) {
@ -174,6 +190,7 @@ int Face::processVideoMessage(unsigned char* packetData, size_t dataBytes) {
vline += image->stride[1]; vline += image->stride[1];
} }
} }
}
QMetaObject::invokeMethod(this, "setFrame", Q_ARG(cv::Mat, color), QMetaObject::invokeMethod(this, "setFrame", Q_ARG(cv::Mat, color),
Q_ARG(cv::Mat, depth), Q_ARG(float, aspectRatio)); Q_ARG(cv::Mat, depth), Q_ARG(float, aspectRatio));
} }

View file

@ -57,7 +57,8 @@ private:
cv::RotatedRect _textureRect; cv::RotatedRect _textureRect;
float _aspectRatio; float _aspectRatio;
vpx_codec_ctx_t _codec; vpx_codec_ctx_t _colorCodec;
vpx_codec_ctx_t _depthCodec;
QByteArray _arrivingFrame; QByteArray _arrivingFrame;
int _frameCount; int _frameCount;

View file

@ -18,14 +18,15 @@ PACKET_VERSION versionForPacketType(PACKET_TYPE type) {
case PACKET_TYPE_MICROPHONE_AUDIO_NO_ECHO: case PACKET_TYPE_MICROPHONE_AUDIO_NO_ECHO:
case PACKET_TYPE_MICROPHONE_AUDIO_WITH_ECHO: case PACKET_TYPE_MICROPHONE_AUDIO_WITH_ECHO:
return 1; return 1;
break;
case PACKET_TYPE_HEAD_DATA: case PACKET_TYPE_HEAD_DATA:
return 2; return 2;
break;
case PACKET_TYPE_AVATAR_FACE_VIDEO:
return 1;
default: default:
return 0; return 0;
break;
} }
} }