Mirror of https://github.com/overte-org/overte.git (synced 2025-04-21 09:44:21 +02:00)

Send color and depth as separate streams (rather than one on top of the other)
so that we can control their bitrates separately.

Parent: f8ba1c4be1
Commit: c787781efb
5 changed files, 120 additions, 74 deletions
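The hunks below replace the old stacked color-over-depth image with one VP8 stream per channel, packed into a single payload. A minimal sketch of the resulting wire layout and of parsing it, inferred from the encode and decode hunks that follow (the struct and helper function are hypothetical, for illustration only):

```cpp
#include <cstddef>
#include <cstdint>
#include <cstring>

// Payload layout after this commit (inferred from the diff):
//   float   aspectRatio;              // smoothed face rect width / height
//   size_t  colorSize;                // byte length of the VP8 color frame
//   uint8_t colorFrame[colorSize];    // VP8-encoded color
//   uint8_t depthFrame[remainder];    // VP8-encoded depth, if any bytes remain

struct FaceVideoFrame {               // hypothetical helper, not in the codebase
    float aspectRatio;
    const uint8_t* colorData;
    size_t colorSize;
    const uint8_t* depthData;
    size_t depthSize;                 // zero when no depth stream was sent
};

static bool parseFaceVideoPayload(const uint8_t* data, size_t size, FaceVideoFrame& out) {
    if (size < sizeof(float) + sizeof(size_t)) {
        return false;                 // too short to hold the fixed header
    }
    memcpy(&out.aspectRatio, data, sizeof(float));
    memcpy(&out.colorSize, data + sizeof(float), sizeof(size_t));
    out.colorData = data + sizeof(float) + sizeof(size_t);
    if (out.colorSize > size - sizeof(float) - sizeof(size_t)) {
        return false;                 // truncated packet
    }
    out.depthData = out.colorData + out.colorSize;
    out.depthSize = size - sizeof(float) - sizeof(size_t) - out.colorSize;
    return true;
}
```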
```diff
@@ -269,7 +269,7 @@ void Webcam::setFrame(const Mat& color, int format, const Mat& depth, float mean
 }
 
 FrameGrabber::FrameGrabber() : _initialized(false), _capture(0), _searchWindow(0, 0, 0, 0),
-    _smoothedMeanFaceDepth(UNINITIALIZED_FACE_DEPTH), _codec(), _frameCount(0) {
+    _smoothedMeanFaceDepth(UNINITIALIZED_FACE_DEPTH), _colorCodec(), _depthCodec(), _frameCount(0) {
 }
 
 FrameGrabber::~FrameGrabber() {
```
```diff
@@ -377,9 +377,13 @@ void FrameGrabber::shutdown() {
         cvReleaseCapture(&_capture);
         _capture = 0;
     }
-    if (_codec.name != 0) {
-        vpx_codec_destroy(&_codec);
-        _codec.name = 0;
+    if (_colorCodec.name != 0) {
+        vpx_codec_destroy(&_colorCodec);
+        _colorCodec.name = 0;
+    }
+    if (_depthCodec.name != 0) {
+        vpx_codec_destroy(&_depthCodec);
+        _depthCodec.name = 0;
     }
     _initialized = false;
```
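Both the encoder here and the decoder in Face gate teardown on `vpx_codec_ctx_t::name`, which libvpx sets on successful init and the code nulls after destroy. A hedged sketch of the same invariant wrapped in a small RAII guard (VpxEncoderGuard is hypothetical, not part of this commit):

```cpp
#include <vpx/vpx_encoder.h>
#include <vpx/vp8cx.h>

class VpxEncoderGuard {               // hypothetical; mirrors the .name convention above
public:
    ~VpxEncoderGuard() { reset(); }
    bool init(const vpx_codec_enc_cfg_t& config) {
        reset();
        return vpx_codec_enc_init(&_context, vpx_codec_vp8_cx(), &config, 0) == VPX_CODEC_OK;
    }
    void reset() {
        if (_context.name != 0) {     // name is non-null only after a successful init
            vpx_codec_destroy(&_context);
            _context.name = 0;
        }
    }
    vpx_codec_ctx_t* get() { return &_context; }

private:
    vpx_codec_ctx_t _context {};
};
```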
```diff
@@ -492,17 +496,19 @@ void FrameGrabber::grabFrame() {
 
     const int ENCODED_FACE_WIDTH = 128;
     const int ENCODED_FACE_HEIGHT = 128;
-    int combinedFaceHeight = ENCODED_FACE_HEIGHT * (depth.empty() ? 1 : 2);
-    if (_codec.name == 0) {
-        // initialize encoder context
+    if (_colorCodec.name == 0) {
+        // initialize encoder context(s)
         vpx_codec_enc_cfg_t codecConfig;
         vpx_codec_enc_config_default(vpx_codec_vp8_cx(), &codecConfig, 0);
-        const int QUALITY_MULTIPLIER = 2;
-        codecConfig.rc_target_bitrate = QUALITY_MULTIPLIER * ENCODED_FACE_WIDTH * combinedFaceHeight *
+        codecConfig.rc_target_bitrate = ENCODED_FACE_WIDTH * ENCODED_FACE_HEIGHT *
             codecConfig.rc_target_bitrate / codecConfig.g_w / codecConfig.g_h;
         codecConfig.g_w = ENCODED_FACE_WIDTH;
-        codecConfig.g_h = combinedFaceHeight;
-        vpx_codec_enc_init(&_codec, vpx_codec_vp8_cx(), &codecConfig, 0);
+        codecConfig.g_h = ENCODED_FACE_HEIGHT;
+        vpx_codec_enc_init(&_colorCodec, vpx_codec_vp8_cx(), &codecConfig, 0);
+
+        if (!depth.empty()) {
+            vpx_codec_enc_init(&_depthCodec, vpx_codec_vp8_cx(), &codecConfig, 0);
+        }
     }
 
     // correct for 180 degree rotations
```
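Note that both contexts are still initialized from the same codecConfig, so the two bitrates do not actually diverge yet; the split into `_colorCodec` and `_depthCodec` is what makes divergence possible. A hedged sketch of what separate rate control could look like, assuming the FrameGrabber members from the hunk above (DEPTH_BITRATE_DIVISOR and its value are illustrative, not from the commit):

```cpp
const int ENCODED_FACE_WIDTH = 128;
const int ENCODED_FACE_HEIGHT = 128;

vpx_codec_enc_cfg_t colorConfig;
vpx_codec_enc_config_default(vpx_codec_vp8_cx(), &colorConfig, 0);
// scale the library's default bitrate to the face size *before* overwriting
// g_w/g_h, exactly as the hunk above does
colorConfig.rc_target_bitrate = ENCODED_FACE_WIDTH * ENCODED_FACE_HEIGHT *
    colorConfig.rc_target_bitrate / colorConfig.g_w / colorConfig.g_h;
colorConfig.g_w = ENCODED_FACE_WIDTH;
colorConfig.g_h = ENCODED_FACE_HEIGHT;

// depth can now ride a cheaper rate without touching color quality
vpx_codec_enc_cfg_t depthConfig = colorConfig;
const int DEPTH_BITRATE_DIVISOR = 2;      // hypothetical ratio
depthConfig.rc_target_bitrate /= DEPTH_BITRATE_DIVISOR;

vpx_codec_enc_init(&_colorCodec, vpx_codec_vp8_cx(), &colorConfig, 0);
vpx_codec_enc_init(&_depthCodec, vpx_codec_vp8_cx(), &depthConfig, 0);
```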
```diff
@@ -539,9 +545,9 @@ void FrameGrabber::grabFrame() {
     const int ENCODED_BITS_PER_VU = 2;
     const int ENCODED_BITS_PER_PIXEL = ENCODED_BITS_PER_Y + 2 * ENCODED_BITS_PER_VU;
     const int BITS_PER_BYTE = 8;
-    _encodedFace.fill(128, ENCODED_FACE_WIDTH * combinedFaceHeight * ENCODED_BITS_PER_PIXEL / BITS_PER_BYTE);
+    _encodedFace.resize(ENCODED_FACE_WIDTH * ENCODED_FACE_HEIGHT * ENCODED_BITS_PER_PIXEL / BITS_PER_BYTE);
     vpx_image_t vpxImage;
-    vpx_img_wrap(&vpxImage, VPX_IMG_FMT_YV12, ENCODED_FACE_WIDTH, combinedFaceHeight, 1, (unsigned char*)_encodedFace.data());
+    vpx_img_wrap(&vpxImage, VPX_IMG_FMT_YV12, ENCODED_FACE_WIDTH, ENCODED_FACE_HEIGHT, 1, (unsigned char*)_encodedFace.data());
     uchar* yline = vpxImage.planes[0];
     uchar* vline = vpxImage.planes[1];
     uchar* uline = vpxImage.planes[2];
```
```diff
@@ -588,6 +594,24 @@ void FrameGrabber::grabFrame() {
         uline += vpxImage.stride[2];
     }
 
+    // encode the frame
+    vpx_codec_encode(&_colorCodec, &vpxImage, ++_frameCount, 1, 0, VPX_DL_REALTIME);
+
+    // start the payload off with the aspect ratio
+    QByteArray payload(sizeof(float), 0);
+    *(float*)payload.data() = _smoothedFaceRect.size.width / _smoothedFaceRect.size.height;
+
+    // extract the encoded frame
+    vpx_codec_iter_t iterator = 0;
+    const vpx_codec_cx_pkt_t* packet;
+    while ((packet = vpx_codec_get_cx_data(&_colorCodec, &iterator)) != 0) {
+        if (packet->kind == VPX_CODEC_CX_FRAME_PKT) {
+            // prepend the length, which will indicate whether there's a depth frame too
+            payload.append((const char*)&packet->data.frame.sz, sizeof(packet->data.frame.sz));
+            payload.append((const char*)packet->data.frame.buf, packet->data.frame.sz);
+        }
+    }
+
     if (!depth.empty()) {
         // warp the face depth without interpolation (because it will contain invalid zero values)
         _faceDepth.create(ENCODED_FACE_WIDTH, ENCODED_FACE_HEIGHT, CV_16UC1);
```
```diff
@@ -621,12 +645,14 @@ void FrameGrabber::grabFrame() {
         depth.convertTo(_grayDepthFrame, CV_8UC1, 1.0, depthOffset);
 
         // likewise for the encoded representation
-        uchar* yline = vpxImage.planes[0] + vpxImage.stride[0] * ENCODED_FACE_HEIGHT;
-        uchar* vline = vpxImage.planes[1] + vpxImage.stride[1] * (ENCODED_FACE_HEIGHT / 2);
+        uchar* yline = vpxImage.planes[0];
+        uchar* vline = vpxImage.planes[1];
+        uchar* uline = vpxImage.planes[2];
         const uchar EIGHT_BIT_MAXIMUM = 255;
         for (int i = 0; i < ENCODED_FACE_HEIGHT; i += 2) {
             uchar* ydest = yline;
             uchar* vdest = vline;
+            uchar* udest = uline;
             for (int j = 0; j < ENCODED_FACE_WIDTH; j += 2) {
                 ushort tl = *_faceDepth.ptr<ushort>(i, j);
                 ushort tr = *_faceDepth.ptr<ushort>(i, j + 1);
```
```diff
@@ -644,28 +670,28 @@ void FrameGrabber::grabFrame() {
                 ydest += 2;
 
                 *vdest++ = mask;
+                *udest++ = EIGHT_BIT_MIDPOINT;
             }
             yline += vpxImage.stride[0] * 2;
             vline += vpxImage.stride[1];
+            uline += vpxImage.stride[2];
         }
-    }
 
-    // encode the frame
-    vpx_codec_encode(&_codec, &vpxImage, ++_frameCount, 1, 0, VPX_DL_REALTIME);
+        // encode the frame
+        vpx_codec_encode(&_depthCodec, &vpxImage, _frameCount, 1, 0, VPX_DL_REALTIME);
 
-    // extract the encoded frame
-    vpx_codec_iter_t iterator = 0;
-    const vpx_codec_cx_pkt_t* packet;
-    while ((packet = vpx_codec_get_cx_data(&_codec, &iterator)) != 0) {
-        if (packet->kind == VPX_CODEC_CX_FRAME_PKT) {
-            // prepend the aspect ratio
-            QByteArray payload(sizeof(float), 0);
-            *(float*)payload.data() = _smoothedFaceRect.size.width / _smoothedFaceRect.size.height;
-            payload.append((const char*)packet->data.frame.buf, packet->data.frame.sz);
-            QMetaObject::invokeMethod(Application::getInstance(), "sendAvatarFaceVideoMessage", Q_ARG(int, _frameCount),
-                Q_ARG(QByteArray, payload));
-        }
-    }
+        // extract the encoded frame
+        vpx_codec_iter_t iterator = 0;
+        const vpx_codec_cx_pkt_t* packet;
+        while ((packet = vpx_codec_get_cx_data(&_depthCodec, &iterator)) != 0) {
+            if (packet->kind == VPX_CODEC_CX_FRAME_PKT) {
+                payload.append((const char*)packet->data.frame.buf, packet->data.frame.sz);
+            }
+        }
+    }
+
+    QMetaObject::invokeMethod(Application::getInstance(), "sendAvatarFaceVideoMessage",
+        Q_ARG(int, _frameCount), Q_ARG(QByteArray, payload));
 
     QMetaObject::invokeMethod(Application::getInstance()->getWebcam(), "setFrame",
         Q_ARG(cv::Mat, color), Q_ARG(int, format), Q_ARG(cv::Mat, _grayDepthFrame), Q_ARG(float, _smoothedMeanFaceDepth),
```
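The depth stream reuses YV12 purely as a container: the Y plane carries quantized depth, the V plane carries a validity mask, and the U plane is held at EIGHT_BIT_MIDPOINT (assumed 128, the neutral chroma value; its definition falls outside the visible hunks). Because VP8 is lossy, the decoder recovers the mask by thresholding rather than exact comparison. A sketch of that convention, with the threshold and maximum taken from the hunks:

```cpp
#include <cstdint>

const uint8_t EIGHT_BIT_MIDPOINT = 128;  // assumed value; keeps the U plane chroma-neutral
const uint8_t EIGHT_BIT_MAXIMUM = 255;
const uint8_t MASK_THRESHOLD = 192;      // decoder-side cut, from the Face hunk below

// encoder side: write an extreme value so codec noise cannot flip the bit
inline uint8_t encodeMask(bool masked) {
    return masked ? EIGHT_BIT_MAXIMUM : 0;
}

// decoder side: anything that decodes above the threshold counts as masked;
// the diff overwrites masked depth pixels with EIGHT_BIT_MAXIMUM
inline bool decodeMask(uint8_t vSample) {
    return vSample >= MASK_THRESHOLD;
}
```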
```diff
@@ -120,7 +120,8 @@ private:
     cv::Mat _grayDepthFrame;
     float _smoothedMeanFaceDepth;
 
-    vpx_codec_ctx_t _codec;
+    vpx_codec_ctx_t _colorCodec;
+    vpx_codec_ctx_t _depthCodec;
     int _frameCount;
     cv::Mat _faceColor;
     cv::Mat _faceDepth;
```
```diff
@@ -30,19 +30,25 @@ GLuint Face::_vboID;
 GLuint Face::_iboID;
 
 Face::Face(Head* owningHead) : _owningHead(owningHead), _renderMode(MESH),
-    _colorTextureID(0), _depthTextureID(0), _codec(), _frameCount(0) {
+    _colorTextureID(0), _depthTextureID(0), _colorCodec(), _depthCodec(), _frameCount(0) {
     // we may have been created in the network thread, but we live in the main thread
     moveToThread(Application::getInstance()->thread());
 }
 
 Face::~Face() {
-    if (_codec.name != 0) {
-        vpx_codec_destroy(&_codec);
+    if (_colorCodec.name != 0) {
+        vpx_codec_destroy(&_colorCodec);
 
-        // delete our textures, since we know that we own them
+        // delete our texture, since we know that we own it
         if (_colorTextureID != 0) {
             glDeleteTextures(1, &_colorTextureID);
         }
+    }
+    if (_depthCodec.name != 0) {
+        vpx_codec_destroy(&_depthCodec);
+
+        // delete our texture, since we know that we own it
+        if (_depthTextureID != 0) {
+            glDeleteTextures(1, &_depthTextureID);
+        }
```
```diff
@@ -55,9 +61,9 @@ void Face::setTextureRect(const cv::RotatedRect& textureRect) {
 }
 
 int Face::processVideoMessage(unsigned char* packetData, size_t dataBytes) {
-    if (_codec.name == 0) {
+    if (_colorCodec.name == 0) {
         // initialize decoder context
-        vpx_codec_dec_init(&_codec, vpx_codec_vp8_dx(), 0, 0);
+        vpx_codec_dec_init(&_colorCodec, vpx_codec_vp8_dx(), 0, 0);
     }
     // skip the header
     unsigned char* packetPosition = packetData;
```
```diff
@@ -85,14 +91,14 @@ int Face::processVideoMessage(unsigned char* packetData, size_t dataBytes) {
 
     if ((_frameBytesRemaining -= payloadSize) <= 0) {
         float aspectRatio = *(const float*)_arrivingFrame.constData();
-        vpx_codec_decode(&_codec, (const uint8_t*)_arrivingFrame.constData() + sizeof(float),
-            _arrivingFrame.size() - sizeof(float), 0, 0);
+        size_t colorSize = *(const size_t*)(_arrivingFrame.constData() + sizeof(float));
+        const uint8_t* colorData = (const uint8_t*)(_arrivingFrame.constData() + sizeof(float) + sizeof(size_t));
+        vpx_codec_decode(&_colorCodec, colorData, colorSize, 0, 0);
         vpx_codec_iter_t iterator = 0;
         vpx_image_t* image;
-        while ((image = vpx_codec_get_frame(&_codec, &iterator)) != 0) {
+        while ((image = vpx_codec_get_frame(&_colorCodec, &iterator)) != 0) {
             // convert from YV12 to RGB
-            const int imageHeight = image->d_w;
-            Mat color(imageHeight, image->d_w, CV_8UC3);
+            Mat color(image->d_h, image->d_w, CV_8UC3);
             uchar* yline = image->planes[0];
             uchar* vline = image->planes[1];
             uchar* uline = image->planes[2];
```
```diff
@@ -100,7 +106,7 @@ int Face::processVideoMessage(unsigned char* packetData, size_t dataBytes) {
         const int GREEN_V_WEIGHT = (int)(0.714 * 256);
         const int GREEN_U_WEIGHT = (int)(0.344 * 256);
         const int BLUE_U_WEIGHT = (int)(1.773 * 256);
-        for (int i = 0; i < imageHeight; i += 2) {
+        for (int i = 0; i < image->d_h; i += 2) {
             uchar* ysrc = yline;
             uchar* vsrc = vline;
             uchar* usrc = uline;
```
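The fixed-point weights above are the standard BT.601-style YUV-to-RGB coefficients scaled by 256. A sketch of the per-pixel conversion consistent with those constants (RED_V_WEIGHT and the clamping helper are assumptions, since only the green and blue weights appear in this hunk):

```cpp
#include <algorithm>
#include <cstdint>

const int RED_V_WEIGHT = (int)(1.403 * 256);   // assumed; not shown in the hunk
const int GREEN_V_WEIGHT = (int)(0.714 * 256);
const int GREEN_U_WEIGHT = (int)(0.344 * 256);
const int BLUE_U_WEIGHT = (int)(1.773 * 256);

inline uint8_t clampByte(int value) {
    return (uint8_t)std::min(255, std::max(0, value));
}

// 8.8 fixed-point YUV -> RGB for one pixel; full-range luma assumed
inline void yuvToRgb(uint8_t y, uint8_t u, uint8_t v, uint8_t& r, uint8_t& g, uint8_t& b) {
    int d = u - 128;    // chroma offsets around the 8-bit midpoint
    int e = v - 128;
    r = clampByte(y + ((RED_V_WEIGHT * e) >> 8));
    g = clampByte(y - ((GREEN_V_WEIGHT * e + GREEN_U_WEIGHT * d) >> 8));
    b = clampByte(y + ((BLUE_U_WEIGHT * d) >> 8));
}
```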
```diff
@@ -144,34 +150,45 @@ int Face::processVideoMessage(unsigned char* packetData, size_t dataBytes) {
             uline += image->stride[2];
         }
         Mat depth;
-        if (image->d_h > imageHeight) {
-            // if the height is greater than the width, we have depth data
-            depth.create(imageHeight, image->d_w, CV_8UC1);
-            uchar* yline = image->planes[0] + image->stride[0] * imageHeight;
-            uchar* vline = image->planes[1] + image->stride[1] * (imageHeight / 2);
-            const uchar EIGHT_BIT_MAXIMUM = 255;
-            const uchar MASK_THRESHOLD = 192;
-            for (int i = 0; i < imageHeight; i += 2) {
-                uchar* ysrc = yline;
-                uchar* vsrc = vline;
-                for (int j = 0; j < image->d_w; j += 2) {
-
-                    if (*vsrc++ >= MASK_THRESHOLD) {
-                        *depth.ptr(i, j) = EIGHT_BIT_MAXIMUM;
-                        *depth.ptr(i, j + 1) = EIGHT_BIT_MAXIMUM;
-                        *depth.ptr(i + 1, j) = EIGHT_BIT_MAXIMUM;
-                        *depth.ptr(i + 1, j + 1) = EIGHT_BIT_MAXIMUM;
-
-                    } else {
-                        *depth.ptr(i, j) = ysrc[0];
-                        *depth.ptr(i, j + 1) = ysrc[1];
-                        *depth.ptr(i + 1, j) = ysrc[image->stride[0]];
-                        *depth.ptr(i + 1, j + 1) = ysrc[image->stride[0] + 1];
-                    }
-                    ysrc += 2;
-                }
-                yline += image->stride[0] * 2;
-                vline += image->stride[1];
-            }
-        }
+        const uint8_t* depthData = colorData + colorSize;
+        int depthSize = _arrivingFrame.size() - ((const char*)depthData - _arrivingFrame.constData());
+        if (depthSize > 0) {
+            if (_depthCodec.name == 0) {
+                // initialize decoder context
+                vpx_codec_dec_init(&_depthCodec, vpx_codec_vp8_dx(), 0, 0);
+            }
+            vpx_codec_decode(&_depthCodec, depthData, depthSize, 0, 0);
+            vpx_codec_iter_t iterator = 0;
+            vpx_image_t* image;
+            while ((image = vpx_codec_get_frame(&_depthCodec, &iterator)) != 0) {
+                depth.create(image->d_h, image->d_w, CV_8UC1);
+                uchar* yline = image->planes[0];
+                uchar* vline = image->planes[1];
+                const uchar EIGHT_BIT_MAXIMUM = 255;
+                const uchar MASK_THRESHOLD = 192;
+                for (int i = 0; i < image->d_h; i += 2) {
+                    uchar* ysrc = yline;
+                    uchar* vsrc = vline;
+                    for (int j = 0; j < image->d_w; j += 2) {
+
+                        if (*vsrc++ >= MASK_THRESHOLD) {
+                            *depth.ptr(i, j) = EIGHT_BIT_MAXIMUM;
+                            *depth.ptr(i, j + 1) = EIGHT_BIT_MAXIMUM;
+                            *depth.ptr(i + 1, j) = EIGHT_BIT_MAXIMUM;
+                            *depth.ptr(i + 1, j + 1) = EIGHT_BIT_MAXIMUM;
+
+                        } else {
+                            *depth.ptr(i, j) = ysrc[0];
+                            *depth.ptr(i, j + 1) = ysrc[1];
+                            *depth.ptr(i + 1, j) = ysrc[image->stride[0]];
+                            *depth.ptr(i + 1, j + 1) = ysrc[image->stride[0] + 1];
+                        }
+                        ysrc += 2;
+                    }
+                    yline += image->stride[0] * 2;
+                    vline += image->stride[1];
+                }
+            }
+        }
         QMetaObject::invokeMethod(this, "setFrame", Q_ARG(cv::Mat, color),
```
```diff
@@ -57,7 +57,8 @@ private:
     cv::RotatedRect _textureRect;
     float _aspectRatio;
 
-    vpx_codec_ctx_t _codec;
+    vpx_codec_ctx_t _colorCodec;
+    vpx_codec_ctx_t _depthCodec;
 
     QByteArray _arrivingFrame;
     int _frameCount;
```
```diff
@@ -18,14 +18,15 @@ PACKET_VERSION versionForPacketType(PACKET_TYPE type) {
         case PACKET_TYPE_MICROPHONE_AUDIO_NO_ECHO:
         case PACKET_TYPE_MICROPHONE_AUDIO_WITH_ECHO:
             return 1;
             break;
 
         case PACKET_TYPE_HEAD_DATA:
             return 2;
             break;
 
+        case PACKET_TYPE_AVATAR_FACE_VIDEO:
+            return 1;
+
         default:
             return 0;
             break;
     }
 }
```
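Since the wire format of PACKET_TYPE_AVATAR_FACE_VIDEO changed incompatibly, giving it a nonzero version lets receivers reject packets from older senders. A minimal sketch of such a guard, assuming the header begins with the packet type followed by the version byte (the function shape is illustrative, not taken from this commit):

```cpp
// Hypothetical receive-side check; assumes a [type][version] packet prefix.
bool faceVideoVersionMatch(const unsigned char* packetData) {
    PACKET_TYPE type = packetData[0];
    PACKET_VERSION senderVersion = packetData[1];
    return senderVersion == versionForPacketType(type);  // drop the packet otherwise
}
```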