tweaks, using packed scalar for audio loudness

This commit is contained in:
Brad Hefta-Gaub 2016-12-21 17:21:28 -08:00
parent a14d106d3b
commit 82dd9412ab
3 changed files with 151 additions and 65 deletions

View file

@ -398,6 +398,8 @@ void AvatarMixer::broadcastAvatarData() {
}
numAvatarDataBytes += avatarPacketList->write(otherNode->getUUID().toRfc4122());
//qDebug() << "about to write data for:" << otherNode->getUUID();
numAvatarDataBytes += avatarPacketList->write(otherAvatar.toByteArray(detail));
avatarPacketList->endSegment();

View file

@ -49,67 +49,9 @@ const glm::vec3 DEFAULT_LOCAL_AABOX_SCALE(1.0f);
const QString AvatarData::FRAME_NAME = "com.highfidelity.recording.AvatarData";
// Packed structures describing the fixed-size sections of an avatar data packet, together with
// the hand-maintained byte size of each section. The serializer and parser in this file
// reinterpret_cast raw packet buffers to these types, so field order and width are the layout.
// NOTE(review): this copy declares audioLoudness as a 4-byte float (AVATAR_INFO_SIZE = 81), while
// toByteArray() writes that field through packFloatScalarToSignedTwoByteFixed() - confirm this is
// not a stale duplicate of the definition in the header.
namespace AvatarDataPacket {
// NOTE: AvatarDataPackets start with a uint16_t sequence number that is not reflected in the Header structure.
PACKED_BEGIN struct Header {
uint8_t packetStateFlags; // state flags, currently used to indicate if the packet is a minimal or fuller packet
} PACKED_END;
const size_t HEADER_SIZE = 1;
PACKED_BEGIN struct MinimalAvatarInfo {
float globalPosition[3]; // avatar's position
} PACKED_END;
const size_t MINIMAL_AVATAR_INFO_SIZE = 12;
PACKED_BEGIN struct AvatarInfo {
float position[3]; // skeletal model's position
float globalPosition[3]; // avatar's position
float globalBoundingBoxCorner[3]; // global position of the lowest corner of the avatar's bounding box
uint16_t localOrientation[3]; // avatar's local euler angles (degrees, compressed) relative to the thing it's attached to
uint16_t scale; // (compressed) 'ratio' encoding uses sign bit as flag.
float lookAtPosition[3]; // world space position that eyes are focusing on.
float audioLoudness; // current loudness of microphone
uint8_t sensorToWorldQuat[6]; // 6 byte compressed quaternion part of sensor to world matrix
uint16_t sensorToWorldScale; // uniform scale of sensor to world matrix
float sensorToWorldTrans[3]; // fourth column of sensor to world matrix
uint8_t flags;
} PACKED_END;
const size_t AVATAR_INFO_SIZE = 81;
// only present if HAS_REFERENTIAL flag is set in AvatarInfo.flags
PACKED_BEGIN struct ParentInfo {
uint8_t parentUUID[16]; // rfc 4122 encoded
uint16_t parentJointIndex;
} PACKED_END;
const size_t PARENT_INFO_SIZE = 18;
// only present if IS_FACESHIFT_CONNECTED flag is set in AvatarInfo.flags
PACKED_BEGIN struct FaceTrackerInfo {
float leftEyeBlink;
float rightEyeBlink;
float averageLoudness;
float browAudioLift;
uint8_t numBlendshapeCoefficients;
// float blendshapeCoefficients[numBlendshapeCoefficients];
} PACKED_END;
const size_t FACE_TRACKER_INFO_SIZE = 17;
// variable length structure follows
/*
struct JointData {
uint8_t numJoints;
uint8_t rotationValidityBits[ceil(numJoints / 8)]; // one bit per joint, if true then a compressed rotation follows.
SixByteQuat rotation[numValidRotations]; // encoded and compressed by packOrientationQuatToSixBytes()
uint8_t translationValidityBits[ceil(numJoints / 8)]; // one bit per joint, if true then a compressed translation follows.
SixByteTrans translation[numValidTranslations]; // encoded and compressed by packFloatVec3ToSignedTwoByteFixed()
};
*/
}
// Radix values handed to the signed two-byte fixed-point pack/unpack helpers.
// NOTE(review): presumably the number of fractional bits kept by the encoding - confirm against
// the helper implementations, which are not in this file.
static const int TRANSLATION_COMPRESSION_RADIX = 12;
static const int SENSOR_TO_WORLD_SCALE_RADIX = 10;
// Passed to packFloatScalarToSignedTwoByteFixed() / unpackFloatScalarFromSignedTwoByteFixed()
// for the audioLoudness field; writer and reader must use the same value.
static const int AUDIO_LOUDNESS_RADIX = 10;
// Calls abort() when COND is false. The macro has no NDEBUG guard, so the check stays active in
// every build configuration. The do { } while(0) wrapper makes it usable as a single statement.
#define ASSERT(COND) do { if (!(COND)) { abort(); } } while(0)
@ -230,12 +172,14 @@ QByteArray AvatarData::toByteArray(AvatarDataDetail dataDetail) {
destinationBuffer += sizeof(_globalPosition);
} else {
auto avatarInfo = reinterpret_cast<AvatarDataPacket::AvatarInfo*>(destinationBuffer);
avatarInfo->position[0] = getLocalPosition().x;
avatarInfo->position[1] = getLocalPosition().y;
avatarInfo->position[2] = getLocalPosition().z;
avatarInfo->globalPosition[0] = _globalPosition.x;
avatarInfo->globalPosition[1] = _globalPosition.y;
avatarInfo->globalPosition[2] = _globalPosition.z;
avatarInfo->position[0] = getLocalPosition().x;
avatarInfo->position[1] = getLocalPosition().y;
avatarInfo->position[2] = getLocalPosition().z;
avatarInfo->globalBoundingBoxCorner[0] = getPosition().x - _globalBoundingBoxCorner.x;
avatarInfo->globalBoundingBoxCorner[1] = getPosition().y - _globalBoundingBoxCorner.y;
avatarInfo->globalBoundingBoxCorner[2] = getPosition().z - _globalBoundingBoxCorner.z;
@ -248,7 +192,8 @@ QByteArray AvatarData::toByteArray(AvatarDataDetail dataDetail) {
avatarInfo->lookAtPosition[0] = _headData->_lookAtPosition.x;
avatarInfo->lookAtPosition[1] = _headData->_lookAtPosition.y;
avatarInfo->lookAtPosition[2] = _headData->_lookAtPosition.z;
avatarInfo->audioLoudness = _headData->_audioLoudness;
packFloatScalarToSignedTwoByteFixed((uint8_t*)&avatarInfo->audioLoudness, _headData->_audioLoudness, AUDIO_LOUDNESS_RADIX);
glm::mat4 sensorToWorldMatrix = getSensorToWorldMatrix();
packOrientationQuatToSixBytes(avatarInfo->sensorToWorldQuat, glmExtractRotation(sensorToWorldMatrix));
@ -280,6 +225,33 @@ QByteArray AvatarData::toByteArray(AvatarDataDetail dataDetail) {
}
destinationBuffer += sizeof(AvatarDataPacket::AvatarInfo);
#if 0 // debugging
#define COMPARE_MEMBER_V3(L, R, M) { if (L.M[0] != R.M[0] || L.M[1] != R.M[1] || L.M[2] != R.M[2]) { qCDebug(avatars) << #M " changed - old:" << "{" << L.M[0] << "," << L.M[1] << ", " << L.M[2] << "}" << " new:" "{" << R.M[0] << "," << R.M[1] << ", " << R.M[2] << "}"; } }
#define COMPARE_MEMBER_F(L, R, M) { if (L.M != R.M) { qCDebug(avatars) << #M " changed - old:" << L.M << " new:" << R.M; } }
qCDebug(avatars) << "--------------";
COMPARE_MEMBER_V3(_lastAvatarInfo, (*avatarInfo), position);
COMPARE_MEMBER_V3(_lastAvatarInfo, (*avatarInfo), globalPosition);
COMPARE_MEMBER_V3(_lastAvatarInfo, (*avatarInfo), globalBoundingBoxCorner);
COMPARE_MEMBER_V3(_lastAvatarInfo, (*avatarInfo), localOrientation);
COMPARE_MEMBER_F(_lastAvatarInfo, (*avatarInfo), scale);
COMPARE_MEMBER_V3(_lastAvatarInfo, (*avatarInfo), lookAtPosition);
COMPARE_MEMBER_F(_lastAvatarInfo, (*avatarInfo), audioLoudness);
if (_lastSensorToWorldMatrix != sensorToWorldMatrix) {
qCDebug(avatars) << "sensorToWorldMatrix changed - old:" << _lastSensorToWorldMatrix << "new:" << sensorToWorldMatrix;
}
//COMPARE_MEMBER_V3(_lastAvatarInfo, (*avatarInfo), sensorToWorldQuat);
COMPARE_MEMBER_F(_lastAvatarInfo, (*avatarInfo), sensorToWorldScale);
COMPARE_MEMBER_V3(_lastAvatarInfo, (*avatarInfo), sensorToWorldTrans);
COMPARE_MEMBER_F(_lastAvatarInfo, (*avatarInfo), flags);
memcpy(&_lastAvatarInfo, avatarInfo, sizeof(_lastAvatarInfo));
_lastSensorToWorldMatrix = sensorToWorldMatrix;
#endif
if (!parentID.isNull()) {
auto parentInfo = reinterpret_cast<AvatarDataPacket::ParentInfo*>(destinationBuffer);
QByteArray referentialAsBytes = parentID.toRfc4122();
@ -527,8 +499,8 @@ int AvatarData::parseDataFromBuffer(const QByteArray& buffer) {
auto avatarInfo = reinterpret_cast<const AvatarDataPacket::AvatarInfo*>(sourceBuffer);
sourceBuffer += sizeof(AvatarDataPacket::AvatarInfo);
glm::vec3 position = glm::vec3(avatarInfo->position[0], avatarInfo->position[1], avatarInfo->position[2]);
_globalPosition = glm::vec3(avatarInfo->globalPosition[0], avatarInfo->globalPosition[1], avatarInfo->globalPosition[2]);
glm::vec3 position = glm::vec3(avatarInfo->position[0], avatarInfo->position[1], avatarInfo->position[2]);
_globalBoundingBoxCorner = glm::vec3(avatarInfo->globalBoundingBoxCorner[0], avatarInfo->globalBoundingBoxCorner[1], avatarInfo->globalBoundingBoxCorner[2]);
if (isNaN(position)) {
if (shouldLogError(now)) {
@ -576,7 +548,11 @@ int AvatarData::parseDataFromBuffer(const QByteArray& buffer) {
}
_headData->_lookAtPosition = lookAt;
float audioLoudness = avatarInfo->audioLoudness;
float audioLoudness;
unpackFloatScalarFromSignedTwoByteFixed((int16_t*)&avatarInfo->audioLoudness, &audioLoudness, AUDIO_LOUDNESS_RADIX);
// FIXME - is this really needed?
if (isNaN(audioLoudness)) {
if (shouldLogError(now)) {
qCWarning(avatars) << "Discard AvatarData packet: audioLoudness is NaN, uuid " << getSessionUUID();

View file

@ -83,6 +83,111 @@ const quint32 AVATAR_MOTION_SCRIPTABLE_BITS =
const qint64 AVATAR_SILENCE_THRESHOLD_USECS = 5 * USECS_PER_SECOND;
namespace AvatarDataPacket {
// NOTE: AvatarDataPackets start with a uint16_t sequence number that is not reflected in the Header structure.
PACKED_BEGIN struct Header {
uint8_t packetStateFlags; // state flags, currently used to indicate if the packet is a minimal or fuller packet
} PACKED_END;
const size_t HEADER_SIZE = 1;
PACKED_BEGIN struct MinimalAvatarInfo {
float globalPosition[3]; // avatar's position
} PACKED_END;
const size_t MINIMAL_AVATAR_INFO_SIZE = 12;
PACKED_BEGIN struct AvatarInfo {
// FIXME - this has 8 unqiue items, we could use a simple header byte to indicate whether or not the fields
// exist in the packet and have changed since last being sent.
float globalPosition[3]; // avatar's position
// FIXME - possible savings:
// a) could be encoded as relative to last known position, most movements
// will be withing a smaller radix
// b) would still need an intermittent absolute value.
float position[3]; // skeletal model's position
// FIXME - this used to account for a registration offset from the avatar's position
// to the position of the skeletal model/mesh. This relative offset doesn't change from
// frame to frame, instead only changes when the model changes, it could be moved to the
// identity packet and/or only included when it changes.
// if it's encoded relative to the globalPosition, it could be reduced to a smaller radix
//
// POTENTIAL SAVINGS - 12 bytes
float globalBoundingBoxCorner[3]; // global position of the lowest corner of the avatar's bounding box
// FIXME - this would change less frequently if it was the dimensions of the bounding box
// instead of the corner.
//
// POTENTIAL SAVINGS - 12 bytes
uint16_t localOrientation[3]; // avatar's local euler angles (degrees, compressed) relative to the thing it's attached to
uint16_t scale; // (compressed) 'ratio' encoding uses sign bit as flag.
// FIXME - this doesn't change every frame
//
// POTENTIAL SAVINGS - 2 bytes
float lookAtPosition[3]; // world space position that eyes are focusing on.
// FIXME - unless the person has an eye tracker, this is simulated...
// a) maybe we can just have the client calculate this
// b) at distance this will be hard to discern and can likely be
// descimated or dropped completely
//
// POTENTIAL SAVINGS - 12 bytes
uint16_t audioLoudness; // current loundess of microphone
// FIXME -
// a) this could probably be decimated with a smaller radix <<< DONE
// b) this doesn't change every frame
//
// POTENTIAL SAVINGS - 4-2 bytes
// FIXME - these 20 bytes are only used by viewers if my avatar has "attachments"
// we could save these bytes if no attachments are active.
//
// POTENTIAL SAVINGS - 20 bytes
uint8_t sensorToWorldQuat[6]; // 6 byte compressed quaternion part of sensor to world matrix
uint16_t sensorToWorldScale; // uniform scale of sensor to world matrix
float sensorToWorldTrans[3]; // fourth column of sensor to world matrix
// FIXME - sensorToWorldTrans might be able to be better compressed if it was
// relative to the avatar position.
uint8_t flags;
} PACKED_END;
const size_t AVATAR_INFO_SIZE = 79;
// only present if HAS_REFERENTIAL flag is set in AvatarInfo.flags
PACKED_BEGIN struct ParentInfo {
uint8_t parentUUID[16]; // rfc 4122 encoded
uint16_t parentJointIndex;
} PACKED_END;
const size_t PARENT_INFO_SIZE = 18;
// only present if IS_FACESHIFT_CONNECTED flag is set in AvatarInfo.flags
PACKED_BEGIN struct FaceTrackerInfo {
float leftEyeBlink;
float rightEyeBlink;
float averageLoudness;
float browAudioLift;
uint8_t numBlendshapeCoefficients;
// float blendshapeCoefficients[numBlendshapeCoefficients];
} PACKED_END;
const size_t FACE_TRACKER_INFO_SIZE = 17;
// variable length structure follows
/*
struct JointData {
uint8_t numJoints;
uint8_t rotationValidityBits[ceil(numJoints / 8)]; // one bit per joint, if true then a compressed rotation follows.
SixByteQuat rotation[numValidRotations]; // encodeded and compressed by packOrientationQuatToSixBytes()
uint8_t translationValidityBits[ceil(numJoints / 8)]; // one bit per joint, if true then a compressed translation follows.
SixByteTrans translation[numValidTranslations]; // encodeded and compressed by packFloatVec3ToSignedTwoByteFixed()
};
*/
}
// Bitset of state flags - we store the key state, hand state, Faceshift, eye tracking, and existence of
// referential data in this bit set. The hand state is an octal, but is split into two sections to maintain
// backward compatibility. The bits are ordered as such (0-7 left to right).
@ -482,6 +587,9 @@ protected:
int getFauxJointIndex(const QString& name) const;
AvatarDataPacket::AvatarInfo _lastAvatarInfo;
glm::mat4 _lastSensorToWorldMatrix;
private:
friend void avatarStateFromFrame(const QByteArray& frameData, AvatarData* _avatar);
static QUrl _defaultFullAvatarModelUrl;