Merge pull request #15746 from roxanneskelly/bugz516

BUGZ-628 - Add quasi-MTBF logging to track disconnects
This commit is contained in:
Howard Stearns 2019-06-12 15:48:45 -07:00 committed by GitHub
commit 0c1df749b9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 62 additions and 21 deletions

View file

@ -88,11 +88,10 @@ void DomainGatekeeper::processConnectRequestPacket(QSharedPointer<ReceivedMessag
auto pendingAssignment = _pendingAssignedNodes.find(nodeConnection.connectUUID); auto pendingAssignment = _pendingAssignedNodes.find(nodeConnection.connectUUID);
SharedNodePointer node; SharedNodePointer node;
QString username;
if (pendingAssignment != _pendingAssignedNodes.end()) { if (pendingAssignment != _pendingAssignedNodes.end()) {
node = processAssignmentConnectRequest(nodeConnection, pendingAssignment->second); node = processAssignmentConnectRequest(nodeConnection, pendingAssignment->second);
} else if (!STATICALLY_ASSIGNED_NODES.contains(nodeConnection.nodeType)) { } else if (!STATICALLY_ASSIGNED_NODES.contains(nodeConnection.nodeType)) {
QString username;
QByteArray usernameSignature; QByteArray usernameSignature;
if (message->getBytesLeftToRead() > 0) { if (message->getBytesLeftToRead() > 0) {
@ -123,8 +122,12 @@ void DomainGatekeeper::processConnectRequestPacket(QSharedPointer<ReceivedMessag
nodeData->setPlaceName(nodeConnection.placeName); nodeData->setPlaceName(nodeConnection.placeName);
qDebug() << "Allowed connection from node" << uuidStringWithoutCurlyBraces(node->getUUID()) qDebug() << "Allowed connection from node" << uuidStringWithoutCurlyBraces(node->getUUID())
<< "on" << message->getSenderSockAddr() << "with MAC" << nodeConnection.hardwareAddress << "on" << message->getSenderSockAddr()
<< "and machine fingerprint" << nodeConnection.machineFingerprint; << "with MAC" << nodeConnection.hardwareAddress
<< "and machine fingerprint" << nodeConnection.machineFingerprint
<< "user" << username
<< "reason" << QString(nodeConnection.connectReason ? "SilentDomainDisconnect" : "Connect")
<< "previous connection uptime" << nodeConnection.previousConnectionUpTime/USECS_PER_MSEC << "msec";
// signal that we just connected a node so the DomainServer can get it a list // signal that we just connected a node so the DomainServer can get it a list
// and broadcast its presence right away // and broadcast its presence right away
@ -882,7 +885,6 @@ void DomainGatekeeper::getGroupMemberships(const QString& username) {
QJsonArray groupIDs = QJsonArray::fromStringList(groupIDSet.toList()); QJsonArray groupIDs = QJsonArray::fromStringList(groupIDSet.toList());
json["groups"] = groupIDs; json["groups"] = groupIDs;
// if we've already asked, wait for the answer before asking again // if we've already asked, wait for the answer before asking again
QString lowerUsername = username.toLower(); QString lowerUsername = username.toLower();
if (_inFlightGroupMembershipsRequests.contains(lowerUsername)) { if (_inFlightGroupMembershipsRequests.contains(lowerUsername)) {

View file

@ -1079,7 +1079,7 @@ void DomainServer::processListRequestPacket(QSharedPointer<ReceivedMessage> mess
// client-side send time of last connect/domain list request // client-side send time of last connect/domain list request
nodeData->setLastDomainCheckinTimestamp(nodeRequestData.lastPingTimestamp); nodeData->setLastDomainCheckinTimestamp(nodeRequestData.lastPingTimestamp);
sendDomainListToNode(sendingNode, message->getFirstPacketReceiveTime(), message->getSenderSockAddr()); sendDomainListToNode(sendingNode, message->getFirstPacketReceiveTime(), message->getSenderSockAddr(), false);
} }
bool DomainServer::isInInterestSet(const SharedNodePointer& nodeA, const SharedNodePointer& nodeB) { bool DomainServer::isInInterestSet(const SharedNodePointer& nodeA, const SharedNodePointer& nodeB) {
@ -1145,7 +1145,7 @@ void DomainServer::handleConnectedNode(SharedNodePointer newNode, quint64 reques
DomainServerNodeData* nodeData = static_cast<DomainServerNodeData*>(newNode->getLinkedData()); DomainServerNodeData* nodeData = static_cast<DomainServerNodeData*>(newNode->getLinkedData());
// reply back to the user with a PacketType::DomainList // reply back to the user with a PacketType::DomainList
sendDomainListToNode(newNode, requestReceiveTime, nodeData->getSendingSockAddr()); sendDomainListToNode(newNode, requestReceiveTime, nodeData->getSendingSockAddr(), true);
// if this node is a user (unassigned Agent), signal // if this node is a user (unassigned Agent), signal
if (newNode->getType() == NodeType::Agent && !nodeData->wasAssigned()) { if (newNode->getType() == NodeType::Agent && !nodeData->wasAssigned()) {
@ -1161,7 +1161,7 @@ void DomainServer::handleConnectedNode(SharedNodePointer newNode, quint64 reques
broadcastNewNode(newNode); broadcastNewNode(newNode);
} }
void DomainServer::sendDomainListToNode(const SharedNodePointer& node, quint64 requestPacketReceiveTime, const HifiSockAddr &senderSockAddr) { void DomainServer::sendDomainListToNode(const SharedNodePointer& node, quint64 requestPacketReceiveTime, const HifiSockAddr &senderSockAddr, bool newConnection) {
const int NUM_DOMAIN_LIST_EXTENDED_HEADER_BYTES = NUM_BYTES_RFC4122_UUID + NLPacket::NUM_BYTES_LOCALID + const int NUM_DOMAIN_LIST_EXTENDED_HEADER_BYTES = NUM_BYTES_RFC4122_UUID + NLPacket::NUM_BYTES_LOCALID +
NUM_BYTES_RFC4122_UUID + NLPacket::NUM_BYTES_LOCALID + 4; NUM_BYTES_RFC4122_UUID + NLPacket::NUM_BYTES_LOCALID + 4;
@ -1181,6 +1181,7 @@ void DomainServer::sendDomainListToNode(const SharedNodePointer& node, quint64 r
extendedHeaderStream << nodeData->getLastDomainCheckinTimestamp(); extendedHeaderStream << nodeData->getLastDomainCheckinTimestamp();
extendedHeaderStream << quint64(duration_cast<microseconds>(system_clock::now().time_since_epoch()).count()); extendedHeaderStream << quint64(duration_cast<microseconds>(system_clock::now().time_since_epoch()).count());
extendedHeaderStream << quint64(duration_cast<microseconds>(p_high_resolution_clock::now().time_since_epoch()).count()) - requestPacketReceiveTime; extendedHeaderStream << quint64(duration_cast<microseconds>(p_high_resolution_clock::now().time_since_epoch()).count()) - requestPacketReceiveTime;
extendedHeaderStream << newConnection;
auto domainListPackets = NLPacketList::create(PacketType::DomainList, extendedHeader); auto domainListPackets = NLPacketList::create(PacketType::DomainList, extendedHeader);
// always send the node their own UUID back // always send the node their own UUID back

View file

@ -173,7 +173,7 @@ private:
void handleKillNode(SharedNodePointer nodeToKill); void handleKillNode(SharedNodePointer nodeToKill);
void broadcastNodeDisconnect(const SharedNodePointer& disconnnectedNode); void broadcastNodeDisconnect(const SharedNodePointer& disconnnectedNode);
void sendDomainListToNode(const SharedNodePointer& node, quint64 requestPacketReceiveTime, const HifiSockAddr& senderSockAddr); void sendDomainListToNode(const SharedNodePointer& node, quint64 requestPacketReceiveTime, const HifiSockAddr& senderSockAddr, bool newConnection);
bool isInInterestSet(const SharedNodePointer& nodeA, const SharedNodePointer& nodeB); bool isInInterestSet(const SharedNodePointer& nodeA, const SharedNodePointer& nodeB);

View file

@ -35,6 +35,10 @@ NodeConnectionData NodeConnectionData::fromDataStream(QDataStream& dataStream, c
// now the machine fingerprint // now the machine fingerprint
dataStream >> newHeader.machineFingerprint; dataStream >> newHeader.machineFingerprint;
dataStream >> newHeader.connectReason;
dataStream >> newHeader.previousConnectionUpTime;
} }
dataStream >> newHeader.lastPingTimestamp; dataStream >> newHeader.lastPingTimestamp;

View file

@ -31,6 +31,8 @@ public:
QString placeName; QString placeName;
QString hardwareAddress; QString hardwareAddress;
QUuid machineFingerprint; QUuid machineFingerprint;
quint32 connectReason;
quint64 previousConnectionUpTime;
QByteArray protocolVersion; QByteArray protocolVersion;
}; };

View file

@ -632,6 +632,7 @@ void LimitedNodeList::processKillNode(ReceivedMessage& message) {
} }
void LimitedNodeList::handleNodeKill(const SharedNodePointer& node, ConnectionID nextConnectionID) { void LimitedNodeList::handleNodeKill(const SharedNodePointer& node, ConnectionID nextConnectionID) {
_nodeDisconnectTimestamp = usecTimestampNow();
qCDebug(networking) << "Killed" << *node; qCDebug(networking) << "Killed" << *node;
node->stopPingTimer(); node->stopPingTimer();
emit nodeKilled(node); emit nodeKilled(node);

View file

@ -108,6 +108,13 @@ public:
}; };
Q_ENUM(ConnectionStep); Q_ENUM(ConnectionStep);
enum ConnectReason : quint32 {
Connect = 0,
SilentDomainDisconnect
};
Q_ENUM(ConnectReason);
QUuid getSessionUUID() const; QUuid getSessionUUID() const;
void setSessionUUID(const QUuid& sessionUUID); void setSessionUUID(const QUuid& sessionUUID);
Node::LocalID getSessionLocalID() const; Node::LocalID getSessionLocalID() const;
@ -461,6 +468,9 @@ protected:
} }
std::unordered_map<QUuid, ConnectionID> _connectionIDs; std::unordered_map<QUuid, ConnectionID> _connectionIDs;
quint64 _nodeConnectTimestamp{ 0 };
quint64 _nodeDisconnectTimestamp{ 0 };
ConnectReason _connectReason { Connect };
private slots: private slots:
void flagTimeForConnectionStep(ConnectionStep connectionStep, quint64 timestamp); void flagTimeForConnectionStep(ConnectionStep connectionStep, quint64 timestamp);

View file

@ -113,6 +113,8 @@ NodeList::NodeList(char newOwnerType, int socketListenPort, int dtlsListenPort)
connect(&_domainHandler, SIGNAL(connectedToDomain(QUrl)), &_keepAlivePingTimer, SLOT(start())); connect(&_domainHandler, SIGNAL(connectedToDomain(QUrl)), &_keepAlivePingTimer, SLOT(start()));
connect(&_domainHandler, &DomainHandler::disconnectedFromDomain, &_keepAlivePingTimer, &QTimer::stop); connect(&_domainHandler, &DomainHandler::disconnectedFromDomain, &_keepAlivePingTimer, &QTimer::stop);
connect(&_domainHandler, &DomainHandler::limitOfSilentDomainCheckInsReached, this, [this]() { _connectReason = LimitedNodeList::SilentDomainDisconnect; });
// set our sockAddrBelongsToDomainOrNode method as the connection creation filter for the udt::Socket // set our sockAddrBelongsToDomainOrNode method as the connection creation filter for the udt::Socket
using std::placeholders::_1; using std::placeholders::_1;
_nodeSocket.setConnectionCreationFilterOperator(std::bind(&NodeList::sockAddrBelongsToDomainOrNode, this, _1)); _nodeSocket.setConnectionCreationFilterOperator(std::bind(&NodeList::sockAddrBelongsToDomainOrNode, this, _1));
@ -414,6 +416,16 @@ void NodeList::sendDomainServerCheckIn() {
// now add the machine fingerprint // now add the machine fingerprint
auto accountManager = DependencyManager::get<AccountManager>(); auto accountManager = DependencyManager::get<AccountManager>();
packetStream << FingerprintUtils::getMachineFingerprint(); packetStream << FingerprintUtils::getMachineFingerprint();
packetStream << _connectReason;
if (_nodeDisconnectTimestamp < _nodeConnectTimestamp) {
_nodeDisconnectTimestamp = usecTimestampNow();
}
quint64 previousConnectionUptime = _nodeConnectTimestamp ? _nodeDisconnectTimestamp - _nodeConnectTimestamp : 0;
packetStream << previousConnectionUptime;
} }
packetStream << quint64(duration_cast<microseconds>(system_clock::now().time_since_epoch()).count()); packetStream << quint64(duration_cast<microseconds>(system_clock::now().time_since_epoch()).count());
@ -666,6 +678,14 @@ void NodeList::processDomainServerList(QSharedPointer<ReceivedMessage> message)
quint64 domainServerCheckinProcessingTime; quint64 domainServerCheckinProcessingTime;
packetStream >> domainServerCheckinProcessingTime; packetStream >> domainServerCheckinProcessingTime;
bool newConnection;
packetStream >> newConnection;
if (newConnection) {
_nodeConnectTimestamp = usecTimestampNow();
_connectReason = Connect;
}
qint64 pingLagTime = (now - qint64(connectRequestTimestamp)) / qint64(USECS_PER_MSEC); qint64 pingLagTime = (now - qint64(connectRequestTimestamp)) / qint64(USECS_PER_MSEC);
qint64 domainServerRequestLag = (qint64(domainServerPingSendTime - domainServerCheckinProcessingTime) - qint64(connectRequestTimestamp)) / qint64(USECS_PER_MSEC);; qint64 domainServerRequestLag = (qint64(domainServerPingSendTime - domainServerCheckinProcessingTime) - qint64(connectRequestTimestamp)) / qint64(USECS_PER_MSEC);;

View file

@ -27,7 +27,7 @@ PacketVersion versionForPacketType(PacketType packetType) {
case PacketType::StunResponse: case PacketType::StunResponse:
return 17; return 17;
case PacketType::DomainList: case PacketType::DomainList:
return static_cast<PacketVersion>(DomainListVersion::HasTimestamp); return static_cast<PacketVersion>(DomainListVersion::HasConnectReason);
case PacketType::EntityAdd: case PacketType::EntityAdd:
case PacketType::EntityClone: case PacketType::EntityClone:
case PacketType::EntityEdit: case PacketType::EntityEdit:
@ -72,7 +72,7 @@ PacketVersion versionForPacketType(PacketType packetType) {
return static_cast<PacketVersion>(DomainConnectionDeniedVersion::IncludesExtraInfo); return static_cast<PacketVersion>(DomainConnectionDeniedVersion::IncludesExtraInfo);
case PacketType::DomainConnectRequest: case PacketType::DomainConnectRequest:
return static_cast<PacketVersion>(DomainConnectRequestVersion::HasTimestamp); return static_cast<PacketVersion>(DomainConnectRequestVersion::HasReason);
case PacketType::DomainServerAddedNode: case PacketType::DomainServerAddedNode:
return static_cast<PacketVersion>(DomainServerAddedNodeVersion::PermissionsGrid); return static_cast<PacketVersion>(DomainServerAddedNodeVersion::PermissionsGrid);

View file

@ -345,7 +345,8 @@ enum class DomainConnectRequestVersion : PacketVersion {
HasMACAddress, HasMACAddress,
HasMachineFingerprint, HasMachineFingerprint,
AlwaysHasMachineFingerprint, AlwaysHasMachineFingerprint,
HasTimestamp HasTimestamp,
HasReason
}; };
enum class DomainConnectionDeniedVersion : PacketVersion { enum class DomainConnectionDeniedVersion : PacketVersion {
@ -365,7 +366,8 @@ enum class DomainListVersion : PacketVersion {
GetUsernameFromUUIDSupport, GetUsernameFromUUIDSupport,
GetMachineFingerprintFromUUIDSupport, GetMachineFingerprintFromUUIDSupport,
AuthenticationOptional, AuthenticationOptional,
HasTimestamp HasTimestamp,
HasConnectReason
}; };
enum class AudioVersion : PacketVersion { enum class AudioVersion : PacketVersion {

View file

@ -63,8 +63,8 @@ void Socket::bind(const QHostAddress& address, quint16 port) {
auto sd = _udpSocket.socketDescriptor(); auto sd = _udpSocket.socketDescriptor();
int val = 0; // false int val = 0; // false
if (setsockopt(sd, IPPROTO_IP, IP_DONTFRAGMENT, (const char *)&val, sizeof(val))) { if (setsockopt(sd, IPPROTO_IP, IP_DONTFRAGMENT, (const char *)&val, sizeof(val))) {
int wsaError = WSAGetLastError(); auto wsaErr = WSAGetLastError();
qCWarning(networking) << "Socket::bind Cannot setsockopt IP_DONTFRAGMENT" << wsaError; qCWarning(networking) << "Socket::bind Cannot setsockopt IP_DONTFRAGMENT" << wsaErr;
} }
#endif #endif
} }
@ -242,7 +242,6 @@ qint64 Socket::writeDatagram(const QByteArray& datagram, const HifiSockAddr& soc
#endif #endif
qCDebug(networking) << "udt::writeDatagram (" << _udpSocket.state() << ") error - " << wsaError << _udpSocket.error() << "(" << _udpSocket.errorString() << ")" qCDebug(networking) << "udt::writeDatagram (" << _udpSocket.state() << ") error - " << wsaError << _udpSocket.error() << "(" << _udpSocket.errorString() << ")"
<< (pending ? "pending bytes:" : "pending:") << pending; << (pending ? "pending bytes:" : "pending:") << pending;
#ifdef DEBUG_EVENT_QUEUE #ifdef DEBUG_EVENT_QUEUE
int nodeListQueueSize = ::hifi::qt::getEventQueueSize(thread()); int nodeListQueueSize = ::hifi::qt::getEventQueueSize(thread());
qCDebug(networking) << "Networking queue size - " << nodeListQueueSize; qCDebug(networking) << "Networking queue size - " << nodeListQueueSize;