Merge pull request #15746 from roxanneskelly/bugz516

BUGZ-628 - Add quasi-MTBF logging to track disconnects
This commit is contained in:
Howard Stearns 2019-06-12 15:48:45 -07:00 committed by GitHub
commit 0c1df749b9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 62 additions and 21 deletions

View file

@ -57,7 +57,7 @@ void DomainGatekeeper::processConnectRequestPacket(QSharedPointer<ReceivedMessag
if (message->getSize() == 0) {
return;
}
QDataStream packetStream(message->getMessage());
// read a NodeConnectionData object from the packet so we can pass around this data while we're inspecting it
@ -88,11 +88,10 @@ void DomainGatekeeper::processConnectRequestPacket(QSharedPointer<ReceivedMessag
auto pendingAssignment = _pendingAssignedNodes.find(nodeConnection.connectUUID);
SharedNodePointer node;
QString username;
if (pendingAssignment != _pendingAssignedNodes.end()) {
node = processAssignmentConnectRequest(nodeConnection, pendingAssignment->second);
} else if (!STATICALLY_ASSIGNED_NODES.contains(nodeConnection.nodeType)) {
QString username;
QByteArray usernameSignature;
if (message->getBytesLeftToRead() > 0) {
@ -122,9 +121,13 @@ void DomainGatekeeper::processConnectRequestPacket(QSharedPointer<ReceivedMessag
nodeData->setNodeInterestSet(safeInterestSet);
nodeData->setPlaceName(nodeConnection.placeName);
qDebug() << "Allowed connection from node" << uuidStringWithoutCurlyBraces(node->getUUID())
<< "on" << message->getSenderSockAddr() << "with MAC" << nodeConnection.hardwareAddress
<< "and machine fingerprint" << nodeConnection.machineFingerprint;
qDebug() << "Allowed connection from node" << uuidStringWithoutCurlyBraces(node->getUUID())
<< "on" << message->getSenderSockAddr()
<< "with MAC" << nodeConnection.hardwareAddress
<< "and machine fingerprint" << nodeConnection.machineFingerprint
<< "user" << username
<< "reason" << QString(nodeConnection.connectReason ? "SilentDomainDisconnect" : "Connect")
<< "previous connection uptime" << nodeConnection.previousConnectionUpTime/USECS_PER_MSEC << "msec";
// signal that we just connected a node so the DomainServer can get it a list
// and broadcast its presence right away
@ -468,7 +471,7 @@ SharedNodePointer DomainGatekeeper::processAgentConnectRequest(const NodeConnect
if (node->getPublicSocket() == nodeConnection.publicSockAddr && node->getLocalSocket() == nodeConnection.localSockAddr) {
// we have a node that already has these exact sockets
// this can occur if a node is failing to connect to the domain
// remove the old node before adding the new node
qDebug() << "Deleting existing connection from same sockaddr: " << node->getUUID();
existingNodeID = node->getUUID();
@ -842,7 +845,7 @@ void DomainGatekeeper::processICEPingPacket(QSharedPointer<ReceivedMessage> mess
// before we respond to this ICE ping packet, make sure we have a peer in the list that matches
QUuid icePeerID = QUuid::fromRfc4122({ message->getRawMessage(), NUM_BYTES_RFC4122_UUID });
if (_icePeers.contains(icePeerID)) {
auto pingReplyPacket = limitedNodeList->constructICEPingReplyPacket(*message, limitedNodeList->getSessionUUID());
@ -882,7 +885,6 @@ void DomainGatekeeper::getGroupMemberships(const QString& username) {
QJsonArray groupIDs = QJsonArray::fromStringList(groupIDSet.toList());
json["groups"] = groupIDs;
// if we've already asked, wait for the answer before asking again
QString lowerUsername = username.toLower();
if (_inFlightGroupMembershipsRequests.contains(lowerUsername)) {
@ -969,7 +971,7 @@ void DomainGatekeeper::getDomainOwnerFriendsList() {
QNetworkAccessManager::GetOperation, callbackParams, QByteArray(),
NULL, QVariantMap());
}
}
void DomainGatekeeper::getDomainOwnerFriendsListJSONCallback(QNetworkReply* requestReply) {

View file

@ -1079,7 +1079,7 @@ void DomainServer::processListRequestPacket(QSharedPointer<ReceivedMessage> mess
// client-side send time of last connect/domain list request
nodeData->setLastDomainCheckinTimestamp(nodeRequestData.lastPingTimestamp);
sendDomainListToNode(sendingNode, message->getFirstPacketReceiveTime(), message->getSenderSockAddr());
sendDomainListToNode(sendingNode, message->getFirstPacketReceiveTime(), message->getSenderSockAddr(), false);
}
bool DomainServer::isInInterestSet(const SharedNodePointer& nodeA, const SharedNodePointer& nodeB) {
@ -1145,7 +1145,7 @@ void DomainServer::handleConnectedNode(SharedNodePointer newNode, quint64 reques
DomainServerNodeData* nodeData = static_cast<DomainServerNodeData*>(newNode->getLinkedData());
// reply back to the user with a PacketType::DomainList
sendDomainListToNode(newNode, requestReceiveTime, nodeData->getSendingSockAddr());
sendDomainListToNode(newNode, requestReceiveTime, nodeData->getSendingSockAddr(), true);
// if this node is a user (unassigned Agent), signal
if (newNode->getType() == NodeType::Agent && !nodeData->wasAssigned()) {
@ -1161,7 +1161,7 @@ void DomainServer::handleConnectedNode(SharedNodePointer newNode, quint64 reques
broadcastNewNode(newNode);
}
void DomainServer::sendDomainListToNode(const SharedNodePointer& node, quint64 requestPacketReceiveTime, const HifiSockAddr &senderSockAddr) {
void DomainServer::sendDomainListToNode(const SharedNodePointer& node, quint64 requestPacketReceiveTime, const HifiSockAddr &senderSockAddr, bool newConnection) {
const int NUM_DOMAIN_LIST_EXTENDED_HEADER_BYTES = NUM_BYTES_RFC4122_UUID + NLPacket::NUM_BYTES_LOCALID +
NUM_BYTES_RFC4122_UUID + NLPacket::NUM_BYTES_LOCALID + 4;
@ -1181,6 +1181,7 @@ void DomainServer::sendDomainListToNode(const SharedNodePointer& node, quint64 r
extendedHeaderStream << nodeData->getLastDomainCheckinTimestamp();
extendedHeaderStream << quint64(duration_cast<microseconds>(system_clock::now().time_since_epoch()).count());
extendedHeaderStream << quint64(duration_cast<microseconds>(p_high_resolution_clock::now().time_since_epoch()).count()) - requestPacketReceiveTime;
extendedHeaderStream << newConnection;
auto domainListPackets = NLPacketList::create(PacketType::DomainList, extendedHeader);
// always send the node their own UUID back

View file

@ -173,7 +173,7 @@ private:
void handleKillNode(SharedNodePointer nodeToKill);
void broadcastNodeDisconnect(const SharedNodePointer& disconnnectedNode);
void sendDomainListToNode(const SharedNodePointer& node, quint64 requestPacketReceiveTime, const HifiSockAddr& senderSockAddr);
void sendDomainListToNode(const SharedNodePointer& node, quint64 requestPacketReceiveTime, const HifiSockAddr& senderSockAddr, bool newConnection);
bool isInInterestSet(const SharedNodePointer& nodeA, const SharedNodePointer& nodeB);

View file

@ -35,6 +35,10 @@ NodeConnectionData NodeConnectionData::fromDataStream(QDataStream& dataStream, c
// now the machine fingerprint
dataStream >> newHeader.machineFingerprint;
dataStream >> newHeader.connectReason;
dataStream >> newHeader.previousConnectionUpTime;
}
dataStream >> newHeader.lastPingTimestamp;

View file

@ -31,6 +31,8 @@ public:
QString placeName;
QString hardwareAddress;
QUuid machineFingerprint;
quint32 connectReason;
quint64 previousConnectionUpTime;
QByteArray protocolVersion;
};

View file

@ -632,6 +632,7 @@ void LimitedNodeList::processKillNode(ReceivedMessage& message) {
}
void LimitedNodeList::handleNodeKill(const SharedNodePointer& node, ConnectionID nextConnectionID) {
_nodeDisconnectTimestamp = usecTimestampNow();
qCDebug(networking) << "Killed" << *node;
node->stopPingTimer();
emit nodeKilled(node);

View file

@ -108,6 +108,13 @@ public:
};
Q_ENUM(ConnectionStep);
enum ConnectReason : quint32 {
Connect = 0,
SilentDomainDisconnect
};
Q_ENUM(ConnectReason);
QUuid getSessionUUID() const;
void setSessionUUID(const QUuid& sessionUUID);
Node::LocalID getSessionLocalID() const;
@ -461,6 +468,9 @@ protected:
}
std::unordered_map<QUuid, ConnectionID> _connectionIDs;
quint64 _nodeConnectTimestamp{ 0 };
quint64 _nodeDisconnectTimestamp{ 0 };
ConnectReason _connectReason { Connect };
private slots:
void flagTimeForConnectionStep(ConnectionStep connectionStep, quint64 timestamp);

View file

@ -113,6 +113,8 @@ NodeList::NodeList(char newOwnerType, int socketListenPort, int dtlsListenPort)
connect(&_domainHandler, SIGNAL(connectedToDomain(QUrl)), &_keepAlivePingTimer, SLOT(start()));
connect(&_domainHandler, &DomainHandler::disconnectedFromDomain, &_keepAlivePingTimer, &QTimer::stop);
connect(&_domainHandler, &DomainHandler::limitOfSilentDomainCheckInsReached, this, [this]() { _connectReason = LimitedNodeList::SilentDomainDisconnect; });
// set our sockAddrBelongsToDomainOrNode method as the connection creation filter for the udt::Socket
using std::placeholders::_1;
_nodeSocket.setConnectionCreationFilterOperator(std::bind(&NodeList::sockAddrBelongsToDomainOrNode, this, _1));
@ -414,6 +416,16 @@ void NodeList::sendDomainServerCheckIn() {
// now add the machine fingerprint
auto accountManager = DependencyManager::get<AccountManager>();
packetStream << FingerprintUtils::getMachineFingerprint();
packetStream << _connectReason;
if (_nodeDisconnectTimestamp < _nodeConnectTimestamp) {
_nodeDisconnectTimestamp = usecTimestampNow();
}
quint64 previousConnectionUptime = _nodeConnectTimestamp ? _nodeDisconnectTimestamp - _nodeConnectTimestamp : 0;
packetStream << previousConnectionUptime;
}
packetStream << quint64(duration_cast<microseconds>(system_clock::now().time_since_epoch()).count());
@ -666,6 +678,14 @@ void NodeList::processDomainServerList(QSharedPointer<ReceivedMessage> message)
quint64 domainServerCheckinProcessingTime;
packetStream >> domainServerCheckinProcessingTime;
bool newConnection;
packetStream >> newConnection;
if (newConnection) {
_nodeConnectTimestamp = usecTimestampNow();
_connectReason = Connect;
}
qint64 pingLagTime = (now - qint64(connectRequestTimestamp)) / qint64(USECS_PER_MSEC);
qint64 domainServerRequestLag = (qint64(domainServerPingSendTime - domainServerCheckinProcessingTime) - qint64(connectRequestTimestamp)) / qint64(USECS_PER_MSEC);;

View file

@ -27,7 +27,7 @@ PacketVersion versionForPacketType(PacketType packetType) {
case PacketType::StunResponse:
return 17;
case PacketType::DomainList:
return static_cast<PacketVersion>(DomainListVersion::HasTimestamp);
return static_cast<PacketVersion>(DomainListVersion::HasConnectReason);
case PacketType::EntityAdd:
case PacketType::EntityClone:
case PacketType::EntityEdit:
@ -72,7 +72,7 @@ PacketVersion versionForPacketType(PacketType packetType) {
return static_cast<PacketVersion>(DomainConnectionDeniedVersion::IncludesExtraInfo);
case PacketType::DomainConnectRequest:
return static_cast<PacketVersion>(DomainConnectRequestVersion::HasTimestamp);
return static_cast<PacketVersion>(DomainConnectRequestVersion::HasReason);
case PacketType::DomainServerAddedNode:
return static_cast<PacketVersion>(DomainServerAddedNodeVersion::PermissionsGrid);

View file

@ -345,7 +345,8 @@ enum class DomainConnectRequestVersion : PacketVersion {
HasMACAddress,
HasMachineFingerprint,
AlwaysHasMachineFingerprint,
HasTimestamp
HasTimestamp,
HasReason
};
enum class DomainConnectionDeniedVersion : PacketVersion {
@ -365,7 +366,8 @@ enum class DomainListVersion : PacketVersion {
GetUsernameFromUUIDSupport,
GetMachineFingerprintFromUUIDSupport,
AuthenticationOptional,
HasTimestamp
HasTimestamp,
HasConnectReason
};
enum class AudioVersion : PacketVersion {

View file

@ -63,8 +63,8 @@ void Socket::bind(const QHostAddress& address, quint16 port) {
auto sd = _udpSocket.socketDescriptor();
int val = 0; // false
if (setsockopt(sd, IPPROTO_IP, IP_DONTFRAGMENT, (const char *)&val, sizeof(val))) {
int wsaError = WSAGetLastError();
qCWarning(networking) << "Socket::bind Cannot setsockopt IP_DONTFRAGMENT" << wsaError;
auto wsaErr = WSAGetLastError();
qCWarning(networking) << "Socket::bind Cannot setsockopt IP_DONTFRAGMENT" << wsaErr;
}
#endif
}
@ -242,7 +242,6 @@ qint64 Socket::writeDatagram(const QByteArray& datagram, const HifiSockAddr& soc
#endif
qCDebug(networking) << "udt::writeDatagram (" << _udpSocket.state() << ") error - " << wsaError << _udpSocket.error() << "(" << _udpSocket.errorString() << ")"
<< (pending ? "pending bytes:" : "pending:") << pending;
#ifdef DEBUG_EVENT_QUEUE
int nodeListQueueSize = ::hifi::qt::getEventQueueSize(thread());
qCDebug(networking) << "Networking queue size - " << nodeListQueueSize;