From 8a3672f3c5c15b722c4010e65b024189ef97a122 Mon Sep 17 00:00:00 2001 From: Roxanne Skelly Date: Tue, 11 Jun 2019 12:41:45 -0700 Subject: [PATCH] Checkpoint MTBF uptime reporting --- domain-server/src/DomainGatekeeper.cpp | 22 ++++++++++--------- domain-server/src/NodeConnectionData.cpp | 4 ++++ domain-server/src/NodeConnectionData.h | 2 ++ libraries/networking/src/LimitedNodeList.cpp | 1 + libraries/networking/src/LimitedNodeList.h | 8 +++++++ libraries/networking/src/NodeList.cpp | 14 +++++++++++- .../networking/src/udt/PacketHeaders.cpp | 2 +- libraries/networking/src/udt/PacketHeaders.h | 3 ++- libraries/networking/src/udt/Socket.cpp | 16 ++++++++++---- 9 files changed, 55 insertions(+), 17 deletions(-) diff --git a/domain-server/src/DomainGatekeeper.cpp b/domain-server/src/DomainGatekeeper.cpp index f5705a570b..289d583719 100644 --- a/domain-server/src/DomainGatekeeper.cpp +++ b/domain-server/src/DomainGatekeeper.cpp @@ -57,7 +57,7 @@ void DomainGatekeeper::processConnectRequestPacket(QSharedPointergetSize() == 0) { return; } - + QDataStream packetStream(message->getMessage()); // read a NodeConnectionData object from the packet so we can pass around this data while we're inspecting it @@ -88,11 +88,10 @@ void DomainGatekeeper::processConnectRequestPacket(QSharedPointersecond); } else if (!STATICALLY_ASSIGNED_NODES.contains(nodeConnection.nodeType)) { - QString username; QByteArray usernameSignature; if (message->getBytesLeftToRead() > 0) { @@ -122,9 +121,13 @@ void DomainGatekeeper::processConnectRequestPacket(QSharedPointersetNodeInterestSet(safeInterestSet); nodeData->setPlaceName(nodeConnection.placeName); - qDebug() << "Allowed connection from node" << uuidStringWithoutCurlyBraces(node->getUUID()) - << "on" << message->getSenderSockAddr() << "with MAC" << nodeConnection.hardwareAddress - << "and machine fingerprint" << nodeConnection.machineFingerprint; + qDebug() << "Allowed connection from node" << uuidStringWithoutCurlyBraces(node->getUUID()) + << "on" << message->getSenderSockAddr() + << "with MAC" << nodeConnection.hardwareAddress + << "and machine fingerprint" << nodeConnection.machineFingerprint + << "user" << username + << "reason" << nodeConnection.connectReason + << "previous connection uptime" << nodeConnection.previousConnectionUpTime/USECS_PER_MSEC << "msec"; // signal that we just connected a node so the DomainServer can get it a list // and broadcast its presence right away @@ -468,7 +471,7 @@ SharedNodePointer DomainGatekeeper::processAgentConnectRequest(const NodeConnect if (node->getPublicSocket() == nodeConnection.publicSockAddr && node->getLocalSocket() == nodeConnection.localSockAddr) { // we have a node that already has these exact sockets // this can occur if a node is failing to connect to the domain - + // remove the old node before adding the new node qDebug() << "Deleting existing connection from same sockaddr: " << node->getUUID(); existingNodeID = node->getUUID(); @@ -842,7 +845,7 @@ void DomainGatekeeper::processICEPingPacket(QSharedPointer mess // before we respond to this ICE ping packet, make sure we have a peer in the list that matches QUuid icePeerID = QUuid::fromRfc4122({ message->getRawMessage(), NUM_BYTES_RFC4122_UUID }); - + if (_icePeers.contains(icePeerID)) { auto pingReplyPacket = limitedNodeList->constructICEPingReplyPacket(*message, limitedNodeList->getSessionUUID()); @@ -882,7 +885,6 @@ void DomainGatekeeper::getGroupMemberships(const QString& username) { QJsonArray groupIDs = QJsonArray::fromStringList(groupIDSet.toList()); json["groups"] = groupIDs; - // if we've already asked, wait for the answer before asking again QString lowerUsername = username.toLower(); if (_inFlightGroupMembershipsRequests.contains(lowerUsername)) { @@ -969,7 +971,7 @@ void DomainGatekeeper::getDomainOwnerFriendsList() { QNetworkAccessManager::GetOperation, callbackParams, QByteArray(), NULL, QVariantMap()); } - + } void DomainGatekeeper::getDomainOwnerFriendsListJSONCallback(QNetworkReply* requestReply) { diff --git a/domain-server/src/NodeConnectionData.cpp b/domain-server/src/NodeConnectionData.cpp index b3ea005bd1..b4aaacd749 100644 --- a/domain-server/src/NodeConnectionData.cpp +++ b/domain-server/src/NodeConnectionData.cpp @@ -35,6 +35,10 @@ NodeConnectionData NodeConnectionData::fromDataStream(QDataStream& dataStream, c // now the machine fingerprint dataStream >> newHeader.machineFingerprint; + + dataStream >> newHeader.connectReason; + + dataStream >> newHeader.previousConnectionUpTime; } dataStream >> newHeader.lastPingTimestamp; diff --git a/domain-server/src/NodeConnectionData.h b/domain-server/src/NodeConnectionData.h index 43661f9caf..23eceb0dca 100644 --- a/domain-server/src/NodeConnectionData.h +++ b/domain-server/src/NodeConnectionData.h @@ -31,6 +31,8 @@ public: QString placeName; QString hardwareAddress; QUuid machineFingerprint; + quint32 connectReason; + quint64 previousConnectionUpTime; QByteArray protocolVersion; }; diff --git a/libraries/networking/src/LimitedNodeList.cpp b/libraries/networking/src/LimitedNodeList.cpp index 48f08d6d2e..9f4eb39013 100644 --- a/libraries/networking/src/LimitedNodeList.cpp +++ b/libraries/networking/src/LimitedNodeList.cpp @@ -632,6 +632,7 @@ void LimitedNodeList::processKillNode(ReceivedMessage& message) { } void LimitedNodeList::handleNodeKill(const SharedNodePointer& node, ConnectionID nextConnectionID) { + _nodeDisconnectTimestamp = usecTimestampNow(); qCDebug(networking) << "Killed" << *node; node->stopPingTimer(); emit nodeKilled(node); diff --git a/libraries/networking/src/LimitedNodeList.h b/libraries/networking/src/LimitedNodeList.h index f7ea0ec2ad..42fb5311b1 100644 --- a/libraries/networking/src/LimitedNodeList.h +++ b/libraries/networking/src/LimitedNodeList.h @@ -337,6 +337,12 @@ public: NodeType::EntityScriptServer }; + enum DomainConnectReason : quint32 { + START = 0, + RECONNECT + }; + Q_ENUM(DomainConnectReason); + public slots: void reset(); void eraseAllNodes(); @@ -461,6 +467,8 @@ protected: } std::unordered_map _connectionIDs; + quint64 _nodeConnectTimestamp { 0 }; + quint64 _nodeDisconnectTimestamp { 0 }; private slots: void flagTimeForConnectionStep(ConnectionStep connectionStep, quint64 timestamp); diff --git a/libraries/networking/src/NodeList.cpp b/libraries/networking/src/NodeList.cpp index 0e6b5503d7..c352b9f5ea 100644 --- a/libraries/networking/src/NodeList.cpp +++ b/libraries/networking/src/NodeList.cpp @@ -296,6 +296,8 @@ void NodeList::addSetOfNodeTypesToNodeInterestSet(const NodeSet& setOfNodeTypes) void NodeList::sendDomainServerCheckIn() { + int outstandingCheckins = _domainHandler.getCheckInPacketsSinceLastReply(); + // On ThreadedAssignments (assignment clients), this function // is called by the server check-in timer thread // not the NodeList thread. Calling it on the NodeList thread @@ -414,6 +416,16 @@ void NodeList::sendDomainServerCheckIn() { // now add the machine fingerprint auto accountManager = DependencyManager::get(); packetStream << FingerprintUtils::getMachineFingerprint(); + + packetStream << ((outstandingCheckins >= MAX_SILENT_DOMAIN_SERVER_CHECK_INS) ? RECONNECT : START); + + if (_nodeDisconnectTimestamp < _nodeConnectTimestamp) { + _nodeDisconnectTimestamp = usecTimestampNow(); + } + quint64 previousConnectionUptime = _nodeConnectTimestamp ? _nodeDisconnectTimestamp - _nodeConnectTimestamp : 0; + + packetStream << previousConnectionUptime; + } packetStream << quint64(duration_cast(system_clock::now().time_since_epoch()).count()); @@ -439,7 +451,6 @@ void NodeList::sendDomainServerCheckIn() { // Send duplicate check-ins in the exponentially increasing sequence 1, 1, 2, 4, ... static const int MAX_CHECKINS_TOGETHER = 20; static const int REBIND_CHECKIN_COUNT = 2; - int outstandingCheckins = _domainHandler.getCheckInPacketsSinceLastReply(); if (outstandingCheckins > REBIND_CHECKIN_COUNT) { _nodeSocket.rebind(); @@ -626,6 +637,7 @@ void NodeList::processDomainServerConnectionTokenPacket(QSharedPointerreadWithoutCopy(NUM_BYTES_RFC4122_UUID))); _domainHandler.clearPendingCheckins(); + _nodeConnectTimestamp = usecTimestampNow(); sendDomainServerCheckIn(); } diff --git a/libraries/networking/src/udt/PacketHeaders.cpp b/libraries/networking/src/udt/PacketHeaders.cpp index 566e1e4946..7ebaf5224f 100644 --- a/libraries/networking/src/udt/PacketHeaders.cpp +++ b/libraries/networking/src/udt/PacketHeaders.cpp @@ -72,7 +72,7 @@ PacketVersion versionForPacketType(PacketType packetType) { return static_cast(DomainConnectionDeniedVersion::IncludesExtraInfo); case PacketType::DomainConnectRequest: - return static_cast(DomainConnectRequestVersion::HasTimestamp); + return static_cast(DomainConnectRequestVersion::HasReason); case PacketType::DomainServerAddedNode: return static_cast(DomainServerAddedNodeVersion::PermissionsGrid); diff --git a/libraries/networking/src/udt/PacketHeaders.h b/libraries/networking/src/udt/PacketHeaders.h index 903c1f4c93..5baf5448dd 100644 --- a/libraries/networking/src/udt/PacketHeaders.h +++ b/libraries/networking/src/udt/PacketHeaders.h @@ -345,7 +345,8 @@ enum class DomainConnectRequestVersion : PacketVersion { HasMACAddress, HasMachineFingerprint, AlwaysHasMachineFingerprint, - HasTimestamp + HasTimestamp, + HasReason }; enum class DomainConnectionDeniedVersion : PacketVersion { diff --git a/libraries/networking/src/udt/Socket.cpp b/libraries/networking/src/udt/Socket.cpp index 406c2ff213..350c592c61 100644 --- a/libraries/networking/src/udt/Socket.cpp +++ b/libraries/networking/src/udt/Socket.cpp @@ -59,10 +59,13 @@ void Socket::bind(const QHostAddress& address, quint16 port) { auto sd = _udpSocket.socketDescriptor(); int val = IP_PMTUDISC_DONT; setsockopt(sd, IPPROTO_IP, IP_MTU_DISCOVER, &val, sizeof(val)); -#elif defined(Q_OS_WINDOWS) +#elif defined(Q_OS_WIN) auto sd = _udpSocket.socketDescriptor(); int val = 0; // false - setsockopt(sd, IPPROTO_IP, IP_DONTFRAGMENT, &val, sizeof(val)); + if (setsockopt(sd, IPPROTO_IP, IP_DONTFRAGMENT, (const char*)&val, sizeof(val))) { + auto err = WSAGetLastError(); + qCWarning(networking) << "Socket::bind Cannot setsockopt IP_DONTFRAGMENT" << err; + } #endif } } @@ -232,14 +235,17 @@ qint64 Socket::writeDatagram(const QByteArray& datagram, const HifiSockAddr& soc } qint64 bytesWritten = _udpSocket.writeDatagram(datagram, sockAddr.getAddress(), sockAddr.getPort()); + + if (bytesWritten < 0) { - qCDebug(networking) << "udt::writeDatagram (" << _udpSocket.state() << ") error - " << _udpSocket.error() << "(" << _udpSocket.errorString() << ")"; #ifdef WIN32 int wsaError = WSAGetLastError(); qCDebug(networking) << "windows socket error " << wsaError; #endif + qCDebug(networking) << "udt::writeDatagram (" << _udpSocket.state() << ") error - " << _udpSocket.error() << "(" << _udpSocket.errorString() << ")"; + #ifdef DEBUG_EVENT_QUEUE int nodeListQueueSize = ::hifi::qt::getEventQueueSize(thread()); qCDebug(networking) << "Networking queue size - " << nodeListQueueSize; @@ -506,11 +512,13 @@ std::vector Socket::getConnectionSockAddrs() { } void Socket::handleSocketError(QAbstractSocket::SocketError socketError) { - qCDebug(networking) << "udt::Socket (" << _udpSocket.state() << ") error - " << socketError << "(" << _udpSocket.errorString() << ")"; + #ifdef WIN32 int wsaError = WSAGetLastError(); qCDebug(networking) << "windows socket error " << wsaError; #endif + + qCDebug(networking) << "udt::Socket (" << _udpSocket.state() << ") error - " << socketError << "(" << _udpSocket.errorString() << ")"; #ifdef DEBUG_EVENT_QUEUE int nodeListQueueSize = ::hifi::qt::getEventQueueSize(thread()); qCDebug(networking) << "Networking queue size - " << nodeListQueueSize;