Checkpoint MTBF uptime reporting

This commit is contained in:
Roxanne Skelly 2019-06-11 12:41:45 -07:00
parent 250a4490d2
commit 8a3672f3c5
9 changed files with 55 additions and 17 deletions

View file

@ -57,7 +57,7 @@ void DomainGatekeeper::processConnectRequestPacket(QSharedPointer<ReceivedMessag
if (message->getSize() == 0) {
return;
}
QDataStream packetStream(message->getMessage());
// read a NodeConnectionData object from the packet so we can pass around this data while we're inspecting it
@ -88,11 +88,10 @@ void DomainGatekeeper::processConnectRequestPacket(QSharedPointer<ReceivedMessag
auto pendingAssignment = _pendingAssignedNodes.find(nodeConnection.connectUUID);
SharedNodePointer node;
QString username;
if (pendingAssignment != _pendingAssignedNodes.end()) {
node = processAssignmentConnectRequest(nodeConnection, pendingAssignment->second);
} else if (!STATICALLY_ASSIGNED_NODES.contains(nodeConnection.nodeType)) {
QString username;
QByteArray usernameSignature;
if (message->getBytesLeftToRead() > 0) {
@ -122,9 +121,13 @@ void DomainGatekeeper::processConnectRequestPacket(QSharedPointer<ReceivedMessag
nodeData->setNodeInterestSet(safeInterestSet);
nodeData->setPlaceName(nodeConnection.placeName);
qDebug() << "Allowed connection from node" << uuidStringWithoutCurlyBraces(node->getUUID())
<< "on" << message->getSenderSockAddr() << "with MAC" << nodeConnection.hardwareAddress
<< "and machine fingerprint" << nodeConnection.machineFingerprint;
qDebug() << "Allowed connection from node" << uuidStringWithoutCurlyBraces(node->getUUID())
<< "on" << message->getSenderSockAddr()
<< "with MAC" << nodeConnection.hardwareAddress
<< "and machine fingerprint" << nodeConnection.machineFingerprint
<< "user" << username
<< "reason" << nodeConnection.connectReason
<< "previous connection uptime" << nodeConnection.previousConnectionUpTime/USECS_PER_MSEC << "msec";
// signal that we just connected a node so the DomainServer can get it a list
// and broadcast its presence right away
@ -468,7 +471,7 @@ SharedNodePointer DomainGatekeeper::processAgentConnectRequest(const NodeConnect
if (node->getPublicSocket() == nodeConnection.publicSockAddr && node->getLocalSocket() == nodeConnection.localSockAddr) {
// we have a node that already has these exact sockets
// this can occur if a node is failing to connect to the domain
// remove the old node before adding the new node
qDebug() << "Deleting existing connection from same sockaddr: " << node->getUUID();
existingNodeID = node->getUUID();
@ -842,7 +845,7 @@ void DomainGatekeeper::processICEPingPacket(QSharedPointer<ReceivedMessage> mess
// before we respond to this ICE ping packet, make sure we have a peer in the list that matches
QUuid icePeerID = QUuid::fromRfc4122({ message->getRawMessage(), NUM_BYTES_RFC4122_UUID });
if (_icePeers.contains(icePeerID)) {
auto pingReplyPacket = limitedNodeList->constructICEPingReplyPacket(*message, limitedNodeList->getSessionUUID());
@ -882,7 +885,6 @@ void DomainGatekeeper::getGroupMemberships(const QString& username) {
QJsonArray groupIDs = QJsonArray::fromStringList(groupIDSet.toList());
json["groups"] = groupIDs;
// if we've already asked, wait for the answer before asking again
QString lowerUsername = username.toLower();
if (_inFlightGroupMembershipsRequests.contains(lowerUsername)) {
@ -969,7 +971,7 @@ void DomainGatekeeper::getDomainOwnerFriendsList() {
QNetworkAccessManager::GetOperation, callbackParams, QByteArray(),
NULL, QVariantMap());
}
}
void DomainGatekeeper::getDomainOwnerFriendsListJSONCallback(QNetworkReply* requestReply) {

View file

@ -35,6 +35,10 @@ NodeConnectionData NodeConnectionData::fromDataStream(QDataStream& dataStream, c
// now the machine fingerprint
dataStream >> newHeader.machineFingerprint;
dataStream >> newHeader.connectReason;
dataStream >> newHeader.previousConnectionUpTime;
}
dataStream >> newHeader.lastPingTimestamp;

View file

@ -31,6 +31,8 @@ public:
QString placeName;
QString hardwareAddress;
QUuid machineFingerprint;
quint32 connectReason;
quint64 previousConnectionUpTime;
QByteArray protocolVersion;
};

View file

@ -632,6 +632,7 @@ void LimitedNodeList::processKillNode(ReceivedMessage& message) {
}
void LimitedNodeList::handleNodeKill(const SharedNodePointer& node, ConnectionID nextConnectionID) {
_nodeDisconnectTimestamp = usecTimestampNow();
qCDebug(networking) << "Killed" << *node;
node->stopPingTimer();
emit nodeKilled(node);

View file

@ -337,6 +337,12 @@ public:
NodeType::EntityScriptServer
};
// Reason a node attaches to its domain-connect request. The value is written
// to the connect-request packet stream and read back on the receiving side
// into a quint32 connectReason field.
enum DomainConnectReason : quint32 {
    // Sent while outstanding check-ins are below MAX_SILENT_DOMAIN_SERVER_CHECK_INS.
    START = 0,
    // Sent once outstanding check-ins reach MAX_SILENT_DOMAIN_SERVER_CHECK_INS.
    RECONNECT = 1
};
Q_ENUM(DomainConnectReason);
public slots:
void reset();
void eraseAllNodes();
@ -461,6 +467,8 @@ protected:
}
std::unordered_map<QUuid, ConnectionID> _connectionIDs;
quint64 _nodeConnectTimestamp { 0 };
quint64 _nodeDisconnectTimestamp { 0 };
private slots:
void flagTimeForConnectionStep(ConnectionStep connectionStep, quint64 timestamp);

View file

@ -296,6 +296,8 @@ void NodeList::addSetOfNodeTypesToNodeInterestSet(const NodeSet& setOfNodeTypes)
void NodeList::sendDomainServerCheckIn() {
int outstandingCheckins = _domainHandler.getCheckInPacketsSinceLastReply();
// On ThreadedAssignments (assignment clients), this function
// is called by the server check-in timer thread
// not the NodeList thread. Calling it on the NodeList thread
@ -414,6 +416,16 @@ void NodeList::sendDomainServerCheckIn() {
// now add the machine fingerprint
auto accountManager = DependencyManager::get<AccountManager>();
packetStream << FingerprintUtils::getMachineFingerprint();
packetStream << ((outstandingCheckins >= MAX_SILENT_DOMAIN_SERVER_CHECK_INS) ? RECONNECT : START);
if (_nodeDisconnectTimestamp < _nodeConnectTimestamp) {
_nodeDisconnectTimestamp = usecTimestampNow();
}
quint64 previousConnectionUptime = _nodeConnectTimestamp ? _nodeDisconnectTimestamp - _nodeConnectTimestamp : 0;
packetStream << previousConnectionUptime;
}
packetStream << quint64(duration_cast<microseconds>(system_clock::now().time_since_epoch()).count());
@ -439,7 +451,6 @@ void NodeList::sendDomainServerCheckIn() {
// Send duplicate check-ins in the exponentially increasing sequence 1, 1, 2, 4, ...
static const int MAX_CHECKINS_TOGETHER = 20;
static const int REBIND_CHECKIN_COUNT = 2;
int outstandingCheckins = _domainHandler.getCheckInPacketsSinceLastReply();
if (outstandingCheckins > REBIND_CHECKIN_COUNT) {
_nodeSocket.rebind();
@ -626,6 +637,7 @@ void NodeList::processDomainServerConnectionTokenPacket(QSharedPointer<ReceivedM
_domainHandler.setConnectionToken(QUuid::fromRfc4122(message->readWithoutCopy(NUM_BYTES_RFC4122_UUID)));
_domainHandler.clearPendingCheckins();
_nodeConnectTimestamp = usecTimestampNow();
sendDomainServerCheckIn();
}

View file

@ -72,7 +72,7 @@ PacketVersion versionForPacketType(PacketType packetType) {
return static_cast<PacketVersion>(DomainConnectionDeniedVersion::IncludesExtraInfo);
case PacketType::DomainConnectRequest:
return static_cast<PacketVersion>(DomainConnectRequestVersion::HasTimestamp);
return static_cast<PacketVersion>(DomainConnectRequestVersion::HasReason);
case PacketType::DomainServerAddedNode:
return static_cast<PacketVersion>(DomainServerAddedNodeVersion::PermissionsGrid);

View file

@ -345,7 +345,8 @@ enum class DomainConnectRequestVersion : PacketVersion {
HasMACAddress,
HasMachineFingerprint,
AlwaysHasMachineFingerprint,
HasTimestamp
HasTimestamp,
HasReason
};
enum class DomainConnectionDeniedVersion : PacketVersion {

View file

@ -59,10 +59,13 @@ void Socket::bind(const QHostAddress& address, quint16 port) {
auto sd = _udpSocket.socketDescriptor();
int val = IP_PMTUDISC_DONT;
setsockopt(sd, IPPROTO_IP, IP_MTU_DISCOVER, &val, sizeof(val));
#elif defined(Q_OS_WINDOWS)
#elif defined(Q_OS_WIN)
auto sd = _udpSocket.socketDescriptor();
int val = 0; // false
setsockopt(sd, IPPROTO_IP, IP_DONTFRAGMENT, &val, sizeof(val));
if (setsockopt(sd, IPPROTO_IP, IP_DONTFRAGMENT, (const char*)&val, sizeof(val))) {
auto err = WSAGetLastError();
qCWarning(networking) << "Socket::bind Cannot setsockopt IP_DONTFRAGMENT" << err;
}
#endif
}
}
@ -232,14 +235,17 @@ qint64 Socket::writeDatagram(const QByteArray& datagram, const HifiSockAddr& soc
}
qint64 bytesWritten = _udpSocket.writeDatagram(datagram, sockAddr.getAddress(), sockAddr.getPort());
if (bytesWritten < 0) {
qCDebug(networking) << "udt::writeDatagram (" << _udpSocket.state() << ") error - " << _udpSocket.error() << "(" << _udpSocket.errorString() << ")";
#ifdef WIN32
int wsaError = WSAGetLastError();
qCDebug(networking) << "windows socket error " << wsaError;
#endif
qCDebug(networking) << "udt::writeDatagram (" << _udpSocket.state() << ") error - " << _udpSocket.error() << "(" << _udpSocket.errorString() << ")";
#ifdef DEBUG_EVENT_QUEUE
int nodeListQueueSize = ::hifi::qt::getEventQueueSize(thread());
qCDebug(networking) << "Networking queue size - " << nodeListQueueSize;
@ -506,11 +512,13 @@ std::vector<HifiSockAddr> Socket::getConnectionSockAddrs() {
}
void Socket::handleSocketError(QAbstractSocket::SocketError socketError) {
qCDebug(networking) << "udt::Socket (" << _udpSocket.state() << ") error - " << socketError << "(" << _udpSocket.errorString() << ")";
#ifdef WIN32
int wsaError = WSAGetLastError();
qCDebug(networking) << "windows socket error " << wsaError;
#endif
qCDebug(networking) << "udt::Socket (" << _udpSocket.state() << ") error - " << socketError << "(" << _udpSocket.errorString() << ")";
#ifdef DEBUG_EVENT_QUEUE
int nodeListQueueSize = ::hifi::qt::getEventQueueSize(thread());
qCDebug(networking) << "Networking queue size - " << nodeListQueueSize;