make the deadlock watchdog more generous and add some logging so we can see heartbeats on slow machines

This commit is contained in:
Brad Hefta-Gaub 2016-03-24 12:37:07 -07:00
parent 003af03b33
commit 208b594ef8
2 changed files with 37 additions and 7 deletions

View file

@ -240,14 +240,17 @@ class DeadlockWatchdogThread : public QThread {
public:
static const unsigned long HEARTBEAT_CHECK_INTERVAL_SECS = 1;
static const unsigned long HEARTBEAT_UPDATE_INTERVAL_SECS = 1;
static const unsigned long MAX_HEARTBEAT_AGE_USECS = 15 * USECS_PER_SECOND;
static const unsigned long HEARTBEAT_REPORT_INTERVAL_USECS = 5 * USECS_PER_SECOND;
static const unsigned long MAX_HEARTBEAT_AGE_USECS = 30 * USECS_PER_SECOND;
static const uint64_t WARNING_ELAPSED_HEARTBEAT = 500 * USECS_PER_MSEC; // warn if elapsed heartbeat average is large
static const int HEARTBEAT_SAMPLES = 100000; // ~5 seconds worth of samples
// Set the heartbeat on launch
DeadlockWatchdogThread() {
setObjectName("Deadlock Watchdog");
QTimer* heartbeatTimer = new QTimer();
// Give the heartbeat an initial value
updateHeartbeat();
_heartbeat = usecTimestampNow();
connect(heartbeatTimer, &QTimer::timeout, [this] {
updateHeartbeat();
});
@ -258,7 +261,10 @@ public:
}
void updateHeartbeat() {
_heartbeat = usecTimestampNow();
auto now = usecTimestampNow();
auto elapsed = now - _heartbeat;
_movingAverage.addSample(elapsed);
_heartbeat = now;
}
void deadlockDetectionCrash() {
@ -269,10 +275,24 @@ public:
void run() override {
while (!_quit) {
QThread::sleep(HEARTBEAT_UPDATE_INTERVAL_SECS);
#ifdef NDEBUG
auto now = usecTimestampNow();
auto lastHeartbeatAge = now - _heartbeat;
auto sinceLastReport = now - _lastReport;
int elapsedMovingAverage = _movingAverage.average;
if (elapsedMovingAverage > _maxElapsed) {
_maxElapsed = elapsedMovingAverage;
}
if ((sinceLastReport > HEARTBEAT_REPORT_INTERVAL_USECS) || (elapsedMovingAverage > WARNING_ELAPSED_HEARTBEAT)) {
qDebug() << "updateHeartbeat.elapsedMovingAverage:" << elapsedMovingAverage
<< " maxElapsed:" << _maxElapsed << "numSamples:" << _movingAverage.numSamples;
_lastReport = now;
}
#ifdef NDEBUG
if (lastHeartbeatAge > MAX_HEARTBEAT_AGE_USECS) {
qDebug() << "DEADLOCK DETECTED -- updateHeartbeat.elapsedMovingAverage:" << elapsedMovingAverage
<< " maxElapsed:" << _maxElapsed << "numSamples:" << _movingAverage.numSamples;
deadlockDetectionCrash();
}
#endif
@ -280,10 +300,15 @@ public:
}
static std::atomic<uint64_t> _heartbeat;
static std::atomic<uint64_t> _lastReport;
static std::atomic<int> _maxElapsed;
bool _quit { false };
MovingAverage<int, HEARTBEAT_SAMPLES> _movingAverage;
};
std::atomic<uint64_t> DeadlockWatchdogThread::_heartbeat;
std::atomic<uint64_t> DeadlockWatchdogThread::_lastReport;
std::atomic<int> DeadlockWatchdogThread::_maxElapsed;
#ifdef Q_OS_WIN
class MyNativeEventFilter : public QAbstractNativeEventFilter {
@ -1381,6 +1406,8 @@ void Application::initializeUi() {
void Application::paintGL() {
updateHeartbeat();
// Some plugins process message events, potentially leading to
// re-entering a paint event. don't allow further processing if this
// happens
@ -2502,6 +2529,8 @@ static uint32_t _renderedFrameIndex { INVALID_FRAME };
void Application::idle(uint64_t now) {
updateHeartbeat();
if (_aboutToQuit || _inPaint) {
return; // bail early, nothing to do here.
}

View file

@ -14,6 +14,7 @@
#ifndef hifi_SimpleMovingAverage_h
#define hifi_SimpleMovingAverage_h
#include <atomic>
#include <stdint.h>
class SimpleMovingAverage {
@ -45,8 +46,8 @@ template <class T, int MAX_NUM_SAMPLES> class MovingAverage {
public:
const float WEIGHTING = 1.0f / (float)MAX_NUM_SAMPLES;
const float ONE_MINUS_WEIGHTING = 1.0f - WEIGHTING;
int numSamples{ 0 };
T average;
std::atomic<int> numSamples { 0 };
std::atomic<T> average;
void clear() {
numSamples = 0;