diff options
author | William A. Kennington III <wak@google.com> | 2018-02-27 18:47:05 -0800 |
---|---|---|
committer | Brad Bishop <bradleyb@fuzziesquirrel.com> | 2018-03-07 11:23:10 +0000 |
commit | d13310864134b8e5a237397a9e08607cb2a01013 (patch) | |
tree | 499cfcbbf369689d7ae5a97c5a50b4f67c77164a /watchdog.cpp | |
parent | 825f4981ef91031f3a9a549ff011ed9cc943a9b1 (diff) | |
download | phosphor-watchdog-d13310864134b8e5a237397a9e08607cb2a01013.tar.gz phosphor-watchdog-d13310864134b8e5a237397a9e08607cb2a01013.zip |
Implement a fallback watchdog option
Sometimes our initial watchdog action is not enough to recover the host
from the state it transitioned into. However, always using a more powerful
form of power cycle is not desirable as we can lose useful CPU crash
state. It is desirable in this case to have two levels of watchdog
timers.
This patch implements the ability for the service to specify a fallback
watchdog action and interval. After the initial watchdog timeout is
encountered, the watchdog will be re-armed with the new parameters. Once
the watchdog times out again it will execute the fallback action.
Attempts to update the timeRemaining will reset the fallback just in
case something is still alive.
Change-Id: I69f4422c7e3963f02200815f3cef620af9e6cf8b
Signed-off-by: William A. Kennington III <wak@google.com>
Diffstat (limited to 'watchdog.cpp')
-rw-r--r-- | watchdog.cpp | 43 |
1 files changed, 36 insertions, 7 deletions
diff --git a/watchdog.cpp b/watchdog.cpp index 24e5416..b0c7b6f 100644 --- a/watchdog.cpp +++ b/watchdog.cpp @@ -19,8 +19,12 @@ bool Watchdog::enabled(bool value) { if (!value) { - // Attempt to disable our timer if needed - tryDisable(); + // Make sure we accurately reflect our enabled state to the + // tryFallbackOrDisable() call + WatchdogInherits::enabled(value); + + // Attempt to fallback or disable our timer if needed + tryFallbackOrDisable(); return false; } @@ -78,6 +82,13 @@ uint64_t Watchdog::timeRemaining(uint64_t value) return 0; } + if (!this->enabled()) + { + // Having a timer but not displaying an enabled value means we + // are inside of the fallback + value = fallback->interval; + } + // Update new expiration auto usec = duration_cast<microseconds>(milliseconds(value)); timer.start(usec); @@ -89,9 +100,13 @@ uint64_t Watchdog::timeRemaining(uint64_t value) // Optional callback function on timer expiration void Watchdog::timeOutHandler() { - auto action = expireAction(); - auto target = actionTargets.find(action); + Action action = expireAction(); + if (!this->enabled()) + { + action = fallback->action; + } + auto target = actionTargets.find(action); if (target == actionTargets.end()) { log<level::INFO>("watchdog: Timed out with no target", @@ -112,12 +127,26 @@ void Watchdog::timeOutHandler() bus.call_noreply(method); } - tryDisable(); + tryFallbackOrDisable(); } -void Watchdog::tryDisable() +void Watchdog::tryFallbackOrDisable() { - if (timerEnabled()) + // We only re-arm the watchdog if we were already enabled and have + // a possible fallback + if (fallback && this->enabled()) + { + auto interval_ms = fallback->interval; + auto interval_us = duration_cast<microseconds>(milliseconds(interval_ms)); + + timer.clearExpired(); + timer.start(interval_us); + timer.setEnabled<std::true_type>(); + + log<level::INFO>("watchdog: falling back", + entry("INTERVAL=%llu", interval_ms)); + } + else if (timerEnabled()) { timer.setEnabled<std::false_type>(); |