summaryrefslogtreecommitdiffstats
path: root/watchdog.cpp
diff options
context:
space:
mode:
authorWilliam A. Kennington III <wak@google.com>2018-02-27 18:47:05 -0800
committerBrad Bishop <bradleyb@fuzziesquirrel.com>2018-03-07 11:23:10 +0000
commitd13310864134b8e5a237397a9e08607cb2a01013 (patch)
tree499cfcbbf369689d7ae5a97c5a50b4f67c77164a /watchdog.cpp
parent825f4981ef91031f3a9a549ff011ed9cc943a9b1 (diff)
downloadphosphor-watchdog-d13310864134b8e5a237397a9e08607cb2a01013.tar.gz
phosphor-watchdog-d13310864134b8e5a237397a9e08607cb2a01013.zip
Implement a fallback watchdog option
Sometimes our initial watchdog action is not enough to recover the host from the state it transitioned into. However, always using a more powerful form of power cycle is not desirable as we can lose useful CPU crash state. It is desirable in this case to have two levels of watchdog timers. This patch implements the ability for the service to specify a fallback watchdog action and interval. After the initial watchdog timeout is encountered, the watchdog will be re-armed with the new parameters. Once the watchdog times out again it will execute the fallback action. Attempts to update the timeRemaining will reset the fallback just in case something is still alive. Change-Id: I69f4422c7e3963f02200815f3cef620af9e6cf8b Signed-off-by: William A. Kennington III <wak@google.com>
Diffstat (limited to 'watchdog.cpp')
-rw-r--r--watchdog.cpp43
1 files changed, 36 insertions, 7 deletions
diff --git a/watchdog.cpp b/watchdog.cpp
index 24e5416..b0c7b6f 100644
--- a/watchdog.cpp
+++ b/watchdog.cpp
@@ -19,8 +19,12 @@ bool Watchdog::enabled(bool value)
{
if (!value)
{
- // Attempt to disable our timer if needed
- tryDisable();
+ // Make sure we accurately reflect our enabled state to the
+ // tryFallbackOrDisable() call
+ WatchdogInherits::enabled(value);
+
+ // Attempt to fallback or disable our timer if needed
+ tryFallbackOrDisable();
return false;
}
@@ -78,6 +82,13 @@ uint64_t Watchdog::timeRemaining(uint64_t value)
return 0;
}
+ if (!this->enabled())
+ {
+ // Having a timer but not displaying an enabled value means we
+ // are inside of the fallback
+ value = fallback->interval;
+ }
+
// Update new expiration
auto usec = duration_cast<microseconds>(milliseconds(value));
timer.start(usec);
@@ -89,9 +100,13 @@ uint64_t Watchdog::timeRemaining(uint64_t value)
// Optional callback function on timer expiration
void Watchdog::timeOutHandler()
{
- auto action = expireAction();
- auto target = actionTargets.find(action);
+ Action action = expireAction();
+ if (!this->enabled())
+ {
+ action = fallback->action;
+ }
+ auto target = actionTargets.find(action);
if (target == actionTargets.end())
{
log<level::INFO>("watchdog: Timed out with no target",
@@ -112,12 +127,26 @@ void Watchdog::timeOutHandler()
bus.call_noreply(method);
}
- tryDisable();
+ tryFallbackOrDisable();
}
-void Watchdog::tryDisable()
+void Watchdog::tryFallbackOrDisable()
{
- if (timerEnabled())
+ // We only re-arm the watchdog if we were already enabled and have
+ // a possible fallback
+ if (fallback && this->enabled())
+ {
+ auto interval_ms = fallback->interval;
+ auto interval_us = duration_cast<microseconds>(milliseconds(interval_ms));
+
+ timer.clearExpired();
+ timer.start(interval_us);
+ timer.setEnabled<std::true_type>();
+
+ log<level::INFO>("watchdog: falling back",
+ entry("INTERVAL=%llu", interval_ms));
+ }
+ else if (timerEnabled())
{
timer.setEnabled<std::false_type>();
OpenPOWER on IntegriCloud