diff options
author | William A. Kennington III <wak@google.com> | 2018-10-03 23:36:16 -0700 |
---|---|---|
committer | William A. Kennington III <wak@google.com> | 2018-10-04 00:14:02 -0700 |
commit | 7036c569c8de530165a3ee029cdeaa20fb9e9a41 (patch) | |
tree | bcd48c83f6ac10b9028a89ad2e85853ab227d78e | |
parent | d4cbc5aad28f32c90e18d0fdd5ff8d1150fbd663 (diff) | |
download | phosphor-watchdog-7036c569c8de530165a3ee029cdeaa20fb9e9a41.tar.gz phosphor-watchdog-7036c569c8de530165a3ee029cdeaa20fb9e9a41.zip |
watchdog: Handle systemd StartUnit errors
Currently, if systemd returns an error when the watchdog's timeout handler
calls StartUnit, the entire daemon crashes:
Sep 28 22:59:09 phosphor-watchdog[2462]: Timer Expired
Sep 28 22:59:09 phosphor-watchdog[2462]: watchdog: Timed out
Sep 28 22:59:09 phosphor-watchdog[2462]: terminate called after throwing an instance of 'sdbusplus::exception::SdBusError'
Sep 28 22:59:09 phosphor-watchdog[2462]: what(): sd_bus_call noreply: org.freedesktop.DBus.Error.InvalidArgs: Unit host-watchdog-reset.service is not loaded properly: Invalid argument.
This patch fixes the behavior so we get something more like:
Oct 04 07:12:14 phosphor-watchdog[10897]: watchdog: Timed out
Oct 04 07:12:14 phosphor-watchdog[10897]: watchdog: Failed to start unit
Oct 04 07:12:16 phosphor-watchdog[10897]: watchdog: disabled
Tested:
Ran with a bogus systemd target, which previously caused an uncaught
exception; the error is now handled without crashing the daemon.
Change-Id: I4cf6ffded789e49d2329439165927cc227e2e79e
Signed-off-by: William A. Kennington III <wak@google.com>
-rw-r--r-- | watchdog.cpp | 30 |
1 files changed, 24 insertions, 6 deletions
diff --git a/watchdog.cpp b/watchdog.cpp index 9301fb0..92134ff 100644 --- a/watchdog.cpp +++ b/watchdog.cpp @@ -1,7 +1,11 @@ #include "watchdog.hpp" #include <chrono> +#include <phosphor-logging/elog.hpp> #include <phosphor-logging/log.hpp> +#include <sdbusplus/exception.hpp> +#include <xyz/openbmc_project/Common/error.hpp> + namespace phosphor { namespace watchdog @@ -10,6 +14,9 @@ using namespace std::chrono; using namespace std::chrono_literals; using namespace phosphor::logging; +using sdbusplus::exception::SdBusError; +using sdbusplus::xyz::openbmc_project::Common::Error::InternalFailure; + // systemd service to kick start a target. constexpr auto SYSTEMD_SERVICE = "org.freedesktop.systemd1"; constexpr auto SYSTEMD_ROOT = "/org/freedesktop/systemd1"; @@ -120,15 +127,26 @@ void Watchdog::timeOutHandler() } else { - auto method = bus.new_method_call(SYSTEMD_SERVICE, SYSTEMD_ROOT, - SYSTEMD_INTERFACE, "StartUnit"); - method.append(target->second); - method.append("replace"); - log<level::INFO>("watchdog: Timed out", entry("ACTION=%s", convertForMessage(action).c_str()), entry("TARGET=%s", target->second.c_str())); - bus.call_noreply(method); + + try + { + auto method = bus.new_method_call(SYSTEMD_SERVICE, SYSTEMD_ROOT, + SYSTEMD_INTERFACE, "StartUnit"); + method.append(target->second); + method.append("replace"); + + bus.call_noreply(method); + } + catch (const SdBusError& e) + { + log<level::ERR>("watchdog: Failed to start unit", + entry("TARGET=%s", target->second.c_str()), + entry("ERROR=%s", e.what())); + commit<InternalFailure>(); + } } tryFallbackOrDisable(); |