From 7036c569c8de530165a3ee029cdeaa20fb9e9a41 Mon Sep 17 00:00:00 2001 From: "William A. Kennington III" Date: Wed, 3 Oct 2018 23:36:16 -0700 Subject: watchdog: Handle systemd StartUnit errors Currently, if systemd returns an error upon trying to StartUnit in the timeout of the watchdog, the entire daemon will crash. Sep 28 22:59:09 phosphor-watchdog[2462]: Timer Expired Sep 28 22:59:09 phosphor-watchdog[2462]: watchdog: Timed out Sep 28 22:59:09 phosphor-watchdog[2462]: terminate called after throwing an instance of 'sdbusplus::exception::SdBusError' Sep 28 22:59:09 phosphor-watchdog[2462]: what(): sd_bus_call noreply: org.freedesktop.DBus.Error.InvalidArgs: Unit host-watchdog-reset.service is not loaded properly: Invalid argument. This patch fixes the behavior so we get something more like: Oct 04 07:12:14 phosphor-watchdog[10897]: watchdog: Timed out Oct 04 07:12:14 phosphor-watchdog[10897]: watchdog: Failed to start unit Oct 04 07:12:16 phosphor-watchdog[10897]: watchdog: disabled Tested: Ran with a bogus systemd target which used to throw errors which are now correctly handled without crashing. Change-Id: I4cf6ffded789e49d2329439165927cc227e2e79e Signed-off-by: William A. Kennington III --- watchdog.cpp | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/watchdog.cpp b/watchdog.cpp index 9301fb0..92134ff 100644 --- a/watchdog.cpp +++ b/watchdog.cpp @@ -1,7 +1,11 @@ #include "watchdog.hpp" #include +#include #include +#include +#include + namespace phosphor { namespace watchdog @@ -10,6 +14,9 @@ using namespace std::chrono; using namespace std::chrono_literals; using namespace phosphor::logging; +using sdbusplus::exception::SdBusError; +using sdbusplus::xyz::openbmc_project::Common::Error::InternalFailure; + // systemd service to kick start a target. 
constexpr auto SYSTEMD_SERVICE = "org.freedesktop.systemd1"; constexpr auto SYSTEMD_ROOT = "/org/freedesktop/systemd1"; @@ -120,15 +127,26 @@ void Watchdog::timeOutHandler() } else { - auto method = bus.new_method_call(SYSTEMD_SERVICE, SYSTEMD_ROOT, - SYSTEMD_INTERFACE, "StartUnit"); - method.append(target->second); - method.append("replace"); - log("watchdog: Timed out", entry("ACTION=%s", convertForMessage(action).c_str()), entry("TARGET=%s", target->second.c_str())); - bus.call_noreply(method); + + try + { + auto method = bus.new_method_call(SYSTEMD_SERVICE, SYSTEMD_ROOT, + SYSTEMD_INTERFACE, "StartUnit"); + method.append(target->second); + method.append("replace"); + + bus.call_noreply(method); + } + catch (const SdBusError& e) + { + log("watchdog: Failed to start unit", + entry("TARGET=%s", target->second.c_str()), + entry("ERROR=%s", e.what())); + commit(); + } } tryFallbackOrDisable(); -- cgit v1.2.1