diff options
author | Matt Spinler <spinler@us.ibm.com> | 2017-06-02 12:29:41 -0500 |
---|---|---|
committer | Patrick Williams <patrick@stwcx.xyz> | 2017-06-26 01:57:59 +0000 |
commit | 7c33bff30f113fa8da7829322f6bb18d33bda46f (patch) | |
tree | 3b072935312874c6c79c3b69b125779fa8395144 | |
parent | 2430197500a63e6384070959dd8f6a741d11d342 (diff) | |
download | phosphor-objmgr-7c33bff30f113fa8da7829322f6bb18d33bda46f.tar.gz phosphor-objmgr-7c33bff30f113fa8da7829322f6bb18d33bda46f.zip |
Add class to monitor for unit failures
This class has an analyze() method that will
check the state of a unit, and then stop or start
a target unit if the state is failed.
The units and the action are passed in via the constructor.
Change-Id: Ibc8e54b8371d2261eb55cce5825c5cee6d214bab
Signed-off-by: Matt Spinler <spinler@us.ibm.com>
mode | file | lines changed |
---|---|---|
-rw-r--r-- | configure.ac | 1 |
-rw-r--r-- | fail-monitor/Makefile.am | 10 |
-rw-r--r-- | fail-monitor/monitor.cpp | 132 |
-rw-r--r-- | fail-monitor/monitor.hpp | 113 |

4 files changed, 255 insertions, 1 deletion
```diff
diff --git a/configure.ac b/configure.ac
index 10ea4af..6ac4cc6 100644
--- a/configure.ac
+++ b/configure.ac
@@ -26,6 +26,7 @@ AC_SUBST([PYTHONDIR], ${pythondir})
 
 # Checks for libraries.
 PKG_CHECK_MODULES([SYSTEMD], [libsystemd >= 221])
+PKG_CHECK_MODULES([PHOSPHOR_LOGGING], [phosphor-logging],, AC_MSG_ERROR(["Requires phosphor-logging."]))
 
 # Checks for header files.
 AC_CHECK_HEADER(systemd/sd-bus.h, ,[AC_MSG_ERROR([Could not find systemd/sd-bus.h...systemd development package required])])
diff --git a/fail-monitor/Makefile.am b/fail-monitor/Makefile.am
index 1976be1..6c5c5b2 100644
--- a/fail-monitor/Makefile.am
+++ b/fail-monitor/Makefile.am
@@ -1,7 +1,15 @@
 AM_DEFAULT_SOURCE_EXT = .cpp
+AM_CPPFLAGS = -I$(top_srcdir)
 
 sbin_PROGRAMS = \
 	phosphor-unit-failure-monitor
 
 phosphor_unit_failure_monitor_SOURCES = \
-	main.cpp
+	main.cpp \
+	monitor.cpp
+
+phosphor_unit_failure_monitor_LDADD = \
+	$(PHOSPHOR_LOGGING_LIBS)
+
+phosphor_unit_failure_monitor_CXXFLAGS = \
+	$(PHOSPHOR_LOGGING_CFLAGS)
diff --git a/fail-monitor/monitor.cpp b/fail-monitor/monitor.cpp
new file mode 100644
index 0000000..1936e27
--- /dev/null
+++ b/fail-monitor/monitor.cpp
@@ -0,0 +1,132 @@
+/**
+ * Copyright © 2017 IBM Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <phosphor-logging/log.hpp>
+#include "monitor.hpp"
+
+namespace phosphor
+{
+namespace unit
+{
+namespace failure
+{
+
+using namespace phosphor::logging;
+
+constexpr auto FAILED_STATE = "failed";
+constexpr auto START_METHOD = "StartUnit";
+constexpr auto STOP_METHOD = "StopUnit";
+
+constexpr auto SYSTEMD_SERVICE = "org.freedesktop.systemd1";
+constexpr auto SYSTEMD_OBJ_PATH = "/org/freedesktop/systemd1";
+constexpr auto SYSTEMD_INTERFACE = "org.freedesktop.systemd1.Manager";
+constexpr auto SYSTEMD_PROPERTY_INTERFACE = "org.freedesktop.DBus.Properties";
+constexpr auto SYSTEMD_UNIT_INTERFACE = "org.freedesktop.systemd1.Unit";
+
+
+void Monitor::analyze()
+{
+    if (inFailedState(std::move(getSourceUnitPath())))
+    {
+        runTargetAction();
+    }
+}
+
+
+bool Monitor::inFailedState(const std::string&& path)
+{
+    sdbusplus::message::variant<std::string> property;
+
+    auto method = bus.new_method_call(SYSTEMD_SERVICE,
+                                      path.c_str(),
+                                      SYSTEMD_PROPERTY_INTERFACE,
+                                      "Get");
+
+    method.append(SYSTEMD_UNIT_INTERFACE, "ActiveState");
+
+    auto reply = bus.call(method);
+    if (reply.is_method_error())
+    {
+        log<level::ERR>("Failed reading ActiveState DBus property",
+                        entry("UNIT=%s", source.c_str()));
+        // TODO openbmc/openbmc#851 - Once available, throw returned error
+        throw std::runtime_error("Failed reading ActiveState DBus property");
+    }
+
+    reply.read(property);
+
+    auto value = sdbusplus::message::variant_ns::get<std::string>(property);
+    return (value == FAILED_STATE);
+}
+
+
+std::string Monitor::getSourceUnitPath()
+{
+    sdbusplus::message::object_path path;
+
+    auto method = bus.new_method_call(SYSTEMD_SERVICE,
+                                      SYSTEMD_OBJ_PATH,
+                                      SYSTEMD_INTERFACE,
+                                      "GetUnit");
+    method.append(source);
+    auto reply = bus.call(method);
+
+    if (reply.is_method_error())
+    {
+        log<level::ERR>("Failed GetUnit DBus method call",
+                        entry("UNIT=%s", source.c_str()));
+        // TODO openbmc/openbmc#851 - Once available, throw returned error
+        throw std::runtime_error("Failed GetUnit DBus method call");
+    }
+
+    reply.read(path);
+
+    return static_cast<std::string>(path);
+}
+
+
+void Monitor::runTargetAction()
+{
+    //Start or stop the target unit
+    auto methodCall = (action == Action::start) ?
+                      START_METHOD : STOP_METHOD;
+
+    log<level::INFO>("The source unit is in failed state, "
+                     "running target action",
+                     entry("SOURCE=%s", source.c_str()),
+                     entry("TARGET=%s", target.c_str()),
+                     entry("ACTION=%s", methodCall));
+
+    auto method = this->bus.new_method_call(SYSTEMD_SERVICE,
+                                            SYSTEMD_OBJ_PATH,
+                                            SYSTEMD_INTERFACE,
+                                            methodCall);
+    method.append(target);
+    method.append("replace");
+
+    auto reply = bus.call(method);
+
+    if (reply.is_method_error())
+    {
+        log<level::ERR>("Failed to run action on the target unit",
+                        entry("UNIT=%s", target.c_str()));
+        // TODO openbmc/openbmc#851 - Once available, throw returned error
+        throw std::runtime_error("Failed to run action on the target unit");
+    }
+}
+
+}
+}
+}
diff --git a/fail-monitor/monitor.hpp b/fail-monitor/monitor.hpp
new file mode 100644
index 0000000..5aa2170
--- /dev/null
+++ b/fail-monitor/monitor.hpp
@@ -0,0 +1,113 @@
+#pragma once
+
+#include <sdbusplus/bus.hpp>
+
+namespace phosphor
+{
+namespace unit
+{
+namespace failure
+{
+
+/**
+ * @class Monitor
+ *
+ * This class will analyze a unit to see if it is in the failed
+ * state. If it is, it will either start or stop a target unit.
+ *
+ * The use case is for running from the OnFailure directive in a
+ * unit file. If that unit keeps failing and restarting, it will
+ * eventually exceed its rate limits and stop being restarted.
+ * This application will allow another unit to be started when that
+ * occurs.
+ */
+class Monitor
+{
+    public:
+
+        /**
+         * The valid actions - either starting or stopping a unit
+         */
+        enum class Action
+        {
+            start,
+            stop
+        };
+
+        Monitor() = delete;
+        Monitor(const Monitor&) = delete;
+        Monitor(Monitor&&) = default;
+        Monitor& operator=(const Monitor&) = delete;
+        Monitor& operator=(Monitor&&) = default;
+        ~Monitor() = default;
+
+        /**
+         * Constructor
+         *
+         * @param[in] sourceUnit - the source unit
+         * @param[in] targetUnit - the target unit
+         * @param[in] action - the action to run on the target
+         */
+        Monitor(const std::string& sourceUnit,
+                const std::string& targetUnit,
+                Action action) :
+            bus(std::move(sdbusplus::bus::new_default())),
+            source(sourceUnit),
+            target(targetUnit),
+            action(action)
+        {
+        }
+
+        /**
+         * Analyzes the source unit to check if it is in a failed state.
+         * If it is, then it runs the action on the target unit.
+         */
+        void analyze();
+
+    private:
+
+        /**
+         * Returns the dbus object path of the source unit
+         */
+        std::string getSourceUnitPath();
+
+        /**
+         * Says if the unit object passed in has an
+         * ActiveState property equal to 'failed'.
+         *
+         * @param[in] path - the unit object path to check
+         *
+         * @return - true if this unit is in the failed state
+         */
+        bool inFailedState(const std::string&& path);
+
+        /**
+         * Runs the action on the target unit.
+         */
+        void runTargetAction();
+
+        /**
+         * The dbus object
+         */
+        sdbusplus::bus::bus bus;
+
+        /**
+         * The source unit
+         */
+        const std::string source;
+
+        /**
+         * The target unit
+         */
+        const std::string target;
+
+        /**
+         * The action to run on the target if the source
+         * unit is in failed state.
+         */
+        const Action action;
+};
+
+}
+}
+}
```