summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMatt Spinler <spinler@us.ibm.com>2017-06-02 12:29:41 -0500
committerPatrick Williams <patrick@stwcx.xyz>2017-06-26 01:57:59 +0000
commit7c33bff30f113fa8da7829322f6bb18d33bda46f (patch)
tree3b072935312874c6c79c3b69b125779fa8395144
parent2430197500a63e6384070959dd8f6a741d11d342 (diff)
downloadphosphor-objmgr-7c33bff30f113fa8da7829322f6bb18d33bda46f.tar.gz
phosphor-objmgr-7c33bff30f113fa8da7829322f6bb18d33bda46f.zip
Add class to monitor for unit failures
This class has an analyze() method that will check the state of a unit, and then stop or start a target unit if the state is failed. The units and the action are passed in via the constructor. Change-Id: Ibc8e54b8371d2261eb55cce5825c5cee6d214bab Signed-off-by: Matt Spinler <spinler@us.ibm.com>
-rw-r--r--configure.ac1
-rw-r--r--fail-monitor/Makefile.am10
-rw-r--r--fail-monitor/monitor.cpp132
-rw-r--r--fail-monitor/monitor.hpp113
4 files changed, 255 insertions, 1 deletions
diff --git a/configure.ac b/configure.ac
index 10ea4af..6ac4cc6 100644
--- a/configure.ac
+++ b/configure.ac
@@ -26,6 +26,7 @@ AC_SUBST([PYTHONDIR], ${pythondir})
# Checks for libraries.
PKG_CHECK_MODULES([SYSTEMD], [libsystemd >= 221])
+# phosphor-logging supplies <phosphor-logging/log.hpp>, used by fail-monitor/monitor.cpp.
+PKG_CHECK_MODULES([PHOSPHOR_LOGGING], [phosphor-logging], [], [AC_MSG_ERROR([Requires phosphor-logging.])])
# Checks for header files.
AC_CHECK_HEADER(systemd/sd-bus.h, ,[AC_MSG_ERROR([Could not find systemd/sd-bus.h...systemd development package required])])
diff --git a/fail-monitor/Makefile.am b/fail-monitor/Makefile.am
index 1976be1..6c5c5b2 100644
--- a/fail-monitor/Makefile.am
+++ b/fail-monitor/Makefile.am
@@ -1,7 +1,15 @@
AM_DEFAULT_SOURCE_EXT = .cpp
+AM_CPPFLAGS = -I$(top_srcdir)
sbin_PROGRAMS = \
phosphor-unit-failure-monitor
phosphor_unit_failure_monitor_SOURCES = \
- main.cpp
+ main.cpp \
+ monitor.cpp
+
+# monitor.cpp reaches systemd through sdbusplus (which sits on libsystemd's
+# sd-bus) and logs through phosphor-logging. Name both explicitly instead of
+# relying on whatever phosphor-logging's pkg-config flags pull in.
+phosphor_unit_failure_monitor_LDADD = \
+ $(SYSTEMD_LIBS) \
+ $(PHOSPHOR_LOGGING_LIBS)
+
+phosphor_unit_failure_monitor_CXXFLAGS = \
+ $(SYSTEMD_CFLAGS) \
+ $(PHOSPHOR_LOGGING_CFLAGS)
diff --git a/fail-monitor/monitor.cpp b/fail-monitor/monitor.cpp
new file mode 100644
index 0000000..1936e27
--- /dev/null
+++ b/fail-monitor/monitor.cpp
@@ -0,0 +1,132 @@
+/**
+ * Copyright © 2017 IBM Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <phosphor-logging/log.hpp>
+#include "monitor.hpp"
+
+namespace phosphor
+{
+namespace unit
+{
+namespace failure
+{
+
+using namespace phosphor::logging;
+
+// ActiveState value that marks the source unit as failed.
+constexpr const char* FAILED_STATE = "failed";
+
+// Manager methods used to run the action on the target unit.
+constexpr const char* START_METHOD = "StartUnit";
+constexpr const char* STOP_METHOD = "StopUnit";
+
+// D-Bus service name and object path of the systemd manager.
+constexpr const char* SYSTEMD_SERVICE = "org.freedesktop.systemd1";
+constexpr const char* SYSTEMD_OBJ_PATH = "/org/freedesktop/systemd1";
+
+// D-Bus interfaces used for the manager calls and the property read.
+constexpr const char* SYSTEMD_INTERFACE = "org.freedesktop.systemd1.Manager";
+constexpr const char* SYSTEMD_PROPERTY_INTERFACE =
+    "org.freedesktop.DBus.Properties";
+constexpr const char* SYSTEMD_UNIT_INTERFACE = "org.freedesktop.systemd1.Unit";
+
+
+/**
+ * Checks the source unit and, if it is in the failed state, runs the
+ * configured action on the target unit.
+ *
+ * Any std::runtime_error thrown by the helpers propagates to the caller.
+ */
+void Monitor::analyze()
+{
+    // getSourceUnitPath() returns by value, so the temporary binds to the
+    // rvalue-reference parameter as is; wrapping it in std::move adds nothing.
+    if (inFailedState(getSourceUnitPath()))
+    {
+        runTargetAction();
+    }
+}
+
+
+/**
+ * Reads the ActiveState property of the unit object at 'path' and reports
+ * whether it equals 'failed'.
+ *
+ * @param[in] path - the unit object path to check
+ *
+ * @return - true if this unit is in the failed state
+ *
+ * Throws std::runtime_error if the property read returns a method error.
+ */
+bool Monitor::inFailedState(const std::string&& path)
+{
+    auto request = bus.new_method_call(SYSTEMD_SERVICE,
+                                       path.c_str(),
+                                       SYSTEMD_PROPERTY_INTERFACE,
+                                       "Get");
+    request.append(SYSTEMD_UNIT_INTERFACE, "ActiveState");
+
+    auto response = bus.call(request);
+    if (response.is_method_error())
+    {
+        // The log entry names the source unit rather than the object path.
+        log<level::ERR>("Failed reading ActiveState DBus property",
+                        entry("UNIT=%s", source.c_str()));
+        // TODO openbmc/openbmc#851 - Once available, throw returned error
+        throw std::runtime_error("Failed reading ActiveState DBus property");
+    }
+
+    // The reply is read into a variant; pull out the string it carries.
+    sdbusplus::message::variant<std::string> activeState;
+    response.read(activeState);
+
+    const auto& state =
+        sdbusplus::message::variant_ns::get<std::string>(activeState);
+    return state == FAILED_STATE;
+}
+
+
+/**
+ * Calls GetUnit on the systemd manager object to look up the D-Bus object
+ * path of the source unit.
+ *
+ * @return - the object path, converted to a std::string
+ *
+ * Throws std::runtime_error if the GetUnit call returns a method error.
+ */
+std::string Monitor::getSourceUnitPath()
+{
+    auto request = bus.new_method_call(SYSTEMD_SERVICE,
+                                       SYSTEMD_OBJ_PATH,
+                                       SYSTEMD_INTERFACE,
+                                       "GetUnit");
+    request.append(source);
+
+    auto response = bus.call(request);
+    if (response.is_method_error())
+    {
+        log<level::ERR>("Failed GetUnit DBus method call",
+                        entry("UNIT=%s", source.c_str()));
+        // TODO openbmc/openbmc#851 - Once available, throw returned error
+        throw std::runtime_error("Failed GetUnit DBus method call");
+    }
+
+    sdbusplus::message::object_path unitPath;
+    response.read(unitPath);
+
+    return static_cast<std::string>(unitPath);
+}
+
+
+/**
+ * Starts or stops the target unit, depending on the configured action,
+ * by calling StartUnit or StopUnit on the systemd manager object.
+ *
+ * Throws std::runtime_error if the call returns a method error.
+ */
+void Monitor::runTargetAction()
+{
+    // Pick the manager method that matches the configured action.
+    const char* verb = STOP_METHOD;
+    if (action == Action::start)
+    {
+        verb = START_METHOD;
+    }
+
+    log<level::INFO>("The source unit is in failed state, "
+                     "running target action",
+                     entry("SOURCE=%s", source.c_str()),
+                     entry("TARGET=%s", target.c_str()),
+                     entry("ACTION=%s", verb));
+
+    auto request = bus.new_method_call(SYSTEMD_SERVICE,
+                                       SYSTEMD_OBJ_PATH,
+                                       SYSTEMD_INTERFACE,
+                                       verb);
+    // Arguments: the unit name, then "replace" (the job mode in systemd's
+    // Manager API).
+    request.append(target, "replace");
+
+    auto response = bus.call(request);
+    if (response.is_method_error())
+    {
+        log<level::ERR>("Failed to run action on the target unit",
+                        entry("UNIT=%s", target.c_str()));
+        // TODO openbmc/openbmc#851 - Once available, throw returned error
+        throw std::runtime_error("Failed to run action on the target unit");
+    }
+}
+
+}
+}
+}
diff --git a/fail-monitor/monitor.hpp b/fail-monitor/monitor.hpp
new file mode 100644
index 0000000..5aa2170
--- /dev/null
+++ b/fail-monitor/monitor.hpp
@@ -0,0 +1,113 @@
+#pragma once
+
+#include <sdbusplus/bus.hpp>
+
+namespace phosphor
+{
+namespace unit
+{
+namespace failure
+{
+
+/**
+ * @class Monitor
+ *
+ * This class will analyze a unit to see if it is in the failed
+ * state. If it is, it will either start or stop a target unit.
+ *
+ * The use case is for running from the OnFailure directive in a
+ * unit file. If that unit keeps failing and restarting, it will
+ * eventually exceed its rate limits and stop being restarted.
+ * This application will allow another unit to be started when that
+ * occurs.
+ */
+class Monitor
+{
+    public:
+
+        /**
+         * The valid actions - either starting or stopping a unit
+         */
+        enum class Action
+        {
+            start,
+            stop
+        };
+
+        Monitor() = delete;
+        Monitor(const Monitor&) = delete;
+        Monitor(Monitor&&) = default;
+        Monitor& operator=(const Monitor&) = delete;
+
+        // The const data members below cannot be assigned to, so a
+        // defaulted move assignment would be defined as deleted anyway.
+        // Spell that out instead of advertising an operator that does
+        // not exist.
+        Monitor& operator=(Monitor&&) = delete;
+        ~Monitor() = default;
+
+        /**
+         * Constructor
+         *
+         * Connects to the default bus and records the units and action
+         * that analyze() will work with.
+         *
+         * @param[in] sourceUnit - the source unit
+         * @param[in] targetUnit - the target unit
+         * @param[in] action - the action to run on the target
+         */
+        Monitor(const std::string& sourceUnit,
+                const std::string& targetUnit,
+                Action action) :
+            // new_default() hands back a temporary, so initialize the
+            // member from it directly; a std::move here is redundant.
+            bus(sdbusplus::bus::new_default()),
+            source(sourceUnit),
+            target(targetUnit),
+            action(action)
+        {
+        }
+
+        /**
+         * Analyzes the source unit to check if it is in a failed state.
+         * If it is, then it runs the action on the target unit.
+         */
+        void analyze();
+
+    private:
+
+        /**
+         * Returns the dbus object path of the source unit
+         */
+        std::string getSourceUnitPath();
+
+        /**
+         * Says if the unit object passed in has an
+         * ActiveState property equal to 'failed'.
+         *
+         * @param[in] path - the unit object path to check
+         *
+         * @return - true if this unit is in the failed state
+         */
+        bool inFailedState(const std::string&& path);
+
+        /**
+         * Runs the action on the target unit.
+         */
+        void runTargetAction();
+
+        /**
+         * The dbus object
+         */
+        sdbusplus::bus::bus bus;
+
+        /**
+         * The source unit
+         */
+        const std::string source;
+
+        /**
+         * The target unit
+         */
+        const std::string target;
+
+        /**
+         * The action to run on the target if the source
+         * unit is in failed state.
+         */
+        const Action action;
+};
+
+}
+}
+}
OpenPOWER on IntegriCloud