From 30e329add77a72fffeb21e9d3203062a042c6f30 Mon Sep 17 00:00:00 2001
From: Vishwanatha Subbanna
Date: Mon, 24 Jul 2017 23:13:14 +0530
Subject: Reset OCC on error condition

After detecting the error in the OCC, invokes OccReset command in
Host Control dbus object with the sensor ID of the failing OCC

Fixes openbmc/openbmc#1363

Change-Id: I608dbbb943d3b39d0709d6f350ab799e771a13e9
Signed-off-by: Vishwanatha Subbanna
---
 Makefile.am      |  6 ++++--
 occ_status.cpp   | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 occ_status.hpp   | 42 ++++++++++++++++++++++++++++++++++++++-
 test/Makefile.am |  1 +
 utils.cpp        | 53 +++++++++++++++++++++++++++++++++++++++++++++++++
 utils.hpp        | 23 ++++++++++++++++++++++
 6 files changed, 182 insertions(+), 3 deletions(-)
 create mode 100644 utils.cpp
 create mode 100644 utils.hpp

diff --git a/Makefile.am b/Makefile.am
index 548e7ff..aecbebb 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -5,7 +5,8 @@ noinst_HEADERS = \
 	powercap.hpp \
 	occ_errors.hpp \
 	occ_events.hpp \
-	occ_finder.hpp
+	occ_finder.hpp \
+	utils.hpp
 
 sbin_PROGRAMS = openpower-occ-control
 openpower_occ_control_SOURCES = \
@@ -16,7 +17,8 @@ openpower_occ_control_SOURCES = \
 	app.cpp \
 	powercap.cpp \
 	org/open_power/OCC/Device/error.cpp \
-	occ_finder.cpp
+	occ_finder.cpp \
+	utils.cpp
 
 BUILT_SOURCES = org/open_power/OCC/Device/error.hpp \
 	org/open_power/OCC/Device/error.cpp \
diff --git a/occ_status.cpp b/occ_status.cpp
index 08b1ef3..f740dc7 100644
--- a/occ_status.cpp
+++ b/occ_status.cpp
@@ -1,5 +1,7 @@
+#include
 #include "occ_status.hpp"
 #include "occ_sensor.hpp"
+#include "utils.hpp"
 namespace open_power
 {
 namespace occ
@@ -35,6 +37,64 @@ void Status::deviceErrorHandler()
 {
     // This would deem OCC inactive
     this->occActive(false);
+
+    // Reset the OCC
+    this->resetOCC();
+}
+
+// Sends message to host control command handler to reset OCC
+void Status::resetOCC()
+{
+    using namespace phosphor::logging;
+    constexpr auto CONTROL_HOST_PATH = "/org/open_power/control/host0";
+    constexpr auto CONTROL_HOST_INTF = "org.open_power.Control.Host";
+
+    // getService() takes (bus, intf, path) and throws on failure
+    auto service = getService(bus, CONTROL_HOST_INTF, CONTROL_HOST_PATH);
+
+    auto method = bus.new_method_call(service.c_str(),
+                                      CONTROL_HOST_PATH,
+                                      CONTROL_HOST_INTF,
+                                      "Execute");
+    // OCC Reset control command
+    method.append(convertForMessage(
+                Control::Host::Command::OCCReset).c_str());
+
+    // OCC Sensor ID for callout reasons
+    method.append(sdbusplus::message::variant(
+                        sensorMap.at(instance)));
+    bus.call_noreply(method);
+    return;
+}
+
+// Handler called by Host control command handler to convey the
+// status of the executed command
+void Status::hostControlEvent(sdbusplus::message::message& msg)
+{
+    using namespace phosphor::logging;
+
+    std::string cmdCompleted{};
+    std::string cmdStatus{};
+
+    msg.read(cmdCompleted, cmdStatus);
+
+    log("Host control signal values",
+            entry("COMMAND=%s",cmdCompleted.c_str()),
+            entry("STATUS=%s",cmdStatus.c_str()));
+
+    if(Control::Host::convertResultFromString(cmdStatus) !=
+            Control::Host::Result::Success)
+    {
+        if(Control::Host::convertCommandFromString(cmdCompleted) ==
+                Control::Host::Command::OCCReset)
+        {
+            // Must be a Timeout. Log an Error trace
+            log("Error resetting the OCC.",
+                    entry("PATH=%s", path.c_str()),
+                    entry("SensorID=0x%X",sensorMap.at(instance)));
+        }
+    }
+    return;
 }
 
 } // namespace occ
diff --git a/occ_status.hpp b/occ_status.hpp
index d30d642..e07ca61 100644
--- a/occ_status.hpp
+++ b/occ_status.hpp
@@ -3,6 +3,7 @@
 #include
 #include
 #include
+#include
 #include "occ_events.hpp"
 #include "occ_device.hpp"
 namespace open_power
@@ -13,6 +14,12 @@ namespace occ
 namespace Base = sdbusplus::org::open_power::OCC::server;
 using Interface = sdbusplus::server::object::object;
 
+// IPMID's host control application
+namespace Control = sdbusplus::org::open_power::Control::server;
+
+// For waiting on signals
+namespace sdbusRule = sdbusplus::bus::match::rules;
+
 // OCC status instance. Ex. for "occ0", the instance is 0
 using instanceID = int;
 
@@ -40,11 +47,22 @@ class Status : public Interface
          */
         Status(sdbusplus::bus::bus& bus, EventPtr& event, const char* path)
             : Interface(bus, path),
+              bus(bus),
               path(path),
               instance(((this->path.back() - '0'))),
               device(event,
                      name + std::to_string(instance + 1),
-                     std::bind(&Status::deviceErrorHandler, this))
+                     std::bind(&Status::deviceErrorHandler, this)),
+              hostControlSignal(
+                     bus,
+                     sdbusRule::type::signal() +
+                     sdbusRule::member("CommandComplete") +
+                     sdbusRule::path("/org/open_power/control/host0") +
+                     sdbusRule::interface("org.open_power.Control.Host") +
+                     sdbusRule::argN(0, Control::convertForMessage(
+                             Control::Host::Command::OCCReset)),
+                     std::bind(std::mem_fn(&Status::hostControlEvent),
+                            this, std::placeholders::_1))
         {
             // Nothing to do here
         }
@@ -64,6 +82,10 @@ class Status : public Interface
         bool occActive(bool value) override;
 
     private:
+
+        /** @brief sdbus handle */
+        sdbusplus::bus::bus& bus;
+
         /** @brief OCC dbus object path */
         std::string path;
 
@@ -79,8 +101,26 @@ class Status : public Interface
         /** @brief OCC device object to do bind and unbind */
         Device device;
 
+        /** @brief Subscribe to host control signal
+         *
+         *  Once the OCC reset is requested, BMC sends that message to host.
+         *  If the host does not ack the message, then there would be a timeout
+         *  and we need to catch that to log an error
+         **/
+        sdbusplus::bus::match_t hostControlSignal;
+
         /** @brief Callback handler when device errors are detected */
         void deviceErrorHandler();
+
+        /** @brief Callback function on host control signals
+         *
+         *  @param[in]  msg - Data associated with subscribed signal
+         */
+        void hostControlEvent(sdbusplus::message::message& msg);
+
+        /** @brief Sends a message to host control command handler to reset OCC
+         */
+        void resetOCC();
 };
 
 } // namespace occ
diff --git a/test/Makefile.am b/test/Makefile.am
index a680245..d644de8 100644
--- a/test/Makefile.am
+++ b/test/Makefile.am
@@ -20,4 +20,5 @@ utest_LDADD = $(top_builddir)/powercap.o \
 	$(top_builddir)/occ_status.o \
 	$(top_builddir)/occ_device.o \
 	$(top_builddir)/occ_errors.o \
+	$(top_builddir)/utils.o \
 	$(top_builddir)/org/open_power/OCC/Device/error.o
diff --git a/utils.cpp b/utils.cpp
new file mode 100644
index 0000000..1dfdcef
--- /dev/null
+++ b/utils.cpp
@@ -0,0 +1,53 @@
+#include
+#include
+#include
+#include
+namespace open_power
+{
+namespace occ
+{
+
+// For throwing exceptions
+using namespace phosphor::logging;
+using InternalFailure = sdbusplus::xyz::openbmc_project::Common::
+                            Error::InternalFailure;
+
+std::string getService(sdbusplus::bus::bus& bus,
+                       const std::string& intf,
+                       const std::string& path)
+{
+    auto mapperCall = bus.new_method_call("xyz.openbmc_project.ObjectMapper",
+                                          "/xyz/openbmc_project/object_mapper",
+                                          "xyz.openbmc_project.ObjectMapper",
+                                          "GetObject");
+
+    mapperCall.append(path);
+    mapperCall.append(std::vector({intf}));
+
+    auto mapperResponseMsg = bus.call(mapperCall);
+
+    if (mapperResponseMsg.is_method_error())
+    {
+        log("ERROR in getting service",
+                entry("PATH=%s",path.c_str()),
+                entry("INTERFACE=%s",intf.c_str()));
+
+        elog();
+    }
+
+    std::map> mapperResponse;
+    mapperResponseMsg.read(mapperResponse);
+
+    if (mapperResponse.empty())
+    {
+        log("ERROR reading mapper response",
+                entry("PATH=%s",path.c_str()),
+                entry("INTERFACE=%s",intf.c_str()));
+
+        elog();
+    }
+    return mapperResponse.begin()->first;
+}
+
+} // namespace occ
+} // namespace open_power
diff --git a/utils.hpp b/utils.hpp
new file mode 100644
index 0000000..51f9832
--- /dev/null
+++ b/utils.hpp
@@ -0,0 +1,23 @@
+#pragma once
+
+#include
+#include
+namespace open_power
+{
+namespace occ
+{
+/**
+ * @brief Gets the D-Bus Service name for the input D-Bus path and interface
+ *
+ * @param[in] bus  -  Bus handler
+ * @param[in] intf -  Interface
+ * @param[in] path -  Object Path
+ *
+ * @return  Service name
+ * @error   InternalFailure exception thrown
+ */
+std::string getService(sdbusplus::bus::bus& bus,
+                       const std::string& intf,
+                       const std::string& path);
+} // namespace occ
+} // namespace open_power
-- 
cgit v1.2.1