From 636577f44fe3fc951538c1119ed0da8ac9a40932 Mon Sep 17 00:00:00 2001 From: "Edward A. James" Date: Fri, 6 Oct 2017 10:53:55 -0500 Subject: Add OCC present count detection and watch Add a Presence child class of Error to handle detecting the number of OCCs available. Add an instance of this Presence class if the Device detects that it is the master OCC, since the number of present OCCs is only reported by the master OCC. When a change to the number of OCCs reported is detected, compare with the number of OCCs determined to be active by the Manager, and if there is a mismatch, follow the usual error path (reset OCC, etc). Partially resolves openbmc/openbmc#2285 See https://gerrit.openbmc-project.xyz/#/c/7843/ Change-Id: Idbaca52b307992d9b01fe15439ab746ef6d64397 Signed-off-by: Edward A. James --- Makefile.am | 2 ++ occ_device.cpp | 17 +++++++++++++++++ occ_device.hpp | 27 ++++++++++++++++++++++++--- occ_errors.cpp | 3 ++- occ_errors.hpp | 35 +++++++++++++++++----------------- occ_manager.cpp | 4 +++- occ_manager.hpp | 9 +++++++-- occ_presence.cpp | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ occ_presence.hpp | 52 +++++++++++++++++++++++++++++++++++++++++++++++++++ occ_status.hpp | 4 ++++ test/utest.cpp | 5 ++++- 11 files changed, 190 insertions(+), 25 deletions(-) create mode 100644 occ_presence.cpp create mode 100644 occ_presence.hpp diff --git a/Makefile.am b/Makefile.am index a2b263e..3044e38 100644 --- a/Makefile.am +++ b/Makefile.am @@ -6,6 +6,7 @@ noinst_HEADERS = \ occ_errors.hpp \ occ_events.hpp \ occ_finder.hpp \ + occ_presence.hpp \ utils.hpp noinst_LTLIBRARIES = libocc_control.la @@ -21,6 +22,7 @@ libocc_control_la_SOURCES = \ occ_status.cpp \ occ_device.cpp \ occ_errors.cpp \ + occ_presence.cpp \ powercap.cpp \ org/open_power/OCC/Device/error.cpp \ occ_finder.cpp \ diff --git a/occ_device.cpp b/occ_device.cpp index 865aef2..8129502 100644 --- a/occ_device.cpp +++ b/occ_device.cpp @@ -1,3 +1,4 @@ +#include #include "occ_device.hpp" namespace open_power @@ -8,5 +9,21 @@ namespace occ fs::path Device::bindPath = fs::path(OCC_HWMON_PATH) / "bind"; fs::path Device::unBindPath = fs::path(OCC_HWMON_PATH) / "unbind"; +bool Device::master() const +{ + int master; + auto masterFile = fs::path(DEV_PATH) / config / "occ_master"; + std::ifstream file(masterFile, std::ios::in); + + if (!file) + { + return false; + } + + file >> master; + file.close(); + return (master != 0); +} + } // namespace occ } // namespace open_power diff --git a/occ_device.hpp b/occ_device.hpp index 3f7dff4..7ffb0ef 100644 --- a/occ_device.hpp +++ b/occ_device.hpp @@ -4,6 +4,7 @@ #include #include "occ_events.hpp" #include "occ_errors.hpp" +#include "occ_presence.hpp" #include "config.h" namespace open_power @@ -11,6 +12,7 @@ namespace open_power namespace occ { +class Manager; namespace fs = std::experimental::filesystem; /** @class Device @@ -30,10 +32,12 @@ class Device * * @param[in] event - Unique ptr reference to sd_event * @param[in] name - OCC instance name + * @param[in] manager - OCC manager instance * @param[in] callback - Optional callback on errors */ Device(EventPtr& event, const std::string& name, + const Manager& manager, std::function callBack = nullptr) : #ifdef I2C_OCC config(name), @@ -41,7 +45,11 @@ class Device config(name + '-' + "dev0"), #endif errorFile(fs::path(config) / "occ_error"), - error(event, errorFile, callBack) + error(event, errorFile, callBack), + presence(event, + fs::path(config) / "occs_present", + manager, + callBack) { // Nothing to do here } @@ -76,13 +84,20 @@ class Device /** @brief Starts to monitor for errors */ inline void addErrorWatch() { - return error.addWatch(); + if (master()) + { + presence.addWatch(); + } + + error.addWatch(); } /** @brief stops monitoring for errors */ inline void removeErrorWatch() { - return error.removeWatch(); + // we can always safely remove watch even if we don't add it + presence.removeWatch(); + error.removeWatch(); } private: @@ -106,6 +121,9 @@ class Device /** Abstraction of error monitoring */ Error error; + /** Abstraction of OCC presence monitoring */ + Presence presence; + /** @brief file writer to achieve bind and unbind * * @param[in] filename - Name of file to be written @@ -120,6 +138,9 @@ class Device file.close(); return; } + + /** @brief Returns if device represents the master OCC */ + bool master() const; }; } // namespace occ diff --git a/occ_errors.cpp b/occ_errors.cpp index bc01c91..fc24310 100644 --- a/occ_errors.cpp +++ b/occ_errors.cpp @@ -93,7 +93,6 @@ void Error::removeWatch() int Error::processEvents(sd_event_source* es, int fd, uint32_t revents, void* userData) { - log("Error file updated"); auto error = static_cast(userData); error->analyzeEvent(); @@ -118,6 +117,8 @@ void Error::analyzeEvent() // A non-zero data indicates an error condition // Let the caller take appropriate action on this auto data = readFile(len); + log("Error file updated", + entry("ERROR=%s", data.c_str())); if (data.empty() || data.front() == NO_ERROR) { diff --git a/occ_errors.hpp b/occ_errors.hpp index e3820d1..070e72c 100644 --- a/occ_errors.hpp +++ b/occ_errors.hpp @@ -61,15 +61,6 @@ class Error /** @brief event source wrapped in unique_ptr */ EventSourcePtr eventSource; - /** Error file */ - const fs::path file; - - /** @brief Optional function to call on error scenario */ - std::function callBack; - - /** @brief File descriptor to watch for errors */ - int fd = -1; - /** @brief Current state of error watching */ bool watching = false; @@ -79,13 +70,6 @@ class Error /** @brief Opens the file and populates fd */ void openFile(); - /** @brief Reads file data - * - * @return data read. Since its a /sysfs entry, - * it would be a string - */ - std::string readFile(int) const; - /** @brief Callback handler when the FD has some activity on it * * @param[in] es - Populated event source @@ -103,7 +87,24 @@ class Error * and makes a callback to error handler if the * content denotes an error condition */ - void analyzeEvent(); + virtual void analyzeEvent(); + + protected: + /** @brief File descriptor to watch for errors */ + int fd = -1; + + /** Error file */ + const fs::path file; + + /** @brief Optional function to call on error scenario */ + std::function callBack; + + /** @brief Reads file data + * + * @return data read. Since its a /sysfs entry, + * it would be a string + */ + std::string readFile(int) const; }; } // namespace occ diff --git a/occ_manager.cpp b/occ_manager.cpp index 8b99b1f..7d51490 100644 --- a/occ_manager.cpp +++ b/occ_manager.cpp @@ -70,6 +70,7 @@ void Manager::createObjects(const std::string& occ) bus, event, path.c_str(), + *this, std::bind(std::mem_fn(&Manager::statusCallBack), this, std::placeholders::_1))); @@ -132,7 +133,8 @@ void Manager::initStatusObjects() std::make_unique( bus, event, - path.c_str())); + path.c_str(), + *this)); } } #endif diff --git a/occ_manager.hpp b/occ_manager.hpp index 1aaffc8..542c7d9 100644 --- a/occ_manager.hpp +++ b/occ_manager.hpp @@ -23,8 +23,8 @@ struct Manager Manager() = delete; Manager(const Manager&) = delete; Manager& operator=(const Manager&) = delete; - Manager(Manager&&) = default; - Manager& operator=(Manager&&) = default; + Manager(Manager&&) = delete; + Manager& operator=(Manager&&) = delete; ~Manager() = default; /** @brief Adds OCC pass-through and status objects on the bus @@ -46,6 +46,11 @@ struct Manager #endif } + inline auto getNumOCCs() const + { + return activeCount; + } + private: /** @brief Checks if the CPU inventory is present and if so, creates * the occ D-Bus objects. Else, registers a handler to be diff --git a/occ_presence.cpp b/occ_presence.cpp new file mode 100644 index 0000000..f91521a --- /dev/null +++ b/occ_presence.cpp @@ -0,0 +1,57 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include "occ_presence.hpp" +#include "occ_manager.hpp" +#include "elog-errors.hpp" + +namespace open_power +{ +namespace occ +{ + +// Reads the occs_present file and analyzes the data +void Presence::analyzeEvent() +{ + using namespace phosphor::logging; + using namespace sdbusplus::org::open_power::OCC::Device::Error; + + // Get the number of bytes to read + int len = -1; + auto r = ioctl(fd, FIONREAD, &len); + if (r < 0) + { + elog( + phosphor::logging::org::open_power::OCC::Device:: + ConfigFailure::CALLOUT_ERRNO(errno), + phosphor::logging::org::open_power::OCC::Device:: + ConfigFailure::CALLOUT_DEVICE_PATH(file.c_str())); + } + + auto data = readFile(len); + if (data.empty()) + { + return; + } + + // Let stoi determine the base + auto occsPresent = std::stoi(data, nullptr, 0); + if (manager.getNumOCCs() != occsPresent) + { + log("OCC presence mismatch", + entry("BMC_OCCS=%d", manager.getNumOCCs(), + entry("OCC_OCCS=%d", occsPresent))); + if (callBack) + { + callBack(); + } + } +} + +} // namespace occ +} // namespace open_power diff --git a/occ_presence.hpp b/occ_presence.hpp new file mode 100644 index 0000000..321dacb --- /dev/null +++ b/occ_presence.hpp @@ -0,0 +1,52 @@ +#pragma once + +#include "occ_errors.hpp" +namespace open_power +{ +namespace occ +{ + +class Manager; + +/** @class Presence + * @brief Monitors the number of OCCs present + */ +class Presence : public Error +{ + public: + Presence() = delete; + Presence(const Presence&) = delete; + Presence& operator=(const Presence&) = delete; + Presence(Presence&&) = default; + Presence& operator=(Presence&&) = default; + + /** @brief Constructs the Presence object + * + * @param[in] event - Reference to sd_event unique_ptr + * @param[in] file - File used by driver to communicate errors + * @param[in] mgr - OCC manager instance + * @param[in] callBack - Optional function callback on error condition + */ + Presence(EventPtr& event, + const fs::path& file, + const Manager& mgr, + std::function callBack = nullptr) : + Error(event, file, callBack), + manager(mgr) + { + // Nothing to do here. + } + + private: + /** Store the manager instance to enable getting number of OCCs */ + const Manager& manager; + + /** @brief When the error event is received, analyzes it + * and makes a callback to error handler if the + * content denotes an error condition + */ + void analyzeEvent() override; +}; + +} // namespace occ +} // namespace open_power diff --git a/occ_status.hpp b/occ_status.hpp index a334a2b..55ee539 100644 --- a/occ_status.hpp +++ b/occ_status.hpp @@ -14,6 +14,7 @@ namespace open_power namespace occ { +class Manager; namespace Base = sdbusplus::org::open_power::OCC::server; using Interface = sdbusplus::server::object::object; @@ -48,12 +49,14 @@ class Status : public Interface * @param[in] bus - DBus bus to attach to * @param[in] event - sd_event unique pointer reference * @param[in] path - DBus object path + * @param[in] manager - OCC manager instance * @param[in] callBack - Callback handler to invoke during * property change */ Status(sdbusplus::bus::bus& bus, EventPtr& event, const char* path, + const Manager& manager, std::function callBack = nullptr) : Interface(bus, path, true), bus(bus), @@ -66,6 +69,7 @@ class Status : public Interface #else name + std::to_string(instance + 1), #endif + manager, std::bind(&Status::deviceErrorHandler, this)), hostControlSignal( bus, diff --git a/test/utest.cpp b/test/utest.cpp index 9731209..dc80cc2 100644 --- a/test/utest.cpp +++ b/test/utest.cpp @@ -1,5 +1,6 @@ #include #include +#include #include "powercap.hpp" using namespace open_power::occ; @@ -11,7 +12,8 @@ class VerifyOccInput : public ::testing::Test bus(sdbusplus::bus::new_default()), rc(sd_event_default(&event)), eventP(event), - occStatus(bus, eventP, "/test/path/occ1"), + manager(bus, eventP), + occStatus(bus, eventP, "/test/path/occ1", manager), pcap(bus,occStatus) { EXPECT_GE(rc, 0); @@ -25,6 +27,7 @@ class VerifyOccInput : public ::testing::Test int rc; open_power::occ::EventPtr eventP; + Manager manager; Status occStatus; powercap::PowerCap pcap; }; -- cgit v1.2.1