summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEdward A. James <eajames@us.ibm.com>2017-10-06 10:53:55 -0500
committerBrad Bishop <bradleyb@fuzziesquirrel.com>2017-11-17 20:50:09 +0000
commit636577f44fe3fc951538c1119ed0da8ac9a40932 (patch)
treecded6020e29358bd8913d6f7b2b5d56cd8f42204
parent5e177970328acb51d7213d137b6b47cde0451780 (diff)
downloadopenpower-occ-control-636577f44fe3fc951538c1119ed0da8ac9a40932.zip
openpower-occ-control-636577f44fe3fc951538c1119ed0da8ac9a40932.tar.gz
Add OCC present count detection and watch
Add a Presence child class of Error to handle detecting the number of OCCs available. Add an instance of this Presence class if the Device detects that it is the master OCC, since the number of present OCCs is only reported by the master OCC. When a change to the number of OCCs reported is detected, compare with the number of OCCs determined to be active by the Manager, and if there is a mismatch, follow the usual error path (reset OCC, etc). Partially resolves openbmc/openbmc#2285 See https://gerrit.openbmc-project.xyz/#/c/7843/ Change-Id: Idbaca52b307992d9b01fe15439ab746ef6d64397 Signed-off-by: Edward A. James <eajames@us.ibm.com>
-rw-r--r--Makefile.am2
-rw-r--r--occ_device.cpp17
-rw-r--r--occ_device.hpp27
-rw-r--r--occ_errors.cpp3
-rw-r--r--occ_errors.hpp35
-rw-r--r--occ_manager.cpp4
-rw-r--r--occ_manager.hpp9
-rw-r--r--occ_presence.cpp57
-rw-r--r--occ_presence.hpp52
-rw-r--r--occ_status.hpp4
-rw-r--r--test/utest.cpp5
11 files changed, 190 insertions, 25 deletions
diff --git a/Makefile.am b/Makefile.am
index a2b263e..3044e38 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -6,6 +6,7 @@ noinst_HEADERS = \
occ_errors.hpp \
occ_events.hpp \
occ_finder.hpp \
+ occ_presence.hpp \
utils.hpp
noinst_LTLIBRARIES = libocc_control.la
@@ -21,6 +22,7 @@ libocc_control_la_SOURCES = \
occ_status.cpp \
occ_device.cpp \
occ_errors.cpp \
+ occ_presence.cpp \
powercap.cpp \
org/open_power/OCC/Device/error.cpp \
occ_finder.cpp \
diff --git a/occ_device.cpp b/occ_device.cpp
index 865aef2..8129502 100644
--- a/occ_device.cpp
+++ b/occ_device.cpp
@@ -1,3 +1,4 @@
+#include <iostream>
#include "occ_device.hpp"
namespace open_power
@@ -8,5 +9,21 @@ namespace occ
fs::path Device::bindPath = fs::path(OCC_HWMON_PATH) / "bind";
fs::path Device::unBindPath = fs::path(OCC_HWMON_PATH) / "unbind";
+bool Device::master() const
+{
+ int master;
+ auto masterFile = fs::path(DEV_PATH) / config / "occ_master";
+ std::ifstream file(masterFile, std::ios::in);
+
+ if (!file)
+ {
+ return false;
+ }
+
+ file >> master;
+ file.close();
+ return (master != 0);
+}
+
} // namespace occ
} // namespace open_power
diff --git a/occ_device.hpp b/occ_device.hpp
index 3f7dff4..7ffb0ef 100644
--- a/occ_device.hpp
+++ b/occ_device.hpp
@@ -4,6 +4,7 @@
#include <experimental/filesystem>
#include "occ_events.hpp"
#include "occ_errors.hpp"
+#include "occ_presence.hpp"
#include "config.h"
namespace open_power
@@ -11,6 +12,7 @@ namespace open_power
namespace occ
{
+class Manager;
namespace fs = std::experimental::filesystem;
/** @class Device
@@ -30,10 +32,12 @@ class Device
*
* @param[in] event - Unique ptr reference to sd_event
* @param[in] name - OCC instance name
+ * @param[in] manager - OCC manager instance
* @param[in] callback - Optional callback on errors
*/
Device(EventPtr& event,
const std::string& name,
+ const Manager& manager,
std::function<void()> callBack = nullptr) :
#ifdef I2C_OCC
config(name),
@@ -41,7 +45,11 @@ class Device
config(name + '-' + "dev0"),
#endif
errorFile(fs::path(config) / "occ_error"),
- error(event, errorFile, callBack)
+ error(event, errorFile, callBack),
+ presence(event,
+ fs::path(config) / "occs_present",
+ manager,
+ callBack)
{
// Nothing to do here
}
@@ -76,13 +84,20 @@ class Device
/** @brief Starts to monitor for errors */
inline void addErrorWatch()
{
- return error.addWatch();
+ if (master())
+ {
+ presence.addWatch();
+ }
+
+ error.addWatch();
}
/** @brief stops monitoring for errors */
inline void removeErrorWatch()
{
- return error.removeWatch();
+ // we can always safely remove watch even if we don't add it
+ presence.removeWatch();
+ error.removeWatch();
}
private:
@@ -106,6 +121,9 @@ class Device
/** Abstraction of error monitoring */
Error error;
+ /** Abstraction of OCC presence monitoring */
+ Presence presence;
+
/** @brief file writer to achieve bind and unbind
*
* @param[in] filename - Name of file to be written
@@ -120,6 +138,9 @@ class Device
file.close();
return;
}
+
+ /** @brief Returns if device represents the master OCC */
+ bool master() const;
};
} // namespace occ
diff --git a/occ_errors.cpp b/occ_errors.cpp
index bc01c91..fc24310 100644
--- a/occ_errors.cpp
+++ b/occ_errors.cpp
@@ -93,7 +93,6 @@ void Error::removeWatch()
int Error::processEvents(sd_event_source* es, int fd,
uint32_t revents, void* userData)
{
- log<level::INFO>("Error file updated");
auto error = static_cast<Error*>(userData);
error->analyzeEvent();
@@ -118,6 +117,8 @@ void Error::analyzeEvent()
// A non-zero data indicates an error condition
// Let the caller take appropriate action on this
auto data = readFile(len);
+ log<level::INFO>("Error file updated",
+ entry("ERROR=%s", data.c_str()));
if (data.empty() ||
data.front() == NO_ERROR)
{
diff --git a/occ_errors.hpp b/occ_errors.hpp
index e3820d1..070e72c 100644
--- a/occ_errors.hpp
+++ b/occ_errors.hpp
@@ -61,15 +61,6 @@ class Error
/** @brief event source wrapped in unique_ptr */
EventSourcePtr eventSource;
- /** Error file */
- const fs::path file;
-
- /** @brief Optional function to call on error scenario */
- std::function<void()> callBack;
-
- /** @brief File descriptor to watch for errors */
- int fd = -1;
-
/** @brief Current state of error watching */
bool watching = false;
@@ -79,13 +70,6 @@ class Error
/** @brief Opens the file and populates fd */
void openFile();
- /** @brief Reads file data
- *
- * @return data read. Since its a /sysfs entry,
- * it would be a string
- */
- std::string readFile(int) const;
-
/** @brief Callback handler when the FD has some activity on it
*
* @param[in] es - Populated event source
@@ -103,7 +87,24 @@ class Error
* and makes a callback to error handler if the
* content denotes an error condition
*/
- void analyzeEvent();
+ virtual void analyzeEvent();
+
+ protected:
+ /** @brief File descriptor to watch for errors */
+ int fd = -1;
+
+ /** Error file */
+ const fs::path file;
+
+ /** @brief Optional function to call on error scenario */
+ std::function<void()> callBack;
+
+ /** @brief Reads file data
+ *
+ * @return data read. Since its a /sysfs entry,
+ * it would be a string
+ */
+ std::string readFile(int) const;
};
} // namespace occ
diff --git a/occ_manager.cpp b/occ_manager.cpp
index 8b99b1f..7d51490 100644
--- a/occ_manager.cpp
+++ b/occ_manager.cpp
@@ -70,6 +70,7 @@ void Manager::createObjects(const std::string& occ)
bus,
event,
path.c_str(),
+ *this,
std::bind(std::mem_fn(&Manager::statusCallBack),
this, std::placeholders::_1)));
@@ -132,7 +133,8 @@ void Manager::initStatusObjects()
std::make_unique<Status>(
bus,
event,
- path.c_str()));
+ path.c_str(),
+ *this));
}
}
#endif
diff --git a/occ_manager.hpp b/occ_manager.hpp
index 1aaffc8..542c7d9 100644
--- a/occ_manager.hpp
+++ b/occ_manager.hpp
@@ -23,8 +23,8 @@ struct Manager
Manager() = delete;
Manager(const Manager&) = delete;
Manager& operator=(const Manager&) = delete;
- Manager(Manager&&) = default;
- Manager& operator=(Manager&&) = default;
+ Manager(Manager&&) = delete;
+ Manager& operator=(Manager&&) = delete;
~Manager() = default;
/** @brief Adds OCC pass-through and status objects on the bus
@@ -46,6 +46,11 @@ struct Manager
#endif
}
+ inline auto getNumOCCs() const
+ {
+ return activeCount;
+ }
+
private:
/** @brief Checks if the CPU inventory is present and if so, creates
* the occ D-Bus objects. Else, registers a handler to be
diff --git a/occ_presence.cpp b/occ_presence.cpp
new file mode 100644
index 0000000..f91521a
--- /dev/null
+++ b/occ_presence.cpp
@@ -0,0 +1,57 @@
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+#include <phosphor-logging/log.hpp>
+#include <phosphor-logging/elog.hpp>
+#include <xyz/openbmc_project/Common/error.hpp>
+#include <org/open_power/OCC/Device/error.hpp>
+#include "occ_presence.hpp"
+#include "occ_manager.hpp"
+#include "elog-errors.hpp"
+
+namespace open_power
+{
+namespace occ
+{
+
+// Reads the occs_present file and analyzes the data
+void Presence::analyzeEvent()
+{
+ using namespace phosphor::logging;
+ using namespace sdbusplus::org::open_power::OCC::Device::Error;
+
+ // Get the number of bytes to read
+ int len = -1;
+ auto r = ioctl(fd, FIONREAD, &len);
+ if (r < 0)
+ {
+ elog<ConfigFailure>(
+ phosphor::logging::org::open_power::OCC::Device::
+ ConfigFailure::CALLOUT_ERRNO(errno),
+ phosphor::logging::org::open_power::OCC::Device::
+ ConfigFailure::CALLOUT_DEVICE_PATH(file.c_str()));
+ }
+
+ auto data = readFile(len);
+ if (data.empty())
+ {
+ return;
+ }
+
+ // Let stoi determine the base
+ auto occsPresent = std::stoi(data, nullptr, 0);
+ if (manager.getNumOCCs() != occsPresent)
+ {
+ log<level::INFO>("OCC presence mismatch",
+ entry("BMC_OCCS=%d", manager.getNumOCCs(),
+ entry("OCC_OCCS=%d", occsPresent)));
+ if (callBack)
+ {
+ callBack();
+ }
+ }
+}
+
+} // namespace occ
+} // namespace open_power
diff --git a/occ_presence.hpp b/occ_presence.hpp
new file mode 100644
index 0000000..321dacb
--- /dev/null
+++ b/occ_presence.hpp
@@ -0,0 +1,52 @@
+#pragma once
+
+#include "occ_errors.hpp"
+namespace open_power
+{
+namespace occ
+{
+
+class Manager;
+
+/** @class Presence
+ * @brief Monitors the number of OCCs present
+ */
+class Presence : public Error
+{
+ public:
+ Presence() = delete;
+ Presence(const Presence&) = delete;
+ Presence& operator=(const Presence&) = delete;
+ Presence(Presence&&) = default;
+ Presence& operator=(Presence&&) = default;
+
+ /** @brief Constructs the Presence object
+ *
+ * @param[in] event - Reference to sd_event unique_ptr
+ * @param[in] file - File used by driver to communicate errors
+ * @param[in] mgr - OCC manager instance
+ * @param[in] callBack - Optional function callback on error condition
+ */
+ Presence(EventPtr& event,
+ const fs::path& file,
+ const Manager& mgr,
+ std::function<void()> callBack = nullptr) :
+ Error(event, file, callBack),
+ manager(mgr)
+ {
+ // Nothing to do here.
+ }
+
+ private:
+ /** Store the manager instance to enable getting number of OCCs */
+ const Manager& manager;
+
+ /** @brief When the error event is received, analyzes it
+ * and makes a callback to error handler if the
+ * content denotes an error condition
+ */
+ void analyzeEvent() override;
+};
+
+} // namespace occ
+} // namespace open_power
diff --git a/occ_status.hpp b/occ_status.hpp
index a334a2b..55ee539 100644
--- a/occ_status.hpp
+++ b/occ_status.hpp
@@ -14,6 +14,7 @@ namespace open_power
namespace occ
{
+class Manager;
namespace Base = sdbusplus::org::open_power::OCC::server;
using Interface = sdbusplus::server::object::object<Base::Status>;
@@ -48,12 +49,14 @@ class Status : public Interface
* @param[in] bus - DBus bus to attach to
* @param[in] event - sd_event unique pointer reference
* @param[in] path - DBus object path
+ * @param[in] manager - OCC manager instance
* @param[in] callBack - Callback handler to invoke during
* property change
*/
Status(sdbusplus::bus::bus& bus,
EventPtr& event,
const char* path,
+ const Manager& manager,
std::function<void(bool)> callBack = nullptr)
: Interface(bus, path, true),
bus(bus),
@@ -66,6 +69,7 @@ class Status : public Interface
#else
name + std::to_string(instance + 1),
#endif
+ manager,
std::bind(&Status::deviceErrorHandler, this)),
hostControlSignal(
bus,
diff --git a/test/utest.cpp b/test/utest.cpp
index 9731209..dc80cc2 100644
--- a/test/utest.cpp
+++ b/test/utest.cpp
@@ -1,5 +1,6 @@
#include <gtest/gtest.h>
#include <occ_events.hpp>
+#include <occ_manager.hpp>
#include "powercap.hpp"
using namespace open_power::occ;
@@ -11,7 +12,8 @@ class VerifyOccInput : public ::testing::Test
bus(sdbusplus::bus::new_default()),
rc(sd_event_default(&event)),
eventP(event),
- occStatus(bus, eventP, "/test/path/occ1"),
+ manager(bus, eventP),
+ occStatus(bus, eventP, "/test/path/occ1", manager),
pcap(bus,occStatus)
{
EXPECT_GE(rc, 0);
@@ -25,6 +27,7 @@ class VerifyOccInput : public ::testing::Test
int rc;
open_power::occ::EventPtr eventP;
+ Manager manager;
Status occStatus;
powercap::PowerCap pcap;
};
OpenPOWER on IntegriCloud