PEL: Notifier support for 'host full'

The host firmware may have a limited size for its staging area before it passes the PELs through to the OS, and this area may fill up with PELs it can't send if there are too many or the OS isn't up yet. In this case, it will send down an 'Ack PEL' PLDM command with a special response indicating this host full condition. The PLDM daemon will then call a method on this daemon to let it know. This command handles the host full condition on the HostNotifier class. When this is set: * The PEL that hit this condition will be put back on the queue to be sent again. * No new PELs will be sent up, except as noted below * A 60s timer will be started at the end of which another attempt will be made to send a PEL, in the hopes the condition went away. - If it didn't go away, this process will repeat. - If it did go away, a successful ack will be received and things will proceed as normal. Signed-off-by: Matt Spinler <spinler@us.ibm.com> Change-Id: Iaeb38f43f7acc595bcff234ba50cedf8188b3d9b
author: Matt Spinler <spinler@us.ibm.com> 2019-12-12 13:11:09 -0600
committer: Matt Spinler <spinler@us.ibm.com> 2020-01-27 08:06:26 -0600
commit: 41293cb851e00807996950f34cb1ec4e410d6931 (patch)
tree: 7718da9d2712f39a25d2b86ee0735989b3469422
parent: cc3b64aebb1760750888324f116d11a952acd203 (diff)
download: phosphor-logging-41293cb851e00807996950f34cb1ec4e410d6931.tar.gz
phosphor-logging-41293cb851e00807996950f34cb1ec4e410d6931.zip
4 files changed, 228 insertions, 4 deletions
diff --git a/extensions/openpower-pels/host_notifier.cpp b/extensions/openpower-pels/host_notifier.cpp
index b4e92a8..ca30340 100644
--- a/extensions/openpower-pels/host_notifier.cpp
+++ b/extensions/openpower-pels/host_notifier.cpp
@@ -30,7 +30,10 @@ HostNotifier::HostNotifier(Repository& repo, DataInterfaceBase& dataIface,
     _repo(repo),
     _dataIface(dataIface), _hostIface(std::move(hostIface)),
     _retryTimer(_hostIface->getEvent(),
-                std::bind(std::mem_fn(&HostNotifier::retryTimerExpired), this))
+                std::bind(std::mem_fn(&HostNotifier::retryTimerExpired), this)),
+    _hostFullTimer(
+        _hostIface->getEvent(),
+        std::bind(std::mem_fn(&HostNotifier::hostFullTimerExpired), this))
 {
     // Subscribe to be told about new PELs.
     _repo.subscribeToAdds(subscriptionName,
@@ -155,7 +158,8 @@ void HostNotifier::newLogCallback(const PEL& pel)
 
     _pelQueue.push_back(pel.id());
 
-    if (!_dataIface.isHostUp())
+    // Notify shouldn't happen if host is down or full
+    if (!_dataIface.isHostUp() || _hostFull)
     {
         return;
     }
@@ -194,7 +198,8 @@ void HostNotifier::dispatch(sdeventplus::source::EventBase& source)
 
 void HostNotifier::doNewLogNotify()
 {
-    if (!_dataIface.isHostUp() || _retryTimer.isEnabled())
+    if (!_dataIface.isHostUp() || _retryTimer.isEnabled() ||
+        _hostFullTimer.isEnabled())
     {
         return;
     }
@@ -264,6 +269,7 @@ void HostNotifier::doNewLogNotify()
 void HostNotifier::hostStateChange(bool hostUp)
 {
     _retryCount = 0;
+    _hostFull = false;
 
     if (hostUp && !_pelQueue.empty())
     {
@@ -282,6 +288,11 @@ void HostNotifier::hostStateChange(bool hostUp)
         }
 
         _sentPELs.clear();
+
+        if (_hostFullTimer.isEnabled())
+        {
+            _hostFullTimer.setEnabled(false);
+        }
     }
 }
 
@@ -298,7 +309,8 @@ void HostNotifier::commandResponse(ResponseStatus status)
 
         _repo.setPELHostTransState(id, TransmissionState::sent);
 
-        if (!_pelQueue.empty())
+        // If the host is full, don't send off the next PEL
+        if (!_hostFull && !_pelQueue.empty())
         {
             doNewLogNotify();
         }
@@ -324,6 +336,11 @@ void HostNotifier::retryTimerExpired()
     }
 }
 
+void HostNotifier::hostFullTimerExpired()
+{
+    doNewLogNotify();
+}
+
 void HostNotifier::stopCommand()
 {
     _retryCount = 0;
@@ -355,6 +372,51 @@ void HostNotifier::ackPEL(uint32_t id)
     {
         _sentPELs.erase(sent);
     }
+
+    // An ack means the host is no longer full
+    if (_hostFullTimer.isEnabled())
+    {
+        _hostFullTimer.setEnabled(false);
+    }
+
+    if (_hostFull)
+    {
+        _hostFull = false;
+
+        // Start sending PELs again, from the event loop
+        if (!_pelQueue.empty())
+        {
+            scheduleDispatch();
+        }
+    }
+}
+
+void HostNotifier::setHostFull(uint32_t id)
+{
+    log<level::INFO>("Received Host full indication", entry("PEL_ID=0x%X", id));
+
+    _hostFull = true;
+
+    // This PEL needs to get re-sent
+    auto sent = std::find(_sentPELs.begin(), _sentPELs.end(), id);
+    if (sent != _sentPELs.end())
+    {
+        _sentPELs.erase(sent);
+        _repo.setPELHostTransState(id, TransmissionState::newPEL);
+
+        if (std::find(_pelQueue.begin(), _pelQueue.end(), id) ==
+            _pelQueue.end())
+        {
+            _pelQueue.push_front(id);
+        }
+    }
+
+    // The only PELs that will be sent when the
+    // host is full is from this timer callback.
+    if (!_hostFullTimer.isEnabled())
+    {
+        _hostFullTimer.restartOnce(_hostIface->getHostFullRetryDelay());
+    }
 }
 
 } // namespace openpower::pels
diff --git a/extensions/openpower-pels/host_notifier.hpp b/extensions/openpower-pels/host_notifier.hpp
index d160dd4..21bd072 100644
--- a/extensions/openpower-pels/host_notifier.hpp
+++ b/extensions/openpower-pels/host_notifier.hpp
@@ -87,10 +87,36 @@ class HostNotifier
      *
      * This means the PEL never needs to be sent up again.
      *
+     * If the host was previously full, it is also an indication
+     * it no longer is.
+     *
      * @param[in] id - The PEL ID
      */
     void ackPEL(uint32_t id);
 
+    /**
+     * @brief Called when the host does not have room for more
+     *        PELs at this time.
+     *
+     * This can happen when an OS isn't running yet, and the
+     * staging area to hold the PELs before sending them up
+     * to the OS is full.  This will stop future PEls from being
+     * sent up, as explained below.
+     *
+     * The PEL with this ID will need to be sent again, so its
+     * state is set back to 'new', and it is removed from the list
+     * of already sent PELs.
+     *
+     * A timer will be started, if it isn't already running, to
+     * issue another send in the hopes that space has been freed
+     * up by then (Receiving an ackPEL response is also an
+     * indication of this if there happened to have been other
+     * PELs in flight).
+     *
+     * @param[in] id - The PEL ID
+     */
+    void setHostFull(uint32_t id);
+
   private:
     /**
      * @brief This function gets called by the Repository class
@@ -171,6 +197,15 @@ class HostNotifier
     void retryTimerExpired();
 
     /**
+     * @brief The function called when the 'host full' retry timer
+     *        expires.
+     *
+     * This will re-issue a command to try again with the PEL at
+     * the front of the queue.
+     */
+    void hostFullTimerExpired();
+
+    /**
      * @brief Stops an in progress command
      *
      * In progress meaning after the send but before the response.
@@ -217,11 +252,23 @@ class HostNotifier
     size_t _retryCount = 0;
 
     /**
+     * @brief Indicates if the host has said it is full and does not
+     *        currently have the space for more PELs.
+     */
+    bool _hostFull = false;
+
+    /**
      * @brief The command retry timer.
      */
     sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic> _retryTimer;
 
     /**
+     * @brief The host full timer, used to retry sending a PEL if the host
+     *        said it is full.
+     */
+    sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic> _hostFullTimer;
+
+    /**
      * @brief The object used to dispatch a new PEL send from the
      *        event loop, so the calling function can be returned from
      *        first.
diff --git a/test/openpower-pels/host_notifier_test.cpp b/test/openpower-pels/host_notifier_test.cpp
index 62c79b2..cd30e8d 100644
--- a/test/openpower-pels/host_notifier_test.cpp
+++ b/test/openpower-pels/host_notifier_test.cpp
@@ -643,3 +643,106 @@ TEST_F(HostNotifierTest, TestPowerCycleAndAcks)
 
     EXPECT_EQ(notifier.queueSize(), 0);
 }
+
+// Test the host full condition
+TEST_F(HostNotifierTest, TestHostFull)
+{
+    // The full interaction with the host is:
+    // BMC:  new PEL available
+    // Host: ReadPELFile  (not modeled here)
+    // Host: Ack(id) (if not full), or HostFull(id)
+    // BMC: if full and any new PELs come in, don't sent them
+    // Start a timer and try again
+    // Host responds with either Ack or full
+    // and repeat
+
+    Repository repo{repoPath};
+    MockDataInterface dataIface;
+
+    sdeventplus::Event sdEvent{event};
+
+    std::unique_ptr<HostInterface> hostIface =
+        std::make_unique<MockHostInterface>(event, dataIface);
+
+    MockHostInterface& mockHostIface =
+        reinterpret_cast<MockHostInterface&>(*hostIface);
+
+    HostNotifier notifier{repo, dataIface, std::move(hostIface)};
+
+    auto send = [&mockHostIface](uint32_t id, uint32_t size) {
+        return mockHostIface.send(0);
+    };
+
+    EXPECT_CALL(mockHostIface, sendNewLogCmd(_, _))
+        .WillRepeatedly(Invoke(send));
+
+    dataIface.changeHostState(true);
+
+    // Add and dispatch/send one PEL
+    auto pel = makePEL();
+    auto id = pel->id();
+    repo.add(pel);
+    runEvents(sdEvent, 2);
+
+    EXPECT_EQ(mockHostIface.numCmdsProcessed(), 1);
+    EXPECT_EQ(notifier.queueSize(), 0);
+
+    // Host is full
+    notifier.setHostFull(id);
+
+    // It goes back on the queue
+    EXPECT_EQ(notifier.queueSize(), 1);
+
+    // The transmission state goes back to new
+    Repository::LogID i{Repository::LogID::Pel{id}};
+    auto data = repo.getPELData(i);
+    PEL pelFromRepo{*data};
+    EXPECT_EQ(pelFromRepo.hostTransmissionState(), TransmissionState::newPEL);
+
+    // Clock it, nothing should be sent still.
+    runEvents(sdEvent, 1);
+
+    EXPECT_EQ(mockHostIface.numCmdsProcessed(), 1);
+    EXPECT_EQ(notifier.queueSize(), 1);
+
+    // Add another PEL and clock it, still nothing sent
+    pel = makePEL();
+    repo.add(pel);
+    runEvents(sdEvent, 2);
+    EXPECT_EQ(mockHostIface.numCmdsProcessed(), 1);
+    EXPECT_EQ(notifier.queueSize(), 2);
+
+    // Let the host full timer expire to trigger a retry.
+    // Add some extra event passes just to be sure nothing new is sent.
+    runEvents(sdEvent, 5, mockHostIface.getHostFullRetryDelay());
+
+    // The timer expiration will send just the 1, not both
+    EXPECT_EQ(mockHostIface.numCmdsProcessed(), 2);
+    EXPECT_EQ(notifier.queueSize(), 1);
+
+    // Host still full
+    notifier.setHostFull(id);
+
+    // Let the host full timer attempt again
+    runEvents(sdEvent, 2, mockHostIface.getHostFullRetryDelay());
+    EXPECT_EQ(mockHostIface.numCmdsProcessed(), 3);
+
+    // Add yet another PEL with the retry timer expired.
+    // It shouldn't get sent out.
+    pel = makePEL();
+    repo.add(pel);
+    runEvents(sdEvent, 2);
+    EXPECT_EQ(mockHostIface.numCmdsProcessed(), 3);
+
+    // The last 2 PELs still on the queue
+    EXPECT_EQ(notifier.queueSize(), 2);
+
+    // Host no longer full, it finally acks the first PEL
+    notifier.ackPEL(id);
+
+    // Now the remaining 2 PELs will be dispatched
+    runEvents(sdEvent, 3);
+
+    EXPECT_EQ(mockHostIface.numCmdsProcessed(), 5);
+    EXPECT_EQ(notifier.queueSize(), 0);
+}
diff --git a/test/openpower-pels/mocks.hpp b/test/openpower-pels/mocks.hpp
index dcce142..5bec579 100644
--- a/test/openpower-pels/mocks.hpp
+++ b/test/openpower-pels/mocks.hpp
@@ -100,6 +100,18 @@ class MockHostInterface : public HostInterface
     }
 
     /**
+     * @brief Returns the amount of time to wait before retrying if the
+     *        host firmware's PEL storage was full and it can't store
+     *        any more logs until it is freed up somehow.
+     *
+     * @return milliseconds - The amount of time to wait
+     */
+    virtual std::chrono::milliseconds getHostFullRetryDelay() const override
+    {
+        return std::chrono::milliseconds(20);
+    }
+
+    /**
      * @brief Returns the number of commands processed
      */
     size_t numCmdsProcessed() const
author	Matt Spinler <spinler@us.ibm.com>	2019-12-12 13:11:09 -0600
committer	Matt Spinler <spinler@us.ibm.com>	2020-01-27 08:06:26 -0600
commit	41293cb851e00807996950f34cb1ec4e410d6931 (patch)
tree	7718da9d2712f39a25d2b86ee0735989b3469422
parent	cc3b64aebb1760750888324f116d11a952acd203 (diff)
download	phosphor-logging-41293cb851e00807996950f34cb1ec4e410d6931.tar.gz phosphor-logging-41293cb851e00807996950f34cb1ec4e410d6931.zip