summaryrefslogtreecommitdiffstats
path: root/app
diff options
context:
space:
mode:
authorWilliam A. Kennington III <wak@google.com>2018-06-15 10:38:01 -0700
committerEmily Shaffer <emilyshaffer@google.com>2018-09-21 21:18:06 +0000
commitbae471cb4e9faeaf183664252b51e8c39d71fc95 (patch)
tree232536e41160a071d53760936968ba2e0af8ab5f /app
parent1f4bc1f079243df22aa1d47e82625620ff2de634 (diff)
downloadphosphor-host-ipmid-bae471cb4e9faeaf183664252b51e8c39d71fc95.zip
phosphor-host-ipmid-bae471cb4e9faeaf183664252b51e8c39d71fc95.tar.gz
app/watchdog: Only log internal failures once
Logging internal failures to the phosphor-logger is a slow process that can take up to 5 seconds. If we do this for each watchdog reset for a relatively fast watchdog we will clog up the channel and fill the ESEL log with errors. We probably don't want that kind of information to build up. Instead, only report errors to the ESEL when the state of the internal watchdog transitions from HEALTHY -> UNHEALTHY. Tested: Works on zaius when booting and terminating the watchdog daemon while it is ticking. Produces the expected single error in the log and only delays the IPMI channel once. Change-Id: I8110ba66c4a85e188666a34cb6d055bdbec30622 Signed-off-by: William A. Kennington III <wak@google.com>
Diffstat (limited to 'app')
-rw-r--r--app/watchdog.cpp45
1 files changed, 35 insertions, 10 deletions
diff --git a/app/watchdog.cpp b/app/watchdog.cpp
index ad167df..f6818cb 100644
--- a/app/watchdog.cpp
+++ b/app/watchdog.cpp
@@ -14,11 +14,32 @@
#include "host-ipmid/ipmid-api.h"
+using phosphor::logging::commit;
using phosphor::logging::level;
using phosphor::logging::log;
-using phosphor::logging::report;
using sdbusplus::xyz::openbmc_project::Common::Error::InternalFailure;
+static bool lastCallSuccessful = false;
+
+void reportError()
+{
+ // We don't want to fill the SEL with errors if the daemon dies and doesn't
+ // come back but the watchdog keeps on ticking. Instead, we only report the
+ // error if we haven't reported one since the last successful call
+ if (!lastCallSuccessful)
+ {
+ return;
+ }
+ lastCallSuccessful = false;
+
+ // TODO: This slow down the end of the IPMI transaction waiting
+ // for the commit to finish. commit<>() can take at least 5 seconds
+ // to complete. 5s is very slow for an IPMI command and ends up
+ // congesting the IPMI channel needlessly, especially if the watchdog
+ // is ticking fairly quickly and we have some transient issues.
+ commit<InternalFailure>();
+}
+
ipmi_ret_t ipmi_app_watchdog_reset(ipmi_netfn_t netfn, ipmi_cmd_t cmd,
ipmi_request_t request,
ipmi_response_t response,
@@ -36,29 +57,31 @@ ipmi_ret_t ipmi_app_watchdog_reset(ipmi_netfn_t netfn, ipmi_cmd_t cmd,
// so it can configure actions and timeouts
if (!wd_service.getInitialized())
{
+ lastCallSuccessful = true;
return IPMI_WDOG_CC_NOT_INIT;
}
// The ipmi standard dictates we enable the watchdog during reset
wd_service.resetTimeRemaining(true);
+ lastCallSuccessful = true;
return IPMI_CC_OK;
}
catch (const InternalFailure& e)
{
- report<InternalFailure>();
+ reportError();
return IPMI_CC_UNSPECIFIED_ERROR;
}
catch (const std::exception& e)
{
const std::string e_str = std::string("wd_reset: ") + e.what();
log<level::ERR>(e_str.c_str());
- report<InternalFailure>();
+ reportError();
return IPMI_CC_UNSPECIFIED_ERROR;
}
catch (...)
{
log<level::ERR>("wd_reset: Unknown Error");
- report<InternalFailure>();
+ reportError();
return IPMI_CC_UNSPECIFIED_ERROR;
}
}
@@ -156,6 +179,7 @@ ipmi_ret_t ipmi_app_watchdog_set(ipmi_netfn_t netfn, ipmi_cmd_t cmd,
// Mark as initialized so that future resets behave correctly
wd_service.setInitialized(true);
+ lastCallSuccessful = true;
return IPMI_CC_OK;
}
catch (const std::domain_error&)
@@ -164,20 +188,20 @@ ipmi_ret_t ipmi_app_watchdog_set(ipmi_netfn_t netfn, ipmi_cmd_t cmd,
}
catch (const InternalFailure& e)
{
- report<InternalFailure>();
+ reportError();
return IPMI_CC_UNSPECIFIED_ERROR;
}
catch (const std::exception& e)
{
const std::string e_str = std::string("wd_set: ") + e.what();
log<level::ERR>(e_str.c_str());
- report<InternalFailure>();
+ reportError();
return IPMI_CC_UNSPECIFIED_ERROR;
}
catch (...)
{
log<level::ERR>("wd_set: Unknown Error");
- report<InternalFailure>();
+ reportError();
return IPMI_CC_UNSPECIFIED_ERROR;
}
}
@@ -264,24 +288,25 @@ ipmi_ret_t ipmi_app_watchdog_get(ipmi_netfn_t netfn, ipmi_cmd_t cmd,
memcpy(response, &res, sizeof(res));
*data_len = sizeof(res);
+ lastCallSuccessful = true;
return IPMI_CC_OK;
}
catch (const InternalFailure& e)
{
- report<InternalFailure>();
+ reportError();
return IPMI_CC_UNSPECIFIED_ERROR;
}
catch (const std::exception& e)
{
const std::string e_str = std::string("wd_get: ") + e.what();
log<level::ERR>(e_str.c_str());
- report<InternalFailure>();
+ reportError();
return IPMI_CC_UNSPECIFIED_ERROR;
}
catch (...)
{
log<level::ERR>("wd_get: Unknown Error");
- report<InternalFailure>();
+ reportError();
return IPMI_CC_UNSPECIFIED_ERROR;
}
}
OpenPOWER on IntegriCloud