summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorZane Shelley <zshelle@us.ibm.com>2017-08-02 11:51:31 -0500
committerDean Sanner <dsanner@us.ibm.com>2017-08-02 21:43:17 -0400
commitbbb94013c39af35e86427553219f9ec3278ca97f (patch)
treeb111b8ac43dac7052bd0be092c4cff04b2cbb5e2
parent2dcac6ebd423ddf42f31a2a588dde1923a7a35d1 (diff)
downloadtalos-hostboot-bbb94013c39af35e86427553219f9ec3278ca97f.tar.gz
talos-hostboot-bbb94013c39af35e86427553219f9ec3278ca97f.zip
MDIA: adjust timeout for FSP based machines
Change-Id: I6728d63916d89413721725305b0115bbd8d3120e Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/44111 Reviewed-by: Benjamin J. Weisenbeck <bweisenb@us.ibm.com> Reviewed-by: Caleb N. Palmer <cnpalmer@us.ibm.com> Reviewed-by: Daniel M. Crowell <dcrowell@us.ibm.com> Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com> Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com> Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com> Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com> Reviewed-by: Dean Sanner <dsanner@us.ibm.com>
-rw-r--r--src/usr/diag/mdia/mdiasm.C79
-rw-r--r--src/usr/diag/mdia/mdiasm.H18
-rw-r--r--src/usr/diag/prdf/plat/prdfPlatServices.C8
3 files changed, 54 insertions, 51 deletions
diff --git a/src/usr/diag/mdia/mdiasm.C b/src/usr/diag/mdia/mdiasm.C
index 8b99272f8..2f86c4e1a 100644
--- a/src/usr/diag/mdia/mdiasm.C
+++ b/src/usr/diag/mdia/mdiasm.C
@@ -44,6 +44,7 @@
#include <initservice/istepdispatcherif.H>
#include <ipmi/ipmiwatchdog.H>
#include <config.h>
+#include <initservice/initserviceif.H>
using namespace TARGETING;
using namespace ERRORLOG;
@@ -53,6 +54,27 @@ using namespace DeviceFW;
namespace MDIA
{
+// Maint cmd timeout values are in nanosecs. This is just for easy conversions.
+static const uint64_t NANOSEC_PER_SEC = 1000000000;
+
+// HW timeout value (in seconds).
+static const uint64_t MAINT_CMD_HW_TIMEOUT = 30;
+
+// Nimbus DD1.0 has a workaround that will likely cause the command to exceed
+// the normal timeout value. In test 110 seconds was not enough on ZZ systems.
+// Bumping up to 300 seconds.
+static const uint64_t MAINT_CMD_HW_TIMEOUT_DD10 = 300;
+
+// When continuous traces are enabled, Hostboot will likely be throttled because
+// of the sheer amount of traces that need to be processed. 30 minutes seems to
+// be working so far.
+static const uint64_t MAINT_CMD_HW_TIMEOUT_LONG = 1800;
+
+// The software timeout will be 10 minutes. Note that we will use the hardware
+// timeout and commit informational error logs each time that expires until it
+// eventually reaches the software threshold. This value contains the threshold.
+static const uint64_t MAINT_CMD_SW_TIMEOUT_TH = 600 / MAINT_CMD_HW_TIMEOUT;
+
void StateMachine::running(bool & o_running)
{
mutex_lock(&iv_mutex);
@@ -236,6 +258,7 @@ fapi2::TargetType getMdiaTargetType()
return targetType;
}
+// Returns the calculated timeout value in nanoseconds.
uint64_t getTimeoutValue()
{
// Out maintenance command timeout value will differ depending on a few
@@ -243,43 +266,49 @@ uint64_t getTimeoutValue()
// return it.
// Start with the default timeout value.
- uint64_t timeout = MAINT_CMD_TIMEOUT;
+ uint64_t timeout = MAINT_CMD_HW_TIMEOUT; // in seconds
// If continuous tracing is enabled.
- TargetHandle_t sys = nullptr;
- targetService().getTopLevelTarget(sys);
- HbSettings hbSettings = sys->getAttr<ATTR_HB_SETTINGS>();
-
- if ( hbSettings.traceContinuous && timeout < MAINT_CMD_TIMEOUT_LONG )
+ if ( timeout < MAINT_CMD_HW_TIMEOUT_LONG )
{
- timeout = MAINT_CMD_TIMEOUT_LONG;
+ TargetHandle_t sys = nullptr;
+ targetService().getTopLevelTarget(sys);
+ HbSettings hbSettings = sys->getAttr<ATTR_HB_SETTINGS>();
+
+ if ( hbSettings.traceContinuous )
+ {
+ timeout = MAINT_CMD_HW_TIMEOUT_LONG;
+ }
}
// Nimbus DD1.0 workaround.
- TARGETING::Target* masterProc = nullptr;
- TARGETING::targetService().masterProcChipTargetHandle(masterProc);
-
- if ( MODEL_NIMBUS == masterProc->getAttr<ATTR_MODEL>() &&
- 0x10 == masterProc->getAttr<ATTR_EC>() &&
- timeout < MAINT_CMD_TIMEOUT_DD10 )
+ if ( timeout < MAINT_CMD_HW_TIMEOUT_DD10 )
{
- timeout = MAINT_CMD_TIMEOUT_DD10;
+ TARGETING::Target* masterProc = nullptr;
+ TARGETING::targetService().masterProcChipTargetHandle(masterProc);
+
+ if ( MODEL_NIMBUS == masterProc->getAttr<ATTR_MODEL>() &&
+ 0x10 == masterProc->getAttr<ATTR_EC>() )
+ {
+ timeout = MAINT_CMD_HW_TIMEOUT_DD10;
+ }
}
- // Ensure that the MDIA timeout is less than the watchdog timer.
- if ( timeout >= (IPMIWATCHDOG::DEFAULT_WATCHDOG_COUNTDOWN*NANOSEC_PER_SEC) )
+ // Ensure that the timeout is less than the watchdog timer.
+ // NOTE: This should only be done on BMC based machines. The watchdog timer
+ // is not checked on FSP based machines.
+ if ( !INITSERVICE::spBaseServicesEnabled() &&
+ timeout >= IPMIWATCHDOG::DEFAULT_WATCHDOG_COUNTDOWN )
{
- // If the watchdog timer for some reason happens to be 10 sec or less,
- // just set the MDIA timeout to the watchdog timeout.
- // Else set it to ten seconds lower than the watchdog timer.
+ // If the watchdog timer for some reason happens to be 10 seconds or
+ // less, just set the MDIA timeout to the watchdog timeout. Otherwise,
+ // set it to ten seconds less than the watchdog timer.
timeout = ( IPMIWATCHDOG::DEFAULT_WATCHDOG_COUNTDOWN <= 10 )
- ? ( IPMIWATCHDOG::DEFAULT_WATCHDOG_COUNTDOWN *
- NANOSEC_PER_SEC )
- : ( (IPMIWATCHDOG::DEFAULT_WATCHDOG_COUNTDOWN-10) *
- NANOSEC_PER_SEC );
+ ? IPMIWATCHDOG::DEFAULT_WATCHDOG_COUNTDOWN
+ : IPMIWATCHDOG::DEFAULT_WATCHDOG_COUNTDOWN - 10;
}
- return timeout;
+ return timeout * NANOSEC_PER_SEC;
}
// Do the setup for CE thresholds
@@ -447,7 +476,7 @@ void StateMachine::processCommandTimeout(const MonitorIDs & i_monitorIDs)
if(firData & ~mskData)
{
// Committing an info log to help debug SW timeout
- if((*wit)->timeoutCnt >= MAINT_CMD_TIMEOUT_LOG)
+ if((*wit)->timeoutCnt >= MAINT_CMD_SW_TIMEOUT_TH)
{
MDIA_FAST("sm: committing a SW timeout info log "
"for HUID:0x%08X", get_huid(target));
diff --git a/src/usr/diag/mdia/mdiasm.H b/src/usr/diag/mdia/mdiasm.H
index 5e076f5ea..41c37ed02 100644
--- a/src/usr/diag/mdia/mdiasm.H
+++ b/src/usr/diag/mdia/mdiasm.H
@@ -41,24 +41,6 @@
namespace MDIA
{
-// Maint cmd timeout values are in nanosecs. This is just for easy conversions.
-static const uint64_t NANOSEC_PER_SEC = 1000000000;
-
-// HW timeout value - 30 secs
-static const uint64_t MAINT_CMD_TIMEOUT = 30 * NANOSEC_PER_SEC;
-
-// Nimbus DD1.0 has a workaround that will go over 30 seconds. Will bump it up
-// to 110 secs for now. May consider lowering it later, but only if there is a
-// problem.
-static const uint64_t MAINT_CMD_TIMEOUT_DD10 = 110 * NANOSEC_PER_SEC;
-
-// Longer MBA timeout to be used when continuous tracing is enabled - 30 min
-static const uint64_t MAINT_CMD_TIMEOUT_LONG = 1800 * NANOSEC_PER_SEC;
-
-// Commit an info log for SW timeout every 10 mins
-static const uint64_t MAINT_CMD_TIMEOUT_LOG =
- ( 600 * NANOSEC_PER_SEC ) / MAINT_CMD_TIMEOUT;
-
/**
* @brief work flow phases
*/
diff --git a/src/usr/diag/prdf/plat/prdfPlatServices.C b/src/usr/diag/prdf/plat/prdfPlatServices.C
index 15641c525..d0df00755 100644
--- a/src/usr/diag/prdf/plat/prdfPlatServices.C
+++ b/src/usr/diag/prdf/plat/prdfPlatServices.C
@@ -103,15 +103,7 @@ void initiateUnitDump( TargetHandle_t i_target,
bool isSpConfigFsp()
{
- #ifdef __HOSTBOOT_RUNTIME
-
- return false; // Should never have an FSP when using HBRT.
-
- #else
-
return INITSERVICE::spBaseServicesEnabled();
-
- #endif
}
//------------------------------------------------------------------------------
OpenPOWER on IntegriCloud