diff options
author | Zane Shelley <zshelle@us.ibm.com> | 2017-08-02 11:51:31 -0500 |
---|---|---|
committer | Dean Sanner <dsanner@us.ibm.com> | 2017-08-02 21:43:17 -0400 |
commit | bbb94013c39af35e86427553219f9ec3278ca97f (patch) | |
tree | b111b8ac43dac7052bd0be092c4cff04b2cbb5e2 | |
parent | 2dcac6ebd423ddf42f31a2a588dde1923a7a35d1 (diff) | |
download | talos-hostboot-bbb94013c39af35e86427553219f9ec3278ca97f.tar.gz talos-hostboot-bbb94013c39af35e86427553219f9ec3278ca97f.zip |
MDIA: adjust timeout for FSP based machines
Change-Id: I6728d63916d89413721725305b0115bbd8d3120e
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/44111
Reviewed-by: Benjamin J. Weisenbeck <bweisenb@us.ibm.com>
Reviewed-by: Caleb N. Palmer <cnpalmer@us.ibm.com>
Reviewed-by: Daniel M. Crowell <dcrowell@us.ibm.com>
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Reviewed-by: Dean Sanner <dsanner@us.ibm.com>
-rw-r--r-- | src/usr/diag/mdia/mdiasm.C | 79 | ||||
-rw-r--r-- | src/usr/diag/mdia/mdiasm.H | 18 | ||||
-rw-r--r-- | src/usr/diag/prdf/plat/prdfPlatServices.C | 8 |
3 files changed, 54 insertions, 51 deletions
diff --git a/src/usr/diag/mdia/mdiasm.C b/src/usr/diag/mdia/mdiasm.C index 8b99272f8..2f86c4e1a 100644 --- a/src/usr/diag/mdia/mdiasm.C +++ b/src/usr/diag/mdia/mdiasm.C @@ -44,6 +44,7 @@ #include <initservice/istepdispatcherif.H> #include <ipmi/ipmiwatchdog.H> #include <config.h> +#include <initservice/initserviceif.H> using namespace TARGETING; using namespace ERRORLOG; @@ -53,6 +54,27 @@ using namespace DeviceFW; namespace MDIA { +// Maint cmd timeout values are in nanosecs. This is just for easy conversions. +static const uint64_t NANOSEC_PER_SEC = 1000000000; + +// HW timeout value (in seconds). +static const uint64_t MAINT_CMD_HW_TIMEOUT = 30; + +// Nimbus DD1.0 has a workaround that will likely cause the command to exceed +// the normal timeout value. In test 110 seconds was not enough on ZZ systems. +// Bumping up to 300 seconds. +static const uint64_t MAINT_CMD_HW_TIMEOUT_DD10 = 300; + +// When continuous traces are enabled, Hostboot will likely be throttled because +// of the sheer amount of traces that need to be processed. 30 minutes seems to be +// working so far. +static const uint64_t MAINT_CMD_HW_TIMEOUT_LONG = 1800; + +// The software timeout will be 10 minutes. Note that we will use the hardware +// timeout and commit informational error logs each time that expires until it +// eventually reaches the software threshold. This value contains the threshold. +static const uint64_t MAINT_CMD_SW_TIMEOUT_TH = 600 / MAINT_CMD_HW_TIMEOUT; + void StateMachine::running(bool & o_running) { mutex_lock(&iv_mutex); @@ -236,6 +258,7 @@ fapi2::TargetType getMdiaTargetType() return targetType; } +// Returns the calculated timeout value in nanoseconds. uint64_t getTimeoutValue() { // Our maintenance command timeout value will differ depending on a few @@ -243,43 +266,49 @@ uint64_t getTimeoutValue() // return it. // Start with the default timeout value. 
- uint64_t timeout = MAINT_CMD_TIMEOUT; + uint64_t timeout = MAINT_CMD_HW_TIMEOUT; // in seconds // If continuous tracing is enabled. - TargetHandle_t sys = nullptr; - targetService().getTopLevelTarget(sys); - HbSettings hbSettings = sys->getAttr<ATTR_HB_SETTINGS>(); - - if ( hbSettings.traceContinuous && timeout < MAINT_CMD_TIMEOUT_LONG ) + if ( timeout < MAINT_CMD_HW_TIMEOUT_LONG ) { - timeout = MAINT_CMD_TIMEOUT_LONG; + TargetHandle_t sys = nullptr; + targetService().getTopLevelTarget(sys); + HbSettings hbSettings = sys->getAttr<ATTR_HB_SETTINGS>(); + + if ( hbSettings.traceContinuous ) + { + timeout = MAINT_CMD_HW_TIMEOUT_LONG; + } } // Nimbus DD1.0 workaround. - TARGETING::Target* masterProc = nullptr; - TARGETING::targetService().masterProcChipTargetHandle(masterProc); - - if ( MODEL_NIMBUS == masterProc->getAttr<ATTR_MODEL>() && - 0x10 == masterProc->getAttr<ATTR_EC>() && - timeout < MAINT_CMD_TIMEOUT_DD10 ) + if ( timeout < MAINT_CMD_HW_TIMEOUT_DD10 ) { - timeout = MAINT_CMD_TIMEOUT_DD10; + TARGETING::Target* masterProc = nullptr; + TARGETING::targetService().masterProcChipTargetHandle(masterProc); + + if ( MODEL_NIMBUS == masterProc->getAttr<ATTR_MODEL>() && + 0x10 == masterProc->getAttr<ATTR_EC>() ) + { + timeout = MAINT_CMD_HW_TIMEOUT_DD10; + } } // Ensure that the MDIA timeout is less than the watchdog timer. - if ( timeout >= (IPMIWATCHDOG::DEFAULT_WATCHDOG_COUNTDOWN*NANOSEC_PER_SEC) ) + // Ensure that the timeout is less than the watchdog timer. + // NOTE: This should only be done on BMC based machines. The watchdog timer + // is not checked on FSP based machines. + if ( !INITSERVICE::spBaseServicesEnabled() && + timeout >= IPMIWATCHDOG::DEFAULT_WATCHDOG_COUNTDOWN ) { - // If the watchdog timer for some reason happens to be 10 sec or less, - // just set the MDIA timeout to the watchdog timeout. - // Else set it to ten seconds lower than the watchdog timer. 
+ // If the watchdog timer for some reason happens to be 10 seconds or + // less, just set the MDIA timeout to the watchdog timeout. Otherwise, + // set it to ten seconds less than the watchdog timer. timeout = ( IPMIWATCHDOG::DEFAULT_WATCHDOG_COUNTDOWN <= 10 ) - ? ( IPMIWATCHDOG::DEFAULT_WATCHDOG_COUNTDOWN * - NANOSEC_PER_SEC ) - : ( (IPMIWATCHDOG::DEFAULT_WATCHDOG_COUNTDOWN-10) * - NANOSEC_PER_SEC ); + ? IPMIWATCHDOG::DEFAULT_WATCHDOG_COUNTDOWN + : IPMIWATCHDOG::DEFAULT_WATCHDOG_COUNTDOWN - 10; } - return timeout; + return timeout * NANOSEC_PER_SEC; } // Do the setup for CE thresholds @@ -447,7 +476,7 @@ void StateMachine::processCommandTimeout(const MonitorIDs & i_monitorIDs) if(firData & ~mskData) { // Committing an info log to help debug SW timeout - if((*wit)->timeoutCnt >= MAINT_CMD_TIMEOUT_LOG) + if((*wit)->timeoutCnt >= MAINT_CMD_SW_TIMEOUT_TH) { MDIA_FAST("sm: committing a SW timeout info log " "for HUID:0x%08X", get_huid(target)); diff --git a/src/usr/diag/mdia/mdiasm.H b/src/usr/diag/mdia/mdiasm.H index 5e076f5ea..41c37ed02 100644 --- a/src/usr/diag/mdia/mdiasm.H +++ b/src/usr/diag/mdia/mdiasm.H @@ -41,24 +41,6 @@ namespace MDIA { -// Maint cmd timeout values are in nanosecs. This is just for easy conversions. -static const uint64_t NANOSEC_PER_SEC = 1000000000; - -// HW timeout value - 30 secs -static const uint64_t MAINT_CMD_TIMEOUT = 30 * NANOSEC_PER_SEC; - -// Nimbus DD1.0 has a workaround that will go over 30 seconds. Will bump it up -// to 110 secs for now. May consider lowering it later, but only if there is a -// problem. 
-static const uint64_t MAINT_CMD_TIMEOUT_DD10 = 110 * NANOSEC_PER_SEC; - -// Longer MBA timeout to be used when continuous tracing is enabled - 30 min -static const uint64_t MAINT_CMD_TIMEOUT_LONG = 1800 * NANOSEC_PER_SEC; - -// Commit an info log for SW timeout every 10 mins -static const uint64_t MAINT_CMD_TIMEOUT_LOG = - ( 600 * NANOSEC_PER_SEC ) / MAINT_CMD_TIMEOUT; - /** * @brief work flow phases */ diff --git a/src/usr/diag/prdf/plat/prdfPlatServices.C b/src/usr/diag/prdf/plat/prdfPlatServices.C index 15641c525..d0df00755 100644 --- a/src/usr/diag/prdf/plat/prdfPlatServices.C +++ b/src/usr/diag/prdf/plat/prdfPlatServices.C @@ -103,15 +103,7 @@ void initiateUnitDump( TargetHandle_t i_target, bool isSpConfigFsp() { - #ifdef __HOSTBOOT_RUNTIME - - return false; // Should never have an FSP when using HBRT. - - #else - return INITSERVICE::spBaseServicesEnabled(); - - #endif } //------------------------------------------------------------------------------ |