MDIA: adjust timeout for FSP based machines

Change-Id: I6728d63916d89413721725305b0115bbd8d3120e Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/44111 Reviewed-by: Benjamin J. Weisenbeck <bweisenb@us.ibm.com> Reviewed-by: Caleb N. Palmer <cnpalmer@us.ibm.com> Reviewed-by: Daniel M. Crowell <dcrowell@us.ibm.com> Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com> Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com> Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com> Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com> Reviewed-by: Dean Sanner <dsanner@us.ibm.com>
author: Zane Shelley <zshelle@us.ibm.com> 2017-08-02 11:51:31 -0500
committer: Dean Sanner <dsanner@us.ibm.com> 2017-08-02 21:43:17 -0400
commit: bbb94013c39af35e86427553219f9ec3278ca97f (patch)
tree: b111b8ac43dac7052bd0be092c4cff04b2cbb5e2
parent: 2dcac6ebd423ddf42f31a2a588dde1923a7a35d1 (diff)
download: talos-hostboot-bbb94013c39af35e86427553219f9ec3278ca97f.tar.gz
talos-hostboot-bbb94013c39af35e86427553219f9ec3278ca97f.zip
3 files changed, 54 insertions, 51 deletions
diff --git a/src/usr/diag/mdia/mdiasm.C b/src/usr/diag/mdia/mdiasm.C
index 8b99272f8..2f86c4e1a 100644
--- a/src/usr/diag/mdia/mdiasm.C
+++ b/src/usr/diag/mdia/mdiasm.C
@@ -44,6 +44,7 @@
 #include <initservice/istepdispatcherif.H>
 #include <ipmi/ipmiwatchdog.H>
 #include <config.h>
+#include <initservice/initserviceif.H>
 
 using namespace TARGETING;
 using namespace ERRORLOG;
@@ -53,6 +54,27 @@ using namespace DeviceFW;
 namespace MDIA
 {
 
+// Maint cmd timeout values are in nanosecs. This is just for easy conversions.
+static const uint64_t NANOSEC_PER_SEC = 1000000000;
+
+// HW timeout value (in seconds).
+static const uint64_t MAINT_CMD_HW_TIMEOUT = 30;
+
+// Nimbus DD1.0 has a workaround that will likely cause the command to exceed
+// the normal timeout value. In test 110 seconds was not enough on ZZ systems.
+// Bumping up to 300 seconds.
+static const uint64_t MAINT_CMD_HW_TIMEOUT_DD10 = 300;
+
+// When continuous traces are enabled, Hostboot will likely be throttled because
+// of the sheer amount of traces that need to be processed. 30 minutes seems to
+// be working so far.
+static const uint64_t MAINT_CMD_HW_TIMEOUT_LONG = 1800;
+
+// The software timeout will be 10 minutes. Note that we will use the hardware
+// timeout and commit informational error logs each time that expires until it
+// eventually reaches the software threshold. This value contains the threshold.
+static const uint64_t MAINT_CMD_SW_TIMEOUT_TH = 600 / MAINT_CMD_HW_TIMEOUT;
+
 void StateMachine::running(bool & o_running)
 {
     mutex_lock(&iv_mutex);
@@ -236,6 +258,7 @@ fapi2::TargetType getMdiaTargetType()
     return targetType;
 }
 
+// Returns the calculated timeout value in nanoseconds.
 uint64_t getTimeoutValue()
 {
     // Out maintenance command timeout value will differ depending on a few
@@ -243,43 +266,49 @@ uint64_t getTimeoutValue()
     // return it.
 
     // Start with the default timeout value.
-    uint64_t timeout = MAINT_CMD_TIMEOUT;
+    uint64_t timeout = MAINT_CMD_HW_TIMEOUT; // in seconds
 
     // If continuous tracing is enabled.
-    TargetHandle_t sys = nullptr;
-    targetService().getTopLevelTarget(sys);
-    HbSettings hbSettings = sys->getAttr<ATTR_HB_SETTINGS>();
-
-    if ( hbSettings.traceContinuous && timeout < MAINT_CMD_TIMEOUT_LONG )
+    if ( timeout < MAINT_CMD_HW_TIMEOUT_LONG )
     {
-        timeout = MAINT_CMD_TIMEOUT_LONG;
+        TargetHandle_t sys = nullptr;
+        targetService().getTopLevelTarget(sys);
+        HbSettings hbSettings = sys->getAttr<ATTR_HB_SETTINGS>();
+
+        if ( hbSettings.traceContinuous )
+        {
+            timeout = MAINT_CMD_HW_TIMEOUT_LONG;
+        }
     }
 
     // Nimbus DD1.0 workaround.
-    TARGETING::Target* masterProc = nullptr;
-    TARGETING::targetService().masterProcChipTargetHandle(masterProc);
-
-    if ( MODEL_NIMBUS == masterProc->getAttr<ATTR_MODEL>() &&
-         0x10 == masterProc->getAttr<ATTR_EC>() &&
-         timeout < MAINT_CMD_TIMEOUT_DD10 )
+    if ( timeout < MAINT_CMD_HW_TIMEOUT_DD10 )
     {
-        timeout = MAINT_CMD_TIMEOUT_DD10;
+        TARGETING::Target* masterProc = nullptr;
+        TARGETING::targetService().masterProcChipTargetHandle(masterProc);
+
+        if ( MODEL_NIMBUS == masterProc->getAttr<ATTR_MODEL>() &&
+             0x10         == masterProc->getAttr<ATTR_EC>() )
+        {
+            timeout = MAINT_CMD_HW_TIMEOUT_DD10;
+        }
     }
 
-    // Ensure that the MDIA timeout is less than the watchdog timer.
-    if ( timeout >= (IPMIWATCHDOG::DEFAULT_WATCHDOG_COUNTDOWN*NANOSEC_PER_SEC) )
+    // Ensure that the timeout is less than the watchdog timer.
+    // NOTE: This should only be done on BMC based machines. The watch dog timer
+    // is not checked on FSP based machines.
+    if ( !INITSERVICE::spBaseServicesEnabled() &&
+         timeout >= IPMIWATCHDOG::DEFAULT_WATCHDOG_COUNTDOWN )
     {
-        // If the watchdog timer for some reason happens to be 10 sec or less,
-        // just set the MDIA timeout to the watchdog timeout.
-        // Else set it to ten seconds lower than the watchdog timer.
+        // If the watchdog timer for some reason happens to be 10 seconds or
+        // less, just set the MDIA timeout to the watchdog timeout. Otherwise,
+        // set it to ten seconds less than the watchdog timer.
         timeout = ( IPMIWATCHDOG::DEFAULT_WATCHDOG_COUNTDOWN <= 10 )
-                      ? ( IPMIWATCHDOG::DEFAULT_WATCHDOG_COUNTDOWN *
-                          NANOSEC_PER_SEC )
-                      : ( (IPMIWATCHDOG::DEFAULT_WATCHDOG_COUNTDOWN-10) *
-                          NANOSEC_PER_SEC );
+                      ? IPMIWATCHDOG::DEFAULT_WATCHDOG_COUNTDOWN
+                      : IPMIWATCHDOG::DEFAULT_WATCHDOG_COUNTDOWN - 10;
     }
 
-    return timeout;
+    return timeout * NANOSEC_PER_SEC;
 }
 
 // Do the setup for CE thresholds
@@ -447,7 +476,7 @@ void StateMachine::processCommandTimeout(const MonitorIDs & i_monitorIDs)
                 if(firData & ~mskData)
                 {
                     // Committing an info log to help debug SW timeout
-                    if((*wit)->timeoutCnt >= MAINT_CMD_TIMEOUT_LOG)
+                    if((*wit)->timeoutCnt >= MAINT_CMD_SW_TIMEOUT_TH)
                     {
                         MDIA_FAST("sm: committing a SW timeout info log "
                                   "for HUID:0x%08X", get_huid(target));
diff --git a/src/usr/diag/mdia/mdiasm.H b/src/usr/diag/mdia/mdiasm.H
index 5e076f5ea..41c37ed02 100644
--- a/src/usr/diag/mdia/mdiasm.H
+++ b/src/usr/diag/mdia/mdiasm.H
@@ -41,24 +41,6 @@
 namespace MDIA
 {
 
-// Maint cmd timeout values are in nanosecs. This is just for easy conversions.
-static const uint64_t NANOSEC_PER_SEC = 1000000000;
-
-// HW timeout value - 30 secs
-static const uint64_t MAINT_CMD_TIMEOUT = 30 * NANOSEC_PER_SEC;
-
-// Nimbus DD1.0 has a workaround that will go over 30 seconds. Will bump it up
-// to 110 secs for now. May consider lowering it later, but only if there is a
-// problem.
-static const uint64_t MAINT_CMD_TIMEOUT_DD10 = 110 * NANOSEC_PER_SEC;
-
-// Longer MBA timeout to be used when continuous tracing is enabled - 30 min
-static const uint64_t MAINT_CMD_TIMEOUT_LONG = 1800 * NANOSEC_PER_SEC;
-
-// Commit an info log for SW timeout every 10 mins
-static const uint64_t MAINT_CMD_TIMEOUT_LOG =
-          ( 600 * NANOSEC_PER_SEC ) / MAINT_CMD_TIMEOUT;
-
 /**
  * @brief work flow phases
  */
diff --git a/src/usr/diag/prdf/plat/prdfPlatServices.C b/src/usr/diag/prdf/plat/prdfPlatServices.C
index 15641c525..d0df00755 100644
--- a/src/usr/diag/prdf/plat/prdfPlatServices.C
+++ b/src/usr/diag/prdf/plat/prdfPlatServices.C
@@ -103,15 +103,7 @@ void initiateUnitDump( TargetHandle_t i_target,
 
 bool isSpConfigFsp()
 {
-    #ifdef __HOSTBOOT_RUNTIME
-
-    return false; // Should never have an FSP when using HBRT.
-
-    #else
-
     return INITSERVICE::spBaseServicesEnabled();
-
-    #endif
 }
 
 //------------------------------------------------------------------------------
author	Zane Shelley <zshelle@us.ibm.com>	2017-08-02 11:51:31 -0500
committer	Dean Sanner <dsanner@us.ibm.com>	2017-08-02 21:43:17 -0400
commit	bbb94013c39af35e86427553219f9ec3278ca97f (patch)
tree	b111b8ac43dac7052bd0be092c4cff04b2cbb5e2
parent	2dcac6ebd423ddf42f31a2a588dde1923a7a35d1 (diff)
download	talos-hostboot-bbb94013c39af35e86427553219f9ec3278ca97f.tar.gz talos-hostboot-bbb94013c39af35e86427553219f9ec3278ca97f.zip