summaryrefslogtreecommitdiffstats
path: root/src/usr
diff options
context:
space:
mode:
authorChristian Geddes <crgeddes@us.ibm.com>2018-03-14 18:26:16 -0500
committerDaniel M. Crowell <dcrowell@us.ibm.com>2018-04-06 10:15:43 -0400
commitba8c8bfc02ca3d42a3caf3f8f797df07487c1dab (patch)
treeb0d8d1a4797b787a3b1194f5198746d4bd92c874 /src/usr
parent02f8995967cc97988cf3cdb40b1805915517bbaf (diff)
downloadtalos-hostboot-ba8c8bfc02ca3d42a3caf3f8f797df07487c1dab.tar.gz
talos-hostboot-ba8c8bfc02ca3d42a3caf3f8f797df07487c1dab.zip
sbe_retry_handler refactor
Previously the sbe_retry_handler had logic and wording that assumed that it was being used to tell if the slave sbe booted or not. However, this code has many more use cases than that. Also there was some indirect recursion that made the code hard to follow. With this refactor the code should be easier to follow and the vocabulary used should be more generic. Change-Id: If6520197b3dd561857e336ed89d9356c1f2601d6 CQ: SW416106 RTC: 167191 Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/55896 Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com> Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com> Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com> Reviewed-by: Daniel M. Crowell <dcrowell@us.ibm.com> Tested-by: Daniel M. Crowell <dcrowell@us.ibm.com>
Diffstat (limited to 'src/usr')
-rw-r--r--src/usr/isteps/istep08/call_proc_check_slave_sbe_seeprom_complete.C40
-rw-r--r--src/usr/isteps/istep08/makefile3
-rw-r--r--src/usr/sbeio/common/common.mk30
-rw-r--r--src/usr/sbeio/common/sbe_attn.C42
-rw-r--r--src/usr/sbeio/common/sbe_retry_handler.C1295
-rw-r--r--src/usr/sbeio/makefile30
-rw-r--r--src/usr/sbeio/runtime/makefile30
-rw-r--r--src/usr/sbeio/sbe_fifodd.C43
-rw-r--r--src/usr/sbeio/sbe_psudd.C56
-rw-r--r--src/usr/sbeio/test/sbe_retry_handler_test.H6
10 files changed, 859 insertions, 716 deletions
diff --git a/src/usr/isteps/istep08/call_proc_check_slave_sbe_seeprom_complete.C b/src/usr/isteps/istep08/call_proc_check_slave_sbe_seeprom_complete.C
index 215d0e35f..4b45a8de2 100644
--- a/src/usr/isteps/istep08/call_proc_check_slave_sbe_seeprom_complete.C
+++ b/src/usr/isteps/istep08/call_proc_check_slave_sbe_seeprom_complete.C
@@ -5,7 +5,7 @@
/* */
/* OpenPOWER HostBoot Project */
/* */
-/* Contributors Listed Below - COPYRIGHT 2015,2017 */
+/* Contributors Listed Below - COPYRIGHT 2015,2018 */
/* [+] International Business Machines Corp. */
/* */
/* */
@@ -129,13 +129,24 @@ void* call_proc_check_slave_sbe_seeprom_complete( void *io_pArgs )
" on processor target %.8X",
TARGETING::get_huid(l_cpu_target));
+ //Note no PLID passed in
SBEIO::SbeRetryHandler l_SBEobj = SBEIO::SbeRetryHandler(
SBEIO::SbeRetryHandler::SBE_MODE_OF_OPERATION::ATTEMPT_REBOOT);
+ l_SBEobj.setSbeRestartMethod(SBEIO::SbeRetryHandler::
+ SBE_RESTART_METHOD::START_CBS);
+
+ // We want to tell the retry handler that we have just powered
+ // on the sbe, to distinguish this case from other cases where
+ // we have determined there is something wrong w/ the sbe and
+ // want to diagnose the problem
+ l_SBEobj.setInitialPowerOn(true);
+
l_SBEobj.main_sbe_handler(l_cpu_target);
- // No error and still functional
- if(l_cpu_target->getAttr<ATTR_HWAS_STATE>().functional)
+ // We will judge whether the SBE had a successful
+ // boot depending on whether it made it to runtime or not
+ if(l_SBEobj.isSbeAtRuntime())
{
// Set attribute indicating that SBE is started
l_cpu_target->setAttr<ATTR_SBE_IS_STARTED>(1);
@@ -173,29 +184,6 @@ void* call_proc_check_slave_sbe_seeprom_complete( void *io_pArgs )
"Running p9_extract_sbe_rc HWP"
" on processor target %.8X",
TARGETING::get_huid(l_cpu_target) );
-
- //@TODO-RTC:100963-Do something with the RETURN_ACTION
- P9_EXTRACT_SBE_RC::RETURN_ACTION l_rcAction
- = P9_EXTRACT_SBE_RC::RE_IPL;
- FAPI_INVOKE_HWP(l_errl, p9_extract_sbe_rc,
- l_fapi2ProcTarget,
- l_rcAction);
- if (l_errl)
- {
- TRACFCOMP(ISTEPS_TRACE::g_trac_isteps_trace,
- "ERROR : proc_check_slave_sbe_seeprom_complete "
- "failed, p9_extract_sbe_rc HWP returning errorlog PLID=0x%x",
- l_errl->plid());
-
- // capture the target data in the elog
- ErrlUserDetailsTarget(l_cpu_target).addToLog( l_errl );
-
- // Create IStep error log and cross reference to error that occurred
- l_stepError.addErrorDetails( l_errl );
-
- // Commit error log
- errlCommit( l_errl, HWPF_COMP_ID );
- }
**/
} // end of going through all processors
diff --git a/src/usr/isteps/istep08/makefile b/src/usr/isteps/istep08/makefile
index 6aed23e56..347d8e2b9 100644
--- a/src/usr/isteps/istep08/makefile
+++ b/src/usr/isteps/istep08/makefile
@@ -5,7 +5,7 @@
#
# OpenPOWER HostBoot Project
#
-# Contributors Listed Below - COPYRIGHT 2015,2017
+# Contributors Listed Below - COPYRIGHT 2015,2018
# [+] International Business Machines Corp.
#
#
@@ -74,7 +74,6 @@ include ${PROCEDURES_PATH}/hwp/perv/p9_start_cbs.mk
# proc_check_slave_sbe_seeprom_complete : Check Slave SBE Complete
include ${PROCEDURES_PATH}/hwp/perv/p9_check_slave_sbe_seeprom_complete.mk
-include ${PROCEDURES_PATH}/hwp/sbe/p9_get_sbe_msg_register.mk
include ${PROCEDURES_PATH}/hwp/perv/p9_getecid.mk
# host_p9_fbc_eff_config
diff --git a/src/usr/sbeio/common/common.mk b/src/usr/sbeio/common/common.mk
index 397af666f..2163f81bb 100644
--- a/src/usr/sbeio/common/common.mk
+++ b/src/usr/sbeio/common/common.mk
@@ -22,5 +22,31 @@
# permissions and limitations under the License.
#
# IBM_PROLOG_END_TAG
-SBEIO_COMMON_OBJS += sbe_attn.o
-SBEIO_COMMON_OBJS += sbe_retry_handler.o
+
+#Common .mk files to include
+include ${ROOTPATH}/procedure.rules.mk
+include ${ROOTPATH}/src/import/chips/p9/procedures/hwp/sbe/p9_get_sbe_msg_register.mk
+include ${ROOTPATH}/src/import/chips/p9/procedures/hwp/perv/p9_start_cbs.mk
+
+#Common Include Paths
+EXTRAINCDIR += ${PROCEDURES_PATH}/hwp/ffdc
+EXTRAINCDIR += ${PROCEDURES_PATH}/hwp/perv
+EXTRAINCDIR += ${PROCEDURES_PATH}/hwp/lib
+EXTRAINCDIR += ${PROCEDURES_PATH}/hwp/sbe
+EXTRAINCDIR += ${ROOTPATH}/src/import/hwpf/fapi2/include
+EXTRAINCDIR += ${ROOTPATH}/src/include/usr/fapi2
+EXTRAINCDIR += ${ROOTPATH}/src/import/chips/p9/utils
+EXTRAINCDIR += ${ROOTPATH}/src/import/chips/p9/utils/imageProcs
+EXTRAINCDIR += ${ROOTPATH}/src/import/chips/common/utils/imageProcs
+
+#Common Objects
+OBJS += p9_extract_sbe_rc.o
+OBJS += p9_ppe_common.o
+OBJS += sbe_attn.o
+OBJS += sbe_retry_handler.o
+
+#Common VPATHs
+VPATH += ${ROOTPATH}/src/usr/sbeio/common
+VPATH += ${ROOTPATH}/src/import/chips/p9/procedures/hwp/sbe/
+VPATH += ${ROOTPATH}/src/import/chips/p9/procedures/hwp/lib/
+VPATH += ${ROOTPATH}/src/import/chips/p9/procedures/hwp/perv/ \ No newline at end of file
diff --git a/src/usr/sbeio/common/sbe_attn.C b/src/usr/sbeio/common/sbe_attn.C
index 5ad8152e1..5c151b4eb 100644
--- a/src/usr/sbeio/common/sbe_attn.C
+++ b/src/usr/sbeio/common/sbe_attn.C
@@ -55,11 +55,7 @@ namespace SBEIO
TARGETING::get_huid(i_procTarg) );
errlHndl_t l_errhdl = nullptr;
- uint32_t l_sbePlid = getSbeRC(i_procTarg);
-
- TRACFCOMP( g_trac_sbeio, "handleVitalAttn> Returned SBE PLID=0x%x",
- l_sbePlid);
-
+ // TODO 167191 Full SBE Belly-Up Handling for OP
#ifdef __HOSTBOOT_RUNTIME
// Inform OPAL, SBE is currently disabled
if (TARGETING::is_sapphire_load())
@@ -74,16 +70,13 @@ namespace SBEIO
SbeRetryHandler l_sbeObj = SbeRetryHandler(
SbeRetryHandler::SBE_MODE_OF_OPERATION::ATTEMPT_REBOOT);
- // @todo - RTC:180242. Once the hreset method is finalized,
- // we can call the sbe handler with that method
- //l_sbeObj.setSbeRestartMethod(SbeRetryHandler::
- // SBE_RESTART_METHOD::HRESET);
- l_sbeObj.main_sbe_handler(i_procTarg);
+ //l_sbeObj.main_sbe_handler(i_procTarg);
+
#ifdef __HOSTBOOT_RUNTIME
// Inform OPAL the state of the SBE after a retry
- if (l_sbeObj.getSbeRestart())
+ if (l_sbeObj.isSbeAtRuntime())
{
if (TARGETING::is_sapphire_load())
{
@@ -100,31 +93,4 @@ namespace SBEIO
return l_errhdl;
}
- uint32_t getSbeRC(TARGETING::Target* i_target)
- {
- TRACFCOMP( g_trac_sbeio, ENTER_MRK "getSbeRC()");
-
- errlHndl_t l_errl = nullptr;
-
- uint32_t l_errlPlid = NULL;
- const fapi2::Target<fapi2::TARGET_TYPE_PROC_CHIP> l_fapi2ProcTarget(
- const_cast<TARGETING::Target*> (i_target));
-
- P9_EXTRACT_SBE_RC::RETURN_ACTION l_ret =
- P9_EXTRACT_SBE_RC::REIPL_UPD_SEEPROM;
- FAPI_INVOKE_HWP(l_errl, p9_extract_sbe_rc,
- l_fapi2ProcTarget, l_ret);
-
- if(l_errl)
- {
- TRACFCOMP(g_trac_sbeio, "ERROR: p9_extract_sbe_rc HWP returning "
- "errorlog PLID: 0x%x", l_errl->plid());
-
- ERRORLOG::ErrlUserDetailsTarget(i_target).addToLog(l_errl);
- l_errlPlid = l_errl->plid();
- }
-
- return l_errlPlid;
- }
-
};
diff --git a/src/usr/sbeio/common/sbe_retry_handler.C b/src/usr/sbeio/common/sbe_retry_handler.C
index e2889bf16..0af3eedb2 100644
--- a/src/usr/sbeio/common/sbe_retry_handler.C
+++ b/src/usr/sbeio/common/sbe_retry_handler.C
@@ -45,7 +45,6 @@
#include <initservice/initserviceif.H>
#include <initservice/istepdispatcherif.H>
#include <errl/errludtarget.H>
-#include <sys/time.h>
#include <util/misc.H>
#include <ipmi/ipmiwatchdog.H>
@@ -92,16 +91,16 @@ SbeRetryHandler::SbeRetryHandler(SBE_MODE_OF_OPERATION i_sbeMode,
: iv_useSDB(false)
, iv_secureModeDisabled(false) //Per HW team this should always be 0
-, iv_sbeRestarted(false)
-, iv_sbeSide(0)
-, iv_errorLogPLID(0)
, iv_callerErrorLogPLID(i_plid)
, iv_switchSidesCount(0)
, iv_currentAction(P9_EXTRACT_SBE_RC::ERROR_RECOVERED)
-, iv_currentSBEState(SBE_REG_RETURN::SBE_FAILED_TO_BOOT)
-, iv_retriggeredMain(false)
+, iv_currentSBEState(SBE_REG_RETURN::SBE_NOT_AT_RUNTIME)
+, iv_shutdownReturnCode(0)
+, iv_currentSideBootAttempts(1) // It is safe to assume that the current side has attempted to boot
+, iv_ffdcSetAction(false)
, iv_sbeMode(i_sbeMode)
-, iv_sbeRestartMethod(SBE_RESTART_METHOD::START_CBS)
+, iv_sbeRestartMethod(SBE_RESTART_METHOD::HRESET)
+, iv_initialPowerOn(false)
{
SBE_TRACF(ENTER_MRK "SbeRetryHandler::SbeRetryHandler()");
@@ -111,209 +110,380 @@ SbeRetryHandler::SbeRetryHandler(SBE_MODE_OF_OPERATION i_sbeMode,
SBE_TRACF(EXIT_MRK "SbeRetryHandler::SbeRetryHandler()");
}
-SbeRetryHandler::~SbeRetryHandler()
-{
-
-}
+SbeRetryHandler::~SbeRetryHandler() {}
void SbeRetryHandler::main_sbe_handler( TARGETING::Target * i_target )
{
SBE_TRACF(ENTER_MRK "main_sbe_handler()");
-
do
{
- errlHndl_t l_errl = NULL;
+ errlHndl_t l_errl = nullptr;
+ // Only set the secure debug bit (SDB) if we are not using xscom yet
if(!i_target->getAttr<TARGETING::ATTR_SCOM_SWITCHES>().useXscom)
{
this->iv_useSDB = true;
}
- const fapi2::Target<fapi2::TARGET_TYPE_PROC_CHIP> l_fapi2ProcTarget(
- const_cast<TARGETING::Target*> (i_target));
+ // Get the SBE status register; this will tell us what state
+ // the SBE is in. If the asyncFFDC bit is set on the sbe_reg
+ // then FFDC will be collected at this point in time.
+ // sbe_run_extract_msg_reg will return false if there was an error reading the status
+ if(!this->sbe_run_extract_msg_reg(i_target))
+ {
+ SBE_TRACF("main_sbe_handler(): Failed to get sbe register something is seriously wrong, we should always be able to read that!!");
+ //Error log should have already committed in sbe_run_extract_msg_reg for this issue
+ break;
+ }
+
+ // We will only trust the currState value if we know the SBE has just been booted.
+ // In this case we have been told by the caller that the sbe just powered on
+ // so it is safe to assume that the currState value is legit and we can trust that
+ // the sbe has booted successfully to runtime.
+ if( this->iv_initialPowerOn && (this->iv_sbeRegister.currState == SBE_STATE_RUNTIME))
+ {
+ //We have successfully powered on the SBE
+ SBE_TRACF("main_sbe_handler(): Initial power on of the SBE was a success!!");
+ break;
+ }
- bool l_retry = false;
+ //////******************************************************************
+ // If we have made it this far we can assume that something is wrong w/ the SBE
+ //////******************************************************************
- if(this->iv_sbeMode != INFORMATIONAL_ONLY)
+ // If something is wrong w/ the SBE during IPL time on a FSP based system then
+ // we will always TI and let hwsv deal with the problem. This is a unique path
+ // so we will have it handled in a separate procedure
+#ifndef __HOSTBOOT_RUNTIME
+ if(INITSERVICE::spBaseServicesEnabled())
{
- this->get_sbe_reg(i_target);
+ // This function will TI Hostboot so don't expect to return
+ handleFspIplTimeFail(i_target);
+ SBE_TRACF("main_sbe_handler(): We failed to TI the system when we should have, forcing an assert(0) call");
+ // We should never return from handleFspIplTimeFail
+ assert(0, "We have determined that there was an error with the SBE and should have TI'ed but for some reason we did not.");
+ }
+#endif
- if( (this->iv_sbeRegister.currState != SBE_STATE_RUNTIME) &&
- !(this->iv_sbeMode == SBE_ACTION_SET))
- {
- // return, false if no boot is needed, true if boot is needed.
- l_retry = this->sbe_boot_fail_handler(i_target);
- }
- else if(this->iv_sbeMode == SBE_ACTION_SET)
- {
- l_retry = true;
- }
+ // If iv_ffdcSetAction is true, that means that we found ffdc to parse
+ // this indicates that the SBE already determined what went wrong and
+ // reported the error via asyncFFDC so there is no need to
+ // run p9_extract_sbe_rc
+ // Also if the sbe is not booted at all, extract_rc will fail so we don't want to run it
+ if(!this->iv_ffdcSetAction && this->iv_sbeRegister.sbeBooted)
+ {
+ SBE_TRACF("main_sbe_handler(): No async ffdc found and sbe says it has been booted, running run p9_sbe_extract_rc.");
+ // Call the function that runs extract_rc, this needs to run to determine
+ // what broke and what our retry action should be
+ this->sbe_run_extract_rc(i_target);
+ }
+ // If we have determined that the sbe never booted
+ // then set the current action to be "restart sbe"
+ // that way we will attempt to start the sbe again
+ else if(!this->iv_sbeRegister.sbeBooted)
+ {
+ SBE_TRACF("main_sbe_handler(): SBE reports it was never booted, calling p9_sbe_extract_rc will fail. Setting action to be RESTART_SBE");
+ //Maybe commit log here saying initial start_cbs didnt run
+ this->iv_currentAction = P9_EXTRACT_SBE_RC::RESTART_SBE;
+ }
- while((this->iv_sbeRegister.currState != SBE_STATE_RUNTIME) &&
- l_retry)
- {
+ // If the mode was marked as informational that means the caller did not want
+ // any actions to take place, the caller only wanted information collected
+ if(this->iv_sbeMode == INFORMATIONAL_ONLY)
+ {
+ SBE_TRACF("main_sbe_handler(): Retry handler is being called in INFORMATIONAL mode so we are exiting without attempting any retry actions");
+ break;
+ }
- SBE_TRACF("main_sbe_handler(): current SBE state is %d, retry "
- "is %d current SBE action is %d",
- this->iv_sbeRegister.currState,
- l_retry, this->iv_currentAction);
+ // This do-while loop will continuously look at iv_currentAction, act
+ // accordingly, then read status register and determine next action.
+ // The ideal way to exit the loop is if the SBE makes it up to runtime after
+ // attempting a retry which indicates we have recovered. If the currentAction
+ // says NO_RECOVERY_ACTION then we break out of this loop. We also break out if we fail
+ // to read the sbe's status register or if we get write failures when trying to switch
+ // seeprom sides. Both of those failures indicate there is a larger problem
+ do
+ {
+ // We need to handle the following values that currentAction could be,
+ // it is possible that iv_currentAction can be any of these values except there
+ // is currently no path that will set it to be ERROR_RECOVERED
+ // ERROR_RECOVERED = 0,
+ // - We should never hit this, if we have recovered then
+ // currState should be RUNTIME
+ // RESTART_SBE = 1,
+ // RESTART_CBS = 2,
+ // - We will not listen to p9_extract_rc on HOW to restart the
+ // sbe. We will assume iv_sbeRestartMethod is correct and
+ // perform the restart method that iv_sbeRestartMethod says
+ // regardless if currentAction = RESTART_SBE or RESTART_CBS
+ // REIPL_BKP_SEEPROM = 3,
+ // REIPL_UPD_SEEPROM = 4,
+ // - We will switch the seeprom side (if we have not already)
+ // - then attempt to restart the sbe w/ iv_sbeRestartMethod
+ // NO_RECOVERY_ACTION = 5,
+ // - we deconfigure the processor we are retrying and fail out
+ //
+ // Important things to remember: we only want to attempt a single side
+ // a maximum of 2 times, and also we only want to switch sides once
+
+ SBE_TRACF("main_sbe_handler(): iv_sbeRegister.currState: %d , "
+ "iv_currentSideBootAttempts: %d , "
+ "iv_currentAction: %d , ",
+ this->iv_sbeRegister.currState,
+ this->iv_currentSideBootAttempts,
+ this->iv_currentAction);
+ if(this->iv_currentAction == P9_EXTRACT_SBE_RC::NO_RECOVERY_ACTION)
+ {
+ // There is no action possible. Gard and Callout the proc
/*@
- * @errortype
- * @severity ERRORLOG::ERRL_SEV_INFORMATIONAL
- * @moduleid SBEIO_EXTRACT_RC_HANDLER
- * @reasoncode SBEIO_EXTRACT_RC_ERROR
- * @userdata1 HUID of proc that had the SBE timeout
- * @userdata2 SBE failing code
- *
- * @devdesc SBE did not start, this function is looking at
- * the error to determine next course of action
- *
- * @custdesc The SBE did not start, we will attempt a reboot
- * if possible
- */
+ * @errortype ERRL_SEV_UNRECOVERABLE
+ * @moduleid SBEIO_EXTRACT_RC_HANDLER
+ * @reasoncode SBEIO_NO_RECOVERY_ACTION
+ * @userdata1 SBE current error
+ * @userdata2 HUID of proc
+ * @devdesc There is no recovery action on the SBE.
+ * We're deconfiguring this proc
+ * @custdesc Processor Error
+ */
l_errl = new ERRORLOG::ErrlEntry(
- ERRORLOG::ERRL_SEV_INFORMATIONAL,
- SBEIO_EXTRACT_RC_HANDLER,
- SBEIO_EXTRACT_RC_ERROR,
- TARGETING::get_huid(i_target),
- this->iv_currentAction);
-
- l_errl->collectTrace("ISTEPS_TRACE",256);
+ ERRORLOG::ERRL_SEV_UNRECOVERABLE,
+ SBEIO_EXTRACT_RC_HANDLER,
+ SBEIO_NO_RECOVERY_ACTION,
+ P9_EXTRACT_SBE_RC::NO_RECOVERY_ACTION,
+ TARGETING::get_huid(i_target));
+ l_errl->collectTrace( "ISTEPS_TRACE", 256);
+ l_errl->collectTrace( SBEIO_COMP_NAME, 256);
+ l_errl->addHwCallout( i_target,
+ HWAS::SRCI_PRIORITY_HIGH,
+ HWAS::DECONFIG,
+ HWAS::GARD_NULL );
// Set the PLID of the error log to caller's PLID,
// if provided
if (iv_callerErrorLogPLID)
{
- l_errl->plid(iv_callerErrorLogPLID);
+ l_errl->plid(iv_callerErrorLogPLID);
}
- // Commit error and continue
errlCommit(l_errl, ISTEP_COMP_ID);
+ this->iv_currentSBEState = SBE_REG_RETURN::PROC_DECONFIG;
+ SBE_TRACF("main_sbe_handler(): We have concluded there are no further recovery actions to take, deconfiguring proc and exiting handler");
+ break;
+ }
- // if no recovery action, fail out.
- if(this->iv_currentAction ==
- P9_EXTRACT_SBE_RC::NO_RECOVERY_ACTION)
+ // if the bkp_seeprom or upd_seeprom, attempt to switch sides.
+ // This is also dependent on the iv_switchSideCount.
+ // Note: we do this for upd_seeprom because we don't support
+ // updating the seeprom during IPL time
+ if((this->iv_currentAction ==
+ P9_EXTRACT_SBE_RC::REIPL_BKP_SEEPROM ||
+ this->iv_currentAction ==
+ P9_EXTRACT_SBE_RC::REIPL_UPD_SEEPROM))
+ {
+ if(this->iv_switchSidesCount >= MAX_SWITCH_SIDE_COUNT)
{
- // There is no action possible. Gard and Callout the proc
/*@
- * @errortype ERRL_SEV_UNRECOVERABLE
- * @moduleid SBEIO_EXTRACT_RC_HANDLER
- * @reasoncode SBEIO_NO_RECOVERY_ACTION
- * @userdata1 SBE current error
- * @userdata2 HUID of proc
- * @devdesc There is no recovery action on the SBE.
- * We're garding this proc
- */
+ * @errortype ERRL_SEV_PREDICTIVE
+ * @moduleid SBEIO_EXTRACT_RC_HANDLER
+ * @reasoncode SBEIO_EXCEED_MAX_SIDE_SWITCHES
+ * @userdata1 Switch Sides Count
+ * @userdata2 HUID of proc
+ * @devdesc We have already flipped seeprom sides once
+ * and we should not have attempted to flip again
+ * @custdesc Processor Error
+ */
l_errl = new ERRORLOG::ErrlEntry(
- ERRORLOG::ERRL_SEV_UNRECOVERABLE,
+ ERRORLOG::ERRL_SEV_PREDICTIVE,
SBEIO_EXTRACT_RC_HANDLER,
- SBEIO_NO_RECOVERY_ACTION,
- P9_EXTRACT_SBE_RC::NO_RECOVERY_ACTION,
+ SBEIO_EXCEED_MAX_SIDE_SWITCHES,
+ this->iv_switchSidesCount,
TARGETING::get_huid(i_target));
- l_errl->collectTrace( "ISTEPS_TRACE", 256);
- l_errl->addHwCallout( i_target,
- HWAS::SRCI_PRIORITY_HIGH,
- HWAS::DECONFIG,
- HWAS::GARD_NULL );
-
- // Cache PLID of error log
- iv_errorLogPLID = l_errl->plid();
+ l_errl->collectTrace( SBEIO_COMP_NAME, 256);
// Set the PLID of the error log to caller's PLID,
// if provided
if (iv_callerErrorLogPLID)
{
- l_errl->plid(iv_callerErrorLogPLID);
+ l_errl->plid(iv_callerErrorLogPLID);
}
-
+ errlCommit(l_errl, SBEIO_COMP_ID);
+ // Break out of loop, something bad happened and we don't want to end
+ // up in an endless loop
+ break;
+ }
+ l_errl = this->switch_sbe_sides(i_target);
+ if(l_errl)
+ {
errlCommit(l_errl, ISTEP_COMP_ID);
-
- SBE_TRACF("main_sbe_handler(): updating return value "
- "to indicate that we have deconfigured the proc");
- this->iv_currentSBEState = SBE_REG_RETURN::PROC_DECONFIG;
-
+ // If any error occurs while we are trying to switch sides
+ // this indicates big problems so we want to break out of the
+ // retry loop
break;
}
+ // Note that we do not want to continue here because we want to
+ // attempt to restart using whatever sbeRestartMethod is set to after
+ // switching seeprom sides
+ }
- // if the bkp_seeprom or upd_seeprom, attempt to switch sides.
- // This is also dependent on the iv_switchSideCount.
- if(this->iv_currentAction ==
- P9_EXTRACT_SBE_RC::REIPL_BKP_SEEPROM ||
- this->iv_currentAction ==
- P9_EXTRACT_SBE_RC::REIPL_UPD_SEEPROM)
+ if(this->iv_currentSideBootAttempts >= MAX_SIDE_BOOT_ATTEMPTS)
+ {
+ /*@
+ * @errortype ERRL_SEV_PREDICTIVE
+ * @moduleid SBEIO_EXTRACT_RC_HANDLER
+ * @reasoncode SBEIO_EXCEED_MAX_SIDE_BOOTS
+ * @userdata1 # of boots attempts on this side
+ * @userdata2 HUID of proc
+ * @devdesc We have already done the max attempts for
+ * the current seeprom side. For some reason
+ * we are attempting to do another boot.
+ * @custdesc Processor Error
+ */
+ l_errl = new ERRORLOG::ErrlEntry(
+ ERRORLOG::ERRL_SEV_PREDICTIVE,
+ SBEIO_EXTRACT_RC_HANDLER,
+ SBEIO_EXCEED_MAX_SIDE_BOOTS,
+ this->iv_currentSideBootAttempts,
+ TARGETING::get_huid(i_target));
+
+ l_errl->collectTrace( SBEIO_COMP_NAME, 256);
+
+ // Set the PLID of the error log to caller's PLID,
+ // if provided
+ if (iv_callerErrorLogPLID)
{
- l_errl = this->switch_sbe_sides(i_target);
- if(l_errl)
- {
- errlCommit(l_errl, ISTEP_COMP_ID);
- break;
- }
+ l_errl->plid(iv_callerErrorLogPLID);
}
+ errlCommit(l_errl, SBEIO_COMP_ID);
+ // Break out of loop, something bad happened and we don't want to end
+ // up in an endless loop
+ break;
+ }
+ // Look at the sbeRestartMethod instance variable to determine which method
+ // we will use to attempt the restart. In general during IPL time we will
+ // attempt CBS, during runtime we will want to use HRESET.
+ else if(this->iv_sbeRestartMethod == SBE_RESTART_METHOD::START_CBS)
+ {
+ SBE_TRACF("Invoking p9_start_cbs HWP on processor %.8X", get_huid(i_target));
+ const fapi2::Target<fapi2::TARGET_TYPE_PROC_CHIP>
+ l_fapi2_proc_target (i_target);
+
+ FAPI_INVOKE_HWP(l_errl, p9_start_cbs,
+ l_fapi2_proc_target, true);
- // Attempt SBE restart
- if(this->iv_sbeRestartMethod == SBE_RESTART_METHOD::START_CBS)
+ //Increment attempt count for this side
+ this->iv_currentSideBootAttempts++;
+
+ if(l_errl)
{
- SBE_TRACF("Invoking p9_start_cbs HWP");
- const fapi2::Target<fapi2::TARGET_TYPE_PROC_CHIP>
- l_fapi2_proc_target (i_target);
+ SBE_TRACF("ERROR: call p9_start_cbs, PLID=0x%x",
+ l_errl->plid() );
+ l_errl->collectTrace( "ISTEPS_TRACE", 256 );
+ l_errl->collectTrace( SBEIO_COMP_NAME, 256 );
+
+ // Gard the target, when SBE Retry fails
+ l_errl->addHwCallout(i_target,
+ HWAS::SRCI_PRIORITY_HIGH,
+ HWAS::NO_DECONFIG,
+ HWAS::GARD_Predictive);
- FAPI_INVOKE_HWP(l_errl, p9_start_cbs,
- l_fapi2_proc_target, true);
- if(l_errl)
+ // Set the PLID of the error log to caller's PLID,
+ // if provided
+ if (iv_callerErrorLogPLID)
{
- SBE_TRACF("ERROR: call p9_start_cbs, PLID=0x%x",
- l_errl->plid() );
- l_errl->collectTrace( "ISTEPS_TRACE", 256 );
-
- // Gard the target, when SBE Retry fails
- l_errl->addHwCallout(i_target,
- HWAS::SRCI_PRIORITY_HIGH,
- HWAS::NO_DECONFIG,
- HWAS::GARD_Predictive);
-
- // Set the PLID of the error log to caller's PLID,
- // if provided
- if (iv_callerErrorLogPLID)
- {
- l_errl->plid(iv_callerErrorLogPLID);
- }
-
- errlCommit( l_errl, ISTEP_COMP_ID);
+ l_errl->plid(iv_callerErrorLogPLID);
}
- }else
- {
- //@todo - RTC:180242 - Restart SBE
+
+ errlCommit( l_errl, ISTEP_COMP_ID);
+ // If we got an errlog while attempting start_cbs
+ // we will assume that no future retry actions
+ // will work so we will break out of the retry loop
+ break;
}
+ }else
+ {
+ //@todo RTC:180242 Right now we don't have the support
+ // to perform an hreset, when we do remove this error
+ // log and perform the hreset.
+
+ //Increment attempt count for this side
+ this->iv_currentSideBootAttempts++;
+ /*@
+ * @errortype
+ * @severity ERRORLOG::ERRL_SEV_UNRECOVERABLE
+ * @moduleid SBEIO_EXTRACT_RC_HANDLER
+ * @reasoncode SBEIO_UNSUPPORTED_REQUEST
+ * @userdata1 HUID of proc that had the SBE timeout
+ * @userdata2 SBE failing code
+ *
+ * @devdesc SBE did not start, this function is looking at
+ * the error to determine next course of action
+ *
+ * @custdesc The SBE did not start, we will attempt a reboot
+ * if possible
+ */
+ l_errl = new ERRORLOG::ErrlEntry(
+ ERRORLOG::ERRL_SEV_UNRECOVERABLE,
+ SBEIO_EXTRACT_RC_HANDLER,
+ SBEIO_UNSUPPORTED_REQUEST,
+ TARGETING::get_huid(i_target),
+ this->iv_currentAction);
+
+ l_errl->collectTrace( SBEIO_COMP_NAME, 256 );
- // Get the sbe register
- this->get_sbe_reg(i_target);
+ // Gard the proc, when SBE Retry fails
+ l_errl->addHwCallout(i_target,
+ HWAS::SRCI_PRIORITY_HIGH,
+ HWAS::NO_DECONFIG,
+ HWAS::GARD_Predictive);
- if( (this->iv_sbeRegister.currState != SBE_STATE_RUNTIME))
+ // Set the PLID of the error log to caller's PLID,
+ // if provided
+ if (iv_callerErrorLogPLID)
{
- // return, false if no boot is needed.
- l_retry = this->sbe_boot_fail_handler(i_target);
+ l_errl->plid(iv_callerErrorLogPLID);
}
+
+ errlCommit(l_errl, ISTEP_COMP_ID);
+
+ // If we got an errlog while attempting hreset
+ // we will assume that no future retry actions
+ // will work so we will exit
+ break;
}
- }
- else
- {
- // In the informational only mode, we just need enough information
- // to get the SBE RC returned from the HWP. We are running with
- // the knowledge that the SBE has failed already.
- // pass true to have log show up
- this->sbe_boot_fail_handler(i_target, true);
- this->iv_currentSBEState = SBE_FAILED_TO_BOOT;
- }
+ // We have performed the action, so make sure that ffdcSetAction is set back to 0
+ this->iv_ffdcSetAction = 0;
- this->handle_sbe_reg_value(i_target);
+ // Get the sbe register (note that if asyncFFDC bit is set in status register then
+ // we will read it in this call)
+ if(!this->sbe_run_extract_msg_reg(i_target))
+ {
+ // Error log should have already committed in sbe_run_extract_msg_reg for this issue
+ // we need to stop our recovery efforts and bail out of the retry handler
+ break;
+ }
- // if we have started the sbe, and the current action is upd_seeprom
- // or bkp_seeprom, note that we started on an unexpected side
- if(i_target->getAttr<TARGETING::ATTR_SBE_IS_STARTED>() &&
- (this->iv_currentAction == P9_EXTRACT_SBE_RC::REIPL_BKP_SEEPROM ||
- this->iv_currentAction == P9_EXTRACT_SBE_RC::REIPL_UPD_SEEPROM) )
+ // If our retry attempt failed, and we didn't see any asyncFFDC afterwards
+ if (this->iv_sbeRegister.currState != SBE_STATE_RUNTIME)
+ {
+ // Again, if ffdcSetAction is set, that means we have found FFDC
+ // already that the SBE saved away prior to failing so we don't need
+ // to run extract_rc if ffdcSetAction is true
+ if(!this->iv_ffdcSetAction)
+ {
+ SBE_TRACF("main_sbe_handler(): Failed to reach runtime after sbe restart and no asyncFFDC found. Calling p9_sbe_extract_rc.");
+ // Run extract rc to figure out why the sbe did not make it to
+ // runtime state
+ this->sbe_run_extract_rc(i_target);
+ }
+ }
+
+ } while((this->iv_sbeRegister).currState != SBE_STATE_RUNTIME);
+
+ // If we ended up switching sides we want to mark it down as
+ // an informational log
+ if(this->iv_switchSidesCount)
{
/*@
* @errortype ERRL_SEV_INFORMATIONAL
@@ -329,6 +499,7 @@ void SbeRetryHandler::main_sbe_handler( TARGETING::Target * i_target )
SBEIO_BOOTED_UNEXPECTED_SIDE,
0,TARGETING::get_huid(i_target));
l_errl->collectTrace("ISTEPS_TRACE",256);
+ l_errl->collectTrace(SBEIO_COMP_NAME,256);
// Set the PLID of the error log to caller's PLID,
// if provided
@@ -345,212 +516,106 @@ void SbeRetryHandler::main_sbe_handler( TARGETING::Target * i_target )
SBE_TRACF(EXIT_MRK "main_sbe_handler()");
}
-void SbeRetryHandler::get_sbe_reg(TARGETING::Target * i_target)
+bool SbeRetryHandler::sbe_run_extract_msg_reg(TARGETING::Target * i_target)
{
- SBE_TRACF(ENTER_MRK "get_sbe_reg()");
+ SBE_TRACF(ENTER_MRK "sbe_run_extract_msg_reg()");
errlHndl_t l_errl = nullptr;
- do
+ //Assume that reading the status succeeded
+ bool l_statusReadSuccess = true;
+
+ // This function will poll the status register for 60 seconds
+ // waiting for the SBE to reach runtime
+ // we will exit the polling before 60 seconds if we either reach
+ // runtime, or get an error reading the status reg, or if the asyncFFDC
+ // bit is set
+ l_errl = this->sbe_poll_status_reg(i_target);
+
+ // If there is no error getting the status register, and the SBE
+ // did not make it to runtime AND the asyncFFDC bit is set, we will
+ // use the FFDC to decide our actions rather than using p9_extract_sbe_rc
+ if((!l_errl) &&
+ (this->iv_sbeRegister.currState != SBE_STATE_RUNTIME) &&
+ this->iv_sbeRegister.asyncFFDC)
{
- l_errl = this->sbe_timeout_handler(i_target);
-
- if((!l_errl) && (this->iv_sbeRegister.currState != SBE_STATE_RUNTIME))
- {
- // See if async FFDC bit is set in SBE register
- if(this->iv_sbeRegister.asyncFFDC)
- {
- bool l_flowCtrl = this->sbe_get_ffdc_handler(i_target);
-
- if(l_flowCtrl)
- {
- break;
- }
- }
- }
- else if (l_errl)
- {
- SBE_TRACF("ERROR: call get_sbe_reg, PLID=0x%x", l_errl->plid() );
-
- // capture the target data in the elog
- ERRORLOG::ErrlUserDetailsTarget(i_target).addToLog( l_errl );
+ SBE_TRACF("SUCCESS: sbe_run_extract_msg_reg completed okay for proc 0x%.8X . "
+ "There was asyncFFDC found though so we will run the FFDC parser",
+ TARGETING::get_huid(i_target));
+ // The SBE has responded to an asynchronous request that hostboot
+ // made with FFDC indicating an error has occurred.
+ // This should be the path we hit when we are waiting to see
+ // if the sbe boots
+ this->sbe_get_ffdc_handler(i_target);
+ }
+ // If there was an error log that means that we failed to read the
+ // cfam register to get the SBE status, something is seriously wrong
+ // if we hit this
+ else if (l_errl)
+ {
+ l_statusReadSuccess = false;
+ SBE_TRACF("ERROR: call sbe_run_extract_msg_reg, PLID=0x%x", l_errl->plid() );
- // Commit error log
- errlCommit( l_errl, HWPF_COMP_ID );
- }
- // No error and still functional
- else if(i_target->getAttr<TARGETING::ATTR_HWAS_STATE>().functional)
+ l_errl->collectTrace(SBEIO_COMP_NAME,256);
+ // Set the PLID of the error log to caller's PLID,
+ // if provided
+ if (iv_callerErrorLogPLID)
{
- // Set attribute indicating that SBE is started
- i_target->setAttr<TARGETING::ATTR_SBE_IS_STARTED>(1);
- this->iv_sbeRestarted = true;
-
- SBE_TRACF("SUCCESS: get_sbe_reg completed okay for proc 0x%.8X",
- TARGETING::get_huid(i_target));
+ l_errl->plid(iv_callerErrorLogPLID);
}
- //@TODO-RTC:100963 - this should match the logic in
- //call_proc_check_slave_sbe_seeprom.C
- } while(0);
- SBE_TRACF(EXIT_MRK "get_sbe_reg()");
-
-}
-
-void SbeRetryHandler::handle_sbe_reg_value(TARGETING::Target * i_target)
-{
- errlHndl_t l_errl = NULL;
-
- SBE_TRACF(ENTER_MRK "handle_sbe_reg_value()");
-
- const fapi2::Target<fapi2::TARGET_TYPE_PROC_CHIP>
- l_fapi2_proc_target(i_target);
+ // capture the target data in the elog
+ ERRORLOG::ErrlUserDetailsTarget(i_target).addToLog( l_errl );
- switch(this->iv_currentSBEState)
+ // Commit error log
+ errlCommit( l_errl, HWPF_COMP_ID );
+ }
+ // No error, able to read the sbe status register okay
+ // No guarantees that the SBE made it to runtime
+ else
{
- case SbeRetryHandler::SBE_REG_RETURN::HWP_ERROR:
- {
- SBE_TRACF("handle_sbe_reg_value(): case FUNCTION_ERROR");
- // There has been a failure getting the SBE register
- // We cannot continue any further, return failure.
- this->iv_currentAction = P9_EXTRACT_SBE_RC::NO_RECOVERY_ACTION;
- break;
- }
- case SbeRetryHandler::SBE_REG_RETURN::SBE_AT_RUNTIME:
- {
- SBE_TRACF("handle_sbe_reg_value(): case SBE_AT_RUNTIME");
- // The SBE has successfully booted at runtime
- this->iv_currentAction = P9_EXTRACT_SBE_RC::ERROR_RECOVERED;
- break;
- }
- case SbeRetryHandler::SBE_REG_RETURN::SBE_FAILED_TO_BOOT:
- {
- SBE_TRACF("handle_sbe_reg_value(): case SBE_FAILED_TO_BOOT");
- if((this->iv_currentAction == P9_EXTRACT_SBE_RC::REIPL_UPD_SEEPROM)
- && (!iv_retriggeredMain))
-
- {
- iv_retriggeredMain = true;
-
-#ifndef __HOSTBOOT_RUNTIME
- // This could potentially take awhile, reset watchdog
- INITSERVICE::sendProgressCode();
-#endif
- SBE_TRACF("handle_sbe_reg_value(): Attempting "
- "REIPL_UPD_SEEPROM failed. Recalling with BKP_SEEPROM");
- // If we were trying to reipl and hit the error, we need
- // to start with a new seeprom before hitting the threshold
- this->iv_currentAction =
- P9_EXTRACT_SBE_RC::RETURN_ACTION::REIPL_BKP_SEEPROM;
- this->iv_sbeMode = SBE_MODE_OF_OPERATION::SBE_ACTION_SET;
- main_sbe_handler(i_target);
- break;
- }
-
- // Failed to boot, setting the final action for debugging.
- SBE_TRACF("Inside handle_sbe_reg_value, calling "
- "p9_extract_sbe_rc HWP");
- // Get SBE extract rc
- P9_EXTRACT_SBE_RC::RETURN_ACTION l_rcAction =
- P9_EXTRACT_SBE_RC::REIPL_UPD_SEEPROM;
- FAPI_INVOKE_HWP(l_errl, p9_extract_sbe_rc,
- l_fapi2_proc_target, l_rcAction);
- this->iv_currentAction = l_rcAction;
-
- SBE_TRACF("handle_sbe_reg_value(): SBE failed to boot. Final "
- "action is %llx", l_rcAction);
-
- if(l_errl)
- {
- SBE_TRACF("ERROR : p9_extract_sbe_rc HWP returning errorlog "
- "PLID-0x%x", l_errl->plid());
-
- // capture the target data in the elog
- ERRORLOG::ErrlUserDetailsTarget(i_target).addToLog(l_errl);
+ SBE_TRACF("SUCCESS: sbe_run_extract_msg_reg completed okay for proc 0x%.8X",
+ TARGETING::get_huid(i_target));
+ }
- // Cache PLID of error log
- iv_errorLogPLID = l_errl->plid();
+ SBE_TRACF(EXIT_MRK "sbe_run_extract_msg_reg()");
- // Set the PLID of the error log to caller's PLID,
- // if provided
- if (iv_callerErrorLogPLID)
- {
- l_errl->plid(iv_callerErrorLogPLID);
- }
+ return l_statusReadSuccess;
- // Commit error log
- errlCommit( l_errl, HWPF_COMP_ID );
- }
-
- break;
- }
- default:
- {
- // This should never happened
- // error out, unexpected enum value returned.
- //return P9_EXTRACT_SBE_RC::NO_RECOVERY_ACTION;
- /*@
- * @errortype ERRL_SEV_PREDICTIVE
- * @moduleid SBEIO_HANDLE_SBE_REG_VALUE
- * @reasoncode SBEIO_INCORRECT_FCN_CALL
- * @userdata1 HUID of target
- * @userdata2 SBE current state
- * @devdesc This function was called incorrectly or
- * there is a new enum that is not handled yet.
- */
- l_errl = new ERRORLOG::ErrlEntry(
- ERRORLOG::ERRL_SEV_PREDICTIVE,
- SBEIO_HANDLE_SBE_REG_VALUE,
- SBEIO_INCORRECT_FCN_CALL,
- get_huid(i_target),this->iv_currentSBEState);
- l_errl->collectTrace("ISTEPS_TRACE",256);
-
- // Set the PLID of the error log to caller's PLID,
- // if provided
- if (iv_callerErrorLogPLID)
- {
- l_errl->plid(iv_callerErrorLogPLID);
- }
-
- errlCommit(l_errl, ISTEP_COMP_ID);
- this->iv_currentAction = P9_EXTRACT_SBE_RC::NO_RECOVERY_ACTION;
- break;
- }
- }
- SBE_TRACF(EXIT_MRK "handle_sbe_reg_value()");
}
-errlHndl_t SbeRetryHandler::sbe_timeout_handler(TARGETING::Target * i_target)
+errlHndl_t SbeRetryHandler::sbe_poll_status_reg(TARGETING::Target * i_target)
{
- SBE_TRACF(ENTER_MRK "sbe_timeout_handler()");
+ SBE_TRACF(ENTER_MRK "sbe_poll_status_reg()");
- errlHndl_t l_errl = NULL;
+ errlHndl_t l_errl = nullptr;
this->iv_currentSBEState =
- SbeRetryHandler::SBE_REG_RETURN::SBE_FAILED_TO_BOOT;
+ SbeRetryHandler::SBE_REG_RETURN::SBE_NOT_AT_RUNTIME;
const fapi2::Target<fapi2::TARGET_TYPE_PROC_CHIP>
l_fapi2_proc_target(i_target);
- // Each slave sbe gets 60s to respond with the fact that it's
+ // Each sbe gets 60s to respond with the fact that it's
// booted and at runtime (stable state)
- uint64_t SBE_TIMEOUT_NSEC = 60*NS_PER_SEC; //60 sec
+ uint64_t l_sbeTimeout = SBE_RETRY_TIMEOUT_HW; // 60 seconds
// Bump this up really high for simics, things are slow there
if( Util::isSimicsRunning() )
{
- SBE_TIMEOUT_NSEC *= 10;
+ l_sbeTimeout = SBE_RETRY_TIMEOUT_SIMICS; // 600 seconds
}
- const uint64_t SBE_NUM_LOOPS = 100;
- const uint64_t SBE_WAIT_SLEEP = (SBE_TIMEOUT_NSEC/SBE_NUM_LOOPS);
+
+ const uint64_t SBE_WAIT_SLEEP = (l_sbeTimeout/SBE_RETRY_NUM_LOOPS);
SBE_TRACF("Running p9_get_sbe_msg_register HWP on proc target %.8X",
TARGETING::get_huid(i_target));
- for( uint64_t l_loops = 0; l_loops < SBE_NUM_LOOPS; l_loops++ )
+ for( uint64_t l_loops = 0; l_loops < SBE_RETRY_NUM_LOOPS; l_loops++ )
{
sbeMsgReg_t l_reg;
FAPI_INVOKE_HWP(l_errl, p9_get_sbe_msg_register,
l_fapi2_proc_target, l_reg);
- this->iv_sbeRegister = l_reg;
+ this->iv_sbeRegister.reg = l_reg.reg;
if (l_errl)
{
SBE_TRACF("ERROR : call p9_get_sbe_msg_register, PLID=0x%x, "
@@ -558,7 +623,7 @@ errlHndl_t SbeRetryHandler::sbe_timeout_handler(TARGETING::Target * i_target)
l_errl->plid(),
l_loops );
this->iv_currentSBEState =
- SbeRetryHandler::SBE_REG_RETURN::HWP_ERROR;
+ SbeRetryHandler::SBE_REG_RETURN::FAILED_COLLECTING_REG;
break;
}
else if ((this->iv_sbeRegister).currState == SBE_STATE_RUNTIME)
@@ -591,46 +656,74 @@ errlHndl_t SbeRetryHandler::sbe_timeout_handler(TARGETING::Target * i_target)
(this->iv_sbeRegister).reg);
}
l_loops++;
+#ifndef __HOSTBOOT_RUNTIME
+ // reset watchdog before performing the nanosleep
+ INITSERVICE::sendProgressCode();
+#endif
nanosleep(0,SBE_WAIT_SLEEP);
}
}
if ((this->iv_sbeRegister).currState != SBE_STATE_RUNTIME)
{
- // Switch to using FSI SCOM
+ // Switch to using FSI SCOM if we are not using xscom
TARGETING::ScomSwitches l_switches =
i_target->getAttr<TARGETING::ATTR_SCOM_SWITCHES>();
TARGETING::ScomSwitches l_switches_before = l_switches;
- // Turn off SBE SCOM and turn on FSI SCOM.
- l_switches.useFsiScom = 1;
- l_switches.useSbeScom = 0;
-
- SBE_TRACF("sbe_timeout_handler: changing SCOM switches from 0x%.2X "
- "to 0x%.2X for proc 0x%.8X",
- l_switches_before,
- l_switches,
- TARGETING::get_huid(i_target));
- i_target->setAttr<TARGETING::ATTR_SCOM_SWITCHES>(l_switches);
+ if(!l_switches.useXscom)
+ {
+ // Turn off SBE SCOM and turn on FSI SCOM.
+ l_switches.useFsiScom = 1;
+ l_switches.useSbeScom = 0;
+
+ SBE_TRACF("sbe_poll_status_reg: changing SCOM switches from 0x%.2X "
+ "to 0x%.2X for proc 0x%.8X",
+ l_switches_before,
+ l_switches,
+ TARGETING::get_huid(i_target));
+ i_target->setAttr<TARGETING::ATTR_SCOM_SWITCHES>(l_switches);
+ }
}
- // Set the PLID of the error log to caller's PLID,
- // if provided
- if (l_errl && iv_callerErrorLogPLID)
+ SBE_TRACF(EXIT_MRK "sbe_poll_status_reg()");
+ return l_errl;
+}
+
+#ifndef __HOSTBOOT_RUNTIME
+void SbeRetryHandler::handleFspIplTimeFail(TARGETING::Target * i_target)
+{
+ // If we found that there was async FFDC available we need to notify hwsv of this
+ // even if we did not find anything useful in the ffdc for us, it's possible hwsv
+ // will be able to use it.
+ if ((this->iv_sbeRegister).asyncFFDC)
{
- l_errl->plid(iv_callerErrorLogPLID);
+ iv_shutdownReturnCode = SBEIO_HWSV_COLLECT_SBE_RC;
}
-
- SBE_TRACF(EXIT_MRK "sbe_timeout_handler()");
- return l_errl;
+ // If the asyncFFDC bit is not set on the sbeRegister
+ // then we need to pass the DEAD_SBE RC to hwsv when we
+ // TI
+ else
+ {
+ this->iv_shutdownReturnCode = SBEIO_DEAD_SBE;
+ }
+ SBE_TRACF("handleFspIplTimeFail(): During IPL time on FSP system hostboot will TI so that HWSV can handle the error. "
+ "Shutting down w/ the error code %s" ,
+ this->iv_sbeRegister.asyncFFDC ? "SBEIO_HWSV_COLLECT_SBE_RC" : "SBEIO_DEAD_SBE" );
+
+ // On FSP systems if we failed to recover the SBE then we should shutdown w/ the
+ // correct error so that HWSV will know what FFDC to collect
+ INITSERVICE::doShutdownWithError(this->iv_shutdownReturnCode,
+ TARGETING::get_huid(i_target));
}
+#endif
-P9_EXTRACT_SBE_RC::RETURN_ACTION SbeRetryHandler::action_for_ffdc_rc(
+uint32_t SbeRetryHandler::action_for_ffdc_rc(
uint32_t i_rc)
{
SBE_TRACF(ENTER_MRK "action_for_ffdc_rc()");
- P9_EXTRACT_SBE_RC::RETURN_ACTION l_action;
+ uint32_t l_action;
switch(i_rc)
{
@@ -675,22 +768,22 @@ P9_EXTRACT_SBE_RC::RETURN_ACTION SbeRetryHandler::action_for_ffdc_rc(
case fapi2::RC_EXTRACT_SBE_RC_BRANCH_TO_SEEPROM_FAIL:
case fapi2::RC_EXTRACT_SBE_RC_UNEXPECTED_OTPROM_HALT:
case fapi2::RC_EXTRACT_SBE_RC_OTP_ECC_ERR:
- default:
l_action = P9_EXTRACT_SBE_RC::NO_RECOVERY_ACTION;
break;
+ default:
+
+ l_action = NO_ACTION_FOUND_FOR_THIS_RC;
}
SBE_TRACF(EXIT_MRK "action_for_ffdc_rc()");
return l_action;
}
-bool SbeRetryHandler::sbe_get_ffdc_handler(TARGETING::Target * i_target)
+void SbeRetryHandler::sbe_get_ffdc_handler(TARGETING::Target * i_target)
{
SBE_TRACF(ENTER_MRK "sbe_get_ffdc_handler()");
-
- bool l_flowCtrl = false;
uint32_t l_responseSize = SbeFifoRespBuffer::MSG_BUFFER_SIZE;
uint32_t *l_pFifoResponse =
reinterpret_cast<uint32_t *>(malloc(l_responseSize));
@@ -715,12 +808,43 @@ bool SbeRetryHandler::sbe_get_ffdc_handler(TARGETING::Target * i_target)
else
{
// Parse the FFDC package(s) in the response
- SbeFFDCParser * l_ffdc_parser =
- new SbeFFDCParser();
+ auto l_ffdc_parser = std::make_shared<SbeFFDCParser>();
l_ffdc_parser->parseFFDCData(reinterpret_cast<void *>(l_pFifoResponse));
uint8_t l_pkgs = l_ffdc_parser->getTotalPackages();
- P9_EXTRACT_SBE_RC::RETURN_ACTION l_action;
+
+ // Currently we expect a maximum of 2 FFDC packets. These packets would be
+ // a HWP FFDC packet which we will look at to determine what our retry action
+ // should be. The other type of packet we might see would be details on the
+ // internal SBE fail. For internal SBE fail packets we will just add the FFDC
+ // to the error log and move on.
+ //
+ // Note: If we exceed MAX_EXPECTED_FFDC_PACKAGES, commit an informational log.
+ // It shouldn't break anything but this could help us understand if something odd
+ // is happening
+ if(l_pkgs > MAX_EXPECTED_FFDC_PACKAGES)
+ {
+ /*@
+ * @errortype
+ * @moduleid SBEIO_GET_FFDC_HANDLER
+ * @reasoncode SBEIO_MORE_FFDC_THAN_EXPECTED
+ * @userdata1 Maximum expected packages
+ * @userdata2 Number of FFDC packages
+ * @devdesc Unexpected number of FFDC packages in buffer
+ * @custdesc Extra FFDC gathered, marked information event
+ */
+ l_errl = new ERRORLOG::ErrlEntry(ERRORLOG::ERRL_SEV_INFORMATIONAL,
+ SBEIO_GET_FFDC_HANDLER,
+ SBEIO_MORE_FFDC_THAN_EXPECTED,
+ MAX_EXPECTED_FFDC_PACKAGES,
+ l_pkgs);
+
+ l_errl->collectTrace( SBEIO_COMP_NAME, 256);
+
+ // Also log the failing proc as FFDC
+ ERRORLOG::ErrlUserDetailsTarget(i_target).addToLog(l_errl);
+ errlCommit(l_errl, SBEIO_COMP_ID);
+ }
// If there are FFDC packages, make a log for FFDC from SBE
if(l_pkgs > 0)
@@ -742,35 +866,47 @@ bool SbeRetryHandler::sbe_get_ffdc_handler(TARGETING::Target * i_target)
// Also log the failing proc as FFDC
ERRORLOG::ErrlUserDetailsTarget(i_target).addToLog(l_errl);
- }
- // Process each FFDC package
- for(auto i=0; i<l_pkgs; i++)
- {
- // Add each package to the log
- l_errl->addFFDC( SBEIO_COMP_ID,
- l_ffdc_parser->getFFDCPackage(i),
- l_ffdc_parser->getPackageLength(i),
- 0,
- SBEIO_UDT_PARAMETERS,
- false );
-
- // Get the RC from the FFDC package
- uint32_t l_rc = l_ffdc_parser->getPackageRC(i);
-
- // Determine an action for the RC
- l_action = action_for_ffdc_rc(l_rc);
-
- // Handle that action
- this->iv_currentAction = l_action;
- this->iv_retriggeredMain = true;
- this->iv_sbeMode = SBE_MODE_OF_OPERATION::SBE_ACTION_SET;
- main_sbe_handler(i_target);
- }
- // If there are FFDC packages, commit the log
- if(l_pkgs > 0)
- {
+ // Process each FFDC package
+ for(auto i=0; i<l_pkgs; i++)
+ {
+ // Add each package to the log
+ l_errl->addFFDC( SBEIO_COMP_ID,
+ l_ffdc_parser->getFFDCPackage(i),
+ l_ffdc_parser->getPackageLength(i),
+ 0,
+ SBEIO_UDT_PARAMETERS,
+ false );
+
+ // Get the RC from the FFDC package
+ uint32_t l_rc = l_ffdc_parser->getPackageRC(i);
+
+ // Determine an action for the RC
+ P9_EXTRACT_SBE_RC::RETURN_ACTION l_action =
+ static_cast<P9_EXTRACT_SBE_RC::RETURN_ACTION>(action_for_ffdc_rc(l_rc));
+
+ if(l_action != NO_ACTION_FOUND_FOR_THIS_RC)
+ {
+ // Set the action associated with the RC that we found
+ this->iv_currentAction = l_action;
+
+ // This call will look at what action_for_ffdc_rc had set the return action to
+ // checks on how many times we have attempted to boot this side,
+ // and if we have already tried switching sides
+ //
+ //
+ // Note this call is important, if this is not called we could end up in an
+ // endless loop because this enforces MAX_SWITCH_SIDE_COUNT and MAX_SIDE_BOOT_ATTEMPTS
+ this->bestEffortCheck();
+
+ // Set the instance variable ffdcSetAction to let us
+ // know that the current action was set from what we
+ // found in the asyncFFDC
+ this->iv_ffdcSetAction = true;
+ }
+ }
+
l_errl->collectTrace( SBEIO_COMP_NAME, KILOBYTE/4);
l_errl->collectTrace( "ISTEPS_TRACE", KILOBYTE/4);
@@ -783,11 +919,6 @@ bool SbeRetryHandler::sbe_get_ffdc_handler(TARGETING::Target * i_target)
errlCommit(l_errl, ISTEP_COMP_ID);
}
-
- delete l_ffdc_parser;
- l_ffdc_parser = nullptr;
-
- l_flowCtrl = true;
}
#endif
@@ -795,155 +926,60 @@ bool SbeRetryHandler::sbe_get_ffdc_handler(TARGETING::Target * i_target)
l_pFifoResponse = nullptr;
SBE_TRACF(EXIT_MRK "sbe_get_ffdc_handler()");
- return l_flowCtrl;
}
-//By default we want to call the 2 param version of the func w/ "true"
-//passed in to tell the function we want to hide the mandatory errlog
-bool SbeRetryHandler::sbe_boot_fail_handler(TARGETING::Target * i_target)
-{
- return SbeRetryHandler::sbe_boot_fail_handler(i_target, false);
-}
-bool SbeRetryHandler::sbe_boot_fail_handler(TARGETING::Target * i_target,
- bool i_exposeLog)
+void SbeRetryHandler::sbe_run_extract_rc(TARGETING::Target * i_target)
{
- SBE_TRACF(ENTER_MRK "sbe_boot_fail_handler()");
+ SBE_TRACF(ENTER_MRK "sbe_run_extract_rc()");
errlHndl_t l_errl = nullptr;
fapi2::ReturnCode l_rc;
- bool o_needRetry = false;
-
- SBE_TRACF("SBE 0x%.8X never started, sbeReg=0x%.8X",
- TARGETING::get_huid(i_target),(this->iv_sbeRegister).reg );
- /*@
- * @errortype
- * @reasoncode SBEIO_SLAVE_TIMEOUT
- * @severity ERRORLOG::ERRL_SEV_INFORMATIONAL
- * @moduleid SBEIO_EXTRACT_RC_HANDLER
- * @userdata1 HUID of proc which had SBE timeout
- * @userdata2 SBE MSG Register
- *
- * @devdesc Slave SBE did not get to ready state within
- * allotted time
- *
- * @custdesc A processor in the system has failed to initialize
- */
- l_errl = new ERRORLOG::ErrlEntry(ERRORLOG::ERRL_SEV_INFORMATIONAL,
- SBEIO_EXTRACT_RC_HANDLER,
- SBEIO_SLAVE_TIMEOUT,
- TARGETING::get_huid(i_target),
- (this->iv_sbeRegister).reg);
-
- l_errl->collectTrace( "ISTEPS_TRACE", KILOBYTE/4);
-
- // Set the PLID of the error log to caller's PLID,
- // if provided
- if (iv_callerErrorLogPLID)
- {
- l_errl->plid(iv_callerErrorLogPLID);
- }
-
- if(i_exposeLog)
- {
- l_errl->setSev(ERRORLOG::ERRL_SEV_PREDICTIVE);
-
- }
- // Commit error and continue, this is not terminating since
- // we can still at least boot with master proc
- errlCommit(l_errl,ISTEP_COMP_ID);
-
- SBE_TRACF("Inside sbe_boot_fail_handler, calling p9_extract_sbe_rc HWP");
+ SBE_TRACF("Inside sbe_run_extract_rc, calling p9_extract_sbe_rc HWP");
// Setup for the HWP
const fapi2::Target<fapi2::TARGET_TYPE_PROC_CHIP> l_fapi2ProcTarget(
const_cast<TARGETING::Target*> (i_target));
+ // Default the return action to be NO_RECOVERY , if something goes
+ // wrong in p9_extract_sbe_rc and l_ret doesn't get set in that function
+ // then we want to fall back on NO_RECOVERY which we will handle
+ // accordingly in bestEffortCheck
P9_EXTRACT_SBE_RC::RETURN_ACTION l_ret =
- P9_EXTRACT_SBE_RC::REIPL_UPD_SEEPROM;
+ P9_EXTRACT_SBE_RC::NO_RECOVERY_ACTION;
- //Note that we are calling this while we are already inside
- //of a FAPI_INVOKE_HWP call. This might cause issue w/ current_err
- //but unsure how to get around it.
+ // TODO RTC: 190528 Force FAPI_INVOKE_HWP to call FAPI_EXEC_HWP when FAPI_INVOKE
+ // is blocked by mutex
+ // Note that it's possible we are calling this while we are already inside
+ // of a FAPI_INVOKE_HWP call. This might cause issue w/ current_err
+ // but unsure how to get around it.
FAPI_EXEC_HWP(l_rc, p9_extract_sbe_rc, l_fapi2ProcTarget,
l_ret, iv_useSDB, iv_secureModeDisabled);
+ // Convert the returnCode into an UNRECOVERABLE error log which we will
+ // associate w/ the caller's errlog via plid
l_errl = rcToErrl(l_rc, ERRORLOG::ERRL_SEV_UNRECOVERABLE);
this->iv_currentAction = l_ret;
- if(this->iv_currentAction != P9_EXTRACT_SBE_RC::ERROR_RECOVERED)
- {
+ // Set the instance variable ffdcSetAction to let us
+ // know that the current action was not set by what
+ // we found in asyncFFDC
+ this->iv_ffdcSetAction = false;
- if(l_errl)
- {
- SBE_TRACF("p9_extract_sbe_rc HWP returned action %d and errorlog "
- "PLID=0x%x, rc=0x%.4X", this->iv_currentAction,
- l_errl->plid(), l_errl->reasonCode() );
- errlCommit(l_errl, SBEIO_COMP_ID);
- }
+ // This call will look at what p9_extract_sbe_rc had set the return action to
+ // checks on how many times we have attempted to boot this side,
+ // and if we have already tried switching sides
+ //
+ // Note this call is important, if this is not called we could end up in an
+ // endless loop because this enforces MAX_SWITCH_SIDE_COUNT and MAX_SIDE_BOOT_ATTEMPTS
+ this->bestEffortCheck();
- SBE_TRACF("sbe_boot_fail_handler: We have hit an error in the SBE "
- "and hostboot will now attempt to reboot the SBE");
- /*@
- * @errortype
- * @severity ERRORLOG::ERRL_SEV_PREDICTIVE
- * @moduleid SBEIO_EXTRACT_RC_HANDLER
- * @reasoncode SBEIO_ATTEMPTING_REBOOT
- * @userdata1 HUID of proc which had the SBE timeout
- * @userdata2 Current action to be taken on the SBE
- * @devdesc HWP has returned a reboot action to be taken
- * Hostboot will now attempt to reboot the SBE
- * @custdesc A processor in the system has failed to initialize.
- * Hostboot is attempting a recovery.
- */
- l_errl = new ERRORLOG::ErrlEntry(ERRORLOG::ERRL_SEV_PREDICTIVE,
- SBEIO_EXTRACT_RC_HANDLER,
- SBEIO_ATTEMPTING_REBOOT,
- TARGETING::get_huid(i_target),
- this->iv_currentAction);
- l_errl->collectTrace("SBEIO_TRACE",KILOBYTE/4);
-
- // Set the PLID of the error log to caller's PLID if provided
- if(iv_callerErrorLogPLID)
- {
- l_errl->plid(iv_callerErrorLogPLID);
- }
- errlCommit(l_errl,SBEIO_COMP_ID);
-
- if(INITSERVICE::spBaseServicesEnabled())
- {
#ifndef __HOSTBOOT_RUNTIME
- // When we are on an FSP machine, we want to fail out of
- // hostboot and give control back to the FSP. They have
- // better diagnostics for this type of error.
- INITSERVICE::doShutdownWithError(SBEIO_HWSV_COLLECT_SBE_RC,
- TARGETING::get_huid(i_target));
+ // This could potentially take awhile, reset watchdog
+ INITSERVICE::sendProgressCode();
#endif
- }
-#ifndef __HOSTBOOT_RUNTIME
- // This could potentially take awhile, reset watchdog
- INITSERVICE::sendProgressCode();
-#endif
- SBE_TRACF("sbe_boot_fail_handler. iv_switchSides count is %llx",
- iv_switchSidesCount);
- if((this->iv_currentAction == P9_EXTRACT_SBE_RC::NO_RECOVERY_ACTION) &&
- (iv_switchSidesCount < MAX_SWITCH_SIDE_COUNT))
- {
- this->iv_currentAction = P9_EXTRACT_SBE_RC::REIPL_BKP_SEEPROM;
- o_needRetry = true;
- }
- else if(iv_switchSidesCount >= MAX_SWITCH_SIDE_COUNT)
- {
- o_needRetry = false;
- }
- else
- {
- o_needRetry = true;
- }
-
- }
if(l_errl)
{
SBE_TRACF("Error: sbe_boot_fail_handler : p9_extract_sbe_rc HWP "
@@ -964,84 +1000,219 @@ bool SbeRetryHandler::sbe_boot_fail_handler(TARGETING::Target * i_target,
errlCommit( l_errl, HWPF_COMP_ID );
}
- SBE_TRACF(EXIT_MRK "sbe_boot_fail_handler() current action is %llx",
+ SBE_TRACF(EXIT_MRK "sbe_run_extract_rc() current action is %llx",
this->iv_currentAction);
- return o_needRetry;
+}
+
+void SbeRetryHandler::bestEffortCheck()
+{
+ // We don't want to accept that there is no recovery action just
+ // because that is what extract_rc is telling us. We want to make
+ // sure we have tried booting on this seeprom twice, and that we
+ // have tried the other seeprom twice as well. If we have tried all of
+ // those cases then we will fail out
+ if(this->iv_currentAction == P9_EXTRACT_SBE_RC::NO_RECOVERY_ACTION)
+ {
+ if (this->iv_currentSideBootAttempts < MAX_SIDE_BOOT_ATTEMPTS)
+ {
+ SBE_TRACF("bestEffortCheck(): suggested action was NO_RECOVERY_ACTION but we are trying RESTART_SBE");
+ this->iv_currentAction = P9_EXTRACT_SBE_RC::RESTART_SBE;
+ }
+ else if (this->iv_switchSidesCount < MAX_SWITCH_SIDE_COUNT)
+ {
+ SBE_TRACF("bestEffortCheck(): suggested action was NO_RECOVERY_ACTION but we are trying REIPL_BKP_SEEPROM");
+ this->iv_currentAction = P9_EXTRACT_SBE_RC::REIPL_BKP_SEEPROM;
+ }
+ else
+ {
+ // If we have attempted the max boot attempts on current side
+ // and have already switched sides once, then we will accept
+ // that we don't know how to recover and pass this status out
+ }
+ }
+ // If we have already switched sides, and extract rc is telling us to
+ // switch sides again, there is nothing we can do, so change currentAction
+ // to be NO_RECOVERY_ACTION
+ else if(this->iv_currentAction == P9_EXTRACT_SBE_RC::REIPL_BKP_SEEPROM ||
+ this->iv_currentAction == P9_EXTRACT_SBE_RC::REIPL_UPD_SEEPROM )
+ {
+ if (this->iv_switchSidesCount >= MAX_SWITCH_SIDE_COUNT)
+ {
+ SBE_TRACF("bestEffortCheck(): suggested action was REIPL_BKP_SEEPROM/REIPL_UPD_SEEPROM but that is not possible so changing to NO_RECOVERY_ACTION");
+ this->iv_currentAction = P9_EXTRACT_SBE_RC::NO_RECOVERY_ACTION;
+ }
+ }
+ // If the extract sbe rc hwp tells us to restart, and we have already
+ // done 2 retries on this side, then attempt to switch sides, if we can't
+ // switch sides, set currentAction to NO_RECOVERY_ACTION
+ else if(this->iv_currentAction == P9_EXTRACT_SBE_RC::RESTART_SBE ||
+ this->iv_currentAction == P9_EXTRACT_SBE_RC::RESTART_CBS)
+ {
+ if (this->iv_currentSideBootAttempts >= MAX_SIDE_BOOT_ATTEMPTS)
+ {
+ if (this->iv_switchSidesCount >= MAX_SWITCH_SIDE_COUNT)
+ {
+ SBE_TRACF("bestEffortCheck(): suggested action was RESTART_SBE/RESTART_CBS but no actions possible so changing to NO_RECOVERY_ACTION");
+ this->iv_currentAction = P9_EXTRACT_SBE_RC::NO_RECOVERY_ACTION;
+ }
+ else
+ {
+ SBE_TRACF("bestEffortCheck(): suggested action was RESTART_SBE/RESTART_CBS but max attempts tried already so changing to REIPL_BKP_SEEPROM");
+ this->iv_currentAction = P9_EXTRACT_SBE_RC::REIPL_BKP_SEEPROM;
+ }
+ }
+ }
}
errlHndl_t SbeRetryHandler::switch_sbe_sides(TARGETING::Target * i_target)
{
SBE_TRACF(ENTER_MRK "switch_sbe_sides()");
- errlHndl_t l_errl = NULL;
- const uint32_t l_sbeBootSelectMask = SBE::SBE_BOOT_SELECT_MASK >> 32;
+ errlHndl_t l_errl = nullptr;
+ TARGETING::ATTR_PROC_SBE_MASTER_CHIP_type l_isMaster =
+ i_target->getAttr<TARGETING::ATTR_PROC_SBE_MASTER_CHIP>();
+
+#ifdef __HOSTBOOT_RUNTIME
+ const bool l_isRuntime = true;
+#else
+ const bool l_isRuntime = false;
+#endif
do{
- // Read PERV_SB_CS_FSI_BYTE 0x2820 for target proc
- uint32_t l_read_reg = 0;
- size_t l_opSize = sizeof(uint32_t);
- l_errl = DeviceFW::deviceOp(
- DeviceFW::READ,
- i_target,
- &l_read_reg,
- l_opSize,
- DEVICE_FSI_ADDRESS(PERV_SB_CS_FSI_BYTE) );
- if( l_errl )
+ if(!l_isRuntime && !l_isMaster)
{
- SBE_TRACF( ERR_MRK"switch_sbe_sides: FSI device read "
- "PERV_SB_CS_FSI_BYTE (0x%.4X), proc target = %.8X, "
- "RC=0x%X, PLID=0x%lX",
- PERV_SB_CS_FSI_BYTE, // 0x2820
- TARGETING::get_huid(i_target),
- ERRL_GETRC_SAFE(l_errl),
- ERRL_GETPLID_SAFE(l_errl));
- break;
- }
+ const uint32_t l_sbeBootSelectMask = SBE::SBE_BOOT_SELECT_MASK >> 32;
+ // Read PERV_SB_CS_FSI_BYTE 0x2820 for target proc
+ uint32_t l_read_reg = 0;
+ size_t l_opSize = sizeof(uint32_t);
+ l_errl = DeviceFW::deviceOp(
+ DeviceFW::READ,
+ i_target,
+ &l_read_reg,
+ l_opSize,
+ DEVICE_FSI_ADDRESS(PERV_SB_CS_FSI_BYTE) );
+
+ if( l_errl )
+ {
+ SBE_TRACF( ERR_MRK"switch_sbe_sides: FSI device read "
+ "PERV_SB_CS_FSI_BYTE (0x%.4X), proc target = %.8X, "
+ "RC=0x%X, PLID=0x%lX",
+ PERV_SB_CS_FSI_BYTE, // 0x2820
+ TARGETING::get_huid(i_target),
+ ERRL_GETRC_SAFE(l_errl),
+ ERRL_GETPLID_SAFE(l_errl));
+ break;
+ }
- // Determine how boot side is currently set
- if(l_read_reg & l_sbeBootSelectMask) // Currently set for Boot Side 1
- {
- // Set Boot Side 0 by clearing bit for side 1
- SBE_TRACF( "switch_sbe_sides #%d: Set Boot Side 0 for HUID 0x%08X",
- iv_switchSidesCount,
- TARGETING::get_huid(i_target));
- l_read_reg &= ~l_sbeBootSelectMask;
- this->iv_sbeSide = 1;
+ // Determine how boot side is currently set
+ if(l_read_reg & l_sbeBootSelectMask) // Currently set for Boot Side 1
+ {
+ // Set Boot Side 0 by clearing bit for side 1
+ SBE_TRACF( "switch_sbe_sides #%d: Set Boot Side 0 for HUID 0x%08X",
+ iv_switchSidesCount,
+ TARGETING::get_huid(i_target));
+ l_read_reg &= ~l_sbeBootSelectMask;
+ }
+ else // Currently set for Boot Side 0
+ {
+ // Set Boot Side 1 by setting bit for side 1
+ SBE_TRACF( "switch_sbe_sides #%d: Set Boot Side 1 for HUID 0x%08X",
+ iv_switchSidesCount,
+ TARGETING::get_huid(i_target));
+ l_read_reg |= l_sbeBootSelectMask;
+ }
+
+ // Write updated PERV_SB_CS_FSI 0x2820 back into target proc
+ l_errl = DeviceFW::deviceOp(
+ DeviceFW::WRITE,
+ i_target,
+ &l_read_reg,
+ l_opSize,
+ DEVICE_FSI_ADDRESS(PERV_SB_CS_FSI_BYTE) );
+ if( l_errl )
+ {
+ SBE_TRACF( ERR_MRK"switch_sbe_sides: FSI device write "
+ "PERV_SB_CS_FSI_BYTE (0x%.4X), proc target = %.8X, "
+ "RC=0x%X, PLID=0x%lX",
+ PERV_SB_CS_FSI_BYTE, // 0x2820
+ TARGETING::get_huid(i_target),
+ ERRL_GETRC_SAFE(l_errl),
+ ERRL_GETPLID_SAFE(l_errl));
+ break;
+ }
}
- else // Currently set for Boot Side 0
+ else
{
- // Set Boot Side 1 by setting bit for side 1
- SBE_TRACF( "switch_sbe_sides #%d: Set Boot Side 1 for HUID 0x%08X",
- iv_switchSidesCount,
- TARGETING::get_huid(i_target));
- l_read_reg |= l_sbeBootSelectMask;
- this->iv_sbeSide = 0;
+ // Read PERV_SB_CS_SCOM 0x50008 for target proc
+ uint64_t l_read_reg = 0;
+ size_t l_opSize = sizeof(uint64_t);
+ l_errl = DeviceFW::deviceOp(
+ DeviceFW::READ,
+ i_target,
+ &l_read_reg,
+ l_opSize,
+ DEVICE_SCOM_ADDRESS(PERV_SB_CS_SCOM) );
+
+ if( l_errl )
+ {
+ SBE_TRACF( ERR_MRK"switch_sbe_sides: SCOM device read "
+ "PERV_SB_CS_SCOM (0x%.4X), proc target = %.8X, "
+ "RC=0x%X, PLID=0x%lX",
+ PERV_SB_CS_SCOM, // 0x50008
+ TARGETING::get_huid(i_target),
+ ERRL_GETRC_SAFE(l_errl),
+ ERRL_GETPLID_SAFE(l_errl));
+ break;
+ }
+
+ // Determine how boot side is currently set
+ if(l_read_reg & SBE::SBE_BOOT_SELECT_MASK) // Currently set for Boot Side 1
+ {
+ // Set Boot Side 0 by clearing bit for side 1
+ SBE_TRACF( "switch_sbe_sides #%d: Set Boot Side 0 for HUID 0x%08X",
+ iv_switchSidesCount,
+ TARGETING::get_huid(i_target));
+ l_read_reg &= ~SBE::SBE_BOOT_SELECT_MASK;
+ }
+ else // Currently set for Boot Side 0
+ {
+ // Set Boot Side 1 by setting bit for side 1
+ SBE_TRACF( "switch_sbe_sides #%d: Set Boot Side 1 for HUID 0x%08X",
+ iv_switchSidesCount,
+ TARGETING::get_huid(i_target));
+ l_read_reg |= SBE::SBE_BOOT_SELECT_MASK;
+ }
+
+ // Write updated PERV_SB_CS_SCOM 0x50008 back into target proc
+ l_errl = DeviceFW::deviceOp(
+ DeviceFW::WRITE,
+ i_target,
+ &l_read_reg,
+ l_opSize,
+ DEVICE_SCOM_ADDRESS(PERV_SB_CS_SCOM) );
+ if( l_errl )
+ {
+ SBE_TRACF( ERR_MRK"switch_sbe_sides: FSI device write "
+ "PERV_SB_CS_SCOM (0x%.4X), proc target = %.8X, "
+ "RC=0x%X, PLID=0x%lX",
+ PERV_SB_CS_SCOM, // 0x50008
+ TARGETING::get_huid(i_target),
+ ERRL_GETRC_SAFE(l_errl),
+ ERRL_GETPLID_SAFE(l_errl));
+ break;
+ }
}
- SBE_TRACF("switch_sbe_sides(): iv_switchSidesCount is %llx",
- iv_switchSidesCount);
// Increment switch sides count
- ++iv_switchSidesCount;
-
- // Write updated PERV_SB_CS_FSI 0x2820 back into target proc
- l_errl = DeviceFW::deviceOp(
- DeviceFW::WRITE,
- i_target,
- &l_read_reg,
- l_opSize,
- DEVICE_FSI_ADDRESS(PERV_SB_CS_FSI_BYTE) );
- if( l_errl )
- {
- SBE_TRACF( ERR_MRK"switch_sbe_sides: FSI device write "
- "PERV_SB_CS_FSI_BYTE (0x%.4X), proc target = %.8X, "
- "RC=0x%X, PLID=0x%lX",
- PERV_SB_CS_FSI_BYTE, // 0x2820
- TARGETING::get_huid(i_target),
- ERRL_GETRC_SAFE(l_errl),
- ERRL_GETPLID_SAFE(l_errl));
- break;
- }
+ ++(this->iv_switchSidesCount);
+
+ SBE_TRACF("switch_sbe_sides(): iv_switchSidesCount has been incremented to %llx",
+ iv_switchSidesCount);
+
+ // Since we just switched sides, and we haven't attempted a boot yet,
+ // set the current attempts for this side to be 0
+ this->iv_currentSideBootAttempts = 0;
}while(0);
// Set the PLID of the error log to caller's PLID,
diff --git a/src/usr/sbeio/makefile b/src/usr/sbeio/makefile
index da41e8862..fccde6aa4 100644
--- a/src/usr/sbeio/makefile
+++ b/src/usr/sbeio/makefile
@@ -25,19 +25,11 @@
ROOTPATH = ../../..
PROCEDURES_PATH = ${ROOTPATH}/src/import/chips/p9/procedures
MODULE = sbeio
-include common/common.mk
-
-EXTRAINCDIR += ${ROOTPATH}/src/import/hwpf/fapi2/include
-EXTRAINCDIR += ${ROOTPATH}/src/include/usr/fapi2
-EXTRAINCDIR += ${ROOTPATH}/src/import/chips/p9/utils
-EXTRAINCDIR += ${ROOTPATH}/src/import/chips/p9/utils/imageProcs
-EXTRAINCDIR += ${ROOTPATH}/src/import/chips/common/utils/imageProcs
-EXTRAINCDIR += ${ROOTPATH}/src/import/chips/p9/procedures/hwp/ffdc
-EXTRAINCDIR += ${ROOTPATH}/src/import/chips/p9/procedures/hwp/perv
-EXTRAINCDIR += ${ROOTPATH}/src/import/chips/p9/procedures/hwp/lib
-EXTRAINCDIR += ${PROCEDURES_PATH}/hwp/sbe/
+# pull in .mk common between sbeio and sbeio_rt
+include common/common.mk
+# sbeio's unique objects
OBJS += sbe_psudd.o
OBJS += sbe_utils.o
OBJS += sbe_secureHwp.o
@@ -56,22 +48,8 @@ OBJS += sbe_getSBEFFDC.o
OBJS += sbe_memRegionMgr.o
OBJS += sbe_fifo_buffer.o
OBJS += sbe_ffdc_package_parser.o
-OBJS += ${SBEIO_COMMON_OBJS}
-
-VPATH += ${ROOTPATH}/src/import/chips/p9/procedures/hwp/perv/
-VPATH += ${ROOTPATH}/src/import/chips/p9/procedures/hwp/lib/
-VPATH += ${ROOTPATH}/src/usr/sbeio/common
-
-include ${ROOTPATH}/procedure.rules.mk
-
-#Not using the ekb mk file because it includes extra files
-# that we already include in libfapi2:
-# - p9_ppe_utils.o
-#include ${ROOTPATH}/src/import/chips/p9/procedures/hwp/perv/p9_extract_sbe_rc.mk
-OBJS += p9_extract_sbe_rc.o
-OBJS += p9_ppe_common.o
-
+# sbeio's sub directories
SUBDIRS += test.d
SUBDIRS += runtime.d
diff --git a/src/usr/sbeio/runtime/makefile b/src/usr/sbeio/runtime/makefile
index 541ad0b77..37792b554 100644
--- a/src/usr/sbeio/runtime/makefile
+++ b/src/usr/sbeio/runtime/makefile
@@ -31,41 +31,15 @@ PROCEDURES_PATH = ${ROOTPATH}/src/import/chips/p9/procedures
MODULE = sbeio_rt
+# pull in .mk common between sbeio and sbeio_rt
include ../common/common.mk
-EXTRAINCDIR += ${ROOTPATH}/src/import/hwpf/fapi2/include
-EXTRAINCDIR += ${ROOTPATH}/src/include/usr/fapi2
-EXTRAINCDIR += ${ROOTPATH}/src/import/chips/p9/utils
-EXTRAINCDIR += ${ROOTPATH}/src/import/chips/p9/utils/imageProcs
-EXTRAINCDIR += ${ROOTPATH}/src/import/chips/common/utils/imageProcs
-EXTRAINCDIR += ${ROOTPATH}/src/import/chips/p9/procedures/hwp/ffdc
-EXTRAINCDIR += ${ROOTPATH}/src/import/chips/p9/procedures/hwp/perv
-EXTRAINCDIR += ${ROOTPATH}/src/import/chips/p9/procedures/hwp/lib
-
-EXTRAINCDIR += ${PROCEDURES_PATH}/hwp/sbe/
-
## Objects unique to HBRT
OBJS += rt_sbeio.o
OBJS += sbeio_attr_override.o
OBJS += sbeio_vital_attn.o
-OBJS += ${SBEIO_COMMON_OBJS}
-
-VPATH += ${ROOTPATH}/src/import/chips/p9/procedures/hwp/perv/
-VPATH += ${ROOTPATH}/src/import/chips/p9/procedures/hwp/sbe/
-VPATH += ${ROOTPATH}/src/import/chips/p9/procedures/hwp/lib/
-VPATH += ../common
-
-include ${ROOTPATH}/procedure.rules.mk
-include ${ROOTPATH}/src/import/chips/p9/procedures/hwp/sbe/p9_get_sbe_msg_register.mk
-include ${ROOTPATH}/src/import/chips/p9/procedures/hwp/perv/p9_start_cbs.mk
-
-#Not using the ekb mk file because it includes extra files
-# that we already include in libfapi2:
-# - p9_ppe_utils.o
-#include ${ROOTPATH}/src/import/chips/p9/procedures/hwp/perv/p9_extract_sbe_rc.mk
-OBJS += p9_extract_sbe_rc.o
-OBJS += p9_ppe_common.o
+## sbeio_rt's sub directories
SUBDIRS += test.d
include ${ROOTPATH}/config.mk
diff --git a/src/usr/sbeio/sbe_fifodd.C b/src/usr/sbeio/sbe_fifodd.C
index 5906452ec..66b533540 100644
--- a/src/usr/sbeio/sbe_fifodd.C
+++ b/src/usr/sbeio/sbe_fifodd.C
@@ -47,6 +47,7 @@
#include <sbeio/sbe_sp_intf.H>
#include <xscom/piberror.H>
#include <sbeio/sbe_retry_handler.H>
+#include <initservice/initserviceif.H>
extern trace_desc_t* g_trac_sbeio;
@@ -657,14 +658,37 @@ errlHndl_t SbeFifo::waitDnFifoReady(TARGETING::Target * i_target,
errl->addProcedureCallout(HWAS::EPUB_PRC_HB_CODE,
HWAS::SRCI_PRIORITY_HIGH);
- errl->addHwCallout( i_target,
- HWAS::SRCI_PRIORITY_HIGH,
- HWAS::NO_DECONFIG,
- HWAS::GARD_NULL );
+ // Keep a copy of the plid so we can pass it to the retry_handler
+ // so the error logs it creates will be linked
+ uint32_t l_errPlid = errl->plid();
- //It is likely that the SBE is in a failed state so set up retry handler
+ // Commit error log now if this is a FSP system because
+ // we will not return from retry handler
+ if(INITSERVICE::spBaseServicesEnabled())
+ {
+ errl->addHwCallout( i_target,
+ HWAS::SRCI_PRIORITY_HIGH,
+ HWAS::NO_DECONFIG,
+ HWAS::GARD_NULL );
+ ERRORLOG::errlCommit( errl, SBEIO_COMP_ID );
+ }
+ //On open power systems we want to deconfigure the processor
+ else
+ {
+ errl->addHwCallout( i_target,
+ HWAS::SRCI_PRIORITY_HIGH,
+ HWAS::DECONFIG,
+ HWAS::GARD_NULL );
+ }
+
+
+ // Set the retry handler's mode to be informational, this will run
+ // p9_extract_rc then TI the system on fsp-systems.
+ // On open power systems if mode is set to informational we will run
+ // p9_extract_rc then return back to this function
SbeRetryHandler l_SBEobj = SbeRetryHandler(
- SbeRetryHandler::SBE_MODE_OF_OPERATION::INFORMATIONAL_ONLY);
+ SbeRetryHandler::SBE_MODE_OF_OPERATION::INFORMATIONAL_ONLY,
+ l_errPlid);
// Look at the scomSwitch attribute to tell what types
// of scoms are going to be used. If the SMP is not yet up then we
@@ -683,12 +707,7 @@ errlHndl_t SbeFifo::waitDnFifoReady(TARGETING::Target * i_target,
l_SBEobj.main_sbe_handler(i_target);
- if(l_SBEobj.getPLID())
- {
- //tie the error from the sbe retry handler to this error
- errl->plid(l_SBEobj.getPLID());
- }
- errl->collectTrace(SBEIO_COMP_NAME);
+ //break out of continuous loop ( should only get here on openPower systems)
break;
}
diff --git a/src/usr/sbeio/sbe_psudd.C b/src/usr/sbeio/sbe_psudd.C
index 47be7b7be..d97f34d26 100644
--- a/src/usr/sbeio/sbe_psudd.C
+++ b/src/usr/sbeio/sbe_psudd.C
@@ -48,6 +48,7 @@
#include <p9_extract_sbe_rc.H>
#include <errl/errludlogregister.H>
#include <sbeio/sbe_retry_handler.H>
+#include <initservice/initserviceif.H>
trace_desc_t* g_trac_sbeio;
TRAC_INIT(&g_trac_sbeio, SBEIO_COMP_NAME, 6*KILOBYTE, TRACE::BUFFER_SLOW);
@@ -528,23 +529,45 @@ errlHndl_t SbePsu::pollForPsuComplete(TARGETING::Target * i_target,
TARGETING::get_huid(i_target)),
i_pPsuRequest->mbxReg0);
+ // log the failing proc as FFDC
+ ErrlUserDetailsTarget(i_target).addToLog(l_errl);
+ l_respRegsFFDC.addToLog(l_errl);
+ l_errl->collectTrace(SBEIO_COMP_NAME);
+
+ // Keep a copy of the plid so we can pass it to the retry_handler
+ // so the error logs it creates will be linked
+ uint32_t l_errPlid = l_errl->plid();
+
+ // Commit error log now if this is a FSP system because
+ // we will not return from retry handler
+ if(INITSERVICE::spBaseServicesEnabled())
+ {
+ l_errl->addHwCallout( i_target,
+ HWAS::SRCI_PRIORITY_HIGH,
+ HWAS::NO_DECONFIG,
+ HWAS::GARD_NULL );
+ ERRORLOG::errlCommit( l_errl, SBEIO_COMP_ID );
+ }
+ //On open power systems we want to deconfigure the processor
+ else
+ {
+ l_errl->addHwCallout( i_target,
+ HWAS::SRCI_PRIORITY_HIGH,
+ HWAS::DECONFIG,
+ HWAS::GARD_NULL );
+ }
+
// If the FFDC is empty, this error could be because the SBE
// isn't booted correctly. We need to check the state of the
- // SBE, handle the SBE value, and potentionally try
- // to restart the SBE
+ // SBE.
+ // If we are on a FSP based system we expect this to result in a TI
+ // If we are on a BMC based system we expect to return from this fail
SbeRetryHandler l_SBEobj = SbeRetryHandler(
- SbeRetryHandler::SBE_MODE_OF_OPERATION::
- INFORMATIONAL_ONLY);
+ SbeRetryHandler::SBE_MODE_OF_OPERATION::INFORMATIONAL_ONLY,
+ l_errPlid);
l_SBEobj.main_sbe_handler(i_target);
- if(l_SBEobj.getPLID() != NULL)
- {
- // If there is not an unrecovered error, we want to tie
- // the error from the sbe retry handler to this error.
- l_errl->plid(l_SBEobj.getPLID());
- l_errl->setSev(ERRL_SEV_UNRECOVERABLE);
- }
}
else
{
@@ -591,17 +614,16 @@ errlHndl_t SbePsu::pollForPsuComplete(TARGETING::Target * i_target,
l_ffdc_parser = nullptr;
}
-
l_errl->addHwCallout( i_target,
HWAS::SRCI_PRIORITY_HIGH,
HWAS::NO_DECONFIG,
HWAS::GARD_NULL );
- }
- // log the failing proc as FFDC
- ErrlUserDetailsTarget(i_target).addToLog(l_errl);
- l_respRegsFFDC.addToLog(l_errl);
- l_errl->collectTrace(SBEIO_COMP_NAME);
+ // log the failing proc as FFDC
+ ErrlUserDetailsTarget(i_target).addToLog(l_errl);
+ l_respRegsFFDC.addToLog(l_errl);
+ l_errl->collectTrace(SBEIO_COMP_NAME);
+ }
MAGIC_INST_GET_SBE_TRACES(
i_target->getAttr<TARGETING::ATTR_POSITION>(),
diff --git a/src/usr/sbeio/test/sbe_retry_handler_test.H b/src/usr/sbeio/test/sbe_retry_handler_test.H
index bfe6808d7..9a3719895 100644
--- a/src/usr/sbeio/test/sbe_retry_handler_test.H
+++ b/src/usr/sbeio/test/sbe_retry_handler_test.H
@@ -5,7 +5,7 @@
/* */
/* OpenPOWER HostBoot Project */
/* */
-/* Contributors Listed Below - COPYRIGHT 2017 */
+/* Contributors Listed Below - COPYRIGHT 2017,2018 */
/* [+] International Business Machines Corp. */
/* */
/* */
@@ -125,12 +125,12 @@ class SbeRetryHandlerTest : public CxxTest::TestSuite
uint32_t l_sbeStarted = l_cpu_target->getAttr<
TARGETING::ATTR_SBE_IS_STARTED>();
- if(l_SBEobj.getSbeRestart() && !l_sbeStarted)
+ if(l_SBEobj.isSbeAtRuntime() && !l_sbeStarted)
{
TS_FAIL("testSBEStarted: If the class element that "
"the SBE started is true, then the SBE attribute also "
"needs to be true");
- }else if(!(l_SBEobj.getSbeRestart() && l_sbeStarted))
+ }else if(!(l_SBEobj.isSbeAtRuntime() && l_sbeStarted))
{
TS_FAIL("testSBEStarted: If the class element "
"that the SBE started is false, then the SBE attribute "
OpenPOWER on IntegriCloud