summaryrefslogtreecommitdiffstats
path: root/src/usr/sbeio/common/sbe_retry_handler.C
diff options
context:
space:
mode:
Diffstat (limited to 'src/usr/sbeio/common/sbe_retry_handler.C')
-rw-r--r--src/usr/sbeio/common/sbe_retry_handler.C173
1 files changed, 49 insertions, 124 deletions
diff --git a/src/usr/sbeio/common/sbe_retry_handler.C b/src/usr/sbeio/common/sbe_retry_handler.C
index 25f05311d..222e6a2b1 100644
--- a/src/usr/sbeio/common/sbe_retry_handler.C
+++ b/src/usr/sbeio/common/sbe_retry_handler.C
@@ -37,6 +37,7 @@
#include <trace/interface.H>
#include <errl/errlentry.H>
#include <errl/errlmanager.H>
+#include <errl/errlreasoncodes.H>
#include <p9_extract_sbe_rc.H>
#include <fapi2/target.H>
@@ -44,6 +45,7 @@
#include <initservice/isteps_trace.H>
#include <initservice/initserviceif.H>
#include <initservice/istepdispatcherif.H>
+#include <initservice/initsvcreasoncodes.H>
#include <errl/errludtarget.H>
#include <util/misc.H>
#include <ipmi/ipmiwatchdog.H>
@@ -100,12 +102,6 @@ constexpr uint8_t MAX_SIDE_BOOT_ATTEMPTS = 2;
// add to an errorlog but otherwise ignores
constexpr uint8_t MAX_EXPECTED_FFDC_PACKAGES = 2;
-// action_for_ffdc_rc will figure out what action we should do
-// for each p9_extract_sbe_rc return code. If the RC does not match
-// any return code from p9_extract_sbe_rc then we want to have a
-// known "no action found" value which is defined here
-constexpr uint32_t NO_ACTION_FOUND_FOR_THIS_RC = 0xFFFF;
-
// Set up constants that will be used for setting up the timeout for
// reading the sbe message register
constexpr uint64_t SBE_RETRY_TIMEOUT_HW_SEC = 60; // 60 seconds
@@ -128,7 +124,6 @@ SbeRetryHandler::SbeRetryHandler(SBE_MODE_OF_OPERATION i_sbeMode,
, iv_currentSBEState(SBE_REG_RETURN::SBE_NOT_AT_RUNTIME)
, iv_shutdownReturnCode(0)
, iv_currentSideBootAttempts(1) // It is safe to assume that the current side has attempted to boot
-, iv_ffdcSetAction(false)
, iv_sbeMode(i_sbeMode)
, iv_sbeRestartMethod(SBE_RESTART_METHOD::HRESET)
, iv_initialPowerOn(false)
@@ -207,7 +202,7 @@ void SbeRetryHandler::main_sbe_handler( TARGETING::Target * i_target )
ERRORLOG::ERRL_SEV_UNRECOVERABLE,
SBEIO_EXTRACT_RC_HANDLER,
SBEIO_SLAVE_FAILED_TO_BOOT,
- this->iv_ffdcSetAction,
+ this->iv_sbeRegister.asyncFFDC,
TARGETING::get_huid(i_target));
l_errl->collectTrace( "ISTEPS_TRACE", 256);
@@ -226,12 +221,10 @@ void SbeRetryHandler::main_sbe_handler( TARGETING::Target * i_target )
}
#endif
- // If iv_ffdcSetAction is true, that means that we found ffdc to parse
- // this indicates that the SBE already determined what went wrong and
- // reported the error via asyncFFDC so there is no need to
- // run p9_extract_sbe_rc
- // Also if the sbe is not booted at all, extract_rc will fail so we don't want to run it
- if(!this->iv_ffdcSetAction && this->iv_sbeRegister.sbeBooted)
+
+ // If the SBE is not booted at all, extract_rc will fail, so we only
+ // run extract_rc if we know the SBE has at least tried to boot
+ if(this->iv_sbeRegister.sbeBooted)
{
SBE_TRACF("main_sbe_handler(): No async ffdc found and sbe says it has been booted, running run p9_sbe_extract_rc.");
// Call the function that runs extract_rc, this needs to run to determine
@@ -241,7 +234,7 @@ void SbeRetryHandler::main_sbe_handler( TARGETING::Target * i_target )
// If we have determined that the sbe never booted
// then set the current action to be "restart sbe"
// that way we will attempt to start the sbe again
- else if(!this->iv_sbeRegister.sbeBooted)
+ else
{
SBE_TRACF("main_sbe_handler(): SBE reports it was never booted, calling p9_sbe_extract_rc will fail. Setting action to be RESTART_SBE");
this->iv_currentAction = P9_EXTRACT_SBE_RC::RESTART_SBE;
@@ -553,9 +546,6 @@ void SbeRetryHandler::main_sbe_handler( TARGETING::Target * i_target )
}
}
- // We have performed the action, so make sure that ffdcSetAction is set back to 0
- this->iv_ffdcSetAction = 0;
-
// Get the sbe register (note that if asyncFFDC bit is set in status register then
// we will read it in this call)
if(!this->sbe_run_extract_msg_reg(i_target))
@@ -565,19 +555,12 @@ void SbeRetryHandler::main_sbe_handler( TARGETING::Target * i_target )
break;
}
- // If our retry attempt fail, and we didnt see any asyncFFDC after
+ // If the currState of the SBE is not RUNTIME then we will assume
+ // our attempt to boot the SBE has failed, so run extract rc again
+ // to determine why we have failed
if (this->iv_sbeRegister.currState != SBE_STATE_RUNTIME)
{
- // Again, if ffdcSetAction is set, that means we have found FFDC
- // already that the SBE saved away prior to failing so we don't need
- // to run extract_rc if ffdcSetAction is true
- if(!this->iv_ffdcSetAction)
- {
- SBE_TRACF("main_sbe_handler(): Failed to reach runtime after sbe restart and no asyncFFDC found. Calling p9_sbe_extract_rc.");
- // Run extract rc to figure out why the sbe did not make it to
- // runtime state
- this->sbe_run_extract_rc(i_target);
- }
+ this->sbe_run_extract_rc(i_target);
}
} while((this->iv_sbeRegister).currState != SBE_STATE_RUNTIME);
@@ -637,8 +620,8 @@ bool SbeRetryHandler::sbe_run_extract_msg_reg(TARGETING::Target * i_target)
(this->iv_sbeRegister.currState != SBE_STATE_RUNTIME) &&
this->iv_sbeRegister.asyncFFDC)
{
- SBE_TRACF("SUCCESS: sbe_run_extract_msg_reg completed okay for proc 0x%.8X . "
- "There was asyncFFDC found though so we will run the FFDC parser",
+ SBE_TRACF("WARNING: sbe_run_extract_msg_reg completed without error for proc 0x%.8X . "
+ "However, there was asyncFFDC found though so we will run the FFDC parser",
TARGETING::get_huid(i_target));
// The SBE has responded to an asyncronus request that hostboot
// made with FFDC indicating an error has occurred.
@@ -669,7 +652,7 @@ bool SbeRetryHandler::sbe_run_extract_msg_reg(TARGETING::Target * i_target)
// No guarantees that the SBE made it to runtime
else
{
- SBE_TRACF("SUCCESS: sbe_run_extract_msg_reg completed okay for proc 0x%.8X",
+ SBE_TRACF("sbe_run_extract_msg_reg completed without error for proc 0x%.8X",
TARGETING::get_huid(i_target));
}
@@ -826,69 +809,6 @@ void SbeRetryHandler::handleFspIplTimeFail(TARGETING::Target * i_target)
}
#endif
-uint32_t SbeRetryHandler::action_for_ffdc_rc(
- uint32_t i_rc)
-{
- SBE_TRACF(ENTER_MRK "action_for_ffdc_rc()");
-
- uint32_t l_action;
-
- switch(i_rc)
- {
- case fapi2::RC_EXTRACT_SBE_RC_RUNNING:
- case fapi2::RC_EXTRACT_SBE_RC_NEVER_STARTED:
- case fapi2::RC_EXTRACT_SBE_RC_PROGRAM_INTERRUPT:
- case fapi2::RC_EXTRACT_SBE_RC_ADDR_NOT_RECOGNIZED:
- case fapi2::RC_EXTRACT_SBE_RC_PIBMEM_ECC_ERR:
- case fapi2::RC_EXTRACT_SBE_RC_FI2CM_BIT_RATE_ERR_NONSECURE_MODE:
-
- l_action = P9_EXTRACT_SBE_RC::RESTART_SBE;
-
- break;
-
- case fapi2::RC_EXTRACT_SBE_RC_MAGIC_NUMBER_MISMATCH:
- case fapi2::RC_EXTRACT_SBE_RC_FI2C_ECC_ERR:
- case fapi2::RC_EXTRACT_SBE_RC_FI2C_ECC_ERR_NONSECURE_MODE:
-
- l_action = P9_EXTRACT_SBE_RC::REIPL_UPD_SEEPROM;
-
- break;
-
- case fapi2::RC_EXTRACT_SBE_RC_FI2C_TIMEOUT:
- case fapi2::RC_EXTRACT_SBE_RC_SBE_L1_LOADER_FAIL:
- case fapi2::RC_EXTRACT_SBE_RC_SBE_L2_LOADER_FAIL:
- case fapi2::RC_EXTRACT_SBE_RC_UNKNOWN_ERROR:
-
- l_action = P9_EXTRACT_SBE_RC::REIPL_BKP_SEEPROM;
-
- break;
-
- case fapi2::RC_EXTRACT_SBE_RC_OTP_TIMEOUT:
- case fapi2::RC_EXTRACT_SBE_RC_OTP_PIB_ERR:
- case fapi2::RC_EXTRACT_SBE_RC_PIBMEM_PIB_ERR:
- case fapi2::RC_EXTRACT_SBE_RC_FI2C_SPRM_CFG_ERR:
- case fapi2::RC_EXTRACT_SBE_RC_FI2C_PIB_ERR:
-
- l_action = P9_EXTRACT_SBE_RC::RESTART_CBS;
-
- break;
-
- case fapi2::RC_EXTRACT_SBE_RC_BRANCH_TO_SEEPROM_FAIL:
- case fapi2::RC_EXTRACT_SBE_RC_UNEXPECTED_OTPROM_HALT:
- case fapi2::RC_EXTRACT_SBE_RC_OTP_ECC_ERR:
-
- l_action = P9_EXTRACT_SBE_RC::NO_RECOVERY_ACTION;
-
- break;
- default:
-
- l_action = NO_ACTION_FOUND_FOR_THIS_RC;
- }
-
- SBE_TRACF(EXIT_MRK "action_for_ffdc_rc()");
- return l_action;
-}
-
void SbeRetryHandler::sbe_get_ffdc_handler(TARGETING::Target * i_target)
{
SBE_TRACF(ENTER_MRK "sbe_get_ffdc_handler()");
@@ -896,6 +816,13 @@ void SbeRetryHandler::sbe_get_ffdc_handler(TARGETING::Target * i_target)
uint32_t *l_pFifoResponse =
reinterpret_cast<uint32_t *>(malloc(l_responseSize));
+ // For OpenPower systems if a piece of HW is garded then we will
+ // need to force a reconfigure loop and avoid the rest of the
+ // sbe recovery process. On FSP systems if HW callouts are found in
+ // the FFDC, we just commit the errorlog and TI telling HWSV to look
+ // at the failure
+ bool l_reconfigRequired = false;
+
#ifndef __HOSTBOOT_RUNTIME
errlHndl_t l_errl = nullptr;
l_errl = getFifoSBEFFDC(i_target,
@@ -986,10 +913,6 @@ void SbeRetryHandler::sbe_get_ffdc_handler(TARGETING::Target * i_target)
// Get the RC from the FFDC package
uint32_t l_rc = l_ffdc_parser->getPackageRC(i);
- // Determine an action for the RC
- P9_EXTRACT_SBE_RC::RETURN_ACTION l_action =
- static_cast<P9_EXTRACT_SBE_RC::RETURN_ACTION>(action_for_ffdc_rc(l_rc));
-
//See if HWP error, create another error log with callouts
if (l_rc != fapi2::FAPI2_RC_PLAT_ERR_SEE_DATA)
{
@@ -1010,8 +933,30 @@ void SbeRetryHandler::sbe_get_ffdc_handler(TARGETING::Target * i_target)
uint32_t l_pos = i_target->getAttr<TARGETING::ATTR_FAPI_POS>();
FAPI_SET_SBE_ERROR(l_fapiRc, l_rc, &l_sbeFfdc, l_pos);
errlHndl_t l_sbeHwpfErr = rcToErrl(l_fapiRc);
+ // If we created an error successfully we must now commit it
if(l_sbeHwpfErr)
{
+ // On BMC systems we must do a reconfig loop if gard is found
+ if(!INITSERVICE::spBaseServicesEnabled())
+ {
+ // Iterate over user details sections of the error log to check for UD
+ // callouts from the HWPF component
+ // NOTE: rcToErrl will make UD Callouts have ERRL_COMP_ID/ERRL_UDT_CALLOUT
+ for(const auto l_callout : l_sbeHwpfErr->getUDSections(ERRL_COMP_ID,
+ ERRORLOG::ERRL_UDT_CALLOUT) )
+ {
+ // If the callout has a gard associated with it, we need to do a reconfig loop
+ if((reinterpret_cast<HWAS::callout_ud_t*>(l_callout)->type == HWAS::HW_CALLOUT &&
+ reinterpret_cast<HWAS::callout_ud_t*>(l_callout)->gardErrorType != HWAS::GARD_NULL) ||
+ (reinterpret_cast<HWAS::callout_ud_t*>(l_callout)->type == HWAS::CLOCK_CALLOUT &&
+ reinterpret_cast<HWAS::callout_ud_t*>(l_callout)->clkGardErrorType != HWAS::GARD_NULL) ||
+ (reinterpret_cast<HWAS::callout_ud_t*>(l_callout)->type == HWAS::PART_CALLOUT &&
+ reinterpret_cast<HWAS::callout_ud_t*>(l_callout)->partGardErrorType != HWAS::GARD_NULL))
+ {
+ l_reconfigRequired = true;
+ }
+ }
+ }
// Set the PLID of the error log to master PLID
// if the master PLID is set
updatePlids(l_sbeHwpfErr);
@@ -1029,26 +974,6 @@ void SbeRetryHandler::sbe_get_ffdc_handler(TARGETING::Target * i_target)
SBEIO_UDT_PARAMETERS,
false );
}
-
- if(l_action != NO_ACTION_FOUND_FOR_THIS_RC)
- {
- // Set the action associated with the RC that we found
- this->iv_currentAction = l_action;
-
- // This call will look at what action_for_ffdc_rc had set the return action to
- // checks on how many times we have attempted to boot this side,
- // and if we have already tried switching sides
- //
- //
- // Note this call is important, if this is not called we could end up in a
- // endless loop because this enforces MAX_SWITCH_SIDE_COUNT and MAX_SIDE_BOOT_ATTEMPTS
- this->bestEffortCheck();
-
- // Set the instance variable ffdcSetAction to let us
- // know that the current action was set from what we
- // found in the asyncFFDC
- this->iv_ffdcSetAction = true;
- }
}
l_errl->collectTrace( SBEIO_COMP_NAME, KILOBYTE/4);
@@ -1066,6 +991,11 @@ void SbeRetryHandler::sbe_get_ffdc_handler(TARGETING::Target * i_target)
free(l_pFifoResponse);
l_pFifoResponse = nullptr;
+ if(l_reconfigRequired)
+ {
+ INITSERVICE::doShutdown(INITSERVICE::SHUTDOWN_DO_RECONFIG_LOOP);
+ }
+
SBE_TRACF(EXIT_MRK "sbe_get_ffdc_handler()");
}
@@ -1103,11 +1033,6 @@ void SbeRetryHandler::sbe_run_extract_rc(TARGETING::Target * i_target)
l_errl = rcToErrl(l_rc, ERRORLOG::ERRL_SEV_UNRECOVERABLE);
this->iv_currentAction = l_ret;
- // Set the instance variable ffdcSetAction to let us
- // know that the current action was not set by what
- // we found in asyncFFDC
- this->iv_ffdcSetAction = false;
-
// This call will look at what p9_extact_sbe_rc had set the return action to
// checks on how many times we have attempted to boot this side,
// and if we have already tried switching sides
OpenPOWER on IntegriCloud