diff options
Diffstat (limited to 'src/usr/sbeio')
-rw-r--r-- | src/usr/sbeio/common/sbe_attn.C | 102 | ||||
-rw-r--r-- | src/usr/sbeio/common/sbe_retry_handler.C | 44 | ||||
-rw-r--r-- | src/usr/sbeio/runtime/sbeio_vital_attn.C | 10 | ||||
-rw-r--r-- | src/usr/sbeio/sbe_fifodd.C | 2 | ||||
-rw-r--r-- | src/usr/sbeio/sbe_psudd.C | 2 |
5 files changed, 132 insertions, 28 deletions
diff --git a/src/usr/sbeio/common/sbe_attn.C b/src/usr/sbeio/common/sbe_attn.C index 5c151b4eb..2fb0cf1da 100644 --- a/src/usr/sbeio/common/sbe_attn.C +++ b/src/usr/sbeio/common/sbe_attn.C @@ -30,6 +30,7 @@ #include <errl/errlentry.H> #include <errl/errlmanager.H> #include <errl/errludtarget.H> +#include <errl/errludcallout.H> #include <sbeio/sbe_attn.H> #include <fapi2/target.H> #include <fapi2/plat_hwp_invoker.H> @@ -53,44 +54,117 @@ namespace SBEIO TRACFCOMP( g_trac_sbeio, ENTER_MRK "handleVitalAttn> i_procTarg=", TARGETING::get_huid(i_procTarg) ); - errlHndl_t l_errhdl = nullptr; + errlHndl_t l_err = nullptr; - // TODO 167191 Full SBE Belly-Up Handling for OP #ifdef __HOSTBOOT_RUNTIME // Inform OPAL, SBE is currently disabled if (TARGETING::is_sapphire_load()) { // Inform OPAL of the inoperable SBE - l_errhdl = RT_SBEIO::vital_attn_inform_opal(i_procTarg, + l_err = RT_SBEIO::vital_attn_inform_opal(i_procTarg, RT_SBEIO::SBE_DISABLED); - } -#endif - // @todo - RTC:180242 - Restart SBE + if(l_err) + { + errlCommit(l_err, SBEIO_COMP_ID); + } + } SbeRetryHandler l_sbeObj = SbeRetryHandler( SbeRetryHandler::SBE_MODE_OF_OPERATION::ATTEMPT_REBOOT); - //l_sbeObj.main_sbe_handler(i_procTarg); + l_sbeObj.main_sbe_handler(i_procTarg); + // Check if the SBE made it back to runtime, this tells us if the retry was a + // success or not + if (!l_sbeObj.isSbeAtRuntime()) + { + /*@ + * @errortype ERRL_SEV_PREDICTIVE + * @moduleid SBEIO_HANDLE_VITAL_ATTN + * @reasoncode SBEIO_NO_RECOVERY_ACTION + * @userdata1 Huid of processor + * @userdata2 Unused + * @devdesc PRD detected an error with the SBE and HB failed to + * recover + * @custdesc Processor Error + */ + l_err = new ERRORLOG::ErrlEntry( + ERRORLOG::ERRL_SEV_PREDICTIVE, + SBEIO_HANDLE_VITAL_ATTN, + SBEIO_NO_RECOVERY_ACTION, + TARGETING::get_huid(i_procTarg), + 0); -#ifdef __HOSTBOOT_RUNTIME - // Inform OPAL the state of the SBE after a retry - if (l_sbeObj.isSbeAtRuntime()) + l_err->addProcedureCallout( HWAS::EPUB_PRC_SUE_PREVERROR, + HWAS::SRCI_PRIORITY_HIGH); + + l_err->addHwCallout( i_procTarg, + HWAS::SRCI_PRIORITY_LOW, + HWAS::NO_DECONFIG, + HWAS::GARD_NULL ); + + l_err->collectTrace( SBEIO_COMP_NAME, 256); + // @todo - RTC:180244 - Disable the OCC + } + // Inform OPAL the state of the SBE after a retry is successful + else { if (TARGETING::is_sapphire_load()) { - l_errhdl = RT_SBEIO::vital_attn_inform_opal(i_procTarg, + l_err = RT_SBEIO::vital_attn_inform_opal(i_procTarg, RT_SBEIO::SBE_ENABLED); + if(l_err) + { + errlCommit(l_err, SBEIO_COMP_ID); + } } + } +#else + const fapi2::Target<fapi2::TARGET_TYPE_PROC_CHIP> l_fapi2ProcTarget( + const_cast<TARGETING::Target*> (i_procTarg)); - // @todo - RTC:180244 - Disable the OCC + //Unused in the context, but required for p9_extract_sbe_rc + P9_EXTRACT_SBE_RC::RETURN_ACTION l_ret = + P9_EXTRACT_SBE_RC::REIPL_UPD_SEEPROM; + + FAPI_INVOKE_HWP(l_err, p9_extract_sbe_rc, + l_fapi2ProcTarget, l_ret); + + // p9_extract_sbe_rc should always return w/ an error + // make sure at least some sort of error gets returned + // because we are not attempting to recover anything + if(!l_err) + { + /*@ + * @errortype ERRL_SEV_PREDICTIVE + * @moduleid SBEIO_HANDLE_VITAL_ATTN + * @reasoncode SBEIO_EXTRACT_RC_ERROR + * @userdata1 Huid of processor + * @userdata2 Return action from extract_rc + * @devdesc We expected an error log to be returned from + p9_extract_rc but there wasn't one + * @custdesc Processor Error + */ + l_err = new ERRORLOG::ErrlEntry( + ERRORLOG::ERRL_SEV_PREDICTIVE, + SBEIO_HANDLE_VITAL_ATTN, + SBEIO_EXTRACT_RC_ERROR, + TARGETING::get_huid(i_procTarg), + l_ret); } + + //We want to deconfigure the processor where the error was detected + l_err->addHwCallout( i_procTarg, + HWAS::SRCI_PRIORITY_HIGH, + HWAS::DELAYED_DECONFIG, + HWAS::GARD_NULL ); #endif + TRACFCOMP( g_trac_sbeio, EXIT_MRK "handleVitalAttn> "); - return l_errhdl; + return l_err; } -}; +};
\ No newline at end of file diff --git a/src/usr/sbeio/common/sbe_retry_handler.C b/src/usr/sbeio/common/sbe_retry_handler.C index f68c587d3..5cfa6f32f 100644 --- a/src/usr/sbeio/common/sbe_retry_handler.C +++ b/src/usr/sbeio/common/sbe_retry_handler.C @@ -82,6 +82,32 @@ using namespace ERRORLOG; namespace SBEIO { +// Define constant expressions to be used + +//There are only 2 sides to the seeproms, so we only want to flip sides once +constexpr uint8_t MAX_SWITCH_SIDE_COUNT = 1; + +//We only want to attempt to boot with the same side seeprom twice +constexpr uint8_t MAX_SIDE_BOOT_ATTEMPTS = 2; + +// Currently we expect a maxiumum of 2 FFDC packets, the one +// that is useful to HB is the HWP FFDC. It is possible there is +// a packet that details an internal sbe fail that hostboot will +// add to an errorlog but otherwise ignores +constexpr uint8_t MAX_EXPECTED_FFDC_PACKAGES = 2; + +// action_for_ffdc_rc will figure out what action we should do +// for each p9_extract_sbe_rc return code. If the RC does not match +// any return code from p9_extract_sbe_rc then we want to have a +// known "no action found" value which is defined here +constexpr uint32_t NO_ACTION_FOUND_FOR_THIS_RC = 0xFFFF; + +// Set up constants that will be used for setting up the timeout for +// reading the sbe message register +constexpr uint64_t SBE_RETRY_TIMEOUT_HW_SEC = 60; // 60 seconds +constexpr uint64_t SBE_RETRY_TIMEOUT_SIMICS_SEC = 600; // 600 seconds +constexpr uint32_t SBE_RETRY_NUM_LOOPS = 60; + SbeRetryHandler::SbeRetryHandler(SBE_MODE_OF_OPERATION i_sbeMode) : SbeRetryHandler(i_sbeMode, 0) { @@ -121,7 +147,8 @@ void SbeRetryHandler::main_sbe_handler( TARGETING::Target * i_target ) errlHndl_t l_errl = nullptr; // Only set the secure debug bit (SDB) if we are not using xscom yet - if(!i_target->getAttr<TARGETING::ATTR_SCOM_SWITCHES>().useXscom) + if(!i_target->getAttr<TARGETING::ATTR_SCOM_SWITCHES>().useXscom && + !i_target->getAttr<TARGETING::ATTR_PROC_SBE_MASTER_CHIP>()) { this->iv_useSDB = true; } @@ -256,7 +283,7 @@ void SbeRetryHandler::main_sbe_handler( TARGETING::Target * i_target ) l_errl->collectTrace( SBEIO_COMP_NAME, 256); l_errl->addHwCallout( i_target, HWAS::SRCI_PRIORITY_HIGH, - HWAS::DECONFIG, + HWAS::DELAYED_DECONFIG, HWAS::GARD_NULL ); // Set the PLID of the error log to caller's PLID, @@ -584,14 +611,15 @@ errlHndl_t SbeRetryHandler::sbe_poll_status_reg(TARGETING::Target * i_target) // Each sbe gets 60s to respond with the fact that it's // booted and at runtime (stable state) - uint64_t l_sbeTimeout = SBE_RETRY_TIMEOUT_HW; // 60 seconds + uint64_t l_sbeTimeout = SBE_RETRY_TIMEOUT_HW_SEC; // 60 seconds // Bump this up really high for simics, things are slow there if( Util::isSimicsRunning() ) { - l_sbeTimeout = SBE_RETRY_TIMEOUT_SIMICS; // 600 seconds + l_sbeTimeout = SBE_RETRY_TIMEOUT_SIMICS_SEC; // 600 seconds } - const uint64_t SBE_WAIT_SLEEP = (l_sbeTimeout/SBE_RETRY_NUM_LOOPS); + //Sleep time should be 1 second on HW, 10 seconds on simics + const uint64_t SBE_WAIT_SLEEP_SEC = (l_sbeTimeout/SBE_RETRY_NUM_LOOPS); SBE_TRACF("Running p9_get_sbe_msg_register HWP on proc target %.8X", TARGETING::get_huid(i_target)); @@ -646,7 +674,7 @@ errlHndl_t SbeRetryHandler::sbe_poll_status_reg(TARGETING::Target * i_target) // reset watchdog before performing the nanosleep INITSERVICE::sendProgressCode(); #endif - nanosleep(0,SBE_WAIT_SLEEP); + nanosleep(SBE_WAIT_SLEEP_SEC,0); } } @@ -1055,8 +1083,6 @@ errlHndl_t SbeRetryHandler::switch_sbe_sides(TARGETING::Target * i_target) SBE_TRACF(ENTER_MRK "switch_sbe_sides()"); errlHndl_t l_errl = nullptr; - TARGETING::ATTR_PROC_SBE_MASTER_CHIP_type l_isMaster = - i_target->getAttr<TARGETING::ATTR_PROC_SBE_MASTER_CHIP>(); #ifdef __HOSTBOOT_RUNTIME const bool l_isRuntime = true; @@ -1066,7 +1092,7 @@ errlHndl_t SbeRetryHandler::switch_sbe_sides(TARGETING::Target * i_target) do{ - if(!l_isRuntime && !l_isMaster) + if(!l_isRuntime && !i_target->getAttr<TARGETING::ATTR_PROC_SBE_MASTER_CHIP>()) { const uint32_t l_sbeBootSelectMask = SBE::SBE_BOOT_SELECT_MASK >> 32; // Read PERV_SB_CS_FSI_BYTE 0x2820 for target proc diff --git a/src/usr/sbeio/runtime/sbeio_vital_attn.C b/src/usr/sbeio/runtime/sbeio_vital_attn.C index 8498d2c61..8f8d6bfa8 100644 --- a/src/usr/sbeio/runtime/sbeio_vital_attn.C +++ b/src/usr/sbeio/runtime/sbeio_vital_attn.C @@ -52,17 +52,21 @@ namespace RT_SBEIO /*@ * @errortype * @severity ERRL_SEV_INFORMATIONAL - * @moduleid SBEIO_RUNTIME_HANDLE_VITAL_ATTN + * @moduleid SBEIO_HANDLE_VITAL_ATTN * @reasoncode SBEIO_RT_NULL_FIRMWARE_REQUEST_PTR * @userdata1 HUID of target * @userdata2 none * @devdesc Unable to inform OPAL of SBE failure */ l_err = new ErrlEntry( ERRL_SEV_INFORMATIONAL, - SBEIO_RUNTIME_HANDLE_VITAL_ATTN, + SBEIO_HANDLE_VITAL_ATTN, SBEIO_RT_NULL_FIRMWARE_REQUEST_PTR, get_huid(i_procTarg), - 0, true); + 0, false); + + l_err->addProcedureCallout(HWAS::EPUB_PRC_PHYP_CODE, + HWAS::SRCI_PRIORITY_HIGH); + break; } diff --git a/src/usr/sbeio/sbe_fifodd.C b/src/usr/sbeio/sbe_fifodd.C index d82f038f2..fabfe1405 100644 --- a/src/usr/sbeio/sbe_fifodd.C +++ b/src/usr/sbeio/sbe_fifodd.C @@ -677,7 +677,7 @@ errlHndl_t SbeFifo::waitDnFifoReady(TARGETING::Target * i_target, { errl->addHwCallout( i_target, HWAS::SRCI_PRIORITY_HIGH, - HWAS::DECONFIG, + HWAS::DELAYED_DECONFIG, HWAS::GARD_NULL ); } diff --git a/src/usr/sbeio/sbe_psudd.C b/src/usr/sbeio/sbe_psudd.C index a4916d5c2..9df26744a 100644 --- a/src/usr/sbeio/sbe_psudd.C +++ b/src/usr/sbeio/sbe_psudd.C @@ -553,7 +553,7 @@ errlHndl_t SbePsu::pollForPsuComplete(TARGETING::Target * i_target, { l_errl->addHwCallout( i_target, HWAS::SRCI_PRIORITY_HIGH, - HWAS::DECONFIG, + HWAS::DELAYED_DECONFIG, HWAS::GARD_NULL ); } |