diff options
Diffstat (limited to 'src/usr/sbeio/sbe_psudd.C')
-rw-r--r-- | src/usr/sbeio/sbe_psudd.C | 111 |
1 files changed, 37 insertions, 74 deletions
diff --git a/src/usr/sbeio/sbe_psudd.C b/src/usr/sbeio/sbe_psudd.C index 10fbc7a8a..fcbc95d5b 100644 --- a/src/usr/sbeio/sbe_psudd.C +++ b/src/usr/sbeio/sbe_psudd.C @@ -48,6 +48,7 @@ #include <fapi2/plat_hwp_invoker.H> #include <p9_extract_sbe_rc.H> #include <errl/errludlogregister.H> +#include <sbeio/sbe_retry_handler.H> trace_desc_t* g_trac_sbeio; TRAC_INIT(&g_trac_sbeio, SBEIO_COMP_NAME, 6*KILOBYTE, TRACE::BUFFER_SLOW); @@ -539,92 +540,54 @@ errlHndl_t SbePsu::pollForPsuComplete(TARGETING::Target * i_target, } psuResponse* l_resp = reinterpret_cast<psuResponse*>(l_respRegs); - - SBE_TRACF(ERR_MRK "pollForPsuComplete: " - "timeout waiting for PSU request to complete" - ": doorbell=%.8X, mbox4=%.16llX", - l_data, l_respRegs[0]); - - // Look for a hardware failure first - const fapi2::Target<fapi2::TARGET_TYPE_PROC_CHIP> - l_fapiTarg(i_target); - P9_EXTRACT_SBE_RC::RETURN_ACTION l_rcAction = - P9_EXTRACT_SBE_RC::NO_RECOVERY_ACTION; - FAPI_INVOKE_HWP( l_errl, p9_extract_sbe_rc, - l_fapiTarg, l_rcAction ); - if( l_rcAction != P9_EXTRACT_SBE_RC::NO_RECOVERY_ACTION ) - { - // saw an error on the sbe itself, use the error we - // got back from the HWP - } - else + if(!(l_resp->primaryStatus & SBE_PRI_FFDC_ERROR)) { - // got an error in the attempt to find a hw fail, just - // commit it as info - if( l_errl ) - { - l_errl->setSev( ERRORLOG::ERRL_SEV_INFORMATIONAL ); - ERRORLOG::errlCommit( l_errl, SBEIO_COMP_ID ); - l_errl = nullptr; - } - - // we don't know what caused the timeout, make a generic log + // Create an informational error /*@ * @errortype - * @moduleid SBEIO_PSU - * @reasoncode SBEIO_PSU_RESPONSE_TIMEOUT + * @moduleid SBEIO_PSU + * @reasoncode SBEIO_PSU_FFDC_MISSING * @userdata1[00:15] Primary Status in mbox4 * @userdata1[16:31] Sequence Id in mbox4 * @userdata1[32:63] Processor Target - * @userdata2 Failing Request - * @devdesc Timeout waiting for PSU command to complete - * @custdesc Firmware error communicating with boot device + * @userdata2 Failing Request + * @devdesc Timeout waiting for PSU command to complete + * @custdesc Firmware error communicating with boot device */ - l_errl = new ErrlEntry(ERRL_SEV_UNRECOVERABLE, - SBEIO_PSU, - SBEIO_PSU_RESPONSE_TIMEOUT, - TWO_UINT32_TO_UINT64( - TWO_UINT16_TO_UINT32( - l_resp->primaryStatus, - l_resp->secondaryStatus), - TARGETING::get_huid(i_target)), - i_pPsuRequest->mbxReg0); - // Code should be okay so callout hardware - l_errl->addHwCallout( i_target, - HWAS::SRCI_PRIORITY_HIGH, - HWAS::NO_DECONFIG, - HWAS::GARD_NULL ); - } - - // log the failing proc as FFDC - ErrlUserDetailsTarget(i_target).addToLog(l_errl); - - // check for any FFDC logged by the SBE itself - void * l_ffdcPkg = findFFDCBufferByTarget(i_target); - if(l_ffdcPkg != NULL) - { - SbeFFDCParser * l_ffdc_parser = new SbeFFDCParser(); - l_ffdc_parser->parseFFDCData(l_ffdcPkg); - uint8_t l_pkgs = l_ffdc_parser->getTotalPackages(); - uint8_t i; - for(i = 0; i < l_pkgs; i++) + l_errl = new ErrlEntry(ERRL_SEV_PREDICTIVE, + SBEIO_PSU, + SBEIO_PSU_FFDC_MISSING, + TWO_UINT32_TO_UINT64( + TWO_UINT16_TO_UINT32( + l_resp->primaryStatus, + l_resp->secondaryStatus), + TARGETING::get_huid(i_target)), + i_pPsuRequest->mbxReg0); + + // If the FFDC is empty, we need to check the state of the SBE + // and then, handle the SBE value, and potentionally try + // to restart the SBE + SbeRetryHandler l_SBEobj = SbeRetryHandler( + SbeRetryHandler::SBE_MODE_OF_OPERATION:: + INFORMATIONAL_ONLY); + + l_SBEobj.main_sbe_handler(i_target); + + if(l_SBEobj.getPLID() != NULL) { - l_errl->addFFDC( SBEIO_COMP_ID, - l_ffdc_parser->getFFDCPackage(i), - l_ffdc_parser->getPackageLength(i), - 0, - SBEIO_UDT_PARAMETERS, - false ); + // If there is not an unrecovered error, we want to tie + // the error from the sbe retry handler to this error. + l_errl->plid(l_SBEobj.getPLID()); + l_errl->setSev(ERRL_SEV_UNRECOVERABLE); } - delete l_ffdc_parser; - l_ffdc_parser = nullptr; - } - // save the mbox status regs as FFDC - l_respRegsFFDC.addToLog(l_errl); + // log the failing proc as FFDC + ErrlUserDetailsTarget(i_target).addToLog(l_errl); + l_respRegsFFDC.addToLog(l_errl); + l_errl->collectTrace(SBEIO_COMP_NAME); + } - l_errl->collectTrace(SBEIO_COMP_NAME); MAGIC_INST_GET_SBE_TRACES( i_target->getAttr<TARGETING::ATTR_POSITION>(), SBEIO_PSU_RESPONSE_TIMEOUT); |