summaryrefslogtreecommitdiffstats
path: root/src/usr/sbeio/sbe_psudd.C
diff options
context:
space:
mode:
authorElizabeth Liner <eliner@us.ibm.com>2017-12-08 11:23:05 -0600
committerDaniel M. Crowell <dcrowell@us.ibm.com>2017-12-20 21:38:16 -0500
commita334e5aa3f11fe4f4657ba89139ac0a543845af1 (patch)
tree2f1fe358ecd4a76b8e08a2addc6814553217b20a /src/usr/sbeio/sbe_psudd.C
parentc16744bed5254395d28c188e1571f0846c7f2afa (diff)
downloadtalos-hostboot-a334e5aa3f11fe4f4657ba89139ac0a543845af1.tar.gz
talos-hostboot-a334e5aa3f11fe4f4657ba89139ac0a543845af1.zip
Add in a check in the PSU error logging for SBE failure
If there is an error in the PSU and there is no FFDC, there is probably something wrong with the SBE. Since there is new support for attempting an SBE reboot when an error occurs, we want to add that functionality to the PSU flow as well.

Change-Id: I8203193b49caaf640962e181a814df83c0760ff4
RTC: 178801
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/46641
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Reviewed-by: Daniel M. Crowell <dcrowell@us.ibm.com>
Diffstat (limited to 'src/usr/sbeio/sbe_psudd.C')
-rw-r--r--src/usr/sbeio/sbe_psudd.C111
1 files changed, 37 insertions, 74 deletions
diff --git a/src/usr/sbeio/sbe_psudd.C b/src/usr/sbeio/sbe_psudd.C
index 10fbc7a8a..fcbc95d5b 100644
--- a/src/usr/sbeio/sbe_psudd.C
+++ b/src/usr/sbeio/sbe_psudd.C
@@ -48,6 +48,7 @@
#include <fapi2/plat_hwp_invoker.H>
#include <p9_extract_sbe_rc.H>
#include <errl/errludlogregister.H>
+#include <sbeio/sbe_retry_handler.H>
trace_desc_t* g_trac_sbeio;
TRAC_INIT(&g_trac_sbeio, SBEIO_COMP_NAME, 6*KILOBYTE, TRACE::BUFFER_SLOW);
@@ -539,92 +540,54 @@ errlHndl_t SbePsu::pollForPsuComplete(TARGETING::Target * i_target,
}
psuResponse* l_resp = reinterpret_cast<psuResponse*>(l_respRegs);
-
- SBE_TRACF(ERR_MRK "pollForPsuComplete: "
- "timeout waiting for PSU request to complete"
- ": doorbell=%.8X, mbox4=%.16llX",
- l_data, l_respRegs[0]);
-
- // Look for a hardware failure first
- const fapi2::Target<fapi2::TARGET_TYPE_PROC_CHIP>
- l_fapiTarg(i_target);
- P9_EXTRACT_SBE_RC::RETURN_ACTION l_rcAction =
- P9_EXTRACT_SBE_RC::NO_RECOVERY_ACTION;
- FAPI_INVOKE_HWP( l_errl, p9_extract_sbe_rc,
- l_fapiTarg, l_rcAction );
- if( l_rcAction != P9_EXTRACT_SBE_RC::NO_RECOVERY_ACTION )
- {
- // saw an error on the sbe itself, use the error we
- // got back from the HWP
- }
- else
+ if(!(l_resp->primaryStatus & SBE_PRI_FFDC_ERROR))
{
- // got an error in the attempt to find a hw fail, just
- // commit it as info
- if( l_errl )
- {
- l_errl->setSev( ERRORLOG::ERRL_SEV_INFORMATIONAL );
- ERRORLOG::errlCommit( l_errl, SBEIO_COMP_ID );
- l_errl = nullptr;
- }
-
- // we don't know what caused the timeout, make a generic log
+ // Create an informational error
/*@
* @errortype
- * @moduleid SBEIO_PSU
- * @reasoncode SBEIO_PSU_RESPONSE_TIMEOUT
+ * @moduleid SBEIO_PSU
+ * @reasoncode SBEIO_PSU_FFDC_MISSING
* @userdata1[00:15] Primary Status in mbox4
* @userdata1[16:31] Sequence Id in mbox4
* @userdata1[32:63] Processor Target
- * @userdata2 Failing Request
- * @devdesc Timeout waiting for PSU command to complete
- * @custdesc Firmware error communicating with boot device
+ * @userdata2 Failing Request
+ * @devdesc Timeout waiting for PSU command to complete
+ * @custdesc Firmware error communicating with boot device
*/
- l_errl = new ErrlEntry(ERRL_SEV_UNRECOVERABLE,
- SBEIO_PSU,
- SBEIO_PSU_RESPONSE_TIMEOUT,
- TWO_UINT32_TO_UINT64(
- TWO_UINT16_TO_UINT32(
- l_resp->primaryStatus,
- l_resp->secondaryStatus),
- TARGETING::get_huid(i_target)),
- i_pPsuRequest->mbxReg0);
- // Code should be okay so callout hardware
- l_errl->addHwCallout( i_target,
- HWAS::SRCI_PRIORITY_HIGH,
- HWAS::NO_DECONFIG,
- HWAS::GARD_NULL );
- }
-
- // log the failing proc as FFDC
- ErrlUserDetailsTarget(i_target).addToLog(l_errl);
-
- // check for any FFDC logged by the SBE itself
- void * l_ffdcPkg = findFFDCBufferByTarget(i_target);
- if(l_ffdcPkg != NULL)
- {
- SbeFFDCParser * l_ffdc_parser = new SbeFFDCParser();
- l_ffdc_parser->parseFFDCData(l_ffdcPkg);
- uint8_t l_pkgs = l_ffdc_parser->getTotalPackages();
- uint8_t i;
- for(i = 0; i < l_pkgs; i++)
+ l_errl = new ErrlEntry(ERRL_SEV_PREDICTIVE,
+ SBEIO_PSU,
+ SBEIO_PSU_FFDC_MISSING,
+ TWO_UINT32_TO_UINT64(
+ TWO_UINT16_TO_UINT32(
+ l_resp->primaryStatus,
+ l_resp->secondaryStatus),
+ TARGETING::get_huid(i_target)),
+ i_pPsuRequest->mbxReg0);
+
+ // If the FFDC is empty, we need to check the state of the SBE
+ // and then, handle the SBE value, and potentially try
+ // to restart the SBE
+ SbeRetryHandler l_SBEobj = SbeRetryHandler(
+ SbeRetryHandler::SBE_MODE_OF_OPERATION::
+ INFORMATIONAL_ONLY);
+
+ l_SBEobj.main_sbe_handler(i_target);
+
+ if(l_SBEobj.getPLID() != NULL)
{
- l_errl->addFFDC( SBEIO_COMP_ID,
- l_ffdc_parser->getFFDCPackage(i),
- l_ffdc_parser->getPackageLength(i),
- 0,
- SBEIO_UDT_PARAMETERS,
- false );
+ // If there is not an unrecovered error, we want to tie
+ // the error from the sbe retry handler to this error.
+ l_errl->plid(l_SBEobj.getPLID());
+ l_errl->setSev(ERRL_SEV_UNRECOVERABLE);
}
- delete l_ffdc_parser;
- l_ffdc_parser = nullptr;
- }
- // save the mbox status regs as FFDC
- l_respRegsFFDC.addToLog(l_errl);
+ // log the failing proc as FFDC
+ ErrlUserDetailsTarget(i_target).addToLog(l_errl);
+ l_respRegsFFDC.addToLog(l_errl);
+ l_errl->collectTrace(SBEIO_COMP_NAME);
+ }
- l_errl->collectTrace(SBEIO_COMP_NAME);
MAGIC_INST_GET_SBE_TRACES(
i_target->getAttr<TARGETING::ATTR_POSITION>(),
SBEIO_PSU_RESPONSE_TIMEOUT);
OpenPOWER on IntegriCloud