diff options
author | Zane Shelley <zshelle@us.ibm.com> | 2017-03-07 10:57:35 -0600 |
---|---|---|
committer | Zane C. Shelley <zshelle@us.ibm.com> | 2017-03-22 17:54:11 -0400 |
commit | 98de8e60e8395033bf1deed9ede0929ecb796841 (patch) | |
tree | 2bbdb4f6b2f13e03a9e2a2a95955d174d2b4b72d /src/usr | |
parent | 070a02c9f75530fd5c559456255500e36dcb2792 (diff) | |
download | talos-hostboot-98de8e60e8395033bf1deed9ede0929ecb796841.tar.gz talos-hostboot-98de8e60e8395033bf1deed9ede0929ecb796841.zip |
PRD: RCD parity error handling
Change-Id: I291ca299249e6b18760959fdc3fed2747d3d4f46
RTC: 165385
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/38123
Reviewed-by: Benjamin J. Weisenbeck <bweisenb@us.ibm.com>
Reviewed-by: Caleb N. Palmer <cnpalmer@us.ibm.com>
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Reviewed-by: Zane C. Shelley <zshelle@us.ibm.com>
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/38264
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Diffstat (limited to 'src/usr')
-rw-r--r-- | src/usr/diag/prdf/common/plat/p9/p9_mca.rule | 4 | ||||
-rw-r--r-- | src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule | 2 | ||||
-rw-r--r-- | src/usr/diag/prdf/common/plat/p9/p9_mca_regs.rule | 11 | ||||
-rw-r--r-- | src/usr/diag/prdf/plat/mem/prdfP9Mca.C | 45 |
4 files changed, 52 insertions, 10 deletions
diff --git a/src/usr/diag/prdf/common/plat/p9/p9_mca.rule b/src/usr/diag/prdf/common/plat/p9/p9_mca.rule index 7aeba8b66..9946489d3 100644 --- a/src/usr/diag/prdf/common/plat/p9/p9_mca.rule +++ b/src/usr/diag/prdf/common/plat/p9/p9_mca.rule @@ -219,7 +219,7 @@ rule rMCACALFIR MCACALFIR & ~MCACALFIR_MASK & MCACALFIR_ACT0 & MCACALFIR_ACT1; }; -group gMCACALFIR filter priority( 13 ), cs_root_cause( 13 ) +group gMCACALFIR filter priority( 13 ), cs_root_cause( 4, 13, 14 ) { /** MCACALFIR[0] * A MBA recoverable error has occurred. @@ -294,7 +294,7 @@ group gMCACALFIR filter priority( 13 ), cs_root_cause( 13 ) /** MCACALFIR[14] * RCD during periodic cal */ - (rMCACALFIR, bit(14)) ? threshold_and_mask; + (rMCACALFIR, bit(14)) ? rcd_parity_error; /** MCACALFIR[15] * scom error diff --git a/src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule b/src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule index cfcf39300..746ca2e73 100644 --- a/src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule +++ b/src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule @@ -73,7 +73,7 @@ actionclass rcd_parity_error callout(connected(TYPE_DIMM,0), MRU_HIGH); # DIMM 0 HIGH callout(connected(TYPE_DIMM,1), MRU_HIGH); # DIMM 1 HIGH calloutSelfLow; # Self LOW - threshold32pday; # Threshold 32/day + # Thresholding done in plugin funccall("RcdParityError"); # Run TPS on TH for all MCA ranks }; diff --git a/src/usr/diag/prdf/common/plat/p9/p9_mca_regs.rule b/src/usr/diag/prdf/common/plat/p9/p9_mca_regs.rule index a52eb54b5..bf2fd3fd1 100644 --- a/src/usr/diag/prdf/common/plat/p9/p9_mca_regs.rule +++ b/src/usr/diag/prdf/common/plat/p9/p9_mca_regs.rule @@ -209,3 +209,14 @@ capture group PllFIRs; }; + ############################################################################ + # Misc + ############################################################################ + + register FARB0 + { + name "MCP.PORT0.SRQ.MBA_FARB0Q"; + scomaddr 0x07010913; + capture group default; + }; + diff --git a/src/usr/diag/prdf/plat/mem/prdfP9Mca.C b/src/usr/diag/prdf/plat/mem/prdfP9Mca.C index 1a2f7792a..cda2226c7 100644 --- a/src/usr/diag/prdf/plat/mem/prdfP9Mca.C +++ b/src/usr/diag/prdf/plat/mem/prdfP9Mca.C @@ -29,6 +29,7 @@ #include <prdfPluginMap.H> // Platform includes +#include <prdfP9McaDataBundle.H> #include <prdfP9McbistDataBundle.H> #include <prdfPlatServices.H> #ifdef __HOSTBOOT_RUNTIME @@ -63,16 +64,14 @@ int32_t RcdParityError( ExtensibleChip * i_mcaChip, { #define PRDF_FUNC "[p9_mca::RcdParityError] " - // The callouts have already been made in the rule code. All we need to do - // now is start TPS on all slave ranks behind the MCA. This can only be done - // at runtime because it is too complicated to handle during Memory - // Diagnostics and we don't have time to complete the procedures at any - // other point during the IPL. The DIMMs will be deconfigured during the IPL - // anyways. So not really much benefit except for extra FFDC. + // The callouts have already been made in the rule code. All other actions + // documented below. #ifdef __HOSTBOOT_RUNTIME // TPS only supported at runtime. - if ( io_sc.service_data->IsAtThreshold() ) + // Recovery is always enabled during runtime. Start TPS on all slave ranks + // behind the MCA if the recovery threshold is reached. + if ( getMcaDataBundle(i_mcaChip)->iv_rcdParityTh.inc(io_sc) ) { ExtensibleChip * mcbChip = getConnectedParent( i_mcaChip, TYPE_MCBIST ); @@ -96,6 +95,38 @@ int32_t RcdParityError( ExtensibleChip * i_mcaChip, } } + #else // IPL + + SCAN_COMM_REGISTER_CLASS * farb0 = i_mcaChip->getRegister("FARB0"); + if ( SUCCESS != farb0->Read() ) + { + PRDF_ERR( PRDF_FUNC "Read() failed on MCAECCFIR: i_mcaChip=0x%08x", + i_mcaChip->getHuid() ); + + // Ensure the reg is zero so that we will use the recovery threshold and + // guarantee we don't try to do a reconfig. + farb0->clearAllBits(); + } + + if ( farb0->IsBitSet(54) ) + { + // Recovery is disabled. Issue a reconfig loop. Make the error log + // predictive if threshold is reached. + if ( rcdParityErrorReconfigLoop() ) + io_sc.service_data->setServiceCall(); + } + else + { + // Make the error log predictive if the recovery threshold is reached. + // Don't bother with TPS on all ranks because it is too complicated to + // handle during Memory Diagnostics and we don't have time to complete + // the procedures at any other point during the IPL. The DIMMs will be + // deconfigured during the IPL anyways. So not really much benefit + // except for extra FFDC. + if ( getMcaDataBundle(i_mcaChip)->iv_rcdParityTh.inc(io_sc) ) + io_sc.service_data->setServiceCall(); + } + #endif return SUCCESS; |