summary | refs | log | tree | commit | diff | stats
path: root/src/usr
diff options
context:
space:
mode:
author Zane Shelley <zshelle@us.ibm.com> 2017-03-07 10:57:35 -0600
committer Zane C. Shelley <zshelle@us.ibm.com> 2017-03-22 17:54:11 -0400
commit 98de8e60e8395033bf1deed9ede0929ecb796841 (patch)
tree 2bbdb4f6b2f13e03a9e2a2a95955d174d2b4b72d /src/usr
parent 070a02c9f75530fd5c559456255500e36dcb2792 (diff)
download talos-hostboot-98de8e60e8395033bf1deed9ede0929ecb796841.tar.gz
talos-hostboot-98de8e60e8395033bf1deed9ede0929ecb796841.zip
PRD: RCD parity error handling
Change-Id: I291ca299249e6b18760959fdc3fed2747d3d4f46
RTC: 165385
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/38123
Reviewed-by: Benjamin J. Weisenbeck <bweisenb@us.ibm.com>
Reviewed-by: Caleb N. Palmer <cnpalmer@us.ibm.com>
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Reviewed-by: Zane C. Shelley <zshelle@us.ibm.com>
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/38264
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Diffstat (limited to 'src/usr')
-rw-r--r-- src/usr/diag/prdf/common/plat/p9/p9_mca.rule | 4
-rw-r--r-- src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule | 2
-rw-r--r-- src/usr/diag/prdf/common/plat/p9/p9_mca_regs.rule | 11
-rw-r--r-- src/usr/diag/prdf/plat/mem/prdfP9Mca.C | 45
4 files changed, 52 insertions, 10 deletions
diff --git a/src/usr/diag/prdf/common/plat/p9/p9_mca.rule b/src/usr/diag/prdf/common/plat/p9/p9_mca.rule
index 7aeba8b66..9946489d3 100644
--- a/src/usr/diag/prdf/common/plat/p9/p9_mca.rule
+++ b/src/usr/diag/prdf/common/plat/p9/p9_mca.rule
@@ -219,7 +219,7 @@ rule rMCACALFIR
MCACALFIR & ~MCACALFIR_MASK & MCACALFIR_ACT0 & MCACALFIR_ACT1;
};
-group gMCACALFIR filter priority( 13 ), cs_root_cause( 13 )
+group gMCACALFIR filter priority( 13 ), cs_root_cause( 4, 13, 14 )
{
/** MCACALFIR[0]
* A MBA recoverable error has occurred.
@@ -294,7 +294,7 @@ group gMCACALFIR filter priority( 13 ), cs_root_cause( 13 )
/** MCACALFIR[14]
* RCD during periodic cal
*/
- (rMCACALFIR, bit(14)) ? threshold_and_mask;
+ (rMCACALFIR, bit(14)) ? rcd_parity_error;
/** MCACALFIR[15]
* scom error
diff --git a/src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule b/src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule
index cfcf39300..746ca2e73 100644
--- a/src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule
+++ b/src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule
@@ -73,7 +73,7 @@ actionclass rcd_parity_error
callout(connected(TYPE_DIMM,0), MRU_HIGH); # DIMM 0 HIGH
callout(connected(TYPE_DIMM,1), MRU_HIGH); # DIMM 1 HIGH
calloutSelfLow; # Self LOW
- threshold32pday; # Threshold 32/day
+ # Thresholding done in plugin
funccall("RcdParityError"); # Run TPS on TH for all MCA ranks
};
diff --git a/src/usr/diag/prdf/common/plat/p9/p9_mca_regs.rule b/src/usr/diag/prdf/common/plat/p9/p9_mca_regs.rule
index a52eb54b5..bf2fd3fd1 100644
--- a/src/usr/diag/prdf/common/plat/p9/p9_mca_regs.rule
+++ b/src/usr/diag/prdf/common/plat/p9/p9_mca_regs.rule
@@ -209,3 +209,14 @@
capture group PllFIRs;
};
+ ############################################################################
+ # Misc
+ ############################################################################
+
+ register FARB0
+ {
+ name "MCP.PORT0.SRQ.MBA_FARB0Q";
+ scomaddr 0x07010913;
+ capture group default;
+ };
+
diff --git a/src/usr/diag/prdf/plat/mem/prdfP9Mca.C b/src/usr/diag/prdf/plat/mem/prdfP9Mca.C
index 1a2f7792a..cda2226c7 100644
--- a/src/usr/diag/prdf/plat/mem/prdfP9Mca.C
+++ b/src/usr/diag/prdf/plat/mem/prdfP9Mca.C
@@ -29,6 +29,7 @@
#include <prdfPluginMap.H>
// Platform includes
+#include <prdfP9McaDataBundle.H>
#include <prdfP9McbistDataBundle.H>
#include <prdfPlatServices.H>
#ifdef __HOSTBOOT_RUNTIME
@@ -63,16 +64,14 @@ int32_t RcdParityError( ExtensibleChip * i_mcaChip,
{
#define PRDF_FUNC "[p9_mca::RcdParityError] "
- // The callouts have already been made in the rule code. All we need to do
- // now is start TPS on all slave ranks behind the MCA. This can only be done
- // at runtime because it is too complicated to handle during Memory
- // Diagnostics and we don't have time to complete the procedures at any
- // other point during the IPL. The DIMMs will be deconfigured during the IPL
- // anyways. So not really much benefit except for extra FFDC.
+ // The callouts have already been made in the rule code. All other actions
+ // documented below.
#ifdef __HOSTBOOT_RUNTIME // TPS only supported at runtime.
- if ( io_sc.service_data->IsAtThreshold() )
+ // Recovery is always enabled during runtime. Start TPS on all slave ranks
+ // behind the MCA if the recovery threshold is reached.
+ if ( getMcaDataBundle(i_mcaChip)->iv_rcdParityTh.inc(io_sc) )
{
ExtensibleChip * mcbChip = getConnectedParent( i_mcaChip, TYPE_MCBIST );
@@ -96,6 +95,38 @@ int32_t RcdParityError( ExtensibleChip * i_mcaChip,
}
}
+ #else // IPL
+
+ SCAN_COMM_REGISTER_CLASS * farb0 = i_mcaChip->getRegister("FARB0");
+ if ( SUCCESS != farb0->Read() )
+ {
+ PRDF_ERR( PRDF_FUNC "Read() failed on MCAECCFIR: i_mcaChip=0x%08x",
+ i_mcaChip->getHuid() );
+
+ // Ensure the reg is zero so that we will use the recovery threshold and
+ // guarantee we don't try to do a reconfig.
+ farb0->clearAllBits();
+ }
+
+ if ( farb0->IsBitSet(54) )
+ {
+ // Recovery is disabled. Issue a reconfig loop. Make the error log
+ // predictive if threshold is reached.
+ if ( rcdParityErrorReconfigLoop() )
+ io_sc.service_data->setServiceCall();
+ }
+ else
+ {
+ // Make the error log predictive if the recovery threshold is reached.
+ // Don't bother with TPS on all ranks because it is too complicated to
+ // handle during Memory Diagnostics and we don't have time to complete
+ // the procedures at any other point during the IPL. The DIMMs will be
+ // deconfigured during the IPL anyways. So not really much benefit
+ // except for extra FFDC.
+ if ( getMcaDataBundle(i_mcaChip)->iv_rcdParityTh.inc(io_sc) )
+ io_sc.service_data->setServiceCall();
+ }
+
#endif
return SUCCESS;
OpenPOWER on IntegriCloud