summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Caleb Palmer <cnpalmer@us.ibm.com> 2018-05-11 12:02:52 -0500
committer: Zane C. Shelley <zshelle@us.ibm.com> 2018-05-15 21:58:11 -0400
commit: 3f14f5032bfee52840e5c58848f1e7bd4deb47ed (patch)
tree: cb1322b2fb931837d748f5d5238d1a413d1f11d1
parent: b26bb8d32b9f4bb119461d753d283a066d6b33c0 (diff)
download: talos-hostboot-3f14f5032bfee52840e5c58848f1e7bd4deb47ed.tar.gz
download: talos-hostboot-3f14f5032bfee52840e5c58848f1e7bd4deb47ed.zip
PRD: UE callout rank if no dimm callout
Change-Id: Ic8254be5e5d33b18f433f2e308aa23cd88687c7d
CQ: SW427493
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/58709
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Reviewed-by: Benjamin J. Weisenbeck <bweisenb@us.ibm.com>
Reviewed-by: Matt Derksen <mderkse1@us.ibm.com>
Reviewed-by: Brian J. Stegmiller <bjs@us.ibm.com>
Reviewed-by: Zane C. Shelley <zshelle@us.ibm.com>
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/58724
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
-rw-r--r-- src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C | 25
1 file changed, 24 insertions, 1 deletion
diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C b/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C
index 4c5153e81..9cb7f5b42 100644
--- a/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C
+++ b/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C
@@ -178,6 +178,7 @@ uint32_t handleMemUe<TYPE_MBA>( ExtensibleChip * i_chip, const MemAddr & i_addr,
l_dqBitmap.getCaptureData( io_sc.service_data->GetCaptureData() );
// Add all DIMMs with bad bits to the callout list.
+ TargetHandleList callouts;
for ( uint8_t ps = 0; ps < DIMMS_PER_RANK::MBA; ps++ )
{
bool badDqs = false;
@@ -194,7 +195,7 @@ uint32_t handleMemUe<TYPE_MBA>( ExtensibleChip * i_chip, const MemAddr & i_addr,
rank, ps );
if ( l_dimm == nullptr ) continue;
- io_sc.service_data->SetCallout( l_dimm, MRU_HIGH );
+ callouts.push_back( l_dimm );
if ( isMfgCeCheckingEnabled() )
{
@@ -204,6 +205,28 @@ uint32_t handleMemUe<TYPE_MBA>( ExtensibleChip * i_chip, const MemAddr & i_addr,
}
}
+ if ( 0 == callouts.size() )
+ {
+ // It is possible the scrub counters have rolled over to zero
+ // due to a known DD1.0 hardware bug. In this case, the best
+ // we can do is callout both DIMMs, because at minimum we know
+ // there was a UE, we just don't know where.
+ // NOTE: If this condition happens because of a DD2.0+ bug, the
+ // mssIplUeIsolation procedure will callout the Centaur.
+ callouts = getConnectedDimms( i_chip->getTrgt(), rank );
+ if ( 0 == callouts.size() )
+ {
+ PRDF_ERR( PRDF_FUNC "getConnectedDimms() failed" );
+ o_rc = FAIL; break;
+ }
+ }
+
+ // Callout all DIMMs in the list.
+ for ( auto & dimm : callouts )
+ {
+ io_sc.service_data->SetCallout( dimm, MRU_HIGH );
+ }
+
// Make the error log predictive.
io_sc.service_data->setServiceCall();
OpenPOWER on IntegriCloud