diff options
author | Caleb Palmer <cnpalmer@us.ibm.com> | 2018-05-11 12:02:52 -0500 |
---|---|---|
committer | Zane C. Shelley <zshelle@us.ibm.com> | 2018-05-15 21:58:11 -0400 |
commit | 3f14f5032bfee52840e5c58848f1e7bd4deb47ed (patch) | |
tree | cb1322b2fb931837d748f5d5238d1a413d1f11d1 /src/usr | |
parent | b26bb8d32b9f4bb119461d753d283a066d6b33c0 (diff) | |
download | talos-hostboot-3f14f5032bfee52840e5c58848f1e7bd4deb47ed.tar.gz talos-hostboot-3f14f5032bfee52840e5c58848f1e7bd4deb47ed.zip |
PRD: UE callout rank if no dimm callout
Change-Id: Ic8254be5e5d33b18f433f2e308aa23cd88687c7d
CQ: SW427493
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/58709
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Reviewed-by: Benjamin J. Weisenbeck <bweisenb@us.ibm.com>
Reviewed-by: Matt Derksen <mderkse1@us.ibm.com>
Reviewed-by: Brian J. Stegmiller <bjs@us.ibm.com>
Reviewed-by: Zane C. Shelley <zshelle@us.ibm.com>
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/58724
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Diffstat (limited to 'src/usr')
-rw-r--r-- | src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C | 25 |
1 file changed, 24 insertions, 1 deletion
```diff
diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C b/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C
index 4c5153e81..9cb7f5b42 100644
--- a/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C
+++ b/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C
@@ -178,6 +178,7 @@ uint32_t handleMemUe<TYPE_MBA>( ExtensibleChip * i_chip, const MemAddr & i_addr,
         l_dqBitmap.getCaptureData( io_sc.service_data->GetCaptureData() );
 
         // Add all DIMMs with bad bits to the callout list.
+        TargetHandleList callouts;
         for ( uint8_t ps = 0; ps < DIMMS_PER_RANK::MBA; ps++ )
         {
             bool badDqs = false;
@@ -194,7 +195,7 @@ uint32_t handleMemUe<TYPE_MBA>( ExtensibleChip * i_chip, const MemAddr & i_addr,
                                                       rank, ps );
             if ( l_dimm == nullptr ) continue;
 
-            io_sc.service_data->SetCallout( l_dimm, MRU_HIGH );
+            callouts.push_back( l_dimm );
 
             if ( isMfgCeCheckingEnabled() )
             {
@@ -204,6 +205,28 @@ uint32_t handleMemUe<TYPE_MBA>( ExtensibleChip * i_chip, const MemAddr & i_addr,
             }
         }
 
+        if ( 0 == callouts.size() )
+        {
+            // It is possible the scrub counters have rolled over to zero
+            // due to a known DD1.0 hardware bug. In this case, the best
+            // we can do is callout both DIMMs, because at minimum we know
+            // there was a UE, we just don't know where.
+            // NOTE: If this condition happens because of a DD2.0+ bug, the
+            // mssIplUeIsolation procedure will callout the Centaur.
+            callouts = getConnectedDimms( i_chip->getTrgt(), rank );
+            if ( 0 == callouts.size() )
+            {
+                PRDF_ERR( PRDF_FUNC "getConnectedDimms() failed" );
+                o_rc = FAIL; break;
+            }
+        }
+
+        // Callout all DIMMs in the list.
+        for ( auto & dimm : callouts )
+        {
+            io_sc.service_data->SetCallout( dimm, MRU_HIGH );
+        }
+
         // Make the error log predictive.
         io_sc.service_data->setServiceCall();
 
```