diff options
Diffstat (limited to 'src/usr/diag/prdf')
-rw-r--r-- | src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C | 25 |
1 files changed, 24 insertions, 1 deletions
diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C b/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C
index 4c5153e81..9cb7f5b42 100644
--- a/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C
+++ b/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C
@@ -178,6 +178,7 @@ uint32_t handleMemUe<TYPE_MBA>( ExtensibleChip * i_chip, const MemAddr & i_addr,
         l_dqBitmap.getCaptureData( io_sc.service_data->GetCaptureData() );
 
         // Add all DIMMs with bad bits to the callout list.
+        TargetHandleList callouts;
         for ( uint8_t ps = 0; ps < DIMMS_PER_RANK::MBA; ps++ )
         {
             bool badDqs = false;
@@ -194,7 +195,7 @@ uint32_t handleMemUe<TYPE_MBA>( ExtensibleChip * i_chip, const MemAddr & i_addr,
                                                   rank, ps );
             if ( l_dimm == nullptr ) continue;
 
-            io_sc.service_data->SetCallout( l_dimm, MRU_HIGH );
+            callouts.push_back( l_dimm );
 
             if ( isMfgCeCheckingEnabled() )
             {
@@ -204,6 +205,28 @@ uint32_t handleMemUe<TYPE_MBA>( ExtensibleChip * i_chip, const MemAddr & i_addr,
             }
         }
 
+        if ( 0 == callouts.size() )
+        {
+            // It is possible the scrub counters have rolled over to zero
+            // due to a known DD1.0 hardware bug. In this case, the best
+            // we can do is callout both DIMMs, because at minimum we know
+            // there was a UE, we just don't know where.
+            // NOTE: If this condition happens because of a DD2.0+ bug, the
+            // mssIplUeIsolation procedure will callout the Centaur.
+            callouts = getConnectedDimms( i_chip->getTrgt(), rank );
+            if ( 0 == callouts.size() )
+            {
+                PRDF_ERR( PRDF_FUNC "getConnectedDimms() failed" );
+                o_rc = FAIL; break;
+            }
+        }
+
+        // Callout all DIMMs in the list.
+        for ( auto & dimm : callouts )
+        {
+            io_sc.service_data->SetCallout( dimm, MRU_HIGH );
+        }
+
         // Make the error log predictive.
         io_sc.service_data->setServiceCall();
 