From 979413b9625240939ce45915b5d2cc5998c6bfa1 Mon Sep 17 00:00:00 2001 From: Zane Shelley Date: Wed, 22 Feb 2017 12:32:18 -0600 Subject: PRD: IPL DRAM Repairs cleanup Change-Id: Id0e3d2bea90f71d56df44ccab0506a28cf2017ff Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/36865 Tested-by: Jenkins Server Reviewed-by: Benjamin J. Weisenbeck Reviewed-by: Caleb N. Palmer Reviewed-by: Zane C. Shelley Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/37047 Tested-by: Jenkins OP Build CI Tested-by: FSP CI Jenkins --- .../diag/prdf/common/plat/mem/prdfMemEccAnalysis.C | 9 ++++++++ .../prdf/common/plat/mem/prdfP9McaDataBundle.H | 26 ++++++++++++++++++++++ src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.C | 4 ++-- src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_ipl.C | 4 ++-- src/usr/diag/prdf/plat/mem/prdfMemTps_ipl.C | 18 +++++++-------- src/usr/diag/prdf/plat/mem/prdfMemVcm_ipl.C | 15 ++++++++----- src/usr/diag/prdf/prdfMain_ipl.C | 6 ++--- 7 files changed, 60 insertions(+), 22 deletions(-) (limited to 'src/usr/diag/prdf') diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C b/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C index 95c972723..7036888fe 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C @@ -82,6 +82,15 @@ void calloutMemUe( ExtensibleChip * i_chip, const MemRank & i_rank, { PRDF_ASSERT( TYPE_MBA == i_chip->getType() ); + // TODO: RTC 169933 During Memory Diagnostics we'll want to call the + // mssIplUeIsolation() HWP so that we can isolate to a single DIMM if + // possible. This may be a difficult task to do at this point in the + // code because it will run a maintenance command on the Centaur, + // which may require some cleanup of the previous command. Since there + // are no plans to support IS DIMMs attached to a Centaur in P9, we + // may be able to get rid of this requirement because the FRU will be + // the same regardless if one or two logical DIMMs are called out. + MemoryMru memmru ( i_chip->getTrgt(), i_rank, MemoryMruData::CALLOUT_RANK ); io_sc.service_data->SetCallout( memmru ); } diff --git a/src/usr/diag/prdf/common/plat/mem/prdfP9McaDataBundle.H b/src/usr/diag/prdf/common/plat/mem/prdfP9McaDataBundle.H index aea42630f..3557d27f0 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdfP9McaDataBundle.H +++ b/src/usr/diag/prdf/common/plat/mem/prdfP9McaDataBundle.H @@ -38,6 +38,10 @@ #include #endif +#if defined(__HOSTBOOT_MODULE) && !defined(__HOSTBOOT_RUNTIME) + #include +#endif + namespace PRDF { @@ -72,6 +76,21 @@ class McaDataBundle : public DataBundle #endif + #if defined(__HOSTBOOT_MODULE) && !defined(__HOSTBOOT_RUNTIME) + + /** @return The IPL CE statistics object. */ + MemIplCeStats * getIplCeStats() + { + if ( nullptr == iv_iplCeStats ) + { + iv_iplCeStats = new MemIplCeStats( iv_chip ); + } + + return iv_iplCeStats; + } + + #endif + private: // functions // Don't allow copy or assignment. @@ -82,6 +101,13 @@ class McaDataBundle : public DataBundle ExtensibleChip * iv_chip; // This MCA. + #if defined(__HOSTBOOT_MODULE) && !defined(__HOSTBOOT_RUNTIME) + + /** MNFG IPL CE statistics. */ + MemIplCeStats * iv_iplCeStats = nullptr; + + #endif + public: // instance variables MemCeTable iv_ceTable; ///< CE table for FFDC diff --git a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.C b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.C index 77f69de28..9c384654f 100644 --- a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.C +++ b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.C @@ -62,8 +62,6 @@ uint32_t MemTdCtlr::handleCmdComplete( STEP_CODE_DATA_STRUCT & io_sc ) break; } - collectStateCaptureData( io_sc, TD_CTLR_DATA::START ); - #else // IPL only PRDF_ASSERT( isInMdiaMode() ); // MDIA must be running. @@ -78,6 +76,8 @@ uint32_t MemTdCtlr::handleCmdComplete( STEP_CODE_DATA_STRUCT & io_sc ) #endif + collectStateCaptureData( io_sc, TD_CTLR_DATA::START ); + if ( nullptr == iv_curProcedure ) { // There are no TD procedures currently in progress. diff --git a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_ipl.C b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_ipl.C index 06993bdb7..7a8e57b83 100644 --- a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_ipl.C +++ b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_ipl.C @@ -188,11 +188,11 @@ uint32_t __checkEcc( ExtensibleChip * i_chip, const MemRank & i_rank, io_queue.push( e ); } else if ( isMfgCeCheckingEnabled() && - (0 != (eccAttns & MAINT_HARD_NCE_ETE)) ) + (0 != (eccAttns & MAINT_HARD_NCE_ETE)) ) { io_sc.service_data->AddSignatureList( trgt, PRDFSIG_MaintHARD_CTE ); - // TODO RTC 136128 + // TODO RTC 169935 // - Query the per-symbol counters for the hard CE symbol (there // should be only one). // - Add the symbol to the callout list (via MemoryMru). diff --git a/src/usr/diag/prdf/plat/mem/prdfMemTps_ipl.C b/src/usr/diag/prdf/plat/mem/prdfMemTps_ipl.C index e123b84ed..59810eaac 100644 --- a/src/usr/diag/prdf/plat/mem/prdfMemTps_ipl.C +++ b/src/usr/diag/prdf/plat/mem/prdfMemTps_ipl.C @@ -51,8 +51,8 @@ uint32_t TpsEvent::nextStep( STEP_CODE_DATA_STRUCT & io_sc, #define PRDF_FUNC "[TpsEvent::nextStep] " uint32_t o_rc = SUCCESS; - o_done = true; - MemIplCeStats ceStats( iv_chip ); + + o_done = false; do { @@ -77,11 +77,13 @@ uint32_t TpsEvent::nextStep( STEP_CODE_DATA_STRUCT & io_sc, else { //collect the CE statistics for later analysis use - o_rc = ceStats.collectStats( iv_rank ); + McaDataBundle * db = getMcaDataBundle( iv_chip ); + o_rc = db->getIplCeStats()->collectStats( iv_rank ); if ( SUCCESS != o_rc ) { - PRDF_ERR( PRDF_FUNC "Call to 'ceStats.collectStats' failed " - "on chip: 0x%08x", iv_chip->getHuid() ); + PRDF_ERR( PRDF_FUNC "collectStats(m%ds%d) failed on 0x%08x", + iv_rank.getMaster(), iv_rank.getSlave(), + iv_chip->getHuid() ); break; } @@ -111,10 +113,8 @@ uint32_t TpsEvent::nextStep( STEP_CODE_DATA_STRUCT & io_sc, PRDFSIG_MaintIUE ); } - //Add the rank to the callout list - MemoryMru memmru(iv_chip->getTrgt(), iv_rank, - MemoryMruData::CALLOUT_RANK); - io_sc.service_data->SetCallout( memmru ); + //Add the rank to the callout list (via MemoryMru) + MemEcc::calloutMemUe( iv_chip, iv_rank, io_sc ); //Make the error log predictive io_sc.service_data->setServiceCall(); diff --git a/src/usr/diag/prdf/plat/mem/prdfMemVcm_ipl.C b/src/usr/diag/prdf/plat/mem/prdfMemVcm_ipl.C index 5ab4183bf..50d47aae5 100644 --- a/src/usr/diag/prdf/plat/mem/prdfMemVcm_ipl.C +++ b/src/usr/diag/prdf/plat/mem/prdfMemVcm_ipl.C @@ -63,8 +63,8 @@ uint32_t VcmEvent::nextStep( STEP_CODE_DATA_STRUCT & io_sc, o_rc = startVcmPhase1( iv_chip, iv_rank ); if ( SUCCESS != o_rc ) { - PRDF_ERR( PRDF_FUNC "Call to 'startVcmPhase1 failed on chip: " - "0x%08x", iv_chip->getHuid() ); + PRDF_ERR( PRDF_FUNC "startVcmPhase1(0x%08x, %d) failed", + iv_chip->getHuid(), iv_rank.getMaster() ); break; } iv_phase = TD_PHASE_1; @@ -77,8 +77,8 @@ uint32_t VcmEvent::nextStep( STEP_CODE_DATA_STRUCT & io_sc, o_rc = checkEccFirs( iv_chip, eccAttns ); if ( SUCCESS != o_rc ) { - PRDF_ERR( PRDF_FUNC "Call to 'checkEccFirs' failed on chip: " - "0x%08x", iv_chip->getHuid() ); + PRDF_ERR( PRDF_FUNC "checkEccFirs(0x%08x) failed", + iv_chip->getHuid() ); break; } @@ -172,9 +172,12 @@ uint32_t VcmEvent::nextStep( STEP_CODE_DATA_STRUCT & io_sc, io_sc.service_data->setSignature( iv_chip->getHuid(), PRDFSIG_VcmVerified ); - //Update the DRAM repairs VPD to indicate the entire DRAM is bad - //TODO ?????? - updating VPD not yet supported + // If there is a symbol mark on the same DRAM as the newly + // verified chip mark, remove the symbol mark. + // TODO: RTC 164705 + // Set entire chip in DRAM Repairs VPD. + // TODO: RTC 169939 } //else - verification failed else diff --git a/src/usr/diag/prdf/prdfMain_ipl.C b/src/usr/diag/prdf/prdfMain_ipl.C index f54b8e7eb..24ccfda2a 100644 --- a/src/usr/diag/prdf/prdfMain_ipl.C +++ b/src/usr/diag/prdf/prdfMain_ipl.C @@ -33,11 +33,11 @@ #include -//#include TODO RTC 136128 +//#include TODO RTC 159628 #include #include #include -//#include TODO RTC 136128 +//#include TODO RTC 159628 #include #include @@ -74,7 +74,7 @@ int32_t analyzeIplCEStats( TargetHandle_t i_mba, bool &o_calloutMade ) PRDF_ENTER( PRDF_FUNC "(0x%08x)", getHuid(i_mba) ); -/* TODO RTC 136128 +/* TODO RTC 159628 // will unlock when going out of scope PRDF_SYSTEM_SCOPELOCK; -- cgit v1.2.1