diff options
author | Zane Shelley <zshelle@us.ibm.com> | 2017-06-20 11:40:49 -0500 |
---|---|---|
committer | Zane C. Shelley <zshelle@us.ibm.com> | 2017-06-22 10:21:56 -0400 |
commit | 5c7c9832bd41c86e9c5705947dad2d3604d5ebbb (patch) | |
tree | d6f1b21e234de6a715959be9d54b6243c5350c37 /src/usr/diag | |
parent | d1924a3c11bf3ed92bacd89c4932ecbd57ebb391 (diff) | |
download | talos-hostboot-5c7c9832bd41c86e9c5705947dad2d3604d5ebbb.tar.gz talos-hostboot-5c7c9832bd41c86e9c5705947dad2d3604d5ebbb.zip |
PRD: MNFG thresholding for RCD parity error reconfig loops
Change-Id: Ie0282529d66cbe4b3169ad7ee601dbd2cb49f779
CQ: SW392001
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/42136
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: Caleb N. Palmer <cnpalmer@us.ibm.com>
Reviewed-by: Zane C. Shelley <zshelle@us.ibm.com>
Diffstat (limited to 'src/usr/diag')
-rw-r--r-- | src/usr/diag/mdia/mdia.C | 20 | ||||
-rw-r--r-- | src/usr/diag/prdf/plat/mem/prdfP9Mca.C | 4 | ||||
-rw-r--r-- | src/usr/diag/prdf/plat/prdfPlatServices_ipl.C | 25 | ||||
-rw-r--r-- | src/usr/diag/prdf/plat/prdfPlatServices_ipl.H | 3 |
4 files changed, 30 insertions, 22 deletions
diff --git a/src/usr/diag/mdia/mdia.C b/src/usr/diag/mdia/mdia.C
index d136f6cee..c05e38a1e 100644
--- a/src/usr/diag/mdia/mdia.C
+++ b/src/usr/diag/mdia/mdia.C
@@ -117,20 +117,16 @@ errlHndl_t runStep(const TargetHandleList & i_targetList)

     doStepCleanup(globals);

-    if ( nullptr != top &&
-         0 != top->getAttr<ATTR_RCD_PARITY_RECONFIG_LOOP_COUNT>() )
+    // If this step completes without the need for a reconfig due to an RCD
+    // parity error, clear all RCD parity error counters.
+    ATTR_RECONFIGURE_LOOP_type attr = top->getAttr<ATTR_RECONFIGURE_LOOP>();
+    if ( 0 == (attr & RECONFIGURE_LOOP_RCD_PARITY_ERROR) )
     {
-        // Reset the RCD parity error reconfig loop counter if this step
-        // completes without an RCD parity error. Note that PRD will only set
-        // the RCD parity error flag if there is an RCD parity error and the
-        // total count of reconfig loops is under threshold. At threshold, a
-        // part will be deconfigured, forcing a reconfig, but the RCD parity
-        // error flag will not be set to ensure this code is activated and the
-        // count it reset.
-        ATTR_RECONFIGURE_LOOP_type attr = top->getAttr<ATTR_RECONFIGURE_LOOP>();
-        if ( 0 == (attr & RECONFIGURE_LOOP_RCD_PARITY_ERROR) )
+        TargetHandleList trgtList; getAllChiplets( trgtList, TYPE_MCA );
+        for ( auto & trgt : trgtList )
         {
-            top->setAttr<ATTR_RCD_PARITY_RECONFIG_LOOP_COUNT>(0);
+            if ( 0 != trgt->getAttr<ATTR_RCD_PARITY_RECONFIG_LOOP_COUNT>() )
+                trgt->setAttr<ATTR_RCD_PARITY_RECONFIG_LOOP_COUNT>(0);
         }
     }

diff --git a/src/usr/diag/prdf/plat/mem/prdfP9Mca.C b/src/usr/diag/prdf/plat/mem/prdfP9Mca.C
index 84b3a7792..17161c03d 100644
--- a/src/usr/diag/prdf/plat/mem/prdfP9Mca.C
+++ b/src/usr/diag/prdf/plat/mem/prdfP9Mca.C
@@ -109,7 +109,7 @@ int32_t RcdParityError( ExtensibleChip * i_mcaChip,
     // documented below.

     // Nothing more to do if this is a checkstop attention.
-    if ( CHECK_STOP != io_sc.service_data->getPrimaryAttnType() )
+    if ( CHECK_STOP == io_sc.service_data->getPrimaryAttnType() )
         return SUCCESS;

     #ifdef __HOSTBOOT_RUNTIME // TPS only supported at runtime.
@@ -160,7 +160,7 @@ int32_t RcdParityError( ExtensibleChip * i_mcaChip,
         {
             // Recovery is disabled. Issue a reconfig loop. Make the error log
             // predictive if threshold is reached.
-            if ( rcdParityErrorReconfigLoop() )
+            if ( rcdParityErrorReconfigLoop(i_mcaChip->getTrgt()) )
                 io_sc.service_data->setServiceCall();
         }
         else
diff --git a/src/usr/diag/prdf/plat/prdfPlatServices_ipl.C b/src/usr/diag/prdf/plat/prdfPlatServices_ipl.C
index 658a9f785..3b4aa0e7f 100644
--- a/src/usr/diag/prdf/plat/prdfPlatServices_ipl.C
+++ b/src/usr/diag/prdf/plat/prdfPlatServices_ipl.C
@@ -39,6 +39,7 @@

 //#include <prdfCenDqBitmap.H> TODO RTC 164707
 #include <prdfMemScrubUtils.H>
+#include <prdfMfgThresholdMgr.H>

 #include <diag/mdia/mdia.H>
 #include <config.h>
@@ -107,15 +108,25 @@ int32_t mdiaSendEventMsg( TargetHandle_t i_trgt,

 //------------------------------------------------------------------------------

-bool rcdParityErrorReconfigLoop()
+bool rcdParityErrorReconfigLoop( TargetHandle_t i_trgt )
 {
     TargetHandle_t top = getSystemTarget();

-    // Check the current reconfig count.
-    uint8_t allowed = top->getAttr<ATTR_RCD_PARITY_RECONFIG_LOOPS_ALLOWED>();
-    uint8_t count = top->getAttr<ATTR_RCD_PARITY_RECONFIG_LOOP_COUNT>();
+    // Get the current reconfig count and increment.
+    uint8_t count = i_trgt->getAttr<ATTR_RCD_PARITY_RECONFIG_LOOP_COUNT>() + 1;

-    if ( count <= allowed )
+    // Get the reconfig threshold and check MNFG threshold, if needed.
+    uint8_t th = top->getAttr<ATTR_RCD_PARITY_RECONFIG_LOOPS_ALLOWED>() + 1;
+    if ( mfgMode() )
+    {
+        uint8_t mnfgTh = MfgThresholdMgr::getInstance()->
+                            getThreshold(ATTR_MNFG_TH_RCD_PARITY_ERRORS);
+        if ( mnfgTh < th )
+            th = mnfgTh;
+    }
+
+    // If the count is under threshold, trigger a reconfig loop.
+    if ( count < th )
     {
         // Set the RCD parity error flag in the reconfig loop attribute. This
         // will trigger a reconfig loop at the end of the current istep.
@@ -126,8 +137,8 @@ bool rcdParityErrorReconfigLoop()
         top->setAttr<ATTR_RECONFIGURE_LOOP>(attr);
     }

-    // Increment the count.
-    top->setAttr<ATTR_RCD_PARITY_RECONFIG_LOOP_COUNT>(++count);
+    // Write the new count to the attribute.
+    i_trgt->setAttr<ATTR_RCD_PARITY_RECONFIG_LOOP_COUNT>(count);

     return false;
 }
diff --git a/src/usr/diag/prdf/plat/prdfPlatServices_ipl.H b/src/usr/diag/prdf/plat/prdfPlatServices_ipl.H
index 5151f11b3..2dcdc628c 100644
--- a/src/usr/diag/prdf/plat/prdfPlatServices_ipl.H
+++ b/src/usr/diag/prdf/plat/prdfPlatServices_ipl.H
@@ -65,10 +65,11 @@ int32_t mdiaSendEventMsg( TARGETING::TargetHandle_t i_trgt,

 /**
  * @brief Initiates a reconfig loop due to an RCD parity error.
+ * @param i_trgt An MCA target.
  * @return True if the number of allowed reconfig loops has been exceeded.
  *         False otherwise.
  */
-bool rcdParityErrorReconfigLoop();
+bool rcdParityErrorReconfigLoop( TARGETING::TargetHandle_t i_trgt );

 /**
  * @brief Invokes the restore DRAM repairs hardware procedure.