diff options
author | Caleb Palmer <cnpalmer@us.ibm.com> | 2019-09-23 08:18:27 -0500 |
---|---|---|
committer | Zane C Shelley <zshelle@us.ibm.com> | 2019-09-30 10:34:10 -0500 |
commit | 70a4b6bf1a02fbe7d71a67f6ebae6e75f0e20c95 (patch) | |
tree | 70fb8777fc074dacf5027170bca5377adf7ea5cc /src/usr/diag/prdf | |
parent | 0f3e041cbe1c7d6d2ae203187f843b1714b287db (diff) | |
download | talos-hostboot-70a4b6bf1a02fbe7d71a67f6ebae6e75f0e20c95.tar.gz talos-hostboot-70a4b6bf1a02fbe7d71a67f6ebae6e75f0e20c95.zip |
PRD: Add threshold for stopping on UEs/CEs during BgScrub
Change-Id: Iddeec04300631fc57b5c2f4a2eb57302e9f98fe2
CQ: SW476467
Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/84071
Reviewed-by: Benjamen G Tyner <ben.tyner@ibm.com>
Reviewed-by: Paul Greenwood <paul.greenwood@ibm.com>
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Reviewed-by: Zane C Shelley <zshelle@us.ibm.com>
Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/84073
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Diffstat (limited to 'src/usr/diag/prdf')
-rw-r--r-- | src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C | 60 | ||||
-rw-r--r-- | src/usr/diag/prdf/plat/mem/prdfP9McbistDataBundle.H | 13 | ||||
-rw-r--r-- | src/usr/diag/prdf/plat/prdfPlatServices.C | 7 | ||||
-rw-r--r-- | src/usr/diag/prdf/plat/prdfPlatServices_rt.C | 39 |
4 files changed, 103 insertions, 16 deletions
diff --git a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C index 779d13a73..817a345f3 100644 --- a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C +++ b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C @@ -1623,21 +1623,53 @@ uint32_t MemTdCtlr<TYPE_MCBIST>::canResumeBgScrub( bool & o_canResume ) // can use the stop conditions, which should be unique for background scrub, // to determine if it has been configured. - SCAN_COMM_REGISTER_CLASS * reg = iv_chip->getRegister( "MBSTR" ); - o_rc = reg->Read(); - if ( SUCCESS != o_rc ) - { - PRDF_ERR( PRDF_FUNC "Read() failed on MBSTR: iv_chip=0x%08x", - iv_chip->getHuid() ); - } - else if ( 0xf != reg->GetBitFieldJustified(0,4) && // NCE int TH - 0xf != reg->GetBitFieldJustified(4,4) && // NCE soft TH - 0xf != reg->GetBitFieldJustified(8,4) && // NCE hard TH - reg->IsBitSet(34) && // pause on MPE - reg->IsBitSet(35) ) // pause on UE + do { - o_canResume = true; - } + SCAN_COMM_REGISTER_CLASS * reg = iv_chip->getRegister( "MBSTR" ); + o_rc = reg->Read(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "Read() failed on MBSTR: iv_chip=0x%08x", + iv_chip->getHuid() ); + break; + } + // Note: The stop conditions for background scrubbing can now be + // variable depending on whether we have hit threshold for the number + // of UEs or CEs that we have stopped on on a rank. + + // If we haven't hit CE or UE threshold, check the CE stop conditions + if ( !getMcbistDataBundle(iv_chip)->iv_ceScrubStopCounter.atTh() && + !getMcbistDataBundle(iv_chip)->iv_ueScrubStopCounter.atTh() ) + { + // If the stop conditions aren't set, just break out. + if ( !(0xf != reg->GetBitFieldJustified(0,4) && // NCE int TH + 0xf != reg->GetBitFieldJustified(4,4) && // NCE soft TH + 0xf != reg->GetBitFieldJustified(8,4)) ) // NCE hard TH + { + break; + } + + } + + // If we haven't hit UE threshold yet, check the UE stop condition + if ( !getMcbistDataBundle(iv_chip)->iv_ueScrubStopCounter.atTh() ) + { + // If the stop condition isn't set, just break out + if ( !reg->IsBitSet(35) ) // pause on UE + { + break; + } + } + + // Need to check the stop on mpe stop condition regardless of whether + // we hit the UE or CE threshold. + if ( reg->IsBitSet(34) ) // pause on MPE + { + // If we reach here, all the stop conditions are set for background + // scrub, so we can resume. + o_canResume = true; + } + }while(0); return o_rc; diff --git a/src/usr/diag/prdf/plat/mem/prdfP9McbistDataBundle.H b/src/usr/diag/prdf/plat/mem/prdfP9McbistDataBundle.H index 4a284253a..3883eb936 100644 --- a/src/usr/diag/prdf/plat/mem/prdfP9McbistDataBundle.H +++ b/src/usr/diag/prdf/plat/mem/prdfP9McbistDataBundle.H @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2016 */ +/* Contributors Listed Below - COPYRIGHT 2016,2019 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -81,6 +81,17 @@ class McbistDataBundle : public DataBundle /** The Targeted Diagnostics controller. */ MemTdCtlr<TARGETING::TYPE_MCBIST> * iv_tdCtlr = nullptr; + + public: // instance variables + #ifdef __HOSTBOOT_RUNTIME + + // These are used to limit the number of times a scrub command will stop + // on a UE or CE on a rank. This is to prevent potential flooding of + // maintenance UEs or CEs. The threshold will be 16 per rank for each. + ScrubResumeCounter iv_ueScrubStopCounter; + ScrubResumeCounter iv_ceScrubStopCounter; + + #endif }; /** diff --git a/src/usr/diag/prdf/plat/prdfPlatServices.C b/src/usr/diag/prdf/plat/prdfPlatServices.C index 48d26f8b8..e4b122ab5 100644 --- a/src/usr/diag/prdf/plat/prdfPlatServices.C +++ b/src/usr/diag/prdf/plat/prdfPlatServices.C @@ -40,6 +40,7 @@ #include <prdfRegisterCache.H> #include <prdfCenMbaDataBundle.H> +#include <prdfP9McbistDataBundle.H> #include <prdfMemScrubUtils.H> #include <iipServiceDataCollector.h> @@ -791,6 +792,12 @@ uint32_t startBgScrub<TYPE_MCA>( ExtensibleChip * i_mcaChip, ExtensibleChip * mcbChip = getConnectedParent( i_mcaChip, TYPE_MCBIST ); fapi2::Target<fapi2::TARGET_TYPE_MCBIST> fapiTrgt ( mcbChip->getTrgt() ); + #ifdef __HOSTBOOT_RUNTIME + // Starting a new command. Clear the UE and CE scrub stop counters + getMcbistDataBundle( mcbChip )->iv_ueScrubStopCounter.reset(); + getMcbistDataBundle( mcbChip )->iv_ceScrubStopCounter.reset(); + #endif + // Get the stop conditions. // NOTE: If HBRT_PRD is not configured, we want to use the defaults so that // background scrubbing never stops. diff --git a/src/usr/diag/prdf/plat/prdfPlatServices_rt.C b/src/usr/diag/prdf/plat/prdfPlatServices_rt.C index e3bdecb59..cd0b045dc 100644 --- a/src/usr/diag/prdf/plat/prdfPlatServices_rt.C +++ b/src/usr/diag/prdf/plat/prdfPlatServices_rt.C @@ -37,6 +37,7 @@ // Platform includes #include <prdfCenMbaDataBundle.H> +#include <prdfP9McbistDataBundle.H> #include <prdfMemScrubUtils.H> #include <prdfPlatServices.H> @@ -173,9 +174,45 @@ uint32_t resumeBgScrub<TYPE_MCBIST>( ExtensibleChip * i_chip ) break; } + // Check UE and CE stop counters to determine stop conditions + mss::mcbist::stop_conditions<> stopCond; + if ( getMcbistDataBundle(i_chip)->iv_ueScrubStopCounter.atTh() ) + { + // If we've reached the limit of UEs we're allowed to stop on + // per rank, only set the stop on mpe stop condition. + stopCond.set_pause_on_mpe(mss::ON); + } + else if ( getMcbistDataBundle(i_chip)->iv_ceScrubStopCounter.atTh() ) + { + // If we've reached the limit of CEs we're allowed to stop on + // per rank, set all the normal stop conditions except stop on CE + stopCond.set_pause_on_aue(mss::ON); + + #ifdef CONFIG_HBRT_PRD + + stopCond.set_pause_on_mpe(mss::ON) + .set_pause_on_ue(mss::ON); + + // In MNFG mode, stop on RCE_ETE to get an accurate callout for IUEs + if ( mfgMode() ) stopCond.set_thresh_rce(1); + + #endif + } + else + { + // If we haven't reached threshold on the number of UEs or CEs we + // have stopped on, do not change the stop conditions. + stopCond = mss::mcbist::stop_conditions<>( + mss::mcbist::stop_conditions<>::DONT_CHANGE ); + } + // Resume the command on the next address. + // Note: we have to limit the number of times a command has been stopped + // because of a UE/CE. Therefore, we must always resume the command to + // the end of the current slave rank so we can reset the UE/CE counts. errlHndl_t errl; - FAPI_INVOKE_HWP( errl, mss::memdiags::continue_cmd, fapiTrgt ); + FAPI_INVOKE_HWP( errl, mss::memdiags::continue_cmd, fapiTrgt, + mss::mcbist::STOP_AFTER_SLAVE_RANK, stopCond ); if ( nullptr != errl ) { |