diff options
author | Caleb Palmer <cnpalmer@us.ibm.com> | 2019-09-30 15:19:49 -0500 |
---|---|---|
committer | Zane C Shelley <zshelle@us.ibm.com> | 2019-10-04 12:57:41 -0500 |
commit | 2dbc30966476745b0b6fa85e9c8e46398b31e86f (patch) | |
tree | d618b7db53b611ca53c1ffbc3ca104199d6ef291 | |
parent | 9d750b3d919830c1813fbcf2e7797a10c9075928 (diff) | |
download | talos-hostboot-2dbc30966476745b0b6fa85e9c8e46398b31e86f.tar.gz talos-hostboot-2dbc30966476745b0b6fa85e9c8e46398b31e86f.zip |
PRD: Axone UE/CE threshold for background scrub
Change-Id: I2872ee94a385d84b10a6e0aaf7f3c0a95c496aa0
Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/84551
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Reviewed-by: Benjamen G Tyner <ben.tyner@ibm.com>
Reviewed-by: Brian J Stegmiller <bjs@us.ibm.com>
Reviewed-by: Zane C Shelley <zshelle@us.ibm.com>
Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/84781
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
-rw-r--r-- | src/usr/diag/prdf/common/plat/mem/prdfOcmbDataBundle.H | 7 | ||||
-rw-r--r-- | src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C | 60 | ||||
-rw-r--r-- | src/usr/diag/prdf/plat/prdfPlatServices.C | 7 | ||||
-rw-r--r-- | src/usr/diag/prdf/plat/prdfPlatServices_rt.C | 46 |
4 files changed, 100 insertions, 20 deletions
diff --git a/src/usr/diag/prdf/common/plat/mem/prdfOcmbDataBundle.H b/src/usr/diag/prdf/common/plat/mem/prdfOcmbDataBundle.H index e5977ee6c..499baf00c 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdfOcmbDataBundle.H +++ b/src/usr/diag/prdf/common/plat/mem/prdfOcmbDataBundle.H @@ -40,6 +40,7 @@ #ifdef __HOSTBOOT_MODULE +#include <prdfMemScrubUtils.H> #include <prdfMemTdFalseAlarm.H> #include <prdfMemThresholds.H> #include <prdfMemTdCtlr.H> @@ -206,6 +207,12 @@ class OcmbDataBundle : public DataBundle * scrubbing is resumed. */ bool iv_maskMainlineNceTce = false; + // These are used to limit the number of times a scrub command will stop + // on a UE or CE on a rank. This is to prevent potential flooding of + // maintenance UEs or CEs. The threshold will be 16 per rank for each. + ScrubResumeCounter iv_ueScrubStopCounter; + ScrubResumeCounter iv_ceScrubStopCounter; + #else // IPL only /** MNFG IPL CE statistics. */ diff --git a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C index 817a345f3..da18cea81 100644 --- a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C +++ b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C @@ -1693,21 +1693,53 @@ uint32_t MemTdCtlr<TYPE_OCMB_CHIP>::canResumeBgScrub( bool & o_canResume ) // can use the stop conditions, which should be unique for background scrub, // to determine if it has been configured. 
- SCAN_COMM_REGISTER_CLASS * reg = iv_chip->getRegister( "MBSTR" ); - o_rc = reg->Read(); - if ( SUCCESS != o_rc ) - { - PRDF_ERR( PRDF_FUNC "Read() failed on MBSTR: iv_chip=0x%08x", - iv_chip->getHuid() ); - } - else if ( 0xf != reg->GetBitFieldJustified(0,4) && // NCE int TH - 0xf != reg->GetBitFieldJustified(4,4) && // NCE soft TH - 0xf != reg->GetBitFieldJustified(8,4) && // NCE hard TH - reg->IsBitSet(34) && // pause on MPE - reg->IsBitSet(35) ) // pause on UE + do { - o_canResume = true; - } + SCAN_COMM_REGISTER_CLASS * reg = iv_chip->getRegister( "MBSTR" ); + o_rc = reg->Read(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "Read() failed on MBSTR: iv_chip=0x%08x", + iv_chip->getHuid() ); + break; + } + // Note: The stop conditions for background scrubbing can now be + // variable depending on whether we have hit threshold for the number + // of UEs or CEs that we have stopped on for a given rank. + + // If we haven't hit CE or UE threshold, check the CE stop conditions + if ( !getOcmbDataBundle(iv_chip)->iv_ceScrubStopCounter.atTh() && + !getOcmbDataBundle(iv_chip)->iv_ueScrubStopCounter.atTh() ) + { + // If the stop conditions aren't set, just break out. + if ( !(0xf != reg->GetBitFieldJustified(0,4) && // NCE int TH + 0xf != reg->GetBitFieldJustified(4,4) && // NCE soft TH + 0xf != reg->GetBitFieldJustified(8,4)) ) // NCE hard TH + { + break; + } + + } + + // If we haven't hit UE threshold yet, check the UE stop condition + if ( !getOcmbDataBundle(iv_chip)->iv_ueScrubStopCounter.atTh() ) + { + // If the stop condition isn't set, just break out + if ( !reg->IsBitSet(35) ) // pause on UE + { + break; + } + } + + // Need to check the stop on mpe stop condition regardless of whether + // we hit the UE or CE threshold. + if ( reg->IsBitSet(34) ) // pause on MPE + { + // If we reach here, all the stop conditions are set for background + // scrub, so we can resume. 
+ o_canResume = true; + } + }while(0); return o_rc; diff --git a/src/usr/diag/prdf/plat/prdfPlatServices.C b/src/usr/diag/prdf/plat/prdfPlatServices.C index 491cc344d..55bc84657 100644 --- a/src/usr/diag/prdf/plat/prdfPlatServices.C +++ b/src/usr/diag/prdf/plat/prdfPlatServices.C @@ -41,6 +41,7 @@ #include <prdfCenMbaDataBundle.H> #include <prdfP9McbistDataBundle.H> +#include <prdfOcmbDataBundle.H> #include <prdfMemScrubUtils.H> #include <iipServiceDataCollector.h> @@ -1442,6 +1443,12 @@ uint32_t startBgScrub<TYPE_OCMB_CHIP>( ExtensibleChip * i_ocmb, // Get the OCMB fapi target fapi2::Target<fapi2::TARGET_TYPE_OCMB_CHIP> fapiTrgt (i_ocmb->getTrgt()); + #ifdef __HOSTBOOT_RUNTIME + // Starting a new command. Clear the UE and CE scrub stop counters + getOcmbDataBundle( mcbChip )->iv_ueScrubStopCounter.reset(); + getOcmbDataBundle( mcbChip )->iv_ceScrubStopCounter.reset(); + #endif + // Get the stop conditions. // NOTE: If HBRT_PRD is not configured, we want to use the defaults so that // background scrubbing never stops. 
diff --git a/src/usr/diag/prdf/plat/prdfPlatServices_rt.C b/src/usr/diag/prdf/plat/prdfPlatServices_rt.C index cd0b045dc..74ba41e30 100644 --- a/src/usr/diag/prdf/plat/prdfPlatServices_rt.C +++ b/src/usr/diag/prdf/plat/prdfPlatServices_rt.C @@ -38,6 +38,7 @@ // Platform includes #include <prdfCenMbaDataBundle.H> #include <prdfP9McbistDataBundle.H> +#include <prdfOcmbDataBundle.H> #include <prdfMemScrubUtils.H> #include <prdfPlatServices.H> @@ -467,10 +468,8 @@ uint32_t resumeBgScrub<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip ) uint32_t o_rc = SUCCESS; PRDF_TRAC( PRDF_FUNC "Function not supported yet" ); - - /* TODO RTC 207273 - no hwp support yet - - // Get the OCMB_CHIP fapi target + /* TODO RTC 207273 - no HWP support yet + // Get the OCMB fapi target fapi2::Target<fapi2::TARGET_TYPE_OCMB_CHIP> fapiTrgt ( i_chip->getTrgt() ); do @@ -484,9 +483,45 @@ uint32_t resumeBgScrub<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip ) break; } + // Check UE and CE stop counters to determine stop conditions + mss::mcbist::stop_conditions<> stopCond; + if ( getOcmbDataBundle(i_chip)->iv_ueScrubStopCounter.atTh() ) + { + // If we've reached the limit of UEs we're allowed to stop on + // per rank, only set the stop on mpe stop condition. + stopCond.set_pause_on_mpe(mss::ON); + } + else if ( getOcmbDataBundle(i_chip)->iv_ceScrubStopCounter.atTh() ) + { + // If we've reached the limit of CEs we're allowed to stop on + // per rank, set all the normal stop conditions except stop on CE + stopCond.set_pause_on_aue(mss::ON); + + #ifdef CONFIG_HBRT_PRD + + stopCond.set_pause_on_mpe(mss::ON) + .set_pause_on_ue(mss::ON); + + // In MNFG mode, stop on RCE_ETE to get an accurate callout for IUEs + if ( mfgMode() ) stopCond.set_thresh_rce(1); + + #endif + } + else + { + // If we haven't reached threshold on the number of UEs or CEs we + // have stopped on, do not change the stop conditions. 
+ stopCond = mss::mcbist::stop_conditions<>( + mss::mcbist::stop_conditions<>::DONT_CHANGE ); + } + // Resume the command on the next address. + // Note: we have to limit the number of times a command has been stopped + // because of a UE/CE. Therefore, we must always resume the command to + // the end of the current slave rank so we can reset the UE/CE counts. errlHndl_t errl; - FAPI_INVOKE_HWP( errl, mss::memdiags::continue_cmd, fapiTrgt ); + FAPI_INVOKE_HWP( errl, mss::memdiags::continue_cmd, fapiTrgt, + mss::mcbist::STOP_AFTER_SLAVE_RANK, stopCond ); if ( nullptr != errl ) { @@ -497,7 +532,6 @@ uint32_t resumeBgScrub<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip ) } } while (0); - */ return o_rc; |