summary | refs | log | tree | commit | diff | stats
path: root/src/usr/diag/prdf
diff options
context:
space:
mode:
author: Caleb Palmer <cnpalmer@us.ibm.com> 2019-09-23 08:18:27 -0500
committer: Zane C Shelley <zshelle@us.ibm.com> 2019-09-30 10:34:10 -0500
commit: 70a4b6bf1a02fbe7d71a67f6ebae6e75f0e20c95 (patch)
tree: 70fb8777fc074dacf5027170bca5377adf7ea5cc /src/usr/diag/prdf
parent: 0f3e041cbe1c7d6d2ae203187f843b1714b287db (diff)
downloadtalos-hostboot-70a4b6bf1a02fbe7d71a67f6ebae6e75f0e20c95.tar.gz
talos-hostboot-70a4b6bf1a02fbe7d71a67f6ebae6e75f0e20c95.zip
PRD: Add threshold for stopping on UEs/CEs during BgScrub
Change-Id: Iddeec04300631fc57b5c2f4a2eb57302e9f98fe2 CQ: SW476467 Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/84071 Reviewed-by: Benjamen G Tyner <ben.tyner@ibm.com> Reviewed-by: Paul Greenwood <paul.greenwood@ibm.com> Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com> Reviewed-by: Zane C Shelley <zshelle@us.ibm.com> Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/84073 Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com> Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com> Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Diffstat (limited to 'src/usr/diag/prdf')
-rw-r--r-- src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C 60
-rw-r--r-- src/usr/diag/prdf/plat/mem/prdfP9McbistDataBundle.H 13
-rw-r--r-- src/usr/diag/prdf/plat/prdfPlatServices.C 7
-rw-r--r-- src/usr/diag/prdf/plat/prdfPlatServices_rt.C 39
4 files changed, 103 insertions, 16 deletions
diff --git a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C
index 779d13a73..817a345f3 100644
--- a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C
+++ b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C
@@ -1623,21 +1623,53 @@ uint32_t MemTdCtlr<TYPE_MCBIST>::canResumeBgScrub( bool & o_canResume )
// can use the stop conditions, which should be unique for background scrub,
// to determine if it has been configured.
- SCAN_COMM_REGISTER_CLASS * reg = iv_chip->getRegister( "MBSTR" );
- o_rc = reg->Read();
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "Read() failed on MBSTR: iv_chip=0x%08x",
- iv_chip->getHuid() );
- }
- else if ( 0xf != reg->GetBitFieldJustified(0,4) && // NCE int TH
- 0xf != reg->GetBitFieldJustified(4,4) && // NCE soft TH
- 0xf != reg->GetBitFieldJustified(8,4) && // NCE hard TH
- reg->IsBitSet(34) && // pause on MPE
- reg->IsBitSet(35) ) // pause on UE
+ do
{
- o_canResume = true;
- }
+ SCAN_COMM_REGISTER_CLASS * reg = iv_chip->getRegister( "MBSTR" );
+ o_rc = reg->Read();
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC "Read() failed on MBSTR: iv_chip=0x%08x",
+ iv_chip->getHuid() );
+ break;
+ }
+ // Note: The stop conditions for background scrubbing can now vary
+ // depending on whether we have hit the threshold for the number of
+ // UEs or CEs that we have stopped on for a rank.
+
+ // If we haven't hit CE or UE threshold, check the CE stop conditions
+ if ( !getMcbistDataBundle(iv_chip)->iv_ceScrubStopCounter.atTh() &&
+ !getMcbistDataBundle(iv_chip)->iv_ueScrubStopCounter.atTh() )
+ {
+ // If the stop conditions aren't set, just break out.
+ if ( !(0xf != reg->GetBitFieldJustified(0,4) && // NCE int TH
+ 0xf != reg->GetBitFieldJustified(4,4) && // NCE soft TH
+ 0xf != reg->GetBitFieldJustified(8,4)) ) // NCE hard TH
+ {
+ break;
+ }
+
+ }
+
+ // If we haven't hit UE threshold yet, check the UE stop condition
+ if ( !getMcbistDataBundle(iv_chip)->iv_ueScrubStopCounter.atTh() )
+ {
+ // If the stop condition isn't set, just break out
+ if ( !reg->IsBitSet(35) ) // pause on UE
+ {
+ break;
+ }
+ }
+
+ // The pause-on-MPE stop condition must be checked regardless of
+ // whether we hit the UE or CE threshold.
+ if ( reg->IsBitSet(34) ) // pause on MPE
+ {
+ // If we reach here, all the stop conditions are set for background
+ // scrub, so we can resume.
+ o_canResume = true;
+ }
+ }while(0);
return o_rc;
diff --git a/src/usr/diag/prdf/plat/mem/prdfP9McbistDataBundle.H b/src/usr/diag/prdf/plat/mem/prdfP9McbistDataBundle.H
index 4a284253a..3883eb936 100644
--- a/src/usr/diag/prdf/plat/mem/prdfP9McbistDataBundle.H
+++ b/src/usr/diag/prdf/plat/mem/prdfP9McbistDataBundle.H
@@ -5,7 +5,7 @@
/* */
/* OpenPOWER HostBoot Project */
/* */
-/* Contributors Listed Below - COPYRIGHT 2016 */
+/* Contributors Listed Below - COPYRIGHT 2016,2019 */
/* [+] International Business Machines Corp. */
/* */
/* */
@@ -81,6 +81,17 @@ class McbistDataBundle : public DataBundle
/** The Targeted Diagnostics controller. */
MemTdCtlr<TARGETING::TYPE_MCBIST> * iv_tdCtlr = nullptr;
+
+ public: // instance variables
+ #ifdef __HOSTBOOT_RUNTIME
+
+ // These are used to limit the number of times a scrub command will stop
+ // on a UE or CE on a rank. This is to prevent potential flooding of
+ // maintenance UEs or CEs. The threshold will be 16 per rank for each.
+ ScrubResumeCounter iv_ueScrubStopCounter;
+ ScrubResumeCounter iv_ceScrubStopCounter;
+
+ #endif
};
/**
diff --git a/src/usr/diag/prdf/plat/prdfPlatServices.C b/src/usr/diag/prdf/plat/prdfPlatServices.C
index 48d26f8b8..e4b122ab5 100644
--- a/src/usr/diag/prdf/plat/prdfPlatServices.C
+++ b/src/usr/diag/prdf/plat/prdfPlatServices.C
@@ -40,6 +40,7 @@
#include <prdfRegisterCache.H>
#include <prdfCenMbaDataBundle.H>
+#include <prdfP9McbistDataBundle.H>
#include <prdfMemScrubUtils.H>
#include <iipServiceDataCollector.h>
@@ -791,6 +792,12 @@ uint32_t startBgScrub<TYPE_MCA>( ExtensibleChip * i_mcaChip,
ExtensibleChip * mcbChip = getConnectedParent( i_mcaChip, TYPE_MCBIST );
fapi2::Target<fapi2::TARGET_TYPE_MCBIST> fapiTrgt ( mcbChip->getTrgt() );
+ #ifdef __HOSTBOOT_RUNTIME
+ // Starting a new command. Clear the UE and CE scrub stop counters
+ getMcbistDataBundle( mcbChip )->iv_ueScrubStopCounter.reset();
+ getMcbistDataBundle( mcbChip )->iv_ceScrubStopCounter.reset();
+ #endif
+
// Get the stop conditions.
// NOTE: If HBRT_PRD is not configured, we want to use the defaults so that
// background scrubbing never stops.
diff --git a/src/usr/diag/prdf/plat/prdfPlatServices_rt.C b/src/usr/diag/prdf/plat/prdfPlatServices_rt.C
index e3bdecb59..cd0b045dc 100644
--- a/src/usr/diag/prdf/plat/prdfPlatServices_rt.C
+++ b/src/usr/diag/prdf/plat/prdfPlatServices_rt.C
@@ -37,6 +37,7 @@
// Platform includes
#include <prdfCenMbaDataBundle.H>
+#include <prdfP9McbistDataBundle.H>
#include <prdfMemScrubUtils.H>
#include <prdfPlatServices.H>
@@ -173,9 +174,45 @@ uint32_t resumeBgScrub<TYPE_MCBIST>( ExtensibleChip * i_chip )
break;
}
+ // Check UE and CE stop counters to determine stop conditions
+ mss::mcbist::stop_conditions<> stopCond;
+ if ( getMcbistDataBundle(i_chip)->iv_ueScrubStopCounter.atTh() )
+ {
+ // If we've reached the limit of UEs we're allowed to stop on
+ // per rank, only set the pause-on-MPE stop condition.
+ stopCond.set_pause_on_mpe(mss::ON);
+ }
+ else if ( getMcbistDataBundle(i_chip)->iv_ceScrubStopCounter.atTh() )
+ {
+ // If we've reached the limit of CEs we're allowed to stop on
+ // per rank, set all the normal stop conditions except stop on CE
+ stopCond.set_pause_on_aue(mss::ON);
+
+ #ifdef CONFIG_HBRT_PRD
+
+ stopCond.set_pause_on_mpe(mss::ON)
+ .set_pause_on_ue(mss::ON);
+
+ // In MNFG mode, stop on RCE_ETE to get an accurate callout for IUEs
+ if ( mfgMode() ) stopCond.set_thresh_rce(1);
+
+ #endif
+ }
+ else
+ {
+ // If we haven't reached threshold on the number of UEs or CEs we
+ // have stopped on, do not change the stop conditions.
+ stopCond = mss::mcbist::stop_conditions<>(
+ mss::mcbist::stop_conditions<>::DONT_CHANGE );
+ }
+
// Resume the command on the next address.
+ // Note: we have to limit the number of times a command has been stopped
+ // because of a UE/CE. Therefore, we must always resume the command to
+ // the end of the current slave rank so we can reset the UE/CE counts.
errlHndl_t errl;
- FAPI_INVOKE_HWP( errl, mss::memdiags::continue_cmd, fapiTrgt );
+ FAPI_INVOKE_HWP( errl, mss::memdiags::continue_cmd, fapiTrgt,
+ mss::mcbist::STOP_AFTER_SLAVE_RANK, stopCond );
if ( nullptr != errl )
{
OpenPOWER on IntegriCloud