summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorCaleb Palmer <cnpalmer@us.ibm.com>2019-09-30 15:19:49 -0500
committerZane C Shelley <zshelle@us.ibm.com>2019-10-04 12:57:41 -0500
commit2dbc30966476745b0b6fa85e9c8e46398b31e86f (patch)
treed618b7db53b611ca53c1ffbc3ca104199d6ef291
parent9d750b3d919830c1813fbcf2e7797a10c9075928 (diff)
downloadtalos-hostboot-2dbc30966476745b0b6fa85e9c8e46398b31e86f.tar.gz
talos-hostboot-2dbc30966476745b0b6fa85e9c8e46398b31e86f.zip
PRD: Axone UE/CE threshold for background scrub
Change-Id: I2872ee94a385d84b10a6e0aaf7f3c0a95c496aa0
Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/84551
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Reviewed-by: Benjamen G Tyner <ben.tyner@ibm.com>
Reviewed-by: Brian J Stegmiller <bjs@us.ibm.com>
Reviewed-by: Zane C Shelley <zshelle@us.ibm.com>
Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/84781
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
-rw-r--r--src/usr/diag/prdf/common/plat/mem/prdfOcmbDataBundle.H7
-rw-r--r--src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C60
-rw-r--r--src/usr/diag/prdf/plat/prdfPlatServices.C7
-rw-r--r--src/usr/diag/prdf/plat/prdfPlatServices_rt.C46
4 files changed, 100 insertions, 20 deletions
diff --git a/src/usr/diag/prdf/common/plat/mem/prdfOcmbDataBundle.H b/src/usr/diag/prdf/common/plat/mem/prdfOcmbDataBundle.H
index e5977ee6c..499baf00c 100644
--- a/src/usr/diag/prdf/common/plat/mem/prdfOcmbDataBundle.H
+++ b/src/usr/diag/prdf/common/plat/mem/prdfOcmbDataBundle.H
@@ -40,6 +40,7 @@
#ifdef __HOSTBOOT_MODULE
+#include <prdfMemScrubUtils.H>
#include <prdfMemTdFalseAlarm.H>
#include <prdfMemThresholds.H>
#include <prdfMemTdCtlr.H>
@@ -206,6 +207,12 @@ class OcmbDataBundle : public DataBundle
* scrubbing is resumed. */
bool iv_maskMainlineNceTce = false;
+ // These are used to limit the number of times a scrub command will stop
+ // on a UE or CE on a rank. This is to prevent potential flooding of
+ // maintenance UEs or CEs. The threshold will be 16 per rank for each.
+ ScrubResumeCounter iv_ueScrubStopCounter;
+ ScrubResumeCounter iv_ceScrubStopCounter;
+
#else // IPL only
/** MNFG IPL CE statistics. */
diff --git a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C
index 817a345f3..da18cea81 100644
--- a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C
+++ b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C
@@ -1693,21 +1693,53 @@ uint32_t MemTdCtlr<TYPE_OCMB_CHIP>::canResumeBgScrub( bool & o_canResume )
// can use the stop conditions, which should be unique for background scrub,
// to determine if it has been configured.
- SCAN_COMM_REGISTER_CLASS * reg = iv_chip->getRegister( "MBSTR" );
- o_rc = reg->Read();
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "Read() failed on MBSTR: iv_chip=0x%08x",
- iv_chip->getHuid() );
- }
- else if ( 0xf != reg->GetBitFieldJustified(0,4) && // NCE int TH
- 0xf != reg->GetBitFieldJustified(4,4) && // NCE soft TH
- 0xf != reg->GetBitFieldJustified(8,4) && // NCE hard TH
- reg->IsBitSet(34) && // pause on MPE
- reg->IsBitSet(35) ) // pause on UE
+ do
{
- o_canResume = true;
- }
+ SCAN_COMM_REGISTER_CLASS * reg = iv_chip->getRegister( "MBSTR" );
+ o_rc = reg->Read();
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC "Read() failed on MBSTR: iv_chip=0x%08x",
+ iv_chip->getHuid() );
+ break;
+ }
+ // Note: The stop conditions for background scrubbing can now vary
+ // depending on whether we have hit the threshold for the number of
+ // UEs or CEs on a rank that the scrub command has stopped on.
+
+ // If we haven't hit CE or UE threshold, check the CE stop conditions
+ if ( !getOcmbDataBundle(iv_chip)->iv_ceScrubStopCounter.atTh() &&
+ !getOcmbDataBundle(iv_chip)->iv_ueScrubStopCounter.atTh() )
+ {
+ // If the stop conditions aren't set, just break out.
+ if ( !(0xf != reg->GetBitFieldJustified(0,4) && // NCE int TH
+ 0xf != reg->GetBitFieldJustified(4,4) && // NCE soft TH
+ 0xf != reg->GetBitFieldJustified(8,4)) ) // NCE hard TH
+ {
+ break;
+ }
+
+ }
+
+ // If we haven't hit UE threshold yet, check the UE stop condition
+ if ( !getOcmbDataBundle(iv_chip)->iv_ueScrubStopCounter.atTh() )
+ {
+ // If the stop condition isn't set, just break out
+ if ( !reg->IsBitSet(35) ) // pause on UE
+ {
+ break;
+ }
+ }
+
+ // The stop-on-MPE condition must be checked regardless of whether we
+ // have hit the UE or CE threshold.
+ if ( reg->IsBitSet(34) ) // pause on MPE
+ {
+ // If we reach here, all the stop conditions are set for background
+ // scrub, so we can resume.
+ o_canResume = true;
+ }
+ }while(0);
return o_rc;
diff --git a/src/usr/diag/prdf/plat/prdfPlatServices.C b/src/usr/diag/prdf/plat/prdfPlatServices.C
index 491cc344d..55bc84657 100644
--- a/src/usr/diag/prdf/plat/prdfPlatServices.C
+++ b/src/usr/diag/prdf/plat/prdfPlatServices.C
@@ -41,6 +41,7 @@
#include <prdfCenMbaDataBundle.H>
#include <prdfP9McbistDataBundle.H>
+#include <prdfOcmbDataBundle.H>
#include <prdfMemScrubUtils.H>
#include <iipServiceDataCollector.h>
@@ -1442,6 +1443,12 @@ uint32_t startBgScrub<TYPE_OCMB_CHIP>( ExtensibleChip * i_ocmb,
// Get the OCMB fapi target
fapi2::Target<fapi2::TARGET_TYPE_OCMB_CHIP> fapiTrgt (i_ocmb->getTrgt());
+ #ifdef __HOSTBOOT_RUNTIME
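+ // Starting a new command. Clear the UE and CE scrub stop counters. NOTE(review): 'mcbChip' is not declared anywhere in this hunk; the chip parameter shown here is 'i_ocmb' -- confirm the argument below.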
+ getOcmbDataBundle( mcbChip )->iv_ueScrubStopCounter.reset();
+ getOcmbDataBundle( mcbChip )->iv_ceScrubStopCounter.reset();
+ #endif
+
// Get the stop conditions.
// NOTE: If HBRT_PRD is not configured, we want to use the defaults so that
// background scrubbing never stops.
diff --git a/src/usr/diag/prdf/plat/prdfPlatServices_rt.C b/src/usr/diag/prdf/plat/prdfPlatServices_rt.C
index cd0b045dc..74ba41e30 100644
--- a/src/usr/diag/prdf/plat/prdfPlatServices_rt.C
+++ b/src/usr/diag/prdf/plat/prdfPlatServices_rt.C
@@ -38,6 +38,7 @@
// Platform includes
#include <prdfCenMbaDataBundle.H>
#include <prdfP9McbistDataBundle.H>
+#include <prdfOcmbDataBundle.H>
#include <prdfMemScrubUtils.H>
#include <prdfPlatServices.H>
@@ -467,10 +468,8 @@ uint32_t resumeBgScrub<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip )
uint32_t o_rc = SUCCESS;
PRDF_TRAC( PRDF_FUNC "Function not supported yet" );
-
- /* TODO RTC 207273 - no hwp support yet
-
- // Get the OCMB_CHIP fapi target
+ /* TODO RTC 207273 - no HWP support yet
+ // Get the OCMB fapi target
fapi2::Target<fapi2::TARGET_TYPE_OCMB_CHIP> fapiTrgt ( i_chip->getTrgt() );
do
@@ -484,9 +483,45 @@ uint32_t resumeBgScrub<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip )
break;
}
+ // Check UE and CE stop counters to determine stop conditions
+ mss::mcbist::stop_conditions<> stopCond;
+ if ( getOcmbDataBundle(i_chip)->iv_ueScrubStopCounter.atTh() )
+ {
+ // If we've reached the limit of UEs we're allowed to stop on
+ // per rank, set only the stop-on-MPE condition.
+ stopCond.set_pause_on_mpe(mss::ON);
+ }
+ else if ( getOcmbDataBundle(i_chip)->iv_ceScrubStopCounter.atTh() )
+ {
+ // If we've reached the limit of CEs we're allowed to stop on
+ // per rank, set all the normal stop conditions except stop on CE
+ stopCond.set_pause_on_aue(mss::ON);
+
+ #ifdef CONFIG_HBRT_PRD
+
+ stopCond.set_pause_on_mpe(mss::ON)
+ .set_pause_on_ue(mss::ON);
+
+ // In MNFG mode, stop on RCE_ETE to get an accurate callout for IUEs
+ if ( mfgMode() ) stopCond.set_thresh_rce(1);
+
+ #endif
+ }
+ else
+ {
+ // If we haven't reached threshold on the number of UEs or CEs we
+ // have stopped on, do not change the stop conditions.
+ stopCond = mss::mcbist::stop_conditions<>(
+ mss::mcbist::stop_conditions<>::DONT_CHANGE );
+ }
+
// Resume the command on the next address.
+ // Note: we have to limit the number of times a command has been stopped
+ // because of a UE/CE. Therefore, we must always resume the command to
+ // the end of the current slave rank so we can reset the UE/CE counts.
errlHndl_t errl;
- FAPI_INVOKE_HWP( errl, mss::memdiags::continue_cmd, fapiTrgt );
+ FAPI_INVOKE_HWP( errl, mss::memdiags::continue_cmd, fapiTrgt,
+ mss::mcbist::STOP_AFTER_SLAVE_RANK, stopCond );
if ( nullptr != errl )
{
@@ -497,7 +532,6 @@ uint32_t resumeBgScrub<TYPE_OCMB_CHIP>( ExtensibleChip * i_chip )
}
} while (0);
-
*/
return o_rc;
OpenPOWER on IntegriCloud