summaryrefslogtreecommitdiffstats
path: root/src/usr/diag
diff options
context:
space:
mode:
authorZane Shelley <zshelle@us.ibm.com>2018-04-26 17:06:40 -0500
committerZane C. Shelley <zshelle@us.ibm.com>2018-05-04 22:26:42 -0400
commite8111177af9da1880627e7f025ad29fa99dacfa5 (patch)
tree1ee1c661c7f2361119347d1500fa118fa508e43e /src/usr/diag
parent3d5c1c541bae81970b97160272e76e3772a75864 (diff)
downloadtalos-hostboot-e8111177af9da1880627e7f025ad29fa99dacfa5.tar.gz
talos-hostboot-e8111177af9da1880627e7f025ad29fa99dacfa5.zip
PRD: fixed how RT TPS procedures are banned from processing
If banned, the TPS procedure must be prevented from being added to the queue. This fixes the issue where background scrubbing gets stopped manually and the TPS procedure is never executed. This also fixes a flooding issue when TPS is not available to fix persistent errors.

Change-Id: I76cc9f7ce7c06587261ff593a626a4ef51b317e1
RTC: 192009
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/57919
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Reviewed-by: Caleb N. Palmer <cnpalmer@us.ibm.com>
Reviewed-by: Matt Derksen <mderkse1@us.ibm.com>
Reviewed-by: Benjamin J. Weisenbeck <bweisenb@us.ibm.com>
Reviewed-by: Brian J. Stegmiller <bjs@us.ibm.com>
Reviewed-by: Zane C. Shelley <zshelle@us.ibm.com>
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/58327
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Diffstat (limited to 'src/usr/diag')
-rw-r--r--src/usr/diag/prdf/common/plat/mem/prdfMemDbUtils.H22
-rw-r--r--src/usr/diag/prdf/common/plat/mem/prdfP9McaDataBundle.H5
-rw-r--r--src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.H43
-rw-r--r--src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C20
-rwxr-xr-xsrc/usr/diag/prdf/plat/mem/prdfMemTdQueue.H3
-rw-r--r--src/usr/diag/prdf/plat/mem/prdfMemTps_rt.C15
6 files changed, 83 insertions, 25 deletions
diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemDbUtils.H b/src/usr/diag/prdf/common/plat/mem/prdfMemDbUtils.H
index d792e8cdf..fc4ff7800 100644
--- a/src/usr/diag/prdf/common/plat/mem/prdfMemDbUtils.H
+++ b/src/usr/diag/prdf/common/plat/mem/prdfMemDbUtils.H
@@ -157,6 +157,28 @@ uint32_t handleTdEvent<TARGETING::TYPE_MBA>( ExtensibleChip * i_chip,
return getMbaDataBundle(i_chip)->getTdCtlr()->handleTdEvent( io_sc );
}
+/**
+ * @brief Generic wrapper to tell the TD controller to ban TPS on a rank.
+ * @param i_chip MCA or MBA.
+ * @param i_rank The target slave rank.
+ */
+template<TARGETING::TYPE T>
+void banTps( ExtensibleChip * i_chip, const MemRank & i_rank );
+
+template<> inline
+void banTps<TARGETING::TYPE_MCA>( ExtensibleChip * i_chip,
+ const MemRank & i_rank )
+{
+ getMcaDataBundle(i_chip)->getTdCtlr()->banTps( i_chip, i_rank );
+}
+
+template<> inline
+void banTps<TARGETING::TYPE_MBA>( ExtensibleChip * i_chip,
+ const MemRank & i_rank )
+{
+ getMbaDataBundle(i_chip)->getTdCtlr()->banTps( i_chip, i_rank );
+}
+
#endif // Hostboot Runtime only
} // end namespace MemDbUtils
diff --git a/src/usr/diag/prdf/common/plat/mem/prdfP9McaDataBundle.H b/src/usr/diag/prdf/common/plat/mem/prdfP9McaDataBundle.H
index 4d3c4fa02..07a6b7b92 100644
--- a/src/usr/diag/prdf/common/plat/mem/prdfP9McaDataBundle.H
+++ b/src/usr/diag/prdf/common/plat/mem/prdfP9McaDataBundle.H
@@ -5,7 +5,7 @@
/* */
/* OpenPOWER HostBoot Project */
/* */
-/* Contributors Listed Below - COPYRIGHT 2016,2017 */
+/* Contributors Listed Below - COPYRIGHT 2016,2018 */
/* [+] International Business Machines Corp. */
/* */
/* */
@@ -202,9 +202,6 @@ class McaDataBundle : public DataBundle
* scrubbing is resumed. */
bool iv_maskMainlineNceTce = false;
- /** Map to keep track of ranks that have banned TPS. */
- std::map<MemRank, bool> iv_tpsBans;
-
#endif
};
diff --git a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.H b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.H
index 533213207..e098622cb 100644
--- a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.H
+++ b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.H
@@ -85,7 +85,20 @@ class MemTdCtlr
* diagnostics, if not already in progress.
* @param i_entry The new TD queue entry.
*/
- void pushToQueue( TdEntry * i_entry ) { iv_queue.push(i_entry); }
+ void pushToQueue( TdEntry * i_entry )
+ {
+ #ifdef __HOSTBOOT_RUNTIME
+ if ( TdEntry::TPS_EVENT == i_entry->getType() &&
+ isTpsBanned(i_entry->getChip(), i_entry->getRank()) )
+ {
+ PRDF_ERR( "[MemTdCtlr::pushToQueue] TPS banned on 0x%08x 0x%02x",
+ i_entry->getChip()->getHuid(), i_entry->getRank() );
+ return; // prevent the entry from being added to the queue.
+ }
+ #endif
+
+ iv_queue.push(i_entry);
+ }
#ifdef __HOSTBOOT_RUNTIME
@@ -107,6 +120,18 @@ class MemTdCtlr
uint32_t handleTdEvent( STEP_CODE_DATA_STRUCT & io_sc );
/**
+ * @brief Bans TPS on the given rank. Any attempts to add a TPS procedure
+ * to the queue for this rank will be ignored.
+ * @param i_rank The target slave rank.
+ */
+ void banTps( ExtensibleChip * i_chip, const MemRank & i_rank )
+ {
+ // The mapped value is irrelevant. Only the presence of the (chip, rank)
+ // key in the map matters.
+ iv_tpsBans[std::make_pair(i_chip, i_rank)] = true;
+ }
+
+ /**
* @brief Handles reset-reload or FO scenario.
*
* This does not call initialize() or start any maintenance commands.
@@ -238,6 +263,17 @@ class MemTdCtlr
#ifdef __HOSTBOOT_RUNTIME
/**
+ * @param i_rank The target slave rank.
+ * @return True, if this slave rank has been banned. False, otherwise.
+ */
+ bool isTpsBanned( ExtensibleChip * i_chip, const MemRank & i_rank )
+ {
+ // Check if this rank exists in the map.
+ std::pair<ExtensibleChip *, MemRank> e = std::make_pair(i_chip, i_rank);
+ return ( iv_tpsBans.end() != iv_tpsBans.find(e) );
+ }
+
+ /**
* @brief Masks NCE and TCE ECC attentions.
* @note Only intended to be used just before starting a new TD procedure.
* @return Non-SUCCESS if an internal function fails, SUCCESS otherwise.
@@ -287,9 +323,8 @@ class MemTdCtlr
* to be restarted with a new command. */
bool iv_resumeBgScrub = false;
- /** Keeps track if the fetch attentions have been masked during a TD
- * procedure. */
- bool iv_fetchAttnsMasked = false;
+ /** Map to keep track of the chip/rank pairs on which TPS has been banned. */
+ std::map< std::pair<ExtensibleChip *, MemRank>, bool > iv_tpsBans;
#else // IPL only
diff --git a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C
index af4acc15b..2fef2fd9a 100644
--- a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C
+++ b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C
@@ -185,6 +185,22 @@ uint32_t MemTdCtlr<T>::handleTdEvent( STEP_CODE_DATA_STRUCT & io_sc )
// Don't interrupt a TD procedure if one is already in progress.
if ( nullptr != iv_curProcedure ) break;
+ // If the queue is empty, there is nothing to do. So there is no point
+ // in stopping background scrub. This could happen if TPS was
+ // banned on a rank and the TPS request was never added to the queue. In
+ // that case, mask fetch attentions temporarily to prevent flooding.
+ if ( iv_queue.empty() )
+ {
+ o_rc = maskEccAttns();
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC "maskEccAttns() failed" );
+ break;
+ }
+
+ break; // Don't stop background scrub.
+ }
+
// Stop background scrubbing.
o_rc = stopBgScrub<T>( iv_chip );
if ( SUCCESS != o_rc )
@@ -849,8 +865,6 @@ uint32_t MemTdCtlr<TYPE_MBA>::maskEccAttns()
break;
}
- iv_fetchAttnsMasked = true;
-
} while (0);
return o_rc;
@@ -903,8 +917,6 @@ uint32_t MemTdCtlr<TYPE_MBA>::unmaskEccAttns()
break;
}
- iv_fetchAttnsMasked = false;
-
} while (0);
return o_rc;
diff --git a/src/usr/diag/prdf/plat/mem/prdfMemTdQueue.H b/src/usr/diag/prdf/plat/mem/prdfMemTdQueue.H
index 01fa7aaa0..5e4d32454 100755
--- a/src/usr/diag/prdf/plat/mem/prdfMemTdQueue.H
+++ b/src/usr/diag/prdf/plat/mem/prdfMemTdQueue.H
@@ -119,6 +119,9 @@ class TdEntry
/** @return The event type */
TdType getType() const { return iv_tdType; }
+ /** @return The chip in which this event occurred */
+ ExtensibleChip * getChip() const { return iv_chip; }
+
/** @return The rank in which this event occurred */
MemRank getRank() const { return iv_rank; }
diff --git a/src/usr/diag/prdf/plat/mem/prdfMemTps_rt.C b/src/usr/diag/prdf/plat/mem/prdfMemTps_rt.C
index be71392d7..526e52e00 100644
--- a/src/usr/diag/prdf/plat/mem/prdfMemTps_rt.C
+++ b/src/usr/diag/prdf/plat/mem/prdfMemTps_rt.C
@@ -26,14 +26,13 @@
/** @file prdfMemTps_rt.C */
// Platform includes
-#include <prdfCenMbaDataBundle.H>
+#include <prdfMemDbUtils.H>
#include <prdfMemEccAnalysis.H>
#include <prdfMemMark.H>
#include <prdfMemScrubUtils.H>
#include <prdfMemTdFalseAlarm.H>
#include <prdfMemTps.H>
#include <prdfP9McaExtraSig.H>
-#include <prdfP9McaDataBundle.H>
#include <prdfTargetServices.H>
using namespace TARGETING;
@@ -350,9 +349,7 @@ uint32_t TpsEvent<T>::analyzeTpsPhase1_rt( STEP_CODE_DATA_STRUCT & io_sc,
// If iv_ban is true and this procedure is done, then ban TPS on this rank.
if ( iv_ban && o_done )
{
- // It doesn't matter what we set the value to, we just need to
- // make sure the rank exists in the map.
- getMcaDataBundle(iv_chip)->iv_tpsBans[iv_rank] = true;
+ MemDbUtils::banTps<T>( iv_chip, iv_rank );
// Permanently mask mainline NCEs and TCEs.
getMcaDataBundle(iv_chip)->iv_maskMainlineNceTce = true;
@@ -1145,14 +1142,6 @@ uint32_t TpsEvent<TYPE_MCA>::nextStep( STEP_CODE_DATA_STRUCT & io_sc,
do
{
- // Check if TPS is banned on this rank.
- if ( 1 == getMcaDataBundle(iv_chip)->iv_tpsBans.count(iv_rank) )
- {
- // If TPS is banned, abort the procedure.
- o_done = true;
- break;
- }
-
// Runtime TPS is slightly different than IPL TPS or any other TD event.
// There really is only one phase, but we use two phases to help
// differentiate between the CE types that are collected. So only one of
OpenPOWER on IntegriCloud