summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorZane Shelley <zshelle@us.ibm.com>2018-05-14 21:57:26 -0500
committerZane C. Shelley <zshelle@us.ibm.com>2018-05-18 10:39:37 -0400
commit1b04e458595a9e9c5c04dd322f90d4c44129e111 (patch)
treec70c6c2c35632c604d7d3c359118964c9fbcc948
parentc1c584f04be0bb7b1340d013b9b9d6147ffa2960 (diff)
downloadtalos-hostboot-1b04e458595a9e9c5c04dd322f90d4c44129e111.tar.gz
talos-hostboot-1b04e458595a9e9c5c04dd322f90d4c44129e111.zip
PRD: support to determine if BG scrub can resume after stop-on-error
Change-Id: Ie2179b66bbe77ef1b982a6dfb1750734d6a9cc23 RTC: 192638 Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/58811 Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com> Reviewed-by: Caleb N. Palmer <cnpalmer@us.ibm.com> Reviewed-by: Benjamin J. Weisenbeck <bweisenb@us.ibm.com> Reviewed-by: Brian J. Stegmiller <bjs@us.ibm.com> Reviewed-by: Matt Derksen <mderkse1@us.ibm.com> Reviewed-by: Zane C. Shelley <zshelle@us.ibm.com> Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/59012 Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com> Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com> Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
-rw-r--r--src/usr/diag/prdf/plat/mem/prdfMemScrubUtils.C136
-rw-r--r--src/usr/diag/prdf/plat/mem/prdfMemScrubUtils.H23
-rw-r--r--src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.C17
-rw-r--r--src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.H7
-rw-r--r--src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C73
5 files changed, 146 insertions, 110 deletions
diff --git a/src/usr/diag/prdf/plat/mem/prdfMemScrubUtils.C b/src/usr/diag/prdf/plat/mem/prdfMemScrubUtils.C
index 38cbcc8bd..5b7e72c03 100644
--- a/src/usr/diag/prdf/plat/mem/prdfMemScrubUtils.C
+++ b/src/usr/diag/prdf/plat/mem/prdfMemScrubUtils.C
@@ -386,96 +386,6 @@ uint32_t checkEccFirs<TYPE_MBA>( ExtensibleChip * i_chip,
//------------------------------------------------------------------------------
template<>
-uint32_t isBgScrubConfig<TYPE_MCBIST>( ExtensibleChip * i_chip,
- bool & o_isBgScrub )
-{
- #define PRDF_FUNC "[isBgScrubConfig] "
-
- PRDF_ASSERT( nullptr != i_chip );
- PRDF_ASSERT( TYPE_MCBIST == i_chip->getType() );
-
- uint32_t o_rc = SUCCESS;
-
- o_isBgScrub = false;
-
- do
- {
- // There really is not a good way of doing this. A scrub command is a
- // scrub command the only difference is the speed. Unfortunately, that
- // speed can change depending on how the hardware team tunes it. For
- // now, we can use the stop conditions, which should be unique for
- // background scrub, to determine if it has been configured.
-
- SCAN_COMM_REGISTER_CLASS * reg = i_chip->getRegister( "MBSTR" );
- o_rc = reg->Read();
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "Read() failed on MBSTR: i_chip=0x%08x",
- i_chip->getHuid() );
- break;
- }
-
- if ( 0xf != reg->GetBitFieldJustified(0,4) && // NCE int TH
- 0xf != reg->GetBitFieldJustified(4,4) && // NCE soft TH
- 0xf != reg->GetBitFieldJustified(8,4) && // NCE hard TH
- reg->IsBitSet(34) && // pause on MPE
- reg->IsBitSet(35) ) // pause on UE
- {
- o_isBgScrub = true;
- }
-
- } while(0);
-
- return o_rc;
-
- #undef PRDF_FUNC
-}
-
-template<>
-uint32_t isBgScrubConfig<TYPE_MCA>( ExtensibleChip * i_chip,
- bool & o_isBgScrub )
-{
- PRDF_ASSERT( nullptr != i_chip );
- PRDF_ASSERT( TYPE_MCA == i_chip->getType() );
-
- ExtensibleChip * mcbChip = getConnectedParent( i_chip, TYPE_MCBIST );
-
- return isBgScrubConfig<TYPE_MCBIST>( mcbChip, o_isBgScrub );
-}
-
-template<>
-uint32_t isBgScrubConfig<TYPE_MBA>( ExtensibleChip * i_chip,
- bool & o_isBgScrub )
-{
- #define PRDF_FUNC "[isBgScrubConfig] "
-
- PRDF_ASSERT( nullptr != i_chip );
- PRDF_ASSERT( TYPE_MBA == i_chip->getType() );
-
- uint32_t o_rc = SUCCESS;
-
- o_isBgScrub = false;
-
- do
- {
- // There really is not a good way of doing this. A scrub command is a
- // scrub command the only difference is the speed. Unfortunately, that
- // speed can change depending on how the hardware team tunes it. For
- // now, we can use the stop conditions, which should be unique for
- // background scrub, to determine if it has been configured.
-
- // TODO RTC 157888
-
- } while(0);
-
- return o_rc;
-
- #undef PRDF_FUNC
-}
-
-//------------------------------------------------------------------------------
-
-template<>
uint32_t setBgScrubThresholds<TYPE_MBA>( ExtensibleChip * i_chip,
const MemRank & i_rank )
{
@@ -534,5 +444,51 @@ uint32_t setBgScrubThresholds<TYPE_MBA>( ExtensibleChip * i_chip,
//------------------------------------------------------------------------------
+template<>
+uint32_t didCmdStopOnLastAddr<TYPE_MBA>( ExtensibleChip * i_chip,
+ AddrRangeType i_rangeType,
+ bool & o_stoppedOnLastAddr )
+{
+ #define PRDF_FUNC "[didCmdStopOnLastAddr] "
+
+ uint32_t o_rc = SUCCESS;
+
+ o_stoppedOnLastAddr = false;
+
+ do
+ {
+ // Get the current address.
+ MemAddr curAddr;
+ o_rc = getMemMaintAddr<TYPE_MBA>( i_chip, curAddr );
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC "getMemMaintAddr(0x%08x) failed",
+ i_chip->getHuid() );
+ break;
+ }
+
+ // Get the end address of the current rank.
+ MemAddr junk, endAddr;
+ o_rc = getMemAddrRange<TYPE_MBA>( i_chip, curAddr.getRank(), junk,
+ endAddr, i_rangeType );
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC "getMemAddrRange(0x%08x,0x%02x) failed",
+ i_chip->getHuid(), curAddr.getRank().getKey() );
+ break;
+ }
+
+ // Compare the addresses.
+ o_stoppedOnLastAddr = ( curAddr == endAddr );
+
+ } while (0);
+
+ return o_rc;
+
+ #undef PRDF_FUNC
+}
+
+//------------------------------------------------------------------------------
+
} // end namespace PRDF
diff --git a/src/usr/diag/prdf/plat/mem/prdfMemScrubUtils.H b/src/usr/diag/prdf/plat/mem/prdfMemScrubUtils.H
index 0dc884f46..72b52cbcc 100644
--- a/src/usr/diag/prdf/plat/mem/prdfMemScrubUtils.H
+++ b/src/usr/diag/prdf/plat/mem/prdfMemScrubUtils.H
@@ -155,16 +155,6 @@ template<TARGETING::TYPE T>
uint32_t checkEccFirs( ExtensibleChip * i_chip, uint32_t & o_eccAttns );
/**
- * @brief Checks if the command currently configured in hardware is background
- * scrubbing.
- * @param i_chip MCBIST, MCA, or MBA.
- * @param o_isBgScrub TRUE if background scrub is configured, FALSE otherwise.
- * @return Non-SUCCESS on SCOM failures, SUCCESS otherwise.
- */
-template<TARGETING::TYPE T>
-uint32_t isBgScrubConfig( ExtensibleChip * i_chip, bool & o_isBgScrub );
-
-/**
* @brief Sets the ETE thresholds needed for background scrubbing.
* @param i_chip An MBA.
* @param i_rank A rank on the target DIMM.
@@ -174,6 +164,19 @@ template<TARGETING::TYPE T>
uint32_t setBgScrubThresholds( ExtensibleChip * i_chip,
const MemRank & i_rank );
+/**
+ * @param i_chip MBA.
+ * @param i_rangeType See enum AddrRangeType.
+ * @param o_stoppedOnLastAddr True, if the current maintenance command stopped
+ * on the last address of the given rank range.
+ * False, otherwise.
+ * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise.
+ */
+template<TARGETING::TYPE T>
+uint32_t didCmdStopOnLastAddr( ExtensibleChip * i_chip,
+ AddrRangeType i_rangeType,
+ bool & o_stoppedOnLastAddr );
+
} //end namespace PRDF
#endif // __prdfMemScrubUtils_H
diff --git a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.C b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.C
index 906b40d0c..cad88c66c 100644
--- a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.C
+++ b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.C
@@ -348,21 +348,18 @@ uint32_t MemTdCtlr<T>::analyzeCmdComplete( bool & o_errorsFound,
if ( iv_queue.empty() )
{
// The queue is empty so it is possible that background scrubbing
- // only stopped for FFDC. Simply resume the command instead of
- // starting a new one. Note that it is possible to get here if we
- // were running a TD procedure and the PRD service is reset.
- // Therefore, we must check if background scrubbing was actually
- // configured.
- bool isBgScrub;
- o_rc = isBgScrubConfig<T>( iv_chip, isBgScrub );
+ // only stopped for FFDC. If possible, simply resume the command
+ // instead of starting a new one. This must be checked here instead
+ // of in defaultStep() because a TD procedure could have been run
+ // before defaultStep() and it is possible that canResumeBgScrub()
+ // could give us a false positive in that case.
+ o_rc = canResumeBgScrub( iv_resumeBgScrub );
if ( SUCCESS != o_rc )
{
- PRDF_ERR( PRDF_FUNC "isBgScrubConfig(0x%08x) failed",
+ PRDF_ERR( PRDF_FUNC "canResumeBgScrub(0x%08x) failed",
iv_chip->getHuid() );
break;
}
-
- if ( isBgScrub ) iv_resumeBgScrub = true;
}
else
{
diff --git a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.H b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.H
index e098622cb..eef997a51 100644
--- a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.H
+++ b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.H
@@ -290,6 +290,13 @@ class MemTdCtlr
*/
uint32_t unmaskEccAttns();
+ /**
+ * @param o_canResume True, if background scrubbing can be resumed. False,
+ * if a new background scrub command must be started.
+ * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise.
+ */
+ uint32_t canResumeBgScrub( bool & o_canResume );
+
#endif
private: // instance variables
diff --git a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C
index 2fef2fd9a..a1300496c 100644
--- a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C
+++ b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C
@@ -1149,6 +1149,79 @@ uint32_t MemTdCtlr<TYPE_MBA>::handleRrFo()
//------------------------------------------------------------------------------
+template<>
+uint32_t MemTdCtlr<TYPE_MCBIST>::canResumeBgScrub( bool & o_canResume )
+{
+ #define PRDF_FUNC "[MemTdCtlr<TYPE_MCBIST>::canResumeBgScrub] "
+
+ uint32_t o_rc = SUCCESS;
+
+ o_canResume = false;
+
+ // It is possible that we were running a TD procedure and the PRD service
+ // was reset. Therefore, we must check if background scrubbing was actually
+ // configured. There really is not a good way of doing this. A scrub command
+ // is a scrub command; the only difference is the speed. Unfortunately, that
+ // speed can change depending on how the hardware team tunes it. For now, we
+ // can use the stop conditions, which should be unique for background scrub,
+ // to determine if it has been configured.
+
+ SCAN_COMM_REGISTER_CLASS * reg = iv_chip->getRegister( "MBSTR" );
+ o_rc = reg->Read();
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC "Read() failed on MBSTR: iv_chip=0x%08x",
+ iv_chip->getHuid() );
+ }
+ else if ( 0xf != reg->GetBitFieldJustified(0,4) && // NCE int TH
+ 0xf != reg->GetBitFieldJustified(4,4) && // NCE soft TH
+ 0xf != reg->GetBitFieldJustified(8,4) && // NCE hard TH
+ reg->IsBitSet(34) && // pause on MPE
+ reg->IsBitSet(35) ) // pause on UE
+ {
+ o_canResume = true;
+ }
+
+ return o_rc;
+
+ #undef PRDF_FUNC
+}
+
+template<>
+uint32_t MemTdCtlr<TYPE_MBA>::canResumeBgScrub( bool & o_canResume )
+{
+ #define PRDF_FUNC "[MemTdCtlr<TYPE_MBA>::canResumeBgScrub] "
+
+ uint32_t o_rc = SUCCESS;
+
+ o_canResume = false;
+
+ // It is possible that we were running a TD procedure and the PRD service
+ // was reset. Assuming the command did not stop on the last address of the
+ // current slave rank, we will simply "resume" the command from the next
+ // address to the end of the rank. The MBA resume actually starts a new
+ // command, unlike MCBIST. Therefore, we can get away with blindly starting
+ // the command without trying to assess what type of command was actually
+ // running.
+
+ bool lastAddr = false;
+ o_rc = didCmdStopOnLastAddr<TYPE_MBA>( iv_chip, SLAVE_RANK, lastAddr );
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC "didCmdStopOnLastAddr(0x%08x) failed",
+ iv_chip->getHuid() );
+ }
+ else
+ {
+ o_canResume = !lastAddr;
+ }
+
+ return o_rc;
+
+ #undef PRDF_FUNC
+}
+
+//------------------------------------------------------------------------------
// Avoid linker errors with the template.
template class MemTdCtlr<TYPE_MCBIST>;
OpenPOWER on IntegriCloud