summary | refs | log | tree | commit | diff | stats
path: root/src/usr
diff options
context:
space:
mode:
author: Zane Shelley <zshelle@us.ibm.com> 2018-05-17 12:01:34 -0500
committer: Zane C. Shelley <zshelle@us.ibm.com> 2018-05-23 15:11:24 -0400
commit: e940af9a779a680dd817b65f5bbc356ad91f4c59 (patch)
tree: e304eba798279c787097e3a3445ba7089d2b1053 /src/usr
parent: 0d6b900a5d121f3cf9d67d0c2fdb91efd27d2a9b (diff)
download: talos-hostboot-e940af9a779a680dd817b65f5bbc356ad91f4c59.tar.gz
download: talos-hostboot-e940af9a779a680dd817b65f5bbc356ad91f4c59.zip
PRD: Maint soft/inter/hard CE handling during background scrub for Centaur
Change-Id: I9363812d7e3a7fcca46e481c6250d810bfcd970a
RTC: 192638
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/58980
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Reviewed-by: Caleb N. Palmer <cnpalmer@us.ibm.com>
Reviewed-by: Matt Derksen <mderkse1@us.ibm.com>
Reviewed-by: Benjamin J. Weisenbeck <bweisenb@us.ibm.com>
Reviewed-by: Brian J. Stegmiller <bjs@us.ibm.com>
Reviewed-by: Zane C. Shelley <zshelle@us.ibm.com>
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/59229
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Diffstat (limited to 'src/usr')
-rwxr-xr-x  src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_rt.C  219
-rwxr-xr-x  src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_rt.H  16
-rw-r--r--  src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C  52
3 files changed, 42 insertions, 245 deletions
diff --git a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_rt.C b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_rt.C
index 24d7b6c9e..6e69f8ac5 100755
--- a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_rt.C
+++ b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_rt.C
@@ -468,124 +468,6 @@ int32_t CenMbaTdCtlr::initialize()
//------------------------------------------------------------------------------
-int32_t CenMbaTdCtlr::analyzeCmdComplete( STEP_CODE_DATA_STRUCT & io_sc,
- const CenAddr & i_stopAddr,
- const CenAddr & i_endAddr )
-{
- #define PRDF_FUNC "[CenMbaTdCtlr::analyzeCmdComplete] "
-
- int32_t o_rc = SUCCESS;
-
- do
- {
- if ( NO_OP != iv_tdState )
- {
- PRDF_ERR( PRDF_FUNC "Invalid state machine configuration" );
- o_rc = FAIL; break;
- }
-
- // Initialize iv_rank. This must be done before calling other
- // functions as they require iv_rank to be accurate.
- iv_rank = i_stopAddr.getRank();
-
- // Background scrubbing was interrupted, most likely because of an ECC
- // error, so set the interrupted rank in the rank list.
- o_rc = iv_masterRanks.setInterruptedRank( iv_rank );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "setInterruptedRank() failed" );
- break;
- }
-
- // Get all reported error conditions.
- uint16_t eccErrorMask = NO_ERROR;
- o_rc = checkEccErrors( eccErrorMask, io_sc );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "checkEccErrors() failed" );
- break;
- }
-
- // The order of the following checks is important. Each call to handle
- // an error will set the PRD signature and override the previous
- // signature. We want the highest priority error signature (memory UEs)
- // to be displayed so these checks should be ordered from lowest to
- // highest priority.
-
- if ( (eccErrorMask & SOFT_CTE) || (eccErrorMask & INTER_CTE) )
- {
- o_rc = handleSoftIntCeEte_NonTd( io_sc );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "handleSoftIntCeEte_NonTd() failed" );
- break;
- }
- }
-
- if ( eccErrorMask & HARD_CTE )
- {
- o_rc = handleHardCeEte_NonTd( io_sc, i_stopAddr );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "handleHardCeEte_NonTd() failed" );
- break;
- }
- }
-
- if ( iv_queue.empty() )
- {
- // No TD requests so resume background. If the scrub reached the end
- // address, start background scrubbing on the next good rank.
- // Otherwise, resume the current scrub.
-
- if ( i_endAddr == i_stopAddr )
- {
- if ( (NO_ERROR == eccErrorMask) || (MCE == eccErrorMask) )
- {
- // The scrub completed without an error (this function
- // currently ignores MCEs). Don't commit the error log
- // (reduces informational error logs).
- io_sc.service_data->setDontCommitErrl();
- }
-
- o_rc = startBgScrub( io_sc );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "startBgScrub() failed" );
- break;
- }
- }
- else
- {
- // Restart the scrub on the next address.
- o_rc = resumeScrub( io_sc, eccErrorMask );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "resumeScrub() failed" );
- break;
- }
- }
- }
- else
- {
- // A TD request was added to the queue, start the next TD request.
- o_rc = startNextTd( io_sc );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "startNextTd() failed" );
- break;
- }
- }
-
- } while(0);
-
- return o_rc;
-
- #undef PRDF_FUNC
-}
-
-//------------------------------------------------------------------------------
-
int32_t CenMbaTdCtlr::analyzeVcmPhase1( STEP_CODE_DATA_STRUCT & io_sc,
const CenAddr & i_stopAddr,
const CenAddr & i_endAddr )
@@ -1926,107 +1808,6 @@ int32_t CenMbaTdCtlr::handleCeEte_Tps( STEP_CODE_DATA_STRUCT & io_sc )
//------------------------------------------------------------------------------
-int32_t CenMbaTdCtlr::handleHardCeEte_NonTd( STEP_CODE_DATA_STRUCT & io_sc,
- const CenAddr & i_addr )
-{
- #define PRDF_FUNC "[CenMbaTdCtlr::handleHardCeEte_NonTd] "
-
- int32_t o_rc = SUCCESS;
-
- setTdSignature( io_sc, PRDFSIG_MaintHARD_CTE );
-
- do
- {
- // Send page deallocation message to PHYP
- o_rc = DEALLOC::pageGard( iv_mbaChip, i_addr );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "pageGard() failed" );
- break;
- }
-
- // Get the failing symbol. Note that the hard CE threshold is 1 so there
- // should only be one symbol with a non-zero per symbol count.
-
- MaintSymbols symData; CenSymbol junk;
- o_rc = collectCeStats( iv_mbaChip, iv_rank, symData, junk );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "collectCeStats() failed." );
- break;
- }
-
- if ( 1 != symData.size() )
- {
- PRDF_ERR( PRDF_FUNC "collectCeStats() return size %d, but was "
- "expecting size 1", symData.size() );
- o_rc = FAIL;
- break;
- }
-
- CenSymbol symbol = symData[0].symbol;
-
- // Callout the symbol.
- MemoryMru memmru ( iv_mbaTrgt, iv_rank, symbol );
- io_sc.service_data->SetCallout( memmru );
-
- // Add entry to CE table and add a TPS request to the queue, if needed.
- CenMbaDataBundle * mbadb = getMbaDataBundle( iv_mbaChip );
- if ( mbadb->iv_ceTable.addEntry(i_addr, symbol, true) )
- {
- o_rc = addTdQueueEntryTPS( iv_rank, io_sc );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "addTdQueueEntryTPS() failed" );
- break;
- }
- }
-
- // Any hard CEs in MNFG should be immediately reported.
- if ( mfgMode() )
- io_sc.service_data->setServiceCall();
-
- } while(0);
-
- return o_rc;
-
- #undef PRDF_FUNC
-}
-
-//------------------------------------------------------------------------------
-
-int32_t CenMbaTdCtlr::handleSoftIntCeEte_NonTd( STEP_CODE_DATA_STRUCT & io_sc )
-{
- #define PRDF_FUNC "[CenMbaTdCtlr::handleSoftIntCeEte_NonTd] "
-
- int32_t o_rc = SUCCESS;
-
- setTdSignature( io_sc, PRDFSIG_MaintNCE_CTE );
-
- do
- {
- // Callout the rank. Note that the per CE counters only capture hard CEs
- // so it is not possible to isolate any further than a rank.
- MemoryMru memmru ( iv_mbaTrgt, iv_rank, MemoryMruData::CALLOUT_RANK );
- io_sc.service_data->SetCallout( memmru );
-
- // Add a TPS request to the queue.
- o_rc = addTdQueueEntryTPS( iv_rank, io_sc );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "addTdQueueEntryTPS() failed" );
- break;
- }
-
- } while(0);
-
- return o_rc;
-
- #undef PRDF_FUNC
-}
-
-//------------------------------------------------------------------------------
-
int32_t CenMbaTdCtlr::handleTpsFalseAlarm( STEP_CODE_DATA_STRUCT & io_sc )
{
#define PRDF_FUNC "[CenMbaTdCtlr::handleTpsFalseAlarm] "
diff --git a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_rt.H b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_rt.H
index 2838d288a..6573636cd 100755
--- a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_rt.H
+++ b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_rt.H
@@ -262,22 +262,6 @@ class CenMbaTdCtlr : public CenMbaTdCtlrCommon
int32_t handleCeEte_Tps( STEP_CODE_DATA_STRUCT & io_sc );
/**
- * @brief Handles hard CE ETEs during background scrub.
- * @param io_sc The step code data struct.
- * @param i_addr The address in which the maintenance command stopped.
- * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise.
- */
- int32_t handleHardCeEte_NonTd( STEP_CODE_DATA_STRUCT & io_sc,
- const CenAddr & i_addr );
-
- /**
- * @brief Handles soft and intermittent CEs during background scrub.
- * @param io_sc The step code data struct.
- * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise.
- */
- int32_t handleSoftIntCeEte_NonTd( STEP_CODE_DATA_STRUCT & io_sc );
-
- /**
* @brief Handles TPS false alarms.
* @param io_sc The step code data struct.
* @note Should only be called at the end of TPS phase 2 if no ECC errors
diff --git a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C
index b42daa446..052d6e0da 100644
--- a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C
+++ b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C
@@ -524,6 +524,42 @@ uint32_t __handleNceEte( ExtensibleChip * i_chip, TdQueue & io_queue,
//------------------------------------------------------------------------------
template<TARGETING::TYPE T>
+uint32_t __handleSoftInterCeEte( ExtensibleChip * i_chip, TdQueue & io_queue,
+ const MemAddr & i_addr,
+ STEP_CODE_DATA_STRUCT & io_sc );
+
+template<>
+uint32_t __handleSoftInterCeEte<TYPE_MCA>( ExtensibleChip * i_chip,
+ TdQueue & io_queue,
+ const MemAddr & i_addr,
+ STEP_CODE_DATA_STRUCT & io_sc )
+{
+ return __handleNceEte<TYPE_MCA>( i_chip, io_queue, i_addr, io_sc );
+}
+
+template<>
+uint32_t __handleSoftInterCeEte<TYPE_MBA>( ExtensibleChip * i_chip,
+ TdQueue & io_queue,
+ const MemAddr & i_addr,
+ STEP_CODE_DATA_STRUCT & io_sc )
+{
+ // Due to workarounds on the Centaur we are unable to stop on each
+ // occurrence of the soft or intermittent CEs like we do for Nimbus.
+ // Instead, the threshold is set much higher. If the threshold is hit we
+ // simply want to add the rank to the callout list and trigger TPS.
+
+ MemoryMru mm { i_chip->getTrgt(), i_addr.getRank(),
+ MemoryMruData::CALLOUT_RANK };
+ io_sc.service_data->SetCallout( mm );
+
+ io_queue.push( new TpsEvent<TYPE_MBA>(i_chip, i_addr.getRank()) );
+
+ return SUCCESS;
+}
+
+//------------------------------------------------------------------------------
+
+template<TARGETING::TYPE T>
uint32_t __handleRceEte( ExtensibleChip * i_chip, TdQueue & io_queue,
const MemRank & i_rank, bool & o_errorsFound,
STEP_CODE_DATA_STRUCT & io_sc );
@@ -673,10 +709,10 @@ uint32_t __checkEcc( ExtensibleChip * i_chip, TdQueue & io_queue,
o_errorsFound = true;
io_sc.service_data->AddSignatureList( trgt, PRDFSIG_MaintINTER_CTE);
- o_rc = __handleNceEte<T>( i_chip, io_queue, i_addr, io_sc );
+ o_rc = __handleSoftInterCeEte<T>( i_chip, io_queue, i_addr, io_sc );
if ( SUCCESS != o_rc )
{
- PRDF_ERR( PRDF_FUNC "__handleNceEte<T>(0x%08x) failed",
+ PRDF_ERR( PRDF_FUNC "__handleSoftInterCeEte<T>(0x%08x) failed",
huid );
break;
}
@@ -687,10 +723,10 @@ uint32_t __checkEcc( ExtensibleChip * i_chip, TdQueue & io_queue,
o_errorsFound = true;
io_sc.service_data->AddSignatureList( trgt, PRDFSIG_MaintSOFT_CTE );
- o_rc = __handleNceEte<T>( i_chip, io_queue, i_addr, io_sc );
+ o_rc = __handleSoftInterCeEte<T>( i_chip, io_queue, i_addr, io_sc );
if ( SUCCESS != o_rc )
{
- PRDF_ERR( PRDF_FUNC "__handleNceEte<T>(0x%08x) failed",
+ PRDF_ERR( PRDF_FUNC "__handleSoftInterCeEte<T>(0x%08x) failed",
huid );
break;
}
@@ -781,14 +817,10 @@ template
uint32_t __checkEcc<TYPE_MCA>( ExtensibleChip * i_chip, TdQueue & io_queue,
const MemAddr & i_addr, bool & o_errorsFound,
STEP_CODE_DATA_STRUCT & io_sc );
-template<>
+template
uint32_t __checkEcc<TYPE_MBA>( ExtensibleChip * i_chip, TdQueue & io_queue,
const MemAddr & i_addr, bool & o_errorsFound,
- STEP_CODE_DATA_STRUCT & io_sc )
-{
- // TODO: remove this once runtime support is abled for MBA.
- return SUCCESS;
-}
+ STEP_CODE_DATA_STRUCT & io_sc );
//------------------------------------------------------------------------------
OpenPOWER on IntegriCloud