summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Zane Shelley <zshelle@us.ibm.com> 2018-05-17 11:36:17 -0500
committer Zane C. Shelley <zshelle@us.ibm.com> 2018-05-23 15:11:20 -0400
commit 0d6b900a5d121f3cf9d67d0c2fdb91efd27d2a9b (patch)
tree e3e7330f6b0ffbb37ed7189ca3eae5c74b2779ab
parent 5064efd9ee552ace99fb051ce98bb896202933ab (diff)
download talos-hostboot-0d6b900a5d121f3cf9d67d0c2fdb91efd27d2a9b.tar.gz
talos-hostboot-0d6b900a5d121f3cf9d67d0c2fdb91efd27d2a9b.zip
PRD: Maintenance RCE handling during background scrub for Centaur
Change-Id: Ib146e7bead1f3b4bae4e36fd582360bdbd22afce
RTC: 192638
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/58979
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Reviewed-by: Caleb N. Palmer <cnpalmer@us.ibm.com>
Reviewed-by: Matt Derksen <mderkse1@us.ibm.com>
Reviewed-by: Benjamin J. Weisenbeck <bweisenb@us.ibm.com>
Reviewed-by: Zane C. Shelley <zshelle@us.ibm.com>
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/59228
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
-rwxr-xr-x src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_rt.C 176
-rwxr-xr-x src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_rt.H 25
-rw-r--r-- src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C 62
3 files changed, 53 insertions, 210 deletions
diff --git a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_rt.C b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_rt.C
index 4f8df05fa..24d7b6c9e 100755
--- a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_rt.C
+++ b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_rt.C
@@ -532,36 +532,6 @@ int32_t CenMbaTdCtlr::analyzeCmdComplete( STEP_CODE_DATA_STRUCT & io_sc,
}
}
- if ( eccErrorMask & RETRY_CTE )
- {
- o_rc = handleRceEte_NonTd( io_sc );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "handleRceEte_NonTd() failed" );
- break;
- }
- }
-
- if ( eccErrorMask & MPE )
- {
- o_rc = handleMpe_NonTd( io_sc, i_stopAddr );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "handleMpe_NonTd() failed" );
- break;
- }
- }
-
- if ( eccErrorMask & UE )
- {
- o_rc = handleUe_NonTd( io_sc, i_stopAddr );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "handleUe_NonTd() failed" );
- break;
- }
- }
-
if ( iv_queue.empty() )
{
// No TD requests so resume background. If the scrub reached the end
@@ -1956,152 +1926,6 @@ int32_t CenMbaTdCtlr::handleCeEte_Tps( STEP_CODE_DATA_STRUCT & io_sc )
//------------------------------------------------------------------------------
-int32_t CenMbaTdCtlr::handleUe_NonTd( STEP_CODE_DATA_STRUCT & io_sc,
- const CenAddr & i_addr )
-{
- #define PRDF_FUNC "[CenMbaTdCtlr::handleUe_NonTd] "
-
- int32_t o_rc = SUCCESS;
-
- setTdSignature( io_sc, PRDFSIG_MaintUE );
-
- do
- {
- // Add entry to UE table.
- CenMbaDataBundle * mbadb = getMbaDataBundle( iv_mbaChip );
- mbadb->iv_ueTable.addEntry( UE_TABLE::SCRUB_UE, i_addr );
-
- // Callout the rank.
- MemoryMru memmru ( iv_mbaTrgt, iv_rank, MemoryMruData::CALLOUT_RANK );
- io_sc.service_data->SetCallout( memmru );
- io_sc.service_data->setServiceCall();
-
- // Add a TPS request to the queue and ban any future TPS requests.
- o_rc = addTdQueueEntryTPS( iv_rank, io_sc, true );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "addTdQueueEntryTPS() failed" );
- break;
- }
-
- // Send lmb gard message to PHYP.
- o_rc = DEALLOC::lmbGard( iv_mbaChip, i_addr, false );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "lmbGard() failed" );
- break;
- }
-
- } while(0);
-
- return o_rc;
-
- #undef PRDF_FUNC
-}
-
-//------------------------------------------------------------------------------
-
-int32_t CenMbaTdCtlr::handleMpe_NonTd( STEP_CODE_DATA_STRUCT & io_sc,
- const CenAddr & i_addr )
-{
- #define PRDF_FUNC "[CenMbaTdCtlr::handleMpe_NonTd] "
-
- int32_t o_rc = SUCCESS;
-
- setTdSignature( io_sc, PRDFSIG_MaintMPE );
-
- do
- {
- // Add entry to UE table.
- CenMbaDataBundle * mbadb = getMbaDataBundle( iv_mbaChip );
- mbadb->iv_ueTable.addEntry( UE_TABLE::SCRUB_MPE, i_addr );
-
- // Add a VCM request to the queue.
- o_rc = addTdQueueEntryVCM( iv_rank );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "addTdQueueEntryVCM() failed" );
- break;
- }
-
- // Get the current mark in hardware.
- CenMark mark;
- o_rc = mssGetMarkStore( iv_mbaTrgt, iv_rank, mark );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "mssGetMarkStore() failed");
- break;
- }
-
- // Callout the mark.
- CalloutUtil::calloutMark( iv_mbaTrgt, iv_rank, mark, io_sc );
-
- } while( 0 );
-
- return o_rc;
-
- #undef PRDF_FUNC
-}
-
-//------------------------------------------------------------------------------
-
-int32_t CenMbaTdCtlr::handleRceEte_NonTd( STEP_CODE_DATA_STRUCT & io_sc )
-{
- #define PRDF_FUNC "[CenMbaTdCtlr::handleRceEte_NonTd] "
-
- int32_t o_rc = SUCCESS;
-
- setTdSignature( io_sc, PRDFSIG_MaintRETRY_CTE );
-
- do
- {
- MemoryMru memmru ( iv_mbaTrgt, iv_rank, MemoryMruData::CALLOUT_RANK );
- io_sc.service_data->SetCallout( memmru );
-
- bool doTps = true;
-
- if ( mfgMode() )
- {
- // Get RCE count.
- const char * reg_str = (0 == iv_mbaPos) ? "MBA0_MBSEC1"
- : "MBA1_MBSEC1";
- SCAN_COMM_REGISTER_CLASS * mbsec1
- = iv_membChip->getRegister( reg_str );
- o_rc = mbsec1->Read();
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "Read() failed on %s", reg_str );
- break;
- }
-
- uint16_t count = mbsec1->GetBitFieldJustified( 0, 12 );
-
- // Add count to RCE table
- CenMbaDataBundle * mbadb = getMbaDataBundle( iv_mbaChip );
- doTps = mbadb->iv_rceTable.addEntry( iv_rank, io_sc, count );
- }
- else
- io_sc.service_data->setServiceCall();
-
- if ( doTps )
- {
- o_rc = addTdQueueEntryTPS( iv_rank, io_sc );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "addTdQueueEntryTPS() failed" );
- break;
- }
- }
-
- } while(0);
-
- return o_rc;
-
- #undef PRDF_FUNC
-}
-
-//------------------------------------------------------------------------------
-
int32_t CenMbaTdCtlr::handleHardCeEte_NonTd( STEP_CODE_DATA_STRUCT & io_sc,
const CenAddr & i_addr )
{
diff --git a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_rt.H b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_rt.H
index 40f67247d..2838d288a 100755
--- a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_rt.H
+++ b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaTdCtlr_rt.H
@@ -262,31 +262,6 @@ class CenMbaTdCtlr : public CenMbaTdCtlrCommon
int32_t handleCeEte_Tps( STEP_CODE_DATA_STRUCT & io_sc );
/**
- * @brief Handles UEs during background scrub.
- * @param io_sc The step code data struct.
- * @param i_addr The address in which the maintenance command stopped.
- * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise.
- */
- int32_t handleUe_NonTd( STEP_CODE_DATA_STRUCT & io_sc,
- const CenAddr & i_addr );
-
- /**
- * @brief Handles MPEs during background scrub.
- * @param io_sc The step code data struct.
- * @param i_addr The address in which the maintenance command stopped.
- * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise.
- */
- int32_t handleMpe_NonTd( STEP_CODE_DATA_STRUCT & io_sc,
- const CenAddr & i_addr );
-
- /**
- * @brief Handles RCE ETEs during background scrub.
- * @param io_sc The step code data struct.
- * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise.
- */
- int32_t handleRceEte_NonTd( STEP_CODE_DATA_STRUCT & io_sc );
-
- /**
* @brief Handles hard CE ETEs during background scrub.
* @param io_sc The step code data struct.
* @param i_addr The address in which the maintenance command stopped.
diff --git a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C
index e0835df7e..b42daa446 100644
--- a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C
+++ b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C
@@ -524,12 +524,13 @@ uint32_t __handleNceEte( ExtensibleChip * i_chip, TdQueue & io_queue,
//------------------------------------------------------------------------------
template<TARGETING::TYPE T>
-uint32_t __handleRceEte( ExtensibleChip * i_chip, bool & o_errorsFound,
+uint32_t __handleRceEte( ExtensibleChip * i_chip, TdQueue & io_queue,
+ const MemRank & i_rank, bool & o_errorsFound,
STEP_CODE_DATA_STRUCT & io_sc );
template<>
-uint32_t __handleRceEte<TYPE_MCA>( ExtensibleChip * i_chip,
- bool & o_errorsFound,
+uint32_t __handleRceEte<TYPE_MCA>( ExtensibleChip * i_chip, TdQueue & io_queue,
+ const MemRank & i_rank, bool & o_errorsFound,
STEP_CODE_DATA_STRUCT & io_sc )
{
#define PRDF_FUNC "[__handleRceEte] "
@@ -557,7 +558,7 @@ uint32_t __handleRceEte<TYPE_MCA>( ExtensibleChip * i_chip,
o_errorsFound = true;
io_sc.service_data->AddSignatureList( i_chip->getTrgt(),
PRDFSIG_MaintIUE );
- o_rc = MemEcc::analyzeMaintIue<TYPE_MCA>(i_chip, io_sc);
+ o_rc = MemEcc::handleMemIue<TYPE_MCA>( i_chip, i_rank, io_sc );
if ( SUCCESS != o_rc )
{
PRDF_ERR( PRDF_FUNC "analyzeMaintIue(0x%08x) failed",
@@ -572,18 +573,61 @@ uint32_t __handleRceEte<TYPE_MCA>( ExtensibleChip * i_chip,
#undef PRDF_FUNC
}
-/* TODO RTC 157888
template<>
-uint32_t __handleRceEte<TYPE_MBA>( ExtensibleChip * i_chip,
- bool & o_errorsFound,
+uint32_t __handleRceEte<TYPE_MBA>( ExtensibleChip * i_chip, TdQueue & io_queue,
+ const MemRank & i_rank, bool & o_errorsFound,
STEP_CODE_DATA_STRUCT & io_sc )
{
#define PRDF_FUNC "[__handleRceEte] "
uint32_t o_rc = SUCCESS;
+ TargetHandle_t trgt = i_chip->getTrgt();
+
+ o_errorsFound = true;
+ io_sc.service_data->AddSignatureList( trgt, PRDFSIG_MaintRETRY_CTE );
+
+ // Add the rank to the callout list.
+ MemoryMru mm { trgt, i_rank, MemoryMruData::CALLOUT_RANK };
+ io_sc.service_data->SetCallout( mm );
+
do
{
+ bool doTps = true;
+
+ if ( mfgMode() )
+ {
+ ExtensibleChip * membChip = getConnectedParent(i_chip, TYPE_MEMBUF);
+
+ // Get the current RCE count from hardware.
+ const char * reg_str = (0 == i_chip->getPos()) ? "MBA0_MBSEC1"
+ : "MBA1_MBSEC1";
+ SCAN_COMM_REGISTER_CLASS * reg = membChip->getRegister( reg_str );
+ o_rc = reg->Read();
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC "Read() failed on %s", reg_str );
+ break;
+ }
+ uint16_t count = reg->GetBitFieldJustified( 0, 12 );
+
+ // Add the count to RCE table.
+ doTps = getMbaDataBundle(i_chip)->iv_rceTable.addEntry( i_rank,
+ io_sc,
+ count );
+ }
+ else
+ {
+ // The RCE threshold was set to the maximum. If we hit this then
+ // there is definitely a problem.
+ io_sc.service_data->setServiceCall();
+ }
+
+ // Add a TPS procedure to the queue, if needed.
+ if ( doTps )
+ {
+ io_queue.push( new TpsEvent<TYPE_MBA>(i_chip, i_rank) );
+ }
} while (0);
@@ -591,7 +635,6 @@ uint32_t __handleRceEte<TYPE_MBA>( ExtensibleChip * i_chip,
#undef PRDF_FUNC
}
-*/
//------------------------------------------------------------------------------
@@ -694,7 +737,8 @@ uint32_t __checkEcc( ExtensibleChip * i_chip, TdQueue & io_queue,
if ( 0 != (eccAttns & MAINT_RCE_ETE) )
{
- o_rc = __handleRceEte<T>( i_chip, o_errorsFound, io_sc );
+ o_rc = __handleRceEte<T>( i_chip, io_queue, rank, o_errorsFound,
+ io_sc );
if ( SUCCESS != o_rc )
{
PRDF_ERR( PRDF_FUNC "__handleRceEte<T>(0x%08x) failed", huid );
OpenPOWER on IntegriCloud