summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Zane Shelley <zshelle@us.ibm.com> 2018-04-25 12:46:24 -0500
committer: Zane C. Shelley <zshelle@us.ibm.com> 2018-04-27 21:30:46 -0400
commit: 096bf926ad629fae603499bd5fbdeba19cf818b5 (patch)
tree: 3b8a75a32b64e9785bbb4277bde809dcb5d26aa0
parent: cda40fd41b6921d307a384c600098d3ef395e01c (diff)
download: talos-hostboot-096bf926ad629fae603499bd5fbdeba19cf818b5.tar.gz
download: talos-hostboot-096bf926ad629fae603499bd5fbdeba19cf818b5.zip
PRD: Memory CE, UE, RCE isolation for MBA
Change-Id: If6e80e2c6bd3f83113fd24486ca8a285ea0d4447
RTC: 187480
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/57855
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Reviewed-by: Caleb N. Palmer <cnpalmer@us.ibm.com>
Reviewed-by: Benjamin J. Weisenbeck <bweisenb@us.ibm.com>
Reviewed-by: Matt Derksen <mderkse1@us.ibm.com>
Reviewed-by: Brian J. Stegmiller <bjs@us.ibm.com>
Reviewed-by: Zane C. Shelley <zshelle@us.ibm.com>
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/57932
CI-Ready: Zane C. Shelley <zshelle@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
-rw-r--r-- src/usr/diag/prdf/common/plat/cen/cen_centaur.rule 54
-rw-r--r-- src/usr/diag/prdf/common/plat/cen/cen_centaur_actions.rule 40
-rw-r--r-- src/usr/diag/prdf/common/plat/cen/prdfCenMembuf_common.C 118
-rw-r--r-- src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C 72
-rw-r--r-- src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.H 10
-rwxr-xr-x src/usr/diag/prdf/common/plat/pegasus/Membuf.rule 62
-rwxr-xr-x src/usr/diag/prdf/common/plat/pegasus/Membuf_acts_NEST.rule 82
-rwxr-xr-x src/usr/diag/prdf/common/plat/pegasus/prdfCenMembuf.C 390
8 files changed, 296 insertions, 532 deletions
diff --git a/src/usr/diag/prdf/common/plat/cen/cen_centaur.rule b/src/usr/diag/prdf/common/plat/cen/cen_centaur.rule
index ea4724c10..da358c8b7 100644
--- a/src/usr/diag/prdf/common/plat/cen/cen_centaur.rule
+++ b/src/usr/diag/prdf/common/plat/cen/cen_centaur.rule
@@ -1425,47 +1425,48 @@ rule rMBSECCFIR_0
MBSECCFIR_0 & ~MBSECCFIR_0_MASK & ~MBSECCFIR_0_ACT0 & MBSECCFIR_0_ACT1;
};
-group gMBSECCFIR_0 filter singlebit, cs_root_cause( 19, 44, 47, 49 )
+group gMBSECCFIR_0 filter priority ( 19, 41 ),
+ cs_root_cause( 19, 44, 47, 49 )
{
/** MBSECCFIR_0[0]
* Memory chip mark on rank 0
*/
- (rMBSECCFIR_0, bit(0)) ? TBDDefaultCallout;
+ (rMBSECCFIR_0, bit(0)) ? verify_chip_mark_0_0;
/** MBSECCFIR_0[1]
* Memory chip mark on rank 1
*/
- (rMBSECCFIR_0, bit(1)) ? TBDDefaultCallout;
+ (rMBSECCFIR_0, bit(1)) ? verify_chip_mark_0_1;
/** MBSECCFIR_0[2]
* Memory chip mark on rank 2
*/
- (rMBSECCFIR_0, bit(2)) ? TBDDefaultCallout;
+ (rMBSECCFIR_0, bit(2)) ? verify_chip_mark_0_2;
/** MBSECCFIR_0[3]
* Memory chip mark on rank 3
*/
- (rMBSECCFIR_0, bit(3)) ? TBDDefaultCallout;
+ (rMBSECCFIR_0, bit(3)) ? verify_chip_mark_0_3;
/** MBSECCFIR_0[4]
* Memory chip mark on rank 4
*/
- (rMBSECCFIR_0, bit(4)) ? TBDDefaultCallout;
+ (rMBSECCFIR_0, bit(4)) ? verify_chip_mark_0_4;
/** MBSECCFIR_0[5]
* Memory chip mark on rank 5
*/
- (rMBSECCFIR_0, bit(5)) ? TBDDefaultCallout;
+ (rMBSECCFIR_0, bit(5)) ? verify_chip_mark_0_5;
/** MBSECCFIR_0[6]
* Memory chip mark on rank 6
*/
- (rMBSECCFIR_0, bit(6)) ? TBDDefaultCallout;
+ (rMBSECCFIR_0, bit(6)) ? verify_chip_mark_0_6;
/** MBSECCFIR_0[7]
* Memory chip mark on rank 7
*/
- (rMBSECCFIR_0, bit(7)) ? TBDDefaultCallout;
+ (rMBSECCFIR_0, bit(7)) ? verify_chip_mark_0_7;
/** MBSECCFIR_0[8:15]
* Reserved
@@ -1475,12 +1476,12 @@ group gMBSECCFIR_0 filter singlebit, cs_root_cause( 19, 44, 47, 49 )
/** MBSECCFIR_0[16]
* Memory NCE
*/
- (rMBSECCFIR_0, bit(16)) ? TBDDefaultCallout;
+ (rMBSECCFIR_0, bit(16)) ? mainline_nce_handling_0;
/** MBSECCFIR_0[17]
* Memory RCE
*/
- (rMBSECCFIR_0, bit(17)) ? TBDDefaultCallout;
+ (rMBSECCFIR_0, bit(17)) ? mainline_rce_pue_handling_0;
/** MBSECCFIR_0[18]
* Memory SUE
@@ -1490,7 +1491,7 @@ group gMBSECCFIR_0 filter singlebit, cs_root_cause( 19, 44, 47, 49 )
/** MBSECCFIR_0[19]
* Memory UE
*/
- (rMBSECCFIR_0, bit(19)) ? TBDDefaultCallout;
+ (rMBSECCFIR_0, bit(19)) ? mainline_ue_handling_0_UERE;
/** MBSECCFIR_0[20:27]
* Maintenance chip mark
@@ -1540,7 +1541,7 @@ group gMBSECCFIR_0 filter singlebit, cs_root_cause( 19, 44, 47, 49 )
/** MBSECCFIR_0[43]
* Prefetch Memory UE
*/
- (rMBSECCFIR_0, bit(43)) ? TBDDefaultCallout;
+ (rMBSECCFIR_0, bit(43)) ? mainline_rce_pue_handling_0;
/** MBSECCFIR_0[44]
* Memory RCD parity error
@@ -1596,47 +1597,48 @@ rule rMBSECCFIR_1
MBSECCFIR_1 & ~MBSECCFIR_1_MASK & ~MBSECCFIR_1_ACT0 & MBSECCFIR_1_ACT1;
};
-group gMBSECCFIR_1 filter singlebit, cs_root_cause( 19, 44, 47, 49 )
+group gMBSECCFIR_1 filter priority ( 19, 41 ),
+ cs_root_cause( 19, 44, 47, 49 )
{
/** MBSECCFIR_1[0]
* Memory chip mark on rank 0
*/
- (rMBSECCFIR_1, bit(0)) ? TBDDefaultCallout;
+ (rMBSECCFIR_1, bit(0)) ? verify_chip_mark_1_0;
/** MBSECCFIR_1[1]
* Memory chip mark on rank 1
*/
- (rMBSECCFIR_1, bit(1)) ? TBDDefaultCallout;
+ (rMBSECCFIR_1, bit(1)) ? verify_chip_mark_1_1;
/** MBSECCFIR_1[2]
* Memory chip mark on rank 2
*/
- (rMBSECCFIR_1, bit(2)) ? TBDDefaultCallout;
+ (rMBSECCFIR_1, bit(2)) ? verify_chip_mark_1_2;
/** MBSECCFIR_1[3]
* Memory chip mark on rank 3
*/
- (rMBSECCFIR_1, bit(3)) ? TBDDefaultCallout;
+ (rMBSECCFIR_1, bit(3)) ? verify_chip_mark_1_3;
/** MBSECCFIR_1[4]
* Memory chip mark on rank 4
*/
- (rMBSECCFIR_1, bit(4)) ? TBDDefaultCallout;
+ (rMBSECCFIR_1, bit(4)) ? verify_chip_mark_1_4;
/** MBSECCFIR_1[5]
* Memory chip mark on rank 5
*/
- (rMBSECCFIR_1, bit(5)) ? TBDDefaultCallout;
+ (rMBSECCFIR_1, bit(5)) ? verify_chip_mark_1_5;
/** MBSECCFIR_1[6]
* Memory chip mark on rank 6
*/
- (rMBSECCFIR_1, bit(6)) ? TBDDefaultCallout;
+ (rMBSECCFIR_1, bit(6)) ? verify_chip_mark_1_6;
/** MBSECCFIR_1[7]
* Memory chip mark on rank 7
*/
- (rMBSECCFIR_1, bit(7)) ? TBDDefaultCallout;
+ (rMBSECCFIR_1, bit(7)) ? verify_chip_mark_1_7;
/** MBSECCFIR_1[8:15]
* Reserved
@@ -1646,12 +1648,12 @@ group gMBSECCFIR_1 filter singlebit, cs_root_cause( 19, 44, 47, 49 )
/** MBSECCFIR_1[16]
* Memory NCE
*/
- (rMBSECCFIR_1, bit(16)) ? TBDDefaultCallout;
+ (rMBSECCFIR_1, bit(16)) ? mainline_nce_handling_1;
/** MBSECCFIR_1[17]
* Memory RCE
*/
- (rMBSECCFIR_1, bit(17)) ? TBDDefaultCallout;
+ (rMBSECCFIR_1, bit(17)) ? mainline_rce_pue_handling_1;
/** MBSECCFIR_1[18]
* Memory SUE
@@ -1661,7 +1663,7 @@ group gMBSECCFIR_1 filter singlebit, cs_root_cause( 19, 44, 47, 49 )
/** MBSECCFIR_1[19]
* Memory UE
*/
- (rMBSECCFIR_1, bit(19)) ? TBDDefaultCallout;
+ (rMBSECCFIR_1, bit(19)) ? mainline_ue_handling_1_UERE;
/** MBSECCFIR_1[20:27]
* Maintenance chip mark
@@ -1711,7 +1713,7 @@ group gMBSECCFIR_1 filter singlebit, cs_root_cause( 19, 44, 47, 49 )
/** MBSECCFIR_1[43]
* Prefetch Memory UE
*/
- (rMBSECCFIR_1, bit(43)) ? TBDDefaultCallout;
+ (rMBSECCFIR_1, bit(43)) ? mainline_rce_pue_handling_1;
/** MBSECCFIR_1[44]
* Memory RCD parity error
diff --git a/src/usr/diag/prdf/common/plat/cen/cen_centaur_actions.rule b/src/usr/diag/prdf/common/plat/cen/cen_centaur_actions.rule
index f60f534e7..5eb5716ee 100644
--- a/src/usr/diag/prdf/common/plat/cen/cen_centaur_actions.rule
+++ b/src/usr/diag/prdf/common/plat/cen/cen_centaur_actions.rule
@@ -183,3 +183,43 @@ actionclass l4_cache_co_ue_UERE
SueSource;
};
+/** Verify Chip Mark */
+actionclass verify_chip_mark_0_0 { funccall("AnalyzeFetchMpe0_0"); };
+actionclass verify_chip_mark_0_1 { funccall("AnalyzeFetchMpe0_1"); };
+actionclass verify_chip_mark_0_2 { funccall("AnalyzeFetchMpe0_2"); };
+actionclass verify_chip_mark_0_3 { funccall("AnalyzeFetchMpe0_3"); };
+actionclass verify_chip_mark_0_4 { funccall("AnalyzeFetchMpe0_4"); };
+actionclass verify_chip_mark_0_5 { funccall("AnalyzeFetchMpe0_5"); };
+actionclass verify_chip_mark_0_6 { funccall("AnalyzeFetchMpe0_6"); };
+actionclass verify_chip_mark_0_7 { funccall("AnalyzeFetchMpe0_7"); };
+actionclass verify_chip_mark_1_0 { funccall("AnalyzeFetchMpe1_0"); };
+actionclass verify_chip_mark_1_1 { funccall("AnalyzeFetchMpe1_1"); };
+actionclass verify_chip_mark_1_2 { funccall("AnalyzeFetchMpe1_2"); };
+actionclass verify_chip_mark_1_3 { funccall("AnalyzeFetchMpe1_3"); };
+actionclass verify_chip_mark_1_4 { funccall("AnalyzeFetchMpe1_4"); };
+actionclass verify_chip_mark_1_5 { funccall("AnalyzeFetchMpe1_5"); };
+actionclass verify_chip_mark_1_6 { funccall("AnalyzeFetchMpe1_6"); };
+actionclass verify_chip_mark_1_7 { funccall("AnalyzeFetchMpe1_7"); };
+
+/** Fetch NCE */
+actionclass mainline_nce_handling_0 { funccall("AnalyzeFetchNce0"); };
+actionclass mainline_nce_handling_1 { funccall("AnalyzeFetchNce1"); };
+
+/** Fetch UE */
+actionclass mainline_ue_handling_0_UERE
+{
+ funccall("AnalyzeFetchUe0");
+ threshold( field(33 / 30 min) );
+ SueSource;
+};
+actionclass mainline_ue_handling_1_UERE
+{
+ funccall("AnalyzeFetchUe1");
+ threshold( field(33 / 30 min) );
+ SueSource;
+};
+
+/** Fetch RCE or Prefetch UE */
+actionclass mainline_rce_pue_handling_0 { funccall("AnalyzeFetchRcePue0"); };
+actionclass mainline_rce_pue_handling_1 { funccall("AnalyzeFetchRcePue1"); };
+
diff --git a/src/usr/diag/prdf/common/plat/cen/prdfCenMembuf_common.C b/src/usr/diag/prdf/common/plat/cen/prdfCenMembuf_common.C
index af1b22761..2e0707f22 100644
--- a/src/usr/diag/prdf/common/plat/cen/prdfCenMembuf_common.C
+++ b/src/usr/diag/prdf/common/plat/cen/prdfCenMembuf_common.C
@@ -30,6 +30,8 @@
#include <prdfPluginMap.H>
// Platform includes
+#include <prdfCenMbaDataBundle.H>
+#include <prdfMemEccAnalysis.H>
#include <prdfMemUtils.H>
using namespace TARGETING;
@@ -194,6 +196,122 @@ PLUGIN_RCD_PARITY_UE_SIDEEFFECTS( 1 )
#undef PLUGIN_RCD_PARITY_UE_SIDEEFFECTS
+//##############################################################################
+//
+// MBSECCFIRs
+//
+//##############################################################################
+
+/**
+ * @brief MBSECCFIR[0:7] - Mainline MPE.
+ * @param i_chip MEMBUF chip.
+ * @param io_sc The step code data struct.
+ * @return SUCCESS
+ */
+#define PLUGIN_FETCH_MPE_ERROR( POS, RANK ) \
+int32_t AnalyzeFetchMpe##POS##_##RANK( ExtensibleChip * i_chip, \
+ STEP_CODE_DATA_STRUCT & io_sc ) \
+{ \
+ ExtensibleChip * mbaChip = getConnectedChild( i_chip, TYPE_MBA, POS ); \
+ PRDF_ASSERT( nullptr != mbaChip ); \
+ MemRank rank { RANK }; \
+ MemEcc::analyzeFetchMpe<TYPE_MBA>( mbaChip, rank, io_sc );\
+ return SUCCESS; \
+} \
+PRDF_PLUGIN_DEFINE( cen_centaur, AnalyzeFetchMpe##POS##_##RANK );
+
+PLUGIN_FETCH_MPE_ERROR( 0, 0 )
+PLUGIN_FETCH_MPE_ERROR( 0, 1 )
+PLUGIN_FETCH_MPE_ERROR( 0, 2 )
+PLUGIN_FETCH_MPE_ERROR( 0, 3 )
+PLUGIN_FETCH_MPE_ERROR( 0, 4 )
+PLUGIN_FETCH_MPE_ERROR( 0, 5 )
+PLUGIN_FETCH_MPE_ERROR( 0, 6 )
+PLUGIN_FETCH_MPE_ERROR( 0, 7 )
+
+PLUGIN_FETCH_MPE_ERROR( 1, 0 )
+PLUGIN_FETCH_MPE_ERROR( 1, 1 )
+PLUGIN_FETCH_MPE_ERROR( 1, 2 )
+PLUGIN_FETCH_MPE_ERROR( 1, 3 )
+PLUGIN_FETCH_MPE_ERROR( 1, 4 )
+PLUGIN_FETCH_MPE_ERROR( 1, 5 )
+PLUGIN_FETCH_MPE_ERROR( 1, 6 )
+PLUGIN_FETCH_MPE_ERROR( 1, 7 )
+
+#undef PLUGIN_FETCH_MPE_ERROR
+
+//------------------------------------------------------------------------------
+
+/**
+ * @brief MBSECCFIR[16] - Mainline NCE.
+ * @param i_chip MEMBUF chip.
+ * @param io_sc The step code data struct.
+ * @return SUCCESS
+ */
+#define PLUGIN_FETCH_NCE_ERROR( POS ) \
+int32_t AnalyzeFetchNce##POS( ExtensibleChip * i_chip, \
+ STEP_CODE_DATA_STRUCT & io_sc ) \
+{ \
+ ExtensibleChip * mbaChip = getConnectedChild( i_chip, TYPE_MBA, POS ); \
+ PRDF_ASSERT( nullptr != mbaChip ); \
+ MemEcc::analyzeFetchNceTce<TYPE_MBA, MbaDataBundle *>( mbaChip, io_sc ); \
+ return SUCCESS; \
+} \
+PRDF_PLUGIN_DEFINE( cen_centaur, AnalyzeFetchNce##POS );
+
+PLUGIN_FETCH_NCE_ERROR( 0 )
+PLUGIN_FETCH_NCE_ERROR( 1 )
+
+#undef PLUGIN_FETCH_NCE_ERROR
+
+//------------------------------------------------------------------------------
+
+/**
+ * @brief MBSECCFIR[19] - Mainline UE.
+ * @param i_chip MEMBUF chip.
+ * @param io_sc The step code data struct.
+ * @return SUCCESS
+ */
+#define PLUGIN_FETCH_UE_ERROR( POS ) \
+int32_t AnalyzeFetchUe##POS( ExtensibleChip * i_chip, \
+ STEP_CODE_DATA_STRUCT & io_sc ) \
+{ \
+ ExtensibleChip * mbaChip = getConnectedChild( i_chip, TYPE_MBA, POS ); \
+ PRDF_ASSERT( nullptr != mbaChip ); \
+ MemEcc::analyzeFetchUe<TYPE_MBA>( mbaChip, io_sc ); \
+ return SUCCESS; \
+} \
+PRDF_PLUGIN_DEFINE( cen_centaur, AnalyzeFetchUe##POS );
+
+PLUGIN_FETCH_UE_ERROR( 0 )
+PLUGIN_FETCH_UE_ERROR( 1 )
+
+#undef PLUGIN_FETCH_UE_ERROR
+
+//------------------------------------------------------------------------------
+
+/**
+ * @brief MBSECCFIR[17] - Mainline RCE, MBSECCFIR[43] - Prefetch UE.
+ * @param i_chip MEMBUF chip.
+ * @param io_sc The step code data struct.
+ * @return SUCCESS
+ */
+#define PLUGIN_FETCH_RCE_PUE_ERROR( POS ) \
+int32_t AnalyzeFetchRcePue##POS( ExtensibleChip * i_chip, \
+ STEP_CODE_DATA_STRUCT & io_sc ) \
+{ \
+ ExtensibleChip * mbaChip = getConnectedChild( i_chip, TYPE_MBA, POS ); \
+ PRDF_ASSERT( nullptr != mbaChip ); \
+ MemEcc::analyzeFetchRcePue<TYPE_MBA>( mbaChip, io_sc ); \
+ return SUCCESS; \
+} \
+PRDF_PLUGIN_DEFINE( cen_centaur, AnalyzeFetchRcePue##POS );
+
+PLUGIN_FETCH_RCE_PUE_ERROR( 0 )
+PLUGIN_FETCH_RCE_PUE_ERROR( 1 )
+
+#undef PLUGIN_FETCH_RCE_PUE_ERROR
+
//------------------------------------------------------------------------------
} // end namespace cen_centaur
diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C b/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C
index 6175a4c7c..4c5153e81 100644
--- a/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C
+++ b/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C
@@ -555,6 +555,10 @@ template
uint32_t analyzeFetchMpe<TYPE_MCA>( ExtensibleChip * i_chip,
const MemRank & i_rank,
STEP_CODE_DATA_STRUCT & io_sc );
+template
+uint32_t analyzeFetchMpe<TYPE_MBA>( ExtensibleChip * i_chip,
+ const MemRank & i_rank,
+ STEP_CODE_DATA_STRUCT & io_sc );
//------------------------------------------------------------------------------
@@ -765,6 +769,9 @@ uint32_t analyzeFetchNceTce( ExtensibleChip * i_chip,
template
uint32_t analyzeFetchNceTce<TYPE_MCA, McaDataBundle *>( ExtensibleChip * i_chip,
STEP_CODE_DATA_STRUCT & io_sc );
+template
+uint32_t analyzeFetchNceTce<TYPE_MBA, MbaDataBundle *>( ExtensibleChip * i_chip,
+ STEP_CODE_DATA_STRUCT & io_sc );
//------------------------------------------------------------------------------
@@ -834,6 +841,9 @@ uint32_t analyzeFetchUe( ExtensibleChip * i_chip,
template
uint32_t analyzeFetchUe<TYPE_MCA>( ExtensibleChip * i_chip,
STEP_CODE_DATA_STRUCT & io_sc );
+template
+uint32_t analyzeFetchUe<TYPE_MBA>( ExtensibleChip * i_chip,
+ STEP_CODE_DATA_STRUCT & io_sc );
//------------------------------------------------------------------------------
@@ -1128,6 +1138,68 @@ uint32_t analyzeImpe<TYPE_MCA>( ExtensibleChip * i_chip,
//------------------------------------------------------------------------------
+template<>
+uint32_t analyzeFetchRcePue<TYPE_MBA>( ExtensibleChip * i_chip,
+ STEP_CODE_DATA_STRUCT & io_sc )
+{
+ #define PRDF_FUNC "[MemEcc::analyzeFetchRcePue] "
+
+ PRDF_ASSERT( TYPE_MBA == i_chip->getType() );
+
+ uint32_t o_rc = SUCCESS;
+
+ do
+ {
+ // WORKAROUND: An RCE starts as a UE and its address is trapped in the
+ // MBUER (note: UE fir bit not set at this point). Since
+ // multiple addresses are retried (not just the failing
+ // address), the MBRCER will contain the last address
+ // retried, and not necessarily the address that started out
+ // with the UE. Therefore, we will use the MBUER instead.
+
+ MemAddr addr;
+ o_rc = getMemReadAddr<TYPE_MBA>( i_chip, MemAddr::READ_UE_ADDR, addr );
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC "getMemReadAddr(0x%08x, READ_UE_ADDR) failed",
+ i_chip->getHuid() );
+ break;
+ }
+ MemRank rank = addr.getRank();
+
+ // Callout the rank.
+ MemoryMru mm { i_chip->getTrgt(), rank, MemoryMruData::CALLOUT_RANK };
+ io_sc.service_data->SetCallout( mm );
+
+ #ifdef __HOSTBOOT_RUNTIME
+
+ // Add an entry to the RCE table.
+ if ( getMbaDataBundle(i_chip)->iv_rceTable.addEntry(rank, io_sc) )
+ {
+ TdEntry * entry = new TpsEvent<TYPE_MBA>{ i_chip, rank };
+ MemDbUtils::pushToQueue<TYPE_MBA>( i_chip, entry );
+ o_rc = MemDbUtils::handleTdEvent<TYPE_MBA>( i_chip, io_sc );
+ if ( SUCCESS != o_rc )
+ {
+ PRDF_ERR( PRDF_FUNC "handleTdEvent(0x%08x) failed on rank "
+ "0x%02x", i_chip->getHuid(), rank.getKey() );
+ break;
+ }
+ }
+
+ #endif // __HOSTBOOT_RUNTIME
+
+ } while (0);
+
+ MemCaptureData::addEccData<TYPE_MBA>( i_chip, io_sc );
+
+ return o_rc;
+
+ #undef PRDF_FUNC
+}
+
+//------------------------------------------------------------------------------
+
} // end namespace MemEcc
} // end namespace PRDF
diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.H b/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.H
index e01b81a78..1a96afeec 100644
--- a/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.H
+++ b/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.H
@@ -194,6 +194,16 @@ uint32_t analyzeMaintIue( ExtensibleChip * i_chip,
template<TARGETING::TYPE T>
uint32_t analyzeImpe( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc );
+/**
+ * @brief Analyzes fetch retry CE (RCE) or prefetch UE errors.
+ * @param i_chip MBA chip.
+ * @param io_sc The step code data struct.
+ * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise.
+ */
+template<TARGETING::TYPE T>
+uint32_t analyzeFetchRcePue( ExtensibleChip * i_chip,
+ STEP_CODE_DATA_STRUCT & io_sc );
+
#ifdef __HOSTBOOT_RUNTIME
/**
diff --git a/src/usr/diag/prdf/common/plat/pegasus/Membuf.rule b/src/usr/diag/prdf/common/plat/pegasus/Membuf.rule
index 61dd6bfba..99280eccc 100755
--- a/src/usr/diag/prdf/common/plat/pegasus/Membuf.rule
+++ b/src/usr/diag/prdf/common/plat/pegasus/Membuf.rule
@@ -1518,50 +1518,47 @@ rule rMBSECCFIR_0
};
group gMBSECCFIR_0 filter priority ( 19, 41 ),
- secondarybits(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,
- 17,18,20,21,22,23,24,25,26,27,28,29,30,31,
- 32,33,34,35,36,37,38,39,40,41,42,43,44,45,
- 48,50,51)
+ cs_root_cause( 19, 44, 47, 49 )
{
/** MBSECCFIR_0[0]
* Memory chip mark on rank 0
*/
- (rMBSECCFIR_0, bit(0)) ? analyzeFetchMpe0_0;
+ (rMBSECCFIR_0, bit(0)) ? verify_chip_mark_0_0;
/** MBSECCFIR_0[1]
* Memory chip mark on rank 1
*/
- (rMBSECCFIR_0, bit(1)) ? analyzeFetchMpe0_1;
+ (rMBSECCFIR_0, bit(1)) ? verify_chip_mark_0_1;
/** MBSECCFIR_0[2]
* Memory chip mark on rank 2
*/
- (rMBSECCFIR_0, bit(2)) ? analyzeFetchMpe0_2;
+ (rMBSECCFIR_0, bit(2)) ? verify_chip_mark_0_2;
/** MBSECCFIR_0[3]
* Memory chip mark on rank 3
*/
- (rMBSECCFIR_0, bit(3)) ? analyzeFetchMpe0_3;
+ (rMBSECCFIR_0, bit(3)) ? verify_chip_mark_0_3;
/** MBSECCFIR_0[4]
* Memory chip mark on rank 4
*/
- (rMBSECCFIR_0, bit(4)) ? analyzeFetchMpe0_4;
+ (rMBSECCFIR_0, bit(4)) ? verify_chip_mark_0_4;
/** MBSECCFIR_0[5]
* Memory chip mark on rank 5
*/
- (rMBSECCFIR_0, bit(5)) ? analyzeFetchMpe0_5;
+ (rMBSECCFIR_0, bit(5)) ? verify_chip_mark_0_5;
/** MBSECCFIR_0[6]
* Memory chip mark on rank 6
*/
- (rMBSECCFIR_0, bit(6)) ? analyzeFetchMpe0_6;
+ (rMBSECCFIR_0, bit(6)) ? verify_chip_mark_0_6;
/** MBSECCFIR_0[7]
* Memory chip mark on rank 7
*/
- (rMBSECCFIR_0, bit(7)) ? analyzeFetchMpe0_7;
+ (rMBSECCFIR_0, bit(7)) ? verify_chip_mark_0_7;
/** MBSECCFIR_0[8:15]
* Reserved
@@ -1571,12 +1568,12 @@ group gMBSECCFIR_0 filter priority ( 19, 41 ),
/** MBSECCFIR_0[16]
* Memory NCE
*/
- (rMBSECCFIR_0, bit(16)) ? analyzeFetchNce0;
+ (rMBSECCFIR_0, bit(16)) ? mainline_nce_handling_0;
/** MBSECCFIR_0[17]
* Memory RCE
*/
- (rMBSECCFIR_0, bit(17)) ? analyzeFetchRce0;
+ (rMBSECCFIR_0, bit(17)) ? mainline_rce_pue_handling_0;
/** MBSECCFIR_0[18]
* Memory SUE
@@ -1586,7 +1583,7 @@ group gMBSECCFIR_0 filter priority ( 19, 41 ),
/** MBSECCFIR_0[19]
* Memory UE
*/
- (rMBSECCFIR_0, bit(19)) ? mba0MemoryUe;
+ (rMBSECCFIR_0, bit(19)) ? mainline_ue_handling_0_UERE;
/** MBSECCFIR_0[20:27]
* Maintenance chip mark
@@ -1636,12 +1633,12 @@ group gMBSECCFIR_0 filter priority ( 19, 41 ),
/** MBSECCFIR_0[43]
* Prefetch Memory UE
*/
- (rMBSECCFIR_0, bit(43)) ? analyzeFetchPreUe0;
+ (rMBSECCFIR_0, bit(43)) ? mainline_rce_pue_handling_0;
/** MBSECCFIR_0[44]
* Memory RCD parity error
*/
- (rMBSECCFIR_0, bit(44)) ? defaultMaskedError;
+ (rMBSECCFIR_0, bit(44)) ? self_th_1_UERE; # CUMULUS_10
/** MBSECCFIR_0[45]
* Maintenance RCD parity error
@@ -1693,50 +1690,47 @@ rule rMBSECCFIR_1
};
group gMBSECCFIR_1 filter priority ( 19, 41 ),
- secondarybits(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,
- 17,18,20,21,22,23,24,25,26,27,28,29,30,31,
- 32,33,34,35,36,37,38,39,40,41,42,43,44,45,
- 48,50,51)
+ cs_root_cause( 19, 44, 47, 49 )
{
/** MBSECCFIR_1[0]
* Memory chip mark on rank 0
*/
- (rMBSECCFIR_1, bit(0)) ? analyzeFetchMpe1_0;
+ (rMBSECCFIR_1, bit(0)) ? verify_chip_mark_1_0;
/** MBSECCFIR_1[1]
* Memory chip mark on rank 1
*/
- (rMBSECCFIR_1, bit(1)) ? analyzeFetchMpe1_1;
+ (rMBSECCFIR_1, bit(1)) ? verify_chip_mark_1_1;
/** MBSECCFIR_1[2]
* Memory chip mark on rank 2
*/
- (rMBSECCFIR_1, bit(2)) ? analyzeFetchMpe1_2;
+ (rMBSECCFIR_1, bit(2)) ? verify_chip_mark_1_2;
/** MBSECCFIR_1[3]
* Memory chip mark on rank 3
*/
- (rMBSECCFIR_1, bit(3)) ? analyzeFetchMpe1_3;
+ (rMBSECCFIR_1, bit(3)) ? verify_chip_mark_1_3;
/** MBSECCFIR_1[4]
* Memory chip mark on rank 4
*/
- (rMBSECCFIR_1, bit(4)) ? analyzeFetchMpe1_4;
+ (rMBSECCFIR_1, bit(4)) ? verify_chip_mark_1_4;
/** MBSECCFIR_1[5]
* Memory chip mark on rank 5
*/
- (rMBSECCFIR_1, bit(5)) ? analyzeFetchMpe1_5;
+ (rMBSECCFIR_1, bit(5)) ? verify_chip_mark_1_5;
/** MBSECCFIR_1[6]
* Memory chip mark on rank 6
*/
- (rMBSECCFIR_1, bit(6)) ? analyzeFetchMpe1_6;
+ (rMBSECCFIR_1, bit(6)) ? verify_chip_mark_1_6;
/** MBSECCFIR_1[7]
* Memory chip mark on rank 7
*/
- (rMBSECCFIR_1, bit(7)) ? analyzeFetchMpe1_7;
+ (rMBSECCFIR_1, bit(7)) ? verify_chip_mark_1_7;
/** MBSECCFIR_1[8:15]
* Reserved
@@ -1746,12 +1740,12 @@ group gMBSECCFIR_1 filter priority ( 19, 41 ),
/** MBSECCFIR_1[16]
* Memory NCE
*/
- (rMBSECCFIR_1, bit(16)) ? analyzeFetchNce1;
+ (rMBSECCFIR_1, bit(16)) ? mainline_nce_handling_1;
/** MBSECCFIR_1[17]
* Memory RCE
*/
- (rMBSECCFIR_1, bit(17)) ? analyzeFetchRce1;
+ (rMBSECCFIR_1, bit(17)) ? mainline_rce_pue_handling_1;
/** MBSECCFIR_1[18]
* Memory SUE
@@ -1761,7 +1755,7 @@ group gMBSECCFIR_1 filter priority ( 19, 41 ),
/** MBSECCFIR_1[19]
* Memory UE
*/
- (rMBSECCFIR_1, bit(19)) ? mba1MemoryUe;
+ (rMBSECCFIR_1, bit(19)) ? mainline_ue_handling_1_UERE;
/** MBSECCFIR_1[20:27]
* Maintenance chip mark
@@ -1811,12 +1805,12 @@ group gMBSECCFIR_1 filter priority ( 19, 41 ),
/** MBSECCFIR_1[43]
* Prefetch Memory UE
*/
- (rMBSECCFIR_1, bit(43)) ? analyzeFetchPreUe1;
+ (rMBSECCFIR_1, bit(43)) ? mainline_rce_pue_handling_1;
/** MBSECCFIR_1[44]
* Memory RCD parity error
*/
- (rMBSECCFIR_1, bit(44)) ? defaultMaskedError;
+ (rMBSECCFIR_1, bit(44)) ? self_th_1_UERE; # CUMULUS_10
/** MBSECCFIR_1[45]
* Maintenance RCD parity error
diff --git a/src/usr/diag/prdf/common/plat/pegasus/Membuf_acts_NEST.rule b/src/usr/diag/prdf/common/plat/pegasus/Membuf_acts_NEST.rule
index 8a4869233..c1fbb30bc 100755
--- a/src/usr/diag/prdf/common/plat/pegasus/Membuf_acts_NEST.rule
+++ b/src/usr/diag/prdf/common/plat/pegasus/Membuf_acts_NEST.rule
@@ -53,85 +53,3 @@ actionclass calloutDmiBusTh1 { calloutDmiBus; threshold1; };
/** Callout the DMI bus, threshold 2 per day */
actionclass calloutDmiBusTh2pday { calloutDmiBus; threshold2pday; };
-/** Analyze a fetch MPE on MBA0 rank 0 */
-actionclass analyzeFetchMpe0_0 { funccall("AnalyzeFetchMpe0_0"); };
-
-/** Analyze a fetch MPE on MBA1 rank 0 */
-actionclass analyzeFetchMpe1_0 { funccall("AnalyzeFetchMpe1_0"); };
-
-/** Analyze a fetch MPE on MBA0 rank 1 */
-actionclass analyzeFetchMpe0_1 { funccall("AnalyzeFetchMpe0_1"); };
-
-/** Analyze a fetch MPE on MBA1 rank 1 */
-actionclass analyzeFetchMpe1_1 { funccall("AnalyzeFetchMpe1_1"); };
-
-/** Analyze a fetch MPE on MBA0 rank 2 */
-actionclass analyzeFetchMpe0_2 { funccall("AnalyzeFetchMpe0_2"); };
-
-/** Analyze a fetch MPE on MBA1 rank 2 */
-actionclass analyzeFetchMpe1_2 { funccall("AnalyzeFetchMpe1_2"); };
-
-/** Analyze a fetch MPE on MBA0 rank 3 */
-actionclass analyzeFetchMpe0_3 { funccall("AnalyzeFetchMpe0_3"); };
-
-/** Analyze a fetch MPE on MBA1 rank 3 */
-actionclass analyzeFetchMpe1_3 { funccall("AnalyzeFetchMpe1_3"); };
-
-/** Analyze a fetch MPE on MBA0 rank 4 */
-actionclass analyzeFetchMpe0_4 { funccall("AnalyzeFetchMpe0_4"); };
-
-/** Analyze a fetch MPE on MBA1 rank 4 */
-actionclass analyzeFetchMpe1_4 { funccall("AnalyzeFetchMpe1_4"); };
-
-/** Analyze a fetch MPE on MBA0 rank 5 */
-actionclass analyzeFetchMpe0_5 { funccall("AnalyzeFetchMpe0_5"); };
-
-/** Analyze a fetch MPE on MBA1 rank 5 */
-actionclass analyzeFetchMpe1_5 { funccall("AnalyzeFetchMpe1_5"); };
-
-/** Analyze a fetch MPE on MBA0 rank 6 */
-actionclass analyzeFetchMpe0_6 { funccall("AnalyzeFetchMpe0_6"); };
-
-/** Analyze a fetch MPE on MBA1 rank 6 */
-actionclass analyzeFetchMpe1_6 { funccall("AnalyzeFetchMpe1_6"); };
-
-/** Analyze a fetch MPE on MBA0 rank 7 */
-actionclass analyzeFetchMpe0_7 { funccall("AnalyzeFetchMpe0_7"); };
-
-/** Analyze a fetch MPE on MBA1 rank 7 */
-actionclass analyzeFetchMpe1_7 { funccall("AnalyzeFetchMpe1_7"); };
-
-/** Analyze a fetch NCE on MBA0 */
-actionclass analyzeFetchNce0 { funccall("AnalyzeFetchNce0"); };
-
-/** Analyze a fetch NCE on MBA1 */
-actionclass analyzeFetchNce1 { funccall("AnalyzeFetchNce1"); };
-
-/** Analyze a fetch RCE on MBA0 */
-actionclass analyzeFetchRce0 { funccall("AnalyzeFetchRce0"); };
-
-/** Analyze a fetch RCE on MBA1 */
-actionclass analyzeFetchRce1 { funccall("AnalyzeFetchRce1"); };
-
-/** Analyze a PreFetch Ue on MBA0 */
-actionclass analyzeFetchPreUe0 { funccall("AnalyzeFetchPreUe0"); };
-
-/** Analyze a PreFetch Ue on MBA1 */
-actionclass analyzeFetchPreUe1 { funccall("AnalyzeFetchPreUe1"); };
-
-/** Analyze a fetch UE on MBA0 */
-actionclass analyzeFetchUe0
-{
- funccall("AnalyzeFetchUe0");
- threshold( field(33 / 30 min ) );
- SUEGenerationPoint;
-};
-
-/** Analyze a fetch UE on MBA1 */
-actionclass analyzeFetchUe1
-{
- funccall("AnalyzeFetchUe1");
- threshold( field(33 / 30 min ) );
- SUEGenerationPoint;
-};
-
diff --git a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMembuf.C b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMembuf.C
index 0ff352b44..2a2b8da3d 100755
--- a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMembuf.C
+++ b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMembuf.C
@@ -270,396 +270,6 @@ PRDF_PLUGIN_DEFINE( Membuf, maxSparesExceeded );
//
//##############################################################################
-/**
- * @brief MBSECCFIR[0:7] - Mailine MPE.
- * @param i_chip MEMBUF chip.
- * @param io_sc The step code data struct.
- * @return SUCCESS
- */
-#define PLUGIN_FETCH_MPE_ERROR( POS, RANK ) \
-int32_t AnalyzeFetchMpe##POS_##RANK( ExtensibleChip * i_chip, \
- STEP_CODE_DATA_STRUCT & io_sc ) \
-{ \
- ExtensibleChip * mbaChip = getConnectedChild( i_chip, TYPE_MBA, POS ); \
- PRDF_ASSERT( nullptr != mbaChip ); \
- MemEcc::analyzeFetchMpe<TYPE_MBA, MbaDataBundle *>( mbaChip, RANK, io_sc );\
- return SUCCESS; \
-} \
-PRDF_PLUGIN_DEFINE( Membuf, AnalyzeFetchMpe##POS_##RANK );
-
-PLUGIN_FETCH_MPE_ERROR( 0, 0 )
-PLUGIN_FETCH_MPE_ERROR( 0, 1 )
-PLUGIN_FETCH_MPE_ERROR( 0, 2 )
-PLUGIN_FETCH_MPE_ERROR( 0, 3 )
-PLUGIN_FETCH_MPE_ERROR( 0, 4 )
-PLUGIN_FETCH_MPE_ERROR( 0, 5 )
-PLUGIN_FETCH_MPE_ERROR( 0, 6 )
-PLUGIN_FETCH_MPE_ERROR( 0, 7 )
-
-PLUGIN_FETCH_MPE_ERROR( 1, 0 )
-PLUGIN_FETCH_MPE_ERROR( 1, 1 )
-PLUGIN_FETCH_MPE_ERROR( 1, 2 )
-PLUGIN_FETCH_MPE_ERROR( 1, 3 )
-PLUGIN_FETCH_MPE_ERROR( 1, 4 )
-PLUGIN_FETCH_MPE_ERROR( 1, 5 )
-PLUGIN_FETCH_MPE_ERROR( 1, 6 )
-PLUGIN_FETCH_MPE_ERROR( 1, 7 )
-
-#undef PLUGIN_FETCH_MPE_ERROR
-
-//------------------------------------------------------------------------------
-
-/**
- * @brief MBSECCFIR[16] - Fetch New CE (NCE).
- * @param i_membChip A Centaur chip.
- * @param i_sc The step code data struct.
- * @param i_mbaPos The MBA position.
- * @return SUCCESS
- */
-int32_t AnalyzeFetchNce( ExtensibleChip * i_membChip,
- STEP_CODE_DATA_STRUCT & i_sc, uint32_t i_mbaPos )
-{
- #define PRDF_FUNC "[AnalyzeFetchNce] "
-
- int32_t l_rc = SUCCESS;
-
- ExtensibleChip * mbaChip = NULL;
-
- do
- {
- CenMembufDataBundle * membdb = getMembufDataBundle( i_membChip );
- mbaChip = membdb->getMbaChip( i_mbaPos );
- if ( NULL == mbaChip )
- {
- PRDF_ERR( PRDF_FUNC "getMbaChip() returned NULL" );
- l_rc = FAIL; break;
- }
- TargetHandle_t mbaTrgt = mbaChip->GetChipHandle();
-
- CenAddr addr;
- l_rc = getCenReadAddr( i_membChip, i_mbaPos, READ_NCE_ADDR, addr );
- if ( SUCCESS != l_rc )
- {
- PRDF_ERR( PRDF_FUNC "getCenReadAddr() failed" );
- break;
- }
- CenRank rank = addr.getRank();
-
- if ( 0x20 > getChipLevel(i_membChip->GetChipHandle()) )
- {
- // There is a bug in DD1.x where the value of MBSEVR cannot be
- // trusted. The workaround is too complicated for its value so
- // callout the rank instead.
- MemoryMru memmru ( mbaTrgt, rank, MemoryMruData::CALLOUT_RANK );
- i_sc.service_data->SetCallout( memmru );
- }
- else // DD2.0+
- {
- // Get the failing symbol
- const char * reg_str = (0 == i_mbaPos) ? "MBA0_MBSEVR"
- : "MBA1_MBSEVR";
- SCAN_COMM_REGISTER_CLASS * reg = i_membChip->getRegister(reg_str);
- l_rc = reg->Read();
- if ( SUCCESS != l_rc )
- {
- PRDF_ERR( PRDF_FUNC "Read() failed on %s", reg_str );
- break;
- }
-
- uint8_t galois = reg->GetBitFieldJustified( 40, 8 );
- uint8_t mask = reg->GetBitFieldJustified( 32, 8 );
-
- CenSymbol symbol = CenSymbol::fromGalois( mbaTrgt, rank, galois,
- mask );
- if ( !symbol.isValid() )
- {
- PRDF_ERR( PRDF_FUNC "Failed to create symbol: galois=0x%02x "
- "mask=0x%02x", galois, mask );
- break;
- }
-
- // Check if this symbol is on any of the spares.
- CenSymbol sp0, sp1, ecc;
- l_rc = mssGetSteerMux( mbaTrgt, rank, sp0, sp1, ecc );
- if ( SUCCESS != l_rc )
- {
- PRDF_ERR( PRDF_FUNC "mssGetSteerMux() failed. HUID: 0x%08x "
- "rank: %d", getHuid(mbaTrgt), rank.getMaster() );
- break;
- }
- if ( (sp0.isValid() && (sp0.getDram() == symbol.getDram())) ||
- (sp1.isValid() && (sp1.getDram() == symbol.getDram())) )
- {
- symbol.setDramSpared();
- }
- if ( ecc.isValid() && (ecc.getDram() == symbol.getDram()) )
- {
- symbol.setEccSpared();
- }
-
- // Add the DIMM to the callout list
- MemoryMru memmru ( mbaTrgt, rank, symbol );
- i_sc.service_data->SetCallout( memmru, MRU_MEDA );
-
- // Add to CE table
- CenMbaDataBundle * mbadb = getMbaDataBundle( mbaChip );
- uint32_t ceTableRc = mbadb->iv_ceTable.addEntry( addr, symbol );
- bool doTps = false;
-
- // Check MNFG thresholds, if needed.
- if ( mfgMode() )
- {
- // Get the MNFG CE thresholds.
- uint32_t dramTh, hrTh, dimmTh;
- getMnfgMemCeTh( mbaChip, rank, dramTh, hrTh, dimmTh );
-
- // Get counts from CE table.
- uint32_t dramCount, hrCount, dimmCount;
- mbadb->iv_ceTable.getMnfgCounts( addr.getRank(), symbol,
- dramCount, hrCount,
- dimmCount );
-
- if ( dramTh < dramCount )
- {
- i_sc.service_data->AddSignatureList( mbaTrgt,
- PRDFSIG_MnfgDramCte );
- i_sc.service_data->setServiceCall();
- doTps = true;
- }
- else if ( hrTh < hrCount )
- {
- i_sc.service_data->AddSignatureList( mbaTrgt,
- PRDFSIG_MnfgHrCte );
- i_sc.service_data->setServiceCall();
- doTps = true;
- }
- else if ( dimmTh < dimmCount )
- {
- i_sc.service_data->AddSignatureList( mbaTrgt,
- PRDFSIG_MnfgDimmCte );
- i_sc.service_data->setServiceCall();
- doTps = true;
- }
- else if ( 0 != (CenMbaCeTable::TABLE_FULL & ceTableRc) )
- {
- i_sc.service_data->AddSignatureList( mbaTrgt,
- PRDFSIG_MnfgTableFull);
-
- // The table is full and no other threshold has been met.
- // We are in a state where we may never hit a MNFG
- // threshold. Callout all memory behind the MBA. Also, since
- // the counts are all over the place, there may be a problem
- // with the MBA. So call it out as well.
- MemoryMru all_mm ( mbaTrgt, rank,
- MemoryMruData::CALLOUT_ALL_MEM );
- i_sc.service_data->SetCallout( all_mm, MRU_MEDA );
- i_sc.service_data->SetCallout( mbaTrgt, MRU_MEDA );
- i_sc.service_data->setServiceCall();
- doTps = true;
- }
- else if ( 0 != (CenMbaCeTable::ENTRY_TH_REACHED & ceTableRc) )
- {
- i_sc.service_data->AddSignatureList( mbaTrgt,
- PRDFSIG_MnfgEntryCte );
-
- // There is a single entry threshold and no other threshold
- // has been met. This is a potential flooding issue, so make
- // the DIMM callout predictive.
- i_sc.service_data->setServiceCall();
- doTps = true;
- }
- }
- else // field
- {
- doTps = ( CenMbaCeTable::NO_TH_REACHED != ceTableRc );
- }
-
- // Initiate a TPS procedure, if needed.
- if ( doTps )
- {
- // If a MNFG threshold has been reached (predictive callout), we
- // will still try to start TPS just in case MNFG disables the
- // termination policy.
-
- // Will not be able to do TPS during hostboot. Note that we will
- // still call handleTdEvent() so we can get the trace statement
- // indicating TPS was requested during Hostboot.
-
- l_rc = mbadb->iv_tdCtlr.handleTdEvent( i_sc, rank,
- CenMbaTdCtlrCommon::TPS_EVENT );
- if ( SUCCESS != l_rc )
- {
- PRDF_ERR( PRDF_FUNC "handleTdEvent() failed: rank=m%ds%d",
- rank.getMaster(), rank.getSlave() );
- break;
- }
- }
- }
-
- } while (0);
-
- // Add ECC capture data for FFDC.
- if ( NULL != mbaChip )
- MemCaptureData::addEccData<TYPE_MBA>( mbaChip, i_sc );
-
- if ( SUCCESS != l_rc )
- {
- PRDF_ERR( PRDF_FUNC "Failed: i_membChip=0x%08x i_mbaPos=%d",
- i_membChip->GetId(), i_mbaPos );
- CalloutUtil::defaultError( i_sc );
- }
-
- return SUCCESS; // Intentionally return SUCCESS for this plugin
-
- #undef PRDF_FUNC
-}
-
-//------------------------------------------------------------------------------
-
-/**
- * @brief Fetch Retry CE / Prefetch UE Errors.
- * @param i_membChip A Centaur chip.
- * @param i_sc The step code data struct.
- * @param i_mbaPos The MBA position.
- * @param i_isRceError True for RCE error false otherwise.
- * @return SUCCESS
- */
-int32_t AnalyzeFetchRcePue( ExtensibleChip * i_membChip,
- STEP_CODE_DATA_STRUCT & i_sc, uint32_t i_mbaPos,
- bool i_isRceError )
-{
- #define PRDF_FUNC "[AnalyzeFetchRcePue] "
-
- int32_t l_rc = SUCCESS;
-
- ExtensibleChip * mbaChip = NULL;
-
- do
- {
- CenMembufDataBundle * membdb = getMembufDataBundle( i_membChip );
- mbaChip = membdb->getMbaChip( i_mbaPos );
- if ( NULL == mbaChip )
- {
- PRDF_ERR( PRDF_FUNC "getMbaChip() returned NULL" );
- l_rc = FAIL; break;
- }
-
- CenMbaDataBundle * mbadb = getMbaDataBundle( mbaChip );
-
- // WORKAROUND: Since an RCE starts as a UE, it's address is trapped in
- // MBUER (note: UE fir bit not set at this point). But since multiple
- // addresses are retried (not just the failing address), MBRCER will
- // contain the last address retried, and not necessarily the address
- // that started out with the UE.
-
- CenAddr addr;
- l_rc = getCenReadAddr( i_membChip, i_mbaPos, READ_UE_ADDR, addr );
- if ( SUCCESS != l_rc )
- {
- PRDF_ERR( PRDF_FUNC "getCenReadAddr() failed" );
- break;
- }
- CenRank rank = addr.getRank();
-
- // Callout the rank.
- MemoryMru memmru ( mbaChip->GetChipHandle(), rank,
- MemoryMruData::CALLOUT_RANK );
- i_sc.service_data->SetCallout( memmru );
-
- // Add an entry to the RCE table.
- if ( mbadb->iv_rceTable.addEntry(rank, i_sc) )
- {
- // Add a TPS request to the queue TD queue.
- l_rc = mbadb->iv_tdCtlr.handleTdEvent( i_sc, rank,
- CenMbaTdCtlrCommon::TPS_EVENT );
- if ( SUCCESS != l_rc )
- {
- PRDF_ERR( PRDF_FUNC "handleTdEvent() failed." );
- break;
- }
- }
-
- } while (0);
-
- // Add ECC capture data for FFDC.
- if ( NULL != mbaChip )
- MemCaptureData::addEccData<TYPE_MBA>( mbaChip, i_sc );
-
- if ( SUCCESS != l_rc )
- {
- PRDF_ERR( PRDF_FUNC "Failed: i_membChip=0x%08x i_mbaPos=%d "
- "i_isRceError=%c", i_membChip->GetId(), i_mbaPos,
- i_isRceError ? 'T' : 'F' );
- CalloutUtil::defaultError( i_sc );
- }
-
- return SUCCESS; // Intentionally return SUCCESS for this plugin
-
- #undef PRDF_FUNC
-}
-
-//------------------------------------------------------------------------------
-
-/**
- * @brief MBSECCFIR[19] - Mainline UE.
- * @param i_chip MEMBUF chip.
- * @param io_sc The step code data struct.
- * @return SUCCESS
- */
-#define PLUGIN_FETCH_UE_ERROR( POS ) \
-int32_t AnalyzeFetchUe##POS( ExtensibleChip * i_chip, \
- STEP_CODE_DATA_STRUCT & io_sc ) \
-{ \
- ExtensibleChip * mbaChip = getConnectedChild( i_chip, TYPE_MBA, POS ); \
- PRDF_ASSERT( nullptr != mbaChip ); \
- MemEcc::analyzeFetchUe<TYPE_MBA, MbaDataBundle *>( mbaChip, io_sc ); \
- return SUCCESS; \
-} \
-PRDF_PLUGIN_DEFINE( Membuf, AnalyzeFetchUe##POS );
-
-PLUGIN_FETCH_UE_ERROR( 0 )
-PLUGIN_FETCH_UE_ERROR( 1 )
-
-#undef PLUGIN_FETCH_UE_ERROR
-
-//------------------------------------------------------------------------------
-
-// Define the plugins for memory ECC errors.
-#define PLUGIN_FETCH_ECC_ERROR( TYPE, MBA ) \
-int32_t AnalyzeFetch##TYPE##MBA( ExtensibleChip * i_membChip, \
- STEP_CODE_DATA_STRUCT & i_sc ) \
-{ \
- return AnalyzeFetch##TYPE( i_membChip, i_sc, MBA ); \
-} \
-PRDF_PLUGIN_DEFINE( Membuf, AnalyzeFetch##TYPE##MBA );
-
-PLUGIN_FETCH_ECC_ERROR( Nce, 0 )
-PLUGIN_FETCH_ECC_ERROR( Nce, 1 )
-
-#undef PLUGIN_FETCH_ECC_ERROR
-
-// Handling for RCE and prefetch UE is similar.
-// So use common macro and function ( AnalyzeFetchRcePue ).
-
-#define PLUGIN_FETCH_RCE_PREUE_ERROR( TYPE, MBA, IS_RCE ) \
-int32_t AnalyzeFetch##TYPE##MBA( ExtensibleChip * i_membChip, \
- STEP_CODE_DATA_STRUCT & i_sc ) \
-{ \
- return AnalyzeFetchRcePue( i_membChip, i_sc, MBA, IS_RCE ); \
-} \
-PRDF_PLUGIN_DEFINE( Membuf, AnalyzeFetch##TYPE##MBA );
-
-// This is bit inefficient. 1st and 3rd argument have 1 to 1
-// mapping. But to keep macro expansion simple, using extra argument.
-PLUGIN_FETCH_RCE_PREUE_ERROR( Rce, 0, true )
-PLUGIN_FETCH_RCE_PREUE_ERROR( Rce, 1, true )
-PLUGIN_FETCH_RCE_PREUE_ERROR( PreUe, 0, false )
-PLUGIN_FETCH_RCE_PREUE_ERROR( PreUe, 1, false )
-
-#undef PLUGIN_FETCH_RCE_PREUE_ERROR
-
-//------------------------------------------------------------------------------
-
int32_t calloutInterface_dmi( ExtensibleChip * i_membChip,
STEP_CODE_DATA_STRUCT & io_sc )
{
OpenPOWER on IntegriCloud