diff options
author | Zane Shelley <zshelle@us.ibm.com> | 2018-04-25 12:46:24 -0500 |
---|---|---|
committer | Zane C. Shelley <zshelle@us.ibm.com> | 2018-04-27 21:30:46 -0400 |
commit | 096bf926ad629fae603499bd5fbdeba19cf818b5 (patch) | |
tree | 3b8a75a32b64e9785bbb4277bde809dcb5d26aa0 | |
parent | cda40fd41b6921d307a384c600098d3ef395e01c (diff) | |
download | talos-hostboot-096bf926ad629fae603499bd5fbdeba19cf818b5.tar.gz talos-hostboot-096bf926ad629fae603499bd5fbdeba19cf818b5.zip |
PRD: Memory CE, UE, RCE isolation for MBA
Change-Id: If6e80e2c6bd3f83113fd24486ca8a285ea0d4447
RTC: 187480
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/57855
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Reviewed-by: Caleb N. Palmer <cnpalmer@us.ibm.com>
Reviewed-by: Benjamin J. Weisenbeck <bweisenb@us.ibm.com>
Reviewed-by: Matt Derksen <mderkse1@us.ibm.com>
Reviewed-by: Brian J. Stegmiller <bjs@us.ibm.com>
Reviewed-by: Zane C. Shelley <zshelle@us.ibm.com>
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/57932
CI-Ready: Zane C. Shelley <zshelle@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
8 files changed, 296 insertions, 532 deletions
diff --git a/src/usr/diag/prdf/common/plat/cen/cen_centaur.rule b/src/usr/diag/prdf/common/plat/cen/cen_centaur.rule index ea4724c10..da358c8b7 100644 --- a/src/usr/diag/prdf/common/plat/cen/cen_centaur.rule +++ b/src/usr/diag/prdf/common/plat/cen/cen_centaur.rule @@ -1425,47 +1425,48 @@ rule rMBSECCFIR_0 MBSECCFIR_0 & ~MBSECCFIR_0_MASK & ~MBSECCFIR_0_ACT0 & MBSECCFIR_0_ACT1; }; -group gMBSECCFIR_0 filter singlebit, cs_root_cause( 19, 44, 47, 49 ) +group gMBSECCFIR_0 filter priority ( 19, 41 ), + cs_root_cause( 19, 44, 47, 49 ) { /** MBSECCFIR_0[0] * Memory chip mark on rank 0 */ - (rMBSECCFIR_0, bit(0)) ? TBDDefaultCallout; + (rMBSECCFIR_0, bit(0)) ? verify_chip_mark_0_0; /** MBSECCFIR_0[1] * Memory chip mark on rank 1 */ - (rMBSECCFIR_0, bit(1)) ? TBDDefaultCallout; + (rMBSECCFIR_0, bit(1)) ? verify_chip_mark_0_1; /** MBSECCFIR_0[2] * Memory chip mark on rank 2 */ - (rMBSECCFIR_0, bit(2)) ? TBDDefaultCallout; + (rMBSECCFIR_0, bit(2)) ? verify_chip_mark_0_2; /** MBSECCFIR_0[3] * Memory chip mark on rank 3 */ - (rMBSECCFIR_0, bit(3)) ? TBDDefaultCallout; + (rMBSECCFIR_0, bit(3)) ? verify_chip_mark_0_3; /** MBSECCFIR_0[4] * Memory chip mark on rank 4 */ - (rMBSECCFIR_0, bit(4)) ? TBDDefaultCallout; + (rMBSECCFIR_0, bit(4)) ? verify_chip_mark_0_4; /** MBSECCFIR_0[5] * Memory chip mark on rank 5 */ - (rMBSECCFIR_0, bit(5)) ? TBDDefaultCallout; + (rMBSECCFIR_0, bit(5)) ? verify_chip_mark_0_5; /** MBSECCFIR_0[6] * Memory chip mark on rank 6 */ - (rMBSECCFIR_0, bit(6)) ? TBDDefaultCallout; + (rMBSECCFIR_0, bit(6)) ? verify_chip_mark_0_6; /** MBSECCFIR_0[7] * Memory chip mark on rank 7 */ - (rMBSECCFIR_0, bit(7)) ? TBDDefaultCallout; + (rMBSECCFIR_0, bit(7)) ? verify_chip_mark_0_7; /** MBSECCFIR_0[8:15] * Reserved @@ -1475,12 +1476,12 @@ group gMBSECCFIR_0 filter singlebit, cs_root_cause( 19, 44, 47, 49 ) /** MBSECCFIR_0[16] * Memory NCE */ - (rMBSECCFIR_0, bit(16)) ? TBDDefaultCallout; + (rMBSECCFIR_0, bit(16)) ? 
mainline_nce_handling_0; /** MBSECCFIR_0[17] * Memory RCE */ - (rMBSECCFIR_0, bit(17)) ? TBDDefaultCallout; + (rMBSECCFIR_0, bit(17)) ? mainline_rce_pue_handling_0; /** MBSECCFIR_0[18] * Memory SUE @@ -1490,7 +1491,7 @@ group gMBSECCFIR_0 filter singlebit, cs_root_cause( 19, 44, 47, 49 ) /** MBSECCFIR_0[19] * Memory UE */ - (rMBSECCFIR_0, bit(19)) ? TBDDefaultCallout; + (rMBSECCFIR_0, bit(19)) ? mainline_ue_handling_0_UERE; /** MBSECCFIR_0[20:27] * Maintenance chip mark @@ -1540,7 +1541,7 @@ group gMBSECCFIR_0 filter singlebit, cs_root_cause( 19, 44, 47, 49 ) /** MBSECCFIR_0[43] * Prefetch Memory UE */ - (rMBSECCFIR_0, bit(43)) ? TBDDefaultCallout; + (rMBSECCFIR_0, bit(43)) ? mainline_rce_pue_handling_0; /** MBSECCFIR_0[44] * Memory RCD parity error @@ -1596,47 +1597,48 @@ rule rMBSECCFIR_1 MBSECCFIR_1 & ~MBSECCFIR_1_MASK & ~MBSECCFIR_1_ACT0 & MBSECCFIR_1_ACT1; }; -group gMBSECCFIR_1 filter singlebit, cs_root_cause( 19, 44, 47, 49 ) +group gMBSECCFIR_1 filter priority ( 19, 41 ), + cs_root_cause( 19, 44, 47, 49 ) { /** MBSECCFIR_1[0] * Memory chip mark on rank 0 */ - (rMBSECCFIR_1, bit(0)) ? TBDDefaultCallout; + (rMBSECCFIR_1, bit(0)) ? verify_chip_mark_1_0; /** MBSECCFIR_1[1] * Memory chip mark on rank 1 */ - (rMBSECCFIR_1, bit(1)) ? TBDDefaultCallout; + (rMBSECCFIR_1, bit(1)) ? verify_chip_mark_1_1; /** MBSECCFIR_1[2] * Memory chip mark on rank 2 */ - (rMBSECCFIR_1, bit(2)) ? TBDDefaultCallout; + (rMBSECCFIR_1, bit(2)) ? verify_chip_mark_1_2; /** MBSECCFIR_1[3] * Memory chip mark on rank 3 */ - (rMBSECCFIR_1, bit(3)) ? TBDDefaultCallout; + (rMBSECCFIR_1, bit(3)) ? verify_chip_mark_1_3; /** MBSECCFIR_1[4] * Memory chip mark on rank 4 */ - (rMBSECCFIR_1, bit(4)) ? TBDDefaultCallout; + (rMBSECCFIR_1, bit(4)) ? verify_chip_mark_1_4; /** MBSECCFIR_1[5] * Memory chip mark on rank 5 */ - (rMBSECCFIR_1, bit(5)) ? TBDDefaultCallout; + (rMBSECCFIR_1, bit(5)) ? verify_chip_mark_1_5; /** MBSECCFIR_1[6] * Memory chip mark on rank 6 */ - (rMBSECCFIR_1, bit(6)) ? 
TBDDefaultCallout; + (rMBSECCFIR_1, bit(6)) ? verify_chip_mark_1_6; /** MBSECCFIR_1[7] * Memory chip mark on rank 7 */ - (rMBSECCFIR_1, bit(7)) ? TBDDefaultCallout; + (rMBSECCFIR_1, bit(7)) ? verify_chip_mark_1_7; /** MBSECCFIR_1[8:15] * Reserved @@ -1646,12 +1648,12 @@ group gMBSECCFIR_1 filter singlebit, cs_root_cause( 19, 44, 47, 49 ) /** MBSECCFIR_1[16] * Memory NCE */ - (rMBSECCFIR_1, bit(16)) ? TBDDefaultCallout; + (rMBSECCFIR_1, bit(16)) ? mainline_nce_handling_1; /** MBSECCFIR_1[17] * Memory RCE */ - (rMBSECCFIR_1, bit(17)) ? TBDDefaultCallout; + (rMBSECCFIR_1, bit(17)) ? mainline_rce_pue_handling_1; /** MBSECCFIR_1[18] * Memory SUE @@ -1661,7 +1663,7 @@ group gMBSECCFIR_1 filter singlebit, cs_root_cause( 19, 44, 47, 49 ) /** MBSECCFIR_1[19] * Memory UE */ - (rMBSECCFIR_1, bit(19)) ? TBDDefaultCallout; + (rMBSECCFIR_1, bit(19)) ? mainline_ue_handling_1_UERE; /** MBSECCFIR_1[20:27] * Maintenance chip mark @@ -1711,7 +1713,7 @@ group gMBSECCFIR_1 filter singlebit, cs_root_cause( 19, 44, 47, 49 ) /** MBSECCFIR_1[43] * Prefetch Memory UE */ - (rMBSECCFIR_1, bit(43)) ? TBDDefaultCallout; + (rMBSECCFIR_1, bit(43)) ? 
mainline_rce_pue_handling_1; /** MBSECCFIR_1[44] * Memory RCD parity error diff --git a/src/usr/diag/prdf/common/plat/cen/cen_centaur_actions.rule b/src/usr/diag/prdf/common/plat/cen/cen_centaur_actions.rule index f60f534e7..5eb5716ee 100644 --- a/src/usr/diag/prdf/common/plat/cen/cen_centaur_actions.rule +++ b/src/usr/diag/prdf/common/plat/cen/cen_centaur_actions.rule @@ -183,3 +183,43 @@ actionclass l4_cache_co_ue_UERE SueSource; }; +/** Verify Chip Mark */ +actionclass verify_chip_mark_0_0 { funccall("AnalyzeFetchMpe0_0"); }; +actionclass verify_chip_mark_0_1 { funccall("AnalyzeFetchMpe0_1"); }; +actionclass verify_chip_mark_0_2 { funccall("AnalyzeFetchMpe0_2"); }; +actionclass verify_chip_mark_0_3 { funccall("AnalyzeFetchMpe0_3"); }; +actionclass verify_chip_mark_0_4 { funccall("AnalyzeFetchMpe0_4"); }; +actionclass verify_chip_mark_0_5 { funccall("AnalyzeFetchMpe0_5"); }; +actionclass verify_chip_mark_0_6 { funccall("AnalyzeFetchMpe0_6"); }; +actionclass verify_chip_mark_0_7 { funccall("AnalyzeFetchMpe0_7"); }; +actionclass verify_chip_mark_1_0 { funccall("AnalyzeFetchMpe1_0"); }; +actionclass verify_chip_mark_1_1 { funccall("AnalyzeFetchMpe1_1"); }; +actionclass verify_chip_mark_1_2 { funccall("AnalyzeFetchMpe1_2"); }; +actionclass verify_chip_mark_1_3 { funccall("AnalyzeFetchMpe1_3"); }; +actionclass verify_chip_mark_1_4 { funccall("AnalyzeFetchMpe1_4"); }; +actionclass verify_chip_mark_1_5 { funccall("AnalyzeFetchMpe1_5"); }; +actionclass verify_chip_mark_1_6 { funccall("AnalyzeFetchMpe1_6"); }; +actionclass verify_chip_mark_1_7 { funccall("AnalyzeFetchMpe1_7"); }; + +/** Fetch NCE */ +actionclass mainline_nce_handling_0 { funccall("AnalyzeFetchNce0"); }; +actionclass mainline_nce_handling_1 { funccall("AnalyzeFetchNce1"); }; + +/** Fetch UE */ +actionclass mainline_ue_handling_0_UERE +{ + funccall("AnalyzeFetchUe0"); + threshold( field(33 / 30 min) ); + SueSource; +}; +actionclass mainline_ue_handling_1_UERE +{ + funccall("AnalyzeFetchUe1"); + threshold( 
field(33 / 30 min) ); + SueSource; +}; + +/** Fetch RCE or Prefetch UE */ +actionclass mainline_rce_pue_handling_0 { funccall("AnalyzeFetchRcePue0"); }; +actionclass mainline_rce_pue_handling_1 { funccall("AnalyzeFetchRcePue1"); }; + diff --git a/src/usr/diag/prdf/common/plat/cen/prdfCenMembuf_common.C b/src/usr/diag/prdf/common/plat/cen/prdfCenMembuf_common.C index af1b22761..2e0707f22 100644 --- a/src/usr/diag/prdf/common/plat/cen/prdfCenMembuf_common.C +++ b/src/usr/diag/prdf/common/plat/cen/prdfCenMembuf_common.C @@ -30,6 +30,8 @@ #include <prdfPluginMap.H> // Platform includes +#include <prdfCenMbaDataBundle.H> +#include <prdfMemEccAnalysis.H> #include <prdfMemUtils.H> using namespace TARGETING; @@ -194,6 +196,122 @@ PLUGIN_RCD_PARITY_UE_SIDEEFFECTS( 1 ) #undef PLUGIN_RCD_PARITY_UE_SIDEEFFECTS +//############################################################################## +// +// MBSECCFIRs +// +//############################################################################## + +/** + * @brief MBSECCFIR[0:7] - Mainline MPE. + * @param i_chip MEMBUF chip. + * @param io_sc The step code data struct. 
+ * @return SUCCESS + */ +#define PLUGIN_FETCH_MPE_ERROR( POS, RANK ) \ +int32_t AnalyzeFetchMpe##POS##_##RANK( ExtensibleChip * i_chip, \ + STEP_CODE_DATA_STRUCT & io_sc ) \ +{ \ + ExtensibleChip * mbaChip = getConnectedChild( i_chip, TYPE_MBA, POS ); \ + PRDF_ASSERT( nullptr != mbaChip ); \ + MemRank rank { RANK }; \ + MemEcc::analyzeFetchMpe<TYPE_MBA>( mbaChip, rank, io_sc );\ + return SUCCESS; \ +} \ +PRDF_PLUGIN_DEFINE( cen_centaur, AnalyzeFetchMpe##POS##_##RANK ); + +PLUGIN_FETCH_MPE_ERROR( 0, 0 ) +PLUGIN_FETCH_MPE_ERROR( 0, 1 ) +PLUGIN_FETCH_MPE_ERROR( 0, 2 ) +PLUGIN_FETCH_MPE_ERROR( 0, 3 ) +PLUGIN_FETCH_MPE_ERROR( 0, 4 ) +PLUGIN_FETCH_MPE_ERROR( 0, 5 ) +PLUGIN_FETCH_MPE_ERROR( 0, 6 ) +PLUGIN_FETCH_MPE_ERROR( 0, 7 ) + +PLUGIN_FETCH_MPE_ERROR( 1, 0 ) +PLUGIN_FETCH_MPE_ERROR( 1, 1 ) +PLUGIN_FETCH_MPE_ERROR( 1, 2 ) +PLUGIN_FETCH_MPE_ERROR( 1, 3 ) +PLUGIN_FETCH_MPE_ERROR( 1, 4 ) +PLUGIN_FETCH_MPE_ERROR( 1, 5 ) +PLUGIN_FETCH_MPE_ERROR( 1, 6 ) +PLUGIN_FETCH_MPE_ERROR( 1, 7 ) + +#undef PLUGIN_FETCH_MPE_ERROR + +//------------------------------------------------------------------------------ + +/** + * @brief MBSECCFIR[16] - Mainline CE. + * @param i_chip MEMBUF chip. + * @param io_sc The step code data struct. + * @return SUCCESS + */ +#define PLUGIN_FETCH_NCE_ERROR( POS ) \ +int32_t AnalyzeFetchNce##POS( ExtensibleChip * i_chip, \ + STEP_CODE_DATA_STRUCT & io_sc ) \ +{ \ + ExtensibleChip * mbaChip = getConnectedChild( i_chip, TYPE_MBA, POS ); \ + PRDF_ASSERT( nullptr != mbaChip ); \ + MemEcc::analyzeFetchNceTce<TYPE_MBA, MbaDataBundle *>( mbaChip, io_sc ); \ + return SUCCESS; \ +} \ +PRDF_PLUGIN_DEFINE( cen_centaur, AnalyzeFetchNce##POS ); + +PLUGIN_FETCH_NCE_ERROR( 0 ) +PLUGIN_FETCH_NCE_ERROR( 1 ) + +#undef PLUGIN_FETCH_NCE_ERROR + +//------------------------------------------------------------------------------ + +/** + * @brief MBSECCFIR[19] - Mainline UE. + * @param i_chip MEMBUF chip. + * @param io_sc The step code data struct. 
+ * @return SUCCESS + */ +#define PLUGIN_FETCH_UE_ERROR( POS ) \ +int32_t AnalyzeFetchUe##POS( ExtensibleChip * i_chip, \ + STEP_CODE_DATA_STRUCT & io_sc ) \ +{ \ + ExtensibleChip * mbaChip = getConnectedChild( i_chip, TYPE_MBA, POS ); \ + PRDF_ASSERT( nullptr != mbaChip ); \ + MemEcc::analyzeFetchUe<TYPE_MBA>( mbaChip, io_sc ); \ + return SUCCESS; \ +} \ +PRDF_PLUGIN_DEFINE( cen_centaur, AnalyzeFetchUe##POS ); + +PLUGIN_FETCH_UE_ERROR( 0 ) +PLUGIN_FETCH_UE_ERROR( 1 ) + +#undef PLUGIN_FETCH_UE_ERROR + +//------------------------------------------------------------------------------ + +/** + * @brief MBSECCFIR[17] - Mainline RCE / MBSECCFIR[43] Prefetch UE. + * @param i_chip MEMBUF chip. + * @param io_sc The step code data struct. + * @return SUCCESS + */ +#define PLUGIN_FETCH_RCE_PUE_ERROR( POS ) \ +int32_t AnalyzeFetchRcePue##POS( ExtensibleChip * i_chip, \ + STEP_CODE_DATA_STRUCT & io_sc ) \ +{ \ + ExtensibleChip * mbaChip = getConnectedChild( i_chip, TYPE_MBA, POS ); \ + PRDF_ASSERT( nullptr != mbaChip ); \ + MemEcc::analyzeFetchRcePue<TYPE_MBA>( mbaChip, io_sc ); \ + return SUCCESS; \ +} \ +PRDF_PLUGIN_DEFINE( cen_centaur, AnalyzeFetchRcePue##POS ); + +PLUGIN_FETCH_RCE_PUE_ERROR( 0 ) +PLUGIN_FETCH_RCE_PUE_ERROR( 1 ) + +#undef PLUGIN_FETCH_RCE_PUE_ERROR + //------------------------------------------------------------------------------ } // end namespace cen_centaur diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C b/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C index 6175a4c7c..4c5153e81 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C @@ -555,6 +555,10 @@ template uint32_t analyzeFetchMpe<TYPE_MCA>( ExtensibleChip * i_chip, const MemRank & i_rank, STEP_CODE_DATA_STRUCT & io_sc ); +template +uint32_t analyzeFetchMpe<TYPE_MBA>( ExtensibleChip * i_chip, + const MemRank & i_rank, + STEP_CODE_DATA_STRUCT & io_sc ); 
//------------------------------------------------------------------------------ @@ -765,6 +769,9 @@ uint32_t analyzeFetchNceTce( ExtensibleChip * i_chip, template uint32_t analyzeFetchNceTce<TYPE_MCA, McaDataBundle *>( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc ); +template +uint32_t analyzeFetchNceTce<TYPE_MBA, MbaDataBundle *>( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ); //------------------------------------------------------------------------------ @@ -834,6 +841,9 @@ uint32_t analyzeFetchUe( ExtensibleChip * i_chip, template uint32_t analyzeFetchUe<TYPE_MCA>( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc ); +template +uint32_t analyzeFetchUe<TYPE_MBA>( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ); //------------------------------------------------------------------------------ @@ -1128,6 +1138,68 @@ uint32_t analyzeImpe<TYPE_MCA>( ExtensibleChip * i_chip, //------------------------------------------------------------------------------ +template<> +uint32_t analyzeFetchRcePue<TYPE_MBA>( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + #define PRDF_FUNC "[MemEcc::analyzeFetchRcePue] " + + PRDF_ASSERT( TYPE_MBA == i_chip->getType() ); + + uint32_t o_rc = SUCCESS; + + do + { + // WORKAROUND: An RCE starts as a UE and its address is trapped in the + // MBUER (note: UE fir bit not set at this point). Since + // multiple addresses are retried (not just the failing + // address), the MBRCER will contain the last address + // retried, and not necessarily the address that started out + // with the UE. Therefore, we will use the MBUER instead. + + MemAddr addr; + o_rc = getMemReadAddr<TYPE_MBA>( i_chip, MemAddr::READ_UE_ADDR, addr ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "getMemReadAddr(0x%08x, READ_UE_ADDR) failed", + i_chip->getHuid() ); + break; + } + MemRank rank = addr.getRank(); + + // Callout the rank. 
+ MemoryMru mm { i_chip->getTrgt(), rank, MemoryMruData::CALLOUT_RANK }; + io_sc.service_data->SetCallout( mm ); + + #ifdef __HOSTBOOT_RUNTIME + + // Add an entry to the RCE table. + if ( getMbaDataBundle(i_chip)->iv_rceTable.addEntry(rank, io_sc) ) + { + TdEntry * entry = new TpsEvent<TYPE_MBA>{ i_chip, rank }; + MemDbUtils::pushToQueue<TYPE_MBA>( i_chip, entry ); + o_rc = MemDbUtils::handleTdEvent<TYPE_MBA>( i_chip, io_sc ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "handleTdEvent(0x%08x) failed on rank " + "0x%02x", i_chip->getHuid(), rank.getKey() ); + break; + } + } + + #endif // __HOSTBOOT_RUNTIME + + } while (0); + + MemCaptureData::addEccData<TYPE_MBA>( i_chip, io_sc ); + + return o_rc; + + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + } // end namespace MemEcc } // end namespace PRDF diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.H b/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.H index e01b81a78..1a96afeec 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.H +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.H @@ -194,6 +194,16 @@ uint32_t analyzeMaintIue( ExtensibleChip * i_chip, template<TARGETING::TYPE T> uint32_t analyzeImpe( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc ); +/** + * @brief Analyzes fetch retry CE or prefetch UE errors. + * @param i_chip MBA. + * @param io_sc The step code data struct. + * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. 
+ */ +template<TARGETING::TYPE T> +uint32_t analyzeFetchRcePue( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ); + #ifdef __HOSTBOOT_RUNTIME /** diff --git a/src/usr/diag/prdf/common/plat/pegasus/Membuf.rule b/src/usr/diag/prdf/common/plat/pegasus/Membuf.rule index 61dd6bfba..99280eccc 100755 --- a/src/usr/diag/prdf/common/plat/pegasus/Membuf.rule +++ b/src/usr/diag/prdf/common/plat/pegasus/Membuf.rule @@ -1518,50 +1518,47 @@ rule rMBSECCFIR_0 }; group gMBSECCFIR_0 filter priority ( 19, 41 ), - secondarybits(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16, - 17,18,20,21,22,23,24,25,26,27,28,29,30,31, - 32,33,34,35,36,37,38,39,40,41,42,43,44,45, - 48,50,51) + cs_root_cause( 19, 44, 47, 49 ) { /** MBSECCFIR_0[0] * Memory chip mark on rank 0 */ - (rMBSECCFIR_0, bit(0)) ? analyzeFetchMpe0_0; + (rMBSECCFIR_0, bit(0)) ? verify_chip_mark_0_0; /** MBSECCFIR_0[1] * Memory chip mark on rank 1 */ - (rMBSECCFIR_0, bit(1)) ? analyzeFetchMpe0_1; + (rMBSECCFIR_0, bit(1)) ? verify_chip_mark_0_1; /** MBSECCFIR_0[2] * Memory chip mark on rank 2 */ - (rMBSECCFIR_0, bit(2)) ? analyzeFetchMpe0_2; + (rMBSECCFIR_0, bit(2)) ? verify_chip_mark_0_2; /** MBSECCFIR_0[3] * Memory chip mark on rank 3 */ - (rMBSECCFIR_0, bit(3)) ? analyzeFetchMpe0_3; + (rMBSECCFIR_0, bit(3)) ? verify_chip_mark_0_3; /** MBSECCFIR_0[4] * Memory chip mark on rank 4 */ - (rMBSECCFIR_0, bit(4)) ? analyzeFetchMpe0_4; + (rMBSECCFIR_0, bit(4)) ? verify_chip_mark_0_4; /** MBSECCFIR_0[5] * Memory chip mark on rank 5 */ - (rMBSECCFIR_0, bit(5)) ? analyzeFetchMpe0_5; + (rMBSECCFIR_0, bit(5)) ? verify_chip_mark_0_5; /** MBSECCFIR_0[6] * Memory chip mark on rank 6 */ - (rMBSECCFIR_0, bit(6)) ? analyzeFetchMpe0_6; + (rMBSECCFIR_0, bit(6)) ? verify_chip_mark_0_6; /** MBSECCFIR_0[7] * Memory chip mark on rank 7 */ - (rMBSECCFIR_0, bit(7)) ? analyzeFetchMpe0_7; + (rMBSECCFIR_0, bit(7)) ? 
verify_chip_mark_0_7; /** MBSECCFIR_0[8:15] * Reserved @@ -1571,12 +1568,12 @@ group gMBSECCFIR_0 filter priority ( 19, 41 ), /** MBSECCFIR_0[16] * Memory NCE */ - (rMBSECCFIR_0, bit(16)) ? analyzeFetchNce0; + (rMBSECCFIR_0, bit(16)) ? mainline_nce_handling_0; /** MBSECCFIR_0[17] * Memory RCE */ - (rMBSECCFIR_0, bit(17)) ? analyzeFetchRce0; + (rMBSECCFIR_0, bit(17)) ? mainline_rce_pue_handling_0; /** MBSECCFIR_0[18] * Memory SUE @@ -1586,7 +1583,7 @@ group gMBSECCFIR_0 filter priority ( 19, 41 ), /** MBSECCFIR_0[19] * Memory UE */ - (rMBSECCFIR_0, bit(19)) ? mba0MemoryUe; + (rMBSECCFIR_0, bit(19)) ? mainline_ue_handling_0_UERE; /** MBSECCFIR_0[20:27] * Maintenance chip mark @@ -1636,12 +1633,12 @@ group gMBSECCFIR_0 filter priority ( 19, 41 ), /** MBSECCFIR_0[43] * Prefetch Memory UE */ - (rMBSECCFIR_0, bit(43)) ? analyzeFetchPreUe0; + (rMBSECCFIR_0, bit(43)) ? mainline_rce_pue_handling_0; /** MBSECCFIR_0[44] * Memory RCD parity error */ - (rMBSECCFIR_0, bit(44)) ? defaultMaskedError; + (rMBSECCFIR_0, bit(44)) ? self_th_1_UERE; # CUMULUS_10 /** MBSECCFIR_0[45] * Maintenance RCD parity error @@ -1693,50 +1690,47 @@ rule rMBSECCFIR_1 }; group gMBSECCFIR_1 filter priority ( 19, 41 ), - secondarybits(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16, - 17,18,20,21,22,23,24,25,26,27,28,29,30,31, - 32,33,34,35,36,37,38,39,40,41,42,43,44,45, - 48,50,51) + cs_root_cause( 19, 44, 47, 49 ) { /** MBSECCFIR_1[0] * Memory chip mark on rank 0 */ - (rMBSECCFIR_1, bit(0)) ? analyzeFetchMpe1_0; + (rMBSECCFIR_1, bit(0)) ? verify_chip_mark_1_0; /** MBSECCFIR_1[1] * Memory chip mark on rank 1 */ - (rMBSECCFIR_1, bit(1)) ? analyzeFetchMpe1_1; + (rMBSECCFIR_1, bit(1)) ? verify_chip_mark_1_1; /** MBSECCFIR_1[2] * Memory chip mark on rank 2 */ - (rMBSECCFIR_1, bit(2)) ? analyzeFetchMpe1_2; + (rMBSECCFIR_1, bit(2)) ? verify_chip_mark_1_2; /** MBSECCFIR_1[3] * Memory chip mark on rank 3 */ - (rMBSECCFIR_1, bit(3)) ? analyzeFetchMpe1_3; + (rMBSECCFIR_1, bit(3)) ? 
verify_chip_mark_1_3; /** MBSECCFIR_1[4] * Memory chip mark on rank 4 */ - (rMBSECCFIR_1, bit(4)) ? analyzeFetchMpe1_4; + (rMBSECCFIR_1, bit(4)) ? verify_chip_mark_1_4; /** MBSECCFIR_1[5] * Memory chip mark on rank 5 */ - (rMBSECCFIR_1, bit(5)) ? analyzeFetchMpe1_5; + (rMBSECCFIR_1, bit(5)) ? verify_chip_mark_1_5; /** MBSECCFIR_1[6] * Memory chip mark on rank 6 */ - (rMBSECCFIR_1, bit(6)) ? analyzeFetchMpe1_6; + (rMBSECCFIR_1, bit(6)) ? verify_chip_mark_1_6; /** MBSECCFIR_1[7] * Memory chip mark on rank 7 */ - (rMBSECCFIR_1, bit(7)) ? analyzeFetchMpe1_7; + (rMBSECCFIR_1, bit(7)) ? verify_chip_mark_1_7; /** MBSECCFIR_1[8:15] * Reserved @@ -1746,12 +1740,12 @@ group gMBSECCFIR_1 filter priority ( 19, 41 ), /** MBSECCFIR_1[16] * Memory NCE */ - (rMBSECCFIR_1, bit(16)) ? analyzeFetchNce1; + (rMBSECCFIR_1, bit(16)) ? mainline_nce_handling_1; /** MBSECCFIR_1[17] * Memory RCE */ - (rMBSECCFIR_1, bit(17)) ? analyzeFetchRce1; + (rMBSECCFIR_1, bit(17)) ? mainline_rce_pue_handling_1; /** MBSECCFIR_1[18] * Memory SUE @@ -1761,7 +1755,7 @@ group gMBSECCFIR_1 filter priority ( 19, 41 ), /** MBSECCFIR_1[19] * Memory UE */ - (rMBSECCFIR_1, bit(19)) ? mba1MemoryUe; + (rMBSECCFIR_1, bit(19)) ? mainline_ue_handling_1_UERE; /** MBSECCFIR_1[20:27] * Maintenance chip mark @@ -1811,12 +1805,12 @@ group gMBSECCFIR_1 filter priority ( 19, 41 ), /** MBSECCFIR_1[43] * Prefetch Memory UE */ - (rMBSECCFIR_1, bit(43)) ? analyzeFetchPreUe1; + (rMBSECCFIR_1, bit(43)) ? mainline_rce_pue_handling_1; /** MBSECCFIR_1[44] * Memory RCD parity error */ - (rMBSECCFIR_1, bit(44)) ? defaultMaskedError; + (rMBSECCFIR_1, bit(44)) ? 
self_th_1_UERE; # CUMULUS_10 /** MBSECCFIR_1[45] * Maintenance RCD parity error diff --git a/src/usr/diag/prdf/common/plat/pegasus/Membuf_acts_NEST.rule b/src/usr/diag/prdf/common/plat/pegasus/Membuf_acts_NEST.rule index 8a4869233..c1fbb30bc 100755 --- a/src/usr/diag/prdf/common/plat/pegasus/Membuf_acts_NEST.rule +++ b/src/usr/diag/prdf/common/plat/pegasus/Membuf_acts_NEST.rule @@ -53,85 +53,3 @@ actionclass calloutDmiBusTh1 { calloutDmiBus; threshold1; }; /** Callout the DMI bus, threshold 2 per day */ actionclass calloutDmiBusTh2pday { calloutDmiBus; threshold2pday; }; -/** Analyze a fetch MPE on MBA0 rank 0 */ -actionclass analyzeFetchMpe0_0 { funccall("AnalyzeFetchMpe0_0"); }; - -/** Analyze a fetch MPE on MBA1 rank 0 */ -actionclass analyzeFetchMpe1_0 { funccall("AnalyzeFetchMpe1_0"); }; - -/** Analyze a fetch MPE on MBA0 rank 1 */ -actionclass analyzeFetchMpe0_1 { funccall("AnalyzeFetchMpe0_1"); }; - -/** Analyze a fetch MPE on MBA1 rank 1 */ -actionclass analyzeFetchMpe1_1 { funccall("AnalyzeFetchMpe1_1"); }; - -/** Analyze a fetch MPE on MBA0 rank 2 */ -actionclass analyzeFetchMpe0_2 { funccall("AnalyzeFetchMpe0_2"); }; - -/** Analyze a fetch MPE on MBA1 rank 2 */ -actionclass analyzeFetchMpe1_2 { funccall("AnalyzeFetchMpe1_2"); }; - -/** Analyze a fetch MPE on MBA0 rank 3 */ -actionclass analyzeFetchMpe0_3 { funccall("AnalyzeFetchMpe0_3"); }; - -/** Analyze a fetch MPE on MBA1 rank 3 */ -actionclass analyzeFetchMpe1_3 { funccall("AnalyzeFetchMpe1_3"); }; - -/** Analyze a fetch MPE on MBA0 rank 4 */ -actionclass analyzeFetchMpe0_4 { funccall("AnalyzeFetchMpe0_4"); }; - -/** Analyze a fetch MPE on MBA1 rank 4 */ -actionclass analyzeFetchMpe1_4 { funccall("AnalyzeFetchMpe1_4"); }; - -/** Analyze a fetch MPE on MBA0 rank 5 */ -actionclass analyzeFetchMpe0_5 { funccall("AnalyzeFetchMpe0_5"); }; - -/** Analyze a fetch MPE on MBA1 rank 5 */ -actionclass analyzeFetchMpe1_5 { funccall("AnalyzeFetchMpe1_5"); }; - -/** Analyze a fetch MPE on MBA0 rank 6 */ 
-actionclass analyzeFetchMpe0_6 { funccall("AnalyzeFetchMpe0_6"); }; - -/** Analyze a fetch MPE on MBA1 rank 6 */ -actionclass analyzeFetchMpe1_6 { funccall("AnalyzeFetchMpe1_6"); }; - -/** Analyze a fetch MPE on MBA0 rank 7 */ -actionclass analyzeFetchMpe0_7 { funccall("AnalyzeFetchMpe0_7"); }; - -/** Analyze a fetch MPE on MBA1 rank 7 */ -actionclass analyzeFetchMpe1_7 { funccall("AnalyzeFetchMpe1_7"); }; - -/** Analyze a fetch NCE on MBA0 */ -actionclass analyzeFetchNce0 { funccall("AnalyzeFetchNce0"); }; - -/** Analyze a fetch NCE on MBA1 */ -actionclass analyzeFetchNce1 { funccall("AnalyzeFetchNce1"); }; - -/** Analyze a fetch RCE on MBA0 */ -actionclass analyzeFetchRce0 { funccall("AnalyzeFetchRce0"); }; - -/** Analyze a fetch RCE on MBA1 */ -actionclass analyzeFetchRce1 { funccall("AnalyzeFetchRce1"); }; - -/** Analyze a PreFetch Ue on MBA0 */ -actionclass analyzeFetchPreUe0 { funccall("AnalyzeFetchPreUe0"); }; - -/** Analyze a PreFetch Ue on MBA1 */ -actionclass analyzeFetchPreUe1 { funccall("AnalyzeFetchPreUe1"); }; - -/** Analyze a fetch UE on MBA0 */ -actionclass analyzeFetchUe0 -{ - funccall("AnalyzeFetchUe0"); - threshold( field(33 / 30 min ) ); - SUEGenerationPoint; -}; - -/** Analyze a fetch UE on MBA1 */ -actionclass analyzeFetchUe1 -{ - funccall("AnalyzeFetchUe1"); - threshold( field(33 / 30 min ) ); - SUEGenerationPoint; -}; - diff --git a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMembuf.C b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMembuf.C index 0ff352b44..2a2b8da3d 100755 --- a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMembuf.C +++ b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMembuf.C @@ -270,396 +270,6 @@ PRDF_PLUGIN_DEFINE( Membuf, maxSparesExceeded ); // //############################################################################## -/** - * @brief MBSECCFIR[0:7] - Mailine MPE. - * @param i_chip MEMBUF chip. - * @param io_sc The step code data struct. 
- * @return SUCCESS - */ -#define PLUGIN_FETCH_MPE_ERROR( POS, RANK ) \ -int32_t AnalyzeFetchMpe##POS_##RANK( ExtensibleChip * i_chip, \ - STEP_CODE_DATA_STRUCT & io_sc ) \ -{ \ - ExtensibleChip * mbaChip = getConnectedChild( i_chip, TYPE_MBA, POS ); \ - PRDF_ASSERT( nullptr != mbaChip ); \ - MemEcc::analyzeFetchMpe<TYPE_MBA, MbaDataBundle *>( mbaChip, RANK, io_sc );\ - return SUCCESS; \ -} \ -PRDF_PLUGIN_DEFINE( Membuf, AnalyzeFetchMpe##POS_##RANK ); - -PLUGIN_FETCH_MPE_ERROR( 0, 0 ) -PLUGIN_FETCH_MPE_ERROR( 0, 1 ) -PLUGIN_FETCH_MPE_ERROR( 0, 2 ) -PLUGIN_FETCH_MPE_ERROR( 0, 3 ) -PLUGIN_FETCH_MPE_ERROR( 0, 4 ) -PLUGIN_FETCH_MPE_ERROR( 0, 5 ) -PLUGIN_FETCH_MPE_ERROR( 0, 6 ) -PLUGIN_FETCH_MPE_ERROR( 0, 7 ) - -PLUGIN_FETCH_MPE_ERROR( 1, 0 ) -PLUGIN_FETCH_MPE_ERROR( 1, 1 ) -PLUGIN_FETCH_MPE_ERROR( 1, 2 ) -PLUGIN_FETCH_MPE_ERROR( 1, 3 ) -PLUGIN_FETCH_MPE_ERROR( 1, 4 ) -PLUGIN_FETCH_MPE_ERROR( 1, 5 ) -PLUGIN_FETCH_MPE_ERROR( 1, 6 ) -PLUGIN_FETCH_MPE_ERROR( 1, 7 ) - -#undef PLUGIN_FETCH_MPE_ERROR - -//------------------------------------------------------------------------------ - -/** - * @brief MBSECCFIR[16] - Fetch New CE (NCE). - * @param i_membChip A Centaur chip. - * @param i_sc The step code data struct. - * @param i_mbaPos The MBA position. 
- * @return SUCCESS - */ -int32_t AnalyzeFetchNce( ExtensibleChip * i_membChip, - STEP_CODE_DATA_STRUCT & i_sc, uint32_t i_mbaPos ) -{ - #define PRDF_FUNC "[AnalyzeFetchNce] " - - int32_t l_rc = SUCCESS; - - ExtensibleChip * mbaChip = NULL; - - do - { - CenMembufDataBundle * membdb = getMembufDataBundle( i_membChip ); - mbaChip = membdb->getMbaChip( i_mbaPos ); - if ( NULL == mbaChip ) - { - PRDF_ERR( PRDF_FUNC "getMbaChip() returned NULL" ); - l_rc = FAIL; break; - } - TargetHandle_t mbaTrgt = mbaChip->GetChipHandle(); - - CenAddr addr; - l_rc = getCenReadAddr( i_membChip, i_mbaPos, READ_NCE_ADDR, addr ); - if ( SUCCESS != l_rc ) - { - PRDF_ERR( PRDF_FUNC "getCenReadAddr() failed" ); - break; - } - CenRank rank = addr.getRank(); - - if ( 0x20 > getChipLevel(i_membChip->GetChipHandle()) ) - { - // There is a bug in DD1.x where the value of MBSEVR cannot be - // trusted. The workaround is too complicated for its value so - // callout the rank instead. - MemoryMru memmru ( mbaTrgt, rank, MemoryMruData::CALLOUT_RANK ); - i_sc.service_data->SetCallout( memmru ); - } - else // DD2.0+ - { - // Get the failing symbol - const char * reg_str = (0 == i_mbaPos) ? "MBA0_MBSEVR" - : "MBA1_MBSEVR"; - SCAN_COMM_REGISTER_CLASS * reg = i_membChip->getRegister(reg_str); - l_rc = reg->Read(); - if ( SUCCESS != l_rc ) - { - PRDF_ERR( PRDF_FUNC "Read() failed on %s", reg_str ); - break; - } - - uint8_t galois = reg->GetBitFieldJustified( 40, 8 ); - uint8_t mask = reg->GetBitFieldJustified( 32, 8 ); - - CenSymbol symbol = CenSymbol::fromGalois( mbaTrgt, rank, galois, - mask ); - if ( !symbol.isValid() ) - { - PRDF_ERR( PRDF_FUNC "Failed to create symbol: galois=0x%02x " - "mask=0x%02x", galois, mask ); - break; - } - - // Check if this symbol is on any of the spares. - CenSymbol sp0, sp1, ecc; - l_rc = mssGetSteerMux( mbaTrgt, rank, sp0, sp1, ecc ); - if ( SUCCESS != l_rc ) - { - PRDF_ERR( PRDF_FUNC "mssGetSteerMux() failed. 
HUID: 0x%08x " - "rank: %d", getHuid(mbaTrgt), rank.getMaster() ); - break; - } - if ( (sp0.isValid() && (sp0.getDram() == symbol.getDram())) || - (sp1.isValid() && (sp1.getDram() == symbol.getDram())) ) - { - symbol.setDramSpared(); - } - if ( ecc.isValid() && (ecc.getDram() == symbol.getDram()) ) - { - symbol.setEccSpared(); - } - - // Add the DIMM to the callout list - MemoryMru memmru ( mbaTrgt, rank, symbol ); - i_sc.service_data->SetCallout( memmru, MRU_MEDA ); - - // Add to CE table - CenMbaDataBundle * mbadb = getMbaDataBundle( mbaChip ); - uint32_t ceTableRc = mbadb->iv_ceTable.addEntry( addr, symbol ); - bool doTps = false; - - // Check MNFG thresholds, if needed. - if ( mfgMode() ) - { - // Get the MNFG CE thresholds. - uint32_t dramTh, hrTh, dimmTh; - getMnfgMemCeTh( mbaChip, rank, dramTh, hrTh, dimmTh ); - - // Get counts from CE table. - uint32_t dramCount, hrCount, dimmCount; - mbadb->iv_ceTable.getMnfgCounts( addr.getRank(), symbol, - dramCount, hrCount, - dimmCount ); - - if ( dramTh < dramCount ) - { - i_sc.service_data->AddSignatureList( mbaTrgt, - PRDFSIG_MnfgDramCte ); - i_sc.service_data->setServiceCall(); - doTps = true; - } - else if ( hrTh < hrCount ) - { - i_sc.service_data->AddSignatureList( mbaTrgt, - PRDFSIG_MnfgHrCte ); - i_sc.service_data->setServiceCall(); - doTps = true; - } - else if ( dimmTh < dimmCount ) - { - i_sc.service_data->AddSignatureList( mbaTrgt, - PRDFSIG_MnfgDimmCte ); - i_sc.service_data->setServiceCall(); - doTps = true; - } - else if ( 0 != (CenMbaCeTable::TABLE_FULL & ceTableRc) ) - { - i_sc.service_data->AddSignatureList( mbaTrgt, - PRDFSIG_MnfgTableFull); - - // The table is full and no other threshold has been met. - // We are in a state where we may never hit a MNFG - // threshold. Callout all memory behind the MBA. Also, since - // the counts are all over the place, there may be a problem - // with the MBA. So call it out as well. 
- MemoryMru all_mm ( mbaTrgt, rank, - MemoryMruData::CALLOUT_ALL_MEM ); - i_sc.service_data->SetCallout( all_mm, MRU_MEDA ); - i_sc.service_data->SetCallout( mbaTrgt, MRU_MEDA ); - i_sc.service_data->setServiceCall(); - doTps = true; - } - else if ( 0 != (CenMbaCeTable::ENTRY_TH_REACHED & ceTableRc) ) - { - i_sc.service_data->AddSignatureList( mbaTrgt, - PRDFSIG_MnfgEntryCte ); - - // There is a single entry threshold and no other threshold - // has been met. This is a potential flooding issue, so make - // the DIMM callout predictive. - i_sc.service_data->setServiceCall(); - doTps = true; - } - } - else // field - { - doTps = ( CenMbaCeTable::NO_TH_REACHED != ceTableRc ); - } - - // Initiate a TPS procedure, if needed. - if ( doTps ) - { - // If a MNFG threshold has been reached (predictive callout), we - // will still try to start TPS just in case MNFG disables the - // termination policy. - - // Will not be able to do TPS during hostboot. Note that we will - // still call handleTdEvent() so we can get the trace statement - // indicating TPS was requested during Hostboot. - - l_rc = mbadb->iv_tdCtlr.handleTdEvent( i_sc, rank, - CenMbaTdCtlrCommon::TPS_EVENT ); - if ( SUCCESS != l_rc ) - { - PRDF_ERR( PRDF_FUNC "handleTdEvent() failed: rank=m%ds%d", - rank.getMaster(), rank.getSlave() ); - break; - } - } - } - - } while (0); - - // Add ECC capture data for FFDC. - if ( NULL != mbaChip ) - MemCaptureData::addEccData<TYPE_MBA>( mbaChip, i_sc ); - - if ( SUCCESS != l_rc ) - { - PRDF_ERR( PRDF_FUNC "Failed: i_membChip=0x%08x i_mbaPos=%d", - i_membChip->GetId(), i_mbaPos ); - CalloutUtil::defaultError( i_sc ); - } - - return SUCCESS; // Intentionally return SUCCESS for this plugin - - #undef PRDF_FUNC -} - -//------------------------------------------------------------------------------ - -/** - * @brief Fetch Retry CE / Prefetch UE Errors. - * @param i_membChip A Centaur chip. - * @param i_sc The step code data struct. - * @param i_mbaPos The MBA position. 
- * @param i_isRceError True for RCE error false otherwise. - * @return SUCCESS - */ -int32_t AnalyzeFetchRcePue( ExtensibleChip * i_membChip, - STEP_CODE_DATA_STRUCT & i_sc, uint32_t i_mbaPos, - bool i_isRceError ) -{ - #define PRDF_FUNC "[AnalyzeFetchRcePue] " - - int32_t l_rc = SUCCESS; - - ExtensibleChip * mbaChip = NULL; - - do - { - CenMembufDataBundle * membdb = getMembufDataBundle( i_membChip ); - mbaChip = membdb->getMbaChip( i_mbaPos ); - if ( NULL == mbaChip ) - { - PRDF_ERR( PRDF_FUNC "getMbaChip() returned NULL" ); - l_rc = FAIL; break; - } - - CenMbaDataBundle * mbadb = getMbaDataBundle( mbaChip ); - - // WORKAROUND: Since an RCE starts as a UE, it's address is trapped in - // MBUER (note: UE fir bit not set at this point). But since multiple - // addresses are retried (not just the failing address), MBRCER will - // contain the last address retried, and not necessarily the address - // that started out with the UE. - - CenAddr addr; - l_rc = getCenReadAddr( i_membChip, i_mbaPos, READ_UE_ADDR, addr ); - if ( SUCCESS != l_rc ) - { - PRDF_ERR( PRDF_FUNC "getCenReadAddr() failed" ); - break; - } - CenRank rank = addr.getRank(); - - // Callout the rank. - MemoryMru memmru ( mbaChip->GetChipHandle(), rank, - MemoryMruData::CALLOUT_RANK ); - i_sc.service_data->SetCallout( memmru ); - - // Add an entry to the RCE table. - if ( mbadb->iv_rceTable.addEntry(rank, i_sc) ) - { - // Add a TPS request to the queue TD queue. - l_rc = mbadb->iv_tdCtlr.handleTdEvent( i_sc, rank, - CenMbaTdCtlrCommon::TPS_EVENT ); - if ( SUCCESS != l_rc ) - { - PRDF_ERR( PRDF_FUNC "handleTdEvent() failed." ); - break; - } - } - - } while (0); - - // Add ECC capture data for FFDC. - if ( NULL != mbaChip ) - MemCaptureData::addEccData<TYPE_MBA>( mbaChip, i_sc ); - - if ( SUCCESS != l_rc ) - { - PRDF_ERR( PRDF_FUNC "Failed: i_membChip=0x%08x i_mbaPos=%d " - "i_isRceError=%c", i_membChip->GetId(), i_mbaPos, - i_isRceError ? 
'T' : 'F' ); - CalloutUtil::defaultError( i_sc ); - } - - return SUCCESS; // Intentionally return SUCCESS for this plugin - - #undef PRDF_FUNC -} - -//------------------------------------------------------------------------------ - -/** - * @brief MBSECCFIR[19] - Mainline UE. - * @param i_chip MEMBUF chip. - * @param io_sc The step code data struct. - * @return SUCCESS - */ -#define PLUGIN_FETCH_UE_ERROR( POS ) \ -int32_t AnalyzeFetchUe##POS( ExtensibleChip * i_chip, \ - STEP_CODE_DATA_STRUCT & io_sc ) \ -{ \ - ExtensibleChip * mbaChip = getConnectedChild( i_chip, TYPE_MBA, POS ); \ - PRDF_ASSERT( nullptr != mbaChip ); \ - MemEcc::analyzeFetchUe<TYPE_MBA, MbaDataBundle *>( mbaChip, io_sc ); \ - return SUCCESS; \ -} \ -PRDF_PLUGIN_DEFINE( Membuf, AnalyzeFetchUe##POS ); - -PLUGIN_FETCH_UE_ERROR( 0 ) -PLUGIN_FETCH_UE_ERROR( 1 ) - -#undef PLUGIN_FETCH_UE_ERROR - -//------------------------------------------------------------------------------ - -// Define the plugins for memory ECC errors. -#define PLUGIN_FETCH_ECC_ERROR( TYPE, MBA ) \ -int32_t AnalyzeFetch##TYPE##MBA( ExtensibleChip * i_membChip, \ - STEP_CODE_DATA_STRUCT & i_sc ) \ -{ \ - return AnalyzeFetch##TYPE( i_membChip, i_sc, MBA ); \ -} \ -PRDF_PLUGIN_DEFINE( Membuf, AnalyzeFetch##TYPE##MBA ); - -PLUGIN_FETCH_ECC_ERROR( Nce, 0 ) -PLUGIN_FETCH_ECC_ERROR( Nce, 1 ) - -#undef PLUGIN_FETCH_ECC_ERROR - -// Handling for RCE and prefetch UE is similar. -// So use common macro and function ( AnalyzeFetchRcePue ). - -#define PLUGIN_FETCH_RCE_PREUE_ERROR( TYPE, MBA, IS_RCE ) \ -int32_t AnalyzeFetch##TYPE##MBA( ExtensibleChip * i_membChip, \ - STEP_CODE_DATA_STRUCT & i_sc ) \ -{ \ - return AnalyzeFetchRcePue( i_membChip, i_sc, MBA, IS_RCE ); \ -} \ -PRDF_PLUGIN_DEFINE( Membuf, AnalyzeFetch##TYPE##MBA ); - -// This is bit inefficient. 1st and 3rd argument have 1 to 1 -// mapping. But to keep macro expansion simple, using extra argument. 
-PLUGIN_FETCH_RCE_PREUE_ERROR( Rce, 0, true ) -PLUGIN_FETCH_RCE_PREUE_ERROR( Rce, 1, true ) -PLUGIN_FETCH_RCE_PREUE_ERROR( PreUe, 0, false ) -PLUGIN_FETCH_RCE_PREUE_ERROR( PreUe, 1, false ) - -#undef PLUGIN_FETCH_RCE_PREUE_ERROR - -//------------------------------------------------------------------------------ - int32_t calloutInterface_dmi( ExtensibleChip * i_membChip, STEP_CODE_DATA_STRUCT & io_sc ) { |