diff options
author | Zane Shelley <zshelle@us.ibm.com> | 2017-02-10 16:30:47 -0600 |
---|---|---|
committer | Zane C. Shelley <zshelle@us.ibm.com> | 2017-02-16 12:32:30 -0500 |
commit | a4c70975fc0248d66bbfaa5a18b9817cd0d4384a (patch) | |
tree | ff30a2f9463237407a391bc8a11b5c3cbd02759b | |
parent | 2fc2af3a3531e91e0f20c245742e6fd08d283d7f (diff) | |
download | talos-hostboot-a4c70975fc0248d66bbfaa5a18b9817cd0d4384a.tar.gz talos-hostboot-a4c70975fc0248d66bbfaa5a18b9817cd0d4384a.zip |
PRD: rule file updates for MCA unit
Change-Id: Ib37b1ec290081428a9c627ec1683568382d3612f
RTC: 169104
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/36316
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Reviewed-by: Benjamin J. Weisenbeck <bweisenb@us.ibm.com>
Reviewed-by: Caleb N. Palmer <cnpalmer@us.ibm.com>
Reviewed-by: Zane C. Shelley <zshelle@us.ibm.com>
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/36515
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
-rw-r--r-- | src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C | 51 | ||||
-rw-r--r-- | src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.H | 13 | ||||
-rw-r--r-- | src/usr/diag/prdf/common/plat/mem/prdfP9Mca_common.C | 83 | ||||
-rw-r--r-- | src/usr/diag/prdf/common/plat/p9/p9_common_actions.rule | 8 | ||||
-rw-r--r-- | src/usr/diag/prdf/common/plat/p9/p9_mca.rule | 18 | ||||
-rw-r--r-- | src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule | 23 | ||||
-rw-r--r-- | src/usr/diag/prdf/common/plat/p9/p9_mca_regs.rule | 13 | ||||
-rw-r--r-- | src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_ipl.C | 6 | ||||
-rw-r--r-- | src/usr/diag/prdf/plat/mem/prdfMemVcm_ipl.C | 11 |
9 files changed, 199 insertions, 27 deletions
diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C b/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C index 3c6b757fb..23e97f0d2 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C @@ -49,6 +49,51 @@ namespace MemEcc //------------------------------------------------------------------------------ +template<> +void calloutMemUe<TYPE_MCA>( ExtensibleChip * i_chip, const MemRank & i_rank, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + #define PRDF_FUNC "[MemEcc::calloutMemUe] " + + PRDF_ASSERT( TYPE_MCA == i_chip->getType() ); + + SCAN_COMM_REGISTER_CLASS * fir = i_chip->getRegister( "DDRPHYFIR" ); + int32_t l_rc = fir->Read(); + if ( SUCCESS != l_rc ) + { + PRDF_ERR( PRDF_FUNC "Read() failed on DDRPHYFIR: i_chip=0x%08x", + i_chip->getHuid() ); + } + + // Check DDRPHYFIR[54:55,57:59] to determine if this UE is a side-effect. + if ( SUCCESS == l_rc && (0 != (fir->GetBitFieldJustified(54,6) & 0x37)) ) + { + // Callout the MCA. + io_sc.service_data->SetCallout( i_chip->getTrgt() ); + } + else + { + // Callout the rank anyway. + MemoryMru memmru ( i_chip->getTrgt(), i_rank, + MemoryMruData::CALLOUT_RANK ); + io_sc.service_data->SetCallout( memmru ); + } + + #undef PRDF_FUNC +} + +template<> +void calloutMemUe<TYPE_MBA>( ExtensibleChip * i_chip, const MemRank & i_rank, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + PRDF_ASSERT( TYPE_MBA == i_chip->getType() ); + + MemoryMru memmru ( i_chip->getTrgt(), i_rank, MemoryMruData::CALLOUT_RANK ); + io_sc.service_data->SetCallout( memmru ); +} + +//------------------------------------------------------------------------------ + #ifdef __HOSTBOOT_RUNTIME template<TARGETING::TYPE T> @@ -258,11 +303,9 @@ uint32_t analyzeFetchUe( ExtensibleChip * i_chip, D db = static_cast<D>(i_chip->getDataBundle()); db->iv_ueTable.addEntry( UE_TABLE::FETCH_UE, addr ); - // Callout the rank. + // Make the hardware callout. 
MemRank rank = addr.getRank(); - MemoryMru memmru ( i_chip->getTrgt(), rank, - MemoryMruData::CALLOUT_RANK ); - io_sc.service_data->SetCallout( memmru ); + calloutMemUe<T>( i_chip, rank, io_sc ); #ifdef __HOSTBOOT_RUNTIME diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.H b/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.H index 58c1fa4f4..00dbd33e5 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.H +++ b/src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.H @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2016 */ +/* Contributors Listed Below - COPYRIGHT 2016,2017 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -41,6 +41,17 @@ namespace MemEcc { /** + * @brief Will check if the UE is a side-effect attention and make a callout + * appropriately. + * @param i_chip MCA or MBA. + * @param i_rank Target rank. + * @param io_sc The step code data struct. + */ +template<TARGETING::TYPE T> +void calloutMemUe( ExtensibleChip * i_chip, const MemRank & i_rank, + STEP_CODE_DATA_STRUCT & io_sc ); + +/** * @brief Analyzes a fetch MPE attention. * @param i_chip MCA or MBA. * @param i_rank Target rank. diff --git a/src/usr/diag/prdf/common/plat/mem/prdfP9Mca_common.C b/src/usr/diag/prdf/common/plat/mem/prdfP9Mca_common.C index 5605bf32d..271856750 100644 --- a/src/usr/diag/prdf/common/plat/mem/prdfP9Mca_common.C +++ b/src/usr/diag/prdf/common/plat/mem/prdfP9Mca_common.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2016 */ +/* Contributors Listed Below - COPYRIGHT 2016,2017 */ /* [+] International Business Machines Corp. 
*/ /* */ /* */ @@ -82,6 +82,87 @@ PRDF_PLUGIN_DEFINE( p9_mca, PostAnalysis ); //############################################################################## // +// DDRPHYFIR +// +//############################################################################## + +/** + * @brief DDRPHYFIR[54:55,57:59] MCA/UE algorithm + * @param i_chip MCA chip. + * @param io_sc The step code data struct. + * @return PRD_NO_CLEAR_FIR_BITS if the attention is to be masked and left + * uncleared, otherwise SUCCESS. + */ +int32_t mcaUeAlgorithm( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + #define PRDF_FUNC "[p9_mca::mcaUeAlgorithm] " + + SCAN_COMM_REGISTER_CLASS * fir = nullptr; + SCAN_COMM_REGISTER_CLASS * msk = nullptr; + + // If the attention is currently at threshold or if there is a mainline or + // maintenance UE on at the same time as the attention: + // - Make the error log predictive. + // - Mask the attention. + // - Do not clear the attention. This will be used during maintenance and + // memory UE analysis to indicate that the MCA should be called out + // instead of the DIMMs. This unconventional process is needed because + // maintenance UEs are always masked (handled manually in maintenance + // command complete attentions) and memory UEs will get unmasked anytime + // Targeted Diagnostics is complete on that area of memory. So we never + // truly have a way to permanently mask the UEs. + + bool maskDoNotClearAttn = io_sc.service_data->IsAtThreshold(); + + if ( !maskDoNotClearAttn ) + { + fir = i_chip->getRegister("MCAECCFIR"); + if ( SUCCESS != fir->Read() ) + { + PRDF_ERR( PRDF_FUNC "Read() failed on MCAECCFIR: i_chip=0x%08x", + i_chip->getHuid() ); + } + else + { + maskDoNotClearAttn = fir->IsBitSet(14) || fir->IsBitSet(34); + } + } + + if ( maskDoNotClearAttn ) + { + // Get the active attentions of DDRPHYFIR[54:55,57:59] and mask. 
+ fir = i_chip->getRegister("DDRPHYFIR"); + + if ( SUCCESS != fir->Read() ) + { + PRDF_ERR( PRDF_FUNC "Read() failed on DDRPHYFIR: i_chip=0x%08x", + i_chip->getHuid() ); + } + else + { + uint64_t tmp = fir->GetBitFieldJustified(54, 6) & 0x37; + + msk = i_chip->getRegister("DDRPHYFIR_MASK_OR"); + + msk->clearAllBits(); + msk->SetBitFieldJustified( 54, 6, tmp ); + + if ( SUCCESS != msk->Write() ) + { + PRDF_ERR( PRDF_FUNC "Write() failed on DDRPHYFIR_MASK_OR: " + "i_chip=0x%08x", i_chip->getHuid() ); + } + } + } + + return maskDoNotClearAttn ? PRD_NO_CLEAR_FIR_BITS : SUCCESS; + + #undef PRDF_FUNC +} +PRDF_PLUGIN_DEFINE( p9_mca, mcaUeAlgorithm ); + +//############################################################################## +// // MCAECCFIR // //############################################################################## diff --git a/src/usr/diag/prdf/common/plat/p9/p9_common_actions.rule b/src/usr/diag/prdf/common/plat/p9/p9_common_actions.rule index 45e65f0d5..564535bb5 100644 --- a/src/usr/diag/prdf/common/plat/p9/p9_common_actions.rule +++ b/src/usr/diag/prdf/common/plat/p9/p9_common_actions.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2016 +# Contributors Listed Below - COPYRIGHT 2016,2017 # [+] International Business Machines Corp. 
# # @@ -83,6 +83,12 @@ actionclass threshold5phour threshold( field(5 / hour) ); }; +/** Threshold of 5 per day */ +actionclass threshold5pday +{ + threshold( field(5 / day) ); +}; + ################################################################################ # Threshold and Mask policy ################################################################################ diff --git a/src/usr/diag/prdf/common/plat/p9/p9_mca.rule b/src/usr/diag/prdf/common/plat/p9/p9_mca.rule index 0fc5ec54b..787c8700a 100644 --- a/src/usr/diag/prdf/common/plat/p9/p9_mca.rule +++ b/src/usr/diag/prdf/common/plat/p9/p9_mca.rule @@ -414,7 +414,7 @@ group gMCAECCFIR filter singlebit, cs_root_cause( 14 ) /** MCAECCFIR[17] * Mainline read IUE */ - (rMCAECCFIR, bit(17)) ? defaultMaskedError; + (rMCAECCFIR, bit(17)) ? mainline_iue_handling; /** MCAECCFIR[18] * Mainline read IRCD @@ -424,7 +424,7 @@ group gMCAECCFIR filter singlebit, cs_root_cause( 14 ) /** MCAECCFIR[19] * Mainline read IMPE */ - (rMCAECCFIR, bit(19)) ? defaultMaskedError; + (rMCAECCFIR, bit(19)) ? mainline_impe_handling; /** MCAECCFIR[20:27] * Maintenance MPE @@ -479,7 +479,7 @@ group gMCAECCFIR filter singlebit, cs_root_cause( 14 ) /** MCAECCFIR[37] * Maintenance IUE */ - (rMCAECCFIR, bit(37)) ? defaultMaskedError; + (rMCAECCFIR, bit(37)) ? maintenance_iue_handling; /** MCAECCFIR[38] * Maintenance IRCD @@ -489,7 +489,7 @@ group gMCAECCFIR filter singlebit, cs_root_cause( 14 ) /** MCAECCFIR[39] * Maintenance IMPE */ - (rMCAECCFIR, bit(39)) ? defaultMaskedError; + (rMCAECCFIR, bit(39)) ? maintenance_impe_handling; /** MCAECCFIR[40] * spare @@ -625,12 +625,12 @@ group gDDRPHYFIR filter singlebit, cs_root_cause /** DDRPHYFIR[54] * Non-recoverable FSM error */ - (rDDRPHYFIR, bit(54)) ? defaultMaskedError; + (rDDRPHYFIR, bit(54)) ? mca_ue_algorithm_th_5perDay; /** DDRPHYFIR[55] * Full bus impact Register Parity Error */ - (rDDRPHYFIR, bit(55)) ? defaultMaskedError; + (rDDRPHYFIR, bit(55)) ? 
mca_ue_algorithm_th_1; /** DDRPHYFIR[56] * DDRPHY Parity errors @@ -640,17 +640,17 @@ group gDDRPHYFIR filter singlebit, cs_root_cause /** DDRPHYFIR[57] * FSM errors */ - (rDDRPHYFIR, bit(57)) ? defaultMaskedError; + (rDDRPHYFIR, bit(57)) ? mca_ue_algorithm_th_5perDay; /** DDRPHYFIR[58] * Register parity error impacting 16 bits */ - (rDDRPHYFIR, bit(58)) ? defaultMaskedError; + (rDDRPHYFIR, bit(58)) ? mca_ue_algorithm_th_1; /** DDRPHYFIR[59] * Register parity error impacting 8 bits */ - (rDDRPHYFIR, bit(59)) ? defaultMaskedError; + (rDDRPHYFIR, bit(59)) ? mca_ue_algorithm_th_1; /** DDRPHYFIR[60] * Register PE 4 bit impact diff --git a/src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule b/src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule index 6579b1d84..f708196a8 100644 --- a/src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule +++ b/src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2016 +# Contributors Listed Below - COPYRIGHT 2016,2017 # [+] International Business Machines Corp. 
# # @@ -76,3 +76,24 @@ actionclass rcd_parity_error funccall("RcdParityError"); # Run TPS on TH for all MCA ranks }; +actionclass mainline_iue_handling { TBDDefaultCallout; }; # TODO RTC 165383 +actionclass mainline_impe_handling { TBDDefaultCallout; }; # TODO RTC 165384 +actionclass maintenance_iue_handling { TBDDefaultCallout; }; # TODO RTC 165383 +actionclass maintenance_impe_handling { TBDDefaultCallout; }; # TODO RTC 165384 + +/** MCA/UE algorithm, threshold 5 per day */ +actionclass mca_ue_algorithm_th_5perDay +{ + calloutSelfMed; + threshold5pday; + funccall("mcaUeAlgorithm"); # must be called last +}; + +/** MCA/UE algorithm, threshold 1 */ +actionclass mca_ue_algorithm_th_1 +{ + calloutSelfMed; + threshold1; + funccall("mcaUeAlgorithm"); # must be called last +}; + diff --git a/src/usr/diag/prdf/common/plat/p9/p9_mca_regs.rule b/src/usr/diag/prdf/common/plat/p9/p9_mca_regs.rule index 0317b5914..cb16de1f0 100644 --- a/src/usr/diag/prdf/common/plat/p9/p9_mca_regs.rule +++ b/src/usr/diag/prdf/common/plat/p9/p9_mca_regs.rule @@ -30,6 +30,7 @@ ############################################################################ # P9 MCA target MCAECCFIR ############################################################################ + register MCAECCFIR_AND { name "P9 MCA target MCAECCFIR AND"; @@ -39,6 +40,18 @@ }; ############################################################################ + # P9 MCA target DDRPHYFIR + ############################################################################ + + register DDRPHYFIR_MASK_OR + { + name "P9 MCA target DDRPHYFIR MASK OR"; + scomaddr 0x07011005; + capture group never; + access write_only; + }; + + ############################################################################ # P9 Hardware Mark Stores ############################################################################ register HW_MS0 diff --git a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_ipl.C b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_ipl.C index 7490802fb..d17021c7a 
100644 --- a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_ipl.C +++ b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_ipl.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2016 */ +/* Contributors Listed Below - COPYRIGHT 2016,2017 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -30,6 +30,7 @@ #include <prdfMemTdCtlr.H> // Platform includes +#include <prdfMemEccAnalysis.H> #include <prdfMemMark.H> #include <prdfMemoryMru.H> #include <prdfMemScrubUtils.H> @@ -137,8 +138,7 @@ uint32_t __checkEcc( ExtensibleChip * i_chip, const MemRank & i_rank, io_sc.service_data->setSignature( huid, PRDFSIG_MaintUE ); // Add the rank to the callout list. - MemoryMru mm { trgt, i_rank, MemoryMruData::CALLOUT_RANK }; - io_sc.service_data->SetCallout( mm ); + MemEcc::calloutMemUe<T>( i_chip, i_rank, io_sc ); // Make the error log predictive. io_sc.service_data->setServiceCall(); diff --git a/src/usr/diag/prdf/plat/mem/prdfMemVcm_ipl.C b/src/usr/diag/prdf/plat/mem/prdfMemVcm_ipl.C index 00a230ca5..5ab4183bf 100644 --- a/src/usr/diag/prdf/plat/mem/prdfMemVcm_ipl.C +++ b/src/usr/diag/prdf/plat/mem/prdfMemVcm_ipl.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2016 */ +/* Contributors Listed Below - COPYRIGHT 2016,2017 */ /* [+] International Business Machines Corp. 
*/ /* */ /* */ @@ -26,6 +26,7 @@ /** @file prdfMemVcm_ipl.C */ // Platform includes +#include <prdfMemEccAnalysis.H> #include <prdfMemVcm.H> #include <prdfMemScrubUtils.H> #include <prdfPlatServices.H> @@ -98,9 +99,7 @@ uint32_t VcmEvent<TYPE_MCA>::nextStep( STEP_CODE_DATA_STRUCT & io_sc, } //Add the rank to the callout list (via MemoryMru) - MemoryMru memmru(iv_chip->getTrgt(), iv_rank, - MemoryMruData::CALLOUT_RANK); - io_sc.service_data->SetCallout( memmru ); + MemEcc::calloutMemUe<TYPE_MCA>( iv_chip, iv_rank, io_sc ); //Make the error log predictive io_sc.service_data->setServiceCall(); @@ -159,9 +158,7 @@ uint32_t VcmEvent<TYPE_MCA>::nextStep( STEP_CODE_DATA_STRUCT & io_sc, } //Add the rank to the callout list (via MemoryMru) - MemoryMru memmru(iv_chip->getTrgt(), iv_rank, - MemoryMruData::CALLOUT_RANK); - io_sc.service_data->SetCallout( memmru ); + MemEcc::calloutMemUe<TYPE_MCA>( iv_chip, iv_rank, io_sc ); //Make the error log predictive io_sc.service_data->setServiceCall(); |