diff options
author | Zane Shelley <zshelle@us.ibm.com> | 2018-01-17 16:13:38 -0600 |
---|---|---|
committer | Zane C. Shelley <zshelle@us.ibm.com> | 2018-01-26 11:36:21 -0500 |
commit | d55a6fa5234f5eff4ebb55b71fb8ff6e399654ec (patch) | |
tree | 4aca87b375837c4e9a04d79364ed0bdbc094dcb5 /src/usr | |
parent | ecedb32fc183bd3381a060e9116819bc25e516eb (diff) | |
download | talos-hostboot-d55a6fa5234f5eff4ebb55b71fb8ff6e399654ec.tar.gz talos-hostboot-d55a6fa5234f5eff4ebb55b71fb8ff6e399654ec.zip |
PRD: generic solution to associate HWP failures with PRD isolation
Change-Id: I17a45889db343c94c24f24b5c10fb72579b66dea
CQ: SW413495
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/52130
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Reviewed-by: Caleb N. Palmer <cnpalmer@us.ibm.com>
Reviewed-by: Benjamin J. Weisenbeck <bweisenb@us.ibm.com>
Reviewed-by: Zane C. Shelley <zshelle@us.ibm.com>
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/52671
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Diffstat (limited to 'src/usr')
-rw-r--r-- | src/usr/diag/prdf/common/plat/p9/p9_mca.rule | 6 | ||||
-rw-r--r-- | src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule | 15 | ||||
-rw-r--r-- | src/usr/diag/prdf/common/plat/p9/prdfCommonPlugins.C | 40 | ||||
-rw-r--r-- | src/usr/diag/prdf/common/plat/p9/prdfP9PllDomain.C | 27 | ||||
-rwxr-xr-x | src/usr/diag/prdf/common/plat/prdfTargetServices.C | 36 | ||||
-rwxr-xr-x | src/usr/diag/prdf/common/plat/prdfTargetServices.H | 27 | ||||
-rw-r--r-- | src/usr/diag/prdf/common/rule/prdfRuleMetaData.C | 5 |
7 files changed, 77 insertions, 79 deletions
diff --git a/src/usr/diag/prdf/common/plat/p9/p9_mca.rule b/src/usr/diag/prdf/common/plat/p9/p9_mca.rule index 9a2b0dfcf..4101b6e0a 100644 --- a/src/usr/diag/prdf/common/plat/p9/p9_mca.rule +++ b/src/usr/diag/prdf/common/plat/p9/p9_mca.rule @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2016,2017 +# Contributors Listed Below - COPYRIGHT 2016,2018 # [+] International Business Machines Corp. # # @@ -247,7 +247,7 @@ group gMCACALFIR filter priority( 13 ), cs_root_cause( 4, 13, 14 ) /** MCACALFIR[2] * Excessive refreshes to a single rank. */ - (rMCACALFIR, bit(2)) ? excessive_refreshes; + (rMCACALFIR, bit(2)) ? self_th_32perDay; /** MCACALFIR[3] * Err detected in the MBA debug WAT logic @@ -262,7 +262,7 @@ group gMCACALFIR filter priority( 13 ), cs_root_cause( 4, 13, 14 ) /** MCACALFIR[5] * Calibration complete indication xout */ - (rMCACALFIR, bit(5)) ? calibration_complete; + (rMCACALFIR, bit(5)) ? self_th_32perDay; /** MCACALFIR[6] * Emergency Throttle diff --git a/src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule b/src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule index 5cbb91e63..fc37c4809 100644 --- a/src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule +++ b/src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule @@ -118,7 +118,6 @@ actionclass maintenance_iaue_handling /** MCA/UE algorithm, threshold 5 per day */ actionclass mca_ue_algorithm_th_5perDay { - funccall("HwpErrorIsolation"); calloutSelfMed; threshold5pday; funccall("mcaUeAlgorithm"); # must be called last @@ -132,17 +131,3 @@ actionclass mca_ue_algorithm_th_1 funccall("mcaUeAlgorithm"); # must be called last }; -/** Excessive refreshes to a single rank */ -actionclass excessive_refreshes -{ - funccall("HwpErrorIsolation"); - self_th_32perDay; -}; - -/** Calibration complete indication xout */ -actionclass calibration_complete -{ - funccall("HwpErrorIsolation"); - self_th_32perDay; -}; - diff --git a/src/usr/diag/prdf/common/plat/p9/prdfCommonPlugins.C 
b/src/usr/diag/prdf/common/plat/p9/prdfCommonPlugins.C index ea2565edb..c25f131b8 100644 --- a/src/usr/diag/prdf/common/plat/p9/prdfCommonPlugins.C +++ b/src/usr/diag/prdf/common/plat/p9/prdfCommonPlugins.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2016,2017 */ +/* Contributors Listed Below - COPYRIGHT 2016,2018 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -103,44 +103,6 @@ int32_t ClearServiceCallFlag_mnfgInfo( ExtensibleChip * i_chip, PRDF_PLUGIN_DEFINE_NS(p9_nimbus, CommonPlugins, ClearServiceCallFlag_mnfgInfo); PRDF_PLUGIN_DEFINE_NS(p9_cumulus, CommonPlugins, ClearServiceCallFlag_mnfgInfo); -/** - * @brief PRD will perform error isolation for certain errors that may cause - * a HWP to fail. - * @param i_chip PROC or MCA - * @param i_sc Step code data struct - * @returns SUCCESS always - */ -int32_t HwpErrorIsolation( ExtensibleChip * i_chip, - STEP_CODE_DATA_STRUCT & io_sc ) -{ - #if defined (__HOSTBOOT_MODULE) && !defined(__HOSTBOOT_RUNTIME) - - TargetHandle_t trgt = i_chip->getTrgt(); - uint32_t plid = trgt->getAttr<ATTR_PRD_HWP_PLID>(); - - // Check for non-zero value in PLID attribute - if ( 0 != plid ) - { - // Link HWP PLID to PRD error log - errlHndl_t errl = - ServiceGeneratorClass::ThisServiceGenerator().getErrl(); - errl->plid(plid); - - // Make the error log and callouts predictive - io_sc.service_data->setServiceCall(); - - // Clear PRD_HWP_PLID attribute - trgt->setAttr<ATTR_PRD_HWP_PLID>( 0 ); - } - - #endif - - return SUCCESS; -} -PRDF_PLUGIN_DEFINE_NS(p9_nimbus, CommonPlugins, HwpErrorIsolation); -PRDF_PLUGIN_DEFINE_NS(p9_cumulus, CommonPlugins, HwpErrorIsolation); -PRDF_PLUGIN_DEFINE_NS(p9_mca, CommonPlugins, HwpErrorIsolation); - } // namespace CommonPlugins ends }// namespace PRDF ends diff --git a/src/usr/diag/prdf/common/plat/p9/prdfP9PllDomain.C b/src/usr/diag/prdf/common/plat/p9/prdfP9PllDomain.C index b99e77bc8..46f4a40c2 100644 --- 
a/src/usr/diag/prdf/common/plat/p9/prdfP9PllDomain.C +++ b/src/usr/diag/prdf/common/plat/p9/prdfP9PllDomain.C @@ -163,28 +163,17 @@ int32_t PllDomain::Analyze(STEP_CODE_DATA_STRUCT & serviceData, } // In the case of a PLL_UNLOCK error, we want to do additional isolation - // in case of a HWP failure - ExtensibleChipFunction * l_hwpErrIsolation = - l_chip->getExtensibleFunction("HwpErrorIsolation"); - - // Update error lists - if (l_errType & SYS_PLL_UNLOCK) + // in case of a HWP failure. + if ( (l_errType & SYS_PLL_UNLOCK) || (l_errType & PCI_PLL_UNLOCK) ) { - sysRefList.push_back( l_chip ); - (*l_hwpErrIsolation)(l_chip, - PluginDef::bindParm<STEP_CODE_DATA_STRUCT&>(serviceData)); + PlatServices::hwpErrorIsolation( l_chip, serviceData ); } - if (l_errType & PCI_PLL_UNLOCK) - { - pciList.push_back( l_chip ); - (*l_hwpErrIsolation)(l_chip, - PluginDef::bindParm<STEP_CODE_DATA_STRUCT&>(serviceData)); - } - if (l_errType & SYS_OSC_FAILOVER) - mfFoList.push_back( l_chip ); - if (l_errType & PCI_OSC_FAILOVER) - sysRefFoList.push_back( l_chip ); + // Update error lists + if (l_errType & SYS_PLL_UNLOCK ) sysRefList.push_back( l_chip ); + if (l_errType & PCI_PLL_UNLOCK ) pciList.push_back( l_chip ); + if (l_errType & SYS_OSC_FAILOVER) mfFoList.push_back( l_chip ); + if (l_errType & PCI_OSC_FAILOVER) sysRefFoList.push_back( l_chip ); } // end for each chip in domain diff --git a/src/usr/diag/prdf/common/plat/prdfTargetServices.C b/src/usr/diag/prdf/common/plat/prdfTargetServices.C index ab1964049..52a762b14 100755 --- a/src/usr/diag/prdf/common/plat/prdfTargetServices.C +++ b/src/usr/diag/prdf/common/plat/prdfTargetServices.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2016,2017 */ +/* Contributors Listed Below - COPYRIGHT 2016,2018 */ /* [+] International Business Machines Corp. 
*/ /* */ /* */ @@ -35,12 +35,14 @@ #include <prdfTargetServices.H> // Framework includes +#include <iipServiceDataCollector.h> #include <iipSystem.h> #include <prdfAssert.h> #include <prdfErrlUtil.H> #include <prdfExtensibleChip.H> #include <prdfGlobal.H> #include <prdfTrace.H> +#include <xspprdService.h> // External includes #include <algorithm> @@ -120,6 +122,38 @@ bool hasRedundantClocks() //############################################################################## //## +//## General Utility Functions +//## +//############################################################################## + +void hwpErrorIsolation( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc ) +{ + #if defined (__HOSTBOOT_MODULE) && !defined(__HOSTBOOT_RUNTIME) + + TargetHandle_t trgt = i_chip->getTrgt(); + uint32_t plid = 0; + + // Check for non-zero value in PLID attribute. + if ( trgt->tryGetAttr<ATTR_PRD_HWP_PLID>(plid) && (0 != plid) ) + { + PRDF_INF( "ATTR_PRD_HWP_PLID found on 0x%08x with value 0x%08x", + getHuid(trgt), plid ); + + // Link HWP PLID to PRD error log. + ServiceGeneratorClass::ThisServiceGenerator().getErrl()->plid( plid ); + + // Clear PRD_HWP_PLID attribute. + trgt->setAttr<ATTR_PRD_HWP_PLID>( 0 ); + + // Make the error log and callouts predictive. 
+ io_sc.service_data->setServiceCall(); + } + + #endif +} + +//############################################################################## +//## //## Target Manipulation Utility Functions //## //############################################################################## diff --git a/src/usr/diag/prdf/common/plat/prdfTargetServices.H b/src/usr/diag/prdf/common/plat/prdfTargetServices.H index 60b930136..e0df471a2 100755 --- a/src/usr/diag/prdf/common/plat/prdfTargetServices.H +++ b/src/usr/diag/prdf/common/plat/prdfTargetServices.H @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2016,2017 */ +/* Contributors Listed Below - COPYRIGHT 2016,2018 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -49,6 +49,7 @@ namespace PRDF class ExtensibleChip; class MemRank; +struct STEP_CODE_DATA_STRUCT; typedef std::vector<ExtensibleChip *> ExtensibleChipList; @@ -160,6 +161,30 @@ void setHWStateChanged(TARGETING::TargetHandle_t i_target); //############################################################################## //## +//## General Utility Functions +//## +//############################################################################## + +/** + * @brief Gives the ability to link HWP error logs with PRD error logs. + * + * During the IPL, a HWP may find a FIR attention that could cause the HWP to + * fail. The HWP will create/commit an error log and write the PLID in + * ATTR_PRD_HWP_PLID (all via FAPI interface). If PRD isolates to a target with + * a non-zero ATTR_PRD_HWP_PLID, it will link the two error logs together, clear + * the attribute, and override the thresholding to make the error log + * predictive. + * + * @note It is a requirement for the HWP to set the attribute on the same + * target as the active FIR attention. + * + * @param i_chip Any chip. + * @param io_sc Step code data struct. 
+ */ +void hwpErrorIsolation( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc); + +//############################################################################## +//## //## getConnected() support functions //## //############################################################################## diff --git a/src/usr/diag/prdf/common/rule/prdfRuleMetaData.C b/src/usr/diag/prdf/common/rule/prdfRuleMetaData.C index 85c9b7602..ae4fcb973 100644 --- a/src/usr/diag/prdf/common/rule/prdfRuleMetaData.C +++ b/src/usr/diag/prdf/common/rule/prdfRuleMetaData.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2016,2017 */ +/* Contributors Listed Below - COPYRIGHT 2016,2018 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -487,6 +487,9 @@ int32_t RuleMetaData::Analyze( STEP_CODE_DATA_STRUCT & i_serviceData, ); //@pw01 } + // Additional error isolation for HWPs, if needed. + PlatServices::hwpErrorIsolation( l_chipAnalyzed, i_serviceData ); + // Call postanalysis plugin. // @jl02 JL Adding PostAnalysis plugin call. ExtensibleChipFunction * l_postanalysis = |