diff options
author | Caleb Palmer <cnpalmer@us.ibm.com> | 2017-04-26 09:32:41 -0500 |
---|---|---|
committer | Zane C. Shelley <zshelle@us.ibm.com> | 2017-05-17 14:43:24 -0400 |
commit | 5eb64e93621a53282ba7115cacc092eb9a8e588b (patch) | |
tree | 205a5dd5a932a1ea765532ef90aec4efcf63ae69 /src/usr/diag | |
parent | 158c1ca0eaa97d496646830610a0b0038602c527 (diff) | |
download | talos-hostboot-5eb64e93621a53282ba7115cacc092eb9a8e588b.tar.gz talos-hostboot-5eb64e93621a53282ba7115cacc092eb9a8e588b.zip |
PRD: Isolation for HWP Failure
Change-Id: I384c7874d1acb583afa0c2d11492e83acacddee7
RTC: 172287
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/39724
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Reviewed-by: Benjamin J. Weisenbeck <bweisenb@us.ibm.com>
Reviewed-by: Brian J. Stegmiller <bjs@us.ibm.com>
Reviewed-by: Zane C. Shelley <zshelle@us.ibm.com>
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/40597
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Diffstat (limited to 'src/usr/diag')
-rw-r--r-- | src/usr/diag/prdf/common/plat/p9/p9_mca.rule | 4 |
-rw-r--r-- | src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule | 15 |
-rw-r--r-- | src/usr/diag/prdf/common/plat/p9/prdfCommonPlugins.C | 42 |
-rw-r--r-- | src/usr/diag/prdf/common/plat/p9/prdfP9PllDomain.C | 13 |
4 files changed, 72 insertions, 2 deletions
diff --git a/src/usr/diag/prdf/common/plat/p9/p9_mca.rule b/src/usr/diag/prdf/common/plat/p9/p9_mca.rule index 1494622db..4dd5677ec 100644 --- a/src/usr/diag/prdf/common/plat/p9/p9_mca.rule +++ b/src/usr/diag/prdf/common/plat/p9/p9_mca.rule @@ -247,7 +247,7 @@ group gMCACALFIR filter priority( 13 ), cs_root_cause( 4, 13, 14 ) /** MCACALFIR[2] * Excessive refreshes to a single rank. */ - (rMCACALFIR, bit(2)) ? self_th_32perDay; + (rMCACALFIR, bit(2)) ? excessive_refreshes; /** MCACALFIR[3] * Err detected in the MBA debug WAT logic @@ -262,7 +262,7 @@ group gMCACALFIR filter priority( 13 ), cs_root_cause( 4, 13, 14 ) /** MCACALFIR[5] * Calibration complete indication xout */ - (rMCACALFIR, bit(5)) ? self_th_1; + (rMCACALFIR, bit(5)) ? calibration_complete; /** MCACALFIR[6] * Emergency Throttle diff --git a/src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule b/src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule index a18372c0f..196a95485 100644 --- a/src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule +++ b/src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule @@ -115,6 +115,7 @@ actionclass maintenance_iaue_handling /** MCA/UE algroithm, threshold 5 per day */ actionclass mca_ue_algorithm_th_5perDay { + funccall("HwpErrorIsolation"); calloutSelfMed; threshold5pday; funccall("mcaUeAlgorithm"); # must be called last @@ -128,3 +129,17 @@ actionclass mca_ue_algorithm_th_1 funccall("mcaUeAlgorithm"); # must be called last }; +/** Excessive refreshes to a single rank */ +actionclass excessive_refreshes +{ + funccall("HwpErrorIsolation"); + self_th_32perDay; +}; + +/** Calibration complete indication xout */ +actionclass calibration_complete +{ + funccall("HwpErrorIsolation"); + self_th_1; +}; + diff --git a/src/usr/diag/prdf/common/plat/p9/prdfCommonPlugins.C b/src/usr/diag/prdf/common/plat/p9/prdfCommonPlugins.C index ebf4f8ca7..eeedc505b 100644 --- a/src/usr/diag/prdf/common/plat/p9/prdfCommonPlugins.C +++ b/src/usr/diag/prdf/common/plat/p9/prdfCommonPlugins.C @@ -31,6 
+31,10 @@ #include <iipServiceDataCollector.h> #include <prdfExtensibleChip.H> #include <prdfPluginMap.H> +#include <prdfPlatServices.H> +#include <xspprdService.h> + +using namespace TARGETING; namespace PRDF { @@ -92,6 +96,44 @@ int32_t ClearServiceCallFlag_mnfgInfo( ExtensibleChip * i_chip, } PRDF_PLUGIN_DEFINE_NS(p9_nimbus, CommonPlugins, ClearServiceCallFlag_mnfgInfo); + +/** + * @brief PRD will perform error isolation for certain errors that may cause + * a HWP to fail. + * @param i_chip PROC or MCA + * @param i_sc Step code data struct + * @returns SUCCESS always + */ +int32_t HwpErrorIsolation( ExtensibleChip * i_chip, + STEP_CODE_DATA_STRUCT & io_sc ) +{ + #if defined (__HOSTBOOT_MODULE) && !defined(__HOSTBOOT_RUNTIME) + + TargetHandle_t trgt = i_chip->getTrgt(); + uint32_t plid = trgt->getAttr<ATTR_PRD_HWP_PLID>(); + + // Check for non-zero value in PLID attribute + if ( 0 != plid ) + { + // Link HWP PLID to PRD error log + errlHndl_t errl = + ServiceGeneratorClass::ThisServiceGenerator().getErrl(); + errl->plid(plid); + + // Make the error log and callouts predictive + io_sc.service_data->setServiceCall(); + + // Clear PRD_HWP_PLID attribute + trgt->setAttr<ATTR_PRD_HWP_PLID>( 0 ); + } + + #endif + + return SUCCESS; +} +PRDF_PLUGIN_DEFINE_NS(p9_nimbus, CommonPlugins, HwpErrorIsolation); +PRDF_PLUGIN_DEFINE_NS(p9_mca, CommonPlugins, HwpErrorIsolation); + } // namespace CommonPlugins ends }// namespace PRDF ends diff --git a/src/usr/diag/prdf/common/plat/p9/prdfP9PllDomain.C b/src/usr/diag/prdf/common/plat/p9/prdfP9PllDomain.C index 4819869ac..472eaae1f 100644 --- a/src/usr/diag/prdf/common/plat/p9/prdfP9PllDomain.C +++ b/src/usr/diag/prdf/common/plat/p9/prdfP9PllDomain.C @@ -162,11 +162,24 @@ int32_t PllDomain::Analyze(STEP_CODE_DATA_STRUCT & serviceData, } } + // In the case of a PLL_UNLOCK error, we want to do additional isolation + // in case of a HWP failure + ExtensibleChipFunction * l_hwpErrIsolation = + 
l_chip->getExtensibleFunction("HwpErrorIsolation"); + // Update error lists if (l_errType & SYS_PLL_UNLOCK) + { sysRefList.push_back( l_chip ); + (*l_hwpErrIsolation)(l_chip, + PluginDef::bindParm<STEP_CODE_DATA_STRUCT&>(serviceData)); + } if (l_errType & PCI_PLL_UNLOCK) + { pciList.push_back( l_chip ); + (*l_hwpErrIsolation)(l_chip, + PluginDef::bindParm<STEP_CODE_DATA_STRUCT&>(serviceData)); + } if (l_errType & SYS_OSC_FAILOVER) mfFoList.push_back( l_chip ); if (l_errType & PCI_OSC_FAILOVER) |