summaryrefslogtreecommitdiffstats
path: root/src/usr/diag
diff options
context:
space:
mode:
authorCaleb Palmer <cnpalmer@us.ibm.com>2017-04-26 09:32:41 -0500
committerZane C. Shelley <zshelle@us.ibm.com>2017-05-17 14:43:24 -0400
commit5eb64e93621a53282ba7115cacc092eb9a8e588b (patch)
tree205a5dd5a932a1ea765532ef90aec4efcf63ae69 /src/usr/diag
parent158c1ca0eaa97d496646830610a0b0038602c527 (diff)
downloadtalos-hostboot-5eb64e93621a53282ba7115cacc092eb9a8e588b.tar.gz
talos-hostboot-5eb64e93621a53282ba7115cacc092eb9a8e588b.zip
PRD: Isolation for HWP Failure
Change-Id: I384c7874d1acb583afa0c2d11492e83acacddee7 RTC: 172287 Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/39724 Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com> Reviewed-by: Benjamin J. Weisenbeck <bweisenb@us.ibm.com> Reviewed-by: Brian J. Stegmiller <bjs@us.ibm.com> Reviewed-by: Zane C. Shelley <zshelle@us.ibm.com> Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/40597 Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com> Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Diffstat (limited to 'src/usr/diag')
-rw-r--r--src/usr/diag/prdf/common/plat/p9/p9_mca.rule4
-rw-r--r--src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule15
-rw-r--r--src/usr/diag/prdf/common/plat/p9/prdfCommonPlugins.C42
-rw-r--r--src/usr/diag/prdf/common/plat/p9/prdfP9PllDomain.C13
4 files changed, 72 insertions, 2 deletions
diff --git a/src/usr/diag/prdf/common/plat/p9/p9_mca.rule b/src/usr/diag/prdf/common/plat/p9/p9_mca.rule
index 1494622db..4dd5677ec 100644
--- a/src/usr/diag/prdf/common/plat/p9/p9_mca.rule
+++ b/src/usr/diag/prdf/common/plat/p9/p9_mca.rule
@@ -247,7 +247,7 @@ group gMCACALFIR filter priority( 13 ), cs_root_cause( 4, 13, 14 )
/** MCACALFIR[2]
* Excessive refreshes to a single rank.
*/
- (rMCACALFIR, bit(2)) ? self_th_32perDay;
+ (rMCACALFIR, bit(2)) ? excessive_refreshes;
/** MCACALFIR[3]
* Err detected in the MBA debug WAT logic
@@ -262,7 +262,7 @@ group gMCACALFIR filter priority( 13 ), cs_root_cause( 4, 13, 14 )
/** MCACALFIR[5]
* Calibration complete indication xout
*/
- (rMCACALFIR, bit(5)) ? self_th_1;
+ (rMCACALFIR, bit(5)) ? calibration_complete;
/** MCACALFIR[6]
* Emergency Throttle
diff --git a/src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule b/src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule
index a18372c0f..196a95485 100644
--- a/src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule
+++ b/src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule
@@ -115,6 +115,7 @@ actionclass maintenance_iaue_handling
/** MCA/UE algorithm, threshold 5 per day */
actionclass mca_ue_algorithm_th_5perDay
{
+ funccall("HwpErrorIsolation");
calloutSelfMed;
threshold5pday;
funccall("mcaUeAlgorithm"); # must be called last
@@ -128,3 +129,17 @@ actionclass mca_ue_algorithm_th_1
funccall("mcaUeAlgorithm"); # must be called last
};
+/** Excessive refreshes to a single rank: HwpErrorIsolation plugin plus self_th_32perDay */
+actionclass excessive_refreshes
+{
+ funccall("HwpErrorIsolation");
+ self_th_32perDay;
+};
+
+/** Calibration complete indication xout: HwpErrorIsolation plugin plus self_th_1 */
+actionclass calibration_complete
+{
+ funccall("HwpErrorIsolation");
+ self_th_1;
+};
+
diff --git a/src/usr/diag/prdf/common/plat/p9/prdfCommonPlugins.C b/src/usr/diag/prdf/common/plat/p9/prdfCommonPlugins.C
index ebf4f8ca7..eeedc505b 100644
--- a/src/usr/diag/prdf/common/plat/p9/prdfCommonPlugins.C
+++ b/src/usr/diag/prdf/common/plat/p9/prdfCommonPlugins.C
@@ -31,6 +31,10 @@
#include <iipServiceDataCollector.h>
#include <prdfExtensibleChip.H>
#include <prdfPluginMap.H>
+#include <prdfPlatServices.H>
+#include <xspprdService.h>
+
+using namespace TARGETING;
namespace PRDF
{
@@ -92,6 +96,44 @@ int32_t ClearServiceCallFlag_mnfgInfo( ExtensibleChip * i_chip,
}
PRDF_PLUGIN_DEFINE_NS(p9_nimbus, CommonPlugins, ClearServiceCallFlag_mnfgInfo);
+
+/**
+ * @brief Error isolation for errors that may cause a HWP to fail: a non-zero
+ * PRD_HWP_PLID is linked to the PRD error log, which is then made predictive.
+ * @param i_chip PROC or MCA
+ * @param io_sc Step code data struct
+ * @returns SUCCESS always
+ */
+int32_t HwpErrorIsolation( ExtensibleChip * i_chip,
+ STEP_CODE_DATA_STRUCT & io_sc )
+{
+ #if defined (__HOSTBOOT_MODULE) && !defined(__HOSTBOOT_RUNTIME)

+ TargetHandle_t trgt = i_chip->getTrgt();
+ uint32_t plid = trgt->getAttr<ATTR_PRD_HWP_PLID>();

+ // Only act when the PLID attribute holds a non-zero value
+ if ( 0 != plid )
+ {
+ // Link HWP PLID to PRD error log. NOTE(review): errl is not null-checked.
+ errlHndl_t errl =
+ ServiceGeneratorClass::ThisServiceGenerator().getErrl();
+ errl->plid(plid);

+ // Make the error log and callouts predictive
+ io_sc.service_data->setServiceCall();

+ // Clear PRD_HWP_PLID attribute so the same PLID is not linked again
+ trgt->setAttr<ATTR_PRD_HWP_PLID>( 0 );
+ }

+ #endif

+ return SUCCESS;
+}
+PRDF_PLUGIN_DEFINE_NS(p9_nimbus, CommonPlugins, HwpErrorIsolation);
+PRDF_PLUGIN_DEFINE_NS(p9_mca, CommonPlugins, HwpErrorIsolation);
+
} // namespace CommonPlugins ends
}// namespace PRDF ends
diff --git a/src/usr/diag/prdf/common/plat/p9/prdfP9PllDomain.C b/src/usr/diag/prdf/common/plat/p9/prdfP9PllDomain.C
index 4819869ac..472eaae1f 100644
--- a/src/usr/diag/prdf/common/plat/p9/prdfP9PllDomain.C
+++ b/src/usr/diag/prdf/common/plat/p9/prdfP9PllDomain.C
@@ -162,11 +162,24 @@ int32_t PllDomain::Analyze(STEP_CODE_DATA_STRUCT & serviceData,
}
}
+ // On a SYS or PCI PLL unlock error, also run the HwpErrorIsolation plugin
+ // to perform the additional isolation needed when a HWP has failed
+ ExtensibleChipFunction * l_hwpErrIsolation =
+ l_chip->getExtensibleFunction("HwpErrorIsolation");
+
// Update error lists
if (l_errType & SYS_PLL_UNLOCK)
+ {
sysRefList.push_back( l_chip );
+ (*l_hwpErrIsolation)(l_chip,
+ PluginDef::bindParm<STEP_CODE_DATA_STRUCT&>(serviceData));
+ }
if (l_errType & PCI_PLL_UNLOCK)
+ {
pciList.push_back( l_chip );
+ (*l_hwpErrIsolation)(l_chip,
+ PluginDef::bindParm<STEP_CODE_DATA_STRUCT&>(serviceData));
+ }
if (l_errType & SYS_OSC_FAILOVER)
mfFoList.push_back( l_chip );
if (l_errType & PCI_OSC_FAILOVER)
OpenPOWER on IntegriCloud