summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorZane Shelley <zshelle@us.ibm.com>2018-01-17 16:13:38 -0600
committerZane C. Shelley <zshelle@us.ibm.com>2018-01-26 11:36:21 -0500
commitd55a6fa5234f5eff4ebb55b71fb8ff6e399654ec (patch)
tree4aca87b375837c4e9a04d79364ed0bdbc094dcb5 /src
parentecedb32fc183bd3381a060e9116819bc25e516eb (diff)
downloadtalos-hostboot-d55a6fa5234f5eff4ebb55b71fb8ff6e399654ec.tar.gz
talos-hostboot-d55a6fa5234f5eff4ebb55b71fb8ff6e399654ec.zip
PRD: generic solution to associate HWP failures with PRD isolation
Change-Id: I17a45889db343c94c24f24b5c10fb72579b66dea CQ: SW413495 Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/52130 Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com> Reviewed-by: Caleb N. Palmer <cnpalmer@us.ibm.com> Reviewed-by: Benjamin J. Weisenbeck <bweisenb@us.ibm.com> Reviewed-by: Zane C. Shelley <zshelle@us.ibm.com> Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/52671 Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com> Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Diffstat (limited to 'src')
-rw-r--r--src/usr/diag/prdf/common/plat/p9/p9_mca.rule6
-rw-r--r--src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule15
-rw-r--r--src/usr/diag/prdf/common/plat/p9/prdfCommonPlugins.C40
-rw-r--r--src/usr/diag/prdf/common/plat/p9/prdfP9PllDomain.C27
-rwxr-xr-xsrc/usr/diag/prdf/common/plat/prdfTargetServices.C36
-rwxr-xr-xsrc/usr/diag/prdf/common/plat/prdfTargetServices.H27
-rw-r--r--src/usr/diag/prdf/common/rule/prdfRuleMetaData.C5
7 files changed, 77 insertions, 79 deletions
diff --git a/src/usr/diag/prdf/common/plat/p9/p9_mca.rule b/src/usr/diag/prdf/common/plat/p9/p9_mca.rule
index 9a2b0dfcf..4101b6e0a 100644
--- a/src/usr/diag/prdf/common/plat/p9/p9_mca.rule
+++ b/src/usr/diag/prdf/common/plat/p9/p9_mca.rule
@@ -5,7 +5,7 @@
#
# OpenPOWER HostBoot Project
#
-# Contributors Listed Below - COPYRIGHT 2016,2017
+# Contributors Listed Below - COPYRIGHT 2016,2018
# [+] International Business Machines Corp.
#
#
@@ -247,7 +247,7 @@ group gMCACALFIR filter priority( 13 ), cs_root_cause( 4, 13, 14 )
/** MCACALFIR[2]
* Excessive refreshes to a single rank.
*/
- (rMCACALFIR, bit(2)) ? excessive_refreshes;
+ (rMCACALFIR, bit(2)) ? self_th_32perDay;
/** MCACALFIR[3]
* Err detected in the MBA debug WAT logic
@@ -262,7 +262,7 @@ group gMCACALFIR filter priority( 13 ), cs_root_cause( 4, 13, 14 )
/** MCACALFIR[5]
* Calibration complete indication xout
*/
- (rMCACALFIR, bit(5)) ? calibration_complete;
+ (rMCACALFIR, bit(5)) ? self_th_32perDay;
/** MCACALFIR[6]
* Emergency Throttle
diff --git a/src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule b/src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule
index 5cbb91e63..fc37c4809 100644
--- a/src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule
+++ b/src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule
@@ -118,7 +118,6 @@ actionclass maintenance_iaue_handling
/** MCA/UE algroithm, threshold 5 per day */
actionclass mca_ue_algorithm_th_5perDay
{
- funccall("HwpErrorIsolation");
calloutSelfMed;
threshold5pday;
funccall("mcaUeAlgorithm"); # must be called last
@@ -132,17 +131,3 @@ actionclass mca_ue_algorithm_th_1
funccall("mcaUeAlgorithm"); # must be called last
};
-/** Excessive refreshes to a single rank */
-actionclass excessive_refreshes
-{
- funccall("HwpErrorIsolation");
- self_th_32perDay;
-};
-
-/** Calibration complete indication xout */
-actionclass calibration_complete
-{
- funccall("HwpErrorIsolation");
- self_th_32perDay;
-};
-
diff --git a/src/usr/diag/prdf/common/plat/p9/prdfCommonPlugins.C b/src/usr/diag/prdf/common/plat/p9/prdfCommonPlugins.C
index ea2565edb..c25f131b8 100644
--- a/src/usr/diag/prdf/common/plat/p9/prdfCommonPlugins.C
+++ b/src/usr/diag/prdf/common/plat/p9/prdfCommonPlugins.C
@@ -5,7 +5,7 @@
/* */
/* OpenPOWER HostBoot Project */
/* */
-/* Contributors Listed Below - COPYRIGHT 2016,2017 */
+/* Contributors Listed Below - COPYRIGHT 2016,2018 */
/* [+] International Business Machines Corp. */
/* */
/* */
@@ -103,44 +103,6 @@ int32_t ClearServiceCallFlag_mnfgInfo( ExtensibleChip * i_chip,
PRDF_PLUGIN_DEFINE_NS(p9_nimbus, CommonPlugins, ClearServiceCallFlag_mnfgInfo);
PRDF_PLUGIN_DEFINE_NS(p9_cumulus, CommonPlugins, ClearServiceCallFlag_mnfgInfo);
-/**
- * @brief PRD will perform error isolation for certain errors that may cause
- * a HWP to fail.
- * @param i_chip PROC or MCA
- * @param i_sc Step code data struct
- * @returns SUCCESS always
- */
-int32_t HwpErrorIsolation( ExtensibleChip * i_chip,
- STEP_CODE_DATA_STRUCT & io_sc )
-{
- #if defined (__HOSTBOOT_MODULE) && !defined(__HOSTBOOT_RUNTIME)
-
- TargetHandle_t trgt = i_chip->getTrgt();
- uint32_t plid = trgt->getAttr<ATTR_PRD_HWP_PLID>();
-
- // Check for non-zero value in PLID attribute
- if ( 0 != plid )
- {
- // Link HWP PLID to PRD error log
- errlHndl_t errl =
- ServiceGeneratorClass::ThisServiceGenerator().getErrl();
- errl->plid(plid);
-
- // Make the error log and callouts predictive
- io_sc.service_data->setServiceCall();
-
- // Clear PRD_HWP_PLID attribute
- trgt->setAttr<ATTR_PRD_HWP_PLID>( 0 );
- }
-
- #endif
-
- return SUCCESS;
-}
-PRDF_PLUGIN_DEFINE_NS(p9_nimbus, CommonPlugins, HwpErrorIsolation);
-PRDF_PLUGIN_DEFINE_NS(p9_cumulus, CommonPlugins, HwpErrorIsolation);
-PRDF_PLUGIN_DEFINE_NS(p9_mca, CommonPlugins, HwpErrorIsolation);
-
} // namespace CommonPlugins ends
}// namespace PRDF ends
diff --git a/src/usr/diag/prdf/common/plat/p9/prdfP9PllDomain.C b/src/usr/diag/prdf/common/plat/p9/prdfP9PllDomain.C
index b99e77bc8..46f4a40c2 100644
--- a/src/usr/diag/prdf/common/plat/p9/prdfP9PllDomain.C
+++ b/src/usr/diag/prdf/common/plat/p9/prdfP9PllDomain.C
@@ -163,28 +163,17 @@ int32_t PllDomain::Analyze(STEP_CODE_DATA_STRUCT & serviceData,
}
// In the case of a PLL_UNLOCK error, we want to do additional isolation
- // in case of a HWP failure
- ExtensibleChipFunction * l_hwpErrIsolation =
- l_chip->getExtensibleFunction("HwpErrorIsolation");
-
- // Update error lists
- if (l_errType & SYS_PLL_UNLOCK)
+ // in case of a HWP failure.
+ if ( (l_errType & SYS_PLL_UNLOCK) || (l_errType & PCI_PLL_UNLOCK) )
{
- sysRefList.push_back( l_chip );
- (*l_hwpErrIsolation)(l_chip,
- PluginDef::bindParm<STEP_CODE_DATA_STRUCT&>(serviceData));
+ PlatServices::hwpErrorIsolation( l_chip, serviceData );
}
- if (l_errType & PCI_PLL_UNLOCK)
- {
- pciList.push_back( l_chip );
- (*l_hwpErrIsolation)(l_chip,
- PluginDef::bindParm<STEP_CODE_DATA_STRUCT&>(serviceData));
- }
- if (l_errType & SYS_OSC_FAILOVER)
- mfFoList.push_back( l_chip );
- if (l_errType & PCI_OSC_FAILOVER)
- sysRefFoList.push_back( l_chip );
+ // Update error lists
+ if (l_errType & SYS_PLL_UNLOCK ) sysRefList.push_back( l_chip );
+ if (l_errType & PCI_PLL_UNLOCK ) pciList.push_back( l_chip );
+ if (l_errType & SYS_OSC_FAILOVER) mfFoList.push_back( l_chip );
+ if (l_errType & PCI_OSC_FAILOVER) sysRefFoList.push_back( l_chip );
} // end for each chip in domain
diff --git a/src/usr/diag/prdf/common/plat/prdfTargetServices.C b/src/usr/diag/prdf/common/plat/prdfTargetServices.C
index ab1964049..52a762b14 100755
--- a/src/usr/diag/prdf/common/plat/prdfTargetServices.C
+++ b/src/usr/diag/prdf/common/plat/prdfTargetServices.C
@@ -5,7 +5,7 @@
/* */
/* OpenPOWER HostBoot Project */
/* */
-/* Contributors Listed Below - COPYRIGHT 2016,2017 */
+/* Contributors Listed Below - COPYRIGHT 2016,2018 */
/* [+] International Business Machines Corp. */
/* */
/* */
@@ -35,12 +35,14 @@
#include <prdfTargetServices.H>
// Framework includes
+#include <iipServiceDataCollector.h>
#include <iipSystem.h>
#include <prdfAssert.h>
#include <prdfErrlUtil.H>
#include <prdfExtensibleChip.H>
#include <prdfGlobal.H>
#include <prdfTrace.H>
+#include <xspprdService.h>
// External includes
#include <algorithm>
@@ -120,6 +122,38 @@ bool hasRedundantClocks()
//##############################################################################
//##
+//## General Utility Functions
+//##
+//##############################################################################
+
+void hwpErrorIsolation( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc )
+{
+ #if defined (__HOSTBOOT_MODULE) && !defined(__HOSTBOOT_RUNTIME)
+
+ TargetHandle_t trgt = i_chip->getTrgt();
+ uint32_t plid = 0;
+
+ // Check for non-zero value in PLID attribute.
+ if ( trgt->tryGetAttr<ATTR_PRD_HWP_PLID>(plid) && (0 != plid) )
+ {
+ PRDF_INF( "ATTR_PRD_HWP_PLID found on 0x%08x with value 0x%08x",
+ getHuid(trgt), plid );
+
+ // Link HWP PLID to PRD error log.
+ ServiceGeneratorClass::ThisServiceGenerator().getErrl()->plid( plid );
+
+ // Clear PRD_HWP_PLID attribute.
+ trgt->setAttr<ATTR_PRD_HWP_PLID>( 0 );
+
+ // Make the error log and callouts predictive.
+ io_sc.service_data->setServiceCall();
+ }
+
+ #endif
+}
+
+//##############################################################################
+//##
//## Target Manipulation Utility Functions
//##
//##############################################################################
diff --git a/src/usr/diag/prdf/common/plat/prdfTargetServices.H b/src/usr/diag/prdf/common/plat/prdfTargetServices.H
index 60b930136..e0df471a2 100755
--- a/src/usr/diag/prdf/common/plat/prdfTargetServices.H
+++ b/src/usr/diag/prdf/common/plat/prdfTargetServices.H
@@ -5,7 +5,7 @@
/* */
/* OpenPOWER HostBoot Project */
/* */
-/* Contributors Listed Below - COPYRIGHT 2016,2017 */
+/* Contributors Listed Below - COPYRIGHT 2016,2018 */
/* [+] International Business Machines Corp. */
/* */
/* */
@@ -49,6 +49,7 @@ namespace PRDF
class ExtensibleChip;
class MemRank;
+struct STEP_CODE_DATA_STRUCT;
typedef std::vector<ExtensibleChip *> ExtensibleChipList;
@@ -160,6 +161,30 @@ void setHWStateChanged(TARGETING::TargetHandle_t i_target);
//##############################################################################
//##
+//## General Utility Functions
+//##
+//##############################################################################
+
+/**
+ * @brief Gives the ability to link HWP error logs with PRD error logs.
+ *
+ * During the IPL, a HWP may find a FIR attention that could cause the HWP to
+ * fail. The HWP will create/commit an error log and write the PLID in
+ * ATTR_PRD_HWP_PLID (all via FAPI interface). If PRD isolates to a target with
+ * a non-zero ATTR_PRD_HWP_PLID, it will link the two error logs together, clear
+ * the attribute, and override the thresholding to make the error log
+ * predictive.
+ *
+ * @note It is a requirement for the HWP to set the attribute on the same
+ * target as the active FIR attention.
+ *
+ * @param i_chip Any chip.
+ * @param io_sc Step code data struct.
+ */
+void hwpErrorIsolation( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc);
+
+//##############################################################################
+//##
//## getConnected() support functions
//##
//##############################################################################
diff --git a/src/usr/diag/prdf/common/rule/prdfRuleMetaData.C b/src/usr/diag/prdf/common/rule/prdfRuleMetaData.C
index 85c9b7602..ae4fcb973 100644
--- a/src/usr/diag/prdf/common/rule/prdfRuleMetaData.C
+++ b/src/usr/diag/prdf/common/rule/prdfRuleMetaData.C
@@ -5,7 +5,7 @@
/* */
/* OpenPOWER HostBoot Project */
/* */
-/* Contributors Listed Below - COPYRIGHT 2016,2017 */
+/* Contributors Listed Below - COPYRIGHT 2016,2018 */
/* [+] International Business Machines Corp. */
/* */
/* */
@@ -487,6 +487,9 @@ int32_t RuleMetaData::Analyze( STEP_CODE_DATA_STRUCT & i_serviceData,
); //@pw01
}
+ // Additional error isolation for HWPs, if needed.
+ PlatServices::hwpErrorIsolation( l_chipAnalyzed, i_serviceData );
+
// Call postanalysis plugin.
// @jl02 JL Adding PostAnalysis plugin call.
ExtensibleChipFunction * l_postanalysis =
OpenPOWER on IntegriCloud