summary refs log tree commit diff stats
path: root/src/usr/diag/prdf
diff options
context:
space:
mode:
Diffstat (limited to 'src/usr/diag/prdf')
-rwxr-xr-x src/usr/diag/prdf/common/framework/service/iipServiceDataCollector.h 7
-rwxr-xr-x src/usr/diag/prdf/common/framework/service/prdfServiceDataCollector.C 46
-rw-r--r-- src/usr/diag/prdf/common/plat/mem/prdfMemMark.C 3
-rw-r--r-- src/usr/diag/prdf/common/plat/nimbus/nimbus_mca.rule 14
-rw-r--r-- src/usr/diag/prdf/common/plat/nimbus/nimbus_mca_actions.rule 22
-rw-r--r-- src/usr/diag/prdf/common/plat/nimbus/nimbus_mcbist.rule 4
-rw-r--r-- src/usr/diag/prdf/common/plat/nimbus/nimbus_mcbist_actions.rule 13
-rw-r--r-- src/usr/diag/prdf/common/plat/nimbus/nimbus_mcs.rule 4
-rw-r--r-- src/usr/diag/prdf/common/plat/nimbus/nimbus_mcs_actions.rule 13
-rw-r--r-- src/usr/diag/prdf/common/plat/p9/p9_common_actions.rule 1
-rw-r--r-- src/usr/diag/prdf/common/plat/p9/prdfCommonPlugins.C 60
-rw-r--r-- src/usr/diag/prdf/plat/mem/prdfMemDynDealloc.C 14
-rw-r--r-- src/usr/diag/prdf/plat/mem/prdfP9Mca.C 4
-rw-r--r-- src/usr/diag/prdf/plat/mem/prdfRestoreDramRepairs.C 19
14 files changed, 202 insertions, 22 deletions
diff --git a/src/usr/diag/prdf/common/framework/service/iipServiceDataCollector.h b/src/usr/diag/prdf/common/framework/service/iipServiceDataCollector.h
index 704dddf70..e8cdb79a5 100755
--- a/src/usr/diag/prdf/common/framework/service/iipServiceDataCollector.h
+++ b/src/usr/diag/prdf/common/framework/service/iipServiceDataCollector.h
@@ -5,7 +5,7 @@
/* */
/* OpenPOWER HostBoot Project */
/* */
-/* Contributors Listed Below - COPYRIGHT 2012,2018 */
+/* Contributors Listed Below - COPYRIGHT 2012,2019 */
/* [+] International Business Machines Corp. */
/* */
/* */
@@ -628,6 +628,11 @@ public:
void clearMruListGard();
/**
+ * @brief Iterates the MRU list and clears gard for any NVDIMM targets.
+ */
+ void clearNvdimmMruListGard();
+
+ /**
* @brief Iterates the MRU list and returns true if at least one target in
* the list is set to be garded.
* @return True if there is at least one target set to be garded.
diff --git a/src/usr/diag/prdf/common/framework/service/prdfServiceDataCollector.C b/src/usr/diag/prdf/common/framework/service/prdfServiceDataCollector.C
index d9681d66b..8ba990077 100755
--- a/src/usr/diag/prdf/common/framework/service/prdfServiceDataCollector.C
+++ b/src/usr/diag/prdf/common/framework/service/prdfServiceDataCollector.C
@@ -5,7 +5,7 @@
/* */
/* OpenPOWER HostBoot Project */
/* */
-/* Contributors Listed Below - COPYRIGHT 2012,2015 */
+/* Contributors Listed Below - COPYRIGHT 2012,2019 */
/* [+] International Business Machines Corp. */
/* */
/* */
@@ -177,6 +177,50 @@ void ServiceDataCollector::clearMruListGard()
//------------------------------------------------------------------------------
+void ServiceDataCollector::clearNvdimmMruListGard()
+{
+ #define PRDF_FUNC "[ServiceDataCollector::clearNvdimmMruListGard] "
+ /* Summary: for every entry of xMruList whose callout target is a DIMM that
+ * isNVDIMM() accepts, the entry's gardState is set to NO_GARD. What happens
+ * after that depends on the build:
+ * - __HOSTBOOT_MODULE and __HOSTBOOT_RUNTIME defined: PHYP is notified
+ * with NVDIMM_RISKY_HW_ERROR; a failing return code is only traced.
+ * - __HOSTBOOT_MODULE defined without __HOSTBOOT_RUNTIME:
+ * ATTR_NV_STATUS_FLAG is written on the DIMM target.
+ * - __HOSTBOOT_MODULE not defined: nothing beyond the gardState change.
+ *
+ * NOTE(review): getTarget() is called on every callout before anything
+ * else is checked. Presumably the list can also hold callouts that are not
+ * targets -- confirm getTarget()/getTargetType() are safe for those.
+ */
+ /* Loop through the MRU list. Each entry is taken by reference so that the
+ * gardState update below is applied to the list entry itself; the callout
+ * is only read through a local copy. */
+ for ( auto & mru : xMruList )
+ {
+ PRDcallout callout = mru.callout;
+ TargetHandle_t trgt = callout.getTarget();
+ if ( TYPE_DIMM == PlatServices::getTargetType(trgt) )
+ {
+ // If the callout target is an NVDIMM, do not gard it and send a
+ // message to PHYP/Hostboot that a save/restore may work.
+ if ( isNVDIMM(trgt) )
+ {
+ mru.gardState = NO_GARD;
+
+ #ifdef __HOSTBOOT_MODULE
+
+ #ifdef __HOSTBOOT_RUNTIME
+ // Hostboot runtime, send the message to PHYP
+ uint32_t l_rc = PlatServices::nvdimmNotifyPhypProtChange( trgt,
+ NVDIMM::NVDIMM_RISKY_HW_ERROR );
+ if ( SUCCESS != l_rc )
+ {
+ PRDF_TRAC( PRDF_FUNC "nvdimmNotifyPhypProtChange(0x%08x) "
+ "failed.", PlatServices::getHuid(trgt) );
+ continue; /* gardState was already set to NO_GARD above and no
+ * further work remains in the loop body for this entry, so the
+ * failure is traced and the next MRU is processed. */
+ }
+ #else
+ /* IPL, set the appropriate internal attribute in Hostboot.
+ * NOTE(review): 0x40 is an unnamed literal and setAttr is given only
+ * that value -- confirm which status bit it represents and whether
+ * flags already stored in ATTR_NV_STATUS_FLAG need to be preserved. */
+ trgt->setAttr<ATTR_NV_STATUS_FLAG>(0x40);
+ #endif
+
+ #endif // __HOSTBOOT_MODULE
+ }
+ }
+ }
+
+ #undef PRDF_FUNC
+}
+
+//------------------------------------------------------------------------------
+
bool ServiceDataCollector::isGardRequested()
{
bool gardRecordExit = false;
diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemMark.C b/src/usr/diag/prdf/common/plat/mem/prdfMemMark.C
index 61dd6e548..e0b54ab31 100644
--- a/src/usr/diag/prdf/common/plat/mem/prdfMemMark.C
+++ b/src/usr/diag/prdf/common/plat/mem/prdfMemMark.C
@@ -1390,6 +1390,9 @@ uint32_t applyRasPolicies( ExtensibleChip * i_chip, const MemRank & i_rank,
{
io_sc.service_data->setServiceCall();
+ // We want to try to avoid garding NVDIMMs, so clear gard for them now.
+ io_sc.service_data->clearNvdimmMruListGard();
+
#ifdef __HOSTBOOT_RUNTIME
// No more repairs left so no point doing any more TPS procedures.
MemDbUtils::banTps<T>( i_chip, i_rank );
diff --git a/src/usr/diag/prdf/common/plat/nimbus/nimbus_mca.rule b/src/usr/diag/prdf/common/plat/nimbus/nimbus_mca.rule
index d61845b20..d1a6bc290 100644
--- a/src/usr/diag/prdf/common/plat/nimbus/nimbus_mca.rule
+++ b/src/usr/diag/prdf/common/plat/nimbus/nimbus_mca.rule
@@ -241,7 +241,7 @@ group gMCACALFIR
/** MCACALFIR[0]
* A MBA recoverable error has occurred.
*/
- (rMCACALFIR, bit(0)) ? self_th_1;
+ (rMCACALFIR, bit(0)) ? nvdimm_self_th_1;
/** MCACALFIR[1]
* MBA Nonrecoverable Error
@@ -251,7 +251,7 @@ group gMCACALFIR
/** MCACALFIR[2]
* Excessive refreshes to a single rank.
*/
- (rMCACALFIR, bit(2)) ? self_th_32perDay;
+ (rMCACALFIR, bit(2)) ? nvdimm_self_th_32perDay;
/** MCACALFIR[3]
* Err detected in the MBA debug WAT logic
@@ -266,7 +266,7 @@ group gMCACALFIR
/** MCACALFIR[5]
* Calibration complete indication xout
*/
- (rMCACALFIR, bit(5)) ? self_th_32perDay;
+ (rMCACALFIR, bit(5)) ? nvdimm_self_th_32perDay;
/** MCACALFIR[6]
* Emergency Throttle
@@ -533,7 +533,7 @@ group gMCAECCFIR
/** MCAECCFIR[42]
* SCOM_PARITY_CLASS_RECOVERABLE
*/
- (rMCAECCFIR, bit(42)) ? self_th_1;
+ (rMCAECCFIR, bit(42)) ? nvdimm_self_th_1;
/** MCAECCFIR[43]
* SCOM_PARITY_CLASS_UNRECOVERABLE
@@ -548,7 +548,7 @@ group gMCAECCFIR
/** MCAECCFIR[45]
* WRITE_RMW_CE
*/
- (rMCAECCFIR, bit(45)) ? self_th_32perDay;
+ (rMCAECCFIR, bit(45)) ? nvdimm_self_th_32perDay;
/** MCAECCFIR[46]
* WRITE_RMW_UE
@@ -686,12 +686,12 @@ group gDDRPHYFIR
/** DDRPHYFIR[60]
* Register PE 4 bit impact
*/
- (rDDRPHYFIR, bit(60)) ? self_th_1;
+ (rDDRPHYFIR, bit(60)) ? nvdimm_self_th_1;
/** DDRPHYFIR[61]
* Register PE 1 bit impact
*/
- (rDDRPHYFIR, bit(61)) ? self_th_1;
+ (rDDRPHYFIR, bit(61)) ? nvdimm_self_th_1;
};
diff --git a/src/usr/diag/prdf/common/plat/nimbus/nimbus_mca_actions.rule b/src/usr/diag/prdf/common/plat/nimbus/nimbus_mca_actions.rule
index da3a73f82..e0529afd5 100644
--- a/src/usr/diag/prdf/common/plat/nimbus/nimbus_mca_actions.rule
+++ b/src/usr/diag/prdf/common/plat/nimbus/nimbus_mca_actions.rule
@@ -70,6 +70,7 @@ actionclass rcd_parity_error
calloutSelfLowNoGard; # Self LOW
# Thresholding done in plugin
funccall("RcdParityError"); # Run TPS on TH for all MCA ranks
+ funccall("ClearNvdimmGardState"); # Clear gard for NVDIMMs
};
/** Handle Mainline IUEs */
@@ -125,7 +126,7 @@ actionclass maintenance_iaue_handling
/** MCA/UE algorithm, threshold 5 per day */
actionclass mca_ue_algorithm_th_5perDay
{
- calloutSelfMed;
+ try( funccall("CheckForNvdimms"), calloutSelfMed );
threshold5pday;
funccall("mcaUeAlgorithm"); # must be called last
};
@@ -133,12 +134,29 @@ actionclass mca_ue_algorithm_th_5perDay
/** MCA/UE algorithm, threshold 1 */
actionclass mca_ue_algorithm_th_1
{
- calloutSelfMed;
+ try( funccall("CheckForNvdimms"), calloutSelfMed );
threshold1;
funccall("mcaUeAlgorithm"); # must be called last
};
################################################################################
+# NVDIMM callouts #
+################################################################################
+
+# Simple callouts that will avoid gard for NVDIMMs
+actionclass nvdimm_self_th_1
+{
+ try( funccall("CheckForNvdimms"), calloutSelfMed ); # calloutSelfMed presumably only runs if the plugin does not return SUCCESS -- TODO confirm
+ threshold1;
+};
+
+actionclass nvdimm_self_th_32perDay
+{
+ try( funccall("CheckForNvdimms"), calloutSelfMed ); # calloutSelfMed presumably only runs if the plugin does not return SUCCESS -- TODO confirm
+ threshold32pday;
+};
+
+################################################################################
# Analyze groups
################################################################################
diff --git a/src/usr/diag/prdf/common/plat/nimbus/nimbus_mcbist.rule b/src/usr/diag/prdf/common/plat/nimbus/nimbus_mcbist.rule
index 1f61719a7..0a3301e2a 100644
--- a/src/usr/diag/prdf/common/plat/nimbus/nimbus_mcbist.rule
+++ b/src/usr/diag/prdf/common/plat/nimbus/nimbus_mcbist.rule
@@ -5,7 +5,7 @@
#
# OpenPOWER HostBoot Project
#
-# Contributors Listed Below - COPYRIGHT 2016,2018
+# Contributors Listed Below - COPYRIGHT 2016,2019
# [+] International Business Machines Corp.
#
#
@@ -599,7 +599,7 @@ group gMCBISTFIR
/** MCBISTFIR[13]
* SCOM_RECOVERABLE_REG_PE
*/
- (rMCBISTFIR, bit(13)) ? self_th_1;
+ (rMCBISTFIR, bit(13)) ? nvdimm_self_th_1;
/** MCBISTFIR[14]
* SCOM_FATAL_REG_PE
diff --git a/src/usr/diag/prdf/common/plat/nimbus/nimbus_mcbist_actions.rule b/src/usr/diag/prdf/common/plat/nimbus/nimbus_mcbist_actions.rule
index 9b2127f3f..b71610835 100644
--- a/src/usr/diag/prdf/common/plat/nimbus/nimbus_mcbist_actions.rule
+++ b/src/usr/diag/prdf/common/plat/nimbus/nimbus_mcbist_actions.rule
@@ -5,7 +5,7 @@
#
# OpenPOWER HostBoot Project
#
-# Contributors Listed Below - COPYRIGHT 2016,2018
+# Contributors Listed Below - COPYRIGHT 2016,2019
# [+] International Business Machines Corp.
#
#
@@ -36,6 +36,17 @@ actionclass command_addr_timeout
funccall("commandAddrTimeout");
};
+################################################################################
+# NVDIMM callouts #
+################################################################################
+
+# Simple callouts that will avoid gard for NVDIMMs
+actionclass nvdimm_self_th_1
+{
+ try( funccall("CheckForNvdimms"), calloutSelfMed ); # calloutSelfMed presumably only runs if the plugin does not return SUCCESS -- TODO confirm
+ threshold1;
+};
+
###############################################################################
# Analyze groups
###############################################################################
diff --git a/src/usr/diag/prdf/common/plat/nimbus/nimbus_mcs.rule b/src/usr/diag/prdf/common/plat/nimbus/nimbus_mcs.rule
index 71a0342ab..987d68afb 100644
--- a/src/usr/diag/prdf/common/plat/nimbus/nimbus_mcs.rule
+++ b/src/usr/diag/prdf/common/plat/nimbus/nimbus_mcs.rule
@@ -5,7 +5,7 @@
#
# OpenPOWER HostBoot Project
#
-# Contributors Listed Below - COPYRIGHT 2016,2018
+# Contributors Listed Below - COPYRIGHT 2016,2019
# [+] International Business Machines Corp.
#
#
@@ -148,7 +148,7 @@ group gMCFIR
/** MCFIR[0]
* mc internal recoverable error
*/
- (rMCFIR, bit(0)) ? self_th_1;
+ (rMCFIR, bit(0)) ? nvdimm_self_th_1;
/** MCFIR[1]
* mc internal non-recoverable error
diff --git a/src/usr/diag/prdf/common/plat/nimbus/nimbus_mcs_actions.rule b/src/usr/diag/prdf/common/plat/nimbus/nimbus_mcs_actions.rule
index 1497cdccb..839a9dc44 100644
--- a/src/usr/diag/prdf/common/plat/nimbus/nimbus_mcs_actions.rule
+++ b/src/usr/diag/prdf/common/plat/nimbus/nimbus_mcs_actions.rule
@@ -5,7 +5,7 @@
#
# OpenPOWER HostBoot Project
#
-# Contributors Listed Below - COPYRIGHT 2018
+# Contributors Listed Below - COPYRIGHT 2018,2019
# [+] International Business Machines Corp.
#
#
@@ -24,6 +24,17 @@
# IBM_PROLOG_END_TAG
################################################################################
+# NVDIMM callouts #
+################################################################################
+
+# Simple callouts that will avoid gard for NVDIMMs
+actionclass nvdimm_self_th_1
+{
+ try( funccall("CheckForNvdimms"), calloutSelfMed ); # calloutSelfMed presumably only runs if the plugin does not return SUCCESS -- TODO confirm
+ threshold1;
+};
+
+################################################################################
# Analyze groups
################################################################################
diff --git a/src/usr/diag/prdf/common/plat/p9/p9_common_actions.rule b/src/usr/diag/prdf/common/plat/p9/p9_common_actions.rule
index 174009192..2e7e32869 100644
--- a/src/usr/diag/prdf/common/plat/p9/p9_common_actions.rule
+++ b/src/usr/diag/prdf/common/plat/p9/p9_common_actions.rule
@@ -279,4 +279,3 @@ actionclass chip_to_chip
calloutSelfMed;
threshold1;
};
-
diff --git a/src/usr/diag/prdf/common/plat/p9/prdfCommonPlugins.C b/src/usr/diag/prdf/common/plat/p9/prdfCommonPlugins.C
index ece3fc1a8..77cecfb9f 100644
--- a/src/usr/diag/prdf/common/plat/p9/prdfCommonPlugins.C
+++ b/src/usr/diag/prdf/common/plat/p9/prdfCommonPlugins.C
@@ -127,6 +127,66 @@ PRDF_PLUGIN_DEFINE_NS(nimbus_proc, CommonPlugins, ClearServiceCallFlag_mnfgInfo
PRDF_PLUGIN_DEFINE_NS(cumulus_proc, CommonPlugins, ClearServiceCallFlag_mnfgInfo);
PRDF_PLUGIN_DEFINE_NS(axone_proc, CommonPlugins, ClearServiceCallFlag_mnfgInfo);
+/**
+ * @brief Will change the gard state of any NVDIMMs in the callout list to
+ * NO_GARD.
+ *
+ * Plugin wrapper around ServiceDataCollector::clearNvdimmMruListGard().
+ * When __HOSTBOOT_MODULE is defined it calls that method on
+ * io_sc.service_data; in any other build the body is empty and nothing
+ * is changed. The method is void, so no failure is reported from here.
+ * The PRDF_PLUGIN_DEFINE_NS lines after the function name it for
+ * nimbus_mcs, nimbus_mca and nimbus_mcbist.
+ *
+ * @param i_chip The chip. Not referenced by this function.
+ * @param io_sc The step code data struct.
+ * @returns SUCCESS
+ */
+int32_t ClearNvdimmGardState( ExtensibleChip * i_chip,
+ STEP_CODE_DATA_STRUCT & io_sc )
+{
+ #ifdef __HOSTBOOT_MODULE
+
+ // Call the sdc to clear the NVDIMM mru list.
+ io_sc.service_data->clearNvdimmMruListGard();
+
+ #endif
+
+ return SUCCESS;
+}
+PRDF_PLUGIN_DEFINE_NS(nimbus_mcs, CommonPlugins, ClearNvdimmGardState);
+PRDF_PLUGIN_DEFINE_NS(nimbus_mca, CommonPlugins, ClearNvdimmGardState);
+PRDF_PLUGIN_DEFINE_NS(nimbus_mcbist, CommonPlugins, ClearNvdimmGardState);
+
+/**
+ * @brief Will check if any of the DIMMs connected to this chip are NVDIMMs
+ * and callout self, no gard if there are.
+ *
+ * When __HOSTBOOT_MODULE is defined, the DIMMs returned by
+ * getConnected( i_chip->getTrgt(), TYPE_DIMM ) are scanned. At the
+ * first one for which isNVDIMM() is true, i_chip's own target is added
+ * as a callout with MRU_MED priority and NO_GARD, and the scan stops.
+ * In any other build no DIMM is examined and no callout is added.
+ * The PRDF_PLUGIN_DEFINE_NS lines after the function name it for
+ * nimbus_mcs, nimbus_mca and nimbus_mcbist.
+ *
+ * NOTE(review): the rule files use this as
+ * try( funccall("CheckForNvdimms"), calloutSelfMed ); presumably the
+ * non-SUCCESS return is what lets calloutSelfMed run -- confirm.
+ *
+ * @param i_chip The chip of the DIMM parent.
+ * @param io_sc The step code data struct.
+ * @returns SUCCESS if NVDIMMs found, PRD_SCAN_COMM_REGISTER_ZERO if not.
+ * Always PRD_SCAN_COMM_REGISTER_ZERO when __HOSTBOOT_MODULE is
+ * not defined.
+ */
+int32_t CheckForNvdimms( ExtensibleChip * i_chip,
+ STEP_CODE_DATA_STRUCT & io_sc )
+{
+ int32_t rc = PRD_SCAN_COMM_REGISTER_ZERO;
+
+ #ifdef __HOSTBOOT_MODULE
+
+ TargetHandleList dimmList = getConnected( i_chip->getTrgt(), TYPE_DIMM );
+
+ for ( auto & dimm : dimmList )
+ {
+ if ( isNVDIMM(dimm) )
+ {
+ // Callout self, no gard
+ io_sc.service_data->SetCallout(i_chip->getTrgt(), MRU_MED, NO_GARD);
+
+ // No need for other actions, so return SUCCESS
+ rc = SUCCESS;
+ break;
+ }
+ }
+
+ #endif
+
+ return rc;
+}
+PRDF_PLUGIN_DEFINE_NS(nimbus_mcs, CommonPlugins, CheckForNvdimms);
+PRDF_PLUGIN_DEFINE_NS(nimbus_mca, CommonPlugins, CheckForNvdimms);
+PRDF_PLUGIN_DEFINE_NS(nimbus_mcbist, CommonPlugins, CheckForNvdimms);
+
} // namespace CommonPlugins ends
}// namespace PRDF ends
diff --git a/src/usr/diag/prdf/plat/mem/prdfMemDynDealloc.C b/src/usr/diag/prdf/plat/mem/prdfMemDynDealloc.C
index 0cf4bfa7c..9286a31ee 100644
--- a/src/usr/diag/prdf/plat/mem/prdfMemDynDealloc.C
+++ b/src/usr/diag/prdf/plat/mem/prdfMemDynDealloc.C
@@ -1351,6 +1351,20 @@ int32_t dimmList( TargetHandleList & i_dimmList )
sendPredDeallocRequest( ssAddr, seAddr );
PRDF_TRAC( PRDF_FUNC "Predictive dealloc for start addr: 0x%016llx "
"end addr: 0x%016llx", ssAddr, seAddr );
+
+ // If the DIMM is an NVDIMM, send a message to PHYP that a save/restore
+ // may work.
+ if ( isNVDIMM(*it) )
+ {
+ uint32_t l_rc = PlatServices::nvdimmNotifyPhypProtChange( *it,
+ NVDIMM::NVDIMM_RISKY_HW_ERROR );
+ if ( SUCCESS != l_rc )
+ {
+ PRDF_TRAC( PRDF_FUNC "nvdimmNotifyPhypProtChange(0x%08x) "
+ "failed.", getHuid(*it) );
+ continue;
+ }
+ }
}
return o_rc;
diff --git a/src/usr/diag/prdf/plat/mem/prdfP9Mca.C b/src/usr/diag/prdf/plat/mem/prdfP9Mca.C
index 5f7efa274..b8367ee4d 100644
--- a/src/usr/diag/prdf/plat/mem/prdfP9Mca.C
+++ b/src/usr/diag/prdf/plat/mem/prdfP9Mca.C
@@ -782,9 +782,9 @@ int32_t AnalyzeNvdimmHealthStatRegs( ExtensibleChip * i_chip,
// and make the log predictive.
io_sc.service_data->SetThresholdMaskId(0);
- // Send persistency lost message to PHYP
+ // Send message to PHYP that save/restore may work
l_rc = PlatServices::nvdimmNotifyPhypProtChange( dimm,
- NVDIMM::UNPROTECTED_BECAUSE_ERROR );
+ NVDIMM::NVDIMM_RISKY_HW_ERROR );
if ( SUCCESS != l_rc ) continue;
// Analyze Health Status0 Reg, Health Status1 Reg,
diff --git a/src/usr/diag/prdf/plat/mem/prdfRestoreDramRepairs.C b/src/usr/diag/prdf/plat/mem/prdfRestoreDramRepairs.C
index ef3a143eb..04eff661e 100644
--- a/src/usr/diag/prdf/plat/mem/prdfRestoreDramRepairs.C
+++ b/src/usr/diag/prdf/plat/mem/prdfRestoreDramRepairs.C
@@ -109,9 +109,24 @@ void __calloutDimm( errlHndl_t & io_errl, TargetHandle_t i_portTrgt,
PRDF_ASSERT( nullptr != i_dimmTrgt );
PRDF_ASSERT( TYPE_DIMM == getTargetType(i_dimmTrgt) );
- // Callout the DIMM.
+ HWAS::DeconfigEnum deconfigPolicy = HWAS::DELAYED_DECONFIG;
+ HWAS::GARD_ErrorType gardPolicy = HWAS::GARD_Predictive;
+
+ // If the DIMM is an NVDIMM, change the gard and deconfig options to no
+ // gard/deconfig and set the appropriate attribute to indicate a
+ // save/restore may work
+ if ( isNVDIMM(i_dimmTrgt) )
+ {
+ deconfigPolicy = HWAS::NO_DECONFIG;
+ gardPolicy = HWAS::GARD_NULL;
+
+ i_dimmTrgt->setAttr<ATTR_NV_STATUS_FLAG>(0x40);
+ }
+
+
io_errl->addHwCallout( i_dimmTrgt, HWAS::SRCI_PRIORITY_HIGH,
- HWAS::DELAYED_DECONFIG, HWAS::GARD_Predictive );
+ deconfigPolicy, gardPolicy );
+
// Clear the VPD on this DIMM. The DIMM has been garded, but it is possible
// the customer will want to ungard the DIMM. Without clearing the VPD, the
OpenPOWER on IntegriCloud