summaryrefslogtreecommitdiffstats
path: root/src/usr/isteps/nvdimm
diff options
context:
space:
mode:
authorMatthew Hickman <Matthew.Hickman@ibm.com>2019-05-09 15:24:42 -0500
committerDaniel M Crowell <dcrowell@us.ibm.com>2019-08-19 10:55:15 -0500
commitd577988774d58c45fea951a6dded16d652f4fba3 (patch)
tree7075a5468a7fccbccc7bbddebcd068f369b346ab /src/usr/isteps/nvdimm
parentf691dc23accc3b86a8fc2c301c907aa7140955b1 (diff)
downloadtalos-hostboot-d577988774d58c45fea951a6dded16d652f4fba3.tar.gz
talos-hostboot-d577988774d58c45fea951a6dded16d652f4fba3.zip
Added error checking and handling to nvdimm ipl
Change-Id: I153cf39cc674b49441b5f41f7b96cd667b2a265b Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/77543 Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com> Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com> Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com> Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com> Reviewed-by: Matt Derksen <mderkse1@us.ibm.com> Reviewed-by: TSUNG K YEUNG <tyeung@us.ibm.com> Reviewed-by: Daniel M Crowell <dcrowell@us.ibm.com>
Diffstat (limited to 'src/usr/isteps/nvdimm')
-rw-r--r--src/usr/isteps/nvdimm/errlud_nvdimm.C41
-rw-r--r--src/usr/isteps/nvdimm/errlud_nvdimm.H33
-rw-r--r--src/usr/isteps/nvdimm/nvdimm.C636
-rw-r--r--src/usr/isteps/nvdimm/nvdimm.H63
-rw-r--r--src/usr/isteps/nvdimm/nvdimm.mk1
-rw-r--r--src/usr/isteps/nvdimm/nvdimmErrorLog.C1313
-rw-r--r--src/usr/isteps/nvdimm/nvdimmErrorLog.H108
-rwxr-xr-xsrc/usr/isteps/nvdimm/nvdimmdd.C1
-rwxr-xr-xsrc/usr/isteps/nvdimm/nvdimmdd.H72
-rw-r--r--src/usr/isteps/nvdimm/plugins/errludP_nvdimm.H113
-rw-r--r--src/usr/isteps/nvdimm/plugins/nvdimmUdParserFactory.H10
-rw-r--r--src/usr/isteps/nvdimm/runtime/nvdimm_rt.C214
12 files changed, 2372 insertions, 233 deletions
diff --git a/src/usr/isteps/nvdimm/errlud_nvdimm.C b/src/usr/isteps/nvdimm/errlud_nvdimm.C
index 743297b94..9f916e040 100644
--- a/src/usr/isteps/nvdimm/errlud_nvdimm.C
+++ b/src/usr/isteps/nvdimm/errlud_nvdimm.C
@@ -158,9 +158,46 @@ UdNvdimmParms::UdNvdimmParms( uint8_t i_opType,
}
//------------------------------------------------------------------------------
-UdNvdimmParms::~UdNvdimmParms()
-{
+UdNvdimmParms::~UdNvdimmParms() = default;
+//------------------------------------------------------------------------------
+// NVDIMM Dimm Operation Parameters and Errors
+//------------------------------------------------------------------------------
+UdNvdimmOPParms::UdNvdimmOPParms( const nvdimm_reg_t &i_RegInfo )
+{
+ // Version control for ErrorUD struct
+ iv_CompId = NVDIMM_COMP_ID;
+ iv_Version = 3;
+ iv_SubSection = NVDIMM_OP_PARAMETERS;
+
+ //***** Memory Layout *****
+ // 1 byte : MODULE_HEALTH
+ // 1 byte : MODULE_HEALTH_STATUS0
+ // 1 byte : MODULE_HEALTH_STATUS1
+ // 1 byte : CSAVE_STATUS
+ // 1 byte : CSAVE_INFO
+ // 1 byte : CSAVE_FAIL_INFO0
+ // 1 byte : CSAVE_FAIL_INFO1
+ // 1 byte : ERROR_THRESHOLD_STATUS
+ // 1 byte : NVDIMM_READY
+ // 1 byte : NVDIMM_CMD_STATUS0
+ // 1 byte : ABORT_CMD_TIMEOUT
+ // 1 byte : ERASE_STATUS
+ // 1 byte : ERASE_TIMEOUT0
+ // 1 byte : ERASE_TIMEOUT1
+ // 1 byte : SET_ES_POLICY_STATUS
+ // 1 byte : RESTORE_STATUS
+ // 1 byte : RESTORE_FAIL_INFO
+ // 1 byte : RESTORE_TIMEOUT0
+ // 1 byte : RESTORE_TIMEOUT1
+ // 1 byte : ARM_STATUS
+ // 1 byte : SET_EVENT_NOTIFICATION_STATUS
+
+ char * l_pBuf = reinterpret_cast<char *>( reallocUsrBuf(sizeof(i_RegInfo)));
+ memcpy(l_pBuf, &i_RegInfo, sizeof(i_RegInfo));
}
+// Default the destructor
+UdNvdimmOPParms::~UdNvdimmOPParms() = default;
+
} // end NVDIMM namespace
diff --git a/src/usr/isteps/nvdimm/errlud_nvdimm.H b/src/usr/isteps/nvdimm/errlud_nvdimm.H
index 55b5f9b20..2041da054 100644
--- a/src/usr/isteps/nvdimm/errlud_nvdimm.H
+++ b/src/usr/isteps/nvdimm/errlud_nvdimm.H
@@ -61,12 +61,37 @@ class UdNvdimmParms : public ERRORLOG::ErrlUserDetails
*/
virtual ~UdNvdimmParms();
- private:
// Disabled
- UdNvdimmParms(UdNvdimmParms &);
- UdNvdimmParms & operator=(UdNvdimmParms &);
+ UdNvdimmParms(UdNvdimmParms &) = delete;
+ UdNvdimmParms & operator=(UdNvdimmParms &) = delete;
};
-} // end NVDIMM namespace
+/**
+ * @class UdNvdimmOPParms
+ *
+ * Adds NVDIMM information to an error log as user detail data
+ */
+class UdNvdimmOPParms : public ERRORLOG::ErrlUserDetails
+{
+ public:
+ /**
+ * @brief Constructor
+ *
+ * @param i_RegInfo NVDIMM register values copied into the user detail data
+ */
+ UdNvdimmOPParms( const nvdimm_reg_t &i_RegInfo );
+
+ /**
+ * @brief Destructor
+ */
+ virtual ~UdNvdimmOPParms();
+
+ // Disabled
+ UdNvdimmOPParms() = delete;
+ UdNvdimmOPParms(UdNvdimmOPParms &) = delete;
+ UdNvdimmOPParms & operator=(UdNvdimmOPParms &) = delete;
+};
+
+} // end of namespace NVDIMM
#endif
diff --git a/src/usr/isteps/nvdimm/nvdimm.C b/src/usr/isteps/nvdimm/nvdimm.C
index db26eb184..3e0d712ff 100644
--- a/src/usr/isteps/nvdimm/nvdimm.C
+++ b/src/usr/isteps/nvdimm/nvdimm.C
@@ -40,6 +40,8 @@
#include <lib/dimm/ddr4/nvdimm_utils.H>
#include <lib/mc/port.H>
#include <isteps/nvdimm/nvdimmreasoncodes.H>
+#include "errlud_nvdimm.H"
+#include "nvdimmErrorLog.H"
#include <isteps/nvdimm/nvdimm.H>
#include <vpd/spdenums.H>
#include <secureboot/trustedbootif.H>
@@ -54,6 +56,7 @@
using namespace TARGETING;
using namespace DeviceFW;
using namespace EEPROM;
+using namespace ERRORLOG;
trace_desc_t* g_trac_nvdimm = NULL;
TRAC_INIT(&g_trac_nvdimm, NVDIMM_COMP_NAME, 2*KILOBYTE);
@@ -360,24 +363,24 @@ void nvdimmSetStatusFlag(Target *i_nvdimm, const uint8_t i_status_flag)
switch(i_status_flag)
{
- // Make sure NSTD_VAL_PRSV (content preserved) is unset before setting NSTD_VAL_NOPRSV
- // (data not preserved) or NSTD_ERR_NOPRSV (error preserving data)
+ // Make sure NSTD_VAL_ERROR (content preserved) is unset before setting NSTD_VAL_ERASED
+ // (data not preserved) or NSTD_VAL_SR_FAILED (error preserving data)
case NSTD_ERR:
- case NSTD_VAL_NOPRSV:
- case NSTD_ERR_NOPRSV:
- l_statusFlag &= NSTD_VAL_PRSV_MASK;
+ case NSTD_VAL_ERASED:
+ case NSTD_VAL_SR_FAILED:
+ l_statusFlag &= NSTD_VAL_ERROR_MASK;
l_statusFlag |= i_status_flag;
break;
// If the content preserved(restore sucessfully), make sure
- // NSTD_VAL_NOPRSV (not preserved) and NSTD_ERR_NOPRSV (error preserving)
+ // NSTD_VAL_ERASED (not preserved) and NSTD_VAL_SR_FAILED (error preserving)
// are unset before setting this flag.
- case NSTD_VAL_PRSV:
- l_statusFlag &= (NSTD_VAL_NOPRSV_MASK & NSTD_ERR_NOPRSV_MASK);
+ case NSTD_VAL_ERROR:
+ l_statusFlag &= (NSTD_VAL_ERASED_MASK & NSTD_VAL_SR_FAILED_MASK);
l_statusFlag |= i_status_flag;
break;
- case NSTD_ERR_NOBKUP:
+ case NSTD_VAL_DISARMED:
l_statusFlag |= i_status_flag;
break;
@@ -407,7 +410,8 @@ errlHndl_t nvdimmReady(Target *i_nvdimm)
TRACUCOMP(g_trac_nvdimm, ENTER_MRK"nvdimmReady() HUID[%X]",get_huid(i_nvdimm));
errlHndl_t l_err = nullptr;
- uint8_t l_data = 0x0;
+ nvdimm_reg_t l_RegInfo;
+ uint8_t l_data;
uint8_t l_nvm_init_time = 0;
size_t l_numBytes = 1;
@@ -456,6 +460,48 @@ errlHndl_t nvdimmReady(Target *i_nvdimm)
if ((l_data != NV_READY) && !l_err)
{
+
+ // Collect available status registers for error log
+ do
+ {
+ // Read and save NVDIMM_READY for traces
+ l_err = nvdimmReadReg(i_nvdimm, NVDIMM_READY, l_data);
+ if(l_err)
+ {
+ errlCommit( l_err, NVDIMM_COMP_ID );
+ break;
+ }
+ l_RegInfo.NVDimm_Ready = l_data;
+
+ // Read and save MODULE_HEALTH for traces
+ l_err = nvdimmReadReg(i_nvdimm, MODULE_HEALTH, l_data);
+ if(l_err)
+ {
+ errlCommit( l_err, NVDIMM_COMP_ID );
+ break;
+ }
+ l_RegInfo.Module_Health = l_data;
+
+ // Read and save MODULE_HEALTH_STATUS0 for traces
+ l_err = nvdimmReadReg(i_nvdimm, MODULE_HEALTH_STATUS0, l_data);
+ if(l_err)
+ {
+ errlCommit( l_err, NVDIMM_COMP_ID );
+ break;
+ }
+ l_RegInfo.Module_Health_Status0 = l_data;
+
+ // Read and save MODULE_HEALTH_STATUS1 for traces
+ l_err = nvdimmReadReg(i_nvdimm, MODULE_HEALTH_STATUS1, l_data);
+ if(l_err)
+ {
+ errlCommit( l_err, NVDIMM_COMP_ID );
+ break;
+ }
+ l_RegInfo.Module_Health_Status1 = l_data;
+
+ }while(0);
+
TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmReady() nvdimm[%X] - nvdimm not ready[%d]",
get_huid(i_nvdimm), l_data);
/*@
@@ -484,7 +530,12 @@ errlHndl_t nvdimmReady(Target *i_nvdimm)
// a failing indication on the NV controller
l_err->addPartCallout( i_nvdimm,
HWAS::NV_CONTROLLER_PART_TYPE,
- HWAS::SRCI_PRIORITY_HIGH);
+ HWAS::SRCI_PRIORITY_HIGH,
+ HWAS::DECONFIG,
+ HWAS::GARD_Fatal);
+
+ // Add Register Traces to error log
+ NVDIMM::UdNvdimmOPParms( l_RegInfo ).addToLog(l_err);
}
}while(0);
@@ -616,12 +667,6 @@ errlHndl_t nvdimmPollStatus ( Target *i_nvdimm,
ERRORLOG::ErrlEntry::NO_SW_CALLOUT );
l_err->collectTrace(NVDIMM_COMP_NAME);
-
- // May have to move the error handling to the caller
- // as different op could have different error severity
- l_err->addPartCallout( i_nvdimm,
- HWAS::NV_CONTROLLER_PART_TYPE,
- HWAS::SRCI_PRIORITY_HIGH);
}
return l_err;
@@ -646,9 +691,39 @@ errlHndl_t nvdimmPollBackupDone(Target* i_nvdimm,
get_huid(i_nvdimm));
errlHndl_t l_err = nullptr;
+ nvdimm_reg_t l_RegInfo = nvdimm_reg_t();
l_err = nvdimmPollStatus ( i_nvdimm, SAVE, o_poll);
+ if (l_err)
+ {
+ errlCommit(l_err, NVDIMM_COMP_ID);
+
+ /*@
+ *@errortype
+ *@reasoncode NVDIMM_BACKUP_TIMEOUT
+ *@severity ERRORLOG_SEV_PREDICTIVE
+ *@moduleid NVDIMM_POLL_BACKUP
+ *@userdata1[0:31] Related ops (0xff = NA)
+ *@userdata1[32:63] Target Huid
+ *@devdesc Encountered timeout while performing NVDIMM Backup operation
+ *@custdesc NVDIMM timed out
+ */
+ l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE,
+ NVDIMM_POLL_BACKUP,
+ NVDIMM_BACKUP_TIMEOUT,
+ NVDIMM_SET_USER_DATA_1(SAVE, TARGETING::get_huid(i_nvdimm)),
+ ERRORLOG::ErrlEntry::NO_SW_CALLOUT );
+
+ l_err->collectTrace( NVDIMM_COMP_NAME );
+
+ // Collect register data for FFDC Traces
+ nvdimmTraceRegs ( i_nvdimm, l_RegInfo );
+
+ // Add reg traces to the error log
+ NVDIMM::UdNvdimmOPParms( l_RegInfo ).addToLog(l_err);
+ }
+
TRACUCOMP(g_trac_nvdimm, EXIT_MRK"nvdimmPollBackupDone() nvdimm[%X]",
get_huid(i_nvdimm));
@@ -673,15 +748,52 @@ errlHndl_t nvdimmPollRestoreDone(Target* i_nvdimm,
get_huid(i_nvdimm));
errlHndl_t l_err = nullptr;
+ nvdimm_reg_t l_RegInfo = nvdimm_reg_t();
l_err = nvdimmPollStatus ( i_nvdimm, RESTORE, o_poll );
+ if (l_err)
+ {
+ errlCommit(l_err, NVDIMM_COMP_ID);
+
+ /*@
+ *@errortype
+ *@reasoncode NVDIMM_RESTORE_TIMEOUT
+ *@severity ERRORLOG_SEV_PREDICTIVE
+ *@moduleid NVDIMM_POLL_RESTORE
+ *@userdata1[0:31] Related ops (0xff = NA)
+ *@userdata1[32:63] Target Huid
+ *@devdesc Encountered timeout while performing NVDIMM Restore operation
+ *@custdesc NVDIMM timed out
+ */
+ l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE,
+ NVDIMM_POLL_RESTORE,
+ NVDIMM_RESTORE_TIMEOUT,
+ NVDIMM_SET_USER_DATA_1(RESTORE, TARGETING::get_huid(i_nvdimm)),
+ ERRORLOG::ErrlEntry::NO_SW_CALLOUT );
+
+ l_err->collectTrace( NVDIMM_COMP_NAME );
+
+ // May have to move the error handling to the caller
+ // as different op could have different error severity
+ l_err->addPartCallout( i_nvdimm,
+ HWAS::NV_CONTROLLER_PART_TYPE,
+ HWAS::SRCI_PRIORITY_HIGH);
+
+ // Collect register data for FFDC Traces
+ nvdimmTraceRegs ( i_nvdimm, l_RegInfo );
+
+ // Add reg traces to the error log
+ NVDIMM::UdNvdimmOPParms( l_RegInfo ).addToLog(l_err);
+ }
+
TRACUCOMP(g_trac_nvdimm, EXIT_MRK"nvdimmPollRestoreDone() nvdimm[%X]",
get_huid(i_nvdimm));
return l_err;
}
+
/**
* @brief This function polls the command status register for erase
* completion (does not indicate success or fail)
@@ -701,7 +813,31 @@ errlHndl_t nvdimmPollEraseDone(Target* i_nvdimm,
errlHndl_t l_err = nullptr;
- l_err = nvdimmPollStatus ( i_nvdimm, ERASE, o_poll);
+ l_err = nvdimmPollStatus( i_nvdimm, ERASE, o_poll);
+
+ if (l_err)
+ {
+ errlCommit(l_err, NVDIMM_COMP_ID);
+
+ /*@
+ *@errortype
+ *@reasoncode NVDIMM_ERASE_TIMEOUT
+ *@severity ERRORLOG_SEV_PREDICTIVE
+ *@moduleid NVDIMM_POLL_ERASE
+ *@userdata1[0:31] Related ops (0xff = NA)
+ *@userdata1[32:63] Target Huid
+ *@devdesc Encountered timeout while performing NVDIMM Erase operation
+ *@custdesc NVDIMM timed out
+ */
+ l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE,
+ NVDIMM_POLL_ERASE,
+ NVDIMM_ERASE_TIMEOUT,
+ NVDIMM_SET_USER_DATA_1(ERASE, TARGETING::get_huid(i_nvdimm)),
+ ERRORLOG::ErrlEntry::NO_SW_CALLOUT );
+
+ l_err->collectTrace( NVDIMM_COMP_NAME );
+
+ }
TRACUCOMP(g_trac_nvdimm, EXIT_MRK"nvdimmPollEraseDone() nvdimm[%X]",
get_huid(i_nvdimm));
@@ -729,7 +865,11 @@ errlHndl_t nvdimmPollESChargeStatus(Target* i_nvdimm,
errlHndl_t l_err = nullptr;
- l_err = nvdimmPollStatus ( i_nvdimm, CHARGE, o_poll );
+ l_err = nvdimmPollStatus( i_nvdimm, CHARGE, o_poll );
+
+ l_err->addPartCallout( i_nvdimm,
+ HWAS::NV_CONTROLLER_PART_TYPE,
+ HWAS::SRCI_PRIORITY_HIGH);
TRACUCOMP(g_trac_nvdimm, EXIT_MRK"nvdimmPollESChargeDone() nvdimm[%X]",
get_huid(i_nvdimm));
@@ -781,7 +921,8 @@ errlHndl_t nvdimmSetESPolicy(Target* i_nvdimm)
get_huid(i_nvdimm));
errlHndl_t l_err = nullptr;
- uint8_t l_data;
+ uint8_t l_data = 0x0;
+ nvdimm_reg_t l_RegInfo = nvdimm_reg_t();
do
{
@@ -790,7 +931,7 @@ errlHndl_t nvdimmSetESPolicy(Target* i_nvdimm)
if (l_err)
{
- nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_NOBKUP);
+ nvdimmSetStatusFlag(i_nvdimm, NSTD_VAL_DISARMED);
TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmSetESPolicy() nvdimm[%X]"
"failed to write ES register!",get_huid(i_nvdimm));
break;
@@ -804,13 +945,13 @@ errlHndl_t nvdimmSetESPolicy(Target* i_nvdimm)
if (l_err)
{
- nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_NOBKUP);
+ nvdimmSetStatusFlag(i_nvdimm, NSTD_VAL_DISARMED);
TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmSetESPolicy() nvdimm[%X]"
"failed to read ES register!",get_huid(i_nvdimm));
break;
}
- if ((l_data & ES_SUCCESS) != ES_SUCCESS)
+ if (((l_data & ES_SUCCESS) != ES_SUCCESS) || ((l_data & ES_POLICY_ERROR) == ES_POLICY_ERROR))
{
TRACFCOMP(g_trac_nvdimm, EXIT_MRK"NDVIMM HUID[%X], nvdimmSetESPolicy() "
"failed!",get_huid(i_nvdimm));
@@ -837,14 +978,11 @@ errlHndl_t nvdimmSetESPolicy(Target* i_nvdimm)
l_err->collectTrace(NVDIMM_COMP_NAME);
- // Failure setting the energy source policy could mean error on the
- // battery or even the cabling
- l_err->addPartCallout( i_nvdimm,
- HWAS::BPM_PART_TYPE,
- HWAS::SRCI_PRIORITY_HIGH);
- l_err->addPartCallout( i_nvdimm,
- HWAS::BPM_CABLE_PART_TYPE,
- HWAS::SRCI_PRIORITY_HIGH);
+ // Read relevant regs for trace data
+ nvdimmTraceRegs(i_nvdimm, l_RegInfo);
+
+ // Add reg traces to the error log
+ NVDIMM::UdNvdimmOPParms( l_RegInfo ).addToLog(l_err);
}
}while(0);
@@ -938,12 +1076,12 @@ errlHndl_t nvdimmValidImage(Target *i_nvdimm, bool &o_imgValid)
* @return errlHndl_t - Null if successful, otherwise a pointer to
* the error log.
*/
-errlHndl_t nvdimmRestore(TargetHandleList i_nvdimmList, uint8_t &i_mpipl)
+errlHndl_t nvdimmRestore(TargetHandleList& i_nvdimmList, uint8_t &i_mpipl)
{
errlHndl_t l_err = nullptr;
- bool l_imgValid;
uint8_t l_rstrValid;
uint32_t l_poll = 0;
+ TargetHandleList l_nvdimmList = i_nvdimmList;
do
{
@@ -952,23 +1090,7 @@ errlHndl_t nvdimmRestore(TargetHandleList i_nvdimmList, uint8_t &i_mpipl)
it != i_nvdimmList.end();)
{
// Default state during boot is unarmed, therefore not preserved
- nvdimmSetStatusFlag(*it, NSTD_ERR_NOBKUP);
-
- l_err = nvdimmValidImage(*it, l_imgValid);
-
- // No reason to run if we can't figure out
- // if there is an image or not
- if (l_err)
- {
- break;
- }
-
- if (!l_imgValid)
- {
- nvdimmSetStatusFlag(*it, NSTD_VAL_NOPRSV);
- i_nvdimmList.erase(it);
- continue;
- }
+ nvdimmSetStatusFlag(*it, NSTD_VAL_DISARMED);
TargetHandleList l_mcaList;
getParentAffinityTargets(l_mcaList, *it, CLASS_UNIT, TYPE_MCA);
@@ -987,13 +1109,6 @@ errlHndl_t nvdimmRestore(TargetHandleList i_nvdimmList, uint8_t &i_mpipl)
{
TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmRestore() HUID[%X] i_mpipl[%u] failed to de-assert resetn!",
get_huid(*it), i_mpipl);
-
- nvdimmSetStatusFlag(*it, NSTD_ERR_NOPRSV);
- //@TODO RTC 199645 - add HW callout on dimm target
- // If we failed to de-assert reset_n, the dimm is pretty much useless.
- // Let's not restore if that happens
- // The callout will be added inside the HWP
- // Leaving this comment here as a reminder, will remove later
break;
}
@@ -1009,7 +1124,7 @@ errlHndl_t nvdimmRestore(TargetHandleList i_nvdimmList, uint8_t &i_mpipl)
get_huid(*it), i_mpipl);
l_err->setSev(ERRORLOG::ERRL_SEV_INFORMATIONAL);
- l_err->collectTrace(NVDIMM_COMP_NAME, 256);
+ l_err->collectTrace( NVDIMM_COMP_NAME );
ERRORLOG::errlCommit(l_err, NVDIMM_COMP_ID);
}
@@ -1022,12 +1137,6 @@ errlHndl_t nvdimmRestore(TargetHandleList i_nvdimmList, uint8_t &i_mpipl)
{
TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmRestore() HUID[%X] self_refresh_entry failed!",
get_huid(*it));
-
- nvdimmSetStatusFlag(*it, NSTD_ERR_NOPRSV);
- //@TODO RTC 199645 - add HW callout on dimm target
- // Without SRE the data could be not reliably restored
- // The callout will be added inside the HWP
- // Leaving this comment here as a reminder, will remove later
break;
}
it++;
@@ -1050,7 +1159,6 @@ errlHndl_t nvdimmRestore(TargetHandleList i_nvdimmList, uint8_t &i_mpipl)
l_err = nvdimmWriteReg(l_nvdimm, NVDIMM_FUNC_CMD, RESTORE_IMAGE);
if (l_err)
{
- nvdimmSetStatusFlag(l_nvdimm, NSTD_ERR_NOPRSV);
TRACFCOMP(g_trac_nvdimm, ERR_MRK"NDVIMM HUID[%X], error initiating restore!!",
get_huid(l_nvdimm));
break;
@@ -1071,10 +1179,8 @@ errlHndl_t nvdimmRestore(TargetHandleList i_nvdimmList, uint8_t &i_mpipl)
l_err = nvdimmPollRestoreDone(l_nvdimm, l_poll);
if (l_err)
{
- nvdimmSetStatusFlag(l_nvdimm, NSTD_ERR_NOPRSV);
TRACFCOMP(g_trac_nvdimm, ERR_MRK"NDVIMM HUID[%X], error restoring!",
get_huid(l_nvdimm));
- errlCommit(l_err, NVDIMM_COMP_ID);
break;
}
}
@@ -1084,22 +1190,22 @@ errlHndl_t nvdimmRestore(TargetHandleList i_nvdimmList, uint8_t &i_mpipl)
break;
}
- // Make sure the restore is valid
+ // Check for restore errors
for (const auto & l_nvdimm : i_nvdimmList)
{
l_err = nvdimmGetRestoreValid(l_nvdimm, l_rstrValid);
if (l_err)
{
- nvdimmSetStatusFlag(l_nvdimm, NSTD_ERR_NOPRSV);
TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmRestore Target[%X] error validating restore status!",
get_huid(l_nvdimm));
break;
}
- if ((l_rstrValid & RSTR_SUCCESS) != RSTR_SUCCESS){
+ if ((l_rstrValid & RSTR_ERROR) == RSTR_ERROR)
+ {
- TRACFCOMP(g_trac_nvdimm, ERR_MRK"NDVIMM HUID[%X] restoreValid[%d], restore failed!",
- get_huid(l_nvdimm), l_rstrValid);
+ TRACFCOMP(g_trac_nvdimm, ERR_MRK"NDVIMM HUID[%X] restore failed due to errors",
+ get_huid(l_nvdimm));
/*@
*@errortype
*@reasoncode NVDIMM_RESTORE_FAILED
@@ -1119,28 +1225,19 @@ errlHndl_t nvdimmRestore(TargetHandleList i_nvdimmList, uint8_t &i_mpipl)
get_huid(l_nvdimm),
0x0,
ERRORLOG::ErrlEntry::NO_SW_CALLOUT);
-
- l_err->collectTrace(NVDIMM_COMP_NAME);
- nvdimmSetStatusFlag(l_nvdimm, NSTD_ERR_NOPRSV);
-
- // Invalid restore could be due to dram not in self-refresh
- // or controller issue. Data should not be trusted at this point
- l_err->addPartCallout( l_nvdimm,
- HWAS::NV_CONTROLLER_PART_TYPE,
- HWAS::SRCI_PRIORITY_HIGH);
break;
}
}
if (l_err)
{
+ TRACFCOMP(g_trac_nvdimm, "restore encountered an error");
break;
}
// Exit self-refresh
for (const auto & l_nvdimm : i_nvdimmList)
{
-
TargetHandleList l_mcaList;
getParentAffinityTargets(l_mcaList, l_nvdimm, CLASS_UNIT, TYPE_MCA);
assert(l_mcaList.size(), "nvdimmRestore() failed to find parent MCA.");
@@ -1155,21 +1252,25 @@ errlHndl_t nvdimmRestore(TargetHandleList i_nvdimmList, uint8_t &i_mpipl)
{
TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmRestore() HUID[%X] post_restore_transition failed!",
get_huid(l_nvdimm));
-
- // Commit the error from the HWP
- nvdimmSetStatusFlag(l_nvdimm, NSTD_ERR_NOPRSV);
break;
}
else
{
// Restore success!
- nvdimmSetStatusFlag(l_nvdimm, NSTD_VAL_PRSV);
+ // Remove dimm from list for error handling
+ i_nvdimmList.erase(i_nvdimmList.begin());
}
}
+ if (l_err)
+ {
+ TRACFCOMP(g_trac_nvdimm, "nvdimmRestore() HUID[%X] encounrterd an error during restore");
+ break;
+ }
+
if (i_mpipl)
{
- for (const auto & l_nvdimm : i_nvdimmList)
+ for (const auto & l_nvdimm : l_nvdimmList)
{
TargetHandleList l_mcaList;
errlHndl_t err = nullptr;
@@ -1188,7 +1289,7 @@ errlHndl_t nvdimmRestore(TargetHandleList i_nvdimmList, uint8_t &i_mpipl)
get_huid(l_nvdimm), i_mpipl);
err->setSev(ERRORLOG::ERRL_SEV_INFORMATIONAL);
- err->collectTrace(NVDIMM_COMP_NAME, 256);
+ err->collectTrace( NVDIMM_COMP_NAME );
ERRORLOG::errlCommit(err, NVDIMM_COMP_ID);
}
}
@@ -1203,68 +1304,42 @@ errlHndl_t nvdimmRestore(TargetHandleList i_nvdimmList, uint8_t &i_mpipl)
#endif
/**
- * @brief This function checks the erase status register to make sure
- * the last erase completed witout error
+ * @brief This function checks the status and success of an erase
*
* @param[in] i_nvdimm - nvdimm target with NV controller
*
* @return errlHndl_t - Null if successful, otherwise a pointer to
* the error log.
*/
-errlHndl_t nvdimmCheckEraseSuccess(Target *i_nvdimm)
+errlHndl_t nvdimmEraseCheck(Target *i_nvdimm)
{
- TRACUCOMP(g_trac_nvdimm, ENTER_MRK"nvdimmCheckEraseSuccess() : nvdimm[%X]",
- get_huid(i_nvdimm));
-
- uint8_t l_data = 0;
errlHndl_t l_err = nullptr;
+ nvdimm_reg_t l_RegInfo;
- l_err = nvdimmReadReg(i_nvdimm, ERASE_STATUS, l_data);
+ // Erase happens one module at a time. No need to set any offset on the counter
+ uint32_t l_poll = 0;
+ l_err = nvdimmPollEraseDone(i_nvdimm, l_poll);
+ // Add part callout, currently all erase calls have same callout
+ // Dump traces to the error log if error exists
if (l_err)
{
- TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmCheckEraseSuccess() nvdimm[%X]"
- "failed to read erase status reg!",get_huid(i_nvdimm));
- }
- else if ((l_data & ERASE_SUCCESS) != ERASE_SUCCESS)
- {
+ // For both Erase timeout and Erase fail
+ // Callout nvdimm on high, gard and deconfig
+ l_err->addPartCallout( i_nvdimm,
+ HWAS::NV_CONTROLLER_PART_TYPE,
+ HWAS::SRCI_PRIORITY_HIGH,
+ HWAS::DECONFIG,
+ HWAS::GARD_Fatal);
- TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmCheckEraseSuccess() nvdimm[%X]"
- "failed to erase!",get_huid(i_nvdimm));
- /*@
- *@errortype
- *@reasoncode NVDIMM_ERASE_FAILED
- *@severity ERRORLOG_SEV_PREDICTIVE
- *@moduleid NVDIMM_CHECK_ERASE
- *@userdata1[0:31] Related ops (0xff = NA)
- *@userdata1[32:63] Target Huid
- *@userdata2 <UNUSED>
- *@devdesc Encountered error erasing previously stored data image
- * on NVDIMM. Likely due to timeout and/or controller error
- *@custdesc NVDIMM error erasing data image
- */
- l_err = new ERRORLOG::ErrlEntry(
- ERRORLOG::ERRL_SEV_PREDICTIVE,
- NVDIMM_CHECK_ERASE,
- NVDIMM_ERASE_FAILED,
- NVDIMM_SET_USER_DATA_1(ERASE, get_huid(i_nvdimm)),
- 0x0,
- ERRORLOG::ErrlEntry::NO_SW_CALLOUT );
- l_err->collectTrace(NVDIMM_COMP_NAME);
- errlCommit( l_err, NVDIMM_COMP_ID );
+ // Collect register data for FFDC Traces
+ nvdimmTraceRegs ( i_nvdimm, l_RegInfo );
- // Failure to erase could mean internal NV controller error and/or
- // HW error on nand flash. NVDIMM will lose persistency if failed to
- // erase nand flash
- l_err->addPartCallout( i_nvdimm,
- HWAS::NV_CONTROLLER_PART_TYPE,
- HWAS::SRCI_PRIORITY_HIGH);
+ // Add reg traces to the error log
+ NVDIMM::UdNvdimmOPParms( l_RegInfo ).addToLog(l_err);
}
- TRACUCOMP(g_trac_nvdimm, EXIT_MRK"nvdimmCheckEraseSuccess(): nvdimm[%X] ret[%X]",
- get_huid(i_nvdimm), l_data);
-
return l_err;
}
@@ -1293,13 +1368,8 @@ errlHndl_t nvdimmEraseNF(Target *i_nvdimm)
break;
}
- // Erase happens one module at a time. No need to set any offset on the counter
- uint32_t l_poll = 0;
- l_err = nvdimmPollEraseDone(i_nvdimm, l_poll);
- if (!l_err)
- {
- l_err = nvdimmCheckEraseSuccess(i_nvdimm);
- }
+ // Poll for success and check status
+ l_err = nvdimmEraseCheck(i_nvdimm);
}while(0);
@@ -1525,7 +1595,7 @@ errlHndl_t nvdimmEpowSetup(TargetHandleList &i_nvdimmList)
TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmEpowSetup() HUID[%X] failed to setup epow!",
get_huid(*it));
- nvdimmSetStatusFlag(*it, NSTD_ERR_NOPRSV);
+ nvdimmSetStatusFlag(*it, NSTD_VAL_SR_FAILED);
break;
}
it++;
@@ -1547,32 +1617,46 @@ errlHndl_t nvdimmEpowSetup(TargetHandleList &i_nvdimmList)
* @param[in] i_nvdimmList - list of nvdimm targets
*
*/
-void nvdimm_restore(TargetHandleList &i_nvdimmList)
+errlHndl_t nvdimm_restore(TargetHandleList &i_nvdimmList)
{
TRACUCOMP(g_trac_nvdimm, ENTER_MRK"nvdimm_restore()");
+
errlHndl_t l_err = nullptr;
- Target* l_sys = nullptr;
- targetService().getTopLevelTarget( l_sys );
+ bool l_valid = false;
+ bool l_exit = false;
+ TARGETING::Target* l_sys = nullptr;
+ TARGETING::targetService().getTopLevelTarget( l_sys );
assert(l_sys, "nvdimm_restore: no TopLevelTarget");
uint8_t l_mpipl = l_sys->getAttr<ATTR_IS_MPIPL_HB>();
+ nvdimm_reg_t l_RegInfo = nvdimm_reg_t();
+ TargetHandleList l_nvdimmList = i_nvdimmList;
+ uint8_t l_rstrValid;
do
{
- // Set the energy policy to device-managed
- // Don't think this is needed for the supercaps to start charging
- // but do it anyway to get the charging going
for (const auto & l_nvdimm : i_nvdimmList)
{
- l_err = nvdimmSetESPolicy(l_nvdimm);
+ // Check for a valid image
+ l_err = nvdimmValidImage( l_nvdimm, l_valid );
if (l_err)
{
- // Failing this is an indication of power pack issue.
- // This will prevent future backup, but let's continue
- // since we can still restore the data if there is any
- nvdimmSetStatusFlag(l_nvdimm, NSTD_ERR_NOBKUP);
- TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_restore() - Failing nvdimmSetESPolicy()");
- errlCommit( l_err, NVDIMM_COMP_ID );
+ TRACFCOMP(g_trac_nvdimm, "nvdimmRestore() nvdimm[%X] restore failed to read the image", get_huid(l_nvdimm));
+ errlCommit(l_err, NVDIMM_COMP_ID);
}
+
+ if (!l_valid)
+ {
+ TRACFCOMP(g_trac_nvdimm, "nvdimmRestore() nvdimm[%X] restore failed due to invalid image", get_huid(l_nvdimm));
+ // Set ATTR NV STATUS FLAG to Erased
+ nvdimmSetStatusFlag(l_nvdimm, NSTD_VAL_ERASED);
+ break;
+ }
+
+ }
+
+ if (!l_valid)
+ {
+ break;
}
if (l_mpipl)
@@ -1586,7 +1670,7 @@ void nvdimm_restore(TargetHandleList &i_nvdimmList)
if (l_err)
{
- nvdimmSetStatusFlag(l_nvdimm, NSTD_ERR_NOPRSV);
+ nvdimmSetStatusFlag(l_nvdimm, NSTD_VAL_ERASED);
TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_restore() nvdimm[%X], error backing up the DRAM!",
get_huid(l_nvdimm));
errlCommit(l_err, NVDIMM_COMP_ID);
@@ -1596,31 +1680,77 @@ void nvdimm_restore(TargetHandleList &i_nvdimmList)
}
// Start the restore
- l_err = nvdimmRestore(i_nvdimmList, l_mpipl);
+ l_err = nvdimmRestore(l_nvdimmList, l_mpipl);
+ // Check if restore completed successfully
if (l_err)
{
- TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_restore() - Failing nvdimmRestore()");
- errlCommit( l_err, NVDIMM_COMP_ID );
+ const auto l_nvdimm = l_nvdimmList.front();
+
+ TRACFCOMP(g_trac_nvdimm, "nvdimm_restore() - Failing nvdimmRestore()");
+ nvdimmSetStatusFlag(l_nvdimm, NSTD_VAL_SR_FAILED);
+
+ // Invalid restore could be due to dram not in self-refresh
+ // or controller issue. Data should not be trusted at this point
+ l_err->addPartCallout( l_nvdimm,
+ HWAS::NV_CONTROLLER_PART_TYPE,
+ HWAS::SRCI_PRIORITY_HIGH,
+ HWAS::DECONFIG,
+ HWAS::GARD_Fatal);
+
+ // Collect register data for FFDC Traces
+ nvdimmTraceRegs ( l_nvdimm, l_RegInfo );
+
+ // Add reg traces to the error log
+ NVDIMM::UdNvdimmOPParms( l_RegInfo ).addToLog(l_err);
break;
}
- // Make sure the energy source is fully charged before erasing the images
- // Doing this on all the nvdimms since the ones w/o image will need
- // to be fully charged before arming the trigger
- uint32_t l_poll = 0;
+ // Check health status registers and exit if required
for (const auto & l_nvdimm : i_nvdimmList)
{
- l_err = nvdimmPollESChargeStatus(l_nvdimm, l_poll);
+ l_err = nvdimmHealthStatusCheck( l_nvdimm, HEALTH_RESTORE, l_exit );
- if (l_err){
- nvdimmSetStatusFlag(l_nvdimm, NSTD_ERR_NOBKUP);
- errlCommit( l_err, NVDIMM_COMP_ID );
+ if (l_err)
+ {
+ TRACFCOMP(g_trac_nvdimm, "nvdimmRestore() nvdimm[%X] failed during health status check", get_huid(l_nvdimm));
+ if (l_exit)
+ {
+ errlCommit( l_err, NVDIMM_COMP_ID );
+ }
+ else
+ {
+ // Redundant check with external err bugged
+ errlCommit( l_err, NVDIMM_COMP_ID );
+ return l_err;
+ }
}
+
+ // Make sure the restore is valid
+ l_err = nvdimmGetRestoreValid(l_nvdimm, l_rstrValid);
+ if (l_err)
+ {
+ TRACFCOMP(g_trac_nvdimm, "nvdimmRestore Target[%X] error validating restore status!",
+ get_huid(l_nvdimm));
+ break;
+ }
+
+ if ((l_rstrValid & RSTR_SUCCESS) == RSTR_SUCCESS)
+ {
+ // Restore success!
+ nvdimmSetStatusFlag(l_nvdimm, NSTD_VAL_ERROR);
+ }
+
}
}while(0);
+ // Return err not being handled, temp commit:
+ if (l_err)
+ {
+ errlCommit(l_err, NVDIMM_COMP_ID);
+ }
+
// At the end, pre-load CCS with commands for EPOW. This will stage the CCS
// with the require commands to trigger the save on NVDIMMs. The actual
// triggering will be done by OCC when EPOW is detected.
@@ -1633,6 +1763,7 @@ void nvdimm_restore(TargetHandleList &i_nvdimmList)
}
TRACUCOMP(g_trac_nvdimm, EXIT_MRK"nvdimm_restore()");
+ return l_err;
}
/**
@@ -1733,12 +1864,16 @@ errlHndl_t nvdimm_factory_reset(Target *i_nvdimm)
* @param[in] i_nvdimm - nvdimm target
*
*/
-void nvdimm_init(Target *i_nvdimm)
+errlHndl_t nvdimm_init(Target *i_nvdimm)
{
TRACUCOMP(g_trac_nvdimm, ENTER_MRK"nvdimm_init() nvdimm[%X]",
get_huid(i_nvdimm));
errlHndl_t l_err = nullptr;
+ bool l_continue = true;
+ uint8_t l_data = 0;
+ nvdimm_reg_t l_RegInfo;
+ uint32_t l_poll = 0;
do
{
@@ -1759,6 +1894,15 @@ void nvdimm_init(Target *i_nvdimm)
}
}
+ // Set ATTR_NV_STATUS_FLAG to default disarmed state
+ l_err = notifyNvdimmProtectionChange(i_nvdimm, NVDIMM_DISARMED);
+ if (l_err)
+ {
+ nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR);
+ errlCommit(l_err, NVDIMM_COMP_ID);
+ }
+
+ // Check the nvdimm ready status
l_err = nvdimmReady(i_nvdimm);
if (l_err)
@@ -1766,7 +1910,6 @@ void nvdimm_init(Target *i_nvdimm)
nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR);
TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_init() nvdimm[%X], controller not ready",
get_huid(i_nvdimm));
- errlCommit(l_err, NVDIMM_COMP_ID);
break;
}
@@ -1777,46 +1920,163 @@ void nvdimm_init(Target *i_nvdimm)
nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR);
TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_init() nvdimm[%X], error retrieving timeout values",
get_huid(i_nvdimm));
- errlCommit(l_err, NVDIMM_COMP_ID);
break;
}
- //Check save progress
- uint32_t l_poll = 0;
- l_err = nvdimmPollBackupDone(i_nvdimm, l_poll);
+ // Check for Erase in progress and its status
+ l_err = nvdimmEraseCheck(i_nvdimm);
+ if (l_err)
+ {
+ break;
+ }
+ // Check NO_RESET_N bit for power loss without save
+ l_err = nvdimmReadReg ( i_nvdimm, CSAVE_FAIL_INFO1, l_data);
if (l_err)
{
- nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_NOPRSV);
- TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_init() nvdimm[%X], error backing up the DRAM!",
- get_huid(i_nvdimm));
- errlCommit(l_err, NVDIMM_COMP_ID);
break;
}
+ else if ((l_data & NO_RESET_N) == NO_RESET_N)
+ {
+ // Set ATTR_NV_STATUS_FLAG to restored, as data may persist
+ nvdimmSetStatusFlag(i_nvdimm, NSTD_VAL_ERROR);
+ TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmInit() nvdimm[%X]"
+ "failed to save due to power loss!",get_huid(i_nvdimm));
+ /*@
+ *@errortype
+ *@reasoncode NVDIMM_POWER_SAVE_FAILURE
+ *@severity ERRORLOG_SEV_PREDICTIVE
+ *@moduleid NVDIMM_CHECK_RESETN
+ *@userdata1[0:31] Related ops (0xff = NA)
+ *@userdata1[32:63] Target Huid
+ *@userdata2 <UNUSED>
+ *@devdesc NO_RESET_N is set in CSAVE_FAIL_INFO1: the NVDIMM lost
+ * power without saving its data image
+ *@custdesc NVDIMM failed to save data due to power loss
+ */
+ l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE,
+ NVDIMM_CHECK_RESETN,
+ NVDIMM_POWER_SAVE_FAILURE,
+ NVDIMM_SET_USER_DATA_1(l_data, get_huid(i_nvdimm)),
+ 0x0,
+ ERRORLOG::ErrlEntry::NO_SW_CALLOUT );
+
+ l_err->collectTrace( NVDIMM_COMP_NAME );
+
+ // NO_RESET_N indicates power was lost without a save being done.
+ // Call out the NV controller at low priority, using the callout form
+ // that requests neither deconfig nor gard
+ l_err->addPartCallout( i_nvdimm,
+ HWAS::NV_CONTROLLER_PART_TYPE,
+ HWAS::SRCI_PRIORITY_LOW);
- // Unlock encryption if enabled
- TargetHandleList l_nvdimmTargetList;
- l_nvdimmTargetList.push_back(i_nvdimm);
- NVDIMM::nvdimm_encrypt_unlock(l_nvdimmTargetList);
+ // Collect register data for FFDC Traces
+ nvdimmTraceRegs ( i_nvdimm, l_RegInfo );
- // Disarm the ddr_resetn here in case it came in armed. When the nvdimm is
- // armed the reset_n is masked off from the host, meaning the drams won't
- // be able to get reset properly later, causing training to fail.
- l_err = nvdimmChangeArmState(i_nvdimm, DISARM_TRIGGER);
+ // Add reg traces to the error log
+ NVDIMM::UdNvdimmOPParms( l_RegInfo ).addToLog(l_err);
+ errlCommit(l_err, NVDIMM_COMP_ID);
+ }
+ else
+ {
+ // Check save progress
+ l_err = nvdimmPollBackupDone(i_nvdimm, l_poll);
+ if (l_err)
+ {
+ // May have to move the error handling to the caller
+ // as different op could have different error severity
+ l_err->addPartCallout( i_nvdimm,
+ HWAS::NV_CONTROLLER_PART_TYPE,
+ HWAS::SRCI_PRIORITY_HIGH,
+ HWAS::DECONFIG,
+ HWAS::GARD_Fatal);
+
+ TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_int() nvdimm[%X], error backing up the DRAM!",
+ get_huid(i_nvdimm));
+ break;
+ }
+ }
+
+ // Check CSAVE_ERROR Register
+ l_err = nvdimmReadReg( i_nvdimm, CSAVE_FAIL_INFO0, l_data );
if (l_err)
{
- nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_NOPRSV);
- TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_init() nvdimm[%X], error disarming the nvdimm!",
- get_huid(i_nvdimm));
- errlCommit(l_err, NVDIMM_COMP_ID);
+ break;
+ }
+ else if (l_data != ZERO)
+ {
+ /*@
+ *@errortype
+ *@reasoncode NVDIMM_CSAVE_ERROR
+ *@severity ERRORLOG_SEV_PREDICTIVE
+ *@moduleid NVDIMM_CHECK_CSAVE
+ *@userdata1[0:31] Related ops (0xff = NA)
+ *@userdata1[32:63] Target Huid
+ *@userdata2 <UNUSED>
+ *@devdesc Encountered error saving during catastrophic save
+ * on NVDIMM. Check error register trace for details
+ *@custdesc NVDIMM error during Catastrophic Save
+ */
+ l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE,
+ NVDIMM_CHECK_CSAVE,
+ NVDIMM_CSAVE_ERROR,
+ NVDIMM_SET_USER_DATA_1(l_data, get_huid(i_nvdimm)),
+ 0x0,
+ ERRORLOG::ErrlEntry::NO_SW_CALLOUT );
+
+ l_err->collectTrace( NVDIMM_COMP_NAME );
+
+ // Collect register data for FFDC Traces
+ nvdimmTraceRegs ( i_nvdimm, l_RegInfo );
+
+ // Add reg traces to the error log
+ NVDIMM::UdNvdimmOPParms( l_RegInfo ).addToLog(l_err);
+
+ // Check if the image is still valid
+ if ( l_RegInfo.CSave_Info != VALID_IMAGE )
+ {
+ // Callout and gard dimm if image is not valid
+ l_err->addPartCallout( i_nvdimm,
+ HWAS::NV_CONTROLLER_PART_TYPE,
+ HWAS::SRCI_PRIORITY_HIGH,
+ HWAS::DECONFIG,
+ HWAS::GARD_Fatal);
+ }
+ else
+ {
+ // Set ATTR_NV_STATUS_FLAG to Restored as data might persist
+ nvdimmSetStatusFlag(i_nvdimm, NSTD_VAL_ERROR);
+ errlCommit(l_err, NVDIMM_COMP_ID);
+ }
+ break;
+ }
+
+ // Check Health Status Registers
+ l_err = nvdimmHealthStatusCheck(i_nvdimm, HEALTH_SAVE, l_continue);
+ if(!l_continue)
+ {
break;
}
+ // Unlock encryption if enabled
+ TargetHandleList l_nvdimmTargetList;
+ l_nvdimmTargetList.push_back(i_nvdimm);
+ NVDIMM::nvdimm_encrypt_unlock(l_nvdimmTargetList);
+
}while(0);
TRACUCOMP(g_trac_nvdimm, EXIT_MRK"nvdimm_init() nvdimm[%X]",
get_huid(i_nvdimm));
+
+ // Return err not being handled, temp commit:
+ if (l_err)
+ {
+ errlCommit(l_err, NVDIMM_COMP_ID);
+ }
+
+
+ return l_err;
}
diff --git a/src/usr/isteps/nvdimm/nvdimm.H b/src/usr/isteps/nvdimm/nvdimm.H
index 8304486f6..af77866ff 100644
--- a/src/usr/isteps/nvdimm/nvdimm.H
+++ b/src/usr/isteps/nvdimm/nvdimm.H
@@ -343,12 +343,17 @@ enum i2c_out_values : uint8_t
CHARGE_IN_PROGRESS = 0x01,
SAVE_SUCCESS = 0x01,
RSTR_SUCCESS = 0X01,
- ARM_SUCCESS = 0X09,
+ ARM_SUCCESS = 0X01,
ERASE_SUCCESS = 0X01,
ES_SUCCESS = 0x05,
CHARGE_SUCCESS = 0x00,
NV_READY = 0xA5,
FACTORY_RESET_IN_PROGRESS = 0x03,
+ NO_RESET_N = 0x20,
+ RESET_N_ARMED = 0x08,
+ ES_POLICY_ERROR = 0x02,
+ ARM_ERROR = 0X02,
+ RSTR_ERROR = 0x02,
};
// Timeout-related enum
@@ -422,6 +427,49 @@ union scap_status_union
typedef scap_status_union scap_status_register_t;
+// Bit masks used when checking the health status registers. The groups
+// below are listed per register, so a mask value may repeat across groups.
+enum health_status : uint8_t
+{
+    // Module Health Status0
+    VOLTAGE_REGULATOR_FAILED    = (1 << 0),
+    VDD_LOST                    = (1 << 1),
+    VPP_LOST                    = (1 << 2),
+    VTT_LOST                    = (1 << 3),
+    DRAM_NOT_SELF_REFRESH       = (1 << 4),
+    CONTROLLER_HARDWARE_ERROR   = (1 << 5),
+    NVM_CONTROLLER_ERROR        = (1 << 6),
+    NVM_LIFETIME_ERROR          = (1 << 7),
+    // Module Health Status1
+    NOT_ENOUGH_ENERGY_FOR_CSAVE = (1 << 0),
+    INVALID_FIRMWARE_ERROR      = (1 << 1),
+    CONFIG_DATA_ERROR           = (1 << 2),
+    NO_ES_PRESENT               = (1 << 3),
+    ES_POLICY_NOT_SET           = (1 << 4),
+    ES_HARDWARE_FAILURE         = (1 << 5),
+    ES_HEALTH_ASSESSMENT_ERROR  = (1 << 6),
+    // Error Threshold Status
+    ES_LIFETIME_ERROR           = (1 << 1),
+    ES_TEMP_ERROR               = (1 << 2),
+};
+
+// Identifies which nvdimm function is requesting a health status check.
+// Values run consecutively from 1.
+enum health_function : uint8_t
+{
+    HEALTH_SAVE = 1,
+    HEALTH_RESTORE,     // 2
+    HEALTH_UPDATE,      // 3
+    HEALTH_PRE_ARM,     // 4
+    HEALTH_POST_ARM,    // 5
+};
+
+// Bit values for the event notification register
+enum event_n : uint8_t
+{
+    PERSISTENCY_NOTIFICATION     = (1 << 0),
+    SET_EVENT_NOTIFICATION_ERROR = (1 << 1),
+    PERSISTENCY_ENABLED          = (1 << 2),
+};
+
/**
* @brief Wrapper to call deviceOp to read the NV controller via I2C
*
@@ -504,7 +552,6 @@ errlHndl_t nvdimmPollStatus(TARGETING::Target *i_nvdimm, ops_id i_ops_id, uint32
*/
errlHndl_t nvdimmSetESPolicy(TARGETING::Target* i_nvdimm);
-
/**
* @brief Helper function to handle conflicting attribute keys
*
@@ -631,6 +678,18 @@ errlHndl_t nvdimm_getTPM(TARGETING::Target*& o_tpm);
#endif
+/**
+ * @brief Check whether the given nvdimm target holds a valid image
+ *
+ * @param[in] i_nvdimm - nvdimm target with NV controller
+ *
+ * @param[out] o_imgValid - set to true if the target has a valid image.
+ * NOTE(review): the value left in o_imgValid when an error log is
+ * returned is not visible from this declaration; the callout helpers
+ * in nvdimmErrorLog.C still read it after committing the error -
+ * confirm it is always written.
+ *
+ * @return errlHndl_t - Null if successful, otherwise a pointer to
+ * the error log.
+ */
+errlHndl_t nvdimmValidImage(TARGETING::Target *i_nvdimm, bool &o_imgValid);
+
} //End NVDIMM namespace
diff --git a/src/usr/isteps/nvdimm/nvdimm.mk b/src/usr/isteps/nvdimm/nvdimm.mk
index f26c8232b..d9418b414 100644
--- a/src/usr/isteps/nvdimm/nvdimm.mk
+++ b/src/usr/isteps/nvdimm/nvdimm.mk
@@ -47,6 +47,7 @@ EXTRAINCDIR += ${PROCEDURE_PATH}/hwp/ffdc/
OBJS += nvdimm.o
OBJS += nvdimmdd.o
OBJS += errlud_nvdimm.o
+OBJS += nvdimmErrorLog.o
ifneq (${HOSTBOOT_RUNTIME},1)
diff --git a/src/usr/isteps/nvdimm/nvdimmErrorLog.C b/src/usr/isteps/nvdimm/nvdimmErrorLog.C
new file mode 100644
index 000000000..57984bb97
--- /dev/null
+++ b/src/usr/isteps/nvdimm/nvdimmErrorLog.C
@@ -0,0 +1,1313 @@
+/* IBM_PROLOG_BEGIN_TAG */
+/* This is an automatically generated prolog. */
+/* */
+/* $Source: src/usr/isteps/nvdimm/nvdimmErrorLog.C $ */
+/* */
+/* OpenPOWER HostBoot Project */
+/* */
+/* Contributors Listed Below - COPYRIGHT 2014,2019 */
+/* [+] International Business Machines Corp. */
+/* */
+/* */
+/* Licensed under the Apache License, Version 2.0 (the "License"); */
+/* you may not use this file except in compliance with the License. */
+/* You may obtain a copy of the License at */
+/* */
+/* http://www.apache.org/licenses/LICENSE-2.0 */
+/* */
+/* Unless required by applicable law or agreed to in writing, software */
+/* distributed under the License is distributed on an "AS IS" BASIS, */
+/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */
+/* implied. See the License for the specific language governing */
+/* permissions and limitations under the License. */
+/* */
+/* IBM_PROLOG_END_TAG */
+
+#include "nvdimm.H"
+#include <trace/interface.H>
+#include <errl/errlentry.H>
+#include <errl/errlmanager.H>
+#include <errl/errludtarget.H>
+#include <targeting/common/commontargeting.H>
+#include <targeting/common/util.H>
+#include <targeting/common/utilFilter.H>
+#include <fapi2.H>
+#include <lib/shared/nimbus_defaults.H>
+#include <isteps/nvdimm/nvdimmreasoncodes.H>
+#include <isteps/nvdimm/nvdimm.H>
+#include "errlud_nvdimm.H"
+
+using namespace TARGETING;
+
+namespace NVDIMM
+{
+
+/**
+ * @brief Read one NV controller register for error log (FFDC) collection
+ *
+ * A failing read is committed here so that it does not stop the collection
+ * of the remaining registers.
+ *
+ * @param[in] i_nvdimm - nvdimm target
+ *
+ * @param[in] i_reg - register to read; forwarded unchanged to nvdimmReadReg
+ *
+ * @return uint8_t - the register value, or 0x0 if the read failed
+ */
+template <typename RegT>
+static uint8_t nvdimmReadTraceReg(Target *i_nvdimm, const RegT i_reg)
+{
+    uint8_t l_data = 0x0;
+    errlHndl_t l_err = nvdimmReadReg(i_nvdimm, i_reg, l_data);
+    if(l_err)
+    {
+        errlCommit( l_err, NVDIMM_COMP_ID );
+
+        // Report 0x0 for a register that could not be read instead of
+        // whatever the failed read (or an earlier read) left in the buffer
+        l_data = 0x0;
+    }
+    return l_data;
+}
+
+/**
+ * @brief Read and save various status registers needed for error log traces
+ *
+ * Registers are read one at a time in the order listed below. A read
+ * failure is committed and the matching field is set to 0x0; the remaining
+ * registers are still collected.
+ *
+ * @param[in] i_nvdimm - nvdimm target
+ *
+ * @param[out] o_RegInfo - struct to hold register data
+ *
+ */
+void nvdimmTraceRegs(Target *i_nvdimm, nvdimm_reg_t& o_RegInfo)
+{
+    // Module health
+    o_RegInfo.Module_Health =
+        nvdimmReadTraceReg(i_nvdimm, MODULE_HEALTH);
+    o_RegInfo.Module_Health_Status0 =
+        nvdimmReadTraceReg(i_nvdimm, MODULE_HEALTH_STATUS0);
+    o_RegInfo.Module_Health_Status1 =
+        nvdimmReadTraceReg(i_nvdimm, MODULE_HEALTH_STATUS1);
+
+    // Catastrophic save
+    o_RegInfo.CSave_Status =
+        nvdimmReadTraceReg(i_nvdimm, CSAVE_STATUS);
+    o_RegInfo.CSave_Info =
+        nvdimmReadTraceReg(i_nvdimm, CSAVE_INFO);
+    o_RegInfo.CSave_Fail_Info0 =
+        nvdimmReadTraceReg(i_nvdimm, CSAVE_FAIL_INFO0);
+    o_RegInfo.CSave_Fail_Info1 =
+        nvdimmReadTraceReg(i_nvdimm, CSAVE_FAIL_INFO1);
+    o_RegInfo.CSave_Timeout0 =
+        nvdimmReadTraceReg(i_nvdimm, CSAVE_TIMEOUT0);
+    o_RegInfo.CSave_Timeout1 =
+        nvdimmReadTraceReg(i_nvdimm, CSAVE_TIMEOUT1);
+
+    // Error threshold, ready and command status
+    o_RegInfo.Error_Threshold_Status =
+        nvdimmReadTraceReg(i_nvdimm, ERROR_THRESHOLD_STATUS);
+    o_RegInfo.NVDimm_Ready =
+        nvdimmReadTraceReg(i_nvdimm, NVDIMM_READY);
+    o_RegInfo.NVDimm_CMD_Status0 =
+        nvdimmReadTraceReg(i_nvdimm, NVDIMM_CMD_STATUS0);
+
+    // Erase
+    o_RegInfo.Erase_Status =
+        nvdimmReadTraceReg(i_nvdimm, ERASE_STATUS);
+    o_RegInfo.Erase_Timeout0 =
+        nvdimmReadTraceReg(i_nvdimm, ERASE_TIMEOUT0);
+    o_RegInfo.Erase_Timeout1 =
+        nvdimmReadTraceReg(i_nvdimm, ERASE_TIMEOUT1);
+
+    // Abort command and ES policy
+    o_RegInfo.Abort_CMD_Timeout =
+        nvdimmReadTraceReg(i_nvdimm, ABORT_CMD_TIMEOUT);
+    o_RegInfo.Set_ES_Policy_Status =
+        nvdimmReadTraceReg(i_nvdimm, SET_ES_POLICY_STATUS);
+
+    // Restore
+    o_RegInfo.Restore_Status =
+        nvdimmReadTraceReg(i_nvdimm, RESTORE_STATUS);
+    o_RegInfo.Restore_Fail_Info =
+        nvdimmReadTraceReg(i_nvdimm, RESTORE_FAIL_INFO);
+    o_RegInfo.Restore_Timeout0 =
+        nvdimmReadTraceReg(i_nvdimm, RESTORE_TIMEOUT0);
+    o_RegInfo.Restore_Timeout1 =
+        nvdimmReadTraceReg(i_nvdimm, RESTORE_TIMEOUT1);
+
+    // Arm
+    o_RegInfo.Arm_Status =
+        nvdimmReadTraceReg(i_nvdimm, ARM_STATUS);
+    o_RegInfo.Arm_Timeout0 =
+        nvdimmReadTraceReg(i_nvdimm, ARM_TIMEOUT0);
+    o_RegInfo.Arm_Timeout1 =
+        nvdimmReadTraceReg(i_nvdimm, ARM_TIMEOUT1);
+
+    // Event notification
+    o_RegInfo.Set_Event_Notification_Status =
+        nvdimmReadTraceReg(i_nvdimm, SET_EVENT_NOTIFICATION_STATUS);
+}
+
+/**
+ * @brief Helper function for standard callout of an NVDIMM
+ *
+ * Adds an NV_CONTROLLER_PART_TYPE callout to o_err. Depending on i_step and
+ * on status read back from the nvdimm, the callout is either the plain low
+ * priority form or one that also requests gard (and, for save/restore,
+ * deconfig). Steps that have no case below (e.g. HEALTH_UPDATE) add nothing
+ * and return true.
+ *
+ * NOTE(review): in the HEALTH_PRE_ARM case l_continue starts out true and
+ * is only ever assigned true, so the if(!l_continue) branch cannot run and
+ * the else branch is always taken - confirm the intended polarity.
+ *
+ * @param[in] i_nvdimm - nvdimm target
+ *
+ * @param[in] i_step - the nvdimm function calling the health check
+ * (compared against the health_function values)
+ *
+ * @param[in,out] o_err - existing error log that the callouts are added to;
+ * it is dereferenced without a null check
+ *
+ * @return bool - true to commit log and continue, false to return
+ * the error log to caller and exit.
+ */
+bool nvdimmCalloutDimm(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err)
+{
+ bool l_continue = true;
+ uint8_t l_data; // only read after a successful nvdimmReadReg
+ errlHndl_t l_err = nullptr;
+
+ // Check which callout check is necessary
+ switch(i_step)
+ {
+ // Post save: callout severity depends on whether the image is still valid
+ case HEALTH_SAVE:
+ {
+ // Check to see if the nvdimm image is still valid
+ l_err = nvdimmValidImage(i_nvdimm, l_continue);
+ if(l_err)
+ {
+ errlCommit( l_err, NVDIMM_COMP_ID );
+ }
+
+ // Check image validity and set dimm status accordingly
+ if(l_continue)
+ {
+ // Set status flag NSTD_VAL_ERROR as data may still persist
+ nvdimmSetStatusFlag(i_nvdimm, NSTD_VAL_ERROR);
+
+ // Callout dimm but do not deconfig or gard
+ o_err->addPartCallout( i_nvdimm,
+ HWAS::NV_CONTROLLER_PART_TYPE,
+ HWAS::SRCI_PRIORITY_LOW);
+ }
+ else
+ {
+ // Callout, deconfig and gard the dimm
+ o_err->addPartCallout( i_nvdimm,
+ HWAS::NV_CONTROLLER_PART_TYPE,
+ HWAS::SRCI_PRIORITY_LOW,
+ HWAS::DECONFIG,
+ HWAS::GARD_Fatal);
+ }
+
+ break;
+ }
+
+ // Post restore: callout severity depends on RESTORE_STATUS
+ case HEALTH_RESTORE:
+ {
+ // Check restore status
+ l_err = nvdimmReadReg(i_nvdimm, RESTORE_STATUS, l_data);
+ if (l_err)
+ {
+ errlCommit( l_err, NVDIMM_COMP_ID );
+ }
+ else if ((l_data & RSTR_SUCCESS) != RSTR_SUCCESS)
+ {
+ l_continue = false; // restore did not report success
+ }
+
+ // Check restore status and set dimm status accordingly
+ if(l_continue)
+ {
+ // Set status flag NSTD_VAL_ERROR as data may still persist
+ nvdimmSetStatusFlag(i_nvdimm, NSTD_VAL_ERROR);
+
+ // Callout dimm but do not deconfig or gard
+ o_err->addPartCallout( i_nvdimm,
+ HWAS::NV_CONTROLLER_PART_TYPE,
+ HWAS::SRCI_PRIORITY_LOW);
+
+ }
+ else
+ {
+ // Callout, deconfig and gard the dimm
+ o_err->addPartCallout( i_nvdimm,
+ HWAS::NV_CONTROLLER_PART_TYPE,
+ HWAS::SRCI_PRIORITY_LOW,
+ HWAS::DECONFIG,
+ HWAS::GARD_Fatal);
+ }
+
+ break;
+ }
+
+ // Pre ARM errors read back the arm status
+ case HEALTH_PRE_ARM:
+ {
+
+ // Check arm status
+ l_err = nvdimmReadReg(i_nvdimm, ARM_STATUS, l_data);
+ if (l_err)
+ {
+ errlCommit( l_err, NVDIMM_COMP_ID );
+ }
+ else if (((l_data & ARM_SUCCESS) != ARM_SUCCESS) || ((l_data & RESET_N_ARMED) != RESET_N_ARMED))
+ {
+ l_continue = true; // NOTE(review): no-op, l_continue is already true
+ }
+
+ // NOTE(review): unreachable as written, l_continue is always true here
+ if(!l_continue)
+ {
+ // Set status flag NSTD_VAL_ERROR as data may still persist
+ nvdimmSetStatusFlag(i_nvdimm, NSTD_VAL_ERROR);
+
+ // Callout dimm but do not deconfig or gard
+ o_err->addPartCallout( i_nvdimm,
+ HWAS::NV_CONTROLLER_PART_TYPE,
+ HWAS::SRCI_PRIORITY_LOW);
+ }
+ else
+ {
+ // Set ATTR_NV_STATUS_FLAG to dimm disarmed
+ l_err = notifyNvdimmProtectionChange(i_nvdimm, NVDIMM_DISARMED);
+ if (l_err)
+ {
+ errlCommit( l_err, NVDIMM_COMP_ID );
+ }
+
+ // Callout and gard the dimm without deconfig. NOTE(review): nvdimmBPMCableCallout passes HWAS::DECONFIG here - confirm
+ o_err->addPartCallout( i_nvdimm,
+ HWAS::NV_CONTROLLER_PART_TYPE,
+ HWAS::SRCI_PRIORITY_LOW,
+ HWAS::NO_DECONFIG,
+ HWAS::GARD_Fatal);
+ }
+
+ break;
+ }
+
+ // Post ARM errors always get the plain callout; no status is read
+ case HEALTH_POST_ARM:
+ {
+ // Callout dimm but do not deconfig or gard
+ o_err->addPartCallout( i_nvdimm,
+ HWAS::NV_CONTROLLER_PART_TYPE,
+ HWAS::SRCI_PRIORITY_LOW);
+
+ // Set status flag NSTD_VAL_ERROR as data may persist despite errors
+ nvdimmSetStatusFlag(i_nvdimm, NSTD_VAL_ERROR);
+
+ break;
+ }
+
+ }
+
+ return l_continue;
+}
+
+/**
+ * @brief Helper function for BPM/Cable high, NVDIMM low callout
+ *
+ * Adds BPM_PART_TYPE and BPM_CABLE_PART_TYPE callouts at high priority and
+ * an NV_CONTROLLER_PART_TYPE callout at low priority to o_err. Depending on
+ * i_step and on status read back from the nvdimm, the NV controller callout
+ * may also request deconfig and gard. Steps that have no case below (e.g.
+ * HEALTH_UPDATE) add nothing and return true.
+ *
+ * NOTE(review): in the HEALTH_PRE_ARM case l_continue starts out true and
+ * is only ever assigned true, so the if(!l_continue) branch cannot run and
+ * the else branch is always taken - confirm the intended polarity.
+ *
+ * @param[in] i_nvdimm - nvdimm target
+ *
+ * @param[in] i_step - the nvdimm function calling the health check
+ * (compared against the health_function values)
+ *
+ * @param[in,out] o_err - existing error log that the callouts are added to;
+ * it is dereferenced without a null check
+ *
+ * @return bool - true to commit log and continue, false to return
+ * the error log to caller and exit.
+ */
+bool nvdimmBPMCableCallout(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err)
+{
+ bool l_continue = true;
+ uint8_t l_data; // only read after a successful nvdimmReadReg
+ errlHndl_t l_err = nullptr;
+
+ // Check which callout check is necessary
+ switch(i_step)
+ {
+ // Post save: dimm callout severity depends on whether the image is valid
+ case HEALTH_SAVE:
+ {
+ // Check to see if the nvdimm image is still valid
+ l_err = nvdimmValidImage(i_nvdimm, l_continue);
+ if(l_err)
+ {
+ errlCommit( l_err, NVDIMM_COMP_ID );
+ }
+
+ // Callout BPM and Cable but cannot deconfig or gard
+ o_err->addPartCallout( i_nvdimm,
+ HWAS::BPM_PART_TYPE,
+ HWAS::SRCI_PRIORITY_HIGH);
+ o_err->addPartCallout( i_nvdimm,
+ HWAS::BPM_CABLE_PART_TYPE,
+ HWAS::SRCI_PRIORITY_HIGH);
+
+ // Check image validity and set dimm status accordingly
+ if(l_continue)
+ {
+ // Set status flag NSTD_VAL_ERROR as data may still persist
+ nvdimmSetStatusFlag(i_nvdimm, NSTD_VAL_ERROR);
+
+ // Callout dimm but do not deconfig or gard
+ o_err->addPartCallout( i_nvdimm,
+ HWAS::NV_CONTROLLER_PART_TYPE,
+ HWAS::SRCI_PRIORITY_LOW);
+ }
+ else
+ {
+ // Callout dimm, deconfig and gard
+ o_err->addPartCallout( i_nvdimm,
+ HWAS::NV_CONTROLLER_PART_TYPE,
+ HWAS::SRCI_PRIORITY_LOW,
+ HWAS::DECONFIG,
+ HWAS::GARD_Fatal);
+ }
+
+ break;
+ }
+
+ // Post restore: dimm callout severity depends on RESTORE_STATUS
+ case HEALTH_RESTORE:
+ {
+ // Check restore status
+ l_err = nvdimmReadReg(i_nvdimm, RESTORE_STATUS, l_data);
+ if (l_err)
+ {
+ errlCommit( l_err, NVDIMM_COMP_ID );
+ }
+ else if ((l_data & RSTR_SUCCESS) != RSTR_SUCCESS)
+ {
+ l_continue = false; // restore did not report success
+ }
+
+ // Callout BPM and Cable but cannot deconfig or gard
+ o_err->addPartCallout( i_nvdimm,
+ HWAS::BPM_PART_TYPE,
+ HWAS::SRCI_PRIORITY_HIGH);
+ o_err->addPartCallout( i_nvdimm,
+ HWAS::BPM_CABLE_PART_TYPE,
+ HWAS::SRCI_PRIORITY_HIGH);
+
+ // Check restore status and set dimm status accordingly
+ if(l_continue)
+ {
+ // Set status flag NSTD_VAL_ERROR as data may still persist
+ nvdimmSetStatusFlag(i_nvdimm, NSTD_VAL_ERROR);
+
+ // Callout dimm but do not deconfig or gard
+ o_err->addPartCallout( i_nvdimm,
+ HWAS::NV_CONTROLLER_PART_TYPE,
+ HWAS::SRCI_PRIORITY_LOW);
+ }
+ else
+ {
+ // Callout dimm, deconfig and gard
+ o_err->addPartCallout( i_nvdimm,
+ HWAS::NV_CONTROLLER_PART_TYPE,
+ HWAS::SRCI_PRIORITY_LOW,
+ HWAS::DECONFIG,
+ HWAS::GARD_Fatal);
+ }
+
+ break;
+ }
+
+ // Pre ARM errors read back the arm status
+ case HEALTH_PRE_ARM:
+ {
+ // Check arm status
+ l_err = nvdimmReadReg(i_nvdimm, ARM_STATUS, l_data);
+ if (l_err)
+ {
+ errlCommit( l_err, NVDIMM_COMP_ID );
+ }
+ else if (((l_data & ARM_SUCCESS) != ARM_SUCCESS) || ((l_data & RESET_N_ARMED) != RESET_N_ARMED))
+ {
+ l_continue = true; // NOTE(review): no-op, l_continue is already true
+ }
+
+ // Callout BPM and Cable but cannot deconfig or gard
+ o_err->addPartCallout( i_nvdimm,
+ HWAS::BPM_PART_TYPE,
+ HWAS::SRCI_PRIORITY_HIGH);
+ o_err->addPartCallout( i_nvdimm,
+ HWAS::BPM_CABLE_PART_TYPE,
+ HWAS::SRCI_PRIORITY_HIGH);
+
+ // NOTE(review): unreachable as written, l_continue is always true here
+ if(!l_continue)
+ {
+ // Set status flag NSTD_VAL_ERROR as data may still persist
+ nvdimmSetStatusFlag(i_nvdimm, NSTD_VAL_ERROR);
+
+ // Callout dimm but do not deconfig or gard
+ o_err->addPartCallout( i_nvdimm,
+ HWAS::NV_CONTROLLER_PART_TYPE,
+ HWAS::SRCI_PRIORITY_LOW);
+ }
+ else
+ {
+ // Set ATTR_NV_STATUS_FLAG to dimm disarmed
+ l_err = notifyNvdimmProtectionChange(i_nvdimm, NVDIMM_DISARMED);
+ if (l_err)
+ {
+ errlCommit( l_err, NVDIMM_COMP_ID );
+ }
+ // Callout dimm, deconfig and gard
+ o_err->addPartCallout( i_nvdimm,
+ HWAS::NV_CONTROLLER_PART_TYPE,
+ HWAS::SRCI_PRIORITY_LOW,
+ HWAS::DECONFIG,
+ HWAS::GARD_Fatal);
+ }
+
+ break;
+ }
+
+ // Post ARM errors always get the plain callouts; no status is read
+ case HEALTH_POST_ARM:
+ {
+ // Callout BPM, Cable and dimm but do not deconfig or gard
+ o_err->addPartCallout( i_nvdimm,
+ HWAS::BPM_PART_TYPE,
+ HWAS::SRCI_PRIORITY_HIGH);
+ o_err->addPartCallout( i_nvdimm,
+ HWAS::BPM_CABLE_PART_TYPE,
+ HWAS::SRCI_PRIORITY_HIGH);
+ o_err->addPartCallout( i_nvdimm,
+ HWAS::NV_CONTROLLER_PART_TYPE,
+ HWAS::SRCI_PRIORITY_LOW);
+
+ // Set status flag NSTD_VAL_ERROR as data may still persist
+ nvdimmSetStatusFlag(i_nvdimm, NSTD_VAL_ERROR);
+
+ break;
+ }
+
+ }
+
+ return l_continue;
+}
+
+/**
+ * @brief Helper function for BPM high, NVDIMM low callout
+ *
+ * For every handled step this adds a BPM_PART_TYPE callout at high
+ * priority and an NV_CONTROLLER_PART_TYPE callout at low priority to
+ * o_err, neither requesting deconfig or gard. Steps that have no case
+ * below (e.g. HEALTH_UPDATE) add nothing. l_continue is never set to
+ * false in this function, so it always returns true.
+ *
+ * NOTE(review): in the HEALTH_PRE_ARM case l_continue starts out true and
+ * is only ever assigned true, so the if(!l_continue) branch cannot run and
+ * the else branch is always taken - confirm the intended polarity.
+ *
+ * @param[in] i_nvdimm - nvdimm target
+ *
+ * @param[in] i_step - the nvdimm function calling the health check
+ * (compared against the health_function values)
+ *
+ * @param[in,out] o_err - existing error log that the callouts are added to;
+ * it is dereferenced without a null check
+ *
+ * @return bool - true to commit log and continue, false to return
+ * the error log to caller and exit.
+ */
+bool nvdimmBPMCallout(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err)
+{
+ bool l_continue = true;
+ uint8_t l_data; // only read after a successful nvdimmReadReg
+ errlHndl_t l_err = nullptr;
+
+ // Check which callout check is necessary
+ switch(i_step)
+ {
+ // Post save errors always continue with callouts
+ case HEALTH_SAVE:
+ {
+ // Callout BPM on high
+ o_err->addPartCallout( i_nvdimm,
+ HWAS::BPM_PART_TYPE,
+ HWAS::SRCI_PRIORITY_HIGH);
+
+ // Callout dimm but do not deconfig or gard
+ o_err->addPartCallout( i_nvdimm,
+ HWAS::NV_CONTROLLER_PART_TYPE,
+ HWAS::SRCI_PRIORITY_LOW);
+
+ // Set status flag NSTD_VAL_ERROR as data may still persist
+ nvdimmSetStatusFlag(i_nvdimm, NSTD_VAL_ERROR);
+
+ break;
+ }
+
+ // Post restore errors always continue with callouts
+ case HEALTH_RESTORE:
+ {
+ // Callout BPM on high
+ o_err->addPartCallout( i_nvdimm,
+ HWAS::BPM_PART_TYPE,
+ HWAS::SRCI_PRIORITY_HIGH);
+
+ // Callout dimm but do not deconfig or gard
+ o_err->addPartCallout( i_nvdimm,
+ HWAS::NV_CONTROLLER_PART_TYPE,
+ HWAS::SRCI_PRIORITY_LOW);
+
+ // Set status flag NSTD_VAL_ERROR as data may still persist
+ nvdimmSetStatusFlag(i_nvdimm, NSTD_VAL_ERROR);
+
+ break;
+ }
+
+ // Pre ARM errors read back the arm status
+ case HEALTH_PRE_ARM:
+ {
+ // Check arm status
+ l_err = nvdimmReadReg(i_nvdimm, ARM_STATUS, l_data);
+ if (l_err)
+ {
+ errlCommit( l_err, NVDIMM_COMP_ID );
+ }
+ else if (((l_data & ARM_SUCCESS) != ARM_SUCCESS) || ((l_data & RESET_N_ARMED) != RESET_N_ARMED))
+ {
+ l_continue = true; // NOTE(review): no-op, l_continue is already true
+ }
+
+ // Callout BPM on high
+ o_err->addPartCallout( i_nvdimm,
+ HWAS::BPM_PART_TYPE,
+ HWAS::SRCI_PRIORITY_HIGH);
+
+ // Callout dimm but do not deconfig or gard
+ o_err->addPartCallout( i_nvdimm,
+ HWAS::NV_CONTROLLER_PART_TYPE,
+ HWAS::SRCI_PRIORITY_LOW);
+
+ // NOTE(review): unreachable as written, l_continue is always true here
+ if(!l_continue)
+ {
+ // Set status flag NSTD_VAL_ERROR as data may still persist
+ nvdimmSetStatusFlag(i_nvdimm, NSTD_VAL_ERROR);
+ }
+ else
+ {
+ // Set ATTR_NV_STATUS_FLAG to dimm disarmed
+ l_err = notifyNvdimmProtectionChange(i_nvdimm, NVDIMM_DISARMED);
+ if (l_err)
+ {
+ errlCommit( l_err, NVDIMM_COMP_ID );
+ }
+ }
+
+ break;
+ }
+
+ // Post ARM errors always get the plain callouts; no status is read
+ case HEALTH_POST_ARM:
+ {
+ // Callout BPM on high
+ o_err->addPartCallout( i_nvdimm,
+ HWAS::BPM_PART_TYPE,
+ HWAS::SRCI_PRIORITY_HIGH);
+
+ // Callout dimm but do not deconfig or gard
+ o_err->addPartCallout( i_nvdimm,
+ HWAS::NV_CONTROLLER_PART_TYPE,
+ HWAS::SRCI_PRIORITY_LOW);
+
+ // Set status flag NSTD_VAL_ERROR as data may still persist
+ nvdimmSetStatusFlag(i_nvdimm, NSTD_VAL_ERROR);
+
+ break;
+ }
+
+ }
+
+ return l_continue;
+}
+
+/**
+ * @brief Function checking the Health Status Registers for an nvdimm
+ *
+ * @param[in] i_nvdimm - nvdimm target
+ *
+ * @param[in] i_step - the nvdimm step calling the check
+ *
+ * @param[out] o_continue - bool to signal a return to caller fail
+ *
+ * @return errlHndl_t - Null if successful, otherwise a pointer to
+ * the error log.
+ */
+errlHndl_t nvdimmHealthStatusCheck(Target *i_nvdimm, uint8_t i_step, bool& o_continue)
+{
+ uint8_t l_data = 0x0;
+ errlHndl_t l_err = nullptr;
+ errlHndl_t l_err_t = nullptr;
+ nvdimm_reg_t l_RegInfo;
+ bool l_arm_timeout = false;
+
+ if (i_step == HEALTH_PRE_ARM)
+ {
+ l_arm_timeout = o_continue;
+ }
+
+ //Collect Register data for parsing and traces
+ nvdimmTraceRegs(i_nvdimm, l_RegInfo);
+
+ // Read SET_EVENT_NOTIFICATION_STATUS register
+ l_err = nvdimmReadReg(i_nvdimm, SET_EVENT_NOTIFICATION_STATUS, l_data);
+ if(l_err)
+ {
+ errlCommit( l_err, NVDIMM_COMP_ID );
+ }
+ l_RegInfo.Set_Event_Notification_Status = l_data;
+
+ // Read RESTORE STATUS register
+ l_err = nvdimmReadReg(i_nvdimm, RESTORE_STATUS , l_data);
+ if(l_err)
+ {
+ errlCommit( l_err, NVDIMM_COMP_ID );
+ }
+ l_RegInfo.Restore_Status = l_data;
+
+ // Read RESTORE_FAIL_INFO register
+ l_err = nvdimmReadReg(i_nvdimm, RESTORE_FAIL_INFO , l_data);
+ if(l_err)
+ {
+ errlCommit( l_err, NVDIMM_COMP_ID );
+ }
+ l_RegInfo.Restore_Fail_Info = l_data;
+
+ // Read NVDIMM_CMD_STATUS0 register
+ l_err = nvdimmReadReg(i_nvdimm, NVDIMM_CMD_STATUS0 , l_data);
+ if(l_err)
+ {
+ errlCommit( l_err, NVDIMM_COMP_ID );
+ }
+ l_RegInfo.NVDimm_CMD_Status0 = l_data;
+
+ // Read ARM_STATUS register
+ l_err = nvdimmReadReg(i_nvdimm, ARM_STATUS , l_data);
+ if(l_err)
+ {
+ errlCommit( l_err, NVDIMM_COMP_ID );
+ }
+ l_RegInfo.Arm_Status = l_data;
+
+ // Read SET_ES_POLICY_STATUS register
+ l_err = nvdimmReadReg(i_nvdimm, SET_ES_POLICY_STATUS , l_data);
+ if(l_err)
+ {
+ errlCommit( l_err, NVDIMM_COMP_ID );
+ }
+ l_RegInfo.Set_ES_Policy_Status = l_data;
+
+ // Check all nvdimm deconfig cases
+ do
+ {
+ // Check MODULE_HEALTH_STATUS0[0]
+ if ((l_RegInfo.Module_Health_Status0 & VOLTAGE_REGULATOR_FAILED) == VOLTAGE_REGULATOR_FAILED)
+ {
+ /*@
+ *@errortype
+ *@reasoncode NVDIMM_VOLTAGE_REGULATOR_FAILED
+ *@severity ERRORLOG_SEV_PREDICTIVE
+ *@moduleid NVDIMM_MODULE_HEALTH_STATUS_CHECK
+ *@userdata1[0:31] Target Huid
+ *@userdata2 <UNUSED>
+ *@devdesc NVDIMM failed module health status check due to
+ * voltage regulator failure
+ *@custdesc NVDIMM failed module health status check
+ */
+ l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE,
+ NVDIMM_MODULE_HEALTH_STATUS_CHECK,
+ NVDIMM_VOLTAGE_REGULATOR_FAILED,
+ TARGETING::get_huid(i_nvdimm),
+ 0x0,
+ ERRORLOG::ErrlEntry::NO_SW_CALLOUT );
+ break;
+ }
+
+ // Check MODULE_HEALTH_STATUS0[1]
+ if ((l_RegInfo.Module_Health_Status0 & VDD_LOST) == VDD_LOST)
+ {
+ /*@
+ *@errortype
+ *@reasoncode NVDIMM_VDD_LOST
+ *@severity ERRORLOG_SEV_PREDICTIVE
+ *@moduleid NVDIMM_MODULE_HEALTH_STATUS_CHECK
+ *@userdata1[0:31] Target Huid
+ *@userdata2 <UNUSED>
+ *@devdesc NVDIMM failed module health status check due to
+ * vdd loss
+ *@custdesc NVDIMM failed module health status check
+ */
+ l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE,
+ NVDIMM_MODULE_HEALTH_STATUS_CHECK,
+ NVDIMM_VDD_LOST,
+ TARGETING::get_huid(i_nvdimm),
+ 0x0,
+ ERRORLOG::ErrlEntry::NO_SW_CALLOUT );
+ break;
+ }
+
+ // Check MODULE_HEALTH_STATUS0[2]
+ if ((l_RegInfo.Module_Health_Status0 & VPP_LOST) == VPP_LOST)
+ {
+ /*@
+ *@errortype
+ *@reasoncode NVDIMM_VPP_LOST
+ *@severity ERRORLOG_SEV_PREDICTIVE
+ *@moduleid NVDIMM_MODULE_HEALTH_STATUS_CHECK
+ *@userdata1[0:31] Target Huid
+ *@userdata2 <UNUSED>
+ *@devdesc NVDIMM failed module health status check due to
+ * vpp loss
+ *@custdesc NVDIMM failed module health status check
+ */
+ l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE,
+ NVDIMM_MODULE_HEALTH_STATUS_CHECK,
+ NVDIMM_VPP_LOST,
+ TARGETING::get_huid(i_nvdimm),
+ 0x0,
+ ERRORLOG::ErrlEntry::NO_SW_CALLOUT );
+ break;
+ }
+
+ // Check MODULE_HEALTH_STATUS0[3]
+ if ((l_RegInfo.Module_Health_Status0 & VTT_LOST) == VTT_LOST)
+ {
+ /*@
+ *@errortype
+ *@reasoncode NVDIMM_VTT_LOST
+ *@severity ERRORLOG_SEV_PREDICTIVE
+ *@moduleid NVDIMM_MODULE_HEALTH_STATUS_CHECK
+ *@userdata1[0:31] Target Huid
+ *@userdata2 <UNUSED>
+ *@devdesc NVDIMM failed module health status check due to
+ * vtt loss
+ *@custdesc NVDIMM failed module health status check
+ */
+ l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE,
+ NVDIMM_MODULE_HEALTH_STATUS_CHECK,
+ NVDIMM_VTT_LOST,
+ TARGETING::get_huid(i_nvdimm),
+ 0x0,
+ ERRORLOG::ErrlEntry::NO_SW_CALLOUT );
+ break;
+ }
+
+ // Check MODULE_HEALTH_STATUS0[4]
+ if ((l_RegInfo.Module_Health_Status0 & DRAM_NOT_SELF_REFRESH) == DRAM_NOT_SELF_REFRESH)
+ {
+ /*@
+ *@errortype
+ *@reasoncode NVDIMM_DRAM_NOT_SELF_REFRESH
+ *@severity ERRORLOG_SEV_PREDICTIVE
+ *@moduleid NVDIMM_MODULE_HEALTH_STATUS_CHECK
+ *@userdata1[0:31] Target Huid
+ *@userdata2 <UNUSED>
+ *@devdesc NVDIMM failed module health status check due to
+ * no self refresh on the nvdimm
+ *@custdesc NVDIMM failed module health status check
+ */
+ l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE,
+ NVDIMM_MODULE_HEALTH_STATUS_CHECK,
+ NVDIMM_DRAM_NOT_SELF_REFRESH,
+ TARGETING::get_huid(i_nvdimm),
+ 0x0,
+ ERRORLOG::ErrlEntry::NO_SW_CALLOUT );
+ break;
+ }
+
+ // Check MODULE_HEALTH_STATUS0[5]
+ if ((l_RegInfo.Module_Health_Status0 & CONTROLLER_HARDWARE_ERROR) == CONTROLLER_HARDWARE_ERROR)
+ {
+ /*@
+ *@errortype
+ *@reasoncode NVDIMM_CONTROLLER_HARDWARE_ERROR
+ *@severity ERRORLOG_SEV_PREDICTIVE
+ *@moduleid NVDIMM_MODULE_HEALTH_STATUS_CHECK
+ *@userdata1[0:31] Target Huid
+ *@userdata2 <UNUSED>
+ *@devdesc NVDIMM failed module health status check due to
+ * error with the hardware controller
+ *@custdesc NVDIMM failed module health status check
+ */
+ l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE,
+ NVDIMM_MODULE_HEALTH_STATUS_CHECK,
+ NVDIMM_CONTROLLER_HARDWARE_ERROR,
+ TARGETING::get_huid(i_nvdimm),
+ 0x0,
+ ERRORLOG::ErrlEntry::NO_SW_CALLOUT );
+ break;
+ }
+
+ // Check MODULE_HEALTH_STATUS0[6]
+ if ((l_RegInfo.Module_Health_Status0 & NVM_CONTROLLER_ERROR) == NVM_CONTROLLER_ERROR)
+ {
+ /*@
+ *@errortype
+ *@reasoncode NVDIMM_NVM_CONTROLLER_ERROR
+ *@severity ERRORLOG_SEV_PREDICTIVE
+ *@moduleid NVDIMM_MODULE_HEALTH_STATUS_CHECK
+ *@userdata1[0:31] Target Huid
+ *@userdata2 <UNUSED>
+ *@devdesc NVDIMM failed module health status check due to
+ * error with the nvdimm controller
+ *@custdesc NVDIMM failed module health status check
+ */
+ l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE,
+ NVDIMM_MODULE_HEALTH_STATUS_CHECK,
+ NVDIMM_NVM_CONTROLLER_ERROR,
+ TARGETING::get_huid(i_nvdimm),
+ 0x0,
+ ERRORLOG::ErrlEntry::NO_SW_CALLOUT );
+ break;
+ }
+
+
+ // Check MODULE_HEALTH_STATUS0[7]
+ if ((l_RegInfo.Module_Health_Status0 & NVM_LIFETIME_ERROR) == NVM_LIFETIME_ERROR)
+ {
+ /*@
+ *@errortype
+ *@reasoncode NVDIMM_NVM_LIFETIME_ERROR
+ *@severity ERRORLOG_SEV_PREDICTIVE
+ *@moduleid NVDIMM_MODULE_HEALTH_STATUS_CHECK
+ *@userdata1[0:31] Target Huid
+ *@userdata2 <UNUSED>
+ *@devdesc NVDIMM failed module health status check due to
+ * an nvdimm lifetime error
+ *@custdesc NVDIMM failed module health status check
+ */
+ l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE,
+ NVDIMM_MODULE_HEALTH_STATUS_CHECK,
+ NVDIMM_NVM_LIFETIME_ERROR,
+ TARGETING::get_huid(i_nvdimm),
+ 0x0,
+ ERRORLOG::ErrlEntry::NO_SW_CALLOUT );
+ break;
+ }
+
+ // Check MODULE_HEALTH_STATUS1[1]
+ if ((l_RegInfo.Module_Health_Status1 & INVALID_FIRMWARE_ERROR) == INVALID_FIRMWARE_ERROR)
+ {
+ /*@
+ *@errortype
+ *@reasoncode NVDIMM_INVALID_FIRMWARE_ERROR
+ *@severity ERRORLOG_SEV_PREDICTIVE
+ *@moduleid NVDIMM_MODULE_HEALTH_STATUS_CHECK
+ *@userdata1[0:31] Target Huid
+ *@userdata2 <UNUSED>
+ *@devdesc NVDIMM failed module health status check due to
+ * an invalid firmware image
+ *@custdesc NVDIMM failed module health status check
+ */
+ l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE,
+ NVDIMM_MODULE_HEALTH_STATUS_CHECK,
+ NVDIMM_INVALID_FIRMWARE_ERROR,
+ TARGETING::get_huid(i_nvdimm),
+ 0x0,
+ ERRORLOG::ErrlEntry::NO_SW_CALLOUT );
+ break;
+ }
+
+ // Check MODULE_HEALTH_STATUS1[2]
+ if ((l_RegInfo.Module_Health_Status1 & CONFIG_DATA_ERROR) == CONFIG_DATA_ERROR)
+ {
+ /*@
+ *@errortype
+ *@reasoncode NVDIMM_CONFIG_DATA_ERROR
+ *@severity ERRORLOG_SEV_PREDICTIVE
+ *@moduleid NVDIMM_MODULE_HEALTH_STATUS_CHECK
+ *@userdata1[0:31] Target Huid
+ *@userdata2 <UNUSED>
+ *@devdesc NVDIMM failed module health status check due to
+ * invalid configuration data
+ *@custdesc NVDIMM failed module health status check
+ */
+ l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE,
+ NVDIMM_MODULE_HEALTH_STATUS_CHECK,
+ NVDIMM_CONFIG_DATA_ERROR,
+ TARGETING::get_huid(i_nvdimm),
+ 0x0,
+ ERRORLOG::ErrlEntry::NO_SW_CALLOUT );
+ break;
+ }
+
+ }while(0);
+
+ if (l_err)
+ {
+ // Setup Trace
+ l_err->collectTrace( NVDIMM_COMP_NAME );
+
+ // Add reg traces to the error log
+ NVDIMM::UdNvdimmOPParms( l_RegInfo ).addToLog(l_err);
+
+ // Callout nvdimm depending on istep call
+ o_continue &= nvdimmCalloutDimm(i_nvdimm, i_step, l_err);
+
+ if(l_arm_timeout)
+ {
+ // Callout and gard the dimm; NO_DECONFIG is passed, so no deconfig is requested here
+ l_err->addPartCallout( i_nvdimm,
+ HWAS::NV_CONTROLLER_PART_TYPE,
+ HWAS::SRCI_PRIORITY_LOW,
+ HWAS::NO_DECONFIG,
+ HWAS::GARD_Fatal);
+ }
+ }
+
+ // Check all BPM and Cable high, nvdimm low cases
+ do
+ {
+ // If function calling is SAVE, ignore NOT_ENOUGH_ENERGY_FOR_CSAVE
+ if (i_step == HEALTH_SAVE)
+ {
+ // Check MODULE_HEALTH_STATUS1[0]
+ if ((l_RegInfo.Module_Health_Status1 & NOT_ENOUGH_ENERGY_FOR_CSAVE) == NOT_ENOUGH_ENERGY_FOR_CSAVE)
+ {
+ /*@
+ *@errortype
+ *@reasoncode NVDIMM_NOT_ENOUGH_ENERGY_FOR_CSAVE
+ *@severity ERRORLOG_SEV_PREDICTIVE
+ *@moduleid NVDIMM_MODULE_HEALTH_STATUS_CHECK
+ *@userdata1[0:31] Target Huid
+ *@userdata2 <UNUSED>
+ *@devdesc NVDIMM failed module health status check due to
+ * insufficient energy for csave
+ *@custdesc NVDIMM failed module health status check
+ */
+ l_err_t = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE,
+ NVDIMM_MODULE_HEALTH_STATUS_CHECK,
+ NVDIMM_NOT_ENOUGH_ENERGY_FOR_CSAVE,
+ TARGETING::get_huid(i_nvdimm),
+ 0x0,
+ ERRORLOG::ErrlEntry::NO_SW_CALLOUT );
+ break;
+ }
+ }
+
+ // Check MODULE_HEALTH_STATUS1[3]
+ if ((l_RegInfo.Module_Health_Status1 & NO_ES_PRESENT) == NO_ES_PRESENT)
+ {
+ /*@
+ *@errortype
+ *@reasoncode NVDIMM_NO_ES_PRESENT
+ *@severity ERRORLOG_SEV_PREDICTIVE
+ *@moduleid NVDIMM_MODULE_HEALTH_STATUS_CHECK
+ *@userdata1[0:31] Target Huid
+ *@userdata2 <UNUSED>
+ *@devdesc NVDIMM failed module health status check due to
+ * no ES active
+ *@custdesc NVDIMM failed module health status check
+ */
+ l_err_t = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE,
+ NVDIMM_MODULE_HEALTH_STATUS_CHECK,
+ NVDIMM_NO_ES_PRESENT,
+ TARGETING::get_huid(i_nvdimm),
+ 0x0,
+ ERRORLOG::ErrlEntry::NO_SW_CALLOUT );
+ break;
+ }
+
+ // Check MODULE_HEALTH_STATUS1[5]
+ if ((l_RegInfo.Module_Health_Status1 & ES_HARDWARE_FAILURE) == ES_HARDWARE_FAILURE)
+ {
+ /*@
+ *@errortype
+ *@reasoncode NVDIMM_ES_HARDWARE_FAILURE
+ *@severity ERRORLOG_SEV_PREDICTIVE
+ *@moduleid NVDIMM_MODULE_HEALTH_STATUS_CHECK
+ *@userdata1[0:31] Target Huid
+ *@userdata2 <UNUSED>
+ *@devdesc NVDIMM failed module health status check due to
+ * ES hardware failure
+ *@custdesc NVDIMM failed module health status check
+ */
+ l_err_t = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE,
+ NVDIMM_MODULE_HEALTH_STATUS_CHECK,
+ NVDIMM_ES_HARDWARE_FAILURE,
+ TARGETING::get_huid(i_nvdimm),
+ 0x0,
+ ERRORLOG::ErrlEntry::NO_SW_CALLOUT );
+ break;
+ }
+
+ // Check MODULE_HEALTH_STATUS1[6]
+ if ((l_RegInfo.Module_Health_Status1 & ES_HEALTH_ASSESSMENT_ERROR) == ES_HEALTH_ASSESSMENT_ERROR)
+ {
+ /*@
+ *@errortype
+ *@reasoncode NVDIMM_ES_HEALTH_ASSESSMENT_ERROR
+ *@severity ERRORLOG_SEV_PREDICTIVE
+ *@moduleid NVDIMM_MODULE_HEALTH_STATUS_CHECK
+ *@userdata1[0:31] Target Huid
+ *@userdata2 <UNUSED>
+ *@devdesc NVDIMM failed module health status check due to
+ * ES error during health assessment
+ *@custdesc NVDIMM failed module health status check
+ */
+ l_err_t = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE,
+ NVDIMM_MODULE_HEALTH_STATUS_CHECK,
+ NVDIMM_ES_HEALTH_ASSESSMENT_ERROR,
+ TARGETING::get_huid(i_nvdimm),
+ 0x0,
+ ERRORLOG::ErrlEntry::NO_SW_CALLOUT );
+ break;
+ }
+
+ }while(0);
+
+ if (l_err_t)
+ {
+ // Setup Trace
+ l_err_t->collectTrace( NVDIMM_COMP_NAME );
+
+ // Add reg traces to the error log
+ NVDIMM::UdNvdimmOPParms( l_RegInfo ).addToLog(l_err_t);
+
+ // Callout BPM, Cable, and nvdimm
+ o_continue &= nvdimmBPMCableCallout(i_nvdimm, i_step, l_err_t);
+ }
+
+ // Check for multiple errors and commit old error
+ if ((l_err) && (l_err_t))
+ {
+ errlCommit( l_err, NVDIMM_COMP_ID );
+ }
+
+ // If there was a new error, save off to l_err
+ if (l_err_t)
+ {
+ l_err = l_err_t;
+ l_err_t = nullptr;
+ }
+
+ // Check all BPM high, nvdimm low cases
+ do
+ {
+ // Check ERROR_THRESHOLD_STATUS[1]
+ if ((l_RegInfo.Error_Threshold_Status & ES_LIFETIME_ERROR) == ES_LIFETIME_ERROR)
+ {
+ /*@
+ *@errortype
+ *@reasoncode NVDIMM_ES_LIFETIME_ERROR
+ *@severity ERRORLOG_SEV_PREDICTIVE
+ *@moduleid NVDIMM_MODULE_HEALTH_STATUS_CHECK
+ *@userdata1[0:31] Target Huid
+ *@userdata2 <UNUSED>
+ *@devdesc NVDIMM failed module health status check due to
+ * ES lifetime error
+ *@custdesc NVDIMM failed module health status check
+ */
+ l_err_t = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE,
+ NVDIMM_MODULE_HEALTH_STATUS_CHECK,
+ NVDIMM_ES_LIFETIME_ERROR,
+ TARGETING::get_huid(i_nvdimm),
+ 0x0,
+ ERRORLOG::ErrlEntry::NO_SW_CALLOUT );
+ break;
+ }
+
+ // Check ERROR_THRESHOLD_STATUS[2]
+ if ((l_RegInfo.Error_Threshold_Status & ES_TEMP_ERROR) == ES_TEMP_ERROR)
+ {
+ /*@
+ *@errortype
+ *@reasoncode NVDIMM_ES_TEMP_ERROR
+ *@severity ERRORLOG_SEV_PREDICTIVE
+ *@moduleid NVDIMM_MODULE_HEALTH_STATUS_CHECK
+ *@userdata1[0:31] Target Huid
+ *@userdata2 <UNUSED>
+ *@devdesc NVDIMM failed module health status check due to
+ * ES temporary error
+ *@custdesc NVDIMM failed module health status check
+ */
+ l_err_t = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE,
+ NVDIMM_MODULE_HEALTH_STATUS_CHECK,
+ NVDIMM_ES_TEMP_ERROR,
+ TARGETING::get_huid(i_nvdimm),
+ 0x0,
+ ERRORLOG::ErrlEntry::NO_SW_CALLOUT );
+ break;
+ }
+
+ }while(0);
+
+ if (l_err_t)
+ {
+ // Setup Trace
+ l_err_t->collectTrace( NVDIMM_COMP_NAME );
+
+ // Add reg traces to the error log
+ NVDIMM::UdNvdimmOPParms( l_RegInfo ).addToLog(l_err_t);
+
+ // Callout BPM and nvdimm (BPM high, NVDIMM low helper)
+ o_continue &= nvdimmBPMCallout(i_nvdimm, i_step, l_err_t);
+ }
+
+ // Check for multiple errors and commit old error
+ if ((l_err) && (l_err_t))
+ {
+ errlCommit( l_err, NVDIMM_COMP_ID );
+ }
+
+ // If there was a new error, save off to l_err
+ if (l_err_t)
+ {
+ l_err = l_err_t;
+ l_err_t = nullptr;
+ }
+
+ // Check special pre arm case
+ if (i_step == HEALTH_PRE_ARM)
+ {
+ // Check ES_POLICY_NOT_SET[4]
+ if ((l_RegInfo.Set_ES_Policy_Status & ES_POLICY_NOT_SET) == ES_POLICY_NOT_SET)
+ {
+ /*@
+ *@errortype
+ *@reasoncode NVDIMM_ES_POLICY_NOT_SET
+ *@severity ERRORLOG_SEV_PREDICTIVE
+ *@moduleid NVDIMM_MODULE_HEALTH_STATUS_CHECK
+ *@userdata1[0:31] Target Huid
+ *@userdata2 <UNUSED>
+ *@devdesc NVDIMM failed module health status check due to
+ * ES policy not being set during an arm
+ *@custdesc NVDIMM failed module health status check
+ */
+ l_err_t = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE,
+ NVDIMM_MODULE_HEALTH_STATUS_CHECK,
+ NVDIMM_ES_POLICY_NOT_SET,
+ TARGETING::get_huid(i_nvdimm),
+ 0x0,
+ ERRORLOG::ErrlEntry::NO_SW_CALLOUT );
+ o_continue = true;
+ // Callout dimm but no deconfig and gard
+ l_err_t->addPartCallout( i_nvdimm,
+ HWAS::NV_CONTROLLER_PART_TYPE,
+ HWAS::SRCI_PRIORITY_LOW);
+ }
+ }
+
+ // Check for multiple errors and commit old error
+ if ((l_err) && (l_err_t))
+ {
+ errlCommit( l_err, NVDIMM_COMP_ID );
+ }
+
+ // If there was a new error, save off to l_err
+ if (l_err_t)
+ {
+ l_err = l_err_t;
+ l_err_t = nullptr;
+ }
+
+ return l_err;
+}
+
+} // end NVDIMM namespace
diff --git a/src/usr/isteps/nvdimm/nvdimmErrorLog.H b/src/usr/isteps/nvdimm/nvdimmErrorLog.H
new file mode 100644
index 000000000..dae8e2f2f
--- /dev/null
+++ b/src/usr/isteps/nvdimm/nvdimmErrorLog.H
@@ -0,0 +1,108 @@
+/* IBM_PROLOG_BEGIN_TAG */
+/* This is an automatically generated prolog. */
+/* */
+/* $Source: src/usr/isteps/nvdimm/nvdimmErrorLog.H $ */
+/* */
+/* OpenPOWER HostBoot Project */
+/* */
+/* Contributors Listed Below - COPYRIGHT 2014,2019 */
+/* [+] International Business Machines Corp. */
+/* */
+/* */
+/* Licensed under the Apache License, Version 2.0 (the "License"); */
+/* you may not use this file except in compliance with the License. */
+/* You may obtain a copy of the License at */
+/* */
+/* http://www.apache.org/licenses/LICENSE-2.0 */
+/* */
+/* Unless required by applicable law or agreed to in writing, software */
+/* distributed under the License is distributed on an "AS IS" BASIS, */
+/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */
+/* implied. See the License for the specific language governing */
+/* permissions and limitations under the License. */
+/* */
+/* IBM_PROLOG_END_TAG */
+
+#ifndef NVDIMM_ERROR_LOG_H__
+#define NVDIMM_ERROR_LOG_H__
+
+#include <usr/errl/errlentry.H>
+#include <targeting/common/commontargeting.H>
+#include <targeting/common/util.H>
+#include <targeting/common/utilFilter.H>
+#include <i2c/eepromif.H>
+#include <map>
+#include "nvdimmdd.H"
+#include "nvdimm.H"
+
+using namespace TARGETING;
+using namespace EEPROM;
+
+// Trace definition
+extern trace_desc_t* g_trac_nvdimm;
+
+namespace NVDIMM
+{
+
+/**
+ * @brief Function to read and save status registers for traces
+ *
+ * Callers in this change pass the filled structure to UdNvdimmOPParms so
+ * the register values are attached to an error log.
+ *
+ * @param[in] i_nvdimm - nvdimm target with NV controller
+ *
+ * @param[out] o_RegInfo - the structure holding the register data
+ *
+ */
+void nvdimmTraceRegs(Target *i_nvdimm, nvdimm_reg_t& o_RegInfo);
+
+/**
+ * @brief Helper function for standard callout of an NVDIMM
+ *
+ * @param[in] i_nvdimm - nvdimm target
+ *
+ * @param[in] i_step - health check step identifier; nvdimmHealthStatusCheck
+ * forwards its own i_step argument here
+ *
+ * @param[in,out] o_err - existing error log handle to be modified (callouts
+ * are added to it; it is not created by this helper)
+ *
+ * @return bool - true to commit log and continue, false to return
+ * the error log to caller and exit.
+ */
+bool nvdimmCalloutDimm(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err);
+
+/**
+ * @brief Helper function for BPM/Cable high, NVDIMM low callout
+ *
+ * @param[in] i_nvdimm - nvdimm target
+ *
+ * @param[in] i_step - health check step identifier; nvdimmHealthStatusCheck
+ * forwards its own i_step argument here
+ *
+ * @param[in,out] o_err - existing error log handle to be modified (callouts
+ * are added to it; it is not created by this helper)
+ *
+ * @return bool - true to commit log and continue, false to return
+ * the error log to caller and exit.
+ */
+bool nvdimmBPMCableCallout(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err);
+
+/**
+ * @brief Helper function for BPM high, NVDIMM low callout
+ *
+ * @param[in] i_nvdimm - nvdimm target
+ *
+ * @param[in] i_step - health check step identifier; nvdimmHealthStatusCheck
+ * forwards its own i_step argument here
+ *
+ * @param[in,out] o_err - existing error log handle to be modified (callouts
+ * are added to it; it is not created by this helper)
+ *
+ * @return bool - true to commit log and continue, false to return
+ * the error log to caller and exit.
+ */
+bool nvdimmBPMCallout(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err);
+
+/**
+ * @brief Function checking the Health Status Registers for an nvdimm
+ *
+ * @param[in] i_nvdimm - nvdimm target
+ *
+ * @param[in] i_step - which health check is being run; the implementation
+ * compares it against HEALTH_SAVE and HEALTH_PRE_ARM, and
+ * the runtime arm path also passes HEALTH_POST_ARM
+ *
+ * @param[in,out] o_continue - bool to signify whether the caller may continue.
+ * The implementation ANDs the result of each callout
+ * helper into it and forces it to true for the
+ * pre-arm "ES policy not set" case.
+ * NOTE(review): confirm whether the function
+ * initializes it before the first "&=" or relies on
+ * the caller's initial value.
+ *
+ * @return errlHndl_t - Null if successful, otherwise a pointer to
+ * the error log. When more than one group of checks fails, the
+ * earlier logs are committed and the most recent one is returned.
+ */
+errlHndl_t nvdimmHealthStatusCheck(Target *i_nvdimm, uint8_t i_step, bool& o_continue);
+
+} //End NVDIMM namespace
+
+
+#endif // NVDIMM_ERROR_LOG_H__
diff --git a/src/usr/isteps/nvdimm/nvdimmdd.C b/src/usr/isteps/nvdimm/nvdimmdd.C
index 695a60b93..ce6fa65d8 100755
--- a/src/usr/isteps/nvdimm/nvdimmdd.C
+++ b/src/usr/isteps/nvdimm/nvdimmdd.C
@@ -79,6 +79,7 @@ TRAC_INIT( & g_trac_nvdimmr, "NVDIMMR", KILOBYTE );
#define MAX_READ_RETRY_SECS 30
// ----------------------------------------------
+using namespace TARGETING;
namespace
{
diff --git a/src/usr/isteps/nvdimm/nvdimmdd.H b/src/usr/isteps/nvdimm/nvdimmdd.H
index 1e299f2de..37fa4a188 100755
--- a/src/usr/isteps/nvdimm/nvdimmdd.H
+++ b/src/usr/isteps/nvdimm/nvdimmdd.H
@@ -92,6 +92,78 @@ struct nvdimm_addr_t
}
};
+/**
+ * @brief Structure of registers for error log traces
+ *
+ * Holds one uint8_t per NVDIMM status/timeout register. It is filled by
+ * nvdimmTraceRegs() and handed to UdNvdimmOPParms to be added to an error
+ * log. Every field starts at zero (see the default constructor).
+ *
+ * NOTE(review): the user-details parser (UdParserNvdimmOPParms) prints its
+ * fields in a fixed order; keep the member order here in sync with it.
+ */
+struct nvdimm_reg_t
+{
+ uint8_t Module_Health;
+ uint8_t Module_Health_Status0;
+ uint8_t Module_Health_Status1;
+ uint8_t CSave_Status;
+ uint8_t CSave_Info;
+ uint8_t CSave_Fail_Info0;
+ uint8_t CSave_Fail_Info1;
+ uint8_t CSave_Timeout0;
+ uint8_t CSave_Timeout1;
+ uint8_t Error_Threshold_Status;
+ uint8_t NVDimm_Ready;
+ uint8_t NVDimm_CMD_Status0;
+ uint8_t Erase_Status;
+ uint8_t Erase_Timeout0;
+ uint8_t Erase_Timeout1;
+ uint8_t Abort_CMD_Timeout;
+ uint8_t Set_ES_Policy_Status;
+ uint8_t Restore_Status;
+ uint8_t Restore_Fail_Info;
+ uint8_t Restore_Timeout0;
+ uint8_t Restore_Timeout1;
+ uint8_t Arm_Status;
+ uint8_t Arm_Timeout0;
+ uint8_t Arm_Timeout1;
+ uint8_t Set_Event_Notification_Status;
+
+ /**
+ * @brief Construct a default nvdimm_reg_t with every register field
+ * initialized to 0
+ */
+ nvdimm_reg_t()
+ : Module_Health(0),
+ Module_Health_Status0(0),
+ Module_Health_Status1(0),
+ CSave_Status(0),
+ CSave_Info(0),
+ CSave_Fail_Info0(0),
+ CSave_Fail_Info1(0),
+ CSave_Timeout0(0),
+ CSave_Timeout1(0),
+ Error_Threshold_Status(0),
+ NVDimm_Ready(0),
+ NVDimm_CMD_Status0(0),
+ Erase_Status(0),
+ Erase_Timeout0(0),
+ Erase_Timeout1(0),
+ Abort_CMD_Timeout(0),
+ Set_ES_Policy_Status(0),
+ Restore_Status(0),
+ Restore_Fail_Info(0),
+ Restore_Timeout0(0),
+ Restore_Timeout1(0),
+ Arm_Status(0),
+ Arm_Timeout0(0),
+ Arm_Timeout1(0),
+ Set_Event_Notification_Status(0)
+
+ {
+ }
+
+ /**
+ * @brief Default destructor of nvdimm_reg_t
+ */
+ ~nvdimm_reg_t() = default;
+
+};
+
+
/*
* @brief Miscellaneous enums for NVDIMM
*/
diff --git a/src/usr/isteps/nvdimm/plugins/errludP_nvdimm.H b/src/usr/isteps/nvdimm/plugins/errludP_nvdimm.H
index 460add6f3..2c7f1d2a0 100644
--- a/src/usr/isteps/nvdimm/plugins/errludP_nvdimm.H
+++ b/src/usr/isteps/nvdimm/plugins/errludP_nvdimm.H
@@ -164,6 +164,119 @@ private:
UdParserNvdimmParms & operator=(const UdParserNvdimmParms&);
};
+/**
+ * @class UdParserNvdimmOPParms
+ *
+ * Parses UdNvdimmOPParms: prints the NVDIMM register values that were
+ * attached to an error log as a list of labelled one-byte hex numbers.
+ */
+class UdParserNvdimmOPParms : public ERRORLOG::ErrlUserDetailsParser
+{
+public:
+ /**
+ * @brief Constructor
+ */
+ UdParserNvdimmOPParms() {}
+
+ /**
+ * @brief Destructor
+ */
+ virtual ~UdParserNvdimmOPParms() = default;
+
+ /**
+ * @brief Parses string user detail data from an error log
+ *
+ * Prints the heading "NVDIMM I2C Register Traces" followed by 23
+ * consecutive one-byte values read from i_pBuffer, advancing one byte
+ * after each field.
+ *
+ * @param i_version Version of the data (not consulted by this parser)
+ * @param i_parser ErrlUsrParser object for outputting information
+ * @param i_pBuffer Pointer to buffer containing detail data
+ * @param i_buflen Length of the buffer (not checked before reading)
+ *
+ * NOTE(review): nvdimm_reg_t also holds CSave_Timeout0 and
+ * CSave_Timeout1 between CSave_Fail_Info1 and Error_Threshold_Status,
+ * but neither appears in the layout comment below or in the printed
+ * fields. If the user-details section is a straight copy of that
+ * struct, every label after "CSave Fail Info1" is shifted by two
+ * bytes - confirm against UdNvdimmOPParms.
+ * NOTE(review): the label "Restore Fail Info0" corresponds to the
+ * RESTORE_FAIL_INFO entry of the layout.
+ */
+ virtual void parse(errlver_t i_version,
+ ErrlUsrParser & i_parser,
+ void * i_pBuffer,
+ const uint32_t i_buflen) const
+ {
+ char* l_databuf = static_cast<char*>(i_pBuffer);
+ i_parser.PrintHeading("NVDIMM I2C Register Traces");
+
+ //***** Memory Layout *****
+ // 1 byte : MODULE_HEALTH
+ // 1 byte : MODULE_HEALTH_STATUS0
+ // 1 byte : MODULE_HEALTH_STATUS1
+ // 1 byte : CSAVE_STATUS
+ // 1 byte : CSAVE_INFO
+ // 1 byte : CSAVE_FAIL_INFO0
+ // 1 byte : CSAVE_FAIL_INFO1
+ // 1 byte : ERROR_THRESHOLD_STATUS
+ // 1 byte : NVDIMM_READY
+ // 1 byte : NVDIMM_CMD_STATUS0
+ // 1 byte : ERASE_STATUS
+ // 1 byte : ERASE_TIMEOUT0
+ // 1 byte : ERASE_TIMEOUT1
+ // 1 byte : ABORT_CMD_TIMEOUT
+ // 1 byte : SET_ES_POLICY_STATUS
+ // 1 byte : RESTORE_STATUS
+ // 1 byte : RESTORE_FAIL_INFO
+ // 1 byte : RESTORE_TIMEOUT0
+ // 1 byte : RESTORE_TIMEOUT1
+ // 1 byte : ARM_STATUS
+ // 1 byte : ARM_TIMEOUT0
+ // 1 byte : ARM_TIMEOUT1
+ // 1 byte : SET_EVENT_NOTIFICATION_STATUS
+ //
+
+ i_parser.PrintNumber("Module Health Register: ","%.2lX",TO_UINT8(l_databuf));
+ ++l_databuf;
+ i_parser.PrintNumber("Module Health Status0 Register: ","%.2lX",TO_UINT8(l_databuf));
+ ++l_databuf;
+ i_parser.PrintNumber("Module Health Status1 Register: ","%.2lX",TO_UINT8(l_databuf));
+ ++l_databuf;
+ i_parser.PrintNumber("CSave Status Register: ","%.2lX",TO_UINT8(l_databuf));
+ ++l_databuf;
+ i_parser.PrintNumber("CSave Info Register: ","%.2lX",TO_UINT8(l_databuf));
+ ++l_databuf;
+ i_parser.PrintNumber("CSave Fail Info0 Register: ","%.2lX",TO_UINT8(l_databuf));
+ ++l_databuf;
+ i_parser.PrintNumber("CSave Fail Info1 Register: ","%.2lX",TO_UINT8(l_databuf));
+ ++l_databuf;
+ i_parser.PrintNumber("Error Threshold Status Register: ","%.2lX",TO_UINT8(l_databuf));
+ ++l_databuf;
+ i_parser.PrintNumber("NVDIMM Ready Register: ","%.2lX",TO_UINT8(l_databuf));
+ ++l_databuf;
+ i_parser.PrintNumber("NVDIMM CMD Status0 Register: ","%.2lX",TO_UINT8(l_databuf));
+ ++l_databuf;
+ i_parser.PrintNumber("Erase Status Register: ","%.2lX",TO_UINT8(l_databuf));
+ ++l_databuf;
+ i_parser.PrintNumber("Erase Timeout0 Register: ","%.2lX",TO_UINT8(l_databuf));
+ ++l_databuf;
+ i_parser.PrintNumber("Erase Timeout1 Register: ","%.2lX",TO_UINT8(l_databuf));
+ ++l_databuf;
+ i_parser.PrintNumber("Abort CMD Timeout Register: ","%.2lX",TO_UINT8(l_databuf));
+ ++l_databuf;
+ i_parser.PrintNumber("Set ES Policy Status Register: ","%.2lX",TO_UINT8(l_databuf));
+ ++l_databuf;
+ i_parser.PrintNumber("Restore Status Register: ","%.2lX",TO_UINT8(l_databuf));
+ ++l_databuf;
+ i_parser.PrintNumber("Restore Fail Info0 Register: ","%.2lX",TO_UINT8(l_databuf));
+ ++l_databuf;
+ i_parser.PrintNumber("Restore Timeout0 Register: ","%.2lX",TO_UINT8(l_databuf));
+ ++l_databuf;
+ i_parser.PrintNumber("Restore Timeout1 Register: ","%.2lX",TO_UINT8(l_databuf));
+ ++l_databuf;
+ i_parser.PrintNumber("Arm Status Register: ","%.2lX",TO_UINT8(l_databuf));
+ ++l_databuf;
+ i_parser.PrintNumber("Arm Timeout0 Register: ","%.2lX",TO_UINT8(l_databuf));
+ ++l_databuf;
+ i_parser.PrintNumber("Arm Timeout1 Register: ","%.2lX",TO_UINT8(l_databuf));
+ ++l_databuf;
+ i_parser.PrintNumber("Set Event Notification Status Register: ","%.2lX",TO_UINT8(l_databuf));
+ ++l_databuf;
+ }
+
+ // Disabled
+ UdParserNvdimmOPParms(const UdParserNvdimmOPParms&) = delete;
+ UdParserNvdimmOPParms & operator=(UdParserNvdimmOPParms &) = delete;
+};
+
} // end NVDIMM namespace
#endif
diff --git a/src/usr/isteps/nvdimm/plugins/nvdimmUdParserFactory.H b/src/usr/isteps/nvdimm/plugins/nvdimmUdParserFactory.H
index b27774b13..f208ac060 100644
--- a/src/usr/isteps/nvdimm/plugins/nvdimmUdParserFactory.H
+++ b/src/usr/isteps/nvdimm/plugins/nvdimmUdParserFactory.H
@@ -38,14 +38,14 @@ namespace NVDIMM
{
registerParser<NVDIMM::UdParserNvdimmParms>
(NVDIMM_UDT_PARAMETERS);
+ registerParser<NVDIMM::UdParserNvdimmOPParms>
+ (NVDIMM_OP_PARAMETERS);
}
- private:
-
- UserDetailsParserFactory(const UserDetailsParserFactory &);
- UserDetailsParserFactory & operator=
- (const UserDetailsParserFactory &);
+ UserDetailsParserFactory(const UserDetailsParserFactory &) = delete;
+ UserDetailsParserFactory & operator=(UserDetailsParserFactory &) = delete;
};
+
};
#endif
diff --git a/src/usr/isteps/nvdimm/runtime/nvdimm_rt.C b/src/usr/isteps/nvdimm/runtime/nvdimm_rt.C
index d3a9d41a4..d5432712c 100644
--- a/src/usr/isteps/nvdimm/runtime/nvdimm_rt.C
+++ b/src/usr/isteps/nvdimm/runtime/nvdimm_rt.C
@@ -41,7 +41,10 @@
#include <usr/runtime/rt_targeting.H>
#include <runtime/interface.h>
#include <arch/ppc.H>
+#include <lib/shared/nimbus_defaults.H>
#include <isteps/nvdimm/nvdimmreasoncodes.H>
+#include "../errlud_nvdimm.H"
+#include "../nvdimmErrorLog.H"
#include <isteps/nvdimm/nvdimm.H> // implements some of these
#include "../nvdimm.H" // for g_trac_nvdimm
@@ -88,11 +91,12 @@ errlHndl_t nvdimmPollArmDone(Target* i_nvdimm,
* the trigger has been armed to ddr_reset_n
*
* @param[in] i_nvdimm - nvdimm target with NV controller
+ * @param[in] i_arm_timeout - nvdimm local timeout status
*
* @return errlHndl_t - Null if successful, otherwise a pointer to
* the error log.
*/
-errlHndl_t nvdimmCheckArmSuccess(Target *i_nvdimm)
+errlHndl_t nvdimmCheckArmSuccess(Target *i_nvdimm, bool i_arm_timeout)
{
TRACUCOMP(g_trac_nvdimm, ENTER_MRK"nvdimmCheckArmSuccess() nvdimm[%X]",
get_huid(i_nvdimm));
@@ -107,7 +111,7 @@ errlHndl_t nvdimmCheckArmSuccess(Target *i_nvdimm)
TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmCheckArmSuccess() nvdimm[%X]"
"failed to read arm status reg!",get_huid(i_nvdimm));
}
- else if ((l_data & ARM_SUCCESS) != ARM_SUCCESS)
+ else if (((l_data & ARM_ERROR) == ARM_ERROR) || ((l_data & RESET_N_ARMED) != RESET_N_ARMED) || i_arm_timeout)
{
TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmCheckArmSuccess() nvdimm[%X]"
@@ -140,13 +144,9 @@ errlHndl_t nvdimmCheckArmSuccess(Target *i_nvdimm)
// if failed to arm trigger
l_err->addPartCallout( i_nvdimm,
HWAS::NV_CONTROLLER_PART_TYPE,
- HWAS::SRCI_PRIORITY_HIGH);
- l_err->addPartCallout( i_nvdimm,
- HWAS::BPM_PART_TYPE,
- HWAS::SRCI_PRIORITY_MED);
- l_err->addPartCallout( i_nvdimm,
- HWAS::BPM_CABLE_PART_TYPE,
- HWAS::SRCI_PRIORITY_MED);
+ HWAS::SRCI_PRIORITY_HIGH,
+ HWAS::DECONFIG,
+ HWAS::GARD_Fatal);
}
TRACUCOMP(g_trac_nvdimm, EXIT_MRK"nvdimmCheckArmSuccess() nvdimm[%X] ret[%X]",
@@ -158,14 +158,21 @@ errlHndl_t nvdimmCheckArmSuccess(Target *i_nvdimm)
bool nvdimmArm(TargetHandleList &i_nvdimmTargetList)
{
bool o_arm_successful = true;
+ bool l_continue = true;
+ bool l_arm_timeout = false;
+ uint8_t l_data;
+ auto l_RegInfo = nvdimm_reg_t();
TRACFCOMP(g_trac_nvdimm, ENTER_MRK"nvdimmArm() %d",
i_nvdimmTargetList.size());
errlHndl_t l_err = nullptr;
+ errlHndl_t l_err_t = nullptr;
for (auto const l_nvdimm : i_nvdimmTargetList)
{
+ l_arm_timeout = false;
+
// skip if the nvdimm is already armed
ATTR_NVDIMM_ARMED_type l_armed_state = {};
l_armed_state = l_nvdimm->getAttr<ATTR_NVDIMM_ARMED>();
@@ -175,27 +182,35 @@ bool nvdimmArm(TargetHandleList &i_nvdimmTargetList)
continue;
}
- // skip if the nvdimm is in error state
- if (NVDIMM::nvdimmInErrorState(l_nvdimm))
- {
- // error state means arming not successful
- o_arm_successful = false;
- continue;
- }
-
+ // Set ES Policy, contains all of its status checks
l_err = nvdimmSetESPolicy(l_nvdimm);
if (l_err)
{
+ TRACFCOMP(g_trac_nvdimm, "nvdimmArm() nvdimm[%X] failed to set ES Policy", get_huid(l_nvdimm));
o_arm_successful = false;
- nvdimmSetStatusFlag(l_nvdimm, NSTD_ERR_NOBKUP);
+
+ nvdimmDisarm(i_nvdimmTargetList);
+ l_err_t = notifyNvdimmProtectionChange(l_nvdimm, NVDIMM_DISARMED);
+ if (l_err_t)
+ {
+ errlCommit( l_err_t, NVDIMM_COMP_ID );
+ }
// Committing the error as we don't want this to interrupt
// the boot. This will notify the user that action is needed
// on this module
l_err->setSev(ERRORLOG::ERRL_SEV_PREDICTIVE);
l_err->collectTrace(NVDIMM_COMP_NAME);
+
+ // Callout nvdimm on high, gard and deconfig
+ l_err->addPartCallout( l_nvdimm,
+ HWAS::NV_CONTROLLER_PART_TYPE,
+ HWAS::SRCI_PRIORITY_HIGH,
+ HWAS::NO_DECONFIG,
+ HWAS::GARD_Fatal);
+
errlCommit( l_err, NVDIMM_COMP_ID );
- continue;
+ break;
}
l_err = NVDIMM::nvdimmChangeArmState(l_nvdimm, ARM_TRIGGER);
@@ -205,7 +220,14 @@ bool nvdimmArm(TargetHandleList &i_nvdimmTargetList)
// salvage the data
if (l_err)
{
- NVDIMM::nvdimmSetStatusFlag(l_nvdimm, NVDIMM::NSTD_ERR_NOBKUP);
+ TRACFCOMP(g_trac_nvdimm, "nvdimmArm() nvdimm[%X] failed to trigger arm", get_huid(l_nvdimm));
+
+ l_err_t = notifyNvdimmProtectionChange(l_nvdimm, NVDIMM_DISARMED);
+ if (l_err_t)
+ {
+ errlCommit( l_err_t, NVDIMM_COMP_ID );
+ }
+
// Committing the error as we don't want this to interrupt
// the boot. This will notify the user that action is needed
// on this module
@@ -221,29 +243,78 @@ bool nvdimmArm(TargetHandleList &i_nvdimmTargetList)
l_err = nvdimmPollArmDone(l_nvdimm, l_poll);
if (l_err)
{
- NVDIMM::nvdimmSetStatusFlag(l_nvdimm, NVDIMM::NSTD_ERR_NOBKUP);
+ TRACFCOMP(g_trac_nvdimm, "nvdimmArm() nvdimm[%X] arm command timed out", get_huid(l_nvdimm));
+ l_arm_timeout = true;
+
+ l_err_t = notifyNvdimmProtectionChange(l_nvdimm, NVDIMM_DISARMED);
+ if (l_err_t)
+ {
+ errlCommit( l_err_t, NVDIMM_COMP_ID );
+ }
+
// Committing the error as we don't want this to interrupt
// the boot. This will notify the user that action is needed
// on this module
l_err->setSev(ERRORLOG::ERRL_SEV_PREDICTIVE);
l_err->collectTrace(NVDIMM_COMP_NAME);
+
errlCommit( l_err, NVDIMM_COMP_ID );
o_arm_successful = false;
- continue;
}
- l_err = nvdimmCheckArmSuccess(l_nvdimm);
+ // Check health status registers and exit if required
+ l_err = nvdimmHealthStatusCheck( l_nvdimm, HEALTH_PRE_ARM, l_arm_timeout );
+
+ // Check for health status failure
+ if (l_err)
+ {
+ TRACFCOMP(g_trac_nvdimm, "nvdimmArm() nvdimm[%X] failed first health status check", get_huid(l_nvdimm));
+ if (!l_continue)
+ {
+ errlCommit( l_err, NVDIMM_COMP_ID );
+
+ // Disarming all dimms due to error
+ nvdimmDisarm(i_nvdimmTargetList);
+
+ o_arm_successful = false;
+ break;
+ }
+ else
+ {
+ errlCommit( l_err, NVDIMM_COMP_ID );
+ continue;
+ }
+ }
+
+ l_err = nvdimmCheckArmSuccess(l_nvdimm, l_arm_timeout);
if (l_err)
{
- NVDIMM::nvdimmSetStatusFlag(l_nvdimm, NVDIMM::NSTD_ERR_NOBKUP);
+ TRACFCOMP(g_trac_nvdimm, "nvdimmArm() nvdimm[%X] failed to succesfully arm", get_huid(l_nvdimm));
+
+ // Disarming all dimms due to error
+ nvdimmDisarm(i_nvdimmTargetList);
+
+ l_err_t = notifyNvdimmProtectionChange(l_nvdimm, NVDIMM_DISARMED);
+ if (l_err_t)
+ {
+ errlCommit( l_err_t, NVDIMM_COMP_ID );
+ }
+
// Committing the error as we don't want this to interrupt
// the boot. This will notify the user that action is needed
// on this module
l_err->setSev(ERRORLOG::ERRL_SEV_PREDICTIVE);
l_err->collectTrace(NVDIMM_COMP_NAME);
- errlCommit( l_err, NVDIMM_COMP_ID );
+
+ // Dump Traces for error logs
+ nvdimmTraceRegs( l_nvdimm, l_RegInfo );
+
+ // Add reg traces to the error log
+ NVDIMM::UdNvdimmOPParms( l_RegInfo ).addToLog(l_err);
+
+ errlCommit(l_err, NVDIMM_COMP_ID);
o_arm_successful = false;
- continue;
+ break;
}
// After arming the trigger, erase the image to prevent the possible
@@ -252,7 +323,17 @@ bool nvdimmArm(TargetHandleList &i_nvdimmTargetList)
l_err = nvdimmEraseNF(l_nvdimm);
if (l_err)
{
- NVDIMM::nvdimmSetStatusFlag(l_nvdimm, NVDIMM::NSTD_ERR_NOBKUP);
+ TRACFCOMP(g_trac_nvdimm, "nvdimmArm() nvdimm[%X] failed to erase post arm", get_huid(l_nvdimm));
+
+ // Disarming all dimms due to error
+ nvdimmDisarm(i_nvdimmTargetList);
+
+ l_err_t = notifyNvdimmProtectionChange(l_nvdimm, NVDIMM_DISARMED);
+ if (l_err_t)
+ {
+ errlCommit( l_err_t, NVDIMM_COMP_ID );
+ }
+
// Committing the error as we don't want this to interrupt
// the boot. This will notify the user that action is needed
// on this module
@@ -271,8 +352,7 @@ bool nvdimmArm(TargetHandleList &i_nvdimmTargetList)
l_err->collectTrace(NVDIMM_COMP_NAME);
errlCommit(l_err, NVDIMM_COMP_ID);
}
-
- continue;
+ break;
}
// Arm successful, update armed status
@@ -284,6 +364,78 @@ bool nvdimmArm(TargetHandleList &i_nvdimmTargetList)
l_err->collectTrace(NVDIMM_COMP_NAME);
errlCommit(l_err, NVDIMM_COMP_ID);
}
+
+ // Enable event notification
+ l_err = nvdimmWriteReg(l_nvdimm, SET_EVENT_NOTIFICATION_CMD, PERSISTENCY_NOTIFICATION);
+ if (l_err)
+ {
+ TRACFCOMP(g_trac_nvdimm, ERR_MRK"NDVIMM HUID[%X] error initiating erase!!",
+ TARGETING::get_huid(l_nvdimm));
+ errlCommit(l_err, NVDIMM_COMP_ID);
+ }
+
+ // Check notification status and errors
+ l_err = nvdimmReadReg(l_nvdimm, SET_EVENT_NOTIFICATION_STATUS, l_data);
+ if (l_err)
+ {
+ errlCommit( l_err, NVDIMM_COMP_ID );
+ }
+ else if (((l_data & SET_EVENT_NOTIFICATION_ERROR) == SET_EVENT_NOTIFICATION_ERROR) || ((l_data & PERSISTENCY_ENABLED) != PERSISTENCY_ENABLED))
+ {
+ TRACFCOMP(g_trac_nvdimm, "nvdimmArm() nvdimm[%X] failed to set event notification", get_huid(l_nvdimm));
+
+ // Set NVDIMM Status flag to Restored, as error detected but data might persist
+ nvdimmSetStatusFlag(l_nvdimm, NSTD_VAL_ERROR);
+
+ /*@
+ *@errortype
+ *@reasoncode NVDIMM_SET_EVENT_NOTIFICATION_ERROR
+ *@severity ERRORLOG_SEV_PREDICTIVE
+ *@moduleid NVDIMM_SET_EVENT_NOTIFICATION
+ *@userdata1[0:31] Target Huid
+ *@userdata2 <UNUSED>
+ *@devdesc NVDIMM threw an error or failed to set event
+ * notifications during arming
+ *@custdesc NVDIMM failed to enable event notificaitons
+ */
+ l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE,
+ NVDIMM_SET_EVENT_NOTIFICATION,
+ NVDIMM_SET_EVENT_NOTIFICATION_ERROR,
+ TARGETING::get_huid(l_nvdimm),
+ 0x0,
+ ERRORLOG::ErrlEntry::NO_SW_CALLOUT );
+
+ l_err->collectTrace( NVDIMM_COMP_NAME );
+
+ // Callout, deconfig and gard the dimm
+ l_err->addPartCallout( l_nvdimm,
+ HWAS::NV_CONTROLLER_PART_TYPE,
+ HWAS::SRCI_PRIORITY_LOW);
+
+
+ // Read relevant regs for trace data
+ nvdimmTraceRegs(l_nvdimm, l_RegInfo);
+
+ // Add reg traces to the error log
+ NVDIMM::UdNvdimmOPParms( l_RegInfo ).addToLog(l_err);
+
+ errlCommit( l_err, NVDIMM_COMP_ID );
+ break;
+ }
+
+ // Re-check health status registers
+ l_err = nvdimmHealthStatusCheck( l_nvdimm, HEALTH_POST_ARM, l_continue );
+
+ // Check for health status failure
+ if (l_err)
+ {
+ TRACFCOMP(g_trac_nvdimm, "nvdimmArm() nvdimm[%X] failed final health status check", get_huid(l_nvdimm));
+
+ errlCommit( l_err, NVDIMM_COMP_ID );
+ o_arm_successful = false;
+ break;
+ }
+
}
TRACFCOMP(g_trac_nvdimm, EXIT_MRK"nvdimmArm() returning %d",
@@ -318,7 +470,6 @@ bool nvdimmDisarm(TargetHandleList &i_nvdimmTargetList)
// salvage the data
if (l_err)
{
- NVDIMM::nvdimmSetStatusFlag(l_nvdimm, NVDIMM::NSTD_ERR_NOBKUP);
// Committing the error as we don't want this to interrupt
// the boot. This will notify the user that action is needed
// on this module
@@ -361,8 +512,7 @@ bool nvdimmInErrorState(Target *i_nvdimm)
// Just checking bit 1 for now, need to investigate these
// Should be checking NVDIMM_ARMED instead
- //if ((l_statusFlag & NSTD_ERR) == 0)
- if ((l_statusFlag & NSTD_ERR_NOPRSV) == 0)
+ if ((l_statusFlag & NSTD_VAL_ERASED) == 0)
{
l_ret = false;
}
OpenPOWER on IntegriCloud