summary refs log tree commit diff stats
path: root/src/usr
diff options
context:
space:
mode:
author Tsung Yeung <tyeung@us.ibm.com> 2019-08-23 13:56:43 -0500
committer Daniel M Crowell <dcrowell@us.ibm.com> 2019-08-26 19:39:51 -0500
commit b94854d1f4aebe63361ee9baf6d3fa4e69b951ba (patch)
tree 3c78d319b58381a6ee6fb017a5f82a21c7b83518 /src/usr
parent a95709f74a6f73560457b03fbf08c8b861ff47dc (diff)
download talos-hostboot-b94854d1f4aebe63361ee9baf6d3fa4e69b951ba.tar.gz
talos-hostboot-b94854d1f4aebe63361ee9baf6d3fa4e69b951ba.zip
Fix restore fail due to restore attempt on empty flash
- fix NVDIMM FFDC misalignment
- correct NV_STATUS enum naming

Change-Id: Ib0084f5fb95ce8a93ee5e85a0790878469065acb
CQ:SW473934
Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/82775
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Reviewed-by: Matt Derksen <mderkse1@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Reviewed-by: Daniel M Crowell <dcrowell@us.ibm.com>
Diffstat (limited to 'src/usr')
-rw-r--r-- src/usr/isteps/nvdimm/nvdimm.C 100
-rw-r--r-- src/usr/isteps/nvdimm/nvdimmErrorLog.C 48
-rw-r--r-- src/usr/isteps/nvdimm/plugins/errludP_nvdimm.H 6
-rw-r--r-- src/usr/isteps/nvdimm/runtime/nvdimm_rt.C 4
4 files changed, 87 insertions, 71 deletions
diff --git a/src/usr/isteps/nvdimm/nvdimm.C b/src/usr/isteps/nvdimm/nvdimm.C
index 2858dc53c..31feffac1 100644
--- a/src/usr/isteps/nvdimm/nvdimm.C
+++ b/src/usr/isteps/nvdimm/nvdimm.C
@@ -381,19 +381,19 @@ void nvdimmSetStatusFlag(Target *i_nvdimm, const uint8_t i_status_flag)
switch(i_status_flag)
{
- // Make sure NSTD_VAL_ERROR (content preserved) is unset before setting NSTD_VAL_ERASED
+ // Make sure NSTD_VAL_RESTORED (content preserved) is unset before setting NSTD_VAL_ERASED
// (data not preserved) or NSTD_VAL_SR_FAILED (error preserving data)
case NSTD_ERR:
case NSTD_VAL_ERASED:
case NSTD_VAL_SR_FAILED:
- l_statusFlag &= NSTD_VAL_ERROR_MASK;
+ l_statusFlag &= NSTD_VAL_RESTORED_MASK;
l_statusFlag |= i_status_flag;
break;
// If the content preserved(restore successfully), make sure
// NSTD_VAL_ERASED (not preserved) and NSTD_VAL_SR_FAILED (error preserving)
// are unset before setting this flag.
- case NSTD_VAL_ERROR:
+ case NSTD_VAL_RESTORED:
l_statusFlag &= (NSTD_VAL_ERASED_MASK & NSTD_VAL_SR_FAILED_MASK);
l_statusFlag |= i_status_flag;
break;
@@ -402,6 +402,11 @@ void nvdimmSetStatusFlag(Target *i_nvdimm, const uint8_t i_status_flag)
l_statusFlag |= i_status_flag;
break;
+ // Error detected but save/restore might work. May coexist with other bits.
+ case NSTD_ERR_VAL_SR:
+ l_statusFlag |= i_status_flag;
+ break;
+
default:
assert(0, "nvdimmSetStatusFlag() HUID[%X], i_status_flag[%X] invalid flag!",
get_huid(i_nvdimm), i_status_flag);
@@ -1165,12 +1170,6 @@ errlHndl_t nvdimmRestore(TargetHandleList& i_nvdimmList, uint8_t &i_mpipl)
break;
}
- // Nothing to do. Move on.
- if (i_nvdimmList.empty())
- {
- break;
- }
-
// Kick off the restore on each nvdimm in the nvdimm list
for (const auto & l_nvdimm : i_nvdimmList)
{
@@ -1647,36 +1646,12 @@ errlHndl_t nvdimm_restore(TargetHandleList &i_nvdimmList)
assert(l_sys, "nvdimm_restore: no TopLevelTarget");
uint8_t l_mpipl = l_sys->getAttr<ATTR_IS_MPIPL_HB>();
nvdimm_reg_t l_RegInfo = nvdimm_reg_t();
- TargetHandleList l_nvdimmList = i_nvdimmList;
+ TargetHandleList l_nvdimm_restore_list = i_nvdimmList;
uint8_t l_rstrValid;
do
{
- for (const auto & l_nvdimm : i_nvdimmList)
- {
- // Check for a valid image
- l_err = nvdimmValidImage( l_nvdimm, l_valid );
- if (l_err)
- {
- TRACFCOMP(g_trac_nvdimm, "nvdimmRestore() nvdimm[%X] restore failed to read the image", get_huid(l_nvdimm));
- errlCommit(l_err, NVDIMM_COMP_ID);
- }
-
- if (!l_valid)
- {
- TRACFCOMP(g_trac_nvdimm, "nvdimmRestore() nvdimm[%X] restore failed due to invalid image", get_huid(l_nvdimm));
- // Set ATTR NV STATUS FLAG to Erased
- nvdimmSetStatusFlag(l_nvdimm, NSTD_VAL_ERASED);
- break;
- }
-
- }
-
- if (!l_valid)
- {
- break;
- }
-
+ // Check MPIPL case first to make sure any on-going backup is complete
if (l_mpipl)
{
// During MPIPL, make sure any in-progress save is completed before proceeding
@@ -1697,15 +1672,50 @@ errlHndl_t nvdimm_restore(TargetHandleList &i_nvdimmList)
}
}
+ // Compile a list of nvdimms with valid image
+ // TODO: Reach out to RAS on how to handle odd number of nvdimms
+ // since we always operate in pairs
+ for (TargetHandleList::iterator it = l_nvdimm_restore_list.begin();
+ it != l_nvdimm_restore_list.end();)
+ {
+ // Check for a valid image
+ l_err = nvdimmValidImage( *it, l_valid );
+ if (l_err)
+ {
+ TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_restore() nvdimm[%X] Failed to detect valid image", get_huid(*it));
+ errlCommit(l_err, NVDIMM_COMP_ID);
+ }
+
+ // Remove it from the restore list if there is no valid image
+ if (!l_valid)
+ {
+ TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_restore() nvdimm[%X] No valid image discovered", get_huid(*it));
+ // Set ATTR NV STATUS FLAG to Erased
+ nvdimmSetStatusFlag(*it, NSTD_VAL_ERASED);
+ it = l_nvdimm_restore_list.erase(it);
+
+ }
+ else
+ {
+ it++;
+ }
+ }
+
+ // Exit if there is nothing to restore
+ if (l_nvdimm_restore_list.empty())
+ {
+ break;
+ }
+
// Start the restore
- l_err = nvdimmRestore(l_nvdimmList, l_mpipl);
+ l_err = nvdimmRestore(l_nvdimm_restore_list, l_mpipl);
// Check if restore completed successfully
if (l_err)
{
- const auto l_nvdimm = l_nvdimmList.front();
+ const auto l_nvdimm = l_nvdimm_restore_list.front();
- TRACFCOMP(g_trac_nvdimm, "nvdimm_restore() - Failing nvdimmRestore()");
+ TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_restore() - Failing nvdimmRestore()");
nvdimmSetStatusFlag(l_nvdimm, NSTD_VAL_SR_FAILED);
// Invalid restore could be due to dram not in self-refresh
@@ -1731,7 +1741,7 @@ errlHndl_t nvdimm_restore(TargetHandleList &i_nvdimmList)
if (l_err)
{
- TRACFCOMP(g_trac_nvdimm, "nvdimmRestore() nvdimm[%X] failed during health status check", get_huid(l_nvdimm));
+ TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_restore() nvdimm[%X] failed during health status check", get_huid(l_nvdimm));
if (l_exit)
{
errlCommit( l_err, NVDIMM_COMP_ID );
@@ -1748,7 +1758,7 @@ errlHndl_t nvdimm_restore(TargetHandleList &i_nvdimmList)
l_err = nvdimmGetRestoreValid(l_nvdimm, l_rstrValid);
if (l_err)
{
- TRACFCOMP(g_trac_nvdimm, "nvdimmRestore Target[%X] error validating restore status!",
+ TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_restore Target[%X] error validating restore status!",
get_huid(l_nvdimm));
break;
}
@@ -1756,7 +1766,7 @@ errlHndl_t nvdimm_restore(TargetHandleList &i_nvdimmList)
if ((l_rstrValid & RSTR_SUCCESS) == RSTR_SUCCESS)
{
// Restore success!
- nvdimmSetStatusFlag(l_nvdimm, NSTD_VAL_ERROR);
+ nvdimmSetStatusFlag(l_nvdimm, NSTD_VAL_RESTORED);
}
}
@@ -1956,8 +1966,8 @@ errlHndl_t nvdimm_init(Target *i_nvdimm)
}
else if ((l_data & NO_RESET_N) == NO_RESET_N)
{
- // Set ATTR_NV_STATUS_FLAG to restored, as data may persist
- nvdimmSetStatusFlag(i_nvdimm, NSTD_VAL_ERROR);
+ // Set ATTR_NV_STATUS_FLAG to partial working as data may persist
+ nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_VAL_SR);
TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmInit() nvdimm[%X]"
"failed to save due to power loss!",get_huid(i_nvdimm));
/*@
@@ -2063,8 +2073,8 @@ errlHndl_t nvdimm_init(Target *i_nvdimm)
}
else
{
- // Set ATTR_NV_STATUS_FLAG to Restored as data might persist
- nvdimmSetStatusFlag(i_nvdimm, NSTD_VAL_ERROR);
+ // Set ATTR_NV_STATUS_FLAG to partial working as data may persist
+ nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_VAL_SR);
errlCommit(l_err, NVDIMM_COMP_ID);
}
break;
diff --git a/src/usr/isteps/nvdimm/nvdimmErrorLog.C b/src/usr/isteps/nvdimm/nvdimmErrorLog.C
index 57984bb97..ccd1ad801 100644
--- a/src/usr/isteps/nvdimm/nvdimmErrorLog.C
+++ b/src/usr/isteps/nvdimm/nvdimmErrorLog.C
@@ -290,8 +290,8 @@ bool nvdimmCalloutDimm(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err)
// Checkout image validity and set dimm status accordingly
if(l_continue)
{
- // Set ATTR_NV_STATUS_FLAG to restored as data may still persist
- nvdimmSetStatusFlag(i_nvdimm, NSTD_VAL_ERROR);
+ // Set ATTR_NV_STATUS_FLAG to partially working as data may still persist
+ nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_VAL_SR);
// Callout dimm but do not deconfig or gard
o_err->addPartCallout( i_nvdimm,
@@ -328,8 +328,8 @@ bool nvdimmCalloutDimm(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err)
// Check restore status and set dimm status accordingly
if(l_continue)
{
- // Set ATTR_NV_STATUS_FLAG to restored as data may still persist
- nvdimmSetStatusFlag(i_nvdimm, NSTD_VAL_ERROR);
+ // Set ATTR_NV_STATUS_FLAG to partially working as data may still persist
+ nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_VAL_SR);
// Callout dimm but do not deconfig or gard
o_err->addPartCallout( i_nvdimm,
@@ -368,8 +368,8 @@ bool nvdimmCalloutDimm(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err)
// Check arm status and set dimm status accordingly
if(!l_continue)
{
- // Set ATTR_NV_STATUS_FLAG to restored as data may still persist
- nvdimmSetStatusFlag(i_nvdimm, NSTD_VAL_ERROR);
+ // Set ATTR_NV_STATUS_FLAG to partially working as data may still persist
+ nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_VAL_SR);
// Callout dimm but do not deconfig or gard
o_err->addPartCallout( i_nvdimm,
@@ -404,8 +404,8 @@ bool nvdimmCalloutDimm(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err)
HWAS::NV_CONTROLLER_PART_TYPE,
HWAS::SRCI_PRIORITY_LOW);
- // Set ATTR_NV_STATUS_FLAG to restored as data may persist despite errors
- nvdimmSetStatusFlag(i_nvdimm, NSTD_VAL_ERROR);
+ // Set ATTR_NV_STATUS_FLAG to partially working as data may persist despite errors
+ nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_VAL_SR);
break;
}
@@ -457,8 +457,8 @@ bool nvdimmBPMCableCallout(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err)
// Check image validity and set dimm status accordingly
if(l_continue)
{
- // Set ATTR_NV_STATUS_FLAG to restored as data may still persist
- nvdimmSetStatusFlag(i_nvdimm, NSTD_VAL_ERROR);
+ // Set ATTR_NV_STATUS_FLAG to partially working as data may still persist
+ nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_VAL_SR);
// Callout dimm but do not deconfig or gard
o_err->addPartCallout( i_nvdimm,
@@ -503,8 +503,8 @@ bool nvdimmBPMCableCallout(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err)
// Check restore status and set dimm status accordingly
if(l_continue)
{
- // Set ATTR_NV_STATUS_FLAG to restored as data may still persist
- nvdimmSetStatusFlag(i_nvdimm, NSTD_VAL_ERROR);
+ // Set ATTR_NV_STATUS_FLAG to partially working as data may still persist
+ nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_VAL_SR);
// Callout dimm but do not deconfig or gard
o_err->addPartCallout( i_nvdimm,
@@ -549,8 +549,8 @@ bool nvdimmBPMCableCallout(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err)
// Check arm status and set dimm status accordingly
if(!l_continue)
{
- // Set ATTR_NV_STATUS_FLAG to restored as data may still persist
- nvdimmSetStatusFlag(i_nvdimm, NSTD_VAL_ERROR);
+ // Set ATTR_NV_STATUS_FLAG to partially working as data may still persist
+ nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_VAL_SR);
// Callout dimm but do not deconfig or gard
o_err->addPartCallout( i_nvdimm,
@@ -590,8 +590,8 @@ bool nvdimmBPMCableCallout(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err)
HWAS::NV_CONTROLLER_PART_TYPE,
HWAS::SRCI_PRIORITY_LOW);
- // Set ATTR_NV_STATUS_FLAG to restored as data may still persist
- nvdimmSetStatusFlag(i_nvdimm, NSTD_VAL_ERROR);
+ // Set ATTR_NV_STATUS_FLAG to partially working as data may still persist
+ nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_VAL_SR);
break;
}
@@ -635,8 +635,8 @@ bool nvdimmBPMCallout(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err)
HWAS::NV_CONTROLLER_PART_TYPE,
HWAS::SRCI_PRIORITY_LOW);
- // Set ATTR_NV_STATUS_FLAG to restored as data may still persist
- nvdimmSetStatusFlag(i_nvdimm, NSTD_VAL_ERROR);
+ // Set ATTR_NV_STATUS_FLAG to partially working as data may still persist
+ nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_VAL_SR);
break;
}
@@ -654,8 +654,8 @@ bool nvdimmBPMCallout(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err)
HWAS::NV_CONTROLLER_PART_TYPE,
HWAS::SRCI_PRIORITY_LOW);
- // Set ATTR_NV_STATUS_FLAG to restored as data may still persist
- nvdimmSetStatusFlag(i_nvdimm, NSTD_VAL_ERROR);
+ // Set ATTR_NV_STATUS_FLAG to partially working as data may still persist
+ nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_VAL_SR);
break;
}
@@ -687,8 +687,8 @@ bool nvdimmBPMCallout(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err)
// Check arm status and set dimm status accordingly
if(!l_continue)
{
- // Set ATTR_NV_STATUS_FLAG to restored as data may still persist
- nvdimmSetStatusFlag(i_nvdimm, NSTD_VAL_ERROR);
+ // Set ATTR_NV_STATUS_FLAG to partially working as data may still persist
+ nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_VAL_SR);
}
else
{
@@ -716,8 +716,8 @@ bool nvdimmBPMCallout(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err)
HWAS::NV_CONTROLLER_PART_TYPE,
HWAS::SRCI_PRIORITY_LOW);
- // Set ATTR_NV_STATUS_FLAG to restored as data may still persist
- nvdimmSetStatusFlag(i_nvdimm, NSTD_VAL_ERROR);
+ // Set ATTR_NV_STATUS_FLAG to partially working as data may still persist
+ nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_VAL_SR);
break;
}
diff --git a/src/usr/isteps/nvdimm/plugins/errludP_nvdimm.H b/src/usr/isteps/nvdimm/plugins/errludP_nvdimm.H
index 2c7f1d2a0..1e951a96b 100644
--- a/src/usr/isteps/nvdimm/plugins/errludP_nvdimm.H
+++ b/src/usr/isteps/nvdimm/plugins/errludP_nvdimm.H
@@ -206,6 +206,8 @@ public:
// 1 byte : CSAVE_INFO
// 1 byte : CSAVE_FAIL_INFO0
// 1 byte : CSAVE_FAIL_INFO1
+ // 1 byte : CSAVE_TIMEOUT_INFO0
+ // 1 byte : CSAVE_TIMEOUT_INFO1
// 1 byte : ERROR_THRESHOLD_STATUS
// 1 byte : NVDIMM_READY
// 1 byte : NVDIMM_CMD_STATUS0
@@ -238,6 +240,10 @@ public:
++l_databuf;
i_parser.PrintNumber("CSave Fail Info1 Register: ","%.2lX",TO_UINT8(l_databuf));
++l_databuf;
+ i_parser.PrintNumber("CSave Timeout Info0 Register: ","%.2lX",TO_UINT8(l_databuf));
+ ++l_databuf;
+ i_parser.PrintNumber("CSave Timeout Info1 Register: ","%.2lX",TO_UINT8(l_databuf));
+ ++l_databuf;
i_parser.PrintNumber("Error Threshold Status Register: ","%.2lX",TO_UINT8(l_databuf));
++l_databuf;
i_parser.PrintNumber("NVDIMM Ready Register: ","%.2lX",TO_UINT8(l_databuf));
diff --git a/src/usr/isteps/nvdimm/runtime/nvdimm_rt.C b/src/usr/isteps/nvdimm/runtime/nvdimm_rt.C
index fc0944c8e..42a7b49d3 100644
--- a/src/usr/isteps/nvdimm/runtime/nvdimm_rt.C
+++ b/src/usr/isteps/nvdimm/runtime/nvdimm_rt.C
@@ -384,8 +384,8 @@ bool nvdimmArm(TargetHandleList &i_nvdimmTargetList)
{
TRACFCOMP(g_trac_nvdimm, "nvdimmArm() nvdimm[%X] failed to set event notification", get_huid(l_nvdimm));
- // Set NVDIMM Status flag to Restored, as error detected but data might persist
- nvdimmSetStatusFlag(l_nvdimm, NSTD_VAL_ERROR);
+ // Set NVDIMM Status flag to partial working, as error detected but data might persist
+ nvdimmSetStatusFlag(l_nvdimm, NSTD_ERR_VAL_SR);
/*@
*@errortype
OpenPOWER on IntegriCloud