diff options
author | Tsung Yeung <tyeung@us.ibm.com> | 2019-08-23 13:56:43 -0500 |
---|---|---|
committer | Daniel M Crowell <dcrowell@us.ibm.com> | 2019-08-26 19:39:51 -0500 |
commit | b94854d1f4aebe63361ee9baf6d3fa4e69b951ba (patch) | |
tree | 3c78d319b58381a6ee6fb017a5f82a21c7b83518 /src/usr | |
parent | a95709f74a6f73560457b03fbf08c8b861ff47dc (diff) | |
download | talos-hostboot-b94854d1f4aebe63361ee9baf6d3fa4e69b951ba.tar.gz talos-hostboot-b94854d1f4aebe63361ee9baf6d3fa4e69b951ba.zip |
Fix restore fail due to restore attempt on empty flash
-fix NVDIMM FFDC misalignment
-correct NV_STATUS enum naming
Change-Id: Ib0084f5fb95ce8a93ee5e85a0790878469065acb
CQ:SW473934
Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/82775
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Reviewed-by: Matt Derksen <mderkse1@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Reviewed-by: Daniel M Crowell <dcrowell@us.ibm.com>
Diffstat (limited to 'src/usr')
-rw-r--r-- | src/usr/isteps/nvdimm/nvdimm.C | 100 | ||||
-rw-r--r-- | src/usr/isteps/nvdimm/nvdimmErrorLog.C | 48 | ||||
-rw-r--r-- | src/usr/isteps/nvdimm/plugins/errludP_nvdimm.H | 6 | ||||
-rw-r--r-- | src/usr/isteps/nvdimm/runtime/nvdimm_rt.C | 4 |
4 files changed, 87 insertions, 71 deletions
diff --git a/src/usr/isteps/nvdimm/nvdimm.C b/src/usr/isteps/nvdimm/nvdimm.C index 2858dc53c..31feffac1 100644 --- a/src/usr/isteps/nvdimm/nvdimm.C +++ b/src/usr/isteps/nvdimm/nvdimm.C @@ -381,19 +381,19 @@ void nvdimmSetStatusFlag(Target *i_nvdimm, const uint8_t i_status_flag) switch(i_status_flag) { - // Make sure NSTD_VAL_ERROR (content preserved) is unset before setting NSTD_VAL_ERASED + // Make sure NSTD_VAL_RESTORED (content preserved) is unset before setting NSTD_VAL_ERASED // (data not preserved) or NSTD_VAL_SR_FAILED (error preserving data) case NSTD_ERR: case NSTD_VAL_ERASED: case NSTD_VAL_SR_FAILED: - l_statusFlag &= NSTD_VAL_ERROR_MASK; + l_statusFlag &= NSTD_VAL_RESTORED_MASK; l_statusFlag |= i_status_flag; break; // If the content preserved(restore sucessfully), make sure // NSTD_VAL_ERASED (not preserved) and NSTD_VAL_SR_FAILED (error preserving) // are unset before setting this flag. - case NSTD_VAL_ERROR: + case NSTD_VAL_RESTORED: l_statusFlag &= (NSTD_VAL_ERASED_MASK & NSTD_VAL_SR_FAILED_MASK); l_statusFlag |= i_status_flag; break; @@ -402,6 +402,11 @@ void nvdimmSetStatusFlag(Target *i_nvdimm, const uint8_t i_status_flag) l_statusFlag |= i_status_flag; break; + // Error detected but save/restore might work. May coexsit with other bits. + case NSTD_ERR_VAL_SR: + l_statusFlag |= i_status_flag; + break; + default: assert(0, "nvdimmSetStatusFlag() HUID[%X], i_status_flag[%X] invalid flag!", get_huid(i_nvdimm), i_status_flag); @@ -1165,12 +1170,6 @@ errlHndl_t nvdimmRestore(TargetHandleList& i_nvdimmList, uint8_t &i_mpipl) break; } - // Nothing to do. Move on. - if (i_nvdimmList.empty()) - { - break; - } - // Kick off the restore on each nvdimm in the nvdimm list for (const auto & l_nvdimm : i_nvdimmList) { @@ -1647,36 +1646,12 @@ errlHndl_t nvdimm_restore(TargetHandleList &i_nvdimmList) assert(l_sys, "nvdimm_restore: no TopLevelTarget"); uint8_t l_mpipl = l_sys->getAttr<ATTR_IS_MPIPL_HB>(); nvdimm_reg_t l_RegInfo = nvdimm_reg_t(); - TargetHandleList l_nvdimmList = i_nvdimmList; + TargetHandleList l_nvdimm_restore_list = i_nvdimmList; uint8_t l_rstrValid; do { - for (const auto & l_nvdimm : i_nvdimmList) - { - // Check for a valid image - l_err = nvdimmValidImage( l_nvdimm, l_valid ); - if (l_err) - { - TRACFCOMP(g_trac_nvdimm, "nvdimmRestore() nvdimm[%X] restore failed to read the image", get_huid(l_nvdimm)); - errlCommit(l_err, NVDIMM_COMP_ID); - } - - if (!l_valid) - { - TRACFCOMP(g_trac_nvdimm, "nvdimmRestore() nvdimm[%X] restore failed due to invalid image", get_huid(l_nvdimm)); - // Set ATTR NV STATUS FLAG to Erased - nvdimmSetStatusFlag(l_nvdimm, NSTD_VAL_ERASED); - break; - } - - } - - if (!l_valid) - { - break; - } - + // Check MPIPL case first to make sure any on-going backup is complete if (l_mpipl) { // During MPIPL, make sure any in-progress save is completed before proceeding @@ -1697,15 +1672,50 @@ errlHndl_t nvdimm_restore(TargetHandleList &i_nvdimmList) } } + // Compile a list of nvdimms with valid image + // TODO: Reach out to RAS on how to handle odd number of nvdimms + // since we always operate in pairs + for (TargetHandleList::iterator it = l_nvdimm_restore_list.begin(); + it != l_nvdimm_restore_list.end();) + { + // Check for a valid image + l_err = nvdimmValidImage( *it, l_valid ); + if (l_err) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_restore() nvdimm[%X] Failed to detect valid image", get_huid(*it)); + errlCommit(l_err, NVDIMM_COMP_ID); + } + + // Remove it from the restore list if there is no valid image + if (!l_valid) + { + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_restore() nvdimm[%X] No valid image discovered", get_huid(*it)); + // Set ATTR NV STATUS FLAG to Erased + nvdimmSetStatusFlag(*it, NSTD_VAL_ERASED); + it = l_nvdimm_restore_list.erase(it); + + } + else + { + it++; + } + } + + // Exit if there is nothing to restore + if (l_nvdimm_restore_list.empty()) + { + break; + } + // Start the restore - l_err = nvdimmRestore(l_nvdimmList, l_mpipl); + l_err = nvdimmRestore(l_nvdimm_restore_list, l_mpipl); // Check if restore completed successfully if (l_err) { - const auto l_nvdimm = l_nvdimmList.front(); + const auto l_nvdimm = l_nvdimm_restore_list.front(); - TRACFCOMP(g_trac_nvdimm, "nvdimm_restore() - Failing nvdimmRestore()"); + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_restore() - Failing nvdimmRestore()"); nvdimmSetStatusFlag(l_nvdimm, NSTD_VAL_SR_FAILED); // Invalid restore could be due to dram not in self-refresh @@ -1731,7 +1741,7 @@ errlHndl_t nvdimm_restore(TargetHandleList &i_nvdimmList) if (l_err) { - TRACFCOMP(g_trac_nvdimm, "nvdimmRestore() nvdimm[%X] failed during health status check", get_huid(l_nvdimm)); + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_restore() nvdimm[%X] failed during health status check", get_huid(l_nvdimm)); if (l_exit) { errlCommit( l_err, NVDIMM_COMP_ID ); @@ -1748,7 +1758,7 @@ errlHndl_t nvdimm_restore(TargetHandleList &i_nvdimmList) l_err = nvdimmGetRestoreValid(l_nvdimm, l_rstrValid); if (l_err) { - TRACFCOMP(g_trac_nvdimm, "nvdimmRestore Target[%X] error validating restore status!", + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_restore Target[%X] error validating restore status!", get_huid(l_nvdimm)); break; } @@ -1756,7 +1766,7 @@ errlHndl_t nvdimm_restore(TargetHandleList &i_nvdimmList) if ((l_rstrValid & RSTR_SUCCESS) == RSTR_SUCCESS) { // Restore success! - nvdimmSetStatusFlag(l_nvdimm, NSTD_VAL_ERROR); + nvdimmSetStatusFlag(l_nvdimm, NSTD_VAL_RESTORED); } } @@ -1956,8 +1966,8 @@ errlHndl_t nvdimm_init(Target *i_nvdimm) } else if ((l_data & NO_RESET_N) == NO_RESET_N) { - // Set ATTR_NV_STATUS_FLAG to restored, as data may persist - nvdimmSetStatusFlag(i_nvdimm, NSTD_VAL_ERROR); + // Set ATTR_NV_STATUS_FLAG to partial working as data may persist + nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_VAL_SR); TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmInit() nvdimm[%X]" "failed to save due to power loss!",get_huid(i_nvdimm)); /*@ @@ -2063,8 +2073,8 @@ errlHndl_t nvdimm_init(Target *i_nvdimm) } else { - // Set ATTR_NV_STATUS_FLAG to Restored as data might persist - nvdimmSetStatusFlag(i_nvdimm, NSTD_VAL_ERROR); + // Set ATTR_NV_STATUS_FLAG to partial working as data may persist + nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_VAL_SR); errlCommit(l_err, NVDIMM_COMP_ID); } break; diff --git a/src/usr/isteps/nvdimm/nvdimmErrorLog.C b/src/usr/isteps/nvdimm/nvdimmErrorLog.C index 57984bb97..ccd1ad801 100644 --- a/src/usr/isteps/nvdimm/nvdimmErrorLog.C +++ b/src/usr/isteps/nvdimm/nvdimmErrorLog.C @@ -290,8 +290,8 @@ bool nvdimmCalloutDimm(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err) // Checkout image validity and set dimm status accordingly if(l_continue) { - // Set ATTR_NV_STATUS_FLAG to restored as data may still persist - nvdimmSetStatusFlag(i_nvdimm, NSTD_VAL_ERROR); + // Set ATTR_NV_STATUS_FLAG to partially working as data may still persist + nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_VAL_SR); // Callout dimm but do not deconfig or gard o_err->addPartCallout( i_nvdimm, @@ -328,8 +328,8 @@ bool nvdimmCalloutDimm(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err) // Check restore status and set dimm status accordingly if(l_continue) { - // Set ATTR_NV_STATUS_FLAG to restored as data may still persist - nvdimmSetStatusFlag(i_nvdimm, NSTD_VAL_ERROR); + // Set ATTR_NV_STATUS_FLAG to partially working as data may still persist + nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_VAL_SR); // Callout dimm but do not deconfig or gard o_err->addPartCallout( i_nvdimm, @@ -368,8 +368,8 @@ bool nvdimmCalloutDimm(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err) // Check arm status and set dimm status accordingly if(!l_continue) { - // Set ATTR_NV_STATUS_FLAG to restored as data may still persist - nvdimmSetStatusFlag(i_nvdimm, NSTD_VAL_ERROR); + // Set ATTR_NV_STATUS_FLAG to partially working as data may still persist + nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_VAL_SR); // Callout dimm but do not deconfig or gard o_err->addPartCallout( i_nvdimm, @@ -404,8 +404,8 @@ bool nvdimmCalloutDimm(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err) HWAS::NV_CONTROLLER_PART_TYPE, HWAS::SRCI_PRIORITY_LOW); - // Set ATTR_NV_STATUS_FLAG to restored as data may persist despite errors - nvdimmSetStatusFlag(i_nvdimm, NSTD_VAL_ERROR); + // Set ATTR_NV_STATUS_FLAG to partially working as data may persist despite errors + nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_VAL_SR); break; } @@ -457,8 +457,8 @@ bool nvdimmBPMCableCallout(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err) // Check image validity and set dimm status accordingly if(l_continue) { - // Set ATTR_NV_STATUS_FLAG to restored as data may still persist - nvdimmSetStatusFlag(i_nvdimm, NSTD_VAL_ERROR); + // Set ATTR_NV_STATUS_FLAG to partially working as data may still persist + nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_VAL_SR); // Callout dimm but do not deconfig or gard o_err->addPartCallout( i_nvdimm, @@ -503,8 +503,8 @@ bool nvdimmBPMCableCallout(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err) // Check restore status and set dimm status accordingly if(l_continue) { - // Set ATTR_NV_STATUS_FLAG to restored as data may still persist - nvdimmSetStatusFlag(i_nvdimm, NSTD_VAL_ERROR); + // Set ATTR_NV_STATUS_FLAG to partially working as data may still persist + nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_VAL_SR); // Callout dimm but do not deconfig or gard o_err->addPartCallout( i_nvdimm, @@ -549,8 +549,8 @@ bool nvdimmBPMCableCallout(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err) // Check arm status and set dimm status accordingly if(!l_continue) { - // Set ATTR_NV_STATUS_FLAG to restored as data may still persist - nvdimmSetStatusFlag(i_nvdimm, NSTD_VAL_ERROR); + // Set ATTR_NV_STATUS_FLAG to partially working as data may still persist + nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_VAL_SR); // Callout dimm but do not deconfig or gard o_err->addPartCallout( i_nvdimm, @@ -590,8 +590,8 @@ bool nvdimmBPMCableCallout(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err) HWAS::NV_CONTROLLER_PART_TYPE, HWAS::SRCI_PRIORITY_LOW); - // Set ATTR_NV_STATUS_FLAG to restored as data may still persist - nvdimmSetStatusFlag(i_nvdimm, NSTD_VAL_ERROR); + // Set ATTR_NV_STATUS_FLAG to partially working as data may still persist + nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_VAL_SR); break; } @@ -635,8 +635,8 @@ bool nvdimmBPMCallout(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err) HWAS::NV_CONTROLLER_PART_TYPE, HWAS::SRCI_PRIORITY_LOW); - // Set ATTR_NV_STATUS_FLAG to restored as data may still persist - nvdimmSetStatusFlag(i_nvdimm, NSTD_VAL_ERROR); + // Set ATTR_NV_STATUS_FLAG to partially working as data may still persist + nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_VAL_SR); break; } @@ -654,8 +654,8 @@ bool nvdimmBPMCallout(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err) HWAS::NV_CONTROLLER_PART_TYPE, HWAS::SRCI_PRIORITY_LOW); - // Set ATTR_NV_STATUS_FLAG to restored as data may still persist - nvdimmSetStatusFlag(i_nvdimm, NSTD_VAL_ERROR); + // Set ATTR_NV_STATUS_FLAG to partially working as data may still persist + nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_VAL_SR); break; } @@ -687,8 +687,8 @@ bool nvdimmBPMCallout(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err) // Check arm status and set dimm status accordingly if(!l_continue) { - // Set ATTR_NV_STATUS_FLAG to restored as data may still persist - nvdimmSetStatusFlag(i_nvdimm, NSTD_VAL_ERROR); + // Set ATTR_NV_STATUS_FLAG to partially working as data may still persist + nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_VAL_SR); } else { @@ -716,8 +716,8 @@ bool nvdimmBPMCallout(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err) HWAS::NV_CONTROLLER_PART_TYPE, HWAS::SRCI_PRIORITY_LOW); - // Set ATTR_NV_STATUS_FLAG to restored as data may still persist - nvdimmSetStatusFlag(i_nvdimm, NSTD_VAL_ERROR); + // Set ATTR_NV_STATUS_FLAG to partially working as data may still persist + nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_VAL_SR); break; } diff --git a/src/usr/isteps/nvdimm/plugins/errludP_nvdimm.H b/src/usr/isteps/nvdimm/plugins/errludP_nvdimm.H index 2c7f1d2a0..1e951a96b 100644 --- a/src/usr/isteps/nvdimm/plugins/errludP_nvdimm.H +++ b/src/usr/isteps/nvdimm/plugins/errludP_nvdimm.H @@ -206,6 +206,8 @@ public: // 1 byte : CSAVE_INFO // 1 byte : CSAVE_FAIL_INFO0 // 1 byte : CSAVE_FAIL_INFO1 + // 1 byte : CSAVE_TIMEOUT_INFO0 + // 1 byte : CSAVE_TIMEOUT_INFO1 // 1 byte : ERROR_THRESHOLD_STATUS // 1 byte : NVDIMM_READY // 1 byte : NVDIMM_CMD_STATUS0 @@ -238,6 +240,10 @@ public: ++l_databuf; i_parser.PrintNumber("CSave Fail Info1 Register: ","%.2lX",TO_UINT8(l_databuf)); ++l_databuf; + i_parser.PrintNumber("CSave Timeout Info0 Register: ","%.2lX",TO_UINT8(l_databuf)); + ++l_databuf; + i_parser.PrintNumber("CSave Timeout Info1 Register: ","%.2lX",TO_UINT8(l_databuf)); + ++l_databuf; i_parser.PrintNumber("Error Threshold Status Register: ","%.2lX",TO_UINT8(l_databuf)); ++l_databuf; i_parser.PrintNumber("NVDIMM Ready Register: ","%.2lX",TO_UINT8(l_databuf)); diff --git a/src/usr/isteps/nvdimm/runtime/nvdimm_rt.C b/src/usr/isteps/nvdimm/runtime/nvdimm_rt.C index fc0944c8e..42a7b49d3 100644 --- a/src/usr/isteps/nvdimm/runtime/nvdimm_rt.C +++ b/src/usr/isteps/nvdimm/runtime/nvdimm_rt.C @@ -384,8 +384,8 @@ bool nvdimmArm(TargetHandleList &i_nvdimmTargetList) { TRACFCOMP(g_trac_nvdimm, "nvdimmArm() nvdimm[%X] failed to set event notification", get_huid(l_nvdimm)); - // Set NVDIMM Status flag to Restored, as error detected but data might persist - nvdimmSetStatusFlag(l_nvdimm, NSTD_VAL_ERROR); + // Set NVDIMM Status flag to partial working, as error detected but data might persist + nvdimmSetStatusFlag(l_nvdimm, NSTD_ERR_VAL_SR); /*@ *@errortype |