diff options
author | MATTHEW I. HICKMAN <matthew.hickman@ibm.com> | 2019-09-03 16:28:45 -0500 |
---|---|---|
committer | Daniel M Crowell <dcrowell@us.ibm.com> | 2019-09-19 11:02:26 -0500 |
commit | b0cd81c61e92c24cd6046f769f06df196432e8c9 (patch) | |
tree | a09f584a002579fe2c5cbb43db05cf36a712149c | |
parent | 08501bc6578ac2ba40cc803ac2449828ee6b1550 (diff) | |
download | talos-hostboot-b0cd81c61e92c24cd6046f769f06df196432e8c9.tar.gz talos-hostboot-b0cd81c61e92c24cd6046f769f06df196432e8c9.zip |
Fixed several small bugs found via code review
CQ:SW474830
Change-Id: I1ff22805b9bb3facf68d4d62ea66de169be9b98b
Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/83213
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: Daniel M Crowell <dcrowell@us.ibm.com>
-rw-r--r-- | src/include/usr/isteps/nvdimm/nvdimm.H | 4 | ||||
-rw-r--r-- | src/include/usr/isteps/nvdimm/nvdimmreasoncodes.H | 2 | ||||
-rw-r--r-- | src/usr/isteps/nvdimm/nvdimm.C | 93 | ||||
-rw-r--r-- | src/usr/isteps/nvdimm/nvdimm.H | 2 | ||||
-rw-r--r-- | src/usr/isteps/nvdimm/nvdimmErrorLog.C | 23 | ||||
-rw-r--r-- | src/usr/isteps/nvdimm/runtime/nvdimm_rt.C | 125 |
6 files changed, 152 insertions, 97 deletions
diff --git a/src/include/usr/isteps/nvdimm/nvdimm.H b/src/include/usr/isteps/nvdimm/nvdimm.H index 2aae34c82..2f4430791 100644 --- a/src/include/usr/isteps/nvdimm/nvdimm.H +++ b/src/include/usr/isteps/nvdimm/nvdimm.H @@ -57,7 +57,7 @@ enum nvdimm_err_status * @param[in] i_nvdimmList - list of nvdimm targets * **/ -errlHndl_t nvdimm_restore(TARGETING::TargetHandleList &i_nvdimmList); +void nvdimm_restore(TARGETING::TargetHandleList &i_nvdimmList); /** @@ -456,7 +456,7 @@ void nvdimmAddPage4Regs(TARGETING::Target *i_nvdimm, errlHndl_t& io_err); * @return errlHndl_t - nullptr if successful, otherwise a pointer to * the error log. */ -errlHndl_t nvdimm_init(TARGETING::Target *i_nvdimm); +void nvdimm_init(TARGETING::Target *i_nvdimm); } diff --git a/src/include/usr/isteps/nvdimm/nvdimmreasoncodes.H b/src/include/usr/isteps/nvdimm/nvdimmreasoncodes.H index 72b77d835..8f27f3171 100644 --- a/src/include/usr/isteps/nvdimm/nvdimmreasoncodes.H +++ b/src/include/usr/isteps/nvdimm/nvdimmreasoncodes.H @@ -106,6 +106,7 @@ enum nvdimmModuleId NVDIMM_NVM_HEALTH_CHECK = 0x38, // Health check on the NVM (non-volatile memory)/flash NVDIMM_WAIT_OPER_OPS_COMPLETE = 0x39, NVDIMM_COMPARE_CKSUM = 0x3A, + NVDIMM_CHECK_FW_SLOT = 0x3B, }; /** @@ -196,6 +197,7 @@ enum nvdimmReasonCode NVDIMM_NVM_HEALTH_CHECK_FAILED = NVDIMM_COMP_ID | 0x4D, // !< An NVM health check on the NVDIMM failed NVDIMM_VENDOR_LOG_TIMEOUT = NVDIMM_COMP_ID | 0x4E, // Vendor log for FFDC timeout NVDIMM_VENDOR_LOG_CKSUM_FAILED = NVDIMM_COMP_ID | 0x4F, // Vendor log for FFDC checksum fail + NVDIMM_INVALID_FW_SLOT = NVDIMM_COMP_ID | 0x50, }; enum UserDetailsTypes diff --git a/src/usr/isteps/nvdimm/nvdimm.C b/src/usr/isteps/nvdimm/nvdimm.C index 8ef591a6a..f38e39ee9 100644 --- a/src/usr/isteps/nvdimm/nvdimm.C +++ b/src/usr/isteps/nvdimm/nvdimm.C @@ -155,6 +155,9 @@ static constexpr uint8_t MSBIT_SET_MASK = 0x80; static constexpr uint8_t MSBIT_CLR_MASK = 0x7F; static constexpr uint8_t OPERATION_SLEEP_SECONDS = 0x1; +// Bit mask for checking the fw slot running +static constexpr uint8_t RUNNING_FW_SLOT = 0xF0; + #ifndef __HOSTBOOT_RUNTIME // Warning thresholds static constexpr uint8_t THRESHOLD_ES_LIFETIME = 0x07; // 7% @@ -1675,7 +1678,7 @@ errlHndl_t nvdimmEpowSetup(TargetHandleList &i_nvdimmList) * @param[in] i_nvdimmList - list of nvdimm targets * */ -errlHndl_t nvdimm_restore(TargetHandleList &i_nvdimmList) +void nvdimm_restore(TargetHandleList &i_nvdimmList) { TRACUCOMP(g_trac_nvdimm, ENTER_MRK"nvdimm_restore()"); @@ -1783,15 +1786,10 @@ errlHndl_t nvdimm_restore(TargetHandleList &i_nvdimmList) if (l_err) { TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_restore() nvdimm[%X] failed during health status check", get_huid(l_nvdimm)); - if (l_exit) - { - errlCommit( l_err, NVDIMM_COMP_ID ); - } - else + errlCommit( l_err, NVDIMM_COMP_ID ); + if (!l_exit) { - // Redundant check with external err bugged - errlCommit( l_err, NVDIMM_COMP_ID ); - return l_err; + break; } } @@ -1814,7 +1812,6 @@ errlHndl_t nvdimm_restore(TargetHandleList &i_nvdimmList) }while(0); - // Return err not being handled, temp commit: if (l_err) { errlCommit(l_err, NVDIMM_COMP_ID); @@ -1832,7 +1829,6 @@ errlHndl_t nvdimm_restore(TargetHandleList &i_nvdimmList) } TRACUCOMP(g_trac_nvdimm, EXIT_MRK"nvdimm_restore()"); - return l_err; } /** @@ -1935,7 +1931,7 @@ errlHndl_t nvdimm_factory_reset(Target *i_nvdimm) * @param[in] i_nvdimm - nvdimm target * */ -errlHndl_t nvdimm_init(Target *i_nvdimm) +void nvdimm_init(Target *i_nvdimm) { TRACUCOMP(g_trac_nvdimm, ENTER_MRK"nvdimm_init() nvdimm[%X]", get_huid(i_nvdimm)); @@ -1986,6 +1982,52 @@ errlHndl_t nvdimm_init(Target *i_nvdimm) break; } + // Check if the firmware slot is 0 + l_err = nvdimmReadReg ( i_nvdimm, FW_SLOT_INFO, l_data); + + if (l_err) + { + nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR); + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_init() nvdimm[%X], failed to read slot info", + get_huid(i_nvdimm)); + break; + } + + if (!(l_data & RUNNING_FW_SLOT)) + { + nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_VAL_SR); + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_init() nvdimm[%X], running on fw slot 0", + get_huid(i_nvdimm)); + /*@ + *@errortype + *@reasoncode NVDIMM_INVALID_FW_SLOT + *@severity ERRORLOG_SEV_PREDICTIVE + *@moduleid NVDIMM_CHECK_FW_SLOT + *@userdata1[0:31] Related ops (0xff = NA) + *@userdata1[32:63] Target Huid + *@userdata2 <UNUSED> + *@devdesc Encountered error when checking the firmware slot running + * on NVDIMM. Firmware is running on slot 0 instead of 1 + *@custdesc NVDIMM incorrect firmware slot + */ + l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE, + NVDIMM_CHECK_FW_SLOT, + NVDIMM_INVALID_FW_SLOT, + NVDIMM_SET_USER_DATA_1(l_data, get_huid(i_nvdimm)), + 0x0, + ERRORLOG::ErrlEntry::NO_SW_CALLOUT ); + + l_err->collectTrace( NVDIMM_COMP_NAME ); + + // Add callout of nvdimm with no deconfig/gard + l_err->addHwCallout( i_nvdimm, + HWAS::SRCI_PRIORITY_LOW, + HWAS::NO_DECONFIG, + HWAS::GARD_NULL); + + errlCommit(l_err, NVDIMM_COMP_ID); + } + // Get the timeout values for the major ops at init l_err = nvdimmGetTimeoutVal(i_nvdimm); if (l_err) @@ -2023,8 +2065,12 @@ errlHndl_t nvdimm_init(Target *i_nvdimm) *@userdata1[0:31] Related ops (0xff = NA) *@userdata1[32:63] Target Huid *@userdata2 <UNUSED> - *@devdesc Encountered error erasing previously stored data image - * on NVDIMM. Likely due to timeout and/or controller error + *@devdesc NO_RESET_N: The NVDIMM experienced a power loss, but no CSAVE + * was triggered since the NVDIMM did not detect an asserted + * RESET_N. If there is a prior predicitve log for OCC in safe + * mode, than this would be the reason for NO_RESET_N. Otherwise + * there could be a problem with the RESET_N signal between proc + * and NVDIMM. *@custdesc NVDIMM error erasing data image */ l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE, @@ -2040,9 +2086,10 @@ errlHndl_t nvdimm_init(Target *i_nvdimm) // Failure to erase could mean internal NV controller error and/or // HW error on nand flash. NVDIMM will lose persistency if failed to // erase nand flash - l_err->addPartCallout( i_nvdimm, - HWAS::NV_CONTROLLER_PART_TYPE, - HWAS::SRCI_PRIORITY_LOW); + l_err->addHwCallout( i_nvdimm, + HWAS::SRCI_PRIORITY_LOW, + HWAS::NO_DECONFIG, + HWAS::GARD_NULL); // Collect register data for FFDC Traces nvdimmTraceRegs ( i_nvdimm, l_RegInfo ); @@ -2131,14 +2178,20 @@ errlHndl_t nvdimm_init(Target *i_nvdimm) HWAS::SRCI_PRIORITY_HIGH, HWAS::DECONFIG, HWAS::GARD_Fatal); + break; } else { + // Callout dimm without gard if image is valid + l_err->addHwCallout( i_nvdimm, + HWAS::SRCI_PRIORITY_LOW, + HWAS::NO_DECONFIG, + HWAS::GARD_NULL); + // Set ATTR_NV_STATUS_FLAG to partial working as data may persist nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_VAL_SR); errlCommit(l_err, NVDIMM_COMP_ID); } - break; } // Check Health Status Registers @@ -2158,14 +2211,10 @@ errlHndl_t nvdimm_init(Target *i_nvdimm) TRACUCOMP(g_trac_nvdimm, EXIT_MRK"nvdimm_init() nvdimm[%X]", get_huid(i_nvdimm)); - // Return err not being handled, temp commit: if (l_err) { errlCommit(l_err, NVDIMM_COMP_ID); } - - - return l_err; } diff --git a/src/usr/isteps/nvdimm/nvdimm.H b/src/usr/isteps/nvdimm/nvdimm.H index 5379385e9..697efddeb 100644 --- a/src/usr/isteps/nvdimm/nvdimm.H +++ b/src/usr/isteps/nvdimm/nvdimm.H @@ -366,7 +366,7 @@ enum i2c_out_values : uint8_t ARM_ERROR = 0X02, RSTR_ERROR = 0x02, SAVE_ERROR = 0x02, - ARM_CLEAR = 0x20, + ARM_CLEAR_ALL = 0x3C, }; // Timeout-related enum diff --git a/src/usr/isteps/nvdimm/nvdimmErrorLog.C b/src/usr/isteps/nvdimm/nvdimmErrorLog.C index 87866f2e1..384d083d4 100644 --- a/src/usr/isteps/nvdimm/nvdimmErrorLog.C +++ b/src/usr/isteps/nvdimm/nvdimmErrorLog.C @@ -375,13 +375,13 @@ bool nvdimmCalloutDimm(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err) if(!l_continue) { // Set ATTR_NV_STATUS_FLAG to partially working as data may still persist - nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_VAL_SR); + notifyNvdimmProtectionChange(i_nvdimm,NVDIMM_RISKY_HW_ERROR); - // Callout dimm and gard + // Callout dimm without deconfig or gard o_err->addHwCallout( i_nvdimm, HWAS::SRCI_PRIORITY_LOW, HWAS::NO_DECONFIG, - HWAS::GARD_Fatal); + HWAS::GARD_NULL); } else { @@ -392,11 +392,11 @@ bool nvdimmCalloutDimm(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err) errlCommit( l_err, NVDIMM_COMP_ID ); } - // Callout the dimm but do not deconfig or gard + // Callout and gard the dimm o_err->addHwCallout( i_nvdimm, - HWAS::SRCI_PRIORITY_LOW, + HWAS::SRCI_PRIORITY_HIGH, HWAS::NO_DECONFIG, - HWAS::GARD_NULL); + HWAS::GARD_Fatal); } break; @@ -412,7 +412,7 @@ bool nvdimmCalloutDimm(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err) HWAS::GARD_NULL); // Set ATTR_NV_STATUS_FLAG to partially working as data may persist despite errors - nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_VAL_SR); + notifyNvdimmProtectionChange(i_nvdimm,NVDIMM_RISKY_HW_ERROR); break; } @@ -557,7 +557,7 @@ bool nvdimmBPMCableCallout(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err) if(!l_continue) { // Set ATTR_NV_STATUS_FLAG to partially working as data may still persist - nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_VAL_SR); + notifyNvdimmProtectionChange(i_nvdimm,NVDIMM_RISKY_HW_ERROR); // Callout dimm but do not deconfig or gard o_err->addHwCallout( i_nvdimm, @@ -579,7 +579,6 @@ bool nvdimmBPMCableCallout(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err) HWAS::DECONFIG, HWAS::GARD_Fatal); } - break; } @@ -599,7 +598,7 @@ bool nvdimmBPMCableCallout(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err) HWAS::GARD_NULL); // Set ATTR_NV_STATUS_FLAG to partially working as data may still persist - nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_VAL_SR); + notifyNvdimmProtectionChange(i_nvdimm,NVDIMM_RISKY_HW_ERROR); break; } @@ -699,7 +698,7 @@ bool nvdimmBPMCallout(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err) if(!l_continue) { // Set ATTR_NV_STATUS_FLAG to partially working as data may still persist - nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_VAL_SR); + notifyNvdimmProtectionChange(i_nvdimm,NVDIMM_RISKY_HW_ERROR); } else { @@ -729,7 +728,7 @@ bool nvdimmBPMCallout(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err) HWAS::GARD_NULL); // Set ATTR_NV_STATUS_FLAG to partially working as data may still persist - nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_VAL_SR); + notifyNvdimmProtectionChange(i_nvdimm,NVDIMM_RISKY_HW_ERROR); break; } diff --git a/src/usr/isteps/nvdimm/runtime/nvdimm_rt.C b/src/usr/isteps/nvdimm/runtime/nvdimm_rt.C index 76b38ce99..ac00f81b0 100644 --- a/src/usr/isteps/nvdimm/runtime/nvdimm_rt.C +++ b/src/usr/isteps/nvdimm/runtime/nvdimm_rt.C @@ -239,11 +239,11 @@ bool nvdimmArm(TargetHandleList &i_nvdimmTargetList) break; } - // Clear ARM status register - l_err = nvdimmWriteReg(l_nvdimm, NVDIMM_MGT_CMD0, ARM_CLEAR); + // Clear all CMD error status registers + l_err = nvdimmWriteReg(l_nvdimm, NVDIMM_MGT_CMD0, ARM_CLEAR_ALL); if (l_err) { - TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmArm() nvdimm[%X] - error clearing ARM status register", + TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmArm() nvdimm[%X] - error clearing all status registers", get_huid(l_nvdimm)); break; } @@ -423,7 +423,7 @@ bool nvdimmArm(TargetHandleList &i_nvdimmTargetList) get_huid(l_nvdimm)); // Set NVDIMM Status flag to partial working, as error detected but data might persist - nvdimmSetStatusFlag(l_nvdimm, NSTD_ERR_VAL_SR); + notifyNvdimmProtectionChange(l_nvdimm, NVDIMM_RISKY_HW_ERROR); /*@ *@errortype @@ -487,72 +487,77 @@ bool nvdimmArm(TargetHandleList &i_nvdimmTargetList) return false; } - // Unmask MBACALFIR EventN and set to recoverable - for (TargetHandleList::iterator it = i_nvdimmTargetList.begin(); - it != i_nvdimmTargetList.end();) + // Unmask firs if the arm completed successfully + if (o_arm_successful) { - TargetHandleList l_mcaList; - getParentAffinityTargets(l_mcaList, *it, CLASS_UNIT, TYPE_MCA); - assert(l_mcaList.size(), "nvdimmArm() failed to find parent MCA."); - - // Set MBACALFIR_ACTION0 to recoverable - l_writeAddress = MBACALFIR_ACTION0_REG; - l_writeData = 0; - l_err = deviceRead(l_mcaList[0], &l_writeData, l_writeSize, - DEVICE_SCOM_ADDRESS(l_writeAddress)); - if(l_err) + // Unmask MBACALFIR EventN and set to recoverable + for (TargetHandleList::iterator it = i_nvdimmTargetList.begin(); + it != i_nvdimmTargetList.end();) { - TRACFCOMP(g_trac_nvdimm, "SCOM to address 0x%08x failed", - l_writeAddress); - errlCommit( l_err, NVDIMM_COMP_ID ); - } + TargetHandleList l_mcaList; + getParentAffinityTargets(l_mcaList, *it, CLASS_UNIT, TYPE_MCA); + assert(l_mcaList.size(), "nvdimmArm() failed to find parent MCA."); + + // Set MBACALFIR_ACTION0 to recoverable + l_writeAddress = MBACALFIR_ACTION0_REG; + l_writeData = 0; + l_err = deviceRead(l_mcaList[0], &l_writeData, l_writeSize, + DEVICE_SCOM_ADDRESS(l_writeAddress)); + if(l_err) + { + TRACFCOMP(g_trac_nvdimm, "SCOM to address 0x%08x failed", + l_writeAddress); + errlCommit( l_err, NVDIMM_COMP_ID ); + } - l_writeData &= MBACALFIR_EVENTN_AND_BIT; - l_err = deviceWrite(l_mcaList[0], &l_writeData, l_writeSize, + l_writeData &= MBACALFIR_EVENTN_AND_BIT; + l_err = deviceWrite(l_mcaList[0], &l_writeData, l_writeSize, + DEVICE_SCOM_ADDRESS(l_writeAddress)); + if(l_err) + { + TRACFCOMP(g_trac_nvdimm, "SCOM to address 0x%08x failed", + l_writeAddress); + errlCommit( l_err, NVDIMM_COMP_ID ); + } + + // Set MBACALFIR_ACTION1 to recoverable + l_writeAddress = MBACALFIR_ACTION1_REG; + l_writeData = 0; + l_err = deviceRead(l_mcaList[0], &l_writeData, l_writeSize, DEVICE_SCOM_ADDRESS(l_writeAddress)); - if(l_err) - { - TRACFCOMP(g_trac_nvdimm, "SCOM to address 0x%08x failed", - l_writeAddress); - errlCommit( l_err, NVDIMM_COMP_ID ); - } + if(l_err) + { + TRACFCOMP(g_trac_nvdimm, "SCOM to address 0x%08x failed", + l_writeAddress); + errlCommit( l_err, NVDIMM_COMP_ID ); + } - // Set MBACALFIR_ACTION1 to recoverable - l_writeAddress = MBACALFIR_ACTION1_REG; - l_writeData = 0; - l_err = deviceRead(l_mcaList[0], &l_writeData, l_writeSize, - DEVICE_SCOM_ADDRESS(l_writeAddress)); - if(l_err) - { - TRACFCOMP(g_trac_nvdimm, "SCOM to address 0x%08x failed", - l_writeAddress); - errlCommit( l_err, NVDIMM_COMP_ID ); - } + l_writeData |= MBACALFIR_EVENTN_OR_BIT; + l_err = deviceWrite(l_mcaList[0], &l_writeData, l_writeSize, + DEVICE_SCOM_ADDRESS(l_writeAddress)); + if(l_err) + { + TRACFCOMP(g_trac_nvdimm, "SCOM to address 0x%08x failed", + l_writeAddress); + errlCommit( l_err, NVDIMM_COMP_ID ); + } - l_writeData |= MBACALFIR_EVENTN_OR_BIT; - l_err = deviceWrite(l_mcaList[0], &l_writeData, l_writeSize, - DEVICE_SCOM_ADDRESS(l_writeAddress)); - if(l_err) - { - TRACFCOMP(g_trac_nvdimm, "SCOM to address 0x%08x failed", - l_writeAddress); - errlCommit( l_err, NVDIMM_COMP_ID ); - } + // Unmask MBACALFIR[8] + l_writeAddress = MBACALFIR_AND_MASK_REG; + l_writeData = MBACALFIR_UNMASK_BIT; + l_err = deviceWrite(l_mcaList[0], &l_writeData, l_writeSize, + DEVICE_SCOM_ADDRESS(l_writeAddress)); + if(l_err) + { + TRACFCOMP(g_trac_nvdimm, "SCOM to address 0x%08x failed", + l_writeAddress); + errlCommit( l_err, NVDIMM_COMP_ID ); + } - // Unmask MBACALFIR[8] - l_writeAddress = MBACALFIR_AND_MASK_REG; - l_writeData = MBACALFIR_UNMASK_BIT; - l_err = deviceWrite(l_mcaList[0], &l_writeData, l_writeSize, - DEVICE_SCOM_ADDRESS(l_writeAddress)); - if(l_err) - { - TRACFCOMP(g_trac_nvdimm, "SCOM to address 0x%08x failed", - l_writeAddress); - errlCommit( l_err, NVDIMM_COMP_ID ); + it++; } - it++; } TRACFCOMP(g_trac_nvdimm, EXIT_MRK"nvdimmArm() returning %d", |