summaryrefslogtreecommitdiffstats
path: root/src/usr/isteps
diff options
context:
space:
mode:
authorMatt Derksen <mderkse1@us.ibm.com>2019-09-18 15:51:25 -0500
committerDaniel M Crowell <dcrowell@us.ibm.com>2019-11-05 09:10:36 -0600
commit027b53647116cde8a8284e9d07a7463a92e7def4 (patch)
treeaa94fef8c9c72080d8e28361818fc0437cbb0ee9 /src/usr/isteps
parente01fc4be9a402a42d26a154ec80cdebae9665163 (diff)
downloadtalos-hostboot-027b53647116cde8a8284e9d07a7463a92e7def4.tar.gz
talos-hostboot-027b53647116cde8a8284e9d07a7463a92e7def4.zip
NVDIMM FW update region write retry
Retry writing a region of code if the checksums do not match. Retry up to 3 times per region. Second of two commits for this defect. CQ:SW469894 Change-Id: Id3cbd71a2930a2dc19532b3b5c87f692dde447cd Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/80360 Reviewed-by: TSUNG K YEUNG <tyeung@us.ibm.com> Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com> Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com> Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com> Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com> Reviewed-by: Corey V Swenson <cswenson@us.ibm.com> Reviewed-by: Daniel M Crowell <dcrowell@us.ibm.com>
Diffstat (limited to 'src/usr/isteps')
-rw-r--r--src/usr/isteps/nvdimm/nvdimm_update.C64
-rw-r--r--src/usr/isteps/nvdimm/nvdimm_update.H13
2 files changed, 61 insertions, 16 deletions
diff --git a/src/usr/isteps/nvdimm/nvdimm_update.C b/src/usr/isteps/nvdimm/nvdimm_update.C
index a7687d556..359ad2e3f 100644
--- a/src/usr/isteps/nvdimm/nvdimm_update.C
+++ b/src/usr/isteps/nvdimm/nvdimm_update.C
@@ -146,8 +146,10 @@ typedef union {
} nvdimm_cmd_status0_t;
// A code update block is composed of this many bytes
-const uint8_t BYTES_PER_BLOCK = 32;
+constexpr uint8_t BYTES_PER_BLOCK = 32;
+// Maximum allowed region write retries
+constexpr uint8_t MAX_REGION_WRITE_RETRY_ATTEMPTS = 3;
///////////////////////////////////////////////////////////////////////////////
// NVDIMM LID Image
@@ -267,7 +269,8 @@ NvdimmInstalledImage::NvdimmInstalledImage(TARGETING::Target * i_nvDimm) :
iv_manufacturer_id(INVALID_ID), iv_product_id(INVALID_ID),
iv_timeout(INVALID_TIMEOUT),
iv_max_blocks_per_region(INVALID_REGION_BLOCK_SIZE),
- iv_fw_update_mode_enabled(false)
+ iv_fw_update_mode_enabled(false),
+ iv_region_write_retries(0)
{
// initialize to invalid values
}
@@ -819,6 +822,7 @@ errlHndl_t NvdimmInstalledImage::updateImageData(NvdimmLidImage * i_lidImage)
break;
}
+ uint8_t l_region_write_retries = 0; // local region write retry count
uint16_t region = 0;
while (region < fw_img_total_regions)
{
@@ -924,15 +928,17 @@ errlHndl_t NvdimmInstalledImage::updateImageData(NvdimmLidImage * i_lidImage)
if (hostCksm != nvCksm)
{
TRACFCOMP(g_trac_nvdimm_upd, ERR_MRK"updateImageData: "
- "Region %d of NVDIMM 0x%.8X: data checksums mismatch "
+ "Region %d out of %d on NVDIMM 0x%.8X: data checksums mismatch "
"(calc host: 0x%X and nv: 0x%X)",
- region, TARGETING::get_huid(iv_dimm), hostCksm, nvCksm);
+ region, fw_img_total_regions,
+ TARGETING::get_huid(iv_dimm), hostCksm, nvCksm);
/*@
*@errortype
*@moduleid UPDATE_IMAGE_DATA
*@reasoncode NVDIMM_CHECKSUM_ERROR
- *@userdata1 NVDIMM Target Huid
+ *@userdata1[0:31] NVDIMM Target Huid
+ *@userdata1[32:63] Retry count for this region
*@userdata2[0:15] Host checksum calculated
*@userdata2[16:31] NV checksum returned
*@userdata2[32:47] size of data for checksum
@@ -944,12 +950,13 @@ errlHndl_t NvdimmInstalledImage::updateImageData(NvdimmLidImage * i_lidImage)
ERRORLOG::ERRL_SEV_PREDICTIVE,
UPDATE_IMAGE_DATA,
NVDIMM_CHECKSUM_ERROR,
- TARGETING::get_huid(iv_dimm),
+ TWO_UINT32_TO_UINT64(
+ TARGETING::get_huid(iv_dimm),
+ l_region_write_retries),
FOUR_UINT16_TO_UINT64(
hostCksm, nvCksm,
region, data_len),
ERRORLOG::ErrlEntry::NO_SW_CALLOUT );
- l_err->collectTrace( NVDIMM_COMP_NAME, 256 );
nvdimmAddVendorLog(iv_dimm, l_err);
l_err->addPartCallout( iv_dimm,
HWAS::NV_CONTROLLER_PART_TYPE,
@@ -959,6 +966,28 @@ errlHndl_t NvdimmInstalledImage::updateImageData(NvdimmLidImage * i_lidImage)
nvdimmAddPage4Regs(iv_dimm,l_err);
nvdimmAddUpdateRegs(iv_dimm,l_err);
+ // Still below the maximum number of retry attempts allowed for this region?
+ if (l_region_write_retries < MAX_REGION_WRITE_RETRY_ATTEMPTS)
+ {
+ TRACFCOMP(g_trac_nvdimm_upd, ERR_MRK"updateImageData: "
+ "Region %d on NVDIMM 0x%.8X failed, retry %d",
+ region, TARGETING::get_huid(iv_dimm),l_region_write_retries);
+ l_err->collectTrace(NVDIMM_UPD, 512);
+
+ // Change PREDICTIVE to INFORMATIONAL as this might be recoverable
+ l_err->setSev(ERRORLOG::ERRL_SEV_INFORMATIONAL);
+
+ // Commit this log and retry region write
+ ERRORLOG::errlCommit(l_err, NVDIMM_COMP_ID);
+ l_err = nullptr;
+
+ // Update total for this region
+ l_region_write_retries++;
+
+ // update total retries for entire NVDIMM
+ iv_region_write_retries++;
+ continue;
+ }
break;
}
@@ -1997,7 +2026,8 @@ bool NvdimmsUpdate::runUpdateUsingLid(NvdimmLidImage * i_lidImage,
*@errortype INFORMATIONAL
*@reasoncode NVDIMM_UPDATE_COMPLETE
*@moduleid NVDIMM_RUN_UPDATE_USING_LID
- *@userdata1 NVDIMM Target Huid
+ *@userdata1[0:31] NVDIMM Target Huid
+ *@userdata1[32:63] Total region write retries
*@userdata2[0:15] Previous level
*@userdata2[16:31] Current updated level
*@userdata2[32:63] Installed type (manufacturer and product)
@@ -2005,14 +2035,16 @@ bool NvdimmsUpdate::runUpdateUsingLid(NvdimmLidImage * i_lidImage,
*@custdesc NVDIMM was successfully updated
*/
l_err = new ERRORLOG::ErrlEntry(
- ERRORLOG::ERRL_SEV_INFORMATIONAL,
- NVDIMM_RUN_UPDATE_USING_LID,
- NVDIMM_UPDATE_COMPLETE,
- l_nvdimm_huid,
- TWO_UINT16_ONE_UINT32_TO_UINT64(
- l_oldVersion, curVersion,
- l_installed_type),
- ERRORLOG::ErrlEntry::ADD_SW_CALLOUT );
+ ERRORLOG::ERRL_SEV_INFORMATIONAL,
+ NVDIMM_RUN_UPDATE_USING_LID,
+ NVDIMM_UPDATE_COMPLETE,
+ TWO_UINT32_TO_UINT64(
+ l_nvdimm_huid,
+ pInstalledImage->getRegionWriteRetries()),
+ TWO_UINT16_ONE_UINT32_TO_UINT64(
+ l_oldVersion, curVersion,
+ l_installed_type),
+ ERRORLOG::ErrlEntry::ADD_SW_CALLOUT );
l_err->collectTrace(NVDIMM_UPD, 512);
ERRORLOG::errlCommit(l_err, NVDIMM_COMP_ID);
}
diff --git a/src/usr/isteps/nvdimm/nvdimm_update.H b/src/usr/isteps/nvdimm/nvdimm_update.H
index 5e8a88eda..191d683f1 100644
--- a/src/usr/isteps/nvdimm/nvdimm_update.H
+++ b/src/usr/isteps/nvdimm/nvdimm_update.H
@@ -229,6 +229,15 @@ class NvdimmInstalledImage
}
/**
+ * @brief Accessor for the number of region write retries recorded so far
+ * @return Value of iv_region_write_retries (cumulative region write retries)
+ */
+ uint8_t getRegionWriteRetries(void)
+ {
+ return iv_region_write_retries;
+ }
+
+ /**
* @brief Update the current NV Controller
* @param Update using this image
* @return error pointer if failure to update, else nullptr
@@ -258,6 +267,10 @@ class NvdimmInstalledImage
// set to true when doing update
bool iv_fw_update_mode_enabled;
+ // cumulative count of region write retries across all regions of this NVDIMM
+ uint8_t iv_region_write_retries;
+
+
// Helper functions for updating the installed lid
/**
* @brief Transfer a region of bytes in multiple 32-byte blocks
OpenPOWER on IntegriCloud