summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMatt Derksen <mderkse1@us.ibm.com>2019-04-05 16:50:18 -0500
committerChristian R. Geddes <crgeddes@us.ibm.com>2019-05-16 11:16:30 -0500
commit22d6da8c6be6f5174cf9a923a80784ef1794429f (patch)
treecb62f4505953d44e83dc8dfd90e29d692cd365b6
parent4f842cf1d6febb37ca7917e40a9fb0a1e9e3e6e4 (diff)
downloadtalos-hostboot-22d6da8c6be6f5174cf9a923a80784ef1794429f.tar.gz
talos-hostboot-22d6da8c6be6f5174cf9a923a80784ef1794429f.zip
NVDIMM info logs around update
Information error logs around the start/end of individual NVDIMM updates. Also add in a 30 second retry window to nvdimm reads. Change-Id: I8f3e76e7dc811aad2817fa8c0d210c283ac61bcf Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/75678 Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com> Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com> Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com> Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com> Reviewed-by: Christian R. Geddes <crgeddes@us.ibm.com>
-rw-r--r--src/usr/isteps/nvdimm/nvdimm_update.C122
-rwxr-xr-xsrc/usr/isteps/nvdimm/nvdimmdd.C82
2 files changed, 136 insertions, 68 deletions
diff --git a/src/usr/isteps/nvdimm/nvdimm_update.C b/src/usr/isteps/nvdimm/nvdimm_update.C
index ee9a731e2..4f235aa6c 100644
--- a/src/usr/isteps/nvdimm/nvdimm_update.C
+++ b/src/usr/isteps/nvdimm/nvdimm_update.C
@@ -815,8 +815,11 @@ errlHndl_t NvdimmInstalledImage::updateImageData(NvdimmLidImage * i_lidImage)
uint16_t region = 0;
while (region < fw_img_total_regions)
{
- if (region % 10 == 0)
+ if (region % 100 == 0)
{
+ TRACFCOMP(g_trac_nvdimm_upd,
+ "updateImage: progress code for sending region %d",
+ region);
INITSERVICE::sendProgressCode();
}
TRACUCOMP(g_trac_nvdimm_upd, "updateImage: step 10.a - region 0x%04X",
@@ -1708,22 +1711,15 @@ errlHndl_t NvdimmInstalledImage::resetController()
}
else
{
- TRACUCOMP(g_trac_nvdimm_upd,"resetController: waiting 5 seconds after controller 0x%.8X reset",
- TARGETING::get_huid(iv_dimm));
-
- // sleep 5 seconds to allow for i2c controller to come back online
- nanosleep(5,0);
-
- TRACUCOMP(g_trac_nvdimm_upd,"resetController: now check if NV controller is ready again",
- TARGETING::get_huid(iv_dimm));
-
// Now wait until NV controller is ready again after reset
+ // nvdimmReady will retry NACK calls
l_err = nvdimmReady(iv_dimm);
if (l_err)
{
TRACFCOMP(g_trac_nvdimm_upd,ERR_MRK"resetController: NV controller for "
"NVDIMM 0x%.8X is not reporting as ready after reset",
TARGETING::get_huid(iv_dimm));
+
}
}
return l_err;
@@ -1785,14 +1781,71 @@ bool NvdimmsUpdate::runUpdateUsingLid(NvdimmLidImage * i_lidImage,
}
else if (updateNeeded)
{
+ // shared trace variables
+ const TARGETING::Target * l_nvdimm = pInstalledImage->getNvdimmTarget();
+ uint64_t l_nvdimm_huid = TARGETING::get_huid(l_nvdimm);
+
+ uint32_t l_installed_type = INVALID_TYPE;
+ l_err = pInstalledImage->getType(l_installed_type);
+ if (l_err)
+ {
+ // Continue updating other dimms
+ TRACFCOMP(g_trac_nvdimm_upd,
+ ERR_MRK"NvdimmsUpdate::runUpdateUsingLid() - "
+ "Unable to get nvdimm[0x%.8X] installed image type. "
+ "RC=0x%X, PLID=0x%.8X", l_nvdimm_huid,
+ ERRL_GETRC_SAFE(l_err), ERRL_GETPLID_SAFE(l_err));
+ commitPredictiveNvdimmError(l_err);
+ l_err = nullptr;
+ }
+
+ uint16_t l_oldVersion = INVALID_VERSION;
+ l_err = pInstalledImage->getVersion(l_oldVersion);
+ if (l_err)
+ {
+ // This shouldn't happen as getVersion should return a
+ // cached version
+ TRACFCOMP(g_trac_nvdimm_upd,
+ ERR_MRK"NvdimmsUpdate::runUpdateUsingLid() - "
+ "Failed to find current NVDIMM level of %.8X. "
+ "RC=0x%X, PLID=0x%.8X", l_nvdimm_huid,
+ ERRL_GETRC_SAFE(l_err), ERRL_GETPLID_SAFE(l_err));
+ commitPredictiveNvdimmError(l_err);
+ l_err = nullptr;
+ o_no_error_found = false;
+ continue;
+ }
+
// perform update for this DIMM with the current LID image
TRACFCOMP(g_trac_nvdimm_upd, "NvdimmsUpdate::runUpdateUsingLid() - "
- "now update nvdimm[0x%.8X]",
- TARGETING::get_huid(pInstalledImage->getNvdimmTarget()));
+ "now update nvdimm[0x%.8X]", l_nvdimm_huid);
TRACFCOMP(g_trac_nvdimm_upd,"Updating with flash size: 0x%08X",
i_lidImage->getFlashImageSize());
+ /*
+ *@errortype INFORMATIONAL
+ *@reasoncode NVDIMM_START_UPDATE
+ *@moduleid NVDIMM_RUN_UPDATE_USING_LID
+ *@userdata1 NVDIMM Target Huid
+ *@userdata2[0:15] Old level (current)
+ *@userdata2[16:31] Update image level (new)
+ *@userdata2[32:63] Installed type (manufacturer and product)
+ *@devdesc Start of the NVDIMM update of this controller
+ *@custdesc NVDIMM update started
+ */
+ l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_INFORMATIONAL,
+ NVDIMM_RUN_UPDATE_USING_LID,
+ NVDIMM_START_UPDATE,
+ l_nvdimm_huid,
+ TWO_UINT16_ONE_UINT32_TO_UINT64(
+ l_oldVersion, i_lidImage->getVersion(),
+ l_installed_type),
+ ERRORLOG::ErrlEntry::ADD_SW_CALLOUT);
+ l_err->collectTrace(NVDIMM_UPD, 256);
+ ERRORLOG::errlCommit(l_err, NVDIMM_COMP_ID);
+ l_err = nullptr;
+
l_err = pInstalledImage->updateImage(i_lidImage);
if (l_err)
{
@@ -1803,13 +1856,54 @@ bool NvdimmsUpdate::runUpdateUsingLid(NvdimmLidImage * i_lidImage,
TRACFCOMP(g_trac_nvdimm_upd,
ERR_MRK"NvdimmsUpdate::runUpdateUsingLid() - "
"NVDIMM 0x%.8X NV controller update failed. "
- "RC=0x%X, PLID=0x%.8X",
- TARGETING::get_huid(pInstalledImage->getNvdimmTarget()),
+ "RC=0x%X, PLID=0x%.8X", l_nvdimm_huid,
ERRL_GETRC_SAFE(l_err), ERRL_GETPLID_SAFE(l_err));
commitPredictiveNvdimmError(l_err);
l_err = nullptr;
o_no_error_found = false;
}
+ else
+ {
+ // successfully updated this NVDIMM
+
+ // Note: call for version should just return a saved value
+ uint16_t curVersion = INVALID_VERSION;
+ l_err = pInstalledImage->getVersion(curVersion);
+ if (l_err)
+ {
+ TRACFCOMP(g_trac_nvdimm_upd,
+ ERR_MRK"NvdimmsUpdate::runUpdateUsingLid() - "
+ "Failed to find current NVDIMM level of %.8X after "
+ "successful update. RC=0x%X, PLID=0x%.8X",
+ l_nvdimm_huid,
+ ERRL_GETRC_SAFE(l_err), ERRL_GETPLID_SAFE(l_err));
+ commitPredictiveNvdimmError(l_err);
+ l_err = nullptr;
+ }
+
+ /*
+ *@errortype INFORMATIONAL
+ *@reasoncode NVDIMM_UPDATE_COMPLETE
+ *@moduleid NVDIMM_RUN_UPDATE_USING_LID
+ *@userdata1 NVDIMM Target Huid
+ *@userdata2[0:15] Previous level
+ *@userdata2[16:31] Current updated level
+ *@userdata2[32:63] Installed type (manufacturer and product)
+ *@devdesc Successful update of NVDIMM code
+ *@custdesc NVDIMM was successfully updated
+ */
+ l_err = new ERRORLOG::ErrlEntry(
+ ERRORLOG::ERRL_SEV_INFORMATIONAL,
+ NVDIMM_RUN_UPDATE_USING_LID,
+ NVDIMM_UPDATE_COMPLETE,
+ l_nvdimm_huid,
+ TWO_UINT16_ONE_UINT32_TO_UINT64(
+ l_oldVersion, curVersion,
+ l_installed_type),
+ ERRORLOG::ErrlEntry::ADD_SW_CALLOUT );
+ l_err->collectTrace(NVDIMM_UPD, 512);
+ ERRORLOG::errlCommit(l_err, NVDIMM_COMP_ID);
+ }
} // end of updateNeeded
}
return o_no_error_found;
diff --git a/src/usr/isteps/nvdimm/nvdimmdd.C b/src/usr/isteps/nvdimm/nvdimmdd.C
index 730fe0271..84869266a 100755
--- a/src/usr/isteps/nvdimm/nvdimmdd.C
+++ b/src/usr/isteps/nvdimm/nvdimmdd.C
@@ -35,6 +35,7 @@
// Includes
// ----------------------------------------------
#include <string.h>
+#include <time.h>
#include <sys/time.h>
#include <trace/interface.H>
#include <errl/errlentry.H>
@@ -75,6 +76,7 @@ TRAC_INIT( & g_trac_nvdimmr, "NVDIMMR", KILOBYTE );
// ----------------------------------------------
#define MAX_BYTE_ADDR 2
#define NVDIMM_MAX_RETRIES 2
+#define MAX_READ_RETRY_SECS 30
// ----------------------------------------------
@@ -462,12 +464,13 @@ errlHndl_t nvdimmReadData( TARGETING::Target * i_target,
ENTER_MRK"nvdimmReadData()");
do
{
+ timespec_t l_CurTime, l_PrevTime;
+ clock_gettime(CLOCK_MONOTONIC, &l_PrevTime);
+ int retry = 0;
/************************************************************/
/* Attempt read multiple times ONLY on retryable fails */
/************************************************************/
- for (uint8_t retry = 0;
- retry <= NVDIMM_MAX_RETRIES;
- retry++)
+ do
{
// Only write the byte address if we have data to write
if( 0 != i_byteAddressSize )
@@ -567,64 +570,35 @@ errlHndl_t nvdimmReadData( TARGETING::Target * i_target,
else // Handle retryable error
{
// If op will be attempted again: save log and continue
- if ( retry < NVDIMM_MAX_RETRIES )
- {
- // Only save original retryable error
- if ( err_retryable == nullptr )
- {
- // Save original retryable error
- err_retryable = l_err;
-
- TRACFCOMP( g_trac_nvdimm, ERR_MRK"nvdimmReadData(): "
- "Retryable Error rc=0x%X, eid=0x%X, tgt=0x%X, "
- "retry/MAX=%d/%d. Save error and retry",
- err_retryable->reasonCode(),
- err_retryable->eid(),
- TARGETING::get_huid(i_target),
- retry, NVDIMM_MAX_RETRIES);
-
- err_retryable->collectTrace(NVDIMM_COMP_NAME);
- }
- else
- {
- // Add data to original retryable error
- TRACFCOMP( g_trac_nvdimm, ERR_MRK"nvdimmReadData(): "
- "Another Retryable Error rc=0x%X, eid=0x%X "
- "plid=0x%X, tgt=0x%X, retry/MAX=%d/%d. "
- "Delete error and retry",
- l_err->reasonCode(), l_err->eid(), l_err->plid(),
- TARGETING::get_huid(i_target),
- retry, NVDIMM_MAX_RETRIES);
-
- ERRORLOG::ErrlUserDetailsString(
- "Another Retryable ERROR found")
- .addToLog(err_retryable);
-
- // Delete this new retryable error
- delete l_err;
- l_err = nullptr;
- }
-
- // continue to retry
- continue;
- }
- else // no more retries: trace and break
+ // Only save original retryable error
+ if ( err_retryable == nullptr )
{
+ // Save original retryable error
+ err_retryable = l_err;
+
TRACFCOMP( g_trac_nvdimm, ERR_MRK"nvdimmReadData(): "
- "Error rc=0x%X, eid=%d, tgt=0x%X. No More "
- "Retries (retry/MAX=%d/%d). Returning Error",
- l_err->reasonCode(), l_err->eid(),
+ "Retryable Error rc=0x%X, eid=0x%X, tgt=0x%X, "
+ "retry/MAX=%d/%d. Save error and retry",
+ err_retryable->reasonCode(),
+ err_retryable->eid(),
TARGETING::get_huid(i_target),
- retry, NVDIMM_MAX_RETRIES);
-
- l_err->collectTrace(NVDIMM_COMP_NAME);
+ retry);
- // break from retry loop
- break;
+ err_retryable->collectTrace(NVDIMM_COMP_NAME);
}
- }
+ else
+ {
+ // Delete this new retryable error
+ delete l_err;
+ l_err = nullptr;
+ }
+ } // retryable error
+ // update current time
+ clock_gettime(CLOCK_MONOTONIC, &l_CurTime);
+ retry++;
} // end of retry loop
+ while( (l_CurTime.tv_sec - l_PrevTime.tv_sec) < MAX_READ_RETRY_SECS );
// Handle saved retryable error, if any
if (err_retryable)
OpenPOWER on IntegriCloud