summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/include/runtime/interface.h8
-rw-r--r--src/include/usr/isteps/nvdimm/nvdimm.H51
-rw-r--r--src/include/usr/isteps/nvdimm/nvdimmreasoncodes.H158
-rw-r--r--src/usr/isteps/nvdimm/nvdimm.H5
-rw-r--r--src/usr/isteps/nvdimm/runtime/nvdimm_rt.C708
-rw-r--r--src/usr/util/runtime/rt_cmds.C74
-rw-r--r--src/usr/util/runtime/rt_fwnotify.C26
7 files changed, 807 insertions, 223 deletions
diff --git a/src/include/runtime/interface.h b/src/include/runtime/interface.h
index 152226d54..638bf8b89 100644
--- a/src/include/runtime/interface.h
+++ b/src/include/runtime/interface.h
@@ -593,17 +593,19 @@ typedef struct hostInterfaces
// Arm the NV logic
HBRT_FW_NVDIMM_ARM = 0x0010,
- /// The following operation pertains to the Health of the NVDIMM
+ /// The following operations pertain to the Health of the NVDIMM
/// This operation can be performed with the arming/disarming
/// operation, these operation types are orthogonal to each other
- // Manufacturing energy source(ES) health check request
+ // Manufacturing(MNFG) energy source(ES) health check request
HBRT_FW_MNFG_ES_HEALTH_CHECK = 0x0020,
+ // Manufacturing(MNFG) non-volatile memory(NVM) health check request
+ HBRT_FW_MNFG_NVM_HEALTH_CHECK = 0x0040
};
// NVDIMM (PHYP -> HBRT) message to request NVDIMM operation(s)
struct nvdimm_operation_t
{
- uint64_t procId; // Retrieve all NVDIMMs under the processor ID, all
+ uint64_t procId; // Retrieve all NVDIMMs under the processor ID; all
// FFs (HBRT_NVDIMM_OPERATION_APPLY_TO_ALL_NVDIMMS)
// means operate on all NVDIMMs in the system
uint32_t rsvd1; // reserved
diff --git a/src/include/usr/isteps/nvdimm/nvdimm.H b/src/include/usr/isteps/nvdimm/nvdimm.H
index 4f7804f3e..9d5e3c0e0 100644
--- a/src/include/usr/isteps/nvdimm/nvdimm.H
+++ b/src/include/usr/isteps/nvdimm/nvdimm.H
@@ -206,7 +206,8 @@ bool nvdimmArm(TARGETING::TargetHandleList &i_nvdimmTargetList);
bool nvdimmDisarm(TARGETING::TargetHandleList &i_nvdimmTargetList);
/**
- * @brief Check the health status of the individual NVDIMMs supplied in list
+ * @brief Check the ES (energy source)/backup power module(BPM) health status of
+ * the individual NVDIMMs supplied in list
*
* @details The BPM will trigger the health check when power is applied at the
* beginning of the IPL, with results ready to check about 20 mins
@@ -219,25 +220,57 @@ bool nvdimmDisarm(TARGETING::TargetHandleList &i_nvdimmTargetList);
* Bit 1 : Health Check Succeeded
* Bit 2 : Health Check Failed
*
- * @param[in] i_nvdimmTargetList - list of NVDIMMs to check the health of
+ * @param[in] i_nvdimmTargetList - list of NVDIMMs to check the ES health of
*
- * @return false if one or more NVDIMMs fail health check, else true
+ * @return false if one or more NVDIMMs fail ES health check, else true
*/
-bool nvDimmCheckHealthStatus(TARGETING::TargetHandleList &i_nvdimmTargetList);
+bool nvDimmEsCheckHealthStatus(const TARGETING::TargetHandleList
+ &i_nvdimmTargetList);
/**
- * @brief A wrapper around the call to nvDimmCheckHealthStatus
+ * @brief A wrapper around the call to nvDimmEsCheckHealthStatus
*
* @details This will aggregate all the NVDIMMs of the system and pass
- * them to the call nvDimmCheckHealthStatus
+ * them to the call nvDimmEsCheckHealthStatus
*
- * @see nvDimmCheckHealthStatus for more details
+ * @see nvDimmEsCheckHealthStatus for more details
*
- * @return false if one or more NVDIMMs fail health check, else true
+ * @return false if one or more NVDIMMs fail an ES health check, else true
*/
-bool nvDimmCheckHealthStatusOnSystem();
+bool nvDimmEsCheckHealthStatusOnSystem();
+/**
+ * @brief Check the NVM (non-volatile memory)/flash health status of the
+ * individual NVDIMMs supplied in list.
+ *
+ * @details This method will check the flash error count registers
+ * (FLASH_ERROR_COUNT0 to FLASH_ERROR_COUNT2) to determine if the
+ * number of flash errors exceeds the maximum allowed. It also checks
+ * the flash bad block percentage register (FLASH_BAD_BLK_PCT) to
+ * determine if the percentage exceeds the maximum allowed.
+ * If either or both of these exceed their respective maximums,
+ * then a callout will be made with either or both failures.
+ *
+ * @param[in] i_nvdimmTargetList - list of NVDIMMs to check the NVM health of
+ *
+ * @return false if one or more NVDIMMs fail NVM health check, else true
+ */
+bool nvDimmNvmCheckHealthStatus(const TARGETING::TargetHandleList
+ &i_nvdimmTargetList);
+
+/**
+ * @brief A wrapper around the call to nvDimmNvmCheckHealthStatus
+ *
+ * @details This will aggregate all the NVDIMMs of the system and pass
+ * them to the call nvDimmNvmCheckHealthStatus
+ *
+ * @see nvDimmNvmCheckHealthStatus for more details
+ *
+ * @return false if one or more NVDIMMs fail an NVM health check, else true
+ */
+bool nvDimmNvmCheckHealthStatusOnSystem();
+
#endif
/**
diff --git a/src/include/usr/isteps/nvdimm/nvdimmreasoncodes.H b/src/include/usr/isteps/nvdimm/nvdimmreasoncodes.H
index da69fb86e..ad5c6be50 100644
--- a/src/include/usr/isteps/nvdimm/nvdimmreasoncodes.H
+++ b/src/include/usr/isteps/nvdimm/nvdimmreasoncodes.H
@@ -98,11 +98,12 @@ enum nvdimmModuleId
SET_ATTR_NVDIMM_ENCRYPTION_KEYS_FW = 0x30,
SEND_ATTR_NVDIMM_ARMED = 0x31,
NVDIMM_FACTORY_RESET = 0x32,
- NVDIMM_HEALTH_CHECK = 0x33,
+ NVDIMM_ES_HEALTH_CHECK = 0x33, // Health check on the ES (energy source)/backup power module
NVDIMM_CHECK_RESETN = 0x34,
NVDIMM_CHECK_CSAVE = 0x35,
NVDIMM_MODULE_HEALTH_STATUS_CHECK = 0x36,
NVDIMM_SET_EVENT_NOTIFICATION = 0x37,
+ NVDIMM_NVM_HEALTH_CHECK = 0x38, // Health check on the NVM (non-volatile memory)/flash
};
/**
@@ -113,83 +114,84 @@ enum nvdimmModuleId
*/
enum nvdimmReasonCode
{
- NVDIMM_INVALID_REASONCODE = NVDIMM_COMP_ID | 0x00, // Invalid Reasoncode
- NVDIMM_INVALID_OPERATION = NVDIMM_COMP_ID | 0x01,
- NVDIMM_INVALID_DEVICE_TYPE = NVDIMM_COMP_ID | 0x02,
- NVDIMM_ATTR_INFO_NOT_FOUND = NVDIMM_COMP_ID | 0x03,
- NVDIMM_INVALID_CHIP = NVDIMM_COMP_ID | 0x04,
- NVDIMM_I2C_MASTER_PATH_ERROR = NVDIMM_COMP_ID | 0x05,
- NVDIMM_TARGET_NULL = NVDIMM_COMP_ID | 0x06,
- NVDIMM_INVALID_ADDR_OFFSET_SIZE = NVDIMM_COMP_ID | 0x07,
- NVDIMM_OVERFLOW_ERROR = NVDIMM_COMP_ID | 0x08,
- NVDIMM_I2C_WRITE_PAGE_SIZE_ZERO = NVDIMM_COMP_ID | 0x09,
- NVDIMM_INVALID_OFFSET = NVDIMM_COMP_ID | 0x0A,
- NVDIMM_READ_FAILURE = NVDIMM_COMP_ID | 0x0B, // NV Controller read failure
- NVDIMM_WRITE_FAILURE = NVDIMM_COMP_ID | 0x0C, // NV Controller write failure
- NVDIMM_BACKUP_TIMEOUT = NVDIMM_COMP_ID | 0x0D, // Backup/save timeout
- NVDIMM_RESTORE_TIMEOUT = NVDIMM_COMP_ID | 0x0E, // Restore timeout
- NVDIMM_ERASE_TIMEOUT = NVDIMM_COMP_ID | 0x0F, // Erase timeout
- NVDIMM_CHARGE_TIMEOUT = NVDIMM_COMP_ID | 0x10, // Battery charging timeout
- NVDIMM_ARM_TIMEOUT = NVDIMM_COMP_ID | 0x11, // Arming timeout
- NVDIMM_SET_ES_ERROR = NVDIMM_COMP_ID | 0x12, // Failure to set the ES policy
- NVDIMM_MSS_STR_ENTRY_ERROR = NVDIMM_COMP_ID | 0x13, // Failure to enter STR
- NVDIMM_MSS_STR_EXIT_ERROR = NVDIMM_COMP_ID | 0x14, // Failure to exit STR
- NVDIMM_MSS_POST_RSTR_ERROR = NVDIMM_COMP_ID | 0x15, // Failure to perform post restore work
- NVDIMM_OPEN_PAGE_TIMEOUT = NVDIMM_COMP_ID | 0x16, // Open page timeout
- NVDIMM_STATUS_TIMEOUT = NVDIMM_COMP_ID | 0x17, // Status timeout
- NVDIMM_ARM_FAILED = NVDIMM_COMP_ID | 0x18, // Failure to arm reset_n
- NVDIMM_ERASE_FAILED = NVDIMM_COMP_ID | 0x19, // Failure to erase
- NVDIMM_RESTORE_FAILED = NVDIMM_COMP_ID | 0x1A, // Failure to restore
- NVDIMM_NOT_READY = NVDIMM_COMP_ID | 0x1B, // NVDIMM not ready for host to access
- NVDIMM_NULL_FIRMWARE_REQUEST_PTR = NVDIMM_COMP_ID | 0x1C, // Firmware request is NULL
- NVDIMM_UNSUPPORTED_NVDIMM_TYPE = NVDIMM_COMP_ID | 0x1D, // Unsupported NVDIMM type for update
- NVDIMM_OPERATION_IN_PROGRESS = NVDIMM_COMP_ID | 0x1E, // NV controller is busy
- NVDIMM_CHECKSUM_ERROR = NVDIMM_COMP_ID | 0x1F, // Checksum error between host and nv calculated
- NVDIMM_ZERO_TOTAL_REGIONS = NVDIMM_COMP_ID | 0x20, // Zero write regions calculated
- NVDIMM_UPDATE_MODE_UNCHANGED = NVDIMM_COMP_ID | 0x21, // Unable to change update mode
- NVDIMM_FW_OPS_IN_PROGRESS_TIMEOUT = NVDIMM_COMP_ID | 0x22, // Operations In Progress timeout
- NVDIMM_DATA_SIZE_TOO_LARGE = NVDIMM_COMP_ID | 0x23, // Trying to write too much data
- NVDIMM_DATA_SIZE_INVALID = NVDIMM_COMP_ID | 0x24, // Data size is invalid
- NVDIMM_BLOCK_NOT_RECEIVED = NVDIMM_COMP_ID | 0x25, // Block data not received
- NVDIMM_FW_OPS_NOT_SUCCESSFUL = NVDIMM_COMP_ID | 0x26, // Unsuccessful Firmware Operation
- NVDIMM_UPDATE_NOT_SUPPORTED = NVDIMM_COMP_ID | 0x27, // NV controller cannot be updated
- NVDIMM_START_UPDATE = NVDIMM_COMP_ID | 0x28, // start update
- NVDIMM_UPDATE_COMPLETE = NVDIMM_COMP_ID | 0x29, // update completed
- NVDIMM_TPM_NOT_FOUND = NVDIMM_COMP_ID | 0x2A, // TPM not found
- NVDIMM_POWER_SAVE_FAILURE = NVDIMM_COMP_ID | 0x2B, // Save failed due to power loss
- NVDIMM_CSAVE_ERROR = NVDIMM_COMP_ID | 0x2C, // CSave failed due to error
- NVDIMM_VOLTAGE_REGULATOR_FAILED = NVDIMM_COMP_ID | 0x2D,
- NVDIMM_VDD_LOST = NVDIMM_COMP_ID | 0x2E,
- NVDIMM_VPP_LOST = NVDIMM_COMP_ID | 0x2F,
- NVDIMM_VTT_LOST = NVDIMM_COMP_ID | 0x30,
- NVDIMM_DRAM_NOT_SELF_REFRESH = NVDIMM_COMP_ID | 0x31,
- NVDIMM_CONTROLLER_HARDWARE_ERROR = NVDIMM_COMP_ID | 0x32,
- NVDIMM_NVM_CONTROLLER_ERROR = NVDIMM_COMP_ID | 0x33,
- NVDIMM_NVM_LIFETIME_ERROR = NVDIMM_COMP_ID | 0x34,
- NVDIMM_NOT_ENOUGH_ENERGY_FOR_CSAVE = NVDIMM_COMP_ID | 0x35,
- NVDIMM_INVALID_FIRMWARE_ERROR = NVDIMM_COMP_ID | 0x36, // Module Health Status Registers
- NVDIMM_CONFIG_DATA_ERROR = NVDIMM_COMP_ID | 0x37,
- NVDIMM_NO_ES_PRESENT = NVDIMM_COMP_ID | 0x38,
- NVDIMM_ES_POLICY_NOT_SET = NVDIMM_COMP_ID | 0x39,
- NVDIMM_ES_HARDWARE_FAILURE = NVDIMM_COMP_ID | 0x3A,
- NVDIMM_ES_HEALTH_ASSESSMENT_ERROR = NVDIMM_COMP_ID | 0x3B,
- NVDIMM_ES_LIFETIME_ERROR = NVDIMM_COMP_ID | 0x3C,
- NVDIMM_ES_TEMP_ERROR = NVDIMM_COMP_ID | 0x3D,
- NVDIMM_SET_EVENT_NOTIFICATION_ERROR = NVDIMM_COMP_ID | 0x3E,
- NVDIMM_VERIF_BYTE_CHECK_FAILED = NVDIMM_COMP_ID | 0x3F, // Encryption key reg verif failed
- NVDIMM_ENCRYPTION_ENABLE_FAILED = NVDIMM_COMP_ID | 0x40, // Encryption enable failed
- NVDIMM_ENCRYPTION_ERASE_PENDING_FAILED = NVDIMM_COMP_ID | 0x41, // Encryption crypto erase pending failed
- NVDIMM_ENCRYPTION_ERASE_FAILED = NVDIMM_COMP_ID | 0x42, // Encryption crypto erase failed
- NVDIMM_ENCRYPTION_UNLOCK_FAILED = NVDIMM_COMP_ID | 0x43, // Encryption unlock failed
- NVDIMM_ENCRYPTION_INVALID_ATTRIBUTE = NVDIMM_COMP_ID | 0x44, // Encryption attribute key data invalid
- NVDIMM_ENCRYPTION_KEY_ATTRS_INVALID = NVDIMM_COMP_ID | 0x45, // Encryption key attributes are both invalid
- NVDIMM_ENCRYPTION_MAX_DARN_ERRORS = NVDIMM_COMP_ID | 0x46, // Darn random key gen reached max errors
- NVDIMM_ENCRYPTION_BAD_RANDOM_DATA = NVDIMM_COMP_ID | 0x47, // Generated key data not valid
- NVDIMM_CANNOT_MAKE_ATTRIBUTE = NVDIMM_COMP_ID | 0x48, // Cannot make Attribute
- NVDIMM_HEALTH_CHECK_IN_PROGRESS_FAILURE = NVDIMM_COMP_ID | 0x49, // !< pertains to ES_CMD_STATUS0[0]; the health check in progress flag
- NVDIMM_HEALTH_CHECK_REPORTED_FAILURE = NVDIMM_COMP_ID | 0x4A, // !< pertains to ES_CMD_STATUS0[2]; the health check reported a failure flag
- NVDIMM_LIFETIME_MIN_REQ_NOT_MET = NVDIMM_COMP_ID | 0x4B, // !< pertains to ES_LIFETIME; BPM does not meet minimum requirement for a new BPM
- NVDIMM_HEALTH_CHECK_NEVER_INITIATED = NVDIMM_COMP_ID | 0x4C, // !< A health check was never initiated at start of IPL
+ NVDIMM_INVALID_REASONCODE = NVDIMM_COMP_ID | 0x00, // Invalid Reasoncode
+ NVDIMM_INVALID_OPERATION = NVDIMM_COMP_ID | 0x01,
+ NVDIMM_INVALID_DEVICE_TYPE = NVDIMM_COMP_ID | 0x02,
+ NVDIMM_ATTR_INFO_NOT_FOUND = NVDIMM_COMP_ID | 0x03,
+ NVDIMM_INVALID_CHIP = NVDIMM_COMP_ID | 0x04,
+ NVDIMM_I2C_MASTER_PATH_ERROR = NVDIMM_COMP_ID | 0x05,
+ NVDIMM_TARGET_NULL = NVDIMM_COMP_ID | 0x06,
+ NVDIMM_INVALID_ADDR_OFFSET_SIZE = NVDIMM_COMP_ID | 0x07,
+ NVDIMM_OVERFLOW_ERROR = NVDIMM_COMP_ID | 0x08,
+ NVDIMM_I2C_WRITE_PAGE_SIZE_ZERO = NVDIMM_COMP_ID | 0x09,
+ NVDIMM_INVALID_OFFSET = NVDIMM_COMP_ID | 0x0A,
+ NVDIMM_READ_FAILURE = NVDIMM_COMP_ID | 0x0B, // NV Controller read failure
+ NVDIMM_WRITE_FAILURE = NVDIMM_COMP_ID | 0x0C, // NV Controller write failure
+ NVDIMM_BACKUP_TIMEOUT = NVDIMM_COMP_ID | 0x0D, // Backup/save timeout
+ NVDIMM_RESTORE_TIMEOUT = NVDIMM_COMP_ID | 0x0E, // Restore timeout
+ NVDIMM_ERASE_TIMEOUT = NVDIMM_COMP_ID | 0x0F, // Erase timeout
+ NVDIMM_CHARGE_TIMEOUT = NVDIMM_COMP_ID | 0x10, // Battery charging timeout
+ NVDIMM_ARM_TIMEOUT = NVDIMM_COMP_ID | 0x11, // Arming timeout
+ NVDIMM_SET_ES_ERROR = NVDIMM_COMP_ID | 0x12, // Failure to set the ES policy
+ NVDIMM_MSS_STR_ENTRY_ERROR = NVDIMM_COMP_ID | 0x13, // Failure to enter STR
+ NVDIMM_MSS_STR_EXIT_ERROR = NVDIMM_COMP_ID | 0x14, // Failure to exit STR
+ NVDIMM_MSS_POST_RSTR_ERROR = NVDIMM_COMP_ID | 0x15, // Failure to perform post restore work
+ NVDIMM_OPEN_PAGE_TIMEOUT = NVDIMM_COMP_ID | 0x16, // Open page timeout
+ NVDIMM_STATUS_TIMEOUT = NVDIMM_COMP_ID | 0x17, // Status timeout
+ NVDIMM_ARM_FAILED = NVDIMM_COMP_ID | 0x18, // Failure to arm reset_n
+ NVDIMM_ERASE_FAILED = NVDIMM_COMP_ID | 0x19, // Failure to erase
+ NVDIMM_RESTORE_FAILED = NVDIMM_COMP_ID | 0x1A, // Failure to restore
+ NVDIMM_NOT_READY = NVDIMM_COMP_ID | 0x1B, // NVDIMM not ready for host to access
+ NVDIMM_NULL_FIRMWARE_REQUEST_PTR = NVDIMM_COMP_ID | 0x1C, // Firmware request is NULL
+ NVDIMM_UNSUPPORTED_NVDIMM_TYPE = NVDIMM_COMP_ID | 0x1D, // Unsupported NVDIMM type for update
+ NVDIMM_OPERATION_IN_PROGRESS = NVDIMM_COMP_ID | 0x1E, // NV controller is busy
+ NVDIMM_CHECKSUM_ERROR = NVDIMM_COMP_ID | 0x1F, // Checksum error between host and nv calculated
+ NVDIMM_ZERO_TOTAL_REGIONS = NVDIMM_COMP_ID | 0x20, // Zero write regions calculated
+ NVDIMM_UPDATE_MODE_UNCHANGED = NVDIMM_COMP_ID | 0x21, // Unable to change update mode
+ NVDIMM_FW_OPS_IN_PROGRESS_TIMEOUT = NVDIMM_COMP_ID | 0x22, // Operations In Progress timeout
+ NVDIMM_DATA_SIZE_TOO_LARGE = NVDIMM_COMP_ID | 0x23, // Trying to write too much data
+ NVDIMM_DATA_SIZE_INVALID = NVDIMM_COMP_ID | 0x24, // Data size is invalid
+ NVDIMM_BLOCK_NOT_RECEIVED = NVDIMM_COMP_ID | 0x25, // Block data not received
+ NVDIMM_FW_OPS_NOT_SUCCESSFUL = NVDIMM_COMP_ID | 0x26, // Unsuccessful Firmware Operation
+ NVDIMM_UPDATE_NOT_SUPPORTED = NVDIMM_COMP_ID | 0x27, // NV controller cannot be updated
+ NVDIMM_START_UPDATE = NVDIMM_COMP_ID | 0x28, // start update
+ NVDIMM_UPDATE_COMPLETE = NVDIMM_COMP_ID | 0x29, // update completed
+ NVDIMM_TPM_NOT_FOUND = NVDIMM_COMP_ID | 0x2A, // TPM not found
+ NVDIMM_POWER_SAVE_FAILURE = NVDIMM_COMP_ID | 0x2B, // Save failed due to power loss
+ NVDIMM_CSAVE_ERROR = NVDIMM_COMP_ID | 0x2C, // CSave failed due to error
+ NVDIMM_VOLTAGE_REGULATOR_FAILED = NVDIMM_COMP_ID | 0x2D,
+ NVDIMM_VDD_LOST = NVDIMM_COMP_ID | 0x2E,
+ NVDIMM_VPP_LOST = NVDIMM_COMP_ID | 0x2F,
+ NVDIMM_VTT_LOST = NVDIMM_COMP_ID | 0x30,
+ NVDIMM_DRAM_NOT_SELF_REFRESH = NVDIMM_COMP_ID | 0x31,
+ NVDIMM_CONTROLLER_HARDWARE_ERROR = NVDIMM_COMP_ID | 0x32,
+ NVDIMM_NVM_CONTROLLER_ERROR = NVDIMM_COMP_ID | 0x33,
+ NVDIMM_NVM_LIFETIME_ERROR = NVDIMM_COMP_ID | 0x34,
+ NVDIMM_NOT_ENOUGH_ENERGY_FOR_CSAVE = NVDIMM_COMP_ID | 0x35,
+ NVDIMM_INVALID_FIRMWARE_ERROR = NVDIMM_COMP_ID | 0x36, // Module Health Status Registers
+ NVDIMM_CONFIG_DATA_ERROR = NVDIMM_COMP_ID | 0x37,
+ NVDIMM_NO_ES_PRESENT = NVDIMM_COMP_ID | 0x38,
+ NVDIMM_ES_POLICY_NOT_SET = NVDIMM_COMP_ID | 0x39,
+ NVDIMM_ES_HARDWARE_FAILURE = NVDIMM_COMP_ID | 0x3A,
+ NVDIMM_ES_HEALTH_ASSESSMENT_ERROR = NVDIMM_COMP_ID | 0x3B,
+ NVDIMM_ES_LIFETIME_ERROR = NVDIMM_COMP_ID | 0x3C,
+ NVDIMM_ES_TEMP_ERROR = NVDIMM_COMP_ID | 0x3D,
+ NVDIMM_SET_EVENT_NOTIFICATION_ERROR = NVDIMM_COMP_ID | 0x3E,
+ NVDIMM_VERIF_BYTE_CHECK_FAILED = NVDIMM_COMP_ID | 0x3F, // Encryption key reg verif failed
+ NVDIMM_ENCRYPTION_ENABLE_FAILED = NVDIMM_COMP_ID | 0x40, // Encryption enable failed
+ NVDIMM_ENCRYPTION_ERASE_PENDING_FAILED = NVDIMM_COMP_ID | 0x41, // Encryption crypto erase pending failed
+ NVDIMM_ENCRYPTION_ERASE_FAILED = NVDIMM_COMP_ID | 0x42, // Encryption crypto erase failed
+ NVDIMM_ENCRYPTION_UNLOCK_FAILED = NVDIMM_COMP_ID | 0x43, // Encryption unlock failed
+ NVDIMM_ENCRYPTION_INVALID_ATTRIBUTE = NVDIMM_COMP_ID | 0x44, // Encryption attribute key data invalid
+ NVDIMM_ENCRYPTION_KEY_ATTRS_INVALID = NVDIMM_COMP_ID | 0x45, // Encryption key attributes are both invalid
+ NVDIMM_ENCRYPTION_MAX_DARN_ERRORS = NVDIMM_COMP_ID | 0x46, // Darn random key gen reached max errors
+ NVDIMM_ENCRYPTION_BAD_RANDOM_DATA = NVDIMM_COMP_ID | 0x47, // Generated key data not valid
+ NVDIMM_CANNOT_MAKE_ATTRIBUTE = NVDIMM_COMP_ID | 0x48, // Cannot make Attribute
+ NVDIMM_ES_HEALTH_CHECK_IN_PROGRESS_FAILURE = NVDIMM_COMP_ID | 0x49, // !< pertains to ES_CMD_STATUS0[0]; the ES health check in progress flag
+ NVDIMM_ES_HEALTH_CHECK_REPORTED_FAILURE = NVDIMM_COMP_ID | 0x4A, // !< pertains to ES_CMD_STATUS0[2]; the ES health check reported a failure flag
+ NVDIMM_ES_LIFETIME_MIN_REQ_NOT_MET = NVDIMM_COMP_ID | 0x4B, // !< pertains to ES_LIFETIME; BPM does not meet minimum requirement for a new BPM
+ NVDIMM_ES_HEALTH_CHECK_NEVER_INITIATED = NVDIMM_COMP_ID | 0x4C, // !< An ES health check was never initiated at start of IPL
+ NVDIMM_NVM_HEALTH_CHECK_FAILED = NVDIMM_COMP_ID | 0x4D, // !< An NVM health check on the NVDIMM failed
};
enum UserDetailsTypes
diff --git a/src/usr/isteps/nvdimm/nvdimm.H b/src/usr/isteps/nvdimm/nvdimm.H
index a99f1180a..d2d2985b6 100644
--- a/src/usr/isteps/nvdimm/nvdimm.H
+++ b/src/usr/isteps/nvdimm/nvdimm.H
@@ -275,6 +275,11 @@ enum i2cReg : uint16_t
TYPED_BLOCK_DATA_BYTE30 = 0x39E,
TYPED_BLOCK_DATA_BYTE31 = 0x39F,
TYPED_BLOCK_DATA_OFFSET = 0x3E0,
+ FLASH_BAD_BLK_PCT = 0x41D, // Read only; Percentage of flash blocks
+ // in the flash array marked as bad blocks
+ FLASH_ERROR_COUNT0 = 0x428, // Read only; LSB[7:0] Flash error count
+ FLASH_ERROR_COUNT1 = 0x429, // Read only; [15:8]
+ FLASH_ERROR_COUNT2 = 0x42A, // Read only; MSB[23:16]
BPM_MAGIC_REG1 = 0x430,
BPM_MAGIC_REG2 = 0x431,
SCAP_STATUS = 0x432,
diff --git a/src/usr/isteps/nvdimm/runtime/nvdimm_rt.C b/src/usr/isteps/nvdimm/runtime/nvdimm_rt.C
index d615aa546..b38dd394d 100644
--- a/src/usr/isteps/nvdimm/runtime/nvdimm_rt.C
+++ b/src/usr/isteps/nvdimm/runtime/nvdimm_rt.C
@@ -25,7 +25,11 @@
/**
* @file nvdimm_rt.C
*
- * @brief NVDIMM functions only needed for runtime
+ * @brief NVDIMM functions only needed for runtime. These functions include
+ * but are not limited to: arming/disarming the NVDIMM, polling the
+ * arming and checking its status, checking the error state of the
+ * NVDIMM, getting a random number with the darn instruction, and
+ * checking the ES or NVM health status.
*/
/// BPM - Backup Power Module
@@ -734,65 +738,68 @@ errlHndl_t nvdimm_getRandom(uint8_t* o_genData)
}
/*
- * @brief Check the health status of the individual NVDIMMs supplied in list
+ * @brief Check the ES (energy source)/backup power module(BPM) health status of
+ * the individual NVDIMMs supplied in list
*
- * @param[in] i_nvdimmTargetList - list of NVDIMMs to check the health of
+ * @param[in] i_nvdimmTargetList - list of NVDIMMs to check the ES health of
*
- * @return false if one or more NVDIMMs fail health check, else true
+ * @return false if one or more NVDIMMs fail ES health check, else true
*/
-bool nvDimmCheckHealthStatus(TargetHandleList &i_nvdimmTargetList)
+bool nvDimmEsCheckHealthStatus(const TargetHandleList &i_nvdimmTargetList)
{
- TRACFCOMP(g_trac_nvdimm, ENTER_MRK"nvDimmCheckHealthStatus(): "
+ TRACFCOMP(g_trac_nvdimm, ENTER_MRK"nvDimmEsCheckHealthStatus(): "
"Target list size(%d)", i_nvdimmTargetList.size());
- // The minimum lifetime value
- const uint8_t LIFETIME_MINIMUM_REQUIREMENT = 0x62; // > 97%
+ // The minimum ES lifetime value
+ const uint8_t ES_LIFETIME_MINIMUM_REQUIREMENT = 0x62; // > 97%
- // The health check status flags for the different states of a health check
- const uint8_t HEALTH_CHECK_IN_PROGRESS_FLAG = 0x01; // bit 0
- const uint8_t HEALTH_CHECK_SUCCEEDED_FLAG = 0x02; // bit 1
- const uint8_t HEALTH_CHECK_FAILED_FLAG = 0x04; // bit 2
+ // The ES health check status flags for the different states of an
+ // ES health check
+ const uint8_t ES_HEALTH_CHECK_IN_PROGRESS_FLAG = 0x01; // bit 0
+ const uint8_t ES_HEALTH_CHECK_SUCCEEDED_FLAG = 0x02; // bit 1
+ const uint8_t ES_HEALTH_CHECK_FAILED_FLAG = 0x04; // bit 2
// Handle to catch any errors
errlHndl_t l_err(nullptr);
- // The health check status from a health check call
- uint8_t l_healthCheck(0);
+ // The ES health check status from an ES health check call
+ uint8_t l_esHealthCheck(0);
- // Status of the accumulation of all calls related to the health check.
+ // Status of the accumulation of all calls related to the ES health check.
// If any one call is bad/fails, then this will be false, else it stays true
- bool l_didHealthCheckPass(true);
+ bool l_didEsHealthCheckPass(true);
- // Iterate thru the NVDIMMs checking the health status of each one.
+ // Iterate thru the NVDIMMs checking the ES health status of each one.
// Going with the assumption that the caller waited the allotted time,
// roughly 20 to 30 minutes, after the start of an IPL.
// Success case:
- // * Health check initiated at start of the IPL, caller waited the
+ // * ES health check initiated at start of the IPL, caller waited the
// allotted time (20 to 30 mins) before doing a health check, health
// check returned success and the lifetime meets the minimum threshold
// for a new BPM.
// Error cases are:
- // * Health check is in progress, will assume BPM is hung
- // * Health check failed
- // * Health check succeeded but lifetime does not meet a certain threshold
+ // * ES health check is in progress, will assume BPM is hung
+ // * ES health check failed
+ // * ES health check succeeded but lifetime does not meet a
+ // certain threshold
// * If none of the above apply (success case and other error cases),
- // then assume the health check was never initiated at the start of the
- // IPL
+ // then assume the ES health check was never initiated at the start
+ // of the IPL
// For each of these error cases do a predictive callout
for (auto const l_nvdimm : i_nvdimmTargetList)
{
// Retrieve the Health Check status from the BPM
- TRACFCOMP(g_trac_nvdimm, INFO_MRK"nvDimmCheckHealthStatus(): "
- "Reading NVDIMM(0x%.8X) health check data, "
+ TRACFCOMP(g_trac_nvdimm, INFO_MRK"nvDimmEsCheckHealthStatus(): "
+ "Reading NVDIMM(0x%.8X) ES health check data, "
"register ES_CMD_STATUS0(0x%.2X)",
get_huid(l_nvdimm), ES_CMD_STATUS0);
- l_err = nvdimmReadReg(l_nvdimm, ES_CMD_STATUS0, l_healthCheck);
+ l_err = nvdimmReadReg(l_nvdimm, ES_CMD_STATUS0, l_esHealthCheck);
if (l_err)
{
- TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvDimmCheckHealthStatus(): "
- "NVDIMM(0x%X) failed to read the health check "
+ TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvDimmEsCheckHealthStatus(): "
+ "NVDIMM(0x%X) failed to read the ES health check "
"data, register ES_CMD_STATUS0(0x%.2X)",
get_huid(l_nvdimm), ES_CMD_STATUS0);
@@ -801,43 +808,43 @@ bool nvDimmCheckHealthStatus(TargetHandleList &i_nvdimmTargetList)
errlCommit(l_err, NVDIMM_COMP_ID);
// Let the caller know something went amiss
- l_didHealthCheckPass = false;
+ l_didEsHealthCheckPass = false;
// Proceed to next NVDIMM, better luck next time
continue;
}
// Trace out the returned data for inspection
- TRACFCOMP(g_trac_nvdimm, INFO_MRK"nvDimmCheckHealthStatus(): "
- "NVDIMM(0x%X) returned value(0x%.2X) from health check "
- "data, register ES_CMD_STATUS0(0x%.2X)",
- get_huid(l_nvdimm), l_healthCheck, ES_CMD_STATUS0)
+ TRACFCOMP(g_trac_nvdimm, INFO_MRK"nvDimmEsCheckHealthStatus(): "
+ "NVDIMM(0x%X) returned value(0x%.2X) from the ES health "
+ "check data, register ES_CMD_STATUS0(0x%.2X)",
+ get_huid(l_nvdimm), l_esHealthCheck, ES_CMD_STATUS0);
- if (l_healthCheck & HEALTH_CHECK_IN_PROGRESS_FLAG)
+ if (l_esHealthCheck & ES_HEALTH_CHECK_IN_PROGRESS_FLAG)
{
- TRACFCOMP( g_trac_nvdimm, ERR_MRK"nvDimmCheckHealthStatus(): "
+ TRACFCOMP( g_trac_nvdimm, ERR_MRK"nvDimmEsCheckHealthStatus(): "
"Assuming caller waited the allotted time before "
- "doing a health check on NVDIMM(0x%.8X), the BPM "
- "is hung doing the health check.",
+ "doing an ES health check on NVDIMM(0x%.8X), the BPM "
+ "is hung doing the ES health check.",
get_huid(l_nvdimm) );
/*@
* @errortype
* @severity ERRL_SEV_PREDICTIVE
- * @moduleid NVDIMM_HEALTH_CHECK
- * @reasoncode NVDIMM_HEALTH_CHECK_IN_PROGRESS_FAILURE
+ * @moduleid NVDIMM_ES_HEALTH_CHECK
+ * @reasoncode NVDIMM_ES_HEALTH_CHECK_IN_PROGRESS_FAILURE
* @userdata1 HUID of NVDIMM target
- * @userdata2 Health check status
+ * @userdata2 ES health check status
* @devdesc Assuming caller waited the allotted time before
- * doing a health check, then the BPM is hung doing
- * the health check.
- * @custdesc NVDIMM Health Check failed.
+ * doing an ES health check, then the BPM is hung doing
+ * the ES health check.
+ * @custdesc NVDIMM ES health check failed.
*/
l_err = new ErrlEntry( ERRL_SEV_PREDICTIVE,
- NVDIMM_HEALTH_CHECK,
- NVDIMM_HEALTH_CHECK_IN_PROGRESS_FAILURE,
+ NVDIMM_ES_HEALTH_CHECK,
+ NVDIMM_ES_HEALTH_CHECK_IN_PROGRESS_FAILURE,
get_huid(l_nvdimm),
- l_healthCheck,
+ l_esHealthCheck,
ErrlEntry::NO_SW_CALLOUT );
l_err->collectTrace(NVDIMM_COMP_NAME);
@@ -849,34 +856,33 @@ bool nvDimmCheckHealthStatus(TargetHandleList &i_nvdimmTargetList)
errlCommit(l_err, NVDIMM_COMP_ID);
// Let the caller know something went amiss
- l_didHealthCheckPass = false;
+ l_didEsHealthCheckPass = false;
}
- else if (l_healthCheck & HEALTH_CHECK_FAILED_FLAG)
+ else if (l_esHealthCheck & ES_HEALTH_CHECK_FAILED_FLAG)
{
- TRACFCOMP( g_trac_nvdimm, ERR_MRK"nvDimmCheckHealthStatus(): "
+ TRACFCOMP( g_trac_nvdimm, ERR_MRK"nvDimmEsCheckHealthStatus(): "
"Assuming caller waited the allotted time before "
- "doing a health check on NVDIMM(0x%.8X), the BPM "
+ "doing an ES health check on NVDIMM(0x%.8X), the BPM "
"reported a failure.",
get_huid(l_nvdimm) );
/*@
* @errortype
* @severity ERRL_SEV_PREDICTIVE
- * @moduleid NVDIMM_HEALTH_CHECK
- * @reasoncode NVDIMM_HEALTH_CHECK_REPORTED_FAILURE
+ * @moduleid NVDIMM_ES_HEALTH_CHECK
+ * @reasoncode NVDIMM_ES_HEALTH_CHECK_REPORTED_FAILURE
* @userdata1 HUID of NVDIMM target
- * @userdata2 Health check status
- * @devdesc NVDIMM Health Check failed
+ * @userdata2 ES health check status
* @devdesc Assuming caller waited the allotted time before
- * doing a health check, the BPM reported a failure
- * while doing a health check.
- * @custdesc NVDIMM Health Check failed.
+ * doing an ES health check, the BPM reported a failure
+ * while doing an ES health check.
+ * @custdesc NVDIMM ES health check failed.
*/
l_err = new ErrlEntry( ERRL_SEV_PREDICTIVE,
- NVDIMM_HEALTH_CHECK,
- NVDIMM_HEALTH_CHECK_REPORTED_FAILURE,
+ NVDIMM_ES_HEALTH_CHECK,
+ NVDIMM_ES_HEALTH_CHECK_REPORTED_FAILURE,
get_huid(l_nvdimm),
- l_healthCheck,
+ l_esHealthCheck,
ErrlEntry::NO_SW_CALLOUT );
l_err->collectTrace(NVDIMM_COMP_NAME);
@@ -888,12 +894,12 @@ bool nvDimmCheckHealthStatus(TargetHandleList &i_nvdimmTargetList)
errlCommit(l_err, NVDIMM_COMP_ID);
// Let the caller know something went amiss
- l_didHealthCheckPass = false;
+ l_didEsHealthCheckPass = false;
}
- else if (l_healthCheck & HEALTH_CHECK_SUCCEEDED_FLAG)
+ else if (l_esHealthCheck & ES_HEALTH_CHECK_SUCCEEDED_FLAG)
{
- TRACFCOMP(g_trac_nvdimm, INFO_MRK"nvDimmCheckHealthStatus(): "
- "Reading NVDIMM(0x%.8X) es lifetime data, "
+ TRACFCOMP(g_trac_nvdimm, INFO_MRK"nvDimmEsCheckHealthStatus(): "
+ "Reading NVDIMM(0x%.8X) ES lifetime data, "
"register ES_LIFETIME(0x%.2X)",
get_huid(l_nvdimm), ES_LIFETIME);
@@ -905,7 +911,7 @@ bool nvDimmCheckHealthStatus(TargetHandleList &i_nvdimmTargetList)
if (l_err)
{
- TRACFCOMP( g_trac_nvdimm, ERR_MRK"nvDimmCheckHealthStatus(): "
+ TRACFCOMP( g_trac_nvdimm, ERR_MRK"nvDimmEsCheckHealthStatus(): "
"NVDIMM(0x%.8X) failed to read the "
"ES_LIFETIME(0x%.2X) data",
get_huid(l_nvdimm),
@@ -916,42 +922,42 @@ bool nvDimmCheckHealthStatus(TargetHandleList &i_nvdimmTargetList)
errlCommit(l_err, NVDIMM_COMP_ID);
// Let the caller know something went amiss
- l_didHealthCheckPass = false;
+ l_didEsHealthCheckPass = false;
}
- else if (l_lifetimePercentage < LIFETIME_MINIMUM_REQUIREMENT)
+ else if (l_lifetimePercentage < ES_LIFETIME_MINIMUM_REQUIREMENT)
{
- TRACFCOMP( g_trac_nvdimm, ERR_MRK"nvDimmCheckHealthStatus(): "
- "Health check on NVDIMM(0x%.8X) succeeded but the "
- "BPM's lifetime(%d) does not meet the minimum "
+ TRACFCOMP( g_trac_nvdimm, ERR_MRK"nvDimmEsCheckHealthStatus(): "
+ "ES health check on NVDIMM(0x%.8X) succeeded but "
+ "the BPM's lifetime(%d) does not meet the minimum "
"requirement(%d) needed to qualify as a new BPM.",
get_huid(l_nvdimm),
l_lifetimePercentage,
- LIFETIME_MINIMUM_REQUIREMENT );
+ ES_LIFETIME_MINIMUM_REQUIREMENT );
/*@
* @errortype
* @severity ERRL_SEV_PREDICTIVE
- * @moduleid NVDIMM_HEALTH_CHECK
- * @reasoncode NVDIMM_LIFETIME_MIN_REQ_NOT_MET
+ * @moduleid NVDIMM_ES_HEALTH_CHECK
+ * @reasoncode NVDIMM_ES_LIFETIME_MIN_REQ_NOT_MET
* @userdata1[00:31] HUID of NVDIMM target
- * @userdata1[32:63] Health check status
+ * @userdata1[32:63] ES health check status
* @userdata2[00:31] Retrieved lifetime percentage
* @userdata2[32:63] lifetime minimum requirement
- * @devdesc Health check succeeded but the BPM's
+ * @devdesc ES health check succeeded but the BPM's
* lifetime does not meet the minimum
* requirement needed to qualify as a
* new BPM.
- * @custdesc NVDIMM Health Check failed
+ * @custdesc NVDIMM ES health check failed
*/
l_err = new ErrlEntry( ERRL_SEV_PREDICTIVE,
- NVDIMM_HEALTH_CHECK,
- NVDIMM_LIFETIME_MIN_REQ_NOT_MET,
+ NVDIMM_ES_HEALTH_CHECK,
+ NVDIMM_ES_LIFETIME_MIN_REQ_NOT_MET,
TWO_UINT32_TO_UINT64(
get_huid(l_nvdimm),
- l_healthCheck),
+ l_esHealthCheck),
TWO_UINT32_TO_UINT64(
l_lifetimePercentage,
- LIFETIME_MINIMUM_REQUIREMENT),
+ ES_LIFETIME_MINIMUM_REQUIREMENT),
ErrlEntry::NO_SW_CALLOUT );
l_err->collectTrace(NVDIMM_COMP_NAME);
@@ -963,45 +969,46 @@ bool nvDimmCheckHealthStatus(TargetHandleList &i_nvdimmTargetList)
errlCommit(l_err, NVDIMM_COMP_ID);
// Let the caller know something went amiss
- l_didHealthCheckPass = false;
+ l_didEsHealthCheckPass = false;
} // end else if (l_lifetimePercentage ...
else
{
- TRACFCOMP( g_trac_nvdimm, ERR_MRK"nvdimmCheckHealthStatus(): "
- "Success: Health check on NVDIMM(0x%.8X) succeeded "
- "and the BPM's lifetime(%d) meet's the minimum "
- "requirement(%d) needed to qualify as a new BPM.",
+ TRACFCOMP( g_trac_nvdimm, ERR_MRK"nvDimmEsCheckHealthStatus(): "
+ "Success: ES health check on NVDIMM(0x%.8X) "
+ "succeeded and the BPM's lifetime(%d) meet's the "
+ "minimum requirement(%d) needed to qualify as "
+ "a new BPM.",
get_huid(l_nvdimm),
l_lifetimePercentage,
- LIFETIME_MINIMUM_REQUIREMENT );
+ ES_LIFETIME_MINIMUM_REQUIREMENT );
}
- } // end else if (l_healthCheck & HEALTH_CHECK_SUCCEEDED_FLAG)
- else // Assume the health check was never initiated at
+ } // end else if (l_esHealthCheck & ES_HEALTH_CHECK_SUCCEEDED_FLAG)
+ else // Assume the ES health check was never initiated at
// the start of the IPL.
{
- TRACFCOMP( g_trac_nvdimm, ERR_MRK"nvDimmCheckHealthStatus(): "
- "The health check on NVDIMM(0x%.8X) shows no status (in "
- "progress, fail or succeed) so assuming it was never "
- "initiated at the start of the IPL.",
+ TRACFCOMP( g_trac_nvdimm, ERR_MRK"nvDimmEsCheckHealthStatus(): "
+ "The ES health check on NVDIMM(0x%.8X) shows no status "
+ "(in progress, fail or succeed) so assuming it was "
+ "never initiated at the start of the IPL.",
get_huid(l_nvdimm) );
/*@
* @errortype
* @severity ERRL_SEV_PREDICTIVE
- * @moduleid NVDIMM_HEALTH_CHECK
- * @reasoncode NVDIMM_HEALTH_CHECK_NEVER_INITIATED
+ * @moduleid NVDIMM_ES_HEALTH_CHECK
+ * @reasoncode NVDIMM_ES_HEALTH_CHECK_NEVER_INITIATED
* @userdata1 HUID of NVDIMM target
- * @userdata2 Health check status
- * @devdesc The health check shows no status (in progress, fail
- * or succeed) so assuming it was never initiated
+ * @userdata2 ES health check status
+ * @devdesc The ES health check shows no status (in progress,
+ * fail or succeed) so assuming it was never initiated
* at the start of the IPL.
- * @custdesc NVDIMM Health Check failed.
+ * @custdesc NVDIMM ES health check failed.
*/
l_err = new ErrlEntry( ERRL_SEV_PREDICTIVE,
- NVDIMM_HEALTH_CHECK,
- NVDIMM_HEALTH_CHECK_NEVER_INITIATED,
+ NVDIMM_ES_HEALTH_CHECK,
+ NVDIMM_ES_HEALTH_CHECK_NEVER_INITIATED,
get_huid(l_nvdimm),
- l_healthCheck,
+ l_esHealthCheck,
ErrlEntry::NO_SW_CALLOUT );
l_err->collectTrace(NVDIMM_COMP_NAME);
@@ -1013,42 +1020,509 @@ bool nvDimmCheckHealthStatus(TargetHandleList &i_nvdimmTargetList)
errlCommit(l_err, NVDIMM_COMP_ID);
// Let the caller know something went amiss
- l_didHealthCheckPass = false;
+ l_didEsHealthCheckPass = false;
}
} // end for (auto const l_nvdimm : i_nvdimmTargetList)
// Should not have any uncommitted errors
- assert(l_err == NULL, "nvDimmCheckHealthStatus() - unexpected uncommitted"
- "error found" );
+ assert(l_err == NULL, "nvDimmEsCheckHealthStatus() - unexpected "
+ "uncommitted error found" );
- TRACFCOMP(g_trac_nvdimm, EXIT_MRK"nvDimmCheckHealthStatus(): "
- "Returning %s", l_didHealthCheckPass == true ? "true" : "false" );
+ TRACFCOMP(g_trac_nvdimm, EXIT_MRK"nvDimmEsCheckHealthStatus(): "
+ "Returning %s", l_didEsHealthCheckPass == true ? "true" : "false");
- return l_didHealthCheckPass;
-} // end nvDimmCheckHealthStatus
+ return l_didEsHealthCheckPass;
+} // end nvDimmEsCheckHealthStatus
/**
- * @brief A wrapper around the call to nvDimmCheckHealthStatus
+ * @brief A wrapper around the call to nvDimmEsCheckHealthStatus
*
- * @see nvDimmCheckHealthStatus for more details
+ * @see nvDimmEsCheckHealthStatus for more details
*
- * @return false if one or more NVDIMMs fail health check, else true
+ * @return false if one or more NVDIMMs fail an ES health check, else true
*/
-bool nvDimmCheckHealthStatusOnSystem()
+bool nvDimmEsCheckHealthStatusOnSystem()
{
- TRACFCOMP(g_trac_nvdimm, ENTER_MRK"nvDimmCheckHealthStatusOnSystem()");
+ TRACFCOMP(g_trac_nvdimm, ENTER_MRK"nvDimmEsCheckHealthStatusOnSystem()");
// Get the list of NVDIMM Targets from the system
TargetHandleList l_nvDimmTargetList;
nvdimm_getNvdimmList(l_nvDimmTargetList);
// Return status of doing a check health status
- bool l_didHealthCheckPass = nvDimmCheckHealthStatus(l_nvDimmTargetList);
+ bool l_didEsHealthCheckPass = nvDimmEsCheckHealthStatus(l_nvDimmTargetList);
- TRACFCOMP(g_trac_nvdimm, EXIT_MRK"nvDimmCheckHealthStatusOnSystem(): "
- "Returning %s", l_didHealthCheckPass == true ? "true" : "false" );
+ TRACFCOMP(g_trac_nvdimm, EXIT_MRK"nvDimmEsCheckHealthStatusOnSystem(): "
+ "Returning %s", l_didEsHealthCheckPass == true ? "true" : "false" );
- return l_didHealthCheckPass;
+ return l_didEsHealthCheckPass;
} // end nvDimmCheckHealthStatusOnSystem
+/*
+ * @brief Read an NVDIMM's bad flash block percentage and compare it against
+ *        a caller supplied maximum.
+ *
+ * @details The result is a tristate - 1 pass, 2 different fails
+ *          true returned: the percentage is at or below the maximum and
+ *              o_badFlashBlockPercentage holds the value that was read.
+ *          false returned with o_badFlashBlockPercentage == 0: the read of
+ *              register FLASH_BAD_BLK_PCT failed. The read error is
+ *              committed here as predictive.
+ *          false returned with o_badFlashBlockPercentage != 0: the value
+ *              that was read exceeds the maximum. The value is left in
+ *              o_badFlashBlockPercentage for the caller to inspect.
+ *
+ * @param[in]  i_nvDimm - The NVDIMM to check
+ * @param[in]  i_maxPercentageAllowed - The maximum percentage of bad flash
+ *                                      blocks allowed
+ * @param[out] o_badFlashBlockPercentage - The bad flash block percentage
+ *                                         read from i_nvDimm, zero if the
+ *                                         register read failed.
+ *
+ * @return false if check failed or register read failed, else true
+ */
+bool nvDimmCheckBadFlashBlockPercentage(TargetHandle_t i_nvDimm,
+                                        const uint8_t i_maxPercentageAllowed,
+                                        uint8_t &o_badFlashBlockPercentage)
+{
+    // Zero the output first so that a failed read is reported as zero
+    o_badFlashBlockPercentage = 0;
+
+    // HUID of the NVDIMM, looked up once and reused by every trace below
+    const uint32_t l_huid = get_huid( i_nvDimm );
+
+    TRACDCOMP(g_trac_nvdimm, INFO_MRK"nvDimmCheckBadFlashBlockPercentage(): "
+              "Reading NVDIMM(0x%.8X) percentage of bad blocks from "
+              "register FLASH_BAD_BLK_PCT(0x%.4X)",
+              l_huid, FLASH_BAD_BLK_PCT);
+
+    // Fetch the percentage straight into the caller's output variable
+    errlHndl_t l_readErr = nvdimmReadReg(i_nvDimm,
+                                         FLASH_BAD_BLK_PCT,
+                                         o_badFlashBlockPercentage);
+
+    // Fail state 1: the register could not be read
+    if (l_readErr)
+    {
+        TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvDimmCheckBadFlashBlockPercentage(): "
+                  "FAIL: NVDIMM(0x%.8X) failed to read the percentage of "
+                  "bad blocks from register FLASH_BAD_BLK_PCT(0x%.4X), "
+                  "marking as a fail",
+                  l_huid, FLASH_BAD_BLK_PCT);
+
+        l_readErr->setSev(ERRORLOG::ERRL_SEV_PREDICTIVE);
+        l_readErr->collectTrace(NVDIMM_COMP_NAME);
+        errlCommit(l_readErr, NVDIMM_COMP_ID);
+
+        // A zero output together with a false return is how the caller
+        // recognizes a register read error
+        o_badFlashBlockPercentage = 0;
+        return false;
+    }
+
+    // The read worked, trace out the returned data for inspection
+    TRACDCOMP(g_trac_nvdimm, INFO_MRK"nvDimmCheckBadFlashBlockPercentage(): "
+              "NVDIMM(0x%.8X) returned value (%d) from the "
+              "percentage of bad blocks, register "
+              "FLASH_BAD_BLK_PCT(0x%.4X)",
+              l_huid,
+              o_badFlashBlockPercentage,
+              FLASH_BAD_BLK_PCT);
+
+    // The check passes when the percentage does not exceed the maximum.
+    // Either way the value stays in o_badFlashBlockPercentage for the caller.
+    const bool l_isWithinMaximum =
+                    !(o_badFlashBlockPercentage > i_maxPercentageAllowed);
+
+    if (l_isWithinMaximum)
+    {
+        TRACFCOMP(g_trac_nvdimm, INFO_MRK"nvDimmCheckBadFlashBlockPercentage(): "
+                  "SUCCESS: For NVDIMM (0x%.8X), the percentage of bad "
+                  "flash blocks (%d) is less than or meets the maximum "
+                  "percentage of bad flash blocks allowed (%d), "
+                  "marking this as a pass",
+                  l_huid,
+                  o_badFlashBlockPercentage,
+                  i_maxPercentageAllowed);
+    }
+    else
+    {
+        // Fail state 2: the percentage is over the maximum allowed
+        TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvDimmCheckBadFlashBlockPercentage(): "
+                  "FAIL: For NVDIMM (0x%.8X), the percentage of bad "
+                  "flash blocks (%d) exceeds the maximum percentage "
+                  "of bad flash blocks allowed (%d), marking this "
+                  "as a fail",
+                  l_huid,
+                  o_badFlashBlockPercentage,
+                  i_maxPercentageAllowed);
+    }
+
+    return l_isWithinMaximum;
+}
+
+/*
+ * @brief Check the flash error count against a given maximum allowed.
+ *
+ * @details The count is assembled one byte at a time by reading every
+ *          register from FLASH_ERROR_COUNT2 down to FLASH_ERROR_COUNT0,
+ *          most significant byte first.
+ *          This returns a tristate - 1 pass, 2 different fails
+ *          If true is returned, then the check passed and
+ *             o_readFlashErrorCount will contain what the retrieved
+ *             flash error count is. A retrieved count of zero is a pass.
+ *          If false is returned and the o_readFlashErrorCount is zero, then
+ *             the check failed because of a register read fail. The read
+ *             error is committed here as predictive.
+ *          If false is returned and the o_readFlashErrorCount is not zero,
+ *             then the check failed because the retrieved flash error
+ *             count exceeds the given maximum allowed
+ *
+ * @param[in]  i_nvDimm - The NVDIMM to check
+ * @param[in]  i_maxFlashErrorsAllowed - The maximum number of flash errors
+ *                                       allowed
+ * @param[out] o_readFlashErrorCount - The retrieved bad flash error
+ *                                     count from i_nvDimm, if no
+ *                                     register read error.
+ *
+ * @return false if check failed or register read failed, else true
+ */
+bool nvDimmCheckFlashErrorCount(TargetHandle_t i_nvDimm,
+                                const uint32_t i_maxFlashErrorsAllowed,
+                                uint32_t &o_readFlashErrorCount)
+{
+    // The status of the check on the flash error count
+    bool l_didFlashErrorCountCheckPass(false);
+
+    // Set as soon as any register read fails. A successfully read count of
+    // zero is valid data, so the aggregated count itself must not be used
+    // to tell a good read from a failed one.
+    bool l_didRegisterReadFail(false);
+
+    // The retrieved flash error count from register, initialize to zero
+    o_readFlashErrorCount = 0;
+
+    // Handle to catch any errors
+    errlHndl_t l_err(nullptr);
+
+    // Cache the HUID of the NVDIMM
+    uint32_t l_nvDimmHuid = get_huid( i_nvDimm );
+
+    // The retrieved flash error count from a register
+    uint8_t l_readFlashErrorCountByte(0);
+
+    // Read the flash error count registers starting from MSB to LSB
+    for (int16_t l_flashErrorRegister = FLASH_ERROR_COUNT2;
+         l_flashErrorRegister >= FLASH_ERROR_COUNT0;
+         --l_flashErrorRegister)
+    {
+        // Reset this for every iteration, may be redundant
+        l_readFlashErrorCountByte = 0;
+
+        TRACDCOMP(g_trac_nvdimm, INFO_MRK"nvDimmCheckFlashErrorCount(): "
+                  "Reading NVDIMM(0x%.8X) flash error count from "
+                  "register FLASH_ERROR_COUNT(0x%.4X)",
+                  l_nvDimmHuid, l_flashErrorRegister);
+
+        l_err = nvdimmReadReg(i_nvDimm,
+                              static_cast<i2cReg >(l_flashErrorRegister),
+                              l_readFlashErrorCountByte);
+
+        if (l_err)
+        {
+            TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvDimmCheckFlashErrorCount(): "
+                      "FAIL: NVDIMM(0x%.8X) failed to read flash error "
+                      "count from register FLASH_ERROR_COUNT(0x%.4X) "
+                      "marking as a fail",
+                      l_nvDimmHuid, l_flashErrorRegister);
+
+            l_err->setSev(ERRORLOG::ERRL_SEV_PREDICTIVE);
+            l_err->collectTrace(NVDIMM_COMP_NAME);
+            errlCommit(l_err, NVDIMM_COMP_ID);
+
+            // Set up the fail state, so caller can determine that the fail was
+            // due to a register read error
+            l_didFlashErrorCountCheckPass = false;
+            l_didRegisterReadFail = true;
+            o_readFlashErrorCount = 0;
+
+            break;
+        }
+
+        // If we get here, then the read was successful
+        // Append the read flash error count byte to the LSB of the
+        // aggregated flash error count bytes.
+        o_readFlashErrorCount = (o_readFlashErrorCount << 8) |
+                                l_readFlashErrorCountByte;
+
+        TRACDCOMP(g_trac_nvdimm, INFO_MRK"nvDimmCheckFlashErrorCount(): "
+                  "NVDIMM(0x%.8X) returned value (0x%.2X) from the "
+                  "partial flash error count, register "
+                  "FLASH_ERROR_COUNT(0x%.4X)",
+                  l_nvDimmHuid,
+                  l_readFlashErrorCountByte,
+                  l_flashErrorRegister);
+
+    } // end for (int16_t l_flashErrorRegister = FLASH_ERROR_COUNT2; ...
+
+    // Only judge the count when every register read was successful. The
+    // count is compared even when it is zero, an NVDIMM that reports no
+    // flash errors passes the check.
+    if (!l_didRegisterReadFail)
+    {
+        TRACDCOMP(g_trac_nvdimm, INFO_MRK"nvDimmCheckFlashErrorCount(): "
+                  "NVDIMM(0x%.8X) flash error count = %d ",
+                  l_nvDimmHuid, o_readFlashErrorCount);
+
+        // Check the validity of the flash error count
+        if (o_readFlashErrorCount > i_maxFlashErrorsAllowed)
+        {
+            TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvDimmCheckFlashErrorCount(): "
+                      "FAIL: For NVDIMM (0x%.8X), the flash error "
+                      "count (%d) exceeds the maximum number of flash "
+                      "errors allowed (%d), marking this as a fail",
+                      l_nvDimmHuid,
+                      o_readFlashErrorCount,
+                      i_maxFlashErrorsAllowed);
+
+            // Set up the fail state, so caller can determine that the fail was
+            // due to error count exceeding the max errors allowed.
+            // Note: Leave the value in o_readFlashErrorCount so caller
+            //       can inspect, if they wish. It is never zero here because
+            //       it is greater than an unsigned maximum.
+            l_didFlashErrorCountCheckPass = false;
+        }
+        else
+        {
+            TRACFCOMP(g_trac_nvdimm, INFO_MRK"nvDimmCheckFlashErrorCount(): "
+                      "SUCCESS: For NVDIMM(0x%.8X), the flash error counts "
+                      "(%d) is less than or meets the maximum number of "
+                      "errors allowed (%d), marking this as a pass",
+                      l_nvDimmHuid,
+                      o_readFlashErrorCount,
+                      i_maxFlashErrorsAllowed);
+
+            // Set up the pass state
+            // Note: Leave the value in o_readFlashErrorCount so caller
+            //       can inspect, if they wish
+            l_didFlashErrorCountCheckPass = true;
+        }
+    } // end if (!l_didRegisterReadFail)
+
+    return l_didFlashErrorCountCheckPass;
+}
+
+/*
+ * @brief Check the NVM (non-volatile memory)/flash health of the individual
+ * NVDIMMs supplied in list.
+ *
+ * @details For every NVDIMM in the list two checks are made:
+ *          - nvDimmCheckBadFlashBlockPercentage against a maximum of 19
+ *          - nvDimmCheckFlashErrorCount against a maximum of 300
+ *          When a check returns false and its retrieved value is zero, this
+ *          is treated as a register read fail: the overall result becomes
+ *          false and the loop moves on to the next NVDIMM without creating
+ *          an error log in this function.
+ *          When a check returns false with a non-zero retrieved value, a
+ *          predictive error log (NVDIMM_NVM_HEALTH_CHECK /
+ *          NVDIMM_NVM_HEALTH_CHECK_FAILED) carrying the HUID, the retrieved
+ *          values and the maximums is created and committed, and the overall
+ *          result becomes false.
+ *          The remaining NVDIMMs are still checked after a failure.
+ *
+ *          NOTE(review): if the bad flash block percentage exceeds its
+ *          maximum and the flash error count check then returns false with
+ *          a zero count, the 'continue' is taken before the error log for
+ *          the percentage is created - confirm this is intended.
+ *
+ * @param[in] i_nvDimmTargetList - list of NVDIMMs to check the health of flash
+ *
+ * @return false if one or more NVDIMMs fail NVM health check, else true
+ */
+bool nvDimmNvmCheckHealthStatus(const TargetHandleList &i_nvDimmTargetList)
+{
+ TRACFCOMP(g_trac_nvdimm, ENTER_MRK"nvDimmNvmCheckHealthStatus(): "
+ "Target list size(%d)", i_nvDimmTargetList.size());
+
+ // The following maximums are the same values used by SMART's
+ // manufacturing and recommended that we use.
+ // The maximum percentage of bad flash blocks
+ // Fail if over 19% of bad flash blocks is encountered
+ const uint8_t MAXIMUM_PERCENTAGE_OF_BAD_FLASH_BLOCKS_ALLOWED = 19;
+ // The maximum number of flash memory errors allowed
+ // Fail if over 300 flash memory errors is encountered
+ const uint32_t MAXIMUM_NUMBER_OF_FLASH_MEMORY_ERRORS_ALLOWED = 300;
+
+ // Status of the accumulation of all calls related to the NVM health check.
+ // If any one call is bad/fails, then this will be false, else it stays true
+ bool l_didNvmHealthCheckPass(true);
+
+ // Handle for the error log created below when an NVDIMM fails a check
+ errlHndl_t l_err(nullptr);
+
+ // The retrieved flash block percentage from register
+ uint8_t l_badFlashBlockPercentage(0);
+ // The retrieved flash error count from register
+ uint32_t l_flashErrorCount(0);
+
+ // The status of the checks on the percentage of bad blocks and
+ // flash error count
+ // Default to true
+ bool l_badFlashBlockPercentageCheckPassed(true);
+ bool l_flashErrorCountCheckPassed(true);
+
+ // Iterate thru the supplied NVDIMMs checking the health of the NVM
+ for (auto const l_nvDimm : i_nvDimmTargetList)
+ {
+ // Cache the HUID of the NVDIMM
+ uint32_t l_nvDimmHuid = get_huid( l_nvDimm );
+
+ // Reset these for every NVDIMM that is checked
+ l_badFlashBlockPercentage = 0;
+ l_flashErrorCount = 0;
+ l_badFlashBlockPercentageCheckPassed = true;
+ l_flashErrorCountCheckPassed = true;
+
+ // Check the validity of bad flash block percentage
+ if (!nvDimmCheckBadFlashBlockPercentage(
+ l_nvDimm,
+ MAXIMUM_PERCENTAGE_OF_BAD_FLASH_BLOCKS_ALLOWED,
+ l_badFlashBlockPercentage))
+ {
+ // Set this to false to indicate that the overall check on the
+ // NVDIMMs had at least one failure
+ l_didNvmHealthCheckPass = false;
+
+ // If no data in the variable l_badFlashBlockPercentage, then
+ // this is a read register fail. Move on to the next NVDIMM,
+ // nothing more can be checked on this one
+ if (!l_badFlashBlockPercentage)
+ {
+ continue;
+ }
+
+ // Set the check to false, to facilitate error reporting
+ l_badFlashBlockPercentageCheckPassed = false;
+ }
+
+ // Check the validity of the flash error count
+ if (!nvDimmCheckFlashErrorCount(
+ l_nvDimm,
+ MAXIMUM_NUMBER_OF_FLASH_MEMORY_ERRORS_ALLOWED,
+ l_flashErrorCount))
+ {
+ // Set this to false to indicate that the overall check on the
+ // NVDIMMs had at least one failure
+ l_didNvmHealthCheckPass = false;
+
+ // If no data in the variable l_flashErrorCount, then
+ // this is a read register fail. Move on to the next NVDIMM,
+ // nothing more can be checked on this one
+ if (!l_flashErrorCount)
+ {
+ continue;
+ }
+
+ // Set the check to false, to facilitate error reporting
+ l_flashErrorCountCheckPassed = false;
+ }
+
+ /// Now we assess the health of the flash based on data gathered above
+ if ( !l_badFlashBlockPercentageCheckPassed ||
+ !l_flashErrorCountCheckPassed )
+ {
+ // First set the NVDIMM HUID to the first 32 bits of user data 1
+ uint64_t l_badFlashBlockPercentageUserData1 =
+ TWO_UINT32_TO_UINT64(l_nvDimmHuid, 0);
+
+ // If an issue with the bad flash block percentage, then append
+ // data to user data 1
+ if (!l_badFlashBlockPercentageCheckPassed &&
+ l_badFlashBlockPercentage)
+ {
+ // Setting the HUID here is redundant but easier than trying to
+ // do some clever code that will set the HUID for user data 1
+ // when this path is not taken, but the next check on the flash
+ // error count is taken
+ l_badFlashBlockPercentageUserData1 =
+ TWO_UINT32_TO_UINT64(l_nvDimmHuid,
+ TWO_UINT16_TO_UINT32(
+ l_badFlashBlockPercentage,
+ MAXIMUM_PERCENTAGE_OF_BAD_FLASH_BLOCKS_ALLOWED));
+ }
+
+ // If an issue with the flash error count, then set user
+ // data 2 to contain the flash error count value and its maximum
+ uint64_t l_flashErrorCountUserData2(0);
+ if (!l_flashErrorCountCheckPassed &&
+ l_flashErrorCount)
+ {
+ l_flashErrorCountUserData2 =
+ TWO_UINT32_TO_UINT64(l_flashErrorCount,
+ MAXIMUM_NUMBER_OF_FLASH_MEMORY_ERRORS_ALLOWED);
+ }
+
+ /*@
+ * @errortype
+ * @severity ERRL_SEV_PREDICTIVE
+ * @moduleid NVDIMM_NVM_HEALTH_CHECK
+ * @reasoncode NVDIMM_NVM_HEALTH_CHECK_FAILED
+ * @userdata1[0:31] HUID of NVDIMM target
+ * @userdata1[32:47] The retrieved bad flash block percentage,
+ * if error with, else 0
+ * @userdata1[48:63] The maximum percentage of bad flash blocks
+ * allowed, if bad flash block percentage
+ * exceeds this maximum, else 0
+ * @userdata2[0:31] The retrieved flash error count,
+ * if error with, else 0
+ * @userdata2[32:63] The maximum number of flash errors
+ * allowed, if flash error exceeds this
+ * maximum, else 0
+ * @devdesc Either the NVDIMM NVM bad flash block
+ * percentage exceeded the maximum percentage
+ * allowed or the NVDIMM NVM number of flash
+ * error exceeds the maximum count allowed
+ * or both.
+ * @custdesc NVDIMM NVM health check failed.
+ */
+ l_err = new ErrlEntry( ERRL_SEV_PREDICTIVE,
+ NVDIMM_NVM_HEALTH_CHECK,
+ NVDIMM_NVM_HEALTH_CHECK_FAILED,
+ l_badFlashBlockPercentageUserData1,
+ l_flashErrorCountUserData2,
+ ErrlEntry::NO_SW_CALLOUT );
+
+ l_err->collectTrace(NVDIMM_COMP_NAME);
+
+ // Commit the error
+ errlCommit(l_err, NVDIMM_COMP_ID);
+
+ // Let the caller know something went amiss (already set to false above)
+ l_didNvmHealthCheckPass = false;
+ }
+ else
+ {
+ // This NVDIMM passed the NVM health check
+ TRACFCOMP(g_trac_nvdimm, INFO_MRK"nvDimmNvmCheckHealthStatus(): "
+ "Success: NVDIMM (0x%.8X) passed the NVM health check.",
+ l_nvDimmHuid);
+ } // end if ( !l_badFlashBlockPercentageCheckPassed .. else
+ } // end for (auto const l_nvDimm : i_nvDimmTargetList)
+
+ // Should not have any uncommitted errors
+ assert(l_err == NULL, "nvDimmNvmCheckHealthStatus() - unexpected "
+ "uncommitted error found");
+
+ TRACFCOMP(g_trac_nvdimm,EXIT_MRK"nvDimmNvmCheckHealthStatus(): Returning %s",
+ l_didNvmHealthCheckPass == true ? "true" : "false" );
+
+ return l_didNvmHealthCheckPass;
+} // end nvDimmNvmCheckHealthStatus
+
+/**
+ * @brief Run the NVM (non-volatile memory) health check on the list of
+ *        NVDIMM targets retrieved from the system.
+ *
+ * @details Fetches the NVDIMM list with nvdimm_getNvdimmList and hands it
+ *          to nvDimmNvmCheckHealthStatus, whose result is returned as is.
+ *
+ * @see nvDimmNvmCheckHealthStatus for more details
+ *
+ * @return false if one or more NVDIMMs fail an NVM health check, else true
+ */
+bool nvDimmNvmCheckHealthStatusOnSystem()
+{
+    TRACFCOMP(g_trac_nvdimm, ENTER_MRK"nvDimmNvmCheckHealthStatusOnSystem()");
+
+    // Gather the NVDIMM targets from the system
+    TargetHandleList l_systemNvDimms;
+    nvdimm_getNvdimmList(l_systemNvDimms);
+
+    // Check them all, the outcome is the outcome of this call
+    const bool l_allPassed = nvDimmNvmCheckHealthStatus(l_systemNvDimms);
+
+    TRACFCOMP(g_trac_nvdimm, EXIT_MRK"nvDimmNvmCheckHealthStatusOnSystem(): "
+              "Returning %s", l_allPassed ? "true" : "false" );
+
+    return l_allPassed;
+} // end nvDimmNvmCheckHealthStatusOnSystem
+
+
} // end NVDIMM namespace
diff --git a/src/usr/util/runtime/rt_cmds.C b/src/usr/util/runtime/rt_cmds.C
index c669aae4f..bf0c51749 100644
--- a/src/usr/util/runtime/rt_cmds.C
+++ b/src/usr/util/runtime/rt_cmds.C
@@ -1179,25 +1179,59 @@ void cmd_nvdimm_protection_msg( char* &o_output, uint32_t i_huid,
}
}
-void cmd_nvdimmCheckHealthStatus( char* &o_output)
+/**
+ * @brief Run the ES (energy source) health status check via
+ *        NVDIMM::nvDimmEsCheckHealthStatusOnSystem and report the outcome
+ *        as text. If check fails, see HBRT traces for further details.
+ * @param[out] o_output  Output display buffer, a 500 character buffer is
+ *                       allocated here with new[]. Holds a message stating
+ *                       whether the ES health check passed or failed.
+ */
+void cmd_nvDimmEsCheckHealthStatus( char* &o_output)
+{
+    // Allocate the display buffer before running the check
+    o_output = new char[500];
+
+    const bool l_checkPassed = NVDIMM::nvDimmEsCheckHealthStatusOnSystem();
+
+    if (!l_checkPassed)
+    {
+        sprintf( o_output, "cmd_nvDimmEsCheckHealthStatus: "
+                 "ES (energy source) health status check failed. "
+                 "Inspect HBRT traces for further details.");
+        return;
+    }
+
+    sprintf( o_output, "cmd_nvDimmEsCheckHealthStatus: "
+             "ES (energy source) health status check passed.");
+} // end cmd_nvDimmEsCheckHealthStatus
+
+/**
+ * @brief Check the NVM (non-volatile memory) health status of all NVDIMMs in
+ * the system. If check fails, see HBRT traces for further details.
+ * @param[out] o_output Output display buffer, memory allocated here.
+ * Will inform caller if NVM health check passes or fails.
+ */
+
+void cmd_nvdDmmNvmCheckHealthStatus( char* &o_output)
{
o_output = new char[500];
- if (NVDIMM::nvDimmCheckHealthStatusOnSystem())
+ if (NVDIMM::nvDimmNvmCheckHealthStatusOnSystem())
{
- sprintf( o_output, "cmd_doNvDimmCheckHealthStatus: "
- "health status check passed.");
+ sprintf( o_output, "cmd_nvdDmmNvmCheckHealthStatus: "
+ "NVM (non-volatile memory) health status check passed.");
}
else
{
- sprintf( o_output, "cmd_doNvDimmCheckHealthStatus: "
- "health status check failed. Inspect HBRT traces "
- "for further details.");
+ sprintf( o_output, "cmd_nvdDmmNvmCheckHealthStatus: "
+ "NVM (non-volatile memory) health status check failed. "
+ "Inspect HBRT traces for further details.");
}
return;
-} // end cmd_nvdimmCheckHealthStatus
+} // end cmd_nvdDmmNvmCheckHealthStatus
+
#endif
@@ -1535,18 +1569,31 @@ int hbrtCommand( int argc,
sprintf(*l_output, "ERROR: nvdimm_protection <huid> <0 or 1>");
}
}
- else if( !strcmp( argv[0], "nvdimm_check_status" ) )
+ else if( !strcmp( argv[0], "nvdimm_es_check_status" ) )
{
if (argc == 1)
{
- cmd_nvdimmCheckHealthStatus( *l_output );
+ cmd_nvDimmEsCheckHealthStatus( *l_output );
}
else
{
*l_output = new char[100];
- sprintf(*l_output, "Usage: nvdimm_check_status");
+ sprintf(*l_output, "Usage: nvdimm_es_check_status");
}
}
+ else if( !strcmp( argv[0], "nvdimm_nvm_check_status" ) )
+ {
+ if (argc == 1)
+ {
+ cmd_nvdDmmNvmCheckHealthStatus( *l_output );
+ }
+ else
+ {
+ *l_output = new char[100];
+ sprintf(*l_output, "Usage: nvdimm_nvm_check_status");
+ }
+ }
+
#endif
else
{
@@ -1587,8 +1634,11 @@ int hbrtCommand( int argc,
#ifdef CONFIG_NVDIMM
sprintf( l_tmpstr, "nvdimm_protection <huid> <0 or 1>\n");
strcat( *l_output, l_tmpstr );
- sprintf( l_tmpstr, "nvdimm_check_status\n");
+ sprintf( l_tmpstr, "nvdimm_es_check_status\n");
+ strcat( *l_output, l_tmpstr );
+ sprintf( l_tmpstr, "nvdimm_nvm_check_status\n");
strcat( *l_output, l_tmpstr );
+
#endif
}
diff --git a/src/usr/util/runtime/rt_fwnotify.C b/src/usr/util/runtime/rt_fwnotify.C
index e9ebabe6d..350f4d1da 100644
--- a/src/usr/util/runtime/rt_fwnotify.C
+++ b/src/usr/util/runtime/rt_fwnotify.C
@@ -622,22 +622,40 @@ int doNvDimmOperation(const hostInterfaces::nvdimm_operation_t& i_nvDimmOp)
} // end if (nvDimmOp.opType & hostInterfaces::HBRT_FW_NVDIMM_ARM)
} while (0); // end Perform the arming/disarming operations.
- // Perform the health check operation
+ // Perform the ES (energy source) health check operation
if (i_nvDimmOp.opType & hostInterfaces::HBRT_FW_MNFG_ES_HEALTH_CHECK)
{
- if (!nvDimmCheckHealthStatus(l_nvDimmTargetList))
+ if (!nvDimmEsCheckHealthStatus(l_nvDimmTargetList))
{
TRACFCOMP(g_trac_runtime, "doNvDimmOperation: "
- "Call to do a health check failed.");
+ "Call to do an ES (energy source) health check failed.");
rc = -1;
break;
}
else
{
TRACFCOMP(g_trac_runtime, "doNvDimmOperation: "
- "Call to do a health check succeeded.");
+ "Call to do an ES (energy source) health check succeeded.");
}
}
+
+ // Perform the NVM (non-volatile memory) health check operation
+ if (i_nvDimmOp.opType & hostInterfaces::HBRT_FW_MNFG_NVM_HEALTH_CHECK)
+ {
+ if (!nvDimmNvmCheckHealthStatus(l_nvDimmTargetList))
+ {
+ TRACFCOMP(g_trac_runtime, "doNvDimmOperation: "
+ "Call to do a NVM (non-volatile memory) health check failed.");
+ rc = -1;
+ break;
+ }
+ else
+ {
+ TRACFCOMP(g_trac_runtime, "doNvDimmOperation: "
+ "Call to do a NVM (non-volatile memory) health check succeeded.");
+ }
+ }
+
} while(0); // end Perform the operations requested
if (l_err)
OpenPOWER on IntegriCloud