From 913c6fde1d0e495674c9560139615ade57e49349 Mon Sep 17 00:00:00 2001 From: "MATTHEW I. HICKMAN" Date: Tue, 10 Sep 2019 12:54:08 -0500 Subject: Fixed several Arm error handling bugs CQ:SW475445 Change-Id: I19cd03850b65a23d2a35c9160352cdc7a4946667 Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/83534 Tested-by: Jenkins Server Tested-by: Jenkins OP Build CI Tested-by: FSP CI Jenkins Tested-by: Jenkins OP HW Reviewed-by: TSUNG K YEUNG Reviewed-by: Daniel M Crowell --- src/usr/isteps/nvdimm/errlud_nvdimm.C | 4 +++ src/usr/isteps/nvdimm/nvdimm.H | 2 ++ src/usr/isteps/nvdimm/nvdimmErrorLog.C | 35 +++++++++++++++----------- src/usr/isteps/nvdimm/nvdimmdd.H | 4 +++ src/usr/isteps/nvdimm/plugins/errludP_nvdimm.H | 6 +++++ src/usr/isteps/nvdimm/runtime/nvdimm_rt.C | 7 +++++- 6 files changed, 43 insertions(+), 15 deletions(-) diff --git a/src/usr/isteps/nvdimm/errlud_nvdimm.C b/src/usr/isteps/nvdimm/errlud_nvdimm.C index f5c6c0eb9..07afa187a 100644 --- a/src/usr/isteps/nvdimm/errlud_nvdimm.C +++ b/src/usr/isteps/nvdimm/errlud_nvdimm.C @@ -185,6 +185,7 @@ UdNvdimmOPParms::UdNvdimmOPParms( const nvdimm_reg_t &i_RegInfo ) // 1 byte : NVDIMM_CMD_STATUS0 // 1 byte : ABORT_CMD_TIMEOUT // 1 byte : ERASE_STATUS + // 1 byte : ERASE_FAIL_INFO // 1 byte : ERASE_TIMEOUT0 // 1 byte : ERASE_TIMEOUT1 // 1 byte : SET_ES_POLICY_STATUS @@ -193,6 +194,9 @@ UdNvdimmOPParms::UdNvdimmOPParms( const nvdimm_reg_t &i_RegInfo ) // 1 byte : RESTORE_TIMEOUT0 // 1 byte : RESTORE_TIMEOUT1 // 1 byte : ARM_STATUS + // 1 byte : ARM_FAIL_INFO + // 1 byte : ARM_TIMEOUT0 + // 1 byte : ARM_TIMEOUT1 // 1 byte : SET_EVENT_NOTIFICATION_STATUS // 1 byte : ENCRYPTION_CONFIG_STATUS diff --git a/src/usr/isteps/nvdimm/nvdimm.H b/src/usr/isteps/nvdimm/nvdimm.H index 697efddeb..f9f38f06d 100644 --- a/src/usr/isteps/nvdimm/nvdimm.H +++ b/src/usr/isteps/nvdimm/nvdimm.H @@ -122,6 +122,8 @@ enum i2cReg : uint16_t SET_ES_POLICY_STATUS = 0x070, FIRMWARE_OPS_STATUS = 0x071, OPERATIONAL_UNIT_OPS_STATUS = 0x072, + 
ERASE_FAIL_INFO = 0x073, + ARM_FAIL_INFO = 0x076, CSAVE_INFO = 0x080, CSAVE_FAIL_INFO0 = 0x084, CSAVE_FAIL_INFO1 = 0x085, diff --git a/src/usr/isteps/nvdimm/nvdimmErrorLog.C b/src/usr/isteps/nvdimm/nvdimmErrorLog.C index 384d083d4..ed5fe21c0 100644 --- a/src/usr/isteps/nvdimm/nvdimmErrorLog.C +++ b/src/usr/isteps/nvdimm/nvdimmErrorLog.C @@ -158,6 +158,14 @@ void nvdimmTraceRegs(Target *i_nvdimm, nvdimm_reg_t& o_RegInfo) } o_RegInfo.Erase_Status = l_data; + // Read ERASE FAIL INFO register + l_err = nvdimmReadReg(i_nvdimm, ERASE_FAIL_INFO, l_data); + if(l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + } + o_RegInfo.Erase_Fail_Info = l_data; + // Read ERASE TIMEOUT0 register l_err = nvdimmReadReg(i_nvdimm, ERASE_TIMEOUT0, l_data); if(l_err) @@ -230,6 +238,14 @@ void nvdimmTraceRegs(Target *i_nvdimm, nvdimm_reg_t& o_RegInfo) } o_RegInfo.Arm_Status = l_data; + // Read ARM FAIL INFO register + l_err = nvdimmReadReg(i_nvdimm, ARM_FAIL_INFO, l_data); + if(l_err) + { + errlCommit( l_err, NVDIMM_COMP_ID ); + } + o_RegInfo.Arm_Fail_Info = l_data; + // Read ARM TIMEOUT0 register l_err = nvdimmReadReg(i_nvdimm, ARM_TIMEOUT0, l_data); if(l_err) @@ -506,26 +522,17 @@ bool nvdimmBPMCableCallout(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err) o_err->addPartCallout( i_nvdimm, HWAS::BPM_CABLE_PART_TYPE, HWAS::SRCI_PRIORITY_HIGH); + // Callout dimm but do not deconfig or gard + o_err->addHwCallout( i_nvdimm, + HWAS::SRCI_PRIORITY_LOW, + HWAS::NO_DECONFIG, + HWAS::GARD_NULL); // Check restore status and set dimm status accordingly if(l_continue) { // Set ATTR_NV_STATUS_FLAG to partially working as data may still persist nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_VAL_SR); - - // Callout dimm but do not deconfig or gard - o_err->addHwCallout( i_nvdimm, - HWAS::SRCI_PRIORITY_LOW, - HWAS::NO_DECONFIG, - HWAS::GARD_NULL); - } - else - { - // Callout dimm, deconfig and gard - o_err->addHwCallout( i_nvdimm, - HWAS::SRCI_PRIORITY_HIGH, - HWAS::DECONFIG, - HWAS::GARD_Fatal); } break; diff 
--git a/src/usr/isteps/nvdimm/nvdimmdd.H b/src/usr/isteps/nvdimm/nvdimmdd.H index c48e03f07..6a3f07be0 100755 --- a/src/usr/isteps/nvdimm/nvdimmdd.H +++ b/src/usr/isteps/nvdimm/nvdimmdd.H @@ -110,6 +110,7 @@ struct nvdimm_reg_t uint8_t NVDimm_Ready; uint8_t NVDimm_CMD_Status0; uint8_t Erase_Status; + uint8_t Erase_Fail_Info; uint8_t Erase_Timeout0; uint8_t Erase_Timeout1; uint8_t Abort_CMD_Timeout; @@ -119,6 +120,7 @@ struct nvdimm_reg_t uint8_t Restore_Timeout0; uint8_t Restore_Timeout1; uint8_t Arm_Status; + uint8_t Arm_Fail_Info; uint8_t Arm_Timeout0; uint8_t Arm_Timeout1; uint8_t Set_Event_Notification_Status; @@ -141,6 +143,7 @@ struct nvdimm_reg_t NVDimm_Ready(0), NVDimm_CMD_Status0(0), Erase_Status(0), + Erase_Fail_Info(0), Erase_Timeout0(0), Erase_Timeout1(0), Abort_CMD_Timeout(0), @@ -150,6 +153,7 @@ struct nvdimm_reg_t Restore_Timeout0(0), Restore_Timeout1(0), Arm_Status(0), + Arm_Fail_Info(0), Arm_Timeout0(0), Arm_Timeout1(0), Set_Event_Notification_Status(0), diff --git a/src/usr/isteps/nvdimm/plugins/errludP_nvdimm.H b/src/usr/isteps/nvdimm/plugins/errludP_nvdimm.H index dc51d1fda..76bc8ecac 100644 --- a/src/usr/isteps/nvdimm/plugins/errludP_nvdimm.H +++ b/src/usr/isteps/nvdimm/plugins/errludP_nvdimm.H @@ -212,6 +212,7 @@ public: // 1 byte : NVDIMM_READY // 1 byte : NVDIMM_CMD_STATUS0 // 1 byte : ERASE_STATUS + // 1 byte : ERASE_FAIL_INFO // 1 byte : ERASE_TIMEOUT0 // 1 byte : ERASE_TIMEOUT1 // 1 byte : ABORT_CMD_TIMEOUT @@ -221,6 +222,7 @@ public: // 1 byte : RESTORE_TIMEOUT0 // 1 byte : RESTORE_TIMEOUT1 // 1 byte : ARM_STATUS + // 1 byte : ARM_FAIL_INFO // 1 byte : ARM_TIMEOUT0 // 1 byte : ARM_TIMEOUT1 // 1 byte : SET_EVENT_NOTIFICATION_STATUS @@ -253,6 +255,8 @@ public: ++l_databuf; i_parser.PrintNumber("Erase Status Register: ","%.2lX",TO_UINT8(l_databuf)); ++l_databuf; + i_parser.PrintNumber("Erase Fail Info Register: ","%.2lX",TO_UINT8(l_databuf)); + ++l_databuf; i_parser.PrintNumber("Erase Timeout0 Register: ","%.2lX",TO_UINT8(l_databuf)); 
++l_databuf; i_parser.PrintNumber("Erase Timeout1 Register: ","%.2lX",TO_UINT8(l_databuf)); @@ -271,6 +275,8 @@ public: ++l_databuf; i_parser.PrintNumber("Arm Status Register: ","%.2lX",TO_UINT8(l_databuf)); ++l_databuf; + i_parser.PrintNumber("Arm Fail Info Register: ","%.2lX",TO_UINT8(l_databuf)); + ++l_databuf; i_parser.PrintNumber("Arm Timeout0 Register: ","%.2lX",TO_UINT8(l_databuf)); ++l_databuf; i_parser.PrintNumber("Arm Timeout1 Register: ","%.2lX",TO_UINT8(l_databuf)); diff --git a/src/usr/isteps/nvdimm/runtime/nvdimm_rt.C b/src/usr/isteps/nvdimm/runtime/nvdimm_rt.C index ac00f81b0..2e0903086 100644 --- a/src/usr/isteps/nvdimm/runtime/nvdimm_rt.C +++ b/src/usr/isteps/nvdimm/runtime/nvdimm_rt.C @@ -304,7 +304,12 @@ bool nvdimmArm(TargetHandleList &i_nvdimmTargetList) if (l_err) { TRACFCOMP(g_trac_nvdimm, "nvdimmArm() nvdimm[%X] failed first health status check", get_huid(l_nvdimm)); - if (!l_continue) + + // The arm timeout variable is used here as the continue variable for the + // health status check. This was done to include the timeout for use in the check. + // If true, either the arm timed out with a health status fail or the + // health status check failed with another disarm and exit condition. + if (l_arm_timeout) { errlCommit( l_err, NVDIMM_COMP_ID ); -- cgit v1.2.1