diff options
-rw-r--r-- | src/include/usr/htmgt/htmgt_reasoncodes.H | 1 | ||||
-rw-r--r-- | src/usr/htmgt/htmgt.C | 23 | ||||
-rw-r--r-- | src/usr/htmgt/htmgt_cfgdata.C | 15 | ||||
-rw-r--r-- | src/usr/htmgt/htmgt_occ.C | 63 | ||||
-rw-r--r-- | src/usr/htmgt/htmgt_occ.H | 24 | ||||
-rw-r--r-- | src/usr/htmgt/htmgt_occcmd.C | 102 | ||||
-rw-r--r-- | src/usr/htmgt/htmgt_poll.C | 130 |
7 files changed, 252 insertions, 106 deletions
diff --git a/src/include/usr/htmgt/htmgt_reasoncodes.H b/src/include/usr/htmgt/htmgt_reasoncodes.H index 5fc1b9638..d6a106949 100644 --- a/src/include/usr/htmgt/htmgt_reasoncodes.H +++ b/src/include/usr/htmgt/htmgt_reasoncodes.H @@ -63,6 +63,7 @@ namespace HTMGT HTMGT_RC_OT_THROTTLE_INVALID_N = HTMGT_COMP_ID | 0x04, HTMGT_RC_OCC_NOT_READY = HTMGT_COMP_ID | 0x05, HTMGT_RC_ATTRIBUTE_ERROR = HTMGT_COMP_ID | 0x06, + HTMGT_RC_OCC_EXCEPTION = HTMGT_COMP_ID | 0x0E, HTMGT_RC_NO_SUPPORT = HTMGT_COMP_ID | 0x0F, HTMGT_RC_OCC_RESET = HTMGT_COMP_ID | 0x15, HTMGT_RC_OCC_CRIT_FAILURE = HTMGT_COMP_ID | 0x16, diff --git a/src/usr/htmgt/htmgt.C b/src/usr/htmgt/htmgt.C index 257f2df04..d0ce39a1a 100644 --- a/src/usr/htmgt/htmgt.C +++ b/src/usr/htmgt/htmgt.C @@ -102,8 +102,18 @@ namespace HTMGT l_err = OccManager::sendOccPoll(); if (l_err) { - // Continue even if failed (poll will be retried) - ERRORLOG::errlCommit(l_err, HTMGT_COMP_ID); + if (OccManager::occNeedsReset()) + { + // No need to continue if a reset is required + TMGT_ERR("sendOccConfigData(): OCCs need to " + "be reset"); + break; + } + else + { + // Continue even if failed (will be retried) + ERRORLOG::errlCommit(l_err, HTMGT_COMP_ID); + } } // Send ALL config data @@ -415,17 +425,22 @@ namespace HTMGT { // Create an elog so the user knows the cmd failed. TMGT_ERR("enableOccActuation(): System is in safe mode"); + uint32_t safeInstance = 0; + uint32_t safeRc = OccManager::getSafeModeReason(safeInstance); /*@ * @errortype * @reasoncode HTMGT_RC_OCC_CRIT_FAILURE * @moduleid HTMGT_MOD_ENABLE_OCC_ACTUATION - * @userdata1 OCC activate [1==true][0==false] + * @userdata1[0:31] OCC activate [1==true][0==false] + * @userdata1[32:63] return code triggering safe mode + * @userdata2[0:31] safeMode flag + * @userdata2[32:63] OCC instance * @devdesc Operation not allowed, system is in safe mode */ bldErrLog(l_err, HTMGT_MOD_ENABLE_OCC_ACTUATION, HTMGT_RC_OCC_CRIT_FAILURE, - 0, i_occActivation, 0, safeMode, + i_occActivation, safeRc, safeMode, safeInstance, ERRORLOG::ERRL_SEV_UNRECOVERABLE); } diff --git a/src/usr/htmgt/htmgt_cfgdata.C b/src/usr/htmgt/htmgt_cfgdata.C index 3c5ef6b24..5c12a1a76 100644 --- a/src/usr/htmgt/htmgt_cfgdata.C +++ b/src/usr/htmgt/htmgt_cfgdata.C @@ -196,7 +196,7 @@ namespace HTMGT break; default: - TMGT_ERR("send_occ_config_data: Unsupported" + TMGT_ERR("sendOccConfigData: Unsupported" " format type 0x%02X", format); cmdDataLen = 0; @@ -204,7 +204,7 @@ namespace HTMGT if (cmdDataLen > 0) { - TMGT_INF("send_occ_config_data: Sending config" + TMGT_INF("sendOccConfigData: Sending config" " 0x%02X to OCC%d", format, occInstance); OccCmd cmd(occ, OCC_CMD_SETUP_CFG_DATA, @@ -212,7 +212,7 @@ namespace HTMGT errlHndl_t l_err = cmd.sendOccCmd(); if (l_err != NULL) { - TMGT_ERR("send_occ_config_data: OCC%d cfg " + TMGT_ERR("sendOccConfigData: OCC%d cfg " "format 0x%02X failed with rc=0x%04X", occInstance, format, l_err->reasonCode()); @@ -222,7 +222,7 @@ namespace HTMGT { if (OCC_RC_SUCCESS != cmd.getRspStatus()) { - TMGT_ERR("send_occ_config_data: OCC%d cfg " + TMGT_ERR("sendOccConfigData: OCC%d cfg " "format 0x%02X had bad rsp status" " 0x%02X for sysConfig", occInstance, format, @@ -239,12 +239,17 @@ namespace HTMGT } } // if (sendData) + if (OccManager::occNeedsReset()) + { + TMGT_ERR("sendOccConfigData(): OCCs need to be reset"); + } + } // for each config format } // for each OCC } - } // end send_occ_config_data() + } // end sendOccConfigData() /** OCC configuration data message versions */ diff --git a/src/usr/htmgt/htmgt_occ.C b/src/usr/htmgt/htmgt_occ.C index 52a27c14b..e23c0360f 100644 --- a/src/usr/htmgt/htmgt_occ.C +++ b/src/usr/htmgt/htmgt_occ.C @@ -63,6 +63,8 @@ namespace HTMGT iv_target(i_target), iv_lastPollValid(false), iv_occsPresent(1 << i_instance), + iv_resetReason(OCC_RESET_REASON_NONE), + iv_exceptionLogged(0), iv_resetCount(0), iv_version(0x01) { @@ -235,6 +237,7 @@ namespace HTMGT iv_failed = false; iv_lastPollValid = false; iv_resetReason = OCC_RESET_REASON_NONE; + iv_exceptionLogged = 0; } @@ -622,6 +625,7 @@ namespace HTMGT { TMGT_INF("_setOccState: All OCCs have reached state " "0x%02X", requestedState); + iv_state = requestedState; if (OCC_STATE_ACTIVE == requestedState) { @@ -769,13 +773,15 @@ namespace HTMGT * @errortype * @moduleid HTMGT_MOD_OCC_RESET * @reasoncode HTMGT_RC_OCC_RESET_THREHOLD + * @userdata1 return code triggering safe mode + * @userdata2 OCC instance * @devdesc OCC reset threshold reached. * Leaving OCCs in reset state */ bldErrLog(err, HTMGT_MOD_OCC_RESET, HTMGT_RC_OCC_CRIT_FAILURE, - 0, 0, 0, 0, + 0, cv_safeReturnCode, 0, cv_safeOccInstance, ERRORLOG::ERRL_SEV_UNRECOVERABLE); } @@ -910,6 +916,13 @@ namespace HTMGT } + uint32_t OccManager::_getSafeModeReason(uint32_t & o_instance) + { + o_instance = cv_safeOccInstance; + return cv_safeReturnCode; + } + + bool OccManager::_occNeedsReset() { bool needsReset = false; @@ -1039,6 +1052,40 @@ namespace HTMGT return err; } + // Consolidate all OCC states + void OccManager::_syncOccStates() + { + occStateId currentState = OCC_STATE_NO_CHANGE; + + for(occList_t::const_iterator occ_itr = iv_occArray.begin(); + (occ_itr != iv_occArray.end()); + ++occ_itr) + { + Occ * occ = *occ_itr; + if (OCC_STATE_NO_CHANGE == currentState) + { + currentState = occ->getState(); + } + else + { + if (currentState != occ->getState()) + { + // States do not match yet... + currentState = OCC_STATE_NO_CHANGE; + break; + } + } + } + if (OCC_STATE_NO_CHANGE != currentState) + { + if (iv_state != currentState) + { + TMGT_INF("syncOccStates: All OCCs are in 0x%02X", currentState); + iv_state = currentState; + } + } + } + uint8_t OccManager::getNumOccs() { @@ -1084,16 +1131,22 @@ namespace HTMGT void OccManager::waitForOccCheckpoint() { - return Singleton<OccManager>::instance()._waitForOccCheckpoint(); + Singleton<OccManager>::instance()._waitForOccCheckpoint(); } void OccManager::updateSafeModeReason(uint32_t i_src, uint32_t i_instance) { - return Singleton<OccManager>::instance(). + Singleton<OccManager>::instance(). _updateSafeModeReason(i_src, i_instance); } + uint32_t OccManager::getSafeModeReason(uint32_t & o_instance) + { + return Singleton<OccManager>::instance(). + _getSafeModeReason(o_instance); + } + bool OccManager::occNeedsReset() { return Singleton<OccManager>::instance()._occNeedsReset(); @@ -1124,6 +1177,10 @@ namespace HTMGT Singleton<OccManager>::instance()._setPstateTable(i_useNormal); } + void OccManager::syncOccStates() + { + Singleton<OccManager>::instance()._syncOccStates(); + } } // end namespace diff --git a/src/usr/htmgt/htmgt_occ.H b/src/usr/htmgt/htmgt_occ.H index 267da6896..3e9e87b3f 100644 --- a/src/usr/htmgt/htmgt_occ.H +++ b/src/usr/htmgt/htmgt_occ.H @@ -343,6 +343,8 @@ namespace HTMGT occResetReason iv_resetReason; + // Value of last exception committed (to prevent duplicates) + uint8_t iv_exceptionLogged; /** * @brief Clear flags after OCC has been reset @@ -491,6 +493,16 @@ namespace HTMGT /** + * @brief Return the reason the system entered safe mode + * + * @param[out] o_instance OCC instance + * + * @return SRC which triggered safe mode + */ + static uint32_t getSafeModeReason(uint32_t & o_instance); + + + /** * @brief Check if any OCCs need to be reset * * @return true if any OCC needs to be reset @@ -555,6 +567,13 @@ namespace HTMGT static bool occFailed(); + /** + * @brief Update OCC manager state with consolidated OCC state + * + */ + static void syncOccStates(); + + private: typedef std::vector<Occ*> occList_t; @@ -641,6 +660,9 @@ namespace HTMGT void _updateSafeModeReason(uint32_t i_src, uint32_t i_instance); + /** See getSafeModeReason() above */ + uint32_t _getSafeModeReason(uint32_t & o_instance); + /** See occNeedsReset() above */ bool _occNeedsReset(); @@ -665,6 +687,8 @@ namespace HTMGT iv_normalPstateTables = i_useNormal; }; + /** See syncOccStates() above */ + void _syncOccStates(); }; typedef Singleton<OccManager> occMgr; diff --git a/src/usr/htmgt/htmgt_occcmd.C b/src/usr/htmgt/htmgt_occcmd.C index 3752bdd92..d842e8265 100644 --- a/src/usr/htmgt/htmgt_occcmd.C +++ b/src/usr/htmgt/htmgt_occcmd.C @@ -473,15 +473,46 @@ namespace HTMGT ((false == l_commEstablished) && (OCC_CMD_POLL == iv_OccCmd.cmdType)) ) { - iv_RetryCmd = false; - do + if (0 == iv_Occ->iv_exceptionLogged) { - // Send the command and receive the response - l_errlHndl = writeOccCmd(); - - processOccResponse(l_errlHndl, cmdTraced); - - } while (iv_RetryCmd); + iv_RetryCmd = false; + do + { + // Send the command and receive the response + l_errlHndl = writeOccCmd(); + + // process response if OCC did not hit an exception + if (0 == iv_Occ->iv_exceptionLogged) + { + processOccResponse(l_errlHndl, cmdTraced); + } + + // skip retry if an exception was logged + } while ((iv_RetryCmd) && + (0 == iv_Occ->iv_exceptionLogged)); + } + else + { + // OCC has already logged an exception, no need to send + TMGT_ERR("Skipping 0x%02X cmd since OCC has already " + "logged an exception 0x%04X", + iv_OccCmd.cmdType, iv_Occ->iv_exceptionLogged); + /*@ + * @errortype + * @reasoncode HTMGT_RC_OCC_EXCEPTION + * @moduleid HTMGT_MOD_SEND_OCC_CMD + * @userdata1 OCC command + * @userdata2 comm established + * @userdata3 OCC state + * @userdata4 exception + * @devdesc Unable to send cmd to OCC exception + */ + bldErrLog(l_errlHndl, HTMGT_MOD_SEND_OCC_CMD, + HTMGT_RC_OCC_EXCEPTION, + iv_OccCmd.cmdType, l_commEstablished, + l_occState, iv_Occ->iv_exceptionLogged, + ERRORLOG::ERRL_SEV_INFORMATIONAL); + } } else { @@ -813,29 +844,38 @@ namespace HTMGT TMGT_BIN("OCC Exception Data (up to 64 bytes)", sramRspPtr, std::min(exceptionLength,(uint32_t)64)); - /*@ - * @errortype - * @reasoncode HTMGT_RC_INTERNAL_ERROR - * @moduleid HTMGT_MOD_HANLDE_OCC_EXCEPTION - * @userdata1[0-31] rsp status - * @userdata1[32-63] exception data length - * @userdata2[0-31] OCC instance - * @userdata2[32-63] exception data - * @devdesc OCC reported exception - */ - errlHndl_t l_excErr = NULL; - bldErrLog(l_excErr, HTMGT_MOD_HANLDE_OCC_EXCEPTION, - (htmgtReasonCode)(OCCC_COMP_ID | exceptionType), - exceptionType, exceptionDataLength, - iv_Occ->iv_instance, UINT32_GET(&sramRspPtr[5]), - ERRORLOG::ERRL_SEV_UNRECOVERABLE); - l_excErr->addFFDC(OCCC_COMP_ID, - sramRspPtr, - std::min(exceptionLength,(uint32_t)MAX_FFDC), - 1, // version - exceptionType); // subsection - ERRORLOG::errlCommit(l_excErr, HTMGT_COMP_ID); - + if (iv_Occ->iv_exceptionLogged != exceptionType) + { + /*@ + * @errortype + * @reasoncode HTMGT_RC_INTERNAL_ERROR + * @moduleid HTMGT_MOD_HANLDE_OCC_EXCEPTION + * @userdata1[0-31] rsp status + * @userdata1[32-63] exception data length + * @userdata2[0-31] OCC instance + * @userdata2[32-63] exception data + * @devdesc OCC reported exception + */ + errlHndl_t l_excErr = NULL; + bldErrLog(l_excErr, HTMGT_MOD_HANLDE_OCC_EXCEPTION, + (htmgtReasonCode)(OCCC_COMP_ID | exceptionType), + exceptionType, exceptionDataLength, + iv_Occ->iv_instance, UINT32_GET(&sramRspPtr[5]), + ERRORLOG::ERRL_SEV_UNRECOVERABLE); + l_excErr->addFFDC(OCCC_COMP_ID, + sramRspPtr, + std::min(exceptionLength, + (uint32_t)MAX_FFDC), + 1, // version + exceptionType); // subsection + ERRORLOG::errlCommit(l_excErr, HTMGT_COMP_ID); + + // Save exception so we don't log it again + iv_Occ->iv_exceptionLogged = exceptionType; + // This OCC needs to be reset to recover + iv_Occ->failed(true); + iv_Occ->iv_needsReset = true; + } } } #endif diff --git a/src/usr/htmgt/htmgt_poll.C b/src/usr/htmgt/htmgt_poll.C index 1b1cc7f1d..2bba2aca3 100644 --- a/src/usr/htmgt/htmgt_poll.C +++ b/src/usr/htmgt/htmgt_poll.C @@ -84,87 +84,91 @@ namespace HTMGT errlHndl_t err = NULL; uint8_t * poll_rsp = NULL; - TMGT_INF("sendOccPoll: Polling OCC%d", iv_instance); - bool continuePolling = false; - size_t elogCount = 10; - - do + // Only send poll if OCC has not logged an exception + if (0 == iv_exceptionLogged) { - // create 1 byte buffer for poll command data - const uint8_t l_cmdData[1] = { 0x10 /*version*/ }; + TMGT_INF("sendOccPoll: Polling OCC%d", iv_instance); + bool continuePolling = false; + size_t elogCount = 10; - OccCmd cmd(this, - OCC_CMD_POLL, - sizeof(l_cmdData), - l_cmdData); - - err = cmd.sendOccCmd(); - if (err != NULL) + do { - // Poll failed - TMGT_ERR("sendOccPoll: OCC%d poll failed with rc=0x%04X", - iv_instance, - err->reasonCode()); + // create 1 byte buffer for poll command data + const uint8_t l_cmdData[1] = { 0x10 /*version*/ }; - continuePolling = false; - } - else - { - // Poll succeeded, check response - uint32_t poll_rsp_size = cmd.getResponseData(poll_rsp); - if (poll_rsp_size >= OCC_POLL_DATA_MIN_SIZE) + OccCmd cmd(this, + OCC_CMD_POLL, + sizeof(l_cmdData), + l_cmdData); + + err = cmd.sendOccCmd(); + if (err != NULL) { - if (i_flushAllErrors) + // Poll failed + TMGT_ERR("sendOccPoll: OCC%d poll failed with rc=0x%04X", + iv_instance, + err->reasonCode()); + + continuePolling = false; + } + else + { + // Poll succeeded, check response + uint32_t poll_rsp_size = cmd.getResponseData(poll_rsp); + if (poll_rsp_size >= OCC_POLL_DATA_MIN_SIZE) { - const occPollRspStruct_t *currentPollRsp = - (occPollRspStruct_t *) poll_rsp; - if (currentPollRsp->errorId != 0) + if (i_flushAllErrors) { - if (--elogCount > 0) + const occPollRspStruct_t *currentPollRsp = + (occPollRspStruct_t *) poll_rsp; + if (currentPollRsp->errorId != 0) { - // An error was returned, keep polling OCC - continuePolling = true; + if (--elogCount > 0) + { + // An error was returned, keep polling OCC + continuePolling = true; + } + else + { + // Limit number of elogs retrieved so + // we do not get stuck in loop + TMGT_INF("sendOccPoll: OCC%d still has" + "more errors to report.", + iv_instance); + continuePolling = false; + } } else { - // Limit number of elogs retrieved so - // we do not get stuck in loop - TMGT_INF("sendOccPoll: OCC%d still has" - "more errors to report.", - iv_instance); continuePolling = false; } } - else - { - continuePolling = false; - } + pollRspHandler(poll_rsp, poll_rsp_size); + } + else + { + TMGT_ERR("sendOccPoll: OCC%d poll command response " + "failed with invalid data length %d", + iv_instance, poll_rsp_size); + /*@ + * @errortype + * @reasoncode HTMGT_RC_INVALID_LENGTH + * @moduleid HTMGT_MOD_OCC_POLL + * @userdata1 OCC instance + * @devdesc Invalid POLL response length + */ + bldErrLog(err, + HTMGT_MOD_OCC_POLL, + HTMGT_RC_INVALID_LENGTH, + iv_instance, 0, 0, 0, + ERRORLOG::ERRL_SEV_INFORMATIONAL); + + continuePolling = false; } - pollRspHandler(poll_rsp, poll_rsp_size); } - else - { - TMGT_ERR("sendOccPoll: OCC%d poll command response " - "failed with invalid data length %d", - iv_instance, poll_rsp_size); - /*@ - * @errortype - * @reasoncode HTMGT_RC_INVALID_LENGTH - * @moduleid HTMGT_MOD_OCC_POLL - * @userdata1 OCC instance - * @devdesc Invalid POLL response length - */ - bldErrLog(err, - HTMGT_MOD_OCC_POLL, - HTMGT_RC_INVALID_LENGTH, - iv_instance, 0, 0, 0, - ERRORLOG::ERRL_SEV_INFORMATIONAL); - - continuePolling = false; - } } + while (continuePolling); } - while (continuePolling); return err; } |