diff options
author | Chris Cain <cjcain@us.ibm.com> | 2015-02-24 16:13:42 -0600 |
---|---|---|
committer | A. Patrick Williams III <iawillia@us.ibm.com> | 2015-02-28 05:53:20 -0600 |
commit | f3d348bde5bd0fbd9a707fd1635bcb3a90d9210d (patch) | |
tree | 52961091c8a645eb99e3adc1d88d59d6687dc162 /src | |
parent | 29581aca6a1ed02d3374e5688e5f32fcb6f104bc (diff) | |
download | talos-hostboot-f3d348bde5bd0fbd9a707fd1635bcb3a90d9210d.tar.gz talos-hostboot-f3d348bde5bd0fbd9a707fd1635bcb3a90d9210d.zip |
Support for OCC error reporting
Change-Id: If8cce2f960b28cda2f039f68e9527df92f9233f2
RTC: 121729
Reviewed-on: http://gfw160.aus.stglabs.ibm.com:8080/gerrit/15971
Tested-by: Jenkins Server
Reviewed-by: Daniel M. Crowell <dcrowell@us.ibm.com>
Reviewed-by: A. Patrick Williams III <iawillia@us.ibm.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/usr/htmgt/HBconfig | 6 | ||||
-rw-r--r-- | src/usr/htmgt/htmgt.C | 124 | ||||
-rw-r--r-- | src/usr/htmgt/htmgt_activate.C | 10 | ||||
-rw-r--r-- | src/usr/htmgt/htmgt_cfgdata.C | 2 | ||||
-rw-r--r-- | src/usr/htmgt/htmgt_occ.C | 120 | ||||
-rw-r--r-- | src/usr/htmgt/htmgt_occ.H | 122 | ||||
-rw-r--r-- | src/usr/htmgt/htmgt_occcmd.C | 162 | ||||
-rw-r--r-- | src/usr/htmgt/htmgt_poll.C | 14 | ||||
-rw-r--r-- | src/usr/htmgt/htmgt_utility.C | 8 | ||||
-rw-r--r-- | src/usr/htmgt/htmgt_utility.H | 6 | ||||
-rw-r--r-- | src/usr/htmgt/occError.C | 357 | ||||
-rw-r--r-- | src/usr/htmgt/occError.H | 70 |
12 files changed, 646 insertions, 355 deletions
diff --git a/src/usr/htmgt/HBconfig b/src/usr/htmgt/HBconfig index 00a3ce36f..be84698b8 100644 --- a/src/usr/htmgt/HBconfig +++ b/src/usr/htmgt/HBconfig @@ -3,12 +3,6 @@ config HTMGT help Enable Host TMGT in hostboot/HBRT -config DELAY_AFTER_OCC_ACTIVATION - default n - help - After OCC has been activated, wait for 30 seconds for any - potential errors to be reported before continuing the IPL - config CONSOLE_OUTPUT_OCC_COMM default n help diff --git a/src/usr/htmgt/htmgt.C b/src/usr/htmgt/htmgt.C index 33d4f16c2..aff25008a 100644 --- a/src/usr/htmgt/htmgt.C +++ b/src/usr/htmgt/htmgt.C @@ -68,32 +68,27 @@ namespace HTMGT if (i_startCompleted) { // Query functional OCCs - const uint8_t numOccs = occMgr::instance().buildOccs(); + const uint8_t numOccs = OccManager::buildOccs(); if (numOccs > 0) { - if (NULL != occMgr::instance().getMasterOcc()) + if (NULL != OccManager::getMasterOcc()) { do { #ifndef __HOSTBOOT_RUNTIME - if (false == occMgr::instance().iv_configDataBuilt) + // Build pstate tables (once per IPL) + l_err = genPstateTables(); + if(l_err) { - // Build pstate tables (once per IPL) - l_err = genPstateTables(); - if(l_err) - { - break; - } - - // Calc memory throttles (once per IPL) - calcMemThrottles(); - - occMgr::instance().iv_configDataBuilt = true; + break; } + + // Calc memory throttles (once per IPL) + calcMemThrottles(); #endif // Make sure OCCs are ready for communication - occMgr::instance().waitForOccCheckpoint(); + OccManager::waitForOccCheckpoint(); #ifdef __HOSTBOOT_RUNTIME // TODO RTC 124738 Final solution TBD @@ -137,22 +132,6 @@ namespace HTMGT ERRORLOG::errlCommit(l_err, HTMGT_COMP_ID); } - // @TODO RTC 120059 remove after elog alerts supported -#ifdef CONFIG_DELAY_AFTER_OCC_ACTIVATION - // Delay to allow the OCC to complete several - // sensor readings and create errors if necessary - TMGT_INF("Delay after OCC activation"); - nanosleep(30, 0); - // Poll the OCCs to retrieve any errors that may - // have been created - TMGT_INF("Send final poll to all OCCs"); - l_err = OccManager::sendOccPoll(true); - if (l_err) - { - ERRORLOG::errlCommit(l_err, HTMGT_COMP_ID); - } -#endif - } while(0); } else @@ -206,13 +185,17 @@ namespace HTMGT if (NULL != l_err) { TMGT_ERR("OCCs not all active. System will stay in safe mode"); -#ifndef __HOSTBOOT_RUNTIME - CONSOLE::displayf(HTMGT_COMP_NAME, "OCCs are not active " - "(rc=0x%04X). System will remain in safe mode", - l_err->reasonCode()); -#endif - // TODO: RTC 109066 - //stopAllOccs(); + TMGT_CONSOLE("OCCs are not active (rc=0x%04X). " + "System will remain in safe mode", + l_err->reasonCode()); + TMGT_INF("Calling HBOCC::stopAllOCCs"); + errlHndl_t err2 = HBOCC::stopAllOCCs(); + if(err2) + { + TMGT_ERR("stopAllOCCs() failed with 0x%04X", + err2->reasonCode()); + ERRORLOG::errlCommit(err2, HTMGT_COMP_ID); + } // Update error log to unrecoverable and set SRC // to indicate the system will remain in safe mode @@ -244,21 +227,52 @@ namespace HTMGT // Notify HTMGT that an OCC has an error to report - void processOccError(TARGETING::Target * i_proc) + void processOccError(TARGETING::Target * i_procTarget) { - const uint32_t l_huid = i_proc->getAttr<TARGETING::ATTR_HUID>(); - TMGT_INF("processOccError(HUID=0x%08X) called", l_huid); + bool polledOneOcc = false; + OccManager::buildOccs(); - //TARGETING::Target * failedOccTarget = NULL; - // Get OCC target (one per proc) - TARGETING::TargetHandleList pOccs; - getChildChiplets(pOccs, i_proc, TARGETING::TYPE_OCC); - if (pOccs.size() > 0) + if (i_procTarget != NULL) + { + const uint32_t l_huid = + i_procTarget->getAttr<TARGETING::ATTR_HUID>(); + TMGT_INF("processOccError(HUID=0x%08X) called", l_huid); + + TARGETING::TargetHandleList pOccs; + getChildChiplets(pOccs, i_procTarget, TARGETING::TYPE_OCC); + if (pOccs.size() > 0) + { + // Poll specified OCC flushing any errors + errlHndl_t err = OccManager::sendOccPoll(true, pOccs[0]); + if (err) + { + ERRORLOG::errlCommit(err, HTMGT_COMP_ID); + } + polledOneOcc = true; + } + } + + if ((OccManager::getNumOccs() > 1) || (false == polledOneOcc)) { - // failedOccTarget = pOccs[0]; + // Send POLL command to all OCCs to flush any other errors + errlHndl_t err = OccManager::sendOccPoll(true); + if (err) + { + ERRORLOG::errlCommit(err, HTMGT_COMP_ID); + } } - // TODO RTC 109224 + if (OccManager::occNeedsReset()) + { + TMGT_ERR("processOccError(): OCCs need to be reset"); + // Don't pass failed target as OCC should have already + // been marked as failed during the poll. + errlHndl_t err = OccManager::resetOccs(NULL); + if(err) + { + ERRORLOG::errlCommit(err, HTMGT_COMP_ID); + } + } } // end processOccError() @@ -340,13 +354,25 @@ namespace HTMGT } // Set state for all OCCs - errlHndl_t l_err = occMgr::instance().setOccState(targetState); + errlHndl_t l_err = OccManager::setOccState(targetState); if (NULL == l_err) { TMGT_INF("enableOccActuation: OCC states updated to 0x%02X", targetState); } + if (OccManager::occNeedsReset()) + { + TMGT_ERR("enableOccActuation(): OCCs need to be reset"); + // Don't pass failed target as OCC should have already + // been marked as failed during the poll. + errlHndl_t err2 = OccManager::resetOccs(NULL); + if(err2) + { + ERRORLOG::errlCommit(err2, HTMGT_COMP_ID); + } + } + return l_err; } // end enableOccActuation() diff --git a/src/usr/htmgt/htmgt_activate.C b/src/usr/htmgt/htmgt_activate.C index c23e8b0a4..7f54d6dbe 100644 --- a/src/usr/htmgt/htmgt_activate.C +++ b/src/usr/htmgt/htmgt_activate.C @@ -61,11 +61,11 @@ namespace HTMGT const size_t MAX_POLL = 40; const size_t MSEC_BETWEEN_POLLS = 250; size_t numPolls = 0; - std::vector<Occ*> occList = occMgr::instance().getOccArray(); + std::vector<Occ*> occList = OccManager::getOccArray(); // Determine which bit to check uint8_t targetBit = OCC_STATUS_ACTIVE_READY; - if (OCC_STATE_OBSERVATION == occMgr::instance().getTargetState()) + if (OCC_STATE_OBSERVATION == OccManager::getTargetState()) { targetBit = OCC_STATUS_OBS_READY; } @@ -139,7 +139,7 @@ namespace HTMGT { // Send Set State command to master OCC. // The master will use the target state (default = ACTIVE) - l_err = occMgr::instance().setOccState(); + l_err = OccManager::setOccState(); } return l_err; @@ -153,8 +153,8 @@ namespace HTMGT { errlHndl_t l_err = NULL; - TMGT_INF("setOccActiveSensors:"); - std::vector<Occ*> occList = occMgr::instance().getOccArray(); + TMGT_INF("setOccActiveSensors: %s", i_activate?"active":"inactive"); + std::vector<Occ*> occList = OccManager::getOccArray(); for (std::vector<Occ*>::iterator itr = occList.begin(); (itr < occList.end()); ++itr) diff --git a/src/usr/htmgt/htmgt_cfgdata.C b/src/usr/htmgt/htmgt_cfgdata.C index 338e29efa..d9882ca01 100644 --- a/src/usr/htmgt/htmgt_cfgdata.C +++ b/src/usr/htmgt/htmgt_cfgdata.C @@ -76,7 +76,7 @@ namespace HTMGT if (validFormat) { // Loop through all functional OCCs - std::vector<Occ*> occList = occMgr::instance().getOccArray(); + std::vector<Occ*> occList = OccManager::getOccArray(); for (std::vector<Occ*>::iterator itr = occList.begin(); itr < occList.end(); itr++) diff --git a/src/usr/htmgt/htmgt_occ.C b/src/usr/htmgt/htmgt_occ.C index 237df3e16..8a539f446 100644 --- a/src/usr/htmgt/htmgt_occ.C +++ b/src/usr/htmgt/htmgt_occ.C @@ -185,10 +185,14 @@ namespace HTMGT uint8_t cmdData[2]; cmdData[0] = OCC_RESET_CMD_VERSION; + TMGT_INF("resetPrep: OCC%d (failed=%c, reset count=%d)", + iv_instance, iv_failed?'y':'n', iv_resetCount); if(iv_failed) { cmdData[1] = OCC_RESET_FAIL_THIS_OCC; ++iv_resetCount; + TMGT_INF("resetPrep: OCC%d failed, incrementing reset count to %d", + iv_instance, iv_resetCount); if(iv_resetCount > OCC_RESET_COUNT_THRESHOLD) { atThreshold = true; @@ -221,13 +225,28 @@ namespace HTMGT return atThreshold; } + + void Occ::postResetClear() + { + iv_state = OCC_STATE_UNKNOWN; + iv_commEstablished = false; + iv_needsReset = false; + iv_failed = false; + iv_lastPollValid = false; + iv_resetReason = OCC_RESET_REASON_NONE; + } + + + ///////////////////////////////////////////////////////////////// + uint32_t OccManager::cv_safeReturnCode = 0; + uint32_t OccManager::cv_safeOccInstance = 0; + OccManager::OccManager() - :iv_configDataBuilt(false), - iv_occMaster(NULL), + :iv_occMaster(NULL), iv_state(OCC_STATE_UNKNOWN), iv_targetState(OCC_STATE_ACTIVE) { @@ -297,7 +316,7 @@ namespace HTMGT ((*proc)->getAttr<TARGETING::ATTR_HOMER_VIRT_ADDR>()); const uint8_t * homerPhys = (uint8_t*) ((*proc)->getAttr<TARGETING::ATTR_HOMER_PHYS_ADDR>()); - TMGT_INF("buildOccs: homer = 0x%08X (virt) / 0x%08X (phys)" + TMGT_INF("buildOccs: homer = 0x%08llX (virt) / 0x%08llX (phys)" " for Proc%d", homer, homerPhys, instance); #ifdef SIMICS_TESTING // Starting of OCCs is not supported in SIMICS, so fake out @@ -413,6 +432,18 @@ namespace HTMGT // requests a new state, so we can update target here. iv_targetState = requestedState; + _buildOccs(); // if not already built. + + // Send poll cmd to confirm comm has been established. + // Flush old errors to ensure any new errors will be collected + l_err = _sendOccPoll(true, NULL); + if (l_err) + { + TMGT_ERR("_setOccState: Poll OCCs failed."); + // Proceed with reset even if failed + ERRORLOG::errlCommit(l_err, HTMGT_COMP_ID); + } + if (NULL != iv_occMaster) { TMGT_INF("_setOccState(state=0x%02X)", requestedState); @@ -502,18 +533,15 @@ namespace HTMGT TMGT_INF("_setOccState: All OCCs have reached state 0x%02X", requestedState); -#ifndef __HOSTBOOT_RUNTIME if (OCC_STATE_ACTIVE == requestedState) { - CONSOLE::displayf(HTMGT_COMP_NAME, - "OCCs are now running in ACTIVE state"); + TMGT_CONSOLE("OCCs are now running in ACTIVE state"); } else { - CONSOLE::displayf(HTMGT_COMP_NAME, - "OCCs are now running in OBSERVATION state"); + TMGT_CONSOLE("OCCs are now running in OBSERVATION " + "state"); } -#endif } } @@ -609,11 +637,11 @@ namespace HTMGT occ != iv_occArray.end(); ++occ) { - (*occ)->failed(false); + // After OCCs have been reset, clear flags + (*occ)->postResetClear(); } TMGT_INF("Calling HBOCC::activateOCCs"); - err = HBOCC::activateOCCs(); if(err) { @@ -654,6 +682,9 @@ namespace HTMGT { sys->setAttr<TARGETING::ATTR_HTMGT_SAFEMODE>(safeMode); } + + TMGT_ERR("_resetOccs: Safe Mode RC: 0x%04X (OCC%d)", + cv_safeReturnCode, cv_safeOccInstance); } return err; @@ -740,6 +771,36 @@ namespace HTMGT } + void OccManager::_updateSafeModeReason(uint32_t i_src, + uint32_t i_instance) + { + if (cv_safeReturnCode == 0) + { + // Only update safe mode reason for the first failure + cv_safeReturnCode = i_src; + cv_safeOccInstance = i_instance; + } + } + + + bool OccManager::_occNeedsReset() + { + bool needsReset = false; + + for (std::vector<Occ*>::iterator pOcc = iv_occArray.begin(); + pOcc < iv_occArray.end(); + pOcc++) + { + if ((*pOcc)->needsReset()) + { + needsReset = true; + break; + } + } + + return needsReset; + } + uint8_t OccManager::getNumOccs() { @@ -788,36 +849,17 @@ namespace HTMGT return Singleton<OccManager>::instance()._waitForOccCheckpoint(); } - -#if 0 - // TODO: RTC 115296 - void update_occ_data() + void OccManager::updateSafeModeReason(uint32_t i_src, + uint32_t i_instance) { - if (occMgr::instance().getNumOccs() > 0) - { - // TBD: define as one block of data or in each OCC target? - - uint32_t dataSize = occMgr::instance().getNumOccs() * - sizeof(occInstance); - if (dataSize > 256) - { - TMGT_ERR("update_occ_data: data exceeds attr size, truncating"); - dataSize = 256; - } - // Update OCC_CONTROL_DATA Attribute - bool success = ->trySetAttr<ATTR_OCC_CONTROL_DATA>(dataSize, G_occ); - if (false == success) - { - TMGT_ERR("update_occ_data: failed to update OCC_CONTROL_DATA"); - } - } - else - { - TMGT_INF("update_occ_data: No OCC data to update"); - } - } // end update_occ_data() -#endif + return Singleton<OccManager>::instance(). + _updateSafeModeReason(i_src, i_instance); + } + bool OccManager::occNeedsReset() + { + return Singleton<OccManager>::instance()._occNeedsReset(); + } } // end namespace diff --git a/src/usr/htmgt/htmgt_occ.H b/src/usr/htmgt/htmgt_occ.H index 13d17a1a5..dec19b883 100644 --- a/src/usr/htmgt/htmgt_occ.H +++ b/src/usr/htmgt/htmgt_occ.H @@ -67,6 +67,26 @@ namespace HTMGT OCC_RESET_COUNT_THRESHOLD = 3, }; + enum occResetReason + { + OCC_RESET_REASON_NONE = 0x00, + OCC_RESET_REASON_CRIT_FAILURE = 0x01, + OCC_RESET_REASON_PWR_ON_FAIL = 0x02, + OCC_RESET_REASON_ERROR = 0x03, + OCC_RESET_REASON_POWER_FAULT = 0x04, + OCC_RESET_REASON_DIFF_OCC = 0x05, + OCC_RESET_REASON_OCC_REQUEST = 0x06, + }; + + // OCC Callout Structure + struct occErrlCallout + { + uint8_t type; + uint64_t calloutValue; + uint8_t priority; + uint16_t reserved1; + } __attribute__ ((__packed__)); + typedef struct occErrlCallout occErrlCallout_t; /** @@ -211,6 +231,7 @@ namespace HTMGT #endif } + /** * @brief Set failed state * @param[in] failed state @@ -219,6 +240,14 @@ namespace HTMGT /** + * @brief Determine if OCC needs to be reset + * + * @return true if this OCC needs to be reset + */ + bool needsReset() { return iv_needsReset; } + + + /** * @brief Return OCCs present bits * * @return bitmask representing this OCC position @@ -248,6 +277,42 @@ namespace HTMGT void pollRspHandler(const uint8_t * i_pollResponse, const uint16_t i_pollResponseSize); + /** + * @brief Collect, Commit and Clear error log from the OCC + * + * @param[in] i_id OCC elog id to retrieve + * @param[in] i_address SRAM address for elog entry + * @param[in] i_length size of the elog entry + */ + void occProcessElog(const uint8_t i_id, + const uint32_t i_address, + const uint16_t i_length); + + /** + * @brief Determine what actions are required for elog + * + * @param[in] i_actions Action requested by OCC + * @param[out] o_occReset returns true if OCC reset is needed + * @param[out] o_errlSeverity severity to use for elog commit + */ + void elogProcessActions(const uint8_t i_actions, + bool & o_occReset, + ERRORLOG::errlSeverity_t & o_errlSeverity); + + /** + * @brief Add specified callout to the error log + * + * @param[in,out] io_errlHndl elog to add callout + * @param[in] i_priority priority for callout + * @param[in] i_callout callout from OCC + * @param[in,out] io_numCallouts number of callouts in elog, + * incremented if new callout added + * */ + bool elogAddCallout(errlHndl_t & io_errlHndl, + HWAS::callOutPriority & i_priority, + const occErrlCallout_t i_callout, + uint8_t & io_callout_num); + protected: // Instance number of this OCC: 0 = first physical OCC @@ -277,6 +342,14 @@ namespace HTMGT // expected occsPresent byte in POLL response uint8_t iv_occsPresent; + occResetReason iv_resetReason; + + + /** + * @brief Clear flags after OCC has been reset + */ + void postResetClear(); + private: // Reset count @@ -303,14 +376,6 @@ namespace HTMGT public: /** - * @brief true if the required config data has been built - * This must be persisted across IPL/RT - * TODO: RTC 115296 - */ - bool iv_configDataBuilt; - - - /** * @brief Constructor */ OccManager(); @@ -372,8 +437,8 @@ namespace HTMGT * * @return NULL on success, or error handle on failure */ - errlHndl_t setOccState(const occStateId i_state = - OCC_STATE_NO_CHANGE); + static errlHndl_t setOccState(const occStateId i_state = + OCC_STATE_NO_CHANGE); /** @@ -396,7 +461,7 @@ namespace HTMGT * function will wait up to 10 seconds for all OCCs * before returning to the caller. */ - void waitForOccCheckpoint(); + static void waitForOccCheckpoint(); /** @@ -416,6 +481,24 @@ namespace HTMGT TARGETING::Target * i_occTarget = NULL); + /** + * @brief Save the reason that the system is entering safe mode + * + * @param[in] i_src SRC which triggered safe mode + * @param[in] i_instance OCC which triggered safe mode + */ + static void updateSafeModeReason(uint32_t i_src, + uint32_t i_instance); + + + /** + * @brief Check if any OCCs need to be reset + * + * @return true if any OCC needs to be reset + */ + static bool occNeedsReset(); + + private: typedef std::vector<Occ*> occList_t; @@ -425,6 +508,17 @@ namespace HTMGT occStateId iv_state; occStateId iv_targetState; + /** + * @brief SRC that caused system to enter safe mode + */ + static uint32_t cv_safeReturnCode; + + /** + * @brief OCC instance that triggered safe mode + */ + static uint32_t cv_safeOccInstance; + + /* See buildOccs() above */ uint32_t _buildOccs(); @@ -479,6 +573,12 @@ namespace HTMGT _sendOccPoll(const bool i_flushAllErrors, TARGETING::Target * i_occTarget); + /** See updateSafeModeReason() above */ + void _updateSafeModeReason(uint32_t i_src, + uint32_t i_instance); + + /** See occNeedsReset() above */ + bool _occNeedsReset(); }; typedef Singleton<OccManager> occMgr; diff --git a/src/usr/htmgt/htmgt_occcmd.C b/src/usr/htmgt/htmgt_occcmd.C index 3e60c5775..a6ccf0c23 100644 --- a/src/usr/htmgt/htmgt_occcmd.C +++ b/src/usr/htmgt/htmgt_occcmd.C @@ -97,17 +97,17 @@ namespace HTMGT #ifndef __HOSTBOOT_RUNTIME if (i_header[0] != '\0') { - CONSOLE::displayf(HTMGT_COMP_NAME, "%s", i_header); + TMGT_CONSOLE("%s", i_header); } uint16_t index = 0; while (index < i_len) { - CONSOLE::displayf(HTMGT_COMP_NAME, "%04X: %08X %08X %08X %08X", - index, - UINT32_GET(&i_data[index]), - UINT32_GET(&i_data[index+4]), - UINT32_GET(&i_data[index+8]), - UINT32_GET(&i_data[index+12])); + TMGT_CONSOLE("%04X: %08X %08X %08X %08X", + index, + UINT32_GET(&i_data[index]), + UINT32_GET(&i_data[index+4]), + UINT32_GET(&i_data[index+8]), + UINT32_GET(&i_data[index+12])); index += 16; } CONSOLE::flush(); @@ -130,7 +130,7 @@ namespace HTMGT { uint8_t l_index = 0; - // TODO RTC 109224 - convert to use lower_bound + // TODO RTC 124739 - convert to use lower_bound //= find(&cv_occCommandTable[0], // &cv_occCommandTable[OCC_CMDTABLE_SIZE-1], // i_cmd); @@ -182,7 +182,7 @@ namespace HTMGT }; uint8_t l_idx = 0; - // TODO RTC 109224 + // TODO RTC 124739 for (l_idx=0; l_idx < STATUS_STRING_COUNT; l_idx++) { if (i_status == L_status_string[l_idx].str_num) @@ -320,7 +320,7 @@ namespace HTMGT rsp_status_string(iv_OccRsp.returnStatus)); } - // TODO RTC 109224 - refactor/optimize trace strings + // TODO RTC 124739 - refactor/optimize trace strings TMGT_INF("OCC%d rsp status=0x%02X%s, length=0x%04X", l_instance, iv_OccRsp.returnStatus, l_rsp_status_string, iv_OccRsp.dataLength); @@ -677,7 +677,8 @@ namespace HTMGT uint16_t rspLength = 0; if (G_debug_trace & DEBUG_TRACE_VERBOSE) { - TMGT_INF("waitForOccRsp(%d) address=0x%08X", i_timeout, rspBuffer); + TMGT_INF("waitForOccRsp(%d) address=0x%08llX", + i_timeout, rspBuffer); } bool l_time_expired = true; @@ -735,36 +736,15 @@ namespace HTMGT // time expired l_msec_remaining = -1; + TMGT_ERR("waitForOccRsp: OCC%d timeout waiting for" + " response", iv_Occ->iv_instance); + uint8_t * const rspBuffer = iv_Occ->iv_homer + + OCC_RSP_ADDR; + TMGT_BIN("Rsp Buffer (32 bytes)", rspBuffer, 32); + // Read SRAM response buffer to check for exception // (On exception, data may not be copied to HOMER) - const uint16_t l_length = 4*KILOBYTE; - uint8_t l_sram_data[l_length]; - ecmdDataBufferBase l_buffer(l_length*8); // convert to bits -// HBOCC is only defined for HTMGT -#ifdef CONFIG_HTMGT - errlHndl_t l_err = HBOCC::readSRAM(iv_Occ->getTarget(), - OCC_RSP_SRAM_ADDR, - l_buffer); - if (NULL == l_err) -#endif - { - const uint32_t l_flatSize = l_buffer.flattenSize(); - l_buffer.flatten(l_sram_data, l_flatSize); - // Skip 8 byte ecmd header - const uint8_t *sramRspPtr = &l_sram_data[8]; - // Check response status for exception - if (0xE0 == (sramRspPtr[2] & 0xE0)) - { - TMGT_ERR("waitForOccRsp: OCC%d timeout waiting for" - " response, and OCC 0x%02X exception found", - iv_Occ->iv_instance, sramRspPtr[2]); - // Exception found, copy data to rsp buffer - uint8_t * const rspBuffer = iv_Occ->iv_homer + - OCC_RSP_ADDR; - memcpy(rspBuffer, sramRspPtr, l_length); - TMGT_BIN("SRAM Rsp Buffer (32 bytes)", sramRspPtr, 32); - } - } + handleOccException(); } } // while(time remaining) @@ -783,45 +763,79 @@ namespace HTMGT - // Create/commit an error log with the OCC exception data + // Check for an OCC exception in SRAM. If found: + // create/commit an error log with the OCC exception data void OccCmd::handleOccException(void) { - // Exception length includes response header (w/o checksum) and - // the data length - uint32_t l_exceptionDataLength = OCC_RSP_HDR_LENGTH - 2 + - iv_OccRsp.dataLength; - - TMGT_ERR("handleOccException: OCC%d returned abnormal rsp status of" - " 0x%02X, rsp len=%d", - iv_Occ->iv_instance, iv_OccRsp.returnStatus, - l_exceptionDataLength); - if (l_exceptionDataLength > 4*KILOBYTE) +#ifdef CONFIG_HTMGT + // Read SRAM to check for exception + // (Exception data not copied into HOMER) + const uint16_t l_length = 4*KILOBYTE; + uint8_t l_sram_data[l_length]; + ecmdDataBufferBase l_buffer(l_length*8); // convert to bits + errlHndl_t l_err = HBOCC::readSRAM(iv_Occ->getTarget(), + OCC_RSP_SRAM_ADDR, + l_buffer); + if (NULL == l_err) { - TMGT_INF("handleOccException: truncating data length to 4K"); - l_exceptionDataLength = 4*KILOBYTE; - // TODO RTC 109224 - HB elogs are only 4K - } + const uint32_t l_flatSize = l_buffer.flattenSize(); + l_buffer.flatten(l_sram_data, l_flatSize); + // Skip 8 byte ecmd header + const uint8_t *sramRspPtr = &l_sram_data[8]; + // Check buffer status for exception + if ((l_flatSize >= 3) && (0xE0 == (sramRspPtr[2] & 0xE0))) + { + const uint8_t exceptionType = sramRspPtr[2]; + uint16_t exceptionDataLength = 0; + if (l_flatSize >= 5) + { + exceptionDataLength = UINT16_GET(&sramRspPtr[3]); + } + // Exception length includes response header (w/o checksum) and + // the data length + uint32_t exceptionLength = OCC_RSP_HDR_LENGTH - 2 + + exceptionDataLength; + if (exceptionLength > l_flatSize) + { + exceptionLength = l_flatSize; + } + + TMGT_ERR("handleOccException: OCC%d SRAM has exception" + " 0x%02X, length=%d", + iv_Occ->iv_instance, exceptionType, + exceptionDataLength); + if (exceptionLength > 4*KILOBYTE) + { + TMGT_INF("handleOccException: truncating length to 4K"); + exceptionLength = 4*KILOBYTE; + // TODO RTC 124739 - HB elogs are only 4K + } + + /*@ + * @errortype + * @reasoncode HTMGT_RC_INTERNAL_ERROR + * @moduleid HTMGT_MOD_HANLDE_OCC_EXCEPTION + * @userdata1[0-15] rsp status + * @userdata1[16-31] exception data length + * @userdata2[0-15] OCC instance + * @devdesc OCC reported exception + */ + errlHndl_t l_excErr = NULL; + bldErrLog(l_excErr, HTMGT_MOD_HANLDE_OCC_EXCEPTION, + (htmgtReasonCode)(OCCC_COMP_ID | exceptionType), + exceptionType, exceptionDataLength, + iv_Occ->iv_instance, 0, + ERRORLOG::ERRL_SEV_UNRECOVERABLE); + l_excErr->addFFDC(OCCC_COMP_ID, + sramRspPtr, + std::min(exceptionLength,(uint32_t)MAX_FFDC), + 1, // version + exceptionType); // subsection + ERRORLOG::errlCommit(l_excErr, HTMGT_COMP_ID); - /*@ - * @errortype - * @reasoncode HTMGT_RC_INTERNAL_ERROR - * @moduleid HTMGT_MOD_HANLDE_OCC_EXCEPTION - * @userdata1[0-15] rsp status - * @userdata1[16-31] exception data length - * @devdesc OCC reported exception - */ - errlHndl_t l_excErr = NULL; - bldErrLog(l_excErr, HTMGT_MOD_HANLDE_OCC_EXCEPTION, - (htmgtReasonCode)(OCCC_COMP_ID | iv_OccRsp.returnStatus), - iv_OccRsp.returnStatus, iv_OccRsp.dataLength, 0, 0, - ERRORLOG::ERRL_SEV_UNRECOVERABLE); - const uint8_t * const exceptionData = iv_Occ->iv_homer + OCC_RSP_ADDR; - l_excErr->addFFDC(OCCC_COMP_ID, - exceptionData, - std::min(l_exceptionDataLength, (uint32_t)MAX_FFDC), - 1, // version - iv_OccRsp.returnStatus); // subsection == exception rc - ERRORLOG::errlCommit(l_excErr, HTMGT_COMP_ID); + } + } +#endif } // end OccCmd::handleOccException() diff --git a/src/usr/htmgt/htmgt_poll.C b/src/usr/htmgt/htmgt_poll.C index 6729b652c..e8a81a976 100644 --- a/src/usr/htmgt/htmgt_poll.C +++ b/src/usr/htmgt/htmgt_poll.C @@ -60,6 +60,11 @@ namespace HTMGT } } + if (occNeedsReset()) + { + TMGT_ERR("_sendOccPoll(): OCCs need to be reset"); + } + return err; } @@ -223,8 +228,7 @@ namespace HTMGT } // Handle a new error log from the OCC - occProcessElog(this, - pollRsp->errorId, + occProcessElog(pollRsp->errorId, pollRsp->errorAddress, pollRsp->errorLength); if (iv_needsReset) @@ -275,8 +279,7 @@ namespace HTMGT iv_role, pollRsp->status, ERRORLOG::ERRL_SEV_INFORMATIONAL); ERRORLOG::errlCommit(l_err, HTMGT_COMP_ID); - // TODO RTC 109224 - //iv_resetReason = OCC_RESET_REASON_ERROR; + iv_resetReason = OCC_RESET_REASON_ERROR; break; } @@ -303,8 +306,7 @@ namespace HTMGT iv_occsPresent, pollRsp->status, ERRORLOG::ERRL_SEV_INFORMATIONAL); ERRORLOG::errlCommit(l_err, HTMGT_COMP_ID); - // TODO RTC 109224 - //iv_resetReason = OCC_RESET_REASON_ERROR; + iv_resetReason = OCC_RESET_REASON_ERROR; } } diff --git a/src/usr/htmgt/htmgt_utility.C b/src/usr/htmgt/htmgt_utility.C index 5cb689121..8125a7567 100644 --- a/src/usr/htmgt/htmgt_utility.C +++ b/src/usr/htmgt/htmgt_utility.C @@ -54,7 +54,7 @@ namespace HTMGT " %08X, sev: 0x%02X, fw:%c", i_modid, i_rc, i_data1, i_data2, i_data3, i_data4, i_sev, i_addFwCallout?'y':'n'); - // TODO RTC 109224 - RAS review what logs need fw callout + // TODO RTC 124739 - RAS review what logs need fw callout if (NULL == io_err) { @@ -70,7 +70,7 @@ namespace HTMGT } else { - // TODO RTC 109224: + // TODO RTC 124739 // - collectTrace will not filter dup traces and no way to clear // - no way to add secondary SRC to elog io_err->collectTrace("HTMGT"); @@ -90,7 +90,7 @@ namespace HTMGT } - // TODO RTC 109224 - refactor/optimize trace strings + // TODO RTC 124739 - refactor/optimize trace strings // Internal utility to convert OCC command type to a string const char *command_string(const uint8_t i_cmd) @@ -114,7 +114,7 @@ namespace HTMGT const uint8_t l_total = sizeof(L_cmd_string) / sizeof(struct string_data_t); - // TODO RTC 109224 + // TODO RTC 124739 uint8_t l_idx = 0; for (l_idx=0; l_idx<l_total; l_idx++) { diff --git a/src/usr/htmgt/htmgt_utility.H b/src/usr/htmgt/htmgt_utility.H index 3cb772f7b..3b1c217d6 100644 --- a/src/usr/htmgt/htmgt_utility.H +++ b/src/usr/htmgt/htmgt_utility.H @@ -49,6 +49,12 @@ #define TMGT_BIN( _fmt_, _args_...) \ TRACFBIN( g_trac_htmgt, _fmt_, ##_args_ ) +#ifndef __HOSTBOOT_RUNTIME +#define TMGT_CONSOLE( _fmt_, _args_...) \ + CONSOLE::displayf( HTMGT_COMP_NAME, _fmt_, ##_args_ ) +#else +#define TMGT_CONSOLE(_fmt_, _args_...) +#endif inline uint16_t UINT16_GET(const uint8_t * i_ptr) { diff --git a/src/usr/htmgt/occError.C b/src/usr/htmgt/occError.C index 8b2963b43..00e4cc0e1 100644 --- a/src/usr/htmgt/occError.C +++ b/src/usr/htmgt/occError.C @@ -1,11 +1,11 @@ /* IBM_PROLOG_BEGIN_TAG */ /* This is an automatically generated prolog. */ /* */ -/* $Source: src/usr/htmgt/tmgtutility.C $ */ +/* $Source: src/usr/htmgt/occError.C $ */ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2014 */ +/* Contributors Listed Below - COPYRIGHT 2014,2015 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -31,16 +31,64 @@ #include <ecmdDataBufferBase.H> #include <hwpf/hwp/occ/occAccess.H> +#include <console/consoleif.H> +#include <targeting/targplatutil.H> namespace HTMGT { + // Translate OCC priorty + bool elogXlateSrciPriority(const uint8_t i_priority, + HWAS::callOutPriority & o_priority) + { + bool l_found = false; + uint8_t l_index = 0x00; + + // Loop through the occPriorityXlate until we find a priority or + // reach the end of the struct. + // TODO RTC 124739 - convert to use lower_bound + while (l_index < OCC_SRCI_PRIORITY_XLATE_SIZE) + { + //If the priority matches then return the SRC. + if (i_priority == occPriorityXlateTbl[l_index].occPriority) + { + o_priority = occPriorityXlateTbl[l_index].errlPriority; + l_found = true; + break; + } + l_index++; + } + return l_found; + } + + + // Translate component id + bool elogGetTranslationData(const uint8_t i_compId, + tmgtCompxlateType &o_dataType, + uint32_t &o_compData) + { + bool l_found = false; + + // TODO RTC 124739 - convert to use lower_bound + for (uint16_t l_index = 0 ; l_index < TMGT_MAX_COMP_IDS ; l_index++) + { + if (i_compId == tmgt_compXlateTable[l_index].compId) + { + o_dataType = tmgt_compXlateTable[l_index].dataType; + o_compData = tmgt_compXlateTable[l_index].data; + l_found = true; + break; + } + } + return l_found; + } + + // Process elog entry from OCC poll response - void occProcessElog(Occ * i_occ, - const uint8_t i_id, - const uint32_t i_address, - const uint16_t i_length) + void Occ::occProcessElog(const uint8_t i_id, + const uint32_t i_address, + const uint16_t i_length) { errlHndl_t l_errlHndl = NULL; @@ -50,7 +98,7 @@ namespace HTMGT ecmdDataBufferBase l_buffer(l_length*8); // convert to bits // HBOCC is only defined for HTMGT #ifdef CONFIG_HTMGT - l_errlHndl = HBOCC::readSRAM(i_occ->getTarget(), i_address, l_buffer); + l_errlHndl = HBOCC::readSRAM(iv_target, i_address, l_buffer); #endif if (NULL == l_errlHndl) { @@ -62,17 +110,14 @@ namespace HTMGT TMGT_BIN("OCC ELOG", l_occElog, 256); const uint32_t l_occSrc = OCCC_COMP_ID | l_occElog->reasonCode; - ERRORLOG::errlSeverity_t l_errlSeverity = + ERRORLOG::errlSeverity_t severity = ERRORLOG::ERRL_SEV_INFORMATIONAL; -#if 0 - // TODO: RTC 109224 - determine correct severity/actions - // Process Severity + // Translate Severity const uint8_t l_occSeverity = l_occElog->severity; - const uint8_t l_occActions = l_occElog->actions; if (l_occSeverity < OCC_SEV_ACTION_XLATE_SIZE) { - l_errlSeverity = + severity = occSeverityErrorActionXlate[l_occSeverity].occErrlSeverity; } else @@ -81,15 +126,14 @@ namespace HTMGT " (severity = 0x%02X)", l_occElog->severity); } - // Process elog Actions + // Process Actions bool l_occReset = false; - elogProcessActions(l_occActions, l_occReset, l_errlSeverity); + elogProcessActions(l_occElog->actions, l_occReset, severity); if (l_occReset == true) { iv_needsReset = true; - UPDATE_SAFE_MODE_REASON(l_occSrc, iv_huid, true); + OccManager::updateSafeModeReason(l_occSrc, iv_instance); } -#endif // Create OCC error log // NOTE: word 4 (used by extended reason code) to save off OCC @@ -99,7 +143,7 @@ namespace HTMGT // parsed with the OCC src tags const occErrlUsrDtls_t *l_usrDtls_ptr = (occErrlUsrDtls_t *) ((uint8_t*)l_occElog+sizeof(occErrlEntry_t)+ - (l_occElog->numCallouts * sizeof(occErrlCallout_t)) ); + (l_occElog->maxCallouts * sizeof(occErrlCallout_t)) ); bldErrLog(l_errlHndl, (htmgtModuleId)(l_usrDtls_ptr->modId & 0x00FF), (htmgtReasonCode)l_occSrc, // occ reason code @@ -107,141 +151,109 @@ namespace HTMGT l_usrDtls_ptr->userData2, l_usrDtls_ptr->userData3, ((l_usrDtls_ptr->modId & 0xFF00) << 16 ) | - l_occElog->userData4, // extended reason code - l_errlSeverity); + l_occElog->reserved, // extended reason code + severity); -#if 0 - // TODO: RTC 109224 // Add callout information + const uint8_t l_max_callouts = l_occElog->maxCallouts; bool l_bad_fru_data = false; - uint8_t l_callout_num = 0; - if (! ((ERRL_SEV_INFORMATIONAL == l_errlSeverity) && - (TMGT_ERRL_ACTIONS_MANUFACTURING_ERROR & l_occActions)) ) + uint8_t numCallouts = 0; + uint8_t calloutIndex = 0; + while (calloutIndex < l_max_callouts) { - // Only add callouts if this is MFG error and system not in - // MFG (in MFG severity would not be Info) - uint8_t l_index = 0; - uint8_t l_count = 1; - - const uint8_t l_max_callout = l_occElog->numCallouts; - // The beginning address of callout data - l_index = sizeof(occErrlEntry_t); - do { - occErrlCallout_t *l_callout_ptr = NULL; - l_callout_ptr = (occErrlCallout_t *) - ((uint8_t*)l_occElog+l_index); - if (l_callout_ptr->type != 0) + const occErrlCallout_t callout = + l_occElog->callout[calloutIndex]; + if (callout.type != 0) + { + HWAS::callOutPriority priority; + bool l_success = true; + l_success = elogXlateSrciPriority(callout.priority, + priority); + if (l_success == true) { - srciPriority l_priority; - bool l_success = true; - l_success = - elogXlateSrciPriority(l_callout_ptr->priority, - l_priority); - if (l_success == true) - { - l_success = elogAddCallout(l_errlHndl, - l_errlSeverity, - l_priority, - *l_callout_ptr, - l_callout_num); - if (l_success == false) - { - l_bad_fru_data = true; - } - } - else + l_success = elogAddCallout(l_errlHndl, + priority, + callout, + numCallouts); + if (l_success == false) { l_bad_fru_data = true; - TMGT_ERR("occProcessElog: Priority translate" - " failure (priority = 0x%02X)", - l_callout_ptr->priority); } - l_index += sizeof(occErrlCallout_t); - } // if (l_type != 0) + } else - { // make sure all the remaining callout data are zeros, - // otherwise mark bad fru data - uint8_t *l_ptr = (uint8_t*)l_occElog+l_index; - uint8_t l_len = (l_max_callout-l_count+1)* - sizeof(occErrlCallout_t); - while (l_len != 0) + { + l_bad_fru_data = true; + TMGT_ERR("occProcessElog: Priority translate" + " failure (priority = 0x%02X)", + callout.priority); + } + } + else + { // make sure all the remaining callout data are zeros, + // otherwise mark bad fru data + const occErrlCallout_t zeros = { 0 }; + while (calloutIndex < l_max_callouts) + { + if (memcmp(&l_occElog->callout[calloutIndex], + &zeros, sizeof(occErrlCallout_t))) { - if (*l_ptr != 0x00) - { - TMGT_ERR("occProcessElog: The remaining" - " callout data should be all zeros"); - l_bad_fru_data = true; - break; - } - l_len--; - l_ptr++; + TMGT_ERR("occProcessElog: The remaining" + " callout data should be all zeros"); + l_bad_fru_data = true; + break; } - break; + ++calloutIndex; } - l_count++; - } while (l_count <= l_max_callout); - } - else - { - TMGT_ERR("MFG error found outside MFG; callouts will not be" - " added to log (OCC severity=0x%02X, actions=0x%02X)", - l_occSeverity, l_occActions); - const uint8_t l_callout_length = l_occElog->numCallouts * 12; - const char *l_callout_ptr = (char *)((uint8_t*)l_occElog+ - sizeof(occErrlEntry_t)); - // Add raw callout data from the OCC - l_errlHndl->addUsrDtls(l_callout_ptr, - l_callout_length, - TMGT_COMP_ID, - TMGT_VERSION, - TMGT_ERROR_DATA_TYPE); + break; + } + ++calloutIndex; } // Any bad fru data found ? - errlHndl_t l_errlHndl2 = NULL; + errlHndl_t err2 = NULL; if (l_bad_fru_data == true) { + TMGT_BIN("Callout Data", &l_occElog->callout[0], + sizeof(occErrlCallout)*ERRL_MAX_CALLOUTS); /*@ * @errortype * @refcode LIC_REFCODE * @subsys EPUB_FIRMWARE_SP * @reasoncode HTMGT_RC_OCC_ERROR_LOG * @moduleid HTMGT_MOD_BAD_FRU_CALLOUTS - * @userdata1 OCC elog id - * @userdata2 Number of good callouts + * @userdata1[0-15] OCC elog id + * @userdata1[16-31] Bad callout index * @devdesc Bad FRU data received in OCC error log */ - bldErrLog(l_errlHndl2, HTMGT_MOD_BAD_FRU_CALLOUTS, + bldErrLog(err2, HTMGT_MOD_BAD_FRU_CALLOUTS, HTMGT_RC_OCC_ERROR_LOG, - i_id, l_callout_num, 0, 0, ERRL_SEV_INFORMATIONAL); - ERRORLOG::errlCommit(l_errlHndl2, HTMGT_COMP_ID); + i_id, calloutIndex, 0, 0, + ERRORLOG::ERRL_SEV_INFORMATIONAL); + ERRORLOG::errlCommit(err2, HTMGT_COMP_ID); } - // Check callout number and severity - if ((l_callout_num == 0) && - (l_errlSeverity != ERRL_SEV_INFORMATIONAL)) + if ((numCallouts == 0) && + (severity != ERRORLOG::ERRL_SEV_INFORMATIONAL)) { TMGT_ERR("occProcessElog: No FRU callouts found for OCC%d" " elog_id:0x%02X, severity:0x%0X", - iv_instance, i_id, l_errlSeverity); + iv_instance, i_id, severity); /*@ * @errortype * @refcode LIC_REFCODE * @subsys EPUB_FIRMWARE_SP * @reasoncode HTMGT_RC_OCC_ERROR_LOG * @moduleid HTMGT_MOD_MISMATCHING_SEVERITY - * @userdata1 OCC elog id - * @userdata2 OCC severity - * @userdata3 - * @userdata4 + * @userdata1[0-15] OCC elog id + * @userdata1[16-31] OCC severity * @devdesc No FRU callouts found for non-info OCC Error Log */ - bldErrLog(l_errlHndl2, HTMGT_MOD_MISMATCHING_SEVERITY, + bldErrLog(err2, HTMGT_MOD_MISMATCHING_SEVERITY, HTMGT_RC_OCC_ERROR_LOG, - i_id, l_errlSeverity, 0, 0, ERRL_SEV_INFORMATIONAL); - ERRORLOG::errlCommit(l_errlHndl2, HTMGT_COMP_ID); + i_id, severity, 0, 0, + ERRORLOG::ERRL_SEV_INFORMATIONAL); + ERRORLOG::errlCommit(err2, HTMGT_COMP_ID); } -#endif // Add full OCC error log data as a User Details section l_errlHndl->addFFDC(OCCC_COMP_ID, @@ -249,27 +261,18 @@ namespace HTMGT i_length, 1, // version 0); // subsection - -#if 0 - // TODO: RTC 109224 - // Add additional data - addTmgtElogData(l_errlHndl); - addThermalElogData(l_errlHndl); -#endif - - // Commit Error (or terminate if required) ERRORLOG::errlCommit(l_errlHndl, HTMGT_COMP_ID); // Clear elog const uint8_t l_cmdData[1] = {i_id}; - OccCmd l_cmd(i_occ, OCC_CMD_CLEAR_ERROR_LOG, + OccCmd l_cmd(this, OCC_CMD_CLEAR_ERROR_LOG, sizeof(l_cmdData), l_cmdData); l_errlHndl = l_cmd.sendOccCmd(); if (l_errlHndl != NULL) { TMGT_ERR("occProcessElog: Failed to clear elog id %d to" " OCC%d (rc=0x%04X)", - i_id, i_occ, l_errlHndl->reasonCode()); + i_id, iv_instance, l_errlHndl->reasonCode()); ERRORLOG::errlCommit(l_errlHndl, HTMGT_COMP_ID); } } @@ -283,6 +286,114 @@ namespace HTMGT } // end Occ::occProcessElog() + // Add callout to specified elog + bool Occ::elogAddCallout(errlHndl_t & io_errlHndl, + HWAS::callOutPriority & i_priority, + const occErrlCallout_t i_callout, + uint8_t & io_callout_num) + { + bool l_success = true; + + TMGT_INF("elogAddCallout: Add callout type:0x%02X, value:0x%016llX," + " priority:0x%02X", + i_callout.type,i_callout.calloutValue, i_priority); + + if (i_callout.type == OCC_CALLOUT_TYPE_SENSOR) + { + const uint32_t sensor = (uint32_t)i_callout.calloutValue; + TARGETING::Target * target = + TARGETING::UTIL::getSensorTarget(sensor); + if (NULL != target) + { + io_errlHndl->addHwCallout(target, i_priority, + HWAS::NO_DECONFIG, + HWAS::GARD_NULL); + io_callout_num++; + } + else + { + TMGT_ERR("elogAddCallout: Unable to find target for " + "sensor 0x%04X", sensor); + } + } + else if (i_callout.type == OCC_CALLOUT_TYPE_COMPONENT_ID) + { + tmgtCompxlateType l_compDataType; + uint32_t l_compData = 0; + const uint8_t l_compId = (i_callout.calloutValue & 0xFF); + + if (elogGetTranslationData(l_compId, l_compDataType, l_compData)) + { + switch(l_compDataType) + { + case TMGT_COMP_DATA_SYMBOLIC_FRU: + TMGT_INF("elogAddCallout: symbolic callout: 0x%08X", + l_compData); + break; + case TMGT_COMP_DATA_PROCEDURE: + io_errlHndl->addProcedureCallout( + (HWAS::epubProcedureID)l_compData, + i_priority); + io_callout_num++; + break; + case TMGT_COMP_DATA_END_OF_TABLE: + break; + default: + TMGT_ERR("elogAddCallout: Invalid component id 0x%02X", + l_compId); + l_success = false; + } + } + else + { + TMGT_ERR("elogAddCallout: Component id translate failure" + " (id=0x%02X)", l_compId); + l_success = false; + } + } + else + { + TMGT_ERR("elogAddCallout: Invalid callout type (type=%d)", + i_callout.type); + l_success = false; + } + + return l_success;; + + } // end Occ::elogAddCallout() + + + void Occ::elogProcessActions(const uint8_t i_actions, + bool & o_occReset, + ERRORLOG::errlSeverity_t & o_errlSeverity) + { + if (i_actions & TMGT_ERRL_ACTIONS_RESET_REQUIRED) + { + o_occReset = true; + iv_failed = true; + iv_resetReason = OCC_RESET_REASON_OCC_REQUEST; + + TMGT_INF("elogProcessActions: OCC%d requested reset", + iv_instance); + } + + if (i_actions & TMGT_ERRL_ACTIONS_SAFE_MODE_REQUIRED) + { + o_occReset = true; + iv_failed = true; + iv_resetReason = OCC_RESET_REASON_CRIT_FAILURE; + iv_resetCount = OCC_RESET_COUNT_THRESHOLD; + + TMGT_INF("elogProcessActions: OCC%d requested safe mode", + iv_instance); + TMGT_CONSOLE("OCC%d requested system enter safe mode", + iv_instance); + } + + } // end Occ::elogProcessActions() + + + } // end namespace diff --git a/src/usr/htmgt/occError.H b/src/usr/htmgt/occError.H index 095d37d57..e4ab7f906 100644 --- a/src/usr/htmgt/occError.H +++ b/src/usr/htmgt/occError.H @@ -1,11 +1,11 @@ /* IBM_PROLOG_BEGIN_TAG */ /* This is an automatically generated prolog. */ /* */ -/* $Source: src/usr/htmgt/htmgt_error.H $ */ +/* $Source: src/usr/htmgt/occError.H $ */ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2014 */ +/* Contributors Listed Below - COPYRIGHT 2014,2015 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -41,9 +41,6 @@ namespace HTMGT // Error Actions enum tmgtErrlActionsType { - TMGT_ERRL_ACTIONS_CONSOLIDATE_ERRORS = 0x01, - TMGT_ERRL_ACTIONS_MANUFACTURING_ERROR = 0x08, - TMGT_ERRL_ACTIONS_UNRECOVERABLE_CPM_ERROR = 0x10, TMGT_ERRL_ACTIONS_SAFE_MODE_REQUIRED = 0x40, TMGT_ERRL_ACTIONS_RESET_REQUIRED = 0x80, }; @@ -51,7 +48,7 @@ namespace HTMGT // Type of Callout enum occCalloutType { - OCC_CALLOUT_TYPE_HUID = 0x01, + OCC_CALLOUT_TYPE_SENSOR = 0x01, OCC_CALLOUT_TYPE_COMPONENT_ID = 0x02, }; @@ -80,24 +77,22 @@ namespace HTMGT TMGT_COMP_DATA_END_OF_TABLE = 0xFF }; -#if 0 - // TODO: RTC 109224 + // Callout Priority Translation struct occSrciPriorityXlate { uint8_t occPriority; - srciPriority errlPriority; + HWAS::callOutPriority errlPriority; } __attribute__ ((__packed__)); typedef struct occSrciPriorityXlate occSrciPriorityXlate_t; const occSrciPriorityXlate_t occPriorityXlateTbl[] = { - {0x01, SRCI_PRIORITY_LOW}, - {0x02, SRCI_PRIORITY_MED}, - {0x03, SRCI_PRIORITY_HIGH}, + {0x01, HWAS::SRCI_PRIORITY_LOW}, + {0x02, HWAS::SRCI_PRIORITY_MED}, + {0x03, HWAS::SRCI_PRIORITY_HIGH}, }; const uint8_t OCC_SRCI_PRIORITY_XLATE_SIZE = - (sizeof(occPriorityXlateTbl) /sizeof(occSrciPriorityXlate_t)); -#endif + (sizeof(occPriorityXlateTbl) / sizeof(occSrciPriorityXlate_t)); // OCC Usr Dtls Structure struct occErrlUsrDtls @@ -119,16 +114,6 @@ namespace HTMGT } __attribute__ ((__packed__)); typedef struct occErrlUsrDtls occErrlUsrDtls_t; - // OCC Callout Structure - struct occErrlCallout - { - uint8_t type; - uint64_t calloutValue; - uint8_t priority; - uint16_t reserved1; - } __attribute__ ((__packed__)); - typedef struct occErrlCallout occErrlCallout_t; - // User Detail Entry Structure struct occUserDetailsEntry { @@ -157,34 +142,35 @@ namespace HTMGT // Actions to process the errors uint8_t actions; // Reserved - uint32_t userData4; + uint32_t reserved; // Log Callout Number - uint8_t numCallouts; + uint8_t maxCallouts; + // Callouts + occErrlCallout callout[ERRL_MAX_CALLOUTS]; } __attribute__ ((__packed__)); typedef struct occErrlEntry occErrlEntry_t; -#if 0 - // TODO: RTC 109224 // OCC Severity and Action struct occSeverityActionXlate { - occSeverityType occSeverity; - errlSeverity occErrlSeverity; - errlActions occErrlAction; + occSeverityType occSeverity; + ERRORLOG::errlSeverity_t occErrlSeverity; }; typedef struct occSeverityActionXlate occSeverityActionXlate_t; - // Translate Severity and Actios + // Translate Severity and Actions const occSeverityActionXlate_t occSeverityErrorActionXlate[] = { - {OCC_SEV_INFORMATIONAL, ERRL_SEV_INFORMATIONAL, ERRL_ACTION_HIDDEN}, - {OCC_SEV_RECOVERABLE, ERRL_SEV_PREDICTIVE, ERRL_ACTION_REPORT}, - {OCC_SEV_UNRECOVERABLE, ERRL_SEV_UNRECOVERABLE, ERRL_ACTION_REPORT}, + {OCC_SEV_INFORMATIONAL, ERRORLOG::ERRL_SEV_INFORMATIONAL}, + {OCC_SEV_RECOVERABLE, ERRORLOG::ERRL_SEV_PREDICTIVE}, + {OCC_SEV_UNRECOVERABLE, ERRORLOG::ERRL_SEV_UNRECOVERABLE}, }; const uint8_t OCC_SEV_ACTION_XLATE_SIZE = (sizeof(occSeverityErrorActionXlate)/sizeof(occSeverityActionXlate_t)); -#endif + + + struct tmgtCompXlate { @@ -201,7 +187,7 @@ namespace HTMGT const tmgtCompXlate_t tmgt_compXlateTable[TMGT_MAX_COMP_IDS] = { - { 0x01, TMGT_COMP_DATA_PROCEDURE, ERRORLOG::EPUB_FIRMWARE_SP}, // FW + { 0x01, TMGT_COMP_DATA_PROCEDURE, HWAS::EPUB_PRC_HB_CODE}, // FW { 0x04, TMGT_COMP_DATA_SYMBOLIC_FRU, OVERTMP}, // over temperature { 0x05, TMGT_COMP_DATA_SYMBOLIC_FRU, TPMD_OV}, // oversub throttling { 0xFF, TMGT_COMP_DATA_END_OF_TABLE, 0}, // none @@ -223,5 +209,15 @@ namespace HTMGT const uint16_t i_length); + + + struct tmgtSafeModeReasonCode_t + { + uint32_t returnCode; + uint32_t huid; + bool infoOnly; + }; + + } // end namespace #endif |