diff options
author | Chris Cain <cjcain@us.ibm.com> | 2018-10-25 17:02:28 -0500 |
---|---|---|
committer | Daniel M. Crowell <dcrowell@us.ibm.com> | 2018-10-30 14:45:09 -0500 |
commit | d9711869369308c99eea851c45022b66f68b12df (patch) | |
tree | 4737fbb5301ab32438babf2675baec336f56c29f /src/usr/htmgt | |
parent | 93478adb3314e7337a8c0870ad45c4678ff02006 (diff) | |
download | talos-hostboot-d9711869369308c99eea851c45022b66f68b12df.tar.gz talos-hostboot-d9711869369308c99eea851c45022b66f68b12df.zip |
HTMGT support for PGPE/SGPE error logs
Change-Id: I4a0d7fa092483cdfa6083a4ca86651c80f548d5c
RTC: 197064
Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/68026
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Reviewed-by: Martha Broyles <mbroyles@us.ibm.com>
Reviewed-by: Sheldon Bailey <baileysh@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: Daniel M. Crowell <dcrowell@us.ibm.com>
Diffstat (limited to 'src/usr/htmgt')
-rw-r--r-- | src/usr/htmgt/htmgt_occ.H | 7 |
-rw-r--r-- | src/usr/htmgt/htmgt_poll.C | 24 |
-rw-r--r-- | src/usr/htmgt/htmgt_poll.H | 4 |
-rw-r--r-- | src/usr/htmgt/occError.C | 111 |
-rw-r--r-- | src/usr/htmgt/occError.H | 26 |
5 files changed, 101 insertions, 71 deletions
diff --git a/src/usr/htmgt/htmgt_occ.H b/src/usr/htmgt/htmgt_occ.H index c712fe526..91ede4d61 100644 --- a/src/usr/htmgt/htmgt_occ.H +++ b/src/usr/htmgt/htmgt_occ.H @@ -332,15 +332,18 @@ namespace HTMGT /** - * @brief Collect, Commit and Clear error log from the OCC + * @brief Process elog entry from OCC poll response. + * Collect, Commit and Clear error log from the OCC. * * @param[in] i_id OCC elog id to retrieve * @param[in] i_address SRAM address for elog entry * @param[in] i_length size of the elog entry + * @param[in] i_source OCC Error Log Source (405, PGPE, etc) */ void occProcessElog(const uint8_t i_id, const uint32_t i_address, - const uint16_t i_length); + const uint16_t i_length, + const uint8_t i_source); /** diff --git a/src/usr/htmgt/htmgt_poll.C b/src/usr/htmgt/htmgt_poll.C index af863b1e5..52651f0e7 100644 --- a/src/usr/htmgt/htmgt_poll.C +++ b/src/usr/htmgt/htmgt_poll.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2014,2017 */ +/* Contributors Listed Below - COPYRIGHT 2014,2018 */ /* [+] International Business Machines Corp. 
*/ /* */ /* */ @@ -208,21 +208,23 @@ namespace HTMGT OCC_POLL_DATA_MIN_SIZE) != 0)) { TMGT_INF("OCC%d Poll change: Status:%04X Occs:%02X Cfg:%02X " - "State:%02X Error:%06X/%08X", + "State:%02X Error:%08X/%08X", iv_instance, (pollRsp->status << 8) | pollRsp->extStatus, pollRsp->occsPresent, pollRsp->requestedCfg, pollRsp->state, - (pollRsp->errorId<<16) | pollRsp->errorLength, + ((pollRsp->errorId<<24) | (pollRsp->errorLength<<8) | + pollRsp->errorSource), pollRsp->errorAddress); #ifdef CONFIG_CONSOLE_OUTPUT_OCC_COMM TMGT_CONSOLE("OCC%d Poll change: Status:%04X Occs:%02X Cfg:%02X " - "State:%02X Error:%06X/%08X", + "State:%02X Error:%08X/%08X", iv_instance, (pollRsp->status << 8) | pollRsp->extStatus, pollRsp->occsPresent, pollRsp->requestedCfg, pollRsp->state, - (pollRsp->errorId<<16) | pollRsp->errorLength, + ((pollRsp->errorId<<24) | (pollRsp->errorLength<<8) | + pollRsp->errorSource), pollRsp->errorAddress); #endif } @@ -241,16 +243,19 @@ namespace HTMGT if (pollRsp->errorId != 0) { if ((pollRsp->errorId != lastPollRsp->errorId) || + (pollRsp->errorSource != lastPollRsp->errorSource) || (L_elog_retry_count < 3)) { - if (pollRsp->errorId == lastPollRsp->errorId) + if ((pollRsp->errorId == lastPollRsp->errorId) && + (pollRsp->errorSource == lastPollRsp->errorSource)) { // Only retry same errorId a few times... L_elog_retry_count++; TMGT_ERR("pollRspHandler: Requesting elog 0x%02X" - " (retry %d)", - pollRsp->errorId, L_elog_retry_count); + " from source 0x%02X on OCC%d (retry %d)", + pollRsp->errorId, pollRsp->errorSource, + iv_instance, L_elog_retry_count); } else { @@ -260,7 +265,8 @@ namespace HTMGT // Handle a new error log from the OCC occProcessElog(pollRsp->errorId, pollRsp->errorAddress, - pollRsp->errorLength); + pollRsp->errorLength, + pollRsp->errorSource); if (iv_needsReset) { // Update state if changed... 
diff --git a/src/usr/htmgt/htmgt_poll.H b/src/usr/htmgt/htmgt_poll.H index d3c9526fc..3dc7528a6 100644 --- a/src/usr/htmgt/htmgt_poll.H +++ b/src/usr/htmgt/htmgt_poll.H @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2014,2017 */ +/* Contributors Listed Below - COPYRIGHT 2014,2018 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -67,7 +67,7 @@ namespace HTMGT uint8_t errorId; uint32_t errorAddress; uint16_t errorLength; - uint8_t reserved; + uint8_t errorSource; uint8_t gpuCfg; uint8_t codeLevel[16]; uint8_t sensor[6]; diff --git a/src/usr/htmgt/occError.C b/src/usr/htmgt/occError.C index f344bea85..14d890a22 100644 --- a/src/usr/htmgt/occError.C +++ b/src/usr/htmgt/occError.C @@ -31,8 +31,11 @@ #include <isteps/pm/occAccess.H> #include <console/consoleif.H> -#include <targeting/targplatutil.H> +#include <targeting/common/commontargeting.H> +#include <targeting/common/utilFilter.H> #include <variable_buffer.H> +#include "ipmi/ipmisensor.H" + namespace HTMGT { @@ -87,7 +90,8 @@ namespace HTMGT // Process elog entry from OCC poll response void Occ::occProcessElog(const uint8_t i_id, const uint32_t i_address, - const uint16_t i_length) + const uint16_t i_length, + const uint8_t i_source) { errlHndl_t l_errlHndl = nullptr; @@ -103,18 +107,26 @@ namespace HTMGT #endif if (nullptr == l_errlHndl) { + compId_t l_comp_id = OCCC_COMP_ID; + if (i_source == OCC_ERRSRC_PGPE) + { + l_comp_id = PGPE_COMP_ID; + } + else if (i_source == OCC_ERRSRC_XGPE) + { + l_comp_id = XGPE_COMP_ID; + } const occErrlEntry_t * l_occElog= reinterpret_cast<occErrlEntry_t*> (l_buffer.pointer()); TMGT_BIN("OCC ELOG", l_occElog, 256); - // Get user details section const occErrlUsrDtls_t *l_usrDtls_ptr = (occErrlUsrDtls_t *) ((uint8_t*)l_occElog + sizeof(occErrlEntry_t)); - const uint32_t l_occSrc = OCCC_COMP_ID | l_occElog->reasonCode; + const uint32_t l_occSrc = l_comp_id | l_occElog->reasonCode; ERRORLOG::errlSeverity_t severity 
= ERRORLOG::ERRL_SEV_INFORMATIONAL; @@ -135,8 +147,6 @@ namespace HTMGT bool l_occReset = false; elogProcessActions(l_occElog->actions, l_occReset, severity); - - // Need to add WOF reason code to OCC object regardless of // whether WOF resets are disabled. if( l_occElog->actions & TMGT_ERRL_ACTIONS_WOF_RESET_REQUIRED ) @@ -266,24 +276,39 @@ namespace HTMGT if ((numCallouts == 0) && (severity != ERRORLOG::ERRL_SEV_INFORMATIONAL)) { - TMGT_ERR("occProcessElog: No FRU callouts found for OCC%d" - " elog_id:0x%02X, severity:0x%0X", - iv_instance, i_id, severity); - /*@ - * @errortype - * @refcode LIC_REFCODE - * @subsys EPUB_FIRMWARE_SP - * @reasoncode HTMGT_RC_OCC_ERROR_LOG - * @moduleid HTMGT_MOD_MISMATCHING_SEVERITY - * @userdata1[0-15] OCC elog id - * @userdata1[16-31] OCC severity - * @devdesc No FRU callouts found for non-info OCC Error Log - */ - bldErrLog(err2, HTMGT_MOD_MISMATCHING_SEVERITY, - HTMGT_RC_OCC_ERROR_LOG, - i_id, severity, 0, 0, - ERRORLOG::ERRL_SEV_INFORMATIONAL); - ERRORLOG::errlCommit(err2, HTMGT_COMP_ID); + if (i_source == OCC_ERRSRC_405) + { + TMGT_ERR("occProcessElog: No FRU callouts found for OCC%d" + " elog_id:0x%02X, severity:0x%0X", + iv_instance, i_id, severity); + /*@ + * @errortype + * @refcode LIC_REFCODE + * @subsys EPUB_FIRMWARE_SP + * @reasoncode HTMGT_RC_OCC_ERROR_LOG + * @moduleid HTMGT_MOD_MISMATCHING_SEVERITY + * @userdata1[0-15] OCC elog id + * @userdata1[16-31] OCC severity + * @devdesc No FRU callouts found for non-info OCC Error Log + */ + bldErrLog(err2, HTMGT_MOD_MISMATCHING_SEVERITY, + HTMGT_RC_OCC_ERROR_LOG, + i_id, severity, 0, 0, + ERRORLOG::ERRL_SEV_INFORMATIONAL); + ERRORLOG::errlCommit(err2, HTMGT_COMP_ID); + } + else + { + // Add Processor callout for PGPE/SGPE/XGPE + TMGT_ERR("occProcessElog: Adding processor callout for" + " OCC%d", iv_instance); + TARGETING::ConstTargetHandle_t l_proc_target = + TARGETING::getParentChip(iv_target); + l_errlHndl->addHwCallout(l_proc_target, + HWAS::SRCI_PRIORITY_MED, + 
HWAS::NO_DECONFIG, + HWAS::GARD_NULL); + } } if (int_flags_set(FLAG_HALT_ON_OCC_SRC)) @@ -313,33 +338,37 @@ namespace HTMGT #endif // Add full OCC error log data as a User Details section - l_errlHndl->addFFDC(OCCC_COMP_ID, + l_errlHndl->addFFDC(l_comp_id, l_occElog, i_length, 1, // version 0); // subsection ERRORLOG::errlCommit(l_errlHndl, HTMGT_COMP_ID); - - // Clear elog - const uint8_t l_cmdData[1] = {i_id}; - OccCmd l_cmd(this, OCC_CMD_CLEAR_ERROR_LOG, - sizeof(l_cmdData), l_cmdData); - l_errlHndl = l_cmd.sendOccCmd(); - if (l_errlHndl != nullptr) - { - TMGT_ERR("occProcessElog: Failed to clear elog id %d to" - " OCC%d (rc=0x%04X)", - i_id, iv_instance, l_errlHndl->reasonCode()); - ERRORLOG::errlCommit(l_errlHndl, HTMGT_COMP_ID); - } } else { - TMGT_ERR("occProcessElog: Unable to read elog %d from SRAM" - " address (0x%08X) length (0x%04X), rc=0x%04X", - i_id, i_address, i_length, l_errlHndl->reasonCode()); + TMGT_ERR("occProcessElog: Unable to read elog %d from source " + "0x%02X on OCC%d, SRAM address (0x%08X) length (0x%04X), " + "rc=0x%04X", + i_id, i_source, iv_instance, i_address, i_length, + l_errlHndl->reasonCode()); ERRORLOG::errlCommit(l_errlHndl, HTMGT_COMP_ID); } + + // Clear elog + const uint8_t l_cmdData[] = { + 0x01/* version*/, i_id, i_source, 0x00/*reserved*/}; + OccCmd l_cmd(this, OCC_CMD_CLEAR_ERROR_LOG, + sizeof(l_cmdData), l_cmdData); + l_errlHndl = l_cmd.sendOccCmd(); + if (l_errlHndl != nullptr) + { + TMGT_ERR("occProcessElog: Failed to clear elog id 0x%02X from" + " source 0x%02X on OCC%d (rc=0x%04X)", + i_id, i_source, iv_instance, l_errlHndl->reasonCode()); + ERRORLOG::errlCommit(l_errlHndl, HTMGT_COMP_ID); + } + } // end Occ::occProcessElog() diff --git a/src/usr/htmgt/occError.H b/src/usr/htmgt/occError.H index 55e5f6cc6..ac98bf8bb 100644 --- a/src/usr/htmgt/occError.H +++ b/src/usr/htmgt/occError.H @@ -199,23 +199,6 @@ namespace HTMGT }; - - /** - * @brief Process elog entry from OCC poll response - * - * @param[in] i_occ OCC 
instance number reporting error - * @param[in] i_id OCC Error Log ID to retrieve (from the poll response) - * @param[in] i_address OCC Error Log Address to read - * @param[in] i_length OCC Error Log Length - */ - void occProcessElog(Occ * i_occ, - const uint8_t i_id, - const uint32_t i_address, - const uint16_t i_length); - - - - struct tmgtSafeModeReasonCode_t { uint32_t returnCode; @@ -224,5 +207,14 @@ namespace HTMGT }; + // OCC Error Source + enum occErrorSource_e + { + OCC_ERRSRC_405 = 0x00, + OCC_ERRSRC_PGPE = 0x10, + OCC_ERRSRC_XGPE = 0x20 + }; + + } // end namespace #endif |