summary | refs | log | tree | commit | diff | stats
path: root/src/usr/htmgt
diff options
context:
space:
mode:
author: Chris Cain <cjcain@us.ibm.com> 2019-02-13 16:32:00 -0600
committer: Daniel M. Crowell <dcrowell@us.ibm.com> 2019-02-18 21:11:29 -0600
commit: 02f33294dea55eb2f022336f2b4871ea87ef7720 (patch)
tree: 0504efff1361920159d8e67b38ea2f018094b411 /src/usr/htmgt
parent: 84b32560e1aa82855bc2d9191c9f4b699f185885 (diff)
download: talos-hostboot-02f33294dea55eb2f022336f2b4871ea87ef7720.tar.gz
          talos-hostboot-02f33294dea55eb2f022336f2b4871ea87ef7720.zip
HTMGT: Change OCC logs to info while recovery is still being attempted
Change-Id: I0a46cacbc7e473dedd38ce9656ab25f5452c77c1
CQ: SW456777
Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/71869
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: Sheldon Bailey <baileysh@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Reviewed-by: Christopher J. Cain <cjcain@us.ibm.com>
Reviewed-by: Daniel M. Crowell <dcrowell@us.ibm.com>
Diffstat (limited to 'src/usr/htmgt')
-rw-r--r--  src/usr/htmgt/htmgt_occ.H   20
-rw-r--r--  src/usr/htmgt/occError.C   141
2 files changed, 100 insertions, 61 deletions
diff --git a/src/usr/htmgt/htmgt_occ.H b/src/usr/htmgt/htmgt_occ.H
index 1a707af46..e53d78fe6 100644
--- a/src/usr/htmgt/htmgt_occ.H
+++ b/src/usr/htmgt/htmgt_occ.H
@@ -351,13 +351,19 @@ namespace HTMGT
/**
* @brief Determine what actions are required for elog
*
- * @param[in] i_actions Action requested by OCC
- * @param[out] o_occReset returns true if OCC reset is needed
- * @param[out] o_errlSeverity severity to use for elog commit
- */
- void elogProcessActions(const uint8_t i_actions,
- bool & o_occReset,
- ERRORLOG::errlSeverity_t & o_errlSeverity);
+ * @param[in] i_actions Action flags requested by OCC
+ * @param[in] i_src SRC being reported by OCC
+ * @param[in] i_data Additional data used when
+ * processing actions
+ * @param[in,out] io_errlSeverity Severity to use for elog
+ * @param[out] o_call_home True if info error should be
+ * reported to BMC
+ */
+ void elogProcessActions(const uint8_t i_actions,
+ const uint32_t i_src,
+ const uint32_t i_data,
+ ERRORLOG::errlSeverity_t & io_errlSeverity,
+ bool & o_call_home);
/**
diff --git a/src/usr/htmgt/occError.C b/src/usr/htmgt/occError.C
index 0b785a643..492d047ce 100644
--- a/src/usr/htmgt/occError.C
+++ b/src/usr/htmgt/occError.C
@@ -5,7 +5,7 @@
/* */
/* OpenPOWER HostBoot Project */
/* */
-/* Contributors Listed Below - COPYRIGHT 2014,2018 */
+/* Contributors Listed Below - COPYRIGHT 2014,2019 */
/* [+] International Business Machines Corp. */
/* */
/* */
@@ -141,7 +141,7 @@ namespace HTMGT
if (l_occSrc == 0x2A01)
{
// 2A01 is Periodic OCC Telemetry / Call Home data
- TMGT_ERR("OCC is reporting Periodic Telemetry Data (0x2A01)"
+ TMGT_INF("OCC is reporting Periodic Telemetry Data (0x2A01)"
" - NOT AN ERROR");
}
@@ -159,41 +159,12 @@ namespace HTMGT
}
// Process Actions
- bool l_occReset = false;
- elogProcessActions(l_occElog->actions, l_occReset, severity);
-
- // Need to add WOF reason code to OCC object regardless of
- // whether WOF resets are disabled.
- if( l_occElog->actions & TMGT_ERRL_ACTIONS_WOF_RESET_REQUIRED )
- {
- iv_wofResetReasons |= l_usrDtls_ptr->userData1;
- TMGT_ERR("WOF Reset Reasons for OCC%d = 0x%08x",
- iv_instance,
- iv_wofResetReasons);
-
- }
-
- // Check if we need a WOF requested reset
- if(iv_needsWofReset == true)
- {
- TMGT_ERR("WOF Reset detected! SRC = 0x%X",
- l_occSrc);
-
- // We compare against one less than the threshold because
- // the WOF reset count doesn't get incremented until
- // the resetPrep
- if( iv_wofResetCount < (WOF_RESET_COUNT_THRESHOLD-1) )
- {
- // Not at WOF reset threshold yet. Set sev to INFO
- severity = ERRORLOG::ERRL_SEV_INFORMATIONAL;
- }
- }
-
- if (l_occReset == true)
- {
- iv_needsReset = true;
- OccManager::updateSafeModeReason(l_occSrc, iv_instance);
- }
+ bool l_call_home_event = false;
+ elogProcessActions(l_occElog->actions,
+ l_occSrc,
+ l_usrDtls_ptr->userData1,
+ severity,
+ l_call_home_event);
// Create OCC error log
// NOTE: word 4 (used by extended reason code) to save off OCC
@@ -211,6 +182,13 @@ namespace HTMGT
l_occElog->extendedRC, // extended reason code
severity);
+ if (l_call_home_event)
+ {
+ // Force info log to the BMC.
+ // No HW Callouts (SELs) will be created for this error
+ l_errlHndl->setEselCallhomeInfoEvent(true);
+ }
+
// Add callout information
const uint8_t l_max_callouts = l_occElog->maxCallouts;
bool l_bad_fru_data = false;
@@ -336,16 +314,11 @@ namespace HTMGT
"HALT_ON_SRC is set. Resets will be disabled",
iv_instance, l_occSrc);
set_int_flags(get_int_flags() | FLAG_RESET_DISABLED);
+ // Force unrecoverable elog
+ l_errlHndl->setSev(ERRORLOG::ERRL_SEV_UNRECOVERABLE);
}
}
- // Process force error log to be sent to BMC.
- if((l_occElog->actions & TMGT_ERRL_ACTIONS_FORCE_ERROR_POSTED)||
- (l_occSrc == (OCCC_COMP_ID | 0x01 ) ) ) //GEN_CALLHOME_LOG
- {
- l_errlHndl->setEselCallhomeInfoEvent(true);
- }
-
#ifdef CONFIG_CONSOLE_OUTPUT_OCC_COMM
char header[64];
sprintf(header, "OCC%d ELOG: (0x%04X bytes)", iv_instance,
@@ -542,10 +515,16 @@ namespace HTMGT
} // end Occ::elogAddCallout()
- void Occ::elogProcessActions(const uint8_t i_actions,
- bool & o_occReset,
- ERRORLOG::errlSeverity_t & o_errlSeverity)
+
+ void Occ::elogProcessActions(const uint8_t i_actions,
+ const uint32_t i_src,
+ uint32_t i_data,
+ ERRORLOG::errlSeverity_t & io_errlSeverity,
+ bool & o_call_home)
{
+ bool l_occReset = false;
+ o_call_home = false;
+
if (i_actions & TMGT_ERRL_ACTIONS_WOF_RESET_REQUIRED)
{
iv_failed = false;
@@ -553,7 +532,6 @@ namespace HTMGT
// Check if WOF resets are disabled
if(int_flags_set(FLAG_WOF_RESET_DISABLED) == true)
{
- o_occReset = false;
iv_needsWofReset = false;
TMGT_INF("elogProcessActions: OCC%d requested a WOF reset "
"but WOF resets are DISABLED",
@@ -561,27 +539,64 @@ namespace HTMGT
}
else // WOF resets are enabled
{
- o_occReset = true;
+ l_occReset = true;
iv_needsWofReset = true;
- TMGT_INF("elogProcessActions: OCC%d requested a WOF reset",
+ TMGT_ERR("elogProcessActions: OCC%d requested a WOF reset",
iv_instance);
+
+ // We compare against one less than the threshold because the
+ // WOF reset count doesn't get incremented until the resetPrep
+ if( iv_wofResetCount < (WOF_RESET_COUNT_THRESHOLD-1) )
+ {
+ // Not at WOF reset threshold yet. Set sev to INFO
+ io_errlSeverity = ERRORLOG::ERRL_SEV_INFORMATIONAL;
+ }
}
+
+ // Need to add WOF reason code to OCC object regardless of
+ // whether WOF resets are disabled.
+ iv_wofResetReasons |= i_data;
+ TMGT_ERR("elogProcessActions: WOF Reset Reasons for OCC%d = 0x%08x",
+ iv_instance, iv_wofResetReasons);
}
else
{
if (i_actions & TMGT_ERRL_ACTIONS_RESET_REQUIRED)
{
- o_occReset = true;
+ l_occReset = true;
iv_failed = true;
iv_resetReason = OCC_RESET_REASON_OCC_REQUEST;
TMGT_INF("elogProcessActions: OCC%d requested reset",
- iv_instance);
+ iv_instance);
+
+ // If reset will force safe mode, then make error unrecoverable
+ if (OCC_RESET_COUNT_THRESHOLD == iv_resetCount)
+ {
+ if (io_errlSeverity != ERRORLOG::ERRL_SEV_UNRECOVERABLE)
+ {
+ // update severity to UNRECOVERABLE
+ TMGT_ERR("elogProcessActions: changing severity to "
+ "UNRECOVERABLE (was sev=0x%02X)",
+ io_errlSeverity);
+ io_errlSeverity = ERRORLOG::ERRL_SEV_UNRECOVERABLE;
+ }
+ }
+ else if (io_errlSeverity != ERRORLOG::ERRL_SEV_INFORMATIONAL)
+ {
+ // update severity to INFO
+ TMGT_INF("elogProcessActions: changing severity to "
+ "INFORMATIONAL (was sev=0x%02X)",
+ io_errlSeverity);
+ io_errlSeverity = ERRORLOG::ERRL_SEV_INFORMATIONAL;
+ // log will be sent to BMC with NO SEL (hardware callouts)
+ o_call_home = true;
+ }
}
if (i_actions & TMGT_ERRL_ACTIONS_SAFE_MODE_REQUIRED)
{
- o_occReset = true;
+ l_occReset = true;
iv_failed = true;
iv_resetReason = OCC_RESET_REASON_CRIT_FAILURE;
iv_resetCount = OCC_RESET_COUNT_THRESHOLD;
@@ -589,10 +604,28 @@ namespace HTMGT
TMGT_INF("elogProcessActions: OCC%d requested safe mode",
iv_instance);
TMGT_CONSOLE("OCC%d requested system enter safe mode",
- iv_instance);
+ iv_instance);
}
}
+ // Check if error needs to be forced to the BMC:
+ // 1. 2A01 = OCC call home/telemetry data, OR
+ // 2. OCC requested force, but error was changed to info by HTMGT
+ // (log will be sent to the BMC with NO SEL (hardware callouts))
+ if ( (i_src == (OCCC_COMP_ID | 0x01 )) || // GEN_CALLHOME_LOG
+ ( (i_actions & TMGT_ERRL_ACTIONS_FORCE_ERROR_POSTED) &&
+ (io_errlSeverity == ERRORLOG::ERRL_SEV_INFORMATIONAL) ) )
+ {
+ o_call_home = true;
+ }
+
+ // If reset required, save the SRC in case it leads to safe mode
+ if (l_occReset == true)
+ {
+ iv_needsReset = true;
+ OccManager::updateSafeModeReason(i_src, iv_instance);
+ }
+
} // end Occ::elogProcessActions()
} // end namespace
OpenPOWER on IntegriCloud