summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Dan Crowell <dcrowell@us.ibm.com> 2018-04-26 13:01:01 -0500
committer: Daniel M. Crowell <dcrowell@us.ibm.com> 2018-05-19 17:56:42 -0400
commit: a4e02fc0828910582a08cb1277a30531540d7523 (patch)
tree: 130522c4e3683706afef978456945909b4ed8752
parent: 798ff0e50b1dd3a9b0ea640faae67bbd871b909c (diff)
download: talos-hostboot-a4e02fc0828910582a08cb1277a30531540d7523.tar.gz
download: talos-hostboot-a4e02fc0828910582a08cb1277a30531540d7523.zip
FFDC enhancements for core activate fails
Adding some more traces to the error log we grab for core activation failures.

Change-Id: I30c6985060fcffcb3382b775a52e59c08d2b51b7
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/57907
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Reviewed-by: Matt Derksen <mderkse1@us.ibm.com>
Reviewed-by: Daniel M. Crowell <dcrowell@us.ibm.com>
-rw-r--r--  src/include/usr/intr/intr_reasoncodes.H  3
-rw-r--r--  src/usr/intr/intrrp.C  4
-rw-r--r--  src/usr/isteps/istep16/call_host_activate_slave_cores.C  20
3 files changed, 21 insertions, 6 deletions
diff --git a/src/include/usr/intr/intr_reasoncodes.H b/src/include/usr/intr/intr_reasoncodes.H
index 62d8ad87a..4dc3cc496 100644
--- a/src/include/usr/intr/intr_reasoncodes.H
+++ b/src/include/usr/intr/intr_reasoncodes.H
@@ -27,6 +27,9 @@
#include <hbotcompid.H>
+#define INTR_TRACE_NAME INTR_COMP_NAME
+
+
namespace INTR
{
enum IntrModuleID
diff --git a/src/usr/intr/intrrp.C b/src/usr/intr/intrrp.C
index c9809b802..fda137259 100644
--- a/src/usr/intr/intrrp.C
+++ b/src/usr/intr/intrrp.C
@@ -57,8 +57,6 @@
#include <p9n2_misc_scom_addresses_fld.H>
#include <util/utilmbox_scratch.H>
-#define INTR_TRACE_NAME INTR_COMP_NAME
-
using namespace INTR;
using namespace TARGETING;
@@ -3224,7 +3222,7 @@ void* INTR::IntrRp::handleCpuTimeout(void* _pir)
msg->data[0] = pir;
msg_q_t intr_msgQ = msg_q_resolve(VFS_ROOT_MSG_INTR);
- TRACFCOMP( g_trac_intr,"handleCpuTimeout for pir: %lx", pir);
+ TRACFCOMP( g_trac_intr,"handleCpuTimeout for pir: 0x%lx", pir);
do
{
diff --git a/src/usr/isteps/istep16/call_host_activate_slave_cores.C b/src/usr/isteps/istep16/call_host_activate_slave_cores.C
index e18639077..67e6b816b 100644
--- a/src/usr/isteps/istep16/call_host_activate_slave_cores.C
+++ b/src/usr/isteps/istep16/call_host_activate_slave_cores.C
@@ -49,6 +49,8 @@
#endif
#include <scom/scomif.H>
+#include <errl/errludprintk.H>
+#include <intr/intr_reasoncodes.H>
using namespace ERRORLOG;
using namespace TARGETING;
@@ -121,11 +123,12 @@ void* call_host_activate_slave_cores (void *io_pArgs)
int rc = cpu_start_core(pir, en_threads);
// Handle time out error
+ uint32_t l_checkidle_eid = 0;
if (-ETIME == rc)
{
TRACFCOMP( ISTEPS_TRACE::g_trac_isteps_trace,
"call_host_activate_slave_cores: "
- "Time out rc from kernel %d on core %x",
+ "Time out rc from kernel %d on core 0x%x",
rc,
pir);
@@ -157,6 +160,8 @@ void* call_host_activate_slave_cores (void *io_pArgs)
// Create IStep error log
l_stepError.addErrorDetails(l_timeout_errl);
+ l_checkidle_eid = l_timeout_errl->eid();
+
// Commit error
errlCommit( l_timeout_errl, HWPF_COMP_ID );
}
@@ -176,7 +181,8 @@ void* call_host_activate_slave_cores (void *io_pArgs)
* @severity ERRORLOG::ERRL_SEV_UNRECOVERABLE
* @moduleid MOD_HOST_ACTIVATE_SLAVE_CORES
* @userdata1 PIR of failing core.
- * @userdata2 rc of cpu_start_core().
+ * @userdata2[00:31] EID from p9_check_idle_stop_done().
+ * @userdata2[32:63] rc of cpu_start_core().
*
* @devdesc Kernel returned error when trying to activate
* core.
@@ -186,7 +192,9 @@ void* call_host_activate_slave_cores (void *io_pArgs)
MOD_HOST_ACTIVATE_SLAVE_CORES,
RC_BAD_RC,
pir,
- rc );
+ TWO_UINT32_TO_UINT64(
+ l_checkidle_eid,
+ rc) );
// Callout core that failed to wake up.
l_errl->addHwCallout(*l_core,
@@ -194,6 +202,12 @@ void* call_host_activate_slave_cores (void *io_pArgs)
HWAS::DECONFIG,
HWAS::GARD_Predictive);
+ // Could be an interrupt issue
+ l_errl->collectTrace(INTR_TRACE_NAME,256);
+
+ // Throw printk in there too in case it is a kernel issue
+ ERRORLOG::ErrlUserDetailsPrintk().addToLog(l_errl);
+
l_stepError.addErrorDetails( l_errl );
errlCommit( l_errl, HWPF_COMP_ID );
break;
OpenPOWER on IntegriCloud