summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Brian Stegmiller <bjs@us.ibm.com> 2018-01-22 15:00:22 -0600
committer Zane C. Shelley <zshelle@us.ibm.com> 2018-03-01 16:07:37 -0500
commit 8cf2925f7e0122c56b2c0699a52dd7c7416ff02a (patch)
tree 800a6cc16e40daee76f02140a3985d3e900aaf26 /src
parent 2993c5b32a67ad533498428fc849489fd28e1b44 (diff)
download talos-hostboot-8cf2925f7e0122c56b2c0699a52dd7c7416ff02a.tar.gz
talos-hostboot-8cf2925f7e0122c56b2c0699a52dd7c7416ff02a.zip
Monitor threads for HB TI to work
Change-Id: I13c1717c650e24ee361e355ccaf5784d001a5b02
CQ:SW405958
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/52398
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: Benjamin J. Weisenbeck <bweisenb@us.ibm.com>
Reviewed-by: Caleb N. Palmer <cnpalmer@us.ibm.com>
Reviewed-by: Zane C. Shelley <zshelle@us.ibm.com>
Diffstat (limited to 'src')
-rw-r--r-- src/include/usr/diag/attn/attnreasoncodes.H 10
-rw-r--r-- src/include/usr/diag/mdia/mdiareasoncodes.H 4
-rw-r--r-- src/usr/diag/attn/ipl/attnsvc.C 138
-rw-r--r-- src/usr/diag/attn/ipl/attnsvc.H 26
-rwxr-xr-x src/usr/diag/mdia/mdiamonitor.C 63
-rwxr-xr-x src/usr/diag/mdia/mdiamonitor.H 7
6 files changed, 229 insertions, 19 deletions
diff --git a/src/include/usr/diag/attn/attnreasoncodes.H b/src/include/usr/diag/attn/attnreasoncodes.H
index ce0cd9dde..3fb644830 100644
--- a/src/include/usr/diag/attn/attnreasoncodes.H
+++ b/src/include/usr/diag/attn/attnreasoncodes.H
@@ -5,7 +5,7 @@
/* */
/* OpenPOWER HostBoot Project */
/* */
-/* Contributors Listed Below - COPYRIGHT 2014,2017 */
+/* Contributors Listed Below - COPYRIGHT 2014,2018 */
/* [+] International Business Machines Corp. */
/* */
/* */
@@ -47,7 +47,9 @@ enum ModuleId
{
ATTN_INVALID_MODULE = 0x00,
ATTN_TEST_FAKE_CALL_PRD = 0x01, // this is used in test code only.
- ATTN_CHK_IPL_ATTNS_MODULE = 0x02
+ ATTN_CHK_IPL_ATTNS_MODULE = 0x02,
+ ATTN_PRD_TASK_MODULE = 0x03,
+ ATTN_INTR_TASK_MODULE = 0x04
};
/**
@@ -61,8 +63,10 @@ enum ReasonCode
ATTN_INVALID_REASONCODE = ATTN_COMP_ID | 0x00, // Invalid Reasoncode
ATTN_TEST_ATTN_FAIL = ATTN_COMP_ID | 0x01, // this is used in
// test code only.
- ATTN_SEE_HW_ERROR = ATTN_COMP_ID | 0x02 // HW err with no gard
+ ATTN_SEE_HW_ERROR = ATTN_COMP_ID | 0x02, // HW err with no gard
// so PLID still set
+ ATTN_PRD_TASK_CRASHED = ATTN_COMP_ID | 0x03, // prd thread crashed
+ ATTN_INTR_TASK_CRASHED = ATTN_COMP_ID | 0x04 // intr thread crashed
};
}
diff --git a/src/include/usr/diag/mdia/mdiareasoncodes.H b/src/include/usr/diag/mdia/mdiareasoncodes.H
index d941949fd..390f5d385 100644
--- a/src/include/usr/diag/mdia/mdiareasoncodes.H
+++ b/src/include/usr/diag/mdia/mdiareasoncodes.H
@@ -5,7 +5,7 @@
/* */
/* OpenPOWER HostBoot Project */
/* */
-/* Contributors Listed Below - COPYRIGHT 2012,2016 */
+/* Contributors Listed Below - COPYRIGHT 2012,2018 */
/* [+] International Business Machines Corp. */
/* */
/* */
@@ -47,6 +47,7 @@ enum ModuleId
{
INVALID_MODULE = 0x00,
PROCESS_COMMAND_TIMEOUT = 0x01,
+ MONITOR_MAIN_THREAD = 0x02
};
/**
@@ -60,6 +61,7 @@ enum ReasonCode
INVALID_REASONCODE = MDIA_COMP_ID | 0x00, // Invalid Reasoncode
MAINT_COMMAND_HW_TIMED_OUT = MDIA_COMP_ID | 0x01, // timeout due to HW
MAINT_COMMAND_SW_TIMED_OUT = MDIA_COMP_ID | 0x02, // timeout due to SW
+ MONITOR_THREAD_CRASHED = MDIA_COMP_ID | 0x03 // thread crashed
};
}
diff --git a/src/usr/diag/attn/ipl/attnsvc.C b/src/usr/diag/attn/ipl/attnsvc.C
index 156153a04..b1dba9470 100644
--- a/src/usr/diag/attn/ipl/attnsvc.C
+++ b/src/usr/diag/attn/ipl/attnsvc.C
@@ -5,7 +5,7 @@
/* */
/* OpenPOWER HostBoot Project */
/* */
-/* Contributors Listed Below - COPYRIGHT 2014,2017 */
+/* Contributors Listed Below - COPYRIGHT 2014,2018 */
/* [+] International Business Machines Corp. */
/* */
/* */
@@ -37,6 +37,8 @@
#include "common/attnmem.H"
#include "common/attntarget.H"
#include "arch/pirformat.H"
+#include "diag/attn/attnreasoncodes.H"
+#include <initservice/initserviceif.H> // for hostboot TI
// Custom compile configs
#include <config.h>
@@ -97,7 +99,8 @@ errlHndl_t Service::configureInterrupts(
return err;
}
-void * Service::intrTask(void * i_svc)
+
+void* Service::intrTaskWorker(void * i_svc)
{
// interrupt task loop
Service & svc = *static_cast<Service *>(i_svc);
@@ -118,10 +121,75 @@ void * Service::intrTask(void * i_svc)
// got an interrupt. process it
svc.processIntrQMsg(*msg);
+
+
}
return NULL;
-}
+
+} // end intrTaskWorker
+
+
+void * Service::intrTask(void * i_svc)
+{
+ // Start intrTaskWorker as its own task to do the real work, then
+ // block on it here so that a crash of that task is noticed.
+ tid_t l_tid = task_create(&intrTaskWorker, i_svc);
+ assert( l_tid > 0 );
+
+ int l_status = 0;
+ void * l_Rc = NULL;
+
+ tid_t l_tidRc = task_wait_tid( l_tid, &l_status, &l_Rc);
+
+ if (l_status == TASK_STATUS_CRASHED)
+ {
+ /*@ errorlog tag
+ * @errortype ERRL_SEV_CRITICAL_SYS_TERM
+ * @moduleid ATTN_INTR_TASK_MODULE
+ * @reasoncode ATTN_INTR_TASK_CRASHED
+ * @userdata1 Value returned by task_wait_tid
+ * @userdata2 Task Id that crashed
+ *
+ * @devdesc INTR task crashed
+ * @custdesc Task analyzing HW errors has failed.
+ */
+ errlHndl_t l_err = new ERRORLOG::ErrlEntry
+ (
+ ERRORLOG::ERRL_SEV_CRITICAL_SYS_TERM, // severity
+ ATTN_INTR_TASK_MODULE, // moduleid
+ ATTN_INTR_TASK_CRASHED, // reason Code
+ (uint64_t)l_tidRc, // task_wait_tid return value
+ (uint64_t)l_tid // task that crashed
+ );
+
+ l_err->collectTrace("ATTN_FAST" , 512 );
+ l_err->collectTrace("PRDF" , 512 );
+ l_err->collectTrace("MDIA_FAST" , 512 );
+
+ // Ensure we are not on the interrupt service list.
+ // or we'll get hung during TI waiting for this code
+ // that crashed to shutdown.
+ INTR::unRegisterMsgQ(INTR::LSI_LCL_FIR);
+
+ // Save the PLID before the commit; it is passed to doShutdown below
+ uint32_t l_fatalPlid = l_err->plid();
+
+ // Commit the elog
+ ATTN_ERR("Committing INTR task crash elog");
+ errlCommit(l_err, ATTN_COMP_ID);
+ // Request shutdown (TI) using the PLID saved above
+ INITSERVICE::doShutdown(l_fatalPlid, true);
+
+ } // end if crashed
+
+
+ // When the worker ended without crashing we fall through to here
+ // and exit normally
+ return NULL;
+
+} // end intrTask
+
bool Service::intrTaskWait(msg_t * & o_msg)
{
@@ -280,7 +348,8 @@ errlHndl_t Service::processCheckstop()
#endif // CONFIG_ENABLE_CHECKSTOP_ANALYSIS
-void* Service::prdTask(void * i_svc)
+
+void* Service::prdTaskWorker(void * i_svc)
{
// prd task loop
Service & svc = *static_cast<Service *>(i_svc);
@@ -306,10 +375,69 @@ void* Service::prdTask(void * i_svc)
// new attentions for prd to handle
svc.processAttentions(procs);
+
}
return NULL;
-}
+
+} // end prdTaskWorker
+
+
+void* Service::prdTask(void * i_svc)
+{
+ // Start prdTaskWorker as its own task to do the real work, then
+ // block on it here so that a crash of that task is noticed.
+ tid_t l_tid = task_create(&prdTaskWorker, i_svc);
+ assert( l_tid > 0 );
+
+ int l_status = 0;
+ void * l_Rc = NULL;
+
+ tid_t l_tidRc = task_wait_tid( l_tid, &l_status, &l_Rc);
+
+ if (l_status == TASK_STATUS_CRASHED)
+ {
+ /*@ errorlog tag
+ * @errortype ERRL_SEV_CRITICAL_SYS_TERM
+ * @moduleid ATTN_PRD_TASK_MODULE
+ * @reasoncode ATTN_PRD_TASK_CRASHED
+ * @userdata1 Value returned by task_wait_tid
+ * @userdata2 Task Id that crashed
+ *
+ * @devdesc PRD task crashed
+ * @custdesc Task analyzing HW errors has failed.
+ */
+ errlHndl_t l_err = new ERRORLOG::ErrlEntry
+ (
+ ERRORLOG::ERRL_SEV_CRITICAL_SYS_TERM, // severity
+ ATTN_PRD_TASK_MODULE, // moduleid
+ ATTN_PRD_TASK_CRASHED, // reason Code
+ (uint64_t)l_tidRc, // task_wait_tid return value
+ (uint64_t)l_tid // task that crashed
+ );
+
+ l_err->collectTrace("PRDF" , 512 );
+ l_err->collectTrace("MDIA_FAST" , 512 );
+ l_err->collectTrace("ATTN_FAST" , 512 );
+
+ // Save the PLID before the commit; it is passed to doShutdown below
+ uint32_t l_fatalPlid = l_err->plid();
+
+ // Commit the elog
+ ATTN_ERR("Committing PRD task crash elog");
+ errlCommit(l_err, ATTN_COMP_ID);
+ // Request shutdown (TI) using the PLID saved above
+ INITSERVICE::doShutdown(l_fatalPlid, true);
+
+ } // end if crashed
+
+
+ // When the worker ended without crashing we fall through to here
+ // and exit normally
+ return NULL;
+
+} // end prdTask
+
bool Service::prdTaskWait()
{
diff --git a/src/usr/diag/attn/ipl/attnsvc.H b/src/usr/diag/attn/ipl/attnsvc.H
index aa5a974f0..8e49fca1f 100644
--- a/src/usr/diag/attn/ipl/attnsvc.H
+++ b/src/usr/diag/attn/ipl/attnsvc.H
@@ -5,7 +5,7 @@
/* */
/* OpenPOWER HostBoot Project */
/* */
-/* Contributors Listed Below - COPYRIGHT 2014,2015 */
+/* Contributors Listed Below - COPYRIGHT 2014,2018 */
/* [+] International Business Machines Corp. */
/* */
/* */
@@ -130,22 +130,38 @@ class Service : public ServiceCommon
ConfigureMode i_mode);
/**
- * @brief intrTask infinite wait-for-interrupt loop
+ * @brief intrTask will start the intrTaskWorker
+ * and monitor if it crashes.
+ *
+ * @param[in] i_svc service object associated with task
+ */
+ static void* intrTask(void * i_svc);
+
+ /**
+ * @brief prdTask will start the prdTaskWorker
+ * and monitor if it crashes.
+ *
+ * @param[in] i_svc service object associated with task
+ */
+ static void* prdTask(void * i_svc);
+
+ /**
+ * @brief intrTaskWorker infinite wait-for-interrupt loop
*
* repeatedly call intrTaskWait and processIntrQMsg
*
* @param[in] i_svc service object associated with task
*/
- static void* intrTask(void * i_svc);
+ static void* intrTaskWorker(void * i_svc);
/**
- * @brief prdTask infinite wait-for-attention loop
+ * @brief prdTaskWorker infinite wait-for-attention loop
*
* repeatedly call prdTaskWait and processAttentions
*
* @param[in] i_svc service object associated with task
*/
- static void* prdTask(void * i_svc);
+ static void* prdTaskWorker(void * i_svc);
/**
* @brief startIntrTask start task helper function
diff --git a/src/usr/diag/mdia/mdiamonitor.C b/src/usr/diag/mdia/mdiamonitor.C
index fedd7a1d3..024cc8f8d 100755
--- a/src/usr/diag/mdia/mdiamonitor.C
+++ b/src/usr/diag/mdia/mdiamonitor.C
@@ -5,7 +5,7 @@
/* */
/* OpenPOWER HostBoot Project */
/* */
-/* Contributors Listed Below - COPYRIGHT 2012,2016 */
+/* Contributors Listed Below - COPYRIGHT 2012,2018 */
/* [+] International Business Machines Corp. */
/* */
/* */
@@ -29,6 +29,8 @@
#include "mdiamonitor.H"
#include "mdiasm.H"
#include "mdiatrace.H"
+#include <errl/errlmanager.H>
+#include <initservice/initserviceif.H>
using namespace TARGETING;
@@ -149,8 +151,7 @@ void CommandMonitor::threadMain(StateMachine & i_sm)
}
// istep finished...shutdown
-
- if(shutdown)
+ if (shutdown)
{
MDIA_FAST("cm: CommandMonitor will be shutdown");
break;
@@ -216,7 +217,8 @@ void CommandMonitor::shutdown()
task_wait_tid(tid, 0, 0);
}
-void* CommandMonitor::staticMain(void * i_args)
+
+void* CommandMonitor::staticMainWorker(void * i_args)
{
using namespace CommandMonitorImpl;
@@ -232,6 +234,59 @@ void* CommandMonitor::staticMain(void * i_args)
return NULL;
}
+
+void* CommandMonitor::staticMain(void * i_args)
+{
+ // Start staticMainWorker as its own task to do the real work, then
+ // block on it here so that a crash of that task is noticed.
+ tid_t l_tid = task_create(&staticMainWorker, i_args);
+ assert( l_tid > 0 );
+
+ int l_status = 0;
+ void * l_Rc = NULL;
+
+ tid_t l_tidRc = task_wait_tid( l_tid, &l_status, &l_Rc);
+
+ if (l_status == TASK_STATUS_CRASHED)
+ {
+ /*@ errorlog tag
+ * @errortype ERRL_SEV_CRITICAL_SYS_TERM
+ * @moduleid MONITOR_MAIN_THREAD
+ * @reasoncode MONITOR_THREAD_CRASHED
+ * @userdata1 Value returned by task_wait_tid
+ * @userdata2 Task Id that crashed
+ *
+ * @devdesc MDIA monitor task crashed
+ * @custdesc Task handling mainstore init crashed
+ */
+ errlHndl_t l_err = new ERRORLOG::ErrlEntry
+ (
+ ERRORLOG::ERRL_SEV_CRITICAL_SYS_TERM, // severity
+ MONITOR_MAIN_THREAD, // moduleid
+ MONITOR_THREAD_CRASHED, // reason Code
+ (uint64_t)l_tidRc, // task_wait_tid return value
+ (uint64_t)l_tid // task that crashed
+ );
+
+ l_err->collectTrace("MDIA_FAST" , 512 );
+ l_err->collectTrace("PRDF" , 512 );
+
+ // Save the PLID before the commit; it is passed to doShutdown below
+ uint32_t l_fatalPlid = l_err->plid();
+
+ // Commit the elog (note: the trace below is emitted after the commit)
+ errlCommit(l_err, MDIA_COMP_ID);
+ MDIA_FAST("Committing task crash elog");
+ // Request shutdown (TI) using the PLID saved above
+ INITSERVICE::doShutdown(l_fatalPlid, true);
+ } // end if crashed
+
+
+ // When the worker ended without crashing we fall through to here
+ // and exit normally
+ return NULL;
+}
+
CommandMonitor::CommandMonitor() :
iv_shutdown(false),
iv_tid(0),
diff --git a/src/usr/diag/mdia/mdiamonitor.H b/src/usr/diag/mdia/mdiamonitor.H
index bc2b8b771..d269ab88c 100755
--- a/src/usr/diag/mdia/mdiamonitor.H
+++ b/src/usr/diag/mdia/mdiamonitor.H
@@ -5,7 +5,7 @@
/* */
/* OpenPOWER HostBoot Project */
/* */
-/* Contributors Listed Below - COPYRIGHT 2012,2015 */
+/* Contributors Listed Below - COPYRIGHT 2012,2018 */
/* [+] International Business Machines Corp. */
/* */
/* */
@@ -124,6 +124,11 @@ class CommandMonitor
static void* staticMain(void *);
/**
+ * @brief Worker thread under main thread
+ */
+ static void* staticMainWorker(void *);
+
+ /**
* @brief main thread function
*
* @param[in] i_sm state machine provided for callback
OpenPOWER on IntegriCloud