diff options
author | Brian Stegmiller <bjs@us.ibm.com> | 2018-01-22 15:00:22 -0600 |
---|---|---|
committer | Zane C. Shelley <zshelle@us.ibm.com> | 2018-03-01 16:07:37 -0500 |
commit | 8cf2925f7e0122c56b2c0699a52dd7c7416ff02a (patch) | |
tree | 800a6cc16e40daee76f02140a3985d3e900aaf26 /src | |
parent | 2993c5b32a67ad533498428fc849489fd28e1b44 (diff) | |
download | talos-hostboot-8cf2925f7e0122c56b2c0699a52dd7c7416ff02a.tar.gz talos-hostboot-8cf2925f7e0122c56b2c0699a52dd7c7416ff02a.zip |
Monitor threads for HB TI to work
Change-Id: I13c1717c650e24ee361e355ccaf5784d001a5b02
CQ:SW405958
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/52398
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: Benjamin J. Weisenbeck <bweisenb@us.ibm.com>
Reviewed-by: Caleb N. Palmer <cnpalmer@us.ibm.com>
Reviewed-by: Zane C. Shelley <zshelle@us.ibm.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/include/usr/diag/attn/attnreasoncodes.H | 10 | ||||
-rw-r--r-- | src/include/usr/diag/mdia/mdiareasoncodes.H | 4 | ||||
-rw-r--r-- | src/usr/diag/attn/ipl/attnsvc.C | 138 | ||||
-rw-r--r-- | src/usr/diag/attn/ipl/attnsvc.H | 26 | ||||
-rwxr-xr-x | src/usr/diag/mdia/mdiamonitor.C | 63 | ||||
-rwxr-xr-x | src/usr/diag/mdia/mdiamonitor.H | 7 |
6 files changed, 229 insertions, 19 deletions
diff --git a/src/include/usr/diag/attn/attnreasoncodes.H b/src/include/usr/diag/attn/attnreasoncodes.H index ce0cd9dde..3fb644830 100644 --- a/src/include/usr/diag/attn/attnreasoncodes.H +++ b/src/include/usr/diag/attn/attnreasoncodes.H @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2014,2017 */ +/* Contributors Listed Below - COPYRIGHT 2014,2018 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -47,7 +47,9 @@ enum ModuleId { ATTN_INVALID_MODULE = 0x00, ATTN_TEST_FAKE_CALL_PRD = 0x01, // this is used in test code only. - ATTN_CHK_IPL_ATTNS_MODULE = 0x02 + ATTN_CHK_IPL_ATTNS_MODULE = 0x02, + ATTN_PRD_TASK_MODULE = 0x03, + ATTN_INTR_TASK_MODULE = 0x04 }; /** @@ -61,8 +63,10 @@ enum ReasonCode ATTN_INVALID_REASONCODE = ATTN_COMP_ID | 0x00, // Invalid Reasoncode ATTN_TEST_ATTN_FAIL = ATTN_COMP_ID | 0x01, // this is used in // test code only. - ATTN_SEE_HW_ERROR = ATTN_COMP_ID | 0x02 // HW err with no gard + ATTN_SEE_HW_ERROR = ATTN_COMP_ID | 0x02, // HW err with no gard // so PLID still set + ATTN_PRD_TASK_CRASHED = ATTN_COMP_ID | 0x03, // prd thread crashed + ATTN_INTR_TASK_CRASHED = ATTN_COMP_ID | 0x04 // intr thread crashed }; } diff --git a/src/include/usr/diag/mdia/mdiareasoncodes.H b/src/include/usr/diag/mdia/mdiareasoncodes.H index d941949fd..390f5d385 100644 --- a/src/include/usr/diag/mdia/mdiareasoncodes.H +++ b/src/include/usr/diag/mdia/mdiareasoncodes.H @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2012,2016 */ +/* Contributors Listed Below - COPYRIGHT 2012,2018 */ /* [+] International Business Machines Corp. 
*/ /* */ /* */ @@ -47,6 +47,7 @@ enum ModuleId { INVALID_MODULE = 0x00, PROCESS_COMMAND_TIMEOUT = 0x01, + MONITOR_MAIN_THREAD = 0x02 }; /** @@ -60,6 +61,7 @@ enum ReasonCode INVALID_REASONCODE = MDIA_COMP_ID | 0x00, // Invalid Reasoncode MAINT_COMMAND_HW_TIMED_OUT = MDIA_COMP_ID | 0x01, // timeout due to HW MAINT_COMMAND_SW_TIMED_OUT = MDIA_COMP_ID | 0x02, // timeout due to SW + MONITOR_THREAD_CRASHED = MDIA_COMP_ID | 0x03 // thread crashed }; } diff --git a/src/usr/diag/attn/ipl/attnsvc.C b/src/usr/diag/attn/ipl/attnsvc.C index 156153a04..b1dba9470 100644 --- a/src/usr/diag/attn/ipl/attnsvc.C +++ b/src/usr/diag/attn/ipl/attnsvc.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2014,2017 */ +/* Contributors Listed Below - COPYRIGHT 2014,2018 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -37,6 +37,8 @@ #include "common/attnmem.H" #include "common/attntarget.H" #include "arch/pirformat.H" +#include "diag/attn/attnreasoncodes.H" +#include <initservice/initserviceif.H> // for hostboot TI // Custom compile configs #include <config.h> @@ -97,7 +99,8 @@ errlHndl_t Service::configureInterrupts( return err; } -void * Service::intrTask(void * i_svc) + +void* Service::intrTaskWorker(void * i_svc) { // interrupt task loop Service & svc = *static_cast<Service *>(i_svc); @@ -118,10 +121,75 @@ void * Service::intrTask(void * i_svc) // got an interrupt. process it svc.processIntrQMsg(*msg); + + } return NULL; -} + +} // end intrTaskWorker + + +void * Service::intrTask(void * i_svc) +{ + // We need to create the actual thread that will do the work + // and then monitor it for completion. 
+ tid_t l_tid = task_create(&intrTaskWorker, i_svc); + assert( l_tid > 0 ); + + int l_status = 0; + void * l_Rc = NULL; + + tid_t l_tidRc = task_wait_tid( l_tid, &l_status, &l_Rc); + + if (l_status == TASK_STATUS_CRASHED) + { + /*@ errorlog tag + * @errortype ERRL_SEV_CRITICAL_SYS_TERM + * @moduleid ATTN_INTR_TASK_MODULE + * @reasoncode ATTN_INTR_TASK_CRASHED + * @userdata1 tidRc + * @userdata2 Task Id that crashed + * + * @devdesc INTR task crashed + * @custdesc Task analyzing HW errors has failed. + */ + errlHndl_t l_err = new ERRORLOG::ErrlEntry + ( + ERRORLOG::ERRL_SEV_CRITICAL_SYS_TERM, // severity + ATTN_INTR_TASK_MODULE, // moduleid + ATTN_INTR_TASK_CRASHED, // reason Code + (uint64_t)l_tidRc, // tid rc + (uint64_t)l_tid // task that crashed + ); + + l_err->collectTrace("ATTN_FAST" , 512 ); + l_err->collectTrace("PRDF" , 512 ); + l_err->collectTrace("MDIA_FAST" , 512 ); + + // Ensure we are not on the interrupt service list, + // or we'll get hung during TI waiting for this code + // that crashed to shut down. 
+ INTR::unRegisterMsgQ(INTR::LSI_LCL_FIR); + + // Save PLID for TI purposes + uint32_t l_fatalPlid = l_err->plid(); + + // Commit the elog + ATTN_ERR("Committing INTR task crash elog"); + errlCommit(l_err, ATTN_COMP_ID); + // Crash now + INITSERVICE::doShutdown(l_fatalPlid, true); + + } // end if crashed + + + // On Normal shutdown of thread, we will get here + // and exit normally + return NULL; + +} // end intrTask + bool Service::intrTaskWait(msg_t * & o_msg) { @@ -280,7 +348,8 @@ errlHndl_t Service::processCheckstop() #endif // CONFIG_ENABLE_CHECKSTOP_ANALYSIS -void* Service::prdTask(void * i_svc) + +void* Service::prdTaskWorker(void * i_svc) { // prd task loop Service & svc = *static_cast<Service *>(i_svc); @@ -306,10 +375,69 @@ void* Service::prdTask(void * i_svc) // new attentions for prd to handle svc.processAttentions(procs); + } return NULL; -} + +} // end prdTaskWorker + + +void* Service::prdTask(void * i_svc) +{ + // We need to create the actual thread that will do the work + // and then monitor it for completion. + tid_t l_tid = task_create(&prdTaskWorker, i_svc); + assert( l_tid > 0 ); + + int l_status = 0; + void * l_Rc = NULL; + + tid_t l_tidRc = task_wait_tid( l_tid, &l_status, &l_Rc); + + if (l_status == TASK_STATUS_CRASHED) + { + /*@ errorlog tag + * @errortype ERRL_SEV_CRITICAL_SYS_TERM + * @moduleid ATTN_PRD_TASK_MODULE + * @reasoncode ATTN_PRD_TASK_CRASHED + * @userdata1 tidRc + * @userdata2 Task Id that crashed + * + * @devdesc PRD task crashed + * @custdesc Task analyzing HW errors has failed. 
+ */ + errlHndl_t l_err = new ERRORLOG::ErrlEntry + ( + ERRORLOG::ERRL_SEV_CRITICAL_SYS_TERM, // severity + ATTN_PRD_TASK_MODULE, // moduleid + ATTN_PRD_TASK_CRASHED, // reason Code + (uint64_t)l_tidRc, // tid rc + (uint64_t)l_tid // task that crashed + ); + + l_err->collectTrace("PRDF" , 512 ); + l_err->collectTrace("MDIA_FAST" , 512 ); + l_err->collectTrace("ATTN_FAST" , 512 ); + + // Save PLID for TI purposes + uint32_t l_fatalPlid = l_err->plid(); + + // Commit the elog + ATTN_ERR("Committing PRD task crash elog"); + errlCommit(l_err, ATTN_COMP_ID); + // Crash now + INITSERVICE::doShutdown(l_fatalPlid, true); + + } // end if crashed + + + // On Normal shutdown of thread, we will get here + // and exit normally + return NULL; + +} // end prdTask + bool Service::prdTaskWait() { diff --git a/src/usr/diag/attn/ipl/attnsvc.H b/src/usr/diag/attn/ipl/attnsvc.H index aa5a974f0..8e49fca1f 100644 --- a/src/usr/diag/attn/ipl/attnsvc.H +++ b/src/usr/diag/attn/ipl/attnsvc.H @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2014,2015 */ +/* Contributors Listed Below - COPYRIGHT 2014,2018 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -130,22 +130,38 @@ class Service : public ServiceCommon ConfigureMode i_mode); /** - * @brief intrTask infinite wait-for-interrupt loop + * @brief intrTask will start the intrTaskWorker + * and monitor if it crashes. + * + * @param[in] i_svc service object associated with task + */ + static void* intrTask(void * i_svc); + + /** + * @brief prdTask will start the prdTaskWorker + * and monitor if it crashes. 
+ * + * @param[in] i_svc service object associated with task + */ + static void* prdTask(void * i_svc); + + /** + * @brief intrTaskWorker infinite wait-for-interrupt loop * * repeatedly call intrTaskWait and processIntrQMsg * * @param[in] i_svc service object associated with task */ - static void* intrTask(void * i_svc); + static void* intrTaskWorker(void * i_svc); /** - * @brief prdTask infinite wait-for-attention loop + * @brief prdTaskWorker infinite wait-for-attention loop * * repeatedly call prdTaskWait and processAttentions * * @param[in] i_svc service object associated with task */ - static void* prdTask(void * i_svc); + static void* prdTaskWorker(void * i_svc); /** * @brief startIntrTask start task helper function diff --git a/src/usr/diag/mdia/mdiamonitor.C b/src/usr/diag/mdia/mdiamonitor.C index fedd7a1d3..024cc8f8d 100755 --- a/src/usr/diag/mdia/mdiamonitor.C +++ b/src/usr/diag/mdia/mdiamonitor.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2012,2016 */ +/* Contributors Listed Below - COPYRIGHT 2012,2018 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -29,6 +29,8 @@ #include "mdiamonitor.H" #include "mdiasm.H" #include "mdiatrace.H" +#include <errl/errlmanager.H> +#include <initservice/initserviceif.H> using namespace TARGETING; @@ -149,8 +151,7 @@ void CommandMonitor::threadMain(StateMachine & i_sm) } // istep finished...shutdown - - if(shutdown) + if (shutdown) { MDIA_FAST("cm: CommandMonitor will be shutdown"); break; @@ -216,7 +217,8 @@ void CommandMonitor::shutdown() task_wait_tid(tid, 0, 0); } -void* CommandMonitor::staticMain(void * i_args) + +void* CommandMonitor::staticMainWorker(void * i_args) { using namespace CommandMonitorImpl; @@ -232,6 +234,59 @@ void* CommandMonitor::staticMain(void * i_args) return NULL; } + +void* CommandMonitor::staticMain(void * i_args) +{ + // We need to create the actual thread that will do the work + // and then monitor it for completion. 
+ tid_t l_tid = task_create(&staticMainWorker, i_args); + assert( l_tid > 0 ); + + int l_status = 0; + void * l_Rc = NULL; + + tid_t l_tidRc = task_wait_tid( l_tid, &l_status, &l_Rc); + + if (l_status == TASK_STATUS_CRASHED) + { + /*@ errorlog tag + * @errortype ERRL_SEV_CRITICAL_SYS_TERM + * @moduleid MONITOR_MAIN_THREAD + * @reasoncode MONITOR_THREAD_CRASHED + * @userdata1 tidRc + * @userdata2 Task Id that crashed + * + * @devdesc MDIA monitor task crashed + * @custdesc Task handling mainstore init crashed + */ + errlHndl_t l_err = new ERRORLOG::ErrlEntry + ( + ERRORLOG::ERRL_SEV_CRITICAL_SYS_TERM, // severity + MONITOR_MAIN_THREAD, // moduleid + MONITOR_THREAD_CRASHED, // reason Code + (uint64_t)l_tidRc, // tid rc + (uint64_t)l_tid // task that crashed + ); + + l_err->collectTrace("MDIA_FAST" , 512 ); + l_err->collectTrace("PRDF" , 512 ); + + // Save PLID for TI purposes + uint32_t l_fatalPlid = l_err->plid(); + + // Commit the elog + errlCommit(l_err, MDIA_COMP_ID); + MDIA_FAST("Committing task crash elog"); + // Crash now + INITSERVICE::doShutdown(l_fatalPlid, true); + } // end if crashed + + + // On Normal shutdown of thread, we will get here + // and exit normally + return NULL; +} + CommandMonitor::CommandMonitor() : iv_shutdown(false), iv_tid(0), diff --git a/src/usr/diag/mdia/mdiamonitor.H b/src/usr/diag/mdia/mdiamonitor.H index bc2b8b771..d269ab88c 100755 --- a/src/usr/diag/mdia/mdiamonitor.H +++ b/src/usr/diag/mdia/mdiamonitor.H @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2012,2015 */ +/* Contributors Listed Below - COPYRIGHT 2012,2018 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -124,6 +124,11 @@ class CommandMonitor static void* staticMain(void *); /** + * @brief Worker thread under main thread + */ + static void* staticMainWorker(void *); + + /** * @brief main thread function * * @param[in] i_sm state machine provided for callback |