diff options
| author | Ilya Smirnov <ismirno@us.ibm.com> | 2019-04-11 09:42:19 -0500 |
|---|---|---|
| committer | Daniel M. Crowell <dcrowell@us.ibm.com> | 2019-04-24 13:52:59 -0500 |
| commit | a1f8b3160946b44d37ac947de70430f4a49b28cd (patch) | |
| tree | 5f09163e170d4d56e4b7b33857d4f00a03018729 /src/usr/util | |
| parent | 2007c4f940856589a087452beac9a20359098b0b (diff) | |
| download | talos-hostboot-a1f8b3160946b44d37ac947de70430f4a49b28cd.tar.gz talos-hostboot-a1f8b3160946b44d37ac947de70430f4a49b28cd.zip | |
Add Child RC Checking to Thread Pool
Add logic for Thread Pool to return error log when
shutdown() is called if a child thread (or multiple
child threads) had crashed. This is a default behavior,
but a constructor option can switch this functionality
off.
The new logic will aid in debugging of the failures of
child processes, as the failures could be catched when
they occurred and not at some point after.
Change-Id: I9736d659a086701b8e4f18f41504df4864924d88
RTC: 208517
Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/75897
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Reviewed-by: Nicholas E. Bofferding <bofferdn@us.ibm.com>
Reviewed-by: Michael Baiocchi <mbaiocch@us.ibm.com>
Reviewed-by: Daniel M. Crowell <dcrowell@us.ibm.com>
Diffstat (limited to 'src/usr/util')
| -rw-r--r-- | src/usr/util/test/threadpool.H | 131 | ||||
| -rw-r--r-- | src/usr/util/threadpool.C | 54 |
2 files changed, 177 insertions, 8 deletions
diff --git a/src/usr/util/test/threadpool.H b/src/usr/util/test/threadpool.H index 9784540a9..e8f41e9a3 100644 --- a/src/usr/util/test/threadpool.H +++ b/src/usr/util/test/threadpool.H @@ -5,7 +5,9 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* COPYRIGHT International Business Machines Corp. 2012,2014 */ +/* Contributors Listed Below - COPYRIGHT 2012,2019 */ +/* [+] International Business Machines Corp. */ +/* */ /* */ /* Licensed under the Apache License, Version 2.0 (the "License"); */ /* you may not use this file except in compliance with the License. */ @@ -25,6 +27,14 @@ #include <cxxtest/TestSuite.H> #include <util/threadpool.H> +#include <errl/errlentry.H> +#include <errl/errlmanager.H> +#include <util/util_reasoncodes.H> +#include <hbotcompid.H> + +// Thread pool constructor flags +#define DISABLE_CHILD_RC_CHECKING false +#define CHECK_CHILD_RC true namespace __ThreadPoolTest { @@ -86,6 +96,17 @@ namespace __ThreadPoolTest uint64_t* iv_value; }; + /** A simple work item that causes the task to crash */ + struct CrashedTask + { + CrashedTask() {}; + void operator()() + { + uint8_t* l_ptr = nullptr; + *l_ptr = 1; + } + }; + }; @@ -96,7 +117,8 @@ class ThreadPoolTest: public CxxTest::TestSuite * ordered pools. */ void testSimpleWorkItem() { - Util::ThreadPool<__ThreadPoolTest::Simple> instance; + Util::ThreadPool<__ThreadPoolTest::Simple> + instance(DISABLE_CHILD_RC_CHECKING); uint64_t value = 0; instance.insert(new __ThreadPoolTest::Simple(&value)); @@ -108,7 +130,8 @@ class ThreadPoolTest: public CxxTest::TestSuite TS_FAIL("Value was not changed by Simple worker thread."); } - Util::ThreadPool<__ThreadPoolTest::SimpleOrdered> instance2; + Util::ThreadPool<__ThreadPoolTest::SimpleOrdered> + instance2(DISABLE_CHILD_RC_CHECKING); value = 0; instance2.insert(new __ThreadPoolTest::SimpleOrdered(&value)); @@ -127,7 +150,8 @@ class ThreadPoolTest: public CxxTest::TestSuite * threads as directed. */ void testThreadManager() { - Util::ThreadPool<__ThreadPoolTest::EnsureThreads> instance; + Util::ThreadPool<__ThreadPoolTest::EnsureThreads> + instance(DISABLE_CHILD_RC_CHECKING); barrier_t barrier; for (size_t count = 1; count < 5; count++) @@ -151,7 +175,8 @@ class ThreadPoolTest: public CxxTest::TestSuite /** Test that the order functions work on the thread pool. */ void testThreadOrder() { - Util::ThreadPool<__ThreadPoolTest::EnsureOrder> instance; + Util::ThreadPool<__ThreadPoolTest::EnsureOrder> + instance(DISABLE_CHILD_RC_CHECKING); uint64_t value = 0; // Ensure that adding work items in order works. @@ -191,6 +216,102 @@ class ThreadPoolTest: public CxxTest::TestSuite instance.start(); instance.shutdown(); } + + /** Test a good child task that doesn't return an error. */ + void testChildRCGoodTask() + { + TS_INFO(ENTER_MRK"testChildRCGoodTask"); + Util::ThreadPool<__ThreadPoolTest::Simple> + l_threadPool(CHECK_CHILD_RC); + uint64_t l_value = 0; + + do { + l_threadPool.insert(new __ThreadPoolTest::Simple(&l_value)); + l_threadPool.start(); + errlHndl_t l_errl = l_threadPool.shutdown(); + if(l_errl) + { + TS_FAIL("testChildRCGoodTask: an unexpected error log (EID 0x%.8%x) returned from the thread pool", l_errl->eid()); + errlCommit(l_errl, CXXTEST_COMP_ID); + break; + } + + if(l_value == 0) + { + TS_FAIL("testChildRCGoodTask: the test value was not changed by the child task"); + break; + } + + }while(0); + TS_INFO(EXIT_MRK"testChildRCGoodTask"); + } + + /** Test that the crashed task's error log is returned to thread pool + correctly */ + void testChildRCCrashedTask() + { + TS_INFO(ENTER_MRK"testChildRCCrashedTask"); + Util::ThreadPool<__ThreadPoolTest::CrashedTask> + l_threadPool(CHECK_CHILD_RC); + errlHndl_t l_errl = nullptr; + + do { + l_threadPool.insert(new __ThreadPoolTest::CrashedTask()); + l_threadPool.start(); + l_errl = l_threadPool.shutdown(); + if(!l_errl) + { + TS_FAIL("testChildRCCrashedTask: the thread pool did not return an error log as was expected"); + break; + } + + if(l_errl->moduleId() != Util::UTIL_MOD_TP_SHUTDOWN) + { + TS_FAIL("testChildRCCrashedTask: unexpected moduleId returned from EID 0x%.8x; expected: 0x%x; actual: 0x%x", + l_errl->eid(), + Util::UTIL_MOD_TP_SHUTDOWN, + l_errl->moduleId()); + break; + } + + if(l_errl->reasonCode() != Util::UTIL_RC_CHILD_TASK_FAILED) + { + TS_FAIL("testChildRCCrashedTask: unexpected return code from EID 0x%.8x; expected: 0x%x; actual 0x%x", + l_errl->eid(), + Util::UTIL_RC_CHILD_TASK_FAILED, + l_errl->reasonCode()); + break; + } + }while(0); + + if(l_errl) + { + ERRORLOG::errlCommit(l_errl, CXXTEST_COMP_ID); + } + TS_INFO(EXIT_MRK"testChildRCCrashedTask"); + } + + /* Test that error is not returned by crashed task when explicitly + not requested by thread pool */ + void testChildNoRCCrashedTask() + { + TS_INFO(ENTER_MRK"testChildNoRCCrashedTask"); + Util::ThreadPool<__ThreadPoolTest::CrashedTask> + l_threadPool(DISABLE_CHILD_RC_CHECKING); + errlHndl_t l_errl = nullptr; + + l_threadPool.insert(new __ThreadPoolTest::CrashedTask()); + l_threadPool.start(); + l_errl = l_threadPool.shutdown(); + + if(l_errl) + { + TS_FAIL("testChildNoRCCrashedTask: unexpected error returned from the thread pool (EID 0x%.8x)", l_errl->eid()); + ERRORLOG::errlCommit(l_errl, CXXTEST_COMP_ID); + } + + TS_INFO(EXIT_MRK"testChildNoRCCrashedTask"); + } }; #endif diff --git a/src/usr/util/threadpool.C b/src/usr/util/threadpool.C index d2b156839..b51cca642 100644 --- a/src/usr/util/threadpool.C +++ b/src/usr/util/threadpool.C @@ -5,7 +5,9 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* COPYRIGHT International Business Machines Corp. 2012,2014 */ +/* Contributors Listed Below - COPYRIGHT 2012,2019 */ +/* [+] International Business Machines Corp. */ +/* */ /* */ /* Licensed under the Apache License, Version 2.0 (the "License"); */ /* you may not use this file except in compliance with the License. */ @@ -23,6 +25,9 @@ #include <util/threadpool.H> #include <sys/task.h> #include <sys/misc.h> +#include <util/util_reasoncodes.H> +#include <errl/errlmanager.H> +#include <hbotcompid.H> void Util::__Util_ThreadPool_Impl::ThreadPoolImpl::__init() { @@ -96,10 +101,15 @@ void Util::__Util_ThreadPool_Impl::ThreadPoolImpl::__start( mutex_unlock(&iv_mutex); } -void Util::__Util_ThreadPool_Impl::ThreadPoolImpl::__shutdown() +errlHndl_t Util::__Util_ThreadPool_Impl::ThreadPoolImpl::__shutdown() { mutex_lock(&iv_mutex); + int l_childRc = 0; + void* l_childRetval = nullptr; + errlHndl_t l_origError = nullptr; + errlHndl_t l_errl = nullptr; + // Set shutdown status and signal all children to release from their // condition variable. iv_shutdown = true; @@ -109,14 +119,52 @@ void Util::__Util_ThreadPool_Impl::ThreadPoolImpl::__shutdown() while(!iv_children.empty()) { tid_t child = iv_children.front(); + tid_t l_returnedTid = 0; iv_children.pop_front(); mutex_unlock(&iv_mutex); - task_wait_tid(child, NULL, NULL); // Don't need status. + l_returnedTid = task_wait_tid(child, &l_childRc, &l_childRetval); + if(iv_checkChildRc && + ((l_returnedTid != child) || + (l_childRc != TASK_STATUS_EXITED_CLEAN))) + { + /** + * @errortype + * @moduleid UTIL_MOD_TP_SHUTDOWN + * @reasoncode UTIL_RC_CHILD_TASK_FAILED + * @userdata1 The return code of the child thread + * @userdata2[0:31] The returned task ID of the child thread + * @userdata2[32:63] The original task ID of the child thread + * @devdesc The child thread of a thread pool returned an + * error + * @custdesc A failure occurred during the IPL of the system + */ + l_errl = new ERRORLOG::ErrlEntry(ERRORLOG::ERRL_SEV_UNRECOVERABLE, + UTIL_MOD_TP_SHUTDOWN, + UTIL_RC_CHILD_TASK_FAILED, + l_childRc, + TWO_UINT32_TO_UINT64(l_returnedTid, + child), + ERRORLOG::ErrlEntry::ADD_SW_CALLOUT + ); + l_errl->collectTrace(UTIL_COMP_NAME); + + if(!l_origError) + { + l_origError = l_errl; + l_errl = nullptr; + } + else + { + l_errl->plid(l_origError->plid()); + errlCommit(l_errl, UTIL_COMP_ID); + } + } mutex_lock(&iv_mutex); } mutex_unlock(&iv_mutex); + return l_origError; } // Default thread count of one per HW thread. |

