diff options
author | Dan Crowell <dcrowell@us.ibm.com> | 2019-01-22 15:32:10 -0600 |
---|---|---|
committer | Daniel M. Crowell <dcrowell@us.ibm.com> | 2019-01-25 10:07:27 -0600 |
commit | ff5e4695cc58653dda06b0e861349a9d520d87cc (patch) | |
tree | 9375d1e401a9720fc8cf422c74b4ea754284090f | |
parent | c2f2f5037920dc8441c6b27ff7a488a90f0433b1 (diff) | |
download | talos-hostboot-ff5e4695cc58653dda06b0e861349a9d520d87cc.tar.gz talos-hostboot-ff5e4695cc58653dda06b0e861349a9d520d87cc.zip |
Add retry to slave core wakeup path
We are still seeing some very intermittent errors in the slave
core wakeup path. It still seems like we may have a timing issue.
Until we figure out exactly what is going on, I am adding a retry
mechanism that should get the core to report in correctly. If
cpu_start_core() times out (-ETIME), the retry issues an additional
doorbell message, via the new cpu_wakeup_core() syscall, to the core
that didn't report in.
Change-Id: Ib87e5d58e079674d1eebb44c10d0252a35ea0519
Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/70761
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: Nicholas E. Bofferding <bofferdn@us.ibm.com>
Reviewed-by: Dean Sanner <dsanner@us.ibm.com>
Reviewed-by: William G. Hoffa <wghoffa@us.ibm.com>
Reviewed-by: Daniel M. Crowell <dcrowell@us.ibm.com>
-rw-r--r-- | src/include/kernel/cpumgr.H | 11 | ||||
-rw-r--r-- | src/include/kernel/syscalls.H | 4 | ||||
-rw-r--r-- | src/include/sys/misc.h | 18 | ||||
-rw-r--r-- | src/kernel/cpumgr.C | 37 | ||||
-rw-r--r-- | src/kernel/syscall.C | 11 | ||||
-rw-r--r-- | src/lib/syscall_misc.C | 10 | ||||
-rw-r--r-- | src/usr/isteps/istep16/call_host_activate_slave_cores.C | 17 |
7 files changed, 101 insertions, 7 deletions
diff --git a/src/include/kernel/cpumgr.H b/src/include/kernel/cpumgr.H index f8daf5ab3..9d741cd83 100644 --- a/src/include/kernel/cpumgr.H +++ b/src/include/kernel/cpumgr.H @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2010,2018 */ +/* Contributors Listed Below - COPYRIGHT 2010,2019 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -186,6 +186,15 @@ class CpuManager static void startCore(uint64_t pir,uint64_t i_threads); + /** @fn wakeupCore + * Start the core, can only be run after startCore. + * + * @param[in] pir - PIR value of first thread in core. + * @param[in] i_threads - Bitstring of threads to enable (left-justified). + */ + static void wakeupCore(uint64_t pir,uint64_t i_threads); + + /** @fn forceMemoryPeriodic() * Force the memory free / coalesce operations to be performed on the * next "periodic" interval. diff --git a/src/include/kernel/syscalls.H b/src/include/kernel/syscalls.H index c606ad771..42e1a551b 100644 --- a/src/include/kernel/syscalls.H +++ b/src/include/kernel/syscalls.H @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2010,2018 */ +/* Contributors Listed Below - COPYRIGHT 2010,2019 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -99,6 +99,8 @@ namespace Systemcalls MISC_CPUNAP, /** cpu_master_winkle() */ MISC_CPUWINKLE, + /** cpu_wakeup_core() */ + MISC_CPUWAKEUPCORE, /** mm_alloc_block() */ MM_ALLOC_BLOCK, diff --git a/src/include/sys/misc.h b/src/include/sys/misc.h index 4fe0d5e44..183754f56 100644 --- a/src/include/sys/misc.h +++ b/src/include/sys/misc.h @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2011,2018 */ +/* Contributors Listed Below - COPYRIGHT 2011,2019 */ /* [+] International Business Machines Corp. 
*/ /* */ /* */ @@ -252,6 +252,22 @@ int cpu_master_winkle(bool i_fusedCores); */ int cpu_all_winkle(); +/** @fn cpu_wakeup_core + * @brief Have the kernel wakeup a core that was previously started. + * + * @param[in] pir - PIR value of the first thread on the core. + * @param[in] i_threads - Bitstring of threads to enable (left-justified). + * + * @note The kernel will wakeup all threads on the requested core even + * though the callee only requests with a single PIR value. + * + * @return 0 or -(errno) on failure. + * + * @retval -ENXIO - The core ID was outside of the range the kernel is + * prepared to support. + */ +int cpu_wakeup_core(uint64_t pir,uint64_t i_threads); + /** @fn cpu_crit_assert * @brief Forces a Terminate Immediate after a crit-assert is issued * @param[in] i_failAddr - value in the linkRegister of the address diff --git a/src/kernel/cpumgr.C b/src/kernel/cpumgr.C index a2dff9415..425cc2d28 100644 --- a/src/kernel/cpumgr.C +++ b/src/kernel/cpumgr.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2010,2018 */ +/* Contributors Listed Below - COPYRIGHT 2010,2019 */ /* [+] International Business Machines Corp. 
*/ /* */ /* */ @@ -454,7 +454,7 @@ void CpuManager::startCore(uint64_t pir,uint64_t i_threads) // Only wakeup the threads we were told to wakeup if( i_threads & (0x8000000000000000 >> i) ) { - printk("Dbell pir 0x%lx\n", pir + i); + printk("Dbell:0x%lx\n", pir + i); //Initiate the Doorbell for this core/pir send_doorbell_wakeup(pir + i); } @@ -463,6 +463,39 @@ void CpuManager::startCore(uint64_t pir,uint64_t i_threads) return; }; +void CpuManager::wakeupCore(uint64_t pir,uint64_t i_threads) +{ + size_t threads = getThreadCount(); + pir = pir & ~(threads-1); + + if (pir >= + (KERNEL_MAX_SUPPORTED_NODES * KERNEL_MAX_SUPPORTED_CPUS_PER_NODE)) + { + TASK_SETRTN(TaskManager::getCurrentTask(), -ENXIO); + return; + } + + //Send a message to userspace that a core with this base pir is being added + // userspace will know which threads on the core to expect already + InterruptMsgHdlr::addCpuCore(pir); + + // Physically wakeup the threads with doorbells + // Assumption is that startCore has already run so all + // internal structures are setup + for(size_t i = 0; i < threads; i++) + { + // Only wakeup the threads we were told to wakeup + if( i_threads & (0x8000000000000000 >> i) ) + { + printk("Dbell2:0x%lx\n", pir + i); + //Initiate the Doorbell for this core/pir + doorbell_send(pir + i); + } + } + + return; +}; + size_t CpuManager::getThreadCount() { size_t threads = 0; diff --git a/src/kernel/syscall.C b/src/kernel/syscall.C index c293d5067..1df43b78e 100644 --- a/src/kernel/syscall.C +++ b/src/kernel/syscall.C @@ -52,6 +52,8 @@ extern "C" void kernel_execute_hyp_doorbell() { + printkd("hyp_doorbell on %lx\n", getPIR()); + // Per POWER ISA Section 5.9.2, to avoid any weak consistency // issues we must use a msgsync instruction before consuming // any data set by a different thread following a doorbell @@ -144,6 +146,7 @@ namespace Systemcalls void CpuSprSet(task_t *t); void CpuNap(task_t *t); void CpuWinkle(task_t *t); + void CpuWakeupCore(task_t *t); void 
MmAllocBlock(task_t *t); void MmRemovePages(task_t *t); void MmSetPermission(task_t *t); @@ -189,6 +192,7 @@ namespace Systemcalls &CpuSprSet, // MISC_CPUSPRSET &CpuNap, // MISC_CPUNAP &CpuWinkle, // MISC_CPUWINKLE + &CpuWakeupCore, // MISC_CPUWAKEUPCORE &MmAllocBlock, // MM_ALLOC_BLOCK &MmRemovePages, // MM_REMOVE_PAGES @@ -858,6 +862,13 @@ namespace Systemcalls } } + /** Force thread wakeup via doorbell. */ + void CpuWakeupCore(task_t *t) + { + CpuManager::wakeupCore(static_cast<uint64_t>(TASK_GETARG0(t)), + static_cast<uint64_t>(TASK_GETARG1(t))); + }; + /** * Allocate a block of virtual memory within the base segment * @param[in] t: The task used to allocate a block in the base segment diff --git a/src/lib/syscall_misc.C b/src/lib/syscall_misc.C index ad6b204a6..29c075baf 100644 --- a/src/lib/syscall_misc.C +++ b/src/lib/syscall_misc.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2011,2018 */ +/* Contributors Listed Below - COPYRIGHT 2011,2019 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -139,6 +139,14 @@ int cpu_all_winkle() return rc; } +int cpu_wakeup_core(uint64_t pir,uint64_t i_threads) +{ + return reinterpret_cast<int64_t>( + _syscall2(MISC_CPUWAKEUPCORE, + reinterpret_cast<void*>(pir), + reinterpret_cast<void*>(i_threads))); +} + void cpu_crit_assert(uint64_t i_failAddr) { diff --git a/src/usr/isteps/istep16/call_host_activate_slave_cores.C b/src/usr/isteps/istep16/call_host_activate_slave_cores.C index 67e6b816b..e9cea28fb 100644 --- a/src/usr/isteps/istep16/call_host_activate_slave_cores.C +++ b/src/usr/isteps/istep16/call_host_activate_slave_cores.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2015,2018 */ +/* Contributors Listed Below - COPYRIGHT 2015,2019 */ /* [+] International Business Machines Corp. 
*/ /* */ /* */ @@ -122,6 +122,18 @@ void* call_host_activate_slave_cores (void *io_pArgs) int rc = cpu_start_core(pir, en_threads); + // Workaround to handle some syncing issues with new cpus + // waking + if (-ETIME == rc) + { + TRACFCOMP( ISTEPS_TRACE::g_trac_isteps_trace, + "call_host_activate_slave_cores: " + "Time out rc from kernel %d on core 0x%x, resending doorbell", + rc, + pir); + rc = cpu_wakeup_core(pir,en_threads); + } + // Handle time out error uint32_t l_checkidle_eid = 0; if (-ETIME == rc) @@ -208,6 +220,9 @@ void* call_host_activate_slave_cores (void *io_pArgs) // Throw printk in there too in case it is a kernel issue ERRORLOG::ErrlUserDetailsPrintk().addToLog(l_errl); + // Add interesting ISTEP traces + l_errl->collectTrace(ISTEP_COMP_NAME,256); + l_stepError.addErrorDetails( l_errl ); errlCommit( l_errl, HWPF_COMP_ID ); break; |