summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDan Crowell <dcrowell@us.ibm.com>2019-01-22 15:32:10 -0600
committerDaniel M. Crowell <dcrowell@us.ibm.com>2019-01-25 10:07:27 -0600
commitff5e4695cc58653dda06b0e861349a9d520d87cc (patch)
tree9375d1e401a9720fc8cf422c74b4ea754284090f
parentc2f2f5037920dc8441c6b27ff7a488a90f0433b1 (diff)
downloadtalos-hostboot-ff5e4695cc58653dda06b0e861349a9d520d87cc.tar.gz
talos-hostboot-ff5e4695cc58653dda06b0e861349a9d520d87cc.zip
Add retry to slave core wakeup path
We are still seeing some very intermittent errors in the slave core wakeup path. It still seems like we may have a timing issue. Until we figure out exactly what is going on, I am adding a retry mechanism that should get the core to report in correctly. The retry is done by issuing an additional doorbell message to the core that didn't report in. Change-Id: Ib87e5d58e079674d1eebb44c10d0252a35ea0519 Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/70761 Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com> Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com> Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com> Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com> Reviewed-by: Nicholas E. Bofferding <bofferdn@us.ibm.com> Reviewed-by: Dean Sanner <dsanner@us.ibm.com> Reviewed-by: William G. Hoffa <wghoffa@us.ibm.com> Reviewed-by: Daniel M. Crowell <dcrowell@us.ibm.com>
-rw-r--r--src/include/kernel/cpumgr.H11
-rw-r--r--src/include/kernel/syscalls.H4
-rw-r--r--src/include/sys/misc.h18
-rw-r--r--src/kernel/cpumgr.C37
-rw-r--r--src/kernel/syscall.C11
-rw-r--r--src/lib/syscall_misc.C10
-rw-r--r--src/usr/isteps/istep16/call_host_activate_slave_cores.C17
7 files changed, 101 insertions, 7 deletions
diff --git a/src/include/kernel/cpumgr.H b/src/include/kernel/cpumgr.H
index f8daf5ab3..9d741cd83 100644
--- a/src/include/kernel/cpumgr.H
+++ b/src/include/kernel/cpumgr.H
@@ -5,7 +5,7 @@
/* */
/* OpenPOWER HostBoot Project */
/* */
-/* Contributors Listed Below - COPYRIGHT 2010,2018 */
+/* Contributors Listed Below - COPYRIGHT 2010,2019 */
/* [+] International Business Machines Corp. */
/* */
/* */
@@ -186,6 +186,15 @@ class CpuManager
static void startCore(uint64_t pir,uint64_t i_threads);
+ /** @fn wakeupCore
+ * Wake up an already-started core by sending doorbells to its threads; can only be run after startCore.
+ *
+ * @param[in] pir - PIR value of first thread in core.
+ * @param[in] i_threads - Bitstring of threads to enable (left-justified).
+ */
+ static void wakeupCore(uint64_t pir,uint64_t i_threads);
+
+
/** @fn forceMemoryPeriodic()
* Force the memory free / coalesce operations to be performed on the
* next "periodic" interval.
diff --git a/src/include/kernel/syscalls.H b/src/include/kernel/syscalls.H
index c606ad771..42e1a551b 100644
--- a/src/include/kernel/syscalls.H
+++ b/src/include/kernel/syscalls.H
@@ -5,7 +5,7 @@
/* */
/* OpenPOWER HostBoot Project */
/* */
-/* Contributors Listed Below - COPYRIGHT 2010,2018 */
+/* Contributors Listed Below - COPYRIGHT 2010,2019 */
/* [+] International Business Machines Corp. */
/* */
/* */
@@ -99,6 +99,8 @@ namespace Systemcalls
MISC_CPUNAP,
/** cpu_master_winkle() */
MISC_CPUWINKLE,
+ /** cpu_wakeup_core() */
+ MISC_CPUWAKEUPCORE,
/** mm_alloc_block() */
MM_ALLOC_BLOCK,
diff --git a/src/include/sys/misc.h b/src/include/sys/misc.h
index 4fe0d5e44..183754f56 100644
--- a/src/include/sys/misc.h
+++ b/src/include/sys/misc.h
@@ -5,7 +5,7 @@
/* */
/* OpenPOWER HostBoot Project */
/* */
-/* Contributors Listed Below - COPYRIGHT 2011,2018 */
+/* Contributors Listed Below - COPYRIGHT 2011,2019 */
/* [+] International Business Machines Corp. */
/* */
/* */
@@ -252,6 +252,22 @@ int cpu_master_winkle(bool i_fusedCores);
*/
int cpu_all_winkle();
+/** @fn cpu_wakeup_core
+ * @brief Have the kernel wake up a core that was previously started.
+ *
+ * @param[in] pir - PIR value of the first thread on the core.
+ * @param[in] i_threads - Bitstring of threads to enable (left-justified).
+ *
+ * @note The kernel will wake up every thread selected by i_threads on the
+ * requested core even though the caller only passes a single PIR value.
+ *
+ * @return 0 or -(errno) on failure.
+ *
+ * @retval -ENXIO - The core ID was outside of the range the kernel is
+ * prepared to support.
+ */
+int cpu_wakeup_core(uint64_t pir,uint64_t i_threads);
+
/** @fn cpu_crit_assert
* @brief Forces a Terminate Immediate after a crit-assert is issued
* @param[in] i_failAddr - value in the linkRegister of the address
diff --git a/src/kernel/cpumgr.C b/src/kernel/cpumgr.C
index a2dff9415..425cc2d28 100644
--- a/src/kernel/cpumgr.C
+++ b/src/kernel/cpumgr.C
@@ -5,7 +5,7 @@
/* */
/* OpenPOWER HostBoot Project */
/* */
-/* Contributors Listed Below - COPYRIGHT 2010,2018 */
+/* Contributors Listed Below - COPYRIGHT 2010,2019 */
/* [+] International Business Machines Corp. */
/* */
/* */
@@ -454,7 +454,7 @@ void CpuManager::startCore(uint64_t pir,uint64_t i_threads)
// Only wakeup the threads we were told to wakeup
if( i_threads & (0x8000000000000000 >> i) )
{
- printk("Dbell pir 0x%lx\n", pir + i);
+ printk("Dbell:0x%lx\n", pir + i);
//Initiate the Doorbell for this core/pir
send_doorbell_wakeup(pir + i);
}
@@ -463,6 +463,39 @@ void CpuManager::startCore(uint64_t pir,uint64_t i_threads)
return;
};
+void CpuManager::wakeupCore(uint64_t pir,uint64_t i_threads)
+{
+ size_t threads = getThreadCount();
+ pir = pir & ~(threads-1); // mask off the low bits to get the core's base PIR (presumes thread count is a power of two - TODO confirm)
+
+ if (pir >=
+ (KERNEL_MAX_SUPPORTED_NODES * KERNEL_MAX_SUPPORTED_CPUS_PER_NODE))
+ {
+ TASK_SETRTN(TaskManager::getCurrentTask(), -ENXIO); // base PIR is out of range: set -ENXIO as the current task's return value
+ return;
+ }
+
+ // Send a message to userspace that a core with this base pir is being added;
+ // userspace will already know which threads on the core to expect
+ InterruptMsgHdlr::addCpuCore(pir);
+
+ // Physically wake up the threads with doorbells.
+ // Assumption is that startCore has already run, so all
+ // internal structures are set up
+ for(size_t i = 0; i < threads; i++)
+ {
+ // Only wake up the threads we were told to wake up (bit i counted from the left of i_threads)
+ if( i_threads & (0x8000000000000000 >> i) )
+ {
+ printk("Dbell2:0x%lx\n", pir + i);
+ // Initiate the doorbell for this thread's PIR
+ doorbell_send(pir + i);
+ }
+ }
+
+ return;
+};
+
size_t CpuManager::getThreadCount()
{
size_t threads = 0;
diff --git a/src/kernel/syscall.C b/src/kernel/syscall.C
index c293d5067..1df43b78e 100644
--- a/src/kernel/syscall.C
+++ b/src/kernel/syscall.C
@@ -52,6 +52,8 @@
extern "C"
void kernel_execute_hyp_doorbell()
{
+ printkd("hyp_doorbell on %lx\n", getPIR());
+
// Per POWER ISA Section 5.9.2, to avoid any weak consistency
// issues we must use a msgsync instruction before consuming
// any data set by a different thread following a doorbell
@@ -144,6 +146,7 @@ namespace Systemcalls
void CpuSprSet(task_t *t);
void CpuNap(task_t *t);
void CpuWinkle(task_t *t);
+ void CpuWakeupCore(task_t *t);
void MmAllocBlock(task_t *t);
void MmRemovePages(task_t *t);
void MmSetPermission(task_t *t);
@@ -189,6 +192,7 @@ namespace Systemcalls
&CpuSprSet, // MISC_CPUSPRSET
&CpuNap, // MISC_CPUNAP
&CpuWinkle, // MISC_CPUWINKLE
+ &CpuWakeupCore, // MISC_CPUWAKEUPCORE
&MmAllocBlock, // MM_ALLOC_BLOCK
&MmRemovePages, // MM_REMOVE_PAGES
@@ -858,6 +862,13 @@ namespace Systemcalls
}
}
+ /** MISC_CPUWAKEUPCORE handler: force thread wakeup via doorbell, using the pir and thread bitstring passed as the task's syscall arguments. */
+ void CpuWakeupCore(task_t *t)
+ {
+ CpuManager::wakeupCore(static_cast<uint64_t>(TASK_GETARG0(t)), // arg 0: pir
+ static_cast<uint64_t>(TASK_GETARG1(t))); // arg 1: i_threads bitstring
+ };
+
/**
* Allocate a block of virtual memory within the base segment
* @param[in] t: The task used to allocate a block in the base segment
diff --git a/src/lib/syscall_misc.C b/src/lib/syscall_misc.C
index ad6b204a6..29c075baf 100644
--- a/src/lib/syscall_misc.C
+++ b/src/lib/syscall_misc.C
@@ -5,7 +5,7 @@
/* */
/* OpenPOWER HostBoot Project */
/* */
-/* Contributors Listed Below - COPYRIGHT 2011,2018 */
+/* Contributors Listed Below - COPYRIGHT 2011,2019 */
/* [+] International Business Machines Corp. */
/* */
/* */
@@ -139,6 +139,14 @@ int cpu_all_winkle()
return rc;
}
+int cpu_wakeup_core(uint64_t pir,uint64_t i_threads)
+{
+ return reinterpret_cast<int64_t>( // syscall result (presumably a void* - TODO confirm) is converted to an integer rc, then narrowed to int on return
+ _syscall2(MISC_CPUWAKEUPCORE, // two-argument syscall identified by MISC_CPUWAKEUPCORE
+ reinterpret_cast<void*>(pir), // both arguments are passed to the syscall as void*
+ reinterpret_cast<void*>(i_threads)));
+}
+
void cpu_crit_assert(uint64_t i_failAddr)
{
diff --git a/src/usr/isteps/istep16/call_host_activate_slave_cores.C b/src/usr/isteps/istep16/call_host_activate_slave_cores.C
index 67e6b816b..e9cea28fb 100644
--- a/src/usr/isteps/istep16/call_host_activate_slave_cores.C
+++ b/src/usr/isteps/istep16/call_host_activate_slave_cores.C
@@ -5,7 +5,7 @@
/* */
/* OpenPOWER HostBoot Project */
/* */
-/* Contributors Listed Below - COPYRIGHT 2015,2018 */
+/* Contributors Listed Below - COPYRIGHT 2015,2019 */
/* [+] International Business Machines Corp. */
/* */
/* */
@@ -122,6 +122,18 @@ void* call_host_activate_slave_cores (void *io_pArgs)
int rc = cpu_start_core(pir, en_threads);
+ // Workaround for intermittent timing issues when new cores wake up:
+ // if the start timed out, resend the doorbell to the core once
+ if (-ETIME == rc)
+ {
+ TRACFCOMP( ISTEPS_TRACE::g_trac_isteps_trace,
+ "call_host_activate_slave_cores: "
+ "Time out rc from kernel %d on core 0x%x, resending doorbell",
+ rc,
+ pir);
+ rc = cpu_wakeup_core(pir,en_threads);
+ }
+
// Handle time out error
uint32_t l_checkidle_eid = 0;
if (-ETIME == rc)
@@ -208,6 +220,9 @@ void* call_host_activate_slave_cores (void *io_pArgs)
// Throw printk in there too in case it is a kernel issue
ERRORLOG::ErrlUserDetailsPrintk().addToLog(l_errl);
+ // Add interesting ISTEP traces
+ l_errl->collectTrace(ISTEP_COMP_NAME,256);
+
l_stepError.addErrorDetails( l_errl );
errlCommit( l_errl, HWPF_COMP_ID );
break;
OpenPOWER on IntegriCloud