diff options
author | Dan Crowell <dcrowell@us.ibm.com> | 2017-09-23 22:24:28 -0500 |
---|---|---|
committer | William G. Hoffa <wghoffa@us.ibm.com> | 2018-06-15 13:44:39 -0400 |
commit | 7cc8294252577238eb99bad42c3bc7dd92f4794d (patch) | |
tree | 09a73519b9565693ca699a73ec1a21fd17153350 /src/kernel | |
parent | 5090c197292cdd0ec4ad8e416020e5229812cb65 (diff) | |
download | talos-hostboot-7cc8294252577238eb99bad42c3bc7dd92f4794d.tar.gz talos-hostboot-7cc8294252577238eb99bad42c3bc7dd92f4794d.zip |
Debug improvements for exceptions and OOM hangs
There are two main changes in this commit:
1) Forcing an assert if we still cannot allocate pages after
10,000 allocation attempts (yielding between attempts).
2) Adding a backtrace for a lot of exception paths.
Change-Id: I755ada753b78abed56e553f7c669f0f98ae68700
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/58224
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: Prachi Gupta <pragupta@us.ibm.com>
Reviewed-by: Thi N. Tran <thi@us.ibm.com>
Reviewed-by: William G. Hoffa <wghoffa@us.ibm.com>
Diffstat (limited to 'src/kernel')
-rw-r--r-- | src/kernel/exception.C | 6 | ||||
-rw-r--r-- | src/kernel/machchk.C | 2 | ||||
-rw-r--r-- | src/kernel/misc.C | 45 | ||||
-rw-r--r-- | src/kernel/pagemgr.C | 12 |
4 files changed, 58 insertions, 7 deletions
diff --git a/src/kernel/exception.C b/src/kernel/exception.C index cf2a35c81..ca05bf3a1 100644 --- a/src/kernel/exception.C +++ b/src/kernel/exception.C @@ -37,6 +37,7 @@ #include <kernel/terminate.H> #include <kernel/hbterminatetypes.H> #include <kernel/kernel_reasoncodes.H> +#include <kernel/misc.H> namespace ExceptionHandles @@ -62,7 +63,8 @@ void kernel_execute_prog_ex() } if (!handled) { - printk("Program exception, killing task %d\n", t->tid); + printk( "Program exception, killing task %d, SRR0=0x%lX, SRR1=0x%lX\n", + t->tid, getSRR0(), getSRR1() ); MAGIC_INSTRUCTION(MAGIC_BREAK_ON_ERROR); TaskManager::endTask(t, NULL, TASK_STATUS_CRASHED); } @@ -107,7 +109,7 @@ void kernel_execute_data_storage() "Exception Type: %lx\n" "Instruction where it occurred: %p\n", t->tid, getDAR(), getDSISR(), t->context.nip); - MAGIC_INSTRUCTION(MAGIC_BREAK_ON_ERROR); + KernelMisc::printkBacktrace(t); TaskManager::endTask(t, NULL, TASK_STATUS_CRASHED); } } diff --git a/src/kernel/machchk.C b/src/kernel/machchk.C index 2a96b5896..776ede0ce 100644 --- a/src/kernel/machchk.C +++ b/src/kernel/machchk.C @@ -149,7 +149,7 @@ void setCheckstopData(uint64_t i_xstopAddr, uint64_t i_xstopData) g_xstopRegPtr = reinterpret_cast<uint64_t*>(i_xstopAddr |VmmManager::FORCE_PHYS_ADDR); g_xstopRegValue = i_xstopData; - printk( "Set MchChk Xstop: %p=%.16lX\n", g_xstopRegPtr, g_xstopRegValue ); + printk( "Arm MchChk Xstop: %p=%.16lX\n", g_xstopRegPtr, g_xstopRegValue ); // Now that the machine check handler can do the xscom we // can set MSR[ME]=1 to enable the regular machine check diff --git a/src/kernel/misc.C b/src/kernel/misc.C index b602ed707..f6aa69bda 100644 --- a/src/kernel/misc.C +++ b/src/kernel/misc.C @@ -585,6 +585,51 @@ namespace KernelMisc writeScratchReg(l_scratch_addr, data); }; + /** + * @brief Collect the backtrace for the given task and print an + */ + void printkBacktrace(task_t* i_task) + { + uint64_t* l_frame = nullptr; + uint32_t l_tid = 0; + bool l_kernelSpace = true; + if( 
i_task == nullptr ) //user-space + { + l_kernelSpace = false; + printk("U:"); + l_frame = static_cast<uint64_t*>(framePointer()); + l_tid = task_gettid(); + } + else //kernel-space + { + printk("K:"); + l_frame = reinterpret_cast<uint64_t*>( i_task->context.gprs[1] ); + l_tid = i_task->tid; + } + + printk("Backtrace for %d:\n ", l_tid ); + printkd("frame=%p\n",l_frame);isync(); + while (l_frame != NULL) + { + printkd("\nf=%p\n",l_frame); isync(); + if( l_kernelSpace ) + { + uint64_t* frame_p = reinterpret_cast<uint64_t*> + (VmmManager::findPhysicalAddress( reinterpret_cast<uint64_t> + (l_frame) )); + printkd("frame_p=%p\n",frame_p); isync(); + l_frame = frame_p; + } + if( (0 != *l_frame) && (0 != l_frame[2]) ) + { + printk( "<-0x%lX", l_frame[2] ); + } + + l_frame = reinterpret_cast<uint64_t*>(*l_frame); + } + printk("\n"); + } + }; diff --git a/src/kernel/pagemgr.C b/src/kernel/pagemgr.C index f5c4d406a..42545470a 100644 --- a/src/kernel/pagemgr.C +++ b/src/kernel/pagemgr.C @@ -37,6 +37,7 @@ #include <assert.h> #include <kernel/memstate.H> #include <kernel/bltohbdatamgr.H> +#include <kernel/misc.H> size_t PageManager::cv_coalesce_count = 0; @@ -155,7 +156,7 @@ void* PageManager::allocatePage(size_t n, bool userspace) // In non-kernel mode, make a system-call to allocate in kernel-mode. if (!KernelMisc::in_kernel_mode()) { - size_t attempts = 0; + size_t l_attempts = 0; while (NULL == page) { page = _syscall1(Systemcalls::MM_ALLOC_PAGES, @@ -165,11 +166,14 @@ void* PageManager::allocatePage(size_t n, bool userspace) // will eventually free up (ex. VMM flushes). if (NULL == page) { - attempts++; - if( attempts == 10000 ) //arbitrarily huge number + l_attempts++; + if( l_attempts == 10000 ) { - printk("Cannot allocate %ld pages\n", n); + printk( "Cannot allocate %ld pages to %d!\n", + n, task_gettid() ); MAGIC_INSTRUCTION(MAGIC_BREAK_ON_ERROR); + KernelMisc::printkBacktrace(nullptr); + task_crash(); } task_yield(); } |