summaryrefslogtreecommitdiffstats
path: root/src/kernel
diff options
context:
space:
mode:
authorDan Crowell <dcrowell@us.ibm.com>2017-09-23 22:24:28 -0500
committerWilliam G. Hoffa <wghoffa@us.ibm.com>2018-06-15 13:44:39 -0400
commit7cc8294252577238eb99bad42c3bc7dd92f4794d (patch)
tree09a73519b9565693ca699a73ec1a21fd17153350 /src/kernel
parent5090c197292cdd0ec4ad8e416020e5229812cb65 (diff)
downloadtalos-hostboot-7cc8294252577238eb99bad42c3bc7dd92f4794d.tar.gz
talos-hostboot-7cc8294252577238eb99bad42c3bc7dd92f4794d.zip
Debug improvements for exceptions and OOM hangs
There are two main changes in this commit: 1) Forcing an assert if we cannot allocate pages after 10,000 attempts to yield. 2) Adding a backtrace for a lot of exception paths. Change-Id: I755ada753b78abed56e553f7c669f0f98ae68700 Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/58224 Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com> Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com> Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com> Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com> Reviewed-by: Prachi Gupta <pragupta@us.ibm.com> Reviewed-by: Thi N. Tran <thi@us.ibm.com> Reviewed-by: William G. Hoffa <wghoffa@us.ibm.com>
Diffstat (limited to 'src/kernel')
-rw-r--r--src/kernel/exception.C6
-rw-r--r--src/kernel/machchk.C2
-rw-r--r--src/kernel/misc.C45
-rw-r--r--src/kernel/pagemgr.C12
4 files changed, 58 insertions, 7 deletions
diff --git a/src/kernel/exception.C b/src/kernel/exception.C
index cf2a35c81..ca05bf3a1 100644
--- a/src/kernel/exception.C
+++ b/src/kernel/exception.C
@@ -37,6 +37,7 @@
#include <kernel/terminate.H>
#include <kernel/hbterminatetypes.H>
#include <kernel/kernel_reasoncodes.H>
+#include <kernel/misc.H>
namespace ExceptionHandles
@@ -62,7 +63,8 @@ void kernel_execute_prog_ex()
}
if (!handled)
{
- printk("Program exception, killing task %d\n", t->tid);
+ printk( "Program exception, killing task %d, SRR0=0x%lX, SRR1=0x%lX\n",
+ t->tid, getSRR0(), getSRR1() );
MAGIC_INSTRUCTION(MAGIC_BREAK_ON_ERROR);
TaskManager::endTask(t, NULL, TASK_STATUS_CRASHED);
}
@@ -107,7 +109,7 @@ void kernel_execute_data_storage()
"Exception Type: %lx\n"
"Instruction where it occurred: %p\n",
t->tid, getDAR(), getDSISR(), t->context.nip);
- MAGIC_INSTRUCTION(MAGIC_BREAK_ON_ERROR);
+ KernelMisc::printkBacktrace(t);
TaskManager::endTask(t, NULL, TASK_STATUS_CRASHED);
}
}
diff --git a/src/kernel/machchk.C b/src/kernel/machchk.C
index 2a96b5896..776ede0ce 100644
--- a/src/kernel/machchk.C
+++ b/src/kernel/machchk.C
@@ -149,7 +149,7 @@ void setCheckstopData(uint64_t i_xstopAddr, uint64_t i_xstopData)
g_xstopRegPtr = reinterpret_cast<uint64_t*>(i_xstopAddr
|VmmManager::FORCE_PHYS_ADDR);
g_xstopRegValue = i_xstopData;
- printk( "Set MchChk Xstop: %p=%.16lX\n", g_xstopRegPtr, g_xstopRegValue );
+ printk( "Arm MchChk Xstop: %p=%.16lX\n", g_xstopRegPtr, g_xstopRegValue );
// Now that the machine check handler can do the xscom we
// can set MSR[ME]=1 to enable the regular machine check
diff --git a/src/kernel/misc.C b/src/kernel/misc.C
index b602ed707..f6aa69bda 100644
--- a/src/kernel/misc.C
+++ b/src/kernel/misc.C
@@ -585,6 +585,51 @@ namespace KernelMisc
writeScratchReg(l_scratch_addr, data);
};
+ /**
+ * @brief Collect the backtrace for the given task and print it with printk; pass i_task == nullptr to trace the caller's own stack
+ */
+ void printkBacktrace(task_t* i_task)
+ {
+ uint64_t* l_frame = nullptr;
+ uint32_t l_tid = 0;
+ bool l_kernelSpace = true;
+ if( i_task == nullptr ) //user-space: no task supplied, so start from the caller's own frame
+ {
+ l_kernelSpace = false;
+ printk("U:");
+ l_frame = static_cast<uint64_t*>(framePointer());
+ l_tid = task_gettid();
+ }
+ else //kernel-space: start from the state saved in the supplied task's context
+ {
+ printk("K:");
+ l_frame = reinterpret_cast<uint64_t*>( i_task->context.gprs[1] ); // gprs[1] is used as the starting frame pointer
+ l_tid = i_task->tid;
+ }
+
+ printk("Backtrace for %d:\n ", l_tid );
+ printkd("frame=%p\n",l_frame);isync();
+ while (l_frame != NULL) // follow the frame chain until a null link; NOTE(review): no depth limit, so a cyclic chain would never terminate
+ {
+ printkd("\nf=%p\n",l_frame); isync();
+ if( l_kernelSpace ) // the task's frame address is translated before it is read
+ {
+ uint64_t* frame_p = reinterpret_cast<uint64_t*>
+ (VmmManager::findPhysicalAddress( reinterpret_cast<uint64_t>
+ (l_frame) ));
+ printkd("frame_p=%p\n",frame_p); isync();
+ l_frame = frame_p; // NOTE(review): dereferenced below without checking for a failed translation - confirm what findPhysicalAddress returns on a miss
+ }
+ if( (0 != *l_frame) && (0 != l_frame[2]) ) // print only when both the chain link and the l_frame[2] slot are non-zero
+ {
+ printk( "<-0x%lX", l_frame[2] ); // l_frame[2] is presumably the saved return address (LR save slot) - TODO confirm against the ABI
+ }
+
+ l_frame = reinterpret_cast<uint64_t*>(*l_frame); // the first doubleword of the frame is used as the link to the next frame
+ }
+ printk("\n");
+ }
+
};
diff --git a/src/kernel/pagemgr.C b/src/kernel/pagemgr.C
index f5c4d406a..42545470a 100644
--- a/src/kernel/pagemgr.C
+++ b/src/kernel/pagemgr.C
@@ -37,6 +37,7 @@
#include <assert.h>
#include <kernel/memstate.H>
#include <kernel/bltohbdatamgr.H>
+#include <kernel/misc.H>
size_t PageManager::cv_coalesce_count = 0;
@@ -155,7 +156,7 @@ void* PageManager::allocatePage(size_t n, bool userspace)
// In non-kernel mode, make a system-call to allocate in kernel-mode.
if (!KernelMisc::in_kernel_mode())
{
- size_t attempts = 0;
+ size_t l_attempts = 0;
while (NULL == page)
{
page = _syscall1(Systemcalls::MM_ALLOC_PAGES,
@@ -165,11 +166,14 @@ void* PageManager::allocatePage(size_t n, bool userspace)
// will eventually free up (ex. VMM flushes).
if (NULL == page)
{
- attempts++;
- if( attempts == 10000 ) //arbitrarily huge number
+ l_attempts++;
+ if( l_attempts == 10000 )
{
- printk("Cannot allocate %ld pages\n", n);
+ printk( "Cannot allocate %ld pages to %d!\n",
+ n, task_gettid() );
MAGIC_INSTRUCTION(MAGIC_BREAK_ON_ERROR);
+ KernelMisc::printkBacktrace(nullptr);
+ task_crash();
}
task_yield();
}
OpenPOWER on IntegriCloud