diff options
author | Dan Crowell <dcrowell@us.ibm.com> | 2017-10-01 16:09:56 -0500 |
---|---|---|
committer | Daniel M. Crowell <dcrowell@us.ibm.com> | 2018-03-29 12:03:50 -0400 |
commit | 90eaed6f430c88eb0127ce47671bd80b21f35433 (patch) | |
tree | 9bc4aaa5cfb416f0da69386fb595e92513e0d1b7 /src/kernel | |
parent | 284cebd97cf08d42ba2f4caa8779bf47494fcc20 (diff) | |
download | talos-hostboot-90eaed6f430c88eb0127ce47671bd80b21f35433.tar.gz talos-hostboot-90eaed6f430c88eb0127ce47671bd80b21f35433.zip |
Force checkstops for unhandled machine checks
Default to MSR[ME]=0 during initial boot for the bootloader and
the hostboot kernel.
Once the xscom address range has been mapped in, enable the
machine check handler to force a checkstop, and set MSR[ME]=1
to allow regular machine check handling.
CQ: SW401402
Change-Id: I104e39465e61b3b19d5c073e71271102711ae54f
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/47179
Reviewed-by: Christian R. Geddes <crgeddes@us.ibm.com>
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: William G. Hoffa <wghoffa@us.ibm.com>
Reviewed-by: Daniel M. Crowell <dcrowell@us.ibm.com>
Diffstat (limited to 'src/kernel')
-rw-r--r-- | src/kernel/bltohbdatamgr.C | 8 | ||||
-rw-r--r-- | src/kernel/exception.C | 5 | ||||
-rw-r--r-- | src/kernel/machchk.C | 45 | ||||
-rw-r--r-- | src/kernel/start.S | 5 | ||||
-rw-r--r-- | src/kernel/syscall.C | 15 |
5 files changed, 70 insertions, 8 deletions
diff --git a/src/kernel/bltohbdatamgr.C b/src/kernel/bltohbdatamgr.C index e33fab6ef..a0c89b034 100644 --- a/src/kernel/bltohbdatamgr.C +++ b/src/kernel/bltohbdatamgr.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2017 */ +/* Contributors Listed Below - COPYRIGHT 2017,2018 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -145,7 +145,6 @@ printk("Version=%lX\n",i_data.version); // were set correctly, instead use BLTOHB_SECURE_OVERRIDES version. if( iv_data.version >= Bootloader::BLTOHB_SECURE_OVERRIDES ) { -printk("lpc=%lX, xscom=%lX\n", i_data.lpcBAR, i_data.xscomBAR ); kassert(i_data.lpcBAR>0); kassert(i_data.xscomBAR>0); iv_data.lpcBAR = i_data.lpcBAR; @@ -158,9 +157,10 @@ printk("lpc=%lX, xscom=%lX\n", i_data.lpcBAR, i_data.xscomBAR ); iv_data.xscomBAR = MMIO_GROUP0_CHIP0_XSCOM_BASE_ADDR; } + printk("lpc=%lX, xscom=%lX\n", i_data.lpcBAR, i_data.xscomBAR ); -printk("lpc=%lX, xscom=%lX, iv_data=%p\n", iv_data.lpcBAR, iv_data.xscomBAR, - static_cast<void *>(&iv_data) ); + printk("iv_lpc=%lX, iv_xscom=%lX, iv_data=%p\n", + iv_data.lpcBAR, iv_data.xscomBAR, static_cast<void *>(&iv_data) ); // Check if bootloader advertised the size of the structure it saw; // otherwise use the default padded size diff --git a/src/kernel/exception.C b/src/kernel/exception.C index 0ece19a63..cf2a35c81 100644 --- a/src/kernel/exception.C +++ b/src/kernel/exception.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2010,2017 */ +/* Contributors Listed Below - COPYRIGHT 2010,2018 */ /* [+] International Business Machines Corp. 
*/ /* */ /* */ @@ -328,6 +328,7 @@ void kernel_execute_machine_check() t->tid, getPIR(), getSRR0(), getSRR1(), getDSISR(), getDAR()); MAGIC_INSTRUCTION(MAGIC_BREAK_ON_ERROR); + Kernel::MachineCheck::forceCheckstop(); kassert(false); } @@ -388,6 +389,7 @@ void kernel_execute_machine_check() t->tid, getPIR(), getSRR0(), getSRR1(), getDSISR(), getDAR()); MAGIC_INSTRUCTION(MAGIC_BREAK_ON_ERROR); + Kernel::MachineCheck::forceCheckstop(); TaskManager::endTask(t, NULL, TASK_STATUS_CRASHED); } } @@ -425,3 +427,4 @@ void kernel_execute_unhandled_exception() termWriteSRC(TI_UNHANDLED_EX, KERNEL::RC_UNHANDLED_EX, exception); terminateExecuteTI(); } + diff --git a/src/kernel/machchk.C b/src/kernel/machchk.C index 79edf4240..2a96b5896 100644 --- a/src/kernel/machchk.C +++ b/src/kernel/machchk.C @@ -27,12 +27,20 @@ #include <kernel/vmmmgr.H> #include <sys/mmio.h> #include <arch/memorymap.H> +#include <arch/ppc.H> namespace Kernel { namespace MachineCheck { +//Keep track of the MMIO address that we can use to force a checkstop +static uint64_t* g_xstopRegPtr = nullptr; + +//Keep track of the data to write into the xstop reg +static uint64_t g_xstopRegValue = 0; + + bool handleLoadUE(task_t* t) { bool handled = false; @@ -132,5 +140,42 @@ bool handleSLB(task_t* t) } +/** + * @brief Tells the kernel how to force a checkstop for unrecoverable + * machine checks + */ +void setCheckstopData(uint64_t i_xstopAddr, uint64_t i_xstopData) +{ + g_xstopRegPtr = reinterpret_cast<uint64_t*>(i_xstopAddr + |VmmManager::FORCE_PHYS_ADDR); + g_xstopRegValue = i_xstopData; + printk( "Set MchChk Xstop: %p=%.16lX\n", g_xstopRegPtr, g_xstopRegValue ); + + // Now that the machine check handler can do the xscom we + // can set MSR[ME]=1 to enable the regular machine check + // handling + uint64_t l_msr = getMSR(); + l_msr |= 0x0000000000001000; //set bit 51 + setMSR(l_msr); +} + +/** + * @brief Force a checkstop if we know how in order to get better + * error isolation for cache/memory UEs + */ +void 
forceCheckstop() +{ + if( g_xstopRegPtr != nullptr ) + { + printk( "Forcing a xstop with %p = %.16lX\n", + g_xstopRegPtr, g_xstopRegValue ); + *g_xstopRegPtr = g_xstopRegValue; + } + else + { + printk( "Unable to force checkstop, No xstop reg set\n" ); + } +} + } } diff --git a/src/kernel/start.S b/src/kernel/start.S index 40ff3b0ed..979235276 100644 --- a/src/kernel/start.S +++ b/src/kernel/start.S @@ -33,11 +33,10 @@ _start: ;// Set thread priority high. or 2,2,2 - ;// Clear MSR[TA] (bit 1) and enable MSR[ME] (bit 51). + ;// Clear MSR[TA] (bit 1) mfmsr r2 rldicl r2,r2,1,1 ;// Clear bit 1 - result [1-63,0] rotrdi r2,r2,1 ;// Rotate right 1 - result [0,63] - ori r2,r2,4096 ;// Set bit 51 ;// Set up SRR0 / SRR1 to enable new MSR. mtsrr1 r2 li r2, _start_postmsr@l @@ -473,7 +472,7 @@ kernel_dispatch_task: stdcx. r0, TASK_CPUPTR, r1 ;// the CPU pointer in the task. mfmsr r2 ;// Get current MSR - ori r2,r2, 0xD030 ;// Enable MSR[EE,ME,PR,IR,DR]. + ori r2,r2, 0xC030 ;// Enable MSR[EE,PR,IR,DR]. rldicl r2,r2,50,1 ;// Clear ... rotldi r2,r2,14 ;// MSR[FP] ld r3, TASK_MSR_MASK(r1) ;// Load MSR mask. 
diff --git a/src/kernel/syscall.C b/src/kernel/syscall.C index 04c65eb10..9dc6bd720 100644 --- a/src/kernel/syscall.C +++ b/src/kernel/syscall.C @@ -45,6 +45,7 @@ #include <kernel/doorbell.H> #include <sys/sync.h> #include <errno.h> +#include <kernel/machchk.H> namespace KernelIpc { @@ -142,6 +143,7 @@ namespace Systemcalls void MmExtend(task_t *t); void MmLinearMap(task_t *t); void CritAssert(task_t *t); + void SetMchkData(task_t *t); syscall syscalls[] = @@ -185,6 +187,7 @@ namespace Systemcalls &MmExtend, // MM_EXTEND &MmLinearMap, // MM_LINEAR_MAP &CritAssert, // MISC_CRITASSERT + &SetMchkData, // MISC_SETMCHKDATA }; }; @@ -988,6 +991,18 @@ namespace Systemcalls CpuManager::critAssert(i_failAddr); } + /** + * @brief Tells the kernel how to force a checkstop for unrecoverable + * machine checks + * @param[in] t: the task calling the critical assert + */ + void SetMchkData(task_t* t) + { + uint64_t i_xstopAddr = (uint64_t)(TASK_GETARG0(t)); + uint64_t i_xstopData = (uint64_t)(TASK_GETARG1(t)); + + Kernel::MachineCheck::setCheckstopData(i_xstopAddr,i_xstopData); + } }; |