From 22524e9017445a08d63733ae5a8c75d9126bdb28 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 8 Jan 2019 00:04:27 +1000 Subject: core/exceptions: allow recoverable sreset exceptions This requires implementing the MSR[RI] bit. Then just allow all non-fatal sreset exceptions to recover. Signed-off-by: Nicholas Piggin Signed-off-by: Stewart Smith --- core/cpu.c | 1 + core/exceptions.c | 48 +++++++++++++++++++++++++++++++++++++----------- core/fast-reboot.c | 2 ++ core/init.c | 15 +++++++++------ 4 files changed, 49 insertions(+), 17 deletions(-) (limited to 'core') diff --git a/core/cpu.c b/core/cpu.c index 50f399c2..85a14783 100644 --- a/core/cpu.c +++ b/core/cpu.c @@ -534,6 +534,7 @@ static void cpu_idle_pm(enum cpu_wake_cause wake_on) default: break; } + mtmsrd(MSR_RI, 1); } } diff --git a/core/exceptions.c b/core/exceptions.c index 1c291735..779cd620 100644 --- a/core/exceptions.c +++ b/core/exceptions.c @@ -39,8 +39,10 @@ static void dump_regs(struct stack_frame *stack) i, stack->gpr[i], i + 16, stack->gpr[i + 16]); } -void __noreturn exception_entry(struct stack_frame *stack) +void exception_entry(struct stack_frame *stack) { + bool fatal = false; + bool hv; uint64_t nip; uint64_t msr; const size_t max = 320; @@ -57,24 +59,40 @@ void __noreturn exception_entry(struct stack_frame *stack) case 0xe80: case 0xea0: case 0xf80: - nip = stack->hsrr0; - msr = stack->hsrr1; + hv = true; break; default: + hv = false; + break; + } + + if (hv) { + nip = stack->hsrr0; + msr = stack->hsrr1; + } else { nip = stack->srr0; msr = stack->srr1; - break; } + if (!(msr & MSR_RI)) + fatal = true; + prerror("***********************************************\n"); l = 0; if (stack->type == 0x100) { - l += snprintf(buf + l, max - l, - "Fatal System Reset at "REG" ", nip); + if (fatal) { + l += snprintf(buf + l, max - l, + "Fatal System Reset at "REG" ", nip); + } else { + l += snprintf(buf + l, max - l, + "System Reset at "REG" ", nip); + } } else if (stack->type == 0x200) { + fatal = true; l += snprintf(buf + l, max - l, "Fatal MCE at "REG" ", nip); } else { + fatal = true; l += snprintf(buf + l, max - l, "Fatal Exception 0x%llx at "REG" ", stack->type, nip); } @@ -83,10 +101,19 @@ void __noreturn exception_entry(struct stack_frame *stack) prerror("%s\n", buf); dump_regs(stack); - abort(); + if (fatal) + abort(); + else + backtrace(); + + if (hv) { + /* Set up for SRR return */ + stack->srr0 = nip; + stack->srr1 = msr; + } } -void __noreturn exception_entry_pm_sreset(void) +void exception_entry_pm_sreset(void) { const size_t max = 320; char buf[max]; @@ -95,10 +122,9 @@ void __noreturn exception_entry_pm_sreset(void) prerror("***********************************************\n"); l = 0; l += snprintf(buf + l, max - l, - "Fatal System Reset in sleep"); + "System Reset in sleep"); prerror("%s\n", buf); - - abort(); + backtrace(); } diff --git a/core/fast-reboot.c b/core/fast-reboot.c index d841474d..22160b65 100644 --- a/core/fast-reboot.c +++ b/core/fast-reboot.c @@ -196,6 +196,7 @@ void fast_reboot(void) * crash. */ enable_machine_check(); + mtmsrd(MSR_RI, 1); /* * sreset vector has a FIXUP_ENDIAN sequence at the start, so @@ -358,6 +359,7 @@ void __noreturn fast_reboot_entry(void) sync(); cleanup_cpu_state(); enable_machine_check(); + mtmsrd(MSR_RI, 1); __secondary_cpu_entry(); } diff --git a/core/init.c b/core/init.c index e0955074..132a42a0 100644 --- a/core/init.c +++ b/core/init.c @@ -475,12 +475,13 @@ static void load_initramfs(void) } } -static void cpu_disable_ME_one(void *param __unused) +static void cpu_disable_ME_RI_one(void *param __unused) { disable_machine_check(); + mtmsrd(0, 1); } -static int64_t cpu_disable_ME_all(void) +static int64_t cpu_disable_ME_RI_all(void) { struct cpu_thread *cpu; struct cpu_job **jobs; @@ -491,12 +492,12 @@ static int64_t cpu_disable_ME_all(void) for_each_available_cpu(cpu) { if (cpu == this_cpu()) continue; - jobs[cpu->pir] = cpu_queue_job(cpu, "cpu_disable_ME", - cpu_disable_ME_one, NULL); + jobs[cpu->pir] = cpu_queue_job(cpu, "cpu_disable_ME_RI", + cpu_disable_ME_RI_one, NULL); } /* this cpu */ - cpu_disable_ME_one(NULL); + cpu_disable_ME_RI_one(NULL); for_each_available_cpu(cpu) { if (jobs[cpu->pir]) @@ -620,7 +621,7 @@ void __noreturn load_and_boot_kernel(bool is_reboot) kernel_entry, fdt, fdt_totalsize(fdt)); /* Disable machine checks on all */ - cpu_disable_ME_all(); + cpu_disable_ME_RI_all(); debug_descriptor.state_flags |= OPAL_BOOT_COMPLETE; @@ -951,6 +952,7 @@ void __noreturn __nomcount main_cpu_entry(const void *fdt) * recover, but we print some useful information. */ enable_machine_check(); + mtmsrd(MSR_RI, 1); /* Setup a NULL catcher to catch accidental NULL ptr calls */ setup_branch_null_catcher(); @@ -1282,6 +1284,7 @@ void __noreturn __secondary_cpu_entry(void) cpu_callin(cpu); enable_machine_check(); + mtmsrd(MSR_RI, 1); /* Some XIVE setup */ xive_cpu_callin(cpu); -- cgit v1.2.1