diff options
author | Benjamin Herrenschmidt <benh@kernel.crashing.org> | 2011-12-16 11:09:21 +1100 |
---|---|---|
committer | Benjamin Herrenschmidt <benh@kernel.crashing.org> | 2011-12-16 11:09:21 +1100 |
commit | 43ca5d347acc0dcae988dbd38b5bb5a930744a75 (patch) | |
tree | 63dd3f2def7220aa0df7951553bbd18022ccb96b | |
parent | efdad722ef4d69eaaa8335deab0b6f55f52d7e57 (diff) | |
parent | 2440c01e10f07adcbc2094ba12ae4ad6094bd2b6 (diff) | |
download | talos-obmc-linux-43ca5d347acc0dcae988dbd38b5bb5a930744a75.tar.gz talos-obmc-linux-43ca5d347acc0dcae988dbd38b5bb5a930744a75.zip |
Merge branch 'kexec' into next
-rw-r--r-- | arch/powerpc/include/asm/kexec.h | 7 | ||||
-rw-r--r-- | arch/powerpc/include/asm/system.h | 2 | ||||
-rw-r--r-- | arch/powerpc/kernel/crash.c | 220 | ||||
-rw-r--r-- | arch/powerpc/kernel/traps.c | 173 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/setup.c | 2 | ||||
-rw-r--r-- | arch/powerpc/sysdev/xics/icp-hv.c | 13 |
6 files changed, 214 insertions, 203 deletions
diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index f921eb121d39..16d7e33d35e9 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -49,7 +49,6 @@ #define KEXEC_STATE_REAL_MODE 2 #ifndef __ASSEMBLY__ -#include <linux/cpumask.h> #include <asm/reg.h> typedef void (*crash_shutdown_t)(void); @@ -73,11 +72,6 @@ extern void kexec_smp_wait(void); /* get and clear naca physid, wait for master to copy new code to 0 */ extern int crashing_cpu; extern void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)); -extern cpumask_t cpus_in_sr; -static inline int kexec_sr_activated(int cpu) -{ - return cpumask_test_cpu(cpu, &cpus_in_sr); -} struct kimage; struct pt_regs; @@ -94,7 +88,6 @@ extern void reserve_crashkernel(void); extern void machine_kexec_mask_interrupts(void); #else /* !CONFIG_KEXEC */ -static inline int kexec_sr_activated(int cpu) { return 0; } static inline void crash_kexec_secondary(struct pt_regs *regs) { } static inline int overlaps_crashkernel(unsigned long start, unsigned long size) diff --git a/arch/powerpc/include/asm/system.h b/arch/powerpc/include/asm/system.h index f56a0a75d989..c377457d1b89 100644 --- a/arch/powerpc/include/asm/system.h +++ b/arch/powerpc/include/asm/system.h @@ -193,8 +193,8 @@ extern void cacheable_memzero(void *p, unsigned int nb); extern void *cacheable_memcpy(void *, const void *, unsigned int); extern int do_page_fault(struct pt_regs *, unsigned long, unsigned long); extern void bad_page_fault(struct pt_regs *, unsigned long, int); -extern int die(const char *, struct pt_regs *, long); extern void _exception(int, struct pt_regs *, int, unsigned long); +extern void die(const char *, struct pt_regs *, long); extern void _nmask_and_or_msr(unsigned long nmask, unsigned long or_val); #ifdef CONFIG_BOOKE_WDT diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index d879809d5c45..28be3452e67a 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -10,85 +10,85 @@ * */ -#undef DEBUG - #include <linux/kernel.h> #include <linux/smp.h> #include <linux/reboot.h> #include <linux/kexec.h> -#include <linux/bootmem.h> #include <linux/export.h> #include <linux/crash_dump.h> #include <linux/delay.h> -#include <linux/elf.h> -#include <linux/elfcore.h> #include <linux/init.h> #include <linux/irq.h> #include <linux/types.h> -#include <linux/memblock.h> #include <asm/processor.h> #include <asm/machdep.h> #include <asm/kexec.h> #include <asm/kdump.h> #include <asm/prom.h> -#include <asm/firmware.h> #include <asm/smp.h> #include <asm/system.h> #include <asm/setjmp.h> -#ifdef DEBUG -#include <asm/udbg.h> -#define DBG(fmt...) udbg_printf(fmt) -#else -#define DBG(fmt...) -#endif +/* + * The primary CPU waits a while for all secondary CPUs to enter. This is to + * avoid sending an IPI if the secondary CPUs are entering + * crash_kexec_secondary on their own (eg via a system reset). + * + * The secondary timeout has to be longer than the primary. Both timeouts are + * in milliseconds. + */ +#define PRIMARY_TIMEOUT 500 +#define SECONDARY_TIMEOUT 1000 -/* This keeps a track of which one is crashing cpu. */ +#define IPI_TIMEOUT 10000 +#define REAL_MODE_TIMEOUT 10000 + +/* This keeps a track of which one is the crashing cpu. */ int crashing_cpu = -1; -static cpumask_t cpus_in_crash = CPU_MASK_NONE; -cpumask_t cpus_in_sr = CPU_MASK_NONE; +static atomic_t cpus_in_crash; +static int time_to_dump; #define CRASH_HANDLER_MAX 3 /* NULL terminated list of shutdown handles */ static crash_shutdown_t crash_shutdown_handles[CRASH_HANDLER_MAX+1]; static DEFINE_SPINLOCK(crash_handlers_lock); +static unsigned long crash_shutdown_buf[JMP_BUF_LEN]; +static int crash_shutdown_cpu = -1; + +static int handle_fault(struct pt_regs *regs) +{ + if (crash_shutdown_cpu == smp_processor_id()) + longjmp(crash_shutdown_buf, 1); + return 0; +} + #ifdef CONFIG_SMP -static atomic_t enter_on_soft_reset = ATOMIC_INIT(0); void crash_ipi_callback(struct pt_regs *regs) { + static cpumask_t cpus_state_saved = CPU_MASK_NONE; + int cpu = smp_processor_id(); if (!cpu_online(cpu)) return; hard_irq_disable(); - if (!cpumask_test_cpu(cpu, &cpus_in_crash)) + if (!cpumask_test_cpu(cpu, &cpus_state_saved)) { crash_save_cpu(regs, cpu); - cpumask_set_cpu(cpu, &cpus_in_crash); - - /* - * Entered via soft-reset - could be the kdump - * process is invoked using soft-reset or user activated - * it if some CPU did not respond to an IPI. - * For soft-reset, the secondary CPU can enter this func - * twice. 1 - using IPI, and 2. soft-reset. - * Tell the kexec CPU that entered via soft-reset and ready - * to go down. - */ - if (cpumask_test_cpu(cpu, &cpus_in_sr)) { - cpumask_clear_cpu(cpu, &cpus_in_sr); - atomic_inc(&enter_on_soft_reset); + cpumask_set_cpu(cpu, &cpus_state_saved); } + atomic_inc(&cpus_in_crash); + smp_mb__after_atomic_inc(); + /* * Starting the kdump boot. * This barrier is needed to make sure that all CPUs are stopped. - * If not, soft-reset will be invoked to bring other CPUs. */ - while (!cpumask_test_cpu(crashing_cpu, &cpus_in_crash)) + while (!time_to_dump) cpu_relax(); if (ppc_md.kexec_cpu_down) @@ -103,106 +103,99 @@ void crash_ipi_callback(struct pt_regs *regs) /* NOTREACHED */ } -/* - * Wait until all CPUs are entered via soft-reset. - */ -static void crash_soft_reset_check(int cpu) -{ - unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */ - - cpumask_clear_cpu(cpu, &cpus_in_sr); - while (atomic_read(&enter_on_soft_reset) != ncpus) - cpu_relax(); -} - - static void crash_kexec_prepare_cpus(int cpu) { unsigned int msecs; - unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */ + int tries = 0; + int (*old_handler)(struct pt_regs *regs); + + printk(KERN_EMERG "Sending IPI to other CPUs\n"); crash_send_ipi(crash_ipi_callback); smp_wmb(); +again: /* * FIXME: Until we will have the way to stop other CPUs reliably, * the crash CPU will send an IPI and wait for other CPUs to * respond. - * Delay of at least 10 seconds. */ - printk(KERN_EMERG "Sending IPI to other cpus...\n"); - msecs = 10000; - while ((cpumask_weight(&cpus_in_crash) < ncpus) && (--msecs > 0)) { - cpu_relax(); + msecs = IPI_TIMEOUT; + while ((atomic_read(&cpus_in_crash) < ncpus) && (--msecs > 0)) mdelay(1); - } /* Would it be better to replace the trap vector here? */ + if (atomic_read(&cpus_in_crash) >= ncpus) { + printk(KERN_EMERG "IPI complete\n"); + return; + } + + printk(KERN_EMERG "ERROR: %d cpu(s) not responding\n", + ncpus - atomic_read(&cpus_in_crash)); + /* - * FIXME: In case if we do not get all CPUs, one possibility: ask the - * user to do soft reset such that we get all. - * Soft-reset will be used until better mechanism is implemented. + * If we have a panic timeout set then we can't wait indefinitely + * for someone to activate system reset. We also give up on the + * second time through if system reset fail to work. */ - if (cpumask_weight(&cpus_in_crash) < ncpus) { - printk(KERN_EMERG "done waiting: %d cpu(s) not responding\n", - ncpus - cpumask_weight(&cpus_in_crash)); - printk(KERN_EMERG "Activate soft-reset to stop other cpu(s)\n"); - cpumask_clear(&cpus_in_sr); - atomic_set(&enter_on_soft_reset, 0); - while (cpumask_weight(&cpus_in_crash) < ncpus) - cpu_relax(); - } + if ((panic_timeout > 0) || (tries > 0)) + return; + /* - * Make sure all CPUs are entered via soft-reset if the kdump is - * invoked using soft-reset. + * A system reset will cause all CPUs to take an 0x100 exception. + * The primary CPU returns here via setjmp, and the secondary + * CPUs reexecute the crash_kexec_secondary path. */ - if (cpumask_test_cpu(cpu, &cpus_in_sr)) - crash_soft_reset_check(cpu); - /* Leave the IPI callback set */ + old_handler = __debugger; + __debugger = handle_fault; + crash_shutdown_cpu = smp_processor_id(); + + if (setjmp(crash_shutdown_buf) == 0) { + printk(KERN_EMERG "Activate system reset (dumprestart) " + "to stop other cpu(s)\n"); + + /* + * A system reset will force all CPUs to execute the + * crash code again. We need to reset cpus_in_crash so we + * wait for everyone to do this. + */ + atomic_set(&cpus_in_crash, 0); + smp_mb(); + + while (atomic_read(&cpus_in_crash) < ncpus) + cpu_relax(); + } + + crash_shutdown_cpu = -1; + __debugger = old_handler; + + tries++; + goto again; } /* - * This function will be called by secondary cpus or by kexec cpu - * if soft-reset is activated to stop some CPUs. + * This function will be called by secondary cpus. */ void crash_kexec_secondary(struct pt_regs *regs) { - int cpu = smp_processor_id(); unsigned long flags; - int msecs = 5; + int msecs = SECONDARY_TIMEOUT; local_irq_save(flags); - /* Wait 5ms if the kexec CPU is not entered yet. */ + + /* Wait for the primary crash CPU to signal its progress */ while (crashing_cpu < 0) { if (--msecs < 0) { - /* - * Either kdump image is not loaded or - * kdump process is not started - Probably xmon - * exited using 'x'(exit and recover) or - * kexec_should_crash() failed for all running tasks. - */ - cpumask_clear_cpu(cpu, &cpus_in_sr); + /* No response, kdump image may not have been loaded */ local_irq_restore(flags); return; } + mdelay(1); - cpu_relax(); - } - if (cpu == crashing_cpu) { - /* - * Panic CPU will enter this func only via soft-reset. - * Wait until all secondary CPUs entered and - * then start kexec boot. - */ - crash_soft_reset_check(cpu); - cpumask_set_cpu(crashing_cpu, &cpus_in_crash); - if (ppc_md.kexec_cpu_down) - ppc_md.kexec_cpu_down(1, 0); - machine_kexec(kexec_crash_image); - /* NOTREACHED */ } + crash_ipi_callback(regs); } @@ -211,7 +204,7 @@ void crash_kexec_secondary(struct pt_regs *regs) static void crash_kexec_prepare_cpus(int cpu) { /* - * move the secondarys to us so that we can copy + * move the secondaries to us so that we can copy * the new kernel 0-0x100 safely * * do this if kexec in setup.c ? @@ -225,7 +218,6 @@ static void crash_kexec_prepare_cpus(int cpu) void crash_kexec_secondary(struct pt_regs *regs) { - cpumask_clear(&cpus_in_sr); } #endif /* CONFIG_SMP */ @@ -236,7 +228,7 @@ static void crash_kexec_wait_realmode(int cpu) unsigned int msecs; int i; - msecs = 10000; + msecs = REAL_MODE_TIMEOUT; for (i=0; i < nr_cpu_ids && msecs > 0; i++) { if (i == cpu) continue; @@ -308,22 +300,11 @@ int crash_shutdown_unregister(crash_shutdown_t handler) } EXPORT_SYMBOL(crash_shutdown_unregister); -static unsigned long crash_shutdown_buf[JMP_BUF_LEN]; -static int crash_shutdown_cpu = -1; - -static int handle_fault(struct pt_regs *regs) -{ - if (crash_shutdown_cpu == smp_processor_id()) - longjmp(crash_shutdown_buf, 1); - return 0; -} - void default_machine_crash_shutdown(struct pt_regs *regs) { unsigned int i; int (*old_handler)(struct pt_regs *regs); - /* * This function is only called after the system * has panicked or is otherwise in a critical state. @@ -341,15 +322,26 @@ void default_machine_crash_shutdown(struct pt_regs *regs) * such that another IPI will not be sent. */ crashing_cpu = smp_processor_id(); - crash_save_cpu(regs, crashing_cpu); + + /* + * If we came in via system reset, wait a while for the secondary + * CPUs to enter. + */ + if (TRAP(regs) == 0x100) + mdelay(PRIMARY_TIMEOUT); + crash_kexec_prepare_cpus(crashing_cpu); - cpumask_set_cpu(crashing_cpu, &cpus_in_crash); + + crash_save_cpu(regs, crashing_cpu); + + time_to_dump = 1; + crash_kexec_wait_realmode(crashing_cpu); machine_kexec_mask_interrupts(); /* - * Call registered shutdown routines savely. Swap out + * Call registered shutdown routines safely. Swap out * __debugger_fault_handler, and replace on exit. */ old_handler = __debugger_fault_handler; diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 5459d148a0f6..c091527efd89 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -98,18 +98,14 @@ static void pmac_backlight_unblank(void) static inline void pmac_backlight_unblank(void) { } #endif -int die(const char *str, struct pt_regs *regs, long err) +static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED; +static int die_owner = -1; +static unsigned int die_nest_count; +static int die_counter; + +static unsigned __kprobes long oops_begin(struct pt_regs *regs) { - static struct { - raw_spinlock_t lock; - u32 lock_owner; - int lock_owner_depth; - } die = { - .lock = __RAW_SPIN_LOCK_UNLOCKED(die.lock), - .lock_owner = -1, - .lock_owner_depth = 0 - }; - static int die_counter; + int cpu; unsigned long flags; if (debugger(regs)) @@ -117,66 +113,109 @@ int die(const char *str, struct pt_regs *regs, long err) oops_enter(); - if (die.lock_owner != raw_smp_processor_id()) { - console_verbose(); - raw_spin_lock_irqsave(&die.lock, flags); - die.lock_owner = smp_processor_id(); - die.lock_owner_depth = 0; - bust_spinlocks(1); - if (machine_is(powermac)) - pmac_backlight_unblank(); - } else { - local_save_flags(flags); + /* racy, but better than risking deadlock. */ + raw_local_irq_save(flags); + cpu = smp_processor_id(); + if (!arch_spin_trylock(&die_lock)) { + if (cpu == die_owner) + /* nested oops. should stop eventually */; + else + arch_spin_lock(&die_lock); } + die_nest_count++; + die_owner = cpu; + console_verbose(); + bust_spinlocks(1); + if (machine_is(powermac)) + pmac_backlight_unblank(); + return flags; +} - if (++die.lock_owner_depth < 3) { - printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter); -#ifdef CONFIG_PREEMPT - printk("PREEMPT "); -#endif -#ifdef CONFIG_SMP - printk("SMP NR_CPUS=%d ", NR_CPUS); -#endif -#ifdef CONFIG_DEBUG_PAGEALLOC - printk("DEBUG_PAGEALLOC "); -#endif -#ifdef CONFIG_NUMA - printk("NUMA "); -#endif - printk("%s\n", ppc_md.name ? ppc_md.name : ""); +static void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, + int signr) +{ + bust_spinlocks(0); + die_owner = -1; + add_taint(TAINT_DIE); + die_nest_count--; + oops_exit(); + printk("\n"); + if (!die_nest_count) + /* Nest count reaches zero, release the lock. */ + arch_spin_unlock(&die_lock); + raw_local_irq_restore(flags); - if (notify_die(DIE_OOPS, str, regs, err, 255, - SIGSEGV) == NOTIFY_STOP) - return 1; + /* + * A system reset (0x100) is a request to dump, so we always send + * it through the crashdump code. + */ + if (kexec_should_crash(current) || (TRAP(regs) == 0x100)) { + crash_kexec(regs); - print_modules(); - show_regs(regs); - } else { - printk("Recursive die() failure, output suppressed\n"); + /* + * We aren't the primary crash CPU. We need to send it + * to a holding pattern to avoid it ending up in the panic + * code. + */ + crash_kexec_secondary(regs); } - bust_spinlocks(0); - die.lock_owner = -1; - add_taint(TAINT_DIE); - raw_spin_unlock_irqrestore(&die.lock, flags); + if (!signr) + return; - if (kexec_should_crash(current) || - kexec_sr_activated(smp_processor_id())) - crash_kexec(regs); - crash_kexec_secondary(regs); + /* + * While our oops output is serialised by a spinlock, output + * from panic() called below can race and corrupt it. If we + * know we are going to panic, delay for 1 second so we have a + * chance to get clean backtraces from all CPUs that are oopsing. + */ + if (in_interrupt() || panic_on_oops || !current->pid || + is_global_init(current)) { + mdelay(MSEC_PER_SEC); + } if (in_interrupt()) panic("Fatal exception in interrupt"); - if (panic_on_oops) panic("Fatal exception"); + do_exit(signr); +} - oops_exit(); - do_exit(err); +static int __kprobes __die(const char *str, struct pt_regs *regs, long err) +{ + printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter); +#ifdef CONFIG_PREEMPT + printk("PREEMPT "); +#endif +#ifdef CONFIG_SMP + printk("SMP NR_CPUS=%d ", NR_CPUS); +#endif +#ifdef CONFIG_DEBUG_PAGEALLOC + printk("DEBUG_PAGEALLOC "); +#endif +#ifdef CONFIG_NUMA + printk("NUMA "); +#endif + printk("%s\n", ppc_md.name ? ppc_md.name : ""); + + if (notify_die(DIE_OOPS, str, regs, err, 255, SIGSEGV) == NOTIFY_STOP) + return 1; + + print_modules(); + show_regs(regs); return 0; } +void die(const char *str, struct pt_regs *regs, long err) +{ + unsigned long flags = oops_begin(regs); + + if (__die(str, regs, err)) + err = 0; + oops_end(flags, regs, err); +} + void user_single_step_siginfo(struct task_struct *tsk, struct pt_regs *regs, siginfo_t *info) { @@ -195,10 +234,11 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) "at %016lx nip %016lx lr %016lx code %x\n"; if (!user_mode(regs)) { - if (die("Exception in kernel mode", regs, signr)) - return; - } else if (show_unhandled_signals && - unhandled_signal(current, signr)) { + die("Exception in kernel mode", regs, signr); + return; + } + + if (show_unhandled_signals && unhandled_signal(current, signr)) { printk_ratelimited(regs->msr & MSR_64BIT ? fmt64 : fmt32, current->comm, current->pid, signr, addr, regs->nip, regs->link, code); @@ -220,25 +260,8 @@ void system_reset_exception(struct pt_regs *regs) return; } -#ifdef CONFIG_KEXEC - cpumask_set_cpu(smp_processor_id(), &cpus_in_sr); -#endif - die("System Reset", regs, SIGABRT); - /* - * Some CPUs when released from the debugger will execute this path. - * These CPUs entered the debugger via a soft-reset. If the CPU was - * hung before entering the debugger it will return to the hung - * state when exiting this function. This causes a problem in - * kdump since the hung CPU(s) will not respond to the IPI sent - * from kdump. To prevent the problem we call crash_kexec_secondary() - * here. If a kdump had not been initiated or we exit the debugger - * with the "exit and recover" command (x) crash_kexec_secondary() - * will return after 5ms and the CPU returns to its previous state. - */ - crash_kexec_secondary(regs); - /* Must die if the interrupt is not recoverable */ if (!(regs->msr & MSR_RI)) panic("Unrecoverable System Reset"); diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 164839cb9fcd..f79f1278dfca 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -366,6 +366,8 @@ static void pSeries_idle(void) static void __init pSeries_setup_arch(void) { + panic_timeout = 10; + /* Discover PIC type and setup ppc_md accordingly */ pseries_discover_pic(); diff --git a/arch/powerpc/sysdev/xics/icp-hv.c b/arch/powerpc/sysdev/xics/icp-hv.c index 784b3fc6f071..253dce98c16e 100644 --- a/arch/powerpc/sysdev/xics/icp-hv.c +++ b/arch/powerpc/sysdev/xics/icp-hv.c @@ -41,23 +41,24 @@ static inline unsigned int icp_hv_get_xirr(unsigned char cppr) return ret; } -static inline void icp_hv_set_xirr(unsigned int value) +static inline void icp_hv_set_cppr(u8 value) { - long rc = plpar_hcall_norets(H_EOI, value); + long rc = plpar_hcall_norets(H_CPPR, value); if (rc != H_SUCCESS) { - pr_err("%s: bad return code eoi xirr=0x%x returned %ld\n", + pr_err("%s: bad return code cppr cppr=0x%x returned %ld\n", __func__, value, rc); WARN_ON_ONCE(1); } } -static inline void icp_hv_set_cppr(u8 value) +static inline void icp_hv_set_xirr(unsigned int value) { - long rc = plpar_hcall_norets(H_CPPR, value); + long rc = plpar_hcall_norets(H_EOI, value); if (rc != H_SUCCESS) { - pr_err("%s: bad return code cppr cppr=0x%x returned %ld\n", + pr_err("%s: bad return code eoi xirr=0x%x returned %ld\n", __func__, value, rc); WARN_ON_ONCE(1); + icp_hv_set_cppr(value >> 24); } } |