diff options
author | Stephen Boyd <sboyd@codeaurora.org> | 2013-06-24 17:45:02 -0700 |
---|---|---|
committer | Stephen Boyd <sboyd@codeaurora.org> | 2013-06-24 17:45:02 -0700 |
commit | 42cba6e27c4cddc4df701c597dbfa666e1c013c8 (patch) | |
tree | 6fe38f37a79391bce3db56f311d0f3b83711fad0 /arch | |
parent | f722406faae2d073cc1d01063d1123c35425939e (diff) | |
parent | 2699339361a9bacb3fa663e6b8981a040cfca4ee (diff) | |
download | blackbird-op-linux-42cba6e27c4cddc4df701c597dbfa666e1c013c8.tar.gz blackbird-op-linux-42cba6e27c4cddc4df701c597dbfa666e1c013c8.zip |
Merge branch 'timers/core' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip into remove-local-timers
Diffstat (limited to 'arch')
93 files changed, 1066 insertions, 642 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index dd0e8eb8042f..a4429bcd609e 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -213,6 +213,9 @@ config USE_GENERIC_SMP_HELPERS config GENERIC_SMP_IDLE_THREAD bool +config GENERIC_IDLE_POLL_SETUP + bool + # Select if arch init_task initializer is different to init/init_task.c config ARCH_INIT_TASK bool diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index d423d58f938d..53d3a356f61f 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -14,6 +14,7 @@ config ARM select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW select GENERIC_PCI_IOMAP + select GENERIC_SCHED_CLOCK select GENERIC_SMP_IDLE_THREAD select GENERIC_IDLE_POLL_SETUP select GENERIC_STRNCPY_FROM_USER @@ -38,6 +39,7 @@ config ARM select HAVE_GENERIC_HARDIRQS select HAVE_HW_BREAKPOINT if (PERF_EVENTS && (CPU_V6 || CPU_V6K || CPU_V7)) select HAVE_IDE if PCI || ISA || PCMCIA + select HAVE_IRQ_TIME_ACCOUNTING select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZMA select HAVE_KERNEL_LZO @@ -488,7 +490,7 @@ config ARCH_IXP4XX config ARCH_DOVE bool "Marvell Dove" select ARCH_REQUIRE_GPIOLIB - select CPU_V7 + select CPU_PJ4 select GENERIC_CLOCKEVENTS select MIGHT_HAVE_PCI select PINCTRL diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 47374085befd..1ba358ba16b8 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -309,7 +309,7 @@ define archhelp echo ' Image - Uncompressed kernel image (arch/$(ARCH)/boot/Image)' echo '* xipImage - XIP kernel image, if configured (arch/$(ARCH)/boot/xipImage)' echo ' uImage - U-Boot wrapped zImage' - echo ' bootpImage - Combined zImage and initial RAM disk' + echo ' bootpImage - Combined zImage and initial RAM disk' echo ' (supply initrd image via make variable INITRD=<path>)' echo '* dtbs - Build device tree blobs for enabled boards' echo ' install - Install uncompressed kernel' diff --git a/arch/arm/common/mcpm_platsmp.c b/arch/arm/common/mcpm_platsmp.c index 52b88d81b7bb..3caed0db6986 100644 --- a/arch/arm/common/mcpm_platsmp.c +++ b/arch/arm/common/mcpm_platsmp.c @@ -15,8 +15,6 @@ #include <linux/smp.h> #include <linux/spinlock.h> -#include <linux/irqchip/arm-gic.h> - #include <asm/mcpm.h> #include <asm/smp.h> #include <asm/smp_plat.h> @@ -49,7 +47,6 @@ static int __cpuinit mcpm_boot_secondary(unsigned int cpu, struct task_struct *i static void __cpuinit mcpm_secondary_init(unsigned int cpu) { mcpm_cpu_powered_up(); - gic_secondary_init(0); } #ifdef CONFIG_HOTPLUG_CPU diff --git a/arch/arm/common/timer-sp.c b/arch/arm/common/timer-sp.c index ddc740769601..023ee63827a2 100644 --- a/arch/arm/common/timer-sp.c +++ b/arch/arm/common/timer-sp.c @@ -28,8 +28,8 @@ #include <linux/of.h> #include <linux/of_address.h> #include <linux/of_irq.h> +#include <linux/sched_clock.h> -#include <asm/sched_clock.h> #include <asm/hardware/arm_timer.h> #include <asm/hardware/timer-sp.h> diff --git a/arch/arm/include/asm/cmpxchg.h b/arch/arm/include/asm/cmpxchg.h index 7eb18c1d8d6c..4f009c10540d 100644 --- a/arch/arm/include/asm/cmpxchg.h +++ b/arch/arm/include/asm/cmpxchg.h @@ -233,15 +233,15 @@ static inline unsigned long __cmpxchg_local(volatile void *ptr, ((__typeof__(*(ptr)))atomic64_cmpxchg(container_of((ptr), \ atomic64_t, \ counter), \ - (unsigned long)(o), \ - (unsigned long)(n))) + (unsigned long long)(o), \ + (unsigned long long)(n))) #define cmpxchg64_local(ptr, o, n) \ ((__typeof__(*(ptr)))local64_cmpxchg(container_of((ptr), \ local64_t, \ a), \ - (unsigned long)(o), \ - (unsigned long)(n))) + (unsigned long long)(o), \ + (unsigned long long)(n))) #endif /* __LINUX_ARM_ARCH__ >= 6 */ diff --git a/arch/arm/include/asm/sched_clock.h b/arch/arm/include/asm/sched_clock.h index 3d520ddca61b..2389b71a8e7c 100644 --- a/arch/arm/include/asm/sched_clock.h +++ b/arch/arm/include/asm/sched_clock.h @@ -1,16 +1,4 @@ -/* - * sched_clock.h: support for extending counters to full 64-bit ns counter - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. +/* You shouldn't include this file. Use linux/sched_clock.h instead. + * Temporary file until all asm/sched_clock.h users are gone */ -#ifndef ASM_SCHED_CLOCK -#define ASM_SCHED_CLOCK - -extern void sched_clock_postinit(void); -extern void setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate); - -extern unsigned long long (*sched_clock_func)(void); - -#endif +#include <linux/sched_clock.h> diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 5f3338eacad2..97cb0576d07c 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -16,7 +16,7 @@ CFLAGS_REMOVE_return_address.o = -pg # Object file lists. obj-y := elf.o entry-armv.o entry-common.o irq.o opcodes.o \ - process.o ptrace.o return_address.o sched_clock.o \ + process.o ptrace.o return_address.o \ setup.o signal.o stacktrace.o sys_arm.o time.o traps.o obj-$(CONFIG_ATAGS) += atags_parse.o diff --git a/arch/arm/kernel/arch_timer.c b/arch/arm/kernel/arch_timer.c index 59dcdced6e30..221f07b11ccb 100644 --- a/arch/arm/kernel/arch_timer.c +++ b/arch/arm/kernel/arch_timer.c @@ -11,9 +11,9 @@ #include <linux/init.h> #include <linux/types.h> #include <linux/errno.h> +#include <linux/sched_clock.h> #include <asm/delay.h> -#include <asm/sched_clock.h> #include <clocksource/arm_arch_timer.h> diff --git a/arch/arm/kernel/sched_clock.c b/arch/arm/kernel/sched_clock.c deleted file mode 100644 index e8edcaa0e432..000000000000 --- a/arch/arm/kernel/sched_clock.c +++ /dev/null @@ -1,217 +0,0 @@ -/* - * sched_clock.c: support for extending counters to full 64-bit ns counter - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include <linux/clocksource.h> -#include <linux/init.h> -#include <linux/jiffies.h> -#include <linux/kernel.h> -#include <linux/moduleparam.h> -#include <linux/sched.h> -#include <linux/syscore_ops.h> -#include <linux/timer.h> - -#include <asm/sched_clock.h> - -struct clock_data { - u64 epoch_ns; - u32 epoch_cyc; - u32 epoch_cyc_copy; - unsigned long rate; - u32 mult; - u32 shift; - bool suspended; - bool needs_suspend; -}; - -static void sched_clock_poll(unsigned long wrap_ticks); -static DEFINE_TIMER(sched_clock_timer, sched_clock_poll, 0, 0); -static int irqtime = -1; - -core_param(irqtime, irqtime, int, 0400); - -static struct clock_data cd = { - .mult = NSEC_PER_SEC / HZ, -}; - -static u32 __read_mostly sched_clock_mask = 0xffffffff; - -static u32 notrace jiffy_sched_clock_read(void) -{ - return (u32)(jiffies - INITIAL_JIFFIES); -} - -static u32 __read_mostly (*read_sched_clock)(void) = jiffy_sched_clock_read; - -static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift) -{ - return (cyc * mult) >> shift; -} - -static unsigned long long notrace cyc_to_sched_clock(u32 cyc, u32 mask) -{ - u64 epoch_ns; - u32 epoch_cyc; - - if (cd.suspended) - return cd.epoch_ns; - - /* - * Load the epoch_cyc and epoch_ns atomically. We do this by - * ensuring that we always write epoch_cyc, epoch_ns and - * epoch_cyc_copy in strict order, and read them in strict order. - * If epoch_cyc and epoch_cyc_copy are not equal, then we're in - * the middle of an update, and we should repeat the load. - */ - do { - epoch_cyc = cd.epoch_cyc; - smp_rmb(); - epoch_ns = cd.epoch_ns; - smp_rmb(); - } while (epoch_cyc != cd.epoch_cyc_copy); - - return epoch_ns + cyc_to_ns((cyc - epoch_cyc) & mask, cd.mult, cd.shift); -} - -/* - * Atomically update the sched_clock epoch. - */ -static void notrace update_sched_clock(void) -{ - unsigned long flags; - u32 cyc; - u64 ns; - - cyc = read_sched_clock(); - ns = cd.epoch_ns + - cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask, - cd.mult, cd.shift); - /* - * Write epoch_cyc and epoch_ns in a way that the update is - * detectable in cyc_to_fixed_sched_clock(). - */ - raw_local_irq_save(flags); - cd.epoch_cyc_copy = cyc; - smp_wmb(); - cd.epoch_ns = ns; - smp_wmb(); - cd.epoch_cyc = cyc; - raw_local_irq_restore(flags); -} - -static void sched_clock_poll(unsigned long wrap_ticks) -{ - mod_timer(&sched_clock_timer, round_jiffies(jiffies + wrap_ticks)); - update_sched_clock(); -} - -void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate) -{ - unsigned long r, w; - u64 res, wrap; - char r_unit; - - if (cd.rate > rate) - return; - - BUG_ON(bits > 32); - WARN_ON(!irqs_disabled()); - read_sched_clock = read; - sched_clock_mask = (1 << bits) - 1; - cd.rate = rate; - - /* calculate the mult/shift to convert counter ticks to ns. */ - clocks_calc_mult_shift(&cd.mult, &cd.shift, rate, NSEC_PER_SEC, 0); - - r = rate; - if (r >= 4000000) { - r /= 1000000; - r_unit = 'M'; - } else if (r >= 1000) { - r /= 1000; - r_unit = 'k'; - } else - r_unit = ' '; - - /* calculate how many ns until we wrap */ - wrap = cyc_to_ns((1ULL << bits) - 1, cd.mult, cd.shift); - do_div(wrap, NSEC_PER_MSEC); - w = wrap; - - /* calculate the ns resolution of this counter */ - res = cyc_to_ns(1ULL, cd.mult, cd.shift); - pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lums\n", - bits, r, r_unit, res, w); - - /* - * Start the timer to keep sched_clock() properly updated and - * sets the initial epoch. - */ - sched_clock_timer.data = msecs_to_jiffies(w - (w / 10)); - update_sched_clock(); - - /* - * Ensure that sched_clock() starts off at 0ns - */ - cd.epoch_ns = 0; - - /* Enable IRQ time accounting if we have a fast enough sched_clock */ - if (irqtime > 0 || (irqtime == -1 && rate >= 1000000)) - enable_sched_clock_irqtime(); - - pr_debug("Registered %pF as sched_clock source\n", read); -} - -static unsigned long long notrace sched_clock_32(void) -{ - u32 cyc = read_sched_clock(); - return cyc_to_sched_clock(cyc, sched_clock_mask); -} - -unsigned long long __read_mostly (*sched_clock_func)(void) = sched_clock_32; - -unsigned long long notrace sched_clock(void) -{ - return sched_clock_func(); -} - -void __init sched_clock_postinit(void) -{ - /* - * If no sched_clock function has been provided at that point, - * make it the final one one. - */ - if (read_sched_clock == jiffy_sched_clock_read) - setup_sched_clock(jiffy_sched_clock_read, 32, HZ); - - sched_clock_poll(sched_clock_timer.data); -} - -static int sched_clock_suspend(void) -{ - sched_clock_poll(sched_clock_timer.data); - cd.suspended = true; - return 0; -} - -static void sched_clock_resume(void) -{ - cd.epoch_cyc = read_sched_clock(); - cd.epoch_cyc_copy = cd.epoch_cyc; - cd.suspended = false; -} - -static struct syscore_ops sched_clock_ops = { - .suspend = sched_clock_suspend, - .resume = sched_clock_resume, -}; - -static int __init sched_clock_syscore_init(void) -{ - register_syscore_ops(&sched_clock_ops); - return 0; -} -device_initcall(sched_clock_syscore_init); diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c index abff4e9aaee0..98aee3258398 100644 --- a/arch/arm/kernel/time.c +++ b/arch/arm/kernel/time.c @@ -24,9 +24,9 @@ #include <linux/timer.h> #include <linux/clocksource.h> #include <linux/irq.h> +#include <linux/sched_clock.h> #include <asm/thread_info.h> -#include <asm/sched_clock.h> #include <asm/stacktrace.h> #include <asm/mach/arch.h> #include <asm/mach/time.h> @@ -120,6 +120,4 @@ void __init time_init(void) machine_desc->init_time(); else clocksource_of_init(); - - sched_clock_postinit(); } diff --git a/arch/arm/mach-davinci/time.c b/arch/arm/mach-davinci/time.c index bad361ec1666..7a55b5c95971 100644 --- a/arch/arm/mach-davinci/time.c +++ b/arch/arm/mach-davinci/time.c @@ -18,8 +18,8 @@ #include <linux/clk.h> #include <linux/err.h> #include <linux/platform_device.h> +#include <linux/sched_clock.h> -#include <asm/sched_clock.h> #include <asm/mach/irq.h> #include <asm/mach/time.h> diff --git a/arch/arm/mach-imx/time.c b/arch/arm/mach-imx/time.c index fea91313678b..cd46529e9eaa 100644 --- a/arch/arm/mach-imx/time.c +++ b/arch/arm/mach-imx/time.c @@ -26,8 +26,8 @@ #include <linux/clockchips.h> #include <linux/clk.h> #include <linux/err.h> +#include <linux/sched_clock.h> -#include <asm/sched_clock.h> #include <asm/mach/time.h> #include "common.h" diff --git a/arch/arm/mach-integrator/integrator_ap.c b/arch/arm/mach-integrator/integrator_ap.c index b23c8e4f28e8..aa4346227c41 100644 --- a/arch/arm/mach-integrator/integrator_ap.c +++ b/arch/arm/mach-integrator/integrator_ap.c @@ -41,6 +41,7 @@ #include <linux/stat.h> #include <linux/sys_soc.h> #include <linux/termios.h> +#include <linux/sched_clock.h> #include <video/vga.h> #include <mach/hardware.h> @@ -49,7 +50,6 @@ #include <asm/setup.h> #include <asm/param.h> /* HZ */ #include <asm/mach-types.h> -#include <asm/sched_clock.h> #include <mach/lm.h> #include <mach/irqs.h> diff --git a/arch/arm/mach-ixp4xx/common.c b/arch/arm/mach-ixp4xx/common.c index 6600cff6bd92..58307cff1f18 100644 --- a/arch/arm/mach-ixp4xx/common.c +++ b/arch/arm/mach-ixp4xx/common.c @@ -30,6 +30,7 @@ #include <linux/export.h> #include <linux/gpio.h> #include <linux/cpu.h> +#include <linux/sched_clock.h> #include <mach/udc.h> #include <mach/hardware.h> @@ -38,7 +39,6 @@ #include <asm/pgtable.h> #include <asm/page.h> #include <asm/irq.h> -#include <asm/sched_clock.h> #include <asm/system_misc.h> #include <asm/mach/map.h> diff --git a/arch/arm/mach-mmp/time.c b/arch/arm/mach-mmp/time.c index 86a18b3d252e..7ac41e83cfef 100644 --- a/arch/arm/mach-mmp/time.c +++ b/arch/arm/mach-mmp/time.c @@ -28,8 +28,8 @@ #include <linux/of.h> #include <linux/of_address.h> #include <linux/of_irq.h> +#include <linux/sched_clock.h> -#include <asm/sched_clock.h> #include <mach/addr-map.h> #include <mach/regs-timers.h> #include <mach/regs-apbc.h> diff --git a/arch/arm/mach-msm/timer.c b/arch/arm/mach-msm/timer.c index 284313f3e02c..b6418fd5fe0d 100644 --- a/arch/arm/mach-msm/timer.c +++ b/arch/arm/mach-msm/timer.c @@ -23,10 +23,10 @@ #include <linux/of.h> #include <linux/of_address.h> #include <linux/of_irq.h> +#include <linux/sched_clock.h> #include <asm/mach/time.h> #include <asm/localtimer.h> -#include <asm/sched_clock.h> #include "common.h" diff --git a/arch/arm/mach-omap1/time.c b/arch/arm/mach-omap1/time.c index 726ec23d29c7..80603d2fef77 100644 --- a/arch/arm/mach-omap1/time.c +++ b/arch/arm/mach-omap1/time.c @@ -43,9 +43,9 @@ #include <linux/clocksource.h> #include <linux/clockchips.h> #include <linux/io.h> +#include <linux/sched_clock.h> #include <asm/irq.h> -#include <asm/sched_clock.h> #include <mach/hardware.h> #include <asm/mach/irq.h> diff --git a/arch/arm/mach-omap2/timer.c b/arch/arm/mach-omap2/timer.c index f8b23b8040d9..4c069b0cab21 100644 --- a/arch/arm/mach-omap2/timer.c +++ b/arch/arm/mach-omap2/timer.c @@ -41,10 +41,10 @@ #include <linux/of_irq.h> #include <linux/platform_device.h> #include <linux/platform_data/dmtimer-omap.h> +#include <linux/sched_clock.h> #include <asm/mach/time.h> #include <asm/smp_twd.h> -#include <asm/sched_clock.h> #include "omap_hwmod.h" #include "omap_device.h" diff --git a/arch/arm/mach-pxa/time.c b/arch/arm/mach-pxa/time.c index 8f1ee92aea30..9aa852a8fab9 100644 --- a/arch/arm/mach-pxa/time.c +++ b/arch/arm/mach-pxa/time.c @@ -16,11 +16,11 @@ #include <linux/init.h> #include <linux/interrupt.h> #include <linux/clockchips.h> +#include <linux/sched_clock.h> #include <asm/div64.h> #include <asm/mach/irq.h> #include <asm/mach/time.h> -#include <asm/sched_clock.h> #include <mach/regs-ost.h> #include <mach/irqs.h> diff --git a/arch/arm/mach-sa1100/time.c b/arch/arm/mach-sa1100/time.c index a59a13a665a6..713c86cd3d64 100644 --- a/arch/arm/mach-sa1100/time.c +++ b/arch/arm/mach-sa1100/time.c @@ -14,9 +14,9 @@ #include <linux/irq.h> #include <linux/timex.h> #include <linux/clockchips.h> +#include <linux/sched_clock.h> #include <asm/mach/time.h> -#include <asm/sched_clock.h> #include <mach/hardware.h> #include <mach/irqs.h> diff --git a/arch/arm/mach-u300/timer.c b/arch/arm/mach-u300/timer.c index d9e73209c9b8..af771b76fe1c 100644 --- a/arch/arm/mach-u300/timer.c +++ b/arch/arm/mach-u300/timer.c @@ -18,12 +18,12 @@ #include <linux/clk.h> #include <linux/err.h> #include <linux/irq.h> +#include <linux/sched_clock.h> #include <mach/hardware.h> #include <mach/irqs.h> /* Generic stuff */ -#include <asm/sched_clock.h> #include <asm/mach/map.h> #include <asm/mach/time.h> diff --git a/arch/arm/plat-iop/time.c b/arch/arm/plat-iop/time.c index 837a2d52e9db..29606bd75f3f 100644 --- a/arch/arm/plat-iop/time.c +++ b/arch/arm/plat-iop/time.c @@ -22,9 +22,9 @@ #include <linux/clocksource.h> #include <linux/clockchips.h> #include <linux/export.h> +#include <linux/sched_clock.h> #include <mach/hardware.h> #include <asm/irq.h> -#include <asm/sched_clock.h> #include <asm/uaccess.h> #include <asm/mach/irq.h> #include <asm/mach/time.h> diff --git a/arch/arm/plat-omap/counter_32k.c b/arch/arm/plat-omap/counter_32k.c index 5b0b86bb34bb..d9bc98eb2a6b 100644 --- a/arch/arm/plat-omap/counter_32k.c +++ b/arch/arm/plat-omap/counter_32k.c @@ -18,9 +18,9 @@ #include <linux/err.h> #include <linux/io.h> #include <linux/clocksource.h> +#include <linux/sched_clock.h> #include <asm/mach/time.h> -#include <asm/sched_clock.h> #include <plat/counter-32k.h> diff --git a/arch/arm/plat-orion/time.c b/arch/arm/plat-orion/time.c index 5d5ac0f05422..9d2b2ac74938 100644 --- a/arch/arm/plat-orion/time.c +++ b/arch/arm/plat-orion/time.c @@ -16,7 +16,7 @@ #include <linux/clockchips.h> #include <linux/interrupt.h> #include <linux/irq.h> -#include <asm/sched_clock.h> +#include <linux/sched_clock.h> /* * MBus bridge block registers. diff --git a/arch/arm/plat-samsung/samsung-time.c b/arch/arm/plat-samsung/samsung-time.c index f899cbc9b288..2957075ca836 100644 --- a/arch/arm/plat-samsung/samsung-time.c +++ b/arch/arm/plat-samsung/samsung-time.c @@ -15,12 +15,12 @@ #include <linux/clk.h> #include <linux/clockchips.h> #include <linux/platform_device.h> +#include <linux/sched_clock.h> #include <asm/smp_twd.h> #include <asm/mach/time.h> #include <asm/mach/arch.h> #include <asm/mach/map.h> -#include <asm/sched_clock.h> #include <mach/map.h> #include <plat/devs.h> diff --git a/arch/arm/plat-versatile/sched-clock.c b/arch/arm/plat-versatile/sched-clock.c index b33b74c87232..51b109e3b6c3 100644 --- a/arch/arm/plat-versatile/sched-clock.c +++ b/arch/arm/plat-versatile/sched-clock.c @@ -20,8 +20,8 @@ */ #include <linux/kernel.h> #include <linux/io.h> +#include <linux/sched_clock.h> -#include <asm/sched_clock.h> #include <plat/sched_clock.h> static void __iomem *ctr; diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index d30042e39974..13609e01f4b7 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -152,11 +152,12 @@ int xen_unmap_domain_mfn_range(struct vm_area_struct *vma, } EXPORT_SYMBOL_GPL(xen_unmap_domain_mfn_range); -static int __init xen_secondary_init(unsigned int cpu) +static void __init xen_percpu_init(void *unused) { struct vcpu_register_vcpu_info info; struct vcpu_info *vcpup; int err; + int cpu = get_cpu(); pr_info("Xen: initializing cpu%d\n", cpu); vcpup = per_cpu_ptr(xen_vcpu_info, cpu); @@ -165,14 +166,10 @@ static int __init xen_secondary_init(unsigned int cpu) info.offset = offset_in_page(vcpup); err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, cpu, &info); - if (err) { - pr_debug("register_vcpu_info failed: err=%d\n", err); - } else { - /* This cpu is using the registered vcpu info, even if - later ones fail to. */ - per_cpu(xen_vcpu, cpu) = vcpup; - } - return 0; + BUG_ON(err); + per_cpu(xen_vcpu, cpu) = vcpup; + + enable_percpu_irq(xen_events_irq, 0); } static void xen_restart(char str, const char *cmd) @@ -208,7 +205,6 @@ static int __init xen_guest_init(void) const char *version = NULL; const char *xen_prefix = "xen,xen-"; struct resource res; - int i; node = of_find_compatible_node(NULL, NULL, "xen,xen"); if (!node) { @@ -265,19 +261,23 @@ static int __init xen_guest_init(void) sizeof(struct vcpu_info)); if (xen_vcpu_info == NULL) return -ENOMEM; - for_each_online_cpu(i) - xen_secondary_init(i); gnttab_init(); if (!xen_initial_domain()) xenbus_probe(NULL); + return 0; +} +core_initcall(xen_guest_init); + +static int __init xen_pm_init(void) +{ pm_power_off = xen_power_off; arm_pm_restart = xen_restart; return 0; } -core_initcall(xen_guest_init); +subsys_initcall(xen_pm_init); static irqreturn_t xen_arm_callback(int irq, void *arg) { @@ -285,11 +285,6 @@ static irqreturn_t xen_arm_callback(int irq, void *arg) return IRQ_HANDLED; } -static __init void xen_percpu_enable_events(void *unused) -{ - enable_percpu_irq(xen_events_irq, 0); -} - static int __init xen_init_events(void) { if (!xen_domain() || xen_events_irq < 0) @@ -303,7 +298,7 @@ static int __init xen_init_events(void) return -EINVAL; } - on_each_cpu(xen_percpu_enable_events, NULL, 0); + on_each_cpu(xen_percpu_init, NULL, 0); return 0; } diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index cad060f288cf..6507dabdd5dd 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -245,7 +245,7 @@ config SMP config IRQSTACKS bool "Use separate kernel stacks when processing interrupts" - default n + default y help If you say Y here the kernel will use separate kernel stacks for handling hard and soft interrupts. This can help avoid diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile index 2f967cc6649e..197690068f88 100644 --- a/arch/parisc/Makefile +++ b/arch/parisc/Makefile @@ -23,24 +23,21 @@ NM = sh $(srctree)/arch/parisc/nm CHECKFLAGS += -D__hppa__=1 LIBGCC = $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name) -MACHINE := $(shell uname -m) -NATIVE := $(if $(filter parisc%,$(MACHINE)),1,0) - ifdef CONFIG_64BIT UTS_MACHINE := parisc64 CHECKFLAGS += -D__LP64__=1 -m64 -WIDTH := 64 +CC_ARCHES = hppa64 else # 32-bit -WIDTH := +CC_ARCHES = hppa hppa2.0 hppa1.1 endif -# attempt to help out folks who are cross-compiling -ifeq ($(NATIVE),1) -CROSS_COMPILE := hppa$(WIDTH)-linux- -else - ifeq ($(CROSS_COMPILE),) - CROSS_COMPILE := hppa$(WIDTH)-linux-gnu- - endif +ifneq ($(SUBARCH),$(UTS_MACHINE)) + ifeq ($(CROSS_COMPILE),) + CC_SUFFIXES = linux linux-gnu unknown-linux-gnu + CROSS_COMPILE := $(call cc-cross-prefix, \ + $(foreach a,$(CC_ARCHES), \ + $(foreach s,$(CC_SUFFIXES),$(a)-$(s)-))) + endif endif OBJCOPY_FLAGS =-O binary -R .note -R .comment -S diff --git a/arch/parisc/include/asm/hardirq.h b/arch/parisc/include/asm/hardirq.h index 12373c4dabab..c19f7138ba48 100644 --- a/arch/parisc/include/asm/hardirq.h +++ b/arch/parisc/include/asm/hardirq.h @@ -11,10 +11,18 @@ #include <linux/threads.h> #include <linux/irq.h> +#ifdef CONFIG_IRQSTACKS +#define __ARCH_HAS_DO_SOFTIRQ +#endif + typedef struct { unsigned int __softirq_pending; #ifdef CONFIG_DEBUG_STACKOVERFLOW unsigned int kernel_stack_usage; +#ifdef CONFIG_IRQSTACKS + unsigned int irq_stack_usage; + unsigned int irq_stack_counter; +#endif #endif #ifdef CONFIG_SMP unsigned int irq_resched_count; @@ -28,6 +36,7 @@ DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); #define __ARCH_IRQ_STAT #define __IRQ_STAT(cpu, member) (irq_stat[cpu].member) #define inc_irq_stat(member) this_cpu_inc(irq_stat.member) +#define __inc_irq_stat(member) __this_cpu_inc(irq_stat.member) #define local_softirq_pending() this_cpu_read(irq_stat.__softirq_pending) #define __ARCH_SET_SOFTIRQ_PENDING diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h index 064015547d1e..cfbc43929cf6 100644 --- a/arch/parisc/include/asm/processor.h +++ b/arch/parisc/include/asm/processor.h @@ -63,10 +63,13 @@ */ #ifdef __KERNEL__ +#include <linux/spinlock_types.h> + #define IRQ_STACK_SIZE (4096 << 2) /* 16k irq stack size */ union irq_stack_union { unsigned long stack[IRQ_STACK_SIZE/sizeof(unsigned long)]; + raw_spinlock_t lock; }; DECLARE_PER_CPU(union irq_stack_union, irq_stack_union); diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index 4bb96ad9b0b1..ae27cb6ce19a 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -452,9 +452,41 @@ L2_ptep \pgd,\pte,\index,\va,\fault .endm + /* Acquire pa_dbit_lock lock. */ + .macro dbit_lock spc,tmp,tmp1 +#ifdef CONFIG_SMP + cmpib,COND(=),n 0,\spc,2f + load32 PA(pa_dbit_lock),\tmp +1: LDCW 0(\tmp),\tmp1 + cmpib,COND(=) 0,\tmp1,1b + nop +2: +#endif + .endm + + /* Release pa_dbit_lock lock without reloading lock address. */ + .macro dbit_unlock0 spc,tmp +#ifdef CONFIG_SMP + or,COND(=) %r0,\spc,%r0 + stw \spc,0(\tmp) +#endif + .endm + + /* Release pa_dbit_lock lock. */ + .macro dbit_unlock1 spc,tmp +#ifdef CONFIG_SMP + load32 PA(pa_dbit_lock),\tmp + dbit_unlock0 \spc,\tmp +#endif + .endm + /* Set the _PAGE_ACCESSED bit of the PTE. Be clever and * don't needlessly dirty the cache line if it was already set */ - .macro update_ptep ptep,pte,tmp,tmp1 + .macro update_ptep spc,ptep,pte,tmp,tmp1 +#ifdef CONFIG_SMP + or,COND(=) %r0,\spc,%r0 + LDREG 0(\ptep),\pte +#endif ldi _PAGE_ACCESSED,\tmp1 or \tmp1,\pte,\tmp and,COND(<>) \tmp1,\pte,%r0 @@ -463,7 +495,11 @@ /* Set the dirty bit (and accessed bit). No need to be * clever, this is only used from the dirty fault */ - .macro update_dirty ptep,pte,tmp + .macro update_dirty spc,ptep,pte,tmp +#ifdef CONFIG_SMP + or,COND(=) %r0,\spc,%r0 + LDREG 0(\ptep),\pte +#endif ldi _PAGE_ACCESSED|_PAGE_DIRTY,\tmp or \tmp,\pte,\pte STREG \pte,0(\ptep) @@ -1111,11 +1147,13 @@ dtlb_miss_20w: L3_ptep ptp,pte,t0,va,dtlb_check_alias_20w - update_ptep ptp,pte,t0,t1 + dbit_lock spc,t0,t1 + update_ptep spc,ptp,pte,t0,t1 make_insert_tlb spc,pte,prot idtlbt pte,prot + dbit_unlock1 spc,t0 rfir nop @@ -1135,11 +1173,13 @@ nadtlb_miss_20w: L3_ptep ptp,pte,t0,va,nadtlb_check_alias_20w - update_ptep ptp,pte,t0,t1 + dbit_lock spc,t0,t1 + update_ptep spc,ptp,pte,t0,t1 make_insert_tlb spc,pte,prot idtlbt pte,prot + dbit_unlock1 spc,t0 rfir nop @@ -1161,7 +1201,8 @@ dtlb_miss_11: L2_ptep ptp,pte,t0,va,dtlb_check_alias_11 - update_ptep ptp,pte,t0,t1 + dbit_lock spc,t0,t1 + update_ptep spc,ptp,pte,t0,t1 make_insert_tlb_11 spc,pte,prot @@ -1172,6 +1213,7 @@ dtlb_miss_11: idtlbp prot,(%sr1,va) mtsp t0, %sr1 /* Restore sr1 */ + dbit_unlock1 spc,t0 rfir nop @@ -1192,7 +1234,8 @@ nadtlb_miss_11: L2_ptep ptp,pte,t0,va,nadtlb_check_alias_11 - update_ptep ptp,pte,t0,t1 + dbit_lock spc,t0,t1 + update_ptep spc,ptp,pte,t0,t1 make_insert_tlb_11 spc,pte,prot @@ -1204,6 +1247,7 @@ nadtlb_miss_11: idtlbp prot,(%sr1,va) mtsp t0, %sr1 /* Restore sr1 */ + dbit_unlock1 spc,t0 rfir nop @@ -1224,13 +1268,15 @@ dtlb_miss_20: L2_ptep ptp,pte,t0,va,dtlb_check_alias_20 - update_ptep ptp,pte,t0,t1 + dbit_lock spc,t0,t1 + update_ptep spc,ptp,pte,t0,t1 make_insert_tlb spc,pte,prot f_extend pte,t0 idtlbt pte,prot + dbit_unlock1 spc,t0 rfir nop @@ -1250,13 +1296,15 @@ nadtlb_miss_20: L2_ptep ptp,pte,t0,va,nadtlb_check_alias_20 - update_ptep ptp,pte,t0,t1 + dbit_lock spc,t0,t1 + update_ptep spc,ptp,pte,t0,t1 make_insert_tlb spc,pte,prot f_extend pte,t0 idtlbt pte,prot + dbit_unlock1 spc,t0 rfir nop @@ -1357,11 +1405,13 @@ itlb_miss_20w: L3_ptep ptp,pte,t0,va,itlb_fault - update_ptep ptp,pte,t0,t1 + dbit_lock spc,t0,t1 + update_ptep spc,ptp,pte,t0,t1 make_insert_tlb spc,pte,prot iitlbt pte,prot + dbit_unlock1 spc,t0 rfir nop @@ -1379,11 +1429,13 @@ naitlb_miss_20w: L3_ptep ptp,pte,t0,va,naitlb_check_alias_20w - update_ptep ptp,pte,t0,t1 + dbit_lock spc,t0,t1 + update_ptep spc,ptp,pte,t0,t1 make_insert_tlb spc,pte,prot iitlbt pte,prot + dbit_unlock1 spc,t0 rfir nop @@ -1405,7 +1457,8 @@ itlb_miss_11: L2_ptep ptp,pte,t0,va,itlb_fault - update_ptep ptp,pte,t0,t1 + dbit_lock spc,t0,t1 + update_ptep spc,ptp,pte,t0,t1 make_insert_tlb_11 spc,pte,prot @@ -1416,6 +1469,7 @@ itlb_miss_11: iitlbp prot,(%sr1,va) mtsp t0, %sr1 /* Restore sr1 */ + dbit_unlock1 spc,t0 rfir nop @@ -1427,7 +1481,8 @@ naitlb_miss_11: L2_ptep ptp,pte,t0,va,naitlb_check_alias_11 - update_ptep ptp,pte,t0,t1 + dbit_lock spc,t0,t1 + update_ptep spc,ptp,pte,t0,t1 make_insert_tlb_11 spc,pte,prot @@ -1438,6 +1493,7 @@ naitlb_miss_11: iitlbp prot,(%sr1,va) mtsp t0, %sr1 /* Restore sr1 */ + dbit_unlock1 spc,t0 rfir nop @@ -1459,13 +1515,15 @@ itlb_miss_20: L2_ptep ptp,pte,t0,va,itlb_fault - update_ptep ptp,pte,t0,t1 + dbit_lock spc,t0,t1 + update_ptep spc,ptp,pte,t0,t1 make_insert_tlb spc,pte,prot f_extend pte,t0 iitlbt pte,prot + dbit_unlock1 spc,t0 rfir nop @@ -1477,13 +1535,15 @@ naitlb_miss_20: L2_ptep ptp,pte,t0,va,naitlb_check_alias_20 - update_ptep ptp,pte,t0,t1 + dbit_lock spc,t0,t1 + update_ptep spc,ptp,pte,t0,t1 make_insert_tlb spc,pte,prot f_extend pte,t0 iitlbt pte,prot + dbit_unlock1 spc,t0 rfir nop @@ -1507,29 +1567,13 @@ dbit_trap_20w: L3_ptep ptp,pte,t0,va,dbit_fault -#ifdef CONFIG_SMP - cmpib,COND(=),n 0,spc,dbit_nolock_20w - load32 PA(pa_dbit_lock),t0 - -dbit_spin_20w: - LDCW 0(t0),t1 - cmpib,COND(=) 0,t1,dbit_spin_20w - nop - -dbit_nolock_20w: -#endif - update_dirty ptp,pte,t1 + dbit_lock spc,t0,t1 + update_dirty spc,ptp,pte,t1 make_insert_tlb spc,pte,prot idtlbt pte,prot -#ifdef CONFIG_SMP - cmpib,COND(=),n 0,spc,dbit_nounlock_20w - ldi 1,t1 - stw t1,0(t0) - -dbit_nounlock_20w: -#endif + dbit_unlock0 spc,t0 rfir nop @@ -1543,18 +1587,8 @@ dbit_trap_11: L2_ptep ptp,pte,t0,va,dbit_fault -#ifdef CONFIG_SMP - cmpib,COND(=),n 0,spc,dbit_nolock_11 - load32 PA(pa_dbit_lock),t0 - -dbit_spin_11: - LDCW 0(t0),t1 - cmpib,= 0,t1,dbit_spin_11 - nop - -dbit_nolock_11: -#endif - update_dirty ptp,pte,t1 + dbit_lock spc,t0,t1 + update_dirty spc,ptp,pte,t1 make_insert_tlb_11 spc,pte,prot @@ -1565,13 +1599,7 @@ dbit_nolock_11: idtlbp prot,(%sr1,va) mtsp t1, %sr1 /* Restore sr1 */ -#ifdef CONFIG_SMP - cmpib,COND(=),n 0,spc,dbit_nounlock_11 - ldi 1,t1 - stw t1,0(t0) - -dbit_nounlock_11: -#endif + dbit_unlock0 spc,t0 rfir nop @@ -1583,32 +1611,15 @@ dbit_trap_20: L2_ptep ptp,pte,t0,va,dbit_fault -#ifdef CONFIG_SMP - cmpib,COND(=),n 0,spc,dbit_nolock_20 - load32 PA(pa_dbit_lock),t0 - -dbit_spin_20: - LDCW 0(t0),t1 - cmpib,= 0,t1,dbit_spin_20 - nop - -dbit_nolock_20: -#endif - update_dirty ptp,pte,t1 + dbit_lock spc,t0,t1 + update_dirty spc,ptp,pte,t1 make_insert_tlb spc,pte,prot f_extend pte,t1 idtlbt pte,prot - -#ifdef CONFIG_SMP - cmpib,COND(=),n 0,spc,dbit_nounlock_20 - ldi 1,t1 - stw t1,0(t0) - -dbit_nounlock_20: -#endif + dbit_unlock0 spc,t0 rfir nop diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c index e255db0bb761..55237a70e197 100644 --- a/arch/parisc/kernel/irq.c +++ b/arch/parisc/kernel/irq.c @@ -166,22 +166,32 @@ int arch_show_interrupts(struct seq_file *p, int prec) seq_printf(p, "%*s: ", prec, "STK"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->kernel_stack_usage); - seq_printf(p, " Kernel stack usage\n"); + seq_puts(p, " Kernel stack usage\n"); +# ifdef CONFIG_IRQSTACKS + seq_printf(p, "%*s: ", prec, "IST"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stats(j)->irq_stack_usage); + seq_puts(p, " Interrupt stack usage\n"); + seq_printf(p, "%*s: ", prec, "ISC"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stats(j)->irq_stack_counter); + seq_puts(p, " Interrupt stack usage counter\n"); +# endif #endif #ifdef CONFIG_SMP seq_printf(p, "%*s: ", prec, "RES"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_resched_count); - seq_printf(p, " Rescheduling interrupts\n"); + seq_puts(p, " Rescheduling interrupts\n"); seq_printf(p, "%*s: ", prec, "CAL"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_call_count); - seq_printf(p, " Function call interrupts\n"); + seq_puts(p, " Function call interrupts\n"); #endif seq_printf(p, "%*s: ", prec, "TLB"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count); - seq_printf(p, " TLB shootdowns\n"); + seq_puts(p, " TLB shootdowns\n"); return 0; } @@ -378,6 +388,7 @@ static inline void stack_overflow_check(struct pt_regs *regs) unsigned long sp = regs->gr[30]; unsigned long stack_usage; unsigned int *last_usage; + int cpu = smp_processor_id(); /* if sr7 != 0, we interrupted a userspace process which we do not want * to check for stack overflow. We will only check the kernel stack. */ @@ -386,7 +397,31 @@ static inline void stack_overflow_check(struct pt_regs *regs) /* calculate kernel stack usage */ stack_usage = sp - stack_start; - last_usage = &per_cpu(irq_stat.kernel_stack_usage, smp_processor_id()); +#ifdef CONFIG_IRQSTACKS + if (likely(stack_usage <= THREAD_SIZE)) + goto check_kernel_stack; /* found kernel stack */ + + /* check irq stack usage */ + stack_start = (unsigned long) &per_cpu(irq_stack_union, cpu).stack; + stack_usage = sp - stack_start; + + last_usage = &per_cpu(irq_stat.irq_stack_usage, cpu); + if (unlikely(stack_usage > *last_usage)) + *last_usage = stack_usage; + + if (likely(stack_usage < (IRQ_STACK_SIZE - STACK_MARGIN))) + return; + + pr_emerg("stackcheck: %s will most likely overflow irq stack " + "(sp:%lx, stk bottom-top:%lx-%lx)\n", + current->comm, sp, stack_start, stack_start + IRQ_STACK_SIZE); + goto panic_check; + +check_kernel_stack: +#endif + + /* check kernel stack usage */ + last_usage = &per_cpu(irq_stat.kernel_stack_usage, cpu); if (unlikely(stack_usage > *last_usage)) *last_usage = stack_usage; @@ -398,31 +433,69 @@ static inline void stack_overflow_check(struct pt_regs *regs) "(sp:%lx, stk bottom-top:%lx-%lx)\n", current->comm, sp, stack_start, stack_start + THREAD_SIZE); +#ifdef CONFIG_IRQSTACKS +panic_check: +#endif if (sysctl_panic_on_stackoverflow) panic("low stack detected by irq handler - check messages\n"); #endif } #ifdef CONFIG_IRQSTACKS -DEFINE_PER_CPU(union irq_stack_union, irq_stack_union); +DEFINE_PER_CPU(union irq_stack_union, irq_stack_union) = { + .lock = __RAW_SPIN_LOCK_UNLOCKED((irq_stack_union).lock) + }; static void execute_on_irq_stack(void *func, unsigned long param1) { - unsigned long *irq_stack_start; + union irq_stack_union *union_ptr; unsigned long irq_stack; - int cpu = smp_processor_id(); + raw_spinlock_t *irq_stack_in_use; - irq_stack_start = &per_cpu(irq_stack_union, cpu).stack[0]; - irq_stack = (unsigned long) irq_stack_start; - irq_stack = ALIGN(irq_stack, 16); /* align for stack frame usage */ + union_ptr = &per_cpu(irq_stack_union, smp_processor_id()); + irq_stack = (unsigned long) &union_ptr->stack; + irq_stack = ALIGN(irq_stack + sizeof(irq_stack_union.lock), + 64); /* align for stack frame usage */ - BUG_ON(*irq_stack_start); /* report bug if we were called recursive. */ - *irq_stack_start = 1; + /* We may be called recursive. If we are already using the irq stack, + * just continue to use it. Use spinlocks to serialize + * the irq stack usage. + */ + irq_stack_in_use = &union_ptr->lock; + if (!raw_spin_trylock(irq_stack_in_use)) { + void (*direct_call)(unsigned long p1) = func; + + /* We are using the IRQ stack already. + * Do direct call on current stack. */ + direct_call(param1); + return; + } /* This is where we switch to the IRQ stack. */ call_on_stack(param1, func, irq_stack); - *irq_stack_start = 0; + __inc_irq_stat(irq_stack_counter); + + /* free up irq stack usage. */ + do_raw_spin_unlock(irq_stack_in_use); +} + +asmlinkage void do_softirq(void) +{ + __u32 pending; + unsigned long flags; + + if (in_interrupt()) + return; + + local_irq_save(flags); + + pending = local_softirq_pending(); + + if (pending) + execute_on_irq_stack(__do_softirq, 0); + + local_irq_restore(flags); } #endif /* CONFIG_IRQSTACKS */ diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index ce939ac8622b..1c965642068b 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -1069,7 +1069,7 @@ void flush_tlb_all(void) { int do_recycle; - inc_irq_stat(irq_tlb_count); + __inc_irq_stat(irq_tlb_count); do_recycle = 0; spin_lock(&sid_lock); if (dirty_space_ids > RECYCLE_THRESHOLD) { @@ -1090,7 +1090,7 @@ void flush_tlb_all(void) #else void flush_tlb_all(void) { - inc_irq_stat(irq_tlb_count); + __inc_irq_stat(irq_tlb_count); spin_lock(&sid_lock); flush_tlb_all_local(NULL); recycle_sids(); diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 5416e28a7538..863d877e0b5f 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -262,8 +262,31 @@ config PPC_EARLY_DEBUG_OPAL_HVSI Select this to enable early debugging for the PowerNV platform using an "hvsi" console +config PPC_EARLY_DEBUG_MEMCONS + bool "In memory console" + help + Select this to enable early debugging using an in memory console. + This console provides input and output buffers stored within the + kernel BSS and should be safe to select on any system. A debugger + can then be used to read kernel output or send input to the console. endchoice +config PPC_MEMCONS_OUTPUT_SIZE + int "In memory console output buffer size" + depends on PPC_EARLY_DEBUG_MEMCONS + default 4096 + help + Selects the size of the output buffer (in bytes) of the in memory + console. + +config PPC_MEMCONS_INPUT_SIZE + int "In memory console input buffer size" + depends on PPC_EARLY_DEBUG_MEMCONS + default 128 + help + Selects the size of the input buffer (in bytes) of the in memory + console. + config PPC_EARLY_DEBUG_OPAL def_bool y depends on PPC_EARLY_DEBUG_OPAL_RAW || PPC_EARLY_DEBUG_OPAL_HVSI diff --git a/arch/powerpc/include/asm/context_tracking.h b/arch/powerpc/include/asm/context_tracking.h new file mode 100644 index 000000000000..b6f5a33b8ee2 --- /dev/null +++ b/arch/powerpc/include/asm/context_tracking.h @@ -0,0 +1,10 @@ +#ifndef _ASM_POWERPC_CONTEXT_TRACKING_H +#define _ASM_POWERPC_CONTEXT_TRACKING_H + +#ifdef CONFIG_CONTEXT_TRACKING +#define SCHEDULE_USER bl .schedule_user +#else +#define SCHEDULE_USER bl .schedule +#endif + +#endif diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h index 0df54646f968..681bc0314b6b 100644 --- a/arch/powerpc/include/asm/firmware.h +++ b/arch/powerpc/include/asm/firmware.h @@ -52,6 +52,7 @@ #define FW_FEATURE_BEST_ENERGY ASM_CONST(0x0000000080000000) #define FW_FEATURE_TYPE1_AFFINITY ASM_CONST(0x0000000100000000) #define FW_FEATURE_PRRN ASM_CONST(0x0000000200000000) +#define FW_FEATURE_OPALv3 ASM_CONST(0x0000000400000000) #ifndef __ASSEMBLY__ @@ -69,7 +70,8 @@ enum { FW_FEATURE_SET_MODE | FW_FEATURE_BEST_ENERGY | FW_FEATURE_TYPE1_AFFINITY | FW_FEATURE_PRRN, FW_FEATURE_PSERIES_ALWAYS = 0, - FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL | FW_FEATURE_OPALv2, + FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL | FW_FEATURE_OPALv2 | + FW_FEATURE_OPALv3, FW_FEATURE_POWERNV_ALWAYS = 0, FW_FEATURE_PS3_POSSIBLE = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1, FW_FEATURE_PS3_ALWAYS = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1, diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index d615b28dda82..ba713f166fa5 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -96,11 +96,12 @@ static inline bool arch_irqs_disabled(void) #endif #define hard_irq_disable() do { \ + u8 _was_enabled = get_paca()->soft_enabled; \ __hard_irq_disable(); \ - if (local_paca->soft_enabled) \ - trace_hardirqs_off(); \ get_paca()->soft_enabled = 0; \ get_paca()->irq_happened |= PACA_IRQ_HARD_DIS; \ + if (_was_enabled) \ + trace_hardirqs_off(); \ } while(0) static inline bool lazy_irq_pending(void) diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index b6c8b58b1d76..cbb9305ab15a 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -243,7 +243,8 @@ enum OpalMCE_TlbErrorType { enum OpalThreadStatus { OPAL_THREAD_INACTIVE = 0x0, - OPAL_THREAD_STARTED = 0x1 + OPAL_THREAD_STARTED = 0x1, + OPAL_THREAD_UNAVAILABLE = 0x2 /* opal-v3 */ }; enum OpalPciBusCompare { @@ -563,6 +564,8 @@ extern void opal_nvram_init(void); extern int opal_machine_check(struct pt_regs *regs); +extern void opal_shutdown(void); + #endif /* __ASSEMBLY__ */ #endif /* __OPAL_H */ diff --git a/arch/powerpc/include/asm/pgalloc-64.h b/arch/powerpc/include/asm/pgalloc-64.h index 91acb12bac92..b66ae722a8e9 100644 --- a/arch/powerpc/include/asm/pgalloc-64.h +++ b/arch/powerpc/include/asm/pgalloc-64.h @@ -186,7 +186,7 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, static inline pgtable_t pmd_pgtable(pmd_t pmd) { - return (pgtable_t)(pmd_val(pmd) & -sizeof(pte_t)*PTRS_PER_PTE); + return (pgtable_t)(pmd_val(pmd) & ~PMD_MASKED_BITS); } static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, diff --git a/arch/powerpc/include/asm/pte-hash64-64k.h b/arch/powerpc/include/asm/pte-hash64-64k.h index 3e13e23e4fdf..d836d945068d 100644 --- a/arch/powerpc/include/asm/pte-hash64-64k.h +++ b/arch/powerpc/include/asm/pte-hash64-64k.h @@ -47,7 +47,7 @@ * generic accessors and iterators here */ #define __real_pte(e,p) ((real_pte_t) { \ - (e), ((e) & _PAGE_COMBO) ? \ + (e), (pte_val(e) & _PAGE_COMBO) ? \ (pte_val(*((p) + PTRS_PER_PTE))) : 0 }) #define __rpte_to_hidx(r,index) ((pte_val((r).pte) & _PAGE_COMBO) ? \ (((r).hidx >> ((index)<<2)) & 0xf) : ((pte_val((r).pte) >> 12) & 0xf)) diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index a8bc2bb4adc9..34fd70488d83 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -264,6 +264,8 @@ extern void rtas_progress(char *s, unsigned short hex); extern void rtas_initialize(void); extern int rtas_suspend_cpu(struct rtas_suspend_me_data *data); extern int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data); +extern int rtas_online_cpus_mask(cpumask_var_t cpus); +extern int rtas_offline_cpus_mask(cpumask_var_t cpus); extern int rtas_ibm_suspend_me(struct rtas_args *); struct rtc_time; diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 8ceea14d6fe4..ba7b1973866e 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -97,7 +97,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_PERFMON_CTXSW 6 /* perfmon needs ctxsw calls */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SINGLESTEP 8 /* singlestepping active */ -#define TIF_MEMDIE 9 /* is terminating due to OOM killer */ +#define TIF_NOHZ 9 /* in adaptive nohz mode */ #define TIF_SECCOMP 10 /* secure computing */ #define TIF_RESTOREALL 11 /* Restore all regs (implies NOERROR) */ #define TIF_NOERROR 12 /* Force successful syscall return */ @@ -106,6 +106,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_SYSCALL_TRACEPOINT 15 /* syscall tracepoint instrumentation */ #define TIF_EMULATE_STACK_STORE 16 /* Is an instruction emulation for stack store? */ +#define TIF_MEMDIE 17 /* is terminating due to OOM killer */ /* as above, but as bit values */ #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) @@ -124,8 +125,10 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_UPROBE (1<<TIF_UPROBE) #define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT) #define _TIF_EMULATE_STACK_STORE (1<<TIF_EMULATE_STACK_STORE) +#define _TIF_NOHZ (1<<TIF_NOHZ) #define _TIF_SYSCALL_T_OR_A (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ - _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT) + _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \ + _TIF_NOHZ) #define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \ _TIF_NOTIFY_RESUME | _TIF_UPROBE) diff --git a/arch/powerpc/include/asm/udbg.h b/arch/powerpc/include/asm/udbg.h index 5a7510e9d09d..dc590919f8eb 100644 --- a/arch/powerpc/include/asm/udbg.h +++ b/arch/powerpc/include/asm/udbg.h @@ -52,6 +52,7 @@ extern void __init udbg_init_40x_realmode(void); extern void __init udbg_init_cpm(void); extern void __init udbg_init_usbgecko(void); extern void __init udbg_init_wsp(void); +extern void __init udbg_init_memcons(void); extern void __init udbg_init_ehv_bc(void); extern void __init udbg_init_ps3gelic(void); extern void __init udbg_init_debug_opal_raw(void); diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index e514de57a125..d22e73e4618b 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -439,8 +439,6 @@ ret_from_fork: ret_from_kernel_thread: REST_NVGPRS(r1) bl schedule_tail - li r3,0 - stw r3,0(r1) mtlr r14 mr r3,r15 PPC440EP_ERR42 diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 915fbb4fc2fe..51cfb8fc301f 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -33,6 +33,7 @@ #include <asm/irqflags.h> #include <asm/ftrace.h> #include <asm/hw_irq.h> +#include <asm/context_tracking.h> /* * System calls. @@ -376,8 +377,6 @@ _GLOBAL(ret_from_fork) _GLOBAL(ret_from_kernel_thread) bl .schedule_tail REST_NVGPRS(r1) - li r3,0 - std r3,0(r1) ld r14, 0(r14) mtlr r14 mr r3,r15 @@ -634,7 +633,7 @@ _GLOBAL(ret_from_except_lite) andi. r0,r4,_TIF_NEED_RESCHED beq 1f bl .restore_interrupts - bl .schedule + SCHEDULE_USER b .ret_from_except_lite 1: bl .save_nvgprs diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 42a756eec9ff..645170a07ada 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -489,7 +489,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) */ mfspr r14,SPRN_DBSR /* check single-step/branch taken */ - andis. r15,r14,DBSR_IC@h + andis. r15,r14,(DBSR_IC|DBSR_BT)@h beq+ 1f LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e) @@ -500,7 +500,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) bge+ cr1,1f /* here it looks like we got an inappropriate debug exception. */ - lis r14,DBSR_IC@h /* clear the IC event */ + lis r14,(DBSR_IC|DBSR_BT)@h /* clear the event */ rlwinm r11,r11,0,~MSR_DE /* clear DE in the CSRR1 value */ mtspr SPRN_DBSR,r14 mtspr SPRN_CSRR1,r11 @@ -555,7 +555,7 @@ kernel_dbg_exc: */ mfspr r14,SPRN_DBSR /* check single-step/branch taken */ - andis. r15,r14,DBSR_IC@h + andis. r15,r14,(DBSR_IC|DBSR_BT)@h beq+ 1f LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e) @@ -566,7 +566,7 @@ kernel_dbg_exc: bge+ cr1,1f /* here it looks like we got an inappropriate debug exception. */ - lis r14,DBSR_IC@h /* clear the IC event */ + lis r14,(DBSR_IC|DBSR_BT)@h /* clear the event */ rlwinm r11,r11,0,~MSR_DE /* clear DE in the DSRR1 value */ mtspr SPRN_DBSR,r14 mtspr SPRN_DSRR1,r11 diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c index 466a2908bb63..611acdf30096 100644 --- a/arch/powerpc/kernel/machine_kexec_64.c +++ b/arch/powerpc/kernel/machine_kexec_64.c @@ -17,6 +17,7 @@ #include <linux/errno.h> #include <linux/kernel.h> #include <linux/cpu.h> +#include <linux/hardirq.h> #include <asm/page.h> #include <asm/current.h> @@ -335,10 +336,13 @@ void default_machine_kexec(struct kimage *image) pr_debug("kexec: Starting switchover sequence.\n"); /* switch to a staticly allocated stack. Based on irq stack code. + * We setup preempt_count to avoid using VMX in memcpy. * XXX: the task struct will likely be invalid once we do the copy! */ kexec_stack.thread_info.task = current_thread_info()->task; kexec_stack.thread_info.flags = 0; + kexec_stack.thread_info.preempt_count = HARDIRQ_OFFSET; + kexec_stack.thread_info.cpu = current_thread_info()->cpu; /* We need a static PACA, too; copy this CPU's PACA over and switch to * it. Also poison per_cpu_offset to catch anyone using non-static diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 19e096bd0e73..e469f30e6eeb 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -657,6 +657,17 @@ _GLOBAL(__ucmpdi2) li r3,2 blr +_GLOBAL(__bswapdi2) + rotlwi r9,r4,8 + rotlwi r10,r3,8 + rlwimi r9,r4,24,0,7 + rlwimi r10,r3,24,0,7 + rlwimi r9,r4,24,16,23 + rlwimi r10,r3,24,16,23 + mr r3,r9 + mr r4,r10 + blr + _GLOBAL(abs) srawi r4,r3,31 xor r3,r3,r4 diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 5cfa8008693b..6820e45f557b 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -234,6 +234,17 @@ _GLOBAL(__flush_dcache_icache) isync blr +_GLOBAL(__bswapdi2) + srdi r8,r3,32 + rlwinm r7,r3,8,0xffffffff + rlwimi r7,r3,24,0,7 + rlwinm r9,r8,8,0xffffffff + rlwimi r7,r3,24,16,23 + rlwimi r9,r8,24,0,7 + rlwimi r9,r8,24,16,23 + sldi r7,r7,32 + or r3,r7,r9 + blr #if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE) /* diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index f5c5c90799a7..6053f037ef0a 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -359,7 +359,6 @@ static pgprot_t __pci_mmap_set_pgprot(struct pci_dev *dev, struct resource *rp, enum pci_mmap_state mmap_state, int write_combine) { - unsigned long prot = pgprot_val(protection); /* Write combine is always 0 on non-memory space mappings. On * memory space, if the user didn't pass 1, we check for a @@ -376,9 +375,9 @@ static pgprot_t __pci_mmap_set_pgprot(struct pci_dev *dev, struct resource *rp, /* XXX would be nice to have a way to ask for write-through */ if (write_combine) - return pgprot_noncached_wc(prot); + return pgprot_noncached_wc(protection); else - return pgprot_noncached(prot); + return pgprot_noncached(protection); } /* diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index 78b8766fd79e..c29666586998 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -143,7 +143,8 @@ EXPORT_SYMBOL(__lshrdi3); int __ucmpdi2(unsigned long long, unsigned long long); EXPORT_SYMBOL(__ucmpdi2); #endif - +long long __bswapdi2(long long); +EXPORT_SYMBOL(__bswapdi2); EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(memset); EXPORT_SYMBOL(memmove); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index ceb4e7b62cf4..a902723fdc69 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -339,6 +339,13 @@ static void set_debug_reg_defaults(struct thread_struct *thread) static void prime_debug_regs(struct thread_struct *thread) { + /* + * We could have inherited MSR_DE from userspace, since + * it doesn't get cleared on exception entry. Make sure + * MSR_DE is clear before we enable any debug events. + */ + mtmsr(mfmsr() & ~MSR_DE); + mtspr(SPRN_IAC1, thread->iac1); mtspr(SPRN_IAC2, thread->iac2); #if CONFIG_PPC_ADV_DEBUG_IACS > 2 @@ -971,6 +978,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, * do some house keeping and then return from the fork or clone * system call, using the stack frame created above. */ + ((unsigned long *)sp)[0] = 0; sp -= sizeof(struct pt_regs); kregs = (struct pt_regs *) sp; sp -= STACK_FRAME_OVERHEAD; diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 3b14d320e69f..98c2fc198712 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -32,6 +32,7 @@ #include <trace/syscall.h> #include <linux/hw_breakpoint.h> #include <linux/perf_event.h> +#include <linux/context_tracking.h> #include <asm/uaccess.h> #include <asm/page.h> @@ -1788,6 +1789,8 @@ long do_syscall_trace_enter(struct pt_regs *regs) { long ret = 0; + user_exit(); + secure_computing_strict(regs->gpr[0]); if (test_thread_flag(TIF_SYSCALL_TRACE) && @@ -1832,4 +1835,6 @@ void do_syscall_trace_leave(struct pt_regs *regs) step = test_thread_flag(TIF_SINGLESTEP); if (step || test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall_exit(regs, step); + + user_enter(); } diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 1fd6e7b2f390..52add6f3e201 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -19,6 +19,7 @@ #include <linux/init.h> #include <linux/capability.h> #include <linux/delay.h> +#include <linux/cpu.h> #include <linux/smp.h> #include <linux/completion.h> #include <linux/cpumask.h> @@ -807,6 +808,95 @@ static void rtas_percpu_suspend_me(void *info) __rtas_suspend_cpu((struct rtas_suspend_me_data *)info, 1); } +enum rtas_cpu_state { + DOWN, + UP, +}; + +#ifndef CONFIG_SMP +static int rtas_cpu_state_change_mask(enum rtas_cpu_state state, + cpumask_var_t cpus) +{ + if (!cpumask_empty(cpus)) { + cpumask_clear(cpus); + return -EINVAL; + } else + return 0; +} +#else +/* On return cpumask will be altered to indicate CPUs changed. + * CPUs with states changed will be set in the mask, + * CPUs with status unchanged will be unset in the mask. */ +static int rtas_cpu_state_change_mask(enum rtas_cpu_state state, + cpumask_var_t cpus) +{ + int cpu; + int cpuret = 0; + int ret = 0; + + if (cpumask_empty(cpus)) + return 0; + + for_each_cpu(cpu, cpus) { + switch (state) { + case DOWN: + cpuret = cpu_down(cpu); + break; + case UP: + cpuret = cpu_up(cpu); + break; + } + if (cpuret) { + pr_debug("%s: cpu_%s for cpu#%d returned %d.\n", + __func__, + ((state == UP) ? "up" : "down"), + cpu, cpuret); + if (!ret) + ret = cpuret; + if (state == UP) { + /* clear bits for unchanged cpus, return */ + cpumask_shift_right(cpus, cpus, cpu); + cpumask_shift_left(cpus, cpus, cpu); + break; + } else { + /* clear bit for unchanged cpu, continue */ + cpumask_clear_cpu(cpu, cpus); + } + } + } + + return ret; +} +#endif + +int rtas_online_cpus_mask(cpumask_var_t cpus) +{ + int ret; + + ret = rtas_cpu_state_change_mask(UP, cpus); + + if (ret) { + cpumask_var_t tmp_mask; + + if (!alloc_cpumask_var(&tmp_mask, GFP_TEMPORARY)) + return ret; + + /* Use tmp_mask to preserve cpus mask from first failure */ + cpumask_copy(tmp_mask, cpus); + rtas_offline_cpus_mask(tmp_mask); + free_cpumask_var(tmp_mask); + } + + return ret; +} +EXPORT_SYMBOL(rtas_online_cpus_mask); + +int rtas_offline_cpus_mask(cpumask_var_t cpus) +{ + return rtas_cpu_state_change_mask(DOWN, cpus); +} +EXPORT_SYMBOL(rtas_offline_cpus_mask); + int rtas_ibm_suspend_me(struct rtas_args *args) { long state; @@ -814,6 +904,8 @@ int rtas_ibm_suspend_me(struct rtas_args *args) unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; struct rtas_suspend_me_data data; DECLARE_COMPLETION_ONSTACK(done); + cpumask_var_t offline_mask; + int cpuret; if (!rtas_service_present("ibm,suspend-me")) return -ENOSYS; @@ -837,11 +929,24 @@ int rtas_ibm_suspend_me(struct rtas_args *args) return 0; } + if (!alloc_cpumask_var(&offline_mask, GFP_TEMPORARY)) + return -ENOMEM; + atomic_set(&data.working, 0); atomic_set(&data.done, 0); atomic_set(&data.error, 0); data.token = rtas_token("ibm,suspend-me"); data.complete = &done; + + /* All present CPUs must be online */ + cpumask_andnot(offline_mask, cpu_present_mask, cpu_online_mask); + cpuret = rtas_online_cpus_mask(offline_mask); + if (cpuret) { + pr_err("%s: Could not bring present CPUs online.\n", __func__); + atomic_set(&data.error, cpuret); + goto out; + } + stop_topology_update(); /* Call function on all CPUs. One of us will make the @@ -857,6 +962,14 @@ int rtas_ibm_suspend_me(struct rtas_args *args) start_topology_update(); + /* Take down CPUs not online prior to suspend */ + cpuret = rtas_offline_cpus_mask(offline_mask); + if (cpuret) + pr_warn("%s: Could not restore CPUs to offline state.\n", + __func__); + +out: + free_cpumask_var(offline_mask); return atomic_read(&data.error); } #else /* CONFIG_PPC_PSERIES */ diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c index 5b3022470126..2f3cdb01506d 100644 --- a/arch/powerpc/kernel/rtas_flash.c +++ b/arch/powerpc/kernel/rtas_flash.c @@ -89,6 +89,7 @@ /* Array sizes */ #define VALIDATE_BUF_SIZE 4096 +#define VALIDATE_MSG_LEN 256 #define RTAS_MSG_MAXLEN 64 /* Quirk - RTAS requires 4k list length and block size */ @@ -466,7 +467,7 @@ static void validate_flash(struct rtas_validate_flash_t *args_buf) } static int get_validate_flash_msg(struct rtas_validate_flash_t *args_buf, - char *msg) + char *msg, int msglen) { int n; @@ -474,7 +475,8 @@ static int get_validate_flash_msg(struct rtas_validate_flash_t *args_buf, n = sprintf(msg, "%d\n", args_buf->update_results); if ((args_buf->update_results >= VALIDATE_CUR_UNKNOWN) || (args_buf->update_results == VALIDATE_TMP_UPDATE)) - n += sprintf(msg + n, "%s\n", args_buf->buf); + n += snprintf(msg + n, msglen - n, "%s\n", + args_buf->buf); } else { n = sprintf(msg, "%d\n", args_buf->status); } @@ -486,11 +488,11 @@ static ssize_t validate_flash_read(struct file *file, char __user *buf, { struct rtas_validate_flash_t *const args_buf = &rtas_validate_flash_data; - char msg[RTAS_MSG_MAXLEN]; + char msg[VALIDATE_MSG_LEN]; int msglen; mutex_lock(&rtas_validate_flash_mutex); - msglen = get_validate_flash_msg(args_buf, msg); + msglen = get_validate_flash_msg(args_buf, msg, VALIDATE_MSG_LEN); mutex_unlock(&rtas_validate_flash_mutex); return simple_read_from_buffer(buf, count, ppos, msg, msglen); diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index cf12eae02de5..577a8aa69c6e 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -13,6 +13,7 @@ #include <linux/signal.h> #include <linux/uprobes.h> #include <linux/key.h> +#include <linux/context_tracking.h> #include <asm/hw_breakpoint.h> #include <asm/uaccess.h> #include <asm/unistd.h> @@ -24,7 +25,7 @@ * through debug.exception-trace sysctl. */ -int show_unhandled_signals = 0; +int show_unhandled_signals = 1; /* * Allocate space for the signal frame @@ -159,6 +160,8 @@ static int do_signal(struct pt_regs *regs) void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags) { + user_exit(); + if (thread_info_flags & _TIF_UPROBE) uprobe_notify_resume(regs); @@ -169,4 +172,6 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags) clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(regs); } + + user_enter(); } diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 83efa2f7d926..a7a648f6b750 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -35,6 +35,7 @@ #include <linux/kdebug.h> #include <linux/debugfs.h> #include <linux/ratelimit.h> +#include <linux/context_tracking.h> #include <asm/emulated_ops.h> #include <asm/pgtable.h> @@ -667,6 +668,7 @@ int machine_check_generic(struct pt_regs *regs) void machine_check_exception(struct pt_regs *regs) { + enum ctx_state prev_state = exception_enter(); int recover = 0; __get_cpu_var(irq_stat).mce_exceptions++; @@ -683,7 +685,7 @@ void machine_check_exception(struct pt_regs *regs) recover = cur_cpu_spec->machine_check(regs); if (recover > 0) - return; + goto bail; #if defined(CONFIG_8xx) && defined(CONFIG_PCI) /* the qspan pci read routines can cause machine checks -- Cort @@ -693,20 +695,23 @@ void machine_check_exception(struct pt_regs *regs) * -- BenH */ bad_page_fault(regs, regs->dar, SIGBUS); - return; + goto bail; #endif if (debugger_fault_handler(regs)) - return; + goto bail; if (check_io_access(regs)) - return; + goto bail; die("Machine check", regs, SIGBUS); /* Must die if the interrupt is not recoverable */ if (!(regs->msr & MSR_RI)) panic("Unrecoverable Machine check"); + +bail: + exception_exit(prev_state); } void SMIException(struct pt_regs *regs) @@ -716,20 +721,29 @@ void SMIException(struct pt_regs *regs) void unknown_exception(struct pt_regs *regs) { + enum ctx_state prev_state = exception_enter(); + printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n", regs->nip, regs->msr, regs->trap); _exception(SIGTRAP, regs, 0, 0); + + exception_exit(prev_state); } void instruction_breakpoint_exception(struct pt_regs *regs) { + enum ctx_state prev_state = exception_enter(); + if (notify_die(DIE_IABR_MATCH, "iabr_match", regs, 5, 5, SIGTRAP) == NOTIFY_STOP) - return; + goto bail; if (debugger_iabr_match(regs)) - return; + goto bail; _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); + +bail: + exception_exit(prev_state); } void RunModeException(struct pt_regs *regs) @@ -739,15 +753,20 @@ void RunModeException(struct pt_regs *regs) void __kprobes single_step_exception(struct pt_regs *regs) { + enum ctx_state prev_state = exception_enter(); + clear_single_step(regs); if (notify_die(DIE_SSTEP, "single_step", regs, 5, 5, SIGTRAP) == NOTIFY_STOP) - return; + goto bail; if (debugger_sstep(regs)) - return; + goto bail; _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip); + +bail: + exception_exit(prev_state); } /* @@ -1005,6 +1024,7 @@ int is_valid_bugaddr(unsigned long addr) void __kprobes program_check_exception(struct pt_regs *regs) { + enum ctx_state prev_state = exception_enter(); unsigned int reason = get_reason(regs); extern int do_mathemu(struct pt_regs *regs); @@ -1014,26 +1034,26 @@ void __kprobes program_check_exception(struct pt_regs *regs) if (reason & REASON_FP) { /* IEEE FP exception */ parse_fpe(regs); - return; + goto bail; } if (reason & REASON_TRAP) { /* Debugger is first in line to stop recursive faults in * rcu_lock, notify_die, or atomic_notifier_call_chain */ if (debugger_bpt(regs)) - return; + goto bail; /* trap exception */ if (notify_die(DIE_BPT, "breakpoint", regs, 5, 5, SIGTRAP) == NOTIFY_STOP) - return; + goto bail; if (!(regs->msr & MSR_PR) && /* not user-mode */ report_bug(regs->nip, regs) == BUG_TRAP_TYPE_WARN) { regs->nip += 4; - return; + goto bail; } _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); - return; + goto bail; } #ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (reason & REASON_TM) { @@ -1049,7 +1069,7 @@ void __kprobes program_check_exception(struct pt_regs *regs) if (!user_mode(regs) && report_bug(regs->nip, regs) == BUG_TRAP_TYPE_WARN) { regs->nip += 4; - return; + goto bail; } /* If usermode caused this, it's done something illegal and * gets a SIGILL slap on the wrist. We call it an illegal @@ -1059,7 +1079,7 @@ void __kprobes program_check_exception(struct pt_regs *regs) */ if (user_mode(regs)) { _exception(SIGILL, regs, ILL_ILLOPN, regs->nip); - return; + goto bail; } else { printk(KERN_EMERG "Unexpected TM Bad Thing exception " "at %lx (msr 0x%x)\n", regs->nip, reason); @@ -1083,16 +1103,16 @@ void __kprobes program_check_exception(struct pt_regs *regs) switch (do_mathemu(regs)) { case 0: emulate_single_step(regs); - return; + goto bail; case 1: { int code = 0; code = __parse_fpscr(current->thread.fpscr.val); _exception(SIGFPE, regs, code, regs->nip); - return; + goto bail; } case -EFAULT: _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip); - return; + goto bail; } /* fall through on any other errors */ #endif /* CONFIG_MATH_EMULATION */ @@ -1103,10 +1123,10 @@ void __kprobes program_check_exception(struct pt_regs *regs) case 0: regs->nip += 4; emulate_single_step(regs); - return; + goto bail; case -EFAULT: _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip); - return; + goto bail; } } @@ -1114,10 +1134,14 @@ void __kprobes program_check_exception(struct pt_regs *regs) _exception(SIGILL, regs, ILL_PRVOPC, regs->nip); else _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); + +bail: + exception_exit(prev_state); } void alignment_exception(struct pt_regs *regs) { + enum ctx_state prev_state = exception_enter(); int sig, code, fixed = 0; /* We restore the interrupt state now */ @@ -1131,7 +1155,7 @@ void alignment_exception(struct pt_regs *regs) if (fixed == 1) { regs->nip += 4; /* skip over emulated instruction */ emulate_single_step(regs); - return; + goto bail; } /* Operand address was bad */ @@ -1146,6 +1170,9 @@ void alignment_exception(struct pt_regs *regs) _exception(sig, regs, code, regs->dar); else bad_page_fault(regs, regs->dar, sig); + +bail: + exception_exit(prev_state); } void StackOverflow(struct pt_regs *regs) @@ -1174,23 +1201,32 @@ void trace_syscall(struct pt_regs *regs) void kernel_fp_unavailable_exception(struct pt_regs *regs) { + enum ctx_state prev_state = exception_enter(); + printk(KERN_EMERG "Unrecoverable FP Unavailable Exception " "%lx at %lx\n", regs->trap, regs->nip); die("Unrecoverable FP Unavailable Exception", regs, SIGABRT); + + exception_exit(prev_state); } void altivec_unavailable_exception(struct pt_regs *regs) { + enum ctx_state prev_state = exception_enter(); + if (user_mode(regs)) { /* A user program has executed an altivec instruction, but this kernel doesn't support altivec. */ _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); - return; + goto bail; } printk(KERN_EMERG "Unrecoverable VMX/Altivec Unavailable Exception " "%lx at %lx\n", regs->trap, regs->nip); die("Unrecoverable VMX/Altivec Unavailable Exception", regs, SIGABRT); + +bail: + exception_exit(prev_state); } void vsx_unavailable_exception(struct pt_regs *regs) diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c index 13b867093499..9d3fdcd66290 100644 --- a/arch/powerpc/kernel/udbg.c +++ b/arch/powerpc/kernel/udbg.c @@ -64,6 +64,9 @@ void __init udbg_early_init(void) udbg_init_usbgecko(); #elif defined(CONFIG_PPC_EARLY_DEBUG_WSP) udbg_init_wsp(); +#elif defined(CONFIG_PPC_EARLY_DEBUG_MEMCONS) + /* In memory console */ + udbg_init_memcons(); #elif defined(CONFIG_PPC_EARLY_DEBUG_EHV_BC) udbg_init_ehv_bc(); #elif defined(CONFIG_PPC_EARLY_DEBUG_PS3GELIC) diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 229951ffc351..8726779e1409 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -32,6 +32,7 @@ #include <linux/perf_event.h> #include <linux/magic.h> #include <linux/ratelimit.h> +#include <linux/context_tracking.h> #include <asm/firmware.h> #include <asm/page.h> @@ -196,6 +197,7 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault) int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, unsigned long error_code) { + enum ctx_state prev_state = exception_enter(); struct vm_area_struct * vma; struct mm_struct *mm = current->mm; unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; @@ -204,6 +206,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, int trap = TRAP(regs); int is_exec = trap == 0x400; int fault; + int rc = 0; #if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) /* @@ -230,28 +233,30 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, * look at it */ if (error_code & ICSWX_DSI_UCT) { - int rc = acop_handle_fault(regs, address, error_code); + rc = acop_handle_fault(regs, address, error_code); if (rc) - return rc; + goto bail; } #endif /* CONFIG_PPC_ICSWX */ if (notify_page_fault(regs)) - return 0; + goto bail; if (unlikely(debugger_fault_handler(regs))) - return 0; + goto bail; /* On a kernel SLB miss we can only check for a valid exception entry */ - if (!user_mode(regs) && (address >= TASK_SIZE)) - return SIGSEGV; + if (!user_mode(regs) && (address >= TASK_SIZE)) { + rc = SIGSEGV; + goto bail; + } #if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE) || \ defined(CONFIG_PPC_BOOK3S_64)) if (error_code & DSISR_DABRMATCH) { /* breakpoint match */ do_break(regs, address, error_code); - return 0; + goto bail; } #endif @@ -260,8 +265,10 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, local_irq_enable(); if (in_atomic() || mm == NULL) { - if (!user_mode(regs)) - return SIGSEGV; + if (!user_mode(regs)) { + rc = SIGSEGV; + goto bail; + } /* in_atomic() in user mode is really bad, as is current->mm == NULL. */ printk(KERN_EMERG "Page fault in user mode with " @@ -417,9 +424,11 @@ good_area: */ fault = handle_mm_fault(mm, vma, address, flags); if (unlikely(fault & (VM_FAULT_RETRY|VM_FAULT_ERROR))) { - int rc = mm_fault_error(regs, address, fault); + rc = mm_fault_error(regs, address, fault); if (rc >= MM_FAULT_RETURN) - return rc; + goto bail; + else + rc = 0; } /* @@ -454,7 +463,7 @@ good_area: } up_read(&mm->mmap_sem); - return 0; + goto bail; bad_area: up_read(&mm->mmap_sem); @@ -463,7 +472,7 @@ bad_area_nosemaphore: /* User mode accesses cause a SIGSEGV */ if (user_mode(regs)) { _exception(SIGSEGV, regs, code, address); - return 0; + goto bail; } if (is_exec && (error_code & DSISR_PROTFAULT)) @@ -471,7 +480,11 @@ bad_area_nosemaphore: " page (%lx) - exploit attempt? (uid: %d)\n", address, from_kuid(&init_user_ns, current_uid())); - return SIGSEGV; + rc = SIGSEGV; + +bail: + exception_exit(prev_state); + return rc; } diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 88ac0eeaadde..e303a6d74e3a 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -33,6 +33,7 @@ #include <linux/init.h> #include <linux/signal.h> #include <linux/memblock.h> +#include <linux/context_tracking.h> #include <asm/processor.h> #include <asm/pgtable.h> @@ -954,6 +955,7 @@ void hash_failure_debug(unsigned long ea, unsigned long access, */ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) { + enum ctx_state prev_state = exception_enter(); pgd_t *pgdir; unsigned long vsid; struct mm_struct *mm; @@ -973,7 +975,8 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) mm = current->mm; if (! mm) { DBG_LOW(" user region with no mm !\n"); - return 1; + rc = 1; + goto bail; } psize = get_slice_psize(mm, ea); ssize = user_segment_size(ea); @@ -992,19 +995,23 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) /* Not a valid range * Send the problem up to do_page_fault */ - return 1; + rc = 1; + goto bail; } DBG_LOW(" mm=%p, mm->pgdir=%p, vsid=%016lx\n", mm, mm->pgd, vsid); /* Bad address. */ if (!vsid) { DBG_LOW("Bad address!\n"); - return 1; + rc = 1; + goto bail; } /* Get pgdir */ pgdir = mm->pgd; - if (pgdir == NULL) - return 1; + if (pgdir == NULL) { + rc = 1; + goto bail; + } /* Check CPU locality */ tmp = cpumask_of(smp_processor_id()); @@ -1027,7 +1034,8 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) ptep = find_linux_pte_or_hugepte(pgdir, ea, &hugeshift); if (ptep == NULL || !pte_present(*ptep)) { DBG_LOW(" no PTE !\n"); - return 1; + rc = 1; + goto bail; } /* Add _PAGE_PRESENT to the required access perm */ @@ -1038,13 +1046,16 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) */ if (access & ~pte_val(*ptep)) { DBG_LOW(" no access !\n"); - return 1; + rc = 1; + goto bail; } #ifdef CONFIG_HUGETLB_PAGE - if (hugeshift) - return __hash_page_huge(ea, access, vsid, ptep, trap, local, + if (hugeshift) { + rc = __hash_page_huge(ea, access, vsid, ptep, trap, local, ssize, hugeshift, psize); + goto bail; + } #endif /* CONFIG_HUGETLB_PAGE */ #ifndef CONFIG_PPC_64K_PAGES @@ -1124,6 +1135,9 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) pte_val(*(ptep + PTRS_PER_PTE))); #endif DBG_LOW(" -> rc=%d\n", rc); + +bail: + exception_exit(prev_state); return rc; } EXPORT_SYMBOL_GPL(hash_page); @@ -1259,6 +1273,8 @@ void flush_hash_range(unsigned long number, int local) */ void low_hash_fault(struct pt_regs *regs, unsigned long address, int rc) { + enum ctx_state prev_state = exception_enter(); + if (user_mode(regs)) { #ifdef CONFIG_PPC_SUBPAGE_PROT if (rc == -2) @@ -1268,6 +1284,8 @@ void low_hash_fault(struct pt_regs *regs, unsigned long address, int rc) _exception(SIGBUS, regs, BUS_ADRERR, address); } else bad_page_fault(regs, address, SIGBUS); + + exception_exit(prev_state); } long hpte_insert_repeating(unsigned long hash, unsigned long vpn, diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index c2787bf779ca..a90b9c458990 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -215,7 +215,8 @@ static void __meminit vmemmap_create_mapping(unsigned long start, unsigned long phys) { int mapped = htab_bolt_mapping(start, start + page_size, phys, - PAGE_KERNEL, mmu_vmemmap_psize, + pgprot_val(PAGE_KERNEL), + mmu_vmemmap_psize, mmu_kernel_ssize); BUG_ON(mapped < 0); } diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index c627843c5b2e..426180b84978 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -13,11 +13,13 @@ #include <linux/perf_event.h> #include <linux/percpu.h> #include <linux/hardirq.h> +#include <linux/uaccess.h> #include <asm/reg.h> #include <asm/pmc.h> #include <asm/machdep.h> #include <asm/firmware.h> #include <asm/ptrace.h> +#include <asm/code-patching.h> #define BHRB_MAX_ENTRIES 32 #define BHRB_TARGET 0x0000000000000002 @@ -100,6 +102,10 @@ static inline int siar_valid(struct pt_regs *regs) return 1; } +static inline void power_pmu_bhrb_enable(struct perf_event *event) {} +static inline void power_pmu_bhrb_disable(struct perf_event *event) {} +void power_pmu_flush_branch_stack(void) {} +static inline void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) {} #endif /* CONFIG_PPC32 */ static bool regs_use_siar(struct pt_regs *regs) @@ -308,6 +314,159 @@ static inline int siar_valid(struct pt_regs *regs) return 1; } + +/* Reset all possible BHRB entries */ +static void power_pmu_bhrb_reset(void) +{ + asm volatile(PPC_CLRBHRB); +} + +static void power_pmu_bhrb_enable(struct perf_event *event) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + + if (!ppmu->bhrb_nr) + return; + + /* Clear BHRB if we changed task context to avoid data leaks */ + if (event->ctx->task && cpuhw->bhrb_context != event->ctx) { + power_pmu_bhrb_reset(); + cpuhw->bhrb_context = event->ctx; + } + cpuhw->bhrb_users++; +} + +static void power_pmu_bhrb_disable(struct perf_event *event) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + + if (!ppmu->bhrb_nr) + return; + + cpuhw->bhrb_users--; + WARN_ON_ONCE(cpuhw->bhrb_users < 0); + + if (!cpuhw->disabled && !cpuhw->bhrb_users) { + /* BHRB cannot be turned off when other + * events are active on the PMU. + */ + + /* avoid stale pointer */ + cpuhw->bhrb_context = NULL; + } +} + +/* Called from ctxsw to prevent one process's branch entries to + * mingle with the other process's entries during context switch. + */ +void power_pmu_flush_branch_stack(void) +{ + if (ppmu->bhrb_nr) + power_pmu_bhrb_reset(); +} +/* Calculate the to address for a branch */ +static __u64 power_pmu_bhrb_to(u64 addr) +{ + unsigned int instr; + int ret; + __u64 target; + + if (is_kernel_addr(addr)) + return branch_target((unsigned int *)addr); + + /* Userspace: need copy instruction here then translate it */ + pagefault_disable(); + ret = __get_user_inatomic(instr, (unsigned int __user *)addr); + if (ret) { + pagefault_enable(); + return 0; + } + pagefault_enable(); + + target = branch_target(&instr); + if ((!target) || (instr & BRANCH_ABSOLUTE)) + return target; + + /* Translate relative branch target from kernel to user address */ + return target - (unsigned long)&instr + addr; +} + +/* Processing BHRB entries */ +void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) +{ + u64 val; + u64 addr; + int r_index, u_index, pred; + + r_index = 0; + u_index = 0; + while (r_index < ppmu->bhrb_nr) { + /* Assembly read function */ + val = read_bhrb(r_index++); + if (!val) + /* Terminal marker: End of valid BHRB entries */ + break; + else { + addr = val & BHRB_EA; + pred = val & BHRB_PREDICTION; + + if (!addr) + /* invalid entry */ + continue; + + /* Branches are read most recent first (ie. mfbhrb 0 is + * the most recent branch). + * There are two types of valid entries: + * 1) a target entry which is the to address of a + * computed goto like a blr,bctr,btar. The next + * entry read from the bhrb will be branch + * corresponding to this target (ie. the actual + * blr/bctr/btar instruction). + * 2) a from address which is an actual branch. If a + * target entry proceeds this, then this is the + * matching branch for that target. If this is not + * following a target entry, then this is a branch + * where the target is given as an immediate field + * in the instruction (ie. an i or b form branch). + * In this case we need to read the instruction from + * memory to determine the target/to address. + */ + + if (val & BHRB_TARGET) { + /* Target branches use two entries + * (ie. computed gotos/XL form) + */ + cpuhw->bhrb_entries[u_index].to = addr; + cpuhw->bhrb_entries[u_index].mispred = pred; + cpuhw->bhrb_entries[u_index].predicted = ~pred; + + /* Get from address in next entry */ + val = read_bhrb(r_index++); + addr = val & BHRB_EA; + if (val & BHRB_TARGET) { + /* Shouldn't have two targets in a + row.. Reset index and try again */ + r_index--; + addr = 0; + } + cpuhw->bhrb_entries[u_index].from = addr; + } else { + /* Branches to immediate field + (ie I or B form) */ + cpuhw->bhrb_entries[u_index].from = addr; + cpuhw->bhrb_entries[u_index].to = + power_pmu_bhrb_to(addr); + cpuhw->bhrb_entries[u_index].mispred = pred; + cpuhw->bhrb_entries[u_index].predicted = ~pred; + } + u_index++; + + } + } + cpuhw->bhrb_stack.nr = u_index; + return; +} + #endif /* CONFIG_PPC64 */ static void perf_event_interrupt(struct pt_regs *regs); @@ -904,47 +1063,6 @@ static int collect_events(struct perf_event *group, int max_count, return n; } -/* Reset all possible BHRB entries */ -static void power_pmu_bhrb_reset(void) -{ - asm volatile(PPC_CLRBHRB); -} - -void power_pmu_bhrb_enable(struct perf_event *event) -{ - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); - - if (!ppmu->bhrb_nr) - return; - - /* Clear BHRB if we changed task context to avoid data leaks */ - if (event->ctx->task && cpuhw->bhrb_context != event->ctx) { - power_pmu_bhrb_reset(); - cpuhw->bhrb_context = event->ctx; - } - cpuhw->bhrb_users++; -} - -void power_pmu_bhrb_disable(struct perf_event *event) -{ - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); - - if (!ppmu->bhrb_nr) - return; - - cpuhw->bhrb_users--; - WARN_ON_ONCE(cpuhw->bhrb_users < 0); - - if (!cpuhw->disabled && !cpuhw->bhrb_users) { - /* BHRB cannot be turned off when other - * events are active on the PMU. - */ - - /* avoid stale pointer */ - cpuhw->bhrb_context = NULL; - } -} - /* * Add a event to the PMU. * If all events are not already frozen, then we disable and @@ -1180,15 +1298,6 @@ int power_pmu_commit_txn(struct pmu *pmu) return 0; } -/* Called from ctxsw to prevent one process's branch entries to - * mingle with the other process's entries during context switch. - */ -void power_pmu_flush_branch_stack(void) -{ - if (ppmu->bhrb_nr) - power_pmu_bhrb_reset(); -} - /* * Return 1 if we might be able to put event on a limited PMC, * or 0 if not. @@ -1458,77 +1567,6 @@ struct pmu power_pmu = { .flush_branch_stack = power_pmu_flush_branch_stack, }; -/* Processing BHRB entries */ -void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) -{ - u64 val; - u64 addr; - int r_index, u_index, target, pred; - - r_index = 0; - u_index = 0; - while (r_index < ppmu->bhrb_nr) { - /* Assembly read function */ - val = read_bhrb(r_index); - - /* Terminal marker: End of valid BHRB entries */ - if (val == 0) { - break; - } else { - /* BHRB field break up */ - addr = val & BHRB_EA; - pred = val & BHRB_PREDICTION; - target = val & BHRB_TARGET; - - /* Probable Missed entry: Not applicable for POWER8 */ - if ((addr == 0) && (target == 0) && (pred == 1)) { - r_index++; - continue; - } - - /* Real Missed entry: Power8 based missed entry */ - if ((addr == 0) && (target == 1) && (pred == 1)) { - r_index++; - continue; - } - - /* Reserved condition: Not a valid entry */ - if ((addr == 0) && (target == 1) && (pred == 0)) { - r_index++; - continue; - } - - /* Is a target address */ - if (val & BHRB_TARGET) { - /* First address cannot be a target address */ - if (r_index == 0) { - r_index++; - continue; - } - - /* Update target address for the previous entry */ - cpuhw->bhrb_entries[u_index - 1].to = addr; - cpuhw->bhrb_entries[u_index - 1].mispred = pred; - cpuhw->bhrb_entries[u_index - 1].predicted = ~pred; - - /* Dont increment u_index */ - r_index++; - } else { - /* Update address, flags for current entry */ - cpuhw->bhrb_entries[u_index].from = addr; - cpuhw->bhrb_entries[u_index].mispred = pred; - cpuhw->bhrb_entries[u_index].predicted = ~pred; - - /* Successfully popullated one entry */ - u_index++; - r_index++; - } - } - } - cpuhw->bhrb_stack.nr = u_index; - return; -} - /* * A counter has overflowed; update its count and record * things if requested. Note that interrupts are hard-disabled diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index a881232a3cce..b62aab3e22ec 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -128,7 +128,7 @@ config PPC_RTAS_DAEMON config RTAS_PROC bool "Proc interface to RTAS" - depends on PPC_RTAS + depends on PPC_RTAS && PROC_FS default y config RTAS_FLASH diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index ade4463226c6..628c564ceadb 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -15,6 +15,7 @@ #include <linux/of.h> #include <linux/of_platform.h> #include <linux/interrupt.h> +#include <linux/slab.h> #include <asm/opal.h> #include <asm/firmware.h> @@ -28,6 +29,8 @@ struct opal { static struct device_node *opal_node; static DEFINE_SPINLOCK(opal_write_lock); extern u64 opal_mc_secondary_handler[]; +static unsigned int *opal_irqs; +static unsigned int opal_irq_count; int __init early_init_dt_scan_opal(unsigned long node, const char *uname, int depth, void *data) @@ -53,7 +56,11 @@ int __init early_init_dt_scan_opal(unsigned long node, opal.entry, entryp, entrysz); powerpc_firmware_features |= FW_FEATURE_OPAL; - if (of_flat_dt_is_compatible(node, "ibm,opal-v2")) { + if (of_flat_dt_is_compatible(node, "ibm,opal-v3")) { + powerpc_firmware_features |= FW_FEATURE_OPALv2; + powerpc_firmware_features |= FW_FEATURE_OPALv3; + printk("OPAL V3 detected !\n"); + } else if (of_flat_dt_is_compatible(node, "ibm,opal-v2")) { powerpc_firmware_features |= FW_FEATURE_OPALv2; printk("OPAL V2 detected !\n"); } else { @@ -144,6 +151,13 @@ int opal_put_chars(uint32_t vtermno, const char *data, int total_len) rc == OPAL_BUSY_EVENT || rc == OPAL_SUCCESS)) { len = total_len; rc = opal_console_write(vtermno, &len, data); + + /* Closed or other error drop */ + if (rc != OPAL_SUCCESS && rc != OPAL_BUSY && + rc != OPAL_BUSY_EVENT) { + written = total_len; + break; + } if (rc == OPAL_SUCCESS) { total_len -= len; data += len; @@ -316,6 +330,8 @@ static int __init opal_init(void) irqs = of_get_property(opal_node, "opal-interrupts", &irqlen); pr_debug("opal: Found %d interrupts reserved for OPAL\n", irqs ? (irqlen / 4) : 0); + opal_irq_count = irqlen / 4; + opal_irqs = kzalloc(opal_irq_count * sizeof(unsigned int), GFP_KERNEL); for (i = 0; irqs && i < (irqlen / 4); i++, irqs++) { unsigned int hwirq = be32_to_cpup(irqs); unsigned int irq = irq_create_mapping(NULL, hwirq); @@ -327,7 +343,19 @@ static int __init opal_init(void) if (rc) pr_warning("opal: Error %d requesting irq %d" " (0x%x)\n", rc, irq, hwirq); + opal_irqs[i] = irq; } return 0; } subsys_initcall(opal_init); + +void opal_shutdown(void) +{ + unsigned int i; + + for (i = 0; i < opal_irq_count; i++) { + if (opal_irqs[i]) + free_irq(opal_irqs[i], 0); + opal_irqs[i] = 0; + } +} diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 1da578b7c1bf..3937aaae5bc4 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1048,6 +1048,12 @@ static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus, return phb->ioda.pe_rmap[(bus->number << 8) | devfn]; } +static void pnv_pci_ioda_shutdown(struct pnv_phb *phb) +{ + opal_pci_reset(phb->opal_id, OPAL_PCI_IODA_TABLE_RESET, + OPAL_ASSERT_RESET); +} + void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type) { struct pci_controller *hose; @@ -1178,6 +1184,9 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type) /* Setup TCEs */ phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup; + /* Setup shutdown function for kexec */ + phb->shutdown = pnv_pci_ioda_shutdown; + /* Setup MSI support */ pnv_pci_init_ioda_msis(phb); diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 55dfca844ddf..163bd7422f1c 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -450,6 +450,18 @@ static void pnv_pci_dma_dev_setup(struct pci_dev *pdev) pnv_pci_dma_fallback_setup(hose, pdev); } +void pnv_pci_shutdown(void) +{ + struct pci_controller *hose; + + list_for_each_entry(hose, &hose_list, list_node) { + struct pnv_phb *phb = hose->private_data; + + if (phb && phb->shutdown) + phb->shutdown(phb); + } +} + /* Fixup wrong class code in p7ioc and p8 root complex */ static void pnv_p7ioc_rc_quirk(struct pci_dev *dev) { diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 48dc4bb856a1..25d76c4df50b 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -86,6 +86,7 @@ struct pnv_phb { void (*dma_dev_setup)(struct pnv_phb *phb, struct pci_dev *pdev); void (*fixup_phb)(struct pci_controller *hose); u32 (*bdfn_to_pe)(struct pnv_phb *phb, struct pci_bus *bus, u32 devfn); + void (*shutdown)(struct pnv_phb *phb); union { struct { @@ -158,4 +159,5 @@ extern void pnv_pci_init_ioda_hub(struct device_node *np); extern void pnv_pci_init_ioda2_phb(struct device_node *np); extern void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl, u64 *startp, u64 *endp); + #endif /* __POWERNV_PCI_H */ diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h index 8a9df7f9667e..a1c6f83fc391 100644 --- a/arch/powerpc/platforms/powernv/powernv.h +++ b/arch/powerpc/platforms/powernv/powernv.h @@ -9,8 +9,10 @@ static inline void pnv_smp_init(void) { } #ifdef CONFIG_PCI extern void pnv_pci_init(void); +extern void pnv_pci_shutdown(void); #else static inline void pnv_pci_init(void) { } +static inline void pnv_pci_shutdown(void) { } #endif #endif /* _POWERNV_H */ diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index db1ad1c8f68f..d4459bfc92f7 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -78,7 +78,9 @@ static void pnv_show_cpuinfo(struct seq_file *m) if (root) model = of_get_property(root, "model", NULL); seq_printf(m, "machine\t\t: PowerNV %s\n", model); - if (firmware_has_feature(FW_FEATURE_OPALv2)) + if (firmware_has_feature(FW_FEATURE_OPALv3)) + seq_printf(m, "firmware\t: OPAL v3\n"); + else if (firmware_has_feature(FW_FEATURE_OPALv2)) seq_printf(m, "firmware\t: OPAL v2\n"); else if (firmware_has_feature(FW_FEATURE_OPAL)) seq_printf(m, "firmware\t: OPAL v1\n"); @@ -126,6 +128,17 @@ static void pnv_progress(char *s, unsigned short hex) { } +static void pnv_shutdown(void) +{ + /* Let the PCI code clear up IODA tables */ + pnv_pci_shutdown(); + + /* And unregister all OPAL interrupts so they don't fire + * up while we kexec + */ + opal_shutdown(); +} + #ifdef CONFIG_KEXEC static void pnv_kexec_cpu_down(int crash_shutdown, int secondary) { @@ -187,6 +200,7 @@ define_machine(powernv) { .init_IRQ = pnv_init_IRQ, .show_cpuinfo = pnv_show_cpuinfo, .progress = pnv_progress, + .machine_shutdown = pnv_shutdown, .power_save = power7_idle, .calibrate_decr = generic_calibrate_decr, #ifdef CONFIG_KEXEC diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index 6a3ecca5b725..88c9459c3e07 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -71,18 +71,68 @@ int pnv_smp_kick_cpu(int nr) BUG_ON(nr < 0 || nr >= NR_CPUS); - /* On OPAL v2 the CPU are still spinning inside OPAL itself, - * get them back now + /* + * If we already started or OPALv2 is not supported, we just + * kick the CPU via the PACA */ - if (!paca[nr].cpu_start && firmware_has_feature(FW_FEATURE_OPALv2)) { - pr_devel("OPAL: Starting CPU %d (HW 0x%x)...\n", nr, pcpu); - rc = opal_start_cpu(pcpu, start_here); + if (paca[nr].cpu_start || !firmware_has_feature(FW_FEATURE_OPALv2)) + goto kick; + + /* + * At this point, the CPU can either be spinning on the way in + * from kexec or be inside OPAL waiting to be started for the + * first time. OPAL v3 allows us to query OPAL to know if it + * has the CPUs, so we do that + */ + if (firmware_has_feature(FW_FEATURE_OPALv3)) { + uint8_t status; + + rc = opal_query_cpu_status(pcpu, &status); if (rc != OPAL_SUCCESS) { - pr_warn("OPAL Error %ld starting CPU %d\n", + pr_warn("OPAL Error %ld querying CPU %d state\n", rc, nr); return -ENODEV; } + + /* + * Already started, just kick it, probably coming from + * kexec and spinning + */ + if (status == OPAL_THREAD_STARTED) + goto kick; + + /* + * Available/inactive, let's kick it + */ + if (status == OPAL_THREAD_INACTIVE) { + pr_devel("OPAL: Starting CPU %d (HW 0x%x)...\n", + nr, pcpu); + rc = opal_start_cpu(pcpu, start_here); + if (rc != OPAL_SUCCESS) { + pr_warn("OPAL Error %ld starting CPU %d\n", + rc, nr); + return -ENODEV; + } + } else { + /* + * An unavailable CPU (or any other unknown status) + * shouldn't be started. It should also + * not be in the possible map but currently it can + * happen + */ + pr_devel("OPAL: CPU %d (HW 0x%x) is unavailable" + " (status %d)...\n", nr, pcpu, status); + return -ENODEV; + } + } else { + /* + * On OPAL v2, we just kick it and hope for the best, + * we must not test the error from opal_start_cpu() or + * we would fail to get CPUs from kexec. + */ + opal_start_cpu(pcpu, start_here); } + kick: return smp_generic_kick_cpu(nr); } diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index 9a0941bc4d31..023b288f895b 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -18,6 +18,7 @@ config PPC_PSERIES select PPC_PCI_CHOICE if EXPERT select ZLIB_DEFLATE select PPC_DOORBELL + select HAVE_CONTEXT_TRACKING default y config PPC_SPLPAR diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c index 47226e04126d..5f997e79d570 100644 --- a/arch/powerpc/platforms/pseries/suspend.c +++ b/arch/powerpc/platforms/pseries/suspend.c @@ -16,6 +16,7 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include <linux/cpu.h> #include <linux/delay.h> #include <linux/suspend.h> #include <linux/stat.h> @@ -126,11 +127,15 @@ static ssize_t store_hibernate(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { + cpumask_var_t offline_mask; int rc; if (!capable(CAP_SYS_ADMIN)) return -EPERM; + if (!alloc_cpumask_var(&offline_mask, GFP_TEMPORARY)) + return -ENOMEM; + stream_id = simple_strtoul(buf, NULL, 16); do { @@ -140,15 +145,32 @@ static ssize_t store_hibernate(struct device *dev, } while (rc == -EAGAIN); if (!rc) { + /* All present CPUs must be online */ + cpumask_andnot(offline_mask, cpu_present_mask, + cpu_online_mask); + rc = rtas_online_cpus_mask(offline_mask); + if (rc) { + pr_err("%s: Could not bring present CPUs online.\n", + __func__); + goto out; + } + stop_topology_update(); rc = pm_suspend(PM_SUSPEND_MEM); start_topology_update(); + + /* Take down CPUs not online prior to suspend */ + if (!rtas_offline_cpus_mask(offline_mask)) + pr_warn("%s: Could not restore CPUs to offline " + "state.\n", __func__); } stream_id = 0; if (!rc) rc = count; +out: + free_cpumask_var(offline_mask); return rc; } diff --git a/arch/powerpc/platforms/wsp/ics.c b/arch/powerpc/platforms/wsp/ics.c index 97fe82ee8633..2d3b1dd9571d 100644 --- a/arch/powerpc/platforms/wsp/ics.c +++ b/arch/powerpc/platforms/wsp/ics.c @@ -361,7 +361,7 @@ static int wsp_chip_set_affinity(struct irq_data *d, xive = xive_set_server(xive, get_irq_server(ics, hw_irq)); wsp_ics_set_xive(ics, hw_irq, xive); - return 0; + return IRQ_SET_MASK_OK; } static struct irq_chip wsp_irq_chip = { diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile index b0a518e97599..99464a7bdb3b 100644 --- a/arch/powerpc/sysdev/Makefile +++ b/arch/powerpc/sysdev/Makefile @@ -64,6 +64,8 @@ endif obj-$(CONFIG_PPC_SCOM) += scom.o +obj-$(CONFIG_PPC_EARLY_DEBUG_MEMCONS) += udbg_memcons.o + subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror obj-$(CONFIG_PPC_XICS) += xics/ diff --git a/arch/powerpc/sysdev/ehv_pic.c b/arch/powerpc/sysdev/ehv_pic.c index 6e0e1005227f..9cd0e60716fe 100644 --- a/arch/powerpc/sysdev/ehv_pic.c +++ b/arch/powerpc/sysdev/ehv_pic.c @@ -81,7 +81,7 @@ int ehv_pic_set_affinity(struct irq_data *d, const struct cpumask *dest, ev_int_set_config(src, config, prio, cpuid); spin_unlock_irqrestore(&ehv_pic_lock, flags); - return 0; + return IRQ_SET_MASK_OK; } static unsigned int ehv_pic_type_to_vecpri(unsigned int type) diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index ee21b5e71aec..0a13ecb270c7 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -836,7 +836,7 @@ int mpic_set_affinity(struct irq_data *d, const struct cpumask *cpumask, mpic_physmask(mask)); } - return 0; + return IRQ_SET_MASK_OK; } static unsigned int mpic_type_to_vecpri(struct mpic *mpic, unsigned int type) diff --git a/arch/powerpc/sysdev/udbg_memcons.c b/arch/powerpc/sysdev/udbg_memcons.c new file mode 100644 index 000000000000..ce5a7b489e4b --- /dev/null +++ b/arch/powerpc/sysdev/udbg_memcons.c @@ -0,0 +1,105 @@ +/* + * A udbg backend which logs messages and reads input from in memory + * buffers. + * + * The console output can be read from memcons_output which is a + * circular buffer whose next write position is stored in memcons.output_pos. + * + * Input may be passed by writing into the memcons_input buffer when it is + * empty. The input buffer is empty when both input_pos == input_start and + * *input_start == '\0'. + * + * Copyright (C) 2003-2005 Anton Blanchard and Milton Miller, IBM Corp + * Copyright (C) 2013 Alistair Popple, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <asm/barrier.h> +#include <asm/page.h> +#include <asm/processor.h> +#include <asm/udbg.h> + +struct memcons { + char *output_start; + char *output_pos; + char *output_end; + char *input_start; + char *input_pos; + char *input_end; +}; + +static char memcons_output[CONFIG_PPC_MEMCONS_OUTPUT_SIZE]; +static char memcons_input[CONFIG_PPC_MEMCONS_INPUT_SIZE]; + +struct memcons memcons = { + .output_start = memcons_output, + .output_pos = memcons_output, + .output_end = &memcons_output[CONFIG_PPC_MEMCONS_OUTPUT_SIZE], + .input_start = memcons_input, + .input_pos = memcons_input, + .input_end = &memcons_input[CONFIG_PPC_MEMCONS_INPUT_SIZE], +}; + +void memcons_putc(char c) +{ + char *new_output_pos; + + *memcons.output_pos = c; + wmb(); + new_output_pos = memcons.output_pos + 1; + if (new_output_pos >= memcons.output_end) + new_output_pos = memcons.output_start; + + memcons.output_pos = new_output_pos; +} + +int memcons_getc_poll(void) +{ + char c; + char *new_input_pos; + + if (*memcons.input_pos) { + c = *memcons.input_pos; + + new_input_pos = memcons.input_pos + 1; + if (new_input_pos >= memcons.input_end) + new_input_pos = memcons.input_start; + else if (*new_input_pos == '\0') + new_input_pos = memcons.input_start; + + *memcons.input_pos = '\0'; + wmb(); + memcons.input_pos = new_input_pos; + return c; + } + + return -1; +} + +int memcons_getc(void) +{ + int c; + + while (1) { + c = memcons_getc_poll(); + if (c == -1) + cpu_relax(); + else + break; + } + + return c; +} + +void udbg_init_memcons(void) +{ + udbg_putc = memcons_putc; + udbg_getc = memcons_getc; + udbg_getc_poll = memcons_getc_poll; +} diff --git a/arch/powerpc/sysdev/xics/ics-opal.c b/arch/powerpc/sysdev/xics/ics-opal.c index f7e8609df0d5..39d72212655e 100644 --- a/arch/powerpc/sysdev/xics/ics-opal.c +++ b/arch/powerpc/sysdev/xics/ics-opal.c @@ -148,7 +148,7 @@ static int ics_opal_set_affinity(struct irq_data *d, __func__, d->irq, hw_irq, server, rc); return -1; } - return 0; + return IRQ_SET_MASK_OK; } static struct irq_chip ics_opal_irq_chip = { diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 6a154a91c7e7..685692c94f05 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -108,7 +108,6 @@ config X86 select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32 && X86_LOCAL_APIC) select GENERIC_TIME_VSYSCALL if X86_64 select KTIME_SCALAR if X86_32 - select ALWAYS_USE_PERSISTENT_CLOCK select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER select HAVE_CONTEXT_TRACKING if X86_64 diff --git a/arch/x86/include/asm/mc146818rtc.h b/arch/x86/include/asm/mc146818rtc.h index d354fb781c57..a55c7efcc4ed 100644 --- a/arch/x86/include/asm/mc146818rtc.h +++ b/arch/x86/include/asm/mc146818rtc.h @@ -95,8 +95,8 @@ static inline unsigned char current_lock_cmos_reg(void) unsigned char rtc_cmos_read(unsigned char addr); void rtc_cmos_write(unsigned char val, unsigned char addr); -extern int mach_set_rtc_mmss(unsigned long nowtime); -extern unsigned long mach_get_cmos_time(void); +extern int mach_set_rtc_mmss(const struct timespec *now); +extern void mach_get_cmos_time(struct timespec *now); #define RTC_IRQ 8 diff --git a/arch/x86/include/asm/mrst-vrtc.h b/arch/x86/include/asm/mrst-vrtc.h index 73668abdbedf..1e69a75412a4 100644 --- a/arch/x86/include/asm/mrst-vrtc.h +++ b/arch/x86/include/asm/mrst-vrtc.h @@ -3,7 +3,7 @@ extern unsigned char vrtc_cmos_read(unsigned char reg); extern void vrtc_cmos_write(unsigned char val, unsigned char reg); -extern unsigned long vrtc_get_time(void); -extern int vrtc_set_mmss(unsigned long nowtime); +extern void vrtc_get_time(struct timespec *now); +extern int vrtc_set_mmss(const struct timespec *now); #endif diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index d8d99222b36a..828a1565ba57 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -142,6 +142,8 @@ struct x86_cpuinit_ops { void (*fixup_cpu_id)(struct cpuinfo_x86 *c, int node); }; +struct timespec; + /** * struct x86_platform_ops - platform specific runtime functions * @calibrate_tsc: calibrate TSC @@ -156,8 +158,8 @@ struct x86_cpuinit_ops { */ struct x86_platform_ops { unsigned long (*calibrate_tsc)(void); - unsigned long (*get_wallclock)(void); - int (*set_wallclock)(unsigned long nowtime); + void (*get_wallclock)(struct timespec *ts); + int (*set_wallclock)(const struct timespec *ts); void (*iommu_shutdown)(void); bool (*is_untracked_pat_range)(u64 start, u64 end); void (*nmi_init)(void); diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index d2c381280e3c..0db81ab511cc 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -48,10 +48,9 @@ static struct pvclock_wall_clock wall_clock; * have elapsed since the hypervisor wrote the data. So we try to account for * that with system time */ -static unsigned long kvm_get_wallclock(void) +static void kvm_get_wallclock(struct timespec *now) { struct pvclock_vcpu_time_info *vcpu_time; - struct timespec ts; int low, high; int cpu; @@ -64,14 +63,12 @@ static unsigned long kvm_get_wallclock(void) cpu = smp_processor_id(); vcpu_time = &hv_clock[cpu].pvti; - pvclock_read_wallclock(&wall_clock, vcpu_time, &ts); + pvclock_read_wallclock(&wall_clock, vcpu_time, now); preempt_enable(); - - return ts.tv_sec; } -static int kvm_set_wallclock(unsigned long now) +static int kvm_set_wallclock(const struct timespec *now) { return -1; } diff --git a/arch/x86/kernel/microcode_intel_early.c b/arch/x86/kernel/microcode_intel_early.c index d893e8ed8ac9..2e9e12871c2b 100644 --- a/arch/x86/kernel/microcode_intel_early.c +++ b/arch/x86/kernel/microcode_intel_early.c @@ -487,6 +487,7 @@ static inline void show_saved_mc(void) #endif #if defined(CONFIG_MICROCODE_INTEL_EARLY) && defined(CONFIG_HOTPLUG_CPU) +static DEFINE_MUTEX(x86_cpu_microcode_mutex); /* * Save this mc into mc_saved_data. So it will be loaded early when a CPU is * hot added or resumes. @@ -507,7 +508,7 @@ int save_mc_for_early(u8 *mc) * Hold hotplug lock so mc_saved_data is not accessed by a CPU in * hotplug. */ - cpu_hotplug_driver_lock(); + mutex_lock(&x86_cpu_microcode_mutex); mc_saved_count_init = mc_saved_data.mc_saved_count; mc_saved_count = mc_saved_data.mc_saved_count; @@ -544,7 +545,7 @@ int save_mc_for_early(u8 *mc) } out: - cpu_hotplug_driver_unlock(); + mutex_unlock(&x86_cpu_microcode_mutex); return ret; } diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 607af0d4d5ef..4e7a37ff03ab 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -312,6 +312,8 @@ void arch_cpu_idle(void) { if (cpuidle_idle_call()) x86_idle(); + else + local_irq_enable(); } /* @@ -368,9 +370,6 @@ void amd_e400_remove_cpu(int cpu) */ static void amd_e400_idle(void) { - if (need_resched()) - return; - if (!amd_e400_c1e_detected) { u32 lo, hi; diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index 198eb201ed3b..0aa29394ed6f 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c @@ -38,8 +38,9 @@ EXPORT_SYMBOL(rtc_lock); * jump to the next second precisely 500 ms later. Check the Motorola * MC146818A or Dallas DS12887 data sheet for details. */ -int mach_set_rtc_mmss(unsigned long nowtime) +int mach_set_rtc_mmss(const struct timespec *now) { + unsigned long nowtime = now->tv_sec; struct rtc_time tm; int retval = 0; @@ -58,7 +59,7 @@ int mach_set_rtc_mmss(unsigned long nowtime) return retval; } -unsigned long mach_get_cmos_time(void) +void mach_get_cmos_time(struct timespec *now) { unsigned int status, year, mon, day, hour, min, sec, century = 0; unsigned long flags; @@ -107,7 +108,8 @@ unsigned long mach_get_cmos_time(void) } else year += CMOS_YEARS_OFFS; - return mktime(year, mon, day, hour, min, sec); + now->tv_sec = mktime(year, mon, day, hour, min, sec); + now->tv_nsec = 0; } /* Routines for accessing the CMOS RAM/RTC. */ @@ -135,18 +137,13 @@ EXPORT_SYMBOL(rtc_cmos_write); int update_persistent_clock(struct timespec now) { - return x86_platform.set_wallclock(now.tv_sec); + return x86_platform.set_wallclock(&now); } /* not static: needed by APM */ void read_persistent_clock(struct timespec *ts) { - unsigned long retval; - - retval = x86_platform.get_wallclock(); - - ts->tv_sec = retval; - ts->tv_nsec = 0; + x86_platform.get_wallclock(ts); } diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 7114c63f047d..8424d5adcfa2 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -882,9 +882,9 @@ int lguest_setup_irq(unsigned int irq) * It would be far better for everyone if the Guest had its own clock, but * until then the Host gives us the time on every interrupt. */ -static unsigned long lguest_get_wallclock(void) +static void lguest_get_wallclock(struct timespec *now) { - return lguest_data.time.tv_sec; + *now = lguest_data.time; } /* diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index fdc5dca14fb3..eaac1743def7 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -359,7 +359,17 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, } /* - * would have hole in the middle or ends, and only ram parts will be mapped. + * We need to iterate through the E820 memory map and create direct mappings + * for only E820_RAM and E820_KERN_RESERVED regions. We cannot simply + * create direct mappings for all pfns from [0 to max_low_pfn) and + * [4GB to max_pfn) because of possible memory holes in high addresses + * that cannot be marked as UC by fixed/variable range MTRRs. + * Depending on the alignment of E820 ranges, this may possibly result + * in using smaller size (i.e. 4K instead of 2M or 1G) page tables. + * + * init_mem_mapping() calls init_range_memory_mapping() with big range. + * That range would have hole in the middle or ends, and only ram parts + * will be mapped in init_range_memory_mapping(). */ static unsigned long __init init_range_memory_mapping( unsigned long r_start, @@ -419,6 +429,13 @@ void __init init_mem_mapping(void) max_pfn_mapped = 0; /* will get exact value next */ min_pfn_mapped = real_end >> PAGE_SHIFT; last_start = start = real_end; + + /* + * We start from the top (end of memory) and go to the bottom. + * The memblock_find_in_range() gets us a block of RAM from the + * end of RAM in [min_pfn_mapped, max_pfn_mapped) used as new pages + * for page table. + */ while (last_start > ISA_END_ADDRESS) { if (last_start > step_size) { start = round_down(last_start - 1, step_size); diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 55856b2310d3..dd3b82530145 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -352,8 +352,9 @@ static efi_status_t __init phys_efi_get_time(efi_time_t *tm, return status; } -int efi_set_rtc_mmss(unsigned long nowtime) +int efi_set_rtc_mmss(const struct timespec *now) { + unsigned long nowtime = now->tv_sec; efi_status_t status; efi_time_t eft; efi_time_cap_t cap; @@ -388,7 +389,7 @@ int efi_set_rtc_mmss(unsigned long nowtime) return 0; } -unsigned long efi_get_time(void) +void efi_get_time(struct timespec *now) { efi_status_t status; efi_time_t eft; @@ -398,8 +399,9 @@ unsigned long efi_get_time(void) if (status != EFI_SUCCESS) pr_err("Oops: efitime: can't read time!\n"); - return mktime(eft.year, eft.month, eft.day, eft.hour, - eft.minute, eft.second); + now->tv_sec = mktime(eft.year, eft.month, eft.day, eft.hour, + eft.minute, eft.second); + now->tv_nsec = 0; } /* diff --git a/arch/x86/platform/mrst/vrtc.c b/arch/x86/platform/mrst/vrtc.c index d62b0a3b5c14..5e355b134ba4 100644 --- a/arch/x86/platform/mrst/vrtc.c +++ b/arch/x86/platform/mrst/vrtc.c @@ -56,7 +56,7 @@ void vrtc_cmos_write(unsigned char val, unsigned char reg) } EXPORT_SYMBOL_GPL(vrtc_cmos_write); -unsigned long vrtc_get_time(void) +void vrtc_get_time(struct timespec *now) { u8 sec, min, hour, mday, mon; unsigned long flags; @@ -82,17 +82,18 @@ unsigned long vrtc_get_time(void) printk(KERN_INFO "vRTC: sec: %d min: %d hour: %d day: %d " "mon: %d year: %d\n", sec, min, hour, mday, mon, year); - return mktime(year, mon, mday, hour, min, sec); + now->tv_sec = mktime(year, mon, mday, hour, min, sec); + now->tv_nsec = 0; } -int vrtc_set_mmss(unsigned long nowtime) +int vrtc_set_mmss(const struct timespec *now) { unsigned long flags; struct rtc_time tm; int year; int retval = 0; - rtc_time_to_tm(nowtime, &tm); + rtc_time_to_tm(now->tv_sec, &tm); if (!rtc_valid_tm(&tm) && tm.tm_year >= 72) { /* * tm.year is the number of years since 1900, and the @@ -110,7 +111,7 @@ int vrtc_set_mmss(unsigned long nowtime) } else { printk(KERN_ERR "%s: Invalid vRTC value: write of %lx to vRTC failed\n", - __FUNCTION__, nowtime); + __FUNCTION__, now->tv_sec); retval = -EINVAL; } return retval; diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 3d88bfdf9e1c..a1947ac2da82 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -191,32 +191,25 @@ static void xen_read_wallclock(struct timespec *ts) put_cpu_var(xen_vcpu); } -static unsigned long xen_get_wallclock(void) +static void xen_get_wallclock(struct timespec *now) { - struct timespec ts; - - xen_read_wallclock(&ts); - return ts.tv_sec; + xen_read_wallclock(now); } -static int xen_set_wallclock(unsigned long now) +static int xen_set_wallclock(const struct timespec *now) { struct xen_platform_op op; - int rc; /* do nothing for domU */ if (!xen_initial_domain()) return -1; op.cmd = XENPF_settime; - op.u.settime.secs = now; - op.u.settime.nsecs = 0; + op.u.settime.secs = now->tv_sec; + op.u.settime.nsecs = now->tv_nsec; op.u.settime.system_time = xen_clocksource_read(); - rc = HYPERVISOR_dom0_op(&op); - WARN(rc != 0, "XENPF_settime failed: now=%ld\n", now); - - return rc; + return HYPERVISOR_dom0_op(&op); } static struct clocksource xen_clocksource __read_mostly = { |