diff options
author | Takashi Iwai <tiwai@suse.de> | 2018-03-12 17:30:38 +0100 |
---|---|---|
committer | Takashi Iwai <tiwai@suse.de> | 2018-03-12 17:30:38 +0100 |
commit | db45dc9540ea4864bb9d22eed30d3e70ed5d56d4 (patch) | |
tree | c611d75097036ff464e9af31aa729e34c863ec93 /arch | |
parent | 40088dc4e1ead7df31728c73f5b51d71da18831d (diff) | |
parent | a3e39ed1f40e14f24bec0f91c4fa0408fb6c4d02 (diff) | |
download | talos-obmc-linux-db45dc9540ea4864bb9d22eed30d3e70ed5d56d4.tar.gz talos-obmc-linux-db45dc9540ea4864bb9d22eed30d3e70ed5d56d4.zip |
Merge tag 'asoc-fix-v4.16-rc5' of https://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound into for-linus
ASoC: Fixes for v4.16
This is a fairly standard collection of fixes, there's no changes to the
core here just a bunch of small device specific changes for single
drivers plus an update to the MAINTAINERS file for the sgl5000.
Diffstat (limited to 'arch')
297 files changed, 3265 insertions, 1938 deletions
diff --git a/arch/alpha/include/asm/cmpxchg.h b/arch/alpha/include/asm/cmpxchg.h index 46ebf14aed4e..8a2b331e43fe 100644 --- a/arch/alpha/include/asm/cmpxchg.h +++ b/arch/alpha/include/asm/cmpxchg.h @@ -6,7 +6,6 @@ * Atomic exchange routines. */ -#define __ASM__MB #define ____xchg(type, args...) __xchg ## type ## _local(args) #define ____cmpxchg(type, args...) __cmpxchg ## type ## _local(args) #include <asm/xchg.h> @@ -33,10 +32,6 @@ cmpxchg_local((ptr), (o), (n)); \ }) -#ifdef CONFIG_SMP -#undef __ASM__MB -#define __ASM__MB "\tmb\n" -#endif #undef ____xchg #undef ____cmpxchg #define ____xchg(type, args...) __xchg ##type(args) @@ -64,7 +59,6 @@ cmpxchg((ptr), (o), (n)); \ }) -#undef __ASM__MB #undef ____cmpxchg #endif /* _ALPHA_CMPXCHG_H */ diff --git a/arch/alpha/include/asm/xchg.h b/arch/alpha/include/asm/xchg.h index 68dfb3cb7145..e2b59fac5257 100644 --- a/arch/alpha/include/asm/xchg.h +++ b/arch/alpha/include/asm/xchg.h @@ -12,6 +12,10 @@ * Atomic exchange. * Since it can be used to implement critical sections * it must clobber "memory" (also for interrupts in UP). + * + * The leading and the trailing memory barriers guarantee that these + * operations are fully ordered. + * */ static inline unsigned long @@ -19,6 +23,7 @@ ____xchg(_u8, volatile char *m, unsigned long val) { unsigned long ret, tmp, addr64; + smp_mb(); __asm__ __volatile__( " andnot %4,7,%3\n" " insbl %1,%4,%1\n" @@ -28,12 +33,12 @@ ____xchg(_u8, volatile char *m, unsigned long val) " or %1,%2,%2\n" " stq_c %2,0(%3)\n" " beq %2,2f\n" - __ASM__MB ".subsection 2\n" "2: br 1b\n" ".previous" : "=&r" (ret), "=&r" (val), "=&r" (tmp), "=&r" (addr64) : "r" ((long)m), "1" (val) : "memory"); + smp_mb(); return ret; } @@ -43,6 +48,7 @@ ____xchg(_u16, volatile short *m, unsigned long val) { unsigned long ret, tmp, addr64; + smp_mb(); __asm__ __volatile__( " andnot %4,7,%3\n" " inswl %1,%4,%1\n" @@ -52,12 +58,12 @@ ____xchg(_u16, volatile short *m, unsigned long val) " or %1,%2,%2\n" " stq_c %2,0(%3)\n" " beq %2,2f\n" - __ASM__MB ".subsection 2\n" "2: br 1b\n" ".previous" : "=&r" (ret), "=&r" (val), "=&r" (tmp), "=&r" (addr64) : "r" ((long)m), "1" (val) : "memory"); + smp_mb(); return ret; } @@ -67,17 +73,18 @@ ____xchg(_u32, volatile int *m, unsigned long val) { unsigned long dummy; + smp_mb(); __asm__ __volatile__( "1: ldl_l %0,%4\n" " bis $31,%3,%1\n" " stl_c %1,%2\n" " beq %1,2f\n" - __ASM__MB ".subsection 2\n" "2: br 1b\n" ".previous" : "=&r" (val), "=&r" (dummy), "=m" (*m) : "rI" (val), "m" (*m) : "memory"); + smp_mb(); return val; } @@ -87,17 +94,18 @@ ____xchg(_u64, volatile long *m, unsigned long val) { unsigned long dummy; + smp_mb(); __asm__ __volatile__( "1: ldq_l %0,%4\n" " bis $31,%3,%1\n" " stq_c %1,%2\n" " beq %1,2f\n" - __ASM__MB ".subsection 2\n" "2: br 1b\n" ".previous" : "=&r" (val), "=&r" (dummy), "=m" (*m) : "rI" (val), "m" (*m) : "memory"); + smp_mb(); return val; } @@ -128,10 +136,12 @@ ____xchg(, volatile void *ptr, unsigned long x, int size) * store NEW in MEM. Return the initial value in MEM. Success is * indicated by comparing RETURN with OLD. * - * The memory barrier should be placed in SMP only when we actually - * make the change. If we don't change anything (so if the returned - * prev is equal to old) then we aren't acquiring anything new and - * we don't need any memory barrier as far I can tell. + * The leading and the trailing memory barriers guarantee that these + * operations are fully ordered. + * + * The trailing memory barrier is placed in SMP unconditionally, in + * order to guarantee that dependency ordering is preserved when a + * dependency is headed by an unsuccessful operation. */ static inline unsigned long @@ -139,6 +149,7 @@ ____cmpxchg(_u8, volatile char *m, unsigned char old, unsigned char new) { unsigned long prev, tmp, cmp, addr64; + smp_mb(); __asm__ __volatile__( " andnot %5,7,%4\n" " insbl %1,%5,%1\n" @@ -150,13 +161,13 @@ ____cmpxchg(_u8, volatile char *m, unsigned char old, unsigned char new) " or %1,%2,%2\n" " stq_c %2,0(%4)\n" " beq %2,3f\n" - __ASM__MB "2:\n" ".subsection 2\n" "3: br 1b\n" ".previous" : "=&r" (prev), "=&r" (new), "=&r" (tmp), "=&r" (cmp), "=&r" (addr64) : "r" ((long)m), "Ir" (old), "1" (new) : "memory"); + smp_mb(); return prev; } @@ -166,6 +177,7 @@ ____cmpxchg(_u16, volatile short *m, unsigned short old, unsigned short new) { unsigned long prev, tmp, cmp, addr64; + smp_mb(); __asm__ __volatile__( " andnot %5,7,%4\n" " inswl %1,%5,%1\n" @@ -177,13 +189,13 @@ ____cmpxchg(_u16, volatile short *m, unsigned short old, unsigned short new) " or %1,%2,%2\n" " stq_c %2,0(%4)\n" " beq %2,3f\n" - __ASM__MB "2:\n" ".subsection 2\n" "3: br 1b\n" ".previous" : "=&r" (prev), "=&r" (new), "=&r" (tmp), "=&r" (cmp), "=&r" (addr64) : "r" ((long)m), "Ir" (old), "1" (new) : "memory"); + smp_mb(); return prev; } @@ -193,6 +205,7 @@ ____cmpxchg(_u32, volatile int *m, int old, int new) { unsigned long prev, cmp; + smp_mb(); __asm__ __volatile__( "1: ldl_l %0,%5\n" " cmpeq %0,%3,%1\n" @@ -200,13 +213,13 @@ ____cmpxchg(_u32, volatile int *m, int old, int new) " mov %4,%1\n" " stl_c %1,%2\n" " beq %1,3f\n" - __ASM__MB "2:\n" ".subsection 2\n" "3: br 1b\n" ".previous" : "=&r"(prev), "=&r"(cmp), "=m"(*m) : "r"((long) old), "r"(new), "m"(*m) : "memory"); + smp_mb(); return prev; } @@ -216,6 +229,7 @@ ____cmpxchg(_u64, volatile long *m, unsigned long old, unsigned long new) { unsigned long prev, cmp; + smp_mb(); __asm__ __volatile__( "1: ldq_l %0,%5\n" " cmpeq %0,%3,%1\n" @@ -223,13 +237,13 @@ ____cmpxchg(_u64, volatile long *m, unsigned long old, unsigned long new) " mov %4,%1\n" " stq_c %1,%2\n" " beq %1,3f\n" - __ASM__MB "2:\n" ".subsection 2\n" "3: br 1b\n" ".previous" : "=&r"(prev), "=&r"(cmp), "=m"(*m) : "r"((long) old), "r"(new), "m"(*m) : "memory"); + smp_mb(); return prev; } diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index f3a80cf164cc..d76bf4a83740 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -484,7 +484,6 @@ config ARC_CURR_IN_REG config ARC_EMUL_UNALIGNED bool "Emulate unaligned memory access (userspace only)" - default N select SYSCTL_ARCH_UNALIGN_NO_WARN select SYSCTL_ARCH_UNALIGN_ALLOW depends on ISA_ARCOMPACT diff --git a/arch/arc/boot/dts/axs101.dts b/arch/arc/boot/dts/axs101.dts index 70aec7d6ca60..626b694c7be7 100644 --- a/arch/arc/boot/dts/axs101.dts +++ b/arch/arc/boot/dts/axs101.dts @@ -17,6 +17,6 @@ compatible = "snps,axs101", "snps,arc-sdp"; chosen { - bootargs = "earlycon=uart8250,mmio32,0xe0022000,115200n8 console=tty0 console=ttyS3,115200n8 consoleblank=0 video=1280x720@60"; + bootargs = "earlycon=uart8250,mmio32,0xe0022000,115200n8 console=tty0 console=ttyS3,115200n8 consoleblank=0 video=1280x720@60 print-fatal-signals=1"; }; }; diff --git a/arch/arc/boot/dts/axs10x_mb.dtsi b/arch/arc/boot/dts/axs10x_mb.dtsi index 74d070cd3c13..47b74fbc403c 100644 --- a/arch/arc/boot/dts/axs10x_mb.dtsi +++ b/arch/arc/boot/dts/axs10x_mb.dtsi @@ -214,13 +214,13 @@ }; eeprom@0x54{ - compatible = "24c01"; + compatible = "atmel,24c01"; reg = <0x54>; pagesize = <0x8>; }; eeprom@0x57{ - compatible = "24c04"; + compatible = "atmel,24c04"; reg = <0x57>; pagesize = <0x8>; }; diff --git a/arch/arc/boot/dts/haps_hs_idu.dts b/arch/arc/boot/dts/haps_hs_idu.dts index 215cddd0b63b..0c603308aeb3 100644 --- a/arch/arc/boot/dts/haps_hs_idu.dts +++ b/arch/arc/boot/dts/haps_hs_idu.dts @@ -22,7 +22,7 @@ }; chosen { - bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=ttyS0,115200n8 debug"; + bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=ttyS0,115200n8 debug print-fatal-signals=1"; }; aliases { diff --git a/arch/arc/boot/dts/nsim_700.dts b/arch/arc/boot/dts/nsim_700.dts index 5ee96b067c08..ff2f2c70c545 100644 --- a/arch/arc/boot/dts/nsim_700.dts +++ b/arch/arc/boot/dts/nsim_700.dts @@ -17,7 +17,7 @@ interrupt-parent = <&core_intc>; chosen { - bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8"; + bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8 print-fatal-signals=1"; }; aliases { diff --git a/arch/arc/boot/dts/nsim_hs.dts b/arch/arc/boot/dts/nsim_hs.dts index 8d787b251f73..8e2489b16b0a 100644 --- a/arch/arc/boot/dts/nsim_hs.dts +++ b/arch/arc/boot/dts/nsim_hs.dts @@ -24,7 +24,7 @@ }; chosen { - bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8"; + bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8 print-fatal-signals=1"; }; aliases { diff --git a/arch/arc/boot/dts/nsim_hs_idu.dts b/arch/arc/boot/dts/nsim_hs_idu.dts index 4f98ebf71fd8..ed12f494721d 100644 --- a/arch/arc/boot/dts/nsim_hs_idu.dts +++ b/arch/arc/boot/dts/nsim_hs_idu.dts @@ -15,7 +15,7 @@ interrupt-parent = <&core_intc>; chosen { - bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8"; + bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8 print-fatal-signals=1"; }; aliases { diff --git a/arch/arc/boot/dts/nsimosci.dts b/arch/arc/boot/dts/nsimosci.dts index 3c391ba565ed..7842e5eb4ab5 100644 --- a/arch/arc/boot/dts/nsimosci.dts +++ b/arch/arc/boot/dts/nsimosci.dts @@ -20,7 +20,7 @@ /* this is for console on PGU */ /* bootargs = "console=tty0 consoleblank=0"; */ /* this is for console on serial */ - bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug video=640x480-24"; + bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug video=640x480-24 print-fatal-signals=1"; }; aliases { diff --git a/arch/arc/boot/dts/nsimosci_hs.dts b/arch/arc/boot/dts/nsimosci_hs.dts index 14a727cbf4c9..b8838cf2b4ec 100644 --- a/arch/arc/boot/dts/nsimosci_hs.dts +++ b/arch/arc/boot/dts/nsimosci_hs.dts @@ -20,7 +20,7 @@ /* this is for console on PGU */ /* bootargs = "console=tty0 consoleblank=0"; */ /* this is for console on serial */ - bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug video=640x480-24"; + bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug video=640x480-24 print-fatal-signals=1"; }; aliases { diff --git a/arch/arc/boot/dts/nsimosci_hs_idu.dts b/arch/arc/boot/dts/nsimosci_hs_idu.dts index 5052917d4a99..72a2c723f1f7 100644 --- a/arch/arc/boot/dts/nsimosci_hs_idu.dts +++ b/arch/arc/boot/dts/nsimosci_hs_idu.dts @@ -18,7 +18,7 @@ chosen { /* this is for console on serial */ - bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblan=0 debug video=640x480-24"; + bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblan=0 debug video=640x480-24 print-fatal-signals=1"; }; aliases { diff --git a/arch/arc/include/asm/bug.h b/arch/arc/include/asm/bug.h index ea022d47896c..21ec82466d62 100644 --- a/arch/arc/include/asm/bug.h +++ b/arch/arc/include/asm/bug.h @@ -23,7 +23,8 @@ void die(const char *str, struct pt_regs *regs, unsigned long address); #define BUG() do { \ pr_warn("BUG: failure at %s:%d/%s()!\n", __FILE__, __LINE__, __func__); \ - dump_stack(); \ + barrier_before_unreachable(); \ + __builtin_trap(); \ } while (0) #define HAVE_ARCH_BUG diff --git a/arch/arc/include/asm/entry-arcv2.h b/arch/arc/include/asm/entry-arcv2.h index 257a68f3c2fe..309f4e6721b3 100644 --- a/arch/arc/include/asm/entry-arcv2.h +++ b/arch/arc/include/asm/entry-arcv2.h @@ -184,7 +184,7 @@ .macro FAKE_RET_FROM_EXCPN lr r9, [status32] bic r9, r9, (STATUS_U_MASK|STATUS_DE_MASK|STATUS_AE_MASK) - or r9, r9, (STATUS_L_MASK|STATUS_IE_MASK) + or r9, r9, STATUS_IE_MASK kflag r9 .endm diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c index f61a52b01625..5fe84e481654 100644 --- a/arch/arc/kernel/mcip.c +++ b/arch/arc/kernel/mcip.c @@ -22,10 +22,79 @@ static DEFINE_RAW_SPINLOCK(mcip_lock); static char smp_cpuinfo_buf[128]; +/* + * Set mask to halt GFRC if any online core in SMP cluster is halted. + * Only works for ARC HS v3.0+, on earlier versions has no effect. + */ +static void mcip_update_gfrc_halt_mask(int cpu) +{ + struct bcr_generic gfrc; + unsigned long flags; + u32 gfrc_halt_mask; + + READ_BCR(ARC_REG_GFRC_BUILD, gfrc); + + /* + * CMD_GFRC_SET_CORE and CMD_GFRC_READ_CORE commands were added in + * GFRC 0x3 version. + */ + if (gfrc.ver < 0x3) + return; + + raw_spin_lock_irqsave(&mcip_lock, flags); + + __mcip_cmd(CMD_GFRC_READ_CORE, 0); + gfrc_halt_mask = read_aux_reg(ARC_REG_MCIP_READBACK); + gfrc_halt_mask |= BIT(cpu); + __mcip_cmd_data(CMD_GFRC_SET_CORE, 0, gfrc_halt_mask); + + raw_spin_unlock_irqrestore(&mcip_lock, flags); +} + +static void mcip_update_debug_halt_mask(int cpu) +{ + u32 mcip_mask = 0; + unsigned long flags; + + raw_spin_lock_irqsave(&mcip_lock, flags); + + /* + * mcip_mask is same for CMD_DEBUG_SET_SELECT and CMD_DEBUG_SET_MASK + * commands. So read it once instead of reading both CMD_DEBUG_READ_MASK + * and CMD_DEBUG_READ_SELECT. + */ + __mcip_cmd(CMD_DEBUG_READ_SELECT, 0); + mcip_mask = read_aux_reg(ARC_REG_MCIP_READBACK); + + mcip_mask |= BIT(cpu); + + __mcip_cmd_data(CMD_DEBUG_SET_SELECT, 0, mcip_mask); + /* + * Parameter specified halt cause: + * STATUS32[H]/actionpoint/breakpoint/self-halt + * We choose all of them (0xF). + */ + __mcip_cmd_data(CMD_DEBUG_SET_MASK, 0xF, mcip_mask); + + raw_spin_unlock_irqrestore(&mcip_lock, flags); +} + static void mcip_setup_per_cpu(int cpu) { + struct mcip_bcr mp; + + READ_BCR(ARC_REG_MCIP_BCR, mp); + smp_ipi_irq_setup(cpu, IPI_IRQ); smp_ipi_irq_setup(cpu, SOFTIRQ_IRQ); + + /* Update GFRC halt mask as new CPU came online */ + if (mp.gfrc) + mcip_update_gfrc_halt_mask(cpu); + + /* Update MCIP debug mask as new CPU came online */ + if (mp.dbg) + mcip_update_debug_halt_mask(cpu); } static void mcip_ipi_send(int cpu) @@ -101,11 +170,6 @@ static void mcip_probe_n_setup(void) IS_AVAIL1(mp.gfrc, "GFRC")); cpuinfo_arc700[0].extn.gfrc = mp.gfrc; - - if (mp.dbg) { - __mcip_cmd_data(CMD_DEBUG_SET_SELECT, 0, 0xf); - __mcip_cmd_data(CMD_DEBUG_SET_MASK, 0xf, 0xf); - } } struct plat_smp_ops plat_smp_ops = { diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index 9d27331fe69a..b2cae79a25d7 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -51,7 +51,7 @@ static const struct id_to_str arc_cpu_rel[] = { { 0x51, "R2.0" }, { 0x52, "R2.1" }, { 0x53, "R3.0" }, - { 0x54, "R4.0" }, + { 0x54, "R3.10a" }, #endif { 0x00, NULL } }; @@ -373,7 +373,7 @@ static void arc_chk_core_config(void) { struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()]; int saved = 0, present = 0; - char *opt_nm = NULL;; + char *opt_nm = NULL; if (!cpu->extn.timer0) panic("Timer0 is not present!\n"); diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c index efe8b4200a67..21d86c36692b 100644 --- a/arch/arc/kernel/smp.c +++ b/arch/arc/kernel/smp.c @@ -24,6 +24,7 @@ #include <linux/reboot.h> #include <linux/irqdomain.h> #include <linux/export.h> +#include <linux/of_fdt.h> #include <asm/processor.h> #include <asm/setup.h> @@ -47,6 +48,42 @@ void __init smp_prepare_boot_cpu(void) { } +static int __init arc_get_cpu_map(const char *name, struct cpumask *cpumask) +{ + unsigned long dt_root = of_get_flat_dt_root(); + const char *buf; + + buf = of_get_flat_dt_prop(dt_root, name, NULL); + if (!buf) + return -EINVAL; + + if (cpulist_parse(buf, cpumask)) + return -EINVAL; + + return 0; +} + +/* + * Read from DeviceTree and setup cpu possible mask. If there is no + * "possible-cpus" property in DeviceTree pretend all [0..NR_CPUS-1] exist. + */ +static void __init arc_init_cpu_possible(void) +{ + struct cpumask cpumask; + + if (arc_get_cpu_map("possible-cpus", &cpumask)) { + pr_warn("Failed to get possible-cpus from dtb, pretending all %u cpus exist\n", + NR_CPUS); + + cpumask_setall(&cpumask); + } + + if (!cpumask_test_cpu(0, &cpumask)) + panic("Master cpu (cpu[0]) is missed in cpu possible mask!"); + + init_cpu_possible(&cpumask); +} + /* * Called from setup_arch() before calling setup_processor() * @@ -58,10 +95,7 @@ void __init smp_prepare_boot_cpu(void) */ void __init smp_init_cpus(void) { - unsigned int i; - - for (i = 0; i < NR_CPUS; i++) - set_cpu_possible(i, true); + arc_init_cpu_possible(); if (plat_smp_ops.init_early_smp) plat_smp_ops.init_early_smp(); @@ -70,16 +104,12 @@ void __init smp_init_cpus(void) /* called from init ( ) => process 1 */ void __init smp_prepare_cpus(unsigned int max_cpus) { - int i; - /* * if platform didn't set the present map already, do it now * boot cpu is set to present already by init/main.c */ - if (num_present_cpus() <= 1) { - for (i = 0; i < max_cpus; i++) - set_cpu_present(i, true); - } + if (num_present_cpus() <= 1) + init_cpu_present(cpu_possible_mask); } void __init smp_cpus_done(unsigned int max_cpus) diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c index 333daab7def0..183391d4d33a 100644 --- a/arch/arc/kernel/unwind.c +++ b/arch/arc/kernel/unwind.c @@ -366,7 +366,7 @@ static void init_unwind_hdr(struct unwind_table *table, return; ret_err: - panic("Attention !!! Dwarf FDE parsing errors\n");; + panic("Attention !!! Dwarf FDE parsing errors\n"); } #ifdef CONFIG_MODULES diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index eee924dfffa6..2072f3451e9c 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -780,7 +780,10 @@ noinline static void slc_entire_op(const int op) write_aux_reg(r, ctrl); - write_aux_reg(ARC_REG_SLC_INVALIDATE, 1); + if (op & OP_INV) /* Inv or flush-n-inv use same cmd reg */ + write_aux_reg(ARC_REG_SLC_INVALIDATE, 0x1); + else + write_aux_reg(ARC_REG_SLC_FLUSH, 0x1); /* Make sure "busy" bit reports correct stataus, see STAR 9001165532 */ read_aux_reg(r); diff --git a/arch/arm/boot/dts/bcm11351.dtsi b/arch/arm/boot/dts/bcm11351.dtsi index 18045c38bcf1..db7cded1b7ad 100644 --- a/arch/arm/boot/dts/bcm11351.dtsi +++ b/arch/arm/boot/dts/bcm11351.dtsi @@ -55,7 +55,7 @@ <0x3ff00100 0x100>; }; - smc@0x3404c000 { + smc@3404c000 { compatible = "brcm,bcm11351-smc", "brcm,kona-smc"; reg = <0x3404c000 0x400>; /* 1 KiB in SRAM */ }; diff --git a/arch/arm/boot/dts/bcm21664.dtsi b/arch/arm/boot/dts/bcm21664.dtsi index 6dde95f21cef..266f2611dc22 100644 --- a/arch/arm/boot/dts/bcm21664.dtsi +++ b/arch/arm/boot/dts/bcm21664.dtsi @@ -55,7 +55,7 @@ <0x3ff00100 0x100>; }; - smc@0x3404e000 { + smc@3404e000 { compatible = "brcm,bcm21664-smc", "brcm,kona-smc"; reg = <0x3404e000 0x400>; /* 1 KiB in SRAM */ }; diff --git a/arch/arm/boot/dts/bcm2835.dtsi b/arch/arm/boot/dts/bcm2835.dtsi index 0e3d2a5ff208..a5c3824c8056 100644 --- a/arch/arm/boot/dts/bcm2835.dtsi +++ b/arch/arm/boot/dts/bcm2835.dtsi @@ -18,10 +18,10 @@ soc { ranges = <0x7e000000 0x20000000 0x02000000>; dma-ranges = <0x40000000 0x00000000 0x20000000>; + }; - arm-pmu { - compatible = "arm,arm1176-pmu"; - }; + arm-pmu { + compatible = "arm,arm1176-pmu"; }; }; diff --git a/arch/arm/boot/dts/bcm2836.dtsi b/arch/arm/boot/dts/bcm2836.dtsi index 1dfd76442777..c933e8413884 100644 --- a/arch/arm/boot/dts/bcm2836.dtsi +++ b/arch/arm/boot/dts/bcm2836.dtsi @@ -9,19 +9,19 @@ <0x40000000 0x40000000 0x00001000>; dma-ranges = <0xc0000000 0x00000000 0x3f000000>; - local_intc: local_intc { + local_intc: local_intc@40000000 { compatible = "brcm,bcm2836-l1-intc"; reg = <0x40000000 0x100>; interrupt-controller; #interrupt-cells = <2>; interrupt-parent = <&local_intc>; }; + }; - arm-pmu { - compatible = "arm,cortex-a7-pmu"; - interrupt-parent = <&local_intc>; - interrupts = <9 IRQ_TYPE_LEVEL_HIGH>; - }; + arm-pmu { + compatible = "arm,cortex-a7-pmu"; + interrupt-parent = <&local_intc>; + interrupts = <9 IRQ_TYPE_LEVEL_HIGH>; }; timer { diff --git a/arch/arm/boot/dts/bcm2837.dtsi b/arch/arm/boot/dts/bcm2837.dtsi index efa7d3387ab2..7704bb029605 100644 --- a/arch/arm/boot/dts/bcm2837.dtsi +++ b/arch/arm/boot/dts/bcm2837.dtsi @@ -8,7 +8,7 @@ <0x40000000 0x40000000 0x00001000>; dma-ranges = <0xc0000000 0x00000000 0x3f000000>; - local_intc: local_intc { + local_intc: local_intc@40000000 { compatible = "brcm,bcm2836-l1-intc"; reg = <0x40000000 0x100>; interrupt-controller; diff --git a/arch/arm/boot/dts/bcm283x.dtsi b/arch/arm/boot/dts/bcm283x.dtsi index 18db25a5a66e..9d293decf8d3 100644 --- a/arch/arm/boot/dts/bcm283x.dtsi +++ b/arch/arm/boot/dts/bcm283x.dtsi @@ -465,7 +465,7 @@ status = "disabled"; }; - aux: aux@0x7e215000 { + aux: aux@7e215000 { compatible = "brcm,bcm2835-aux"; #clock-cells = <1>; reg = <0x7e215000 0x8>; diff --git a/arch/arm/boot/dts/bcm958625hr.dts b/arch/arm/boot/dts/bcm958625hr.dts index 6a44b8021702..f0e2008f7490 100644 --- a/arch/arm/boot/dts/bcm958625hr.dts +++ b/arch/arm/boot/dts/bcm958625hr.dts @@ -49,7 +49,7 @@ memory { device_type = "memory"; - reg = <0x60000000 0x80000000>; + reg = <0x60000000 0x20000000>; }; gpio-restart { diff --git a/arch/arm/boot/dts/gemini-dlink-dns-313.dts b/arch/arm/boot/dts/gemini-dlink-dns-313.dts index 08568ce24d06..da8bb9d60f99 100644 --- a/arch/arm/boot/dts/gemini-dlink-dns-313.dts +++ b/arch/arm/boot/dts/gemini-dlink-dns-313.dts @@ -269,7 +269,7 @@ sata: sata@46000000 { /* The ROM uses this muxmode */ - cortina,gemini-ata-muxmode = <3>; + cortina,gemini-ata-muxmode = <0>; cortina,gemini-enable-sata-bridge; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6dl-icore-rqs.dts b/arch/arm/boot/dts/imx6dl-icore-rqs.dts index cf42c2f5cdc7..1281bc39b7ab 100644 --- a/arch/arm/boot/dts/imx6dl-icore-rqs.dts +++ b/arch/arm/boot/dts/imx6dl-icore-rqs.dts @@ -42,7 +42,7 @@ /dts-v1/; -#include "imx6q.dtsi" +#include "imx6dl.dtsi" #include "imx6qdl-icore-rqs.dtsi" / { diff --git a/arch/arm/boot/dts/logicpd-som-lv.dtsi b/arch/arm/boot/dts/logicpd-som-lv.dtsi index c1aa7a4518fb..a30ee9fcb3ae 100644 --- a/arch/arm/boot/dts/logicpd-som-lv.dtsi +++ b/arch/arm/boot/dts/logicpd-som-lv.dtsi @@ -71,6 +71,8 @@ }; &i2c1 { + pinctrl-names = "default"; + pinctrl-0 = <&i2c1_pins>; clock-frequency = <2600000>; twl: twl@48 { @@ -189,7 +191,12 @@ >; }; - + i2c1_pins: pinmux_i2c1_pins { + pinctrl-single,pins = < + OMAP3_CORE1_IOPAD(0x21ba, PIN_INPUT | MUX_MODE0) /* i2c1_scl.i2c1_scl */ + OMAP3_CORE1_IOPAD(0x21bc, PIN_INPUT | MUX_MODE0) /* i2c1_sda.i2c1_sda */ + >; + }; }; &omap3_pmx_wkup { diff --git a/arch/arm/boot/dts/logicpd-torpedo-som.dtsi b/arch/arm/boot/dts/logicpd-torpedo-som.dtsi index b50b796e15c7..47915447a826 100644 --- a/arch/arm/boot/dts/logicpd-torpedo-som.dtsi +++ b/arch/arm/boot/dts/logicpd-torpedo-som.dtsi @@ -66,6 +66,8 @@ }; &i2c1 { + pinctrl-names = "default"; + pinctrl-0 = <&i2c1_pins>; clock-frequency = <2600000>; twl: twl@48 { @@ -136,6 +138,12 @@ OMAP3_CORE1_IOPAD(0x21b8, PIN_INPUT | MUX_MODE0) /* hsusb0_data7.hsusb0_data7 */ >; }; + i2c1_pins: pinmux_i2c1_pins { + pinctrl-single,pins = < + OMAP3_CORE1_IOPAD(0x21ba, PIN_INPUT | MUX_MODE0) /* i2c1_scl.i2c1_scl */ + OMAP3_CORE1_IOPAD(0x21bc, PIN_INPUT | MUX_MODE0) /* i2c1_sda.i2c1_sda */ + >; + }; }; &uart2 { diff --git a/arch/arm/boot/dts/omap5-uevm.dts b/arch/arm/boot/dts/omap5-uevm.dts index ec2c8baef62a..592e17fd4eeb 100644 --- a/arch/arm/boot/dts/omap5-uevm.dts +++ b/arch/arm/boot/dts/omap5-uevm.dts @@ -47,7 +47,7 @@ gpios = <&gpio3 19 GPIO_ACTIVE_LOW>; /* gpio3_83 */ wakeup-source; autorepeat; - debounce_interval = <50>; + debounce-interval = <50>; }; }; diff --git a/arch/arm/boot/dts/rk3036.dtsi b/arch/arm/boot/dts/rk3036.dtsi index 3b704cfed69a..a97458112ff6 100644 --- a/arch/arm/boot/dts/rk3036.dtsi +++ b/arch/arm/boot/dts/rk3036.dtsi @@ -280,7 +280,7 @@ max-frequency = <37500000>; clocks = <&cru HCLK_SDIO>, <&cru SCLK_SDIO>, <&cru SCLK_SDIO_DRV>, <&cru SCLK_SDIO_SAMPLE>; - clock-names = "biu", "ciu", "ciu_drv", "ciu_sample"; + clock-names = "biu", "ciu", "ciu-drive", "ciu-sample"; fifo-depth = <0x100>; interrupts = <GIC_SPI 15 IRQ_TYPE_LEVEL_HIGH>; resets = <&cru SRST_SDIO>; @@ -298,7 +298,7 @@ max-frequency = <37500000>; clocks = <&cru HCLK_EMMC>, <&cru SCLK_EMMC>, <&cru SCLK_EMMC_DRV>, <&cru SCLK_EMMC_SAMPLE>; - clock-names = "biu", "ciu", "ciu_drv", "ciu_sample"; + clock-names = "biu", "ciu", "ciu-drive", "ciu-sample"; default-sample-phase = <158>; disable-wp; dmas = <&pdma 12>; diff --git a/arch/arm/boot/dts/rk322x.dtsi b/arch/arm/boot/dts/rk322x.dtsi index 780ec3a99b21..341deaf62ff6 100644 --- a/arch/arm/boot/dts/rk322x.dtsi +++ b/arch/arm/boot/dts/rk322x.dtsi @@ -621,7 +621,7 @@ interrupts = <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>; clocks = <&cru HCLK_SDMMC>, <&cru SCLK_SDMMC>, <&cru SCLK_SDMMC_DRV>, <&cru SCLK_SDMMC_SAMPLE>; - clock-names = "biu", "ciu", "ciu_drv", "ciu_sample"; + clock-names = "biu", "ciu", "ciu-drive", "ciu-sample"; fifo-depth = <0x100>; pinctrl-names = "default"; pinctrl-0 = <&sdmmc_clk &sdmmc_cmd &sdmmc_bus4>; @@ -634,7 +634,7 @@ interrupts = <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>; clocks = <&cru HCLK_SDIO>, <&cru SCLK_SDIO>, <&cru SCLK_SDIO_DRV>, <&cru SCLK_SDIO_SAMPLE>; - clock-names = "biu", "ciu", "ciu_drv", "ciu_sample"; + clock-names = "biu", "ciu", "ciu-drive", "ciu-sample"; fifo-depth = <0x100>; pinctrl-names = "default"; pinctrl-0 = <&sdio_clk &sdio_cmd &sdio_bus4>; @@ -649,7 +649,7 @@ max-frequency = <37500000>; clocks = <&cru HCLK_EMMC>, <&cru SCLK_EMMC>, <&cru SCLK_EMMC_DRV>, <&cru SCLK_EMMC_SAMPLE>; - clock-names = "biu", "ciu", "ciu_drv", "ciu_sample"; + clock-names = "biu", "ciu", "ciu-drive", "ciu-sample"; bus-width = <8>; default-sample-phase = <158>; fifo-depth = <0x100>; diff --git a/arch/arm/boot/dts/rk3288-phycore-som.dtsi b/arch/arm/boot/dts/rk3288-phycore-som.dtsi index 99cfae875e12..5eae4776ffde 100644 --- a/arch/arm/boot/dts/rk3288-phycore-som.dtsi +++ b/arch/arm/boot/dts/rk3288-phycore-som.dtsi @@ -110,26 +110,6 @@ }; }; -&cpu0 { - cpu0-supply = <&vdd_cpu>; - operating-points = < - /* KHz uV */ - 1800000 1400000 - 1608000 1350000 - 1512000 1300000 - 1416000 1200000 - 1200000 1100000 - 1008000 1050000 - 816000 1000000 - 696000 950000 - 600000 900000 - 408000 900000 - 312000 900000 - 216000 900000 - 126000 900000 - >; -}; - &emmc { status = "okay"; bus-width = <8>; diff --git a/arch/arm/boot/dts/zx296702.dtsi b/arch/arm/boot/dts/zx296702.dtsi index 8a74efdb6360..240e7a23d81f 100644 --- a/arch/arm/boot/dts/zx296702.dtsi +++ b/arch/arm/boot/dts/zx296702.dtsi @@ -56,7 +56,7 @@ clocks = <&topclk ZX296702_A9_PERIPHCLK>; }; - l2cc: l2-cache-controller@0x00c00000 { + l2cc: l2-cache-controller@c00000 { compatible = "arm,pl310-cache"; reg = <0x00c00000 0x1000>; cache-unified; @@ -67,30 +67,30 @@ arm,double-linefill-incr = <0>; }; - pcu: pcu@0xa0008000 { + pcu: pcu@a0008000 { compatible = "zte,zx296702-pcu"; reg = <0xa0008000 0x1000>; }; - topclk: topclk@0x09800000 { + topclk: topclk@9800000 { compatible = "zte,zx296702-topcrm-clk"; reg = <0x09800000 0x1000>; #clock-cells = <1>; }; - lsp1clk: lsp1clk@0x09400000 { + lsp1clk: lsp1clk@9400000 { compatible = "zte,zx296702-lsp1crpm-clk"; reg = <0x09400000 0x1000>; #clock-cells = <1>; }; - lsp0clk: lsp0clk@0x0b000000 { + lsp0clk: lsp0clk@b000000 { compatible = "zte,zx296702-lsp0crpm-clk"; reg = <0x0b000000 0x1000>; #clock-cells = <1>; }; - uart0: serial@0x09405000 { + uart0: serial@9405000 { compatible = "zte,zx296702-uart"; reg = <0x09405000 0x1000>; interrupts = <GIC_SPI 37 IRQ_TYPE_LEVEL_HIGH>; @@ -98,7 +98,7 @@ status = "disabled"; }; - uart1: serial@0x09406000 { + uart1: serial@9406000 { compatible = "zte,zx296702-uart"; reg = <0x09406000 0x1000>; interrupts = <GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>; @@ -106,7 +106,7 @@ status = "disabled"; }; - mmc0: mmc@0x09408000 { + mmc0: mmc@9408000 { compatible = "snps,dw-mshc"; #address-cells = <1>; #size-cells = <0>; @@ -119,7 +119,7 @@ status = "disabled"; }; - mmc1: mmc@0x0b003000 { + mmc1: mmc@b003000 { compatible = "snps,dw-mshc"; #address-cells = <1>; #size-cells = <0>; @@ -132,7 +132,7 @@ status = "disabled"; }; - sysctrl: sysctrl@0xa0007000 { + sysctrl: sysctrl@a0007000 { compatible = "zte,sysctrl", "syscon"; reg = <0xa0007000 0x1000>; }; diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index 2f145c4af93a..92674f247a12 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -319,7 +319,7 @@ CONFIG_MEDIA_CAMERA_SUPPORT=y CONFIG_RC_CORE=m CONFIG_MEDIA_CONTROLLER=y CONFIG_VIDEO_V4L2_SUBDEV_API=y -CONFIG_LIRC=m +CONFIG_LIRC=y CONFIG_RC_DEVICES=y CONFIG_IR_RX51=m CONFIG_V4L_PLATFORM_DRIVERS=y diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c index 629f8e9981f1..cf2701cb0de8 100644 --- a/arch/arm/kernel/time.c +++ b/arch/arm/kernel/time.c @@ -83,7 +83,7 @@ static void dummy_clock_access(struct timespec64 *ts) } static clock_access_fn __read_persistent_clock = dummy_clock_access; -static clock_access_fn __read_boot_clock = dummy_clock_access;; +static clock_access_fn __read_boot_clock = dummy_clock_access; void read_persistent_clock64(struct timespec64 *ts) { diff --git a/arch/arm/kvm/hyp/Makefile b/arch/arm/kvm/hyp/Makefile index 5638ce0c9524..63d6b404d88e 100644 --- a/arch/arm/kvm/hyp/Makefile +++ b/arch/arm/kvm/hyp/Makefile @@ -7,6 +7,8 @@ ccflags-y += -fno-stack-protector -DDISABLE_BRANCH_PROFILING KVM=../../../../virt/kvm +CFLAGS_ARMV7VE :=$(call cc-option, -march=armv7ve) + obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v2-sr.o obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v3-sr.o obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o @@ -15,7 +17,10 @@ obj-$(CONFIG_KVM_ARM_HOST) += tlb.o obj-$(CONFIG_KVM_ARM_HOST) += cp15-sr.o obj-$(CONFIG_KVM_ARM_HOST) += vfp.o obj-$(CONFIG_KVM_ARM_HOST) += banked-sr.o +CFLAGS_banked-sr.o += $(CFLAGS_ARMV7VE) + obj-$(CONFIG_KVM_ARM_HOST) += entry.o obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o obj-$(CONFIG_KVM_ARM_HOST) += switch.o +CFLAGS_switch.o += $(CFLAGS_ARMV7VE) obj-$(CONFIG_KVM_ARM_HOST) += s2-setup.o diff --git a/arch/arm/kvm/hyp/banked-sr.c b/arch/arm/kvm/hyp/banked-sr.c index 111bda8cdebd..be4b8b0a40ad 100644 --- a/arch/arm/kvm/hyp/banked-sr.c +++ b/arch/arm/kvm/hyp/banked-sr.c @@ -20,6 +20,10 @@ #include <asm/kvm_hyp.h> +/* + * gcc before 4.9 doesn't understand -march=armv7ve, so we have to + * trick the assembler. + */ __asm__(".arch_extension virt"); void __hyp_text __banked_save_state(struct kvm_cpu_context *ctxt) diff --git a/arch/arm/mach-clps711x/board-dt.c b/arch/arm/mach-clps711x/board-dt.c index ee1f83b1a332..4c89a8e9a2e3 100644 --- a/arch/arm/mach-clps711x/board-dt.c +++ b/arch/arm/mach-clps711x/board-dt.c @@ -69,7 +69,7 @@ static void clps711x_restart(enum reboot_mode mode, const char *cmd) soft_restart(0); } -static const char *clps711x_compat[] __initconst = { +static const char *const clps711x_compat[] __initconst = { "cirrus,ep7209", NULL }; diff --git a/arch/arm/mach-davinci/board-dm355-evm.c b/arch/arm/mach-davinci/board-dm355-evm.c index e457f299cd44..d6b11907380c 100644 --- a/arch/arm/mach-davinci/board-dm355-evm.c +++ b/arch/arm/mach-davinci/board-dm355-evm.c @@ -368,7 +368,7 @@ static struct spi_eeprom at25640a = { .flags = EE_ADDR2, }; -static struct spi_board_info dm355_evm_spi_info[] __initconst = { +static const struct spi_board_info dm355_evm_spi_info[] __initconst = { { .modalias = "at25", .platform_data = &at25640a, diff --git a/arch/arm/mach-davinci/board-dm355-leopard.c b/arch/arm/mach-davinci/board-dm355-leopard.c index be997243447b..fad9a5611a5d 100644 --- a/arch/arm/mach-davinci/board-dm355-leopard.c +++ b/arch/arm/mach-davinci/board-dm355-leopard.c @@ -217,7 +217,7 @@ static struct spi_eeprom at25640a = { .flags = EE_ADDR2, }; -static struct spi_board_info dm355_leopard_spi_info[] __initconst = { +static const struct spi_board_info dm355_leopard_spi_info[] __initconst = { { .modalias = "at25", .platform_data = &at25640a, diff --git a/arch/arm/mach-davinci/board-dm365-evm.c b/arch/arm/mach-davinci/board-dm365-evm.c index e75741fb2c1d..e3780986d2a3 100644 --- a/arch/arm/mach-davinci/board-dm365-evm.c +++ b/arch/arm/mach-davinci/board-dm365-evm.c @@ -726,7 +726,7 @@ static struct spi_eeprom at25640 = { .flags = EE_ADDR2, }; -static struct spi_board_info dm365_evm_spi_info[] __initconst = { +static const struct spi_board_info dm365_evm_spi_info[] __initconst = { { .modalias = "at25", .platform_data = &at25640, diff --git a/arch/arm/mach-mvebu/Kconfig b/arch/arm/mach-mvebu/Kconfig index 6b32dc527edc..2c20599cc350 100644 --- a/arch/arm/mach-mvebu/Kconfig +++ b/arch/arm/mach-mvebu/Kconfig @@ -41,7 +41,7 @@ config MACH_ARMADA_375 depends on ARCH_MULTI_V7 select ARMADA_370_XP_IRQ select ARM_ERRATA_720789 - select ARM_ERRATA_753970 + select PL310_ERRATA_753970 select ARM_GIC select ARMADA_375_CLK select HAVE_ARM_SCU @@ -57,7 +57,7 @@ config MACH_ARMADA_38X bool "Marvell Armada 380/385 boards" depends on ARCH_MULTI_V7 select ARM_ERRATA_720789 - select ARM_ERRATA_753970 + select PL310_ERRATA_753970 select ARM_GIC select ARM_GLOBAL_TIMER select CLKSRC_ARM_GLOBAL_TIMER_SCHED_CLOCK diff --git a/arch/arm/mach-omap1/clock.c b/arch/arm/mach-omap1/clock.c index 43e3e188f521..fa512413a471 100644 --- a/arch/arm/mach-omap1/clock.c +++ b/arch/arm/mach-omap1/clock.c @@ -1011,17 +1011,17 @@ static int clk_debugfs_register_one(struct clk *c) return -ENOMEM; c->dent = d; - d = debugfs_create_u8("usecount", S_IRUGO, c->dent, (u8 *)&c->usecount); + d = debugfs_create_u8("usecount", S_IRUGO, c->dent, &c->usecount); if (!d) { err = -ENOMEM; goto err_out; } - d = debugfs_create_u32("rate", S_IRUGO, c->dent, (u32 *)&c->rate); + d = debugfs_create_ulong("rate", S_IRUGO, c->dent, &c->rate); if (!d) { err = -ENOMEM; goto err_out; } - d = debugfs_create_x32("flags", S_IRUGO, c->dent, (u32 *)&c->flags); + d = debugfs_create_x8("flags", S_IRUGO, c->dent, &c->flags); if (!d) { err = -ENOMEM; goto err_out; diff --git a/arch/arm/mach-omap2/omap-wakeupgen.c b/arch/arm/mach-omap2/omap-wakeupgen.c index 4bb6751864a5..fc5fb776a710 100644 --- a/arch/arm/mach-omap2/omap-wakeupgen.c +++ b/arch/arm/mach-omap2/omap-wakeupgen.c @@ -299,8 +299,6 @@ static void irq_save_context(void) if (soc_is_dra7xx()) return; - if (!sar_base) - sar_base = omap4_get_sar_ram_base(); if (wakeupgen_ops && wakeupgen_ops->save_context) wakeupgen_ops->save_context(); } @@ -598,6 +596,8 @@ static int __init wakeupgen_init(struct device_node *node, irq_hotplug_init(); irq_pm_init(); + sar_base = omap4_get_sar_ram_base(); + return 0; } IRQCHIP_DECLARE(ti_wakeupgen, "ti,omap4-wugen-mpu", wakeupgen_init); diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index 124f9af34a15..34156eca8e23 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -977,6 +977,9 @@ static int _enable_clocks(struct omap_hwmod *oh) pr_debug("omap_hwmod: %s: enabling clocks\n", oh->name); + if (oh->flags & HWMOD_OPT_CLKS_NEEDED) + _enable_optional_clocks(oh); + if (oh->_clk) clk_enable(oh->_clk); @@ -985,9 +988,6 @@ static int _enable_clocks(struct omap_hwmod *oh) clk_enable(os->_clk); } - if (oh->flags & HWMOD_OPT_CLKS_NEEDED) - _enable_optional_clocks(oh); - /* The opt clocks are controlled by the device driver. */ return 0; diff --git a/arch/arm/mach-omap2/pm.c b/arch/arm/mach-omap2/pm.c index 366158a54fcd..6f68576e5695 100644 --- a/arch/arm/mach-omap2/pm.c +++ b/arch/arm/mach-omap2/pm.c @@ -186,7 +186,7 @@ static void omap_pm_end(void) cpu_idle_poll_ctrl(false); } -static void omap_pm_finish(void) +static void omap_pm_wake(void) { if (soc_is_omap34xx()) omap_prcm_irq_complete(); @@ -196,7 +196,7 @@ static const struct platform_suspend_ops omap_pm_ops = { .begin = omap_pm_begin, .end = omap_pm_end, .enter = omap_pm_enter, - .finish = omap_pm_finish, + .wake = omap_pm_wake, .valid = suspend_valid_only_mem, }; diff --git a/arch/arm/mach-omap2/timer.c b/arch/arm/mach-omap2/timer.c index ece09c9461f7..d61fbd7a2840 100644 --- a/arch/arm/mach-omap2/timer.c +++ b/arch/arm/mach-omap2/timer.c @@ -156,12 +156,6 @@ static struct clock_event_device clockevent_gpt = { .tick_resume = omap2_gp_timer_shutdown, }; -static struct property device_disabled = { - .name = "status", - .length = sizeof("disabled"), - .value = "disabled", -}; - static const struct of_device_id omap_timer_match[] __initconst = { { .compatible = "ti,omap2420-timer", }, { .compatible = "ti,omap3430-timer", }, @@ -203,8 +197,17 @@ static struct device_node * __init omap_get_timer_dt(const struct of_device_id * of_get_property(np, "ti,timer-secure", NULL))) continue; - if (!of_device_is_compatible(np, "ti,omap-counter32k")) - of_add_property(np, &device_disabled); + if (!of_device_is_compatible(np, "ti,omap-counter32k")) { + struct property *prop; + + prop = kzalloc(sizeof(*prop), GFP_KERNEL); + if (!prop) + return NULL; + prop->name = "status"; + prop->value = "disabled"; + prop->length = strlen(prop->value); + of_add_property(np, prop); + } return np; } diff --git a/arch/arm/mach-orion5x/Kconfig b/arch/arm/mach-orion5x/Kconfig index 2a7bb6ccdcb7..a810f4dd34b1 100644 --- a/arch/arm/mach-orion5x/Kconfig +++ b/arch/arm/mach-orion5x/Kconfig @@ -58,7 +58,6 @@ config MACH_KUROBOX_PRO config MACH_DNS323 bool "D-Link DNS-323" - select GENERIC_NET_UTILS select I2C_BOARDINFO if I2C help Say 'Y' here if you want your kernel to support the @@ -66,7 +65,6 @@ config MACH_DNS323 config MACH_TS209 bool "QNAP TS-109/TS-209" - select GENERIC_NET_UTILS help Say 'Y' here if you want your kernel to support the QNAP TS-109/TS-209 platform. @@ -101,7 +99,6 @@ config MACH_LINKSTATION_LS_HGL config MACH_TS409 bool "QNAP TS-409" - select GENERIC_NET_UTILS help Say 'Y' here if you want your kernel to support the QNAP TS-409 platform. diff --git a/arch/arm/mach-orion5x/dns323-setup.c b/arch/arm/mach-orion5x/dns323-setup.c index cd483bfb5ca8..d13344b2ddcd 100644 --- a/arch/arm/mach-orion5x/dns323-setup.c +++ b/arch/arm/mach-orion5x/dns323-setup.c @@ -173,10 +173,42 @@ static struct mv643xx_eth_platform_data dns323_eth_data = { .phy_addr = MV643XX_ETH_PHY_ADDR(8), }; +/* dns323_parse_hex_*() taken from tsx09-common.c; should a common copy of these + * functions be kept somewhere? + */ +static int __init dns323_parse_hex_nibble(char n) +{ + if (n >= '0' && n <= '9') + return n - '0'; + + if (n >= 'A' && n <= 'F') + return n - 'A' + 10; + + if (n >= 'a' && n <= 'f') + return n - 'a' + 10; + + return -1; +} + +static int __init dns323_parse_hex_byte(const char *b) +{ + int hi; + int lo; + + hi = dns323_parse_hex_nibble(b[0]); + lo = dns323_parse_hex_nibble(b[1]); + + if (hi < 0 || lo < 0) + return -1; + + return (hi << 4) | lo; +} + static int __init dns323_read_mac_addr(void) { u_int8_t addr[6]; - void __iomem *mac_page; + int i; + char *mac_page; /* MAC address is stored as a regular ol' string in /dev/mtdblock4 * (0x007d0000-0x00800000) starting at offset 196480 (0x2ff80). @@ -185,8 +217,23 @@ static int __init dns323_read_mac_addr(void) if (!mac_page) return -ENOMEM; - if (!mac_pton((__force const char *) mac_page, addr)) - goto error_fail; + /* Sanity check the string we're looking at */ + for (i = 0; i < 5; i++) { + if (*(mac_page + (i * 3) + 2) != ':') { + goto error_fail; + } + } + + for (i = 0; i < 6; i++) { + int byte; + + byte = dns323_parse_hex_byte(mac_page + (i * 3)); + if (byte < 0) { + goto error_fail; + } + + addr[i] = byte; + } iounmap(mac_page); printk("DNS-323: Found ethernet MAC address: %pM\n", addr); diff --git a/arch/arm/mach-orion5x/tsx09-common.c b/arch/arm/mach-orion5x/tsx09-common.c index 89774985d380..905d4f2dd0b8 100644 --- a/arch/arm/mach-orion5x/tsx09-common.c +++ b/arch/arm/mach-orion5x/tsx09-common.c @@ -53,12 +53,53 @@ struct mv643xx_eth_platform_data qnap_tsx09_eth_data = { .phy_addr = MV643XX_ETH_PHY_ADDR(8), }; +static int __init qnap_tsx09_parse_hex_nibble(char n) +{ + if (n >= '0' && n <= '9') + return n - '0'; + + if (n >= 'A' && n <= 'F') + return n - 'A' + 10; + + if (n >= 'a' && n <= 'f') + return n - 'a' + 10; + + return -1; +} + +static int __init qnap_tsx09_parse_hex_byte(const char *b) +{ + int hi; + int lo; + + hi = qnap_tsx09_parse_hex_nibble(b[0]); + lo = qnap_tsx09_parse_hex_nibble(b[1]); + + if (hi < 0 || lo < 0) + return -1; + + return (hi << 4) | lo; +} + static int __init qnap_tsx09_check_mac_addr(const char *addr_str) { u_int8_t addr[6]; + int i; - if (!mac_pton(addr_str, addr)) - return -1; + for (i = 0; i < 6; i++) { + int byte; + + /* + * Enforce "xx:xx:xx:xx:xx:xx\n" format. + */ + if (addr_str[(i * 3) + 2] != ((i < 5) ? ':' : '\n')) + return -1; + + byte = qnap_tsx09_parse_hex_byte(addr_str + (i * 3)); + if (byte < 0) + return -1; + addr[i] = byte; + } printk(KERN_INFO "tsx09: found ethernet mac address %pM\n", addr); @@ -77,12 +118,12 @@ void __init qnap_tsx09_find_mac_addr(u32 mem_base, u32 size) unsigned long addr; for (addr = mem_base; addr < (mem_base + size); addr += 1024) { - void __iomem *nor_page; + char *nor_page; int ret = 0; nor_page = ioremap(addr, 1024); if (nor_page != NULL) { - ret = qnap_tsx09_check_mac_addr((__force const char *)nor_page); + ret = qnap_tsx09_check_mac_addr(nor_page); iounmap(nor_page); } diff --git a/arch/arm/mach-ux500/cpu-db8500.c b/arch/arm/mach-ux500/cpu-db8500.c index 57058ac46f49..7e5d7a083707 100644 --- a/arch/arm/mach-ux500/cpu-db8500.c +++ b/arch/arm/mach-ux500/cpu-db8500.c @@ -23,7 +23,6 @@ #include <linux/of.h> #include <linux/of_address.h> #include <linux/of_platform.h> -#include <linux/perf/arm_pmu.h> #include <linux/regulator/machine.h> #include <asm/outercache.h> @@ -112,37 +111,6 @@ static void ux500_restart(enum reboot_mode mode, const char *cmd) prcmu_system_reset(0); } -/* - * The PMU IRQ lines of two cores are wired together into a single interrupt. - * Bounce the interrupt to the other core if it's not ours. - */ -static irqreturn_t db8500_pmu_handler(int irq, void *dev, irq_handler_t handler) -{ - irqreturn_t ret = handler(irq, dev); - int other = !smp_processor_id(); - - if (ret == IRQ_NONE && cpu_online(other)) - irq_set_affinity(irq, cpumask_of(other)); - - /* - * We should be able to get away with the amount of IRQ_NONEs we give, - * while still having the spurious IRQ detection code kick in if the - * interrupt really starts hitting spuriously. - */ - return ret; -} - -static struct arm_pmu_platdata db8500_pmu_platdata = { - .handle_irq = db8500_pmu_handler, - .irq_flags = IRQF_NOBALANCING | IRQF_NO_THREAD, -}; - -static struct of_dev_auxdata u8500_auxdata_lookup[] __initdata = { - /* Requires call-back bindings. */ - OF_DEV_AUXDATA("arm,cortex-a9-pmu", 0, "arm-pmu", &db8500_pmu_platdata), - {}, -}; - static struct of_dev_auxdata u8540_auxdata_lookup[] __initdata = { OF_DEV_AUXDATA("stericsson,db8500-prcmu", 0x80157000, "db8500-prcmu", NULL), {}, @@ -165,9 +133,6 @@ static void __init u8500_init_machine(void) if (of_machine_is_compatible("st-ericsson,u8540")) of_platform_populate(NULL, u8500_local_bus_nodes, u8540_auxdata_lookup, NULL); - else - of_platform_populate(NULL, u8500_local_bus_nodes, - u8500_auxdata_lookup, NULL); } static const char * stericsson_dt_platform_compat[] = { diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c index aff6994950ba..a2399fd66e97 100644 --- a/arch/arm/plat-orion/common.c +++ b/arch/arm/plat-orion/common.c @@ -472,28 +472,27 @@ void __init orion_ge11_init(struct mv643xx_eth_platform_data *eth_data, /***************************************************************************** * Ethernet switch ****************************************************************************/ -static __initconst const char *orion_ge00_mvmdio_bus_name = "orion-mii"; -static __initdata struct mdio_board_info - orion_ge00_switch_board_info; +static __initdata struct mdio_board_info orion_ge00_switch_board_info = { + .bus_id = "orion-mii", + .modalias = "mv88e6085", +}; void __init orion_ge00_switch_init(struct dsa_chip_data *d) { - struct mdio_board_info *bd; unsigned int i; if (!IS_BUILTIN(CONFIG_PHYLIB)) return; - for (i = 0; i < ARRAY_SIZE(d->port_names); i++) - if (!strcmp(d->port_names[i], "cpu")) + for (i = 0; i < ARRAY_SIZE(d->port_names); i++) { + if (!strcmp(d->port_names[i], "cpu")) { + d->netdev[i] = &orion_ge00.dev; break; + } + } - bd = &orion_ge00_switch_board_info; - bd->bus_id = orion_ge00_mvmdio_bus_name; - bd->mdio_addr = d->sw_addr; - d->netdev[i] = &orion_ge00.dev; - strcpy(bd->modalias, "mv88e6085"); - bd->platform_data = d; + orion_ge00_switch_board_info.mdio_addr = d->sw_addr; + orion_ge00_switch_board_info.platform_data = d; mdiobus_register_board_info(&orion_ge00_switch_board_info, 1); } diff --git a/arch/arm64/boot/dts/amlogic/meson-axg.dtsi b/arch/arm64/boot/dts/amlogic/meson-axg.dtsi index a80632641b39..70c776ef7aa7 100644 --- a/arch/arm64/boot/dts/amlogic/meson-axg.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-axg.dtsi @@ -165,14 +165,14 @@ uart_A: serial@24000 { compatible = "amlogic,meson-gx-uart", "amlogic,meson-uart"; - reg = <0x0 0x24000 0x0 0x14>; + reg = <0x0 0x24000 0x0 0x18>; interrupts = <GIC_SPI 26 IRQ_TYPE_EDGE_RISING>; status = "disabled"; }; uart_B: serial@23000 { compatible = "amlogic,meson-gx-uart", "amlogic,meson-uart"; - reg = <0x0 0x23000 0x0 0x14>; + reg = <0x0 0x23000 0x0 0x18>; interrupts = <GIC_SPI 75 IRQ_TYPE_EDGE_RISING>; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi index 6cb3c2a52baf..4ee2e7951482 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi @@ -235,14 +235,14 @@ uart_A: serial@84c0 { compatible = "amlogic,meson-gx-uart"; - reg = <0x0 0x84c0 0x0 0x14>; + reg = <0x0 0x84c0 0x0 0x18>; interrupts = <GIC_SPI 26 IRQ_TYPE_EDGE_RISING>; status = "disabled"; }; uart_B: serial@84dc { compatible = "amlogic,meson-gx-uart"; - reg = <0x0 0x84dc 0x0 0x14>; + reg = <0x0 0x84dc 0x0 0x18>; interrupts = <GIC_SPI 75 IRQ_TYPE_EDGE_RISING>; status = "disabled"; }; @@ -287,7 +287,7 @@ uart_C: serial@8700 { compatible = "amlogic,meson-gx-uart"; - reg = <0x0 0x8700 0x0 0x14>; + reg = <0x0 0x8700 0x0 0x18>; interrupts = <GIC_SPI 93 IRQ_TYPE_EDGE_RISING>; status = "disabled"; }; @@ -404,14 +404,14 @@ uart_AO: serial@4c0 { compatible = "amlogic,meson-gx-uart", "amlogic,meson-ao-uart"; - reg = <0x0 0x004c0 0x0 0x14>; + reg = <0x0 0x004c0 0x0 0x18>; interrupts = <GIC_SPI 193 IRQ_TYPE_EDGE_RISING>; status = "disabled"; }; uart_AO_B: serial@4e0 { compatible = "amlogic,meson-gx-uart", "amlogic,meson-ao-uart"; - reg = <0x0 0x004e0 0x0 0x14>; + reg = <0x0 0x004e0 0x0 0x18>; interrupts = <GIC_SPI 197 IRQ_TYPE_EDGE_RISING>; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi index 4f355f17eed6..c8514110b9da 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi @@ -631,6 +631,7 @@ internal_phy: ethernet-phy@8 { compatible = "ethernet-phy-id0181.4400", "ethernet-phy-ieee802.3-c22"; + interrupts = <GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>; reg = <8>; max-speed = <100>; }; diff --git a/arch/arm64/boot/dts/cavium/thunder2-99xx.dtsi b/arch/arm64/boot/dts/cavium/thunder2-99xx.dtsi index 4220fbdcb24a..ff5c4c47b22b 100644 --- a/arch/arm64/boot/dts/cavium/thunder2-99xx.dtsi +++ b/arch/arm64/boot/dts/cavium/thunder2-99xx.dtsi @@ -98,7 +98,7 @@ clock-output-names = "clk125mhz"; }; - pci { + pcie@30000000 { compatible = "pci-host-ecam-generic"; device_type = "pci"; #interrupt-cells = <1>; @@ -118,6 +118,7 @@ ranges = <0x02000000 0 0x40000000 0 0x40000000 0 0x20000000 0x43000000 0x40 0x00000000 0x40 0x00000000 0x20 0x00000000>; + bus-range = <0 0xff>; interrupt-map-mask = <0 0 0 7>; interrupt-map = /* addr pin ic icaddr icintr */ diff --git a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts index e94fa1a53192..047641fe294c 100644 --- a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts +++ b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts @@ -51,7 +51,7 @@ #size-cells = <2>; ranges; - ramoops@0x21f00000 { + ramoops@21f00000 { compatible = "ramoops"; reg = <0x0 0x21f00000 0x0 0x00100000>; record-size = <0x00020000>; diff --git a/arch/arm64/boot/dts/mediatek/mt8173.dtsi b/arch/arm64/boot/dts/mediatek/mt8173.dtsi index 9fbe4705ee88..94597e33c806 100644 --- a/arch/arm64/boot/dts/mediatek/mt8173.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8173.dtsi @@ -341,7 +341,7 @@ reg = <0 0x10005000 0 0x1000>; }; - pio: pinctrl@0x10005000 { + pio: pinctrl@10005000 { compatible = "mediatek,mt8173-pinctrl"; reg = <0 0x1000b000 0 0x1000>; mediatek,pctl-regmap = <&syscfg_pctl_a>; diff --git a/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi b/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi index 492a011f14f6..1c8f1b86472d 100644 --- a/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi +++ b/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi @@ -140,16 +140,16 @@ }; agnoc@0 { - qcom,pcie@00600000 { + qcom,pcie@600000 { perst-gpio = <&msmgpio 35 GPIO_ACTIVE_LOW>; }; - qcom,pcie@00608000 { + qcom,pcie@608000 { status = "okay"; perst-gpio = <&msmgpio 130 GPIO_ACTIVE_LOW>; }; - qcom,pcie@00610000 { + qcom,pcie@610000 { status = "okay"; perst-gpio = <&msmgpio 114 GPIO_ACTIVE_LOW>; }; diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi index 4b2afcc4fdf4..0a6f7952bbb1 100644 --- a/arch/arm64/boot/dts/qcom/msm8996.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi @@ -840,7 +840,7 @@ #size-cells = <1>; ranges; - pcie0: qcom,pcie@00600000 { + pcie0: qcom,pcie@600000 { compatible = "qcom,pcie-msm8996", "snps,dw-pcie"; status = "disabled"; power-domains = <&gcc PCIE0_GDSC>; @@ -893,7 +893,7 @@ }; - pcie1: qcom,pcie@00608000 { + pcie1: qcom,pcie@608000 { compatible = "qcom,pcie-msm8996", "snps,dw-pcie"; power-domains = <&gcc PCIE1_GDSC>; bus-range = <0x00 0xff>; @@ -946,7 +946,7 @@ "bus_slave"; }; - pcie2: qcom,pcie@00610000 { + pcie2: qcom,pcie@610000 { compatible = "qcom,pcie-msm8996", "snps,dw-pcie"; power-domains = <&gcc PCIE2_GDSC>; bus-range = <0x00 0xff>; diff --git a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts index 3890468678ce..28257724a56e 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts +++ b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts @@ -132,17 +132,16 @@ assigned-clocks = <&cru SCLK_MAC2IO>, <&cru SCLK_MAC2IO_EXT>; assigned-clock-parents = <&gmac_clkin>, <&gmac_clkin>; clock_in_out = "input"; - /* shows instability at 1GBit right now */ - max-speed = <100>; phy-supply = <&vcc_io>; phy-mode = "rgmii"; pinctrl-names = "default"; pinctrl-0 = <&rgmiim1_pins>; + snps,force_thresh_dma_mode; snps,reset-gpio = <&gpio1 RK_PC2 GPIO_ACTIVE_LOW>; snps,reset-active-low; snps,reset-delays-us = <0 10000 50000>; - tx_delay = <0x26>; - rx_delay = <0x11>; + tx_delay = <0x24>; + rx_delay = <0x18>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi index a037ee56fead..cae341554486 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi @@ -730,7 +730,7 @@ interrupts = <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>; clocks = <&cru HCLK_SDMMC>, <&cru SCLK_SDMMC>, <&cru SCLK_SDMMC_DRV>, <&cru SCLK_SDMMC_SAMPLE>; - clock-names = "biu", "ciu", "ciu_drv", "ciu_sample"; + clock-names = "biu", "ciu", "ciu-drive", "ciu-sample"; fifo-depth = <0x100>; status = "disabled"; }; @@ -741,7 +741,7 @@ interrupts = <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>; clocks = <&cru HCLK_SDIO>, <&cru SCLK_SDIO>, <&cru SCLK_SDIO_DRV>, <&cru SCLK_SDIO_SAMPLE>; - clock-names = "biu", "ciu", "ciu_drv", "ciu_sample"; + clock-names = "biu", "ciu", "ciu-drive", "ciu-sample"; fifo-depth = <0x100>; status = "disabled"; }; @@ -752,7 +752,7 @@ interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>; clocks = <&cru HCLK_EMMC>, <&cru SCLK_EMMC>, <&cru SCLK_EMMC_DRV>, <&cru SCLK_EMMC_SAMPLE>; - clock-names = "biu", "ciu", "ciu_drv", "ciu_sample"; + clock-names = "biu", "ciu", "ciu-drive", "ciu-sample"; fifo-depth = <0x100>; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3368.dtsi b/arch/arm64/boot/dts/rockchip/rk3368.dtsi index aa4d07046a7b..03458ac44201 100644 --- a/arch/arm64/boot/dts/rockchip/rk3368.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3368.dtsi @@ -257,7 +257,7 @@ max-frequency = <150000000>; clocks = <&cru HCLK_SDIO0>, <&cru SCLK_SDIO0>, <&cru SCLK_SDIO0_DRV>, <&cru SCLK_SDIO0_SAMPLE>; - clock-names = "biu", "ciu", "ciu_drv", "ciu_sample"; + clock-names = "biu", "ciu", "ciu-drive", "ciu-sample"; fifo-depth = <0x100>; interrupts = <GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>; resets = <&cru SRST_SDIO0>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-sapphire.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-sapphire.dtsi index 0f873c897d0d..ce592a4c0c4c 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-sapphire.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-sapphire.dtsi @@ -457,7 +457,7 @@ assigned-clocks = <&cru SCLK_PCIEPHY_REF>; assigned-clock-parents = <&cru SCLK_PCIEPHY_REF100M>; assigned-clock-rates = <100000000>; - ep-gpios = <&gpio3 RK_PB5 GPIO_ACTIVE_HIGH>; + ep-gpios = <&gpio2 RK_PA4 GPIO_ACTIVE_HIGH>; num-lanes = <4>; pinctrl-names = "default"; pinctrl-0 = <&pcie_clkreqn_cpm>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi index 7aa2144e0d47..2605118d4b4c 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi @@ -1739,8 +1739,8 @@ compatible = "rockchip,rk3399-edp"; reg = <0x0 0xff970000 0x0 0x8000>; interrupts = <GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH 0>; - clocks = <&cru PCLK_EDP>, <&cru PCLK_EDP_CTRL>; - clock-names = "dp", "pclk"; + clocks = <&cru PCLK_EDP>, <&cru PCLK_EDP_CTRL>, <&cru PCLK_VIO_GRF>; + clock-names = "dp", "pclk", "grf"; pinctrl-names = "default"; pinctrl-0 = <&edp_hpd>; power-domains = <&power RK3399_PD_EDP>; diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index be7bd19c87ec..350c76a1d15b 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -20,7 +20,7 @@ #define MPIDR_UP_BITMASK (0x1 << 30) #define MPIDR_MT_BITMASK (0x1 << 24) -#define MPIDR_HWID_BITMASK 0xff00ffffff +#define MPIDR_HWID_BITMASK UL(0xff00ffffff) #define MPIDR_LEVEL_BITS_SHIFT 3 #define MPIDR_LEVEL_BITS (1 << MPIDR_LEVEL_BITS_SHIFT) diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h index 1dca41bea16a..e73f68569624 100644 --- a/arch/arm64/include/asm/hugetlb.h +++ b/arch/arm64/include/asm/hugetlb.h @@ -22,7 +22,7 @@ static inline pte_t huge_ptep_get(pte_t *ptep) { - return *ptep; + return READ_ONCE(*ptep); } diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 9679067a1574..7faed6e48b46 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -185,42 +185,42 @@ static inline pmd_t kvm_s2pmd_mkexec(pmd_t pmd) return pmd; } -static inline void kvm_set_s2pte_readonly(pte_t *pte) +static inline void kvm_set_s2pte_readonly(pte_t *ptep) { pteval_t old_pteval, pteval; - pteval = READ_ONCE(pte_val(*pte)); + pteval = READ_ONCE(pte_val(*ptep)); do { old_pteval = pteval; pteval &= ~PTE_S2_RDWR; pteval |= PTE_S2_RDONLY; - pteval = cmpxchg_relaxed(&pte_val(*pte), old_pteval, pteval); + pteval = cmpxchg_relaxed(&pte_val(*ptep), old_pteval, pteval); } while (pteval != old_pteval); } -static inline bool kvm_s2pte_readonly(pte_t *pte) +static inline bool kvm_s2pte_readonly(pte_t *ptep) { - return (pte_val(*pte) & PTE_S2_RDWR) == PTE_S2_RDONLY; + return (READ_ONCE(pte_val(*ptep)) & PTE_S2_RDWR) == PTE_S2_RDONLY; } -static inline bool kvm_s2pte_exec(pte_t *pte) +static inline bool kvm_s2pte_exec(pte_t *ptep) { - return !(pte_val(*pte) & PTE_S2_XN); + return !(READ_ONCE(pte_val(*ptep)) & PTE_S2_XN); } -static inline void kvm_set_s2pmd_readonly(pmd_t *pmd) +static inline void kvm_set_s2pmd_readonly(pmd_t *pmdp) { - kvm_set_s2pte_readonly((pte_t *)pmd); + kvm_set_s2pte_readonly((pte_t *)pmdp); } -static inline bool kvm_s2pmd_readonly(pmd_t *pmd) +static inline bool kvm_s2pmd_readonly(pmd_t *pmdp) { - return kvm_s2pte_readonly((pte_t *)pmd); + return kvm_s2pte_readonly((pte_t *)pmdp); } -static inline bool kvm_s2pmd_exec(pmd_t *pmd) +static inline bool kvm_s2pmd_exec(pmd_t *pmdp) { - return !(pmd_val(*pmd) & PMD_S2_XN); + return !(READ_ONCE(pmd_val(*pmdp)) & PMD_S2_XN); } static inline bool kvm_page_empty(void *ptr) diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 8d3331985d2e..39ec0b8a689e 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -141,13 +141,13 @@ static inline void cpu_install_idmap(void) * Atomically replaces the active TTBR1_EL1 PGD with a new VA-compatible PGD, * avoiding the possibility of conflicting TLB entries being allocated. */ -static inline void cpu_replace_ttbr1(pgd_t *pgd) +static inline void cpu_replace_ttbr1(pgd_t *pgdp) { typedef void (ttbr_replace_func)(phys_addr_t); extern ttbr_replace_func idmap_cpu_replace_ttbr1; ttbr_replace_func *replace_phys; - phys_addr_t pgd_phys = virt_to_phys(pgd); + phys_addr_t pgd_phys = virt_to_phys(pgdp); replace_phys = (void *)__pa_symbol(idmap_cpu_replace_ttbr1); diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index e9d9f1b006ef..2e05bcd944c8 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -36,23 +36,23 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) return (pmd_t *)__get_free_page(PGALLOC_GFP); } -static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) +static inline void pmd_free(struct mm_struct *mm, pmd_t *pmdp) { - BUG_ON((unsigned long)pmd & (PAGE_SIZE-1)); - free_page((unsigned long)pmd); + BUG_ON((unsigned long)pmdp & (PAGE_SIZE-1)); + free_page((unsigned long)pmdp); } -static inline void __pud_populate(pud_t *pud, phys_addr_t pmd, pudval_t prot) +static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot) { - set_pud(pud, __pud(__phys_to_pud_val(pmd) | prot)); + set_pud(pudp, __pud(__phys_to_pud_val(pmdp) | prot)); } -static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) +static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmdp) { - __pud_populate(pud, __pa(pmd), PMD_TYPE_TABLE); + __pud_populate(pudp, __pa(pmdp), PMD_TYPE_TABLE); } #else -static inline void __pud_populate(pud_t *pud, phys_addr_t pmd, pudval_t prot) +static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot) { BUILD_BUG(); } @@ -65,30 +65,30 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) return (pud_t *)__get_free_page(PGALLOC_GFP); } -static inline void pud_free(struct mm_struct *mm, pud_t *pud) +static inline void pud_free(struct mm_struct *mm, pud_t *pudp) { - BUG_ON((unsigned long)pud & (PAGE_SIZE-1)); - free_page((unsigned long)pud); + BUG_ON((unsigned long)pudp & (PAGE_SIZE-1)); + free_page((unsigned long)pudp); } -static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pud, pgdval_t prot) +static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pudp, pgdval_t prot) { - set_pgd(pgdp, __pgd(__phys_to_pgd_val(pud) | prot)); + set_pgd(pgdp, __pgd(__phys_to_pgd_val(pudp) | prot)); } -static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) +static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgdp, pud_t *pudp) { - __pgd_populate(pgd, __pa(pud), PUD_TYPE_TABLE); + __pgd_populate(pgdp, __pa(pudp), PUD_TYPE_TABLE); } #else -static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pud, pgdval_t prot) +static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pudp, pgdval_t prot) { BUILD_BUG(); } #endif /* CONFIG_PGTABLE_LEVELS > 3 */ extern pgd_t *pgd_alloc(struct mm_struct *mm); -extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); +extern void pgd_free(struct mm_struct *mm, pgd_t *pgdp); static inline pte_t * pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr) @@ -114,10 +114,10 @@ pte_alloc_one(struct mm_struct *mm, unsigned long addr) /* * Free a PTE table. */ -static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) +static inline void pte_free_kernel(struct mm_struct *mm, pte_t *ptep) { - if (pte) - free_page((unsigned long)pte); + if (ptep) + free_page((unsigned long)ptep); } static inline void pte_free(struct mm_struct *mm, pgtable_t pte) @@ -126,10 +126,10 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t pte) __free_page(pte); } -static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t pte, +static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t ptep, pmdval_t prot) { - set_pmd(pmdp, __pmd(__phys_to_pmd_val(pte) | prot)); + set_pmd(pmdp, __pmd(__phys_to_pmd_val(ptep) | prot)); } /* diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 094374c82db0..7e2c27e63cd8 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -218,7 +218,7 @@ static inline pmd_t pmd_mkcont(pmd_t pmd) static inline void set_pte(pte_t *ptep, pte_t pte) { - *ptep = pte; + WRITE_ONCE(*ptep, pte); /* * Only if the new pte is valid and kernel, otherwise TLB maintenance @@ -250,6 +250,8 @@ extern void __sync_icache_dcache(pte_t pteval, unsigned long addr); static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) { + pte_t old_pte; + if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte)) __sync_icache_dcache(pte, addr); @@ -258,14 +260,15 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, * hardware updates of the pte (ptep_set_access_flags safely changes * valid ptes without going through an invalid entry). */ - if (IS_ENABLED(CONFIG_DEBUG_VM) && pte_valid(*ptep) && pte_valid(pte) && + old_pte = READ_ONCE(*ptep); + if (IS_ENABLED(CONFIG_DEBUG_VM) && pte_valid(old_pte) && pte_valid(pte) && (mm == current->active_mm || atomic_read(&mm->mm_users) > 1)) { VM_WARN_ONCE(!pte_young(pte), "%s: racy access flag clearing: 0x%016llx -> 0x%016llx", - __func__, pte_val(*ptep), pte_val(pte)); - VM_WARN_ONCE(pte_write(*ptep) && !pte_dirty(pte), + __func__, pte_val(old_pte), pte_val(pte)); + VM_WARN_ONCE(pte_write(old_pte) && !pte_dirty(pte), "%s: racy dirty state clearing: 0x%016llx -> 0x%016llx", - __func__, pte_val(*ptep), pte_val(pte)); + __func__, pte_val(old_pte), pte_val(pte)); } set_pte(ptep, pte); @@ -431,7 +434,7 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) { - *pmdp = pmd; + WRITE_ONCE(*pmdp, pmd); dsb(ishst); isb(); } @@ -482,7 +485,7 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd) static inline void set_pud(pud_t *pudp, pud_t pud) { - *pudp = pud; + WRITE_ONCE(*pudp, pud); dsb(ishst); isb(); } @@ -500,7 +503,7 @@ static inline phys_addr_t pud_page_paddr(pud_t pud) /* Find an entry in the second-level page table. */ #define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) -#define pmd_offset_phys(dir, addr) (pud_page_paddr(*(dir)) + pmd_index(addr) * sizeof(pmd_t)) +#define pmd_offset_phys(dir, addr) (pud_page_paddr(READ_ONCE(*(dir))) + pmd_index(addr) * sizeof(pmd_t)) #define pmd_offset(dir, addr) ((pmd_t *)__va(pmd_offset_phys((dir), (addr)))) #define pmd_set_fixmap(addr) ((pmd_t *)set_fixmap_offset(FIX_PMD, addr)) @@ -535,7 +538,7 @@ static inline phys_addr_t pud_page_paddr(pud_t pud) static inline void set_pgd(pgd_t *pgdp, pgd_t pgd) { - *pgdp = pgd; + WRITE_ONCE(*pgdp, pgd); dsb(ishst); } @@ -552,7 +555,7 @@ static inline phys_addr_t pgd_page_paddr(pgd_t pgd) /* Find an entry in the frst-level page table. */ #define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) -#define pud_offset_phys(dir, addr) (pgd_page_paddr(*(dir)) + pud_index(addr) * sizeof(pud_t)) +#define pud_offset_phys(dir, addr) (pgd_page_paddr(READ_ONCE(*(dir))) + pud_index(addr) * sizeof(pud_t)) #define pud_offset(dir, addr) ((pud_t *)__va(pud_offset_phys((dir), (addr)))) #define pud_set_fixmap(addr) ((pud_t *)set_fixmap_offset(FIX_PUD, addr)) diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index 472ef944e932..902f9edacbea 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -28,7 +28,7 @@ struct stackframe { unsigned long fp; unsigned long pc; #ifdef CONFIG_FUNCTION_GRAPH_TRACER - unsigned int graph; + int graph; #endif }; diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 543e11f0f657..e66b0fca99c2 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -72,15 +72,15 @@ static inline void set_fs(mm_segment_t fs) * This is equivalent to the following test: * (u65)addr + (u65)size <= (u65)current->addr_limit + 1 */ -static inline unsigned long __range_ok(unsigned long addr, unsigned long size) +static inline unsigned long __range_ok(const void __user *addr, unsigned long size) { - unsigned long limit = current_thread_info()->addr_limit; + unsigned long ret, limit = current_thread_info()->addr_limit; __chk_user_ptr(addr); asm volatile( // A + B <= C + 1 for all A,B,C, in four easy steps: // 1: X = A + B; X' = X % 2^64 - " adds %0, %0, %2\n" + " adds %0, %3, %2\n" // 2: Set C = 0 if X > 2^64, to guarantee X' > C in step 4 " csel %1, xzr, %1, hi\n" // 3: Set X' = ~0 if X >= 2^64. For X == 2^64, this decrements X' @@ -92,9 +92,9 @@ static inline unsigned long __range_ok(unsigned long addr, unsigned long size) // testing X' - C == 0, subject to the previous adjustments. " sbcs xzr, %0, %1\n" " cset %0, ls\n" - : "+r" (addr), "+r" (limit) : "Ir" (size) : "cc"); + : "=&r" (ret), "+r" (limit) : "Ir" (size), "0" (addr) : "cc"); - return addr; + return ret; } /* @@ -104,7 +104,7 @@ static inline unsigned long __range_ok(unsigned long addr, unsigned long size) */ #define untagged_addr(addr) sign_extend64(addr, 55) -#define access_ok(type, addr, size) __range_ok((unsigned long)(addr), size) +#define access_ok(type, addr, size) __range_ok(addr, size) #define user_addr_max get_fs #define _ASM_EXTABLE(from, to) \ diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index c33b5e4010ab..68450e954d47 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -370,6 +370,7 @@ static unsigned int __kprobes aarch32_check_condition(u32 opcode, u32 psr) static int swp_handler(struct pt_regs *regs, u32 instr) { u32 destreg, data, type, address = 0; + const void __user *user_ptr; int rn, rt2, res = 0; perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, regs->pc); @@ -401,7 +402,8 @@ static int swp_handler(struct pt_regs *regs, u32 instr) aarch32_insn_extract_reg_num(instr, A32_RT2_OFFSET), data); /* Check access in reasonable access range for both SWP and SWPB */ - if (!access_ok(VERIFY_WRITE, (address & ~3), 4)) { + user_ptr = (const void __user *)(unsigned long)(address & ~3); + if (!access_ok(VERIFY_WRITE, user_ptr, 4)) { pr_debug("SWP{B} emulation: access to 0x%08x not allowed!\n", address); goto fault; diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 07823595b7f0..b5a28336c077 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -178,7 +178,7 @@ static int enable_smccc_arch_workaround_1(void *data) case PSCI_CONDUIT_HVC: arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_ARCH_WORKAROUND_1, &res); - if (res.a0) + if ((int)res.a0 < 0) return 0; cb = call_hvc_arch_workaround_1; smccc_start = __smccc_workaround_1_hvc_start; @@ -188,7 +188,7 @@ static int enable_smccc_arch_workaround_1(void *data) case PSCI_CONDUIT_SMC: arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_ARCH_WORKAROUND_1, &res); - if (res.a0) + if ((int)res.a0 < 0) return 0; cb = call_smc_arch_workaround_1; smccc_start = __smccc_workaround_1_smc_start; @@ -408,6 +408,15 @@ const struct arm64_cpu_capabilities arm64_errata[] = { }, { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, + MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR), + .enable = qcom_enable_link_stack_sanitization, + }, + { + .capability = ARM64_HARDEN_BP_POST_GUEST_EXIT, + MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR), + }, + { + .capability = ARM64_HARDEN_BRANCH_PREDICTOR, MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN), .enable = enable_smccc_arch_workaround_1, }, diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 29b1f873e337..2985a067fc13 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -199,9 +199,11 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = { }; static const struct arm64_ftr_bits ftr_ctr[] = { - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RAO */ + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RES1 */ + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 29, 1, 1), /* DIC */ + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 28, 1, 1), /* IDC */ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, 24, 4, 0), /* CWG */ - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0), /* ERG */ + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, 20, 4, 0), /* ERG */ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 1), /* DminLine */ /* * Linux can handle differing I-cache policies. Userspace JITs will diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index f85ac58d08a3..a8bf1c892b90 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -90,7 +90,7 @@ static int __init set_permissions(pte_t *ptep, pgtable_t token, unsigned long addr, void *data) { efi_memory_desc_t *md = data; - pte_t pte = *ptep; + pte_t pte = READ_ONCE(*ptep); if (md->attribute & EFI_MEMORY_RO) pte = set_pte_bit(pte, __pgprot(PTE_RDONLY)); diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index f20cf7e99249..1ec5f28c39fc 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -202,10 +202,10 @@ static int create_safe_exec_page(void *src_start, size_t length, gfp_t mask) { int rc = 0; - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; + pgd_t *pgdp; + pud_t *pudp; + pmd_t *pmdp; + pte_t *ptep; unsigned long dst = (unsigned long)allocator(mask); if (!dst) { @@ -216,38 +216,38 @@ static int create_safe_exec_page(void *src_start, size_t length, memcpy((void *)dst, src_start, length); flush_icache_range(dst, dst + length); - pgd = pgd_offset_raw(allocator(mask), dst_addr); - if (pgd_none(*pgd)) { - pud = allocator(mask); - if (!pud) { + pgdp = pgd_offset_raw(allocator(mask), dst_addr); + if (pgd_none(READ_ONCE(*pgdp))) { + pudp = allocator(mask); + if (!pudp) { rc = -ENOMEM; goto out; } - pgd_populate(&init_mm, pgd, pud); + pgd_populate(&init_mm, pgdp, pudp); } - pud = pud_offset(pgd, dst_addr); - if (pud_none(*pud)) { - pmd = allocator(mask); - if (!pmd) { + pudp = pud_offset(pgdp, dst_addr); + if (pud_none(READ_ONCE(*pudp))) { + pmdp = allocator(mask); + if (!pmdp) { rc = -ENOMEM; goto out; } - pud_populate(&init_mm, pud, pmd); + pud_populate(&init_mm, pudp, pmdp); } - pmd = pmd_offset(pud, dst_addr); - if (pmd_none(*pmd)) { - pte = allocator(mask); - if (!pte) { + pmdp = pmd_offset(pudp, dst_addr); + if (pmd_none(READ_ONCE(*pmdp))) { + ptep = allocator(mask); + if (!ptep) { rc = -ENOMEM; goto out; } - pmd_populate_kernel(&init_mm, pmd, pte); + pmd_populate_kernel(&init_mm, pmdp, ptep); } - pte = pte_offset_kernel(pmd, dst_addr); - set_pte(pte, pfn_pte(virt_to_pfn(dst), PAGE_KERNEL_EXEC)); + ptep = pte_offset_kernel(pmdp, dst_addr); + set_pte(ptep, pfn_pte(virt_to_pfn(dst), PAGE_KERNEL_EXEC)); /* * Load our new page tables. A strict BBM approach requires that we @@ -263,7 +263,7 @@ static int create_safe_exec_page(void *src_start, size_t length, */ cpu_set_reserved_ttbr0(); local_flush_tlb_all(); - write_sysreg(phys_to_ttbr(virt_to_phys(pgd)), ttbr0_el1); + write_sysreg(phys_to_ttbr(virt_to_phys(pgdp)), ttbr0_el1); isb(); *phys_dst_addr = virt_to_phys((void *)dst); @@ -320,9 +320,9 @@ int swsusp_arch_suspend(void) return ret; } -static void _copy_pte(pte_t *dst_pte, pte_t *src_pte, unsigned long addr) +static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr) { - pte_t pte = *src_pte; + pte_t pte = READ_ONCE(*src_ptep); if (pte_valid(pte)) { /* @@ -330,7 +330,7 @@ static void _copy_pte(pte_t *dst_pte, pte_t *src_pte, unsigned long addr) * read only (code, rodata). Clear the RDONLY bit from * the temporary mappings we use during restore. */ - set_pte(dst_pte, pte_mkwrite(pte)); + set_pte(dst_ptep, pte_mkwrite(pte)); } else if (debug_pagealloc_enabled() && !pte_none(pte)) { /* * debug_pagealloc will removed the PTE_VALID bit if @@ -343,112 +343,116 @@ static void _copy_pte(pte_t *dst_pte, pte_t *src_pte, unsigned long addr) */ BUG_ON(!pfn_valid(pte_pfn(pte))); - set_pte(dst_pte, pte_mkpresent(pte_mkwrite(pte))); + set_pte(dst_ptep, pte_mkpresent(pte_mkwrite(pte))); } } -static int copy_pte(pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long start, +static int copy_pte(pmd_t *dst_pmdp, pmd_t *src_pmdp, unsigned long start, unsigned long end) { - pte_t *src_pte; - pte_t *dst_pte; + pte_t *src_ptep; + pte_t *dst_ptep; unsigned long addr = start; - dst_pte = (pte_t *)get_safe_page(GFP_ATOMIC); - if (!dst_pte) + dst_ptep = (pte_t *)get_safe_page(GFP_ATOMIC); + if (!dst_ptep) return -ENOMEM; - pmd_populate_kernel(&init_mm, dst_pmd, dst_pte); - dst_pte = pte_offset_kernel(dst_pmd, start); + pmd_populate_kernel(&init_mm, dst_pmdp, dst_ptep); + dst_ptep = pte_offset_kernel(dst_pmdp, start); - src_pte = pte_offset_kernel(src_pmd, start); + src_ptep = pte_offset_kernel(src_pmdp, start); do { - _copy_pte(dst_pte, src_pte, addr); - } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end); + _copy_pte(dst_ptep, src_ptep, addr); + } while (dst_ptep++, src_ptep++, addr += PAGE_SIZE, addr != end); return 0; } -static int copy_pmd(pud_t *dst_pud, pud_t *src_pud, unsigned long start, +static int copy_pmd(pud_t *dst_pudp, pud_t *src_pudp, unsigned long start, unsigned long end) { - pmd_t *src_pmd; - pmd_t *dst_pmd; + pmd_t *src_pmdp; + pmd_t *dst_pmdp; unsigned long next; unsigned long addr = start; - if (pud_none(*dst_pud)) { - dst_pmd = (pmd_t *)get_safe_page(GFP_ATOMIC); - if (!dst_pmd) + if (pud_none(READ_ONCE(*dst_pudp))) { + dst_pmdp = (pmd_t *)get_safe_page(GFP_ATOMIC); + if (!dst_pmdp) return -ENOMEM; - pud_populate(&init_mm, dst_pud, dst_pmd); + pud_populate(&init_mm, dst_pudp, dst_pmdp); } - dst_pmd = pmd_offset(dst_pud, start); + dst_pmdp = pmd_offset(dst_pudp, start); - src_pmd = pmd_offset(src_pud, start); + src_pmdp = pmd_offset(src_pudp, start); do { + pmd_t pmd = READ_ONCE(*src_pmdp); + next = pmd_addr_end(addr, end); - if (pmd_none(*src_pmd)) + if (pmd_none(pmd)) continue; - if (pmd_table(*src_pmd)) { - if (copy_pte(dst_pmd, src_pmd, addr, next)) + if (pmd_table(pmd)) { + if (copy_pte(dst_pmdp, src_pmdp, addr, next)) return -ENOMEM; } else { - set_pmd(dst_pmd, - __pmd(pmd_val(*src_pmd) & ~PMD_SECT_RDONLY)); + set_pmd(dst_pmdp, + __pmd(pmd_val(pmd) & ~PMD_SECT_RDONLY)); } - } while (dst_pmd++, src_pmd++, addr = next, addr != end); + } while (dst_pmdp++, src_pmdp++, addr = next, addr != end); return 0; } -static int copy_pud(pgd_t *dst_pgd, pgd_t *src_pgd, unsigned long start, +static int copy_pud(pgd_t *dst_pgdp, pgd_t *src_pgdp, unsigned long start, unsigned long end) { - pud_t *dst_pud; - pud_t *src_pud; + pud_t *dst_pudp; + pud_t *src_pudp; unsigned long next; unsigned long addr = start; - if (pgd_none(*dst_pgd)) { - dst_pud = (pud_t *)get_safe_page(GFP_ATOMIC); - if (!dst_pud) + if (pgd_none(READ_ONCE(*dst_pgdp))) { + dst_pudp = (pud_t *)get_safe_page(GFP_ATOMIC); + if (!dst_pudp) return -ENOMEM; - pgd_populate(&init_mm, dst_pgd, dst_pud); + pgd_populate(&init_mm, dst_pgdp, dst_pudp); } - dst_pud = pud_offset(dst_pgd, start); + dst_pudp = pud_offset(dst_pgdp, start); - src_pud = pud_offset(src_pgd, start); + src_pudp = pud_offset(src_pgdp, start); do { + pud_t pud = READ_ONCE(*src_pudp); + next = pud_addr_end(addr, end); - if (pud_none(*src_pud)) + if (pud_none(pud)) continue; - if (pud_table(*(src_pud))) { - if (copy_pmd(dst_pud, src_pud, addr, next)) + if (pud_table(pud)) { + if (copy_pmd(dst_pudp, src_pudp, addr, next)) return -ENOMEM; } else { - set_pud(dst_pud, - __pud(pud_val(*src_pud) & ~PMD_SECT_RDONLY)); + set_pud(dst_pudp, + __pud(pud_val(pud) & ~PMD_SECT_RDONLY)); } - } while (dst_pud++, src_pud++, addr = next, addr != end); + } while (dst_pudp++, src_pudp++, addr = next, addr != end); return 0; } -static int copy_page_tables(pgd_t *dst_pgd, unsigned long start, +static int copy_page_tables(pgd_t *dst_pgdp, unsigned long start, unsigned long end) { unsigned long next; unsigned long addr = start; - pgd_t *src_pgd = pgd_offset_k(start); + pgd_t *src_pgdp = pgd_offset_k(start); - dst_pgd = pgd_offset_raw(dst_pgd, start); + dst_pgdp = pgd_offset_raw(dst_pgdp, start); do { next = pgd_addr_end(addr, end); - if (pgd_none(*src_pgd)) + if (pgd_none(READ_ONCE(*src_pgdp))) continue; - if (copy_pud(dst_pgd, src_pgd, addr, next)) + if (copy_pud(dst_pgdp, src_pgdp, addr, next)) return -ENOMEM; - } while (dst_pgd++, src_pgd++, addr = next, addr != end); + } while (dst_pgdp++, src_pgdp++, addr = next, addr != end); return 0; } diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 75b220ba73a3..85a251b6dfa8 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -908,9 +908,9 @@ static void __armv8pmu_probe_pmu(void *info) int pmuver; dfr0 = read_sysreg(id_aa64dfr0_el1); - pmuver = cpuid_feature_extract_signed_field(dfr0, + pmuver = cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_PMUVER_SHIFT); - if (pmuver < 1) + if (pmuver == 0xf || pmuver == 0) return; probe->present = true; diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index ad8aeb098b31..c0da6efe5465 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -220,8 +220,15 @@ void __show_regs(struct pt_regs *regs) show_regs_print_info(KERN_DEFAULT); print_pstate(regs); - printk("pc : %pS\n", (void *)regs->pc); - printk("lr : %pS\n", (void *)lr); + + if (!user_mode(regs)) { + printk("pc : %pS\n", (void *)regs->pc); + printk("lr : %pS\n", (void *)lr); + } else { + printk("pc : %016llx\n", regs->pc); + printk("lr : %016llx\n", lr); + } + printk("sp : %016llx\n", sp); i = top_reg; diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 6618036ae6d4..9ae31f7e2243 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1419,7 +1419,7 @@ static int compat_ptrace_hbp_get(unsigned int note_type, u64 addr = 0; u32 ctrl = 0; - int err, idx = compat_ptrace_hbp_num_to_idx(num);; + int err, idx = compat_ptrace_hbp_num_to_idx(num); if (num & 1) { err = ptrace_hbp_get_addr(note_type, tsk, idx, &addr); diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 76809ccd309c..d5718a060672 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -59,6 +59,11 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) #ifdef CONFIG_FUNCTION_GRAPH_TRACER if (tsk->ret_stack && (frame->pc == (unsigned long)return_to_handler)) { + if (WARN_ON_ONCE(frame->graph == -1)) + return -EINVAL; + if (frame->graph < -1) + frame->graph += FTRACE_NOTRACE_DEPTH; + /* * This is a case where function graph tracer has * modified a return address (LR) in a stack frame diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c index 8b8bbd3eaa52..a382b2a1b84e 100644 --- a/arch/arm64/kernel/sys_compat.c +++ b/arch/arm64/kernel/sys_compat.c @@ -57,7 +57,7 @@ do_compat_cache_op(unsigned long start, unsigned long end, int flags) if (end < start || flags) return -EINVAL; - if (!access_ok(VERIFY_READ, start, end - start)) + if (!access_ok(VERIFY_READ, (const void __user *)start, end - start)) return -EFAULT; return __do_compat_cache_op(start, end); diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c index a4391280fba9..f258636273c9 100644 --- a/arch/arm64/kernel/time.c +++ b/arch/arm64/kernel/time.c @@ -52,7 +52,7 @@ unsigned long profile_pc(struct pt_regs *regs) frame.fp = regs->regs[29]; frame.pc = regs->pc; #ifdef CONFIG_FUNCTION_GRAPH_TRACER - frame.graph = -1; /* no task info */ + frame.graph = current->curr_ret_stack; #endif do { int ret = unwind_frame(NULL, &frame); diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index bbb0fde2780e..eb2d15147e8d 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -57,7 +57,7 @@ static const char *handler[]= { "Error" }; -int show_unhandled_signals = 1; +int show_unhandled_signals = 0; static void dump_backtrace_entry(unsigned long where) { @@ -526,14 +526,6 @@ asmlinkage long do_ni_syscall(struct pt_regs *regs) } #endif - if (show_unhandled_signals_ratelimited()) { - pr_info("%s[%d]: syscall %d\n", current->comm, - task_pid_nr(current), regs->syscallno); - dump_instr("", regs); - if (user_mode(regs)) - __show_regs(regs); - } - return sys_ni_syscall(); } diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 116252a8d3a5..870f4b1587f9 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -407,8 +407,10 @@ again: u32 midr = read_cpuid_id(); /* Apply BTAC predictors mitigation to all Falkor chips */ - if ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1) + if (((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR) || + ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1)) { __qcom_hyp_sanitize_btac_predictors(); + } } fp_enabled = __fpsimd_enabled(); diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c index 7b60d62ac593..65dfc8571bf8 100644 --- a/arch/arm64/mm/dump.c +++ b/arch/arm64/mm/dump.c @@ -286,48 +286,52 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level, } -static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start) +static void walk_pte(struct pg_state *st, pmd_t *pmdp, unsigned long start) { - pte_t *pte = pte_offset_kernel(pmd, 0UL); + pte_t *ptep = pte_offset_kernel(pmdp, 0UL); unsigned long addr; unsigned i; - for (i = 0; i < PTRS_PER_PTE; i++, pte++) { + for (i = 0; i < PTRS_PER_PTE; i++, ptep++) { addr = start + i * PAGE_SIZE; - note_page(st, addr, 4, pte_val(*pte)); + note_page(st, addr, 4, READ_ONCE(pte_val(*ptep))); } } -static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) +static void walk_pmd(struct pg_state *st, pud_t *pudp, unsigned long start) { - pmd_t *pmd = pmd_offset(pud, 0UL); + pmd_t *pmdp = pmd_offset(pudp, 0UL); unsigned long addr; unsigned i; - for (i = 0; i < PTRS_PER_PMD; i++, pmd++) { + for (i = 0; i < PTRS_PER_PMD; i++, pmdp++) { + pmd_t pmd = READ_ONCE(*pmdp); + addr = start + i * PMD_SIZE; - if (pmd_none(*pmd) || pmd_sect(*pmd)) { - note_page(st, addr, 3, pmd_val(*pmd)); + if (pmd_none(pmd) || pmd_sect(pmd)) { + note_page(st, addr, 3, pmd_val(pmd)); } else { - BUG_ON(pmd_bad(*pmd)); - walk_pte(st, pmd, addr); + BUG_ON(pmd_bad(pmd)); + walk_pte(st, pmdp, addr); } } } -static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) +static void walk_pud(struct pg_state *st, pgd_t *pgdp, unsigned long start) { - pud_t *pud = pud_offset(pgd, 0UL); + pud_t *pudp = pud_offset(pgdp, 0UL); unsigned long addr; unsigned i; - for (i = 0; i < PTRS_PER_PUD; i++, pud++) { + for (i = 0; i < PTRS_PER_PUD; i++, pudp++) { + pud_t pud = READ_ONCE(*pudp); + addr = start + i * PUD_SIZE; - if (pud_none(*pud) || pud_sect(*pud)) { - note_page(st, addr, 2, pud_val(*pud)); + if (pud_none(pud) || pud_sect(pud)) { + note_page(st, addr, 2, pud_val(pud)); } else { - BUG_ON(pud_bad(*pud)); - walk_pmd(st, pud, addr); + BUG_ON(pud_bad(pud)); + walk_pmd(st, pudp, addr); } } } @@ -335,17 +339,19 @@ static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) static void walk_pgd(struct pg_state *st, struct mm_struct *mm, unsigned long start) { - pgd_t *pgd = pgd_offset(mm, 0UL); + pgd_t *pgdp = pgd_offset(mm, 0UL); unsigned i; unsigned long addr; - for (i = 0; i < PTRS_PER_PGD; i++, pgd++) { + for (i = 0; i < PTRS_PER_PGD; i++, pgdp++) { + pgd_t pgd = READ_ONCE(*pgdp); + addr = start + i * PGDIR_SIZE; - if (pgd_none(*pgd)) { - note_page(st, addr, 1, pgd_val(*pgd)); + if (pgd_none(pgd)) { + note_page(st, addr, 1, pgd_val(pgd)); } else { - BUG_ON(pgd_bad(*pgd)); - walk_pud(st, pgd, addr); + BUG_ON(pgd_bad(pgd)); + walk_pud(st, pgdp, addr); } } } diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index f76bb2c3c943..bff11553eb05 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -130,7 +130,8 @@ static void mem_abort_decode(unsigned int esr) void show_pte(unsigned long addr) { struct mm_struct *mm; - pgd_t *pgd; + pgd_t *pgdp; + pgd_t pgd; if (addr < TASK_SIZE) { /* TTBR0 */ @@ -149,33 +150,37 @@ void show_pte(unsigned long addr) return; } - pr_alert("%s pgtable: %luk pages, %u-bit VAs, pgd = %p\n", + pr_alert("%s pgtable: %luk pages, %u-bit VAs, pgdp = %p\n", mm == &init_mm ? "swapper" : "user", PAGE_SIZE / SZ_1K, VA_BITS, mm->pgd); - pgd = pgd_offset(mm, addr); - pr_alert("[%016lx] *pgd=%016llx", addr, pgd_val(*pgd)); + pgdp = pgd_offset(mm, addr); + pgd = READ_ONCE(*pgdp); + pr_alert("[%016lx] pgd=%016llx", addr, pgd_val(pgd)); do { - pud_t *pud; - pmd_t *pmd; - pte_t *pte; + pud_t *pudp, pud; + pmd_t *pmdp, pmd; + pte_t *ptep, pte; - if (pgd_none(*pgd) || pgd_bad(*pgd)) + if (pgd_none(pgd) || pgd_bad(pgd)) break; - pud = pud_offset(pgd, addr); - pr_cont(", *pud=%016llx", pud_val(*pud)); - if (pud_none(*pud) || pud_bad(*pud)) + pudp = pud_offset(pgdp, addr); + pud = READ_ONCE(*pudp); + pr_cont(", pud=%016llx", pud_val(pud)); + if (pud_none(pud) || pud_bad(pud)) break; - pmd = pmd_offset(pud, addr); - pr_cont(", *pmd=%016llx", pmd_val(*pmd)); - if (pmd_none(*pmd) || pmd_bad(*pmd)) + pmdp = pmd_offset(pudp, addr); + pmd = READ_ONCE(*pmdp); + pr_cont(", pmd=%016llx", pmd_val(pmd)); + if (pmd_none(pmd) || pmd_bad(pmd)) break; - pte = pte_offset_map(pmd, addr); - pr_cont(", *pte=%016llx", pte_val(*pte)); - pte_unmap(pte); + ptep = pte_offset_map(pmdp, addr); + pte = READ_ONCE(*ptep); + pr_cont(", pte=%016llx", pte_val(pte)); + pte_unmap(ptep); } while(0); pr_cont("\n"); @@ -196,8 +201,9 @@ int ptep_set_access_flags(struct vm_area_struct *vma, pte_t entry, int dirty) { pteval_t old_pteval, pteval; + pte_t pte = READ_ONCE(*ptep); - if (pte_same(*ptep, entry)) + if (pte_same(pte, entry)) return 0; /* only preserve the access flags and write permission */ @@ -210,7 +216,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma, * (calculated as: a & b == ~(~a | ~b)). */ pte_val(entry) ^= PTE_RDONLY; - pteval = READ_ONCE(pte_val(*ptep)); + pteval = pte_val(pte); do { old_pteval = pteval; pteval ^= PTE_RDONLY; diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 6cb0fa92a651..ecc6818191df 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -54,14 +54,14 @@ static inline pgprot_t pte_pgprot(pte_t pte) static int find_num_contig(struct mm_struct *mm, unsigned long addr, pte_t *ptep, size_t *pgsize) { - pgd_t *pgd = pgd_offset(mm, addr); - pud_t *pud; - pmd_t *pmd; + pgd_t *pgdp = pgd_offset(mm, addr); + pud_t *pudp; + pmd_t *pmdp; *pgsize = PAGE_SIZE; - pud = pud_offset(pgd, addr); - pmd = pmd_offset(pud, addr); - if ((pte_t *)pmd == ptep) { + pudp = pud_offset(pgdp, addr); + pmdp = pmd_offset(pudp, addr); + if ((pte_t *)pmdp == ptep) { *pgsize = PMD_SIZE; return CONT_PMDS; } @@ -181,11 +181,8 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, clear_flush(mm, addr, ptep, pgsize, ncontig); - for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) { - pr_debug("%s: set pte %p to 0x%llx\n", __func__, ptep, - pte_val(pfn_pte(pfn, hugeprot))); + for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot)); - } } void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr, @@ -203,20 +200,20 @@ void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) { - pgd_t *pgd; - pud_t *pud; - pte_t *pte = NULL; - - pr_debug("%s: addr:0x%lx sz:0x%lx\n", __func__, addr, sz); - pgd = pgd_offset(mm, addr); - pud = pud_alloc(mm, pgd, addr); - if (!pud) + pgd_t *pgdp; + pud_t *pudp; + pmd_t *pmdp; + pte_t *ptep = NULL; + + pgdp = pgd_offset(mm, addr); + pudp = pud_alloc(mm, pgdp, addr); + if (!pudp) return NULL; if (sz == PUD_SIZE) { - pte = (pte_t *)pud; + ptep = (pte_t *)pudp; } else if (sz == (PAGE_SIZE * CONT_PTES)) { - pmd_t *pmd = pmd_alloc(mm, pud, addr); + pmdp = pmd_alloc(mm, pudp, addr); WARN_ON(addr & (sz - 1)); /* @@ -226,60 +223,55 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, * will be no pte_unmap() to correspond with this * pte_alloc_map(). */ - pte = pte_alloc_map(mm, pmd, addr); + ptep = pte_alloc_map(mm, pmdp, addr); } else if (sz == PMD_SIZE) { if (IS_ENABLED(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) && - pud_none(*pud)) - pte = huge_pmd_share(mm, addr, pud); + pud_none(READ_ONCE(*pudp))) + ptep = huge_pmd_share(mm, addr, pudp); else - pte = (pte_t *)pmd_alloc(mm, pud, addr); + ptep = (pte_t *)pmd_alloc(mm, pudp, addr); } else if (sz == (PMD_SIZE * CONT_PMDS)) { - pmd_t *pmd; - - pmd = pmd_alloc(mm, pud, addr); + pmdp = pmd_alloc(mm, pudp, addr); WARN_ON(addr & (sz - 1)); - return (pte_t *)pmd; + return (pte_t *)pmdp; } - pr_debug("%s: addr:0x%lx sz:0x%lx ret pte=%p/0x%llx\n", __func__, addr, - sz, pte, pte_val(*pte)); - return pte; + return ptep; } pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz) { - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; + pgd_t *pgdp; + pud_t *pudp, pud; + pmd_t *pmdp, pmd; - pgd = pgd_offset(mm, addr); - pr_debug("%s: addr:0x%lx pgd:%p\n", __func__, addr, pgd); - if (!pgd_present(*pgd)) + pgdp = pgd_offset(mm, addr); + if (!pgd_present(READ_ONCE(*pgdp))) return NULL; - pud = pud_offset(pgd, addr); - if (sz != PUD_SIZE && pud_none(*pud)) + pudp = pud_offset(pgdp, addr); + pud = READ_ONCE(*pudp); + if (sz != PUD_SIZE && pud_none(pud)) return NULL; /* hugepage or swap? */ - if (pud_huge(*pud) || !pud_present(*pud)) - return (pte_t *)pud; + if (pud_huge(pud) || !pud_present(pud)) + return (pte_t *)pudp; /* table; check the next level */ if (sz == CONT_PMD_SIZE) addr &= CONT_PMD_MASK; - pmd = pmd_offset(pud, addr); + pmdp = pmd_offset(pudp, addr); + pmd = READ_ONCE(*pmdp); if (!(sz == PMD_SIZE || sz == CONT_PMD_SIZE) && - pmd_none(*pmd)) + pmd_none(pmd)) return NULL; - if (pmd_huge(*pmd) || !pmd_present(*pmd)) - return (pte_t *)pmd; + if (pmd_huge(pmd) || !pmd_present(pmd)) + return (pte_t *)pmdp; - if (sz == CONT_PTE_SIZE) { - pte_t *pte = pte_offset_kernel(pmd, (addr & CONT_PTE_MASK)); - return pte; - } + if (sz == CONT_PTE_SIZE) + return pte_offset_kernel(pmdp, (addr & CONT_PTE_MASK)); return NULL; } @@ -367,7 +359,7 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm, size_t pgsize; pte_t pte; - if (!pte_cont(*ptep)) { + if (!pte_cont(READ_ONCE(*ptep))) { ptep_set_wrprotect(mm, addr, ptep); return; } @@ -391,7 +383,7 @@ void huge_ptep_clear_flush(struct vm_area_struct *vma, size_t pgsize; int ncontig; - if (!pte_cont(*ptep)) { + if (!pte_cont(READ_ONCE(*ptep))) { ptep_clear_flush(vma, addr, ptep); return; } diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index 6e02e6fb4c7b..dabfc1ecda3d 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -44,92 +44,92 @@ static phys_addr_t __init kasan_alloc_zeroed_page(int node) return __pa(p); } -static pte_t *__init kasan_pte_offset(pmd_t *pmd, unsigned long addr, int node, +static pte_t *__init kasan_pte_offset(pmd_t *pmdp, unsigned long addr, int node, bool early) { - if (pmd_none(*pmd)) { + if (pmd_none(READ_ONCE(*pmdp))) { phys_addr_t pte_phys = early ? __pa_symbol(kasan_zero_pte) : kasan_alloc_zeroed_page(node); - __pmd_populate(pmd, pte_phys, PMD_TYPE_TABLE); + __pmd_populate(pmdp, pte_phys, PMD_TYPE_TABLE); } - return early ? pte_offset_kimg(pmd, addr) - : pte_offset_kernel(pmd, addr); + return early ? pte_offset_kimg(pmdp, addr) + : pte_offset_kernel(pmdp, addr); } -static pmd_t *__init kasan_pmd_offset(pud_t *pud, unsigned long addr, int node, +static pmd_t *__init kasan_pmd_offset(pud_t *pudp, unsigned long addr, int node, bool early) { - if (pud_none(*pud)) { + if (pud_none(READ_ONCE(*pudp))) { phys_addr_t pmd_phys = early ? __pa_symbol(kasan_zero_pmd) : kasan_alloc_zeroed_page(node); - __pud_populate(pud, pmd_phys, PMD_TYPE_TABLE); + __pud_populate(pudp, pmd_phys, PMD_TYPE_TABLE); } - return early ? pmd_offset_kimg(pud, addr) : pmd_offset(pud, addr); + return early ? pmd_offset_kimg(pudp, addr) : pmd_offset(pudp, addr); } -static pud_t *__init kasan_pud_offset(pgd_t *pgd, unsigned long addr, int node, +static pud_t *__init kasan_pud_offset(pgd_t *pgdp, unsigned long addr, int node, bool early) { - if (pgd_none(*pgd)) { + if (pgd_none(READ_ONCE(*pgdp))) { phys_addr_t pud_phys = early ? __pa_symbol(kasan_zero_pud) : kasan_alloc_zeroed_page(node); - __pgd_populate(pgd, pud_phys, PMD_TYPE_TABLE); + __pgd_populate(pgdp, pud_phys, PMD_TYPE_TABLE); } - return early ? pud_offset_kimg(pgd, addr) : pud_offset(pgd, addr); + return early ? pud_offset_kimg(pgdp, addr) : pud_offset(pgdp, addr); } -static void __init kasan_pte_populate(pmd_t *pmd, unsigned long addr, +static void __init kasan_pte_populate(pmd_t *pmdp, unsigned long addr, unsigned long end, int node, bool early) { unsigned long next; - pte_t *pte = kasan_pte_offset(pmd, addr, node, early); + pte_t *ptep = kasan_pte_offset(pmdp, addr, node, early); do { phys_addr_t page_phys = early ? __pa_symbol(kasan_zero_page) : kasan_alloc_zeroed_page(node); next = addr + PAGE_SIZE; - set_pte(pte, pfn_pte(__phys_to_pfn(page_phys), PAGE_KERNEL)); - } while (pte++, addr = next, addr != end && pte_none(*pte)); + set_pte(ptep, pfn_pte(__phys_to_pfn(page_phys), PAGE_KERNEL)); + } while (ptep++, addr = next, addr != end && pte_none(READ_ONCE(*ptep))); } -static void __init kasan_pmd_populate(pud_t *pud, unsigned long addr, +static void __init kasan_pmd_populate(pud_t *pudp, unsigned long addr, unsigned long end, int node, bool early) { unsigned long next; - pmd_t *pmd = kasan_pmd_offset(pud, addr, node, early); + pmd_t *pmdp = kasan_pmd_offset(pudp, addr, node, early); do { next = pmd_addr_end(addr, end); - kasan_pte_populate(pmd, addr, next, node, early); - } while (pmd++, addr = next, addr != end && pmd_none(*pmd)); + kasan_pte_populate(pmdp, addr, next, node, early); + } while (pmdp++, addr = next, addr != end && pmd_none(READ_ONCE(*pmdp))); } -static void __init kasan_pud_populate(pgd_t *pgd, unsigned long addr, +static void __init kasan_pud_populate(pgd_t *pgdp, unsigned long addr, unsigned long end, int node, bool early) { unsigned long next; - pud_t *pud = kasan_pud_offset(pgd, addr, node, early); + pud_t *pudp = kasan_pud_offset(pgdp, addr, node, early); do { next = pud_addr_end(addr, end); - kasan_pmd_populate(pud, addr, next, node, early); - } while (pud++, addr = next, addr != end && pud_none(*pud)); + kasan_pmd_populate(pudp, addr, next, node, early); + } while (pudp++, addr = next, addr != end && pud_none(READ_ONCE(*pudp))); } static void __init kasan_pgd_populate(unsigned long addr, unsigned long end, int node, bool early) { unsigned long next; - pgd_t *pgd; + pgd_t *pgdp; - pgd = pgd_offset_k(addr); + pgdp = pgd_offset_k(addr); do { next = pgd_addr_end(addr, end); - kasan_pud_populate(pgd, addr, next, node, early); - } while (pgd++, addr = next, addr != end); + kasan_pud_populate(pgdp, addr, next, node, early); + } while (pgdp++, addr = next, addr != end); } /* The early shadow maps everything to a single page of zeroes */ @@ -155,14 +155,14 @@ static void __init kasan_map_populate(unsigned long start, unsigned long end, */ void __init kasan_copy_shadow(pgd_t *pgdir) { - pgd_t *pgd, *pgd_new, *pgd_end; + pgd_t *pgdp, *pgdp_new, *pgdp_end; - pgd = pgd_offset_k(KASAN_SHADOW_START); - pgd_end = pgd_offset_k(KASAN_SHADOW_END); - pgd_new = pgd_offset_raw(pgdir, KASAN_SHADOW_START); + pgdp = pgd_offset_k(KASAN_SHADOW_START); + pgdp_end = pgd_offset_k(KASAN_SHADOW_END); + pgdp_new = pgd_offset_raw(pgdir, KASAN_SHADOW_START); do { - set_pgd(pgd_new, *pgd); - } while (pgd++, pgd_new++, pgd != pgd_end); + set_pgd(pgdp_new, READ_ONCE(*pgdp)); + } while (pgdp++, pgdp_new++, pgdp != pgdp_end); } static void __init clear_pgds(unsigned long start, diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 4694cda823c9..8c704f1e53c2 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -108,7 +108,7 @@ static bool pgattr_change_is_safe(u64 old, u64 new) * The following mapping attributes may be updated in live * kernel mappings without the need for break-before-make. */ - static const pteval_t mask = PTE_PXN | PTE_RDONLY | PTE_WRITE; + static const pteval_t mask = PTE_PXN | PTE_RDONLY | PTE_WRITE | PTE_NG; /* creating or taking down mappings is always safe */ if (old == 0 || new == 0) @@ -118,52 +118,55 @@ static bool pgattr_change_is_safe(u64 old, u64 new) if ((old | new) & PTE_CONT) return false; - /* Transitioning from Global to Non-Global is safe */ - if (((old ^ new) == PTE_NG) && (new & PTE_NG)) - return true; + /* Transitioning from Non-Global to Global is unsafe */ + if (old & ~new & PTE_NG) + return false; return ((old ^ new) & ~mask) == 0; } -static void init_pte(pmd_t *pmd, unsigned long addr, unsigned long end, +static void init_pte(pmd_t *pmdp, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot) { - pte_t *pte; + pte_t *ptep; - pte = pte_set_fixmap_offset(pmd, addr); + ptep = pte_set_fixmap_offset(pmdp, addr); do { - pte_t old_pte = *pte; + pte_t old_pte = READ_ONCE(*ptep); - set_pte(pte, pfn_pte(__phys_to_pfn(phys), prot)); + set_pte(ptep, pfn_pte(__phys_to_pfn(phys), prot)); /* * After the PTE entry has been populated once, we * only allow updates to the permission attributes. */ - BUG_ON(!pgattr_change_is_safe(pte_val(old_pte), pte_val(*pte))); + BUG_ON(!pgattr_change_is_safe(pte_val(old_pte), + READ_ONCE(pte_val(*ptep)))); phys += PAGE_SIZE; - } while (pte++, addr += PAGE_SIZE, addr != end); + } while (ptep++, addr += PAGE_SIZE, addr != end); pte_clear_fixmap(); } -static void alloc_init_cont_pte(pmd_t *pmd, unsigned long addr, +static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, phys_addr_t (*pgtable_alloc)(void), int flags) { unsigned long next; + pmd_t pmd = READ_ONCE(*pmdp); - BUG_ON(pmd_sect(*pmd)); - if (pmd_none(*pmd)) { + BUG_ON(pmd_sect(pmd)); + if (pmd_none(pmd)) { phys_addr_t pte_phys; BUG_ON(!pgtable_alloc); pte_phys = pgtable_alloc(); - __pmd_populate(pmd, pte_phys, PMD_TYPE_TABLE); + __pmd_populate(pmdp, pte_phys, PMD_TYPE_TABLE); + pmd = READ_ONCE(*pmdp); } - BUG_ON(pmd_bad(*pmd)); + BUG_ON(pmd_bad(pmd)); do { pgprot_t __prot = prot; @@ -175,67 +178,69 @@ static void alloc_init_cont_pte(pmd_t *pmd, unsigned long addr, (flags & NO_CONT_MAPPINGS) == 0) __prot = __pgprot(pgprot_val(prot) | PTE_CONT); - init_pte(pmd, addr, next, phys, __prot); + init_pte(pmdp, addr, next, phys, __prot); phys += next - addr; } while (addr = next, addr != end); } -static void init_pmd(pud_t *pud, unsigned long addr, unsigned long end, +static void init_pmd(pud_t *pudp, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, phys_addr_t (*pgtable_alloc)(void), int flags) { unsigned long next; - pmd_t *pmd; + pmd_t *pmdp; - pmd = pmd_set_fixmap_offset(pud, addr); + pmdp = pmd_set_fixmap_offset(pudp, addr); do { - pmd_t old_pmd = *pmd; + pmd_t old_pmd = READ_ONCE(*pmdp); next = pmd_addr_end(addr, end); /* try section mapping first */ if (((addr | next | phys) & ~SECTION_MASK) == 0 && (flags & NO_BLOCK_MAPPINGS) == 0) { - pmd_set_huge(pmd, phys, prot); + pmd_set_huge(pmdp, phys, prot); /* * After the PMD entry has been populated once, we * only allow updates to the permission attributes. */ BUG_ON(!pgattr_change_is_safe(pmd_val(old_pmd), - pmd_val(*pmd))); + READ_ONCE(pmd_val(*pmdp)))); } else { - alloc_init_cont_pte(pmd, addr, next, phys, prot, + alloc_init_cont_pte(pmdp, addr, next, phys, prot, pgtable_alloc, flags); BUG_ON(pmd_val(old_pmd) != 0 && - pmd_val(old_pmd) != pmd_val(*pmd)); + pmd_val(old_pmd) != READ_ONCE(pmd_val(*pmdp))); } phys += next - addr; - } while (pmd++, addr = next, addr != end); + } while (pmdp++, addr = next, addr != end); pmd_clear_fixmap(); } -static void alloc_init_cont_pmd(pud_t *pud, unsigned long addr, +static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, phys_addr_t (*pgtable_alloc)(void), int flags) { unsigned long next; + pud_t pud = READ_ONCE(*pudp); /* * Check for initial section mappings in the pgd/pud. */ - BUG_ON(pud_sect(*pud)); - if (pud_none(*pud)) { + BUG_ON(pud_sect(pud)); + if (pud_none(pud)) { phys_addr_t pmd_phys; BUG_ON(!pgtable_alloc); pmd_phys = pgtable_alloc(); - __pud_populate(pud, pmd_phys, PUD_TYPE_TABLE); + __pud_populate(pudp, pmd_phys, PUD_TYPE_TABLE); + pud = READ_ONCE(*pudp); } - BUG_ON(pud_bad(*pud)); + BUG_ON(pud_bad(pud)); do { pgprot_t __prot = prot; @@ -247,7 +252,7 @@ static void alloc_init_cont_pmd(pud_t *pud, unsigned long addr, (flags & NO_CONT_MAPPINGS) == 0) __prot = __pgprot(pgprot_val(prot) | PTE_CONT); - init_pmd(pud, addr, next, phys, __prot, pgtable_alloc, flags); + init_pmd(pudp, addr, next, phys, __prot, pgtable_alloc, flags); phys += next - addr; } while (addr = next, addr != end); @@ -265,25 +270,27 @@ static inline bool use_1G_block(unsigned long addr, unsigned long next, return true; } -static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end, - phys_addr_t phys, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(void), - int flags) +static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end, + phys_addr_t phys, pgprot_t prot, + phys_addr_t (*pgtable_alloc)(void), + int flags) { - pud_t *pud; unsigned long next; + pud_t *pudp; + pgd_t pgd = READ_ONCE(*pgdp); - if (pgd_none(*pgd)) { + if (pgd_none(pgd)) { phys_addr_t pud_phys; BUG_ON(!pgtable_alloc); pud_phys = pgtable_alloc(); - __pgd_populate(pgd, pud_phys, PUD_TYPE_TABLE); + __pgd_populate(pgdp, pud_phys, PUD_TYPE_TABLE); + pgd = READ_ONCE(*pgdp); } - BUG_ON(pgd_bad(*pgd)); + BUG_ON(pgd_bad(pgd)); - pud = pud_set_fixmap_offset(pgd, addr); + pudp = pud_set_fixmap_offset(pgdp, addr); do { - pud_t old_pud = *pud; + pud_t old_pud = READ_ONCE(*pudp); next = pud_addr_end(addr, end); @@ -292,23 +299,23 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end, */ if (use_1G_block(addr, next, phys) && (flags & NO_BLOCK_MAPPINGS) == 0) { - pud_set_huge(pud, phys, prot); + pud_set_huge(pudp, phys, prot); /* * After the PUD entry has been populated once, we * only allow updates to the permission attributes. */ BUG_ON(!pgattr_change_is_safe(pud_val(old_pud), - pud_val(*pud))); + READ_ONCE(pud_val(*pudp)))); } else { - alloc_init_cont_pmd(pud, addr, next, phys, prot, + alloc_init_cont_pmd(pudp, addr, next, phys, prot, pgtable_alloc, flags); BUG_ON(pud_val(old_pud) != 0 && - pud_val(old_pud) != pud_val(*pud)); + pud_val(old_pud) != READ_ONCE(pud_val(*pudp))); } phys += next - addr; - } while (pud++, addr = next, addr != end); + } while (pudp++, addr = next, addr != end); pud_clear_fixmap(); } @@ -320,7 +327,7 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, int flags) { unsigned long addr, length, end, next; - pgd_t *pgd = pgd_offset_raw(pgdir, virt); + pgd_t *pgdp = pgd_offset_raw(pgdir, virt); /* * If the virtual and physical address don't have the same offset @@ -336,10 +343,10 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, end = addr + length; do { next = pgd_addr_end(addr, end); - alloc_init_pud(pgd, addr, next, phys, prot, pgtable_alloc, + alloc_init_pud(pgdp, addr, next, phys, prot, pgtable_alloc, flags); phys += next - addr; - } while (pgd++, addr = next, addr != end); + } while (pgdp++, addr = next, addr != end); } static phys_addr_t pgd_pgtable_alloc(void) @@ -401,10 +408,10 @@ static void update_mapping_prot(phys_addr_t phys, unsigned long virt, flush_tlb_kernel_range(virt, virt + size); } -static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, +static void __init __map_memblock(pgd_t *pgdp, phys_addr_t start, phys_addr_t end, pgprot_t prot, int flags) { - __create_pgd_mapping(pgd, start, __phys_to_virt(start), end - start, + __create_pgd_mapping(pgdp, start, __phys_to_virt(start), end - start, prot, early_pgtable_alloc, flags); } @@ -418,7 +425,7 @@ void __init mark_linear_text_alias_ro(void) PAGE_KERNEL_RO); } -static void __init map_mem(pgd_t *pgd) +static void __init map_mem(pgd_t *pgdp) { phys_addr_t kernel_start = __pa_symbol(_text); phys_addr_t kernel_end = __pa_symbol(__init_begin); @@ -451,7 +458,7 @@ static void __init map_mem(pgd_t *pgd) if (memblock_is_nomap(reg)) continue; - __map_memblock(pgd, start, end, PAGE_KERNEL, flags); + __map_memblock(pgdp, start, end, PAGE_KERNEL, flags); } /* @@ -464,7 +471,7 @@ static void __init map_mem(pgd_t *pgd) * Note that contiguous mappings cannot be remapped in this way, * so we should avoid them here. */ - __map_memblock(pgd, kernel_start, kernel_end, + __map_memblock(pgdp, kernel_start, kernel_end, PAGE_KERNEL, NO_CONT_MAPPINGS); memblock_clear_nomap(kernel_start, kernel_end - kernel_start); @@ -475,7 +482,7 @@ static void __init map_mem(pgd_t *pgd) * through /sys/kernel/kexec_crash_size interface. */ if (crashk_res.end) { - __map_memblock(pgd, crashk_res.start, crashk_res.end + 1, + __map_memblock(pgdp, crashk_res.start, crashk_res.end + 1, PAGE_KERNEL, NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS); memblock_clear_nomap(crashk_res.start, @@ -499,7 +506,7 @@ void mark_rodata_ro(void) debug_checkwx(); } -static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end, +static void __init map_kernel_segment(pgd_t *pgdp, void *va_start, void *va_end, pgprot_t prot, struct vm_struct *vma, int flags, unsigned long vm_flags) { @@ -509,7 +516,7 @@ static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end, BUG_ON(!PAGE_ALIGNED(pa_start)); BUG_ON(!PAGE_ALIGNED(size)); - __create_pgd_mapping(pgd, pa_start, (unsigned long)va_start, size, prot, + __create_pgd_mapping(pgdp, pa_start, (unsigned long)va_start, size, prot, early_pgtable_alloc, flags); if (!(vm_flags & VM_NO_GUARD)) @@ -562,7 +569,7 @@ core_initcall(map_entry_trampoline); /* * Create fine-grained mappings for the kernel. */ -static void __init map_kernel(pgd_t *pgd) +static void __init map_kernel(pgd_t *pgdp) { static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_inittext, vmlinux_initdata, vmlinux_data; @@ -578,24 +585,24 @@ static void __init map_kernel(pgd_t *pgd) * Only rodata will be remapped with different permissions later on, * all other segments are allowed to use contiguous mappings. */ - map_kernel_segment(pgd, _text, _etext, text_prot, &vmlinux_text, 0, + map_kernel_segment(pgdp, _text, _etext, text_prot, &vmlinux_text, 0, VM_NO_GUARD); - map_kernel_segment(pgd, __start_rodata, __inittext_begin, PAGE_KERNEL, + map_kernel_segment(pgdp, __start_rodata, __inittext_begin, PAGE_KERNEL, &vmlinux_rodata, NO_CONT_MAPPINGS, VM_NO_GUARD); - map_kernel_segment(pgd, __inittext_begin, __inittext_end, text_prot, + map_kernel_segment(pgdp, __inittext_begin, __inittext_end, text_prot, &vmlinux_inittext, 0, VM_NO_GUARD); - map_kernel_segment(pgd, __initdata_begin, __initdata_end, PAGE_KERNEL, + map_kernel_segment(pgdp, __initdata_begin, __initdata_end, PAGE_KERNEL, &vmlinux_initdata, 0, VM_NO_GUARD); - map_kernel_segment(pgd, _data, _end, PAGE_KERNEL, &vmlinux_data, 0, 0); + map_kernel_segment(pgdp, _data, _end, PAGE_KERNEL, &vmlinux_data, 0, 0); - if (!pgd_val(*pgd_offset_raw(pgd, FIXADDR_START))) { + if (!READ_ONCE(pgd_val(*pgd_offset_raw(pgdp, FIXADDR_START)))) { /* * The fixmap falls in a separate pgd to the kernel, and doesn't * live in the carveout for the swapper_pg_dir. We can simply * re-use the existing dir for the fixmap. */ - set_pgd(pgd_offset_raw(pgd, FIXADDR_START), - *pgd_offset_k(FIXADDR_START)); + set_pgd(pgd_offset_raw(pgdp, FIXADDR_START), + READ_ONCE(*pgd_offset_k(FIXADDR_START))); } else if (CONFIG_PGTABLE_LEVELS > 3) { /* * The fixmap shares its top level pgd entry with the kernel @@ -604,14 +611,15 @@ static void __init map_kernel(pgd_t *pgd) * entry instead. */ BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES)); - pud_populate(&init_mm, pud_set_fixmap_offset(pgd, FIXADDR_START), + pud_populate(&init_mm, + pud_set_fixmap_offset(pgdp, FIXADDR_START), lm_alias(bm_pmd)); pud_clear_fixmap(); } else { BUG(); } - kasan_copy_shadow(pgd); + kasan_copy_shadow(pgdp); } /* @@ -621,10 +629,10 @@ static void __init map_kernel(pgd_t *pgd) void __init paging_init(void) { phys_addr_t pgd_phys = early_pgtable_alloc(); - pgd_t *pgd = pgd_set_fixmap(pgd_phys); + pgd_t *pgdp = pgd_set_fixmap(pgd_phys); - map_kernel(pgd); - map_mem(pgd); + map_kernel(pgdp); + map_mem(pgdp); /* * We want to reuse the original swapper_pg_dir so we don't have to @@ -635,7 +643,7 @@ void __init paging_init(void) * To do this we need to go via a temporary pgd. */ cpu_replace_ttbr1(__va(pgd_phys)); - memcpy(swapper_pg_dir, pgd, PGD_SIZE); + memcpy(swapper_pg_dir, pgdp, PGD_SIZE); cpu_replace_ttbr1(lm_alias(swapper_pg_dir)); pgd_clear_fixmap(); @@ -655,37 +663,40 @@ void __init paging_init(void) */ int kern_addr_valid(unsigned long addr) { - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; + pgd_t *pgdp; + pud_t *pudp, pud; + pmd_t *pmdp, pmd; + pte_t *ptep, pte; if ((((long)addr) >> VA_BITS) != -1UL) return 0; - pgd = pgd_offset_k(addr); - if (pgd_none(*pgd)) + pgdp = pgd_offset_k(addr); + if (pgd_none(READ_ONCE(*pgdp))) return 0; - pud = pud_offset(pgd, addr); - if (pud_none(*pud)) + pudp = pud_offset(pgdp, addr); + pud = READ_ONCE(*pudp); + if (pud_none(pud)) return 0; - if (pud_sect(*pud)) - return pfn_valid(pud_pfn(*pud)); + if (pud_sect(pud)) + return pfn_valid(pud_pfn(pud)); - pmd = pmd_offset(pud, addr); - if (pmd_none(*pmd)) + pmdp = pmd_offset(pudp, addr); + pmd = READ_ONCE(*pmdp); + if (pmd_none(pmd)) return 0; - if (pmd_sect(*pmd)) - return pfn_valid(pmd_pfn(*pmd)); + if (pmd_sect(pmd)) + return pfn_valid(pmd_pfn(pmd)); - pte = pte_offset_kernel(pmd, addr); - if (pte_none(*pte)) + ptep = pte_offset_kernel(pmdp, addr); + pte = READ_ONCE(*ptep); + if (pte_none(pte)) return 0; - return pfn_valid(pte_pfn(*pte)); + return pfn_valid(pte_pfn(pte)); } #ifdef CONFIG_SPARSEMEM_VMEMMAP #if !ARM64_SWAPPER_USES_SECTION_MAPS @@ -700,32 +711,32 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, { unsigned long addr = start; unsigned long next; - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; + pgd_t *pgdp; + pud_t *pudp; + pmd_t *pmdp; do { next = pmd_addr_end(addr, end); - pgd = vmemmap_pgd_populate(addr, node); - if (!pgd) + pgdp = vmemmap_pgd_populate(addr, node); + if (!pgdp) return -ENOMEM; - pud = vmemmap_pud_populate(pgd, addr, node); - if (!pud) + pudp = vmemmap_pud_populate(pgdp, addr, node); + if (!pudp) return -ENOMEM; - pmd = pmd_offset(pud, addr); - if (pmd_none(*pmd)) { + pmdp = pmd_offset(pudp, addr); + if (pmd_none(READ_ONCE(*pmdp))) { void *p = NULL; p = vmemmap_alloc_block_buf(PMD_SIZE, node); if (!p) return -ENOMEM; - pmd_set_huge(pmd, __pa(p), __pgprot(PROT_SECT_NORMAL)); + pmd_set_huge(pmdp, __pa(p), __pgprot(PROT_SECT_NORMAL)); } else - vmemmap_verify((pte_t *)pmd, node, addr, next); + vmemmap_verify((pte_t *)pmdp, node, addr, next); } while (addr = next, addr != end); return 0; @@ -739,20 +750,22 @@ void vmemmap_free(unsigned long start, unsigned long end, static inline pud_t * fixmap_pud(unsigned long addr) { - pgd_t *pgd = pgd_offset_k(addr); + pgd_t *pgdp = pgd_offset_k(addr); + pgd_t pgd = READ_ONCE(*pgdp); - BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd)); + BUG_ON(pgd_none(pgd) || pgd_bad(pgd)); - return pud_offset_kimg(pgd, addr); + return pud_offset_kimg(pgdp, addr); } static inline pmd_t * fixmap_pmd(unsigned long addr) { - pud_t *pud = fixmap_pud(addr); + pud_t *pudp = fixmap_pud(addr); + pud_t pud = READ_ONCE(*pudp); - BUG_ON(pud_none(*pud) || pud_bad(*pud)); + BUG_ON(pud_none(pud) || pud_bad(pud)); - return pmd_offset_kimg(pud, addr); + return pmd_offset_kimg(pudp, addr); } static inline pte_t * fixmap_pte(unsigned long addr) @@ -768,30 +781,31 @@ static inline pte_t * fixmap_pte(unsigned long addr) */ void __init early_fixmap_init(void) { - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; + pgd_t *pgdp, pgd; + pud_t *pudp; + pmd_t *pmdp; unsigned long addr = FIXADDR_START; - pgd = pgd_offset_k(addr); + pgdp = pgd_offset_k(addr); + pgd = READ_ONCE(*pgdp); if (CONFIG_PGTABLE_LEVELS > 3 && - !(pgd_none(*pgd) || pgd_page_paddr(*pgd) == __pa_symbol(bm_pud))) { + !(pgd_none(pgd) || pgd_page_paddr(pgd) == __pa_symbol(bm_pud))) { /* * We only end up here if the kernel mapping and the fixmap * share the top level pgd entry, which should only happen on * 16k/4 levels configurations. */ BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES)); - pud = pud_offset_kimg(pgd, addr); + pudp = pud_offset_kimg(pgdp, addr); } else { - if (pgd_none(*pgd)) - __pgd_populate(pgd, __pa_symbol(bm_pud), PUD_TYPE_TABLE); - pud = fixmap_pud(addr); + if (pgd_none(pgd)) + __pgd_populate(pgdp, __pa_symbol(bm_pud), PUD_TYPE_TABLE); + pudp = fixmap_pud(addr); } - if (pud_none(*pud)) - __pud_populate(pud, __pa_symbol(bm_pmd), PMD_TYPE_TABLE); - pmd = fixmap_pmd(addr); - __pmd_populate(pmd, __pa_symbol(bm_pte), PMD_TYPE_TABLE); + if (pud_none(READ_ONCE(*pudp))) + __pud_populate(pudp, __pa_symbol(bm_pmd), PMD_TYPE_TABLE); + pmdp = fixmap_pmd(addr); + __pmd_populate(pmdp, __pa_symbol(bm_pte), PMD_TYPE_TABLE); /* * The boot-ioremap range spans multiple pmds, for which @@ -800,11 +814,11 @@ void __init early_fixmap_init(void) BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT) != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT)); - if ((pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN))) - || pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_END))) { + if ((pmdp != fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN))) + || pmdp != fixmap_pmd(fix_to_virt(FIX_BTMAP_END))) { WARN_ON(1); - pr_warn("pmd %p != %p, %p\n", - pmd, fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)), + pr_warn("pmdp %p != %p, %p\n", + pmdp, fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)), fixmap_pmd(fix_to_virt(FIX_BTMAP_END))); pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n", fix_to_virt(FIX_BTMAP_BEGIN)); @@ -824,16 +838,16 @@ void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags) { unsigned long addr = __fix_to_virt(idx); - pte_t *pte; + pte_t *ptep; BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses); - pte = fixmap_pte(addr); + ptep = fixmap_pte(addr); if (pgprot_val(flags)) { - set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags)); + set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, flags)); } else { - pte_clear(&init_mm, addr, pte); + pte_clear(&init_mm, addr, ptep); flush_tlb_kernel_range(addr, addr+PAGE_SIZE); } } @@ -915,36 +929,46 @@ int __init arch_ioremap_pmd_supported(void) return 1; } -int pud_set_huge(pud_t *pud, phys_addr_t phys, pgprot_t prot) +int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot) { pgprot_t sect_prot = __pgprot(PUD_TYPE_SECT | pgprot_val(mk_sect_prot(prot))); + + /* ioremap_page_range doesn't honour BBM */ + if (pud_present(READ_ONCE(*pudp))) + return 0; + BUG_ON(phys & ~PUD_MASK); - set_pud(pud, pfn_pud(__phys_to_pfn(phys), sect_prot)); + set_pud(pudp, pfn_pud(__phys_to_pfn(phys), sect_prot)); return 1; } -int pmd_set_huge(pmd_t *pmd, phys_addr_t phys, pgprot_t prot) +int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot) { pgprot_t sect_prot = __pgprot(PMD_TYPE_SECT | pgprot_val(mk_sect_prot(prot))); + + /* ioremap_page_range doesn't honour BBM */ + if (pmd_present(READ_ONCE(*pmdp))) + return 0; + BUG_ON(phys & ~PMD_MASK); - set_pmd(pmd, pfn_pmd(__phys_to_pfn(phys), sect_prot)); + set_pmd(pmdp, pfn_pmd(__phys_to_pfn(phys), sect_prot)); return 1; } -int pud_clear_huge(pud_t *pud) +int pud_clear_huge(pud_t *pudp) { - if (!pud_sect(*pud)) + if (!pud_sect(READ_ONCE(*pudp))) return 0; - pud_clear(pud); + pud_clear(pudp); return 1; } -int pmd_clear_huge(pmd_t *pmd) +int pmd_clear_huge(pmd_t *pmdp) { - if (!pmd_sect(*pmd)) + if (!pmd_sect(READ_ONCE(*pmdp))) return 0; - pmd_clear(pmd); + pmd_clear(pmdp); return 1; } diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index a682a0a2a0fa..a56359373d8b 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -29,7 +29,7 @@ static int change_page_range(pte_t *ptep, pgtable_t token, unsigned long addr, void *data) { struct page_change_data *cdata = data; - pte_t pte = *ptep; + pte_t pte = READ_ONCE(*ptep); pte = clear_pte_bit(pte, cdata->clear_mask); pte = set_pte_bit(pte, cdata->set_mask); @@ -156,30 +156,32 @@ void __kernel_map_pages(struct page *page, int numpages, int enable) */ bool kernel_page_present(struct page *page) { - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; + pgd_t *pgdp; + pud_t *pudp, pud; + pmd_t *pmdp, pmd; + pte_t *ptep; unsigned long addr = (unsigned long)page_address(page); - pgd = pgd_offset_k(addr); - if (pgd_none(*pgd)) + pgdp = pgd_offset_k(addr); + if (pgd_none(READ_ONCE(*pgdp))) return false; - pud = pud_offset(pgd, addr); - if (pud_none(*pud)) + pudp = pud_offset(pgdp, addr); + pud = READ_ONCE(*pudp); + if (pud_none(pud)) return false; - if (pud_sect(*pud)) + if (pud_sect(pud)) return true; - pmd = pmd_offset(pud, addr); - if (pmd_none(*pmd)) + pmdp = pmd_offset(pudp, addr); + pmd = READ_ONCE(*pmdp); + if (pmd_none(pmd)) return false; - if (pmd_sect(*pmd)) + if (pmd_sect(pmd)) return true; - pte = pte_offset_kernel(pmd, addr); - return pte_valid(*pte); + ptep = pte_offset_kernel(pmdp, addr); + return pte_valid(READ_ONCE(*ptep)); } #endif /* CONFIG_HIBERNATION */ #endif /* CONFIG_DEBUG_PAGEALLOC */ diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 71baed7e592a..c0af47617299 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -205,7 +205,8 @@ ENDPROC(idmap_cpu_replace_ttbr1) dc cvac, cur_\()\type\()p // Ensure any existing dirty dmb sy // lines are written back before ldr \type, [cur_\()\type\()p] // loading the entry - tbz \type, #0, next_\()\type // Skip invalid entries + tbz \type, #0, skip_\()\type // Skip invalid and + tbnz \type, #11, skip_\()\type // non-global entries .endm .macro __idmap_kpti_put_pgtable_ent_ng, type @@ -265,8 +266,9 @@ ENTRY(idmap_kpti_install_ng_mappings) add end_pgdp, cur_pgdp, #(PTRS_PER_PGD * 8) do_pgd: __idmap_kpti_get_pgtable_ent pgd tbnz pgd, #1, walk_puds - __idmap_kpti_put_pgtable_ent_ng pgd next_pgd: + __idmap_kpti_put_pgtable_ent_ng pgd +skip_pgd: add cur_pgdp, cur_pgdp, #8 cmp cur_pgdp, end_pgdp b.ne do_pgd @@ -294,8 +296,9 @@ walk_puds: add end_pudp, cur_pudp, #(PTRS_PER_PUD * 8) do_pud: __idmap_kpti_get_pgtable_ent pud tbnz pud, #1, walk_pmds - __idmap_kpti_put_pgtable_ent_ng pud next_pud: + __idmap_kpti_put_pgtable_ent_ng pud +skip_pud: add cur_pudp, cur_pudp, 8 cmp cur_pudp, end_pudp b.ne do_pud @@ -314,8 +317,9 @@ walk_pmds: add end_pmdp, cur_pmdp, #(PTRS_PER_PMD * 8) do_pmd: __idmap_kpti_get_pgtable_ent pmd tbnz pmd, #1, walk_ptes - __idmap_kpti_put_pgtable_ent_ng pmd next_pmd: + __idmap_kpti_put_pgtable_ent_ng pmd +skip_pmd: add cur_pmdp, cur_pmdp, #8 cmp cur_pmdp, end_pmdp b.ne do_pmd @@ -333,7 +337,7 @@ walk_ptes: add end_ptep, cur_ptep, #(PTRS_PER_PTE * 8) do_pte: __idmap_kpti_get_pgtable_ent pte __idmap_kpti_put_pgtable_ent_ng pte -next_pte: +skip_pte: add cur_ptep, cur_ptep, #8 cmp cur_ptep, end_ptep b.ne do_pte diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 1d4f1da7c58f..a93350451e8e 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -250,8 +250,9 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) off = offsetof(struct bpf_array, map.max_entries); emit_a64_mov_i64(tmp, off, ctx); emit(A64_LDR32(tmp, r2, tmp), ctx); + emit(A64_MOV(0, r3, r3), ctx); emit(A64_CMP(0, r3, tmp), ctx); - emit(A64_B_(A64_COND_GE, jmp_offset), ctx); + emit(A64_B_(A64_COND_CS, jmp_offset), ctx); /* if (tail_call_cnt > MAX_TAIL_CALL_CNT) * goto out; @@ -259,7 +260,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) */ emit_a64_mov_i64(tmp, MAX_TAIL_CALL_CNT, ctx); emit(A64_CMP(1, tcc, tmp), ctx); - emit(A64_B_(A64_COND_GT, jmp_offset), ctx); + emit(A64_B_(A64_COND_HI, jmp_offset), ctx); emit(A64_ADD_I(1, tcc, tcc, 1), ctx); /* prog = array->ptrs[index]; diff --git a/arch/cris/include/arch-v10/arch/bug.h b/arch/cris/include/arch-v10/arch/bug.h index 905afeacfedf..06da9d49152a 100644 --- a/arch/cris/include/arch-v10/arch/bug.h +++ b/arch/cris/include/arch-v10/arch/bug.h @@ -44,18 +44,25 @@ struct bug_frame { * not be used like this with newer versions of gcc. */ #define BUG() \ +do { \ __asm__ __volatile__ ("clear.d [" __stringify(BUG_MAGIC) "]\n\t"\ "movu.w " __stringify(__LINE__) ",$r0\n\t"\ "jump 0f\n\t" \ ".section .rodata\n" \ "0:\t.string \"" __FILE__ "\"\n\t" \ - ".previous") + ".previous"); \ + unreachable(); \ +} while (0) #endif #else /* This just causes an oops. */ -#define BUG() (*(int *)0 = 0) +#define BUG() \ +do { \ + barrier_before_unreachable(); \ + __builtin_trap(); \ +} while (0) #endif diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h index 762eeb0fcc1d..2524fb60fbc2 100644 --- a/arch/ia64/include/asm/atomic.h +++ b/arch/ia64/include/asm/atomic.h @@ -66,38 +66,35 @@ ATOMIC_OPS(add, +) ATOMIC_OPS(sub, -) #ifdef __OPTIMIZE__ -#define __ia64_atomic_const(i) __builtin_constant_p(i) ? \ +#define __ia64_atomic_const(i) \ + static const int __ia64_atomic_p = __builtin_constant_p(i) ? \ ((i) == 1 || (i) == 4 || (i) == 8 || (i) == 16 || \ - (i) == -1 || (i) == -4 || (i) == -8 || (i) == -16) : 0 + (i) == -1 || (i) == -4 || (i) == -8 || (i) == -16) : 0;\ + __ia64_atomic_p +#else +#define __ia64_atomic_const(i) 0 +#endif -#define atomic_add_return(i, v) \ +#define atomic_add_return(i,v) \ ({ \ - int __i = (i); \ - static const int __ia64_atomic_p = __ia64_atomic_const(i); \ - __ia64_atomic_p ? ia64_fetch_and_add(__i, &(v)->counter) : \ - ia64_atomic_add(__i, v); \ + int __ia64_aar_i = (i); \ + __ia64_atomic_const(i) \ + ? ia64_fetch_and_add(__ia64_aar_i, &(v)->counter) \ + : ia64_atomic_add(__ia64_aar_i, v); \ }) -#define atomic_sub_return(i, v) \ +#define atomic_sub_return(i,v) \ ({ \ - int __i = (i); \ - static const int __ia64_atomic_p = __ia64_atomic_const(i); \ - __ia64_atomic_p ? ia64_fetch_and_add(-__i, &(v)->counter) : \ - ia64_atomic_sub(__i, v); \ + int __ia64_asr_i = (i); \ + __ia64_atomic_const(i) \ + ? ia64_fetch_and_add(-__ia64_asr_i, &(v)->counter) \ + : ia64_atomic_sub(__ia64_asr_i, v); \ }) -#else -#define atomic_add_return(i, v) ia64_atomic_add(i, v) -#define atomic_sub_return(i, v) ia64_atomic_sub(i, v) -#endif #define atomic_fetch_add(i,v) \ ({ \ int __ia64_aar_i = (i); \ - (__builtin_constant_p(i) \ - && ( (__ia64_aar_i == 1) || (__ia64_aar_i == 4) \ - || (__ia64_aar_i == 8) || (__ia64_aar_i == 16) \ - || (__ia64_aar_i == -1) || (__ia64_aar_i == -4) \ - || (__ia64_aar_i == -8) || (__ia64_aar_i == -16))) \ + __ia64_atomic_const(i) \ ? ia64_fetchadd(__ia64_aar_i, &(v)->counter, acq) \ : ia64_atomic_fetch_add(__ia64_aar_i, v); \ }) @@ -105,11 +102,7 @@ ATOMIC_OPS(sub, -) #define atomic_fetch_sub(i,v) \ ({ \ int __ia64_asr_i = (i); \ - (__builtin_constant_p(i) \ - && ( (__ia64_asr_i == 1) || (__ia64_asr_i == 4) \ - || (__ia64_asr_i == 8) || (__ia64_asr_i == 16) \ - || (__ia64_asr_i == -1) || (__ia64_asr_i == -4) \ - || (__ia64_asr_i == -8) || (__ia64_asr_i == -16))) \ + __ia64_atomic_const(i) \ ? ia64_fetchadd(-__ia64_asr_i, &(v)->counter, acq) \ : ia64_atomic_fetch_sub(__ia64_asr_i, v); \ }) @@ -170,11 +163,7 @@ ATOMIC64_OPS(sub, -) #define atomic64_add_return(i,v) \ ({ \ long __ia64_aar_i = (i); \ - (__builtin_constant_p(i) \ - && ( (__ia64_aar_i == 1) || (__ia64_aar_i == 4) \ - || (__ia64_aar_i == 8) || (__ia64_aar_i == 16) \ - || (__ia64_aar_i == -1) || (__ia64_aar_i == -4) \ - || (__ia64_aar_i == -8) || (__ia64_aar_i == -16))) \ + __ia64_atomic_const(i) \ ? ia64_fetch_and_add(__ia64_aar_i, &(v)->counter) \ : ia64_atomic64_add(__ia64_aar_i, v); \ }) @@ -182,11 +171,7 @@ ATOMIC64_OPS(sub, -) #define atomic64_sub_return(i,v) \ ({ \ long __ia64_asr_i = (i); \ - (__builtin_constant_p(i) \ - && ( (__ia64_asr_i == 1) || (__ia64_asr_i == 4) \ - || (__ia64_asr_i == 8) || (__ia64_asr_i == 16) \ - || (__ia64_asr_i == -1) || (__ia64_asr_i == -4) \ - || (__ia64_asr_i == -8) || (__ia64_asr_i == -16))) \ + __ia64_atomic_const(i) \ ? ia64_fetch_and_add(-__ia64_asr_i, &(v)->counter) \ : ia64_atomic64_sub(__ia64_asr_i, v); \ }) @@ -194,11 +179,7 @@ ATOMIC64_OPS(sub, -) #define atomic64_fetch_add(i,v) \ ({ \ long __ia64_aar_i = (i); \ - (__builtin_constant_p(i) \ - && ( (__ia64_aar_i == 1) || (__ia64_aar_i == 4) \ - || (__ia64_aar_i == 8) || (__ia64_aar_i == 16) \ - || (__ia64_aar_i == -1) || (__ia64_aar_i == -4) \ - || (__ia64_aar_i == -8) || (__ia64_aar_i == -16))) \ + __ia64_atomic_const(i) \ ? ia64_fetchadd(__ia64_aar_i, &(v)->counter, acq) \ : ia64_atomic64_fetch_add(__ia64_aar_i, v); \ }) @@ -206,11 +187,7 @@ ATOMIC64_OPS(sub, -) #define atomic64_fetch_sub(i,v) \ ({ \ long __ia64_asr_i = (i); \ - (__builtin_constant_p(i) \ - && ( (__ia64_asr_i == 1) || (__ia64_asr_i == 4) \ - || (__ia64_asr_i == 8) || (__ia64_asr_i == 16) \ - || (__ia64_asr_i == -1) || (__ia64_asr_i == -4) \ - || (__ia64_asr_i == -8) || (__ia64_asr_i == -16))) \ + __ia64_atomic_const(i) \ ? ia64_fetchadd(-__ia64_asr_i, &(v)->counter, acq) \ : ia64_atomic64_fetch_sub(__ia64_asr_i, v); \ }) diff --git a/arch/ia64/include/asm/bug.h b/arch/ia64/include/asm/bug.h index bd3eeb8d1cfa..66b37a532765 100644 --- a/arch/ia64/include/asm/bug.h +++ b/arch/ia64/include/asm/bug.h @@ -4,7 +4,11 @@ #ifdef CONFIG_BUG #define ia64_abort() __builtin_trap() -#define BUG() do { printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); ia64_abort(); } while (0) +#define BUG() do { \ + printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \ + barrier_before_unreachable(); \ + ia64_abort(); \ +} while (0) /* should this BUG be made generic? */ #define HAVE_ARCH_BUG diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index 0b4c65a1af25..498f3da3f225 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -41,7 +41,6 @@ ifneq ($(CONFIG_IA64_ESI),) obj-y += esi_stub.o # must be in kernel proper endif obj-$(CONFIG_INTEL_IOMMU) += pci-dma.o -obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o obj-$(CONFIG_BINFMT_ELF) += elfcore.o diff --git a/arch/ia64/kernel/err_inject.c b/arch/ia64/kernel/err_inject.c index 85bba43e7d5d..8b5b8e6bc9d9 100644 --- a/arch/ia64/kernel/err_inject.c +++ b/arch/ia64/kernel/err_inject.c @@ -117,7 +117,7 @@ store_call_start(struct device *dev, struct device_attribute *attr, #ifdef ERR_INJ_DEBUG printk(KERN_DEBUG "Returns: status=%d,\n", (int)status[cpu]); - printk(KERN_DEBUG "capapbilities=%lx,\n", capabilities[cpu]); + printk(KERN_DEBUG "capabilities=%lx,\n", capabilities[cpu]); printk(KERN_DEBUG "resources=%lx\n", resources[cpu]); #endif return size; @@ -142,7 +142,7 @@ store_virtual_to_phys(struct device *dev, struct device_attribute *attr, u64 virt_addr=simple_strtoull(buf, NULL, 16); int ret; - ret = get_user_pages(virt_addr, 1, FOLL_WRITE, NULL, NULL); + ret = get_user_pages_fast(virt_addr, 1, FOLL_WRITE, NULL); if (ret<=0) { #ifdef ERR_INJ_DEBUG printk("Virtual address %lx is not existing.\n",virt_addr); diff --git a/arch/ia64/scripts/unwcheck.py b/arch/ia64/scripts/unwcheck.py index 89f3a1480a63..c55276e31b6b 100644 --- a/arch/ia64/scripts/unwcheck.py +++ b/arch/ia64/scripts/unwcheck.py @@ -16,7 +16,7 @@ import re import sys if len(sys.argv) != 2: - print "Usage: %s FILE" % sys.argv[0] + print("Usage: %s FILE" % sys.argv[0]) sys.exit(2) readelf = os.getenv("READELF", "readelf") @@ -29,7 +29,7 @@ def check_func (func, slots, rlen_sum): global num_errors num_errors += 1 if not func: func = "[%#x-%#x]" % (start, end) - print "ERROR: %s: %lu slots, total region length = %lu" % (func, slots, rlen_sum) + print("ERROR: %s: %lu slots, total region length = %lu" % (func, slots, rlen_sum)) return num_funcs = 0 @@ -43,23 +43,23 @@ for line in os.popen("%s -u %s" % (readelf, sys.argv[1])): check_func(func, slots, rlen_sum) func = m.group(1) - start = long(m.group(2), 16) - end = long(m.group(3), 16) + start = int(m.group(2), 16) + end = int(m.group(3), 16) slots = 3 * (end - start) / 16 - rlen_sum = 0L + rlen_sum = 0 num_funcs += 1 else: m = rlen_pattern.match(line) if m: - rlen_sum += long(m.group(1)) + rlen_sum += int(m.group(1)) check_func(func, slots, rlen_sum) if num_errors == 0: - print "No errors detected in %u functions." % num_funcs + print("No errors detected in %u functions." % num_funcs) else: if num_errors > 1: err="errors" else: err="error" - print "%u %s detected in %u functions." % (num_errors, err, num_funcs) + print("%u %s detected in %u functions." % (num_errors, err, num_funcs)) sys.exit(1) diff --git a/arch/m68k/include/asm/bug.h b/arch/m68k/include/asm/bug.h index b7e2bf1ba4a6..275dca1435bf 100644 --- a/arch/m68k/include/asm/bug.h +++ b/arch/m68k/include/asm/bug.h @@ -8,16 +8,19 @@ #ifndef CONFIG_SUN3 #define BUG() do { \ pr_crit("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \ + barrier_before_unreachable(); \ __builtin_trap(); \ } while (0) #else #define BUG() do { \ pr_crit("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \ + barrier_before_unreachable(); \ panic("BUG!"); \ } while (0) #endif #else #define BUG() do { \ + barrier_before_unreachable(); \ __builtin_trap(); \ } while (0) #endif diff --git a/arch/mips/ath25/board.c b/arch/mips/ath25/board.c index 9ab48ff80c1c..6d11ae581ea7 100644 --- a/arch/mips/ath25/board.c +++ b/arch/mips/ath25/board.c @@ -135,6 +135,8 @@ int __init ath25_find_config(phys_addr_t base, unsigned long size) } board_data = kzalloc(BOARD_CONFIG_BUFSZ, GFP_KERNEL); + if (!board_data) + goto error; ath25_board.config = (struct ath25_boarddata *)board_data; memcpy_fromio(board_data, bcfg, 0x100); if (broken_boarddata) { diff --git a/arch/mips/boot/Makefile b/arch/mips/boot/Makefile index 1bd5c4f00d19..c22da16d67b8 100644 --- a/arch/mips/boot/Makefile +++ b/arch/mips/boot/Makefile @@ -126,6 +126,7 @@ $(obj)/vmlinux.its.S: $(addprefix $(srctree)/arch/mips/$(PLATFORM)/,$(ITS_INPUTS quiet_cmd_cpp_its_S = ITS $@ cmd_cpp_its_S = $(CPP) $(cpp_flags) -P -C -o $@ $< \ + -D__ASSEMBLY__ \ -DKERNEL_NAME="\"Linux $(KERNELRELEASE)\"" \ -DVMLINUX_BINARY="\"$(3)\"" \ -DVMLINUX_COMPRESSION="\"$(2)\"" \ diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c index 5b3a3f6a9ad3..d99f5242169e 100644 --- a/arch/mips/cavium-octeon/octeon-irq.c +++ b/arch/mips/cavium-octeon/octeon-irq.c @@ -2277,6 +2277,8 @@ static int __init octeon_irq_init_cib(struct device_node *ciu_node, } host_data = kzalloc(sizeof(*host_data), GFP_KERNEL); + if (!host_data) + return -ENOMEM; raw_spin_lock_init(&host_data->lock); addr = of_get_address(ciu_node, 0, NULL, NULL); diff --git a/arch/mips/include/asm/compat.h b/arch/mips/include/asm/compat.h index 946681db8dc3..9a0fa66b81ac 100644 --- a/arch/mips/include/asm/compat.h +++ b/arch/mips/include/asm/compat.h @@ -86,7 +86,6 @@ struct compat_flock { compat_off_t l_len; s32 l_sysid; compat_pid_t l_pid; - short __unused; s32 pad[4]; }; diff --git a/arch/mips/kernel/mips-cpc.c b/arch/mips/kernel/mips-cpc.c index 19c88d770054..fcf9af492d60 100644 --- a/arch/mips/kernel/mips-cpc.c +++ b/arch/mips/kernel/mips-cpc.c @@ -10,6 +10,8 @@ #include <linux/errno.h> #include <linux/percpu.h> +#include <linux/of.h> +#include <linux/of_address.h> #include <linux/spinlock.h> #include <asm/mips-cps.h> @@ -22,6 +24,17 @@ static DEFINE_PER_CPU_ALIGNED(unsigned long, cpc_core_lock_flags); phys_addr_t __weak mips_cpc_default_phys_base(void) { + struct device_node *cpc_node; + struct resource res; + int err; + + cpc_node = of_find_compatible_node(of_root, NULL, "mti,mips-cpc"); + if (cpc_node) { + err = of_address_to_resource(cpc_node, 0, &res); + if (!err) + return res.start; + } + return 0; } diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index 85bc601e9a0d..5f8b0a9e30b3 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -375,6 +375,7 @@ static void __init bootmem_init(void) unsigned long reserved_end; unsigned long mapstart = ~0UL; unsigned long bootmap_size; + phys_addr_t ramstart = (phys_addr_t)ULLONG_MAX; bool bootmap_valid = false; int i; @@ -395,7 +396,8 @@ static void __init bootmem_init(void) max_low_pfn = 0; /* - * Find the highest page frame number we have available. + * Find the highest page frame number we have available + * and the lowest used RAM address */ for (i = 0; i < boot_mem_map.nr_map; i++) { unsigned long start, end; @@ -407,6 +409,8 @@ static void __init bootmem_init(void) end = PFN_DOWN(boot_mem_map.map[i].addr + boot_mem_map.map[i].size); + ramstart = min(ramstart, boot_mem_map.map[i].addr); + #ifndef CONFIG_HIGHMEM /* * Skip highmem here so we get an accurate max_low_pfn if low @@ -436,6 +440,13 @@ static void __init bootmem_init(void) mapstart = max(reserved_end, start); } + /* + * Reserve any memory between the start of RAM and PHYS_OFFSET + */ + if (ramstart > PHYS_OFFSET) + add_memory_region(PHYS_OFFSET, ramstart - PHYS_OFFSET, + BOOT_MEM_RESERVED); + if (min_low_pfn >= max_low_pfn) panic("Incorrect memory mapping !!!"); if (min_low_pfn > ARCH_PFN_OFFSET) { @@ -664,9 +675,6 @@ static int __init early_parse_mem(char *p) add_memory_region(start, size, BOOT_MEM_RAM); - if (start && start > PHYS_OFFSET) - add_memory_region(PHYS_OFFSET, start - PHYS_OFFSET, - BOOT_MEM_RESERVED); return 0; } early_param("mem", early_parse_mem); diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c index 87dcac2447c8..159e83add4bb 100644 --- a/arch/mips/kernel/smp-bmips.c +++ b/arch/mips/kernel/smp-bmips.c @@ -168,11 +168,11 @@ static void bmips_prepare_cpus(unsigned int max_cpus) return; } - if (request_irq(IPI0_IRQ, bmips_ipi_interrupt, IRQF_PERCPU, - "smp_ipi0", NULL)) + if (request_irq(IPI0_IRQ, bmips_ipi_interrupt, + IRQF_PERCPU | IRQF_NO_SUSPEND, "smp_ipi0", NULL)) panic("Can't request IPI0 interrupt"); - if (request_irq(IPI1_IRQ, bmips_ipi_interrupt, IRQF_PERCPU, - "smp_ipi1", NULL)) + if (request_irq(IPI1_IRQ, bmips_ipi_interrupt, + IRQF_PERCPU | IRQF_NO_SUSPEND, "smp_ipi1", NULL)) panic("Can't request IPI1 interrupt"); } @@ -572,7 +572,7 @@ asmlinkage void __weak plat_wired_tlb_setup(void) */ } -void __init bmips_cpu_setup(void) +void bmips_cpu_setup(void) { void __iomem __maybe_unused *cbr = BMIPS_GET_CBR(); u32 __maybe_unused cfg; diff --git a/arch/mips/loongson64/Kconfig b/arch/mips/loongson64/Kconfig index bc2fdbfa8223..72af0c183969 100644 --- a/arch/mips/loongson64/Kconfig +++ b/arch/mips/loongson64/Kconfig @@ -7,6 +7,8 @@ choice config LEMOTE_FULOONG2E bool "Lemote Fuloong(2e) mini-PC" select ARCH_SPARSEMEM_ENABLE + select ARCH_MIGHT_HAVE_PC_PARPORT + select ARCH_MIGHT_HAVE_PC_SERIO select CEVT_R4K select CSRC_R4K select SYS_HAS_CPU_LOONGSON2E @@ -33,6 +35,8 @@ config LEMOTE_FULOONG2E config LEMOTE_MACH2F bool "Lemote Loongson 2F family machines" select ARCH_SPARSEMEM_ENABLE + select ARCH_MIGHT_HAVE_PC_PARPORT + select ARCH_MIGHT_HAVE_PC_SERIO select BOARD_SCACHE select BOOT_ELF32 select CEVT_R4K if ! MIPS_EXTERNAL_TIMER @@ -62,6 +66,8 @@ config LEMOTE_MACH2F config LOONGSON_MACH3X bool "Generic Loongson 3 family machines" select ARCH_SPARSEMEM_ENABLE + select ARCH_MIGHT_HAVE_PC_PARPORT + select ARCH_MIGHT_HAVE_PC_SERIO select GENERIC_ISA_DMA_SUPPORT_BROKEN select BOOT_ELF32 select BOARD_SCACHE diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h index 3742508cc534..bd5ce31936f5 100644 --- a/arch/parisc/include/asm/cacheflush.h +++ b/arch/parisc/include/asm/cacheflush.h @@ -26,6 +26,7 @@ void flush_user_icache_range_asm(unsigned long, unsigned long); void flush_kernel_icache_range_asm(unsigned long, unsigned long); void flush_user_dcache_range_asm(unsigned long, unsigned long); void flush_kernel_dcache_range_asm(unsigned long, unsigned long); +void purge_kernel_dcache_range_asm(unsigned long, unsigned long); void flush_kernel_dcache_page_asm(void *); void flush_kernel_icache_page(void *); diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h index 0e6ab6e4a4e9..2dbe5580a1a4 100644 --- a/arch/parisc/include/asm/processor.h +++ b/arch/parisc/include/asm/processor.h @@ -316,6 +316,8 @@ extern int _parisc_requires_coherency; #define parisc_requires_coherency() (0) #endif +extern int running_on_qemu; + #endif /* __ASSEMBLY__ */ #endif /* __ASM_PARISC_PROCESSOR_H */ diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 19c0c141bc3f..79089778725b 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -465,10 +465,10 @@ EXPORT_SYMBOL(copy_user_page); int __flush_tlb_range(unsigned long sid, unsigned long start, unsigned long end) { - unsigned long flags, size; + unsigned long flags; - size = (end - start); - if (size >= parisc_tlb_flush_threshold) { + if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) && + end - start >= parisc_tlb_flush_threshold) { flush_tlb_all(); return 1; } @@ -539,13 +539,11 @@ void flush_cache_mm(struct mm_struct *mm) struct vm_area_struct *vma; pgd_t *pgd; - /* Flush the TLB to avoid speculation if coherency is required. */ - if (parisc_requires_coherency()) - flush_tlb_all(); - /* Flushing the whole cache on each cpu takes forever on rp3440, etc. So, avoid it if the mm isn't too big. */ - if (mm_total_size(mm) >= parisc_cache_flush_threshold) { + if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) && + mm_total_size(mm) >= parisc_cache_flush_threshold) { + flush_tlb_all(); flush_cache_all(); return; } @@ -553,9 +551,9 @@ void flush_cache_mm(struct mm_struct *mm) if (mm->context == mfsp(3)) { for (vma = mm->mmap; vma; vma = vma->vm_next) { flush_user_dcache_range_asm(vma->vm_start, vma->vm_end); - if ((vma->vm_flags & VM_EXEC) == 0) - continue; - flush_user_icache_range_asm(vma->vm_start, vma->vm_end); + if (vma->vm_flags & VM_EXEC) + flush_user_icache_range_asm(vma->vm_start, vma->vm_end); + flush_tlb_range(vma, vma->vm_start, vma->vm_end); } return; } @@ -581,14 +579,9 @@ void flush_cache_mm(struct mm_struct *mm) void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - BUG_ON(!vma->vm_mm->context); - - /* Flush the TLB to avoid speculation if coherency is required. */ - if (parisc_requires_coherency()) + if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) && + end - start >= parisc_cache_flush_threshold) { flush_tlb_range(vma, start, end); - - if ((end - start) >= parisc_cache_flush_threshold - || vma->vm_mm->context != mfsp(3)) { flush_cache_all(); return; } @@ -596,6 +589,7 @@ void flush_cache_range(struct vm_area_struct *vma, flush_user_dcache_range_asm(start, end); if (vma->vm_flags & VM_EXEC) flush_user_icache_range_asm(start, end); + flush_tlb_range(vma, start, end); } void @@ -604,8 +598,7 @@ flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long BUG_ON(!vma->vm_mm->context); if (pfn_valid(pfn)) { - if (parisc_requires_coherency()) - flush_tlb_page(vma, vmaddr); + flush_tlb_page(vma, vmaddr); __flush_cache_page(vma, vmaddr, PFN_PHYS(pfn)); } } @@ -613,21 +606,33 @@ flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long void flush_kernel_vmap_range(void *vaddr, int size) { unsigned long start = (unsigned long)vaddr; + unsigned long end = start + size; - if ((unsigned long)size > parisc_cache_flush_threshold) + if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) && + (unsigned long)size >= parisc_cache_flush_threshold) { + flush_tlb_kernel_range(start, end); flush_data_cache(); - else - flush_kernel_dcache_range_asm(start, start + size); + return; + } + + flush_kernel_dcache_range_asm(start, end); + flush_tlb_kernel_range(start, end); } EXPORT_SYMBOL(flush_kernel_vmap_range); void invalidate_kernel_vmap_range(void *vaddr, int size) { unsigned long start = (unsigned long)vaddr; + unsigned long end = start + size; - if ((unsigned long)size > parisc_cache_flush_threshold) + if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) && + (unsigned long)size >= parisc_cache_flush_threshold) { + flush_tlb_kernel_range(start, end); flush_data_cache(); - else - flush_kernel_dcache_range_asm(start, start + size); + return; + } + + purge_kernel_dcache_range_asm(start, end); + flush_tlb_kernel_range(start, end); } EXPORT_SYMBOL(invalidate_kernel_vmap_range); diff --git a/arch/parisc/kernel/head.S b/arch/parisc/kernel/head.S index bbbe360b458f..fbb4e43fda05 100644 --- a/arch/parisc/kernel/head.S +++ b/arch/parisc/kernel/head.S @@ -138,6 +138,16 @@ $pgt_fill_loop: std %dp,0x18(%r10) #endif +#ifdef CONFIG_64BIT + /* Get PDCE_PROC for monarch CPU. */ +#define MEM_PDC_LO 0x388 +#define MEM_PDC_HI 0x35C + ldw MEM_PDC_LO(%r0),%r3 + ldw MEM_PDC_HI(%r0),%r10 + depd %r10, 31, 32, %r3 /* move to upper word */ +#endif + + #ifdef CONFIG_SMP /* Set the smp rendezvous address into page zero. ** It would be safer to do this in init_smp_config() but @@ -196,12 +206,6 @@ common_stext: ** Someday, palo might not do this for the Monarch either. */ 2: -#define MEM_PDC_LO 0x388 -#define MEM_PDC_HI 0x35C - ldw MEM_PDC_LO(%r0),%r3 - ldw MEM_PDC_HI(%r0),%r6 - depd %r6, 31, 32, %r3 /* move to upper word */ - mfctl %cr30,%r6 /* PCX-W2 firmware bug */ ldo PDC_PSW(%r0),%arg0 /* 21 */ @@ -268,6 +272,8 @@ $install_iva: aligned_rfi: pcxt_ssm_bug + copy %r3, %arg0 /* PDCE_PROC for smp_callin() */ + rsm PSW_SM_QUIET,%r0 /* off troublesome PSW bits */ /* Don't need NOPs, have 8 compliant insn before rfi */ diff --git a/arch/parisc/kernel/pacache.S b/arch/parisc/kernel/pacache.S index 2d40c4ff3f69..67b0f7532e83 100644 --- a/arch/parisc/kernel/pacache.S +++ b/arch/parisc/kernel/pacache.S @@ -1110,6 +1110,28 @@ ENTRY_CFI(flush_kernel_dcache_range_asm) .procend ENDPROC_CFI(flush_kernel_dcache_range_asm) +ENTRY_CFI(purge_kernel_dcache_range_asm) + .proc + .callinfo NO_CALLS + .entry + + ldil L%dcache_stride, %r1 + ldw R%dcache_stride(%r1), %r23 + ldo -1(%r23), %r21 + ANDCM %r26, %r21, %r26 + +1: cmpb,COND(<<),n %r26, %r25,1b + pdc,m %r23(%r26) + + sync + syncdma + bv %r0(%r2) + nop + .exit + + .procend +ENDPROC_CFI(purge_kernel_dcache_range_asm) + ENTRY_CFI(flush_user_icache_range_asm) .proc .callinfo NO_CALLS diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c index 30c28ab14540..4065b5e48c9d 100644 --- a/arch/parisc/kernel/smp.c +++ b/arch/parisc/kernel/smp.c @@ -292,10 +292,15 @@ smp_cpu_init(int cpunum) * Slaves start using C here. Indirectly called from smp_slave_stext. * Do what start_kernel() and main() do for boot strap processor (aka monarch) */ -void __init smp_callin(void) +void __init smp_callin(unsigned long pdce_proc) { int slave_id = cpu_now_booting; +#ifdef CONFIG_64BIT + WARN_ON(((unsigned long)(PAGE0->mem_pdc_hi) << 32 + | PAGE0->mem_pdc) != pdce_proc); +#endif + smp_cpu_init(slave_id); preempt_disable(); diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c index 4b8fd6dc22da..f7e684560186 100644 --- a/arch/parisc/kernel/time.c +++ b/arch/parisc/kernel/time.c @@ -76,10 +76,10 @@ irqreturn_t __irq_entry timer_interrupt(int irq, void *dev_id) next_tick = cpuinfo->it_value; /* Calculate how many ticks have elapsed. */ + now = mfctl(16); do { ++ticks_elapsed; next_tick += cpt; - now = mfctl(16); } while (next_tick - now > cpt); /* Store (in CR16 cycles) up to when we are accounting right now. */ @@ -103,16 +103,17 @@ irqreturn_t __irq_entry timer_interrupt(int irq, void *dev_id) * if one or the other wrapped. If "now" is "bigger" we'll end up * with a very large unsigned number. */ - while (next_tick - mfctl(16) > cpt) + now = mfctl(16); + while (next_tick - now > cpt) next_tick += cpt; /* Program the IT when to deliver the next interrupt. * Only bottom 32-bits of next_tick are writable in CR16! * Timer interrupt will be delivered at least a few hundred cycles - * after the IT fires, so if we are too close (<= 500 cycles) to the + * after the IT fires, so if we are too close (<= 8000 cycles) to the * next cycle, simply skip it. */ - if (next_tick - mfctl(16) <= 500) + if (next_tick - now <= 8000) next_tick += cpt; mtctl(next_tick, 16); @@ -248,7 +249,7 @@ static int __init init_cr16_clocksource(void) * different sockets, so mark them unstable and lower rating on * multi-socket SMP systems. */ - if (num_online_cpus() > 1) { + if (num_online_cpus() > 1 && !running_on_qemu) { int cpu; unsigned long cpu0_loc; cpu0_loc = per_cpu(cpu_data, 0).cpu_loc; diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 48f41399fc0b..cab32ee824d2 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -629,7 +629,12 @@ void __init mem_init(void) #endif mem_init_print_info(NULL); -#ifdef CONFIG_DEBUG_KERNEL /* double-sanity-check paranoia */ + +#if 0 + /* + * Do not expose the virtual kernel memory layout to userspace. + * But keep code for debugging purposes. + */ printk("virtual kernel memory layout:\n" " vmalloc : 0x%px - 0x%px (%4ld MB)\n" " memory : 0x%px - 0x%px (%4ld MB)\n" diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index ef6549e57157..26d5d2a5b8e9 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -101,7 +101,8 @@ $(addprefix $(obj)/,$(zlib-y)): \ libfdt := fdt.c fdt_ro.c fdt_wip.c fdt_sw.c fdt_rw.c fdt_strerror.c libfdtheader := fdt.h libfdt.h libfdt_internal.h -$(addprefix $(obj)/,$(libfdt) libfdt-wrapper.o simpleboot.o epapr.o opal.o): \ +$(addprefix $(obj)/,$(libfdt) libfdt-wrapper.o simpleboot.o epapr.o opal.o \ + treeboot-akebono.o treeboot-currituck.o treeboot-iss4xx.o): \ $(addprefix $(obj)/,$(libfdtheader)) src-wlib-y := string.S crt0.S stdio.c decompress.c main.c \ diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 30a155c0a6b0..c615abdce119 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -16,6 +16,7 @@ #define PGD_INDEX_SIZE (32 - PGDIR_SHIFT) #define PMD_CACHE_INDEX PMD_INDEX_SIZE +#define PUD_CACHE_INDEX PUD_INDEX_SIZE #ifndef __ASSEMBLY__ #define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE) diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h index 949d691094a4..67c5475311ee 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h @@ -63,7 +63,8 @@ static inline int hash__hugepd_ok(hugepd_t hpd) * keeping the prototype consistent across the two formats. */ static inline unsigned long pte_set_hidx(pte_t *ptep, real_pte_t rpte, - unsigned int subpg_index, unsigned long hidx) + unsigned int subpg_index, unsigned long hidx, + int offset) { return (hidx << H_PAGE_F_GIX_SHIFT) & (H_PAGE_F_SECOND | H_PAGE_F_GIX); diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h index 338b7da468ce..3bcf269f8f55 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h @@ -45,7 +45,7 @@ * generic accessors and iterators here */ #define __real_pte __real_pte -static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep) +static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep, int offset) { real_pte_t rpte; unsigned long *hidxp; @@ -59,7 +59,7 @@ static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep) */ smp_rmb(); - hidxp = (unsigned long *)(ptep + PTRS_PER_PTE); + hidxp = (unsigned long *)(ptep + offset); rpte.hidx = *hidxp; return rpte; } @@ -86,9 +86,10 @@ static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index) * expected to modify the PTE bits accordingly and commit the PTE to memory. */ static inline unsigned long pte_set_hidx(pte_t *ptep, real_pte_t rpte, - unsigned int subpg_index, unsigned long hidx) + unsigned int subpg_index, + unsigned long hidx, int offset) { - unsigned long *hidxp = (unsigned long *)(ptep + PTRS_PER_PTE); + unsigned long *hidxp = (unsigned long *)(ptep + offset); rpte.hidx &= ~HIDX_BITS(0xfUL, subpg_index); *hidxp = rpte.hidx | HIDX_BITS(HIDX_SHIFT_BY_ONE(hidx), subpg_index); @@ -140,13 +141,18 @@ static inline int hash__remap_4k_pfn(struct vm_area_struct *vma, unsigned long a } #define H_PTE_TABLE_SIZE PTE_FRAG_SIZE -#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined (CONFIG_HUGETLB_PAGE) #define H_PMD_TABLE_SIZE ((sizeof(pmd_t) << PMD_INDEX_SIZE) + \ (sizeof(unsigned long) << PMD_INDEX_SIZE)) #else #define H_PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE) #endif +#ifdef CONFIG_HUGETLB_PAGE +#define H_PUD_TABLE_SIZE ((sizeof(pud_t) << PUD_INDEX_SIZE) + \ + (sizeof(unsigned long) << PUD_INDEX_SIZE)) +#else #define H_PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE) +#endif #define H_PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE) #ifdef CONFIG_TRANSPARENT_HUGEPAGE diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index 0920eff731b3..935adcd92a81 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -23,7 +23,8 @@ H_PUD_INDEX_SIZE + H_PGD_INDEX_SIZE + PAGE_SHIFT) #define H_PGTABLE_RANGE (ASM_CONST(1) << H_PGTABLE_EADDR_SIZE) -#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && defined(CONFIG_PPC_64K_PAGES) +#if (defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)) && \ + defined(CONFIG_PPC_64K_PAGES) /* * only with hash 64k we need to use the second half of pmd page table * to store pointer to deposited pgtable_t @@ -33,6 +34,16 @@ #define H_PMD_CACHE_INDEX H_PMD_INDEX_SIZE #endif /* + * We store the slot details in the second half of page table. + * Increase the pud level table so that hugetlb ptes can be stored + * at pud level. + */ +#if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_PPC_64K_PAGES) +#define H_PUD_CACHE_INDEX (H_PUD_INDEX_SIZE + 1) +#else +#define H_PUD_CACHE_INDEX (H_PUD_INDEX_SIZE) +#endif +/* * Define the address range of the kernel non-linear virtual area */ #define H_KERN_VIRT_START ASM_CONST(0xD000000000000000) diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h index 1fcfa425cefa..4746bc68d446 100644 --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h @@ -73,10 +73,16 @@ static inline void radix__pgd_free(struct mm_struct *mm, pgd_t *pgd) static inline pgd_t *pgd_alloc(struct mm_struct *mm) { + pgd_t *pgd; + if (radix_enabled()) return radix__pgd_alloc(mm); - return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), - pgtable_gfp_flags(mm, GFP_KERNEL)); + + pgd = kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), + pgtable_gfp_flags(mm, GFP_KERNEL)); + memset(pgd, 0, PGD_TABLE_SIZE); + + return pgd; } static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) @@ -93,13 +99,13 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) { - return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE), + return kmem_cache_alloc(PGT_CACHE(PUD_CACHE_INDEX), pgtable_gfp_flags(mm, GFP_KERNEL)); } static inline void pud_free(struct mm_struct *mm, pud_t *pud) { - kmem_cache_free(PGT_CACHE(PUD_INDEX_SIZE), pud); + kmem_cache_free(PGT_CACHE(PUD_CACHE_INDEX), pud); } static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) @@ -115,7 +121,7 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, * ahead and flush the page walk cache */ flush_tlb_pgtable(tlb, address); - pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE); + pgtable_free_tlb(tlb, pud, PUD_CACHE_INDEX); } static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 51017726d495..a6b9f1d74600 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -232,11 +232,13 @@ extern unsigned long __pmd_index_size; extern unsigned long __pud_index_size; extern unsigned long __pgd_index_size; extern unsigned long __pmd_cache_index; +extern unsigned long __pud_cache_index; #define PTE_INDEX_SIZE __pte_index_size #define PMD_INDEX_SIZE __pmd_index_size #define PUD_INDEX_SIZE __pud_index_size #define PGD_INDEX_SIZE __pgd_index_size #define PMD_CACHE_INDEX __pmd_cache_index +#define PUD_CACHE_INDEX __pud_cache_index /* * Because of use of pte fragments and THP, size of page table * are not always derived out of index size above. @@ -348,7 +350,7 @@ extern unsigned long pci_io_base; */ #ifndef __real_pte -#define __real_pte(e,p) ((real_pte_t){(e)}) +#define __real_pte(e, p, o) ((real_pte_t){(e)}) #define __rpte_to_pte(r) ((r).pte) #define __rpte_to_hidx(r,index) (pte_val(__rpte_to_pte(r)) >> H_PAGE_F_GIX_SHIFT) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 176dfb73d42c..471b2274fbeb 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -645,7 +645,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) EXC_HV, SOFTEN_TEST_HV, bitmask) #define MASKABLE_RELON_EXCEPTION_HV_OOL(vec, label, bitmask) \ - MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_NOTEST_HV, vec, bitmask);\ + MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec, bitmask);\ EXCEPTION_RELON_PROLOG_PSERIES_1(label, EXC_HV) /* diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h index 511acfd7ab0d..535add3f7791 100644 --- a/arch/powerpc/include/asm/firmware.h +++ b/arch/powerpc/include/asm/firmware.h @@ -52,7 +52,7 @@ #define FW_FEATURE_TYPE1_AFFINITY ASM_CONST(0x0000000100000000) #define FW_FEATURE_PRRN ASM_CONST(0x0000000200000000) #define FW_FEATURE_DRMEM_V2 ASM_CONST(0x0000000400000000) -#define FW_FEATURE_DRC_INFO ASM_CONST(0x0000000400000000) +#define FW_FEATURE_DRC_INFO ASM_CONST(0x0000000800000000) #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 88e5e8f17e98..855e17d158b1 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -30,6 +30,16 @@ #define PACA_IRQ_PMI 0x40 /* + * Some soft-masked interrupts must be hard masked until they are replayed + * (e.g., because the soft-masked handler does not clear the exception). + */ +#ifdef CONFIG_PPC_BOOK3S +#define PACA_IRQ_MUST_HARD_MASK (PACA_IRQ_EE|PACA_IRQ_PMI) +#else +#define PACA_IRQ_MUST_HARD_MASK (PACA_IRQ_EE) +#endif + +/* * flags for paca->irq_soft_mask */ #define IRQS_ENABLED 0 @@ -244,7 +254,7 @@ static inline bool lazy_irq_pending(void) static inline void may_hard_irq_enable(void) { get_paca()->irq_happened &= ~PACA_IRQ_HARD_DIS; - if (!(get_paca()->irq_happened & PACA_IRQ_EE)) + if (!(get_paca()->irq_happened & PACA_IRQ_MUST_HARD_MASK)) __hard_irq_enable(); } diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index 9dcbfa6bbb91..d8b1e8e7e035 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -140,6 +140,12 @@ static inline bool kdump_in_progress(void) return false; } +static inline void crash_ipi_callback(struct pt_regs *regs) { } + +static inline void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)) +{ +} + #endif /* CONFIG_KEXEC_CORE */ #endif /* ! __ASSEMBLY__ */ #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index 504a3c36ce5c..03bbd1149530 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -24,6 +24,7 @@ extern int icache_44x_need_flush; #define PGD_INDEX_SIZE (32 - PGDIR_SHIFT) #define PMD_CACHE_INDEX PMD_INDEX_SIZE +#define PUD_CACHE_INDEX PUD_INDEX_SIZE #ifndef __ASSEMBLY__ #define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE) diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index abddf5830ad5..5c5f75d005ad 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -27,6 +27,7 @@ #else #define PMD_CACHE_INDEX PMD_INDEX_SIZE #endif +#define PUD_CACHE_INDEX PUD_INDEX_SIZE /* * Define the address range of the kernel non-linear virtual area diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index 88187c285c70..9f421641a35c 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -44,6 +44,11 @@ extern int sysfs_add_device_to_node(struct device *dev, int nid); extern void sysfs_remove_device_from_node(struct device *dev, int nid); extern int numa_update_cpu_topology(bool cpus_locked); +static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node) +{ + numa_cpu_lookup_table[cpu] = node; +} + static inline int early_cpu_to_node(int cpu) { int nid; @@ -76,12 +81,16 @@ static inline int numa_update_cpu_topology(bool cpus_locked) { return 0; } + +static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node) {} + #endif /* CONFIG_NUMA */ #if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR) extern int start_topology_update(void); extern int stop_topology_update(void); extern int prrn_is_enabled(void); +extern int find_and_online_cpu_nid(int cpu); #else static inline int start_topology_update(void) { @@ -95,6 +104,10 @@ static inline int prrn_is_enabled(void) { return 0; } +static inline int find_and_online_cpu_nid(int cpu) +{ + return 0; +} #endif /* CONFIG_NUMA && CONFIG_PPC_SPLPAR */ #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_NEED_MULTIPLE_NODES) diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index beea2182d754..0c0b66fc5bfb 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -384,7 +384,8 @@ static void *eeh_report_resume(void *data, void *userdata) eeh_pcid_put(dev); pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED); #ifdef CONFIG_PCI_IOV - eeh_ops->notify_resume(eeh_dev_to_pdn(edev)); + if (eeh_ops->notify_resume && eeh_dev_to_pdn(edev)) + eeh_ops->notify_resume(eeh_dev_to_pdn(edev)); #endif return NULL; } diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index ee832d344a5a..9b6e653e501a 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -943,6 +943,8 @@ kernel_dbg_exc: /* * An interrupt came in while soft-disabled; We mark paca->irq_happened * accordingly and if the interrupt is level sensitive, we hard disable + * hard disable (full_mask) corresponds to PACA_IRQ_MUST_HARD_MASK, so + * keep these in synch. */ .macro masked_interrupt_book3e paca_irq full_mask diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 243d072a225a..3ac87e53b3da 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1426,7 +1426,7 @@ EXC_COMMON_BEGIN(soft_nmi_common) * triggered and won't automatically refire. * - If it was a HMI we return immediately since we handled it in realmode * and it won't refire. - * - else we hard disable and return. + * - Else it is one of PACA_IRQ_MUST_HARD_MASK, so hard disable and return. * This is called with r10 containing the value to OR to the paca field. */ #define MASKED_INTERRUPT(_H) \ @@ -1441,8 +1441,8 @@ masked_##_H##interrupt: \ ori r10,r10,0xffff; \ mtspr SPRN_DEC,r10; \ b MASKED_DEC_HANDLER_LABEL; \ -1: andi. r10,r10,(PACA_IRQ_DBELL|PACA_IRQ_HMI); \ - bne 2f; \ +1: andi. r10,r10,PACA_IRQ_MUST_HARD_MASK; \ + beq 2f; \ mfspr r10,SPRN_##_H##SRR1; \ xori r10,r10,MSR_EE; /* clear MSR_EE */ \ mtspr SPRN_##_H##SRR1,r10; \ diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index adf044daafd7..acf4b2e0530c 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -874,7 +874,6 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = { .mmu = 0, .hash_ext = 0, .radix_ext = 0, - .byte22 = OV5_FEAT(OV5_DRC_INFO), }, /* option vector 6: IBM PAPR hints */ diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 5a8bfee6e187..04d0bbd7a1dd 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -788,7 +788,8 @@ static int register_cpu_online(unsigned int cpu) if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2)) device_create_file(s, &dev_attr_pir); - if (cpu_has_feature(CPU_FTR_ARCH_206)) + if (cpu_has_feature(CPU_FTR_ARCH_206) && + !firmware_has_feature(FW_FEATURE_LPAR)) device_create_file(s, &dev_attr_tscr); #endif /* CONFIG_PPC64 */ @@ -873,7 +874,8 @@ static int unregister_cpu_online(unsigned int cpu) if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2)) device_remove_file(s, &dev_attr_pir); - if (cpu_has_feature(CPU_FTR_ARCH_206)) + if (cpu_has_feature(CPU_FTR_ARCH_206) && + !firmware_has_feature(FW_FEATURE_LPAR)) device_remove_file(s, &dev_attr_tscr); #endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 0c854816e653..5cb4e4687107 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -195,6 +195,12 @@ static void kvmppc_pte_free(pte_t *ptep) kmem_cache_free(kvm_pte_cache, ptep); } +/* Like pmd_huge() and pmd_large(), but works regardless of config options */ +static inline int pmd_is_leaf(pmd_t pmd) +{ + return !!(pmd_val(pmd) & _PAGE_PTE); +} + static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa, unsigned int level, unsigned long mmu_seq) { @@ -219,7 +225,7 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa, else new_pmd = pmd_alloc_one(kvm->mm, gpa); - if (level == 0 && !(pmd && pmd_present(*pmd))) + if (level == 0 && !(pmd && pmd_present(*pmd) && !pmd_is_leaf(*pmd))) new_ptep = kvmppc_pte_alloc(); /* Check if we might have been invalidated; let the guest retry if so */ @@ -244,12 +250,30 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa, new_pmd = NULL; } pmd = pmd_offset(pud, gpa); - if (pmd_large(*pmd)) { - /* Someone else has instantiated a large page here; retry */ - ret = -EAGAIN; - goto out_unlock; - } - if (level == 1 && !pmd_none(*pmd)) { + if (pmd_is_leaf(*pmd)) { + unsigned long lgpa = gpa & PMD_MASK; + + /* + * If we raced with another CPU which has just put + * a 2MB pte in after we saw a pte page, try again. + */ + if (level == 0 && !new_ptep) { + ret = -EAGAIN; + goto out_unlock; + } + /* Valid 2MB page here already, remove it */ + old = kvmppc_radix_update_pte(kvm, pmdp_ptep(pmd), + ~0UL, 0, lgpa, PMD_SHIFT); + kvmppc_radix_tlbie_page(kvm, lgpa, PMD_SHIFT); + if (old & _PAGE_DIRTY) { + unsigned long gfn = lgpa >> PAGE_SHIFT; + struct kvm_memory_slot *memslot; + memslot = gfn_to_memslot(kvm, gfn); + if (memslot && memslot->dirty_bitmap) + kvmppc_update_dirty_map(memslot, + gfn, PMD_SIZE); + } + } else if (level == 1 && !pmd_none(*pmd)) { /* * There's a page table page here, but we wanted * to install a large page. Tell the caller and let @@ -412,28 +436,24 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, } else { page = pages[0]; pfn = page_to_pfn(page); - if (PageHuge(page)) { - page = compound_head(page); - pte_size <<= compound_order(page); + if (PageCompound(page)) { + pte_size <<= compound_order(compound_head(page)); /* See if we can insert a 2MB large-page PTE here */ if (pte_size >= PMD_SIZE && - (gpa & PMD_MASK & PAGE_MASK) == - (hva & PMD_MASK & PAGE_MASK)) { + (gpa & (PMD_SIZE - PAGE_SIZE)) == + (hva & (PMD_SIZE - PAGE_SIZE))) { level = 1; pfn &= ~((PMD_SIZE >> PAGE_SHIFT) - 1); } } /* See if we can provide write access */ if (writing) { - /* - * We assume gup_fast has set dirty on the host PTE. - */ pgflags |= _PAGE_WRITE; } else { local_irq_save(flags); ptep = find_current_mm_pte(current->mm->pgd, hva, NULL, NULL); - if (ptep && pte_write(*ptep) && pte_dirty(*ptep)) + if (ptep && pte_write(*ptep)) pgflags |= _PAGE_WRITE; local_irq_restore(flags); } @@ -459,18 +479,15 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, pte = pfn_pte(pfn, __pgprot(pgflags)); ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq); } - if (ret == 0 || ret == -EAGAIN) - ret = RESUME_GUEST; if (page) { - /* - * We drop pages[0] here, not page because page might - * have been set to the head page of a compound, but - * we have to drop the reference on the correct tail - * page to match the get inside gup() - */ - put_page(pages[0]); + if (!ret && (pgflags & _PAGE_WRITE)) + set_page_dirty_lock(page); + put_page(page); } + + if (ret == 0 || ret == -EAGAIN) + ret = RESUME_GUEST; return ret; } @@ -644,7 +661,7 @@ void kvmppc_free_radix(struct kvm *kvm) continue; pmd = pmd_offset(pud, 0); for (im = 0; im < PTRS_PER_PMD; ++im, ++pmd) { - if (pmd_huge(*pmd)) { + if (pmd_is_leaf(*pmd)) { pmd_clear(pmd); continue; } diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 89707354c2ef..9cb9448163c4 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2885,7 +2885,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) */ trace_hardirqs_on(); - guest_enter(); + guest_enter_irqoff(); srcu_idx = srcu_read_lock(&vc->kvm->srcu); @@ -2893,8 +2893,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) srcu_read_unlock(&vc->kvm->srcu, srcu_idx); - guest_exit(); - trace_hardirqs_off(); set_irq_happened(trap); @@ -2937,6 +2935,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) kvmppc_set_host_core(pcpu); local_irq_enable(); + guest_exit(); /* Let secondaries go back to the offline loop */ for (i = 0; i < controlled_threads; ++i) { @@ -3656,15 +3655,17 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) goto up_out; psize = vma_kernel_pagesize(vma); - porder = __ilog2(psize); up_read(¤t->mm->mmap_sem); /* We can handle 4k, 64k or 16M pages in the VRMA */ - err = -EINVAL; - if (!(psize == 0x1000 || psize == 0x10000 || - psize == 0x1000000)) - goto out_srcu; + if (psize >= 0x1000000) + psize = 0x1000000; + else if (psize >= 0x10000) + psize = 0x10000; + else + psize = 0x1000; + porder = __ilog2(psize); senc = slb_pgsize_encoding(psize); kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T | diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index f0f5cd4d2fe7..f9818d7d3381 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -188,7 +188,7 @@ static int xive_provision_queue(struct kvm_vcpu *vcpu, u8 prio) if (!qpage) { pr_err("Failed to allocate queue %d for VCPU %d\n", prio, xc->server_num); - return -ENOMEM;; + return -ENOMEM; } memset(qpage, 0, 1 << xive->q_order); diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 403e642c78f5..52c205373986 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -1345,7 +1345,7 @@ static int kvmppc_emulate_mmio_vsx_loadstore(struct kvm_vcpu *vcpu, int kvmppc_handle_load128_by2x64(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int rt, int is_default_endian) { - enum emulation_result emulated; + enum emulation_result emulated = EMULATE_DONE; while (vcpu->arch.mmio_vmx_copy_nums) { emulated = __kvmppc_handle_load(run, vcpu, rt, 8, @@ -1608,7 +1608,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) kvm_sigset_deactivate(vcpu); +#ifdef CONFIG_ALTIVEC out: +#endif vcpu_put(vcpu); return r; } diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c index 1604110c4238..3f1803672c9b 100644 --- a/arch/powerpc/mm/drmem.c +++ b/arch/powerpc/mm/drmem.c @@ -98,7 +98,7 @@ static void init_drconf_v2_cell(struct of_drconf_cell_v2 *dr_cell, dr_cell->base_addr = cpu_to_be64(lmb->base_addr); dr_cell->drc_index = cpu_to_be32(lmb->drc_index); dr_cell->aa_index = cpu_to_be32(lmb->aa_index); - dr_cell->flags = cpu_to_be32(lmb->flags); + dr_cell->flags = cpu_to_be32(drmem_lmb_flags(lmb)); } static int drmem_update_dt_v2(struct device_node *memory, @@ -121,7 +121,7 @@ static int drmem_update_dt_v2(struct device_node *memory, } if (prev_lmb->aa_index != lmb->aa_index || - prev_lmb->flags != lmb->flags) + drmem_lmb_flags(prev_lmb) != drmem_lmb_flags(lmb)) lmb_sets++; prev_lmb = lmb; @@ -150,7 +150,7 @@ static int drmem_update_dt_v2(struct device_node *memory, } if (prev_lmb->aa_index != lmb->aa_index || - prev_lmb->flags != lmb->flags) { + drmem_lmb_flags(prev_lmb) != drmem_lmb_flags(lmb)) { /* end of one set, start of another */ dr_cell->seq_lmbs = cpu_to_be32(seq_lmbs); dr_cell++; @@ -216,6 +216,8 @@ static void __init __walk_drmem_v1_lmbs(const __be32 *prop, const __be32 *usm, u32 i, n_lmbs; n_lmbs = of_read_number(prop++, 1); + if (n_lmbs == 0) + return; for (i = 0; i < n_lmbs; i++) { read_drconf_v1_cell(&lmb, &prop); @@ -245,6 +247,8 @@ static void __init __walk_drmem_v2_lmbs(const __be32 *prop, const __be32 *usm, u32 i, j, lmb_sets; lmb_sets = of_read_number(prop++, 1); + if (lmb_sets == 0) + return; for (i = 0; i < lmb_sets; i++) { read_drconf_v2_cell(&dr_cell, &prop); @@ -354,6 +358,8 @@ static void __init init_drmem_v1_lmbs(const __be32 *prop) struct drmem_lmb *lmb; drmem_info->n_lmbs = of_read_number(prop++, 1); + if (drmem_info->n_lmbs == 0) + return; drmem_info->lmbs = kcalloc(drmem_info->n_lmbs, sizeof(*lmb), GFP_KERNEL); @@ -373,6 +379,8 @@ static void __init init_drmem_v2_lmbs(const __be32 *prop) int lmb_index; lmb_sets = of_read_number(prop++, 1); + if (lmb_sets == 0) + return; /* first pass, calculate the number of LMBs */ p = prop; diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/hash64_4k.c index 5a69b51d08a3..d573d7d07f25 100644 --- a/arch/powerpc/mm/hash64_4k.c +++ b/arch/powerpc/mm/hash64_4k.c @@ -55,7 +55,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, * need to add in 0x1 if it's a read-only user page */ rflags = htab_convert_pte_flags(new_pte); - rpte = __real_pte(__pte(old_pte), ptep); + rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE); if (cpu_has_feature(CPU_FTR_NOEXECUTE) && !cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) @@ -117,7 +117,7 @@ repeat: return -1; } new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE; - new_pte |= pte_set_hidx(ptep, rpte, 0, slot); + new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE); } *ptep = __pte(new_pte & ~H_PAGE_BUSY); return 0; diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c index 2253bbc6a599..e601d95c3b20 100644 --- a/arch/powerpc/mm/hash64_64k.c +++ b/arch/powerpc/mm/hash64_64k.c @@ -86,7 +86,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, subpg_index = (ea & (PAGE_SIZE - 1)) >> shift; vpn = hpt_vpn(ea, vsid, ssize); - rpte = __real_pte(__pte(old_pte), ptep); + rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE); /* *None of the sub 4k page is hashed */ @@ -214,7 +214,7 @@ repeat: return -1; } - new_pte |= pte_set_hidx(ptep, rpte, subpg_index, slot); + new_pte |= pte_set_hidx(ptep, rpte, subpg_index, slot, PTRS_PER_PTE); new_pte |= H_PAGE_HASHPTE; *ptep = __pte(new_pte & ~H_PAGE_BUSY); @@ -262,7 +262,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access, } while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte))); rflags = htab_convert_pte_flags(new_pte); - rpte = __real_pte(__pte(old_pte), ptep); + rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE); if (cpu_has_feature(CPU_FTR_NOEXECUTE) && !cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) @@ -327,7 +327,7 @@ repeat: } new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE; - new_pte |= pte_set_hidx(ptep, rpte, 0, slot); + new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE); } *ptep = __pte(new_pte & ~H_PAGE_BUSY); return 0; diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 7d07c7e17db6..cf290d415dcd 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -1008,6 +1008,7 @@ void __init hash__early_init_mmu(void) __pmd_index_size = H_PMD_INDEX_SIZE; __pud_index_size = H_PUD_INDEX_SIZE; __pgd_index_size = H_PGD_INDEX_SIZE; + __pud_cache_index = H_PUD_CACHE_INDEX; __pmd_cache_index = H_PMD_CACHE_INDEX; __pte_table_size = H_PTE_TABLE_SIZE; __pmd_table_size = H_PMD_TABLE_SIZE; diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c index 12511f5a015f..b320f5097a06 100644 --- a/arch/powerpc/mm/hugetlbpage-hash64.c +++ b/arch/powerpc/mm/hugetlbpage-hash64.c @@ -27,7 +27,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, unsigned long vpn; unsigned long old_pte, new_pte; unsigned long rflags, pa, sz; - long slot; + long slot, offset; BUG_ON(shift != mmu_psize_defs[mmu_psize].shift); @@ -63,7 +63,11 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, } while(!pte_xchg(ptep, __pte(old_pte), __pte(new_pte))); rflags = htab_convert_pte_flags(new_pte); - rpte = __real_pte(__pte(old_pte), ptep); + if (unlikely(mmu_psize == MMU_PAGE_16G)) + offset = PTRS_PER_PUD; + else + offset = PTRS_PER_PMD; + rpte = __real_pte(__pte(old_pte), ptep, offset); sz = ((1UL) << shift); if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) @@ -104,7 +108,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, return -1; } - new_pte |= pte_set_hidx(ptep, rpte, 0, slot); + new_pte |= pte_set_hidx(ptep, rpte, 0, slot, offset); } /* diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c index eb8c6c8c4851..2b656e67f2ea 100644 --- a/arch/powerpc/mm/init-common.c +++ b/arch/powerpc/mm/init-common.c @@ -100,6 +100,6 @@ void pgtable_cache_init(void) * same size as either the pgd or pmd index except with THP enabled * on book3s 64 */ - if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE)) - pgtable_cache_add(PUD_INDEX_SIZE, pud_ctor); + if (PUD_CACHE_INDEX && !PGT_CACHE(PUD_CACHE_INDEX)) + pgtable_cache_add(PUD_CACHE_INDEX, pud_ctor); } diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 314d19ab9385..edd8d0bc9364 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -143,11 +143,6 @@ static void reset_numa_cpu_lookup_table(void) numa_cpu_lookup_table[cpu] = -1; } -static void update_numa_cpu_lookup_table(unsigned int cpu, int node) -{ - numa_cpu_lookup_table[cpu] = node; -} - static void map_cpu_to_node(int cpu, int node) { update_numa_cpu_lookup_table(cpu, node); diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index 573a9a2ee455..2e10a964e290 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -17,9 +17,11 @@ #include <linux/of_fdt.h> #include <linux/mm.h> #include <linux/string_helpers.h> +#include <linux/stop_machine.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> +#include <asm/mmu_context.h> #include <asm/dma.h> #include <asm/machdep.h> #include <asm/mmu.h> @@ -333,6 +335,22 @@ static void __init radix_init_pgtable(void) "r" (TLBIEL_INVAL_SET_LPID), "r" (0)); asm volatile("eieio; tlbsync; ptesync" : : : "memory"); trace_tlbie(0, 0, TLBIEL_INVAL_SET_LPID, 0, 2, 1, 1); + + /* + * The init_mm context is given the first available (non-zero) PID, + * which is the "guard PID" and contains no page table. PIDR should + * never be set to zero because that duplicates the kernel address + * space at the 0x0... offset (quadrant 0)! + * + * An arbitrary PID that may later be allocated by the PID allocator + * for userspace processes must not be used either, because that + * would cause stale user mappings for that PID on CPUs outside of + * the TLB invalidation scheme (because it won't be in mm_cpumask). + * + * So permanently carve out one PID for the purpose of a guard PID. + */ + init_mm.context.id = mmu_base_pid; + mmu_base_pid++; } static void __init radix_init_partition_table(void) @@ -535,6 +553,7 @@ void __init radix__early_init_mmu(void) __pmd_index_size = RADIX_PMD_INDEX_SIZE; __pud_index_size = RADIX_PUD_INDEX_SIZE; __pgd_index_size = RADIX_PGD_INDEX_SIZE; + __pud_cache_index = RADIX_PUD_INDEX_SIZE; __pmd_cache_index = RADIX_PMD_INDEX_SIZE; __pte_table_size = RADIX_PTE_TABLE_SIZE; __pmd_table_size = RADIX_PMD_TABLE_SIZE; @@ -579,7 +598,8 @@ void __init radix__early_init_mmu(void) radix_init_iamr(); radix_init_pgtable(); - + /* Switch to the guard PID before turning on MMU */ + radix__switch_mmu_context(NULL, &init_mm); if (cpu_has_feature(CPU_FTR_HVMODE)) tlbiel_all(); } @@ -604,6 +624,7 @@ void radix__early_init_mmu_secondary(void) } radix_init_iamr(); + radix__switch_mmu_context(NULL, &init_mm); if (cpu_has_feature(CPU_FTR_HVMODE)) tlbiel_all(); } @@ -666,6 +687,30 @@ static void free_pmd_table(pmd_t *pmd_start, pud_t *pud) pud_clear(pud); } +struct change_mapping_params { + pte_t *pte; + unsigned long start; + unsigned long end; + unsigned long aligned_start; + unsigned long aligned_end; +}; + +static int stop_machine_change_mapping(void *data) +{ + struct change_mapping_params *params = + (struct change_mapping_params *)data; + + if (!data) + return -1; + + spin_unlock(&init_mm.page_table_lock); + pte_clear(&init_mm, params->aligned_start, params->pte); + create_physical_mapping(params->aligned_start, params->start); + create_physical_mapping(params->end, params->aligned_end); + spin_lock(&init_mm.page_table_lock); + return 0; +} + static void remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end) { @@ -694,6 +739,52 @@ static void remove_pte_table(pte_t *pte_start, unsigned long addr, } } +/* + * clear the pte and potentially split the mapping helper + */ +static void split_kernel_mapping(unsigned long addr, unsigned long end, + unsigned long size, pte_t *pte) +{ + unsigned long mask = ~(size - 1); + unsigned long aligned_start = addr & mask; + unsigned long aligned_end = addr + size; + struct change_mapping_params params; + bool split_region = false; + + if ((end - addr) < size) { + /* + * We're going to clear the PTE, but not flushed + * the mapping, time to remap and flush. The + * effects if visible outside the processor or + * if we are running in code close to the + * mapping we cleared, we are in trouble. + */ + if (overlaps_kernel_text(aligned_start, addr) || + overlaps_kernel_text(end, aligned_end)) { + /* + * Hack, just return, don't pte_clear + */ + WARN_ONCE(1, "Linear mapping %lx->%lx overlaps kernel " + "text, not splitting\n", addr, end); + return; + } + split_region = true; + } + + if (split_region) { + params.pte = pte; + params.start = addr; + params.end = end; + params.aligned_start = addr & ~(size - 1); + params.aligned_end = min_t(unsigned long, aligned_end, + (unsigned long)__va(memblock_end_of_DRAM())); + stop_machine(stop_machine_change_mapping, ¶ms, NULL); + return; + } + + pte_clear(&init_mm, addr, pte); +} + static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end) { @@ -709,13 +800,7 @@ static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr, continue; if (pmd_huge(*pmd)) { - if (!IS_ALIGNED(addr, PMD_SIZE) || - !IS_ALIGNED(next, PMD_SIZE)) { - WARN_ONCE(1, "%s: unaligned range\n", __func__); - continue; - } - - pte_clear(&init_mm, addr, (pte_t *)pmd); + split_kernel_mapping(addr, end, PMD_SIZE, (pte_t *)pmd); continue; } @@ -740,13 +825,7 @@ static void remove_pud_table(pud_t *pud_start, unsigned long addr, continue; if (pud_huge(*pud)) { - if (!IS_ALIGNED(addr, PUD_SIZE) || - !IS_ALIGNED(next, PUD_SIZE)) { - WARN_ONCE(1, "%s: unaligned range\n", __func__); - continue; - } - - pte_clear(&init_mm, addr, (pte_t *)pud); + split_kernel_mapping(addr, end, PUD_SIZE, (pte_t *)pud); continue; } @@ -772,13 +851,7 @@ static void remove_pagetable(unsigned long start, unsigned long end) continue; if (pgd_huge(*pgd)) { - if (!IS_ALIGNED(addr, PGDIR_SIZE) || - !IS_ALIGNED(next, PGDIR_SIZE)) { - WARN_ONCE(1, "%s: unaligned range\n", __func__); - continue; - } - - pte_clear(&init_mm, addr, (pte_t *)pgd); + split_kernel_mapping(addr, end, PGDIR_SIZE, (pte_t *)pgd); continue; } diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index c9a623c2d8a2..28c980eb4422 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -82,6 +82,8 @@ unsigned long __pgd_index_size; EXPORT_SYMBOL(__pgd_index_size); unsigned long __pmd_cache_index; EXPORT_SYMBOL(__pmd_cache_index); +unsigned long __pud_cache_index; +EXPORT_SYMBOL(__pud_cache_index); unsigned long __pte_table_size; EXPORT_SYMBOL(__pte_table_size); unsigned long __pmd_table_size; @@ -471,6 +473,8 @@ void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0, if (old & PATB_HR) { asm volatile(PPC_TLBIE_5(%0,%1,2,0,1) : : "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid)); + asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : : + "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid)); trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 1); } else { asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : : diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c index 881ebd53ffc2..9b23f12e863c 100644 --- a/arch/powerpc/mm/tlb_hash64.c +++ b/arch/powerpc/mm/tlb_hash64.c @@ -51,7 +51,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr, unsigned int psize; int ssize; real_pte_t rpte; - int i; + int i, offset; i = batch->index; @@ -67,6 +67,10 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr, psize = get_slice_psize(mm, addr); /* Mask the address for the correct page size */ addr &= ~((1UL << mmu_psize_defs[psize].shift) - 1); + if (unlikely(psize == MMU_PAGE_16G)) + offset = PTRS_PER_PUD; + else + offset = PTRS_PER_PMD; #else BUG(); psize = pte_pagesize_index(mm, addr, pte); /* shutup gcc */ @@ -78,6 +82,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr, * support 64k pages, this might be different from the * hardware page size encoded in the slice table. */ addr &= PAGE_MASK; + offset = PTRS_PER_PTE; } @@ -91,7 +96,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr, } WARN_ON(vsid == 0); vpn = hpt_vpn(addr, vsid, ssize); - rpte = __real_pte(__pte(pte), ptep); + rpte = __real_pte(__pte(pte), ptep, offset); /* * Check if we have an active batch on this CPU. If not, just diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 872d1f6dd11e..a9636d8cba15 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -327,6 +327,9 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4); PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, len)); break; + case BPF_LDX | BPF_W | BPF_ABS: /* A = *((u32 *)(seccomp_data + K)); */ + PPC_LWZ_OFFS(r_A, r_skb, K); + break; case BPF_LDX | BPF_W | BPF_LEN: /* X = skb->len; */ PPC_LWZ_OFFS(r_X, r_skb, offsetof(struct sk_buff, len)); break; diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 0a34b0cec7b7..0ef3d9580e98 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -240,6 +240,7 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 * goto out; */ PPC_LWZ(b2p[TMP_REG_1], b2p_bpf_array, offsetof(struct bpf_array, map.max_entries)); + PPC_RLWINM(b2p_index, b2p_index, 0, 0, 31); PPC_CMPLW(b2p_index, b2p[TMP_REG_1]); PPC_BCC(COND_GE, out); diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c index dd4c9b8b8a81..f6f55ab4980e 100644 --- a/arch/powerpc/platforms/powernv/opal-imc.c +++ b/arch/powerpc/platforms/powernv/opal-imc.c @@ -199,9 +199,11 @@ static void disable_nest_pmu_counters(void) const struct cpumask *l_cpumask; get_online_cpus(); - for_each_online_node(nid) { + for_each_node_with_cpus(nid) { l_cpumask = cpumask_of_node(nid); - cpu = cpumask_first(l_cpumask); + cpu = cpumask_first_and(l_cpumask, cpu_online_mask); + if (cpu >= nr_cpu_ids) + continue; opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST, get_hard_smp_processor_id(cpu)); } diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 496e47696ed0..a6c92c78c9b2 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1854,7 +1854,7 @@ static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) s64 rc; if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE)) - return -ENODEV;; + return -ENODEV; pe = &phb->ioda.pe_array[pdn->pe_number]; if (pe->tce_bypass_enabled) { diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 4fb21e17504a..092715b9674b 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -80,6 +80,10 @@ static void pnv_setup_rfi_flush(void) if (np && of_property_read_bool(np, "disabled")) enable--; + np = of_get_child_by_name(fw_features, "speculation-policy-favor-security"); + if (np && of_property_read_bool(np, "disabled")) + enable = 0; + of_node_put(np); of_node_put(fw_features); } diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c index 2b3eb01ab110..b7c53a51c31b 100644 --- a/arch/powerpc/platforms/powernv/vas-window.c +++ b/arch/powerpc/platforms/powernv/vas-window.c @@ -1063,16 +1063,16 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop, rc = PTR_ERR(txwin->paste_kaddr); goto free_window; } + } else { + /* + * A user mapping must ensure that context switch issues + * CP_ABORT for this thread. + */ + rc = set_thread_uses_vas(); + if (rc) + goto free_window; } - /* - * Now that we have a send window, ensure context switch issues - * CP_ABORT for this thread. - */ - rc = -EINVAL; - if (set_thread_uses_vas() < 0) - goto free_window; - set_vinst_win(vinst, txwin); return txwin; diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index dceb51454d8d..652d3e96b812 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -36,6 +36,7 @@ #include <asm/xics.h> #include <asm/xive.h> #include <asm/plpar_wrappers.h> +#include <asm/topology.h> #include "pseries.h" #include "offline_states.h" @@ -331,6 +332,7 @@ static void pseries_remove_processor(struct device_node *np) BUG_ON(cpu_online(cpu)); set_cpu_present(cpu, false); set_hard_smp_processor_id(cpu, -1); + update_numa_cpu_lookup_table(cpu, -1); break; } if (cpu >= nr_cpu_ids) @@ -340,8 +342,6 @@ static void pseries_remove_processor(struct device_node *np) cpu_maps_update_done(); } -extern int find_and_online_cpu_nid(int cpu); - static int dlpar_online_cpu(struct device_node *dn) { int rc = 0; diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 81d8614e7379..5e1ef9150182 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -49,6 +49,28 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id); /* + * Enable the hotplug interrupt late because processing them may touch other + * devices or systems (e.g. hugepages) that have not been initialized at the + * subsys stage. + */ +int __init init_ras_hotplug_IRQ(void) +{ + struct device_node *np; + + /* Hotplug Events */ + np = of_find_node_by_path("/event-sources/hot-plug-events"); + if (np != NULL) { + if (dlpar_workqueue_init() == 0) + request_event_sources_irqs(np, ras_hotplug_interrupt, + "RAS_HOTPLUG"); + of_node_put(np); + } + + return 0; +} +machine_late_initcall(pseries, init_ras_hotplug_IRQ); + +/* * Initialize handlers for the set of interrupts caused by hardware errors * and power system events. */ @@ -66,15 +88,6 @@ static int __init init_ras_IRQ(void) of_node_put(np); } - /* Hotplug Events */ - np = of_find_node_by_path("/event-sources/hot-plug-events"); - if (np != NULL) { - if (dlpar_workqueue_init() == 0) - request_event_sources_irqs(np, ras_hotplug_interrupt, - "RAS_HOTPLUG"); - of_node_put(np); - } - /* EPOW Events */ np = of_find_node_by_path("/event-sources/epow-events"); if (np != NULL) { diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 372d7ada1a0c..1a527625acf7 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -482,7 +482,8 @@ static void pseries_setup_rfi_flush(void) if (types == L1D_FLUSH_NONE) types = L1D_FLUSH_FALLBACK; - if (!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR)) + if ((!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR)) || + (!(result.behaviour & H_CPU_BEHAV_FAVOUR_SECURITY))) enable = false; } else { /* Default to fallback if case hcall is not available */ diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c index d9c4c9366049..091f1d0d0af1 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -356,7 +356,8 @@ static int xive_spapr_configure_queue(u32 target, struct xive_q *q, u8 prio, rc = plpar_int_get_queue_info(0, target, prio, &esn_page, &esn_size); if (rc) { - pr_err("Error %lld getting queue info prio %d\n", rc, prio); + pr_err("Error %lld getting queue info CPU %d prio %d\n", rc, + target, prio); rc = -EIO; goto fail; } @@ -370,7 +371,8 @@ static int xive_spapr_configure_queue(u32 target, struct xive_q *q, u8 prio, /* Configure and enable the queue in HW */ rc = plpar_int_set_queue_config(flags, target, prio, qpage_phys, order); if (rc) { - pr_err("Error %lld setting queue for prio %d\n", rc, prio); + pr_err("Error %lld setting queue for CPU %d prio %d\n", rc, + target, prio); rc = -EIO; } else { q->qpage = qpage; @@ -389,8 +391,8 @@ static int xive_spapr_setup_queue(unsigned int cpu, struct xive_cpu *xc, if (IS_ERR(qpage)) return PTR_ERR(qpage); - return xive_spapr_configure_queue(cpu, q, prio, qpage, - xive_queue_shift); + return xive_spapr_configure_queue(get_hard_smp_processor_id(cpu), + q, prio, qpage, xive_queue_shift); } static void xive_spapr_cleanup_queue(unsigned int cpu, struct xive_cpu *xc, @@ -399,10 +401,12 @@ static void xive_spapr_cleanup_queue(unsigned int cpu, struct xive_cpu *xc, struct xive_q *q = &xc->queue[prio]; unsigned int alloc_order; long rc; + int hw_cpu = get_hard_smp_processor_id(cpu); - rc = plpar_int_set_queue_config(0, cpu, prio, 0, 0); + rc = plpar_int_set_queue_config(0, hw_cpu, prio, 0, 0); if (rc) - pr_err("Error %ld setting queue for prio %d\n", rc, prio); + pr_err("Error %ld setting queue for CPU %d prio %d\n", rc, + hw_cpu, prio); alloc_order = xive_alloc_order(xive_queue_shift); free_pages((unsigned long)q->qpage, alloc_order); diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index b6722c246d9c..04807c7f64cc 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -8,7 +8,6 @@ config RISCV select OF select OF_EARLY_FLATTREE select OF_IRQ - select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select ARCH_WANT_FRAME_POINTERS select CLONE_BACKWARDS select COMMON_CLK @@ -20,7 +19,6 @@ config RISCV select GENERIC_STRNLEN_USER select GENERIC_SMP_IDLE_THREAD select GENERIC_ATOMIC64 if !64BIT || !RISCV_ISA_A - select ARCH_WANT_OPTIONAL_GPIOLIB select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP select HAVE_DMA_API_DEBUG @@ -34,7 +32,6 @@ config RISCV select HAVE_ARCH_TRACEHOOK select MODULES_USE_ELF_RELA if MODULES select THREAD_INFO_IN_TASK - select RISCV_IRQ_INTC select RISCV_TIMER config MMU diff --git a/arch/riscv/include/asm/barrier.h b/arch/riscv/include/asm/barrier.h index c0319cbf1eec..5510366d169a 100644 --- a/arch/riscv/include/asm/barrier.h +++ b/arch/riscv/include/asm/barrier.h @@ -34,9 +34,9 @@ #define wmb() RISCV_FENCE(ow,ow) /* These barriers do not need to enforce ordering on devices, just memory. */ -#define smp_mb() RISCV_FENCE(rw,rw) -#define smp_rmb() RISCV_FENCE(r,r) -#define smp_wmb() RISCV_FENCE(w,w) +#define __smp_mb() RISCV_FENCE(rw,rw) +#define __smp_rmb() RISCV_FENCE(r,r) +#define __smp_wmb() RISCV_FENCE(w,w) /* * This is a very specific barrier: it's currently only used in two places in diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 87fc045be51f..56fa592cfa34 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -172,6 +172,9 @@ ENTRY(handle_exception) move a1, sp /* pt_regs */ tail do_IRQ 1: + /* Exceptions run with interrupts enabled */ + csrs sstatus, SR_SIE + /* Handle syscalls */ li t0, EXC_SYSCALL beq s4, t0, handle_syscall @@ -198,8 +201,6 @@ handle_syscall: */ addi s2, s2, 0x4 REG_S s2, PT_SEPC(sp) - /* System calls run with interrupts enabled */ - csrs sstatus, SR_SIE /* Trace syscalls, but only if requested by the user. */ REG_L t0, TASK_TI_FLAGS(tp) andi t0, t0, _TIF_SYSCALL_TRACE diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 226eeb190f90..6e07ed37bbff 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -64,7 +64,7 @@ ENTRY(_start) /* Start the kernel */ mv a0, s0 mv a1, s1 - call sbi_save + call parse_dtb tail start_kernel relocate: diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 09f7064e898c..c11f40c1b2a8 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -144,7 +144,7 @@ asmlinkage void __init setup_vm(void) #endif } -void __init sbi_save(unsigned int hartid, void *dtb) +void __init parse_dtb(unsigned int hartid, void *dtb) { early_init_dt_scan(__va(dtb)); } diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 65154eaa3714..6c8ce15cde7b 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -63,6 +63,7 @@ static inline int init_new_context(struct task_struct *tsk, _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT; /* pgd_alloc() did not account this pmd */ mm_inc_nr_pmds(mm); + mm_inc_nr_puds(mm); } crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm)); return 0; diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 13a133a6015c..a5621ea6d123 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -14,6 +14,7 @@ #include <asm/processor.h> #include <asm/cache.h> #include <asm/ctl_reg.h> +#include <asm/dwarf.h> #include <asm/errno.h> #include <asm/ptrace.h> #include <asm/thread_info.h> @@ -230,7 +231,7 @@ _PIF_WORK = (_PIF_PER_TRAP | _PIF_SYSCALL_RESTART) .hidden \name .type \name,@function \name: - .cfi_startproc + CFI_STARTPROC #ifdef CONFIG_HAVE_MARCH_Z10_FEATURES exrl 0,0f #else @@ -239,7 +240,7 @@ _PIF_WORK = (_PIF_PER_TRAP | _PIF_SYSCALL_RESTART) #endif j . 0: br \reg - .cfi_endproc + CFI_ENDPROC .endm GEN_BR_THUNK __s390x_indirect_jump_r1use_r9,%r9,%r1 @@ -426,13 +427,13 @@ ENTRY(system_call) UPDATE_VTIME %r8,%r9,__LC_SYNC_ENTER_TIMER BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP stmg %r0,%r7,__PT_R0(%r11) - # clear user controlled register to prevent speculative use - xgr %r0,%r0 mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC mvc __PT_PSW(16,%r11),__LC_SVC_OLD_PSW mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC stg %r14,__PT_FLAGS(%r11) .Lsysc_do_svc: + # clear user controlled register to prevent speculative use + xgr %r0,%r0 # load address of system call table lg %r10,__THREAD_sysc_table(%r13,%r12) llgh %r8,__PT_INT_CODE+2(%r11) @@ -1439,6 +1440,7 @@ cleanup_critical: stg %r15,__LC_SYSTEM_TIMER 0: # update accounting time stamp mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER + BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP # set up saved register r11 lg %r15,__LC_KERNEL_STACK la %r9,STACK_FRAME_OVERHEAD(%r15) diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c index 69d7fcf48158..9aff72d3abda 100644 --- a/arch/s390/kernel/nospec-branch.c +++ b/arch/s390/kernel/nospec-branch.c @@ -2,8 +2,8 @@ #include <linux/module.h> #include <asm/nospec-branch.h> -int nospec_call_disable = IS_ENABLED(EXPOLINE_OFF); -int nospec_return_disable = !IS_ENABLED(EXPOLINE_FULL); +int nospec_call_disable = IS_ENABLED(CONFIG_EXPOLINE_OFF); +int nospec_return_disable = !IS_ENABLED(CONFIG_EXPOLINE_FULL); static int __init nospectre_v2_setup_early(char *str) { diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 9c7d70715862..07c6e81163bf 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -22,22 +22,6 @@ #include "trace.h" #include "trace-s390.h" - -static const intercept_handler_t instruction_handlers[256] = { - [0x01] = kvm_s390_handle_01, - [0x82] = kvm_s390_handle_lpsw, - [0x83] = kvm_s390_handle_diag, - [0xaa] = kvm_s390_handle_aa, - [0xae] = kvm_s390_handle_sigp, - [0xb2] = kvm_s390_handle_b2, - [0xb6] = kvm_s390_handle_stctl, - [0xb7] = kvm_s390_handle_lctl, - [0xb9] = kvm_s390_handle_b9, - [0xe3] = kvm_s390_handle_e3, - [0xe5] = kvm_s390_handle_e5, - [0xeb] = kvm_s390_handle_eb, -}; - u8 kvm_s390_get_ilen(struct kvm_vcpu *vcpu) { struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block; @@ -129,16 +113,39 @@ static int handle_validity(struct kvm_vcpu *vcpu) static int handle_instruction(struct kvm_vcpu *vcpu) { - intercept_handler_t handler; - vcpu->stat.exit_instruction++; trace_kvm_s390_intercept_instruction(vcpu, vcpu->arch.sie_block->ipa, vcpu->arch.sie_block->ipb); - handler = instruction_handlers[vcpu->arch.sie_block->ipa >> 8]; - if (handler) - return handler(vcpu); - return -EOPNOTSUPP; + + switch (vcpu->arch.sie_block->ipa >> 8) { + case 0x01: + return kvm_s390_handle_01(vcpu); + case 0x82: + return kvm_s390_handle_lpsw(vcpu); + case 0x83: + return kvm_s390_handle_diag(vcpu); + case 0xaa: + return kvm_s390_handle_aa(vcpu); + case 0xae: + return kvm_s390_handle_sigp(vcpu); + case 0xb2: + return kvm_s390_handle_b2(vcpu); + case 0xb6: + return kvm_s390_handle_stctl(vcpu); + case 0xb7: + return kvm_s390_handle_lctl(vcpu); + case 0xb9: + return kvm_s390_handle_b9(vcpu); + case 0xe3: + return kvm_s390_handle_e3(vcpu); + case 0xe5: + return kvm_s390_handle_e5(vcpu); + case 0xeb: + return kvm_s390_handle_eb(vcpu); + default: + return -EOPNOTSUPP; + } } static int inject_prog_on_prog_intercept(struct kvm_vcpu *vcpu) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index aabf46f5f883..b04616b57a94 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -169,8 +169,15 @@ static int ckc_interrupts_enabled(struct kvm_vcpu *vcpu) static int ckc_irq_pending(struct kvm_vcpu *vcpu) { - if (vcpu->arch.sie_block->ckc >= kvm_s390_get_tod_clock_fast(vcpu->kvm)) + const u64 now = kvm_s390_get_tod_clock_fast(vcpu->kvm); + const u64 ckc = vcpu->arch.sie_block->ckc; + + if (vcpu->arch.sie_block->gcr[0] & 0x0020000000000000ul) { + if ((s64)ckc >= (s64)now) + return 0; + } else if (ckc >= now) { return 0; + } return ckc_interrupts_enabled(vcpu); } @@ -187,12 +194,6 @@ static int cpu_timer_irq_pending(struct kvm_vcpu *vcpu) return kvm_s390_get_cpu_timer(vcpu) >> 63; } -static inline int is_ioirq(unsigned long irq_type) -{ - return ((irq_type >= IRQ_PEND_IO_ISC_7) && - (irq_type <= IRQ_PEND_IO_ISC_0)); -} - static uint64_t isc_to_isc_bits(int isc) { return (0x80 >> isc) << 24; @@ -236,10 +237,15 @@ static inline int kvm_s390_gisa_tac_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gis return test_and_clear_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa); } -static inline unsigned long pending_irqs(struct kvm_vcpu *vcpu) +static inline unsigned long pending_irqs_no_gisa(struct kvm_vcpu *vcpu) { return vcpu->kvm->arch.float_int.pending_irqs | - vcpu->arch.local_int.pending_irqs | + vcpu->arch.local_int.pending_irqs; +} + +static inline unsigned long pending_irqs(struct kvm_vcpu *vcpu) +{ + return pending_irqs_no_gisa(vcpu) | kvm_s390_gisa_get_ipm(vcpu->kvm->arch.gisa) << IRQ_PEND_IO_ISC_7; } @@ -337,7 +343,7 @@ static void __reset_intercept_indicators(struct kvm_vcpu *vcpu) static void set_intercept_indicators_io(struct kvm_vcpu *vcpu) { - if (!(pending_irqs(vcpu) & IRQ_PEND_IO_MASK)) + if (!(pending_irqs_no_gisa(vcpu) & IRQ_PEND_IO_MASK)) return; else if (psw_ioint_disabled(vcpu)) kvm_s390_set_cpuflags(vcpu, CPUSTAT_IO_INT); @@ -1011,24 +1017,6 @@ out: return rc; } -typedef int (*deliver_irq_t)(struct kvm_vcpu *vcpu); - -static const deliver_irq_t deliver_irq_funcs[] = { - [IRQ_PEND_MCHK_EX] = __deliver_machine_check, - [IRQ_PEND_MCHK_REP] = __deliver_machine_check, - [IRQ_PEND_PROG] = __deliver_prog, - [IRQ_PEND_EXT_EMERGENCY] = __deliver_emergency_signal, - [IRQ_PEND_EXT_EXTERNAL] = __deliver_external_call, - [IRQ_PEND_EXT_CLOCK_COMP] = __deliver_ckc, - [IRQ_PEND_EXT_CPU_TIMER] = __deliver_cpu_timer, - [IRQ_PEND_RESTART] = __deliver_restart, - [IRQ_PEND_SET_PREFIX] = __deliver_set_prefix, - [IRQ_PEND_PFAULT_INIT] = __deliver_pfault_init, - [IRQ_PEND_EXT_SERVICE] = __deliver_service, - [IRQ_PEND_PFAULT_DONE] = __deliver_pfault_done, - [IRQ_PEND_VIRTIO] = __deliver_virtio, -}; - /* Check whether an external call is pending (deliverable or not) */ int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu) { @@ -1066,13 +1054,19 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) static u64 __calculate_sltime(struct kvm_vcpu *vcpu) { - u64 now, cputm, sltime = 0; + const u64 now = kvm_s390_get_tod_clock_fast(vcpu->kvm); + const u64 ckc = vcpu->arch.sie_block->ckc; + u64 cputm, sltime = 0; if (ckc_interrupts_enabled(vcpu)) { - now = kvm_s390_get_tod_clock_fast(vcpu->kvm); - sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now); - /* already expired or overflow? */ - if (!sltime || vcpu->arch.sie_block->ckc <= now) + if (vcpu->arch.sie_block->gcr[0] & 0x0020000000000000ul) { + if ((s64)now < (s64)ckc) + sltime = tod_to_ns((s64)ckc - (s64)now); + } else if (now < ckc) { + sltime = tod_to_ns(ckc - now); + } + /* already expired */ + if (!sltime) return 0; if (cpu_timer_interrupts_enabled(vcpu)) { cputm = kvm_s390_get_cpu_timer(vcpu); @@ -1192,7 +1186,6 @@ void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu) int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; - deliver_irq_t func; int rc = 0; unsigned long irq_type; unsigned long irqs; @@ -1212,16 +1205,57 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) while ((irqs = deliverable_irqs(vcpu)) && !rc) { /* bits are in the reverse order of interrupt priority */ irq_type = find_last_bit(&irqs, IRQ_PEND_COUNT); - if (is_ioirq(irq_type)) { + switch (irq_type) { + case IRQ_PEND_IO_ISC_0: + case IRQ_PEND_IO_ISC_1: + case IRQ_PEND_IO_ISC_2: + case IRQ_PEND_IO_ISC_3: + case IRQ_PEND_IO_ISC_4: + case IRQ_PEND_IO_ISC_5: + case IRQ_PEND_IO_ISC_6: + case IRQ_PEND_IO_ISC_7: rc = __deliver_io(vcpu, irq_type); - } else { - func = deliver_irq_funcs[irq_type]; - if (!func) { - WARN_ON_ONCE(func == NULL); - clear_bit(irq_type, &li->pending_irqs); - continue; - } - rc = func(vcpu); + break; + case IRQ_PEND_MCHK_EX: + case IRQ_PEND_MCHK_REP: + rc = __deliver_machine_check(vcpu); + break; + case IRQ_PEND_PROG: + rc = __deliver_prog(vcpu); + break; + case IRQ_PEND_EXT_EMERGENCY: + rc = __deliver_emergency_signal(vcpu); + break; + case IRQ_PEND_EXT_EXTERNAL: + rc = __deliver_external_call(vcpu); + break; + case IRQ_PEND_EXT_CLOCK_COMP: + rc = __deliver_ckc(vcpu); + break; + case IRQ_PEND_EXT_CPU_TIMER: + rc = __deliver_cpu_timer(vcpu); + break; + case IRQ_PEND_RESTART: + rc = __deliver_restart(vcpu); + break; + case IRQ_PEND_SET_PREFIX: + rc = __deliver_set_prefix(vcpu); + break; + case IRQ_PEND_PFAULT_INIT: + rc = __deliver_pfault_init(vcpu); + break; + case IRQ_PEND_EXT_SERVICE: + rc = __deliver_service(vcpu); + break; + case IRQ_PEND_PFAULT_DONE: + rc = __deliver_pfault_done(vcpu); + break; + case IRQ_PEND_VIRTIO: + rc = __deliver_virtio(vcpu); + break; + default: + WARN_ONCE(1, "Unknown pending irq type %ld", irq_type); + clear_bit(irq_type, &li->pending_irqs); } } @@ -1701,7 +1735,8 @@ static void __floating_irq_kick(struct kvm *kvm, u64 type) kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_STOP_INT); break; case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: - kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_IO_INT); + if (!(type & KVM_S390_INT_IO_AI_MASK && kvm->arch.gisa)) + kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_IO_INT); break; default: kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_EXT_INT); diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index ba4c7092335a..339ac0964590 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -86,6 +86,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) }, { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) }, { "deliver_program_interruption", VCPU_STAT(deliver_program_int) }, + { "deliver_io_interrupt", VCPU_STAT(deliver_io_int) }, { "exit_wait_state", VCPU_STAT(exit_wait_state) }, { "instruction_epsw", VCPU_STAT(instruction_epsw) }, { "instruction_gs", VCPU_STAT(instruction_gs) }, @@ -179,6 +180,28 @@ int kvm_arch_hardware_enable(void) static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start, unsigned long end); +static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta) +{ + u8 delta_idx = 0; + + /* + * The TOD jumps by delta, we have to compensate this by adding + * -delta to the epoch. + */ + delta = -delta; + + /* sign-extension - we're adding to signed values below */ + if ((s64)delta < 0) + delta_idx = -1; + + scb->epoch += delta; + if (scb->ecd & ECD_MEF) { + scb->epdx += delta_idx; + if (scb->epoch < delta) + scb->epdx += 1; + } +} + /* * This callback is executed during stop_machine(). All CPUs are therefore * temporarily stopped. In order not to change guest behavior, we have to @@ -194,13 +217,17 @@ static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val, unsigned long long *delta = v; list_for_each_entry(kvm, &vm_list, vm_list) { - kvm->arch.epoch -= *delta; kvm_for_each_vcpu(i, vcpu, kvm) { - vcpu->arch.sie_block->epoch -= *delta; + kvm_clock_sync_scb(vcpu->arch.sie_block, *delta); + if (i == 0) { + kvm->arch.epoch = vcpu->arch.sie_block->epoch; + kvm->arch.epdx = vcpu->arch.sie_block->epdx; + } if (vcpu->arch.cputm_enabled) vcpu->arch.cputm_start += *delta; if (vcpu->arch.vsie_block) - vcpu->arch.vsie_block->epoch -= *delta; + kvm_clock_sync_scb(vcpu->arch.vsie_block, + *delta); } } return NOTIFY_OK; @@ -902,12 +929,9 @@ static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr) if (copy_from_user(>od, (void __user *)attr->addr, sizeof(gtod))) return -EFAULT; - if (test_kvm_facility(kvm, 139)) - kvm_s390_set_tod_clock_ext(kvm, >od); - else if (gtod.epoch_idx == 0) - kvm_s390_set_tod_clock(kvm, gtod.tod); - else + if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx) return -EINVAL; + kvm_s390_set_tod_clock(kvm, >od); VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx", gtod.epoch_idx, gtod.tod); @@ -932,13 +956,14 @@ static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr) static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr) { - u64 gtod; + struct kvm_s390_vm_tod_clock gtod = { 0 }; - if (copy_from_user(>od, (void __user *)attr->addr, sizeof(gtod))) + if (copy_from_user(>od.tod, (void __user *)attr->addr, + sizeof(gtod.tod))) return -EFAULT; - kvm_s390_set_tod_clock(kvm, gtod); - VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod); + kvm_s390_set_tod_clock(kvm, >od); + VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod); return 0; } @@ -2122,6 +2147,7 @@ static void sca_add_vcpu(struct kvm_vcpu *vcpu) /* we still need the basic sca for the ipte control */ vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; + return; } read_lock(&vcpu->kvm->arch.sca_lock); if (vcpu->kvm->arch.use_esca) { @@ -2389,6 +2415,7 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) mutex_lock(&vcpu->kvm->lock); preempt_disable(); vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch; + vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx; preempt_enable(); mutex_unlock(&vcpu->kvm->lock); if (!kvm_is_ucontrol(vcpu->kvm)) { @@ -3021,8 +3048,8 @@ retry: return 0; } -void kvm_s390_set_tod_clock_ext(struct kvm *kvm, - const struct kvm_s390_vm_tod_clock *gtod) +void kvm_s390_set_tod_clock(struct kvm *kvm, + const struct kvm_s390_vm_tod_clock *gtod) { struct kvm_vcpu *vcpu; struct kvm_s390_tod_clock_ext htod; @@ -3034,10 +3061,12 @@ void kvm_s390_set_tod_clock_ext(struct kvm *kvm, get_tod_clock_ext((char *)&htod); kvm->arch.epoch = gtod->tod - htod.tod; - kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx; - - if (kvm->arch.epoch > gtod->tod) - kvm->arch.epdx -= 1; + kvm->arch.epdx = 0; + if (test_kvm_facility(kvm, 139)) { + kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx; + if (kvm->arch.epoch > gtod->tod) + kvm->arch.epdx -= 1; + } kvm_s390_vcpu_block_all(kvm); kvm_for_each_vcpu(i, vcpu, kvm) { @@ -3050,22 +3079,6 @@ void kvm_s390_set_tod_clock_ext(struct kvm *kvm, mutex_unlock(&kvm->lock); } -void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod) -{ - struct kvm_vcpu *vcpu; - int i; - - mutex_lock(&kvm->lock); - preempt_disable(); - kvm->arch.epoch = tod - get_tod_clock(); - kvm_s390_vcpu_block_all(kvm); - kvm_for_each_vcpu(i, vcpu, kvm) - vcpu->arch.sie_block->epoch = kvm->arch.epoch; - kvm_s390_vcpu_unblock_all(kvm); - preempt_enable(); - mutex_unlock(&kvm->lock); -} - /** * kvm_arch_fault_in_page - fault-in guest page if necessary * @vcpu: The corresponding virtual cpu diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index bd31b37b0e6f..f55ac0ef99ea 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -19,8 +19,6 @@ #include <asm/processor.h> #include <asm/sclp.h> -typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu); - /* Transactional Memory Execution related macros */ #define IS_TE_ENABLED(vcpu) ((vcpu->arch.sie_block->ecb & ECB_TE)) #define TDB_FORMAT1 1 @@ -283,9 +281,8 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu); /* implemented in kvm-s390.c */ -void kvm_s390_set_tod_clock_ext(struct kvm *kvm, - const struct kvm_s390_vm_tod_clock *gtod); -void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod); +void kvm_s390_set_tod_clock(struct kvm *kvm, + const struct kvm_s390_vm_tod_clock *gtod); long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable); int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr); int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index c4c4e157c036..f0b4185158af 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -85,9 +85,10 @@ int kvm_s390_handle_e3(struct kvm_vcpu *vcpu) /* Handle SCK (SET CLOCK) interception */ static int handle_set_clock(struct kvm_vcpu *vcpu) { + struct kvm_s390_vm_tod_clock gtod = { 0 }; int rc; u8 ar; - u64 op2, val; + u64 op2; vcpu->stat.instruction_sck++; @@ -97,12 +98,12 @@ static int handle_set_clock(struct kvm_vcpu *vcpu) op2 = kvm_s390_get_base_disp_s(vcpu, &ar); if (op2 & 7) /* Operand must be on a doubleword boundary */ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - rc = read_guest(vcpu, op2, ar, &val, sizeof(val)); + rc = read_guest(vcpu, op2, ar, >od.tod, sizeof(gtod.tod)); if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); - VCPU_EVENT(vcpu, 3, "SCK: setting guest TOD to 0x%llx", val); - kvm_s390_set_tod_clock(vcpu->kvm, val); + VCPU_EVENT(vcpu, 3, "SCK: setting guest TOD to 0x%llx", gtod.tod); + kvm_s390_set_tod_clock(vcpu->kvm, >od); kvm_s390_set_psw_cc(vcpu, 0); return 0; @@ -795,55 +796,60 @@ out: return rc; } -static const intercept_handler_t b2_handlers[256] = { - [0x02] = handle_stidp, - [0x04] = handle_set_clock, - [0x10] = handle_set_prefix, - [0x11] = handle_store_prefix, - [0x12] = handle_store_cpu_address, - [0x14] = kvm_s390_handle_vsie, - [0x21] = handle_ipte_interlock, - [0x29] = handle_iske, - [0x2a] = handle_rrbe, - [0x2b] = handle_sske, - [0x2c] = handle_test_block, - [0x30] = handle_io_inst, - [0x31] = handle_io_inst, - [0x32] = handle_io_inst, - [0x33] = handle_io_inst, - [0x34] = handle_io_inst, - [0x35] = handle_io_inst, - [0x36] = handle_io_inst, - [0x37] = handle_io_inst, - [0x38] = handle_io_inst, - [0x39] = handle_io_inst, - [0x3a] = handle_io_inst, - [0x3b] = handle_io_inst, - [0x3c] = handle_io_inst, - [0x50] = handle_ipte_interlock, - [0x56] = handle_sthyi, - [0x5f] = handle_io_inst, - [0x74] = handle_io_inst, - [0x76] = handle_io_inst, - [0x7d] = handle_stsi, - [0xb1] = handle_stfl, - [0xb2] = handle_lpswe, -}; - int kvm_s390_handle_b2(struct kvm_vcpu *vcpu) { - intercept_handler_t handler; - - /* - * A lot of B2 instructions are priviledged. Here we check for - * the privileged ones, that we can handle in the kernel. - * Anything else goes to userspace. - */ - handler = b2_handlers[vcpu->arch.sie_block->ipa & 0x00ff]; - if (handler) - return handler(vcpu); - - return -EOPNOTSUPP; + switch (vcpu->arch.sie_block->ipa & 0x00ff) { + case 0x02: + return handle_stidp(vcpu); + case 0x04: + return handle_set_clock(vcpu); + case 0x10: + return handle_set_prefix(vcpu); + case 0x11: + return handle_store_prefix(vcpu); + case 0x12: + return handle_store_cpu_address(vcpu); + case 0x14: + return kvm_s390_handle_vsie(vcpu); + case 0x21: + case 0x50: + return handle_ipte_interlock(vcpu); + case 0x29: + return handle_iske(vcpu); + case 0x2a: + return handle_rrbe(vcpu); + case 0x2b: + return handle_sske(vcpu); + case 0x2c: + return handle_test_block(vcpu); + case 0x30: + case 0x31: + case 0x32: + case 0x33: + case 0x34: + case 0x35: + case 0x36: + case 0x37: + case 0x38: + case 0x39: + case 0x3a: + case 0x3b: + case 0x3c: + case 0x5f: + case 0x74: + case 0x76: + return handle_io_inst(vcpu); + case 0x56: + return handle_sthyi(vcpu); + case 0x7d: + return handle_stsi(vcpu); + case 0xb1: + return handle_stfl(vcpu); + case 0xb2: + return handle_lpswe(vcpu); + default: + return -EOPNOTSUPP; + } } static int handle_epsw(struct kvm_vcpu *vcpu) @@ -1105,25 +1111,22 @@ static int handle_essa(struct kvm_vcpu *vcpu) return 0; } -static const intercept_handler_t b9_handlers[256] = { - [0x8a] = handle_ipte_interlock, - [0x8d] = handle_epsw, - [0x8e] = handle_ipte_interlock, - [0x8f] = handle_ipte_interlock, - [0xab] = handle_essa, - [0xaf] = handle_pfmf, -}; - int kvm_s390_handle_b9(struct kvm_vcpu *vcpu) { - intercept_handler_t handler; - - /* This is handled just as for the B2 instructions. */ - handler = b9_handlers[vcpu->arch.sie_block->ipa & 0x00ff]; - if (handler) - return handler(vcpu); - - return -EOPNOTSUPP; + switch (vcpu->arch.sie_block->ipa & 0x00ff) { + case 0x8a: + case 0x8e: + case 0x8f: + return handle_ipte_interlock(vcpu); + case 0x8d: + return handle_epsw(vcpu); + case 0xab: + return handle_essa(vcpu); + case 0xaf: + return handle_pfmf(vcpu); + default: + return -EOPNOTSUPP; + } } int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu) @@ -1271,22 +1274,20 @@ static int handle_stctg(struct kvm_vcpu *vcpu) return rc ? kvm_s390_inject_prog_cond(vcpu, rc) : 0; } -static const intercept_handler_t eb_handlers[256] = { - [0x2f] = handle_lctlg, - [0x25] = handle_stctg, - [0x60] = handle_ri, - [0x61] = handle_ri, - [0x62] = handle_ri, -}; - int kvm_s390_handle_eb(struct kvm_vcpu *vcpu) { - intercept_handler_t handler; - - handler = eb_handlers[vcpu->arch.sie_block->ipb & 0xff]; - if (handler) - return handler(vcpu); - return -EOPNOTSUPP; + switch (vcpu->arch.sie_block->ipb & 0x000000ff) { + case 0x25: + return handle_stctg(vcpu); + case 0x2f: + return handle_lctlg(vcpu); + case 0x60: + case 0x61: + case 0x62: + return handle_ri(vcpu); + default: + return -EOPNOTSUPP; + } } static int handle_tprot(struct kvm_vcpu *vcpu) @@ -1346,10 +1347,12 @@ out_unlock: int kvm_s390_handle_e5(struct kvm_vcpu *vcpu) { - /* For e5xx... instructions we only handle TPROT */ - if ((vcpu->arch.sie_block->ipa & 0x00ff) == 0x01) + switch (vcpu->arch.sie_block->ipa & 0x00ff) { + case 0x01: return handle_tprot(vcpu); - return -EOPNOTSUPP; + default: + return -EOPNOTSUPP; + } } static int handle_sckpf(struct kvm_vcpu *vcpu) @@ -1380,17 +1383,14 @@ static int handle_ptff(struct kvm_vcpu *vcpu) return 0; } -static const intercept_handler_t x01_handlers[256] = { - [0x04] = handle_ptff, - [0x07] = handle_sckpf, -}; - int kvm_s390_handle_01(struct kvm_vcpu *vcpu) { - intercept_handler_t handler; - - handler = x01_handlers[vcpu->arch.sie_block->ipa & 0x00ff]; - if (handler) - return handler(vcpu); - return -EOPNOTSUPP; + switch (vcpu->arch.sie_block->ipa & 0x00ff) { + case 0x04: + return handle_ptff(vcpu); + case 0x07: + return handle_sckpf(vcpu); + default: + return -EOPNOTSUPP; + } } diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index ec772700ff96..8961e3970901 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -821,6 +821,7 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) { struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; struct kvm_s390_sie_block *scb_o = vsie_page->scb_o; + int guest_bp_isolation; int rc; handle_last_fault(vcpu, vsie_page); @@ -831,6 +832,20 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) s390_handle_mcck(); srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); + + /* save current guest state of bp isolation override */ + guest_bp_isolation = test_thread_flag(TIF_ISOLATE_BP_GUEST); + + /* + * The guest is running with BPBC, so we have to force it on for our + * nested guest. This is done by enabling BPBC globally, so the BPBC + * control in the SCB (which the nested guest can modify) is simply + * ignored. + */ + if (test_kvm_facility(vcpu->kvm, 82) && + vcpu->arch.sie_block->fpf & FPF_BPBC) + set_thread_flag(TIF_ISOLATE_BP_GUEST); + local_irq_disable(); guest_enter_irqoff(); local_irq_enable(); @@ -840,6 +855,11 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) local_irq_disable(); guest_exit_irqoff(); local_irq_enable(); + + /* restore guest state for bp isolation override */ + if (!guest_bp_isolation) + clear_thread_flag(TIF_ISOLATE_BP_GUEST); + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); if (rc == -EINTR) { diff --git a/arch/sh/boot/dts/Makefile b/arch/sh/boot/dts/Makefile index 715def00a436..01d0f7fb14cc 100644 --- a/arch/sh/boot/dts/Makefile +++ b/arch/sh/boot/dts/Makefile @@ -1 +1,3 @@ -obj-$(CONFIG_USE_BUILTIN_DTB) += $(patsubst "%",%,$(CONFIG_BUILTIN_DTB_SOURCE)).dtb.o +ifneq ($(CONFIG_BUILTIN_DTB_SOURCE),"") +obj-y += $(patsubst "%",%,$(CONFIG_BUILTIN_DTB_SOURCE)).dtb.o +endif diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 6bf594ace663..8767e45f1b2b 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -430,6 +430,8 @@ config SPARC_LEON depends on SPARC32 select USB_EHCI_BIG_ENDIAN_MMIO select USB_EHCI_BIG_ENDIAN_DESC + select USB_UHCI_BIG_ENDIAN_MMIO + select USB_UHCI_BIG_ENDIAN_DESC ---help--- If you say Y here if you are running on a SPARC-LEON processor. The LEON processor is a synthesizable VHDL model of the diff --git a/arch/sparc/include/asm/bug.h b/arch/sparc/include/asm/bug.h index 6f17528356b2..ea53e418f6c0 100644 --- a/arch/sparc/include/asm/bug.h +++ b/arch/sparc/include/asm/bug.h @@ -9,10 +9,14 @@ void do_BUG(const char *file, int line); #define BUG() do { \ do_BUG(__FILE__, __LINE__); \ + barrier_before_unreachable(); \ __builtin_trap(); \ } while (0) #else -#define BUG() __builtin_trap() +#define BUG() do { \ + barrier_before_unreachable(); \ + __builtin_trap(); \ +} while (0) #endif #define HAVE_ARCH_BUG diff --git a/arch/x86/.gitignore b/arch/x86/.gitignore index aff152c87cf4..5a82bac5e0bc 100644 --- a/arch/x86/.gitignore +++ b/arch/x86/.gitignore @@ -1,6 +1,7 @@ boot/compressed/vmlinux tools/test_get_len tools/insn_sanity +tools/insn_decoder_test purgatory/kexec-purgatory.c purgatory/purgatory.ro diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 63bf349b2b24..0fa71a78ec99 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -423,12 +423,6 @@ config X86_MPPARSE For old smp systems that do not have proper acpi support. Newer systems (esp with 64bit cpus) with acpi support, MADT and DSDT will override it -config X86_BIGSMP - bool "Support for big SMP systems with more than 8 CPUs" - depends on X86_32 && SMP - ---help--- - This option is needed for the systems that have more than 8 CPUs - config GOLDFISH def_bool y depends on X86_GOLDFISH @@ -436,6 +430,7 @@ config GOLDFISH config RETPOLINE bool "Avoid speculative indirect branches in kernel" default y + select STACK_VALIDATION if HAVE_STACK_VALIDATION help Compile kernel with the retpoline compiler options to guard against kernel-to-user data leaks by avoiding speculative indirect @@ -460,6 +455,12 @@ config INTEL_RDT Say N if unsure. if X86_32 +config X86_BIGSMP + bool "Support for big SMP systems with more than 8 CPUs" + depends on SMP + ---help--- + This option is needed for the systems that have more than 8 CPUs + config X86_EXTENDED_PLATFORM bool "Support for extended (non-PC) x86 platforms" default y @@ -949,25 +950,66 @@ config MAXSMP Enable maximum number of CPUS and NUMA Nodes for this architecture. If unsure, say N. +# +# The maximum number of CPUs supported: +# +# The main config value is NR_CPUS, which defaults to NR_CPUS_DEFAULT, +# and which can be configured interactively in the +# [NR_CPUS_RANGE_BEGIN ... NR_CPUS_RANGE_END] range. +# +# The ranges are different on 32-bit and 64-bit kernels, depending on +# hardware capabilities and scalability features of the kernel. +# +# ( If MAXSMP is enabled we just use the highest possible value and disable +# interactive configuration. ) +# + +config NR_CPUS_RANGE_BEGIN + int + default NR_CPUS_RANGE_END if MAXSMP + default 1 if !SMP + default 2 + +config NR_CPUS_RANGE_END + int + depends on X86_32 + default 64 if SMP && X86_BIGSMP + default 8 if SMP && !X86_BIGSMP + default 1 if !SMP + +config NR_CPUS_RANGE_END + int + depends on X86_64 + default 8192 if SMP && ( MAXSMP || CPUMASK_OFFSTACK) + default 512 if SMP && (!MAXSMP && !CPUMASK_OFFSTACK) + default 1 if !SMP + +config NR_CPUS_DEFAULT + int + depends on X86_32 + default 32 if X86_BIGSMP + default 8 if SMP + default 1 if !SMP + +config NR_CPUS_DEFAULT + int + depends on X86_64 + default 8192 if MAXSMP + default 64 if SMP + default 1 if !SMP + config NR_CPUS int "Maximum number of CPUs" if SMP && !MAXSMP - range 2 8 if SMP && X86_32 && !X86_BIGSMP - range 2 64 if SMP && X86_32 && X86_BIGSMP - range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK && X86_64 - range 2 8192 if SMP && !MAXSMP && CPUMASK_OFFSTACK && X86_64 - default "1" if !SMP - default "8192" if MAXSMP - default "32" if SMP && X86_BIGSMP - default "8" if SMP && X86_32 - default "64" if SMP + range NR_CPUS_RANGE_BEGIN NR_CPUS_RANGE_END + default NR_CPUS_DEFAULT ---help--- This allows you to specify the maximum number of CPUs which this kernel will support. If CPUMASK_OFFSTACK is enabled, the maximum supported value is 8192, otherwise the maximum value is 512. The minimum value which makes sense is 2. - This is purely to save memory - each supported CPU adds - approximately eight kilobytes to the kernel image. + This is purely to save memory: each supported CPU adds about 8KB + to the kernel image. config SCHED_SMT bool "SMT (Hyperthreading) scheduler support" @@ -1363,7 +1405,7 @@ config HIGHMEM4G config HIGHMEM64G bool "64GB" - depends on !M486 + depends on !M486 && !M586 && !M586TSC && !M586MMX && !MGEODE_LX && !MGEODEGX1 && !MCYRIXIII && !MELAN && !MWINCHIPC6 && !WINCHIP3D && !MK6 select X86_PAE ---help--- Select this if you have a 32-bit processor and more than 4 @@ -2265,7 +2307,7 @@ choice it can be used to assist security vulnerability exploitation. This setting can be changed at boot time via the kernel command - line parameter vsyscall=[native|emulate|none]. + line parameter vsyscall=[emulate|none]. On a system with recent enough glibc (2.14 or newer) and no static binaries, you can say None without a performance penalty @@ -2273,15 +2315,6 @@ choice If unsure, select "Emulate". - config LEGACY_VSYSCALL_NATIVE - bool "Native" - help - Actual executable code is located in the fixed vsyscall - address mapping, implementing time() efficiently. Since - this makes the mapping executable, it can be used during - security vulnerability exploitation (traditionally as - ROP gadgets). This configuration is not recommended. - config LEGACY_VSYSCALL_EMULATE bool "Emulate" help diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 65a9a4716e34..8b8d2297d486 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -374,7 +374,7 @@ config X86_TSC config X86_CMPXCHG64 def_bool y - depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM + depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586TSC || M586MMX || MATOM || MGEODE_LX || MGEODEGX1 || MK6 || MK7 || MK8 # this should be set for all -march=.. options where the compiler # generates cmov. @@ -385,7 +385,7 @@ config X86_CMOV config X86_MINIMUM_CPU_FAMILY int default "64" if X86_64 - default "6" if X86_32 && X86_P6_NOP + default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCRUSOE || MCORE2 || MK7 || MK8) default "5" if X86_32 && X86_CMPXCHG64 default "4" diff --git a/arch/x86/Makefile b/arch/x86/Makefile index fad55160dcb9..498c1b812300 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -232,10 +232,9 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables # Avoid indirect branches in kernel to deal with Spectre ifdef CONFIG_RETPOLINE - RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register) - ifneq ($(RETPOLINE_CFLAGS),) - KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE - endif +ifneq ($(RETPOLINE_CFLAGS),) + KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE +endif endif archscripts: scripts_basic diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 353e20c3f114..886a9115af62 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -439,7 +439,7 @@ setup_uga32(void **uga_handle, unsigned long size, u32 *width, u32 *height) struct efi_uga_draw_protocol *uga = NULL, *first_uga; efi_guid_t uga_proto = EFI_UGA_PROTOCOL_GUID; unsigned long nr_ugas; - u32 *handles = (u32 *)uga_handle;; + u32 *handles = (u32 *)uga_handle; efi_status_t status = EFI_INVALID_PARAMETER; int i; @@ -484,7 +484,7 @@ setup_uga64(void **uga_handle, unsigned long size, u32 *width, u32 *height) struct efi_uga_draw_protocol *uga = NULL, *first_uga; efi_guid_t uga_proto = EFI_UGA_PROTOCOL_GUID; unsigned long nr_ugas; - u64 *handles = (u64 *)uga_handle;; + u64 *handles = (u64 *)uga_handle; efi_status_t status = EFI_INVALID_PARAMETER; int i; diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c index 36870b26067a..d08805032f01 100644 --- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c +++ b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c @@ -57,10 +57,12 @@ void sha512_mb_mgr_init_avx2(struct sha512_mb_mgr *state) { unsigned int j; - state->lens[0] = 0; - state->lens[1] = 1; - state->lens[2] = 2; - state->lens[3] = 3; + /* initially all lanes are unused */ + state->lens[0] = 0xFFFFFFFF00000000; + state->lens[1] = 0xFFFFFFFF00000001; + state->lens[2] = 0xFFFFFFFF00000002; + state->lens[3] = 0xFFFFFFFF00000003; + state->unused_lanes = 0xFF03020100; for (j = 0; j < 4; j++) state->ldata[j].job_in_lane = NULL; diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 3f48f695d5e6..be63330c5511 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -97,80 +97,78 @@ For 32-bit we have the following conventions - kernel is built with #define SIZEOF_PTREGS 21*8 - .macro ALLOC_PT_GPREGS_ON_STACK - addq $-(15*8), %rsp - .endm - - .macro SAVE_C_REGS_HELPER offset=0 rax=1 rcx=1 r8910=1 r11=1 - .if \r11 - movq %r11, 6*8+\offset(%rsp) - .endif - .if \r8910 - movq %r10, 7*8+\offset(%rsp) - movq %r9, 8*8+\offset(%rsp) - movq %r8, 9*8+\offset(%rsp) - .endif - .if \rax - movq %rax, 10*8+\offset(%rsp) +.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0 + /* + * Push registers and sanitize registers of values that a + * speculation attack might otherwise want to exploit. The + * lower registers are likely clobbered well before they + * could be put to use in a speculative execution gadget. + * Interleave XOR with PUSH for better uop scheduling: + */ + .if \save_ret + pushq %rsi /* pt_regs->si */ + movq 8(%rsp), %rsi /* temporarily store the return address in %rsi */ + movq %rdi, 8(%rsp) /* pt_regs->di (overwriting original return address) */ + .else + pushq %rdi /* pt_regs->di */ + pushq %rsi /* pt_regs->si */ .endif - .if \rcx - movq %rcx, 11*8+\offset(%rsp) + pushq \rdx /* pt_regs->dx */ + pushq %rcx /* pt_regs->cx */ + pushq \rax /* pt_regs->ax */ + pushq %r8 /* pt_regs->r8 */ + xorl %r8d, %r8d /* nospec r8 */ + pushq %r9 /* pt_regs->r9 */ + xorl %r9d, %r9d /* nospec r9 */ + pushq %r10 /* pt_regs->r10 */ + xorl %r10d, %r10d /* nospec r10 */ + pushq %r11 /* pt_regs->r11 */ + xorl %r11d, %r11d /* nospec r11*/ + pushq %rbx /* pt_regs->rbx */ + xorl %ebx, %ebx /* nospec rbx*/ + pushq %rbp /* pt_regs->rbp */ + xorl %ebp, %ebp /* nospec rbp*/ + pushq %r12 /* pt_regs->r12 */ + xorl %r12d, %r12d /* nospec r12*/ + pushq %r13 /* pt_regs->r13 */ + xorl %r13d, %r13d /* nospec r13*/ + pushq %r14 /* pt_regs->r14 */ + xorl %r14d, %r14d /* nospec r14*/ + pushq %r15 /* pt_regs->r15 */ + xorl %r15d, %r15d /* nospec r15*/ + UNWIND_HINT_REGS + .if \save_ret + pushq %rsi /* return address on top of stack */ .endif - movq %rdx, 12*8+\offset(%rsp) - movq %rsi, 13*8+\offset(%rsp) - movq %rdi, 14*8+\offset(%rsp) - UNWIND_HINT_REGS offset=\offset extra=0 - .endm - .macro SAVE_C_REGS offset=0 - SAVE_C_REGS_HELPER \offset, 1, 1, 1, 1 - .endm - .macro SAVE_C_REGS_EXCEPT_RAX_RCX offset=0 - SAVE_C_REGS_HELPER \offset, 0, 0, 1, 1 - .endm - .macro SAVE_C_REGS_EXCEPT_R891011 - SAVE_C_REGS_HELPER 0, 1, 1, 0, 0 - .endm - .macro SAVE_C_REGS_EXCEPT_RCX_R891011 - SAVE_C_REGS_HELPER 0, 1, 0, 0, 0 - .endm - .macro SAVE_C_REGS_EXCEPT_RAX_RCX_R11 - SAVE_C_REGS_HELPER 0, 0, 0, 1, 0 - .endm - - .macro SAVE_EXTRA_REGS offset=0 - movq %r15, 0*8+\offset(%rsp) - movq %r14, 1*8+\offset(%rsp) - movq %r13, 2*8+\offset(%rsp) - movq %r12, 3*8+\offset(%rsp) - movq %rbp, 4*8+\offset(%rsp) - movq %rbx, 5*8+\offset(%rsp) - UNWIND_HINT_REGS offset=\offset - .endm - - .macro POP_EXTRA_REGS +.endm + +.macro POP_REGS pop_rdi=1 skip_r11rcx=0 popq %r15 popq %r14 popq %r13 popq %r12 popq %rbp popq %rbx - .endm - - .macro POP_C_REGS + .if \skip_r11rcx + popq %rsi + .else popq %r11 + .endif popq %r10 popq %r9 popq %r8 popq %rax + .if \skip_r11rcx + popq %rsi + .else popq %rcx + .endif popq %rdx popq %rsi + .if \pop_rdi popq %rdi - .endm - - .macro icebp - .byte 0xf1 - .endm + .endif +.endm /* * This is a sneaky trick to help the unwinder find pt_regs on the stack. The @@ -178,17 +176,12 @@ For 32-bit we have the following conventions - kernel is built with * is just setting the LSB, which makes it an invalid stack address and is also * a signal to the unwinder that it's a pt_regs pointer in disguise. * - * NOTE: This macro must be used *after* SAVE_EXTRA_REGS because it corrupts + * NOTE: This macro must be used *after* PUSH_AND_CLEAR_REGS because it corrupts * the original rbp. */ .macro ENCODE_FRAME_POINTER ptregs_offset=0 #ifdef CONFIG_FRAME_POINTER - .if \ptregs_offset - leaq \ptregs_offset(%rsp), %rbp - .else - mov %rsp, %rbp - .endif - orq $0x1, %rbp + leaq 1+\ptregs_offset(%rsp), %rbp #endif .endm diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 16c2c022540d..6ad064c8cf35 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -252,8 +252,7 @@ ENTRY(__switch_to_asm) * exist, overwrite the RSB with entries which capture * speculative execution to prevent attack. */ - /* Clobbers %ebx */ - FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW + FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW #endif /* restore callee-saved registers */ diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 30c8c5344c4a..805f52703ee3 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -55,7 +55,7 @@ END(native_usergs_sysret64) .macro TRACE_IRQS_FLAGS flags:req #ifdef CONFIG_TRACE_IRQFLAGS - bt $9, \flags /* interrupts off? */ + btl $9, \flags /* interrupts off? */ jnc 1f TRACE_IRQS_ON 1: @@ -213,7 +213,7 @@ ENTRY(entry_SYSCALL_64) swapgs /* - * This path is not taken when PAGE_TABLE_ISOLATION is disabled so it + * This path is only taken when PAGE_TABLE_ISOLATION is disabled so it * is not required to switch CR3. */ movq %rsp, PER_CPU_VAR(rsp_scratch) @@ -227,22 +227,8 @@ ENTRY(entry_SYSCALL_64) pushq %rcx /* pt_regs->ip */ GLOBAL(entry_SYSCALL_64_after_hwframe) pushq %rax /* pt_regs->orig_ax */ - pushq %rdi /* pt_regs->di */ - pushq %rsi /* pt_regs->si */ - pushq %rdx /* pt_regs->dx */ - pushq %rcx /* pt_regs->cx */ - pushq $-ENOSYS /* pt_regs->ax */ - pushq %r8 /* pt_regs->r8 */ - pushq %r9 /* pt_regs->r9 */ - pushq %r10 /* pt_regs->r10 */ - pushq %r11 /* pt_regs->r11 */ - pushq %rbx /* pt_regs->rbx */ - pushq %rbp /* pt_regs->rbp */ - pushq %r12 /* pt_regs->r12 */ - pushq %r13 /* pt_regs->r13 */ - pushq %r14 /* pt_regs->r14 */ - pushq %r15 /* pt_regs->r15 */ - UNWIND_HINT_REGS + + PUSH_AND_CLEAR_REGS rax=$-ENOSYS TRACE_IRQS_OFF @@ -321,15 +307,7 @@ GLOBAL(entry_SYSCALL_64_after_hwframe) syscall_return_via_sysret: /* rcx and r11 are already restored (see code above) */ UNWIND_HINT_EMPTY - POP_EXTRA_REGS - popq %rsi /* skip r11 */ - popq %r10 - popq %r9 - popq %r8 - popq %rax - popq %rsi /* skip rcx */ - popq %rdx - popq %rsi + POP_REGS pop_rdi=0 skip_r11rcx=1 /* * Now all regs are restored except RSP and RDI. @@ -386,8 +364,7 @@ ENTRY(__switch_to_asm) * exist, overwrite the RSB with entries which capture * speculative execution to prevent attack. */ - /* Clobbers %rbx */ - FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW + FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW #endif /* restore callee-saved registers */ @@ -471,9 +448,19 @@ END(irq_entries_start) * * The invariant is that, if irq_count != -1, then the IRQ stack is in use. */ -.macro ENTER_IRQ_STACK regs=1 old_rsp +.macro ENTER_IRQ_STACK regs=1 old_rsp save_ret=0 DEBUG_ENTRY_ASSERT_IRQS_OFF + + .if \save_ret + /* + * If save_ret is set, the original stack contains one additional + * entry -- the return address. Therefore, move the address one + * entry below %rsp to \old_rsp. + */ + leaq 8(%rsp), \old_rsp + .else movq %rsp, \old_rsp + .endif .if \regs UNWIND_HINT_REGS base=\old_rsp @@ -519,6 +506,15 @@ END(irq_entries_start) .if \regs UNWIND_HINT_REGS indirect=1 .endif + + .if \save_ret + /* + * Push the return address to the stack. This return address can + * be found at the "real" original RSP, which was offset by 8 at + * the beginning of this macro. + */ + pushq -8(\old_rsp) + .endif .endm /* @@ -542,29 +538,65 @@ END(irq_entries_start) .endm /* - * Interrupt entry/exit. - * - * Interrupt entry points save only callee clobbered registers in fast path. + * Interrupt entry helper function. * - * Entry runs with interrupts off. + * Entry runs with interrupts off. Stack layout at entry: + * +----------------------------------------------------+ + * | regs->ss | + * | regs->rsp | + * | regs->eflags | + * | regs->cs | + * | regs->ip | + * +----------------------------------------------------+ + * | regs->orig_ax = ~(interrupt number) | + * +----------------------------------------------------+ + * | return address | + * +----------------------------------------------------+ */ - -/* 0(%rsp): ~(interrupt number) */ - .macro interrupt func +ENTRY(interrupt_entry) + UNWIND_HINT_FUNC + ASM_CLAC cld - testb $3, CS-ORIG_RAX(%rsp) + testb $3, CS-ORIG_RAX+8(%rsp) jz 1f SWAPGS - call switch_to_thread_stack + + /* + * Switch to the thread stack. The IRET frame and orig_ax are + * on the stack, as well as the return address. RDI..R12 are + * not (yet) on the stack and space has not (yet) been + * allocated for them. + */ + pushq %rdi + + /* Need to switch before accessing the thread stack. */ + SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi + movq %rsp, %rdi + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp + + /* + * We have RDI, return address, and orig_ax on the stack on + * top of the IRET frame. That means offset=24 + */ + UNWIND_HINT_IRET_REGS base=%rdi offset=24 + + pushq 7*8(%rdi) /* regs->ss */ + pushq 6*8(%rdi) /* regs->rsp */ + pushq 5*8(%rdi) /* regs->eflags */ + pushq 4*8(%rdi) /* regs->cs */ + pushq 3*8(%rdi) /* regs->ip */ + pushq 2*8(%rdi) /* regs->orig_ax */ + pushq 8(%rdi) /* return address */ + UNWIND_HINT_FUNC + + movq (%rdi), %rdi 1: - ALLOC_PT_GPREGS_ON_STACK - SAVE_C_REGS - SAVE_EXTRA_REGS - ENCODE_FRAME_POINTER + PUSH_AND_CLEAR_REGS save_ret=1 + ENCODE_FRAME_POINTER 8 - testb $3, CS(%rsp) + testb $3, CS+8(%rsp) jz 1f /* @@ -572,7 +604,7 @@ END(irq_entries_start) * * We need to tell lockdep that IRQs are off. We can't do this until * we fix gsbase, and we should do it before enter_from_user_mode - * (which can take locks). Since TRACE_IRQS_OFF idempotent, + * (which can take locks). Since TRACE_IRQS_OFF is idempotent, * the simplest way to handle it is to just call it twice if * we enter from user mode. There's no reason to optimize this since * TRACE_IRQS_OFF is a no-op if lockdep is off. @@ -582,12 +614,15 @@ END(irq_entries_start) CALL_enter_from_user_mode 1: - ENTER_IRQ_STACK old_rsp=%rdi + ENTER_IRQ_STACK old_rsp=%rdi save_ret=1 /* We entered an interrupt context - irqs are off: */ TRACE_IRQS_OFF - call \func /* rdi points to pt_regs */ - .endm + ret +END(interrupt_entry) + + +/* Interrupt entry/exit. */ /* * The interrupt stubs push (~vector+0x80) onto the stack and @@ -595,9 +630,10 @@ END(irq_entries_start) */ .p2align CONFIG_X86_L1_CACHE_SHIFT common_interrupt: - ASM_CLAC addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */ - interrupt do_IRQ + call interrupt_entry + UNWIND_HINT_REGS indirect=1 + call do_IRQ /* rdi points to pt_regs */ /* 0(%rsp): old RSP */ ret_from_intr: DISABLE_INTERRUPTS(CLBR_ANY) @@ -622,15 +658,7 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode) ud2 1: #endif - POP_EXTRA_REGS - popq %r11 - popq %r10 - popq %r9 - popq %r8 - popq %rax - popq %rcx - popq %rdx - popq %rsi + POP_REGS pop_rdi=0 /* * The stack is now user RDI, orig_ax, RIP, CS, EFLAGS, RSP, SS. @@ -688,8 +716,7 @@ GLOBAL(restore_regs_and_return_to_kernel) ud2 1: #endif - POP_EXTRA_REGS - POP_C_REGS + POP_REGS addq $8, %rsp /* skip regs->orig_ax */ /* * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization @@ -799,10 +826,11 @@ END(common_interrupt) .macro apicinterrupt3 num sym do_sym ENTRY(\sym) UNWIND_HINT_IRET_REGS - ASM_CLAC pushq $~(\num) .Lcommon_\sym: - interrupt \do_sym + call interrupt_entry + UNWIND_HINT_REGS indirect=1 + call \do_sym /* rdi points to pt_regs */ jmp ret_from_intr END(\sym) .endm @@ -865,34 +893,6 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt */ #define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8) -/* - * Switch to the thread stack. This is called with the IRET frame and - * orig_ax on the stack. (That is, RDI..R12 are not on the stack and - * space has not been allocated for them.) - */ -ENTRY(switch_to_thread_stack) - UNWIND_HINT_FUNC - - pushq %rdi - /* Need to switch before accessing the thread stack. */ - SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi - movq %rsp, %rdi - movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp - UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI - - pushq 7*8(%rdi) /* regs->ss */ - pushq 6*8(%rdi) /* regs->rsp */ - pushq 5*8(%rdi) /* regs->eflags */ - pushq 4*8(%rdi) /* regs->cs */ - pushq 3*8(%rdi) /* regs->ip */ - pushq 2*8(%rdi) /* regs->orig_ax */ - pushq 8(%rdi) /* return address */ - UNWIND_HINT_FUNC - - movq (%rdi), %rdi - ret -END(switch_to_thread_stack) - .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ENTRY(\sym) UNWIND_HINT_IRET_REGS offset=\has_error_code*8 @@ -908,10 +908,8 @@ ENTRY(\sym) pushq $-1 /* ORIG_RAX: no syscall to restart */ .endif - ALLOC_PT_GPREGS_ON_STACK - .if \paranoid < 2 - testb $3, CS(%rsp) /* If coming from userspace, switch stacks */ + testb $3, CS-ORIG_RAX(%rsp) /* If coming from userspace, switch stacks */ jnz .Lfrom_usermode_switch_stack_\@ .endif @@ -1121,9 +1119,7 @@ ENTRY(xen_failsafe_callback) addq $0x30, %rsp UNWIND_HINT_IRET_REGS pushq $-1 /* orig_ax = -1 => not a system call */ - ALLOC_PT_GPREGS_ON_STACK - SAVE_C_REGS - SAVE_EXTRA_REGS + PUSH_AND_CLEAR_REGS ENCODE_FRAME_POINTER jmp error_exit END(xen_failsafe_callback) @@ -1170,8 +1166,7 @@ idtentry machine_check do_mce has_error_code=0 paranoid=1 ENTRY(paranoid_entry) UNWIND_HINT_FUNC cld - SAVE_C_REGS 8 - SAVE_EXTRA_REGS 8 + PUSH_AND_CLEAR_REGS save_ret=1 ENCODE_FRAME_POINTER 8 movl $1, %ebx movl $MSR_GS_BASE, %ecx @@ -1211,21 +1206,20 @@ ENTRY(paranoid_exit) jmp .Lparanoid_exit_restore .Lparanoid_exit_no_swapgs: TRACE_IRQS_IRETQ_DEBUG + RESTORE_CR3 scratch_reg=%rbx save_reg=%r14 .Lparanoid_exit_restore: jmp restore_regs_and_return_to_kernel END(paranoid_exit) /* - * Save all registers in pt_regs, and switch gs if needed. + * Save all registers in pt_regs, and switch GS if needed. * Return: EBX=0: came from user mode; EBX=1: otherwise */ ENTRY(error_entry) UNWIND_HINT_FUNC cld - SAVE_C_REGS 8 - SAVE_EXTRA_REGS 8 + PUSH_AND_CLEAR_REGS save_ret=1 ENCODE_FRAME_POINTER 8 - xorl %ebx, %ebx testb $3, CS+8(%rsp) jz .Lerror_kernelspace @@ -1406,22 +1400,7 @@ ENTRY(nmi) pushq 1*8(%rdx) /* pt_regs->rip */ UNWIND_HINT_IRET_REGS pushq $-1 /* pt_regs->orig_ax */ - pushq %rdi /* pt_regs->di */ - pushq %rsi /* pt_regs->si */ - pushq (%rdx) /* pt_regs->dx */ - pushq %rcx /* pt_regs->cx */ - pushq %rax /* pt_regs->ax */ - pushq %r8 /* pt_regs->r8 */ - pushq %r9 /* pt_regs->r9 */ - pushq %r10 /* pt_regs->r10 */ - pushq %r11 /* pt_regs->r11 */ - pushq %rbx /* pt_regs->rbx */ - pushq %rbp /* pt_regs->rbp */ - pushq %r12 /* pt_regs->r12 */ - pushq %r13 /* pt_regs->r13 */ - pushq %r14 /* pt_regs->r14 */ - pushq %r15 /* pt_regs->r15 */ - UNWIND_HINT_REGS + PUSH_AND_CLEAR_REGS rdx=(%rdx) ENCODE_FRAME_POINTER /* @@ -1631,7 +1610,6 @@ end_repeat_nmi: * frame to point back to repeat_nmi. */ pushq $-1 /* ORIG_RAX: no syscall to restart */ - ALLOC_PT_GPREGS_ON_STACK /* * Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit @@ -1655,8 +1633,7 @@ end_repeat_nmi: nmi_swapgs: SWAPGS_UNSAFE_STACK nmi_restore: - POP_EXTRA_REGS - POP_C_REGS + POP_REGS /* * Skip orig_ax and the "outermost" frame to point RSP at the "iret" diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 98d5358e4041..08425c42f8b7 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -85,15 +85,25 @@ ENTRY(entry_SYSENTER_compat) pushq %rcx /* pt_regs->cx */ pushq $-ENOSYS /* pt_regs->ax */ pushq $0 /* pt_regs->r8 = 0 */ + xorl %r8d, %r8d /* nospec r8 */ pushq $0 /* pt_regs->r9 = 0 */ + xorl %r9d, %r9d /* nospec r9 */ pushq $0 /* pt_regs->r10 = 0 */ + xorl %r10d, %r10d /* nospec r10 */ pushq $0 /* pt_regs->r11 = 0 */ + xorl %r11d, %r11d /* nospec r11 */ pushq %rbx /* pt_regs->rbx */ + xorl %ebx, %ebx /* nospec rbx */ pushq %rbp /* pt_regs->rbp (will be overwritten) */ + xorl %ebp, %ebp /* nospec rbp */ pushq $0 /* pt_regs->r12 = 0 */ + xorl %r12d, %r12d /* nospec r12 */ pushq $0 /* pt_regs->r13 = 0 */ + xorl %r13d, %r13d /* nospec r13 */ pushq $0 /* pt_regs->r14 = 0 */ + xorl %r14d, %r14d /* nospec r14 */ pushq $0 /* pt_regs->r15 = 0 */ + xorl %r15d, %r15d /* nospec r15 */ cld /* @@ -214,15 +224,25 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe) pushq %rbp /* pt_regs->cx (stashed in bp) */ pushq $-ENOSYS /* pt_regs->ax */ pushq $0 /* pt_regs->r8 = 0 */ + xorl %r8d, %r8d /* nospec r8 */ pushq $0 /* pt_regs->r9 = 0 */ + xorl %r9d, %r9d /* nospec r9 */ pushq $0 /* pt_regs->r10 = 0 */ + xorl %r10d, %r10d /* nospec r10 */ pushq $0 /* pt_regs->r11 = 0 */ + xorl %r11d, %r11d /* nospec r11 */ pushq %rbx /* pt_regs->rbx */ + xorl %ebx, %ebx /* nospec rbx */ pushq %rbp /* pt_regs->rbp (will be overwritten) */ + xorl %ebp, %ebp /* nospec rbp */ pushq $0 /* pt_regs->r12 = 0 */ + xorl %r12d, %r12d /* nospec r12 */ pushq $0 /* pt_regs->r13 = 0 */ + xorl %r13d, %r13d /* nospec r13 */ pushq $0 /* pt_regs->r14 = 0 */ + xorl %r14d, %r14d /* nospec r14 */ pushq $0 /* pt_regs->r15 = 0 */ + xorl %r15d, %r15d /* nospec r15 */ /* * User mode is traced as though IRQs are on, and SYSENTER @@ -278,9 +298,9 @@ sysret32_from_system_call: */ SWITCH_TO_USER_CR3_NOSTACK scratch_reg=%r8 scratch_reg2=%r9 - xorq %r8, %r8 - xorq %r9, %r9 - xorq %r10, %r10 + xorl %r8d, %r8d + xorl %r9d, %r9d + xorl %r10d, %r10d swapgs sysretl END(entry_SYSCALL_compat) @@ -327,26 +347,47 @@ ENTRY(entry_INT80_compat) */ movl %eax, %eax + /* switch to thread stack expects orig_ax and rdi to be pushed */ pushq %rax /* pt_regs->orig_ax */ + pushq %rdi /* pt_regs->di */ - /* switch to thread stack expects orig_ax to be pushed */ - call switch_to_thread_stack + /* Need to switch before accessing the thread stack. */ + SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi + movq %rsp, %rdi + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp - pushq %rdi /* pt_regs->di */ + pushq 6*8(%rdi) /* regs->ss */ + pushq 5*8(%rdi) /* regs->rsp */ + pushq 4*8(%rdi) /* regs->eflags */ + pushq 3*8(%rdi) /* regs->cs */ + pushq 2*8(%rdi) /* regs->ip */ + pushq 1*8(%rdi) /* regs->orig_ax */ + + pushq (%rdi) /* pt_regs->di */ pushq %rsi /* pt_regs->si */ pushq %rdx /* pt_regs->dx */ pushq %rcx /* pt_regs->cx */ pushq $-ENOSYS /* pt_regs->ax */ pushq $0 /* pt_regs->r8 = 0 */ + xorl %r8d, %r8d /* nospec r8 */ pushq $0 /* pt_regs->r9 = 0 */ + xorl %r9d, %r9d /* nospec r9 */ pushq $0 /* pt_regs->r10 = 0 */ + xorl %r10d, %r10d /* nospec r10 */ pushq $0 /* pt_regs->r11 = 0 */ + xorl %r11d, %r11d /* nospec r11 */ pushq %rbx /* pt_regs->rbx */ + xorl %ebx, %ebx /* nospec rbx */ pushq %rbp /* pt_regs->rbp */ + xorl %ebp, %ebp /* nospec rbp */ pushq %r12 /* pt_regs->r12 */ + xorl %r12d, %r12d /* nospec r12 */ pushq %r13 /* pt_regs->r13 */ + xorl %r13d, %r13d /* nospec r13 */ pushq %r14 /* pt_regs->r14 */ + xorl %r14d, %r14d /* nospec r14 */ pushq %r15 /* pt_regs->r15 */ + xorl %r15d, %r15d /* nospec r15 */ cld /* @@ -363,15 +404,3 @@ ENTRY(entry_INT80_compat) TRACE_IRQS_ON jmp swapgs_restore_regs_and_return_to_usermode END(entry_INT80_compat) - -ENTRY(stub32_clone) - /* - * The 32-bit clone ABI is: clone(..., int tls_val, int *child_tidptr). - * The 64-bit clone ABI is: clone(..., int *child_tidptr, int tls_val). - * - * The native 64-bit kernel's sys_clone() implements the latter, - * so we need to swap arguments here before calling it: - */ - xchg %r8, %rcx - jmp sys_clone -ENDPROC(stub32_clone) diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 448ac2161112..2a5e99cff859 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -8,12 +8,12 @@ # 0 i386 restart_syscall sys_restart_syscall 1 i386 exit sys_exit -2 i386 fork sys_fork sys_fork +2 i386 fork sys_fork 3 i386 read sys_read 4 i386 write sys_write 5 i386 open sys_open compat_sys_open 6 i386 close sys_close -7 i386 waitpid sys_waitpid sys32_waitpid +7 i386 waitpid sys_waitpid compat_sys_x86_waitpid 8 i386 creat sys_creat 9 i386 link sys_link 10 i386 unlink sys_unlink @@ -78,7 +78,7 @@ 69 i386 ssetmask sys_ssetmask 70 i386 setreuid sys_setreuid16 71 i386 setregid sys_setregid16 -72 i386 sigsuspend sys_sigsuspend sys_sigsuspend +72 i386 sigsuspend sys_sigsuspend 73 i386 sigpending sys_sigpending compat_sys_sigpending 74 i386 sethostname sys_sethostname 75 i386 setrlimit sys_setrlimit compat_sys_setrlimit @@ -96,7 +96,7 @@ 87 i386 swapon sys_swapon 88 i386 reboot sys_reboot 89 i386 readdir sys_old_readdir compat_sys_old_readdir -90 i386 mmap sys_old_mmap sys32_mmap +90 i386 mmap sys_old_mmap compat_sys_x86_mmap 91 i386 munmap sys_munmap 92 i386 truncate sys_truncate compat_sys_truncate 93 i386 ftruncate sys_ftruncate compat_sys_ftruncate @@ -126,7 +126,7 @@ 117 i386 ipc sys_ipc compat_sys_ipc 118 i386 fsync sys_fsync 119 i386 sigreturn sys_sigreturn sys32_sigreturn -120 i386 clone sys_clone stub32_clone +120 i386 clone sys_clone compat_sys_x86_clone 121 i386 setdomainname sys_setdomainname 122 i386 uname sys_newuname 123 i386 modify_ldt sys_modify_ldt @@ -186,8 +186,8 @@ 177 i386 rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait 178 i386 rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo 179 i386 rt_sigsuspend sys_rt_sigsuspend -180 i386 pread64 sys_pread64 sys32_pread -181 i386 pwrite64 sys_pwrite64 sys32_pwrite +180 i386 pread64 sys_pread64 compat_sys_x86_pread +181 i386 pwrite64 sys_pwrite64 compat_sys_x86_pwrite 182 i386 chown sys_chown16 183 i386 getcwd sys_getcwd 184 i386 capget sys_capget @@ -196,14 +196,14 @@ 187 i386 sendfile sys_sendfile compat_sys_sendfile 188 i386 getpmsg 189 i386 putpmsg -190 i386 vfork sys_vfork sys_vfork +190 i386 vfork sys_vfork 191 i386 ugetrlimit sys_getrlimit compat_sys_getrlimit 192 i386 mmap2 sys_mmap_pgoff -193 i386 truncate64 sys_truncate64 sys32_truncate64 -194 i386 ftruncate64 sys_ftruncate64 sys32_ftruncate64 -195 i386 stat64 sys_stat64 sys32_stat64 -196 i386 lstat64 sys_lstat64 sys32_lstat64 -197 i386 fstat64 sys_fstat64 sys32_fstat64 +193 i386 truncate64 sys_truncate64 compat_sys_x86_truncate64 +194 i386 ftruncate64 sys_ftruncate64 compat_sys_x86_ftruncate64 +195 i386 stat64 sys_stat64 compat_sys_x86_stat64 +196 i386 lstat64 sys_lstat64 compat_sys_x86_lstat64 +197 i386 fstat64 sys_fstat64 compat_sys_x86_fstat64 198 i386 lchown32 sys_lchown 199 i386 getuid32 sys_getuid 200 i386 getgid32 sys_getgid @@ -231,7 +231,7 @@ # 222 is unused # 223 is unused 224 i386 gettid sys_gettid -225 i386 readahead sys_readahead sys32_readahead +225 i386 readahead sys_readahead compat_sys_x86_readahead 226 i386 setxattr sys_setxattr 227 i386 lsetxattr sys_lsetxattr 228 i386 fsetxattr sys_fsetxattr @@ -256,7 +256,7 @@ 247 i386 io_getevents sys_io_getevents compat_sys_io_getevents 248 i386 io_submit sys_io_submit compat_sys_io_submit 249 i386 io_cancel sys_io_cancel -250 i386 fadvise64 sys_fadvise64 sys32_fadvise64 +250 i386 fadvise64 sys_fadvise64 compat_sys_x86_fadvise64 # 251 is available for reuse (was briefly sys_set_zone_reclaim) 252 i386 exit_group sys_exit_group 253 i386 lookup_dcookie sys_lookup_dcookie compat_sys_lookup_dcookie @@ -278,7 +278,7 @@ 269 i386 fstatfs64 sys_fstatfs64 compat_sys_fstatfs64 270 i386 tgkill sys_tgkill 271 i386 utimes sys_utimes compat_sys_utimes -272 i386 fadvise64_64 sys_fadvise64_64 sys32_fadvise64_64 +272 i386 fadvise64_64 sys_fadvise64_64 compat_sys_x86_fadvise64_64 273 i386 vserver 274 i386 mbind sys_mbind 275 i386 get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy @@ -306,7 +306,7 @@ 297 i386 mknodat sys_mknodat 298 i386 fchownat sys_fchownat 299 i386 futimesat sys_futimesat compat_sys_futimesat -300 i386 fstatat64 sys_fstatat64 sys32_fstatat +300 i386 fstatat64 sys_fstatat64 compat_sys_x86_fstatat 301 i386 unlinkat sys_unlinkat 302 i386 renameat sys_renameat 303 i386 linkat sys_linkat @@ -320,7 +320,7 @@ 311 i386 set_robust_list sys_set_robust_list compat_sys_set_robust_list 312 i386 get_robust_list sys_get_robust_list compat_sys_get_robust_list 313 i386 splice sys_splice -314 i386 sync_file_range sys_sync_file_range sys32_sync_file_range +314 i386 sync_file_range sys_sync_file_range compat_sys_x86_sync_file_range 315 i386 tee sys_tee 316 i386 vmsplice sys_vmsplice compat_sys_vmsplice 317 i386 move_pages sys_move_pages compat_sys_move_pages @@ -330,7 +330,7 @@ 321 i386 signalfd sys_signalfd compat_sys_signalfd 322 i386 timerfd_create sys_timerfd_create 323 i386 eventfd sys_eventfd -324 i386 fallocate sys_fallocate sys32_fallocate +324 i386 fallocate sys_fallocate compat_sys_x86_fallocate 325 i386 timerfd_settime sys_timerfd_settime compat_sys_timerfd_settime 326 i386 timerfd_gettime sys_timerfd_gettime compat_sys_timerfd_gettime 327 i386 signalfd4 sys_signalfd4 compat_sys_signalfd4 diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index 577fa8adb785..8560ef68a9d6 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -42,10 +42,8 @@ #define CREATE_TRACE_POINTS #include "vsyscall_trace.h" -static enum { EMULATE, NATIVE, NONE } vsyscall_mode = -#if defined(CONFIG_LEGACY_VSYSCALL_NATIVE) - NATIVE; -#elif defined(CONFIG_LEGACY_VSYSCALL_NONE) +static enum { EMULATE, NONE } vsyscall_mode = +#ifdef CONFIG_LEGACY_VSYSCALL_NONE NONE; #else EMULATE; @@ -56,8 +54,6 @@ static int __init vsyscall_setup(char *str) if (str) { if (!strcmp("emulate", str)) vsyscall_mode = EMULATE; - else if (!strcmp("native", str)) - vsyscall_mode = NATIVE; else if (!strcmp("none", str)) vsyscall_mode = NONE; else @@ -139,10 +135,6 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) WARN_ON_ONCE(address != regs->ip); - /* This should be unreachable in NATIVE mode. */ - if (WARN_ON(vsyscall_mode == NATIVE)) - return false; - if (vsyscall_mode == NONE) { warn_bad_vsyscall(KERN_INFO, regs, "vsyscall attempted with vsyscall=none"); @@ -370,9 +362,7 @@ void __init map_vsyscall(void) if (vsyscall_mode != NONE) { __set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall, - vsyscall_mode == NATIVE - ? PAGE_KERNEL_VSYSCALL - : PAGE_KERNEL_VVAR); + PAGE_KERNEL_VVAR); set_vsyscall_pgtable_user_bits(swapper_pg_dir); } diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 731153a4681e..56457cb73448 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3559,7 +3559,7 @@ static int intel_snb_pebs_broken(int cpu) break; case INTEL_FAM6_SANDYBRIDGE_X: - switch (cpu_data(cpu).x86_mask) { + switch (cpu_data(cpu).x86_stepping) { case 6: rev = 0x618; break; case 7: rev = 0x70c; break; } diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index ae64d0b69729..cf372b90557e 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -1186,7 +1186,7 @@ void __init intel_pmu_lbr_init_atom(void) * on PMU interrupt */ if (boot_cpu_data.x86_model == 28 - && boot_cpu_data.x86_mask < 10) { + && boot_cpu_data.x86_stepping < 10) { pr_cont("LBR disabled due to erratum"); return; } diff --git a/arch/x86/events/intel/p6.c b/arch/x86/events/intel/p6.c index a5604c352930..408879b0c0d4 100644 --- a/arch/x86/events/intel/p6.c +++ b/arch/x86/events/intel/p6.c @@ -234,7 +234,7 @@ static __initconst const struct x86_pmu p6_pmu = { static __init void p6_pmu_rdpmc_quirk(void) { - if (boot_cpu_data.x86_mask < 9) { + if (boot_cpu_data.x86_stepping < 9) { /* * PPro erratum 26; fixed in stepping 9 and above. */ diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 6d8044ab1060..22ec65bc033a 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -3606,7 +3606,7 @@ static struct intel_uncore_type skx_uncore_imc = { }; static struct attribute *skx_upi_uncore_formats_attr[] = { - &format_attr_event_ext.attr, + &format_attr_event.attr, &format_attr_umask_ext.attr, &format_attr_edge.attr, &format_attr_inv.attr, diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index 96cd33bbfc85..6512498bbef6 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -51,15 +51,14 @@ #define AA(__x) ((unsigned long)(__x)) -asmlinkage long sys32_truncate64(const char __user *filename, - unsigned long offset_low, - unsigned long offset_high) +COMPAT_SYSCALL_DEFINE3(x86_truncate64, const char __user *, filename, + unsigned long, offset_low, unsigned long, offset_high) { return sys_truncate(filename, ((loff_t) offset_high << 32) | offset_low); } -asmlinkage long sys32_ftruncate64(unsigned int fd, unsigned long offset_low, - unsigned long offset_high) +COMPAT_SYSCALL_DEFINE3(x86_ftruncate64, unsigned int, fd, + unsigned long, offset_low, unsigned long, offset_high) { return sys_ftruncate(fd, ((loff_t) offset_high << 32) | offset_low); } @@ -96,8 +95,8 @@ static int cp_stat64(struct stat64 __user *ubuf, struct kstat *stat) return 0; } -asmlinkage long sys32_stat64(const char __user *filename, - struct stat64 __user *statbuf) +COMPAT_SYSCALL_DEFINE2(x86_stat64, const char __user *, filename, + struct stat64 __user *, statbuf) { struct kstat stat; int ret = vfs_stat(filename, &stat); @@ -107,8 +106,8 @@ asmlinkage long sys32_stat64(const char __user *filename, return ret; } -asmlinkage long sys32_lstat64(const char __user *filename, - struct stat64 __user *statbuf) +COMPAT_SYSCALL_DEFINE2(x86_lstat64, const char __user *, filename, + struct stat64 __user *, statbuf) { struct kstat stat; int ret = vfs_lstat(filename, &stat); @@ -117,7 +116,8 @@ asmlinkage long sys32_lstat64(const char __user *filename, return ret; } -asmlinkage long sys32_fstat64(unsigned int fd, struct stat64 __user *statbuf) +COMPAT_SYSCALL_DEFINE2(x86_fstat64, unsigned int, fd, + struct stat64 __user *, statbuf) { struct kstat stat; int ret = vfs_fstat(fd, &stat); @@ -126,8 +126,9 @@ asmlinkage long sys32_fstat64(unsigned int fd, struct stat64 __user *statbuf) return ret; } -asmlinkage long sys32_fstatat(unsigned int dfd, const char __user *filename, - struct stat64 __user *statbuf, int flag) +COMPAT_SYSCALL_DEFINE4(x86_fstatat, unsigned int, dfd, + const char __user *, filename, + struct stat64 __user *, statbuf, int, flag) { struct kstat stat; int error; @@ -153,7 +154,7 @@ struct mmap_arg_struct32 { unsigned int offset; }; -asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *arg) +COMPAT_SYSCALL_DEFINE1(x86_mmap, struct mmap_arg_struct32 __user *, arg) { struct mmap_arg_struct32 a; @@ -167,22 +168,22 @@ asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *arg) a.offset>>PAGE_SHIFT); } -asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int __user *stat_addr, - int options) +COMPAT_SYSCALL_DEFINE3(x86_waitpid, compat_pid_t, pid, unsigned int __user *, + stat_addr, int, options) { return compat_sys_wait4(pid, stat_addr, options, NULL); } /* warning: next two assume little endian */ -asmlinkage long sys32_pread(unsigned int fd, char __user *ubuf, u32 count, - u32 poslo, u32 poshi) +COMPAT_SYSCALL_DEFINE5(x86_pread, unsigned int, fd, char __user *, ubuf, + u32, count, u32, poslo, u32, poshi) { return sys_pread64(fd, ubuf, count, ((loff_t)AA(poshi) << 32) | AA(poslo)); } -asmlinkage long sys32_pwrite(unsigned int fd, const char __user *ubuf, - u32 count, u32 poslo, u32 poshi) +COMPAT_SYSCALL_DEFINE5(x86_pwrite, unsigned int, fd, const char __user *, ubuf, + u32, count, u32, poslo, u32, poshi) { return sys_pwrite64(fd, ubuf, count, ((loff_t)AA(poshi) << 32) | AA(poslo)); @@ -193,8 +194,9 @@ asmlinkage long sys32_pwrite(unsigned int fd, const char __user *ubuf, * Some system calls that need sign extended arguments. This could be * done by a generic wrapper. */ -long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high, - __u32 len_low, __u32 len_high, int advice) +COMPAT_SYSCALL_DEFINE6(x86_fadvise64_64, int, fd, __u32, offset_low, + __u32, offset_high, __u32, len_low, __u32, len_high, + int, advice) { return sys_fadvise64_64(fd, (((u64)offset_high)<<32) | offset_low, @@ -202,31 +204,43 @@ long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high, advice); } -asmlinkage ssize_t sys32_readahead(int fd, unsigned off_lo, unsigned off_hi, - size_t count) +COMPAT_SYSCALL_DEFINE4(x86_readahead, int, fd, unsigned int, off_lo, + unsigned int, off_hi, size_t, count) { return sys_readahead(fd, ((u64)off_hi << 32) | off_lo, count); } -asmlinkage long sys32_sync_file_range(int fd, unsigned off_low, unsigned off_hi, - unsigned n_low, unsigned n_hi, int flags) +COMPAT_SYSCALL_DEFINE6(x86_sync_file_range, int, fd, unsigned int, off_low, + unsigned int, off_hi, unsigned int, n_low, + unsigned int, n_hi, int, flags) { return sys_sync_file_range(fd, ((u64)off_hi << 32) | off_low, ((u64)n_hi << 32) | n_low, flags); } -asmlinkage long sys32_fadvise64(int fd, unsigned offset_lo, unsigned offset_hi, - size_t len, int advice) +COMPAT_SYSCALL_DEFINE5(x86_fadvise64, int, fd, unsigned int, offset_lo, + unsigned int, offset_hi, size_t, len, int, advice) { return sys_fadvise64_64(fd, ((u64)offset_hi << 32) | offset_lo, len, advice); } -asmlinkage long sys32_fallocate(int fd, int mode, unsigned offset_lo, - unsigned offset_hi, unsigned len_lo, - unsigned len_hi) +COMPAT_SYSCALL_DEFINE6(x86_fallocate, int, fd, int, mode, + unsigned int, offset_lo, unsigned int, offset_hi, + unsigned int, len_lo, unsigned int, len_hi) { return sys_fallocate(fd, mode, ((u64)offset_hi << 32) | offset_lo, ((u64)len_hi << 32) | len_lo); } + +/* + * The 32-bit clone ABI is CONFIG_CLONE_BACKWARDS + */ +COMPAT_SYSCALL_DEFINE5(x86_clone, unsigned long, clone_flags, + unsigned long, newsp, int __user *, parent_tidptr, + unsigned long, tls_val, int __user *, child_tidptr) +{ + return sys_clone(clone_flags, newsp, parent_tidptr, child_tidptr, + tls_val); +} diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 44f5d79d5105..11881726ed37 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -94,7 +94,7 @@ static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate) if (boot_cpu_data.x86 == 0x0F && boot_cpu_data.x86_vendor == X86_VENDOR_AMD && boot_cpu_data.x86_model <= 0x05 && - boot_cpu_data.x86_mask < 0x0A) + boot_cpu_data.x86_stepping < 0x0A) return 1; else if (boot_cpu_has(X86_BUG_AMD_APIC_C1E)) return 1; diff --git a/arch/x86/include/asm/apm.h b/arch/x86/include/asm/apm.h index 4d4015ddcf26..c356098b6fb9 100644 --- a/arch/x86/include/asm/apm.h +++ b/arch/x86/include/asm/apm.h @@ -7,6 +7,8 @@ #ifndef _ASM_X86_MACH_DEFAULT_APM_H #define _ASM_X86_MACH_DEFAULT_APM_H +#include <asm/nospec-branch.h> + #ifdef APM_ZERO_SEGS # define APM_DO_ZERO_SEGS \ "pushl %%ds\n\t" \ @@ -32,6 +34,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in, * N.B. We do NOT need a cld after the BIOS call * because we always save and restore the flags. */ + firmware_restrict_branch_speculation_start(); __asm__ __volatile__(APM_DO_ZERO_SEGS "pushl %%edi\n\t" "pushl %%ebp\n\t" @@ -44,6 +47,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in, "=S" (*esi) : "a" (func), "b" (ebx_in), "c" (ecx_in) : "memory", "cc"); + firmware_restrict_branch_speculation_end(); } static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in, @@ -56,6 +60,7 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in, * N.B. We do NOT need a cld after the BIOS call * because we always save and restore the flags. */ + firmware_restrict_branch_speculation_start(); __asm__ __volatile__(APM_DO_ZERO_SEGS "pushl %%edi\n\t" "pushl %%ebp\n\t" @@ -68,6 +73,7 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in, "=S" (si) : "a" (func), "b" (ebx_in), "c" (ecx_in) : "memory", "cc"); + firmware_restrict_branch_speculation_end(); return error; } diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h index 4d111616524b..1908214b9125 100644 --- a/arch/x86/include/asm/asm-prototypes.h +++ b/arch/x86/include/asm/asm-prototypes.h @@ -38,7 +38,4 @@ INDIRECT_THUNK(dx) INDIRECT_THUNK(si) INDIRECT_THUNK(di) INDIRECT_THUNK(bp) -asmlinkage void __fill_rsb(void); -asmlinkage void __clear_rsb(void); - #endif /* CONFIG_RETPOLINE */ diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index 30d406146016..e1259f043ae9 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -40,7 +40,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, asm ("cmp %1,%2; sbb %0,%0;" :"=r" (mask) - :"r"(size),"r" (index) + :"g"(size),"r" (index) :"cc"); return mask; } diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 3fa039855b8f..9f645ba57dbb 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -78,7 +78,7 @@ set_bit(long nr, volatile unsigned long *addr) : "iq" ((u8)CONST_MASK(nr)) : "memory"); } else { - asm volatile(LOCK_PREFIX "bts %1,%0" + asm volatile(LOCK_PREFIX __ASM_SIZE(bts) " %1,%0" : BITOP_ADDR(addr) : "Ir" (nr) : "memory"); } } @@ -94,7 +94,7 @@ set_bit(long nr, volatile unsigned long *addr) */ static __always_inline void __set_bit(long nr, volatile unsigned long *addr) { - asm volatile("bts %1,%0" : ADDR : "Ir" (nr) : "memory"); + asm volatile(__ASM_SIZE(bts) " %1,%0" : ADDR : "Ir" (nr) : "memory"); } /** @@ -115,7 +115,7 @@ clear_bit(long nr, volatile unsigned long *addr) : CONST_MASK_ADDR(nr, addr) : "iq" ((u8)~CONST_MASK(nr))); } else { - asm volatile(LOCK_PREFIX "btr %1,%0" + asm volatile(LOCK_PREFIX __ASM_SIZE(btr) " %1,%0" : BITOP_ADDR(addr) : "Ir" (nr)); } @@ -137,7 +137,7 @@ static __always_inline void clear_bit_unlock(long nr, volatile unsigned long *ad static __always_inline void __clear_bit(long nr, volatile unsigned long *addr) { - asm volatile("btr %1,%0" : ADDR : "Ir" (nr)); + asm volatile(__ASM_SIZE(btr) " %1,%0" : ADDR : "Ir" (nr)); } static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr) @@ -182,7 +182,7 @@ static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long * */ static __always_inline void __change_bit(long nr, volatile unsigned long *addr) { - asm volatile("btc %1,%0" : ADDR : "Ir" (nr)); + asm volatile(__ASM_SIZE(btc) " %1,%0" : ADDR : "Ir" (nr)); } /** @@ -201,7 +201,7 @@ static __always_inline void change_bit(long nr, volatile unsigned long *addr) : CONST_MASK_ADDR(nr, addr) : "iq" ((u8)CONST_MASK(nr))); } else { - asm volatile(LOCK_PREFIX "btc %1,%0" + asm volatile(LOCK_PREFIX __ASM_SIZE(btc) " %1,%0" : BITOP_ADDR(addr) : "Ir" (nr)); } @@ -217,7 +217,8 @@ static __always_inline void change_bit(long nr, volatile unsigned long *addr) */ static __always_inline bool test_and_set_bit(long nr, volatile unsigned long *addr) { - GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, "Ir", nr, "%0", c); + GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(bts), + *addr, "Ir", nr, "%0", c); } /** @@ -246,7 +247,7 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long * { bool oldbit; - asm("bts %2,%1" + asm(__ASM_SIZE(bts) " %2,%1" CC_SET(c) : CC_OUT(c) (oldbit), ADDR : "Ir" (nr)); @@ -263,7 +264,8 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long * */ static __always_inline bool test_and_clear_bit(long nr, volatile unsigned long *addr) { - GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, "Ir", nr, "%0", c); + GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btr), + *addr, "Ir", nr, "%0", c); } /** @@ -286,7 +288,7 @@ static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long { bool oldbit; - asm volatile("btr %2,%1" + asm volatile(__ASM_SIZE(btr) " %2,%1" CC_SET(c) : CC_OUT(c) (oldbit), ADDR : "Ir" (nr)); @@ -298,7 +300,7 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon { bool oldbit; - asm volatile("btc %2,%1" + asm volatile(__ASM_SIZE(btc) " %2,%1" CC_SET(c) : CC_OUT(c) (oldbit), ADDR : "Ir" (nr) : "memory"); @@ -316,7 +318,8 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon */ static __always_inline bool test_and_change_bit(long nr, volatile unsigned long *addr) { - GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, "Ir", nr, "%0", c); + GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btc), + *addr, "Ir", nr, "%0", c); } static __always_inline bool constant_test_bit(long nr, const volatile unsigned long *addr) @@ -329,7 +332,7 @@ static __always_inline bool variable_test_bit(long nr, volatile const unsigned l { bool oldbit; - asm volatile("bt %2,%1" + asm volatile(__ASM_SIZE(bt) " %2,%1" CC_SET(c) : CC_OUT(c) (oldbit) : "m" (*(unsigned long *)addr), "Ir" (nr)); diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index 34d99af43994..6804d6642767 100644 --- a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h @@ -5,23 +5,20 @@ #include <linux/stringify.h> /* - * Since some emulators terminate on UD2, we cannot use it for WARN. - * Since various instruction decoders disagree on the length of UD1, - * we cannot use it either. So use UD0 for WARN. + * Despite that some emulators terminate on UD2, we use it for WARN(). * - * (binutils knows about "ud1" but {en,de}codes it as 2 bytes, whereas - * our kernel decoder thinks it takes a ModRM byte, which seems consistent - * with various things like the Intel SDM instruction encoding rules) + * Since various instruction decoders/specs disagree on the encoding of + * UD0/UD1. */ -#define ASM_UD0 ".byte 0x0f, 0xff" +#define ASM_UD0 ".byte 0x0f, 0xff" /* + ModRM (for Intel) */ #define ASM_UD1 ".byte 0x0f, 0xb9" /* + ModRM */ #define ASM_UD2 ".byte 0x0f, 0x0b" #define INSN_UD0 0xff0f #define INSN_UD2 0x0b0f -#define LEN_UD0 2 +#define LEN_UD2 2 #ifdef CONFIG_GENERIC_BUG @@ -77,7 +74,11 @@ do { \ unreachable(); \ } while (0) -#define __WARN_FLAGS(flags) _BUG_FLAGS(ASM_UD0, BUGFLAG_WARNING|(flags)) +#define __WARN_FLAGS(flags) \ +do { \ + _BUG_FLAGS(ASM_UD2, BUGFLAG_WARNING|(flags)); \ + annotate_reachable(); \ +} while (0) #include <asm-generic/bug.h> diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 70eddb3922ff..736771c9822e 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -148,45 +148,46 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit); */ static __always_inline __pure bool _static_cpu_has(u16 bit) { - asm_volatile_goto("1: jmp 6f\n" - "2:\n" - ".skip -(((5f-4f) - (2b-1b)) > 0) * " - "((5f-4f) - (2b-1b)),0x90\n" - "3:\n" - ".section .altinstructions,\"a\"\n" - " .long 1b - .\n" /* src offset */ - " .long 4f - .\n" /* repl offset */ - " .word %P1\n" /* always replace */ - " .byte 3b - 1b\n" /* src len */ - " .byte 5f - 4f\n" /* repl len */ - " .byte 3b - 2b\n" /* pad len */ - ".previous\n" - ".section .altinstr_replacement,\"ax\"\n" - "4: jmp %l[t_no]\n" - "5:\n" - ".previous\n" - ".section .altinstructions,\"a\"\n" - " .long 1b - .\n" /* src offset */ - " .long 0\n" /* no replacement */ - " .word %P0\n" /* feature bit */ - " .byte 3b - 1b\n" /* src len */ - " .byte 0\n" /* repl len */ - " .byte 0\n" /* pad len */ - ".previous\n" - ".section .altinstr_aux,\"ax\"\n" - "6:\n" - " testb %[bitnum],%[cap_byte]\n" - " jnz %l[t_yes]\n" - " jmp %l[t_no]\n" - ".previous\n" - : : "i" (bit), "i" (X86_FEATURE_ALWAYS), - [bitnum] "i" (1 << (bit & 7)), - [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3]) - : : t_yes, t_no); - t_yes: - return true; - t_no: - return false; + asm_volatile_goto("1: jmp 6f\n" + "2:\n" + ".skip -(((5f-4f) - (2b-1b)) > 0) * " + "((5f-4f) - (2b-1b)),0x90\n" + "3:\n" + ".section .altinstructions,\"a\"\n" + " .long 1b - .\n" /* src offset */ + " .long 4f - .\n" /* repl offset */ + " .word %P[always]\n" /* always replace */ + " .byte 3b - 1b\n" /* src len */ + " .byte 5f - 4f\n" /* repl len */ + " .byte 3b - 2b\n" /* pad len */ + ".previous\n" + ".section .altinstr_replacement,\"ax\"\n" + "4: jmp %l[t_no]\n" + "5:\n" + ".previous\n" + ".section .altinstructions,\"a\"\n" + " .long 1b - .\n" /* src offset */ + " .long 0\n" /* no replacement */ + " .word %P[feature]\n" /* feature bit */ + " .byte 3b - 1b\n" /* src len */ + " .byte 0\n" /* repl len */ + " .byte 0\n" /* pad len */ + ".previous\n" + ".section .altinstr_aux,\"ax\"\n" + "6:\n" + " testb %[bitnum],%[cap_byte]\n" + " jnz %l[t_yes]\n" + " jmp %l[t_no]\n" + ".previous\n" + : : [feature] "i" (bit), + [always] "i" (X86_FEATURE_ALWAYS), + [bitnum] "i" (1 << (bit & 7)), + [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3]) + : : t_yes, t_no); +t_yes: + return true; +t_no: + return false; } #define static_cpu_has(bit) \ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 0dfe4d3f74e2..f41079da38c5 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -213,6 +213,7 @@ #define X86_FEATURE_SEV ( 7*32+20) /* AMD Secure Encrypted Virtualization */ #define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ +#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 85f6ccb80b91..a399c1ebf6f0 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -6,6 +6,7 @@ #include <asm/pgtable.h> #include <asm/processor-flags.h> #include <asm/tlb.h> +#include <asm/nospec-branch.h> /* * We map the EFI regions needed for runtime services non-contiguously, @@ -36,8 +37,18 @@ extern asmlinkage unsigned long efi_call_phys(void *, ...); -#define arch_efi_call_virt_setup() kernel_fpu_begin() -#define arch_efi_call_virt_teardown() kernel_fpu_end() +#define arch_efi_call_virt_setup() \ +({ \ + kernel_fpu_begin(); \ + firmware_restrict_branch_speculation_start(); \ +}) + +#define arch_efi_call_virt_teardown() \ +({ \ + firmware_restrict_branch_speculation_end(); \ + kernel_fpu_end(); \ +}) + /* * Wrap all the virtual calls in a way that forces the parameters on the stack. @@ -73,6 +84,7 @@ struct efi_scratch { efi_sync_low_kernel_mappings(); \ preempt_disable(); \ __kernel_fpu_begin(); \ + firmware_restrict_branch_speculation_start(); \ \ if (efi_scratch.use_pgd) { \ efi_scratch.prev_cr3 = __read_cr3(); \ @@ -91,6 +103,7 @@ struct efi_scratch { __flush_tlb_all(); \ } \ \ + firmware_restrict_branch_speculation_end(); \ __kernel_fpu_end(); \ preempt_enable(); \ }) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index dd6f57a54a26..b605a5b6a30c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -507,6 +507,7 @@ struct kvm_vcpu_arch { u64 smi_count; bool tpr_access_reporting; u64 ia32_xss; + u64 microcode_version; /* * Paging state of the vcpu @@ -1095,6 +1096,8 @@ struct kvm_x86_ops { int (*mem_enc_op)(struct kvm *kvm, void __user *argp); int (*mem_enc_reg_region)(struct kvm *kvm, struct kvm_enc_region *argp); int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region *argp); + + int (*get_msr_feature)(struct kvm_msr_entry *entry); }; struct kvm_arch_async_pf { @@ -1464,7 +1467,4 @@ static inline int kvm_cpu_get_apicid(int mps_cpu) #define put_smstate(type, buf, offset, val) \ *(type *)((buf) + (offset) - 0x7e00) = val -void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, - unsigned long start, unsigned long end); - #endif /* _ASM_X86_KVM_HOST_H */ diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index 55520cec8b27..7fb1047d61c7 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -37,7 +37,12 @@ struct cpu_signature { struct device; -enum ucode_state { UCODE_ERROR, UCODE_OK, UCODE_NFOUND }; +enum ucode_state { + UCODE_OK = 0, + UCODE_UPDATED, + UCODE_NFOUND, + UCODE_ERROR, +}; struct microcode_ops { enum ucode_state (*request_microcode_user) (int cpu, @@ -54,7 +59,7 @@ struct microcode_ops { * are being called. * See also the "Synchronization" section in microcode_core.c. */ - int (*apply_microcode) (int cpu); + enum ucode_state (*apply_microcode) (int cpu); int (*collect_cpu_info) (int cpu, struct cpu_signature *csig); }; diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index c931b88982a0..1de72ce514cd 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -74,6 +74,7 @@ static inline void *ldt_slot_va(int slot) return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot); #else BUG(); + return (void *)fix_to_virt(FIX_HOLE); #endif } diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 4d57894635f2..d0dabeae0505 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -6,6 +6,51 @@ #include <asm/alternative.h> #include <asm/alternative-asm.h> #include <asm/cpufeatures.h> +#include <asm/msr-index.h> + +/* + * Fill the CPU return stack buffer. + * + * Each entry in the RSB, if used for a speculative 'ret', contains an + * infinite 'pause; lfence; jmp' loop to capture speculative execution. + * + * This is required in various cases for retpoline and IBRS-based + * mitigations for the Spectre variant 2 vulnerability. Sometimes to + * eliminate potentially bogus entries from the RSB, and sometimes + * purely to ensure that it doesn't get empty, which on some CPUs would + * allow predictions from other (unwanted!) sources to be used. + * + * We define a CPP macro such that it can be used from both .S files and + * inline assembly. It's possible to do a .macro and then include that + * from C via asm(".include <asm/nospec-branch.h>") but let's not go there. + */ + +#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ +#define RSB_FILL_LOOPS 16 /* To avoid underflow */ + +/* + * Google experimented with loop-unrolling and this turned out to be + * the optimal version — two calls, each with their own speculation + * trap should their return address end up getting used, in a loop. + */ +#define __FILL_RETURN_BUFFER(reg, nr, sp) \ + mov $(nr/2), reg; \ +771: \ + call 772f; \ +773: /* speculation trap */ \ + pause; \ + lfence; \ + jmp 773b; \ +772: \ + call 774f; \ +775: /* speculation trap */ \ + pause; \ + lfence; \ + jmp 775b; \ +774: \ + dec reg; \ + jnz 771b; \ + add $(BITS_PER_LONG/8) * nr, sp; #ifdef __ASSEMBLY__ @@ -23,6 +68,18 @@ .endm /* + * This should be used immediately before an indirect jump/call. It tells + * objtool the subsequent indirect jump/call is vouched safe for retpoline + * builds. + */ +.macro ANNOTATE_RETPOLINE_SAFE + .Lannotate_\@: + .pushsection .discard.retpoline_safe + _ASM_PTR .Lannotate_\@ + .popsection +.endm + +/* * These are the bare retpoline primitives for indirect jmp and call. * Do not use these directly; they only exist to make the ALTERNATIVE * invocation below less ugly. @@ -58,9 +115,9 @@ .macro JMP_NOSPEC reg:req #ifdef CONFIG_RETPOLINE ANNOTATE_NOSPEC_ALTERNATIVE - ALTERNATIVE_2 __stringify(jmp *\reg), \ + ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *\reg), \ __stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \ - __stringify(lfence; jmp *\reg), X86_FEATURE_RETPOLINE_AMD + __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *\reg), X86_FEATURE_RETPOLINE_AMD #else jmp *\reg #endif @@ -69,18 +126,25 @@ .macro CALL_NOSPEC reg:req #ifdef CONFIG_RETPOLINE ANNOTATE_NOSPEC_ALTERNATIVE - ALTERNATIVE_2 __stringify(call *\reg), \ + ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *\reg), \ __stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\ - __stringify(lfence; call *\reg), X86_FEATURE_RETPOLINE_AMD + __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *\reg), X86_FEATURE_RETPOLINE_AMD #else call *\reg #endif .endm -/* This clobbers the BX register */ -.macro FILL_RETURN_BUFFER nr:req ftr:req + /* + * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP + * monstrosity above, manually. + */ +.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req #ifdef CONFIG_RETPOLINE - ALTERNATIVE "", "call __clear_rsb", \ftr + ANNOTATE_NOSPEC_ALTERNATIVE + ALTERNATIVE "jmp .Lskip_rsb_\@", \ + __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \ + \ftr +.Lskip_rsb_\@: #endif .endm @@ -92,6 +156,12 @@ ".long 999b - .\n\t" \ ".popsection\n\t" +#define ANNOTATE_RETPOLINE_SAFE \ + "999:\n\t" \ + ".pushsection .discard.retpoline_safe\n\t" \ + _ASM_PTR " 999b\n\t" \ + ".popsection\n\t" + #if defined(CONFIG_X86_64) && defined(RETPOLINE) /* @@ -101,6 +171,7 @@ # define CALL_NOSPEC \ ANNOTATE_NOSPEC_ALTERNATIVE \ ALTERNATIVE( \ + ANNOTATE_RETPOLINE_SAFE \ "call *%[thunk_target]\n", \ "call __x86_indirect_thunk_%V[thunk_target]\n", \ X86_FEATURE_RETPOLINE) @@ -155,20 +226,90 @@ extern char __indirect_thunk_end[]; static inline void vmexit_fill_RSB(void) { #ifdef CONFIG_RETPOLINE - alternative_input("", - "call __fill_rsb", - X86_FEATURE_RETPOLINE, - ASM_NO_INPUT_CLOBBER(_ASM_BX, "memory")); + unsigned long loops; + + asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE + ALTERNATIVE("jmp 910f", + __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)), + X86_FEATURE_RETPOLINE) + "910:" + : "=r" (loops), ASM_CALL_CONSTRAINT + : : "memory" ); #endif } +#define alternative_msr_write(_msr, _val, _feature) \ + asm volatile(ALTERNATIVE("", \ + "movl %[msr], %%ecx\n\t" \ + "movl %[val], %%eax\n\t" \ + "movl $0, %%edx\n\t" \ + "wrmsr", \ + _feature) \ + : : [msr] "i" (_msr), [val] "i" (_val) \ + : "eax", "ecx", "edx", "memory") + static inline void indirect_branch_prediction_barrier(void) { - alternative_input("", - "call __ibp_barrier", - X86_FEATURE_USE_IBPB, - ASM_NO_INPUT_CLOBBER("eax", "ecx", "edx", "memory")); + alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, + X86_FEATURE_USE_IBPB); } +/* + * With retpoline, we must use IBRS to restrict branch prediction + * before calling into firmware. + * + * (Implemented as CPP macros due to header hell.) + */ +#define firmware_restrict_branch_speculation_start() \ +do { \ + preempt_disable(); \ + alternative_msr_write(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS, \ + X86_FEATURE_USE_IBRS_FW); \ +} while (0) + +#define firmware_restrict_branch_speculation_end() \ +do { \ + alternative_msr_write(MSR_IA32_SPEC_CTRL, 0, \ + X86_FEATURE_USE_IBRS_FW); \ + preempt_enable(); \ +} while (0) + #endif /* __ASSEMBLY__ */ + +/* + * Below is used in the eBPF JIT compiler and emits the byte sequence + * for the following assembly: + * + * With retpolines configured: + * + * callq do_rop + * spec_trap: + * pause + * lfence + * jmp spec_trap + * do_rop: + * mov %rax,(%rsp) + * retq + * + * Without retpolines configured: + * + * jmp *%rax + */ +#ifdef CONFIG_RETPOLINE +# define RETPOLINE_RAX_BPF_JIT_SIZE 17 +# define RETPOLINE_RAX_BPF_JIT() \ + EMIT1_off32(0xE8, 7); /* callq do_rop */ \ + /* spec_trap: */ \ + EMIT2(0xF3, 0x90); /* pause */ \ + EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \ + EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \ + /* do_rop: */ \ + EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */ \ + EMIT1(0xC3); /* retq */ +#else +# define RETPOLINE_RAX_BPF_JIT_SIZE 2 +# define RETPOLINE_RAX_BPF_JIT() \ + EMIT2(0xFF, 0xE0); /* jmp *%rax */ +#endif + #endif /* _ASM_X86_NOSPEC_BRANCH_H_ */ diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h index 4baa6bceb232..d652a3808065 100644 --- a/arch/x86/include/asm/page_64.h +++ b/arch/x86/include/asm/page_64.h @@ -52,10 +52,6 @@ static inline void clear_page(void *page) void copy_page(void *to, void *from); -#ifdef CONFIG_X86_MCE -#define arch_unmap_kpfn arch_unmap_kpfn -#endif - #endif /* !__ASSEMBLY__ */ #ifdef CONFIG_X86_VSYSCALL_EMULATION diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 892df375b615..c83a2f418cea 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -7,6 +7,7 @@ #ifdef CONFIG_PARAVIRT #include <asm/pgtable_types.h> #include <asm/asm.h> +#include <asm/nospec-branch.h> #include <asm/paravirt_types.h> @@ -297,9 +298,9 @@ static inline void __flush_tlb_global(void) { PVOP_VCALL0(pv_mmu_ops.flush_tlb_kernel); } -static inline void __flush_tlb_single(unsigned long addr) +static inline void __flush_tlb_one_user(unsigned long addr) { - PVOP_VCALL1(pv_mmu_ops.flush_tlb_single, addr); + PVOP_VCALL1(pv_mmu_ops.flush_tlb_one_user, addr); } static inline void flush_tlb_others(const struct cpumask *cpumask, @@ -879,23 +880,27 @@ extern void default_banner(void); #define INTERRUPT_RETURN \ PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE, \ - jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret)) + ANNOTATE_RETPOLINE_SAFE; \ + jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret);) #define DISABLE_INTERRUPTS(clobbers) \ PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \ PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ + ANNOTATE_RETPOLINE_SAFE; \ call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable); \ PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) #define ENABLE_INTERRUPTS(clobbers) \ PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers, \ PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ + ANNOTATE_RETPOLINE_SAFE; \ call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable); \ PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) #ifdef CONFIG_X86_32 #define GET_CR0_INTO_EAX \ push %ecx; push %edx; \ + ANNOTATE_RETPOLINE_SAFE; \ call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0); \ pop %edx; pop %ecx #else /* !CONFIG_X86_32 */ @@ -917,21 +922,25 @@ extern void default_banner(void); */ #define SWAPGS \ PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \ - call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs) \ + ANNOTATE_RETPOLINE_SAFE; \ + call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs); \ ) #define GET_CR2_INTO_RAX \ - call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2) + ANNOTATE_RETPOLINE_SAFE; \ + call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2); #define USERGS_SYSRET64 \ PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \ CLBR_NONE, \ - jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64)) + ANNOTATE_RETPOLINE_SAFE; \ + jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64);) #ifdef CONFIG_DEBUG_ENTRY #define SAVE_FLAGS(clobbers) \ PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_save_fl), clobbers, \ PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ + ANNOTATE_RETPOLINE_SAFE; \ call PARA_INDIRECT(pv_irq_ops+PV_IRQ_save_fl); \ PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) #endif diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 6ec54d01972d..180bc0bff0fb 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -43,6 +43,7 @@ #include <asm/desc_defs.h> #include <asm/kmap_types.h> #include <asm/pgtable_types.h> +#include <asm/nospec-branch.h> struct page; struct thread_struct; @@ -217,7 +218,7 @@ struct pv_mmu_ops { /* TLB operations */ void (*flush_tlb_user)(void); void (*flush_tlb_kernel)(void); - void (*flush_tlb_single)(unsigned long addr); + void (*flush_tlb_one_user)(unsigned long addr); void (*flush_tlb_others)(const struct cpumask *cpus, const struct flush_tlb_info *info); @@ -392,7 +393,9 @@ int paravirt_disable_iospace(void); * offset into the paravirt_patch_template structure, and can therefore be * freely converted back into a structure offset. */ -#define PARAVIRT_CALL "call *%c[paravirt_opptr];" +#define PARAVIRT_CALL \ + ANNOTATE_RETPOLINE_SAFE \ + "call *%c[paravirt_opptr];" /* * These macros are intended to wrap calls through one of the paravirt diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index ba3c523aaf16..a06b07399d17 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -526,7 +526,7 @@ static inline bool x86_this_cpu_variable_test_bit(int nr, { bool oldbit; - asm volatile("bt "__percpu_arg(2)",%1" + asm volatile("btl "__percpu_arg(2)",%1" CC_SET(c) : CC_OUT(c) (oldbit) : "m" (*(unsigned long __percpu *)addr), "Ir" (nr)); diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 63c2552b6b65..b444d83cfc95 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -350,14 +350,14 @@ static inline pmd_t pmd_set_flags(pmd_t pmd, pmdval_t set) { pmdval_t v = native_pmd_val(pmd); - return __pmd(v | set); + return native_make_pmd(v | set); } static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear) { pmdval_t v = native_pmd_val(pmd); - return __pmd(v & ~clear); + return native_make_pmd(v & ~clear); } static inline pmd_t pmd_mkold(pmd_t pmd) @@ -409,14 +409,14 @@ static inline pud_t pud_set_flags(pud_t pud, pudval_t set) { pudval_t v = native_pud_val(pud); - return __pud(v | set); + return native_make_pud(v | set); } static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear) { pudval_t v = native_pud_val(pud); - return __pud(v & ~clear); + return native_make_pud(v & ~clear); } static inline pud_t pud_mkold(pud_t pud) diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index e67c0620aec2..b3ec519e3982 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -32,6 +32,7 @@ extern pmd_t initial_pg_pmd[]; static inline void pgtable_cache_init(void) { } static inline void check_pgt_cache(void) { } void paging_init(void); +void sync_initial_page_table(void); /* * Define this if things work differently on an i386 and an i486: @@ -61,7 +62,7 @@ void paging_init(void); #define kpte_clear_flush(ptep, vaddr) \ do { \ pte_clear(&init_mm, (vaddr), (ptep)); \ - __flush_tlb_one((vaddr)); \ + __flush_tlb_one_kernel((vaddr)); \ } while (0) #endif /* !__ASSEMBLY__ */ diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 81462e9a34f6..1149d2112b2e 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -28,6 +28,7 @@ extern pgd_t init_top_pgt[]; #define swapper_pg_dir init_top_pgt extern void paging_init(void); +static inline void sync_initial_page_table(void) { } #define pte_ERROR(e) \ pr_err("%s:%d: bad pte %p(%016lx)\n", \ diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 3696398a9475..acfe755562a6 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -174,7 +174,6 @@ enum page_cache_mode { #define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW) #define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW) #define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_NOCACHE) -#define __PAGE_KERNEL_VSYSCALL (__PAGE_KERNEL_RX | _PAGE_USER) #define __PAGE_KERNEL_VVAR (__PAGE_KERNEL_RO | _PAGE_USER) #define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE) #define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE) @@ -206,7 +205,6 @@ enum page_cache_mode { #define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE | _PAGE_ENC) #define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE | _PAGE_ENC) #define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC | _PAGE_ENC) -#define PAGE_KERNEL_VSYSCALL __pgprot(__PAGE_KERNEL_VSYSCALL | _PAGE_ENC) #define PAGE_KERNEL_VVAR __pgprot(__PAGE_KERNEL_VVAR | _PAGE_ENC) #define PAGE_KERNEL_IO __pgprot(__PAGE_KERNEL_IO) @@ -323,6 +321,11 @@ static inline pudval_t native_pud_val(pud_t pud) #else #include <asm-generic/pgtable-nopud.h> +static inline pud_t native_make_pud(pudval_t val) +{ + return (pud_t) { .p4d.pgd = native_make_pgd(val) }; +} + static inline pudval_t native_pud_val(pud_t pud) { return native_pgd_val(pud.p4d.pgd); @@ -344,6 +347,11 @@ static inline pmdval_t native_pmd_val(pmd_t pmd) #else #include <asm-generic/pgtable-nopmd.h> +static inline pmd_t native_make_pmd(pmdval_t val) +{ + return (pmd_t) { .pud.p4d.pgd = native_make_pgd(val) }; +} + static inline pmdval_t native_pmd_val(pmd_t pmd) { return native_pgd_val(pmd.pud.p4d.pgd); diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 793bae7e7ce3..b0ccd4847a58 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -91,7 +91,7 @@ struct cpuinfo_x86 { __u8 x86; /* CPU family */ __u8 x86_vendor; /* CPU vendor */ __u8 x86_model; - __u8 x86_mask; + __u8 x86_stepping; #ifdef CONFIG_X86_64 /* Number of 4K pages in DTLB/ITLB combined(in pages): */ int x86_tlbsize; @@ -109,7 +109,7 @@ struct cpuinfo_x86 { char x86_vendor_id[16]; char x86_model_id[64]; /* in KB - valid for CPUS which support this call: */ - int x86_cache_size; + unsigned int x86_cache_size; int x86_cache_alignment; /* In bytes */ /* Cache QoS architectural values: */ int x86_cache_max_rmid; /* max index */ @@ -977,7 +977,5 @@ bool xen_set_default_idle(void); void stop_this_cpu(void *dummy); void df_debug(struct pt_regs *regs, long error_code); - -void __ibp_barrier(void); - +void microcode_check(void); #endif /* _ASM_X86_PROCESSOR_H */ diff --git a/arch/x86/include/asm/refcount.h b/arch/x86/include/asm/refcount.h index 4e44250e7d0d..4cf11d88d3b3 100644 --- a/arch/x86/include/asm/refcount.h +++ b/arch/x86/include/asm/refcount.h @@ -17,7 +17,7 @@ #define _REFCOUNT_EXCEPTION \ ".pushsection .text..refcount\n" \ "111:\tlea %[counter], %%" _ASM_CX "\n" \ - "112:\t" ASM_UD0 "\n" \ + "112:\t" ASM_UD2 "\n" \ ASM_UNREACHABLE \ ".popsection\n" \ "113:\n" \ @@ -67,13 +67,13 @@ static __always_inline __must_check bool refcount_sub_and_test(unsigned int i, refcount_t *r) { GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl", REFCOUNT_CHECK_LT_ZERO, - r->refs.counter, "er", i, "%0", e); + r->refs.counter, "er", i, "%0", e, "cx"); } static __always_inline __must_check bool refcount_dec_and_test(refcount_t *r) { GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl", REFCOUNT_CHECK_LT_ZERO, - r->refs.counter, "%0", e); + r->refs.counter, "%0", e, "cx"); } static __always_inline __must_check diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h index f91c365e57c3..4914a3e7c803 100644 --- a/arch/x86/include/asm/rmwcc.h +++ b/arch/x86/include/asm/rmwcc.h @@ -2,8 +2,7 @@ #ifndef _ASM_X86_RMWcc #define _ASM_X86_RMWcc -#define __CLOBBERS_MEM "memory" -#define __CLOBBERS_MEM_CC_CX "memory", "cc", "cx" +#define __CLOBBERS_MEM(clb...) "memory", ## clb #if !defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(CC_HAVE_ASM_GOTO) @@ -40,18 +39,19 @@ do { \ #endif /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */ #define GEN_UNARY_RMWcc(op, var, arg0, cc) \ - __GEN_RMWcc(op " " arg0, var, cc, __CLOBBERS_MEM) + __GEN_RMWcc(op " " arg0, var, cc, __CLOBBERS_MEM()) -#define GEN_UNARY_SUFFIXED_RMWcc(op, suffix, var, arg0, cc) \ +#define GEN_UNARY_SUFFIXED_RMWcc(op, suffix, var, arg0, cc, clobbers...)\ __GEN_RMWcc(op " " arg0 "\n\t" suffix, var, cc, \ - __CLOBBERS_MEM_CC_CX) + __CLOBBERS_MEM(clobbers)) #define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc) \ __GEN_RMWcc(op __BINARY_RMWcc_ARG arg0, var, cc, \ - __CLOBBERS_MEM, vcon (val)) + __CLOBBERS_MEM(), vcon (val)) -#define GEN_BINARY_SUFFIXED_RMWcc(op, suffix, var, vcon, val, arg0, cc) \ +#define GEN_BINARY_SUFFIXED_RMWcc(op, suffix, var, vcon, val, arg0, cc, \ + clobbers...) \ __GEN_RMWcc(op __BINARY_RMWcc_ARG arg0 "\n\t" suffix, var, cc, \ - __CLOBBERS_MEM_CC_CX, vcon (val)) + __CLOBBERS_MEM(clobbers), vcon (val)) #endif /* _ASM_X86_RMWcc */ diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h index d6baf23782bc..5c019d23d06b 100644 --- a/arch/x86/include/asm/sections.h +++ b/arch/x86/include/asm/sections.h @@ -10,6 +10,7 @@ extern struct exception_table_entry __stop___ex_table[]; #if defined(CONFIG_X86_64) extern char __end_rodata_hpage_align[]; +extern char __entry_trampoline_start[], __entry_trampoline_end[]; #endif #endif /* _ASM_X86_SECTIONS_H */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 461f53d27708..a4189762b266 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -129,6 +129,7 @@ static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask) void cpu_disable_common(void); void native_smp_prepare_boot_cpu(void); void native_smp_prepare_cpus(unsigned int max_cpus); +void calculate_max_logical_packages(void); void native_smp_cpus_done(unsigned int max_cpus); void common_cpu_up(unsigned int cpunum, struct task_struct *tidle); int native_cpu_up(unsigned int cpunum, struct task_struct *tidle); diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h index 82c34ee25a65..906794aa034e 100644 --- a/arch/x86/include/asm/sys_ia32.h +++ b/arch/x86/include/asm/sys_ia32.h @@ -20,31 +20,43 @@ #include <asm/ia32.h> /* ia32/sys_ia32.c */ -asmlinkage long sys32_truncate64(const char __user *, unsigned long, unsigned long); -asmlinkage long sys32_ftruncate64(unsigned int, unsigned long, unsigned long); +asmlinkage long compat_sys_x86_truncate64(const char __user *, unsigned long, + unsigned long); +asmlinkage long compat_sys_x86_ftruncate64(unsigned int, unsigned long, + unsigned long); -asmlinkage long sys32_stat64(const char __user *, struct stat64 __user *); -asmlinkage long sys32_lstat64(const char __user *, struct stat64 __user *); -asmlinkage long sys32_fstat64(unsigned int, struct stat64 __user *); -asmlinkage long sys32_fstatat(unsigned int, const char __user *, +asmlinkage long compat_sys_x86_stat64(const char __user *, + struct stat64 __user *); +asmlinkage long compat_sys_x86_lstat64(const char __user *, + struct stat64 __user *); +asmlinkage long compat_sys_x86_fstat64(unsigned int, struct stat64 __user *); +asmlinkage long compat_sys_x86_fstatat(unsigned int, const char __user *, struct stat64 __user *, int); struct mmap_arg_struct32; -asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *); +asmlinkage long compat_sys_x86_mmap(struct mmap_arg_struct32 __user *); -asmlinkage long sys32_waitpid(compat_pid_t, unsigned int __user *, int); +asmlinkage long compat_sys_x86_waitpid(compat_pid_t, unsigned int __user *, + int); -asmlinkage long sys32_pread(unsigned int, char __user *, u32, u32, u32); -asmlinkage long sys32_pwrite(unsigned int, const char __user *, u32, u32, u32); +asmlinkage long compat_sys_x86_pread(unsigned int, char __user *, u32, u32, + u32); +asmlinkage long compat_sys_x86_pwrite(unsigned int, const char __user *, u32, + u32, u32); -long sys32_fadvise64_64(int, __u32, __u32, __u32, __u32, int); -long sys32_vm86_warning(void); +asmlinkage long compat_sys_x86_fadvise64_64(int, __u32, __u32, __u32, __u32, + int); -asmlinkage ssize_t sys32_readahead(int, unsigned, unsigned, size_t); -asmlinkage long sys32_sync_file_range(int, unsigned, unsigned, - unsigned, unsigned, int); -asmlinkage long sys32_fadvise64(int, unsigned, unsigned, size_t, int); -asmlinkage long sys32_fallocate(int, int, unsigned, - unsigned, unsigned, unsigned); +asmlinkage ssize_t compat_sys_x86_readahead(int, unsigned int, unsigned int, + size_t); +asmlinkage long compat_sys_x86_sync_file_range(int, unsigned int, unsigned int, + unsigned int, unsigned int, + int); +asmlinkage long compat_sys_x86_fadvise64(int, unsigned int, unsigned int, + size_t, int); +asmlinkage long compat_sys_x86_fallocate(int, int, unsigned int, unsigned int, + unsigned int, unsigned int); +asmlinkage long compat_sys_x86_clone(unsigned long, unsigned long, int __user *, + unsigned long, int __user *); /* ia32/ia32_signal.c */ asmlinkage long sys32_sigreturn(void); diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 2b8f18ca5874..84137c22fdfa 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -140,7 +140,7 @@ static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid) #else #define __flush_tlb() __native_flush_tlb() #define __flush_tlb_global() __native_flush_tlb_global() -#define __flush_tlb_single(addr) __native_flush_tlb_single(addr) +#define __flush_tlb_one_user(addr) __native_flush_tlb_one_user(addr) #endif static inline bool tlb_defer_switch_to_init_mm(void) @@ -400,7 +400,7 @@ static inline void __native_flush_tlb_global(void) /* * flush one page in the user mapping */ -static inline void __native_flush_tlb_single(unsigned long addr) +static inline void __native_flush_tlb_one_user(unsigned long addr) { u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid); @@ -437,18 +437,31 @@ static inline void __flush_tlb_all(void) /* * flush one page in the kernel mapping */ -static inline void __flush_tlb_one(unsigned long addr) +static inline void __flush_tlb_one_kernel(unsigned long addr) { count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE); - __flush_tlb_single(addr); + + /* + * If PTI is off, then __flush_tlb_one_user() is just INVLPG or its + * paravirt equivalent. Even with PCID, this is sufficient: we only + * use PCID if we also use global PTEs for the kernel mapping, and + * INVLPG flushes global translations across all address spaces. + * + * If PTI is on, then the kernel is mapped with non-global PTEs, and + * __flush_tlb_one_user() will flush the given address for the current + * kernel address space and for its usermode counterpart, but it does + * not flush it for other address spaces. + */ + __flush_tlb_one_user(addr); if (!static_cpu_has(X86_FEATURE_PTI)) return; /* - * __flush_tlb_single() will have cleared the TLB entry for this ASID, - * but since kernel space is replicated across all, we must also - * invalidate all others. + * See above. We need to propagate the flush to all other address + * spaces. In principle, we only need to propagate it to kernelmode + * address spaces, but the extra bookkeeping we would need is not + * worth it. */ invalidate_other_asid(); } diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h index 197c2e6c7376..099414345865 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/uapi/asm/hyperv.h @@ -241,24 +241,24 @@ #define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106 struct hv_reenlightenment_control { - u64 vector:8; - u64 reserved1:8; - u64 enabled:1; - u64 reserved2:15; - u64 target_vp:32; + __u64 vector:8; + __u64 reserved1:8; + __u64 enabled:1; + __u64 reserved2:15; + __u64 target_vp:32; }; #define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107 #define HV_X64_MSR_TSC_EMULATION_STATUS 0x40000108 struct hv_tsc_emulation_control { - u64 enabled:1; - u64 reserved:63; + __u64 enabled:1; + __u64 reserved:63; }; struct hv_tsc_emulation_status { - u64 inprogress:1; - u64 reserved:63; + __u64 inprogress:1; + __u64 reserved:63; }; #define HV_X64_MSR_HYPERCALL_ENABLE 0x00000001 diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h index 7a2ade4aa235..6cfa9c8cb7d6 100644 --- a/arch/x86/include/uapi/asm/kvm_para.h +++ b/arch/x86/include/uapi/asm/kvm_para.h @@ -26,6 +26,7 @@ #define KVM_FEATURE_PV_EOI 6 #define KVM_FEATURE_PV_UNHALT 7 #define KVM_FEATURE_PV_TLB_FLUSH 9 +#define KVM_FEATURE_ASYNC_PF_VMEXIT 10 /* The last 8 bits are used to indicate how to interpret the flags field * in pvclock structure. If no bits are set, all flags are ignored. diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h index 91723461dc1f..435db58a7bad 100644 --- a/arch/x86/include/uapi/asm/mce.h +++ b/arch/x86/include/uapi/asm/mce.h @@ -30,6 +30,7 @@ struct mce { __u64 synd; /* MCA_SYND MSR: only valid on SMCA systems */ __u64 ipid; /* MCA_IPID MSR: only valid on SMCA systems */ __u64 ppin; /* Protected Processor Inventory Number */ + __u32 microcode;/* Microcode revision */ }; #define MCE_GET_RECORD_LEN _IOR('M', 1, int) diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 6db28f17ff28..c88e0b127810 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -235,7 +235,7 @@ int amd_cache_northbridges(void) if (boot_cpu_data.x86 == 0x10 && boot_cpu_data.x86_model >= 0x8 && (boot_cpu_data.x86_model > 0x9 || - boot_cpu_data.x86_mask >= 0x1)) + boot_cpu_data.x86_stepping >= 0x1)) amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE; if (boot_cpu_data.x86 == 0x15) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 25ddf02598d2..b203af0855b5 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -546,7 +546,7 @@ static DEFINE_PER_CPU(struct clock_event_device, lapic_events); static u32 hsx_deadline_rev(void) { - switch (boot_cpu_data.x86_mask) { + switch (boot_cpu_data.x86_stepping) { case 0x02: return 0x3a; /* EP */ case 0x04: return 0x0f; /* EX */ } @@ -556,7 +556,7 @@ static u32 hsx_deadline_rev(void) static u32 bdx_deadline_rev(void) { - switch (boot_cpu_data.x86_mask) { + switch (boot_cpu_data.x86_stepping) { case 0x02: return 0x00000011; case 0x03: return 0x0700000e; case 0x04: return 0x0f00000c; @@ -568,7 +568,7 @@ static u32 bdx_deadline_rev(void) static u32 skx_deadline_rev(void) { - switch (boot_cpu_data.x86_mask) { + switch (boot_cpu_data.x86_stepping) { case 0x03: return 0x01000136; case 0x04: return 0x02000014; } diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 8ad2e410974f..7c5538769f7e 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1603,7 +1603,7 @@ static void __init delay_with_tsc(void) do { rep_nop(); now = rdtsc(); - } while ((now - start) < 40000000000UL / HZ && + } while ((now - start) < 40000000000ULL / HZ && time_before_eq(jiffies, end)); } diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 3cc471beb50b..bb6f7a2148d7 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -134,21 +134,40 @@ static void apic_update_vector(struct irq_data *irqd, unsigned int newvec, { struct apic_chip_data *apicd = apic_chip_data(irqd); struct irq_desc *desc = irq_data_to_desc(irqd); + bool managed = irqd_affinity_is_managed(irqd); lockdep_assert_held(&vector_lock); trace_vector_update(irqd->irq, newvec, newcpu, apicd->vector, apicd->cpu); - /* Setup the vector move, if required */ - if (apicd->vector && cpu_online(apicd->cpu)) { + /* + * If there is no vector associated or if the associated vector is + * the shutdown vector, which is associated to make PCI/MSI + * shutdown mode work, then there is nothing to release. Clear out + * prev_vector for this and the offlined target case. + */ + apicd->prev_vector = 0; + if (!apicd->vector || apicd->vector == MANAGED_IRQ_SHUTDOWN_VECTOR) + goto setnew; + /* + * If the target CPU of the previous vector is online, then mark + * the vector as move in progress and store it for cleanup when the + * first interrupt on the new vector arrives. If the target CPU is + * offline then the regular release mechanism via the cleanup + * vector is not possible and the vector can be immediately freed + * in the underlying matrix allocator. + */ + if (cpu_online(apicd->cpu)) { apicd->move_in_progress = true; apicd->prev_vector = apicd->vector; apicd->prev_cpu = apicd->cpu; } else { - apicd->prev_vector = 0; + irq_matrix_free(vector_matrix, apicd->cpu, apicd->vector, + managed); } +setnew: apicd->vector = newvec; apicd->cpu = newcpu; BUG_ON(!IS_ERR_OR_NULL(per_cpu(vector_irq, newcpu)[newvec])); diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 46b675aaf20b..f11910b44638 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -1176,16 +1176,25 @@ static void __init decode_gam_rng_tbl(unsigned long ptr) uv_gre_table = gre; for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) { + unsigned long size = ((unsigned long)(gre->limit - lgre) + << UV_GAM_RANGE_SHFT); + int order = 0; + char suffix[] = " KMGTPE"; + + while (size > 9999 && order < sizeof(suffix)) { + size /= 1024; + order++; + } + if (!index) { pr_info("UV: GAM Range Table...\n"); pr_info("UV: # %20s %14s %5s %4s %5s %3s %2s\n", "Range", "", "Size", "Type", "NASID", "SID", "PN"); } - pr_info("UV: %2d: 0x%014lx-0x%014lx %5luG %3d %04x %02x %02x\n", + pr_info("UV: %2d: 0x%014lx-0x%014lx %5lu%c %3d %04x %02x %02x\n", index++, (unsigned long)lgre << UV_GAM_RANGE_SHFT, (unsigned long)gre->limit << UV_GAM_RANGE_SHFT, - ((unsigned long)(gre->limit - lgre)) >> - (30 - UV_GAM_RANGE_SHFT), /* 64M -> 1G */ + size, suffix[order], gre->type, gre->nasid, gre->sockid, gre->pnode); lgre = gre->limit; diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index fa1261eefa16..f91ba53e06c8 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -18,7 +18,7 @@ void foo(void) OFFSET(CPUINFO_x86, cpuinfo_x86, x86); OFFSET(CPUINFO_x86_vendor, cpuinfo_x86, x86_vendor); OFFSET(CPUINFO_x86_model, cpuinfo_x86, x86_model); - OFFSET(CPUINFO_x86_mask, cpuinfo_x86, x86_mask); + OFFSET(CPUINFO_x86_stepping, cpuinfo_x86, x86_stepping); OFFSET(CPUINFO_cpuid_level, cpuinfo_x86, cpuid_level); OFFSET(CPUINFO_x86_capability, cpuinfo_x86, x86_capability); OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id); diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 5bddbdcbc4a3..f0e6456ca7d3 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -119,7 +119,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c) return; } - if (c->x86_model == 6 && c->x86_mask == 1) { + if (c->x86_model == 6 && c->x86_stepping == 1) { const int K6_BUG_LOOP = 1000000; int n; void (*f_vide)(void); @@ -149,7 +149,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c) /* K6 with old style WHCR */ if (c->x86_model < 8 || - (c->x86_model == 8 && c->x86_mask < 8)) { + (c->x86_model == 8 && c->x86_stepping < 8)) { /* We can only write allocate on the low 508Mb */ if (mbytes > 508) mbytes = 508; @@ -168,7 +168,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c) return; } - if ((c->x86_model == 8 && c->x86_mask > 7) || + if ((c->x86_model == 8 && c->x86_stepping > 7) || c->x86_model == 9 || c->x86_model == 13) { /* The more serious chips .. */ @@ -221,7 +221,7 @@ static void init_amd_k7(struct cpuinfo_x86 *c) * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx * As per AMD technical note 27212 0.2 */ - if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) { + if ((c->x86_model == 8 && c->x86_stepping >= 1) || (c->x86_model > 8)) { rdmsr(MSR_K7_CLK_CTL, l, h); if ((l & 0xfff00000) != 0x20000000) { pr_info("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", @@ -241,12 +241,12 @@ static void init_amd_k7(struct cpuinfo_x86 *c) * but they are not certified as MP capable. */ /* Athlon 660/661 is valid. */ - if ((c->x86_model == 6) && ((c->x86_mask == 0) || - (c->x86_mask == 1))) + if ((c->x86_model == 6) && ((c->x86_stepping == 0) || + (c->x86_stepping == 1))) return; /* Duron 670 is valid */ - if ((c->x86_model == 7) && (c->x86_mask == 0)) + if ((c->x86_model == 7) && (c->x86_stepping == 0)) return; /* @@ -256,8 +256,8 @@ static void init_amd_k7(struct cpuinfo_x86 *c) * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for * more. */ - if (((c->x86_model == 6) && (c->x86_mask >= 2)) || - ((c->x86_model == 7) && (c->x86_mask >= 1)) || + if (((c->x86_model == 6) && (c->x86_stepping >= 2)) || + ((c->x86_model == 7) && (c->x86_stepping >= 1)) || (c->x86_model > 7)) if (cpu_has(c, X86_FEATURE_MP)) return; @@ -628,7 +628,7 @@ static void early_init_amd(struct cpuinfo_x86 *c) /* Set MTRR capability flag if appropriate */ if (c->x86 == 5) if (c->x86_model == 13 || c->x86_model == 9 || - (c->x86_model == 8 && c->x86_mask >= 8)) + (c->x86_model == 8 && c->x86_stepping >= 8)) set_cpu_cap(c, X86_FEATURE_K6_MTRR); #endif #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PCI) @@ -795,7 +795,7 @@ static void init_amd_zn(struct cpuinfo_x86 *c) * Fix erratum 1076: CPB feature bit not being set in CPUID. It affects * all up to and including B1. */ - if (c->x86_model <= 1 && c->x86_mask <= 1) + if (c->x86_model <= 1 && c->x86_stepping <= 1) set_cpu_cap(c, X86_FEATURE_CPB); } @@ -906,11 +906,11 @@ static unsigned int amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) /* AMD errata T13 (order #21922) */ if ((c->x86 == 6)) { /* Duron Rev A0 */ - if (c->x86_model == 3 && c->x86_mask == 0) + if (c->x86_model == 3 && c->x86_stepping == 0) size = 64; /* Tbird rev A1/A2 */ if (c->x86_model == 4 && - (c->x86_mask == 0 || c->x86_mask == 1)) + (c->x86_stepping == 0 || c->x86_stepping == 1)) size = 256; } return size; @@ -1047,7 +1047,7 @@ static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum) } /* OSVW unavailable or ID unknown, match family-model-stepping range */ - ms = (cpu->x86_model << 4) | cpu->x86_mask; + ms = (cpu->x86_model << 4) | cpu->x86_stepping; while ((range = *erratum++)) if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) && (ms >= AMD_MODEL_RANGE_START(range)) && diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 71949bf2de5a..bfca937bdcc3 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -162,8 +162,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) if (cmdline_find_option_bool(boot_command_line, "nospectre_v2")) return SPECTRE_V2_CMD_NONE; else { - ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, - sizeof(arg)); + ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg)); if (ret < 0) return SPECTRE_V2_CMD_AUTO; @@ -175,8 +174,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) } if (i >= ARRAY_SIZE(mitigation_options)) { - pr_err("unknown option (%s). Switching to AUTO select\n", - mitigation_options[i].option); + pr_err("unknown option (%s). Switching to AUTO select\n", arg); return SPECTRE_V2_CMD_AUTO; } } @@ -185,8 +183,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) cmd == SPECTRE_V2_CMD_RETPOLINE_AMD || cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) && !IS_ENABLED(CONFIG_RETPOLINE)) { - pr_err("%s selected but not compiled in. Switching to AUTO select\n", - mitigation_options[i].option); + pr_err("%s selected but not compiled in. Switching to AUTO select\n", mitigation_options[i].option); return SPECTRE_V2_CMD_AUTO; } @@ -256,14 +253,14 @@ static void __init spectre_v2_select_mitigation(void) goto retpoline_auto; break; } - pr_err("kernel not compiled with retpoline; no mitigation available!"); + pr_err("Spectre mitigation: kernel not compiled with retpoline; no mitigation available!"); return; retpoline_auto: if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { retpoline_amd: if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) { - pr_err("LFENCE not serializing. Switching to generic retpoline\n"); + pr_err("Spectre mitigation: LFENCE not serializing, switching to generic retpoline\n"); goto retpoline_generic; } mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD : @@ -281,7 +278,7 @@ retpoline_auto: pr_info("%s\n", spectre_v2_strings[mode]); /* - * If neither SMEP or KPTI are available, there is a risk of + * If neither SMEP nor PTI are available, there is a risk of * hitting userspace addresses in the RSB after a context switch * from a shallow call stack to a deeper one. To prevent this fill * the entire RSB, even when using IBRS. @@ -295,21 +292,29 @@ retpoline_auto: if ((!boot_cpu_has(X86_FEATURE_PTI) && !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) { setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); - pr_info("Filling RSB on context switch\n"); + pr_info("Spectre v2 mitigation: Filling RSB on context switch\n"); } /* Initialize Indirect Branch Prediction Barrier if supported */ if (boot_cpu_has(X86_FEATURE_IBPB)) { setup_force_cpu_cap(X86_FEATURE_USE_IBPB); - pr_info("Enabling Indirect Branch Prediction Barrier\n"); + pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n"); + } + + /* + * Retpoline means the kernel is safe because it has no indirect + * branches. But firmware isn't, so use IBRS to protect that. + */ + if (boot_cpu_has(X86_FEATURE_IBRS)) { + setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW); + pr_info("Enabling Restricted Speculation for firmware calls\n"); } } #undef pr_fmt #ifdef CONFIG_SYSFS -ssize_t cpu_show_meltdown(struct device *dev, - struct device_attribute *attr, char *buf) +ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) { if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN)) return sprintf(buf, "Not affected\n"); @@ -318,28 +323,21 @@ ssize_t cpu_show_meltdown(struct device *dev, return sprintf(buf, "Vulnerable\n"); } -ssize_t cpu_show_spectre_v1(struct device *dev, - struct device_attribute *attr, char *buf) +ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf) { if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1)) return sprintf(buf, "Not affected\n"); return sprintf(buf, "Mitigation: __user pointer sanitization\n"); } -ssize_t cpu_show_spectre_v2(struct device *dev, - struct device_attribute *attr, char *buf) +ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf) { if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) return sprintf(buf, "Not affected\n"); - return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], + return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", + boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", spectre_v2_module_string()); } #endif - -void __ibp_barrier(void) -{ - __wrmsr(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, 0); -} -EXPORT_SYMBOL_GPL(__ibp_barrier); diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index c578cd29c2d2..e5ec0f11c0de 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -140,7 +140,7 @@ static void init_centaur(struct cpuinfo_x86 *c) clear_cpu_cap(c, X86_FEATURE_TSC); break; case 8: - switch (c->x86_mask) { + switch (c->x86_stepping) { default: name = "2"; break; @@ -215,7 +215,7 @@ centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size) * - Note, it seems this may only be in engineering samples. */ if ((c->x86 == 6) && (c->x86_model == 9) && - (c->x86_mask == 1) && (size == 65)) + (c->x86_stepping == 1) && (size == 65)) size -= 1; return size; } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index d63f4b5706e4..348cf4821240 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -731,7 +731,7 @@ void cpu_detect(struct cpuinfo_x86 *c) cpuid(0x00000001, &tfms, &misc, &junk, &cap0); c->x86 = x86_family(tfms); c->x86_model = x86_model(tfms); - c->x86_mask = x86_stepping(tfms); + c->x86_stepping = x86_stepping(tfms); if (cap0 & (1<<19)) { c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; @@ -1184,9 +1184,9 @@ static void identify_cpu(struct cpuinfo_x86 *c) int i; c->loops_per_jiffy = loops_per_jiffy; - c->x86_cache_size = -1; + c->x86_cache_size = 0; c->x86_vendor = X86_VENDOR_UNKNOWN; - c->x86_model = c->x86_mask = 0; /* So far unknown... */ + c->x86_model = c->x86_stepping = 0; /* So far unknown... */ c->x86_vendor_id[0] = '\0'; /* Unset */ c->x86_model_id[0] = '\0'; /* Unset */ c->x86_max_cores = 1; @@ -1378,8 +1378,8 @@ void print_cpu_info(struct cpuinfo_x86 *c) pr_cont(" (family: 0x%x, model: 0x%x", c->x86, c->x86_model); - if (c->x86_mask || c->cpuid_level >= 0) - pr_cont(", stepping: 0x%x)\n", c->x86_mask); + if (c->x86_stepping || c->cpuid_level >= 0) + pr_cont(", stepping: 0x%x)\n", c->x86_stepping); else pr_cont(")\n"); } @@ -1749,3 +1749,33 @@ static int __init init_cpu_syscore(void) return 0; } core_initcall(init_cpu_syscore); + +/* + * The microcode loader calls this upon late microcode load to recheck features, + * only when microcode has been updated. Caller holds microcode_mutex and CPU + * hotplug lock. + */ +void microcode_check(void) +{ + struct cpuinfo_x86 info; + + perf_check_microcode(); + + /* Reload CPUID max function as it might've changed. */ + info.cpuid_level = cpuid_eax(0); + + /* + * Copy all capability leafs to pick up the synthetic ones so that + * memcmp() below doesn't fail on that. The ones coming from CPUID will + * get overwritten in get_cpu_cap(). + */ + memcpy(&info.x86_capability, &boot_cpu_data.x86_capability, sizeof(info.x86_capability)); + + get_cpu_cap(&info); + + if (!memcmp(&info.x86_capability, &boot_cpu_data.x86_capability, sizeof(info.x86_capability))) + return; + + pr_warn("x86/CPU: CPU features have changed after loading microcode, but might not take effect.\n"); + pr_warn("x86/CPU: Please consider either early loading through initrd/built-in or a potential BIOS update.\n"); +} diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 6b4bb335641f..8949b7ae6d92 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -215,7 +215,7 @@ static void init_cyrix(struct cpuinfo_x86 *c) /* common case step number/rev -- exceptions handled below */ c->x86_model = (dir1 >> 4) + 1; - c->x86_mask = dir1 & 0xf; + c->x86_stepping = dir1 & 0xf; /* Now cook; the original recipe is by Channing Corn, from Cyrix. * We do the same thing for each generation: we work out diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 319bf989fad1..4aa9fd379390 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -116,14 +116,13 @@ struct sku_microcode { u32 microcode; }; static const struct sku_microcode spectre_bad_microcodes[] = { - { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0B, 0x84 }, - { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0A, 0x84 }, - { INTEL_FAM6_KABYLAKE_DESKTOP, 0x09, 0x84 }, - { INTEL_FAM6_KABYLAKE_MOBILE, 0x0A, 0x84 }, - { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x84 }, + { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0B, 0x80 }, + { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0A, 0x80 }, + { INTEL_FAM6_KABYLAKE_DESKTOP, 0x09, 0x80 }, + { INTEL_FAM6_KABYLAKE_MOBILE, 0x0A, 0x80 }, + { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x80 }, { INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e }, { INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c }, - { INTEL_FAM6_SKYLAKE_MOBILE, 0x03, 0xc2 }, { INTEL_FAM6_SKYLAKE_DESKTOP, 0x03, 0xc2 }, { INTEL_FAM6_BROADWELL_CORE, 0x04, 0x28 }, { INTEL_FAM6_BROADWELL_GT3E, 0x01, 0x1b }, @@ -136,8 +135,6 @@ static const struct sku_microcode spectre_bad_microcodes[] = { { INTEL_FAM6_HASWELL_X, 0x02, 0x3b }, { INTEL_FAM6_HASWELL_X, 0x04, 0x10 }, { INTEL_FAM6_IVYBRIDGE_X, 0x04, 0x42a }, - /* Updated in the 20180108 release; blacklist until we know otherwise */ - { INTEL_FAM6_ATOM_GEMINI_LAKE, 0x01, 0x22 }, /* Observed in the wild */ { INTEL_FAM6_SANDYBRIDGE_X, 0x06, 0x61b }, { INTEL_FAM6_SANDYBRIDGE_X, 0x07, 0x712 }, @@ -147,9 +144,16 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c) { int i; + /* + * We know that the hypervisor lie to us on the microcode version so + * we may as well hope that it is running the correct version. + */ + if (cpu_has(c, X86_FEATURE_HYPERVISOR)) + return false; + for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) { if (c->x86_model == spectre_bad_microcodes[i].model && - c->x86_mask == spectre_bad_microcodes[i].stepping) + c->x86_stepping == spectre_bad_microcodes[i].stepping) return (c->microcode <= spectre_bad_microcodes[i].microcode); } return false; @@ -196,7 +200,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) * need the microcode to have already been loaded... so if it is * not, recommend a BIOS update and disable large pages. */ - if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_mask <= 2 && + if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_stepping <= 2 && c->microcode < 0x20e) { pr_warn("Atom PSE erratum detected, BIOS microcode update recommended\n"); clear_cpu_cap(c, X86_FEATURE_PSE); @@ -212,7 +216,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) /* CPUID workaround for 0F33/0F34 CPU */ if (c->x86 == 0xF && c->x86_model == 0x3 - && (c->x86_mask == 0x3 || c->x86_mask == 0x4)) + && (c->x86_stepping == 0x3 || c->x86_stepping == 0x4)) c->x86_phys_bits = 36; /* @@ -310,7 +314,7 @@ int ppro_with_ram_bug(void) if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 1 && - boot_cpu_data.x86_mask < 8) { + boot_cpu_data.x86_stepping < 8) { pr_info("Pentium Pro with Errata#50 detected. Taking evasive action.\n"); return 1; } @@ -327,7 +331,7 @@ static void intel_smp_check(struct cpuinfo_x86 *c) * Mask B, Pentium, but not Pentium MMX */ if (c->x86 == 5 && - c->x86_mask >= 1 && c->x86_mask <= 4 && + c->x86_stepping >= 1 && c->x86_stepping <= 4 && c->x86_model <= 3) { /* * Remember we have B step Pentia with bugs @@ -370,7 +374,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c) * SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until * model 3 mask 3 */ - if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) + if ((c->x86<<8 | c->x86_model<<4 | c->x86_stepping) < 0x633) clear_cpu_cap(c, X86_FEATURE_SEP); /* @@ -388,7 +392,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c) * P4 Xeon erratum 037 workaround. * Hardware prefetcher may cause stale data to be loaded into the cache. */ - if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { + if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_stepping == 1)) { if (msr_set_bit(MSR_IA32_MISC_ENABLE, MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT) > 0) { pr_info("CPU: C0 stepping P4 Xeon detected.\n"); @@ -403,7 +407,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c) * Specification Update"). */ if (boot_cpu_has(X86_FEATURE_APIC) && (c->x86<<8 | c->x86_model<<4) == 0x520 && - (c->x86_mask < 0x6 || c->x86_mask == 0xb)) + (c->x86_stepping < 0x6 || c->x86_stepping == 0xb)) set_cpu_bug(c, X86_BUG_11AP); @@ -650,7 +654,7 @@ static void init_intel(struct cpuinfo_x86 *c) case 6: if (l2 == 128) p = "Celeron (Mendocino)"; - else if (c->x86_mask == 0 || c->x86_mask == 5) + else if (c->x86_stepping == 0 || c->x86_stepping == 5) p = "Celeron-A"; break; diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c index 410629f10ad3..589b948e6e01 100644 --- a/arch/x86/kernel/cpu/intel_rdt.c +++ b/arch/x86/kernel/cpu/intel_rdt.c @@ -819,7 +819,7 @@ static __init void rdt_quirks(void) cache_alloc_hsw_probe(); break; case INTEL_FAM6_SKYLAKE_X: - if (boot_cpu_data.x86_mask <= 4) + if (boot_cpu_data.x86_stepping <= 4) set_rdt_options("!cmt,!mbmtotal,!mbmlocal,!l3cat"); } } diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index bdab7d2f51af..fca759d272a1 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -1804,6 +1804,7 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn, goto out_common_fail; } closid = ret; + ret = 0; rdtgrp->closid = closid; list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups); diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h index aa0d5df9dc60..e956eb267061 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h @@ -115,4 +115,19 @@ static inline void mce_unregister_injector_chain(struct notifier_block *nb) { } extern struct mca_config mca_cfg; +#ifndef CONFIG_X86_64 +/* + * On 32-bit systems it would be difficult to safely unmap a poison page + * from the kernel 1:1 map because there are no non-canonical addresses that + * we can use to refer to the address without risking a speculative access. + * However, this isn't much of an issue because: + * 1) Few unmappable pages are in the 1:1 map. Most are in HIGHMEM which + * are only mapped into the kernel as needed + * 2) Few people would run a 32-bit kernel on a machine that supports + * recoverable errors because they have too much memory to boot 32-bit. + */ +static inline void mce_unmap_kpfn(unsigned long pfn) {} +#define mce_unmap_kpfn mce_unmap_kpfn +#endif + #endif /* __X86_MCE_INTERNAL_H__ */ diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 3a8e88a611eb..466f47301334 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -56,6 +56,9 @@ static DEFINE_MUTEX(mce_log_mutex); +/* sysfs synchronization */ +static DEFINE_MUTEX(mce_sysfs_mutex); + #define CREATE_TRACE_POINTS #include <trace/events/mce.h> @@ -105,6 +108,10 @@ static struct irq_work mce_irq_work; static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs); +#ifndef mce_unmap_kpfn +static void mce_unmap_kpfn(unsigned long pfn); +#endif + /* * CPU/chipset specific EDAC code can register a notifier call here to print * MCE errors in a human-readable form. @@ -126,6 +133,8 @@ void mce_setup(struct mce *m) if (this_cpu_has(X86_FEATURE_INTEL_PPIN)) rdmsrl(MSR_PPIN, m->ppin); + + m->microcode = boot_cpu_data.microcode; } DEFINE_PER_CPU(struct mce, injectm); @@ -234,7 +243,7 @@ static void __print_mce(struct mce *m) m->cs, m->ip); if (m->cs == __KERNEL_CS) - pr_cont("{%pS}", (void *)m->ip); + pr_cont("{%pS}", (void *)(unsigned long)m->ip); pr_cont("\n"); } @@ -258,7 +267,7 @@ static void __print_mce(struct mce *m) */ pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x microcode %x\n", m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid, - cpu_data(m->extcpu).microcode); + m->microcode); } static void print_mce(struct mce *m) @@ -590,7 +599,8 @@ static int srao_decode_notifier(struct notifier_block *nb, unsigned long val, if (mce_usable_address(mce) && (mce->severity == MCE_AO_SEVERITY)) { pfn = mce->addr >> PAGE_SHIFT; - memory_failure(pfn, 0); + if (!memory_failure(pfn, 0)) + mce_unmap_kpfn(pfn); } return NOTIFY_OK; @@ -1057,12 +1067,13 @@ static int do_memory_failure(struct mce *m) ret = memory_failure(m->addr >> PAGE_SHIFT, flags); if (ret) pr_err("Memory error not recovered"); + else + mce_unmap_kpfn(m->addr >> PAGE_SHIFT); return ret; } -#if defined(arch_unmap_kpfn) && defined(CONFIG_MEMORY_FAILURE) - -void arch_unmap_kpfn(unsigned long pfn) +#ifndef mce_unmap_kpfn +static void mce_unmap_kpfn(unsigned long pfn) { unsigned long decoy_addr; @@ -1073,7 +1084,7 @@ void arch_unmap_kpfn(unsigned long pfn) * We would like to just call: * set_memory_np((unsigned long)pfn_to_kaddr(pfn), 1); * but doing that would radically increase the odds of a - * speculative access to the posion page because we'd have + * speculative access to the poison page because we'd have * the virtual address of the kernel 1:1 mapping sitting * around in registers. * Instead we get tricky. We create a non-canonical address @@ -1098,7 +1109,6 @@ void arch_unmap_kpfn(unsigned long pfn) if (set_memory_np(decoy_addr, 1)) pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn); - } #endif @@ -2081,6 +2091,7 @@ static ssize_t set_ignore_ce(struct device *s, if (kstrtou64(buf, 0, &new) < 0) return -EINVAL; + mutex_lock(&mce_sysfs_mutex); if (mca_cfg.ignore_ce ^ !!new) { if (new) { /* disable ce features */ @@ -2093,6 +2104,8 @@ static ssize_t set_ignore_ce(struct device *s, on_each_cpu(mce_enable_ce, (void *)1, 1); } } + mutex_unlock(&mce_sysfs_mutex); + return size; } @@ -2105,6 +2118,7 @@ static ssize_t set_cmci_disabled(struct device *s, if (kstrtou64(buf, 0, &new) < 0) return -EINVAL; + mutex_lock(&mce_sysfs_mutex); if (mca_cfg.cmci_disabled ^ !!new) { if (new) { /* disable cmci */ @@ -2116,6 +2130,8 @@ static ssize_t set_cmci_disabled(struct device *s, on_each_cpu(mce_enable_ce, NULL, 1); } } + mutex_unlock(&mce_sysfs_mutex); + return size; } @@ -2123,8 +2139,19 @@ static ssize_t store_int_with_restart(struct device *s, struct device_attribute *attr, const char *buf, size_t size) { - ssize_t ret = device_store_int(s, attr, buf, size); + unsigned long old_check_interval = check_interval; + ssize_t ret = device_store_ulong(s, attr, buf, size); + + if (check_interval == old_check_interval) + return ret; + + if (check_interval < 1) + check_interval = 1; + + mutex_lock(&mce_sysfs_mutex); mce_restart(); + mutex_unlock(&mce_sysfs_mutex); + return ret; } diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 330b8462d426..a998e1a7d46f 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -498,7 +498,7 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size, return patch_size; } -static int apply_microcode_amd(int cpu) +static enum ucode_state apply_microcode_amd(int cpu) { struct cpuinfo_x86 *c = &cpu_data(cpu); struct microcode_amd *mc_amd; @@ -512,7 +512,7 @@ static int apply_microcode_amd(int cpu) p = find_patch(cpu); if (!p) - return 0; + return UCODE_NFOUND; mc_amd = p->data; uci->mc = p->data; @@ -523,13 +523,13 @@ static int apply_microcode_amd(int cpu) if (rev >= mc_amd->hdr.patch_id) { c->microcode = rev; uci->cpu_sig.rev = rev; - return 0; + return UCODE_OK; } if (__apply_microcode_amd(mc_amd)) { pr_err("CPU%d: update failed for patch_level=0x%08x\n", cpu, mc_amd->hdr.patch_id); - return -1; + return UCODE_ERROR; } pr_info("CPU%d: new patch_level=0x%08x\n", cpu, mc_amd->hdr.patch_id); @@ -537,7 +537,7 @@ static int apply_microcode_amd(int cpu) uci->cpu_sig.rev = mc_amd->hdr.patch_id; c->microcode = mc_amd->hdr.patch_id; - return 0; + return UCODE_UPDATED; } static int install_equiv_cpu_table(const u8 *buf) diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 319dd65f98a2..70ecbc8099c9 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -22,13 +22,16 @@ #define pr_fmt(fmt) "microcode: " fmt #include <linux/platform_device.h> +#include <linux/stop_machine.h> #include <linux/syscore_ops.h> #include <linux/miscdevice.h> #include <linux/capability.h> #include <linux/firmware.h> #include <linux/kernel.h> +#include <linux/delay.h> #include <linux/mutex.h> #include <linux/cpu.h> +#include <linux/nmi.h> #include <linux/fs.h> #include <linux/mm.h> @@ -64,6 +67,11 @@ LIST_HEAD(microcode_cache); */ static DEFINE_MUTEX(microcode_mutex); +/* + * Serialize late loading so that CPUs get updated one-by-one. + */ +static DEFINE_SPINLOCK(update_lock); + struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; struct cpu_info_ctx { @@ -373,26 +381,23 @@ static int collect_cpu_info(int cpu) return ret; } -struct apply_microcode_ctx { - int err; -}; - static void apply_microcode_local(void *arg) { - struct apply_microcode_ctx *ctx = arg; + enum ucode_state *err = arg; - ctx->err = microcode_ops->apply_microcode(smp_processor_id()); + *err = microcode_ops->apply_microcode(smp_processor_id()); } static int apply_microcode_on_target(int cpu) { - struct apply_microcode_ctx ctx = { .err = 0 }; + enum ucode_state err; int ret; - ret = smp_call_function_single(cpu, apply_microcode_local, &ctx, 1); - if (!ret) - ret = ctx.err; - + ret = smp_call_function_single(cpu, apply_microcode_local, &err, 1); + if (!ret) { + if (err == UCODE_ERROR) + ret = 1; + } return ret; } @@ -489,31 +494,110 @@ static void __exit microcode_dev_exit(void) /* fake device for request_firmware */ static struct platform_device *microcode_pdev; -static int reload_for_cpu(int cpu) +/* + * Late loading dance. Why the heavy-handed stomp_machine effort? + * + * - HT siblings must be idle and not execute other code while the other sibling + * is loading microcode in order to avoid any negative interactions caused by + * the loading. + * + * - In addition, microcode update on the cores must be serialized until this + * requirement can be relaxed in the future. Right now, this is conservative + * and good. + */ +#define SPINUNIT 100 /* 100 nsec */ + +static int check_online_cpus(void) { - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - enum ucode_state ustate; - int err = 0; + if (num_online_cpus() == num_present_cpus()) + return 0; - if (!uci->valid) - return err; + pr_err("Not all CPUs online, aborting microcode update.\n"); - ustate = microcode_ops->request_microcode_fw(cpu, µcode_pdev->dev, true); - if (ustate == UCODE_OK) - apply_microcode_on_target(cpu); - else - if (ustate == UCODE_ERROR) - err = -EINVAL; - return err; + return -EINVAL; +} + +static atomic_t late_cpus; + +/* + * Returns: + * < 0 - on error + * 0 - no update done + * 1 - microcode was updated + */ +static int __reload_late(void *info) +{ + unsigned int timeout = NSEC_PER_SEC; + int all_cpus = num_online_cpus(); + int cpu = smp_processor_id(); + enum ucode_state err; + int ret = 0; + + atomic_dec(&late_cpus); + + /* + * Wait for all CPUs to arrive. A load will not be attempted unless all + * CPUs show up. + * */ + while (atomic_read(&late_cpus)) { + if (timeout < SPINUNIT) { + pr_err("Timeout while waiting for CPUs rendezvous, remaining: %d\n", + atomic_read(&late_cpus)); + return -1; + } + + ndelay(SPINUNIT); + timeout -= SPINUNIT; + + touch_nmi_watchdog(); + } + + spin_lock(&update_lock); + apply_microcode_local(&err); + spin_unlock(&update_lock); + + if (err > UCODE_NFOUND) { + pr_warn("Error reloading microcode on CPU %d\n", cpu); + ret = -1; + } else if (err == UCODE_UPDATED) { + ret = 1; + } + + atomic_inc(&late_cpus); + + while (atomic_read(&late_cpus) != all_cpus) + cpu_relax(); + + return ret; +} + +/* + * Reload microcode late on all CPUs. Wait for a sec until they + * all gather together. + */ +static int microcode_reload_late(void) +{ + int ret; + + atomic_set(&late_cpus, num_online_cpus()); + + ret = stop_machine_cpuslocked(__reload_late, NULL, cpu_online_mask); + if (ret < 0) + return ret; + else if (ret > 0) + microcode_check(); + + return ret; } static ssize_t reload_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t size) { + enum ucode_state tmp_ret = UCODE_OK; + int bsp = boot_cpu_data.cpu_index; unsigned long val; - int cpu; - ssize_t ret = 0, tmp_ret; + ssize_t ret = 0; ret = kstrtoul(buf, 0, &val); if (ret) @@ -522,23 +606,24 @@ static ssize_t reload_store(struct device *dev, if (val != 1) return size; + tmp_ret = microcode_ops->request_microcode_fw(bsp, µcode_pdev->dev, true); + if (tmp_ret != UCODE_OK) + return size; + get_online_cpus(); - mutex_lock(µcode_mutex); - for_each_online_cpu(cpu) { - tmp_ret = reload_for_cpu(cpu); - if (tmp_ret != 0) - pr_warn("Error reloading microcode on CPU %d\n", cpu); - /* save retval of the first encountered reload error */ - if (!ret) - ret = tmp_ret; - } - if (!ret) - perf_check_microcode(); + ret = check_online_cpus(); + if (ret) + goto put; + + mutex_lock(µcode_mutex); + ret = microcode_reload_late(); mutex_unlock(µcode_mutex); + +put: put_online_cpus(); - if (!ret) + if (ret >= 0) ret = size; return ret; diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index f7c55b0e753a..2aded9db1d42 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -589,6 +589,23 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early) if (!mc) return 0; + /* + * Save us the MSR write below - which is a particular expensive + * operation - when the other hyperthread has updated the microcode + * already. + */ + rev = intel_get_microcode_revision(); + if (rev >= mc->hdr.rev) { + uci->cpu_sig.rev = rev; + return UCODE_OK; + } + + /* + * Writeback and invalidate caches before updating microcode to avoid + * internal issues depending on what the microcode is updating. + */ + native_wbinvd(); + /* write microcode via MSR 0x79 */ native_wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits); @@ -772,27 +789,44 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) return 0; } -static int apply_microcode_intel(int cpu) +static enum ucode_state apply_microcode_intel(int cpu) { + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + struct cpuinfo_x86 *c = &cpu_data(cpu); struct microcode_intel *mc; - struct ucode_cpu_info *uci; - struct cpuinfo_x86 *c; static int prev_rev; u32 rev; /* We should bind the task to the CPU */ if (WARN_ON(raw_smp_processor_id() != cpu)) - return -1; + return UCODE_ERROR; - uci = ucode_cpu_info + cpu; - mc = uci->mc; + /* Look for a newer patch in our cache: */ + mc = find_patch(uci); if (!mc) { - /* Look for a newer patch in our cache: */ - mc = find_patch(uci); + mc = uci->mc; if (!mc) - return 0; + return UCODE_NFOUND; } + /* + * Save us the MSR write below - which is a particular expensive + * operation - when the other hyperthread has updated the microcode + * already. + */ + rev = intel_get_microcode_revision(); + if (rev >= mc->hdr.rev) { + uci->cpu_sig.rev = rev; + c->microcode = rev; + return UCODE_OK; + } + + /* + * Writeback and invalidate caches before updating microcode to avoid + * internal issues depending on what the microcode is updating. + */ + native_wbinvd(); + /* write microcode via MSR 0x79 */ wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits); @@ -801,7 +835,7 @@ static int apply_microcode_intel(int cpu) if (rev != mc->hdr.rev) { pr_err("CPU%d update to revision 0x%x failed\n", cpu, mc->hdr.rev); - return -1; + return UCODE_ERROR; } if (rev != prev_rev) { @@ -813,12 +847,10 @@ static int apply_microcode_intel(int cpu) prev_rev = rev; } - c = &cpu_data(cpu); - uci->cpu_sig.rev = rev; c->microcode = rev; - return 0; + return UCODE_UPDATED; } static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, @@ -921,7 +953,7 @@ static bool is_blacklisted(unsigned int cpu) */ if (c->x86 == 6 && c->x86_model == INTEL_FAM6_BROADWELL_X && - c->x86_mask == 0x01 && + c->x86_stepping == 0x01 && llc_size_per_core > 2621440 && c->microcode < 0x0b000021) { pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode); @@ -944,7 +976,7 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device, return UCODE_NFOUND; sprintf(name, "intel-ucode/%02x-%02x-%02x", - c->x86, c->x86_model, c->x86_mask); + c->x86, c->x86_model, c->x86_stepping); if (request_firmware_direct(&firmware, name, device)) { pr_debug("data file %s load failed\n", name); @@ -982,7 +1014,7 @@ static struct microcode_ops microcode_intel_ops = { static int __init calc_llc_size_per_core(struct cpuinfo_x86 *c) { - u64 llc_size = c->x86_cache_size * 1024; + u64 llc_size = c->x86_cache_size * 1024ULL; do_div(llc_size, c->x86_max_cores); diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index fdc55215d44d..e12ee86906c6 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -859,7 +859,7 @@ int generic_validate_add_page(unsigned long base, unsigned long size, */ if (is_cpu(INTEL) && boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 1 && - boot_cpu_data.x86_mask <= 7) { + boot_cpu_data.x86_stepping <= 7) { if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) { pr_warn("mtrr: base(0x%lx000) is not 4 MiB aligned\n", base); return -EINVAL; diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 40d5a8a75212..7468de429087 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -711,8 +711,8 @@ void __init mtrr_bp_init(void) if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && boot_cpu_data.x86 == 0xF && boot_cpu_data.x86_model == 0x3 && - (boot_cpu_data.x86_mask == 0x3 || - boot_cpu_data.x86_mask == 0x4)) + (boot_cpu_data.x86_stepping == 0x3 || + boot_cpu_data.x86_stepping == 0x4)) phys_addr = 36; size_or_mask = SIZE_OR_MASK_BITS(phys_addr); diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index e7ecedafa1c8..2c8522a39ed5 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -72,8 +72,8 @@ static int show_cpuinfo(struct seq_file *m, void *v) c->x86_model, c->x86_model_id[0] ? c->x86_model_id : "unknown"); - if (c->x86_mask || c->cpuid_level >= 0) - seq_printf(m, "stepping\t: %d\n", c->x86_mask); + if (c->x86_stepping || c->cpuid_level >= 0) + seq_printf(m, "stepping\t: %d\n", c->x86_stepping); else seq_puts(m, "stepping\t: unknown\n"); if (c->microcode) @@ -91,8 +91,8 @@ static int show_cpuinfo(struct seq_file *m, void *v) } /* Cache size */ - if (c->x86_cache_size >= 0) - seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); + if (c->x86_cache_size) + seq_printf(m, "cache size\t: %u KB\n", c->x86_cache_size); show_cpuinfo_core(m, c, cpu); show_cpuinfo_misc(m, c); diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index c29020907886..b59e4fb40fd9 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -37,7 +37,7 @@ #define X86 new_cpu_data+CPUINFO_x86 #define X86_VENDOR new_cpu_data+CPUINFO_x86_vendor #define X86_MODEL new_cpu_data+CPUINFO_x86_model -#define X86_MASK new_cpu_data+CPUINFO_x86_mask +#define X86_STEPPING new_cpu_data+CPUINFO_x86_stepping #define X86_HARD_MATH new_cpu_data+CPUINFO_hard_math #define X86_CPUID new_cpu_data+CPUINFO_cpuid_level #define X86_CAPABILITY new_cpu_data+CPUINFO_x86_capability @@ -332,7 +332,7 @@ ENTRY(startup_32_smp) shrb $4,%al movb %al,X86_MODEL andb $0x0f,%cl # mask mask revision - movb %cl,X86_MASK + movb %cl,X86_STEPPING movl %edx,X86_CAPABILITY .Lis486: diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 04a625f0fcda..0f545b3cf926 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -23,6 +23,7 @@ #include <asm/nops.h> #include "../entry/calling.h" #include <asm/export.h> +#include <asm/nospec-branch.h> #ifdef CONFIG_PARAVIRT #include <asm/asm-offsets.h> @@ -134,6 +135,7 @@ ENTRY(secondary_startup_64) /* Ensure I am executing from virtual addresses */ movq $1f, %rax + ANNOTATE_RETPOLINE_SAFE jmp *%rax 1: UNWIND_HINT_EMPTY diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 2f723301eb58..38deafebb21b 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -23,7 +23,7 @@ /* * this changes the io permissions bitmap in the current task. */ -asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) +SYSCALL_DEFINE3(ioperm, unsigned long, from, unsigned long, num, int, turn_on) { struct thread_struct *t = ¤t->thread; struct tss_struct *tss; diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index bd36f3c33cd0..0715f827607c 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -1168,10 +1168,18 @@ NOKPROBE_SYMBOL(longjmp_break_handler); bool arch_within_kprobe_blacklist(unsigned long addr) { + bool is_in_entry_trampoline_section = false; + +#ifdef CONFIG_X86_64 + is_in_entry_trampoline_section = + (addr >= (unsigned long)__entry_trampoline_start && + addr < (unsigned long)__entry_trampoline_end); +#endif return (addr >= (unsigned long)__kprobes_text_start && addr < (unsigned long)__kprobes_text_end) || (addr >= (unsigned long)__entry_text_start && - addr < (unsigned long)__entry_text_end); + addr < (unsigned long)__entry_text_end) || + is_in_entry_trampoline_section; } int __init arch_init_kprobes(void) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 4e37d1a851a6..bc1a27280c4b 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -49,7 +49,7 @@ static int kvmapf = 1; -static int parse_no_kvmapf(char *arg) +static int __init parse_no_kvmapf(char *arg) { kvmapf = 0; return 0; @@ -58,7 +58,7 @@ static int parse_no_kvmapf(char *arg) early_param("no-kvmapf", parse_no_kvmapf); static int steal_acc = 1; -static int parse_no_stealacc(char *arg) +static int __init parse_no_stealacc(char *arg) { steal_acc = 0; return 0; @@ -67,7 +67,7 @@ static int parse_no_stealacc(char *arg) early_param("no-steal-acc", parse_no_stealacc); static int kvmclock_vsyscall = 1; -static int parse_no_kvmclock_vsyscall(char *arg) +static int __init parse_no_kvmclock_vsyscall(char *arg) { kvmclock_vsyscall = 0; return 0; @@ -341,10 +341,10 @@ static void kvm_guest_cpu_init(void) #endif pa |= KVM_ASYNC_PF_ENABLED; - /* Async page fault support for L1 hypervisor is optional */ - if (wrmsr_safe(MSR_KVM_ASYNC_PF_EN, - (pa | KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT) & 0xffffffff, pa >> 32) < 0) - wrmsrl(MSR_KVM_ASYNC_PF_EN, pa); + if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_VMEXIT)) + pa |= KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT; + + wrmsrl(MSR_KVM_ASYNC_PF_EN, pa); __this_cpu_write(apf_reason.enabled, 1); printk(KERN_INFO"KVM setup async PF for cpu %d\n", smp_processor_id()); @@ -545,7 +545,8 @@ static void __init kvm_guest_init(void) pv_time_ops.steal_clock = kvm_steal_clock; } - if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH)) + if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) && + !kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) pv_mmu_ops.flush_tlb_others = kvm_flush_tlb_others; if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) @@ -633,7 +634,8 @@ static __init int kvm_setup_pv_tlb_flush(void) { int cpu; - if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH)) { + if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) && + !kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) { for_each_possible_cpu(cpu) { zalloc_cpumask_var_node(per_cpu_ptr(&__pv_tlb_mask, cpu), GFP_KERNEL, cpu_to_node(cpu)); diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 1f790cf9d38f..3b7427aa7d85 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -542,6 +542,7 @@ int arch_kexec_apply_relocations_add(const Elf64_Ehdr *ehdr, goto overflow; break; case R_X86_64_PC32: + case R_X86_64_PLT32: value -= (u64)address; *(u32 *)location = value; break; diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index da0c160e5589..f58336af095c 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -191,6 +191,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, goto overflow; break; case R_X86_64_PC32: + case R_X86_64_PLT32: if (*(u32 *)loc != 0) goto invalid_relocation; val -= (u64)loc; diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 27d0a1712663..f1c5eb99d445 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -410,7 +410,7 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) processor.apicver = mpc_default_type > 4 ? 0x10 : 0x01; processor.cpuflag = CPU_ENABLED; processor.cpufeature = (boot_cpu_data.x86 << 8) | - (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask; + (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_stepping; processor.featureflag = boot_cpu_data.x86_capability[CPUID_1_EDX]; processor.reserved[0] = 0; processor.reserved[1] = 0; diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 041096bdef86..99dc79e76bdc 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -200,9 +200,9 @@ static void native_flush_tlb_global(void) __native_flush_tlb_global(); } -static void native_flush_tlb_single(unsigned long addr) +static void native_flush_tlb_one_user(unsigned long addr) { - __native_flush_tlb_single(addr); + __native_flush_tlb_one_user(addr); } struct static_key paravirt_steal_enabled; @@ -401,7 +401,7 @@ struct pv_mmu_ops pv_mmu_ops __ro_after_init = { .flush_tlb_user = native_flush_tlb, .flush_tlb_kernel = native_flush_tlb_global, - .flush_tlb_single = native_flush_tlb_single, + .flush_tlb_one_user = native_flush_tlb_one_user, .flush_tlb_others = native_flush_tlb_others, .pgd_alloc = __paravirt_pgd_alloc, diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 1ae67e982af7..4c616be28506 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1204,20 +1204,13 @@ void __init setup_arch(char **cmdline_p) kasan_init(); -#ifdef CONFIG_X86_32 - /* sync back kernel address range */ - clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY, - swapper_pg_dir + KERNEL_PGD_BOUNDARY, - KERNEL_PGD_PTRS); - /* - * sync back low identity map too. It is used for example - * in the 32-bit EFI stub. + * Sync back kernel address range. + * + * FIXME: Can the later sync in setup_cpu_entry_areas() replace + * this call? */ - clone_pgd_range(initial_page_table, - swapper_pg_dir + KERNEL_PGD_BOUNDARY, - min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY)); -#endif + sync_initial_page_table(); tboot_probe(); diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 497aa766fab3..ea554f812ee1 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -287,24 +287,15 @@ void __init setup_per_cpu_areas(void) /* Setup cpu initialized, callin, callout masks */ setup_cpu_local_masks(); -#ifdef CONFIG_X86_32 /* * Sync back kernel address range again. We already did this in * setup_arch(), but percpu data also needs to be available in * the smpboot asm. We can't reliably pick up percpu mappings * using vmalloc_fault(), because exception dispatch needs * percpu data. + * + * FIXME: Can the later sync in setup_cpu_entry_areas() replace + * this call? */ - clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY, - swapper_pg_dir + KERNEL_PGD_BOUNDARY, - KERNEL_PGD_PTRS); - - /* - * sync back low identity map too. It is used for example - * in the 32-bit EFI stub. - */ - clone_pgd_range(initial_page_table, - swapper_pg_dir + KERNEL_PGD_BOUNDARY, - min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY)); -#endif + sync_initial_page_table(); } diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c index ac057f9b0763..0d930d8987cc 100644 --- a/arch/x86/kernel/signal_compat.c +++ b/arch/x86/kernel/signal_compat.c @@ -43,6 +43,13 @@ static inline void signal_compat_build_tests(void) BUILD_BUG_ON(offsetof(compat_siginfo_t, _sifields) != 3 * sizeof(int)); #define CHECK_CSI_OFFSET(name) BUILD_BUG_ON(_sifields_offset != offsetof(compat_siginfo_t, _sifields.name)) + BUILD_BUG_ON(offsetof(siginfo_t, si_signo) != 0); + BUILD_BUG_ON(offsetof(siginfo_t, si_errno) != 4); + BUILD_BUG_ON(offsetof(siginfo_t, si_code) != 8); + + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_signo) != 0); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_errno) != 4); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_code) != 8); /* * Ensure that the size of each si_field never changes. * If it does, it is a sign that the @@ -63,36 +70,94 @@ static inline void signal_compat_build_tests(void) CHECK_CSI_SIZE (_kill, 2*sizeof(int)); CHECK_SI_SIZE (_kill, 2*sizeof(int)); + BUILD_BUG_ON(offsetof(siginfo_t, si_pid) != 0x10); + BUILD_BUG_ON(offsetof(siginfo_t, si_uid) != 0x14); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_pid) != 0xC); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_uid) != 0x10); + CHECK_CSI_OFFSET(_timer); CHECK_CSI_SIZE (_timer, 3*sizeof(int)); CHECK_SI_SIZE (_timer, 6*sizeof(int)); + BUILD_BUG_ON(offsetof(siginfo_t, si_tid) != 0x10); + BUILD_BUG_ON(offsetof(siginfo_t, si_overrun) != 0x14); + BUILD_BUG_ON(offsetof(siginfo_t, si_value) != 0x18); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_tid) != 0x0C); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_overrun) != 0x10); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_value) != 0x14); + CHECK_CSI_OFFSET(_rt); CHECK_CSI_SIZE (_rt, 3*sizeof(int)); CHECK_SI_SIZE (_rt, 4*sizeof(int)); + BUILD_BUG_ON(offsetof(siginfo_t, si_pid) != 0x10); + BUILD_BUG_ON(offsetof(siginfo_t, si_uid) != 0x14); + BUILD_BUG_ON(offsetof(siginfo_t, si_value) != 0x18); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_pid) != 0x0C); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_uid) != 0x10); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_value) != 0x14); + CHECK_CSI_OFFSET(_sigchld); CHECK_CSI_SIZE (_sigchld, 5*sizeof(int)); CHECK_SI_SIZE (_sigchld, 8*sizeof(int)); + BUILD_BUG_ON(offsetof(siginfo_t, si_pid) != 0x10); + BUILD_BUG_ON(offsetof(siginfo_t, si_uid) != 0x14); + BUILD_BUG_ON(offsetof(siginfo_t, si_status) != 0x18); + BUILD_BUG_ON(offsetof(siginfo_t, si_utime) != 0x20); + BUILD_BUG_ON(offsetof(siginfo_t, si_stime) != 0x28); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_pid) != 0x0C); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_uid) != 0x10); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_status) != 0x14); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_utime) != 0x18); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_stime) != 0x1C); + #ifdef CONFIG_X86_X32_ABI CHECK_CSI_OFFSET(_sigchld_x32); CHECK_CSI_SIZE (_sigchld_x32, 7*sizeof(int)); /* no _sigchld_x32 in the generic siginfo_t */ + BUILD_BUG_ON(offsetof(compat_siginfo_t, _sifields._sigchld_x32._utime) != 0x18); + BUILD_BUG_ON(offsetof(compat_siginfo_t, _sifields._sigchld_x32._stime) != 0x20); #endif CHECK_CSI_OFFSET(_sigfault); CHECK_CSI_SIZE (_sigfault, 4*sizeof(int)); CHECK_SI_SIZE (_sigfault, 8*sizeof(int)); + BUILD_BUG_ON(offsetof(siginfo_t, si_addr) != 0x10); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_addr) != 0x0C); + + BUILD_BUG_ON(offsetof(siginfo_t, si_addr_lsb) != 0x18); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_addr_lsb) != 0x10); + + BUILD_BUG_ON(offsetof(siginfo_t, si_lower) != 0x20); + BUILD_BUG_ON(offsetof(siginfo_t, si_upper) != 0x28); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_lower) != 0x14); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_upper) != 0x18); + + BUILD_BUG_ON(offsetof(siginfo_t, si_pkey) != 0x20); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_pkey) != 0x14); + CHECK_CSI_OFFSET(_sigpoll); CHECK_CSI_SIZE (_sigpoll, 2*sizeof(int)); CHECK_SI_SIZE (_sigpoll, 4*sizeof(int)); + BUILD_BUG_ON(offsetof(siginfo_t, si_band) != 0x10); + BUILD_BUG_ON(offsetof(siginfo_t, si_fd) != 0x18); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_band) != 0x0C); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_fd) != 0x10); + CHECK_CSI_OFFSET(_sigsys); CHECK_CSI_SIZE (_sigsys, 3*sizeof(int)); CHECK_SI_SIZE (_sigsys, 4*sizeof(int)); + BUILD_BUG_ON(offsetof(siginfo_t, si_call_addr) != 0x10); + BUILD_BUG_ON(offsetof(siginfo_t, si_syscall) != 0x18); + BUILD_BUG_ON(offsetof(siginfo_t, si_arch) != 0x1C); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_call_addr) != 0x0C); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_syscall) != 0x10); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_arch) != 0x14); + /* any new si_fields should be added here */ } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 6f27facbaa9b..ff99e2b6fc54 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1281,11 +1281,10 @@ void __init native_smp_prepare_boot_cpu(void) cpu_set_state_online(me); } -void __init native_smp_cpus_done(unsigned int max_cpus) +void __init calculate_max_logical_packages(void) { int ncpus; - pr_debug("Boot done\n"); /* * Today neither Intel nor AMD support heterogenous systems so * extrapolate the boot cpu's data to all packages. @@ -1293,6 +1292,13 @@ void __init native_smp_cpus_done(unsigned int max_cpus) ncpus = cpu_data(0).booted_cores * topology_max_smt_threads(); __max_logical_packages = DIV_ROUND_UP(nr_cpu_ids, ncpus); pr_info("Max logical packages: %u\n", __max_logical_packages); +} + +void __init native_smp_cpus_done(unsigned int max_cpus) +{ + pr_debug("Boot done\n"); + + calculate_max_logical_packages(); if (x86_has_numa_in_package) set_sched_topology(x86_numa_in_package_topology); @@ -1430,8 +1436,8 @@ static void remove_siblinginfo(int cpu) cpumask_clear(cpu_llc_shared_mask(cpu)); cpumask_clear(topology_sibling_cpumask(cpu)); cpumask_clear(topology_core_cpumask(cpu)); - c->phys_proc_id = 0; c->cpu_core_id = 0; + c->booted_cores = 0; cpumask_clear_cpu(cpu, cpu_sibling_setup_mask); recompute_smt_state(); } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 446c9ef8cfc3..3d9b2308e7fa 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -181,7 +181,7 @@ int fixup_bug(struct pt_regs *regs, int trapnr) break; case BUG_TRAP_TYPE_WARN: - regs->ip += LEN_UD0; + regs->ip += LEN_UD2; return 1; } diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 1f9188f5357c..feb28fee6cea 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -5,7 +5,6 @@ #include <asm/unwind.h> #include <asm/orc_types.h> #include <asm/orc_lookup.h> -#include <asm/sections.h> #define orc_warn(fmt, ...) \ printk_deferred_once(KERN_WARNING pr_fmt("WARNING: " fmt), ##__VA_ARGS__) @@ -148,7 +147,7 @@ static struct orc_entry *orc_find(unsigned long ip) } /* vmlinux .init slow lookup: */ - if (ip >= (unsigned long)_sinittext && ip < (unsigned long)_einittext) + if (init_kernel_text(ip)) return __orc_find(__start_orc_unwind_ip, __start_orc_unwind, __stop_orc_unwind_ip - __start_orc_unwind_ip, ip); diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 9b138a06c1a4..b854ebf5851b 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -118,9 +118,11 @@ SECTIONS #ifdef CONFIG_X86_64 . = ALIGN(PAGE_SIZE); + VMLINUX_SYMBOL(__entry_trampoline_start) = .; _entry_trampoline = .; *(.entry_trampoline) . = ALIGN(PAGE_SIZE); + VMLINUX_SYMBOL(__entry_trampoline_end) = .; ASSERT(. - _entry_trampoline == PAGE_SIZE, "entry trampoline is too big"); #endif diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index a0c5a69bc7c4..b671fc2d0422 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -607,7 +607,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, (1 << KVM_FEATURE_PV_EOI) | (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) | (1 << KVM_FEATURE_PV_UNHALT) | - (1 << KVM_FEATURE_PV_TLB_FLUSH); + (1 << KVM_FEATURE_PV_TLB_FLUSH) | + (1 << KVM_FEATURE_ASYNC_PF_VMEXIT); if (sched_info_on()) entry->eax |= (1 << KVM_FEATURE_STEAL_TIME); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 924ac8ce9d50..391dda8d43b7 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2002,14 +2002,13 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) { - struct kvm_lapic *apic; + struct kvm_lapic *apic = vcpu->arch.apic; int i; - apic_debug("%s\n", __func__); + if (!apic) + return; - ASSERT(vcpu); - apic = vcpu->arch.apic; - ASSERT(apic != NULL); + apic_debug("%s\n", __func__); /* Stop the timer in case it's a reset to an active apic */ hrtimer_cancel(&apic->lapic_timer.timer); @@ -2165,7 +2164,6 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu) */ vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE; static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */ - kvm_lapic_reset(vcpu, false); kvm_iodevice_init(&apic->dev, &apic_mmio_ops); return 0; @@ -2569,7 +2567,6 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu) pe = xchg(&apic->pending_events, 0); if (test_bit(KVM_APIC_INIT, &pe)) { - kvm_lapic_reset(vcpu, true); kvm_vcpu_reset(vcpu, true); if (kvm_vcpu_is_bsp(apic->vcpu)) vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 8eca1d04aeb8..f551962ac294 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3029,7 +3029,7 @@ static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn) return RET_PF_RETRY; } - return -EFAULT; + return RET_PF_EMULATE; } static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu, @@ -5080,7 +5080,7 @@ void kvm_mmu_uninit_vm(struct kvm *kvm) typedef bool (*slot_level_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head); /* The caller should hold mmu-lock before calling this function. */ -static bool +static __always_inline bool slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot, slot_level_handler fn, int start_level, int end_level, gfn_t start_gfn, gfn_t end_gfn, bool lock_flush_tlb) @@ -5110,7 +5110,7 @@ slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot, return flush; } -static bool +static __always_inline bool slot_handle_level(struct kvm *kvm, struct kvm_memory_slot *memslot, slot_level_handler fn, int start_level, int end_level, bool lock_flush_tlb) @@ -5121,7 +5121,7 @@ slot_handle_level(struct kvm *kvm, struct kvm_memory_slot *memslot, lock_flush_tlb); } -static bool +static __always_inline bool slot_handle_all_level(struct kvm *kvm, struct kvm_memory_slot *memslot, slot_level_handler fn, bool lock_flush_tlb) { @@ -5129,7 +5129,7 @@ slot_handle_all_level(struct kvm *kvm, struct kvm_memory_slot *memslot, PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb); } -static bool +static __always_inline bool slot_handle_large_level(struct kvm *kvm, struct kvm_memory_slot *memslot, slot_level_handler fn, bool lock_flush_tlb) { @@ -5137,7 +5137,7 @@ slot_handle_large_level(struct kvm *kvm, struct kvm_memory_slot *memslot, PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb); } -static bool +static __always_inline bool slot_handle_leaf(struct kvm *kvm, struct kvm_memory_slot *memslot, slot_level_handler fn, bool lock_flush_tlb) { diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index b3e488a74828..be9c839e2c89 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -49,6 +49,7 @@ #include <asm/debugreg.h> #include <asm/kvm_para.h> #include <asm/irq_remapping.h> +#include <asm/microcode.h> #include <asm/nospec-branch.h> #include <asm/virtext.h> @@ -178,6 +179,8 @@ struct vcpu_svm { uint64_t sysenter_eip; uint64_t tsc_aux; + u64 msr_decfg; + u64 next_rip; u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS]; @@ -300,6 +303,8 @@ module_param(vgif, int, 0444); static int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT); module_param(sev, int, 0444); +static u8 rsm_ins_bytes[] = "\x0f\xaa"; + static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa); static void svm_complete_interrupts(struct vcpu_svm *svm); @@ -1383,6 +1388,7 @@ static void init_vmcb(struct vcpu_svm *svm) set_intercept(svm, INTERCEPT_SKINIT); set_intercept(svm, INTERCEPT_WBINVD); set_intercept(svm, INTERCEPT_XSETBV); + set_intercept(svm, INTERCEPT_RSM); if (!kvm_mwait_in_guest()) { set_intercept(svm, INTERCEPT_MONITOR); @@ -1902,6 +1908,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) u32 dummy; u32 eax = 1; + vcpu->arch.microcode_version = 0x01000065; svm->spec_ctrl = 0; if (!init_event) { @@ -3699,6 +3706,12 @@ static int emulate_on_interception(struct vcpu_svm *svm) return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; } +static int rsm_interception(struct vcpu_svm *svm) +{ + return x86_emulate_instruction(&svm->vcpu, 0, 0, + rsm_ins_bytes, 2) == EMULATE_DONE; +} + static int rdpmc_interception(struct vcpu_svm *svm) { int err; @@ -3860,6 +3873,22 @@ static int cr8_write_interception(struct vcpu_svm *svm) return 0; } +static int svm_get_msr_feature(struct kvm_msr_entry *msr) +{ + msr->data = 0; + + switch (msr->index) { + case MSR_F10H_DECFG: + if (boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) + msr->data |= MSR_F10H_DECFG_LFENCE_SERIALIZE; + break; + default: + return 1; + } + + return 0; +} + static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { struct vcpu_svm *svm = to_svm(vcpu); @@ -3935,9 +3964,6 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = svm->spec_ctrl; break; - case MSR_IA32_UCODE_REV: - msr_info->data = 0x01000065; - break; case MSR_F15H_IC_CFG: { int family, model; @@ -3955,6 +3981,9 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = 0x1E; } break; + case MSR_F10H_DECFG: + msr_info->data = svm->msr_decfg; + break; default: return kvm_get_msr_common(vcpu, msr_info); } @@ -4133,6 +4162,24 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) case MSR_VM_IGNNE: vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); break; + case MSR_F10H_DECFG: { + struct kvm_msr_entry msr_entry; + + msr_entry.index = msr->index; + if (svm_get_msr_feature(&msr_entry)) + return 1; + + /* Check the supported bits */ + if (data & ~msr_entry.data) + return 1; + + /* Don't allow the guest to change a bit, #GP */ + if (!msr->host_initiated && (data ^ msr_entry.data)) + return 1; + + svm->msr_decfg = data; + break; + } case MSR_IA32_APICBASE: if (kvm_vcpu_apicv_active(vcpu)) avic_update_vapic_bar(to_svm(vcpu), data); @@ -4541,7 +4588,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_MWAIT] = mwait_interception, [SVM_EXIT_XSETBV] = xsetbv_interception, [SVM_EXIT_NPF] = npf_interception, - [SVM_EXIT_RSM] = emulate_on_interception, + [SVM_EXIT_RSM] = rsm_interception, [SVM_EXIT_AVIC_INCOMPLETE_IPI] = avic_incomplete_ipi_interception, [SVM_EXIT_AVIC_UNACCELERATED_ACCESS] = avic_unaccelerated_access_interception, }; @@ -5355,7 +5402,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) * being speculatively taken. */ if (svm->spec_ctrl) - wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); + native_wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); asm volatile ( "push %%" _ASM_BP "; \n\t" @@ -5464,11 +5511,11 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) * If the L02 MSR bitmap does not intercept the MSR, then we need to * save it. */ - if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) - rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); + if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) + svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); if (svm->spec_ctrl) - wrmsrl(MSR_IA32_SPEC_CTRL, 0); + native_wrmsrl(MSR_IA32_SPEC_CTRL, 0); /* Eliminate branch target predictions from guest mode */ vmexit_fill_RSB(); @@ -6236,16 +6283,18 @@ e_free: static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp) { + void __user *measure = (void __user *)(uintptr_t)argp->data; struct kvm_sev_info *sev = &kvm->arch.sev_info; struct sev_data_launch_measure *data; struct kvm_sev_launch_measure params; + void __user *p = NULL; void *blob = NULL; int ret; if (!sev_guest(kvm)) return -ENOTTY; - if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, sizeof(params))) + if (copy_from_user(¶ms, measure, sizeof(params))) return -EFAULT; data = kzalloc(sizeof(*data), GFP_KERNEL); @@ -6256,17 +6305,13 @@ static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp) if (!params.len) goto cmd; - if (params.uaddr) { + p = (void __user *)(uintptr_t)params.uaddr; + if (p) { if (params.len > SEV_FW_BLOB_MAX_SIZE) { ret = -EINVAL; goto e_free; } - if (!access_ok(VERIFY_WRITE, params.uaddr, params.len)) { - ret = -EFAULT; - goto e_free; - } - ret = -ENOMEM; blob = kmalloc(params.len, GFP_KERNEL); if (!blob) @@ -6290,13 +6335,13 @@ cmd: goto e_free_blob; if (blob) { - if (copy_to_user((void __user *)(uintptr_t)params.uaddr, blob, params.len)) + if (copy_to_user(p, blob, params.len)) ret = -EFAULT; } done: params.len = data->len; - if (copy_to_user((void __user *)(uintptr_t)argp->data, ¶ms, sizeof(params))) + if (copy_to_user(measure, ¶ms, sizeof(params))) ret = -EFAULT; e_free_blob: kfree(blob); @@ -6597,7 +6642,7 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp) struct page **pages; void *blob, *hdr; unsigned long n; - int ret; + int ret, offset; if (!sev_guest(kvm)) return -ENOTTY; @@ -6623,6 +6668,10 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp) if (!data) goto e_unpin_memory; + offset = params.guest_uaddr & (PAGE_SIZE - 1); + data->guest_address = __sme_page_pa(pages[0]) + offset; + data->guest_len = params.guest_len; + blob = psp_copy_user_blob(params.trans_uaddr, params.trans_len); if (IS_ERR(blob)) { ret = PTR_ERR(blob); @@ -6637,8 +6686,8 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp) ret = PTR_ERR(hdr); goto e_free_blob; } - data->trans_address = __psp_pa(blob); - data->trans_len = params.trans_len; + data->hdr_address = __psp_pa(hdr); + data->hdr_len = params.hdr_len; data->handle = sev->handle; ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_SECRET, data, &argp->error); @@ -6821,6 +6870,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .vcpu_unblocking = svm_vcpu_unblocking, .update_bp_intercept = update_bp_intercept, + .get_msr_feature = svm_get_msr_feature, .get_msr = svm_get_msr, .set_msr = svm_set_msr, .get_segment_base = svm_get_segment_base, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index f427723dc7db..051dab74e4e9 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -51,6 +51,7 @@ #include <asm/apic.h> #include <asm/irq_remapping.h> #include <asm/mmu_context.h> +#include <asm/microcode.h> #include <asm/nospec-branch.h> #include "trace.h" @@ -3226,6 +3227,11 @@ static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu, return !(val & ~valid_bits); } +static int vmx_get_msr_feature(struct kvm_msr_entry *msr) +{ + return 1; +} + /* * Reads an msr value (of 'msr_index') into 'pdata'. * Returns 0 on success, non-0 otherwise. @@ -4485,7 +4491,8 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_DESC); hw_cr4 &= ~X86_CR4_UMIP; - } else + } else if (!is_guest_mode(vcpu) || + !nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC)) vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_DESC); @@ -5765,6 +5772,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vmx->rmode.vm86_active = 0; vmx->spec_ctrl = 0; + vcpu->arch.microcode_version = 0x100000000ULL; vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); kvm_set_cr8(vcpu, 0); @@ -9452,7 +9460,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) * being speculatively taken. */ if (vmx->spec_ctrl) - wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); + native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); vmx->__launched = vmx->loaded_vmcs->launched; asm( @@ -9587,11 +9595,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) * If the L02 MSR bitmap does not intercept the MSR, then we need to * save it. */ - if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) - rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); + if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) + vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); if (vmx->spec_ctrl) - wrmsrl(MSR_IA32_SPEC_CTRL, 0); + native_wrmsrl(MSR_IA32_SPEC_CTRL, 0); /* Eliminate branch target predictions from guest mode */ vmexit_fill_RSB(); @@ -10136,7 +10144,10 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu, (unsigned long)(vmcs12->posted_intr_desc_addr & (PAGE_SIZE - 1))); } - if (!nested_vmx_prepare_msr_bitmap(vcpu, vmcs12)) + if (nested_vmx_prepare_msr_bitmap(vcpu, vmcs12)) + vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL, + CPU_BASED_USE_MSR_BITMAPS); + else vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL, CPU_BASED_USE_MSR_BITMAPS); } @@ -10224,8 +10235,8 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, * updated to reflect this when L1 (or its L2s) actually write to * the MSR. */ - bool pred_cmd = msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD); - bool spec_ctrl = msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL); + bool pred_cmd = !msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD); + bool spec_ctrl = !msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL); /* Nothing to do if the MSR bitmap is not in use. */ if (!cpu_has_vmx_msr_bitmap() || @@ -11196,7 +11207,12 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) if (ret) return ret; - if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) + /* + * If we're entering a halted L2 vcpu and the L2 vcpu won't be woken + * by event injection, halt vcpu. + */ + if ((vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) && + !(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK)) return kvm_vcpu_halt(vcpu); vmx->nested.nested_run_pending = 1; @@ -12287,6 +12303,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .vcpu_put = vmx_vcpu_put, .update_bp_intercept = update_exception_bitmap, + .get_msr_feature = vmx_get_msr_feature, .get_msr = vmx_get_msr, .set_msr = vmx_set_msr, .get_segment_base = vmx_get_segment_base, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c8a0b545ac20..18b5ca7a3197 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1049,6 +1049,45 @@ static u32 emulated_msrs[] = { static unsigned num_emulated_msrs; +/* + * List of msr numbers which are used to expose MSR-based features that + * can be used by a hypervisor to validate requested CPU features. + */ +static u32 msr_based_features[] = { + MSR_F10H_DECFG, + MSR_IA32_UCODE_REV, +}; + +static unsigned int num_msr_based_features; + +static int kvm_get_msr_feature(struct kvm_msr_entry *msr) +{ + switch (msr->index) { + case MSR_IA32_UCODE_REV: + rdmsrl(msr->index, msr->data); + break; + default: + if (kvm_x86_ops->get_msr_feature(msr)) + return 1; + } + return 0; +} + +static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data) +{ + struct kvm_msr_entry msr; + int r; + + msr.index = index; + r = kvm_get_msr_feature(&msr); + if (r) + return r; + + *data = msr.data; + + return 0; +} + bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer) { if (efer & efer_reserved_bits) @@ -2222,7 +2261,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) switch (msr) { case MSR_AMD64_NB_CFG: - case MSR_IA32_UCODE_REV: case MSR_IA32_UCODE_WRITE: case MSR_VM_HSAVE_PA: case MSR_AMD64_PATCH_LOADER: @@ -2230,6 +2268,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_AMD64_DC_CFG: break; + case MSR_IA32_UCODE_REV: + if (msr_info->host_initiated) + vcpu->arch.microcode_version = data; + break; case MSR_EFER: return set_efer(vcpu, data); case MSR_K7_HWCR: @@ -2525,7 +2567,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = 0; break; case MSR_IA32_UCODE_REV: - msr_info->data = 0x100000000ULL; + msr_info->data = vcpu->arch.microcode_version; break; case MSR_MTRRcap: case 0x200 ... 0x2ff: @@ -2680,13 +2722,11 @@ static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs, int (*do_msr)(struct kvm_vcpu *vcpu, unsigned index, u64 *data)) { - int i, idx; + int i; - idx = srcu_read_lock(&vcpu->kvm->srcu); for (i = 0; i < msrs->nmsrs; ++i) if (do_msr(vcpu, entries[i].index, &entries[i].data)) break; - srcu_read_unlock(&vcpu->kvm->srcu, idx); return i; } @@ -2785,6 +2825,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_SET_BOOT_CPU_ID: case KVM_CAP_SPLIT_IRQCHIP: case KVM_CAP_IMMEDIATE_EXIT: + case KVM_CAP_GET_MSR_FEATURES: r = 1; break; case KVM_CAP_ADJUST_CLOCK: @@ -2899,6 +2940,31 @@ long kvm_arch_dev_ioctl(struct file *filp, goto out; r = 0; break; + case KVM_GET_MSR_FEATURE_INDEX_LIST: { + struct kvm_msr_list __user *user_msr_list = argp; + struct kvm_msr_list msr_list; + unsigned int n; + + r = -EFAULT; + if (copy_from_user(&msr_list, user_msr_list, sizeof(msr_list))) + goto out; + n = msr_list.nmsrs; + msr_list.nmsrs = num_msr_based_features; + if (copy_to_user(user_msr_list, &msr_list, sizeof(msr_list))) + goto out; + r = -E2BIG; + if (n < msr_list.nmsrs) + goto out; + r = -EFAULT; + if (copy_to_user(user_msr_list->indices, &msr_based_features, + num_msr_based_features * sizeof(u32))) + goto out; + r = 0; + break; + } + case KVM_GET_MSRS: + r = msr_io(NULL, argp, do_get_msr_feature, 1); + break; } default: r = -EINVAL; @@ -3636,12 +3702,18 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = 0; break; } - case KVM_GET_MSRS: + case KVM_GET_MSRS: { + int idx = srcu_read_lock(&vcpu->kvm->srcu); r = msr_io(vcpu, argp, do_get_msr, 1); + srcu_read_unlock(&vcpu->kvm->srcu, idx); break; - case KVM_SET_MSRS: + } + case KVM_SET_MSRS: { + int idx = srcu_read_lock(&vcpu->kvm->srcu); r = msr_io(vcpu, argp, do_set_msr, 0); + srcu_read_unlock(&vcpu->kvm->srcu, idx); break; + } case KVM_TPR_ACCESS_REPORTING: { struct kvm_tpr_access_ctl tac; @@ -4464,6 +4536,19 @@ static void kvm_init_msr_list(void) j++; } num_emulated_msrs = j; + + for (i = j = 0; i < ARRAY_SIZE(msr_based_features); i++) { + struct kvm_msr_entry msr; + + msr.index = msr_based_features[i]; + if (kvm_get_msr_feature(&msr)) + continue; + + if (j < i) + msr_based_features[j] = msr_based_features[i]; + j++; + } + num_msr_based_features = j; } static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len, @@ -8017,6 +8102,8 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) { + kvm_lapic_reset(vcpu, init_event); + vcpu->arch.hflags = 0; vcpu->arch.smi_pending = 0; @@ -8460,10 +8547,8 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size) return r; } - if (!size) { - r = vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE); - WARN_ON(r < 0); - } + if (!size) + vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE); return 0; } diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 91e9700cc6dc..25a972c61b0a 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -28,7 +28,6 @@ lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o lib-$(CONFIG_RETPOLINE) += retpoline.o -OBJECT_FILES_NON_STANDARD_retpoline.o :=y obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o diff --git a/arch/x86/lib/cpu.c b/arch/x86/lib/cpu.c index d6f848d1211d..2dd1fe13a37b 100644 --- a/arch/x86/lib/cpu.c +++ b/arch/x86/lib/cpu.c @@ -18,7 +18,7 @@ unsigned int x86_model(unsigned int sig) { unsigned int fam, model; - fam = x86_family(sig); + fam = x86_family(sig); model = (sig >> 4) & 0xf; diff --git a/arch/x86/lib/error-inject.c b/arch/x86/lib/error-inject.c index 7b881d03d0dd..3cdf06128d13 100644 --- a/arch/x86/lib/error-inject.c +++ b/arch/x86/lib/error-inject.c @@ -7,6 +7,7 @@ asmlinkage void just_return_func(void); asm( ".type just_return_func, @function\n" + ".globl just_return_func\n" "just_return_func:\n" " ret\n" ".size just_return_func, .-just_return_func\n" diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 480edc3a5e03..c909961e678a 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -7,7 +7,6 @@ #include <asm/alternative-asm.h> #include <asm/export.h> #include <asm/nospec-branch.h> -#include <asm/bitsperlong.h> .macro THUNK reg .section .text.__x86.indirect_thunk @@ -47,58 +46,3 @@ GENERATE_THUNK(r13) GENERATE_THUNK(r14) GENERATE_THUNK(r15) #endif - -/* - * Fill the CPU return stack buffer. - * - * Each entry in the RSB, if used for a speculative 'ret', contains an - * infinite 'pause; lfence; jmp' loop to capture speculative execution. - * - * This is required in various cases for retpoline and IBRS-based - * mitigations for the Spectre variant 2 vulnerability. Sometimes to - * eliminate potentially bogus entries from the RSB, and sometimes - * purely to ensure that it doesn't get empty, which on some CPUs would - * allow predictions from other (unwanted!) sources to be used. - * - * Google experimented with loop-unrolling and this turned out to be - * the optimal version - two calls, each with their own speculation - * trap should their return address end up getting used, in a loop. - */ -.macro STUFF_RSB nr:req sp:req - mov $(\nr / 2), %_ASM_BX - .align 16 -771: - call 772f -773: /* speculation trap */ - pause - lfence - jmp 773b - .align 16 -772: - call 774f -775: /* speculation trap */ - pause - lfence - jmp 775b - .align 16 -774: - dec %_ASM_BX - jnz 771b - add $((BITS_PER_LONG/8) * \nr), \sp -.endm - -#define RSB_FILL_LOOPS 16 /* To avoid underflow */ - -ENTRY(__fill_rsb) - STUFF_RSB RSB_FILL_LOOPS, %_ASM_SP - ret -END(__fill_rsb) -EXPORT_SYMBOL_GPL(__fill_rsb) - -#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ - -ENTRY(__clear_rsb) - STUFF_RSB RSB_CLEAR_LOOPS, %_ASM_SP - ret -END(__clear_rsb) -EXPORT_SYMBOL_GPL(__clear_rsb) diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c index b9283cc27622..476d810639a8 100644 --- a/arch/x86/mm/cpu_entry_area.c +++ b/arch/x86/mm/cpu_entry_area.c @@ -163,4 +163,10 @@ void __init setup_cpu_entry_areas(void) for_each_possible_cpu(cpu) setup_cpu_entry_area(cpu); + + /* + * This is the last essential update to swapper_pgdir which needs + * to be synchronized to initial_page_table on 32bit. + */ + sync_initial_page_table(); } diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 800de815519c..c88573d90f3e 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1248,10 +1248,6 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, tsk = current; mm = tsk->mm; - /* - * Detect and handle instructions that would cause a page fault for - * both a tracked kernel page and a userspace page. - */ prefetchw(&mm->mmap_sem); if (unlikely(kmmio_fault(regs, address))) diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 79cb066f40c0..396e1f0151ac 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -453,6 +453,21 @@ static inline void permanent_kmaps_init(pgd_t *pgd_base) } #endif /* CONFIG_HIGHMEM */ +void __init sync_initial_page_table(void) +{ + clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY, + swapper_pg_dir + KERNEL_PGD_BOUNDARY, + KERNEL_PGD_PTRS); + + /* + * sync back low identity map too. It is used for example + * in the 32-bit EFI stub. + */ + clone_pgd_range(initial_page_table, + swapper_pg_dir + KERNEL_PGD_BOUNDARY, + min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY)); +} + void __init native_pagetable_init(void) { unsigned long pfn, va; diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 1ab42c852069..8b72923f1d35 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -256,7 +256,7 @@ static void __set_pte_vaddr(pud_t *pud, unsigned long vaddr, pte_t new_pte) * It's enough to flush this one mapping. * (PGE mappings get flushed as well) */ - __flush_tlb_one(vaddr); + __flush_tlb_one_kernel(vaddr); } void set_pte_vaddr_p4d(p4d_t *p4d_page, unsigned long vaddr, pte_t new_pte) @@ -1193,8 +1193,8 @@ void __init mem_init(void) register_page_bootmem_info(); /* Register memory areas for /proc/kcore */ - kclist_add(&kcore_vsyscall, (void *)VSYSCALL_ADDR, - PAGE_SIZE, KCORE_OTHER); + if (get_gate_vma(&init_mm)) + kclist_add(&kcore_vsyscall, (void *)VSYSCALL_ADDR, PAGE_SIZE, KCORE_USER); mem_init_print_info(NULL); } diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index c45b6ec5357b..e2db83bebc3b 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -820,5 +820,5 @@ void __init __early_set_fixmap(enum fixed_addresses idx, set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags)); else pte_clear(&init_mm, addr, pte); - __flush_tlb_one(addr); + __flush_tlb_one_kernel(addr); } diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 58477ec3d66d..7c8686709636 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -168,7 +168,7 @@ static int clear_page_presence(struct kmmio_fault_page *f, bool clear) return -1; } - __flush_tlb_one(f->addr); + __flush_tlb_one_kernel(f->addr); return 0; } diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S index 01f682cf77a8..40a6085063d6 100644 --- a/arch/x86/mm/mem_encrypt_boot.S +++ b/arch/x86/mm/mem_encrypt_boot.S @@ -15,6 +15,7 @@ #include <asm/page.h> #include <asm/processor-flags.h> #include <asm/msr-index.h> +#include <asm/nospec-branch.h> .text .code64 @@ -59,6 +60,7 @@ ENTRY(sme_encrypt_execute) movq %rax, %r8 /* Workarea encryption routine */ addq $PAGE_SIZE, %r8 /* Workarea intermediate copy buffer */ + ANNOTATE_RETPOLINE_SAFE call *%rax /* Call the encryption routine */ pop %r12 diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index c3c5274410a9..9bb7f0ab9fe6 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c @@ -63,7 +63,7 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pteval) * It's enough to flush this one mapping. * (PGE mappings get flushed as well) */ - __flush_tlb_one(vaddr); + __flush_tlb_one_kernel(vaddr); } unsigned long __FIXADDR_TOP = 0xfffff000; diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index ce38f165489b..631507f0c198 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -332,7 +332,7 @@ static void __init pti_clone_user_shared(void) } /* - * Clone the ESPFIX P4D into the user space visinble page table + * Clone the ESPFIX P4D into the user space visible page table */ static void __init pti_setup_espfix64(void) { diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 8dcc0607f805..7f1a51399674 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -498,7 +498,7 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f, * flush that changes context.tlb_gen from 2 to 3. If they get * processed on this CPU in reverse order, we'll see * local_tlb_gen == 1, mm_tlb_gen == 3, and end != TLB_FLUSH_ALL. - * If we were to use __flush_tlb_single() and set local_tlb_gen to + * If we were to use __flush_tlb_one_user() and set local_tlb_gen to * 3, we'd be break the invariant: we'd update local_tlb_gen above * 1 without the full flush that's needed for tlb_gen 2. * @@ -519,7 +519,7 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f, addr = f->start; while (addr < f->end) { - __flush_tlb_single(addr); + __flush_tlb_one_user(addr); addr += PAGE_SIZE; } if (local) @@ -666,7 +666,7 @@ static void do_kernel_range_flush(void *info) /* flush range by one by one 'invlpg' */ for (addr = f->start; addr < f->end; addr += PAGE_SIZE) - __flush_tlb_one(addr); + __flush_tlb_one_kernel(addr); } void flush_tlb_kernel_range(unsigned long start, unsigned long end) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 4923d92f918d..45e4eb5bcbb2 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -13,6 +13,7 @@ #include <linux/if_vlan.h> #include <asm/cacheflush.h> #include <asm/set_memory.h> +#include <asm/nospec-branch.h> #include <linux/bpf.h> /* @@ -290,7 +291,7 @@ static void emit_bpf_tail_call(u8 **pprog) EMIT2(0x89, 0xD2); /* mov edx, edx */ EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */ offsetof(struct bpf_array, map.max_entries)); -#define OFFSET1 43 /* number of bytes to jump */ +#define OFFSET1 (41 + RETPOLINE_RAX_BPF_JIT_SIZE) /* number of bytes to jump */ EMIT2(X86_JBE, OFFSET1); /* jbe out */ label1 = cnt; @@ -299,7 +300,7 @@ static void emit_bpf_tail_call(u8 **pprog) */ EMIT2_off32(0x8B, 0x85, 36); /* mov eax, dword ptr [rbp + 36] */ EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ -#define OFFSET2 32 +#define OFFSET2 (30 + RETPOLINE_RAX_BPF_JIT_SIZE) EMIT2(X86_JA, OFFSET2); /* ja out */ label2 = cnt; EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */ @@ -313,7 +314,7 @@ static void emit_bpf_tail_call(u8 **pprog) * goto out; */ EMIT3(0x48, 0x85, 0xC0); /* test rax,rax */ -#define OFFSET3 10 +#define OFFSET3 (8 + RETPOLINE_RAX_BPF_JIT_SIZE) EMIT2(X86_JE, OFFSET3); /* je out */ label3 = cnt; @@ -326,7 +327,7 @@ static void emit_bpf_tail_call(u8 **pprog) * rdi == ctx (1st arg) * rax == prog->bpf_func + prologue_size */ - EMIT2(0xFF, 0xE0); /* jmp rax */ + RETPOLINE_RAX_BPF_JIT(); /* out: */ BUILD_BUG_ON(cnt - label1 != OFFSET1); diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 174c59774cc9..a7a7677265b6 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -460,7 +460,7 @@ static int nmi_setup(void) goto fail; for_each_possible_cpu(cpu) { - if (!cpu) + if (!IS_ENABLED(CONFIG_SMP) || !cpu) continue; memcpy(per_cpu(cpu_msrs, cpu).counters, diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c index 2c67bae6bb53..fb1df9488e98 100644 --- a/arch/x86/platform/intel-mid/intel-mid.c +++ b/arch/x86/platform/intel-mid/intel-mid.c @@ -79,7 +79,7 @@ static void intel_mid_power_off(void) static void intel_mid_reboot(void) { - intel_scu_ipc_simple_command(IPCMSG_COLD_BOOT, 0); + intel_scu_ipc_simple_command(IPCMSG_COLD_RESET, 0); } static unsigned long __init intel_mid_calibrate_tsc(void) diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index c2e9285d1bf1..db77e087adaf 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -299,7 +299,7 @@ static void bau_process_message(struct msg_desc *mdp, struct bau_control *bcp, local_flush_tlb(); stat->d_alltlb++; } else { - __flush_tlb_single(msg->address); + __flush_tlb_one_user(msg->address); stat->d_onetlb++; } stat->d_requestee++; diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S index de53bd15df5a..24bb7598774e 100644 --- a/arch/x86/realmode/rm/trampoline_64.S +++ b/arch/x86/realmode/rm/trampoline_64.S @@ -102,7 +102,7 @@ ENTRY(startup_32) * don't we'll eventually crash trying to execute encrypted * instructions. */ - bt $TH_FLAGS_SME_ACTIVE_BIT, pa_tr_flags + btl $TH_FLAGS_SME_ACTIVE_BIT, pa_tr_flags jnc .Ldone movl $MSR_K8_SYSCFG, %ecx rdmsr diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c index 5d73c443e778..220e97841e49 100644 --- a/arch/x86/tools/relocs.c +++ b/arch/x86/tools/relocs.c @@ -770,9 +770,12 @@ static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym, break; case R_X86_64_PC32: + case R_X86_64_PLT32: /* * PC relative relocations don't need to be adjusted unless * referencing a percpu symbol. + * + * NB: R_X86_64_PLT32 can be treated as R_X86_64_PC32. */ if (is_percpu_sym(sym, symname)) add_reloc(&relocs32neg, offset); diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index c047f42552e1..3c2c2530737e 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1376,8 +1376,6 @@ asmlinkage __visible void __init xen_start_kernel(void) if (!xen_initial_domain()) { add_preferred_console("xenboot", 0, NULL); - add_preferred_console("tty", 0, NULL); - add_preferred_console("hvc", 0, NULL); if (pci_xen) x86_init.pci.arch_init = pci_xen_init; } else { @@ -1410,6 +1408,10 @@ asmlinkage __visible void __init xen_start_kernel(void) xen_boot_params_init_edd(); } + + add_preferred_console("tty", 0, NULL); + add_preferred_console("hvc", 0, NULL); + #ifdef CONFIG_PCI /* PCI BIOS service won't work from a PV guest. */ pci_probe &= ~PCI_PROBE_BIOS; diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index d85076223a69..aae88fec9941 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -1300,12 +1300,12 @@ static void xen_flush_tlb(void) preempt_enable(); } -static void xen_flush_tlb_single(unsigned long addr) +static void xen_flush_tlb_one_user(unsigned long addr) { struct mmuext_op *op; struct multicall_space mcs; - trace_xen_mmu_flush_tlb_single(addr); + trace_xen_mmu_flush_tlb_one_user(addr); preempt_disable(); @@ -2370,7 +2370,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { .flush_tlb_user = xen_flush_tlb, .flush_tlb_kernel = xen_flush_tlb, - .flush_tlb_single = xen_flush_tlb_single, + .flush_tlb_one_user = xen_flush_tlb_one_user, .flush_tlb_others = xen_flush_tlb_others, .pgd_alloc = xen_pgd_alloc, diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 77c959cf81e7..7a43b2ae19f1 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -122,6 +122,8 @@ void __init xen_smp_cpus_done(unsigned int max_cpus) if (xen_hvm_domain()) native_smp_cpus_done(max_cpus); + else + calculate_max_logical_packages(); if (xen_have_vcpu_info_placement) return; diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index d9f96cc5d743..1d83152c761b 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -1,12 +1,15 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/types.h> #include <linux/tick.h> +#include <linux/percpu-defs.h> #include <xen/xen.h> #include <xen/interface/xen.h> #include <xen/grant_table.h> #include <xen/events.h> +#include <asm/cpufeatures.h> +#include <asm/msr-index.h> #include <asm/xen/hypercall.h> #include <asm/xen/page.h> #include <asm/fixmap.h> @@ -15,6 +18,8 @@ #include "mmu.h" #include "pmu.h" +static DEFINE_PER_CPU(u64, spec_ctrl); + void xen_arch_pre_suspend(void) { xen_save_time_memory_area(); @@ -35,6 +40,9 @@ void xen_arch_post_suspend(int cancelled) static void xen_vcpu_notify_restore(void *data) { + if (xen_pv_domain() && boot_cpu_has(X86_FEATURE_SPEC_CTRL)) + wrmsrl(MSR_IA32_SPEC_CTRL, this_cpu_read(spec_ctrl)); + /* Boot processor notified via generic timekeeping_resume() */ if (smp_processor_id() == 0) return; @@ -44,7 +52,15 @@ static void xen_vcpu_notify_restore(void *data) static void xen_vcpu_notify_suspend(void *data) { + u64 tmp; + tick_suspend_local(); + + if (xen_pv_domain() && boot_cpu_has(X86_FEATURE_SPEC_CTRL)) { + rdmsrl(MSR_IA32_SPEC_CTRL, tmp); + this_cpu_write(spec_ctrl, tmp); + wrmsrl(MSR_IA32_SPEC_CTRL, 0); + } } void xen_arch_resume(void) diff --git a/arch/xtensa/kernel/pci-dma.c b/arch/xtensa/kernel/pci-dma.c index 623720a11143..732631ce250f 100644 --- a/arch/xtensa/kernel/pci-dma.c +++ b/arch/xtensa/kernel/pci-dma.c @@ -16,6 +16,7 @@ */ #include <linux/dma-contiguous.h> +#include <linux/dma-direct.h> #include <linux/gfp.h> #include <linux/highmem.h> #include <linux/mm.h> @@ -123,7 +124,7 @@ static void *xtensa_dma_alloc(struct device *dev, size_t size, unsigned long attrs) { unsigned long ret; - unsigned long uncached = 0; + unsigned long uncached; unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; struct page *page = NULL; @@ -144,15 +145,27 @@ static void *xtensa_dma_alloc(struct device *dev, size_t size, if (!page) return NULL; - ret = (unsigned long)page_address(page); + *handle = phys_to_dma(dev, page_to_phys(page)); - /* We currently don't support coherent memory outside KSEG */ +#ifdef CONFIG_MMU + if (PageHighMem(page)) { + void *p; + p = dma_common_contiguous_remap(page, size, VM_MAP, + pgprot_noncached(PAGE_KERNEL), + __builtin_return_address(0)); + if (!p) { + if (!dma_release_from_contiguous(dev, page, count)) + __free_pages(page, get_order(size)); + } + return p; + } +#endif + ret = (unsigned long)page_address(page); BUG_ON(ret < XCHAL_KSEG_CACHED_VADDR || ret > XCHAL_KSEG_CACHED_VADDR + XCHAL_KSEG_SIZE - 1); uncached = ret + XCHAL_KSEG_BYPASS_VADDR - XCHAL_KSEG_CACHED_VADDR; - *handle = virt_to_bus((void *)ret); __invalidate_dcache_range(ret, size); return (void *)uncached; @@ -161,13 +174,20 @@ static void *xtensa_dma_alloc(struct device *dev, size_t size, static void xtensa_dma_free(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, unsigned long attrs) { - unsigned long addr = (unsigned long)vaddr + - XCHAL_KSEG_CACHED_VADDR - XCHAL_KSEG_BYPASS_VADDR; - struct page *page = virt_to_page(addr); unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; - - BUG_ON(addr < XCHAL_KSEG_CACHED_VADDR || - addr > XCHAL_KSEG_CACHED_VADDR + XCHAL_KSEG_SIZE - 1); + unsigned long addr = (unsigned long)vaddr; + struct page *page; + + if (addr >= XCHAL_KSEG_BYPASS_VADDR && + addr - XCHAL_KSEG_BYPASS_VADDR < XCHAL_KSEG_SIZE) { + addr += XCHAL_KSEG_CACHED_VADDR - XCHAL_KSEG_BYPASS_VADDR; + page = virt_to_page(addr); + } else { +#ifdef CONFIG_MMU + dma_common_free_remap(vaddr, size, VM_MAP); +#endif + page = pfn_to_page(PHYS_PFN(dma_to_phys(dev, dma_handle))); + } if (!dma_release_from_contiguous(dev, page, count)) __free_pages(page, get_order(size)); diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c index d776ec0d7b22..34aead7dcb48 100644 --- a/arch/xtensa/mm/init.c +++ b/arch/xtensa/mm/init.c @@ -79,19 +79,75 @@ void __init zones_init(void) free_area_init_node(0, zones_size, ARCH_PFN_OFFSET, NULL); } +#ifdef CONFIG_HIGHMEM +static void __init free_area_high(unsigned long pfn, unsigned long end) +{ + for (; pfn < end; pfn++) + free_highmem_page(pfn_to_page(pfn)); +} + +static void __init free_highpages(void) +{ + unsigned long max_low = max_low_pfn; + struct memblock_region *mem, *res; + + reset_all_zones_managed_pages(); + /* set highmem page free */ + for_each_memblock(memory, mem) { + unsigned long start = memblock_region_memory_base_pfn(mem); + unsigned long end = memblock_region_memory_end_pfn(mem); + + /* Ignore complete lowmem entries */ + if (end <= max_low) + continue; + + if (memblock_is_nomap(mem)) + continue; + + /* Truncate partial highmem entries */ + if (start < max_low) + start = max_low; + + /* Find and exclude any reserved regions */ + for_each_memblock(reserved, res) { + unsigned long res_start, res_end; + + res_start = memblock_region_reserved_base_pfn(res); + res_end = memblock_region_reserved_end_pfn(res); + + if (res_end < start) + continue; + if (res_start < start) + res_start = start; + if (res_start > end) + res_start = end; + if (res_end > end) + res_end = end; + if (res_start != start) + free_area_high(start, res_start); + start = res_end; + if (start == end) + break; + } + + /* And now free anything which remains */ + if (start < end) + free_area_high(start, end); + } +} +#else +static void __init free_highpages(void) +{ +} +#endif + /* * Initialize memory pages. */ void __init mem_init(void) { -#ifdef CONFIG_HIGHMEM - unsigned long tmp; - - reset_all_zones_managed_pages(); - for (tmp = max_low_pfn; tmp < max_pfn; tmp++) - free_highmem_page(pfn_to_page(tmp)); -#endif + free_highpages(); max_mapnr = max_pfn - ARCH_PFN_OFFSET; high_memory = (void *)__va(max_low_pfn << PAGE_SHIFT); |