Diffstat (limited to 'arch')
324 files changed, 3556 insertions, 2234 deletions
diff --git a/arch/alpha/include/asm/cmpxchg.h b/arch/alpha/include/asm/cmpxchg.h index 46ebf14aed4e..8a2b331e43fe 100644 --- a/arch/alpha/include/asm/cmpxchg.h +++ b/arch/alpha/include/asm/cmpxchg.h @@ -6,7 +6,6 @@   * Atomic exchange routines.   */ -#define __ASM__MB  #define ____xchg(type, args...)		__xchg ## type ## _local(args)  #define ____cmpxchg(type, args...)	__cmpxchg ## type ## _local(args)  #include <asm/xchg.h> @@ -33,10 +32,6 @@  	cmpxchg_local((ptr), (o), (n));					\  }) -#ifdef CONFIG_SMP -#undef __ASM__MB -#define __ASM__MB	"\tmb\n" -#endif  #undef ____xchg  #undef ____cmpxchg  #define ____xchg(type, args...)		__xchg ##type(args) @@ -64,7 +59,6 @@  	cmpxchg((ptr), (o), (n));					\  }) -#undef __ASM__MB  #undef ____cmpxchg  #endif /* _ALPHA_CMPXCHG_H */ diff --git a/arch/alpha/include/asm/xchg.h b/arch/alpha/include/asm/xchg.h index 68dfb3cb7145..e2b59fac5257 100644 --- a/arch/alpha/include/asm/xchg.h +++ b/arch/alpha/include/asm/xchg.h @@ -12,6 +12,10 @@   * Atomic exchange.   * Since it can be used to implement critical sections   * it must clobber "memory" (also for interrupts in UP). + * + * The leading and the trailing memory barriers guarantee that these + * operations are fully ordered. + *   */  static inline unsigned long @@ -19,6 +23,7 @@ ____xchg(_u8, volatile char *m, unsigned long val)  {  	unsigned long ret, tmp, addr64; +	smp_mb();  	__asm__ __volatile__(  	"	andnot	%4,7,%3\n"  	"	insbl	%1,%4,%1\n" @@ -28,12 +33,12 @@ ____xchg(_u8, volatile char *m, unsigned long val)  	"	or	%1,%2,%2\n"  	"	stq_c	%2,0(%3)\n"  	"	beq	%2,2f\n" -		__ASM__MB  	".subsection 2\n"  	"2:	br	1b\n"  	".previous"  	: "=&r" (ret), "=&r" (val), "=&r" (tmp), "=&r" (addr64)  	: "r" ((long)m), "1" (val) : "memory"); +	smp_mb();  	return ret;  } @@ -43,6 +48,7 @@ ____xchg(_u16, volatile short *m, unsigned long val)  {  	unsigned long ret, tmp, addr64; +	smp_mb();  	__asm__ __volatile__(  	"	andnot	%4,7,%3\n"  	"	inswl	%1,%4,%1\n" @@ -52,12 +58,12 @@ ____xchg(_u16, volatile short *m, unsigned long val)  	"	or	%1,%2,%2\n"  	"	stq_c	%2,0(%3)\n"  	"	beq	%2,2f\n" -		__ASM__MB  	".subsection 2\n"  	"2:	br	1b\n"  	".previous"  	: "=&r" (ret), "=&r" (val), "=&r" (tmp), "=&r" (addr64)  	: "r" ((long)m), "1" (val) : "memory"); +	smp_mb();  	return ret;  } @@ -67,17 +73,18 @@ ____xchg(_u32, volatile int *m, unsigned long val)  {  	unsigned long dummy; +	smp_mb();  	__asm__ __volatile__(  	"1:	ldl_l %0,%4\n"  	"	bis $31,%3,%1\n"  	"	stl_c %1,%2\n"  	"	beq %1,2f\n" -		__ASM__MB  	".subsection 2\n"  	"2:	br 1b\n"  	".previous"  	: "=&r" (val), "=&r" (dummy), "=m" (*m)  	: "rI" (val), "m" (*m) : "memory"); +	smp_mb();  	return val;  } @@ -87,17 +94,18 @@ ____xchg(_u64, volatile long *m, unsigned long val)  {  	unsigned long dummy; +	smp_mb();  	__asm__ __volatile__(  	"1:	ldq_l %0,%4\n"  	"	bis $31,%3,%1\n"  	"	stq_c %1,%2\n"  	"	beq %1,2f\n" -		__ASM__MB  	".subsection 2\n"  	"2:	br 1b\n"  	".previous"  	: "=&r" (val), "=&r" (dummy), "=m" (*m)  	: "rI" (val), "m" (*m) : "memory"); +	smp_mb();  	return val;  } @@ -128,10 +136,12 @@ ____xchg(, volatile void *ptr, unsigned long x, int size)   * store NEW in MEM.  Return the initial value in MEM.  Success is   * indicated by comparing RETURN with OLD.   * - * The memory barrier should be placed in SMP only when we actually - * make the change. If we don't change anything (so if the returned - * prev is equal to old) then we aren't acquiring anything new and - * we don't need any memory barrier as far I can tell. 
+ * The leading and the trailing memory barriers guarantee that these + * operations are fully ordered. + * + * The trailing memory barrier is placed in SMP unconditionally, in + * order to guarantee that dependency ordering is preserved when a + * dependency is headed by an unsuccessful operation.   */  static inline unsigned long @@ -139,6 +149,7 @@ ____cmpxchg(_u8, volatile char *m, unsigned char old, unsigned char new)  {  	unsigned long prev, tmp, cmp, addr64; +	smp_mb();  	__asm__ __volatile__(  	"	andnot	%5,7,%4\n"  	"	insbl	%1,%5,%1\n" @@ -150,13 +161,13 @@ ____cmpxchg(_u8, volatile char *m, unsigned char old, unsigned char new)  	"	or	%1,%2,%2\n"  	"	stq_c	%2,0(%4)\n"  	"	beq	%2,3f\n" -		__ASM__MB  	"2:\n"  	".subsection 2\n"  	"3:	br	1b\n"  	".previous"  	: "=&r" (prev), "=&r" (new), "=&r" (tmp), "=&r" (cmp), "=&r" (addr64)  	: "r" ((long)m), "Ir" (old), "1" (new) : "memory"); +	smp_mb();  	return prev;  } @@ -166,6 +177,7 @@ ____cmpxchg(_u16, volatile short *m, unsigned short old, unsigned short new)  {  	unsigned long prev, tmp, cmp, addr64; +	smp_mb();  	__asm__ __volatile__(  	"	andnot	%5,7,%4\n"  	"	inswl	%1,%5,%1\n" @@ -177,13 +189,13 @@ ____cmpxchg(_u16, volatile short *m, unsigned short old, unsigned short new)  	"	or	%1,%2,%2\n"  	"	stq_c	%2,0(%4)\n"  	"	beq	%2,3f\n" -		__ASM__MB  	"2:\n"  	".subsection 2\n"  	"3:	br	1b\n"  	".previous"  	: "=&r" (prev), "=&r" (new), "=&r" (tmp), "=&r" (cmp), "=&r" (addr64)  	: "r" ((long)m), "Ir" (old), "1" (new) : "memory"); +	smp_mb();  	return prev;  } @@ -193,6 +205,7 @@ ____cmpxchg(_u32, volatile int *m, int old, int new)  {  	unsigned long prev, cmp; +	smp_mb();  	__asm__ __volatile__(  	"1:	ldl_l %0,%5\n"  	"	cmpeq %0,%3,%1\n" @@ -200,13 +213,13 @@ ____cmpxchg(_u32, volatile int *m, int old, int new)  	"	mov %4,%1\n"  	"	stl_c %1,%2\n"  	"	beq %1,3f\n" -		__ASM__MB  	"2:\n"  	".subsection 2\n"  	"3:	br 1b\n"  	".previous"  	: "=&r"(prev), "=&r"(cmp), "=m"(*m)  	: "r"((long) old), "r"(new), "m"(*m) : "memory"); +	smp_mb();  	return prev;  } @@ -216,6 +229,7 @@ ____cmpxchg(_u64, volatile long *m, unsigned long old, unsigned long new)  {  	unsigned long prev, cmp; +	smp_mb();  	__asm__ __volatile__(  	"1:	ldq_l %0,%5\n"  	"	cmpeq %0,%3,%1\n" @@ -223,13 +237,13 @@ ____cmpxchg(_u64, volatile long *m, unsigned long old, unsigned long new)  	"	mov %4,%1\n"  	"	stq_c %1,%2\n"  	"	beq %1,3f\n" -		__ASM__MB  	"2:\n"  	".subsection 2\n"  	"3:	br 1b\n"  	".previous"  	: "=&r"(prev), "=&r"(cmp), "=m"(*m)  	: "r"((long) old), "r"(new), "m"(*m) : "memory"); +	smp_mb();  	return prev;  } diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index f3a80cf164cc..d76bf4a83740 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -484,7 +484,6 @@ config ARC_CURR_IN_REG  config ARC_EMUL_UNALIGNED  	bool "Emulate unaligned memory access (userspace only)" -	default N  	select SYSCTL_ARCH_UNALIGN_NO_WARN  	select SYSCTL_ARCH_UNALIGN_ALLOW  	depends on ISA_ARCOMPACT diff --git a/arch/arc/boot/dts/axs101.dts b/arch/arc/boot/dts/axs101.dts index 70aec7d6ca60..626b694c7be7 100644 --- a/arch/arc/boot/dts/axs101.dts +++ b/arch/arc/boot/dts/axs101.dts @@ -17,6 +17,6 @@  	compatible = "snps,axs101", "snps,arc-sdp";  	chosen { -		bootargs = "earlycon=uart8250,mmio32,0xe0022000,115200n8 console=tty0 console=ttyS3,115200n8 consoleblank=0 video=1280x720@60"; +		bootargs = "earlycon=uart8250,mmio32,0xe0022000,115200n8 console=tty0 console=ttyS3,115200n8 consoleblank=0 video=1280x720@60 print-fatal-signals=1";  	};  }; diff --git a/arch/arc/boot/dts/axs10x_mb.dtsi 
b/arch/arc/boot/dts/axs10x_mb.dtsi index 74d070cd3c13..47b74fbc403c 100644 --- a/arch/arc/boot/dts/axs10x_mb.dtsi +++ b/arch/arc/boot/dts/axs10x_mb.dtsi @@ -214,13 +214,13 @@  			};  			eeprom@0x54{ -				compatible = "24c01"; +				compatible = "atmel,24c01";  				reg = <0x54>;  				pagesize = <0x8>;  			};  			eeprom@0x57{ -				compatible = "24c04"; +				compatible = "atmel,24c04";  				reg = <0x57>;  				pagesize = <0x8>;  			}; diff --git a/arch/arc/boot/dts/haps_hs_idu.dts b/arch/arc/boot/dts/haps_hs_idu.dts index 215cddd0b63b..0c603308aeb3 100644 --- a/arch/arc/boot/dts/haps_hs_idu.dts +++ b/arch/arc/boot/dts/haps_hs_idu.dts @@ -22,7 +22,7 @@  	};  	chosen { -		bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=ttyS0,115200n8 debug"; +		bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=ttyS0,115200n8 debug print-fatal-signals=1";  	};  	aliases { diff --git a/arch/arc/boot/dts/nsim_700.dts b/arch/arc/boot/dts/nsim_700.dts index 5ee96b067c08..ff2f2c70c545 100644 --- a/arch/arc/boot/dts/nsim_700.dts +++ b/arch/arc/boot/dts/nsim_700.dts @@ -17,7 +17,7 @@  	interrupt-parent = <&core_intc>;  	chosen { -		bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8"; +		bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8 print-fatal-signals=1";  	};  	aliases { diff --git a/arch/arc/boot/dts/nsim_hs.dts b/arch/arc/boot/dts/nsim_hs.dts index 8d787b251f73..8e2489b16b0a 100644 --- a/arch/arc/boot/dts/nsim_hs.dts +++ b/arch/arc/boot/dts/nsim_hs.dts @@ -24,7 +24,7 @@  	};  	chosen { -		bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8"; +		bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8 print-fatal-signals=1";  	};  	aliases { diff --git a/arch/arc/boot/dts/nsim_hs_idu.dts b/arch/arc/boot/dts/nsim_hs_idu.dts index 4f98ebf71fd8..ed12f494721d 100644 --- a/arch/arc/boot/dts/nsim_hs_idu.dts +++ b/arch/arc/boot/dts/nsim_hs_idu.dts @@ -15,7 +15,7 @@  	interrupt-parent = <&core_intc>;  	chosen { -		bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8"; +		bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8 print-fatal-signals=1";  	};  	aliases { diff --git a/arch/arc/boot/dts/nsimosci.dts b/arch/arc/boot/dts/nsimosci.dts index 3c391ba565ed..7842e5eb4ab5 100644 --- a/arch/arc/boot/dts/nsimosci.dts +++ b/arch/arc/boot/dts/nsimosci.dts @@ -20,7 +20,7 @@  		/* this is for console on PGU */  		/* bootargs = "console=tty0 consoleblank=0"; */  		/* this is for console on serial */ -		bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug video=640x480-24"; +		bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug video=640x480-24 print-fatal-signals=1";  	};  	aliases { diff --git a/arch/arc/boot/dts/nsimosci_hs.dts b/arch/arc/boot/dts/nsimosci_hs.dts index 14a727cbf4c9..b8838cf2b4ec 100644 --- a/arch/arc/boot/dts/nsimosci_hs.dts +++ b/arch/arc/boot/dts/nsimosci_hs.dts @@ -20,7 +20,7 @@  		/* this is for console on PGU */  		/* bootargs = "console=tty0 consoleblank=0"; */  		/* this is for console on serial */ -		bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug video=640x480-24"; +		bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug video=640x480-24 
print-fatal-signals=1";  	};  	aliases { diff --git a/arch/arc/boot/dts/nsimosci_hs_idu.dts b/arch/arc/boot/dts/nsimosci_hs_idu.dts index 5052917d4a99..72a2c723f1f7 100644 --- a/arch/arc/boot/dts/nsimosci_hs_idu.dts +++ b/arch/arc/boot/dts/nsimosci_hs_idu.dts @@ -18,7 +18,7 @@  	chosen {  		/* this is for console on serial */ -		bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblan=0 debug video=640x480-24"; +		bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblan=0 debug video=640x480-24 print-fatal-signals=1";  	};  	aliases { diff --git a/arch/arc/include/asm/bug.h b/arch/arc/include/asm/bug.h index ea022d47896c..21ec82466d62 100644 --- a/arch/arc/include/asm/bug.h +++ b/arch/arc/include/asm/bug.h @@ -23,7 +23,8 @@ void die(const char *str, struct pt_regs *regs, unsigned long address);  #define BUG()	do {								\  	pr_warn("BUG: failure at %s:%d/%s()!\n", __FILE__, __LINE__, __func__); \ -	dump_stack();								\ +	barrier_before_unreachable();						\ +	__builtin_trap();							\  } while (0)  #define HAVE_ARCH_BUG diff --git a/arch/arc/include/asm/entry-arcv2.h b/arch/arc/include/asm/entry-arcv2.h index 257a68f3c2fe..309f4e6721b3 100644 --- a/arch/arc/include/asm/entry-arcv2.h +++ b/arch/arc/include/asm/entry-arcv2.h @@ -184,7 +184,7 @@  .macro FAKE_RET_FROM_EXCPN  	lr      r9, [status32]  	bic     r9, r9, (STATUS_U_MASK|STATUS_DE_MASK|STATUS_AE_MASK) -	or      r9, r9, (STATUS_L_MASK|STATUS_IE_MASK) +	or      r9, r9, STATUS_IE_MASK  	kflag   r9  .endm diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c index f61a52b01625..5fe84e481654 100644 --- a/arch/arc/kernel/mcip.c +++ b/arch/arc/kernel/mcip.c @@ -22,10 +22,79 @@ static DEFINE_RAW_SPINLOCK(mcip_lock);  static char smp_cpuinfo_buf[128]; +/* + * Set mask to halt GFRC if any online core in SMP cluster is halted. + * Only works for ARC HS v3.0+, on earlier versions has no effect. + */ +static void mcip_update_gfrc_halt_mask(int cpu) +{ +	struct bcr_generic gfrc; +	unsigned long flags; +	u32 gfrc_halt_mask; + +	READ_BCR(ARC_REG_GFRC_BUILD, gfrc); + +	/* +	 * CMD_GFRC_SET_CORE and CMD_GFRC_READ_CORE commands were added in +	 * GFRC 0x3 version. +	 */ +	if (gfrc.ver < 0x3) +		return; + +	raw_spin_lock_irqsave(&mcip_lock, flags); + +	__mcip_cmd(CMD_GFRC_READ_CORE, 0); +	gfrc_halt_mask = read_aux_reg(ARC_REG_MCIP_READBACK); +	gfrc_halt_mask |= BIT(cpu); +	__mcip_cmd_data(CMD_GFRC_SET_CORE, 0, gfrc_halt_mask); + +	raw_spin_unlock_irqrestore(&mcip_lock, flags); +} + +static void mcip_update_debug_halt_mask(int cpu) +{ +	u32 mcip_mask = 0; +	unsigned long flags; + +	raw_spin_lock_irqsave(&mcip_lock, flags); + +	/* +	 * mcip_mask is same for CMD_DEBUG_SET_SELECT and CMD_DEBUG_SET_MASK +	 * commands. So read it once instead of reading both CMD_DEBUG_READ_MASK +	 * and CMD_DEBUG_READ_SELECT. +	 */ +	__mcip_cmd(CMD_DEBUG_READ_SELECT, 0); +	mcip_mask = read_aux_reg(ARC_REG_MCIP_READBACK); + +	mcip_mask |= BIT(cpu); + +	__mcip_cmd_data(CMD_DEBUG_SET_SELECT, 0, mcip_mask); +	/* +	 * Parameter specified halt cause: +	 * STATUS32[H]/actionpoint/breakpoint/self-halt +	 * We choose all of them (0xF). 
+	 */ +	__mcip_cmd_data(CMD_DEBUG_SET_MASK, 0xF, mcip_mask); + +	raw_spin_unlock_irqrestore(&mcip_lock, flags); +} +  static void mcip_setup_per_cpu(int cpu)  { +	struct mcip_bcr mp; + +	READ_BCR(ARC_REG_MCIP_BCR, mp); +  	smp_ipi_irq_setup(cpu, IPI_IRQ);  	smp_ipi_irq_setup(cpu, SOFTIRQ_IRQ); + +	/* Update GFRC halt mask as new CPU came online */ +	if (mp.gfrc) +		mcip_update_gfrc_halt_mask(cpu); + +	/* Update MCIP debug mask as new CPU came online */ +	if (mp.dbg) +		mcip_update_debug_halt_mask(cpu);  }  static void mcip_ipi_send(int cpu) @@ -101,11 +170,6 @@ static void mcip_probe_n_setup(void)  		IS_AVAIL1(mp.gfrc, "GFRC"));  	cpuinfo_arc700[0].extn.gfrc = mp.gfrc; - -	if (mp.dbg) { -		__mcip_cmd_data(CMD_DEBUG_SET_SELECT, 0, 0xf); -		__mcip_cmd_data(CMD_DEBUG_SET_MASK, 0xf, 0xf); -	}  }  struct plat_smp_ops plat_smp_ops = { diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index 9d27331fe69a..b2cae79a25d7 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -51,7 +51,7 @@ static const struct id_to_str arc_cpu_rel[] = {  	{ 0x51, "R2.0" },  	{ 0x52, "R2.1" },  	{ 0x53, "R3.0" }, -	{ 0x54, "R4.0" }, +	{ 0x54, "R3.10a" },  #endif  	{ 0x00, NULL   }  }; @@ -373,7 +373,7 @@ static void arc_chk_core_config(void)  {  	struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];  	int saved = 0, present = 0; -	char *opt_nm = NULL;; +	char *opt_nm = NULL;  	if (!cpu->extn.timer0)  		panic("Timer0 is not present!\n"); diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c index efe8b4200a67..21d86c36692b 100644 --- a/arch/arc/kernel/smp.c +++ b/arch/arc/kernel/smp.c @@ -24,6 +24,7 @@  #include <linux/reboot.h>  #include <linux/irqdomain.h>  #include <linux/export.h> +#include <linux/of_fdt.h>  #include <asm/processor.h>  #include <asm/setup.h> @@ -47,6 +48,42 @@ void __init smp_prepare_boot_cpu(void)  {  } +static int __init arc_get_cpu_map(const char *name, struct cpumask *cpumask) +{ +	unsigned long dt_root = of_get_flat_dt_root(); +	const char *buf; + +	buf = of_get_flat_dt_prop(dt_root, name, NULL); +	if (!buf) +		return -EINVAL; + +	if (cpulist_parse(buf, cpumask)) +		return -EINVAL; + +	return 0; +} + +/* + * Read from DeviceTree and setup cpu possible mask. If there is no + * "possible-cpus" property in DeviceTree pretend all [0..NR_CPUS-1] exist. 
+ */ +static void __init arc_init_cpu_possible(void) +{ +	struct cpumask cpumask; + +	if (arc_get_cpu_map("possible-cpus", &cpumask)) { +		pr_warn("Failed to get possible-cpus from dtb, pretending all %u cpus exist\n", +			NR_CPUS); + +		cpumask_setall(&cpumask); +	} + +	if (!cpumask_test_cpu(0, &cpumask)) +		panic("Master cpu (cpu[0]) is missed in cpu possible mask!"); + +	init_cpu_possible(&cpumask); +} +  /*   * Called from setup_arch() before calling setup_processor()   * @@ -58,10 +95,7 @@ void __init smp_prepare_boot_cpu(void)   */  void __init smp_init_cpus(void)  { -	unsigned int i; - -	for (i = 0; i < NR_CPUS; i++) -		set_cpu_possible(i, true); +	arc_init_cpu_possible();  	if (plat_smp_ops.init_early_smp)  		plat_smp_ops.init_early_smp(); @@ -70,16 +104,12 @@ void __init smp_init_cpus(void)  /* called from init ( ) =>  process 1 */  void __init smp_prepare_cpus(unsigned int max_cpus)  { -	int i; -  	/*  	 * if platform didn't set the present map already, do it now  	 * boot cpu is set to present already by init/main.c  	 */ -	if (num_present_cpus() <= 1) { -		for (i = 0; i < max_cpus; i++) -			set_cpu_present(i, true); -	} +	if (num_present_cpus() <= 1) +		init_cpu_present(cpu_possible_mask);  }  void __init smp_cpus_done(unsigned int max_cpus) diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c index 333daab7def0..183391d4d33a 100644 --- a/arch/arc/kernel/unwind.c +++ b/arch/arc/kernel/unwind.c @@ -366,7 +366,7 @@ static void init_unwind_hdr(struct unwind_table *table,  	return;  ret_err: -	panic("Attention !!! Dwarf FDE parsing errors\n");; +	panic("Attention !!! Dwarf FDE parsing errors\n");  }  #ifdef CONFIG_MODULES diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index eee924dfffa6..2072f3451e9c 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -780,7 +780,10 @@ noinline static void slc_entire_op(const int op)  	write_aux_reg(r, ctrl); -	write_aux_reg(ARC_REG_SLC_INVALIDATE, 1); +	if (op & OP_INV)	/* Inv or flush-n-inv use same cmd reg */ +		write_aux_reg(ARC_REG_SLC_INVALIDATE, 0x1); +	else +		write_aux_reg(ARC_REG_SLC_FLUSH, 0x1);  	/* Make sure "busy" bit reports correct stataus, see STAR 9001165532 */  	read_aux_reg(r); diff --git a/arch/arm/boot/dts/bcm11351.dtsi b/arch/arm/boot/dts/bcm11351.dtsi index 18045c38bcf1..db7cded1b7ad 100644 --- a/arch/arm/boot/dts/bcm11351.dtsi +++ b/arch/arm/boot/dts/bcm11351.dtsi @@ -55,7 +55,7 @@  		      <0x3ff00100 0x100>;  	}; -	smc@0x3404c000 { +	smc@3404c000 {  		compatible = "brcm,bcm11351-smc", "brcm,kona-smc";  		reg = <0x3404c000 0x400>; /* 1 KiB in SRAM */  	}; diff --git a/arch/arm/boot/dts/bcm21664.dtsi b/arch/arm/boot/dts/bcm21664.dtsi index 6dde95f21cef..266f2611dc22 100644 --- a/arch/arm/boot/dts/bcm21664.dtsi +++ b/arch/arm/boot/dts/bcm21664.dtsi @@ -55,7 +55,7 @@  		      <0x3ff00100 0x100>;  	}; -	smc@0x3404e000 { +	smc@3404e000 {  		compatible = "brcm,bcm21664-smc", "brcm,kona-smc";  		reg = <0x3404e000 0x400>; /* 1 KiB in SRAM */  	}; diff --git a/arch/arm/boot/dts/bcm2835.dtsi b/arch/arm/boot/dts/bcm2835.dtsi index 0e3d2a5ff208..a5c3824c8056 100644 --- a/arch/arm/boot/dts/bcm2835.dtsi +++ b/arch/arm/boot/dts/bcm2835.dtsi @@ -18,10 +18,10 @@  	soc {  		ranges = <0x7e000000 0x20000000 0x02000000>;  		dma-ranges = <0x40000000 0x00000000 0x20000000>; +	}; -		arm-pmu { -			compatible = "arm,arm1176-pmu"; -		}; +	arm-pmu { +		compatible = "arm,arm1176-pmu";  	};  }; diff --git a/arch/arm/boot/dts/bcm2836.dtsi b/arch/arm/boot/dts/bcm2836.dtsi index 1dfd76442777..c933e8413884 100644 --- 
a/arch/arm/boot/dts/bcm2836.dtsi +++ b/arch/arm/boot/dts/bcm2836.dtsi @@ -9,19 +9,19 @@  			 <0x40000000 0x40000000 0x00001000>;  		dma-ranges = <0xc0000000 0x00000000 0x3f000000>; -		local_intc: local_intc { +		local_intc: local_intc@40000000 {  			compatible = "brcm,bcm2836-l1-intc";  			reg = <0x40000000 0x100>;  			interrupt-controller;  			#interrupt-cells = <2>;  			interrupt-parent = <&local_intc>;  		}; +	}; -		arm-pmu { -			compatible = "arm,cortex-a7-pmu"; -			interrupt-parent = <&local_intc>; -			interrupts = <9 IRQ_TYPE_LEVEL_HIGH>; -		}; +	arm-pmu { +		compatible = "arm,cortex-a7-pmu"; +		interrupt-parent = <&local_intc>; +		interrupts = <9 IRQ_TYPE_LEVEL_HIGH>;  	};  	timer { diff --git a/arch/arm/boot/dts/bcm2837.dtsi b/arch/arm/boot/dts/bcm2837.dtsi index efa7d3387ab2..7704bb029605 100644 --- a/arch/arm/boot/dts/bcm2837.dtsi +++ b/arch/arm/boot/dts/bcm2837.dtsi @@ -8,7 +8,7 @@  			 <0x40000000 0x40000000 0x00001000>;  		dma-ranges = <0xc0000000 0x00000000 0x3f000000>; -		local_intc: local_intc { +		local_intc: local_intc@40000000 {  			compatible = "brcm,bcm2836-l1-intc";  			reg = <0x40000000 0x100>;  			interrupt-controller; diff --git a/arch/arm/boot/dts/bcm283x.dtsi b/arch/arm/boot/dts/bcm283x.dtsi index 18db25a5a66e..9d293decf8d3 100644 --- a/arch/arm/boot/dts/bcm283x.dtsi +++ b/arch/arm/boot/dts/bcm283x.dtsi @@ -465,7 +465,7 @@  			status = "disabled";  		}; -		aux: aux@0x7e215000 { +		aux: aux@7e215000 {  			compatible = "brcm,bcm2835-aux";  			#clock-cells = <1>;  			reg = <0x7e215000 0x8>; diff --git a/arch/arm/boot/dts/bcm958625hr.dts b/arch/arm/boot/dts/bcm958625hr.dts index 6a44b8021702..f0e2008f7490 100644 --- a/arch/arm/boot/dts/bcm958625hr.dts +++ b/arch/arm/boot/dts/bcm958625hr.dts @@ -49,7 +49,7 @@  	memory {  		device_type = "memory"; -		reg = <0x60000000 0x80000000>; +		reg = <0x60000000 0x20000000>;  	};  	gpio-restart { diff --git a/arch/arm/boot/dts/gemini-dlink-dns-313.dts b/arch/arm/boot/dts/gemini-dlink-dns-313.dts index 08568ce24d06..da8bb9d60f99 100644 --- a/arch/arm/boot/dts/gemini-dlink-dns-313.dts +++ b/arch/arm/boot/dts/gemini-dlink-dns-313.dts @@ -269,7 +269,7 @@  		sata: sata@46000000 {  			/* The ROM uses this muxmode */ -			cortina,gemini-ata-muxmode = <3>; +			cortina,gemini-ata-muxmode = <0>;  			cortina,gemini-enable-sata-bridge;  			status = "okay";  		}; diff --git a/arch/arm/boot/dts/imx6dl-icore-rqs.dts b/arch/arm/boot/dts/imx6dl-icore-rqs.dts index cf42c2f5cdc7..1281bc39b7ab 100644 --- a/arch/arm/boot/dts/imx6dl-icore-rqs.dts +++ b/arch/arm/boot/dts/imx6dl-icore-rqs.dts @@ -42,7 +42,7 @@  /dts-v1/; -#include "imx6q.dtsi" +#include "imx6dl.dtsi"  #include "imx6qdl-icore-rqs.dtsi"  / { diff --git a/arch/arm/boot/dts/logicpd-som-lv.dtsi b/arch/arm/boot/dts/logicpd-som-lv.dtsi index c1aa7a4518fb..a30ee9fcb3ae 100644 --- a/arch/arm/boot/dts/logicpd-som-lv.dtsi +++ b/arch/arm/boot/dts/logicpd-som-lv.dtsi @@ -71,6 +71,8 @@  };  &i2c1 { +	pinctrl-names = "default"; +	pinctrl-0 = <&i2c1_pins>;  	clock-frequency = <2600000>;  	twl: twl@48 { @@ -189,7 +191,12 @@  		>;  	}; - +	i2c1_pins: pinmux_i2c1_pins { +		pinctrl-single,pins = < +			OMAP3_CORE1_IOPAD(0x21ba, PIN_INPUT | MUX_MODE0)        /* i2c1_scl.i2c1_scl */ +			OMAP3_CORE1_IOPAD(0x21bc, PIN_INPUT | MUX_MODE0)        /* i2c1_sda.i2c1_sda */ +		>; +	};  };  &omap3_pmx_wkup { diff --git a/arch/arm/boot/dts/logicpd-torpedo-som.dtsi b/arch/arm/boot/dts/logicpd-torpedo-som.dtsi index b50b796e15c7..47915447a826 100644 --- a/arch/arm/boot/dts/logicpd-torpedo-som.dtsi +++ 
b/arch/arm/boot/dts/logicpd-torpedo-som.dtsi @@ -66,6 +66,8 @@  };  &i2c1 { +	pinctrl-names = "default"; +	pinctrl-0 = <&i2c1_pins>;  	clock-frequency = <2600000>;  	twl: twl@48 { @@ -136,6 +138,12 @@  			OMAP3_CORE1_IOPAD(0x21b8, PIN_INPUT | MUX_MODE0)	/* hsusb0_data7.hsusb0_data7 */  		>;  	}; +	i2c1_pins: pinmux_i2c1_pins { +		pinctrl-single,pins = < +			OMAP3_CORE1_IOPAD(0x21ba, PIN_INPUT | MUX_MODE0)        /* i2c1_scl.i2c1_scl */ +			OMAP3_CORE1_IOPAD(0x21bc, PIN_INPUT | MUX_MODE0)        /* i2c1_sda.i2c1_sda */ +		>; +	};  };  &uart2 { diff --git a/arch/arm/boot/dts/omap5-uevm.dts b/arch/arm/boot/dts/omap5-uevm.dts index ec2c8baef62a..592e17fd4eeb 100644 --- a/arch/arm/boot/dts/omap5-uevm.dts +++ b/arch/arm/boot/dts/omap5-uevm.dts @@ -47,7 +47,7 @@  			gpios = <&gpio3 19 GPIO_ACTIVE_LOW>;	/* gpio3_83 */  			wakeup-source;  			autorepeat; -			debounce_interval = <50>; +			debounce-interval = <50>;  		};  	}; diff --git a/arch/arm/boot/dts/rk3036.dtsi b/arch/arm/boot/dts/rk3036.dtsi index 3b704cfed69a..a97458112ff6 100644 --- a/arch/arm/boot/dts/rk3036.dtsi +++ b/arch/arm/boot/dts/rk3036.dtsi @@ -280,7 +280,7 @@  		max-frequency = <37500000>;  		clocks = <&cru HCLK_SDIO>, <&cru SCLK_SDIO>,  			 <&cru SCLK_SDIO_DRV>, <&cru SCLK_SDIO_SAMPLE>; -		clock-names = "biu", "ciu", "ciu_drv", "ciu_sample"; +		clock-names = "biu", "ciu", "ciu-drive", "ciu-sample";  		fifo-depth = <0x100>;  		interrupts = <GIC_SPI 15 IRQ_TYPE_LEVEL_HIGH>;  		resets = <&cru SRST_SDIO>; @@ -298,7 +298,7 @@  		max-frequency = <37500000>;  		clocks = <&cru HCLK_EMMC>, <&cru SCLK_EMMC>,  			 <&cru SCLK_EMMC_DRV>, <&cru SCLK_EMMC_SAMPLE>; -		clock-names = "biu", "ciu", "ciu_drv", "ciu_sample"; +		clock-names = "biu", "ciu", "ciu-drive", "ciu-sample";  		default-sample-phase = <158>;  		disable-wp;  		dmas = <&pdma 12>; diff --git a/arch/arm/boot/dts/rk322x.dtsi b/arch/arm/boot/dts/rk322x.dtsi index 780ec3a99b21..341deaf62ff6 100644 --- a/arch/arm/boot/dts/rk322x.dtsi +++ b/arch/arm/boot/dts/rk322x.dtsi @@ -621,7 +621,7 @@  		interrupts = <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>;  		clocks = <&cru HCLK_SDMMC>, <&cru SCLK_SDMMC>,  			 <&cru SCLK_SDMMC_DRV>, <&cru SCLK_SDMMC_SAMPLE>; -		clock-names = "biu", "ciu", "ciu_drv", "ciu_sample"; +		clock-names = "biu", "ciu", "ciu-drive", "ciu-sample";  		fifo-depth = <0x100>;  		pinctrl-names = "default";  		pinctrl-0 = <&sdmmc_clk &sdmmc_cmd &sdmmc_bus4>; @@ -634,7 +634,7 @@  		interrupts = <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>;  		clocks = <&cru HCLK_SDIO>, <&cru SCLK_SDIO>,  			 <&cru SCLK_SDIO_DRV>, <&cru SCLK_SDIO_SAMPLE>; -		clock-names = "biu", "ciu", "ciu_drv", "ciu_sample"; +		clock-names = "biu", "ciu", "ciu-drive", "ciu-sample";  		fifo-depth = <0x100>;  		pinctrl-names = "default";  		pinctrl-0 = <&sdio_clk &sdio_cmd &sdio_bus4>; @@ -649,7 +649,7 @@  		max-frequency = <37500000>;  		clocks = <&cru HCLK_EMMC>, <&cru SCLK_EMMC>,  			 <&cru SCLK_EMMC_DRV>, <&cru SCLK_EMMC_SAMPLE>; -		clock-names = "biu", "ciu", "ciu_drv", "ciu_sample"; +		clock-names = "biu", "ciu", "ciu-drive", "ciu-sample";  		bus-width = <8>;  		default-sample-phase = <158>;  		fifo-depth = <0x100>; diff --git a/arch/arm/boot/dts/rk3288-phycore-som.dtsi b/arch/arm/boot/dts/rk3288-phycore-som.dtsi index 99cfae875e12..5eae4776ffde 100644 --- a/arch/arm/boot/dts/rk3288-phycore-som.dtsi +++ b/arch/arm/boot/dts/rk3288-phycore-som.dtsi @@ -110,26 +110,6 @@  	};  }; -&cpu0 { -	cpu0-supply = <&vdd_cpu>; -	operating-points = < -		/* KHz    uV */ -		1800000	1400000 -		1608000	1350000 -		1512000 1300000 -		1416000 1200000 -	
	1200000 1100000 -		1008000 1050000 -		 816000 1000000 -		 696000  950000 -		 600000  900000 -		 408000  900000 -		 312000  900000 -		 216000  900000 -		 126000  900000 -	>; -}; -  &emmc {  	status = "okay";  	bus-width = <8>; diff --git a/arch/arm/boot/dts/zx296702.dtsi b/arch/arm/boot/dts/zx296702.dtsi index 8a74efdb6360..240e7a23d81f 100644 --- a/arch/arm/boot/dts/zx296702.dtsi +++ b/arch/arm/boot/dts/zx296702.dtsi @@ -56,7 +56,7 @@  			clocks = <&topclk ZX296702_A9_PERIPHCLK>;  		}; -		l2cc: l2-cache-controller@0x00c00000 { +		l2cc: l2-cache-controller@c00000 {  			compatible = "arm,pl310-cache";  			reg = <0x00c00000 0x1000>;  			cache-unified; @@ -67,30 +67,30 @@  			arm,double-linefill-incr = <0>;  		}; -		pcu: pcu@0xa0008000 { +		pcu: pcu@a0008000 {  			compatible = "zte,zx296702-pcu";  			reg = <0xa0008000 0x1000>;  		}; -		topclk: topclk@0x09800000 { +		topclk: topclk@9800000 {  			compatible = "zte,zx296702-topcrm-clk";  			reg = <0x09800000 0x1000>;  			#clock-cells = <1>;  		}; -		lsp1clk: lsp1clk@0x09400000 { +		lsp1clk: lsp1clk@9400000 {  			compatible = "zte,zx296702-lsp1crpm-clk";  			reg = <0x09400000 0x1000>;  			#clock-cells = <1>;  		}; -		lsp0clk: lsp0clk@0x0b000000 { +		lsp0clk: lsp0clk@b000000 {  			compatible = "zte,zx296702-lsp0crpm-clk";  			reg = <0x0b000000 0x1000>;  			#clock-cells = <1>;  		}; -		uart0: serial@0x09405000 { +		uart0: serial@9405000 {  			compatible = "zte,zx296702-uart";  			reg = <0x09405000 0x1000>;  			interrupts = <GIC_SPI 37 IRQ_TYPE_LEVEL_HIGH>; @@ -98,7 +98,7 @@  			status = "disabled";  		}; -		uart1: serial@0x09406000 { +		uart1: serial@9406000 {  			compatible = "zte,zx296702-uart";  			reg = <0x09406000 0x1000>;  			interrupts = <GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>; @@ -106,7 +106,7 @@  			status = "disabled";  		}; -		mmc0: mmc@0x09408000 { +		mmc0: mmc@9408000 {  			compatible = "snps,dw-mshc";  			#address-cells = <1>;  			#size-cells = <0>; @@ -119,7 +119,7 @@  			status = "disabled";  		}; -		mmc1: mmc@0x0b003000 { +		mmc1: mmc@b003000 {  			compatible = "snps,dw-mshc";  			#address-cells = <1>;  			#size-cells = <0>; @@ -132,7 +132,7 @@  			status = "disabled";  		}; -		sysctrl: sysctrl@0xa0007000 { +		sysctrl: sysctrl@a0007000 {  			compatible = "zte,sysctrl", "syscon";  			reg = <0xa0007000 0x1000>;  		}; diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index 2f145c4af93a..92674f247a12 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -319,7 +319,7 @@ CONFIG_MEDIA_CAMERA_SUPPORT=y  CONFIG_RC_CORE=m  CONFIG_MEDIA_CONTROLLER=y  CONFIG_VIDEO_V4L2_SUBDEV_API=y -CONFIG_LIRC=m +CONFIG_LIRC=y  CONFIG_RC_DEVICES=y  CONFIG_IR_RX51=m  CONFIG_V4L_PLATFORM_DRIVERS=y diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c index 629f8e9981f1..cf2701cb0de8 100644 --- a/arch/arm/kernel/time.c +++ b/arch/arm/kernel/time.c @@ -83,7 +83,7 @@ static void dummy_clock_access(struct timespec64 *ts)  }  static clock_access_fn __read_persistent_clock = dummy_clock_access; -static clock_access_fn __read_boot_clock = dummy_clock_access;; +static clock_access_fn __read_boot_clock = dummy_clock_access;  void read_persistent_clock64(struct timespec64 *ts)  { diff --git a/arch/arm/kvm/hyp/Makefile b/arch/arm/kvm/hyp/Makefile index 5638ce0c9524..63d6b404d88e 100644 --- a/arch/arm/kvm/hyp/Makefile +++ b/arch/arm/kvm/hyp/Makefile @@ -7,6 +7,8 @@ ccflags-y += -fno-stack-protector -DDISABLE_BRANCH_PROFILING  KVM=../../../../virt/kvm +CFLAGS_ARMV7VE		   :=$(call cc-option, 
-march=armv7ve) +  obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v2-sr.o  obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v3-sr.o  obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o @@ -15,7 +17,10 @@ obj-$(CONFIG_KVM_ARM_HOST) += tlb.o  obj-$(CONFIG_KVM_ARM_HOST) += cp15-sr.o  obj-$(CONFIG_KVM_ARM_HOST) += vfp.o  obj-$(CONFIG_KVM_ARM_HOST) += banked-sr.o +CFLAGS_banked-sr.o	   += $(CFLAGS_ARMV7VE) +  obj-$(CONFIG_KVM_ARM_HOST) += entry.o  obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o  obj-$(CONFIG_KVM_ARM_HOST) += switch.o +CFLAGS_switch.o		   += $(CFLAGS_ARMV7VE)  obj-$(CONFIG_KVM_ARM_HOST) += s2-setup.o diff --git a/arch/arm/kvm/hyp/banked-sr.c b/arch/arm/kvm/hyp/banked-sr.c index 111bda8cdebd..be4b8b0a40ad 100644 --- a/arch/arm/kvm/hyp/banked-sr.c +++ b/arch/arm/kvm/hyp/banked-sr.c @@ -20,6 +20,10 @@  #include <asm/kvm_hyp.h> +/* + * gcc before 4.9 doesn't understand -march=armv7ve, so we have to + * trick the assembler. + */  __asm__(".arch_extension     virt");  void __hyp_text __banked_save_state(struct kvm_cpu_context *ctxt) diff --git a/arch/arm/mach-clps711x/board-dt.c b/arch/arm/mach-clps711x/board-dt.c index ee1f83b1a332..4c89a8e9a2e3 100644 --- a/arch/arm/mach-clps711x/board-dt.c +++ b/arch/arm/mach-clps711x/board-dt.c @@ -69,7 +69,7 @@ static void clps711x_restart(enum reboot_mode mode, const char *cmd)  	soft_restart(0);  } -static const char *clps711x_compat[] __initconst = { +static const char *const clps711x_compat[] __initconst = {  	"cirrus,ep7209",  	NULL  }; diff --git a/arch/arm/mach-davinci/board-dm355-evm.c b/arch/arm/mach-davinci/board-dm355-evm.c index e457f299cd44..d6b11907380c 100644 --- a/arch/arm/mach-davinci/board-dm355-evm.c +++ b/arch/arm/mach-davinci/board-dm355-evm.c @@ -368,7 +368,7 @@ static struct spi_eeprom at25640a = {  	.flags		= EE_ADDR2,  }; -static struct spi_board_info dm355_evm_spi_info[] __initconst = { +static const struct spi_board_info dm355_evm_spi_info[] __initconst = {  	{  		.modalias	= "at25",  		.platform_data	= &at25640a, diff --git a/arch/arm/mach-davinci/board-dm355-leopard.c b/arch/arm/mach-davinci/board-dm355-leopard.c index be997243447b..fad9a5611a5d 100644 --- a/arch/arm/mach-davinci/board-dm355-leopard.c +++ b/arch/arm/mach-davinci/board-dm355-leopard.c @@ -217,7 +217,7 @@ static struct spi_eeprom at25640a = {  	.flags		= EE_ADDR2,  }; -static struct spi_board_info dm355_leopard_spi_info[] __initconst = { +static const struct spi_board_info dm355_leopard_spi_info[] __initconst = {  	{  		.modalias	= "at25",  		.platform_data	= &at25640a, diff --git a/arch/arm/mach-davinci/board-dm365-evm.c b/arch/arm/mach-davinci/board-dm365-evm.c index e75741fb2c1d..e3780986d2a3 100644 --- a/arch/arm/mach-davinci/board-dm365-evm.c +++ b/arch/arm/mach-davinci/board-dm365-evm.c @@ -726,7 +726,7 @@ static struct spi_eeprom at25640 = {  	.flags		= EE_ADDR2,  }; -static struct spi_board_info dm365_evm_spi_info[] __initconst = { +static const struct spi_board_info dm365_evm_spi_info[] __initconst = {  	{  		.modalias	= "at25",  		.platform_data	= &at25640, diff --git a/arch/arm/mach-mvebu/Kconfig b/arch/arm/mach-mvebu/Kconfig index 6b32dc527edc..2c20599cc350 100644 --- a/arch/arm/mach-mvebu/Kconfig +++ b/arch/arm/mach-mvebu/Kconfig @@ -41,7 +41,7 @@ config MACH_ARMADA_375  	depends on ARCH_MULTI_V7  	select ARMADA_370_XP_IRQ  	select ARM_ERRATA_720789 -	select ARM_ERRATA_753970 +	select PL310_ERRATA_753970  	select ARM_GIC  	select ARMADA_375_CLK  	select HAVE_ARM_SCU @@ -57,7 +57,7 @@ config MACH_ARMADA_38X  	bool "Marvell Armada 380/385 
boards"  	depends on ARCH_MULTI_V7  	select ARM_ERRATA_720789 -	select ARM_ERRATA_753970 +	select PL310_ERRATA_753970  	select ARM_GIC  	select ARM_GLOBAL_TIMER  	select CLKSRC_ARM_GLOBAL_TIMER_SCHED_CLOCK diff --git a/arch/arm/mach-omap1/clock.c b/arch/arm/mach-omap1/clock.c index 43e3e188f521..fa512413a471 100644 --- a/arch/arm/mach-omap1/clock.c +++ b/arch/arm/mach-omap1/clock.c @@ -1011,17 +1011,17 @@ static int clk_debugfs_register_one(struct clk *c)  		return -ENOMEM;  	c->dent = d; -	d = debugfs_create_u8("usecount", S_IRUGO, c->dent, (u8 *)&c->usecount); +	d = debugfs_create_u8("usecount", S_IRUGO, c->dent, &c->usecount);  	if (!d) {  		err = -ENOMEM;  		goto err_out;  	} -	d = debugfs_create_u32("rate", S_IRUGO, c->dent, (u32 *)&c->rate); +	d = debugfs_create_ulong("rate", S_IRUGO, c->dent, &c->rate);  	if (!d) {  		err = -ENOMEM;  		goto err_out;  	} -	d = debugfs_create_x32("flags", S_IRUGO, c->dent, (u32 *)&c->flags); +	d = debugfs_create_x8("flags", S_IRUGO, c->dent, &c->flags);  	if (!d) {  		err = -ENOMEM;  		goto err_out; diff --git a/arch/arm/mach-omap2/omap-wakeupgen.c b/arch/arm/mach-omap2/omap-wakeupgen.c index 4bb6751864a5..fc5fb776a710 100644 --- a/arch/arm/mach-omap2/omap-wakeupgen.c +++ b/arch/arm/mach-omap2/omap-wakeupgen.c @@ -299,8 +299,6 @@ static void irq_save_context(void)  	if (soc_is_dra7xx())  		return; -	if (!sar_base) -		sar_base = omap4_get_sar_ram_base();  	if (wakeupgen_ops && wakeupgen_ops->save_context)  		wakeupgen_ops->save_context();  } @@ -598,6 +596,8 @@ static int __init wakeupgen_init(struct device_node *node,  	irq_hotplug_init();  	irq_pm_init(); +	sar_base = omap4_get_sar_ram_base(); +  	return 0;  }  IRQCHIP_DECLARE(ti_wakeupgen, "ti,omap4-wugen-mpu", wakeupgen_init); diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index 124f9af34a15..34156eca8e23 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -977,6 +977,9 @@ static int _enable_clocks(struct omap_hwmod *oh)  	pr_debug("omap_hwmod: %s: enabling clocks\n", oh->name); +	if (oh->flags & HWMOD_OPT_CLKS_NEEDED) +		_enable_optional_clocks(oh); +  	if (oh->_clk)  		clk_enable(oh->_clk); @@ -985,9 +988,6 @@ static int _enable_clocks(struct omap_hwmod *oh)  			clk_enable(os->_clk);  	} -	if (oh->flags & HWMOD_OPT_CLKS_NEEDED) -		_enable_optional_clocks(oh); -  	/* The opt clocks are controlled by the device driver. 
*/  	return 0; diff --git a/arch/arm/mach-omap2/pm.c b/arch/arm/mach-omap2/pm.c index 366158a54fcd..6f68576e5695 100644 --- a/arch/arm/mach-omap2/pm.c +++ b/arch/arm/mach-omap2/pm.c @@ -186,7 +186,7 @@ static void omap_pm_end(void)  	cpu_idle_poll_ctrl(false);  } -static void omap_pm_finish(void) +static void omap_pm_wake(void)  {  	if (soc_is_omap34xx())  		omap_prcm_irq_complete(); @@ -196,7 +196,7 @@ static const struct platform_suspend_ops omap_pm_ops = {  	.begin		= omap_pm_begin,  	.end		= omap_pm_end,  	.enter		= omap_pm_enter, -	.finish		= omap_pm_finish, +	.wake		= omap_pm_wake,  	.valid		= suspend_valid_only_mem,  }; diff --git a/arch/arm/mach-omap2/timer.c b/arch/arm/mach-omap2/timer.c index ece09c9461f7..d61fbd7a2840 100644 --- a/arch/arm/mach-omap2/timer.c +++ b/arch/arm/mach-omap2/timer.c @@ -156,12 +156,6 @@ static struct clock_event_device clockevent_gpt = {  	.tick_resume		= omap2_gp_timer_shutdown,  }; -static struct property device_disabled = { -	.name = "status", -	.length = sizeof("disabled"), -	.value = "disabled", -}; -  static const struct of_device_id omap_timer_match[] __initconst = {  	{ .compatible = "ti,omap2420-timer", },  	{ .compatible = "ti,omap3430-timer", }, @@ -203,8 +197,17 @@ static struct device_node * __init omap_get_timer_dt(const struct of_device_id *  				  of_get_property(np, "ti,timer-secure", NULL)))  			continue; -		if (!of_device_is_compatible(np, "ti,omap-counter32k")) -			of_add_property(np, &device_disabled); +		if (!of_device_is_compatible(np, "ti,omap-counter32k")) { +			struct property *prop; + +			prop = kzalloc(sizeof(*prop), GFP_KERNEL); +			if (!prop) +				return NULL; +			prop->name = "status"; +			prop->value = "disabled"; +			prop->length = strlen(prop->value); +			of_add_property(np, prop); +		}  		return np;  	} diff --git a/arch/arm/mach-orion5x/Kconfig b/arch/arm/mach-orion5x/Kconfig index 2a7bb6ccdcb7..a810f4dd34b1 100644 --- a/arch/arm/mach-orion5x/Kconfig +++ b/arch/arm/mach-orion5x/Kconfig @@ -58,7 +58,6 @@ config MACH_KUROBOX_PRO  config MACH_DNS323  	bool "D-Link DNS-323" -	select GENERIC_NET_UTILS  	select I2C_BOARDINFO if I2C  	help  	  Say 'Y' here if you want your kernel to support the @@ -66,7 +65,6 @@ config MACH_DNS323  config MACH_TS209  	bool "QNAP TS-109/TS-209" -	select GENERIC_NET_UTILS  	help  	  Say 'Y' here if you want your kernel to support the  	  QNAP TS-109/TS-209 platform. @@ -101,7 +99,6 @@ config MACH_LINKSTATION_LS_HGL  config MACH_TS409  	bool "QNAP TS-409" -	select GENERIC_NET_UTILS  	help  	  Say 'Y' here if you want your kernel to support the  	  QNAP TS-409 platform. diff --git a/arch/arm/mach-orion5x/dns323-setup.c b/arch/arm/mach-orion5x/dns323-setup.c index cd483bfb5ca8..d13344b2ddcd 100644 --- a/arch/arm/mach-orion5x/dns323-setup.c +++ b/arch/arm/mach-orion5x/dns323-setup.c @@ -173,10 +173,42 @@ static struct mv643xx_eth_platform_data dns323_eth_data = {  	.phy_addr = MV643XX_ETH_PHY_ADDR(8),  }; +/* dns323_parse_hex_*() taken from tsx09-common.c; should a common copy of these + * functions be kept somewhere? 
+ */ +static int __init dns323_parse_hex_nibble(char n) +{ +	if (n >= '0' && n <= '9') +		return n - '0'; + +	if (n >= 'A' && n <= 'F') +		return n - 'A' + 10; + +	if (n >= 'a' && n <= 'f') +		return n - 'a' + 10; + +	return -1; +} + +static int __init dns323_parse_hex_byte(const char *b) +{ +	int hi; +	int lo; + +	hi = dns323_parse_hex_nibble(b[0]); +	lo = dns323_parse_hex_nibble(b[1]); + +	if (hi < 0 || lo < 0) +		return -1; + +	return (hi << 4) | lo; +} +  static int __init dns323_read_mac_addr(void)  {  	u_int8_t addr[6]; -	void __iomem *mac_page; +	int i; +	char *mac_page;  	/* MAC address is stored as a regular ol' string in /dev/mtdblock4  	 * (0x007d0000-0x00800000) starting at offset 196480 (0x2ff80). @@ -185,8 +217,23 @@ static int __init dns323_read_mac_addr(void)  	if (!mac_page)  		return -ENOMEM; -	if (!mac_pton((__force const char *) mac_page, addr)) -		goto error_fail; +	/* Sanity check the string we're looking at */ +	for (i = 0; i < 5; i++) { +		if (*(mac_page + (i * 3) + 2) != ':') { +			goto error_fail; +		} +	} + +	for (i = 0; i < 6; i++)	{ +		int byte; + +		byte = dns323_parse_hex_byte(mac_page + (i * 3)); +		if (byte < 0) { +			goto error_fail; +		} + +		addr[i] = byte; +	}  	iounmap(mac_page);  	printk("DNS-323: Found ethernet MAC address: %pM\n", addr); diff --git a/arch/arm/mach-orion5x/tsx09-common.c b/arch/arm/mach-orion5x/tsx09-common.c index 89774985d380..905d4f2dd0b8 100644 --- a/arch/arm/mach-orion5x/tsx09-common.c +++ b/arch/arm/mach-orion5x/tsx09-common.c @@ -53,12 +53,53 @@ struct mv643xx_eth_platform_data qnap_tsx09_eth_data = {  	.phy_addr	= MV643XX_ETH_PHY_ADDR(8),  }; +static int __init qnap_tsx09_parse_hex_nibble(char n) +{ +	if (n >= '0' && n <= '9') +		return n - '0'; + +	if (n >= 'A' && n <= 'F') +		return n - 'A' + 10; + +	if (n >= 'a' && n <= 'f') +		return n - 'a' + 10; + +	return -1; +} + +static int __init qnap_tsx09_parse_hex_byte(const char *b) +{ +	int hi; +	int lo; + +	hi = qnap_tsx09_parse_hex_nibble(b[0]); +	lo = qnap_tsx09_parse_hex_nibble(b[1]); + +	if (hi < 0 || lo < 0) +		return -1; + +	return (hi << 4) | lo; +} +  static int __init qnap_tsx09_check_mac_addr(const char *addr_str)  {  	u_int8_t addr[6]; +	int i; -	if (!mac_pton(addr_str, addr)) -		return -1; +	for (i = 0; i < 6; i++) { +		int byte; + +		/* +		 * Enforce "xx:xx:xx:xx:xx:xx\n" format. +		 */ +		if (addr_str[(i * 3) + 2] != ((i < 5) ? 
':' : '\n')) +			return -1; + +		byte = qnap_tsx09_parse_hex_byte(addr_str + (i * 3)); +		if (byte < 0) +			return -1; +		addr[i] = byte; +	}  	printk(KERN_INFO "tsx09: found ethernet mac address %pM\n", addr); @@ -77,12 +118,12 @@ void __init qnap_tsx09_find_mac_addr(u32 mem_base, u32 size)  	unsigned long addr;  	for (addr = mem_base; addr < (mem_base + size); addr += 1024) { -		void __iomem *nor_page; +		char *nor_page;  		int ret = 0;  		nor_page = ioremap(addr, 1024);  		if (nor_page != NULL) { -			ret = qnap_tsx09_check_mac_addr((__force const char *)nor_page); +			ret = qnap_tsx09_check_mac_addr(nor_page);  			iounmap(nor_page);  		} diff --git a/arch/arm/mach-ux500/cpu-db8500.c b/arch/arm/mach-ux500/cpu-db8500.c index 57058ac46f49..7e5d7a083707 100644 --- a/arch/arm/mach-ux500/cpu-db8500.c +++ b/arch/arm/mach-ux500/cpu-db8500.c @@ -23,7 +23,6 @@  #include <linux/of.h>  #include <linux/of_address.h>  #include <linux/of_platform.h> -#include <linux/perf/arm_pmu.h>  #include <linux/regulator/machine.h>  #include <asm/outercache.h> @@ -112,37 +111,6 @@ static void ux500_restart(enum reboot_mode mode, const char *cmd)  	prcmu_system_reset(0);  } -/* - * The PMU IRQ lines of two cores are wired together into a single interrupt. - * Bounce the interrupt to the other core if it's not ours. - */ -static irqreturn_t db8500_pmu_handler(int irq, void *dev, irq_handler_t handler) -{ -	irqreturn_t ret = handler(irq, dev); -	int other = !smp_processor_id(); - -	if (ret == IRQ_NONE && cpu_online(other)) -		irq_set_affinity(irq, cpumask_of(other)); - -	/* -	 * We should be able to get away with the amount of IRQ_NONEs we give, -	 * while still having the spurious IRQ detection code kick in if the -	 * interrupt really starts hitting spuriously. -	 */ -	return ret; -} - -static struct arm_pmu_platdata db8500_pmu_platdata = { -	.handle_irq		= db8500_pmu_handler, -	.irq_flags		= IRQF_NOBALANCING | IRQF_NO_THREAD, -}; - -static struct of_dev_auxdata u8500_auxdata_lookup[] __initdata = { -	/* Requires call-back bindings. 
*/ -	OF_DEV_AUXDATA("arm,cortex-a9-pmu", 0, "arm-pmu", &db8500_pmu_platdata), -	{}, -}; -  static struct of_dev_auxdata u8540_auxdata_lookup[] __initdata = {  	OF_DEV_AUXDATA("stericsson,db8500-prcmu", 0x80157000, "db8500-prcmu", NULL),  	{}, @@ -165,9 +133,6 @@ static void __init u8500_init_machine(void)  	if (of_machine_is_compatible("st-ericsson,u8540"))  		of_platform_populate(NULL, u8500_local_bus_nodes,  				     u8540_auxdata_lookup, NULL); -	else -		of_platform_populate(NULL, u8500_local_bus_nodes, -				     u8500_auxdata_lookup, NULL);  }  static const char * stericsson_dt_platform_compat[] = { diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c index aff6994950ba..a2399fd66e97 100644 --- a/arch/arm/plat-orion/common.c +++ b/arch/arm/plat-orion/common.c @@ -472,28 +472,27 @@ void __init orion_ge11_init(struct mv643xx_eth_platform_data *eth_data,  /*****************************************************************************   * Ethernet switch   ****************************************************************************/ -static __initconst const char *orion_ge00_mvmdio_bus_name = "orion-mii"; -static __initdata struct mdio_board_info -		  orion_ge00_switch_board_info; +static __initdata struct mdio_board_info orion_ge00_switch_board_info = { +	.bus_id   = "orion-mii", +	.modalias = "mv88e6085", +};  void __init orion_ge00_switch_init(struct dsa_chip_data *d)  { -	struct mdio_board_info *bd;  	unsigned int i;  	if (!IS_BUILTIN(CONFIG_PHYLIB))  		return; -	for (i = 0; i < ARRAY_SIZE(d->port_names); i++) -		if (!strcmp(d->port_names[i], "cpu")) +	for (i = 0; i < ARRAY_SIZE(d->port_names); i++) { +		if (!strcmp(d->port_names[i], "cpu")) { +			d->netdev[i] = &orion_ge00.dev;  			break; +		} +	} -	bd = &orion_ge00_switch_board_info; -	bd->bus_id = orion_ge00_mvmdio_bus_name; -	bd->mdio_addr = d->sw_addr; -	d->netdev[i] = &orion_ge00.dev; -	strcpy(bd->modalias, "mv88e6085"); -	bd->platform_data = d; +	orion_ge00_switch_board_info.mdio_addr = d->sw_addr; +	orion_ge00_switch_board_info.platform_data = d;  	mdiobus_register_board_info(&orion_ge00_switch_board_info, 1);  } diff --git a/arch/arm64/boot/dts/amlogic/meson-axg.dtsi b/arch/arm64/boot/dts/amlogic/meson-axg.dtsi index a80632641b39..70c776ef7aa7 100644 --- a/arch/arm64/boot/dts/amlogic/meson-axg.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-axg.dtsi @@ -165,14 +165,14 @@  			uart_A: serial@24000 {  				compatible = "amlogic,meson-gx-uart", "amlogic,meson-uart"; -				reg = <0x0 0x24000 0x0 0x14>; +				reg = <0x0 0x24000 0x0 0x18>;  				interrupts = <GIC_SPI 26 IRQ_TYPE_EDGE_RISING>;  				status = "disabled";  			};  			uart_B: serial@23000 {  				compatible = "amlogic,meson-gx-uart", "amlogic,meson-uart"; -				reg = <0x0 0x23000 0x0 0x14>; +				reg = <0x0 0x23000 0x0 0x18>;  				interrupts = <GIC_SPI 75 IRQ_TYPE_EDGE_RISING>;  				status = "disabled";  			}; diff --git a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi index 6cb3c2a52baf..4ee2e7951482 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi @@ -235,14 +235,14 @@  			uart_A: serial@84c0 {  				compatible = "amlogic,meson-gx-uart"; -				reg = <0x0 0x84c0 0x0 0x14>; +				reg = <0x0 0x84c0 0x0 0x18>;  				interrupts = <GIC_SPI 26 IRQ_TYPE_EDGE_RISING>;  				status = "disabled";  			};  			uart_B: serial@84dc {  				compatible = "amlogic,meson-gx-uart"; -				reg = <0x0 0x84dc 0x0 0x14>; +				reg = <0x0 0x84dc 0x0 0x18>;  				interrupts = <GIC_SPI 75 
IRQ_TYPE_EDGE_RISING>;  				status = "disabled";  			}; @@ -287,7 +287,7 @@  			uart_C: serial@8700 {  				compatible = "amlogic,meson-gx-uart"; -				reg = <0x0 0x8700 0x0 0x14>; +				reg = <0x0 0x8700 0x0 0x18>;  				interrupts = <GIC_SPI 93 IRQ_TYPE_EDGE_RISING>;  				status = "disabled";  			}; @@ -404,14 +404,14 @@  			uart_AO: serial@4c0 {  				compatible = "amlogic,meson-gx-uart", "amlogic,meson-ao-uart"; -				reg = <0x0 0x004c0 0x0 0x14>; +				reg = <0x0 0x004c0 0x0 0x18>;  				interrupts = <GIC_SPI 193 IRQ_TYPE_EDGE_RISING>;  				status = "disabled";  			};  			uart_AO_B: serial@4e0 {  				compatible = "amlogic,meson-gx-uart", "amlogic,meson-ao-uart"; -				reg = <0x0 0x004e0 0x0 0x14>; +				reg = <0x0 0x004e0 0x0 0x18>;  				interrupts = <GIC_SPI 197 IRQ_TYPE_EDGE_RISING>;  				status = "disabled";  			}; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi index 4f355f17eed6..c8514110b9da 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi @@ -631,6 +631,7 @@  			internal_phy: ethernet-phy@8 {  				compatible = "ethernet-phy-id0181.4400", "ethernet-phy-ieee802.3-c22"; +				interrupts = <GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>;  				reg = <8>;  				max-speed = <100>;  			}; diff --git a/arch/arm64/boot/dts/cavium/thunder2-99xx.dtsi b/arch/arm64/boot/dts/cavium/thunder2-99xx.dtsi index 4220fbdcb24a..ff5c4c47b22b 100644 --- a/arch/arm64/boot/dts/cavium/thunder2-99xx.dtsi +++ b/arch/arm64/boot/dts/cavium/thunder2-99xx.dtsi @@ -98,7 +98,7 @@  		clock-output-names = "clk125mhz";  	}; -	pci { +	pcie@30000000 {  		compatible = "pci-host-ecam-generic";  		device_type = "pci";  		#interrupt-cells = <1>; @@ -118,6 +118,7 @@  		ranges =  		  <0x02000000    0 0x40000000    0 0x40000000    0 0x20000000  		   0x43000000 0x40 0x00000000 0x40 0x00000000 0x20 0x00000000>; +		bus-range = <0 0xff>;  		interrupt-map-mask = <0 0 0 7>;  		interrupt-map =  		      /* addr  pin  ic   icaddr  icintr */ diff --git a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts index e94fa1a53192..047641fe294c 100644 --- a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts +++ b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts @@ -51,7 +51,7 @@  		#size-cells = <2>;  		ranges; -		ramoops@0x21f00000 { +		ramoops@21f00000 {  			compatible = "ramoops";  			reg = <0x0 0x21f00000 0x0 0x00100000>;  			record-size	= <0x00020000>; diff --git a/arch/arm64/boot/dts/mediatek/mt8173.dtsi b/arch/arm64/boot/dts/mediatek/mt8173.dtsi index 9fbe4705ee88..94597e33c806 100644 --- a/arch/arm64/boot/dts/mediatek/mt8173.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8173.dtsi @@ -341,7 +341,7 @@  			reg = <0 0x10005000 0 0x1000>;  		}; -		pio: pinctrl@0x10005000 { +		pio: pinctrl@10005000 {  			compatible = "mediatek,mt8173-pinctrl";  			reg = <0 0x1000b000 0 0x1000>;  			mediatek,pctl-regmap = <&syscfg_pctl_a>; diff --git a/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi b/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi index 492a011f14f6..1c8f1b86472d 100644 --- a/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi +++ b/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi @@ -140,16 +140,16 @@  		};  		agnoc@0 { -			qcom,pcie@00600000 { +			qcom,pcie@600000 {  				perst-gpio = <&msmgpio 35 GPIO_ACTIVE_LOW>;  			}; -			qcom,pcie@00608000 { +			qcom,pcie@608000 {  				status = "okay";  				perst-gpio = <&msmgpio 130 GPIO_ACTIVE_LOW>;  			}; -			qcom,pcie@00610000 { +			qcom,pcie@610000 {  				status = "okay";  				perst-gpio = <&msmgpio 
114 GPIO_ACTIVE_LOW>;  			}; diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi index 4b2afcc4fdf4..0a6f7952bbb1 100644 --- a/arch/arm64/boot/dts/qcom/msm8996.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi @@ -840,7 +840,7 @@  			#size-cells = <1>;  			ranges; -			pcie0: qcom,pcie@00600000 { +			pcie0: qcom,pcie@600000 {  				compatible = "qcom,pcie-msm8996", "snps,dw-pcie";  				status = "disabled";  				power-domains = <&gcc PCIE0_GDSC>; @@ -893,7 +893,7 @@  			}; -			pcie1: qcom,pcie@00608000 { +			pcie1: qcom,pcie@608000 {  				compatible = "qcom,pcie-msm8996", "snps,dw-pcie";  				power-domains = <&gcc PCIE1_GDSC>;  				bus-range = <0x00 0xff>; @@ -946,7 +946,7 @@  						"bus_slave";  			}; -			pcie2: qcom,pcie@00610000 { +			pcie2: qcom,pcie@610000 {  				compatible = "qcom,pcie-msm8996", "snps,dw-pcie";  				power-domains = <&gcc PCIE2_GDSC>;  				bus-range = <0x00 0xff>; diff --git a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts index 3890468678ce..28257724a56e 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts +++ b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts @@ -132,17 +132,16 @@  	assigned-clocks = <&cru SCLK_MAC2IO>, <&cru SCLK_MAC2IO_EXT>;  	assigned-clock-parents = <&gmac_clkin>, <&gmac_clkin>;  	clock_in_out = "input"; -	/* shows instability at 1GBit right now */ -	max-speed = <100>;  	phy-supply = <&vcc_io>;  	phy-mode = "rgmii";  	pinctrl-names = "default";  	pinctrl-0 = <&rgmiim1_pins>; +	snps,force_thresh_dma_mode;  	snps,reset-gpio = <&gpio1 RK_PC2 GPIO_ACTIVE_LOW>;  	snps,reset-active-low;  	snps,reset-delays-us = <0 10000 50000>; -	tx_delay = <0x26>; -	rx_delay = <0x11>; +	tx_delay = <0x24>; +	rx_delay = <0x18>;  	status = "okay";  }; diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi index a037ee56fead..cae341554486 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi @@ -730,7 +730,7 @@  		interrupts = <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>;  		clocks = <&cru HCLK_SDMMC>, <&cru SCLK_SDMMC>,  			 <&cru SCLK_SDMMC_DRV>, <&cru SCLK_SDMMC_SAMPLE>; -		clock-names = "biu", "ciu", "ciu_drv", "ciu_sample"; +		clock-names = "biu", "ciu", "ciu-drive", "ciu-sample";  		fifo-depth = <0x100>;  		status = "disabled";  	}; @@ -741,7 +741,7 @@  		interrupts = <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>;  		clocks = <&cru HCLK_SDIO>, <&cru SCLK_SDIO>,  			 <&cru SCLK_SDIO_DRV>, <&cru SCLK_SDIO_SAMPLE>; -		clock-names = "biu", "ciu", "ciu_drv", "ciu_sample"; +		clock-names = "biu", "ciu", "ciu-drive", "ciu-sample";  		fifo-depth = <0x100>;  		status = "disabled";  	}; @@ -752,7 +752,7 @@  		interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>;  		clocks = <&cru HCLK_EMMC>, <&cru SCLK_EMMC>,  			 <&cru SCLK_EMMC_DRV>, <&cru SCLK_EMMC_SAMPLE>; -		clock-names = "biu", "ciu", "ciu_drv", "ciu_sample"; +		clock-names = "biu", "ciu", "ciu-drive", "ciu-sample";  		fifo-depth = <0x100>;  		status = "disabled";  	}; diff --git a/arch/arm64/boot/dts/rockchip/rk3368.dtsi b/arch/arm64/boot/dts/rockchip/rk3368.dtsi index aa4d07046a7b..03458ac44201 100644 --- a/arch/arm64/boot/dts/rockchip/rk3368.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3368.dtsi @@ -257,7 +257,7 @@  		max-frequency = <150000000>;  		clocks = <&cru HCLK_SDIO0>, <&cru SCLK_SDIO0>,  			 <&cru SCLK_SDIO0_DRV>, <&cru SCLK_SDIO0_SAMPLE>; -		clock-names = "biu", "ciu", "ciu_drv", "ciu_sample"; +		clock-names = "biu", "ciu", "ciu-drive", "ciu-sample";  		fifo-depth = 
<0x100>;  		interrupts = <GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>;  		resets = <&cru SRST_SDIO0>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-sapphire.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-sapphire.dtsi index 0f873c897d0d..ce592a4c0c4c 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-sapphire.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-sapphire.dtsi @@ -457,7 +457,7 @@  	assigned-clocks = <&cru SCLK_PCIEPHY_REF>;  	assigned-clock-parents = <&cru SCLK_PCIEPHY_REF100M>;  	assigned-clock-rates = <100000000>; -	ep-gpios = <&gpio3 RK_PB5 GPIO_ACTIVE_HIGH>; +	ep-gpios = <&gpio2 RK_PA4 GPIO_ACTIVE_HIGH>;  	num-lanes = <4>;  	pinctrl-names = "default";  	pinctrl-0 = <&pcie_clkreqn_cpm>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi index 7aa2144e0d47..2605118d4b4c 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi @@ -1739,8 +1739,8 @@  		compatible = "rockchip,rk3399-edp";  		reg = <0x0 0xff970000 0x0 0x8000>;  		interrupts = <GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH 0>; -		clocks = <&cru PCLK_EDP>, <&cru PCLK_EDP_CTRL>; -		clock-names = "dp", "pclk"; +		clocks = <&cru PCLK_EDP>, <&cru PCLK_EDP_CTRL>, <&cru PCLK_VIO_GRF>; +		clock-names = "dp", "pclk", "grf";  		pinctrl-names = "default";  		pinctrl-0 = <&edp_hpd>;  		power-domains = <&power RK3399_PD_EDP>; diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index be7bd19c87ec..350c76a1d15b 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -20,7 +20,7 @@  #define MPIDR_UP_BITMASK	(0x1 << 30)  #define MPIDR_MT_BITMASK	(0x1 << 24) -#define MPIDR_HWID_BITMASK	0xff00ffffff +#define MPIDR_HWID_BITMASK	UL(0xff00ffffff)  #define MPIDR_LEVEL_BITS_SHIFT	3  #define MPIDR_LEVEL_BITS	(1 << MPIDR_LEVEL_BITS_SHIFT) diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h index 1dca41bea16a..e73f68569624 100644 --- a/arch/arm64/include/asm/hugetlb.h +++ b/arch/arm64/include/asm/hugetlb.h @@ -22,7 +22,7 @@  static inline pte_t huge_ptep_get(pte_t *ptep)  { -	return *ptep; +	return READ_ONCE(*ptep);  } diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 9679067a1574..7faed6e48b46 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -185,42 +185,42 @@ static inline pmd_t kvm_s2pmd_mkexec(pmd_t pmd)  	return pmd;  } -static inline void kvm_set_s2pte_readonly(pte_t *pte) +static inline void kvm_set_s2pte_readonly(pte_t *ptep)  {  	pteval_t old_pteval, pteval; -	pteval = READ_ONCE(pte_val(*pte)); +	pteval = READ_ONCE(pte_val(*ptep));  	do {  		old_pteval = pteval;  		pteval &= ~PTE_S2_RDWR;  		pteval |= PTE_S2_RDONLY; -		pteval = cmpxchg_relaxed(&pte_val(*pte), old_pteval, pteval); +		pteval = cmpxchg_relaxed(&pte_val(*ptep), old_pteval, pteval);  	} while (pteval != old_pteval);  } -static inline bool kvm_s2pte_readonly(pte_t *pte) +static inline bool kvm_s2pte_readonly(pte_t *ptep)  { -	return (pte_val(*pte) & PTE_S2_RDWR) == PTE_S2_RDONLY; +	return (READ_ONCE(pte_val(*ptep)) & PTE_S2_RDWR) == PTE_S2_RDONLY;  } -static inline bool kvm_s2pte_exec(pte_t *pte) +static inline bool kvm_s2pte_exec(pte_t *ptep)  { -	return !(pte_val(*pte) & PTE_S2_XN); +	return !(READ_ONCE(pte_val(*ptep)) & PTE_S2_XN);  } -static inline void kvm_set_s2pmd_readonly(pmd_t *pmd) +static inline void kvm_set_s2pmd_readonly(pmd_t *pmdp)  { -	kvm_set_s2pte_readonly((pte_t *)pmd); +	kvm_set_s2pte_readonly((pte_t *)pmdp);  } -static inline bool 
kvm_s2pmd_readonly(pmd_t *pmd) +static inline bool kvm_s2pmd_readonly(pmd_t *pmdp)  { -	return kvm_s2pte_readonly((pte_t *)pmd); +	return kvm_s2pte_readonly((pte_t *)pmdp);  } -static inline bool kvm_s2pmd_exec(pmd_t *pmd) +static inline bool kvm_s2pmd_exec(pmd_t *pmdp)  { -	return !(pmd_val(*pmd) & PMD_S2_XN); +	return !(READ_ONCE(pmd_val(*pmdp)) & PMD_S2_XN);  }  static inline bool kvm_page_empty(void *ptr) diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 8d3331985d2e..39ec0b8a689e 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -141,13 +141,13 @@ static inline void cpu_install_idmap(void)   * Atomically replaces the active TTBR1_EL1 PGD with a new VA-compatible PGD,   * avoiding the possibility of conflicting TLB entries being allocated.   */ -static inline void cpu_replace_ttbr1(pgd_t *pgd) +static inline void cpu_replace_ttbr1(pgd_t *pgdp)  {  	typedef void (ttbr_replace_func)(phys_addr_t);  	extern ttbr_replace_func idmap_cpu_replace_ttbr1;  	ttbr_replace_func *replace_phys; -	phys_addr_t pgd_phys = virt_to_phys(pgd); +	phys_addr_t pgd_phys = virt_to_phys(pgdp);  	replace_phys = (void *)__pa_symbol(idmap_cpu_replace_ttbr1); diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index e9d9f1b006ef..2e05bcd944c8 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -36,23 +36,23 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)  	return (pmd_t *)__get_free_page(PGALLOC_GFP);  } -static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) +static inline void pmd_free(struct mm_struct *mm, pmd_t *pmdp)  { -	BUG_ON((unsigned long)pmd & (PAGE_SIZE-1)); -	free_page((unsigned long)pmd); +	BUG_ON((unsigned long)pmdp & (PAGE_SIZE-1)); +	free_page((unsigned long)pmdp);  } -static inline void __pud_populate(pud_t *pud, phys_addr_t pmd, pudval_t prot) +static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot)  { -	set_pud(pud, __pud(__phys_to_pud_val(pmd) | prot)); +	set_pud(pudp, __pud(__phys_to_pud_val(pmdp) | prot));  } -static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) +static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmdp)  { -	__pud_populate(pud, __pa(pmd), PMD_TYPE_TABLE); +	__pud_populate(pudp, __pa(pmdp), PMD_TYPE_TABLE);  }  #else -static inline void __pud_populate(pud_t *pud, phys_addr_t pmd, pudval_t prot) +static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot)  {  	BUILD_BUG();  } @@ -65,30 +65,30 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)  	return (pud_t *)__get_free_page(PGALLOC_GFP);  } -static inline void pud_free(struct mm_struct *mm, pud_t *pud) +static inline void pud_free(struct mm_struct *mm, pud_t *pudp)  { -	BUG_ON((unsigned long)pud & (PAGE_SIZE-1)); -	free_page((unsigned long)pud); +	BUG_ON((unsigned long)pudp & (PAGE_SIZE-1)); +	free_page((unsigned long)pudp);  } -static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pud, pgdval_t prot) +static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pudp, pgdval_t prot)  { -	set_pgd(pgdp, __pgd(__phys_to_pgd_val(pud) | prot)); +	set_pgd(pgdp, __pgd(__phys_to_pgd_val(pudp) | prot));  } -static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) +static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgdp, pud_t *pudp)  { -	__pgd_populate(pgd, __pa(pud), PUD_TYPE_TABLE); +	
__pgd_populate(pgdp, __pa(pudp), PUD_TYPE_TABLE);  }  #else -static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pud, pgdval_t prot) +static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pudp, pgdval_t prot)  {  	BUILD_BUG();  }  #endif	/* CONFIG_PGTABLE_LEVELS > 3 */  extern pgd_t *pgd_alloc(struct mm_struct *mm); -extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); +extern void pgd_free(struct mm_struct *mm, pgd_t *pgdp);  static inline pte_t *  pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr) @@ -114,10 +114,10 @@ pte_alloc_one(struct mm_struct *mm, unsigned long addr)  /*   * Free a PTE table.   */ -static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) +static inline void pte_free_kernel(struct mm_struct *mm, pte_t *ptep)  { -	if (pte) -		free_page((unsigned long)pte); +	if (ptep) +		free_page((unsigned long)ptep);  }  static inline void pte_free(struct mm_struct *mm, pgtable_t pte) @@ -126,10 +126,10 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t pte)  	__free_page(pte);  } -static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t pte, +static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t ptep,  				  pmdval_t prot)  { -	set_pmd(pmdp, __pmd(__phys_to_pmd_val(pte) | prot)); +	set_pmd(pmdp, __pmd(__phys_to_pmd_val(ptep) | prot));  }  /* diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 094374c82db0..7e2c27e63cd8 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -218,7 +218,7 @@ static inline pmd_t pmd_mkcont(pmd_t pmd)  static inline void set_pte(pte_t *ptep, pte_t pte)  { -	*ptep = pte; +	WRITE_ONCE(*ptep, pte);  	/*  	 * Only if the new pte is valid and kernel, otherwise TLB maintenance @@ -250,6 +250,8 @@ extern void __sync_icache_dcache(pte_t pteval, unsigned long addr);  static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,  			      pte_t *ptep, pte_t pte)  { +	pte_t old_pte; +  	if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte))  		__sync_icache_dcache(pte, addr); @@ -258,14 +260,15 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,  	 * hardware updates of the pte (ptep_set_access_flags safely changes  	 * valid ptes without going through an invalid entry).  	 
*/ -	if (IS_ENABLED(CONFIG_DEBUG_VM) && pte_valid(*ptep) && pte_valid(pte) && +	old_pte = READ_ONCE(*ptep); +	if (IS_ENABLED(CONFIG_DEBUG_VM) && pte_valid(old_pte) && pte_valid(pte) &&  	   (mm == current->active_mm || atomic_read(&mm->mm_users) > 1)) {  		VM_WARN_ONCE(!pte_young(pte),  			     "%s: racy access flag clearing: 0x%016llx -> 0x%016llx", -			     __func__, pte_val(*ptep), pte_val(pte)); -		VM_WARN_ONCE(pte_write(*ptep) && !pte_dirty(pte), +			     __func__, pte_val(old_pte), pte_val(pte)); +		VM_WARN_ONCE(pte_write(old_pte) && !pte_dirty(pte),  			     "%s: racy dirty state clearing: 0x%016llx -> 0x%016llx", -			     __func__, pte_val(*ptep), pte_val(pte)); +			     __func__, pte_val(old_pte), pte_val(pte));  	}  	set_pte(ptep, pte); @@ -431,7 +434,7 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,  static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)  { -	*pmdp = pmd; +	WRITE_ONCE(*pmdp, pmd);  	dsb(ishst);  	isb();  } @@ -482,7 +485,7 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd)  static inline void set_pud(pud_t *pudp, pud_t pud)  { -	*pudp = pud; +	WRITE_ONCE(*pudp, pud);  	dsb(ishst);  	isb();  } @@ -500,7 +503,7 @@ static inline phys_addr_t pud_page_paddr(pud_t pud)  /* Find an entry in the second-level page table. */  #define pmd_index(addr)		(((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) -#define pmd_offset_phys(dir, addr)	(pud_page_paddr(*(dir)) + pmd_index(addr) * sizeof(pmd_t)) +#define pmd_offset_phys(dir, addr)	(pud_page_paddr(READ_ONCE(*(dir))) + pmd_index(addr) * sizeof(pmd_t))  #define pmd_offset(dir, addr)		((pmd_t *)__va(pmd_offset_phys((dir), (addr))))  #define pmd_set_fixmap(addr)		((pmd_t *)set_fixmap_offset(FIX_PMD, addr)) @@ -535,7 +538,7 @@ static inline phys_addr_t pud_page_paddr(pud_t pud)  static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)  { -	*pgdp = pgd; +	WRITE_ONCE(*pgdp, pgd);  	dsb(ishst);  } @@ -552,7 +555,7 @@ static inline phys_addr_t pgd_page_paddr(pgd_t pgd)  /* Find an entry in the frst-level page table. 
*/  #define pud_index(addr)		(((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) -#define pud_offset_phys(dir, addr)	(pgd_page_paddr(*(dir)) + pud_index(addr) * sizeof(pud_t)) +#define pud_offset_phys(dir, addr)	(pgd_page_paddr(READ_ONCE(*(dir))) + pud_index(addr) * sizeof(pud_t))  #define pud_offset(dir, addr)		((pud_t *)__va(pud_offset_phys((dir), (addr))))  #define pud_set_fixmap(addr)		((pud_t *)set_fixmap_offset(FIX_PUD, addr)) diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index 472ef944e932..902f9edacbea 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -28,7 +28,7 @@ struct stackframe {  	unsigned long fp;  	unsigned long pc;  #ifdef CONFIG_FUNCTION_GRAPH_TRACER -	unsigned int graph; +	int graph;  #endif  }; diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 543e11f0f657..e66b0fca99c2 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -72,15 +72,15 @@ static inline void set_fs(mm_segment_t fs)   * This is equivalent to the following test:   * (u65)addr + (u65)size <= (u65)current->addr_limit + 1   */ -static inline unsigned long __range_ok(unsigned long addr, unsigned long size) +static inline unsigned long __range_ok(const void __user *addr, unsigned long size)  { -	unsigned long limit = current_thread_info()->addr_limit; +	unsigned long ret, limit = current_thread_info()->addr_limit;  	__chk_user_ptr(addr);  	asm volatile(  	// A + B <= C + 1 for all A,B,C, in four easy steps:  	// 1: X = A + B; X' = X % 2^64 -	"	adds	%0, %0, %2\n" +	"	adds	%0, %3, %2\n"  	// 2: Set C = 0 if X > 2^64, to guarantee X' > C in step 4  	"	csel	%1, xzr, %1, hi\n"  	// 3: Set X' = ~0 if X >= 2^64. For X == 2^64, this decrements X' @@ -92,9 +92,9 @@ static inline unsigned long __range_ok(unsigned long addr, unsigned long size)  	//    testing X' - C == 0, subject to the previous adjustments.  	
"	sbcs	xzr, %0, %1\n"  	"	cset	%0, ls\n" -	: "+r" (addr), "+r" (limit) : "Ir" (size) : "cc"); +	: "=&r" (ret), "+r" (limit) : "Ir" (size), "0" (addr) : "cc"); -	return addr; +	return ret;  }  /* @@ -104,7 +104,7 @@ static inline unsigned long __range_ok(unsigned long addr, unsigned long size)   */  #define untagged_addr(addr)		sign_extend64(addr, 55) -#define access_ok(type, addr, size)	__range_ok((unsigned long)(addr), size) +#define access_ok(type, addr, size)	__range_ok(addr, size)  #define user_addr_max			get_fs  #define _ASM_EXTABLE(from, to)						\ diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index c33b5e4010ab..68450e954d47 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -370,6 +370,7 @@ static unsigned int __kprobes aarch32_check_condition(u32 opcode, u32 psr)  static int swp_handler(struct pt_regs *regs, u32 instr)  {  	u32 destreg, data, type, address = 0; +	const void __user *user_ptr;  	int rn, rt2, res = 0;  	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, regs->pc); @@ -401,7 +402,8 @@ static int swp_handler(struct pt_regs *regs, u32 instr)  		aarch32_insn_extract_reg_num(instr, A32_RT2_OFFSET), data);  	/* Check access in reasonable access range for both SWP and SWPB */ -	if (!access_ok(VERIFY_WRITE, (address & ~3), 4)) { +	user_ptr = (const void __user *)(unsigned long)(address & ~3); +	if (!access_ok(VERIFY_WRITE, user_ptr, 4)) {  		pr_debug("SWP{B} emulation: access to 0x%08x not allowed!\n",  			address);  		goto fault; diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 07823595b7f0..b5a28336c077 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -178,7 +178,7 @@ static int enable_smccc_arch_workaround_1(void *data)  	case PSCI_CONDUIT_HVC:  		arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,  				  ARM_SMCCC_ARCH_WORKAROUND_1, &res); -		if (res.a0) +		if ((int)res.a0 < 0)  			return 0;  		cb = call_hvc_arch_workaround_1;  		smccc_start = __smccc_workaround_1_hvc_start; @@ -188,7 +188,7 @@ static int enable_smccc_arch_workaround_1(void *data)  	case PSCI_CONDUIT_SMC:  		arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,  				  ARM_SMCCC_ARCH_WORKAROUND_1, &res); -		if (res.a0) +		if ((int)res.a0 < 0)  			return 0;  		cb = call_smc_arch_workaround_1;  		smccc_start = __smccc_workaround_1_smc_start; @@ -408,6 +408,15 @@ const struct arm64_cpu_capabilities arm64_errata[] = {  	},  	{  		.capability = ARM64_HARDEN_BRANCH_PREDICTOR, +		MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR), +		.enable = qcom_enable_link_stack_sanitization, +	}, +	{ +		.capability = ARM64_HARDEN_BP_POST_GUEST_EXIT, +		MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR), +	}, +	{ +		.capability = ARM64_HARDEN_BRANCH_PREDICTOR,  		MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN),  		.enable = enable_smccc_arch_workaround_1,  	}, diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 29b1f873e337..2985a067fc13 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -199,9 +199,11 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = {  };  static const struct arm64_ftr_bits ftr_ctr[] = { -	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 31, 1, 1),	/* RAO */ +	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 31, 1, 1),		/* RES1 */ +	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 29, 1, 1),	/* DIC */ +	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 28, 1, 1),	/* IDC */  	ARM64_FTR_BITS(FTR_VISIBLE, 
FTR_STRICT, FTR_HIGHER_SAFE, 24, 4, 0),	/* CWG */ -	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0),	/* ERG */ +	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, 20, 4, 0),	/* ERG */  	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 1),	/* DminLine */  	/*  	 * Linux can handle differing I-cache policies. Userspace JITs will diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index f85ac58d08a3..a8bf1c892b90 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -90,7 +90,7 @@ static int __init set_permissions(pte_t *ptep, pgtable_t token,  				  unsigned long addr, void *data)  {  	efi_memory_desc_t *md = data; -	pte_t pte = *ptep; +	pte_t pte = READ_ONCE(*ptep);  	if (md->attribute & EFI_MEMORY_RO)  		pte = set_pte_bit(pte, __pgprot(PTE_RDONLY)); diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index f20cf7e99249..1ec5f28c39fc 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -202,10 +202,10 @@ static int create_safe_exec_page(void *src_start, size_t length,  				 gfp_t mask)  {  	int rc = 0; -	pgd_t *pgd; -	pud_t *pud; -	pmd_t *pmd; -	pte_t *pte; +	pgd_t *pgdp; +	pud_t *pudp; +	pmd_t *pmdp; +	pte_t *ptep;  	unsigned long dst = (unsigned long)allocator(mask);  	if (!dst) { @@ -216,38 +216,38 @@ static int create_safe_exec_page(void *src_start, size_t length,  	memcpy((void *)dst, src_start, length);  	flush_icache_range(dst, dst + length); -	pgd = pgd_offset_raw(allocator(mask), dst_addr); -	if (pgd_none(*pgd)) { -		pud = allocator(mask); -		if (!pud) { +	pgdp = pgd_offset_raw(allocator(mask), dst_addr); +	if (pgd_none(READ_ONCE(*pgdp))) { +		pudp = allocator(mask); +		if (!pudp) {  			rc = -ENOMEM;  			goto out;  		} -		pgd_populate(&init_mm, pgd, pud); +		pgd_populate(&init_mm, pgdp, pudp);  	} -	pud = pud_offset(pgd, dst_addr); -	if (pud_none(*pud)) { -		pmd = allocator(mask); -		if (!pmd) { +	pudp = pud_offset(pgdp, dst_addr); +	if (pud_none(READ_ONCE(*pudp))) { +		pmdp = allocator(mask); +		if (!pmdp) {  			rc = -ENOMEM;  			goto out;  		} -		pud_populate(&init_mm, pud, pmd); +		pud_populate(&init_mm, pudp, pmdp);  	} -	pmd = pmd_offset(pud, dst_addr); -	if (pmd_none(*pmd)) { -		pte = allocator(mask); -		if (!pte) { +	pmdp = pmd_offset(pudp, dst_addr); +	if (pmd_none(READ_ONCE(*pmdp))) { +		ptep = allocator(mask); +		if (!ptep) {  			rc = -ENOMEM;  			goto out;  		} -		pmd_populate_kernel(&init_mm, pmd, pte); +		pmd_populate_kernel(&init_mm, pmdp, ptep);  	} -	pte = pte_offset_kernel(pmd, dst_addr); -	set_pte(pte, pfn_pte(virt_to_pfn(dst), PAGE_KERNEL_EXEC)); +	ptep = pte_offset_kernel(pmdp, dst_addr); +	set_pte(ptep, pfn_pte(virt_to_pfn(dst), PAGE_KERNEL_EXEC));  	/*  	 * Load our new page tables. 
A strict BBM approach requires that we @@ -263,7 +263,7 @@ static int create_safe_exec_page(void *src_start, size_t length,  	 */  	cpu_set_reserved_ttbr0();  	local_flush_tlb_all(); -	write_sysreg(phys_to_ttbr(virt_to_phys(pgd)), ttbr0_el1); +	write_sysreg(phys_to_ttbr(virt_to_phys(pgdp)), ttbr0_el1);  	isb();  	*phys_dst_addr = virt_to_phys((void *)dst); @@ -320,9 +320,9 @@ int swsusp_arch_suspend(void)  	return ret;  } -static void _copy_pte(pte_t *dst_pte, pte_t *src_pte, unsigned long addr) +static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr)  { -	pte_t pte = *src_pte; +	pte_t pte = READ_ONCE(*src_ptep);  	if (pte_valid(pte)) {  		/* @@ -330,7 +330,7 @@ static void _copy_pte(pte_t *dst_pte, pte_t *src_pte, unsigned long addr)  		 * read only (code, rodata). Clear the RDONLY bit from  		 * the temporary mappings we use during restore.  		 */ -		set_pte(dst_pte, pte_mkwrite(pte)); +		set_pte(dst_ptep, pte_mkwrite(pte));  	} else if (debug_pagealloc_enabled() && !pte_none(pte)) {  		/*  		 * debug_pagealloc will removed the PTE_VALID bit if @@ -343,112 +343,116 @@ static void _copy_pte(pte_t *dst_pte, pte_t *src_pte, unsigned long addr)  		 */  		BUG_ON(!pfn_valid(pte_pfn(pte))); -		set_pte(dst_pte, pte_mkpresent(pte_mkwrite(pte))); +		set_pte(dst_ptep, pte_mkpresent(pte_mkwrite(pte)));  	}  } -static int copy_pte(pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long start, +static int copy_pte(pmd_t *dst_pmdp, pmd_t *src_pmdp, unsigned long start,  		    unsigned long end)  { -	pte_t *src_pte; -	pte_t *dst_pte; +	pte_t *src_ptep; +	pte_t *dst_ptep;  	unsigned long addr = start; -	dst_pte = (pte_t *)get_safe_page(GFP_ATOMIC); -	if (!dst_pte) +	dst_ptep = (pte_t *)get_safe_page(GFP_ATOMIC); +	if (!dst_ptep)  		return -ENOMEM; -	pmd_populate_kernel(&init_mm, dst_pmd, dst_pte); -	dst_pte = pte_offset_kernel(dst_pmd, start); +	pmd_populate_kernel(&init_mm, dst_pmdp, dst_ptep); +	dst_ptep = pte_offset_kernel(dst_pmdp, start); -	src_pte = pte_offset_kernel(src_pmd, start); +	src_ptep = pte_offset_kernel(src_pmdp, start);  	do { -		_copy_pte(dst_pte, src_pte, addr); -	} while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end); +		_copy_pte(dst_ptep, src_ptep, addr); +	} while (dst_ptep++, src_ptep++, addr += PAGE_SIZE, addr != end);  	return 0;  } -static int copy_pmd(pud_t *dst_pud, pud_t *src_pud, unsigned long start, +static int copy_pmd(pud_t *dst_pudp, pud_t *src_pudp, unsigned long start,  		    unsigned long end)  { -	pmd_t *src_pmd; -	pmd_t *dst_pmd; +	pmd_t *src_pmdp; +	pmd_t *dst_pmdp;  	unsigned long next;  	unsigned long addr = start; -	if (pud_none(*dst_pud)) { -		dst_pmd = (pmd_t *)get_safe_page(GFP_ATOMIC); -		if (!dst_pmd) +	if (pud_none(READ_ONCE(*dst_pudp))) { +		dst_pmdp = (pmd_t *)get_safe_page(GFP_ATOMIC); +		if (!dst_pmdp)  			return -ENOMEM; -		pud_populate(&init_mm, dst_pud, dst_pmd); +		pud_populate(&init_mm, dst_pudp, dst_pmdp);  	} -	dst_pmd = pmd_offset(dst_pud, start); +	dst_pmdp = pmd_offset(dst_pudp, start); -	src_pmd = pmd_offset(src_pud, start); +	src_pmdp = pmd_offset(src_pudp, start);  	do { +		pmd_t pmd = READ_ONCE(*src_pmdp); +  		next = pmd_addr_end(addr, end); -		if (pmd_none(*src_pmd)) +		if (pmd_none(pmd))  			continue; -		if (pmd_table(*src_pmd)) { -			if (copy_pte(dst_pmd, src_pmd, addr, next)) +		if (pmd_table(pmd)) { +			if (copy_pte(dst_pmdp, src_pmdp, addr, next))  				return -ENOMEM;  		} else { -			set_pmd(dst_pmd, -				__pmd(pmd_val(*src_pmd) & ~PMD_SECT_RDONLY)); +			set_pmd(dst_pmdp, +				__pmd(pmd_val(pmd) & 
~PMD_SECT_RDONLY));  		} -	} while (dst_pmd++, src_pmd++, addr = next, addr != end); +	} while (dst_pmdp++, src_pmdp++, addr = next, addr != end);  	return 0;  } -static int copy_pud(pgd_t *dst_pgd, pgd_t *src_pgd, unsigned long start, +static int copy_pud(pgd_t *dst_pgdp, pgd_t *src_pgdp, unsigned long start,  		    unsigned long end)  { -	pud_t *dst_pud; -	pud_t *src_pud; +	pud_t *dst_pudp; +	pud_t *src_pudp;  	unsigned long next;  	unsigned long addr = start; -	if (pgd_none(*dst_pgd)) { -		dst_pud = (pud_t *)get_safe_page(GFP_ATOMIC); -		if (!dst_pud) +	if (pgd_none(READ_ONCE(*dst_pgdp))) { +		dst_pudp = (pud_t *)get_safe_page(GFP_ATOMIC); +		if (!dst_pudp)  			return -ENOMEM; -		pgd_populate(&init_mm, dst_pgd, dst_pud); +		pgd_populate(&init_mm, dst_pgdp, dst_pudp);  	} -	dst_pud = pud_offset(dst_pgd, start); +	dst_pudp = pud_offset(dst_pgdp, start); -	src_pud = pud_offset(src_pgd, start); +	src_pudp = pud_offset(src_pgdp, start);  	do { +		pud_t pud = READ_ONCE(*src_pudp); +  		next = pud_addr_end(addr, end); -		if (pud_none(*src_pud)) +		if (pud_none(pud))  			continue; -		if (pud_table(*(src_pud))) { -			if (copy_pmd(dst_pud, src_pud, addr, next)) +		if (pud_table(pud)) { +			if (copy_pmd(dst_pudp, src_pudp, addr, next))  				return -ENOMEM;  		} else { -			set_pud(dst_pud, -				__pud(pud_val(*src_pud) & ~PMD_SECT_RDONLY)); +			set_pud(dst_pudp, +				__pud(pud_val(pud) & ~PMD_SECT_RDONLY));  		} -	} while (dst_pud++, src_pud++, addr = next, addr != end); +	} while (dst_pudp++, src_pudp++, addr = next, addr != end);  	return 0;  } -static int copy_page_tables(pgd_t *dst_pgd, unsigned long start, +static int copy_page_tables(pgd_t *dst_pgdp, unsigned long start,  			    unsigned long end)  {  	unsigned long next;  	unsigned long addr = start; -	pgd_t *src_pgd = pgd_offset_k(start); +	pgd_t *src_pgdp = pgd_offset_k(start); -	dst_pgd = pgd_offset_raw(dst_pgd, start); +	dst_pgdp = pgd_offset_raw(dst_pgdp, start);  	do {  		next = pgd_addr_end(addr, end); -		if (pgd_none(*src_pgd)) +		if (pgd_none(READ_ONCE(*src_pgdp)))  			continue; -		if (copy_pud(dst_pgd, src_pgd, addr, next)) +		if (copy_pud(dst_pgdp, src_pgdp, addr, next))  			return -ENOMEM; -	} while (dst_pgd++, src_pgd++, addr = next, addr != end); +	} while (dst_pgdp++, src_pgdp++, addr = next, addr != end);  	return 0;  } diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 75b220ba73a3..85a251b6dfa8 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -908,9 +908,9 @@ static void __armv8pmu_probe_pmu(void *info)  	int pmuver;  	dfr0 = read_sysreg(id_aa64dfr0_el1); -	pmuver = cpuid_feature_extract_signed_field(dfr0, +	pmuver = cpuid_feature_extract_unsigned_field(dfr0,  			ID_AA64DFR0_PMUVER_SHIFT); -	if (pmuver < 1) +	if (pmuver == 0xf || pmuver == 0)  		return;  	probe->present = true; diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index ad8aeb098b31..c0da6efe5465 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -220,8 +220,15 @@ void __show_regs(struct pt_regs *regs)  	show_regs_print_info(KERN_DEFAULT);  	print_pstate(regs); -	printk("pc : %pS\n", (void *)regs->pc); -	printk("lr : %pS\n", (void *)lr); + +	if (!user_mode(regs)) { +		printk("pc : %pS\n", (void *)regs->pc); +		printk("lr : %pS\n", (void *)lr); +	} else { +		printk("pc : %016llx\n", regs->pc); +		printk("lr : %016llx\n", lr); +	} +  	printk("sp : %016llx\n", sp);  	i = top_reg; diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c 
index 6618036ae6d4..9ae31f7e2243 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1419,7 +1419,7 @@ static int compat_ptrace_hbp_get(unsigned int note_type,  	u64 addr = 0;  	u32 ctrl = 0; -	int err, idx = compat_ptrace_hbp_num_to_idx(num);; +	int err, idx = compat_ptrace_hbp_num_to_idx(num);  	if (num & 1) {  		err = ptrace_hbp_get_addr(note_type, tsk, idx, &addr); diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 76809ccd309c..d5718a060672 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -59,6 +59,11 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)  #ifdef CONFIG_FUNCTION_GRAPH_TRACER  	if (tsk->ret_stack &&  			(frame->pc == (unsigned long)return_to_handler)) { +		if (WARN_ON_ONCE(frame->graph == -1)) +			return -EINVAL; +		if (frame->graph < -1) +			frame->graph += FTRACE_NOTRACE_DEPTH; +  		/*  		 * This is a case where function graph tracer has  		 * modified a return address (LR) in a stack frame diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c index 8b8bbd3eaa52..a382b2a1b84e 100644 --- a/arch/arm64/kernel/sys_compat.c +++ b/arch/arm64/kernel/sys_compat.c @@ -57,7 +57,7 @@ do_compat_cache_op(unsigned long start, unsigned long end, int flags)  	if (end < start || flags)  		return -EINVAL; -	if (!access_ok(VERIFY_READ, start, end - start)) +	if (!access_ok(VERIFY_READ, (const void __user *)start, end - start))  		return -EFAULT;  	return __do_compat_cache_op(start, end); diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c index a4391280fba9..f258636273c9 100644 --- a/arch/arm64/kernel/time.c +++ b/arch/arm64/kernel/time.c @@ -52,7 +52,7 @@ unsigned long profile_pc(struct pt_regs *regs)  	frame.fp = regs->regs[29];  	frame.pc = regs->pc;  #ifdef CONFIG_FUNCTION_GRAPH_TRACER -	frame.graph = -1; /* no task info */ +	frame.graph = current->curr_ret_stack;  #endif  	do {  		int ret = unwind_frame(NULL, &frame); diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index bbb0fde2780e..eb2d15147e8d 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -57,7 +57,7 @@ static const char *handler[]= {  	"Error"  }; -int show_unhandled_signals = 1; +int show_unhandled_signals = 0;  static void dump_backtrace_entry(unsigned long where)  { @@ -526,14 +526,6 @@ asmlinkage long do_ni_syscall(struct pt_regs *regs)  	}  #endif -	if (show_unhandled_signals_ratelimited()) { -		pr_info("%s[%d]: syscall %d\n", current->comm, -			task_pid_nr(current), regs->syscallno); -		dump_instr("", regs); -		if (user_mode(regs)) -			__show_regs(regs); -	} -  	return sys_ni_syscall();  } diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index d7e3299a7734..959e50d2588c 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -363,8 +363,6 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,  {  	int ret = 0; -	vcpu_load(vcpu); -  	trace_kvm_set_guest_debug(vcpu, dbg->control);  	if (dbg->control & ~KVM_GUESTDBG_VALID_MASK) { @@ -386,7 +384,6 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,  	}  out: -	vcpu_put(vcpu);  	return ret;  } diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 116252a8d3a5..870f4b1587f9 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -407,8 +407,10 @@ again:  		u32 midr = read_cpuid_id();  		/* Apply BTAC predictors mitigation to all Falkor chips */ -		if ((midr & MIDR_CPU_MODEL_MASK) == 
MIDR_QCOM_FALKOR_V1) +		if (((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR) || +		    ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1)) {  			__qcom_hyp_sanitize_btac_predictors(); +		}  	}  	fp_enabled = __fpsimd_enabled(); diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c index 7b60d62ac593..65dfc8571bf8 100644 --- a/arch/arm64/mm/dump.c +++ b/arch/arm64/mm/dump.c @@ -286,48 +286,52 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level,  } -static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start) +static void walk_pte(struct pg_state *st, pmd_t *pmdp, unsigned long start)  { -	pte_t *pte = pte_offset_kernel(pmd, 0UL); +	pte_t *ptep = pte_offset_kernel(pmdp, 0UL);  	unsigned long addr;  	unsigned i; -	for (i = 0; i < PTRS_PER_PTE; i++, pte++) { +	for (i = 0; i < PTRS_PER_PTE; i++, ptep++) {  		addr = start + i * PAGE_SIZE; -		note_page(st, addr, 4, pte_val(*pte)); +		note_page(st, addr, 4, READ_ONCE(pte_val(*ptep)));  	}  } -static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) +static void walk_pmd(struct pg_state *st, pud_t *pudp, unsigned long start)  { -	pmd_t *pmd = pmd_offset(pud, 0UL); +	pmd_t *pmdp = pmd_offset(pudp, 0UL);  	unsigned long addr;  	unsigned i; -	for (i = 0; i < PTRS_PER_PMD; i++, pmd++) { +	for (i = 0; i < PTRS_PER_PMD; i++, pmdp++) { +		pmd_t pmd = READ_ONCE(*pmdp); +  		addr = start + i * PMD_SIZE; -		if (pmd_none(*pmd) || pmd_sect(*pmd)) { -			note_page(st, addr, 3, pmd_val(*pmd)); +		if (pmd_none(pmd) || pmd_sect(pmd)) { +			note_page(st, addr, 3, pmd_val(pmd));  		} else { -			BUG_ON(pmd_bad(*pmd)); -			walk_pte(st, pmd, addr); +			BUG_ON(pmd_bad(pmd)); +			walk_pte(st, pmdp, addr);  		}  	}  } -static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) +static void walk_pud(struct pg_state *st, pgd_t *pgdp, unsigned long start)  { -	pud_t *pud = pud_offset(pgd, 0UL); +	pud_t *pudp = pud_offset(pgdp, 0UL);  	unsigned long addr;  	unsigned i; -	for (i = 0; i < PTRS_PER_PUD; i++, pud++) { +	for (i = 0; i < PTRS_PER_PUD; i++, pudp++) { +		pud_t pud = READ_ONCE(*pudp); +  		addr = start + i * PUD_SIZE; -		if (pud_none(*pud) || pud_sect(*pud)) { -			note_page(st, addr, 2, pud_val(*pud)); +		if (pud_none(pud) || pud_sect(pud)) { +			note_page(st, addr, 2, pud_val(pud));  		} else { -			BUG_ON(pud_bad(*pud)); -			walk_pmd(st, pud, addr); +			BUG_ON(pud_bad(pud)); +			walk_pmd(st, pudp, addr);  		}  	}  } @@ -335,17 +339,19 @@ static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start)  static void walk_pgd(struct pg_state *st, struct mm_struct *mm,  		     unsigned long start)  { -	pgd_t *pgd = pgd_offset(mm, 0UL); +	pgd_t *pgdp = pgd_offset(mm, 0UL);  	unsigned i;  	unsigned long addr; -	for (i = 0; i < PTRS_PER_PGD; i++, pgd++) { +	for (i = 0; i < PTRS_PER_PGD; i++, pgdp++) { +		pgd_t pgd = READ_ONCE(*pgdp); +  		addr = start + i * PGDIR_SIZE; -		if (pgd_none(*pgd)) { -			note_page(st, addr, 1, pgd_val(*pgd)); +		if (pgd_none(pgd)) { +			note_page(st, addr, 1, pgd_val(pgd));  		} else { -			BUG_ON(pgd_bad(*pgd)); -			walk_pud(st, pgd, addr); +			BUG_ON(pgd_bad(pgd)); +			walk_pud(st, pgdp, addr);  		}  	}  } diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index f76bb2c3c943..bff11553eb05 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -130,7 +130,8 @@ static void mem_abort_decode(unsigned int esr)  void show_pte(unsigned long addr)  {  	struct mm_struct *mm; -	pgd_t *pgd; +	pgd_t *pgdp; +	pgd_t pgd;  	if (addr < TASK_SIZE) {  		
/* TTBR0 */ @@ -149,33 +150,37 @@ void show_pte(unsigned long addr)  		return;  	} -	pr_alert("%s pgtable: %luk pages, %u-bit VAs, pgd = %p\n", +	pr_alert("%s pgtable: %luk pages, %u-bit VAs, pgdp = %p\n",  		 mm == &init_mm ? "swapper" : "user", PAGE_SIZE / SZ_1K,  		 VA_BITS, mm->pgd); -	pgd = pgd_offset(mm, addr); -	pr_alert("[%016lx] *pgd=%016llx", addr, pgd_val(*pgd)); +	pgdp = pgd_offset(mm, addr); +	pgd = READ_ONCE(*pgdp); +	pr_alert("[%016lx] pgd=%016llx", addr, pgd_val(pgd));  	do { -		pud_t *pud; -		pmd_t *pmd; -		pte_t *pte; +		pud_t *pudp, pud; +		pmd_t *pmdp, pmd; +		pte_t *ptep, pte; -		if (pgd_none(*pgd) || pgd_bad(*pgd)) +		if (pgd_none(pgd) || pgd_bad(pgd))  			break; -		pud = pud_offset(pgd, addr); -		pr_cont(", *pud=%016llx", pud_val(*pud)); -		if (pud_none(*pud) || pud_bad(*pud)) +		pudp = pud_offset(pgdp, addr); +		pud = READ_ONCE(*pudp); +		pr_cont(", pud=%016llx", pud_val(pud)); +		if (pud_none(pud) || pud_bad(pud))  			break; -		pmd = pmd_offset(pud, addr); -		pr_cont(", *pmd=%016llx", pmd_val(*pmd)); -		if (pmd_none(*pmd) || pmd_bad(*pmd)) +		pmdp = pmd_offset(pudp, addr); +		pmd = READ_ONCE(*pmdp); +		pr_cont(", pmd=%016llx", pmd_val(pmd)); +		if (pmd_none(pmd) || pmd_bad(pmd))  			break; -		pte = pte_offset_map(pmd, addr); -		pr_cont(", *pte=%016llx", pte_val(*pte)); -		pte_unmap(pte); +		ptep = pte_offset_map(pmdp, addr); +		pte = READ_ONCE(*ptep); +		pr_cont(", pte=%016llx", pte_val(pte)); +		pte_unmap(ptep);  	} while(0);  	pr_cont("\n"); @@ -196,8 +201,9 @@ int ptep_set_access_flags(struct vm_area_struct *vma,  			  pte_t entry, int dirty)  {  	pteval_t old_pteval, pteval; +	pte_t pte = READ_ONCE(*ptep); -	if (pte_same(*ptep, entry)) +	if (pte_same(pte, entry))  		return 0;  	/* only preserve the access flags and write permission */ @@ -210,7 +216,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma,  	 * (calculated as: a & b == ~(~a | ~b)).  	 
*/  	pte_val(entry) ^= PTE_RDONLY; -	pteval = READ_ONCE(pte_val(*ptep)); +	pteval = pte_val(pte);  	do {  		old_pteval = pteval;  		pteval ^= PTE_RDONLY; diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 6cb0fa92a651..ecc6818191df 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -54,14 +54,14 @@ static inline pgprot_t pte_pgprot(pte_t pte)  static int find_num_contig(struct mm_struct *mm, unsigned long addr,  			   pte_t *ptep, size_t *pgsize)  { -	pgd_t *pgd = pgd_offset(mm, addr); -	pud_t *pud; -	pmd_t *pmd; +	pgd_t *pgdp = pgd_offset(mm, addr); +	pud_t *pudp; +	pmd_t *pmdp;  	*pgsize = PAGE_SIZE; -	pud = pud_offset(pgd, addr); -	pmd = pmd_offset(pud, addr); -	if ((pte_t *)pmd == ptep) { +	pudp = pud_offset(pgdp, addr); +	pmdp = pmd_offset(pudp, addr); +	if ((pte_t *)pmdp == ptep) {  		*pgsize = PMD_SIZE;  		return CONT_PMDS;  	} @@ -181,11 +181,8 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,  	clear_flush(mm, addr, ptep, pgsize, ncontig); -	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) { -		pr_debug("%s: set pte %p to 0x%llx\n", __func__, ptep, -			 pte_val(pfn_pte(pfn, hugeprot))); +	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)  		set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot)); -	}  }  void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr, @@ -203,20 +200,20 @@ void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,  pte_t *huge_pte_alloc(struct mm_struct *mm,  		      unsigned long addr, unsigned long sz)  { -	pgd_t *pgd; -	pud_t *pud; -	pte_t *pte = NULL; - -	pr_debug("%s: addr:0x%lx sz:0x%lx\n", __func__, addr, sz); -	pgd = pgd_offset(mm, addr); -	pud = pud_alloc(mm, pgd, addr); -	if (!pud) +	pgd_t *pgdp; +	pud_t *pudp; +	pmd_t *pmdp; +	pte_t *ptep = NULL; + +	pgdp = pgd_offset(mm, addr); +	pudp = pud_alloc(mm, pgdp, addr); +	if (!pudp)  		return NULL;  	if (sz == PUD_SIZE) { -		pte = (pte_t *)pud; +		ptep = (pte_t *)pudp;  	} else if (sz == (PAGE_SIZE * CONT_PTES)) { -		pmd_t *pmd = pmd_alloc(mm, pud, addr); +		pmdp = pmd_alloc(mm, pudp, addr);  		WARN_ON(addr & (sz - 1));  		/* @@ -226,60 +223,55 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,  		 * will be no pte_unmap() to correspond with this  		 * pte_alloc_map().  		 
*/ -		pte = pte_alloc_map(mm, pmd, addr); +		ptep = pte_alloc_map(mm, pmdp, addr);  	} else if (sz == PMD_SIZE) {  		if (IS_ENABLED(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) && -		    pud_none(*pud)) -			pte = huge_pmd_share(mm, addr, pud); +		    pud_none(READ_ONCE(*pudp))) +			ptep = huge_pmd_share(mm, addr, pudp);  		else -			pte = (pte_t *)pmd_alloc(mm, pud, addr); +			ptep = (pte_t *)pmd_alloc(mm, pudp, addr);  	} else if (sz == (PMD_SIZE * CONT_PMDS)) { -		pmd_t *pmd; - -		pmd = pmd_alloc(mm, pud, addr); +		pmdp = pmd_alloc(mm, pudp, addr);  		WARN_ON(addr & (sz - 1)); -		return (pte_t *)pmd; +		return (pte_t *)pmdp;  	} -	pr_debug("%s: addr:0x%lx sz:0x%lx ret pte=%p/0x%llx\n", __func__, addr, -	       sz, pte, pte_val(*pte)); -	return pte; +	return ptep;  }  pte_t *huge_pte_offset(struct mm_struct *mm,  		       unsigned long addr, unsigned long sz)  { -	pgd_t *pgd; -	pud_t *pud; -	pmd_t *pmd; +	pgd_t *pgdp; +	pud_t *pudp, pud; +	pmd_t *pmdp, pmd; -	pgd = pgd_offset(mm, addr); -	pr_debug("%s: addr:0x%lx pgd:%p\n", __func__, addr, pgd); -	if (!pgd_present(*pgd)) +	pgdp = pgd_offset(mm, addr); +	if (!pgd_present(READ_ONCE(*pgdp)))  		return NULL; -	pud = pud_offset(pgd, addr); -	if (sz != PUD_SIZE && pud_none(*pud)) +	pudp = pud_offset(pgdp, addr); +	pud = READ_ONCE(*pudp); +	if (sz != PUD_SIZE && pud_none(pud))  		return NULL;  	/* hugepage or swap? */ -	if (pud_huge(*pud) || !pud_present(*pud)) -		return (pte_t *)pud; +	if (pud_huge(pud) || !pud_present(pud)) +		return (pte_t *)pudp;  	/* table; check the next level */  	if (sz == CONT_PMD_SIZE)  		addr &= CONT_PMD_MASK; -	pmd = pmd_offset(pud, addr); +	pmdp = pmd_offset(pudp, addr); +	pmd = READ_ONCE(*pmdp);  	if (!(sz == PMD_SIZE || sz == CONT_PMD_SIZE) && -	    pmd_none(*pmd)) +	    pmd_none(pmd))  		return NULL; -	if (pmd_huge(*pmd) || !pmd_present(*pmd)) -		return (pte_t *)pmd; +	if (pmd_huge(pmd) || !pmd_present(pmd)) +		return (pte_t *)pmdp; -	if (sz == CONT_PTE_SIZE) { -		pte_t *pte = pte_offset_kernel(pmd, (addr & CONT_PTE_MASK)); -		return pte; -	} +	if (sz == CONT_PTE_SIZE) +		return pte_offset_kernel(pmdp, (addr & CONT_PTE_MASK));  	return NULL;  } @@ -367,7 +359,7 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm,  	size_t pgsize;  	pte_t pte; -	if (!pte_cont(*ptep)) { +	if (!pte_cont(READ_ONCE(*ptep))) {  		ptep_set_wrprotect(mm, addr, ptep);  		return;  	} @@ -391,7 +383,7 @@ void huge_ptep_clear_flush(struct vm_area_struct *vma,  	size_t pgsize;  	int ncontig; -	if (!pte_cont(*ptep)) { +	if (!pte_cont(READ_ONCE(*ptep))) {  		ptep_clear_flush(vma, addr, ptep);  		return;  	} diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index 6e02e6fb4c7b..dabfc1ecda3d 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -44,92 +44,92 @@ static phys_addr_t __init kasan_alloc_zeroed_page(int node)  	return __pa(p);  } -static pte_t *__init kasan_pte_offset(pmd_t *pmd, unsigned long addr, int node, +static pte_t *__init kasan_pte_offset(pmd_t *pmdp, unsigned long addr, int node,  				      bool early)  { -	if (pmd_none(*pmd)) { +	if (pmd_none(READ_ONCE(*pmdp))) {  		phys_addr_t pte_phys = early ? __pa_symbol(kasan_zero_pte)  					     : kasan_alloc_zeroed_page(node); -		__pmd_populate(pmd, pte_phys, PMD_TYPE_TABLE); +		__pmd_populate(pmdp, pte_phys, PMD_TYPE_TABLE);  	} -	return early ? pte_offset_kimg(pmd, addr) -		     : pte_offset_kernel(pmd, addr); +	return early ? 
pte_offset_kimg(pmdp, addr) +		     : pte_offset_kernel(pmdp, addr);  } -static pmd_t *__init kasan_pmd_offset(pud_t *pud, unsigned long addr, int node, +static pmd_t *__init kasan_pmd_offset(pud_t *pudp, unsigned long addr, int node,  				      bool early)  { -	if (pud_none(*pud)) { +	if (pud_none(READ_ONCE(*pudp))) {  		phys_addr_t pmd_phys = early ? __pa_symbol(kasan_zero_pmd)  					     : kasan_alloc_zeroed_page(node); -		__pud_populate(pud, pmd_phys, PMD_TYPE_TABLE); +		__pud_populate(pudp, pmd_phys, PMD_TYPE_TABLE);  	} -	return early ? pmd_offset_kimg(pud, addr) : pmd_offset(pud, addr); +	return early ? pmd_offset_kimg(pudp, addr) : pmd_offset(pudp, addr);  } -static pud_t *__init kasan_pud_offset(pgd_t *pgd, unsigned long addr, int node, +static pud_t *__init kasan_pud_offset(pgd_t *pgdp, unsigned long addr, int node,  				      bool early)  { -	if (pgd_none(*pgd)) { +	if (pgd_none(READ_ONCE(*pgdp))) {  		phys_addr_t pud_phys = early ? __pa_symbol(kasan_zero_pud)  					     : kasan_alloc_zeroed_page(node); -		__pgd_populate(pgd, pud_phys, PMD_TYPE_TABLE); +		__pgd_populate(pgdp, pud_phys, PMD_TYPE_TABLE);  	} -	return early ? pud_offset_kimg(pgd, addr) : pud_offset(pgd, addr); +	return early ? pud_offset_kimg(pgdp, addr) : pud_offset(pgdp, addr);  } -static void __init kasan_pte_populate(pmd_t *pmd, unsigned long addr, +static void __init kasan_pte_populate(pmd_t *pmdp, unsigned long addr,  				      unsigned long end, int node, bool early)  {  	unsigned long next; -	pte_t *pte = kasan_pte_offset(pmd, addr, node, early); +	pte_t *ptep = kasan_pte_offset(pmdp, addr, node, early);  	do {  		phys_addr_t page_phys = early ? __pa_symbol(kasan_zero_page)  					      : kasan_alloc_zeroed_page(node);  		next = addr + PAGE_SIZE; -		set_pte(pte, pfn_pte(__phys_to_pfn(page_phys), PAGE_KERNEL)); -	} while (pte++, addr = next, addr != end && pte_none(*pte)); +		set_pte(ptep, pfn_pte(__phys_to_pfn(page_phys), PAGE_KERNEL)); +	} while (ptep++, addr = next, addr != end && pte_none(READ_ONCE(*ptep)));  } -static void __init kasan_pmd_populate(pud_t *pud, unsigned long addr, +static void __init kasan_pmd_populate(pud_t *pudp, unsigned long addr,  				      unsigned long end, int node, bool early)  {  	unsigned long next; -	pmd_t *pmd = kasan_pmd_offset(pud, addr, node, early); +	pmd_t *pmdp = kasan_pmd_offset(pudp, addr, node, early);  	do {  		next = pmd_addr_end(addr, end); -		kasan_pte_populate(pmd, addr, next, node, early); -	} while (pmd++, addr = next, addr != end && pmd_none(*pmd)); +		kasan_pte_populate(pmdp, addr, next, node, early); +	} while (pmdp++, addr = next, addr != end && pmd_none(READ_ONCE(*pmdp)));  } -static void __init kasan_pud_populate(pgd_t *pgd, unsigned long addr, +static void __init kasan_pud_populate(pgd_t *pgdp, unsigned long addr,  				      unsigned long end, int node, bool early)  {  	unsigned long next; -	pud_t *pud = kasan_pud_offset(pgd, addr, node, early); +	pud_t *pudp = kasan_pud_offset(pgdp, addr, node, early);  	do {  		next = pud_addr_end(addr, end); -		kasan_pmd_populate(pud, addr, next, node, early); -	} while (pud++, addr = next, addr != end && pud_none(*pud)); +		kasan_pmd_populate(pudp, addr, next, node, early); +	} while (pudp++, addr = next, addr != end && pud_none(READ_ONCE(*pudp)));  }  static void __init kasan_pgd_populate(unsigned long addr, unsigned long end,  				      int node, bool early)  {  	unsigned long next; -	pgd_t *pgd; +	pgd_t *pgdp; -	pgd = pgd_offset_k(addr); +	pgdp = pgd_offset_k(addr);  	do {  		next = pgd_addr_end(addr, end); -		
kasan_pud_populate(pgd, addr, next, node, early); -	} while (pgd++, addr = next, addr != end); +		kasan_pud_populate(pgdp, addr, next, node, early); +	} while (pgdp++, addr = next, addr != end);  }  /* The early shadow maps everything to a single page of zeroes */ @@ -155,14 +155,14 @@ static void __init kasan_map_populate(unsigned long start, unsigned long end,   */  void __init kasan_copy_shadow(pgd_t *pgdir)  { -	pgd_t *pgd, *pgd_new, *pgd_end; +	pgd_t *pgdp, *pgdp_new, *pgdp_end; -	pgd = pgd_offset_k(KASAN_SHADOW_START); -	pgd_end = pgd_offset_k(KASAN_SHADOW_END); -	pgd_new = pgd_offset_raw(pgdir, KASAN_SHADOW_START); +	pgdp = pgd_offset_k(KASAN_SHADOW_START); +	pgdp_end = pgd_offset_k(KASAN_SHADOW_END); +	pgdp_new = pgd_offset_raw(pgdir, KASAN_SHADOW_START);  	do { -		set_pgd(pgd_new, *pgd); -	} while (pgd++, pgd_new++, pgd != pgd_end); +		set_pgd(pgdp_new, READ_ONCE(*pgdp)); +	} while (pgdp++, pgdp_new++, pgdp != pgdp_end);  }  static void __init clear_pgds(unsigned long start, diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 4694cda823c9..2dbb2c9f1ec1 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -108,7 +108,7 @@ static bool pgattr_change_is_safe(u64 old, u64 new)  	 * The following mapping attributes may be updated in live  	 * kernel mappings without the need for break-before-make.  	 */ -	static const pteval_t mask = PTE_PXN | PTE_RDONLY | PTE_WRITE; +	static const pteval_t mask = PTE_PXN | PTE_RDONLY | PTE_WRITE | PTE_NG;  	/* creating or taking down mappings is always safe */  	if (old == 0 || new == 0) @@ -118,52 +118,55 @@ static bool pgattr_change_is_safe(u64 old, u64 new)  	if ((old | new) & PTE_CONT)  		return false; -	/* Transitioning from Global to Non-Global is safe */ -	if (((old ^ new) == PTE_NG) && (new & PTE_NG)) -		return true; +	/* Transitioning from Non-Global to Global is unsafe */ +	if (old & ~new & PTE_NG) +		return false;  	return ((old ^ new) & ~mask) == 0;  } -static void init_pte(pmd_t *pmd, unsigned long addr, unsigned long end, +static void init_pte(pmd_t *pmdp, unsigned long addr, unsigned long end,  		     phys_addr_t phys, pgprot_t prot)  { -	pte_t *pte; +	pte_t *ptep; -	pte = pte_set_fixmap_offset(pmd, addr); +	ptep = pte_set_fixmap_offset(pmdp, addr);  	do { -		pte_t old_pte = *pte; +		pte_t old_pte = READ_ONCE(*ptep); -		set_pte(pte, pfn_pte(__phys_to_pfn(phys), prot)); +		set_pte(ptep, pfn_pte(__phys_to_pfn(phys), prot));  		/*  		 * After the PTE entry has been populated once, we  		 * only allow updates to the permission attributes.  		 
*/ -		BUG_ON(!pgattr_change_is_safe(pte_val(old_pte), pte_val(*pte))); +		BUG_ON(!pgattr_change_is_safe(pte_val(old_pte), +					      READ_ONCE(pte_val(*ptep))));  		phys += PAGE_SIZE; -	} while (pte++, addr += PAGE_SIZE, addr != end); +	} while (ptep++, addr += PAGE_SIZE, addr != end);  	pte_clear_fixmap();  } -static void alloc_init_cont_pte(pmd_t *pmd, unsigned long addr, +static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,  				unsigned long end, phys_addr_t phys,  				pgprot_t prot,  				phys_addr_t (*pgtable_alloc)(void),  				int flags)  {  	unsigned long next; +	pmd_t pmd = READ_ONCE(*pmdp); -	BUG_ON(pmd_sect(*pmd)); -	if (pmd_none(*pmd)) { +	BUG_ON(pmd_sect(pmd)); +	if (pmd_none(pmd)) {  		phys_addr_t pte_phys;  		BUG_ON(!pgtable_alloc);  		pte_phys = pgtable_alloc(); -		__pmd_populate(pmd, pte_phys, PMD_TYPE_TABLE); +		__pmd_populate(pmdp, pte_phys, PMD_TYPE_TABLE); +		pmd = READ_ONCE(*pmdp);  	} -	BUG_ON(pmd_bad(*pmd)); +	BUG_ON(pmd_bad(pmd));  	do {  		pgprot_t __prot = prot; @@ -175,67 +178,69 @@ static void alloc_init_cont_pte(pmd_t *pmd, unsigned long addr,  		    (flags & NO_CONT_MAPPINGS) == 0)  			__prot = __pgprot(pgprot_val(prot) | PTE_CONT); -		init_pte(pmd, addr, next, phys, __prot); +		init_pte(pmdp, addr, next, phys, __prot);  		phys += next - addr;  	} while (addr = next, addr != end);  } -static void init_pmd(pud_t *pud, unsigned long addr, unsigned long end, +static void init_pmd(pud_t *pudp, unsigned long addr, unsigned long end,  		     phys_addr_t phys, pgprot_t prot,  		     phys_addr_t (*pgtable_alloc)(void), int flags)  {  	unsigned long next; -	pmd_t *pmd; +	pmd_t *pmdp; -	pmd = pmd_set_fixmap_offset(pud, addr); +	pmdp = pmd_set_fixmap_offset(pudp, addr);  	do { -		pmd_t old_pmd = *pmd; +		pmd_t old_pmd = READ_ONCE(*pmdp);  		next = pmd_addr_end(addr, end);  		/* try section mapping first */  		if (((addr | next | phys) & ~SECTION_MASK) == 0 &&  		    (flags & NO_BLOCK_MAPPINGS) == 0) { -			pmd_set_huge(pmd, phys, prot); +			pmd_set_huge(pmdp, phys, prot);  			/*  			 * After the PMD entry has been populated once, we  			 * only allow updates to the permission attributes.  			 */  			BUG_ON(!pgattr_change_is_safe(pmd_val(old_pmd), -						      pmd_val(*pmd))); +						      READ_ONCE(pmd_val(*pmdp))));  		} else { -			alloc_init_cont_pte(pmd, addr, next, phys, prot, +			alloc_init_cont_pte(pmdp, addr, next, phys, prot,  					    pgtable_alloc, flags);  			BUG_ON(pmd_val(old_pmd) != 0 && -			       pmd_val(old_pmd) != pmd_val(*pmd)); +			       pmd_val(old_pmd) != READ_ONCE(pmd_val(*pmdp)));  		}  		phys += next - addr; -	} while (pmd++, addr = next, addr != end); +	} while (pmdp++, addr = next, addr != end);  	pmd_clear_fixmap();  } -static void alloc_init_cont_pmd(pud_t *pud, unsigned long addr, +static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr,  				unsigned long end, phys_addr_t phys,  				pgprot_t prot,  				phys_addr_t (*pgtable_alloc)(void), int flags)  {  	unsigned long next; +	pud_t pud = READ_ONCE(*pudp);  	/*  	 * Check for initial section mappings in the pgd/pud.  	 
*/ -	BUG_ON(pud_sect(*pud)); -	if (pud_none(*pud)) { +	BUG_ON(pud_sect(pud)); +	if (pud_none(pud)) {  		phys_addr_t pmd_phys;  		BUG_ON(!pgtable_alloc);  		pmd_phys = pgtable_alloc(); -		__pud_populate(pud, pmd_phys, PUD_TYPE_TABLE); +		__pud_populate(pudp, pmd_phys, PUD_TYPE_TABLE); +		pud = READ_ONCE(*pudp);  	} -	BUG_ON(pud_bad(*pud)); +	BUG_ON(pud_bad(pud));  	do {  		pgprot_t __prot = prot; @@ -247,7 +252,7 @@ static void alloc_init_cont_pmd(pud_t *pud, unsigned long addr,  		    (flags & NO_CONT_MAPPINGS) == 0)  			__prot = __pgprot(pgprot_val(prot) | PTE_CONT); -		init_pmd(pud, addr, next, phys, __prot, pgtable_alloc, flags); +		init_pmd(pudp, addr, next, phys, __prot, pgtable_alloc, flags);  		phys += next - addr;  	} while (addr = next, addr != end); @@ -265,25 +270,27 @@ static inline bool use_1G_block(unsigned long addr, unsigned long next,  	return true;  } -static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end, -				  phys_addr_t phys, pgprot_t prot, -				  phys_addr_t (*pgtable_alloc)(void), -				  int flags) +static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end, +			   phys_addr_t phys, pgprot_t prot, +			   phys_addr_t (*pgtable_alloc)(void), +			   int flags)  { -	pud_t *pud;  	unsigned long next; +	pud_t *pudp; +	pgd_t pgd = READ_ONCE(*pgdp); -	if (pgd_none(*pgd)) { +	if (pgd_none(pgd)) {  		phys_addr_t pud_phys;  		BUG_ON(!pgtable_alloc);  		pud_phys = pgtable_alloc(); -		__pgd_populate(pgd, pud_phys, PUD_TYPE_TABLE); +		__pgd_populate(pgdp, pud_phys, PUD_TYPE_TABLE); +		pgd = READ_ONCE(*pgdp);  	} -	BUG_ON(pgd_bad(*pgd)); +	BUG_ON(pgd_bad(pgd)); -	pud = pud_set_fixmap_offset(pgd, addr); +	pudp = pud_set_fixmap_offset(pgdp, addr);  	do { -		pud_t old_pud = *pud; +		pud_t old_pud = READ_ONCE(*pudp);  		next = pud_addr_end(addr, end); @@ -292,23 +299,23 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,  		 */  		if (use_1G_block(addr, next, phys) &&  		    (flags & NO_BLOCK_MAPPINGS) == 0) { -			pud_set_huge(pud, phys, prot); +			pud_set_huge(pudp, phys, prot);  			/*  			 * After the PUD entry has been populated once, we  			 * only allow updates to the permission attributes.  			 
*/  			BUG_ON(!pgattr_change_is_safe(pud_val(old_pud), -						      pud_val(*pud))); +						      READ_ONCE(pud_val(*pudp))));  		} else { -			alloc_init_cont_pmd(pud, addr, next, phys, prot, +			alloc_init_cont_pmd(pudp, addr, next, phys, prot,  					    pgtable_alloc, flags);  			BUG_ON(pud_val(old_pud) != 0 && -			       pud_val(old_pud) != pud_val(*pud)); +			       pud_val(old_pud) != READ_ONCE(pud_val(*pudp)));  		}  		phys += next - addr; -	} while (pud++, addr = next, addr != end); +	} while (pudp++, addr = next, addr != end);  	pud_clear_fixmap();  } @@ -320,7 +327,7 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,  				 int flags)  {  	unsigned long addr, length, end, next; -	pgd_t *pgd = pgd_offset_raw(pgdir, virt); +	pgd_t *pgdp = pgd_offset_raw(pgdir, virt);  	/*  	 * If the virtual and physical address don't have the same offset @@ -336,10 +343,10 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,  	end = addr + length;  	do {  		next = pgd_addr_end(addr, end); -		alloc_init_pud(pgd, addr, next, phys, prot, pgtable_alloc, +		alloc_init_pud(pgdp, addr, next, phys, prot, pgtable_alloc,  			       flags);  		phys += next - addr; -	} while (pgd++, addr = next, addr != end); +	} while (pgdp++, addr = next, addr != end);  }  static phys_addr_t pgd_pgtable_alloc(void) @@ -401,10 +408,10 @@ static void update_mapping_prot(phys_addr_t phys, unsigned long virt,  	flush_tlb_kernel_range(virt, virt + size);  } -static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, +static void __init __map_memblock(pgd_t *pgdp, phys_addr_t start,  				  phys_addr_t end, pgprot_t prot, int flags)  { -	__create_pgd_mapping(pgd, start, __phys_to_virt(start), end - start, +	__create_pgd_mapping(pgdp, start, __phys_to_virt(start), end - start,  			     prot, early_pgtable_alloc, flags);  } @@ -418,7 +425,7 @@ void __init mark_linear_text_alias_ro(void)  			    PAGE_KERNEL_RO);  } -static void __init map_mem(pgd_t *pgd) +static void __init map_mem(pgd_t *pgdp)  {  	phys_addr_t kernel_start = __pa_symbol(_text);  	phys_addr_t kernel_end = __pa_symbol(__init_begin); @@ -451,7 +458,7 @@ static void __init map_mem(pgd_t *pgd)  		if (memblock_is_nomap(reg))  			continue; -		__map_memblock(pgd, start, end, PAGE_KERNEL, flags); +		__map_memblock(pgdp, start, end, PAGE_KERNEL, flags);  	}  	/* @@ -464,7 +471,7 @@ static void __init map_mem(pgd_t *pgd)  	 * Note that contiguous mappings cannot be remapped in this way,  	 * so we should avoid them here.  	 */ -	__map_memblock(pgd, kernel_start, kernel_end, +	__map_memblock(pgdp, kernel_start, kernel_end,  		       PAGE_KERNEL, NO_CONT_MAPPINGS);  	memblock_clear_nomap(kernel_start, kernel_end - kernel_start); @@ -475,7 +482,7 @@ static void __init map_mem(pgd_t *pgd)  	 * through /sys/kernel/kexec_crash_size interface.  	 
*/  	if (crashk_res.end) { -		__map_memblock(pgd, crashk_res.start, crashk_res.end + 1, +		__map_memblock(pgdp, crashk_res.start, crashk_res.end + 1,  			       PAGE_KERNEL,  			       NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS);  		memblock_clear_nomap(crashk_res.start, @@ -499,7 +506,7 @@ void mark_rodata_ro(void)  	debug_checkwx();  } -static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end, +static void __init map_kernel_segment(pgd_t *pgdp, void *va_start, void *va_end,  				      pgprot_t prot, struct vm_struct *vma,  				      int flags, unsigned long vm_flags)  { @@ -509,7 +516,7 @@ static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end,  	BUG_ON(!PAGE_ALIGNED(pa_start));  	BUG_ON(!PAGE_ALIGNED(size)); -	__create_pgd_mapping(pgd, pa_start, (unsigned long)va_start, size, prot, +	__create_pgd_mapping(pgdp, pa_start, (unsigned long)va_start, size, prot,  			     early_pgtable_alloc, flags);  	if (!(vm_flags & VM_NO_GUARD)) @@ -562,7 +569,7 @@ core_initcall(map_entry_trampoline);  /*   * Create fine-grained mappings for the kernel.   */ -static void __init map_kernel(pgd_t *pgd) +static void __init map_kernel(pgd_t *pgdp)  {  	static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_inittext,  				vmlinux_initdata, vmlinux_data; @@ -578,24 +585,24 @@ static void __init map_kernel(pgd_t *pgd)  	 * Only rodata will be remapped with different permissions later on,  	 * all other segments are allowed to use contiguous mappings.  	 */ -	map_kernel_segment(pgd, _text, _etext, text_prot, &vmlinux_text, 0, +	map_kernel_segment(pgdp, _text, _etext, text_prot, &vmlinux_text, 0,  			   VM_NO_GUARD); -	map_kernel_segment(pgd, __start_rodata, __inittext_begin, PAGE_KERNEL, +	map_kernel_segment(pgdp, __start_rodata, __inittext_begin, PAGE_KERNEL,  			   &vmlinux_rodata, NO_CONT_MAPPINGS, VM_NO_GUARD); -	map_kernel_segment(pgd, __inittext_begin, __inittext_end, text_prot, +	map_kernel_segment(pgdp, __inittext_begin, __inittext_end, text_prot,  			   &vmlinux_inittext, 0, VM_NO_GUARD); -	map_kernel_segment(pgd, __initdata_begin, __initdata_end, PAGE_KERNEL, +	map_kernel_segment(pgdp, __initdata_begin, __initdata_end, PAGE_KERNEL,  			   &vmlinux_initdata, 0, VM_NO_GUARD); -	map_kernel_segment(pgd, _data, _end, PAGE_KERNEL, &vmlinux_data, 0, 0); +	map_kernel_segment(pgdp, _data, _end, PAGE_KERNEL, &vmlinux_data, 0, 0); -	if (!pgd_val(*pgd_offset_raw(pgd, FIXADDR_START))) { +	if (!READ_ONCE(pgd_val(*pgd_offset_raw(pgdp, FIXADDR_START)))) {  		/*  		 * The fixmap falls in a separate pgd to the kernel, and doesn't  		 * live in the carveout for the swapper_pg_dir. We can simply  		 * re-use the existing dir for the fixmap.  		 */ -		set_pgd(pgd_offset_raw(pgd, FIXADDR_START), -			*pgd_offset_k(FIXADDR_START)); +		set_pgd(pgd_offset_raw(pgdp, FIXADDR_START), +			READ_ONCE(*pgd_offset_k(FIXADDR_START)));  	} else if (CONFIG_PGTABLE_LEVELS > 3) {  		/*  		 * The fixmap shares its top level pgd entry with the kernel @@ -604,14 +611,15 @@ static void __init map_kernel(pgd_t *pgd)  		 * entry instead.  		 
*/  		BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES)); -		pud_populate(&init_mm, pud_set_fixmap_offset(pgd, FIXADDR_START), +		pud_populate(&init_mm, +			     pud_set_fixmap_offset(pgdp, FIXADDR_START),  			     lm_alias(bm_pmd));  		pud_clear_fixmap();  	} else {  		BUG();  	} -	kasan_copy_shadow(pgd); +	kasan_copy_shadow(pgdp);  }  /* @@ -621,10 +629,10 @@ static void __init map_kernel(pgd_t *pgd)  void __init paging_init(void)  {  	phys_addr_t pgd_phys = early_pgtable_alloc(); -	pgd_t *pgd = pgd_set_fixmap(pgd_phys); +	pgd_t *pgdp = pgd_set_fixmap(pgd_phys); -	map_kernel(pgd); -	map_mem(pgd); +	map_kernel(pgdp); +	map_mem(pgdp);  	/*  	 * We want to reuse the original swapper_pg_dir so we don't have to @@ -635,7 +643,7 @@ void __init paging_init(void)  	 * To do this we need to go via a temporary pgd.  	 */  	cpu_replace_ttbr1(__va(pgd_phys)); -	memcpy(swapper_pg_dir, pgd, PGD_SIZE); +	memcpy(swapper_pg_dir, pgdp, PGD_SIZE);  	cpu_replace_ttbr1(lm_alias(swapper_pg_dir));  	pgd_clear_fixmap(); @@ -655,37 +663,40 @@ void __init paging_init(void)   */  int kern_addr_valid(unsigned long addr)  { -	pgd_t *pgd; -	pud_t *pud; -	pmd_t *pmd; -	pte_t *pte; +	pgd_t *pgdp; +	pud_t *pudp, pud; +	pmd_t *pmdp, pmd; +	pte_t *ptep, pte;  	if ((((long)addr) >> VA_BITS) != -1UL)  		return 0; -	pgd = pgd_offset_k(addr); -	if (pgd_none(*pgd)) +	pgdp = pgd_offset_k(addr); +	if (pgd_none(READ_ONCE(*pgdp)))  		return 0; -	pud = pud_offset(pgd, addr); -	if (pud_none(*pud)) +	pudp = pud_offset(pgdp, addr); +	pud = READ_ONCE(*pudp); +	if (pud_none(pud))  		return 0; -	if (pud_sect(*pud)) -		return pfn_valid(pud_pfn(*pud)); +	if (pud_sect(pud)) +		return pfn_valid(pud_pfn(pud)); -	pmd = pmd_offset(pud, addr); -	if (pmd_none(*pmd)) +	pmdp = pmd_offset(pudp, addr); +	pmd = READ_ONCE(*pmdp); +	if (pmd_none(pmd))  		return 0; -	if (pmd_sect(*pmd)) -		return pfn_valid(pmd_pfn(*pmd)); +	if (pmd_sect(pmd)) +		return pfn_valid(pmd_pfn(pmd)); -	pte = pte_offset_kernel(pmd, addr); -	if (pte_none(*pte)) +	ptep = pte_offset_kernel(pmdp, addr); +	pte = READ_ONCE(*ptep); +	if (pte_none(pte))  		return 0; -	return pfn_valid(pte_pfn(*pte)); +	return pfn_valid(pte_pfn(pte));  }  #ifdef CONFIG_SPARSEMEM_VMEMMAP  #if !ARM64_SWAPPER_USES_SECTION_MAPS @@ -700,32 +711,32 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,  {  	unsigned long addr = start;  	unsigned long next; -	pgd_t *pgd; -	pud_t *pud; -	pmd_t *pmd; +	pgd_t *pgdp; +	pud_t *pudp; +	pmd_t *pmdp;  	do {  		next = pmd_addr_end(addr, end); -		pgd = vmemmap_pgd_populate(addr, node); -		if (!pgd) +		pgdp = vmemmap_pgd_populate(addr, node); +		if (!pgdp)  			return -ENOMEM; -		pud = vmemmap_pud_populate(pgd, addr, node); -		if (!pud) +		pudp = vmemmap_pud_populate(pgdp, addr, node); +		if (!pudp)  			return -ENOMEM; -		pmd = pmd_offset(pud, addr); -		if (pmd_none(*pmd)) { +		pmdp = pmd_offset(pudp, addr); +		if (pmd_none(READ_ONCE(*pmdp))) {  			void *p = NULL;  			p = vmemmap_alloc_block_buf(PMD_SIZE, node);  			if (!p)  				return -ENOMEM; -			pmd_set_huge(pmd, __pa(p), __pgprot(PROT_SECT_NORMAL)); +			pmd_set_huge(pmdp, __pa(p), __pgprot(PROT_SECT_NORMAL));  		} else -			vmemmap_verify((pte_t *)pmd, node, addr, next); +			vmemmap_verify((pte_t *)pmdp, node, addr, next);  	} while (addr = next, addr != end);  	return 0; @@ -739,20 +750,22 @@ void vmemmap_free(unsigned long start, unsigned long end,  static inline pud_t * fixmap_pud(unsigned long addr)  { -	pgd_t *pgd = pgd_offset_k(addr); +	pgd_t *pgdp = pgd_offset_k(addr); +	pgd_t pgd = 
READ_ONCE(*pgdp); -	BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd)); +	BUG_ON(pgd_none(pgd) || pgd_bad(pgd)); -	return pud_offset_kimg(pgd, addr); +	return pud_offset_kimg(pgdp, addr);  }  static inline pmd_t * fixmap_pmd(unsigned long addr)  { -	pud_t *pud = fixmap_pud(addr); +	pud_t *pudp = fixmap_pud(addr); +	pud_t pud = READ_ONCE(*pudp); -	BUG_ON(pud_none(*pud) || pud_bad(*pud)); +	BUG_ON(pud_none(pud) || pud_bad(pud)); -	return pmd_offset_kimg(pud, addr); +	return pmd_offset_kimg(pudp, addr);  }  static inline pte_t * fixmap_pte(unsigned long addr) @@ -768,30 +781,31 @@ static inline pte_t * fixmap_pte(unsigned long addr)   */  void __init early_fixmap_init(void)  { -	pgd_t *pgd; -	pud_t *pud; -	pmd_t *pmd; +	pgd_t *pgdp, pgd; +	pud_t *pudp; +	pmd_t *pmdp;  	unsigned long addr = FIXADDR_START; -	pgd = pgd_offset_k(addr); +	pgdp = pgd_offset_k(addr); +	pgd = READ_ONCE(*pgdp);  	if (CONFIG_PGTABLE_LEVELS > 3 && -	    !(pgd_none(*pgd) || pgd_page_paddr(*pgd) == __pa_symbol(bm_pud))) { +	    !(pgd_none(pgd) || pgd_page_paddr(pgd) == __pa_symbol(bm_pud))) {  		/*  		 * We only end up here if the kernel mapping and the fixmap  		 * share the top level pgd entry, which should only happen on  		 * 16k/4 levels configurations.  		 */  		BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES)); -		pud = pud_offset_kimg(pgd, addr); +		pudp = pud_offset_kimg(pgdp, addr);  	} else { -		if (pgd_none(*pgd)) -			__pgd_populate(pgd, __pa_symbol(bm_pud), PUD_TYPE_TABLE); -		pud = fixmap_pud(addr); +		if (pgd_none(pgd)) +			__pgd_populate(pgdp, __pa_symbol(bm_pud), PUD_TYPE_TABLE); +		pudp = fixmap_pud(addr);  	} -	if (pud_none(*pud)) -		__pud_populate(pud, __pa_symbol(bm_pmd), PMD_TYPE_TABLE); -	pmd = fixmap_pmd(addr); -	__pmd_populate(pmd, __pa_symbol(bm_pte), PMD_TYPE_TABLE); +	if (pud_none(READ_ONCE(*pudp))) +		__pud_populate(pudp, __pa_symbol(bm_pmd), PMD_TYPE_TABLE); +	pmdp = fixmap_pmd(addr); +	__pmd_populate(pmdp, __pa_symbol(bm_pte), PMD_TYPE_TABLE);  	/*  	 * The boot-ioremap range spans multiple pmds, for which @@ -800,11 +814,11 @@ void __init early_fixmap_init(void)  	BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)  		     != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT)); -	if ((pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN))) -	     || pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_END))) { +	if ((pmdp != fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN))) +	     || pmdp != fixmap_pmd(fix_to_virt(FIX_BTMAP_END))) {  		WARN_ON(1); -		pr_warn("pmd %p != %p, %p\n", -			pmd, fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)), +		pr_warn("pmdp %p != %p, %p\n", +			pmdp, fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)),  			fixmap_pmd(fix_to_virt(FIX_BTMAP_END)));  		pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",  			fix_to_virt(FIX_BTMAP_BEGIN)); @@ -824,16 +838,16 @@ void __set_fixmap(enum fixed_addresses idx,  			       phys_addr_t phys, pgprot_t flags)  {  	unsigned long addr = __fix_to_virt(idx); -	pte_t *pte; +	pte_t *ptep;  	BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses); -	pte = fixmap_pte(addr); +	ptep = fixmap_pte(addr);  	if (pgprot_val(flags)) { -		set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags)); +		set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, flags));  	} else { -		pte_clear(&init_mm, addr, pte); +		pte_clear(&init_mm, addr, ptep);  		flush_tlb_kernel_range(addr, addr+PAGE_SIZE);  	}  } @@ -915,36 +929,56 @@ int __init arch_ioremap_pmd_supported(void)  	return 1;  } -int pud_set_huge(pud_t *pud, phys_addr_t phys, pgprot_t prot) +int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot)  {  	pgprot_t sect_prot = 
__pgprot(PUD_TYPE_SECT |  					pgprot_val(mk_sect_prot(prot))); + +	/* ioremap_page_range doesn't honour BBM */ +	if (pud_present(READ_ONCE(*pudp))) +		return 0; +  	BUG_ON(phys & ~PUD_MASK); -	set_pud(pud, pfn_pud(__phys_to_pfn(phys), sect_prot)); +	set_pud(pudp, pfn_pud(__phys_to_pfn(phys), sect_prot));  	return 1;  } -int pmd_set_huge(pmd_t *pmd, phys_addr_t phys, pgprot_t prot) +int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot)  {  	pgprot_t sect_prot = __pgprot(PMD_TYPE_SECT |  					pgprot_val(mk_sect_prot(prot))); + +	/* ioremap_page_range doesn't honour BBM */ +	if (pmd_present(READ_ONCE(*pmdp))) +		return 0; +  	BUG_ON(phys & ~PMD_MASK); -	set_pmd(pmd, pfn_pmd(__phys_to_pfn(phys), sect_prot)); +	set_pmd(pmdp, pfn_pmd(__phys_to_pfn(phys), sect_prot));  	return 1;  } -int pud_clear_huge(pud_t *pud) +int pud_clear_huge(pud_t *pudp)  { -	if (!pud_sect(*pud)) +	if (!pud_sect(READ_ONCE(*pudp)))  		return 0; -	pud_clear(pud); +	pud_clear(pudp);  	return 1;  } -int pmd_clear_huge(pmd_t *pmd) +int pmd_clear_huge(pmd_t *pmdp)  { -	if (!pmd_sect(*pmd)) +	if (!pmd_sect(READ_ONCE(*pmdp)))  		return 0; -	pmd_clear(pmd); +	pmd_clear(pmdp);  	return 1;  } + +int pud_free_pmd_page(pud_t *pud) +{ +	return pud_none(*pud); +} + +int pmd_free_pte_page(pmd_t *pmd) +{ +	return pmd_none(*pmd); +} diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index a682a0a2a0fa..a56359373d8b 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -29,7 +29,7 @@ static int change_page_range(pte_t *ptep, pgtable_t token, unsigned long addr,  			void *data)  {  	struct page_change_data *cdata = data; -	pte_t pte = *ptep; +	pte_t pte = READ_ONCE(*ptep);  	pte = clear_pte_bit(pte, cdata->clear_mask);  	pte = set_pte_bit(pte, cdata->set_mask); @@ -156,30 +156,32 @@ void __kernel_map_pages(struct page *page, int numpages, int enable)   */  bool kernel_page_present(struct page *page)  { -	pgd_t *pgd; -	pud_t *pud; -	pmd_t *pmd; -	pte_t *pte; +	pgd_t *pgdp; +	pud_t *pudp, pud; +	pmd_t *pmdp, pmd; +	pte_t *ptep;  	unsigned long addr = (unsigned long)page_address(page); -	pgd = pgd_offset_k(addr); -	if (pgd_none(*pgd)) +	pgdp = pgd_offset_k(addr); +	if (pgd_none(READ_ONCE(*pgdp)))  		return false; -	pud = pud_offset(pgd, addr); -	if (pud_none(*pud)) +	pudp = pud_offset(pgdp, addr); +	pud = READ_ONCE(*pudp); +	if (pud_none(pud))  		return false; -	if (pud_sect(*pud)) +	if (pud_sect(pud))  		return true; -	pmd = pmd_offset(pud, addr); -	if (pmd_none(*pmd)) +	pmdp = pmd_offset(pudp, addr); +	pmd = READ_ONCE(*pmdp); +	if (pmd_none(pmd))  		return false; -	if (pmd_sect(*pmd)) +	if (pmd_sect(pmd))  		return true; -	pte = pte_offset_kernel(pmd, addr); -	return pte_valid(*pte); +	ptep = pte_offset_kernel(pmdp, addr); +	return pte_valid(READ_ONCE(*ptep));  }  #endif /* CONFIG_HIBERNATION */  #endif /* CONFIG_DEBUG_PAGEALLOC */ diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 71baed7e592a..c0af47617299 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -205,7 +205,8 @@ ENDPROC(idmap_cpu_replace_ttbr1)  	dc	cvac, cur_\()\type\()p		// Ensure any existing dirty  	dmb	sy				// lines are written back before  	ldr	\type, [cur_\()\type\()p]	// loading the entry -	tbz	\type, #0, next_\()\type	// Skip invalid entries +	tbz	\type, #0, skip_\()\type	// Skip invalid and +	tbnz	\type, #11, skip_\()\type	// non-global entries  	.endm  	.macro __idmap_kpti_put_pgtable_ent_ng, type @@ -265,8 +266,9 @@ ENTRY(idmap_kpti_install_ng_mappings)  	add	end_pgdp, cur_pgdp, #(PTRS_PER_PGD * 8)  
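An aside on the proc.S hunk above: the reworked __idmap_kpti_get_pgtable_ent only lets an entry reach the nG store when it is both valid and not yet marked non-global, which is what the renamed skip_* labels bypass. A minimal C sketch of that predicate, assuming the usual ARMv8 descriptor layout (bit 0 = valid, bit 11 = nG); the helper name is made up for illustration and is not part of the patch:

#include <stdio.h>

/* Hypothetical helper mirroring the "tbz #0" / "tbnz #11" pair above. */
static int kpti_entry_needs_ng(unsigned long desc)
{
	if (!(desc & (1UL << 0)))	/* invalid entry: nothing to rewrite */
		return 0;
	if (desc & (1UL << 11))		/* already non-global: skip it */
		return 0;
	return 1;			/* valid and global: mark it nG */
}

int main(void)
{
	printf("%d %d %d\n",
	       kpti_entry_needs_ng(0x0),		/* invalid      -> 0 */
	       kpti_entry_needs_ng(0x3 | (1UL << 11)),	/* already nG   -> 0 */
	       kpti_entry_needs_ng(0x3));		/* valid+global -> 1 */
	return 0;
}
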
do_pgd:	__idmap_kpti_get_pgtable_ent	pgd  	tbnz	pgd, #1, walk_puds -	__idmap_kpti_put_pgtable_ent_ng	pgd  next_pgd: +	__idmap_kpti_put_pgtable_ent_ng	pgd +skip_pgd:  	add	cur_pgdp, cur_pgdp, #8  	cmp	cur_pgdp, end_pgdp  	b.ne	do_pgd @@ -294,8 +296,9 @@ walk_puds:  	add	end_pudp, cur_pudp, #(PTRS_PER_PUD * 8)  do_pud:	__idmap_kpti_get_pgtable_ent	pud  	tbnz	pud, #1, walk_pmds -	__idmap_kpti_put_pgtable_ent_ng	pud  next_pud: +	__idmap_kpti_put_pgtable_ent_ng	pud +skip_pud:  	add	cur_pudp, cur_pudp, 8  	cmp	cur_pudp, end_pudp  	b.ne	do_pud @@ -314,8 +317,9 @@ walk_pmds:  	add	end_pmdp, cur_pmdp, #(PTRS_PER_PMD * 8)  do_pmd:	__idmap_kpti_get_pgtable_ent	pmd  	tbnz	pmd, #1, walk_ptes -	__idmap_kpti_put_pgtable_ent_ng	pmd  next_pmd: +	__idmap_kpti_put_pgtable_ent_ng	pmd +skip_pmd:  	add	cur_pmdp, cur_pmdp, #8  	cmp	cur_pmdp, end_pmdp  	b.ne	do_pmd @@ -333,7 +337,7 @@ walk_ptes:  	add	end_ptep, cur_ptep, #(PTRS_PER_PTE * 8)  do_pte:	__idmap_kpti_get_pgtable_ent	pte  	__idmap_kpti_put_pgtable_ent_ng	pte -next_pte: +skip_pte:  	add	cur_ptep, cur_ptep, #8  	cmp	cur_ptep, end_ptep  	b.ne	do_pte diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 1d4f1da7c58f..a93350451e8e 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -250,8 +250,9 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)  	off = offsetof(struct bpf_array, map.max_entries);  	emit_a64_mov_i64(tmp, off, ctx);  	emit(A64_LDR32(tmp, r2, tmp), ctx); +	emit(A64_MOV(0, r3, r3), ctx);  	emit(A64_CMP(0, r3, tmp), ctx); -	emit(A64_B_(A64_COND_GE, jmp_offset), ctx); +	emit(A64_B_(A64_COND_CS, jmp_offset), ctx);  	/* if (tail_call_cnt > MAX_TAIL_CALL_CNT)  	 *     goto out; @@ -259,7 +260,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)  	 */  	emit_a64_mov_i64(tmp, MAX_TAIL_CALL_CNT, ctx);  	emit(A64_CMP(1, tcc, tmp), ctx); -	emit(A64_B_(A64_COND_GT, jmp_offset), ctx); +	emit(A64_B_(A64_COND_HI, jmp_offset), ctx);  	emit(A64_ADD_I(1, tcc, tcc, 1), ctx);  	/* prog = array->ptrs[index]; diff --git a/arch/cris/include/arch-v10/arch/bug.h b/arch/cris/include/arch-v10/arch/bug.h index 905afeacfedf..06da9d49152a 100644 --- a/arch/cris/include/arch-v10/arch/bug.h +++ b/arch/cris/include/arch-v10/arch/bug.h @@ -44,18 +44,25 @@ struct bug_frame {   * not be used like this with newer versions of gcc.   */  #define BUG()								\ +do {									\  	__asm__ __volatile__ ("clear.d [" __stringify(BUG_MAGIC) "]\n\t"\  			      "movu.w " __stringify(__LINE__) ",$r0\n\t"\  			      "jump 0f\n\t"				\  			      ".section .rodata\n"			\  			      "0:\t.string \"" __FILE__ "\"\n\t"	\ -			      ".previous") +			      ".previous");				\ +	unreachable();							\ +} while (0)  #endif  #else  /* This just causes an oops. 
*/ -#define BUG() (*(int *)0 = 0) +#define BUG()								\ +do {									\ +	barrier_before_unreachable();					\ +	__builtin_trap();						\ +} while (0)  #endif diff --git a/arch/h8300/include/asm/byteorder.h b/arch/h8300/include/asm/byteorder.h index ecff2d1ca5a3..6eaa7ad5fc2c 100644 --- a/arch/h8300/include/asm/byteorder.h +++ b/arch/h8300/include/asm/byteorder.h @@ -2,7 +2,6 @@  #ifndef __H8300_BYTEORDER_H__  #define __H8300_BYTEORDER_H__ -#define __BIG_ENDIAN __ORDER_BIG_ENDIAN__  #include <linux/byteorder/big_endian.h>  #endif diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h index 762eeb0fcc1d..2524fb60fbc2 100644 --- a/arch/ia64/include/asm/atomic.h +++ b/arch/ia64/include/asm/atomic.h @@ -66,38 +66,35 @@ ATOMIC_OPS(add, +)  ATOMIC_OPS(sub, -)  #ifdef __OPTIMIZE__ -#define __ia64_atomic_const(i)	__builtin_constant_p(i) ?		\ +#define __ia64_atomic_const(i)						\ +	static const int __ia64_atomic_p = __builtin_constant_p(i) ?	\  		((i) == 1 || (i) == 4 || (i) == 8 || (i) == 16 ||	\ -		 (i) == -1 || (i) == -4 || (i) == -8 || (i) == -16) : 0 +		 (i) == -1 || (i) == -4 || (i) == -8 || (i) == -16) : 0;\ +	__ia64_atomic_p +#else +#define __ia64_atomic_const(i)	0 +#endif -#define atomic_add_return(i, v)						\ +#define atomic_add_return(i,v)						\  ({									\ -	int __i = (i);							\ -	static const int __ia64_atomic_p = __ia64_atomic_const(i);	\ -	__ia64_atomic_p ? ia64_fetch_and_add(__i, &(v)->counter) :	\ -				ia64_atomic_add(__i, v);		\ +	int __ia64_aar_i = (i);						\ +	__ia64_atomic_const(i)						\ +		? ia64_fetch_and_add(__ia64_aar_i, &(v)->counter)	\ +		: ia64_atomic_add(__ia64_aar_i, v);			\  }) -#define atomic_sub_return(i, v)						\ +#define atomic_sub_return(i,v)						\  ({									\ -	int __i = (i);							\ -	static const int __ia64_atomic_p = __ia64_atomic_const(i);	\ -	__ia64_atomic_p ? ia64_fetch_and_add(-__i, &(v)->counter) :	\ -				ia64_atomic_sub(__i, v);		\ +	int __ia64_asr_i = (i);						\ +	__ia64_atomic_const(i)						\ +		? ia64_fetch_and_add(-__ia64_asr_i, &(v)->counter)	\ +		: ia64_atomic_sub(__ia64_asr_i, v);			\  }) -#else -#define atomic_add_return(i, v)	ia64_atomic_add(i, v) -#define atomic_sub_return(i, v)	ia64_atomic_sub(i, v) -#endif  #define atomic_fetch_add(i,v)						\  ({									\  	int __ia64_aar_i = (i);						\ -	(__builtin_constant_p(i)					\ -	 && (   (__ia64_aar_i ==  1) || (__ia64_aar_i ==   4)		\ -	     || (__ia64_aar_i ==  8) || (__ia64_aar_i ==  16)		\ -	     || (__ia64_aar_i == -1) || (__ia64_aar_i ==  -4)		\ -	     || (__ia64_aar_i == -8) || (__ia64_aar_i == -16)))		\ +	__ia64_atomic_const(i)						\  		? ia64_fetchadd(__ia64_aar_i, &(v)->counter, acq)	\  		: ia64_atomic_fetch_add(__ia64_aar_i, v);		\  }) @@ -105,11 +102,7 @@ ATOMIC_OPS(sub, -)  #define atomic_fetch_sub(i,v)						\  ({									\  	int __ia64_asr_i = (i);						\ -	(__builtin_constant_p(i)					\ -	 && (   (__ia64_asr_i ==   1) || (__ia64_asr_i ==   4)		\ -	     || (__ia64_asr_i ==   8) || (__ia64_asr_i ==  16)		\ -	     || (__ia64_asr_i ==  -1) || (__ia64_asr_i ==  -4)		\ -	     || (__ia64_asr_i ==  -8) || (__ia64_asr_i == -16)))	\ +	__ia64_atomic_const(i)						\  		? 
ia64_fetchadd(-__ia64_asr_i, &(v)->counter, acq)	\  		: ia64_atomic_fetch_sub(__ia64_asr_i, v);		\  }) @@ -170,11 +163,7 @@ ATOMIC64_OPS(sub, -)  #define atomic64_add_return(i,v)					\  ({									\  	long __ia64_aar_i = (i);					\ -	(__builtin_constant_p(i)					\ -	 && (   (__ia64_aar_i ==  1) || (__ia64_aar_i ==   4)		\ -	     || (__ia64_aar_i ==  8) || (__ia64_aar_i ==  16)		\ -	     || (__ia64_aar_i == -1) || (__ia64_aar_i ==  -4)		\ -	     || (__ia64_aar_i == -8) || (__ia64_aar_i == -16)))		\ +	__ia64_atomic_const(i)						\  		? ia64_fetch_and_add(__ia64_aar_i, &(v)->counter)	\  		: ia64_atomic64_add(__ia64_aar_i, v);			\  }) @@ -182,11 +171,7 @@ ATOMIC64_OPS(sub, -)  #define atomic64_sub_return(i,v)					\  ({									\  	long __ia64_asr_i = (i);					\ -	(__builtin_constant_p(i)					\ -	 && (   (__ia64_asr_i ==   1) || (__ia64_asr_i ==   4)		\ -	     || (__ia64_asr_i ==   8) || (__ia64_asr_i ==  16)		\ -	     || (__ia64_asr_i ==  -1) || (__ia64_asr_i ==  -4)		\ -	     || (__ia64_asr_i ==  -8) || (__ia64_asr_i == -16)))	\ +	__ia64_atomic_const(i)						\  		? ia64_fetch_and_add(-__ia64_asr_i, &(v)->counter)	\  		: ia64_atomic64_sub(__ia64_asr_i, v);			\  }) @@ -194,11 +179,7 @@ ATOMIC64_OPS(sub, -)  #define atomic64_fetch_add(i,v)						\  ({									\  	long __ia64_aar_i = (i);					\ -	(__builtin_constant_p(i)					\ -	 && (   (__ia64_aar_i ==  1) || (__ia64_aar_i ==   4)		\ -	     || (__ia64_aar_i ==  8) || (__ia64_aar_i ==  16)		\ -	     || (__ia64_aar_i == -1) || (__ia64_aar_i ==  -4)		\ -	     || (__ia64_aar_i == -8) || (__ia64_aar_i == -16)))		\ +	__ia64_atomic_const(i)						\  		? ia64_fetchadd(__ia64_aar_i, &(v)->counter, acq)	\  		: ia64_atomic64_fetch_add(__ia64_aar_i, v);		\  }) @@ -206,11 +187,7 @@ ATOMIC64_OPS(sub, -)  #define atomic64_fetch_sub(i,v)						\  ({									\  	long __ia64_asr_i = (i);					\ -	(__builtin_constant_p(i)					\ -	 && (   (__ia64_asr_i ==   1) || (__ia64_asr_i ==   4)		\ -	     || (__ia64_asr_i ==   8) || (__ia64_asr_i ==  16)		\ -	     || (__ia64_asr_i ==  -1) || (__ia64_asr_i ==  -4)		\ -	     || (__ia64_asr_i ==  -8) || (__ia64_asr_i == -16)))	\ +	__ia64_atomic_const(i)						\  		? ia64_fetchadd(-__ia64_asr_i, &(v)->counter, acq)	\  		: ia64_atomic64_fetch_sub(__ia64_asr_i, v);		\  }) diff --git a/arch/ia64/include/asm/bug.h b/arch/ia64/include/asm/bug.h index bd3eeb8d1cfa..66b37a532765 100644 --- a/arch/ia64/include/asm/bug.h +++ b/arch/ia64/include/asm/bug.h @@ -4,7 +4,11 @@  #ifdef CONFIG_BUG  #define ia64_abort()	__builtin_trap() -#define BUG() do { printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); ia64_abort(); } while (0) +#define BUG() do {						\ +	printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__);	\ +	barrier_before_unreachable();				\ +	ia64_abort();						\ +} while (0)  /* should this BUG be made generic? 
*/  #define HAVE_ARCH_BUG diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index 0b4c65a1af25..498f3da3f225 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -41,7 +41,6 @@ ifneq ($(CONFIG_IA64_ESI),)  obj-y				+= esi_stub.o	# must be in kernel proper  endif  obj-$(CONFIG_INTEL_IOMMU)	+= pci-dma.o -obj-$(CONFIG_SWIOTLB)		+= pci-swiotlb.o  obj-$(CONFIG_BINFMT_ELF)	+= elfcore.o diff --git a/arch/ia64/kernel/err_inject.c b/arch/ia64/kernel/err_inject.c index 85bba43e7d5d..8b5b8e6bc9d9 100644 --- a/arch/ia64/kernel/err_inject.c +++ b/arch/ia64/kernel/err_inject.c @@ -117,7 +117,7 @@ store_call_start(struct device *dev, struct device_attribute *attr,  #ifdef ERR_INJ_DEBUG  	printk(KERN_DEBUG "Returns: status=%d,\n", (int)status[cpu]); -	printk(KERN_DEBUG "capapbilities=%lx,\n", capabilities[cpu]); +	printk(KERN_DEBUG "capabilities=%lx,\n", capabilities[cpu]);  	printk(KERN_DEBUG "resources=%lx\n", resources[cpu]);  #endif  	return size; @@ -142,7 +142,7 @@ store_virtual_to_phys(struct device *dev, struct device_attribute *attr,  	u64 virt_addr=simple_strtoull(buf, NULL, 16);  	int ret; -	ret = get_user_pages(virt_addr, 1, FOLL_WRITE, NULL, NULL); +	ret = get_user_pages_fast(virt_addr, 1, FOLL_WRITE, NULL);  	if (ret<=0) {  #ifdef ERR_INJ_DEBUG  		printk("Virtual address %lx is not existing.\n",virt_addr); diff --git a/arch/ia64/scripts/unwcheck.py b/arch/ia64/scripts/unwcheck.py index 89f3a1480a63..c55276e31b6b 100644 --- a/arch/ia64/scripts/unwcheck.py +++ b/arch/ia64/scripts/unwcheck.py @@ -16,7 +16,7 @@ import re  import sys  if len(sys.argv) != 2: -    print "Usage: %s FILE" % sys.argv[0] +    print("Usage: %s FILE" % sys.argv[0])      sys.exit(2)  readelf = os.getenv("READELF", "readelf") @@ -29,7 +29,7 @@ def check_func (func, slots, rlen_sum):          global num_errors          num_errors += 1          if not func: func = "[%#x-%#x]" % (start, end) -        print "ERROR: %s: %lu slots, total region length = %lu" % (func, slots, rlen_sum) +        print("ERROR: %s: %lu slots, total region length = %lu" % (func, slots, rlen_sum))      return  num_funcs = 0 @@ -43,23 +43,23 @@ for line in os.popen("%s -u %s" % (readelf, sys.argv[1])):          check_func(func, slots, rlen_sum)          func  = m.group(1) -        start = long(m.group(2), 16) -        end   = long(m.group(3), 16) +        start = int(m.group(2), 16) +        end   = int(m.group(3), 16)          slots = 3 * (end - start) / 16 -        rlen_sum = 0L +        rlen_sum = 0          num_funcs += 1      else:          m = rlen_pattern.match(line)          if m: -            rlen_sum += long(m.group(1)) +            rlen_sum += int(m.group(1))  check_func(func, slots, rlen_sum)  if num_errors == 0: -    print "No errors detected in %u functions." % num_funcs +    print("No errors detected in %u functions." % num_funcs)  else:      if num_errors > 1:          err="errors"      else:          err="error" -    print "%u %s detected in %u functions." % (num_errors, err, num_funcs) +    print("%u %s detected in %u functions." 
% (num_errors, err, num_funcs))      sys.exit(1) diff --git a/arch/m68k/include/asm/bug.h b/arch/m68k/include/asm/bug.h index b7e2bf1ba4a6..275dca1435bf 100644 --- a/arch/m68k/include/asm/bug.h +++ b/arch/m68k/include/asm/bug.h @@ -8,16 +8,19 @@  #ifndef CONFIG_SUN3  #define BUG() do { \  	pr_crit("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \ +	barrier_before_unreachable(); \  	__builtin_trap(); \  } while (0)  #else  #define BUG() do { \  	pr_crit("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \ +	barrier_before_unreachable(); \  	panic("BUG!"); \  } while (0)  #endif  #else  #define BUG() do { \ +	barrier_before_unreachable(); \  	__builtin_trap(); \  } while (0)  #endif diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index 4f798aa671dd..3817a3e2146c 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -24,6 +24,7 @@ config MICROBLAZE  	select HAVE_FTRACE_MCOUNT_RECORD  	select HAVE_FUNCTION_GRAPH_TRACER  	select HAVE_FUNCTION_TRACER +	select NO_BOOTMEM  	select HAVE_MEMBLOCK  	select HAVE_MEMBLOCK_NODE_MAP  	select HAVE_OPROFILE diff --git a/arch/microblaze/Kconfig.platform b/arch/microblaze/Kconfig.platform index 6996f397c16c..f7f1739c11b9 100644 --- a/arch/microblaze/Kconfig.platform +++ b/arch/microblaze/Kconfig.platform @@ -8,7 +8,6 @@ menu "Platform options"  config OPT_LIB_FUNCTION  	bool "Optimalized lib function" -	depends on CPU_LITTLE_ENDIAN  	default y  	help  	  Allows turn on optimalized library function (memcpy and memmove). @@ -21,6 +20,7 @@ config OPT_LIB_FUNCTION  config OPT_LIB_ASM  	bool "Optimalized lib function ASM"  	depends on OPT_LIB_FUNCTION && (XILINX_MICROBLAZE0_USE_BARREL = 1) +	depends on CPU_BIG_ENDIAN  	default n  	help  	  Allows turn on optimalized library function (memcpy and memmove). diff --git a/arch/microblaze/include/asm/setup.h b/arch/microblaze/include/asm/setup.h index be84a4d3917f..7c968c1d1729 100644 --- a/arch/microblaze/include/asm/setup.h +++ b/arch/microblaze/include/asm/setup.h @@ -44,7 +44,6 @@ void machine_shutdown(void);  void machine_halt(void);  void machine_power_off(void); -extern void *alloc_maybe_bootmem(size_t size, gfp_t mask);  extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask);  # endif /* __ASSEMBLY__ */ diff --git a/arch/microblaze/lib/fastcopy.S b/arch/microblaze/lib/fastcopy.S index 62021d7e249e..fdc48bb065d8 100644 --- a/arch/microblaze/lib/fastcopy.S +++ b/arch/microblaze/lib/fastcopy.S @@ -29,10 +29,6 @@   *	between mem locations with size of xfer spec'd in bytes   */ -#ifdef __MICROBLAZEEL__ -#error Microblaze LE not support ASM optimized lib func. Disable OPT_LIB_ASM. -#endif -  #include <linux/linkage.h>  	.text  	.globl	memcpy diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index 434639f9a3a6..df6de7ccdc2e 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -32,9 +32,6 @@ int mem_init_done;  #ifndef CONFIG_MMU  unsigned int __page_offset;  EXPORT_SYMBOL(__page_offset); - -#else -static int init_bootmem_done;  #endif /* CONFIG_MMU */  char *klimit = _end; @@ -117,7 +114,6 @@ static void __init paging_init(void)  void __init setup_memory(void)  { -	unsigned long map_size;  	struct memblock_region *reg;  #ifndef CONFIG_MMU @@ -174,17 +170,6 @@ void __init setup_memory(void)  	pr_info("%s: max_low_pfn: %#lx\n", __func__, max_low_pfn);  	pr_info("%s: max_pfn: %#lx\n", __func__, max_pfn); -	/* -	 * Find an area to use for the bootmem bitmap. 
-	 * We look for the first area which is at least -	 * 128kB in length (128kB is enough for a bitmap -	 * for 4GB of memory, using 4kB pages), plus 1 page -	 * (in case the address isn't page-aligned). -	 */ -	map_size = init_bootmem_node(NODE_DATA(0), -		PFN_UP(TOPHYS((u32)klimit)), min_low_pfn, max_low_pfn); -	memblock_reserve(PFN_UP(TOPHYS((u32)klimit)) << PAGE_SHIFT, map_size); -  	/* Add active regions with valid PFNs */  	for_each_memblock(memory, reg) {  		unsigned long start_pfn, end_pfn; @@ -196,32 +181,9 @@ void __init setup_memory(void)  				  &memblock.memory, 0);  	} -	/* free bootmem is whole main memory */ -	free_bootmem_with_active_regions(0, max_low_pfn); - -	/* reserve allocate blocks */ -	for_each_memblock(reserved, reg) { -		unsigned long top = reg->base + reg->size - 1; - -		pr_debug("reserved - 0x%08x-0x%08x, %lx, %lx\n", -			 (u32) reg->base, (u32) reg->size, top, -						memory_start + lowmem_size - 1); - -		if (top <= (memory_start + lowmem_size - 1)) { -			reserve_bootmem(reg->base, reg->size, BOOTMEM_DEFAULT); -		} else if (reg->base < (memory_start + lowmem_size - 1)) { -			unsigned long trunc_size = memory_start + lowmem_size - -								reg->base; -			reserve_bootmem(reg->base, trunc_size, BOOTMEM_DEFAULT); -		} -	} -  	/* XXX need to clip this if using highmem? */  	sparse_memory_present_with_active_regions(0); -#ifdef CONFIG_MMU -	init_bootmem_done = 1; -#endif  	paging_init();  } @@ -398,30 +360,16 @@ asmlinkage void __init mmu_init(void)  /* This is only called until mem_init is done. */  void __init *early_get_page(void)  { -	void *p; -	if (init_bootmem_done) { -		p = alloc_bootmem_pages(PAGE_SIZE); -	} else { -		/* -		 * Mem start + kernel_tlb -> here is limit -		 * because of mem mapping from head.S -		 */ -		p = __va(memblock_alloc_base(PAGE_SIZE, PAGE_SIZE, -					memory_start + kernel_tlb)); -	} -	return p; +	/* +	 * Mem start + kernel_tlb -> here is limit +	 * because of mem mapping from head.S +	 */ +	return __va(memblock_alloc_base(PAGE_SIZE, PAGE_SIZE, +				memory_start + kernel_tlb));  }  #endif /* CONFIG_MMU */ -void * __ref alloc_maybe_bootmem(size_t size, gfp_t mask) -{ -	if (mem_init_done) -		return kmalloc(size, mask); -	else -		return alloc_bootmem(size); -} -  void * __ref zalloc_maybe_bootmem(size_t size, gfp_t mask)  {  	void *p; diff --git a/arch/mips/ath25/board.c b/arch/mips/ath25/board.c index 9ab48ff80c1c..6d11ae581ea7 100644 --- a/arch/mips/ath25/board.c +++ b/arch/mips/ath25/board.c @@ -135,6 +135,8 @@ int __init ath25_find_config(phys_addr_t base, unsigned long size)  	}  	board_data = kzalloc(BOARD_CONFIG_BUFSZ, GFP_KERNEL); +	if (!board_data) +		goto error;  	ath25_board.config = (struct ath25_boarddata *)board_data;  	memcpy_fromio(board_data, bcfg, 0x100);  	if (broken_boarddata) { diff --git a/arch/mips/boot/Makefile b/arch/mips/boot/Makefile index 1bd5c4f00d19..c22da16d67b8 100644 --- a/arch/mips/boot/Makefile +++ b/arch/mips/boot/Makefile @@ -126,6 +126,7 @@ $(obj)/vmlinux.its.S: $(addprefix $(srctree)/arch/mips/$(PLATFORM)/,$(ITS_INPUTS  quiet_cmd_cpp_its_S = ITS     $@        cmd_cpp_its_S = $(CPP) $(cpp_flags) -P -C -o $@ $< \ +			-D__ASSEMBLY__ \  		        -DKERNEL_NAME="\"Linux $(KERNELRELEASE)\"" \  			-DVMLINUX_BINARY="\"$(3)\"" \  			-DVMLINUX_COMPRESSION="\"$(2)\"" \ diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c index 5b3a3f6a9ad3..d99f5242169e 100644 --- a/arch/mips/cavium-octeon/octeon-irq.c +++ b/arch/mips/cavium-octeon/octeon-irq.c @@ -2277,6 +2277,8 @@ static int 
__init octeon_irq_init_cib(struct device_node *ciu_node,  	}  	host_data = kzalloc(sizeof(*host_data), GFP_KERNEL); +	if (!host_data) +		return -ENOMEM;  	raw_spin_lock_init(&host_data->lock);  	addr = of_get_address(ciu_node, 0, NULL, NULL); diff --git a/arch/mips/include/asm/compat.h b/arch/mips/include/asm/compat.h index 946681db8dc3..9a0fa66b81ac 100644 --- a/arch/mips/include/asm/compat.h +++ b/arch/mips/include/asm/compat.h @@ -86,7 +86,6 @@ struct compat_flock {  	compat_off_t	l_len;  	s32		l_sysid;  	compat_pid_t	l_pid; -	short		__unused;  	s32		pad[4];  }; diff --git a/arch/mips/kernel/mips-cpc.c b/arch/mips/kernel/mips-cpc.c index 19c88d770054..fcf9af492d60 100644 --- a/arch/mips/kernel/mips-cpc.c +++ b/arch/mips/kernel/mips-cpc.c @@ -10,6 +10,8 @@  #include <linux/errno.h>  #include <linux/percpu.h> +#include <linux/of.h> +#include <linux/of_address.h>  #include <linux/spinlock.h>  #include <asm/mips-cps.h> @@ -22,6 +24,17 @@ static DEFINE_PER_CPU_ALIGNED(unsigned long, cpc_core_lock_flags);  phys_addr_t __weak mips_cpc_default_phys_base(void)  { +	struct device_node *cpc_node; +	struct resource res; +	int err; + +	cpc_node = of_find_compatible_node(of_root, NULL, "mti,mips-cpc"); +	if (cpc_node) { +		err = of_address_to_resource(cpc_node, 0, &res); +		if (!err) +			return res.start; +	} +  	return 0;  } diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index 85bc601e9a0d..5f8b0a9e30b3 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -375,6 +375,7 @@ static void __init bootmem_init(void)  	unsigned long reserved_end;  	unsigned long mapstart = ~0UL;  	unsigned long bootmap_size; +	phys_addr_t ramstart = (phys_addr_t)ULLONG_MAX;  	bool bootmap_valid = false;  	int i; @@ -395,7 +396,8 @@ static void __init bootmem_init(void)  	max_low_pfn = 0;  	/* -	 * Find the highest page frame number we have available. 
+	 * Find the highest page frame number we have available +	 * and the lowest used RAM address  	 */  	for (i = 0; i < boot_mem_map.nr_map; i++) {  		unsigned long start, end; @@ -407,6 +409,8 @@ static void __init bootmem_init(void)  		end = PFN_DOWN(boot_mem_map.map[i].addr  				+ boot_mem_map.map[i].size); +		ramstart = min(ramstart, boot_mem_map.map[i].addr); +  #ifndef CONFIG_HIGHMEM  		/*  		 * Skip highmem here so we get an accurate max_low_pfn if low @@ -436,6 +440,13 @@ static void __init bootmem_init(void)  		mapstart = max(reserved_end, start);  	} +	/* +	 * Reserve any memory between the start of RAM and PHYS_OFFSET +	 */ +	if (ramstart > PHYS_OFFSET) +		add_memory_region(PHYS_OFFSET, ramstart - PHYS_OFFSET, +				  BOOT_MEM_RESERVED); +  	if (min_low_pfn >= max_low_pfn)  		panic("Incorrect memory mapping !!!");  	if (min_low_pfn > ARCH_PFN_OFFSET) { @@ -664,9 +675,6 @@ static int __init early_parse_mem(char *p)  	add_memory_region(start, size, BOOT_MEM_RAM); -	if (start && start > PHYS_OFFSET) -		add_memory_region(PHYS_OFFSET, start - PHYS_OFFSET, -				BOOT_MEM_RESERVED);  	return 0;  }  early_param("mem", early_parse_mem); diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c index 87dcac2447c8..159e83add4bb 100644 --- a/arch/mips/kernel/smp-bmips.c +++ b/arch/mips/kernel/smp-bmips.c @@ -168,11 +168,11 @@ static void bmips_prepare_cpus(unsigned int max_cpus)  		return;  	} -	if (request_irq(IPI0_IRQ, bmips_ipi_interrupt, IRQF_PERCPU, -			"smp_ipi0", NULL)) +	if (request_irq(IPI0_IRQ, bmips_ipi_interrupt, +			IRQF_PERCPU | IRQF_NO_SUSPEND, "smp_ipi0", NULL))  		panic("Can't request IPI0 interrupt"); -	if (request_irq(IPI1_IRQ, bmips_ipi_interrupt, IRQF_PERCPU, -			"smp_ipi1", NULL)) +	if (request_irq(IPI1_IRQ, bmips_ipi_interrupt, +			IRQF_PERCPU | IRQF_NO_SUSPEND, "smp_ipi1", NULL))  		panic("Can't request IPI1 interrupt");  } @@ -572,7 +572,7 @@ asmlinkage void __weak plat_wired_tlb_setup(void)  	 */  } -void __init bmips_cpu_setup(void) +void bmips_cpu_setup(void)  {  	void __iomem __maybe_unused *cbr = BMIPS_GET_CBR();  	u32 __maybe_unused cfg; diff --git a/arch/mips/lantiq/Kconfig b/arch/mips/lantiq/Kconfig index 692ae85a3e3d..8e3a1fc2bc39 100644 --- a/arch/mips/lantiq/Kconfig +++ b/arch/mips/lantiq/Kconfig @@ -13,6 +13,8 @@ choice  config SOC_AMAZON_SE  	bool "Amazon SE"  	select SOC_TYPE_XWAY +	select MFD_SYSCON +	select MFD_CORE  config SOC_XWAY  	bool "XWAY" diff --git a/arch/mips/lantiq/xway/sysctrl.c b/arch/mips/lantiq/xway/sysctrl.c index 52500d3b7004..e0af39b33e28 100644 --- a/arch/mips/lantiq/xway/sysctrl.c +++ b/arch/mips/lantiq/xway/sysctrl.c @@ -549,9 +549,9 @@ void __init ltq_soc_init(void)  		clkdev_add_static(ltq_ar9_cpu_hz(), ltq_ar9_fpi_hz(),  				ltq_ar9_fpi_hz(), CLOCK_250M);  		clkdev_add_pmu("1f203018.usb2-phy", "phy", 1, 0, PMU_USB0_P); -		clkdev_add_pmu("1e101000.usb", "otg", 1, 0, PMU_USB0); +		clkdev_add_pmu("1e101000.usb", "otg", 1, 0, PMU_USB0 | PMU_AHBM);  		clkdev_add_pmu("1f203034.usb2-phy", "phy", 1, 0, PMU_USB1_P); -		clkdev_add_pmu("1e106000.usb", "otg", 1, 0, PMU_USB1); +		clkdev_add_pmu("1e106000.usb", "otg", 1, 0, PMU_USB1 | PMU_AHBM);  		clkdev_add_pmu("1e180000.etop", "switch", 1, 0, PMU_SWITCH);  		clkdev_add_pmu("1e103000.sdio", NULL, 1, 0, PMU_SDIO);  		clkdev_add_pmu("1e103100.deu", NULL, 1, 0, PMU_DEU); @@ -560,7 +560,7 @@ void __init ltq_soc_init(void)  	} else {  		clkdev_add_static(ltq_danube_cpu_hz(), ltq_danube_fpi_hz(),  				ltq_danube_fpi_hz(), ltq_danube_pp32_hz()); -		clkdev_add_pmu("1f203018.usb2-phy", 
"ctrl", 1, 0, PMU_USB0); +		clkdev_add_pmu("1e101000.usb", "otg", 1, 0, PMU_USB0 | PMU_AHBM);  		clkdev_add_pmu("1f203018.usb2-phy", "phy", 1, 0, PMU_USB0_P);  		clkdev_add_pmu("1e103000.sdio", NULL, 1, 0, PMU_SDIO);  		clkdev_add_pmu("1e103100.deu", NULL, 1, 0, PMU_DEU); diff --git a/arch/mips/loongson64/Kconfig b/arch/mips/loongson64/Kconfig index bc2fdbfa8223..72af0c183969 100644 --- a/arch/mips/loongson64/Kconfig +++ b/arch/mips/loongson64/Kconfig @@ -7,6 +7,8 @@ choice  config LEMOTE_FULOONG2E  	bool "Lemote Fuloong(2e) mini-PC"  	select ARCH_SPARSEMEM_ENABLE +	select ARCH_MIGHT_HAVE_PC_PARPORT +	select ARCH_MIGHT_HAVE_PC_SERIO  	select CEVT_R4K  	select CSRC_R4K  	select SYS_HAS_CPU_LOONGSON2E @@ -33,6 +35,8 @@ config LEMOTE_FULOONG2E  config LEMOTE_MACH2F  	bool "Lemote Loongson 2F family machines"  	select ARCH_SPARSEMEM_ENABLE +	select ARCH_MIGHT_HAVE_PC_PARPORT +	select ARCH_MIGHT_HAVE_PC_SERIO  	select BOARD_SCACHE  	select BOOT_ELF32  	select CEVT_R4K if ! MIPS_EXTERNAL_TIMER @@ -62,6 +66,8 @@ config LEMOTE_MACH2F  config LOONGSON_MACH3X  	bool "Generic Loongson 3 family machines"  	select ARCH_SPARSEMEM_ENABLE +	select ARCH_MIGHT_HAVE_PC_PARPORT +	select ARCH_MIGHT_HAVE_PC_SERIO  	select GENERIC_ISA_DMA_SUPPORT_BROKEN  	select BOOT_ELF32  	select BOARD_SCACHE diff --git a/arch/mips/ralink/mt7621.c b/arch/mips/ralink/mt7621.c index 1b274742077d..d2718de60b9b 100644 --- a/arch/mips/ralink/mt7621.c +++ b/arch/mips/ralink/mt7621.c @@ -170,6 +170,28 @@ void prom_soc_init(struct ralink_soc_info *soc_info)  	u32 n1;  	u32 rev; +	/* Early detection of CMP support */ +	mips_cm_probe(); +	mips_cpc_probe(); + +	if (mips_cps_numiocu(0)) { +		/* +		 * mips_cm_probe() wipes out bootloader +		 * config for CM regions and we have to configure them +		 * again. This SoC cannot talk to pamlbus devices +		 * witout proper iocu region set up. +		 * +		 * FIXME: it would be better to do this with values +		 * from DT, but we need this very early because +		 * without this we cannot talk to pretty much anything +		 * including serial. +		 */ +		write_gcr_reg0_base(MT7621_PALMBUS_BASE); +		write_gcr_reg0_mask(~MT7621_PALMBUS_SIZE | +				    CM_GCR_REGn_MASK_CMTGT_IOCU0); +		__sync(); +	} +  	n0 = __raw_readl(sysc + SYSC_REG_CHIP_NAME0);  	n1 = __raw_readl(sysc + SYSC_REG_CHIP_NAME1); @@ -194,26 +216,6 @@ void prom_soc_init(struct ralink_soc_info *soc_info)  	rt2880_pinmux_data = mt7621_pinmux_data; -	/* Early detection of CMP support */ -	mips_cm_probe(); -	mips_cpc_probe(); - -	if (mips_cps_numiocu(0)) { -		/* -		 * mips_cm_probe() wipes out bootloader -		 * config for CM regions and we have to configure them -		 * again. This SoC cannot talk to pamlbus devices -		 * witout proper iocu region set up. -		 * -		 * FIXME: it would be better to do this with values -		 * from DT, but we need this very early because -		 * without this we cannot talk to pretty much anything -		 * including serial. 
-		 */ -		write_gcr_reg0_base(MT7621_PALMBUS_BASE); -		write_gcr_reg0_mask(~MT7621_PALMBUS_SIZE | -				    CM_GCR_REGn_MASK_CMTGT_IOCU0); -	}  	if (!register_cps_smp_ops())  		return; diff --git a/arch/mips/ralink/reset.c b/arch/mips/ralink/reset.c index 64543d66e76b..e9531fea23a2 100644 --- a/arch/mips/ralink/reset.c +++ b/arch/mips/ralink/reset.c @@ -96,16 +96,9 @@ static void ralink_restart(char *command)  	unreachable();  } -static void ralink_halt(void) -{ -	local_irq_disable(); -	unreachable(); -} -  static int __init mips_reboot_setup(void)  {  	_machine_restart = ralink_restart; -	_machine_halt = ralink_halt;  	return 0;  } diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h index 3742508cc534..bd5ce31936f5 100644 --- a/arch/parisc/include/asm/cacheflush.h +++ b/arch/parisc/include/asm/cacheflush.h @@ -26,6 +26,7 @@ void flush_user_icache_range_asm(unsigned long, unsigned long);  void flush_kernel_icache_range_asm(unsigned long, unsigned long);  void flush_user_dcache_range_asm(unsigned long, unsigned long);  void flush_kernel_dcache_range_asm(unsigned long, unsigned long); +void purge_kernel_dcache_range_asm(unsigned long, unsigned long);  void flush_kernel_dcache_page_asm(void *);  void flush_kernel_icache_page(void *); diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h index 0e6ab6e4a4e9..2dbe5580a1a4 100644 --- a/arch/parisc/include/asm/processor.h +++ b/arch/parisc/include/asm/processor.h @@ -316,6 +316,8 @@ extern int _parisc_requires_coherency;  #define parisc_requires_coherency()	(0)  #endif +extern int running_on_qemu; +  #endif /* __ASSEMBLY__ */  #endif /* __ASM_PARISC_PROCESSOR_H */ diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 19c0c141bc3f..e3b45546d589 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -465,10 +465,10 @@ EXPORT_SYMBOL(copy_user_page);  int __flush_tlb_range(unsigned long sid, unsigned long start,  		      unsigned long end)  { -	unsigned long flags, size; +	unsigned long flags; -	size = (end - start); -	if (size >= parisc_tlb_flush_threshold) { +	if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) && +	    end - start >= parisc_tlb_flush_threshold) {  		flush_tlb_all();  		return 1;  	} @@ -539,13 +539,12 @@ void flush_cache_mm(struct mm_struct *mm)  	struct vm_area_struct *vma;  	pgd_t *pgd; -	/* Flush the TLB to avoid speculation if coherency is required. */ -	if (parisc_requires_coherency()) -		flush_tlb_all(); -  	/* Flushing the whole cache on each cpu takes forever on  	   rp3440, etc.  So, avoid it if the mm isn't too big.  
*/ -	if (mm_total_size(mm) >= parisc_cache_flush_threshold) { +	if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) && +	    mm_total_size(mm) >= parisc_cache_flush_threshold) { +		if (mm->context) +			flush_tlb_all();  		flush_cache_all();  		return;  	} @@ -553,9 +552,9 @@ void flush_cache_mm(struct mm_struct *mm)  	if (mm->context == mfsp(3)) {  		for (vma = mm->mmap; vma; vma = vma->vm_next) {  			flush_user_dcache_range_asm(vma->vm_start, vma->vm_end); -			if ((vma->vm_flags & VM_EXEC) == 0) -				continue; -			flush_user_icache_range_asm(vma->vm_start, vma->vm_end); +			if (vma->vm_flags & VM_EXEC) +				flush_user_icache_range_asm(vma->vm_start, vma->vm_end); +			flush_tlb_range(vma, vma->vm_start, vma->vm_end);  		}  		return;  	} @@ -573,6 +572,8 @@ void flush_cache_mm(struct mm_struct *mm)  			pfn = pte_pfn(*ptep);  			if (!pfn_valid(pfn))  				continue; +			if (unlikely(mm->context)) +				flush_tlb_page(vma, addr);  			__flush_cache_page(vma, addr, PFN_PHYS(pfn));  		}  	} @@ -581,30 +582,45 @@ void flush_cache_mm(struct mm_struct *mm)  void flush_cache_range(struct vm_area_struct *vma,  		unsigned long start, unsigned long end)  { -	BUG_ON(!vma->vm_mm->context); - -	/* Flush the TLB to avoid speculation if coherency is required. */ -	if (parisc_requires_coherency()) -		flush_tlb_range(vma, start, end); +	pgd_t *pgd; +	unsigned long addr; -	if ((end - start) >= parisc_cache_flush_threshold -	    || vma->vm_mm->context != mfsp(3)) { +	if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) && +	    end - start >= parisc_cache_flush_threshold) { +		if (vma->vm_mm->context) +			flush_tlb_range(vma, start, end);  		flush_cache_all();  		return;  	} -	flush_user_dcache_range_asm(start, end); -	if (vma->vm_flags & VM_EXEC) -		flush_user_icache_range_asm(start, end); +	if (vma->vm_mm->context == mfsp(3)) { +		flush_user_dcache_range_asm(start, end); +		if (vma->vm_flags & VM_EXEC) +			flush_user_icache_range_asm(start, end); +		flush_tlb_range(vma, start, end); +		return; +	} + +	pgd = vma->vm_mm->pgd; +	for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) { +		unsigned long pfn; +		pte_t *ptep = get_ptep(pgd, addr); +		if (!ptep) +			continue; +		pfn = pte_pfn(*ptep); +		if (pfn_valid(pfn)) { +			if (unlikely(vma->vm_mm->context)) +				flush_tlb_page(vma, addr); +			__flush_cache_page(vma, addr, PFN_PHYS(pfn)); +		} +	}  }  void  flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long pfn)  { -	BUG_ON(!vma->vm_mm->context); -  	if (pfn_valid(pfn)) { -		if (parisc_requires_coherency()) +		if (likely(vma->vm_mm->context))  			flush_tlb_page(vma, vmaddr);  		__flush_cache_page(vma, vmaddr, PFN_PHYS(pfn));  	} @@ -613,21 +629,33 @@ flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long  void flush_kernel_vmap_range(void *vaddr, int size)  {  	unsigned long start = (unsigned long)vaddr; +	unsigned long end = start + size; -	if ((unsigned long)size > parisc_cache_flush_threshold) +	if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) && +	    (unsigned long)size >= parisc_cache_flush_threshold) { +		flush_tlb_kernel_range(start, end);  		flush_data_cache(); -	else -		flush_kernel_dcache_range_asm(start, start + size); +		return; +	} + +	flush_kernel_dcache_range_asm(start, end); +	flush_tlb_kernel_range(start, end);  }  EXPORT_SYMBOL(flush_kernel_vmap_range);  void invalidate_kernel_vmap_range(void *vaddr, int size)  {  	unsigned long start = (unsigned long)vaddr; +	unsigned long end = start + size; -	if ((unsigned long)size 
> parisc_cache_flush_threshold) +	if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) && +	    (unsigned long)size >= parisc_cache_flush_threshold) { +		flush_tlb_kernel_range(start, end);  		flush_data_cache(); -	else -		flush_kernel_dcache_range_asm(start, start + size); +		return; +	} + +	purge_kernel_dcache_range_asm(start, end); +	flush_tlb_kernel_range(start, end);  }  EXPORT_SYMBOL(invalidate_kernel_vmap_range); diff --git a/arch/parisc/kernel/head.S b/arch/parisc/kernel/head.S index bbbe360b458f..fbb4e43fda05 100644 --- a/arch/parisc/kernel/head.S +++ b/arch/parisc/kernel/head.S @@ -138,6 +138,16 @@ $pgt_fill_loop:  	std		%dp,0x18(%r10)  #endif +#ifdef CONFIG_64BIT +	/* Get PDCE_PROC for monarch CPU. */ +#define MEM_PDC_LO 0x388 +#define MEM_PDC_HI 0x35C +	ldw             MEM_PDC_LO(%r0),%r3 +	ldw             MEM_PDC_HI(%r0),%r10 +	depd            %r10, 31, 32, %r3        /* move to upper word */ +#endif + +  #ifdef CONFIG_SMP  	/* Set the smp rendezvous address into page zero.  	** It would be safer to do this in init_smp_config() but @@ -196,12 +206,6 @@ common_stext:          ** Someday, palo might not do this for the Monarch either.          */  2: -#define MEM_PDC_LO 0x388 -#define MEM_PDC_HI 0x35C -	ldw             MEM_PDC_LO(%r0),%r3 -	ldw             MEM_PDC_HI(%r0),%r6 -	depd            %r6, 31, 32, %r3        /* move to upper word */ -  	mfctl		%cr30,%r6		/* PCX-W2 firmware bug */  	ldo             PDC_PSW(%r0),%arg0              /* 21 */ @@ -268,6 +272,8 @@ $install_iva:  aligned_rfi:  	pcxt_ssm_bug +	copy		%r3, %arg0	/* PDCE_PROC for smp_callin() */ +  	rsm		PSW_SM_QUIET,%r0	/* off troublesome PSW bits */  	/* Don't need NOPs, have 8 compliant insn before rfi */ diff --git a/arch/parisc/kernel/pacache.S b/arch/parisc/kernel/pacache.S index 2d40c4ff3f69..67b0f7532e83 100644 --- a/arch/parisc/kernel/pacache.S +++ b/arch/parisc/kernel/pacache.S @@ -1110,6 +1110,28 @@ ENTRY_CFI(flush_kernel_dcache_range_asm)  	.procend  ENDPROC_CFI(flush_kernel_dcache_range_asm) +ENTRY_CFI(purge_kernel_dcache_range_asm) +	.proc +	.callinfo NO_CALLS +	.entry + +	ldil		L%dcache_stride, %r1 +	ldw		R%dcache_stride(%r1), %r23 +	ldo		-1(%r23), %r21 +	ANDCM		%r26, %r21, %r26 + +1:      cmpb,COND(<<),n	%r26, %r25,1b +	pdc,m		%r23(%r26) + +	sync +	syncdma +	bv		%r0(%r2) +	nop +	.exit + +	.procend +ENDPROC_CFI(purge_kernel_dcache_range_asm) +  ENTRY_CFI(flush_user_icache_range_asm)  	.proc  	.callinfo NO_CALLS diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c index 30c28ab14540..4065b5e48c9d 100644 --- a/arch/parisc/kernel/smp.c +++ b/arch/parisc/kernel/smp.c @@ -292,10 +292,15 @@ smp_cpu_init(int cpunum)   * Slaves start using C here. Indirectly called from smp_slave_stext.   * Do what start_kernel() and main() do for boot strap processor (aka monarch)   */ -void __init smp_callin(void) +void __init smp_callin(unsigned long pdce_proc)  {  	int slave_id = cpu_now_booting; +#ifdef CONFIG_64BIT +	WARN_ON(((unsigned long)(PAGE0->mem_pdc_hi) << 32 +			| PAGE0->mem_pdc) != pdce_proc); +#endif +  	smp_cpu_init(slave_id);  	preempt_disable(); diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c index 4b8fd6dc22da..f7e684560186 100644 --- a/arch/parisc/kernel/time.c +++ b/arch/parisc/kernel/time.c @@ -76,10 +76,10 @@ irqreturn_t __irq_entry timer_interrupt(int irq, void *dev_id)  	next_tick = cpuinfo->it_value;  	/* Calculate how many ticks have elapsed. 
*/ +	now = mfctl(16);  	do {  		++ticks_elapsed;  		next_tick += cpt; -		now = mfctl(16);  	} while (next_tick - now > cpt);  	/* Store (in CR16 cycles) up to when we are accounting right now. */ @@ -103,16 +103,17 @@ irqreturn_t __irq_entry timer_interrupt(int irq, void *dev_id)  	 * if one or the other wrapped. If "now" is "bigger" we'll end up  	 * with a very large unsigned number.  	 */ -	while (next_tick - mfctl(16) > cpt) +	now = mfctl(16); +	while (next_tick - now > cpt)  		next_tick += cpt;  	/* Program the IT when to deliver the next interrupt.  	 * Only bottom 32-bits of next_tick are writable in CR16!  	 * Timer interrupt will be delivered at least a few hundred cycles -	 * after the IT fires, so if we are too close (<= 500 cycles) to the +	 * after the IT fires, so if we are too close (<= 8000 cycles) to the  	 * next cycle, simply skip it.  	 */ -	if (next_tick - mfctl(16) <= 500) +	if (next_tick - now <= 8000)  		next_tick += cpt;  	mtctl(next_tick, 16); @@ -248,7 +249,7 @@ static int __init init_cr16_clocksource(void)  	 * different sockets, so mark them unstable and lower rating on  	 * multi-socket SMP systems.  	 */ -	if (num_online_cpus() > 1) { +	if (num_online_cpus() > 1 && !running_on_qemu) {  		int cpu;  		unsigned long cpu0_loc;  		cpu0_loc = per_cpu(cpu_data, 0).cpu_loc; diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 48f41399fc0b..cab32ee824d2 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -629,7 +629,12 @@ void __init mem_init(void)  #endif  	mem_init_print_info(NULL); -#ifdef CONFIG_DEBUG_KERNEL /* double-sanity-check paranoia */ + +#if 0 +	/* +	 * Do not expose the virtual kernel memory layout to userspace. +	 * But keep code for debugging purposes. +	 */  	printk("virtual kernel memory layout:\n"  	       "    vmalloc : 0x%px - 0x%px   (%4ld MB)\n"  	       "    memory  : 0x%px - 0x%px   (%4ld MB)\n" diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index ef6549e57157..26d5d2a5b8e9 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -101,7 +101,8 @@ $(addprefix $(obj)/,$(zlib-y)): \  libfdt       := fdt.c fdt_ro.c fdt_wip.c fdt_sw.c fdt_rw.c fdt_strerror.c  libfdtheader := fdt.h libfdt.h libfdt_internal.h -$(addprefix $(obj)/,$(libfdt) libfdt-wrapper.o simpleboot.o epapr.o opal.o): \ +$(addprefix $(obj)/,$(libfdt) libfdt-wrapper.o simpleboot.o epapr.o opal.o \ +	treeboot-akebono.o treeboot-currituck.o treeboot-iss4xx.o): \  	$(addprefix $(obj)/,$(libfdtheader))  src-wlib-y := string.S crt0.S stdio.c decompress.c main.c \ diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 30a155c0a6b0..c615abdce119 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -16,6 +16,7 @@  #define PGD_INDEX_SIZE	(32 - PGDIR_SHIFT)  #define PMD_CACHE_INDEX	PMD_INDEX_SIZE +#define PUD_CACHE_INDEX	PUD_INDEX_SIZE  #ifndef __ASSEMBLY__  #define PTE_TABLE_SIZE	(sizeof(pte_t) << PTE_INDEX_SIZE) diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h index 949d691094a4..67c5475311ee 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h @@ -63,7 +63,8 @@ static inline int hash__hugepd_ok(hugepd_t hpd)   * keeping the prototype consistent across the two formats.   
*/  static inline unsigned long pte_set_hidx(pte_t *ptep, real_pte_t rpte, -			unsigned int subpg_index, unsigned long hidx) +					 unsigned int subpg_index, unsigned long hidx, +					 int offset)  {  	return (hidx << H_PAGE_F_GIX_SHIFT) &  		(H_PAGE_F_SECOND | H_PAGE_F_GIX); diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h index 338b7da468ce..3bcf269f8f55 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h @@ -45,7 +45,7 @@   * generic accessors and iterators here   */  #define __real_pte __real_pte -static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep) +static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep, int offset)  {  	real_pte_t rpte;  	unsigned long *hidxp; @@ -59,7 +59,7 @@ static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep)  	 */  	smp_rmb(); -	hidxp = (unsigned long *)(ptep + PTRS_PER_PTE); +	hidxp = (unsigned long *)(ptep + offset);  	rpte.hidx = *hidxp;  	return rpte;  } @@ -86,9 +86,10 @@ static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index)   * expected to modify the PTE bits accordingly and commit the PTE to memory.   */  static inline unsigned long pte_set_hidx(pte_t *ptep, real_pte_t rpte, -		unsigned int subpg_index, unsigned long hidx) +					 unsigned int subpg_index, +					 unsigned long hidx, int offset)  { -	unsigned long *hidxp = (unsigned long *)(ptep + PTRS_PER_PTE); +	unsigned long *hidxp = (unsigned long *)(ptep + offset);  	rpte.hidx &= ~HIDX_BITS(0xfUL, subpg_index);  	*hidxp = rpte.hidx  | HIDX_BITS(HIDX_SHIFT_BY_ONE(hidx), subpg_index); @@ -140,13 +141,18 @@ static inline int hash__remap_4k_pfn(struct vm_area_struct *vma, unsigned long a  }  #define H_PTE_TABLE_SIZE	PTE_FRAG_SIZE -#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined (CONFIG_HUGETLB_PAGE)  #define H_PMD_TABLE_SIZE	((sizeof(pmd_t) << PMD_INDEX_SIZE) + \  				 (sizeof(unsigned long) << PMD_INDEX_SIZE))  #else  #define H_PMD_TABLE_SIZE	(sizeof(pmd_t) << PMD_INDEX_SIZE)  #endif +#ifdef CONFIG_HUGETLB_PAGE +#define H_PUD_TABLE_SIZE	((sizeof(pud_t) << PUD_INDEX_SIZE) +	\ +				 (sizeof(unsigned long) << PUD_INDEX_SIZE)) +#else  #define H_PUD_TABLE_SIZE	(sizeof(pud_t) << PUD_INDEX_SIZE) +#endif  #define H_PGD_TABLE_SIZE	(sizeof(pgd_t) << PGD_INDEX_SIZE)  #ifdef CONFIG_TRANSPARENT_HUGEPAGE diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index 0920eff731b3..935adcd92a81 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -23,7 +23,8 @@  				 H_PUD_INDEX_SIZE + H_PGD_INDEX_SIZE + PAGE_SHIFT)  #define H_PGTABLE_RANGE		(ASM_CONST(1) << H_PGTABLE_EADDR_SIZE) -#if defined(CONFIG_TRANSPARENT_HUGEPAGE) &&  defined(CONFIG_PPC_64K_PAGES) +#if (defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)) && \ +	defined(CONFIG_PPC_64K_PAGES)  /*   * only with hash 64k we need to use the second half of pmd page table   * to store pointer to deposited pgtable_t @@ -33,6 +34,16 @@  #define H_PMD_CACHE_INDEX	H_PMD_INDEX_SIZE  #endif  /* + * We store the slot details in the second half of page table. + * Increase the pud level table so that hugetlb ptes can be stored + * at pud level. 
+ */ +#if defined(CONFIG_HUGETLB_PAGE) &&  defined(CONFIG_PPC_64K_PAGES) +#define H_PUD_CACHE_INDEX	(H_PUD_INDEX_SIZE + 1) +#else +#define H_PUD_CACHE_INDEX	(H_PUD_INDEX_SIZE) +#endif +/*   * Define the address range of the kernel non-linear virtual area   */  #define H_KERN_VIRT_START ASM_CONST(0xD000000000000000) diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h index 1fcfa425cefa..4746bc68d446 100644 --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h @@ -73,10 +73,16 @@ static inline void radix__pgd_free(struct mm_struct *mm, pgd_t *pgd)  static inline pgd_t *pgd_alloc(struct mm_struct *mm)  { +	pgd_t *pgd; +  	if (radix_enabled())  		return radix__pgd_alloc(mm); -	return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), -		pgtable_gfp_flags(mm, GFP_KERNEL)); + +	pgd = kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), +			       pgtable_gfp_flags(mm, GFP_KERNEL)); +	memset(pgd, 0, PGD_TABLE_SIZE); + +	return pgd;  }  static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) @@ -93,13 +99,13 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)  static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)  { -	return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE), +	return kmem_cache_alloc(PGT_CACHE(PUD_CACHE_INDEX),  		pgtable_gfp_flags(mm, GFP_KERNEL));  }  static inline void pud_free(struct mm_struct *mm, pud_t *pud)  { -	kmem_cache_free(PGT_CACHE(PUD_INDEX_SIZE), pud); +	kmem_cache_free(PGT_CACHE(PUD_CACHE_INDEX), pud);  }  static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) @@ -115,7 +121,7 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,  	 * ahead and flush the page walk cache  	 */  	flush_tlb_pgtable(tlb, address); -        pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE); +	pgtable_free_tlb(tlb, pud, PUD_CACHE_INDEX);  }  static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 51017726d495..a6b9f1d74600 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -232,11 +232,13 @@ extern unsigned long __pmd_index_size;  extern unsigned long __pud_index_size;  extern unsigned long __pgd_index_size;  extern unsigned long __pmd_cache_index; +extern unsigned long __pud_cache_index;  #define PTE_INDEX_SIZE  __pte_index_size  #define PMD_INDEX_SIZE  __pmd_index_size  #define PUD_INDEX_SIZE  __pud_index_size  #define PGD_INDEX_SIZE  __pgd_index_size  #define PMD_CACHE_INDEX __pmd_cache_index +#define PUD_CACHE_INDEX __pud_cache_index  /*   * Because of use of pte fragments and THP, size of page table   * are not always derived out of index size above. 
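
Before the next hunk, a self-contained sketch of the sizing arithmetic behind the new PUD_CACHE_INDEX: the hugetlb H_PUD_TABLE_SIZE above is the normal table plus an equally sized array of slot details, so growing the cache index by one bit doubles the allocation and leaves the second half of the page-table page for those slots, matching the PGT_CACHE(PUD_CACHE_INDEX) use in the pgalloc.h hunk. The index and entry sizes below are assumptions for illustration, not values taken from the headers:

#include <stdio.h>

/* assumed values, for illustration only */
#define PUD_INDEX_SIZE	9
#define PUD_CACHE_INDEX	(PUD_INDEX_SIZE + 1)	/* one extra bit, as in H_PUD_CACHE_INDEX */

int main(void)
{
	unsigned long entry = sizeof(unsigned long);	/* stand-in for sizeof(pud_t) */
	unsigned long table = entry << PUD_INDEX_SIZE;	/* translation entries        */
	unsigned long alloc = entry << PUD_CACHE_INDEX;	/* doubled allocation         */

	printf("pud entries: %lu bytes, allocation: %lu bytes, slot area: %lu bytes\n",
	       table, alloc, alloc - table);
	return 0;
}
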
@@ -348,7 +350,7 @@ extern unsigned long pci_io_base;   */  #ifndef __real_pte -#define __real_pte(e,p)		((real_pte_t){(e)}) +#define __real_pte(e, p, o)		((real_pte_t){(e)})  #define __rpte_to_pte(r)	((r).pte)  #define __rpte_to_hidx(r,index)	(pte_val(__rpte_to_pte(r)) >> H_PAGE_F_GIX_SHIFT) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 176dfb73d42c..471b2274fbeb 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -645,7 +645,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)  					  EXC_HV, SOFTEN_TEST_HV, bitmask)  #define MASKABLE_RELON_EXCEPTION_HV_OOL(vec, label, bitmask)		\ -	MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_NOTEST_HV, vec, bitmask);\ +	MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec, bitmask);\  	EXCEPTION_RELON_PROLOG_PSERIES_1(label, EXC_HV)  /* diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h index 511acfd7ab0d..535add3f7791 100644 --- a/arch/powerpc/include/asm/firmware.h +++ b/arch/powerpc/include/asm/firmware.h @@ -52,7 +52,7 @@  #define FW_FEATURE_TYPE1_AFFINITY ASM_CONST(0x0000000100000000)  #define FW_FEATURE_PRRN		ASM_CONST(0x0000000200000000)  #define FW_FEATURE_DRMEM_V2	ASM_CONST(0x0000000400000000) -#define FW_FEATURE_DRC_INFO	ASM_CONST(0x0000000400000000) +#define FW_FEATURE_DRC_INFO	ASM_CONST(0x0000000800000000)  #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 88e5e8f17e98..855e17d158b1 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -30,6 +30,16 @@  #define PACA_IRQ_PMI		0x40  /* + * Some soft-masked interrupts must be hard masked until they are replayed + * (e.g., because the soft-masked handler does not clear the exception). + */ +#ifdef CONFIG_PPC_BOOK3S +#define PACA_IRQ_MUST_HARD_MASK	(PACA_IRQ_EE|PACA_IRQ_PMI) +#else +#define PACA_IRQ_MUST_HARD_MASK	(PACA_IRQ_EE) +#endif + +/*   * flags for paca->irq_soft_mask   */  #define IRQS_ENABLED		0 @@ -244,7 +254,7 @@ static inline bool lazy_irq_pending(void)  static inline void may_hard_irq_enable(void)  {  	get_paca()->irq_happened &= ~PACA_IRQ_HARD_DIS; -	if (!(get_paca()->irq_happened & PACA_IRQ_EE)) +	if (!(get_paca()->irq_happened & PACA_IRQ_MUST_HARD_MASK))  		__hard_irq_enable();  } diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index 9dcbfa6bbb91..d8b1e8e7e035 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -140,6 +140,12 @@ static inline bool kdump_in_progress(void)  	return false;  } +static inline void crash_ipi_callback(struct pt_regs *regs) { } + +static inline void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)) +{ +} +  #endif /* CONFIG_KEXEC_CORE */  #endif /* ! 
__ASSEMBLY__ */  #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index 504a3c36ce5c..03bbd1149530 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -24,6 +24,7 @@ extern int icache_44x_need_flush;  #define PGD_INDEX_SIZE	(32 - PGDIR_SHIFT)  #define PMD_CACHE_INDEX	PMD_INDEX_SIZE +#define PUD_CACHE_INDEX	PUD_INDEX_SIZE  #ifndef __ASSEMBLY__  #define PTE_TABLE_SIZE	(sizeof(pte_t) << PTE_INDEX_SIZE) diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index abddf5830ad5..5c5f75d005ad 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -27,6 +27,7 @@  #else  #define PMD_CACHE_INDEX	PMD_INDEX_SIZE  #endif +#define PUD_CACHE_INDEX PUD_INDEX_SIZE  /*   * Define the address range of the kernel non-linear virtual area diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index 88187c285c70..9f421641a35c 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -44,6 +44,11 @@ extern int sysfs_add_device_to_node(struct device *dev, int nid);  extern void sysfs_remove_device_from_node(struct device *dev, int nid);  extern int numa_update_cpu_topology(bool cpus_locked); +static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node) +{ +	numa_cpu_lookup_table[cpu] = node; +} +  static inline int early_cpu_to_node(int cpu)  {  	int nid; @@ -76,12 +81,16 @@ static inline int numa_update_cpu_topology(bool cpus_locked)  {  	return 0;  } + +static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node) {} +  #endif /* CONFIG_NUMA */  #if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR)  extern int start_topology_update(void);  extern int stop_topology_update(void);  extern int prrn_is_enabled(void); +extern int find_and_online_cpu_nid(int cpu);  #else  static inline int start_topology_update(void)  { @@ -95,6 +104,10 @@ static inline int prrn_is_enabled(void)  {  	return 0;  } +static inline int find_and_online_cpu_nid(int cpu) +{ +	return 0; +}  #endif /* CONFIG_NUMA && CONFIG_PPC_SPLPAR */  #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_NEED_MULTIPLE_NODES) diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index beea2182d754..0c0b66fc5bfb 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -384,7 +384,8 @@ static void *eeh_report_resume(void *data, void *userdata)  	eeh_pcid_put(dev);  	pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED);  #ifdef CONFIG_PCI_IOV -	eeh_ops->notify_resume(eeh_dev_to_pdn(edev)); +	if (eeh_ops->notify_resume && eeh_dev_to_pdn(edev)) +		eeh_ops->notify_resume(eeh_dev_to_pdn(edev));  #endif  	return NULL;  } diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index ee832d344a5a..9b6e653e501a 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -943,6 +943,8 @@ kernel_dbg_exc:  /*   * An interrupt came in while soft-disabled; We mark paca->irq_happened   * accordingly and if the interrupt is level sensitive, we hard disable + * hard disable (full_mask) corresponds to PACA_IRQ_MUST_HARD_MASK, so + * keep these in synch.   
*/  .macro masked_interrupt_book3e paca_irq full_mask diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 243d072a225a..3ac87e53b3da 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1426,7 +1426,7 @@ EXC_COMMON_BEGIN(soft_nmi_common)   *   triggered and won't automatically refire.   * - If it was a HMI we return immediately since we handled it in realmode   *   and it won't refire. - * - else we hard disable and return. + * - Else it is one of PACA_IRQ_MUST_HARD_MASK, so hard disable and return.   * This is called with r10 containing the value to OR to the paca field.   */  #define MASKED_INTERRUPT(_H)				\ @@ -1441,8 +1441,8 @@ masked_##_H##interrupt:					\  	ori	r10,r10,0xffff;				\  	mtspr	SPRN_DEC,r10;				\  	b	MASKED_DEC_HANDLER_LABEL;		\ -1:	andi.	r10,r10,(PACA_IRQ_DBELL|PACA_IRQ_HMI);	\ -	bne	2f;					\ +1:	andi.	r10,r10,PACA_IRQ_MUST_HARD_MASK;	\ +	beq	2f;					\  	mfspr	r10,SPRN_##_H##SRR1;			\  	xori	r10,r10,MSR_EE; /* clear MSR_EE */	\  	mtspr	SPRN_##_H##SRR1,r10;			\ diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index adf044daafd7..acf4b2e0530c 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -874,7 +874,6 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = {  		.mmu = 0,  		.hash_ext = 0,  		.radix_ext = 0, -		.byte22 = OV5_FEAT(OV5_DRC_INFO),  	},  	/* option vector 6: IBM PAPR hints */ diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 5a8bfee6e187..04d0bbd7a1dd 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -788,7 +788,8 @@ static int register_cpu_online(unsigned int cpu)  	if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2))  		device_create_file(s, &dev_attr_pir); -	if (cpu_has_feature(CPU_FTR_ARCH_206)) +	if (cpu_has_feature(CPU_FTR_ARCH_206) && +		!firmware_has_feature(FW_FEATURE_LPAR))  		device_create_file(s, &dev_attr_tscr);  #endif /* CONFIG_PPC64 */ @@ -873,7 +874,8 @@ static int unregister_cpu_online(unsigned int cpu)  	if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2))  		device_remove_file(s, &dev_attr_pir); -	if (cpu_has_feature(CPU_FTR_ARCH_206)) +	if (cpu_has_feature(CPU_FTR_ARCH_206) && +		!firmware_has_feature(FW_FEATURE_LPAR))  		device_remove_file(s, &dev_attr_tscr);  #endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 0c854816e653..5cb4e4687107 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -195,6 +195,12 @@ static void kvmppc_pte_free(pte_t *ptep)  	kmem_cache_free(kvm_pte_cache, ptep);  } +/* Like pmd_huge() and pmd_large(), but works regardless of config options */ +static inline int pmd_is_leaf(pmd_t pmd) +{ +	return !!(pmd_val(pmd) & _PAGE_PTE); +} +  static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,  			     unsigned int level, unsigned long mmu_seq)  { @@ -219,7 +225,7 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,  	else  		new_pmd = pmd_alloc_one(kvm->mm, gpa); -	if (level == 0 && !(pmd && pmd_present(*pmd))) +	if (level == 0 && !(pmd && pmd_present(*pmd) && !pmd_is_leaf(*pmd)))  		new_ptep = kvmppc_pte_alloc();  	/* Check if we might have been invalidated; let the guest retry if so */ @@ -244,12 +250,30 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,  		new_pmd = NULL;  	}  	pmd = pmd_offset(pud, gpa); -	if 
(pmd_large(*pmd)) { -		/* Someone else has instantiated a large page here; retry */ -		ret = -EAGAIN; -		goto out_unlock; -	} -	if (level == 1 && !pmd_none(*pmd)) { +	if (pmd_is_leaf(*pmd)) { +		unsigned long lgpa = gpa & PMD_MASK; + +		/* +		 * If we raced with another CPU which has just put +		 * a 2MB pte in after we saw a pte page, try again. +		 */ +		if (level == 0 && !new_ptep) { +			ret = -EAGAIN; +			goto out_unlock; +		} +		/* Valid 2MB page here already, remove it */ +		old = kvmppc_radix_update_pte(kvm, pmdp_ptep(pmd), +					      ~0UL, 0, lgpa, PMD_SHIFT); +		kvmppc_radix_tlbie_page(kvm, lgpa, PMD_SHIFT); +		if (old & _PAGE_DIRTY) { +			unsigned long gfn = lgpa >> PAGE_SHIFT; +			struct kvm_memory_slot *memslot; +			memslot = gfn_to_memslot(kvm, gfn); +			if (memslot && memslot->dirty_bitmap) +				kvmppc_update_dirty_map(memslot, +							gfn, PMD_SIZE); +		} +	} else if (level == 1 && !pmd_none(*pmd)) {  		/*  		 * There's a page table page here, but we wanted  		 * to install a large page.  Tell the caller and let @@ -412,28 +436,24 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,  	} else {  		page = pages[0];  		pfn = page_to_pfn(page); -		if (PageHuge(page)) { -			page = compound_head(page); -			pte_size <<= compound_order(page); +		if (PageCompound(page)) { +			pte_size <<= compound_order(compound_head(page));  			/* See if we can insert a 2MB large-page PTE here */  			if (pte_size >= PMD_SIZE && -			    (gpa & PMD_MASK & PAGE_MASK) == -			    (hva & PMD_MASK & PAGE_MASK)) { +			    (gpa & (PMD_SIZE - PAGE_SIZE)) == +			    (hva & (PMD_SIZE - PAGE_SIZE))) {  				level = 1;  				pfn &= ~((PMD_SIZE >> PAGE_SHIFT) - 1);  			}  		}  		/* See if we can provide write access */  		if (writing) { -			/* -			 * We assume gup_fast has set dirty on the host PTE. 
-			 */  			pgflags |= _PAGE_WRITE;  		} else {  			local_irq_save(flags);  			ptep = find_current_mm_pte(current->mm->pgd,  						   hva, NULL, NULL); -			if (ptep && pte_write(*ptep) && pte_dirty(*ptep)) +			if (ptep && pte_write(*ptep))  				pgflags |= _PAGE_WRITE;  			local_irq_restore(flags);  		} @@ -459,18 +479,15 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,  		pte = pfn_pte(pfn, __pgprot(pgflags));  		ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq);  	} -	if (ret == 0 || ret == -EAGAIN) -		ret = RESUME_GUEST;  	if (page) { -		/* -		 * We drop pages[0] here, not page because page might -		 * have been set to the head page of a compound, but -		 * we have to drop the reference on the correct tail -		 * page to match the get inside gup() -		 */ -		put_page(pages[0]); +		if (!ret && (pgflags & _PAGE_WRITE)) +			set_page_dirty_lock(page); +		put_page(page);  	} + +	if (ret == 0 || ret == -EAGAIN) +		ret = RESUME_GUEST;  	return ret;  } @@ -644,7 +661,7 @@ void kvmppc_free_radix(struct kvm *kvm)  				continue;  			pmd = pmd_offset(pud, 0);  			for (im = 0; im < PTRS_PER_PMD; ++im, ++pmd) { -				if (pmd_huge(*pmd)) { +				if (pmd_is_leaf(*pmd)) {  					pmd_clear(pmd);  					continue;  				} diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 89707354c2ef..9cb9448163c4 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2885,7 +2885,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)  	 */  	trace_hardirqs_on(); -	guest_enter(); +	guest_enter_irqoff();  	srcu_idx = srcu_read_lock(&vc->kvm->srcu); @@ -2893,8 +2893,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)  	srcu_read_unlock(&vc->kvm->srcu, srcu_idx); -	guest_exit(); -  	trace_hardirqs_off();  	set_irq_happened(trap); @@ -2937,6 +2935,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)  	kvmppc_set_host_core(pcpu);  	local_irq_enable(); +	guest_exit();  	/* Let secondaries go back to the offline loop */  	for (i = 0; i < controlled_threads; ++i) { @@ -3656,15 +3655,17 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)  		goto up_out;  	psize = vma_kernel_pagesize(vma); -	porder = __ilog2(psize);  	up_read(¤t->mm->mmap_sem);  	/* We can handle 4k, 64k or 16M pages in the VRMA */ -	err = -EINVAL; -	if (!(psize == 0x1000 || psize == 0x10000 || -	      psize == 0x1000000)) -		goto out_srcu; +	if (psize >= 0x1000000) +		psize = 0x1000000; +	else if (psize >= 0x10000) +		psize = 0x10000; +	else +		psize = 0x1000; +	porder = __ilog2(psize);  	senc = slb_pgsize_encoding(psize);  	kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T | diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index f31f357b8c5a..d33264697a31 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -320,7 +320,6 @@ kvm_novcpu_exit:  	stw	r12, STACK_SLOT_TRAP(r1)  	bl	kvmhv_commence_exit  	nop -	lwz	r12, STACK_SLOT_TRAP(r1)  	b	kvmhv_switch_to_host  /* @@ -1220,6 +1219,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)  secondary_too_late:  	li	r12, 0 +	stw	r12, STACK_SLOT_TRAP(r1)  	cmpdi	r4, 0  	beq	11f  	stw	r12, VCPU_TRAP(r4) @@ -1558,12 +1558,12 @@ mc_cont:  3:	stw	r5,VCPU_SLB_MAX(r9)  guest_bypass: +	stw	r12, STACK_SLOT_TRAP(r1)  	mr 	r3, r12  	/* Increment exit count, poke other threads to exit */  	bl	kvmhv_commence_exit  	nop  	ld	r9, HSTATE_KVM_VCPU(r13) -	lwz	r12, VCPU_TRAP(r9)  	/* Stop others sending VCPU interrupts to this 
physical CPU */  	li	r0, -1 @@ -1898,6 +1898,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1)  	 * POWER7/POWER8 guest -> host partition switch code.  	 * We don't have to lock against tlbies but we do  	 * have to coordinate the hardware threads. +	 * Here STACK_SLOT_TRAP(r1) contains the trap number.  	 */  kvmhv_switch_to_host:  	/* Secondary threads wait for primary to do partition switch */ @@ -1950,12 +1951,12 @@ BEGIN_FTR_SECTION  END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)  	/* If HMI, call kvmppc_realmode_hmi_handler() */ +	lwz	r12, STACK_SLOT_TRAP(r1)  	cmpwi	r12, BOOK3S_INTERRUPT_HMI  	bne	27f  	bl	kvmppc_realmode_hmi_handler  	nop  	cmpdi	r3, 0 -	li	r12, BOOK3S_INTERRUPT_HMI  	/*  	 * At this point kvmppc_realmode_hmi_handler may have resync-ed  	 * the TB, and if it has, we must not subtract the guest timebase @@ -2008,10 +2009,8 @@ BEGIN_FTR_SECTION  	lwz	r8, KVM_SPLIT_DO_RESTORE(r3)  	cmpwi	r8, 0  	beq	47f -	stw	r12, STACK_SLOT_TRAP(r1)  	bl	kvmhv_p9_restore_lpcr  	nop -	lwz	r12, STACK_SLOT_TRAP(r1)  	b	48f  47:  END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) @@ -2049,6 +2048,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)  	li	r0, KVM_GUEST_MODE_NONE  	stb	r0, HSTATE_IN_GUEST(r13) +	lwz	r12, STACK_SLOT_TRAP(r1)	/* return trap # in r12 */  	ld	r0, SFS+PPC_LR_STKOFF(r1)  	addi	r1, r1, SFS  	mtlr	r0 diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index f0f5cd4d2fe7..f9818d7d3381 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -188,7 +188,7 @@ static int xive_provision_queue(struct kvm_vcpu *vcpu, u8 prio)  	if (!qpage) {  		pr_err("Failed to allocate queue %d for VCPU %d\n",  		       prio, xc->server_num); -		return -ENOMEM;; +		return -ENOMEM;  	}  	memset(qpage, 0, 1 << xive->q_order); diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 403e642c78f5..52c205373986 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -1345,7 +1345,7 @@ static int kvmppc_emulate_mmio_vsx_loadstore(struct kvm_vcpu *vcpu,  int kvmppc_handle_load128_by2x64(struct kvm_run *run, struct kvm_vcpu *vcpu,  		unsigned int rt, int is_default_endian)  { -	enum emulation_result emulated; +	enum emulation_result emulated = EMULATE_DONE;  	while (vcpu->arch.mmio_vmx_copy_nums) {  		emulated = __kvmppc_handle_load(run, vcpu, rt, 8, @@ -1608,7 +1608,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)  	kvm_sigset_deactivate(vcpu); +#ifdef CONFIG_ALTIVEC  out: +#endif  	vcpu_put(vcpu);  	return r;  } diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c index 1604110c4238..3f1803672c9b 100644 --- a/arch/powerpc/mm/drmem.c +++ b/arch/powerpc/mm/drmem.c @@ -98,7 +98,7 @@ static void init_drconf_v2_cell(struct of_drconf_cell_v2 *dr_cell,  	dr_cell->base_addr = cpu_to_be64(lmb->base_addr);  	dr_cell->drc_index = cpu_to_be32(lmb->drc_index);  	dr_cell->aa_index = cpu_to_be32(lmb->aa_index); -	dr_cell->flags = cpu_to_be32(lmb->flags); +	dr_cell->flags = cpu_to_be32(drmem_lmb_flags(lmb));  }  static int drmem_update_dt_v2(struct device_node *memory, @@ -121,7 +121,7 @@ static int drmem_update_dt_v2(struct device_node *memory,  		}  		if (prev_lmb->aa_index != lmb->aa_index || -		    prev_lmb->flags != lmb->flags) +		    drmem_lmb_flags(prev_lmb) != drmem_lmb_flags(lmb))  			lmb_sets++;  		prev_lmb = lmb; @@ -150,7 +150,7 @@ static int drmem_update_dt_v2(struct device_node *memory,  		}  		if (prev_lmb->aa_index != lmb->aa_index || -		    prev_lmb->flags != lmb->flags) { +		    
drmem_lmb_flags(prev_lmb) != drmem_lmb_flags(lmb)) {  			/* end of one set, start of another */  			dr_cell->seq_lmbs = cpu_to_be32(seq_lmbs);  			dr_cell++; @@ -216,6 +216,8 @@ static void __init __walk_drmem_v1_lmbs(const __be32 *prop, const __be32 *usm,  	u32 i, n_lmbs;  	n_lmbs = of_read_number(prop++, 1); +	if (n_lmbs == 0) +		return;  	for (i = 0; i < n_lmbs; i++) {  		read_drconf_v1_cell(&lmb, &prop); @@ -245,6 +247,8 @@ static void __init __walk_drmem_v2_lmbs(const __be32 *prop, const __be32 *usm,  	u32 i, j, lmb_sets;  	lmb_sets = of_read_number(prop++, 1); +	if (lmb_sets == 0) +		return;  	for (i = 0; i < lmb_sets; i++) {  		read_drconf_v2_cell(&dr_cell, &prop); @@ -354,6 +358,8 @@ static void __init init_drmem_v1_lmbs(const __be32 *prop)  	struct drmem_lmb *lmb;  	drmem_info->n_lmbs = of_read_number(prop++, 1); +	if (drmem_info->n_lmbs == 0) +		return;  	drmem_info->lmbs = kcalloc(drmem_info->n_lmbs, sizeof(*lmb),  				   GFP_KERNEL); @@ -373,6 +379,8 @@ static void __init init_drmem_v2_lmbs(const __be32 *prop)  	int lmb_index;  	lmb_sets = of_read_number(prop++, 1); +	if (lmb_sets == 0) +		return;  	/* first pass, calculate the number of LMBs */  	p = prop; diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/hash64_4k.c index 5a69b51d08a3..d573d7d07f25 100644 --- a/arch/powerpc/mm/hash64_4k.c +++ b/arch/powerpc/mm/hash64_4k.c @@ -55,7 +55,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,  	 * need to add in 0x1 if it's a read-only user page  	 */  	rflags = htab_convert_pte_flags(new_pte); -	rpte = __real_pte(__pte(old_pte), ptep); +	rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE);  	if (cpu_has_feature(CPU_FTR_NOEXECUTE) &&  	    !cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) @@ -117,7 +117,7 @@ repeat:  			return -1;  		}  		new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE; -		new_pte |= pte_set_hidx(ptep, rpte, 0, slot); +		new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE);  	}  	*ptep = __pte(new_pte & ~H_PAGE_BUSY);  	return 0; diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c index 2253bbc6a599..e601d95c3b20 100644 --- a/arch/powerpc/mm/hash64_64k.c +++ b/arch/powerpc/mm/hash64_64k.c @@ -86,7 +86,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,  	subpg_index = (ea & (PAGE_SIZE - 1)) >> shift;  	vpn  = hpt_vpn(ea, vsid, ssize); -	rpte = __real_pte(__pte(old_pte), ptep); +	rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE);  	/*  	 *None of the sub 4k page is hashed  	 */ @@ -214,7 +214,7 @@ repeat:  		return -1;  	} -	new_pte |= pte_set_hidx(ptep, rpte, subpg_index, slot); +	new_pte |= pte_set_hidx(ptep, rpte, subpg_index, slot, PTRS_PER_PTE);  	new_pte |= H_PAGE_HASHPTE;  	*ptep = __pte(new_pte & ~H_PAGE_BUSY); @@ -262,7 +262,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access,  	} while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));  	rflags = htab_convert_pte_flags(new_pte); -	rpte = __real_pte(__pte(old_pte), ptep); +	rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE);  	if (cpu_has_feature(CPU_FTR_NOEXECUTE) &&  	    !cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) @@ -327,7 +327,7 @@ repeat:  		}  		new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE; -		new_pte |= pte_set_hidx(ptep, rpte, 0, slot); +		new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE);  	}  	*ptep = __pte(new_pte & ~H_PAGE_BUSY);  	return 0; diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 7d07c7e17db6..cf290d415dcd 
100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -1008,6 +1008,7 @@ void __init hash__early_init_mmu(void)  	__pmd_index_size = H_PMD_INDEX_SIZE;  	__pud_index_size = H_PUD_INDEX_SIZE;  	__pgd_index_size = H_PGD_INDEX_SIZE; +	__pud_cache_index = H_PUD_CACHE_INDEX;  	__pmd_cache_index = H_PMD_CACHE_INDEX;  	__pte_table_size = H_PTE_TABLE_SIZE;  	__pmd_table_size = H_PMD_TABLE_SIZE; diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c index 12511f5a015f..b320f5097a06 100644 --- a/arch/powerpc/mm/hugetlbpage-hash64.c +++ b/arch/powerpc/mm/hugetlbpage-hash64.c @@ -27,7 +27,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,  	unsigned long vpn;  	unsigned long old_pte, new_pte;  	unsigned long rflags, pa, sz; -	long slot; +	long slot, offset;  	BUG_ON(shift != mmu_psize_defs[mmu_psize].shift); @@ -63,7 +63,11 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,  	} while(!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));  	rflags = htab_convert_pte_flags(new_pte); -	rpte = __real_pte(__pte(old_pte), ptep); +	if (unlikely(mmu_psize == MMU_PAGE_16G)) +		offset = PTRS_PER_PUD; +	else +		offset = PTRS_PER_PMD; +	rpte = __real_pte(__pte(old_pte), ptep, offset);  	sz = ((1UL) << shift);  	if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) @@ -104,7 +108,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,  			return -1;  		} -		new_pte |= pte_set_hidx(ptep, rpte, 0, slot); +		new_pte |= pte_set_hidx(ptep, rpte, 0, slot, offset);  	}  	/* diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c index eb8c6c8c4851..2b656e67f2ea 100644 --- a/arch/powerpc/mm/init-common.c +++ b/arch/powerpc/mm/init-common.c @@ -100,6 +100,6 @@ void pgtable_cache_init(void)  	 * same size as either the pgd or pmd index except with THP enabled  	 * on book3s 64  	 */ -	if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE)) -		pgtable_cache_add(PUD_INDEX_SIZE, pud_ctor); +	if (PUD_CACHE_INDEX && !PGT_CACHE(PUD_CACHE_INDEX)) +		pgtable_cache_add(PUD_CACHE_INDEX, pud_ctor);  } diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 314d19ab9385..edd8d0bc9364 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -143,11 +143,6 @@ static void reset_numa_cpu_lookup_table(void)  		numa_cpu_lookup_table[cpu] = -1;  } -static void update_numa_cpu_lookup_table(unsigned int cpu, int node) -{ -	numa_cpu_lookup_table[cpu] = node; -} -  static void map_cpu_to_node(int cpu, int node)  {  	update_numa_cpu_lookup_table(cpu, node); diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index 573a9a2ee455..2e10a964e290 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -17,9 +17,11 @@  #include <linux/of_fdt.h>  #include <linux/mm.h>  #include <linux/string_helpers.h> +#include <linux/stop_machine.h>  #include <asm/pgtable.h>  #include <asm/pgalloc.h> +#include <asm/mmu_context.h>  #include <asm/dma.h>  #include <asm/machdep.h>  #include <asm/mmu.h> @@ -333,6 +335,22 @@ static void __init radix_init_pgtable(void)  		     "r" (TLBIEL_INVAL_SET_LPID), "r" (0));  	asm volatile("eieio; tlbsync; ptesync" : : : "memory");  	trace_tlbie(0, 0, TLBIEL_INVAL_SET_LPID, 0, 2, 1, 1); + +	/* +	 * The init_mm context is given the first available (non-zero) PID, +	 * which is the "guard PID" and contains no page table. 
PIDR should +	 * never be set to zero because that duplicates the kernel address +	 * space at the 0x0... offset (quadrant 0)! +	 * +	 * An arbitrary PID that may later be allocated by the PID allocator +	 * for userspace processes must not be used either, because that +	 * would cause stale user mappings for that PID on CPUs outside of +	 * the TLB invalidation scheme (because it won't be in mm_cpumask). +	 * +	 * So permanently carve out one PID for the purpose of a guard PID. +	 */ +	init_mm.context.id = mmu_base_pid; +	mmu_base_pid++;  }  static void __init radix_init_partition_table(void) @@ -535,6 +553,7 @@ void __init radix__early_init_mmu(void)  	__pmd_index_size = RADIX_PMD_INDEX_SIZE;  	__pud_index_size = RADIX_PUD_INDEX_SIZE;  	__pgd_index_size = RADIX_PGD_INDEX_SIZE; +	__pud_cache_index = RADIX_PUD_INDEX_SIZE;  	__pmd_cache_index = RADIX_PMD_INDEX_SIZE;  	__pte_table_size = RADIX_PTE_TABLE_SIZE;  	__pmd_table_size = RADIX_PMD_TABLE_SIZE; @@ -579,7 +598,8 @@ void __init radix__early_init_mmu(void)  	radix_init_iamr();  	radix_init_pgtable(); - +	/* Switch to the guard PID before turning on MMU */ +	radix__switch_mmu_context(NULL, &init_mm);  	if (cpu_has_feature(CPU_FTR_HVMODE))  		tlbiel_all();  } @@ -604,6 +624,7 @@ void radix__early_init_mmu_secondary(void)  	}  	radix_init_iamr(); +	radix__switch_mmu_context(NULL, &init_mm);  	if (cpu_has_feature(CPU_FTR_HVMODE))  		tlbiel_all();  } @@ -666,6 +687,30 @@ static void free_pmd_table(pmd_t *pmd_start, pud_t *pud)  	pud_clear(pud);  } +struct change_mapping_params { +	pte_t *pte; +	unsigned long start; +	unsigned long end; +	unsigned long aligned_start; +	unsigned long aligned_end; +}; + +static int stop_machine_change_mapping(void *data) +{ +	struct change_mapping_params *params = +			(struct change_mapping_params *)data; + +	if (!data) +		return -1; + +	spin_unlock(&init_mm.page_table_lock); +	pte_clear(&init_mm, params->aligned_start, params->pte); +	create_physical_mapping(params->aligned_start, params->start); +	create_physical_mapping(params->end, params->aligned_end); +	spin_lock(&init_mm.page_table_lock); +	return 0; +} +  static void remove_pte_table(pte_t *pte_start, unsigned long addr,  			     unsigned long end)  { @@ -694,6 +739,52 @@ static void remove_pte_table(pte_t *pte_start, unsigned long addr,  	}  } +/* + * clear the pte and potentially split the mapping helper + */ +static void split_kernel_mapping(unsigned long addr, unsigned long end, +				unsigned long size, pte_t *pte) +{ +	unsigned long mask = ~(size - 1); +	unsigned long aligned_start = addr & mask; +	unsigned long aligned_end = addr + size; +	struct change_mapping_params params; +	bool split_region = false; + +	if ((end - addr) < size) { +		/* +		 * We're going to clear the PTE, but not flushed +		 * the mapping, time to remap and flush. The +		 * effects if visible outside the processor or +		 * if we are running in code close to the +		 * mapping we cleared, we are in trouble. 
+		 */ +		if (overlaps_kernel_text(aligned_start, addr) || +			overlaps_kernel_text(end, aligned_end)) { +			/* +			 * Hack, just return, don't pte_clear +			 */ +			WARN_ONCE(1, "Linear mapping %lx->%lx overlaps kernel " +				  "text, not splitting\n", addr, end); +			return; +		} +		split_region = true; +	} + +	if (split_region) { +		params.pte = pte; +		params.start = addr; +		params.end = end; +		params.aligned_start = addr & ~(size - 1); +		params.aligned_end = min_t(unsigned long, aligned_end, +				(unsigned long)__va(memblock_end_of_DRAM())); +		stop_machine(stop_machine_change_mapping, ¶ms, NULL); +		return; +	} + +	pte_clear(&init_mm, addr, pte); +} +  static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr,  			     unsigned long end)  { @@ -709,13 +800,7 @@ static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr,  			continue;  		if (pmd_huge(*pmd)) { -			if (!IS_ALIGNED(addr, PMD_SIZE) || -			    !IS_ALIGNED(next, PMD_SIZE)) { -				WARN_ONCE(1, "%s: unaligned range\n", __func__); -				continue; -			} - -			pte_clear(&init_mm, addr, (pte_t *)pmd); +			split_kernel_mapping(addr, end, PMD_SIZE, (pte_t *)pmd);  			continue;  		} @@ -740,13 +825,7 @@ static void remove_pud_table(pud_t *pud_start, unsigned long addr,  			continue;  		if (pud_huge(*pud)) { -			if (!IS_ALIGNED(addr, PUD_SIZE) || -			    !IS_ALIGNED(next, PUD_SIZE)) { -				WARN_ONCE(1, "%s: unaligned range\n", __func__); -				continue; -			} - -			pte_clear(&init_mm, addr, (pte_t *)pud); +			split_kernel_mapping(addr, end, PUD_SIZE, (pte_t *)pud);  			continue;  		} @@ -772,13 +851,7 @@ static void remove_pagetable(unsigned long start, unsigned long end)  			continue;  		if (pgd_huge(*pgd)) { -			if (!IS_ALIGNED(addr, PGDIR_SIZE) || -			    !IS_ALIGNED(next, PGDIR_SIZE)) { -				WARN_ONCE(1, "%s: unaligned range\n", __func__); -				continue; -			} - -			pte_clear(&init_mm, addr, (pte_t *)pgd); +			split_kernel_mapping(addr, end, PGDIR_SIZE, (pte_t *)pgd);  			continue;  		} diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index c9a623c2d8a2..28c980eb4422 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -82,6 +82,8 @@ unsigned long __pgd_index_size;  EXPORT_SYMBOL(__pgd_index_size);  unsigned long __pmd_cache_index;  EXPORT_SYMBOL(__pmd_cache_index); +unsigned long __pud_cache_index; +EXPORT_SYMBOL(__pud_cache_index);  unsigned long __pte_table_size;  EXPORT_SYMBOL(__pte_table_size);  unsigned long __pmd_table_size; @@ -471,6 +473,8 @@ void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,  	if (old & PATB_HR) {  		asm volatile(PPC_TLBIE_5(%0,%1,2,0,1) : :  			     "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid)); +		asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : : +			     "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));  		trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 1);  	} else {  		asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : : diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c index 881ebd53ffc2..9b23f12e863c 100644 --- a/arch/powerpc/mm/tlb_hash64.c +++ b/arch/powerpc/mm/tlb_hash64.c @@ -51,7 +51,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,  	unsigned int psize;  	int ssize;  	real_pte_t rpte; -	int i; +	int i, offset;  	i = batch->index; @@ -67,6 +67,10 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,  		psize = get_slice_psize(mm, addr);  		/* Mask the address for the correct page size */  		addr &= ~((1UL << mmu_psize_defs[psize].shift) - 1); +		if (unlikely(psize == 
MMU_PAGE_16G)) +			offset = PTRS_PER_PUD; +		else +			offset = PTRS_PER_PMD;  #else  		BUG();  		psize = pte_pagesize_index(mm, addr, pte); /* shutup gcc */ @@ -78,6 +82,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,  		 * support 64k pages, this might be different from the  		 * hardware page size encoded in the slice table. */  		addr &= PAGE_MASK; +		offset = PTRS_PER_PTE;  	} @@ -91,7 +96,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,  	}  	WARN_ON(vsid == 0);  	vpn = hpt_vpn(addr, vsid, ssize); -	rpte = __real_pte(__pte(pte), ptep); +	rpte = __real_pte(__pte(pte), ptep, offset);  	/*  	 * Check if we have an active batch on this CPU. If not, just diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 872d1f6dd11e..a9636d8cba15 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -327,6 +327,9 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,  			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);  			PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, len));  			break; +		case BPF_LDX | BPF_W | BPF_ABS: /* A = *((u32 *)(seccomp_data + K)); */ +			PPC_LWZ_OFFS(r_A, r_skb, K); +			break;  		case BPF_LDX | BPF_W | BPF_LEN: /* X = skb->len; */  			PPC_LWZ_OFFS(r_X, r_skb, offsetof(struct sk_buff, len));  			break; diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 0a34b0cec7b7..0ef3d9580e98 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -240,6 +240,7 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32  	 *   goto out;  	 */  	PPC_LWZ(b2p[TMP_REG_1], b2p_bpf_array, offsetof(struct bpf_array, map.max_entries)); +	PPC_RLWINM(b2p_index, b2p_index, 0, 0, 31);  	PPC_CMPLW(b2p_index, b2p[TMP_REG_1]);  	PPC_BCC(COND_GE, out); diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c index dd4c9b8b8a81..f6f55ab4980e 100644 --- a/arch/powerpc/platforms/powernv/opal-imc.c +++ b/arch/powerpc/platforms/powernv/opal-imc.c @@ -199,9 +199,11 @@ static void disable_nest_pmu_counters(void)  	const struct cpumask *l_cpumask;  	get_online_cpus(); -	for_each_online_node(nid) { +	for_each_node_with_cpus(nid) {  		l_cpumask = cpumask_of_node(nid); -		cpu = cpumask_first(l_cpumask); +		cpu = cpumask_first_and(l_cpumask, cpu_online_mask); +		if (cpu >= nr_cpu_ids) +			continue;  		opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST,  				       get_hard_smp_processor_id(cpu));  	} diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 496e47696ed0..a6c92c78c9b2 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1854,7 +1854,7 @@ static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)  	s64 rc;  	if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE)) -		return -ENODEV;; +		return -ENODEV;  	pe = &phb->ioda.pe_array[pdn->pe_number];  	if (pe->tce_bypass_enabled) { diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 4fb21e17504a..092715b9674b 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -80,6 +80,10 @@ static void pnv_setup_rfi_flush(void)  		if (np && of_property_read_bool(np, "disabled"))  			enable--; +		np = of_get_child_by_name(fw_features, "speculation-policy-favor-security"); +		if (np && of_property_read_bool(np, 
"disabled")) +			enable = 0; +  		of_node_put(np);  		of_node_put(fw_features);  	} diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c index 2b3eb01ab110..b7c53a51c31b 100644 --- a/arch/powerpc/platforms/powernv/vas-window.c +++ b/arch/powerpc/platforms/powernv/vas-window.c @@ -1063,16 +1063,16 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,  			rc = PTR_ERR(txwin->paste_kaddr);  			goto free_window;  		} +	} else { +		/* +		 * A user mapping must ensure that context switch issues +		 * CP_ABORT for this thread. +		 */ +		rc = set_thread_uses_vas(); +		if (rc) +			goto free_window;  	} -	/* -	 * Now that we have a send window, ensure context switch issues -	 * CP_ABORT for this thread. -	 */ -	rc = -EINVAL; -	if (set_thread_uses_vas() < 0) -		goto free_window; -  	set_vinst_win(vinst, txwin);  	return txwin; diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index dceb51454d8d..652d3e96b812 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -36,6 +36,7 @@  #include <asm/xics.h>  #include <asm/xive.h>  #include <asm/plpar_wrappers.h> +#include <asm/topology.h>  #include "pseries.h"  #include "offline_states.h" @@ -331,6 +332,7 @@ static void pseries_remove_processor(struct device_node *np)  			BUG_ON(cpu_online(cpu));  			set_cpu_present(cpu, false);  			set_hard_smp_processor_id(cpu, -1); +			update_numa_cpu_lookup_table(cpu, -1);  			break;  		}  		if (cpu >= nr_cpu_ids) @@ -340,8 +342,6 @@ static void pseries_remove_processor(struct device_node *np)  	cpu_maps_update_done();  } -extern int find_and_online_cpu_nid(int cpu); -  static int dlpar_online_cpu(struct device_node *dn)  {  	int rc = 0; diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 81d8614e7379..5e1ef9150182 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -49,6 +49,28 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id);  /* + * Enable the hotplug interrupt late because processing them may touch other + * devices or systems (e.g. hugepages) that have not been initialized at the + * subsys stage. + */ +int __init init_ras_hotplug_IRQ(void) +{ +	struct device_node *np; + +	/* Hotplug Events */ +	np = of_find_node_by_path("/event-sources/hot-plug-events"); +	if (np != NULL) { +		if (dlpar_workqueue_init() == 0) +			request_event_sources_irqs(np, ras_hotplug_interrupt, +						   "RAS_HOTPLUG"); +		of_node_put(np); +	} + +	return 0; +} +machine_late_initcall(pseries, init_ras_hotplug_IRQ); + +/*   * Initialize handlers for the set of interrupts caused by hardware errors   * and power system events.   
*/ @@ -66,15 +88,6 @@ static int __init init_ras_IRQ(void)  		of_node_put(np);  	} -	/* Hotplug Events */ -	np = of_find_node_by_path("/event-sources/hot-plug-events"); -	if (np != NULL) { -		if (dlpar_workqueue_init() == 0) -			request_event_sources_irqs(np, ras_hotplug_interrupt, -					   "RAS_HOTPLUG"); -		of_node_put(np); -	} -  	/* EPOW Events */  	np = of_find_node_by_path("/event-sources/epow-events");  	if (np != NULL) { diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 372d7ada1a0c..1a527625acf7 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -482,7 +482,8 @@ static void pseries_setup_rfi_flush(void)  		if (types == L1D_FLUSH_NONE)  			types = L1D_FLUSH_FALLBACK; -		if (!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR)) +		if ((!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR)) || +		    (!(result.behaviour & H_CPU_BEHAV_FAVOUR_SECURITY)))  			enable = false;  	} else {  		/* Default to fallback if case hcall is not available */ diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c index d9c4c9366049..091f1d0d0af1 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -356,7 +356,8 @@ static int xive_spapr_configure_queue(u32 target, struct xive_q *q, u8 prio,  	rc = plpar_int_get_queue_info(0, target, prio, &esn_page, &esn_size);  	if (rc) { -		pr_err("Error %lld getting queue info prio %d\n", rc, prio); +		pr_err("Error %lld getting queue info CPU %d prio %d\n", rc, +		       target, prio);  		rc = -EIO;  		goto fail;  	} @@ -370,7 +371,8 @@ static int xive_spapr_configure_queue(u32 target, struct xive_q *q, u8 prio,  	/* Configure and enable the queue in HW */  	rc = plpar_int_set_queue_config(flags, target, prio, qpage_phys, order);  	if (rc) { -		pr_err("Error %lld setting queue for prio %d\n", rc, prio); +		pr_err("Error %lld setting queue for CPU %d prio %d\n", rc, +		       target, prio);  		rc = -EIO;  	} else {  		q->qpage = qpage; @@ -389,8 +391,8 @@ static int xive_spapr_setup_queue(unsigned int cpu, struct xive_cpu *xc,  	if (IS_ERR(qpage))  		return PTR_ERR(qpage); -	return xive_spapr_configure_queue(cpu, q, prio, qpage, -					  xive_queue_shift); +	return xive_spapr_configure_queue(get_hard_smp_processor_id(cpu), +					  q, prio, qpage, xive_queue_shift);  }  static void xive_spapr_cleanup_queue(unsigned int cpu, struct xive_cpu *xc, @@ -399,10 +401,12 @@ static void xive_spapr_cleanup_queue(unsigned int cpu, struct xive_cpu *xc,  	struct xive_q *q = &xc->queue[prio];  	unsigned int alloc_order;  	long rc; +	int hw_cpu = get_hard_smp_processor_id(cpu); -	rc = plpar_int_set_queue_config(0, cpu, prio, 0, 0); +	rc = plpar_int_set_queue_config(0, hw_cpu, prio, 0, 0);  	if (rc) -		pr_err("Error %ld setting queue for prio %d\n", rc, prio); +		pr_err("Error %ld setting queue for CPU %d prio %d\n", rc, +		       hw_cpu, prio);  	alloc_order = xive_alloc_order(xive_queue_shift);  	free_pages((unsigned long)q->qpage, alloc_order); diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index b6722c246d9c..04807c7f64cc 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -8,7 +8,6 @@ config RISCV  	select OF  	select OF_EARLY_FLATTREE  	select OF_IRQ -	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE  	select ARCH_WANT_FRAME_POINTERS  	select CLONE_BACKWARDS  	select COMMON_CLK @@ -20,7 +19,6 @@ config RISCV  	select GENERIC_STRNLEN_USER  	select GENERIC_SMP_IDLE_THREAD  	select GENERIC_ATOMIC64 if !64BIT || !RISCV_ISA_A -	
select ARCH_WANT_OPTIONAL_GPIOLIB  	select HAVE_MEMBLOCK  	select HAVE_MEMBLOCK_NODE_MAP  	select HAVE_DMA_API_DEBUG @@ -34,7 +32,6 @@ config RISCV  	select HAVE_ARCH_TRACEHOOK  	select MODULES_USE_ELF_RELA if MODULES  	select THREAD_INFO_IN_TASK -	select RISCV_IRQ_INTC  	select RISCV_TIMER  config MMU diff --git a/arch/riscv/include/asm/barrier.h b/arch/riscv/include/asm/barrier.h index c0319cbf1eec..5510366d169a 100644 --- a/arch/riscv/include/asm/barrier.h +++ b/arch/riscv/include/asm/barrier.h @@ -34,9 +34,9 @@  #define wmb()		RISCV_FENCE(ow,ow)  /* These barriers do not need to enforce ordering on devices, just memory. */ -#define smp_mb()	RISCV_FENCE(rw,rw) -#define smp_rmb()	RISCV_FENCE(r,r) -#define smp_wmb()	RISCV_FENCE(w,w) +#define __smp_mb()	RISCV_FENCE(rw,rw) +#define __smp_rmb()	RISCV_FENCE(r,r) +#define __smp_wmb()	RISCV_FENCE(w,w)  /*   * This is a very specific barrier: it's currently only used in two places in diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 87fc045be51f..56fa592cfa34 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -172,6 +172,9 @@ ENTRY(handle_exception)  	move a1, sp /* pt_regs */  	tail do_IRQ  1: +	/* Exceptions run with interrupts enabled */ +	csrs sstatus, SR_SIE +  	/* Handle syscalls */  	li t0, EXC_SYSCALL  	beq s4, t0, handle_syscall @@ -198,8 +201,6 @@ handle_syscall:  	 */  	addi s2, s2, 0x4  	REG_S s2, PT_SEPC(sp) -	/* System calls run with interrupts enabled */ -	csrs sstatus, SR_SIE  	/* Trace syscalls, but only if requested by the user. */  	REG_L t0, TASK_TI_FLAGS(tp)  	andi t0, t0, _TIF_SYSCALL_TRACE diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 226eeb190f90..6e07ed37bbff 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -64,7 +64,7 @@ ENTRY(_start)  	/* Start the kernel */  	mv a0, s0  	mv a1, s1 -	call sbi_save +	call parse_dtb  	tail start_kernel  relocate: diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 09f7064e898c..c11f40c1b2a8 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -144,7 +144,7 @@ asmlinkage void __init setup_vm(void)  #endif  } -void __init sbi_save(unsigned int hartid, void *dtb) +void __init parse_dtb(unsigned int hartid, void *dtb)  {  	early_init_dt_scan(__va(dtb));  } diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 65154eaa3714..6c8ce15cde7b 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -63,6 +63,7 @@ static inline int init_new_context(struct task_struct *tsk,  				   _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT;  		/* pgd_alloc() did not account this pmd */  		mm_inc_nr_pmds(mm); +		mm_inc_nr_puds(mm);  	}  	crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));  	return 0; diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 13a133a6015c..a5621ea6d123 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -14,6 +14,7 @@  #include <asm/processor.h>  #include <asm/cache.h>  #include <asm/ctl_reg.h> +#include <asm/dwarf.h>  #include <asm/errno.h>  #include <asm/ptrace.h>  #include <asm/thread_info.h> @@ -230,7 +231,7 @@ _PIF_WORK	= (_PIF_PER_TRAP | _PIF_SYSCALL_RESTART)  	.hidden \name  	.type \name,@function  \name: -	.cfi_startproc +	CFI_STARTPROC  #ifdef CONFIG_HAVE_MARCH_Z10_FEATURES  	exrl	0,0f  #else @@ -239,7 +240,7 @@ _PIF_WORK	= (_PIF_PER_TRAP | _PIF_SYSCALL_RESTART)  #endif  	j	.  
0:	br	\reg -	.cfi_endproc +	CFI_ENDPROC  	.endm  	GEN_BR_THUNK __s390x_indirect_jump_r1use_r9,%r9,%r1 @@ -426,13 +427,13 @@ ENTRY(system_call)  	UPDATE_VTIME %r8,%r9,__LC_SYNC_ENTER_TIMER  	BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP  	stmg	%r0,%r7,__PT_R0(%r11) -	# clear user controlled register to prevent speculative use -	xgr	%r0,%r0  	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA_SYNC  	mvc	__PT_PSW(16,%r11),__LC_SVC_OLD_PSW  	mvc	__PT_INT_CODE(4,%r11),__LC_SVC_ILC  	stg	%r14,__PT_FLAGS(%r11)  .Lsysc_do_svc: +	# clear user controlled register to prevent speculative use +	xgr	%r0,%r0  	# load address of system call table  	lg	%r10,__THREAD_sysc_table(%r13,%r12)  	llgh	%r8,__PT_INT_CODE+2(%r11) @@ -1439,6 +1440,7 @@ cleanup_critical:  	stg	%r15,__LC_SYSTEM_TIMER  0:	# update accounting time stamp  	mvc	__LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER +	BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP  	# set up saved register r11  	lg	%r15,__LC_KERNEL_STACK  	la	%r9,STACK_FRAME_OVERHEAD(%r15) diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c index 69d7fcf48158..9aff72d3abda 100644 --- a/arch/s390/kernel/nospec-branch.c +++ b/arch/s390/kernel/nospec-branch.c @@ -2,8 +2,8 @@  #include <linux/module.h>  #include <asm/nospec-branch.h> -int nospec_call_disable = IS_ENABLED(EXPOLINE_OFF); -int nospec_return_disable = !IS_ENABLED(EXPOLINE_FULL); +int nospec_call_disable = IS_ENABLED(CONFIG_EXPOLINE_OFF); +int nospec_return_disable = !IS_ENABLED(CONFIG_EXPOLINE_FULL);  static int __init nospectre_v2_setup_early(char *str)  { diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 9c7d70715862..07c6e81163bf 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -22,22 +22,6 @@  #include "trace.h"  #include "trace-s390.h" - -static const intercept_handler_t instruction_handlers[256] = { -	[0x01] = kvm_s390_handle_01, -	[0x82] = kvm_s390_handle_lpsw, -	[0x83] = kvm_s390_handle_diag, -	[0xaa] = kvm_s390_handle_aa, -	[0xae] = kvm_s390_handle_sigp, -	[0xb2] = kvm_s390_handle_b2, -	[0xb6] = kvm_s390_handle_stctl, -	[0xb7] = kvm_s390_handle_lctl, -	[0xb9] = kvm_s390_handle_b9, -	[0xe3] = kvm_s390_handle_e3, -	[0xe5] = kvm_s390_handle_e5, -	[0xeb] = kvm_s390_handle_eb, -}; -  u8 kvm_s390_get_ilen(struct kvm_vcpu *vcpu)  {  	struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block; @@ -129,16 +113,39 @@ static int handle_validity(struct kvm_vcpu *vcpu)  static int handle_instruction(struct kvm_vcpu *vcpu)  { -	intercept_handler_t handler; -  	vcpu->stat.exit_instruction++;  	trace_kvm_s390_intercept_instruction(vcpu,  					     vcpu->arch.sie_block->ipa,  					     vcpu->arch.sie_block->ipb); -	handler = instruction_handlers[vcpu->arch.sie_block->ipa >> 8]; -	if (handler) -		return handler(vcpu); -	return -EOPNOTSUPP; + +	switch (vcpu->arch.sie_block->ipa >> 8) { +	case 0x01: +		return kvm_s390_handle_01(vcpu); +	case 0x82: +		return kvm_s390_handle_lpsw(vcpu); +	case 0x83: +		return kvm_s390_handle_diag(vcpu); +	case 0xaa: +		return kvm_s390_handle_aa(vcpu); +	case 0xae: +		return kvm_s390_handle_sigp(vcpu); +	case 0xb2: +		return kvm_s390_handle_b2(vcpu); +	case 0xb6: +		return kvm_s390_handle_stctl(vcpu); +	case 0xb7: +		return kvm_s390_handle_lctl(vcpu); +	case 0xb9: +		return kvm_s390_handle_b9(vcpu); +	case 0xe3: +		return kvm_s390_handle_e3(vcpu); +	case 0xe5: +		return kvm_s390_handle_e5(vcpu); +	case 0xeb: +		return kvm_s390_handle_eb(vcpu); +	default: +		return -EOPNOTSUPP; +	}  }  static int inject_prog_on_prog_intercept(struct kvm_vcpu *vcpu) 
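Note on the s390 KVM dispatch hunks (intercept.c here, priv.c further down): each one removes an opcode-indexed table of function pointers and dispatches through an explicit switch instead, turning an indirect call into direct calls; the diff itself does not state the motivation, but this matches the expoline/nospec changes elsewhere in the series, where indirect branches are the thing being avoided. The stand-alone sketch below only mirrors the shape of that transformation; the struct, handler names and opcodes are invented for illustration and are not the kernel's.

/*
 * Minimal, self-contained sketch of the table-to-switch dispatch rework.
 * Only the shape of the change matches the patch; everything named here
 * (struct vcpu, handle_foo, handle_bar, the opcodes) is hypothetical.
 */
#include <stdio.h>

struct vcpu { int dummy; };			/* stand-in for struct kvm_vcpu */

static int handle_foo(struct vcpu *v) { (void)v; return 0; }
static int handle_bar(struct vcpu *v) { (void)v; return 0; }

/* Old style: one indirect call through a sparse, opcode-indexed table. */
typedef int (*handler_t)(struct vcpu *v);

static const handler_t handlers[256] = {
	[0x01] = handle_foo,
	[0x82] = handle_bar,
};

static int dispatch_table(struct vcpu *v, unsigned int op)
{
	handler_t h = handlers[op & 0xff];

	return h ? h(v) : -1;			/* the kernel returns -EOPNOTSUPP */
}

/* New style: direct branches only, no function-pointer load at dispatch. */
static int dispatch_switch(struct vcpu *v, unsigned int op)
{
	switch (op & 0xff) {
	case 0x01:
		return handle_foo(v);
	case 0x82:
		return handle_bar(v);
	default:
		return -1;
	}
}

int main(void)
{
	struct vcpu v = { 0 };

	/* Both paths resolve the same opcode to the same handler. */
	printf("%d %d\n", dispatch_table(&v, 0x01), dispatch_switch(&v, 0x01));
	return 0;
}

Behavior is unchanged (unknown opcodes still fail with the same error); the win is that the compiler can emit direct branches for the switch, which do not need retpoline/expoline thunks.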
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index aabf46f5f883..b04616b57a94 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -169,8 +169,15 @@ static int ckc_interrupts_enabled(struct kvm_vcpu *vcpu)  static int ckc_irq_pending(struct kvm_vcpu *vcpu)  { -	if (vcpu->arch.sie_block->ckc >= kvm_s390_get_tod_clock_fast(vcpu->kvm)) +	const u64 now = kvm_s390_get_tod_clock_fast(vcpu->kvm); +	const u64 ckc = vcpu->arch.sie_block->ckc; + +	if (vcpu->arch.sie_block->gcr[0] & 0x0020000000000000ul) { +		if ((s64)ckc >= (s64)now) +			return 0; +	} else if (ckc >= now) {  		return 0; +	}  	return ckc_interrupts_enabled(vcpu);  } @@ -187,12 +194,6 @@ static int cpu_timer_irq_pending(struct kvm_vcpu *vcpu)  	return kvm_s390_get_cpu_timer(vcpu) >> 63;  } -static inline int is_ioirq(unsigned long irq_type) -{ -	return ((irq_type >= IRQ_PEND_IO_ISC_7) && -		(irq_type <= IRQ_PEND_IO_ISC_0)); -} -  static uint64_t isc_to_isc_bits(int isc)  {  	return (0x80 >> isc) << 24; @@ -236,10 +237,15 @@ static inline int kvm_s390_gisa_tac_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gis  	return test_and_clear_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa);  } -static inline unsigned long pending_irqs(struct kvm_vcpu *vcpu) +static inline unsigned long pending_irqs_no_gisa(struct kvm_vcpu *vcpu)  {  	return vcpu->kvm->arch.float_int.pending_irqs | -		vcpu->arch.local_int.pending_irqs | +		vcpu->arch.local_int.pending_irqs; +} + +static inline unsigned long pending_irqs(struct kvm_vcpu *vcpu) +{ +	return pending_irqs_no_gisa(vcpu) |  		kvm_s390_gisa_get_ipm(vcpu->kvm->arch.gisa) << IRQ_PEND_IO_ISC_7;  } @@ -337,7 +343,7 @@ static void __reset_intercept_indicators(struct kvm_vcpu *vcpu)  static void set_intercept_indicators_io(struct kvm_vcpu *vcpu)  { -	if (!(pending_irqs(vcpu) & IRQ_PEND_IO_MASK)) +	if (!(pending_irqs_no_gisa(vcpu) & IRQ_PEND_IO_MASK))  		return;  	else if (psw_ioint_disabled(vcpu))  		kvm_s390_set_cpuflags(vcpu, CPUSTAT_IO_INT); @@ -1011,24 +1017,6 @@ out:  	return rc;  } -typedef int (*deliver_irq_t)(struct kvm_vcpu *vcpu); - -static const deliver_irq_t deliver_irq_funcs[] = { -	[IRQ_PEND_MCHK_EX]        = __deliver_machine_check, -	[IRQ_PEND_MCHK_REP]       = __deliver_machine_check, -	[IRQ_PEND_PROG]           = __deliver_prog, -	[IRQ_PEND_EXT_EMERGENCY]  = __deliver_emergency_signal, -	[IRQ_PEND_EXT_EXTERNAL]   = __deliver_external_call, -	[IRQ_PEND_EXT_CLOCK_COMP] = __deliver_ckc, -	[IRQ_PEND_EXT_CPU_TIMER]  = __deliver_cpu_timer, -	[IRQ_PEND_RESTART]        = __deliver_restart, -	[IRQ_PEND_SET_PREFIX]     = __deliver_set_prefix, -	[IRQ_PEND_PFAULT_INIT]    = __deliver_pfault_init, -	[IRQ_PEND_EXT_SERVICE]    = __deliver_service, -	[IRQ_PEND_PFAULT_DONE]    = __deliver_pfault_done, -	[IRQ_PEND_VIRTIO]         = __deliver_virtio, -}; -  /* Check whether an external call is pending (deliverable or not) */  int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu)  { @@ -1066,13 +1054,19 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)  static u64 __calculate_sltime(struct kvm_vcpu *vcpu)  { -	u64 now, cputm, sltime = 0; +	const u64 now = kvm_s390_get_tod_clock_fast(vcpu->kvm); +	const u64 ckc = vcpu->arch.sie_block->ckc; +	u64 cputm, sltime = 0;  	if (ckc_interrupts_enabled(vcpu)) { -		now = kvm_s390_get_tod_clock_fast(vcpu->kvm); -		sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now); -		/* already expired or overflow? 
*/ -		if (!sltime || vcpu->arch.sie_block->ckc <= now) +		if (vcpu->arch.sie_block->gcr[0] & 0x0020000000000000ul) { +			if ((s64)now < (s64)ckc) +				sltime = tod_to_ns((s64)ckc - (s64)now); +		} else if (now < ckc) { +			sltime = tod_to_ns(ckc - now); +		} +		/* already expired */ +		if (!sltime)  			return 0;  		if (cpu_timer_interrupts_enabled(vcpu)) {  			cputm = kvm_s390_get_cpu_timer(vcpu); @@ -1192,7 +1186,6 @@ void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu)  int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)  {  	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; -	deliver_irq_t func;  	int rc = 0;  	unsigned long irq_type;  	unsigned long irqs; @@ -1212,16 +1205,57 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)  	while ((irqs = deliverable_irqs(vcpu)) && !rc) {  		/* bits are in the reverse order of interrupt priority */  		irq_type = find_last_bit(&irqs, IRQ_PEND_COUNT); -		if (is_ioirq(irq_type)) { +		switch (irq_type) { +		case IRQ_PEND_IO_ISC_0: +		case IRQ_PEND_IO_ISC_1: +		case IRQ_PEND_IO_ISC_2: +		case IRQ_PEND_IO_ISC_3: +		case IRQ_PEND_IO_ISC_4: +		case IRQ_PEND_IO_ISC_5: +		case IRQ_PEND_IO_ISC_6: +		case IRQ_PEND_IO_ISC_7:  			rc = __deliver_io(vcpu, irq_type); -		} else { -			func = deliver_irq_funcs[irq_type]; -			if (!func) { -				WARN_ON_ONCE(func == NULL); -				clear_bit(irq_type, &li->pending_irqs); -				continue; -			} -			rc = func(vcpu); +			break; +		case IRQ_PEND_MCHK_EX: +		case IRQ_PEND_MCHK_REP: +			rc = __deliver_machine_check(vcpu); +			break; +		case IRQ_PEND_PROG: +			rc = __deliver_prog(vcpu); +			break; +		case IRQ_PEND_EXT_EMERGENCY: +			rc = __deliver_emergency_signal(vcpu); +			break; +		case IRQ_PEND_EXT_EXTERNAL: +			rc = __deliver_external_call(vcpu); +			break; +		case IRQ_PEND_EXT_CLOCK_COMP: +			rc = __deliver_ckc(vcpu); +			break; +		case IRQ_PEND_EXT_CPU_TIMER: +			rc = __deliver_cpu_timer(vcpu); +			break; +		case IRQ_PEND_RESTART: +			rc = __deliver_restart(vcpu); +			break; +		case IRQ_PEND_SET_PREFIX: +			rc = __deliver_set_prefix(vcpu); +			break; +		case IRQ_PEND_PFAULT_INIT: +			rc = __deliver_pfault_init(vcpu); +			break; +		case IRQ_PEND_EXT_SERVICE: +			rc = __deliver_service(vcpu); +			break; +		case IRQ_PEND_PFAULT_DONE: +			rc = __deliver_pfault_done(vcpu); +			break; +		case IRQ_PEND_VIRTIO: +			rc = __deliver_virtio(vcpu); +			break; +		default: +			WARN_ONCE(1, "Unknown pending irq type %ld", irq_type); +			clear_bit(irq_type, &li->pending_irqs);  		}  	} @@ -1701,7 +1735,8 @@ static void __floating_irq_kick(struct kvm *kvm, u64 type)  		kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_STOP_INT);  		break;  	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: -		kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_IO_INT); +		if (!(type & KVM_S390_INT_IO_AI_MASK && kvm->arch.gisa)) +			kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_IO_INT);  		break;  	default:  		kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_EXT_INT); diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index ba4c7092335a..339ac0964590 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -86,6 +86,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {  	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },  	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },  	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) }, +	{ "deliver_io_interrupt", VCPU_STAT(deliver_io_int) },  	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },  	{ "instruction_epsw", 
VCPU_STAT(instruction_epsw) },  	{ "instruction_gs", VCPU_STAT(instruction_gs) }, @@ -179,6 +180,28 @@ int kvm_arch_hardware_enable(void)  static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,  			      unsigned long end); +static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta) +{ +	u8 delta_idx = 0; + +	/* +	 * The TOD jumps by delta, we have to compensate this by adding +	 * -delta to the epoch. +	 */ +	delta = -delta; + +	/* sign-extension - we're adding to signed values below */ +	if ((s64)delta < 0) +		delta_idx = -1; + +	scb->epoch += delta; +	if (scb->ecd & ECD_MEF) { +		scb->epdx += delta_idx; +		if (scb->epoch < delta) +			scb->epdx += 1; +	} +} +  /*   * This callback is executed during stop_machine(). All CPUs are therefore   * temporarily stopped. In order not to change guest behavior, we have to @@ -194,13 +217,17 @@ static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,  	unsigned long long *delta = v;  	list_for_each_entry(kvm, &vm_list, vm_list) { -		kvm->arch.epoch -= *delta;  		kvm_for_each_vcpu(i, vcpu, kvm) { -			vcpu->arch.sie_block->epoch -= *delta; +			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta); +			if (i == 0) { +				kvm->arch.epoch = vcpu->arch.sie_block->epoch; +				kvm->arch.epdx = vcpu->arch.sie_block->epdx; +			}  			if (vcpu->arch.cputm_enabled)  				vcpu->arch.cputm_start += *delta;  			if (vcpu->arch.vsie_block) -				vcpu->arch.vsie_block->epoch -= *delta; +				kvm_clock_sync_scb(vcpu->arch.vsie_block, +						   *delta);  		}  	}  	return NOTIFY_OK; @@ -902,12 +929,9 @@ static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)  	if (copy_from_user(>od, (void __user *)attr->addr, sizeof(gtod)))  		return -EFAULT; -	if (test_kvm_facility(kvm, 139)) -		kvm_s390_set_tod_clock_ext(kvm, >od); -	else if (gtod.epoch_idx == 0) -		kvm_s390_set_tod_clock(kvm, gtod.tod); -	else +	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)  		return -EINVAL; +	kvm_s390_set_tod_clock(kvm, >od);  	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",  		gtod.epoch_idx, gtod.tod); @@ -932,13 +956,14 @@ static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)  static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)  { -	u64 gtod; +	struct kvm_s390_vm_tod_clock gtod = { 0 }; -	if (copy_from_user(>od, (void __user *)attr->addr, sizeof(gtod))) +	if (copy_from_user(>od.tod, (void __user *)attr->addr, +			   sizeof(gtod.tod)))  		return -EFAULT; -	kvm_s390_set_tod_clock(kvm, gtod); -	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod); +	kvm_s390_set_tod_clock(kvm, >od); +	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);  	return 0;  } @@ -2122,6 +2147,7 @@ static void sca_add_vcpu(struct kvm_vcpu *vcpu)  		/* we still need the basic sca for the ipte control */  		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);  		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; +		return;  	}  	read_lock(&vcpu->kvm->arch.sca_lock);  	if (vcpu->kvm->arch.use_esca) { @@ -2389,6 +2415,7 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)  	mutex_lock(&vcpu->kvm->lock);  	preempt_disable();  	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch; +	vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;  	preempt_enable();  	mutex_unlock(&vcpu->kvm->lock);  	if (!kvm_is_ucontrol(vcpu->kvm)) { @@ -3021,8 +3048,8 @@ retry:  	return 0;  } -void kvm_s390_set_tod_clock_ext(struct kvm *kvm, -				 const struct kvm_s390_vm_tod_clock *gtod) +void 
kvm_s390_set_tod_clock(struct kvm *kvm, +			    const struct kvm_s390_vm_tod_clock *gtod)  {  	struct kvm_vcpu *vcpu;  	struct kvm_s390_tod_clock_ext htod; @@ -3034,10 +3061,12 @@ void kvm_s390_set_tod_clock_ext(struct kvm *kvm,  	get_tod_clock_ext((char *)&htod);  	kvm->arch.epoch = gtod->tod - htod.tod; -	kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx; - -	if (kvm->arch.epoch > gtod->tod) -		kvm->arch.epdx -= 1; +	kvm->arch.epdx = 0; +	if (test_kvm_facility(kvm, 139)) { +		kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx; +		if (kvm->arch.epoch > gtod->tod) +			kvm->arch.epdx -= 1; +	}  	kvm_s390_vcpu_block_all(kvm);  	kvm_for_each_vcpu(i, vcpu, kvm) { @@ -3050,22 +3079,6 @@ void kvm_s390_set_tod_clock_ext(struct kvm *kvm,  	mutex_unlock(&kvm->lock);  } -void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod) -{ -	struct kvm_vcpu *vcpu; -	int i; - -	mutex_lock(&kvm->lock); -	preempt_disable(); -	kvm->arch.epoch = tod - get_tod_clock(); -	kvm_s390_vcpu_block_all(kvm); -	kvm_for_each_vcpu(i, vcpu, kvm) -		vcpu->arch.sie_block->epoch = kvm->arch.epoch; -	kvm_s390_vcpu_unblock_all(kvm); -	preempt_enable(); -	mutex_unlock(&kvm->lock); -} -  /**   * kvm_arch_fault_in_page - fault-in guest page if necessary   * @vcpu: The corresponding virtual cpu diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index bd31b37b0e6f..f55ac0ef99ea 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -19,8 +19,6 @@  #include <asm/processor.h>  #include <asm/sclp.h> -typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu); -  /* Transactional Memory Execution related macros */  #define IS_TE_ENABLED(vcpu)	((vcpu->arch.sie_block->ecb & ECB_TE))  #define TDB_FORMAT1		1 @@ -283,9 +281,8 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);  int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu);  /* implemented in kvm-s390.c */ -void kvm_s390_set_tod_clock_ext(struct kvm *kvm, -				 const struct kvm_s390_vm_tod_clock *gtod); -void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod); +void kvm_s390_set_tod_clock(struct kvm *kvm, +			    const struct kvm_s390_vm_tod_clock *gtod);  long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable);  int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr);  int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index c4c4e157c036..f0b4185158af 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -85,9 +85,10 @@ int kvm_s390_handle_e3(struct kvm_vcpu *vcpu)  /* Handle SCK (SET CLOCK) interception */  static int handle_set_clock(struct kvm_vcpu *vcpu)  { +	struct kvm_s390_vm_tod_clock gtod = { 0 };  	int rc;  	u8 ar; -	u64 op2, val; +	u64 op2;  	vcpu->stat.instruction_sck++; @@ -97,12 +98,12 @@ static int handle_set_clock(struct kvm_vcpu *vcpu)  	op2 = kvm_s390_get_base_disp_s(vcpu, &ar);  	if (op2 & 7)	/* Operand must be on a doubleword boundary */  		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); -	rc = read_guest(vcpu, op2, ar, &val, sizeof(val)); +	rc = read_guest(vcpu, op2, ar, >od.tod, sizeof(gtod.tod));  	if (rc)  		return kvm_s390_inject_prog_cond(vcpu, rc); -	VCPU_EVENT(vcpu, 3, "SCK: setting guest TOD to 0x%llx", val); -	kvm_s390_set_tod_clock(vcpu->kvm, val); +	VCPU_EVENT(vcpu, 3, "SCK: setting guest TOD to 0x%llx", gtod.tod); +	kvm_s390_set_tod_clock(vcpu->kvm, >od);  	kvm_s390_set_psw_cc(vcpu, 0);  	return 0; @@ -795,55 +796,60 @@ out:  	return rc;  } -static const intercept_handler_t b2_handlers[256] 
= { -	[0x02] = handle_stidp, -	[0x04] = handle_set_clock, -	[0x10] = handle_set_prefix, -	[0x11] = handle_store_prefix, -	[0x12] = handle_store_cpu_address, -	[0x14] = kvm_s390_handle_vsie, -	[0x21] = handle_ipte_interlock, -	[0x29] = handle_iske, -	[0x2a] = handle_rrbe, -	[0x2b] = handle_sske, -	[0x2c] = handle_test_block, -	[0x30] = handle_io_inst, -	[0x31] = handle_io_inst, -	[0x32] = handle_io_inst, -	[0x33] = handle_io_inst, -	[0x34] = handle_io_inst, -	[0x35] = handle_io_inst, -	[0x36] = handle_io_inst, -	[0x37] = handle_io_inst, -	[0x38] = handle_io_inst, -	[0x39] = handle_io_inst, -	[0x3a] = handle_io_inst, -	[0x3b] = handle_io_inst, -	[0x3c] = handle_io_inst, -	[0x50] = handle_ipte_interlock, -	[0x56] = handle_sthyi, -	[0x5f] = handle_io_inst, -	[0x74] = handle_io_inst, -	[0x76] = handle_io_inst, -	[0x7d] = handle_stsi, -	[0xb1] = handle_stfl, -	[0xb2] = handle_lpswe, -}; -  int kvm_s390_handle_b2(struct kvm_vcpu *vcpu)  { -	intercept_handler_t handler; - -	/* -	 * A lot of B2 instructions are priviledged. Here we check for -	 * the privileged ones, that we can handle in the kernel. -	 * Anything else goes to userspace. -	 */ -	handler = b2_handlers[vcpu->arch.sie_block->ipa & 0x00ff]; -	if (handler) -		return handler(vcpu); - -	return -EOPNOTSUPP; +	switch (vcpu->arch.sie_block->ipa & 0x00ff) { +	case 0x02: +		return handle_stidp(vcpu); +	case 0x04: +		return handle_set_clock(vcpu); +	case 0x10: +		return handle_set_prefix(vcpu); +	case 0x11: +		return handle_store_prefix(vcpu); +	case 0x12: +		return handle_store_cpu_address(vcpu); +	case 0x14: +		return kvm_s390_handle_vsie(vcpu); +	case 0x21: +	case 0x50: +		return handle_ipte_interlock(vcpu); +	case 0x29: +		return handle_iske(vcpu); +	case 0x2a: +		return handle_rrbe(vcpu); +	case 0x2b: +		return handle_sske(vcpu); +	case 0x2c: +		return handle_test_block(vcpu); +	case 0x30: +	case 0x31: +	case 0x32: +	case 0x33: +	case 0x34: +	case 0x35: +	case 0x36: +	case 0x37: +	case 0x38: +	case 0x39: +	case 0x3a: +	case 0x3b: +	case 0x3c: +	case 0x5f: +	case 0x74: +	case 0x76: +		return handle_io_inst(vcpu); +	case 0x56: +		return handle_sthyi(vcpu); +	case 0x7d: +		return handle_stsi(vcpu); +	case 0xb1: +		return handle_stfl(vcpu); +	case 0xb2: +		return handle_lpswe(vcpu); +	default: +		return -EOPNOTSUPP; +	}  }  static int handle_epsw(struct kvm_vcpu *vcpu) @@ -1105,25 +1111,22 @@ static int handle_essa(struct kvm_vcpu *vcpu)  	return 0;  } -static const intercept_handler_t b9_handlers[256] = { -	[0x8a] = handle_ipte_interlock, -	[0x8d] = handle_epsw, -	[0x8e] = handle_ipte_interlock, -	[0x8f] = handle_ipte_interlock, -	[0xab] = handle_essa, -	[0xaf] = handle_pfmf, -}; -  int kvm_s390_handle_b9(struct kvm_vcpu *vcpu)  { -	intercept_handler_t handler; - -	/* This is handled just as for the B2 instructions. */ -	handler = b9_handlers[vcpu->arch.sie_block->ipa & 0x00ff]; -	if (handler) -		return handler(vcpu); - -	return -EOPNOTSUPP; +	switch (vcpu->arch.sie_block->ipa & 0x00ff) { +	case 0x8a: +	case 0x8e: +	case 0x8f: +		return handle_ipte_interlock(vcpu); +	case 0x8d: +		return handle_epsw(vcpu); +	case 0xab: +		return handle_essa(vcpu); +	case 0xaf: +		return handle_pfmf(vcpu); +	default: +		return -EOPNOTSUPP; +	}  }  int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu) @@ -1271,22 +1274,20 @@ static int handle_stctg(struct kvm_vcpu *vcpu)  	return rc ? 
kvm_s390_inject_prog_cond(vcpu, rc) : 0;  } -static const intercept_handler_t eb_handlers[256] = { -	[0x2f] = handle_lctlg, -	[0x25] = handle_stctg, -	[0x60] = handle_ri, -	[0x61] = handle_ri, -	[0x62] = handle_ri, -}; -  int kvm_s390_handle_eb(struct kvm_vcpu *vcpu)  { -	intercept_handler_t handler; - -	handler = eb_handlers[vcpu->arch.sie_block->ipb & 0xff]; -	if (handler) -		return handler(vcpu); -	return -EOPNOTSUPP; +	switch (vcpu->arch.sie_block->ipb & 0x000000ff) { +	case 0x25: +		return handle_stctg(vcpu); +	case 0x2f: +		return handle_lctlg(vcpu); +	case 0x60: +	case 0x61: +	case 0x62: +		return handle_ri(vcpu); +	default: +		return -EOPNOTSUPP; +	}  }  static int handle_tprot(struct kvm_vcpu *vcpu) @@ -1346,10 +1347,12 @@ out_unlock:  int kvm_s390_handle_e5(struct kvm_vcpu *vcpu)  { -	/* For e5xx... instructions we only handle TPROT */ -	if ((vcpu->arch.sie_block->ipa & 0x00ff) == 0x01) +	switch (vcpu->arch.sie_block->ipa & 0x00ff) { +	case 0x01:  		return handle_tprot(vcpu); -	return -EOPNOTSUPP; +	default: +		return -EOPNOTSUPP; +	}  }  static int handle_sckpf(struct kvm_vcpu *vcpu) @@ -1380,17 +1383,14 @@ static int handle_ptff(struct kvm_vcpu *vcpu)  	return 0;  } -static const intercept_handler_t x01_handlers[256] = { -	[0x04] = handle_ptff, -	[0x07] = handle_sckpf, -}; -  int kvm_s390_handle_01(struct kvm_vcpu *vcpu)  { -	intercept_handler_t handler; - -	handler = x01_handlers[vcpu->arch.sie_block->ipa & 0x00ff]; -	if (handler) -		return handler(vcpu); -	return -EOPNOTSUPP; +	switch (vcpu->arch.sie_block->ipa & 0x00ff) { +	case 0x04: +		return handle_ptff(vcpu); +	case 0x07: +		return handle_sckpf(vcpu); +	default: +		return -EOPNOTSUPP; +	}  } diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index ec772700ff96..8961e3970901 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -821,6 +821,7 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)  {  	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;  	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o; +	int guest_bp_isolation;  	int rc;  	handle_last_fault(vcpu, vsie_page); @@ -831,6 +832,20 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)  		s390_handle_mcck();  	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); + +	/* save current guest state of bp isolation override */ +	guest_bp_isolation = test_thread_flag(TIF_ISOLATE_BP_GUEST); + +	/* +	 * The guest is running with BPBC, so we have to force it on for our +	 * nested guest. This is done by enabling BPBC globally, so the BPBC +	 * control in the SCB (which the nested guest can modify) is simply +	 * ignored. 
+	 */ +	if (test_kvm_facility(vcpu->kvm, 82) && +	    vcpu->arch.sie_block->fpf & FPF_BPBC) +		set_thread_flag(TIF_ISOLATE_BP_GUEST); +  	local_irq_disable();  	guest_enter_irqoff();  	local_irq_enable(); @@ -840,6 +855,11 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)  	local_irq_disable();  	guest_exit_irqoff();  	local_irq_enable(); + +	/* restore guest state for bp isolation override */ +	if (!guest_bp_isolation) +		clear_thread_flag(TIF_ISOLATE_BP_GUEST); +  	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);  	if (rc == -EINTR) { diff --git a/arch/sh/boot/dts/Makefile b/arch/sh/boot/dts/Makefile index 715def00a436..01d0f7fb14cc 100644 --- a/arch/sh/boot/dts/Makefile +++ b/arch/sh/boot/dts/Makefile @@ -1 +1,3 @@ -obj-$(CONFIG_USE_BUILTIN_DTB) += $(patsubst "%",%,$(CONFIG_BUILTIN_DTB_SOURCE)).dtb.o +ifneq ($(CONFIG_BUILTIN_DTB_SOURCE),"") +obj-y += $(patsubst "%",%,$(CONFIG_BUILTIN_DTB_SOURCE)).dtb.o +endif diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 6bf594ace663..8767e45f1b2b 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -430,6 +430,8 @@ config SPARC_LEON  	depends on SPARC32  	select USB_EHCI_BIG_ENDIAN_MMIO  	select USB_EHCI_BIG_ENDIAN_DESC +	select USB_UHCI_BIG_ENDIAN_MMIO +	select USB_UHCI_BIG_ENDIAN_DESC  	---help---  	  If you say Y here if you are running on a SPARC-LEON processor.  	  The LEON processor is a synthesizable VHDL model of the diff --git a/arch/sparc/include/asm/bug.h b/arch/sparc/include/asm/bug.h index 6f17528356b2..ea53e418f6c0 100644 --- a/arch/sparc/include/asm/bug.h +++ b/arch/sparc/include/asm/bug.h @@ -9,10 +9,14 @@  void do_BUG(const char *file, int line);  #define BUG() do {					\  	do_BUG(__FILE__, __LINE__);			\ +	barrier_before_unreachable();			\  	__builtin_trap();				\  } while (0)  #else -#define BUG()		__builtin_trap() +#define BUG() do {					\ +	barrier_before_unreachable();			\ +	__builtin_trap();				\ +} while (0)  #endif  #define HAVE_ARCH_BUG diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c index 847ddffbf38a..b5cfab711651 100644 --- a/arch/sparc/mm/tlb.c +++ b/arch/sparc/mm/tlb.c @@ -163,13 +163,10 @@ static void tlb_batch_pmd_scan(struct mm_struct *mm, unsigned long vaddr,  	pte_unmap(pte);  } -void set_pmd_at(struct mm_struct *mm, unsigned long addr, -		pmd_t *pmdp, pmd_t pmd) -{ -	pmd_t orig = *pmdp; - -	*pmdp = pmd; +static void __set_pmd_acct(struct mm_struct *mm, unsigned long addr, +			   pmd_t orig, pmd_t pmd) +{  	if (mm == &init_mm)  		return; @@ -219,6 +216,15 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,  	}  } +void set_pmd_at(struct mm_struct *mm, unsigned long addr, +		pmd_t *pmdp, pmd_t pmd) +{ +	pmd_t orig = *pmdp; + +	*pmdp = pmd; +	__set_pmd_acct(mm, addr, orig, pmd); +} +  static inline pmd_t pmdp_establish(struct vm_area_struct *vma,  		unsigned long address, pmd_t *pmdp, pmd_t pmd)  { @@ -227,6 +233,7 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,  	do {  		old = *pmdp;  	} while (cmpxchg64(&pmdp->pmd, old.pmd, pmd.pmd) != old.pmd); +	__set_pmd_acct(vma->vm_mm, address, old, pmd);  	return old;  } diff --git a/arch/x86/.gitignore b/arch/x86/.gitignore index aff152c87cf4..5a82bac5e0bc 100644 --- a/arch/x86/.gitignore +++ b/arch/x86/.gitignore @@ -1,6 +1,7 @@  boot/compressed/vmlinux  tools/test_get_len  tools/insn_sanity +tools/insn_decoder_test  purgatory/kexec-purgatory.c  purgatory/purgatory.ro diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 63bf349b2b24..0fa71a78ec99 100644 --- a/arch/x86/Kconfig +++ 
b/arch/x86/Kconfig @@ -423,12 +423,6 @@ config X86_MPPARSE  	  For old smp systems that do not have proper acpi support. Newer systems  	  (esp with 64bit cpus) with acpi support, MADT and DSDT will override it -config X86_BIGSMP -	bool "Support for big SMP systems with more than 8 CPUs" -	depends on X86_32 && SMP -	---help--- -	  This option is needed for the systems that have more than 8 CPUs -  config GOLDFISH         def_bool y         depends on X86_GOLDFISH @@ -436,6 +430,7 @@ config GOLDFISH  config RETPOLINE  	bool "Avoid speculative indirect branches in kernel"  	default y +	select STACK_VALIDATION if HAVE_STACK_VALIDATION  	help  	  Compile kernel with the retpoline compiler options to guard against  	  kernel-to-user data leaks by avoiding speculative indirect @@ -460,6 +455,12 @@ config INTEL_RDT  	  Say N if unsure.  if X86_32 +config X86_BIGSMP +	bool "Support for big SMP systems with more than 8 CPUs" +	depends on SMP +	---help--- +	  This option is needed for the systems that have more than 8 CPUs +  config X86_EXTENDED_PLATFORM  	bool "Support for extended (non-PC) x86 platforms"  	default y @@ -949,25 +950,66 @@ config MAXSMP  	  Enable maximum number of CPUS and NUMA Nodes for this architecture.  	  If unsure, say N. +# +# The maximum number of CPUs supported: +# +# The main config value is NR_CPUS, which defaults to NR_CPUS_DEFAULT, +# and which can be configured interactively in the +# [NR_CPUS_RANGE_BEGIN ... NR_CPUS_RANGE_END] range. +# +# The ranges are different on 32-bit and 64-bit kernels, depending on +# hardware capabilities and scalability features of the kernel. +# +# ( If MAXSMP is enabled we just use the highest possible value and disable +#   interactive configuration. ) +# + +config NR_CPUS_RANGE_BEGIN +	int +	default NR_CPUS_RANGE_END if MAXSMP +	default    1 if !SMP +	default    2 + +config NR_CPUS_RANGE_END +	int +	depends on X86_32 +	default   64 if  SMP &&  X86_BIGSMP +	default    8 if  SMP && !X86_BIGSMP +	default    1 if !SMP + +config NR_CPUS_RANGE_END +	int +	depends on X86_64 +	default 8192 if  SMP && ( MAXSMP ||  CPUMASK_OFFSTACK) +	default  512 if  SMP && (!MAXSMP && !CPUMASK_OFFSTACK) +	default    1 if !SMP + +config NR_CPUS_DEFAULT +	int +	depends on X86_32 +	default   32 if  X86_BIGSMP +	default    8 if  SMP +	default    1 if !SMP + +config NR_CPUS_DEFAULT +	int +	depends on X86_64 +	default 8192 if  MAXSMP +	default   64 if  SMP +	default    1 if !SMP +  config NR_CPUS  	int "Maximum number of CPUs" if SMP && !MAXSMP -	range 2 8 if SMP && X86_32 && !X86_BIGSMP -	range 2 64 if SMP && X86_32 && X86_BIGSMP -	range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK && X86_64 -	range 2 8192 if SMP && !MAXSMP && CPUMASK_OFFSTACK && X86_64 -	default "1" if !SMP -	default "8192" if MAXSMP -	default "32" if SMP && X86_BIGSMP -	default "8" if SMP && X86_32 -	default "64" if SMP +	range NR_CPUS_RANGE_BEGIN NR_CPUS_RANGE_END +	default NR_CPUS_DEFAULT  	---help---  	  This allows you to specify the maximum number of CPUs which this  	  kernel will support.  If CPUMASK_OFFSTACK is enabled, the maximum  	  supported value is 8192, otherwise the maximum value is 512.  The  	  minimum value which makes sense is 2. -	  This is purely to save memory - each supported CPU adds -	  approximately eight kilobytes to the kernel image. +	  This is purely to save memory: each supported CPU adds about 8KB +	  to the kernel image.  
config SCHED_SMT  	bool "SMT (Hyperthreading) scheduler support" @@ -1363,7 +1405,7 @@ config HIGHMEM4G  config HIGHMEM64G  	bool "64GB" -	depends on !M486 +	depends on !M486 && !M586 && !M586TSC && !M586MMX && !MGEODE_LX && !MGEODEGX1 && !MCYRIXIII && !MELAN && !MWINCHIPC6 && !WINCHIP3D && !MK6  	select X86_PAE  	---help---  	  Select this if you have a 32-bit processor and more than 4 @@ -2265,7 +2307,7 @@ choice  	  it can be used to assist security vulnerability exploitation.  	  This setting can be changed at boot time via the kernel command -	  line parameter vsyscall=[native|emulate|none]. +	  line parameter vsyscall=[emulate|none].  	  On a system with recent enough glibc (2.14 or newer) and no  	  static binaries, you can say None without a performance penalty @@ -2273,15 +2315,6 @@ choice  	  If unsure, select "Emulate". -	config LEGACY_VSYSCALL_NATIVE -		bool "Native" -		help -		  Actual executable code is located in the fixed vsyscall -		  address mapping, implementing time() efficiently. Since -		  this makes the mapping executable, it can be used during -		  security vulnerability exploitation (traditionally as -		  ROP gadgets). This configuration is not recommended. -  	config LEGACY_VSYSCALL_EMULATE  		bool "Emulate"  		help diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 65a9a4716e34..638411f22267 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -315,19 +315,6 @@ config X86_L1_CACHE_SHIFT  	default "4" if MELAN || M486 || MGEODEGX1  	default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX -config X86_PPRO_FENCE -	bool "PentiumPro memory ordering errata workaround" -	depends on M686 || M586MMX || M586TSC || M586 || M486 || MGEODEGX1 -	---help--- -	  Old PentiumPro multiprocessor systems had errata that could cause -	  memory operations to violate the x86 ordering standard in rare cases. -	  Enabling this option will attempt to work around some (but not all) -	  occurrences of this problem, at the cost of much heavier spinlock and -	  memory barrier operations. - -	  If unsure, say n here. Even distro kernels should think twice before -	  enabling this: there are few systems, and an unlikely bug. -  config X86_F00F_BUG  	def_bool y  	depends on M586MMX || M586TSC || M586 || M486 @@ -374,7 +361,7 @@ config X86_TSC  config X86_CMPXCHG64  	def_bool y -	depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM +	depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586TSC || M586MMX || MATOM || MGEODE_LX || MGEODEGX1 || MK6 || MK7 || MK8  # this should be set for all -march=.. options where the compiler  # generates cmov. @@ -385,7 +372,7 @@ config X86_CMOV  config X86_MINIMUM_CPU_FAMILY  	int  	default "64" if X86_64 -	default "6" if X86_32 && X86_P6_NOP +	default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCRUSOE || MCORE2 || MK7 || MK8)  	default "5" if X86_32 && X86_CMPXCHG64  	default "4" diff --git a/arch/x86/Makefile b/arch/x86/Makefile index fad55160dcb9..1c4d012550ec 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -223,6 +223,15 @@ KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr)  LDFLAGS := -m elf_$(UTS_MACHINE) +# +# The 64-bit kernel must be aligned to 2MB.  
Pass -z max-page-size=0x200000 to +# the linker to force 2MB page size regardless of the default page size used +# by the linker. +# +ifdef CONFIG_X86_64 +LDFLAGS += $(call ld-option, -z max-page-size=0x200000) +endif +  # Speed up the build  KBUILD_CFLAGS += -pipe  # Workaround for a gcc prelease that unfortunately was shipped in a suse release @@ -232,10 +241,9 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables  # Avoid indirect branches in kernel to deal with Spectre  ifdef CONFIG_RETPOLINE -    RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register) -    ifneq ($(RETPOLINE_CFLAGS),) -        KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE -    endif +ifneq ($(RETPOLINE_CFLAGS),) +  KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE +endif  endif  archscripts: scripts_basic diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 353e20c3f114..886a9115af62 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -439,7 +439,7 @@ setup_uga32(void **uga_handle, unsigned long size, u32 *width, u32 *height)  	struct efi_uga_draw_protocol *uga = NULL, *first_uga;  	efi_guid_t uga_proto = EFI_UGA_PROTOCOL_GUID;  	unsigned long nr_ugas; -	u32 *handles = (u32 *)uga_handle;; +	u32 *handles = (u32 *)uga_handle;  	efi_status_t status = EFI_INVALID_PARAMETER;  	int i; @@ -484,7 +484,7 @@ setup_uga64(void **uga_handle, unsigned long size, u32 *width, u32 *height)  	struct efi_uga_draw_protocol *uga = NULL, *first_uga;  	efi_guid_t uga_proto = EFI_UGA_PROTOCOL_GUID;  	unsigned long nr_ugas; -	u64 *handles = (u64 *)uga_handle;; +	u64 *handles = (u64 *)uga_handle;  	efi_status_t status = EFI_INVALID_PARAMETER;  	int i; diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 98761a1576ce..252fee320816 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -309,6 +309,10 @@ static void parse_elf(void *output)  		switch (phdr->p_type) {  		case PT_LOAD: +#ifdef CONFIG_X86_64 +			if ((phdr->p_align % 0x200000) != 0) +				error("Alignment of LOAD segment isn't multiple of 2MB"); +#endif  #ifdef CONFIG_RELOCATABLE  			dest = output;  			dest += (phdr->p_paddr - LOAD_PHYSICAL_ADDR); diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c index 36870b26067a..d08805032f01 100644 --- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c +++ b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c @@ -57,10 +57,12 @@ void sha512_mb_mgr_init_avx2(struct sha512_mb_mgr *state)  {  	unsigned int j; -	state->lens[0] = 0; -	state->lens[1] = 1; -	state->lens[2] = 2; -	state->lens[3] = 3; +	/* initially all lanes are unused */ +	state->lens[0] = 0xFFFFFFFF00000000; +	state->lens[1] = 0xFFFFFFFF00000001; +	state->lens[2] = 0xFFFFFFFF00000002; +	state->lens[3] = 0xFFFFFFFF00000003; +  	state->unused_lanes = 0xFF03020100;  	for (j = 0; j < 4; j++)  		state->ldata[j].job_in_lane = NULL; diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 3f48f695d5e6..be63330c5511 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -97,80 +97,78 @@ For 32-bit we have the following conventions - kernel is built with  #define SIZEOF_PTREGS	21*8 -	.macro ALLOC_PT_GPREGS_ON_STACK -	addq	$-(15*8), %rsp -	.endm - -	.macro SAVE_C_REGS_HELPER offset=0 rax=1 rcx=1 r8910=1 r11=1 -	.if \r11 -	movq %r11, 6*8+\offset(%rsp) -	.endif -	.if \r8910 -	movq %r10, 7*8+\offset(%rsp) -	movq %r9,  
8*8+\offset(%rsp) -	movq %r8,  9*8+\offset(%rsp) -	.endif -	.if \rax -	movq %rax, 10*8+\offset(%rsp) +.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0 +	/* +	 * Push registers and sanitize registers of values that a +	 * speculation attack might otherwise want to exploit. The +	 * lower registers are likely clobbered well before they +	 * could be put to use in a speculative execution gadget. +	 * Interleave XOR with PUSH for better uop scheduling: +	 */ +	.if \save_ret +	pushq	%rsi		/* pt_regs->si */ +	movq	8(%rsp), %rsi	/* temporarily store the return address in %rsi */ +	movq	%rdi, 8(%rsp)	/* pt_regs->di (overwriting original return address) */ +	.else +	pushq   %rdi		/* pt_regs->di */ +	pushq   %rsi		/* pt_regs->si */  	.endif -	.if \rcx -	movq %rcx, 11*8+\offset(%rsp) +	pushq	\rdx		/* pt_regs->dx */ +	pushq   %rcx		/* pt_regs->cx */ +	pushq   \rax		/* pt_regs->ax */ +	pushq   %r8		/* pt_regs->r8 */ +	xorl	%r8d, %r8d	/* nospec   r8 */ +	pushq   %r9		/* pt_regs->r9 */ +	xorl	%r9d, %r9d	/* nospec   r9 */ +	pushq   %r10		/* pt_regs->r10 */ +	xorl	%r10d, %r10d	/* nospec   r10 */ +	pushq   %r11		/* pt_regs->r11 */ +	xorl	%r11d, %r11d	/* nospec   r11*/ +	pushq	%rbx		/* pt_regs->rbx */ +	xorl    %ebx, %ebx	/* nospec   rbx*/ +	pushq	%rbp		/* pt_regs->rbp */ +	xorl    %ebp, %ebp	/* nospec   rbp*/ +	pushq	%r12		/* pt_regs->r12 */ +	xorl	%r12d, %r12d	/* nospec   r12*/ +	pushq	%r13		/* pt_regs->r13 */ +	xorl	%r13d, %r13d	/* nospec   r13*/ +	pushq	%r14		/* pt_regs->r14 */ +	xorl	%r14d, %r14d	/* nospec   r14*/ +	pushq	%r15		/* pt_regs->r15 */ +	xorl	%r15d, %r15d	/* nospec   r15*/ +	UNWIND_HINT_REGS +	.if \save_ret +	pushq	%rsi		/* return address on top of stack */  	.endif -	movq %rdx, 12*8+\offset(%rsp) -	movq %rsi, 13*8+\offset(%rsp) -	movq %rdi, 14*8+\offset(%rsp) -	UNWIND_HINT_REGS offset=\offset extra=0 -	.endm -	.macro SAVE_C_REGS offset=0 -	SAVE_C_REGS_HELPER \offset, 1, 1, 1, 1 -	.endm -	.macro SAVE_C_REGS_EXCEPT_RAX_RCX offset=0 -	SAVE_C_REGS_HELPER \offset, 0, 0, 1, 1 -	.endm -	.macro SAVE_C_REGS_EXCEPT_R891011 -	SAVE_C_REGS_HELPER 0, 1, 1, 0, 0 -	.endm -	.macro SAVE_C_REGS_EXCEPT_RCX_R891011 -	SAVE_C_REGS_HELPER 0, 1, 0, 0, 0 -	.endm -	.macro SAVE_C_REGS_EXCEPT_RAX_RCX_R11 -	SAVE_C_REGS_HELPER 0, 0, 0, 1, 0 -	.endm - -	.macro SAVE_EXTRA_REGS offset=0 -	movq %r15, 0*8+\offset(%rsp) -	movq %r14, 1*8+\offset(%rsp) -	movq %r13, 2*8+\offset(%rsp) -	movq %r12, 3*8+\offset(%rsp) -	movq %rbp, 4*8+\offset(%rsp) -	movq %rbx, 5*8+\offset(%rsp) -	UNWIND_HINT_REGS offset=\offset -	.endm - -	.macro POP_EXTRA_REGS +.endm + +.macro POP_REGS pop_rdi=1 skip_r11rcx=0  	popq %r15  	popq %r14  	popq %r13  	popq %r12  	popq %rbp  	popq %rbx -	.endm - -	.macro POP_C_REGS +	.if \skip_r11rcx +	popq %rsi +	.else  	popq %r11 +	.endif  	popq %r10  	popq %r9  	popq %r8  	popq %rax +	.if \skip_r11rcx +	popq %rsi +	.else  	popq %rcx +	.endif  	popq %rdx  	popq %rsi +	.if \pop_rdi  	popq %rdi -	.endm - -	.macro icebp -	.byte 0xf1 -	.endm +	.endif +.endm  /*   * This is a sneaky trick to help the unwinder find pt_regs on the stack.  The @@ -178,17 +176,12 @@ For 32-bit we have the following conventions - kernel is built with   * is just setting the LSB, which makes it an invalid stack address and is also   * a signal to the unwinder that it's a pt_regs pointer in disguise.   * - * NOTE: This macro must be used *after* SAVE_EXTRA_REGS because it corrupts + * NOTE: This macro must be used *after* PUSH_AND_CLEAR_REGS because it corrupts   * the original rbp.   
*/  .macro ENCODE_FRAME_POINTER ptregs_offset=0  #ifdef CONFIG_FRAME_POINTER -	.if \ptregs_offset -		leaq \ptregs_offset(%rsp), %rbp -	.else -		mov %rsp, %rbp -	.endif -	orq	$0x1, %rbp +	leaq 1+\ptregs_offset(%rsp), %rbp  #endif  .endm diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 16c2c022540d..6ad064c8cf35 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -252,8 +252,7 @@ ENTRY(__switch_to_asm)  	 * exist, overwrite the RSB with entries which capture  	 * speculative execution to prevent attack.  	 */ -	/* Clobbers %ebx */ -	FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW +	FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW  #endif  	/* restore callee-saved registers */ diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 30c8c5344c4a..18ed349b4f83 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -55,7 +55,7 @@ END(native_usergs_sysret64)  .macro TRACE_IRQS_FLAGS flags:req  #ifdef CONFIG_TRACE_IRQFLAGS -	bt	$9, \flags		/* interrupts off? */ +	btl	$9, \flags		/* interrupts off? */  	jnc	1f  	TRACE_IRQS_ON  1: @@ -213,7 +213,7 @@ ENTRY(entry_SYSCALL_64)  	swapgs  	/* -	 * This path is not taken when PAGE_TABLE_ISOLATION is disabled so it +	 * This path is only taken when PAGE_TABLE_ISOLATION is disabled so it  	 * is not required to switch CR3.  	 */  	movq	%rsp, PER_CPU_VAR(rsp_scratch) @@ -227,22 +227,8 @@ ENTRY(entry_SYSCALL_64)  	pushq	%rcx				/* pt_regs->ip */  GLOBAL(entry_SYSCALL_64_after_hwframe)  	pushq	%rax				/* pt_regs->orig_ax */ -	pushq	%rdi				/* pt_regs->di */ -	pushq	%rsi				/* pt_regs->si */ -	pushq	%rdx				/* pt_regs->dx */ -	pushq	%rcx				/* pt_regs->cx */ -	pushq	$-ENOSYS			/* pt_regs->ax */ -	pushq	%r8				/* pt_regs->r8 */ -	pushq	%r9				/* pt_regs->r9 */ -	pushq	%r10				/* pt_regs->r10 */ -	pushq	%r11				/* pt_regs->r11 */ -	pushq	%rbx				/* pt_regs->rbx */ -	pushq	%rbp				/* pt_regs->rbp */ -	pushq	%r12				/* pt_regs->r12 */ -	pushq	%r13				/* pt_regs->r13 */ -	pushq	%r14				/* pt_regs->r14 */ -	pushq	%r15				/* pt_regs->r15 */ -	UNWIND_HINT_REGS + +	PUSH_AND_CLEAR_REGS rax=$-ENOSYS  	TRACE_IRQS_OFF @@ -321,15 +307,7 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)  syscall_return_via_sysret:  	/* rcx and r11 are already restored (see code above) */  	UNWIND_HINT_EMPTY -	POP_EXTRA_REGS -	popq	%rsi	/* skip r11 */ -	popq	%r10 -	popq	%r9 -	popq	%r8 -	popq	%rax -	popq	%rsi	/* skip rcx */ -	popq	%rdx -	popq	%rsi +	POP_REGS pop_rdi=0 skip_r11rcx=1  	/*  	 * Now all regs are restored except RSP and RDI. @@ -386,8 +364,7 @@ ENTRY(__switch_to_asm)  	 * exist, overwrite the RSB with entries which capture  	 * speculative execution to prevent attack.  	 */ -	/* Clobbers %rbx */ -	FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW +	FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW  #endif  	/* restore callee-saved registers */ @@ -471,9 +448,19 @@ END(irq_entries_start)   *   * The invariant is that, if irq_count != -1, then the IRQ stack is in use.   */ -.macro ENTER_IRQ_STACK regs=1 old_rsp +.macro ENTER_IRQ_STACK regs=1 old_rsp save_ret=0  	DEBUG_ENTRY_ASSERT_IRQS_OFF + +	.if \save_ret +	/* +	 * If save_ret is set, the original stack contains one additional +	 * entry -- the return address. Therefore, move the address one +	 * entry below %rsp to \old_rsp. 
+	 */ +	leaq	8(%rsp), \old_rsp +	.else  	movq	%rsp, \old_rsp +	.endif  	.if \regs  	UNWIND_HINT_REGS base=\old_rsp @@ -519,6 +506,15 @@ END(irq_entries_start)  	.if \regs  	UNWIND_HINT_REGS indirect=1  	.endif + +	.if \save_ret +	/* +	 * Push the return address to the stack. This return address can +	 * be found at the "real" original RSP, which was offset by 8 at +	 * the beginning of this macro. +	 */ +	pushq	-8(\old_rsp) +	.endif  .endm  /* @@ -542,29 +538,65 @@ END(irq_entries_start)  .endm  /* - * Interrupt entry/exit. - * - * Interrupt entry points save only callee clobbered registers in fast path. + * Interrupt entry helper function.   * - * Entry runs with interrupts off. + * Entry runs with interrupts off. Stack layout at entry: + * +----------------------------------------------------+ + * | regs->ss						| + * | regs->rsp						| + * | regs->eflags					| + * | regs->cs						| + * | regs->ip						| + * +----------------------------------------------------+ + * | regs->orig_ax = ~(interrupt number)		| + * +----------------------------------------------------+ + * | return address					| + * +----------------------------------------------------+   */ - -/* 0(%rsp): ~(interrupt number) */ -	.macro interrupt func +ENTRY(interrupt_entry) +	UNWIND_HINT_FUNC +	ASM_CLAC  	cld -	testb	$3, CS-ORIG_RAX(%rsp) +	testb	$3, CS-ORIG_RAX+8(%rsp)  	jz	1f  	SWAPGS -	call	switch_to_thread_stack + +	/* +	 * Switch to the thread stack. The IRET frame and orig_ax are +	 * on the stack, as well as the return address. RDI..R12 are +	 * not (yet) on the stack and space has not (yet) been +	 * allocated for them. +	 */ +	pushq	%rdi + +	/* Need to switch before accessing the thread stack. */ +	SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi +	movq	%rsp, %rdi +	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp + +	 /* +	  * We have RDI, return address, and orig_ax on the stack on +	  * top of the IRET frame. That means offset=24 +	  */ +	UNWIND_HINT_IRET_REGS base=%rdi offset=24 + +	pushq	7*8(%rdi)		/* regs->ss */ +	pushq	6*8(%rdi)		/* regs->rsp */ +	pushq	5*8(%rdi)		/* regs->eflags */ +	pushq	4*8(%rdi)		/* regs->cs */ +	pushq	3*8(%rdi)		/* regs->ip */ +	pushq	2*8(%rdi)		/* regs->orig_ax */ +	pushq	8(%rdi)			/* return address */ +	UNWIND_HINT_FUNC + +	movq	(%rdi), %rdi  1: -	ALLOC_PT_GPREGS_ON_STACK -	SAVE_C_REGS -	SAVE_EXTRA_REGS -	ENCODE_FRAME_POINTER +	PUSH_AND_CLEAR_REGS save_ret=1 +	ENCODE_FRAME_POINTER 8 -	testb	$3, CS(%rsp) +	testb	$3, CS+8(%rsp)  	jz	1f  	/* @@ -572,7 +604,7 @@ END(irq_entries_start)  	 *  	 * We need to tell lockdep that IRQs are off.  We can't do this until  	 * we fix gsbase, and we should do it before enter_from_user_mode -	 * (which can take locks).  Since TRACE_IRQS_OFF idempotent, +	 * (which can take locks).  Since TRACE_IRQS_OFF is idempotent,  	 * the simplest way to handle it is to just call it twice if  	 * we enter from user mode.  There's no reason to optimize this since  	 * TRACE_IRQS_OFF is a no-op if lockdep is off. @@ -582,12 +614,15 @@ END(irq_entries_start)  	CALL_enter_from_user_mode  1: -	ENTER_IRQ_STACK old_rsp=%rdi +	ENTER_IRQ_STACK old_rsp=%rdi save_ret=1  	/* We entered an interrupt context - irqs are off: */  	TRACE_IRQS_OFF -	call	\func	/* rdi points to pt_regs */ -	.endm +	ret +END(interrupt_entry) + + +/* Interrupt entry/exit. 
*/  	/*  	 * The interrupt stubs push (~vector+0x80) onto the stack and @@ -595,9 +630,10 @@ END(irq_entries_start)  	 */  	.p2align CONFIG_X86_L1_CACHE_SHIFT  common_interrupt: -	ASM_CLAC  	addq	$-0x80, (%rsp)			/* Adjust vector to [-256, -1] range */ -	interrupt do_IRQ +	call	interrupt_entry +	UNWIND_HINT_REGS indirect=1 +	call	do_IRQ	/* rdi points to pt_regs */  	/* 0(%rsp): old RSP */  ret_from_intr:  	DISABLE_INTERRUPTS(CLBR_ANY) @@ -622,15 +658,7 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)  	ud2  1:  #endif -	POP_EXTRA_REGS -	popq	%r11 -	popq	%r10 -	popq	%r9 -	popq	%r8 -	popq	%rax -	popq	%rcx -	popq	%rdx -	popq	%rsi +	POP_REGS pop_rdi=0  	/*  	 * The stack is now user RDI, orig_ax, RIP, CS, EFLAGS, RSP, SS. @@ -688,8 +716,7 @@ GLOBAL(restore_regs_and_return_to_kernel)  	ud2  1:  #endif -	POP_EXTRA_REGS -	POP_C_REGS +	POP_REGS  	addq	$8, %rsp	/* skip regs->orig_ax */  	/*  	 * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization @@ -799,10 +826,11 @@ END(common_interrupt)  .macro apicinterrupt3 num sym do_sym  ENTRY(\sym)  	UNWIND_HINT_IRET_REGS -	ASM_CLAC  	pushq	$~(\num)  .Lcommon_\sym: -	interrupt \do_sym +	call	interrupt_entry +	UNWIND_HINT_REGS indirect=1 +	call	\do_sym	/* rdi points to pt_regs */  	jmp	ret_from_intr  END(\sym)  .endm @@ -865,34 +893,6 @@ apicinterrupt IRQ_WORK_VECTOR			irq_work_interrupt		smp_irq_work_interrupt   */  #define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8) -/* - * Switch to the thread stack.  This is called with the IRET frame and - * orig_ax on the stack.  (That is, RDI..R12 are not on the stack and - * space has not been allocated for them.) - */ -ENTRY(switch_to_thread_stack) -	UNWIND_HINT_FUNC - -	pushq	%rdi -	/* Need to switch before accessing the thread stack. */ -	SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi -	movq	%rsp, %rdi -	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp -	UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI - -	pushq	7*8(%rdi)		/* regs->ss */ -	pushq	6*8(%rdi)		/* regs->rsp */ -	pushq	5*8(%rdi)		/* regs->eflags */ -	pushq	4*8(%rdi)		/* regs->cs */ -	pushq	3*8(%rdi)		/* regs->ip */ -	pushq	2*8(%rdi)		/* regs->orig_ax */ -	pushq	8(%rdi)			/* return address */ -	UNWIND_HINT_FUNC - -	movq	(%rdi), %rdi -	ret -END(switch_to_thread_stack) -  .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1  ENTRY(\sym)  	UNWIND_HINT_IRET_REGS offset=\has_error_code*8 @@ -908,10 +908,8 @@ ENTRY(\sym)  	pushq	$-1				/* ORIG_RAX: no syscall to restart */  	.endif -	ALLOC_PT_GPREGS_ON_STACK -  	.if \paranoid < 2 -	testb	$3, CS(%rsp)			/* If coming from userspace, switch stacks */ +	testb	$3, CS-ORIG_RAX(%rsp)		/* If coming from userspace, switch stacks */  	jnz	.Lfrom_usermode_switch_stack_\@  	.endif @@ -1121,9 +1119,7 @@ ENTRY(xen_failsafe_callback)  	addq	$0x30, %rsp  	UNWIND_HINT_IRET_REGS  	pushq	$-1 /* orig_ax = -1 => not a system call */ -	ALLOC_PT_GPREGS_ON_STACK -	SAVE_C_REGS -	SAVE_EXTRA_REGS +	PUSH_AND_CLEAR_REGS  	ENCODE_FRAME_POINTER  	jmp	error_exit  END(xen_failsafe_callback) @@ -1142,7 +1138,7 @@ apicinterrupt3 HYPERV_REENLIGHTENMENT_VECTOR \  #endif /* CONFIG_HYPERV */  idtentry debug			do_debug		has_error_code=0	paranoid=1 shift_ist=DEBUG_STACK -idtentry int3			do_int3			has_error_code=0	paranoid=1 shift_ist=DEBUG_STACK +idtentry int3			do_int3			has_error_code=0  idtentry stack_segment		do_stack_segment	has_error_code=1  #ifdef CONFIG_XEN @@ -1170,8 +1166,7 @@ idtentry machine_check		do_mce			has_error_code=0	paranoid=1  ENTRY(paranoid_entry)  	UNWIND_HINT_FUNC  	cld -	SAVE_C_REGS 8 -	
SAVE_EXTRA_REGS 8 +	PUSH_AND_CLEAR_REGS save_ret=1  	ENCODE_FRAME_POINTER 8  	movl	$1, %ebx  	movl	$MSR_GS_BASE, %ecx @@ -1211,21 +1206,20 @@ ENTRY(paranoid_exit)  	jmp	.Lparanoid_exit_restore  .Lparanoid_exit_no_swapgs:  	TRACE_IRQS_IRETQ_DEBUG +	RESTORE_CR3	scratch_reg=%rbx save_reg=%r14  .Lparanoid_exit_restore:  	jmp restore_regs_and_return_to_kernel  END(paranoid_exit)  /* - * Save all registers in pt_regs, and switch gs if needed. + * Save all registers in pt_regs, and switch GS if needed.   * Return: EBX=0: came from user mode; EBX=1: otherwise   */  ENTRY(error_entry)  	UNWIND_HINT_FUNC  	cld -	SAVE_C_REGS 8 -	SAVE_EXTRA_REGS 8 +	PUSH_AND_CLEAR_REGS save_ret=1  	ENCODE_FRAME_POINTER 8 -	xorl	%ebx, %ebx  	testb	$3, CS+8(%rsp)  	jz	.Lerror_kernelspace @@ -1406,22 +1400,7 @@ ENTRY(nmi)  	pushq	1*8(%rdx)	/* pt_regs->rip */  	UNWIND_HINT_IRET_REGS  	pushq   $-1		/* pt_regs->orig_ax */ -	pushq   %rdi		/* pt_regs->di */ -	pushq   %rsi		/* pt_regs->si */ -	pushq   (%rdx)		/* pt_regs->dx */ -	pushq   %rcx		/* pt_regs->cx */ -	pushq   %rax		/* pt_regs->ax */ -	pushq   %r8		/* pt_regs->r8 */ -	pushq   %r9		/* pt_regs->r9 */ -	pushq   %r10		/* pt_regs->r10 */ -	pushq   %r11		/* pt_regs->r11 */ -	pushq	%rbx		/* pt_regs->rbx */ -	pushq	%rbp		/* pt_regs->rbp */ -	pushq	%r12		/* pt_regs->r12 */ -	pushq	%r13		/* pt_regs->r13 */ -	pushq	%r14		/* pt_regs->r14 */ -	pushq	%r15		/* pt_regs->r15 */ -	UNWIND_HINT_REGS +	PUSH_AND_CLEAR_REGS rdx=(%rdx)  	ENCODE_FRAME_POINTER  	/* @@ -1631,7 +1610,6 @@ end_repeat_nmi:  	 * frame to point back to repeat_nmi.  	 */  	pushq	$-1				/* ORIG_RAX: no syscall to restart */ -	ALLOC_PT_GPREGS_ON_STACK  	/*  	 * Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit @@ -1655,8 +1633,7 @@ end_repeat_nmi:  nmi_swapgs:  	SWAPGS_UNSAFE_STACK  nmi_restore: -	POP_EXTRA_REGS -	POP_C_REGS +	POP_REGS  	/*  	 * Skip orig_ax and the "outermost" frame to point RSP at the "iret" diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 98d5358e4041..08425c42f8b7 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -85,15 +85,25 @@ ENTRY(entry_SYSENTER_compat)  	pushq	%rcx			/* pt_regs->cx */  	pushq	$-ENOSYS		/* pt_regs->ax */  	pushq   $0			/* pt_regs->r8  = 0 */ +	xorl	%r8d, %r8d		/* nospec   r8 */  	pushq   $0			/* pt_regs->r9  = 0 */ +	xorl	%r9d, %r9d		/* nospec   r9 */  	pushq   $0			/* pt_regs->r10 = 0 */ +	xorl	%r10d, %r10d		/* nospec   r10 */  	pushq   $0			/* pt_regs->r11 = 0 */ +	xorl	%r11d, %r11d		/* nospec   r11 */  	pushq   %rbx                    /* pt_regs->rbx */ +	xorl	%ebx, %ebx		/* nospec   rbx */  	pushq   %rbp                    /* pt_regs->rbp (will be overwritten) */ +	xorl	%ebp, %ebp		/* nospec   rbp */  	pushq   $0			/* pt_regs->r12 = 0 */ +	xorl	%r12d, %r12d		/* nospec   r12 */  	pushq   $0			/* pt_regs->r13 = 0 */ +	xorl	%r13d, %r13d		/* nospec   r13 */  	pushq   $0			/* pt_regs->r14 = 0 */ +	xorl	%r14d, %r14d		/* nospec   r14 */  	pushq   $0			/* pt_regs->r15 = 0 */ +	xorl	%r15d, %r15d		/* nospec   r15 */  	cld  	/* @@ -214,15 +224,25 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)  	pushq	%rbp			/* pt_regs->cx (stashed in bp) */  	pushq	$-ENOSYS		/* pt_regs->ax */  	pushq   $0			/* pt_regs->r8  = 0 */ +	xorl	%r8d, %r8d		/* nospec   r8 */  	pushq   $0			/* pt_regs->r9  = 0 */ +	xorl	%r9d, %r9d		/* nospec   r9 */  	pushq   $0			/* pt_regs->r10 = 0 */ +	xorl	%r10d, %r10d		/* nospec   r10 */  	pushq   $0			/* pt_regs->r11 = 0 */ +	xorl	%r11d, %r11d		/* nospec   r11 */  	pushq   
%rbx                    /* pt_regs->rbx */ +	xorl	%ebx, %ebx		/* nospec   rbx */  	pushq   %rbp                    /* pt_regs->rbp (will be overwritten) */ +	xorl	%ebp, %ebp		/* nospec   rbp */  	pushq   $0			/* pt_regs->r12 = 0 */ +	xorl	%r12d, %r12d		/* nospec   r12 */  	pushq   $0			/* pt_regs->r13 = 0 */ +	xorl	%r13d, %r13d		/* nospec   r13 */  	pushq   $0			/* pt_regs->r14 = 0 */ +	xorl	%r14d, %r14d		/* nospec   r14 */  	pushq   $0			/* pt_regs->r15 = 0 */ +	xorl	%r15d, %r15d		/* nospec   r15 */  	/*  	 * User mode is traced as though IRQs are on, and SYSENTER @@ -278,9 +298,9 @@ sysret32_from_system_call:  	 */  	SWITCH_TO_USER_CR3_NOSTACK scratch_reg=%r8 scratch_reg2=%r9 -	xorq	%r8, %r8 -	xorq	%r9, %r9 -	xorq	%r10, %r10 +	xorl	%r8d, %r8d +	xorl	%r9d, %r9d +	xorl	%r10d, %r10d  	swapgs  	sysretl  END(entry_SYSCALL_compat) @@ -327,26 +347,47 @@ ENTRY(entry_INT80_compat)  	 */  	movl	%eax, %eax +	/* switch to thread stack expects orig_ax and rdi to be pushed */  	pushq	%rax			/* pt_regs->orig_ax */ +	pushq	%rdi			/* pt_regs->di */ -	/* switch to thread stack expects orig_ax to be pushed */ -	call	switch_to_thread_stack +	/* Need to switch before accessing the thread stack. */ +	SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi +	movq	%rsp, %rdi +	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp -	pushq	%rdi			/* pt_regs->di */ +	pushq	6*8(%rdi)		/* regs->ss */ +	pushq	5*8(%rdi)		/* regs->rsp */ +	pushq	4*8(%rdi)		/* regs->eflags */ +	pushq	3*8(%rdi)		/* regs->cs */ +	pushq	2*8(%rdi)		/* regs->ip */ +	pushq	1*8(%rdi)		/* regs->orig_ax */ + +	pushq	(%rdi)			/* pt_regs->di */  	pushq	%rsi			/* pt_regs->si */  	pushq	%rdx			/* pt_regs->dx */  	pushq	%rcx			/* pt_regs->cx */  	pushq	$-ENOSYS		/* pt_regs->ax */  	pushq   $0			/* pt_regs->r8  = 0 */ +	xorl	%r8d, %r8d		/* nospec   r8 */  	pushq   $0			/* pt_regs->r9  = 0 */ +	xorl	%r9d, %r9d		/* nospec   r9 */  	pushq   $0			/* pt_regs->r10 = 0 */ +	xorl	%r10d, %r10d		/* nospec   r10 */  	pushq   $0			/* pt_regs->r11 = 0 */ +	xorl	%r11d, %r11d		/* nospec   r11 */  	pushq   %rbx                    /* pt_regs->rbx */ +	xorl	%ebx, %ebx		/* nospec   rbx */  	pushq   %rbp                    /* pt_regs->rbp */ +	xorl	%ebp, %ebp		/* nospec   rbp */  	pushq   %r12                    /* pt_regs->r12 */ +	xorl	%r12d, %r12d		/* nospec   r12 */  	pushq   %r13                    /* pt_regs->r13 */ +	xorl	%r13d, %r13d		/* nospec   r13 */  	pushq   %r14                    /* pt_regs->r14 */ +	xorl	%r14d, %r14d		/* nospec   r14 */  	pushq   %r15                    /* pt_regs->r15 */ +	xorl	%r15d, %r15d		/* nospec   r15 */  	cld  	/* @@ -363,15 +404,3 @@ ENTRY(entry_INT80_compat)  	TRACE_IRQS_ON  	jmp	swapgs_restore_regs_and_return_to_usermode  END(entry_INT80_compat) - -ENTRY(stub32_clone) -	/* -	 * The 32-bit clone ABI is: clone(..., int tls_val, int *child_tidptr). -	 * The 64-bit clone ABI is: clone(..., int *child_tidptr, int tls_val). 
-	 * -	 * The native 64-bit kernel's sys_clone() implements the latter, -	 * so we need to swap arguments here before calling it: -	 */ -	xchg	%r8, %rcx -	jmp	sys_clone -ENDPROC(stub32_clone) diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 448ac2161112..2a5e99cff859 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -8,12 +8,12 @@  #  0	i386	restart_syscall		sys_restart_syscall  1	i386	exit			sys_exit -2	i386	fork			sys_fork			sys_fork +2	i386	fork			sys_fork  3	i386	read			sys_read  4	i386	write			sys_write  5	i386	open			sys_open			compat_sys_open  6	i386	close			sys_close -7	i386	waitpid			sys_waitpid			sys32_waitpid +7	i386	waitpid			sys_waitpid			compat_sys_x86_waitpid  8	i386	creat			sys_creat  9	i386	link			sys_link  10	i386	unlink			sys_unlink @@ -78,7 +78,7 @@  69	i386	ssetmask		sys_ssetmask  70	i386	setreuid		sys_setreuid16  71	i386	setregid		sys_setregid16 -72	i386	sigsuspend		sys_sigsuspend			sys_sigsuspend +72	i386	sigsuspend		sys_sigsuspend  73	i386	sigpending		sys_sigpending			compat_sys_sigpending  74	i386	sethostname		sys_sethostname  75	i386	setrlimit		sys_setrlimit			compat_sys_setrlimit @@ -96,7 +96,7 @@  87	i386	swapon			sys_swapon  88	i386	reboot			sys_reboot  89	i386	readdir			sys_old_readdir			compat_sys_old_readdir -90	i386	mmap			sys_old_mmap			sys32_mmap +90	i386	mmap			sys_old_mmap			compat_sys_x86_mmap  91	i386	munmap			sys_munmap  92	i386	truncate		sys_truncate			compat_sys_truncate  93	i386	ftruncate		sys_ftruncate			compat_sys_ftruncate @@ -126,7 +126,7 @@  117	i386	ipc			sys_ipc				compat_sys_ipc  118	i386	fsync			sys_fsync  119	i386	sigreturn		sys_sigreturn			sys32_sigreturn -120	i386	clone			sys_clone			stub32_clone +120	i386	clone			sys_clone			compat_sys_x86_clone  121	i386	setdomainname		sys_setdomainname  122	i386	uname			sys_newuname  123	i386	modify_ldt		sys_modify_ldt @@ -186,8 +186,8 @@  177	i386	rt_sigtimedwait		sys_rt_sigtimedwait		compat_sys_rt_sigtimedwait  178	i386	rt_sigqueueinfo		sys_rt_sigqueueinfo		compat_sys_rt_sigqueueinfo  179	i386	rt_sigsuspend		sys_rt_sigsuspend -180	i386	pread64			sys_pread64			sys32_pread -181	i386	pwrite64		sys_pwrite64			sys32_pwrite +180	i386	pread64			sys_pread64			compat_sys_x86_pread +181	i386	pwrite64		sys_pwrite64			compat_sys_x86_pwrite  182	i386	chown			sys_chown16  183	i386	getcwd			sys_getcwd  184	i386	capget			sys_capget @@ -196,14 +196,14 @@  187	i386	sendfile		sys_sendfile			compat_sys_sendfile  188	i386	getpmsg  189	i386	putpmsg -190	i386	vfork			sys_vfork			sys_vfork +190	i386	vfork			sys_vfork  191	i386	ugetrlimit		sys_getrlimit			compat_sys_getrlimit  192	i386	mmap2			sys_mmap_pgoff -193	i386	truncate64		sys_truncate64			sys32_truncate64 -194	i386	ftruncate64		sys_ftruncate64			sys32_ftruncate64 -195	i386	stat64			sys_stat64			sys32_stat64 -196	i386	lstat64			sys_lstat64			sys32_lstat64 -197	i386	fstat64			sys_fstat64			sys32_fstat64 +193	i386	truncate64		sys_truncate64			compat_sys_x86_truncate64 +194	i386	ftruncate64		sys_ftruncate64			compat_sys_x86_ftruncate64 +195	i386	stat64			sys_stat64			compat_sys_x86_stat64 +196	i386	lstat64			sys_lstat64			compat_sys_x86_lstat64 +197	i386	fstat64			sys_fstat64			compat_sys_x86_fstat64  198	i386	lchown32		sys_lchown  199	i386	getuid32		sys_getuid  200	i386	getgid32		sys_getgid @@ -231,7 +231,7 @@  # 222 is unused  # 223 is unused  224	i386	gettid			sys_gettid -225	i386	readahead		sys_readahead			sys32_readahead +225	i386	readahead		sys_readahead			
compat_sys_x86_readahead  226	i386	setxattr		sys_setxattr  227	i386	lsetxattr		sys_lsetxattr  228	i386	fsetxattr		sys_fsetxattr @@ -256,7 +256,7 @@  247	i386	io_getevents		sys_io_getevents		compat_sys_io_getevents  248	i386	io_submit		sys_io_submit			compat_sys_io_submit  249	i386	io_cancel		sys_io_cancel -250	i386	fadvise64		sys_fadvise64			sys32_fadvise64 +250	i386	fadvise64		sys_fadvise64			compat_sys_x86_fadvise64  # 251 is available for reuse (was briefly sys_set_zone_reclaim)  252	i386	exit_group		sys_exit_group  253	i386	lookup_dcookie		sys_lookup_dcookie		compat_sys_lookup_dcookie @@ -278,7 +278,7 @@  269	i386	fstatfs64		sys_fstatfs64			compat_sys_fstatfs64  270	i386	tgkill			sys_tgkill  271	i386	utimes			sys_utimes			compat_sys_utimes -272	i386	fadvise64_64		sys_fadvise64_64		sys32_fadvise64_64 +272	i386	fadvise64_64		sys_fadvise64_64		compat_sys_x86_fadvise64_64  273	i386	vserver  274	i386	mbind			sys_mbind  275	i386	get_mempolicy		sys_get_mempolicy		compat_sys_get_mempolicy @@ -306,7 +306,7 @@  297	i386	mknodat			sys_mknodat  298	i386	fchownat		sys_fchownat  299	i386	futimesat		sys_futimesat			compat_sys_futimesat -300	i386	fstatat64		sys_fstatat64			sys32_fstatat +300	i386	fstatat64		sys_fstatat64			compat_sys_x86_fstatat  301	i386	unlinkat		sys_unlinkat  302	i386	renameat		sys_renameat  303	i386	linkat			sys_linkat @@ -320,7 +320,7 @@  311	i386	set_robust_list		sys_set_robust_list		compat_sys_set_robust_list  312	i386	get_robust_list		sys_get_robust_list		compat_sys_get_robust_list  313	i386	splice			sys_splice -314	i386	sync_file_range		sys_sync_file_range		sys32_sync_file_range +314	i386	sync_file_range		sys_sync_file_range		compat_sys_x86_sync_file_range  315	i386	tee			sys_tee  316	i386	vmsplice		sys_vmsplice			compat_sys_vmsplice  317	i386	move_pages		sys_move_pages			compat_sys_move_pages @@ -330,7 +330,7 @@  321	i386	signalfd		sys_signalfd			compat_sys_signalfd  322	i386	timerfd_create		sys_timerfd_create  323	i386	eventfd			sys_eventfd -324	i386	fallocate		sys_fallocate			sys32_fallocate +324	i386	fallocate		sys_fallocate			compat_sys_x86_fallocate  325	i386	timerfd_settime		sys_timerfd_settime		compat_sys_timerfd_settime  326	i386	timerfd_gettime		sys_timerfd_gettime		compat_sys_timerfd_gettime  327	i386	signalfd4		sys_signalfd4			compat_sys_signalfd4 diff --git a/arch/x86/entry/vdso/vdso32/vclock_gettime.c b/arch/x86/entry/vdso/vdso32/vclock_gettime.c index 7780bbfb06ef..9242b28418d5 100644 --- a/arch/x86/entry/vdso/vdso32/vclock_gettime.c +++ b/arch/x86/entry/vdso/vdso32/vclock_gettime.c @@ -5,8 +5,6 @@  #undef CONFIG_OPTIMIZE_INLINING  #endif -#undef CONFIG_X86_PPRO_FENCE -  #ifdef CONFIG_X86_64  /* diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index 577fa8adb785..317be365bce3 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -42,10 +42,8 @@  #define CREATE_TRACE_POINTS  #include "vsyscall_trace.h" -static enum { EMULATE, NATIVE, NONE } vsyscall_mode = -#if defined(CONFIG_LEGACY_VSYSCALL_NATIVE) -	NATIVE; -#elif defined(CONFIG_LEGACY_VSYSCALL_NONE) +static enum { EMULATE, NONE } vsyscall_mode = +#ifdef CONFIG_LEGACY_VSYSCALL_NONE  	NONE;  #else  	EMULATE; @@ -56,8 +54,6 @@ static int __init vsyscall_setup(char *str)  	if (str) {  		if (!strcmp("emulate", str))  			vsyscall_mode = EMULATE; -		else if (!strcmp("native", str)) -			vsyscall_mode = NATIVE;  		else if (!strcmp("none", str))  			vsyscall_mode = NONE;  		else @@ -139,10 +135,6 @@ bool emulate_vsyscall(struct 
pt_regs *regs, unsigned long address)  	WARN_ON_ONCE(address != regs->ip); -	/* This should be unreachable in NATIVE mode. */ -	if (WARN_ON(vsyscall_mode == NATIVE)) -		return false; -  	if (vsyscall_mode == NONE) {  		warn_bad_vsyscall(KERN_INFO, regs,  				  "vsyscall attempted with vsyscall=none"); @@ -355,7 +347,7 @@ void __init set_vsyscall_pgtable_user_bits(pgd_t *root)  	set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER));  	p4d = p4d_offset(pgd, VSYSCALL_ADDR);  #if CONFIG_PGTABLE_LEVELS >= 5 -	p4d->p4d |= _PAGE_USER; +	set_p4d(p4d, __p4d(p4d_val(*p4d) | _PAGE_USER));  #endif  	pud = pud_offset(p4d, VSYSCALL_ADDR);  	set_pud(pud, __pud(pud_val(*pud) | _PAGE_USER)); @@ -370,9 +362,7 @@ void __init map_vsyscall(void)  	if (vsyscall_mode != NONE) {  		__set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall, -			     vsyscall_mode == NATIVE -			     ? PAGE_KERNEL_VSYSCALL -			     : PAGE_KERNEL_VVAR); +			     PAGE_KERNEL_VVAR);  		set_vsyscall_pgtable_user_bits(swapper_pg_dir);  	} diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 140d33288e78..88797c80b3e0 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2118,7 +2118,8 @@ static int x86_pmu_event_init(struct perf_event *event)  			event->destroy(event);  	} -	if (READ_ONCE(x86_pmu.attr_rdpmc)) +	if (READ_ONCE(x86_pmu.attr_rdpmc) && +	    !(event->hw.flags & PERF_X86_EVENT_LARGE_PEBS))  		event->hw.flags |= PERF_X86_EVENT_RDPMC_ALLOWED;  	return err; diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 731153a4681e..1e41d7508d99 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2952,9 +2952,9 @@ static void intel_pebs_aliases_skl(struct perf_event *event)  	return intel_pebs_aliases_precdist(event);  } -static unsigned long intel_pmu_free_running_flags(struct perf_event *event) +static unsigned long intel_pmu_large_pebs_flags(struct perf_event *event)  { -	unsigned long flags = x86_pmu.free_running_flags; +	unsigned long flags = x86_pmu.large_pebs_flags;  	if (event->attr.use_clockid)  		flags &= ~PERF_SAMPLE_TIME; @@ -2976,8 +2976,8 @@ static int intel_pmu_hw_config(struct perf_event *event)  		if (!event->attr.freq) {  			event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD;  			if (!(event->attr.sample_type & -			      ~intel_pmu_free_running_flags(event))) -				event->hw.flags |= PERF_X86_EVENT_FREERUNNING; +			      ~intel_pmu_large_pebs_flags(event))) +				event->hw.flags |= PERF_X86_EVENT_LARGE_PEBS;  		}  		if (x86_pmu.pebs_aliases)  			x86_pmu.pebs_aliases(event); @@ -3194,7 +3194,7 @@ static unsigned bdw_limit_period(struct perf_event *event, unsigned left)  			X86_CONFIG(.event=0xc0, .umask=0x01)) {  		if (left < 128)  			left = 128; -		left &= ~0x3fu; +		left &= ~0x3fULL;  	}  	return left;  } @@ -3460,7 +3460,7 @@ static __initconst const struct x86_pmu core_pmu = {  	.event_map		= intel_pmu_event_map,  	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),  	.apic			= 1, -	.free_running_flags	= PEBS_FREERUNNING_FLAGS, +	.large_pebs_flags	= LARGE_PEBS_FLAGS,  	/*  	 * Intel PMCs cannot be accessed sanely above 32-bit width, @@ -3502,7 +3502,7 @@ static __initconst const struct x86_pmu intel_pmu = {  	.event_map		= intel_pmu_event_map,  	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),  	.apic			= 1, -	.free_running_flags	= PEBS_FREERUNNING_FLAGS, +	.large_pebs_flags	= LARGE_PEBS_FLAGS,  	/*  	 * Intel PMCs cannot be accessed sanely above 32 bit width,  	 * so we install an artificial 1<<31 period regardless of @@ -3559,7 +3559,7 @@ static int 
intel_snb_pebs_broken(int cpu)  		break;  	case INTEL_FAM6_SANDYBRIDGE_X: -		switch (cpu_data(cpu).x86_mask) { +		switch (cpu_data(cpu).x86_stepping) {  		case 6: rev = 0x618; break;  		case 7: rev = 0x70c; break;  		} diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 18c25ab28557..d8015235ba76 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -935,7 +935,7 @@ void intel_pmu_pebs_add(struct perf_event *event)  	bool needed_cb = pebs_needs_sched_cb(cpuc);  	cpuc->n_pebs++; -	if (hwc->flags & PERF_X86_EVENT_FREERUNNING) +	if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS)  		cpuc->n_large_pebs++;  	pebs_update_state(needed_cb, cpuc, event->ctx->pmu); @@ -975,7 +975,7 @@ void intel_pmu_pebs_del(struct perf_event *event)  	bool needed_cb = pebs_needs_sched_cb(cpuc);  	cpuc->n_pebs--; -	if (hwc->flags & PERF_X86_EVENT_FREERUNNING) +	if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS)  		cpuc->n_large_pebs--;  	pebs_update_state(needed_cb, cpuc, event->ctx->pmu); @@ -1530,7 +1530,7 @@ void __init intel_ds_init(void)  			x86_pmu.pebs_record_size =  						sizeof(struct pebs_record_skl);  			x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm; -			x86_pmu.free_running_flags |= PERF_SAMPLE_TIME; +			x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME;  			break;  		default: diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index ae64d0b69729..cf372b90557e 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -1186,7 +1186,7 @@ void __init intel_pmu_lbr_init_atom(void)  	 * on PMU interrupt  	 */  	if (boot_cpu_data.x86_model == 28 -	    && boot_cpu_data.x86_mask < 10) { +	    && boot_cpu_data.x86_stepping < 10) {  		pr_cont("LBR disabled due to erratum");  		return;  	} diff --git a/arch/x86/events/intel/p6.c b/arch/x86/events/intel/p6.c index a5604c352930..408879b0c0d4 100644 --- a/arch/x86/events/intel/p6.c +++ b/arch/x86/events/intel/p6.c @@ -234,7 +234,7 @@ static __initconst const struct x86_pmu p6_pmu = {  static __init void p6_pmu_rdpmc_quirk(void)  { -	if (boot_cpu_data.x86_mask < 9) { +	if (boot_cpu_data.x86_stepping < 9) {  		/*  		 * PPro erratum 26; fixed in stepping 9 and above.  		 */ diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 6d8044ab1060..c98b943e58b4 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -3343,6 +3343,7 @@ static struct extra_reg skx_uncore_cha_extra_regs[] = {  	SNBEP_CBO_EVENT_EXTRA_REG(0x9134, 0xffff, 0x4),  	SNBEP_CBO_EVENT_EXTRA_REG(0x35, 0xff, 0x8),  	SNBEP_CBO_EVENT_EXTRA_REG(0x36, 0xff, 0x8), +	SNBEP_CBO_EVENT_EXTRA_REG(0x38, 0xff, 0x3),  	EVENT_EXTRA_END  }; @@ -3562,24 +3563,27 @@ static struct intel_uncore_type *skx_msr_uncores[] = {  	NULL,  }; +/* + * To determine the number of CHAs, it should read bits 27:0 in the CAPID6 + * register which located at Device 30, Function 3, Offset 0x9C. PCI ID 0x2083. 
+ */ +#define SKX_CAPID6		0x9c +#define SKX_CHA_BIT_MASK	GENMASK(27, 0) +  static int skx_count_chabox(void)  { -	struct pci_dev *chabox_dev = NULL; -	int bus, count = 0; +	struct pci_dev *dev = NULL; +	u32 val = 0; -	while (1) { -		chabox_dev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x208d, chabox_dev); -		if (!chabox_dev) -			break; -		if (count == 0) -			bus = chabox_dev->bus->number; -		if (bus != chabox_dev->bus->number) -			break; -		count++; -	} +	dev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x2083, dev); +	if (!dev) +		goto out; -	pci_dev_put(chabox_dev); -	return count; +	pci_read_config_dword(dev, SKX_CAPID6, &val); +	val &= SKX_CHA_BIT_MASK; +out: +	pci_dev_put(dev); +	return hweight32(val);  }  void skx_uncore_cpu_init(void) @@ -3606,7 +3610,7 @@ static struct intel_uncore_type skx_uncore_imc = {  };  static struct attribute *skx_upi_uncore_formats_attr[] = { -	&format_attr_event_ext.attr, +	&format_attr_event.attr,  	&format_attr_umask_ext.attr,  	&format_attr_edge.attr,  	&format_attr_inv.attr, diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 78f91ec1056e..39cd0615f04f 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -69,7 +69,7 @@ struct event_constraint {  #define PERF_X86_EVENT_RDPMC_ALLOWED	0x0100 /* grant rdpmc permission */  #define PERF_X86_EVENT_EXCL_ACCT	0x0200 /* accounted EXCL event */  #define PERF_X86_EVENT_AUTO_RELOAD	0x0400 /* use PEBS auto-reload */ -#define PERF_X86_EVENT_FREERUNNING	0x0800 /* use freerunning PEBS */ +#define PERF_X86_EVENT_LARGE_PEBS	0x0800 /* use large PEBS */  struct amd_nb { @@ -88,7 +88,7 @@ struct amd_nb {   * REGS_USER can be handled for events limited to ring 3.   *   */ -#define PEBS_FREERUNNING_FLAGS \ +#define LARGE_PEBS_FLAGS \  	(PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | \  	PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \  	PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \ @@ -608,7 +608,7 @@ struct x86_pmu {  	struct event_constraint *pebs_constraints;  	void		(*pebs_aliases)(struct perf_event *event);  	int 		max_pebs_events; -	unsigned long	free_running_flags; +	unsigned long	large_pebs_flags;  	/*  	 * Intel LBR diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index 96cd33bbfc85..6512498bbef6 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -51,15 +51,14 @@  #define AA(__x)		((unsigned long)(__x)) -asmlinkage long sys32_truncate64(const char __user *filename, -				 unsigned long offset_low, -				 unsigned long offset_high) +COMPAT_SYSCALL_DEFINE3(x86_truncate64, const char __user *, filename, +		       unsigned long, offset_low, unsigned long, offset_high)  {         return sys_truncate(filename, ((loff_t) offset_high << 32) | offset_low);  } -asmlinkage long sys32_ftruncate64(unsigned int fd, unsigned long offset_low, -				  unsigned long offset_high) +COMPAT_SYSCALL_DEFINE3(x86_ftruncate64, unsigned int, fd, +		       unsigned long, offset_low, unsigned long, offset_high)  {         return sys_ftruncate(fd, ((loff_t) offset_high << 32) | offset_low);  } @@ -96,8 +95,8 @@ static int cp_stat64(struct stat64 __user *ubuf, struct kstat *stat)  	return 0;  } -asmlinkage long sys32_stat64(const char __user *filename, -			     struct stat64 __user *statbuf) +COMPAT_SYSCALL_DEFINE2(x86_stat64, const char __user *, filename, +		       struct stat64 __user *, statbuf)  {  	struct kstat stat;  	int ret = vfs_stat(filename, &stat); @@ -107,8 +106,8 @@ asmlinkage long sys32_stat64(const char __user *filename,  	return 
ret;  } -asmlinkage long sys32_lstat64(const char __user *filename, -			      struct stat64 __user *statbuf) +COMPAT_SYSCALL_DEFINE2(x86_lstat64, const char __user *, filename, +		       struct stat64 __user *, statbuf)  {  	struct kstat stat;  	int ret = vfs_lstat(filename, &stat); @@ -117,7 +116,8 @@ asmlinkage long sys32_lstat64(const char __user *filename,  	return ret;  } -asmlinkage long sys32_fstat64(unsigned int fd, struct stat64 __user *statbuf) +COMPAT_SYSCALL_DEFINE2(x86_fstat64, unsigned int, fd, +		       struct stat64 __user *, statbuf)  {  	struct kstat stat;  	int ret = vfs_fstat(fd, &stat); @@ -126,8 +126,9 @@ asmlinkage long sys32_fstat64(unsigned int fd, struct stat64 __user *statbuf)  	return ret;  } -asmlinkage long sys32_fstatat(unsigned int dfd, const char __user *filename, -			      struct stat64 __user *statbuf, int flag) +COMPAT_SYSCALL_DEFINE4(x86_fstatat, unsigned int, dfd, +		       const char __user *, filename, +		       struct stat64 __user *, statbuf, int, flag)  {  	struct kstat stat;  	int error; @@ -153,7 +154,7 @@ struct mmap_arg_struct32 {  	unsigned int offset;  }; -asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *arg) +COMPAT_SYSCALL_DEFINE1(x86_mmap, struct mmap_arg_struct32 __user *, arg)  {  	struct mmap_arg_struct32 a; @@ -167,22 +168,22 @@ asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *arg)  			       a.offset>>PAGE_SHIFT);  } -asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int __user *stat_addr, -			      int options) +COMPAT_SYSCALL_DEFINE3(x86_waitpid, compat_pid_t, pid, unsigned int __user *, +		       stat_addr, int, options)  {  	return compat_sys_wait4(pid, stat_addr, options, NULL);  }  /* warning: next two assume little endian */ -asmlinkage long sys32_pread(unsigned int fd, char __user *ubuf, u32 count, -			    u32 poslo, u32 poshi) +COMPAT_SYSCALL_DEFINE5(x86_pread, unsigned int, fd, char __user *, ubuf, +		       u32, count, u32, poslo, u32, poshi)  {  	return sys_pread64(fd, ubuf, count,  			 ((loff_t)AA(poshi) << 32) | AA(poslo));  } -asmlinkage long sys32_pwrite(unsigned int fd, const char __user *ubuf, -			     u32 count, u32 poslo, u32 poshi) +COMPAT_SYSCALL_DEFINE5(x86_pwrite, unsigned int, fd, const char __user *, ubuf, +		       u32, count, u32, poslo, u32, poshi)  {  	return sys_pwrite64(fd, ubuf, count,  			  ((loff_t)AA(poshi) << 32) | AA(poslo)); @@ -193,8 +194,9 @@ asmlinkage long sys32_pwrite(unsigned int fd, const char __user *ubuf,   * Some system calls that need sign extended arguments. This could be   * done by a generic wrapper.   
*/ -long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high, -			__u32 len_low, __u32 len_high, int advice) +COMPAT_SYSCALL_DEFINE6(x86_fadvise64_64, int, fd, __u32, offset_low, +		       __u32, offset_high, __u32, len_low, __u32, len_high, +		       int, advice)  {  	return sys_fadvise64_64(fd,  			       (((u64)offset_high)<<32) | offset_low, @@ -202,31 +204,43 @@ long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high,  				advice);  } -asmlinkage ssize_t sys32_readahead(int fd, unsigned off_lo, unsigned off_hi, -				   size_t count) +COMPAT_SYSCALL_DEFINE4(x86_readahead, int, fd, unsigned int, off_lo, +		       unsigned int, off_hi, size_t, count)  {  	return sys_readahead(fd, ((u64)off_hi << 32) | off_lo, count);  } -asmlinkage long sys32_sync_file_range(int fd, unsigned off_low, unsigned off_hi, -				      unsigned n_low, unsigned n_hi,  int flags) +COMPAT_SYSCALL_DEFINE6(x86_sync_file_range, int, fd, unsigned int, off_low, +		       unsigned int, off_hi, unsigned int, n_low, +		       unsigned int, n_hi, int, flags)  {  	return sys_sync_file_range(fd,  				   ((u64)off_hi << 32) | off_low,  				   ((u64)n_hi << 32) | n_low, flags);  } -asmlinkage long sys32_fadvise64(int fd, unsigned offset_lo, unsigned offset_hi, -				size_t len, int advice) +COMPAT_SYSCALL_DEFINE5(x86_fadvise64, int, fd, unsigned int, offset_lo, +		       unsigned int, offset_hi, size_t, len, int, advice)  {  	return sys_fadvise64_64(fd, ((u64)offset_hi << 32) | offset_lo,  				len, advice);  } -asmlinkage long sys32_fallocate(int fd, int mode, unsigned offset_lo, -				unsigned offset_hi, unsigned len_lo, -				unsigned len_hi) +COMPAT_SYSCALL_DEFINE6(x86_fallocate, int, fd, int, mode, +		       unsigned int, offset_lo, unsigned int, offset_hi, +		       unsigned int, len_lo, unsigned int, len_hi)  {  	return sys_fallocate(fd, mode, ((u64)offset_hi << 32) | offset_lo,  			     ((u64)len_hi << 32) | len_lo);  } + +/* + * The 32-bit clone ABI is CONFIG_CLONE_BACKWARDS + */ +COMPAT_SYSCALL_DEFINE5(x86_clone, unsigned long, clone_flags, +		       unsigned long, newsp, int __user *, parent_tidptr, +		       unsigned long, tls_val, int __user *, child_tidptr) +{ +	return sys_clone(clone_flags, newsp, parent_tidptr, child_tidptr, +			tls_val); +} diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 44f5d79d5105..11881726ed37 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -94,7 +94,7 @@ static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate)  	if (boot_cpu_data.x86 == 0x0F &&  	    boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&  	    boot_cpu_data.x86_model <= 0x05 && -	    boot_cpu_data.x86_mask < 0x0A) +	    boot_cpu_data.x86_stepping < 0x0A)  		return 1;  	else if (boot_cpu_has(X86_BUG_AMD_APIC_C1E))  		return 1; diff --git a/arch/x86/include/asm/apm.h b/arch/x86/include/asm/apm.h index 4d4015ddcf26..c356098b6fb9 100644 --- a/arch/x86/include/asm/apm.h +++ b/arch/x86/include/asm/apm.h @@ -7,6 +7,8 @@  #ifndef _ASM_X86_MACH_DEFAULT_APM_H  #define _ASM_X86_MACH_DEFAULT_APM_H +#include <asm/nospec-branch.h> +  #ifdef APM_ZERO_SEGS  #	define APM_DO_ZERO_SEGS \  		"pushl %%ds\n\t" \ @@ -32,6 +34,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in,  	 * N.B. We do NOT need a cld after the BIOS call  	 * because we always save and restore the flags.  	 
*/ +	firmware_restrict_branch_speculation_start();  	__asm__ __volatile__(APM_DO_ZERO_SEGS  		"pushl %%edi\n\t"  		"pushl %%ebp\n\t" @@ -44,6 +47,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in,  		  "=S" (*esi)  		: "a" (func), "b" (ebx_in), "c" (ecx_in)  		: "memory", "cc"); +	firmware_restrict_branch_speculation_end();  }  static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in, @@ -56,6 +60,7 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in,  	 * N.B. We do NOT need a cld after the BIOS call  	 * because we always save and restore the flags.  	 */ +	firmware_restrict_branch_speculation_start();  	__asm__ __volatile__(APM_DO_ZERO_SEGS  		"pushl %%edi\n\t"  		"pushl %%ebp\n\t" @@ -68,6 +73,7 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in,  		  "=S" (si)  		: "a" (func), "b" (ebx_in), "c" (ecx_in)  		: "memory", "cc"); +	firmware_restrict_branch_speculation_end();  	return error;  } diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h index 4d111616524b..1908214b9125 100644 --- a/arch/x86/include/asm/asm-prototypes.h +++ b/arch/x86/include/asm/asm-prototypes.h @@ -38,7 +38,4 @@ INDIRECT_THUNK(dx)  INDIRECT_THUNK(si)  INDIRECT_THUNK(di)  INDIRECT_THUNK(bp) -asmlinkage void __fill_rsb(void); -asmlinkage void __clear_rsb(void); -  #endif /* CONFIG_RETPOLINE */ diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index 30d406146016..042b5e892ed1 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -40,7 +40,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,  	asm ("cmp %1,%2; sbb %0,%0;"  			:"=r" (mask) -			:"r"(size),"r" (index) +			:"g"(size),"r" (index)  			:"cc");  	return mask;  } @@ -52,11 +52,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,  #define barrier_nospec() alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, \  					   "lfence", X86_FEATURE_LFENCE_RDTSC) -#ifdef CONFIG_X86_PPRO_FENCE -#define dma_rmb()	rmb() -#else  #define dma_rmb()	barrier() -#endif  #define dma_wmb()	barrier()  #ifdef CONFIG_X86_32 @@ -68,30 +64,6 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,  #define __smp_wmb()	barrier()  #define __smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0) -#if defined(CONFIG_X86_PPRO_FENCE) - -/* - * For this option x86 doesn't have a strong TSO memory - * model and we should fall back to full barriers. 
- */ - -#define __smp_store_release(p, v)					\ -do {									\ -	compiletime_assert_atomic_type(*p);				\ -	__smp_mb();							\ -	WRITE_ONCE(*p, v);						\ -} while (0) - -#define __smp_load_acquire(p)						\ -({									\ -	typeof(*p) ___p1 = READ_ONCE(*p);				\ -	compiletime_assert_atomic_type(*p);				\ -	__smp_mb();							\ -	___p1;								\ -}) - -#else /* regular x86 TSO memory ordering */ -  #define __smp_store_release(p, v)					\  do {									\  	compiletime_assert_atomic_type(*p);				\ @@ -107,8 +79,6 @@ do {									\  	___p1;								\  }) -#endif -  /* Atomic operations are already serializing on x86 */  #define __smp_mb__before_atomic()	barrier()  #define __smp_mb__after_atomic()	barrier() diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 3fa039855b8f..9f645ba57dbb 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -78,7 +78,7 @@ set_bit(long nr, volatile unsigned long *addr)  			: "iq" ((u8)CONST_MASK(nr))  			: "memory");  	} else { -		asm volatile(LOCK_PREFIX "bts %1,%0" +		asm volatile(LOCK_PREFIX __ASM_SIZE(bts) " %1,%0"  			: BITOP_ADDR(addr) : "Ir" (nr) : "memory");  	}  } @@ -94,7 +94,7 @@ set_bit(long nr, volatile unsigned long *addr)   */  static __always_inline void __set_bit(long nr, volatile unsigned long *addr)  { -	asm volatile("bts %1,%0" : ADDR : "Ir" (nr) : "memory"); +	asm volatile(__ASM_SIZE(bts) " %1,%0" : ADDR : "Ir" (nr) : "memory");  }  /** @@ -115,7 +115,7 @@ clear_bit(long nr, volatile unsigned long *addr)  			: CONST_MASK_ADDR(nr, addr)  			: "iq" ((u8)~CONST_MASK(nr)));  	} else { -		asm volatile(LOCK_PREFIX "btr %1,%0" +		asm volatile(LOCK_PREFIX __ASM_SIZE(btr) " %1,%0"  			: BITOP_ADDR(addr)  			: "Ir" (nr));  	} @@ -137,7 +137,7 @@ static __always_inline void clear_bit_unlock(long nr, volatile unsigned long *ad  static __always_inline void __clear_bit(long nr, volatile unsigned long *addr)  { -	asm volatile("btr %1,%0" : ADDR : "Ir" (nr)); +	asm volatile(__ASM_SIZE(btr) " %1,%0" : ADDR : "Ir" (nr));  }  static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr) @@ -182,7 +182,7 @@ static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long *   */  static __always_inline void __change_bit(long nr, volatile unsigned long *addr)  { -	asm volatile("btc %1,%0" : ADDR : "Ir" (nr)); +	asm volatile(__ASM_SIZE(btc) " %1,%0" : ADDR : "Ir" (nr));  }  /** @@ -201,7 +201,7 @@ static __always_inline void change_bit(long nr, volatile unsigned long *addr)  			: CONST_MASK_ADDR(nr, addr)  			: "iq" ((u8)CONST_MASK(nr)));  	} else { -		asm volatile(LOCK_PREFIX "btc %1,%0" +		asm volatile(LOCK_PREFIX __ASM_SIZE(btc) " %1,%0"  			: BITOP_ADDR(addr)  			: "Ir" (nr));  	} @@ -217,7 +217,8 @@ static __always_inline void change_bit(long nr, volatile unsigned long *addr)   */  static __always_inline bool test_and_set_bit(long nr, volatile unsigned long *addr)  { -	GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, "Ir", nr, "%0", c); +	GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(bts), +	                 *addr, "Ir", nr, "%0", c);  }  /** @@ -246,7 +247,7 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *  {  	bool oldbit; -	asm("bts %2,%1" +	asm(__ASM_SIZE(bts) " %2,%1"  	    CC_SET(c)  	    : CC_OUT(c) (oldbit), ADDR  	    : "Ir" (nr)); @@ -263,7 +264,8 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *   */  static __always_inline bool test_and_clear_bit(long nr, volatile unsigned long *addr)  
{ -	GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, "Ir", nr, "%0", c); +	GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btr), +	                 *addr, "Ir", nr, "%0", c);  }  /** @@ -286,7 +288,7 @@ static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long  {  	bool oldbit; -	asm volatile("btr %2,%1" +	asm volatile(__ASM_SIZE(btr) " %2,%1"  		     CC_SET(c)  		     : CC_OUT(c) (oldbit), ADDR  		     : "Ir" (nr)); @@ -298,7 +300,7 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon  {  	bool oldbit; -	asm volatile("btc %2,%1" +	asm volatile(__ASM_SIZE(btc) " %2,%1"  		     CC_SET(c)  		     : CC_OUT(c) (oldbit), ADDR  		     : "Ir" (nr) : "memory"); @@ -316,7 +318,8 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon   */  static __always_inline bool test_and_change_bit(long nr, volatile unsigned long *addr)  { -	GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, "Ir", nr, "%0", c); +	GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btc), +	                 *addr, "Ir", nr, "%0", c);  }  static __always_inline bool constant_test_bit(long nr, const volatile unsigned long *addr) @@ -329,7 +332,7 @@ static __always_inline bool variable_test_bit(long nr, volatile const unsigned l  {  	bool oldbit; -	asm volatile("bt %2,%1" +	asm volatile(__ASM_SIZE(bt) " %2,%1"  		     CC_SET(c)  		     : CC_OUT(c) (oldbit)  		     : "m" (*(unsigned long *)addr), "Ir" (nr)); diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index 34d99af43994..6804d6642767 100644 --- a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h @@ -5,23 +5,20 @@  #include <linux/stringify.h>  /* - * Since some emulators terminate on UD2, we cannot use it for WARN. - * Since various instruction decoders disagree on the length of UD1, - * we cannot use it either. So use UD0 for WARN. + * Despite that some emulators terminate on UD2, we use it for WARN().   * - * (binutils knows about "ud1" but {en,de}codes it as 2 bytes, whereas - *  our kernel decoder thinks it takes a ModRM byte, which seems consistent - *  with various things like the Intel SDM instruction encoding rules) + * Since various instruction decoders/specs disagree on the encoding of + * UD0/UD1.   
*/ -#define ASM_UD0		".byte 0x0f, 0xff" +#define ASM_UD0		".byte 0x0f, 0xff" /* + ModRM (for Intel) */  #define ASM_UD1		".byte 0x0f, 0xb9" /* + ModRM */  #define ASM_UD2		".byte 0x0f, 0x0b"  #define INSN_UD0	0xff0f  #define INSN_UD2	0x0b0f -#define LEN_UD0		2 +#define LEN_UD2		2  #ifdef CONFIG_GENERIC_BUG @@ -77,7 +74,11 @@ do {								\  	unreachable();						\  } while (0) -#define __WARN_FLAGS(flags)	_BUG_FLAGS(ASM_UD0, BUGFLAG_WARNING|(flags)) +#define __WARN_FLAGS(flags)					\ +do {								\ +	_BUG_FLAGS(ASM_UD2, BUGFLAG_WARNING|(flags));		\ +	annotate_reachable();					\ +} while (0)  #include <asm-generic/bug.h> diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 70eddb3922ff..736771c9822e 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -148,45 +148,46 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);   */  static __always_inline __pure bool _static_cpu_has(u16 bit)  { -		asm_volatile_goto("1: jmp 6f\n" -			 "2:\n" -			 ".skip -(((5f-4f) - (2b-1b)) > 0) * " -			         "((5f-4f) - (2b-1b)),0x90\n" -			 "3:\n" -			 ".section .altinstructions,\"a\"\n" -			 " .long 1b - .\n"		/* src offset */ -			 " .long 4f - .\n"		/* repl offset */ -			 " .word %P1\n"			/* always replace */ -			 " .byte 3b - 1b\n"		/* src len */ -			 " .byte 5f - 4f\n"		/* repl len */ -			 " .byte 3b - 2b\n"		/* pad len */ -			 ".previous\n" -			 ".section .altinstr_replacement,\"ax\"\n" -			 "4: jmp %l[t_no]\n" -			 "5:\n" -			 ".previous\n" -			 ".section .altinstructions,\"a\"\n" -			 " .long 1b - .\n"		/* src offset */ -			 " .long 0\n"			/* no replacement */ -			 " .word %P0\n"			/* feature bit */ -			 " .byte 3b - 1b\n"		/* src len */ -			 " .byte 0\n"			/* repl len */ -			 " .byte 0\n"			/* pad len */ -			 ".previous\n" -			 ".section .altinstr_aux,\"ax\"\n" -			 "6:\n" -			 " testb %[bitnum],%[cap_byte]\n" -			 " jnz %l[t_yes]\n" -			 " jmp %l[t_no]\n" -			 ".previous\n" -			 : : "i" (bit), "i" (X86_FEATURE_ALWAYS), -			     [bitnum] "i" (1 << (bit & 7)), -			     [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3]) -			 : : t_yes, t_no); -	t_yes: -		return true; -	t_no: -		return false; +	asm_volatile_goto("1: jmp 6f\n" +		 "2:\n" +		 ".skip -(((5f-4f) - (2b-1b)) > 0) * " +			 "((5f-4f) - (2b-1b)),0x90\n" +		 "3:\n" +		 ".section .altinstructions,\"a\"\n" +		 " .long 1b - .\n"		/* src offset */ +		 " .long 4f - .\n"		/* repl offset */ +		 " .word %P[always]\n"		/* always replace */ +		 " .byte 3b - 1b\n"		/* src len */ +		 " .byte 5f - 4f\n"		/* repl len */ +		 " .byte 3b - 2b\n"		/* pad len */ +		 ".previous\n" +		 ".section .altinstr_replacement,\"ax\"\n" +		 "4: jmp %l[t_no]\n" +		 "5:\n" +		 ".previous\n" +		 ".section .altinstructions,\"a\"\n" +		 " .long 1b - .\n"		/* src offset */ +		 " .long 0\n"			/* no replacement */ +		 " .word %P[feature]\n"		/* feature bit */ +		 " .byte 3b - 1b\n"		/* src len */ +		 " .byte 0\n"			/* repl len */ +		 " .byte 0\n"			/* pad len */ +		 ".previous\n" +		 ".section .altinstr_aux,\"ax\"\n" +		 "6:\n" +		 " testb %[bitnum],%[cap_byte]\n" +		 " jnz %l[t_yes]\n" +		 " jmp %l[t_no]\n" +		 ".previous\n" +		 : : [feature]  "i" (bit), +		     [always]   "i" (X86_FEATURE_ALWAYS), +		     [bitnum]   "i" (1 << (bit & 7)), +		     [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3]) +		 : : t_yes, t_no); +t_yes: +	return true; +t_no: +	return false;  }  #define static_cpu_has(bit)					\ diff --git a/arch/x86/include/asm/cpufeatures.h 
b/arch/x86/include/asm/cpufeatures.h index 0dfe4d3f74e2..d554c11e01ff 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -213,6 +213,7 @@  #define X86_FEATURE_SEV			( 7*32+20) /* AMD Secure Encrypted Virtualization */  #define X86_FEATURE_USE_IBPB		( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ +#define X86_FEATURE_USE_IBRS_FW		( 7*32+22) /* "" Use IBRS during runtime firmware calls */  /* Virtualization flags: Linux defined, word 8 */  #define X86_FEATURE_TPR_SHADOW		( 8*32+ 0) /* Intel TPR Shadow */ @@ -315,6 +316,7 @@  #define X86_FEATURE_VPCLMULQDQ		(16*32+10) /* Carry-Less Multiplication Double Quadword */  #define X86_FEATURE_AVX512_VNNI		(16*32+11) /* Vector Neural Network Instructions */  #define X86_FEATURE_AVX512_BITALG	(16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */ +#define X86_FEATURE_TME			(16*32+13) /* Intel Total Memory Encryption */  #define X86_FEATURE_AVX512_VPOPCNTDQ	(16*32+14) /* POPCNT for vectors of DW/QW */  #define X86_FEATURE_LA57		(16*32+16) /* 5-level page tables */  #define X86_FEATURE_RDPID		(16*32+22) /* RDPID instruction */ @@ -327,6 +329,7 @@  /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */  #define X86_FEATURE_AVX512_4VNNIW	(18*32+ 2) /* AVX-512 Neural Network Instructions */  #define X86_FEATURE_AVX512_4FMAPS	(18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ +#define X86_FEATURE_PCONFIG		(18*32+18) /* Intel PCONFIG */  #define X86_FEATURE_SPEC_CTRL		(18*32+26) /* "" Speculation Control (IBRS + IBPB) */  #define X86_FEATURE_INTEL_STIBP		(18*32+27) /* "" Single Thread Indirect Branch Predictors */  #define X86_FEATURE_ARCH_CAPABILITIES	(18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 85f6ccb80b91..a399c1ebf6f0 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -6,6 +6,7 @@  #include <asm/pgtable.h>  #include <asm/processor-flags.h>  #include <asm/tlb.h> +#include <asm/nospec-branch.h>  /*   * We map the EFI regions needed for runtime services non-contiguously, @@ -36,8 +37,18 @@  extern asmlinkage unsigned long efi_call_phys(void *, ...); -#define arch_efi_call_virt_setup()	kernel_fpu_begin() -#define arch_efi_call_virt_teardown()	kernel_fpu_end() +#define arch_efi_call_virt_setup()					\ +({									\ +	kernel_fpu_begin();						\ +	firmware_restrict_branch_speculation_start();			\ +}) + +#define arch_efi_call_virt_teardown()					\ +({									\ +	firmware_restrict_branch_speculation_end();			\ +	kernel_fpu_end();						\ +}) +  /*   * Wrap all the virtual calls in a way that forces the parameters on the stack. @@ -73,6 +84,7 @@ struct efi_scratch {  	efi_sync_low_kernel_mappings();					\  	preempt_disable();						\  	__kernel_fpu_begin();						\ +	firmware_restrict_branch_speculation_start();			\  									\  	if (efi_scratch.use_pgd) {					\  		efi_scratch.prev_cr3 = __read_cr3();			\ @@ -91,6 +103,7 @@ struct efi_scratch {  		__flush_tlb_all();					\  	}								\  									\ +	firmware_restrict_branch_speculation_end();			\  	__kernel_fpu_end();						\  	preempt_enable();						\  }) diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 95e948627fd0..f6e5b9375d8c 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -232,21 +232,6 @@ extern void set_iounmap_nonlazy(void);   */  #define __ISA_IO_base ((char __iomem *)(PAGE_OFFSET)) -/* - *	Cache management - * - *	This needed for two cases - *	1. 
Out of order aware processors - *	2. Accidentally out of order processors (PPro errata #51) - */ - -static inline void flush_write_buffers(void) -{ -#if defined(CONFIG_X86_PPRO_FENCE) -	asm volatile("lock; addl $0,0(%%esp)": : :"memory"); -#endif -} -  #endif /* __KERNEL__ */  extern void native_io_delay(void); diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index dd6f57a54a26..b605a5b6a30c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -507,6 +507,7 @@ struct kvm_vcpu_arch {  	u64 smi_count;  	bool tpr_access_reporting;  	u64 ia32_xss; +	u64 microcode_version;  	/*  	 * Paging state of the vcpu @@ -1095,6 +1096,8 @@ struct kvm_x86_ops {  	int (*mem_enc_op)(struct kvm *kvm, void __user *argp);  	int (*mem_enc_reg_region)(struct kvm *kvm, struct kvm_enc_region *argp);  	int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region *argp); + +	int (*get_msr_feature)(struct kvm_msr_entry *entry);  };  struct kvm_arch_async_pf { @@ -1464,7 +1467,4 @@ static inline int kvm_cpu_get_apicid(int mps_cpu)  #define put_smstate(type, buf, offset, val)                      \  	*(type *)((buf) + (offset) - 0x7e00) = val -void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, -		unsigned long start, unsigned long end); -  #endif /* _ASM_X86_KVM_HOST_H */ diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index 55520cec8b27..6cf0e4cb7b97 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -37,7 +37,13 @@ struct cpu_signature {  struct device; -enum ucode_state { UCODE_ERROR, UCODE_OK, UCODE_NFOUND }; +enum ucode_state { +	UCODE_OK	= 0, +	UCODE_NEW, +	UCODE_UPDATED, +	UCODE_NFOUND, +	UCODE_ERROR, +};  struct microcode_ops {  	enum ucode_state (*request_microcode_user) (int cpu, @@ -54,7 +60,7 @@ struct microcode_ops {  	 * are being called.  	 * See also the "Synchronization" section in microcode_core.c.  	 */ -	int (*apply_microcode) (int cpu); +	enum ucode_state (*apply_microcode) (int cpu);  	int (*collect_cpu_info) (int cpu, struct cpu_signature *csig);  }; diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index c931b88982a0..1de72ce514cd 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -74,6 +74,7 @@ static inline void *ldt_slot_va(int slot)  	return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);  #else  	BUG(); +	return (void *)fix_to_virt(FIX_HOLE);  #endif  } diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 4d57894635f2..f928ad9b143f 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -6,6 +6,51 @@  #include <asm/alternative.h>  #include <asm/alternative-asm.h>  #include <asm/cpufeatures.h> +#include <asm/msr-index.h> + +/* + * Fill the CPU return stack buffer. + * + * Each entry in the RSB, if used for a speculative 'ret', contains an + * infinite 'pause; lfence; jmp' loop to capture speculative execution. + * + * This is required in various cases for retpoline and IBRS-based + * mitigations for the Spectre variant 2 vulnerability. Sometimes to + * eliminate potentially bogus entries from the RSB, and sometimes + * purely to ensure that it doesn't get empty, which on some CPUs would + * allow predictions from other (unwanted!) sources to be used. + * + * We define a CPP macro such that it can be used from both .S files and + * inline assembly. 
It's possible to do a .macro and then include that + * from C via asm(".include <asm/nospec-branch.h>") but let's not go there. + */ + +#define RSB_CLEAR_LOOPS		32	/* To forcibly overwrite all entries */ +#define RSB_FILL_LOOPS		16	/* To avoid underflow */ + +/* + * Google experimented with loop-unrolling and this turned out to be + * the optimal version — two calls, each with their own speculation + * trap should their return address end up getting used, in a loop. + */ +#define __FILL_RETURN_BUFFER(reg, nr, sp)	\ +	mov	$(nr/2), reg;			\ +771:						\ +	call	772f;				\ +773:	/* speculation trap */			\ +	pause;					\ +	lfence;					\ +	jmp	773b;				\ +772:						\ +	call	774f;				\ +775:	/* speculation trap */			\ +	pause;					\ +	lfence;					\ +	jmp	775b;				\ +774:						\ +	dec	reg;				\ +	jnz	771b;				\ +	add	$(BITS_PER_LONG/8) * nr, sp;  #ifdef __ASSEMBLY__ @@ -23,6 +68,18 @@  .endm  /* + * This should be used immediately before an indirect jump/call. It tells + * objtool the subsequent indirect jump/call is vouched safe for retpoline + * builds. + */ +.macro ANNOTATE_RETPOLINE_SAFE +	.Lannotate_\@: +	.pushsection .discard.retpoline_safe +	_ASM_PTR .Lannotate_\@ +	.popsection +.endm + +/*   * These are the bare retpoline primitives for indirect jmp and call.   * Do not use these directly; they only exist to make the ALTERNATIVE   * invocation below less ugly. @@ -58,9 +115,9 @@  .macro JMP_NOSPEC reg:req  #ifdef CONFIG_RETPOLINE  	ANNOTATE_NOSPEC_ALTERNATIVE -	ALTERNATIVE_2 __stringify(jmp *\reg),				\ +	ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *\reg),	\  		__stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE,	\ -		__stringify(lfence; jmp *\reg), X86_FEATURE_RETPOLINE_AMD +		__stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *\reg), X86_FEATURE_RETPOLINE_AMD  #else  	jmp	*\reg  #endif @@ -69,18 +126,25 @@  .macro CALL_NOSPEC reg:req  #ifdef CONFIG_RETPOLINE  	ANNOTATE_NOSPEC_ALTERNATIVE -	ALTERNATIVE_2 __stringify(call *\reg),				\ +	ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *\reg),	\  		__stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\ -		__stringify(lfence; call *\reg), X86_FEATURE_RETPOLINE_AMD +		__stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *\reg), X86_FEATURE_RETPOLINE_AMD  #else  	call	*\reg  #endif  .endm -/* This clobbers the BX register */ -.macro FILL_RETURN_BUFFER nr:req ftr:req + /* +  * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP +  * monstrosity above, manually. +  */ +.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req  #ifdef CONFIG_RETPOLINE -	ALTERNATIVE "", "call __clear_rsb", \ftr +	ANNOTATE_NOSPEC_ALTERNATIVE +	ALTERNATIVE "jmp .Lskip_rsb_\@",				\ +		__stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP))	\ +		\ftr +.Lskip_rsb_\@:  #endif  .endm @@ -92,6 +156,12 @@  	".long 999b - .\n\t"					\  	".popsection\n\t" +#define ANNOTATE_RETPOLINE_SAFE					\ +	"999:\n\t"						\ +	".pushsection .discard.retpoline_safe\n\t"		\ +	_ASM_PTR " 999b\n\t"					\ +	".popsection\n\t" +  #if defined(CONFIG_X86_64) && defined(RETPOLINE)  /* @@ -101,6 +171,7 @@  # define CALL_NOSPEC						\  	ANNOTATE_NOSPEC_ALTERNATIVE				\  	ALTERNATIVE(						\ +	ANNOTATE_RETPOLINE_SAFE					\  	"call *%[thunk_target]\n",				\  	"call __x86_indirect_thunk_%V[thunk_target]\n",		\  	X86_FEATURE_RETPOLINE) @@ -112,7 +183,10 @@   * otherwise we'll run out of registers. We don't care about CET   * here, anyway.   
*/ -# define CALL_NOSPEC ALTERNATIVE("call *%[thunk_target]\n",	\ +# define CALL_NOSPEC						\ +	ALTERNATIVE(						\ +	ANNOTATE_RETPOLINE_SAFE					\ +	"call *%[thunk_target]\n",				\  	"       jmp    904f;\n"					\  	"       .align 16\n"					\  	"901:	call   903f;\n"					\ @@ -155,20 +229,90 @@ extern char __indirect_thunk_end[];  static inline void vmexit_fill_RSB(void)  {  #ifdef CONFIG_RETPOLINE -	alternative_input("", -			  "call __fill_rsb", -			  X86_FEATURE_RETPOLINE, -			  ASM_NO_INPUT_CLOBBER(_ASM_BX, "memory")); +	unsigned long loops; + +	asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE +		      ALTERNATIVE("jmp 910f", +				  __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)), +				  X86_FEATURE_RETPOLINE) +		      "910:" +		      : "=r" (loops), ASM_CALL_CONSTRAINT +		      : : "memory" );  #endif  } +#define alternative_msr_write(_msr, _val, _feature)		\ +	asm volatile(ALTERNATIVE("",				\ +				 "movl %[msr], %%ecx\n\t"	\ +				 "movl %[val], %%eax\n\t"	\ +				 "movl $0, %%edx\n\t"		\ +				 "wrmsr",			\ +				 _feature)			\ +		     : : [msr] "i" (_msr), [val] "i" (_val)	\ +		     : "eax", "ecx", "edx", "memory") +  static inline void indirect_branch_prediction_barrier(void)  { -	alternative_input("", -			  "call __ibp_barrier", -			  X86_FEATURE_USE_IBPB, -			  ASM_NO_INPUT_CLOBBER("eax", "ecx", "edx", "memory")); +	alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, +			      X86_FEATURE_USE_IBPB);  } +/* + * With retpoline, we must use IBRS to restrict branch prediction + * before calling into firmware. + * + * (Implemented as CPP macros due to header hell.) + */ +#define firmware_restrict_branch_speculation_start()			\ +do {									\ +	preempt_disable();						\ +	alternative_msr_write(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS,	\ +			      X86_FEATURE_USE_IBRS_FW);			\ +} while (0) + +#define firmware_restrict_branch_speculation_end()			\ +do {									\ +	alternative_msr_write(MSR_IA32_SPEC_CTRL, 0,			\ +			      X86_FEATURE_USE_IBRS_FW);			\ +	preempt_enable();						\ +} while (0) +  #endif /* __ASSEMBLY__ */ + +/* + * Below is used in the eBPF JIT compiler and emits the byte sequence + * for the following assembly: + * + * With retpolines configured: + * + *    callq do_rop + *  spec_trap: + *    pause + *    lfence + *    jmp spec_trap + *  do_rop: + *    mov %rax,(%rsp) + *    retq + * + * Without retpolines configured: + * + *    jmp *%rax + */ +#ifdef CONFIG_RETPOLINE +# define RETPOLINE_RAX_BPF_JIT_SIZE	17 +# define RETPOLINE_RAX_BPF_JIT()				\ +	EMIT1_off32(0xE8, 7);	 /* callq do_rop */		\ +	/* spec_trap: */					\ +	EMIT2(0xF3, 0x90);       /* pause */			\ +	EMIT3(0x0F, 0xAE, 0xE8); /* lfence */			\ +	EMIT2(0xEB, 0xF9);       /* jmp spec_trap */		\ +	/* do_rop: */						\ +	EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */	\ +	EMIT1(0xC3);             /* retq */ +#else +# define RETPOLINE_RAX_BPF_JIT_SIZE	2 +# define RETPOLINE_RAX_BPF_JIT()				\ +	EMIT2(0xFF, 0xE0);	 /* jmp *%rax */ +#endif +  #endif /* _ASM_X86_NOSPEC_BRANCH_H_ */ diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h index 4baa6bceb232..d652a3808065 100644 --- a/arch/x86/include/asm/page_64.h +++ b/arch/x86/include/asm/page_64.h @@ -52,10 +52,6 @@ static inline void clear_page(void *page)  void copy_page(void *to, void *from); -#ifdef CONFIG_X86_MCE -#define arch_unmap_kpfn arch_unmap_kpfn -#endif -  #endif	/* !__ASSEMBLY__ */  #ifdef CONFIG_X86_VSYSCALL_EMULATION diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 892df375b615..c83a2f418cea 
100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -7,6 +7,7 @@  #ifdef CONFIG_PARAVIRT  #include <asm/pgtable_types.h>  #include <asm/asm.h> +#include <asm/nospec-branch.h>  #include <asm/paravirt_types.h> @@ -297,9 +298,9 @@ static inline void __flush_tlb_global(void)  {  	PVOP_VCALL0(pv_mmu_ops.flush_tlb_kernel);  } -static inline void __flush_tlb_single(unsigned long addr) +static inline void __flush_tlb_one_user(unsigned long addr)  { -	PVOP_VCALL1(pv_mmu_ops.flush_tlb_single, addr); +	PVOP_VCALL1(pv_mmu_ops.flush_tlb_one_user, addr);  }  static inline void flush_tlb_others(const struct cpumask *cpumask, @@ -879,23 +880,27 @@ extern void default_banner(void);  #define INTERRUPT_RETURN						\  	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE,	\ -		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret)) +		  ANNOTATE_RETPOLINE_SAFE;					\ +		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret);)  #define DISABLE_INTERRUPTS(clobbers)					\  	PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \  		  PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);		\ +		  ANNOTATE_RETPOLINE_SAFE;					\  		  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable);	\  		  PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)  #define ENABLE_INTERRUPTS(clobbers)					\  	PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers,	\  		  PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);		\ +		  ANNOTATE_RETPOLINE_SAFE;					\  		  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable);	\  		  PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)  #ifdef CONFIG_X86_32  #define GET_CR0_INTO_EAX				\  	push %ecx; push %edx;				\ +	ANNOTATE_RETPOLINE_SAFE;				\  	call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0);	\  	pop %edx; pop %ecx  #else	/* !CONFIG_X86_32 */ @@ -917,21 +922,25 @@ extern void default_banner(void);   */  #define SWAPGS								\  	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE,	\ -		  call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs)		\ +		  ANNOTATE_RETPOLINE_SAFE;					\ +		  call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs);		\  		 )  #define GET_CR2_INTO_RAX				\ -	call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2) +	ANNOTATE_RETPOLINE_SAFE;				\ +	call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2);  #define USERGS_SYSRET64							\  	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64),	\  		  CLBR_NONE,						\ -		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64)) +		  ANNOTATE_RETPOLINE_SAFE;					\ +		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64);)  #ifdef CONFIG_DEBUG_ENTRY  #define SAVE_FLAGS(clobbers)                                        \  	PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_save_fl), clobbers, \  		  PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);        \ +		  ANNOTATE_RETPOLINE_SAFE;				    \  		  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_save_fl);    \  		  PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)  #endif diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 6ec54d01972d..180bc0bff0fb 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -43,6 +43,7 @@  #include <asm/desc_defs.h>  #include <asm/kmap_types.h>  #include <asm/pgtable_types.h> +#include <asm/nospec-branch.h>  struct page;  struct thread_struct; @@ -217,7 +218,7 @@ struct pv_mmu_ops {  	/* TLB operations */  	void (*flush_tlb_user)(void);  	void (*flush_tlb_kernel)(void); -	void (*flush_tlb_single)(unsigned long addr); +	void (*flush_tlb_one_user)(unsigned long addr);  	void (*flush_tlb_others)(const struct cpumask *cpus,  		
		 const struct flush_tlb_info *info); @@ -392,7 +393,9 @@ int paravirt_disable_iospace(void);   * offset into the paravirt_patch_template structure, and can therefore be   * freely converted back into a structure offset.   */ -#define PARAVIRT_CALL	"call *%c[paravirt_opptr];" +#define PARAVIRT_CALL					\ +	ANNOTATE_RETPOLINE_SAFE				\ +	"call *%c[paravirt_opptr];"  /*   * These macros are intended to wrap calls through one of the paravirt diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index ba3c523aaf16..a06b07399d17 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -526,7 +526,7 @@ static inline bool x86_this_cpu_variable_test_bit(int nr,  {  	bool oldbit; -	asm volatile("bt "__percpu_arg(2)",%1" +	asm volatile("btl "__percpu_arg(2)",%1"  			CC_SET(c)  			: CC_OUT(c) (oldbit)  			: "m" (*(unsigned long __percpu *)addr), "Ir" (nr)); diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 63c2552b6b65..b444d83cfc95 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -350,14 +350,14 @@ static inline pmd_t pmd_set_flags(pmd_t pmd, pmdval_t set)  {  	pmdval_t v = native_pmd_val(pmd); -	return __pmd(v | set); +	return native_make_pmd(v | set);  }  static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear)  {  	pmdval_t v = native_pmd_val(pmd); -	return __pmd(v & ~clear); +	return native_make_pmd(v & ~clear);  }  static inline pmd_t pmd_mkold(pmd_t pmd) @@ -409,14 +409,14 @@ static inline pud_t pud_set_flags(pud_t pud, pudval_t set)  {  	pudval_t v = native_pud_val(pud); -	return __pud(v | set); +	return native_make_pud(v | set);  }  static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear)  {  	pudval_t v = native_pud_val(pud); -	return __pud(v & ~clear); +	return native_make_pud(v & ~clear);  }  static inline pud_t pud_mkold(pud_t pud) diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index e67c0620aec2..b3ec519e3982 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -32,6 +32,7 @@ extern pmd_t initial_pg_pmd[];  static inline void pgtable_cache_init(void) { }  static inline void check_pgt_cache(void) { }  void paging_init(void); +void sync_initial_page_table(void);  /*   * Define this if things work differently on an i386 and an i486: @@ -61,7 +62,7 @@ void paging_init(void);  #define kpte_clear_flush(ptep, vaddr)		\  do {						\  	pte_clear(&init_mm, (vaddr), (ptep));	\ -	__flush_tlb_one((vaddr));		\ +	__flush_tlb_one_kernel((vaddr));		\  } while (0)  #endif /* !__ASSEMBLY__ */ diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 81462e9a34f6..1149d2112b2e 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -28,6 +28,7 @@ extern pgd_t init_top_pgt[];  #define swapper_pg_dir init_top_pgt  extern void paging_init(void); +static inline void sync_initial_page_table(void) { }  #define pte_ERROR(e)					\  	pr_err("%s:%d: bad pte %p(%016lx)\n",		\ diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 3696398a9475..acfe755562a6 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -174,7 +174,6 @@ enum page_cache_mode {  #define __PAGE_KERNEL_RO		(__PAGE_KERNEL & ~_PAGE_RW)  #define __PAGE_KERNEL_RX		(__PAGE_KERNEL_EXEC & ~_PAGE_RW)  #define __PAGE_KERNEL_NOCACHE		(__PAGE_KERNEL | _PAGE_NOCACHE) -#define __PAGE_KERNEL_VSYSCALL		(__PAGE_KERNEL_RX | 
_PAGE_USER)  #define __PAGE_KERNEL_VVAR		(__PAGE_KERNEL_RO | _PAGE_USER)  #define __PAGE_KERNEL_LARGE		(__PAGE_KERNEL | _PAGE_PSE)  #define __PAGE_KERNEL_LARGE_EXEC	(__PAGE_KERNEL_EXEC | _PAGE_PSE) @@ -206,7 +205,6 @@ enum page_cache_mode {  #define PAGE_KERNEL_NOCACHE	__pgprot(__PAGE_KERNEL_NOCACHE | _PAGE_ENC)  #define PAGE_KERNEL_LARGE	__pgprot(__PAGE_KERNEL_LARGE | _PAGE_ENC)  #define PAGE_KERNEL_LARGE_EXEC	__pgprot(__PAGE_KERNEL_LARGE_EXEC | _PAGE_ENC) -#define PAGE_KERNEL_VSYSCALL	__pgprot(__PAGE_KERNEL_VSYSCALL | _PAGE_ENC)  #define PAGE_KERNEL_VVAR	__pgprot(__PAGE_KERNEL_VVAR | _PAGE_ENC)  #define PAGE_KERNEL_IO		__pgprot(__PAGE_KERNEL_IO) @@ -323,6 +321,11 @@ static inline pudval_t native_pud_val(pud_t pud)  #else  #include <asm-generic/pgtable-nopud.h> +static inline pud_t native_make_pud(pudval_t val) +{ +	return (pud_t) { .p4d.pgd = native_make_pgd(val) }; +} +  static inline pudval_t native_pud_val(pud_t pud)  {  	return native_pgd_val(pud.p4d.pgd); @@ -344,6 +347,11 @@ static inline pmdval_t native_pmd_val(pmd_t pmd)  #else  #include <asm-generic/pgtable-nopmd.h> +static inline pmd_t native_make_pmd(pmdval_t val) +{ +	return (pmd_t) { .pud.p4d.pgd = native_make_pgd(val) }; +} +  static inline pmdval_t native_pmd_val(pmd_t pmd)  {  	return native_pgd_val(pmd.pud.p4d.pgd); diff --git a/arch/x86/include/asm/platform_sst_audio.h b/arch/x86/include/asm/platform_sst_audio.h index 5973a2f3db3d..059823bb8af7 100644 --- a/arch/x86/include/asm/platform_sst_audio.h +++ b/arch/x86/include/asm/platform_sst_audio.h @@ -135,6 +135,7 @@ struct sst_platform_info {  	const struct sst_res_info *res_info;  	const struct sst_lib_dnld_info *lib_info;  	const char *platform; +	bool streams_lost_on_suspend;  };  int add_sst_platform_device(void);  #endif diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 793bae7e7ce3..b0ccd4847a58 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -91,7 +91,7 @@ struct cpuinfo_x86 {  	__u8			x86;		/* CPU family */  	__u8			x86_vendor;	/* CPU vendor */  	__u8			x86_model; -	__u8			x86_mask; +	__u8			x86_stepping;  #ifdef CONFIG_X86_64  	/* Number of 4K pages in DTLB/ITLB combined(in pages): */  	int			x86_tlbsize; @@ -109,7 +109,7 @@ struct cpuinfo_x86 {  	char			x86_vendor_id[16];  	char			x86_model_id[64];  	/* in KB - valid for CPUS which support this call: */ -	int			x86_cache_size; +	unsigned int		x86_cache_size;  	int			x86_cache_alignment;	/* In bytes */  	/* Cache QoS architectural values: */  	int			x86_cache_max_rmid;	/* max index */ @@ -977,7 +977,5 @@ bool xen_set_default_idle(void);  void stop_this_cpu(void *dummy);  void df_debug(struct pt_regs *regs, long error_code); - -void __ibp_barrier(void); - +void microcode_check(void);  #endif /* _ASM_X86_PROCESSOR_H */ diff --git a/arch/x86/include/asm/refcount.h b/arch/x86/include/asm/refcount.h index 4e44250e7d0d..4cf11d88d3b3 100644 --- a/arch/x86/include/asm/refcount.h +++ b/arch/x86/include/asm/refcount.h @@ -17,7 +17,7 @@  #define _REFCOUNT_EXCEPTION				\  	".pushsection .text..refcount\n"		\  	"111:\tlea %[counter], %%" _ASM_CX "\n"		\ -	"112:\t" ASM_UD0 "\n"				\ +	"112:\t" ASM_UD2 "\n"				\  	ASM_UNREACHABLE					\  	".popsection\n"					\  	"113:\n"					\ @@ -67,13 +67,13 @@ static __always_inline __must_check  bool refcount_sub_and_test(unsigned int i, refcount_t *r)  {  	GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl", REFCOUNT_CHECK_LT_ZERO, -				  r->refs.counter, "er", i, "%0", e); +				  r->refs.counter, "er", i, "%0", e, 
"cx");  }  static __always_inline __must_check bool refcount_dec_and_test(refcount_t *r)  {  	GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl", REFCOUNT_CHECK_LT_ZERO, -				 r->refs.counter, "%0", e); +				 r->refs.counter, "%0", e, "cx");  }  static __always_inline __must_check diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h index f91c365e57c3..4914a3e7c803 100644 --- a/arch/x86/include/asm/rmwcc.h +++ b/arch/x86/include/asm/rmwcc.h @@ -2,8 +2,7 @@  #ifndef _ASM_X86_RMWcc  #define _ASM_X86_RMWcc -#define __CLOBBERS_MEM		"memory" -#define __CLOBBERS_MEM_CC_CX	"memory", "cc", "cx" +#define __CLOBBERS_MEM(clb...)	"memory", ## clb  #if !defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(CC_HAVE_ASM_GOTO) @@ -40,18 +39,19 @@ do {									\  #endif /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */  #define GEN_UNARY_RMWcc(op, var, arg0, cc)				\ -	__GEN_RMWcc(op " " arg0, var, cc, __CLOBBERS_MEM) +	__GEN_RMWcc(op " " arg0, var, cc, __CLOBBERS_MEM()) -#define GEN_UNARY_SUFFIXED_RMWcc(op, suffix, var, arg0, cc)		\ +#define GEN_UNARY_SUFFIXED_RMWcc(op, suffix, var, arg0, cc, clobbers...)\  	__GEN_RMWcc(op " " arg0 "\n\t" suffix, var, cc,			\ -		    __CLOBBERS_MEM_CC_CX) +		    __CLOBBERS_MEM(clobbers))  #define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc)			\  	__GEN_RMWcc(op __BINARY_RMWcc_ARG arg0, var, cc,		\ -		    __CLOBBERS_MEM, vcon (val)) +		    __CLOBBERS_MEM(), vcon (val)) -#define GEN_BINARY_SUFFIXED_RMWcc(op, suffix, var, vcon, val, arg0, cc)	\ +#define GEN_BINARY_SUFFIXED_RMWcc(op, suffix, var, vcon, val, arg0, cc,	\ +				  clobbers...)				\  	__GEN_RMWcc(op __BINARY_RMWcc_ARG arg0 "\n\t" suffix, var, cc,	\ -		    __CLOBBERS_MEM_CC_CX, vcon (val)) +		    __CLOBBERS_MEM(clobbers), vcon (val))  #endif /* _ASM_X86_RMWcc */ diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h index d6baf23782bc..5c019d23d06b 100644 --- a/arch/x86/include/asm/sections.h +++ b/arch/x86/include/asm/sections.h @@ -10,6 +10,7 @@ extern struct exception_table_entry __stop___ex_table[];  #if defined(CONFIG_X86_64)  extern char __end_rodata_hpage_align[]; +extern char __entry_trampoline_start[], __entry_trampoline_end[];  #endif  #endif	/* _ASM_X86_SECTIONS_H */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 461f53d27708..a4189762b266 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -129,6 +129,7 @@ static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask)  void cpu_disable_common(void);  void native_smp_prepare_boot_cpu(void);  void native_smp_prepare_cpus(unsigned int max_cpus); +void calculate_max_logical_packages(void);  void native_smp_cpus_done(unsigned int max_cpus);  void common_cpu_up(unsigned int cpunum, struct task_struct *tidle);  int native_cpu_up(unsigned int cpunum, struct task_struct *tidle); diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h index 82c34ee25a65..906794aa034e 100644 --- a/arch/x86/include/asm/sys_ia32.h +++ b/arch/x86/include/asm/sys_ia32.h @@ -20,31 +20,43 @@  #include <asm/ia32.h>  /* ia32/sys_ia32.c */ -asmlinkage long sys32_truncate64(const char __user *, unsigned long, unsigned long); -asmlinkage long sys32_ftruncate64(unsigned int, unsigned long, unsigned long); +asmlinkage long compat_sys_x86_truncate64(const char __user *, unsigned long, +					  unsigned long); +asmlinkage long compat_sys_x86_ftruncate64(unsigned int, unsigned long, +					   unsigned long); -asmlinkage long sys32_stat64(const char __user *, 
struct stat64 __user *); -asmlinkage long sys32_lstat64(const char __user *, struct stat64 __user *); -asmlinkage long sys32_fstat64(unsigned int, struct stat64 __user *); -asmlinkage long sys32_fstatat(unsigned int, const char __user *, +asmlinkage long compat_sys_x86_stat64(const char __user *, +				      struct stat64 __user *); +asmlinkage long compat_sys_x86_lstat64(const char __user *, +				       struct stat64 __user *); +asmlinkage long compat_sys_x86_fstat64(unsigned int, struct stat64 __user *); +asmlinkage long compat_sys_x86_fstatat(unsigned int, const char __user *,  			      struct stat64 __user *, int);  struct mmap_arg_struct32; -asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *); +asmlinkage long compat_sys_x86_mmap(struct mmap_arg_struct32 __user *); -asmlinkage long sys32_waitpid(compat_pid_t, unsigned int __user *, int); +asmlinkage long compat_sys_x86_waitpid(compat_pid_t, unsigned int __user *, +				       int); -asmlinkage long sys32_pread(unsigned int, char __user *, u32, u32, u32); -asmlinkage long sys32_pwrite(unsigned int, const char __user *, u32, u32, u32); +asmlinkage long compat_sys_x86_pread(unsigned int, char __user *, u32, u32, +				     u32); +asmlinkage long compat_sys_x86_pwrite(unsigned int, const char __user *, u32, +				      u32, u32); -long sys32_fadvise64_64(int, __u32, __u32, __u32, __u32, int); -long sys32_vm86_warning(void); +asmlinkage long compat_sys_x86_fadvise64_64(int, __u32, __u32, __u32, __u32, +					    int); -asmlinkage ssize_t sys32_readahead(int, unsigned, unsigned, size_t); -asmlinkage long sys32_sync_file_range(int, unsigned, unsigned, -				      unsigned, unsigned, int); -asmlinkage long sys32_fadvise64(int, unsigned, unsigned, size_t, int); -asmlinkage long sys32_fallocate(int, int, unsigned, -				unsigned, unsigned, unsigned); +asmlinkage ssize_t compat_sys_x86_readahead(int, unsigned int, unsigned int, +					    size_t); +asmlinkage long compat_sys_x86_sync_file_range(int, unsigned int, unsigned int, +					       unsigned int, unsigned int, +					       int); +asmlinkage long compat_sys_x86_fadvise64(int, unsigned int, unsigned int, +					 size_t, int); +asmlinkage long compat_sys_x86_fallocate(int, int, unsigned int, unsigned int, +					 unsigned int, unsigned int); +asmlinkage long compat_sys_x86_clone(unsigned long, unsigned long, int __user *, +				     unsigned long, int __user *);  /* ia32/ia32_signal.c */  asmlinkage long sys32_sigreturn(void); diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 2b8f18ca5874..84137c22fdfa 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -140,7 +140,7 @@ static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)  #else  #define __flush_tlb() __native_flush_tlb()  #define __flush_tlb_global() __native_flush_tlb_global() -#define __flush_tlb_single(addr) __native_flush_tlb_single(addr) +#define __flush_tlb_one_user(addr) __native_flush_tlb_one_user(addr)  #endif  static inline bool tlb_defer_switch_to_init_mm(void) @@ -400,7 +400,7 @@ static inline void __native_flush_tlb_global(void)  /*   * flush one page in the user mapping   */ -static inline void __native_flush_tlb_single(unsigned long addr) +static inline void __native_flush_tlb_one_user(unsigned long addr)  {  	u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid); @@ -437,18 +437,31 @@ static inline void __flush_tlb_all(void)  /*   * flush one page in the kernel mapping   */ -static inline void __flush_tlb_one(unsigned long addr) 
+static inline void __flush_tlb_one_kernel(unsigned long addr)  {  	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE); -	__flush_tlb_single(addr); + +	/* +	 * If PTI is off, then __flush_tlb_one_user() is just INVLPG or its +	 * paravirt equivalent.  Even with PCID, this is sufficient: we only +	 * use PCID if we also use global PTEs for the kernel mapping, and +	 * INVLPG flushes global translations across all address spaces. +	 * +	 * If PTI is on, then the kernel is mapped with non-global PTEs, and +	 * __flush_tlb_one_user() will flush the given address for the current +	 * kernel address space and for its usermode counterpart, but it does +	 * not flush it for other address spaces. +	 */ +	__flush_tlb_one_user(addr);  	if (!static_cpu_has(X86_FEATURE_PTI))  		return;  	/* -	 * __flush_tlb_single() will have cleared the TLB entry for this ASID, -	 * but since kernel space is replicated across all, we must also -	 * invalidate all others. +	 * See above.  We need to propagate the flush to all other address +	 * spaces.  In principle, we only need to propagate it to kernelmode +	 * address spaces, but the extra bookkeeping we would need is not +	 * worth it.  	 */  	invalidate_other_asid();  } diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 8b6780751132..5db8b0b10766 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -352,6 +352,7 @@ enum vmcs_field {  #define INTR_TYPE_NMI_INTR		(2 << 8) /* NMI */  #define INTR_TYPE_HARD_EXCEPTION	(3 << 8) /* processor exception */  #define INTR_TYPE_SOFT_INTR             (4 << 8) /* software interrupt */ +#define INTR_TYPE_PRIV_SW_EXCEPTION	(5 << 8) /* ICE breakpoint - undocumented */  #define INTR_TYPE_SOFT_EXCEPTION	(6 << 8) /* software exception */  /* GUEST_INTERRUPTIBILITY_INFO flags. */ diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h index 197c2e6c7376..099414345865 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/uapi/asm/hyperv.h @@ -241,24 +241,24 @@  #define HV_X64_MSR_REENLIGHTENMENT_CONTROL	0x40000106  struct hv_reenlightenment_control { -	u64 vector:8; -	u64 reserved1:8; -	u64 enabled:1; -	u64 reserved2:15; -	u64 target_vp:32; +	__u64 vector:8; +	__u64 reserved1:8; +	__u64 enabled:1; +	__u64 reserved2:15; +	__u64 target_vp:32;  };  #define HV_X64_MSR_TSC_EMULATION_CONTROL	0x40000107  #define HV_X64_MSR_TSC_EMULATION_STATUS		0x40000108  struct hv_tsc_emulation_control { -	u64 enabled:1; -	u64 reserved:63; +	__u64 enabled:1; +	__u64 reserved:63;  };  struct hv_tsc_emulation_status { -	u64 inprogress:1; -	u64 reserved:63; +	__u64 inprogress:1; +	__u64 reserved:63;  };  #define HV_X64_MSR_HYPERCALL_ENABLE		0x00000001 diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h index 7a2ade4aa235..6cfa9c8cb7d6 100644 --- a/arch/x86/include/uapi/asm/kvm_para.h +++ b/arch/x86/include/uapi/asm/kvm_para.h @@ -26,6 +26,7 @@  #define KVM_FEATURE_PV_EOI		6  #define KVM_FEATURE_PV_UNHALT		7  #define KVM_FEATURE_PV_TLB_FLUSH	9 +#define KVM_FEATURE_ASYNC_PF_VMEXIT	10  /* The last 8 bits are used to indicate how to interpret the flags field   * in pvclock structure. If no bits are set, all flags are ignored. 
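For illustration of the tlbflush.h rename above, a minimal sketch of a caller after this change; flush_one_kernel_mapping() is a hypothetical wrapper name, and only __flush_tlb_one_kernel() / __flush_tlb_one_user() come from the patch itself:

	#include <asm/tlbflush.h>

	/*
	 * Sketch only: with this series, single-page flushes of kernel
	 * mappings go through __flush_tlb_one_kernel(), which does the
	 * INVLPG via __flush_tlb_one_user() and, when PTI is enabled,
	 * also propagates the flush to the other address spaces.
	 */
	static inline void flush_one_kernel_mapping(unsigned long vaddr)
	{
		__flush_tlb_one_kernel(vaddr);	/* was __flush_tlb_one(vaddr) */
	}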
diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h index 91723461dc1f..435db58a7bad 100644 --- a/arch/x86/include/uapi/asm/mce.h +++ b/arch/x86/include/uapi/asm/mce.h @@ -30,6 +30,7 @@ struct mce {  	__u64 synd;	/* MCA_SYND MSR: only valid on SMCA systems */  	__u64 ipid;	/* MCA_IPID MSR: only valid on SMCA systems */  	__u64 ppin;	/* Protected Processor Inventory Number */ +	__u32 microcode;/* Microcode revision */  };  #define MCE_GET_RECORD_LEN   _IOR('M', 1, int) diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 6db28f17ff28..c88e0b127810 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -235,7 +235,7 @@ int amd_cache_northbridges(void)  	if (boot_cpu_data.x86 == 0x10 &&  	    boot_cpu_data.x86_model >= 0x8 &&  	    (boot_cpu_data.x86_model > 0x9 || -	     boot_cpu_data.x86_mask >= 0x1)) +	     boot_cpu_data.x86_stepping >= 0x1))  		amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE;  	if (boot_cpu_data.x86 == 0x15) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 25ddf02598d2..b203af0855b5 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -546,7 +546,7 @@ static DEFINE_PER_CPU(struct clock_event_device, lapic_events);  static u32 hsx_deadline_rev(void)  { -	switch (boot_cpu_data.x86_mask) { +	switch (boot_cpu_data.x86_stepping) {  	case 0x02: return 0x3a; /* EP */  	case 0x04: return 0x0f; /* EX */  	} @@ -556,7 +556,7 @@ static u32 hsx_deadline_rev(void)  static u32 bdx_deadline_rev(void)  { -	switch (boot_cpu_data.x86_mask) { +	switch (boot_cpu_data.x86_stepping) {  	case 0x02: return 0x00000011;  	case 0x03: return 0x0700000e;  	case 0x04: return 0x0f00000c; @@ -568,7 +568,7 @@ static u32 bdx_deadline_rev(void)  static u32 skx_deadline_rev(void)  { -	switch (boot_cpu_data.x86_mask) { +	switch (boot_cpu_data.x86_stepping) {  	case 0x03: return 0x01000136;  	case 0x04: return 0x02000014;  	} diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 8ad2e410974f..7c5538769f7e 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1603,7 +1603,7 @@ static void __init delay_with_tsc(void)  	do {  		rep_nop();  		now = rdtsc(); -	} while ((now - start) < 40000000000UL / HZ && +	} while ((now - start) < 40000000000ULL / HZ &&  		time_before_eq(jiffies, end));  } diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 3cc471beb50b..bb6f7a2148d7 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -134,21 +134,40 @@ static void apic_update_vector(struct irq_data *irqd, unsigned int newvec,  {  	struct apic_chip_data *apicd = apic_chip_data(irqd);  	struct irq_desc *desc = irq_data_to_desc(irqd); +	bool managed = irqd_affinity_is_managed(irqd);  	lockdep_assert_held(&vector_lock);  	trace_vector_update(irqd->irq, newvec, newcpu, apicd->vector,  			    apicd->cpu); -	/* Setup the vector move, if required  */ -	if (apicd->vector && cpu_online(apicd->cpu)) { +	/* +	 * If there is no vector associated or if the associated vector is +	 * the shutdown vector, which is associated to make PCI/MSI +	 * shutdown mode work, then there is nothing to release. Clear out +	 * prev_vector for this and the offlined target case. 
+	 */ +	apicd->prev_vector = 0; +	if (!apicd->vector || apicd->vector == MANAGED_IRQ_SHUTDOWN_VECTOR) +		goto setnew; +	/* +	 * If the target CPU of the previous vector is online, then mark +	 * the vector as move in progress and store it for cleanup when the +	 * first interrupt on the new vector arrives. If the target CPU is +	 * offline then the regular release mechanism via the cleanup +	 * vector is not possible and the vector can be immediately freed +	 * in the underlying matrix allocator. +	 */ +	if (cpu_online(apicd->cpu)) {  		apicd->move_in_progress = true;  		apicd->prev_vector = apicd->vector;  		apicd->prev_cpu = apicd->cpu;  	} else { -		apicd->prev_vector = 0; +		irq_matrix_free(vector_matrix, apicd->cpu, apicd->vector, +				managed);  	} +setnew:  	apicd->vector = newvec;  	apicd->cpu = newcpu;  	BUG_ON(!IS_ERR_OR_NULL(per_cpu(vector_irq, newcpu)[newvec])); diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 46b675aaf20b..f11910b44638 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -1176,16 +1176,25 @@ static void __init decode_gam_rng_tbl(unsigned long ptr)  	uv_gre_table = gre;  	for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) { +		unsigned long size = ((unsigned long)(gre->limit - lgre) +					<< UV_GAM_RANGE_SHFT); +		int order = 0; +		char suffix[] = " KMGTPE"; + +		while (size > 9999 && order < sizeof(suffix)) { +			size /= 1024; +			order++; +		} +  		if (!index) {  			pr_info("UV: GAM Range Table...\n");  			pr_info("UV:  # %20s %14s %5s %4s %5s %3s %2s\n", "Range", "", "Size", "Type", "NASID", "SID", "PN");  		} -		pr_info("UV: %2d: 0x%014lx-0x%014lx %5luG %3d   %04x  %02x %02x\n", +		pr_info("UV: %2d: 0x%014lx-0x%014lx %5lu%c %3d   %04x  %02x %02x\n",  			index++,  			(unsigned long)lgre << UV_GAM_RANGE_SHFT,  			(unsigned long)gre->limit << UV_GAM_RANGE_SHFT, -			((unsigned long)(gre->limit - lgre)) >> -				(30 - UV_GAM_RANGE_SHFT), /* 64M -> 1G */ +			size, suffix[order],  			gre->type, gre->nasid, gre->sockid, gre->pnode);  		lgre = gre->limit; diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index fa1261eefa16..f91ba53e06c8 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -18,7 +18,7 @@ void foo(void)  	OFFSET(CPUINFO_x86, cpuinfo_x86, x86);  	OFFSET(CPUINFO_x86_vendor, cpuinfo_x86, x86_vendor);  	OFFSET(CPUINFO_x86_model, cpuinfo_x86, x86_model); -	OFFSET(CPUINFO_x86_mask, cpuinfo_x86, x86_mask); +	OFFSET(CPUINFO_x86_stepping, cpuinfo_x86, x86_stepping);  	OFFSET(CPUINFO_cpuid_level, cpuinfo_x86, cpuid_level);  	OFFSET(CPUINFO_x86_capability, cpuinfo_x86, x86_capability);  	OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id); diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 5bddbdcbc4a3..f0e6456ca7d3 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -119,7 +119,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)  		return;  	} -	if (c->x86_model == 6 && c->x86_mask == 1) { +	if (c->x86_model == 6 && c->x86_stepping == 1) {  		const int K6_BUG_LOOP = 1000000;  		int n;  		void (*f_vide)(void); @@ -149,7 +149,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)  	/* K6 with old style WHCR */  	if (c->x86_model < 8 || -	   (c->x86_model == 8 && c->x86_mask < 8)) { +	   (c->x86_model == 8 && c->x86_stepping < 8)) {  		/* We can only write allocate on the low 508Mb */  		if (mbytes > 508)  			mbytes = 508; @@ -168,7 +168,7 @@ static void init_amd_k6(struct 
cpuinfo_x86 *c)  		return;  	} -	if ((c->x86_model == 8 && c->x86_mask > 7) || +	if ((c->x86_model == 8 && c->x86_stepping > 7) ||  	     c->x86_model == 9 || c->x86_model == 13) {  		/* The more serious chips .. */ @@ -221,7 +221,7 @@ static void init_amd_k7(struct cpuinfo_x86 *c)  	 * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx  	 * As per AMD technical note 27212 0.2  	 */ -	if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) { +	if ((c->x86_model == 8 && c->x86_stepping >= 1) || (c->x86_model > 8)) {  		rdmsr(MSR_K7_CLK_CTL, l, h);  		if ((l & 0xfff00000) != 0x20000000) {  			pr_info("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", @@ -241,12 +241,12 @@ static void init_amd_k7(struct cpuinfo_x86 *c)  	 * but they are not certified as MP capable.  	 */  	/* Athlon 660/661 is valid. */ -	if ((c->x86_model == 6) && ((c->x86_mask == 0) || -	    (c->x86_mask == 1))) +	if ((c->x86_model == 6) && ((c->x86_stepping == 0) || +	    (c->x86_stepping == 1)))  		return;  	/* Duron 670 is valid */ -	if ((c->x86_model == 7) && (c->x86_mask == 0)) +	if ((c->x86_model == 7) && (c->x86_stepping == 0))  		return;  	/* @@ -256,8 +256,8 @@ static void init_amd_k7(struct cpuinfo_x86 *c)  	 * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for  	 * more.  	 */ -	if (((c->x86_model == 6) && (c->x86_mask >= 2)) || -	    ((c->x86_model == 7) && (c->x86_mask >= 1)) || +	if (((c->x86_model == 6) && (c->x86_stepping >= 2)) || +	    ((c->x86_model == 7) && (c->x86_stepping >= 1)) ||  	     (c->x86_model > 7))  		if (cpu_has(c, X86_FEATURE_MP))  			return; @@ -628,7 +628,7 @@ static void early_init_amd(struct cpuinfo_x86 *c)  	/*  Set MTRR capability flag if appropriate */  	if (c->x86 == 5)  		if (c->x86_model == 13 || c->x86_model == 9 || -		    (c->x86_model == 8 && c->x86_mask >= 8)) +		    (c->x86_model == 8 && c->x86_stepping >= 8))  			set_cpu_cap(c, X86_FEATURE_K6_MTRR);  #endif  #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PCI) @@ -795,7 +795,7 @@ static void init_amd_zn(struct cpuinfo_x86 *c)  	 * Fix erratum 1076: CPB feature bit not being set in CPUID. It affects  	 * all up to and including B1.  	 
*/ -	if (c->x86_model <= 1 && c->x86_mask <= 1) +	if (c->x86_model <= 1 && c->x86_stepping <= 1)  		set_cpu_cap(c, X86_FEATURE_CPB);  } @@ -906,11 +906,11 @@ static unsigned int amd_size_cache(struct cpuinfo_x86 *c, unsigned int size)  	/* AMD errata T13 (order #21922) */  	if ((c->x86 == 6)) {  		/* Duron Rev A0 */ -		if (c->x86_model == 3 && c->x86_mask == 0) +		if (c->x86_model == 3 && c->x86_stepping == 0)  			size = 64;  		/* Tbird rev A1/A2 */  		if (c->x86_model == 4 && -			(c->x86_mask == 0 || c->x86_mask == 1)) +			(c->x86_stepping == 0 || c->x86_stepping == 1))  			size = 256;  	}  	return size; @@ -1047,7 +1047,7 @@ static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)  	}  	/* OSVW unavailable or ID unknown, match family-model-stepping range */ -	ms = (cpu->x86_model << 4) | cpu->x86_mask; +	ms = (cpu->x86_model << 4) | cpu->x86_stepping;  	while ((range = *erratum++))  		if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) &&  		    (ms >= AMD_MODEL_RANGE_START(range)) && diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 71949bf2de5a..bfca937bdcc3 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -162,8 +162,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)  	if (cmdline_find_option_bool(boot_command_line, "nospectre_v2"))  		return SPECTRE_V2_CMD_NONE;  	else { -		ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, -					  sizeof(arg)); +		ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg));  		if (ret < 0)  			return SPECTRE_V2_CMD_AUTO; @@ -175,8 +174,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)  		}  		if (i >= ARRAY_SIZE(mitigation_options)) { -			pr_err("unknown option (%s). Switching to AUTO select\n", -			       mitigation_options[i].option); +			pr_err("unknown option (%s). Switching to AUTO select\n", arg);  			return SPECTRE_V2_CMD_AUTO;  		}  	} @@ -185,8 +183,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)  	     cmd == SPECTRE_V2_CMD_RETPOLINE_AMD ||  	     cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) &&  	    !IS_ENABLED(CONFIG_RETPOLINE)) { -		pr_err("%s selected but not compiled in. Switching to AUTO select\n", -		       mitigation_options[i].option); +		pr_err("%s selected but not compiled in. Switching to AUTO select\n", mitigation_options[i].option);  		return SPECTRE_V2_CMD_AUTO;  	} @@ -256,14 +253,14 @@ static void __init spectre_v2_select_mitigation(void)  			goto retpoline_auto;  		break;  	} -	pr_err("kernel not compiled with retpoline; no mitigation available!"); +	pr_err("Spectre mitigation: kernel not compiled with retpoline; no mitigation available!");  	return;  retpoline_auto:  	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {  	retpoline_amd:  		if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) { -			pr_err("LFENCE not serializing. Switching to generic retpoline\n"); +			pr_err("Spectre mitigation: LFENCE not serializing, switching to generic retpoline\n");  			goto retpoline_generic;  		}  		mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD : @@ -281,7 +278,7 @@ retpoline_auto:  	pr_info("%s\n", spectre_v2_strings[mode]);  	/* -	 * If neither SMEP or KPTI are available, there is a risk of +	 * If neither SMEP nor PTI are available, there is a risk of  	 * hitting userspace addresses in the RSB after a context switch  	 * from a shallow call stack to a deeper one. To prevent this fill  	 * the entire RSB, even when using IBRS. 
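The spectre_v2_parse_cmdline() hunk above also fixes the "unknown option" report: once the loop has run past the end of mitigation_options[], indexing the table with i would read out of bounds, so the message now prints the raw arg instead. A minimal standalone sketch of that match-then-fall-back pattern follows (hypothetical table and names, not taken from the patch):

#include <stdio.h>
#include <string.h>

#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))

/* Hypothetical stand-in for the kernel's mitigation_options[] table. */
static const struct {
	const char *option;
	int cmd;
} options[] = {
	{ "off",       0 },
	{ "on",        1 },
	{ "retpoline", 2 },
	{ "auto",      3 },
};

static int parse_option(const char *arg)
{
	size_t i;

	for (i = 0; i < ARRAY_SIZE(options); i++)
		if (!strcmp(arg, options[i].option))
			return options[i].cmd;

	/*
	 * Here i == ARRAY_SIZE(options), so options[i].option would be an
	 * out-of-bounds read; report the unmatched string itself, as the
	 * hunk above now does, and fall back to the "auto" choice.
	 */
	fprintf(stderr, "unknown option (%s). Switching to AUTO select\n", arg);
	return 3;
}

int main(void)
{
	printf("%d\n", parse_option("retpoline"));	/* 2 */
	printf("%d\n", parse_option("bogus"));		/* 3, after the warning */
	return 0;
}

Built with a plain C toolchain this prints 2 and then 3 after the warning, mirroring the fall-back to the AUTO selection in the hunk above.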
@@ -295,21 +292,29 @@ retpoline_auto:  	if ((!boot_cpu_has(X86_FEATURE_PTI) &&  	     !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) {  		setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); -		pr_info("Filling RSB on context switch\n"); +		pr_info("Spectre v2 mitigation: Filling RSB on context switch\n");  	}  	/* Initialize Indirect Branch Prediction Barrier if supported */  	if (boot_cpu_has(X86_FEATURE_IBPB)) {  		setup_force_cpu_cap(X86_FEATURE_USE_IBPB); -		pr_info("Enabling Indirect Branch Prediction Barrier\n"); +		pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n"); +	} + +	/* +	 * Retpoline means the kernel is safe because it has no indirect +	 * branches. But firmware isn't, so use IBRS to protect that. +	 */ +	if (boot_cpu_has(X86_FEATURE_IBRS)) { +		setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW); +		pr_info("Enabling Restricted Speculation for firmware calls\n");  	}  }  #undef pr_fmt  #ifdef CONFIG_SYSFS -ssize_t cpu_show_meltdown(struct device *dev, -			  struct device_attribute *attr, char *buf) +ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)  {  	if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))  		return sprintf(buf, "Not affected\n"); @@ -318,28 +323,21 @@ ssize_t cpu_show_meltdown(struct device *dev,  	return sprintf(buf, "Vulnerable\n");  } -ssize_t cpu_show_spectre_v1(struct device *dev, -			    struct device_attribute *attr, char *buf) +ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf)  {  	if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1))  		return sprintf(buf, "Not affected\n");  	return sprintf(buf, "Mitigation: __user pointer sanitization\n");  } -ssize_t cpu_show_spectre_v2(struct device *dev, -			    struct device_attribute *attr, char *buf) +ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf)  {  	if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))  		return sprintf(buf, "Not affected\n"); -	return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], +	return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],  		       boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", +		       boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",  		       spectre_v2_module_string());  }  #endif - -void __ibp_barrier(void) -{ -	__wrmsr(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, 0); -} -EXPORT_SYMBOL_GPL(__ibp_barrier); diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index c578cd29c2d2..e5ec0f11c0de 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -140,7 +140,7 @@ static void init_centaur(struct cpuinfo_x86 *c)  			clear_cpu_cap(c, X86_FEATURE_TSC);  			break;  		case 8: -			switch (c->x86_mask) { +			switch (c->x86_stepping) {  			default:  			name = "2";  				break; @@ -215,7 +215,7 @@ centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size)  	 *  - Note, it seems this may only be in engineering samples.  	 
*/  	if ((c->x86 == 6) && (c->x86_model == 9) && -				(c->x86_mask == 1) && (size == 65)) +				(c->x86_stepping == 1) && (size == 65))  		size -= 1;  	return size;  } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index d63f4b5706e4..348cf4821240 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -731,7 +731,7 @@ void cpu_detect(struct cpuinfo_x86 *c)  		cpuid(0x00000001, &tfms, &misc, &junk, &cap0);  		c->x86		= x86_family(tfms);  		c->x86_model	= x86_model(tfms); -		c->x86_mask	= x86_stepping(tfms); +		c->x86_stepping	= x86_stepping(tfms);  		if (cap0 & (1<<19)) {  			c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; @@ -1184,9 +1184,9 @@ static void identify_cpu(struct cpuinfo_x86 *c)  	int i;  	c->loops_per_jiffy = loops_per_jiffy; -	c->x86_cache_size = -1; +	c->x86_cache_size = 0;  	c->x86_vendor = X86_VENDOR_UNKNOWN; -	c->x86_model = c->x86_mask = 0;	/* So far unknown... */ +	c->x86_model = c->x86_stepping = 0;	/* So far unknown... */  	c->x86_vendor_id[0] = '\0'; /* Unset */  	c->x86_model_id[0] = '\0';  /* Unset */  	c->x86_max_cores = 1; @@ -1378,8 +1378,8 @@ void print_cpu_info(struct cpuinfo_x86 *c)  	pr_cont(" (family: 0x%x, model: 0x%x", c->x86, c->x86_model); -	if (c->x86_mask || c->cpuid_level >= 0) -		pr_cont(", stepping: 0x%x)\n", c->x86_mask); +	if (c->x86_stepping || c->cpuid_level >= 0) +		pr_cont(", stepping: 0x%x)\n", c->x86_stepping);  	else  		pr_cont(")\n");  } @@ -1749,3 +1749,33 @@ static int __init init_cpu_syscore(void)  	return 0;  }  core_initcall(init_cpu_syscore); + +/* + * The microcode loader calls this upon late microcode load to recheck features, + * only when microcode has been updated. Caller holds microcode_mutex and CPU + * hotplug lock. + */ +void microcode_check(void) +{ +	struct cpuinfo_x86 info; + +	perf_check_microcode(); + +	/* Reload CPUID max function as it might've changed. */ +	info.cpuid_level = cpuid_eax(0); + +	/* +	 * Copy all capability leafs to pick up the synthetic ones so that +	 * memcmp() below doesn't fail on that. The ones coming from CPUID will +	 * get overwritten in get_cpu_cap(). +	 */ +	memcpy(&info.x86_capability, &boot_cpu_data.x86_capability, sizeof(info.x86_capability)); + +	get_cpu_cap(&info); + +	if (!memcmp(&info.x86_capability, &boot_cpu_data.x86_capability, sizeof(info.x86_capability))) +		return; + +	pr_warn("x86/CPU: CPU features have changed after loading microcode, but might not take effect.\n"); +	pr_warn("x86/CPU: Please consider either early loading through initrd/built-in or a potential BIOS update.\n"); +} diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 6b4bb335641f..8949b7ae6d92 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -215,7 +215,7 @@ static void init_cyrix(struct cpuinfo_x86 *c)  	/* common case step number/rev -- exceptions handled below */  	c->x86_model = (dir1 >> 4) + 1; -	c->x86_mask = dir1 & 0xf; +	c->x86_stepping = dir1 & 0xf;  	/* Now cook; the original recipe is by Channing Corn, from Cyrix.  	 * We do the same thing for each generation: we work out diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 319bf989fad1..c3af167d0a70 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -105,7 +105,7 @@ static void probe_xeon_phi_r3mwait(struct cpuinfo_x86 *c)  /*   * Early microcode releases for the Spectre v2 mitigation were broken.   
* Information taken from; - * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/01/microcode-update-guidance.pdf + * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/03/microcode-update-guidance.pdf   * - https://kb.vmware.com/s/article/52345   * - Microcode revisions observed in the wild   * - Release note from 20180108 microcode release @@ -116,15 +116,13 @@ struct sku_microcode {  	u32 microcode;  };  static const struct sku_microcode spectre_bad_microcodes[] = { -	{ INTEL_FAM6_KABYLAKE_DESKTOP,	0x0B,	0x84 }, -	{ INTEL_FAM6_KABYLAKE_DESKTOP,	0x0A,	0x84 }, -	{ INTEL_FAM6_KABYLAKE_DESKTOP,	0x09,	0x84 }, -	{ INTEL_FAM6_KABYLAKE_MOBILE,	0x0A,	0x84 }, -	{ INTEL_FAM6_KABYLAKE_MOBILE,	0x09,	0x84 }, +	{ INTEL_FAM6_KABYLAKE_DESKTOP,	0x0B,	0x80 }, +	{ INTEL_FAM6_KABYLAKE_DESKTOP,	0x0A,	0x80 }, +	{ INTEL_FAM6_KABYLAKE_DESKTOP,	0x09,	0x80 }, +	{ INTEL_FAM6_KABYLAKE_MOBILE,	0x0A,	0x80 }, +	{ INTEL_FAM6_KABYLAKE_MOBILE,	0x09,	0x80 },  	{ INTEL_FAM6_SKYLAKE_X,		0x03,	0x0100013e },  	{ INTEL_FAM6_SKYLAKE_X,		0x04,	0x0200003c }, -	{ INTEL_FAM6_SKYLAKE_MOBILE,	0x03,	0xc2 }, -	{ INTEL_FAM6_SKYLAKE_DESKTOP,	0x03,	0xc2 },  	{ INTEL_FAM6_BROADWELL_CORE,	0x04,	0x28 },  	{ INTEL_FAM6_BROADWELL_GT3E,	0x01,	0x1b },  	{ INTEL_FAM6_BROADWELL_XEON_D,	0x02,	0x14 }, @@ -136,8 +134,6 @@ static const struct sku_microcode spectre_bad_microcodes[] = {  	{ INTEL_FAM6_HASWELL_X,		0x02,	0x3b },  	{ INTEL_FAM6_HASWELL_X,		0x04,	0x10 },  	{ INTEL_FAM6_IVYBRIDGE_X,	0x04,	0x42a }, -	/* Updated in the 20180108 release; blacklist until we know otherwise */ -	{ INTEL_FAM6_ATOM_GEMINI_LAKE,	0x01,	0x22 },  	/* Observed in the wild */  	{ INTEL_FAM6_SANDYBRIDGE_X,	0x06,	0x61b },  	{ INTEL_FAM6_SANDYBRIDGE_X,	0x07,	0x712 }, @@ -147,9 +143,16 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c)  {  	int i; +	/* +	 * We know that the hypervisor lie to us on the microcode version so +	 * we may as well hope that it is running the correct version. +	 */ +	if (cpu_has(c, X86_FEATURE_HYPERVISOR)) +		return false; +  	for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) {  		if (c->x86_model == spectre_bad_microcodes[i].model && -		    c->x86_mask == spectre_bad_microcodes[i].stepping) +		    c->x86_stepping == spectre_bad_microcodes[i].stepping)  			return (c->microcode <= spectre_bad_microcodes[i].microcode);  	}  	return false; @@ -196,7 +199,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)  	 * need the microcode to have already been loaded... so if it is  	 * not, recommend a BIOS update and disable large pages.  	 */ -	if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_mask <= 2 && +	if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_stepping <= 2 &&  	    c->microcode < 0x20e) {  		pr_warn("Atom PSE erratum detected, BIOS microcode update recommended\n");  		clear_cpu_cap(c, X86_FEATURE_PSE); @@ -212,7 +215,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)  	/* CPUID workaround for 0F33/0F34 CPU */  	if (c->x86 == 0xF && c->x86_model == 0x3 -	    && (c->x86_mask == 0x3 || c->x86_mask == 0x4)) +	    && (c->x86_stepping == 0x3 || c->x86_stepping == 0x4))  		c->x86_phys_bits = 36;  	/* @@ -310,7 +313,7 @@ int ppro_with_ram_bug(void)  	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&  	    boot_cpu_data.x86 == 6 &&  	    boot_cpu_data.x86_model == 1 && -	    boot_cpu_data.x86_mask < 8) { +	    boot_cpu_data.x86_stepping < 8) {  		pr_info("Pentium Pro with Errata#50 detected. 
Taking evasive action.\n");  		return 1;  	} @@ -327,7 +330,7 @@ static void intel_smp_check(struct cpuinfo_x86 *c)  	 * Mask B, Pentium, but not Pentium MMX  	 */  	if (c->x86 == 5 && -	    c->x86_mask >= 1 && c->x86_mask <= 4 && +	    c->x86_stepping >= 1 && c->x86_stepping <= 4 &&  	    c->x86_model <= 3) {  		/*  		 * Remember we have B step Pentia with bugs @@ -370,7 +373,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c)  	 * SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until  	 * model 3 mask 3  	 */ -	if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) +	if ((c->x86<<8 | c->x86_model<<4 | c->x86_stepping) < 0x633)  		clear_cpu_cap(c, X86_FEATURE_SEP);  	/* @@ -388,7 +391,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c)  	 * P4 Xeon erratum 037 workaround.  	 * Hardware prefetcher may cause stale data to be loaded into the cache.  	 */ -	if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { +	if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_stepping == 1)) {  		if (msr_set_bit(MSR_IA32_MISC_ENABLE,  				MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT) > 0) {  			pr_info("CPU: C0 stepping P4 Xeon detected.\n"); @@ -403,7 +406,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c)  	 * Specification Update").  	 */  	if (boot_cpu_has(X86_FEATURE_APIC) && (c->x86<<8 | c->x86_model<<4) == 0x520 && -	    (c->x86_mask < 0x6 || c->x86_mask == 0xb)) +	    (c->x86_stepping < 0x6 || c->x86_stepping == 0xb))  		set_cpu_bug(c, X86_BUG_11AP); @@ -650,7 +653,7 @@ static void init_intel(struct cpuinfo_x86 *c)  		case 6:  			if (l2 == 128)  				p = "Celeron (Mendocino)"; -			else if (c->x86_mask == 0 || c->x86_mask == 5) +			else if (c->x86_stepping == 0 || c->x86_stepping == 5)  				p = "Celeron-A";  			break; diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c index 410629f10ad3..589b948e6e01 100644 --- a/arch/x86/kernel/cpu/intel_rdt.c +++ b/arch/x86/kernel/cpu/intel_rdt.c @@ -819,7 +819,7 @@ static __init void rdt_quirks(void)  			cache_alloc_hsw_probe();  		break;  	case INTEL_FAM6_SKYLAKE_X: -		if (boot_cpu_data.x86_mask <= 4) +		if (boot_cpu_data.x86_stepping <= 4)  			set_rdt_options("!cmt,!mbmtotal,!mbmlocal,!l3cat");  	}  } diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index bdab7d2f51af..fca759d272a1 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -1804,6 +1804,7 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,  		goto out_common_fail;  	}  	closid = ret; +	ret = 0;  	rdtgrp->closid = closid;  	list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups); diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h index aa0d5df9dc60..e956eb267061 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h @@ -115,4 +115,19 @@ static inline void mce_unregister_injector_chain(struct notifier_block *nb)	{ }  extern struct mca_config mca_cfg; +#ifndef CONFIG_X86_64 +/* + * On 32-bit systems it would be difficult to safely unmap a poison page + * from the kernel 1:1 map because there are no non-canonical addresses that + * we can use to refer to the address without risking a speculative access. + * However, this isn't much of an issue because: + * 1) Few unmappable pages are in the 1:1 map. 
Most are in HIGHMEM which + *    are only mapped into the kernel as needed + * 2) Few people would run a 32-bit kernel on a machine that supports + *    recoverable errors because they have too much memory to boot 32-bit. + */ +static inline void mce_unmap_kpfn(unsigned long pfn) {} +#define mce_unmap_kpfn mce_unmap_kpfn +#endif +  #endif /* __X86_MCE_INTERNAL_H__ */ diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 3a8e88a611eb..466f47301334 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -56,6 +56,9 @@  static DEFINE_MUTEX(mce_log_mutex); +/* sysfs synchronization */ +static DEFINE_MUTEX(mce_sysfs_mutex); +  #define CREATE_TRACE_POINTS  #include <trace/events/mce.h> @@ -105,6 +108,10 @@ static struct irq_work mce_irq_work;  static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs); +#ifndef mce_unmap_kpfn +static void mce_unmap_kpfn(unsigned long pfn); +#endif +  /*   * CPU/chipset specific EDAC code can register a notifier call here to print   * MCE errors in a human-readable form. @@ -126,6 +133,8 @@ void mce_setup(struct mce *m)  	if (this_cpu_has(X86_FEATURE_INTEL_PPIN))  		rdmsrl(MSR_PPIN, m->ppin); + +	m->microcode = boot_cpu_data.microcode;  }  DEFINE_PER_CPU(struct mce, injectm); @@ -234,7 +243,7 @@ static void __print_mce(struct mce *m)  			m->cs, m->ip);  		if (m->cs == __KERNEL_CS) -			pr_cont("{%pS}", (void *)m->ip); +			pr_cont("{%pS}", (void *)(unsigned long)m->ip);  		pr_cont("\n");  	} @@ -258,7 +267,7 @@ static void __print_mce(struct mce *m)  	 */  	pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x microcode %x\n",  		m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid, -		cpu_data(m->extcpu).microcode); +		m->microcode);  }  static void print_mce(struct mce *m) @@ -590,7 +599,8 @@ static int srao_decode_notifier(struct notifier_block *nb, unsigned long val,  	if (mce_usable_address(mce) && (mce->severity == MCE_AO_SEVERITY)) {  		pfn = mce->addr >> PAGE_SHIFT; -		memory_failure(pfn, 0); +		if (!memory_failure(pfn, 0)) +			mce_unmap_kpfn(pfn);  	}  	return NOTIFY_OK; @@ -1057,12 +1067,13 @@ static int do_memory_failure(struct mce *m)  	ret = memory_failure(m->addr >> PAGE_SHIFT, flags);  	if (ret)  		pr_err("Memory error not recovered"); +	else +		mce_unmap_kpfn(m->addr >> PAGE_SHIFT);  	return ret;  } -#if defined(arch_unmap_kpfn) && defined(CONFIG_MEMORY_FAILURE) - -void arch_unmap_kpfn(unsigned long pfn) +#ifndef mce_unmap_kpfn +static void mce_unmap_kpfn(unsigned long pfn)  {  	unsigned long decoy_addr; @@ -1073,7 +1084,7 @@ void arch_unmap_kpfn(unsigned long pfn)  	 * We would like to just call:  	 *	set_memory_np((unsigned long)pfn_to_kaddr(pfn), 1);  	 * but doing that would radically increase the odds of a -	 * speculative access to the posion page because we'd have +	 * speculative access to the poison page because we'd have  	 * the virtual address of the kernel 1:1 mapping sitting  	 * around in registers.  	 * Instead we get tricky.  
We create a non-canonical address @@ -1098,7 +1109,6 @@ void arch_unmap_kpfn(unsigned long pfn)  	if (set_memory_np(decoy_addr, 1))  		pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn); -  }  #endif @@ -2081,6 +2091,7 @@ static ssize_t set_ignore_ce(struct device *s,  	if (kstrtou64(buf, 0, &new) < 0)  		return -EINVAL; +	mutex_lock(&mce_sysfs_mutex);  	if (mca_cfg.ignore_ce ^ !!new) {  		if (new) {  			/* disable ce features */ @@ -2093,6 +2104,8 @@ static ssize_t set_ignore_ce(struct device *s,  			on_each_cpu(mce_enable_ce, (void *)1, 1);  		}  	} +	mutex_unlock(&mce_sysfs_mutex); +  	return size;  } @@ -2105,6 +2118,7 @@ static ssize_t set_cmci_disabled(struct device *s,  	if (kstrtou64(buf, 0, &new) < 0)  		return -EINVAL; +	mutex_lock(&mce_sysfs_mutex);  	if (mca_cfg.cmci_disabled ^ !!new) {  		if (new) {  			/* disable cmci */ @@ -2116,6 +2130,8 @@ static ssize_t set_cmci_disabled(struct device *s,  			on_each_cpu(mce_enable_ce, NULL, 1);  		}  	} +	mutex_unlock(&mce_sysfs_mutex); +  	return size;  } @@ -2123,8 +2139,19 @@ static ssize_t store_int_with_restart(struct device *s,  				      struct device_attribute *attr,  				      const char *buf, size_t size)  { -	ssize_t ret = device_store_int(s, attr, buf, size); +	unsigned long old_check_interval = check_interval; +	ssize_t ret = device_store_ulong(s, attr, buf, size); + +	if (check_interval == old_check_interval) +		return ret; + +	if (check_interval < 1) +		check_interval = 1; + +	mutex_lock(&mce_sysfs_mutex);  	mce_restart(); +	mutex_unlock(&mce_sysfs_mutex); +  	return ret;  } diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 330b8462d426..48179928ff38 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -339,7 +339,7 @@ int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax)  		return -EINVAL;  	ret = load_microcode_amd(true, x86_family(cpuid_1_eax), desc.data, desc.size); -	if (ret != UCODE_OK) +	if (ret > UCODE_UPDATED)  		return -EINVAL;  	return 0; @@ -498,7 +498,7 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size,  	return patch_size;  } -static int apply_microcode_amd(int cpu) +static enum ucode_state apply_microcode_amd(int cpu)  {  	struct cpuinfo_x86 *c = &cpu_data(cpu);  	struct microcode_amd *mc_amd; @@ -512,7 +512,7 @@ static int apply_microcode_amd(int cpu)  	p = find_patch(cpu);  	if (!p) -		return 0; +		return UCODE_NFOUND;  	mc_amd  = p->data;  	uci->mc = p->data; @@ -523,13 +523,13 @@ static int apply_microcode_amd(int cpu)  	if (rev >= mc_amd->hdr.patch_id) {  		c->microcode = rev;  		uci->cpu_sig.rev = rev; -		return 0; +		return UCODE_OK;  	}  	if (__apply_microcode_amd(mc_amd)) {  		pr_err("CPU%d: update failed for patch_level=0x%08x\n",  			cpu, mc_amd->hdr.patch_id); -		return -1; +		return UCODE_ERROR;  	}  	pr_info("CPU%d: new patch_level=0x%08x\n", cpu,  		mc_amd->hdr.patch_id); @@ -537,7 +537,7 @@ static int apply_microcode_amd(int cpu)  	uci->cpu_sig.rev = mc_amd->hdr.patch_id;  	c->microcode = mc_amd->hdr.patch_id; -	return 0; +	return UCODE_UPDATED;  }  static int install_equiv_cpu_table(const u8 *buf) @@ -683,27 +683,35 @@ static enum ucode_state __load_microcode_amd(u8 family, const u8 *data,  static enum ucode_state  load_microcode_amd(bool save, u8 family, const u8 *data, size_t size)  { +	struct ucode_patch *p;  	enum ucode_state ret;  	/* free old equiv table */  	free_equiv_cpu_table();  	ret = __load_microcode_amd(family, data, size); - -	if (ret != UCODE_OK) +	if 
(ret != UCODE_OK) {  		cleanup(); +		return ret; +	} -#ifdef CONFIG_X86_32 -	/* save BSP's matching patch for early load */ -	if (save) { -		struct ucode_patch *p = find_patch(0); -		if (p) { -			memset(amd_ucode_patch, 0, PATCH_MAX_SIZE); -			memcpy(amd_ucode_patch, p->data, min_t(u32, ksize(p->data), -							       PATCH_MAX_SIZE)); -		} +	p = find_patch(0); +	if (!p) { +		return ret; +	} else { +		if (boot_cpu_data.microcode == p->patch_id) +			return ret; + +		ret = UCODE_NEW;  	} -#endif + +	/* save BSP's matching patch for early load */ +	if (!save) +		return ret; + +	memset(amd_ucode_patch, 0, PATCH_MAX_SIZE); +	memcpy(amd_ucode_patch, p->data, min_t(u32, ksize(p->data), PATCH_MAX_SIZE)); +  	return ret;  } diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 319dd65f98a2..10c4fc2c91f8 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -22,13 +22,16 @@  #define pr_fmt(fmt) "microcode: " fmt  #include <linux/platform_device.h> +#include <linux/stop_machine.h>  #include <linux/syscore_ops.h>  #include <linux/miscdevice.h>  #include <linux/capability.h>  #include <linux/firmware.h>  #include <linux/kernel.h> +#include <linux/delay.h>  #include <linux/mutex.h>  #include <linux/cpu.h> +#include <linux/nmi.h>  #include <linux/fs.h>  #include <linux/mm.h> @@ -64,6 +67,11 @@ LIST_HEAD(microcode_cache);   */  static DEFINE_MUTEX(microcode_mutex); +/* + * Serialize late loading so that CPUs get updated one-by-one. + */ +static DEFINE_SPINLOCK(update_lock); +  struct ucode_cpu_info		ucode_cpu_info[NR_CPUS];  struct cpu_info_ctx { @@ -373,26 +381,23 @@ static int collect_cpu_info(int cpu)  	return ret;  } -struct apply_microcode_ctx { -	int err; -}; -  static void apply_microcode_local(void *arg)  { -	struct apply_microcode_ctx *ctx = arg; +	enum ucode_state *err = arg; -	ctx->err = microcode_ops->apply_microcode(smp_processor_id()); +	*err = microcode_ops->apply_microcode(smp_processor_id());  }  static int apply_microcode_on_target(int cpu)  { -	struct apply_microcode_ctx ctx = { .err = 0 }; +	enum ucode_state err;  	int ret; -	ret = smp_call_function_single(cpu, apply_microcode_local, &ctx, 1); -	if (!ret) -		ret = ctx.err; - +	ret = smp_call_function_single(cpu, apply_microcode_local, &err, 1); +	if (!ret) { +		if (err == UCODE_ERROR) +			ret = 1; +	}  	return ret;  } @@ -489,31 +494,124 @@ static void __exit microcode_dev_exit(void)  /* fake device for request_firmware */  static struct platform_device	*microcode_pdev; -static int reload_for_cpu(int cpu) +/* + * Late loading dance. Why the heavy-handed stomp_machine effort? + * + * - HT siblings must be idle and not execute other code while the other sibling + *   is loading microcode in order to avoid any negative interactions caused by + *   the loading. + * + * - In addition, microcode update on the cores must be serialized until this + *   requirement can be relaxed in the future. Right now, this is conservative + *   and good. 
+ */ +#define SPINUNIT 100 /* 100 nsec */ + +static int check_online_cpus(void)  { -	struct ucode_cpu_info *uci = ucode_cpu_info + cpu; -	enum ucode_state ustate; -	int err = 0; +	if (num_online_cpus() == num_present_cpus()) +		return 0; -	if (!uci->valid) -		return err; +	pr_err("Not all CPUs online, aborting microcode update.\n"); -	ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev, true); -	if (ustate == UCODE_OK) -		apply_microcode_on_target(cpu); -	else -		if (ustate == UCODE_ERROR) -			err = -EINVAL; -	return err; +	return -EINVAL; +} + +static atomic_t late_cpus_in; +static atomic_t late_cpus_out; + +static int __wait_for_cpus(atomic_t *t, long long timeout) +{ +	int all_cpus = num_online_cpus(); + +	atomic_inc(t); + +	while (atomic_read(t) < all_cpus) { +		if (timeout < SPINUNIT) { +			pr_err("Timeout while waiting for CPUs rendezvous, remaining: %d\n", +				all_cpus - atomic_read(t)); +			return 1; +		} + +		ndelay(SPINUNIT); +		timeout -= SPINUNIT; + +		touch_nmi_watchdog(); +	} +	return 0; +} + +/* + * Returns: + * < 0 - on error + *   0 - no update done + *   1 - microcode was updated + */ +static int __reload_late(void *info) +{ +	int cpu = smp_processor_id(); +	enum ucode_state err; +	int ret = 0; + +	/* +	 * Wait for all CPUs to arrive. A load will not be attempted unless all +	 * CPUs show up. +	 * */ +	if (__wait_for_cpus(&late_cpus_in, NSEC_PER_SEC)) +		return -1; + +	spin_lock(&update_lock); +	apply_microcode_local(&err); +	spin_unlock(&update_lock); + +	if (err > UCODE_NFOUND) { +		pr_warn("Error reloading microcode on CPU %d\n", cpu); +		return -1; +	/* siblings return UCODE_OK because their engine got updated already */ +	} else if (err == UCODE_UPDATED || err == UCODE_OK) { +		ret = 1; +	} else { +		return ret; +	} + +	/* +	 * Increase the wait timeout to a safe value here since we're +	 * serializing the microcode update and that could take a while on a +	 * large number of CPUs. And that is fine as the *actual* timeout will +	 * be determined by the last CPU finished updating and thus cut short. +	 */ +	if (__wait_for_cpus(&late_cpus_out, NSEC_PER_SEC * num_online_cpus())) +		panic("Timeout during microcode update!\n"); + +	return ret; +} + +/* + * Reload microcode late on all CPUs. Wait for a sec until they + * all gather together. 
+ */ +static int microcode_reload_late(void) +{ +	int ret; + +	atomic_set(&late_cpus_in,  0); +	atomic_set(&late_cpus_out, 0); + +	ret = stop_machine_cpuslocked(__reload_late, NULL, cpu_online_mask); +	if (ret > 0) +		microcode_check(); + +	return ret;  }  static ssize_t reload_store(struct device *dev,  			    struct device_attribute *attr,  			    const char *buf, size_t size)  { +	enum ucode_state tmp_ret = UCODE_OK; +	int bsp = boot_cpu_data.cpu_index;  	unsigned long val; -	int cpu; -	ssize_t ret = 0, tmp_ret; +	ssize_t ret = 0;  	ret = kstrtoul(buf, 0, &val);  	if (ret) @@ -522,23 +620,24 @@ static ssize_t reload_store(struct device *dev,  	if (val != 1)  		return size; +	tmp_ret = microcode_ops->request_microcode_fw(bsp, &microcode_pdev->dev, true); +	if (tmp_ret != UCODE_NEW) +		return size; +  	get_online_cpus(); -	mutex_lock(&microcode_mutex); -	for_each_online_cpu(cpu) { -		tmp_ret = reload_for_cpu(cpu); -		if (tmp_ret != 0) -			pr_warn("Error reloading microcode on CPU %d\n", cpu); -		/* save retval of the first encountered reload error */ -		if (!ret) -			ret = tmp_ret; -	} -	if (!ret) -		perf_check_microcode(); +	ret = check_online_cpus(); +	if (ret) +		goto put; + +	mutex_lock(&microcode_mutex); +	ret = microcode_reload_late();  	mutex_unlock(&microcode_mutex); + +put:  	put_online_cpus(); -	if (!ret) +	if (ret >= 0)  		ret = size;  	return ret; @@ -606,10 +705,8 @@ static enum ucode_state microcode_init_cpu(int cpu, bool refresh_fw)  	if (system_state != SYSTEM_RUNNING)  		return UCODE_NFOUND; -	ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev, -						     refresh_fw); - -	if (ustate == UCODE_OK) { +	ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev, refresh_fw); +	if (ustate == UCODE_NEW) {  		pr_debug("CPU%d updated upon init\n", cpu);  		apply_microcode_on_target(cpu);  	} diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index f7c55b0e753a..32b8e5724f96 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -589,6 +589,23 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)  	if (!mc)  		return 0; +	/* +	 * Save us the MSR write below - which is a particular expensive +	 * operation - when the other hyperthread has updated the microcode +	 * already. +	 */ +	rev = intel_get_microcode_revision(); +	if (rev >= mc->hdr.rev) { +		uci->cpu_sig.rev = rev; +		return UCODE_OK; +	} + +	/* +	 * Writeback and invalidate caches before updating microcode to avoid +	 * internal issues depending on what the microcode is updating. 
+	 */ +	native_wbinvd(); +  	/* write microcode via MSR 0x79 */  	native_wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits); @@ -772,27 +789,44 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)  	return 0;  } -static int apply_microcode_intel(int cpu) +static enum ucode_state apply_microcode_intel(int cpu)  { +	struct ucode_cpu_info *uci = ucode_cpu_info + cpu; +	struct cpuinfo_x86 *c = &cpu_data(cpu);  	struct microcode_intel *mc; -	struct ucode_cpu_info *uci; -	struct cpuinfo_x86 *c;  	static int prev_rev;  	u32 rev;  	/* We should bind the task to the CPU */  	if (WARN_ON(raw_smp_processor_id() != cpu)) -		return -1; +		return UCODE_ERROR; -	uci = ucode_cpu_info + cpu; -	mc = uci->mc; +	/* Look for a newer patch in our cache: */ +	mc = find_patch(uci);  	if (!mc) { -		/* Look for a newer patch in our cache: */ -		mc = find_patch(uci); +		mc = uci->mc;  		if (!mc) -			return 0; +			return UCODE_NFOUND;  	} +	/* +	 * Save us the MSR write below - which is a particular expensive +	 * operation - when the other hyperthread has updated the microcode +	 * already. +	 */ +	rev = intel_get_microcode_revision(); +	if (rev >= mc->hdr.rev) { +		uci->cpu_sig.rev = rev; +		c->microcode = rev; +		return UCODE_OK; +	} + +	/* +	 * Writeback and invalidate caches before updating microcode to avoid +	 * internal issues depending on what the microcode is updating. +	 */ +	native_wbinvd(); +  	/* write microcode via MSR 0x79 */  	wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits); @@ -801,7 +835,7 @@ static int apply_microcode_intel(int cpu)  	if (rev != mc->hdr.rev) {  		pr_err("CPU%d update to revision 0x%x failed\n",  		       cpu, mc->hdr.rev); -		return -1; +		return UCODE_ERROR;  	}  	if (rev != prev_rev) { @@ -813,12 +847,10 @@ static int apply_microcode_intel(int cpu)  		prev_rev = rev;  	} -	c = &cpu_data(cpu); -  	uci->cpu_sig.rev = rev;  	c->microcode = rev; -	return 0; +	return UCODE_UPDATED;  }  static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, @@ -830,6 +862,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,  	unsigned int leftover = size;  	unsigned int curr_mc_size = 0, new_mc_size = 0;  	unsigned int csig, cpf; +	enum ucode_state ret = UCODE_OK;  	while (leftover) {  		struct microcode_header_intel mc_header; @@ -871,6 +904,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,  			new_mc  = mc;  			new_mc_size = mc_size;  			mc = NULL;	/* trigger new vmalloc */ +			ret = UCODE_NEW;  		}  		ucode_ptr += mc_size; @@ -900,7 +934,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,  	pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n",  		 cpu, new_rev, uci->cpu_sig.rev); -	return UCODE_OK; +	return ret;  }  static int get_ucode_fw(void *to, const void *from, size_t n) @@ -921,7 +955,7 @@ static bool is_blacklisted(unsigned int cpu)  	 */  	if (c->x86 == 6 &&  	    c->x86_model == INTEL_FAM6_BROADWELL_X && -	    c->x86_mask == 0x01 && +	    c->x86_stepping == 0x01 &&  	    llc_size_per_core > 2621440 &&  	    c->microcode < 0x0b000021) {  		pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode); @@ -944,7 +978,7 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device,  		return UCODE_NFOUND;  	sprintf(name, "intel-ucode/%02x-%02x-%02x", -		c->x86, c->x86_model, c->x86_mask); +		c->x86, c->x86_model, c->x86_stepping);  	if 
(request_firmware_direct(&firmware, name, device)) {  		pr_debug("data file %s load failed\n", name); @@ -982,7 +1016,7 @@ static struct microcode_ops microcode_intel_ops = {  static int __init calc_llc_size_per_core(struct cpuinfo_x86 *c)  { -	u64 llc_size = c->x86_cache_size * 1024; +	u64 llc_size = c->x86_cache_size * 1024ULL;  	do_div(llc_size, c->x86_max_cores); diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index fdc55215d44d..e12ee86906c6 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -859,7 +859,7 @@ int generic_validate_add_page(unsigned long base, unsigned long size,  	 */  	if (is_cpu(INTEL) && boot_cpu_data.x86 == 6 &&  	    boot_cpu_data.x86_model == 1 && -	    boot_cpu_data.x86_mask <= 7) { +	    boot_cpu_data.x86_stepping <= 7) {  		if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) {  			pr_warn("mtrr: base(0x%lx000) is not 4 MiB aligned\n", base);  			return -EINVAL; diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 40d5a8a75212..7468de429087 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -711,8 +711,8 @@ void __init mtrr_bp_init(void)  			if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&  			    boot_cpu_data.x86 == 0xF &&  			    boot_cpu_data.x86_model == 0x3 && -			    (boot_cpu_data.x86_mask == 0x3 || -			     boot_cpu_data.x86_mask == 0x4)) +			    (boot_cpu_data.x86_stepping == 0x3 || +			     boot_cpu_data.x86_stepping == 0x4))  				phys_addr = 36;  			size_or_mask = SIZE_OR_MASK_BITS(phys_addr); diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index e7ecedafa1c8..2c8522a39ed5 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -72,8 +72,8 @@ static int show_cpuinfo(struct seq_file *m, void *v)  		   c->x86_model,  		   c->x86_model_id[0] ? 
c->x86_model_id : "unknown"); -	if (c->x86_mask || c->cpuid_level >= 0) -		seq_printf(m, "stepping\t: %d\n", c->x86_mask); +	if (c->x86_stepping || c->cpuid_level >= 0) +		seq_printf(m, "stepping\t: %d\n", c->x86_stepping);  	else  		seq_puts(m, "stepping\t: unknown\n");  	if (c->microcode) @@ -91,8 +91,8 @@ static int show_cpuinfo(struct seq_file *m, void *v)  	}  	/* Cache size */ -	if (c->x86_cache_size >= 0) -		seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); +	if (c->x86_cache_size) +		seq_printf(m, "cache size\t: %u KB\n", c->x86_cache_size);  	show_cpuinfo_core(m, c, cpu);  	show_cpuinfo_misc(m, c); diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index c29020907886..b59e4fb40fd9 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -37,7 +37,7 @@  #define X86		new_cpu_data+CPUINFO_x86  #define X86_VENDOR	new_cpu_data+CPUINFO_x86_vendor  #define X86_MODEL	new_cpu_data+CPUINFO_x86_model -#define X86_MASK	new_cpu_data+CPUINFO_x86_mask +#define X86_STEPPING	new_cpu_data+CPUINFO_x86_stepping  #define X86_HARD_MATH	new_cpu_data+CPUINFO_hard_math  #define X86_CPUID	new_cpu_data+CPUINFO_cpuid_level  #define X86_CAPABILITY	new_cpu_data+CPUINFO_x86_capability @@ -332,7 +332,7 @@ ENTRY(startup_32_smp)  	shrb $4,%al  	movb %al,X86_MODEL  	andb $0x0f,%cl		# mask mask revision -	movb %cl,X86_MASK +	movb %cl,X86_STEPPING  	movl %edx,X86_CAPABILITY  .Lis486: diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 04a625f0fcda..0f545b3cf926 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -23,6 +23,7 @@  #include <asm/nops.h>  #include "../entry/calling.h"  #include <asm/export.h> +#include <asm/nospec-branch.h>  #ifdef CONFIG_PARAVIRT  #include <asm/asm-offsets.h> @@ -134,6 +135,7 @@ ENTRY(secondary_startup_64)  	/* Ensure I am executing from virtual addresses */  	movq	$1f, %rax +	ANNOTATE_RETPOLINE_SAFE  	jmp	*%rax  1:  	UNWIND_HINT_EMPTY diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 56d99be3706a..50bee5fe1140 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -160,7 +160,6 @@ static const __initconst struct idt_data early_pf_idts[] = {   */  static const __initconst struct idt_data dbg_idts[] = {  	INTG(X86_TRAP_DB,	debug), -	INTG(X86_TRAP_BP,	int3),  };  #endif @@ -183,7 +182,6 @@ gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss;  static const __initconst struct idt_data ist_idts[] = {  	ISTG(X86_TRAP_DB,	debug,		DEBUG_STACK),  	ISTG(X86_TRAP_NMI,	nmi,		NMI_STACK), -	SISTG(X86_TRAP_BP,	int3,		DEBUG_STACK),  	ISTG(X86_TRAP_DF,	double_fault,	DOUBLEFAULT_STACK),  #ifdef CONFIG_X86_MCE  	ISTG(X86_TRAP_MC,	&machine_check,	MCE_STACK), diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 2f723301eb58..38deafebb21b 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -23,7 +23,7 @@  /*   * this changes the io permissions bitmap in the current task.   
*/ -asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) +SYSCALL_DEFINE3(ioperm, unsigned long, from, unsigned long, num, int, turn_on)  {  	struct thread_struct *t = &current->thread;  	struct tss_struct *tss; diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index bd36f3c33cd0..0715f827607c 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -1168,10 +1168,18 @@ NOKPROBE_SYMBOL(longjmp_break_handler);  bool arch_within_kprobe_blacklist(unsigned long addr)  { +	bool is_in_entry_trampoline_section = false; + +#ifdef CONFIG_X86_64 +	is_in_entry_trampoline_section = +		(addr >= (unsigned long)__entry_trampoline_start && +		 addr < (unsigned long)__entry_trampoline_end); +#endif  	return  (addr >= (unsigned long)__kprobes_text_start &&  		 addr < (unsigned long)__kprobes_text_end) ||  		(addr >= (unsigned long)__entry_text_start && -		 addr < (unsigned long)__entry_text_end); +		 addr < (unsigned long)__entry_text_end) || +		is_in_entry_trampoline_section;  }  int __init arch_init_kprobes(void) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 4e37d1a851a6..bc1a27280c4b 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -49,7 +49,7 @@  static int kvmapf = 1; -static int parse_no_kvmapf(char *arg) +static int __init parse_no_kvmapf(char *arg)  {          kvmapf = 0;          return 0; @@ -58,7 +58,7 @@ static int parse_no_kvmapf(char *arg)  early_param("no-kvmapf", parse_no_kvmapf);  static int steal_acc = 1; -static int parse_no_stealacc(char *arg) +static int __init parse_no_stealacc(char *arg)  {          steal_acc = 0;          return 0; @@ -67,7 +67,7 @@ static int parse_no_stealacc(char *arg)  early_param("no-steal-acc", parse_no_stealacc);  static int kvmclock_vsyscall = 1; -static int parse_no_kvmclock_vsyscall(char *arg) +static int __init parse_no_kvmclock_vsyscall(char *arg)  {          kvmclock_vsyscall = 0;          return 0; @@ -341,10 +341,10 @@ static void kvm_guest_cpu_init(void)  #endif  		pa |= KVM_ASYNC_PF_ENABLED; -		/* Async page fault support for L1 hypervisor is optional */ -		if (wrmsr_safe(MSR_KVM_ASYNC_PF_EN, -			(pa | KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT) & 0xffffffff, pa >> 32) < 0) -			wrmsrl(MSR_KVM_ASYNC_PF_EN, pa); +		if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_VMEXIT)) +			pa |= KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT; + +		wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);  		__this_cpu_write(apf_reason.enabled, 1);  		printk(KERN_INFO"KVM setup async PF for cpu %d\n",  		       smp_processor_id()); @@ -545,7 +545,8 @@ static void __init kvm_guest_init(void)  		pv_time_ops.steal_clock = kvm_steal_clock;  	} -	if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH)) +	if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) && +	    !kvm_para_has_feature(KVM_FEATURE_STEAL_TIME))  		pv_mmu_ops.flush_tlb_others = kvm_flush_tlb_others;  	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) @@ -633,7 +634,8 @@ static __init int kvm_setup_pv_tlb_flush(void)  {  	int cpu; -	if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH)) { +	if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) && +	    !kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {  		for_each_possible_cpu(cpu) {  			zalloc_cpumask_var_node(per_cpu_ptr(&__pv_tlb_mask, cpu),  				GFP_KERNEL, cpu_to_node(cpu)); diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 1f790cf9d38f..3b7427aa7d85 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -542,6 +542,7 @@ int 
arch_kexec_apply_relocations_add(const Elf64_Ehdr *ehdr,  				goto overflow;  			break;  		case R_X86_64_PC32: +		case R_X86_64_PLT32:  			value -= (u64)address;  			*(u32 *)location = value;  			break; diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index da0c160e5589..f58336af095c 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -191,6 +191,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,  				goto overflow;  			break;  		case R_X86_64_PC32: +		case R_X86_64_PLT32:  			if (*(u32 *)loc != 0)  				goto invalid_relocation;  			val -= (u64)loc; diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 27d0a1712663..f1c5eb99d445 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -410,7 +410,7 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)  	processor.apicver = mpc_default_type > 4 ? 0x10 : 0x01;  	processor.cpuflag = CPU_ENABLED;  	processor.cpufeature = (boot_cpu_data.x86 << 8) | -	    (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask; +	    (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_stepping;  	processor.featureflag = boot_cpu_data.x86_capability[CPUID_1_EDX];  	processor.reserved[0] = 0;  	processor.reserved[1] = 0; diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 041096bdef86..99dc79e76bdc 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -200,9 +200,9 @@ static void native_flush_tlb_global(void)  	__native_flush_tlb_global();  } -static void native_flush_tlb_single(unsigned long addr) +static void native_flush_tlb_one_user(unsigned long addr)  { -	__native_flush_tlb_single(addr); +	__native_flush_tlb_one_user(addr);  }  struct static_key paravirt_steal_enabled; @@ -401,7 +401,7 @@ struct pv_mmu_ops pv_mmu_ops __ro_after_init = {  	.flush_tlb_user = native_flush_tlb,  	.flush_tlb_kernel = native_flush_tlb_global, -	.flush_tlb_single = native_flush_tlb_single, +	.flush_tlb_one_user = native_flush_tlb_one_user,  	.flush_tlb_others = native_flush_tlb_others,  	.pgd_alloc = __paravirt_pgd_alloc, diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index 618285e475c6..ac7ea3a8242f 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -37,7 +37,6 @@ static dma_addr_t nommu_map_page(struct device *dev, struct page *page,  	WARN_ON(size == 0);  	if (!check_addr("map_single", dev, bus, size))  		return NOMMU_MAPPING_ERROR; -	flush_write_buffers();  	return bus;  } @@ -72,25 +71,9 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg,  			return 0;  		s->dma_length = s->length;  	} -	flush_write_buffers();  	return nents;  } -static void nommu_sync_single_for_device(struct device *dev, -			dma_addr_t addr, size_t size, -			enum dma_data_direction dir) -{ -	flush_write_buffers(); -} - - -static void nommu_sync_sg_for_device(struct device *dev, -			struct scatterlist *sg, int nelems, -			enum dma_data_direction dir) -{ -	flush_write_buffers(); -} -  static int nommu_mapping_error(struct device *dev, dma_addr_t dma_addr)  {  	return dma_addr == NOMMU_MAPPING_ERROR; @@ -101,8 +84,6 @@ const struct dma_map_ops nommu_dma_ops = {  	.free			= dma_generic_free_coherent,  	.map_sg			= nommu_map_sg,  	.map_page		= nommu_map_page, -	.sync_single_for_device = nommu_sync_single_for_device, -	.sync_sg_for_device	= nommu_sync_sg_for_device,  	.is_phys		= 1,  	.mapping_error		= nommu_mapping_error,  	.dma_supported		= x86_dma_supported, diff --git a/arch/x86/kernel/setup.c 
b/arch/x86/kernel/setup.c index 1ae67e982af7..4c616be28506 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1204,20 +1204,13 @@ void __init setup_arch(char **cmdline_p)  	kasan_init(); -#ifdef CONFIG_X86_32 -	/* sync back kernel address range */ -	clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY, -			swapper_pg_dir     + KERNEL_PGD_BOUNDARY, -			KERNEL_PGD_PTRS); -  	/* -	 * sync back low identity map too.  It is used for example -	 * in the 32-bit EFI stub. +	 * Sync back kernel address range. +	 * +	 * FIXME: Can the later sync in setup_cpu_entry_areas() replace +	 * this call?  	 */ -	clone_pgd_range(initial_page_table, -			swapper_pg_dir     + KERNEL_PGD_BOUNDARY, -			min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY)); -#endif +	sync_initial_page_table();  	tboot_probe(); diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 497aa766fab3..ea554f812ee1 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -287,24 +287,15 @@ void __init setup_per_cpu_areas(void)  	/* Setup cpu initialized, callin, callout masks */  	setup_cpu_local_masks(); -#ifdef CONFIG_X86_32  	/*  	 * Sync back kernel address range again.  We already did this in  	 * setup_arch(), but percpu data also needs to be available in  	 * the smpboot asm.  We can't reliably pick up percpu mappings  	 * using vmalloc_fault(), because exception dispatch needs  	 * percpu data. +	 * +	 * FIXME: Can the later sync in setup_cpu_entry_areas() replace +	 * this call?  	 */ -	clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY, -			swapper_pg_dir     + KERNEL_PGD_BOUNDARY, -			KERNEL_PGD_PTRS); - -	/* -	 * sync back low identity map too.  It is used for example -	 * in the 32-bit EFI stub. -	 */ -	clone_pgd_range(initial_page_table, -			swapper_pg_dir     + KERNEL_PGD_BOUNDARY, -			min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY)); -#endif +	sync_initial_page_table();  } diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c index ac057f9b0763..0d930d8987cc 100644 --- a/arch/x86/kernel/signal_compat.c +++ b/arch/x86/kernel/signal_compat.c @@ -43,6 +43,13 @@ static inline void signal_compat_build_tests(void)  	BUILD_BUG_ON(offsetof(compat_siginfo_t, _sifields) != 3 * sizeof(int));  #define CHECK_CSI_OFFSET(name)	  BUILD_BUG_ON(_sifields_offset != offsetof(compat_siginfo_t, _sifields.name)) +	BUILD_BUG_ON(offsetof(siginfo_t, si_signo) != 0); +	BUILD_BUG_ON(offsetof(siginfo_t, si_errno) != 4); +	BUILD_BUG_ON(offsetof(siginfo_t, si_code)  != 8); + +	BUILD_BUG_ON(offsetof(compat_siginfo_t, si_signo) != 0); +	BUILD_BUG_ON(offsetof(compat_siginfo_t, si_errno) != 4); +	BUILD_BUG_ON(offsetof(compat_siginfo_t, si_code)  != 8);  	 /*  	 * Ensure that the size of each si_field never changes.  	 
* If it does, it is a sign that the @@ -63,36 +70,94 @@ static inline void signal_compat_build_tests(void)  	CHECK_CSI_SIZE  (_kill, 2*sizeof(int));  	CHECK_SI_SIZE   (_kill, 2*sizeof(int)); +	BUILD_BUG_ON(offsetof(siginfo_t, si_pid) != 0x10); +	BUILD_BUG_ON(offsetof(siginfo_t, si_uid) != 0x14); +	BUILD_BUG_ON(offsetof(compat_siginfo_t, si_pid) != 0xC); +	BUILD_BUG_ON(offsetof(compat_siginfo_t, si_uid) != 0x10); +  	CHECK_CSI_OFFSET(_timer);  	CHECK_CSI_SIZE  (_timer, 3*sizeof(int));  	CHECK_SI_SIZE   (_timer, 6*sizeof(int)); +	BUILD_BUG_ON(offsetof(siginfo_t, si_tid)     != 0x10); +	BUILD_BUG_ON(offsetof(siginfo_t, si_overrun) != 0x14); +	BUILD_BUG_ON(offsetof(siginfo_t, si_value)   != 0x18); +	BUILD_BUG_ON(offsetof(compat_siginfo_t, si_tid)     != 0x0C); +	BUILD_BUG_ON(offsetof(compat_siginfo_t, si_overrun) != 0x10); +	BUILD_BUG_ON(offsetof(compat_siginfo_t, si_value)   != 0x14); +  	CHECK_CSI_OFFSET(_rt);  	CHECK_CSI_SIZE  (_rt, 3*sizeof(int));  	CHECK_SI_SIZE   (_rt, 4*sizeof(int)); +	BUILD_BUG_ON(offsetof(siginfo_t, si_pid)   != 0x10); +	BUILD_BUG_ON(offsetof(siginfo_t, si_uid)   != 0x14); +	BUILD_BUG_ON(offsetof(siginfo_t, si_value) != 0x18); +	BUILD_BUG_ON(offsetof(compat_siginfo_t, si_pid)   != 0x0C); +	BUILD_BUG_ON(offsetof(compat_siginfo_t, si_uid)   != 0x10); +	BUILD_BUG_ON(offsetof(compat_siginfo_t, si_value) != 0x14); +  	CHECK_CSI_OFFSET(_sigchld);  	CHECK_CSI_SIZE  (_sigchld, 5*sizeof(int));  	CHECK_SI_SIZE   (_sigchld, 8*sizeof(int)); +	BUILD_BUG_ON(offsetof(siginfo_t, si_pid)    != 0x10); +	BUILD_BUG_ON(offsetof(siginfo_t, si_uid)    != 0x14); +	BUILD_BUG_ON(offsetof(siginfo_t, si_status) != 0x18); +	BUILD_BUG_ON(offsetof(siginfo_t, si_utime)  != 0x20); +	BUILD_BUG_ON(offsetof(siginfo_t, si_stime)  != 0x28); +	BUILD_BUG_ON(offsetof(compat_siginfo_t, si_pid)    != 0x0C); +	BUILD_BUG_ON(offsetof(compat_siginfo_t, si_uid)    != 0x10); +	BUILD_BUG_ON(offsetof(compat_siginfo_t, si_status) != 0x14); +	BUILD_BUG_ON(offsetof(compat_siginfo_t, si_utime)  != 0x18); +	BUILD_BUG_ON(offsetof(compat_siginfo_t, si_stime)  != 0x1C); +  #ifdef CONFIG_X86_X32_ABI  	CHECK_CSI_OFFSET(_sigchld_x32);  	CHECK_CSI_SIZE  (_sigchld_x32, 7*sizeof(int));  	/* no _sigchld_x32 in the generic siginfo_t */ +	BUILD_BUG_ON(offsetof(compat_siginfo_t, _sifields._sigchld_x32._utime)  != 0x18); +	BUILD_BUG_ON(offsetof(compat_siginfo_t, _sifields._sigchld_x32._stime)  != 0x20);  #endif  	CHECK_CSI_OFFSET(_sigfault);  	CHECK_CSI_SIZE  (_sigfault, 4*sizeof(int));  	CHECK_SI_SIZE   (_sigfault, 8*sizeof(int)); +	BUILD_BUG_ON(offsetof(siginfo_t, si_addr) != 0x10); +	BUILD_BUG_ON(offsetof(compat_siginfo_t, si_addr) != 0x0C); + +	BUILD_BUG_ON(offsetof(siginfo_t, si_addr_lsb) != 0x18); +	BUILD_BUG_ON(offsetof(compat_siginfo_t, si_addr_lsb) != 0x10); + +	BUILD_BUG_ON(offsetof(siginfo_t, si_lower) != 0x20); +	BUILD_BUG_ON(offsetof(siginfo_t, si_upper) != 0x28); +	BUILD_BUG_ON(offsetof(compat_siginfo_t, si_lower) != 0x14); +	BUILD_BUG_ON(offsetof(compat_siginfo_t, si_upper) != 0x18); + +	BUILD_BUG_ON(offsetof(siginfo_t, si_pkey) != 0x20); +	BUILD_BUG_ON(offsetof(compat_siginfo_t, si_pkey) != 0x14); +  	CHECK_CSI_OFFSET(_sigpoll);  	CHECK_CSI_SIZE  (_sigpoll, 2*sizeof(int));  	CHECK_SI_SIZE   (_sigpoll, 4*sizeof(int)); +	BUILD_BUG_ON(offsetof(siginfo_t, si_band)   != 0x10); +	BUILD_BUG_ON(offsetof(siginfo_t, si_fd)     != 0x18); +	BUILD_BUG_ON(offsetof(compat_siginfo_t, si_band) != 0x0C); +	BUILD_BUG_ON(offsetof(compat_siginfo_t, si_fd)   != 0x10); +  	CHECK_CSI_OFFSET(_sigsys);  	CHECK_CSI_SIZE  (_sigsys, 3*sizeof(int));  
	CHECK_SI_SIZE   (_sigsys, 4*sizeof(int)); +	BUILD_BUG_ON(offsetof(siginfo_t, si_call_addr) != 0x10); +	BUILD_BUG_ON(offsetof(siginfo_t, si_syscall)   != 0x18); +	BUILD_BUG_ON(offsetof(siginfo_t, si_arch)      != 0x1C); +	BUILD_BUG_ON(offsetof(compat_siginfo_t, si_call_addr) != 0x0C); +	BUILD_BUG_ON(offsetof(compat_siginfo_t, si_syscall)   != 0x10); +	BUILD_BUG_ON(offsetof(compat_siginfo_t, si_arch)      != 0x14); +  	/* any new si_fields should be added here */  } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 6f27facbaa9b..ff99e2b6fc54 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1281,11 +1281,10 @@ void __init native_smp_prepare_boot_cpu(void)  	cpu_set_state_online(me);  } -void __init native_smp_cpus_done(unsigned int max_cpus) +void __init calculate_max_logical_packages(void)  {  	int ncpus; -	pr_debug("Boot done\n");  	/*  	 * Today neither Intel nor AMD support heterogenous systems so  	 * extrapolate the boot cpu's data to all packages. @@ -1293,6 +1292,13 @@ void __init native_smp_cpus_done(unsigned int max_cpus)  	ncpus = cpu_data(0).booted_cores * topology_max_smt_threads();  	__max_logical_packages = DIV_ROUND_UP(nr_cpu_ids, ncpus);  	pr_info("Max logical packages: %u\n", __max_logical_packages); +} + +void __init native_smp_cpus_done(unsigned int max_cpus) +{ +	pr_debug("Boot done\n"); + +	calculate_max_logical_packages();  	if (x86_has_numa_in_package)  		set_sched_topology(x86_numa_in_package_topology); @@ -1430,8 +1436,8 @@ static void remove_siblinginfo(int cpu)  	cpumask_clear(cpu_llc_shared_mask(cpu));  	cpumask_clear(topology_sibling_cpumask(cpu));  	cpumask_clear(topology_core_cpumask(cpu)); -	c->phys_proc_id = 0;  	c->cpu_core_id = 0; +	c->booted_cores = 0;  	cpumask_clear_cpu(cpu, cpu_sibling_setup_mask);  	recompute_smt_state();  } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 446c9ef8cfc3..03f3d7695dac 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -181,7 +181,7 @@ int fixup_bug(struct pt_regs *regs, int trapnr)  		break;  	case BUG_TRAP_TYPE_WARN: -		regs->ip += LEN_UD0; +		regs->ip += LEN_UD2;  		return 1;  	} @@ -577,7 +577,6 @@ do_general_protection(struct pt_regs *regs, long error_code)  }  NOKPROBE_SYMBOL(do_general_protection); -/* May run on IST stack. */  dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)  {  #ifdef CONFIG_DYNAMIC_FTRACE @@ -592,6 +591,13 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)  	if (poke_int3_handler(regs))  		return; +	/* +	 * Use ist_enter despite the fact that we don't use an IST stack. +	 * We can be called from a kprobe in non-CONTEXT_KERNEL kernel +	 * mode or even during context tracking state changes. +	 * +	 * This means that we can't schedule.  That's okay. +	 */  	ist_enter(regs);  	RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");  #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP @@ -609,15 +615,10 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)  			SIGTRAP) == NOTIFY_STOP)  		goto exit; -	/* -	 * Let others (NMI) know that the debug stack is in use -	 * as we may switch to the interrupt stack. 
-	 */ -	debug_stack_usage_inc();  	cond_local_irq_enable(regs);  	do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL);  	cond_local_irq_disable(regs); -	debug_stack_usage_dec(); +  exit:  	ist_exit(regs);  } diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 1f9188f5357c..feb28fee6cea 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -5,7 +5,6 @@  #include <asm/unwind.h>  #include <asm/orc_types.h>  #include <asm/orc_lookup.h> -#include <asm/sections.h>  #define orc_warn(fmt, ...) \  	printk_deferred_once(KERN_WARNING pr_fmt("WARNING: " fmt), ##__VA_ARGS__) @@ -148,7 +147,7 @@ static struct orc_entry *orc_find(unsigned long ip)  	}  	/* vmlinux .init slow lookup: */ -	if (ip >= (unsigned long)_sinittext && ip < (unsigned long)_einittext) +	if (init_kernel_text(ip))  		return __orc_find(__start_orc_unwind_ip, __start_orc_unwind,  				  __stop_orc_unwind_ip - __start_orc_unwind_ip, ip); diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 5edb27f1a2c4..9d0b5af7db91 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -727,7 +727,8 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code)  	return;  check_vip: -	if (VEFLAGS & X86_EFLAGS_VIP) { +	if ((VEFLAGS & (X86_EFLAGS_VIP | X86_EFLAGS_VIF)) == +	    (X86_EFLAGS_VIP | X86_EFLAGS_VIF)) {  		save_v86_state(regs, VM86_STI);  		return;  	} diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 9b138a06c1a4..b854ebf5851b 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -118,9 +118,11 @@ SECTIONS  #ifdef CONFIG_X86_64  		. = ALIGN(PAGE_SIZE); +		VMLINUX_SYMBOL(__entry_trampoline_start) = .;  		_entry_trampoline = .;  		*(.entry_trampoline)  		. = ALIGN(PAGE_SIZE); +		VMLINUX_SYMBOL(__entry_trampoline_end) = .;  		ASSERT(. 
- _entry_trampoline == PAGE_SIZE, "entry trampoline is too big");  #endif diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index a0c5a69bc7c4..b671fc2d0422 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -607,7 +607,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,  			     (1 << KVM_FEATURE_PV_EOI) |  			     (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) |  			     (1 << KVM_FEATURE_PV_UNHALT) | -			     (1 << KVM_FEATURE_PV_TLB_FLUSH); +			     (1 << KVM_FEATURE_PV_TLB_FLUSH) | +			     (1 << KVM_FEATURE_ASYNC_PF_VMEXIT);  		if (sched_info_on())  			entry->eax |= (1 << KVM_FEATURE_STEAL_TIME); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 924ac8ce9d50..391dda8d43b7 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2002,14 +2002,13 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)  void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)  { -	struct kvm_lapic *apic; +	struct kvm_lapic *apic = vcpu->arch.apic;  	int i; -	apic_debug("%s\n", __func__); +	if (!apic) +		return; -	ASSERT(vcpu); -	apic = vcpu->arch.apic; -	ASSERT(apic != NULL); +	apic_debug("%s\n", __func__);  	/* Stop the timer in case it's a reset to an active apic */  	hrtimer_cancel(&apic->lapic_timer.timer); @@ -2165,7 +2164,6 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)  	 */  	vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;  	static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */ -	kvm_lapic_reset(vcpu, false);  	kvm_iodevice_init(&apic->dev, &apic_mmio_ops);  	return 0; @@ -2569,7 +2567,6 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu)  	pe = xchg(&apic->pending_events, 0);  	if (test_bit(KVM_APIC_INIT, &pe)) { -		kvm_lapic_reset(vcpu, true);  		kvm_vcpu_reset(vcpu, true);  		if (kvm_vcpu_is_bsp(apic->vcpu))  			vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 8eca1d04aeb8..763bb3bade63 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2770,8 +2770,10 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,  	else  		pte_access &= ~ACC_WRITE_MASK; +	if (!kvm_is_mmio_pfn(pfn)) +		spte |= shadow_me_mask; +  	spte |= (u64)pfn << PAGE_SHIFT; -	spte |= shadow_me_mask;  	if (pte_access & ACC_WRITE_MASK) { @@ -3029,7 +3031,7 @@ static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn)  		return RET_PF_RETRY;  	} -	return -EFAULT; +	return RET_PF_EMULATE;  }  static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu, @@ -5080,7 +5082,7 @@ void kvm_mmu_uninit_vm(struct kvm *kvm)  typedef bool (*slot_level_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head);  /* The caller should hold mmu-lock before calling this function. 
*/ -static bool +static __always_inline bool  slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot,  			slot_level_handler fn, int start_level, int end_level,  			gfn_t start_gfn, gfn_t end_gfn, bool lock_flush_tlb) @@ -5110,7 +5112,7 @@ slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot,  	return flush;  } -static bool +static __always_inline bool  slot_handle_level(struct kvm *kvm, struct kvm_memory_slot *memslot,  		  slot_level_handler fn, int start_level, int end_level,  		  bool lock_flush_tlb) @@ -5121,7 +5123,7 @@ slot_handle_level(struct kvm *kvm, struct kvm_memory_slot *memslot,  			lock_flush_tlb);  } -static bool +static __always_inline bool  slot_handle_all_level(struct kvm *kvm, struct kvm_memory_slot *memslot,  		      slot_level_handler fn, bool lock_flush_tlb)  { @@ -5129,7 +5131,7 @@ slot_handle_all_level(struct kvm *kvm, struct kvm_memory_slot *memslot,  				 PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb);  } -static bool +static __always_inline bool  slot_handle_large_level(struct kvm *kvm, struct kvm_memory_slot *memslot,  			slot_level_handler fn, bool lock_flush_tlb)  { @@ -5137,7 +5139,7 @@ slot_handle_large_level(struct kvm *kvm, struct kvm_memory_slot *memslot,  				 PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb);  } -static bool +static __always_inline bool  slot_handle_leaf(struct kvm *kvm, struct kvm_memory_slot *memslot,  		 slot_level_handler fn, bool lock_flush_tlb)  { diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index b3e488a74828..be9c839e2c89 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -49,6 +49,7 @@  #include <asm/debugreg.h>  #include <asm/kvm_para.h>  #include <asm/irq_remapping.h> +#include <asm/microcode.h>  #include <asm/nospec-branch.h>  #include <asm/virtext.h> @@ -178,6 +179,8 @@ struct vcpu_svm {  	uint64_t sysenter_eip;  	uint64_t tsc_aux; +	u64 msr_decfg; +  	u64 next_rip;  	u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS]; @@ -300,6 +303,8 @@ module_param(vgif, int, 0444);  static int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT);  module_param(sev, int, 0444); +static u8 rsm_ins_bytes[] = "\x0f\xaa"; +  static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);  static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa);  static void svm_complete_interrupts(struct vcpu_svm *svm); @@ -1383,6 +1388,7 @@ static void init_vmcb(struct vcpu_svm *svm)  	set_intercept(svm, INTERCEPT_SKINIT);  	set_intercept(svm, INTERCEPT_WBINVD);  	set_intercept(svm, INTERCEPT_XSETBV); +	set_intercept(svm, INTERCEPT_RSM);  	if (!kvm_mwait_in_guest()) {  		set_intercept(svm, INTERCEPT_MONITOR); @@ -1902,6 +1908,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)  	u32 dummy;  	u32 eax = 1; +	vcpu->arch.microcode_version = 0x01000065;  	svm->spec_ctrl = 0;  	if (!init_event) { @@ -3699,6 +3706,12 @@ static int emulate_on_interception(struct vcpu_svm *svm)  	return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;  } +static int rsm_interception(struct vcpu_svm *svm) +{ +	return x86_emulate_instruction(&svm->vcpu, 0, 0, +				       rsm_ins_bytes, 2) == EMULATE_DONE; +} +  static int rdpmc_interception(struct vcpu_svm *svm)  {  	int err; @@ -3860,6 +3873,22 @@ static int cr8_write_interception(struct vcpu_svm *svm)  	return 0;  } +static int svm_get_msr_feature(struct kvm_msr_entry *msr) +{ +	msr->data = 0; + +	switch (msr->index) { +	case MSR_F10H_DECFG: +		if (boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) +			msr->data |= MSR_F10H_DECFG_LFENCE_SERIALIZE; +		break; +	default: +	
	return 1; +	} + +	return 0; +} +  static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)  {  	struct vcpu_svm *svm = to_svm(vcpu); @@ -3935,9 +3964,6 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)  		msr_info->data = svm->spec_ctrl;  		break; -	case MSR_IA32_UCODE_REV: -		msr_info->data = 0x01000065; -		break;  	case MSR_F15H_IC_CFG: {  		int family, model; @@ -3955,6 +3981,9 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)  			msr_info->data = 0x1E;  		}  		break; +	case MSR_F10H_DECFG: +		msr_info->data = svm->msr_decfg; +		break;  	default:  		return kvm_get_msr_common(vcpu, msr_info);  	} @@ -4133,6 +4162,24 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)  	case MSR_VM_IGNNE:  		vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);  		break; +	case MSR_F10H_DECFG: { +		struct kvm_msr_entry msr_entry; + +		msr_entry.index = msr->index; +		if (svm_get_msr_feature(&msr_entry)) +			return 1; + +		/* Check the supported bits */ +		if (data & ~msr_entry.data) +			return 1; + +		/* Don't allow the guest to change a bit, #GP */ +		if (!msr->host_initiated && (data ^ msr_entry.data)) +			return 1; + +		svm->msr_decfg = data; +		break; +	}  	case MSR_IA32_APICBASE:  		if (kvm_vcpu_apicv_active(vcpu))  			avic_update_vapic_bar(to_svm(vcpu), data); @@ -4541,7 +4588,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {  	[SVM_EXIT_MWAIT]			= mwait_interception,  	[SVM_EXIT_XSETBV]			= xsetbv_interception,  	[SVM_EXIT_NPF]				= npf_interception, -	[SVM_EXIT_RSM]                          = emulate_on_interception, +	[SVM_EXIT_RSM]                          = rsm_interception,  	[SVM_EXIT_AVIC_INCOMPLETE_IPI]		= avic_incomplete_ipi_interception,  	[SVM_EXIT_AVIC_UNACCELERATED_ACCESS]	= avic_unaccelerated_access_interception,  }; @@ -5355,7 +5402,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)  	 * being speculatively taken.  	 */  	if (svm->spec_ctrl) -		wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); +		native_wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);  	asm volatile (  		"push %%" _ASM_BP "; \n\t" @@ -5464,11 +5511,11 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)  	 * If the L02 MSR bitmap does not intercept the MSR, then we need to  	 * save it.  	 
*/ -	if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) -		rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); +	if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) +		svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);  	if (svm->spec_ctrl) -		wrmsrl(MSR_IA32_SPEC_CTRL, 0); +		native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);  	/* Eliminate branch target predictions from guest mode */  	vmexit_fill_RSB(); @@ -6236,16 +6283,18 @@ e_free:  static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)  { +	void __user *measure = (void __user *)(uintptr_t)argp->data;  	struct kvm_sev_info *sev = &kvm->arch.sev_info;  	struct sev_data_launch_measure *data;  	struct kvm_sev_launch_measure params; +	void __user *p = NULL;  	void *blob = NULL;  	int ret;  	if (!sev_guest(kvm))  		return -ENOTTY; -	if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, sizeof(params))) +	if (copy_from_user(¶ms, measure, sizeof(params)))  		return -EFAULT;  	data = kzalloc(sizeof(*data), GFP_KERNEL); @@ -6256,17 +6305,13 @@ static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)  	if (!params.len)  		goto cmd; -	if (params.uaddr) { +	p = (void __user *)(uintptr_t)params.uaddr; +	if (p) {  		if (params.len > SEV_FW_BLOB_MAX_SIZE) {  			ret = -EINVAL;  			goto e_free;  		} -		if (!access_ok(VERIFY_WRITE, params.uaddr, params.len)) { -			ret = -EFAULT; -			goto e_free; -		} -  		ret = -ENOMEM;  		blob = kmalloc(params.len, GFP_KERNEL);  		if (!blob) @@ -6290,13 +6335,13 @@ cmd:  		goto e_free_blob;  	if (blob) { -		if (copy_to_user((void __user *)(uintptr_t)params.uaddr, blob, params.len)) +		if (copy_to_user(p, blob, params.len))  			ret = -EFAULT;  	}  done:  	params.len = data->len; -	if (copy_to_user((void __user *)(uintptr_t)argp->data, ¶ms, sizeof(params))) +	if (copy_to_user(measure, ¶ms, sizeof(params)))  		ret = -EFAULT;  e_free_blob:  	kfree(blob); @@ -6597,7 +6642,7 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)  	struct page **pages;  	void *blob, *hdr;  	unsigned long n; -	int ret; +	int ret, offset;  	if (!sev_guest(kvm))  		return -ENOTTY; @@ -6623,6 +6668,10 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)  	if (!data)  		goto e_unpin_memory; +	offset = params.guest_uaddr & (PAGE_SIZE - 1); +	data->guest_address = __sme_page_pa(pages[0]) + offset; +	data->guest_len = params.guest_len; +  	blob = psp_copy_user_blob(params.trans_uaddr, params.trans_len);  	if (IS_ERR(blob)) {  		ret = PTR_ERR(blob); @@ -6637,8 +6686,8 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)  		ret = PTR_ERR(hdr);  		goto e_free_blob;  	} -	data->trans_address = __psp_pa(blob); -	data->trans_len = params.trans_len; +	data->hdr_address = __psp_pa(hdr); +	data->hdr_len = params.hdr_len;  	data->handle = sev->handle;  	ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_SECRET, data, &argp->error); @@ -6821,6 +6870,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {  	.vcpu_unblocking = svm_vcpu_unblocking,  	.update_bp_intercept = update_bp_intercept, +	.get_msr_feature = svm_get_msr_feature,  	.get_msr = svm_get_msr,  	.set_msr = svm_set_msr,  	.get_segment_base = svm_get_segment_base, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index f427723dc7db..2d87603f9179 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -51,6 +51,7 @@  #include <asm/apic.h>  #include <asm/irq_remapping.h>  #include <asm/mmu_context.h> +#include <asm/microcode.h>  #include <asm/nospec-branch.h>  #include "trace.h" @@ 
-1044,6 +1045,13 @@ static inline bool is_machine_check(u32 intr_info)  		(INTR_TYPE_HARD_EXCEPTION | MC_VECTOR | INTR_INFO_VALID_MASK);  } +/* Undocumented: icebp/int1 */ +static inline bool is_icebp(u32 intr_info) +{ +	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) +		== (INTR_TYPE_PRIV_SW_EXCEPTION | INTR_INFO_VALID_MASK); +} +  static inline bool cpu_has_vmx_msr_bitmap(void)  {  	return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS; @@ -3226,6 +3234,11 @@ static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu,  	return !(val & ~valid_bits);  } +static int vmx_get_msr_feature(struct kvm_msr_entry *msr) +{ +	return 1; +} +  /*   * Reads an msr value (of 'msr_index') into 'pdata'.   * Returns 0 on success, non-0 otherwise. @@ -4485,7 +4498,8 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)  		vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,  			      SECONDARY_EXEC_DESC);  		hw_cr4 &= ~X86_CR4_UMIP; -	} else +	} else if (!is_guest_mode(vcpu) || +	           !nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC))  		vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,  				SECONDARY_EXEC_DESC); @@ -5765,6 +5779,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)  	vmx->rmode.vm86_active = 0;  	vmx->spec_ctrl = 0; +	vcpu->arch.microcode_version = 0x100000000ULL;  	vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();  	kvm_set_cr8(vcpu, 0); @@ -6171,7 +6186,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)  		      (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {  			vcpu->arch.dr6 &= ~15;  			vcpu->arch.dr6 |= dr6 | DR6_RTM; -			if (!(dr6 & ~DR6_RESERVED)) /* icebp */ +			if (is_icebp(intr_info))  				skip_emulated_instruction(vcpu);  			kvm_queue_exception(vcpu, DB_VECTOR); @@ -9452,7 +9467,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)  	 * being speculatively taken.  	 */  	if (vmx->spec_ctrl) -		wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); +		native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);  	vmx->__launched = vmx->loaded_vmcs->launched;  	asm( @@ -9587,11 +9602,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)  	 * If the L02 MSR bitmap does not intercept the MSR, then we need to  	 * save it.  	 */ -	if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) -		rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); +	if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) +		vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);  	if (vmx->spec_ctrl) -		wrmsrl(MSR_IA32_SPEC_CTRL, 0); +		native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);  	/* Eliminate branch target predictions from guest mode */  	vmexit_fill_RSB(); @@ -10136,7 +10151,10 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,  			(unsigned long)(vmcs12->posted_intr_desc_addr &  			(PAGE_SIZE - 1)));  	} -	if (!nested_vmx_prepare_msr_bitmap(vcpu, vmcs12)) +	if (nested_vmx_prepare_msr_bitmap(vcpu, vmcs12)) +		vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL, +			      CPU_BASED_USE_MSR_BITMAPS); +	else  		vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,  				CPU_BASED_USE_MSR_BITMAPS);  } @@ -10224,8 +10242,8 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,  	 *    updated to reflect this when L1 (or its L2s) actually write to  	 *    the MSR.  	 
*/ -	bool pred_cmd = msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD); -	bool spec_ctrl = msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL); +	bool pred_cmd = !msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD); +	bool spec_ctrl = !msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL);  	/* Nothing to do if the MSR bitmap is not in use.  */  	if (!cpu_has_vmx_msr_bitmap() || @@ -11196,7 +11214,12 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)  	if (ret)  		return ret; -	if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) +	/* +	 * If we're entering a halted L2 vcpu and the L2 vcpu won't be woken +	 * by event injection, halt vcpu. +	 */ +	if ((vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) && +	    !(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK))  		return kvm_vcpu_halt(vcpu);  	vmx->nested.nested_run_pending = 1; @@ -12287,6 +12310,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {  	.vcpu_put = vmx_vcpu_put,  	.update_bp_intercept = update_exception_bitmap, +	.get_msr_feature = vmx_get_msr_feature,  	.get_msr = vmx_get_msr,  	.set_msr = vmx_set_msr,  	.get_segment_base = vmx_get_segment_base, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c8a0b545ac20..18b5ca7a3197 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1049,6 +1049,45 @@ static u32 emulated_msrs[] = {  static unsigned num_emulated_msrs; +/* + * List of msr numbers which are used to expose MSR-based features that + * can be used by a hypervisor to validate requested CPU features. + */ +static u32 msr_based_features[] = { +	MSR_F10H_DECFG, +	MSR_IA32_UCODE_REV, +}; + +static unsigned int num_msr_based_features; + +static int kvm_get_msr_feature(struct kvm_msr_entry *msr) +{ +	switch (msr->index) { +	case MSR_IA32_UCODE_REV: +		rdmsrl(msr->index, msr->data); +		break; +	default: +		if (kvm_x86_ops->get_msr_feature(msr)) +			return 1; +	} +	return 0; +} + +static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data) +{ +	struct kvm_msr_entry msr; +	int r; + +	msr.index = index; +	r = kvm_get_msr_feature(&msr); +	if (r) +		return r; + +	*data = msr.data; + +	return 0; +} +  bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)  {  	if (efer & efer_reserved_bits) @@ -2222,7 +2261,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)  	switch (msr) {  	case MSR_AMD64_NB_CFG: -	case MSR_IA32_UCODE_REV:  	case MSR_IA32_UCODE_WRITE:  	case MSR_VM_HSAVE_PA:  	case MSR_AMD64_PATCH_LOADER: @@ -2230,6 +2268,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)  	case MSR_AMD64_DC_CFG:  		break; +	case MSR_IA32_UCODE_REV: +		if (msr_info->host_initiated) +			vcpu->arch.microcode_version = data; +		break;  	case MSR_EFER:  		return set_efer(vcpu, data);  	case MSR_K7_HWCR: @@ -2525,7 +2567,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)  		msr_info->data = 0;  		break;  	case MSR_IA32_UCODE_REV: -		msr_info->data = 0x100000000ULL; +		msr_info->data = vcpu->arch.microcode_version;  		break;  	case MSR_MTRRcap:  	case 0x200 ... 
0x2ff: @@ -2680,13 +2722,11 @@ static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,  		    int (*do_msr)(struct kvm_vcpu *vcpu,  				  unsigned index, u64 *data))  { -	int i, idx; +	int i; -	idx = srcu_read_lock(&vcpu->kvm->srcu);  	for (i = 0; i < msrs->nmsrs; ++i)  		if (do_msr(vcpu, entries[i].index, &entries[i].data))  			break; -	srcu_read_unlock(&vcpu->kvm->srcu, idx);  	return i;  } @@ -2785,6 +2825,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)  	case KVM_CAP_SET_BOOT_CPU_ID:   	case KVM_CAP_SPLIT_IRQCHIP:  	case KVM_CAP_IMMEDIATE_EXIT: +	case KVM_CAP_GET_MSR_FEATURES:  		r = 1;  		break;  	case KVM_CAP_ADJUST_CLOCK: @@ -2899,6 +2940,31 @@ long kvm_arch_dev_ioctl(struct file *filp,  			goto out;  		r = 0;  		break; +	case KVM_GET_MSR_FEATURE_INDEX_LIST: { +		struct kvm_msr_list __user *user_msr_list = argp; +		struct kvm_msr_list msr_list; +		unsigned int n; + +		r = -EFAULT; +		if (copy_from_user(&msr_list, user_msr_list, sizeof(msr_list))) +			goto out; +		n = msr_list.nmsrs; +		msr_list.nmsrs = num_msr_based_features; +		if (copy_to_user(user_msr_list, &msr_list, sizeof(msr_list))) +			goto out; +		r = -E2BIG; +		if (n < msr_list.nmsrs) +			goto out; +		r = -EFAULT; +		if (copy_to_user(user_msr_list->indices, &msr_based_features, +				 num_msr_based_features * sizeof(u32))) +			goto out; +		r = 0; +		break; +	} +	case KVM_GET_MSRS: +		r = msr_io(NULL, argp, do_get_msr_feature, 1); +		break;  	}  	default:  		r = -EINVAL; @@ -3636,12 +3702,18 @@ long kvm_arch_vcpu_ioctl(struct file *filp,  		r = 0;  		break;  	} -	case KVM_GET_MSRS: +	case KVM_GET_MSRS: { +		int idx = srcu_read_lock(&vcpu->kvm->srcu);  		r = msr_io(vcpu, argp, do_get_msr, 1); +		srcu_read_unlock(&vcpu->kvm->srcu, idx);  		break; -	case KVM_SET_MSRS: +	} +	case KVM_SET_MSRS: { +		int idx = srcu_read_lock(&vcpu->kvm->srcu);  		r = msr_io(vcpu, argp, do_set_msr, 0); +		srcu_read_unlock(&vcpu->kvm->srcu, idx);  		break; +	}  	case KVM_TPR_ACCESS_REPORTING: {  		struct kvm_tpr_access_ctl tac; @@ -4464,6 +4536,19 @@ static void kvm_init_msr_list(void)  		j++;  	}  	num_emulated_msrs = j; + +	for (i = j = 0; i < ARRAY_SIZE(msr_based_features); i++) { +		struct kvm_msr_entry msr; + +		msr.index = msr_based_features[i]; +		if (kvm_get_msr_feature(&msr)) +			continue; + +		if (j < i) +			msr_based_features[j] = msr_based_features[i]; +		j++; +	} +	num_msr_based_features = j;  }  static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len, @@ -8017,6 +8102,8 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)  void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)  { +	kvm_lapic_reset(vcpu, init_event); +  	vcpu->arch.hflags = 0;  	vcpu->arch.smi_pending = 0; @@ -8460,10 +8547,8 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)  			return r;  	} -	if (!size) { -		r = vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE); -		WARN_ON(r < 0); -	} +	if (!size) +		vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE);  	return 0;  } diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 91e9700cc6dc..25a972c61b0a 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -28,7 +28,6 @@ lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o  lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o  lib-$(CONFIG_FUNCTION_ERROR_INJECTION)	+= error-inject.o  lib-$(CONFIG_RETPOLINE) += retpoline.o -OBJECT_FILES_NON_STANDARD_retpoline.o :=y  obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o diff --git a/arch/x86/lib/cpu.c b/arch/x86/lib/cpu.c index 
d6f848d1211d..2dd1fe13a37b 100644 --- a/arch/x86/lib/cpu.c +++ b/arch/x86/lib/cpu.c @@ -18,7 +18,7 @@ unsigned int x86_model(unsigned int sig)  {  	unsigned int fam, model; -	 fam = x86_family(sig); +	fam = x86_family(sig);  	model = (sig >> 4) & 0xf; diff --git a/arch/x86/lib/error-inject.c b/arch/x86/lib/error-inject.c index 7b881d03d0dd..3cdf06128d13 100644 --- a/arch/x86/lib/error-inject.c +++ b/arch/x86/lib/error-inject.c @@ -7,6 +7,7 @@ asmlinkage void just_return_func(void);  asm(  	".type just_return_func, @function\n" +	".globl just_return_func\n"  	"just_return_func:\n"  	"	ret\n"  	".size just_return_func, .-just_return_func\n" diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 480edc3a5e03..c909961e678a 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -7,7 +7,6 @@  #include <asm/alternative-asm.h>  #include <asm/export.h>  #include <asm/nospec-branch.h> -#include <asm/bitsperlong.h>  .macro THUNK reg  	.section .text.__x86.indirect_thunk @@ -47,58 +46,3 @@ GENERATE_THUNK(r13)  GENERATE_THUNK(r14)  GENERATE_THUNK(r15)  #endif - -/* - * Fill the CPU return stack buffer. - * - * Each entry in the RSB, if used for a speculative 'ret', contains an - * infinite 'pause; lfence; jmp' loop to capture speculative execution. - * - * This is required in various cases for retpoline and IBRS-based - * mitigations for the Spectre variant 2 vulnerability. Sometimes to - * eliminate potentially bogus entries from the RSB, and sometimes - * purely to ensure that it doesn't get empty, which on some CPUs would - * allow predictions from other (unwanted!) sources to be used. - * - * Google experimented with loop-unrolling and this turned out to be - * the optimal version - two calls, each with their own speculation - * trap should their return address end up getting used, in a loop. - */ -.macro STUFF_RSB nr:req sp:req -	mov	$(\nr / 2), %_ASM_BX -	.align 16 -771: -	call	772f -773:						/* speculation trap */ -	pause -	lfence -	jmp	773b -	.align 16 -772: -	call	774f -775:						/* speculation trap */ -	pause -	lfence -	jmp	775b -	.align 16 -774: -	dec	%_ASM_BX -	jnz	771b -	add	$((BITS_PER_LONG/8) * \nr), \sp -.endm - -#define RSB_FILL_LOOPS		16	/* To avoid underflow */ - -ENTRY(__fill_rsb) -	STUFF_RSB RSB_FILL_LOOPS, %_ASM_SP -	ret -END(__fill_rsb) -EXPORT_SYMBOL_GPL(__fill_rsb) - -#define RSB_CLEAR_LOOPS		32	/* To forcibly overwrite all entries */ - -ENTRY(__clear_rsb) -	STUFF_RSB RSB_CLEAR_LOOPS, %_ASM_SP -	ret -END(__clear_rsb) -EXPORT_SYMBOL_GPL(__clear_rsb) diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c index b9283cc27622..476d810639a8 100644 --- a/arch/x86/mm/cpu_entry_area.c +++ b/arch/x86/mm/cpu_entry_area.c @@ -163,4 +163,10 @@ void __init setup_cpu_entry_areas(void)  	for_each_possible_cpu(cpu)  		setup_cpu_entry_area(cpu); + +	/* +	 * This is the last essential update to swapper_pgdir which needs +	 * to be synchronized to initial_page_table on 32bit. 
+	 */ +	sync_initial_page_table();  } diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 800de815519c..25a30b5d6582 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -330,7 +330,7 @@ static noinline int vmalloc_fault(unsigned long address)  	if (!pmd_k)  		return -1; -	if (pmd_huge(*pmd_k)) +	if (pmd_large(*pmd_k))  		return 0;  	pte_k = pte_offset_kernel(pmd_k, address); @@ -475,7 +475,7 @@ static noinline int vmalloc_fault(unsigned long address)  	if (pud_none(*pud) || pud_pfn(*pud) != pud_pfn(*pud_ref))  		BUG(); -	if (pud_huge(*pud)) +	if (pud_large(*pud))  		return 0;  	pmd = pmd_offset(pud, address); @@ -486,7 +486,7 @@ static noinline int vmalloc_fault(unsigned long address)  	if (pmd_none(*pmd) || pmd_pfn(*pmd) != pmd_pfn(*pmd_ref))  		BUG(); -	if (pmd_huge(*pmd)) +	if (pmd_large(*pmd))  		return 0;  	pte_ref = pte_offset_kernel(pmd_ref, address); @@ -1248,10 +1248,6 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,  	tsk = current;  	mm = tsk->mm; -	/* -	 * Detect and handle instructions that would cause a page fault for -	 * both a tracked kernel page and a userspace page. -	 */  	prefetchw(&mm->mmap_sem);  	if (unlikely(kmmio_fault(regs, address))) diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 79cb066f40c0..396e1f0151ac 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -453,6 +453,21 @@ static inline void permanent_kmaps_init(pgd_t *pgd_base)  }  #endif /* CONFIG_HIGHMEM */ +void __init sync_initial_page_table(void) +{ +	clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY, +			swapper_pg_dir     + KERNEL_PGD_BOUNDARY, +			KERNEL_PGD_PTRS); + +	/* +	 * sync back low identity map too.  It is used for example +	 * in the 32-bit EFI stub. +	 */ +	clone_pgd_range(initial_page_table, +			swapper_pg_dir     + KERNEL_PGD_BOUNDARY, +			min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY)); +} +  void __init native_pagetable_init(void)  {  	unsigned long pfn, va; diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 1ab42c852069..af11a2890235 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -256,7 +256,7 @@ static void __set_pte_vaddr(pud_t *pud, unsigned long vaddr, pte_t new_pte)  	 * It's enough to flush this one mapping.  	 
* (PGE mappings get flushed as well)  	 */ -	__flush_tlb_one(vaddr); +	__flush_tlb_one_kernel(vaddr);  }  void set_pte_vaddr_p4d(p4d_t *p4d_page, unsigned long vaddr, pte_t new_pte) @@ -800,17 +800,11 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,  #define PAGE_INUSE 0xFD -static void __meminit free_pagetable(struct page *page, int order, -		struct vmem_altmap *altmap) +static void __meminit free_pagetable(struct page *page, int order)  {  	unsigned long magic;  	unsigned int nr_pages = 1 << order; -	if (altmap) { -		vmem_altmap_free(altmap, nr_pages); -		return; -	} -  	/* bootmem page has reserved flag */  	if (PageReserved(page)) {  		__ClearPageReserved(page); @@ -826,9 +820,17 @@ static void __meminit free_pagetable(struct page *page, int order,  		free_pages((unsigned long)page_address(page), order);  } -static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd, +static void __meminit free_hugepage_table(struct page *page,  		struct vmem_altmap *altmap)  { +	if (altmap) +		vmem_altmap_free(altmap, PMD_SIZE / PAGE_SIZE); +	else +		free_pagetable(page, get_order(PMD_SIZE)); +} + +static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd) +{  	pte_t *pte;  	int i; @@ -839,14 +841,13 @@ static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd,  	}  	/* free a pte talbe */ -	free_pagetable(pmd_page(*pmd), 0, altmap); +	free_pagetable(pmd_page(*pmd), 0);  	spin_lock(&init_mm.page_table_lock);  	pmd_clear(pmd);  	spin_unlock(&init_mm.page_table_lock);  } -static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud, -		struct vmem_altmap *altmap) +static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud)  {  	pmd_t *pmd;  	int i; @@ -858,14 +859,13 @@ static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud,  	}  	/* free a pmd talbe */ -	free_pagetable(pud_page(*pud), 0, altmap); +	free_pagetable(pud_page(*pud), 0);  	spin_lock(&init_mm.page_table_lock);  	pud_clear(pud);  	spin_unlock(&init_mm.page_table_lock);  } -static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d, -		struct vmem_altmap *altmap) +static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d)  {  	pud_t *pud;  	int i; @@ -877,7 +877,7 @@ static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d,  	}  	/* free a pud talbe */ -	free_pagetable(p4d_page(*p4d), 0, altmap); +	free_pagetable(p4d_page(*p4d), 0);  	spin_lock(&init_mm.page_table_lock);  	p4d_clear(p4d);  	spin_unlock(&init_mm.page_table_lock); @@ -885,7 +885,7 @@ static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d,  static void __meminit  remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end, -		 struct vmem_altmap *altmap, bool direct) +		 bool direct)  {  	unsigned long next, pages = 0;  	pte_t *pte; @@ -916,7 +916,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,  			 * freed when offlining, or simplely not in use.  			 
*/  			if (!direct) -				free_pagetable(pte_page(*pte), 0, altmap); +				free_pagetable(pte_page(*pte), 0);  			spin_lock(&init_mm.page_table_lock);  			pte_clear(&init_mm, addr, pte); @@ -939,7 +939,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,  			page_addr = page_address(pte_page(*pte));  			if (!memchr_inv(page_addr, PAGE_INUSE, PAGE_SIZE)) { -				free_pagetable(pte_page(*pte), 0, altmap); +				free_pagetable(pte_page(*pte), 0);  				spin_lock(&init_mm.page_table_lock);  				pte_clear(&init_mm, addr, pte); @@ -974,9 +974,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,  			if (IS_ALIGNED(addr, PMD_SIZE) &&  			    IS_ALIGNED(next, PMD_SIZE)) {  				if (!direct) -					free_pagetable(pmd_page(*pmd), -						       get_order(PMD_SIZE), -						       altmap); +					free_hugepage_table(pmd_page(*pmd), +							    altmap);  				spin_lock(&init_mm.page_table_lock);  				pmd_clear(pmd); @@ -989,9 +988,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,  				page_addr = page_address(pmd_page(*pmd));  				if (!memchr_inv(page_addr, PAGE_INUSE,  						PMD_SIZE)) { -					free_pagetable(pmd_page(*pmd), -						       get_order(PMD_SIZE), -						       altmap); +					free_hugepage_table(pmd_page(*pmd), +							    altmap);  					spin_lock(&init_mm.page_table_lock);  					pmd_clear(pmd); @@ -1003,8 +1001,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,  		}  		pte_base = (pte_t *)pmd_page_vaddr(*pmd); -		remove_pte_table(pte_base, addr, next, altmap, direct); -		free_pte_table(pte_base, pmd, altmap); +		remove_pte_table(pte_base, addr, next, direct); +		free_pte_table(pte_base, pmd);  	}  	/* Call free_pmd_table() in remove_pud_table(). */ @@ -1033,8 +1031,7 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,  			    IS_ALIGNED(next, PUD_SIZE)) {  				if (!direct)  					free_pagetable(pud_page(*pud), -						       get_order(PUD_SIZE), -						       altmap); +						       get_order(PUD_SIZE));  				spin_lock(&init_mm.page_table_lock);  				pud_clear(pud); @@ -1048,8 +1045,7 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,  				if (!memchr_inv(page_addr, PAGE_INUSE,  						PUD_SIZE)) {  					free_pagetable(pud_page(*pud), -						       get_order(PUD_SIZE), -						       altmap); +						       get_order(PUD_SIZE));  					spin_lock(&init_mm.page_table_lock);  					pud_clear(pud); @@ -1062,7 +1058,7 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,  		pmd_base = pmd_offset(pud, 0);  		remove_pmd_table(pmd_base, addr, next, direct, altmap); -		free_pmd_table(pmd_base, pud, altmap); +		free_pmd_table(pmd_base, pud);  	}  	if (direct) @@ -1094,7 +1090,7 @@ remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end,  		 * to adapt for boot-time switching between 4 and 5 level page tables.  		 
*/  		if (CONFIG_PGTABLE_LEVELS == 5) -			free_pud_table(pud_base, p4d, altmap); +			free_pud_table(pud_base, p4d);  	}  	if (direct) @@ -1193,8 +1189,8 @@ void __init mem_init(void)  	register_page_bootmem_info();  	/* Register memory areas for /proc/kcore */ -	kclist_add(&kcore_vsyscall, (void *)VSYSCALL_ADDR, -			 PAGE_SIZE, KCORE_OTHER); +	if (get_gate_vma(&init_mm)) +		kclist_add(&kcore_vsyscall, (void *)VSYSCALL_ADDR, PAGE_SIZE, KCORE_USER);  	mem_init_print_info(NULL);  } diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index c45b6ec5357b..e2db83bebc3b 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -820,5 +820,5 @@ void __init __early_set_fixmap(enum fixed_addresses idx,  		set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));  	else  		pte_clear(&init_mm, addr, pte); -	__flush_tlb_one(addr); +	__flush_tlb_one_kernel(addr);  } diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 58477ec3d66d..7c8686709636 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -168,7 +168,7 @@ static int clear_page_presence(struct kmmio_fault_page *f, bool clear)  		return -1;  	} -	__flush_tlb_one(f->addr); +	__flush_tlb_one_kernel(f->addr);  	return 0;  } diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S index 01f682cf77a8..40a6085063d6 100644 --- a/arch/x86/mm/mem_encrypt_boot.S +++ b/arch/x86/mm/mem_encrypt_boot.S @@ -15,6 +15,7 @@  #include <asm/page.h>  #include <asm/processor-flags.h>  #include <asm/msr-index.h> +#include <asm/nospec-branch.h>  	.text  	.code64 @@ -59,6 +60,7 @@ ENTRY(sme_encrypt_execute)  	movq	%rax, %r8		/* Workarea encryption routine */  	addq	$PAGE_SIZE, %r8		/* Workarea intermediate copy buffer */ +	ANNOTATE_RETPOLINE_SAFE  	call	*%rax			/* Call the encryption routine */  	pop	%r12 diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 004abf9ebf12..34cda7e0551b 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -702,4 +702,52 @@ int pmd_clear_huge(pmd_t *pmd)  	return 0;  } + +/** + * pud_free_pmd_page - Clear pud entry and free pmd page. + * @pud: Pointer to a PUD. + * + * Context: The pud range has been unmaped and TLB purged. + * Return: 1 if clearing the entry succeeded. 0 otherwise. + */ +int pud_free_pmd_page(pud_t *pud) +{ +	pmd_t *pmd; +	int i; + +	if (pud_none(*pud)) +		return 1; + +	pmd = (pmd_t *)pud_page_vaddr(*pud); + +	for (i = 0; i < PTRS_PER_PMD; i++) +		if (!pmd_free_pte_page(&pmd[i])) +			return 0; + +	pud_clear(pud); +	free_page((unsigned long)pmd); + +	return 1; +} + +/** + * pmd_free_pte_page - Clear pmd entry and free pte page. + * @pmd: Pointer to a PMD. + * + * Context: The pmd range has been unmaped and TLB purged. + * Return: 1 if clearing the entry succeeded. 0 otherwise. + */ +int pmd_free_pte_page(pmd_t *pmd) +{ +	pte_t *pte; + +	if (pmd_none(*pmd)) +		return 1; + +	pte = (pte_t *)pmd_page_vaddr(*pmd); +	pmd_clear(pmd); +	free_page((unsigned long)pte); + +	return 1; +}  #endif	/* CONFIG_HAVE_ARCH_HUGE_VMAP */ diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index c3c5274410a9..9bb7f0ab9fe6 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c @@ -63,7 +63,7 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pteval)  	 * It's enough to flush this one mapping.  	 
* (PGE mappings get flushed as well)  	 */ -	__flush_tlb_one(vaddr); +	__flush_tlb_one_kernel(vaddr);  }  unsigned long __FIXADDR_TOP = 0xfffff000; diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index ce38f165489b..631507f0c198 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -332,7 +332,7 @@ static void __init pti_clone_user_shared(void)  }  /* - * Clone the ESPFIX P4D into the user space visinble page table + * Clone the ESPFIX P4D into the user space visible page table   */  static void __init pti_setup_espfix64(void)  { diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 8dcc0607f805..7f1a51399674 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -498,7 +498,7 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,  	 *    flush that changes context.tlb_gen from 2 to 3.  If they get  	 *    processed on this CPU in reverse order, we'll see  	 *     local_tlb_gen == 1, mm_tlb_gen == 3, and end != TLB_FLUSH_ALL. -	 *    If we were to use __flush_tlb_single() and set local_tlb_gen to +	 *    If we were to use __flush_tlb_one_user() and set local_tlb_gen to  	 *    3, we'd be break the invariant: we'd update local_tlb_gen above  	 *    1 without the full flush that's needed for tlb_gen 2.  	 * @@ -519,7 +519,7 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,  		addr = f->start;  		while (addr < f->end) { -			__flush_tlb_single(addr); +			__flush_tlb_one_user(addr);  			addr += PAGE_SIZE;  		}  		if (local) @@ -666,7 +666,7 @@ static void do_kernel_range_flush(void *info)  	/* flush range by one by one 'invlpg' */  	for (addr = f->start; addr < f->end; addr += PAGE_SIZE) -		__flush_tlb_one(addr); +		__flush_tlb_one_kernel(addr);  }  void flush_tlb_kernel_range(unsigned long start, unsigned long end) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 4923d92f918d..ce5b2ebd5701 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -13,6 +13,7 @@  #include <linux/if_vlan.h>  #include <asm/cacheflush.h>  #include <asm/set_memory.h> +#include <asm/nospec-branch.h>  #include <linux/bpf.h>  /* @@ -290,7 +291,7 @@ static void emit_bpf_tail_call(u8 **pprog)  	EMIT2(0x89, 0xD2);                        /* mov edx, edx */  	EMIT3(0x39, 0x56,                         /* cmp dword ptr [rsi + 16], edx */  	      offsetof(struct bpf_array, map.max_entries)); -#define OFFSET1 43 /* number of bytes to jump */ +#define OFFSET1 (41 + RETPOLINE_RAX_BPF_JIT_SIZE) /* number of bytes to jump */  	EMIT2(X86_JBE, OFFSET1);                  /* jbe out */  	label1 = cnt; @@ -299,7 +300,7 @@ static void emit_bpf_tail_call(u8 **pprog)  	 */  	EMIT2_off32(0x8B, 0x85, 36);              /* mov eax, dword ptr [rbp + 36] */  	EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT);     /* cmp eax, MAX_TAIL_CALL_CNT */ -#define OFFSET2 32 +#define OFFSET2 (30 + RETPOLINE_RAX_BPF_JIT_SIZE)  	EMIT2(X86_JA, OFFSET2);                   /* ja out */  	label2 = cnt;  	EMIT3(0x83, 0xC0, 0x01);                  /* add eax, 1 */ @@ -313,7 +314,7 @@ static void emit_bpf_tail_call(u8 **pprog)  	 *   goto out;  	 */  	EMIT3(0x48, 0x85, 0xC0);		  /* test rax,rax */ -#define OFFSET3 10 +#define OFFSET3 (8 + RETPOLINE_RAX_BPF_JIT_SIZE)  	EMIT2(X86_JE, OFFSET3);                   /* je out */  	label3 = cnt; @@ -326,7 +327,7 @@ static void emit_bpf_tail_call(u8 **pprog)  	 * rdi == ctx (1st arg)  	 * rax == prog->bpf_func + prologue_size  	 */ -	EMIT2(0xFF, 0xE0);                        /* jmp rax */ +	RETPOLINE_RAX_BPF_JIT();  	/* out: */  	
BUILD_BUG_ON(cnt - label1 != OFFSET1); @@ -1187,7 +1188,7 @@ skip_init_addrs:  	 * may converge on the last pass. In such case do one more  	 * pass to emit the final image  	 */ -	for (pass = 0; pass < 10 || image; pass++) { +	for (pass = 0; pass < 20 || image; pass++) {  		proglen = do_jit(prog, addrs, image, oldproglen, &ctx);  		if (proglen <= 0) {  			image = NULL; @@ -1214,6 +1215,7 @@ skip_init_addrs:  			}  		}  		oldproglen = proglen; +		cond_resched();  	}  	if (bpf_jit_enable > 1) diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 174c59774cc9..a7a7677265b6 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -460,7 +460,7 @@ static int nmi_setup(void)  		goto fail;  	for_each_possible_cpu(cpu) { -		if (!cpu) +		if (!IS_ENABLED(CONFIG_SMP) || !cpu)  			continue;  		memcpy(per_cpu(cpu_msrs, cpu).counters, diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index c310a8284358..f9cfbc0d1f33 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -227,7 +227,7 @@ int __init efi_alloc_page_tables(void)  	if (!pud) {  		if (CONFIG_PGTABLE_LEVELS > 4)  			free_page((unsigned long) pgd_page_vaddr(*pgd)); -		free_page((unsigned long)efi_pgd); +		free_pages((unsigned long)efi_pgd, PGD_ALLOCATION_ORDER);  		return -ENOMEM;  	} diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c index 2c67bae6bb53..fb1df9488e98 100644 --- a/arch/x86/platform/intel-mid/intel-mid.c +++ b/arch/x86/platform/intel-mid/intel-mid.c @@ -79,7 +79,7 @@ static void intel_mid_power_off(void)  static void intel_mid_reboot(void)  { -	intel_scu_ipc_simple_command(IPCMSG_COLD_BOOT, 0); +	intel_scu_ipc_simple_command(IPCMSG_COLD_RESET, 0);  }  static unsigned long __init intel_mid_calibrate_tsc(void) diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index c2e9285d1bf1..db77e087adaf 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -299,7 +299,7 @@ static void bau_process_message(struct msg_desc *mdp, struct bau_control *bcp,  		local_flush_tlb();  		stat->d_alltlb++;  	} else { -		__flush_tlb_single(msg->address); +		__flush_tlb_one_user(msg->address);  		stat->d_onetlb++;  	}  	stat->d_requestee++; diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S index de53bd15df5a..24bb7598774e 100644 --- a/arch/x86/realmode/rm/trampoline_64.S +++ b/arch/x86/realmode/rm/trampoline_64.S @@ -102,7 +102,7 @@ ENTRY(startup_32)  	 * don't we'll eventually crash trying to execute encrypted  	 * instructions.  	 */ -	bt	$TH_FLAGS_SME_ACTIVE_BIT, pa_tr_flags +	btl	$TH_FLAGS_SME_ACTIVE_BIT, pa_tr_flags  	jnc	.Ldone  	movl	$MSR_K8_SYSCFG, %ecx  	rdmsr diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c index 5d73c443e778..220e97841e49 100644 --- a/arch/x86/tools/relocs.c +++ b/arch/x86/tools/relocs.c @@ -770,9 +770,12 @@ static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym,  		break;  	case R_X86_64_PC32: +	case R_X86_64_PLT32:  		/*  		 * PC relative relocations don't need to be adjusted unless  		 * referencing a percpu symbol. +		 * +		 * NB: R_X86_64_PLT32 can be treated as R_X86_64_PC32.  		 
*/  		if (is_percpu_sym(sym, symname))  			add_reloc(&relocs32neg, offset); diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h index b7d73400ea29..f31e5d903161 100644 --- a/arch/x86/um/asm/barrier.h +++ b/arch/x86/um/asm/barrier.h @@ -30,11 +30,7 @@  #endif /* CONFIG_X86_32 */ -#ifdef CONFIG_X86_PPRO_FENCE -#define dma_rmb()	rmb() -#else /* CONFIG_X86_PPRO_FENCE */  #define dma_rmb()	barrier() -#endif /* CONFIG_X86_PPRO_FENCE */  #define dma_wmb()	barrier()  #include <asm-generic/barrier.h> diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index c047f42552e1..3c2c2530737e 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1376,8 +1376,6 @@ asmlinkage __visible void __init xen_start_kernel(void)  	if (!xen_initial_domain()) {  		add_preferred_console("xenboot", 0, NULL); -		add_preferred_console("tty", 0, NULL); -		add_preferred_console("hvc", 0, NULL);  		if (pci_xen)  			x86_init.pci.arch_init = pci_xen_init;  	} else { @@ -1410,6 +1408,10 @@ asmlinkage __visible void __init xen_start_kernel(void)  		xen_boot_params_init_edd();  	} + +	add_preferred_console("tty", 0, NULL); +	add_preferred_console("hvc", 0, NULL); +  #ifdef CONFIG_PCI  	/* PCI BIOS service won't work from a PV guest. */  	pci_probe &= ~PCI_PROBE_BIOS; diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index d85076223a69..aae88fec9941 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -1300,12 +1300,12 @@ static void xen_flush_tlb(void)  	preempt_enable();  } -static void xen_flush_tlb_single(unsigned long addr) +static void xen_flush_tlb_one_user(unsigned long addr)  {  	struct mmuext_op *op;  	struct multicall_space mcs; -	trace_xen_mmu_flush_tlb_single(addr); +	trace_xen_mmu_flush_tlb_one_user(addr);  	preempt_disable(); @@ -2370,7 +2370,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {  	.flush_tlb_user = xen_flush_tlb,  	.flush_tlb_kernel = xen_flush_tlb, -	.flush_tlb_single = xen_flush_tlb_single, +	.flush_tlb_one_user = xen_flush_tlb_one_user,  	.flush_tlb_others = xen_flush_tlb_others,  	.pgd_alloc = xen_pgd_alloc, diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 77c959cf81e7..7a43b2ae19f1 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -122,6 +122,8 @@ void __init xen_smp_cpus_done(unsigned int max_cpus)  	if (xen_hvm_domain())  		native_smp_cpus_done(max_cpus); +	else +		calculate_max_logical_packages();  	if (xen_have_vcpu_info_placement)  		return; diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index d9f96cc5d743..1d83152c761b 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -1,12 +1,15 @@  // SPDX-License-Identifier: GPL-2.0  #include <linux/types.h>  #include <linux/tick.h> +#include <linux/percpu-defs.h>  #include <xen/xen.h>  #include <xen/interface/xen.h>  #include <xen/grant_table.h>  #include <xen/events.h> +#include <asm/cpufeatures.h> +#include <asm/msr-index.h>  #include <asm/xen/hypercall.h>  #include <asm/xen/page.h>  #include <asm/fixmap.h> @@ -15,6 +18,8 @@  #include "mmu.h"  #include "pmu.h" +static DEFINE_PER_CPU(u64, spec_ctrl); +  void xen_arch_pre_suspend(void)  {  	xen_save_time_memory_area(); @@ -35,6 +40,9 @@ void xen_arch_post_suspend(int cancelled)  static void xen_vcpu_notify_restore(void *data)  { +	if (xen_pv_domain() && boot_cpu_has(X86_FEATURE_SPEC_CTRL)) +		wrmsrl(MSR_IA32_SPEC_CTRL, this_cpu_read(spec_ctrl)); +  	/* Boot processor notified via generic timekeeping_resume() */  	if (smp_processor_id() == 0)  		return; @@ -44,7 
+52,15 @@ static void xen_vcpu_notify_restore(void *data)  static void xen_vcpu_notify_suspend(void *data)  { +	u64 tmp; +  	tick_suspend_local(); + +	if (xen_pv_domain() && boot_cpu_has(X86_FEATURE_SPEC_CTRL)) { +		rdmsrl(MSR_IA32_SPEC_CTRL, tmp); +		this_cpu_write(spec_ctrl, tmp); +		wrmsrl(MSR_IA32_SPEC_CTRL, 0); +	}  }  void xen_arch_resume(void) diff --git a/arch/xtensa/kernel/pci-dma.c b/arch/xtensa/kernel/pci-dma.c index 623720a11143..732631ce250f 100644 --- a/arch/xtensa/kernel/pci-dma.c +++ b/arch/xtensa/kernel/pci-dma.c @@ -16,6 +16,7 @@   */  #include <linux/dma-contiguous.h> +#include <linux/dma-direct.h>  #include <linux/gfp.h>  #include <linux/highmem.h>  #include <linux/mm.h> @@ -123,7 +124,7 @@ static void *xtensa_dma_alloc(struct device *dev, size_t size,  			      unsigned long attrs)  {  	unsigned long ret; -	unsigned long uncached = 0; +	unsigned long uncached;  	unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;  	struct page *page = NULL; @@ -144,15 +145,27 @@ static void *xtensa_dma_alloc(struct device *dev, size_t size,  	if (!page)  		return NULL; -	ret = (unsigned long)page_address(page); +	*handle = phys_to_dma(dev, page_to_phys(page)); -	/* We currently don't support coherent memory outside KSEG */ +#ifdef CONFIG_MMU +	if (PageHighMem(page)) { +		void *p; +		p = dma_common_contiguous_remap(page, size, VM_MAP, +						pgprot_noncached(PAGE_KERNEL), +						__builtin_return_address(0)); +		if (!p) { +			if (!dma_release_from_contiguous(dev, page, count)) +				__free_pages(page, get_order(size)); +		} +		return p; +	} +#endif +	ret = (unsigned long)page_address(page);  	BUG_ON(ret < XCHAL_KSEG_CACHED_VADDR ||  	       ret > XCHAL_KSEG_CACHED_VADDR + XCHAL_KSEG_SIZE - 1);  	uncached = ret + XCHAL_KSEG_BYPASS_VADDR - XCHAL_KSEG_CACHED_VADDR; -	*handle = virt_to_bus((void *)ret);  	__invalidate_dcache_range(ret, size);  	return (void *)uncached; @@ -161,13 +174,20 @@ static void *xtensa_dma_alloc(struct device *dev, size_t size,  static void xtensa_dma_free(struct device *dev, size_t size, void *vaddr,  			    dma_addr_t dma_handle, unsigned long attrs)  { -	unsigned long addr = (unsigned long)vaddr + -		XCHAL_KSEG_CACHED_VADDR - XCHAL_KSEG_BYPASS_VADDR; -	struct page *page = virt_to_page(addr);  	unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; - -	BUG_ON(addr < XCHAL_KSEG_CACHED_VADDR || -	       addr > XCHAL_KSEG_CACHED_VADDR + XCHAL_KSEG_SIZE - 1); +	unsigned long addr = (unsigned long)vaddr; +	struct page *page; + +	if (addr >= XCHAL_KSEG_BYPASS_VADDR && +	    addr - XCHAL_KSEG_BYPASS_VADDR < XCHAL_KSEG_SIZE) { +		addr += XCHAL_KSEG_CACHED_VADDR - XCHAL_KSEG_BYPASS_VADDR; +		page = virt_to_page(addr); +	} else { +#ifdef CONFIG_MMU +		dma_common_free_remap(vaddr, size, VM_MAP); +#endif +		page = pfn_to_page(PHYS_PFN(dma_to_phys(dev, dma_handle))); +	}  	if (!dma_release_from_contiguous(dev, page, count))  		__free_pages(page, get_order(size)); diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c index d776ec0d7b22..34aead7dcb48 100644 --- a/arch/xtensa/mm/init.c +++ b/arch/xtensa/mm/init.c @@ -79,19 +79,75 @@ void __init zones_init(void)  	free_area_init_node(0, zones_size, ARCH_PFN_OFFSET, NULL);  } +#ifdef CONFIG_HIGHMEM +static void __init free_area_high(unsigned long pfn, unsigned long end) +{ +	for (; pfn < end; pfn++) +		free_highmem_page(pfn_to_page(pfn)); +} + +static void __init free_highpages(void) +{ +	unsigned long max_low = max_low_pfn; +	struct memblock_region *mem, *res; + +	reset_all_zones_managed_pages(); +	/* set highmem page free */ 
+	for_each_memblock(memory, mem) { +		unsigned long start = memblock_region_memory_base_pfn(mem); +		unsigned long end = memblock_region_memory_end_pfn(mem); + +		/* Ignore complete lowmem entries */ +		if (end <= max_low) +			continue; + +		if (memblock_is_nomap(mem)) +			continue; + +		/* Truncate partial highmem entries */ +		if (start < max_low) +			start = max_low; + +		/* Find and exclude any reserved regions */ +		for_each_memblock(reserved, res) { +			unsigned long res_start, res_end; + +			res_start = memblock_region_reserved_base_pfn(res); +			res_end = memblock_region_reserved_end_pfn(res); + +			if (res_end < start) +				continue; +			if (res_start < start) +				res_start = start; +			if (res_start > end) +				res_start = end; +			if (res_end > end) +				res_end = end; +			if (res_start != start) +				free_area_high(start, res_start); +			start = res_end; +			if (start == end) +				break; +		} + +		/* And now free anything which remains */ +		if (start < end) +			free_area_high(start, end); +	} +} +#else +static void __init free_highpages(void) +{ +} +#endif +  /*   * Initialize memory pages.   */  void __init mem_init(void)  { -#ifdef CONFIG_HIGHMEM -	unsigned long tmp; - -	reset_all_zones_managed_pages(); -	for (tmp = max_low_pfn; tmp < max_pfn; tmp++) -		free_highmem_page(pfn_to_page(tmp)); -#endif +	free_highpages();  	max_mapnr = max_pfn - ARCH_PFN_OFFSET;  	high_memory = (void *)__va(max_low_pfn << PAGE_SHIFT);  | 
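
Three short illustrative sketches follow; they are not part of the patch above, only standalone restatements of techniques it uses, with invented names where noted.

The signal_compat.c hunk at the top of this section pins the x86 siginfo/compat_siginfo layout with BUILD_BUG_ON(offsetof(...)) checks so that any accidental ABI drift fails the build rather than the run. The same idea can be expressed outside the kernel with C11 static assertions; the structure and offsets below are invented for illustration and are not the kernel's siginfo layout:

	#include <assert.h>
	#include <stddef.h>
	#include <stdint.h>

	/* Hypothetical ABI structure whose layout must never drift. */
	struct demo_record {
		int32_t  pid;	/* expected at offset 0x0 */
		int32_t  uid;	/* expected at offset 0x4 */
		uint64_t value;	/* expected at offset 0x8 */
	};

	/*
	 * Like the BUILD_BUG_ON(offsetof(...)) lines added to signal_compat.c,
	 * these fail at compile time, not at run time, if a field ever moves.
	 * The expected offsets assume the usual x86 alignment rules for these types.
	 */
	static_assert(offsetof(struct demo_record, pid)   == 0x0, "pid moved");
	static_assert(offsetof(struct demo_record, uid)   == 0x4, "uid moved");
	static_assert(offsetof(struct demo_record, value) == 0x8, "value moved");
	static_assert(sizeof(struct demo_record) == 16, "record size changed");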

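The svm_set_msr() hunk for MSR_F10H_DECFG accepts a write only if it stays within the value advertised by svm_get_msr_feature(), and additionally forbids the guest (as opposed to a host-initiated userspace write) from flipping any advertised bit. A standalone paraphrase of that policy, with an invented function name, where 'supported' stands for the host's advertised feature value:

	#include <stdbool.h>
	#include <stdint.h>

	/* Sketch of the MSR_F10H_DECFG write checks from the svm_set_msr() hunk. */
	static bool decfg_write_allowed(uint64_t data, uint64_t supported,
					bool host_initiated)
	{
		/* Refuse any bit the host never advertised. */
		if (data & ~supported)
			return false;

		/* The guest itself may not change an advertised bit. */
		if (!host_initiated && (data ^ supported))
			return false;

		return true;
	}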

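Finally, the arch/x86/kvm/x86.c changes expose feature MSRs (MSR_IA32_UCODE_REV, MSR_F10H_DECFG) through a system-scope interface: KVM_GET_MSR_FEATURE_INDEX_LIST enumerates them and KVM_GET_MSRS on the /dev/kvm fd reads their values. A rough userspace sketch of how a VMM might consume this, assuming a kernel and uapi headers that carry these patches; error handling and cleanup are trimmed:

	#include <fcntl.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	int main(void)
	{
		int kvm = open("/dev/kvm", O_RDWR);
		if (kvm < 0 || ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_GET_MSR_FEATURES) <= 0)
			return 1;

		/* A call with nmsrs == 0 fails with E2BIG but reports the count. */
		struct kvm_msr_list probe = { .nmsrs = 0 };
		ioctl(kvm, KVM_GET_MSR_FEATURE_INDEX_LIST, &probe);

		struct kvm_msr_list *list =
			calloc(1, sizeof(*list) + probe.nmsrs * sizeof(list->indices[0]));
		list->nmsrs = probe.nmsrs;
		if (ioctl(kvm, KVM_GET_MSR_FEATURE_INDEX_LIST, list) < 0)
			return 1;

		/* Read each feature MSR's value via the system-scope KVM_GET_MSRS. */
		struct kvm_msrs *req = calloc(1, sizeof(*req) + sizeof(struct kvm_msr_entry));
		for (unsigned int i = 0; i < list->nmsrs; i++) {
			req->nmsrs = 1;
			req->entries[0].index = list->indices[i];
			if (ioctl(kvm, KVM_GET_MSRS, req) == 1)
				printf("feature MSR 0x%x = 0x%llx\n",
				       req->entries[0].index,
				       (unsigned long long)req->entries[0].data);
		}
		return 0;
	}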