diff options
Diffstat (limited to 'arch')
311 files changed, 2986 insertions, 1816 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index ed27fd262627..e1e540ffa979 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -429,6 +429,13 @@ config SECCOMP_FILTER See Documentation/userspace-api/seccomp_filter.rst for details. +config HAVE_ARCH_STACKLEAK + bool + help + An architecture should select this if it has the code which + fills the used part of the kernel stack with the STACKLEAK_POISON + value before returning from system calls. + config HAVE_STACKPROTECTOR bool help diff --git a/arch/alpha/include/asm/termios.h b/arch/alpha/include/asm/termios.h index 6a8c53dec57e..b7c77bb1bfd2 100644 --- a/arch/alpha/include/asm/termios.h +++ b/arch/alpha/include/asm/termios.h @@ -73,9 +73,15 @@ }) #define user_termios_to_kernel_termios(k, u) \ - copy_from_user(k, u, sizeof(struct termios)) + copy_from_user(k, u, sizeof(struct termios2)) #define kernel_termios_to_user_termios(u, k) \ + copy_to_user(u, k, sizeof(struct termios2)) + +#define user_termios_to_kernel_termios_1(k, u) \ + copy_from_user(k, u, sizeof(struct termios)) + +#define kernel_termios_to_user_termios_1(u, k) \ copy_to_user(u, k, sizeof(struct termios)) #endif /* _ALPHA_TERMIOS_H */ diff --git a/arch/alpha/include/uapi/asm/ioctls.h b/arch/alpha/include/uapi/asm/ioctls.h index 1e9121c9b3c7..971311605288 100644 --- a/arch/alpha/include/uapi/asm/ioctls.h +++ b/arch/alpha/include/uapi/asm/ioctls.h @@ -32,6 +32,11 @@ #define TCXONC _IO('t', 30) #define TCFLSH _IO('t', 31) +#define TCGETS2 _IOR('T', 42, struct termios2) +#define TCSETS2 _IOW('T', 43, struct termios2) +#define TCSETSW2 _IOW('T', 44, struct termios2) +#define TCSETSF2 _IOW('T', 45, struct termios2) + #define TIOCSWINSZ _IOW('t', 103, struct winsize) #define TIOCGWINSZ _IOR('t', 104, struct winsize) #define TIOCSTART _IO('t', 110) /* start output, like ^Q */ diff --git a/arch/alpha/include/uapi/asm/termbits.h b/arch/alpha/include/uapi/asm/termbits.h index de6c8360fbe3..4575ba34a0ea 100644 --- a/arch/alpha/include/uapi/asm/termbits.h +++ b/arch/alpha/include/uapi/asm/termbits.h @@ -26,6 +26,19 @@ struct termios { speed_t c_ospeed; /* output speed */ }; +/* Alpha has identical termios and termios2 */ + +struct termios2 { + tcflag_t c_iflag; /* input mode flags */ + tcflag_t c_oflag; /* output mode flags */ + tcflag_t c_cflag; /* control mode flags */ + tcflag_t c_lflag; /* local mode flags */ + cc_t c_cc[NCCS]; /* control characters */ + cc_t c_line; /* line discipline (== c_cc[19]) */ + speed_t c_ispeed; /* input speed */ + speed_t c_ospeed; /* output speed */ +}; + /* Alpha has matching termios and ktermios */ struct ktermios { @@ -152,6 +165,7 @@ struct ktermios { #define B3000000 00034 #define B3500000 00035 #define B4000000 00036 +#define BOTHER 00037 #define CSIZE 00001400 #define CS5 00000000 @@ -169,6 +183,9 @@ struct ktermios { #define CMSPAR 010000000000 /* mark or space (stick) parity */ #define CRTSCTS 020000000000 /* flow control */ +#define CIBAUD 07600000 +#define IBSHIFT 16 + /* c_lflag bits */ #define ISIG 0x00000080 #define ICANON 0x00000100 diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index c9e2a1323536..6dd783557330 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -109,7 +109,7 @@ endmenu choice prompt "ARC Instruction Set" - default ISA_ARCOMPACT + default ISA_ARCV2 config ISA_ARCOMPACT bool "ARCompact ISA" @@ -176,13 +176,11 @@ endchoice config CPU_BIG_ENDIAN bool "Enable Big Endian Mode" - default n help Build kernel for Big Endian Mode of ARC CPU config SMP bool "Symmetric Multi-Processing" - default n select ARC_MCIP if ISA_ARCV2 help This enables support for systems with more than one CPU. @@ -254,7 +252,6 @@ config ARC_CACHE_PAGES config ARC_CACHE_VIPT_ALIASING bool "Support VIPT Aliasing D$" depends on ARC_HAS_DCACHE && ISA_ARCOMPACT - default n endif #ARC_CACHE @@ -262,7 +259,6 @@ config ARC_HAS_ICCM bool "Use ICCM" help Single Cycle RAMS to store Fast Path Code - default n config ARC_ICCM_SZ int "ICCM Size in KB" @@ -273,7 +269,6 @@ config ARC_HAS_DCCM bool "Use DCCM" help Single Cycle RAMS to store Fast Path Data - default n config ARC_DCCM_SZ int "DCCM Size in KB" @@ -366,13 +361,11 @@ if ISA_ARCOMPACT config ARC_COMPACT_IRQ_LEVELS bool "Setup Timer IRQ as high Priority" - default n # if SMP, LV2 enabled ONLY if ARC implementation has LV2 re-entrancy depends on !SMP config ARC_FPU_SAVE_RESTORE bool "Enable FPU state persistence across context switch" - default n help Double Precision Floating Point unit had dedicated regs which need to be saved/restored across context-switch. @@ -453,7 +446,6 @@ config HIGHMEM config ARC_HAS_PAE40 bool "Support for the 40-bit Physical Address Extension" - default n depends on ISA_ARCV2 select HIGHMEM select PHYS_ADDR_T_64BIT @@ -496,7 +488,6 @@ config HZ config ARC_METAWARE_HLINK bool "Support for Metaware debugger assisted Host access" - default n help This options allows a Linux userland apps to directly access host file system (open/creat/read/write etc) with help from @@ -524,13 +515,11 @@ config ARC_DW2_UNWIND config ARC_DBG_TLB_PARANOIA bool "Paranoia Checks in Low Level TLB Handlers" - default n endif config ARC_UBOOT_SUPPORT bool "Support uboot arg Handling" - default n help ARC Linux by default checks for uboot provided args as pointers to external cmdline or DTB. This however breaks in absence of uboot, diff --git a/arch/arc/Makefile b/arch/arc/Makefile index c64c505d966c..df00578c279d 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -6,7 +6,7 @@ # published by the Free Software Foundation. # -KBUILD_DEFCONFIG := nsim_700_defconfig +KBUILD_DEFCONFIG := nsim_hs_defconfig cflags-y += -fno-common -pipe -fno-builtin -mmedium-calls -D__linux__ cflags-$(CONFIG_ISA_ARCOMPACT) += -mA7 diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts index ef149f59929a..43f17b51ee89 100644 --- a/arch/arc/boot/dts/hsdk.dts +++ b/arch/arc/boot/dts/hsdk.dts @@ -222,6 +222,21 @@ bus-width = <4>; dma-coherent; }; + + gpio: gpio@3000 { + compatible = "snps,dw-apb-gpio"; + reg = <0x3000 0x20>; + #address-cells = <1>; + #size-cells = <0>; + + gpio_port_a: gpio-controller@0 { + compatible = "snps,dw-apb-gpio-port"; + gpio-controller; + #gpio-cells = <2>; + snps,nr-gpios = <24>; + reg = <0>; + }; + }; }; memory@80000000 { diff --git a/arch/arc/configs/axs101_defconfig b/arch/arc/configs/axs101_defconfig index 41bc08be6a3b..020d4493edfd 100644 --- a/arch/arc/configs/axs101_defconfig +++ b/arch/arc/configs/axs101_defconfig @@ -14,6 +14,7 @@ CONFIG_PERF_EVENTS=y # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_SLUB_DEBUG is not set # CONFIG_COMPAT_BRK is not set +CONFIG_ISA_ARCOMPACT=y CONFIG_MODULES=y CONFIG_MODULE_FORCE_LOAD=y CONFIG_MODULE_UNLOAD=y @@ -95,6 +96,7 @@ CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y CONFIG_TMPFS=y CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y # CONFIG_ENABLE_WARN_DEPRECATED is not set diff --git a/arch/arc/configs/axs103_defconfig b/arch/arc/configs/axs103_defconfig index 1e1c4a8011b5..666314fffc60 100644 --- a/arch/arc/configs/axs103_defconfig +++ b/arch/arc/configs/axs103_defconfig @@ -94,6 +94,7 @@ CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y CONFIG_TMPFS=y CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y # CONFIG_ENABLE_WARN_DEPRECATED is not set diff --git a/arch/arc/configs/axs103_smp_defconfig b/arch/arc/configs/axs103_smp_defconfig index 6b0c0cfd5c30..429832b8560b 100644 --- a/arch/arc/configs/axs103_smp_defconfig +++ b/arch/arc/configs/axs103_smp_defconfig @@ -97,6 +97,7 @@ CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y CONFIG_TMPFS=y CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y # CONFIG_ENABLE_WARN_DEPRECATED is not set diff --git a/arch/arc/configs/hsdk_defconfig b/arch/arc/configs/hsdk_defconfig index 1dec2b4bc5e6..87b23b7fb781 100644 --- a/arch/arc/configs/hsdk_defconfig +++ b/arch/arc/configs/hsdk_defconfig @@ -45,6 +45,9 @@ CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_DW=y CONFIG_SERIAL_OF_PLATFORM=y # CONFIG_HW_RANDOM is not set +CONFIG_GPIOLIB=y +CONFIG_GPIO_SYSFS=y +CONFIG_GPIO_DWAPB=y # CONFIG_HWMON is not set CONFIG_DRM=y # CONFIG_DRM_FBDEV_EMULATION is not set @@ -65,6 +68,7 @@ CONFIG_EXT3_FS=y CONFIG_VFAT_FS=y CONFIG_TMPFS=y CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y # CONFIG_ENABLE_WARN_DEPRECATED is not set diff --git a/arch/arc/configs/nps_defconfig b/arch/arc/configs/nps_defconfig index 31ba224bbfb4..6e84060e7c90 100644 --- a/arch/arc/configs/nps_defconfig +++ b/arch/arc/configs/nps_defconfig @@ -15,6 +15,7 @@ CONFIG_SYSCTL_SYSCALL=y CONFIG_EMBEDDED=y CONFIG_PERF_EVENTS=y # CONFIG_COMPAT_BRK is not set +CONFIG_ISA_ARCOMPACT=y CONFIG_KPROBES=y CONFIG_MODULES=y CONFIG_MODULE_FORCE_LOAD=y @@ -73,6 +74,7 @@ CONFIG_PROC_KCORE=y CONFIG_TMPFS=y # CONFIG_MISC_FILESYSTEMS is not set CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y CONFIG_ROOT_NFS=y CONFIG_DEBUG_INFO=y # CONFIG_ENABLE_WARN_DEPRECATED is not set diff --git a/arch/arc/configs/nsim_700_defconfig b/arch/arc/configs/nsim_700_defconfig index 8e0b8b134cd9..219c2a65294b 100644 --- a/arch/arc/configs/nsim_700_defconfig +++ b/arch/arc/configs/nsim_700_defconfig @@ -15,6 +15,7 @@ CONFIG_EMBEDDED=y CONFIG_PERF_EVENTS=y # CONFIG_SLUB_DEBUG is not set # CONFIG_COMPAT_BRK is not set +CONFIG_ISA_ARCOMPACT=y CONFIG_KPROBES=y CONFIG_MODULES=y # CONFIG_LBDAF is not set diff --git a/arch/arc/configs/nsimosci_defconfig b/arch/arc/configs/nsimosci_defconfig index f14eeff7d308..35dfc6491a09 100644 --- a/arch/arc/configs/nsimosci_defconfig +++ b/arch/arc/configs/nsimosci_defconfig @@ -15,6 +15,7 @@ CONFIG_EMBEDDED=y CONFIG_PERF_EVENTS=y # CONFIG_SLUB_DEBUG is not set # CONFIG_COMPAT_BRK is not set +CONFIG_ISA_ARCOMPACT=y CONFIG_KPROBES=y CONFIG_MODULES=y # CONFIG_LBDAF is not set @@ -66,5 +67,6 @@ CONFIG_EXT2_FS_XATTR=y CONFIG_TMPFS=y # CONFIG_MISC_FILESYSTEMS is not set CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y # CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set diff --git a/arch/arc/configs/nsimosci_hs_defconfig b/arch/arc/configs/nsimosci_hs_defconfig index 025298a48305..1638e5bc9672 100644 --- a/arch/arc/configs/nsimosci_hs_defconfig +++ b/arch/arc/configs/nsimosci_hs_defconfig @@ -65,5 +65,6 @@ CONFIG_EXT2_FS_XATTR=y CONFIG_TMPFS=y # CONFIG_MISC_FILESYSTEMS is not set CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y # CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set diff --git a/arch/arc/configs/nsimosci_hs_smp_defconfig b/arch/arc/configs/nsimosci_hs_smp_defconfig index df7b77b13b82..11cfbdb0f441 100644 --- a/arch/arc/configs/nsimosci_hs_smp_defconfig +++ b/arch/arc/configs/nsimosci_hs_smp_defconfig @@ -76,6 +76,7 @@ CONFIG_EXT2_FS_XATTR=y CONFIG_TMPFS=y # CONFIG_MISC_FILESYSTEMS is not set CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y # CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_FTRACE=y diff --git a/arch/arc/configs/tb10x_defconfig b/arch/arc/configs/tb10x_defconfig index a7f65313f84a..e71ade3cf9c8 100644 --- a/arch/arc/configs/tb10x_defconfig +++ b/arch/arc/configs/tb10x_defconfig @@ -19,6 +19,7 @@ CONFIG_KALLSYMS_ALL=y # CONFIG_AIO is not set CONFIG_EMBEDDED=y # CONFIG_COMPAT_BRK is not set +CONFIG_ISA_ARCOMPACT=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_FORCE_LOAD=y diff --git a/arch/arc/configs/vdk_hs38_defconfig b/arch/arc/configs/vdk_hs38_defconfig index db47c3541f15..1e59a2e9c602 100644 --- a/arch/arc/configs/vdk_hs38_defconfig +++ b/arch/arc/configs/vdk_hs38_defconfig @@ -85,6 +85,7 @@ CONFIG_NTFS_FS=y CONFIG_TMPFS=y CONFIG_JFFS2_FS=y CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y # CONFIG_ENABLE_WARN_DEPRECATED is not set diff --git a/arch/arc/configs/vdk_hs38_smp_defconfig b/arch/arc/configs/vdk_hs38_smp_defconfig index a8ac5e917d9a..b5c3f6c54b03 100644 --- a/arch/arc/configs/vdk_hs38_smp_defconfig +++ b/arch/arc/configs/vdk_hs38_smp_defconfig @@ -90,6 +90,7 @@ CONFIG_NTFS_FS=y CONFIG_TMPFS=y CONFIG_JFFS2_FS=y CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y # CONFIG_ENABLE_WARN_DEPRECATED is not set diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h index ff7d3232764a..f393b663413e 100644 --- a/arch/arc/include/asm/cache.h +++ b/arch/arc/include/asm/cache.h @@ -113,7 +113,9 @@ extern unsigned long perip_base, perip_end; /* IO coherency related Auxiliary registers */ #define ARC_REG_IO_COH_ENABLE 0x500 +#define ARC_IO_COH_ENABLE_BIT BIT(0) #define ARC_REG_IO_COH_PARTIAL 0x501 +#define ARC_IO_COH_PARTIAL_BIT BIT(0) #define ARC_REG_IO_COH_AP0_BASE 0x508 #define ARC_REG_IO_COH_AP0_SIZE 0x509 diff --git a/arch/arc/include/asm/io.h b/arch/arc/include/asm/io.h index c22b181e8206..2f39d9b3886e 100644 --- a/arch/arc/include/asm/io.h +++ b/arch/arc/include/asm/io.h @@ -12,6 +12,7 @@ #include <linux/types.h> #include <asm/byteorder.h> #include <asm/page.h> +#include <asm/unaligned.h> #ifdef CONFIG_ISA_ARCV2 #include <asm/barrier.h> @@ -94,6 +95,42 @@ static inline u32 __raw_readl(const volatile void __iomem *addr) return w; } +/* + * {read,write}s{b,w,l}() repeatedly access the same IO address in + * native endianness in 8-, 16-, 32-bit chunks {into,from} memory, + * @count times + */ +#define __raw_readsx(t,f) \ +static inline void __raw_reads##f(const volatile void __iomem *addr, \ + void *ptr, unsigned int count) \ +{ \ + bool is_aligned = ((unsigned long)ptr % ((t) / 8)) == 0; \ + u##t *buf = ptr; \ + \ + if (!count) \ + return; \ + \ + /* Some ARC CPU's don't support unaligned accesses */ \ + if (is_aligned) { \ + do { \ + u##t x = __raw_read##f(addr); \ + *buf++ = x; \ + } while (--count); \ + } else { \ + do { \ + u##t x = __raw_read##f(addr); \ + put_unaligned(x, buf++); \ + } while (--count); \ + } \ +} + +#define __raw_readsb __raw_readsb +__raw_readsx(8, b) +#define __raw_readsw __raw_readsw +__raw_readsx(16, w) +#define __raw_readsl __raw_readsl +__raw_readsx(32, l) + #define __raw_writeb __raw_writeb static inline void __raw_writeb(u8 b, volatile void __iomem *addr) { @@ -126,6 +163,35 @@ static inline void __raw_writel(u32 w, volatile void __iomem *addr) } +#define __raw_writesx(t,f) \ +static inline void __raw_writes##f(volatile void __iomem *addr, \ + const void *ptr, unsigned int count) \ +{ \ + bool is_aligned = ((unsigned long)ptr % ((t) / 8)) == 0; \ + const u##t *buf = ptr; \ + \ + if (!count) \ + return; \ + \ + /* Some ARC CPU's don't support unaligned accesses */ \ + if (is_aligned) { \ + do { \ + __raw_write##f(*buf++, addr); \ + } while (--count); \ + } else { \ + do { \ + __raw_write##f(get_unaligned(buf++), addr); \ + } while (--count); \ + } \ +} + +#define __raw_writesb __raw_writesb +__raw_writesx(8, b) +#define __raw_writesw __raw_writesw +__raw_writesx(16, w) +#define __raw_writesl __raw_writesl +__raw_writesx(32, l) + /* * MMIO can also get buffered/optimized in micro-arch, so barriers needed * Based on ARM model for the typical use case @@ -141,10 +207,16 @@ static inline void __raw_writel(u32 w, volatile void __iomem *addr) #define readb(c) ({ u8 __v = readb_relaxed(c); __iormb(); __v; }) #define readw(c) ({ u16 __v = readw_relaxed(c); __iormb(); __v; }) #define readl(c) ({ u32 __v = readl_relaxed(c); __iormb(); __v; }) +#define readsb(p,d,l) ({ __raw_readsb(p,d,l); __iormb(); }) +#define readsw(p,d,l) ({ __raw_readsw(p,d,l); __iormb(); }) +#define readsl(p,d,l) ({ __raw_readsl(p,d,l); __iormb(); }) #define writeb(v,c) ({ __iowmb(); writeb_relaxed(v,c); }) #define writew(v,c) ({ __iowmb(); writew_relaxed(v,c); }) #define writel(v,c) ({ __iowmb(); writel_relaxed(v,c); }) +#define writesb(p,d,l) ({ __iowmb(); __raw_writesb(p,d,l); }) +#define writesw(p,d,l) ({ __iowmb(); __raw_writesw(p,d,l); }) +#define writesl(p,d,l) ({ __iowmb(); __raw_writesl(p,d,l); }) /* * Relaxed API for drivers which can handle barrier ordering themselves diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index b2cae79a25d7..eea8c5ce6335 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -243,7 +243,7 @@ static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len) { struct cpuinfo_arc *cpu = &cpuinfo_arc700[cpu_id]; struct bcr_identity *core = &cpu->core; - int i, n = 0; + int i, n = 0, ua = 0; FIX_PTR(cpu); @@ -263,10 +263,13 @@ static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len) IS_AVAIL2(cpu->extn.rtc, "RTC [UP 64-bit] ", CONFIG_ARC_TIMERS_64BIT), IS_AVAIL2(cpu->extn.gfrc, "GFRC [SMP 64-bit] ", CONFIG_ARC_TIMERS_64BIT)); - n += i = scnprintf(buf + n, len - n, "%s%s%s%s%s", +#ifdef __ARC_UNALIGNED__ + ua = 1; +#endif + n += i = scnprintf(buf + n, len - n, "%s%s%s%s%s%s", IS_AVAIL2(cpu->isa.atomic, "atomic ", CONFIG_ARC_HAS_LLSC), IS_AVAIL2(cpu->isa.ldd, "ll64 ", CONFIG_ARC_HAS_LL64), - IS_AVAIL1(cpu->isa.unalign, "unalign (not used)")); + IS_AVAIL1(cpu->isa.unalign, "unalign "), IS_USED_RUN(ua)); if (i) n += scnprintf(buf + n, len - n, "\n\t\t: "); diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index f2701c13a66b..cf9619d4efb4 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -1145,6 +1145,20 @@ noinline void __init arc_ioc_setup(void) unsigned int ioc_base, mem_sz; /* + * If IOC was already enabled (due to bootloader) it technically needs to + * be reconfigured with aperture base,size corresponding to Linux memory map + * which will certainly be different than uboot's. But disabling and + * reenabling IOC when DMA might be potentially active is tricky business. + * To avoid random memory issues later, just panic here and ask user to + * upgrade bootloader to one which doesn't enable IOC + */ + if (read_aux_reg(ARC_REG_IO_COH_ENABLE) & ARC_IO_COH_ENABLE_BIT) + panic("IOC already enabled, please upgrade bootloader!\n"); + + if (!ioc_enable) + return; + + /* * As for today we don't support both IOC and ZONE_HIGHMEM enabled * simultaneously. This happens because as of today IOC aperture covers * only ZONE_NORMAL (low mem) and any dma transactions outside this @@ -1187,8 +1201,8 @@ noinline void __init arc_ioc_setup(void) panic("IOC Aperture start must be aligned to the size of the aperture"); write_aux_reg(ARC_REG_IO_COH_AP0_BASE, ioc_base >> 12); - write_aux_reg(ARC_REG_IO_COH_PARTIAL, 1); - write_aux_reg(ARC_REG_IO_COH_ENABLE, 1); + write_aux_reg(ARC_REG_IO_COH_PARTIAL, ARC_IO_COH_PARTIAL_BIT); + write_aux_reg(ARC_REG_IO_COH_ENABLE, ARC_IO_COH_ENABLE_BIT); /* Re-enable L1 dcache */ __dc_enable(); @@ -1265,7 +1279,7 @@ void __init arc_cache_init_master(void) if (is_isa_arcv2() && l2_line_sz && !slc_enable) arc_slc_disable(); - if (is_isa_arcv2() && ioc_enable) + if (is_isa_arcv2() && ioc_exists) arc_ioc_setup(); if (is_isa_arcv2() && l2_line_sz && slc_enable) { diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c index c9da6102eb4f..e2d9fc3fea01 100644 --- a/arch/arc/mm/fault.c +++ b/arch/arc/mm/fault.c @@ -66,7 +66,7 @@ void do_page_fault(unsigned long address, struct pt_regs *regs) struct vm_area_struct *vma = NULL; struct task_struct *tsk = current; struct mm_struct *mm = tsk->mm; - int si_code; + int si_code = 0; int ret; vm_fault_t fault; int write = regs->ecr_cause & ECR_C_PROTV_STORE; /* ST/EX */ diff --git a/arch/arm/boot/dts/am3517-evm.dts b/arch/arm/boot/dts/am3517-evm.dts index d4d33cd7adad..1e2bb68231ad 100644 --- a/arch/arm/boot/dts/am3517-evm.dts +++ b/arch/arm/boot/dts/am3517-evm.dts @@ -228,7 +228,7 @@ vmmc-supply = <&vmmc_fixed>; bus-width = <4>; wp-gpios = <&gpio4 30 GPIO_ACTIVE_HIGH>; /* gpio_126 */ - cd-gpios = <&gpio4 31 GPIO_ACTIVE_HIGH>; /* gpio_127 */ + cd-gpios = <&gpio4 31 GPIO_ACTIVE_LOW>; /* gpio_127 */ }; &mmc3 { diff --git a/arch/arm/boot/dts/am3517-som.dtsi b/arch/arm/boot/dts/am3517-som.dtsi index dae6e458e59f..b1c988eed87c 100644 --- a/arch/arm/boot/dts/am3517-som.dtsi +++ b/arch/arm/boot/dts/am3517-som.dtsi @@ -163,7 +163,7 @@ compatible = "ti,wl1271"; reg = <2>; interrupt-parent = <&gpio6>; - interrupts = <10 IRQ_TYPE_LEVEL_HIGH>; /* gpio_170 */ + interrupts = <10 IRQ_TYPE_EDGE_RISING>; /* gpio_170 */ ref-clock-frequency = <26000000>; tcxo-clock-frequency = <26000000>; }; diff --git a/arch/arm/boot/dts/arm-realview-pb1176.dts b/arch/arm/boot/dts/arm-realview-pb1176.dts index f2a1d25eb6cf..83e0fbc4a1a1 100644 --- a/arch/arm/boot/dts/arm-realview-pb1176.dts +++ b/arch/arm/boot/dts/arm-realview-pb1176.dts @@ -45,7 +45,7 @@ }; /* The voltage to the MMC card is hardwired at 3.3V */ - vmmc: fixedregulator@0 { + vmmc: regulator-vmmc { compatible = "regulator-fixed"; regulator-name = "vmmc"; regulator-min-microvolt = <3300000>; @@ -53,7 +53,7 @@ regulator-boot-on; }; - veth: fixedregulator@0 { + veth: regulator-veth { compatible = "regulator-fixed"; regulator-name = "veth"; regulator-min-microvolt = <3300000>; diff --git a/arch/arm/boot/dts/arm-realview-pb11mp.dts b/arch/arm/boot/dts/arm-realview-pb11mp.dts index 7f9cbdf33a51..2f6aa24a0b67 100644 --- a/arch/arm/boot/dts/arm-realview-pb11mp.dts +++ b/arch/arm/boot/dts/arm-realview-pb11mp.dts @@ -145,7 +145,7 @@ }; /* The voltage to the MMC card is hardwired at 3.3V */ - vmmc: fixedregulator@0 { + vmmc: regulator-vmmc { compatible = "regulator-fixed"; regulator-name = "vmmc"; regulator-min-microvolt = <3300000>; @@ -153,7 +153,7 @@ regulator-boot-on; }; - veth: fixedregulator@0 { + veth: regulator-veth { compatible = "regulator-fixed"; regulator-name = "veth"; regulator-min-microvolt = <3300000>; diff --git a/arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts b/arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts index 4adb85e66be3..93762244be7f 100644 --- a/arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts +++ b/arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts @@ -31,7 +31,7 @@ wifi_pwrseq: wifi-pwrseq { compatible = "mmc-pwrseq-simple"; - reset-gpios = <&expgpio 1 GPIO_ACTIVE_HIGH>; + reset-gpios = <&expgpio 1 GPIO_ACTIVE_LOW>; }; }; diff --git a/arch/arm/boot/dts/bcm2837-rpi-3-b.dts b/arch/arm/boot/dts/bcm2837-rpi-3-b.dts index c318bcbc6ba7..89e6fd547c75 100644 --- a/arch/arm/boot/dts/bcm2837-rpi-3-b.dts +++ b/arch/arm/boot/dts/bcm2837-rpi-3-b.dts @@ -26,7 +26,7 @@ wifi_pwrseq: wifi-pwrseq { compatible = "mmc-pwrseq-simple"; - reset-gpios = <&expgpio 1 GPIO_ACTIVE_HIGH>; + reset-gpios = <&expgpio 1 GPIO_ACTIVE_LOW>; }; }; diff --git a/arch/arm/boot/dts/imx51-zii-rdu1.dts b/arch/arm/boot/dts/imx51-zii-rdu1.dts index e45a15ceb94b..69d753cac89a 100644 --- a/arch/arm/boot/dts/imx51-zii-rdu1.dts +++ b/arch/arm/boot/dts/imx51-zii-rdu1.dts @@ -492,12 +492,6 @@ pinctrl-0 = <&pinctrl_i2c2>; status = "okay"; - eeprom@50 { - compatible = "atmel,24c04"; - pagesize = <16>; - reg = <0x50>; - }; - hpa1: amp@60 { compatible = "ti,tpa6130a2"; reg = <0x60>; diff --git a/arch/arm/boot/dts/imx53-ppd.dts b/arch/arm/boot/dts/imx53-ppd.dts index b560ff88459b..5ff9a179c83c 100644 --- a/arch/arm/boot/dts/imx53-ppd.dts +++ b/arch/arm/boot/dts/imx53-ppd.dts @@ -55,7 +55,7 @@ }; chosen { - stdout-path = "&uart1:115200n8"; + stdout-path = "serial0:115200n8"; }; memory@70000000 { diff --git a/arch/arm/boot/dts/imx6sll.dtsi b/arch/arm/boot/dts/imx6sll.dtsi index ed9a980bce85..beefa1b2049d 100644 --- a/arch/arm/boot/dts/imx6sll.dtsi +++ b/arch/arm/boot/dts/imx6sll.dtsi @@ -740,7 +740,7 @@ i2c1: i2c@21a0000 { #address-cells = <1>; #size-cells = <0>; - compatible = "fs,imx6sll-i2c", "fsl,imx21-i2c"; + compatible = "fsl,imx6sll-i2c", "fsl,imx21-i2c"; reg = <0x021a0000 0x4000>; interrupts = <GIC_SPI 36 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clks IMX6SLL_CLK_I2C1>; diff --git a/arch/arm/boot/dts/imx6sx-sdb.dtsi b/arch/arm/boot/dts/imx6sx-sdb.dtsi index 53b3408b5fab..7d7d679945d2 100644 --- a/arch/arm/boot/dts/imx6sx-sdb.dtsi +++ b/arch/arm/boot/dts/imx6sx-sdb.dtsi @@ -117,7 +117,9 @@ regulator-name = "enet_3v3"; regulator-min-microvolt = <3300000>; regulator-max-microvolt = <3300000>; - gpios = <&gpio2 6 GPIO_ACTIVE_LOW>; + gpio = <&gpio2 6 GPIO_ACTIVE_LOW>; + regulator-boot-on; + regulator-always-on; }; reg_pcie_gpio: regulator-pcie-gpio { @@ -180,6 +182,7 @@ phy-supply = <®_enet_3v3>; phy-mode = "rgmii"; phy-handle = <ðphy1>; + phy-reset-gpios = <&gpio2 7 GPIO_ACTIVE_LOW>; status = "okay"; mdio { @@ -373,6 +376,8 @@ MX6SX_PAD_RGMII1_RD3__ENET1_RX_DATA_3 0x3081 MX6SX_PAD_RGMII1_RX_CTL__ENET1_RX_EN 0x3081 MX6SX_PAD_ENET2_RX_CLK__ENET2_REF_CLK_25M 0x91 + /* phy reset */ + MX6SX_PAD_ENET2_CRS__GPIO2_IO_7 0x10b0 >; }; diff --git a/arch/arm/boot/dts/imx7d-nitrogen7.dts b/arch/arm/boot/dts/imx7d-nitrogen7.dts index d8aac4a2d02a..177d21fdeb28 100644 --- a/arch/arm/boot/dts/imx7d-nitrogen7.dts +++ b/arch/arm/boot/dts/imx7d-nitrogen7.dts @@ -86,13 +86,17 @@ compatible = "regulator-fixed"; regulator-min-microvolt = <3300000>; regulator-max-microvolt = <3300000>; - clocks = <&clks IMX7D_CLKO2_ROOT_DIV>; - clock-names = "slow"; regulator-name = "reg_wlan"; startup-delay-us = <70000>; gpio = <&gpio4 21 GPIO_ACTIVE_HIGH>; enable-active-high; }; + + usdhc2_pwrseq: usdhc2_pwrseq { + compatible = "mmc-pwrseq-simple"; + clocks = <&clks IMX7D_CLKO2_ROOT_DIV>; + clock-names = "ext_clock"; + }; }; &adc1 { @@ -375,6 +379,7 @@ bus-width = <4>; non-removable; vmmc-supply = <®_wlan>; + mmc-pwrseq = <&usdhc2_pwrseq>; cap-power-off-card; keep-power-in-suspend; status = "okay"; diff --git a/arch/arm/boot/dts/imx7d-pico.dtsi b/arch/arm/boot/dts/imx7d-pico.dtsi index 21973eb55671..f27b3849d3ff 100644 --- a/arch/arm/boot/dts/imx7d-pico.dtsi +++ b/arch/arm/boot/dts/imx7d-pico.dtsi @@ -100,6 +100,19 @@ regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; }; + + usdhc2_pwrseq: usdhc2_pwrseq { + compatible = "mmc-pwrseq-simple"; + clocks = <&clks IMX7D_CLKO2_ROOT_DIV>; + clock-names = "ext_clock"; + }; +}; + +&clks { + assigned-clocks = <&clks IMX7D_CLKO2_ROOT_SRC>, + <&clks IMX7D_CLKO2_ROOT_DIV>; + assigned-clock-parents = <&clks IMX7D_CKIL>; + assigned-clock-rates = <0>, <32768>; }; &i2c4 { @@ -199,12 +212,13 @@ &usdhc2 { /* Wifi SDIO */ pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_usdhc2>; + pinctrl-0 = <&pinctrl_usdhc2 &pinctrl_wifi_clk>; no-1-8-v; non-removable; keep-power-in-suspend; wakeup-source; vmmc-supply = <®_ap6212>; + mmc-pwrseq = <&usdhc2_pwrseq>; status = "okay"; }; @@ -301,6 +315,12 @@ }; &iomuxc_lpsr { + pinctrl_wifi_clk: wificlkgrp { + fsl,pins = < + MX7D_PAD_LPSR_GPIO1_IO03__CCM_CLKO2 0x7d + >; + }; + pinctrl_wdog: wdoggrp { fsl,pins = < MX7D_PAD_LPSR_GPIO1_IO00__WDOG1_WDOG_B 0x74 diff --git a/arch/arm/boot/dts/logicpd-som-lv.dtsi b/arch/arm/boot/dts/logicpd-som-lv.dtsi index ac343330d0c8..98b682a8080c 100644 --- a/arch/arm/boot/dts/logicpd-som-lv.dtsi +++ b/arch/arm/boot/dts/logicpd-som-lv.dtsi @@ -129,7 +129,7 @@ }; &mmc3 { - interrupts-extended = <&intc 94 &omap3_pmx_core2 0x46>; + interrupts-extended = <&intc 94 &omap3_pmx_core 0x136>; pinctrl-0 = <&mmc3_pins &wl127x_gpio>; pinctrl-names = "default"; vmmc-supply = <&wl12xx_vmmc>; diff --git a/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts b/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts index 9d5d53fbe9c0..c39cf2ca54da 100644 --- a/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts +++ b/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts @@ -35,7 +35,7 @@ * jumpering combinations for the long run. */ &mmc3 { - interrupts-extended = <&intc 94 &omap3_pmx_core2 0x46>; + interrupts-extended = <&intc 94 &omap3_pmx_core 0x136>; pinctrl-0 = <&mmc3_pins &mmc3_core2_pins>; pinctrl-names = "default"; vmmc-supply = <&wl12xx_vmmc>; diff --git a/arch/arm/boot/dts/rk3288-veyron.dtsi b/arch/arm/boot/dts/rk3288-veyron.dtsi index 2075120cfc4d..d8bf939a3aff 100644 --- a/arch/arm/boot/dts/rk3288-veyron.dtsi +++ b/arch/arm/boot/dts/rk3288-veyron.dtsi @@ -10,7 +10,11 @@ #include "rk3288.dtsi" / { - memory@0 { + /* + * The default coreboot on veyron devices ignores memory@0 nodes + * and would instead create another memory node. + */ + memory { device_type = "memory"; reg = <0x0 0x0 0x0 0x80000000>; }; diff --git a/arch/arm/boot/dts/sama5d2.dtsi b/arch/arm/boot/dts/sama5d2.dtsi index 843052f14f1c..dd0dda6ed44b 100644 --- a/arch/arm/boot/dts/sama5d2.dtsi +++ b/arch/arm/boot/dts/sama5d2.dtsi @@ -314,7 +314,7 @@ 0x1 0x0 0x60000000 0x10000000 0x2 0x0 0x70000000 0x10000000 0x3 0x0 0x80000000 0x10000000>; - clocks = <&mck>; + clocks = <&h32ck>; status = "disabled"; nand_controller: nand-controller { diff --git a/arch/arm/boot/dts/stm32mp157c.dtsi b/arch/arm/boot/dts/stm32mp157c.dtsi index c50c36baba75..8bf1c17f8cef 100644 --- a/arch/arm/boot/dts/stm32mp157c.dtsi +++ b/arch/arm/boot/dts/stm32mp157c.dtsi @@ -923,7 +923,7 @@ interrupts = <GIC_SPI 80 IRQ_TYPE_LEVEL_HIGH>; clocks = <&rcc HASH1>; resets = <&rcc HASH1_R>; - dmas = <&mdma1 31 0x10 0x1000A02 0x0 0x0 0x0>; + dmas = <&mdma1 31 0x10 0x1000A02 0x0 0x0>; dma-names = "in"; dma-maxburst = <2>; status = "disabled"; diff --git a/arch/arm/boot/dts/sun8i-a83t-bananapi-m3.dts b/arch/arm/boot/dts/sun8i-a83t-bananapi-m3.dts index 742d2946b08b..583a5a01642f 100644 --- a/arch/arm/boot/dts/sun8i-a83t-bananapi-m3.dts +++ b/arch/arm/boot/dts/sun8i-a83t-bananapi-m3.dts @@ -314,8 +314,8 @@ ®_dldo3 { regulator-always-on; - regulator-min-microvolt = <2500000>; - regulator-max-microvolt = <2500000>; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; regulator-name = "vcc-pd"; }; diff --git a/arch/arm/boot/dts/vf610m4-colibri.dts b/arch/arm/boot/dts/vf610m4-colibri.dts index 41ec66a96990..ca6249558760 100644 --- a/arch/arm/boot/dts/vf610m4-colibri.dts +++ b/arch/arm/boot/dts/vf610m4-colibri.dts @@ -50,8 +50,8 @@ compatible = "fsl,vf610m4"; chosen { - bootargs = "console=ttyLP2,115200 clk_ignore_unused init=/linuxrc rw"; - stdout-path = "&uart2"; + bootargs = "clk_ignore_unused init=/linuxrc rw"; + stdout-path = "serial2:115200"; }; memory@8c000000 { diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h index 0d289240b6ca..775cac3c02bb 100644 --- a/arch/arm/include/asm/cputype.h +++ b/arch/arm/include/asm/cputype.h @@ -111,6 +111,7 @@ #include <linux/kernel.h> extern unsigned int processor_id; +struct proc_info_list *lookup_processor(u32 midr); #ifdef CONFIG_CPU_CP15 #define read_cpuid(reg) \ diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h index 92fd2c8a9af0..12659ce5c1f3 100644 --- a/arch/arm/include/asm/pgtable-2level.h +++ b/arch/arm/include/asm/pgtable-2level.h @@ -10,7 +10,7 @@ #ifndef _ASM_PGTABLE_2LEVEL_H #define _ASM_PGTABLE_2LEVEL_H -#define __PAGETABLE_PMD_FOLDED +#define __PAGETABLE_PMD_FOLDED 1 /* * Hardware-wise, we have a two level page table structure, where the first diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h index e25f4392e1b2..e1b6f280ab08 100644 --- a/arch/arm/include/asm/proc-fns.h +++ b/arch/arm/include/asm/proc-fns.h @@ -23,7 +23,7 @@ struct mm_struct; /* * Don't change this structure - ASM code relies on it. */ -extern struct processor { +struct processor { /* MISC * get data abort address/flags */ @@ -79,9 +79,13 @@ extern struct processor { unsigned int suspend_size; void (*do_suspend)(void *); void (*do_resume)(void *); -} processor; +}; #ifndef MULTI_CPU +static inline void init_proc_vtable(const struct processor *p) +{ +} + extern void cpu_proc_init(void); extern void cpu_proc_fin(void); extern int cpu_do_idle(void); @@ -98,17 +102,50 @@ extern void cpu_reset(unsigned long addr, bool hvc) __attribute__((noreturn)); extern void cpu_do_suspend(void *); extern void cpu_do_resume(void *); #else -#define cpu_proc_init processor._proc_init -#define cpu_proc_fin processor._proc_fin -#define cpu_reset processor.reset -#define cpu_do_idle processor._do_idle -#define cpu_dcache_clean_area processor.dcache_clean_area -#define cpu_set_pte_ext processor.set_pte_ext -#define cpu_do_switch_mm processor.switch_mm -/* These three are private to arch/arm/kernel/suspend.c */ -#define cpu_do_suspend processor.do_suspend -#define cpu_do_resume processor.do_resume +extern struct processor processor; +#if defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR) +#include <linux/smp.h> +/* + * This can't be a per-cpu variable because we need to access it before + * per-cpu has been initialised. We have a couple of functions that are + * called in a pre-emptible context, and so can't use smp_processor_id() + * there, hence PROC_TABLE(). We insist in init_proc_vtable() that the + * function pointers for these are identical across all CPUs. + */ +extern struct processor *cpu_vtable[]; +#define PROC_VTABLE(f) cpu_vtable[smp_processor_id()]->f +#define PROC_TABLE(f) cpu_vtable[0]->f +static inline void init_proc_vtable(const struct processor *p) +{ + unsigned int cpu = smp_processor_id(); + *cpu_vtable[cpu] = *p; + WARN_ON_ONCE(cpu_vtable[cpu]->dcache_clean_area != + cpu_vtable[0]->dcache_clean_area); + WARN_ON_ONCE(cpu_vtable[cpu]->set_pte_ext != + cpu_vtable[0]->set_pte_ext); +} +#else +#define PROC_VTABLE(f) processor.f +#define PROC_TABLE(f) processor.f +static inline void init_proc_vtable(const struct processor *p) +{ + processor = *p; +} +#endif + +#define cpu_proc_init PROC_VTABLE(_proc_init) +#define cpu_check_bugs PROC_VTABLE(check_bugs) +#define cpu_proc_fin PROC_VTABLE(_proc_fin) +#define cpu_reset PROC_VTABLE(reset) +#define cpu_do_idle PROC_VTABLE(_do_idle) +#define cpu_dcache_clean_area PROC_TABLE(dcache_clean_area) +#define cpu_set_pte_ext PROC_TABLE(set_pte_ext) +#define cpu_do_switch_mm PROC_VTABLE(switch_mm) + +/* These two are private to arch/arm/kernel/suspend.c */ +#define cpu_do_suspend PROC_VTABLE(do_suspend) +#define cpu_do_resume PROC_VTABLE(do_resume) #endif extern void cpu_resume(void); diff --git a/arch/arm/kernel/bugs.c b/arch/arm/kernel/bugs.c index 7be511310191..d41d3598e5e5 100644 --- a/arch/arm/kernel/bugs.c +++ b/arch/arm/kernel/bugs.c @@ -6,8 +6,8 @@ void check_other_bugs(void) { #ifdef MULTI_CPU - if (processor.check_bugs) - processor.check_bugs(); + if (cpu_check_bugs) + cpu_check_bugs(); #endif } diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c index 0142fcfcc3d3..bda949fd84e8 100644 --- a/arch/arm/kernel/ftrace.c +++ b/arch/arm/kernel/ftrace.c @@ -183,9 +183,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, unsigned long frame_pointer) { unsigned long return_hooker = (unsigned long) &return_to_handler; - struct ftrace_graph_ent trace; unsigned long old; - int err; if (unlikely(atomic_read(¤t->tracing_graph_pause))) return; @@ -193,21 +191,8 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, old = *parent; *parent = return_hooker; - trace.func = self_addr; - trace.depth = current->curr_ret_stack + 1; - - /* Only trace if the calling function expects to */ - if (!ftrace_graph_entry(&trace)) { + if (function_graph_enter(old, self_addr, frame_pointer, NULL)) *parent = old; - return; - } - - err = ftrace_push_return_trace(old, self_addr, &trace.depth, - frame_pointer, NULL); - if (err == -EBUSY) { - *parent = old; - return; - } } #ifdef CONFIG_DYNAMIC_FTRACE diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S index 6e0375e7db05..997b02302c31 100644 --- a/arch/arm/kernel/head-common.S +++ b/arch/arm/kernel/head-common.S @@ -145,6 +145,9 @@ __mmap_switched_data: #endif .size __mmap_switched_data, . - __mmap_switched_data + __FINIT + .text + /* * This provides a C-API version of __lookup_processor_type */ @@ -156,9 +159,6 @@ ENTRY(lookup_processor_type) ldmfd sp!, {r4 - r6, r9, pc} ENDPROC(lookup_processor_type) - __FINIT - .text - /* * Read processor ID register (CP#15, CR0), and look up in the linker-built * supported processor list. Note that we can't use the absolute addresses diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index ac7e08886863..375b13f7e780 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -114,6 +114,11 @@ EXPORT_SYMBOL(elf_hwcap2); #ifdef MULTI_CPU struct processor processor __ro_after_init; +#if defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR) +struct processor *cpu_vtable[NR_CPUS] = { + [0] = &processor, +}; +#endif #endif #ifdef MULTI_TLB struct cpu_tlb_fns cpu_tlb __ro_after_init; @@ -666,28 +671,33 @@ static void __init smp_build_mpidr_hash(void) } #endif -static void __init setup_processor(void) +/* + * locate processor in the list of supported processor types. The linker + * builds this table for us from the entries in arch/arm/mm/proc-*.S + */ +struct proc_info_list *lookup_processor(u32 midr) { - struct proc_info_list *list; + struct proc_info_list *list = lookup_processor_type(midr); - /* - * locate processor in the list of supported processor - * types. The linker builds this table for us from the - * entries in arch/arm/mm/proc-*.S - */ - list = lookup_processor_type(read_cpuid_id()); if (!list) { - pr_err("CPU configuration botched (ID %08x), unable to continue.\n", - read_cpuid_id()); - while (1); + pr_err("CPU%u: configuration botched (ID %08x), CPU halted\n", + smp_processor_id(), midr); + while (1) + /* can't use cpu_relax() here as it may require MMU setup */; } + return list; +} + +static void __init setup_processor(void) +{ + unsigned int midr = read_cpuid_id(); + struct proc_info_list *list = lookup_processor(midr); + cpu_name = list->cpu_name; __cpu_architecture = __get_cpu_architecture(); -#ifdef MULTI_CPU - processor = *list->proc; -#endif + init_proc_vtable(list->proc); #ifdef MULTI_TLB cpu_tlb = *list->tlb; #endif @@ -699,7 +709,7 @@ static void __init setup_processor(void) #endif pr_info("CPU: %s [%08x] revision %d (ARMv%s), cr=%08lx\n", - cpu_name, read_cpuid_id(), read_cpuid_id() & 15, + list->cpu_name, midr, midr & 15, proc_arch[cpu_architecture()], get_cr()); snprintf(init_utsname()->machine, __NEW_UTS_LEN + 1, "%s%c", diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 0978282d5fc2..12a6172263c0 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -42,6 +42,7 @@ #include <asm/mmu_context.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> +#include <asm/procinfo.h> #include <asm/processor.h> #include <asm/sections.h> #include <asm/tlbflush.h> @@ -102,6 +103,30 @@ static unsigned long get_arch_pgd(pgd_t *pgd) #endif } +#if defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR) +static int secondary_biglittle_prepare(unsigned int cpu) +{ + if (!cpu_vtable[cpu]) + cpu_vtable[cpu] = kzalloc(sizeof(*cpu_vtable[cpu]), GFP_KERNEL); + + return cpu_vtable[cpu] ? 0 : -ENOMEM; +} + +static void secondary_biglittle_init(void) +{ + init_proc_vtable(lookup_processor(read_cpuid_id())->proc); +} +#else +static int secondary_biglittle_prepare(unsigned int cpu) +{ + return 0; +} + +static void secondary_biglittle_init(void) +{ +} +#endif + int __cpu_up(unsigned int cpu, struct task_struct *idle) { int ret; @@ -109,6 +134,10 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) if (!smp_ops.smp_boot_secondary) return -ENOSYS; + ret = secondary_biglittle_prepare(cpu); + if (ret) + return ret; + /* * We need to tell the secondary core where to find * its stack and the page tables. @@ -359,6 +388,8 @@ asmlinkage void secondary_start_kernel(void) struct mm_struct *mm = &init_mm; unsigned int cpu; + secondary_biglittle_init(); + /* * The identity mapping is uncached (strongly ordered), so * switch away from it before attempting any exclusive accesses. diff --git a/arch/arm/mach-davinci/da830.c b/arch/arm/mach-davinci/da830.c index 0bc5bd2665df..2cc9fe4c3a91 100644 --- a/arch/arm/mach-davinci/da830.c +++ b/arch/arm/mach-davinci/da830.c @@ -759,7 +759,9 @@ static struct davinci_id da830_ids[] = { }; static struct davinci_gpio_platform_data da830_gpio_platform_data = { - .ngpio = 128, + .no_auto_base = true, + .base = 0, + .ngpio = 128, }; int __init da830_register_gpio(void) diff --git a/arch/arm/mach-davinci/da850.c b/arch/arm/mach-davinci/da850.c index 4528bbf0c861..e7b78df2bfef 100644 --- a/arch/arm/mach-davinci/da850.c +++ b/arch/arm/mach-davinci/da850.c @@ -719,7 +719,9 @@ int __init da850_register_vpif_capture(struct vpif_capture_config } static struct davinci_gpio_platform_data da850_gpio_platform_data = { - .ngpio = 144, + .no_auto_base = true, + .base = 0, + .ngpio = 144, }; int __init da850_register_gpio(void) diff --git a/arch/arm/mach-davinci/devices-da8xx.c b/arch/arm/mach-davinci/devices-da8xx.c index 1fd3619f6a09..cf78da5ab054 100644 --- a/arch/arm/mach-davinci/devices-da8xx.c +++ b/arch/arm/mach-davinci/devices-da8xx.c @@ -701,6 +701,46 @@ static struct resource da8xx_gpio_resources[] = { }, { /* interrupt */ .start = IRQ_DA8XX_GPIO0, + .end = IRQ_DA8XX_GPIO0, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DA8XX_GPIO1, + .end = IRQ_DA8XX_GPIO1, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DA8XX_GPIO2, + .end = IRQ_DA8XX_GPIO2, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DA8XX_GPIO3, + .end = IRQ_DA8XX_GPIO3, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DA8XX_GPIO4, + .end = IRQ_DA8XX_GPIO4, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DA8XX_GPIO5, + .end = IRQ_DA8XX_GPIO5, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DA8XX_GPIO6, + .end = IRQ_DA8XX_GPIO6, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DA8XX_GPIO7, + .end = IRQ_DA8XX_GPIO7, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DA8XX_GPIO8, .end = IRQ_DA8XX_GPIO8, .flags = IORESOURCE_IRQ, }, diff --git a/arch/arm/mach-davinci/dm355.c b/arch/arm/mach-davinci/dm355.c index 9f7d38d12c88..4c6e0bef4509 100644 --- a/arch/arm/mach-davinci/dm355.c +++ b/arch/arm/mach-davinci/dm355.c @@ -548,12 +548,44 @@ static struct resource dm355_gpio_resources[] = { }, { /* interrupt */ .start = IRQ_DM355_GPIOBNK0, + .end = IRQ_DM355_GPIOBNK0, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DM355_GPIOBNK1, + .end = IRQ_DM355_GPIOBNK1, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DM355_GPIOBNK2, + .end = IRQ_DM355_GPIOBNK2, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DM355_GPIOBNK3, + .end = IRQ_DM355_GPIOBNK3, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DM355_GPIOBNK4, + .end = IRQ_DM355_GPIOBNK4, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DM355_GPIOBNK5, + .end = IRQ_DM355_GPIOBNK5, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DM355_GPIOBNK6, .end = IRQ_DM355_GPIOBNK6, .flags = IORESOURCE_IRQ, }, }; static struct davinci_gpio_platform_data dm355_gpio_platform_data = { + .no_auto_base = true, + .base = 0, .ngpio = 104, }; diff --git a/arch/arm/mach-davinci/dm365.c b/arch/arm/mach-davinci/dm365.c index abcf2a5ed89b..01fb2b0c82de 100644 --- a/arch/arm/mach-davinci/dm365.c +++ b/arch/arm/mach-davinci/dm365.c @@ -267,12 +267,49 @@ static struct resource dm365_gpio_resources[] = { }, { /* interrupt */ .start = IRQ_DM365_GPIO0, + .end = IRQ_DM365_GPIO0, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DM365_GPIO1, + .end = IRQ_DM365_GPIO1, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DM365_GPIO2, + .end = IRQ_DM365_GPIO2, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DM365_GPIO3, + .end = IRQ_DM365_GPIO3, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DM365_GPIO4, + .end = IRQ_DM365_GPIO4, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DM365_GPIO5, + .end = IRQ_DM365_GPIO5, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DM365_GPIO6, + .end = IRQ_DM365_GPIO6, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DM365_GPIO7, .end = IRQ_DM365_GPIO7, .flags = IORESOURCE_IRQ, }, }; static struct davinci_gpio_platform_data dm365_gpio_platform_data = { + .no_auto_base = true, + .base = 0, .ngpio = 104, .gpio_unbanked = 8, }; diff --git a/arch/arm/mach-davinci/dm644x.c b/arch/arm/mach-davinci/dm644x.c index 0720da7809a6..38f92b7d413e 100644 --- a/arch/arm/mach-davinci/dm644x.c +++ b/arch/arm/mach-davinci/dm644x.c @@ -492,12 +492,34 @@ static struct resource dm644_gpio_resources[] = { }, { /* interrupt */ .start = IRQ_GPIOBNK0, + .end = IRQ_GPIOBNK0, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_GPIOBNK1, + .end = IRQ_GPIOBNK1, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_GPIOBNK2, + .end = IRQ_GPIOBNK2, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_GPIOBNK3, + .end = IRQ_GPIOBNK3, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_GPIOBNK4, .end = IRQ_GPIOBNK4, .flags = IORESOURCE_IRQ, }, }; static struct davinci_gpio_platform_data dm644_gpio_platform_data = { + .no_auto_base = true, + .base = 0, .ngpio = 71, }; diff --git a/arch/arm/mach-davinci/dm646x.c b/arch/arm/mach-davinci/dm646x.c index 6bd2ed069d0d..7dc54b2a610f 100644 --- a/arch/arm/mach-davinci/dm646x.c +++ b/arch/arm/mach-davinci/dm646x.c @@ -442,12 +442,24 @@ static struct resource dm646x_gpio_resources[] = { }, { /* interrupt */ .start = IRQ_DM646X_GPIOBNK0, + .end = IRQ_DM646X_GPIOBNK0, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DM646X_GPIOBNK1, + .end = IRQ_DM646X_GPIOBNK1, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DM646X_GPIOBNK2, .end = IRQ_DM646X_GPIOBNK2, .flags = IORESOURCE_IRQ, }, }; static struct davinci_gpio_platform_data dm646x_gpio_platform_data = { + .no_auto_base = true, + .base = 0, .ngpio = 43, }; diff --git a/arch/arm/mach-imx/cpuidle-imx6sx.c b/arch/arm/mach-imx/cpuidle-imx6sx.c index 243a108a940b..fd0053e47a15 100644 --- a/arch/arm/mach-imx/cpuidle-imx6sx.c +++ b/arch/arm/mach-imx/cpuidle-imx6sx.c @@ -110,7 +110,7 @@ int __init imx6sx_cpuidle_init(void) * except for power up sw2iso which need to be * larger than LDO ramp up time. */ - imx_gpc_set_arm_power_up_timing(2, 1); + imx_gpc_set_arm_power_up_timing(0xf, 1); imx_gpc_set_arm_power_down_timing(1, 1); return cpuidle_register(&imx6sx_cpuidle_driver, NULL); diff --git a/arch/arm/mach-mmp/cputype.h b/arch/arm/mach-mmp/cputype.h index 446edaeb78a7..a96abcf521b4 100644 --- a/arch/arm/mach-mmp/cputype.h +++ b/arch/arm/mach-mmp/cputype.h @@ -44,10 +44,12 @@ static inline int cpu_is_pxa910(void) #define cpu_is_pxa910() (0) #endif -#ifdef CONFIG_CPU_MMP2 +#if defined(CONFIG_CPU_MMP2) || defined(CONFIG_MACH_MMP2_DT) static inline int cpu_is_mmp2(void) { - return (((read_cpuid_id() >> 8) & 0xff) == 0x58); + return (((read_cpuid_id() >> 8) & 0xff) == 0x58) && + (((mmp_chip_id & 0xfff) == 0x410) || + ((mmp_chip_id & 0xfff) == 0x610)); } #else #define cpu_is_mmp2() (0) diff --git a/arch/arm/mach-omap1/board-ams-delta.c b/arch/arm/mach-omap1/board-ams-delta.c index af318d958fd2..17886744dbe6 100644 --- a/arch/arm/mach-omap1/board-ams-delta.c +++ b/arch/arm/mach-omap1/board-ams-delta.c @@ -750,6 +750,9 @@ static void modem_pm(struct uart_port *port, unsigned int state, unsigned old) struct modem_private_data *priv = port->private_data; int ret; + if (!priv) + return; + if (IS_ERR(priv->regulator)) return; @@ -773,7 +776,7 @@ static struct plat_serial8250_port ams_delta_modem_ports[] = { { .membase = IOMEM(MODEM_VIRT), .mapbase = MODEM_PHYS, - .irq = -EINVAL, /* changed later */ + .irq = IRQ_NOTCONNECTED, /* changed later */ .flags = UPF_BOOT_AUTOCONF, .irqflags = IRQF_TRIGGER_RISING, .iotype = UPIO_MEM, @@ -864,8 +867,7 @@ static int __init modem_nreset_init(void) /* - * This function expects MODEM IRQ number already assigned to the port - * and fails if it's not. + * This function expects MODEM IRQ number already assigned to the port. * The MODEM device requires its RESET# pin kept high during probe. * That requirement can be fulfilled in several ways: * - with a descriptor of already functional modem_nreset regulator @@ -888,9 +890,6 @@ static int __init ams_delta_modem_init(void) if (!machine_is_ams_delta()) return -ENODEV; - if (ams_delta_modem_ports[0].irq < 0) - return ams_delta_modem_ports[0].irq; - omap_cfg_reg(M14_1510_GPIO2); /* Initialize the modem_nreset regulator consumer before use */ diff --git a/arch/arm/mach-omap2/display.c b/arch/arm/mach-omap2/display.c index 9500b6e27380..f86b72d1d59e 100644 --- a/arch/arm/mach-omap2/display.c +++ b/arch/arm/mach-omap2/display.c @@ -209,11 +209,61 @@ static int __init omapdss_init_fbdev(void) return 0; } -#else -static inline int omapdss_init_fbdev(void) + +static const char * const omapdss_compat_names[] __initconst = { + "ti,omap2-dss", + "ti,omap3-dss", + "ti,omap4-dss", + "ti,omap5-dss", + "ti,dra7-dss", +}; + +static struct device_node * __init omapdss_find_dss_of_node(void) { - return 0; + struct device_node *node; + int i; + + for (i = 0; i < ARRAY_SIZE(omapdss_compat_names); ++i) { + node = of_find_compatible_node(NULL, NULL, + omapdss_compat_names[i]); + if (node) + return node; + } + + return NULL; } + +static int __init omapdss_init_of(void) +{ + int r; + struct device_node *node; + struct platform_device *pdev; + + /* only create dss helper devices if dss is enabled in the .dts */ + + node = omapdss_find_dss_of_node(); + if (!node) + return 0; + + if (!of_device_is_available(node)) + return 0; + + pdev = of_find_device_by_node(node); + + if (!pdev) { + pr_err("Unable to find DSS platform device\n"); + return -ENODEV; + } + + r = of_platform_populate(node, NULL, NULL, &pdev->dev); + if (r) { + pr_err("Unable to populate DSS submodule devices\n"); + return r; + } + + return omapdss_init_fbdev(); +} +omap_device_initcall(omapdss_init_of); #endif /* CONFIG_FB_OMAP2 */ static void dispc_disable_outputs(void) @@ -361,58 +411,3 @@ int omap_dss_reset(struct omap_hwmod *oh) return r; } - -static const char * const omapdss_compat_names[] __initconst = { - "ti,omap2-dss", - "ti,omap3-dss", - "ti,omap4-dss", - "ti,omap5-dss", - "ti,dra7-dss", -}; - -static struct device_node * __init omapdss_find_dss_of_node(void) -{ - struct device_node *node; - int i; - - for (i = 0; i < ARRAY_SIZE(omapdss_compat_names); ++i) { - node = of_find_compatible_node(NULL, NULL, - omapdss_compat_names[i]); - if (node) - return node; - } - - return NULL; -} - -static int __init omapdss_init_of(void) -{ - int r; - struct device_node *node; - struct platform_device *pdev; - - /* only create dss helper devices if dss is enabled in the .dts */ - - node = omapdss_find_dss_of_node(); - if (!node) - return 0; - - if (!of_device_is_available(node)) - return 0; - - pdev = of_find_device_by_node(node); - - if (!pdev) { - pr_err("Unable to find DSS platform device\n"); - return -ENODEV; - } - - r = of_platform_populate(node, NULL, NULL, &pdev->dev); - if (r) { - pr_err("Unable to populate DSS submodule devices\n"); - return r; - } - - return omapdss_init_fbdev(); -} -omap_device_initcall(omapdss_init_of); diff --git a/arch/arm/mach-omap2/prm44xx.c b/arch/arm/mach-omap2/prm44xx.c index 7b95729e8359..38a1be6c3694 100644 --- a/arch/arm/mach-omap2/prm44xx.c +++ b/arch/arm/mach-omap2/prm44xx.c @@ -351,7 +351,7 @@ static void omap44xx_prm_reconfigure_io_chain(void) * to occur, WAKEUPENABLE bits must be set in the pad mux registers, and * omap44xx_prm_reconfigure_io_chain() must be called. No return value. */ -static void __init omap44xx_prm_enable_io_wakeup(void) +static void omap44xx_prm_enable_io_wakeup(void) { s32 inst = omap4_prmst_get_prm_dev_inst(); diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S index 215df435bfb9..2149b47a0c5a 100644 --- a/arch/arm/mm/cache-v7.S +++ b/arch/arm/mm/cache-v7.S @@ -360,14 +360,16 @@ v7_dma_inv_range: ALT_UP(W(nop)) #endif mcrne p15, 0, r0, c7, c14, 1 @ clean & invalidate D / U line + addne r0, r0, r2 tst r1, r3 bic r1, r1, r3 mcrne p15, 0, r1, c7, c14, 1 @ clean & invalidate D / U line -1: - mcr p15, 0, r0, c7, c6, 1 @ invalidate D / U line - add r0, r0, r2 cmp r0, r1 +1: + mcrlo p15, 0, r0, c7, c6, 1 @ invalidate D / U line + addlo r0, r0, r2 + cmplo r0, r1 blo 1b dsb st ret lr diff --git a/arch/arm/mm/cache-v7m.S b/arch/arm/mm/cache-v7m.S index 788486e830d3..32aa2a2aa260 100644 --- a/arch/arm/mm/cache-v7m.S +++ b/arch/arm/mm/cache-v7m.S @@ -73,9 +73,11 @@ /* * dcimvac: Invalidate data cache line by MVA to PoC */ -.macro dcimvac, rt, tmp - v7m_cacheop \rt, \tmp, V7M_SCB_DCIMVAC +.irp c,,eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,hs,lo +.macro dcimvac\c, rt, tmp + v7m_cacheop \rt, \tmp, V7M_SCB_DCIMVAC, \c .endm +.endr /* * dccmvau: Clean data cache line by MVA to PoU @@ -369,14 +371,16 @@ v7m_dma_inv_range: tst r0, r3 bic r0, r0, r3 dccimvacne r0, r3 + addne r0, r0, r2 subne r3, r2, #1 @ restore r3, corrupted by v7m's dccimvac tst r1, r3 bic r1, r1, r3 dccimvacne r1, r3 -1: - dcimvac r0, r3 - add r0, r0, r2 cmp r0, r1 +1: + dcimvaclo r0, r3 + addlo r0, r0, r2 + cmplo r0, r1 blo 1b dsb st ret lr diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 661fe48ab78d..78de138aa66d 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -829,7 +829,7 @@ static int __arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs) { - int ret; + int ret = -ENXIO; unsigned long nr_vma_pages = vma_pages(vma); unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; unsigned long pfn = dma_to_pfn(dev, dma_addr); diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S index 81d0efb055c6..19516fbc2c55 100644 --- a/arch/arm/mm/proc-macros.S +++ b/arch/arm/mm/proc-macros.S @@ -274,6 +274,13 @@ .endm .macro define_processor_functions name:req, dabort:req, pabort:req, nommu=0, suspend=0, bugs=0 +/* + * If we are building for big.Little with branch predictor hardening, + * we need the processor function tables to remain available after boot. + */ +#if 1 // defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR) + .section ".rodata" +#endif .type \name\()_processor_functions, #object .align 2 ENTRY(\name\()_processor_functions) @@ -309,6 +316,9 @@ ENTRY(\name\()_processor_functions) .endif .size \name\()_processor_functions, . - \name\()_processor_functions +#if 1 // defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR) + .previous +#endif .endm .macro define_cache_functions name:req diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c index 5544b82a2e7a..9a07916af8dd 100644 --- a/arch/arm/mm/proc-v7-bugs.c +++ b/arch/arm/mm/proc-v7-bugs.c @@ -52,8 +52,6 @@ static void cpu_v7_spectre_init(void) case ARM_CPU_PART_CORTEX_A17: case ARM_CPU_PART_CORTEX_A73: case ARM_CPU_PART_CORTEX_A75: - if (processor.switch_mm != cpu_v7_bpiall_switch_mm) - goto bl_error; per_cpu(harden_branch_predictor_fn, cpu) = harden_branch_predictor_bpiall; spectre_v2_method = "BPIALL"; @@ -61,8 +59,6 @@ static void cpu_v7_spectre_init(void) case ARM_CPU_PART_CORTEX_A15: case ARM_CPU_PART_BRAHMA_B15: - if (processor.switch_mm != cpu_v7_iciallu_switch_mm) - goto bl_error; per_cpu(harden_branch_predictor_fn, cpu) = harden_branch_predictor_iciallu; spectre_v2_method = "ICIALLU"; @@ -88,11 +84,9 @@ static void cpu_v7_spectre_init(void) ARM_SMCCC_ARCH_WORKAROUND_1, &res); if ((int)res.a0 != 0) break; - if (processor.switch_mm != cpu_v7_hvc_switch_mm && cpu) - goto bl_error; per_cpu(harden_branch_predictor_fn, cpu) = call_hvc_arch_workaround_1; - processor.switch_mm = cpu_v7_hvc_switch_mm; + cpu_do_switch_mm = cpu_v7_hvc_switch_mm; spectre_v2_method = "hypervisor"; break; @@ -101,11 +95,9 @@ static void cpu_v7_spectre_init(void) ARM_SMCCC_ARCH_WORKAROUND_1, &res); if ((int)res.a0 != 0) break; - if (processor.switch_mm != cpu_v7_smc_switch_mm && cpu) - goto bl_error; per_cpu(harden_branch_predictor_fn, cpu) = call_smc_arch_workaround_1; - processor.switch_mm = cpu_v7_smc_switch_mm; + cpu_do_switch_mm = cpu_v7_smc_switch_mm; spectre_v2_method = "firmware"; break; @@ -119,11 +111,6 @@ static void cpu_v7_spectre_init(void) if (spectre_v2_method) pr_info("CPU%u: Spectre v2: using %s workaround\n", smp_processor_id(), spectre_v2_method); - return; - -bl_error: - pr_err("CPU%u: Spectre v2: incorrect context switching function, system vulnerable\n", - cpu); } #else static void cpu_v7_spectre_init(void) diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 6fe52819e014..339eb17c9808 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -112,7 +112,7 @@ ENTRY(cpu_v7_hvc_switch_mm) hvc #0 ldmfd sp!, {r0 - r3} b cpu_v7_switch_mm -ENDPROC(cpu_v7_smc_switch_mm) +ENDPROC(cpu_v7_hvc_switch_mm) #endif ENTRY(cpu_v7_iciallu_switch_mm) mov r3, #0 diff --git a/arch/arm/plat-orion/mpp.c b/arch/arm/plat-orion/mpp.c index 5b4ff9373c89..8a6880d528b6 100644 --- a/arch/arm/plat-orion/mpp.c +++ b/arch/arm/plat-orion/mpp.c @@ -28,10 +28,15 @@ void __init orion_mpp_conf(unsigned int *mpp_list, unsigned int variant_mask, unsigned int mpp_max, void __iomem *dev_bus) { unsigned int mpp_nr_regs = (1 + mpp_max/8); - u32 mpp_ctrl[mpp_nr_regs]; + u32 mpp_ctrl[8]; int i; printk(KERN_DEBUG "initial MPP regs:"); + if (mpp_nr_regs > ARRAY_SIZE(mpp_ctrl)) { + printk(KERN_ERR "orion_mpp_conf: invalid mpp_max\n"); + return; + } + for (i = 0; i < mpp_nr_regs; i++) { mpp_ctrl[i] = readl(mpp_ctrl_addr(i, dev_bus)); printk(" %08x", mpp_ctrl[i]); diff --git a/arch/arm/probes/kprobes/opt-arm.c b/arch/arm/probes/kprobes/opt-arm.c index b2aa9b32bff2..2c118a6ab358 100644 --- a/arch/arm/probes/kprobes/opt-arm.c +++ b/arch/arm/probes/kprobes/opt-arm.c @@ -247,7 +247,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *or } /* Copy arch-dep-instance from template. */ - memcpy(code, &optprobe_template_entry, + memcpy(code, (unsigned char *)optprobe_template_entry, TMPL_END_IDX * sizeof(kprobe_opcode_t)); /* Adjust buffer according to instruction. */ diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index aff6e6eadc70..ee7b07938dd5 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -573,7 +573,7 @@ int vfp_preserve_user_clear_hwstate(struct user_vfp *ufp, */ ufp_exc->fpexc = hwstate->fpexc; ufp_exc->fpinst = hwstate->fpinst; - ufp_exc->fpinst2 = ufp_exc->fpinst2; + ufp_exc->fpinst2 = hwstate->fpinst2; /* Ensure that VFP is disabled. */ vfp_flush_hwstate(thread); diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 787d7850e064..ea2ab0330e3a 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -497,6 +497,24 @@ config ARM64_ERRATUM_1188873 If unsure, say Y. +config ARM64_ERRATUM_1286807 + bool "Cortex-A76: Modification of the translation table for a virtual address might lead to read-after-read ordering violation" + default y + select ARM64_WORKAROUND_REPEAT_TLBI + help + This option adds workaround for ARM Cortex-A76 erratum 1286807 + + On the affected Cortex-A76 cores (r0p0 to r3p0), if a virtual + address for a cacheable mapping of a location is being + accessed by a core while another core is remapping the virtual + address to a new physical page using the recommended + break-before-make sequence, then under very rare circumstances + TLBI+DSB completes before a read using the translation being + invalidated has been observed by other observers. The + workaround repeats the TLBI+DSB operation. + + If unsure, say Y. + config CAVIUM_ERRATUM_22375 bool "Cavium erratum 22375, 24313" default y @@ -566,9 +584,16 @@ config QCOM_FALKOR_ERRATUM_1003 is unchanged. Work around the erratum by invalidating the walk cache entries for the trampoline before entering the kernel proper. +config ARM64_WORKAROUND_REPEAT_TLBI + bool + help + Enable the repeat TLBI workaround for Falkor erratum 1009 and + Cortex-A76 erratum 1286807. + config QCOM_FALKOR_ERRATUM_1009 bool "Falkor E1009: Prematurely complete a DSB after a TLBI" default y + select ARM64_WORKAROUND_REPEAT_TLBI help On Falkor v1, the CPU may prematurely complete a DSB following a TLBI xxIS invalidate maintenance operation. Repeat the TLBI operation diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index b4e994cd3a42..6cb9fc7e9382 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -134,6 +134,7 @@ vdso_install: archclean: $(Q)$(MAKE) $(clean)=$(boot) +ifeq ($(KBUILD_EXTMOD),) # We need to generate vdso-offsets.h before compiling certain files in kernel/. # In order to do that, we should use the archprepare target, but we can't since # asm-offsets.h is included in some files used to generate vdso-offsets.h, and @@ -143,6 +144,7 @@ archclean: prepare: vdso_prepare vdso_prepare: prepare0 $(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso include/generated/vdso-offsets.h +endif define archhelp echo '* Image.gz - Compressed kernel image (arch/$(ARCH)/boot/Image.gz)' diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi index 8253a1a9e985..fef7351e9f67 100644 --- a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi +++ b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi @@ -139,6 +139,7 @@ clock-names = "stmmaceth"; tx-fifo-depth = <16384>; rx-fifo-depth = <16384>; + snps,multicast-filter-bins = <256>; status = "disabled"; }; @@ -154,6 +155,7 @@ clock-names = "stmmaceth"; tx-fifo-depth = <16384>; rx-fifo-depth = <16384>; + snps,multicast-filter-bins = <256>; status = "disabled"; }; @@ -169,6 +171,7 @@ clock-names = "stmmaceth"; tx-fifo-depth = <16384>; rx-fifo-depth = <16384>; + snps,multicast-filter-bins = <256>; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/marvell/armada-ap806-quad.dtsi b/arch/arm64/boot/dts/marvell/armada-ap806-quad.dtsi index 64632c873888..01ea662afba8 100644 --- a/arch/arm64/boot/dts/marvell/armada-ap806-quad.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-ap806-quad.dtsi @@ -20,28 +20,24 @@ compatible = "arm,cortex-a72", "arm,armv8"; reg = <0x000>; enable-method = "psci"; - cpu-idle-states = <&CPU_SLEEP_0>; }; cpu1: cpu@1 { device_type = "cpu"; compatible = "arm,cortex-a72", "arm,armv8"; reg = <0x001>; enable-method = "psci"; - cpu-idle-states = <&CPU_SLEEP_0>; }; cpu2: cpu@100 { device_type = "cpu"; compatible = "arm,cortex-a72", "arm,armv8"; reg = <0x100>; enable-method = "psci"; - cpu-idle-states = <&CPU_SLEEP_0>; }; cpu3: cpu@101 { device_type = "cpu"; compatible = "arm,cortex-a72", "arm,armv8"; reg = <0x101>; enable-method = "psci"; - cpu-idle-states = <&CPU_SLEEP_0>; }; }; }; diff --git a/arch/arm64/boot/dts/marvell/armada-ap806.dtsi b/arch/arm64/boot/dts/marvell/armada-ap806.dtsi index 073610ac0a53..7d94c1fa592a 100644 --- a/arch/arm64/boot/dts/marvell/armada-ap806.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-ap806.dtsi @@ -28,33 +28,6 @@ method = "smc"; }; - cpus { - #address-cells = <1>; - #size-cells = <0>; - - idle_states { - entry_method = "arm,pcsi"; - - CPU_SLEEP_0: cpu-sleep-0 { - compatible = "arm,idle-state"; - local-timer-stop; - arm,psci-suspend-param = <0x0010000>; - entry-latency-us = <80>; - exit-latency-us = <160>; - min-residency-us = <320>; - }; - - CLUSTER_SLEEP_0: cluster-sleep-0 { - compatible = "arm,idle-state"; - local-timer-stop; - arm,psci-suspend-param = <0x1010000>; - entry-latency-us = <500>; - exit-latency-us = <1000>; - min-residency-us = <2500>; - }; - }; - }; - ap806 { #address-cells = <2>; #size-cells = <2>; diff --git a/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts b/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts index 5d6005c9b097..710c5c3d87d3 100644 --- a/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts +++ b/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts @@ -16,8 +16,13 @@ model = "Bananapi BPI-R64"; compatible = "bananapi,bpi-r64", "mediatek,mt7622"; + aliases { + serial0 = &uart0; + }; + chosen { - bootargs = "earlycon=uart8250,mmio32,0x11002000 console=ttyS0,115200n1 swiotlb=512"; + stdout-path = "serial0:115200n8"; + bootargs = "earlycon=uart8250,mmio32,0x11002000 swiotlb=512"; }; cpus { diff --git a/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts b/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts index dcad0869b84c..3f783348c66a 100644 --- a/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts +++ b/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts @@ -17,8 +17,13 @@ model = "MediaTek MT7622 RFB1 board"; compatible = "mediatek,mt7622-rfb1", "mediatek,mt7622"; + aliases { + serial0 = &uart0; + }; + chosen { - bootargs = "earlycon=uart8250,mmio32,0x11002000 console=ttyS0,115200n1 swiotlb=512"; + stdout-path = "serial0:115200n8"; + bootargs = "earlycon=uart8250,mmio32,0x11002000 swiotlb=512"; }; cpus { diff --git a/arch/arm64/boot/dts/mediatek/mt7622.dtsi b/arch/arm64/boot/dts/mediatek/mt7622.dtsi index fe0c875f1d95..14a1028ca3a6 100644 --- a/arch/arm64/boot/dts/mediatek/mt7622.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt7622.dtsi @@ -227,16 +227,6 @@ #reset-cells = <1>; }; - timer: timer@10004000 { - compatible = "mediatek,mt7622-timer", - "mediatek,mt6577-timer"; - reg = <0 0x10004000 0 0x80>; - interrupts = <GIC_SPI 152 IRQ_TYPE_LEVEL_LOW>; - clocks = <&infracfg CLK_INFRA_APXGPT_PD>, - <&topckgen CLK_TOP_RTC>; - clock-names = "system-clk", "rtc-clk"; - }; - scpsys: scpsys@10006000 { compatible = "mediatek,mt7622-scpsys", "syscon"; diff --git a/arch/arm64/boot/dts/qcom/msm8998-mtp.dtsi b/arch/arm64/boot/dts/qcom/msm8998-mtp.dtsi index b4276da1fb0d..11fd1fe8bdb5 100644 --- a/arch/arm64/boot/dts/qcom/msm8998-mtp.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8998-mtp.dtsi @@ -241,3 +241,7 @@ }; }; }; + +&tlmm { + gpio-reserved-ranges = <0 4>, <81 4>; +}; diff --git a/arch/arm64/boot/dts/qcom/sdm845-mtp.dts b/arch/arm64/boot/dts/qcom/sdm845-mtp.dts index eedfaf8922e2..b3def0358177 100644 --- a/arch/arm64/boot/dts/qcom/sdm845-mtp.dts +++ b/arch/arm64/boot/dts/qcom/sdm845-mtp.dts @@ -343,6 +343,12 @@ }; }; +&gcc { + protected-clocks = <GCC_QSPI_CORE_CLK>, + <GCC_QSPI_CORE_CLK_SRC>, + <GCC_QSPI_CNOC_PERIPH_AHB_CLK>; +}; + &i2c10 { status = "okay"; clock-frequency = <400000>; @@ -352,6 +358,10 @@ status = "okay"; }; +&tlmm { + gpio-reserved-ranges = <0 4>, <81 4>; +}; + &uart9 { status = "okay"; }; diff --git a/arch/arm64/boot/dts/renesas/r8a7795.dtsi b/arch/arm64/boot/dts/renesas/r8a7795.dtsi index b5f2273caca4..a79c8d369e0b 100644 --- a/arch/arm64/boot/dts/renesas/r8a7795.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a7795.dtsi @@ -652,7 +652,7 @@ clock-names = "fck", "brg_int", "scif_clk"; dmas = <&dmac1 0x35>, <&dmac1 0x34>, <&dmac2 0x35>, <&dmac2 0x34>; - dma-names = "tx", "rx"; + dma-names = "tx", "rx", "tx", "rx"; power-domains = <&sysc R8A7795_PD_ALWAYS_ON>; resets = <&cpg 518>; status = "disabled"; diff --git a/arch/arm64/boot/dts/renesas/r8a77980-condor.dts b/arch/arm64/boot/dts/renesas/r8a77980-condor.dts index fe2e2c051cc9..5a7012be0d6a 100644 --- a/arch/arm64/boot/dts/renesas/r8a77980-condor.dts +++ b/arch/arm64/boot/dts/renesas/r8a77980-condor.dts @@ -15,7 +15,7 @@ aliases { serial0 = &scif0; - ethernet0 = &avb; + ethernet0 = &gether; }; chosen { @@ -97,23 +97,6 @@ }; }; -&avb { - pinctrl-0 = <&avb_pins>; - pinctrl-names = "default"; - - phy-mode = "rgmii-id"; - phy-handle = <&phy0>; - renesas,no-ether-link; - status = "okay"; - - phy0: ethernet-phy@0 { - rxc-skew-ps = <1500>; - reg = <0>; - interrupt-parent = <&gpio1>; - interrupts = <17 IRQ_TYPE_LEVEL_LOW>; - }; -}; - &canfd { pinctrl-0 = <&canfd0_pins>; pinctrl-names = "default"; @@ -139,6 +122,23 @@ clock-frequency = <32768>; }; +&gether { + pinctrl-0 = <&gether_pins>; + pinctrl-names = "default"; + + phy-mode = "rgmii-id"; + phy-handle = <&phy0>; + renesas,no-ether-link; + status = "okay"; + + phy0: ethernet-phy@0 { + rxc-skew-ps = <1500>; + reg = <0>; + interrupt-parent = <&gpio4>; + interrupts = <23 IRQ_TYPE_LEVEL_LOW>; + }; +}; + &i2c0 { pinctrl-0 = <&i2c0_pins>; pinctrl-names = "default"; @@ -236,16 +236,17 @@ }; &pfc { - avb_pins: avb { - groups = "avb_mdio", "avb_rgmii"; - function = "avb"; - }; - canfd0_pins: canfd0 { groups = "canfd0_data_a"; function = "canfd0"; }; + gether_pins: gether { + groups = "gether_mdio_a", "gether_rgmii", + "gether_txcrefclk", "gether_txcrefclk_mega"; + function = "gether"; + }; + i2c0_pins: i2c0 { groups = "i2c0"; function = "i2c0"; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts index 2dceeea29b83..1e6a71066c16 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts @@ -153,7 +153,7 @@ }; &pcie0 { - ep-gpios = <&gpio4 RK_PC6 GPIO_ACTIVE_LOW>; + ep-gpios = <&gpio4 RK_PC6 GPIO_ACTIVE_HIGH>; num-lanes = <4>; pinctrl-names = "default"; pinctrl-0 = <&pcie_clkreqn_cpm>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rock960.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-rock960.dtsi index 6c8c4ab044aa..56abbb08c133 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-rock960.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-rock960.dtsi @@ -57,18 +57,6 @@ regulator-always-on; vin-supply = <&vcc_sys>; }; - - vdd_log: vdd-log { - compatible = "pwm-regulator"; - pwms = <&pwm2 0 25000 0>; - regulator-name = "vdd_log"; - regulator-min-microvolt = <800000>; - regulator-max-microvolt = <1400000>; - regulator-always-on; - regulator-boot-on; - vin-supply = <&vcc_sys>; - }; - }; &cpu_l0 { diff --git a/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi index affc3c309353..8d7b47f9dfbf 100644 --- a/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi @@ -36,7 +36,7 @@ wkup_uart0: serial@42300000 { compatible = "ti,am654-uart"; - reg = <0x00 0x42300000 0x00 0x100>; + reg = <0x42300000 0x100>; reg-shift = <2>; reg-io-width = <4>; interrupts = <GIC_SPI 697 IRQ_TYPE_LEVEL_HIGH>; diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 3cb995606e60..c9a57d11330b 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -308,6 +308,9 @@ CONFIG_SERIAL_XILINX_PS_UART_CONSOLE=y CONFIG_SERIAL_MVEBU_UART=y CONFIG_SERIAL_DEV_BUS=y CONFIG_VIRTIO_CONSOLE=y +CONFIG_IPMI_HANDLER=m +CONFIG_IPMI_DEVICE_INTERFACE=m +CONFIG_IPMI_SI=m CONFIG_TCG_TPM=y CONFIG_TCG_TIS_I2C_INFINEON=y CONFIG_I2C_CHARDEV=y diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h index caa955f10e19..fac54fb050d0 100644 --- a/arch/arm64/include/asm/ftrace.h +++ b/arch/arm64/include/asm/ftrace.h @@ -56,6 +56,19 @@ static inline bool arch_trace_is_compat_syscall(struct pt_regs *regs) { return is_compat_task(); } + +#define ARCH_HAS_SYSCALL_MATCH_SYM_NAME + +static inline bool arch_syscall_match_sym_name(const char *sym, + const char *name) +{ + /* + * Since all syscall functions have __arm64_ prefix, we must skip it. + * However, as we described above, we decided to ignore compat + * syscalls, so we don't care about __arm64_compat_ prefix here. + */ + return !strcmp(sym + 8, name); +} #endif /* ifndef __ASSEMBLY__ */ #endif /* __ASM_FTRACE_H */ diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h index 9234013e759e..21a81b59a0cc 100644 --- a/arch/arm64/include/asm/percpu.h +++ b/arch/arm64/include/asm/percpu.h @@ -96,6 +96,7 @@ static inline unsigned long __percpu_##op(void *ptr, \ : [val] "Ir" (val)); \ break; \ default: \ + ret = 0; \ BUILD_BUG(); \ } \ \ @@ -125,6 +126,7 @@ static inline unsigned long __percpu_read(void *ptr, int size) ret = READ_ONCE(*(u64 *)ptr); break; default: + ret = 0; BUILD_BUG(); } @@ -194,6 +196,7 @@ static inline unsigned long __percpu_xchg(void *ptr, unsigned long val, : [val] "r" (val)); break; default: + ret = 0; BUILD_BUG(); } diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 3e2091708b8e..6b0d4dff5012 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -24,6 +24,14 @@ #define KERNEL_DS UL(-1) #define USER_DS (TASK_SIZE_64 - 1) +/* + * On arm64 systems, unaligned accesses by the CPU are cheap, and so there is + * no point in shifting all network buffers by 2 bytes just to make some IP + * header fields appear aligned in memory, potentially sacrificing some DMA + * performance on some platforms. + */ +#define NET_IP_ALIGN 0 + #ifndef __ASSEMBLY__ #ifdef __KERNEL__ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 0c909c4a932f..842fb9572661 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -468,7 +468,7 @@ SCTLR_ELx_SA | SCTLR_ELx_I | SCTLR_ELx_WXN | \ SCTLR_ELx_DSSBS | ENDIAN_CLEAR_EL2 | SCTLR_EL2_RES0) -#if (SCTLR_EL2_SET ^ SCTLR_EL2_CLEAR) != 0xffffffffffffffff +#if (SCTLR_EL2_SET ^ SCTLR_EL2_CLEAR) != 0xffffffffffffffffUL #error "Inconsistent SCTLR_EL2 set/clear bits" #endif @@ -509,7 +509,7 @@ SCTLR_EL1_UMA | SCTLR_ELx_WXN | ENDIAN_CLEAR_EL1 |\ SCTLR_ELx_DSSBS | SCTLR_EL1_NTWI | SCTLR_EL1_RES0) -#if (SCTLR_EL1_SET ^ SCTLR_EL1_CLEAR) != 0xffffffffffffffff +#if (SCTLR_EL1_SET ^ SCTLR_EL1_CLEAR) != 0xffffffffffffffffUL #error "Inconsistent SCTLR_EL1 set/clear bits" #endif diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index c3c0387aee18..5dfd23897dea 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -41,14 +41,14 @@ ALTERNATIVE("nop\n nop", \ "dsb ish\n tlbi " #op, \ ARM64_WORKAROUND_REPEAT_TLBI, \ - CONFIG_QCOM_FALKOR_ERRATUM_1009) \ + CONFIG_ARM64_WORKAROUND_REPEAT_TLBI) \ : : ) #define __TLBI_1(op, arg) asm ("tlbi " #op ", %0\n" \ ALTERNATIVE("nop\n nop", \ "dsb ish\n tlbi " #op ", %0", \ ARM64_WORKAROUND_REPEAT_TLBI, \ - CONFIG_QCOM_FALKOR_ERRATUM_1009) \ + CONFIG_ARM64_WORKAROUND_REPEAT_TLBI) \ : : "r" (arg)) #define __TLBI_N(op, arg, n, ...) __TLBI_##n(op, arg) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index a509e35132d2..6ad715d67df8 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -570,6 +570,20 @@ static const struct midr_range arm64_harden_el2_vectors[] = { #endif +#ifdef CONFIG_ARM64_WORKAROUND_REPEAT_TLBI + +static const struct midr_range arm64_repeat_tlbi_cpus[] = { +#ifdef CONFIG_QCOM_FALKOR_ERRATUM_1009 + MIDR_RANGE(MIDR_QCOM_FALKOR_V1, 0, 0, 0, 0), +#endif +#ifdef CONFIG_ARM64_ERRATUM_1286807 + MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 3, 0), +#endif + {}, +}; + +#endif + const struct arm64_cpu_capabilities arm64_errata[] = { #if defined(CONFIG_ARM64_ERRATUM_826319) || \ defined(CONFIG_ARM64_ERRATUM_827319) || \ @@ -695,11 +709,11 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .matches = is_kryo_midr, }, #endif -#ifdef CONFIG_QCOM_FALKOR_ERRATUM_1009 +#ifdef CONFIG_ARM64_WORKAROUND_REPEAT_TLBI { - .desc = "Qualcomm Technologies Falkor erratum 1009", + .desc = "Qualcomm erratum 1009, ARM erratum 1286807", .capability = ARM64_WORKAROUND_REPEAT_TLBI, - ERRATA_MIDR_REV(MIDR_QCOM_FALKOR_V1, 0, 0), + ERRATA_MIDR_RANGE_LIST(arm64_repeat_tlbi_cpus), }, #endif #ifdef CONFIG_ARM64_ERRATUM_858921 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index af50064dea51..aec5ecb85737 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1333,7 +1333,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .cpu_enable = cpu_enable_hw_dbm, }, #endif -#ifdef CONFIG_ARM64_SSBD { .desc = "CRC32 instructions", .capability = ARM64_HAS_CRC32, @@ -1343,6 +1342,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .field_pos = ID_AA64ISAR0_CRC32_SHIFT, .min_field_value = 1, }, +#ifdef CONFIG_ARM64_SSBD { .desc = "Speculative Store Bypassing Safe (SSBS)", .capability = ARM64_SSBS, diff --git a/arch/arm64/kernel/crash_dump.c b/arch/arm64/kernel/crash_dump.c index f46d57c31443..6b5037ed15b2 100644 --- a/arch/arm64/kernel/crash_dump.c +++ b/arch/arm64/kernel/crash_dump.c @@ -58,7 +58,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, /** * elfcorehdr_read - read from ELF core header * @buf: buffer where the data is placed - * @csize: number of bytes to read + * @count: number of bytes to read * @ppos: address in the memory * * This function reads @count bytes from elf core header which exists diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index 50986e388d2b..57e962290df3 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -216,8 +216,6 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, { unsigned long return_hooker = (unsigned long)&return_to_handler; unsigned long old; - struct ftrace_graph_ent trace; - int err; if (unlikely(atomic_read(¤t->tracing_graph_pause))) return; @@ -229,18 +227,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, */ old = *parent; - trace.func = self_addr; - trace.depth = current->curr_ret_stack + 1; - - /* Only trace if the calling function expects to */ - if (!ftrace_graph_entry(&trace)) - return; - - err = ftrace_push_return_trace(old, self_addr, &trace.depth, - frame_pointer, NULL); - if (err == -EBUSY) - return; - else + if (!function_graph_enter(old, self_addr, frame_pointer, NULL)) *parent = return_hooker; } diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index 6b2686d54411..29cdc99688f3 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -214,7 +214,7 @@ static int create_safe_exec_page(void *src_start, size_t length, } memcpy((void *)dst, src_start, length); - flush_icache_range(dst, dst + length); + __flush_icache_range(dst, dst + length); pgdp = pgd_offset_raw(allocator(mask), dst_addr); if (pgd_none(READ_ONCE(*pgdp))) { diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index 9b65132e789a..2a5b338b2542 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -23,7 +23,9 @@ #include <linux/slab.h> #include <linux/stop_machine.h> #include <linux/sched/debug.h> +#include <linux/set_memory.h> #include <linux/stringify.h> +#include <linux/vmalloc.h> #include <asm/traps.h> #include <asm/ptrace.h> #include <asm/cacheflush.h> @@ -42,10 +44,21 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); static void __kprobes post_kprobe_handler(struct kprobe_ctlblk *, struct pt_regs *); +static int __kprobes patch_text(kprobe_opcode_t *addr, u32 opcode) +{ + void *addrs[1]; + u32 insns[1]; + + addrs[0] = addr; + insns[0] = opcode; + + return aarch64_insn_patch_text(addrs, insns, 1); +} + static void __kprobes arch_prepare_ss_slot(struct kprobe *p) { /* prepare insn slot */ - p->ainsn.api.insn[0] = cpu_to_le32(p->opcode); + patch_text(p->ainsn.api.insn, p->opcode); flush_icache_range((uintptr_t) (p->ainsn.api.insn), (uintptr_t) (p->ainsn.api.insn) + @@ -118,15 +131,15 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) return 0; } -static int __kprobes patch_text(kprobe_opcode_t *addr, u32 opcode) +void *alloc_insn_page(void) { - void *addrs[1]; - u32 insns[1]; + void *page; - addrs[0] = (void *)addr; - insns[0] = (u32)opcode; + page = vmalloc_exec(PAGE_SIZE); + if (page) + set_memory_ro((unsigned long)page, 1); - return aarch64_insn_patch_text(addrs, insns, 1); + return page; } /* arm kprobe: install breakpoint in text */ diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index ce99c58cd1f1..d9a4c2d6dd8b 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -497,25 +497,3 @@ void arch_setup_new_exec(void) { current->mm->context.flags = is_compat_task() ? MMCF_AARCH32 : 0; } - -#ifdef CONFIG_GCC_PLUGIN_STACKLEAK -void __used stackleak_check_alloca(unsigned long size) -{ - unsigned long stack_left; - unsigned long current_sp = current_stack_pointer; - struct stack_info info; - - BUG_ON(!on_accessible_stack(current, current_sp, &info)); - - stack_left = current_sp - info.low; - - /* - * There's a good chance we're almost out of stack space if this - * is true. Using panic() over BUG() is more likely to give - * reliable debugging output. - */ - if (size >= stack_left) - panic("alloca() over the kernel stack boundary\n"); -} -EXPORT_SYMBOL(stackleak_check_alloca); -#endif diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 953e316521fc..f4fc1e0544b7 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -313,6 +313,7 @@ void __init setup_arch(char **cmdline_p) arm64_memblock_init(); paging_init(); + efi_apply_persistent_mem_reservations(); acpi_table_upgrade(); diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 3a703e5d4e32..a3ac26284845 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -160,6 +160,7 @@ void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr, __dma_unmap_area(phys_to_virt(paddr), size, dir); } +#ifdef CONFIG_IOMMU_DMA static int __swiotlb_get_sgtable_page(struct sg_table *sgt, struct page *page, size_t size) { @@ -188,6 +189,7 @@ static int __swiotlb_mmap_pfn(struct vm_area_struct *vma, return ret; } +#endif /* CONFIG_IOMMU_DMA */ static int __init atomic_pool_init(void) { diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 9d9582cac6c4..9b432d9fcada 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -483,8 +483,6 @@ void __init arm64_memblock_init(void) high_memory = __va(memblock_end_of_DRAM() - 1) + 1; dma_contiguous_reserve(arm64_dma_phys_limit); - - memblock_allow_resize(); } void __init bootmem_init(void) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 394b8d554def..d1d6601b385d 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -659,6 +659,8 @@ void __init paging_init(void) memblock_free(__pa_symbol(init_pg_dir), __pa_symbol(init_pg_end) - __pa_symbol(init_pg_dir)); + + memblock_allow_resize(); } /* diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index a6fdaea07c63..89198017e8e6 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -351,7 +351,8 @@ static void build_epilogue(struct jit_ctx *ctx) * >0 - successfully JITed a 16-byte eBPF instruction. * <0 - failed to JIT. */ -static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) +static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, + bool extra_pass) { const u8 code = insn->code; const u8 dst = bpf2a64[insn->dst_reg]; @@ -625,12 +626,19 @@ emit_cond_jmp: case BPF_JMP | BPF_CALL: { const u8 r0 = bpf2a64[BPF_REG_0]; - const u64 func = (u64)__bpf_call_base + imm; + bool func_addr_fixed; + u64 func_addr; + int ret; - if (ctx->prog->is_func) - emit_addr_mov_i64(tmp, func, ctx); + ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass, + &func_addr, &func_addr_fixed); + if (ret < 0) + return ret; + if (func_addr_fixed) + /* We can use optimized emission here. */ + emit_a64_mov_i64(tmp, func_addr, ctx); else - emit_a64_mov_i64(tmp, func, ctx); + emit_addr_mov_i64(tmp, func_addr, ctx); emit(A64_BLR(tmp), ctx); emit(A64_MOV(1, r0, A64_R(0)), ctx); break; @@ -753,7 +761,7 @@ emit_cond_jmp: return 0; } -static int build_body(struct jit_ctx *ctx) +static int build_body(struct jit_ctx *ctx, bool extra_pass) { const struct bpf_prog *prog = ctx->prog; int i; @@ -762,7 +770,7 @@ static int build_body(struct jit_ctx *ctx) const struct bpf_insn *insn = &prog->insnsi[i]; int ret; - ret = build_insn(insn, ctx); + ret = build_insn(insn, ctx, extra_pass); if (ret > 0) { i++; if (ctx->image == NULL) @@ -858,7 +866,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) /* 1. Initial fake pass to compute ctx->idx. */ /* Fake pass to fill in ctx->offset. */ - if (build_body(&ctx)) { + if (build_body(&ctx, extra_pass)) { prog = orig_prog; goto out_off; } @@ -888,7 +896,7 @@ skip_init_ctx: build_prologue(&ctx, was_classic); - if (build_body(&ctx)) { + if (build_body(&ctx, extra_pass)) { bpf_jit_binary_free(header); prog = orig_prog; goto out_off; diff --git a/arch/csky/Kconfig.debug b/arch/csky/Kconfig.debug index 48cf6ff9df4a..22a162cd99e8 100644 --- a/arch/csky/Kconfig.debug +++ b/arch/csky/Kconfig.debug @@ -1,9 +1 @@ -menu "C-SKY Debug Options" -config CSKY_BUILTIN_DTB - string "Use kernel builtin dtb" - help - User could define the dtb instead of the one which is passed from - bootloader. - Sometimes for debug, we want to use a built-in dtb and then we needn't - modify bootloader at all. -endmenu +# dummy file, do not delete diff --git a/arch/csky/Makefile b/arch/csky/Makefile index 67a4ae1fba2b..c639fc167895 100644 --- a/arch/csky/Makefile +++ b/arch/csky/Makefile @@ -65,26 +65,15 @@ libs-y += arch/csky/lib/ \ $(shell $(CC) $(KBUILD_CFLAGS) $(KCFLAGS) -print-libgcc-file-name) boot := arch/csky/boot -ifneq '$(CONFIG_CSKY_BUILTIN_DTB)' '""' core-y += $(boot)/dts/ -endif all: zImage - -dtbs: scripts - $(Q)$(MAKE) $(build)=$(boot)/dts - -%.dtb %.dtb.S %.dtb.o: scripts - $(Q)$(MAKE) $(build)=$(boot)/dts $(boot)/dts/$@ - -zImage Image uImage: vmlinux dtbs +zImage Image uImage: vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ archclean: $(Q)$(MAKE) $(clean)=$(boot) - $(Q)$(MAKE) $(clean)=$(boot)/dts - rm -rf arch/csky/include/generated define archhelp echo '* zImage - Compressed kernel image (arch/$(ARCH)/boot/zImage)' diff --git a/arch/csky/boot/dts/Makefile b/arch/csky/boot/dts/Makefile index 305e81a5e91e..c57ad3c880bf 100644 --- a/arch/csky/boot/dts/Makefile +++ b/arch/csky/boot/dts/Makefile @@ -1,13 +1,3 @@ dtstree := $(srctree)/$(src) -ifneq '$(CONFIG_CSKY_BUILTIN_DTB)' '""' -builtindtb-y := $(patsubst "%",%,$(CONFIG_CSKY_BUILTIN_DTB)) -dtb-y += $(builtindtb-y).dtb -obj-y += $(builtindtb-y).dtb.o -.SECONDARY: $(obj)/$(builtindtb-y).dtb.S -else dtb-y := $(patsubst $(dtstree)/%.dts,%.dtb, $(wildcard $(dtstree)/*.dts)) -endif - -always += $(dtb-y) -clean-files += *.dtb *.dtb.S diff --git a/arch/csky/include/asm/mmu_context.h b/arch/csky/include/asm/mmu_context.h index c410aa4fff1a..b2905c0485a7 100644 --- a/arch/csky/include/asm/mmu_context.h +++ b/arch/csky/include/asm/mmu_context.h @@ -16,7 +16,7 @@ static inline void tlbmiss_handler_setup_pgd(unsigned long pgd, bool kernel) { - pgd &= ~(1<<31); + pgd -= PAGE_OFFSET; pgd += PHYS_OFFSET; pgd |= 1; setup_pgd(pgd, kernel); @@ -29,7 +29,7 @@ static inline void tlbmiss_handler_setup_pgd(unsigned long pgd, bool kernel) static inline unsigned long tlb_get_pgd(void) { - return ((get_pgd()|(1<<31)) - PHYS_OFFSET) & ~1; + return ((get_pgd() - PHYS_OFFSET) & ~1) + PAGE_OFFSET; } #define cpu_context(cpu, mm) ((mm)->context.asid[cpu]) diff --git a/arch/ia64/include/asm/numa.h b/arch/ia64/include/asm/numa.h index ebef7f40aabb..c5c253cb9bd6 100644 --- a/arch/ia64/include/asm/numa.h +++ b/arch/ia64/include/asm/numa.h @@ -59,7 +59,9 @@ extern struct node_cpuid_s node_cpuid[NR_CPUS]; */ extern u8 numa_slit[MAX_NUMNODES * MAX_NUMNODES]; -#define node_distance(from,to) (numa_slit[(from) * MAX_NUMNODES + (to)]) +#define slit_distance(from,to) (numa_slit[(from) * MAX_NUMNODES + (to)]) +extern int __node_distance(int from, int to); +#define node_distance(from,to) __node_distance(from, to) extern int paddr_to_nid(unsigned long paddr); diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 1dacbf5e9e09..41eb281709da 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -578,8 +578,8 @@ void __init acpi_numa_fixup(void) if (!slit_table) { for (i = 0; i < MAX_NUMNODES; i++) for (j = 0; j < MAX_NUMNODES; j++) - node_distance(i, j) = i == j ? LOCAL_DISTANCE : - REMOTE_DISTANCE; + slit_distance(i, j) = i == j ? + LOCAL_DISTANCE : REMOTE_DISTANCE; return; } @@ -592,7 +592,7 @@ void __init acpi_numa_fixup(void) if (!pxm_bit_test(j)) continue; node_to = pxm_to_node(j); - node_distance(node_from, node_to) = + slit_distance(node_from, node_to) = slit_table->entry[i * slit_table->locality_count + j]; } } diff --git a/arch/ia64/mm/numa.c b/arch/ia64/mm/numa.c index 3861d6e32d5f..a03803506b0c 100644 --- a/arch/ia64/mm/numa.c +++ b/arch/ia64/mm/numa.c @@ -36,6 +36,12 @@ struct node_cpuid_s node_cpuid[NR_CPUS] = */ u8 numa_slit[MAX_NUMNODES * MAX_NUMNODES]; +int __node_distance(int from, int to) +{ + return slit_distance(from, to); +} +EXPORT_SYMBOL(__node_distance); + /* Identify which cnode a physical address resides on */ int paddr_to_nid(unsigned long paddr) diff --git a/arch/m68k/include/asm/pgtable_mm.h b/arch/m68k/include/asm/pgtable_mm.h index 6181e4134483..fe3ddd73a0cc 100644 --- a/arch/m68k/include/asm/pgtable_mm.h +++ b/arch/m68k/include/asm/pgtable_mm.h @@ -55,12 +55,12 @@ */ #ifdef CONFIG_SUN3 #define PTRS_PER_PTE 16 -#define __PAGETABLE_PMD_FOLDED +#define __PAGETABLE_PMD_FOLDED 1 #define PTRS_PER_PMD 1 #define PTRS_PER_PGD 2048 #elif defined(CONFIG_COLDFIRE) #define PTRS_PER_PTE 512 -#define __PAGETABLE_PMD_FOLDED +#define __PAGETABLE_PMD_FOLDED 1 #define PTRS_PER_PMD 1 #define PTRS_PER_PGD 1024 #else diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h index f64ebb9c9a41..e14b6621c933 100644 --- a/arch/microblaze/include/asm/pgtable.h +++ b/arch/microblaze/include/asm/pgtable.h @@ -63,7 +63,7 @@ extern int mem_init_done; #include <asm-generic/4level-fixup.h> -#define __PAGETABLE_PMD_FOLDED +#define __PAGETABLE_PMD_FOLDED 1 #ifdef __KERNEL__ #ifndef __ASSEMBLY__ diff --git a/arch/microblaze/kernel/ftrace.c b/arch/microblaze/kernel/ftrace.c index d57563c58a26..224eea40e1ee 100644 --- a/arch/microblaze/kernel/ftrace.c +++ b/arch/microblaze/kernel/ftrace.c @@ -22,8 +22,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) { unsigned long old; - int faulted, err; - struct ftrace_graph_ent trace; + int faulted; unsigned long return_hooker = (unsigned long) &return_to_handler; @@ -63,18 +62,8 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) return; } - err = ftrace_push_return_trace(old, self_addr, &trace.depth, 0, NULL); - if (err == -EBUSY) { + if (function_graph_enter(old, self_addr, 0, NULL)) *parent = old; - return; - } - - trace.func = self_addr; - /* Only trace if the calling function expects to */ - if (!ftrace_graph_entry(&trace)) { - current->curr_ret_stack--; - *parent = old; - } } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/mips/Makefile b/arch/mips/Makefile index 15a84cfd0719..68410490e12f 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -128,7 +128,7 @@ cflags-y += -ffreestanding # clang's output will be based upon the build machine. So for clang we simply # unconditionally specify -EB or -EL as appropriate. # -ifeq ($(cc-name),clang) +ifdef CONFIG_CC_IS_CLANG cflags-$(CONFIG_CPU_BIG_ENDIAN) += -EB cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -EL else diff --git a/arch/mips/cavium-octeon/executive/cvmx-helper.c b/arch/mips/cavium-octeon/executive/cvmx-helper.c index 75108ec669eb..6c79e8a16a26 100644 --- a/arch/mips/cavium-octeon/executive/cvmx-helper.c +++ b/arch/mips/cavium-octeon/executive/cvmx-helper.c @@ -67,7 +67,7 @@ void (*cvmx_override_pko_queue_priority) (int pko_port, void (*cvmx_override_ipd_port_setup) (int ipd_port); /* Port count per interface */ -static int interface_port_count[5]; +static int interface_port_count[9]; /** * Return the number of interfaces the chip has. Each interface diff --git a/arch/mips/configs/cavium_octeon_defconfig b/arch/mips/configs/cavium_octeon_defconfig index 490b12af103c..c52d0efacd14 100644 --- a/arch/mips/configs/cavium_octeon_defconfig +++ b/arch/mips/configs/cavium_octeon_defconfig @@ -140,6 +140,7 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_DS1307=y CONFIG_STAGING=y CONFIG_OCTEON_ETHERNET=y +CONFIG_OCTEON_USB=y # CONFIG_IOMMU_SUPPORT is not set CONFIG_RAS=y CONFIG_EXT4_FS=y diff --git a/arch/mips/include/asm/syscall.h b/arch/mips/include/asm/syscall.h index 0170602a1e4e..6cf8ffb5367e 100644 --- a/arch/mips/include/asm/syscall.h +++ b/arch/mips/include/asm/syscall.h @@ -73,7 +73,7 @@ static inline unsigned long mips_get_syscall_arg(unsigned long *arg, #ifdef CONFIG_64BIT case 4: case 5: case 6: case 7: #ifdef CONFIG_MIPS32_O32 - if (test_thread_flag(TIF_32BIT_REGS)) + if (test_tsk_thread_flag(task, TIF_32BIT_REGS)) return get_user(*arg, (int *)usp + n); else #endif diff --git a/arch/mips/kernel/ftrace.c b/arch/mips/kernel/ftrace.c index 7f3dfdbc3657..b122cbb4aad1 100644 --- a/arch/mips/kernel/ftrace.c +++ b/arch/mips/kernel/ftrace.c @@ -322,7 +322,6 @@ void prepare_ftrace_return(unsigned long *parent_ra_addr, unsigned long self_ra, unsigned long fp) { unsigned long old_parent_ra; - struct ftrace_graph_ent trace; unsigned long return_hooker = (unsigned long) &return_to_handler; int faulted, insns; @@ -369,12 +368,6 @@ void prepare_ftrace_return(unsigned long *parent_ra_addr, unsigned long self_ra, if (unlikely(faulted)) goto out; - if (ftrace_push_return_trace(old_parent_ra, self_ra, &trace.depth, fp, - NULL) == -EBUSY) { - *parent_ra_addr = old_parent_ra; - return; - } - /* * Get the recorded ip of the current mcount calling site in the * __mcount_loc section, which will be used to filter the function @@ -382,13 +375,10 @@ void prepare_ftrace_return(unsigned long *parent_ra_addr, unsigned long self_ra, */ insns = core_kernel_text(self_ra) ? 2 : MCOUNT_OFFSET_INSNS + 1; - trace.func = self_ra - (MCOUNT_INSN_SIZE * insns); + self_ra -= (MCOUNT_INSN_SIZE * insns); - /* Only trace if the calling function expects to */ - if (!ftrace_graph_entry(&trace)) { - current->curr_ret_stack--; + if (function_graph_enter(old_parent_ra, self_ra, fp, NULL)) *parent_ra_addr = old_parent_ra; - } return; out: ftrace_graph_stop(); diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index ea09ed6a80a9..8c6c48ed786a 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -794,6 +794,7 @@ static void __init arch_mem_init(char **cmdline_p) /* call board setup routine */ plat_mem_setup(); + memblock_set_bottom_up(true); /* * Make sure all kernel memory is in the maps. The "UP" and diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 0f852e1b5891..15e103c6d799 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -2260,10 +2260,8 @@ void __init trap_init(void) unsigned long size = 0x200 + VECTORSPACING*64; phys_addr_t ebase_pa; - memblock_set_bottom_up(true); ebase = (unsigned long) memblock_alloc_from(size, 1 << fls(size), 0); - memblock_set_bottom_up(false); /* * Try to ensure ebase resides in KSeg0 if possible. @@ -2307,6 +2305,7 @@ void __init trap_init(void) if (board_ebase_setup) board_ebase_setup(); per_cpu_trap_init(true); + memblock_set_bottom_up(false); /* * Copy the generic exception handlers to their final destination. diff --git a/arch/mips/loongson64/loongson-3/numa.c b/arch/mips/loongson64/loongson-3/numa.c index 622761878cd1..60bf0a1cb757 100644 --- a/arch/mips/loongson64/loongson-3/numa.c +++ b/arch/mips/loongson64/loongson-3/numa.c @@ -231,6 +231,8 @@ static __init void prom_meminit(void) cpumask_clear(&__node_data[(node)]->cpumask); } } + max_low_pfn = PHYS_PFN(memblock_end_of_DRAM()); + for (cpu = 0; cpu < loongson_sysconf.nr_cpus; cpu++) { node = cpu / loongson_sysconf.cores_per_node; if (node >= num_online_nodes()) @@ -248,19 +250,9 @@ static __init void prom_meminit(void) void __init paging_init(void) { - unsigned node; unsigned long zones_size[MAX_NR_ZONES] = {0, }; pagetable_init(); - - for_each_online_node(node) { - unsigned long start_pfn, end_pfn; - - get_pfn_range_for_nid(node, &start_pfn, &end_pfn); - - if (end_pfn > max_low_pfn) - max_low_pfn = end_pfn; - } #ifdef CONFIG_ZONE_DMA32 zones_size[ZONE_DMA32] = MAX_DMA32_PFN; #endif diff --git a/arch/mips/mm/dma-noncoherent.c b/arch/mips/mm/dma-noncoherent.c index e6c9485cadcf..cb38461391cb 100644 --- a/arch/mips/mm/dma-noncoherent.c +++ b/arch/mips/mm/dma-noncoherent.c @@ -50,7 +50,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, void *ret; ret = dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs); - if (!ret && !(attrs & DMA_ATTR_NON_CONSISTENT)) { + if (ret && !(attrs & DMA_ATTR_NON_CONSISTENT)) { dma_cache_wback_inv((unsigned long) ret, size); ret = (void *)UNCAC_ADDR(ret); } diff --git a/arch/mips/ralink/mt7620.c b/arch/mips/ralink/mt7620.c index 41b71c4352c2..c1ce6f43642b 100644 --- a/arch/mips/ralink/mt7620.c +++ b/arch/mips/ralink/mt7620.c @@ -84,7 +84,7 @@ static struct rt2880_pmx_func pcie_rst_grp[] = { }; static struct rt2880_pmx_func nd_sd_grp[] = { FUNC("nand", MT7620_GPIO_MODE_NAND, 45, 15), - FUNC("sd", MT7620_GPIO_MODE_SD, 45, 15) + FUNC("sd", MT7620_GPIO_MODE_SD, 47, 13) }; static struct rt2880_pmx_group mt7620a_pinmux_data[] = { diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c index d8b8444d6795..813d13f92957 100644 --- a/arch/mips/sgi-ip27/ip27-memory.c +++ b/arch/mips/sgi-ip27/ip27-memory.c @@ -435,6 +435,7 @@ void __init prom_meminit(void) mlreset(); szmem(); + max_low_pfn = PHYS_PFN(memblock_end_of_DRAM()); for (node = 0; node < MAX_COMPACT_NODES; node++) { if (node_online(node)) { @@ -455,18 +456,8 @@ extern void setup_zero_pages(void); void __init paging_init(void) { unsigned long zones_size[MAX_NR_ZONES] = {0, }; - unsigned node; pagetable_init(); - - for_each_online_node(node) { - unsigned long start_pfn, end_pfn; - - get_pfn_range_for_nid(node, &start_pfn, &end_pfn); - - if (end_pfn > max_low_pfn) - max_low_pfn = end_pfn; - } zones_size[ZONE_NORMAL] = max_low_pfn; free_area_init_nodes(zones_size); } diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile index 34605ca21498..58a0315ad743 100644 --- a/arch/mips/vdso/Makefile +++ b/arch/mips/vdso/Makefile @@ -10,7 +10,7 @@ ccflags-vdso := \ $(filter -march=%,$(KBUILD_CFLAGS)) \ -D__VDSO__ -ifeq ($(cc-name),clang) +ifdef CONFIG_CC_IS_CLANG ccflags-vdso += $(filter --target=%,$(KBUILD_CFLAGS)) endif diff --git a/arch/nds32/include/asm/pgtable.h b/arch/nds32/include/asm/pgtable.h index d3e19a55cf53..9f52db930c00 100644 --- a/arch/nds32/include/asm/pgtable.h +++ b/arch/nds32/include/asm/pgtable.h @@ -4,7 +4,7 @@ #ifndef _ASMNDS32_PGTABLE_H #define _ASMNDS32_PGTABLE_H -#define __PAGETABLE_PMD_FOLDED +#define __PAGETABLE_PMD_FOLDED 1 #include <asm-generic/4level-fixup.h> #include <asm-generic/sizes.h> diff --git a/arch/nds32/kernel/ftrace.c b/arch/nds32/kernel/ftrace.c index a0a9679ad5de..8a41372551ff 100644 --- a/arch/nds32/kernel/ftrace.c +++ b/arch/nds32/kernel/ftrace.c @@ -211,29 +211,15 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, unsigned long frame_pointer) { unsigned long return_hooker = (unsigned long)&return_to_handler; - struct ftrace_graph_ent trace; unsigned long old; - int err; if (unlikely(atomic_read(¤t->tracing_graph_pause))) return; old = *parent; - trace.func = self_addr; - trace.depth = current->curr_ret_stack + 1; - - /* Only trace if the calling function expects to */ - if (!ftrace_graph_entry(&trace)) - return; - - err = ftrace_push_return_trace(old, self_addr, &trace.depth, - frame_pointer, NULL); - - if (err == -EBUSY) - return; - - *parent = return_hooker; + if (!function_graph_enter(old, self_addr, frame_pointer, NULL)) + *parent = return_hooker; } noinline void ftrace_graph_caller(void) diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile index d047a09d660f..1085385e1f06 100644 --- a/arch/parisc/Makefile +++ b/arch/parisc/Makefile @@ -71,6 +71,13 @@ ifdef CONFIG_MLONGCALLS KBUILD_CFLAGS_KERNEL += -mlong-calls endif +# Without this, "ld -r" results in .text sections that are too big (> 0x40000) +# for branches to reach stubs. And multiple .text sections trigger a warning +# when creating the sysfs module information section. +ifndef CONFIG_64BIT +KBUILD_CFLAGS_MODULE += -ffunction-sections +endif + # select which processor to optimise for cflags-$(CONFIG_PA7000) += -march=1.1 -mschedule=7100 cflags-$(CONFIG_PA7200) += -march=1.1 -mschedule=7200 diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index b941ac7d4e70..c7bb74e22436 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -111,7 +111,7 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr) #if CONFIG_PGTABLE_LEVELS == 3 #define BITS_PER_PMD (PAGE_SHIFT + PMD_ORDER - BITS_PER_PMD_ENTRY) #else -#define __PAGETABLE_PMD_FOLDED +#define __PAGETABLE_PMD_FOLDED 1 #define BITS_PER_PMD 0 #endif #define PTRS_PER_PMD (1UL << BITS_PER_PMD) diff --git a/arch/parisc/include/asm/spinlock.h b/arch/parisc/include/asm/spinlock.h index 16aec9ba2580..8a63515f03bf 100644 --- a/arch/parisc/include/asm/spinlock.h +++ b/arch/parisc/include/asm/spinlock.h @@ -37,8 +37,8 @@ static inline void arch_spin_unlock(arch_spinlock_t *x) volatile unsigned int *a; a = __ldcw_align(x); - /* Release with ordered store. */ - __asm__ __volatile__("stw,ma %0,0(%1)" : : "r"(1), "r"(a) : "memory"); + mb(); + *a = 1; } static inline int arch_spin_trylock(arch_spinlock_t *x) diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c index 6fa8535d3cce..e46a4157a894 100644 --- a/arch/parisc/kernel/ftrace.c +++ b/arch/parisc/kernel/ftrace.c @@ -30,7 +30,6 @@ static void __hot prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) { unsigned long old; - struct ftrace_graph_ent trace; extern int parisc_return_to_handler; if (unlikely(ftrace_graph_is_dead())) @@ -41,19 +40,9 @@ static void __hot prepare_ftrace_return(unsigned long *parent, old = *parent; - trace.func = self_addr; - trace.depth = current->curr_ret_stack + 1; - - /* Only trace if the calling function expects to */ - if (!ftrace_graph_entry(&trace)) - return; - - if (ftrace_push_return_trace(old, self_addr, &trace.depth, - 0, NULL) == -EBUSY) - return; - - /* activate parisc_return_to_handler() as return point */ - *parent = (unsigned long) &parisc_return_to_handler; + if (!function_graph_enter(old, self_addr, 0, NULL)) + /* activate parisc_return_to_handler() as return point */ + *parent = (unsigned long) &parisc_return_to_handler; } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S index 9505c317818d..a9bc90dc4ae7 100644 --- a/arch/parisc/kernel/syscall.S +++ b/arch/parisc/kernel/syscall.S @@ -640,7 +640,8 @@ cas_action: sub,<> %r28, %r25, %r0 2: stw %r24, 0(%r26) /* Free lock */ - stw,ma %r20, 0(%sr2,%r20) + sync + stw %r20, 0(%sr2,%r20) #if ENABLE_LWS_DEBUG /* Clear thread register indicator */ stw %r0, 4(%sr2,%r20) @@ -654,7 +655,8 @@ cas_action: 3: /* Error occurred on load or store */ /* Free lock */ - stw,ma %r20, 0(%sr2,%r20) + sync + stw %r20, 0(%sr2,%r20) #if ENABLE_LWS_DEBUG stw %r0, 4(%sr2,%r20) #endif @@ -855,7 +857,8 @@ cas2_action: cas2_end: /* Free lock */ - stw,ma %r20, 0(%sr2,%r20) + sync + stw %r20, 0(%sr2,%r20) /* Enable interrupts */ ssm PSW_SM_I, %r0 /* Return to userspace, set no error */ @@ -865,7 +868,8 @@ cas2_end: 22: /* Error occurred on load or store */ /* Free lock */ - stw,ma %r20, 0(%sr2,%r20) + sync + stw %r20, 0(%sr2,%r20) ssm PSW_SM_I, %r0 ldo 1(%r0),%r28 b lws_exit diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 2d51b2bd4aa1..8be31261aec8 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -930,10 +930,6 @@ config FSL_GTM help Freescale General-purpose Timers support -# Yes MCA RS/6000s exist but Linux-PPC does not currently support any -config MCA - bool - # Platforms that what PCI turned unconditionally just do select PCI # in their config node. Platforms that want to choose at config # time should select PPC_PCI_CHOICE @@ -944,7 +940,6 @@ config PCI bool "PCI support" if PPC_PCI_CHOICE default y if !40x && !CPM2 && !PPC_8xx && !PPC_83xx \ && !PPC_85xx && !PPC_86xx && !GAMECUBE_COMMON - default PCI_QSPAN if PPC_8xx select GENERIC_PCI_IOMAP help Find out whether your system includes a PCI bus. PCI is the name of @@ -958,14 +953,6 @@ config PCI_DOMAINS config PCI_SYSCALL def_bool PCI -config PCI_QSPAN - bool "QSpan PCI" - depends on PPC_8xx - select PPC_I8259 - help - Say Y here if you have a system based on a Motorola 8xx-series - embedded processor with a QSPAN PCI interface, otherwise say N. - config PCI_8260 bool depends on PCI && 8260 diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 17be664dafa2..8a2ce14d68d0 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -96,7 +96,7 @@ aflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mabi=elfv1) aflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mabi=elfv2 endif -ifneq ($(cc-name),clang) +ifndef CONFIG_CC_IS_CLANG cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mno-strict-align endif @@ -175,7 +175,7 @@ endif # Work around gcc code-gen bugs with -pg / -fno-omit-frame-pointer in gcc <= 4.8 # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=44199 # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52828 -ifneq ($(cc-name),clang) +ifndef CONFIG_CC_IS_CLANG CC_FLAGS_FTRACE += $(call cc-ifversion, -lt, 0409, -mno-sched-epilog) endif endif diff --git a/arch/powerpc/boot/dts/fsl/t2080rdb.dts b/arch/powerpc/boot/dts/fsl/t2080rdb.dts index 55c0210a771d..092a400740f8 100644 --- a/arch/powerpc/boot/dts/fsl/t2080rdb.dts +++ b/arch/powerpc/boot/dts/fsl/t2080rdb.dts @@ -77,12 +77,12 @@ }; ethernet@f0000 { - phy-handle = <&xg_cs4315_phy1>; + phy-handle = <&xg_cs4315_phy2>; phy-connection-type = "xgmii"; }; ethernet@f2000 { - phy-handle = <&xg_cs4315_phy2>; + phy-handle = <&xg_cs4315_phy1>; phy-connection-type = "xgmii"; }; diff --git a/arch/powerpc/boot/dts/mpc885ads.dts b/arch/powerpc/boot/dts/mpc885ads.dts index 5b037f51741d..3aa300afbbca 100644 --- a/arch/powerpc/boot/dts/mpc885ads.dts +++ b/arch/powerpc/boot/dts/mpc885ads.dts @@ -72,7 +72,7 @@ #address-cells = <1>; #size-cells = <1>; device_type = "soc"; - ranges = <0x0 0xff000000 0x4000>; + ranges = <0x0 0xff000000 0x28000>; bus-frequency = <0>; // Temporary -- will go away once kernel uses ranges for get_immrbase(). @@ -224,6 +224,17 @@ #size-cells = <0>; }; }; + + crypto@20000 { + compatible = "fsl,sec1.2", "fsl,sec1.0"; + reg = <0x20000 0x8000>; + interrupts = <1 1>; + interrupt-parent = <&PIC>; + fsl,num-channels = <1>; + fsl,channel-fifo-len = <24>; + fsl,exec-units-mask = <0x4c>; + fsl,descriptor-types-mask = <0x05000154>; + }; }; chosen { diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index 31733a95bbd0..3d5acd2b113a 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h @@ -36,6 +36,11 @@ int raw_patch_instruction(unsigned int *addr, unsigned int instr); int patch_instruction_site(s32 *addr, unsigned int instr); int patch_branch_site(s32 *site, unsigned long target, int flags); +static inline unsigned long patch_site_addr(s32 *site) +{ + return (unsigned long)site + *site; +} + int instr_is_relative_branch(unsigned int instr); int instr_is_relative_link_branch(unsigned int instr); int instr_is_branch_to_addr(const unsigned int *instr, unsigned long addr); diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index 3ef40b703c4a..e746becd9d6f 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -268,19 +268,13 @@ extern void _memcpy_toio(volatile void __iomem *dest, const void *src, * their hooks, a bitfield is reserved for use by the platform near the * top of MMIO addresses (not PIO, those have to cope the hard way). * - * This bit field is 12 bits and is at the top of the IO virtual - * addresses PCI_IO_INDIRECT_TOKEN_MASK. + * The highest address in the kernel virtual space are: * - * The kernel virtual space is thus: + * d0003fffffffffff # with Hash MMU + * c00fffffffffffff # with Radix MMU * - * 0xD000000000000000 : vmalloc - * 0xD000080000000000 : PCI PHB IO space - * 0xD000080080000000 : ioremap - * 0xD0000fffffffffff : end of ioremap region - * - * Since the top 4 bits are reserved as the region ID, we use thus - * the next 12 bits and keep 4 bits available for the future if the - * virtual address space is ever to be extended. + * The top 4 bits are reserved as the region ID on hash, leaving us 8 bits + * that can be used for the field. * * The direct IO mapping operations will then mask off those bits * before doing the actual access, though that only happen when @@ -292,8 +286,8 @@ extern void _memcpy_toio(volatile void __iomem *dest, const void *src, */ #ifdef CONFIG_PPC_INDIRECT_MMIO -#define PCI_IO_IND_TOKEN_MASK 0x0fff000000000000ul -#define PCI_IO_IND_TOKEN_SHIFT 48 +#define PCI_IO_IND_TOKEN_SHIFT 52 +#define PCI_IO_IND_TOKEN_MASK (0xfful << PCI_IO_IND_TOKEN_SHIFT) #define PCI_FIX_ADDR(addr) \ ((PCI_IO_ADDR)(((unsigned long)(addr)) & ~PCI_IO_IND_TOKEN_MASK)) #define PCI_GET_ADDR_TOKEN(addr) \ diff --git a/arch/powerpc/include/asm/mmu-8xx.h b/arch/powerpc/include/asm/mmu-8xx.h index 4f547752ae79..fa05aa566ece 100644 --- a/arch/powerpc/include/asm/mmu-8xx.h +++ b/arch/powerpc/include/asm/mmu-8xx.h @@ -34,20 +34,12 @@ * respectively NA for All or X for Supervisor and no access for User. * Then we use the APG to say whether accesses are according to Page rules or * "all Supervisor" rules (Access to all) - * We also use the 2nd APG bit for _PAGE_ACCESSED when having SWAP: - * When that bit is not set access is done iaw "all user" - * which means no access iaw page rules. - * Therefore, we define 4 APG groups. lsb is _PMD_USER, 2nd is _PAGE_ACCESSED - * 0x => No access => 11 (all accesses performed as user iaw page definition) - * 10 => No user => 01 (all accesses performed according to page definition) - * 11 => User => 00 (all accesses performed as supervisor iaw page definition) + * Therefore, we define 2 APG groups. lsb is _PMD_USER + * 0 => No user => 01 (all accesses performed according to page definition) + * 1 => User => 00 (all accesses performed as supervisor iaw page definition) * We define all 16 groups so that all other bits of APG can take any value */ -#ifdef CONFIG_SWAP -#define MI_APG_INIT 0xf4f4f4f4 -#else #define MI_APG_INIT 0x44444444 -#endif /* The effective page number register. When read, contains the information * about the last instruction TLB miss. When MI_RPN is written, bits in @@ -115,20 +107,12 @@ * Supervisor and no access for user and NA for ALL. * Then we use the APG to say whether accesses are according to Page rules or * "all Supervisor" rules (Access to all) - * We also use the 2nd APG bit for _PAGE_ACCESSED when having SWAP: - * When that bit is not set access is done iaw "all user" - * which means no access iaw page rules. - * Therefore, we define 4 APG groups. lsb is _PMD_USER, 2nd is _PAGE_ACCESSED - * 0x => No access => 11 (all accesses performed as user iaw page definition) - * 10 => No user => 01 (all accesses performed according to page definition) - * 11 => User => 00 (all accesses performed as supervisor iaw page definition) + * Therefore, we define 2 APG groups. lsb is _PMD_USER + * 0 => No user => 01 (all accesses performed according to page definition) + * 1 => User => 00 (all accesses performed as supervisor iaw page definition) * We define all 16 groups so that all other bits of APG can take any value */ -#ifdef CONFIG_SWAP -#define MD_APG_INIT 0xf4f4f4f4 -#else #define MD_APG_INIT 0x44444444 -#endif /* The effective page number register. When read, contains the information * about the last instruction TLB miss. When MD_RPN is written, bits in @@ -180,12 +164,6 @@ */ #define SPRN_M_TW 799 -/* APGs */ -#define M_APG0 0x00000000 -#define M_APG1 0x00000020 -#define M_APG2 0x00000040 -#define M_APG3 0x00000060 - #ifdef CONFIG_PPC_MM_SLICES #include <asm/nohash/32/slice.h> #define SLICE_ARRAY_SIZE (1 << (32 - SLICE_LOW_SHIFT - 1)) @@ -251,6 +229,15 @@ static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize) BUG(); } +/* patch sites */ +extern s32 patch__itlbmiss_linmem_top; +extern s32 patch__dtlbmiss_linmem_top, patch__dtlbmiss_immr_jmp; +extern s32 patch__fixupdar_linmem_top; + +extern s32 patch__itlbmiss_exit_1, patch__itlbmiss_exit_2; +extern s32 patch__dtlbmiss_exit_1, patch__dtlbmiss_exit_2, patch__dtlbmiss_exit_3; +extern s32 patch__itlbmiss_perf, patch__dtlbmiss_perf; + #endif /* !__ASSEMBLY__ */ #if defined(CONFIG_PPC_4K_PAGES) diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 6093bc8f74e5..a6e9e314c707 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -493,6 +493,8 @@ __PPC_RS(t) | __PPC_RA0(a) | __PPC_RB(b)) #define PPC_SLBFEE_DOT(t, b) stringify_in_c(.long PPC_INST_SLBFEE | \ __PPC_RT(t) | __PPC_RB(b)) +#define __PPC_SLBFEE_DOT(t, b) stringify_in_c(.long PPC_INST_SLBFEE | \ + ___PPC_RT(t) | ___PPC_RB(b)) #define PPC_ICBT(c,a,b) stringify_in_c(.long PPC_INST_ICBT | \ __PPC_CT(c) | __PPC_RA0(a) | __PPC_RB(b)) /* PASemi instructions */ diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index f73886a1a7f5..0b8a735b6d85 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -54,6 +54,7 @@ struct pt_regs #ifdef CONFIG_PPC64 unsigned long ppr; + unsigned long __pad; /* Maintain 16 byte interrupt stack alignment */ #endif }; #endif diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index bb38dd67d47d..1b06add4f092 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -5,6 +5,7 @@ #include <linux/spinlock.h> #include <asm/page.h> #include <linux/time.h> +#include <linux/cpumask.h> /* * Definitions for talking to the RTAS on CHRP machines. diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 134a573a9f2d..3b67b9533c82 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -31,6 +31,7 @@ #include <asm/asm-offsets.h> #include <asm/ptrace.h> #include <asm/export.h> +#include <asm/code-patching-asm.h> #if CONFIG_TASK_SIZE <= 0x80000000 && CONFIG_PAGE_OFFSET >= 0x80000000 /* By simply checking Address >= 0x80000000, we know if its a kernel address */ @@ -318,8 +319,8 @@ InstructionTLBMiss: cmpli cr0, r11, PAGE_OFFSET@h #ifndef CONFIG_PIN_TLB_TEXT /* It is assumed that kernel code fits into the first 8M page */ -_ENTRY(ITLBMiss_cmp) - cmpli cr7, r11, (PAGE_OFFSET + 0x0800000)@h +0: cmpli cr7, r11, (PAGE_OFFSET + 0x0800000)@h + patch_site 0b, patch__itlbmiss_linmem_top #endif #endif #endif @@ -353,13 +354,14 @@ _ENTRY(ITLBMiss_cmp) #if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE) mtcr r12 #endif - -#ifdef CONFIG_SWAP - rlwinm r11, r10, 31, _PAGE_ACCESSED >> 1 -#endif /* Load the MI_TWC with the attributes for this "segment." */ mtspr SPRN_MI_TWC, r11 /* Set segment attributes */ +#ifdef CONFIG_SWAP + rlwinm r11, r10, 32-5, _PAGE_PRESENT + and r11, r11, r10 + rlwimi r10, r11, 0, _PAGE_PRESENT +#endif li r11, RPN_PATTERN | 0x200 /* The Linux PTE won't go exactly into the MMU TLB. * Software indicator bits 20 and 23 must be clear. @@ -372,16 +374,17 @@ _ENTRY(ITLBMiss_cmp) mtspr SPRN_MI_RPN, r10 /* Update TLB entry */ /* Restore registers */ -_ENTRY(itlb_miss_exit_1) - mfspr r10, SPRN_SPRG_SCRATCH0 +0: mfspr r10, SPRN_SPRG_SCRATCH0 mfspr r11, SPRN_SPRG_SCRATCH1 #if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE) mfspr r12, SPRN_SPRG_SCRATCH2 #endif rfi + patch_site 0b, patch__itlbmiss_exit_1 + #ifdef CONFIG_PERF_EVENTS -_ENTRY(itlb_miss_perf) - lis r10, (itlb_miss_counter - PAGE_OFFSET)@ha + patch_site 0f, patch__itlbmiss_perf +0: lis r10, (itlb_miss_counter - PAGE_OFFSET)@ha lwz r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10) addi r11, r11, 1 stw r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10) @@ -435,11 +438,11 @@ DataStoreTLBMiss: #ifndef CONFIG_PIN_TLB_IMMR cmpli cr0, r11, VIRT_IMMR_BASE@h #endif -_ENTRY(DTLBMiss_cmp) - cmpli cr7, r11, (PAGE_OFFSET + 0x1800000)@h +0: cmpli cr7, r11, (PAGE_OFFSET + 0x1800000)@h + patch_site 0b, patch__dtlbmiss_linmem_top #ifndef CONFIG_PIN_TLB_IMMR -_ENTRY(DTLBMiss_jmp) - beq- DTLBMissIMMR +0: beq- DTLBMissIMMR + patch_site 0b, patch__dtlbmiss_immr_jmp #endif blt cr7, DTLBMissLinear lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha @@ -470,14 +473,22 @@ _ENTRY(DTLBMiss_jmp) * above. */ rlwimi r11, r10, 0, _PAGE_GUARDED -#ifdef CONFIG_SWAP - /* _PAGE_ACCESSED has to be set. We use second APG bit for that, 0 - * on that bit will represent a Non Access group - */ - rlwinm r11, r10, 31, _PAGE_ACCESSED >> 1 -#endif mtspr SPRN_MD_TWC, r11 + /* Both _PAGE_ACCESSED and _PAGE_PRESENT has to be set. + * We also need to know if the insn is a load/store, so: + * Clear _PAGE_PRESENT and load that which will + * trap into DTLB Error with store bit set accordinly. + */ + /* PRESENT=0x1, ACCESSED=0x20 + * r11 = ((r10 & PRESENT) & ((r10 & ACCESSED) >> 5)); + * r10 = (r10 & ~PRESENT) | r11; + */ +#ifdef CONFIG_SWAP + rlwinm r11, r10, 32-5, _PAGE_PRESENT + and r11, r11, r10 + rlwimi r10, r11, 0, _PAGE_PRESENT +#endif /* The Linux PTE won't go exactly into the MMU TLB. * Software indicator bits 24, 25, 26, and 27 must be * set. All other Linux PTE bits control the behavior @@ -489,14 +500,16 @@ _ENTRY(DTLBMiss_jmp) /* Restore registers */ mtspr SPRN_DAR, r11 /* Tag DAR */ -_ENTRY(dtlb_miss_exit_1) - mfspr r10, SPRN_SPRG_SCRATCH0 + +0: mfspr r10, SPRN_SPRG_SCRATCH0 mfspr r11, SPRN_SPRG_SCRATCH1 mfspr r12, SPRN_SPRG_SCRATCH2 rfi + patch_site 0b, patch__dtlbmiss_exit_1 + #ifdef CONFIG_PERF_EVENTS -_ENTRY(dtlb_miss_perf) - lis r10, (dtlb_miss_counter - PAGE_OFFSET)@ha + patch_site 0f, patch__dtlbmiss_perf +0: lis r10, (dtlb_miss_counter - PAGE_OFFSET)@ha lwz r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10) addi r11, r11, 1 stw r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10) @@ -637,8 +650,8 @@ InstructionBreakpoint: */ DTLBMissIMMR: mtcr r12 - /* Set 512k byte guarded page and mark it valid and accessed */ - li r10, MD_PS512K | MD_GUARDED | MD_SVALID | M_APG2 + /* Set 512k byte guarded page and mark it valid */ + li r10, MD_PS512K | MD_GUARDED | MD_SVALID mtspr SPRN_MD_TWC, r10 mfspr r10, SPRN_IMMR /* Get current IMMR */ rlwinm r10, r10, 0, 0xfff80000 /* Get 512 kbytes boundary */ @@ -648,16 +661,17 @@ DTLBMissIMMR: li r11, RPN_PATTERN mtspr SPRN_DAR, r11 /* Tag DAR */ -_ENTRY(dtlb_miss_exit_2) - mfspr r10, SPRN_SPRG_SCRATCH0 + +0: mfspr r10, SPRN_SPRG_SCRATCH0 mfspr r11, SPRN_SPRG_SCRATCH1 mfspr r12, SPRN_SPRG_SCRATCH2 rfi + patch_site 0b, patch__dtlbmiss_exit_2 DTLBMissLinear: mtcr r12 - /* Set 8M byte page and mark it valid and accessed */ - li r11, MD_PS8MEG | MD_SVALID | M_APG2 + /* Set 8M byte page and mark it valid */ + li r11, MD_PS8MEG | MD_SVALID mtspr SPRN_MD_TWC, r11 rlwinm r10, r10, 0, 0x0f800000 /* 8xx supports max 256Mb RAM */ ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY | \ @@ -666,28 +680,29 @@ DTLBMissLinear: li r11, RPN_PATTERN mtspr SPRN_DAR, r11 /* Tag DAR */ -_ENTRY(dtlb_miss_exit_3) - mfspr r10, SPRN_SPRG_SCRATCH0 + +0: mfspr r10, SPRN_SPRG_SCRATCH0 mfspr r11, SPRN_SPRG_SCRATCH1 mfspr r12, SPRN_SPRG_SCRATCH2 rfi + patch_site 0b, patch__dtlbmiss_exit_3 #ifndef CONFIG_PIN_TLB_TEXT ITLBMissLinear: mtcr r12 - /* Set 8M byte page and mark it valid,accessed */ - li r11, MI_PS8MEG | MI_SVALID | M_APG2 + /* Set 8M byte page and mark it valid */ + li r11, MI_PS8MEG | MI_SVALID mtspr SPRN_MI_TWC, r11 rlwinm r10, r10, 0, 0x0f800000 /* 8xx supports max 256Mb RAM */ ori r10, r10, 0xf0 | MI_SPS16K | _PAGE_SH | _PAGE_DIRTY | \ _PAGE_PRESENT mtspr SPRN_MI_RPN, r10 /* Update TLB entry */ -_ENTRY(itlb_miss_exit_2) - mfspr r10, SPRN_SPRG_SCRATCH0 +0: mfspr r10, SPRN_SPRG_SCRATCH0 mfspr r11, SPRN_SPRG_SCRATCH1 mfspr r12, SPRN_SPRG_SCRATCH2 rfi + patch_site 0b, patch__itlbmiss_exit_2 #endif /* This is the procedure to calculate the data EA for buggy dcbx,dcbi instructions @@ -705,8 +720,10 @@ FixupDAR:/* Entry point for dcbx workaround. */ mfspr r11, SPRN_M_TW /* Get level 1 table */ blt+ 3f rlwinm r11, r10, 16, 0xfff8 -_ENTRY(FixupDAR_cmp) - cmpli cr7, r11, (PAGE_OFFSET + 0x1800000)@h + +0: cmpli cr7, r11, (PAGE_OFFSET + 0x1800000)@h + patch_site 0b, patch__fixupdar_linmem_top + /* create physical page address from effective address */ tophys(r11, r10) blt- cr7, 201f @@ -960,7 +977,7 @@ initial_mmu: ori r8, r8, MI_EVALID /* Mark it valid */ mtspr SPRN_MI_EPN, r8 li r8, MI_PS8MEG /* Set 8M byte page */ - ori r8, r8, MI_SVALID | M_APG2 /* Make it valid, APG 2 */ + ori r8, r8, MI_SVALID /* Make it valid */ mtspr SPRN_MI_TWC, r8 li r8, MI_BOOTINIT /* Create RPN for address 0 */ mtspr SPRN_MI_RPN, r8 /* Store TLB entry */ @@ -987,7 +1004,7 @@ initial_mmu: ori r8, r8, MD_EVALID /* Mark it valid */ mtspr SPRN_MD_EPN, r8 li r8, MD_PS512K | MD_GUARDED /* Set 512k byte page */ - ori r8, r8, MD_SVALID | M_APG2 /* Make it valid and accessed */ + ori r8, r8, MD_SVALID /* Make it valid */ mtspr SPRN_MD_TWC, r8 mr r8, r9 /* Create paddr for TLB */ ori r8, r8, MI_BOOTINIT|0x2 /* Inhibit cache -- Cort */ diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 4d5322cfad25..96f34730010f 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -590,12 +590,11 @@ void flush_all_to_thread(struct task_struct *tsk) if (tsk->thread.regs) { preempt_disable(); BUG_ON(tsk != current); - save_all(tsk); - #ifdef CONFIG_SPE if (tsk->thread.regs->msr & MSR_SPE) tsk->thread.spefscr = mfspr(SPRN_SPEFSCR); #endif + save_all(tsk); preempt_enable(); } diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 2a51e4cc8246..236c1151a3a7 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -636,6 +636,8 @@ static void *__init alloc_stack(unsigned long limit, int cpu) { unsigned long pa; + BUILD_BUG_ON(STACK_INT_FRAME_SIZE % 16); + pa = memblock_alloc_base_nid(THREAD_SIZE, THREAD_SIZE, limit, early_cpu_to_node(cpu), MEMBLOCK_NONE); if (!pa) { diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 4bf051d3e21e..b65c8a34ad6e 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -950,7 +950,6 @@ int ftrace_disable_ftrace_graph_caller(void) */ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip) { - struct ftrace_graph_ent trace; unsigned long return_hooker; if (unlikely(ftrace_graph_is_dead())) @@ -961,18 +960,8 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip) return_hooker = ppc_function_entry(return_to_handler); - trace.func = ip; - trace.depth = current->curr_ret_stack + 1; - - /* Only trace if the calling function expects to */ - if (!ftrace_graph_entry(&trace)) - goto out; - - if (ftrace_push_return_trace(parent, ip, &trace.depth, 0, - NULL) == -EBUSY) - goto out; - - parent = return_hooker; + if (!function_graph_enter(parent, ip, 0, NULL)) + parent = return_hooker; out: return parent; } diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index bf8def2159c3..a56f8413758a 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -983,6 +983,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) ret = kvmhv_enter_nested_guest(vcpu); if (ret == H_INTERRUPT) { kvmppc_set_gpr(vcpu, 3, 0); + vcpu->arch.hcall_needed = 0; return -EINTR; } break; @@ -2337,8 +2338,7 @@ static void kvmppc_set_timer(struct kvm_vcpu *vcpu) kvmppc_core_prepare_to_enter(vcpu); return; } - dec_nsec = (vcpu->arch.dec_expires - now) * NSEC_PER_SEC - / tb_ticks_per_sec; + dec_nsec = tb_to_ns(vcpu->arch.dec_expires - now); hrtimer_start(&vcpu->arch.dec_timer, dec_nsec, HRTIMER_MODE_REL); vcpu->arch.timer_running = 1; } diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index fa888bfc347e..9f5b8c01c4e1 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -61,11 +61,10 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu) dec_time = vcpu->arch.dec; /* - * Guest timebase ticks at the same frequency as host decrementer. - * So use the host decrementer calculations for decrementer emulation. + * Guest timebase ticks at the same frequency as host timebase. + * So use the host timebase calculations for decrementer emulation. */ - dec_time = dec_time << decrementer_clockevent.shift; - do_div(dec_time, decrementer_clockevent.mult); + dec_time = tb_to_ns(dec_time); dec_nsec = do_div(dec_time, NSEC_PER_SEC); hrtimer_start(&vcpu->arch.dec_timer, ktime_set(dec_time, dec_nsec), HRTIMER_MODE_REL); diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h index 491b0f715d6b..ea1d7c808319 100644 --- a/arch/powerpc/kvm/trace.h +++ b/arch/powerpc/kvm/trace.h @@ -6,8 +6,6 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM kvm -#define TRACE_INCLUDE_PATH . -#define TRACE_INCLUDE_FILE trace /* * Tracepoint for guest mode entry. @@ -120,4 +118,10 @@ TRACE_EVENT(kvm_check_requests, #endif /* _TRACE_KVM_H */ /* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE + +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace + #include <trace/define_trace.h> diff --git a/arch/powerpc/kvm/trace_booke.h b/arch/powerpc/kvm/trace_booke.h index ac640e81fdc5..3837842986aa 100644 --- a/arch/powerpc/kvm/trace_booke.h +++ b/arch/powerpc/kvm/trace_booke.h @@ -6,8 +6,6 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM kvm_booke -#define TRACE_INCLUDE_PATH . -#define TRACE_INCLUDE_FILE trace_booke #define kvm_trace_symbol_exit \ {0, "CRITICAL"}, \ @@ -218,4 +216,11 @@ TRACE_EVENT(kvm_booke_queue_irqprio, #endif /* This part must be outside protection */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE + +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_booke + #include <trace/define_trace.h> diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h index bcfe8a987f6a..8a1e3b0047f1 100644 --- a/arch/powerpc/kvm/trace_hv.h +++ b/arch/powerpc/kvm/trace_hv.h @@ -9,8 +9,6 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM kvm_hv -#define TRACE_INCLUDE_PATH . -#define TRACE_INCLUDE_FILE trace_hv #define kvm_trace_symbol_hcall \ {H_REMOVE, "H_REMOVE"}, \ @@ -497,4 +495,11 @@ TRACE_EVENT(kvmppc_run_vcpu_exit, #endif /* _TRACE_KVM_HV_H */ /* This part must be outside protection */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE + +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_hv + #include <trace/define_trace.h> diff --git a/arch/powerpc/kvm/trace_pr.h b/arch/powerpc/kvm/trace_pr.h index 2f9a8829552b..46a46d328fbf 100644 --- a/arch/powerpc/kvm/trace_pr.h +++ b/arch/powerpc/kvm/trace_pr.h @@ -8,8 +8,6 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM kvm_pr -#define TRACE_INCLUDE_PATH . -#define TRACE_INCLUDE_FILE trace_pr TRACE_EVENT(kvm_book3s_reenter, TP_PROTO(int r, struct kvm_vcpu *vcpu), @@ -257,4 +255,11 @@ TRACE_EVENT(kvm_exit, #endif /* _TRACE_KVM_H */ /* This part must be outside protection */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE + +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_pr + #include <trace/define_trace.h> diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c index 36484a2ef915..01b7f5107c3a 100644 --- a/arch/powerpc/mm/8xx_mmu.c +++ b/arch/powerpc/mm/8xx_mmu.c @@ -13,6 +13,7 @@ */ #include <linux/memblock.h> +#include <linux/mmu_context.h> #include <asm/fixmap.h> #include <asm/code-patching.h> @@ -79,7 +80,7 @@ void __init MMU_init_hw(void) for (; i < 32 && mem >= LARGE_PAGE_SIZE_8M; i++) { mtspr(SPRN_MD_CTR, ctr | (i << 8)); mtspr(SPRN_MD_EPN, (unsigned long)__va(addr) | MD_EVALID); - mtspr(SPRN_MD_TWC, MD_PS8MEG | MD_SVALID | M_APG2); + mtspr(SPRN_MD_TWC, MD_PS8MEG | MD_SVALID); mtspr(SPRN_MD_RPN, addr | flags | _PAGE_PRESENT); addr += LARGE_PAGE_SIZE_8M; mem -= LARGE_PAGE_SIZE_8M; @@ -97,22 +98,13 @@ static void __init mmu_mapin_immr(void) map_kernel_page(v + offset, p + offset, PAGE_KERNEL_NCG); } -/* Address of instructions to patch */ -#ifndef CONFIG_PIN_TLB_IMMR -extern unsigned int DTLBMiss_jmp; -#endif -extern unsigned int DTLBMiss_cmp, FixupDAR_cmp; -#ifndef CONFIG_PIN_TLB_TEXT -extern unsigned int ITLBMiss_cmp; -#endif - -static void __init mmu_patch_cmp_limit(unsigned int *addr, unsigned long mapped) +static void __init mmu_patch_cmp_limit(s32 *site, unsigned long mapped) { - unsigned int instr = *addr; + unsigned int instr = *(unsigned int *)patch_site_addr(site); instr &= 0xffff0000; instr |= (unsigned long)__va(mapped) >> 16; - patch_instruction(addr, instr); + patch_instruction_site(site, instr); } unsigned long __init mmu_mapin_ram(unsigned long top) @@ -123,17 +115,17 @@ unsigned long __init mmu_mapin_ram(unsigned long top) mapped = 0; mmu_mapin_immr(); #ifndef CONFIG_PIN_TLB_IMMR - patch_instruction(&DTLBMiss_jmp, PPC_INST_NOP); + patch_instruction_site(&patch__dtlbmiss_immr_jmp, PPC_INST_NOP); #endif #ifndef CONFIG_PIN_TLB_TEXT - mmu_patch_cmp_limit(&ITLBMiss_cmp, 0); + mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, 0); #endif } else { mapped = top & ~(LARGE_PAGE_SIZE_8M - 1); } - mmu_patch_cmp_limit(&DTLBMiss_cmp, mapped); - mmu_patch_cmp_limit(&FixupDAR_cmp, mapped); + mmu_patch_cmp_limit(&patch__dtlbmiss_linmem_top, mapped); + mmu_patch_cmp_limit(&patch__fixupdar_linmem_top, mapped); /* If the size of RAM is not an exact power of two, we may not * have covered RAM in its entirety with 8 MiB diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 3a048e98a132..ce28ae5ca080 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1178,7 +1178,7 @@ static long vphn_get_associativity(unsigned long cpu, switch (rc) { case H_FUNCTION: - printk(KERN_INFO + printk_once(KERN_INFO "VPHN is not supported. Disabling polling...\n"); stop_topology_update(); break; diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index c3fdf2969d9f..bc3914d54e26 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -19,6 +19,7 @@ #include <asm/mmu.h> #include <asm/mmu_context.h> #include <asm/paca.h> +#include <asm/ppc-opcode.h> #include <asm/cputable.h> #include <asm/cacheflush.h> #include <asm/smp.h> @@ -58,27 +59,19 @@ static inline unsigned long mk_vsid_data(unsigned long ea, int ssize, return __mk_vsid_data(get_kernel_vsid(ea, ssize), ssize, flags); } -static void assert_slb_exists(unsigned long ea) +static void assert_slb_presence(bool present, unsigned long ea) { #ifdef CONFIG_DEBUG_VM unsigned long tmp; WARN_ON_ONCE(mfmsr() & MSR_EE); - asm volatile("slbfee. %0, %1" : "=r"(tmp) : "r"(ea) : "cr0"); - WARN_ON(tmp == 0); -#endif -} - -static void assert_slb_notexists(unsigned long ea) -{ -#ifdef CONFIG_DEBUG_VM - unsigned long tmp; + if (!cpu_has_feature(CPU_FTR_ARCH_206)) + return; - WARN_ON_ONCE(mfmsr() & MSR_EE); + asm volatile(__PPC_SLBFEE_DOT(%0, %1) : "=r"(tmp) : "r"(ea) : "cr0"); - asm volatile("slbfee. %0, %1" : "=r"(tmp) : "r"(ea) : "cr0"); - WARN_ON(tmp != 0); + WARN_ON(present == (tmp == 0)); #endif } @@ -114,7 +107,7 @@ static inline void create_shadowed_slbe(unsigned long ea, int ssize, */ slb_shadow_update(ea, ssize, flags, index); - assert_slb_notexists(ea); + assert_slb_presence(false, ea); asm volatile("slbmte %0,%1" : : "r" (mk_vsid_data(ea, ssize, flags)), "r" (mk_esid_data(ea, ssize, index)) @@ -137,7 +130,7 @@ void __slb_restore_bolted_realmode(void) "r" (be64_to_cpu(p->save_area[index].esid))); } - assert_slb_exists(local_paca->kstack); + assert_slb_presence(true, local_paca->kstack); } /* @@ -185,7 +178,7 @@ void slb_flush_and_restore_bolted(void) :: "r" (be64_to_cpu(p->save_area[KSTACK_INDEX].vsid)), "r" (be64_to_cpu(p->save_area[KSTACK_INDEX].esid)) : "memory"); - assert_slb_exists(get_paca()->kstack); + assert_slb_presence(true, get_paca()->kstack); get_paca()->slb_cache_ptr = 0; @@ -443,9 +436,9 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) ea = (unsigned long) get_paca()->slb_cache[i] << SID_SHIFT; /* - * Could assert_slb_exists here, but hypervisor - * or machine check could have come in and - * removed the entry at this point. + * Could assert_slb_presence(true) here, but + * hypervisor or machine check could have come + * in and removed the entry at this point. */ slbie_data = ea; @@ -676,7 +669,7 @@ static long slb_insert_entry(unsigned long ea, unsigned long context, * User preloads should add isync afterwards in case the kernel * accesses user memory before it returns to userspace with rfid. */ - assert_slb_notexists(ea); + assert_slb_presence(false, ea); asm volatile("slbmte %0, %1" : : "r" (vsid_data), "r" (esid_data)); barrier(); @@ -715,7 +708,7 @@ static long slb_allocate_kernel(unsigned long ea, unsigned long id) return -EFAULT; if (ea < H_VMALLOC_END) - flags = get_paca()->vmalloc_sllp; + flags = local_paca->vmalloc_sllp; else flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_io_psize].sllp; } else { diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 50b129785aee..9393e231cbc2 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -166,7 +166,33 @@ static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) PPC_BLR(); } -static void bpf_jit_emit_func_call(u32 *image, struct codegen_context *ctx, u64 func) +static void bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx, + u64 func) +{ +#ifdef PPC64_ELF_ABI_v1 + /* func points to the function descriptor */ + PPC_LI64(b2p[TMP_REG_2], func); + /* Load actual entry point from function descriptor */ + PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0); + /* ... and move it to LR */ + PPC_MTLR(b2p[TMP_REG_1]); + /* + * Load TOC from function descriptor at offset 8. + * We can clobber r2 since we get called through a + * function pointer (so caller will save/restore r2) + * and since we don't use a TOC ourself. + */ + PPC_BPF_LL(2, b2p[TMP_REG_2], 8); +#else + /* We can clobber r12 */ + PPC_FUNC_ADDR(12, func); + PPC_MTLR(12); +#endif + PPC_BLRL(); +} + +static void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, + u64 func) { unsigned int i, ctx_idx = ctx->idx; @@ -273,7 +299,7 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, { const struct bpf_insn *insn = fp->insnsi; int flen = fp->len; - int i; + int i, ret; /* Start of epilogue code - will only be valid 2nd pass onwards */ u32 exit_addr = addrs[flen]; @@ -284,8 +310,9 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 src_reg = b2p[insn[i].src_reg]; s16 off = insn[i].off; s32 imm = insn[i].imm; + bool func_addr_fixed; + u64 func_addr; u64 imm64; - u8 *func; u32 true_cond; u32 tmp_idx; @@ -711,23 +738,15 @@ emit_clear: case BPF_JMP | BPF_CALL: ctx->seen |= SEEN_FUNC; - /* bpf function call */ - if (insn[i].src_reg == BPF_PSEUDO_CALL) - if (!extra_pass) - func = NULL; - else if (fp->aux->func && off < fp->aux->func_cnt) - /* use the subprog id from the off - * field to lookup the callee address - */ - func = (u8 *) fp->aux->func[off]->bpf_func; - else - return -EINVAL; - /* kernel helper call */ - else - func = (u8 *) __bpf_call_base + imm; - - bpf_jit_emit_func_call(image, ctx, (u64)func); + ret = bpf_jit_get_func_addr(fp, &insn[i], extra_pass, + &func_addr, &func_addr_fixed); + if (ret < 0) + return ret; + if (func_addr_fixed) + bpf_jit_emit_func_call_hlp(image, ctx, func_addr); + else + bpf_jit_emit_func_call_rel(image, ctx, func_addr); /* move return value from r3 to BPF_REG_0 */ PPC_MR(b2p[BPF_REG_0], 3); break; @@ -872,6 +891,55 @@ cond_branch: return 0; } +/* Fix the branch target addresses for subprog calls */ +static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image, + struct codegen_context *ctx, u32 *addrs) +{ + const struct bpf_insn *insn = fp->insnsi; + bool func_addr_fixed; + u64 func_addr; + u32 tmp_idx; + int i, ret; + + for (i = 0; i < fp->len; i++) { + /* + * During the extra pass, only the branch target addresses for + * the subprog calls need to be fixed. All other instructions + * can left untouched. + * + * The JITed image length does not change because we already + * ensure that the JITed instruction sequence for these calls + * are of fixed length by padding them with NOPs. + */ + if (insn[i].code == (BPF_JMP | BPF_CALL) && + insn[i].src_reg == BPF_PSEUDO_CALL) { + ret = bpf_jit_get_func_addr(fp, &insn[i], true, + &func_addr, + &func_addr_fixed); + if (ret < 0) + return ret; + + /* + * Save ctx->idx as this would currently point to the + * end of the JITed image and set it to the offset of + * the instruction sequence corresponding to the + * subprog call temporarily. + */ + tmp_idx = ctx->idx; + ctx->idx = addrs[i] / 4; + bpf_jit_emit_func_call_rel(image, ctx, func_addr); + + /* + * Restore ctx->idx here. This is safe as the length + * of the JITed sequence remains unchanged. + */ + ctx->idx = tmp_idx; + } + } + + return 0; +} + struct powerpc64_jit_data { struct bpf_binary_header *header; u32 *addrs; @@ -970,6 +1038,22 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) skip_init_ctx: code_base = (u32 *)(image + FUNCTION_DESCR_SIZE); + if (extra_pass) { + /* + * Do not touch the prologue and epilogue as they will remain + * unchanged. Only fix the branch target address for subprog + * calls in the body. + * + * This does not change the offsets and lengths of the subprog + * call instruction sequences and hence, the size of the JITed + * image as well. + */ + bpf_jit_fixup_subprog_calls(fp, code_base, &cgctx, addrs); + + /* There is no need to perform the usual passes. */ + goto skip_codegen_passes; + } + /* Code generation passes 1-2 */ for (pass = 1; pass < 3; pass++) { /* Now build the prologue, body code & epilogue for real. */ @@ -983,6 +1067,7 @@ skip_init_ctx: proglen - (cgctx.idx * 4), cgctx.seen); } +skip_codegen_passes: if (bpf_jit_enable > 1) /* * Note that we output the base address of the code_base diff --git a/arch/powerpc/perf/8xx-pmu.c b/arch/powerpc/perf/8xx-pmu.c index 6c0020d1c561..e38f74e9e7a4 100644 --- a/arch/powerpc/perf/8xx-pmu.c +++ b/arch/powerpc/perf/8xx-pmu.c @@ -31,9 +31,6 @@ extern unsigned long itlb_miss_counter, dtlb_miss_counter; extern atomic_t instruction_counter; -extern unsigned int itlb_miss_perf, dtlb_miss_perf; -extern unsigned int itlb_miss_exit_1, itlb_miss_exit_2; -extern unsigned int dtlb_miss_exit_1, dtlb_miss_exit_2, dtlb_miss_exit_3; static atomic_t insn_ctr_ref; static atomic_t itlb_miss_ref; @@ -103,22 +100,22 @@ static int mpc8xx_pmu_add(struct perf_event *event, int flags) break; case PERF_8xx_ID_ITLB_LOAD_MISS: if (atomic_inc_return(&itlb_miss_ref) == 1) { - unsigned long target = (unsigned long)&itlb_miss_perf; + unsigned long target = patch_site_addr(&patch__itlbmiss_perf); - patch_branch(&itlb_miss_exit_1, target, 0); + patch_branch_site(&patch__itlbmiss_exit_1, target, 0); #ifndef CONFIG_PIN_TLB_TEXT - patch_branch(&itlb_miss_exit_2, target, 0); + patch_branch_site(&patch__itlbmiss_exit_2, target, 0); #endif } val = itlb_miss_counter; break; case PERF_8xx_ID_DTLB_LOAD_MISS: if (atomic_inc_return(&dtlb_miss_ref) == 1) { - unsigned long target = (unsigned long)&dtlb_miss_perf; + unsigned long target = patch_site_addr(&patch__dtlbmiss_perf); - patch_branch(&dtlb_miss_exit_1, target, 0); - patch_branch(&dtlb_miss_exit_2, target, 0); - patch_branch(&dtlb_miss_exit_3, target, 0); + patch_branch_site(&patch__dtlbmiss_exit_1, target, 0); + patch_branch_site(&patch__dtlbmiss_exit_2, target, 0); + patch_branch_site(&patch__dtlbmiss_exit_3, target, 0); } val = dtlb_miss_counter; break; @@ -180,17 +177,17 @@ static void mpc8xx_pmu_del(struct perf_event *event, int flags) break; case PERF_8xx_ID_ITLB_LOAD_MISS: if (atomic_dec_return(&itlb_miss_ref) == 0) { - patch_instruction(&itlb_miss_exit_1, insn); + patch_instruction_site(&patch__itlbmiss_exit_1, insn); #ifndef CONFIG_PIN_TLB_TEXT - patch_instruction(&itlb_miss_exit_2, insn); + patch_instruction_site(&patch__itlbmiss_exit_2, insn); #endif } break; case PERF_8xx_ID_DTLB_LOAD_MISS: if (atomic_dec_return(&dtlb_miss_ref) == 0) { - patch_instruction(&dtlb_miss_exit_1, insn); - patch_instruction(&dtlb_miss_exit_2, insn); - patch_instruction(&dtlb_miss_exit_3, insn); + patch_instruction_site(&patch__dtlbmiss_exit_1, insn); + patch_instruction_site(&patch__dtlbmiss_exit_2, insn); + patch_instruction_site(&patch__dtlbmiss_exit_3, insn); } break; } diff --git a/arch/powerpc/platforms/40x/Kconfig b/arch/powerpc/platforms/40x/Kconfig index 2a9d66254ffc..5326ece36120 100644 --- a/arch/powerpc/platforms/40x/Kconfig +++ b/arch/powerpc/platforms/40x/Kconfig @@ -29,6 +29,7 @@ config KILAUEA select 405EX select PPC40x_SIMPLE select PPC4xx_PCI_EXPRESS + select PCI select PCI_MSI select PPC4xx_MSI help diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig index f024efd5a4c2..9a85d350b1b6 100644 --- a/arch/powerpc/platforms/44x/Kconfig +++ b/arch/powerpc/platforms/44x/Kconfig @@ -21,6 +21,7 @@ config BLUESTONE depends on 44x select PPC44x_SIMPLE select APM821xx + select PCI select PCI_MSI select PPC4xx_MSI select PPC4xx_PCI_EXPRESS @@ -200,6 +201,7 @@ config AKEBONO select SWIOTLB select 476FPE select PPC4xx_PCI_EXPRESS + select PCI select PCI_MSI select PPC4xx_HSTA_MSI select I2C diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index 6f60e0931922..75b935252981 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -102,63 +102,6 @@ struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index) } EXPORT_SYMBOL(pnv_pci_get_npu_dev); -#define NPU_DMA_OP_UNSUPPORTED() \ - dev_err_once(dev, "%s operation unsupported for NVLink devices\n", \ - __func__) - -static void *dma_npu_alloc(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flag, - unsigned long attrs) -{ - NPU_DMA_OP_UNSUPPORTED(); - return NULL; -} - -static void dma_npu_free(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle, - unsigned long attrs) -{ - NPU_DMA_OP_UNSUPPORTED(); -} - -static dma_addr_t dma_npu_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction direction, - unsigned long attrs) -{ - NPU_DMA_OP_UNSUPPORTED(); - return 0; -} - -static int dma_npu_map_sg(struct device *dev, struct scatterlist *sglist, - int nelems, enum dma_data_direction direction, - unsigned long attrs) -{ - NPU_DMA_OP_UNSUPPORTED(); - return 0; -} - -static int dma_npu_dma_supported(struct device *dev, u64 mask) -{ - NPU_DMA_OP_UNSUPPORTED(); - return 0; -} - -static u64 dma_npu_get_required_mask(struct device *dev) -{ - NPU_DMA_OP_UNSUPPORTED(); - return 0; -} - -static const struct dma_map_ops dma_npu_ops = { - .map_page = dma_npu_map_page, - .map_sg = dma_npu_map_sg, - .alloc = dma_npu_alloc, - .free = dma_npu_free, - .dma_supported = dma_npu_dma_supported, - .get_required_mask = dma_npu_get_required_mask, -}; - /* * Returns the PE assoicated with the PCI device of the given * NPU. Returns the linked pci device if pci_dev != NULL. @@ -270,10 +213,11 @@ static void pnv_npu_dma_set_32(struct pnv_ioda_pe *npe) rc = pnv_npu_set_window(npe, 0, gpe->table_group.tables[0]); /* - * We don't initialise npu_pe->tce32_table as we always use - * dma_npu_ops which are nops. + * NVLink devices use the same TCE table configuration as + * their parent device so drivers shouldn't be doing DMA + * operations directly on these devices. */ - set_dma_ops(&npe->pdev->dev, &dma_npu_ops); + set_dma_ops(&npe->pdev->dev, NULL); } /* diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c index 8bd590af488a..794487313cc8 100644 --- a/arch/powerpc/platforms/pseries/lparcfg.c +++ b/arch/powerpc/platforms/pseries/lparcfg.c @@ -26,6 +26,7 @@ #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/uaccess.h> +#include <linux/hugetlb.h> #include <asm/lppaca.h> #include <asm/hvcall.h> #include <asm/firmware.h> @@ -36,6 +37,7 @@ #include <asm/vio.h> #include <asm/mmu.h> #include <asm/machdep.h> +#include <asm/drmem.h> #include "pseries.h" @@ -433,6 +435,16 @@ static void parse_em_data(struct seq_file *m) seq_printf(m, "power_mode_data=%016lx\n", retbuf[0]); } +static void maxmem_data(struct seq_file *m) +{ + unsigned long maxmem = 0; + + maxmem += drmem_info->n_lmbs * drmem_info->lmb_size; + maxmem += hugetlb_total_pages() * PAGE_SIZE; + + seq_printf(m, "MaxMem=%ld\n", maxmem); +} + static int pseries_lparcfg_data(struct seq_file *m, void *v) { int partition_potential_processors; @@ -491,6 +503,7 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v) seq_printf(m, "slb_size=%d\n", mmu_slb_size); #endif parse_em_data(m); + maxmem_data(m); return 0; } diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile index 69e7fb47bcaa..878f9c1d3615 100644 --- a/arch/powerpc/xmon/Makefile +++ b/arch/powerpc/xmon/Makefile @@ -11,6 +11,12 @@ UBSAN_SANITIZE := n ORIG_CFLAGS := $(KBUILD_CFLAGS) KBUILD_CFLAGS = $(subst $(CC_FLAGS_FTRACE),,$(ORIG_CFLAGS)) +ifdef CONFIG_CC_IS_CLANG +# clang stores addresses on the stack causing the frame size to blow +# out. See https://github.com/ClangBuiltLinux/linux/issues/252 +KBUILD_CFLAGS += -Wframe-larger-than=4096 +endif + ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) obj-y += xmon.o nonstdio.o spr_access.o diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index d10146197533..4b594f2e4f7e 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -71,10 +71,27 @@ KBUILD_CFLAGS += $(call cc-option,-mstrict-align) # arch specific predefines for sparse CHECKFLAGS += -D__riscv -D__riscv_xlen=$(BITS) +# Default target when executing plain make +boot := arch/riscv/boot +KBUILD_IMAGE := $(boot)/Image.gz + head-y := arch/riscv/kernel/head.o core-y += arch/riscv/kernel/ arch/riscv/mm/ libs-y += arch/riscv/lib/ -all: vmlinux +PHONY += vdso_install +vdso_install: + $(Q)$(MAKE) $(build)=arch/riscv/kernel/vdso $@ + +all: Image.gz + +Image: vmlinux + $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ + +Image.%: Image + $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ + +zinstall install: + $(Q)$(MAKE) $(build)=$(boot) $@ diff --git a/arch/riscv/boot/.gitignore b/arch/riscv/boot/.gitignore new file mode 100644 index 000000000000..8dab0bb6ae66 --- /dev/null +++ b/arch/riscv/boot/.gitignore @@ -0,0 +1,2 @@ +Image +Image.gz diff --git a/arch/riscv/boot/Makefile b/arch/riscv/boot/Makefile new file mode 100644 index 000000000000..0990a9fdbe5d --- /dev/null +++ b/arch/riscv/boot/Makefile @@ -0,0 +1,33 @@ +# +# arch/riscv/boot/Makefile +# +# This file is included by the global makefile so that you can add your own +# architecture-specific flags and dependencies. +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 2018, Anup Patel. +# Author: Anup Patel <anup@brainfault.org> +# +# Based on the ia64 and arm64 boot/Makefile. +# + +OBJCOPYFLAGS_Image :=-O binary -R .note -R .note.gnu.build-id -R .comment -S + +targets := Image + +$(obj)/Image: vmlinux FORCE + $(call if_changed,objcopy) + +$(obj)/Image.gz: $(obj)/Image FORCE + $(call if_changed,gzip) + +install: + $(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \ + $(obj)/Image System.map "$(INSTALL_PATH)" + +zinstall: + $(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \ + $(obj)/Image.gz System.map "$(INSTALL_PATH)" diff --git a/arch/riscv/boot/install.sh b/arch/riscv/boot/install.sh new file mode 100644 index 000000000000..18c39159c0ff --- /dev/null +++ b/arch/riscv/boot/install.sh @@ -0,0 +1,60 @@ +#!/bin/sh +# +# arch/riscv/boot/install.sh +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 1995 by Linus Torvalds +# +# Adapted from code in arch/i386/boot/Makefile by H. Peter Anvin +# Adapted from code in arch/i386/boot/install.sh by Russell King +# +# "make install" script for the RISC-V Linux port +# +# Arguments: +# $1 - kernel version +# $2 - kernel image file +# $3 - kernel map file +# $4 - default install path (blank if root directory) +# + +verify () { + if [ ! -f "$1" ]; then + echo "" 1>&2 + echo " *** Missing file: $1" 1>&2 + echo ' *** You need to run "make" before "make install".' 1>&2 + echo "" 1>&2 + exit 1 + fi +} + +# Make sure the files actually exist +verify "$2" +verify "$3" + +# User may have a custom install script +if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi +if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi + +if [ "$(basename $2)" = "Image.gz" ]; then +# Compressed install + echo "Installing compressed kernel" + base=vmlinuz +else +# Normal install + echo "Installing normal kernel" + base=vmlinux +fi + +if [ -f $4/$base-$1 ]; then + mv $4/$base-$1 $4/$base-$1.old +fi +cat $2 > $4/$base-$1 + +# Install system map file +if [ -f $4/System.map-$1 ]; then + mv $4/System.map-$1 $4/System.map-$1.old +fi +cp $3 $4/System.map-$1 diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index 36473d7dbaac..ef4f15df9adf 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -1,6 +1,3 @@ -CONFIG_SMP=y -CONFIG_PCI=y -CONFIG_PCIE_XILINX=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_IKCONFIG=y @@ -11,10 +8,15 @@ CONFIG_CFS_BANDWIDTH=y CONFIG_CGROUP_BPF=y CONFIG_NAMESPACES=y CONFIG_USER_NS=y +CONFIG_CHECKPOINT_RESTORE=y CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y -CONFIG_CHECKPOINT_RESTORE=y CONFIG_BPF_SYSCALL=y +CONFIG_SMP=y +CONFIG_PCI=y +CONFIG_PCIE_XILINX=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -59,6 +61,7 @@ CONFIG_USB_OHCI_HCD_PLATFORM=y CONFIG_USB_STORAGE=y CONFIG_USB_UAS=y CONFIG_VIRTIO_MMIO=y +CONFIG_SIFIVE_PLIC=y CONFIG_RAS=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y @@ -72,8 +75,6 @@ CONFIG_NFS_V4=y CONFIG_NFS_V4_1=y CONFIG_NFS_V4_2=y CONFIG_ROOT_NFS=y -# CONFIG_RCU_TRACE is not set CONFIG_CRYPTO_USER_API_HASH=y -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -CONFIG_SIFIVE_PLIC=y +CONFIG_PRINTK_TIME=y +# CONFIG_RCU_TRACE is not set diff --git a/arch/riscv/include/asm/module.h b/arch/riscv/include/asm/module.h index 349df33808c4..cd2af4b013e3 100644 --- a/arch/riscv/include/asm/module.h +++ b/arch/riscv/include/asm/module.h @@ -8,6 +8,7 @@ #define MODULE_ARCH_VERMAGIC "riscv" +struct module; u64 module_emit_got_entry(struct module *mod, u64 val); u64 module_emit_plt_entry(struct module *mod, u64 val); diff --git a/arch/riscv/include/asm/ptrace.h b/arch/riscv/include/asm/ptrace.h index 2c5df945d43c..bbe1862e8f80 100644 --- a/arch/riscv/include/asm/ptrace.h +++ b/arch/riscv/include/asm/ptrace.h @@ -56,8 +56,8 @@ struct pt_regs { unsigned long sstatus; unsigned long sbadaddr; unsigned long scause; - /* a0 value before the syscall */ - unsigned long orig_a0; + /* a0 value before the syscall */ + unsigned long orig_a0; }; #ifdef CONFIG_64BIT diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h index 473cfc84e412..8c3e3e3c8be1 100644 --- a/arch/riscv/include/asm/uaccess.h +++ b/arch/riscv/include/asm/uaccess.h @@ -400,13 +400,13 @@ extern unsigned long __must_check __asm_copy_from_user(void *to, static inline unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n) { - return __asm_copy_to_user(to, from, n); + return __asm_copy_from_user(to, from, n); } static inline unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n) { - return __asm_copy_from_user(to, from, n); + return __asm_copy_to_user(to, from, n); } extern long strncpy_from_user(char *dest, const char __user *src, long count); diff --git a/arch/riscv/include/asm/unistd.h b/arch/riscv/include/asm/unistd.h index eff7aa9aa163..fef96f117b4d 100644 --- a/arch/riscv/include/asm/unistd.h +++ b/arch/riscv/include/asm/unistd.h @@ -13,10 +13,9 @@ /* * There is explicitly no include guard here because this file is expected to - * be included multiple times. See uapi/asm/syscalls.h for more info. + * be included multiple times. */ -#define __ARCH_WANT_NEW_STAT #define __ARCH_WANT_SYS_CLONE + #include <uapi/asm/unistd.h> -#include <uapi/asm/syscalls.h> diff --git a/arch/riscv/include/uapi/asm/syscalls.h b/arch/riscv/include/uapi/asm/unistd.h index 206dc4b0f6ea..1f3bd3ebbb0d 100644 --- a/arch/riscv/include/uapi/asm/syscalls.h +++ b/arch/riscv/include/uapi/asm/unistd.h @@ -1,13 +1,25 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* - * Copyright (C) 2017-2018 SiFive + * Copyright (C) 2018 David Abdurachmanov <david.abdurachmanov@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. */ -/* - * There is explicitly no include guard here because this file is expected to - * be included multiple times in order to define the syscall macros via - * __SYSCALL. - */ +#ifdef __LP64__ +#define __ARCH_WANT_NEW_STAT +#endif /* __LP64__ */ + +#include <asm-generic/unistd.h> /* * Allows the instruction cache to be flushed from userspace. Despite RISC-V diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c index 3a5a2ee31547..b4a7d4427fbb 100644 --- a/arch/riscv/kernel/cpu.c +++ b/arch/riscv/kernel/cpu.c @@ -64,7 +64,7 @@ int riscv_of_processor_hartid(struct device_node *node) static void print_isa(struct seq_file *f, const char *orig_isa) { - static const char *ext = "mafdc"; + static const char *ext = "mafdcsu"; const char *isa = orig_isa; const char *e; @@ -88,11 +88,14 @@ static void print_isa(struct seq_file *f, const char *orig_isa) /* * Check the rest of the ISA string for valid extensions, printing those * we find. RISC-V ISA strings define an order, so we only print the - * extension bits when they're in order. + * extension bits when they're in order. Hide the supervisor (S) + * extension from userspace as it's not accessible from there. */ for (e = ext; *e != '\0'; ++e) { if (isa[0] == e[0]) { - seq_write(f, isa, 1); + if (isa[0] != 's') + seq_write(f, isa, 1); + isa++; } } diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c index 1157b6b52d25..c433f6d3dd64 100644 --- a/arch/riscv/kernel/ftrace.c +++ b/arch/riscv/kernel/ftrace.c @@ -132,7 +132,6 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, { unsigned long return_hooker = (unsigned long)&return_to_handler; unsigned long old; - struct ftrace_graph_ent trace; int err; if (unlikely(atomic_read(¤t->tracing_graph_pause))) @@ -144,17 +143,8 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, */ old = *parent; - trace.func = self_addr; - trace.depth = current->curr_ret_stack + 1; - - if (!ftrace_graph_entry(&trace)) - return; - - err = ftrace_push_return_trace(old, self_addr, &trace.depth, - frame_pointer, parent); - if (err == -EBUSY) - return; - *parent = return_hooker; + if (function_graph_enter(old, self_addr, frame_pointer, parent)) + *parent = return_hooker; } #ifdef CONFIG_DYNAMIC_FTRACE diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 711190d473d4..fe884cd69abd 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -44,6 +44,16 @@ ENTRY(_start) amoadd.w a3, a2, (a3) bnez a3, .Lsecondary_start + /* Clear BSS for flat non-ELF images */ + la a3, __bss_start + la a4, __bss_stop + ble a4, a3, clear_bss_done +clear_bss: + REG_S zero, (a3) + add a3, a3, RISCV_SZPTR + blt a3, a4, clear_bss +clear_bss_done: + /* Save hart ID and DTB physical address */ mv s0, a0 mv s1, a1 diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index 3303ed2cd419..7dd308129b40 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -21,7 +21,7 @@ static int apply_r_riscv_32_rela(struct module *me, u32 *location, Elf_Addr v) { if (v != (u32)v) { pr_err("%s: value %016llx out of range for 32-bit field\n", - me->name, v); + me->name, (long long)v); return -EINVAL; } *location = v; @@ -102,7 +102,7 @@ static int apply_r_riscv_pcrel_hi20_rela(struct module *me, u32 *location, if (offset != (s32)offset) { pr_err( "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n", - me->name, v, location); + me->name, (long long)v, location); return -EINVAL; } @@ -144,7 +144,7 @@ static int apply_r_riscv_hi20_rela(struct module *me, u32 *location, if (IS_ENABLED(CMODEL_MEDLOW)) { pr_err( "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n", - me->name, v, location); + me->name, (long long)v, location); return -EINVAL; } @@ -188,7 +188,7 @@ static int apply_r_riscv_got_hi20_rela(struct module *me, u32 *location, } else { pr_err( "%s: can not generate the GOT entry for symbol = %016llx from PC = %p\n", - me->name, v, location); + me->name, (long long)v, location); return -EINVAL; } @@ -212,7 +212,7 @@ static int apply_r_riscv_call_plt_rela(struct module *me, u32 *location, } else { pr_err( "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n", - me->name, v, location); + me->name, (long long)v, location); return -EINVAL; } } @@ -234,7 +234,7 @@ static int apply_r_riscv_call_rela(struct module *me, u32 *location, if (offset != fill_v) { pr_err( "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n", - me->name, v, location); + me->name, (long long)v, location); return -EINVAL; } diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S index ece84991609c..65df1dfdc303 100644 --- a/arch/riscv/kernel/vmlinux.lds.S +++ b/arch/riscv/kernel/vmlinux.lds.S @@ -74,7 +74,7 @@ SECTIONS *(.sbss*) } - BSS_SECTION(0, 0, 0) + BSS_SECTION(PAGE_SIZE, PAGE_SIZE, 0) EXCEPTION_TABLE(0x10) NOTES diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile index 5739bd05d289..4e2e600f7d53 100644 --- a/arch/riscv/lib/Makefile +++ b/arch/riscv/lib/Makefile @@ -3,6 +3,6 @@ lib-y += memcpy.o lib-y += memset.o lib-y += uaccess.o -lib-(CONFIG_64BIT) += tishift.o +lib-$(CONFIG_64BIT) += tishift.o lib-$(CONFIG_32BIT) += udivdi3.o diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 0b33577932c3..e21053e5e0da 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -27,7 +27,7 @@ KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-option,-ffreestanding) KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),-g) KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO_DWARF4), $(call cc-option, -gdwarf-4,)) UTS_MACHINE := s390x -STACK_SIZE := $(if $(CONFIG_KASAN),32768,16384) +STACK_SIZE := $(if $(CONFIG_KASAN),65536,16384) CHECKFLAGS += -D__s390__ -D__s390x__ export LD_BFD diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile index 593039620487..b1bdd15e3429 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -22,10 +22,10 @@ OBJCOPYFLAGS := OBJECTS := $(addprefix $(obj)/,$(obj-y)) LDFLAGS_vmlinux := --oformat $(LD_BFD) -e startup -T -$(obj)/vmlinux: $(obj)/vmlinux.lds $(objtree)/arch/s390/boot/startup.a $(OBJECTS) +$(obj)/vmlinux: $(obj)/vmlinux.lds $(objtree)/arch/s390/boot/startup.a $(OBJECTS) FORCE $(call if_changed,ld) -OBJCOPYFLAGS_info.bin := -O binary --only-section=.vmlinux.info +OBJCOPYFLAGS_info.bin := -O binary --only-section=.vmlinux.info --set-section-flags .vmlinux.info=load $(obj)/info.bin: vmlinux FORCE $(call if_changed,objcopy) @@ -46,17 +46,17 @@ suffix-$(CONFIG_KERNEL_LZMA) := .lzma suffix-$(CONFIG_KERNEL_LZO) := .lzo suffix-$(CONFIG_KERNEL_XZ) := .xz -$(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) +$(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) FORCE $(call if_changed,gzip) -$(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y) +$(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y) FORCE $(call if_changed,bzip2) -$(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y) +$(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y) FORCE $(call if_changed,lz4) -$(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y) +$(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y) FORCE $(call if_changed,lzma) -$(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) +$(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) FORCE $(call if_changed,lzo) -$(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y) +$(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y) FORCE $(call if_changed,xzkern) OBJCOPYFLAGS_piggy.o := -I binary -O elf64-s390 -B s390:64-bit --rename-section .data=.vmlinux.bin.compressed diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 259d1698ac50..c69cb04b7a59 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -64,6 +64,8 @@ CONFIG_NUMA=y CONFIG_PREEMPT=y CONFIG_HZ_100=y CONFIG_KEXEC_FILE=y +CONFIG_EXPOLINE=y +CONFIG_EXPOLINE_AUTO=y CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_KSM=y @@ -84,9 +86,11 @@ CONFIG_PCI_DEBUG=y CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_S390=y CONFIG_CHSC_SCH=y +CONFIG_VFIO_AP=m CONFIG_CRASH_DUMP=y CONFIG_BINFMT_MISC=m CONFIG_HIBERNATION=y +CONFIG_PM_DEBUG=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_PACKET_DIAG=m @@ -161,8 +165,6 @@ CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_CT_NETLINK=m CONFIG_NF_CT_NETLINK_TIMEOUT=m CONFIG_NF_TABLES=m -CONFIG_NFT_EXTHDR=m -CONFIG_NFT_META=m CONFIG_NFT_CT=m CONFIG_NFT_COUNTER=m CONFIG_NFT_LOG=m @@ -365,6 +367,8 @@ CONFIG_NET_ACT_SKBEDIT=m CONFIG_NET_ACT_CSUM=m CONFIG_DNS_RESOLVER=y CONFIG_OPENVSWITCH=m +CONFIG_VSOCKETS=m +CONFIG_VIRTIO_VSOCKETS=m CONFIG_NETLINK_DIAG=m CONFIG_CGROUP_NET_PRIO=y CONFIG_BPF_JIT=y @@ -461,6 +465,7 @@ CONFIG_PPTP=m CONFIG_PPPOL2TP=m CONFIG_PPP_ASYNC=m CONFIG_PPP_SYNC_TTY=m +CONFIG_ISM=m CONFIG_INPUT_EVDEV=y # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set @@ -486,9 +491,12 @@ CONFIG_MLX4_INFINIBAND=m CONFIG_MLX5_INFINIBAND=m CONFIG_VFIO=m CONFIG_VFIO_PCI=m +CONFIG_VFIO_MDEV=m +CONFIG_VFIO_MDEV_DEVICE=m CONFIG_VIRTIO_PCI=m CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_INPUT=y +CONFIG_S390_AP_IOMMU=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y @@ -615,7 +623,6 @@ CONFIG_DEBUG_CREDENTIALS=y CONFIG_RCU_TORTURE_TEST=m CONFIG_RCU_CPU_STALL_TIMEOUT=300 CONFIG_NOTIFIER_ERROR_INJECTION=m -CONFIG_PM_NOTIFIER_ERROR_INJECT=m CONFIG_NETDEV_NOTIFIER_ERROR_INJECT=m CONFIG_FAULT_INJECTION=y CONFIG_FAILSLAB=y @@ -727,3 +734,4 @@ CONFIG_APPLDATA_BASE=y CONFIG_KVM=m CONFIG_KVM_S390_UCONTROL=y CONFIG_VHOST_NET=m +CONFIG_VHOST_VSOCK=m diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig index 37fd60c20e22..32f539dc9c19 100644 --- a/arch/s390/configs/performance_defconfig +++ b/arch/s390/configs/performance_defconfig @@ -65,6 +65,8 @@ CONFIG_NR_CPUS=512 CONFIG_NUMA=y CONFIG_HZ_100=y CONFIG_KEXEC_FILE=y +CONFIG_EXPOLINE=y +CONFIG_EXPOLINE_AUTO=y CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_KSM=y @@ -82,9 +84,11 @@ CONFIG_PCI=y CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_S390=y CONFIG_CHSC_SCH=y +CONFIG_VFIO_AP=m CONFIG_CRASH_DUMP=y CONFIG_BINFMT_MISC=m CONFIG_HIBERNATION=y +CONFIG_PM_DEBUG=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_PACKET_DIAG=m @@ -159,8 +163,6 @@ CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_CT_NETLINK=m CONFIG_NF_CT_NETLINK_TIMEOUT=m CONFIG_NF_TABLES=m -CONFIG_NFT_EXTHDR=m -CONFIG_NFT_META=m CONFIG_NFT_CT=m CONFIG_NFT_COUNTER=m CONFIG_NFT_LOG=m @@ -362,6 +364,8 @@ CONFIG_NET_ACT_SKBEDIT=m CONFIG_NET_ACT_CSUM=m CONFIG_DNS_RESOLVER=y CONFIG_OPENVSWITCH=m +CONFIG_VSOCKETS=m +CONFIG_VIRTIO_VSOCKETS=m CONFIG_NETLINK_DIAG=m CONFIG_CGROUP_NET_PRIO=y CONFIG_BPF_JIT=y @@ -458,6 +462,7 @@ CONFIG_PPTP=m CONFIG_PPPOL2TP=m CONFIG_PPP_ASYNC=m CONFIG_PPP_SYNC_TTY=m +CONFIG_ISM=m CONFIG_INPUT_EVDEV=y # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set @@ -483,9 +488,12 @@ CONFIG_MLX4_INFINIBAND=m CONFIG_MLX5_INFINIBAND=m CONFIG_VFIO=m CONFIG_VFIO_PCI=m +CONFIG_VFIO_MDEV=m +CONFIG_VFIO_MDEV_DEVICE=m CONFIG_VIRTIO_PCI=m CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_INPUT=y +CONFIG_S390_AP_IOMMU=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y @@ -666,3 +674,4 @@ CONFIG_APPLDATA_BASE=y CONFIG_KVM=m CONFIG_KVM_S390_UCONTROL=y CONFIG_VHOST_NET=m +CONFIG_VHOST_VSOCK=m diff --git a/arch/s390/defconfig b/arch/s390/defconfig index 7cb6a52f727d..4d58a92b5d97 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -26,14 +26,23 @@ CONFIG_CGROUP_CPUACCT=y CONFIG_CGROUP_PERF=y CONFIG_NAMESPACES=y CONFIG_USER_NS=y +CONFIG_CHECKPOINT_RESTORE=y CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y # CONFIG_SYSFS_SYSCALL is not set -CONFIG_CHECKPOINT_RESTORE=y CONFIG_BPF_SYSCALL=y CONFIG_USERFAULTFD=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y +CONFIG_LIVEPATCH=y +CONFIG_NR_CPUS=256 +CONFIG_NUMA=y +CONFIG_HZ_100=y +CONFIG_KEXEC_FILE=y +CONFIG_CRASH_DUMP=y +CONFIG_HIBERNATION=y +CONFIG_PM_DEBUG=y +CONFIG_CMM=m CONFIG_OPROFILE=y CONFIG_KPROBES=y CONFIG_JUMP_LABEL=y @@ -44,11 +53,7 @@ CONFIG_BLK_DEV_INTEGRITY=y CONFIG_PARTITION_ADVANCED=y CONFIG_IBM_PARTITION=y CONFIG_DEFAULT_DEADLINE=y -CONFIG_LIVEPATCH=y -CONFIG_NR_CPUS=256 -CONFIG_NUMA=y -CONFIG_HZ_100=y -CONFIG_KEXEC_FILE=y +CONFIG_BINFMT_MISC=m CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_KSM=y @@ -60,9 +65,6 @@ CONFIG_ZBUD=m CONFIG_ZSMALLOC=m CONFIG_ZSMALLOC_STAT=y CONFIG_IDLE_PAGE_TRACKING=y -CONFIG_CRASH_DUMP=y -CONFIG_BINFMT_MISC=m -CONFIG_HIBERNATION=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -98,6 +100,7 @@ CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y CONFIG_VIRTIO_BLK=y CONFIG_SCSI=y +# CONFIG_SCSI_MQ_DEFAULT is not set CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=y CONFIG_BLK_DEV_SR=y @@ -131,6 +134,7 @@ CONFIG_EQUALIZER=m CONFIG_TUN=m CONFIG_VIRTIO_NET=y # CONFIG_NET_VENDOR_ALACRITECH is not set +# CONFIG_NET_VENDOR_AURORA is not set # CONFIG_NET_VENDOR_CORTINA is not set # CONFIG_NET_VENDOR_SOLARFLARE is not set # CONFIG_NET_VENDOR_SOCIONEXT is not set @@ -157,33 +161,6 @@ CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_HUGETLBFS=y # CONFIG_NETWORK_FILESYSTEMS is not set -CONFIG_DEBUG_INFO=y -CONFIG_DEBUG_INFO_DWARF4=y -CONFIG_GDB_SCRIPTS=y -CONFIG_UNUSED_SYMBOLS=y -CONFIG_DEBUG_SECTION_MISMATCH=y -CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y -CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_PAGEALLOC=y -CONFIG_DETECT_HUNG_TASK=y -CONFIG_PANIC_ON_OOPS=y -CONFIG_PROVE_LOCKING=y -CONFIG_LOCK_STAT=y -CONFIG_DEBUG_LOCKDEP=y -CONFIG_DEBUG_ATOMIC_SLEEP=y -CONFIG_DEBUG_LIST=y -CONFIG_DEBUG_SG=y -CONFIG_DEBUG_NOTIFIERS=y -CONFIG_RCU_CPU_STALL_TIMEOUT=60 -CONFIG_LATENCYTOP=y -CONFIG_SCHED_TRACER=y -CONFIG_FTRACE_SYSCALLS=y -CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y -CONFIG_STACK_TRACER=y -CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_FUNCTION_PROFILER=y -# CONFIG_RUNTIME_TESTING_MENU is not set -CONFIG_S390_PTDUMP=y CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_AUTHENC=m CONFIG_CRYPTO_TEST=m @@ -193,6 +170,7 @@ CONFIG_CRYPTO_CBC=y CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m +CONFIG_CRYPTO_OFB=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_CMAC=m @@ -231,7 +209,6 @@ CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_ZCRYPT=m -CONFIG_ZCRYPT_MULTIDEVNODES=y CONFIG_PKEY=m CONFIG_CRYPTO_PAES_S390=m CONFIG_CRYPTO_SHA1_S390=m @@ -247,4 +224,30 @@ CONFIG_CRC7=m # CONFIG_XZ_DEC_ARM is not set # CONFIG_XZ_DEC_ARMTHUMB is not set # CONFIG_XZ_DEC_SPARC is not set -CONFIG_CMM=m +CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF4=y +CONFIG_GDB_SCRIPTS=y +CONFIG_UNUSED_SYMBOLS=y +CONFIG_DEBUG_SECTION_MISMATCH=y +CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_PAGEALLOC=y +CONFIG_DETECT_HUNG_TASK=y +CONFIG_PANIC_ON_OOPS=y +CONFIG_PROVE_LOCKING=y +CONFIG_LOCK_STAT=y +CONFIG_DEBUG_LOCKDEP=y +CONFIG_DEBUG_ATOMIC_SLEEP=y +CONFIG_DEBUG_LIST=y +CONFIG_DEBUG_SG=y +CONFIG_DEBUG_NOTIFIERS=y +CONFIG_RCU_CPU_STALL_TIMEOUT=60 +CONFIG_LATENCYTOP=y +CONFIG_SCHED_TRACER=y +CONFIG_FTRACE_SYSCALLS=y +CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y +CONFIG_STACK_TRACER=y +CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_FUNCTION_PROFILER=y +# CONFIG_RUNTIME_TESTING_MENU is not set +CONFIG_S390_PTDUMP=y diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index dbd689d556ce..ccbb53e22024 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -46,8 +46,6 @@ static inline int init_new_context(struct task_struct *tsk, mm->context.asce_limit = STACK_TOP_MAX; mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | _ASCE_USER_BITS | _ASCE_TYPE_REGION3; - /* pgd_alloc() did not account this pud */ - mm_inc_nr_puds(mm); break; case -PAGE_SIZE: /* forked 5-level task, set new asce with new_mm->pgd */ @@ -63,9 +61,6 @@ static inline int init_new_context(struct task_struct *tsk, /* forked 2-level compat task, set new asce with new mm->pgd */ mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT; - /* pgd_alloc() did not account this pmd */ - mm_inc_nr_pmds(mm); - mm_inc_nr_puds(mm); } crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm)); return 0; diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index f0f9bcf94c03..5ee733720a57 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -36,11 +36,11 @@ static inline void crst_table_init(unsigned long *crst, unsigned long entry) static inline unsigned long pgd_entry_type(struct mm_struct *mm) { - if (mm->context.asce_limit <= _REGION3_SIZE) + if (mm_pmd_folded(mm)) return _SEGMENT_ENTRY_EMPTY; - if (mm->context.asce_limit <= _REGION2_SIZE) + if (mm_pud_folded(mm)) return _REGION3_ENTRY_EMPTY; - if (mm->context.asce_limit <= _REGION1_SIZE) + if (mm_p4d_folded(mm)) return _REGION2_ENTRY_EMPTY; return _REGION1_ENTRY_EMPTY; } diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 411d435e7a7d..063732414dfb 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -493,6 +493,24 @@ static inline int is_module_addr(void *addr) _REGION_ENTRY_PROTECT | \ _REGION_ENTRY_NOEXEC) +static inline bool mm_p4d_folded(struct mm_struct *mm) +{ + return mm->context.asce_limit <= _REGION1_SIZE; +} +#define mm_p4d_folded(mm) mm_p4d_folded(mm) + +static inline bool mm_pud_folded(struct mm_struct *mm) +{ + return mm->context.asce_limit <= _REGION2_SIZE; +} +#define mm_pud_folded(mm) mm_pud_folded(mm) + +static inline bool mm_pmd_folded(struct mm_struct *mm) +{ + return mm->context.asce_limit <= _REGION3_SIZE; +} +#define mm_pmd_folded(mm) mm_pmd_folded(mm) + static inline int mm_has_pgste(struct mm_struct *mm) { #ifdef CONFIG_PGSTE diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 302795c47c06..81038ab357ce 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -236,7 +236,7 @@ static inline unsigned long current_stack_pointer(void) return sp; } -static __no_sanitize_address_or_inline unsigned short stap(void) +static __no_kasan_or_inline unsigned short stap(void) { unsigned short cpu_address; @@ -330,7 +330,7 @@ static inline void __load_psw(psw_t psw) * Set PSW mask to specified value, while leaving the * PSW addr pointing to the next instruction. */ -static __no_sanitize_address_or_inline void __load_psw_mask(unsigned long mask) +static __no_kasan_or_inline void __load_psw_mask(unsigned long mask) { unsigned long addr; psw_t psw; diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index 27248f42a03c..ce4e17c9aad6 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -14,7 +14,7 @@ * General size of kernel stacks */ #ifdef CONFIG_KASAN -#define THREAD_SIZE_ORDER 3 +#define THREAD_SIZE_ORDER 4 #else #define THREAD_SIZE_ORDER 2 #endif diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index 457b7ba0fbb6..b31c779cf581 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -136,7 +136,7 @@ static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, unsigned long address) { - if (tlb->mm->context.asce_limit <= _REGION3_SIZE) + if (mm_pmd_folded(tlb->mm)) return; pgtable_pmd_page_dtor(virt_to_page(pmd)); tlb_remove_table(tlb, pmd); @@ -152,7 +152,7 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, static inline void p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d, unsigned long address) { - if (tlb->mm->context.asce_limit <= _REGION1_SIZE) + if (mm_p4d_folded(tlb->mm)) return; tlb_remove_table(tlb, p4d); } @@ -167,7 +167,7 @@ static inline void p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d, static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, unsigned long address) { - if (tlb->mm->context.asce_limit <= _REGION2_SIZE) + if (mm_pud_folded(tlb->mm)) return; tlb_remove_table(tlb, pud); } diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 724fba4d09d2..39191a0feed1 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -236,10 +236,10 @@ ENTRY(__switch_to) stmg %r6,%r15,__SF_GPRS(%r15) # store gprs of prev task lghi %r4,__TASK_stack lghi %r1,__TASK_thread - lg %r5,0(%r4,%r3) # start of kernel stack of next + llill %r5,STACK_INIT stg %r15,__THREAD_ksp(%r1,%r2) # store kernel stack of prev - lgr %r15,%r5 - aghi %r15,STACK_INIT # end of kernel stack of next + lg %r15,0(%r4,%r3) # start of kernel stack of next + agr %r15,%r5 # end of kernel stack of next stg %r3,__LC_CURRENT # store task struct of next stg %r15,__LC_KERNEL_STACK # store end of kernel stack lg %r15,__THREAD_ksp(%r1,%r3) # load kernel stack of next diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 84be7f02d0c2..39b13d71a8fe 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -203,22 +203,13 @@ device_initcall(ftrace_plt_init); */ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip) { - struct ftrace_graph_ent trace; - if (unlikely(ftrace_graph_is_dead())) goto out; if (unlikely(atomic_read(¤t->tracing_graph_pause))) goto out; ip -= MCOUNT_INSN_SIZE; - trace.func = ip; - trace.depth = current->curr_ret_stack + 1; - /* Only trace if the calling function expects to. */ - if (!ftrace_graph_entry(&trace)) - goto out; - if (ftrace_push_return_trace(parent, ip, &trace.depth, 0, - NULL) == -EBUSY) - goto out; - parent = (unsigned long) return_to_handler; + if (!function_graph_enter(parent, ip, 0, NULL)) + parent = (unsigned long) return_to_handler; out: return parent; } diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index cc085e2d2ce9..d5523adeddbf 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -346,6 +346,8 @@ static int __hw_perf_event_init(struct perf_event *event) break; case PERF_TYPE_HARDWARE: + if (is_sampling_event(event)) /* No sampling support */ + return -ENOENT; ev = attr->config; /* Count user space (problem-state) only */ if (!attr->exclude_user && attr->exclude_kernel) { @@ -373,7 +375,7 @@ static int __hw_perf_event_init(struct perf_event *event) return -ENOENT; if (ev > PERF_CPUM_CF_MAX_CTR) - return -EINVAL; + return -ENOENT; /* Obtain the counter set to which the specified counter belongs */ set = get_counter_set(ev); diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 7bf604ff50a1..bfabeb1889cc 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -1842,10 +1842,30 @@ static void cpumsf_pmu_del(struct perf_event *event, int flags) CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC, PERF_EVENT_CPUM_SF); CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC_DIAG, PERF_EVENT_CPUM_SF_DIAG); -static struct attribute *cpumsf_pmu_events_attr[] = { - CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC), - NULL, - NULL, +/* Attribute list for CPU_SF. + * + * The availablitiy depends on the CPU_MF sampling facility authorization + * for basic + diagnositic samples. This is determined at initialization + * time by the sampling facility device driver. + * If the authorization for basic samples is turned off, it should be + * also turned off for diagnostic sampling. + * + * During initialization of the device driver, check the authorization + * level for diagnostic sampling and installs the attribute + * file for diagnostic sampling if necessary. + * + * For now install a placeholder to reference all possible attributes: + * SF_CYCLES_BASIC and SF_CYCLES_BASIC_DIAG. + * Add another entry for the final NULL pointer. + */ +enum { + SF_CYCLES_BASIC_ATTR_IDX = 0, + SF_CYCLES_BASIC_DIAG_ATTR_IDX, + SF_CYCLES_ATTR_MAX +}; + +static struct attribute *cpumsf_pmu_events_attr[SF_CYCLES_ATTR_MAX + 1] = { + [SF_CYCLES_BASIC_ATTR_IDX] = CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC) }; PMU_FORMAT_ATTR(event, "config:0-63"); @@ -2040,7 +2060,10 @@ static int __init init_cpum_sampling_pmu(void) if (si.ad) { sfb_set_limits(CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB); - cpumsf_pmu_events_attr[1] = + /* Sampling of diagnostic data authorized, + * install event into attribute list of PMU device. + */ + cpumsf_pmu_events_attr[SF_CYCLES_BASIC_DIAG_ATTR_IDX] = CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC_DIAG); } diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile index eb8aebea3ea7..e76309fbbcb3 100644 --- a/arch/s390/kernel/vdso32/Makefile +++ b/arch/s390/kernel/vdso32/Makefile @@ -37,7 +37,7 @@ KASAN_SANITIZE := n $(obj)/vdso32_wrapper.o : $(obj)/vdso32.so # link rule for the .so file, .lds has to be first -$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) +$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) FORCE $(call if_changed,vdso32ld) # strip rule for the .so file @@ -46,12 +46,12 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE $(call if_changed,objcopy) # assembly rules for the .S files -$(obj-vdso32): %.o: %.S +$(obj-vdso32): %.o: %.S FORCE $(call if_changed_dep,vdso32as) # actual build commands quiet_cmd_vdso32ld = VDSO32L $@ - cmd_vdso32ld = $(CC) $(c_flags) -Wl,-T $^ -o $@ + cmd_vdso32ld = $(CC) $(c_flags) -Wl,-T $(filter %.lds %.o,$^) -o $@ quiet_cmd_vdso32as = VDSO32A $@ cmd_vdso32as = $(CC) $(a_flags) -c -o $@ $< diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index a22b2cf86eec..f849ac61c5da 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -37,7 +37,7 @@ KASAN_SANITIZE := n $(obj)/vdso64_wrapper.o : $(obj)/vdso64.so # link rule for the .so file, .lds has to be first -$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) +$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) FORCE $(call if_changed,vdso64ld) # strip rule for the .so file @@ -46,12 +46,12 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE $(call if_changed,objcopy) # assembly rules for the .S files -$(obj-vdso64): %.o: %.S +$(obj-vdso64): %.o: %.S FORCE $(call if_changed_dep,vdso64as) # actual build commands quiet_cmd_vdso64ld = VDSO64L $@ - cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $^ -o $@ + cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $(filter %.lds %.o,$^) -o $@ quiet_cmd_vdso64as = VDSO64A $@ cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $< diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 21eb7407d51b..8429ab079715 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -154,14 +154,14 @@ SECTIONS * uncompressed image info used by the decompressor * it should match struct vmlinux_info */ - .vmlinux.info 0 : { + .vmlinux.info 0 (INFO) : { QUAD(_stext) /* default_lma */ QUAD(startup_continue) /* entry */ QUAD(__bss_start - _stext) /* image_size */ QUAD(__bss_stop - __bss_start) /* bss_size */ QUAD(__boot_data_start) /* bootdata_off */ QUAD(__boot_data_end - __boot_data_start) /* bootdata_size */ - } + } :NONE /* Debugging sections. */ STABS_DEBUG diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index 76d89ee8b428..6791562779ee 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -101,6 +101,7 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end) mm->context.asce_limit = _REGION1_SIZE; mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | _ASCE_USER_BITS | _ASCE_TYPE_REGION2; + mm_inc_nr_puds(mm); } else { crst_table_init(table, _REGION1_ENTRY_EMPTY); pgd_populate(mm, (pgd_t *) table, (p4d_t *) pgd); @@ -130,6 +131,7 @@ void crst_table_downgrade(struct mm_struct *mm) } pgd = mm->pgd; + mm_dec_nr_pmds(mm); mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN); mm->context.asce_limit = _REGION3_SIZE; mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | diff --git a/arch/s390/numa/numa.c b/arch/s390/numa/numa.c index ae0d9e889534..d31bde0870d8 100644 --- a/arch/s390/numa/numa.c +++ b/arch/s390/numa/numa.c @@ -53,6 +53,7 @@ int __node_distance(int a, int b) { return mode->distance ? mode->distance(a, b) : 0; } +EXPORT_SYMBOL(__node_distance); int numa_debug_enabled; diff --git a/arch/sh/kernel/ftrace.c b/arch/sh/kernel/ftrace.c index 96dd9f7da250..1b04270e5460 100644 --- a/arch/sh/kernel/ftrace.c +++ b/arch/sh/kernel/ftrace.c @@ -321,8 +321,7 @@ int ftrace_disable_ftrace_graph_caller(void) void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) { unsigned long old; - int faulted, err; - struct ftrace_graph_ent trace; + int faulted; unsigned long return_hooker = (unsigned long)&return_to_handler; if (unlikely(ftrace_graph_is_dead())) @@ -365,18 +364,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) return; } - err = ftrace_push_return_trace(old, self_addr, &trace.depth, 0, NULL); - if (err == -EBUSY) { + if (function_graph_enter(old, self_addr, 0, NULL)) __raw_writel(old, parent); - return; - } - - trace.func = self_addr; - - /* Only trace if the calling function expects to */ - if (!ftrace_graph_entry(&trace)) { - current->curr_ret_stack--; - __raw_writel(old, parent); - } } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/sparc/kernel/ftrace.c b/arch/sparc/kernel/ftrace.c index 915dda4ae412..684b84ce397f 100644 --- a/arch/sparc/kernel/ftrace.c +++ b/arch/sparc/kernel/ftrace.c @@ -126,20 +126,11 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long frame_pointer) { unsigned long return_hooker = (unsigned long) &return_to_handler; - struct ftrace_graph_ent trace; if (unlikely(atomic_read(¤t->tracing_graph_pause))) return parent + 8UL; - trace.func = self_addr; - trace.depth = current->curr_ret_stack + 1; - - /* Only trace if the calling function expects to */ - if (!ftrace_graph_entry(&trace)) - return parent + 8UL; - - if (ftrace_push_return_trace(parent, self_addr, &trace.depth, - frame_pointer, NULL) == -EBUSY) + if (function_graph_enter(parent, self_addr, frame_pointer, NULL)) return parent + 8UL; return return_hooker; diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c index 40d008b0bd3e..05eb016fc41b 100644 --- a/arch/sparc/kernel/iommu.c +++ b/arch/sparc/kernel/iommu.c @@ -108,10 +108,9 @@ int iommu_table_init(struct iommu *iommu, int tsbsize, /* Allocate and initialize the free area map. */ sz = num_tsb_entries / 8; sz = (sz + 7UL) & ~7UL; - iommu->tbl.map = kmalloc_node(sz, GFP_KERNEL, numa_node); + iommu->tbl.map = kzalloc_node(sz, GFP_KERNEL, numa_node); if (!iommu->tbl.map) return -ENOMEM; - memset(iommu->tbl.map, 0, sz); iommu_tbl_pool_init(&iommu->tbl, num_tsb_entries, IO_PAGE_SHIFT, (tlb_type != hypervisor ? iommu_flushall : NULL), diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 67b3e6b3ce5d..47c871394ccb 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -1849,16 +1849,12 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs { u64 saved_fault_address = current_thread_info()->fault_address; u8 saved_fault_code = get_thread_fault_code(); - mm_segment_t old_fs; perf_callchain_store(entry, regs->tpc); if (!current->mm) return; - old_fs = get_fs(); - set_fs(USER_DS); - flushw_user(); pagefault_disable(); @@ -1870,7 +1866,6 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs pagefault_enable(); - set_fs(old_fs); set_thread_fault_code(saved_fault_code); current_thread_info()->fault_address = saved_fault_address; } diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c index 4c5b3fcbed94..e800ce13cc6e 100644 --- a/arch/sparc/kernel/signal32.c +++ b/arch/sparc/kernel/signal32.c @@ -683,6 +683,7 @@ void do_signal32(struct pt_regs * regs) regs->tpc -= 4; regs->tnpc -= 4; pt_regs_clear_syscall(regs); + /* fall through */ case ERESTART_RESTARTBLOCK: regs->u_regs[UREG_G1] = __NR_restart_syscall; regs->tpc -= 4; diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c index 5665261cee37..83953780ca01 100644 --- a/arch/sparc/kernel/signal_32.c +++ b/arch/sparc/kernel/signal_32.c @@ -508,6 +508,7 @@ static void do_signal(struct pt_regs *regs, unsigned long orig_i0) regs->pc -= 4; regs->npc -= 4; pt_regs_clear_syscall(regs); + /* fall through */ case ERESTART_RESTARTBLOCK: regs->u_regs[UREG_G1] = __NR_restart_syscall; regs->pc -= 4; diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c index e9de1803a22e..ca70787efd8e 100644 --- a/arch/sparc/kernel/signal_64.c +++ b/arch/sparc/kernel/signal_64.c @@ -533,6 +533,7 @@ static void do_signal(struct pt_regs *regs, unsigned long orig_i0) regs->tpc -= 4; regs->tnpc -= 4; pt_regs_clear_syscall(regs); + /* fall through */ case ERESTART_RESTARTBLOCK: regs->u_regs[UREG_G1] = __NR_restart_syscall; regs->tpc -= 4; diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S index bb68c805b891..ff9389a1c9f3 100644 --- a/arch/sparc/kernel/systbls_64.S +++ b/arch/sparc/kernel/systbls_64.S @@ -47,9 +47,9 @@ sys_call_table32: .word sys_recvfrom, sys_setreuid16, sys_setregid16, sys_rename, compat_sys_truncate /*130*/ .word compat_sys_ftruncate, sys_flock, compat_sys_lstat64, sys_sendto, sys_shutdown .word sys_socketpair, sys_mkdir, sys_rmdir, compat_sys_utimes, compat_sys_stat64 -/*140*/ .word sys_sendfile64, sys_nis_syscall, compat_sys_futex, sys_gettid, compat_sys_getrlimit +/*140*/ .word sys_sendfile64, sys_getpeername, compat_sys_futex, sys_gettid, compat_sys_getrlimit .word compat_sys_setrlimit, sys_pivot_root, sys_prctl, sys_pciconfig_read, sys_pciconfig_write -/*150*/ .word sys_nis_syscall, sys_inotify_init, sys_inotify_add_watch, sys_poll, sys_getdents64 +/*150*/ .word sys_getsockname, sys_inotify_init, sys_inotify_add_watch, sys_poll, sys_getdents64 .word compat_sys_fcntl64, sys_inotify_rm_watch, compat_sys_statfs, compat_sys_fstatfs, sys_oldumount /*160*/ .word compat_sys_sched_setaffinity, compat_sys_sched_getaffinity, sys_getdomainname, sys_setdomainname, sys_nis_syscall .word sys_quotactl, sys_set_tid_address, compat_sys_mount, compat_sys_ustat, sys_setxattr diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c index 222785af550b..5fda4f7bf15d 100644 --- a/arch/sparc/net/bpf_jit_comp_64.c +++ b/arch/sparc/net/bpf_jit_comp_64.c @@ -791,7 +791,7 @@ static int emit_compare_and_branch(const u8 code, const u8 dst, u8 src, } /* Just skip the save instruction and the ctx register move. */ -#define BPF_TAILCALL_PROLOGUE_SKIP 16 +#define BPF_TAILCALL_PROLOGUE_SKIP 32 #define BPF_TAILCALL_CNT_SP_OFF (STACK_BIAS + 128) static void build_prologue(struct jit_ctx *ctx) @@ -824,9 +824,15 @@ static void build_prologue(struct jit_ctx *ctx) const u8 vfp = bpf2sparc[BPF_REG_FP]; emit(ADD | IMMED | RS1(FP) | S13(STACK_BIAS) | RD(vfp), ctx); + } else { + emit_nop(ctx); } emit_reg_move(I0, O0, ctx); + emit_reg_move(I1, O1, ctx); + emit_reg_move(I2, O2, ctx); + emit_reg_move(I3, O3, ctx); + emit_reg_move(I4, O4, ctx); /* If you add anything here, adjust BPF_TAILCALL_PROLOGUE_SKIP above. */ } @@ -1270,6 +1276,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) const u8 tmp2 = bpf2sparc[TMP_REG_2]; u32 opcode = 0, rs2; + if (insn->dst_reg == BPF_REG_FP) + ctx->saw_frame_pointer = true; + ctx->tmp_2_used = true; emit_loadimm(imm, tmp2, ctx); @@ -1308,6 +1317,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) const u8 tmp = bpf2sparc[TMP_REG_1]; u32 opcode = 0, rs2; + if (insn->dst_reg == BPF_REG_FP) + ctx->saw_frame_pointer = true; + switch (BPF_SIZE(code)) { case BPF_W: opcode = ST32; @@ -1340,6 +1352,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) const u8 tmp2 = bpf2sparc[TMP_REG_2]; const u8 tmp3 = bpf2sparc[TMP_REG_3]; + if (insn->dst_reg == BPF_REG_FP) + ctx->saw_frame_pointer = true; + ctx->tmp_1_used = true; ctx->tmp_2_used = true; ctx->tmp_3_used = true; @@ -1360,6 +1375,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) const u8 tmp2 = bpf2sparc[TMP_REG_2]; const u8 tmp3 = bpf2sparc[TMP_REG_3]; + if (insn->dst_reg == BPF_REG_FP) + ctx->saw_frame_pointer = true; + ctx->tmp_1_used = true; ctx->tmp_2_used = true; ctx->tmp_3_used = true; @@ -1425,12 +1443,12 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) struct bpf_prog *tmp, *orig_prog = prog; struct sparc64_jit_data *jit_data; struct bpf_binary_header *header; + u32 prev_image_size, image_size; bool tmp_blinded = false; bool extra_pass = false; struct jit_ctx ctx; - u32 image_size; u8 *image_ptr; - int pass; + int pass, i; if (!prog->jit_requested) return orig_prog; @@ -1461,61 +1479,82 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) header = jit_data->header; extra_pass = true; image_size = sizeof(u32) * ctx.idx; + prev_image_size = image_size; + pass = 1; goto skip_init_ctx; } memset(&ctx, 0, sizeof(ctx)); ctx.prog = prog; - ctx.offset = kcalloc(prog->len, sizeof(unsigned int), GFP_KERNEL); + ctx.offset = kmalloc_array(prog->len, sizeof(unsigned int), GFP_KERNEL); if (ctx.offset == NULL) { prog = orig_prog; goto out_off; } - /* Fake pass to detect features used, and get an accurate assessment - * of what the final image size will be. + /* Longest sequence emitted is for bswap32, 12 instructions. Pre-cook + * the offset array so that we converge faster. */ - if (build_body(&ctx)) { - prog = orig_prog; - goto out_off; - } - build_prologue(&ctx); - build_epilogue(&ctx); - - /* Now we know the actual image size. */ - image_size = sizeof(u32) * ctx.idx; - header = bpf_jit_binary_alloc(image_size, &image_ptr, - sizeof(u32), jit_fill_hole); - if (header == NULL) { - prog = orig_prog; - goto out_off; - } + for (i = 0; i < prog->len; i++) + ctx.offset[i] = i * (12 * 4); - ctx.image = (u32 *)image_ptr; -skip_init_ctx: - for (pass = 1; pass < 3; pass++) { + prev_image_size = ~0U; + for (pass = 1; pass < 40; pass++) { ctx.idx = 0; build_prologue(&ctx); - if (build_body(&ctx)) { - bpf_jit_binary_free(header); prog = orig_prog; goto out_off; } - build_epilogue(&ctx); if (bpf_jit_enable > 1) - pr_info("Pass %d: shrink = %d, seen = [%c%c%c%c%c%c]\n", pass, - image_size - (ctx.idx * 4), + pr_info("Pass %d: size = %u, seen = [%c%c%c%c%c%c]\n", pass, + ctx.idx * 4, ctx.tmp_1_used ? '1' : ' ', ctx.tmp_2_used ? '2' : ' ', ctx.tmp_3_used ? '3' : ' ', ctx.saw_frame_pointer ? 'F' : ' ', ctx.saw_call ? 'C' : ' ', ctx.saw_tail_call ? 'T' : ' '); + + if (ctx.idx * 4 == prev_image_size) + break; + prev_image_size = ctx.idx * 4; + cond_resched(); + } + + /* Now we know the actual image size. */ + image_size = sizeof(u32) * ctx.idx; + header = bpf_jit_binary_alloc(image_size, &image_ptr, + sizeof(u32), jit_fill_hole); + if (header == NULL) { + prog = orig_prog; + goto out_off; + } + + ctx.image = (u32 *)image_ptr; +skip_init_ctx: + ctx.idx = 0; + + build_prologue(&ctx); + + if (build_body(&ctx)) { + bpf_jit_binary_free(header); + prog = orig_prog; + goto out_off; + } + + build_epilogue(&ctx); + + if (ctx.idx * 4 != prev_image_size) { + pr_err("bpf_jit: Failed to converge, prev_size=%u size=%d\n", + prev_image_size, ctx.idx * 4); + bpf_jit_binary_free(header); + prog = orig_prog; + goto out_off; } if (bpf_jit_enable > 1) diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 74c002ddc0ce..28c40624bcb6 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -1305,6 +1305,7 @@ static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req, io_req->fds[0] = dev->cow.fd; else io_req->fds[0] = dev->fd; + io_req->error = 0; if (req_op(req) == REQ_OP_FLUSH) { io_req->op = UBD_FLUSH; @@ -1313,9 +1314,7 @@ static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req, io_req->cow_offset = -1; io_req->offset = off; io_req->length = bvec->bv_len; - io_req->error = 0; io_req->sector_mask = 0; - io_req->op = rq_data_dir(req) == READ ? UBD_READ : UBD_WRITE; io_req->offsets[0] = 0; io_req->offsets[1] = dev->cow.data_offset; @@ -1341,11 +1340,14 @@ static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req, static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { + struct ubd *ubd_dev = hctx->queue->queuedata; struct request *req = bd->rq; int ret = 0; blk_mq_start_request(req); + spin_lock_irq(&ubd_dev->lock); + if (req_op(req) == REQ_OP_FLUSH) { ret = ubd_queue_one_vec(hctx, req, 0, NULL); } else { @@ -1361,9 +1363,11 @@ static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx, } } out: - if (ret < 0) { + spin_unlock_irq(&ubd_dev->lock); + + if (ret < 0) blk_mq_requeue_request(req, true); - } + return BLK_STS_OK; } diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c51c989c19c0..8689e794a43c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -129,6 +129,7 @@ config X86 select HAVE_ARCH_PREL32_RELOCATIONS select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_THREAD_STRUCT_WHITELIST + select HAVE_ARCH_STACKLEAK select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if X86_64 @@ -443,10 +444,6 @@ config RETPOLINE branches. Requires a compiler with -mindirect-branch=thunk-extern support for full protection. The kernel may run slower. - Without compiler support, at least indirect branches in assembler - code are eliminated. Since this includes the syscall entry path, - it is not entirely pointless. - config INTEL_RDT bool "Intel Resource Director Technology support" depends on X86 && CPU_SUP_INTEL @@ -524,7 +521,6 @@ config X86_VSMP bool "ScaleMP vSMP" select HYPERVISOR_GUEST select PARAVIRT - select PARAVIRT_XXL depends on X86_64 && PCI depends on X86_EXTENDED_PLATFORM depends on SMP @@ -1004,13 +1000,7 @@ config NR_CPUS to the kernel image. config SCHED_SMT - bool "SMT (Hyperthreading) scheduler support" - depends on SMP - ---help--- - SMT scheduler support improves the CPU scheduler's decision making - when dealing with Intel Pentium 4 chips with HyperThreading at a - cost of slightly increased overhead in some places. If unsure say - N here. + def_bool y if SMP config SCHED_MC def_bool y diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 5b562e464009..75ef499a66e2 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -213,8 +213,6 @@ ifdef CONFIG_X86_64 KBUILD_LDFLAGS += $(call ld-option, -z max-page-size=0x200000) endif -# Speed up the build -KBUILD_CFLAGS += -pipe # Workaround for a gcc prelease that unfortunately was shipped in a suse release KBUILD_CFLAGS += -Wno-sign-compare # @@ -222,9 +220,7 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables # Avoid indirect branches in kernel to deal with Spectre ifdef CONFIG_RETPOLINE -ifneq ($(RETPOLINE_CFLAGS),) - KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE -endif + KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) endif archscripts: scripts_basic @@ -239,7 +235,7 @@ archheaders: archmacros: $(Q)$(MAKE) $(build)=arch/x86/kernel arch/x86/kernel/macros.s -ASM_MACRO_FLAGS = -Wa,arch/x86/kernel/macros.s -Wa,- +ASM_MACRO_FLAGS = -Wa,arch/x86/kernel/macros.s export ASM_MACRO_FLAGS KBUILD_CFLAGS += $(ASM_MACRO_FLAGS) @@ -308,6 +304,13 @@ ifndef CC_HAVE_ASM_GOTO @echo Compiler lacks asm-goto support. @exit 1 endif +ifdef CONFIG_RETPOLINE +ifeq ($(RETPOLINE_CFLAGS),) + @echo "You are building kernel with non-retpoline compiler." >&2 + @echo "Please update your compiler." >&2 + @false +endif +endif archclean: $(Q)rm -rf $(objtree)/arch/i386 diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 8b4c5e001157..544ac4fafd11 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -1,3 +1,4 @@ + /* ----------------------------------------------------------------------- * * Copyright 2011 Intel Corporation; author Matt Fleming @@ -634,37 +635,54 @@ static efi_status_t alloc_e820ext(u32 nr_desc, struct setup_data **e820ext, return status; } +static efi_status_t allocate_e820(struct boot_params *params, + struct setup_data **e820ext, + u32 *e820ext_size) +{ + unsigned long map_size, desc_size, buff_size; + struct efi_boot_memmap boot_map; + efi_memory_desc_t *map; + efi_status_t status; + __u32 nr_desc; + + boot_map.map = ↦ + boot_map.map_size = &map_size; + boot_map.desc_size = &desc_size; + boot_map.desc_ver = NULL; + boot_map.key_ptr = NULL; + boot_map.buff_size = &buff_size; + + status = efi_get_memory_map(sys_table, &boot_map); + if (status != EFI_SUCCESS) + return status; + + nr_desc = buff_size / desc_size; + + if (nr_desc > ARRAY_SIZE(params->e820_table)) { + u32 nr_e820ext = nr_desc - ARRAY_SIZE(params->e820_table); + + status = alloc_e820ext(nr_e820ext, e820ext, e820ext_size); + if (status != EFI_SUCCESS) + return status; + } + + return EFI_SUCCESS; +} + struct exit_boot_struct { struct boot_params *boot_params; struct efi_info *efi; - struct setup_data *e820ext; - __u32 e820ext_size; }; static efi_status_t exit_boot_func(efi_system_table_t *sys_table_arg, struct efi_boot_memmap *map, void *priv) { - static bool first = true; const char *signature; __u32 nr_desc; efi_status_t status; struct exit_boot_struct *p = priv; - if (first) { - nr_desc = *map->buff_size / *map->desc_size; - if (nr_desc > ARRAY_SIZE(p->boot_params->e820_table)) { - u32 nr_e820ext = nr_desc - - ARRAY_SIZE(p->boot_params->e820_table); - - status = alloc_e820ext(nr_e820ext, &p->e820ext, - &p->e820ext_size); - if (status != EFI_SUCCESS) - return status; - } - first = false; - } - signature = efi_is_64bit() ? EFI64_LOADER_SIGNATURE : EFI32_LOADER_SIGNATURE; memcpy(&p->efi->efi_loader_signature, signature, sizeof(__u32)); @@ -687,8 +705,8 @@ static efi_status_t exit_boot(struct boot_params *boot_params, void *handle) { unsigned long map_sz, key, desc_size, buff_size; efi_memory_desc_t *mem_map; - struct setup_data *e820ext; - __u32 e820ext_size; + struct setup_data *e820ext = NULL; + __u32 e820ext_size = 0; efi_status_t status; __u32 desc_version; struct efi_boot_memmap map; @@ -702,8 +720,10 @@ static efi_status_t exit_boot(struct boot_params *boot_params, void *handle) map.buff_size = &buff_size; priv.boot_params = boot_params; priv.efi = &boot_params->efi_info; - priv.e820ext = NULL; - priv.e820ext_size = 0; + + status = allocate_e820(boot_params, &e820ext, &e820ext_size); + if (status != EFI_SUCCESS) + return status; /* Might as well exit boot services now */ status = efi_exit_boot_services(sys_table, handle, &map, &priv, @@ -711,9 +731,6 @@ static efi_status_t exit_boot(struct boot_params *boot_params, void *handle) if (status != EFI_SUCCESS) return status; - e820ext = priv.e820ext; - e820ext_size = priv.e820ext_size; - /* Historic? */ boot_params->alt_mem_k = 32 * 1024; diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c index 8f0c4c9fc904..51079fc9298f 100644 --- a/arch/x86/boot/cpucheck.c +++ b/arch/x86/boot/cpucheck.c @@ -113,7 +113,7 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr) { int err; - memset(&cpu.flags, 0, sizeof cpu.flags); + memset(&cpu.flags, 0, sizeof(cpu.flags)); cpu.level = 3; if (has_eflag(X86_EFLAGS_AC)) diff --git a/arch/x86/boot/early_serial_console.c b/arch/x86/boot/early_serial_console.c index b25c53527a94..023bf1c3de8b 100644 --- a/arch/x86/boot/early_serial_console.c +++ b/arch/x86/boot/early_serial_console.c @@ -50,7 +50,7 @@ static void parse_earlyprintk(void) int pos = 0; int port = 0; - if (cmdline_find_option("earlyprintk", arg, sizeof arg) > 0) { + if (cmdline_find_option("earlyprintk", arg, sizeof(arg)) > 0) { char *e; if (!strncmp(arg, "serial", 6)) { @@ -124,7 +124,7 @@ static void parse_console_uart8250(void) * console=uart8250,io,0x3f8,115200n8 * need to make sure it is last one console ! */ - if (cmdline_find_option("console", optstr, sizeof optstr) <= 0) + if (cmdline_find_option("console", optstr, sizeof(optstr)) <= 0) return; options = optstr; diff --git a/arch/x86/boot/edd.c b/arch/x86/boot/edd.c index 223e42527077..6c176b6a42ad 100644 --- a/arch/x86/boot/edd.c +++ b/arch/x86/boot/edd.c @@ -76,7 +76,7 @@ static int get_edd_info(u8 devno, struct edd_info *ei) { struct biosregs ireg, oreg; - memset(ei, 0, sizeof *ei); + memset(ei, 0, sizeof(*ei)); /* Check Extensions Present */ @@ -133,7 +133,7 @@ void query_edd(void) struct edd_info ei, *edp; u32 *mbrptr; - if (cmdline_find_option("edd", eddarg, sizeof eddarg) > 0) { + if (cmdline_find_option("edd", eddarg, sizeof(eddarg)) > 0) { if (!strcmp(eddarg, "skipmbr") || !strcmp(eddarg, "skip")) { do_edd = 1; do_mbr = 0; @@ -166,7 +166,7 @@ void query_edd(void) */ if (!get_edd_info(devno, &ei) && boot_params.eddbuf_entries < EDDMAXNR) { - memcpy(edp, &ei, sizeof ei); + memcpy(edp, &ei, sizeof(ei)); edp++; boot_params.eddbuf_entries++; } diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index 4c881c850125..850b8762e889 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -300,7 +300,7 @@ _start: # Part 2 of the header, from the old setup.S .ascii "HdrS" # header signature - .word 0x020e # header version number (>= 0x0105) + .word 0x020d # header version number (>= 0x0105) # or else old loadlin-1.5 will fail) .globl realmode_swtch realmode_swtch: .word 0, 0 # default_switch, SETUPSEG @@ -558,10 +558,6 @@ pref_address: .quad LOAD_PHYSICAL_ADDR # preferred load addr init_size: .long INIT_SIZE # kernel initialization size handover_offset: .long 0 # Filled in by build.c -acpi_rsdp_addr: .quad 0 # 64-bit physical pointer to the - # ACPI RSDP table, added with - # version 2.14 - # End of setup header ##################################################### .section ".entrytext", "ax" diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c index 9bcea386db65..73532543d689 100644 --- a/arch/x86/boot/main.c +++ b/arch/x86/boot/main.c @@ -36,8 +36,8 @@ static void copy_boot_params(void) const struct old_cmdline * const oldcmd = (const struct old_cmdline *)OLD_CL_ADDRESS; - BUILD_BUG_ON(sizeof boot_params != 4096); - memcpy(&boot_params.hdr, &hdr, sizeof hdr); + BUILD_BUG_ON(sizeof(boot_params) != 4096); + memcpy(&boot_params.hdr, &hdr, sizeof(hdr)); if (!boot_params.hdr.cmd_line_ptr && oldcmd->cl_magic == OLD_CL_MAGIC) { diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c index d9c28c87e477..7df2b28207be 100644 --- a/arch/x86/boot/memory.c +++ b/arch/x86/boot/memory.c @@ -26,7 +26,7 @@ static int detect_memory_e820(void) initregs(&ireg); ireg.ax = 0xe820; - ireg.cx = sizeof buf; + ireg.cx = sizeof(buf); ireg.edx = SMAP; ireg.di = (size_t)&buf; diff --git a/arch/x86/boot/regs.c b/arch/x86/boot/regs.c index c0fb356a3092..2fe3616ba161 100644 --- a/arch/x86/boot/regs.c +++ b/arch/x86/boot/regs.c @@ -21,7 +21,7 @@ void initregs(struct biosregs *reg) { - memset(reg, 0, sizeof *reg); + memset(reg, 0, sizeof(*reg)); reg->eflags |= X86_EFLAGS_CF; reg->ds = ds(); reg->es = ds(); diff --git a/arch/x86/boot/video-vesa.c b/arch/x86/boot/video-vesa.c index ba3e100654db..3ecc11a9c440 100644 --- a/arch/x86/boot/video-vesa.c +++ b/arch/x86/boot/video-vesa.c @@ -62,7 +62,7 @@ static int vesa_probe(void) if (mode & ~0x1ff) continue; - memset(&vminfo, 0, sizeof vminfo); /* Just in case... */ + memset(&vminfo, 0, sizeof(vminfo)); /* Just in case... */ ireg.ax = 0x4f01; ireg.cx = mode; @@ -109,7 +109,7 @@ static int vesa_set_mode(struct mode_info *mode) int is_graphic; u16 vesa_mode = mode->mode - VIDEO_FIRST_VESA; - memset(&vminfo, 0, sizeof vminfo); /* Just in case... */ + memset(&vminfo, 0, sizeof(vminfo)); /* Just in case... */ initregs(&ireg); ireg.ax = 0x4f01; @@ -241,7 +241,7 @@ void vesa_store_edid(void) struct biosregs ireg, oreg; /* Apparently used as a nonsense token... */ - memset(&boot_params.edid_info, 0x13, sizeof boot_params.edid_info); + memset(&boot_params.edid_info, 0x13, sizeof(boot_params.edid_info)); if (vginfo.version < 0x0200) return; /* EDID requires VBE 2.0+ */ diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c index 77780e386e9b..ac89b6624a40 100644 --- a/arch/x86/boot/video.c +++ b/arch/x86/boot/video.c @@ -115,7 +115,7 @@ static unsigned int get_entry(void) } else if ((key >= '0' && key <= '9') || (key >= 'A' && key <= 'Z') || (key >= 'a' && key <= 'z')) { - if (len < sizeof entry_buf) { + if (len < sizeof(entry_buf)) { entry_buf[len++] = key; putchar(key); } diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 708b46a54578..25e5a6bda8c3 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -329,8 +329,22 @@ For 32-bit we have the following conventions - kernel is built with #endif +.macro STACKLEAK_ERASE_NOCLOBBER +#ifdef CONFIG_GCC_PLUGIN_STACKLEAK + PUSH_AND_CLEAR_REGS + call stackleak_erase + POP_REGS +#endif +.endm + #endif /* CONFIG_X86_64 */ +.macro STACKLEAK_ERASE +#ifdef CONFIG_GCC_PLUGIN_STACKLEAK + call stackleak_erase +#endif +.endm + /* * This does 'call enter_from_user_mode' unless we can avoid it based on * kernel config or using the static jump infrastructure. diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 687e47f8a796..d309f30cf7af 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -46,6 +46,8 @@ #include <asm/frame.h> #include <asm/nospec-branch.h> +#include "calling.h" + .section .entry.text, "ax" /* @@ -712,6 +714,7 @@ ENTRY(ret_from_fork) /* When we fork, we trace the syscall return in the child, too. */ movl %esp, %eax call syscall_return_slowpath + STACKLEAK_ERASE jmp restore_all /* kernel thread */ @@ -886,6 +889,8 @@ ENTRY(entry_SYSENTER_32) ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \ "jmp .Lsyscall_32_done", X86_FEATURE_XENPV + STACKLEAK_ERASE + /* Opportunistic SYSEXIT */ TRACE_IRQS_ON /* User mode traces as IRQs on. */ @@ -997,6 +1002,8 @@ ENTRY(entry_INT80_32) call do_int80_syscall_32 .Lsyscall_32_done: + STACKLEAK_ERASE + restore_all: TRACE_IRQS_IRET SWITCH_TO_ENTRY_STACK diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 4d7a2d9d44cf..1f0efdb7b629 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -266,6 +266,8 @@ syscall_return_via_sysret: * We are on the trampoline stack. All regs except RDI are live. * We can do future final exit work right here. */ + STACKLEAK_ERASE_NOCLOBBER + SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi popq %rdi @@ -564,6 +566,7 @@ ENTRY(interrupt_entry) ret END(interrupt_entry) +_ASM_NOKPROBE(interrupt_entry) /* Interrupt entry/exit. */ @@ -625,6 +628,7 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode) * We are on the trampoline stack. All regs except RDI are live. * We can do future final exit work right here. */ + STACKLEAK_ERASE_NOCLOBBER SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi @@ -763,6 +767,7 @@ native_irq_return_ldt: jmp native_irq_return_iret #endif END(common_interrupt) +_ASM_NOKPROBE(common_interrupt) /* * APIC interrupts. @@ -777,6 +782,7 @@ ENTRY(\sym) call \do_sym /* rdi points to pt_regs */ jmp ret_from_intr END(\sym) +_ASM_NOKPROBE(\sym) .endm /* Make sure APIC interrupt handlers end up in the irqentry section: */ @@ -957,6 +963,7 @@ ENTRY(\sym) jmp error_exit .endif +_ASM_NOKPROBE(\sym) END(\sym) .endm diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 7d0df78db727..8eaf8952c408 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -261,6 +261,11 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe) /* Opportunistic SYSRET */ sysret32_from_system_call: + /* + * We are not going to return to userspace from the trampoline + * stack. So let's erase the thread stack right now. + */ + STACKLEAK_ERASE TRACE_IRQS_ON /* User mode traces as IRQs on. */ movq RBX(%rsp), %rbx /* pt_regs->rbx */ movq RBP(%rsp), %rbp /* pt_regs->rbp */ diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 141d415a8c80..0624bf2266fd 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -47,7 +47,7 @@ targets += $(vdso_img_sodbg) $(vdso_img-y:%=vdso%.so) CPPFLAGS_vdso.lds += -P -C VDSO_LDFLAGS_vdso.lds = -m elf_x86_64 -soname linux-vdso.so.1 --no-undefined \ - -z max-page-size=4096 -z common-page-size=4096 + -z max-page-size=4096 $(obj)/vdso64.so.dbg: $(obj)/vdso.lds $(vobjs) FORCE $(call if_changed,vdso) @@ -98,7 +98,7 @@ CFLAGS_REMOVE_vvar.o = -pg CPPFLAGS_vdsox32.lds = $(CPPFLAGS_vdso.lds) VDSO_LDFLAGS_vdsox32.lds = -m elf32_x86_64 -soname linux-vdso.so.1 \ - -z max-page-size=4096 -z common-page-size=4096 + -z max-page-size=4096 # x32-rebranded versions vobjx32s-y := $(vobjs-y:.o=-x32.o) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 106911b603bd..374a19712e20 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -438,26 +438,6 @@ int x86_setup_perfctr(struct perf_event *event) if (config == -1LL) return -EINVAL; - /* - * Branch tracing: - */ - if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS && - !attr->freq && hwc->sample_period == 1) { - /* BTS is not supported by this architecture. */ - if (!x86_pmu.bts_active) - return -EOPNOTSUPP; - - /* BTS is currently only allowed for user-mode. */ - if (!attr->exclude_kernel) - return -EOPNOTSUPP; - - /* disallow bts if conflicting events are present */ - if (x86_add_exclusive(x86_lbr_exclusive_lbr)) - return -EBUSY; - - event->destroy = hw_perf_lbr_event_destroy; - } - hwc->config |= config; return 0; diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 0fb8659b20d8..ecc3e34ca955 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2306,14 +2306,18 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) return handled; } -static bool disable_counter_freezing; +static bool disable_counter_freezing = true; static int __init intel_perf_counter_freezing_setup(char *s) { - disable_counter_freezing = true; - pr_info("Intel PMU Counter freezing feature disabled\n"); + bool res; + + if (kstrtobool(s, &res)) + return -EINVAL; + + disable_counter_freezing = !res; return 1; } -__setup("disable_counter_freezing", intel_perf_counter_freezing_setup); +__setup("perf_v4_pmi=", intel_perf_counter_freezing_setup); /* * Simplified handler for Arch Perfmon v4: @@ -2470,16 +2474,7 @@ done: static struct event_constraint * intel_bts_constraints(struct perf_event *event) { - struct hw_perf_event *hwc = &event->hw; - unsigned int hw_event, bts_event; - - if (event->attr.freq) - return NULL; - - hw_event = hwc->config & INTEL_ARCH_EVENT_MASK; - bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS); - - if (unlikely(hw_event == bts_event && hwc->sample_period == 1)) + if (unlikely(intel_pmu_has_bts(event))) return &bts_constraint; return NULL; @@ -3098,6 +3093,43 @@ static unsigned long intel_pmu_large_pebs_flags(struct perf_event *event) return flags; } +static int intel_pmu_bts_config(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + + if (unlikely(intel_pmu_has_bts(event))) { + /* BTS is not supported by this architecture. */ + if (!x86_pmu.bts_active) + return -EOPNOTSUPP; + + /* BTS is currently only allowed for user-mode. */ + if (!attr->exclude_kernel) + return -EOPNOTSUPP; + + /* BTS is not allowed for precise events. */ + if (attr->precise_ip) + return -EOPNOTSUPP; + + /* disallow bts if conflicting events are present */ + if (x86_add_exclusive(x86_lbr_exclusive_lbr)) + return -EBUSY; + + event->destroy = hw_perf_lbr_event_destroy; + } + + return 0; +} + +static int core_pmu_hw_config(struct perf_event *event) +{ + int ret = x86_pmu_hw_config(event); + + if (ret) + return ret; + + return intel_pmu_bts_config(event); +} + static int intel_pmu_hw_config(struct perf_event *event) { int ret = x86_pmu_hw_config(event); @@ -3105,6 +3137,10 @@ static int intel_pmu_hw_config(struct perf_event *event) if (ret) return ret; + ret = intel_pmu_bts_config(event); + if (ret) + return ret; + if (event->attr.precise_ip) { if (!event->attr.freq) { event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD; @@ -3127,7 +3163,7 @@ static int intel_pmu_hw_config(struct perf_event *event) /* * BTS is set up earlier in this path, so don't account twice */ - if (!intel_pmu_has_bts(event)) { + if (!unlikely(intel_pmu_has_bts(event))) { /* disallow lbr if conflicting events are present */ if (x86_add_exclusive(x86_lbr_exclusive_lbr)) return -EBUSY; @@ -3596,7 +3632,7 @@ static __initconst const struct x86_pmu core_pmu = { .enable_all = core_pmu_enable_all, .enable = core_pmu_enable_event, .disable = x86_pmu_disable_event, - .hw_config = x86_pmu_hw_config, + .hw_config = core_pmu_hw_config, .schedule_events = x86_schedule_events, .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, .perfctr = MSR_ARCH_PERFMON_PERFCTR0, @@ -4535,7 +4571,7 @@ __init int intel_pmu_init(void) } } - snprintf(pmu_name_str, sizeof pmu_name_str, "%s", name); + snprintf(pmu_name_str, sizeof(pmu_name_str), "%s", name); if (version >= 2 && extra_attr) { x86_pmu.format_attrs = merge_attr(intel_arch3_formats_attr, diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index e17ab885b1e9..cb46d602a6b8 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -129,8 +129,15 @@ struct intel_uncore_box { struct intel_uncore_extra_reg shared_regs[0]; }; -#define UNCORE_BOX_FLAG_INITIATED 0 -#define UNCORE_BOX_FLAG_CTL_OFFS8 1 /* event config registers are 8-byte apart */ +/* CFL uncore 8th cbox MSRs */ +#define CFL_UNC_CBO_7_PERFEVTSEL0 0xf70 +#define CFL_UNC_CBO_7_PER_CTR0 0xf76 + +#define UNCORE_BOX_FLAG_INITIATED 0 +/* event config registers are 8-byte apart */ +#define UNCORE_BOX_FLAG_CTL_OFFS8 1 +/* CFL 8th CBOX has different MSR space */ +#define UNCORE_BOX_FLAG_CFL8_CBOX_MSR_OFFS 2 struct uncore_event_desc { struct kobj_attribute attr; @@ -297,17 +304,27 @@ unsigned int uncore_freerunning_counter(struct intel_uncore_box *box, static inline unsigned uncore_msr_event_ctl(struct intel_uncore_box *box, int idx) { - return box->pmu->type->event_ctl + - (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) + - uncore_msr_box_offset(box); + if (test_bit(UNCORE_BOX_FLAG_CFL8_CBOX_MSR_OFFS, &box->flags)) { + return CFL_UNC_CBO_7_PERFEVTSEL0 + + (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx); + } else { + return box->pmu->type->event_ctl + + (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) + + uncore_msr_box_offset(box); + } } static inline unsigned uncore_msr_perf_ctr(struct intel_uncore_box *box, int idx) { - return box->pmu->type->perf_ctr + - (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) + - uncore_msr_box_offset(box); + if (test_bit(UNCORE_BOX_FLAG_CFL8_CBOX_MSR_OFFS, &box->flags)) { + return CFL_UNC_CBO_7_PER_CTR0 + + (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx); + } else { + return box->pmu->type->perf_ctr + + (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) + + uncore_msr_box_offset(box); + } } static inline diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index 8527c3e1038b..2593b0d7aeee 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -15,6 +15,25 @@ #define PCI_DEVICE_ID_INTEL_SKL_HQ_IMC 0x1910 #define PCI_DEVICE_ID_INTEL_SKL_SD_IMC 0x190f #define PCI_DEVICE_ID_INTEL_SKL_SQ_IMC 0x191f +#define PCI_DEVICE_ID_INTEL_KBL_Y_IMC 0x590c +#define PCI_DEVICE_ID_INTEL_KBL_U_IMC 0x5904 +#define PCI_DEVICE_ID_INTEL_KBL_UQ_IMC 0x5914 +#define PCI_DEVICE_ID_INTEL_KBL_SD_IMC 0x590f +#define PCI_DEVICE_ID_INTEL_KBL_SQ_IMC 0x591f +#define PCI_DEVICE_ID_INTEL_CFL_2U_IMC 0x3ecc +#define PCI_DEVICE_ID_INTEL_CFL_4U_IMC 0x3ed0 +#define PCI_DEVICE_ID_INTEL_CFL_4H_IMC 0x3e10 +#define PCI_DEVICE_ID_INTEL_CFL_6H_IMC 0x3ec4 +#define PCI_DEVICE_ID_INTEL_CFL_2S_D_IMC 0x3e0f +#define PCI_DEVICE_ID_INTEL_CFL_4S_D_IMC 0x3e1f +#define PCI_DEVICE_ID_INTEL_CFL_6S_D_IMC 0x3ec2 +#define PCI_DEVICE_ID_INTEL_CFL_8S_D_IMC 0x3e30 +#define PCI_DEVICE_ID_INTEL_CFL_4S_W_IMC 0x3e18 +#define PCI_DEVICE_ID_INTEL_CFL_6S_W_IMC 0x3ec6 +#define PCI_DEVICE_ID_INTEL_CFL_8S_W_IMC 0x3e31 +#define PCI_DEVICE_ID_INTEL_CFL_4S_S_IMC 0x3e33 +#define PCI_DEVICE_ID_INTEL_CFL_6S_S_IMC 0x3eca +#define PCI_DEVICE_ID_INTEL_CFL_8S_S_IMC 0x3e32 /* SNB event control */ #define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff @@ -202,6 +221,10 @@ static void skl_uncore_msr_init_box(struct intel_uncore_box *box) wrmsrl(SKL_UNC_PERF_GLOBAL_CTL, SNB_UNC_GLOBAL_CTL_EN | SKL_UNC_GLOBAL_CTL_CORE_ALL); } + + /* The 8th CBOX has different MSR space */ + if (box->pmu->pmu_idx == 7) + __set_bit(UNCORE_BOX_FLAG_CFL8_CBOX_MSR_OFFS, &box->flags); } static void skl_uncore_msr_enable_box(struct intel_uncore_box *box) @@ -228,7 +251,7 @@ static struct intel_uncore_ops skl_uncore_msr_ops = { static struct intel_uncore_type skl_uncore_cbox = { .name = "cbox", .num_counters = 4, - .num_boxes = 5, + .num_boxes = 8, .perf_ctr_bits = 44, .fixed_ctr_bits = 48, .perf_ctr = SNB_UNC_CBO_0_PER_CTR0, @@ -569,7 +592,82 @@ static const struct pci_device_id skl_uncore_pci_ids[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_SQ_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, - + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_Y_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_U_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_UQ_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_SD_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_SQ_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_2U_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4U_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4H_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6H_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_2S_D_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4S_D_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6S_D_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_8S_D_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4S_W_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6S_W_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_8S_W_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4S_S_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6S_S_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_8S_S_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, { /* end: all zeroes */ }, }; @@ -618,6 +716,25 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = { IMC_DEV(SKL_HQ_IMC, &skl_uncore_pci_driver), /* 6th Gen Core H Quad Core */ IMC_DEV(SKL_SD_IMC, &skl_uncore_pci_driver), /* 6th Gen Core S Dual Core */ IMC_DEV(SKL_SQ_IMC, &skl_uncore_pci_driver), /* 6th Gen Core S Quad Core */ + IMC_DEV(KBL_Y_IMC, &skl_uncore_pci_driver), /* 7th Gen Core Y */ + IMC_DEV(KBL_U_IMC, &skl_uncore_pci_driver), /* 7th Gen Core U */ + IMC_DEV(KBL_UQ_IMC, &skl_uncore_pci_driver), /* 7th Gen Core U Quad Core */ + IMC_DEV(KBL_SD_IMC, &skl_uncore_pci_driver), /* 7th Gen Core S Dual Core */ + IMC_DEV(KBL_SQ_IMC, &skl_uncore_pci_driver), /* 7th Gen Core S Quad Core */ + IMC_DEV(CFL_2U_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U 2 Cores */ + IMC_DEV(CFL_4U_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U 4 Cores */ + IMC_DEV(CFL_4H_IMC, &skl_uncore_pci_driver), /* 8th Gen Core H 4 Cores */ + IMC_DEV(CFL_6H_IMC, &skl_uncore_pci_driver), /* 8th Gen Core H 6 Cores */ + IMC_DEV(CFL_2S_D_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 2 Cores Desktop */ + IMC_DEV(CFL_4S_D_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 4 Cores Desktop */ + IMC_DEV(CFL_6S_D_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 6 Cores Desktop */ + IMC_DEV(CFL_8S_D_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 8 Cores Desktop */ + IMC_DEV(CFL_4S_W_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 4 Cores Work Station */ + IMC_DEV(CFL_6S_W_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 6 Cores Work Station */ + IMC_DEV(CFL_8S_W_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 8 Cores Work Station */ + IMC_DEV(CFL_4S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 4 Cores Server */ + IMC_DEV(CFL_6S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 6 Cores Server */ + IMC_DEV(CFL_8S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 8 Cores Server */ { /* end marker */ } }; diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index adae087cecdd..78d7b7031bfc 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -859,11 +859,16 @@ static inline int amd_pmu_init(void) static inline bool intel_pmu_has_bts(struct perf_event *event) { - if (event->attr.config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS && - !event->attr.freq && event->hw.sample_period == 1) - return true; + struct hw_perf_event *hwc = &event->hw; + unsigned int hw_event, bts_event; + + if (event->attr.freq) + return false; + + hw_event = hwc->config & INTEL_ARCH_EVENT_MASK; + bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS); - return false; + return hw_event == bts_event && hwc->sample_period == 1; } int intel_pmu_save_and_restart(struct perf_event *event); diff --git a/arch/x86/include/asm/bootparam_utils.h b/arch/x86/include/asm/bootparam_utils.h index a07ffd23e4dd..f6f6ef436599 100644 --- a/arch/x86/include/asm/bootparam_utils.h +++ b/arch/x86/include/asm/bootparam_utils.h @@ -36,6 +36,7 @@ static void sanitize_boot_params(struct boot_params *boot_params) */ if (boot_params->sentinel) { /* fields in boot_params are left uninitialized, clear them */ + boot_params->acpi_rsdp_addr = 0; memset(&boot_params->ext_ramdisk_image, 0, (char *)&boot_params->efi_info - (char *)&boot_params->ext_ramdisk_image); diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index fab4df16a3c4..22c4dfe65992 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h @@ -217,11 +217,18 @@ static inline bool in_x32_syscall(void) return false; } -static inline bool in_compat_syscall(void) +static inline bool in_32bit_syscall(void) { return in_ia32_syscall() || in_x32_syscall(); } + +#ifdef CONFIG_COMPAT +static inline bool in_compat_syscall(void) +{ + return in_32bit_syscall(); +} #define in_compat_syscall in_compat_syscall /* override the generic impl */ +#endif struct compat_siginfo; int __copy_siginfo_to_user32(struct compat_siginfo __user *to, diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 89a048c2faec..28c4a502b419 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -331,6 +331,8 @@ #define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ #define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ #define X86_FEATURE_CLDEMOTE (16*32+25) /* CLDEMOTE instruction */ +#define X86_FEATURE_MOVDIRI (16*32+27) /* MOVDIRI instruction */ +#define X86_FEATURE_MOVDIR64B (16*32+28) /* MOVDIR64B instruction */ /* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */ #define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* MCA overflow recovery support */ diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 5f7290e6e954..69dcdf195b61 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -226,7 +226,7 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu) "3: movl $-2,%[err]\n\t" \ "jmp 2b\n\t" \ ".popsection\n\t" \ - _ASM_EXTABLE_UA(1b, 3b) \ + _ASM_EXTABLE(1b, 3b) \ : [err] "=r" (err) \ : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \ : "memory") diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index c18ed65287d5..cf350639e76d 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -76,9 +76,7 @@ static inline bool arch_syscall_match_sym_name(const char *sym, const char *name #define ARCH_TRACE_IGNORE_COMPAT_SYSCALLS 1 static inline bool arch_trace_is_compat_syscall(struct pt_regs *regs) { - if (in_compat_syscall()) - return true; - return false; + return in_32bit_syscall(); } #endif /* CONFIG_FTRACE_SYSCALLS && CONFIG_IA32_EMULATION */ #endif /* !COMPILE_OFFSETS */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 55e51ff7e421..fbda5a917c5b 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1094,7 +1094,8 @@ struct kvm_x86_ops { bool (*has_wbinvd_exit)(void); u64 (*read_l1_tsc_offset)(struct kvm_vcpu *vcpu); - void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); + /* Returns actual tsc_offset set in active VMCS */ + u64 (*write_l1_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2); diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 4da9b1c58d28..c1a812bd5a27 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -221,6 +221,8 @@ static inline void mce_hygon_feature_init(struct cpuinfo_x86 *c) { return mce_am int mce_available(struct cpuinfo_x86 *c); bool mce_is_memory_error(struct mce *m); +bool mce_is_correctable(struct mce *m); +int mce_usable_address(struct mce *m); DECLARE_PER_CPU(unsigned, mce_exception_count); DECLARE_PER_CPU(unsigned, mce_poll_count); diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 0d6271cce198..1d0a7778e163 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -232,7 +232,7 @@ static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2) : "cc"); } #endif - return hv_status; + return hv_status; } /* diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 80f4a4f38c79..c8f73efb4ece 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -41,9 +41,10 @@ #define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */ #define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */ -#define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */ +#define SPEC_CTRL_STIBP_SHIFT 1 /* Single Thread Indirect Branch Predictor (STIBP) bit */ +#define SPEC_CTRL_STIBP (1 << SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */ #define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */ -#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ +#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ #define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 80dc14422495..032b6009baab 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -3,6 +3,8 @@ #ifndef _ASM_X86_NOSPEC_BRANCH_H_ #define _ASM_X86_NOSPEC_BRANCH_H_ +#include <linux/static_key.h> + #include <asm/alternative.h> #include <asm/alternative-asm.h> #include <asm/cpufeatures.h> @@ -162,11 +164,12 @@ _ASM_PTR " 999b\n\t" \ ".popsection\n\t" -#if defined(CONFIG_X86_64) && defined(RETPOLINE) +#ifdef CONFIG_RETPOLINE +#ifdef CONFIG_X86_64 /* - * Since the inline asm uses the %V modifier which is only in newer GCC, - * the 64-bit one is dependent on RETPOLINE not CONFIG_RETPOLINE. + * Inline asm uses the %V modifier which is only in newer GCC + * which is ensured when CONFIG_RETPOLINE is defined. */ # define CALL_NOSPEC \ ANNOTATE_NOSPEC_ALTERNATIVE \ @@ -181,7 +184,7 @@ X86_FEATURE_RETPOLINE_AMD) # define THUNK_TARGET(addr) [thunk_target] "r" (addr) -#elif defined(CONFIG_X86_32) && defined(CONFIG_RETPOLINE) +#else /* CONFIG_X86_32 */ /* * For i386 we use the original ret-equivalent retpoline, because * otherwise we'll run out of registers. We don't care about CET @@ -211,6 +214,7 @@ X86_FEATURE_RETPOLINE_AMD) # define THUNK_TARGET(addr) [thunk_target] "rm" (addr) +#endif #else /* No retpoline for C / inline asm */ # define CALL_NOSPEC "call *%[thunk_target]\n" # define THUNK_TARGET(addr) [thunk_target] "rm" (addr) @@ -219,13 +223,19 @@ /* The Spectre V2 mitigation variants */ enum spectre_v2_mitigation { SPECTRE_V2_NONE, - SPECTRE_V2_RETPOLINE_MINIMAL, - SPECTRE_V2_RETPOLINE_MINIMAL_AMD, SPECTRE_V2_RETPOLINE_GENERIC, SPECTRE_V2_RETPOLINE_AMD, SPECTRE_V2_IBRS_ENHANCED, }; +/* The indirect branch speculation control variants */ +enum spectre_v2_user_mitigation { + SPECTRE_V2_USER_NONE, + SPECTRE_V2_USER_STRICT, + SPECTRE_V2_USER_PRCTL, + SPECTRE_V2_USER_SECCOMP, +}; + /* The Speculative Store Bypass disable variants */ enum ssb_mitigation { SPEC_STORE_BYPASS_NONE, @@ -303,6 +313,10 @@ do { \ preempt_enable(); \ } while (0) +DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp); +DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); +DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb); + #endif /* __ASSEMBLY__ */ /* diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index cd0cf1c568b4..8f657286d599 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -33,12 +33,14 @@ /* * Set __PAGE_OFFSET to the most negative possible address + - * PGDIR_SIZE*16 (pgd slot 272). The gap is to allow a space for a - * hypervisor to fit. Choosing 16 slots here is arbitrary, but it's - * what Xen requires. + * PGDIR_SIZE*17 (pgd slot 273). + * + * The gap is to allow a space for LDT remap for PTI (1 pgd slot) and space for + * a hypervisor (16 slots). Choosing 16 slots for a hypervisor is arbitrary, + * but it's what Xen requires. */ -#define __PAGE_OFFSET_BASE_L5 _AC(0xff10000000000000, UL) -#define __PAGE_OFFSET_BASE_L4 _AC(0xffff880000000000, UL) +#define __PAGE_OFFSET_BASE_L5 _AC(0xff11000000000000, UL) +#define __PAGE_OFFSET_BASE_L4 _AC(0xffff888000000000, UL) #ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT #define __PAGE_OFFSET page_offset_base diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index fba54ca23b2a..26942ad63830 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -361,7 +361,6 @@ extern struct paravirt_patch_template pv_ops; __visible extern const char start_##ops##_##name[], end_##ops##_##name[]; \ asm(NATIVE_LABEL("start_", ops, name) code NATIVE_LABEL("end_", ops, name)) -unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len); unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len); unsigned paravirt_patch_default(u8 type, void *insnbuf, unsigned long addr, unsigned len); @@ -651,7 +650,6 @@ void paravirt_leave_lazy_mmu(void); void paravirt_flush_lazy_mmu(void); void _paravirt_nop(void); -u32 _paravirt_ident_32(u32); u64 _paravirt_ident_64(u64); #define paravirt_nop ((void *)_paravirt_nop) diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 04edd2d58211..84bd9bdc1987 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -111,9 +111,7 @@ extern unsigned int ptrs_per_p4d; */ #define MAXMEM (1UL << MAX_PHYSMEM_BITS) -#define LDT_PGD_ENTRY_L4 -3UL -#define LDT_PGD_ENTRY_L5 -112UL -#define LDT_PGD_ENTRY (pgtable_l5_enabled() ? LDT_PGD_ENTRY_L5 : LDT_PGD_ENTRY_L4) +#define LDT_PGD_ENTRY -240UL #define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT) #define LDT_END_ADDR (LDT_BASE_ADDR + PGDIR_SIZE) diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h index 87623c6b13db..bd5ac6cc37db 100644 --- a/arch/x86/include/asm/qspinlock.h +++ b/arch/x86/include/asm/qspinlock.h @@ -13,12 +13,15 @@ #define queued_fetch_set_pending_acquire queued_fetch_set_pending_acquire static __always_inline u32 queued_fetch_set_pending_acquire(struct qspinlock *lock) { - u32 val = 0; - - if (GEN_BINARY_RMWcc(LOCK_PREFIX "btsl", lock->val.counter, c, - "I", _Q_PENDING_OFFSET)) - val |= _Q_PENDING_VAL; + u32 val; + /* + * We can't use GEN_BINARY_RMWcc() inside an if() stmt because asm goto + * and CONFIG_PROFILE_ALL_BRANCHES=y results in a label inside a + * statement expression, which GCC doesn't like. + */ + val = GEN_BINARY_RMWcc(LOCK_PREFIX "btsl", lock->val.counter, c, + "I", _Q_PENDING_OFFSET) * _Q_PENDING_VAL; val |= atomic_read(&lock->val) & ~_Q_PENDING_MASK; return val; diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h index ae7c2c5cd7f0..5393babc0598 100644 --- a/arch/x86/include/asm/spec-ctrl.h +++ b/arch/x86/include/asm/spec-ctrl.h @@ -53,12 +53,24 @@ static inline u64 ssbd_tif_to_spec_ctrl(u64 tifn) return (tifn & _TIF_SSBD) >> (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT); } +static inline u64 stibp_tif_to_spec_ctrl(u64 tifn) +{ + BUILD_BUG_ON(TIF_SPEC_IB < SPEC_CTRL_STIBP_SHIFT); + return (tifn & _TIF_SPEC_IB) >> (TIF_SPEC_IB - SPEC_CTRL_STIBP_SHIFT); +} + static inline unsigned long ssbd_spec_ctrl_to_tif(u64 spec_ctrl) { BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT); return (spec_ctrl & SPEC_CTRL_SSBD) << (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT); } +static inline unsigned long stibp_spec_ctrl_to_tif(u64 spec_ctrl) +{ + BUILD_BUG_ON(TIF_SPEC_IB < SPEC_CTRL_STIBP_SHIFT); + return (spec_ctrl & SPEC_CTRL_STIBP) << (TIF_SPEC_IB - SPEC_CTRL_STIBP_SHIFT); +} + static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn) { return (tifn & _TIF_SSBD) ? x86_amd_ls_cfg_ssbd_mask : 0ULL; @@ -70,11 +82,7 @@ extern void speculative_store_bypass_ht_init(void); static inline void speculative_store_bypass_ht_init(void) { } #endif -extern void speculative_store_bypass_update(unsigned long tif); - -static inline void speculative_store_bypass_update_current(void) -{ - speculative_store_bypass_update(current_thread_info()->flags); -} +extern void speculation_ctrl_update(unsigned long tif); +extern void speculation_ctrl_update_current(void); #endif diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index 36bd243843d6..7cf1a270d891 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -11,9 +11,6 @@ struct task_struct *__switch_to_asm(struct task_struct *prev, __visible struct task_struct *__switch_to(struct task_struct *prev, struct task_struct *next); -struct tss_struct; -void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, - struct tss_struct *tss); /* This runs runs on the previous thread's stack. */ static inline void prepare_switch_to(struct task_struct *next) diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 2ff2a30a264f..82b73b75d67c 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -79,10 +79,12 @@ struct thread_info { #define TIF_SIGPENDING 2 /* signal pending */ #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ #define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/ -#define TIF_SSBD 5 /* Reduced data speculation */ +#define TIF_SSBD 5 /* Speculative store bypass disable */ #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ +#define TIF_SPEC_IB 9 /* Indirect branch speculation mitigation */ +#define TIF_SPEC_FORCE_UPDATE 10 /* Force speculation MSR update in context switch */ #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ #define TIF_UPROBE 12 /* breakpointed or singlestepping */ #define TIF_PATCH_PENDING 13 /* pending live patching update */ @@ -110,6 +112,8 @@ struct thread_info { #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) +#define _TIF_SPEC_IB (1 << TIF_SPEC_IB) +#define _TIF_SPEC_FORCE_UPDATE (1 << TIF_SPEC_FORCE_UPDATE) #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) #define _TIF_UPROBE (1 << TIF_UPROBE) #define _TIF_PATCH_PENDING (1 << TIF_PATCH_PENDING) @@ -145,8 +149,18 @@ struct thread_info { _TIF_FSCHECK) /* flags to check in __switch_to() */ -#define _TIF_WORK_CTXSW \ - (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP|_TIF_SSBD) +#define _TIF_WORK_CTXSW_BASE \ + (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP| \ + _TIF_SSBD | _TIF_SPEC_FORCE_UPDATE) + +/* + * Avoid calls to __switch_to_xtra() on UP as STIBP is not evaluated. + */ +#ifdef CONFIG_SMP +# define _TIF_WORK_CTXSW (_TIF_WORK_CTXSW_BASE | _TIF_SPEC_IB) +#else +# define _TIF_WORK_CTXSW (_TIF_WORK_CTXSW_BASE) +#endif #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 323a313947e0..f4204bf377fc 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -169,10 +169,14 @@ struct tlb_state { #define LOADED_MM_SWITCHING ((struct mm_struct *)1) + /* Last user mm for optimizing IBPB */ + union { + struct mm_struct *last_user_mm; + unsigned long last_user_mm_ibpb; + }; + u16 loaded_mm_asid; u16 next_asid; - /* last user mm's ctx id */ - u64 last_ctx_id; /* * We can be in one of several states: @@ -453,6 +457,12 @@ static inline void __native_flush_tlb_one_user(unsigned long addr) */ static inline void __flush_tlb_all(void) { + /* + * This is to catch users with enabled preemption and the PGE feature + * and don't trigger the warning in __native_flush_tlb(). + */ + VM_WARN_ON_ONCE(preemptible()); + if (boot_cpu_has(X86_FEATURE_PGE)) { __flush_tlb_global(); } else { diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 0f842104862c..b85a7c54c6a1 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -303,6 +303,4 @@ extern void x86_init_noop(void); extern void x86_init_uint_noop(unsigned int unused); extern bool x86_pnpbios_disabled(void); -void x86_verify_bootdata_version(void); - #endif diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 123e669bf363..790ce08e41f2 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -9,7 +9,7 @@ #include <linux/mm.h> #include <linux/device.h> -#include <linux/uaccess.h> +#include <asm/extable.h> #include <asm/page.h> #include <asm/pgtable.h> @@ -93,12 +93,39 @@ clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, */ static inline int xen_safe_write_ulong(unsigned long *addr, unsigned long val) { - return __put_user(val, (unsigned long __user *)addr); + int ret = 0; + + asm volatile("1: mov %[val], %[ptr]\n" + "2:\n" + ".section .fixup, \"ax\"\n" + "3: sub $1, %[ret]\n" + " jmp 2b\n" + ".previous\n" + _ASM_EXTABLE(1b, 3b) + : [ret] "+r" (ret), [ptr] "=m" (*addr) + : [val] "r" (val)); + + return ret; } -static inline int xen_safe_read_ulong(unsigned long *addr, unsigned long *val) +static inline int xen_safe_read_ulong(const unsigned long *addr, + unsigned long *val) { - return __get_user(*val, (unsigned long __user *)addr); + int ret = 0; + unsigned long rval = ~0ul; + + asm volatile("1: mov %[ptr], %[rval]\n" + "2:\n" + ".section .fixup, \"ax\"\n" + "3: sub $1, %[ret]\n" + " jmp 2b\n" + ".previous\n" + _ASM_EXTABLE(1b, 3b) + : [ret] "+r" (ret), [rval] "+r" (rval) + : [ptr] "m" (*addr)); + *val = rval; + + return ret; } #ifdef CONFIG_XEN_PV diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h index 22f89d040ddd..60733f137e9a 100644 --- a/arch/x86/include/uapi/asm/bootparam.h +++ b/arch/x86/include/uapi/asm/bootparam.h @@ -16,9 +16,6 @@ #define RAMDISK_PROMPT_FLAG 0x8000 #define RAMDISK_LOAD_FLAG 0x4000 -/* version flags */ -#define VERSION_WRITTEN 0x8000 - /* loadflags */ #define LOADED_HIGH (1<<0) #define KASLR_FLAG (1<<1) @@ -89,7 +86,6 @@ struct setup_header { __u64 pref_address; __u32 init_size; __u32 handover_offset; - __u64 acpi_rsdp_addr; } __attribute__((packed)); struct sys_desc_table { @@ -159,7 +155,8 @@ struct boot_params { __u8 _pad2[4]; /* 0x054 */ __u64 tboot_addr; /* 0x058 */ struct ist_info ist_info; /* 0x060 */ - __u8 _pad3[16]; /* 0x070 */ + __u64 acpi_rsdp_addr; /* 0x070 */ + __u8 _pad3[8]; /* 0x078 */ __u8 hd0_info[16]; /* obsolete! */ /* 0x080 */ __u8 hd1_info[16]; /* obsolete! */ /* 0x090 */ struct sys_desc_table sys_desc_table; /* obsolete! */ /* 0x0a0 */ diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 92c76bf97ad8..06635fbca81c 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1776,5 +1776,5 @@ void __init arch_reserve_mem_area(acpi_physical_address addr, size_t size) u64 x86_default_get_root_pointer(void) { - return boot_params.hdr.acpi_rsdp_addr; + return boot_params.acpi_rsdp_addr; } diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index c37e66e493bf..500278f5308e 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -14,6 +14,7 @@ #include <linux/module.h> #include <linux/nospec.h> #include <linux/prctl.h> +#include <linux/sched/smt.h> #include <asm/spec-ctrl.h> #include <asm/cmdline.h> @@ -53,6 +54,13 @@ static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS; u64 __ro_after_init x86_amd_ls_cfg_base; u64 __ro_after_init x86_amd_ls_cfg_ssbd_mask; +/* Control conditional STIPB in switch_to() */ +DEFINE_STATIC_KEY_FALSE(switch_to_cond_stibp); +/* Control conditional IBPB in switch_mm() */ +DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); +/* Control unconditional IBPB in switch_mm() */ +DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb); + void __init check_bugs(void) { identify_boot_cpu(); @@ -123,31 +131,6 @@ void __init check_bugs(void) #endif } -/* The kernel command line selection */ -enum spectre_v2_mitigation_cmd { - SPECTRE_V2_CMD_NONE, - SPECTRE_V2_CMD_AUTO, - SPECTRE_V2_CMD_FORCE, - SPECTRE_V2_CMD_RETPOLINE, - SPECTRE_V2_CMD_RETPOLINE_GENERIC, - SPECTRE_V2_CMD_RETPOLINE_AMD, -}; - -static const char *spectre_v2_strings[] = { - [SPECTRE_V2_NONE] = "Vulnerable", - [SPECTRE_V2_RETPOLINE_MINIMAL] = "Vulnerable: Minimal generic ASM retpoline", - [SPECTRE_V2_RETPOLINE_MINIMAL_AMD] = "Vulnerable: Minimal AMD ASM retpoline", - [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline", - [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline", - [SPECTRE_V2_IBRS_ENHANCED] = "Mitigation: Enhanced IBRS", -}; - -#undef pr_fmt -#define pr_fmt(fmt) "Spectre V2 : " fmt - -static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = - SPECTRE_V2_NONE; - void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) { @@ -169,6 +152,10 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) static_cpu_has(X86_FEATURE_AMD_SSBD)) hostval |= ssbd_tif_to_spec_ctrl(ti->flags); + /* Conditional STIBP enabled? */ + if (static_branch_unlikely(&switch_to_cond_stibp)) + hostval |= stibp_tif_to_spec_ctrl(ti->flags); + if (hostval != guestval) { msrval = setguest ? guestval : hostval; wrmsrl(MSR_IA32_SPEC_CTRL, msrval); @@ -202,7 +189,7 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) tif = setguest ? ssbd_spec_ctrl_to_tif(guestval) : ssbd_spec_ctrl_to_tif(hostval); - speculative_store_bypass_update(tif); + speculation_ctrl_update(tif); } } EXPORT_SYMBOL_GPL(x86_virt_spec_ctrl); @@ -217,6 +204,15 @@ static void x86_amd_ssb_disable(void) wrmsrl(MSR_AMD64_LS_CFG, msrval); } +#undef pr_fmt +#define pr_fmt(fmt) "Spectre V2 : " fmt + +static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = + SPECTRE_V2_NONE; + +static enum spectre_v2_user_mitigation spectre_v2_user __ro_after_init = + SPECTRE_V2_USER_NONE; + #ifdef RETPOLINE static bool spectre_v2_bad_module; @@ -238,67 +234,217 @@ static inline const char *spectre_v2_module_string(void) static inline const char *spectre_v2_module_string(void) { return ""; } #endif -static void __init spec2_print_if_insecure(const char *reason) +static inline bool match_option(const char *arg, int arglen, const char *opt) { - if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) - pr_info("%s selected on command line.\n", reason); + int len = strlen(opt); + + return len == arglen && !strncmp(arg, opt, len); } -static void __init spec2_print_if_secure(const char *reason) +/* The kernel command line selection for spectre v2 */ +enum spectre_v2_mitigation_cmd { + SPECTRE_V2_CMD_NONE, + SPECTRE_V2_CMD_AUTO, + SPECTRE_V2_CMD_FORCE, + SPECTRE_V2_CMD_RETPOLINE, + SPECTRE_V2_CMD_RETPOLINE_GENERIC, + SPECTRE_V2_CMD_RETPOLINE_AMD, +}; + +enum spectre_v2_user_cmd { + SPECTRE_V2_USER_CMD_NONE, + SPECTRE_V2_USER_CMD_AUTO, + SPECTRE_V2_USER_CMD_FORCE, + SPECTRE_V2_USER_CMD_PRCTL, + SPECTRE_V2_USER_CMD_PRCTL_IBPB, + SPECTRE_V2_USER_CMD_SECCOMP, + SPECTRE_V2_USER_CMD_SECCOMP_IBPB, +}; + +static const char * const spectre_v2_user_strings[] = { + [SPECTRE_V2_USER_NONE] = "User space: Vulnerable", + [SPECTRE_V2_USER_STRICT] = "User space: Mitigation: STIBP protection", + [SPECTRE_V2_USER_PRCTL] = "User space: Mitigation: STIBP via prctl", + [SPECTRE_V2_USER_SECCOMP] = "User space: Mitigation: STIBP via seccomp and prctl", +}; + +static const struct { + const char *option; + enum spectre_v2_user_cmd cmd; + bool secure; +} v2_user_options[] __initdata = { + { "auto", SPECTRE_V2_USER_CMD_AUTO, false }, + { "off", SPECTRE_V2_USER_CMD_NONE, false }, + { "on", SPECTRE_V2_USER_CMD_FORCE, true }, + { "prctl", SPECTRE_V2_USER_CMD_PRCTL, false }, + { "prctl,ibpb", SPECTRE_V2_USER_CMD_PRCTL_IBPB, false }, + { "seccomp", SPECTRE_V2_USER_CMD_SECCOMP, false }, + { "seccomp,ibpb", SPECTRE_V2_USER_CMD_SECCOMP_IBPB, false }, +}; + +static void __init spec_v2_user_print_cond(const char *reason, bool secure) { - if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) - pr_info("%s selected on command line.\n", reason); + if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2) != secure) + pr_info("spectre_v2_user=%s forced on command line.\n", reason); } -static inline bool retp_compiler(void) +static enum spectre_v2_user_cmd __init +spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd) { - return __is_defined(RETPOLINE); + char arg[20]; + int ret, i; + + switch (v2_cmd) { + case SPECTRE_V2_CMD_NONE: + return SPECTRE_V2_USER_CMD_NONE; + case SPECTRE_V2_CMD_FORCE: + return SPECTRE_V2_USER_CMD_FORCE; + default: + break; + } + + ret = cmdline_find_option(boot_command_line, "spectre_v2_user", + arg, sizeof(arg)); + if (ret < 0) + return SPECTRE_V2_USER_CMD_AUTO; + + for (i = 0; i < ARRAY_SIZE(v2_user_options); i++) { + if (match_option(arg, ret, v2_user_options[i].option)) { + spec_v2_user_print_cond(v2_user_options[i].option, + v2_user_options[i].secure); + return v2_user_options[i].cmd; + } + } + + pr_err("Unknown user space protection option (%s). Switching to AUTO select\n", arg); + return SPECTRE_V2_USER_CMD_AUTO; } -static inline bool match_option(const char *arg, int arglen, const char *opt) +static void __init +spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) { - int len = strlen(opt); + enum spectre_v2_user_mitigation mode = SPECTRE_V2_USER_NONE; + bool smt_possible = IS_ENABLED(CONFIG_SMP); + enum spectre_v2_user_cmd cmd; - return len == arglen && !strncmp(arg, opt, len); + if (!boot_cpu_has(X86_FEATURE_IBPB) && !boot_cpu_has(X86_FEATURE_STIBP)) + return; + + if (cpu_smt_control == CPU_SMT_FORCE_DISABLED || + cpu_smt_control == CPU_SMT_NOT_SUPPORTED) + smt_possible = false; + + cmd = spectre_v2_parse_user_cmdline(v2_cmd); + switch (cmd) { + case SPECTRE_V2_USER_CMD_NONE: + goto set_mode; + case SPECTRE_V2_USER_CMD_FORCE: + mode = SPECTRE_V2_USER_STRICT; + break; + case SPECTRE_V2_USER_CMD_PRCTL: + case SPECTRE_V2_USER_CMD_PRCTL_IBPB: + mode = SPECTRE_V2_USER_PRCTL; + break; + case SPECTRE_V2_USER_CMD_AUTO: + case SPECTRE_V2_USER_CMD_SECCOMP: + case SPECTRE_V2_USER_CMD_SECCOMP_IBPB: + if (IS_ENABLED(CONFIG_SECCOMP)) + mode = SPECTRE_V2_USER_SECCOMP; + else + mode = SPECTRE_V2_USER_PRCTL; + break; + } + + /* Initialize Indirect Branch Prediction Barrier */ + if (boot_cpu_has(X86_FEATURE_IBPB)) { + setup_force_cpu_cap(X86_FEATURE_USE_IBPB); + + switch (cmd) { + case SPECTRE_V2_USER_CMD_FORCE: + case SPECTRE_V2_USER_CMD_PRCTL_IBPB: + case SPECTRE_V2_USER_CMD_SECCOMP_IBPB: + static_branch_enable(&switch_mm_always_ibpb); + break; + case SPECTRE_V2_USER_CMD_PRCTL: + case SPECTRE_V2_USER_CMD_AUTO: + case SPECTRE_V2_USER_CMD_SECCOMP: + static_branch_enable(&switch_mm_cond_ibpb); + break; + default: + break; + } + + pr_info("mitigation: Enabling %s Indirect Branch Prediction Barrier\n", + static_key_enabled(&switch_mm_always_ibpb) ? + "always-on" : "conditional"); + } + + /* If enhanced IBRS is enabled no STIPB required */ + if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) + return; + + /* + * If SMT is not possible or STIBP is not available clear the STIPB + * mode. + */ + if (!smt_possible || !boot_cpu_has(X86_FEATURE_STIBP)) + mode = SPECTRE_V2_USER_NONE; +set_mode: + spectre_v2_user = mode; + /* Only print the STIBP mode when SMT possible */ + if (smt_possible) + pr_info("%s\n", spectre_v2_user_strings[mode]); } +static const char * const spectre_v2_strings[] = { + [SPECTRE_V2_NONE] = "Vulnerable", + [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline", + [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline", + [SPECTRE_V2_IBRS_ENHANCED] = "Mitigation: Enhanced IBRS", +}; + static const struct { const char *option; enum spectre_v2_mitigation_cmd cmd; bool secure; -} mitigation_options[] = { - { "off", SPECTRE_V2_CMD_NONE, false }, - { "on", SPECTRE_V2_CMD_FORCE, true }, - { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false }, - { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false }, - { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false }, - { "auto", SPECTRE_V2_CMD_AUTO, false }, +} mitigation_options[] __initdata = { + { "off", SPECTRE_V2_CMD_NONE, false }, + { "on", SPECTRE_V2_CMD_FORCE, true }, + { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false }, + { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false }, + { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false }, + { "auto", SPECTRE_V2_CMD_AUTO, false }, }; +static void __init spec_v2_print_cond(const char *reason, bool secure) +{ + if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2) != secure) + pr_info("%s selected on command line.\n", reason); +} + static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) { + enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO; char arg[20]; int ret, i; - enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO; if (cmdline_find_option_bool(boot_command_line, "nospectre_v2")) return SPECTRE_V2_CMD_NONE; - else { - ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg)); - if (ret < 0) - return SPECTRE_V2_CMD_AUTO; - for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) { - if (!match_option(arg, ret, mitigation_options[i].option)) - continue; - cmd = mitigation_options[i].cmd; - break; - } + ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg)); + if (ret < 0) + return SPECTRE_V2_CMD_AUTO; - if (i >= ARRAY_SIZE(mitigation_options)) { - pr_err("unknown option (%s). Switching to AUTO select\n", arg); - return SPECTRE_V2_CMD_AUTO; - } + for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) { + if (!match_option(arg, ret, mitigation_options[i].option)) + continue; + cmd = mitigation_options[i].cmd; + break; + } + + if (i >= ARRAY_SIZE(mitigation_options)) { + pr_err("unknown option (%s). Switching to AUTO select\n", arg); + return SPECTRE_V2_CMD_AUTO; } if ((cmd == SPECTRE_V2_CMD_RETPOLINE || @@ -316,54 +462,11 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) return SPECTRE_V2_CMD_AUTO; } - if (mitigation_options[i].secure) - spec2_print_if_secure(mitigation_options[i].option); - else - spec2_print_if_insecure(mitigation_options[i].option); - + spec_v2_print_cond(mitigation_options[i].option, + mitigation_options[i].secure); return cmd; } -static bool stibp_needed(void) -{ - if (spectre_v2_enabled == SPECTRE_V2_NONE) - return false; - - if (!boot_cpu_has(X86_FEATURE_STIBP)) - return false; - - return true; -} - -static void update_stibp_msr(void *info) -{ - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); -} - -void arch_smt_update(void) -{ - u64 mask; - - if (!stibp_needed()) - return; - - mutex_lock(&spec_ctrl_mutex); - mask = x86_spec_ctrl_base; - if (cpu_smt_control == CPU_SMT_ENABLED) - mask |= SPEC_CTRL_STIBP; - else - mask &= ~SPEC_CTRL_STIBP; - - if (mask != x86_spec_ctrl_base) { - pr_info("Spectre v2 cross-process SMT mitigation: %s STIBP\n", - cpu_smt_control == CPU_SMT_ENABLED ? - "Enabling" : "Disabling"); - x86_spec_ctrl_base = mask; - on_each_cpu(update_stibp_msr, NULL, 1); - } - mutex_unlock(&spec_ctrl_mutex); -} - static void __init spectre_v2_select_mitigation(void) { enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); @@ -417,14 +520,12 @@ retpoline_auto: pr_err("Spectre mitigation: LFENCE not serializing, switching to generic retpoline\n"); goto retpoline_generic; } - mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD : - SPECTRE_V2_RETPOLINE_MINIMAL_AMD; + mode = SPECTRE_V2_RETPOLINE_AMD; setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD); setup_force_cpu_cap(X86_FEATURE_RETPOLINE); } else { retpoline_generic: - mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_GENERIC : - SPECTRE_V2_RETPOLINE_MINIMAL; + mode = SPECTRE_V2_RETPOLINE_GENERIC; setup_force_cpu_cap(X86_FEATURE_RETPOLINE); } @@ -443,12 +544,6 @@ specv2_set_mode: setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n"); - /* Initialize Indirect Branch Prediction Barrier if supported */ - if (boot_cpu_has(X86_FEATURE_IBPB)) { - setup_force_cpu_cap(X86_FEATURE_USE_IBPB); - pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n"); - } - /* * Retpoline means the kernel is safe because it has no indirect * branches. Enhanced IBRS protects firmware too, so, enable restricted @@ -465,10 +560,67 @@ specv2_set_mode: pr_info("Enabling Restricted Speculation for firmware calls\n"); } + /* Set up IBPB and STIBP depending on the general spectre V2 command */ + spectre_v2_user_select_mitigation(cmd); + /* Enable STIBP if appropriate */ arch_smt_update(); } +static void update_stibp_msr(void * __unused) +{ + wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); +} + +/* Update x86_spec_ctrl_base in case SMT state changed. */ +static void update_stibp_strict(void) +{ + u64 mask = x86_spec_ctrl_base & ~SPEC_CTRL_STIBP; + + if (sched_smt_active()) + mask |= SPEC_CTRL_STIBP; + + if (mask == x86_spec_ctrl_base) + return; + + pr_info("Update user space SMT mitigation: STIBP %s\n", + mask & SPEC_CTRL_STIBP ? "always-on" : "off"); + x86_spec_ctrl_base = mask; + on_each_cpu(update_stibp_msr, NULL, 1); +} + +/* Update the static key controlling the evaluation of TIF_SPEC_IB */ +static void update_indir_branch_cond(void) +{ + if (sched_smt_active()) + static_branch_enable(&switch_to_cond_stibp); + else + static_branch_disable(&switch_to_cond_stibp); +} + +void arch_smt_update(void) +{ + /* Enhanced IBRS implies STIBP. No update required. */ + if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) + return; + + mutex_lock(&spec_ctrl_mutex); + + switch (spectre_v2_user) { + case SPECTRE_V2_USER_NONE: + break; + case SPECTRE_V2_USER_STRICT: + update_stibp_strict(); + break; + case SPECTRE_V2_USER_PRCTL: + case SPECTRE_V2_USER_SECCOMP: + update_indir_branch_cond(); + break; + } + + mutex_unlock(&spec_ctrl_mutex); +} + #undef pr_fmt #define pr_fmt(fmt) "Speculative Store Bypass: " fmt @@ -483,7 +635,7 @@ enum ssb_mitigation_cmd { SPEC_STORE_BYPASS_CMD_SECCOMP, }; -static const char *ssb_strings[] = { +static const char * const ssb_strings[] = { [SPEC_STORE_BYPASS_NONE] = "Vulnerable", [SPEC_STORE_BYPASS_DISABLE] = "Mitigation: Speculative Store Bypass disabled", [SPEC_STORE_BYPASS_PRCTL] = "Mitigation: Speculative Store Bypass disabled via prctl", @@ -493,7 +645,7 @@ static const char *ssb_strings[] = { static const struct { const char *option; enum ssb_mitigation_cmd cmd; -} ssb_mitigation_options[] = { +} ssb_mitigation_options[] __initdata = { { "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */ { "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */ { "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */ @@ -604,10 +756,25 @@ static void ssb_select_mitigation(void) #undef pr_fmt #define pr_fmt(fmt) "Speculation prctl: " fmt -static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) +static void task_update_spec_tif(struct task_struct *tsk) { - bool update; + /* Force the update of the real TIF bits */ + set_tsk_thread_flag(tsk, TIF_SPEC_FORCE_UPDATE); + /* + * Immediately update the speculation control MSRs for the current + * task, but for a non-current task delay setting the CPU + * mitigation until it is scheduled next. + * + * This can only happen for SECCOMP mitigation. For PRCTL it's + * always the current task. + */ + if (tsk == current) + speculation_ctrl_update_current(); +} + +static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) +{ if (ssb_mode != SPEC_STORE_BYPASS_PRCTL && ssb_mode != SPEC_STORE_BYPASS_SECCOMP) return -ENXIO; @@ -618,28 +785,56 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) if (task_spec_ssb_force_disable(task)) return -EPERM; task_clear_spec_ssb_disable(task); - update = test_and_clear_tsk_thread_flag(task, TIF_SSBD); + task_update_spec_tif(task); break; case PR_SPEC_DISABLE: task_set_spec_ssb_disable(task); - update = !test_and_set_tsk_thread_flag(task, TIF_SSBD); + task_update_spec_tif(task); break; case PR_SPEC_FORCE_DISABLE: task_set_spec_ssb_disable(task); task_set_spec_ssb_force_disable(task); - update = !test_and_set_tsk_thread_flag(task, TIF_SSBD); + task_update_spec_tif(task); break; default: return -ERANGE; } + return 0; +} - /* - * If being set on non-current task, delay setting the CPU - * mitigation until it is next scheduled. - */ - if (task == current && update) - speculative_store_bypass_update_current(); - +static int ib_prctl_set(struct task_struct *task, unsigned long ctrl) +{ + switch (ctrl) { + case PR_SPEC_ENABLE: + if (spectre_v2_user == SPECTRE_V2_USER_NONE) + return 0; + /* + * Indirect branch speculation is always disabled in strict + * mode. + */ + if (spectre_v2_user == SPECTRE_V2_USER_STRICT) + return -EPERM; + task_clear_spec_ib_disable(task); + task_update_spec_tif(task); + break; + case PR_SPEC_DISABLE: + case PR_SPEC_FORCE_DISABLE: + /* + * Indirect branch speculation is always allowed when + * mitigation is force disabled. + */ + if (spectre_v2_user == SPECTRE_V2_USER_NONE) + return -EPERM; + if (spectre_v2_user == SPECTRE_V2_USER_STRICT) + return 0; + task_set_spec_ib_disable(task); + if (ctrl == PR_SPEC_FORCE_DISABLE) + task_set_spec_ib_force_disable(task); + task_update_spec_tif(task); + break; + default: + return -ERANGE; + } return 0; } @@ -649,6 +844,8 @@ int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, switch (which) { case PR_SPEC_STORE_BYPASS: return ssb_prctl_set(task, ctrl); + case PR_SPEC_INDIRECT_BRANCH: + return ib_prctl_set(task, ctrl); default: return -ENODEV; } @@ -659,6 +856,8 @@ void arch_seccomp_spec_mitigate(struct task_struct *task) { if (ssb_mode == SPEC_STORE_BYPASS_SECCOMP) ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE); + if (spectre_v2_user == SPECTRE_V2_USER_SECCOMP) + ib_prctl_set(task, PR_SPEC_FORCE_DISABLE); } #endif @@ -681,11 +880,35 @@ static int ssb_prctl_get(struct task_struct *task) } } +static int ib_prctl_get(struct task_struct *task) +{ + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) + return PR_SPEC_NOT_AFFECTED; + + switch (spectre_v2_user) { + case SPECTRE_V2_USER_NONE: + return PR_SPEC_ENABLE; + case SPECTRE_V2_USER_PRCTL: + case SPECTRE_V2_USER_SECCOMP: + if (task_spec_ib_force_disable(task)) + return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE; + if (task_spec_ib_disable(task)) + return PR_SPEC_PRCTL | PR_SPEC_DISABLE; + return PR_SPEC_PRCTL | PR_SPEC_ENABLE; + case SPECTRE_V2_USER_STRICT: + return PR_SPEC_DISABLE; + default: + return PR_SPEC_NOT_AFFECTED; + } +} + int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) { switch (which) { case PR_SPEC_STORE_BYPASS: return ssb_prctl_get(task); + case PR_SPEC_INDIRECT_BRANCH: + return ib_prctl_get(task); default: return -ENODEV; } @@ -823,7 +1046,7 @@ early_param("l1tf", l1tf_cmdline); #define L1TF_DEFAULT_MSG "Mitigation: PTE Inversion" #if IS_ENABLED(CONFIG_KVM_INTEL) -static const char *l1tf_vmx_states[] = { +static const char * const l1tf_vmx_states[] = { [VMENTER_L1D_FLUSH_AUTO] = "auto", [VMENTER_L1D_FLUSH_NEVER] = "vulnerable", [VMENTER_L1D_FLUSH_COND] = "conditional cache flushes", @@ -839,13 +1062,14 @@ static ssize_t l1tf_show_state(char *buf) if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_EPT_DISABLED || (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER && - cpu_smt_control == CPU_SMT_ENABLED)) + sched_smt_active())) { return sprintf(buf, "%s; VMX: %s\n", L1TF_DEFAULT_MSG, l1tf_vmx_states[l1tf_vmx_mitigation]); + } return sprintf(buf, "%s; VMX: %s, SMT %s\n", L1TF_DEFAULT_MSG, l1tf_vmx_states[l1tf_vmx_mitigation], - cpu_smt_control == CPU_SMT_ENABLED ? "vulnerable" : "disabled"); + sched_smt_active() ? "vulnerable" : "disabled"); } #else static ssize_t l1tf_show_state(char *buf) @@ -854,11 +1078,39 @@ static ssize_t l1tf_show_state(char *buf) } #endif +static char *stibp_state(void) +{ + if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) + return ""; + + switch (spectre_v2_user) { + case SPECTRE_V2_USER_NONE: + return ", STIBP: disabled"; + case SPECTRE_V2_USER_STRICT: + return ", STIBP: forced"; + case SPECTRE_V2_USER_PRCTL: + case SPECTRE_V2_USER_SECCOMP: + if (static_key_enabled(&switch_to_cond_stibp)) + return ", STIBP: conditional"; + } + return ""; +} + +static char *ibpb_state(void) +{ + if (boot_cpu_has(X86_FEATURE_IBPB)) { + if (static_key_enabled(&switch_mm_always_ibpb)) + return ", IBPB: always-on"; + if (static_key_enabled(&switch_mm_cond_ibpb)) + return ", IBPB: conditional"; + return ", IBPB: disabled"; + } + return ""; +} + static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, char *buf, unsigned int bug) { - int ret; - if (!boot_cpu_has_bug(bug)) return sprintf(buf, "Not affected\n"); @@ -876,13 +1128,12 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr return sprintf(buf, "Mitigation: __user pointer sanitization\n"); case X86_BUG_SPECTRE_V2: - ret = sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], - boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", + return sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], + ibpb_state(), boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", - (x86_spec_ctrl_base & SPEC_CTRL_STIBP) ? ", STIBP" : "", + stibp_state(), boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "", spectre_v2_module_string()); - return ret; case X86_BUG_SPEC_STORE_BYPASS: return sprintf(buf, "%s\n", ssb_strings[ssb_mode]); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index cbbd57ae06ee..ffb181f959d2 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1074,7 +1074,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) #endif c->x86_cache_alignment = c->x86_clflush_size; - memset(&c->x86_capability, 0, sizeof c->x86_capability); + memset(&c->x86_capability, 0, sizeof(c->x86_capability)); c->extended_cpuid_level = 0; if (!have_cpuid_p()) @@ -1317,7 +1317,7 @@ static void identify_cpu(struct cpuinfo_x86 *c) c->x86_virt_bits = 32; #endif c->x86_cache_alignment = c->x86_clflush_size; - memset(&c->x86_capability, 0, sizeof c->x86_capability); + memset(&c->x86_capability, 0, sizeof(c->x86_capability)); generic_identify(c); diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 8cb3c02980cf..36d2696c9563 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -485,7 +485,7 @@ static void mce_report_event(struct pt_regs *regs) * be somewhat complicated (e.g. segment offset would require an instruction * parser). So only support physical addresses up to page granuality for now. */ -static int mce_usable_address(struct mce *m) +int mce_usable_address(struct mce *m) { if (!(m->status & MCI_STATUS_ADDRV)) return 0; @@ -505,6 +505,7 @@ static int mce_usable_address(struct mce *m) return 1; } +EXPORT_SYMBOL_GPL(mce_usable_address); bool mce_is_memory_error(struct mce *m) { @@ -534,7 +535,7 @@ bool mce_is_memory_error(struct mce *m) } EXPORT_SYMBOL_GPL(mce_is_memory_error); -static bool mce_is_correctable(struct mce *m) +bool mce_is_correctable(struct mce *m) { if (m->cpuvendor == X86_VENDOR_AMD && m->status & MCI_STATUS_DEFERRED) return false; @@ -547,6 +548,7 @@ static bool mce_is_correctable(struct mce *m) return true; } +EXPORT_SYMBOL_GPL(mce_is_correctable); static bool cec_add_mce(struct mce *m) { @@ -2215,7 +2217,7 @@ static int mce_device_create(unsigned int cpu) if (dev) return 0; - dev = kzalloc(sizeof *dev, GFP_KERNEL); + dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) return -ENOMEM; dev->id = cpu; diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index dd33c357548f..e12454e21b8a 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -56,7 +56,7 @@ /* Threshold LVT offset is at MSR0xC0000410[15:12] */ #define SMCA_THR_LVT_OFF 0xF000 -static bool thresholding_en; +static bool thresholding_irq_en; static const char * const th_names[] = { "load_store", @@ -534,9 +534,8 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr, set_offset: offset = setup_APIC_mce_threshold(offset, new); - - if ((offset == new) && (mce_threshold_vector != amd_threshold_interrupt)) - mce_threshold_vector = amd_threshold_interrupt; + if (offset == new) + thresholding_irq_en = true; done: mce_threshold_block_init(&b, offset); @@ -1357,9 +1356,6 @@ int mce_threshold_remove_device(unsigned int cpu) { unsigned int bank; - if (!thresholding_en) - return 0; - for (bank = 0; bank < mca_cfg.banks; ++bank) { if (!(per_cpu(bank_map, cpu) & (1 << bank))) continue; @@ -1377,9 +1373,6 @@ int mce_threshold_create_device(unsigned int cpu) struct threshold_bank **bp; int err = 0; - if (!thresholding_en) - return 0; - bp = per_cpu(threshold_banks, cpu); if (bp) return 0; @@ -1408,9 +1401,6 @@ static __init int threshold_init_device(void) { unsigned lcpu = 0; - if (mce_threshold_vector == amd_threshold_interrupt) - thresholding_en = true; - /* to hit CPUs online before the notifier is up */ for_each_online_cpu(lcpu) { int err = mce_threshold_create_device(lcpu); @@ -1419,6 +1409,9 @@ static __init int threshold_init_device(void) return err; } + if (thresholding_irq_en) + mce_threshold_vector = amd_threshold_interrupt; + return 0; } /* diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index b9bc8a1a584e..2637ff09d6a0 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -666,8 +666,8 @@ static ssize_t pf_show(struct device *dev, } static DEVICE_ATTR_WO(reload); -static DEVICE_ATTR(version, 0400, version_show, NULL); -static DEVICE_ATTR(processor_flags, 0400, pf_show, NULL); +static DEVICE_ATTR(version, 0444, version_show, NULL); +static DEVICE_ATTR(processor_flags, 0444, pf_show, NULL); static struct attribute *mc_default_attrs[] = { &dev_attr_version.attr, diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 1c72f3819eb1..e81a2db42df7 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -20,6 +20,7 @@ #include <linux/interrupt.h> #include <linux/irq.h> #include <linux/kexec.h> +#include <linux/i8253.h> #include <asm/processor.h> #include <asm/hypervisor.h> #include <asm/hyperv-tlfs.h> @@ -295,6 +296,16 @@ static void __init ms_hyperv_init_platform(void) if (efi_enabled(EFI_BOOT)) x86_platform.get_nmi_reason = hv_get_nmi_reason; + /* + * Hyper-V VMs have a PIT emulation quirk such that zeroing the + * counter register during PIT shutdown restarts the PIT. So it + * continues to interrupt @18.2 HZ. Setting i8253_clear_counter + * to false tells pit_shutdown() not to zero the counter so that + * the PIT really is shutdown. Generation 2 VMs don't have a PIT, + * and setting this value has no effect. + */ + i8253_clear_counter_on_shutdown = false; + #if IS_ENABLED(CONFIG_HYPERV) /* * Setup the hook to get control post apic initialization. diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index e12ee86906c6..86e277f8daf4 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -798,7 +798,7 @@ static void generic_set_all(void) local_irq_restore(flags); /* Use the atomic bitops to update the global mask */ - for (count = 0; count < sizeof mask * 8; ++count) { + for (count = 0; count < sizeof(mask) * 8; ++count) { if (mask & 0x01) set_bit(count, &smp_changes_mask); mask >>= 1; diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c index 40eee6cc4124..2e173d47b450 100644 --- a/arch/x86/kernel/cpu/mtrr/if.c +++ b/arch/x86/kernel/cpu/mtrr/if.c @@ -174,12 +174,12 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) case MTRRIOC_SET_PAGE_ENTRY: case MTRRIOC_DEL_PAGE_ENTRY: case MTRRIOC_KILL_PAGE_ENTRY: - if (copy_from_user(&sentry, arg, sizeof sentry)) + if (copy_from_user(&sentry, arg, sizeof(sentry))) return -EFAULT; break; case MTRRIOC_GET_ENTRY: case MTRRIOC_GET_PAGE_ENTRY: - if (copy_from_user(&gentry, arg, sizeof gentry)) + if (copy_from_user(&gentry, arg, sizeof(gentry))) return -EFAULT; break; #ifdef CONFIG_COMPAT @@ -332,7 +332,7 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) switch (cmd) { case MTRRIOC_GET_ENTRY: case MTRRIOC_GET_PAGE_ENTRY: - if (copy_to_user(arg, &gentry, sizeof gentry)) + if (copy_to_user(arg, &gentry, sizeof(gentry))) err = -EFAULT; break; #ifdef CONFIG_COMPAT diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index d9ab49bed8af..0eda91f8eeac 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -77,7 +77,7 @@ static __init int setup_vmw_sched_clock(char *s) } early_param("no-vmw-sched-clock", setup_vmw_sched_clock); -static unsigned long long vmware_sched_clock(void) +static unsigned long long notrace vmware_sched_clock(void) { unsigned long long ns; diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 5e801c8c8ce7..374a52fa5296 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -213,8 +213,9 @@ static unsigned int mem32_serial_in(unsigned long addr, int offset) * early_pci_serial_init() * * This function is invoked when the early_printk param starts with "pciserial" - * The rest of the param should be ",B:D.F,baud" where B, D & F describe the - * location of a PCI device that must be a UART device. + * The rest of the param should be "[force],B:D.F,baud", where B, D & F describe + * the location of a PCI device that must be a UART device. "force" is optional + * and overrides the use of an UART device with a wrong PCI class code. */ static __init void early_pci_serial_init(char *s) { @@ -224,17 +225,23 @@ static __init void early_pci_serial_init(char *s) u32 classcode, bar0; u16 cmdreg; char *e; + int force = 0; - - /* - * First, part the param to get the BDF values - */ if (*s == ',') ++s; if (*s == 0) return; + /* Force the use of an UART device with wrong class code */ + if (!strncmp(s, "force,", 6)) { + force = 1; + s += 6; + } + + /* + * Part the param to get the BDF values + */ bus = (u8)simple_strtoul(s, &e, 16); s = e; if (*s != ':') @@ -253,7 +260,7 @@ static __init void early_pci_serial_init(char *s) s++; /* - * Second, find the device from the BDF + * Find the device from the BDF */ cmdreg = read_pci_config(bus, slot, func, PCI_COMMAND); classcode = read_pci_config(bus, slot, func, PCI_CLASS_REVISION); @@ -264,8 +271,10 @@ static __init void early_pci_serial_init(char *s) */ if (((classcode >> 16 != PCI_CLASS_COMMUNICATION_MODEM) && (classcode >> 16 != PCI_CLASS_COMMUNICATION_SERIAL)) || - (((classcode >> 8) & 0xff) != 0x02)) /* 16550 I/F at BAR0 */ - return; + (((classcode >> 8) & 0xff) != 0x02)) /* 16550 I/F at BAR0 */ { + if (!force) + return; + } /* * Determine if it is IO or memory mapped @@ -289,7 +298,7 @@ static __init void early_pci_serial_init(char *s) } /* - * Lastly, initialize the hardware + * Initialize the hardware */ if (*s) { if (strcmp(s, "nocfg") == 0) diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 61a949d84dfa..d99a8ee9e185 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -344,10 +344,10 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) sanitize_restored_xstate(tsk, &env, xfeatures, fx_only); } + local_bh_disable(); fpu->initialized = 1; - preempt_disable(); fpu__restore(fpu); - preempt_enable(); + local_bh_enable(); return err; } else { diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 01ebcb6f263e..7ee8067cbf45 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -994,7 +994,6 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent, { unsigned long old; int faulted; - struct ftrace_graph_ent trace; unsigned long return_hooker = (unsigned long) &return_to_handler; @@ -1046,19 +1045,7 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent, return; } - trace.func = self_addr; - trace.depth = current->curr_ret_stack + 1; - - /* Only trace if the calling function expects to */ - if (!ftrace_graph_entry(&trace)) { + if (function_graph_enter(old, self_addr, frame_pointer, parent)) *parent = old; - return; - } - - if (ftrace_push_return_trace(old, self_addr, &trace.depth, - frame_pointer, parent) == -EBUSY) { - *parent = old; - return; - } } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index 76fa3b836598..ec6fefbfd3c0 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -37,7 +37,6 @@ asmlinkage __visible void __init i386_start_kernel(void) cr4_init_shadow(); sanitize_boot_params(&boot_params); - x86_verify_bootdata_version(); x86_early_init_platform_quirks(); diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 5dc377dc9d7b..16b1cbd3a61e 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -385,7 +385,7 @@ static void __init copy_bootdata(char *real_mode_data) */ sme_map_bootdata(real_mode_data); - memcpy(&boot_params, real_mode_data, sizeof boot_params); + memcpy(&boot_params, real_mode_data, sizeof(boot_params)); sanitize_boot_params(&boot_params); cmd_line_ptr = get_cmd_line_ptr(); if (cmd_line_ptr) { @@ -457,8 +457,6 @@ void __init x86_64_start_reservations(char *real_mode_data) if (!boot_params.hdr.version) copy_bootdata(__va(real_mode_data)); - x86_verify_bootdata_version(); - x86_early_init_platform_quirks(); switch (boot_params.hdr.hardware_subarch) { diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 40b16b270656..6adf6e6c2933 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -189,7 +189,7 @@ static int copy_optimized_instructions(u8 *dest, u8 *src, u8 *real) int len = 0, ret; while (len < RELATIVEJUMP_SIZE) { - ret = __copy_instruction(dest + len, src + len, real, &insn); + ret = __copy_instruction(dest + len, src + len, real + len, &insn); if (!ret || !can_boost(&insn, src + len)) return -EINVAL; len += ret; diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index ab18e0884dc6..6135ae8ce036 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -199,14 +199,6 @@ static void sanity_check_ldt_mapping(struct mm_struct *mm) /* * If PTI is enabled, this maps the LDT into the kernelmode and * usermode tables for the given mm. - * - * There is no corresponding unmap function. Even if the LDT is freed, we - * leave the PTEs around until the slot is reused or the mm is destroyed. - * This is harmless: the LDT is always in ordinary memory, and no one will - * access the freed slot. - * - * If we wanted to unmap freed LDTs, we'd also need to do a flush to make - * it useful, and the flush would slow down modify_ldt(). */ static int map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot) @@ -214,8 +206,7 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot) unsigned long va; bool is_vmalloc; spinlock_t *ptl; - pgd_t *pgd; - int i; + int i, nr_pages; if (!static_cpu_has(X86_FEATURE_PTI)) return 0; @@ -229,16 +220,11 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot) /* Check if the current mappings are sane */ sanity_check_ldt_mapping(mm); - /* - * Did we already have the top level entry allocated? We can't - * use pgd_none() for this because it doens't do anything on - * 4-level page table kernels. - */ - pgd = pgd_offset(mm, LDT_BASE_ADDR); - is_vmalloc = is_vmalloc_addr(ldt->entries); - for (i = 0; i * PAGE_SIZE < ldt->nr_entries * LDT_ENTRY_SIZE; i++) { + nr_pages = DIV_ROUND_UP(ldt->nr_entries * LDT_ENTRY_SIZE, PAGE_SIZE); + + for (i = 0; i < nr_pages; i++) { unsigned long offset = i << PAGE_SHIFT; const void *src = (char *)ldt->entries + offset; unsigned long pfn; @@ -272,13 +258,39 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot) /* Propagate LDT mapping to the user page-table */ map_ldt_struct_to_user(mm); - va = (unsigned long)ldt_slot_va(slot); - flush_tlb_mm_range(mm, va, va + LDT_SLOT_STRIDE, PAGE_SHIFT, false); - ldt->slot = slot; return 0; } +static void unmap_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt) +{ + unsigned long va; + int i, nr_pages; + + if (!ldt) + return; + + /* LDT map/unmap is only required for PTI */ + if (!static_cpu_has(X86_FEATURE_PTI)) + return; + + nr_pages = DIV_ROUND_UP(ldt->nr_entries * LDT_ENTRY_SIZE, PAGE_SIZE); + + for (i = 0; i < nr_pages; i++) { + unsigned long offset = i << PAGE_SHIFT; + spinlock_t *ptl; + pte_t *ptep; + + va = (unsigned long)ldt_slot_va(ldt->slot) + offset; + ptep = get_locked_pte(mm, va, &ptl); + pte_clear(mm, va, ptep); + pte_unmap_unlock(ptep, ptl); + } + + va = (unsigned long)ldt_slot_va(ldt->slot); + flush_tlb_mm_range(mm, va, va + nr_pages * PAGE_SIZE, PAGE_SHIFT, false); +} + #else /* !CONFIG_PAGE_TABLE_ISOLATION */ static int @@ -286,6 +298,10 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot) { return 0; } + +static void unmap_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt) +{ +} #endif /* CONFIG_PAGE_TABLE_ISOLATION */ static void free_ldt_pgtables(struct mm_struct *mm) @@ -524,6 +540,7 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) } install_ldt(mm, new_ldt); + unmap_ldt_struct(mm, old_ldt); free_ldt_struct(old_ldt); error = 0; diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index ef688804f80d..4588414e2561 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -115,14 +115,14 @@ static long msr_ioctl(struct file *file, unsigned int ioc, unsigned long arg) err = -EBADF; break; } - if (copy_from_user(®s, uregs, sizeof regs)) { + if (copy_from_user(®s, uregs, sizeof(regs))) { err = -EFAULT; break; } err = rdmsr_safe_regs_on_cpu(cpu, regs); if (err) break; - if (copy_to_user(uregs, ®s, sizeof regs)) + if (copy_to_user(uregs, ®s, sizeof(regs))) err = -EFAULT; break; @@ -131,14 +131,14 @@ static long msr_ioctl(struct file *file, unsigned int ioc, unsigned long arg) err = -EBADF; break; } - if (copy_from_user(®s, uregs, sizeof regs)) { + if (copy_from_user(®s, uregs, sizeof(regs))) { err = -EFAULT; break; } err = wrmsr_safe_regs_on_cpu(cpu, regs); if (err) break; - if (copy_to_user(uregs, ®s, sizeof regs)) + if (copy_to_user(uregs, ®s, sizeof(regs))) err = -EFAULT; break; diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index e4d4df37922a..c0e0101133f3 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -56,17 +56,6 @@ asm (".pushsection .entry.text, \"ax\"\n" ".type _paravirt_nop, @function\n\t" ".popsection"); -/* identity function, which can be inlined */ -u32 notrace _paravirt_ident_32(u32 x) -{ - return x; -} - -u64 notrace _paravirt_ident_64(u64 x) -{ - return x; -} - void __init default_banner(void) { printk(KERN_INFO "Booting paravirtualized kernel on %s\n", @@ -102,6 +91,12 @@ static unsigned paravirt_patch_call(void *insnbuf, const void *target, } #ifdef CONFIG_PARAVIRT_XXL +/* identity function, which can be inlined */ +u64 notrace _paravirt_ident_64(u64 x) +{ + return x; +} + static unsigned paravirt_patch_jmp(void *insnbuf, const void *target, unsigned long addr, unsigned len) { @@ -146,13 +141,11 @@ unsigned paravirt_patch_default(u8 type, void *insnbuf, else if (opfunc == _paravirt_nop) ret = 0; +#ifdef CONFIG_PARAVIRT_XXL /* identity functions just return their single argument */ - else if (opfunc == _paravirt_ident_32) - ret = paravirt_patch_ident_32(insnbuf, len); else if (opfunc == _paravirt_ident_64) ret = paravirt_patch_ident_64(insnbuf, len); -#ifdef CONFIG_PARAVIRT_XXL else if (type == PARAVIRT_PATCH(cpu.iret) || type == PARAVIRT_PATCH(cpu.usergs_sysret64)) /* If operation requires a jmp, then jmp */ @@ -309,13 +302,8 @@ struct pv_info pv_info = { #endif }; -#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE) -/* 32-bit pagetable entries */ -#define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_32) -#else /* 64-bit pagetable entries */ #define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_64) -#endif struct paravirt_patch_template pv_ops = { /* Init ops. */ @@ -483,5 +471,5 @@ NOKPROBE_SYMBOL(native_set_debugreg); NOKPROBE_SYMBOL(native_load_idt); #endif -EXPORT_SYMBOL_GPL(pv_ops); +EXPORT_SYMBOL(pv_ops); EXPORT_SYMBOL_GPL(pv_info); diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c index 6368c22fa1fa..de138d3912e4 100644 --- a/arch/x86/kernel/paravirt_patch_32.c +++ b/arch/x86/kernel/paravirt_patch_32.c @@ -10,24 +10,18 @@ DEF_NATIVE(cpu, iret, "iret"); DEF_NATIVE(mmu, read_cr2, "mov %cr2, %eax"); DEF_NATIVE(mmu, write_cr3, "mov %eax, %cr3"); DEF_NATIVE(mmu, read_cr3, "mov %cr3, %eax"); -#endif - -#if defined(CONFIG_PARAVIRT_SPINLOCKS) -DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%eax)"); -DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax"); -#endif - -unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len) -{ - /* arg in %eax, return in %eax */ - return 0; -} unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len) { /* arg in %edx:%eax, return in %edx:%eax */ return 0; } +#endif + +#if defined(CONFIG_PARAVIRT_SPINLOCKS) +DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%eax)"); +DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax"); +#endif extern bool pv_is_native_spin_unlock(void); extern bool pv_is_native_vcpu_is_preempted(void); diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c index 7ca9cb726f4d..9d9e04b31077 100644 --- a/arch/x86/kernel/paravirt_patch_64.c +++ b/arch/x86/kernel/paravirt_patch_64.c @@ -15,27 +15,19 @@ DEF_NATIVE(cpu, wbinvd, "wbinvd"); DEF_NATIVE(cpu, usergs_sysret64, "swapgs; sysretq"); DEF_NATIVE(cpu, swapgs, "swapgs"); -#endif - -DEF_NATIVE(, mov32, "mov %edi, %eax"); DEF_NATIVE(, mov64, "mov %rdi, %rax"); -#if defined(CONFIG_PARAVIRT_SPINLOCKS) -DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%rdi)"); -DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax"); -#endif - -unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len) -{ - return paravirt_patch_insns(insnbuf, len, - start__mov32, end__mov32); -} - unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len) { return paravirt_patch_insns(insnbuf, len, start__mov64, end__mov64); } +#endif + +#if defined(CONFIG_PARAVIRT_SPINLOCKS) +DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%rdi)"); +DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax"); +#endif extern bool pv_is_native_spin_unlock(void); extern bool pv_is_native_vcpu_is_preempted(void); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index c93fcfdf1673..7d31192296a8 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -40,6 +40,8 @@ #include <asm/prctl.h> #include <asm/spec-ctrl.h> +#include "process.h" + /* * per-CPU TSS segments. Threads are completely 'soft' on Linux, * no more per-task TSS's. The TSS size is kept cacheline-aligned @@ -252,11 +254,12 @@ void arch_setup_new_exec(void) enable_cpuid(); } -static inline void switch_to_bitmap(struct tss_struct *tss, - struct thread_struct *prev, +static inline void switch_to_bitmap(struct thread_struct *prev, struct thread_struct *next, unsigned long tifp, unsigned long tifn) { + struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw); + if (tifn & _TIF_IO_BITMAP) { /* * Copy the relevant range of the IO bitmap. @@ -395,32 +398,85 @@ static __always_inline void amd_set_ssb_virt_state(unsigned long tifn) wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, ssbd_tif_to_spec_ctrl(tifn)); } -static __always_inline void intel_set_ssb_state(unsigned long tifn) +/* + * Update the MSRs managing speculation control, during context switch. + * + * tifp: Previous task's thread flags + * tifn: Next task's thread flags + */ +static __always_inline void __speculation_ctrl_update(unsigned long tifp, + unsigned long tifn) { - u64 msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn); + unsigned long tif_diff = tifp ^ tifn; + u64 msr = x86_spec_ctrl_base; + bool updmsr = false; + + /* + * If TIF_SSBD is different, select the proper mitigation + * method. Note that if SSBD mitigation is disabled or permanentely + * enabled this branch can't be taken because nothing can set + * TIF_SSBD. + */ + if (tif_diff & _TIF_SSBD) { + if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) { + amd_set_ssb_virt_state(tifn); + } else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) { + amd_set_core_ssb_state(tifn); + } else if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || + static_cpu_has(X86_FEATURE_AMD_SSBD)) { + msr |= ssbd_tif_to_spec_ctrl(tifn); + updmsr = true; + } + } + + /* + * Only evaluate TIF_SPEC_IB if conditional STIBP is enabled, + * otherwise avoid the MSR write. + */ + if (IS_ENABLED(CONFIG_SMP) && + static_branch_unlikely(&switch_to_cond_stibp)) { + updmsr |= !!(tif_diff & _TIF_SPEC_IB); + msr |= stibp_tif_to_spec_ctrl(tifn); + } - wrmsrl(MSR_IA32_SPEC_CTRL, msr); + if (updmsr) + wrmsrl(MSR_IA32_SPEC_CTRL, msr); } -static __always_inline void __speculative_store_bypass_update(unsigned long tifn) +static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk) { - if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) - amd_set_ssb_virt_state(tifn); - else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) - amd_set_core_ssb_state(tifn); - else - intel_set_ssb_state(tifn); + if (test_and_clear_tsk_thread_flag(tsk, TIF_SPEC_FORCE_UPDATE)) { + if (task_spec_ssb_disable(tsk)) + set_tsk_thread_flag(tsk, TIF_SSBD); + else + clear_tsk_thread_flag(tsk, TIF_SSBD); + + if (task_spec_ib_disable(tsk)) + set_tsk_thread_flag(tsk, TIF_SPEC_IB); + else + clear_tsk_thread_flag(tsk, TIF_SPEC_IB); + } + /* Return the updated threadinfo flags*/ + return task_thread_info(tsk)->flags; } -void speculative_store_bypass_update(unsigned long tif) +void speculation_ctrl_update(unsigned long tif) { + /* Forced update. Make sure all relevant TIF flags are different */ preempt_disable(); - __speculative_store_bypass_update(tif); + __speculation_ctrl_update(~tif, tif); preempt_enable(); } -void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, - struct tss_struct *tss) +/* Called from seccomp/prctl update */ +void speculation_ctrl_update_current(void) +{ + preempt_disable(); + speculation_ctrl_update(speculation_ctrl_update_tif(current)); + preempt_enable(); +} + +void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p) { struct thread_struct *prev, *next; unsigned long tifp, tifn; @@ -430,7 +486,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, tifn = READ_ONCE(task_thread_info(next_p)->flags); tifp = READ_ONCE(task_thread_info(prev_p)->flags); - switch_to_bitmap(tss, prev, next, tifp, tifn); + switch_to_bitmap(prev, next, tifp, tifn); propagate_user_return_notify(prev_p, next_p); @@ -451,8 +507,15 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, if ((tifp ^ tifn) & _TIF_NOCPUID) set_cpuid_faulting(!!(tifn & _TIF_NOCPUID)); - if ((tifp ^ tifn) & _TIF_SSBD) - __speculative_store_bypass_update(tifn); + if (likely(!((tifp | tifn) & _TIF_SPEC_FORCE_UPDATE))) { + __speculation_ctrl_update(tifp, tifn); + } else { + speculation_ctrl_update_tif(prev_p); + tifn = speculation_ctrl_update_tif(next_p); + + /* Enforce MSR update to ensure consistent state */ + __speculation_ctrl_update(~tifn, tifn); + } } /* diff --git a/arch/x86/kernel/process.h b/arch/x86/kernel/process.h new file mode 100644 index 000000000000..898e97cf6629 --- /dev/null +++ b/arch/x86/kernel/process.h @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Code shared between 32 and 64 bit + +#include <asm/spec-ctrl.h> + +void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p); + +/* + * This needs to be inline to optimize for the common case where no extra + * work needs to be done. + */ +static inline void switch_to_extra(struct task_struct *prev, + struct task_struct *next) +{ + unsigned long next_tif = task_thread_info(next)->flags; + unsigned long prev_tif = task_thread_info(prev)->flags; + + if (IS_ENABLED(CONFIG_SMP)) { + /* + * Avoid __switch_to_xtra() invocation when conditional + * STIPB is disabled and the only different bit is + * TIF_SPEC_IB. For CONFIG_SMP=n TIF_SPEC_IB is not + * in the TIF_WORK_CTXSW masks. + */ + if (!static_branch_likely(&switch_to_cond_stibp)) { + prev_tif &= ~_TIF_SPEC_IB; + next_tif &= ~_TIF_SPEC_IB; + } + } + + /* + * __switch_to_xtra() handles debug registers, i/o bitmaps, + * speculation mitigations etc. + */ + if (unlikely(next_tif & _TIF_WORK_CTXSW_NEXT || + prev_tif & _TIF_WORK_CTXSW_PREV)) + __switch_to_xtra(prev, next); +} diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 5046a3c9dec2..d3e593eb189f 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -59,6 +59,8 @@ #include <asm/intel_rdt_sched.h> #include <asm/proto.h> +#include "process.h" + void __show_regs(struct pt_regs *regs, enum show_regs_mode mode) { unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; @@ -232,7 +234,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) struct fpu *prev_fpu = &prev->fpu; struct fpu *next_fpu = &next->fpu; int cpu = smp_processor_id(); - struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu); /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ @@ -264,12 +265,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) if (get_kernel_rpl() && unlikely(prev->iopl != next->iopl)) set_iopl_mask(next->iopl); - /* - * Now maybe handle debug registers and/or IO bitmaps - */ - if (unlikely(task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV || - task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT)) - __switch_to_xtra(prev_p, next_p, tss); + switch_to_extra(prev_p, next_p); /* * Leave lazy mode, flushing any hypercalls made here. diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 31b4755369f0..bbfbf017065c 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -60,6 +60,8 @@ #include <asm/unistd_32_ia32.h> #endif +#include "process.h" + /* Prints also some state that isn't saved in the pt_regs */ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode) { @@ -553,7 +555,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) struct fpu *prev_fpu = &prev->fpu; struct fpu *next_fpu = &next->fpu; int cpu = smp_processor_id(); - struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu); WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) && this_cpu_read(irq_count) != -1); @@ -617,12 +618,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* Reload sp0. */ update_task_stack(next_p); - /* - * Now maybe reload the debug registers and handle I/O bitmaps - */ - if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT || - task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV)) - __switch_to_xtra(prev_p, next_p, tss); + switch_to_extra(prev_p, next_p); #ifdef CONFIG_XEN_PV /* @@ -701,10 +697,10 @@ static void __set_personality_x32(void) current->mm->context.ia32_compat = TIF_X32; current->personality &= ~READ_IMPLIES_EXEC; /* - * in_compat_syscall() uses the presence of the x32 syscall bit + * in_32bit_syscall() uses the presence of the x32 syscall bit * flag to determine compat status. The x86 mmap() code relies on * the syscall bitness so set x32 syscall bit right here to make - * in_compat_syscall() work during exec(). + * in_32bit_syscall() work during exec(). * * Pretend to come from a x32 execve. */ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index b74e7bfed6ab..d494b9bfe618 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1280,23 +1280,6 @@ void __init setup_arch(char **cmdline_p) unwind_init(); } -/* - * From boot protocol 2.14 onwards we expect the bootloader to set the - * version to "0x8000 | <used version>". In case we find a version >= 2.14 - * without the 0x8000 we assume the boot loader supports 2.13 only and - * reset the version accordingly. The 0x8000 flag is removed in any case. - */ -void __init x86_verify_bootdata_version(void) -{ - if (boot_params.hdr.version & VERSION_WRITTEN) - boot_params.hdr.version &= ~VERSION_WRITTEN; - else if (boot_params.hdr.version >= 0x020e) - boot_params.hdr.version = 0x020d; - - if (boot_params.hdr.version < 0x020e) - boot_params.hdr.acpi_rsdp_addr = 0; -} - #ifdef CONFIG_X86_32 static struct resource video_ram_resource = { diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index 6a78d4b36a79..f7476ce23b6e 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -105,7 +105,7 @@ out: static void find_start_end(unsigned long addr, unsigned long flags, unsigned long *begin, unsigned long *end) { - if (!in_compat_syscall() && (flags & MAP_32BIT)) { + if (!in_32bit_syscall() && (flags & MAP_32BIT)) { /* This is usually used needed to map code in small model, so it needs to be in the first 31bit. Limit it to that. This means we need to move the @@ -122,7 +122,7 @@ static void find_start_end(unsigned long addr, unsigned long flags, } *begin = get_mmap_base(1); - if (in_compat_syscall()) + if (in_32bit_syscall()) *end = task_size_32bit(); else *end = task_size_64bit(addr > DEFAULT_MAP_WINDOW); @@ -193,7 +193,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, return addr; /* for MAP_32BIT mappings we force the legacy mmap base */ - if (!in_compat_syscall() && (flags & MAP_32BIT)) + if (!in_32bit_syscall() && (flags & MAP_32BIT)) goto bottomup; /* requesting a specific address */ @@ -217,9 +217,10 @@ get_unmapped_area: * If hint address is above DEFAULT_MAP_WINDOW, look for unmapped area * in the full address space. * - * !in_compat_syscall() check to avoid high addresses for x32. + * !in_32bit_syscall() check to avoid high addresses for x32 + * (and make it no op on native i386). */ - if (addr > DEFAULT_MAP_WINDOW && !in_compat_syscall()) + if (addr > DEFAULT_MAP_WINDOW && !in_32bit_syscall()) info.high_limit += TASK_SIZE_MAX - DEFAULT_MAP_WINDOW; info.align_mask = 0; diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 8f6dcd88202e..9b7c4ca8f0a7 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -306,7 +306,7 @@ __visible void __noreturn handle_stack_overflow(const char *message, die(message, regs, 0); /* Be absolutely certain we don't return. */ - panic(message); + panic("%s", message); } #endif diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index 1eae5af491c2..891a75dbc131 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c @@ -26,65 +26,8 @@ #define TOPOLOGY_REGISTER_OFFSET 0x10 -#if defined CONFIG_PCI && defined CONFIG_PARAVIRT_XXL -/* - * Interrupt control on vSMPowered systems: - * ~AC is a shadow of IF. If IF is 'on' AC should be 'off' - * and vice versa. - */ - -asmlinkage __visible unsigned long vsmp_save_fl(void) -{ - unsigned long flags = native_save_fl(); - - if (!(flags & X86_EFLAGS_IF) || (flags & X86_EFLAGS_AC)) - flags &= ~X86_EFLAGS_IF; - return flags; -} -PV_CALLEE_SAVE_REGS_THUNK(vsmp_save_fl); - -__visible void vsmp_restore_fl(unsigned long flags) -{ - if (flags & X86_EFLAGS_IF) - flags &= ~X86_EFLAGS_AC; - else - flags |= X86_EFLAGS_AC; - native_restore_fl(flags); -} -PV_CALLEE_SAVE_REGS_THUNK(vsmp_restore_fl); - -asmlinkage __visible void vsmp_irq_disable(void) -{ - unsigned long flags = native_save_fl(); - - native_restore_fl((flags & ~X86_EFLAGS_IF) | X86_EFLAGS_AC); -} -PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_disable); - -asmlinkage __visible void vsmp_irq_enable(void) -{ - unsigned long flags = native_save_fl(); - - native_restore_fl((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC)); -} -PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_enable); - -static unsigned __init vsmp_patch(u8 type, void *ibuf, - unsigned long addr, unsigned len) -{ - switch (type) { - case PARAVIRT_PATCH(irq.irq_enable): - case PARAVIRT_PATCH(irq.irq_disable): - case PARAVIRT_PATCH(irq.save_fl): - case PARAVIRT_PATCH(irq.restore_fl): - return paravirt_patch_default(type, ibuf, addr, len); - default: - return native_patch(type, ibuf, addr, len); - } - -} - -static void __init set_vsmp_pv_ops(void) +#ifdef CONFIG_PCI +static void __init set_vsmp_ctl(void) { void __iomem *address; unsigned int cap, ctl, cfg; @@ -109,28 +52,12 @@ static void __init set_vsmp_pv_ops(void) } #endif - if (cap & ctl & (1 << 4)) { - /* Setup irq ops and turn on vSMP IRQ fastpath handling */ - pv_ops.irq.irq_disable = PV_CALLEE_SAVE(vsmp_irq_disable); - pv_ops.irq.irq_enable = PV_CALLEE_SAVE(vsmp_irq_enable); - pv_ops.irq.save_fl = PV_CALLEE_SAVE(vsmp_save_fl); - pv_ops.irq.restore_fl = PV_CALLEE_SAVE(vsmp_restore_fl); - pv_ops.init.patch = vsmp_patch; - ctl &= ~(1 << 4); - } writel(ctl, address + 4); ctl = readl(address + 4); pr_info("vSMP CTL: control set to:0x%08x\n", ctl); early_iounmap(address, 8); } -#else -static void __init set_vsmp_pv_ops(void) -{ -} -#endif - -#ifdef CONFIG_PCI static int is_vsmp = -1; static void __init detect_vsmp_box(void) @@ -164,11 +91,14 @@ static int is_vsmp_box(void) { return 0; } +static void __init set_vsmp_ctl(void) +{ +} #endif static void __init vsmp_cap_cpus(void) { -#if !defined(CONFIG_X86_VSMP) && defined(CONFIG_SMP) +#if !defined(CONFIG_X86_VSMP) && defined(CONFIG_SMP) && defined(CONFIG_PCI) void __iomem *address; unsigned int cfg, topology, node_shift, maxcpus; @@ -221,6 +151,6 @@ void __init vsmp_init(void) vsmp_cap_cpus(); - set_vsmp_pv_ops(); + set_vsmp_ctl(); return; } diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 34edf198708f..78e430f4e15c 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1509,7 +1509,7 @@ static int read_interrupt_descriptor(struct x86_emulate_ctxt *ctxt, return emulate_gp(ctxt, index << 3 | 0x2); addr = dt.address + index * 8; - return linear_read_system(ctxt, addr, desc, sizeof *desc); + return linear_read_system(ctxt, addr, desc, sizeof(*desc)); } static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt, @@ -1522,7 +1522,7 @@ static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt, struct desc_struct desc; u16 sel; - memset (dt, 0, sizeof *dt); + memset(dt, 0, sizeof(*dt)); if (!ops->get_segment(ctxt, &sel, &desc, &base3, VCPU_SREG_LDTR)) return; @@ -1586,7 +1586,7 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt, if (rc != X86EMUL_CONTINUE) return rc; - return linear_write_system(ctxt, addr, desc, sizeof *desc); + return linear_write_system(ctxt, addr, desc, sizeof(*desc)); } static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, @@ -1604,7 +1604,7 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, u16 dummy; u32 base3 = 0; - memset(&seg_desc, 0, sizeof seg_desc); + memset(&seg_desc, 0, sizeof(seg_desc)); if (ctxt->mode == X86EMUL_MODE_REAL) { /* set real mode segment descriptor (keep limit etc. for @@ -3075,17 +3075,17 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, int ret; u32 new_tss_base = get_desc_base(new_desc); - ret = linear_read_system(ctxt, old_tss_base, &tss_seg, sizeof tss_seg); + ret = linear_read_system(ctxt, old_tss_base, &tss_seg, sizeof(tss_seg)); if (ret != X86EMUL_CONTINUE) return ret; save_state_to_tss16(ctxt, &tss_seg); - ret = linear_write_system(ctxt, old_tss_base, &tss_seg, sizeof tss_seg); + ret = linear_write_system(ctxt, old_tss_base, &tss_seg, sizeof(tss_seg)); if (ret != X86EMUL_CONTINUE) return ret; - ret = linear_read_system(ctxt, new_tss_base, &tss_seg, sizeof tss_seg); + ret = linear_read_system(ctxt, new_tss_base, &tss_seg, sizeof(tss_seg)); if (ret != X86EMUL_CONTINUE) return ret; @@ -3094,7 +3094,7 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, ret = linear_write_system(ctxt, new_tss_base, &tss_seg.prev_task_link, - sizeof tss_seg.prev_task_link); + sizeof(tss_seg.prev_task_link)); if (ret != X86EMUL_CONTINUE) return ret; } @@ -3216,7 +3216,7 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, u32 eip_offset = offsetof(struct tss_segment_32, eip); u32 ldt_sel_offset = offsetof(struct tss_segment_32, ldt_selector); - ret = linear_read_system(ctxt, old_tss_base, &tss_seg, sizeof tss_seg); + ret = linear_read_system(ctxt, old_tss_base, &tss_seg, sizeof(tss_seg)); if (ret != X86EMUL_CONTINUE) return ret; @@ -3228,7 +3228,7 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, if (ret != X86EMUL_CONTINUE) return ret; - ret = linear_read_system(ctxt, new_tss_base, &tss_seg, sizeof tss_seg); + ret = linear_read_system(ctxt, new_tss_base, &tss_seg, sizeof(tss_seg)); if (ret != X86EMUL_CONTINUE) return ret; @@ -3237,7 +3237,7 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, ret = linear_write_system(ctxt, new_tss_base, &tss_seg.prev_task_link, - sizeof tss_seg.prev_task_link); + sizeof(tss_seg.prev_task_link)); if (ret != X86EMUL_CONTINUE) return ret; } diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 3cd227ff807f..c4533d05c214 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -55,7 +55,7 @@ #define PRIo64 "o" /* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */ -#define apic_debug(fmt, arg...) +#define apic_debug(fmt, arg...) do {} while (0) /* 14 is the version for Xeon and Pentium 8.4.8*/ #define APIC_VERSION (0x14UL | ((KVM_APIC_LVT_NUM - 1) << 16)) @@ -576,6 +576,11 @@ int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low, rcu_read_lock(); map = rcu_dereference(kvm->arch.apic_map); + if (unlikely(!map)) { + count = -EOPNOTSUPP; + goto out; + } + if (min > map->max_apic_id) goto out; /* Bits above cluster_size are masked in the caller. */ @@ -2409,7 +2414,7 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) r = kvm_apic_state_fixup(vcpu, s, true); if (r) return r; - memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s); + memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s)); recalculate_apic_map(vcpu->kvm); kvm_apic_set_version(vcpu); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index cf5f572f2305..7c03c0f35444 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -5074,9 +5074,9 @@ static bool need_remote_flush(u64 old, u64 new) } static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa, - const u8 *new, int *bytes) + int *bytes) { - u64 gentry; + u64 gentry = 0; int r; /* @@ -5088,22 +5088,12 @@ static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa, /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ *gpa &= ~(gpa_t)7; *bytes = 8; - r = kvm_vcpu_read_guest(vcpu, *gpa, &gentry, 8); - if (r) - gentry = 0; - new = (const u8 *)&gentry; } - switch (*bytes) { - case 4: - gentry = *(const u32 *)new; - break; - case 8: - gentry = *(const u64 *)new; - break; - default: - gentry = 0; - break; + if (*bytes == 4 || *bytes == 8) { + r = kvm_vcpu_read_guest_atomic(vcpu, *gpa, &gentry, *bytes); + if (r) + gentry = 0; } return gentry; @@ -5207,8 +5197,6 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); - gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, new, &bytes); - /* * No need to care whether allocation memory is successful * or not since pte prefetch is skiped if it does not have @@ -5217,6 +5205,9 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, mmu_topup_memory_caches(vcpu); spin_lock(&vcpu->kvm->mmu_lock); + + gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, &bytes); + ++vcpu->kvm->stat.mmu_pte_write; kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 0e21ccc46792..cc6467b35a85 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1446,7 +1446,7 @@ static u64 svm_read_l1_tsc_offset(struct kvm_vcpu *vcpu) return vcpu->arch.tsc_offset; } -static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) +static u64 svm_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) { struct vcpu_svm *svm = to_svm(vcpu); u64 g_tsc_offset = 0; @@ -1464,6 +1464,7 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) svm->vmcb->control.tsc_offset = offset + g_tsc_offset; mark_dirty(svm->vmcb, VMCB_INTERCEPTS); + return svm->vmcb->control.tsc_offset; } static void avic_init_vmcb(struct vcpu_svm *svm) @@ -1664,20 +1665,23 @@ static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu, static int avic_init_access_page(struct kvm_vcpu *vcpu) { struct kvm *kvm = vcpu->kvm; - int ret; + int ret = 0; + mutex_lock(&kvm->slots_lock); if (kvm->arch.apic_access_page_done) - return 0; + goto out; - ret = x86_set_memory_region(kvm, - APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, - APIC_DEFAULT_PHYS_BASE, - PAGE_SIZE); + ret = __x86_set_memory_region(kvm, + APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, + APIC_DEFAULT_PHYS_BASE, + PAGE_SIZE); if (ret) - return ret; + goto out; kvm->arch.apic_access_page_done = true; - return 0; +out: + mutex_unlock(&kvm->slots_lock); + return ret; } static int avic_init_backing_page(struct kvm_vcpu *vcpu) @@ -2189,21 +2193,31 @@ out: return ERR_PTR(err); } +static void svm_clear_current_vmcb(struct vmcb *vmcb) +{ + int i; + + for_each_online_cpu(i) + cmpxchg(&per_cpu(svm_data, i)->current_vmcb, vmcb, NULL); +} + static void svm_free_vcpu(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); + /* + * The vmcb page can be recycled, causing a false negative in + * svm_vcpu_load(). So, ensure that no logical CPU has this + * vmcb page recorded as its current vmcb. + */ + svm_clear_current_vmcb(svm->vmcb); + __free_page(pfn_to_page(__sme_clr(svm->vmcb_pa) >> PAGE_SHIFT)); __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER); __free_page(virt_to_page(svm->nested.hsave)); __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER); kvm_vcpu_uninit(vcpu); kmem_cache_free(kvm_vcpu_cache, svm); - /* - * The vmcb page can be recycled, causing a false negative in - * svm_vcpu_load(). So do a full IBPB now. - */ - indirect_branch_prediction_barrier(); } static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) @@ -7149,7 +7163,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .has_wbinvd_exit = svm_has_wbinvd_exit, .read_l1_tsc_offset = svm_read_l1_tsc_offset, - .write_tsc_offset = svm_write_tsc_offset, + .write_l1_tsc_offset = svm_write_l1_tsc_offset, .set_tdp_cr3 = set_tdp_cr3, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 4555077d69ce..02edd9960e9d 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -174,6 +174,7 @@ module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO); * refer SDM volume 3b section 21.6.13 & 22.1.3. */ static unsigned int ple_gap = KVM_DEFAULT_PLE_GAP; +module_param(ple_gap, uint, 0444); static unsigned int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW; module_param(ple_window, uint, 0444); @@ -984,6 +985,7 @@ struct vcpu_vmx { struct shared_msr_entry *guest_msrs; int nmsrs; int save_nmsrs; + bool guest_msrs_dirty; unsigned long host_idt_base; #ifdef CONFIG_X86_64 u64 msr_host_kernel_gs_base; @@ -1306,7 +1308,7 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked); static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12, u16 error_code); static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu); -static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, +static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr, int type); static DEFINE_PER_CPU(struct vmcs *, vmxarea); @@ -1610,12 +1612,6 @@ static int nested_enable_evmcs(struct kvm_vcpu *vcpu, { struct vcpu_vmx *vmx = to_vmx(vcpu); - /* We don't support disabling the feature for simplicity. */ - if (vmx->nested.enlightened_vmcs_enabled) - return 0; - - vmx->nested.enlightened_vmcs_enabled = true; - /* * vmcs_version represents the range of supported Enlightened VMCS * versions: lower 8 bits is the minimal version, higher 8 bits is the @@ -1625,6 +1621,12 @@ static int nested_enable_evmcs(struct kvm_vcpu *vcpu, if (vmcs_version) *vmcs_version = (KVM_EVMCS_VERSION << 8) | 1; + /* We don't support disabling the feature for simplicity. */ + if (vmx->nested.enlightened_vmcs_enabled) + return 0; + + vmx->nested.enlightened_vmcs_enabled = true; + vmx->nested.msrs.pinbased_ctls_high &= ~EVMCS1_UNSUPPORTED_PINCTRL; vmx->nested.msrs.entry_ctls_high &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL; vmx->nested.msrs.exit_ctls_high &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL; @@ -2897,6 +2899,20 @@ static void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) vmx->req_immediate_exit = false; + /* + * Note that guest MSRs to be saved/restored can also be changed + * when guest state is loaded. This happens when guest transitions + * to/from long-mode by setting MSR_EFER.LMA. + */ + if (!vmx->loaded_cpu_state || vmx->guest_msrs_dirty) { + vmx->guest_msrs_dirty = false; + for (i = 0; i < vmx->save_nmsrs; ++i) + kvm_set_shared_msr(vmx->guest_msrs[i].index, + vmx->guest_msrs[i].data, + vmx->guest_msrs[i].mask); + + } + if (vmx->loaded_cpu_state) return; @@ -2957,11 +2973,6 @@ static void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) vmcs_writel(HOST_GS_BASE, gs_base); host_state->gs_base = gs_base; } - - for (i = 0; i < vmx->save_nmsrs; ++i) - kvm_set_shared_msr(vmx->guest_msrs[i].index, - vmx->guest_msrs[i].data, - vmx->guest_msrs[i].mask); } static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx) @@ -3436,6 +3447,7 @@ static void setup_msrs(struct vcpu_vmx *vmx) move_msr_up(vmx, index, save_nmsrs++); vmx->save_nmsrs = save_nmsrs; + vmx->guest_msrs_dirty = true; if (cpu_has_vmx_msr_bitmap()) vmx_update_msr_bitmap(&vmx->vcpu); @@ -3452,11 +3464,9 @@ static u64 vmx_read_l1_tsc_offset(struct kvm_vcpu *vcpu) return vcpu->arch.tsc_offset; } -/* - * writes 'offset' into guest's timestamp counter offset register - */ -static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) +static u64 vmx_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) { + u64 active_offset = offset; if (is_guest_mode(vcpu)) { /* * We're here if L1 chose not to trap WRMSR to TSC. According @@ -3464,17 +3474,16 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) * set for L2 remains unchanged, and still needs to be added * to the newly set TSC to get L2's TSC. */ - struct vmcs12 *vmcs12; - /* recalculate vmcs02.TSC_OFFSET: */ - vmcs12 = get_vmcs12(vcpu); - vmcs_write64(TSC_OFFSET, offset + - (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETING) ? - vmcs12->tsc_offset : 0)); + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + if (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETING)) + active_offset += vmcs12->tsc_offset; } else { trace_kvm_write_tsc_offset(vcpu->vcpu_id, vmcs_read64(TSC_OFFSET), offset); - vmcs_write64(TSC_OFFSET, offset); } + + vmcs_write64(TSC_OFFSET, active_offset); + return active_offset; } /* @@ -5944,7 +5953,7 @@ static void free_vpid(int vpid) spin_unlock(&vmx_vpid_lock); } -static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, +static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr, int type) { int f = sizeof(unsigned long); @@ -5982,7 +5991,7 @@ static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bit } } -static void __always_inline vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, +static __always_inline void vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr, int type) { int f = sizeof(unsigned long); @@ -6020,7 +6029,7 @@ static void __always_inline vmx_enable_intercept_for_msr(unsigned long *msr_bitm } } -static void __always_inline vmx_set_intercept_for_msr(unsigned long *msr_bitmap, +static __always_inline void vmx_set_intercept_for_msr(unsigned long *msr_bitmap, u32 msr, int type, bool value) { if (value) @@ -8664,8 +8673,6 @@ static int copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx) struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12; struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs; - vmcs12->hdr.revision_id = evmcs->revision_id; - /* HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE */ vmcs12->tpr_threshold = evmcs->tpr_threshold; vmcs12->guest_rip = evmcs->guest_rip; @@ -9369,7 +9376,30 @@ static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu *vcpu, vmx->nested.hv_evmcs = kmap(vmx->nested.hv_evmcs_page); - if (vmx->nested.hv_evmcs->revision_id != VMCS12_REVISION) { + /* + * Currently, KVM only supports eVMCS version 1 + * (== KVM_EVMCS_VERSION) and thus we expect guest to set this + * value to first u32 field of eVMCS which should specify eVMCS + * VersionNumber. + * + * Guest should be aware of supported eVMCS versions by host by + * examining CPUID.0x4000000A.EAX[0:15]. Host userspace VMM is + * expected to set this CPUID leaf according to the value + * returned in vmcs_version from nested_enable_evmcs(). + * + * However, it turns out that Microsoft Hyper-V fails to comply + * to their own invented interface: When Hyper-V use eVMCS, it + * just sets first u32 field of eVMCS to revision_id specified + * in MSR_IA32_VMX_BASIC. Instead of used eVMCS version number + * which is one of the supported versions specified in + * CPUID.0x4000000A.EAX[0:15]. + * + * To overcome Hyper-V bug, we accept here either a supported + * eVMCS version or VMCS12 revision_id as valid values for first + * u32 field of eVMCS. + */ + if ((vmx->nested.hv_evmcs->revision_id != KVM_EVMCS_VERSION) && + (vmx->nested.hv_evmcs->revision_id != VMCS12_REVISION)) { nested_release_evmcs(vcpu); return 0; } @@ -9390,9 +9420,11 @@ static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu *vcpu, * present in struct hv_enlightened_vmcs, ...). Make sure there * are no leftovers. */ - if (from_launch) - memset(vmx->nested.cached_vmcs12, 0, - sizeof(*vmx->nested.cached_vmcs12)); + if (from_launch) { + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + memset(vmcs12, 0, sizeof(*vmcs12)); + vmcs12->hdr.revision_id = VMCS12_REVISION; + } } return 1; @@ -15062,7 +15094,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit, .read_l1_tsc_offset = vmx_read_l1_tsc_offset, - .write_tsc_offset = vmx_write_tsc_offset, + .write_l1_tsc_offset = vmx_write_l1_tsc_offset, .set_tdp_cr3 = vmx_set_cr3, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 66d66d77caee..d02937760c3b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1665,8 +1665,7 @@ EXPORT_SYMBOL_GPL(kvm_read_l1_tsc); static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) { - kvm_x86_ops->write_tsc_offset(vcpu, offset); - vcpu->arch.tsc_offset = offset; + vcpu->arch.tsc_offset = kvm_x86_ops->write_l1_tsc_offset(vcpu, offset); } static inline bool kvm_check_tsc_unstable(void) @@ -1794,7 +1793,8 @@ EXPORT_SYMBOL_GPL(kvm_write_tsc); static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, s64 adjustment) { - kvm_vcpu_write_tsc_offset(vcpu, vcpu->arch.tsc_offset + adjustment); + u64 tsc_offset = kvm_x86_ops->read_l1_tsc_offset(vcpu); + kvm_vcpu_write_tsc_offset(vcpu, tsc_offset + adjustment); } static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment) @@ -2924,7 +2924,7 @@ static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs, unsigned size; r = -EFAULT; - if (copy_from_user(&msrs, user_msrs, sizeof msrs)) + if (copy_from_user(&msrs, user_msrs, sizeof(msrs))) goto out; r = -E2BIG; @@ -3091,11 +3091,11 @@ long kvm_arch_dev_ioctl(struct file *filp, unsigned n; r = -EFAULT; - if (copy_from_user(&msr_list, user_msr_list, sizeof msr_list)) + if (copy_from_user(&msr_list, user_msr_list, sizeof(msr_list))) goto out; n = msr_list.nmsrs; msr_list.nmsrs = num_msrs_to_save + num_emulated_msrs; - if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list)) + if (copy_to_user(user_msr_list, &msr_list, sizeof(msr_list))) goto out; r = -E2BIG; if (n < msr_list.nmsrs) @@ -3117,7 +3117,7 @@ long kvm_arch_dev_ioctl(struct file *filp, struct kvm_cpuid2 cpuid; r = -EFAULT; - if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) + if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid))) goto out; r = kvm_dev_ioctl_get_cpuid(&cpuid, cpuid_arg->entries, @@ -3126,7 +3126,7 @@ long kvm_arch_dev_ioctl(struct file *filp, goto out; r = -EFAULT; - if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid)) + if (copy_to_user(cpuid_arg, &cpuid, sizeof(cpuid))) goto out; r = 0; break; @@ -3894,7 +3894,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, struct kvm_interrupt irq; r = -EFAULT; - if (copy_from_user(&irq, argp, sizeof irq)) + if (copy_from_user(&irq, argp, sizeof(irq))) goto out; r = kvm_vcpu_ioctl_interrupt(vcpu, &irq); break; @@ -3912,7 +3912,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, struct kvm_cpuid cpuid; r = -EFAULT; - if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) + if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid))) goto out; r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries); break; @@ -3922,7 +3922,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, struct kvm_cpuid2 cpuid; r = -EFAULT; - if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) + if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid))) goto out; r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid, cpuid_arg->entries); @@ -3933,14 +3933,14 @@ long kvm_arch_vcpu_ioctl(struct file *filp, struct kvm_cpuid2 cpuid; r = -EFAULT; - if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) + if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid))) goto out; r = kvm_vcpu_ioctl_get_cpuid2(vcpu, &cpuid, cpuid_arg->entries); if (r) goto out; r = -EFAULT; - if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid)) + if (copy_to_user(cpuid_arg, &cpuid, sizeof(cpuid))) goto out; r = 0; break; @@ -3961,13 +3961,13 @@ long kvm_arch_vcpu_ioctl(struct file *filp, struct kvm_tpr_access_ctl tac; r = -EFAULT; - if (copy_from_user(&tac, argp, sizeof tac)) + if (copy_from_user(&tac, argp, sizeof(tac))) goto out; r = vcpu_ioctl_tpr_access_reporting(vcpu, &tac); if (r) goto out; r = -EFAULT; - if (copy_to_user(argp, &tac, sizeof tac)) + if (copy_to_user(argp, &tac, sizeof(tac))) goto out; r = 0; break; @@ -3980,7 +3980,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, if (!lapic_in_kernel(vcpu)) goto out; r = -EFAULT; - if (copy_from_user(&va, argp, sizeof va)) + if (copy_from_user(&va, argp, sizeof(va))) goto out; idx = srcu_read_lock(&vcpu->kvm->srcu); r = kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr); @@ -3991,7 +3991,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, u64 mcg_cap; r = -EFAULT; - if (copy_from_user(&mcg_cap, argp, sizeof mcg_cap)) + if (copy_from_user(&mcg_cap, argp, sizeof(mcg_cap))) goto out; r = kvm_vcpu_ioctl_x86_setup_mce(vcpu, mcg_cap); break; @@ -4000,7 +4000,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, struct kvm_x86_mce mce; r = -EFAULT; - if (copy_from_user(&mce, argp, sizeof mce)) + if (copy_from_user(&mce, argp, sizeof(mce))) goto out; r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce); break; @@ -4536,7 +4536,7 @@ long kvm_arch_vm_ioctl(struct file *filp, if (kvm->created_vcpus) goto set_identity_unlock; r = -EFAULT; - if (copy_from_user(&ident_addr, argp, sizeof ident_addr)) + if (copy_from_user(&ident_addr, argp, sizeof(ident_addr))) goto set_identity_unlock; r = kvm_vm_ioctl_set_identity_map_addr(kvm, ident_addr); set_identity_unlock: @@ -4620,7 +4620,7 @@ set_identity_unlock: if (r) goto get_irqchip_out; r = -EFAULT; - if (copy_to_user(argp, chip, sizeof *chip)) + if (copy_to_user(argp, chip, sizeof(*chip))) goto get_irqchip_out; r = 0; get_irqchip_out: @@ -4666,7 +4666,7 @@ set_identity_unlock: } case KVM_SET_PIT: { r = -EFAULT; - if (copy_from_user(&u.ps, argp, sizeof u.ps)) + if (copy_from_user(&u.ps, argp, sizeof(u.ps))) goto out; r = -ENXIO; if (!kvm->arch.vpit) @@ -6918,6 +6918,7 @@ static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr, clock_pairing.nsec = ts.tv_nsec; clock_pairing.tsc = kvm_read_l1_tsc(vcpu, cycle); clock_pairing.flags = 0; + memset(&clock_pairing.pad, 0, sizeof(clock_pairing.pad)); ret = 0; if (kvm_write_guest(vcpu->kvm, paddr, &clock_pairing, @@ -7455,7 +7456,8 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) else { if (vcpu->arch.apicv_active) kvm_x86_ops->sync_pir_to_irr(vcpu); - kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors); + if (ioapic_in_kernel(vcpu->kvm)) + kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors); } if (is_guest_mode(vcpu)) @@ -8205,7 +8207,7 @@ static void __get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) sregs->efer = vcpu->arch.efer; sregs->apic_base = kvm_get_apic_base(vcpu); - memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap); + memset(sregs->interrupt_bitmap, 0, sizeof(sregs->interrupt_bitmap)); if (vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft) set_bit(vcpu->arch.interrupt.nr, @@ -8509,7 +8511,7 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) fpu->last_opcode = fxsave->fop; fpu->last_ip = fxsave->rip; fpu->last_dp = fxsave->rdp; - memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space); + memcpy(fpu->xmm, fxsave->xmm_space, sizeof(fxsave->xmm_space)); vcpu_put(vcpu); return 0; @@ -8530,7 +8532,7 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) fxsave->fop = fpu->last_opcode; fxsave->rip = fpu->last_ip; fxsave->rdp = fpu->last_dp; - memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space); + memcpy(fxsave->xmm_space, fpu->xmm, sizeof(fxsave->xmm_space)); vcpu_put(vcpu); return 0; diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index 00b296617ca4..92e4c4b85bba 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -92,7 +92,7 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file, * If hint address is above DEFAULT_MAP_WINDOW, look for unmapped area * in the full address space. */ - info.high_limit = in_compat_syscall() ? + info.high_limit = in_32bit_syscall() ? task_size_32bit() : task_size_64bit(addr > DEFAULT_MAP_WINDOW); info.align_mask = PAGE_MASK & ~huge_page_mask(h); @@ -116,7 +116,7 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file, * If hint address is above DEFAULT_MAP_WINDOW, look for unmapped area * in the full address space. */ - if (addr > DEFAULT_MAP_WINDOW && !in_compat_syscall()) + if (addr > DEFAULT_MAP_WINDOW && !in_32bit_syscall()) info.high_limit += TASK_SIZE_MAX - DEFAULT_MAP_WINDOW; info.align_mask = PAGE_MASK & ~huge_page_mask(h); diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index 1e95d57760cf..db3165714521 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -166,7 +166,7 @@ unsigned long get_mmap_base(int is_legacy) struct mm_struct *mm = current->mm; #ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES - if (in_compat_syscall()) { + if (in_32bit_syscall()) { return is_legacy ? mm->mmap_compat_legacy_base : mm->mmap_compat_base; } diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c index a80fdd7fb40f..abffa0be80da 100644 --- a/arch/x86/mm/numa_emulation.c +++ b/arch/x86/mm/numa_emulation.c @@ -399,9 +399,17 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt) n = simple_strtoul(emu_cmdline, &emu_cmdline, 0); ret = -1; for_each_node_mask(i, physnode_mask) { + /* + * The reason we pass in blk[0] is due to + * numa_remove_memblk_from() called by + * emu_setup_memblk() will delete entry 0 + * and then move everything else up in the pi.blk + * array. Therefore we should always be looking + * at blk[0]. + */ ret = split_nodes_size_interleave_uniform(&ei, &pi, - pi.blk[i].start, pi.blk[i].end, 0, - n, &pi.blk[i], nid); + pi.blk[0].start, pi.blk[0].end, 0, + n, &pi.blk[0], nid); if (ret < 0) break; if (ret < n) { diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index f799076e3d57..db7a10082238 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -2309,9 +2309,13 @@ void __kernel_map_pages(struct page *page, int numpages, int enable) /* * We should perform an IPI and flush all tlbs, - * but that can deadlock->flush only current cpu: + * but that can deadlock->flush only current cpu. + * Preemption needs to be disabled around __flush_tlb_all() due to + * CR3 reload in __native_flush_tlb(). */ + preempt_disable(); __flush_tlb_all(); + preempt_enable(); arch_flush_lazy_mmu_mode(); } diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index bddd6b3cee1d..03b6b4c2238d 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -7,7 +7,6 @@ #include <linux/export.h> #include <linux/cpu.h> #include <linux/debugfs.h> -#include <linux/ptrace.h> #include <asm/tlbflush.h> #include <asm/mmu_context.h> @@ -31,6 +30,12 @@ */ /* + * Use bit 0 to mangle the TIF_SPEC_IB state into the mm pointer which is + * stored in cpu_tlb_state.last_user_mm_ibpb. + */ +#define LAST_USER_MM_IBPB 0x1UL + +/* * We get here when we do something requiring a TLB invalidation * but could not go invalidate all of the contexts. We do the * necessary invalidation by clearing out the 'ctx_id' which @@ -181,17 +186,87 @@ static void sync_current_stack_to_mm(struct mm_struct *mm) } } -static bool ibpb_needed(struct task_struct *tsk, u64 last_ctx_id) +static inline unsigned long mm_mangle_tif_spec_ib(struct task_struct *next) +{ + unsigned long next_tif = task_thread_info(next)->flags; + unsigned long ibpb = (next_tif >> TIF_SPEC_IB) & LAST_USER_MM_IBPB; + + return (unsigned long)next->mm | ibpb; +} + +static void cond_ibpb(struct task_struct *next) { + if (!next || !next->mm) + return; + /* - * Check if the current (previous) task has access to the memory - * of the @tsk (next) task. If access is denied, make sure to - * issue a IBPB to stop user->user Spectre-v2 attacks. - * - * Note: __ptrace_may_access() returns 0 or -ERRNO. + * Both, the conditional and the always IBPB mode use the mm + * pointer to avoid the IBPB when switching between tasks of the + * same process. Using the mm pointer instead of mm->context.ctx_id + * opens a hypothetical hole vs. mm_struct reuse, which is more or + * less impossible to control by an attacker. Aside of that it + * would only affect the first schedule so the theoretically + * exposed data is not really interesting. */ - return (tsk && tsk->mm && tsk->mm->context.ctx_id != last_ctx_id && - ptrace_may_access_sched(tsk, PTRACE_MODE_SPEC_IBPB)); + if (static_branch_likely(&switch_mm_cond_ibpb)) { + unsigned long prev_mm, next_mm; + + /* + * This is a bit more complex than the always mode because + * it has to handle two cases: + * + * 1) Switch from a user space task (potential attacker) + * which has TIF_SPEC_IB set to a user space task + * (potential victim) which has TIF_SPEC_IB not set. + * + * 2) Switch from a user space task (potential attacker) + * which has TIF_SPEC_IB not set to a user space task + * (potential victim) which has TIF_SPEC_IB set. + * + * This could be done by unconditionally issuing IBPB when + * a task which has TIF_SPEC_IB set is either scheduled in + * or out. Though that results in two flushes when: + * + * - the same user space task is scheduled out and later + * scheduled in again and only a kernel thread ran in + * between. + * + * - a user space task belonging to the same process is + * scheduled in after a kernel thread ran in between + * + * - a user space task belonging to the same process is + * scheduled in immediately. + * + * Optimize this with reasonably small overhead for the + * above cases. Mangle the TIF_SPEC_IB bit into the mm + * pointer of the incoming task which is stored in + * cpu_tlbstate.last_user_mm_ibpb for comparison. + */ + next_mm = mm_mangle_tif_spec_ib(next); + prev_mm = this_cpu_read(cpu_tlbstate.last_user_mm_ibpb); + + /* + * Issue IBPB only if the mm's are different and one or + * both have the IBPB bit set. + */ + if (next_mm != prev_mm && + (next_mm | prev_mm) & LAST_USER_MM_IBPB) + indirect_branch_prediction_barrier(); + + this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, next_mm); + } + + if (static_branch_unlikely(&switch_mm_always_ibpb)) { + /* + * Only flush when switching to a user space task with a + * different context than the user space task which ran + * last on this CPU. + */ + if (this_cpu_read(cpu_tlbstate.last_user_mm) != next->mm) { + indirect_branch_prediction_barrier(); + this_cpu_write(cpu_tlbstate.last_user_mm, next->mm); + } + } } void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, @@ -292,22 +367,12 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, new_asid = prev_asid; need_flush = true; } else { - u64 last_ctx_id = this_cpu_read(cpu_tlbstate.last_ctx_id); - /* * Avoid user/user BTB poisoning by flushing the branch * predictor when switching between processes. This stops * one process from doing Spectre-v2 attacks on another. - * - * As an optimization, flush indirect branches only when - * switching into a processes that can't be ptrace by the - * current one (as in such case, attacker has much more - * convenient way how to tamper with the next process than - * branch buffer poisoning). */ - if (static_cpu_has(X86_FEATURE_USE_IBPB) && - ibpb_needed(tsk, last_ctx_id)) - indirect_branch_prediction_barrier(); + cond_ibpb(tsk); if (IS_ENABLED(CONFIG_VMAP_STACK)) { /* @@ -365,14 +430,6 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0); } - /* - * Record last user mm's context id, so we can avoid - * flushing branch buffer with IBPB if we switch back - * to the same user. - */ - if (next != &init_mm) - this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id); - /* Make sure we write CR3 before loaded_mm. */ barrier(); @@ -441,7 +498,7 @@ void initialize_tlbstate_and_flush(void) write_cr3(build_cr3(mm->pgd, 0)); /* Reinitialize tlbstate. */ - this_cpu_write(cpu_tlbstate.last_ctx_id, mm->context.ctx_id); + this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, LAST_USER_MM_IBPB); this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0); this_cpu_write(cpu_tlbstate.next_asid, 1); this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id); diff --git a/arch/x86/platform/efi/early_printk.c b/arch/x86/platform/efi/early_printk.c index 7476b3b097e1..7138bc7a265c 100644 --- a/arch/x86/platform/efi/early_printk.c +++ b/arch/x86/platform/efi/early_printk.c @@ -183,7 +183,7 @@ early_efi_write(struct console *con, const char *str, unsigned int num) num--; } - if (efi_x >= si->lfb_width) { + if (efi_x + font->width > si->lfb_width) { efi_x = 0; efi_y += font->height; } diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c index 0b08067c45f3..b629f6992d9f 100644 --- a/arch/x86/tools/relocs.c +++ b/arch/x86/tools/relocs.c @@ -130,7 +130,7 @@ static void regex_init(int use_real_mode) REG_EXTENDED|REG_NOSUB); if (err) { - regerror(err, &sym_regex_c[i], errbuf, sizeof errbuf); + regerror(err, &sym_regex_c[i], errbuf, sizeof(errbuf)); die("%s", errbuf); } } @@ -405,7 +405,7 @@ static void read_shdrs(FILE *fp) } for (i = 0; i < ehdr.e_shnum; i++) { struct section *sec = &secs[i]; - if (fread(&shdr, sizeof shdr, 1, fp) != 1) + if (fread(&shdr, sizeof(shdr), 1, fp) != 1) die("Cannot read ELF section headers %d/%d: %s\n", i, ehdr.e_shnum, strerror(errno)); sec->shdr.sh_name = elf_word_to_cpu(shdr.sh_name); diff --git a/arch/x86/um/asm/elf.h b/arch/x86/um/asm/elf.h index 413f3519d9a1..c907b20d4993 100644 --- a/arch/x86/um/asm/elf.h +++ b/arch/x86/um/asm/elf.h @@ -194,7 +194,7 @@ extern unsigned long um_vdso_addr; typedef unsigned long elf_greg_t; -#define ELF_NGREG (sizeof (struct user_regs_struct) / sizeof(elf_greg_t)) +#define ELF_NGREG (sizeof(struct user_regs_struct) / sizeof(elf_greg_t)) typedef elf_greg_t elf_gregset_t[ELF_NGREG]; typedef struct user_i387_struct elf_fpregset_t; diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index e996e8e744cb..750f46ad018a 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -10,7 +10,6 @@ #include <xen/xen.h> #include <xen/features.h> #include <xen/page.h> -#include <xen/interface/memory.h> #include <asm/xen/hypercall.h> #include <asm/xen/hypervisor.h> @@ -346,80 +345,3 @@ void xen_arch_unregister_cpu(int num) } EXPORT_SYMBOL(xen_arch_unregister_cpu); #endif - -#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG -void __init arch_xen_balloon_init(struct resource *hostmem_resource) -{ - struct xen_memory_map memmap; - int rc; - unsigned int i, last_guest_ram; - phys_addr_t max_addr = PFN_PHYS(max_pfn); - struct e820_table *xen_e820_table; - const struct e820_entry *entry; - struct resource *res; - - if (!xen_initial_domain()) - return; - - xen_e820_table = kmalloc(sizeof(*xen_e820_table), GFP_KERNEL); - if (!xen_e820_table) - return; - - memmap.nr_entries = ARRAY_SIZE(xen_e820_table->entries); - set_xen_guest_handle(memmap.buffer, xen_e820_table->entries); - rc = HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap); - if (rc) { - pr_warn("%s: Can't read host e820 (%d)\n", __func__, rc); - goto out; - } - - last_guest_ram = 0; - for (i = 0; i < memmap.nr_entries; i++) { - if (xen_e820_table->entries[i].addr >= max_addr) - break; - if (xen_e820_table->entries[i].type == E820_TYPE_RAM) - last_guest_ram = i; - } - - entry = &xen_e820_table->entries[last_guest_ram]; - if (max_addr >= entry->addr + entry->size) - goto out; /* No unallocated host RAM. */ - - hostmem_resource->start = max_addr; - hostmem_resource->end = entry->addr + entry->size; - - /* - * Mark non-RAM regions between the end of dom0 RAM and end of host RAM - * as unavailable. The rest of that region can be used for hotplug-based - * ballooning. - */ - for (; i < memmap.nr_entries; i++) { - entry = &xen_e820_table->entries[i]; - - if (entry->type == E820_TYPE_RAM) - continue; - - if (entry->addr >= hostmem_resource->end) - break; - - res = kzalloc(sizeof(*res), GFP_KERNEL); - if (!res) - goto out; - - res->name = "Unavailable host RAM"; - res->start = entry->addr; - res->end = (entry->addr + entry->size < hostmem_resource->end) ? - entry->addr + entry->size : hostmem_resource->end; - rc = insert_resource(hostmem_resource, res); - if (rc) { - pr_warn("%s: Can't insert [%llx - %llx) (%d)\n", - __func__, res->start, res->end, rc); - kfree(res); - goto out; - } - } - - out: - kfree(xen_e820_table); -} -#endif /* CONFIG_XEN_BALLOON_MEMORY_HOTPLUG */ diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 0d7b3ae4960b..a5d7ed125337 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -1905,7 +1905,7 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) init_top_pgt[0] = __pgd(0); /* Pre-constructed entries are in pfn, so convert to mfn */ - /* L4[272] -> level3_ident_pgt */ + /* L4[273] -> level3_ident_pgt */ /* L4[511] -> level3_kernel_pgt */ convert_pfn_mfn(init_top_pgt); @@ -1925,8 +1925,8 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) addr[0] = (unsigned long)pgd; addr[1] = (unsigned long)l3; addr[2] = (unsigned long)l2; - /* Graft it onto L4[272][0]. Note that we creating an aliasing problem: - * Both L4[272][0] and L4[511][510] have entries that point to the same + /* Graft it onto L4[273][0]. Note that we creating an aliasing problem: + * Both L4[273][0] and L4[511][510] have entries that point to the same * L2 (PMD) tables. Meaning that if you modify it in __va space * it will be also modified in the __ka space! (But if you just * modify the PMD table to point to other PTE's or none, then you diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c index 2bce7958ce8b..0766a08bdf45 100644 --- a/arch/x86/xen/multicalls.c +++ b/arch/x86/xen/multicalls.c @@ -69,6 +69,11 @@ void xen_mc_flush(void) trace_xen_mc_flush(b->mcidx, b->argidx, b->cbidx); +#if MC_DEBUG + memcpy(b->debug, b->entries, + b->mcidx * sizeof(struct multicall_entry)); +#endif + switch (b->mcidx) { case 0: /* no-op */ @@ -87,32 +92,34 @@ void xen_mc_flush(void) break; default: -#if MC_DEBUG - memcpy(b->debug, b->entries, - b->mcidx * sizeof(struct multicall_entry)); -#endif - if (HYPERVISOR_multicall(b->entries, b->mcidx) != 0) BUG(); for (i = 0; i < b->mcidx; i++) if (b->entries[i].result < 0) ret++; + } + if (WARN_ON(ret)) { + pr_err("%d of %d multicall(s) failed: cpu %d\n", + ret, b->mcidx, smp_processor_id()); + for (i = 0; i < b->mcidx; i++) { + if (b->entries[i].result < 0) { #if MC_DEBUG - if (ret) { - printk(KERN_ERR "%d multicall(s) failed: cpu %d\n", - ret, smp_processor_id()); - dump_stack(); - for (i = 0; i < b->mcidx; i++) { - printk(KERN_DEBUG " call %2d/%d: op=%lu arg=[%lx] result=%ld\t%pF\n", - i+1, b->mcidx, + pr_err(" call %2d: op=%lu arg=[%lx] result=%ld\t%pF\n", + i + 1, b->debug[i].op, b->debug[i].args[0], b->entries[i].result, b->caller[i]); +#else + pr_err(" call %2d: op=%lu arg=[%lx] result=%ld\n", + i + 1, + b->entries[i].op, + b->entries[i].args[0], + b->entries[i].result); +#endif } } -#endif } b->mcidx = 0; @@ -126,8 +133,6 @@ void xen_mc_flush(void) b->cbidx = 0; local_irq_restore(flags); - - WARN_ON(ret); } struct multicall_space __xen_mc_entry(size_t args) diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index b06731705529..055e37e43541 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -656,8 +656,7 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) /* * The interface requires atomic updates on p2m elements. - * xen_safe_write_ulong() is using __put_user which does an atomic - * store via asm(). + * xen_safe_write_ulong() is using an atomic store via asm(). */ if (likely(!xen_safe_write_ulong(xen_p2m_addr + pfn, mfn))) return true; diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 1163e33121fb..075ed47993bb 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -808,6 +808,7 @@ char * __init xen_memory_setup(void) addr = xen_e820_table.entries[0].addr; size = xen_e820_table.entries[0].size; while (i < xen_e820_table.nr_entries) { + bool discard = false; chunk_size = size; type = xen_e820_table.entries[i].type; @@ -823,10 +824,11 @@ char * __init xen_memory_setup(void) xen_add_extra_mem(pfn_s, n_pfns); xen_max_p2m_pfn = pfn_s + n_pfns; } else - type = E820_TYPE_UNUSABLE; + discard = true; } - xen_align_and_add_e820_region(addr, chunk_size, type); + if (!discard) + xen_align_and_add_e820_region(addr, chunk_size, type); addr += chunk_size; size -= chunk_size; diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 441c88262169..3776122c87cc 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -3,24 +3,21 @@ * Split spinlock implementation out into its own file, so it can be * compiled in a FTRACE-compatible way. */ -#include <linux/kernel_stat.h> +#include <linux/kernel.h> #include <linux/spinlock.h> -#include <linux/debugfs.h> -#include <linux/log2.h> -#include <linux/gfp.h> #include <linux/slab.h> +#include <linux/atomic.h> #include <asm/paravirt.h> #include <asm/qspinlock.h> -#include <xen/interface/xen.h> #include <xen/events.h> #include "xen-ops.h" -#include "debugfs.h" static DEFINE_PER_CPU(int, lock_kicker_irq) = -1; static DEFINE_PER_CPU(char *, irq_name); +static DEFINE_PER_CPU(atomic_t, xen_qlock_wait_nest); static bool xen_pvspin = true; static void xen_qlock_kick(int cpu) @@ -39,25 +36,25 @@ static void xen_qlock_kick(int cpu) */ static void xen_qlock_wait(u8 *byte, u8 val) { - unsigned long flags; int irq = __this_cpu_read(lock_kicker_irq); + atomic_t *nest_cnt = this_cpu_ptr(&xen_qlock_wait_nest); /* If kicker interrupts not initialized yet, just spin */ if (irq == -1 || in_nmi()) return; - /* Guard against reentry. */ - local_irq_save(flags); + /* Detect reentry. */ + atomic_inc(nest_cnt); - /* If irq pending already clear it. */ - if (xen_test_irq_pending(irq)) { + /* If irq pending already and no nested call clear it. */ + if (atomic_read(nest_cnt) == 1 && xen_test_irq_pending(irq)) { xen_clear_irq_pending(irq); } else if (READ_ONCE(*byte) == val) { /* Block until irq becomes pending (or a spurious wakeup) */ xen_poll_irq(irq); } - local_irq_restore(flags); + atomic_dec(nest_cnt); } static irqreturn_t dummy_handler(int irq, void *dev_id) diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index 60c141af222b..d29b7365da8d 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -1,7 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -config ZONE_DMA - def_bool y - config XTENSA def_bool y select ARCH_HAS_SG_CHAIN diff --git a/arch/xtensa/boot/Makefile b/arch/xtensa/boot/Makefile index dc9e0ba7122c..294846117fc2 100644 --- a/arch/xtensa/boot/Makefile +++ b/arch/xtensa/boot/Makefile @@ -33,7 +33,7 @@ uImage: $(obj)/uImage boot-elf boot-redboot: $(addprefix $(obj)/,$(subdir-y)) $(Q)$(MAKE) $(build)=$(obj)/$@ $(MAKECMDGOALS) -OBJCOPYFLAGS = --strip-all -R .comment -R .note.gnu.build-id -O binary +OBJCOPYFLAGS = --strip-all -R .comment -R .notes -O binary vmlinux.bin: vmlinux FORCE $(call if_changed,objcopy) diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h index be9bfd9aa865..34a23016dd14 100644 --- a/arch/xtensa/include/asm/processor.h +++ b/arch/xtensa/include/asm/processor.h @@ -23,7 +23,11 @@ # error Linux requires the Xtensa Windowed Registers Option. #endif -#define ARCH_SLAB_MINALIGN XCHAL_DATA_WIDTH +/* Xtensa ABI requires stack alignment to be at least 16 */ + +#define STACK_ALIGN (XCHAL_DATA_WIDTH > 16 ? XCHAL_DATA_WIDTH : 16) + +#define ARCH_SLAB_MINALIGN STACK_ALIGN /* * User space process size: 1 GB. diff --git a/arch/xtensa/kernel/asm-offsets.c b/arch/xtensa/kernel/asm-offsets.c index 67904f55f188..120dd746a147 100644 --- a/arch/xtensa/kernel/asm-offsets.c +++ b/arch/xtensa/kernel/asm-offsets.c @@ -94,14 +94,14 @@ int main(void) DEFINE(THREAD_SP, offsetof (struct task_struct, thread.sp)); DEFINE(THREAD_CPENABLE, offsetof (struct thread_info, cpenable)); #if XTENSA_HAVE_COPROCESSORS - DEFINE(THREAD_XTREGS_CP0, offsetof (struct thread_info, xtregs_cp)); - DEFINE(THREAD_XTREGS_CP1, offsetof (struct thread_info, xtregs_cp)); - DEFINE(THREAD_XTREGS_CP2, offsetof (struct thread_info, xtregs_cp)); - DEFINE(THREAD_XTREGS_CP3, offsetof (struct thread_info, xtregs_cp)); - DEFINE(THREAD_XTREGS_CP4, offsetof (struct thread_info, xtregs_cp)); - DEFINE(THREAD_XTREGS_CP5, offsetof (struct thread_info, xtregs_cp)); - DEFINE(THREAD_XTREGS_CP6, offsetof (struct thread_info, xtregs_cp)); - DEFINE(THREAD_XTREGS_CP7, offsetof (struct thread_info, xtregs_cp)); + DEFINE(THREAD_XTREGS_CP0, offsetof(struct thread_info, xtregs_cp.cp0)); + DEFINE(THREAD_XTREGS_CP1, offsetof(struct thread_info, xtregs_cp.cp1)); + DEFINE(THREAD_XTREGS_CP2, offsetof(struct thread_info, xtregs_cp.cp2)); + DEFINE(THREAD_XTREGS_CP3, offsetof(struct thread_info, xtregs_cp.cp3)); + DEFINE(THREAD_XTREGS_CP4, offsetof(struct thread_info, xtregs_cp.cp4)); + DEFINE(THREAD_XTREGS_CP5, offsetof(struct thread_info, xtregs_cp.cp5)); + DEFINE(THREAD_XTREGS_CP6, offsetof(struct thread_info, xtregs_cp.cp6)); + DEFINE(THREAD_XTREGS_CP7, offsetof(struct thread_info, xtregs_cp.cp7)); #endif DEFINE(THREAD_XTREGS_USER, offsetof (struct thread_info, xtregs_user)); DEFINE(XTREGS_USER_SIZE, sizeof(xtregs_user_t)); diff --git a/arch/xtensa/kernel/head.S b/arch/xtensa/kernel/head.S index 2f76118ecf62..9053a5622d2c 100644 --- a/arch/xtensa/kernel/head.S +++ b/arch/xtensa/kernel/head.S @@ -88,9 +88,12 @@ _SetupMMU: initialize_mmu #if defined(CONFIG_MMU) && XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY rsr a2, excsave1 - movi a3, 0x08000000 + movi a3, XCHAL_KSEG_PADDR + bltu a2, a3, 1f + sub a2, a2, a3 + movi a3, XCHAL_KSEG_SIZE bgeu a2, a3, 1f - movi a3, 0xd0000000 + movi a3, XCHAL_KSEG_CACHED_VADDR add a2, a2, a3 wsr a2, excsave1 1: diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c index 483dcfb6e681..4bb68133a72a 100644 --- a/arch/xtensa/kernel/process.c +++ b/arch/xtensa/kernel/process.c @@ -94,18 +94,21 @@ void coprocessor_release_all(struct thread_info *ti) void coprocessor_flush_all(struct thread_info *ti) { - unsigned long cpenable; + unsigned long cpenable, old_cpenable; int i; preempt_disable(); + RSR_CPENABLE(old_cpenable); cpenable = ti->cpenable; + WSR_CPENABLE(cpenable); for (i = 0; i < XCHAL_CP_MAX; i++) { if ((cpenable & 1) != 0 && coprocessor_owner[i] == ti) coprocessor_flush(ti, i); cpenable >>= 1; } + WSR_CPENABLE(old_cpenable); preempt_enable(); } diff --git a/arch/xtensa/kernel/ptrace.c b/arch/xtensa/kernel/ptrace.c index c0845cb1cbb9..d9541be0605a 100644 --- a/arch/xtensa/kernel/ptrace.c +++ b/arch/xtensa/kernel/ptrace.c @@ -127,12 +127,37 @@ static int ptrace_setregs(struct task_struct *child, void __user *uregs) } +#if XTENSA_HAVE_COPROCESSORS +#define CP_OFFSETS(cp) \ + { \ + .elf_xtregs_offset = offsetof(elf_xtregs_t, cp), \ + .ti_offset = offsetof(struct thread_info, xtregs_cp.cp), \ + .sz = sizeof(xtregs_ ## cp ## _t), \ + } + +static const struct { + size_t elf_xtregs_offset; + size_t ti_offset; + size_t sz; +} cp_offsets[] = { + CP_OFFSETS(cp0), + CP_OFFSETS(cp1), + CP_OFFSETS(cp2), + CP_OFFSETS(cp3), + CP_OFFSETS(cp4), + CP_OFFSETS(cp5), + CP_OFFSETS(cp6), + CP_OFFSETS(cp7), +}; +#endif + static int ptrace_getxregs(struct task_struct *child, void __user *uregs) { struct pt_regs *regs = task_pt_regs(child); struct thread_info *ti = task_thread_info(child); elf_xtregs_t __user *xtregs = uregs; int ret = 0; + int i __maybe_unused; if (!access_ok(VERIFY_WRITE, uregs, sizeof(elf_xtregs_t))) return -EIO; @@ -140,8 +165,13 @@ static int ptrace_getxregs(struct task_struct *child, void __user *uregs) #if XTENSA_HAVE_COPROCESSORS /* Flush all coprocessor registers to memory. */ coprocessor_flush_all(ti); - ret |= __copy_to_user(&xtregs->cp0, &ti->xtregs_cp, - sizeof(xtregs_coprocessor_t)); + + for (i = 0; i < ARRAY_SIZE(cp_offsets); ++i) + ret |= __copy_to_user((char __user *)xtregs + + cp_offsets[i].elf_xtregs_offset, + (const char *)ti + + cp_offsets[i].ti_offset, + cp_offsets[i].sz); #endif ret |= __copy_to_user(&xtregs->opt, ®s->xtregs_opt, sizeof(xtregs->opt)); @@ -157,6 +187,7 @@ static int ptrace_setxregs(struct task_struct *child, void __user *uregs) struct pt_regs *regs = task_pt_regs(child); elf_xtregs_t *xtregs = uregs; int ret = 0; + int i __maybe_unused; if (!access_ok(VERIFY_READ, uregs, sizeof(elf_xtregs_t))) return -EFAULT; @@ -166,8 +197,11 @@ static int ptrace_setxregs(struct task_struct *child, void __user *uregs) coprocessor_flush_all(ti); coprocessor_release_all(ti); - ret |= __copy_from_user(&ti->xtregs_cp, &xtregs->cp0, - sizeof(xtregs_coprocessor_t)); + for (i = 0; i < ARRAY_SIZE(cp_offsets); ++i) + ret |= __copy_from_user((char *)ti + cp_offsets[i].ti_offset, + (const char __user *)xtregs + + cp_offsets[i].elf_xtregs_offset, + cp_offsets[i].sz); #endif ret |= __copy_from_user(®s->xtregs_opt, &xtregs->opt, sizeof(xtregs->opt)); diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S index b727b18a68ac..b80a430453b1 100644 --- a/arch/xtensa/kernel/vmlinux.lds.S +++ b/arch/xtensa/kernel/vmlinux.lds.S @@ -131,6 +131,7 @@ SECTIONS .fixup : { *(.fixup) } EXCEPTION_TABLE(16) + NOTES /* Data section */ _sdata = .; @@ -296,38 +297,11 @@ SECTIONS _end = .; - .xt.lit : { *(.xt.lit) } - .xt.prop : { *(.xt.prop) } - - .debug 0 : { *(.debug) } - .line 0 : { *(.line) } - .debug_srcinfo 0 : { *(.debug_srcinfo) } - .debug_sfnames 0 : { *(.debug_sfnames) } - .debug_aranges 0 : { *(.debug_aranges) } - .debug_pubnames 0 : { *(.debug_pubnames) } - .debug_info 0 : { *(.debug_info) } - .debug_abbrev 0 : { *(.debug_abbrev) } - .debug_line 0 : { *(.debug_line) } - .debug_frame 0 : { *(.debug_frame) } - .debug_str 0 : { *(.debug_str) } - .debug_loc 0 : { *(.debug_loc) } - .debug_macinfo 0 : { *(.debug_macinfo) } - .debug_weaknames 0 : { *(.debug_weaknames) } - .debug_funcnames 0 : { *(.debug_funcnames) } - .debug_typenames 0 : { *(.debug_typenames) } - .debug_varnames 0 : { *(.debug_varnames) } - - .xt.insn 0 : - { - *(.xt.insn) - *(.gnu.linkonce.x*) - } + DWARF_DEBUG - .xt.lit 0 : - { - *(.xt.lit) - *(.gnu.linkonce.p*) - } + .xt.prop 0 : { KEEP(*(.xt.prop .xt.prop.* .gnu.linkonce.prop.*)) } + .xt.insn 0 : { KEEP(*(.xt.insn .xt.insn.* .gnu.linkonce.x*)) } + .xt.lit 0 : { KEEP(*(.xt.lit .xt.lit.* .gnu.linkonce.p*)) } /* Sections to be discarded */ DISCARDS diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c index 9750a48f491b..30a48bba4a47 100644 --- a/arch/xtensa/mm/init.c +++ b/arch/xtensa/mm/init.c @@ -71,7 +71,7 @@ void __init zones_init(void) { /* All pages are DMA-able, so we put them all in the DMA zone. */ unsigned long zones_size[MAX_NR_ZONES] = { - [ZONE_DMA] = max_low_pfn - ARCH_PFN_OFFSET, + [ZONE_NORMAL] = max_low_pfn - ARCH_PFN_OFFSET, #ifdef CONFIG_HIGHMEM [ZONE_HIGHMEM] = max_pfn - max_low_pfn, #endif |

