diff options
Diffstat (limited to 'arch')
520 files changed, 7523 insertions, 4773 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index e9c9334507dd..180ea33164dc 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -336,17 +336,6 @@ config HAVE_ARCH_SECCOMP_FILTER results in the system call being skipped immediately. - seccomp syscall wired up - For best performance, an arch should use seccomp_phase1 and - seccomp_phase2 directly. It should call seccomp_phase1 for all - syscalls if TIF_SECCOMP is set, but seccomp_phase1 does not - need to be called from a ptrace-safe context. It must then - call seccomp_phase2 if seccomp_phase1 returns anything other - than SECCOMP_PHASE1_OK or SECCOMP_PHASE1_SKIP. - - As an additional optimization, an arch may provide seccomp_data - directly to seccomp_phase1; this avoids multiple calls - to the syscall_xyz helpers for every syscall. - config SECCOMP_FILTER def_bool y depends on HAVE_ARCH_SECCOMP_FILTER && SECCOMP && NET @@ -707,4 +696,38 @@ config ARCH_NO_COHERENT_DMA_MMAP config CPU_NO_EFFICIENT_FFS def_bool n +config HAVE_ARCH_VMAP_STACK + def_bool n + help + An arch should select this symbol if it can support kernel stacks + in vmalloc space. This means: + + - vmalloc space must be large enough to hold many kernel stacks. + This may rule out many 32-bit architectures. + + - Stacks in vmalloc space need to work reliably. For example, if + vmap page tables are created on demand, either this mechanism + needs to work while the stack points to a virtual address with + unpopulated page tables or arch code (switch_to() and switch_mm(), + most likely) needs to ensure that the stack's page table entries + are populated before running on a possibly unpopulated stack. + + - If the stack overflows into a guard page, something reasonable + should happen. The definition of "reasonable" is flexible, but + instantly rebooting without logging anything would be unfriendly. + +config VMAP_STACK + default y + bool "Use a virtually-mapped stack" + depends on HAVE_ARCH_VMAP_STACK && !KASAN + ---help--- + Enable this if you want the use virtually-mapped kernel stacks + with guard pages. This causes kernel stack overflows to be + caught immediately rather than causing difficult-to-diagnose + corruption. + + This is presently incompatible with KASAN because KASAN expects + the stack to map directly to the KASAN shadow map using a formula + that is incorrect if the stack is in vmalloc space. + source "kernel/gcov/Kconfig" diff --git a/arch/alpha/include/asm/uaccess.h b/arch/alpha/include/asm/uaccess.h index c419b43c461d..466e42e96bfa 100644 --- a/arch/alpha/include/asm/uaccess.h +++ b/arch/alpha/include/asm/uaccess.h @@ -371,14 +371,6 @@ __copy_tofrom_user_nocheck(void *to, const void *from, long len) return __cu_len; } -extern inline long -__copy_tofrom_user(void *to, const void *from, long len, const void __user *validate) -{ - if (__access_ok((unsigned long)validate, len, get_fs())) - len = __copy_tofrom_user_nocheck(to, from, len); - return len; -} - #define __copy_to_user(to, from, n) \ ({ \ __chk_user_ptr(to); \ @@ -393,17 +385,22 @@ __copy_tofrom_user(void *to, const void *from, long len, const void __user *vali #define __copy_to_user_inatomic __copy_to_user #define __copy_from_user_inatomic __copy_from_user - extern inline long copy_to_user(void __user *to, const void *from, long n) { - return __copy_tofrom_user((__force void *)to, from, n, to); + if (likely(__access_ok((unsigned long)to, n, get_fs()))) + n = __copy_tofrom_user_nocheck((__force void *)to, from, n); + return n; } extern inline long copy_from_user(void *to, const void __user *from, long n) { - return __copy_tofrom_user(to, (__force void *)from, n, from); + if (likely(__access_ok((unsigned long)from, n, get_fs()))) + n = __copy_tofrom_user_nocheck(to, (__force void *)from, n); + else + memset(to, 0, n); + return n; } extern void __do_clear_user(void); diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 0d3e59f56974..ecd12379e2cd 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -13,7 +13,7 @@ config ARC select CLKSRC_OF select CLONE_BACKWARDS select COMMON_CLK - select GENERIC_ATOMIC64 + select GENERIC_ATOMIC64 if !ISA_ARCV2 || !(ARC_HAS_LL64 && ARC_HAS_LLSC) select GENERIC_CLOCKEVENTS select GENERIC_FIND_FIRST_BIT # for now, we don't need GENERIC_IRQ_PROBE, CONFIG_GENERIC_IRQ_CHIP @@ -353,8 +353,8 @@ endchoice config NODES_SHIFT int "Maximum NUMA Nodes (as a power of 2)" - default "1" if !DISCONTIGMEM - default "2" if DISCONTIGMEM + default "0" if !DISCONTIGMEM + default "1" if DISCONTIGMEM depends on NEED_MULTIPLE_NODES ---help--- Accessing memory beyond 1GB (with or w/o PAE) requires 2 memory diff --git a/arch/arc/Makefile b/arch/arc/Makefile index 601ed173080b..aa82d13d4213 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -47,7 +47,6 @@ endif upto_gcc44 := $(call cc-ifversion, -le, 0404, y) atleast_gcc44 := $(call cc-ifversion, -ge, 0404, y) -atleast_gcc48 := $(call cc-ifversion, -ge, 0408, y) cflags-$(atleast_gcc44) += -fsection-anchors @@ -66,10 +65,8 @@ endif endif -# By default gcc 4.8 generates dwarf4 which kernel unwinder can't grok -ifeq ($(atleast_gcc48),y) -cflags-$(CONFIG_ARC_DW2_UNWIND) += -gdwarf-2 -endif +cfi := $(call as-instr,.cfi_startproc\n.cfi_endproc,-DARC_DW2_UNWIND_AS_CFI) +cflags-$(CONFIG_ARC_DW2_UNWIND) += -fasynchronous-unwind-tables $(cfi) ifndef CONFIG_CC_OPTIMIZE_FOR_SIZE # Generic build system uses -O2, we want -O3 diff --git a/arch/arc/boot/dts/abilis_tb100_dvk.dts b/arch/arc/boot/dts/abilis_tb100_dvk.dts index 3dd6ed941464..3acf04db8030 100644 --- a/arch/arc/boot/dts/abilis_tb100_dvk.dts +++ b/arch/arc/boot/dts/abilis_tb100_dvk.dts @@ -24,6 +24,7 @@ /include/ "abilis_tb100.dtsi" / { + model = "abilis,tb100"; chosen { bootargs = "earlycon=uart8250,mmio32,0xff100000,9600n8 console=ttyS0,9600n8"; }; diff --git a/arch/arc/boot/dts/abilis_tb101_dvk.dts b/arch/arc/boot/dts/abilis_tb101_dvk.dts index 1cf51c280f28..37d88c5dd181 100644 --- a/arch/arc/boot/dts/abilis_tb101_dvk.dts +++ b/arch/arc/boot/dts/abilis_tb101_dvk.dts @@ -24,6 +24,7 @@ /include/ "abilis_tb101.dtsi" / { + model = "abilis,tb101"; chosen { bootargs = "earlycon=uart8250,mmio32,0xff100000,9600n8 console=ttyS0,9600n8"; }; diff --git a/arch/arc/boot/dts/axs101.dts b/arch/arc/boot/dts/axs101.dts index 3f9b0582e734..d9b9b9dcfc4c 100644 --- a/arch/arc/boot/dts/axs101.dts +++ b/arch/arc/boot/dts/axs101.dts @@ -13,6 +13,7 @@ /include/ "axs10x_mb.dtsi" / { + model = "snps,axs101"; compatible = "snps,axs101", "snps,arc-sdp"; chosen { diff --git a/arch/arc/boot/dts/axs103.dts b/arch/arc/boot/dts/axs103.dts index e6d0e31ea299..ec7fb277a067 100644 --- a/arch/arc/boot/dts/axs103.dts +++ b/arch/arc/boot/dts/axs103.dts @@ -16,6 +16,7 @@ /include/ "axs10x_mb.dtsi" / { + model = "snps,axs103"; compatible = "snps,axs103", "snps,arc-sdp"; chosen { diff --git a/arch/arc/boot/dts/axs103_idu.dts b/arch/arc/boot/dts/axs103_idu.dts index f999fef5a60a..070c29782216 100644 --- a/arch/arc/boot/dts/axs103_idu.dts +++ b/arch/arc/boot/dts/axs103_idu.dts @@ -16,6 +16,7 @@ /include/ "axs10x_mb.dtsi" / { + model = "snps,axs103-smp"; compatible = "snps,axs103", "snps,arc-sdp"; chosen { diff --git a/arch/arc/boot/dts/nsim_700.dts b/arch/arc/boot/dts/nsim_700.dts index 63970513e4ae..ce0ccd20b5bf 100644 --- a/arch/arc/boot/dts/nsim_700.dts +++ b/arch/arc/boot/dts/nsim_700.dts @@ -10,6 +10,7 @@ /include/ "skeleton.dtsi" / { + model = "snps,nsim"; compatible = "snps,nsim"; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arc/boot/dts/nsim_hs.dts b/arch/arc/boot/dts/nsim_hs.dts index bf05fe5f67b0..3772c40c245e 100644 --- a/arch/arc/boot/dts/nsim_hs.dts +++ b/arch/arc/boot/dts/nsim_hs.dts @@ -10,6 +10,7 @@ /include/ "skeleton_hs.dtsi" / { + model = "snps,nsim_hs"; compatible = "snps,nsim_hs"; #address-cells = <2>; #size-cells = <2>; diff --git a/arch/arc/boot/dts/nsim_hs_idu.dts b/arch/arc/boot/dts/nsim_hs_idu.dts index 99eabe1a2bf6..48434d7c4498 100644 --- a/arch/arc/boot/dts/nsim_hs_idu.dts +++ b/arch/arc/boot/dts/nsim_hs_idu.dts @@ -10,6 +10,7 @@ /include/ "skeleton_hs_idu.dtsi" / { + model = "snps,nsim_hs-smp"; compatible = "snps,nsim_hs"; interrupt-parent = <&core_intc>; diff --git a/arch/arc/boot/dts/nsimosci.dts b/arch/arc/boot/dts/nsimosci.dts index e659a340ca8a..bcf603142a33 100644 --- a/arch/arc/boot/dts/nsimosci.dts +++ b/arch/arc/boot/dts/nsimosci.dts @@ -10,6 +10,7 @@ /include/ "skeleton.dtsi" / { + model = "snps,nsimosci"; compatible = "snps,nsimosci"; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arc/boot/dts/nsimosci_hs.dts b/arch/arc/boot/dts/nsimosci_hs.dts index 16ce5d65cfde..14a727cbf4c9 100644 --- a/arch/arc/boot/dts/nsimosci_hs.dts +++ b/arch/arc/boot/dts/nsimosci_hs.dts @@ -10,6 +10,7 @@ /include/ "skeleton_hs.dtsi" / { + model = "snps,nsimosci_hs"; compatible = "snps,nsimosci_hs"; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arc/boot/dts/nsimosci_hs_idu.dts b/arch/arc/boot/dts/nsimosci_hs_idu.dts index ce8dfbc30c4d..cbf65b6cc7c6 100644 --- a/arch/arc/boot/dts/nsimosci_hs_idu.dts +++ b/arch/arc/boot/dts/nsimosci_hs_idu.dts @@ -10,6 +10,7 @@ /include/ "skeleton_hs_idu.dtsi" / { + model = "snps,nsimosci_hs-smp"; compatible = "snps,nsimosci_hs"; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arc/boot/dts/vdk_hs38.dts b/arch/arc/boot/dts/vdk_hs38.dts index 5d803dd2de59..3c51103f0cd0 100644 --- a/arch/arc/boot/dts/vdk_hs38.dts +++ b/arch/arc/boot/dts/vdk_hs38.dts @@ -13,6 +13,7 @@ /include/ "vdk_axs10x_mb.dtsi" / { + model = "snps,vdk_archs"; compatible = "snps,axs103"; chosen { diff --git a/arch/arc/boot/dts/vdk_hs38_smp.dts b/arch/arc/boot/dts/vdk_hs38_smp.dts index 2ba60c399d99..6be68001a6f0 100644 --- a/arch/arc/boot/dts/vdk_hs38_smp.dts +++ b/arch/arc/boot/dts/vdk_hs38_smp.dts @@ -13,6 +13,7 @@ /include/ "vdk_axs10x_mb.dtsi" / { + model = "snps,vdk_archs-smp"; compatible = "snps,axs103"; chosen { diff --git a/arch/arc/boot/dts/zebu_hs.dts b/arch/arc/boot/dts/zebu_hs.dts new file mode 100644 index 000000000000..1c1324e84965 --- /dev/null +++ b/arch/arc/boot/dts/zebu_hs.dts @@ -0,0 +1,69 @@ +/* + * Copyright (C) 2016-2014 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +/dts-v1/; + +/include/ "skeleton_hs.dtsi" + +/ { + model = "snps,zebu_hs"; + compatible = "snps,zebu_hs"; + #address-cells = <1>; + #size-cells = <1>; + interrupt-parent = <&core_intc>; + + memory { + device_type = "memory"; + reg = <0x80000000 0x20000000>; /* 512 */ + }; + + chosen { + bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=ttyS0,115200n8 debug print-fatal-signals=1"; + }; + + aliases { + serial0 = &uart0; + }; + + fpga { + compatible = "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + + /* child and parent address space 1:1 mapped */ + ranges; + + core_clk: core_clk { + #clock-cells = <0>; + compatible = "fixed-clock"; + clock-frequency = <50000000>; + }; + + core_intc: interrupt-controller { + compatible = "snps,archs-intc"; + interrupt-controller; + #interrupt-cells = <1>; + }; + + uart0: serial@f0000000 { + compatible = "ns8250"; + reg = <0xf0000000 0x2000>; + interrupts = <24>; + clock-frequency = <50000000>; + baud = <115200>; + reg-shift = <2>; + reg-io-width = <4>; + no-loopback-test = <1>; + }; + + arcpct0: pct { + compatible = "snps,archs-pct"; + #interrupt-cells = <1>; + interrupts = <20>; + }; + }; +}; diff --git a/arch/arc/boot/dts/zebu_hs_idu.dts b/arch/arc/boot/dts/zebu_hs_idu.dts new file mode 100644 index 000000000000..65204b4c0f13 --- /dev/null +++ b/arch/arc/boot/dts/zebu_hs_idu.dts @@ -0,0 +1,85 @@ +/* + * Copyright (C) 2016-2014 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +/dts-v1/; + +/include/ "skeleton_hs_idu.dtsi" + +/ { + model = "snps,zebu_hs-smp"; + compatible = "snps,zebu_hs"; + #address-cells = <1>; + #size-cells = <1>; + interrupt-parent = <&core_intc>; + + memory { + device_type = "memory"; + reg = <0x80000000 0x20000000>; /* 512 */ + }; + + chosen { + bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=ttyS0,115200n8 debug"; + }; + + aliases { + serial0 = &uart0; + }; + + fpga { + compatible = "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + + /* child and parent address space 1:1 mapped */ + ranges; + + core_clk: core_clk { + #clock-cells = <0>; + compatible = "fixed-clock"; + clock-frequency = <50000000>; /* 50 MHZ */ + }; + + core_intc: interrupt-controller { + compatible = "snps,archs-intc"; + interrupt-controller; + #interrupt-cells = <1>; +/* interrupts = <16 17 18 19 20 21 22 23 24 25>; */ + }; + + idu_intc: idu-interrupt-controller { + compatible = "snps,archs-idu-intc"; + interrupt-controller; + interrupt-parent = <&core_intc>; + /* <hwirq distribution> + distribution: 0=RR; 1=cpu0, 2=cpu1, 4=cpu2, 8=cpu3 */ + #interrupt-cells = <2>; + interrupts = <24 25 26 27 28 29 30 31>; + + }; + + uart0: serial@f0000000 { + /* compatible = "ns8250"; Doesn't use FIFOs */ + compatible = "ns16550a"; + reg = <0xf0000000 0x2000>; + interrupt-parent = <&idu_intc>; + /* interrupts = <0 1>; DEST=1*/ + /* interrupts = <0 2>; DEST=2*/ + interrupts = <0 0>; /* RR*/ + clock-frequency = <50000000>; + baud = <115200>; + reg-shift = <2>; + reg-io-width = <4>; + no-loopback-test = <1>; + }; + + arcpct0: pct { + compatible = "snps,archs-pct"; + #interrupt-cells = <1>; + interrupts = <20>; + }; + }; +}; diff --git a/arch/arc/configs/axs101_defconfig b/arch/arc/configs/axs101_defconfig index 6cdffea3a914..0a0eaf09aac7 100644 --- a/arch/arc/configs/axs101_defconfig +++ b/arch/arc/configs/axs101_defconfig @@ -18,6 +18,9 @@ CONFIG_PERF_EVENTS=y # CONFIG_SLUB_DEBUG is not set # CONFIG_COMPAT_BRK is not set CONFIG_MODULES=y +CONFIG_MODULE_FORCE_LOAD=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_PARTITION_ADVANCED=y CONFIG_ARC_PLAT_AXS10X=y CONFIG_AXS101=y diff --git a/arch/arc/configs/axs103_defconfig b/arch/arc/configs/axs103_defconfig index 491b3b5f22bd..2233f5777a71 100644 --- a/arch/arc/configs/axs103_defconfig +++ b/arch/arc/configs/axs103_defconfig @@ -18,6 +18,9 @@ CONFIG_PERF_EVENTS=y # CONFIG_SLUB_DEBUG is not set # CONFIG_COMPAT_BRK is not set CONFIG_MODULES=y +CONFIG_MODULE_FORCE_LOAD=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_PARTITION_ADVANCED=y CONFIG_ARC_PLAT_AXS10X=y CONFIG_AXS103=y diff --git a/arch/arc/configs/axs103_smp_defconfig b/arch/arc/configs/axs103_smp_defconfig index b25ee73b2e79..110874705085 100644 --- a/arch/arc/configs/axs103_smp_defconfig +++ b/arch/arc/configs/axs103_smp_defconfig @@ -18,6 +18,9 @@ CONFIG_PERF_EVENTS=y # CONFIG_COMPAT_BRK is not set CONFIG_SLAB=y CONFIG_MODULES=y +CONFIG_MODULE_FORCE_LOAD=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_PARTITION_ADVANCED=y CONFIG_ARC_PLAT_AXS10X=y CONFIG_AXS103=y diff --git a/arch/arc/configs/nsim_hs_defconfig b/arch/arc/configs/nsim_hs_defconfig index a99dc7a3f0af..65ab9fbf83f2 100644 --- a/arch/arc/configs/nsim_hs_defconfig +++ b/arch/arc/configs/nsim_hs_defconfig @@ -11,13 +11,16 @@ CONFIG_NAMESPACES=y # CONFIG_UTS_NS is not set # CONFIG_PID_NS is not set CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="../arc_initramfs_hs/" +CONFIG_INITRAMFS_SOURCE="../../arc_initramfs_hs/" CONFIG_KALLSYMS_ALL=y CONFIG_EMBEDDED=y # CONFIG_SLUB_DEBUG is not set # CONFIG_COMPAT_BRK is not set CONFIG_KPROBES=y CONFIG_MODULES=y +CONFIG_MODULE_FORCE_LOAD=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODULE_FORCE_UNLOAD=y # CONFIG_LBDAF is not set # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set diff --git a/arch/arc/configs/nsim_hs_smp_defconfig b/arch/arc/configs/nsim_hs_smp_defconfig index 59f221fc9a41..3b3990cddbe1 100644 --- a/arch/arc/configs/nsim_hs_smp_defconfig +++ b/arch/arc/configs/nsim_hs_smp_defconfig @@ -16,6 +16,9 @@ CONFIG_EMBEDDED=y # CONFIG_COMPAT_BRK is not set CONFIG_KPROBES=y CONFIG_MODULES=y +CONFIG_MODULE_FORCE_LOAD=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODULE_FORCE_UNLOAD=y # CONFIG_LBDAF is not set # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set diff --git a/arch/arc/configs/zebu_hs_defconfig b/arch/arc/configs/zebu_hs_defconfig new file mode 100644 index 000000000000..9f6166be7145 --- /dev/null +++ b/arch/arc/configs/zebu_hs_defconfig @@ -0,0 +1,86 @@ +CONFIG_DEFAULT_HOSTNAME="ARCLinux" +# CONFIG_SWAP is not set +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +# CONFIG_CROSS_MEMORY_ATTACH is not set +CONFIG_NO_HZ_IDLE=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_NAMESPACES=y +# CONFIG_UTS_NS is not set +# CONFIG_PID_NS is not set +CONFIG_BLK_DEV_INITRD=y +CONFIG_INITRAMFS_SOURCE="../../arc_initramfs_hs/" +CONFIG_EXPERT=y +CONFIG_PERF_EVENTS=y +# CONFIG_COMPAT_BRK is not set +CONFIG_SLAB=y +CONFIG_MODULES=y +# CONFIG_LBDAF is not set +# CONFIG_BLK_DEV_BSG is not set +# CONFIG_IOSCHED_DEADLINE is not set +# CONFIG_IOSCHED_CFQ is not set +CONFIG_ARC_PLAT_SIM=y +CONFIG_ISA_ARCV2=y +CONFIG_ARC_BUILTIN_DTB_NAME="zebu_hs" +CONFIG_PREEMPT=y +# CONFIG_COMPACTION is not set +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_PACKET_DIAG=y +CONFIG_UNIX=y +CONFIG_UNIX_DIAG=y +CONFIG_NET_KEY=y +CONFIG_INET=y +# CONFIG_INET_XFRM_MODE_TRANSPORT is not set +# CONFIG_INET_XFRM_MODE_TUNNEL is not set +# CONFIG_INET_XFRM_MODE_BEET is not set +# CONFIG_IPV6 is not set +# CONFIG_WIRELESS is not set +CONFIG_DEVTMPFS=y +# CONFIG_STANDALONE is not set +# CONFIG_PREVENT_FIRMWARE_BUILD is not set +# CONFIG_FIRMWARE_IN_KERNEL is not set +# CONFIG_BLK_DEV is not set +CONFIG_NETDEVICES=y +# CONFIG_NET_VENDOR_ARC is not set +# CONFIG_NET_VENDOR_BROADCOM is not set +# CONFIG_NET_VENDOR_INTEL is not set +# CONFIG_NET_VENDOR_MARVELL is not set +# CONFIG_NET_VENDOR_MICREL is not set +# CONFIG_NET_VENDOR_NATSEMI is not set +# CONFIG_NET_VENDOR_SEEQ is not set +# CONFIG_NET_VENDOR_STMICRO is not set +# CONFIG_NET_VENDOR_VIA is not set +# CONFIG_NET_VENDOR_WIZNET is not set +# CONFIG_WLAN is not set +CONFIG_INPUT_EVDEV=y +CONFIG_MOUSE_PS2_TOUCHKIT=y +# CONFIG_SERIO_SERPORT is not set +CONFIG_SERIO_ARC_PS2=y +# CONFIG_LEGACY_PTYS is not set +# CONFIG_DEVKMEM is not set +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_NR_UARTS=1 +CONFIG_SERIAL_8250_RUNTIME_UARTS=1 +CONFIG_SERIAL_8250_DW=y +CONFIG_SERIAL_OF_PLATFORM=y +# CONFIG_HW_RANDOM is not set +# CONFIG_HWMON is not set +CONFIG_FB=y +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_LOGO=y +# CONFIG_HID is not set +# CONFIG_USB_SUPPORT is not set +# CONFIG_IOMMU_SUPPORT is not set +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_TMPFS=y +# CONFIG_MISC_FILESYSTEMS is not set +CONFIG_NFS_FS=y +# CONFIG_ENABLE_WARN_DEPRECATED is not set +# CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_DEBUG_MEMORY_INIT=y +# CONFIG_DEBUG_PREEMPT is not set diff --git a/arch/arc/configs/zebu_hs_smp_defconfig b/arch/arc/configs/zebu_hs_smp_defconfig new file mode 100644 index 000000000000..44e9693f4257 --- /dev/null +++ b/arch/arc/configs/zebu_hs_smp_defconfig @@ -0,0 +1,89 @@ +CONFIG_DEFAULT_HOSTNAME="ARCLinux" +# CONFIG_SWAP is not set +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +# CONFIG_CROSS_MEMORY_ATTACH is not set +CONFIG_NO_HZ_IDLE=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_NAMESPACES=y +# CONFIG_UTS_NS is not set +# CONFIG_PID_NS is not set +CONFIG_BLK_DEV_INITRD=y +CONFIG_INITRAMFS_SOURCE="../../arc_initramfs_hs/" +CONFIG_EMBEDDED=y +CONFIG_PERF_EVENTS=y +# CONFIG_VM_EVENT_COUNTERS is not set +# CONFIG_COMPAT_BRK is not set +CONFIG_SLAB=y +CONFIG_KPROBES=y +CONFIG_MODULES=y +# CONFIG_LBDAF is not set +# CONFIG_BLK_DEV_BSG is not set +# CONFIG_IOSCHED_DEADLINE is not set +# CONFIG_IOSCHED_CFQ is not set +CONFIG_ARC_PLAT_SIM=y +CONFIG_ISA_ARCV2=y +CONFIG_SMP=y +CONFIG_ARC_BUILTIN_DTB_NAME="zebu_hs_idu" +CONFIG_PREEMPT=y +# CONFIG_COMPACTION is not set +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_PACKET_DIAG=y +CONFIG_UNIX=y +CONFIG_UNIX_DIAG=y +CONFIG_NET_KEY=y +CONFIG_INET=y +# CONFIG_INET_XFRM_MODE_TRANSPORT is not set +# CONFIG_INET_XFRM_MODE_TUNNEL is not set +# CONFIG_INET_XFRM_MODE_BEET is not set +# CONFIG_IPV6 is not set +# CONFIG_WIRELESS is not set +CONFIG_DEVTMPFS=y +# CONFIG_STANDALONE is not set +# CONFIG_PREVENT_FIRMWARE_BUILD is not set +# CONFIG_FIRMWARE_IN_KERNEL is not set +# CONFIG_BLK_DEV is not set +CONFIG_NETDEVICES=y +# CONFIG_NET_VENDOR_ARC is not set +# CONFIG_NET_VENDOR_BROADCOM is not set +# CONFIG_NET_VENDOR_INTEL is not set +# CONFIG_NET_VENDOR_MARVELL is not set +# CONFIG_NET_VENDOR_MICREL is not set +# CONFIG_NET_VENDOR_NATSEMI is not set +# CONFIG_NET_VENDOR_SEEQ is not set +# CONFIG_NET_VENDOR_STMICRO is not set +# CONFIG_NET_VENDOR_VIA is not set +# CONFIG_NET_VENDOR_WIZNET is not set +# CONFIG_WLAN is not set +CONFIG_INPUT_EVDEV=y +CONFIG_MOUSE_PS2_TOUCHKIT=y +# CONFIG_SERIO_SERPORT is not set +CONFIG_SERIO_ARC_PS2=y +# CONFIG_LEGACY_PTYS is not set +# CONFIG_DEVKMEM is not set +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_NR_UARTS=1 +CONFIG_SERIAL_8250_RUNTIME_UARTS=1 +CONFIG_SERIAL_8250_DW=y +CONFIG_SERIAL_OF_PLATFORM=y +# CONFIG_HW_RANDOM is not set +# CONFIG_HWMON is not set +CONFIG_FB=y +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_LOGO=y +# CONFIG_HID is not set +# CONFIG_USB_SUPPORT is not set +# CONFIG_IOMMU_SUPPORT is not set +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_TMPFS=y +# CONFIG_MISC_FILESYSTEMS is not set +CONFIG_NFS_FS=y +# CONFIG_ENABLE_WARN_DEPRECATED is not set +# CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_LOCKUP_DETECTOR=y +# CONFIG_DEBUG_PREEMPT is not set diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h index 7fbaea00a336..db25c65155cb 100644 --- a/arch/arc/include/asm/arcregs.h +++ b/arch/arc/include/asm/arcregs.h @@ -95,7 +95,7 @@ /* Auxiliary registers */ #define AUX_IDENTITY 4 #define AUX_INTR_VEC_BASE 0x25 -#define AUX_NON_VOL 0x5e +#define AUX_VOL 0x5e /* * Floating Pt Registers @@ -240,14 +240,6 @@ struct bcr_extn_xymem { #endif }; -struct bcr_perip { -#ifdef CONFIG_CPU_BIG_ENDIAN - unsigned int start:8, pad2:8, sz:8, ver:8; -#else - unsigned int ver:8, sz:8, pad2:8, start:8; -#endif -}; - struct bcr_iccm_arcompact { #ifdef CONFIG_CPU_BIG_ENDIAN unsigned int base:16, pad:5, sz:3, ver:8; diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index 4e3c1b6b0806..b65930a49589 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h @@ -20,6 +20,7 @@ #ifndef CONFIG_ARC_PLAT_EZNPS #define atomic_read(v) READ_ONCE((v)->counter) +#define ATOMIC_INIT(i) { (i) } #ifdef CONFIG_ARC_HAS_LLSC @@ -284,6 +285,7 @@ static inline int atomic_fetch_##op(int i, atomic_t *v) \ ATOMIC_OPS(add, +=, CTOP_INST_AADD_DI_R2_R2_R3) #define atomic_sub(i, v) atomic_add(-(i), (v)) #define atomic_sub_return(i, v) atomic_add_return(-(i), (v)) +#define atomic_fetch_sub(i, v) atomic_fetch_add(-(i), (v)) #undef ATOMIC_OPS #define ATOMIC_OPS(op, c_op, asm_op) \ @@ -292,6 +294,7 @@ ATOMIC_OPS(add, +=, CTOP_INST_AADD_DI_R2_R2_R3) ATOMIC_OPS(and, &=, CTOP_INST_AAND_DI_R2_R2_R3) #define atomic_andnot(mask, v) atomic_and(~(mask), (v)) +#define atomic_fetch_andnot(mask, v) atomic_fetch_and(~(mask), (v)) ATOMIC_OPS(or, |=, CTOP_INST_AOR_DI_R2_R2_R3) ATOMIC_OPS(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3) @@ -343,10 +346,266 @@ ATOMIC_OPS(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3) #define atomic_add_negative(i, v) (atomic_add_return(i, v) < 0) -#define ATOMIC_INIT(i) { (i) } + +#ifdef CONFIG_GENERIC_ATOMIC64 #include <asm-generic/atomic64.h> -#endif +#else /* Kconfig ensures this is only enabled with needed h/w assist */ + +/* + * ARCv2 supports 64-bit exclusive load (LLOCKD) / store (SCONDD) + * - The address HAS to be 64-bit aligned + * - There are 2 semantics involved here: + * = exclusive implies no interim update between load/store to same addr + * = both words are observed/updated together: this is guaranteed even + * for regular 64-bit load (LDD) / store (STD). Thus atomic64_set() + * is NOT required to use LLOCKD+SCONDD, STD suffices + */ + +typedef struct { + aligned_u64 counter; +} atomic64_t; + +#define ATOMIC64_INIT(a) { (a) } + +static inline long long atomic64_read(const atomic64_t *v) +{ + unsigned long long val; + + __asm__ __volatile__( + " ldd %0, [%1] \n" + : "=r"(val) + : "r"(&v->counter)); + + return val; +} + +static inline void atomic64_set(atomic64_t *v, long long a) +{ + /* + * This could have been a simple assignment in "C" but would need + * explicit volatile. Otherwise gcc optimizers could elide the store + * which borked atomic64 self-test + * In the inline asm version, memory clobber needed for exact same + * reason, to tell gcc about the store. + * + * This however is not needed for sibling atomic64_add() etc since both + * load/store are explicitly done in inline asm. As long as API is used + * for each access, gcc has no way to optimize away any load/store + */ + __asm__ __volatile__( + " std %0, [%1] \n" + : + : "r"(a), "r"(&v->counter) + : "memory"); +} + +#define ATOMIC64_OP(op, op1, op2) \ +static inline void atomic64_##op(long long a, atomic64_t *v) \ +{ \ + unsigned long long val; \ + \ + __asm__ __volatile__( \ + "1: \n" \ + " llockd %0, [%1] \n" \ + " " #op1 " %L0, %L0, %L2 \n" \ + " " #op2 " %H0, %H0, %H2 \n" \ + " scondd %0, [%1] \n" \ + " bnz 1b \n" \ + : "=&r"(val) \ + : "r"(&v->counter), "ir"(a) \ + : "cc"); \ +} \ + +#define ATOMIC64_OP_RETURN(op, op1, op2) \ +static inline long long atomic64_##op##_return(long long a, atomic64_t *v) \ +{ \ + unsigned long long val; \ + \ + smp_mb(); \ + \ + __asm__ __volatile__( \ + "1: \n" \ + " llockd %0, [%1] \n" \ + " " #op1 " %L0, %L0, %L2 \n" \ + " " #op2 " %H0, %H0, %H2 \n" \ + " scondd %0, [%1] \n" \ + " bnz 1b \n" \ + : [val] "=&r"(val) \ + : "r"(&v->counter), "ir"(a) \ + : "cc"); /* memory clobber comes from smp_mb() */ \ + \ + smp_mb(); \ + \ + return val; \ +} + +#define ATOMIC64_FETCH_OP(op, op1, op2) \ +static inline long long atomic64_fetch_##op(long long a, atomic64_t *v) \ +{ \ + unsigned long long val, orig; \ + \ + smp_mb(); \ + \ + __asm__ __volatile__( \ + "1: \n" \ + " llockd %0, [%2] \n" \ + " " #op1 " %L1, %L0, %L3 \n" \ + " " #op2 " %H1, %H0, %H3 \n" \ + " scondd %1, [%2] \n" \ + " bnz 1b \n" \ + : "=&r"(orig), "=&r"(val) \ + : "r"(&v->counter), "ir"(a) \ + : "cc"); /* memory clobber comes from smp_mb() */ \ + \ + smp_mb(); \ + \ + return orig; \ +} + +#define ATOMIC64_OPS(op, op1, op2) \ + ATOMIC64_OP(op, op1, op2) \ + ATOMIC64_OP_RETURN(op, op1, op2) \ + ATOMIC64_FETCH_OP(op, op1, op2) + +#define atomic64_andnot atomic64_andnot + +ATOMIC64_OPS(add, add.f, adc) +ATOMIC64_OPS(sub, sub.f, sbc) +ATOMIC64_OPS(and, and, and) +ATOMIC64_OPS(andnot, bic, bic) +ATOMIC64_OPS(or, or, or) +ATOMIC64_OPS(xor, xor, xor) + +#undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP +#undef ATOMIC64_OP_RETURN +#undef ATOMIC64_OP + +static inline long long +atomic64_cmpxchg(atomic64_t *ptr, long long expected, long long new) +{ + long long prev; + + smp_mb(); + + __asm__ __volatile__( + "1: llockd %0, [%1] \n" + " brne %L0, %L2, 2f \n" + " brne %H0, %H2, 2f \n" + " scondd %3, [%1] \n" + " bnz 1b \n" + "2: \n" + : "=&r"(prev) + : "r"(ptr), "ir"(expected), "r"(new) + : "cc"); /* memory clobber comes from smp_mb() */ + + smp_mb(); + + return prev; +} + +static inline long long atomic64_xchg(atomic64_t *ptr, long long new) +{ + long long prev; + + smp_mb(); + + __asm__ __volatile__( + "1: llockd %0, [%1] \n" + " scondd %2, [%1] \n" + " bnz 1b \n" + "2: \n" + : "=&r"(prev) + : "r"(ptr), "r"(new) + : "cc"); /* memory clobber comes from smp_mb() */ + + smp_mb(); + + return prev; +} + +/** + * atomic64_dec_if_positive - decrement by 1 if old value positive + * @v: pointer of type atomic64_t + * + * The function returns the old value of *v minus 1, even if + * the atomic variable, v, was not decremented. + */ + +static inline long long atomic64_dec_if_positive(atomic64_t *v) +{ + long long val; + + smp_mb(); + + __asm__ __volatile__( + "1: llockd %0, [%1] \n" + " sub.f %L0, %L0, 1 # w0 - 1, set C on borrow\n" + " sub.c %H0, %H0, 1 # if C set, w1 - 1\n" + " brlt %H0, 0, 2f \n" + " scondd %0, [%1] \n" + " bnz 1b \n" + "2: \n" + : "=&r"(val) + : "r"(&v->counter) + : "cc"); /* memory clobber comes from smp_mb() */ + + smp_mb(); + + return val; +} + +/** + * atomic64_add_unless - add unless the number is a given value + * @v: pointer of type atomic64_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * if (v != u) { v += a; ret = 1} else {ret = 0} + * Returns 1 iff @v was not @u (i.e. if add actually happened) + */ +static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u) +{ + long long val; + int op_done; + + smp_mb(); + + __asm__ __volatile__( + "1: llockd %0, [%2] \n" + " mov %1, 1 \n" + " brne %L0, %L4, 2f # continue to add since v != u \n" + " breq.d %H0, %H4, 3f # return since v == u \n" + " mov %1, 0 \n" + "2: \n" + " add.f %L0, %L0, %L3 \n" + " adc %H0, %H0, %H3 \n" + " scondd %0, [%2] \n" + " bnz 1b \n" + "3: \n" + : "=&r"(val), "=&r" (op_done) + : "r"(&v->counter), "r"(a), "r"(u) + : "cc"); /* memory clobber comes from smp_mb() */ + + smp_mb(); + + return op_done; +} + +#define atomic64_add_negative(a, v) (atomic64_add_return((a), (v)) < 0) +#define atomic64_inc(v) atomic64_add(1LL, (v)) +#define atomic64_inc_return(v) atomic64_add_return(1LL, (v)) +#define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0) +#define atomic64_sub_and_test(a, v) (atomic64_sub_return((a), (v)) == 0) +#define atomic64_dec(v) atomic64_sub(1LL, (v)) +#define atomic64_dec_return(v) atomic64_sub_return(1LL, (v)) +#define atomic64_dec_and_test(v) (atomic64_dec_return((v)) == 0) +#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1LL, 0LL) + +#endif /* !CONFIG_GENERIC_ATOMIC64 */ + +#endif /* !__ASSEMBLY__ */ #endif diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h index 23706c635c30..fb781e34f322 100644 --- a/arch/arc/include/asm/cache.h +++ b/arch/arc/include/asm/cache.h @@ -54,7 +54,7 @@ extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len); extern void read_decode_cache_bcr(void); extern int ioc_exists; -extern unsigned long perip_base; +extern unsigned long perip_base, perip_end; #endif /* !__ASSEMBLY__ */ diff --git a/arch/arc/include/asm/dwarf.h b/arch/arc/include/asm/dwarf.h new file mode 100644 index 000000000000..bb7bdbc59a44 --- /dev/null +++ b/arch/arc/include/asm/dwarf.h @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2016-17 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _ASM_ARC_DWARF_H +#define _ASM_ARC_DWARF_H + +#ifdef __ASSEMBLY__ + +#ifdef ARC_DW2_UNWIND_AS_CFI + +#define CFI_STARTPROC .cfi_startproc +#define CFI_ENDPROC .cfi_endproc +#define CFI_DEF_CFA .cfi_def_cfa +#define CFI_REGISTER .cfi_register +#define CFI_REL_OFFSET .cfi_rel_offset +#define CFI_UNDEFINED .cfi_undefined + +#else + +#define CFI_IGNORE # + +#define CFI_STARTPROC CFI_IGNORE +#define CFI_ENDPROC CFI_IGNORE +#define CFI_DEF_CFA CFI_IGNORE +#define CFI_REGISTER CFI_IGNORE +#define CFI_REL_OFFSET CFI_IGNORE +#define CFI_UNDEFINED CFI_IGNORE + +#endif /* !ARC_DW2_UNWIND_AS_CFI */ + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_ARC_DWARF_H */ diff --git a/arch/arc/include/asm/elf.h b/arch/arc/include/asm/elf.h index 51a99e25fe33..7096f97a1434 100644 --- a/arch/arc/include/asm/elf.h +++ b/arch/arc/include/asm/elf.h @@ -23,8 +23,7 @@ /* ARC Relocations (kernel Modules only) */ #define R_ARC_32 0x4 #define R_ARC_32_ME 0x1B -#define R_ARC_S25H_PCREL 0x10 -#define R_ARC_S25W_PCREL 0x11 +#define R_ARC_32_PCREL 0x31 /*to set parameters in the core dumps */ #define ELF_ARCH EM_ARCOMPACT diff --git a/arch/arc/include/asm/irqflags-arcv2.h b/arch/arc/include/asm/irqflags-arcv2.h index d1ec7f6b31e0..e880dfa3fcd3 100644 --- a/arch/arc/include/asm/irqflags-arcv2.h +++ b/arch/arc/include/asm/irqflags-arcv2.h @@ -112,7 +112,7 @@ static inline long arch_local_save_flags(void) */ temp = (1 << 5) | ((!!(temp & STATUS_IE_MASK)) << CLRI_STATUS_IE_BIT) | - (temp & CLRI_STATUS_E_MASK); + ((temp >> 1) & CLRI_STATUS_E_MASK); return temp; } diff --git a/arch/arc/include/asm/linkage.h b/arch/arc/include/asm/linkage.h index 5faad17118b4..b29f1a9fd6f7 100644 --- a/arch/arc/include/asm/linkage.h +++ b/arch/arc/include/asm/linkage.h @@ -9,6 +9,8 @@ #ifndef __ASM_LINKAGE_H #define __ASM_LINKAGE_H +#include <asm/dwarf.h> + #ifdef __ASSEMBLY__ #define ASM_NL ` /* use '`' to mark new line in macro */ @@ -32,6 +34,16 @@ #endif .endm +#define ENTRY_CFI(name) \ + .globl name ASM_NL \ + ALIGN ASM_NL \ + name: ASM_NL \ + CFI_STARTPROC ASM_NL + +#define END_CFI(name) \ + CFI_ENDPROC ASM_NL \ + .size name, .-name + #else /* !__ASSEMBLY__ */ #ifdef CONFIG_ARC_HAS_ICCM diff --git a/arch/arc/include/asm/perf_event.h b/arch/arc/include/asm/perf_event.h index 5f071762fb1c..9185541035cc 100644 --- a/arch/arc/include/asm/perf_event.h +++ b/arch/arc/include/asm/perf_event.h @@ -118,6 +118,9 @@ static const char * const arc_pmu_ev_hw_map[] = { [PERF_COUNT_ARC_ICM] = "icm", /* I-cache Miss */ [PERF_COUNT_ARC_EDTLB] = "edtlb", /* D-TLB Miss */ [PERF_COUNT_ARC_EITLB] = "eitlb", /* I-TLB Miss */ + + [PERF_COUNT_HW_CACHE_REFERENCES] = "imemrdc", /* Instr: mem read cached */ + [PERF_COUNT_HW_CACHE_MISSES] = "dclm", /* D-cache Load Miss */ }; #define C(_x) PERF_COUNT_HW_CACHE_##_x diff --git a/arch/arc/include/asm/uaccess.h b/arch/arc/include/asm/uaccess.h index a78d5670884f..41faf17cd28d 100644 --- a/arch/arc/include/asm/uaccess.h +++ b/arch/arc/include/asm/uaccess.h @@ -83,7 +83,10 @@ "2: ;nop\n" \ " .section .fixup, \"ax\"\n" \ " .align 4\n" \ - "3: mov %0, %3\n" \ + "3: # return -EFAULT\n" \ + " mov %0, %3\n" \ + " # zero out dst ptr\n" \ + " mov %1, 0\n" \ " j 2b\n" \ " .previous\n" \ " .section __ex_table, \"a\"\n" \ @@ -101,7 +104,11 @@ "2: ;nop\n" \ " .section .fixup, \"ax\"\n" \ " .align 4\n" \ - "3: mov %0, %3\n" \ + "3: # return -EFAULT\n" \ + " mov %0, %3\n" \ + " # zero out dst ptr\n" \ + " mov %1, 0\n" \ + " mov %R1, 0\n" \ " j 2b\n" \ " .previous\n" \ " .section __ex_table, \"a\"\n" \ diff --git a/arch/arc/kernel/ctx_sw_asm.S b/arch/arc/kernel/ctx_sw_asm.S index e6890b1f8650..7c1f365ef3d2 100644 --- a/arch/arc/kernel/ctx_sw_asm.S +++ b/arch/arc/kernel/ctx_sw_asm.S @@ -23,6 +23,7 @@ .global __switch_to .type __switch_to, @function __switch_to: + CFI_STARTPROC /* Save regs on kernel mode stack of task */ st.a blink, [sp, -4] @@ -59,4 +60,4 @@ __switch_to: ld.ab blink, [sp, 4] j [blink] -END(__switch_to) +END_CFI(__switch_to) diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S index 2efb0625331d..1eea99beecc3 100644 --- a/arch/arc/kernel/entry.S +++ b/arch/arc/kernel/entry.S @@ -35,7 +35,7 @@ ENTRY(sys_clone_wrapper) btst r10, TIF_SYSCALL_TRACE bnz tracesys_exit - b ret_from_system_call + b .Lret_from_system_call END(sys_clone_wrapper) ENTRY(ret_from_fork) @@ -61,18 +61,6 @@ ENTRY(ret_from_fork) b ret_from_exception END(ret_from_fork) -#ifdef CONFIG_ARC_DW2_UNWIND -; Workaround for bug 94179 (STAR ): -; Despite -fasynchronous-unwind-tables, linker is not making dwarf2 unwinder -; section (.debug_frame) as loadable. So we force it here. -; This also fixes STAR 9000487933 where the prev-workaround (objcopy --setflag) -; would not work after a clean build due to kernel build system dependencies. -.section .debug_frame, "wa",@progbits - -; Reset to .text as this file is included in entry-<isa>.S -.section .text, "ax",@progbits -#endif - ;################### Non TLB Exception Handling ############################# ; --------------------------------------------- @@ -260,20 +248,18 @@ ENTRY(EV_Trap) ; syscall num shd not exceed the total system calls avail cmp r8, NR_syscalls mov.hi r0, -ENOSYS - bhi ret_from_system_call + bhi .Lret_from_system_call ; Offset into the syscall_table and call handler ld.as r9,[sys_call_table, r8] jl [r9] ; Entry into Sys Call Handler - ; fall through to ret_from_system_call -END(EV_Trap) - -ENTRY(ret_from_system_call) +.Lret_from_system_call: st r0, [sp, PT_r0] ; sys call return value in pt_regs - ; fall through yet again to ret_from_exception + ; fall through to ret_from_exception +END(EV_Trap) ;############# Return from Intr/Excp/Trap (Linux Specifics) ############## ; diff --git a/arch/arc/kernel/intc-arcv2.c b/arch/arc/kernel/intc-arcv2.c index 6c24faf48b16..62b59409a5d9 100644 --- a/arch/arc/kernel/intc-arcv2.c +++ b/arch/arc/kernel/intc-arcv2.c @@ -74,7 +74,7 @@ void arc_init_IRQ(void) tmp = read_aux_reg(0xa); tmp |= STATUS_AD_MASK | (irq_prio << 1); tmp &= ~STATUS_IE_MASK; - asm volatile("flag %0 \n"::"r"(tmp)); + asm volatile("kflag %0 \n"::"r"(tmp)); } static void arcv2_irq_mask(struct irq_data *data) diff --git a/arch/arc/kernel/module.c b/arch/arc/kernel/module.c index 376e04622962..9a2849756022 100644 --- a/arch/arc/kernel/module.c +++ b/arch/arc/kernel/module.c @@ -22,13 +22,9 @@ static inline void arc_write_me(unsigned short *addr, unsigned long value) *(addr + 1) = (value & 0xffff); } -/* ARC specific section quirks - before relocation loop in generic loader - * - * For dwarf unwinding out of modules, this needs to - * 1. Ensure the .debug_frame is allocatable (ARC Linker bug: despite - * -fasynchronous-unwind-tables it doesn't). - * 2. Since we are iterating thru sec hdr tbl anyways, make a note of - * the exact section index, for later use. +/* + * This gets called before relocation loop in generic loader + * Make a note of the section index of unwinding section */ int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, char *secstr, struct module *mod) @@ -40,8 +36,7 @@ int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, mod->arch.unw_info = NULL; for (i = 1; i < hdr->e_shnum; i++) { - if (strcmp(secstr+sechdrs[i].sh_name, ".debug_frame") == 0) { - sechdrs[i].sh_flags |= SHF_ALLOC; + if (strcmp(secstr+sechdrs[i].sh_name, ".eh_frame") == 0) { mod->arch.unw_sec_idx = i; break; } @@ -106,10 +101,12 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, */ relo_type = ELF32_R_TYPE(rel_entry[i].r_info); - if (likely(R_ARC_32_ME == relo_type)) + if (likely(R_ARC_32_ME == relo_type)) /* ME ( S + A ) */ arc_write_me((unsigned short *)location, relocation); - else if (R_ARC_32 == relo_type) + else if (R_ARC_32 == relo_type) /* ( S + A ) */ *((Elf32_Addr *) location) = relocation; + else if (R_ARC_32_PCREL == relo_type) /* ( S + A ) - PDATA ) */ + *((Elf32_Addr *) location) = relocation - location; else goto relo_err; diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c index 08f03d9b5b3e..2ce24e74f879 100644 --- a/arch/arc/kernel/perf_event.c +++ b/arch/arc/kernel/perf_event.c @@ -179,8 +179,8 @@ static int arc_pmu_event_init(struct perf_event *event) if (arc_pmu->ev_hw_idx[event->attr.config] < 0) return -ENOENT; hwc->config |= arc_pmu->ev_hw_idx[event->attr.config]; - pr_debug("init event %d with h/w %d \'%s\'\n", - (int) event->attr.config, (int) hwc->config, + pr_debug("init event %d with h/w %08x \'%s\'\n", + (int)event->attr.config, (int)hwc->config, arc_pmu_ev_hw_map[event->attr.config]); return 0; @@ -189,6 +189,8 @@ static int arc_pmu_event_init(struct perf_event *event) if (ret < 0) return ret; hwc->config |= arc_pmu->ev_hw_idx[ret]; + pr_debug("init cache event with h/w %08x \'%s\'\n", + (int)hwc->config, arc_pmu_ev_hw_map[ret]); return 0; default: return -ENOENT; diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index f52a0d0dc462..3df7f9c72f42 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -171,6 +171,7 @@ static const struct cpuinfo_data arc_cpu_tbl[] = { #else { {0x50, "ARC HS38 R2.0"}, 0x51}, { {0x52, "ARC HS38 R2.1"}, 0x52}, + { {0x53, "ARC HS38 R3.0"}, 0x53}, #endif { {0x00, NULL } } }; @@ -272,8 +273,8 @@ static char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len) FIX_PTR(cpu); n += scnprintf(buf + n, len - n, - "Vector Table\t: %#x\nUncached Base\t: %#lx\n", - cpu->vec_base, perip_base); + "Vector Table\t: %#x\nPeripherals\t: %#lx:%#lx\n", + cpu->vec_base, perip_base, perip_end); if (cpu->extn.fpu_sp || cpu->extn.fpu_dp) n += scnprintf(buf + n, len - n, "FPU\t\t: %s%s\n", diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c index 0587bf121d11..61fd1ce63c56 100644 --- a/arch/arc/kernel/unwind.c +++ b/arch/arc/kernel/unwind.c @@ -111,6 +111,8 @@ UNW_REGISTER_INFO}; #define DW_EH_PE_indirect 0x80 #define DW_EH_PE_omit 0xff +#define CIE_ID 0 + typedef unsigned long uleb128_t; typedef signed long sleb128_t; @@ -232,6 +234,7 @@ void __init arc_unwind_init(void) static const u32 bad_cie, not_fde; static const u32 *cie_for_fde(const u32 *fde, const struct unwind_table *); +static const u32 *__cie_for_fde(const u32 *fde); static signed fde_pointer_type(const u32 *cie); struct eh_frame_hdr_table_entry { @@ -338,10 +341,9 @@ static void init_unwind_hdr(struct unwind_table *table, for (fde = table->address, tableSize = table->size, n = 0; tableSize; tableSize -= sizeof(*fde) + *fde, fde += 1 + *fde / sizeof(*fde)) { - /* const u32 *cie = fde + 1 - fde[1] / sizeof(*fde); */ - const u32 *cie = (const u32 *)(fde[1]); + const u32 *cie = __cie_for_fde(fde); - if (fde[1] == 0xffffffff) + if (fde[1] == CIE_ID) continue; /* this is a CIE */ ptr = (const u8 *)(fde + 2); header->table[n].start = read_pointer(&ptr, @@ -504,6 +506,15 @@ static sleb128_t get_sleb128(const u8 **pcur, const u8 *end) return value; } +static const u32 *__cie_for_fde(const u32 *fde) +{ + const u32 *cie; + + cie = fde + 1 - fde[1] / sizeof(*fde); + + return cie; +} + static const u32 *cie_for_fde(const u32 *fde, const struct unwind_table *table) { const u32 *cie; @@ -511,19 +522,18 @@ static const u32 *cie_for_fde(const u32 *fde, const struct unwind_table *table) if (!*fde || (*fde & (sizeof(*fde) - 1))) return &bad_cie; - if (fde[1] == 0xffffffff) + if (fde[1] == CIE_ID) return ¬_fde; /* this is a CIE */ if ((fde[1] & (sizeof(*fde) - 1))) /* || fde[1] > (unsigned long)(fde + 1) - (unsigned long)table->address) */ return NULL; /* this is not a valid FDE */ - /* cie = fde + 1 - fde[1] / sizeof(*fde); */ - cie = (u32 *) fde[1]; + cie = __cie_for_fde(fde); if (*cie <= sizeof(*cie) + 4 || *cie >= fde[1] - sizeof(*fde) || (*cie & (sizeof(*cie) - 1)) - || (cie[1] != 0xffffffff)) + || (cie[1] != CIE_ID)) return NULL; /* this is not a (valid) CIE */ return cie; } diff --git a/arch/arc/kernel/vmlinux.lds.S b/arch/arc/kernel/vmlinux.lds.S index 894e696bddaa..36611072305f 100644 --- a/arch/arc/kernel/vmlinux.lds.S +++ b/arch/arc/kernel/vmlinux.lds.S @@ -82,14 +82,6 @@ SECTIONS PERCPU_SECTION(L1_CACHE_BYTES) - /* - * .exit.text is discard at runtime, not link time, to deal with - * references from .debug_frame - * It will be init freed, being inside [__init_start : __init_end] - */ - .exit.text : { EXIT_TEXT } - .exit.data : { EXIT_DATA } - . = ALIGN(PAGE_SIZE); __init_end = .; @@ -120,18 +112,13 @@ SECTIONS #ifdef CONFIG_ARC_DW2_UNWIND . = ALIGN(PAGE_SIZE); - .debug_frame : { + .eh_frame : { __start_unwind = .; - *(.debug_frame) + *(.eh_frame) __end_unwind = .; } - /* - * gcc 4.8 generates this for -fasynchonous-unwind-tables, - * while we still use the .debug_frame based unwinder - */ - /DISCARD/ : { *(.eh_frame) } #else - /DISCARD/ : { *(.debug_frame) } + /DISCARD/ : { *(.eh_frame) } #endif NOTES @@ -148,7 +135,7 @@ SECTIONS } #ifndef CONFIG_DEBUG_INFO - /* open-coded because we need .debug_frame seperately for unwinding */ + /DISCARD/ : { *(.debug_frame) } /DISCARD/ : { *(.debug_aranges) } /DISCARD/ : { *(.debug_pubnames) } /DISCARD/ : { *(.debug_info) } diff --git a/arch/arc/lib/memcmp.S b/arch/arc/lib/memcmp.S index a4015e7d9ab7..21a103044b70 100644 --- a/arch/arc/lib/memcmp.S +++ b/arch/arc/lib/memcmp.S @@ -16,7 +16,7 @@ #define SHIFT r2 #endif -ENTRY(memcmp) +ENTRY_CFI(memcmp) or r12,r0,r1 asl_s r12,r12,30 sub r3,r2,1 @@ -149,4 +149,4 @@ ENTRY(memcmp) .Lnil: j_s.d [blink] mov r0,0 -END(memcmp) +END_CFI(memcmp) diff --git a/arch/arc/lib/memcpy-700.S b/arch/arc/lib/memcpy-700.S index 3222573e50de..ba0beccdaafd 100644 --- a/arch/arc/lib/memcpy-700.S +++ b/arch/arc/lib/memcpy-700.S @@ -8,7 +8,7 @@ #include <linux/linkage.h> -ENTRY(memcpy) +ENTRY_CFI(memcpy) or r3,r0,r1 asl_s r3,r3,30 mov_s r5,r0 @@ -63,4 +63,4 @@ ENTRY(memcpy) .Lendbloop: j_s.d [blink] stb r12,[r5,0] -END(memcpy) +END_CFI(memcpy) diff --git a/arch/arc/lib/memcpy-archs.S b/arch/arc/lib/memcpy-archs.S index f96c75edf30a..d61044dd8b58 100644 --- a/arch/arc/lib/memcpy-archs.S +++ b/arch/arc/lib/memcpy-archs.S @@ -40,7 +40,7 @@ # define ZOLAND 0xF #endif -ENTRY(memcpy) +ENTRY_CFI(memcpy) prefetch [r1] ; Prefetch the read location prefetchw [r0] ; Prefetch the write location mov.f 0, r2 @@ -233,4 +233,4 @@ ENTRY(memcpy) .Lcopybytewise_3: j [blink] -END(memcpy) +END_CFI(memcpy) diff --git a/arch/arc/lib/memset-archs.S b/arch/arc/lib/memset-archs.S index 365b18364815..62ad4bcb841a 100644 --- a/arch/arc/lib/memset-archs.S +++ b/arch/arc/lib/memset-archs.S @@ -10,7 +10,7 @@ #undef PREALLOC_NOT_AVAIL -ENTRY(memset) +ENTRY_CFI(memset) prefetchw [r0] ; Prefetch the write location mov.f 0, r2 ;;; if size is zero @@ -112,11 +112,11 @@ ENTRY(memset) j [blink] -END(memset) +END_CFI(memset) -ENTRY(memzero) +ENTRY_CFI(memzero) ; adjust bzero args to memset args mov r2, r1 b.d memset ;tail call so need to tinker with blink mov r1, 0 -END(memzero) +END_CFI(memzero) diff --git a/arch/arc/lib/memset.S b/arch/arc/lib/memset.S index d36bd43fc98d..cf736f9aa403 100644 --- a/arch/arc/lib/memset.S +++ b/arch/arc/lib/memset.S @@ -10,7 +10,7 @@ #define SMALL 7 /* Must be at least 6 to deal with alignment/loop issues. */ -ENTRY(memset) +ENTRY_CFI(memset) mov_s r4,r0 or r12,r0,r2 bmsk.f r12,r12,1 @@ -46,14 +46,14 @@ ENTRY(memset) stb.ab r1,[r4,1] .Ltiny_end: j_s [blink] -END(memset) +END_CFI(memset) ; memzero: @r0 = mem, @r1 = size_t ; memset: @r0 = mem, @r1 = char, @r2 = size_t -ENTRY(memzero) +ENTRY_CFI(memzero) ; adjust bzero args to memset args mov r2, r1 mov r1, 0 b memset ;tail call so need to tinker with blink -END(memzero) +END_CFI(memzero) diff --git a/arch/arc/lib/strchr-700.S b/arch/arc/lib/strchr-700.S index b725d5862107..2d300daae2ae 100644 --- a/arch/arc/lib/strchr-700.S +++ b/arch/arc/lib/strchr-700.S @@ -13,7 +13,7 @@ #include <linux/linkage.h> -ENTRY(strchr) +ENTRY_CFI(strchr) extb_s r1,r1 asl r5,r1,8 bmsk r2,r0,1 @@ -130,4 +130,4 @@ ENTRY(strchr) j_s.d [blink] mov.mi r0,0 #endif /* ENDIAN */ -END(strchr) +END_CFI(strchr) diff --git a/arch/arc/lib/strcmp-archs.S b/arch/arc/lib/strcmp-archs.S index 4f338eec3365..fae9e82a09eb 100644 --- a/arch/arc/lib/strcmp-archs.S +++ b/arch/arc/lib/strcmp-archs.S @@ -8,7 +8,7 @@ #include <linux/linkage.h> -ENTRY(strcmp) +ENTRY_CFI(strcmp) or r2, r0, r1 bmsk_s r2, r2, 1 brne r2, 0, @.Lcharloop @@ -75,4 +75,4 @@ ENTRY(strcmp) .Lcmpend: j_s.d [blink] sub r0, r2, r3 -END(strcmp) +END_CFI(strcmp) diff --git a/arch/arc/lib/strcmp.S b/arch/arc/lib/strcmp.S index 3544600fefe6..fb20096e5008 100644 --- a/arch/arc/lib/strcmp.S +++ b/arch/arc/lib/strcmp.S @@ -15,7 +15,7 @@ #include <linux/linkage.h> -ENTRY(strcmp) +ENTRY_CFI(strcmp) or r2,r0,r1 bmsk_s r2,r2,1 brne r2,0,.Lcharloop @@ -93,4 +93,4 @@ ENTRY(strcmp) .Lcmpend: j_s.d [blink] sub r0,r2,r3 -END(strcmp) +END_CFI(strcmp) diff --git a/arch/arc/lib/strcpy-700.S b/arch/arc/lib/strcpy-700.S index 8422f38e1218..6a6c1553807d 100644 --- a/arch/arc/lib/strcpy-700.S +++ b/arch/arc/lib/strcpy-700.S @@ -18,7 +18,7 @@ #include <linux/linkage.h> -ENTRY(strcpy) +ENTRY_CFI(strcpy) or r2,r0,r1 bmsk_s r2,r2,1 brne.d r2,0,charloop @@ -67,4 +67,4 @@ charloop: brne.d r3,0,charloop stb.ab r3,[r10,1] j [blink] -END(strcpy) +END_CFI(strcpy) diff --git a/arch/arc/lib/strlen.S b/arch/arc/lib/strlen.S index 53cfd5685a5f..839b44b8d055 100644 --- a/arch/arc/lib/strlen.S +++ b/arch/arc/lib/strlen.S @@ -8,7 +8,7 @@ #include <linux/linkage.h> -ENTRY(strlen) +ENTRY_CFI(strlen) or r3,r0,7 ld r2,[r3,-7] ld.a r6,[r3,-3] @@ -80,4 +80,4 @@ ENTRY(strlen) .Learly_end: b.d .Lend sub_s.ne r1,r1,r1 -END(strlen) +END_CFI(strlen) diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index 0b10efe3a6a7..97dddbefb86a 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -25,6 +25,7 @@ static int l2_line_sz; int ioc_exists; volatile int slc_enable = 1, ioc_enable = 1; unsigned long perip_base = ARC_UNCACHED_ADDR_SPACE; /* legacy value for boot */ +unsigned long perip_end = 0xFFFFFFFF; /* legacy value */ void (*_cache_line_loop_ic_fn)(phys_addr_t paddr, unsigned long vaddr, unsigned long sz, const int cacheop); @@ -76,7 +77,6 @@ char *arc_cache_mumbojumbo(int c, char *buf, int len) static void read_decode_cache_bcr_arcv2(int cpu) { struct cpuinfo_arc_cache *p_slc = &cpuinfo_arc700[cpu].slc; - struct bcr_generic uncached_space; struct bcr_generic sbcr; struct bcr_slc_cfg { @@ -95,6 +95,15 @@ static void read_decode_cache_bcr_arcv2(int cpu) #endif } cbcr; + struct bcr_volatile { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int start:4, limit:4, pad:22, order:1, disable:1; +#else + unsigned int disable:1, order:1, pad:22, limit:4, start:4; +#endif + } vol; + + READ_BCR(ARC_REG_SLC_BCR, sbcr); if (sbcr.ver) { READ_BCR(ARC_REG_SLC_CFG, slc_cfg); @@ -107,10 +116,14 @@ static void read_decode_cache_bcr_arcv2(int cpu) if (cbcr.c && ioc_enable) ioc_exists = 1; - /* Legacy Data Uncached BCR is deprecated from v3 onwards */ - READ_BCR(ARC_REG_D_UNCACH_BCR, uncached_space); - if (uncached_space.ver > 2) - perip_base = read_aux_reg(AUX_NON_VOL) & 0xF0000000; + /* HS 2.0 didn't have AUX_VOL */ + if (cpuinfo_arc700[cpu].core.family > 0x51) { + READ_BCR(AUX_VOL, vol); + perip_base = vol.start << 28; + /* HS 3.0 has limit and strict-ordering fields */ + if (cpuinfo_arc700[cpu].core.family > 0x52) + perip_end = (vol.limit << 28) - 1; + } } void read_decode_cache_bcr(void) diff --git a/arch/arc/mm/ioremap.c b/arch/arc/mm/ioremap.c index f52b7db67fd3..9881bd740ccc 100644 --- a/arch/arc/mm/ioremap.c +++ b/arch/arc/mm/ioremap.c @@ -19,7 +19,7 @@ static inline bool arc_uncached_addr_space(phys_addr_t paddr) if (is_isa_arcompact()) { if (paddr >= ARC_UNCACHED_ADDR_SPACE) return true; - } else if (paddr >= perip_base && paddr <= 0xFFFFFFFF) { + } else if (paddr >= perip_base && paddr <= perip_end) { return true; } diff --git a/arch/arc/plat-sim/platform.c b/arch/arc/plat-sim/platform.c index e4fe51456808..aea87389e44b 100644 --- a/arch/arc/plat-sim/platform.c +++ b/arch/arc/plat-sim/platform.c @@ -24,6 +24,7 @@ static const char *simulation_compat[] __initconst = { "snps,nsim_hs", "snps,nsimosci", "snps,nsimosci_hs", + "snps,zebu_hs", NULL, }; diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index a9c4e48bb7ec..3cd9042fbb62 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1,6 +1,7 @@ config ARM bool default y + select ARCH_CLOCKSOURCE_DATA select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE @@ -878,6 +879,7 @@ config ARCH_STM32 select CLKSRC_STM32 select PINCTRL select RESET_CONTROLLER + select STM32_EXTI help Support for STMicroelectronics STM32 processors. diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index af11c2f8f3b7..fc6d541549a2 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -779,7 +779,7 @@ __armv7_mmu_cache_on: orrne r0, r0, #1 @ MMU enabled movne r1, #0xfffffffd @ domain 0 = client bic r6, r6, #1 << 31 @ 32-bit translation system - bic r6, r6, #3 << 0 @ use only ttbr0 + bic r6, r6, #(7 << 0) | (1 << 4) @ use only ttbr0 mcrne p15, 0, r3, c2, c0, 0 @ load page table pointer mcrne p15, 0, r1, c3, c0, 0 @ load domain access control mcrne p15, 0, r6, c2, c0, 2 @ load ttb control diff --git a/arch/arm/boot/dts/am335x-baltos.dtsi b/arch/arm/boot/dts/am335x-baltos.dtsi index c8609d8d2c55..b689172632ef 100644 --- a/arch/arm/boot/dts/am335x-baltos.dtsi +++ b/arch/arm/boot/dts/am335x-baltos.dtsi @@ -226,7 +226,7 @@ #address-cells = <1>; #size-cells = <1>; - elm_id = <&elm>; + ti,elm-id = <&elm>; }; }; diff --git a/arch/arm/boot/dts/am335x-igep0033.dtsi b/arch/arm/boot/dts/am335x-igep0033.dtsi index df63484ef9b3..e7d9ca1305fa 100644 --- a/arch/arm/boot/dts/am335x-igep0033.dtsi +++ b/arch/arm/boot/dts/am335x-igep0033.dtsi @@ -161,7 +161,7 @@ #address-cells = <1>; #size-cells = <1>; - elm_id = <&elm>; + ti,elm-id = <&elm>; /* MTD partition table */ partition@0 { diff --git a/arch/arm/boot/dts/am335x-phycore-som.dtsi b/arch/arm/boot/dts/am335x-phycore-som.dtsi index 86f773165d5c..1263c9d4cba3 100644 --- a/arch/arm/boot/dts/am335x-phycore-som.dtsi +++ b/arch/arm/boot/dts/am335x-phycore-som.dtsi @@ -197,7 +197,7 @@ gpmc,wr-access-ns = <30>; gpmc,wr-data-mux-bus-ns = <0>; - elm_id = <&elm>; + ti,elm-id = <&elm>; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm/boot/dts/armada-388-clearfog.dts b/arch/arm/boot/dts/armada-388-clearfog.dts index 2e0556af6e5e..d3e6bd805006 100644 --- a/arch/arm/boot/dts/armada-388-clearfog.dts +++ b/arch/arm/boot/dts/armada-388-clearfog.dts @@ -390,12 +390,12 @@ port@0 { reg = <0>; - label = "lan1"; + label = "lan5"; }; port@1 { reg = <1>; - label = "lan2"; + label = "lan4"; }; port@2 { @@ -405,12 +405,12 @@ port@3 { reg = <3>; - label = "lan4"; + label = "lan2"; }; port@4 { reg = <4>; - label = "lan5"; + label = "lan1"; }; port@5 { diff --git a/arch/arm/boot/dts/bcm2835-rpi.dtsi b/arch/arm/boot/dts/bcm2835-rpi.dtsi index caf2707680c1..e9b47b2bbc33 100644 --- a/arch/arm/boot/dts/bcm2835-rpi.dtsi +++ b/arch/arm/boot/dts/bcm2835-rpi.dtsi @@ -2,6 +2,7 @@ / { memory { + device_type = "memory"; reg = <0 0x10000000>; }; diff --git a/arch/arm/boot/dts/bcm283x.dtsi b/arch/arm/boot/dts/bcm283x.dtsi index b98252232d20..445624a1a1de 100644 --- a/arch/arm/boot/dts/bcm283x.dtsi +++ b/arch/arm/boot/dts/bcm283x.dtsi @@ -2,7 +2,6 @@ #include <dt-bindings/clock/bcm2835.h> #include <dt-bindings/clock/bcm2835-aux.h> #include <dt-bindings/gpio/gpio.h> -#include "skeleton.dtsi" /* This include file covers the common peripherals and configuration between * bcm2835 and bcm2836 implementations, leaving the CPU configuration to @@ -13,6 +12,8 @@ compatible = "brcm,bcm2835"; model = "BCM2835"; interrupt-parent = <&intc>; + #address-cells = <1>; + #size-cells = <1>; chosen { bootargs = "earlyprintk console=ttyAMA0"; diff --git a/arch/arm/boot/dts/exynos5410-odroidxu.dts b/arch/arm/boot/dts/exynos5410-odroidxu.dts index d9499310a301..f6d135245a4b 100644 --- a/arch/arm/boot/dts/exynos5410-odroidxu.dts +++ b/arch/arm/boot/dts/exynos5410-odroidxu.dts @@ -447,14 +447,11 @@ samsung,dw-mshc-ciu-div = <3>; samsung,dw-mshc-sdr-timing = <0 4>; samsung,dw-mshc-ddr-timing = <0 2>; - samsung,dw-mshc-hs400-timing = <0 2>; - samsung,read-strobe-delay = <90>; pinctrl-names = "default"; pinctrl-0 = <&sd0_clk &sd0_cmd &sd0_bus1 &sd0_bus4 &sd0_bus8 &sd0_cd>; bus-width = <8>; cap-mmc-highspeed; mmc-hs200-1_8v; - mmc-hs400-1_8v; vmmc-supply = <&ldo20_reg>; vqmmc-supply = <&ldo11_reg>; }; diff --git a/arch/arm/boot/dts/imx6qdl.dtsi b/arch/arm/boot/dts/imx6qdl.dtsi index b620ac884cfd..b13b0b2db881 100644 --- a/arch/arm/boot/dts/imx6qdl.dtsi +++ b/arch/arm/boot/dts/imx6qdl.dtsi @@ -243,7 +243,7 @@ clocks = <&clks IMX6QDL_CLK_SPDIF_GCLK>, <&clks IMX6QDL_CLK_OSC>, <&clks IMX6QDL_CLK_SPDIF>, <&clks IMX6QDL_CLK_ASRC>, <&clks IMX6QDL_CLK_DUMMY>, <&clks IMX6QDL_CLK_ESAI_EXTAL>, - <&clks IMX6QDL_CLK_IPG>, <&clks IMX6QDL_CLK_MLB>, + <&clks IMX6QDL_CLK_IPG>, <&clks IMX6QDL_CLK_DUMMY>, <&clks IMX6QDL_CLK_DUMMY>, <&clks IMX6QDL_CLK_SPBA>; clock-names = "core", "rxtx0", "rxtx1", "rxtx2", diff --git a/arch/arm/boot/dts/imx6sx-sabreauto.dts b/arch/arm/boot/dts/imx6sx-sabreauto.dts index 96ea936eeeb0..240a2864d044 100644 --- a/arch/arm/boot/dts/imx6sx-sabreauto.dts +++ b/arch/arm/boot/dts/imx6sx-sabreauto.dts @@ -64,7 +64,7 @@ cd-gpios = <&gpio7 11 GPIO_ACTIVE_LOW>; no-1-8-v; keep-power-in-suspend; - enable-sdio-wakup; + wakeup-source; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx7d-sdb.dts b/arch/arm/boot/dts/imx7d-sdb.dts index 95ee268ed510..2f33c463cbce 100644 --- a/arch/arm/boot/dts/imx7d-sdb.dts +++ b/arch/arm/boot/dts/imx7d-sdb.dts @@ -131,7 +131,7 @@ ti,y-min = /bits/ 16 <0>; ti,y-max = /bits/ 16 <0>; ti,pressure-max = /bits/ 16 <0>; - ti,x-plat-ohms = /bits/ 16 <400>; + ti,x-plate-ohms = /bits/ 16 <400>; wakeup-source; }; }; diff --git a/arch/arm/boot/dts/kirkwood-ib62x0.dts b/arch/arm/boot/dts/kirkwood-ib62x0.dts index ef84d8699a76..5bf62897014c 100644 --- a/arch/arm/boot/dts/kirkwood-ib62x0.dts +++ b/arch/arm/boot/dts/kirkwood-ib62x0.dts @@ -113,7 +113,7 @@ partition@e0000 { label = "u-boot environment"; - reg = <0xe0000 0x100000>; + reg = <0xe0000 0x20000>; }; partition@100000 { diff --git a/arch/arm/boot/dts/kirkwood-openrd.dtsi b/arch/arm/boot/dts/kirkwood-openrd.dtsi index e4ecab112601..7175511a92da 100644 --- a/arch/arm/boot/dts/kirkwood-openrd.dtsi +++ b/arch/arm/boot/dts/kirkwood-openrd.dtsi @@ -116,6 +116,10 @@ }; }; +&pciec { + status = "okay"; +}; + &pcie0 { status = "okay"; }; diff --git a/arch/arm/boot/dts/logicpd-som-lv.dtsi b/arch/arm/boot/dts/logicpd-som-lv.dtsi index 365f39ff58bb..0ff1c2de95bf 100644 --- a/arch/arm/boot/dts/logicpd-som-lv.dtsi +++ b/arch/arm/boot/dts/logicpd-som-lv.dtsi @@ -35,10 +35,15 @@ ranges = <0 0 0x00000000 0x1000000>; /* CS0: 16MB for NAND */ nand@0,0 { - linux,mtd-name = "micron,mt29f4g16abbda3w"; + compatible = "ti,omap2-nand"; reg = <0 0 4>; /* CS0, offset 0, IO size 4 */ + interrupt-parent = <&gpmc>; + interrupts = <0 IRQ_TYPE_NONE>, /* fifoevent */ + <1 IRQ_TYPE_NONE>; /* termcount */ + linux,mtd-name = "micron,mt29f4g16abbda3w"; nand-bus-width = <16>; ti,nand-ecc-opt = "bch8"; + rb-gpios = <&gpmc 0 GPIO_ACTIVE_HIGH>; /* gpmc_wait0 */ gpmc,sync-clk-ps = <0>; gpmc,cs-on-ns = <0>; gpmc,cs-rd-off-ns = <44>; @@ -54,10 +59,6 @@ gpmc,wr-access-ns = <40>; gpmc,wr-data-mux-bus-ns = <0>; gpmc,device-width = <2>; - - gpmc,page-burst-access-ns = <5>; - gpmc,cycle2cycle-delay-ns = <50>; - #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm/boot/dts/logicpd-torpedo-som.dtsi b/arch/arm/boot/dts/logicpd-torpedo-som.dtsi index 5e9a13c0eaf7..1c2c74655416 100644 --- a/arch/arm/boot/dts/logicpd-torpedo-som.dtsi +++ b/arch/arm/boot/dts/logicpd-torpedo-som.dtsi @@ -46,6 +46,7 @@ linux,mtd-name = "micron,mt29f4g16abbda3w"; nand-bus-width = <16>; ti,nand-ecc-opt = "bch8"; + rb-gpios = <&gpmc 0 GPIO_ACTIVE_HIGH>; /* gpmc_wait0 */ gpmc,sync-clk-ps = <0>; gpmc,cs-on-ns = <0>; gpmc,cs-rd-off-ns = <44>; diff --git a/arch/arm/boot/dts/omap3-overo-base.dtsi b/arch/arm/boot/dts/omap3-overo-base.dtsi index de256fa8da48..3e946cac55f3 100644 --- a/arch/arm/boot/dts/omap3-overo-base.dtsi +++ b/arch/arm/boot/dts/omap3-overo-base.dtsi @@ -223,7 +223,9 @@ }; &gpmc { - ranges = <0 0 0x00000000 0x20000000>; + ranges = <0 0 0x30000000 0x1000000>, /* CS0 */ + <4 0 0x2b000000 0x1000000>, /* CS4 */ + <5 0 0x2c000000 0x1000000>; /* CS5 */ nand@0,0 { compatible = "ti,omap2-nand"; diff --git a/arch/arm/boot/dts/omap3-overo-chestnut43-common.dtsi b/arch/arm/boot/dts/omap3-overo-chestnut43-common.dtsi index 7df27926ead2..4f4c6efbd518 100644 --- a/arch/arm/boot/dts/omap3-overo-chestnut43-common.dtsi +++ b/arch/arm/boot/dts/omap3-overo-chestnut43-common.dtsi @@ -55,8 +55,6 @@ #include "omap-gpmc-smsc9221.dtsi" &gpmc { - ranges = <5 0 0x2c000000 0x1000000>; /* CS5 */ - ethernet@gpmc { reg = <5 0 0xff>; interrupt-parent = <&gpio6>; diff --git a/arch/arm/boot/dts/omap3-overo-tobi-common.dtsi b/arch/arm/boot/dts/omap3-overo-tobi-common.dtsi index 9e24b6a1d07b..1b304e2f1bd2 100644 --- a/arch/arm/boot/dts/omap3-overo-tobi-common.dtsi +++ b/arch/arm/boot/dts/omap3-overo-tobi-common.dtsi @@ -27,8 +27,6 @@ #include "omap-gpmc-smsc9221.dtsi" &gpmc { - ranges = <5 0 0x2c000000 0x1000000>; /* CS5 */ - ethernet@gpmc { reg = <5 0 0xff>; interrupt-parent = <&gpio6>; diff --git a/arch/arm/boot/dts/omap3-overo-tobiduo-common.dtsi b/arch/arm/boot/dts/omap3-overo-tobiduo-common.dtsi index 334109e14613..82e98ee3023a 100644 --- a/arch/arm/boot/dts/omap3-overo-tobiduo-common.dtsi +++ b/arch/arm/boot/dts/omap3-overo-tobiduo-common.dtsi @@ -15,9 +15,6 @@ #include "omap-gpmc-smsc9221.dtsi" &gpmc { - ranges = <4 0 0x2b000000 0x1000000>, /* CS4 */ - <5 0 0x2c000000 0x1000000>; /* CS5 */ - smsc1: ethernet@gpmc { reg = <5 0 0xff>; interrupt-parent = <&gpio6>; diff --git a/arch/arm/boot/dts/stih407-family.dtsi b/arch/arm/boot/dts/stih407-family.dtsi index d294e82447a2..8b063ab10c19 100644 --- a/arch/arm/boot/dts/stih407-family.dtsi +++ b/arch/arm/boot/dts/stih407-family.dtsi @@ -550,8 +550,9 @@ interrupt-names = "mmcirq"; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_mmc0>; - clock-names = "mmc"; - clocks = <&clk_s_c0_flexgen CLK_MMC_0>; + clock-names = "mmc", "icn"; + clocks = <&clk_s_c0_flexgen CLK_MMC_0>, + <&clk_s_c0_flexgen CLK_RX_ICN_HVA>; bus-width = <8>; non-removable; }; @@ -565,8 +566,9 @@ interrupt-names = "mmcirq"; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_sd1>; - clock-names = "mmc"; - clocks = <&clk_s_c0_flexgen CLK_MMC_1>; + clock-names = "mmc", "icn"; + clocks = <&clk_s_c0_flexgen CLK_MMC_1>, + <&clk_s_c0_flexgen CLK_RX_ICN_HVA>; resets = <&softreset STIH407_MMC1_SOFTRESET>; bus-width = <4>; }; diff --git a/arch/arm/boot/dts/stih410.dtsi b/arch/arm/boot/dts/stih410.dtsi index 18ed1ad10d32..40318869c733 100644 --- a/arch/arm/boot/dts/stih410.dtsi +++ b/arch/arm/boot/dts/stih410.dtsi @@ -41,7 +41,8 @@ compatible = "st,st-ohci-300x"; reg = <0x9a03c00 0x100>; interrupts = <GIC_SPI 180 IRQ_TYPE_NONE>; - clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>; + clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>, + <&clk_s_c0_flexgen CLK_RX_ICN_DISP_0>; resets = <&powerdown STIH407_USB2_PORT0_POWERDOWN>, <&softreset STIH407_USB2_PORT0_SOFTRESET>; reset-names = "power", "softreset"; @@ -57,7 +58,8 @@ interrupts = <GIC_SPI 151 IRQ_TYPE_NONE>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usb0>; - clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>; + clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>, + <&clk_s_c0_flexgen CLK_RX_ICN_DISP_0>; resets = <&powerdown STIH407_USB2_PORT0_POWERDOWN>, <&softreset STIH407_USB2_PORT0_SOFTRESET>; reset-names = "power", "softreset"; @@ -71,7 +73,8 @@ compatible = "st,st-ohci-300x"; reg = <0x9a83c00 0x100>; interrupts = <GIC_SPI 181 IRQ_TYPE_NONE>; - clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>; + clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>, + <&clk_s_c0_flexgen CLK_RX_ICN_DISP_0>; resets = <&powerdown STIH407_USB2_PORT1_POWERDOWN>, <&softreset STIH407_USB2_PORT1_SOFTRESET>; reset-names = "power", "softreset"; @@ -87,7 +90,8 @@ interrupts = <GIC_SPI 153 IRQ_TYPE_NONE>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usb1>; - clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>; + clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>, + <&clk_s_c0_flexgen CLK_RX_ICN_DISP_0>; resets = <&powerdown STIH407_USB2_PORT1_POWERDOWN>, <&softreset STIH407_USB2_PORT1_SOFTRESET>; reset-names = "power", "softreset"; diff --git a/arch/arm/boot/dts/stm32f429.dtsi b/arch/arm/boot/dts/stm32f429.dtsi index 35df462559ca..1a189d44ad38 100644 --- a/arch/arm/boot/dts/stm32f429.dtsi +++ b/arch/arm/boot/dts/stm32f429.dtsi @@ -176,6 +176,14 @@ reg = <0x40013800 0x400>; }; + exti: interrupt-controller@40013c00 { + compatible = "st,stm32-exti"; + interrupt-controller; + #interrupt-cells = <2>; + reg = <0x40013C00 0x400>; + interrupts = <1>, <2>, <3>, <6>, <7>, <8>, <9>, <10>, <23>, <40>, <41>, <42>, <62>, <76>; + }; + pin-controller { #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm/boot/dts/sun5i-a13.dtsi b/arch/arm/boot/dts/sun5i-a13.dtsi index e012890e0cf2..a17ba0243db3 100644 --- a/arch/arm/boot/dts/sun5i-a13.dtsi +++ b/arch/arm/boot/dts/sun5i-a13.dtsi @@ -84,7 +84,7 @@ trips { cpu_alert0: cpu_alert0 { /* milliCelsius */ - temperature = <850000>; + temperature = <85000>; hysteresis = <2000>; type = "passive"; }; diff --git a/arch/arm/boot/dts/tegra114-dalmore.dts b/arch/arm/boot/dts/tegra114-dalmore.dts index 1dfc492cc004..1444fbd543e7 100644 --- a/arch/arm/boot/dts/tegra114-dalmore.dts +++ b/arch/arm/boot/dts/tegra114-dalmore.dts @@ -897,7 +897,7 @@ palmas: tps65913@58 { compatible = "ti,palmas"; reg = <0x58>; - interrupts = <0 86 IRQ_TYPE_LEVEL_LOW>; + interrupts = <0 86 IRQ_TYPE_LEVEL_HIGH>; #interrupt-cells = <2>; interrupt-controller; diff --git a/arch/arm/boot/dts/tegra114-roth.dts b/arch/arm/boot/dts/tegra114-roth.dts index 70cf40996c3f..966a7fc044af 100644 --- a/arch/arm/boot/dts/tegra114-roth.dts +++ b/arch/arm/boot/dts/tegra114-roth.dts @@ -802,7 +802,7 @@ palmas: pmic@58 { compatible = "ti,palmas"; reg = <0x58>; - interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_LOW>; + interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>; #interrupt-cells = <2>; interrupt-controller; diff --git a/arch/arm/boot/dts/tegra114-tn7.dts b/arch/arm/boot/dts/tegra114-tn7.dts index 17dd14545862..a161fa1dfb61 100644 --- a/arch/arm/boot/dts/tegra114-tn7.dts +++ b/arch/arm/boot/dts/tegra114-tn7.dts @@ -63,7 +63,7 @@ palmas: pmic@58 { compatible = "ti,palmas"; reg = <0x58>; - interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_LOW>; + interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>; #interrupt-cells = <2>; interrupt-controller; diff --git a/arch/arm/boot/dts/tegra124-jetson-tk1.dts b/arch/arm/boot/dts/tegra124-jetson-tk1.dts index 6403e0de540e..e52b82449a79 100644 --- a/arch/arm/boot/dts/tegra124-jetson-tk1.dts +++ b/arch/arm/boot/dts/tegra124-jetson-tk1.dts @@ -1382,7 +1382,7 @@ * Pin 41: BR_UART1_TXD * Pin 44: BR_UART1_RXD */ - serial@0,70006000 { + serial@70006000 { compatible = "nvidia,tegra124-hsuart", "nvidia,tegra30-hsuart"; status = "okay"; }; @@ -1394,7 +1394,7 @@ * Pin 71: UART2_CTS_L * Pin 74: UART2_RTS_L */ - serial@0,70006040 { + serial@70006040 { compatible = "nvidia,tegra124-hsuart", "nvidia,tegra30-hsuart"; status = "okay"; }; diff --git a/arch/arm/common/locomo.c b/arch/arm/common/locomo.c index 0e97b4b871f9..6c7b06854fce 100644 --- a/arch/arm/common/locomo.c +++ b/arch/arm/common/locomo.c @@ -140,7 +140,7 @@ static struct locomo_dev_info locomo_devices[] = { static void locomo_handler(struct irq_desc *desc) { - struct locomo *lchip = irq_desc_get_chip_data(desc); + struct locomo *lchip = irq_desc_get_handler_data(desc); int req, i; /* Acknowledge the parent IRQ */ @@ -200,8 +200,7 @@ static void locomo_setup_irq(struct locomo *lchip) * Install handler for IRQ_LOCOMO_HW. */ irq_set_irq_type(lchip->irq, IRQ_TYPE_EDGE_FALLING); - irq_set_chip_data(lchip->irq, lchip); - irq_set_chained_handler(lchip->irq, locomo_handler); + irq_set_chained_handler_and_data(lchip->irq, locomo_handler, lchip); /* Install handlers for IRQ_LOCOMO_* */ for ( ; irq <= lchip->irq_base + 3; irq++) { diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c index fb0a0a4dfea4..2e076c492005 100644 --- a/arch/arm/common/sa1111.c +++ b/arch/arm/common/sa1111.c @@ -472,8 +472,8 @@ static int sa1111_setup_irq(struct sa1111 *sachip, unsigned irq_base) * specifies that S0ReadyInt and S1ReadyInt should be '1'. */ sa1111_writel(0, irqbase + SA1111_INTPOL0); - sa1111_writel(SA1111_IRQMASK_HI(IRQ_S0_READY_NINT) | - SA1111_IRQMASK_HI(IRQ_S1_READY_NINT), + sa1111_writel(BIT(IRQ_S0_READY_NINT & 31) | + BIT(IRQ_S1_READY_NINT & 31), irqbase + SA1111_INTPOL1); /* clear all IRQs */ @@ -754,7 +754,7 @@ static int __sa1111_probe(struct device *me, struct resource *mem, int irq) if (sachip->irq != NO_IRQ) { ret = sa1111_setup_irq(sachip, pd->irq_base); if (ret) - goto err_unmap; + goto err_clk; } #ifdef CONFIG_ARCH_SA1100 @@ -799,6 +799,8 @@ static int __sa1111_probe(struct device *me, struct resource *mem, int irq) return 0; + err_clk: + clk_disable(sachip->clk); err_unmap: iounmap(sachip->base); err_clk_unprep: @@ -869,9 +871,9 @@ struct sa1111_save_data { #ifdef CONFIG_PM -static int sa1111_suspend(struct platform_device *dev, pm_message_t state) +static int sa1111_suspend_noirq(struct device *dev) { - struct sa1111 *sachip = platform_get_drvdata(dev); + struct sa1111 *sachip = dev_get_drvdata(dev); struct sa1111_save_data *save; unsigned long flags; unsigned int val; @@ -934,9 +936,9 @@ static int sa1111_suspend(struct platform_device *dev, pm_message_t state) * restored by their respective drivers, and must be called * via LDM after this function. */ -static int sa1111_resume(struct platform_device *dev) +static int sa1111_resume_noirq(struct device *dev) { - struct sa1111 *sachip = platform_get_drvdata(dev); + struct sa1111 *sachip = dev_get_drvdata(dev); struct sa1111_save_data *save; unsigned long flags, id; void __iomem *base; @@ -952,7 +954,7 @@ static int sa1111_resume(struct platform_device *dev) id = sa1111_readl(sachip->base + SA1111_SKID); if ((id & SKID_ID_MASK) != SKID_SA1111_ID) { __sa1111_remove(sachip); - platform_set_drvdata(dev, NULL); + dev_set_drvdata(dev, NULL); kfree(save); return 0; } @@ -1003,8 +1005,8 @@ static int sa1111_resume(struct platform_device *dev) } #else -#define sa1111_suspend NULL -#define sa1111_resume NULL +#define sa1111_suspend_noirq NULL +#define sa1111_resume_noirq NULL #endif static int sa1111_probe(struct platform_device *pdev) @@ -1017,7 +1019,7 @@ static int sa1111_probe(struct platform_device *pdev) return -EINVAL; irq = platform_get_irq(pdev, 0); if (irq < 0) - return -ENXIO; + return irq; return __sa1111_probe(&pdev->dev, mem, irq); } @@ -1038,6 +1040,11 @@ static int sa1111_remove(struct platform_device *pdev) return 0; } +static struct dev_pm_ops sa1111_pm_ops = { + .suspend_noirq = sa1111_suspend_noirq, + .resume_noirq = sa1111_resume_noirq, +}; + /* * Not sure if this should be on the system bus or not yet. * We really want some way to register a system device at @@ -1050,10 +1057,9 @@ static int sa1111_remove(struct platform_device *pdev) static struct platform_driver sa1111_device_driver = { .probe = sa1111_probe, .remove = sa1111_remove, - .suspend = sa1111_suspend, - .resume = sa1111_resume, .driver = { .name = "sa1111", + .pm = &sa1111_pm_ops, }, }; diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig index 01986deef7c5..36cc7cc012f9 100644 --- a/arch/arm/configs/exynos_defconfig +++ b/arch/arm/configs/exynos_defconfig @@ -28,7 +28,7 @@ CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y CONFIG_CPU_FREQ_GOV_POWERSAVE=m CONFIG_CPU_FREQ_GOV_USERSPACE=m CONFIG_CPU_FREQ_GOV_CONSERVATIVE=m -CONFIG_CPU_FREQ_GOV_SCHEDUTIL=m +CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y CONFIG_CPUFREQ_DT=y CONFIG_CPU_IDLE=y CONFIG_ARM_EXYNOS_CPUIDLE=y diff --git a/arch/arm/configs/keystone_defconfig b/arch/arm/configs/keystone_defconfig index 71b42e66488a..78cd2f197e01 100644 --- a/arch/arm/configs/keystone_defconfig +++ b/arch/arm/configs/keystone_defconfig @@ -161,6 +161,7 @@ CONFIG_USB_MON=y CONFIG_USB_XHCI_HCD=y CONFIG_USB_STORAGE=y CONFIG_USB_DWC3=y +CONFIG_NOP_USB_XCEIV=y CONFIG_KEYSTONE_USB_PHY=y CONFIG_NEW_LEDS=y CONFIG_LEDS_CLASS=y diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index 2c8665cd9dc5..58459105cadc 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -135,7 +135,7 @@ CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y CONFIG_CPU_FREQ_GOV_POWERSAVE=m CONFIG_CPU_FREQ_GOV_USERSPACE=m CONFIG_CPU_FREQ_GOV_CONSERVATIVE=m -CONFIG_CPU_FREQ_GOV_SCHEDUTIL=m +CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y CONFIG_QORIQ_CPUFREQ=y CONFIG_CPU_IDLE=y CONFIG_ARM_CPUIDLE=y @@ -781,7 +781,7 @@ CONFIG_MXS_DMA=y CONFIG_DMA_BCM2835=y CONFIG_DMA_OMAP=y CONFIG_QCOM_BAM_DMA=y -CONFIG_XILINX_VDMA=y +CONFIG_XILINX_DMA=y CONFIG_DMA_SUN6I=y CONFIG_STAGING=y CONFIG_SENSORS_ISL29018=y diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c index da3c0428507b..aef022a87c53 100644 --- a/arch/arm/crypto/aes-ce-glue.c +++ b/arch/arm/crypto/aes-ce-glue.c @@ -284,7 +284,7 @@ static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); } - if (nbytes) { + if (walk.nbytes % AES_BLOCK_SIZE) { u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE; u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE; u8 __aligned(8) tail[AES_BLOCK_SIZE]; diff --git a/arch/arm/include/asm/arch_gicv3.h b/arch/arm/include/asm/arch_gicv3.h index e08d15184056..dfe4002812da 100644 --- a/arch/arm/include/asm/arch_gicv3.h +++ b/arch/arm/include/asm/arch_gicv3.h @@ -34,6 +34,7 @@ #define ICC_CTLR __ACCESS_CP15(c12, 0, c12, 4) #define ICC_SRE __ACCESS_CP15(c12, 0, c12, 5) #define ICC_IGRPEN1 __ACCESS_CP15(c12, 0, c12, 7) +#define ICC_BPR1 __ACCESS_CP15(c12, 0, c12, 3) #define ICC_HSRE __ACCESS_CP15(c12, 4, c9, 5) @@ -157,6 +158,11 @@ static inline void gic_write_sre(u32 val) isb(); } +static inline void gic_write_bpr1(u32 val) +{ + asm volatile("mcr " __stringify(ICC_BPR1) : : "r" (val)); +} + /* * Even in 32bit systems that use LPAE, there is no guarantee that the I/O * interface provides true 64bit atomic accesses, so using strd/ldrd doesn't diff --git a/arch/arm/include/asm/clocksource.h b/arch/arm/include/asm/clocksource.h new file mode 100644 index 000000000000..0b350a7e26f3 --- /dev/null +++ b/arch/arm/include/asm/clocksource.h @@ -0,0 +1,8 @@ +#ifndef _ASM_CLOCKSOURCE_H +#define _ASM_CLOCKSOURCE_H + +struct arch_clocksource_data { + bool vdso_direct; /* Usable for direct VDSO access? */ +}; + +#endif diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index d009f7911ffc..bf02dbd9ccda 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -111,7 +111,7 @@ static inline dma_addr_t virt_to_dma(struct device *dev, void *addr) /* The ARM override for dma_max_pfn() */ static inline unsigned long dma_max_pfn(struct device *dev) { - return PHYS_PFN_OFFSET + dma_to_pfn(dev, *dev->dma_mask); + return dma_to_pfn(dev, *dev->dma_mask); } #define dma_max_pfn(dev) dma_max_pfn(dev) diff --git a/arch/arm/include/asm/pgtable-2level-hwdef.h b/arch/arm/include/asm/pgtable-2level-hwdef.h index d0131ee6f6af..3f82e9da7cec 100644 --- a/arch/arm/include/asm/pgtable-2level-hwdef.h +++ b/arch/arm/include/asm/pgtable-2level-hwdef.h @@ -47,6 +47,7 @@ #define PMD_SECT_WB (PMD_SECT_CACHEABLE | PMD_SECT_BUFFERABLE) #define PMD_SECT_MINICACHE (PMD_SECT_TEX(1) | PMD_SECT_CACHEABLE) #define PMD_SECT_WBWA (PMD_SECT_TEX(1) | PMD_SECT_CACHEABLE | PMD_SECT_BUFFERABLE) +#define PMD_SECT_CACHE_MASK (PMD_SECT_TEX(1) | PMD_SECT_CACHEABLE | PMD_SECT_BUFFERABLE) #define PMD_SECT_NONSHARED_DEV (PMD_SECT_TEX(2)) /* diff --git a/arch/arm/include/asm/pgtable-3level-hwdef.h b/arch/arm/include/asm/pgtable-3level-hwdef.h index f8f1cff62065..4cd664abfcd3 100644 --- a/arch/arm/include/asm/pgtable-3level-hwdef.h +++ b/arch/arm/include/asm/pgtable-3level-hwdef.h @@ -62,6 +62,7 @@ #define PMD_SECT_WT (_AT(pmdval_t, 2) << 2) /* normal inner write-through */ #define PMD_SECT_WB (_AT(pmdval_t, 3) << 2) /* normal inner write-back */ #define PMD_SECT_WBWA (_AT(pmdval_t, 7) << 2) /* normal inner write-alloc */ +#define PMD_SECT_CACHE_MASK (_AT(pmdval_t, 7) << 2) /* * + Level 3 descriptor (PTE) diff --git a/arch/arm/kernel/devtree.c b/arch/arm/kernel/devtree.c index 40ecd5f514a2..f676febbb270 100644 --- a/arch/arm/kernel/devtree.c +++ b/arch/arm/kernel/devtree.c @@ -88,6 +88,8 @@ void __init arm_dt_init_cpu_maps(void) return; for_each_child_of_node(cpus, cpu) { + const __be32 *cell; + int prop_bytes; u32 hwid; if (of_node_cmp(cpu->type, "cpu")) @@ -99,7 +101,8 @@ void __init arm_dt_init_cpu_maps(void) * properties is considered invalid to build the * cpu_logical_map. */ - if (of_property_read_u32(cpu, "reg", &hwid)) { + cell = of_get_property(cpu, "reg", &prop_bytes); + if (!cell || prop_bytes < sizeof(*cell)) { pr_debug(" * %s missing reg property\n", cpu->full_name); of_node_put(cpu); @@ -107,10 +110,15 @@ void __init arm_dt_init_cpu_maps(void) } /* - * 8 MSBs must be set to 0 in the DT since the reg property + * Bits n:24 must be set to 0 in the DT since the reg property * defines the MPIDR[23:0]. */ - if (hwid & ~MPIDR_HWID_BITMASK) { + do { + hwid = be32_to_cpu(*cell++); + prop_bytes -= sizeof(*cell); + } while (!hwid && prop_bytes > 0); + + if (prop_bytes || (hwid & ~MPIDR_HWID_BITMASK)) { of_node_put(cpu); return; } diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c index 709ee1d6d4df..3f1759411d51 100644 --- a/arch/arm/kernel/ftrace.c +++ b/arch/arm/kernel/ftrace.c @@ -218,7 +218,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, } err = ftrace_push_return_trace(old, self_addr, &trace.depth, - frame_pointer); + frame_pointer, NULL); if (err == -EBUSY) { *parent = old; return; diff --git a/arch/arm/kernel/hyp-stub.S b/arch/arm/kernel/hyp-stub.S index 0b1e4a93d67e..15d073ae5da2 100644 --- a/arch/arm/kernel/hyp-stub.S +++ b/arch/arm/kernel/hyp-stub.S @@ -142,6 +142,19 @@ ARM_BE8(orr r7, r7, #(1 << 25)) @ HSCTLR.EE and r7, #0x1f @ Preserve HPMN mcr p15, 4, r7, c1, c1, 1 @ HDCR + @ Make sure NS-SVC is initialised appropriately + mrc p15, 0, r7, c1, c0, 0 @ SCTLR + orr r7, #(1 << 5) @ CP15 barriers enabled + bic r7, #(3 << 7) @ Clear SED/ITD for v8 (RES0 for v7) + bic r7, #(3 << 19) @ WXN and UWXN disabled + mcr p15, 0, r7, c1, c0, 0 @ SCTLR + + mrc p15, 0, r7, c0, c0, 0 @ MIDR + mcr p15, 4, r7, c0, c0, 0 @ VPIDR + + mrc p15, 0, r7, c0, c0, 5 @ MPIDR + mcr p15, 4, r7, c0, c0, 5 @ VMPIDR + #if !defined(ZIMAGE) && defined(CONFIG_ARM_ARCH_TIMER) @ make CNTP_* and CNTPCT accessible from PL1 mrc p15, 0, r7, c0, c1, 1 @ ID_PFR1 diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index 15063851cd10..b9423491b9d7 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -596,12 +596,6 @@ static struct attribute_group armv7_pmuv1_events_attr_group = { .attrs = armv7_pmuv1_event_attrs, }; -static const struct attribute_group *armv7_pmuv1_attr_groups[] = { - &armv7_pmuv1_events_attr_group, - &armv7_pmu_format_attr_group, - NULL, -}; - ARMV7_EVENT_ATTR(mem_access, ARMV7_PERFCTR_MEM_ACCESS); ARMV7_EVENT_ATTR(l1i_cache, ARMV7_PERFCTR_L1_ICACHE_ACCESS); ARMV7_EVENT_ATTR(l1d_cache_wb, ARMV7_PERFCTR_L1_DCACHE_WB); @@ -653,12 +647,6 @@ static struct attribute_group armv7_pmuv2_events_attr_group = { .attrs = armv7_pmuv2_event_attrs, }; -static const struct attribute_group *armv7_pmuv2_attr_groups[] = { - &armv7_pmuv2_events_attr_group, - &armv7_pmu_format_attr_group, - NULL, -}; - /* * Perf Events' indices */ @@ -1208,7 +1196,10 @@ static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu) armv7pmu_init(cpu_pmu); cpu_pmu->name = "armv7_cortex_a8"; cpu_pmu->map_event = armv7_a8_map_event; - cpu_pmu->pmu.attr_groups = armv7_pmuv1_attr_groups; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv7_pmuv1_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv7_pmu_format_attr_group; return armv7_probe_num_events(cpu_pmu); } @@ -1217,7 +1208,10 @@ static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu) armv7pmu_init(cpu_pmu); cpu_pmu->name = "armv7_cortex_a9"; cpu_pmu->map_event = armv7_a9_map_event; - cpu_pmu->pmu.attr_groups = armv7_pmuv1_attr_groups; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv7_pmuv1_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv7_pmu_format_attr_group; return armv7_probe_num_events(cpu_pmu); } @@ -1226,7 +1220,10 @@ static int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu) armv7pmu_init(cpu_pmu); cpu_pmu->name = "armv7_cortex_a5"; cpu_pmu->map_event = armv7_a5_map_event; - cpu_pmu->pmu.attr_groups = armv7_pmuv1_attr_groups; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv7_pmuv1_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv7_pmu_format_attr_group; return armv7_probe_num_events(cpu_pmu); } @@ -1236,7 +1233,10 @@ static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->name = "armv7_cortex_a15"; cpu_pmu->map_event = armv7_a15_map_event; cpu_pmu->set_event_filter = armv7pmu_set_event_filter; - cpu_pmu->pmu.attr_groups = armv7_pmuv2_attr_groups; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv7_pmuv2_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv7_pmu_format_attr_group; return armv7_probe_num_events(cpu_pmu); } @@ -1246,7 +1246,10 @@ static int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->name = "armv7_cortex_a7"; cpu_pmu->map_event = armv7_a7_map_event; cpu_pmu->set_event_filter = armv7pmu_set_event_filter; - cpu_pmu->pmu.attr_groups = armv7_pmuv2_attr_groups; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv7_pmuv2_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv7_pmu_format_attr_group; return armv7_probe_num_events(cpu_pmu); } @@ -1256,7 +1259,10 @@ static int armv7_a12_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->name = "armv7_cortex_a12"; cpu_pmu->map_event = armv7_a12_map_event; cpu_pmu->set_event_filter = armv7pmu_set_event_filter; - cpu_pmu->pmu.attr_groups = armv7_pmuv2_attr_groups; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv7_pmuv2_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv7_pmu_format_attr_group; return armv7_probe_num_events(cpu_pmu); } @@ -1264,7 +1270,10 @@ static int armv7_a17_pmu_init(struct arm_pmu *cpu_pmu) { int ret = armv7_a12_pmu_init(cpu_pmu); cpu_pmu->name = "armv7_cortex_a17"; - cpu_pmu->pmu.attr_groups = armv7_pmuv2_attr_groups; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv7_pmuv2_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv7_pmu_format_attr_group; return ret; } diff --git a/arch/arm/kernel/vdso.c b/arch/arm/kernel/vdso.c index 994e971a8538..a0affd14086a 100644 --- a/arch/arm/kernel/vdso.c +++ b/arch/arm/kernel/vdso.c @@ -270,7 +270,7 @@ static bool tk_is_cntvct(const struct timekeeper *tk) if (!IS_ENABLED(CONFIG_ARM_ARCH_TIMER)) return false; - if (strcmp(tk->tkr_mono.clock->name, "arch_sys_counter") != 0) + if (!tk->tkr_mono.clock->archdata.vdso_direct) return false; return true; diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 75f130ef6504..c94b90d43772 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -158,8 +158,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm) { int i; - kvm_free_stage2_pgd(kvm); - for (i = 0; i < KVM_MAX_VCPUS; ++i) { if (kvm->vcpus[i]) { kvm_arch_vcpu_free(kvm->vcpus[i]); diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 29d0b23af2a9..e9a5c0e0c115 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -1714,7 +1714,8 @@ int kvm_mmu_init(void) kern_hyp_va(PAGE_OFFSET), kern_hyp_va(~0UL)); if (hyp_idmap_start >= kern_hyp_va(PAGE_OFFSET) && - hyp_idmap_start < kern_hyp_va(~0UL)) { + hyp_idmap_start < kern_hyp_va(~0UL) && + hyp_idmap_start != (unsigned long)__hyp_idmap_text_start) { /* * The idmap page is intersecting with the VA space, * it is not safe to continue further. @@ -1893,6 +1894,7 @@ void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) void kvm_arch_flush_shadow_all(struct kvm *kvm) { + kvm_free_stage2_pgd(kvm); } void kvm_arch_flush_shadow_memslot(struct kvm *kvm, diff --git a/arch/arm/mach-exynos/suspend.c b/arch/arm/mach-exynos/suspend.c index 3750575c73c5..06332f626565 100644 --- a/arch/arm/mach-exynos/suspend.c +++ b/arch/arm/mach-exynos/suspend.c @@ -255,6 +255,12 @@ static int __init exynos_pmu_irq_init(struct device_node *node, return -ENOMEM; } + /* + * Clear the OF_POPULATED flag set in of_irq_init so that + * later the Exynos PMU platform device won't be skipped. + */ + of_node_clear_flag(node, OF_POPULATED); + return 0; } diff --git a/arch/arm/mach-imx/mach-imx6ul.c b/arch/arm/mach-imx/mach-imx6ul.c index 5d9bfab279dd..6bb7d9cf1e38 100644 --- a/arch/arm/mach-imx/mach-imx6ul.c +++ b/arch/arm/mach-imx/mach-imx6ul.c @@ -64,6 +64,7 @@ static void __init imx6ul_init_machine(void) if (parent == NULL) pr_warn("failed to initialize soc device\n"); + of_platform_default_populate(NULL, NULL, parent); imx6ul_enet_init(); imx_anatop_init(); imx6ul_pm_init(); diff --git a/arch/arm/mach-imx/pm-imx6.c b/arch/arm/mach-imx/pm-imx6.c index 58924b3844df..fe708e26d021 100644 --- a/arch/arm/mach-imx/pm-imx6.c +++ b/arch/arm/mach-imx/pm-imx6.c @@ -295,7 +295,7 @@ int imx6_set_lpm(enum mxc_cpu_pwr_mode mode) val &= ~BM_CLPCR_SBYOS; if (cpu_is_imx6sl()) val |= BM_CLPCR_BYPASS_PMIC_READY; - if (cpu_is_imx6sl() || cpu_is_imx6sx()) + if (cpu_is_imx6sl() || cpu_is_imx6sx() || cpu_is_imx6ul()) val |= BM_CLPCR_BYP_MMDC_CH0_LPM_HS; else val |= BM_CLPCR_BYP_MMDC_CH1_LPM_HS; @@ -310,7 +310,7 @@ int imx6_set_lpm(enum mxc_cpu_pwr_mode mode) val |= 0x3 << BP_CLPCR_STBY_COUNT; val |= BM_CLPCR_VSTBY; val |= BM_CLPCR_SBYOS; - if (cpu_is_imx6sl()) + if (cpu_is_imx6sl() || cpu_is_imx6sx()) val |= BM_CLPCR_BYPASS_PMIC_READY; if (cpu_is_imx6sl() || cpu_is_imx6sx() || cpu_is_imx6ul()) val |= BM_CLPCR_BYP_MMDC_CH0_LPM_HS; diff --git a/arch/arm/mach-omap2/cm33xx.c b/arch/arm/mach-omap2/cm33xx.c index c073fb57dd13..6f2d0aec0513 100644 --- a/arch/arm/mach-omap2/cm33xx.c +++ b/arch/arm/mach-omap2/cm33xx.c @@ -220,9 +220,6 @@ static int am33xx_cm_wait_module_ready(u8 part, s16 inst, u16 clkctrl_offs, { int i = 0; - if (!clkctrl_offs) - return 0; - omap_test_timeout(_is_module_ready(inst, clkctrl_offs), MAX_MODULE_READY_TIME, i); @@ -246,9 +243,6 @@ static int am33xx_cm_wait_module_idle(u8 part, s16 inst, u16 clkctrl_offs, { int i = 0; - if (!clkctrl_offs) - return 0; - omap_test_timeout((_clkctrl_idlest(inst, clkctrl_offs) == CLKCTRL_IDLEST_DISABLED), MAX_MODULE_READY_TIME, i); diff --git a/arch/arm/mach-omap2/cminst44xx.c b/arch/arm/mach-omap2/cminst44xx.c index 2c0e07ed6b99..2ab27ade136a 100644 --- a/arch/arm/mach-omap2/cminst44xx.c +++ b/arch/arm/mach-omap2/cminst44xx.c @@ -278,9 +278,6 @@ static int omap4_cminst_wait_module_ready(u8 part, s16 inst, u16 clkctrl_offs, { int i = 0; - if (!clkctrl_offs) - return 0; - omap_test_timeout(_is_module_ready(part, inst, clkctrl_offs), MAX_MODULE_READY_TIME, i); @@ -304,9 +301,6 @@ static int omap4_cminst_wait_module_idle(u8 part, s16 inst, u16 clkctrl_offs, { int i = 0; - if (!clkctrl_offs) - return 0; - omap_test_timeout((_clkctrl_idlest(part, inst, clkctrl_offs) == CLKCTRL_IDLEST_DISABLED), MAX_MODULE_DISABLE_TIME, i); diff --git a/arch/arm/mach-omap2/omap-wakeupgen.c b/arch/arm/mach-omap2/omap-wakeupgen.c index 0c4754386532..369f95a703ac 100644 --- a/arch/arm/mach-omap2/omap-wakeupgen.c +++ b/arch/arm/mach-omap2/omap-wakeupgen.c @@ -322,34 +322,25 @@ static void irq_save_secure_context(void) #endif #ifdef CONFIG_HOTPLUG_CPU -static int irq_cpu_hotplug_notify(struct notifier_block *self, - unsigned long action, void *hcpu) +static int omap_wakeupgen_cpu_online(unsigned int cpu) { - unsigned int cpu = (unsigned int)hcpu; - - /* - * Corresponding FROZEN transitions do not have to be handled, - * they are handled by at a higher level - * (drivers/cpuidle/coupled.c). - */ - switch (action) { - case CPU_ONLINE: - wakeupgen_irqmask_all(cpu, 0); - break; - case CPU_DEAD: - wakeupgen_irqmask_all(cpu, 1); - break; - } - return NOTIFY_OK; + wakeupgen_irqmask_all(cpu, 0); + return 0; } -static struct notifier_block irq_hotplug_notifier = { - .notifier_call = irq_cpu_hotplug_notify, -}; +static int omap_wakeupgen_cpu_dead(unsigned int cpu) +{ + wakeupgen_irqmask_all(cpu, 1); + return 0; +} static void __init irq_hotplug_init(void) { - register_hotcpu_notifier(&irq_hotplug_notifier); + cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "arm/omap-wake:online", + omap_wakeupgen_cpu_online, NULL); + cpuhp_setup_state_nocalls(CPUHP_ARM_OMAP_WAKE_DEAD, + "arm/omap-wake:dead", NULL, + omap_wakeupgen_cpu_dead); } #else static void __init irq_hotplug_init(void) diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index 5b709383381c..1052b29697b8 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -1053,6 +1053,10 @@ static int _omap4_wait_target_disable(struct omap_hwmod *oh) if (oh->flags & HWMOD_NO_IDLEST) return 0; + if (!oh->prcm.omap4.clkctrl_offs && + !(oh->prcm.omap4.flags & HWMOD_OMAP4_ZERO_CLKCTRL_OFFSET)) + return 0; + return omap_cm_wait_module_idle(oh->clkdm->prcm_partition, oh->clkdm->cm_inst, oh->prcm.omap4.clkctrl_offs, 0); @@ -2971,6 +2975,10 @@ static int _omap4_wait_target_ready(struct omap_hwmod *oh) if (!_find_mpu_rt_port(oh)) return 0; + if (!oh->prcm.omap4.clkctrl_offs && + !(oh->prcm.omap4.flags & HWMOD_OMAP4_ZERO_CLKCTRL_OFFSET)) + return 0; + /* XXX check module SIDLEMODE, hardreset status */ return omap_cm_wait_module_ready(oh->clkdm->prcm_partition, diff --git a/arch/arm/mach-omap2/omap_hwmod.h b/arch/arm/mach-omap2/omap_hwmod.h index 4041bad79a9a..78904017f18c 100644 --- a/arch/arm/mach-omap2/omap_hwmod.h +++ b/arch/arm/mach-omap2/omap_hwmod.h @@ -443,8 +443,12 @@ struct omap_hwmod_omap2_prcm { * HWMOD_OMAP4_NO_CONTEXT_LOSS_BIT: Some IP blocks don't have a PRCM * module-level context loss register associated with them; this * flag bit should be set in those cases + * HWMOD_OMAP4_ZERO_CLKCTRL_OFFSET: Some IP blocks have a valid CLKCTRL + * offset of zero; this flag bit should be set in those cases to + * distinguish from hwmods that have no clkctrl offset. */ #define HWMOD_OMAP4_NO_CONTEXT_LOSS_BIT (1 << 0) +#define HWMOD_OMAP4_ZERO_CLKCTRL_OFFSET (1 << 1) /** * struct omap_hwmod_omap4_prcm - OMAP4-specific PRCM data diff --git a/arch/arm/mach-omap2/omap_hwmod_33xx_43xx_ipblock_data.c b/arch/arm/mach-omap2/omap_hwmod_33xx_43xx_ipblock_data.c index 55c5878577f4..e2d84aa7f595 100644 --- a/arch/arm/mach-omap2/omap_hwmod_33xx_43xx_ipblock_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_33xx_43xx_ipblock_data.c @@ -29,6 +29,7 @@ #define CLKCTRL(oh, clkctrl) ((oh).prcm.omap4.clkctrl_offs = (clkctrl)) #define RSTCTRL(oh, rstctrl) ((oh).prcm.omap4.rstctrl_offs = (rstctrl)) #define RSTST(oh, rstst) ((oh).prcm.omap4.rstst_offs = (rstst)) +#define PRCM_FLAGS(oh, flag) ((oh).prcm.omap4.flags = (flag)) /* * 'l3' class @@ -1296,6 +1297,7 @@ static void omap_hwmod_am33xx_clkctrl(void) CLKCTRL(am33xx_i2c1_hwmod, AM33XX_CM_WKUP_I2C0_CLKCTRL_OFFSET); CLKCTRL(am33xx_wd_timer1_hwmod, AM33XX_CM_WKUP_WDT1_CLKCTRL_OFFSET); CLKCTRL(am33xx_rtc_hwmod, AM33XX_CM_RTC_RTC_CLKCTRL_OFFSET); + PRCM_FLAGS(am33xx_rtc_hwmod, HWMOD_OMAP4_ZERO_CLKCTRL_OFFSET); CLKCTRL(am33xx_mmc2_hwmod, AM33XX_CM_PER_MMC2_CLKCTRL_OFFSET); CLKCTRL(am33xx_gpmc_hwmod, AM33XX_CM_PER_GPMC_CLKCTRL_OFFSET); CLKCTRL(am33xx_l4_ls_hwmod, AM33XX_CM_PER_L4LS_CLKCTRL_OFFSET); diff --git a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c index d72ee6185d5e..1cc4a6f3954e 100644 --- a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c @@ -722,8 +722,20 @@ static struct omap_hwmod omap3xxx_dss_dispc_hwmod = { * display serial interface controller */ +static struct omap_hwmod_class_sysconfig omap3xxx_dsi_sysc = { + .rev_offs = 0x0000, + .sysc_offs = 0x0010, + .syss_offs = 0x0014, + .sysc_flags = (SYSC_HAS_AUTOIDLE | SYSC_HAS_CLOCKACTIVITY | + SYSC_HAS_ENAWAKEUP | SYSC_HAS_SIDLEMODE | + SYSC_HAS_SOFTRESET | SYSS_HAS_RESET_STATUS), + .idlemodes = (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART), + .sysc_fields = &omap_hwmod_sysc_type1, +}; + static struct omap_hwmod_class omap3xxx_dsi_hwmod_class = { .name = "dsi", + .sysc = &omap3xxx_dsi_sysc, }; static struct omap_hwmod_irq_info omap3xxx_dsi1_irqs[] = { diff --git a/arch/arm/mach-pxa/lubbock.c b/arch/arm/mach-pxa/lubbock.c index 7245f3359564..d6159f8ef0c2 100644 --- a/arch/arm/mach-pxa/lubbock.c +++ b/arch/arm/mach-pxa/lubbock.c @@ -137,6 +137,18 @@ static struct pxa2xx_udc_mach_info udc_info __initdata = { // no D+ pullup; lubbock can't connect/disconnect in software }; +static void lubbock_init_pcmcia(void) +{ + struct clk *clk; + + /* Add an alias for the SA1111 PCMCIA clock */ + clk = clk_get_sys("pxa2xx-pcmcia", NULL); + if (!IS_ERR(clk)) { + clkdev_create(clk, NULL, "1800"); + clk_put(clk); + } +} + static struct resource sa1111_resources[] = { [0] = { .start = 0x10000000, @@ -467,6 +479,8 @@ static void __init lubbock_init(void) pxa_set_btuart_info(NULL); pxa_set_stuart_info(NULL); + lubbock_init_pcmcia(); + clk_add_alias("SA1111_CLK", NULL, "GPIO11_CLK", NULL); pxa_set_udc_info(&udc_info); pxa_set_fb_info(NULL, &sharp_lm8v31); diff --git a/arch/arm/mach-sa1100/clock.c b/arch/arm/mach-sa1100/clock.c index cbf53bb9c814..0db46895c82a 100644 --- a/arch/arm/mach-sa1100/clock.c +++ b/arch/arm/mach-sa1100/clock.c @@ -125,6 +125,8 @@ static unsigned long clk_36864_get_rate(struct clk *clk) } static struct clkops clk_36864_ops = { + .enable = clk_cpu_enable, + .disable = clk_cpu_disable, .get_rate = clk_36864_get_rate, }; @@ -140,9 +142,8 @@ static struct clk_lookup sa11xx_clkregs[] = { CLKDEV_INIT(NULL, "OSTIMER0", &clk_36864), }; -static int __init sa11xx_clk_init(void) +int __init sa11xx_clk_init(void) { clkdev_add_table(sa11xx_clkregs, ARRAY_SIZE(sa11xx_clkregs)); return 0; } -core_initcall(sa11xx_clk_init); diff --git a/arch/arm/mach-sa1100/generic.c b/arch/arm/mach-sa1100/generic.c index 345e63f4eb71..3e09beddb6e8 100644 --- a/arch/arm/mach-sa1100/generic.c +++ b/arch/arm/mach-sa1100/generic.c @@ -34,6 +34,7 @@ #include <mach/hardware.h> #include <mach/irqs.h> +#include <mach/reset.h> #include "generic.h" #include <clocksource/pxa.h> @@ -95,6 +96,8 @@ static void sa1100_power_off(void) void sa11x0_restart(enum reboot_mode mode, const char *cmd) { + clear_reset_status(RESET_STATUS_ALL); + if (mode == REBOOT_SOFT) { /* Jump into ROM at address 0 */ soft_restart(0); @@ -388,6 +391,7 @@ void __init sa1100_init_irq(void) sa11x0_init_irq_nodt(IRQ_GPIO0_SC, irq_resource.start); sa1100_init_gpio(); + sa11xx_clk_init(); } /* diff --git a/arch/arm/mach-sa1100/generic.h b/arch/arm/mach-sa1100/generic.h index 0d92e119b36b..68199b603ff7 100644 --- a/arch/arm/mach-sa1100/generic.h +++ b/arch/arm/mach-sa1100/generic.h @@ -44,3 +44,5 @@ int sa11x0_pm_init(void); #else static inline int sa11x0_pm_init(void) { return 0; } #endif + +int sa11xx_clk_init(void); diff --git a/arch/arm/mach-shmobile/platsmp-scu.c b/arch/arm/mach-shmobile/platsmp-scu.c index 8d478f1da265..d1ecaf37d142 100644 --- a/arch/arm/mach-shmobile/platsmp-scu.c +++ b/arch/arm/mach-shmobile/platsmp-scu.c @@ -21,26 +21,14 @@ static phys_addr_t shmobile_scu_base_phys; static void __iomem *shmobile_scu_base; -static int shmobile_smp_scu_notifier_call(struct notifier_block *nfb, - unsigned long action, void *hcpu) +static int shmobile_scu_cpu_prepare(unsigned int cpu) { - unsigned int cpu = (long)hcpu; - - switch (action) { - case CPU_UP_PREPARE: - /* For this particular CPU register SCU SMP boot vector */ - shmobile_smp_hook(cpu, virt_to_phys(shmobile_boot_scu), - shmobile_scu_base_phys); - break; - }; - - return NOTIFY_OK; + /* For this particular CPU register SCU SMP boot vector */ + shmobile_smp_hook(cpu, virt_to_phys(shmobile_boot_scu), + shmobile_scu_base_phys); + return 0; } -static struct notifier_block shmobile_smp_scu_notifier = { - .notifier_call = shmobile_smp_scu_notifier_call, -}; - void __init shmobile_smp_scu_prepare_cpus(phys_addr_t scu_base_phys, unsigned int max_cpus) { @@ -54,7 +42,9 @@ void __init shmobile_smp_scu_prepare_cpus(phys_addr_t scu_base_phys, scu_power_mode(shmobile_scu_base, SCU_PM_NORMAL); /* Use CPU notifier for reset vector control */ - register_cpu_notifier(&shmobile_smp_scu_notifier); + cpuhp_setup_state_nocalls(CPUHP_ARM_SHMOBILE_SCU_PREPARE, + "arm/shmobile-scu:prepare", + shmobile_scu_cpu_prepare, NULL); } #ifdef CONFIG_HOTPLUG_CPU diff --git a/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c b/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c index 62437b57813e..73e3adbc1330 100644 --- a/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c +++ b/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c @@ -41,39 +41,26 @@ #define REGULATOR_IRQ_MASK BIT(2) /* IRQ2, active low */ -static void __iomem *irqc; - -static const u8 da9063_mask_regs[] = { - DA9063_REG_IRQ_MASK_A, - DA9063_REG_IRQ_MASK_B, - DA9063_REG_IRQ_MASK_C, - DA9063_REG_IRQ_MASK_D, -}; - -/* DA9210 System Control and Event Registers */ +/* start of DA9210 System Control and Event Registers */ #define DA9210_REG_MASK_A 0x54 -#define DA9210_REG_MASK_B 0x55 - -static const u8 da9210_mask_regs[] = { - DA9210_REG_MASK_A, - DA9210_REG_MASK_B, -}; - -static void da9xxx_mask_irqs(struct i2c_client *client, const u8 regs[], - unsigned int nregs) -{ - unsigned int i; - dev_info(&client->dev, "Masking %s interrupt sources\n", client->name); +static void __iomem *irqc; - for (i = 0; i < nregs; i++) { - int error = i2c_smbus_write_byte_data(client, regs[i], ~0); - if (error) { - dev_err(&client->dev, "i2c error %d\n", error); - return; - } - } -} +/* first byte sets the memory pointer, following are consecutive reg values */ +static u8 da9063_irq_clr[] = { DA9063_REG_IRQ_MASK_A, 0xff, 0xff, 0xff, 0xff }; +static u8 da9210_irq_clr[] = { DA9210_REG_MASK_A, 0xff, 0xff }; + +static struct i2c_msg da9xxx_msgs[2] = { + { + .addr = 0x58, + .len = ARRAY_SIZE(da9063_irq_clr), + .buf = da9063_irq_clr, + }, { + .addr = 0x68, + .len = ARRAY_SIZE(da9210_irq_clr), + .buf = da9210_irq_clr, + }, +}; static int regulator_quirk_notify(struct notifier_block *nb, unsigned long action, void *data) @@ -93,12 +80,15 @@ static int regulator_quirk_notify(struct notifier_block *nb, client = to_i2c_client(dev); dev_dbg(dev, "Detected %s\n", client->name); - if ((client->addr == 0x58 && !strcmp(client->name, "da9063"))) - da9xxx_mask_irqs(client, da9063_mask_regs, - ARRAY_SIZE(da9063_mask_regs)); - else if (client->addr == 0x68 && !strcmp(client->name, "da9210")) - da9xxx_mask_irqs(client, da9210_mask_regs, - ARRAY_SIZE(da9210_mask_regs)); + if ((client->addr == 0x58 && !strcmp(client->name, "da9063")) || + (client->addr == 0x68 && !strcmp(client->name, "da9210"))) { + int ret; + + dev_info(&client->dev, "clearing da9063/da9210 interrupts\n"); + ret = i2c_transfer(client->adapter, da9xxx_msgs, ARRAY_SIZE(da9xxx_msgs)); + if (ret != ARRAY_SIZE(da9xxx_msgs)) + dev_err(&client->dev, "i2c error %d\n", ret); + } mon = ioread32(irqc + IRQC_MONITOR); if (mon & REGULATOR_IRQ_MASK) diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 6344913f0804..30fe03f95c85 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -137,7 +137,7 @@ void __init init_default_cache_policy(unsigned long pmd) initial_pmd_value = pmd; - pmd &= PMD_SECT_TEX(1) | PMD_SECT_BUFFERABLE | PMD_SECT_CACHEABLE; + pmd &= PMD_SECT_CACHE_MASK; for (i = 0; i < ARRAY_SIZE(cache_policies); i++) if (cache_policies[i].pmd == pmd) { diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index a7123b4e129d..d00d52c9de3e 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -16,6 +16,7 @@ #include <asm/hwcap.h> #include <asm/pgtable-hwdef.h> #include <asm/pgtable.h> +#include <asm/memory.h> #include "proc-macros.S" diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 3d2cef6488ea..f193414d0f6f 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -170,9 +170,6 @@ static int xen_starting_cpu(unsigned int cpu) pr_info("Xen: initializing cpu%d\n", cpu); vcpup = per_cpu_ptr(xen_vcpu_info, cpu); - /* Direct vCPU id mapping for ARM guests. */ - per_cpu(xen_vcpu_id, cpu) = cpu; - info.mfn = virt_to_gfn(vcpup); info.offset = xen_offset_in_page(vcpup); @@ -330,6 +327,7 @@ static int __init xen_guest_init(void) { struct xen_add_to_physmap xatp; struct shared_info *shared_info_page = NULL; + int cpu; if (!xen_domain()) return 0; @@ -380,7 +378,8 @@ static int __init xen_guest_init(void) return -ENOMEM; /* Direct vCPU id mapping for ARM guests. */ - per_cpu(xen_vcpu_id, 0) = 0; + for_each_possible_cpu(cpu) + per_cpu(xen_vcpu_id, cpu) = cpu; xen_auto_xlat_grant_frames.count = gnttab_max_grant_frames(); if (xen_xlate_map_ballooned_pages(&xen_auto_xlat_grant_frames.pfn, diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index bc3f00f586f1..17c14a1d9112 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -4,6 +4,7 @@ config ARM64 select ACPI_GENERIC_GSI if ACPI select ACPI_REDUCED_HARDWARE_ONLY if ACPI select ACPI_MCFG if ACPI + select ARCH_CLOCKSOURCE_DATA select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE @@ -102,10 +103,8 @@ config ARM64 select NO_BOOTMEM select OF select OF_EARLY_FLATTREE - select OF_NUMA if NUMA && OF select OF_RESERVED_MEM select PCI_ECAM if ACPI - select PERF_USE_VMALLOC select POWER_RESET select POWER_SUPPLY select SPARSE_IRQ @@ -122,6 +121,9 @@ config ARCH_PHYS_ADDR_T_64BIT config MMU def_bool y +config DEBUG_RODATA + def_bool y + config ARM64_PAGE_SHIFT int default 16 if ARM64_64K_PAGES @@ -415,18 +417,13 @@ config ARM64_ERRATUM_845719 config ARM64_ERRATUM_843419 bool "Cortex-A53: 843419: A load or store might access an incorrect address" - depends on MODULES default y - select ARM64_MODULE_CMODEL_LARGE + select ARM64_MODULE_CMODEL_LARGE if MODULES help - This option builds kernel modules using the large memory model in - order to avoid the use of the ADRP instruction, which can cause - a subsequent memory access to use an incorrect address on Cortex-A53 - parts up to r0p4. - - Note that the kernel itself must be linked with a version of ld - which fixes potentially affected ADRP instructions through the - use of veneers. + This option links the kernel with '--fix-cortex-a53-843419' and + builds modules using the large memory model in order to avoid the use + of the ADRP instruction, which can cause a subsequent memory access + to use an incorrect address on Cortex-A53 parts up to r0p4. If unsure, say Y. @@ -582,7 +579,8 @@ config HOTPLUG_CPU # Common NUMA Features config NUMA bool "Numa Memory Allocation and Scheduler Support" - depends on SMP + select ACPI_NUMA if ACPI + select OF_NUMA help Enable NUMA (Non Uniform Memory Access) support. @@ -603,11 +601,18 @@ config USE_PERCPU_NUMA_NODE_ID def_bool y depends on NUMA +config HAVE_SETUP_PER_CPU_AREA + def_bool y + depends on NUMA + +config NEED_PER_CPU_EMBED_FIRST_CHUNK + def_bool y + depends on NUMA + source kernel/Kconfig.preempt source kernel/Kconfig.hz config ARCH_SUPPORTS_DEBUG_PAGEALLOC - depends on !HIBERNATION def_bool y config ARCH_HAS_HOLES_MEMORYMODEL diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug index 0cc758cdd0dc..b661fe742615 100644 --- a/arch/arm64/Kconfig.debug +++ b/arch/arm64/Kconfig.debug @@ -49,16 +49,6 @@ config DEBUG_SET_MODULE_RONX If in doubt, say Y. -config DEBUG_RODATA - bool "Make kernel text and rodata read-only" - default y - help - If this is set, kernel text and rodata will be made read-only. This - is to help catch accidental or malicious attempts to change the - kernel's executable code. - - If in doubt, say Y. - config DEBUG_ALIGN_RODATA depends on DEBUG_RODATA bool "Align linker sections up to SECTION_SIZE" diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms index be5d824ebdba..a8c77c72a831 100644 --- a/arch/arm64/Kconfig.platforms +++ b/arch/arm64/Kconfig.platforms @@ -93,6 +93,7 @@ config ARCH_MVEBU select ARMADA_CP110_SYSCON select ARMADA_37XX_CLK select MVEBU_ODMI + select MVEBU_PIC help This enables support for Marvell EBU familly, including: - Armada 3700 SoC Family @@ -159,7 +160,6 @@ config ARCH_TEGRA select CLKSRC_MMIO select CLKSRC_OF select GENERIC_CLOCKEVENTS - select HAVE_CLK select PINCTRL select RESET_CONTROLLER help diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 5b54f8c021d8..ab51aed6b6c1 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -18,6 +18,14 @@ ifneq ($(CONFIG_RELOCATABLE),) LDFLAGS_vmlinux += -pie -Bsymbolic endif +ifeq ($(CONFIG_ARM64_ERRATUM_843419),y) + ifeq ($(call ld-option, --fix-cortex-a53-843419),) +$(warning ld does not support --fix-cortex-a53-843419; kernel may be susceptible to erratum) + else +LDFLAGS_vmlinux += --fix-cortex-a53-843419 + endif +endif + KBUILD_DEFCONFIG := defconfig # Check for binutils support for specific extensions @@ -38,10 +46,12 @@ ifeq ($(CONFIG_CPU_BIG_ENDIAN), y) KBUILD_CPPFLAGS += -mbig-endian AS += -EB LD += -EB +UTS_MACHINE := aarch64_be else KBUILD_CPPFLAGS += -mlittle-endian AS += -EL LD += -EL +UTS_MACHINE := aarch64 endif CHECKFLAGS += -D__aarch64__ diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi index 445aa678f914..c2b9bcb0ef61 100644 --- a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi +++ b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi @@ -255,10 +255,10 @@ /* Local timer */ timer { compatible = "arm,armv8-timer"; - interrupts = <1 13 0xf01>, - <1 14 0xf01>, - <1 11 0xf01>, - <1 10 0xf01>; + interrupts = <1 13 0xf08>, + <1 14 0xf08>, + <1 11 0xf08>, + <1 10 0xf08>; }; timer0: timer0@ffc03000 { diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi index e502c24b0ac7..bf6c8d051002 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi @@ -102,13 +102,13 @@ timer { compatible = "arm,armv8-timer"; interrupts = <GIC_PPI 13 - (GIC_CPU_MASK_RAW(0xff) | IRQ_TYPE_EDGE_RISING)>, + (GIC_CPU_MASK_RAW(0xff) | IRQ_TYPE_LEVEL_LOW)>, <GIC_PPI 14 - (GIC_CPU_MASK_RAW(0xff) | IRQ_TYPE_EDGE_RISING)>, + (GIC_CPU_MASK_RAW(0xff) | IRQ_TYPE_LEVEL_LOW)>, <GIC_PPI 11 - (GIC_CPU_MASK_RAW(0xff) | IRQ_TYPE_EDGE_RISING)>, + (GIC_CPU_MASK_RAW(0xff) | IRQ_TYPE_LEVEL_LOW)>, <GIC_PPI 10 - (GIC_CPU_MASK_RAW(0xff) | IRQ_TYPE_EDGE_RISING)>; + (GIC_CPU_MASK_RAW(0xff) | IRQ_TYPE_LEVEL_LOW)>; }; xtal: xtal-clk { diff --git a/arch/arm64/boot/dts/apm/apm-storm.dtsi b/arch/arm64/boot/dts/apm/apm-storm.dtsi index f1c2c713f9b0..c29dab9d1834 100644 --- a/arch/arm64/boot/dts/apm/apm-storm.dtsi +++ b/arch/arm64/boot/dts/apm/apm-storm.dtsi @@ -110,10 +110,10 @@ timer { compatible = "arm,armv8-timer"; - interrupts = <1 0 0xff01>, /* Secure Phys IRQ */ - <1 13 0xff01>, /* Non-secure Phys IRQ */ - <1 14 0xff01>, /* Virt IRQ */ - <1 15 0xff01>; /* Hyp IRQ */ + interrupts = <1 0 0xff08>, /* Secure Phys IRQ */ + <1 13 0xff08>, /* Non-secure Phys IRQ */ + <1 14 0xff08>, /* Virt IRQ */ + <1 15 0xff08>; /* Hyp IRQ */ clock-frequency = <50000000>; }; diff --git a/arch/arm64/boot/dts/broadcom/bcm2835-rpi.dtsi b/arch/arm64/boot/dts/broadcom/bcm2835-rpi.dtsi new file mode 120000 index 000000000000..3937b77cb310 --- /dev/null +++ b/arch/arm64/boot/dts/broadcom/bcm2835-rpi.dtsi @@ -0,0 +1 @@ +../../../../arm/boot/dts/bcm2835-rpi.dtsi
\ No newline at end of file diff --git a/arch/arm64/boot/dts/broadcom/bcm2837-rpi-3-b.dts b/arch/arm64/boot/dts/broadcom/bcm2837-rpi-3-b.dts index 6f47dd2bb1db..7841b724e340 100644 --- a/arch/arm64/boot/dts/broadcom/bcm2837-rpi-3-b.dts +++ b/arch/arm64/boot/dts/broadcom/bcm2837-rpi-3-b.dts @@ -1,7 +1,7 @@ /dts-v1/; #include "bcm2837.dtsi" -#include "../../../../arm/boot/dts/bcm2835-rpi.dtsi" -#include "../../../../arm/boot/dts/bcm283x-rpi-smsc9514.dtsi" +#include "bcm2835-rpi.dtsi" +#include "bcm283x-rpi-smsc9514.dtsi" / { compatible = "raspberrypi,3-model-b", "brcm,bcm2837"; diff --git a/arch/arm64/boot/dts/broadcom/bcm2837.dtsi b/arch/arm64/boot/dts/broadcom/bcm2837.dtsi index f2a31d06845d..8216bbb29fe0 100644 --- a/arch/arm64/boot/dts/broadcom/bcm2837.dtsi +++ b/arch/arm64/boot/dts/broadcom/bcm2837.dtsi @@ -1,4 +1,4 @@ -#include "../../../../arm/boot/dts/bcm283x.dtsi" +#include "bcm283x.dtsi" / { compatible = "brcm,bcm2836"; diff --git a/arch/arm64/boot/dts/broadcom/bcm283x-rpi-smsc9514.dtsi b/arch/arm64/boot/dts/broadcom/bcm283x-rpi-smsc9514.dtsi new file mode 120000 index 000000000000..dca7c057d5a5 --- /dev/null +++ b/arch/arm64/boot/dts/broadcom/bcm283x-rpi-smsc9514.dtsi @@ -0,0 +1 @@ +../../../../arm/boot/dts/bcm283x-rpi-smsc9514.dtsi
\ No newline at end of file diff --git a/arch/arm64/boot/dts/broadcom/bcm283x.dtsi b/arch/arm64/boot/dts/broadcom/bcm283x.dtsi new file mode 120000 index 000000000000..5f54e4cab99b --- /dev/null +++ b/arch/arm64/boot/dts/broadcom/bcm283x.dtsi @@ -0,0 +1 @@ +../../../../arm/boot/dts/bcm283x.dtsi
\ No newline at end of file diff --git a/arch/arm64/boot/dts/broadcom/ns2.dtsi b/arch/arm64/boot/dts/broadcom/ns2.dtsi index f53b0955bfd3..d4a12fad8afd 100644 --- a/arch/arm64/boot/dts/broadcom/ns2.dtsi +++ b/arch/arm64/boot/dts/broadcom/ns2.dtsi @@ -88,13 +88,13 @@ timer { compatible = "arm,armv8-timer"; interrupts = <GIC_PPI 13 (GIC_CPU_MASK_RAW(0xff) | - IRQ_TYPE_EDGE_RISING)>, + IRQ_TYPE_LEVEL_LOW)>, <GIC_PPI 14 (GIC_CPU_MASK_RAW(0xff) | - IRQ_TYPE_EDGE_RISING)>, + IRQ_TYPE_LEVEL_LOW)>, <GIC_PPI 11 (GIC_CPU_MASK_RAW(0xff) | - IRQ_TYPE_EDGE_RISING)>, + IRQ_TYPE_LEVEL_LOW)>, <GIC_PPI 10 (GIC_CPU_MASK_RAW(0xff) | - IRQ_TYPE_EDGE_RISING)>; + IRQ_TYPE_LEVEL_LOW)>; }; pmu { diff --git a/arch/arm64/boot/dts/cavium/thunder-88xx.dtsi b/arch/arm64/boot/dts/cavium/thunder-88xx.dtsi index 2eb9b225f0bc..04dc8a8d1539 100644 --- a/arch/arm64/boot/dts/cavium/thunder-88xx.dtsi +++ b/arch/arm64/boot/dts/cavium/thunder-88xx.dtsi @@ -354,10 +354,10 @@ timer { compatible = "arm,armv8-timer"; - interrupts = <1 13 0xff01>, - <1 14 0xff01>, - <1 11 0xff01>, - <1 10 0xff01>; + interrupts = <1 13 4>, + <1 14 4>, + <1 11 4>, + <1 10 4>; }; pmu { diff --git a/arch/arm64/boot/dts/exynos/exynos7.dtsi b/arch/arm64/boot/dts/exynos/exynos7.dtsi index ca663dfe5189..162831546e18 100644 --- a/arch/arm64/boot/dts/exynos/exynos7.dtsi +++ b/arch/arm64/boot/dts/exynos/exynos7.dtsi @@ -473,10 +473,10 @@ timer { compatible = "arm,armv8-timer"; - interrupts = <1 13 0xff01>, - <1 14 0xff01>, - <1 11 0xff01>, - <1 10 0xff01>; + interrupts = <1 13 0xff08>, + <1 14 0xff08>, + <1 11 0xff08>, + <1 10 0xff08>; }; pmu_system_controller: system-controller@105c0000 { diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi index e669fbd7f9c3..a67e210e2019 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi @@ -119,10 +119,10 @@ timer { compatible = "arm,armv8-timer"; - interrupts = <1 13 0x1>, /* Physical Secure PPI */ - <1 14 0x1>, /* Physical Non-Secure PPI */ - <1 11 0x1>, /* Virtual PPI */ - <1 10 0x1>; /* Hypervisor PPI */ + interrupts = <1 13 0xf08>, /* Physical Secure PPI */ + <1 14 0xf08>, /* Physical Non-Secure PPI */ + <1 11 0xf08>, /* Virtual PPI */ + <1 10 0xf08>; /* Hypervisor PPI */ }; pmu { diff --git a/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi index 21023a388c29..e3b6034ea5d9 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi @@ -191,10 +191,10 @@ timer { compatible = "arm,armv8-timer"; - interrupts = <1 13 0x8>, /* Physical Secure PPI, active-low */ - <1 14 0x8>, /* Physical Non-Secure PPI, active-low */ - <1 11 0x8>, /* Virtual PPI, active-low */ - <1 10 0x8>; /* Hypervisor PPI, active-low */ + interrupts = <1 13 4>, /* Physical Secure PPI, active-low */ + <1 14 4>, /* Physical Non-Secure PPI, active-low */ + <1 11 4>, /* Virtual PPI, active-low */ + <1 10 4>; /* Hypervisor PPI, active-low */ }; pmu { diff --git a/arch/arm64/boot/dts/marvell/armada-ap806.dtsi b/arch/arm64/boot/dts/marvell/armada-ap806.dtsi index eab1a42fb934..c2a6745f168c 100644 --- a/arch/arm64/boot/dts/marvell/armada-ap806.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-ap806.dtsi @@ -122,10 +122,10 @@ timer { compatible = "arm,armv8-timer"; - interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_EDGE_RISING)>, - <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_EDGE_RISING)>, - <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_EDGE_RISING)>, - <GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_EDGE_RISING)>; + interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>, + <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>, + <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>, + <GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>; }; odmi: odmi@300000 { diff --git a/arch/arm64/boot/dts/socionext/uniphier-ph1-ld20.dtsi b/arch/arm64/boot/dts/socionext/uniphier-ph1-ld20.dtsi index c223915f0907..d73bdc8c9115 100644 --- a/arch/arm64/boot/dts/socionext/uniphier-ph1-ld20.dtsi +++ b/arch/arm64/boot/dts/socionext/uniphier-ph1-ld20.dtsi @@ -129,10 +129,10 @@ timer { compatible = "arm,armv8-timer"; - interrupts = <1 13 0xf01>, - <1 14 0xf01>, - <1 11 0xf01>, - <1 10 0xf01>; + interrupts = <1 13 4>, + <1 14 4>, + <1 11 4>, + <1 10 4>; }; soc { diff --git a/arch/arm64/boot/dts/xilinx/zynqmp.dtsi b/arch/arm64/boot/dts/xilinx/zynqmp.dtsi index e595f22e7e4b..3e2e51fbd2bc 100644 --- a/arch/arm64/boot/dts/xilinx/zynqmp.dtsi +++ b/arch/arm64/boot/dts/xilinx/zynqmp.dtsi @@ -65,10 +65,10 @@ timer { compatible = "arm,armv8-timer"; interrupt-parent = <&gic>; - interrupts = <1 13 0xf01>, - <1 14 0xf01>, - <1 11 0xf01>, - <1 10 0xf01>; + interrupts = <1 13 0xf08>, + <1 14 0xf08>, + <1 11 0xf08>, + <1 10 0xf08>; }; amba_apu { diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index 5c888049d061..6b2aa0fd6cd0 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -216,7 +216,7 @@ static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); } - if (nbytes) { + if (walk.nbytes % AES_BLOCK_SIZE) { u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE; u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE; u8 __aligned(8) tail[AES_BLOCK_SIZE]; diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index f43d2c44c765..44e1d7f10add 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -1,4 +1,3 @@ -generic-y += bug.h generic-y += bugs.h generic-y += clkdev.h generic-y += cputime.h @@ -10,7 +9,6 @@ generic-y += dma-contiguous.h generic-y += early_ioremap.h generic-y += emergency-restart.h generic-y += errno.h -generic-y += ftrace.h generic-y += hw_irq.h generic-y += ioctl.h generic-y += ioctls.h @@ -27,12 +25,10 @@ generic-y += mman.h generic-y += msgbuf.h generic-y += msi.h generic-y += mutex.h -generic-y += pci.h generic-y += poll.h generic-y += preempt.h generic-y += resource.h generic-y += rwsem.h -generic-y += sections.h generic-y += segment.h generic-y += sembuf.h generic-y += serial.h @@ -45,7 +41,6 @@ generic-y += swab.h generic-y += switch_to.h generic-y += termbits.h generic-y += termios.h -generic-y += topology.h generic-y += trace_clock.h generic-y += types.h generic-y += unaligned.h diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index 5420cb0fcb3e..e517088d635f 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -12,7 +12,7 @@ #ifndef _ASM_ACPI_H #define _ASM_ACPI_H -#include <linux/mm.h> +#include <linux/memblock.h> #include <linux/psci.h> #include <asm/cputype.h> @@ -32,7 +32,11 @@ static inline void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) { - if (!page_is_ram(phys >> PAGE_SHIFT)) + /* + * EFI's reserve_regions() call adds memory with the WB attribute + * to memblock via early_init_dt_add_memory_arch(). + */ + if (!memblock_is_memory(phys)) return ioremap(phys, size); return ioremap_cache(phys, size); diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index 8746ff6abd77..55101bd86b98 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -2,6 +2,7 @@ #define __ASM_ALTERNATIVE_H #include <asm/cpufeature.h> +#include <asm/insn.h> #ifndef __ASSEMBLY__ @@ -90,34 +91,55 @@ void apply_alternatives(void *start, size_t length); .endm /* - * Begin an alternative code sequence. + * Alternative sequences + * + * The code for the case where the capability is not present will be + * assembled and linked as normal. There are no restrictions on this + * code. + * + * The code for the case where the capability is present will be + * assembled into a special section to be used for dynamic patching. + * Code for that case must: + * + * 1. Be exactly the same length (in bytes) as the default code + * sequence. * - * The code that follows this macro will be assembled and linked as - * normal. There are no restrictions on this code. + * 2. Not contain a branch target that is used outside of the + * alternative sequence it is defined in (branches into an + * alternative sequence are not fixed up). + */ + +/* + * Begin an alternative code sequence. */ .macro alternative_if_not cap + .set .Lasm_alt_mode, 0 .pushsection .altinstructions, "a" altinstruction_entry 661f, 663f, \cap, 662f-661f, 664f-663f .popsection 661: .endm +.macro alternative_if cap + .set .Lasm_alt_mode, 1 + .pushsection .altinstructions, "a" + altinstruction_entry 663f, 661f, \cap, 664f-663f, 662f-661f + .popsection + .pushsection .altinstr_replacement, "ax" + .align 2 /* So GAS knows label 661 is suitably aligned */ +661: +.endm + /* - * Provide the alternative code sequence. - * - * The code that follows this macro is assembled into a special - * section to be used for dynamic patching. Code that follows this - * macro must: - * - * 1. Be exactly the same length (in bytes) as the default code - * sequence. - * - * 2. Not contain a branch target that is used outside of the - * alternative sequence it is defined in (branches into an - * alternative sequence are not fixed up). + * Provide the other half of the alternative code sequence. */ .macro alternative_else -662: .pushsection .altinstr_replacement, "ax" +662: + .if .Lasm_alt_mode==0 + .pushsection .altinstr_replacement, "ax" + .else + .popsection + .endif 663: .endm @@ -125,11 +147,25 @@ void apply_alternatives(void *start, size_t length); * Complete an alternative code sequence. */ .macro alternative_endif -664: .popsection +664: + .if .Lasm_alt_mode==0 + .popsection + .endif .org . - (664b-663b) + (662b-661b) .org . - (662b-661b) + (664b-663b) .endm +/* + * Provides a trivial alternative or default sequence consisting solely + * of NOPs. The number of NOPs is chosen automatically to match the + * previous case. + */ +.macro alternative_else_nop_endif +alternative_else + nops (662b-661b) / AARCH64_INSN_SIZE +alternative_endif +.endm + #define _ALTERNATIVE_CFG(insn1, insn2, cap, cfg, ...) \ alternative_insn insn1, insn2, cap, IS_ENABLED(cfg) diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index 8ec88e5b290f..fc2a0cb47b2c 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -28,6 +28,7 @@ #define ICC_CTLR_EL1 sys_reg(3, 0, 12, 12, 4) #define ICC_SRE_EL1 sys_reg(3, 0, 12, 12, 5) #define ICC_GRPEN1_EL1 sys_reg(3, 0, 12, 12, 7) +#define ICC_BPR1_EL1 sys_reg(3, 0, 12, 12, 3) #define ICC_SRE_EL2 sys_reg(3, 4, 12, 9, 5) @@ -165,6 +166,11 @@ static inline void gic_write_sre(u32 val) isb(); } +static inline void gic_write_bpr1(u32 val) +{ + asm volatile("msr_s " __stringify(ICC_BPR1_EL1) ", %0" : : "r" (val)); +} + #define gic_read_typer(c) readq_relaxed(c) #define gic_write_irouter(v, c) writeq_relaxed(v, c) diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index fbe0ca31a99c..eaa5bbe3fa87 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -20,13 +20,55 @@ #define __ASM_ARCH_TIMER_H #include <asm/barrier.h> +#include <asm/sysreg.h> #include <linux/bug.h> #include <linux/init.h> +#include <linux/jump_label.h> #include <linux/types.h> #include <clocksource/arm_arch_timer.h> +#if IS_ENABLED(CONFIG_FSL_ERRATUM_A008585) +extern struct static_key_false arch_timer_read_ool_enabled; +#define needs_fsl_a008585_workaround() \ + static_branch_unlikely(&arch_timer_read_ool_enabled) +#else +#define needs_fsl_a008585_workaround() false +#endif + +u32 __fsl_a008585_read_cntp_tval_el0(void); +u32 __fsl_a008585_read_cntv_tval_el0(void); +u64 __fsl_a008585_read_cntvct_el0(void); + +/* + * The number of retries is an arbitrary value well beyond the highest number + * of iterations the loop has been observed to take. + */ +#define __fsl_a008585_read_reg(reg) ({ \ + u64 _old, _new; \ + int _retries = 200; \ + \ + do { \ + _old = read_sysreg(reg); \ + _new = read_sysreg(reg); \ + _retries--; \ + } while (unlikely(_old != _new) && _retries); \ + \ + WARN_ON_ONCE(!_retries); \ + _new; \ +}) + +#define arch_timer_reg_read_stable(reg) \ +({ \ + u64 _val; \ + if (needs_fsl_a008585_workaround()) \ + _val = __fsl_a008585_read_##reg(); \ + else \ + _val = read_sysreg(reg); \ + _val; \ +}) + /* * These register accessors are marked inline so the compiler can * nicely work out which register we want, and chuck away the rest of @@ -38,19 +80,19 @@ void arch_timer_reg_write_cp15(int access, enum arch_timer_reg reg, u32 val) if (access == ARCH_TIMER_PHYS_ACCESS) { switch (reg) { case ARCH_TIMER_REG_CTRL: - asm volatile("msr cntp_ctl_el0, %0" : : "r" (val)); + write_sysreg(val, cntp_ctl_el0); break; case ARCH_TIMER_REG_TVAL: - asm volatile("msr cntp_tval_el0, %0" : : "r" (val)); + write_sysreg(val, cntp_tval_el0); break; } } else if (access == ARCH_TIMER_VIRT_ACCESS) { switch (reg) { case ARCH_TIMER_REG_CTRL: - asm volatile("msr cntv_ctl_el0, %0" : : "r" (val)); + write_sysreg(val, cntv_ctl_el0); break; case ARCH_TIMER_REG_TVAL: - asm volatile("msr cntv_tval_el0, %0" : : "r" (val)); + write_sysreg(val, cntv_tval_el0); break; } } @@ -61,48 +103,38 @@ void arch_timer_reg_write_cp15(int access, enum arch_timer_reg reg, u32 val) static __always_inline u32 arch_timer_reg_read_cp15(int access, enum arch_timer_reg reg) { - u32 val; - if (access == ARCH_TIMER_PHYS_ACCESS) { switch (reg) { case ARCH_TIMER_REG_CTRL: - asm volatile("mrs %0, cntp_ctl_el0" : "=r" (val)); - break; + return read_sysreg(cntp_ctl_el0); case ARCH_TIMER_REG_TVAL: - asm volatile("mrs %0, cntp_tval_el0" : "=r" (val)); - break; + return arch_timer_reg_read_stable(cntp_tval_el0); } } else if (access == ARCH_TIMER_VIRT_ACCESS) { switch (reg) { case ARCH_TIMER_REG_CTRL: - asm volatile("mrs %0, cntv_ctl_el0" : "=r" (val)); - break; + return read_sysreg(cntv_ctl_el0); case ARCH_TIMER_REG_TVAL: - asm volatile("mrs %0, cntv_tval_el0" : "=r" (val)); - break; + return arch_timer_reg_read_stable(cntv_tval_el0); } } - return val; + BUG(); } static inline u32 arch_timer_get_cntfrq(void) { - u32 val; - asm volatile("mrs %0, cntfrq_el0" : "=r" (val)); - return val; + return read_sysreg(cntfrq_el0); } static inline u32 arch_timer_get_cntkctl(void) { - u32 cntkctl; - asm volatile("mrs %0, cntkctl_el1" : "=r" (cntkctl)); - return cntkctl; + return read_sysreg(cntkctl_el1); } static inline void arch_timer_set_cntkctl(u32 cntkctl) { - asm volatile("msr cntkctl_el1, %0" : : "r" (cntkctl)); + write_sysreg(cntkctl, cntkctl_el1); } static inline u64 arch_counter_get_cntpct(void) @@ -116,12 +148,8 @@ static inline u64 arch_counter_get_cntpct(void) static inline u64 arch_counter_get_cntvct(void) { - u64 cval; - isb(); - asm volatile("mrs %0, cntvct_el0" : "=r" (cval)); - - return cval; + return arch_timer_reg_read_stable(cntvct_el0); } static inline int arch_timer_arch_init(void) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index d5025c69ca81..28bfe6132eb6 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -87,6 +87,15 @@ .endm /* + * NOP sequence + */ + .macro nops, num + .rept \num + nop + .endr + .endm + +/* * Emit an entry into the exception table */ .macro _asm_extable, from, to @@ -216,11 +225,26 @@ lr .req x30 // link register .macro mmid, rd, rn ldr \rd, [\rn, #MM_CONTEXT_ID] .endm +/* + * read_ctr - read CTR_EL0. If the system has mismatched + * cache line sizes, provide the system wide safe value + * from arm64_ftr_reg_ctrel0.sys_val + */ + .macro read_ctr, reg +alternative_if_not ARM64_MISMATCHED_CACHE_LINE_SIZE + mrs \reg, ctr_el0 // read CTR + nop +alternative_else + ldr_l \reg, arm64_ftr_reg_ctrel0 + ARM64_FTR_SYSVAL +alternative_endif + .endm + /* - * dcache_line_size - get the minimum D-cache line size from the CTR register. + * raw_dcache_line_size - get the minimum D-cache line size on this CPU + * from the CTR register. */ - .macro dcache_line_size, reg, tmp + .macro raw_dcache_line_size, reg, tmp mrs \tmp, ctr_el0 // read CTR ubfm \tmp, \tmp, #16, #19 // cache line size encoding mov \reg, #4 // bytes per word @@ -228,9 +252,20 @@ lr .req x30 // link register .endm /* - * icache_line_size - get the minimum I-cache line size from the CTR register. + * dcache_line_size - get the safe D-cache line size across all CPUs */ - .macro icache_line_size, reg, tmp + .macro dcache_line_size, reg, tmp + read_ctr \tmp + ubfm \tmp, \tmp, #16, #19 // cache line size encoding + mov \reg, #4 // bytes per word + lsl \reg, \reg, \tmp // actual cache line size + .endm + +/* + * raw_icache_line_size - get the minimum I-cache line size on this CPU + * from the CTR register. + */ + .macro raw_icache_line_size, reg, tmp mrs \tmp, ctr_el0 // read CTR and \tmp, \tmp, #0xf // cache line size encoding mov \reg, #4 // bytes per word @@ -238,6 +273,16 @@ lr .req x30 // link register .endm /* + * icache_line_size - get the safe I-cache line size across all CPUs + */ + .macro icache_line_size, reg, tmp + read_ctr \tmp + and \tmp, \tmp, #0xf // cache line size encoding + mov \reg, #4 // bytes per word + lsl \reg, \reg, \tmp // actual cache line size + .endm + +/* * tcr_set_idmap_t0sz - update TCR.T0SZ so that we can load the ID map */ .macro tcr_set_idmap_t0sz, valreg, tmpreg diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index b5890be8f257..7457ce082b5f 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -86,8 +86,8 @@ static inline int atomic_add_return##name(int i, atomic_t *v) \ \ asm volatile(ARM64_LSE_ATOMIC_INSN( \ /* LL/SC */ \ - " nop\n" \ - __LL_SC_ATOMIC(add_return##name), \ + __LL_SC_ATOMIC(add_return##name) \ + __nops(1), \ /* LSE atomics */ \ " ldadd" #mb " %w[i], w30, %[v]\n" \ " add %w[i], %w[i], w30") \ @@ -112,8 +112,8 @@ static inline void atomic_and(int i, atomic_t *v) asm volatile(ARM64_LSE_ATOMIC_INSN( /* LL/SC */ - " nop\n" - __LL_SC_ATOMIC(and), + __LL_SC_ATOMIC(and) + __nops(1), /* LSE atomics */ " mvn %w[i], %w[i]\n" " stclr %w[i], %[v]") @@ -130,8 +130,8 @@ static inline int atomic_fetch_and##name(int i, atomic_t *v) \ \ asm volatile(ARM64_LSE_ATOMIC_INSN( \ /* LL/SC */ \ - " nop\n" \ - __LL_SC_ATOMIC(fetch_and##name), \ + __LL_SC_ATOMIC(fetch_and##name) \ + __nops(1), \ /* LSE atomics */ \ " mvn %w[i], %w[i]\n" \ " ldclr" #mb " %w[i], %w[i], %[v]") \ @@ -156,8 +156,8 @@ static inline void atomic_sub(int i, atomic_t *v) asm volatile(ARM64_LSE_ATOMIC_INSN( /* LL/SC */ - " nop\n" - __LL_SC_ATOMIC(sub), + __LL_SC_ATOMIC(sub) + __nops(1), /* LSE atomics */ " neg %w[i], %w[i]\n" " stadd %w[i], %[v]") @@ -174,9 +174,8 @@ static inline int atomic_sub_return##name(int i, atomic_t *v) \ \ asm volatile(ARM64_LSE_ATOMIC_INSN( \ /* LL/SC */ \ - " nop\n" \ __LL_SC_ATOMIC(sub_return##name) \ - " nop", \ + __nops(2), \ /* LSE atomics */ \ " neg %w[i], %w[i]\n" \ " ldadd" #mb " %w[i], w30, %[v]\n" \ @@ -203,8 +202,8 @@ static inline int atomic_fetch_sub##name(int i, atomic_t *v) \ \ asm volatile(ARM64_LSE_ATOMIC_INSN( \ /* LL/SC */ \ - " nop\n" \ - __LL_SC_ATOMIC(fetch_sub##name), \ + __LL_SC_ATOMIC(fetch_sub##name) \ + __nops(1), \ /* LSE atomics */ \ " neg %w[i], %w[i]\n" \ " ldadd" #mb " %w[i], %w[i], %[v]") \ @@ -284,8 +283,8 @@ static inline long atomic64_add_return##name(long i, atomic64_t *v) \ \ asm volatile(ARM64_LSE_ATOMIC_INSN( \ /* LL/SC */ \ - " nop\n" \ - __LL_SC_ATOMIC64(add_return##name), \ + __LL_SC_ATOMIC64(add_return##name) \ + __nops(1), \ /* LSE atomics */ \ " ldadd" #mb " %[i], x30, %[v]\n" \ " add %[i], %[i], x30") \ @@ -310,8 +309,8 @@ static inline void atomic64_and(long i, atomic64_t *v) asm volatile(ARM64_LSE_ATOMIC_INSN( /* LL/SC */ - " nop\n" - __LL_SC_ATOMIC64(and), + __LL_SC_ATOMIC64(and) + __nops(1), /* LSE atomics */ " mvn %[i], %[i]\n" " stclr %[i], %[v]") @@ -328,8 +327,8 @@ static inline long atomic64_fetch_and##name(long i, atomic64_t *v) \ \ asm volatile(ARM64_LSE_ATOMIC_INSN( \ /* LL/SC */ \ - " nop\n" \ - __LL_SC_ATOMIC64(fetch_and##name), \ + __LL_SC_ATOMIC64(fetch_and##name) \ + __nops(1), \ /* LSE atomics */ \ " mvn %[i], %[i]\n" \ " ldclr" #mb " %[i], %[i], %[v]") \ @@ -354,8 +353,8 @@ static inline void atomic64_sub(long i, atomic64_t *v) asm volatile(ARM64_LSE_ATOMIC_INSN( /* LL/SC */ - " nop\n" - __LL_SC_ATOMIC64(sub), + __LL_SC_ATOMIC64(sub) + __nops(1), /* LSE atomics */ " neg %[i], %[i]\n" " stadd %[i], %[v]") @@ -372,9 +371,8 @@ static inline long atomic64_sub_return##name(long i, atomic64_t *v) \ \ asm volatile(ARM64_LSE_ATOMIC_INSN( \ /* LL/SC */ \ - " nop\n" \ __LL_SC_ATOMIC64(sub_return##name) \ - " nop", \ + __nops(2), \ /* LSE atomics */ \ " neg %[i], %[i]\n" \ " ldadd" #mb " %[i], x30, %[v]\n" \ @@ -401,8 +399,8 @@ static inline long atomic64_fetch_sub##name(long i, atomic64_t *v) \ \ asm volatile(ARM64_LSE_ATOMIC_INSN( \ /* LL/SC */ \ - " nop\n" \ - __LL_SC_ATOMIC64(fetch_sub##name), \ + __LL_SC_ATOMIC64(fetch_sub##name) \ + __nops(1), \ /* LSE atomics */ \ " neg %[i], %[i]\n" \ " ldadd" #mb " %[i], %[i], %[v]") \ @@ -426,13 +424,8 @@ static inline long atomic64_dec_if_positive(atomic64_t *v) asm volatile(ARM64_LSE_ATOMIC_INSN( /* LL/SC */ - " nop\n" __LL_SC_ATOMIC64(dec_if_positive) - " nop\n" - " nop\n" - " nop\n" - " nop\n" - " nop", + __nops(6), /* LSE atomics */ "1: ldr x30, %[v]\n" " subs %[ret], x30, #1\n" @@ -464,9 +457,8 @@ static inline unsigned long __cmpxchg_case_##name(volatile void *ptr, \ \ asm volatile(ARM64_LSE_ATOMIC_INSN( \ /* LL/SC */ \ - " nop\n" \ - __LL_SC_CMPXCHG(name) \ - " nop", \ + __LL_SC_CMPXCHG(name) \ + __nops(2), \ /* LSE atomics */ \ " mov " #w "30, %" #w "[old]\n" \ " cas" #mb #sz "\t" #w "30, %" #w "[new], %[v]\n" \ @@ -517,10 +509,8 @@ static inline long __cmpxchg_double##name(unsigned long old1, \ \ asm volatile(ARM64_LSE_ATOMIC_INSN( \ /* LL/SC */ \ - " nop\n" \ - " nop\n" \ - " nop\n" \ - __LL_SC_CMPXCHG_DBL(name), \ + __LL_SC_CMPXCHG_DBL(name) \ + __nops(3), \ /* LSE atomics */ \ " casp" #mb "\t%[old1], %[old2], %[new1], %[new2], %[v]\n"\ " eor %[old1], %[old1], %[oldval1]\n" \ diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index 4eea7f618dce..4e0497f581a0 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -20,6 +20,9 @@ #ifndef __ASSEMBLY__ +#define __nops(n) ".rept " #n "\nnop\n.endr\n" +#define nops(n) asm volatile(__nops(n)) + #define sev() asm volatile("sev" : : : "memory") #define wfe() asm volatile("wfe" : : : "memory") #define wfi() asm volatile("wfi" : : : "memory") diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index c64268dbff64..2e5fb976a572 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -68,6 +68,7 @@ extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); extern void flush_icache_range(unsigned long start, unsigned long end); extern void __flush_dcache_area(void *addr, size_t len); +extern void __clean_dcache_area_poc(void *addr, size_t len); extern void __clean_dcache_area_pou(void *addr, size_t len); extern long __flush_cache_user_range(unsigned long start, unsigned long end); @@ -85,7 +86,7 @@ static inline void flush_cache_page(struct vm_area_struct *vma, */ extern void __dma_map_area(const void *, size_t, int); extern void __dma_unmap_area(const void *, size_t, int); -extern void __dma_flush_range(const void *, const void *); +extern void __dma_flush_area(const void *, size_t); /* * Copy user data from/to a page which is mapped into a different diff --git a/arch/arm64/include/asm/clocksource.h b/arch/arm64/include/asm/clocksource.h new file mode 100644 index 000000000000..0b350a7e26f3 --- /dev/null +++ b/arch/arm64/include/asm/clocksource.h @@ -0,0 +1,8 @@ +#ifndef _ASM_CLOCKSOURCE_H +#define _ASM_CLOCKSOURCE_H + +struct arch_clocksource_data { + bool vdso_direct; /* Usable for direct VDSO access? */ +}; + +#endif diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index bd86a79491bc..91b26d26af8a 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -43,10 +43,8 @@ static inline unsigned long __xchg_case_##name(unsigned long x, \ " cbnz %w1, 1b\n" \ " " #mb, \ /* LSE atomics */ \ - " nop\n" \ - " nop\n" \ " swp" #acq_lse #rel #sz "\t%" #w "3, %" #w "0, %2\n" \ - " nop\n" \ + __nops(3) \ " " #nop_lse) \ : "=&r" (ret), "=&r" (tmp), "+Q" (*(u8 *)ptr) \ : "r" (x) \ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 7099f26e3702..758d74fedfad 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -9,6 +9,8 @@ #ifndef __ASM_CPUFEATURE_H #define __ASM_CPUFEATURE_H +#include <linux/jump_label.h> + #include <asm/hwcap.h> #include <asm/sysreg.h> @@ -37,8 +39,9 @@ #define ARM64_WORKAROUND_CAVIUM_27456 12 #define ARM64_HAS_32BIT_EL0 13 #define ARM64_HYP_OFFSET_LOW 14 +#define ARM64_MISMATCHED_CACHE_LINE_SIZE 15 -#define ARM64_NCAPS 15 +#define ARM64_NCAPS 16 #ifndef __ASSEMBLY__ @@ -63,7 +66,7 @@ struct arm64_ftr_bits { enum ftr_type type; u8 shift; u8 width; - s64 safe_val; /* safe value for discrete features */ + s64 safe_val; /* safe value for FTR_EXACT features */ }; /* @@ -72,13 +75,14 @@ struct arm64_ftr_bits { * @sys_val Safe value across the CPUs (system view) */ struct arm64_ftr_reg { - u32 sys_id; - const char *name; - u64 strict_mask; - u64 sys_val; - struct arm64_ftr_bits *ftr_bits; + const char *name; + u64 strict_mask; + u64 sys_val; + const struct arm64_ftr_bits *ftr_bits; }; +extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0; + /* scope of capability check */ enum { SCOPE_SYSTEM, @@ -109,6 +113,7 @@ struct arm64_cpu_capabilities { }; extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); +extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS]; bool this_cpu_has_cap(unsigned int cap); @@ -121,16 +126,21 @@ static inline bool cpus_have_cap(unsigned int num) { if (num >= ARM64_NCAPS) return false; - return test_bit(num, cpu_hwcaps); + if (__builtin_constant_p(num)) + return static_branch_unlikely(&cpu_hwcap_keys[num]); + else + return test_bit(num, cpu_hwcaps); } static inline void cpus_set_cap(unsigned int num) { - if (num >= ARM64_NCAPS) + if (num >= ARM64_NCAPS) { pr_warn("Attempt to set an illegal CPU capability (%d >= %d)\n", num, ARM64_NCAPS); - else + } else { __set_bit(num, cpu_hwcaps); + static_branch_enable(&cpu_hwcap_keys[num]); + } } static inline int __attribute_const__ @@ -157,7 +167,7 @@ cpuid_feature_extract_unsigned_field(u64 features, int field) return cpuid_feature_extract_unsigned_field_width(features, field, 4); } -static inline u64 arm64_ftr_mask(struct arm64_ftr_bits *ftrp) +static inline u64 arm64_ftr_mask(const struct arm64_ftr_bits *ftrp) { return (u64)GENMASK(ftrp->shift + ftrp->width - 1, ftrp->shift); } @@ -170,7 +180,7 @@ cpuid_feature_extract_field(u64 features, int field, bool sign) cpuid_feature_extract_unsigned_field(features, field); } -static inline s64 arm64_ftr_value(struct arm64_ftr_bits *ftrp, u64 val) +static inline s64 arm64_ftr_value(const struct arm64_ftr_bits *ftrp, u64 val) { return (s64)cpuid_feature_extract_field(val, ftrp->shift, ftrp->sign); } @@ -193,11 +203,11 @@ void __init setup_cpu_features(void); void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, const char *info); void enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps); -void check_local_cpu_errata(void); -void __init enable_errata_workarounds(void); +void check_local_cpu_capabilities(void); -void verify_local_cpu_errata(void); -void verify_local_cpu_capabilities(void); +void update_cpu_errata_workarounds(void); +void __init enable_errata_workarounds(void); +void verify_local_cpu_errata_workarounds(void); u64 read_system_reg(u32 id); diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 9d9fd4b9a72e..26a68ddb11c1 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -93,11 +93,7 @@ #include <asm/sysreg.h> -#define read_cpuid(reg) ({ \ - u64 __val; \ - asm("mrs_s %0, " __stringify(SYS_ ## reg) : "=r" (__val)); \ - __val; \ -}) +#define read_cpuid(reg) read_sysreg_s(SYS_ ## reg) /* * The CPU ID never changes at run time, so we might as well tell the diff --git a/arch/arm64/include/asm/dcc.h b/arch/arm64/include/asm/dcc.h index 65e0190e97c8..836b05630003 100644 --- a/arch/arm64/include/asm/dcc.h +++ b/arch/arm64/include/asm/dcc.h @@ -21,21 +21,16 @@ #define __ASM_DCC_H #include <asm/barrier.h> +#include <asm/sysreg.h> static inline u32 __dcc_getstatus(void) { - u32 ret; - - asm volatile("mrs %0, mdccsr_el0" : "=r" (ret)); - - return ret; + return read_sysreg(mdccsr_el0); } static inline char __dcc_getchar(void) { - char c; - - asm volatile("mrs %0, dbgdtrrx_el0" : "=r" (c)); + char c = read_sysreg(dbgdtrrx_el0); isb(); return c; @@ -47,8 +42,7 @@ static inline void __dcc_putchar(char c) * The typecast is to make absolutely certain that 'c' is * zero-extended. */ - asm volatile("msr dbgdtrtx_el0, %0" - : : "r" ((unsigned long)(unsigned char)c)); + write_sysreg((unsigned char)c, dbgdtrtx_el0); isb(); } diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index 4b6b3f72a215..b71420a12f26 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -61,8 +61,6 @@ #define AARCH64_BREAK_KGDB_DYN_DBG \ (AARCH64_BREAK_MON | (KGDB_DYN_DBG_BRK_IMM << 5)) -#define KGDB_DYN_BRK_INS_BYTE(x) \ - ((AARCH64_BREAK_KGDB_DYN_DBG >> (8 * (x))) & 0xff) #define CACHE_FLUSH_IS_SAFE 1 diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index f772e15c4766..d14c478976d0 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -78,6 +78,23 @@ #define ESR_ELx_IL (UL(1) << 25) #define ESR_ELx_ISS_MASK (ESR_ELx_IL - 1) + +/* ISS field definitions shared by different classes */ +#define ESR_ELx_WNR (UL(1) << 6) + +/* Shared ISS field definitions for Data/Instruction aborts */ +#define ESR_ELx_EA (UL(1) << 9) +#define ESR_ELx_S1PTW (UL(1) << 7) + +/* Shared ISS fault status code(IFSC/DFSC) for Data/Instruction aborts */ +#define ESR_ELx_FSC (0x3F) +#define ESR_ELx_FSC_TYPE (0x3C) +#define ESR_ELx_FSC_EXTABT (0x10) +#define ESR_ELx_FSC_ACCESS (0x08) +#define ESR_ELx_FSC_FAULT (0x04) +#define ESR_ELx_FSC_PERM (0x0C) + +/* ISS field definitions for Data Aborts */ #define ESR_ELx_ISV (UL(1) << 24) #define ESR_ELx_SAS_SHIFT (22) #define ESR_ELx_SAS (UL(3) << ESR_ELx_SAS_SHIFT) @@ -86,16 +103,9 @@ #define ESR_ELx_SRT_MASK (UL(0x1F) << ESR_ELx_SRT_SHIFT) #define ESR_ELx_SF (UL(1) << 15) #define ESR_ELx_AR (UL(1) << 14) -#define ESR_ELx_EA (UL(1) << 9) #define ESR_ELx_CM (UL(1) << 8) -#define ESR_ELx_S1PTW (UL(1) << 7) -#define ESR_ELx_WNR (UL(1) << 6) -#define ESR_ELx_FSC (0x3F) -#define ESR_ELx_FSC_TYPE (0x3C) -#define ESR_ELx_FSC_EXTABT (0x10) -#define ESR_ELx_FSC_ACCESS (0x08) -#define ESR_ELx_FSC_FAULT (0x04) -#define ESR_ELx_FSC_PERM (0x0C) + +/* ISS field definitions for exceptions taken in to Hyp */ #define ESR_ELx_CV (UL(1) << 24) #define ESR_ELx_COND_SHIFT (20) #define ESR_ELx_COND_MASK (UL(0xF) << ESR_ELx_COND_SHIFT) @@ -109,6 +119,62 @@ ((ESR_ELx_EC_BRK64 << ESR_ELx_EC_SHIFT) | ESR_ELx_IL | \ ((imm) & 0xffff)) +/* ISS field definitions for System instruction traps */ +#define ESR_ELx_SYS64_ISS_RES0_SHIFT 22 +#define ESR_ELx_SYS64_ISS_RES0_MASK (UL(0x7) << ESR_ELx_SYS64_ISS_RES0_SHIFT) +#define ESR_ELx_SYS64_ISS_DIR_MASK 0x1 +#define ESR_ELx_SYS64_ISS_DIR_READ 0x1 +#define ESR_ELx_SYS64_ISS_DIR_WRITE 0x0 + +#define ESR_ELx_SYS64_ISS_RT_SHIFT 5 +#define ESR_ELx_SYS64_ISS_RT_MASK (UL(0x1f) << ESR_ELx_SYS64_ISS_RT_SHIFT) +#define ESR_ELx_SYS64_ISS_CRM_SHIFT 1 +#define ESR_ELx_SYS64_ISS_CRM_MASK (UL(0xf) << ESR_ELx_SYS64_ISS_CRM_SHIFT) +#define ESR_ELx_SYS64_ISS_CRN_SHIFT 10 +#define ESR_ELx_SYS64_ISS_CRN_MASK (UL(0xf) << ESR_ELx_SYS64_ISS_CRN_SHIFT) +#define ESR_ELx_SYS64_ISS_OP1_SHIFT 14 +#define ESR_ELx_SYS64_ISS_OP1_MASK (UL(0x7) << ESR_ELx_SYS64_ISS_OP1_SHIFT) +#define ESR_ELx_SYS64_ISS_OP2_SHIFT 17 +#define ESR_ELx_SYS64_ISS_OP2_MASK (UL(0x7) << ESR_ELx_SYS64_ISS_OP2_SHIFT) +#define ESR_ELx_SYS64_ISS_OP0_SHIFT 20 +#define ESR_ELx_SYS64_ISS_OP0_MASK (UL(0x3) << ESR_ELx_SYS64_ISS_OP0_SHIFT) +#define ESR_ELx_SYS64_ISS_SYS_MASK (ESR_ELx_SYS64_ISS_OP0_MASK | \ + ESR_ELx_SYS64_ISS_OP1_MASK | \ + ESR_ELx_SYS64_ISS_OP2_MASK | \ + ESR_ELx_SYS64_ISS_CRN_MASK | \ + ESR_ELx_SYS64_ISS_CRM_MASK) +#define ESR_ELx_SYS64_ISS_SYS_VAL(op0, op1, op2, crn, crm) \ + (((op0) << ESR_ELx_SYS64_ISS_OP0_SHIFT) | \ + ((op1) << ESR_ELx_SYS64_ISS_OP1_SHIFT) | \ + ((op2) << ESR_ELx_SYS64_ISS_OP2_SHIFT) | \ + ((crn) << ESR_ELx_SYS64_ISS_CRN_SHIFT) | \ + ((crm) << ESR_ELx_SYS64_ISS_CRM_SHIFT)) + +#define ESR_ELx_SYS64_ISS_SYS_OP_MASK (ESR_ELx_SYS64_ISS_SYS_MASK | \ + ESR_ELx_SYS64_ISS_DIR_MASK) +/* + * User space cache operations have the following sysreg encoding + * in System instructions. + * op0=1, op1=3, op2=1, crn=7, crm={ 5, 10, 11, 14 }, WRITE (L=0) + */ +#define ESR_ELx_SYS64_ISS_CRM_DC_CIVAC 14 +#define ESR_ELx_SYS64_ISS_CRM_DC_CVAU 11 +#define ESR_ELx_SYS64_ISS_CRM_DC_CVAC 10 +#define ESR_ELx_SYS64_ISS_CRM_IC_IVAU 5 + +#define ESR_ELx_SYS64_ISS_EL0_CACHE_OP_MASK (ESR_ELx_SYS64_ISS_OP0_MASK | \ + ESR_ELx_SYS64_ISS_OP1_MASK | \ + ESR_ELx_SYS64_ISS_OP2_MASK | \ + ESR_ELx_SYS64_ISS_CRN_MASK | \ + ESR_ELx_SYS64_ISS_DIR_MASK) +#define ESR_ELx_SYS64_ISS_EL0_CACHE_OP_VAL \ + (ESR_ELx_SYS64_ISS_SYS_VAL(1, 3, 1, 7, 0) | \ + ESR_ELx_SYS64_ISS_DIR_WRITE) + +#define ESR_ELx_SYS64_ISS_SYS_CTR ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 1, 0, 0) +#define ESR_ELx_SYS64_ISS_SYS_CTR_READ (ESR_ELx_SYS64_ISS_SYS_CTR | \ + ESR_ELx_SYS64_ISS_DIR_READ) + #ifndef __ASSEMBLY__ #include <asm/types.h> diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h index 115ea2a64520..9510ace570e2 100644 --- a/arch/arm64/include/asm/hw_breakpoint.h +++ b/arch/arm64/include/asm/hw_breakpoint.h @@ -18,6 +18,7 @@ #include <asm/cputype.h> #include <asm/cpufeature.h> +#include <asm/sysreg.h> #include <asm/virt.h> #ifdef __KERNEL__ @@ -98,18 +99,18 @@ static inline void decode_ctrl_reg(u32 reg, #define AARCH64_DBG_REG_WCR (AARCH64_DBG_REG_WVR + ARM_MAX_WRP) /* Debug register names. */ -#define AARCH64_DBG_REG_NAME_BVR "bvr" -#define AARCH64_DBG_REG_NAME_BCR "bcr" -#define AARCH64_DBG_REG_NAME_WVR "wvr" -#define AARCH64_DBG_REG_NAME_WCR "wcr" +#define AARCH64_DBG_REG_NAME_BVR bvr +#define AARCH64_DBG_REG_NAME_BCR bcr +#define AARCH64_DBG_REG_NAME_WVR wvr +#define AARCH64_DBG_REG_NAME_WCR wcr /* Accessor macros for the debug registers. */ #define AARCH64_DBG_READ(N, REG, VAL) do {\ - asm volatile("mrs %0, dbg" REG #N "_el1" : "=r" (VAL));\ + VAL = read_sysreg(dbg##REG##N##_el1);\ } while (0) #define AARCH64_DBG_WRITE(N, REG, VAL) do {\ - asm volatile("msr dbg" REG #N "_el1, %0" :: "r" (VAL));\ + write_sysreg(VAL, dbg##REG##N##_el1);\ } while (0) struct task_struct; @@ -141,8 +142,6 @@ static inline void ptrace_hw_copy_thread(struct task_struct *task) } #endif -extern struct pmu perf_ops_bp; - /* Determine number of BRP registers available. */ static inline int get_num_brps(void) { diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index 1dbaa901d7e5..bc853663dd51 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -246,7 +246,8 @@ static __always_inline bool aarch64_insn_is_##abbr(u32 code) \ static __always_inline u32 aarch64_insn_get_##abbr##_value(void) \ { return (val); } -__AARCH64_INSN_FUNCS(adr_adrp, 0x1F000000, 0x10000000) +__AARCH64_INSN_FUNCS(adr, 0x9F000000, 0x10000000) +__AARCH64_INSN_FUNCS(adrp, 0x9F000000, 0x90000000) __AARCH64_INSN_FUNCS(prfm_lit, 0xFF000000, 0xD8000000) __AARCH64_INSN_FUNCS(str_reg, 0x3FE0EC00, 0x38206800) __AARCH64_INSN_FUNCS(ldr_reg, 0x3FE0EC00, 0x38606800) @@ -318,6 +319,11 @@ __AARCH64_INSN_FUNCS(msr_reg, 0xFFF00000, 0xD5100000) bool aarch64_insn_is_nop(u32 insn); bool aarch64_insn_is_branch_imm(u32 insn); +static inline bool aarch64_insn_is_adr_adrp(u32 insn) +{ + return aarch64_insn_is_adr(insn) || aarch64_insn_is_adrp(insn); +} + int aarch64_insn_read(void *addr, u32 *insnp); int aarch64_insn_write(void *addr, u32 insn); enum aarch64_insn_encoding_class aarch64_get_insn_class(u32 insn); @@ -398,6 +404,9 @@ int aarch64_insn_patch_text_nosync(void *addr, u32 insn); int aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt); int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt); +s32 aarch64_insn_adrp_get_offset(u32 insn); +u32 aarch64_insn_adrp_set_offset(u32 insn, s32 offset); + bool aarch32_insn_is_wide(u32 insn); #define A32_RN_OFFSET 16 diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index 9b6e408cfa51..0bba427bb4c2 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -40,25 +40,25 @@ #define __raw_writeb __raw_writeb static inline void __raw_writeb(u8 val, volatile void __iomem *addr) { - asm volatile("strb %w0, [%1]" : : "r" (val), "r" (addr)); + asm volatile("strb %w0, [%1]" : : "rZ" (val), "r" (addr)); } #define __raw_writew __raw_writew static inline void __raw_writew(u16 val, volatile void __iomem *addr) { - asm volatile("strh %w0, [%1]" : : "r" (val), "r" (addr)); + asm volatile("strh %w0, [%1]" : : "rZ" (val), "r" (addr)); } #define __raw_writel __raw_writel static inline void __raw_writel(u32 val, volatile void __iomem *addr) { - asm volatile("str %w0, [%1]" : : "r" (val), "r" (addr)); + asm volatile("str %w0, [%1]" : : "rZ" (val), "r" (addr)); } #define __raw_writeq __raw_writeq static inline void __raw_writeq(u64 val, volatile void __iomem *addr) { - asm volatile("str %0, [%1]" : : "r" (val), "r" (addr)); + asm volatile("str %x0, [%1]" : : "rZ" (val), "r" (addr)); } #define __raw_readb __raw_readb @@ -184,17 +184,6 @@ extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size); #define iowrite32be(v,p) ({ __iowmb(); __raw_writel((__force __u32)cpu_to_be32(v), p); }) #define iowrite64be(v,p) ({ __iowmb(); __raw_writeq((__force __u64)cpu_to_be64(v), p); }) -/* - * Convert a physical pointer to a virtual kernel pointer for /dev/mem - * access - */ -#define xlate_dev_mem_ptr(p) __va(p) - -/* - * Convert a virtual cached pointer to an uncached pointer - */ -#define xlate_dev_kmem_ptr(p) p - #include <asm-generic/io.h> /* diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index b6bb83400cd8..dff109871f2a 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -99,14 +99,10 @@ .macro kern_hyp_va reg alternative_if_not ARM64_HAS_VIRT_HOST_EXTN and \reg, \reg, #HYP_PAGE_OFFSET_HIGH_MASK -alternative_else - nop -alternative_endif -alternative_if_not ARM64_HYP_OFFSET_LOW - nop -alternative_else +alternative_else_nop_endif +alternative_if ARM64_HYP_OFFSET_LOW and \reg, \reg, #HYP_PAGE_OFFSET_LOW_MASK -alternative_endif +alternative_else_nop_endif .endm #else diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 31b73227b41f..ba62df8c6e35 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -214,7 +214,7 @@ static inline void *phys_to_virt(phys_addr_t x) #ifndef CONFIG_SPARSEMEM_VMEMMAP #define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) -#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) +#define _virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) #else #define __virt_to_pgoff(kaddr) (((u64)(kaddr) & ~PAGE_OFFSET) / PAGE_SIZE * sizeof(struct page)) #define __page_to_voff(kaddr) (((u64)(page) & ~VMEMMAP_START) * PAGE_SIZE / sizeof(struct page)) @@ -222,11 +222,15 @@ static inline void *phys_to_virt(phys_addr_t x) #define page_to_virt(page) ((void *)((__page_to_voff(page)) | PAGE_OFFSET)) #define virt_to_page(vaddr) ((struct page *)((__virt_to_pgoff(vaddr)) | VMEMMAP_START)) -#define virt_addr_valid(kaddr) pfn_valid((((u64)(kaddr) & ~PAGE_OFFSET) \ +#define _virt_addr_valid(kaddr) pfn_valid((((u64)(kaddr) & ~PAGE_OFFSET) \ + PHYS_OFFSET) >> PAGE_SHIFT) #endif #endif +#define _virt_addr_is_linear(kaddr) (((u64)(kaddr)) >= PAGE_OFFSET) +#define virt_addr_valid(kaddr) (_virt_addr_is_linear(kaddr) && \ + _virt_addr_valid(kaddr)) + #include <asm-generic/memory_model.h> #endif diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index b1892a0dbcb0..a50185375f09 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -27,22 +27,17 @@ #include <asm-generic/mm_hooks.h> #include <asm/cputype.h> #include <asm/pgtable.h> +#include <asm/sysreg.h> #include <asm/tlbflush.h> -#ifdef CONFIG_PID_IN_CONTEXTIDR -static inline void contextidr_thread_switch(struct task_struct *next) -{ - asm( - " msr contextidr_el1, %0\n" - " isb" - : - : "r" (task_pid_nr(next))); -} -#else static inline void contextidr_thread_switch(struct task_struct *next) { + if (!IS_ENABLED(CONFIG_PID_IN_CONTEXTIDR)) + return; + + write_sysreg(task_pid_nr(next), contextidr_el1); + isb(); } -#endif /* * Set TTBR0 to empty_zero_page. No translations will be possible via TTBR0. @@ -51,11 +46,8 @@ static inline void cpu_set_reserved_ttbr0(void) { unsigned long ttbr = virt_to_phys(empty_zero_page); - asm( - " msr ttbr0_el1, %0 // set TTBR0\n" - " isb" - : - : "r" (ttbr)); + write_sysreg(ttbr, ttbr0_el1); + isb(); } /* @@ -81,13 +73,11 @@ static inline void __cpu_set_tcr_t0sz(unsigned long t0sz) if (!__cpu_uses_extended_idmap()) return; - asm volatile ( - " mrs %0, tcr_el1 ;" - " bfi %0, %1, %2, %3 ;" - " msr tcr_el1, %0 ;" - " isb" - : "=&r" (tcr) - : "r"(t0sz), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH)); + tcr = read_sysreg(tcr_el1); + tcr &= ~TCR_T0SZ_MASK; + tcr |= t0sz << TCR_T0SZ_OFFSET; + write_sysreg(tcr, tcr_el1); + isb(); } #define cpu_set_default_tcr_t0sz() __cpu_set_tcr_t0sz(TCR_T0SZ(VA_BITS)) diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h index 0a456bef8c79..2fee2f59288c 100644 --- a/arch/arm64/include/asm/percpu.h +++ b/arch/arm64/include/asm/percpu.h @@ -199,19 +199,19 @@ static inline unsigned long __percpu_xchg(void *ptr, unsigned long val, #define _percpu_read(pcp) \ ({ \ typeof(pcp) __retval; \ - preempt_disable(); \ + preempt_disable_notrace(); \ __retval = (typeof(pcp))__percpu_read(raw_cpu_ptr(&(pcp)), \ sizeof(pcp)); \ - preempt_enable(); \ + preempt_enable_notrace(); \ __retval; \ }) #define _percpu_write(pcp, val) \ do { \ - preempt_disable(); \ + preempt_disable_notrace(); \ __percpu_write(raw_cpu_ptr(&(pcp)), (unsigned long)(val), \ sizeof(pcp)); \ - preempt_enable(); \ + preempt_enable_notrace(); \ } while(0) \ #define _pcp_protect(operation, pcp, val) \ diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index c3ae239db3ee..eb0c2bd90de9 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -208,6 +208,7 @@ #define TCR_T1SZ(x) ((UL(64) - (x)) << TCR_T1SZ_OFFSET) #define TCR_TxSZ(x) (TCR_T0SZ(x) | TCR_T1SZ(x)) #define TCR_TxSZ_WIDTH 6 +#define TCR_T0SZ_MASK (((UL(1) << TCR_TxSZ_WIDTH) - 1) << TCR_T0SZ_OFFSET) #define TCR_IRGN0_SHIFT 8 #define TCR_IRGN0_MASK (UL(3) << TCR_IRGN0_SHIFT) diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 39f5252673f7..2142c7726e76 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -70,12 +70,13 @@ #define PAGE_COPY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN) #define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) #define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN) +#define PAGE_EXECONLY __pgprot(_PAGE_DEFAULT | PTE_NG | PTE_PXN) #define __P000 PAGE_NONE #define __P001 PAGE_READONLY #define __P010 PAGE_COPY #define __P011 PAGE_COPY -#define __P100 PAGE_READONLY_EXEC +#define __P100 PAGE_EXECONLY #define __P101 PAGE_READONLY_EXEC #define __P110 PAGE_COPY_EXEC #define __P111 PAGE_COPY_EXEC @@ -84,7 +85,7 @@ #define __S001 PAGE_READONLY #define __S010 PAGE_SHARED #define __S011 PAGE_SHARED -#define __S100 PAGE_READONLY_EXEC +#define __S100 PAGE_EXECONLY #define __S101 PAGE_READONLY_EXEC #define __S110 PAGE_SHARED_EXEC #define __S111 PAGE_SHARED_EXEC diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index e20bd431184a..ffbb9a520563 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -73,7 +73,7 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; #define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE)) #define pte_exec(pte) (!(pte_val(pte) & PTE_UXN)) #define pte_cont(pte) (!!(pte_val(pte) & PTE_CONT)) -#define pte_user(pte) (!!(pte_val(pte) & PTE_USER)) +#define pte_ng(pte) (!!(pte_val(pte) & PTE_NG)) #ifdef CONFIG_ARM64_HW_AFDBM #define pte_hw_dirty(pte) (pte_write(pte) && !(pte_val(pte) & PTE_RDONLY)) @@ -84,8 +84,8 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; #define pte_dirty(pte) (pte_sw_dirty(pte) || pte_hw_dirty(pte)) #define pte_valid(pte) (!!(pte_val(pte) & PTE_VALID)) -#define pte_valid_not_user(pte) \ - ((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID) +#define pte_valid_global(pte) \ + ((pte_val(pte) & (PTE_VALID | PTE_NG)) == PTE_VALID) #define pte_valid_young(pte) \ ((pte_val(pte) & (PTE_VALID | PTE_AF)) == (PTE_VALID | PTE_AF)) @@ -155,6 +155,16 @@ static inline pte_t pte_mknoncont(pte_t pte) return clear_pte_bit(pte, __pgprot(PTE_CONT)); } +static inline pte_t pte_clear_rdonly(pte_t pte) +{ + return clear_pte_bit(pte, __pgprot(PTE_RDONLY)); +} + +static inline pte_t pte_mkpresent(pte_t pte) +{ + return set_pte_bit(pte, __pgprot(PTE_VALID)); +} + static inline pmd_t pmd_mkcont(pmd_t pmd) { return __pmd(pmd_val(pmd) | PMD_SECT_CONT); @@ -168,7 +178,7 @@ static inline void set_pte(pte_t *ptep, pte_t pte) * Only if the new pte is valid and kernel, otherwise TLB maintenance * or update_mmu_cache() have the necessary barriers. */ - if (pte_valid_not_user(pte)) { + if (pte_valid_global(pte)) { dsb(ishst); isb(); } @@ -202,7 +212,7 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_val(pte) &= ~PTE_RDONLY; else pte_val(pte) |= PTE_RDONLY; - if (pte_user(pte) && pte_exec(pte) && !pte_special(pte)) + if (pte_ng(pte) && pte_exec(pte) && !pte_special(pte)) __sync_icache_dcache(pte, addr); } diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index ace0a96e7d6e..df2e53d3a969 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -37,7 +37,6 @@ #include <asm/ptrace.h> #include <asm/types.h> -#ifdef __KERNEL__ #define STACK_TOP_MAX TASK_SIZE_64 #ifdef CONFIG_COMPAT #define AARCH32_VECTORS_BASE 0xffff0000 @@ -49,7 +48,6 @@ extern phys_addr_t arm64_dma_phys_limit; #define ARCH_LOW_ADDRESS_LIMIT (arm64_dma_phys_limit - 1) -#endif /* __KERNEL__ */ struct debug_info { /* Have we suspended stepping by a debugger? */ diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h new file mode 100644 index 000000000000..4e7e7067afdb --- /dev/null +++ b/arch/arm64/include/asm/sections.h @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2016 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_SECTIONS_H +#define __ASM_SECTIONS_H + +#include <asm-generic/sections.h> + +extern char __alt_instructions[], __alt_instructions_end[]; +extern char __exception_text_start[], __exception_text_end[]; +extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[]; +extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[]; +extern char __hyp_text_start[], __hyp_text_end[]; +extern char __idmap_text_start[], __idmap_text_end[]; +extern char __irqentry_text_start[], __irqentry_text_end[]; +extern char __mmuoff_data_start[], __mmuoff_data_end[]; + +#endif /* __ASM_SECTIONS_H */ diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index e875a5a551d7..cae331d553f8 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -66,8 +66,7 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) ARM64_LSE_ATOMIC_INSN( /* LL/SC */ " stxr %w1, %w0, %2\n" -" nop\n" -" nop\n", + __nops(2), /* LSE atomics */ " mov %w1, %w0\n" " cas %w0, %w0, %2\n" @@ -99,9 +98,7 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) /* LSE atomics */ " mov %w2, %w5\n" " ldadda %w2, %w0, %3\n" -" nop\n" -" nop\n" -" nop\n" + __nops(3) ) /* Did we get the lock? */ @@ -165,8 +162,8 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) " stlrh %w1, %0", /* LSE atomics */ " mov %w1, #1\n" - " nop\n" - " staddlh %w1, %0") + " staddlh %w1, %0\n" + __nops(1)) : "=Q" (lock->owner), "=&r" (tmp) : : "memory"); @@ -212,7 +209,7 @@ static inline void arch_write_lock(arch_rwlock_t *rw) " cbnz %w0, 1b\n" " stxr %w0, %w2, %1\n" " cbnz %w0, 2b\n" - " nop", + __nops(1), /* LSE atomics */ "1: mov %w0, wzr\n" "2: casa %w0, %w2, %1\n" @@ -241,8 +238,7 @@ static inline int arch_write_trylock(arch_rwlock_t *rw) /* LSE atomics */ " mov %w0, wzr\n" " casa %w0, %w2, %1\n" - " nop\n" - " nop") + __nops(2)) : "=&r" (tmp), "+Q" (rw->lock) : "r" (0x80000000) : "memory"); @@ -290,8 +286,8 @@ static inline void arch_read_lock(arch_rwlock_t *rw) " add %w0, %w0, #1\n" " tbnz %w0, #31, 1b\n" " stxr %w1, %w0, %2\n" - " nop\n" - " cbnz %w1, 2b", + " cbnz %w1, 2b\n" + __nops(1), /* LSE atomics */ "1: wfe\n" "2: ldxr %w0, %2\n" @@ -317,9 +313,8 @@ static inline void arch_read_unlock(arch_rwlock_t *rw) " cbnz %w1, 1b", /* LSE atomics */ " movn %w0, #0\n" - " nop\n" - " nop\n" - " staddl %w0, %2") + " staddl %w0, %2\n" + __nops(2)) : "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock) : : "memory"); @@ -344,7 +339,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw) " tbnz %w1, #31, 1f\n" " casa %w0, %w1, %2\n" " sbc %w1, %w1, %w0\n" - " nop\n" + __nops(1) "1:") : "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock) : @@ -363,4 +358,14 @@ static inline int arch_read_trylock(arch_rwlock_t *rw) #define arch_read_relax(lock) cpu_relax() #define arch_write_relax(lock) cpu_relax() +/* + * Accesses appearing in program order before a spin_lock() operation + * can be reordered with accesses inside the critical section, by virtue + * of arch_spin_lock being constructed using acquire semantics. + * + * In cases where this is problematic (e.g. try_to_wake_up), an + * smp_mb__before_spinlock() can restore the required ordering. + */ +#define smp_mb__before_spinlock() smp_mb() + #endif /* __ASM_SPINLOCK_H */ diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h index 024d623f662e..b8a313fd7a09 100644 --- a/arch/arm64/include/asm/suspend.h +++ b/arch/arm64/include/asm/suspend.h @@ -47,4 +47,7 @@ int swsusp_arch_resume(void); int arch_hibernation_header_save(void *addr, unsigned int max_size); int arch_hibernation_header_restore(void *addr); +/* Used to resume on the CPU we hibernated on */ +int hibernate_resume_nonboot_cpu_disable(void); + #endif diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index cc06794b7346..e8d46e8e6079 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -100,6 +100,7 @@ /* SCTLR_EL1 specific flags. */ #define SCTLR_EL1_UCI (1 << 26) #define SCTLR_EL1_SPAN (1 << 23) +#define SCTLR_EL1_UCT (1 << 15) #define SCTLR_EL1_SED (1 << 8) #define SCTLR_EL1_CP15BEN (1 << 5) @@ -253,16 +254,6 @@ asm( " .endm\n" ); -static inline void config_sctlr_el1(u32 clear, u32 set) -{ - u32 val; - - asm volatile("mrs %0, sctlr_el1" : "=r" (val)); - val &= ~clear; - val |= set; - asm volatile("msr sctlr_el1, %0" : : "r" (val)); -} - /* * Unlike read_cpuid, calls to read_sysreg are never expected to be * optimized away or replaced with synthetic values. @@ -273,12 +264,41 @@ static inline void config_sctlr_el1(u32 clear, u32 set) __val; \ }) +/* + * The "Z" constraint normally means a zero immediate, but when combined with + * the "%x0" template means XZR. + */ #define write_sysreg(v, r) do { \ u64 __val = (u64)v; \ - asm volatile("msr " __stringify(r) ", %0" \ - : : "r" (__val)); \ + asm volatile("msr " __stringify(r) ", %x0" \ + : : "rZ" (__val)); \ +} while (0) + +/* + * For registers without architectural names, or simply unsupported by + * GAS. + */ +#define read_sysreg_s(r) ({ \ + u64 __val; \ + asm volatile("mrs_s %0, " __stringify(r) : "=r" (__val)); \ + __val; \ +}) + +#define write_sysreg_s(v, r) do { \ + u64 __val = (u64)v; \ + asm volatile("msr_s " __stringify(r) ", %0" : : "rZ" (__val)); \ } while (0) +static inline void config_sctlr_el1(u32 clear, u32 set) +{ + u32 val; + + val = read_sysreg(sctlr_el1); + val &= ~clear; + val |= set; + write_sysreg(val, sctlr_el1); +} + #endif #endif /* __ASM_SYSREG_H */ diff --git a/arch/arm64/include/asm/system_misc.h b/arch/arm64/include/asm/system_misc.h index 57f110bea6a8..bc812435bc76 100644 --- a/arch/arm64/include/asm/system_misc.h +++ b/arch/arm64/include/asm/system_misc.h @@ -56,12 +56,6 @@ extern void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd); __show_ratelimited; \ }) -#define UDBG_UNDEFINED (1 << 0) -#define UDBG_SYSCALL (1 << 1) -#define UDBG_BADABORT (1 << 2) -#define UDBG_SEGV (1 << 3) -#define UDBG_BUS (1 << 4) - #endif /* __ASSEMBLY__ */ #endif /* __ASM_SYSTEM_MISC_H */ diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index abd64bd1f6d9..e9ea5a6bd449 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -75,6 +75,9 @@ static inline struct thread_info *current_thread_info(void) __attribute_const__; /* * struct thread_info can be accessed directly via sp_el0. + * + * We don't use read_sysreg() as we want the compiler to cache the value where + * possible. */ static inline struct thread_info *current_thread_info(void) { diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index b460ae28e346..deab52374119 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -25,6 +25,24 @@ #include <asm/cputype.h> /* + * Raw TLBI operations. + * + * Where necessary, use the __tlbi() macro to avoid asm() + * boilerplate. Drivers and most kernel code should use the TLB + * management routines in preference to the macro below. + * + * The macro can be used as __tlbi(op) or __tlbi(op, arg), depending + * on whether a particular TLBI operation takes an argument or + * not. The macros handles invoking the asm with or without the + * register argument as appropriate. + */ +#define __TLBI_0(op, arg) asm ("tlbi " #op) +#define __TLBI_1(op, arg) asm ("tlbi " #op ", %0" : : "r" (arg)) +#define __TLBI_N(op, arg, n, ...) __TLBI_##n(op, arg) + +#define __tlbi(op, ...) __TLBI_N(op, ##__VA_ARGS__, 1, 0) + +/* * TLB Management * ============== * @@ -66,7 +84,7 @@ static inline void local_flush_tlb_all(void) { dsb(nshst); - asm("tlbi vmalle1"); + __tlbi(vmalle1); dsb(nsh); isb(); } @@ -74,7 +92,7 @@ static inline void local_flush_tlb_all(void) static inline void flush_tlb_all(void) { dsb(ishst); - asm("tlbi vmalle1is"); + __tlbi(vmalle1is); dsb(ish); isb(); } @@ -84,7 +102,7 @@ static inline void flush_tlb_mm(struct mm_struct *mm) unsigned long asid = ASID(mm) << 48; dsb(ishst); - asm("tlbi aside1is, %0" : : "r" (asid)); + __tlbi(aside1is, asid); dsb(ish); } @@ -94,7 +112,7 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr = uaddr >> 12 | (ASID(vma->vm_mm) << 48); dsb(ishst); - asm("tlbi vale1is, %0" : : "r" (addr)); + __tlbi(vale1is, addr); dsb(ish); } @@ -122,9 +140,9 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma, dsb(ishst); for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) { if (last_level) - asm("tlbi vale1is, %0" : : "r"(addr)); + __tlbi(vale1is, addr); else - asm("tlbi vae1is, %0" : : "r"(addr)); + __tlbi(vae1is, addr); } dsb(ish); } @@ -149,7 +167,7 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end dsb(ishst); for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) - asm("tlbi vaae1is, %0" : : "r"(addr)); + __tlbi(vaae1is, addr); dsb(ish); isb(); } @@ -163,7 +181,7 @@ static inline void __flush_tlb_pgtable(struct mm_struct *mm, { unsigned long addr = uaddr >> 12 | (ASID(mm) << 48); - asm("tlbi vae1is, %0" : : "r" (addr)); + __tlbi(vae1is, addr); dsb(ish); } diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h index 9cd03f3e812f..02e9035b0685 100644 --- a/arch/arm64/include/asm/traps.h +++ b/arch/arm64/include/asm/traps.h @@ -19,6 +19,7 @@ #define __ASM_TRAP_H #include <linux/list.h> +#include <asm/sections.h> struct pt_regs; @@ -39,9 +40,6 @@ void arm64_notify_segfault(struct pt_regs *regs, unsigned long addr); #ifdef CONFIG_FUNCTION_GRAPH_TRACER static inline int __in_irqentry_text(unsigned long ptr) { - extern char __irqentry_text_start[]; - extern char __irqentry_text_end[]; - return ptr >= (unsigned long)&__irqentry_text_start && ptr < (unsigned long)&__irqentry_text_end; } @@ -54,8 +52,6 @@ static inline int __in_irqentry_text(unsigned long ptr) static inline int in_exception_text(unsigned long ptr) { - extern char __exception_text_start[]; - extern char __exception_text_end[]; int in; in = ptr >= (unsigned long)&__exception_text_start && diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index 1788545f25bc..fea10736b11f 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -45,6 +45,8 @@ #ifndef __ASSEMBLY__ #include <asm/ptrace.h> +#include <asm/sections.h> +#include <asm/sysreg.h> /* * __boot_cpu_mode records what mode CPUs were booted in. @@ -75,10 +77,7 @@ static inline bool is_hyp_mode_mismatched(void) static inline bool is_kernel_in_hyp_mode(void) { - u64 el; - - asm("mrs %0, CurrentEL" : "=r" (el)); - return el == CurrentEL_EL2; + return read_sysreg(CurrentEL) == CurrentEL_EL2; } #ifdef CONFIG_ARM64_VHE @@ -87,14 +86,6 @@ extern void verify_cpu_run_el(void); static inline void verify_cpu_run_el(void) {} #endif -/* The section containing the hypervisor idmap text */ -extern char __hyp_idmap_text_start[]; -extern char __hyp_idmap_text_end[]; - -/* The section containing the hypervisor text */ -extern char __hyp_text_start[]; -extern char __hyp_text_end[]; - #endif /* __ASSEMBLY__ */ #endif /* ! __ASM__VIRT_H */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 14f7b651c787..7d66bbaafc0c 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -10,6 +10,8 @@ CFLAGS_REMOVE_ftrace.o = -pg CFLAGS_REMOVE_insn.o = -pg CFLAGS_REMOVE_return_address.o = -pg +CFLAGS_setup.o = -DUTS_MACHINE='"$(UTS_MACHINE)"' + # Object file lists. arm64-obj-y := debug-monitors.o entry.o irq.o fpsimd.o \ entry-fpsimd.o process.o ptrace.o setup.o signal.o \ diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c index f85149cc7c71..f01fab637dab 100644 --- a/arch/arm64/kernel/acpi_numa.c +++ b/arch/arm64/kernel/acpi_numa.c @@ -105,8 +105,10 @@ int __init arm64_acpi_numa_init(void) int ret; ret = acpi_numa_init(); - if (ret) + if (ret) { + pr_info("Failed to initialise from firmware\n"); return ret; + } return srat_disabled() ? -EINVAL : 0; } diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index d2ee1b21a10d..06d650f61da7 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -25,14 +25,13 @@ #include <asm/alternative.h> #include <asm/cpufeature.h> #include <asm/insn.h> +#include <asm/sections.h> #include <linux/stop_machine.h> #define __ALT_PTR(a,f) (u32 *)((void *)&(a)->f + (a)->f) #define ALT_ORIG_PTR(a) __ALT_PTR(a, orig_offset) #define ALT_REPL_PTR(a) __ALT_PTR(a, alt_offset) -extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; - struct alt_region { struct alt_instr *begin; struct alt_instr *end; @@ -59,6 +58,8 @@ static bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc) BUG(); } +#define align_down(x, a) ((unsigned long)(x) & ~(((unsigned long)(a)) - 1)) + static u32 get_alt_insn(struct alt_instr *alt, u32 *insnptr, u32 *altinsnptr) { u32 insn; @@ -80,6 +81,25 @@ static u32 get_alt_insn(struct alt_instr *alt, u32 *insnptr, u32 *altinsnptr) offset = target - (unsigned long)insnptr; insn = aarch64_set_branch_offset(insn, offset); } + } else if (aarch64_insn_is_adrp(insn)) { + s32 orig_offset, new_offset; + unsigned long target; + + /* + * If we're replacing an adrp instruction, which uses PC-relative + * immediate addressing, adjust the offset to reflect the new + * PC. adrp operates on 4K aligned addresses. + */ + orig_offset = aarch64_insn_adrp_get_offset(insn); + target = align_down(altinsnptr, SZ_4K) + orig_offset; + new_offset = target - align_down(insnptr, SZ_4K); + insn = aarch64_insn_adrp_set_offset(insn, new_offset); + } else if (aarch64_insn_uses_literal(insn)) { + /* + * Disallow patching unhandled instructions using PC relative + * literal addresses + */ + BUG(); } return insn; @@ -124,8 +144,8 @@ static int __apply_alternatives_multi_stop(void *unused) { static int patched = 0; struct alt_region region = { - .begin = __alt_instructions, - .end = __alt_instructions_end, + .begin = (struct alt_instr *)__alt_instructions, + .end = (struct alt_instr *)__alt_instructions_end, }; /* We always have a CPU 0 at this point (__init) */ diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 05070b72fc28..4a2f0f0fef32 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -23,6 +23,7 @@ #include <linux/dma-mapping.h> #include <linux/kvm_host.h> #include <linux/suspend.h> +#include <asm/cpufeature.h> #include <asm/thread_info.h> #include <asm/memory.h> #include <asm/smp_plat.h> @@ -145,5 +146,6 @@ int main(void) DEFINE(HIBERN_PBE_ORIG, offsetof(struct pbe, orig_address)); DEFINE(HIBERN_PBE_ADDR, offsetof(struct pbe, address)); DEFINE(HIBERN_PBE_NEXT, offsetof(struct pbe, next)); + DEFINE(ARM64_FTR_SYSVAL, offsetof(struct arm64_ftr_reg, sys_val)); return 0; } diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c index b8629d52fba9..9617301f76b5 100644 --- a/arch/arm64/kernel/cacheinfo.c +++ b/arch/arm64/kernel/cacheinfo.c @@ -39,7 +39,7 @@ static inline enum cache_type get_cache_type(int level) if (level > MAX_CACHE_LEVEL) return CACHE_TYPE_NOCACHE; - asm volatile ("mrs %x0, clidr_el1" : "=r" (clidr)); + clidr = read_sysreg(clidr_el1); return CLIDR_CTYPE(clidr, level); } @@ -55,11 +55,9 @@ u64 __attribute_const__ cache_get_ccsidr(u64 csselr) WARN_ON(preemptible()); - /* Put value into CSSELR */ - asm volatile("msr csselr_el1, %x0" : : "r" (csselr)); + write_sysreg(csselr, csselr_el1); isb(); - /* Read result out of CCSIDR */ - asm volatile("mrs %x0, ccsidr_el1" : "=r" (ccsidr)); + ccsidr = read_sysreg(ccsidr_el1); return ccsidr; } diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 82b0fc2e637b..0150394f4cab 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -30,6 +30,21 @@ is_affected_midr_range(const struct arm64_cpu_capabilities *entry, int scope) entry->midr_range_max); } +static bool +has_mismatched_cache_line_size(const struct arm64_cpu_capabilities *entry, + int scope) +{ + WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); + return (read_cpuid_cachetype() & arm64_ftr_reg_ctrel0.strict_mask) != + (arm64_ftr_reg_ctrel0.sys_val & arm64_ftr_reg_ctrel0.strict_mask); +} + +static void cpu_enable_trap_ctr_access(void *__unused) +{ + /* Clear SCTLR_EL1.UCT */ + config_sctlr_el1(SCTLR_EL1_UCT, 0); +} + #define MIDR_RANGE(model, min, max) \ .def_scope = SCOPE_LOCAL_CPU, \ .matches = is_affected_midr_range, \ @@ -108,6 +123,13 @@ const struct arm64_cpu_capabilities arm64_errata[] = { }, #endif { + .desc = "Mismatched cache line size", + .capability = ARM64_MISMATCHED_CACHE_LINE_SIZE, + .matches = has_mismatched_cache_line_size, + .def_scope = SCOPE_LOCAL_CPU, + .enable = cpu_enable_trap_ctr_access, + }, + { } }; @@ -116,7 +138,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { * and the related information is freed soon after. If the new CPU requires * an errata not detected at boot, fail this CPU. */ -void verify_local_cpu_errata(void) +void verify_local_cpu_errata_workarounds(void) { const struct arm64_cpu_capabilities *caps = arm64_errata; @@ -131,7 +153,7 @@ void verify_local_cpu_errata(void) } } -void check_local_cpu_errata(void) +void update_cpu_errata_workarounds(void) { update_cpu_capabilities(arm64_errata, "enabling workaround for"); } diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c index c7cfb8fe06f9..e137ceaf5016 100644 --- a/arch/arm64/kernel/cpu_ops.c +++ b/arch/arm64/kernel/cpu_ops.c @@ -17,6 +17,7 @@ */ #include <linux/acpi.h> +#include <linux/cache.h> #include <linux/errno.h> #include <linux/of.h> #include <linux/string.h> @@ -28,7 +29,7 @@ extern const struct cpu_operations smp_spin_table_ops; extern const struct cpu_operations acpi_parking_protocol_ops; extern const struct cpu_operations cpu_psci_ops; -const struct cpu_operations *cpu_ops[NR_CPUS]; +const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init; static const struct cpu_operations *dt_supported_cpu_ops[] __initconst = { &smp_spin_table_ops, diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 62272eac1352..d577f263cc4a 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -46,6 +46,9 @@ unsigned int compat_elf_hwcap2 __read_mostly; DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); +DEFINE_STATIC_KEY_ARRAY_FALSE(cpu_hwcap_keys, ARM64_NCAPS); +EXPORT_SYMBOL(cpu_hwcap_keys); + #define __ARM64_FTR_BITS(SIGNED, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \ { \ .sign = SIGNED, \ @@ -74,7 +77,7 @@ static bool __maybe_unused cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused); -static struct arm64_ftr_bits ftr_id_aa64isar0[] = { +static const struct arm64_ftr_bits ftr_id_aa64isar0[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_RDM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 24, 4, 0), @@ -87,7 +90,7 @@ static struct arm64_ftr_bits ftr_id_aa64isar0[] = { ARM64_FTR_END, }; -static struct arm64_ftr_bits ftr_id_aa64pfr0[] = { +static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 4, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_GIC_SHIFT, 4, 0), @@ -101,7 +104,7 @@ static struct arm64_ftr_bits ftr_id_aa64pfr0[] = { ARM64_FTR_END, }; -static struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { +static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0), S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI), S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI), @@ -119,7 +122,7 @@ static struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { ARM64_FTR_END, }; -static struct arm64_ftr_bits ftr_id_aa64mmfr1[] = { +static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_PAN_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_LOR_SHIFT, 4, 0), @@ -130,7 +133,7 @@ static struct arm64_ftr_bits ftr_id_aa64mmfr1[] = { ARM64_FTR_END, }; -static struct arm64_ftr_bits ftr_id_aa64mmfr2[] = { +static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_LVA_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_IESB_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_LSM_SHIFT, 4, 0), @@ -139,7 +142,7 @@ static struct arm64_ftr_bits ftr_id_aa64mmfr2[] = { ARM64_FTR_END, }; -static struct arm64_ftr_bits ftr_ctr[] = { +static const struct arm64_ftr_bits ftr_ctr[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RAO */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 3, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_HIGHER_SAFE, 24, 4, 0), /* CWG */ @@ -147,15 +150,21 @@ static struct arm64_ftr_bits ftr_ctr[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 1), /* DminLine */ /* * Linux can handle differing I-cache policies. Userspace JITs will - * make use of *minLine + * make use of *minLine. + * If we have differing I-cache policies, report it as the weakest - AIVIVT. */ - ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, 14, 2, 0), /* L1Ip */ + ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, 14, 2, ICACHE_POLICY_AIVIVT), /* L1Ip */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 10, 0), /* RAZ */ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* IminLine */ ARM64_FTR_END, }; -static struct arm64_ftr_bits ftr_id_mmfr0[] = { +struct arm64_ftr_reg arm64_ftr_reg_ctrel0 = { + .name = "SYS_CTR_EL0", + .ftr_bits = ftr_ctr +}; + +static const struct arm64_ftr_bits ftr_id_mmfr0[] = { S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 4, 0xf), /* InnerShr */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 24, 4, 0), /* FCSE */ ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, 20, 4, 0), /* AuxReg */ @@ -167,7 +176,7 @@ static struct arm64_ftr_bits ftr_id_mmfr0[] = { ARM64_FTR_END, }; -static struct arm64_ftr_bits ftr_id_aa64dfr0[] = { +static const struct arm64_ftr_bits ftr_id_aa64dfr0[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_CTX_CMPS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_WRPS_SHIFT, 4, 0), @@ -178,14 +187,14 @@ static struct arm64_ftr_bits ftr_id_aa64dfr0[] = { ARM64_FTR_END, }; -static struct arm64_ftr_bits ftr_mvfr2[] = { +static const struct arm64_ftr_bits ftr_mvfr2[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 24, 0), /* RAZ */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0), /* FPMisc */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0), /* SIMDMisc */ ARM64_FTR_END, }; -static struct arm64_ftr_bits ftr_dczid[] = { +static const struct arm64_ftr_bits ftr_dczid[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 5, 27, 0), /* RAZ */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 1, 1), /* DZP */ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* BS */ @@ -193,7 +202,7 @@ static struct arm64_ftr_bits ftr_dczid[] = { }; -static struct arm64_ftr_bits ftr_id_isar5[] = { +static const struct arm64_ftr_bits ftr_id_isar5[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_RDM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 20, 4, 0), /* RAZ */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_CRC32_SHIFT, 4, 0), @@ -204,14 +213,14 @@ static struct arm64_ftr_bits ftr_id_isar5[] = { ARM64_FTR_END, }; -static struct arm64_ftr_bits ftr_id_mmfr4[] = { +static const struct arm64_ftr_bits ftr_id_mmfr4[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 24, 0), /* RAZ */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0), /* ac2 */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0), /* RAZ */ ARM64_FTR_END, }; -static struct arm64_ftr_bits ftr_id_pfr0[] = { +static const struct arm64_ftr_bits ftr_id_pfr0[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 16, 16, 0), /* RAZ */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 12, 4, 0), /* State3 */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 4, 0), /* State2 */ @@ -220,7 +229,7 @@ static struct arm64_ftr_bits ftr_id_pfr0[] = { ARM64_FTR_END, }; -static struct arm64_ftr_bits ftr_id_dfr0[] = { +static const struct arm64_ftr_bits ftr_id_dfr0[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0), S_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0xf), /* PerfMon */ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0), @@ -238,7 +247,7 @@ static struct arm64_ftr_bits ftr_id_dfr0[] = { * 0. Covers the following 32bit registers: * id_isar[0-4], id_mmfr[1-3], id_pfr1, mvfr[0-1] */ -static struct arm64_ftr_bits ftr_generic_32bits[] = { +static const struct arm64_ftr_bits ftr_generic_32bits[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0), @@ -250,29 +259,32 @@ static struct arm64_ftr_bits ftr_generic_32bits[] = { ARM64_FTR_END, }; -static struct arm64_ftr_bits ftr_generic[] = { +static const struct arm64_ftr_bits ftr_generic[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 64, 0), ARM64_FTR_END, }; -static struct arm64_ftr_bits ftr_generic32[] = { +static const struct arm64_ftr_bits ftr_generic32[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 32, 0), ARM64_FTR_END, }; -static struct arm64_ftr_bits ftr_aa64raz[] = { +static const struct arm64_ftr_bits ftr_aa64raz[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 64, 0), ARM64_FTR_END, }; -#define ARM64_FTR_REG(id, table) \ - { \ - .sys_id = id, \ +#define ARM64_FTR_REG(id, table) { \ + .sys_id = id, \ + .reg = &(struct arm64_ftr_reg){ \ .name = #id, \ .ftr_bits = &((table)[0]), \ - } + }} -static struct arm64_ftr_reg arm64_ftr_regs[] = { +static const struct __ftr_reg_entry { + u32 sys_id; + struct arm64_ftr_reg *reg; +} arm64_ftr_regs[] = { /* Op1 = 0, CRn = 0, CRm = 1 */ ARM64_FTR_REG(SYS_ID_PFR0_EL1, ftr_id_pfr0), @@ -315,7 +327,7 @@ static struct arm64_ftr_reg arm64_ftr_regs[] = { ARM64_FTR_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2), /* Op1 = 3, CRn = 0, CRm = 0 */ - ARM64_FTR_REG(SYS_CTR_EL0, ftr_ctr), + { SYS_CTR_EL0, &arm64_ftr_reg_ctrel0 }, ARM64_FTR_REG(SYS_DCZID_EL0, ftr_dczid), /* Op1 = 3, CRn = 14, CRm = 0 */ @@ -324,7 +336,7 @@ static struct arm64_ftr_reg arm64_ftr_regs[] = { static int search_cmp_ftr_reg(const void *id, const void *regp) { - return (int)(unsigned long)id - (int)((const struct arm64_ftr_reg *)regp)->sys_id; + return (int)(unsigned long)id - (int)((const struct __ftr_reg_entry *)regp)->sys_id; } /* @@ -339,14 +351,20 @@ static int search_cmp_ftr_reg(const void *id, const void *regp) */ static struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id) { - return bsearch((const void *)(unsigned long)sys_id, + const struct __ftr_reg_entry *ret; + + ret = bsearch((const void *)(unsigned long)sys_id, arm64_ftr_regs, ARRAY_SIZE(arm64_ftr_regs), sizeof(arm64_ftr_regs[0]), search_cmp_ftr_reg); + if (ret) + return ret->reg; + return NULL; } -static u64 arm64_ftr_set_value(struct arm64_ftr_bits *ftrp, s64 reg, s64 ftr_val) +static u64 arm64_ftr_set_value(const struct arm64_ftr_bits *ftrp, s64 reg, + s64 ftr_val) { u64 mask = arm64_ftr_mask(ftrp); @@ -355,7 +373,8 @@ static u64 arm64_ftr_set_value(struct arm64_ftr_bits *ftrp, s64 reg, s64 ftr_val return reg; } -static s64 arm64_ftr_safe_value(struct arm64_ftr_bits *ftrp, s64 new, s64 cur) +static s64 arm64_ftr_safe_value(const struct arm64_ftr_bits *ftrp, s64 new, + s64 cur) { s64 ret = 0; @@ -376,27 +395,13 @@ static s64 arm64_ftr_safe_value(struct arm64_ftr_bits *ftrp, s64 new, s64 cur) return ret; } -static int __init sort_cmp_ftr_regs(const void *a, const void *b) -{ - return ((const struct arm64_ftr_reg *)a)->sys_id - - ((const struct arm64_ftr_reg *)b)->sys_id; -} - -static void __init swap_ftr_regs(void *a, void *b, int size) -{ - struct arm64_ftr_reg tmp = *(struct arm64_ftr_reg *)a; - *(struct arm64_ftr_reg *)a = *(struct arm64_ftr_reg *)b; - *(struct arm64_ftr_reg *)b = tmp; -} - static void __init sort_ftr_regs(void) { - /* Keep the array sorted so that we can do the binary search */ - sort(arm64_ftr_regs, - ARRAY_SIZE(arm64_ftr_regs), - sizeof(arm64_ftr_regs[0]), - sort_cmp_ftr_regs, - swap_ftr_regs); + int i; + + /* Check that the array is sorted so that we can do the binary search */ + for (i = 1; i < ARRAY_SIZE(arm64_ftr_regs); i++) + BUG_ON(arm64_ftr_regs[i].sys_id < arm64_ftr_regs[i - 1].sys_id); } /* @@ -407,7 +412,7 @@ static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new) { u64 val = 0; u64 strict_mask = ~0x0ULL; - struct arm64_ftr_bits *ftrp; + const struct arm64_ftr_bits *ftrp; struct arm64_ftr_reg *reg = get_arm64_ftr_reg(sys_reg); BUG_ON(!reg); @@ -464,7 +469,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new) { - struct arm64_ftr_bits *ftrp; + const struct arm64_ftr_bits *ftrp; for (ftrp = reg->ftr_bits; ftrp->width; ftrp++) { s64 ftr_cur = arm64_ftr_value(ftrp, reg->sys_val); @@ -1004,23 +1009,33 @@ verify_local_cpu_features(const struct arm64_cpu_capabilities *caps) * cannot do anything to fix it up and could cause unexpected failures. So * we park the CPU. */ -void verify_local_cpu_capabilities(void) +static void verify_local_cpu_capabilities(void) { + verify_local_cpu_errata_workarounds(); + verify_local_cpu_features(arm64_features); + verify_local_elf_hwcaps(arm64_elf_hwcaps); + if (system_supports_32bit_el0()) + verify_local_elf_hwcaps(compat_elf_hwcaps); +} +void check_local_cpu_capabilities(void) +{ + /* + * All secondary CPUs should conform to the early CPU features + * in use by the kernel based on boot CPU. + */ check_early_cpu_features(); /* - * If we haven't computed the system capabilities, there is nothing - * to verify. + * If we haven't finalised the system capabilities, this CPU gets + * a chance to update the errata work arounds. + * Otherwise, this CPU should verify that it has all the system + * advertised capabilities. */ if (!sys_caps_initialised) - return; - - verify_local_cpu_errata(); - verify_local_cpu_features(arm64_features); - verify_local_elf_hwcaps(arm64_elf_hwcaps); - if (system_supports_32bit_el0()) - verify_local_elf_hwcaps(compat_elf_hwcaps); + update_cpu_errata_workarounds(); + else + verify_local_cpu_capabilities(); } static void __init setup_feature_capabilities(void) diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index ed1b84fe6925..b3d5b3e8fbcb 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -363,8 +363,6 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) } cpuinfo_detect_icache_policy(info); - - check_local_cpu_errata(); } void cpuinfo_store_cpu(void) diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 91fff48d0f57..73ae90ef434c 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -46,16 +46,14 @@ static void mdscr_write(u32 mdscr) { unsigned long flags; local_dbg_save(flags); - asm volatile("msr mdscr_el1, %0" :: "r" (mdscr)); + write_sysreg(mdscr, mdscr_el1); local_dbg_restore(flags); } NOKPROBE_SYMBOL(mdscr_write); static u32 mdscr_read(void) { - u32 mdscr; - asm volatile("mrs %0, mdscr_el1" : "=r" (mdscr)); - return mdscr; + return read_sysreg(mdscr_el1); } NOKPROBE_SYMBOL(mdscr_read); @@ -132,36 +130,18 @@ NOKPROBE_SYMBOL(disable_debug_monitors); /* * OS lock clearing. */ -static void clear_os_lock(void *unused) +static int clear_os_lock(unsigned int cpu) { - asm volatile("msr oslar_el1, %0" : : "r" (0)); -} - -static int os_lock_notify(struct notifier_block *self, - unsigned long action, void *data) -{ - if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE) - clear_os_lock(NULL); - return NOTIFY_OK; + write_sysreg(0, oslar_el1); + isb(); + return 0; } -static struct notifier_block os_lock_nb = { - .notifier_call = os_lock_notify, -}; - static int debug_monitors_init(void) { - cpu_notifier_register_begin(); - - /* Clear the OS lock. */ - on_each_cpu(clear_os_lock, NULL, 1); - isb(); - - /* Register hotplug handler. */ - __register_cpu_notifier(&os_lock_nb); - - cpu_notifier_register_done(); - return 0; + return cpuhp_setup_state(CPUHP_AP_ARM64_DEBUG_MONITORS_STARTING, + "CPUHP_AP_ARM64_DEBUG_MONITORS_STARTING", + clear_os_lock, NULL); } postcore_initcall(debug_monitors_init); @@ -254,7 +234,7 @@ static int single_step_handler(unsigned long addr, unsigned int esr, return 0; if (user_mode(regs)) { - send_user_sigtrap(TRAP_HWBKPT); + send_user_sigtrap(TRAP_TRACE); /* * ptrace will disable single step unless explicitly @@ -382,7 +362,7 @@ NOKPROBE_SYMBOL(aarch32_break_handler); static int __init debug_traps_init(void) { hook_debug_fault_code(DBG_ESR_EVT_HWSS, single_step_handler, SIGTRAP, - TRAP_HWBKPT, "single-step handler"); + TRAP_TRACE, "single-step handler"); hook_debug_fault_code(DBG_ESR_EVT_BRK, brk_handler, SIGTRAP, TRAP_BRKPT, "ptrace BRK handler"); return 0; @@ -435,8 +415,10 @@ NOKPROBE_SYMBOL(kernel_active_single_step); /* ptrace API */ void user_enable_single_step(struct task_struct *task) { - set_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP); - set_regs_spsr_ss(task_pt_regs(task)); + struct thread_info *ti = task_thread_info(task); + + if (!test_and_set_ti_thread_flag(ti, TIF_SINGLESTEP)) + set_regs_spsr_ss(task_pt_regs(task)); } NOKPROBE_SYMBOL(user_enable_single_step); diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S index 0f03a8fe2314..aef02d2af3b5 100644 --- a/arch/arm64/kernel/entry-ftrace.S +++ b/arch/arm64/kernel/entry-ftrace.S @@ -219,7 +219,7 @@ ENDPROC(ftrace_graph_caller) * * Run ftrace_return_to_handler() before going back to parent. * @fp is checked against the value passed by ftrace_graph_caller() - * only when CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST is enabled. + * only when HAVE_FUNCTION_GRAPH_FP_TEST is enabled. */ ENTRY(return_to_handler) save_return_regs diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 441420ca7d08..223d54a4d66b 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -104,7 +104,7 @@ str x20, [sp, #S_ORIG_ADDR_LIMIT] mov x20, #TASK_SIZE_64 str x20, [tsk, #TI_ADDR_LIMIT] - ALTERNATIVE(nop, SET_PSTATE_UAO(0), ARM64_HAS_UAO, CONFIG_ARM64_UAO) + /* No need to reset PSTATE.UAO, hardware's already set it to 0 for us */ .endif /* \el == 0 */ mrs x22, elr_el1 mrs x23, spsr_el1 @@ -150,13 +150,7 @@ ldr x23, [sp, #S_SP] // load return stack pointer msr sp_el0, x23 #ifdef CONFIG_ARM64_ERRATUM_845719 -alternative_if_not ARM64_WORKAROUND_845719 - nop - nop -#ifdef CONFIG_PID_IN_CONTEXTIDR - nop -#endif -alternative_else +alternative_if ARM64_WORKAROUND_845719 tbz x22, #4, 1f #ifdef CONFIG_PID_IN_CONTEXTIDR mrs x29, contextidr_el1 @@ -165,7 +159,7 @@ alternative_else msr contextidr_el1, xzr #endif 1: -alternative_endif +alternative_else_nop_endif #endif .endif msr elr_el1, x21 // set up the return data @@ -707,18 +701,13 @@ ret_fast_syscall_trace: * Ok, we need to do extra processing, enter the slow path. */ work_pending: - tbnz x1, #TIF_NEED_RESCHED, work_resched - /* TIF_SIGPENDING, TIF_NOTIFY_RESUME or TIF_FOREIGN_FPSTATE case */ mov x0, sp // 'regs' - enable_irq // enable interrupts for do_notify_resume() bl do_notify_resume - b ret_to_user -work_resched: #ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_off // the IRQs are off here, inform the tracing code + bl trace_hardirqs_on // enabled while in userspace #endif - bl schedule - + ldr x1, [tsk, #TI_FLAGS] // re-check for single-step + b finish_ret_to_user /* * "slow" syscall return path. */ @@ -727,6 +716,7 @@ ret_to_user: ldr x1, [tsk, #TI_FLAGS] and x2, x1, #_TIF_WORK_MASK cbnz x2, work_pending +finish_ret_to_user: enable_step_tsk x1, x2 kernel_exit 0 ENDPROC(ret_to_user) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 975b274ee7b5..394c61db5566 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -299,28 +299,16 @@ static inline void fpsimd_pm_init(void) { } #endif /* CONFIG_CPU_PM */ #ifdef CONFIG_HOTPLUG_CPU -static int fpsimd_cpu_hotplug_notifier(struct notifier_block *nfb, - unsigned long action, - void *hcpu) +static int fpsimd_cpu_dead(unsigned int cpu) { - unsigned int cpu = (long)hcpu; - - switch (action) { - case CPU_DEAD: - case CPU_DEAD_FROZEN: - per_cpu(fpsimd_last_state, cpu) = NULL; - break; - } - return NOTIFY_OK; + per_cpu(fpsimd_last_state, cpu) = NULL; + return 0; } -static struct notifier_block fpsimd_cpu_hotplug_notifier_block = { - .notifier_call = fpsimd_cpu_hotplug_notifier, -}; - static inline void fpsimd_hotplug_init(void) { - register_cpu_notifier(&fpsimd_cpu_hotplug_notifier_block); + cpuhp_setup_state_nocalls(CPUHP_ARM64_FPSIMD_DEAD, "arm64/fpsimd:dead", + NULL, fpsimd_cpu_dead); } #else diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index ebecf9aa33d1..40ad08ac569a 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -138,7 +138,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, return; err = ftrace_push_return_trace(old, self_addr, &trace.depth, - frame_pointer); + frame_pointer, NULL); if (err == -EBUSY) return; else diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 3e7b050e99dc..427f6d3f084c 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -208,13 +208,23 @@ efi_header_end: __INIT + /* + * The following callee saved general purpose registers are used on the + * primary lowlevel boot path: + * + * Register Scope Purpose + * x21 stext() .. start_kernel() FDT pointer passed at boot in x0 + * x23 stext() .. start_kernel() physical misalignment/KASLR offset + * x28 __create_page_tables() callee preserved temp register + * x19/x20 __primary_switch() callee preserved temp registers + */ ENTRY(stext) bl preserve_boot_args - bl el2_setup // Drop to EL1, w20=cpu_boot_mode - adrp x24, __PHYS_OFFSET - and x23, x24, MIN_KIMG_ALIGN - 1 // KASLR offset, defaults to 0 + bl el2_setup // Drop to EL1, w0=cpu_boot_mode + adrp x23, __PHYS_OFFSET + and x23, x23, MIN_KIMG_ALIGN - 1 // KASLR offset, defaults to 0 bl set_cpu_boot_mode_flag - bl __create_page_tables // x25=TTBR0, x26=TTBR1 + bl __create_page_tables /* * The following calls CPU setup code, see arch/arm64/mm/proc.S for * details. @@ -222,9 +232,7 @@ ENTRY(stext) * the TCR will have been set. */ bl __cpu_setup // initialise processor - adr_l x27, __primary_switch // address to jump to after - // MMU has been enabled - b __enable_mmu + b __primary_switch ENDPROC(stext) /* @@ -311,23 +319,21 @@ ENDPROC(preserve_boot_args) * been enabled */ __create_page_tables: - adrp x25, idmap_pg_dir - adrp x26, swapper_pg_dir mov x28, lr /* * Invalidate the idmap and swapper page tables to avoid potential * dirty cache lines being evicted. */ - mov x0, x25 - add x1, x26, #SWAPPER_DIR_SIZE + adrp x0, idmap_pg_dir + adrp x1, swapper_pg_dir + SWAPPER_DIR_SIZE bl __inval_cache_range /* * Clear the idmap and swapper page tables. */ - mov x0, x25 - add x6, x26, #SWAPPER_DIR_SIZE + adrp x0, idmap_pg_dir + adrp x6, swapper_pg_dir + SWAPPER_DIR_SIZE 1: stp xzr, xzr, [x0], #16 stp xzr, xzr, [x0], #16 stp xzr, xzr, [x0], #16 @@ -340,7 +346,7 @@ __create_page_tables: /* * Create the identity mapping. */ - mov x0, x25 // idmap_pg_dir + adrp x0, idmap_pg_dir adrp x3, __idmap_text_start // __pa(__idmap_text_start) #ifndef CONFIG_ARM64_VA_BITS_48 @@ -390,7 +396,7 @@ __create_page_tables: /* * Map the kernel image (starting with PHYS_OFFSET). */ - mov x0, x26 // swapper_pg_dir + adrp x0, swapper_pg_dir mov_q x5, KIMAGE_VADDR + TEXT_OFFSET // compile time __va(_text) add x5, x5, x23 // add KASLR displacement create_pgd_entry x0, x5, x3, x6 @@ -405,8 +411,8 @@ __create_page_tables: * accesses (MMU disabled), invalidate the idmap and swapper page * tables again to remove any speculatively loaded cache lines. */ - mov x0, x25 - add x1, x26, #SWAPPER_DIR_SIZE + adrp x0, idmap_pg_dir + adrp x1, swapper_pg_dir + SWAPPER_DIR_SIZE dmb sy bl __inval_cache_range @@ -416,14 +422,27 @@ ENDPROC(__create_page_tables) /* * The following fragment of code is executed with the MMU enabled. + * + * x0 = __PHYS_OFFSET */ - .set initial_sp, init_thread_union + THREAD_START_SP __primary_switched: - mov x28, lr // preserve LR + adrp x4, init_thread_union + add sp, x4, #THREAD_SIZE + msr sp_el0, x4 // Save thread_info + adr_l x8, vectors // load VBAR_EL1 with virtual msr vbar_el1, x8 // vector table address isb + stp xzr, x30, [sp, #-16]! + mov x29, sp + + str_l x21, __fdt_pointer, x5 // Save FDT pointer + + ldr_l x4, kimage_vaddr // Save the offset between + sub x4, x4, x0 // the kernel virtual and + str_l x4, kimage_voffset, x5 // physical mappings + // Clear BSS adr_l x0, __bss_start mov x1, xzr @@ -432,17 +451,6 @@ __primary_switched: bl __pi_memset dsb ishst // Make zero page visible to PTW - adr_l sp, initial_sp, x4 - mov x4, sp - and x4, x4, #~(THREAD_SIZE - 1) - msr sp_el0, x4 // Save thread_info - str_l x21, __fdt_pointer, x5 // Save FDT pointer - - ldr_l x4, kimage_vaddr // Save the offset between - sub x4, x4, x24 // the kernel virtual and - str_l x4, kimage_voffset, x5 // physical mappings - - mov x29, #0 #ifdef CONFIG_KASAN bl kasan_early_init #endif @@ -454,8 +462,8 @@ __primary_switched: bl kaslr_early_init // parse FDT for KASLR options cbz x0, 0f // KASLR disabled? just proceed orr x23, x23, x0 // record KASLR offset - ret x28 // we must enable KASLR, return - // to __enable_mmu() + ldp x29, x30, [sp], #16 // we must enable KASLR, return + ret // to __primary_switch() 0: #endif b start_kernel @@ -465,7 +473,7 @@ ENDPROC(__primary_switched) * end early head section, begin head code that is also used for * hotplug and needs to have the same protections as the text region */ - .section ".text","ax" + .section ".idmap.text","ax" ENTRY(kimage_vaddr) .quad _text - TEXT_OFFSET @@ -490,7 +498,7 @@ CPU_LE( bic x0, x0, #(1 << 25) ) // Clear the EE bit for EL2 CPU_BE( orr x0, x0, #(3 << 24) ) // Set the EE and E0E bits for EL1 CPU_LE( bic x0, x0, #(3 << 24) ) // Clear the EE and E0E bits for EL1 msr sctlr_el1, x0 - mov w20, #BOOT_CPU_MODE_EL1 // This cpu booted in EL1 + mov w0, #BOOT_CPU_MODE_EL1 // This cpu booted in EL1 isb ret @@ -586,7 +594,7 @@ CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems cbz x2, install_el2_stub - mov w20, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2 + mov w0, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2 isb ret @@ -601,7 +609,7 @@ install_el2_stub: PSR_MODE_EL1h) msr spsr_el2, x0 msr elr_el2, lr - mov w20, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2 + mov w0, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2 eret ENDPROC(el2_setup) @@ -611,27 +619,39 @@ ENDPROC(el2_setup) */ set_cpu_boot_mode_flag: adr_l x1, __boot_cpu_mode - cmp w20, #BOOT_CPU_MODE_EL2 + cmp w0, #BOOT_CPU_MODE_EL2 b.ne 1f add x1, x1, #4 -1: str w20, [x1] // This CPU has booted in EL1 +1: str w0, [x1] // This CPU has booted in EL1 dmb sy dc ivac, x1 // Invalidate potentially stale cache line ret ENDPROC(set_cpu_boot_mode_flag) /* + * These values are written with the MMU off, but read with the MMU on. + * Writers will invalidate the corresponding address, discarding up to a + * 'Cache Writeback Granule' (CWG) worth of data. The linker script ensures + * sufficient alignment that the CWG doesn't overlap another section. + */ + .pushsection ".mmuoff.data.write", "aw" +/* * We need to find out the CPU boot mode long after boot, so we need to * store it in a writable variable. * * This is not in .bss, because we set it sufficiently early that the boot-time * zeroing of .bss would clobber it. */ - .pushsection .data..cacheline_aligned - .align L1_CACHE_SHIFT ENTRY(__boot_cpu_mode) .long BOOT_CPU_MODE_EL2 .long BOOT_CPU_MODE_EL1 +/* + * The booting CPU updates the failed status @__early_cpu_boot_status, + * with MMU turned off. + */ +ENTRY(__early_cpu_boot_status) + .long 0 + .popsection /* @@ -639,7 +659,7 @@ ENTRY(__boot_cpu_mode) * cores are held until we're ready for them to initialise. */ ENTRY(secondary_holding_pen) - bl el2_setup // Drop to EL1, w20=cpu_boot_mode + bl el2_setup // Drop to EL1, w0=cpu_boot_mode bl set_cpu_boot_mode_flag mrs x0, mpidr_el1 mov_q x1, MPIDR_HWID_BITMASK @@ -666,12 +686,10 @@ secondary_startup: /* * Common entry point for secondary CPUs. */ - adrp x25, idmap_pg_dir - adrp x26, swapper_pg_dir bl __cpu_setup // initialise processor - - adr_l x27, __secondary_switch // address to jump to after enabling the MMU - b __enable_mmu + bl __enable_mmu + ldr x8, =__secondary_switched + br x8 ENDPROC(secondary_startup) __secondary_switched: @@ -706,33 +724,27 @@ ENDPROC(__secondary_switched) dc ivac, \tmp1 // Invalidate potentially stale cache line .endm - .pushsection .data..cacheline_aligned - .align L1_CACHE_SHIFT -ENTRY(__early_cpu_boot_status) - .long 0 - .popsection - /* * Enable the MMU. * * x0 = SCTLR_EL1 value for turning on the MMU. - * x27 = *virtual* address to jump to upon completion * - * Other registers depend on the function called upon completion. + * Returns to the caller via x30/lr. This requires the caller to be covered + * by the .idmap.text section. * * Checks if the selected granule size is supported by the CPU. * If it isn't, park the CPU */ - .section ".idmap.text", "ax" ENTRY(__enable_mmu) - mrs x22, sctlr_el1 // preserve old SCTLR_EL1 value mrs x1, ID_AA64MMFR0_EL1 ubfx x2, x1, #ID_AA64MMFR0_TGRAN_SHIFT, 4 cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED b.ne __no_granule_support update_early_cpu_boot_status 0, x1, x2 - msr ttbr0_el1, x25 // load TTBR0 - msr ttbr1_el1, x26 // load TTBR1 + adrp x1, idmap_pg_dir + adrp x2, swapper_pg_dir + msr ttbr0_el1, x1 // load TTBR0 + msr ttbr1_el1, x2 // load TTBR1 isb msr sctlr_el1, x0 isb @@ -744,29 +756,7 @@ ENTRY(__enable_mmu) ic iallu dsb nsh isb -#ifdef CONFIG_RANDOMIZE_BASE - mov x19, x0 // preserve new SCTLR_EL1 value - blr x27 - - /* - * If we return here, we have a KASLR displacement in x23 which we need - * to take into account by discarding the current kernel mapping and - * creating a new one. - */ - msr sctlr_el1, x22 // disable the MMU - isb - bl __create_page_tables // recreate kernel mapping - - tlbi vmalle1 // Remove any stale TLB entries - dsb nsh - - msr sctlr_el1, x19 // re-enable the MMU - isb - ic iallu // flush instructions fetched - dsb nsh // via old mapping - isb -#endif - br x27 + ret ENDPROC(__enable_mmu) __no_granule_support: @@ -775,11 +765,11 @@ __no_granule_support: 1: wfe wfi - b 1b + b 1b ENDPROC(__no_granule_support) -__primary_switch: #ifdef CONFIG_RELOCATABLE +__relocate_kernel: /* * Iterate over each entry in the relocation table, and apply the * relocations in place. @@ -801,14 +791,46 @@ __primary_switch: add x13, x13, x23 // relocate str x13, [x11, x23] b 0b +1: ret +ENDPROC(__relocate_kernel) +#endif -1: +__primary_switch: +#ifdef CONFIG_RANDOMIZE_BASE + mov x19, x0 // preserve new SCTLR_EL1 value + mrs x20, sctlr_el1 // preserve old SCTLR_EL1 value #endif + + bl __enable_mmu +#ifdef CONFIG_RELOCATABLE + bl __relocate_kernel +#ifdef CONFIG_RANDOMIZE_BASE ldr x8, =__primary_switched - br x8 -ENDPROC(__primary_switch) + adrp x0, __PHYS_OFFSET + blr x8 -__secondary_switch: - ldr x8, =__secondary_switched + /* + * If we return here, we have a KASLR displacement in x23 which we need + * to take into account by discarding the current kernel mapping and + * creating a new one. + */ + msr sctlr_el1, x20 // disable the MMU + isb + bl __create_page_tables // recreate kernel mapping + + tlbi vmalle1 // Remove any stale TLB entries + dsb nsh + + msr sctlr_el1, x19 // re-enable the MMU + isb + ic iallu // flush instructions fetched + dsb nsh // via old mapping + isb + + bl __relocate_kernel +#endif +#endif + ldr x8, =__primary_switched + adrp x0, __PHYS_OFFSET br x8 -ENDPROC(__secondary_switch) +ENDPROC(__primary_switch) diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S index 46f29b6560ec..e56d848b6466 100644 --- a/arch/arm64/kernel/hibernate-asm.S +++ b/arch/arm64/kernel/hibernate-asm.S @@ -36,8 +36,8 @@ .macro break_before_make_ttbr_switch zero_page, page_table msr ttbr1_el1, \zero_page isb - tlbi vmalle1is - dsb ish + tlbi vmalle1 + dsb nsh msr ttbr1_el1, \page_table isb .endm @@ -96,7 +96,7 @@ ENTRY(swsusp_arch_suspend_exit) add x1, x10, #PAGE_SIZE /* Clean the copied page to PoU - based on flush_icache_range() */ - dcache_line_size x2, x3 + raw_dcache_line_size x2, x3 sub x3, x2, #1 bic x4, x10, x3 2: dc cvau, x4 /* clean D line / unified line */ diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index 65d81f965e74..d55a7b09959b 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -15,9 +15,9 @@ * License terms: GNU General Public License (GPL) version 2 */ #define pr_fmt(x) "hibernate: " x +#include <linux/cpu.h> #include <linux/kvm_host.h> #include <linux/mm.h> -#include <linux/notifier.h> #include <linux/pm.h> #include <linux/sched.h> #include <linux/suspend.h> @@ -26,6 +26,7 @@ #include <asm/barrier.h> #include <asm/cacheflush.h> +#include <asm/cputype.h> #include <asm/irqflags.h> #include <asm/memory.h> #include <asm/mmu_context.h> @@ -34,6 +35,7 @@ #include <asm/pgtable-hwdef.h> #include <asm/sections.h> #include <asm/smp.h> +#include <asm/smp_plat.h> #include <asm/suspend.h> #include <asm/sysreg.h> #include <asm/virt.h> @@ -54,12 +56,6 @@ extern int in_suspend; /* Do we need to reset el2? */ #define el2_reset_needed() (is_hyp_mode_available() && !is_kernel_in_hyp_mode()) -/* - * Start/end of the hibernate exit code, this must be copied to a 'safe' - * location in memory, and executed from there. - */ -extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[]; - /* temporary el2 vectors in the __hibernate_exit_text section. */ extern char hibernate_el2_vectors[]; @@ -67,6 +63,12 @@ extern char hibernate_el2_vectors[]; extern char __hyp_stub_vectors[]; /* + * The logical cpu number we should resume on, initialised to a non-cpu + * number. + */ +static int sleep_cpu = -EINVAL; + +/* * Values that may not change over hibernate/resume. We put the build number * and date in here so that we guarantee not to resume with a different * kernel. @@ -88,6 +90,8 @@ static struct arch_hibernate_hdr { * re-configure el2. */ phys_addr_t __hyp_stub_vectors; + + u64 sleep_cpu_mpidr; } resume_hdr; static inline void arch_hdr_invariants(struct arch_hibernate_hdr_invariants *i) @@ -130,12 +134,22 @@ int arch_hibernation_header_save(void *addr, unsigned int max_size) else hdr->__hyp_stub_vectors = 0; + /* Save the mpidr of the cpu we called cpu_suspend() on... */ + if (sleep_cpu < 0) { + pr_err("Failing to hibernate on an unkown CPU.\n"); + return -ENODEV; + } + hdr->sleep_cpu_mpidr = cpu_logical_map(sleep_cpu); + pr_info("Hibernating on CPU %d [mpidr:0x%llx]\n", sleep_cpu, + hdr->sleep_cpu_mpidr); + return 0; } EXPORT_SYMBOL(arch_hibernation_header_save); int arch_hibernation_header_restore(void *addr) { + int ret; struct arch_hibernate_hdr_invariants invariants; struct arch_hibernate_hdr *hdr = addr; @@ -145,6 +159,24 @@ int arch_hibernation_header_restore(void *addr) return -EINVAL; } + sleep_cpu = get_logical_index(hdr->sleep_cpu_mpidr); + pr_info("Hibernated on CPU %d [mpidr:0x%llx]\n", sleep_cpu, + hdr->sleep_cpu_mpidr); + if (sleep_cpu < 0) { + pr_crit("Hibernated on a CPU not known to this kernel!\n"); + sleep_cpu = -EINVAL; + return -EINVAL; + } + if (!cpu_online(sleep_cpu)) { + pr_info("Hibernated on a CPU that is offline! Bringing CPU up.\n"); + ret = cpu_up(sleep_cpu); + if (ret) { + pr_err("Failed to bring hibernate-CPU up!\n"); + sleep_cpu = -EINVAL; + return ret; + } + } + resume_hdr = *hdr; return 0; @@ -241,6 +273,7 @@ out: return rc; } +#define dcache_clean_range(start, end) __flush_dcache_area(start, (end - start)) int swsusp_arch_suspend(void) { @@ -256,10 +289,16 @@ int swsusp_arch_suspend(void) local_dbg_save(flags); if (__cpu_suspend_enter(&state)) { + sleep_cpu = smp_processor_id(); ret = swsusp_save(); } else { - /* Clean kernel to PoC for secondary core startup */ - __flush_dcache_area(LMADDR(KERNEL_START), KERNEL_END - KERNEL_START); + /* Clean kernel core startup/idle code to PoC*/ + dcache_clean_range(__mmuoff_data_start, __mmuoff_data_end); + dcache_clean_range(__idmap_text_start, __idmap_text_end); + + /* Clean kvm setup code to PoC? */ + if (el2_reset_needed()) + dcache_clean_range(__hyp_idmap_text_start, __hyp_idmap_text_end); /* * Tell the hibernation core that we've just restored @@ -267,6 +306,7 @@ int swsusp_arch_suspend(void) */ in_suspend = 0; + sleep_cpu = -EINVAL; __cpu_suspend_exit(); } @@ -275,6 +315,33 @@ int swsusp_arch_suspend(void) return ret; } +static void _copy_pte(pte_t *dst_pte, pte_t *src_pte, unsigned long addr) +{ + pte_t pte = *src_pte; + + if (pte_valid(pte)) { + /* + * Resume will overwrite areas that may be marked + * read only (code, rodata). Clear the RDONLY bit from + * the temporary mappings we use during restore. + */ + set_pte(dst_pte, pte_clear_rdonly(pte)); + } else if (debug_pagealloc_enabled() && !pte_none(pte)) { + /* + * debug_pagealloc will removed the PTE_VALID bit if + * the page isn't in use by the resume kernel. It may have + * been in use by the original kernel, in which case we need + * to put it back in our copy to do the restore. + * + * Before marking this entry valid, check the pfn should + * be mapped. + */ + BUG_ON(!pfn_valid(pte_pfn(pte))); + + set_pte(dst_pte, pte_mkpresent(pte_clear_rdonly(pte))); + } +} + static int copy_pte(pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long start, unsigned long end) { @@ -290,13 +357,7 @@ static int copy_pte(pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long start, src_pte = pte_offset_kernel(src_pmd, start); do { - if (!pte_none(*src_pte)) - /* - * Resume will overwrite areas that may be marked - * read only (code, rodata). Clear the RDONLY bit from - * the temporary mappings we use during restore. - */ - set_pte(dst_pte, __pte(pte_val(*src_pte) & ~PTE_RDONLY)); + _copy_pte(dst_pte, src_pte, addr); } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end); return 0; @@ -483,27 +544,12 @@ out: return rc; } -static int check_boot_cpu_online_pm_callback(struct notifier_block *nb, - unsigned long action, void *ptr) +int hibernate_resume_nonboot_cpu_disable(void) { - if (action == PM_HIBERNATION_PREPARE && - cpumask_first(cpu_online_mask) != 0) { - pr_warn("CPU0 is offline.\n"); - return notifier_from_errno(-ENODEV); + if (sleep_cpu < 0) { + pr_err("Failing to resume from hibernate on an unkown CPU.\n"); + return -ENODEV; } - return NOTIFY_OK; -} - -static int __init check_boot_cpu_online_init(void) -{ - /* - * Set this pm_notifier callback with a lower priority than - * cpu_hotplug_pm_callback, so that cpu_hotplug_pm_callback will be - * called earlier to disable cpu hotplug before the cpu online check. - */ - pm_notifier(check_boot_cpu_online_pm_callback, -INT_MAX); - - return 0; + return freeze_secondary_cpus(sleep_cpu); } -core_initcall(check_boot_cpu_online_init); diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index 26a6bf77d272..948b73148d56 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -857,7 +857,7 @@ void hw_breakpoint_thread_switch(struct task_struct *next) /* * CPU initialisation. */ -static void hw_breakpoint_reset(void *unused) +static int hw_breakpoint_reset(unsigned int cpu) { int i; struct perf_event **slots; @@ -888,28 +888,14 @@ static void hw_breakpoint_reset(void *unused) write_wb_reg(AARCH64_DBG_REG_WVR, i, 0UL); } } -} -static int hw_breakpoint_reset_notify(struct notifier_block *self, - unsigned long action, - void *hcpu) -{ - if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE) { - local_irq_disable(); - hw_breakpoint_reset(NULL); - local_irq_enable(); - } - return NOTIFY_OK; + return 0; } -static struct notifier_block hw_breakpoint_reset_nb = { - .notifier_call = hw_breakpoint_reset_notify, -}; - #ifdef CONFIG_CPU_PM -extern void cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)); +extern void cpu_suspend_set_dbg_restorer(int (*hw_bp_restore)(unsigned int)); #else -static inline void cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)) +static inline void cpu_suspend_set_dbg_restorer(int (*hw_bp_restore)(unsigned int)) { } #endif @@ -919,36 +905,34 @@ static inline void cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)) */ static int __init arch_hw_breakpoint_init(void) { + int ret; + core_num_brps = get_num_brps(); core_num_wrps = get_num_wrps(); pr_info("found %d breakpoint and %d watchpoint registers.\n", core_num_brps, core_num_wrps); - cpu_notifier_register_begin(); - - /* - * Reset the breakpoint resources. We assume that a halting - * debugger will leave the world in a nice state for us. - */ - smp_call_function(hw_breakpoint_reset, NULL, 1); - hw_breakpoint_reset(NULL); - /* Register debug fault handlers. */ hook_debug_fault_code(DBG_ESR_EVT_HWBP, breakpoint_handler, SIGTRAP, TRAP_HWBKPT, "hw-breakpoint handler"); hook_debug_fault_code(DBG_ESR_EVT_HWWP, watchpoint_handler, SIGTRAP, TRAP_HWBKPT, "hw-watchpoint handler"); - /* Register hotplug notifier. */ - __register_cpu_notifier(&hw_breakpoint_reset_nb); - - cpu_notifier_register_done(); + /* + * Reset the breakpoint resources. We assume that a halting + * debugger will leave the world in a nice state for us. + */ + ret = cpuhp_setup_state(CPUHP_AP_PERF_ARM_HW_BREAKPOINT_STARTING, + "CPUHP_AP_PERF_ARM_HW_BREAKPOINT_STARTING", + hw_breakpoint_reset, NULL); + if (ret) + pr_err("failed to register CPU hotplug notifier: %d\n", ret); /* Register cpu_suspend hw breakpoint restore hook */ cpu_suspend_set_dbg_restorer(hw_breakpoint_reset); - return 0; + return ret; } arch_initcall(arch_hw_breakpoint_init); diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 63f9432d05e8..6f2ac4fc66ca 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -96,7 +96,7 @@ static void __kprobes *patch_map(void *addr, int fixmap) if (module && IS_ENABLED(CONFIG_DEBUG_SET_MODULE_RONX)) page = vmalloc_to_page(addr); - else if (!module && IS_ENABLED(CONFIG_DEBUG_RODATA)) + else if (!module) page = pfn_to_page(PHYS_PFN(__pa(addr))); else return addr; @@ -1202,6 +1202,19 @@ u32 aarch64_set_branch_offset(u32 insn, s32 offset) BUG(); } +s32 aarch64_insn_adrp_get_offset(u32 insn) +{ + BUG_ON(!aarch64_insn_is_adrp(insn)); + return aarch64_insn_decode_immediate(AARCH64_INSN_IMM_ADR, insn) << 12; +} + +u32 aarch64_insn_adrp_set_offset(u32 insn, s32 offset) +{ + BUG_ON(!aarch64_insn_is_adrp(insn)); + return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_ADR, insn, + offset >> 12); +} + /* * Extract the Op/CR data from a msr/mrs instruction. */ diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index b05469173ba5..769f24ef628c 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -6,6 +6,7 @@ * published by the Free Software Foundation. */ +#include <linux/cache.h> #include <linux/crc32.h> #include <linux/init.h> #include <linux/libfdt.h> @@ -20,7 +21,7 @@ #include <asm/pgtable.h> #include <asm/sections.h> -u64 __read_mostly module_alloc_base; +u64 __ro_after_init module_alloc_base; u16 __initdata memstart_offset_seed; static __init u64 get_kaslr_seed(void *fdt) diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c index 8c57f6496e56..e017a9493b92 100644 --- a/arch/arm64/kernel/kgdb.c +++ b/arch/arm64/kernel/kgdb.c @@ -19,10 +19,13 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <linux/bug.h> #include <linux/irq.h> #include <linux/kdebug.h> #include <linux/kgdb.h> #include <linux/kprobes.h> +#include <asm/debug-monitors.h> +#include <asm/insn.h> #include <asm/traps.h> struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = { @@ -338,15 +341,24 @@ void kgdb_arch_exit(void) unregister_die_notifier(&kgdb_notifier); } -/* - * ARM instructions are always in LE. - * Break instruction is encoded in LE format - */ -struct kgdb_arch arch_kgdb_ops = { - .gdb_bpt_instr = { - KGDB_DYN_BRK_INS_BYTE(0), - KGDB_DYN_BRK_INS_BYTE(1), - KGDB_DYN_BRK_INS_BYTE(2), - KGDB_DYN_BRK_INS_BYTE(3), - } -}; +struct kgdb_arch arch_kgdb_ops; + +int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) +{ + int err; + + BUILD_BUG_ON(AARCH64_INSN_SIZE != BREAK_INSTR_SIZE); + + err = aarch64_insn_read((void *)bpt->bpt_addr, (u32 *)bpt->saved_instr); + if (err) + return err; + + return aarch64_insn_write((void *)bpt->bpt_addr, + (u32)AARCH64_BREAK_KGDB_DYN_DBG); +} + +int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt) +{ + return aarch64_insn_write((void *)bpt->bpt_addr, + *(u32 *)bpt->saved_instr); +} diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 838ccf123307..a9310a69fffd 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -24,6 +24,7 @@ #include <asm/sysreg.h> #include <asm/virt.h> +#include <linux/acpi.h> #include <linux/of.h> #include <linux/perf/arm_pmu.h> #include <linux/platform_device.h> @@ -190,13 +191,23 @@ #define ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_MISS 0xED /* PMUv3 HW events mapping. */ + +/* + * ARMv8 Architectural defined events, not all of these may + * be supported on any given implementation. Undefined events will + * be disabled at run-time. + */ static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = { PERF_MAP_ALL_UNSUPPORTED, [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CPU_CYCLES, [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INST_RETIRED, [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE, [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED, [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, + [PERF_COUNT_HW_BUS_CYCLES] = ARMV8_PMUV3_PERFCTR_BUS_CYCLES, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV8_PMUV3_PERFCTR_STALL_FRONTEND, + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV8_PMUV3_PERFCTR_STALL_BACKEND, }; /* ARM Cortex-A53 HW events mapping. */ @@ -258,6 +269,15 @@ static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE, [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL, + [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE, + [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL, + + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL, + [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1D_TLB, + + [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL, + [C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB, + [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, @@ -523,12 +543,6 @@ static struct attribute_group armv8_pmuv3_format_attr_group = { .attrs = armv8_pmuv3_format_attrs, }; -static const struct attribute_group *armv8_pmuv3_attr_groups[] = { - &armv8_pmuv3_events_attr_group, - &armv8_pmuv3_format_attr_group, - NULL, -}; - /* * Perf Events' indices */ @@ -905,9 +919,22 @@ static void armv8pmu_reset(void *info) static int armv8_pmuv3_map_event(struct perf_event *event) { - return armpmu_map_event(event, &armv8_pmuv3_perf_map, - &armv8_pmuv3_perf_cache_map, - ARMV8_PMU_EVTYPE_EVENT); + int hw_event_id; + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + + hw_event_id = armpmu_map_event(event, &armv8_pmuv3_perf_map, + &armv8_pmuv3_perf_cache_map, + ARMV8_PMU_EVTYPE_EVENT); + if (hw_event_id < 0) + return hw_event_id; + + /* disable micro/arch events not supported by this PMU */ + if ((hw_event_id < ARMV8_PMUV3_MAX_COMMON_EVENTS) && + !test_bit(hw_event_id, armpmu->pmceid_bitmap)) { + return -EOPNOTSUPP; + } + + return hw_event_id; } static int armv8_a53_map_event(struct perf_event *event) @@ -985,7 +1012,10 @@ static int armv8_pmuv3_init(struct arm_pmu *cpu_pmu) armv8_pmu_init(cpu_pmu); cpu_pmu->name = "armv8_pmuv3"; cpu_pmu->map_event = armv8_pmuv3_map_event; - cpu_pmu->pmu.attr_groups = armv8_pmuv3_attr_groups; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv8_pmuv3_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv8_pmuv3_format_attr_group; return armv8pmu_probe_pmu(cpu_pmu); } @@ -994,7 +1024,10 @@ static int armv8_a53_pmu_init(struct arm_pmu *cpu_pmu) armv8_pmu_init(cpu_pmu); cpu_pmu->name = "armv8_cortex_a53"; cpu_pmu->map_event = armv8_a53_map_event; - cpu_pmu->pmu.attr_groups = armv8_pmuv3_attr_groups; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv8_pmuv3_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv8_pmuv3_format_attr_group; return armv8pmu_probe_pmu(cpu_pmu); } @@ -1003,7 +1036,10 @@ static int armv8_a57_pmu_init(struct arm_pmu *cpu_pmu) armv8_pmu_init(cpu_pmu); cpu_pmu->name = "armv8_cortex_a57"; cpu_pmu->map_event = armv8_a57_map_event; - cpu_pmu->pmu.attr_groups = armv8_pmuv3_attr_groups; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv8_pmuv3_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv8_pmuv3_format_attr_group; return armv8pmu_probe_pmu(cpu_pmu); } @@ -1012,7 +1048,10 @@ static int armv8_a72_pmu_init(struct arm_pmu *cpu_pmu) armv8_pmu_init(cpu_pmu); cpu_pmu->name = "armv8_cortex_a72"; cpu_pmu->map_event = armv8_a57_map_event; - cpu_pmu->pmu.attr_groups = armv8_pmuv3_attr_groups; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv8_pmuv3_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv8_pmuv3_format_attr_group; return armv8pmu_probe_pmu(cpu_pmu); } @@ -1021,7 +1060,10 @@ static int armv8_thunder_pmu_init(struct arm_pmu *cpu_pmu) armv8_pmu_init(cpu_pmu); cpu_pmu->name = "armv8_cavium_thunder"; cpu_pmu->map_event = armv8_thunder_map_event; - cpu_pmu->pmu.attr_groups = armv8_pmuv3_attr_groups; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv8_pmuv3_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv8_pmuv3_format_attr_group; return armv8pmu_probe_pmu(cpu_pmu); } @@ -1030,7 +1072,10 @@ static int armv8_vulcan_pmu_init(struct arm_pmu *cpu_pmu) armv8_pmu_init(cpu_pmu); cpu_pmu->name = "armv8_brcm_vulcan"; cpu_pmu->map_event = armv8_vulcan_map_event; - cpu_pmu->pmu.attr_groups = armv8_pmuv3_attr_groups; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv8_pmuv3_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv8_pmuv3_format_attr_group; return armv8pmu_probe_pmu(cpu_pmu); } @@ -1044,21 +1089,32 @@ static const struct of_device_id armv8_pmu_of_device_ids[] = { {}, }; +/* + * Non DT systems have their micro/arch events probed at run-time. + * A fairly complete list of generic events are provided and ones that + * aren't supported by the current PMU are disabled. + */ +static const struct pmu_probe_info armv8_pmu_probe_table[] = { + PMU_PROBE(0, 0, armv8_pmuv3_init), /* enable all defined counters */ + { /* sentinel value */ } +}; + static int armv8_pmu_device_probe(struct platform_device *pdev) { - return arm_pmu_device_probe(pdev, armv8_pmu_of_device_ids, NULL); + if (acpi_disabled) + return arm_pmu_device_probe(pdev, armv8_pmu_of_device_ids, + NULL); + + return arm_pmu_device_probe(pdev, armv8_pmu_of_device_ids, + armv8_pmu_probe_table); } static struct platform_driver armv8_pmu_driver = { .driver = { - .name = "armv8-pmu", + .name = ARMV8_PMU_PDEV_NAME, .of_match_table = armv8_pmu_of_device_ids, }, .probe = armv8_pmu_device_probe, }; -static int __init register_armv8_pmu_driver(void) -{ - return platform_driver_register(&armv8_pmu_driver); -} -device_initcall(register_armv8_pmu_driver); +builtin_platform_driver(armv8_pmu_driver); diff --git a/arch/arm64/kernel/probes/decode-insn.c b/arch/arm64/kernel/probes/decode-insn.c index 37e47a9d617e..d1731bf977ef 100644 --- a/arch/arm64/kernel/probes/decode-insn.c +++ b/arch/arm64/kernel/probes/decode-insn.c @@ -16,6 +16,7 @@ #include <linux/kernel.h> #include <linux/kprobes.h> #include <linux/module.h> +#include <linux/kallsyms.h> #include <asm/kprobes.h> #include <asm/insn.h> #include <asm/sections.h> @@ -122,7 +123,7 @@ arm_probe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi) static bool __kprobes is_probed_address_atomic(kprobe_opcode_t *scan_start, kprobe_opcode_t *scan_end) { - while (scan_start > scan_end) { + while (scan_start >= scan_end) { /* * atomic region starts from exclusive load and ends with * exclusive store. @@ -142,33 +143,30 @@ arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi) { enum kprobe_insn decoded; kprobe_opcode_t insn = le32_to_cpu(*addr); - kprobe_opcode_t *scan_start = addr - 1; - kprobe_opcode_t *scan_end = addr - MAX_ATOMIC_CONTEXT_SIZE; -#if defined(CONFIG_MODULES) && defined(MODULES_VADDR) - struct module *mod; -#endif - - if (addr >= (kprobe_opcode_t *)_text && - scan_end < (kprobe_opcode_t *)_text) - scan_end = (kprobe_opcode_t *)_text; -#if defined(CONFIG_MODULES) && defined(MODULES_VADDR) - else { - preempt_disable(); - mod = __module_address((unsigned long)addr); - if (mod && within_module_init((unsigned long)addr, mod) && - !within_module_init((unsigned long)scan_end, mod)) - scan_end = (kprobe_opcode_t *)mod->init_layout.base; - else if (mod && within_module_core((unsigned long)addr, mod) && - !within_module_core((unsigned long)scan_end, mod)) - scan_end = (kprobe_opcode_t *)mod->core_layout.base; - preempt_enable(); + kprobe_opcode_t *scan_end = NULL; + unsigned long size = 0, offset = 0; + + /* + * If there's a symbol defined in front of and near enough to + * the probe address assume it is the entry point to this + * code and use it to further limit how far back we search + * when determining if we're in an atomic sequence. If we could + * not find any symbol skip the atomic test altogether as we + * could otherwise end up searching irrelevant text/literals. + * KPROBES depends on KALLSYMS so this last case should never + * happen. + */ + if (kallsyms_lookup_size_offset((unsigned long) addr, &size, &offset)) { + if (offset < (MAX_ATOMIC_CONTEXT_SIZE*sizeof(kprobe_opcode_t))) + scan_end = addr - (offset / sizeof(kprobe_opcode_t)); + else + scan_end = addr - MAX_ATOMIC_CONTEXT_SIZE; } -#endif decoded = arm_probe_decode_insn(insn, asi); - if (decoded == INSN_REJECTED || - is_probed_address_atomic(scan_start, scan_end)) - return INSN_REJECTED; + if (decoded != INSN_REJECTED && scan_end) + if (is_probed_address_atomic(addr - 1, scan_end)) + return INSN_REJECTED; return decoded; } diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index c6b0f40620d8..f5077ea7af6d 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -19,7 +19,7 @@ #include <linux/kasan.h> #include <linux/kernel.h> #include <linux/kprobes.h> -#include <linux/module.h> +#include <linux/extable.h> #include <linux/slab.h> #include <linux/stop_machine.h> #include <linux/stringify.h> @@ -31,7 +31,7 @@ #include <asm/insn.h> #include <asm/uaccess.h> #include <asm/irq.h> -#include <asm-generic/sections.h> +#include <asm/sections.h> #include "decode-insn.h" @@ -166,13 +166,18 @@ static void __kprobes set_current_kprobe(struct kprobe *p) } /* - * The D-flag (Debug mask) is set (masked) upon debug exception entry. - * Kprobes needs to clear (unmask) D-flag -ONLY- in case of recursive - * probe i.e. when probe hit from kprobe handler context upon - * executing the pre/post handlers. In this case we return with - * D-flag clear so that single-stepping can be carried-out. - * - * Leave D-flag set in all other cases. + * When PSTATE.D is set (masked), then software step exceptions can not be + * generated. + * SPSR's D bit shows the value of PSTATE.D immediately before the + * exception was taken. PSTATE.D is set while entering into any exception + * mode, however software clears it for any normal (none-debug-exception) + * mode in the exception entry. Therefore, when we are entering into kprobe + * breakpoint handler from any normal mode then SPSR.D bit is already + * cleared, however it is set when we are entering from any debug exception + * mode. + * Since we always need to generate single step exception after a kprobe + * breakpoint exception therefore we need to clear it unconditionally, when + * we become sure that the current breakpoint exception is for kprobe. */ static void __kprobes spsr_set_debug_flag(struct pt_regs *regs, int mask) @@ -245,10 +250,7 @@ static void __kprobes setup_singlestep(struct kprobe *p, set_ss_context(kcb, slot); /* mark pending ss */ - if (kcb->kprobe_status == KPROBE_REENTER) - spsr_set_debug_flag(regs, 0); - else - WARN_ON(regs->pstate & PSR_D_BIT); + spsr_set_debug_flag(regs, 0); /* IRQs and single stepping do not mix well. */ kprobes_save_local_irqflag(kcb, regs); @@ -333,8 +335,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr) BUG(); kernel_disable_single_step(); - if (kcb->kprobe_status == KPROBE_REENTER) - spsr_set_debug_flag(regs, 1); if (kcb->kprobe_status == KPROBE_REENTER) restore_previous_kprobe(kcb); @@ -457,9 +457,6 @@ kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr) kprobes_restore_local_irqflag(kcb, regs); kernel_disable_single_step(); - if (kcb->kprobe_status == KPROBE_REENTER) - spsr_set_debug_flag(regs, 1); - post_kprobe_handler(kcb, regs); } @@ -543,9 +540,6 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) bool arch_within_kprobe_blacklist(unsigned long addr) { - extern char __idmap_text_start[], __idmap_text_end[]; - extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[]; - if ((addr >= (unsigned long)__kprobes_text_start && addr < (unsigned long)__kprobes_text_end) || (addr >= (unsigned long)__entry_text_start && diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 6cd2612236dc..a4f5f766af08 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -202,7 +202,7 @@ void show_regs(struct pt_regs * regs) static void tls_thread_flush(void) { - asm ("msr tpidr_el0, xzr"); + write_sysreg(0, tpidr_el0); if (is_compat_task()) { current->thread.tp_value = 0; @@ -213,7 +213,7 @@ static void tls_thread_flush(void) * with a stale shadow state during context switch. */ barrier(); - asm ("msr tpidrro_el0, xzr"); + write_sysreg(0, tpidrro_el0); } } @@ -253,7 +253,7 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, * Read the current TLS pointer from tpidr_el0 as it may be * out-of-sync with the saved value. */ - asm("mrs %0, tpidr_el0" : "=r" (*task_user_tls(p))); + *task_user_tls(p) = read_sysreg(tpidr_el0); if (stack_start) { if (is_compat_thread(task_thread_info(p))) @@ -289,17 +289,15 @@ static void tls_thread_switch(struct task_struct *next) { unsigned long tpidr, tpidrro; - asm("mrs %0, tpidr_el0" : "=r" (tpidr)); + tpidr = read_sysreg(tpidr_el0); *task_user_tls(current) = tpidr; tpidr = *task_user_tls(next); tpidrro = is_compat_thread(task_thread_info(next)) ? next->thread.tp_value : 0; - asm( - " msr tpidr_el0, %0\n" - " msr tpidrro_el0, %1" - : : "r" (tpidr), "r" (tpidrro)); + write_sysreg(tpidr, tpidr_el0); + write_sysreg(tpidrro, tpidrro_el0); } /* Restore the UAO state depending on next's addr_limit */ diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S index 51b73cdde287..ce704a4aeadd 100644 --- a/arch/arm64/kernel/relocate_kernel.S +++ b/arch/arm64/kernel/relocate_kernel.S @@ -34,7 +34,7 @@ ENTRY(arm64_relocate_new_kernel) /* Setup the list loop variables. */ mov x17, x1 /* x17 = kimage_start */ mov x16, x0 /* x16 = kimage_head */ - dcache_line_size x15, x0 /* x15 = dcache line size */ + raw_dcache_line_size x15, x0 /* x15 = dcache line size */ mov x14, xzr /* x14 = entry ptr */ mov x13, xzr /* x13 = copy dest */ diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 536dce22fe76..f534f492a268 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -206,10 +206,15 @@ static void __init request_standard_resources(void) for_each_memblock(memory, region) { res = alloc_bootmem_low(sizeof(*res)); - res->name = "System RAM"; + if (memblock_is_nomap(region)) { + res->name = "reserved"; + res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + } else { + res->name = "System RAM"; + res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; + } res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region)); res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1; - res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; request_resource(&iomem_resource, res); @@ -228,7 +233,7 @@ void __init setup_arch(char **cmdline_p) { pr_info("Boot CPU: AArch64 Processor [%08x]\n", read_cpuid_id()); - sprintf(init_utsname()->machine, ELF_PLATFORM); + sprintf(init_utsname()->machine, UTS_MACHINE); init_mm.start_code = (unsigned long) _text; init_mm.end_code = (unsigned long) _etext; init_mm.end_data = (unsigned long) _edata; diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index a8eafdbc7cb8..404dd67080b9 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -402,15 +402,31 @@ static void do_signal(struct pt_regs *regs) asmlinkage void do_notify_resume(struct pt_regs *regs, unsigned int thread_flags) { - if (thread_flags & _TIF_SIGPENDING) - do_signal(regs); - - if (thread_flags & _TIF_NOTIFY_RESUME) { - clear_thread_flag(TIF_NOTIFY_RESUME); - tracehook_notify_resume(regs); - } - - if (thread_flags & _TIF_FOREIGN_FPSTATE) - fpsimd_restore_current_state(); + /* + * The assembly code enters us with IRQs off, but it hasn't + * informed the tracing code of that for efficiency reasons. + * Update the trace code with the current status. + */ + trace_hardirqs_off(); + do { + if (thread_flags & _TIF_NEED_RESCHED) { + schedule(); + } else { + local_irq_enable(); + + if (thread_flags & _TIF_SIGPENDING) + do_signal(regs); + + if (thread_flags & _TIF_NOTIFY_RESUME) { + clear_thread_flag(TIF_NOTIFY_RESUME); + tracehook_notify_resume(regs); + } + + if (thread_flags & _TIF_FOREIGN_FPSTATE) + fpsimd_restore_current_state(); + } + local_irq_disable(); + thread_flags = READ_ONCE(current_thread_info()->flags); + } while (thread_flags & _TIF_WORK_MASK); } diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index ccf79d849e0a..b8799e7c79de 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -73,10 +73,9 @@ ENTRY(__cpu_suspend_enter) str x2, [x0, #SLEEP_STACK_DATA_SYSTEM_REGS + CPU_CTX_SP] /* find the mpidr_hash */ - ldr x1, =sleep_save_stash - ldr x1, [x1] + ldr_l x1, sleep_save_stash mrs x7, mpidr_el1 - ldr x9, =mpidr_hash + adr_l x9, mpidr_hash ldr x10, [x9, #MPIDR_HASH_MASK] /* * Following code relies on the struct mpidr_hash @@ -95,36 +94,30 @@ ENTRY(__cpu_suspend_enter) mov x0, #1 ret ENDPROC(__cpu_suspend_enter) - .ltorg + .pushsection ".idmap.text", "ax" ENTRY(cpu_resume) bl el2_setup // if in EL2 drop to EL1 cleanly + bl __cpu_setup /* enable the MMU early - so we can access sleep_save_stash by va */ - adr_l lr, __enable_mmu /* __cpu_setup will return here */ - adr_l x27, _resume_switched /* __enable_mmu will branch here */ - adrp x25, idmap_pg_dir - adrp x26, swapper_pg_dir - b __cpu_setup -ENDPROC(cpu_resume) - - .pushsection ".idmap.text", "ax" -_resume_switched: + bl __enable_mmu ldr x8, =_cpu_resume br x8 -ENDPROC(_resume_switched) +ENDPROC(cpu_resume) .ltorg .popsection ENTRY(_cpu_resume) mrs x1, mpidr_el1 - adrp x8, mpidr_hash - add x8, x8, #:lo12:mpidr_hash // x8 = struct mpidr_hash phys address - /* retrieve mpidr_hash members to compute the hash */ + adr_l x8, mpidr_hash // x8 = struct mpidr_hash virt address + + /* retrieve mpidr_hash members to compute the hash */ ldr x2, [x8, #MPIDR_HASH_MASK] ldp w3, w4, [x8, #MPIDR_HASH_SHIFTS] ldp w5, w6, [x8, #(MPIDR_HASH_SHIFTS + 8)] compute_mpidr_hash x7, x3, x4, x5, x6, x1, x2 - /* x7 contains hash index, let's use it to grab context pointer */ + + /* x7 contains hash index, let's use it to grab context pointer */ ldr_l x0, sleep_save_stash ldr x0, [x0, x7, lsl #3] add x29, x0, #SLEEP_STACK_DATA_CALLEE_REGS diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index d93d43352504..d3f151cfd4a1 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -201,12 +201,6 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) return ret; } -static void smp_store_cpu_info(unsigned int cpuid) -{ - store_cpu_topology(cpuid); - numa_store_cpu_info(cpuid); -} - /* * This is the secondary CPU boot entry. We're using this CPUs * idle thread stack, but a set of temporary page tables. @@ -239,7 +233,7 @@ asmlinkage void secondary_start_kernel(void) * this CPU ticks all of those. If it doesn't, the CPU will * fail to come online. */ - verify_local_cpu_capabilities(); + check_local_cpu_capabilities(); if (cpu_ops[cpu]->cpu_postboot) cpu_ops[cpu]->cpu_postboot(); @@ -254,7 +248,7 @@ asmlinkage void secondary_start_kernel(void) */ notify_cpu_starting(cpu); - smp_store_cpu_info(cpu); + store_cpu_topology(cpu); /* * OK, now it's safe to let the boot CPU continue. Wait for @@ -437,8 +431,19 @@ void __init smp_cpus_done(unsigned int max_cpus) void __init smp_prepare_boot_cpu(void) { set_my_cpu_offset(per_cpu_offset(smp_processor_id())); + /* + * Initialise the static keys early as they may be enabled by the + * cpufeature code. + */ + jump_label_init(); cpuinfo_store_boot_cpu(); save_boot_cpu_run_el(); + /* + * Run the errata work around checks on the boot CPU, once we have + * initialised the cpu feature infrastructure from + * cpuinfo_store_boot_cpu() above. + */ + update_cpu_errata_workarounds(); } static u64 __init of_get_cpu_mpidr(struct device_node *dn) @@ -619,6 +624,7 @@ static void __init of_parse_and_init_cpus(void) } bootcpu_valid = true; + early_map_cpu_to_node(0, of_node_to_nid(dn)); /* * cpu_logical_map has already been @@ -689,10 +695,13 @@ void __init smp_prepare_cpus(unsigned int max_cpus) { int err; unsigned int cpu; + unsigned int this_cpu; init_cpu_topology(); - smp_store_cpu_info(smp_processor_id()); + this_cpu = smp_processor_id(); + store_cpu_topology(this_cpu); + numa_store_cpu_info(this_cpu); /* * If UP is mandated by "nosmp" (which implies "maxcpus=0"), don't set @@ -719,6 +728,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) continue; set_cpu_present(cpu, true); + numa_store_cpu_info(cpu); } } diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c index 18a71bcd26ee..9a00eee9acc8 100644 --- a/arch/arm64/kernel/smp_spin_table.c +++ b/arch/arm64/kernel/smp_spin_table.c @@ -29,7 +29,8 @@ #include <asm/smp_plat.h> extern void secondary_holding_pen(void); -volatile unsigned long secondary_holding_pen_release = INVALID_HWID; +volatile unsigned long __section(".mmuoff.data.read") +secondary_holding_pen_release = INVALID_HWID; static phys_addr_t cpu_release_addr[NR_CPUS]; diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index d9751a4769e7..c2efddfca18c 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -43,6 +43,9 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) unsigned long fp = frame->fp; unsigned long irq_stack_ptr; + if (!tsk) + tsk = current; + /* * Switching between stacks is valid when tracing current and in * non-preemptible context. @@ -67,7 +70,7 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8)); #ifdef CONFIG_FUNCTION_GRAPH_TRACER - if (tsk && tsk->ret_stack && + if (tsk->ret_stack && (frame->pc == (unsigned long)return_to_handler)) { /* * This is a case where function graph tracer has @@ -152,6 +155,27 @@ static int save_trace(struct stackframe *frame, void *d) return trace->nr_entries >= trace->max_entries; } +void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace) +{ + struct stack_trace_data data; + struct stackframe frame; + + data.trace = trace; + data.skip = trace->skip; + data.no_sched_functions = 0; + + frame.fp = regs->regs[29]; + frame.sp = regs->sp; + frame.pc = regs->pc; +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + frame.graph = current->curr_ret_stack; +#endif + + walk_stackframe(current, &frame, save_trace, &data); + if (trace->nr_entries < trace->max_entries) + trace->entries[trace->nr_entries++] = ULONG_MAX; +} + void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) { struct stack_trace_data data; diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index b616e365cee3..ad734142070d 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -23,8 +23,8 @@ unsigned long *sleep_save_stash; * time the notifier runs debug exceptions might have been enabled already, * with HW breakpoints registers content still in an unknown state. */ -static void (*hw_breakpoint_restore)(void *); -void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)) +static int (*hw_breakpoint_restore)(unsigned int); +void __init cpu_suspend_set_dbg_restorer(int (*hw_bp_restore)(unsigned int)) { /* Prevent multiple restore hook initializations */ if (WARN_ON(hw_breakpoint_restore)) @@ -34,6 +34,8 @@ void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)) void notrace __cpu_suspend_exit(void) { + unsigned int cpu = smp_processor_id(); + /* * We are resuming from reset with the idmap active in TTBR0_EL1. * We must uninstall the idmap and restore the expected MMU @@ -45,7 +47,7 @@ void notrace __cpu_suspend_exit(void) * Restore per-cpu offset before any kernel * subsystem relying on it has a chance to run. */ - set_my_cpu_offset(per_cpu_offset(smp_processor_id())); + set_my_cpu_offset(per_cpu_offset(cpu)); /* * Restore HW breakpoint registers to sane values @@ -53,7 +55,7 @@ void notrace __cpu_suspend_exit(void) * through local_dbg_restore. */ if (hw_breakpoint_restore) - hw_breakpoint_restore(NULL); + hw_breakpoint_restore(cpu); } /* diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c index 28c511b06edf..abaf582fc7a8 100644 --- a/arch/arm64/kernel/sys_compat.c +++ b/arch/arm64/kernel/sys_compat.c @@ -94,7 +94,7 @@ long compat_arm_syscall(struct pt_regs *regs) * See comment in tls_thread_flush. */ barrier(); - asm ("msr tpidrro_el0, %0" : : "r" (regs->regs[0])); + write_sysreg(regs->regs[0], tpidrro_el0); return 0; default: diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index e04f83873af7..5ff020f8fb7f 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -142,6 +142,11 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) unsigned long irq_stack_ptr; int skip; + pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk); + + if (!tsk) + tsk = current; + /* * Switching between stacks is valid when tracing current and in * non-preemptible context. @@ -151,11 +156,6 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) else irq_stack_ptr = 0; - pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk); - - if (!tsk) - tsk = current; - if (tsk == current) { frame.fp = (unsigned long)__builtin_frame_address(0); frame.sp = current_stack_pointer; @@ -447,36 +447,29 @@ void cpu_enable_cache_maint_trap(void *__unused) : "=r" (res) \ : "r" (address), "i" (-EFAULT) ) -asmlinkage void __exception do_sysinstr(unsigned int esr, struct pt_regs *regs) +static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs) { unsigned long address; - int ret; - - /* if this is a write with: Op0=1, Op2=1, Op1=3, CRn=7 */ - if ((esr & 0x01fffc01) == 0x0012dc00) { - int rt = (esr >> 5) & 0x1f; - int crm = (esr >> 1) & 0x0f; + int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT; + int crm = (esr & ESR_ELx_SYS64_ISS_CRM_MASK) >> ESR_ELx_SYS64_ISS_CRM_SHIFT; + int ret = 0; - address = (rt == 31) ? 0 : regs->regs[rt]; + address = (rt == 31) ? 0 : regs->regs[rt]; - switch (crm) { - case 11: /* DC CVAU, gets promoted */ - __user_cache_maint("dc civac", address, ret); - break; - case 10: /* DC CVAC, gets promoted */ - __user_cache_maint("dc civac", address, ret); - break; - case 14: /* DC CIVAC */ - __user_cache_maint("dc civac", address, ret); - break; - case 5: /* IC IVAU */ - __user_cache_maint("ic ivau", address, ret); - break; - default: - force_signal_inject(SIGILL, ILL_ILLOPC, regs, 0); - return; - } - } else { + switch (crm) { + case ESR_ELx_SYS64_ISS_CRM_DC_CVAU: /* DC CVAU, gets promoted */ + __user_cache_maint("dc civac", address, ret); + break; + case ESR_ELx_SYS64_ISS_CRM_DC_CVAC: /* DC CVAC, gets promoted */ + __user_cache_maint("dc civac", address, ret); + break; + case ESR_ELx_SYS64_ISS_CRM_DC_CIVAC: /* DC CIVAC */ + __user_cache_maint("dc civac", address, ret); + break; + case ESR_ELx_SYS64_ISS_CRM_IC_IVAU: /* IC IVAU */ + __user_cache_maint("ic ivau", address, ret); + break; + default: force_signal_inject(SIGILL, ILL_ILLOPC, regs, 0); return; } @@ -487,6 +480,48 @@ asmlinkage void __exception do_sysinstr(unsigned int esr, struct pt_regs *regs) regs->pc += 4; } +static void ctr_read_handler(unsigned int esr, struct pt_regs *regs) +{ + int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT; + + regs->regs[rt] = arm64_ftr_reg_ctrel0.sys_val; + regs->pc += 4; +} + +struct sys64_hook { + unsigned int esr_mask; + unsigned int esr_val; + void (*handler)(unsigned int esr, struct pt_regs *regs); +}; + +static struct sys64_hook sys64_hooks[] = { + { + .esr_mask = ESR_ELx_SYS64_ISS_EL0_CACHE_OP_MASK, + .esr_val = ESR_ELx_SYS64_ISS_EL0_CACHE_OP_VAL, + .handler = user_cache_maint_handler, + }, + { + /* Trap read access to CTR_EL0 */ + .esr_mask = ESR_ELx_SYS64_ISS_SYS_OP_MASK, + .esr_val = ESR_ELx_SYS64_ISS_SYS_CTR_READ, + .handler = ctr_read_handler, + }, + {}, +}; + +asmlinkage void __exception do_sysinstr(unsigned int esr, struct pt_regs *regs) +{ + struct sys64_hook *hook; + + for (hook = sys64_hooks; hook->handler; hook++) + if ((hook->esr_mask & esr) == hook->esr_val) { + hook->handler(esr, regs); + return; + } + + force_signal_inject(SIGILL, ILL_ILLOPC, regs, 0); +} + long compat_arm_syscall(struct pt_regs *regs); asmlinkage long do_ni_syscall(struct pt_regs *regs) diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 076312b17d4f..a2c2478e7d78 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -18,12 +18,13 @@ * Author: Will Deacon <will.deacon@arm.com> */ -#include <linux/kernel.h> +#include <linux/cache.h> #include <linux/clocksource.h> #include <linux/elf.h> #include <linux/err.h> #include <linux/errno.h> #include <linux/gfp.h> +#include <linux/kernel.h> #include <linux/mm.h> #include <linux/sched.h> #include <linux/signal.h> @@ -37,8 +38,7 @@ #include <asm/vdso_datapage.h> extern char vdso_start, vdso_end; -static unsigned long vdso_pages; -static struct page **vdso_pagelist; +static unsigned long vdso_pages __ro_after_init; /* * The vDSO data page. @@ -53,9 +53,9 @@ struct vdso_data *vdso_data = &vdso_data_store.data; /* * Create and map the vectors page for AArch32 tasks. */ -static struct page *vectors_page[1]; +static struct page *vectors_page[1] __ro_after_init; -static int alloc_vectors_page(void) +static int __init alloc_vectors_page(void) { extern char __kuser_helper_start[], __kuser_helper_end[]; extern char __aarch32_sigret_code_start[], __aarch32_sigret_code_end[]; @@ -88,7 +88,7 @@ int aarch32_setup_vectors_page(struct linux_binprm *bprm, int uses_interp) { struct mm_struct *mm = current->mm; unsigned long addr = AARCH32_VECTORS_BASE; - static struct vm_special_mapping spec = { + static const struct vm_special_mapping spec = { .name = "[vectors]", .pages = vectors_page, @@ -110,11 +110,19 @@ int aarch32_setup_vectors_page(struct linux_binprm *bprm, int uses_interp) } #endif /* CONFIG_COMPAT */ -static struct vm_special_mapping vdso_spec[2]; +static struct vm_special_mapping vdso_spec[2] __ro_after_init = { + { + .name = "[vvar]", + }, + { + .name = "[vdso]", + }, +}; static int __init vdso_init(void) { int i; + struct page **vdso_pagelist; if (memcmp(&vdso_start, "\177ELF", 4)) { pr_err("vDSO is not a valid ELF object!\n"); @@ -138,16 +146,8 @@ static int __init vdso_init(void) for (i = 0; i < vdso_pages; i++) vdso_pagelist[i + 1] = pfn_to_page(PHYS_PFN(__pa(&vdso_start)) + i); - /* Populate the special mapping structures */ - vdso_spec[0] = (struct vm_special_mapping) { - .name = "[vvar]", - .pages = vdso_pagelist, - }; - - vdso_spec[1] = (struct vm_special_mapping) { - .name = "[vdso]", - .pages = &vdso_pagelist[1], - }; + vdso_spec[0].pages = &vdso_pagelist[0]; + vdso_spec[1].pages = &vdso_pagelist[1]; return 0; } @@ -201,7 +201,7 @@ up_fail: */ void update_vsyscall(struct timekeeper *tk) { - u32 use_syscall = strcmp(tk->tkr_mono.clock->name, "arch_sys_counter"); + u32 use_syscall = !tk->tkr_mono.clock->archdata.vdso_direct; ++vdso_data->tb_seq_count; smp_wmb(); diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 659963d40bb4..5ce9b2929e0d 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -185,6 +185,25 @@ SECTIONS _data = .; _sdata = .; RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE) + + /* + * Data written with the MMU off but read with the MMU on requires + * cache lines to be invalidated, discarding up to a Cache Writeback + * Granule (CWG) of data from the cache. Keep the section that + * requires this type of maintenance to be in its own Cache Writeback + * Granule (CWG) area so the cache maintenance operations don't + * interfere with adjacent data. + */ + .mmuoff.data.write : ALIGN(SZ_2K) { + __mmuoff_data_start = .; + *(.mmuoff.data.write) + } + . = ALIGN(SZ_2K); + .mmuoff.data.read : { + *(.mmuoff.data.read) + __mmuoff_data_end = .; + } + PECOFF_EDATA_PADDING _edata = .; diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 7ce931565151..2726635dceba 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -46,10 +46,6 @@ alternative_if_not ARM64_HAS_VIRT_HOST_EXTN hvc #0 ldr lr, [sp], #16 ret -alternative_else +alternative_else_nop_endif b __vhe_hyp_call - nop - nop - nop -alternative_endif ENDPROC(__kvm_call_hyp) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index e51367d159d0..f302fdb3a030 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -36,6 +36,7 @@ #include <asm/kvm_host.h> #include <asm/kvm_mmu.h> #include <asm/perf_event.h> +#include <asm/sysreg.h> #include <trace/events/kvm.h> @@ -67,11 +68,9 @@ static u32 get_ccsidr(u32 csselr) /* Make sure noone else changes CSSELR during this! */ local_irq_disable(); - /* Put value into CSSELR */ - asm volatile("msr csselr_el1, %x0" : : "r" (csselr)); + write_sysreg(csselr, csselr_el1); isb(); - /* Read result out of CCSIDR */ - asm volatile("mrs %0, ccsidr_el1" : "=r" (ccsidr)); + ccsidr = read_sysreg(ccsidr_el1); local_irq_enable(); return ccsidr; @@ -174,9 +173,7 @@ static bool trap_dbgauthstatus_el1(struct kvm_vcpu *vcpu, if (p->is_write) { return ignore_write(vcpu, p); } else { - u32 val; - asm volatile("mrs %0, dbgauthstatus_el1" : "=r" (val)); - p->regval = val; + p->regval = read_sysreg(dbgauthstatus_el1); return true; } } @@ -429,10 +426,7 @@ static void reset_wcr(struct kvm_vcpu *vcpu, static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { - u64 amair; - - asm volatile("mrs %0, amair_el1\n" : "=r" (amair)); - vcpu_sys_reg(vcpu, AMAIR_EL1) = amair; + vcpu_sys_reg(vcpu, AMAIR_EL1) = read_sysreg(amair_el1); } static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) @@ -456,8 +450,9 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { u64 pmcr, val; - asm volatile("mrs %0, pmcr_el0\n" : "=r" (pmcr)); - /* Writable bits of PMCR_EL0 (ARMV8_PMU_PMCR_MASK) is reset to UNKNOWN + pmcr = read_sysreg(pmcr_el0); + /* + * Writable bits of PMCR_EL0 (ARMV8_PMU_PMCR_MASK) are reset to UNKNOWN * except PMCR.E resetting to zero. */ val = ((pmcr & ~ARMV8_PMU_PMCR_MASK) @@ -557,9 +552,9 @@ static bool access_pmceid(struct kvm_vcpu *vcpu, struct sys_reg_params *p, return false; if (!(p->Op2 & 1)) - asm volatile("mrs %0, pmceid0_el0\n" : "=r" (pmceid)); + pmceid = read_sysreg(pmceid0_el0); else - asm volatile("mrs %0, pmceid1_el0\n" : "=r" (pmceid)); + pmceid = read_sysreg(pmceid1_el0); p->regval = pmceid; @@ -1833,11 +1828,7 @@ static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu, static void get_##reg(struct kvm_vcpu *v, \ const struct sys_reg_desc *r) \ { \ - u64 val; \ - \ - asm volatile("mrs %0, " __stringify(reg) "\n" \ - : "=r" (val)); \ - ((struct sys_reg_desc *)r)->val = val; \ + ((struct sys_reg_desc *)r)->val = read_sysreg(reg); \ } FUNCTION_INVARIANT(midr_el1) diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c index ed90578fa120..46af7186bca6 100644 --- a/arch/arm64/kvm/sys_regs_generic_v8.c +++ b/arch/arm64/kvm/sys_regs_generic_v8.c @@ -26,6 +26,7 @@ #include <asm/kvm_host.h> #include <asm/kvm_emulate.h> #include <asm/kvm_coproc.h> +#include <asm/sysreg.h> #include <linux/init.h> #include "sys_regs.h" @@ -43,10 +44,7 @@ static bool access_actlr(struct kvm_vcpu *vcpu, static void reset_actlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { - u64 actlr; - - asm volatile("mrs %0, actlr_el1\n" : "=r" (actlr)); - vcpu_sys_reg(vcpu, ACTLR_EL1) = actlr; + vcpu_sys_reg(vcpu, ACTLR_EL1) = read_sysreg(actlr_el1); } /* diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S index 4c1e700840b6..c3cd65e31814 100644 --- a/arch/arm64/lib/copy_page.S +++ b/arch/arm64/lib/copy_page.S @@ -29,14 +29,11 @@ * x1 - src */ ENTRY(copy_page) -alternative_if_not ARM64_HAS_NO_HW_PREFETCH - nop - nop -alternative_else +alternative_if ARM64_HAS_NO_HW_PREFETCH # Prefetch two cache lines ahead. prfm pldl1strm, [x1, #128] prfm pldl1strm, [x1, #256] -alternative_endif +alternative_else_nop_endif ldp x2, x3, [x1] ldp x4, x5, [x1, #16] @@ -52,11 +49,9 @@ alternative_endif 1: subs x18, x18, #128 -alternative_if_not ARM64_HAS_NO_HW_PREFETCH - nop -alternative_else +alternative_if ARM64_HAS_NO_HW_PREFETCH prfm pldl1strm, [x1, #384] -alternative_endif +alternative_else_nop_endif stnp x2, x3, [x0] ldp x2, x3, [x1] diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 07d7352d7c38..58b5a906ff78 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -105,19 +105,20 @@ ENTRY(__clean_dcache_area_pou) ENDPROC(__clean_dcache_area_pou) /* - * __inval_cache_range(start, end) - * - start - start address of region - * - end - end address of region + * __dma_inv_area(start, size) + * - start - virtual start address of region + * - size - size in question */ -ENTRY(__inval_cache_range) +__dma_inv_area: + add x1, x1, x0 /* FALLTHROUGH */ /* - * __dma_inv_range(start, end) - * - start - virtual start address of region - * - end - virtual end address of region + * __inval_cache_range(start, end) + * - start - start address of region + * - end - end address of region */ -__dma_inv_range: +ENTRY(__inval_cache_range) dcache_line_size x2, x3 sub x3, x2, #1 tst x1, x3 // end cache line aligned? @@ -136,46 +137,43 @@ __dma_inv_range: dsb sy ret ENDPIPROC(__inval_cache_range) -ENDPROC(__dma_inv_range) +ENDPROC(__dma_inv_area) + +/* + * __clean_dcache_area_poc(kaddr, size) + * + * Ensure that any D-cache lines for the interval [kaddr, kaddr+size) + * are cleaned to the PoC. + * + * - kaddr - kernel address + * - size - size in question + */ +ENTRY(__clean_dcache_area_poc) + /* FALLTHROUGH */ /* - * __dma_clean_range(start, end) + * __dma_clean_area(start, size) * - start - virtual start address of region - * - end - virtual end address of region + * - size - size in question */ -__dma_clean_range: - dcache_line_size x2, x3 - sub x3, x2, #1 - bic x0, x0, x3 -1: -alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE - dc cvac, x0 -alternative_else - dc civac, x0 -alternative_endif - add x0, x0, x2 - cmp x0, x1 - b.lo 1b - dsb sy +__dma_clean_area: + dcache_by_line_op cvac, sy, x0, x1, x2, x3 ret -ENDPROC(__dma_clean_range) +ENDPIPROC(__clean_dcache_area_poc) +ENDPROC(__dma_clean_area) /* - * __dma_flush_range(start, end) + * __dma_flush_area(start, size) + * + * clean & invalidate D / U line + * * - start - virtual start address of region - * - end - virtual end address of region + * - size - size in question */ -ENTRY(__dma_flush_range) - dcache_line_size x2, x3 - sub x3, x2, #1 - bic x0, x0, x3 -1: dc civac, x0 // clean & invalidate D / U line - add x0, x0, x2 - cmp x0, x1 - b.lo 1b - dsb sy +ENTRY(__dma_flush_area) + dcache_by_line_op civac, sy, x0, x1, x2, x3 ret -ENDPIPROC(__dma_flush_range) +ENDPIPROC(__dma_flush_area) /* * __dma_map_area(start, size, dir) @@ -184,10 +182,9 @@ ENDPIPROC(__dma_flush_range) * - dir - DMA direction */ ENTRY(__dma_map_area) - add x1, x1, x0 cmp w2, #DMA_FROM_DEVICE - b.eq __dma_inv_range - b __dma_clean_range + b.eq __dma_inv_area + b __dma_clean_area ENDPIPROC(__dma_map_area) /* @@ -197,8 +194,7 @@ ENDPIPROC(__dma_map_area) * - dir - DMA direction */ ENTRY(__dma_unmap_area) - add x1, x1, x0 cmp w2, #DMA_TO_DEVICE - b.ne __dma_inv_range + b.ne __dma_inv_area ret ENDPIPROC(__dma_unmap_area) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index c4284c432ae8..bdacead5b802 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -20,6 +20,7 @@ #include <linux/gfp.h> #include <linux/acpi.h> #include <linux/bootmem.h> +#include <linux/cache.h> #include <linux/export.h> #include <linux/slab.h> #include <linux/genalloc.h> @@ -30,7 +31,7 @@ #include <asm/cacheflush.h> -static int swiotlb __read_mostly; +static int swiotlb __ro_after_init; static pgprot_t __get_dma_pgprot(unsigned long attrs, pgprot_t prot, bool coherent) @@ -168,7 +169,7 @@ static void *__dma_alloc(struct device *dev, size_t size, return ptr; /* remove any dirty cache lines on the kernel alias */ - __dma_flush_range(ptr, ptr + size); + __dma_flush_area(ptr, size); /* create a coherent mapping */ page = virt_to_page(ptr); @@ -387,7 +388,7 @@ static int __init atomic_pool_init(void) void *page_addr = page_address(page); memset(page_addr, 0, atomic_pool_size); - __dma_flush_range(page_addr, page_addr + atomic_pool_size); + __dma_flush_area(page_addr, atomic_pool_size); atomic_pool = gen_pool_create(PAGE_SHIFT, -1); if (!atomic_pool) @@ -548,7 +549,7 @@ fs_initcall(dma_debug_do_init); /* Thankfully, all cache ops are by VA so we can ignore phys here */ static void flush_page(struct device *dev, const void *virt, phys_addr_t phys) { - __dma_flush_range(virt, virt + PAGE_SIZE); + __dma_flush_area(virt, PAGE_SIZE); } static void *__iommu_alloc_attrs(struct device *dev, size_t size, diff --git a/arch/arm64/mm/extable.c b/arch/arm64/mm/extable.c index 81acd4706878..c9f118cd812b 100644 --- a/arch/arm64/mm/extable.c +++ b/arch/arm64/mm/extable.c @@ -2,7 +2,7 @@ * Based on arch/arm/mm/extable.c */ -#include <linux/module.h> +#include <linux/extable.h> #include <linux/uaccess.h> int fixup_exception(struct pt_regs *regs) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 05d2bd776c69..53d9159662fe 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -18,7 +18,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#include <linux/module.h> +#include <linux/extable.h> #include <linux/signal.h> #include <linux/mm.h> #include <linux/hardirq.h> @@ -251,8 +251,7 @@ static int __do_page_fault(struct mm_struct *mm, unsigned long addr, good_area: /* * Check that the permissions on the VMA allow for the fault which - * occurred. If we encountered a write or exec fault, we must have - * appropriate permissions, otherwise we allow any permission. + * occurred. */ if (!(vma->vm_flags & vm_flags)) { fault = VM_FAULT_BADACCESS; @@ -288,7 +287,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, struct task_struct *tsk; struct mm_struct *mm; int fault, sig, code; - unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC; + unsigned long vm_flags = VM_READ | VM_WRITE; unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; if (notify_page_fault(regs, esr)) diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index 43a76b07eb32..8377329d8c97 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -25,8 +25,6 @@ #include <asm/cachetype.h> #include <asm/tlbflush.h> -#include "mm.h" - void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index bbb7ee76e319..21c489bdeb4e 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -23,6 +23,7 @@ #include <linux/swap.h> #include <linux/init.h> #include <linux/bootmem.h> +#include <linux/cache.h> #include <linux/mman.h> #include <linux/nodemask.h> #include <linux/initrd.h> @@ -34,6 +35,7 @@ #include <linux/dma-contiguous.h> #include <linux/efi.h> #include <linux/swiotlb.h> +#include <linux/vmalloc.h> #include <asm/boot.h> #include <asm/fixmap.h> @@ -47,16 +49,14 @@ #include <asm/tlb.h> #include <asm/alternative.h> -#include "mm.h" - /* * We need to be able to catch inadvertent references to memstart_addr * that occur (potentially in generic code) before arm64_memblock_init() * executes, which assigns it its actual value. So use a default value * that cannot be mistaken for a real physical address. */ -s64 memstart_addr __read_mostly = -1; -phys_addr_t arm64_dma_phys_limit __read_mostly; +s64 memstart_addr __ro_after_init = -1; +phys_addr_t arm64_dma_phys_limit __ro_after_init; #ifdef CONFIG_BLK_DEV_INITRD static int __init early_initrd(char *p) @@ -485,7 +485,12 @@ void free_initmem(void) { free_reserved_area(__va(__pa(__init_begin)), __va(__pa(__init_end)), 0, "unused kernel"); - fixup_init(); + /* + * Unmap the __init region but leave the VM area in place. This + * prevents the region from being reused for kernel modules, which + * is not supported by kallsyms. + */ + unmap_kernel_range((u64)__init_begin, (u64)(__init_end - __init_begin)); } #ifdef CONFIG_BLK_DEV_INITRD diff --git a/arch/arm64/mm/mm.h b/arch/arm64/mm/mm.h deleted file mode 100644 index 71fe98985455..000000000000 --- a/arch/arm64/mm/mm.h +++ /dev/null @@ -1,2 +0,0 @@ - -void fixup_init(void); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 4989948d1feb..05615a3fdc6f 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -17,6 +17,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <linux/cache.h> #include <linux/export.h> #include <linux/kernel.h> #include <linux/errno.h> @@ -42,11 +43,9 @@ #include <asm/memblock.h> #include <asm/mmu_context.h> -#include "mm.h" - u64 idmap_t0sz = TCR_T0SZ(VA_BITS); -u64 kimage_voffset __read_mostly; +u64 kimage_voffset __ro_after_init; EXPORT_SYMBOL(kimage_voffset); /* @@ -399,16 +398,6 @@ void mark_rodata_ro(void) section_size, PAGE_KERNEL_RO); } -void fixup_init(void) -{ - /* - * Unmap the __init region but leave the VM area in place. This - * prevents the region from being reused for kernel modules, which - * is not supported by kallsyms. - */ - unmap_kernel_range((u64)__init_begin, (u64)(__init_end - __init_begin)); -} - static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end, pgprot_t prot, struct vm_struct *vma) { diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index 5bb15eab6f00..778a985c8a70 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -17,6 +17,8 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#define pr_fmt(fmt) "NUMA: " fmt + #include <linux/acpi.h> #include <linux/bootmem.h> #include <linux/memblock.h> @@ -24,6 +26,7 @@ #include <linux/of.h> #include <asm/acpi.h> +#include <asm/sections.h> struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; EXPORT_SYMBOL(node_data); @@ -38,10 +41,9 @@ static __init int numa_parse_early_param(char *opt) { if (!opt) return -EINVAL; - if (!strncmp(opt, "off", 3)) { - pr_info("%s\n", "NUMA turned off"); + if (!strncmp(opt, "off", 3)) numa_off = true; - } + return 0; } early_param("numa", numa_parse_early_param); @@ -93,7 +95,6 @@ void numa_clear_node(unsigned int cpu) */ static void __init setup_node_to_cpumask_map(void) { - unsigned int cpu; int node; /* setup nr_node_ids if not done yet */ @@ -106,11 +107,8 @@ static void __init setup_node_to_cpumask_map(void) cpumask_clear(node_to_cpumask_map[node]); } - for_each_possible_cpu(cpu) - set_cpu_numa_node(cpu, NUMA_NO_NODE); - /* cpumask_of_node() will now work */ - pr_debug("NUMA: Node to cpumask map for %d nodes\n", nr_node_ids); + pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids); } /* @@ -118,18 +116,77 @@ static void __init setup_node_to_cpumask_map(void) */ void numa_store_cpu_info(unsigned int cpu) { - map_cpu_to_node(cpu, numa_off ? 0 : cpu_to_node_map[cpu]); + map_cpu_to_node(cpu, cpu_to_node_map[cpu]); } void __init early_map_cpu_to_node(unsigned int cpu, int nid) { /* fallback to node 0 */ - if (nid < 0 || nid >= MAX_NUMNODES) + if (nid < 0 || nid >= MAX_NUMNODES || numa_off) nid = 0; cpu_to_node_map[cpu] = nid; + + /* + * We should set the numa node of cpu0 as soon as possible, because it + * has already been set up online before. cpu_to_node(0) will soon be + * called. + */ + if (!cpu) + set_cpu_numa_node(cpu, nid); +} + +#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA +unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; +EXPORT_SYMBOL(__per_cpu_offset); + +static int __init early_cpu_to_node(int cpu) +{ + return cpu_to_node_map[cpu]; +} + +static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) +{ + return node_distance(from, to); } +static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, + size_t align) +{ + int nid = early_cpu_to_node(cpu); + + return memblock_virt_alloc_try_nid(size, align, + __pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, nid); +} + +static void __init pcpu_fc_free(void *ptr, size_t size) +{ + memblock_free_early(__pa(ptr), size); +} + +void __init setup_per_cpu_areas(void) +{ + unsigned long delta; + unsigned int cpu; + int rc; + + /* + * Always reserve area for module percpu variables. That's + * what the legacy allocator did. + */ + rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE, + PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, + pcpu_cpu_distance, + pcpu_fc_alloc, pcpu_fc_free); + if (rc < 0) + panic("Failed to initialize percpu areas."); + + delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; + for_each_possible_cpu(cpu) + __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu]; +} +#endif + /** * numa_add_memblk - Set node id to memblk * @nid: NUMA node ID of the new memblk @@ -145,13 +202,13 @@ int __init numa_add_memblk(int nid, u64 start, u64 end) ret = memblock_set_node(start, (end - start), &memblock.memory, nid); if (ret < 0) { - pr_err("NUMA: memblock [0x%llx - 0x%llx] failed to add on node %d\n", + pr_err("memblock [0x%llx - 0x%llx] failed to add on node %d\n", start, (end - 1), nid); return ret; } node_set(nid, numa_nodes_parsed); - pr_info("NUMA: Adding memblock [0x%llx - 0x%llx] on node %d\n", + pr_info("Adding memblock [0x%llx - 0x%llx] on node %d\n", start, (end - 1), nid); return ret; } @@ -166,19 +223,18 @@ static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn) void *nd; int tnid; - pr_info("NUMA: Initmem setup node %d [mem %#010Lx-%#010Lx]\n", - nid, start_pfn << PAGE_SHIFT, - (end_pfn << PAGE_SHIFT) - 1); + pr_info("Initmem setup node %d [mem %#010Lx-%#010Lx]\n", + nid, start_pfn << PAGE_SHIFT, (end_pfn << PAGE_SHIFT) - 1); nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid); nd = __va(nd_pa); /* report and initialize */ - pr_info("NUMA: NODE_DATA [mem %#010Lx-%#010Lx]\n", + pr_info("NODE_DATA [mem %#010Lx-%#010Lx]\n", nd_pa, nd_pa + nd_size - 1); tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT); if (tnid != nid) - pr_info("NUMA: NODE_DATA(%d) on node %d\n", nid, tnid); + pr_info("NODE_DATA(%d) on node %d\n", nid, tnid); node_data[nid] = nd; memset(NODE_DATA(nid), 0, sizeof(pg_data_t)); @@ -235,8 +291,7 @@ static int __init numa_alloc_distance(void) numa_distance[i * numa_distance_cnt + j] = i == j ? LOCAL_DISTANCE : REMOTE_DISTANCE; - pr_debug("NUMA: Initialized distance table, cnt=%d\n", - numa_distance_cnt); + pr_debug("Initialized distance table, cnt=%d\n", numa_distance_cnt); return 0; } @@ -257,20 +312,20 @@ static int __init numa_alloc_distance(void) void __init numa_set_distance(int from, int to, int distance) { if (!numa_distance) { - pr_warn_once("NUMA: Warning: distance table not allocated yet\n"); + pr_warn_once("Warning: distance table not allocated yet\n"); return; } if (from >= numa_distance_cnt || to >= numa_distance_cnt || from < 0 || to < 0) { - pr_warn_once("NUMA: Warning: node ids are out of bound, from=%d to=%d distance=%d\n", + pr_warn_once("Warning: node ids are out of bound, from=%d to=%d distance=%d\n", from, to, distance); return; } if ((u8)distance != distance || (from == to && distance != LOCAL_DISTANCE)) { - pr_warn_once("NUMA: Warning: invalid distance parameter, from=%d to=%d distance=%d\n", + pr_warn_once("Warning: invalid distance parameter, from=%d to=%d distance=%d\n", from, to, distance); return; } @@ -297,7 +352,7 @@ static int __init numa_register_nodes(void) /* Check that valid nid is set to memblks */ for_each_memblock(memory, mblk) if (mblk->nid == NUMA_NO_NODE || mblk->nid >= MAX_NUMNODES) { - pr_warn("NUMA: Warning: invalid memblk node %d [mem %#010Lx-%#010Lx]\n", + pr_warn("Warning: invalid memblk node %d [mem %#010Lx-%#010Lx]\n", mblk->nid, mblk->base, mblk->base + mblk->size - 1); return -EINVAL; @@ -335,8 +390,10 @@ static int __init numa_init(int (*init_func)(void)) if (ret < 0) return ret; - if (nodes_empty(numa_nodes_parsed)) + if (nodes_empty(numa_nodes_parsed)) { + pr_info("No NUMA configuration found\n"); return -EINVAL; + } ret = numa_register_nodes(); if (ret < 0) @@ -344,10 +401,6 @@ static int __init numa_init(int (*init_func)(void)) setup_node_to_cpumask_map(); - /* init boot processor */ - cpu_to_node_map[0] = 0; - map_cpu_to_node(0, 0); - return 0; } @@ -367,10 +420,8 @@ static int __init dummy_numa_init(void) if (numa_off) pr_info("NUMA disabled\n"); /* Forced off on command line. */ - else - pr_info("No NUMA configuration found\n"); - pr_info("NUMA: Faking a node at [mem %#018Lx-%#018Lx]\n", - 0LLU, PFN_PHYS(max_pfn) - 1); + pr_info("Faking a node at [mem %#018Lx-%#018Lx]\n", + 0LLU, PFN_PHYS(max_pfn) - 1); for_each_memblock(memory, mblk) { ret = numa_add_memblk(0, mblk->base, mblk->base + mblk->size); diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index ca6d268e3313..8def55e7249b 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -139,4 +139,43 @@ void __kernel_map_pages(struct page *page, int numpages, int enable) __pgprot(0), __pgprot(PTE_VALID)); } -#endif +#ifdef CONFIG_HIBERNATION +/* + * When built with CONFIG_DEBUG_PAGEALLOC and CONFIG_HIBERNATION, this function + * is used to determine if a linear map page has been marked as not-valid by + * CONFIG_DEBUG_PAGEALLOC. Walk the page table and check the PTE_VALID bit. + * This is based on kern_addr_valid(), which almost does what we need. + * + * Because this is only called on the kernel linear map, p?d_sect() implies + * p?d_present(). When debug_pagealloc is enabled, sections mappings are + * disabled. + */ +bool kernel_page_present(struct page *page) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + unsigned long addr = (unsigned long)page_address(page); + + pgd = pgd_offset_k(addr); + if (pgd_none(*pgd)) + return false; + + pud = pud_offset(pgd, addr); + if (pud_none(*pud)) + return false; + if (pud_sect(*pud)) + return true; + + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) + return false; + if (pmd_sect(*pmd)) + return true; + + pte = pte_offset_kernel(pmd, addr); + return pte_valid(*pte); +} +#endif /* CONFIG_HIBERNATION */ +#endif /* CONFIG_DEBUG_PAGEALLOC */ diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c index ae11d4e03d0e..371c5f03a170 100644 --- a/arch/arm64/mm/pgd.c +++ b/arch/arm64/mm/pgd.c @@ -26,8 +26,6 @@ #include <asm/page.h> #include <asm/tlbflush.h> -#include "mm.h" - static struct kmem_cache *pgd_cache; pgd_t *pgd_alloc(struct mm_struct *mm) diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 9d37e967fa19..352c73b6a59e 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -83,6 +83,7 @@ ENDPROC(cpu_do_suspend) * * x0: Address of context pointer */ + .pushsection ".idmap.text", "ax" ENTRY(cpu_do_resume) ldp x2, x3, [x0] ldp x4, x5, [x0, #16] @@ -120,6 +121,7 @@ ENTRY(cpu_do_resume) isb ret ENDPROC(cpu_do_resume) + .popsection #endif /* @@ -134,17 +136,12 @@ ENTRY(cpu_do_switch_mm) bfi x0, x1, #48, #16 // set the ASID msr ttbr0_el1, x0 // set TTBR0 isb -alternative_if_not ARM64_WORKAROUND_CAVIUM_27456 - ret - nop - nop - nop -alternative_else +alternative_if ARM64_WORKAROUND_CAVIUM_27456 ic iallu dsb nsh isb +alternative_else_nop_endif ret -alternative_endif ENDPROC(cpu_do_switch_mm) .pushsection ".idmap.text", "ax" @@ -181,6 +178,7 @@ ENDPROC(idmap_cpu_replace_ttbr1) * Initialise the processor for turning the MMU on. Return in x0 the * value of the SCTLR_EL1 register. */ + .pushsection ".idmap.text", "ax" ENTRY(__cpu_setup) tlbi vmalle1 // Invalidate local TLB dsb nsh @@ -266,3 +264,4 @@ ENDPROC(__cpu_setup) crval: .word 0xfcffffff // clear .word 0x34d5d91d // set + .popsection diff --git a/arch/avr32/include/asm/uaccess.h b/arch/avr32/include/asm/uaccess.h index 68cf638faf48..b1ec1fa06463 100644 --- a/arch/avr32/include/asm/uaccess.h +++ b/arch/avr32/include/asm/uaccess.h @@ -74,7 +74,7 @@ extern __kernel_size_t __copy_user(void *to, const void *from, extern __kernel_size_t copy_to_user(void __user *to, const void *from, __kernel_size_t n); -extern __kernel_size_t copy_from_user(void *to, const void __user *from, +extern __kernel_size_t ___copy_from_user(void *to, const void __user *from, __kernel_size_t n); static inline __kernel_size_t __copy_to_user(void __user *to, const void *from, @@ -88,6 +88,15 @@ static inline __kernel_size_t __copy_from_user(void *to, { return __copy_user(to, (const void __force *)from, n); } +static inline __kernel_size_t copy_from_user(void *to, + const void __user *from, + __kernel_size_t n) +{ + size_t res = ___copy_from_user(to, from, n); + if (unlikely(res)) + memset(to + (n - res), 0, res); + return res; +} #define __copy_to_user_inatomic __copy_to_user #define __copy_from_user_inatomic __copy_from_user diff --git a/arch/avr32/kernel/avr32_ksyms.c b/arch/avr32/kernel/avr32_ksyms.c index d93ead02daed..7c6cf14f0985 100644 --- a/arch/avr32/kernel/avr32_ksyms.c +++ b/arch/avr32/kernel/avr32_ksyms.c @@ -36,7 +36,7 @@ EXPORT_SYMBOL(copy_page); /* * Userspace access stuff. */ -EXPORT_SYMBOL(copy_from_user); +EXPORT_SYMBOL(___copy_from_user); EXPORT_SYMBOL(copy_to_user); EXPORT_SYMBOL(__copy_user); EXPORT_SYMBOL(strncpy_from_user); diff --git a/arch/avr32/lib/copy_user.S b/arch/avr32/lib/copy_user.S index ea59c04b07de..075373471da1 100644 --- a/arch/avr32/lib/copy_user.S +++ b/arch/avr32/lib/copy_user.S @@ -23,13 +23,13 @@ */ .text .align 1 - .global copy_from_user - .type copy_from_user, @function -copy_from_user: + .global ___copy_from_user + .type ___copy_from_user, @function +___copy_from_user: branch_if_kernel r8, __copy_user ret_if_privileged r8, r11, r10, r10 rjmp __copy_user - .size copy_from_user, . - copy_from_user + .size ___copy_from_user, . - ___copy_from_user .global copy_to_user .type copy_to_user, @function diff --git a/arch/blackfin/include/asm/uaccess.h b/arch/blackfin/include/asm/uaccess.h index 12f5d6851bbc..0a2a70096d8b 100644 --- a/arch/blackfin/include/asm/uaccess.h +++ b/arch/blackfin/include/asm/uaccess.h @@ -171,11 +171,12 @@ static inline int bad_user_access_length(void) static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) { - if (access_ok(VERIFY_READ, from, n)) + if (likely(access_ok(VERIFY_READ, from, n))) { memcpy(to, (const void __force *)from, n); - else - return n; - return 0; + return 0; + } + memset(to, 0, n); + return n; } static inline unsigned long __must_check diff --git a/arch/blackfin/kernel/ftrace-entry.S b/arch/blackfin/kernel/ftrace-entry.S index 28d059540424..3b8bdcbb7da3 100644 --- a/arch/blackfin/kernel/ftrace-entry.S +++ b/arch/blackfin/kernel/ftrace-entry.S @@ -169,7 +169,7 @@ ENTRY(_ftrace_graph_caller) r0 = sp; /* unsigned long *parent */ r1 = [sp]; /* unsigned long self_addr */ # endif -# ifdef CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST +# ifdef HAVE_FUNCTION_GRAPH_FP_TEST r2 = fp; /* unsigned long frame_pointer */ # endif r0 += 16; /* skip the 4 local regs on stack */ @@ -190,7 +190,7 @@ ENTRY(_return_to_handler) [--sp] = r1; /* get original return address */ -# ifdef CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST +# ifdef HAVE_FUNCTION_GRAPH_FP_TEST r0 = fp; /* Blackfin is sane, so omit this */ # endif call _ftrace_return_to_handler; diff --git a/arch/blackfin/kernel/ftrace.c b/arch/blackfin/kernel/ftrace.c index 095de0fa044d..8dad7589b843 100644 --- a/arch/blackfin/kernel/ftrace.c +++ b/arch/blackfin/kernel/ftrace.c @@ -107,7 +107,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, return; if (ftrace_push_return_trace(*parent, self_addr, &trace.depth, - frame_pointer) == -EBUSY) + frame_pointer, NULL) == -EBUSY) return; trace.func = self_addr; diff --git a/arch/cris/include/asm/uaccess.h b/arch/cris/include/asm/uaccess.h index e3530d0f13ee..56c7d5750abd 100644 --- a/arch/cris/include/asm/uaccess.h +++ b/arch/cris/include/asm/uaccess.h @@ -194,30 +194,6 @@ extern unsigned long __copy_user(void __user *to, const void *from, unsigned lon extern unsigned long __copy_user_zeroing(void *to, const void __user *from, unsigned long n); extern unsigned long __do_clear_user(void __user *to, unsigned long n); -static inline unsigned long -__generic_copy_to_user(void __user *to, const void *from, unsigned long n) -{ - if (access_ok(VERIFY_WRITE, to, n)) - return __copy_user(to, from, n); - return n; -} - -static inline unsigned long -__generic_copy_from_user(void *to, const void __user *from, unsigned long n) -{ - if (access_ok(VERIFY_READ, from, n)) - return __copy_user_zeroing(to, from, n); - return n; -} - -static inline unsigned long -__generic_clear_user(void __user *to, unsigned long n) -{ - if (access_ok(VERIFY_WRITE, to, n)) - return __do_clear_user(to, n); - return n; -} - static inline long __strncpy_from_user(char *dst, const char __user *src, long count) { @@ -282,7 +258,7 @@ __constant_copy_from_user(void *to, const void __user *from, unsigned long n) else if (n == 24) __asm_copy_from_user_24(to, from, ret); else - ret = __generic_copy_from_user(to, from, n); + ret = __copy_user_zeroing(to, from, n); return ret; } @@ -333,7 +309,7 @@ __constant_copy_to_user(void __user *to, const void *from, unsigned long n) else if (n == 24) __asm_copy_to_user_24(to, from, ret); else - ret = __generic_copy_to_user(to, from, n); + ret = __copy_user(to, from, n); return ret; } @@ -366,26 +342,43 @@ __constant_clear_user(void __user *to, unsigned long n) else if (n == 24) __asm_clear_24(to, ret); else - ret = __generic_clear_user(to, n); + ret = __do_clear_user(to, n); return ret; } -#define clear_user(to, n) \ - (__builtin_constant_p(n) ? \ - __constant_clear_user(to, n) : \ - __generic_clear_user(to, n)) +static inline size_t clear_user(void __user *to, size_t n) +{ + if (unlikely(!access_ok(VERIFY_WRITE, to, n))) + return n; + if (__builtin_constant_p(n)) + return __constant_clear_user(to, n); + else + return __do_clear_user(to, n); +} -#define copy_from_user(to, from, n) \ - (__builtin_constant_p(n) ? \ - __constant_copy_from_user(to, from, n) : \ - __generic_copy_from_user(to, from, n)) +static inline size_t copy_from_user(void *to, const void __user *from, size_t n) +{ + if (unlikely(!access_ok(VERIFY_READ, from, n))) { + memset(to, 0, n); + return n; + } + if (__builtin_constant_p(n)) + return __constant_copy_from_user(to, from, n); + else + return __copy_user_zeroing(to, from, n); +} -#define copy_to_user(to, from, n) \ - (__builtin_constant_p(n) ? \ - __constant_copy_to_user(to, from, n) : \ - __generic_copy_to_user(to, from, n)) +static inline size_t copy_to_user(void __user *to, const void *from, size_t n) +{ + if (unlikely(!access_ok(VERIFY_WRITE, to, n))) + return n; + if (__builtin_constant_p(n)) + return __constant_copy_to_user(to, from, n); + else + return __copy_user(to, from, n); +} /* We let the __ versions of copy_from/to_user inline, because they're often * used in fast paths and have only a small space overhead. diff --git a/arch/frv/include/asm/uaccess.h b/arch/frv/include/asm/uaccess.h index 3ac9a59d65d4..87d9e34c5df8 100644 --- a/arch/frv/include/asm/uaccess.h +++ b/arch/frv/include/asm/uaccess.h @@ -263,19 +263,25 @@ do { \ extern long __memset_user(void *dst, unsigned long count); extern long __memcpy_user(void *dst, const void *src, unsigned long count); -#define clear_user(dst,count) __memset_user(____force(dst), (count)) +#define __clear_user(dst,count) __memset_user(____force(dst), (count)) #define __copy_from_user_inatomic(to, from, n) __memcpy_user((to), ____force(from), (n)) #define __copy_to_user_inatomic(to, from, n) __memcpy_user(____force(to), (from), (n)) #else -#define clear_user(dst,count) (memset(____force(dst), 0, (count)), 0) +#define __clear_user(dst,count) (memset(____force(dst), 0, (count)), 0) #define __copy_from_user_inatomic(to, from, n) (memcpy((to), ____force(from), (n)), 0) #define __copy_to_user_inatomic(to, from, n) (memcpy(____force(to), (from), (n)), 0) #endif -#define __clear_user clear_user +static inline unsigned long __must_check +clear_user(void __user *to, unsigned long n) +{ + if (likely(__access_ok(to, n))) + n = __clear_user(to, n); + return n; +} static inline unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n) diff --git a/arch/hexagon/include/asm/uaccess.h b/arch/hexagon/include/asm/uaccess.h index f000a382bc7f..f61cfb28e9f2 100644 --- a/arch/hexagon/include/asm/uaccess.h +++ b/arch/hexagon/include/asm/uaccess.h @@ -103,7 +103,8 @@ static inline long hexagon_strncpy_from_user(char *dst, const char __user *src, { long res = __strnlen_user(src, n); - /* return from strnlen can't be zero -- that would be rubbish. */ + if (unlikely(!res)) + return -EFAULT; if (res > n) { copy_from_user(dst, src, n); diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h index 29bd59790d6c..c7026429816b 100644 --- a/arch/ia64/include/asm/thread_info.h +++ b/arch/ia64/include/asm/thread_info.h @@ -56,7 +56,7 @@ struct thread_info { #define alloc_thread_stack_node(tsk, node) ((unsigned long *) 0) #define task_thread_info(tsk) ((struct thread_info *) 0) #endif -#define free_thread_stack(ti) /* nothing */ +#define free_thread_stack(tsk) /* nothing */ #define task_stack_page(tsk) ((void *)(tsk)) #define __HAVE_THREAD_FUNCTIONS diff --git a/arch/ia64/include/asm/uaccess.h b/arch/ia64/include/asm/uaccess.h index 465c70982f40..bfe13196f770 100644 --- a/arch/ia64/include/asm/uaccess.h +++ b/arch/ia64/include/asm/uaccess.h @@ -241,8 +241,7 @@ extern unsigned long __must_check __copy_user (void __user *to, const void __use static inline unsigned long __copy_to_user (void __user *to, const void *from, unsigned long count) { - if (!__builtin_constant_p(count)) - check_object_size(from, count, true); + check_object_size(from, count, true); return __copy_user(to, (__force void __user *) from, count); } @@ -250,8 +249,7 @@ __copy_to_user (void __user *to, const void *from, unsigned long count) static inline unsigned long __copy_from_user (void *to, const void __user *from, unsigned long count) { - if (!__builtin_constant_p(count)) - check_object_size(to, count, false); + check_object_size(to, count, false); return __copy_user((__force void __user *) to, from, count); } @@ -265,27 +263,22 @@ __copy_from_user (void *to, const void __user *from, unsigned long count) long __cu_len = (n); \ \ if (__access_ok(__cu_to, __cu_len, get_fs())) { \ - if (!__builtin_constant_p(n)) \ - check_object_size(__cu_from, __cu_len, true); \ + check_object_size(__cu_from, __cu_len, true); \ __cu_len = __copy_user(__cu_to, (__force void __user *) __cu_from, __cu_len); \ } \ __cu_len; \ }) -#define copy_from_user(to, from, n) \ -({ \ - void *__cu_to = (to); \ - const void __user *__cu_from = (from); \ - long __cu_len = (n); \ - \ - __chk_user_ptr(__cu_from); \ - if (__access_ok(__cu_from, __cu_len, get_fs())) { \ - if (!__builtin_constant_p(n)) \ - check_object_size(__cu_to, __cu_len, false); \ - __cu_len = __copy_user((__force void __user *) __cu_to, __cu_from, __cu_len); \ - } \ - __cu_len; \ -}) +static inline unsigned long +copy_from_user(void *to, const void __user *from, unsigned long n) +{ + check_object_size(to, n, false); + if (likely(__access_ok(from, n, get_fs()))) + n = __copy_user((__force void __user *) to, from, n); + else + memset(to, 0, n); + return n; +} #define __copy_in_user(to, from, size) __copy_user((to), (from), (size)) diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 92b7bc956795..9273e034b730 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -796,7 +796,7 @@ int acpi_isa_irq_to_gsi(unsigned isa_irq, u32 *gsi) * ACPI based hotplug CPU support */ #ifdef CONFIG_ACPI_HOTPLUG_CPU -static int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) +int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) { #ifdef CONFIG_ACPI_NUMA /* diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index eb9220cde76c..9509cc73b9c6 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -986,7 +986,7 @@ ia64_mca_modify_original_stack(struct pt_regs *regs, int cpu = smp_processor_id(); previous_current = curr_task(cpu); - set_curr_task(cpu, current); + ia64_set_curr_task(cpu, current); if ((p = strchr(current->comm, ' '))) *p = '\0'; @@ -1360,14 +1360,14 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, cpumask_clear_cpu(i, &mca_cpu); /* wake next cpu */ while (monarch_cpu != -1) cpu_relax(); /* spin until last cpu leaves */ - set_curr_task(cpu, previous_current); + ia64_set_curr_task(cpu, previous_current); ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; return; } } } - set_curr_task(cpu, previous_current); + ia64_set_curr_task(cpu, previous_current); ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; monarch_cpu = -1; /* This frees the slaves and previous monarchs */ } @@ -1729,7 +1729,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, NOTIFY_INIT(DIE_INIT_SLAVE_LEAVE, regs, (long)&nd, 1); mprintk("Slave on cpu %d returning to normal service.\n", cpu); - set_curr_task(cpu, previous_current); + ia64_set_curr_task(cpu, previous_current); ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; atomic_dec(&slaves); return; @@ -1756,7 +1756,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, mprintk("\nINIT dump complete. Monarch on cpu %d returning to normal service.\n", cpu); atomic_dec(&monarchs); - set_curr_task(cpu, previous_current); + ia64_set_curr_task(cpu, previous_current); monarch_cpu = -1; return; } @@ -1890,7 +1890,7 @@ ia64_mca_cpu_init(void *cpu_data) PAGE_KERNEL))); } -static void ia64_mca_cmc_vector_adjust(void *dummy) +static int ia64_mca_cpu_online(unsigned int cpu) { unsigned long flags; @@ -1898,25 +1898,9 @@ static void ia64_mca_cmc_vector_adjust(void *dummy) if (!cmc_polling_enabled) ia64_mca_cmc_vector_enable(NULL); local_irq_restore(flags); + return 0; } -static int mca_cpu_callback(struct notifier_block *nfb, - unsigned long action, - void *hcpu) -{ - switch (action) { - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - ia64_mca_cmc_vector_adjust(NULL); - break; - } - return NOTIFY_OK; -} - -static struct notifier_block mca_cpu_notifier = { - .notifier_call = mca_cpu_callback -}; - /* * ia64_mca_init * @@ -2111,15 +2095,13 @@ ia64_mca_late_init(void) if (!mca_init) return 0; - register_hotcpu_notifier(&mca_cpu_notifier); - /* Setup the CMCI/P vector and handler */ setup_timer(&cmc_poll_timer, ia64_mca_cmc_poll, 0UL); /* Unmask/enable the vector */ cmc_polling_enabled = 0; - schedule_work(&cmc_enable_work); - + cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "ia64/mca:online", + ia64_mca_cpu_online, NULL); IA64_MCA_DEBUG("%s: CMCI/P setup and enabled.\n", __func__); #ifdef CONFIG_ACPI diff --git a/arch/m32r/include/asm/uaccess.h b/arch/m32r/include/asm/uaccess.h index cac7014daef3..6f8982157a75 100644 --- a/arch/m32r/include/asm/uaccess.h +++ b/arch/m32r/include/asm/uaccess.h @@ -219,7 +219,7 @@ extern int fixup_exception(struct pt_regs *regs); #define __get_user_nocheck(x, ptr, size) \ ({ \ long __gu_err = 0; \ - unsigned long __gu_val; \ + unsigned long __gu_val = 0; \ might_fault(); \ __get_user_size(__gu_val, (ptr), (size), __gu_err); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ diff --git a/arch/m68k/amiga/config.c b/arch/m68k/amiga/config.c index ec9cc1fdd237..ddb8192a3661 100644 --- a/arch/m68k/amiga/config.c +++ b/arch/m68k/amiga/config.c @@ -396,7 +396,7 @@ void __init config_amiga(void) mach_max_dma_address = 0xffffffff; mach_reset = amiga_reset; -#if defined(CONFIG_INPUT_M68K_BEEP) || defined(CONFIG_INPUT_M68K_BEEP_MODULE) +#if IS_ENABLED(CONFIG_INPUT_M68K_BEEP) mach_beep = amiga_mksound; #endif diff --git a/arch/m68k/atari/config.c b/arch/m68k/atari/config.c index cbd5991fd49a..97a3c38cd1f5 100644 --- a/arch/m68k/atari/config.c +++ b/arch/m68k/atari/config.c @@ -211,7 +211,7 @@ void __init config_atari(void) arch_gettimeoffset = atari_gettimeoffset; mach_reset = atari_reset; mach_max_dma_address = 0xffffff; -#if defined(CONFIG_INPUT_M68K_BEEP) || defined(CONFIG_INPUT_M68K_BEEP_MODULE) +#if IS_ENABLED(CONFIG_INPUT_M68K_BEEP) mach_beep = atari_mksound; #endif #ifdef CONFIG_HEARTBEAT diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index 8f5b6f7dd136..55be7e3ff109 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -566,6 +566,8 @@ CONFIG_TEST_UDELAY=m CONFIG_TEST_STATIC_KEYS=m CONFIG_EARLY_PRINTK=y CONFIG_CRYPTO_RSA=m +CONFIG_CRYPTO_DH=m +CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m @@ -584,6 +586,7 @@ CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA512=m +CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index 31bded9c83d4..365dda66b0e6 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -525,6 +525,8 @@ CONFIG_TEST_UDELAY=m CONFIG_TEST_STATIC_KEYS=m CONFIG_EARLY_PRINTK=y CONFIG_CRYPTO_RSA=m +CONFIG_CRYPTO_DH=m +CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m @@ -543,6 +545,7 @@ CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA512=m +CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index 0d7739e04ae2..ce3cbfd16fcd 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -546,6 +546,8 @@ CONFIG_TEST_UDELAY=m CONFIG_TEST_STATIC_KEYS=m CONFIG_EARLY_PRINTK=y CONFIG_CRYPTO_RSA=m +CONFIG_CRYPTO_DH=m +CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m @@ -564,6 +566,7 @@ CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA512=m +CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index 2cbb5c465fec..8db496a9797d 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -517,6 +517,8 @@ CONFIG_TEST_UDELAY=m CONFIG_TEST_STATIC_KEYS=m CONFIG_EARLY_PRINTK=y CONFIG_CRYPTO_RSA=m +CONFIG_CRYPTO_DH=m +CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m @@ -535,6 +537,7 @@ CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA512=m +CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index 96102a42c156..8314156f7149 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -527,6 +527,8 @@ CONFIG_TEST_UDELAY=m CONFIG_TEST_STATIC_KEYS=m CONFIG_EARLY_PRINTK=y CONFIG_CRYPTO_RSA=m +CONFIG_CRYPTO_DH=m +CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m @@ -545,6 +547,7 @@ CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA512=m +CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index 97d88f7dc5a7..6600270b9622 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -549,6 +549,8 @@ CONFIG_TEST_UDELAY=m CONFIG_TEST_STATIC_KEYS=m CONFIG_EARLY_PRINTK=y CONFIG_CRYPTO_RSA=m +CONFIG_CRYPTO_DH=m +CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m @@ -567,6 +569,7 @@ CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA512=m +CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index be25ef208f0f..90abfe9eabba 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -629,6 +629,8 @@ CONFIG_TEST_UDELAY=m CONFIG_TEST_STATIC_KEYS=m CONFIG_EARLY_PRINTK=y CONFIG_CRYPTO_RSA=m +CONFIG_CRYPTO_DH=m +CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m @@ -647,6 +649,7 @@ CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA512=m +CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index a008344360c9..0d502c2f73d5 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -517,6 +517,8 @@ CONFIG_TEST_UDELAY=m CONFIG_TEST_STATIC_KEYS=m CONFIG_EARLY_PRINTK=y CONFIG_CRYPTO_RSA=m +CONFIG_CRYPTO_DH=m +CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m @@ -535,6 +537,7 @@ CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA512=m +CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index 6735a25f36d4..5930e91fc710 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -517,6 +517,8 @@ CONFIG_TEST_UDELAY=m CONFIG_TEST_STATIC_KEYS=m CONFIG_EARLY_PRINTK=y CONFIG_CRYPTO_RSA=m +CONFIG_CRYPTO_DH=m +CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m @@ -535,6 +537,7 @@ CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA512=m +CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index 780c6e9f6cf9..74e3ad82eca9 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -540,6 +540,8 @@ CONFIG_TEST_UDELAY=m CONFIG_TEST_STATIC_KEYS=m CONFIG_EARLY_PRINTK=y CONFIG_CRYPTO_RSA=m +CONFIG_CRYPTO_DH=m +CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m @@ -558,6 +560,7 @@ CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA512=m +CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index 44693cf361e5..4ba8606a4e69 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -518,6 +518,8 @@ CONFIG_TEST_FIRMWARE=m CONFIG_TEST_UDELAY=m CONFIG_TEST_STATIC_KEYS=m CONFIG_CRYPTO_RSA=m +CONFIG_CRYPTO_DH=m +CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m @@ -536,6 +538,7 @@ CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA512=m +CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index ef0071d61158..c6f49726a6c9 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -519,6 +519,8 @@ CONFIG_TEST_UDELAY=m CONFIG_TEST_STATIC_KEYS=m CONFIG_EARLY_PRINTK=y CONFIG_CRYPTO_RSA=m +CONFIG_CRYPTO_DH=m +CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m @@ -537,6 +539,7 @@ CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA512=m +CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m diff --git a/arch/m68k/kernel/setup_mm.c b/arch/m68k/kernel/setup_mm.c index 5b8ec4d5f8e8..50633c38f1e2 100644 --- a/arch/m68k/kernel/setup_mm.c +++ b/arch/m68k/kernel/setup_mm.c @@ -105,7 +105,7 @@ EXPORT_SYMBOL(mach_heartbeat); #ifdef CONFIG_M68K_L2_CACHE void (*mach_l2_flush) (int); #endif -#if defined(CONFIG_INPUT_M68K_BEEP) || defined(CONFIG_INPUT_M68K_BEEP_MODULE) +#if IS_ENABLED(CONFIG_INPUT_M68K_BEEP) void (*mach_beep)(unsigned int, unsigned int); EXPORT_SYMBOL(mach_beep); #endif diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c index 9202f82dfce6..58507edbdf1d 100644 --- a/arch/m68k/kernel/signal.c +++ b/arch/m68k/kernel/signal.c @@ -42,7 +42,7 @@ #include <linux/personality.h> #include <linux/tty.h> #include <linux/binfmts.h> -#include <linux/module.h> +#include <linux/extable.h> #include <linux/tracehook.h> #include <asm/setup.h> diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c index 2f33a33001e5..e46895316eb0 100644 --- a/arch/m68k/mac/config.c +++ b/arch/m68k/mac/config.c @@ -162,7 +162,7 @@ void __init config_mac(void) mach_halt = mac_poweroff; mach_power_off = mac_poweroff; mach_max_dma_address = 0xffffffff; -#if defined(CONFIG_INPUT_M68K_BEEP) || defined(CONFIG_INPUT_M68K_BEEP_MODULE) +#if IS_ENABLED(CONFIG_INPUT_M68K_BEEP) mach_beep = mac_mksound; #endif diff --git a/arch/m68k/q40/config.c b/arch/m68k/q40/config.c index fcb7f05b60b6..ea89a24f4600 100644 --- a/arch/m68k/q40/config.c +++ b/arch/m68k/q40/config.c @@ -180,7 +180,7 @@ void __init config_q40(void) mach_reset = q40_reset; mach_get_model = q40_get_model; -#if defined(CONFIG_INPUT_M68K_BEEP) || defined(CONFIG_INPUT_M68K_BEEP_MODULE) +#if IS_ENABLED(CONFIG_INPUT_M68K_BEEP) mach_beep = q40_mksound; #endif #ifdef CONFIG_HEARTBEAT diff --git a/arch/metag/include/asm/uaccess.h b/arch/metag/include/asm/uaccess.h index 8282cbce7e39..273e61225c27 100644 --- a/arch/metag/include/asm/uaccess.h +++ b/arch/metag/include/asm/uaccess.h @@ -204,8 +204,9 @@ extern unsigned long __must_check __copy_user_zeroing(void *to, static inline unsigned long copy_from_user(void *to, const void __user *from, unsigned long n) { - if (access_ok(VERIFY_READ, from, n)) + if (likely(access_ok(VERIFY_READ, from, n))) return __copy_user_zeroing(to, from, n); + memset(to, 0, n); return n; } diff --git a/arch/microblaze/include/asm/uaccess.h b/arch/microblaze/include/asm/uaccess.h index 331b0d35f89c..826676778094 100644 --- a/arch/microblaze/include/asm/uaccess.h +++ b/arch/microblaze/include/asm/uaccess.h @@ -227,7 +227,7 @@ extern long __user_bad(void); #define __get_user(x, ptr) \ ({ \ - unsigned long __gu_val; \ + unsigned long __gu_val = 0; \ /*unsigned long __gu_ptr = (unsigned long)(ptr);*/ \ long __gu_err; \ switch (sizeof(*(ptr))) { \ @@ -373,10 +373,13 @@ extern long __user_bad(void); static inline long copy_from_user(void *to, const void __user *from, unsigned long n) { + unsigned long res = n; might_fault(); - if (access_ok(VERIFY_READ, from, n)) - return __copy_from_user(to, from, n); - return n; + if (likely(access_ok(VERIFY_READ, from, n))) + res = __copy_from_user(to, from, n); + if (unlikely(res)) + memset(to + (n - res), 0, res); + return res; } #define __copy_to_user(to, from, n) \ diff --git a/arch/microblaze/kernel/ftrace.c b/arch/microblaze/kernel/ftrace.c index fc7b48a52cd5..d57563c58a26 100644 --- a/arch/microblaze/kernel/ftrace.c +++ b/arch/microblaze/kernel/ftrace.c @@ -63,7 +63,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) return; } - err = ftrace_push_return_trace(old, self_addr, &trace.depth, 0); + err = ftrace_push_return_trace(old, self_addr, &trace.depth, 0, NULL); if (err == -EBUSY) { *parent = old; return; diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 26388562e300..212ff92920d2 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -65,6 +65,7 @@ config MIPS select ARCH_CLOCKSOURCE_DATA select HANDLE_DOMAIN_IRQ select HAVE_EXIT_THREAD + select HAVE_REGS_AND_STACK_ACCESS_API menu "Machine selection" diff --git a/arch/mips/Kconfig.debug b/arch/mips/Kconfig.debug index f0e314ceb8ba..7f975b20b20c 100644 --- a/arch/mips/Kconfig.debug +++ b/arch/mips/Kconfig.debug @@ -113,42 +113,6 @@ config SPINLOCK_TEST help Add several files to the debugfs to test spinlock speed. -if CPU_MIPSR6 - -choice - prompt "Compact branch policy" - default MIPS_COMPACT_BRANCHES_OPTIMAL - -config MIPS_COMPACT_BRANCHES_NEVER - bool "Never (force delay slot branches)" - help - Pass the -mcompact-branches=never flag to the compiler in order to - force it to always emit branches with delay slots, and make no use - of the compact branch instructions introduced by MIPSr6. This is - useful if you suspect there may be an issue with compact branches in - either the compiler or the CPU. - -config MIPS_COMPACT_BRANCHES_OPTIMAL - bool "Optimal (use where beneficial)" - help - Pass the -mcompact-branches=optimal flag to the compiler in order for - it to make use of compact branch instructions where it deems them - beneficial, and use branches with delay slots elsewhere. This is the - default compiler behaviour, and should be used unless you have a - reason to choose otherwise. - -config MIPS_COMPACT_BRANCHES_ALWAYS - bool "Always (force compact branches)" - help - Pass the -mcompact-branches=always flag to the compiler in order to - force it to always emit compact branches, making no use of branch - instructions with delay slots. This can result in more compact code - which may be beneficial in some scenarios. - -endchoice - -endif # CPU_MIPSR6 - config SCACHE_DEBUGFS bool "L2 cache debugfs entries" depends on DEBUG_FS diff --git a/arch/mips/Makefile b/arch/mips/Makefile index efd7a9dc93c4..598ab2930fce 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -203,10 +203,6 @@ endif toolchain-virt := $(call cc-option-yn,$(mips-cflags) -mvirt) cflags-$(toolchain-virt) += -DTOOLCHAIN_SUPPORTS_VIRT -cflags-$(CONFIG_MIPS_COMPACT_BRANCHES_NEVER) += -mcompact-branches=never -cflags-$(CONFIG_MIPS_COMPACT_BRANCHES_OPTIMAL) += -mcompact-branches=optimal -cflags-$(CONFIG_MIPS_COMPACT_BRANCHES_ALWAYS) += -mcompact-branches=always - # # Firmware support # diff --git a/arch/mips/ath79/clock.c b/arch/mips/ath79/clock.c index 2e7378467c5c..cc3a1e33a600 100644 --- a/arch/mips/ath79/clock.c +++ b/arch/mips/ath79/clock.c @@ -96,7 +96,7 @@ static struct clk * __init ath79_reg_ffclk(const char *name, struct clk *clk; clk = clk_register_fixed_factor(NULL, name, parent_name, 0, mult, div); - if (!clk) + if (IS_ERR(clk)) panic("failed to allocate %s clock structure", name); return clk; diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c index 5a9b87b7993e..c1eb1ff7c800 100644 --- a/arch/mips/cavium-octeon/octeon-irq.c +++ b/arch/mips/cavium-octeon/octeon-irq.c @@ -1619,6 +1619,12 @@ static int __init octeon_irq_init_gpio( return -ENOMEM; } + /* + * Clear the OF_POPULATED flag that was set by of_irq_init() + * so that all GPIO devices will be probed. + */ + of_node_clear_flag(gpio_node, OF_POPULATED); + return 0; } /* diff --git a/arch/mips/cavium-octeon/octeon-platform.c b/arch/mips/cavium-octeon/octeon-platform.c index b31fbc9d6eae..37a932d9148c 100644 --- a/arch/mips/cavium-octeon/octeon-platform.c +++ b/arch/mips/cavium-octeon/octeon-platform.c @@ -1059,7 +1059,7 @@ static int __init octeon_publish_devices(void) { return of_platform_bus_probe(NULL, octeon_ids, NULL); } -device_initcall(octeon_publish_devices); +arch_initcall(octeon_publish_devices); MODULE_AUTHOR("David Daney <ddaney@caviumnetworks.com>"); MODULE_LICENSE("GPL"); diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c index 4d457d602d3b..256fe6f65cf2 100644 --- a/arch/mips/cavium-octeon/smp.c +++ b/arch/mips/cavium-octeon/smp.c @@ -380,29 +380,11 @@ static int octeon_update_boot_vector(unsigned int cpu) return 0; } -static int octeon_cpu_callback(struct notifier_block *nfb, - unsigned long action, void *hcpu) -{ - unsigned int cpu = (unsigned long)hcpu; - - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_UP_PREPARE: - octeon_update_boot_vector(cpu); - break; - case CPU_ONLINE: - pr_info("Cpu %d online\n", cpu); - break; - case CPU_DEAD: - break; - } - - return NOTIFY_OK; -} - static int register_cavium_notifier(void) { - hotcpu_notifier(octeon_cpu_callback, 0); - return 0; + return cpuhp_setup_state_nocalls(CPUHP_MIPS_SOC_PREPARE, + "mips/cavium:prepare", + octeon_update_boot_vector, NULL); } late_initcall(register_cavium_notifier); diff --git a/arch/mips/dec/int-handler.S b/arch/mips/dec/int-handler.S index d7b99180c6e1..1910223a9c02 100644 --- a/arch/mips/dec/int-handler.S +++ b/arch/mips/dec/int-handler.S @@ -146,7 +146,25 @@ /* * Find irq with highest priority */ - PTR_LA t1,cpu_mask_nr_tbl + # open coded PTR_LA t1, cpu_mask_nr_tbl +#if (_MIPS_SZPTR == 32) + # open coded la t1, cpu_mask_nr_tbl + lui t1, %hi(cpu_mask_nr_tbl) + addiu t1, %lo(cpu_mask_nr_tbl) + +#endif +#if (_MIPS_SZPTR == 64) + # open coded dla t1, cpu_mask_nr_tbl + .set push + .set noat + lui t1, %highest(cpu_mask_nr_tbl) + lui AT, %hi(cpu_mask_nr_tbl) + daddiu t1, t1, %higher(cpu_mask_nr_tbl) + daddiu AT, AT, %lo(cpu_mask_nr_tbl) + dsll t1, 32 + daddu t1, t1, AT + .set pop +#endif 1: lw t2,(t1) nop and t2,t0 @@ -195,7 +213,25 @@ /* * Find irq with highest priority */ - PTR_LA t1,asic_mask_nr_tbl + # open coded PTR_LA t1,asic_mask_nr_tbl +#if (_MIPS_SZPTR == 32) + # open coded la t1, asic_mask_nr_tbl + lui t1, %hi(asic_mask_nr_tbl) + addiu t1, %lo(asic_mask_nr_tbl) + +#endif +#if (_MIPS_SZPTR == 64) + # open coded dla t1, asic_mask_nr_tbl + .set push + .set noat + lui t1, %highest(asic_mask_nr_tbl) + lui AT, %hi(asic_mask_nr_tbl) + daddiu t1, t1, %higher(asic_mask_nr_tbl) + daddiu AT, AT, %lo(asic_mask_nr_tbl) + dsll t1, 32 + daddu t1, t1, AT + .set pop +#endif 2: lw t2,(t1) nop and t2,t0 diff --git a/arch/mips/include/asm/asmmacro.h b/arch/mips/include/asm/asmmacro.h index 56584a659183..83054f79f72a 100644 --- a/arch/mips/include/asm/asmmacro.h +++ b/arch/mips/include/asm/asmmacro.h @@ -157,6 +157,7 @@ ldc1 $f28, THREAD_FPR28(\thread) ldc1 $f30, THREAD_FPR30(\thread) ctc1 \tmp, fcr31 + .set pop .endm .macro fpu_restore_16odd thread diff --git a/arch/mips/include/asm/mach-cavium-octeon/mangle-port.h b/arch/mips/include/asm/mach-cavium-octeon/mangle-port.h index 0cf5ac1f7245..8ff2cbdf2c3e 100644 --- a/arch/mips/include/asm/mach-cavium-octeon/mangle-port.h +++ b/arch/mips/include/asm/mach-cavium-octeon/mangle-port.h @@ -15,8 +15,8 @@ static inline bool __should_swizzle_bits(volatile void *a) { extern const bool octeon_should_swizzle_table[]; + u64 did = ((u64)(uintptr_t)a >> 40) & 0xff; - unsigned long did = ((unsigned long)a >> 40) & 0xff; return octeon_should_swizzle_table[did]; } @@ -29,7 +29,7 @@ static inline bool __should_swizzle_bits(volatile void *a) #define __should_swizzle_bits(a) false -static inline bool __should_swizzle_addr(unsigned long p) +static inline bool __should_swizzle_addr(u64 p) { /* boot bus? */ return ((p >> 40) & 0xff) == 0; diff --git a/arch/mips/include/asm/mach-paravirt/kernel-entry-init.h b/arch/mips/include/asm/mach-paravirt/kernel-entry-init.h index 2f82bfa3a773..c9f5769dfc8f 100644 --- a/arch/mips/include/asm/mach-paravirt/kernel-entry-init.h +++ b/arch/mips/include/asm/mach-paravirt/kernel-entry-init.h @@ -11,11 +11,13 @@ #define CP0_EBASE $15, 1 .macro kernel_entry_setup +#ifdef CONFIG_SMP mfc0 t0, CP0_EBASE andi t0, t0, 0x3ff # CPUNum beqz t0, 1f # CPUs other than zero goto smp_bootstrap j smp_bootstrap +#endif /* CONFIG_SMP */ 1: .endm diff --git a/arch/mips/include/asm/mips-cm.h b/arch/mips/include/asm/mips-cm.h index 58e7874e9347..4fafeefe65c2 100644 --- a/arch/mips/include/asm/mips-cm.h +++ b/arch/mips/include/asm/mips-cm.h @@ -458,10 +458,21 @@ static inline int mips_cm_revision(void) static inline unsigned int mips_cm_max_vp_width(void) { extern int smp_num_siblings; + uint32_t cfg; if (mips_cm_revision() >= CM_REV_CM3) return read_gcr_sys_config2() & CM_GCR_SYS_CONFIG2_MAXVPW_MSK; + if (mips_cm_present()) { + /* + * We presume that all cores in the system will have the same + * number of VP(E)s, and if that ever changes then this will + * need revisiting. + */ + cfg = read_gcr_cl_config() & CM_GCR_Cx_CONFIG_PVPE_MSK; + return (cfg >> CM_GCR_Cx_CONFIG_PVPE_SHF) + 1; + } + if (IS_ENABLED(CONFIG_SMP)) return smp_num_siblings; diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h index def9d8d13f6e..7dd2dd47909a 100644 --- a/arch/mips/include/asm/mipsregs.h +++ b/arch/mips/include/asm/mipsregs.h @@ -660,8 +660,6 @@ #define MIPS_CONF7_IAR (_ULCAST_(1) << 10) #define MIPS_CONF7_AR (_ULCAST_(1) << 16) -/* FTLB probability bits for R6 */ -#define MIPS_CONF7_FTLBP_SHIFT (18) /* WatchLo* register definitions */ #define MIPS_WATCHLO_IRW (_ULCAST_(0x7) << 0) diff --git a/arch/mips/include/asm/uaccess.h b/arch/mips/include/asm/uaccess.h index 11b965f98d95..21a2aaba20d5 100644 --- a/arch/mips/include/asm/uaccess.h +++ b/arch/mips/include/asm/uaccess.h @@ -14,6 +14,7 @@ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/thread_info.h> +#include <linux/string.h> #include <asm/asm-eva.h> /* @@ -1170,6 +1171,8 @@ extern size_t __copy_in_user_eva(void *__to, const void *__from, size_t __n); __cu_len = __invoke_copy_from_user(__cu_to, \ __cu_from, \ __cu_len); \ + } else { \ + memset(__cu_to, 0, __cu_len); \ } \ } \ __cu_len; \ diff --git a/arch/mips/include/asm/uprobes.h b/arch/mips/include/asm/uprobes.h index 34c325c674c4..70a4a2f173ff 100644 --- a/arch/mips/include/asm/uprobes.h +++ b/arch/mips/include/asm/uprobes.h @@ -36,7 +36,6 @@ struct arch_uprobe { unsigned long resume_epc; u32 insn[2]; u32 ixol[2]; - union mips_instruction orig_inst[MAX_UINSN_BYTES / 4]; }; struct arch_uprobe_task { diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index a88d44247cc8..dd3175442c9e 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -352,7 +352,12 @@ __setup("nohtw", htw_disable); static int mips_ftlb_disabled; static int mips_has_ftlb_configured; -static int set_ftlb_enable(struct cpuinfo_mips *c, int enable); +enum ftlb_flags { + FTLB_EN = 1 << 0, + FTLB_SET_PROB = 1 << 1, +}; + +static int set_ftlb_enable(struct cpuinfo_mips *c, enum ftlb_flags flags); static int __init ftlb_disable(char *s) { @@ -371,8 +376,6 @@ static int __init ftlb_disable(char *s) return 1; } - back_to_back_c0_hazard(); - config4 = read_c0_config4(); /* Check that FTLB has been disabled */ @@ -531,7 +534,7 @@ static unsigned int calculate_ftlb_probability(struct cpuinfo_mips *c) return 3; } -static int set_ftlb_enable(struct cpuinfo_mips *c, int enable) +static int set_ftlb_enable(struct cpuinfo_mips *c, enum ftlb_flags flags) { unsigned int config; @@ -542,33 +545,33 @@ static int set_ftlb_enable(struct cpuinfo_mips *c, int enable) case CPU_P6600: /* proAptiv & related cores use Config6 to enable the FTLB */ config = read_c0_config6(); - /* Clear the old probability value */ - config &= ~(3 << MIPS_CONF6_FTLBP_SHIFT); - if (enable) - /* Enable FTLB */ - write_c0_config6(config | - (calculate_ftlb_probability(c) - << MIPS_CONF6_FTLBP_SHIFT) - | MIPS_CONF6_FTLBEN); + + if (flags & FTLB_EN) + config |= MIPS_CONF6_FTLBEN; else - /* Disable FTLB */ - write_c0_config6(config & ~MIPS_CONF6_FTLBEN); + config &= ~MIPS_CONF6_FTLBEN; + + if (flags & FTLB_SET_PROB) { + config &= ~(3 << MIPS_CONF6_FTLBP_SHIFT); + config |= calculate_ftlb_probability(c) + << MIPS_CONF6_FTLBP_SHIFT; + } + + write_c0_config6(config); + back_to_back_c0_hazard(); break; case CPU_I6400: - /* I6400 & related cores use Config7 to configure FTLB */ - config = read_c0_config7(); - /* Clear the old probability value */ - config &= ~(3 << MIPS_CONF7_FTLBP_SHIFT); - write_c0_config7(config | (calculate_ftlb_probability(c) - << MIPS_CONF7_FTLBP_SHIFT)); - break; + /* There's no way to disable the FTLB */ + if (!(flags & FTLB_EN)) + return 1; + return 0; case CPU_LOONGSON3: /* Flush ITLB, DTLB, VTLB and FTLB */ write_c0_diag(LOONGSON_DIAG_ITLB | LOONGSON_DIAG_DTLB | LOONGSON_DIAG_VTLB | LOONGSON_DIAG_FTLB); /* Loongson-3 cores use Config6 to enable the FTLB */ config = read_c0_config6(); - if (enable) + if (flags & FTLB_EN) /* Enable FTLB */ write_c0_config6(config & ~MIPS_CONF6_FTLBDIS); else @@ -788,6 +791,7 @@ static inline unsigned int decode_config4(struct cpuinfo_mips *c) PAGE_SIZE, config4); /* Switch FTLB off */ set_ftlb_enable(c, 0); + mips_ftlb_disabled = 1; break; } c->tlbsizeftlbsets = 1 << @@ -852,7 +856,7 @@ static void decode_configs(struct cpuinfo_mips *c) c->scache.flags = MIPS_CACHE_NOT_PRESENT; /* Enable FTLB if present and not disabled */ - set_ftlb_enable(c, !mips_ftlb_disabled); + set_ftlb_enable(c, mips_ftlb_disabled ? 0 : FTLB_EN); ok = decode_config0(c); /* Read Config registers. */ BUG_ON(!ok); /* Arch spec violation! */ @@ -902,6 +906,9 @@ static void decode_configs(struct cpuinfo_mips *c) } } + /* configure the FTLB write probability */ + set_ftlb_enable(c, (mips_ftlb_disabled ? 0 : FTLB_EN) | FTLB_SET_PROB); + mips_probe_watch_registers(c); #ifndef CONFIG_MIPS_CPS diff --git a/arch/mips/kernel/ftrace.c b/arch/mips/kernel/ftrace.c index 937c54bc8ccc..30a3b75e88eb 100644 --- a/arch/mips/kernel/ftrace.c +++ b/arch/mips/kernel/ftrace.c @@ -382,8 +382,8 @@ void prepare_ftrace_return(unsigned long *parent_ra_addr, unsigned long self_ra, if (unlikely(faulted)) goto out; - if (ftrace_push_return_trace(old_parent_ra, self_ra, &trace.depth, fp) - == -EBUSY) { + if (ftrace_push_return_trace(old_parent_ra, self_ra, &trace.depth, fp, + NULL) == -EBUSY) { *parent_ra_addr = old_parent_ra; return; } diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S index 17326a90d53c..dc0b29612891 100644 --- a/arch/mips/kernel/genex.S +++ b/arch/mips/kernel/genex.S @@ -142,9 +142,8 @@ LEAF(__r4k_wait) PTR_LA k1, __r4k_wait ori k0, 0x1f /* 32 byte rollback region */ xori k0, 0x1f - bne k0, k1, 9f + bne k0, k1, \handler MTC0 k0, CP0_EPC -9: .set pop .endm diff --git a/arch/mips/kernel/mips-r2-to-r6-emul.c b/arch/mips/kernel/mips-r2-to-r6-emul.c index c3372cac6db2..0a7e10b5f9e3 100644 --- a/arch/mips/kernel/mips-r2-to-r6-emul.c +++ b/arch/mips/kernel/mips-r2-to-r6-emul.c @@ -1164,7 +1164,9 @@ fpu_emul: regs->regs[31] = r31; regs->cp0_epc = epc; if (!used_math()) { /* First time FPU user. */ + preempt_disable(); err = init_fpu(); + preempt_enable(); set_used_math(); } lose_fpu(1); /* Save FPU state for the emulator. */ diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 7429ad09fbe3..d2d061520a23 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -605,14 +605,14 @@ int mips_set_process_fp_mode(struct task_struct *task, unsigned int value) return -EOPNOTSUPP; /* Avoid inadvertently triggering emulation */ - if ((value & PR_FP_MODE_FR) && cpu_has_fpu && - !(current_cpu_data.fpu_id & MIPS_FPIR_F64)) + if ((value & PR_FP_MODE_FR) && raw_cpu_has_fpu && + !(raw_current_cpu_data.fpu_id & MIPS_FPIR_F64)) return -EOPNOTSUPP; - if ((value & PR_FP_MODE_FRE) && cpu_has_fpu && !cpu_has_fre) + if ((value & PR_FP_MODE_FRE) && raw_cpu_has_fpu && !cpu_has_fre) return -EOPNOTSUPP; /* FR = 0 not supported in MIPS R6 */ - if (!(value & PR_FP_MODE_FR) && cpu_has_fpu && cpu_has_mips_r6) + if (!(value & PR_FP_MODE_FR) && raw_cpu_has_fpu && cpu_has_mips_r6) return -EOPNOTSUPP; /* Proceed with the mode switch */ diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index 36cf8d65c47d..0d57909d9026 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -87,6 +87,13 @@ void __init add_memory_region(phys_addr_t start, phys_addr_t size, long type) int x = boot_mem_map.nr_map; int i; + /* + * If the region reaches the top of the physical address space, adjust + * the size slightly so that (start + size) doesn't overflow + */ + if (start + size - 1 == (phys_addr_t)ULLONG_MAX) + --size; + /* Sanity check */ if (start + size < start) { pr_warn("Trying to add an invalid memory region, skipped\n"); @@ -757,7 +764,6 @@ static void __init arch_mem_init(char **cmdline_p) device_tree_init(); sparse_init(); plat_swiotlb_setup(); - paging_init(); dma_contiguous_reserve(PFN_PHYS(max_low_pfn)); /* Tell bootmem about cma reserved memblock section */ @@ -870,6 +876,7 @@ void __init setup_arch(char **cmdline_p) prefill_possible_map(); cpu_cache_init(); + paging_init(); } unsigned long kernelsp[NR_CPUS]; diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c index e9d9fc6c754c..6183ad84cc73 100644 --- a/arch/mips/kernel/smp-cps.c +++ b/arch/mips/kernel/smp-cps.c @@ -513,7 +513,7 @@ static void cps_cpu_die(unsigned int cpu) * in which case the CPC will refuse to power down the core. */ do { - mips_cm_lock_other(core, vpe_id); + mips_cm_lock_other(core, 0); mips_cpc_lock_other(core); stat = read_cpc_co_stat_conf(); stat &= CPC_Cx_STAT_CONF_SEQSTATE_MSK; diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index f95f094f36e4..b0baf48951fa 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -322,6 +322,9 @@ asmlinkage void start_secondary(void) cpumask_set_cpu(cpu, &cpu_coherent_mask); notify_cpu_starting(cpu); + cpumask_set_cpu(cpu, &cpu_callin_map); + synchronise_count_slave(cpu); + set_cpu_online(cpu, true); set_cpu_sibling_map(cpu); @@ -329,10 +332,6 @@ asmlinkage void start_secondary(void) calculate_cpu_foreign_map(); - cpumask_set_cpu(cpu, &cpu_callin_map); - - synchronise_count_slave(cpu); - /* * irq will be enabled in ->smp_finish(), enabling it too early * is dangerous. diff --git a/arch/mips/kernel/uprobes.c b/arch/mips/kernel/uprobes.c index 8452d933a645..4c7c1558944a 100644 --- a/arch/mips/kernel/uprobes.c +++ b/arch/mips/kernel/uprobes.c @@ -157,7 +157,6 @@ bool is_trap_insn(uprobe_opcode_t *insn) int arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs) { struct uprobe_task *utask = current->utask; - union mips_instruction insn; /* * Now find the EPC where to resume after the breakpoint has been @@ -168,10 +167,10 @@ int arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs) unsigned long epc; epc = regs->cp0_epc; - __compute_return_epc_for_insn(regs, insn); + __compute_return_epc_for_insn(regs, + (union mips_instruction) aup->insn[0]); aup->resume_epc = regs->cp0_epc; } - utask->autask.saved_trap_nr = current->thread.trap_nr; current->thread.trap_nr = UPROBE_TRAP_NR; regs->cp0_epc = current->utask->xol_vaddr; @@ -222,7 +221,7 @@ int arch_uprobe_exception_notify(struct notifier_block *self, return NOTIFY_DONE; switch (val) { - case DIE_BREAK: + case DIE_UPROBE: if (uprobe_pre_sstep_notifier(regs)) return NOTIFY_STOP; break; @@ -257,7 +256,7 @@ unsigned long arch_uretprobe_hijack_return_addr( ra = regs->regs[31]; /* Replace the return address with the trampoline address */ - regs->regs[31] = ra; + regs->regs[31] = trampoline_vaddr; return ra; } @@ -280,24 +279,6 @@ int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, return uprobe_write_opcode(mm, vaddr, UPROBE_SWBP_INSN); } -/** - * set_orig_insn - Restore the original instruction. - * @mm: the probed process address space. - * @auprobe: arch specific probepoint information. - * @vaddr: the virtual address to insert the opcode. - * - * For mm @mm, restore the original opcode (opcode) at @vaddr. - * Return 0 (success) or a negative errno. - * - * This overrides the weak version in kernel/events/uprobes.c. - */ -int set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, - unsigned long vaddr) -{ - return uprobe_write_opcode(mm, vaddr, - *(uprobe_opcode_t *)&auprobe->orig_inst[0].word); -} - void __weak arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, void *src, unsigned long len) { diff --git a/arch/mips/kernel/vdso.c b/arch/mips/kernel/vdso.c index 9abe447a4b48..f9dbfb14af33 100644 --- a/arch/mips/kernel/vdso.c +++ b/arch/mips/kernel/vdso.c @@ -39,16 +39,16 @@ static struct vm_special_mapping vdso_vvar_mapping = { static void __init init_vdso_image(struct mips_vdso_image *image) { unsigned long num_pages, i; + unsigned long data_pfn; BUG_ON(!PAGE_ALIGNED(image->data)); BUG_ON(!PAGE_ALIGNED(image->size)); num_pages = image->size / PAGE_SIZE; - for (i = 0; i < num_pages; i++) { - image->mapping.pages[i] = - virt_to_page(image->data + (i * PAGE_SIZE)); - } + data_pfn = __phys_to_pfn(__pa_symbol(image->data)); + for (i = 0; i < num_pages; i++) + image->mapping.pages[i] = pfn_to_page(data_pfn + i); } static int __init init_vdso(void) diff --git a/arch/mips/loongson64/loongson-3/smp.c b/arch/mips/loongson64/loongson-3/smp.c index 2fec6f753a35..99aab9f85904 100644 --- a/arch/mips/loongson64/loongson-3/smp.c +++ b/arch/mips/loongson64/loongson-3/smp.c @@ -677,7 +677,7 @@ void play_dead(void) play_dead_at_ckseg1(state_addr); } -void loongson3_disable_clock(int cpu) +static int loongson3_disable_clock(unsigned int cpu) { uint64_t core_id = cpu_data[cpu].core; uint64_t package_id = cpu_data[cpu].package; @@ -688,9 +688,10 @@ void loongson3_disable_clock(int cpu) if (!(loongson_sysconf.workarounds & WORKAROUND_CPUHOTPLUG)) LOONGSON_FREQCTRL(package_id) &= ~(1 << (core_id * 4 + 3)); } + return 0; } -void loongson3_enable_clock(int cpu) +static int loongson3_enable_clock(unsigned int cpu) { uint64_t core_id = cpu_data[cpu].core; uint64_t package_id = cpu_data[cpu].package; @@ -701,34 +702,15 @@ void loongson3_enable_clock(int cpu) if (!(loongson_sysconf.workarounds & WORKAROUND_CPUHOTPLUG)) LOONGSON_FREQCTRL(package_id) |= 1 << (core_id * 4 + 3); } -} - -#define CPU_POST_DEAD_FROZEN (CPU_POST_DEAD | CPU_TASKS_FROZEN) -static int loongson3_cpu_callback(struct notifier_block *nfb, - unsigned long action, void *hcpu) -{ - unsigned int cpu = (unsigned long)hcpu; - - switch (action) { - case CPU_POST_DEAD: - case CPU_POST_DEAD_FROZEN: - pr_info("Disable clock for CPU#%d\n", cpu); - loongson3_disable_clock(cpu); - break; - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - pr_info("Enable clock for CPU#%d\n", cpu); - loongson3_enable_clock(cpu); - break; - } - - return NOTIFY_OK; + return 0; } static int register_loongson3_notifier(void) { - hotcpu_notifier(loongson3_cpu_callback, 0); - return 0; + return cpuhp_setup_state_nocalls(CPUHP_MIPS_SOC_PREPARE, + "mips/loongson:prepare", + loongson3_enable_clock, + loongson3_disable_clock); } early_initcall(register_loongson3_notifier); diff --git a/arch/mips/math-emu/dsemul.c b/arch/mips/math-emu/dsemul.c index 72a4642eee2c..4a094f7acb3d 100644 --- a/arch/mips/math-emu/dsemul.c +++ b/arch/mips/math-emu/dsemul.c @@ -298,5 +298,6 @@ bool do_dsemulret(struct pt_regs *xcp) /* Set EPC to return to post-branch instruction */ xcp->cp0_epc = current->thread.bd_emu_cont_pc; pr_debug("dsemulret to 0x%08lx\n", xcp->cp0_epc); + MIPS_FPU_EMU_INC_STATS(ds_emul); return true; } diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index cd72805b64a7..fa7d8d3790bf 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -800,7 +800,7 @@ static void r4k_flush_icache_range(unsigned long start, unsigned long end) * If address-based cache ops don't require an SMP call, then * use them exclusively for small flushes. */ - size = start - end; + size = end - start; cache_size = icache_size; if (!cpu_has_ic_fills_f_dc) { size *= 2; diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index a5509e7dcad2..72f7478ee068 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -261,7 +261,6 @@ unsigned __weak platform_maar_init(unsigned num_pairs) { struct maar_config cfg[BOOT_MEM_MAP_MAX]; unsigned i, num_configured, num_cfg = 0; - phys_addr_t skip; for (i = 0; i < boot_mem_map.nr_map; i++) { switch (boot_mem_map.map[i].type) { @@ -272,14 +271,14 @@ unsigned __weak platform_maar_init(unsigned num_pairs) continue; } - skip = 0x10000 - (boot_mem_map.map[i].addr & 0xffff); - + /* Round lower up */ cfg[num_cfg].lower = boot_mem_map.map[i].addr; - cfg[num_cfg].lower += skip; + cfg[num_cfg].lower = (cfg[num_cfg].lower + 0xffff) & ~0xffff; - cfg[num_cfg].upper = cfg[num_cfg].lower; - cfg[num_cfg].upper += boot_mem_map.map[i].size - 1; - cfg[num_cfg].upper -= skip; + /* Round upper down */ + cfg[num_cfg].upper = boot_mem_map.map[i].addr + + boot_mem_map.map[i].size; + cfg[num_cfg].upper = (cfg[num_cfg].upper & ~0xffff) - 1; cfg[num_cfg].attrs = MIPS_MAAR_S; num_cfg++; @@ -441,6 +440,9 @@ static inline void mem_init_free_highmem(void) #ifdef CONFIG_HIGHMEM unsigned long tmp; + if (cpu_has_dc_aliases) + return; + for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) { struct page *page = pfn_to_page(tmp); diff --git a/arch/mips/mti-malta/malta-setup.c b/arch/mips/mti-malta/malta-setup.c index ec5b21678fad..7e7364b0501e 100644 --- a/arch/mips/mti-malta/malta-setup.c +++ b/arch/mips/mti-malta/malta-setup.c @@ -39,6 +39,9 @@ #include <linux/console.h> #endif +#define ROCIT_CONFIG_GEN0 0x1f403000 +#define ROCIT_CONFIG_GEN0_PCI_IOCU BIT(7) + extern void malta_be_init(void); extern int malta_be_handler(struct pt_regs *regs, int is_fixup); @@ -107,6 +110,8 @@ static void __init fd_activate(void) static int __init plat_enable_iocoherency(void) { int supported = 0; + u32 cfg; + if (mips_revision_sconid == MIPS_REVISION_SCON_BONITO) { if (BONITO_PCICACHECTRL & BONITO_PCICACHECTRL_CPUCOH_PRES) { BONITO_PCICACHECTRL |= BONITO_PCICACHECTRL_CPUCOH_EN; @@ -129,7 +134,8 @@ static int __init plat_enable_iocoherency(void) } else if (mips_cm_numiocu() != 0) { /* Nothing special needs to be done to enable coherency */ pr_info("CMP IOCU detected\n"); - if ((*(unsigned int *)0xbf403000 & 0x81) != 0x81) { + cfg = __raw_readl((u32 *)CKSEG1ADDR(ROCIT_CONFIG_GEN0)); + if (!(cfg & ROCIT_CONFIG_GEN0_PCI_IOCU)) { pr_crit("IOCU OPERATION DISABLED BY SWITCH - DEFAULTING TO SW IO COHERENCY\n"); return 0; } diff --git a/arch/mn10300/include/asm/uaccess.h b/arch/mn10300/include/asm/uaccess.h index 20f7bf6de384..d012e877a95a 100644 --- a/arch/mn10300/include/asm/uaccess.h +++ b/arch/mn10300/include/asm/uaccess.h @@ -166,6 +166,7 @@ struct __large_struct { unsigned long buf[100]; }; "2:\n" \ " .section .fixup,\"ax\"\n" \ "3:\n\t" \ + " mov 0,%1\n" \ " mov %3,%0\n" \ " jmp 2b\n" \ " .previous\n" \ diff --git a/arch/mn10300/lib/usercopy.c b/arch/mn10300/lib/usercopy.c index 7826e6c364e7..ce8899e5e171 100644 --- a/arch/mn10300/lib/usercopy.c +++ b/arch/mn10300/lib/usercopy.c @@ -9,7 +9,7 @@ * as published by the Free Software Foundation; either version * 2 of the Licence, or (at your option) any later version. */ -#include <asm/uaccess.h> +#include <linux/uaccess.h> unsigned long __generic_copy_to_user(void *to, const void *from, unsigned long n) @@ -24,6 +24,8 @@ __generic_copy_from_user(void *to, const void *from, unsigned long n) { if (access_ok(VERIFY_READ, from, n)) __copy_user_zeroing(to, from, n); + else + memset(to, 0, n); return n; } diff --git a/arch/nios2/include/asm/uaccess.h b/arch/nios2/include/asm/uaccess.h index caa51ff85a3c..0ab82324c817 100644 --- a/arch/nios2/include/asm/uaccess.h +++ b/arch/nios2/include/asm/uaccess.h @@ -102,9 +102,12 @@ extern long __copy_to_user(void __user *to, const void *from, unsigned long n); static inline long copy_from_user(void *to, const void __user *from, unsigned long n) { - if (!access_ok(VERIFY_READ, from, n)) - return n; - return __copy_from_user(to, from, n); + unsigned long res = n; + if (access_ok(VERIFY_READ, from, n)) + res = __copy_from_user(to, from, n); + if (unlikely(res)) + memset(to + (n - res), 0, res); + return res; } static inline long copy_to_user(void __user *to, const void *from, @@ -139,7 +142,7 @@ extern long strnlen_user(const char __user *s, long n); #define __get_user_unknown(val, size, ptr, err) do { \ err = 0; \ - if (copy_from_user(&(val), ptr, size)) { \ + if (__copy_from_user(&(val), ptr, size)) { \ err = -EFAULT; \ } \ } while (0) @@ -166,7 +169,7 @@ do { \ ({ \ long __gu_err = -EFAULT; \ const __typeof__(*(ptr)) __user *__gu_ptr = (ptr); \ - unsigned long __gu_val; \ + unsigned long __gu_val = 0; \ __get_user_common(__gu_val, sizeof(*(ptr)), __gu_ptr, __gu_err);\ (x) = (__force __typeof__(x))__gu_val; \ __gu_err; \ diff --git a/arch/openrisc/include/asm/uaccess.h b/arch/openrisc/include/asm/uaccess.h index a6bd07ca3d6c..5cc6b4f1b795 100644 --- a/arch/openrisc/include/asm/uaccess.h +++ b/arch/openrisc/include/asm/uaccess.h @@ -273,28 +273,20 @@ __copy_tofrom_user(void *to, const void *from, unsigned long size); static inline unsigned long copy_from_user(void *to, const void *from, unsigned long n) { - unsigned long over; - - if (access_ok(VERIFY_READ, from, n)) - return __copy_tofrom_user(to, from, n); - if ((unsigned long)from < TASK_SIZE) { - over = (unsigned long)from + n - TASK_SIZE; - return __copy_tofrom_user(to, from, n - over) + over; - } - return n; + unsigned long res = n; + + if (likely(access_ok(VERIFY_READ, from, n))) + res = __copy_tofrom_user(to, from, n); + if (unlikely(res)) + memset(to + (n - res), 0, res); + return res; } static inline unsigned long copy_to_user(void *to, const void *from, unsigned long n) { - unsigned long over; - - if (access_ok(VERIFY_WRITE, to, n)) - return __copy_tofrom_user(to, from, n); - if ((unsigned long)to < TASK_SIZE) { - over = (unsigned long)to + n - TASK_SIZE; - return __copy_tofrom_user(to, from, n - over) + over; - } + if (likely(access_ok(VERIFY_WRITE, to, n))) + n = __copy_tofrom_user(to, from, n); return n; } @@ -303,13 +295,8 @@ extern unsigned long __clear_user(void *addr, unsigned long size); static inline __must_check unsigned long clear_user(void *addr, unsigned long size) { - - if (access_ok(VERIFY_WRITE, addr, size)) - return __clear_user(addr, size); - if ((unsigned long)addr < TASK_SIZE) { - unsigned long over = (unsigned long)addr + size - TASK_SIZE; - return __clear_user(addr, size - over) + over; - } + if (likely(access_ok(VERIFY_WRITE, addr, size))) + size = __clear_user(addr, size); return size; } diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h index e9150487e20d..482847865dac 100644 --- a/arch/parisc/include/asm/uaccess.h +++ b/arch/parisc/include/asm/uaccess.h @@ -10,6 +10,7 @@ #include <asm-generic/uaccess-unaligned.h> #include <linux/bug.h> +#include <linux/string.h> #define VERIFY_READ 0 #define VERIFY_WRITE 1 @@ -221,7 +222,7 @@ static inline unsigned long __must_check copy_from_user(void *to, unsigned long n) { int sz = __compiletime_object_size(to); - int ret = -EFAULT; + unsigned long ret = n; if (likely(sz == -1 || sz >= n)) ret = __copy_from_user(to, from, n); @@ -230,6 +231,8 @@ static inline unsigned long __must_check copy_from_user(void *to, else __bad_copy_user(); + if (unlikely(ret)) + memset(to + (n - ret), 0, ret); return ret; } diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c index a828a0adf52c..5a5506a35395 100644 --- a/arch/parisc/kernel/ftrace.c +++ b/arch/parisc/kernel/ftrace.c @@ -48,7 +48,7 @@ static void __hot prepare_ftrace_return(unsigned long *parent, return; if (ftrace_push_return_trace(old, self_addr, &trace.depth, - 0 ) == -EBUSY) + 0, NULL) == -EBUSY) return; /* activate parisc_return_to_handler() as return point */ diff --git a/arch/powerpc/include/asm/cpu_has_feature.h b/arch/powerpc/include/asm/cpu_has_feature.h index 2ef55f8968a2..b312b152461b 100644 --- a/arch/powerpc/include/asm/cpu_has_feature.h +++ b/arch/powerpc/include/asm/cpu_has_feature.h @@ -15,7 +15,7 @@ static inline bool early_cpu_has_feature(unsigned long feature) #ifdef CONFIG_JUMP_LABEL_FEATURE_CHECKS #include <linux/jump_label.h> -#define NUM_CPU_FTR_KEYS 64 +#define NUM_CPU_FTR_KEYS BITS_PER_LONG extern struct static_key_true cpu_feature_keys[NUM_CPU_FTR_KEYS]; diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index c1dc6c14deb8..c266227fdd5b 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -308,40 +308,21 @@ extern unsigned long __copy_tofrom_user(void __user *to, static inline unsigned long copy_from_user(void *to, const void __user *from, unsigned long n) { - unsigned long over; - - if (access_ok(VERIFY_READ, from, n)) { - if (!__builtin_constant_p(n)) - check_object_size(to, n, false); + if (likely(access_ok(VERIFY_READ, from, n))) { + check_object_size(to, n, false); return __copy_tofrom_user((__force void __user *)to, from, n); } - if ((unsigned long)from < TASK_SIZE) { - over = (unsigned long)from + n - TASK_SIZE; - if (!__builtin_constant_p(n - over)) - check_object_size(to, n - over, false); - return __copy_tofrom_user((__force void __user *)to, from, - n - over) + over; - } + memset(to, 0, n); return n; } static inline unsigned long copy_to_user(void __user *to, const void *from, unsigned long n) { - unsigned long over; - if (access_ok(VERIFY_WRITE, to, n)) { - if (!__builtin_constant_p(n)) - check_object_size(from, n, true); + check_object_size(from, n, true); return __copy_tofrom_user(to, (__force void __user *)from, n); } - if ((unsigned long)to < TASK_SIZE) { - over = (unsigned long)to + n - TASK_SIZE; - if (!__builtin_constant_p(n)) - check_object_size(from, n - over, true); - return __copy_tofrom_user(to, (__force void __user *)from, - n - over) + over; - } return n; } @@ -383,8 +364,7 @@ static inline unsigned long __copy_from_user_inatomic(void *to, return 0; } - if (!__builtin_constant_p(n)) - check_object_size(to, n, false); + check_object_size(to, n, false); return __copy_tofrom_user((__force void __user *)to, from, n); } @@ -412,8 +392,8 @@ static inline unsigned long __copy_to_user_inatomic(void __user *to, if (ret == 0) return 0; } - if (!__builtin_constant_p(n)) - check_object_size(from, n, true); + + check_object_size(from, n, true); return __copy_tofrom_user(to, (__force const void __user *)from, n); } @@ -439,10 +419,6 @@ static inline unsigned long clear_user(void __user *addr, unsigned long size) might_fault(); if (likely(access_ok(VERIFY_WRITE, addr, size))) return __clear_user(addr, size); - if ((unsigned long)addr < TASK_SIZE) { - unsigned long over = (unsigned long)addr + size - TASK_SIZE; - return __clear_user(addr, size - over) + over; - } return size; } diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index cc52d9795f88..a95639b8d4ac 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -593,7 +593,8 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip) if (!ftrace_graph_entry(&trace)) goto out; - if (ftrace_push_return_trace(parent, ip, &trace.depth, 0) == -EBUSY) + if (ftrace_push_return_trace(parent, ip, &trace.depth, 0, + NULL) == -EBUSY) goto out; parent = return_hooker; diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 2265c6398a17..bd739fed26e3 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -411,7 +411,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) * * r13 - PACA * cr3 - gt if waking up with partial/complete hypervisor state loss - * cr4 - eq if waking up from complete hypervisor state loss. + * cr4 - gt or eq if waking up from complete hypervisor state loss. */ _GLOBAL(pnv_wakeup_tb_loss) ld r1,PACAR1(r13) @@ -453,7 +453,7 @@ lwarx_loop2: * At this stage * cr2 - eq if first thread to wakeup in core * cr3- gt if waking up with partial/complete hypervisor state loss - * cr4 - eq if waking up from complete hypervisor state loss. + * cr4 - gt or eq if waking up from complete hypervisor state loss. */ ori r15,r15,PNV_CORE_IDLE_LOCK_BIT @@ -481,7 +481,7 @@ first_thread_in_subcore: * If waking up from sleep, subcore state is not lost. Hence * skip subcore state restore */ - bne cr4,subcore_state_restored + blt cr4,subcore_state_restored /* Restore per-subcore state */ ld r4,_SDR1(r1) @@ -526,7 +526,7 @@ timebase_resync: * If waking up from sleep, per core state is not lost, skip to * clear_lock. */ - bne cr4,clear_lock + blt cr4,clear_lock /* * First thread in the core to wake up and its waking up with @@ -557,7 +557,7 @@ common_exit: * If waking up from sleep, hypervisor state is not lost. Hence * skip hypervisor state restore. */ - bne cr4,hypervisor_state_restored + blt cr4,hypervisor_state_restored /* Waking up from winkle */ diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S index 0a57fe6d49cc..aa8214f30c92 100644 --- a/arch/powerpc/lib/checksum_32.S +++ b/arch/powerpc/lib/checksum_32.S @@ -127,18 +127,19 @@ _GLOBAL(csum_partial_copy_generic) stw r7,12(r1) stw r8,8(r1) - rlwinm r0,r4,3,0x8 - rlwnm r6,r6,r0,0,31 /* odd destination address: rotate one byte */ - cmplwi cr7,r0,0 /* is destination address even ? */ addic r12,r6,0 addi r6,r4,-4 neg r0,r4 addi r4,r3,-4 andi. r0,r0,CACHELINE_MASK /* # bytes to start of cache line */ + crset 4*cr7+eq beq 58f cmplw 0,r5,r0 /* is this more than total to do? */ blt 63f /* if not much to do */ + rlwinm r7,r6,3,0x8 + rlwnm r12,r12,r7,0,31 /* odd destination address: rotate one byte */ + cmplwi cr7,r7,0 /* is destination address even ? */ andi. r8,r0,3 /* get it word-aligned first */ mtctr r8 beq+ 61f diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c index 7d95bc402dba..c491f2c8f2b9 100644 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ b/arch/powerpc/mm/mmu_context_nohash.c @@ -369,44 +369,34 @@ void destroy_context(struct mm_struct *mm) } #ifdef CONFIG_SMP - -static int mmu_context_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) +static int mmu_ctx_cpu_prepare(unsigned int cpu) { - unsigned int cpu = (unsigned int)(long)hcpu; - /* We don't touch CPU 0 map, it's allocated at aboot and kept * around forever */ if (cpu == boot_cpuid) - return NOTIFY_OK; - - switch (action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - pr_devel("MMU: Allocating stale context map for CPU %d\n", cpu); - stale_map[cpu] = kzalloc(CTX_MAP_SIZE, GFP_KERNEL); - break; -#ifdef CONFIG_HOTPLUG_CPU - case CPU_UP_CANCELED: - case CPU_UP_CANCELED_FROZEN: - case CPU_DEAD: - case CPU_DEAD_FROZEN: - pr_devel("MMU: Freeing stale context map for CPU %d\n", cpu); - kfree(stale_map[cpu]); - stale_map[cpu] = NULL; - - /* We also clear the cpu_vm_mask bits of CPUs going away */ - clear_tasks_mm_cpumask(cpu); - break; -#endif /* CONFIG_HOTPLUG_CPU */ - } - return NOTIFY_OK; + return 0; + + pr_devel("MMU: Allocating stale context map for CPU %d\n", cpu); + stale_map[cpu] = kzalloc(CTX_MAP_SIZE, GFP_KERNEL); + return 0; } -static struct notifier_block mmu_context_cpu_nb = { - .notifier_call = mmu_context_cpu_notify, -}; +static int mmu_ctx_cpu_dead(unsigned int cpu) +{ +#ifdef CONFIG_HOTPLUG_CPU + if (cpu == boot_cpuid) + return 0; + + pr_devel("MMU: Freeing stale context map for CPU %d\n", cpu); + kfree(stale_map[cpu]); + stale_map[cpu] = NULL; + + /* We also clear the cpu_vm_mask bits of CPUs going away */ + clear_tasks_mm_cpumask(cpu); +#endif + return 0; +} #endif /* CONFIG_SMP */ @@ -469,7 +459,9 @@ void __init mmu_context_init(void) #else stale_map[boot_cpuid] = memblock_virt_alloc(CTX_MAP_SIZE, 0); - register_cpu_notifier(&mmu_context_cpu_nb); + cpuhp_setup_state_nocalls(CPUHP_POWERPC_MMU_CTX_PREPARE, + "powerpc/mmu/ctx:prepare", + mmu_ctx_cpu_prepare, mmu_ctx_cpu_dead); #endif printk(KERN_INFO diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S index dfdb90cb4403..9f1983404e1a 100644 --- a/arch/powerpc/mm/slb_low.S +++ b/arch/powerpc/mm/slb_low.S @@ -113,7 +113,12 @@ BEGIN_FTR_SECTION END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT) b slb_finish_load_1T -0: +0: /* + * For userspace addresses, make sure this is region 0. + */ + cmpdi r9, 0 + bne 8f + /* when using slices, we extract the psize off the slice bitmaps * and then we need to get the sllp encoding off the mmu_psize_defs * array. diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index 834868b9fdc9..366e4f510fcf 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -852,37 +852,33 @@ static void smp_core99_setup_cpu(int cpu_nr) #ifdef CONFIG_PPC64 #ifdef CONFIG_HOTPLUG_CPU -static int smp_core99_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) +static unsigned int smp_core99_host_open; + +static int smp_core99_cpu_prepare(unsigned int cpu) { int rc; - switch(action & ~CPU_TASKS_FROZEN) { - case CPU_UP_PREPARE: - /* Open i2c bus if it was used for tb sync */ - if (pmac_tb_clock_chip_host) { - rc = pmac_i2c_open(pmac_tb_clock_chip_host, 1); - if (rc) { - pr_err("Failed to open i2c bus for time sync\n"); - return notifier_from_errno(rc); - } + /* Open i2c bus if it was used for tb sync */ + if (pmac_tb_clock_chip_host && !smp_core99_host_open) { + rc = pmac_i2c_open(pmac_tb_clock_chip_host, 1); + if (rc) { + pr_err("Failed to open i2c bus for time sync\n"); + return notifier_from_errno(rc); } - break; - case CPU_ONLINE: - case CPU_UP_CANCELED: - /* Close i2c bus if it was used for tb sync */ - if (pmac_tb_clock_chip_host) - pmac_i2c_close(pmac_tb_clock_chip_host); - break; - default: - break; + smp_core99_host_open = 1; } - return NOTIFY_OK; + return 0; } -static struct notifier_block smp_core99_cpu_nb = { - .notifier_call = smp_core99_cpu_notify, -}; +static int smp_core99_cpu_online(unsigned int cpu) +{ + /* Close i2c bus if it was used for tb sync */ + if (pmac_tb_clock_chip_host && smp_core99_host_open) { + pmac_i2c_close(pmac_tb_clock_chip_host); + smp_core99_host_open = 0; + } + return 0; +} #endif /* CONFIG_HOTPLUG_CPU */ static void __init smp_core99_bringup_done(void) @@ -902,7 +898,11 @@ static void __init smp_core99_bringup_done(void) g5_phy_disable_cpu1(); } #ifdef CONFIG_HOTPLUG_CPU - register_cpu_notifier(&smp_core99_cpu_nb); + cpuhp_setup_state_nocalls(CPUHP_POWERPC_PMAC_PREPARE, + "powerpc/pmac:prepare", smp_core99_cpu_prepare, + NULL); + cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "powerpc/pmac:online", + smp_core99_cpu_online, NULL); #endif if (ppc_md.progress) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 13218263e66e..38a5c657ffd3 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -124,6 +124,13 @@ static inline bool pnv_pci_is_m64(struct pnv_phb *phb, struct resource *r) r->start < (phb->ioda.m64_base + phb->ioda.m64_size)); } +static inline bool pnv_pci_is_m64_flags(unsigned long resource_flags) +{ + unsigned long flags = (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH); + + return (resource_flags & flags) == flags; +} + static struct pnv_ioda_pe *pnv_ioda_init_pe(struct pnv_phb *phb, int pe_no) { phb->ioda.pe_array[pe_no].phb = phb; @@ -162,11 +169,12 @@ static struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb) static void pnv_ioda_free_pe(struct pnv_ioda_pe *pe) { struct pnv_phb *phb = pe->phb; + unsigned int pe_num = pe->pe_number; WARN_ON(pe->pdev); memset(pe, 0, sizeof(struct pnv_ioda_pe)); - clear_bit(pe->pe_number, phb->ioda.pe_alloc); + clear_bit(pe_num, phb->ioda.pe_alloc); } /* The default M64 BAR is shared by all PEs */ @@ -2216,7 +2224,7 @@ static long pnv_pci_ioda2_set_window(struct iommu_table_group *table_group, pnv_pci_link_table_and_group(phb->hose->node, num, tbl, &pe->table_group); - pnv_pci_phb3_tce_invalidate_pe(pe); + pnv_pci_ioda2_tce_invalidate_pe(pe); return 0; } @@ -2354,7 +2362,7 @@ static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group, if (ret) pe_warn(pe, "Unmapping failed, ret = %ld\n", ret); else - pnv_pci_phb3_tce_invalidate_pe(pe); + pnv_pci_ioda2_tce_invalidate_pe(pe); pnv_pci_unlink_table_and_group(table_group->tables[num], table_group); @@ -2870,7 +2878,7 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev) res = &pdev->resource[i + PCI_IOV_RESOURCES]; if (!res->flags || res->parent) continue; - if (!pnv_pci_is_m64(phb, res)) { + if (!pnv_pci_is_m64_flags(res->flags)) { dev_warn(&pdev->dev, "Don't support SR-IOV with" " non M64 VF BAR%d: %pR. \n", i, res); @@ -3095,7 +3103,7 @@ static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus, * alignment for any 64-bit resource, PCIe doesn't care and * bridges only do 64-bit prefetchable anyway. */ - if (phb->ioda.m64_segsize && (type & IORESOURCE_MEM_64)) + if (phb->ioda.m64_segsize && pnv_pci_is_m64_flags(type)) return phb->ioda.m64_segsize; if (type & IORESOURCE_MEM) return phb->ioda.m32_segsize; @@ -3402,12 +3410,6 @@ static void pnv_ioda_release_pe(struct pnv_ioda_pe *pe) struct pnv_phb *phb = pe->phb; struct pnv_ioda_pe *slave, *tmp; - /* Release slave PEs in compound PE */ - if (pe->flags & PNV_IODA_PE_MASTER) { - list_for_each_entry_safe(slave, tmp, &pe->slaves, list) - pnv_ioda_release_pe(slave); - } - list_del(&pe->list); switch (phb->type) { case PNV_PHB_IODA1: @@ -3422,7 +3424,26 @@ static void pnv_ioda_release_pe(struct pnv_ioda_pe *pe) pnv_ioda_release_pe_seg(pe); pnv_ioda_deconfigure_pe(pe->phb, pe); - pnv_ioda_free_pe(pe); + + /* Release slave PEs in the compound PE */ + if (pe->flags & PNV_IODA_PE_MASTER) { + list_for_each_entry_safe(slave, tmp, &pe->slaves, list) { + list_del(&slave->list); + pnv_ioda_free_pe(slave); + } + } + + /* + * The PE for root bus can be removed because of hotplug in EEH + * recovery for fenced PHB error. We need to mark the PE dead so + * that it can be populated again in PCI hot add path. The PE + * shouldn't be destroyed as it's the global reserved resource. + */ + if (phb->ioda.root_pe_populated && + phb->ioda.root_pe_idx == pe->pe_number) + phb->ioda.root_pe_populated = false; + else + pnv_ioda_free_pe(pe); } static void pnv_pci_release_device(struct pci_dev *pdev) @@ -3438,7 +3459,17 @@ static void pnv_pci_release_device(struct pci_dev *pdev) if (!pdn || pdn->pe_number == IODA_INVALID_PE) return; + /* + * PCI hotplug can happen as part of EEH error recovery. The @pdn + * isn't removed and added afterwards in this scenario. We should + * set the PE number in @pdn to an invalid one. Otherwise, the PE's + * device count is decreased on removing devices while failing to + * be increased on adding devices. It leads to unbalanced PE's device + * count and eventually make normal PCI hotplug path broken. + */ pe = &phb->ioda.pe_array[pdn->pe_number]; + pdn->pe_number = IODA_INVALID_PE; + WARN_ON(--pe->device_count < 0); if (pe->device_count == 0) pnv_ioda_release_pe(pe); diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 4ffcaa6f8670..a39d20e8623d 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -41,7 +41,6 @@ #include <linux/root_dev.h> #include <linux/of.h> #include <linux/of_pci.h> -#include <linux/kexec.h> #include <asm/mmu.h> #include <asm/processor.h> @@ -66,6 +65,7 @@ #include <asm/eeh.h> #include <asm/reg.h> #include <asm/plpar_wrappers.h> +#include <asm/kexec.h> #include "pseries.h" diff --git a/arch/powerpc/sysdev/xics/icp-opal.c b/arch/powerpc/sysdev/xics/icp-opal.c index 57d72f10a97f..9114243fa1b5 100644 --- a/arch/powerpc/sysdev/xics/icp-opal.c +++ b/arch/powerpc/sysdev/xics/icp-opal.c @@ -23,10 +23,10 @@ static void icp_opal_teardown_cpu(void) { - int cpu = smp_processor_id(); + int hw_cpu = hard_smp_processor_id(); /* Clear any pending IPI */ - opal_int_set_mfrr(cpu, 0xff); + opal_int_set_mfrr(hw_cpu, 0xff); } static void icp_opal_flush_ipi(void) @@ -101,14 +101,16 @@ static void icp_opal_eoi(struct irq_data *d) static void icp_opal_cause_ipi(int cpu, unsigned long data) { - opal_int_set_mfrr(cpu, IPI_PRIORITY); + int hw_cpu = get_hard_smp_processor_id(cpu); + + opal_int_set_mfrr(hw_cpu, IPI_PRIORITY); } static irqreturn_t icp_opal_ipi_action(int irq, void *dev_id) { - int cpu = smp_processor_id(); + int hw_cpu = hard_smp_processor_id(); - opal_int_set_mfrr(cpu, 0xff); + opal_int_set_mfrr(hw_cpu, 0xff); return smp_ipi_demux(); } diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 95aefdba4be2..52d7c8709279 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -266,28 +266,28 @@ int __put_user_bad(void) __attribute__((noreturn)); __chk_user_ptr(ptr); \ switch (sizeof(*(ptr))) { \ case 1: { \ - unsigned char __x; \ + unsigned char __x = 0; \ __gu_err = __get_user_fn(&__x, ptr, \ sizeof(*(ptr))); \ (x) = *(__force __typeof__(*(ptr)) *) &__x; \ break; \ }; \ case 2: { \ - unsigned short __x; \ + unsigned short __x = 0; \ __gu_err = __get_user_fn(&__x, ptr, \ sizeof(*(ptr))); \ (x) = *(__force __typeof__(*(ptr)) *) &__x; \ break; \ }; \ case 4: { \ - unsigned int __x; \ + unsigned int __x = 0; \ __gu_err = __get_user_fn(&__x, ptr, \ sizeof(*(ptr))); \ (x) = *(__force __typeof__(*(ptr)) *) &__x; \ break; \ }; \ case 8: { \ - unsigned long long __x; \ + unsigned long long __x = 0; \ __gu_err = __get_user_fn(&__x, ptr, \ sizeof(*(ptr))); \ (x) = *(__force __typeof__(*(ptr)) *) &__x; \ diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 0f7bfeba6da6..60a8a4e207ed 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -209,7 +209,8 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip) /* Only trace if the calling function expects to. */ if (!ftrace_graph_entry(&trace)) goto out; - if (ftrace_push_return_trace(parent, ip, &trace.depth, 0) == -EBUSY) + if (ftrace_push_return_trace(parent, ip, &trace.depth, 0, + NULL) == -EBUSY) goto out; parent = (unsigned long) return_to_handler; out: diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index f142215ed30d..607ec91966c7 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2231,9 +2231,10 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) return -EINVAL; current->thread.fpu.fpc = fpu->fpc; if (MACHINE_HAS_VX) - convert_fp_to_vx(current->thread.fpu.vxrs, (freg_t *)fpu->fprs); + convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs, + (freg_t *) fpu->fprs); else - memcpy(current->thread.fpu.fprs, &fpu->fprs, sizeof(fpu->fprs)); + memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs)); return 0; } @@ -2242,9 +2243,10 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) /* make sure we have the latest values */ save_fpu_regs(); if (MACHINE_HAS_VX) - convert_vx_to_fp((freg_t *)fpu->fprs, current->thread.fpu.vxrs); + convert_vx_to_fp((freg_t *) fpu->fprs, + (__vector128 *) vcpu->run->s.regs.vrs); else - memcpy(fpu->fprs, current->thread.fpu.fprs, sizeof(fpu->fprs)); + memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs)); fpu->fpc = current->thread.fpu.fpc; return 0; } diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index c106488b4137..d8673e243f13 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -584,7 +584,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) /* Validity 0x0044 will be checked by SIE */ if (rc) goto unpin; - scb_s->gvrd = hpa; + scb_s->riccbd = hpa; } return 0; unpin: diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index a58bca62a93b..cbb73fabc91e 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -740,28 +740,21 @@ out: put_task_struct(tsk); } -static int pfault_cpu_notify(struct notifier_block *self, unsigned long action, - void *hcpu) +static int pfault_cpu_dead(unsigned int cpu) { struct thread_struct *thread, *next; struct task_struct *tsk; - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_DEAD: - spin_lock_irq(&pfault_lock); - list_for_each_entry_safe(thread, next, &pfault_list, list) { - thread->pfault_wait = 0; - list_del(&thread->list); - tsk = container_of(thread, struct task_struct, thread); - wake_up_process(tsk); - put_task_struct(tsk); - } - spin_unlock_irq(&pfault_lock); - break; - default: - break; + spin_lock_irq(&pfault_lock); + list_for_each_entry_safe(thread, next, &pfault_list, list) { + thread->pfault_wait = 0; + list_del(&thread->list); + tsk = container_of(thread, struct task_struct, thread); + wake_up_process(tsk); + put_task_struct(tsk); } - return NOTIFY_OK; + spin_unlock_irq(&pfault_lock); + return 0; } static int __init pfault_irq_init(void) @@ -775,7 +768,8 @@ static int __init pfault_irq_init(void) if (rc) goto out_pfault; irq_subclass_register(IRQ_SUBCLASS_SERVICE_SIGNAL); - hotcpu_notifier(pfault_cpu_notify, 0); + cpuhp_setup_state_nocalls(CPUHP_S390_PFAULT_DEAD, "s390/pfault:dead", + NULL, pfault_cpu_dead); return 0; out_pfault: diff --git a/arch/score/include/asm/uaccess.h b/arch/score/include/asm/uaccess.h index 20a3591225cc..01aec8ccde83 100644 --- a/arch/score/include/asm/uaccess.h +++ b/arch/score/include/asm/uaccess.h @@ -163,7 +163,7 @@ do { \ __get_user_asm(val, "lw", ptr); \ break; \ case 8: \ - if ((copy_from_user((void *)&val, ptr, 8)) == 0) \ + if (__copy_from_user((void *)&val, ptr, 8) == 0) \ __gu_err = 0; \ else \ __gu_err = -EFAULT; \ @@ -188,6 +188,8 @@ do { \ \ if (likely(access_ok(VERIFY_READ, __gu_ptr, size))) \ __get_user_common((x), size, __gu_ptr); \ + else \ + (x) = 0; \ \ __gu_err; \ }) @@ -201,6 +203,7 @@ do { \ "2:\n" \ ".section .fixup,\"ax\"\n" \ "3:li %0, %4\n" \ + "li %1, 0\n" \ "j 2b\n" \ ".previous\n" \ ".section __ex_table,\"a\"\n" \ @@ -298,35 +301,34 @@ extern int __copy_tofrom_user(void *to, const void *from, unsigned long len); static inline unsigned long copy_from_user(void *to, const void *from, unsigned long len) { - unsigned long over; + unsigned long res = len; - if (access_ok(VERIFY_READ, from, len)) - return __copy_tofrom_user(to, from, len); + if (likely(access_ok(VERIFY_READ, from, len))) + res = __copy_tofrom_user(to, from, len); - if ((unsigned long)from < TASK_SIZE) { - over = (unsigned long)from + len - TASK_SIZE; - return __copy_tofrom_user(to, from, len - over) + over; - } - return len; + if (unlikely(res)) + memset(to + (len - res), 0, res); + + return res; } static inline unsigned long copy_to_user(void *to, const void *from, unsigned long len) { - unsigned long over; - - if (access_ok(VERIFY_WRITE, to, len)) - return __copy_tofrom_user(to, from, len); + if (likely(access_ok(VERIFY_WRITE, to, len))) + len = __copy_tofrom_user(to, from, len); - if ((unsigned long)to < TASK_SIZE) { - over = (unsigned long)to + len - TASK_SIZE; - return __copy_tofrom_user(to, from, len - over) + over; - } return len; } -#define __copy_from_user(to, from, len) \ - __copy_tofrom_user((to), (from), (len)) +static inline unsigned long +__copy_from_user(void *to, const void *from, unsigned long len) +{ + unsigned long left = __copy_tofrom_user(to, from, len); + if (unlikely(left)) + memset(to + (len - left), 0, left); + return left; +} #define __copy_to_user(to, from, len) \ __copy_tofrom_user((to), (from), (len)) @@ -340,17 +342,17 @@ __copy_to_user_inatomic(void *to, const void *from, unsigned long len) static inline unsigned long __copy_from_user_inatomic(void *to, const void *from, unsigned long len) { - return __copy_from_user(to, from, len); + return __copy_tofrom_user(to, from, len); } -#define __copy_in_user(to, from, len) __copy_from_user(to, from, len) +#define __copy_in_user(to, from, len) __copy_tofrom_user(to, from, len) static inline unsigned long copy_in_user(void *to, const void *from, unsigned long len) { if (access_ok(VERIFY_READ, from, len) && access_ok(VERFITY_WRITE, to, len)) - return copy_from_user(to, from, len); + return __copy_tofrom_user(to, from, len); } /* diff --git a/arch/sh/include/asm/atomic-llsc.h b/arch/sh/include/asm/atomic-llsc.h index caea2c45f6c2..1d159ce50f5a 100644 --- a/arch/sh/include/asm/atomic-llsc.h +++ b/arch/sh/include/asm/atomic-llsc.h @@ -60,7 +60,7 @@ static inline int atomic_fetch_##op(int i, atomic_t *v) \ " movco.l %0, @%3 \n" \ " bf 1b \n" \ " synco \n" \ - : "=&z" (temp), "=&z" (res) \ + : "=&z" (temp), "=&r" (res) \ : "r" (i), "r" (&v->counter) \ : "t"); \ \ diff --git a/arch/sh/include/asm/uaccess.h b/arch/sh/include/asm/uaccess.h index a49635c51266..92ade79ac427 100644 --- a/arch/sh/include/asm/uaccess.h +++ b/arch/sh/include/asm/uaccess.h @@ -151,7 +151,10 @@ copy_from_user(void *to, const void __user *from, unsigned long n) __kernel_size_t __copy_size = (__kernel_size_t) n; if (__copy_size && __access_ok(__copy_from, __copy_size)) - return __copy_user(to, from, __copy_size); + __copy_size = __copy_user(to, from, __copy_size); + + if (unlikely(__copy_size)) + memset(to + (n - __copy_size), 0, __copy_size); return __copy_size; } diff --git a/arch/sh/include/asm/uaccess_64.h b/arch/sh/include/asm/uaccess_64.h index c01376c76b86..ca5073dd4596 100644 --- a/arch/sh/include/asm/uaccess_64.h +++ b/arch/sh/include/asm/uaccess_64.h @@ -24,6 +24,7 @@ #define __get_user_size(x,ptr,size,retval) \ do { \ retval = 0; \ + x = 0; \ switch (size) { \ case 1: \ retval = __get_user_asm_b((void *)&x, \ diff --git a/arch/sh/kernel/cpu/sh4a/smp-shx3.c b/arch/sh/kernel/cpu/sh4a/smp-shx3.c index 839612c8a0a0..0d3637c494bf 100644 --- a/arch/sh/kernel/cpu/sh4a/smp-shx3.c +++ b/arch/sh/kernel/cpu/sh4a/smp-shx3.c @@ -122,32 +122,16 @@ static void shx3_update_boot_vector(unsigned int cpu) __raw_writel(STBCR_RESET, STBCR_REG(cpu)); } -static int -shx3_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) +static int shx3_cpu_prepare(unsigned int cpu) { - unsigned int cpu = (unsigned int)hcpu; - - switch (action) { - case CPU_UP_PREPARE: - shx3_update_boot_vector(cpu); - break; - case CPU_ONLINE: - pr_info("CPU %u is now online\n", cpu); - break; - case CPU_DEAD: - break; - } - - return NOTIFY_OK; + shx3_update_boot_vector(cpu); + return 0; } -static struct notifier_block shx3_cpu_notifier = { - .notifier_call = shx3_cpu_callback, -}; - static int register_shx3_cpu_notifier(void) { - register_hotcpu_notifier(&shx3_cpu_notifier); + cpuhp_setup_state_nocalls(CPUHP_SH_SH3X_PREPARE, "sh/shx3:prepare", + shx3_cpu_prepare, NULL); return 0; } late_initcall(register_shx3_cpu_notifier); diff --git a/arch/sh/kernel/ftrace.c b/arch/sh/kernel/ftrace.c index 38993e09ef03..95eccd49672f 100644 --- a/arch/sh/kernel/ftrace.c +++ b/arch/sh/kernel/ftrace.c @@ -382,7 +382,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) return; } - err = ftrace_push_return_trace(old, self_addr, &trace.depth, 0); + err = ftrace_push_return_trace(old, self_addr, &trace.depth, 0, NULL); if (err == -EBUSY) { __raw_writel(old, parent); return; diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 59b09600dd32..f5d60f14a0bc 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -56,7 +56,6 @@ config SPARC64 def_bool 64BIT select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_GRAPH_TRACER - select HAVE_FUNCTION_GRAPH_FP_TEST select HAVE_KRETPROBES select HAVE_KPROBES select HAVE_RCU_TABLE_FREE if SMP diff --git a/arch/sparc/include/asm/ftrace.h b/arch/sparc/include/asm/ftrace.h index 3192a8e42fd6..62755a339a59 100644 --- a/arch/sparc/include/asm/ftrace.h +++ b/arch/sparc/include/asm/ftrace.h @@ -9,6 +9,10 @@ void _mcount(void); #endif +#endif /* CONFIG_MCOUNT */ + +#if defined(CONFIG_SPARC64) && !defined(CC_USE_FENTRY) +#define HAVE_FUNCTION_GRAPH_FP_TEST #endif #ifdef CONFIG_DYNAMIC_FTRACE diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h index 8c2a8c937540..c1263fc390db 100644 --- a/arch/sparc/include/asm/page_64.h +++ b/arch/sparc/include/asm/page_64.h @@ -25,6 +25,7 @@ #define HPAGE_MASK (~(HPAGE_SIZE - 1UL)) #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA +#define REAL_HPAGE_PER_HPAGE (_AC(1,UL) << (HPAGE_SHIFT - REAL_HPAGE_SHIFT)) #endif #ifndef __ASSEMBLY__ diff --git a/arch/sparc/include/asm/smp_64.h b/arch/sparc/include/asm/smp_64.h index 26d9e7726867..ce2233f7e662 100644 --- a/arch/sparc/include/asm/smp_64.h +++ b/arch/sparc/include/asm/smp_64.h @@ -43,6 +43,7 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask); int hard_smp_processor_id(void); #define raw_smp_processor_id() (current_thread_info()->cpu) +void smp_fill_in_cpu_possible_map(void); void smp_fill_in_sib_core_maps(void); void cpu_play_dead(void); @@ -72,6 +73,7 @@ void __cpu_die(unsigned int cpu); #define smp_fill_in_sib_core_maps() do { } while (0) #define smp_fetch_global_regs() do { } while (0) #define smp_fetch_global_pmu() do { } while (0) +#define smp_fill_in_cpu_possible_map() do { } while (0) #endif /* !(CONFIG_SMP) */ diff --git a/arch/sparc/include/asm/uaccess_32.h b/arch/sparc/include/asm/uaccess_32.h index 341a5a133f48..ea55f86d7ccd 100644 --- a/arch/sparc/include/asm/uaccess_32.h +++ b/arch/sparc/include/asm/uaccess_32.h @@ -249,8 +249,7 @@ unsigned long __copy_user(void __user *to, const void __user *from, unsigned lon static inline unsigned long copy_to_user(void __user *to, const void *from, unsigned long n) { if (n && __access_ok((unsigned long) to, n)) { - if (!__builtin_constant_p(n)) - check_object_size(from, n, true); + check_object_size(from, n, true); return __copy_user(to, (__force void __user *) from, n); } else return n; @@ -258,19 +257,19 @@ static inline unsigned long copy_to_user(void __user *to, const void *from, unsi static inline unsigned long __copy_to_user(void __user *to, const void *from, unsigned long n) { - if (!__builtin_constant_p(n)) - check_object_size(from, n, true); + check_object_size(from, n, true); return __copy_user(to, (__force void __user *) from, n); } static inline unsigned long copy_from_user(void *to, const void __user *from, unsigned long n) { if (n && __access_ok((unsigned long) from, n)) { - if (!__builtin_constant_p(n)) - check_object_size(to, n, false); + check_object_size(to, n, false); return __copy_user((__force void __user *) to, from, n); - } else + } else { + memset(to, 0, n); return n; + } } static inline unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n) diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h index 8bda94fab8e8..37a315d0ddd4 100644 --- a/arch/sparc/include/asm/uaccess_64.h +++ b/arch/sparc/include/asm/uaccess_64.h @@ -212,8 +212,7 @@ copy_from_user(void *to, const void __user *from, unsigned long size) { unsigned long ret; - if (!__builtin_constant_p(size)) - check_object_size(to, size, false); + check_object_size(to, size, false); ret = ___copy_from_user(to, from, size); if (unlikely(ret)) @@ -233,8 +232,8 @@ copy_to_user(void __user *to, const void *from, unsigned long size) { unsigned long ret; - if (!__builtin_constant_p(size)) - check_object_size(from, size, true); + check_object_size(from, size, true); + ret = ___copy_to_user(to, from, size); if (unlikely(ret)) ret = copy_to_user_fixup(to, from, size); diff --git a/arch/sparc/kernel/ftrace.c b/arch/sparc/kernel/ftrace.c index 0a2d2ddff543..6bcff698069b 100644 --- a/arch/sparc/kernel/ftrace.c +++ b/arch/sparc/kernel/ftrace.c @@ -131,7 +131,7 @@ unsigned long prepare_ftrace_return(unsigned long parent, return parent + 8UL; if (ftrace_push_return_trace(parent, self_addr, &trace.depth, - frame_pointer) == -EBUSY) + frame_pointer, NULL) == -EBUSY) return parent + 8UL; trace.func = self_addr; diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c index 599f1207eed2..6b7331d198e9 100644 --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c @@ -31,6 +31,7 @@ #include <linux/initrd.h> #include <linux/module.h> #include <linux/start_kernel.h> +#include <linux/bootmem.h> #include <asm/io.h> #include <asm/processor.h> @@ -50,6 +51,8 @@ #include <asm/elf.h> #include <asm/mdesc.h> #include <asm/cacheflush.h> +#include <asm/dma.h> +#include <asm/irq.h> #ifdef CONFIG_IP_PNP #include <net/ipconfig.h> @@ -590,6 +593,22 @@ static void __init init_sparc64_elf_hwcap(void) pause_patch(); } +void __init alloc_irqstack_bootmem(void) +{ + unsigned int i, node; + + for_each_possible_cpu(i) { + node = cpu_to_node(i); + + softirq_stack[i] = __alloc_bootmem_node(NODE_DATA(node), + THREAD_SIZE, + THREAD_SIZE, 0); + hardirq_stack[i] = __alloc_bootmem_node(NODE_DATA(node), + THREAD_SIZE, + THREAD_SIZE, 0); + } +} + void __init setup_arch(char **cmdline_p) { /* Initialize PROM console and command line. */ @@ -650,6 +669,13 @@ void __init setup_arch(char **cmdline_p) paging_init(); init_sparc64_elf_hwcap(); + smp_fill_in_cpu_possible_map(); + /* + * Once the OF device tree and MDESC have been setup and nr_cpus has + * been parsed, we know the list of possible cpus. Therefore we can + * allocate the IRQ stacks. + */ + alloc_irqstack_bootmem(); } extern int stop_a_enabled; diff --git a/arch/sparc/kernel/smp_32.c b/arch/sparc/kernel/smp_32.c index fb30e7c6a5b1..e80e6ba3d500 100644 --- a/arch/sparc/kernel/smp_32.c +++ b/arch/sparc/kernel/smp_32.c @@ -352,9 +352,7 @@ static void sparc_start_secondary(void *arg) preempt_disable(); cpu = smp_processor_id(); - /* Invoke the CPU_STARTING notifier callbacks */ notify_cpu_starting(cpu); - arch_cpu_pre_online(arg); /* Set the CPU in the cpu_online_mask */ diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 8a6151a628ce..d3035ba6cd31 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1227,6 +1227,20 @@ void __init smp_setup_processor_id(void) xcall_deliver_impl = hypervisor_xcall_deliver; } +void __init smp_fill_in_cpu_possible_map(void) +{ + int possible_cpus = num_possible_cpus(); + int i; + + if (possible_cpus > nr_cpu_ids) + possible_cpus = nr_cpu_ids; + + for (i = 0; i < possible_cpus; i++) + set_cpu_possible(i, true); + for (; i < NR_CPUS; i++) + set_cpu_possible(i, false); +} + void smp_fill_in_sib_core_maps(void) { unsigned int i; diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index e16fdd28a931..3f291d8c57f7 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -484,6 +484,7 @@ good_area: tsb_grow(mm, MM_TSB_BASE, mm_rss); #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) mm_rss = mm->context.hugetlb_pte_count + mm->context.thp_pte_count; + mm_rss *= REAL_HPAGE_PER_HPAGE; if (unlikely(mm_rss > mm->context.tsb_block[MM_TSB_HUGE].tsb_rss_limit)) { if (mm->context.tsb_block[MM_TSB_HUGE].tsb) diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 65457c9f1365..7ac6b62fb7c1 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -1160,7 +1160,7 @@ int __node_distance(int from, int to) return numa_latency[from][to]; } -static int find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp) +static int __init find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp) { int i; @@ -1173,8 +1173,8 @@ static int find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp) return i; } -static void find_numa_latencies_for_group(struct mdesc_handle *md, u64 grp, - int index) +static void __init find_numa_latencies_for_group(struct mdesc_handle *md, + u64 grp, int index) { u64 arc; @@ -2081,7 +2081,6 @@ void __init paging_init(void) { unsigned long end_pfn, shift, phys_base; unsigned long real_end, i; - int node; setup_page_offset(); @@ -2250,21 +2249,6 @@ void __init paging_init(void) /* Setup bootmem... */ last_valid_pfn = end_pfn = bootmem_init(phys_base); - /* Once the OF device tree and MDESC have been setup, we know - * the list of possible cpus. Therefore we can allocate the - * IRQ stacks. - */ - for_each_possible_cpu(i) { - node = cpu_to_node(i); - - softirq_stack[i] = __alloc_bootmem_node(NODE_DATA(node), - THREAD_SIZE, - THREAD_SIZE, 0); - hardirq_stack[i] = __alloc_bootmem_node(NODE_DATA(node), - THREAD_SIZE, - THREAD_SIZE, 0); - } - kernel_physical_mapping_init(); { diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c index 3659d37b4d81..c56a195c9071 100644 --- a/arch/sparc/mm/tlb.c +++ b/arch/sparc/mm/tlb.c @@ -174,10 +174,25 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, return; if ((pmd_val(pmd) ^ pmd_val(orig)) & _PAGE_PMD_HUGE) { - if (pmd_val(pmd) & _PAGE_PMD_HUGE) - mm->context.thp_pte_count++; - else - mm->context.thp_pte_count--; + /* + * Note that this routine only sets pmds for THP pages. + * Hugetlb pages are handled elsewhere. We need to check + * for huge zero page. Huge zero pages are like hugetlb + * pages in that there is no RSS, but there is the need + * for TSB entries. So, huge zero page counts go into + * hugetlb_pte_count. + */ + if (pmd_val(pmd) & _PAGE_PMD_HUGE) { + if (is_huge_zero_page(pmd_page(pmd))) + mm->context.hugetlb_pte_count++; + else + mm->context.thp_pte_count++; + } else { + if (is_huge_zero_page(pmd_page(orig))) + mm->context.hugetlb_pte_count--; + else + mm->context.thp_pte_count--; + } /* Do not try to allocate the TSB hash table if we * don't have one already. We have various locks held @@ -204,6 +219,9 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, } } +/* + * This routine is only called when splitting a THP + */ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { @@ -213,6 +231,15 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, set_pmd_at(vma->vm_mm, address, pmdp, entry); flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); + + /* + * set_pmd_at() will not be called in a way to decrement + * thp_pte_count when splitting a THP, so do it now. + * Sanity check pmd before doing the actual decrement. + */ + if ((pmd_val(entry) & _PAGE_PMD_HUGE) && + !is_huge_zero_page(pmd_page(entry))) + (vma->vm_mm)->context.thp_pte_count--; } void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c index 6725ed45580e..f2b77112e9d8 100644 --- a/arch/sparc/mm/tsb.c +++ b/arch/sparc/mm/tsb.c @@ -469,8 +469,10 @@ retry_tsb_alloc: int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { + unsigned long mm_rss = get_mm_rss(mm); #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) - unsigned long total_huge_pte_count; + unsigned long saved_hugetlb_pte_count; + unsigned long saved_thp_pte_count; #endif unsigned int i; @@ -483,10 +485,12 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) * will re-increment the counters as the parent PTEs are * copied into the child address space. */ - total_huge_pte_count = mm->context.hugetlb_pte_count + - mm->context.thp_pte_count; + saved_hugetlb_pte_count = mm->context.hugetlb_pte_count; + saved_thp_pte_count = mm->context.thp_pte_count; mm->context.hugetlb_pte_count = 0; mm->context.thp_pte_count = 0; + + mm_rss -= saved_thp_pte_count * (HPAGE_SIZE / PAGE_SIZE); #endif /* copy_mm() copies over the parent's mm_struct before calling @@ -499,11 +503,13 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) /* If this is fork, inherit the parent's TSB size. We would * grow it to that size on the first page fault anyways. */ - tsb_grow(mm, MM_TSB_BASE, get_mm_rss(mm)); + tsb_grow(mm, MM_TSB_BASE, mm_rss); #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) - if (unlikely(total_huge_pte_count)) - tsb_grow(mm, MM_TSB_HUGE, total_huge_pte_count); + if (unlikely(saved_hugetlb_pte_count + saved_thp_pte_count)) + tsb_grow(mm, MM_TSB_HUGE, + (saved_hugetlb_pte_count + saved_thp_pte_count) * + REAL_HPAGE_PER_HPAGE); #endif if (unlikely(!mm->context.tsb_block[MM_TSB_BASE].tsb)) diff --git a/arch/tile/kernel/ftrace.c b/arch/tile/kernel/ftrace.c index 4a572088b270..b827a418b155 100644 --- a/arch/tile/kernel/ftrace.c +++ b/arch/tile/kernel/ftrace.c @@ -184,7 +184,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, *parent = return_hooker; err = ftrace_push_return_trace(old, self_addr, &trace.depth, - frame_pointer); + frame_pointer, NULL); if (err == -EBUSY) { *parent = old; return; diff --git a/arch/um/kernel/skas/syscall.c b/arch/um/kernel/skas/syscall.c index ef4b8f949b51..b783ac87d98a 100644 --- a/arch/um/kernel/skas/syscall.c +++ b/arch/um/kernel/skas/syscall.c @@ -21,21 +21,17 @@ void handle_syscall(struct uml_pt_regs *r) PT_REGS_SET_SYSCALL_RETURN(regs, -ENOSYS); if (syscall_trace_enter(regs)) - return; + goto out; /* Do the seccomp check after ptrace; failures should be fast. */ if (secure_computing(NULL) == -1) - return; + goto out; - /* Update the syscall number after orig_ax has potentially been updated - * with ptrace. - */ - UPT_SYSCALL_NR(r) = PT_SYSCALL_NR(r->gp); syscall = UPT_SYSCALL_NR(r); - if (syscall >= 0 && syscall <= __NR_syscall_max) PT_REGS_SET_SYSCALL_RETURN(regs, EXECUTE_SYSCALL(syscall, regs)); +out: syscall_trace_leave(regs); } diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 2a1f0ce7c59a..9b2d50a73a11 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -93,6 +93,7 @@ config X86 select HAVE_ARCH_TRANSPARENT_HUGEPAGE select HAVE_ARCH_WITHIN_STACK_FRAMES select HAVE_EBPF_JIT if X86_64 + select HAVE_ARCH_VMAP_STACK if X86_64 select HAVE_CC_STACKPROTECTOR select HAVE_CMPXCHG_DOUBLE select HAVE_CMPXCHG_LOCAL @@ -109,7 +110,6 @@ config X86 select HAVE_EXIT_THREAD select HAVE_FENTRY if X86_64 select HAVE_FTRACE_MCOUNT_RECORD - select HAVE_FUNCTION_GRAPH_FP_TEST select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER select HAVE_GCC_PLUGINS @@ -157,6 +157,7 @@ config X86 select SPARSE_IRQ select SRCU select SYSCTL_EXCEPTION_TRACE + select THREAD_INFO_IN_TASK select USER_STACKTRACE_SUPPORT select VIRT_TO_BUS select X86_DEV_DMA_OPS if X86_64 @@ -549,6 +550,18 @@ config X86_INTEL_QUARK Say Y here if you have a Quark based system such as the Arduino compatible Intel Galileo. +config MLX_PLATFORM + tristate "Mellanox Technologies platform support" + depends on X86_64 + depends on X86_EXTENDED_PLATFORM + ---help--- + This option enables system support for the Mellanox Technologies + platform. + + Say Y here if you are building a kernel for Mellanox system. + + Otherwise, say N. + config X86_INTEL_LPSS bool "Intel Low Power Subsystem Support" depends on X86 && ACPI @@ -705,7 +718,6 @@ config PARAVIRT_DEBUG config PARAVIRT_SPINLOCKS bool "Paravirtualization layer for spinlocks" depends on PARAVIRT && SMP - select UNINLINE_SPIN_UNLOCK if !QUEUED_SPINLOCKS ---help--- Paravirtualized spinlocks allow a pvops backend to replace the spinlock implementation with something virtualization-friendly @@ -718,7 +730,7 @@ config PARAVIRT_SPINLOCKS config QUEUED_LOCK_STAT bool "Paravirt queued spinlock statistics" - depends on PARAVIRT_SPINLOCKS && DEBUG_FS && QUEUED_SPINLOCKS + depends on PARAVIRT_SPINLOCKS && DEBUG_FS ---help--- Enable the collection of statistical data on the slowpath behavior of paravirtualized queued spinlocks and report diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index ff574dad95cc..cc69e37548db 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -29,22 +29,11 @@ __pure const struct efi_config *__efi_early(void) static void setup_boot_services##bits(struct efi_config *c) \ { \ efi_system_table_##bits##_t *table; \ - efi_boot_services_##bits##_t *bt; \ \ table = (typeof(table))sys_table; \ \ + c->boot_services = table->boottime; \ c->text_output = table->con_out; \ - \ - bt = (typeof(bt))(unsigned long)(table->boottime); \ - \ - c->allocate_pool = bt->allocate_pool; \ - c->allocate_pages = bt->allocate_pages; \ - c->get_memory_map = bt->get_memory_map; \ - c->free_pool = bt->free_pool; \ - c->free_pages = bt->free_pages; \ - c->locate_handle = bt->locate_handle; \ - c->handle_protocol = bt->handle_protocol; \ - c->exit_boot_services = bt->exit_boot_services; \ } BOOT_SERVICES(32); BOOT_SERVICES(64); @@ -286,29 +275,6 @@ void efi_char16_printk(efi_system_table_t *table, efi_char16_t *str) } } -static void find_bits(unsigned long mask, u8 *pos, u8 *size) -{ - u8 first, len; - - first = 0; - len = 0; - - if (mask) { - while (!(mask & 0x1)) { - mask = mask >> 1; - first++; - } - - while (mask & 0x1) { - mask = mask >> 1; - len++; - } - } - - *pos = first; - *size = len; -} - static efi_status_t __setup_efi_pci32(efi_pci_io_protocol_32 *pci, struct pci_setup_rom **__rom) { @@ -578,7 +544,7 @@ setup_uga32(void **uga_handle, unsigned long size, u32 *width, u32 *height) efi_guid_t uga_proto = EFI_UGA_PROTOCOL_GUID; unsigned long nr_ugas; u32 *handles = (u32 *)uga_handle;; - efi_status_t status; + efi_status_t status = EFI_INVALID_PARAMETER; int i; first_uga = NULL; @@ -623,7 +589,7 @@ setup_uga64(void **uga_handle, unsigned long size, u32 *width, u32 *height) efi_guid_t uga_proto = EFI_UGA_PROTOCOL_GUID; unsigned long nr_ugas; u64 *handles = (u64 *)uga_handle;; - efi_status_t status; + efi_status_t status = EFI_INVALID_PARAMETER; int i; first_uga = NULL; @@ -1004,79 +970,87 @@ static efi_status_t alloc_e820ext(u32 nr_desc, struct setup_data **e820ext, return status; } -static efi_status_t exit_boot(struct boot_params *boot_params, - void *handle, bool is64) -{ - struct efi_info *efi = &boot_params->efi_info; - unsigned long map_sz, key, desc_size; - efi_memory_desc_t *mem_map; +struct exit_boot_struct { + struct boot_params *boot_params; + struct efi_info *efi; struct setup_data *e820ext; - const char *signature; __u32 e820ext_size; - __u32 nr_desc, prev_nr_desc; - efi_status_t status; - __u32 desc_version; - bool called_exit = false; - u8 nr_entries; - int i; - - nr_desc = 0; - e820ext = NULL; - e820ext_size = 0; - -get_map: - status = efi_get_memory_map(sys_table, &mem_map, &map_sz, &desc_size, - &desc_version, &key); - - if (status != EFI_SUCCESS) - return status; - - prev_nr_desc = nr_desc; - nr_desc = map_sz / desc_size; - if (nr_desc > prev_nr_desc && - nr_desc > ARRAY_SIZE(boot_params->e820_map)) { - u32 nr_e820ext = nr_desc - ARRAY_SIZE(boot_params->e820_map); - - status = alloc_e820ext(nr_e820ext, &e820ext, &e820ext_size); - if (status != EFI_SUCCESS) - goto free_mem_map; + bool is64; +}; - efi_call_early(free_pool, mem_map); - goto get_map; /* Allocated memory, get map again */ +static efi_status_t exit_boot_func(efi_system_table_t *sys_table_arg, + struct efi_boot_memmap *map, + void *priv) +{ + static bool first = true; + const char *signature; + __u32 nr_desc; + efi_status_t status; + struct exit_boot_struct *p = priv; + + if (first) { + nr_desc = *map->buff_size / *map->desc_size; + if (nr_desc > ARRAY_SIZE(p->boot_params->e820_map)) { + u32 nr_e820ext = nr_desc - + ARRAY_SIZE(p->boot_params->e820_map); + + status = alloc_e820ext(nr_e820ext, &p->e820ext, + &p->e820ext_size); + if (status != EFI_SUCCESS) + return status; + } + first = false; } - signature = is64 ? EFI64_LOADER_SIGNATURE : EFI32_LOADER_SIGNATURE; - memcpy(&efi->efi_loader_signature, signature, sizeof(__u32)); + signature = p->is64 ? EFI64_LOADER_SIGNATURE : EFI32_LOADER_SIGNATURE; + memcpy(&p->efi->efi_loader_signature, signature, sizeof(__u32)); - efi->efi_systab = (unsigned long)sys_table; - efi->efi_memdesc_size = desc_size; - efi->efi_memdesc_version = desc_version; - efi->efi_memmap = (unsigned long)mem_map; - efi->efi_memmap_size = map_sz; + p->efi->efi_systab = (unsigned long)sys_table_arg; + p->efi->efi_memdesc_size = *map->desc_size; + p->efi->efi_memdesc_version = *map->desc_ver; + p->efi->efi_memmap = (unsigned long)*map->map; + p->efi->efi_memmap_size = *map->map_size; #ifdef CONFIG_X86_64 - efi->efi_systab_hi = (unsigned long)sys_table >> 32; - efi->efi_memmap_hi = (unsigned long)mem_map >> 32; + p->efi->efi_systab_hi = (unsigned long)sys_table_arg >> 32; + p->efi->efi_memmap_hi = (unsigned long)*map->map >> 32; #endif + return EFI_SUCCESS; +} + +static efi_status_t exit_boot(struct boot_params *boot_params, + void *handle, bool is64) +{ + unsigned long map_sz, key, desc_size, buff_size; + efi_memory_desc_t *mem_map; + struct setup_data *e820ext; + __u32 e820ext_size; + efi_status_t status; + __u32 desc_version; + struct efi_boot_memmap map; + struct exit_boot_struct priv; + + map.map = &mem_map; + map.map_size = &map_sz; + map.desc_size = &desc_size; + map.desc_ver = &desc_version; + map.key_ptr = &key; + map.buff_size = &buff_size; + priv.boot_params = boot_params; + priv.efi = &boot_params->efi_info; + priv.e820ext = NULL; + priv.e820ext_size = 0; + priv.is64 = is64; + /* Might as well exit boot services now */ - status = efi_call_early(exit_boot_services, handle, key); - if (status != EFI_SUCCESS) { - /* - * ExitBootServices() will fail if any of the event - * handlers change the memory map. In which case, we - * must be prepared to retry, but only once so that - * we're guaranteed to exit on repeated failures instead - * of spinning forever. - */ - if (called_exit) - goto free_mem_map; - - called_exit = true; - efi_call_early(free_pool, mem_map); - goto get_map; - } + status = efi_exit_boot_services(sys_table, handle, &map, &priv, + exit_boot_func); + if (status != EFI_SUCCESS) + return status; + e820ext = priv.e820ext; + e820ext_size = priv.e820ext_size; /* Historic? */ boot_params->alt_mem_k = 32 * 1024; @@ -1085,10 +1059,6 @@ get_map: return status; return EFI_SUCCESS; - -free_mem_map: - efi_call_early(free_pool, mem_map); - return status; } /* diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 1038524270e7..fd0b6a272dd5 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -82,7 +82,7 @@ ENTRY(efi_pe_entry) /* Relocate efi_config->call() */ leal efi32_config(%esi), %eax - add %esi, 88(%eax) + add %esi, 32(%eax) pushl %eax call make_boot_params @@ -108,7 +108,7 @@ ENTRY(efi32_stub_entry) /* Relocate efi_config->call() */ leal efi32_config(%esi), %eax - add %esi, 88(%eax) + add %esi, 32(%eax) pushl %eax 2: call efi_main @@ -264,7 +264,7 @@ relocated: #ifdef CONFIG_EFI_STUB .data efi32_config: - .fill 11,8,0 + .fill 4,8,0 .long efi_call_phys .long 0 .byte 0 diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 0d80a7ad65cd..efdfba21a5b2 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -265,7 +265,7 @@ ENTRY(efi_pe_entry) /* * Relocate efi_config->call(). */ - addq %rbp, efi64_config+88(%rip) + addq %rbp, efi64_config+32(%rip) movq %rax, %rdi call make_boot_params @@ -285,7 +285,7 @@ handover_entry: * Relocate efi_config->call(). */ movq efi_config(%rip), %rax - addq %rbp, 88(%rax) + addq %rbp, 32(%rax) 2: movq efi_config(%rip), %rdi call efi_main @@ -457,14 +457,14 @@ efi_config: #ifdef CONFIG_EFI_MIXED .global efi32_config efi32_config: - .fill 11,8,0 + .fill 4,8,0 .quad efi64_thunk .byte 0 #endif .global efi64_config efi64_config: - .fill 11,8,0 + .fill 4,8,0 .quad efi_call .byte 1 #endif /* CONFIG_EFI_STUB */ diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 1433f6b4607d..bdd9cc59d20f 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -31,13 +31,6 @@ #define CREATE_TRACE_POINTS #include <trace/events/syscalls.h> -static struct thread_info *pt_regs_to_thread_info(struct pt_regs *regs) -{ - unsigned long top_of_stack = - (unsigned long)(regs + 1) + TOP_OF_KERNEL_STACK_PADDING; - return (struct thread_info *)(top_of_stack - THREAD_SIZE); -} - #ifdef CONFIG_CONTEXT_TRACKING /* Called on entry from user mode with IRQs off. */ __visible inline void enter_from_user_mode(void) @@ -71,7 +64,7 @@ static long syscall_trace_enter(struct pt_regs *regs) { u32 arch = in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64; - struct thread_info *ti = pt_regs_to_thread_info(regs); + struct thread_info *ti = current_thread_info(); unsigned long ret = 0; bool emulated = false; u32 work; @@ -173,18 +166,17 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags) /* Disable IRQs and retry */ local_irq_disable(); - cached_flags = READ_ONCE(pt_regs_to_thread_info(regs)->flags); + cached_flags = READ_ONCE(current_thread_info()->flags); if (!(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS)) break; - } } /* Called with IRQs disabled. */ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) { - struct thread_info *ti = pt_regs_to_thread_info(regs); + struct thread_info *ti = current_thread_info(); u32 cached_flags; if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled())) @@ -209,7 +201,7 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) * special case only applies after poking regs and before the * very next return to user mode. */ - ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED); + current->thread.status &= ~(TS_COMPAT|TS_I386_REGS_POKED); #endif user_enter_irqoff(); @@ -247,7 +239,7 @@ static void syscall_slow_exit_work(struct pt_regs *regs, u32 cached_flags) */ __visible inline void syscall_return_slowpath(struct pt_regs *regs) { - struct thread_info *ti = pt_regs_to_thread_info(regs); + struct thread_info *ti = current_thread_info(); u32 cached_flags = READ_ONCE(ti->flags); CT_WARN_ON(ct_state() != CONTEXT_KERNEL); @@ -270,7 +262,7 @@ __visible inline void syscall_return_slowpath(struct pt_regs *regs) #ifdef CONFIG_X86_64 __visible void do_syscall_64(struct pt_regs *regs) { - struct thread_info *ti = pt_regs_to_thread_info(regs); + struct thread_info *ti = current_thread_info(); unsigned long nr = regs->orig_ax; enter_from_user_mode(); @@ -303,11 +295,11 @@ __visible void do_syscall_64(struct pt_regs *regs) */ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) { - struct thread_info *ti = pt_regs_to_thread_info(regs); + struct thread_info *ti = current_thread_info(); unsigned int nr = (unsigned int)regs->orig_ax; #ifdef CONFIG_IA32_EMULATION - ti->status |= TS_COMPAT; + current->thread.status |= TS_COMPAT; #endif if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) { diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 0b56666e6039..b75a8bcd2d23 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -204,34 +204,70 @@ POP_GS_EX .endm +/* + * %eax: prev task + * %edx: next task + */ +ENTRY(__switch_to_asm) + /* + * Save callee-saved registers + * This must match the order in struct inactive_task_frame + */ + pushl %ebp + pushl %ebx + pushl %edi + pushl %esi + + /* switch stack */ + movl %esp, TASK_threadsp(%eax) + movl TASK_threadsp(%edx), %esp + +#ifdef CONFIG_CC_STACKPROTECTOR + movl TASK_stack_canary(%edx), %ebx + movl %ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset +#endif + + /* restore callee-saved registers */ + popl %esi + popl %edi + popl %ebx + popl %ebp + + jmp __switch_to +END(__switch_to_asm) + +/* + * A newly forked process directly context switches into this address. + * + * eax: prev task we switched from + * ebx: kernel thread func (NULL for user thread) + * edi: kernel thread arg + */ ENTRY(ret_from_fork) pushl %eax call schedule_tail popl %eax + testl %ebx, %ebx + jnz 1f /* kernel threads are uncommon */ + +2: /* When we fork, we trace the syscall return in the child, too. */ movl %esp, %eax call syscall_return_slowpath jmp restore_all -END(ret_from_fork) - -ENTRY(ret_from_kernel_thread) - pushl %eax - call schedule_tail - popl %eax - movl PT_EBP(%esp), %eax - call *PT_EBX(%esp) - movl $0, PT_EAX(%esp) + /* kernel thread */ +1: movl %edi, %eax + call *%ebx /* - * Kernel threads return to userspace as if returning from a syscall. - * We should check whether anything actually uses this path and, if so, - * consider switching it over to ret_from_fork. + * A kernel thread is allowed to return here after successfully + * calling do_execve(). Exit to userspace to complete the execve() + * syscall. */ - movl %esp, %eax - call syscall_return_slowpath - jmp restore_all -ENDPROC(ret_from_kernel_thread) + movl $0, PT_EAX(%esp) + jmp 2b +END(ret_from_fork) /* * Return to user mode is not as complex as all this looks, diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index d172c619c449..fee1d95902b5 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -179,7 +179,8 @@ GLOBAL(entry_SYSCALL_64_after_swapgs) * If we need to do entry work or if we guess we'll need to do * exit work, go straight to the slow path. */ - testl $_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) + movq PER_CPU_VAR(current_task), %r11 + testl $_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, TASK_TI_flags(%r11) jnz entry_SYSCALL64_slow_path entry_SYSCALL_64_fastpath: @@ -217,7 +218,8 @@ entry_SYSCALL_64_fastpath: */ DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF - testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) + movq PER_CPU_VAR(current_task), %r11 + testl $_TIF_ALLWORK_MASK, TASK_TI_flags(%r11) jnz 1f LOCKDEP_SYS_EXIT @@ -351,8 +353,7 @@ ENTRY(stub_ptregs_64) jmp entry_SYSCALL64_slow_path 1: - /* Called from C */ - jmp *%rax /* called from C */ + jmp *%rax /* Called from C */ END(stub_ptregs_64) .macro ptregs_stub func @@ -369,41 +370,73 @@ END(ptregs_\func) #include <asm/syscalls_64.h> /* + * %rdi: prev task + * %rsi: next task + */ +ENTRY(__switch_to_asm) + /* + * Save callee-saved registers + * This must match the order in inactive_task_frame + */ + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + /* switch stack */ + movq %rsp, TASK_threadsp(%rdi) + movq TASK_threadsp(%rsi), %rsp + +#ifdef CONFIG_CC_STACKPROTECTOR + movq TASK_stack_canary(%rsi), %rbx + movq %rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset +#endif + + /* restore callee-saved registers */ + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + popq %rbp + + jmp __switch_to +END(__switch_to_asm) + +/* * A newly forked process directly context switches into this address. * - * rdi: prev task we switched from + * rax: prev task we switched from + * rbx: kernel thread func (NULL for user thread) + * r12: kernel thread arg */ ENTRY(ret_from_fork) - LOCK ; btr $TIF_FORK, TI_flags(%r8) - + movq %rax, %rdi call schedule_tail /* rdi: 'prev' task parameter */ - testb $3, CS(%rsp) /* from kernel_thread? */ - jnz 1f - - /* - * We came from kernel_thread. This code path is quite twisted, and - * someone should clean it up. - * - * copy_thread_tls stashes the function pointer in RBX and the - * parameter to be passed in RBP. The called function is permitted - * to call do_execve and thereby jump to user mode. - */ - movq RBP(%rsp), %rdi - call *RBX(%rsp) - movl $0, RAX(%rsp) + testq %rbx, %rbx /* from kernel_thread? */ + jnz 1f /* kernel threads are uncommon */ - /* - * Fall through as though we're exiting a syscall. This makes a - * twisted sort of sense if we just called do_execve. - */ - -1: +2: movq %rsp, %rdi call syscall_return_slowpath /* returns with IRQs disabled */ TRACE_IRQS_ON /* user mode is traced as IRQS on */ SWAPGS jmp restore_regs_and_iret + +1: + /* kernel thread */ + movq %r12, %rdi + call *%rbx + /* + * A kernel thread is allowed to return here after successfully + * calling do_execve(). Exit to userspace to complete the execve() + * syscall. + */ + movq $0, RAX(%rsp) + jmp 2b END(ret_from_fork) /* @@ -555,27 +588,69 @@ native_irq_return_iret: #ifdef CONFIG_X86_ESPFIX64 native_irq_return_ldt: - pushq %rax - pushq %rdi + /* + * We are running with user GSBASE. All GPRs contain their user + * values. We have a percpu ESPFIX stack that is eight slots + * long (see ESPFIX_STACK_SIZE). espfix_waddr points to the bottom + * of the ESPFIX stack. + * + * We clobber RAX and RDI in this code. We stash RDI on the + * normal stack and RAX on the ESPFIX stack. + * + * The ESPFIX stack layout we set up looks like this: + * + * --- top of ESPFIX stack --- + * SS + * RSP + * RFLAGS + * CS + * RIP <-- RSP points here when we're done + * RAX <-- espfix_waddr points here + * --- bottom of ESPFIX stack --- + */ + + pushq %rdi /* Stash user RDI */ SWAPGS movq PER_CPU_VAR(espfix_waddr), %rdi - movq %rax, (0*8)(%rdi) /* RAX */ - movq (2*8)(%rsp), %rax /* RIP */ + movq %rax, (0*8)(%rdi) /* user RAX */ + movq (1*8)(%rsp), %rax /* user RIP */ movq %rax, (1*8)(%rdi) - movq (3*8)(%rsp), %rax /* CS */ + movq (2*8)(%rsp), %rax /* user CS */ movq %rax, (2*8)(%rdi) - movq (4*8)(%rsp), %rax /* RFLAGS */ + movq (3*8)(%rsp), %rax /* user RFLAGS */ movq %rax, (3*8)(%rdi) - movq (6*8)(%rsp), %rax /* SS */ + movq (5*8)(%rsp), %rax /* user SS */ movq %rax, (5*8)(%rdi) - movq (5*8)(%rsp), %rax /* RSP */ + movq (4*8)(%rsp), %rax /* user RSP */ movq %rax, (4*8)(%rdi) - andl $0xffff0000, %eax - popq %rdi + /* Now RAX == RSP. */ + + andl $0xffff0000, %eax /* RAX = (RSP & 0xffff0000) */ + popq %rdi /* Restore user RDI */ + + /* + * espfix_stack[31:16] == 0. The page tables are set up such that + * (espfix_stack | (X & 0xffff0000)) points to a read-only alias of + * espfix_waddr for any X. That is, there are 65536 RO aliases of + * the same page. Set up RSP so that RSP[31:16] contains the + * respective 16 bits of the /userspace/ RSP and RSP nonetheless + * still points to an RO alias of the ESPFIX stack. + */ orq PER_CPU_VAR(espfix_stack), %rax SWAPGS movq %rax, %rsp - popq %rax + + /* + * At this point, we cannot write to the stack any more, but we can + * still read. + */ + popq %rax /* Restore user RAX */ + + /* + * RSP now points to an ordinary IRET frame, except that the page + * is read-only and RSP[31:16] are preloaded with the userspace + * values. We can now IRET back to userspace. + */ jmp native_irq_return_iret #endif END(common_interrupt) @@ -1002,7 +1077,6 @@ ENTRY(error_entry) testb $3, CS+8(%rsp) jz .Lerror_kernelspace -.Lerror_entry_from_usermode_swapgs: /* * We entered from user mode or we're pretending to have entered * from user mode due to an IRET fault. @@ -1045,7 +1119,8 @@ ENTRY(error_entry) * gsbase and proceed. We'll fix up the exception and land in * .Lgs_change's error handler with kernel gsbase. */ - jmp .Lerror_entry_from_usermode_swapgs + SWAPGS + jmp .Lerror_entry_done .Lbstep_iret: /* Fix truncated RIP */ diff --git a/arch/x86/entry/vdso/vdso2c.h b/arch/x86/entry/vdso/vdso2c.h index 4f741192846d..3dab75f2a673 100644 --- a/arch/x86/entry/vdso/vdso2c.h +++ b/arch/x86/entry/vdso/vdso2c.h @@ -22,7 +22,7 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len, ELF(Phdr) *pt = (ELF(Phdr) *)(raw_addr + GET_LE(&hdr->e_phoff)); - if (hdr->e_type != ET_DYN) + if (GET_LE(&hdr->e_type) != ET_DYN) fail("input is not a shared object\n"); /* Walk the segment table. */ diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index f840766659a8..23c881caabd1 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -37,54 +37,6 @@ void __init init_vdso_image(const struct vdso_image *image) struct linux_binprm; -/* - * Put the vdso above the (randomized) stack with another randomized - * offset. This way there is no hole in the middle of address space. - * To save memory make sure it is still in the same PTE as the stack - * top. This doesn't give that many random bits. - * - * Note that this algorithm is imperfect: the distribution of the vdso - * start address within a PMD is biased toward the end. - * - * Only used for the 64-bit and x32 vdsos. - */ -static unsigned long vdso_addr(unsigned long start, unsigned len) -{ -#ifdef CONFIG_X86_32 - return 0; -#else - unsigned long addr, end; - unsigned offset; - - /* - * Round up the start address. It can start out unaligned as a result - * of stack start randomization. - */ - start = PAGE_ALIGN(start); - - /* Round the lowest possible end address up to a PMD boundary. */ - end = (start + len + PMD_SIZE - 1) & PMD_MASK; - if (end >= TASK_SIZE_MAX) - end = TASK_SIZE_MAX; - end -= len; - - if (end > start) { - offset = get_random_int() % (((end - start) >> PAGE_SHIFT) + 1); - addr = start + (offset << PAGE_SHIFT); - } else { - addr = start; - } - - /* - * Forcibly align the final address in case we have a hardware - * issue that requires alignment for performance reasons. - */ - addr = align_vdso_addr(addr); - - return addr; -#endif -} - static int vdso_fault(const struct vm_special_mapping *sm, struct vm_area_struct *vma, struct vm_fault *vmf) { @@ -176,30 +128,28 @@ static int vvar_fault(const struct vm_special_mapping *sm, return VM_FAULT_SIGBUS; } -static int map_vdso(const struct vdso_image *image, bool calculate_addr) +static const struct vm_special_mapping vdso_mapping = { + .name = "[vdso]", + .fault = vdso_fault, + .mremap = vdso_mremap, +}; +static const struct vm_special_mapping vvar_mapping = { + .name = "[vvar]", + .fault = vvar_fault, +}; + +/* + * Add vdso and vvar mappings to current process. + * @image - blob to map + * @addr - request a specific address (zero to map at free addr) + */ +static int map_vdso(const struct vdso_image *image, unsigned long addr) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; - unsigned long addr, text_start; + unsigned long text_start; int ret = 0; - static const struct vm_special_mapping vdso_mapping = { - .name = "[vdso]", - .fault = vdso_fault, - .mremap = vdso_mremap, - }; - static const struct vm_special_mapping vvar_mapping = { - .name = "[vvar]", - .fault = vvar_fault, - }; - - if (calculate_addr) { - addr = vdso_addr(current->mm->start_stack, - image->size - image->sym_vvar_start); - } else { - addr = 0; - } - if (down_write_killable(&mm->mmap_sem)) return -EINTR; @@ -238,24 +188,104 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr) if (IS_ERR(vma)) { ret = PTR_ERR(vma); - goto up_fail; + do_munmap(mm, text_start, image->size); } up_fail: - if (ret) + if (ret) { current->mm->context.vdso = NULL; + current->mm->context.vdso_image = NULL; + } up_write(&mm->mmap_sem); return ret; } +#ifdef CONFIG_X86_64 +/* + * Put the vdso above the (randomized) stack with another randomized + * offset. This way there is no hole in the middle of address space. + * To save memory make sure it is still in the same PTE as the stack + * top. This doesn't give that many random bits. + * + * Note that this algorithm is imperfect: the distribution of the vdso + * start address within a PMD is biased toward the end. + * + * Only used for the 64-bit and x32 vdsos. + */ +static unsigned long vdso_addr(unsigned long start, unsigned len) +{ + unsigned long addr, end; + unsigned offset; + + /* + * Round up the start address. It can start out unaligned as a result + * of stack start randomization. + */ + start = PAGE_ALIGN(start); + + /* Round the lowest possible end address up to a PMD boundary. */ + end = (start + len + PMD_SIZE - 1) & PMD_MASK; + if (end >= TASK_SIZE_MAX) + end = TASK_SIZE_MAX; + end -= len; + + if (end > start) { + offset = get_random_int() % (((end - start) >> PAGE_SHIFT) + 1); + addr = start + (offset << PAGE_SHIFT); + } else { + addr = start; + } + + /* + * Forcibly align the final address in case we have a hardware + * issue that requires alignment for performance reasons. + */ + addr = align_vdso_addr(addr); + + return addr; +} + +static int map_vdso_randomized(const struct vdso_image *image) +{ + unsigned long addr = vdso_addr(current->mm->start_stack, image->size-image->sym_vvar_start); + + return map_vdso(image, addr); +} +#endif + +int map_vdso_once(const struct vdso_image *image, unsigned long addr) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + + down_write(&mm->mmap_sem); + /* + * Check if we have already mapped vdso blob - fail to prevent + * abusing from userspace install_speciall_mapping, which may + * not do accounting and rlimit right. + * We could search vma near context.vdso, but it's a slowpath, + * so let's explicitely check all VMAs to be completely sure. + */ + for (vma = mm->mmap; vma; vma = vma->vm_next) { + if (vma_is_special_mapping(vma, &vdso_mapping) || + vma_is_special_mapping(vma, &vvar_mapping)) { + up_write(&mm->mmap_sem); + return -EEXIST; + } + } + up_write(&mm->mmap_sem); + + return map_vdso(image, addr); +} + #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) static int load_vdso32(void) { if (vdso32_enabled != 1) /* Other values all mean "disabled" */ return 0; - return map_vdso(&vdso_image_32, false); + return map_vdso(&vdso_image_32, 0); } #endif @@ -265,7 +295,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) if (!vdso64_enabled) return 0; - return map_vdso(&vdso_image_64, true); + return map_vdso_randomized(&vdso_image_64); } #ifdef CONFIG_COMPAT @@ -276,8 +306,7 @@ int compat_arch_setup_additional_pages(struct linux_binprm *bprm, if (test_thread_flag(TIF_X32)) { if (!vdso64_enabled) return 0; - - return map_vdso(&vdso_image_x32, true); + return map_vdso_randomized(&vdso_image_x32); } #endif #ifdef CONFIG_IA32_EMULATION diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index e07a22bb9308..f5f4b3fbbbc2 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -119,8 +119,8 @@ static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] = { [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, - [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080, - [PERF_COUNT_HW_CACHE_MISSES] = 0x0081, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x077d, + [PERF_COUNT_HW_CACHE_MISSES] = 0x077e, [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00d0, /* "Decoder empty" event */ diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index e6131d4454e6..65577f081d07 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -29,6 +29,8 @@ #define COUNTER_SHIFT 16 +static HLIST_HEAD(uncore_unused_list); + struct amd_uncore { int id; int refcnt; @@ -39,7 +41,7 @@ struct amd_uncore { cpumask_t *active_mask; struct pmu *pmu; struct perf_event *events[MAX_COUNTERS]; - struct amd_uncore *free_when_cpu_online; + struct hlist_node node; }; static struct amd_uncore * __percpu *amd_uncore_nb; @@ -306,6 +308,7 @@ static int amd_uncore_cpu_up_prepare(unsigned int cpu) uncore_nb->msr_base = MSR_F15H_NB_PERF_CTL; uncore_nb->active_mask = &amd_nb_active_mask; uncore_nb->pmu = &amd_nb_pmu; + uncore_nb->id = -1; *per_cpu_ptr(amd_uncore_nb, cpu) = uncore_nb; } @@ -319,6 +322,7 @@ static int amd_uncore_cpu_up_prepare(unsigned int cpu) uncore_l2->msr_base = MSR_F16H_L2I_PERF_CTL; uncore_l2->active_mask = &amd_l2_active_mask; uncore_l2->pmu = &amd_l2_pmu; + uncore_l2->id = -1; *per_cpu_ptr(amd_uncore_l2, cpu) = uncore_l2; } @@ -348,7 +352,7 @@ amd_uncore_find_online_sibling(struct amd_uncore *this, continue; if (this->id == that->id) { - that->free_when_cpu_online = this; + hlist_add_head(&this->node, &uncore_unused_list); this = that; break; } @@ -388,13 +392,23 @@ static int amd_uncore_cpu_starting(unsigned int cpu) return 0; } +static void uncore_clean_online(void) +{ + struct amd_uncore *uncore; + struct hlist_node *n; + + hlist_for_each_entry_safe(uncore, n, &uncore_unused_list, node) { + hlist_del(&uncore->node); + kfree(uncore); + } +} + static void uncore_online(unsigned int cpu, struct amd_uncore * __percpu *uncores) { struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu); - kfree(uncore->free_when_cpu_online); - uncore->free_when_cpu_online = NULL; + uncore_clean_online(); if (cpu == uncore->cpu) cpumask_set_cpu(cpu, uncore->active_mask); diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index d0efb5cb1b00..d31735f37ed7 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -37,6 +37,7 @@ #include <asm/timer.h> #include <asm/desc.h> #include <asm/ldt.h> +#include <asm/unwind.h> #include "perf_event.h" @@ -1201,6 +1202,9 @@ static int x86_pmu_add(struct perf_event *event, int flags) * If group events scheduling transaction was started, * skip the schedulability test here, it will be performed * at commit time (->commit_txn) as a whole. + * + * If commit fails, we'll call ->del() on all events + * for which ->add() was called. */ if (cpuc->txn_flags & PERF_PMU_TXN_ADD) goto done_collect; @@ -1223,6 +1227,14 @@ done_collect: cpuc->n_added += n - n0; cpuc->n_txn += n - n0; + if (x86_pmu.add) { + /* + * This is before x86_pmu_enable() will call x86_pmu_start(), + * so we enable LBRs before an event needs them etc.. + */ + x86_pmu.add(event); + } + ret = 0; out: return ret; @@ -1346,7 +1358,7 @@ static void x86_pmu_del(struct perf_event *event, int flags) event->hw.flags &= ~PERF_X86_EVENT_COMMITTED; /* - * If we're called during a txn, we don't need to do anything. + * If we're called during a txn, we only need to undo x86_pmu.add. * The events never got scheduled and ->cancel_txn will truncate * the event_list. * @@ -1354,7 +1366,7 @@ static void x86_pmu_del(struct perf_event *event, int flags) * an event added during that same TXN. */ if (cpuc->txn_flags & PERF_PMU_TXN_ADD) - return; + goto do_del; /* * Not a TXN, therefore cleanup properly. @@ -1384,6 +1396,15 @@ static void x86_pmu_del(struct perf_event *event, int flags) --cpuc->n_events; perf_event_update_userpage(event); + +do_del: + if (x86_pmu.del) { + /* + * This is after x86_pmu_stop(); so we disable LBRs after any + * event can need them etc.. + */ + x86_pmu.del(event); + } } int x86_pmu_handle_irq(struct pt_regs *regs) @@ -2247,39 +2268,26 @@ void arch_perf_update_userpage(struct perf_event *event, cyc2ns_read_end(data); } -/* - * callchain support - */ - -static int backtrace_stack(void *data, char *name) -{ - return 0; -} - -static int backtrace_address(void *data, unsigned long addr, int reliable) -{ - struct perf_callchain_entry_ctx *entry = data; - - return perf_callchain_store(entry, addr); -} - -static const struct stacktrace_ops backtrace_ops = { - .stack = backtrace_stack, - .address = backtrace_address, - .walk_stack = print_context_stack_bp, -}; - void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { + struct unwind_state state; + unsigned long addr; + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { /* TODO: We don't support guest os callchain now */ return; } - perf_callchain_store(entry, regs->ip); + if (perf_callchain_store(entry, regs->ip)) + return; - dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry); + for (unwind_start(&state, current, regs, NULL); !unwind_done(&state); + unwind_next_frame(&state)) { + addr = unwind_get_return_address(&state); + if (!addr || perf_callchain_store(entry, addr)) + return; + } } static inline int diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index 0a6e393a2e62..982c9e31daca 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -31,7 +31,17 @@ struct bts_ctx { struct perf_output_handle handle; struct debug_store ds_back; - int started; + int state; +}; + +/* BTS context states: */ +enum { + /* no ongoing AUX transactions */ + BTS_STATE_STOPPED = 0, + /* AUX transaction is on, BTS tracing is disabled */ + BTS_STATE_INACTIVE, + /* AUX transaction is on, BTS tracing is running */ + BTS_STATE_ACTIVE, }; static DEFINE_PER_CPU(struct bts_ctx, bts_ctx); @@ -204,6 +214,15 @@ static void bts_update(struct bts_ctx *bts) static int bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle); +/* + * Ordering PMU callbacks wrt themselves and the PMI is done by means + * of bts::state, which: + * - is set when bts::handle::event is valid, that is, between + * perf_aux_output_begin() and perf_aux_output_end(); + * - is zero otherwise; + * - is ordered against bts::handle::event with a compiler barrier. + */ + static void __bts_event_start(struct perf_event *event) { struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); @@ -221,10 +240,13 @@ static void __bts_event_start(struct perf_event *event) /* * local barrier to make sure that ds configuration made it - * before we enable BTS + * before we enable BTS and bts::state goes ACTIVE */ wmb(); + /* INACTIVE/STOPPED -> ACTIVE */ + WRITE_ONCE(bts->state, BTS_STATE_ACTIVE); + intel_pmu_enable_bts(config); } @@ -251,9 +273,6 @@ static void bts_event_start(struct perf_event *event, int flags) __bts_event_start(event); - /* PMI handler: this counter is running and likely generating PMIs */ - ACCESS_ONCE(bts->started) = 1; - return; fail_end_stop: @@ -263,30 +282,34 @@ fail_stop: event->hw.state = PERF_HES_STOPPED; } -static void __bts_event_stop(struct perf_event *event) +static void __bts_event_stop(struct perf_event *event, int state) { + struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); + + /* ACTIVE -> INACTIVE(PMI)/STOPPED(->stop()) */ + WRITE_ONCE(bts->state, state); + /* * No extra synchronization is mandated by the documentation to have * BTS data stores globally visible. */ intel_pmu_disable_bts(); - - if (event->hw.state & PERF_HES_STOPPED) - return; - - ACCESS_ONCE(event->hw.state) |= PERF_HES_STOPPED; } static void bts_event_stop(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); - struct bts_buffer *buf = perf_get_aux(&bts->handle); + struct bts_buffer *buf = NULL; + int state = READ_ONCE(bts->state); - /* PMI handler: don't restart this counter */ - ACCESS_ONCE(bts->started) = 0; + if (state == BTS_STATE_ACTIVE) + __bts_event_stop(event, BTS_STATE_STOPPED); - __bts_event_stop(event); + if (state != BTS_STATE_STOPPED) + buf = perf_get_aux(&bts->handle); + + event->hw.state |= PERF_HES_STOPPED; if (flags & PERF_EF_UPDATE) { bts_update(bts); @@ -296,6 +319,7 @@ static void bts_event_stop(struct perf_event *event, int flags) bts->handle.head = local_xchg(&buf->data_size, buf->nr_pages << PAGE_SHIFT); + perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0), !!local_xchg(&buf->lost, 0)); } @@ -310,8 +334,20 @@ static void bts_event_stop(struct perf_event *event, int flags) void intel_bts_enable_local(void) { struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); + int state = READ_ONCE(bts->state); + + /* + * Here we transition from INACTIVE to ACTIVE; + * if we instead are STOPPED from the interrupt handler, + * stay that way. Can't be ACTIVE here though. + */ + if (WARN_ON_ONCE(state == BTS_STATE_ACTIVE)) + return; + + if (state == BTS_STATE_STOPPED) + return; - if (bts->handle.event && bts->started) + if (bts->handle.event) __bts_event_start(bts->handle.event); } @@ -319,8 +355,15 @@ void intel_bts_disable_local(void) { struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); + /* + * Here we transition from ACTIVE to INACTIVE; + * do nothing for STOPPED or INACTIVE. + */ + if (READ_ONCE(bts->state) != BTS_STATE_ACTIVE) + return; + if (bts->handle.event) - __bts_event_stop(bts->handle.event); + __bts_event_stop(bts->handle.event, BTS_STATE_INACTIVE); } static int @@ -335,8 +378,6 @@ bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle) return 0; head = handle->head & ((buf->nr_pages << PAGE_SHIFT) - 1); - if (WARN_ON_ONCE(head != local_read(&buf->head))) - return -EINVAL; phys = &buf->buf[buf->cur_buf]; space = phys->offset + phys->displacement + phys->size - head; @@ -403,22 +444,37 @@ bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle) int intel_bts_interrupt(void) { + struct debug_store *ds = this_cpu_ptr(&cpu_hw_events)->ds; struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); struct perf_event *event = bts->handle.event; struct bts_buffer *buf; s64 old_head; - int err; + int err = -ENOSPC, handled = 0; - if (!event || !bts->started) - return 0; + /* + * The only surefire way of knowing if this NMI is ours is by checking + * the write ptr against the PMI threshold. + */ + if (ds && (ds->bts_index >= ds->bts_interrupt_threshold)) + handled = 1; + + /* + * this is wrapped in intel_bts_enable_local/intel_bts_disable_local, + * so we can only be INACTIVE or STOPPED + */ + if (READ_ONCE(bts->state) == BTS_STATE_STOPPED) + return handled; buf = perf_get_aux(&bts->handle); + if (!buf) + return handled; + /* * Skip snapshot counters: they don't use the interrupt, but * there's no other way of telling, because the pointer will * keep moving */ - if (!buf || buf->snapshot) + if (buf->snapshot) return 0; old_head = local_read(&buf->head); @@ -426,18 +482,27 @@ int intel_bts_interrupt(void) /* no new data */ if (old_head == local_read(&buf->head)) - return 0; + return handled; perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0), !!local_xchg(&buf->lost, 0)); buf = perf_aux_output_begin(&bts->handle, event); - if (!buf) - return 1; + if (buf) + err = bts_buffer_reset(buf, &bts->handle); + + if (err) { + WRITE_ONCE(bts->state, BTS_STATE_STOPPED); - err = bts_buffer_reset(buf, &bts->handle); - if (err) - perf_aux_output_end(&bts->handle, 0, false); + if (buf) { + /* + * BTS_STATE_STOPPED should be visible before + * cleared handle::event + */ + barrier(); + perf_aux_output_end(&bts->handle, 0, false); + } + } return 1; } @@ -519,7 +584,8 @@ static __init int bts_init(void) if (!boot_cpu_has(X86_FEATURE_DTES64) || !x86_pmu.bts) return -ENODEV; - bts_pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE; + bts_pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE | + PERF_PMU_CAP_EXCLUSIVE; bts_pmu.task_ctx_nr = perf_sw_context; bts_pmu.event_init = bts_event_init; bts_pmu.add = bts_event_add; diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 2cbde2f449aa..a3a9eb84b5cf 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -1730,9 +1730,11 @@ static __initconst const u64 knl_hw_cache_extra_regs * disabled state if called consecutively. * * During consecutive calls, the same disable value will be written to related - * registers, so the PMU state remains unchanged. hw.state in - * intel_bts_disable_local will remain PERF_HES_STOPPED too in consecutive - * calls. + * registers, so the PMU state remains unchanged. + * + * intel_bts events don't coexist with intel PMU's BTS events because of + * x86_add_exclusive(x86_lbr_exclusive_lbr); there's no need to keep them + * disabled around intel PMU's event batching etc, only inside the PMI handler. */ static void __intel_pmu_disable_all(void) { @@ -1742,8 +1744,6 @@ static void __intel_pmu_disable_all(void) if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) intel_pmu_disable_bts(); - else - intel_bts_disable_local(); intel_pmu_pebs_disable_all(); } @@ -1771,8 +1771,7 @@ static void __intel_pmu_enable_all(int added, bool pmi) return; intel_pmu_enable_bts(event->hw.config); - } else - intel_bts_enable_local(); + } } static void intel_pmu_enable_all(int added) @@ -1907,13 +1906,6 @@ static void intel_pmu_disable_event(struct perf_event *event) cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx); cpuc->intel_cp_status &= ~(1ull << hwc->idx); - /* - * must disable before any actual event - * because any event may be combined with LBR - */ - if (needs_branch_stack(event)) - intel_pmu_lbr_disable(event); - if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { intel_pmu_disable_fixed(hwc); return; @@ -1925,6 +1917,14 @@ static void intel_pmu_disable_event(struct perf_event *event) intel_pmu_pebs_disable(event); } +static void intel_pmu_del_event(struct perf_event *event) +{ + if (needs_branch_stack(event)) + intel_pmu_lbr_del(event); + if (event->attr.precise_ip) + intel_pmu_pebs_del(event); +} + static void intel_pmu_enable_fixed(struct hw_perf_event *hwc) { int idx = hwc->idx - INTEL_PMC_IDX_FIXED; @@ -1968,12 +1968,6 @@ static void intel_pmu_enable_event(struct perf_event *event) intel_pmu_enable_bts(hwc->config); return; } - /* - * must enabled before any actual event - * because any event may be combined with LBR - */ - if (needs_branch_stack(event)) - intel_pmu_lbr_enable(event); if (event->attr.exclude_host) cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx); @@ -1994,6 +1988,14 @@ static void intel_pmu_enable_event(struct perf_event *event) __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE); } +static void intel_pmu_add_event(struct perf_event *event) +{ + if (event->attr.precise_ip) + intel_pmu_pebs_add(event); + if (needs_branch_stack(event)) + intel_pmu_lbr_add(event); +} + /* * Save and restart an expired event. Called by NMI contexts, * so it has to be careful about preempting normal event ops: @@ -2073,6 +2075,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) */ if (!x86_pmu.late_ack) apic_write(APIC_LVTPC, APIC_DM_NMI); + intel_bts_disable_local(); __intel_pmu_disable_all(); handled = intel_pmu_drain_bts_buffer(); handled += intel_bts_interrupt(); @@ -2172,6 +2175,7 @@ done: /* Only restore PMU state when it's active. See x86_pmu_disable(). */ if (cpuc->enabled) __intel_pmu_enable_all(0, true); + intel_bts_enable_local(); /* * Only unmask the NMI after the overflow counters @@ -3290,6 +3294,8 @@ static __initconst const struct x86_pmu intel_pmu = { .enable_all = intel_pmu_enable_all, .enable = intel_pmu_enable_event, .disable = intel_pmu_disable_event, + .add = intel_pmu_add_event, + .del = intel_pmu_del_event, .hw_config = intel_pmu_hw_config, .schedule_events = x86_schedule_events, .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c index 783c49ddef29..8f82b02934fa 100644 --- a/arch/x86/events/intel/cqm.c +++ b/arch/x86/events/intel/cqm.c @@ -458,6 +458,11 @@ static void __intel_cqm_event_count(void *info); static void init_mbm_sample(u32 rmid, u32 evt_type); static void __intel_mbm_event_count(void *info); +static bool is_cqm_event(int e) +{ + return (e == QOS_L3_OCCUP_EVENT_ID); +} + static bool is_mbm_event(int e) { return (e >= QOS_MBM_TOTAL_EVENT_ID && e <= QOS_MBM_LOCAL_EVENT_ID); @@ -1366,6 +1371,10 @@ static int intel_cqm_event_init(struct perf_event *event) (event->attr.config > QOS_MBM_LOCAL_EVENT_ID)) return -EINVAL; + if ((is_cqm_event(event->attr.config) && !cqm_enabled) || + (is_mbm_event(event->attr.config) && !mbm_enabled)) + return -EINVAL; + /* unsupported modes and filters */ if (event->attr.exclude_user || event->attr.exclude_kernel || diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 7ce9f3f669e6..0319311dbdbb 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -806,9 +806,65 @@ struct event_constraint *intel_pebs_constraints(struct perf_event *event) return &emptyconstraint; } -static inline bool pebs_is_enabled(struct cpu_hw_events *cpuc) +/* + * We need the sched_task callback even for per-cpu events when we use + * the large interrupt threshold, such that we can provide PID and TID + * to PEBS samples. + */ +static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc) +{ + return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs); +} + +static inline void pebs_update_threshold(struct cpu_hw_events *cpuc) +{ + struct debug_store *ds = cpuc->ds; + u64 threshold; + + if (cpuc->n_pebs == cpuc->n_large_pebs) { + threshold = ds->pebs_absolute_maximum - + x86_pmu.max_pebs_events * x86_pmu.pebs_record_size; + } else { + threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size; + } + + ds->pebs_interrupt_threshold = threshold; +} + +static void +pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc, struct pmu *pmu) { - return (cpuc->pebs_enabled & ((1ULL << MAX_PEBS_EVENTS) - 1)); + /* + * Make sure we get updated with the first PEBS + * event. It will trigger also during removal, but + * that does not hurt: + */ + bool update = cpuc->n_pebs == 1; + + if (needed_cb != pebs_needs_sched_cb(cpuc)) { + if (!needed_cb) + perf_sched_cb_inc(pmu); + else + perf_sched_cb_dec(pmu); + + update = true; + } + + if (update) + pebs_update_threshold(cpuc); +} + +void intel_pmu_pebs_add(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + bool needed_cb = pebs_needs_sched_cb(cpuc); + + cpuc->n_pebs++; + if (hwc->flags & PERF_X86_EVENT_FREERUNNING) + cpuc->n_large_pebs++; + + pebs_update_state(needed_cb, cpuc, event->ctx->pmu); } void intel_pmu_pebs_enable(struct perf_event *event) @@ -816,12 +872,9 @@ void intel_pmu_pebs_enable(struct perf_event *event) struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; struct debug_store *ds = cpuc->ds; - bool first_pebs; - u64 threshold; hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT; - first_pebs = !pebs_is_enabled(cpuc); cpuc->pebs_enabled |= 1ULL << hwc->idx; if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) @@ -830,46 +883,34 @@ void intel_pmu_pebs_enable(struct perf_event *event) cpuc->pebs_enabled |= 1ULL << 63; /* - * When the event is constrained enough we can use a larger - * threshold and run the event with less frequent PMI. + * Use auto-reload if possible to save a MSR write in the PMI. + * This must be done in pmu::start(), because PERF_EVENT_IOC_PERIOD. */ - if (hwc->flags & PERF_X86_EVENT_FREERUNNING) { - threshold = ds->pebs_absolute_maximum - - x86_pmu.max_pebs_events * x86_pmu.pebs_record_size; - - if (first_pebs) - perf_sched_cb_inc(event->ctx->pmu); - } else { - threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size; - - /* - * If not all events can use larger buffer, - * roll back to threshold = 1 - */ - if (!first_pebs && - (ds->pebs_interrupt_threshold > threshold)) - perf_sched_cb_dec(event->ctx->pmu); - } - - /* Use auto-reload if possible to save a MSR write in the PMI */ if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) { ds->pebs_event_reset[hwc->idx] = (u64)(-hwc->sample_period) & x86_pmu.cntval_mask; } +} + +void intel_pmu_pebs_del(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + bool needed_cb = pebs_needs_sched_cb(cpuc); - if (first_pebs || ds->pebs_interrupt_threshold > threshold) - ds->pebs_interrupt_threshold = threshold; + cpuc->n_pebs--; + if (hwc->flags & PERF_X86_EVENT_FREERUNNING) + cpuc->n_large_pebs--; + + pebs_update_state(needed_cb, cpuc, event->ctx->pmu); } void intel_pmu_pebs_disable(struct perf_event *event) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; - struct debug_store *ds = cpuc->ds; - bool large_pebs = ds->pebs_interrupt_threshold > - ds->pebs_buffer_base + x86_pmu.pebs_record_size; - if (large_pebs) + if (cpuc->n_pebs == cpuc->n_large_pebs) intel_pmu_drain_pebs_buffer(); cpuc->pebs_enabled &= ~(1ULL << hwc->idx); @@ -879,9 +920,6 @@ void intel_pmu_pebs_disable(struct perf_event *event) else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST) cpuc->pebs_enabled &= ~(1ULL << 63); - if (large_pebs && !pebs_is_enabled(cpuc)) - perf_sched_cb_dec(event->ctx->pmu); - if (cpuc->enabled) wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); @@ -1274,18 +1312,18 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) struct pebs_record_nhm *p = at; u64 pebs_status; - /* PEBS v3 has accurate status bits */ + pebs_status = p->status & cpuc->pebs_enabled; + pebs_status &= (1ULL << x86_pmu.max_pebs_events) - 1; + + /* PEBS v3 has more accurate status bits */ if (x86_pmu.intel_cap.pebs_format >= 3) { - for_each_set_bit(bit, (unsigned long *)&p->status, - MAX_PEBS_EVENTS) + for_each_set_bit(bit, (unsigned long *)&pebs_status, + x86_pmu.max_pebs_events) counts[bit]++; continue; } - pebs_status = p->status & cpuc->pebs_enabled; - pebs_status &= (1ULL << x86_pmu.max_pebs_events) - 1; - /* * On some CPUs the PEBS status can be zero when PEBS is * racing with clearing of GLOBAL_STATUS. @@ -1333,8 +1371,11 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) continue; event = cpuc->events[bit]; - WARN_ON_ONCE(!event); - WARN_ON_ONCE(!event->attr.precise_ip); + if (WARN_ON_ONCE(!event)) + continue; + + if (WARN_ON_ONCE(!event->attr.precise_ip)) + continue; /* log dropped samples number */ if (error[bit]) diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 707d358e0dff..fc6cf21c535e 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -380,7 +380,6 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx) void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in) { - struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct x86_perf_task_context *task_ctx; /* @@ -390,31 +389,21 @@ void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in) */ task_ctx = ctx ? ctx->task_ctx_data : NULL; if (task_ctx) { - if (sched_in) { + if (sched_in) __intel_pmu_lbr_restore(task_ctx); - cpuc->lbr_context = ctx; - } else { + else __intel_pmu_lbr_save(task_ctx); - } return; } /* - * When sampling the branck stack in system-wide, it may be - * necessary to flush the stack on context switch. This happens - * when the branch stack does not tag its entries with the pid - * of the current task. Otherwise it becomes impossible to - * associate a branch entry with a task. This ambiguity is more - * likely to appear when the branch stack supports priv level - * filtering and the user sets it to monitor only at the user - * level (which could be a useful measurement in system-wide - * mode). In that case, the risk is high of having a branch - * stack with branch from multiple tasks. - */ - if (sched_in) { + * Since a context switch can flip the address space and LBR entries + * are not tagged with an identifier, we need to wipe the LBR, even for + * per-cpu events. You simply cannot resolve the branches from the old + * address space. + */ + if (sched_in) intel_pmu_lbr_reset(); - cpuc->lbr_context = ctx; - } } static inline bool branch_user_callstack(unsigned br_sel) @@ -422,7 +411,7 @@ static inline bool branch_user_callstack(unsigned br_sel) return (br_sel & X86_BR_USER) && (br_sel & X86_BR_CALL_STACK); } -void intel_pmu_lbr_enable(struct perf_event *event) +void intel_pmu_lbr_add(struct perf_event *event) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct x86_perf_task_context *task_ctx; @@ -430,27 +419,38 @@ void intel_pmu_lbr_enable(struct perf_event *event) if (!x86_pmu.lbr_nr) return; - /* - * Reset the LBR stack if we changed task context to - * avoid data leaks. - */ - if (event->ctx->task && cpuc->lbr_context != event->ctx) { - intel_pmu_lbr_reset(); - cpuc->lbr_context = event->ctx; - } cpuc->br_sel = event->hw.branch_reg.reg; - if (branch_user_callstack(cpuc->br_sel) && event->ctx && - event->ctx->task_ctx_data) { + if (branch_user_callstack(cpuc->br_sel) && event->ctx->task_ctx_data) { task_ctx = event->ctx->task_ctx_data; task_ctx->lbr_callstack_users++; } - cpuc->lbr_users++; + /* + * Request pmu::sched_task() callback, which will fire inside the + * regular perf event scheduling, so that call will: + * + * - restore or wipe; when LBR-callstack, + * - wipe; otherwise, + * + * when this is from __perf_event_task_sched_in(). + * + * However, if this is from perf_install_in_context(), no such callback + * will follow and we'll need to reset the LBR here if this is the + * first LBR event. + * + * The problem is, we cannot tell these cases apart... but we can + * exclude the biggest chunk of cases by looking at + * event->total_time_running. An event that has accrued runtime cannot + * be 'new'. Conversely, a new event can get installed through the + * context switch path for the first time. + */ perf_sched_cb_inc(event->ctx->pmu); + if (!cpuc->lbr_users++ && !event->total_time_running) + intel_pmu_lbr_reset(); } -void intel_pmu_lbr_disable(struct perf_event *event) +void intel_pmu_lbr_del(struct perf_event *event) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct x86_perf_task_context *task_ctx; @@ -467,12 +467,6 @@ void intel_pmu_lbr_disable(struct perf_event *event) cpuc->lbr_users--; WARN_ON_ONCE(cpuc->lbr_users < 0); perf_sched_cb_dec(event->ctx->pmu); - - if (cpuc->enabled && !cpuc->lbr_users) { - __intel_pmu_lbr_disable(); - /* avoid stale pointer */ - cpuc->lbr_context = NULL; - } } void intel_pmu_lbr_enable_all(bool pmi) diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 04bb5fb5a8d7..c5047b8f777b 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -69,6 +69,8 @@ static struct pt_cap_desc { PT_CAP(psb_cyc, 0, CR_EBX, BIT(1)), PT_CAP(ip_filtering, 0, CR_EBX, BIT(2)), PT_CAP(mtc, 0, CR_EBX, BIT(3)), + PT_CAP(ptwrite, 0, CR_EBX, BIT(4)), + PT_CAP(power_event_trace, 0, CR_EBX, BIT(5)), PT_CAP(topa_output, 0, CR_ECX, BIT(0)), PT_CAP(topa_multiple_entries, 0, CR_ECX, BIT(1)), PT_CAP(single_range_output, 0, CR_ECX, BIT(2)), @@ -259,10 +261,16 @@ fail: #define RTIT_CTL_MTC (RTIT_CTL_MTC_EN | \ RTIT_CTL_MTC_RANGE) +#define RTIT_CTL_PTW (RTIT_CTL_PTW_EN | \ + RTIT_CTL_FUP_ON_PTW) + #define PT_CONFIG_MASK (RTIT_CTL_TSC_EN | \ RTIT_CTL_DISRETC | \ RTIT_CTL_CYC_PSB | \ - RTIT_CTL_MTC) + RTIT_CTL_MTC | \ + RTIT_CTL_PWR_EVT_EN | \ + RTIT_CTL_FUP_ON_PTW | \ + RTIT_CTL_PTW_EN) static bool pt_event_valid(struct perf_event *event) { @@ -311,6 +319,20 @@ static bool pt_event_valid(struct perf_event *event) return false; } + if (config & RTIT_CTL_PWR_EVT_EN && + !pt_cap_get(PT_CAP_power_event_trace)) + return false; + + if (config & RTIT_CTL_PTW) { + if (!pt_cap_get(PT_CAP_ptwrite)) + return false; + + /* FUPonPTW without PTW doesn't make sense */ + if ((config & RTIT_CTL_FUP_ON_PTW) && + !(config & RTIT_CTL_PTW_EN)) + return false; + } + return true; } @@ -1074,6 +1096,11 @@ static void pt_addr_filters_fini(struct perf_event *event) event->hw.addr_filters = NULL; } +static inline bool valid_kernel_ip(unsigned long ip) +{ + return virt_addr_valid(ip) && kernel_ip(ip); +} + static int pt_event_addr_filters_validate(struct list_head *filters) { struct perf_addr_filter *filter; @@ -1081,11 +1108,16 @@ static int pt_event_addr_filters_validate(struct list_head *filters) list_for_each_entry(filter, filters, entry) { /* PT doesn't support single address triggers */ - if (!filter->range) + if (!filter->range || !filter->size) return -EOPNOTSUPP; - if (!filter->inode && !kernel_ip(filter->offset)) - return -EINVAL; + if (!filter->inode) { + if (!valid_kernel_ip(filter->offset)) + return -EINVAL; + + if (!valid_kernel_ip(filter->offset + filter->size)) + return -EINVAL; + } if (++range > pt_cap_get(PT_CAP_num_address_ranges)) return -EOPNOTSUPP; @@ -1111,7 +1143,7 @@ static void pt_event_addr_filters_sync(struct perf_event *event) } else { /* apply the offset */ msr_a = filter->offset + offs[range]; - msr_b = filter->size + msr_a; + msr_b = filter->size + msr_a - 1; } filters->filter[range].msr_a = msr_a; diff --git a/arch/x86/events/intel/pt.h b/arch/x86/events/intel/pt.h index efffa4a09f68..53473c21b554 100644 --- a/arch/x86/events/intel/pt.h +++ b/arch/x86/events/intel/pt.h @@ -26,11 +26,14 @@ #define RTIT_CTL_CYCLEACC BIT(1) #define RTIT_CTL_OS BIT(2) #define RTIT_CTL_USR BIT(3) +#define RTIT_CTL_PWR_EVT_EN BIT(4) +#define RTIT_CTL_FUP_ON_PTW BIT(5) #define RTIT_CTL_CR3EN BIT(7) #define RTIT_CTL_TOPA BIT(8) #define RTIT_CTL_MTC_EN BIT(9) #define RTIT_CTL_TSC_EN BIT(10) #define RTIT_CTL_DISRETC BIT(11) +#define RTIT_CTL_PTW_EN BIT(12) #define RTIT_CTL_BRANCH_EN BIT(13) #define RTIT_CTL_MTC_RANGE_OFFSET 14 #define RTIT_CTL_MTC_RANGE (0x0full << RTIT_CTL_MTC_RANGE_OFFSET) @@ -91,6 +94,8 @@ enum pt_capabilities { PT_CAP_psb_cyc, PT_CAP_ip_filtering, PT_CAP_mtc, + PT_CAP_ptwrite, + PT_CAP_power_event_trace, PT_CAP_topa_output, PT_CAP_topa_multiple_entries, PT_CAP_single_range_output, diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index 28865938aadf..b0f0e835a770 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -357,6 +357,8 @@ static int rapl_pmu_event_init(struct perf_event *event) if (event->cpu < 0) return -EINVAL; + event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG; + /* * check event is known (determines counter) */ @@ -765,6 +767,8 @@ static const struct x86_cpu_id rapl_cpu_match[] __initconst = { X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, skl_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP, skl_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X, hsx_rapl_init), + + X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT, hsw_rapl_init), {}, }; diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 463dc7a5a6c3..d9844cc74486 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -664,6 +664,8 @@ static int uncore_pmu_event_init(struct perf_event *event) event->cpu = box->cpu; event->pmu_private = box; + event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG; + event->hw.idx = -1; event->hw.last_tag = ~0ULL; event->hw.extra_reg.idx = EXTRA_REG_NONE; @@ -683,7 +685,8 @@ static int uncore_pmu_event_init(struct perf_event *event) /* fixed counters have event field hardcoded to zero */ hwc->config = 0ULL; } else { - hwc->config = event->attr.config & pmu->type->event_mask; + hwc->config = event->attr.config & + (pmu->type->event_mask | ((u64)pmu->type->event_mask_ext << 32)); if (pmu->type->ops->hw_config) { ret = pmu->type->ops->hw_config(box, event); if (ret) @@ -1321,6 +1324,11 @@ static const struct intel_uncore_init_fun skl_uncore_init __initconst = { .pci_init = skl_uncore_pci_init, }; +static const struct intel_uncore_init_fun skx_uncore_init __initconst = { + .cpu_init = skx_uncore_cpu_init, + .pci_init = skx_uncore_pci_init, +}; + static const struct x86_cpu_id intel_uncore_match[] __initconst = { X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EP, nhm_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM, nhm_uncore_init), @@ -1343,6 +1351,7 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = { X86_UNCORE_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP,skl_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, skl_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X, skx_uncore_init), {}, }; diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index 78b9c23e2d8d..ad986c1e29bc 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -44,6 +44,7 @@ struct intel_uncore_type { unsigned perf_ctr; unsigned event_ctl; unsigned event_mask; + unsigned event_mask_ext; unsigned fixed_ctr; unsigned fixed_ctl; unsigned box_ctl; @@ -120,6 +121,7 @@ struct intel_uncore_box { }; #define UNCORE_BOX_FLAG_INITIATED 0 +#define UNCORE_BOX_FLAG_CTL_OFFS8 1 /* event config registers are 8-byte apart */ struct uncore_event_desc { struct kobj_attribute attr; @@ -172,6 +174,9 @@ static inline unsigned uncore_pci_fixed_ctr(struct intel_uncore_box *box) static inline unsigned uncore_pci_event_ctl(struct intel_uncore_box *box, int idx) { + if (test_bit(UNCORE_BOX_FLAG_CTL_OFFS8, &box->flags)) + return idx * 8 + box->pmu->type->event_ctl; + return idx * 4 + box->pmu->type->event_ctl; } @@ -377,6 +382,8 @@ int bdx_uncore_pci_init(void); void bdx_uncore_cpu_init(void); int knl_uncore_pci_init(void); void knl_uncore_cpu_init(void); +int skx_uncore_pci_init(void); +void skx_uncore_cpu_init(void); /* perf_event_intel_uncore_nhmex.c */ void nhmex_uncore_cpu_init(void); diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index 9d35ec0cb8fc..5f845eef9a4d 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -388,6 +388,8 @@ static int snb_uncore_imc_event_init(struct perf_event *event) event->cpu = box->cpu; event->pmu_private = box; + event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG; + event->hw.idx = -1; event->hw.last_tag = ~0ULL; event->hw.extra_reg.idx = EXTRA_REG_NONE; diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 8aee83bcf71f..272427700d48 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -1,6 +1,10 @@ /* SandyBridge-EP/IvyTown uncore support */ #include "uncore.h" +/* SNB-EP pci bus to socket mapping */ +#define SNBEP_CPUNODEID 0x40 +#define SNBEP_GIDNIDMAP 0x54 + /* SNB-EP Box level control */ #define SNBEP_PMON_BOX_CTL_RST_CTRL (1 << 0) #define SNBEP_PMON_BOX_CTL_RST_CTRS (1 << 1) @@ -264,15 +268,72 @@ SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \ SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET) +/* SKX pci bus to socket mapping */ +#define SKX_CPUNODEID 0xc0 +#define SKX_GIDNIDMAP 0xd4 + +/* SKX CHA */ +#define SKX_CHA_MSR_PMON_BOX_FILTER_TID (0x1ffULL << 0) +#define SKX_CHA_MSR_PMON_BOX_FILTER_LINK (0xfULL << 9) +#define SKX_CHA_MSR_PMON_BOX_FILTER_STATE (0x3ffULL << 17) +#define SKX_CHA_MSR_PMON_BOX_FILTER_REM (0x1ULL << 32) +#define SKX_CHA_MSR_PMON_BOX_FILTER_LOC (0x1ULL << 33) +#define SKX_CHA_MSR_PMON_BOX_FILTER_ALL_OPC (0x1ULL << 35) +#define SKX_CHA_MSR_PMON_BOX_FILTER_NM (0x1ULL << 36) +#define SKX_CHA_MSR_PMON_BOX_FILTER_NOT_NM (0x1ULL << 37) +#define SKX_CHA_MSR_PMON_BOX_FILTER_OPC0 (0x3ffULL << 41) +#define SKX_CHA_MSR_PMON_BOX_FILTER_OPC1 (0x3ffULL << 51) +#define SKX_CHA_MSR_PMON_BOX_FILTER_C6 (0x1ULL << 61) +#define SKX_CHA_MSR_PMON_BOX_FILTER_NC (0x1ULL << 62) +#define SKX_CHA_MSR_PMON_BOX_FILTER_ISOC (0x1ULL << 63) + +/* SKX IIO */ +#define SKX_IIO0_MSR_PMON_CTL0 0xa48 +#define SKX_IIO0_MSR_PMON_CTR0 0xa41 +#define SKX_IIO0_MSR_PMON_BOX_CTL 0xa40 +#define SKX_IIO_MSR_OFFSET 0x20 + +#define SKX_PMON_CTL_TRESH_MASK (0xff << 24) +#define SKX_PMON_CTL_TRESH_MASK_EXT (0xf) +#define SKX_PMON_CTL_CH_MASK (0xff << 4) +#define SKX_PMON_CTL_FC_MASK (0x7 << 12) +#define SKX_IIO_PMON_RAW_EVENT_MASK (SNBEP_PMON_CTL_EV_SEL_MASK | \ + SNBEP_PMON_CTL_UMASK_MASK | \ + SNBEP_PMON_CTL_EDGE_DET | \ + SNBEP_PMON_CTL_INVERT | \ + SKX_PMON_CTL_TRESH_MASK) +#define SKX_IIO_PMON_RAW_EVENT_MASK_EXT (SKX_PMON_CTL_TRESH_MASK_EXT | \ + SKX_PMON_CTL_CH_MASK | \ + SKX_PMON_CTL_FC_MASK) + +/* SKX IRP */ +#define SKX_IRP0_MSR_PMON_CTL0 0xa5b +#define SKX_IRP0_MSR_PMON_CTR0 0xa59 +#define SKX_IRP0_MSR_PMON_BOX_CTL 0xa58 +#define SKX_IRP_MSR_OFFSET 0x20 + +/* SKX UPI */ +#define SKX_UPI_PCI_PMON_CTL0 0x350 +#define SKX_UPI_PCI_PMON_CTR0 0x318 +#define SKX_UPI_PCI_PMON_BOX_CTL 0x378 +#define SKX_PMON_CTL_UMASK_EXT 0xff + +/* SKX M2M */ +#define SKX_M2M_PCI_PMON_CTL0 0x228 +#define SKX_M2M_PCI_PMON_CTR0 0x200 +#define SKX_M2M_PCI_PMON_BOX_CTL 0x258 + DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7"); DEFINE_UNCORE_FORMAT_ATTR(event2, event, "config:0-6"); DEFINE_UNCORE_FORMAT_ATTR(event_ext, event, "config:0-7,21"); DEFINE_UNCORE_FORMAT_ATTR(use_occ_ctr, use_occ_ctr, "config:7"); DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15"); +DEFINE_UNCORE_FORMAT_ATTR(umask_ext, umask, "config:8-15,32-39"); DEFINE_UNCORE_FORMAT_ATTR(qor, qor, "config:16"); DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18"); DEFINE_UNCORE_FORMAT_ATTR(tid_en, tid_en, "config:19"); DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23"); +DEFINE_UNCORE_FORMAT_ATTR(thresh9, thresh, "config:24-35"); DEFINE_UNCORE_FORMAT_ATTR(thresh8, thresh, "config:24-31"); DEFINE_UNCORE_FORMAT_ATTR(thresh6, thresh, "config:24-29"); DEFINE_UNCORE_FORMAT_ATTR(thresh5, thresh, "config:24-28"); @@ -280,6 +341,8 @@ DEFINE_UNCORE_FORMAT_ATTR(occ_sel, occ_sel, "config:14-15"); DEFINE_UNCORE_FORMAT_ATTR(occ_invert, occ_invert, "config:30"); DEFINE_UNCORE_FORMAT_ATTR(occ_edge, occ_edge, "config:14-51"); DEFINE_UNCORE_FORMAT_ATTR(occ_edge_det, occ_edge_det, "config:31"); +DEFINE_UNCORE_FORMAT_ATTR(ch_mask, ch_mask, "config:36-43"); +DEFINE_UNCORE_FORMAT_ATTR(fc_mask, fc_mask, "config:44-46"); DEFINE_UNCORE_FORMAT_ATTR(filter_tid, filter_tid, "config1:0-4"); DEFINE_UNCORE_FORMAT_ATTR(filter_tid2, filter_tid, "config1:0"); DEFINE_UNCORE_FORMAT_ATTR(filter_tid3, filter_tid, "config1:0-5"); @@ -288,18 +351,26 @@ DEFINE_UNCORE_FORMAT_ATTR(filter_cid, filter_cid, "config1:5"); DEFINE_UNCORE_FORMAT_ATTR(filter_link, filter_link, "config1:5-8"); DEFINE_UNCORE_FORMAT_ATTR(filter_link2, filter_link, "config1:6-8"); DEFINE_UNCORE_FORMAT_ATTR(filter_link3, filter_link, "config1:12"); +DEFINE_UNCORE_FORMAT_ATTR(filter_link4, filter_link, "config1:9-12"); DEFINE_UNCORE_FORMAT_ATTR(filter_nid, filter_nid, "config1:10-17"); DEFINE_UNCORE_FORMAT_ATTR(filter_nid2, filter_nid, "config1:32-47"); DEFINE_UNCORE_FORMAT_ATTR(filter_state, filter_state, "config1:18-22"); DEFINE_UNCORE_FORMAT_ATTR(filter_state2, filter_state, "config1:17-22"); DEFINE_UNCORE_FORMAT_ATTR(filter_state3, filter_state, "config1:17-23"); DEFINE_UNCORE_FORMAT_ATTR(filter_state4, filter_state, "config1:18-20"); +DEFINE_UNCORE_FORMAT_ATTR(filter_state5, filter_state, "config1:17-26"); +DEFINE_UNCORE_FORMAT_ATTR(filter_rem, filter_rem, "config1:32"); +DEFINE_UNCORE_FORMAT_ATTR(filter_loc, filter_loc, "config1:33"); +DEFINE_UNCORE_FORMAT_ATTR(filter_nm, filter_nm, "config1:36"); +DEFINE_UNCORE_FORMAT_ATTR(filter_not_nm, filter_not_nm, "config1:37"); DEFINE_UNCORE_FORMAT_ATTR(filter_local, filter_local, "config1:33"); DEFINE_UNCORE_FORMAT_ATTR(filter_all_op, filter_all_op, "config1:35"); DEFINE_UNCORE_FORMAT_ATTR(filter_nnm, filter_nnm, "config1:37"); DEFINE_UNCORE_FORMAT_ATTR(filter_opc, filter_opc, "config1:23-31"); DEFINE_UNCORE_FORMAT_ATTR(filter_opc2, filter_opc, "config1:52-60"); DEFINE_UNCORE_FORMAT_ATTR(filter_opc3, filter_opc, "config1:41-60"); +DEFINE_UNCORE_FORMAT_ATTR(filter_opc_0, filter_opc0, "config1:41-50"); +DEFINE_UNCORE_FORMAT_ATTR(filter_opc_1, filter_opc1, "config1:51-60"); DEFINE_UNCORE_FORMAT_ATTR(filter_nc, filter_nc, "config1:62"); DEFINE_UNCORE_FORMAT_ATTR(filter_c6, filter_c6, "config1:61"); DEFINE_UNCORE_FORMAT_ATTR(filter_isoc, filter_isoc, "config1:63"); @@ -1153,7 +1224,7 @@ static struct pci_driver snbep_uncore_pci_driver = { /* * build pci bus to socket mapping */ -static int snbep_pci2phy_map_init(int devid) +static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool reverse) { struct pci_dev *ubox_dev = NULL; int i, bus, nodeid, segment; @@ -1168,12 +1239,12 @@ static int snbep_pci2phy_map_init(int devid) break; bus = ubox_dev->bus->number; /* get the Node ID of the local register */ - err = pci_read_config_dword(ubox_dev, 0x40, &config); + err = pci_read_config_dword(ubox_dev, nodeid_loc, &config); if (err) break; nodeid = config; /* get the Node ID mapping */ - err = pci_read_config_dword(ubox_dev, 0x54, &config); + err = pci_read_config_dword(ubox_dev, idmap_loc, &config); if (err) break; @@ -1207,11 +1278,20 @@ static int snbep_pci2phy_map_init(int devid) raw_spin_lock(&pci2phy_map_lock); list_for_each_entry(map, &pci2phy_map_head, list) { i = -1; - for (bus = 255; bus >= 0; bus--) { - if (map->pbus_to_physid[bus] >= 0) - i = map->pbus_to_physid[bus]; - else - map->pbus_to_physid[bus] = i; + if (reverse) { + for (bus = 255; bus >= 0; bus--) { + if (map->pbus_to_physid[bus] >= 0) + i = map->pbus_to_physid[bus]; + else + map->pbus_to_physid[bus] = i; + } + } else { + for (bus = 0; bus <= 255; bus++) { + if (map->pbus_to_physid[bus] >= 0) + i = map->pbus_to_physid[bus]; + else + map->pbus_to_physid[bus] = i; + } } } raw_spin_unlock(&pci2phy_map_lock); @@ -1224,7 +1304,7 @@ static int snbep_pci2phy_map_init(int devid) int snbep_uncore_pci_init(void) { - int ret = snbep_pci2phy_map_init(0x3ce0); + int ret = snbep_pci2phy_map_init(0x3ce0, SNBEP_CPUNODEID, SNBEP_GIDNIDMAP, true); if (ret) return ret; uncore_pci_uncores = snbep_pci_uncores; @@ -1788,7 +1868,7 @@ static struct pci_driver ivbep_uncore_pci_driver = { int ivbep_uncore_pci_init(void) { - int ret = snbep_pci2phy_map_init(0x0e1e); + int ret = snbep_pci2phy_map_init(0x0e1e, SNBEP_CPUNODEID, SNBEP_GIDNIDMAP, true); if (ret) return ret; uncore_pci_uncores = ivbep_pci_uncores; @@ -2897,7 +2977,7 @@ static struct pci_driver hswep_uncore_pci_driver = { int hswep_uncore_pci_init(void) { - int ret = snbep_pci2phy_map_init(0x2f1e); + int ret = snbep_pci2phy_map_init(0x2f1e, SNBEP_CPUNODEID, SNBEP_GIDNIDMAP, true); if (ret) return ret; uncore_pci_uncores = hswep_pci_uncores; @@ -3186,7 +3266,7 @@ static struct pci_driver bdx_uncore_pci_driver = { int bdx_uncore_pci_init(void) { - int ret = snbep_pci2phy_map_init(0x6f1e); + int ret = snbep_pci2phy_map_init(0x6f1e, SNBEP_CPUNODEID, SNBEP_GIDNIDMAP, true); if (ret) return ret; @@ -3196,3 +3276,525 @@ int bdx_uncore_pci_init(void) } /* end of BDX uncore support */ + +/* SKX uncore support */ + +static struct intel_uncore_type skx_uncore_ubox = { + .name = "ubox", + .num_counters = 2, + .num_boxes = 1, + .perf_ctr_bits = 48, + .fixed_ctr_bits = 48, + .perf_ctr = HSWEP_U_MSR_PMON_CTR0, + .event_ctl = HSWEP_U_MSR_PMON_CTL0, + .event_mask = SNBEP_U_MSR_PMON_RAW_EVENT_MASK, + .fixed_ctr = HSWEP_U_MSR_PMON_UCLK_FIXED_CTR, + .fixed_ctl = HSWEP_U_MSR_PMON_UCLK_FIXED_CTL, + .ops = &ivbep_uncore_msr_ops, + .format_group = &ivbep_uncore_ubox_format_group, +}; + +static struct attribute *skx_uncore_cha_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_tid_en.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + &format_attr_filter_tid4.attr, + &format_attr_filter_link4.attr, + &format_attr_filter_state5.attr, + &format_attr_filter_rem.attr, + &format_attr_filter_loc.attr, + &format_attr_filter_nm.attr, + &format_attr_filter_all_op.attr, + &format_attr_filter_not_nm.attr, + &format_attr_filter_opc_0.attr, + &format_attr_filter_opc_1.attr, + &format_attr_filter_nc.attr, + &format_attr_filter_c6.attr, + &format_attr_filter_isoc.attr, + NULL, +}; + +static struct attribute_group skx_uncore_chabox_format_group = { + .name = "format", + .attrs = skx_uncore_cha_formats_attr, +}; + +static struct event_constraint skx_uncore_chabox_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x11, 0x1), + UNCORE_EVENT_CONSTRAINT(0x36, 0x1), + EVENT_CONSTRAINT_END +}; + +static struct extra_reg skx_uncore_cha_extra_regs[] = { + SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x1134, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x2134, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x8134, 0xffff, 0x4), +}; + +static u64 skx_cha_filter_mask(int fields) +{ + u64 mask = 0; + + if (fields & 0x1) + mask |= SKX_CHA_MSR_PMON_BOX_FILTER_TID; + if (fields & 0x2) + mask |= SKX_CHA_MSR_PMON_BOX_FILTER_LINK; + if (fields & 0x4) + mask |= SKX_CHA_MSR_PMON_BOX_FILTER_STATE; + return mask; +} + +static struct event_constraint * +skx_cha_get_constraint(struct intel_uncore_box *box, struct perf_event *event) +{ + return __snbep_cbox_get_constraint(box, event, skx_cha_filter_mask); +} + +static int skx_cha_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + struct extra_reg *er; + int idx = 0; + + for (er = skx_uncore_cha_extra_regs; er->msr; er++) { + if (er->event != (event->hw.config & er->config_mask)) + continue; + idx |= er->idx; + } + + if (idx) { + reg1->reg = HSWEP_C0_MSR_PMON_BOX_FILTER0 + + HSWEP_CBO_MSR_OFFSET * box->pmu->pmu_idx; + reg1->config = event->attr.config1 & skx_cha_filter_mask(idx); + reg1->idx = idx; + } + return 0; +} + +static struct intel_uncore_ops skx_uncore_chabox_ops = { + /* There is no frz_en for chabox ctl */ + .init_box = ivbep_uncore_msr_init_box, + .disable_box = snbep_uncore_msr_disable_box, + .enable_box = snbep_uncore_msr_enable_box, + .disable_event = snbep_uncore_msr_disable_event, + .enable_event = hswep_cbox_enable_event, + .read_counter = uncore_msr_read_counter, + .hw_config = skx_cha_hw_config, + .get_constraint = skx_cha_get_constraint, + .put_constraint = snbep_cbox_put_constraint, +}; + +static struct intel_uncore_type skx_uncore_chabox = { + .name = "cha", + .num_counters = 4, + .perf_ctr_bits = 48, + .event_ctl = HSWEP_C0_MSR_PMON_CTL0, + .perf_ctr = HSWEP_C0_MSR_PMON_CTR0, + .event_mask = HSWEP_S_MSR_PMON_RAW_EVENT_MASK, + .box_ctl = HSWEP_C0_MSR_PMON_BOX_CTL, + .msr_offset = HSWEP_CBO_MSR_OFFSET, + .num_shared_regs = 1, + .constraints = skx_uncore_chabox_constraints, + .ops = &skx_uncore_chabox_ops, + .format_group = &skx_uncore_chabox_format_group, +}; + +static struct attribute *skx_uncore_iio_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh9.attr, + &format_attr_ch_mask.attr, + &format_attr_fc_mask.attr, + NULL, +}; + +static struct attribute_group skx_uncore_iio_format_group = { + .name = "format", + .attrs = skx_uncore_iio_formats_attr, +}; + +static struct event_constraint skx_uncore_iio_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x83, 0x3), + UNCORE_EVENT_CONSTRAINT(0x88, 0xc), + UNCORE_EVENT_CONSTRAINT(0x95, 0xc), + UNCORE_EVENT_CONSTRAINT(0xc0, 0xc), + UNCORE_EVENT_CONSTRAINT(0xc5, 0xc), + UNCORE_EVENT_CONSTRAINT(0xd4, 0xc), + EVENT_CONSTRAINT_END +}; + +static void skx_iio_enable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); +} + +static struct intel_uncore_ops skx_uncore_iio_ops = { + .init_box = ivbep_uncore_msr_init_box, + .disable_box = snbep_uncore_msr_disable_box, + .enable_box = snbep_uncore_msr_enable_box, + .disable_event = snbep_uncore_msr_disable_event, + .enable_event = skx_iio_enable_event, + .read_counter = uncore_msr_read_counter, +}; + +static struct intel_uncore_type skx_uncore_iio = { + .name = "iio", + .num_counters = 4, + .num_boxes = 5, + .perf_ctr_bits = 48, + .event_ctl = SKX_IIO0_MSR_PMON_CTL0, + .perf_ctr = SKX_IIO0_MSR_PMON_CTR0, + .event_mask = SKX_IIO_PMON_RAW_EVENT_MASK, + .event_mask_ext = SKX_IIO_PMON_RAW_EVENT_MASK_EXT, + .box_ctl = SKX_IIO0_MSR_PMON_BOX_CTL, + .msr_offset = SKX_IIO_MSR_OFFSET, + .constraints = skx_uncore_iio_constraints, + .ops = &skx_uncore_iio_ops, + .format_group = &skx_uncore_iio_format_group, +}; + +static struct attribute *skx_uncore_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + NULL, +}; + +static struct attribute_group skx_uncore_format_group = { + .name = "format", + .attrs = skx_uncore_formats_attr, +}; + +static struct intel_uncore_type skx_uncore_irp = { + .name = "irp", + .num_counters = 2, + .num_boxes = 5, + .perf_ctr_bits = 48, + .event_ctl = SKX_IRP0_MSR_PMON_CTL0, + .perf_ctr = SKX_IRP0_MSR_PMON_CTR0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .box_ctl = SKX_IRP0_MSR_PMON_BOX_CTL, + .msr_offset = SKX_IRP_MSR_OFFSET, + .ops = &skx_uncore_iio_ops, + .format_group = &skx_uncore_format_group, +}; + +static struct intel_uncore_ops skx_uncore_pcu_ops = { + IVBEP_UNCORE_MSR_OPS_COMMON_INIT(), + .hw_config = hswep_pcu_hw_config, + .get_constraint = snbep_pcu_get_constraint, + .put_constraint = snbep_pcu_put_constraint, +}; + +static struct intel_uncore_type skx_uncore_pcu = { + .name = "pcu", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .perf_ctr = HSWEP_PCU_MSR_PMON_CTR0, + .event_ctl = HSWEP_PCU_MSR_PMON_CTL0, + .event_mask = SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK, + .box_ctl = HSWEP_PCU_MSR_PMON_BOX_CTL, + .num_shared_regs = 1, + .ops = &skx_uncore_pcu_ops, + .format_group = &snbep_uncore_pcu_format_group, +}; + +static struct intel_uncore_type *skx_msr_uncores[] = { + &skx_uncore_ubox, + &skx_uncore_chabox, + &skx_uncore_iio, + &skx_uncore_irp, + &skx_uncore_pcu, + NULL, +}; + +static int skx_count_chabox(void) +{ + struct pci_dev *chabox_dev = NULL; + int bus, count = 0; + + while (1) { + chabox_dev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x208d, chabox_dev); + if (!chabox_dev) + break; + if (count == 0) + bus = chabox_dev->bus->number; + if (bus != chabox_dev->bus->number) + break; + count++; + } + + pci_dev_put(chabox_dev); + return count; +} + +void skx_uncore_cpu_init(void) +{ + skx_uncore_chabox.num_boxes = skx_count_chabox(); + uncore_msr_uncores = skx_msr_uncores; +} + +static struct intel_uncore_type skx_uncore_imc = { + .name = "imc", + .num_counters = 4, + .num_boxes = 6, + .perf_ctr_bits = 48, + .fixed_ctr_bits = 48, + .fixed_ctr = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR, + .fixed_ctl = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL, + .event_descs = hswep_uncore_imc_events, + .perf_ctr = SNBEP_PCI_PMON_CTR0, + .event_ctl = SNBEP_PCI_PMON_CTL0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .box_ctl = SNBEP_PCI_PMON_BOX_CTL, + .ops = &ivbep_uncore_pci_ops, + .format_group = &skx_uncore_format_group, +}; + +static struct attribute *skx_upi_uncore_formats_attr[] = { + &format_attr_event_ext.attr, + &format_attr_umask_ext.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + NULL, +}; + +static struct attribute_group skx_upi_uncore_format_group = { + .name = "format", + .attrs = skx_upi_uncore_formats_attr, +}; + +static void skx_upi_uncore_pci_init_box(struct intel_uncore_box *box) +{ + struct pci_dev *pdev = box->pci_dev; + + __set_bit(UNCORE_BOX_FLAG_CTL_OFFS8, &box->flags); + pci_write_config_dword(pdev, SKX_UPI_PCI_PMON_BOX_CTL, IVBEP_PMON_BOX_CTL_INT); +} + +static struct intel_uncore_ops skx_upi_uncore_pci_ops = { + .init_box = skx_upi_uncore_pci_init_box, + .disable_box = snbep_uncore_pci_disable_box, + .enable_box = snbep_uncore_pci_enable_box, + .disable_event = snbep_uncore_pci_disable_event, + .enable_event = snbep_uncore_pci_enable_event, + .read_counter = snbep_uncore_pci_read_counter, +}; + +static struct intel_uncore_type skx_uncore_upi = { + .name = "upi", + .num_counters = 4, + .num_boxes = 3, + .perf_ctr_bits = 48, + .perf_ctr = SKX_UPI_PCI_PMON_CTR0, + .event_ctl = SKX_UPI_PCI_PMON_CTL0, + .event_mask = SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK, + .event_mask_ext = SKX_PMON_CTL_UMASK_EXT, + .box_ctl = SKX_UPI_PCI_PMON_BOX_CTL, + .ops = &skx_upi_uncore_pci_ops, + .format_group = &skx_upi_uncore_format_group, +}; + +static void skx_m2m_uncore_pci_init_box(struct intel_uncore_box *box) +{ + struct pci_dev *pdev = box->pci_dev; + + __set_bit(UNCORE_BOX_FLAG_CTL_OFFS8, &box->flags); + pci_write_config_dword(pdev, SKX_M2M_PCI_PMON_BOX_CTL, IVBEP_PMON_BOX_CTL_INT); +} + +static struct intel_uncore_ops skx_m2m_uncore_pci_ops = { + .init_box = skx_m2m_uncore_pci_init_box, + .disable_box = snbep_uncore_pci_disable_box, + .enable_box = snbep_uncore_pci_enable_box, + .disable_event = snbep_uncore_pci_disable_event, + .enable_event = snbep_uncore_pci_enable_event, + .read_counter = snbep_uncore_pci_read_counter, +}; + +static struct intel_uncore_type skx_uncore_m2m = { + .name = "m2m", + .num_counters = 4, + .num_boxes = 2, + .perf_ctr_bits = 48, + .perf_ctr = SKX_M2M_PCI_PMON_CTR0, + .event_ctl = SKX_M2M_PCI_PMON_CTL0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .box_ctl = SKX_M2M_PCI_PMON_BOX_CTL, + .ops = &skx_m2m_uncore_pci_ops, + .format_group = &skx_uncore_format_group, +}; + +static struct event_constraint skx_uncore_m2pcie_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x23, 0x3), + EVENT_CONSTRAINT_END +}; + +static struct intel_uncore_type skx_uncore_m2pcie = { + .name = "m2pcie", + .num_counters = 4, + .num_boxes = 4, + .perf_ctr_bits = 48, + .constraints = skx_uncore_m2pcie_constraints, + .perf_ctr = SNBEP_PCI_PMON_CTR0, + .event_ctl = SNBEP_PCI_PMON_CTL0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .box_ctl = SNBEP_PCI_PMON_BOX_CTL, + .ops = &ivbep_uncore_pci_ops, + .format_group = &skx_uncore_format_group, +}; + +static struct event_constraint skx_uncore_m3upi_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x1d, 0x1), + UNCORE_EVENT_CONSTRAINT(0x1e, 0x1), + UNCORE_EVENT_CONSTRAINT(0x40, 0x7), + UNCORE_EVENT_CONSTRAINT(0x4e, 0x7), + UNCORE_EVENT_CONSTRAINT(0x4f, 0x7), + UNCORE_EVENT_CONSTRAINT(0x50, 0x7), + UNCORE_EVENT_CONSTRAINT(0x51, 0x7), + UNCORE_EVENT_CONSTRAINT(0x52, 0x7), + EVENT_CONSTRAINT_END +}; + +static struct intel_uncore_type skx_uncore_m3upi = { + .name = "m3upi", + .num_counters = 3, + .num_boxes = 3, + .perf_ctr_bits = 48, + .constraints = skx_uncore_m3upi_constraints, + .perf_ctr = SNBEP_PCI_PMON_CTR0, + .event_ctl = SNBEP_PCI_PMON_CTL0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .box_ctl = SNBEP_PCI_PMON_BOX_CTL, + .ops = &ivbep_uncore_pci_ops, + .format_group = &skx_uncore_format_group, +}; + +enum { + SKX_PCI_UNCORE_IMC, + SKX_PCI_UNCORE_M2M, + SKX_PCI_UNCORE_UPI, + SKX_PCI_UNCORE_M2PCIE, + SKX_PCI_UNCORE_M3UPI, +}; + +static struct intel_uncore_type *skx_pci_uncores[] = { + [SKX_PCI_UNCORE_IMC] = &skx_uncore_imc, + [SKX_PCI_UNCORE_M2M] = &skx_uncore_m2m, + [SKX_PCI_UNCORE_UPI] = &skx_uncore_upi, + [SKX_PCI_UNCORE_M2PCIE] = &skx_uncore_m2pcie, + [SKX_PCI_UNCORE_M3UPI] = &skx_uncore_m3upi, + NULL, +}; + +static const struct pci_device_id skx_uncore_pci_ids[] = { + { /* MC0 Channel 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2042), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(10, 2, SKX_PCI_UNCORE_IMC, 0), + }, + { /* MC0 Channel 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2046), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(10, 6, SKX_PCI_UNCORE_IMC, 1), + }, + { /* MC0 Channel 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204a), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(11, 2, SKX_PCI_UNCORE_IMC, 2), + }, + { /* MC1 Channel 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2042), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(12, 2, SKX_PCI_UNCORE_IMC, 3), + }, + { /* MC1 Channel 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2046), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(12, 6, SKX_PCI_UNCORE_IMC, 4), + }, + { /* MC1 Channel 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204a), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(13, 2, SKX_PCI_UNCORE_IMC, 5), + }, + { /* M2M0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2066), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(8, 0, SKX_PCI_UNCORE_M2M, 0), + }, + { /* M2M1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2066), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(9, 0, SKX_PCI_UNCORE_M2M, 1), + }, + { /* UPI0 Link 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2058), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(14, 0, SKX_PCI_UNCORE_UPI, 0), + }, + { /* UPI0 Link 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2058), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(15, 0, SKX_PCI_UNCORE_UPI, 1), + }, + { /* UPI1 Link 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2058), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(16, 0, SKX_PCI_UNCORE_UPI, 2), + }, + { /* M2PCIe 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2088), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(21, 1, SKX_PCI_UNCORE_M2PCIE, 0), + }, + { /* M2PCIe 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2088), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(22, 1, SKX_PCI_UNCORE_M2PCIE, 1), + }, + { /* M2PCIe 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2088), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(23, 1, SKX_PCI_UNCORE_M2PCIE, 2), + }, + { /* M2PCIe 3 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2088), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(21, 5, SKX_PCI_UNCORE_M2PCIE, 3), + }, + { /* M3UPI0 Link 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204C), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 0, SKX_PCI_UNCORE_M3UPI, 0), + }, + { /* M3UPI0 Link 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204D), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 1, SKX_PCI_UNCORE_M3UPI, 1), + }, + { /* M3UPI1 Link 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204C), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 4, SKX_PCI_UNCORE_M3UPI, 2), + }, + { /* end: all zeroes */ } +}; + + +static struct pci_driver skx_uncore_pci_driver = { + .name = "skx_uncore", + .id_table = skx_uncore_pci_ids, +}; + +int skx_uncore_pci_init(void) +{ + /* need to double check pci address */ + int ret = snbep_pci2phy_map_init(0x2014, SKX_CPUNODEID, SKX_GIDNIDMAP, false); + + if (ret) + return ret; + + uncore_pci_uncores = skx_pci_uncores; + uncore_pci_driver = &skx_uncore_pci_driver; + return 0; +} + +/* end of SKX uncore support */ diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 8c4a47706296..5874d8de1f8d 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -194,12 +194,13 @@ struct cpu_hw_events { */ struct debug_store *ds; u64 pebs_enabled; + int n_pebs; + int n_large_pebs; /* * Intel LBR bits */ int lbr_users; - void *lbr_context; struct perf_branch_stack lbr_stack; struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; struct er_account *lbr_sel; @@ -508,6 +509,8 @@ struct x86_pmu { void (*enable_all)(int added); void (*enable)(struct perf_event *); void (*disable)(struct perf_event *); + void (*add)(struct perf_event *); + void (*del)(struct perf_event *); int (*hw_config)(struct perf_event *event); int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign); unsigned eventsel; @@ -888,6 +891,10 @@ extern struct event_constraint intel_skl_pebs_event_constraints[]; struct event_constraint *intel_pebs_constraints(struct perf_event *event); +void intel_pmu_pebs_add(struct perf_event *event); + +void intel_pmu_pebs_del(struct perf_event *event); + void intel_pmu_pebs_enable(struct perf_event *event); void intel_pmu_pebs_disable(struct perf_event *event); @@ -906,9 +913,9 @@ u64 lbr_from_signext_quirk_wr(u64 val); void intel_pmu_lbr_reset(void); -void intel_pmu_lbr_enable(struct perf_event *event); +void intel_pmu_lbr_add(struct perf_event *event); -void intel_pmu_lbr_disable(struct perf_event *event); +void intel_pmu_lbr_del(struct perf_event *event); void intel_pmu_lbr_enable_all(bool pmi); diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 2f29f4e407c3..cb13c0564ea7 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -378,7 +378,7 @@ int ia32_setup_rt_frame(int sig, struct ksignal *ksig, put_user_ex(*((u64 *)&code), (u64 __user *)frame->retcode); } put_user_catch(err); - err |= copy_siginfo_to_user32(&frame->info, &ksig->info); + err |= __copy_siginfo_to_user32(&frame->info, &ksig->info, false); err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, fpstate, regs, set->sig[0]); err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index e77a6443104f..1b020381ab38 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -217,10 +217,14 @@ static inline int alternatives_text_reserved(void *start, void *end) */ #define alternative_call_2(oldfunc, newfunc1, feature1, newfunc2, feature2, \ output, input...) \ +{ \ + register void *__sp asm(_ASM_SP); \ asm volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", feature1,\ "call %P[new2]", feature2) \ - : output : [old] "i" (oldfunc), [new1] "i" (newfunc1), \ - [new2] "i" (newfunc2), ## input) + : output, "+r" (__sp) \ + : [old] "i" (oldfunc), [new1] "i" (newfunc1), \ + [new2] "i" (newfunc2), ## input); \ +} /* * use this macro(s) if you need more than one output parameter diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 124357773ffa..f5aaf6c83222 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -650,8 +650,8 @@ static inline void entering_ack_irq(void) static inline void ipi_entering_ack_irq(void) { - ack_APIC_irq(); irq_enter(); + ack_APIC_irq(); } static inline void exiting_irq(void) @@ -661,9 +661,8 @@ static inline void exiting_irq(void) static inline void exiting_ack_irq(void) { - irq_exit(); - /* Ack only at the end to avoid potential reentry */ ack_APIC_irq(); + irq_exit(); } extern void ioapic_zap_locks(void); diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h index 9733361fed6f..97848cdfcb1a 100644 --- a/arch/x86/include/asm/cmpxchg.h +++ b/arch/x86/include/asm/cmpxchg.h @@ -158,53 +158,9 @@ extern void __add_wrong_size(void) * value of "*ptr". * * xadd() is locked when multiple CPUs are online - * xadd_sync() is always locked - * xadd_local() is never locked */ #define __xadd(ptr, inc, lock) __xchg_op((ptr), (inc), xadd, lock) #define xadd(ptr, inc) __xadd((ptr), (inc), LOCK_PREFIX) -#define xadd_sync(ptr, inc) __xadd((ptr), (inc), "lock; ") -#define xadd_local(ptr, inc) __xadd((ptr), (inc), "") - -#define __add(ptr, inc, lock) \ - ({ \ - __typeof__ (*(ptr)) __ret = (inc); \ - switch (sizeof(*(ptr))) { \ - case __X86_CASE_B: \ - asm volatile (lock "addb %b1, %0\n" \ - : "+m" (*(ptr)) : "qi" (inc) \ - : "memory", "cc"); \ - break; \ - case __X86_CASE_W: \ - asm volatile (lock "addw %w1, %0\n" \ - : "+m" (*(ptr)) : "ri" (inc) \ - : "memory", "cc"); \ - break; \ - case __X86_CASE_L: \ - asm volatile (lock "addl %1, %0\n" \ - : "+m" (*(ptr)) : "ri" (inc) \ - : "memory", "cc"); \ - break; \ - case __X86_CASE_Q: \ - asm volatile (lock "addq %1, %0\n" \ - : "+m" (*(ptr)) : "ri" (inc) \ - : "memory", "cc"); \ - break; \ - default: \ - __add_wrong_size(); \ - } \ - __ret; \ - }) - -/* - * add_*() adds "inc" to "*ptr" - * - * __add() takes a lock prefix - * add_smp() is locked when multiple CPUs are online - * add_sync() is always locked - */ -#define add_smp(ptr, inc) __add((ptr), (inc), LOCK_PREFIX) -#define add_sync(ptr, inc) __add((ptr), (inc), "lock; ") #define __cmpxchg_double(pfx, p1, p2, o1, o2, n1, n2) \ ({ \ diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index a18806165fe4..03d269bed941 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h @@ -275,10 +275,10 @@ struct compat_shmid64_ds { #ifdef CONFIG_X86_X32_ABI typedef struct user_regs_struct compat_elf_gregset_t; -#define PR_REG_SIZE(S) (test_thread_flag(TIF_IA32) ? 68 : 216) -#define PRSTATUS_SIZE(S) (test_thread_flag(TIF_IA32) ? 144 : 296) -#define SET_PR_FPVALID(S,V) \ - do { *(int *) (((void *) &((S)->pr_reg)) + PR_REG_SIZE(0)) = (V); } \ +/* Full regset -- prstatus on x32, otherwise on ia32 */ +#define PRSTATUS_SIZE(S, R) (R != sizeof(S.pr_reg) ? 144 : 296) +#define SET_PR_FPVALID(S, V, R) \ + do { *(int *) (((void *) &((S)->pr_reg)) + R) = (V); } \ while (0) #define COMPAT_USE_64BIT_TIME \ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 92a8308b96f6..1188bc849ee3 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -106,7 +106,6 @@ #define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */ #define X86_FEATURE_EAGER_FPU ( 3*32+29) /* "eagerfpu" Non lazy FPU restore */ #define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ -#define X86_FEATURE_MCE_RECOVERY ( 3*32+31) /* cpu has recoverable machine checks */ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ #define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */ diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 4e10d73cf018..12080d87da3b 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -36,7 +36,7 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in extern struct desc_ptr idt_descr; extern gate_desc idt_table[]; -extern struct desc_ptr debug_idt_descr; +extern const struct desc_ptr debug_idt_descr; extern gate_desc debug_idt_table[]; struct gdt_page { diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h index 3ab0537872fb..476b574de99e 100644 --- a/arch/x86/include/asm/e820.h +++ b/arch/x86/include/asm/e820.h @@ -10,8 +10,8 @@ #include <uapi/asm/e820.h> #ifndef __ASSEMBLY__ /* see comment in arch/x86/kernel/e820.c */ -extern struct e820map e820; -extern struct e820map e820_saved; +extern struct e820map *e820; +extern struct e820map *e820_saved; extern unsigned long pci_mem_start; extern int e820_any_mapped(u64 start, u64 end, unsigned type); @@ -53,6 +53,8 @@ extern void e820_reserve_resources_late(void); extern void setup_memory_map(void); extern char *default_machine_specific_memory_setup(void); +extern void e820_reallocate_tables(void); + /* * Returns true iff the specified range [s,e) is completely contained inside * the ISA region. diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index d0bb76d81402..389d700b961e 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -117,7 +117,6 @@ extern int __init efi_memblock_x86_reserve_range(void); extern pgd_t * __init efi_call_phys_prolog(void); extern void __init efi_call_phys_epilog(pgd_t *save_pgd); extern void __init efi_print_memmap(void); -extern void __init efi_unmap_memmap(void); extern void __init efi_memory_uc(u64 addr, unsigned long size); extern void __init efi_map_region(efi_memory_desc_t *md); extern void __init efi_map_region_fixed(efi_memory_desc_t *md); @@ -192,14 +191,7 @@ static inline efi_status_t efi_thunk_set_virtual_address_map( struct efi_config { u64 image_handle; u64 table; - u64 allocate_pool; - u64 allocate_pages; - u64 get_memory_map; - u64 free_pool; - u64 free_pages; - u64 locate_handle; - u64 handle_protocol; - u64 exit_boot_services; + u64 boot_services; u64 text_output; efi_status_t (*call)(unsigned long, ...); bool is64; @@ -207,14 +199,27 @@ struct efi_config { __pure const struct efi_config *__efi_early(void); +static inline bool efi_is_64bit(void) +{ + if (!IS_ENABLED(CONFIG_X86_64)) + return false; + + if (!IS_ENABLED(CONFIG_EFI_MIXED)) + return true; + + return __efi_early()->is64; +} + #define efi_call_early(f, ...) \ - __efi_early()->call(__efi_early()->f, __VA_ARGS__); + __efi_early()->call(efi_is_64bit() ? \ + ((efi_boot_services_64_t *)(unsigned long) \ + __efi_early()->boot_services)->f : \ + ((efi_boot_services_32_t *)(unsigned long) \ + __efi_early()->boot_services)->f, __VA_ARGS__) #define __efi_call_early(f, ...) \ __efi_early()->call((unsigned long)f, __VA_ARGS__); -#define efi_is_64bit() __efi_early()->is64 - extern bool efi_reboot_required(void); #else diff --git a/arch/x86/include/asm/fpu/signal.h b/arch/x86/include/asm/fpu/signal.h index 0e970d00dfcd..20a1fbf7fe4e 100644 --- a/arch/x86/include/asm/fpu/signal.h +++ b/arch/x86/include/asm/fpu/signal.h @@ -19,6 +19,12 @@ int ia32_setup_frame(int sig, struct ksignal *ksig, # define ia32_setup_rt_frame __setup_rt_frame #endif +#ifdef CONFIG_COMPAT +int __copy_siginfo_to_user32(compat_siginfo_t __user *to, + const siginfo_t *from, bool x32_ABI); +#endif + + extern void convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk); extern void convert_to_fxsr(struct task_struct *tsk, diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index ae55a43e09c0..d4957ac72b48 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -45,7 +45,8 @@ extern u64 xfeatures_mask; extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; -extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask); +extern void __init update_regset_xstate_info(unsigned int size, + u64 xstate_mask); void fpu__xstate_clear_all_cpu_caps(void); void *get_xsave_addr(struct xregs_state *xsave, int xstate); diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index a4820d4df617..eccd0ac6bc38 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -6,6 +6,7 @@ # define MCOUNT_ADDR ((unsigned long)(__fentry__)) #else # define MCOUNT_ADDR ((unsigned long)(mcount)) +# define HAVE_FUNCTION_GRAPH_FP_TEST #endif #define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */ @@ -13,6 +14,8 @@ #define ARCH_SUPPORTS_FTRACE_OPS 1 #endif +#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR + #ifndef __ASSEMBLY__ extern void mcount(void); extern atomic_t modifying_ftrace_code; diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h index 055ea9941dd5..67942b6ad4b7 100644 --- a/arch/x86/include/asm/hypervisor.h +++ b/arch/x86/include/asm/hypervisor.h @@ -43,6 +43,9 @@ struct hypervisor_x86 { /* X2APIC detection (run once per boot) */ bool (*x2apic_available)(void); + + /* pin current vcpu to specified physical cpu (run rarely) */ + void (*pin_vcpu)(int); }; extern const struct hypervisor_x86 *x86_hyper; @@ -56,6 +59,7 @@ extern const struct hypervisor_x86 x86_hyper_kvm; extern void init_hypervisor(struct cpuinfo_x86 *c); extern void init_hypervisor_platform(void); extern bool hypervisor_x2apic_available(void); +extern void hypervisor_pin_vcpu(int cpu); #else static inline void init_hypervisor(struct cpuinfo_x86 *c) { } static inline void init_hypervisor_platform(void) { } diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 627719475457..9ae5ab80a497 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -56,8 +56,8 @@ #define INTEL_FAM6_ATOM_SILVERMONT1 0x37 /* BayTrail/BYT / Valleyview */ #define INTEL_FAM6_ATOM_SILVERMONT2 0x4D /* Avaton/Rangely */ #define INTEL_FAM6_ATOM_AIRMONT 0x4C /* CherryTrail / Braswell */ -#define INTEL_FAM6_ATOM_MERRIFIELD1 0x4A /* Tangier */ -#define INTEL_FAM6_ATOM_MERRIFIELD2 0x5A /* Annidale */ +#define INTEL_FAM6_ATOM_MERRIFIELD 0x4A /* Tangier */ +#define INTEL_FAM6_ATOM_MOOREFIELD 0x5A /* Annidale */ #define INTEL_FAM6_ATOM_GOLDMONT 0x5C #define INTEL_FAM6_ATOM_DENVERTON 0x5F /* Goldmont Microserver */ diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h index 9d6b097aa73d..5b6753d1f7f4 100644 --- a/arch/x86/include/asm/intel-mid.h +++ b/arch/x86/include/asm/intel-mid.h @@ -18,6 +18,8 @@ extern int intel_mid_pci_init(void); extern int intel_mid_pci_set_power_state(struct pci_dev *pdev, pci_power_t state); +extern void intel_mid_pwr_power_off(void); + #define INTEL_MID_PWR_LSS_OFFSET 4 #define INTEL_MID_PWR_LSS_TYPE (1 << 7) diff --git a/arch/x86/include/asm/intel_scu_ipc.h b/arch/x86/include/asm/intel_scu_ipc.h index 925b605eb5c6..4fb1d0abef95 100644 --- a/arch/x86/include/asm/intel_scu_ipc.h +++ b/arch/x86/include/asm/intel_scu_ipc.h @@ -3,6 +3,8 @@ #include <linux/notifier.h> +#define IPCMSG_COLD_OFF 0x80 /* Only for Tangier */ + #define IPCMSG_WARM_RESET 0xF0 #define IPCMSG_COLD_RESET 0xF1 #define IPCMSG_SOFT_RESET 0xF2 diff --git a/arch/x86/include/asm/kaslr.h b/arch/x86/include/asm/kaslr.h index 2674ee3de748..1052a797d71d 100644 --- a/arch/x86/include/asm/kaslr.h +++ b/arch/x86/include/asm/kaslr.h @@ -6,6 +6,7 @@ unsigned long kaslr_get_random_long(const char *purpose); #ifdef CONFIG_RANDOMIZE_MEMORY extern unsigned long page_offset_base; extern unsigned long vmalloc_base; +extern unsigned long vmemmap_base; void kernel_randomize_memory(void); #else diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h index 1ef9d581b5d9..d31881188431 100644 --- a/arch/x86/include/asm/kdebug.h +++ b/arch/x86/include/asm/kdebug.h @@ -24,8 +24,6 @@ enum die_val { extern void printk_address(unsigned long address); extern void die(const char *, struct pt_regs *,long); extern int __must_check __die(const char *, struct pt_regs *, long); -extern void show_trace(struct task_struct *t, struct pt_regs *regs, - unsigned long *sp, unsigned long bp); extern void show_stack_regs(struct pt_regs *regs); extern void __show_regs(struct pt_regs *regs, int all); extern unsigned long oops_begin(void); diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 8bf766ef0e18..9bd7ff5ffbcc 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -40,9 +40,10 @@ #define MCI_STATUS_AR (1ULL<<55) /* Action required */ /* AMD-specific bits */ +#define MCI_STATUS_TCC (1ULL<<55) /* Task context corrupt */ +#define MCI_STATUS_SYNDV (1ULL<<53) /* synd reg. valid */ #define MCI_STATUS_DEFERRED (1ULL<<44) /* uncorrected error, deferred exception */ #define MCI_STATUS_POISON (1ULL<<43) /* access poisonous data */ -#define MCI_STATUS_TCC (1ULL<<55) /* Task context corrupt */ /* * McaX field if set indicates a given bank supports MCA extensions: @@ -110,6 +111,7 @@ #define MSR_AMD64_SMCA_MC0_MISC0 0xc0002003 #define MSR_AMD64_SMCA_MC0_CONFIG 0xc0002004 #define MSR_AMD64_SMCA_MC0_IPID 0xc0002005 +#define MSR_AMD64_SMCA_MC0_SYND 0xc0002006 #define MSR_AMD64_SMCA_MC0_DESTAT 0xc0002008 #define MSR_AMD64_SMCA_MC0_DEADDR 0xc0002009 #define MSR_AMD64_SMCA_MC0_MISC1 0xc000200a @@ -119,6 +121,7 @@ #define MSR_AMD64_SMCA_MCx_MISC(x) (MSR_AMD64_SMCA_MC0_MISC0 + 0x10*(x)) #define MSR_AMD64_SMCA_MCx_CONFIG(x) (MSR_AMD64_SMCA_MC0_CONFIG + 0x10*(x)) #define MSR_AMD64_SMCA_MCx_IPID(x) (MSR_AMD64_SMCA_MC0_IPID + 0x10*(x)) +#define MSR_AMD64_SMCA_MCx_SYND(x) (MSR_AMD64_SMCA_MC0_SYND + 0x10*(x)) #define MSR_AMD64_SMCA_MCx_DESTAT(x) (MSR_AMD64_SMCA_MC0_DESTAT + 0x10*(x)) #define MSR_AMD64_SMCA_MCx_DEADDR(x) (MSR_AMD64_SMCA_MC0_DEADDR + 0x10*(x)) #define MSR_AMD64_SMCA_MCx_MISCy(x, y) ((MSR_AMD64_SMCA_MC0_MISC1 + y) + (0x10*(x))) @@ -334,44 +337,47 @@ extern void apei_mce_report_mem_error(int corrected, * Scalable MCA. */ #ifdef CONFIG_X86_MCE_AMD -enum amd_ip_types { - SMCA_F17H_CORE = 0, /* Core errors */ - SMCA_DF, /* Data Fabric */ - SMCA_UMC, /* Unified Memory Controller */ - SMCA_PB, /* Parameter Block */ - SMCA_PSP, /* Platform Security Processor */ - SMCA_SMU, /* System Management Unit */ - N_AMD_IP_TYPES -}; - -struct amd_hwid { - const char *name; - unsigned int hwid; -}; - -extern struct amd_hwid amd_hwids[N_AMD_IP_TYPES]; -enum amd_core_mca_blocks { +/* These may be used by multiple smca_hwid_mcatypes */ +enum smca_bank_types { SMCA_LS = 0, /* Load Store */ SMCA_IF, /* Instruction Fetch */ - SMCA_L2_CACHE, /* L2 cache */ - SMCA_DE, /* Decoder unit */ - RES, /* Reserved */ - SMCA_EX, /* Execution unit */ + SMCA_L2_CACHE, /* L2 Cache */ + SMCA_DE, /* Decoder Unit */ + SMCA_EX, /* Execution Unit */ SMCA_FP, /* Floating Point */ - SMCA_L3_CACHE, /* L3 cache */ - N_CORE_MCA_BLOCKS + SMCA_L3_CACHE, /* L3 Cache */ + SMCA_CS, /* Coherent Slave */ + SMCA_PIE, /* Power, Interrupts, etc. */ + SMCA_UMC, /* Unified Memory Controller */ + SMCA_PB, /* Parameter Block */ + SMCA_PSP, /* Platform Security Processor */ + SMCA_SMU, /* System Management Unit */ + N_SMCA_BANK_TYPES }; -extern const char * const amd_core_mcablock_names[N_CORE_MCA_BLOCKS]; +struct smca_bank_name { + const char *name; /* Short name for sysfs */ + const char *long_name; /* Long name for pretty-printing */ +}; + +extern struct smca_bank_name smca_bank_names[N_SMCA_BANK_TYPES]; + +#define HWID_MCATYPE(hwid, mcatype) ((hwid << 16) | mcatype) -enum amd_df_mca_blocks { - SMCA_CS = 0, /* Coherent Slave */ - SMCA_PIE, /* Power management, Interrupts, etc */ - N_DF_BLOCKS +struct smca_hwid_mcatype { + unsigned int bank_type; /* Use with smca_bank_types for easy indexing. */ + u32 hwid_mcatype; /* (hwid,mcatype) tuple */ + u32 xec_bitmap; /* Bitmap of valid ExtErrorCodes; current max is 21. */ }; -extern const char * const amd_df_mcablock_names[N_DF_BLOCKS]; +struct smca_bank_info { + struct smca_hwid_mcatype *type; + u32 type_instance; +}; + +extern struct smca_bank_info smca_banks[MAX_NR_BANKS]; + #endif #endif /* _ASM_X86_MCE_H */ diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index b07233b64578..32007041ef8c 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -6,7 +6,6 @@ #include <asm/x86_init.h> #include <asm/apicdef.h> -extern int apic_version[]; extern int pic_mode; #ifdef CONFIG_X86_32 @@ -40,6 +39,7 @@ extern int mp_bus_id_to_type[MAX_MP_BUSSES]; extern DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); extern unsigned int boot_cpu_physical_apicid; +extern u8 boot_cpu_apic_version; extern unsigned long mp_lapic_addr; #ifdef CONFIG_X86_LOCAL_APIC @@ -86,6 +86,7 @@ static inline void early_reserve_e820_mpc_new(void) { } #endif int generic_processor_info(int apicid, int version); +int __generic_processor_info(int apicid, int version, bool enabled); #define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_LOCAL_APIC) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 2970d22d7766..ce932812f142 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -80,10 +80,6 @@ static inline unsigned long __read_cr4(void) { return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4); } -static inline unsigned long __read_cr4_safe(void) -{ - return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4_safe); -} static inline void __write_cr4(unsigned long x) { @@ -661,8 +657,6 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, #if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS) -#ifdef CONFIG_QUEUED_SPINLOCKS - static __always_inline void pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) { @@ -684,22 +678,6 @@ static __always_inline void pv_kick(int cpu) PVOP_VCALL1(pv_lock_ops.kick, cpu); } -#else /* !CONFIG_QUEUED_SPINLOCKS */ - -static __always_inline void __ticket_lock_spinning(struct arch_spinlock *lock, - __ticket_t ticket) -{ - PVOP_VCALLEE2(pv_lock_ops.lock_spinning, lock, ticket); -} - -static __always_inline void __ticket_unlock_kick(struct arch_spinlock *lock, - __ticket_t ticket) -{ - PVOP_VCALL2(pv_lock_ops.unlock_kick, lock, ticket); -} - -#endif /* CONFIG_QUEUED_SPINLOCKS */ - #endif /* SMP && PARAVIRT_SPINLOCKS */ #ifdef CONFIG_X86_32 diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 7fa9e7740ba3..0f400c0e4979 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -108,7 +108,6 @@ struct pv_cpu_ops { unsigned long (*read_cr0)(void); void (*write_cr0)(unsigned long); - unsigned long (*read_cr4_safe)(void); unsigned long (*read_cr4)(void); void (*write_cr4)(unsigned long); @@ -301,23 +300,16 @@ struct pv_mmu_ops { struct arch_spinlock; #ifdef CONFIG_SMP #include <asm/spinlock_types.h> -#else -typedef u16 __ticket_t; #endif struct qspinlock; struct pv_lock_ops { -#ifdef CONFIG_QUEUED_SPINLOCKS void (*queued_spin_lock_slowpath)(struct qspinlock *lock, u32 val); struct paravirt_callee_save queued_spin_unlock; void (*wait)(u8 *ptr, u8 val); void (*kick)(int cpu); -#else /* !CONFIG_QUEUED_SPINLOCKS */ - struct paravirt_callee_save lock_spinning; - void (*unlock_kick)(struct arch_spinlock *lock, __ticket_t ticket); -#endif /* !CONFIG_QUEUED_SPINLOCKS */ }; /* This contains all the paravirt structures: we get a convenient diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 6fdef9eef2d5..3a264200c62f 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -57,11 +57,13 @@ typedef struct { pteval_t pte; } pte_t; #define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) #define VMALLOC_SIZE_TB _AC(32, UL) #define __VMALLOC_BASE _AC(0xffffc90000000000, UL) -#define VMEMMAP_START _AC(0xffffea0000000000, UL) +#define __VMEMMAP_BASE _AC(0xffffea0000000000, UL) #ifdef CONFIG_RANDOMIZE_MEMORY #define VMALLOC_START vmalloc_base +#define VMEMMAP_START vmemmap_base #else #define VMALLOC_START __VMALLOC_BASE +#define VMEMMAP_START __VMEMMAP_BASE #endif /* CONFIG_RANDOMIZE_MEMORY */ #define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL)) #define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE) diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h index 643eba42d620..2c1ebeb4d737 100644 --- a/arch/x86/include/asm/pmem.h +++ b/arch/x86/include/asm/pmem.h @@ -46,10 +46,7 @@ static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n) static inline int arch_memcpy_from_pmem(void *dst, const void *src, size_t n) { - if (static_cpu_has(X86_FEATURE_MCE_RECOVERY)) - return memcpy_mcsafe(dst, src, n); - memcpy(dst, src, n); - return 0; + return memcpy_mcsafe(dst, src, n); } /** diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 63def9537a2d..984a7bf17f6a 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -389,9 +389,9 @@ struct thread_struct { unsigned short fsindex; unsigned short gsindex; #endif -#ifdef CONFIG_X86_32 - unsigned long ip; -#endif + + u32 status; /* thread synchronous flags */ + #ifdef CONFIG_X86_64 unsigned long fsbase; unsigned long gsbase; @@ -438,6 +438,15 @@ struct thread_struct { }; /* + * Thread-synchronous status. + * + * This is different from the flags in that nobody else + * ever touches our thread-synchronous status, so we don't + * have to worry about atomic accesses. + */ +#define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/ + +/* * Set IOPL bits in EFLAGS from given mask */ static inline void native_set_iopl_mask(unsigned mask) @@ -724,8 +733,6 @@ static inline void spin_lock_prefetch(const void *x) .addr_limit = KERNEL_DS, \ } -extern unsigned long thread_saved_pc(struct task_struct *tsk); - /* * TOP_OF_KERNEL_STACK_PADDING reserves 8 bytes on top of the ring0 stack. * This is necessary to guarantee that the entire "struct pt_regs" @@ -776,17 +783,13 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk); .addr_limit = KERNEL_DS, \ } -/* - * Return saved PC of a blocked thread. - * What is this good for? it will be always the scheduler or ret_from_fork. - */ -#define thread_saved_pc(t) READ_ONCE_NOCHECK(*(unsigned long *)((t)->thread.sp - 8)) - #define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1) extern unsigned long KSTK_ESP(struct task_struct *task); #endif /* CONFIG_X86_64 */ +extern unsigned long thread_saved_pc(struct task_struct *tsk); + extern void start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp); diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h index b2988c0ed829..230e1903acf0 100644 --- a/arch/x86/include/asm/realmode.h +++ b/arch/x86/include/asm/realmode.h @@ -44,9 +44,9 @@ struct trampoline_header { extern struct real_mode_header *real_mode_header; extern unsigned char real_mode_blob_end[]; -extern unsigned long init_rsp; extern unsigned long initial_code; extern unsigned long initial_gs; +extern unsigned long initial_stack; extern unsigned char real_mode_blob[]; extern unsigned char real_mode_relocs[]; diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h index 8dbc762ad132..3d33a719f5c1 100644 --- a/arch/x86/include/asm/rwsem.h +++ b/arch/x86/include/asm/rwsem.h @@ -154,7 +154,7 @@ static inline bool __down_write_trylock(struct rw_semaphore *sem) : "+m" (sem->count), "=&a" (tmp0), "=&r" (tmp1), CC_OUT(e) (result) : "er" (RWSEM_ACTIVE_WRITE_BIAS) - : "memory", "cc"); + : "memory"); return result; } diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h index dd1e7d6387ab..8af22be0fe61 100644 --- a/arch/x86/include/asm/signal.h +++ b/arch/x86/include/asm/signal.h @@ -23,6 +23,10 @@ typedef struct { unsigned long sig[_NSIG_WORDS]; } sigset_t; +/* non-uapi in-kernel SA_FLAGS for those indicates ABI for a signal frame */ +#define SA_IA32_ABI 0x02000000u +#define SA_X32_ABI 0x01000000u + #ifndef CONFIG_COMPAT typedef sigset_t compat_sigset_t; #endif diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index ebd0c164cd4e..19980b36f394 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -39,9 +39,6 @@ DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid); DECLARE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid); #endif -/* Static state in head.S used to set up a CPU */ -extern unsigned long stack_start; /* Initial stack pointer address */ - struct task_struct; struct smp_ops { diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index 587d7914ea4b..19a2224f9e16 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -59,22 +59,19 @@ static inline void native_write_cr3(unsigned long val) static inline unsigned long native_read_cr4(void) { unsigned long val; - asm volatile("mov %%cr4,%0\n\t" : "=r" (val), "=m" (__force_order)); - return val; -} - -static inline unsigned long native_read_cr4_safe(void) -{ - unsigned long val; - /* This could fault if %cr4 does not exist. In x86_64, a cr4 always - * exists, so it will never fail. */ #ifdef CONFIG_X86_32 + /* + * This could fault if CR4 does not exist. Non-existent CR4 + * is functionally equivalent to CR4 == 0. Keep it simple and pretend + * that CR4 == 0 on CPUs that don't have CR4. + */ asm volatile("1: mov %%cr4, %0\n" "2:\n" _ASM_EXTABLE(1b, 2b) : "=r" (val), "=m" (__force_order) : "0" (0)); #else - val = native_read_cr4(); + /* CR4 always exists on x86_64. */ + asm volatile("mov %%cr4,%0\n\t" : "=r" (val), "=m" (__force_order)); #endif return val; } @@ -182,11 +179,6 @@ static inline unsigned long __read_cr4(void) return native_read_cr4(); } -static inline unsigned long __read_cr4_safe(void) -{ - return native_read_cr4_safe(); -} - static inline void __write_cr4(unsigned long x) { native_write_cr4(x); diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index be0a05913b91..921bea7a2708 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -20,187 +20,13 @@ * (the type definitions are in asm/spinlock_types.h) */ -#ifdef CONFIG_X86_32 -# define LOCK_PTR_REG "a" -#else -# define LOCK_PTR_REG "D" -#endif - -#if defined(CONFIG_X86_32) && (defined(CONFIG_X86_PPRO_FENCE)) -/* - * On PPro SMP, we use a locked operation to unlock - * (PPro errata 66, 92) - */ -# define UNLOCK_LOCK_PREFIX LOCK_PREFIX -#else -# define UNLOCK_LOCK_PREFIX -#endif - /* How long a lock should spin before we consider blocking */ #define SPIN_THRESHOLD (1 << 15) extern struct static_key paravirt_ticketlocks_enabled; static __always_inline bool static_key_false(struct static_key *key); -#ifdef CONFIG_QUEUED_SPINLOCKS #include <asm/qspinlock.h> -#else - -#ifdef CONFIG_PARAVIRT_SPINLOCKS - -static inline void __ticket_enter_slowpath(arch_spinlock_t *lock) -{ - set_bit(0, (volatile unsigned long *)&lock->tickets.head); -} - -#else /* !CONFIG_PARAVIRT_SPINLOCKS */ -static __always_inline void __ticket_lock_spinning(arch_spinlock_t *lock, - __ticket_t ticket) -{ -} -static inline void __ticket_unlock_kick(arch_spinlock_t *lock, - __ticket_t ticket) -{ -} - -#endif /* CONFIG_PARAVIRT_SPINLOCKS */ -static inline int __tickets_equal(__ticket_t one, __ticket_t two) -{ - return !((one ^ two) & ~TICKET_SLOWPATH_FLAG); -} - -static inline void __ticket_check_and_clear_slowpath(arch_spinlock_t *lock, - __ticket_t head) -{ - if (head & TICKET_SLOWPATH_FLAG) { - arch_spinlock_t old, new; - - old.tickets.head = head; - new.tickets.head = head & ~TICKET_SLOWPATH_FLAG; - old.tickets.tail = new.tickets.head + TICKET_LOCK_INC; - new.tickets.tail = old.tickets.tail; - - /* try to clear slowpath flag when there are no contenders */ - cmpxchg(&lock->head_tail, old.head_tail, new.head_tail); - } -} - -static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock) -{ - return __tickets_equal(lock.tickets.head, lock.tickets.tail); -} - -/* - * Ticket locks are conceptually two parts, one indicating the current head of - * the queue, and the other indicating the current tail. The lock is acquired - * by atomically noting the tail and incrementing it by one (thus adding - * ourself to the queue and noting our position), then waiting until the head - * becomes equal to the the initial value of the tail. - * - * We use an xadd covering *both* parts of the lock, to increment the tail and - * also load the position of the head, which takes care of memory ordering - * issues and should be optimal for the uncontended case. Note the tail must be - * in the high part, because a wide xadd increment of the low part would carry - * up and contaminate the high part. - */ -static __always_inline void arch_spin_lock(arch_spinlock_t *lock) -{ - register struct __raw_tickets inc = { .tail = TICKET_LOCK_INC }; - - inc = xadd(&lock->tickets, inc); - if (likely(inc.head == inc.tail)) - goto out; - - for (;;) { - unsigned count = SPIN_THRESHOLD; - - do { - inc.head = READ_ONCE(lock->tickets.head); - if (__tickets_equal(inc.head, inc.tail)) - goto clear_slowpath; - cpu_relax(); - } while (--count); - __ticket_lock_spinning(lock, inc.tail); - } -clear_slowpath: - __ticket_check_and_clear_slowpath(lock, inc.head); -out: - barrier(); /* make sure nothing creeps before the lock is taken */ -} - -static __always_inline int arch_spin_trylock(arch_spinlock_t *lock) -{ - arch_spinlock_t old, new; - - old.tickets = READ_ONCE(lock->tickets); - if (!__tickets_equal(old.tickets.head, old.tickets.tail)) - return 0; - - new.head_tail = old.head_tail + (TICKET_LOCK_INC << TICKET_SHIFT); - new.head_tail &= ~TICKET_SLOWPATH_FLAG; - - /* cmpxchg is a full barrier, so nothing can move before it */ - return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail; -} - -static __always_inline void arch_spin_unlock(arch_spinlock_t *lock) -{ - if (TICKET_SLOWPATH_FLAG && - static_key_false(¶virt_ticketlocks_enabled)) { - __ticket_t head; - - BUILD_BUG_ON(((__ticket_t)NR_CPUS) != NR_CPUS); - - head = xadd(&lock->tickets.head, TICKET_LOCK_INC); - - if (unlikely(head & TICKET_SLOWPATH_FLAG)) { - head &= ~TICKET_SLOWPATH_FLAG; - __ticket_unlock_kick(lock, (head + TICKET_LOCK_INC)); - } - } else - __add(&lock->tickets.head, TICKET_LOCK_INC, UNLOCK_LOCK_PREFIX); -} - -static inline int arch_spin_is_locked(arch_spinlock_t *lock) -{ - struct __raw_tickets tmp = READ_ONCE(lock->tickets); - - return !__tickets_equal(tmp.tail, tmp.head); -} - -static inline int arch_spin_is_contended(arch_spinlock_t *lock) -{ - struct __raw_tickets tmp = READ_ONCE(lock->tickets); - - tmp.head &= ~TICKET_SLOWPATH_FLAG; - return (__ticket_t)(tmp.tail - tmp.head) > TICKET_LOCK_INC; -} -#define arch_spin_is_contended arch_spin_is_contended - -static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock, - unsigned long flags) -{ - arch_spin_lock(lock); -} - -static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) -{ - __ticket_t head = READ_ONCE(lock->tickets.head); - - for (;;) { - struct __raw_tickets tmp = READ_ONCE(lock->tickets); - /* - * We need to check "unlocked" in a loop, tmp.head == head - * can be false positive because of overflow. - */ - if (__tickets_equal(tmp.head, tmp.tail) || - !__tickets_equal(tmp.head, head)) - break; - - cpu_relax(); - } -} -#endif /* CONFIG_QUEUED_SPINLOCKS */ /* * Read-write spinlocks, allowing multiple readers diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h index 65c3e37f879a..25311ebb446c 100644 --- a/arch/x86/include/asm/spinlock_types.h +++ b/arch/x86/include/asm/spinlock_types.h @@ -23,20 +23,7 @@ typedef u32 __ticketpair_t; #define TICKET_SHIFT (sizeof(__ticket_t) * 8) -#ifdef CONFIG_QUEUED_SPINLOCKS #include <asm-generic/qspinlock_types.h> -#else -typedef struct arch_spinlock { - union { - __ticketpair_t head_tail; - struct __raw_tickets { - __ticket_t head, tail; - } tickets; - }; -} arch_spinlock_t; - -#define __ARCH_SPIN_LOCK_UNLOCKED { { 0 } } -#endif /* CONFIG_QUEUED_SPINLOCKS */ #include <asm-generic/qrwlock_types.h> diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index 0944218af9e2..37f2e0b377ad 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -8,86 +8,86 @@ #include <linux/uaccess.h> #include <linux/ptrace.h> +#include <asm/switch_to.h> + +enum stack_type { + STACK_TYPE_UNKNOWN, + STACK_TYPE_TASK, + STACK_TYPE_IRQ, + STACK_TYPE_SOFTIRQ, + STACK_TYPE_EXCEPTION, + STACK_TYPE_EXCEPTION_LAST = STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS-1, +}; -extern int kstack_depth_to_print; - -struct thread_info; -struct stacktrace_ops; - -typedef unsigned long (*walk_stack_t)(struct task_struct *task, - unsigned long *stack, - unsigned long bp, - const struct stacktrace_ops *ops, - void *data, - unsigned long *end, - int *graph); - -extern unsigned long -print_context_stack(struct task_struct *task, - unsigned long *stack, unsigned long bp, - const struct stacktrace_ops *ops, void *data, - unsigned long *end, int *graph); - -extern unsigned long -print_context_stack_bp(struct task_struct *task, - unsigned long *stack, unsigned long bp, - const struct stacktrace_ops *ops, void *data, - unsigned long *end, int *graph); - -/* Generic stack tracer with callbacks */ - -struct stacktrace_ops { - int (*address)(void *data, unsigned long address, int reliable); - /* On negative return stop dumping */ - int (*stack)(void *data, char *name); - walk_stack_t walk_stack; +struct stack_info { + enum stack_type type; + unsigned long *begin, *end, *next_sp; }; -void dump_trace(struct task_struct *tsk, struct pt_regs *regs, - unsigned long *stack, unsigned long bp, - const struct stacktrace_ops *ops, void *data); +bool in_task_stack(unsigned long *stack, struct task_struct *task, + struct stack_info *info); + +int get_stack_info(unsigned long *stack, struct task_struct *task, + struct stack_info *info, unsigned long *visit_mask); + +void stack_type_str(enum stack_type type, const char **begin, + const char **end); + +static inline bool on_stack(struct stack_info *info, void *addr, size_t len) +{ + void *begin = info->begin; + void *end = info->end; + + return (info->type != STACK_TYPE_UNKNOWN && + addr >= begin && addr < end && + addr + len > begin && addr + len <= end); +} + +extern int kstack_depth_to_print; #ifdef CONFIG_X86_32 #define STACKSLOTS_PER_LINE 8 -#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :) #else #define STACKSLOTS_PER_LINE 4 -#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) #endif #ifdef CONFIG_FRAME_POINTER -static inline unsigned long -stack_frame(struct task_struct *task, struct pt_regs *regs) +static inline unsigned long * +get_frame_pointer(struct task_struct *task, struct pt_regs *regs) { - unsigned long bp; - if (regs) - return regs->bp; + return (unsigned long *)regs->bp; - if (task == current) { - /* Grab bp right from our regs */ - get_bp(bp); - return bp; - } + if (task == current) + return __builtin_frame_address(0); - /* bp is the last reg pushed by switch_to */ - return *(unsigned long *)task->thread.sp; + return (unsigned long *)((struct inactive_task_frame *)task->thread.sp)->bp; } #else -static inline unsigned long -stack_frame(struct task_struct *task, struct pt_regs *regs) +static inline unsigned long * +get_frame_pointer(struct task_struct *task, struct pt_regs *regs) { - return 0; + return NULL; +} +#endif /* CONFIG_FRAME_POINTER */ + +static inline unsigned long * +get_stack_pointer(struct task_struct *task, struct pt_regs *regs) +{ + if (regs) + return (unsigned long *)kernel_stack_pointer(regs); + + if (task == current) + return __builtin_frame_address(0); + + return (unsigned long *)task->thread.sp; } -#endif -extern void -show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, unsigned long bp, char *log_lvl); +void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, + unsigned long *stack, char *log_lvl); -extern void -show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *sp, unsigned long bp, char *log_lvl); +void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, + unsigned long *sp, char *log_lvl); extern unsigned int code_bytes; @@ -106,7 +106,7 @@ static inline unsigned long caller_frame_pointer(void) { struct stack_frame *frame; - get_bp(frame); + frame = __builtin_frame_address(0); #ifdef CONFIG_FRAME_POINTER frame = frame->next_frame; diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h index 90dbbd9666d4..a164862d77e3 100644 --- a/arch/x86/include/asm/string_64.h +++ b/arch/x86/include/asm/string_64.h @@ -2,6 +2,7 @@ #define _ASM_X86_STRING_64_H #ifdef __KERNEL__ +#include <linux/jump_label.h> /* Written 2002 by Andi Kleen */ @@ -78,6 +79,9 @@ int strcmp(const char *cs, const char *ct); #define memset(s, c, n) __memset(s, c, n) #endif +__must_check int memcpy_mcsafe_unrolled(void *dst, const void *src, size_t cnt); +DECLARE_STATIC_KEY_FALSE(mcsafe_key); + /** * memcpy_mcsafe - copy memory with indication if a machine check happened * @@ -86,10 +90,23 @@ int strcmp(const char *cs, const char *ct); * @cnt: number of bytes to copy * * Low level memory copy function that catches machine checks + * We only call into the "safe" function on systems that can + * actually do machine check recovery. Everyone else can just + * use memcpy(). * * Return 0 for success, -EFAULT for fail */ -int memcpy_mcsafe(void *dst, const void *src, size_t cnt); +static __always_inline __must_check int +memcpy_mcsafe(void *dst, const void *src, size_t cnt) +{ +#ifdef CONFIG_X86_MCE + if (static_branch_unlikely(&mcsafe_key)) + return memcpy_mcsafe_unrolled(dst, src, cnt); + else +#endif + memcpy(dst, src, cnt); + return 0; +} #endif /* __KERNEL__ */ diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index 8f321a1b03a1..5cb436acd463 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -2,130 +2,66 @@ #define _ASM_X86_SWITCH_TO_H struct task_struct; /* one of the stranger aspects of C forward declarations */ + +struct task_struct *__switch_to_asm(struct task_struct *prev, + struct task_struct *next); + __visible struct task_struct *__switch_to(struct task_struct *prev, - struct task_struct *next); + struct task_struct *next); struct tss_struct; void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, struct tss_struct *tss); -#ifdef CONFIG_X86_32 +/* This runs runs on the previous thread's stack. */ +static inline void prepare_switch_to(struct task_struct *prev, + struct task_struct *next) +{ +#ifdef CONFIG_VMAP_STACK + /* + * If we switch to a stack that has a top-level paging entry + * that is not present in the current mm, the resulting #PF will + * will be promoted to a double-fault and we'll panic. Probe + * the new stack now so that vmalloc_fault can fix up the page + * tables if needed. This can only happen if we use a stack + * in vmap space. + * + * We assume that the stack is aligned so that it never spans + * more than one top-level paging entry. + * + * To minimize cache pollution, just follow the stack pointer. + */ + READ_ONCE(*(unsigned char *)next->thread.sp); +#endif +} + +asmlinkage void ret_from_fork(void); + +/* data that is pointed to by thread.sp */ +struct inactive_task_frame { +#ifdef CONFIG_X86_64 + unsigned long r15; + unsigned long r14; + unsigned long r13; + unsigned long r12; +#else + unsigned long si; + unsigned long di; +#endif + unsigned long bx; + unsigned long bp; + unsigned long ret_addr; +}; -#ifdef CONFIG_CC_STACKPROTECTOR -#define __switch_canary \ - "movl %P[task_canary](%[next]), %%ebx\n\t" \ - "movl %%ebx, "__percpu_arg([stack_canary])"\n\t" -#define __switch_canary_oparam \ - , [stack_canary] "=m" (stack_canary.canary) -#define __switch_canary_iparam \ - , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) -#else /* CC_STACKPROTECTOR */ -#define __switch_canary -#define __switch_canary_oparam -#define __switch_canary_iparam -#endif /* CC_STACKPROTECTOR */ +struct fork_frame { + struct inactive_task_frame frame; + struct pt_regs regs; +}; -/* - * Saving eflags is important. It switches not only IOPL between tasks, - * it also protects other tasks from NT leaking through sysenter etc. - */ #define switch_to(prev, next, last) \ do { \ - /* \ - * Context-switching clobbers all registers, so we clobber \ - * them explicitly, via unused output variables. \ - * (EAX and EBP is not listed because EBP is saved/restored \ - * explicitly for wchan access and EAX is the return value of \ - * __switch_to()) \ - */ \ - unsigned long ebx, ecx, edx, esi, edi; \ - \ - asm volatile("pushl %%ebp\n\t" /* save EBP */ \ - "movl %%esp,%[prev_sp]\n\t" /* save ESP */ \ - "movl %[next_sp],%%esp\n\t" /* restore ESP */ \ - "movl $1f,%[prev_ip]\n\t" /* save EIP */ \ - "pushl %[next_ip]\n\t" /* restore EIP */ \ - __switch_canary \ - "jmp __switch_to\n" /* regparm call */ \ - "1:\t" \ - "popl %%ebp\n\t" /* restore EBP */ \ - \ - /* output parameters */ \ - : [prev_sp] "=m" (prev->thread.sp), \ - [prev_ip] "=m" (prev->thread.ip), \ - "=a" (last), \ - \ - /* clobbered output registers: */ \ - "=b" (ebx), "=c" (ecx), "=d" (edx), \ - "=S" (esi), "=D" (edi) \ - \ - __switch_canary_oparam \ - \ - /* input parameters: */ \ - : [next_sp] "m" (next->thread.sp), \ - [next_ip] "m" (next->thread.ip), \ - \ - /* regparm parameters for __switch_to(): */ \ - [prev] "a" (prev), \ - [next] "d" (next) \ + prepare_switch_to(prev, next); \ \ - __switch_canary_iparam \ - \ - : /* reloaded segment registers */ \ - "memory"); \ + ((last) = __switch_to_asm((prev), (next))); \ } while (0) -#else /* CONFIG_X86_32 */ - -/* frame pointer must be last for get_wchan */ -#define SAVE_CONTEXT "pushq %%rbp ; movq %%rsi,%%rbp\n\t" -#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp\t" - -#define __EXTRA_CLOBBER \ - , "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \ - "r12", "r13", "r14", "r15", "flags" - -#ifdef CONFIG_CC_STACKPROTECTOR -#define __switch_canary \ - "movq %P[task_canary](%%rsi),%%r8\n\t" \ - "movq %%r8,"__percpu_arg([gs_canary])"\n\t" -#define __switch_canary_oparam \ - , [gs_canary] "=m" (irq_stack_union.stack_canary) -#define __switch_canary_iparam \ - , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) -#else /* CC_STACKPROTECTOR */ -#define __switch_canary -#define __switch_canary_oparam -#define __switch_canary_iparam -#endif /* CC_STACKPROTECTOR */ - -/* - * There is no need to save or restore flags, because flags are always - * clean in kernel mode, with the possible exception of IOPL. Kernel IOPL - * has no effect. - */ -#define switch_to(prev, next, last) \ - asm volatile(SAVE_CONTEXT \ - "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ - "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ - "call __switch_to\n\t" \ - "movq "__percpu_arg([current_task])",%%rsi\n\t" \ - __switch_canary \ - "movq %P[thread_info](%%rsi),%%r8\n\t" \ - "movq %%rax,%%rdi\n\t" \ - "testl %[_tif_fork],%P[ti_flags](%%r8)\n\t" \ - "jnz ret_from_fork\n\t" \ - RESTORE_CONTEXT \ - : "=a" (last) \ - __switch_canary_oparam \ - : [next] "S" (next), [prev] "D" (prev), \ - [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \ - [ti_flags] "i" (offsetof(struct thread_info, flags)), \ - [_tif_fork] "i" (_TIF_FORK), \ - [thread_info] "i" (offsetof(struct task_struct, stack)), \ - [current_task] "m" (current_task) \ - __switch_canary_iparam \ - : "memory", "cc" __EXTRA_CLOBBER) - -#endif /* CONFIG_X86_32 */ - #endif /* _ASM_X86_SWITCH_TO_H */ diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index 4e23dd15c661..e3c95e8e61c5 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -60,7 +60,7 @@ static inline long syscall_get_error(struct task_struct *task, * TS_COMPAT is set for 32-bit syscall entries and then * remains set until we return to user mode. */ - if (task_thread_info(task)->status & (TS_COMPAT|TS_I386_REGS_POKED)) + if (task->thread.status & (TS_COMPAT|TS_I386_REGS_POKED)) /* * Sign-extend the value so (int)-EFOO becomes (long)-EFOO * and will match correctly in comparisons. @@ -116,7 +116,7 @@ static inline void syscall_get_arguments(struct task_struct *task, unsigned long *args) { # ifdef CONFIG_IA32_EMULATION - if (task_thread_info(task)->status & TS_COMPAT) + if (task->thread.status & TS_COMPAT) switch (i) { case 0: if (!n--) break; @@ -177,7 +177,7 @@ static inline void syscall_set_arguments(struct task_struct *task, const unsigned long *args) { # ifdef CONFIG_IA32_EMULATION - if (task_thread_info(task)->status & TS_COMPAT) + if (task->thread.status & TS_COMPAT) switch (i) { case 0: if (!n--) break; @@ -234,18 +234,8 @@ static inline void syscall_set_arguments(struct task_struct *task, static inline int syscall_get_arch(void) { -#ifdef CONFIG_IA32_EMULATION - /* - * TS_COMPAT is set for 32-bit syscall entry and then - * remains set until we return to user mode. - * - * x32 tasks should be considered AUDIT_ARCH_X86_64. - */ - if (task_thread_info(current)->status & TS_COMPAT) - return AUDIT_ARCH_I386; -#endif - /* Both x32 and x86_64 are considered "64-bit". */ - return AUDIT_ARCH_X86_64; + /* x32 tasks should be considered AUDIT_ARCH_X86_64. */ + return in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64; } #endif /* CONFIG_X86_32 */ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 8b7c8d8e0852..2aaca53c0974 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -52,21 +52,6 @@ struct task_struct; #include <asm/cpufeature.h> #include <linux/atomic.h> -struct thread_info { - struct task_struct *task; /* main task structure */ - __u32 flags; /* low level flags */ - __u32 status; /* thread synchronous flags */ - __u32 cpu; /* current CPU */ -}; - -#define INIT_THREAD_INFO(tsk) \ -{ \ - .task = &tsk, \ - .flags = 0, \ - .cpu = 0, \ -} - -#define init_thread_info (init_thread_union.thread_info) #define init_stack (init_thread_union.stack) #else /* !__ASSEMBLY__ */ @@ -95,7 +80,6 @@ struct thread_info { #define TIF_UPROBE 12 /* breakpointed or singlestepping */ #define TIF_NOTSC 16 /* TSC is not accessible in userland */ #define TIF_IA32 17 /* IA32 compatibility process */ -#define TIF_FORK 18 /* ret_from_fork */ #define TIF_NOHZ 19 /* in adaptive nohz mode */ #define TIF_MEMDIE 20 /* is terminating due to OOM killer */ #define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */ @@ -119,7 +103,6 @@ struct thread_info { #define _TIF_UPROBE (1 << TIF_UPROBE) #define _TIF_NOTSC (1 << TIF_NOTSC) #define _TIF_IA32 (1 << TIF_IA32) -#define _TIF_FORK (1 << TIF_FORK) #define _TIF_NOHZ (1 << TIF_NOHZ) #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) @@ -160,11 +143,6 @@ struct thread_info { */ #ifndef __ASSEMBLY__ -static inline struct thread_info *current_thread_info(void) -{ - return (struct thread_info *)(current_top_of_stack() - THREAD_SIZE); -} - static inline unsigned long current_stack_pointer(void) { unsigned long sp; @@ -226,60 +204,19 @@ static inline int arch_within_stack_frames(const void * const stack, # define cpu_current_top_of_stack (cpu_tss + TSS_sp0) #endif -/* - * ASM operand which evaluates to a 'thread_info' address of - * the current task, if it is known that "reg" is exactly "off" - * bytes below the top of the stack currently. - * - * ( The kernel stack's size is known at build time, it is usually - * 2 or 4 pages, and the bottom of the kernel stack contains - * the thread_info structure. So to access the thread_info very - * quickly from assembly code we can calculate down from the - * top of the kernel stack to the bottom, using constant, - * build-time calculations only. ) - * - * For example, to fetch the current thread_info->flags value into %eax - * on x86-64 defconfig kernels, in syscall entry code where RSP is - * currently at exactly SIZEOF_PTREGS bytes away from the top of the - * stack: - * - * mov ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS), %eax - * - * will translate to: - * - * 8b 84 24 b8 c0 ff ff mov -0x3f48(%rsp), %eax - * - * which is below the current RSP by almost 16K. - */ -#define ASM_THREAD_INFO(field, reg, off) ((field)+(off)-THREAD_SIZE)(reg) - #endif -/* - * Thread-synchronous status. - * - * This is different from the flags in that nobody else - * ever touches our thread-synchronous status, so we don't - * have to worry about atomic accesses. - */ -#define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/ #ifdef CONFIG_COMPAT #define TS_I386_REGS_POKED 0x0004 /* regs poked by 32-bit ptracer */ #endif - #ifndef __ASSEMBLY__ -static inline bool in_ia32_syscall(void) -{ #ifdef CONFIG_X86_32 - return true; -#endif -#ifdef CONFIG_IA32_EMULATION - if (current_thread_info()->status & TS_COMPAT) - return true; +#define in_ia32_syscall() true +#else +#define in_ia32_syscall() (IS_ENABLED(CONFIG_IA32_EMULATION) && \ + current->thread.status & TS_COMPAT) #endif - return false; -} /* * Force syscall return via IRET by making it look as if there was diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index c3496619740a..01fd0a7f48cd 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -117,6 +117,12 @@ extern void ist_exit(struct pt_regs *regs); extern void ist_begin_non_atomic(struct pt_regs *regs); extern void ist_end_non_atomic(void); +#ifdef CONFIG_VMAP_STACK +void __noreturn handle_stack_overflow(const char *message, + struct pt_regs *regs, + unsigned long fault_address); +#endif + /* Interrupts/Exceptions */ enum { X86_TRAP_DE = 0, /* 0, Divide-by-zero */ diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index c3f291195294..2131c4ce7d8a 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -433,7 +433,11 @@ do { \ #define __get_user_asm_ex(x, addr, itype, rtype, ltype) \ asm volatile("1: mov"itype" %1,%"rtype"0\n" \ "2:\n" \ - _ASM_EXTABLE_EX(1b, 2b) \ + ".section .fixup,\"ax\"\n" \ + "3:xor"itype" %"rtype"0,%"rtype"0\n" \ + " jmp 2b\n" \ + ".previous\n" \ + _ASM_EXTABLE_EX(1b, 3b) \ : ltype(x) : "m" (__m(addr))) #define __put_user_nocheck(x, ptr, size) \ @@ -705,7 +709,7 @@ static inline void copy_user_overflow(int size, unsigned long count) WARN(1, "Buffer overflow detected (%d < %lu)!\n", size, count); } -static inline unsigned long __must_check +static __always_inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) { int sz = __compiletime_object_size(to); @@ -725,7 +729,7 @@ copy_from_user(void *to, const void __user *from, unsigned long n) return n; } -static inline unsigned long __must_check +static __always_inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n) { int sz = __compiletime_object_size(from); diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h new file mode 100644 index 000000000000..c4b6d1cafa46 --- /dev/null +++ b/arch/x86/include/asm/unwind.h @@ -0,0 +1,73 @@ +#ifndef _ASM_X86_UNWIND_H +#define _ASM_X86_UNWIND_H + +#include <linux/sched.h> +#include <linux/ftrace.h> +#include <asm/ptrace.h> +#include <asm/stacktrace.h> + +struct unwind_state { + struct stack_info stack_info; + unsigned long stack_mask; + struct task_struct *task; + int graph_idx; +#ifdef CONFIG_FRAME_POINTER + unsigned long *bp; +#else + unsigned long *sp; +#endif +}; + +void __unwind_start(struct unwind_state *state, struct task_struct *task, + struct pt_regs *regs, unsigned long *first_frame); + +bool unwind_next_frame(struct unwind_state *state); + +static inline bool unwind_done(struct unwind_state *state) +{ + return state->stack_info.type == STACK_TYPE_UNKNOWN; +} + +static inline +void unwind_start(struct unwind_state *state, struct task_struct *task, + struct pt_regs *regs, unsigned long *first_frame) +{ + first_frame = first_frame ? : get_stack_pointer(task, regs); + + __unwind_start(state, task, regs, first_frame); +} + +#ifdef CONFIG_FRAME_POINTER + +static inline +unsigned long *unwind_get_return_address_ptr(struct unwind_state *state) +{ + if (unwind_done(state)) + return NULL; + + return state->bp + 1; +} + +unsigned long unwind_get_return_address(struct unwind_state *state); + +#else /* !CONFIG_FRAME_POINTER */ + +static inline +unsigned long *unwind_get_return_address_ptr(struct unwind_state *state) +{ + return NULL; +} + +static inline +unsigned long unwind_get_return_address(struct unwind_state *state) +{ + if (unwind_done(state)) + return 0; + + return ftrace_graph_ret_addr(state->task, &state->graph_idx, + *state->sp, state->sp); +} + +#endif /* CONFIG_FRAME_POINTER */ + +#endif /* _ASM_X86_UNWIND_H */ diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index cc44d926c17e..57ab86d94d64 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -49,14 +49,12 @@ #define UV_NET_ENDPOINT_INTD (is_uv1_hub() ? \ UV1_NET_ENDPOINT_INTD : UV2_NET_ENDPOINT_INTD) #define UV_DESC_PSHIFT 49 -#define UV_PAYLOADQ_PNODE_SHIFT 49 +#define UV_PAYLOADQ_GNODE_SHIFT 49 #define UV_PTC_BASENAME "sgi_uv/ptc_statistics" #define UV_BAU_BASENAME "sgi_uv/bau_tunables" #define UV_BAU_TUNABLES_DIR "sgi_uv" #define UV_BAU_TUNABLES_FILE "bau_tunables" #define WHITESPACE " \t\n" -#define uv_mmask ((1UL << uv_hub_info->m_val) - 1) -#define uv_physnodeaddr(x) ((__pa((unsigned long)(x)) & uv_mmask)) #define cpubit_isset(cpu, bau_local_cpumask) \ test_bit((cpu), (bau_local_cpumask).bits) @@ -387,6 +385,17 @@ struct uv2_3_bau_msg_header { /* bits 127:120 */ }; +/* Abstracted BAU functions */ +struct bau_operations { + unsigned long (*read_l_sw_ack)(void); + unsigned long (*read_g_sw_ack)(int pnode); + unsigned long (*bau_gpa_to_offset)(unsigned long vaddr); + void (*write_l_sw_ack)(unsigned long mmr); + void (*write_g_sw_ack)(int pnode, unsigned long mmr); + void (*write_payload_first)(int pnode, unsigned long mmr); + void (*write_payload_last)(int pnode, unsigned long mmr); +}; + /* * The activation descriptor: * The format of the message to send, plus all accompanying control @@ -655,6 +664,16 @@ static inline void write_gmmr_activation(int pnode, unsigned long mmr_image) write_gmmr(pnode, UVH_LB_BAU_SB_ACTIVATION_CONTROL, mmr_image); } +static inline void write_mmr_proc_payload_first(int pnode, unsigned long mmr_image) +{ + write_gmmr(pnode, UV4H_LB_PROC_INTD_QUEUE_FIRST, mmr_image); +} + +static inline void write_mmr_proc_payload_last(int pnode, unsigned long mmr_image) +{ + write_gmmr(pnode, UV4H_LB_PROC_INTD_QUEUE_LAST, mmr_image); +} + static inline void write_mmr_payload_first(int pnode, unsigned long mmr_image) { write_gmmr(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST, mmr_image); @@ -700,6 +719,26 @@ static inline unsigned long read_gmmr_sw_ack(int pnode) return read_gmmr(pnode, UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE); } +static inline void write_mmr_proc_sw_ack(unsigned long mr) +{ + uv_write_local_mmr(UV4H_LB_PROC_INTD_SOFT_ACK_CLEAR, mr); +} + +static inline void write_gmmr_proc_sw_ack(int pnode, unsigned long mr) +{ + write_gmmr(pnode, UV4H_LB_PROC_INTD_SOFT_ACK_CLEAR, mr); +} + +static inline unsigned long read_mmr_proc_sw_ack(void) +{ + return read_lmmr(UV4H_LB_PROC_INTD_SOFT_ACK_PENDING); +} + +static inline unsigned long read_gmmr_proc_sw_ack(int pnode) +{ + return read_gmmr(pnode, UV4H_LB_PROC_INTD_SOFT_ACK_PENDING); +} + static inline void write_mmr_data_config(int pnode, unsigned long mr) { uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, mr); diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h index 43dc55be524e..2444189cbe28 100644 --- a/arch/x86/include/asm/vdso.h +++ b/arch/x86/include/asm/vdso.h @@ -41,6 +41,8 @@ extern const struct vdso_image vdso_image_32; extern void __init init_vdso_image(const struct vdso_image *image); +extern int map_vdso_once(const struct vdso_image *image, unsigned long addr); + #endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_VDSO_H */ diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h index 2184943341bf..69a6e07e3149 100644 --- a/arch/x86/include/uapi/asm/mce.h +++ b/arch/x86/include/uapi/asm/mce.h @@ -26,6 +26,8 @@ struct mce { __u32 socketid; /* CPU socket ID */ __u32 apicid; /* CPU initial apic ID */ __u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */ + __u64 synd; /* MCA_SYND MSR: only valid on SMCA systems */ + __u64 ipid; /* MCA_IPID MSR: only valid on SMCA systems */ }; #define MCE_GET_RECORD_LEN _IOR('M', 1, int) diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h index 3ac5032fae09..ae135de547f5 100644 --- a/arch/x86/include/uapi/asm/prctl.h +++ b/arch/x86/include/uapi/asm/prctl.h @@ -6,4 +6,10 @@ #define ARCH_GET_FS 0x1003 #define ARCH_GET_GS 0x1004 +#ifdef CONFIG_CHECKPOINT_RESTORE +# define ARCH_MAP_VDSO_X32 0x2001 +# define ARCH_MAP_VDSO_32 0x2002 +# define ARCH_MAP_VDSO_64 0x2003 +#endif + #endif /* _ASM_X86_PRCTL_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 0503f5bfb18d..45257cf84370 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -125,6 +125,12 @@ obj-$(CONFIG_EFI) += sysfb_efi.o obj-$(CONFIG_PERF_EVENTS) += perf_regs.o obj-$(CONFIG_TRACING) += tracepoint.o +ifdef CONFIG_FRAME_POINTER +obj-y += unwind_frame.o +else +obj-y += unwind_guess.o +endif + ### # 64 bit specific files ifeq ($(CONFIG_X86_64),y) diff --git a/arch/x86/kernel/acpi/Makefile b/arch/x86/kernel/acpi/Makefile index 3242e591fa82..26b78d86f25a 100644 --- a/arch/x86/kernel/acpi/Makefile +++ b/arch/x86/kernel/acpi/Makefile @@ -1,6 +1,7 @@ obj-$(CONFIG_ACPI) += boot.o obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup_$(BITS).o obj-$(CONFIG_ACPI_APEI) += apei.o +obj-$(CONFIG_ACPI_CPPC_LIB) += cppc_msr.o ifneq ($(CONFIG_ACPI_PROCESSOR),) obj-y += cstate.o diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 90d84c3eee53..32a7d70913ac 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -176,15 +176,10 @@ static int acpi_register_lapic(int id, u32 acpiid, u8 enabled) return -EINVAL; } - if (!enabled) { - ++disabled_cpus; - return -EINVAL; - } - if (boot_cpu_physical_apicid != -1U) - ver = apic_version[boot_cpu_physical_apicid]; + ver = boot_cpu_apic_version; - cpu = generic_processor_info(id, ver); + cpu = __generic_processor_info(id, ver, enabled); if (cpu >= 0) early_per_cpu(x86_cpu_to_acpiid, cpu) = acpiid; @@ -282,6 +277,8 @@ acpi_parse_lapic_addr_ovr(struct acpi_subtable_header * header, if (BAD_MADT_ENTRY(lapic_addr_ovr, end)) return -EINVAL; + acpi_table_print_madt_entry(header); + acpi_lapic_addr = lapic_addr_ovr->address; return 0; @@ -705,7 +702,7 @@ static void __init acpi_set_irq_model_ioapic(void) #ifdef CONFIG_ACPI_HOTPLUG_CPU #include <acpi/processor.h> -static void acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) +int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) { #ifdef CONFIG_ACPI_NUMA int nid; @@ -716,6 +713,7 @@ static void acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) numa_set_node(cpu, nid); } #endif + return 0; } int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu) @@ -998,21 +996,6 @@ static int __init acpi_parse_madt_lapic_entries(void) if (!boot_cpu_has(X86_FEATURE_APIC)) return -ENODEV; - /* - * Note that the LAPIC address is obtained from the MADT (32-bit value) - * and (optionally) overridden by a LAPIC_ADDR_OVR entry (64-bit value). - */ - - count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE, - acpi_parse_lapic_addr_ovr, 0); - if (count < 0) { - printk(KERN_ERR PREFIX - "Error parsing LAPIC address override entry\n"); - return count; - } - - register_lapic_address(acpi_lapic_addr); - count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_SAPIC, acpi_parse_sapic, MAX_LOCAL_APIC); @@ -1031,8 +1014,8 @@ static int __init acpi_parse_madt_lapic_entries(void) return ret; } - x2count = madt_proc[0].count; - count = madt_proc[1].count; + count = madt_proc[0].count; + x2count = madt_proc[1].count; } if (!count && !x2count) { printk(KERN_ERR PREFIX "No LAPIC entries present\n"); @@ -1513,7 +1496,7 @@ void __init acpi_boot_table_init(void) * If acpi_disabled, bail out */ if (acpi_disabled) - return; + return; /* * Initialize the ACPI boot-time table parser. diff --git a/arch/x86/kernel/acpi/cppc_msr.c b/arch/x86/kernel/acpi/cppc_msr.c new file mode 100644 index 000000000000..6fb478bf82fd --- /dev/null +++ b/arch/x86/kernel/acpi/cppc_msr.c @@ -0,0 +1,58 @@ +/* + * cppc_msr.c: MSR Interface for CPPC + * Copyright (c) 2016, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#include <acpi/cppc_acpi.h> +#include <asm/msr.h> + +/* Refer to drivers/acpi/cppc_acpi.c for the description of functions */ + +bool cpc_ffh_supported(void) +{ + return true; +} + +int cpc_read_ffh(int cpunum, struct cpc_reg *reg, u64 *val) +{ + int err; + + err = rdmsrl_safe_on_cpu(cpunum, reg->address, val); + if (!err) { + u64 mask = GENMASK_ULL(reg->bit_offset + reg->bit_width - 1, + reg->bit_offset); + + *val &= mask; + *val >>= reg->bit_offset; + } + return err; +} + +int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val) +{ + u64 rd_val; + int err; + + err = rdmsrl_safe_on_cpu(cpunum, reg->address, &rd_val); + if (!err) { + u64 mask = GENMASK_ULL(reg->bit_offset + reg->bit_width - 1, + reg->bit_offset); + + val <<= reg->bit_offset; + val &= mask; + rd_val &= ~mask; + rd_val |= val; + err = wrmsrl_safe_on_cpu(cpunum, reg->address, rd_val); + } + return err; +} diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index adb3eaf8fe2a..48587335ede8 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -99,7 +99,7 @@ int x86_acpi_suspend_lowlevel(void) saved_magic = 0x12345678; #else /* CONFIG_64BIT */ #ifdef CONFIG_SMP - stack_start = (unsigned long)temp_stack + sizeof(temp_stack); + initial_stack = (unsigned long)temp_stack + sizeof(temp_stack); early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(smp_processor_id()); initial_gs = per_cpu_offset(smp_processor_id()); diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 50c95af0f017..f266b8a92a9e 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -64,6 +64,8 @@ unsigned disabled_cpus; unsigned int boot_cpu_physical_apicid = -1U; EXPORT_SYMBOL_GPL(boot_cpu_physical_apicid); +u8 boot_cpu_apic_version; + /* * The highest APIC ID seen during enumeration. */ @@ -1374,7 +1376,6 @@ void setup_local_APIC(void) * Actually disabling the focus CPU check just makes the hang less * frequent as it makes the interrupt distributon model be more * like LRU than MRU (the short-term load is more even across CPUs). - * See also the comment in end_level_ioapic_irq(). --macro */ /* @@ -1816,8 +1817,7 @@ void __init init_apic_mappings(void) * since smp_sanity_check is prepared for such a case * and disable smp mode */ - apic_version[new_apicid] = - GET_APIC_VERSION(apic_read(APIC_LVR)); + boot_cpu_apic_version = GET_APIC_VERSION(apic_read(APIC_LVR)); } } @@ -1828,17 +1828,14 @@ void __init register_lapic_address(unsigned long address) if (!x2apic_mode) { set_fixmap_nocache(FIX_APIC_BASE, address); apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", - APIC_BASE, mp_lapic_addr); + APIC_BASE, address); } if (boot_cpu_physical_apicid == -1U) { boot_cpu_physical_apicid = read_apic_id(); - apic_version[boot_cpu_physical_apicid] = - GET_APIC_VERSION(apic_read(APIC_LVR)); + boot_cpu_apic_version = GET_APIC_VERSION(apic_read(APIC_LVR)); } } -int apic_version[MAX_LOCAL_APIC]; - /* * Local APIC interrupts */ @@ -2027,7 +2024,53 @@ void disconnect_bsp_APIC(int virt_wire_setup) apic_write(APIC_LVT1, value); } -int generic_processor_info(int apicid, int version) +/* + * The number of allocated logical CPU IDs. Since logical CPU IDs are allocated + * contiguously, it equals to current allocated max logical CPU ID plus 1. + * All allocated CPU ID should be in [0, nr_logical_cpuidi), so the maximum of + * nr_logical_cpuids is nr_cpu_ids. + * + * NOTE: Reserve 0 for BSP. + */ +static int nr_logical_cpuids = 1; + +/* + * Used to store mapping between logical CPU IDs and APIC IDs. + */ +static int cpuid_to_apicid[] = { + [0 ... NR_CPUS - 1] = -1, +}; + +/* + * Should use this API to allocate logical CPU IDs to keep nr_logical_cpuids + * and cpuid_to_apicid[] synchronized. + */ +static int allocate_logical_cpuid(int apicid) +{ + int i; + + /* + * cpuid <-> apicid mapping is persistent, so when a cpu is up, + * check if the kernel has allocated a cpuid for it. + */ + for (i = 0; i < nr_logical_cpuids; i++) { + if (cpuid_to_apicid[i] == apicid) + return i; + } + + /* Allocate a new cpuid. */ + if (nr_logical_cpuids >= nr_cpu_ids) { + WARN_ONCE(1, "Only %d processors supported." + "Processor %d/0x%x and the rest are ignored.\n", + nr_cpu_ids - 1, nr_logical_cpuids, apicid); + return -1; + } + + cpuid_to_apicid[nr_logical_cpuids] = apicid; + return nr_logical_cpuids++; +} + +int __generic_processor_info(int apicid, int version, bool enabled) { int cpu, max = nr_cpu_ids; bool boot_cpu_detected = physid_isset(boot_cpu_physical_apicid, @@ -2093,7 +2136,6 @@ int generic_processor_info(int apicid, int version) return -EINVAL; } - num_processors++; if (apicid == boot_cpu_physical_apicid) { /* * x86_bios_cpu_apicid is required to have processors listed @@ -2103,8 +2145,16 @@ int generic_processor_info(int apicid, int version) * for BSP. */ cpu = 0; - } else - cpu = cpumask_next_zero(-1, cpu_present_mask); + + /* Logical cpuid 0 is reserved for BSP. */ + cpuid_to_apicid[0] = apicid; + } else { + cpu = allocate_logical_cpuid(apicid); + if (cpu < 0) { + disabled_cpus++; + return -EINVAL; + } + } /* * This can happen on physical hotplug. The sanity check at boot time @@ -2116,6 +2166,7 @@ int generic_processor_info(int apicid, int version) pr_warning("APIC: Package limit reached. Processor %d/0x%x ignored.\n", thiscpu, apicid); + disabled_cpus++; return -ENOSPC; } @@ -2128,14 +2179,12 @@ int generic_processor_info(int apicid, int version) cpu, apicid); version = 0x10; } - apic_version[apicid] = version; - if (version != apic_version[boot_cpu_physical_apicid]) { + if (version != boot_cpu_apic_version) { pr_warning("BIOS bug: APIC version mismatch, boot CPU: %x, CPU %d: version %x\n", - apic_version[boot_cpu_physical_apicid], cpu, version); + boot_cpu_apic_version, cpu, version); } - physid_set(apicid, phys_cpu_present_map); if (apicid > max_physical_apicid) max_physical_apicid = apicid; @@ -2148,11 +2197,23 @@ int generic_processor_info(int apicid, int version) apic->x86_32_early_logical_apicid(cpu); #endif set_cpu_possible(cpu, true); - set_cpu_present(cpu, true); + + if (enabled) { + num_processors++; + physid_set(apicid, phys_cpu_present_map); + set_cpu_present(cpu, true); + } else { + disabled_cpus++; + } return cpu; } +int generic_processor_info(int apicid, int version) +{ + return __generic_processor_info(apicid, version, true); +} + int hard_smp_processor_id(void) { return read_apic_id(); @@ -2275,7 +2336,7 @@ int __init APIC_init_uniprocessor(void) * Complain if the BIOS pretends there is one. */ if (!boot_cpu_has(X86_FEATURE_APIC) && - APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { + APIC_INTEGRATED(boot_cpu_apic_version)) { pr_err("BIOS bug, local APIC 0x%x not detected!...\n", boot_cpu_physical_apicid); return -1; diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 5b2ae106bd4a..a4d7ff20ed22 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -25,7 +25,7 @@ static struct apic apic_physflat; static struct apic apic_flat; -struct apic __read_mostly *apic = &apic_flat; +struct apic *apic __ro_after_init = &apic_flat; EXPORT_SYMBOL_GPL(apic); static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) @@ -116,27 +116,17 @@ static void flat_send_IPI_all(int vector) static unsigned int flat_get_apic_id(unsigned long x) { - unsigned int id; - - id = (((x)>>24) & 0xFFu); - - return id; + return (x >> 24) & 0xFF; } static unsigned long set_apic_id(unsigned int id) { - unsigned long x; - - x = ((id & 0xFFu)<<24); - return x; + return (id & 0xFF) << 24; } static unsigned int read_xapic_id(void) { - unsigned int id; - - id = flat_get_apic_id(apic_read(APIC_ID)); - return id; + return flat_get_apic_id(apic_read(APIC_ID)); } static int flat_apic_id_registered(void) @@ -154,7 +144,7 @@ static int flat_probe(void) return 1; } -static struct apic apic_flat = { +static struct apic apic_flat __ro_after_init = { .name = "flat", .probe = flat_probe, .acpi_madt_oem_check = flat_acpi_madt_oem_check, @@ -248,7 +238,7 @@ static int physflat_probe(void) return 0; } -static struct apic apic_physflat = { +static struct apic apic_physflat __ro_after_init = { .name = "physical flat", .probe = physflat_probe, diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index c05688b2deff..b109e4389c92 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -108,7 +108,7 @@ static void noop_apic_write(u32 reg, u32 v) WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_APIC) && !disable_apic); } -struct apic apic_noop = { +struct apic apic_noop __ro_after_init = { .name = "noop", .probe = noop_probe, .acpi_madt_oem_check = NULL, diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index 714d4fda0d52..e08fe2c8dd8c 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -40,10 +40,7 @@ static unsigned int numachip1_get_apic_id(unsigned long x) static unsigned long numachip1_set_apic_id(unsigned int id) { - unsigned long x; - - x = ((id & 0xffU) << 24); - return x; + return (id & 0xff) << 24; } static unsigned int numachip2_get_apic_id(unsigned long x) diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index 06dbaa458bfe..56012010332c 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -142,7 +142,7 @@ static int probe_bigsmp(void) return dmi_bigsmp; } -static struct apic apic_bigsmp = { +static struct apic apic_bigsmp __ro_after_init = { .name = "bigsmp", .probe = probe_bigsmp, diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 7491f417a8e4..48e6d84f173e 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1593,7 +1593,7 @@ void __init setup_ioapic_ids_from_mpc(void) * no meaning without the serial APIC bus. */ if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) - || APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) + || APIC_XAPIC(boot_cpu_apic_version)) return; setup_ioapic_ids_from_mpc_nocheck(); } @@ -2423,7 +2423,7 @@ static int io_apic_get_unique_id(int ioapic, int apic_id) static u8 io_apic_unique_id(int idx, u8 id) { if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && - !APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) + !APIC_XAPIC(boot_cpu_apic_version)) return io_apic_get_unique_id(idx, id); else return id; diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index ade25320df96..015bbf30e3e3 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -269,7 +269,7 @@ static void hpet_msi_write_msg(struct irq_data *data, struct msi_msg *msg) hpet_msi_write(irq_data_get_irq_handler_data(data), msg); } -static struct irq_chip hpet_msi_controller = { +static struct irq_chip hpet_msi_controller __ro_after_init = { .name = "HPET-MSI", .irq_unmask = hpet_msi_unmask, .irq_mask = hpet_msi_mask, diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 7c43e716c158..c48264e202fd 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -72,7 +72,7 @@ static int probe_default(void) return 1; } -static struct apic apic_default = { +static struct apic apic_default __ro_after_init = { .name = "default", .probe = probe_default, @@ -126,7 +126,7 @@ static struct apic apic_default = { apic_driver(apic_default); -struct apic *apic = &apic_default; +struct apic *apic __ro_after_init = &apic_default; EXPORT_SYMBOL_GPL(apic); static int cmdline_apic __initdata; @@ -152,7 +152,7 @@ early_param("apic", parse_apic); void __init default_setup_apic_routing(void) { - int version = apic_version[boot_cpu_physical_apicid]; + int version = boot_cpu_apic_version; if (num_possible_cpus() > 8) { switch (boot_cpu_data.x86_vendor) { diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 54f35d988025..200af5ae9662 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -227,7 +227,7 @@ static void cluster_vector_allocation_domain(int cpu, struct cpumask *retmask, cpumask_and(retmask, mask, per_cpu(cpus_in_cluster, cpu)); } -static struct apic apic_x2apic_cluster = { +static struct apic apic_x2apic_cluster __ro_after_init = { .name = "cluster x2apic", .probe = x2apic_cluster_probe, diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index 4f13f54f1b1f..ff111f05a314 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -98,7 +98,7 @@ static int x2apic_phys_probe(void) return apic == &apic_x2apic_phys; } -static struct apic apic_x2apic_phys = { +static struct apic apic_x2apic_phys __ro_after_init = { .name = "physical x2apic", .probe = x2apic_phys_probe, diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index cb0673c1e940..aeef53ce93e1 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -533,11 +533,8 @@ static unsigned int x2apic_get_apic_id(unsigned long x) static unsigned long set_apic_id(unsigned int id) { - unsigned long x; - - /* maskout x2apic_extra_bits ? */ - x = id; - return x; + /* CHECKME: Do we need to mask out the xapic extra bits? */ + return id; } static unsigned int uv_read_apic_id(void) @@ -560,7 +557,7 @@ static int uv_probe(void) return apic == &apic_x2apic_uv_x; } -static struct apic __refdata apic_x2apic_uv_x = { +static struct apic apic_x2apic_uv_x __ro_after_init = { .name = "UV large system", .probe = uv_probe, @@ -927,7 +924,7 @@ static void uv_heartbeat(unsigned long ignored) mod_timer(timer, jiffies + SCIR_CPU_HB_INTERVAL); } -static void uv_heartbeat_enable(int cpu) +static int uv_heartbeat_enable(unsigned int cpu) { while (!uv_cpu_scir_info(cpu)->enabled) { struct timer_list *timer = &uv_cpu_scir_info(cpu)->timer; @@ -941,43 +938,24 @@ static void uv_heartbeat_enable(int cpu) /* also ensure that boot cpu is enabled */ cpu = 0; } + return 0; } #ifdef CONFIG_HOTPLUG_CPU -static void uv_heartbeat_disable(int cpu) +static int uv_heartbeat_disable(unsigned int cpu) { if (uv_cpu_scir_info(cpu)->enabled) { uv_cpu_scir_info(cpu)->enabled = 0; del_timer(&uv_cpu_scir_info(cpu)->timer); } uv_set_cpu_scir_bits(cpu, 0xff); -} - -/* - * cpu hotplug notifier - */ -static int uv_scir_cpu_notify(struct notifier_block *self, unsigned long action, - void *hcpu) -{ - long cpu = (long)hcpu; - - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_DOWN_FAILED: - case CPU_ONLINE: - uv_heartbeat_enable(cpu); - break; - case CPU_DOWN_PREPARE: - uv_heartbeat_disable(cpu); - break; - default: - break; - } - return NOTIFY_OK; + return 0; } static __init void uv_scir_register_cpu_notifier(void) { - hotcpu_notifier(uv_scir_cpu_notify, 0); + cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/x2apic-uvx:online", + uv_heartbeat_enable, uv_heartbeat_disable); } #else /* !CONFIG_HOTPLUG_CPU */ diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 2bd5c6ff7ee7..c62e015b126c 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -29,10 +29,13 @@ void common(void) { BLANK(); - OFFSET(TI_flags, thread_info, flags); - OFFSET(TI_status, thread_info, status); + OFFSET(TASK_threadsp, task_struct, thread.sp); +#ifdef CONFIG_CC_STACKPROTECTOR + OFFSET(TASK_stack_canary, task_struct, stack_canary); +#endif BLANK(); + OFFSET(TASK_TI_flags, task_struct, thread_info.flags); OFFSET(TASK_addr_limit, task_struct, thread.addr_limit); BLANK(); diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index ecdc1d217dc0..880aa093268d 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -57,6 +57,11 @@ void foo(void) /* Size of SYSENTER_stack */ DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack)); +#ifdef CONFIG_CC_STACKPROTECTOR + BLANK(); + OFFSET(stack_canary_offset, stack_canary, canary); +#endif + #if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE) BLANK(); OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled); diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index d875f97d4e0b..210927ee2e74 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -56,6 +56,11 @@ int main(void) OFFSET(TSS_sp0, tss_struct, x86_tss.sp0); BLANK(); +#ifdef CONFIG_CC_STACKPROTECTOR + DEFINE(stack_canary_offset, offsetof(union irq_stack_union, stack_canary)); + BLANK(); +#endif + DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1); DEFINE(NR_syscalls, sizeof(syscalls_64)); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 809eda03c527..9bd910a7dd0a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -804,21 +804,20 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) identify_cpu_without_cpuid(c); /* cyrix could have cpuid enabled via c_identify()*/ - if (!have_cpuid_p()) - return; - - cpu_detect(c); - get_cpu_vendor(c); - get_cpu_cap(c); + if (have_cpuid_p()) { + cpu_detect(c); + get_cpu_vendor(c); + get_cpu_cap(c); - if (this_cpu->c_early_init) - this_cpu->c_early_init(c); + if (this_cpu->c_early_init) + this_cpu->c_early_init(c); - c->cpu_index = 0; - filter_cpuid_features(c, false); + c->cpu_index = 0; + filter_cpuid_features(c, false); - if (this_cpu->c_bsp_init) - this_cpu->c_bsp_init(c); + if (this_cpu->c_bsp_init) + this_cpu->c_bsp_init(c); + } setup_force_cpu_cap(X86_FEATURE_ALWAYS); fpu__init_system(c); @@ -1265,9 +1264,14 @@ static __init int setup_disablecpuid(char *arg) __setup("clearcpuid=", setup_disablecpuid); #ifdef CONFIG_X86_64 -struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table }; -struct desc_ptr debug_idt_descr = { NR_VECTORS * 16 - 1, - (unsigned long) debug_idt_table }; +struct desc_ptr idt_descr __ro_after_init = { + .size = NR_VECTORS * 16 - 1, + .address = (unsigned long) idt_table, +}; +const struct desc_ptr debug_idt_descr = { + .size = NR_VECTORS * 16 - 1, + .address = (unsigned long) debug_idt_table, +}; DEFINE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __aligned(PAGE_SIZE) __visible; @@ -1281,7 +1285,7 @@ DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned = EXPORT_PER_CPU_SYMBOL(current_task); DEFINE_PER_CPU(char *, irq_stack_ptr) = - init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; + init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE; DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1; @@ -1305,11 +1309,6 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks /* May not be marked __init: used by software suspend */ void syscall_init(void) { - /* - * LSTAR and STAR live in a bit strange symbiosis. - * They both write to the same internal register. STAR allows to - * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip. - */ wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS); wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64); diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c index 27e46658ebe3..35691a6b0d32 100644 --- a/arch/x86/kernel/cpu/hypervisor.c +++ b/arch/x86/kernel/cpu/hypervisor.c @@ -86,3 +86,14 @@ bool __init hypervisor_x2apic_available(void) x86_hyper->x2apic_available && x86_hyper->x2apic_available(); } + +void hypervisor_pin_vcpu(int cpu) +{ + if (!x86_hyper) + return; + + if (x86_hyper->pin_vcpu) + x86_hyper->pin_vcpu(cpu); + else + WARN_ONCE(1, "vcpu pinning requested but not supported!\n"); +} diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 79d8ec849468..a7fdf453d895 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -41,6 +41,7 @@ #include <linux/debugfs.h> #include <linux/irq_work.h> #include <linux/export.h> +#include <linux/jump_label.h> #include <asm/processor.h> #include <asm/traps.h> @@ -292,6 +293,13 @@ static void print_mce(struct mce *m) if (m->misc) pr_cont("MISC %llx ", m->misc); + if (mce_flags.smca) { + if (m->synd) + pr_cont("SYND %llx ", m->synd); + if (m->ipid) + pr_cont("IPID %llx ", m->ipid); + } + pr_cont("\n"); /* * Note this output is parsed by external tools and old fields @@ -568,6 +576,7 @@ static void mce_read_aux(struct mce *m, int i) { if (m->status & MCI_STATUS_MISCV) m->misc = mce_rdmsrl(msr_ops.misc(i)); + if (m->status & MCI_STATUS_ADDRV) { m->addr = mce_rdmsrl(msr_ops.addr(i)); @@ -579,6 +588,23 @@ static void mce_read_aux(struct mce *m, int i) m->addr >>= shift; m->addr <<= shift; } + + /* + * Extract [55:<lsb>] where lsb is the least significant + * *valid* bit of the address bits. + */ + if (mce_flags.smca) { + u8 lsb = (m->addr >> 56) & 0x3f; + + m->addr &= GENMASK_ULL(55, lsb); + } + } + + if (mce_flags.smca) { + m->ipid = mce_rdmsrl(MSR_AMD64_SMCA_MCx_IPID(i)); + + if (m->status & MCI_STATUS_SYNDV) + m->synd = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND(i)); } } @@ -1633,17 +1659,6 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) if (c->x86 == 6 && c->x86_model == 45) quirk_no_way_out = quirk_sandybridge_ifu; - /* - * MCG_CAP.MCG_SER_P is necessary but not sufficient to know - * whether this processor will actually generate recoverable - * machine checks. Check to see if this is an E7 model Xeon. - * We can't do a model number check because E5 and E7 use the - * same model number. E5 doesn't support recovery, E7 does. - */ - if (mca_cfg.recovery || (mca_cfg.ser && - !strncmp(c->x86_model_id, - "Intel(R) Xeon(R) CPU E7-", 24))) - set_cpu_cap(c, X86_FEATURE_MCE_RECOVERY); } if (cfg->monarch_timeout < 0) cfg->monarch_timeout = 0; @@ -2080,6 +2095,7 @@ void mce_disable_bank(int bank) * mce=bootlog Log MCEs from before booting. Disabled by default on AMD. * mce=nobootlog Don't log MCEs from before booting. * mce=bios_cmci_threshold Don't program the CMCI threshold + * mce=recovery force enable memcpy_mcsafe() */ static int __init mcheck_enable(char *str) { @@ -2676,8 +2692,14 @@ static int __init mcheck_debugfs_init(void) static int __init mcheck_debugfs_init(void) { return -EINVAL; } #endif +DEFINE_STATIC_KEY_FALSE(mcsafe_key); +EXPORT_SYMBOL_GPL(mcsafe_key); + static int __init mcheck_late_init(void) { + if (mca_cfg.recovery) + static_branch_inc(&mcsafe_key); + mcheck_debugfs_init(); /* diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 7b7f3be783d4..9b5403462936 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -20,6 +20,7 @@ #include <linux/init.h> #include <linux/cpu.h> #include <linux/smp.h> +#include <linux/string.h> #include <asm/amd_nb.h> #include <asm/apic.h> @@ -63,34 +64,71 @@ static const char * const th_names[] = { "execution_unit", }; -/* Define HWID to IP type mappings for Scalable MCA */ -struct amd_hwid amd_hwids[] = { - [SMCA_F17H_CORE] = { "f17h_core", 0xB0 }, - [SMCA_DF] = { "data_fabric", 0x2E }, - [SMCA_UMC] = { "umc", 0x96 }, - [SMCA_PB] = { "param_block", 0x5 }, - [SMCA_PSP] = { "psp", 0xFF }, - [SMCA_SMU] = { "smu", 0x1 }, +static const char * const smca_umc_block_names[] = { + "dram_ecc", + "misc_umc" }; -EXPORT_SYMBOL_GPL(amd_hwids); - -const char * const amd_core_mcablock_names[] = { - [SMCA_LS] = "load_store", - [SMCA_IF] = "insn_fetch", - [SMCA_L2_CACHE] = "l2_cache", - [SMCA_DE] = "decode_unit", - [RES] = "", - [SMCA_EX] = "execution_unit", - [SMCA_FP] = "floating_point", - [SMCA_L3_CACHE] = "l3_cache", + +struct smca_bank_name smca_bank_names[] = { + [SMCA_LS] = { "load_store", "Load Store Unit" }, + [SMCA_IF] = { "insn_fetch", "Instruction Fetch Unit" }, + [SMCA_L2_CACHE] = { "l2_cache", "L2 Cache" }, + [SMCA_DE] = { "decode_unit", "Decode Unit" }, + [SMCA_EX] = { "execution_unit", "Execution Unit" }, + [SMCA_FP] = { "floating_point", "Floating Point Unit" }, + [SMCA_L3_CACHE] = { "l3_cache", "L3 Cache" }, + [SMCA_CS] = { "coherent_slave", "Coherent Slave" }, + [SMCA_PIE] = { "pie", "Power, Interrupts, etc." }, + [SMCA_UMC] = { "umc", "Unified Memory Controller" }, + [SMCA_PB] = { "param_block", "Parameter Block" }, + [SMCA_PSP] = { "psp", "Platform Security Processor" }, + [SMCA_SMU] = { "smu", "System Management Unit" }, }; -EXPORT_SYMBOL_GPL(amd_core_mcablock_names); +EXPORT_SYMBOL_GPL(smca_bank_names); + +static struct smca_hwid_mcatype smca_hwid_mcatypes[] = { + /* { bank_type, hwid_mcatype, xec_bitmap } */ + + /* ZN Core (HWID=0xB0) MCA types */ + { SMCA_LS, HWID_MCATYPE(0xB0, 0x0), 0x1FFFEF }, + { SMCA_IF, HWID_MCATYPE(0xB0, 0x1), 0x3FFF }, + { SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2), 0xF }, + { SMCA_DE, HWID_MCATYPE(0xB0, 0x3), 0x1FF }, + /* HWID 0xB0 MCATYPE 0x4 is Reserved */ + { SMCA_EX, HWID_MCATYPE(0xB0, 0x5), 0x7FF }, + { SMCA_FP, HWID_MCATYPE(0xB0, 0x6), 0x7F }, + { SMCA_L3_CACHE, HWID_MCATYPE(0xB0, 0x7), 0xFF }, + + /* Data Fabric MCA types */ + { SMCA_CS, HWID_MCATYPE(0x2E, 0x0), 0x1FF }, + { SMCA_PIE, HWID_MCATYPE(0x2E, 0x1), 0xF }, + + /* Unified Memory Controller MCA type */ + { SMCA_UMC, HWID_MCATYPE(0x96, 0x0), 0x3F }, + + /* Parameter Block MCA type */ + { SMCA_PB, HWID_MCATYPE(0x05, 0x0), 0x1 }, + + /* Platform Security Processor MCA type */ + { SMCA_PSP, HWID_MCATYPE(0xFF, 0x0), 0x1 }, -const char * const amd_df_mcablock_names[] = { - [SMCA_CS] = "coherent_slave", - [SMCA_PIE] = "pie", + /* System Management Unit MCA type */ + { SMCA_SMU, HWID_MCATYPE(0x01, 0x0), 0x1 }, }; -EXPORT_SYMBOL_GPL(amd_df_mcablock_names); + +struct smca_bank_info smca_banks[MAX_NR_BANKS]; +EXPORT_SYMBOL_GPL(smca_banks); + +/* + * In SMCA enabled processors, we can have multiple banks for a given IP type. + * So to define a unique name for each bank, we use a temp c-string to append + * the MCA_IPID[InstanceId] to type's name in get_name(). + * + * InstanceId is 32 bits which is 8 characters. Make sure MAX_MCATYPE_NAME_LEN + * is greater than 8 plus 1 (for underscore) plus length of longest type name. + */ +#define MAX_MCATYPE_NAME_LEN 30 +static char buf_mcatype[MAX_MCATYPE_NAME_LEN]; static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks); static DEFINE_PER_CPU(unsigned int, bank_map); /* see which banks are on */ @@ -108,6 +146,36 @@ void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt; * CPU Initialization */ +static void get_smca_bank_info(unsigned int bank) +{ + unsigned int i, hwid_mcatype, cpu = smp_processor_id(); + struct smca_hwid_mcatype *type; + u32 high, instanceId; + u16 hwid, mcatype; + + /* Collect bank_info using CPU 0 for now. */ + if (cpu) + return; + + if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_IPID(bank), &instanceId, &high)) { + pr_warn("Failed to read MCA_IPID for bank %d\n", bank); + return; + } + + hwid = high & MCI_IPID_HWID; + mcatype = (high & MCI_IPID_MCATYPE) >> 16; + hwid_mcatype = HWID_MCATYPE(hwid, mcatype); + + for (i = 0; i < ARRAY_SIZE(smca_hwid_mcatypes); i++) { + type = &smca_hwid_mcatypes[i]; + if (hwid_mcatype == type->hwid_mcatype) { + smca_banks[bank].type = type; + smca_banks[bank].type_instance = instanceId; + break; + } + } +} + struct thresh_restart { struct threshold_block *b; int reset; @@ -293,7 +361,7 @@ static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c) wrmsr(MSR_CU_DEF_ERR, low, high); } -static u32 get_block_address(u32 current_addr, u32 low, u32 high, +static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 high, unsigned int bank, unsigned int block) { u32 addr = 0, offset = 0; @@ -309,13 +377,13 @@ static u32 get_block_address(u32 current_addr, u32 low, u32 high, */ u32 low, high; - if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high)) + if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high)) return addr; if (!(low & MCI_CONFIG_MCAX)) return addr; - if (!rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) && + if (!rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) && (low & MASK_BLKPTR_LO)) addr = MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1); } @@ -395,6 +463,20 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr, */ smca_high &= ~BIT(2); + /* + * SMCA sets the Deferred Error Interrupt type per bank. + * + * MCA_CONFIG[DeferredIntTypeSupported] is bit 5, and tells us + * if the DeferredIntType bit field is available. + * + * MCA_CONFIG[DeferredIntType] is bits [38:37] ([6:5] in the + * high portion of the MSR). OS should set this to 0x1 to enable + * APIC based interrupt. First, check that no interrupt has been + * set. + */ + if ((smca_low & BIT(5)) && !((smca_high >> 5) & 0x3)) + smca_high |= BIT(5); + wrmsr(smca_addr, smca_low, smca_high); } @@ -421,12 +503,15 @@ out: void mce_amd_feature_init(struct cpuinfo_x86 *c) { u32 low = 0, high = 0, address = 0; - unsigned int bank, block; + unsigned int bank, block, cpu = smp_processor_id(); int offset = -1; for (bank = 0; bank < mca_cfg.banks; ++bank) { + if (mce_flags.smca) + get_smca_bank_info(bank); + for (block = 0; block < NR_BLOCKS; ++block) { - address = get_block_address(address, low, high, bank, block); + address = get_block_address(cpu, address, low, high, bank, block); if (!address) break; @@ -476,9 +561,27 @@ __log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc) if (threshold_err) m.misc = misc; - if (m.status & MCI_STATUS_ADDRV) + if (m.status & MCI_STATUS_ADDRV) { rdmsrl(msr_addr, m.addr); + /* + * Extract [55:<lsb>] where lsb is the least significant + * *valid* bit of the address bits. + */ + if (mce_flags.smca) { + u8 lsb = (m.addr >> 56) & 0x3f; + + m.addr &= GENMASK_ULL(55, lsb); + } + } + + if (mce_flags.smca) { + rdmsrl(MSR_AMD64_SMCA_MCx_IPID(bank), m.ipid); + + if (m.status & MCI_STATUS_SYNDV) + rdmsrl(MSR_AMD64_SMCA_MCx_SYND(bank), m.synd); + } + mce_log(&m); wrmsrl(msr_status, 0); @@ -541,15 +644,14 @@ static void amd_deferred_error_interrupt(void) static void amd_threshold_interrupt(void) { u32 low = 0, high = 0, address = 0; - int cpu = smp_processor_id(); - unsigned int bank, block; + unsigned int bank, block, cpu = smp_processor_id(); /* assume first bank caused it */ for (bank = 0; bank < mca_cfg.banks; ++bank) { if (!(per_cpu(bank_map, cpu) & (1 << bank))) continue; for (block = 0; block < NR_BLOCKS; ++block) { - address = get_block_address(address, low, high, bank, block); + address = get_block_address(cpu, address, low, high, bank, block); if (!address) break; @@ -713,6 +815,34 @@ static struct kobj_type threshold_ktype = { .default_attrs = default_attrs, }; +static const char *get_name(unsigned int bank, struct threshold_block *b) +{ + unsigned int bank_type; + + if (!mce_flags.smca) { + if (b && bank == 4) + return bank4_names(b); + + return th_names[bank]; + } + + if (!smca_banks[bank].type) + return NULL; + + bank_type = smca_banks[bank].type->bank_type; + + if (b && bank_type == SMCA_UMC) { + if (b->block < ARRAY_SIZE(smca_umc_block_names)) + return smca_umc_block_names[b->block]; + return NULL; + } + + snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN, + "%s_%x", smca_bank_names[bank_type].name, + smca_banks[bank].type_instance); + return buf_mcatype; +} + static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank, unsigned int block, u32 address) { @@ -767,11 +897,11 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank, err = kobject_init_and_add(&b->kobj, &threshold_ktype, per_cpu(threshold_banks, cpu)[bank]->kobj, - (bank == 4 ? bank4_names(b) : th_names[bank])); + get_name(bank, b)); if (err) goto out_free; recurse: - address = get_block_address(address, low, high, bank, ++block); + address = get_block_address(cpu, address, low, high, bank, ++block); if (!address) return 0; @@ -822,7 +952,7 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank) struct device *dev = per_cpu(mce_device, cpu); struct amd_northbridge *nb = NULL; struct threshold_bank *b = NULL; - const char *name = th_names[bank]; + const char *name = get_name(bank, NULL); int err = 0; if (is_shared_bank(bank)) { @@ -869,7 +999,7 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank) } } - err = allocate_threshold_blocks(cpu, bank, 0, MSR_IA32_MCx_MISC(bank)); + err = allocate_threshold_blocks(cpu, bank, 0, msr_ops.misc(bank)); if (!err) goto out; diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index b816971f5da4..620ab06bcf45 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -54,6 +54,7 @@ static LIST_HEAD(pcache); */ static u8 *container; static size_t container_size; +static bool ucode_builtin; static u32 ucode_new_rev; static u8 amd_ucode_patch[PATCH_MAX_SIZE]; @@ -281,18 +282,22 @@ static bool __init load_builtin_amd_microcode(struct cpio_data *cp, void __init load_ucode_amd_bsp(unsigned int family) { struct cpio_data cp; + bool *builtin; void **data; size_t *size; #ifdef CONFIG_X86_32 data = (void **)__pa_nodebug(&ucode_cpio.data); size = (size_t *)__pa_nodebug(&ucode_cpio.size); + builtin = (bool *)__pa_nodebug(&ucode_builtin); #else data = &ucode_cpio.data; size = &ucode_cpio.size; + builtin = &ucode_builtin; #endif - if (!load_builtin_amd_microcode(&cp, family)) + *builtin = load_builtin_amd_microcode(&cp, family); + if (!*builtin) cp = find_ucode_in_initrd(); if (!(cp.data && cp.size)) @@ -373,7 +378,8 @@ void load_ucode_amd_ap(void) return; /* Add CONFIG_RANDOMIZE_MEMORY offset. */ - cont += PAGE_OFFSET - __PAGE_OFFSET_BASE; + if (!ucode_builtin) + cont += PAGE_OFFSET - __PAGE_OFFSET_BASE; eax = cpuid_eax(0x00000001); eq = (struct equiv_cpu_entry *)(cont + CONTAINER_HDR_SZ); @@ -439,7 +445,8 @@ int __init save_microcode_in_initrd_amd(void) container = cont_va; /* Add CONFIG_RANDOMIZE_MEMORY offset. */ - container += PAGE_OFFSET - __PAGE_OFFSET_BASE; + if (!ucode_builtin) + container += PAGE_OFFSET - __PAGE_OFFSET_BASE; eax = cpuid_eax(0x00000001); eax = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff); diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index df04b2d033f6..5ce5155f0695 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -558,55 +558,36 @@ static struct syscore_ops mc_syscore_ops = { .resume = mc_bp_resume, }; -static int -mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) +static int mc_cpu_online(unsigned int cpu) { - unsigned int cpu = (unsigned long)hcpu; struct device *dev; dev = get_cpu_device(cpu); + microcode_update_cpu(cpu); + pr_debug("CPU%d added\n", cpu); - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_ONLINE: - microcode_update_cpu(cpu); - pr_debug("CPU%d added\n", cpu); - /* - * "break" is missing on purpose here because we want to fall - * through in order to create the sysfs group. - */ - - case CPU_DOWN_FAILED: - if (sysfs_create_group(&dev->kobj, &mc_attr_group)) - pr_err("Failed to create group for CPU%d\n", cpu); - break; + if (sysfs_create_group(&dev->kobj, &mc_attr_group)) + pr_err("Failed to create group for CPU%d\n", cpu); + return 0; +} - case CPU_DOWN_PREPARE: - /* Suspend is in progress, only remove the interface */ - sysfs_remove_group(&dev->kobj, &mc_attr_group); - pr_debug("CPU%d removed\n", cpu); - break; +static int mc_cpu_down_prep(unsigned int cpu) +{ + struct device *dev; + dev = get_cpu_device(cpu); + /* Suspend is in progress, only remove the interface */ + sysfs_remove_group(&dev->kobj, &mc_attr_group); + pr_debug("CPU%d removed\n", cpu); /* - * case CPU_DEAD: - * * When a CPU goes offline, don't free up or invalidate the copy of * the microcode in kernel memory, so that we can reuse it when the * CPU comes back online without unnecessarily requesting the userspace * for it again. */ - } - - /* The CPU refused to come up during a system resume */ - if (action == CPU_UP_CANCELED_FROZEN) - microcode_fini_cpu(cpu); - - return NOTIFY_OK; + return 0; } -static struct notifier_block mc_cpu_notifier = { - .notifier_call = mc_cpu_callback, -}; - static struct attribute *cpu_root_microcode_attrs[] = { &dev_attr_reload.attr, NULL @@ -665,7 +646,8 @@ int __init microcode_init(void) goto out_ucode_group; register_syscore_ops(&mc_syscore_ops); - register_hotcpu_notifier(&mc_cpu_notifier); + cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/microcode:online", + mc_cpu_online, mc_cpu_down_prep); pr_info("Microcode Update Driver: v" MICROCODE_VERSION " <tigran@aivazian.fsnet.co.uk>, Peter Oruba\n"); diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 28f1b54b7fad..24e87e74990d 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -72,14 +72,14 @@ static DEFINE_MUTEX(mtrr_mutex); u64 size_or_mask, size_and_mask; static bool mtrr_aps_delayed_init; -static const struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM]; +static const struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM] __ro_after_init; const struct mtrr_ops *mtrr_if; static void set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type); -void set_mtrr_ops(const struct mtrr_ops *ops) +void __init set_mtrr_ops(const struct mtrr_ops *ops) { if (ops->vendor && ops->vendor < X86_VENDOR_NUM) mtrr_ops[ops->vendor] = ops; diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h index 6c7ced07d16d..ad8bd763efa5 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.h +++ b/arch/x86/kernel/cpu/mtrr/mtrr.h @@ -54,7 +54,7 @@ void fill_mtrr_var_range(unsigned int index, bool get_mtrr_state(void); void mtrr_bp_pat_init(void); -extern void set_mtrr_ops(const struct mtrr_ops *ops); +extern void __init set_mtrr_ops(const struct mtrr_ops *ops); extern u64 size_or_mask, size_and_mask; extern const struct mtrr_ops *mtrr_if; diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 92e8f0a7159c..9b7cf5c28f5f 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -17,7 +17,7 @@ #include <linux/sysfs.h> #include <asm/stacktrace.h> - +#include <asm/unwind.h> int panic_on_unrecovered_nmi; int panic_on_io_nmi; @@ -25,11 +25,29 @@ unsigned int code_bytes = 64; int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE; static int die_counter; +bool in_task_stack(unsigned long *stack, struct task_struct *task, + struct stack_info *info) +{ + unsigned long *begin = task_stack_page(task); + unsigned long *end = task_stack_page(task) + THREAD_SIZE; + + if (stack < begin || stack >= end) + return false; + + info->type = STACK_TYPE_TASK; + info->begin = begin; + info->end = end; + info->next_sp = NULL; + + return true; +} + static void printk_stack_address(unsigned long address, int reliable, - void *data) + char *log_lvl) { + touch_nmi_watchdog(); printk("%s [<%p>] %s%pB\n", - (char *)data, (void *)address, reliable ? "" : "? ", + log_lvl, (void *)address, reliable ? "" : "? ", (void *)address); } @@ -38,176 +56,120 @@ void printk_address(unsigned long address) pr_cont(" [<%p>] %pS\n", (void *)address, (void *)address); } -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -static void -print_ftrace_graph_addr(unsigned long addr, void *data, - const struct stacktrace_ops *ops, - struct task_struct *task, int *graph) +void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, + unsigned long *stack, char *log_lvl) { - unsigned long ret_addr; - int index; - - if (addr != (unsigned long)return_to_handler) - return; - - index = task->curr_ret_stack; - - if (!task->ret_stack || index < *graph) - return; - - index -= *graph; - ret_addr = task->ret_stack[index].ret; - - ops->address(data, ret_addr, 1); + struct unwind_state state; + struct stack_info stack_info = {0}; + unsigned long visit_mask = 0; + int graph_idx = 0; - (*graph)++; -} -#else -static inline void -print_ftrace_graph_addr(unsigned long addr, void *data, - const struct stacktrace_ops *ops, - struct task_struct *task, int *graph) -{ } -#endif - -/* - * x86-64 can have up to three kernel stacks: - * process stack - * interrupt stack - * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack - */ - -static inline int valid_stack_ptr(struct task_struct *task, - void *p, unsigned int size, void *end) -{ - void *t = task_stack_page(task); - if (end) { - if (p < end && p >= (end-THREAD_SIZE)) - return 1; - else - return 0; - } - return p >= t && p < t + THREAD_SIZE - size; -} + printk("%sCall Trace:\n", log_lvl); -unsigned long -print_context_stack(struct task_struct *task, - unsigned long *stack, unsigned long bp, - const struct stacktrace_ops *ops, void *data, - unsigned long *end, int *graph) -{ - struct stack_frame *frame = (struct stack_frame *)bp; + unwind_start(&state, task, regs, stack); /* - * If we overflowed the stack into a guard page, jump back to the - * bottom of the usable stack. + * Iterate through the stacks, starting with the current stack pointer. + * Each stack has a pointer to the next one. + * + * x86-64 can have several stacks: + * - task stack + * - interrupt stack + * - HW exception stacks (double fault, nmi, debug, mce) + * + * x86-32 can have up to three stacks: + * - task stack + * - softirq stack + * - hardirq stack */ - if ((unsigned long)task_stack_page(task) - (unsigned long)stack < - PAGE_SIZE) - stack = (unsigned long *)task_stack_page(task); - - while (valid_stack_ptr(task, stack, sizeof(*stack), end)) { - unsigned long addr; - - addr = *stack; - if (__kernel_text_address(addr)) { - if ((unsigned long) stack == bp + sizeof(long)) { - ops->address(data, addr, 1); - frame = frame->next_frame; - bp = (unsigned long) frame; - } else { - ops->address(data, addr, 0); - } - print_ftrace_graph_addr(addr, data, ops, task, graph); - } - stack++; - } - return bp; -} -EXPORT_SYMBOL_GPL(print_context_stack); - -unsigned long -print_context_stack_bp(struct task_struct *task, - unsigned long *stack, unsigned long bp, - const struct stacktrace_ops *ops, void *data, - unsigned long *end, int *graph) -{ - struct stack_frame *frame = (struct stack_frame *)bp; - unsigned long *ret_addr = &frame->return_address; + for (; stack; stack = stack_info.next_sp) { + const char *str_begin, *str_end; - while (valid_stack_ptr(task, ret_addr, sizeof(*ret_addr), end)) { - unsigned long addr = *ret_addr; + /* + * If we overflowed the task stack into a guard page, jump back + * to the bottom of the usable stack. + */ + if (task_stack_page(task) - (void *)stack < PAGE_SIZE) + stack = task_stack_page(task); - if (!__kernel_text_address(addr)) + if (get_stack_info(stack, task, &stack_info, &visit_mask)) break; - if (ops->address(data, addr, 1)) - break; - frame = frame->next_frame; - ret_addr = &frame->return_address; - print_ftrace_graph_addr(addr, data, ops, task, graph); - } - - return (unsigned long)frame; -} -EXPORT_SYMBOL_GPL(print_context_stack_bp); - -static int print_trace_stack(void *data, char *name) -{ - printk("%s <%s> ", (char *)data, name); - return 0; -} - -/* - * Print one address/symbol entries per line. - */ -static int print_trace_address(void *data, unsigned long addr, int reliable) -{ - touch_nmi_watchdog(); - printk_stack_address(addr, reliable, data); - return 0; -} - -static const struct stacktrace_ops print_trace_ops = { - .stack = print_trace_stack, - .address = print_trace_address, - .walk_stack = print_context_stack, -}; - -void -show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, unsigned long bp, char *log_lvl) -{ - printk("%sCall Trace:\n", log_lvl); - dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); -} + stack_type_str(stack_info.type, &str_begin, &str_end); + if (str_begin) + printk("%s <%s> ", log_lvl, str_begin); + + /* + * Scan the stack, printing any text addresses we find. At the + * same time, follow proper stack frames with the unwinder. + * + * Addresses found during the scan which are not reported by + * the unwinder are considered to be additional clues which are + * sometimes useful for debugging and are prefixed with '?'. + * This also serves as a failsafe option in case the unwinder + * goes off in the weeds. + */ + for (; stack < stack_info.end; stack++) { + unsigned long real_addr; + int reliable = 0; + unsigned long addr = *stack; + unsigned long *ret_addr_p = + unwind_get_return_address_ptr(&state); + + if (!__kernel_text_address(addr)) + continue; + + if (stack == ret_addr_p) + reliable = 1; + + /* + * When function graph tracing is enabled for a + * function, its return address on the stack is + * replaced with the address of an ftrace handler + * (return_to_handler). In that case, before printing + * the "real" address, we want to print the handler + * address as an "unreliable" hint that function graph + * tracing was involved. + */ + real_addr = ftrace_graph_ret_addr(task, &graph_idx, + addr, stack); + if (real_addr != addr) + printk_stack_address(addr, 0, log_lvl); + printk_stack_address(real_addr, reliable, log_lvl); + + if (!reliable) + continue; + + /* + * Get the next frame from the unwinder. No need to + * check for an error: if anything goes wrong, the rest + * of the addresses will just be printed as unreliable. + */ + unwind_next_frame(&state); + } -void show_trace(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, unsigned long bp) -{ - show_trace_log_lvl(task, regs, stack, bp, ""); + if (str_end) + printk("%s <%s> ", log_lvl, str_end); + } } void show_stack(struct task_struct *task, unsigned long *sp) { - unsigned long bp = 0; - unsigned long stack; + task = task ? : current; /* * Stack frames below this one aren't interesting. Don't show them * if we're printing for %current. */ - if (!sp && (!task || task == current)) { - sp = &stack; - bp = stack_frame(current, NULL); - } + if (!sp && task == current) + sp = get_stack_pointer(current, NULL); - show_stack_log_lvl(task, NULL, sp, bp, ""); + show_stack_log_lvl(task, NULL, sp, ""); } void show_stack_regs(struct pt_regs *regs) { - show_stack_log_lvl(current, regs, (unsigned long *)regs->sp, regs->bp, ""); + show_stack_log_lvl(current, regs, NULL, ""); } static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED; diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index 09675712eba8..06eb322b5f9f 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -16,93 +16,121 @@ #include <asm/stacktrace.h> -static void *is_irq_stack(void *p, void *irq) +void stack_type_str(enum stack_type type, const char **begin, const char **end) { - if (p < irq || p >= (irq + THREAD_SIZE)) - return NULL; - return irq + THREAD_SIZE; + switch (type) { + case STACK_TYPE_IRQ: + case STACK_TYPE_SOFTIRQ: + *begin = "IRQ"; + *end = "EOI"; + break; + default: + *begin = NULL; + *end = NULL; + } } - -static void *is_hardirq_stack(unsigned long *stack, int cpu) +static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info) { - void *irq = per_cpu(hardirq_stack, cpu); + unsigned long *begin = (unsigned long *)this_cpu_read(hardirq_stack); + unsigned long *end = begin + (THREAD_SIZE / sizeof(long)); - return is_irq_stack(stack, irq); -} + /* + * This is a software stack, so 'end' can be a valid stack pointer. + * It just means the stack is empty. + */ + if (stack < begin || stack > end) + return false; -static void *is_softirq_stack(unsigned long *stack, int cpu) -{ - void *irq = per_cpu(softirq_stack, cpu); + info->type = STACK_TYPE_IRQ; + info->begin = begin; + info->end = end; - return is_irq_stack(stack, irq); + /* + * See irq_32.c -- the next stack pointer is stored at the beginning of + * the stack. + */ + info->next_sp = (unsigned long *)*begin; + + return true; } -void dump_trace(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, unsigned long bp, - const struct stacktrace_ops *ops, void *data) +static bool in_softirq_stack(unsigned long *stack, struct stack_info *info) { - const unsigned cpu = get_cpu(); - int graph = 0; - u32 *prev_esp; + unsigned long *begin = (unsigned long *)this_cpu_read(softirq_stack); + unsigned long *end = begin + (THREAD_SIZE / sizeof(long)); - if (!task) - task = current; + /* + * This is a software stack, so 'end' can be a valid stack pointer. + * It just means the stack is empty. + */ + if (stack < begin || stack > end) + return false; - if (!stack) { - unsigned long dummy; + info->type = STACK_TYPE_SOFTIRQ; + info->begin = begin; + info->end = end; - stack = &dummy; - if (task != current) - stack = (unsigned long *)task->thread.sp; - } + /* + * The next stack pointer is stored at the beginning of the stack. + * See irq_32.c. + */ + info->next_sp = (unsigned long *)*begin; - if (!bp) - bp = stack_frame(task, regs); + return true; +} - for (;;) { - void *end_stack; +int get_stack_info(unsigned long *stack, struct task_struct *task, + struct stack_info *info, unsigned long *visit_mask) +{ + if (!stack) + goto unknown; - end_stack = is_hardirq_stack(stack, cpu); - if (!end_stack) - end_stack = is_softirq_stack(stack, cpu); + task = task ? : current; - bp = ops->walk_stack(task, stack, bp, ops, data, - end_stack, &graph); + if (in_task_stack(stack, task, info)) + goto recursion_check; - /* Stop if not on irq stack */ - if (!end_stack) - break; + if (task != current) + goto unknown; - /* The previous esp is saved on the bottom of the stack */ - prev_esp = (u32 *)(end_stack - THREAD_SIZE); - stack = (unsigned long *)*prev_esp; - if (!stack) - break; + if (in_hardirq_stack(stack, info)) + goto recursion_check; - if (ops->stack(data, "IRQ") < 0) - break; - touch_nmi_watchdog(); + if (in_softirq_stack(stack, info)) + goto recursion_check; + + goto unknown; + +recursion_check: + /* + * Make sure we don't iterate through any given stack more than once. + * If it comes up a second time then there's something wrong going on: + * just break out and report an unknown stack type. + */ + if (visit_mask) { + if (*visit_mask & (1UL << info->type)) + goto unknown; + *visit_mask |= 1UL << info->type; } - put_cpu(); + + return 0; + +unknown: + info->type = STACK_TYPE_UNKNOWN; + return -EINVAL; } -EXPORT_SYMBOL(dump_trace); -void -show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *sp, unsigned long bp, char *log_lvl) +void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, + unsigned long *sp, char *log_lvl) { unsigned long *stack; int i; - if (sp == NULL) { - if (regs) - sp = (unsigned long *)regs->sp; - else if (task) - sp = (unsigned long *)task->thread.sp; - else - sp = (unsigned long *)&sp; - } + if (!try_get_task_stack(task)) + return; + + sp = sp ? : get_stack_pointer(task, regs); stack = sp; for (i = 0; i < kstack_depth_to_print; i++) { @@ -117,7 +145,9 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, touch_nmi_watchdog(); } pr_cont("\n"); - show_trace_log_lvl(task, regs, sp, bp, log_lvl); + show_trace_log_lvl(task, regs, sp, log_lvl); + + put_task_stack(task); } @@ -139,7 +169,7 @@ void show_regs(struct pt_regs *regs) u8 *ip; pr_emerg("Stack:\n"); - show_stack_log_lvl(NULL, regs, ®s->sp, 0, KERN_EMERG); + show_stack_log_lvl(current, regs, NULL, KERN_EMERG); pr_emerg("Code:"); diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 9ee4520ce83c..36cf1a498227 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -16,261 +16,145 @@ #include <asm/stacktrace.h> +static char *exception_stack_names[N_EXCEPTION_STACKS] = { + [ DOUBLEFAULT_STACK-1 ] = "#DF", + [ NMI_STACK-1 ] = "NMI", + [ DEBUG_STACK-1 ] = "#DB", + [ MCE_STACK-1 ] = "#MC", +}; -#define N_EXCEPTION_STACKS_END \ - (N_EXCEPTION_STACKS + DEBUG_STKSZ/EXCEPTION_STKSZ - 2) - -static char x86_stack_ids[][8] = { - [ DEBUG_STACK-1 ] = "#DB", - [ NMI_STACK-1 ] = "NMI", - [ DOUBLEFAULT_STACK-1 ] = "#DF", - [ MCE_STACK-1 ] = "#MC", -#if DEBUG_STKSZ > EXCEPTION_STKSZ - [ N_EXCEPTION_STACKS ... - N_EXCEPTION_STACKS_END ] = "#DB[?]" -#endif +static unsigned long exception_stack_sizes[N_EXCEPTION_STACKS] = { + [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, + [DEBUG_STACK - 1] = DEBUG_STKSZ }; -static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, - unsigned *usedp, char **idp) +void stack_type_str(enum stack_type type, const char **begin, const char **end) { - unsigned k; - - /* - * Iterate over all exception stacks, and figure out whether - * 'stack' is in one of them: - */ - for (k = 0; k < N_EXCEPTION_STACKS; k++) { - unsigned long end = per_cpu(orig_ist, cpu).ist[k]; - /* - * Is 'stack' above this exception frame's end? - * If yes then skip to the next frame. - */ - if (stack >= end) - continue; - /* - * Is 'stack' above this exception frame's start address? - * If yes then we found the right frame. - */ - if (stack >= end - EXCEPTION_STKSZ) { - /* - * Make sure we only iterate through an exception - * stack once. If it comes up for the second time - * then there's something wrong going on - just - * break out and return NULL: - */ - if (*usedp & (1U << k)) - break; - *usedp |= 1U << k; - *idp = x86_stack_ids[k]; - return (unsigned long *)end; - } - /* - * If this is a debug stack, and if it has a larger size than - * the usual exception stacks, then 'stack' might still - * be within the lower portion of the debug stack: - */ -#if DEBUG_STKSZ > EXCEPTION_STKSZ - if (k == DEBUG_STACK - 1 && stack >= end - DEBUG_STKSZ) { - unsigned j = N_EXCEPTION_STACKS - 1; - - /* - * Black magic. A large debug stack is composed of - * multiple exception stack entries, which we - * iterate through now. Dont look: - */ - do { - ++j; - end -= EXCEPTION_STKSZ; - x86_stack_ids[j][4] = '1' + - (j - N_EXCEPTION_STACKS); - } while (stack < end - EXCEPTION_STKSZ); - if (*usedp & (1U << j)) - break; - *usedp |= 1U << j; - *idp = x86_stack_ids[j]; - return (unsigned long *)end; - } -#endif + BUILD_BUG_ON(N_EXCEPTION_STACKS != 4); + + switch (type) { + case STACK_TYPE_IRQ: + *begin = "IRQ"; + *end = "EOI"; + break; + case STACK_TYPE_EXCEPTION ... STACK_TYPE_EXCEPTION_LAST: + *begin = exception_stack_names[type - STACK_TYPE_EXCEPTION]; + *end = "EOE"; + break; + default: + *begin = NULL; + *end = NULL; } - return NULL; } -static inline int -in_irq_stack(unsigned long *stack, unsigned long *irq_stack, - unsigned long *irq_stack_end) +static bool in_exception_stack(unsigned long *stack, struct stack_info *info) { - return (stack >= irq_stack && stack < irq_stack_end); -} - -static const unsigned long irq_stack_size = - (IRQ_STACK_SIZE - 64) / sizeof(unsigned long); - -enum stack_type { - STACK_IS_UNKNOWN, - STACK_IS_NORMAL, - STACK_IS_EXCEPTION, - STACK_IS_IRQ, -}; - -static enum stack_type -analyze_stack(int cpu, struct task_struct *task, unsigned long *stack, - unsigned long **stack_end, unsigned long *irq_stack, - unsigned *used, char **id) -{ - unsigned long addr; + unsigned long *begin, *end; + struct pt_regs *regs; + unsigned k; - addr = ((unsigned long)stack & (~(THREAD_SIZE - 1))); - if ((unsigned long)task_stack_page(task) == addr) - return STACK_IS_NORMAL; + BUILD_BUG_ON(N_EXCEPTION_STACKS != 4); - *stack_end = in_exception_stack(cpu, (unsigned long)stack, - used, id); - if (*stack_end) - return STACK_IS_EXCEPTION; + for (k = 0; k < N_EXCEPTION_STACKS; k++) { + end = (unsigned long *)raw_cpu_ptr(&orig_ist)->ist[k]; + begin = end - (exception_stack_sizes[k] / sizeof(long)); + regs = (struct pt_regs *)end - 1; - if (!irq_stack) - return STACK_IS_NORMAL; + if (stack < begin || stack >= end) + continue; - *stack_end = irq_stack; - irq_stack = irq_stack - irq_stack_size; + info->type = STACK_TYPE_EXCEPTION + k; + info->begin = begin; + info->end = end; + info->next_sp = (unsigned long *)regs->sp; - if (in_irq_stack(stack, irq_stack, *stack_end)) - return STACK_IS_IRQ; + return true; + } - return STACK_IS_UNKNOWN; + return false; } -/* - * x86-64 can have up to three kernel stacks: - * process stack - * interrupt stack - * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack - */ - -void dump_trace(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, unsigned long bp, - const struct stacktrace_ops *ops, void *data) +static bool in_irq_stack(unsigned long *stack, struct stack_info *info) { - const unsigned cpu = get_cpu(); - unsigned long *irq_stack = (unsigned long *)per_cpu(irq_stack_ptr, cpu); - unsigned long dummy; - unsigned used = 0; - int graph = 0; - int done = 0; - - if (!task) - task = current; - - if (!stack) { - if (regs) - stack = (unsigned long *)regs->sp; - else if (task != current) - stack = (unsigned long *)task->thread.sp; - else - stack = &dummy; - } + unsigned long *end = (unsigned long *)this_cpu_read(irq_stack_ptr); + unsigned long *begin = end - (IRQ_STACK_SIZE / sizeof(long)); - if (!bp) - bp = stack_frame(task, regs); /* - * Print function call entries in all stacks, starting at the - * current stack address. If the stacks consist of nested - * exceptions + * This is a software stack, so 'end' can be a valid stack pointer. + * It just means the stack is empty. */ - while (!done) { - unsigned long *stack_end; - enum stack_type stype; - char *id; + if (stack < begin || stack > end) + return false; - stype = analyze_stack(cpu, task, stack, &stack_end, - irq_stack, &used, &id); + info->type = STACK_TYPE_IRQ; + info->begin = begin; + info->end = end; - /* Default finish unless specified to continue */ - done = 1; + /* + * The next stack pointer is the first thing pushed by the entry code + * after switching to the irq stack. + */ + info->next_sp = (unsigned long *)*(end - 1); - switch (stype) { + return true; +} - /* Break out early if we are on the thread stack */ - case STACK_IS_NORMAL: - break; +int get_stack_info(unsigned long *stack, struct task_struct *task, + struct stack_info *info, unsigned long *visit_mask) +{ + if (!stack) + goto unknown; - case STACK_IS_EXCEPTION: + task = task ? : current; - if (ops->stack(data, id) < 0) - break; + if (in_task_stack(stack, task, info)) + goto recursion_check; - bp = ops->walk_stack(task, stack, bp, ops, - data, stack_end, &graph); - ops->stack(data, "<EOE>"); - /* - * We link to the next stack via the - * second-to-last pointer (index -2 to end) in the - * exception stack: - */ - stack = (unsigned long *) stack_end[-2]; - done = 0; - break; + if (task != current) + goto unknown; - case STACK_IS_IRQ: + if (in_exception_stack(stack, info)) + goto recursion_check; - if (ops->stack(data, "IRQ") < 0) - break; - bp = ops->walk_stack(task, stack, bp, - ops, data, stack_end, &graph); - /* - * We link to the next stack (which would be - * the process stack normally) the last - * pointer (index -1 to end) in the IRQ stack: - */ - stack = (unsigned long *) (stack_end[-1]); - irq_stack = NULL; - ops->stack(data, "EOI"); - done = 0; - break; + if (in_irq_stack(stack, info)) + goto recursion_check; - case STACK_IS_UNKNOWN: - ops->stack(data, "UNK"); - break; - } - } + goto unknown; +recursion_check: /* - * This handles the process stack: + * Make sure we don't iterate through any given stack more than once. + * If it comes up a second time then there's something wrong going on: + * just break out and report an unknown stack type. */ - bp = ops->walk_stack(task, stack, bp, ops, data, NULL, &graph); - put_cpu(); + if (visit_mask) { + if (*visit_mask & (1UL << info->type)) + goto unknown; + *visit_mask |= 1UL << info->type; + } + + return 0; + +unknown: + info->type = STACK_TYPE_UNKNOWN; + return -EINVAL; } -EXPORT_SYMBOL(dump_trace); -void -show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *sp, unsigned long bp, char *log_lvl) +void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, + unsigned long *sp, char *log_lvl) { unsigned long *irq_stack_end; unsigned long *irq_stack; unsigned long *stack; - int cpu; int i; - preempt_disable(); - cpu = smp_processor_id(); + if (!try_get_task_stack(task)) + return; - irq_stack_end = (unsigned long *)(per_cpu(irq_stack_ptr, cpu)); - irq_stack = (unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE); + irq_stack_end = (unsigned long *)this_cpu_read(irq_stack_ptr); + irq_stack = irq_stack_end - (IRQ_STACK_SIZE / sizeof(long)); - /* - * Debugging aid: "show_stack(NULL, NULL);" prints the - * back trace for this cpu: - */ - if (sp == NULL) { - if (regs) - sp = (unsigned long *)regs->sp; - else if (task) - sp = (unsigned long *)task->thread.sp; - else - sp = (unsigned long *)&sp; - } + sp = sp ? : get_stack_pointer(task, regs); stack = sp; for (i = 0; i < kstack_depth_to_print; i++) { @@ -299,18 +183,17 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, stack++; touch_nmi_watchdog(); } - preempt_enable(); pr_cont("\n"); - show_trace_log_lvl(task, regs, sp, bp, log_lvl); + show_trace_log_lvl(task, regs, sp, log_lvl); + + put_task_stack(task); } void show_regs(struct pt_regs *regs) { int i; - unsigned long sp; - sp = regs->sp; show_regs_print_info(KERN_DEFAULT); __show_regs(regs, 1); @@ -325,8 +208,7 @@ void show_regs(struct pt_regs *regs) u8 *ip; printk(KERN_DEFAULT "Stack:\n"); - show_stack_log_lvl(NULL, regs, (unsigned long *)sp, - 0, KERN_DEFAULT); + show_stack_log_lvl(current, regs, NULL, KERN_DEFAULT); printk(KERN_DEFAULT "Code: "); diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 621b501f8935..b85fe5f91c3f 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -40,8 +40,10 @@ * user can e.g. boot the original kernel with mem=1G while still booting the * next kernel with full memory. */ -struct e820map e820; -struct e820map e820_saved; +static struct e820map initial_e820 __initdata; +static struct e820map initial_e820_saved __initdata; +struct e820map *e820 __refdata = &initial_e820; +struct e820map *e820_saved __refdata = &initial_e820_saved; /* For PCI or other memory-mapped resources */ unsigned long pci_mem_start = 0xaeedbabe; @@ -58,8 +60,8 @@ e820_any_mapped(u64 start, u64 end, unsigned type) { int i; - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; + for (i = 0; i < e820->nr_map; i++) { + struct e820entry *ei = &e820->map[i]; if (type && ei->type != type) continue; @@ -81,8 +83,8 @@ int __init e820_all_mapped(u64 start, u64 end, unsigned type) { int i; - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; + for (i = 0; i < e820->nr_map; i++) { + struct e820entry *ei = &e820->map[i]; if (type && ei->type != type) continue; @@ -128,7 +130,7 @@ static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size, void __init e820_add_region(u64 start, u64 size, int type) { - __e820_add_region(&e820, start, size, type); + __e820_add_region(e820, start, size, type); } static void __init e820_print_type(u32 type) @@ -164,12 +166,12 @@ void __init e820_print_map(char *who) { int i; - for (i = 0; i < e820.nr_map; i++) { + for (i = 0; i < e820->nr_map; i++) { printk(KERN_INFO "%s: [mem %#018Lx-%#018Lx] ", who, - (unsigned long long) e820.map[i].addr, + (unsigned long long) e820->map[i].addr, (unsigned long long) - (e820.map[i].addr + e820.map[i].size - 1)); - e820_print_type(e820.map[i].type); + (e820->map[i].addr + e820->map[i].size - 1)); + e820_print_type(e820->map[i].type); printk(KERN_CONT "\n"); } } @@ -348,7 +350,7 @@ int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, * continue building up new bios map based on this * information */ - if (current_type != last_type || current_type == E820_PRAM) { + if (current_type != last_type) { if (last_type != 0) { new_bios[new_bios_entry].size = change_point[chgidx]->addr - last_addr; @@ -388,11 +390,11 @@ static int __init __append_e820_map(struct e820entry *biosmap, int nr_map) while (nr_map) { u64 start = biosmap->addr; u64 size = biosmap->size; - u64 end = start + size; + u64 end = start + size - 1; u32 type = biosmap->type; /* Overflow in 64 bits? Ignore the memory map. */ - if (start > end) + if (start > end && likely(size)) return -1; e820_add_region(start, size, type); @@ -493,13 +495,13 @@ static u64 __init __e820_update_range(struct e820map *e820x, u64 start, u64 __init e820_update_range(u64 start, u64 size, unsigned old_type, unsigned new_type) { - return __e820_update_range(&e820, start, size, old_type, new_type); + return __e820_update_range(e820, start, size, old_type, new_type); } static u64 __init e820_update_range_saved(u64 start, u64 size, unsigned old_type, unsigned new_type) { - return __e820_update_range(&e820_saved, start, size, old_type, + return __e820_update_range(e820_saved, start, size, old_type, new_type); } @@ -521,8 +523,8 @@ u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type, e820_print_type(old_type); printk(KERN_CONT "\n"); - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; + for (i = 0; i < e820->nr_map; i++) { + struct e820entry *ei = &e820->map[i]; u64 final_start, final_end; u64 ei_end; @@ -566,15 +568,15 @@ u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type, void __init update_e820(void) { - if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map)) + if (sanitize_e820_map(e820->map, ARRAY_SIZE(e820->map), &e820->nr_map)) return; printk(KERN_INFO "e820: modified physical RAM map:\n"); e820_print_map("modified"); } static void __init update_e820_saved(void) { - sanitize_e820_map(e820_saved.map, ARRAY_SIZE(e820_saved.map), - &e820_saved.nr_map); + sanitize_e820_map(e820_saved->map, ARRAY_SIZE(e820_saved->map), + &e820_saved->nr_map); } #define MAX_GAP_END 0x100000000ull /* @@ -584,14 +586,14 @@ __init int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize, unsigned long start_addr, unsigned long long end_addr) { unsigned long long last; - int i = e820.nr_map; + int i = e820->nr_map; int found = 0; last = (end_addr && end_addr < MAX_GAP_END) ? end_addr : MAX_GAP_END; while (--i >= 0) { - unsigned long long start = e820.map[i].addr; - unsigned long long end = start + e820.map[i].size; + unsigned long long start = e820->map[i].addr; + unsigned long long end = start + e820->map[i].size; if (end < start_addr) continue; @@ -649,6 +651,33 @@ __init void e820_setup_gap(void) gapstart, gapstart + gapsize - 1); } +/* + * Called late during init, in free_initmem(). + * + * Initial e820 and e820_saved are largish __initdata arrays. + * Copy them to (usually much smaller) dynamically allocated area. + * This is done after all tweaks we ever do to them: + * all functions which modify them are __init functions, + * they won't exist after this point. + */ +__init void e820_reallocate_tables(void) +{ + struct e820map *n; + int size; + + size = offsetof(struct e820map, map) + sizeof(struct e820entry) * e820->nr_map; + n = kmalloc(size, GFP_KERNEL); + BUG_ON(!n); + memcpy(n, e820, size); + e820 = n; + + size = offsetof(struct e820map, map) + sizeof(struct e820entry) * e820_saved->nr_map; + n = kmalloc(size, GFP_KERNEL); + BUG_ON(!n); + memcpy(n, e820_saved, size); + e820_saved = n; +} + /** * Because of the size limitation of struct boot_params, only first * 128 E820 memory entries are passed to kernel via @@ -665,7 +694,7 @@ void __init parse_e820_ext(u64 phys_addr, u32 data_len) entries = sdata->len / sizeof(struct e820entry); extmap = (struct e820entry *)(sdata->data); __append_e820_map(extmap, entries); - sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); + sanitize_e820_map(e820->map, ARRAY_SIZE(e820->map), &e820->nr_map); early_memunmap(sdata, data_len); printk(KERN_INFO "e820: extended physical RAM map:\n"); e820_print_map("extended"); @@ -686,8 +715,8 @@ void __init e820_mark_nosave_regions(unsigned long limit_pfn) int i; unsigned long pfn = 0; - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; + for (i = 0; i < e820->nr_map; i++) { + struct e820entry *ei = &e820->map[i]; if (pfn < PFN_UP(ei->addr)) register_nosave_region(pfn, PFN_UP(ei->addr)); @@ -712,8 +741,8 @@ static int __init e820_mark_nvs_memory(void) { int i; - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; + for (i = 0; i < e820->nr_map; i++) { + struct e820entry *ei = &e820->map[i]; if (ei->type == E820_NVS) acpi_nvs_register(ei->addr, ei->size); @@ -754,22 +783,18 @@ u64 __init early_reserve_e820(u64 size, u64 align) /* * Find the highest page frame number we have available */ -static unsigned long __init e820_end_pfn(unsigned long limit_pfn) +static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type) { int i; unsigned long last_pfn = 0; unsigned long max_arch_pfn = MAX_ARCH_PFN; - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; + for (i = 0; i < e820->nr_map; i++) { + struct e820entry *ei = &e820->map[i]; unsigned long start_pfn; unsigned long end_pfn; - /* - * Persistent memory is accounted as ram for purposes of - * establishing max_pfn and mem_map. - */ - if (ei->type != E820_RAM && ei->type != E820_PRAM) + if (ei->type != type) continue; start_pfn = ei->addr >> PAGE_SHIFT; @@ -794,15 +819,15 @@ static unsigned long __init e820_end_pfn(unsigned long limit_pfn) } unsigned long __init e820_end_of_ram_pfn(void) { - return e820_end_pfn(MAX_ARCH_PFN); + return e820_end_pfn(MAX_ARCH_PFN, E820_RAM); } unsigned long __init e820_end_of_low_ram_pfn(void) { - return e820_end_pfn(1UL << (32-PAGE_SHIFT)); + return e820_end_pfn(1UL << (32 - PAGE_SHIFT), E820_RAM); } -static void early_panic(char *msg) +static void __init early_panic(char *msg) { early_printk(msg); panic(msg); @@ -856,7 +881,7 @@ static int __init parse_memmap_one(char *p) */ saved_max_pfn = e820_end_of_ram_pfn(); #endif - e820.nr_map = 0; + e820->nr_map = 0; userdef = 1; return 0; } @@ -903,8 +928,8 @@ early_param("memmap", parse_memmap_opt); void __init finish_e820_parsing(void) { if (userdef) { - if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), - &e820.nr_map) < 0) + if (sanitize_e820_map(e820->map, ARRAY_SIZE(e820->map), + &e820->nr_map) < 0) early_panic("Invalid user supplied memory map"); printk(KERN_INFO "e820: user-defined physical RAM map:\n"); @@ -912,7 +937,7 @@ void __init finish_e820_parsing(void) } } -static const char *e820_type_to_string(int e820_type) +static const char *__init e820_type_to_string(int e820_type) { switch (e820_type) { case E820_RESERVED_KERN: @@ -926,7 +951,7 @@ static const char *e820_type_to_string(int e820_type) } } -static unsigned long e820_type_to_iomem_type(int e820_type) +static unsigned long __init e820_type_to_iomem_type(int e820_type) { switch (e820_type) { case E820_RESERVED_KERN: @@ -942,7 +967,7 @@ static unsigned long e820_type_to_iomem_type(int e820_type) } } -static unsigned long e820_type_to_iores_desc(int e820_type) +static unsigned long __init e820_type_to_iores_desc(int e820_type) { switch (e820_type) { case E820_ACPI: @@ -961,7 +986,7 @@ static unsigned long e820_type_to_iores_desc(int e820_type) } } -static bool do_mark_busy(u32 type, struct resource *res) +static bool __init do_mark_busy(u32 type, struct resource *res) { /* this is the legacy bios/dos rom-shadow + mmio region */ if (res->start < (1ULL<<20)) @@ -991,35 +1016,35 @@ void __init e820_reserve_resources(void) struct resource *res; u64 end; - res = alloc_bootmem(sizeof(struct resource) * e820.nr_map); + res = alloc_bootmem(sizeof(struct resource) * e820->nr_map); e820_res = res; - for (i = 0; i < e820.nr_map; i++) { - end = e820.map[i].addr + e820.map[i].size - 1; + for (i = 0; i < e820->nr_map; i++) { + end = e820->map[i].addr + e820->map[i].size - 1; if (end != (resource_size_t)end) { res++; continue; } - res->name = e820_type_to_string(e820.map[i].type); - res->start = e820.map[i].addr; + res->name = e820_type_to_string(e820->map[i].type); + res->start = e820->map[i].addr; res->end = end; - res->flags = e820_type_to_iomem_type(e820.map[i].type); - res->desc = e820_type_to_iores_desc(e820.map[i].type); + res->flags = e820_type_to_iomem_type(e820->map[i].type); + res->desc = e820_type_to_iores_desc(e820->map[i].type); /* * don't register the region that could be conflicted with * pci device BAR resource and insert them later in * pcibios_resource_survey() */ - if (do_mark_busy(e820.map[i].type, res)) { + if (do_mark_busy(e820->map[i].type, res)) { res->flags |= IORESOURCE_BUSY; insert_resource(&iomem_resource, res); } res++; } - for (i = 0; i < e820_saved.nr_map; i++) { - struct e820entry *entry = &e820_saved.map[i]; + for (i = 0; i < e820_saved->nr_map; i++) { + struct e820entry *entry = &e820_saved->map[i]; firmware_map_add_early(entry->addr, entry->addr + entry->size, e820_type_to_string(entry->type)); @@ -1027,7 +1052,7 @@ void __init e820_reserve_resources(void) } /* How much should we pad RAM ending depending on where it is? */ -static unsigned long ram_alignment(resource_size_t pos) +static unsigned long __init ram_alignment(resource_size_t pos) { unsigned long mb = pos >> 20; @@ -1051,7 +1076,7 @@ void __init e820_reserve_resources_late(void) struct resource *res; res = e820_res; - for (i = 0; i < e820.nr_map; i++) { + for (i = 0; i < e820->nr_map; i++) { if (!res->parent && res->end) insert_resource_expand_to_fit(&iomem_resource, res); res++; @@ -1061,8 +1086,8 @@ void __init e820_reserve_resources_late(void) * Try to bump up RAM regions to reasonable boundaries to * avoid stolen RAM: */ - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *entry = &e820.map[i]; + for (i = 0; i < e820->nr_map; i++) { + struct e820entry *entry = &e820->map[i]; u64 start, end; if (entry->type != E820_RAM) @@ -1110,7 +1135,7 @@ char *__init default_machine_specific_memory_setup(void) who = "BIOS-e801"; } - e820.nr_map = 0; + e820->nr_map = 0; e820_add_region(0, LOWMEMSIZE(), E820_RAM); e820_add_region(HIGH_MEMORY, mem_size << 10, E820_RAM); } @@ -1124,7 +1149,7 @@ void __init setup_memory_map(void) char *who; who = x86_init.resources.memory_setup(); - memcpy(&e820_saved, &e820, sizeof(struct e820map)); + memcpy(e820_saved, e820, sizeof(struct e820map)); printk(KERN_INFO "e820: BIOS-provided physical RAM map:\n"); e820_print_map(who); } @@ -1141,8 +1166,8 @@ void __init memblock_x86_fill(void) */ memblock_allow_resize(); - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; + for (i = 0; i < e820->nr_map; i++) { + struct e820entry *ei = &e820->map[i]; end = ei->addr + ei->size; if (end != (resource_size_t)end) diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index de7501edb21c..18bb3a639197 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -555,7 +555,7 @@ intel_graphics_stolen(int num, int slot, int func, /* Mark this space as reserved */ e820_add_region(base, size, E820_RESERVED); - sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); + sanitize_e820_map(e820->map, ARRAY_SIZE(e820->map), &e820->nr_map); } static void __init intel_graphics_quirks(int num, int slot, int func) diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 93982aebb398..2f2b8c7ccb85 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -317,7 +317,6 @@ static void __init fpu__init_system_ctx_switch(void) on_boot_cpu = 0; WARN_ON_FPU(current->thread.fpu.fpstate_active); - current_thread_info()->status = 0; if (boot_cpu_has(X86_FEATURE_XSAVEOPT) && eagerfpu != DISABLE) eagerfpu = ENABLE; diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index d036cfb4495d..8639bb2ae058 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -1029,7 +1029,7 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent, } if (ftrace_push_return_trace(old, self_addr, &trace.depth, - frame_pointer) == -EBUSY) { + frame_pointer, parent) == -EBUSY) { *parent = old; return; } diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 6f8902b0d151..5f401262f12d 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -94,7 +94,7 @@ RESERVE_BRK(pagetables, INIT_MAP_SIZE) */ __HEAD ENTRY(startup_32) - movl pa(stack_start),%ecx + movl pa(initial_stack),%ecx /* test KEEP_SEGMENTS flag to see if the bootloader is asking us to not reload segments */ @@ -286,7 +286,7 @@ num_subarch_entries = (. - subarch_entries) / 4 * start_secondary(). */ ENTRY(start_cpu0) - movl stack_start, %ecx + movl initial_stack, %ecx movl %ecx, %esp jmp *(initial_code) ENDPROC(start_cpu0) @@ -307,7 +307,7 @@ ENTRY(startup_32_smp) movl %eax,%es movl %eax,%fs movl %eax,%gs - movl pa(stack_start),%ecx + movl pa(initial_stack),%ecx movl %eax,%ss leal -__PAGE_OFFSET(%ecx),%esp @@ -703,7 +703,7 @@ ENTRY(initial_page_table) .data .balign 4 -ENTRY(stack_start) +ENTRY(initial_stack) .long init_thread_union+THREAD_SIZE __INITRODATA diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 9f8efc9f0075..c98a559c346e 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -66,7 +66,7 @@ startup_64: */ /* - * Setup stack for verify_cpu(). "-8" because stack_start is defined + * Setup stack for verify_cpu(). "-8" because initial_stack is defined * this way, see below. Our best guess is a NULL ptr for stack * termination heuristics and we don't want to break anything which * might depend on it (kgdb, ...). @@ -226,7 +226,7 @@ ENTRY(secondary_startup_64) movq %rax, %cr0 /* Setup a boot time stack */ - movq stack_start(%rip), %rsp + movq initial_stack(%rip), %rsp /* zero EFLAGS after setting rsp */ pushq $0 @@ -310,7 +310,7 @@ ENDPROC(secondary_startup_64) * start_secondary(). */ ENTRY(start_cpu0) - movq stack_start(%rip),%rsp + movq initial_stack(%rip),%rsp movq initial_code(%rip),%rax pushq $0 # fake return address to stop unwinder pushq $__KERNEL_CS # set correct cs @@ -319,17 +319,15 @@ ENTRY(start_cpu0) ENDPROC(start_cpu0) #endif - /* SMP bootup changes these two */ + /* Both SMP bootup and ACPI suspend change these variables */ __REFDATA .balign 8 GLOBAL(initial_code) .quad x86_64_start_kernel GLOBAL(initial_gs) .quad INIT_PER_CPU_VAR(irq_stack_union) - - GLOBAL(stack_start) + GLOBAL(initial_stack) .quad init_thread_union+THREAD_SIZE-8 - .word 0 __FINITDATA bad_address: diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index c6dfd801df97..274fab99169d 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -756,10 +756,104 @@ static void hpet_reserve_msi_timers(struct hpet_data *hd) /* * Clock source related code */ +#if defined(CONFIG_SMP) && defined(CONFIG_64BIT) +/* + * Reading the HPET counter is a very slow operation. If a large number of + * CPUs are trying to access the HPET counter simultaneously, it can cause + * massive delay and slow down system performance dramatically. This may + * happen when HPET is the default clock source instead of TSC. For a + * really large system with hundreds of CPUs, the slowdown may be so + * severe that it may actually crash the system because of a NMI watchdog + * soft lockup, for example. + * + * If multiple CPUs are trying to access the HPET counter at the same time, + * we don't actually need to read the counter multiple times. Instead, the + * other CPUs can use the counter value read by the first CPU in the group. + * + * This special feature is only enabled on x86-64 systems. It is unlikely + * that 32-bit x86 systems will have enough CPUs to require this feature + * with its associated locking overhead. And we also need 64-bit atomic + * read. + * + * The lock and the hpet value are stored together and can be read in a + * single atomic 64-bit read. It is explicitly assumed that arch_spinlock_t + * is 32 bits in size. + */ +union hpet_lock { + struct { + arch_spinlock_t lock; + u32 value; + }; + u64 lockval; +}; + +static union hpet_lock hpet __cacheline_aligned = { + { .lock = __ARCH_SPIN_LOCK_UNLOCKED, }, +}; + +static cycle_t read_hpet(struct clocksource *cs) +{ + unsigned long flags; + union hpet_lock old, new; + + BUILD_BUG_ON(sizeof(union hpet_lock) != 8); + + /* + * Read HPET directly if in NMI. + */ + if (in_nmi()) + return (cycle_t)hpet_readl(HPET_COUNTER); + + /* + * Read the current state of the lock and HPET value atomically. + */ + old.lockval = READ_ONCE(hpet.lockval); + + if (arch_spin_is_locked(&old.lock)) + goto contended; + + local_irq_save(flags); + if (arch_spin_trylock(&hpet.lock)) { + new.value = hpet_readl(HPET_COUNTER); + /* + * Use WRITE_ONCE() to prevent store tearing. + */ + WRITE_ONCE(hpet.value, new.value); + arch_spin_unlock(&hpet.lock); + local_irq_restore(flags); + return (cycle_t)new.value; + } + local_irq_restore(flags); + +contended: + /* + * Contended case + * -------------- + * Wait until the HPET value change or the lock is free to indicate + * its value is up-to-date. + * + * It is possible that old.value has already contained the latest + * HPET value while the lock holder was in the process of releasing + * the lock. Checking for lock state change will enable us to return + * the value immediately instead of waiting for the next HPET reader + * to come along. + */ + do { + cpu_relax(); + new.lockval = READ_ONCE(hpet.lockval); + } while ((new.value == old.value) && arch_spin_is_locked(&new.lock)); + + return (cycle_t)new.value; +} +#else +/* + * For UP or 32-bit. + */ static cycle_t read_hpet(struct clocksource *cs) { return (cycle_t)hpet_readl(HPET_COUNTER); } +#endif static struct clocksource clocksource_hpet = { .name = "hpet", diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 4a7903714065..9ebd0b0e73d9 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -40,8 +40,7 @@ static inline void stack_overflow_check(struct pt_regs *regs) if (user_mode(regs)) return; - if (regs->sp >= curbase + sizeof(struct thread_info) + - sizeof(struct pt_regs) + STACK_TOP_MARGIN && + if (regs->sp >= curbase + sizeof(struct pt_regs) + STACK_TOP_MARGIN && regs->sp <= curbase + THREAD_SIZE) return; diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index f2356bda2b05..3407b148c240 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -99,14 +99,14 @@ static int setup_e820_entries(struct boot_params *params) { unsigned int nr_e820_entries; - nr_e820_entries = e820_saved.nr_map; + nr_e820_entries = e820_saved->nr_map; /* TODO: Pass entries more than E820MAX in bootparams setup data */ if (nr_e820_entries > E820MAX) nr_e820_entries = E820MAX; params->e820_entries = nr_e820_entries; - memcpy(¶ms->e820_map, &e820_saved.map, + memcpy(¶ms->e820_map, &e820_saved->map, nr_e820_entries * sizeof(struct e820entry)); return 0; diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 04cde527d728..8e36f249646e 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -50,6 +50,7 @@ #include <asm/apicdef.h> #include <asm/apic.h> #include <asm/nmi.h> +#include <asm/switch_to.h> struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = { @@ -166,21 +167,19 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) gdb_regs[GDB_DX] = 0; gdb_regs[GDB_SI] = 0; gdb_regs[GDB_DI] = 0; - gdb_regs[GDB_BP] = *(unsigned long *)p->thread.sp; + gdb_regs[GDB_BP] = ((struct inactive_task_frame *)p->thread.sp)->bp; #ifdef CONFIG_X86_32 gdb_regs[GDB_DS] = __KERNEL_DS; gdb_regs[GDB_ES] = __KERNEL_DS; gdb_regs[GDB_PS] = 0; gdb_regs[GDB_CS] = __KERNEL_CS; - gdb_regs[GDB_PC] = p->thread.ip; gdb_regs[GDB_SS] = __KERNEL_DS; gdb_regs[GDB_FS] = 0xFFFF; gdb_regs[GDB_GS] = 0xFFFF; #else - gdb_regs32[GDB_PS] = *(unsigned long *)(p->thread.sp + 8); + gdb_regs32[GDB_PS] = 0; gdb_regs32[GDB_CS] = __KERNEL_CS; gdb_regs32[GDB_SS] = __KERNEL_DS; - gdb_regs[GDB_PC] = 0; gdb_regs[GDB_R8] = 0; gdb_regs[GDB_R9] = 0; gdb_regs[GDB_R10] = 0; @@ -190,6 +189,7 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) gdb_regs[GDB_R14] = 0; gdb_regs[GDB_R15] = 0; #endif + gdb_regs[GDB_PC] = 0; gdb_regs[GDB_SP] = p->thread.sp; } diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 7847e5c0e0b5..28cee019209c 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -45,7 +45,7 @@ #include <linux/slab.h> #include <linux/hardirq.h> #include <linux/preempt.h> -#include <linux/module.h> +#include <linux/extable.h> #include <linux/kdebug.h> #include <linux/kallsyms.h> #include <linux/ftrace.h> diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 4425f593f0ec..3bb4c5f021f6 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -24,7 +24,7 @@ #include <linux/slab.h> #include <linux/hardirq.h> #include <linux/preempt.h> -#include <linux/module.h> +#include <linux/extable.h> #include <linux/kdebug.h> #include <linux/kallsyms.h> #include <linux/ftrace.h> diff --git a/arch/x86/kernel/ksysfs.c b/arch/x86/kernel/ksysfs.c index c2bedaea11f7..4afc67f5facc 100644 --- a/arch/x86/kernel/ksysfs.c +++ b/arch/x86/kernel/ksysfs.c @@ -184,7 +184,7 @@ out: static struct kobj_attribute type_attr = __ATTR_RO(type); -static struct bin_attribute data_attr = { +static struct bin_attribute data_attr __ro_after_init = { .attr = { .name = "data", .mode = S_IRUGO, diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 1726c4c12336..edbbfc854e39 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -423,12 +423,7 @@ static void __init kvm_smp_prepare_boot_cpu(void) kvm_spinlock_init(); } -static void kvm_guest_cpu_online(void *dummy) -{ - kvm_guest_cpu_init(); -} - -static void kvm_guest_cpu_offline(void *dummy) +static void kvm_guest_cpu_offline(void) { kvm_disable_steal_time(); if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) @@ -437,29 +432,21 @@ static void kvm_guest_cpu_offline(void *dummy) apf_task_wake_all(); } -static int kvm_cpu_notify(struct notifier_block *self, unsigned long action, - void *hcpu) +static int kvm_cpu_online(unsigned int cpu) { - int cpu = (unsigned long)hcpu; - switch (action) { - case CPU_ONLINE: - case CPU_DOWN_FAILED: - case CPU_ONLINE_FROZEN: - smp_call_function_single(cpu, kvm_guest_cpu_online, NULL, 0); - break; - case CPU_DOWN_PREPARE: - case CPU_DOWN_PREPARE_FROZEN: - smp_call_function_single(cpu, kvm_guest_cpu_offline, NULL, 1); - break; - default: - break; - } - return NOTIFY_OK; + local_irq_disable(); + kvm_guest_cpu_init(); + local_irq_enable(); + return 0; } -static struct notifier_block kvm_cpu_notifier = { - .notifier_call = kvm_cpu_notify, -}; +static int kvm_cpu_down_prepare(unsigned int cpu) +{ + local_irq_disable(); + kvm_guest_cpu_offline(); + local_irq_enable(); + return 0; +} #endif static void __init kvm_apf_trap_init(void) @@ -494,7 +481,9 @@ void __init kvm_guest_init(void) #ifdef CONFIG_SMP smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; - register_cpu_notifier(&kvm_cpu_notifier); + if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/kvm:online", + kvm_cpu_online, kvm_cpu_down_prepare) < 0) + pr_err("kvm_guest: Failed to install cpu hotplug callbacks\n"); #else kvm_guest_cpu_init(); #endif @@ -575,9 +564,6 @@ static void kvm_kick_cpu(int cpu) kvm_hypercall2(KVM_HC_KICK_CPU, flags, apicid); } - -#ifdef CONFIG_QUEUED_SPINLOCKS - #include <asm/qspinlock.h> static void kvm_wait(u8 *ptr, u8 val) @@ -606,243 +592,6 @@ out: local_irq_restore(flags); } -#else /* !CONFIG_QUEUED_SPINLOCKS */ - -enum kvm_contention_stat { - TAKEN_SLOW, - TAKEN_SLOW_PICKUP, - RELEASED_SLOW, - RELEASED_SLOW_KICKED, - NR_CONTENTION_STATS -}; - -#ifdef CONFIG_KVM_DEBUG_FS -#define HISTO_BUCKETS 30 - -static struct kvm_spinlock_stats -{ - u32 contention_stats[NR_CONTENTION_STATS]; - u32 histo_spin_blocked[HISTO_BUCKETS+1]; - u64 time_blocked; -} spinlock_stats; - -static u8 zero_stats; - -static inline void check_zero(void) -{ - u8 ret; - u8 old; - - old = READ_ONCE(zero_stats); - if (unlikely(old)) { - ret = cmpxchg(&zero_stats, old, 0); - /* This ensures only one fellow resets the stat */ - if (ret == old) - memset(&spinlock_stats, 0, sizeof(spinlock_stats)); - } -} - -static inline void add_stats(enum kvm_contention_stat var, u32 val) -{ - check_zero(); - spinlock_stats.contention_stats[var] += val; -} - - -static inline u64 spin_time_start(void) -{ - return sched_clock(); -} - -static void __spin_time_accum(u64 delta, u32 *array) -{ - unsigned index; - - index = ilog2(delta); - check_zero(); - - if (index < HISTO_BUCKETS) - array[index]++; - else - array[HISTO_BUCKETS]++; -} - -static inline void spin_time_accum_blocked(u64 start) -{ - u32 delta; - - delta = sched_clock() - start; - __spin_time_accum(delta, spinlock_stats.histo_spin_blocked); - spinlock_stats.time_blocked += delta; -} - -static struct dentry *d_spin_debug; -static struct dentry *d_kvm_debug; - -static struct dentry *kvm_init_debugfs(void) -{ - d_kvm_debug = debugfs_create_dir("kvm-guest", NULL); - if (!d_kvm_debug) - printk(KERN_WARNING "Could not create 'kvm' debugfs directory\n"); - - return d_kvm_debug; -} - -static int __init kvm_spinlock_debugfs(void) -{ - struct dentry *d_kvm; - - d_kvm = kvm_init_debugfs(); - if (d_kvm == NULL) - return -ENOMEM; - - d_spin_debug = debugfs_create_dir("spinlocks", d_kvm); - - debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats); - - debugfs_create_u32("taken_slow", 0444, d_spin_debug, - &spinlock_stats.contention_stats[TAKEN_SLOW]); - debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug, - &spinlock_stats.contention_stats[TAKEN_SLOW_PICKUP]); - - debugfs_create_u32("released_slow", 0444, d_spin_debug, - &spinlock_stats.contention_stats[RELEASED_SLOW]); - debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug, - &spinlock_stats.contention_stats[RELEASED_SLOW_KICKED]); - - debugfs_create_u64("time_blocked", 0444, d_spin_debug, - &spinlock_stats.time_blocked); - - debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug, - spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1); - - return 0; -} -fs_initcall(kvm_spinlock_debugfs); -#else /* !CONFIG_KVM_DEBUG_FS */ -static inline void add_stats(enum kvm_contention_stat var, u32 val) -{ -} - -static inline u64 spin_time_start(void) -{ - return 0; -} - -static inline void spin_time_accum_blocked(u64 start) -{ -} -#endif /* CONFIG_KVM_DEBUG_FS */ - -struct kvm_lock_waiting { - struct arch_spinlock *lock; - __ticket_t want; -}; - -/* cpus 'waiting' on a spinlock to become available */ -static cpumask_t waiting_cpus; - -/* Track spinlock on which a cpu is waiting */ -static DEFINE_PER_CPU(struct kvm_lock_waiting, klock_waiting); - -__visible void kvm_lock_spinning(struct arch_spinlock *lock, __ticket_t want) -{ - struct kvm_lock_waiting *w; - int cpu; - u64 start; - unsigned long flags; - __ticket_t head; - - if (in_nmi()) - return; - - w = this_cpu_ptr(&klock_waiting); - cpu = smp_processor_id(); - start = spin_time_start(); - - /* - * Make sure an interrupt handler can't upset things in a - * partially setup state. - */ - local_irq_save(flags); - - /* - * The ordering protocol on this is that the "lock" pointer - * may only be set non-NULL if the "want" ticket is correct. - * If we're updating "want", we must first clear "lock". - */ - w->lock = NULL; - smp_wmb(); - w->want = want; - smp_wmb(); - w->lock = lock; - - add_stats(TAKEN_SLOW, 1); - - /* - * This uses set_bit, which is atomic but we should not rely on its - * reordering gurantees. So barrier is needed after this call. - */ - cpumask_set_cpu(cpu, &waiting_cpus); - - barrier(); - - /* - * Mark entry to slowpath before doing the pickup test to make - * sure we don't deadlock with an unlocker. - */ - __ticket_enter_slowpath(lock); - - /* make sure enter_slowpath, which is atomic does not cross the read */ - smp_mb__after_atomic(); - - /* - * check again make sure it didn't become free while - * we weren't looking. - */ - head = READ_ONCE(lock->tickets.head); - if (__tickets_equal(head, want)) { - add_stats(TAKEN_SLOW_PICKUP, 1); - goto out; - } - - /* - * halt until it's our turn and kicked. Note that we do safe halt - * for irq enabled case to avoid hang when lock info is overwritten - * in irq spinlock slowpath and no spurious interrupt occur to save us. - */ - if (arch_irqs_disabled_flags(flags)) - halt(); - else - safe_halt(); - -out: - cpumask_clear_cpu(cpu, &waiting_cpus); - w->lock = NULL; - local_irq_restore(flags); - spin_time_accum_blocked(start); -} -PV_CALLEE_SAVE_REGS_THUNK(kvm_lock_spinning); - -/* Kick vcpu waiting on @lock->head to reach value @ticket */ -static void kvm_unlock_kick(struct arch_spinlock *lock, __ticket_t ticket) -{ - int cpu; - - add_stats(RELEASED_SLOW, 1); - for_each_cpu(cpu, &waiting_cpus) { - const struct kvm_lock_waiting *w = &per_cpu(klock_waiting, cpu); - if (READ_ONCE(w->lock) == lock && - READ_ONCE(w->want) == ticket) { - add_stats(RELEASED_SLOW_KICKED, 1); - kvm_kick_cpu(cpu); - break; - } - } -} - -#endif /* !CONFIG_QUEUED_SPINLOCKS */ - /* * Setup pv_lock_ops to exploit KVM_FEATURE_PV_UNHALT if present. */ @@ -854,16 +603,11 @@ void __init kvm_spinlock_init(void) if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT)) return; -#ifdef CONFIG_QUEUED_SPINLOCKS __pv_init_lock_hash(); pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath; pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock); pv_lock_ops.wait = kvm_wait; pv_lock_ops.kick = kvm_kick_cpu; -#else /* !CONFIG_QUEUED_SPINLOCKS */ - pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(kvm_lock_spinning); - pv_lock_ops.unlock_kick = kvm_unlock_kick; -#endif } static __init int kvm_spinlock_init_jump(void) diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 1d39bfbd26bb..60b9949f1e65 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -29,7 +29,7 @@ #include <asm/x86_init.h> #include <asm/reboot.h> -static int kvmclock = 1; +static int kvmclock __ro_after_init = 1; static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME; static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK; static cycle_t kvm_sched_clock_offset; @@ -289,6 +289,7 @@ void __init kvmclock_init(void) put_cpu(); x86_platform.calibrate_tsc = kvm_get_tsc_khz; + x86_platform.calibrate_cpu = kvm_get_tsc_khz; x86_platform.get_wallclock = kvm_get_wallclock; x86_platform.set_wallclock = kvm_set_wallclock; #ifdef CONFIG_X86_LOCAL_APIC diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 068c4a929de6..0f8d20497383 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -499,6 +499,9 @@ void __init default_get_smp_config(unsigned int early) { struct mpf_intel *mpf = mpf_found; + if (!smp_found_config) + return; + if (!mpf) return; diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c index 1939a0269377..2c55a003b793 100644 --- a/arch/x86/kernel/paravirt-spinlocks.c +++ b/arch/x86/kernel/paravirt-spinlocks.c @@ -8,7 +8,6 @@ #include <asm/paravirt.h> -#ifdef CONFIG_QUEUED_SPINLOCKS __visible void __native_queued_spin_unlock(struct qspinlock *lock) { native_queued_spin_unlock(lock); @@ -21,19 +20,13 @@ bool pv_is_native_spin_unlock(void) return pv_lock_ops.queued_spin_unlock.func == __raw_callee_save___native_queued_spin_unlock; } -#endif struct pv_lock_ops pv_lock_ops = { #ifdef CONFIG_SMP -#ifdef CONFIG_QUEUED_SPINLOCKS .queued_spin_lock_slowpath = native_queued_spin_lock_slowpath, .queued_spin_unlock = PV_CALLEE_SAVE(__native_queued_spin_unlock), .wait = paravirt_nop, .kick = paravirt_nop, -#else /* !CONFIG_QUEUED_SPINLOCKS */ - .lock_spinning = __PV_IS_CALLEE_SAVE(paravirt_nop), - .unlock_kick = paravirt_nop, -#endif /* !CONFIG_QUEUED_SPINLOCKS */ #endif /* SMP */ }; EXPORT_SYMBOL(pv_lock_ops); diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 1acfd76e3e26..bbf3d5933eaa 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -332,7 +332,6 @@ __visible struct pv_cpu_ops pv_cpu_ops = { .read_cr0 = native_read_cr0, .write_cr0 = native_write_cr0, .read_cr4 = native_read_cr4, - .read_cr4_safe = native_read_cr4_safe, .write_cr4 = native_write_cr4, #ifdef CONFIG_X86_64 .read_cr8 = native_read_cr8, @@ -389,7 +388,7 @@ NOKPROBE_SYMBOL(native_load_idt); #define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_64) #endif -struct pv_mmu_ops pv_mmu_ops = { +struct pv_mmu_ops pv_mmu_ops __ro_after_init = { .read_cr2 = native_read_cr2, .write_cr2 = native_write_cr2, diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c index 158dc0650d5d..920c6ae08592 100644 --- a/arch/x86/kernel/paravirt_patch_32.c +++ b/arch/x86/kernel/paravirt_patch_32.c @@ -10,7 +10,7 @@ DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3"); DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax"); DEF_NATIVE(pv_cpu_ops, clts, "clts"); -#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS) +#if defined(CONFIG_PARAVIRT_SPINLOCKS) DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%eax)"); #endif @@ -49,7 +49,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, PATCH_SITE(pv_mmu_ops, read_cr3); PATCH_SITE(pv_mmu_ops, write_cr3); PATCH_SITE(pv_cpu_ops, clts); -#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS) +#if defined(CONFIG_PARAVIRT_SPINLOCKS) case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock): if (pv_is_native_spin_unlock()) { start = start_pv_lock_ops_queued_spin_unlock; diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c index e70087a04cc8..bb3840cedb4f 100644 --- a/arch/x86/kernel/paravirt_patch_64.c +++ b/arch/x86/kernel/paravirt_patch_64.c @@ -19,7 +19,7 @@ DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs"); DEF_NATIVE(, mov32, "mov %edi, %eax"); DEF_NATIVE(, mov64, "mov %rdi, %rax"); -#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS) +#if defined(CONFIG_PARAVIRT_SPINLOCKS) DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%rdi)"); #endif @@ -61,7 +61,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, PATCH_SITE(pv_cpu_ops, clts); PATCH_SITE(pv_mmu_ops, flush_tlb_single); PATCH_SITE(pv_cpu_ops, wbinvd); -#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS) +#if defined(CONFIG_PARAVIRT_SPINLOCKS) case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock): if (pv_is_native_spin_unlock()) { start = start_pv_lock_ops_queued_spin_unlock; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 62c0b0ea2ce4..4002b475171c 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -32,6 +32,7 @@ #include <asm/tlbflush.h> #include <asm/mce.h> #include <asm/vm86.h> +#include <asm/switch_to.h> /* * per-CPU TSS segments. Threads are completely 'soft' on Linux, @@ -513,6 +514,17 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) } /* + * Return saved PC of a blocked thread. + * What is this good for? it will be always the scheduler or ret_from_fork. + */ +unsigned long thread_saved_pc(struct task_struct *tsk) +{ + struct inactive_task_frame *frame = + (struct inactive_task_frame *) READ_ONCE(tsk->thread.sp); + return READ_ONCE_NOCHECK(frame->ret_addr); +} + +/* * Called from fs/proc with a reference on @p to find the function * which called into schedule(). This needs to be done carefully * because the task might wake up and we might look at a stack @@ -520,15 +532,18 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) */ unsigned long get_wchan(struct task_struct *p) { - unsigned long start, bottom, top, sp, fp, ip; + unsigned long start, bottom, top, sp, fp, ip, ret = 0; int count = 0; if (!p || p == current || p->state == TASK_RUNNING) return 0; + if (!try_get_task_stack(p)) + return 0; + start = (unsigned long)task_stack_page(p); if (!start) - return 0; + goto out; /* * Layout of the stack page: @@ -537,9 +552,7 @@ unsigned long get_wchan(struct task_struct *p) * PADDING * ----------- top = topmax - TOP_OF_KERNEL_STACK_PADDING * stack - * ----------- bottom = start + sizeof(thread_info) - * thread_info - * ----------- start + * ----------- bottom = start * * The tasks stack pointer points at the location where the * framepointer is stored. The data on the stack is: @@ -550,20 +563,25 @@ unsigned long get_wchan(struct task_struct *p) */ top = start + THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; top -= 2 * sizeof(unsigned long); - bottom = start + sizeof(struct thread_info); + bottom = start; sp = READ_ONCE(p->thread.sp); if (sp < bottom || sp > top) - return 0; + goto out; - fp = READ_ONCE_NOCHECK(*(unsigned long *)sp); + fp = READ_ONCE_NOCHECK(((struct inactive_task_frame *)sp)->bp); do { if (fp < bottom || fp > top) - return 0; + goto out; ip = READ_ONCE_NOCHECK(*(unsigned long *)(fp + sizeof(unsigned long))); - if (!in_sched_functions(ip)) - return ip; + if (!in_sched_functions(ip)) { + ret = ip; + goto out; + } fp = READ_ONCE_NOCHECK(*(unsigned long *)fp); } while (count++ < 16 && p->state != TASK_RUNNING); - return 0; + +out: + put_task_stack(p); + return ret; } diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index d86be29c38c7..bd7be8efdc4c 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -55,17 +55,6 @@ #include <asm/switch_to.h> #include <asm/vm86.h> -asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); -asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread"); - -/* - * Return saved PC of a blocked thread. - */ -unsigned long thread_saved_pc(struct task_struct *tsk) -{ - return ((unsigned long *)tsk->thread.sp)[3]; -} - void __show_regs(struct pt_regs *regs, int all) { unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; @@ -101,7 +90,7 @@ void __show_regs(struct pt_regs *regs, int all) cr0 = read_cr0(); cr2 = read_cr2(); cr3 = read_cr3(); - cr4 = __read_cr4_safe(); + cr4 = __read_cr4(); printk(KERN_DEFAULT "CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4); @@ -133,35 +122,31 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, unsigned long arg, struct task_struct *p, unsigned long tls) { struct pt_regs *childregs = task_pt_regs(p); + struct fork_frame *fork_frame = container_of(childregs, struct fork_frame, regs); + struct inactive_task_frame *frame = &fork_frame->frame; struct task_struct *tsk; int err; - p->thread.sp = (unsigned long) childregs; + frame->bp = 0; + frame->ret_addr = (unsigned long) ret_from_fork; + p->thread.sp = (unsigned long) fork_frame; p->thread.sp0 = (unsigned long) (childregs+1); memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); if (unlikely(p->flags & PF_KTHREAD)) { /* kernel thread */ memset(childregs, 0, sizeof(struct pt_regs)); - p->thread.ip = (unsigned long) ret_from_kernel_thread; - task_user_gs(p) = __KERNEL_STACK_CANARY; - childregs->ds = __USER_DS; - childregs->es = __USER_DS; - childregs->fs = __KERNEL_PERCPU; - childregs->bx = sp; /* function */ - childregs->bp = arg; - childregs->orig_ax = -1; - childregs->cs = __KERNEL_CS | get_kernel_rpl(); - childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED; + frame->bx = sp; /* function */ + frame->di = arg; p->thread.io_bitmap_ptr = NULL; return 0; } + frame->bx = 0; *childregs = *current_pt_regs(); childregs->ax = 0; if (sp) childregs->sp = sp; - p->thread.ip = (unsigned long) ret_from_fork; task_user_gs(p) = get_user_gs(current_pt_regs()); p->thread.io_bitmap_ptr = NULL; diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 63236d8f84bf..ee944bd2310d 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -49,8 +49,7 @@ #include <asm/debugreg.h> #include <asm/switch_to.h> #include <asm/xen/hypervisor.h> - -asmlinkage extern void ret_from_fork(void); +#include <asm/vdso.h> __visible DEFINE_PER_CPU(unsigned long, rsp_scratch); @@ -141,12 +140,17 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, { int err; struct pt_regs *childregs; + struct fork_frame *fork_frame; + struct inactive_task_frame *frame; struct task_struct *me = current; p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE; childregs = task_pt_regs(p); - p->thread.sp = (unsigned long) childregs; - set_tsk_thread_flag(p, TIF_FORK); + fork_frame = container_of(childregs, struct fork_frame, regs); + frame = &fork_frame->frame; + frame->bp = 0; + frame->ret_addr = (unsigned long) ret_from_fork; + p->thread.sp = (unsigned long) fork_frame; p->thread.io_bitmap_ptr = NULL; savesegment(gs, p->thread.gsindex); @@ -160,15 +164,11 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, if (unlikely(p->flags & PF_KTHREAD)) { /* kernel thread */ memset(childregs, 0, sizeof(struct pt_regs)); - childregs->sp = (unsigned long)childregs; - childregs->ss = __KERNEL_DS; - childregs->bx = sp; /* function */ - childregs->bp = arg; - childregs->orig_ax = -1; - childregs->cs = __KERNEL_CS | get_kernel_rpl(); - childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED; + frame->bx = sp; /* function */ + frame->r12 = arg; return 0; } + frame->bx = 0; *childregs = *current_pt_regs(); childregs->ax = 0; @@ -511,7 +511,7 @@ void set_personality_ia32(bool x32) current->personality &= ~READ_IMPLIES_EXEC; /* in_compat_syscall() uses the presence of the x32 syscall bit flag to determine compat status */ - current_thread_info()->status &= ~TS_COMPAT; + current->thread.status &= ~TS_COMPAT; } else { set_thread_flag(TIF_IA32); clear_thread_flag(TIF_X32); @@ -519,11 +519,24 @@ void set_personality_ia32(bool x32) current->mm->context.ia32_compat = TIF_IA32; current->personality |= force_personality32; /* Prepare the first "return" to user space */ - current_thread_info()->status |= TS_COMPAT; + current->thread.status |= TS_COMPAT; } } EXPORT_SYMBOL_GPL(set_personality_ia32); +#ifdef CONFIG_CHECKPOINT_RESTORE +static long prctl_map_vdso(const struct vdso_image *image, unsigned long addr) +{ + int ret; + + ret = map_vdso_once(image, addr); + if (ret) + return ret; + + return (long)image->size; +} +#endif + long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) { int ret = 0; @@ -577,6 +590,19 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) break; } +#ifdef CONFIG_CHECKPOINT_RESTORE +# ifdef CONFIG_X86_X32_ABI + case ARCH_MAP_VDSO_X32: + return prctl_map_vdso(&vdso_image_x32, addr); +# endif +# if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION + case ARCH_MAP_VDSO_32: + return prctl_map_vdso(&vdso_image_32, addr); +# endif + case ARCH_MAP_VDSO_64: + return prctl_map_vdso(&vdso_image_64, addr); +#endif + default: ret = -EINVAL; break; diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index f79576a541ff..0e63c0267f99 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -173,8 +173,8 @@ unsigned long kernel_stack_pointer(struct pt_regs *regs) return sp; prev_esp = (u32 *)(context); - if (prev_esp) - return (unsigned long)prev_esp; + if (*prev_esp) + return (unsigned long)*prev_esp; return (unsigned long)regs; } @@ -934,7 +934,7 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value) */ regs->orig_ax = value; if (syscall_get_nr(child, regs) >= 0) - task_thread_info(child)->status |= TS_I386_REGS_POKED; + child->thread.status |= TS_I386_REGS_POKED; break; case offsetof(struct user32, regs.eflags): @@ -1250,7 +1250,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, #ifdef CONFIG_X86_64 -static struct user_regset x86_64_regsets[] __read_mostly = { +static struct user_regset x86_64_regsets[] __ro_after_init = { [REGSET_GENERAL] = { .core_note_type = NT_PRSTATUS, .n = sizeof(struct user_regs_struct) / sizeof(long), @@ -1291,7 +1291,7 @@ static const struct user_regset_view user_x86_64_view = { #endif /* CONFIG_X86_64 */ #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION -static struct user_regset x86_32_regsets[] __read_mostly = { +static struct user_regset x86_32_regsets[] __ro_after_init = { [REGSET_GENERAL] = { .core_note_type = NT_PRSTATUS, .n = sizeof(struct user_regs_struct32) / sizeof(u32), @@ -1344,7 +1344,7 @@ static const struct user_regset_view user_x86_32_view = { */ u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; -void update_regset_xstate_info(unsigned int size, u64 xstate_mask) +void __init update_regset_xstate_info(unsigned int size, u64 xstate_mask) { #ifdef CONFIG_X86_64 x86_64_regsets[REGSET_XSTATE].n = size / sizeof(u64); @@ -1358,7 +1358,7 @@ void update_regset_xstate_info(unsigned int size, u64 xstate_mask) const struct user_regset_view *task_user_regset_view(struct task_struct *task) { #ifdef CONFIG_IA32_EMULATION - if (test_tsk_thread_flag(task, TIF_IA32)) + if (!user_64bit_mode(task_pt_regs(task))) #endif #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION return &user_x86_32_view; diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index cc457ff818ad..51402a7e4ca6 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -626,3 +626,34 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3, amd_disable_seq_and_redirect_scrub); #endif + +#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE) +#include <linux/jump_label.h> +#include <asm/string_64.h> + +/* Ivy Bridge, Haswell, Broadwell */ +static void quirk_intel_brickland_xeon_ras_cap(struct pci_dev *pdev) +{ + u32 capid0; + + pci_read_config_dword(pdev, 0x84, &capid0); + + if (capid0 & 0x10) + static_branch_inc(&mcsafe_key); +} + +/* Skylake */ +static void quirk_intel_purley_xeon_ras_cap(struct pci_dev *pdev) +{ + u32 capid0; + + pci_read_config_dword(pdev, 0x84, &capid0); + + if ((capid0 & 0xc0) == 0xc0) + static_branch_inc(&mcsafe_key); +} +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0ec3, quirk_intel_brickland_xeon_ras_cap); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2fc0, quirk_intel_brickland_xeon_ras_cap); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, quirk_intel_brickland_xeon_ras_cap); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2083, quirk_intel_purley_xeon_ras_cap); +#endif diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 63bf27d972b7..e244c19a2451 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -705,7 +705,7 @@ static void native_machine_power_off(void) tboot_shutdown(TB_SHUTDOWN_HALT); } -struct machine_ops machine_ops = { +struct machine_ops machine_ops __ro_after_init = { .power_off = native_machine_power_off, .shutdown = native_machine_shutdown, .emergency_restart = native_machine_emergency_restart, diff --git a/arch/x86/kernel/resource.c b/arch/x86/kernel/resource.c index 80eab01c1a68..2408c1603438 100644 --- a/arch/x86/kernel/resource.c +++ b/arch/x86/kernel/resource.c @@ -27,8 +27,8 @@ static void remove_e820_regions(struct resource *avail) int i; struct e820entry *entry; - for (i = 0; i < e820.nr_map; i++) { - entry = &e820.map[i]; + for (i = 0; i < e820->nr_map; i++) { + entry = &e820->map[i]; resource_clip(avail, entry->addr, entry->addr + entry->size - 1); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 0fa60f5f5a16..bbfbca5fea0c 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -210,9 +210,9 @@ EXPORT_SYMBOL(boot_cpu_data); #if !defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64) -__visible unsigned long mmu_cr4_features; +__visible unsigned long mmu_cr4_features __ro_after_init; #else -__visible unsigned long mmu_cr4_features = X86_CR4_PAE; +__visible unsigned long mmu_cr4_features __ro_after_init = X86_CR4_PAE; #endif /* Boot loader ID and version as integers, for the benefit of proc_dointvec */ @@ -458,8 +458,8 @@ static void __init e820_reserve_setup_data(void) early_memunmap(data, sizeof(*data)); } - sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); - memcpy(&e820_saved, &e820, sizeof(struct e820map)); + sanitize_e820_map(e820->map, ARRAY_SIZE(e820->map), &e820->nr_map); + memcpy(e820_saved, e820, sizeof(struct e820map)); printk(KERN_INFO "extended physical RAM map:\n"); e820_print_map("reserve setup_data"); } @@ -763,7 +763,7 @@ static void __init trim_bios_range(void) */ e820_remove_range(BIOS_BEGIN, BIOS_END - BIOS_BEGIN, E820_RAM, 1); - sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); + sanitize_e820_map(e820->map, ARRAY_SIZE(e820->map), &e820->nr_map); } /* called before trim_bios_range() to spare extra sanitize */ @@ -1032,7 +1032,7 @@ void __init setup_arch(char **cmdline_p) if (ppro_with_ram_bug()) { e820_update_range(0x70000000ULL, 0x40000ULL, E820_RAM, E820_RESERVED); - sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); + sanitize_e820_map(e820->map, ARRAY_SIZE(e820->map), &e820->nr_map); printk(KERN_INFO "fixed physical RAM map:\n"); e820_print_map("bad_ppro"); } @@ -1096,19 +1096,19 @@ void __init setup_arch(char **cmdline_p) memblock_set_current_limit(ISA_END_ADDRESS); memblock_x86_fill(); - if (efi_enabled(EFI_BOOT)) { + reserve_bios_regions(); + + if (efi_enabled(EFI_MEMMAP)) { efi_fake_memmap(); efi_find_mirror(); - } - - reserve_bios_regions(); + efi_esrt_init(); - /* - * The EFI specification says that boot service code won't be called - * after ExitBootServices(). This is, in fact, a lie. - */ - if (efi_enabled(EFI_MEMMAP)) + /* + * The EFI specification says that boot service code won't be + * called after ExitBootServices(). This is, in fact, a lie. + */ efi_reserve_boot_services(); + } /* preallocate 4k for mptable mpc */ early_reserve_e820_mpc_new(); @@ -1137,9 +1137,7 @@ void __init setup_arch(char **cmdline_p) * auditing all the early-boot CR4 manipulation would be needed to * rule it out. */ - if (boot_cpu_data.cpuid_level >= 0) - /* A CPU has %cr4 if and only if it has CPUID. */ - mmu_cr4_features = __read_cr4(); + mmu_cr4_features = __read_cr4(); memblock_set_current_limit(get_max_mapped()); @@ -1221,8 +1219,7 @@ void __init setup_arch(char **cmdline_p) /* * get boot-time SMP configuration: */ - if (smp_found_config) - get_smp_config(); + get_smp_config(); prefill_possible_map(); diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 7a40e068302d..2bbd27f89802 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -33,7 +33,7 @@ EXPORT_PER_CPU_SYMBOL(cpu_number); DEFINE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET; EXPORT_PER_CPU_SYMBOL(this_cpu_off); -unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = { +unsigned long __per_cpu_offset[NR_CPUS] __ro_after_init = { [0 ... NR_CPUS-1] = BOOT_PERCPU_OFFSET, }; EXPORT_SYMBOL(__per_cpu_offset); @@ -246,7 +246,7 @@ void __init setup_per_cpu_areas(void) #ifdef CONFIG_X86_64 per_cpu(irq_stack_ptr, cpu) = per_cpu(irq_stack_union.irq_stack, cpu) + - IRQ_STACK_SIZE - 64; + IRQ_STACK_SIZE; #endif #ifdef CONFIG_NUMA per_cpu(x86_cpu_to_node_map, cpu) = diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 04cb3212db2d..763af1d0de64 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -42,6 +42,7 @@ #include <asm/syscalls.h> #include <asm/sigframe.h> +#include <asm/signal.h> #define COPY(x) do { \ get_user_ex(regs->x, &sc->x); \ @@ -547,7 +548,7 @@ static int x32_setup_rt_frame(struct ksignal *ksig, return -EFAULT; if (ksig->ka.sa.sa_flags & SA_SIGINFO) { - if (copy_siginfo_to_user32(&frame->info, &ksig->info)) + if (__copy_siginfo_to_user32(&frame->info, &ksig->info, true)) return -EFAULT; } @@ -660,20 +661,21 @@ badframe: return 0; } -static inline int is_ia32_compat_frame(void) +static inline int is_ia32_compat_frame(struct ksignal *ksig) { return IS_ENABLED(CONFIG_IA32_EMULATION) && - test_thread_flag(TIF_IA32); + ksig->ka.sa.sa_flags & SA_IA32_ABI; } -static inline int is_ia32_frame(void) +static inline int is_ia32_frame(struct ksignal *ksig) { - return IS_ENABLED(CONFIG_X86_32) || is_ia32_compat_frame(); + return IS_ENABLED(CONFIG_X86_32) || is_ia32_compat_frame(ksig); } -static inline int is_x32_frame(void) +static inline int is_x32_frame(struct ksignal *ksig) { - return IS_ENABLED(CONFIG_X86_X32_ABI) && test_thread_flag(TIF_X32); + return IS_ENABLED(CONFIG_X86_X32_ABI) && + ksig->ka.sa.sa_flags & SA_X32_ABI; } static int @@ -684,12 +686,12 @@ setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs) compat_sigset_t *cset = (compat_sigset_t *) set; /* Set up the stack frame */ - if (is_ia32_frame()) { + if (is_ia32_frame(ksig)) { if (ksig->ka.sa.sa_flags & SA_SIGINFO) return ia32_setup_rt_frame(usig, ksig, cset, regs); else return ia32_setup_frame(usig, ksig, cset, regs); - } else if (is_x32_frame()) { + } else if (is_x32_frame(ksig)) { return x32_setup_rt_frame(ksig, cset, regs); } else { return __setup_rt_frame(ksig->sig, ksig, set, regs); @@ -783,7 +785,7 @@ static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs) * than the tracee. */ #ifdef CONFIG_IA32_EMULATION - if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED)) + if (current->thread.status & (TS_COMPAT|TS_I386_REGS_POKED)) return __NR_ia32_restart_syscall; #endif #ifdef CONFIG_X86_X32_ABI diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c index b44564bf86a8..40df33753bae 100644 --- a/arch/x86/kernel/signal_compat.c +++ b/arch/x86/kernel/signal_compat.c @@ -1,5 +1,6 @@ #include <linux/compat.h> #include <linux/uaccess.h> +#include <linux/ptrace.h> /* * The compat_siginfo_t structure and handing code is very easy @@ -92,10 +93,31 @@ static inline void signal_compat_build_tests(void) /* any new si_fields should be added here */ } -int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) +void sigaction_compat_abi(struct k_sigaction *act, struct k_sigaction *oact) +{ + /* Don't leak in-kernel non-uapi flags to user-space */ + if (oact) + oact->sa.sa_flags &= ~(SA_IA32_ABI | SA_X32_ABI); + + if (!act) + return; + + /* Don't let flags to be set from userspace */ + act->sa.sa_flags &= ~(SA_IA32_ABI | SA_X32_ABI); + + if (user_64bit_mode(current_pt_regs())) + return; + + if (in_ia32_syscall()) + act->sa.sa_flags |= SA_IA32_ABI; + if (in_x32_syscall()) + act->sa.sa_flags |= SA_X32_ABI; +} + +int __copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from, + bool x32_ABI) { int err = 0; - bool ia32 = test_thread_flag(TIF_IA32); signal_compat_build_tests(); @@ -146,7 +168,7 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) put_user_ex(from->si_arch, &to->si_arch); break; case __SI_CHLD >> 16: - if (ia32) { + if (!x32_ABI) { put_user_ex(from->si_utime, &to->si_utime); put_user_ex(from->si_stime, &to->si_stime); } else { @@ -180,6 +202,12 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) return err; } +/* from syscall's path, where we know the ABI */ +int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) +{ + return __copy_siginfo_to_user32(to, from, in_x32_syscall()); +} + int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) { int err = 0; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 4296beb8fdd3..42a93621f5b0 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -471,7 +471,7 @@ static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) return false; } -static struct sched_domain_topology_level numa_inside_package_topology[] = { +static struct sched_domain_topology_level x86_numa_in_package_topology[] = { #ifdef CONFIG_SCHED_SMT { cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) }, #endif @@ -480,22 +480,23 @@ static struct sched_domain_topology_level numa_inside_package_topology[] = { #endif { NULL, }, }; + +static struct sched_domain_topology_level x86_topology[] = { +#ifdef CONFIG_SCHED_SMT + { cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) }, +#endif +#ifdef CONFIG_SCHED_MC + { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) }, +#endif + { cpu_cpu_mask, SD_INIT_NAME(DIE) }, + { NULL, }, +}; + /* - * set_sched_topology() sets the topology internal to a CPU. The - * NUMA topologies are layered on top of it to build the full - * system topology. - * - * If NUMA nodes are observed to occur within a CPU package, this - * function should be called. It forces the sched domain code to - * only use the SMT level for the CPU portion of the topology. - * This essentially falls back to relying on NUMA information - * from the SRAT table to describe the entire system topology - * (except for hyperthreads). + * Set if a package/die has multiple NUMA nodes inside. + * AMD Magny-Cours and Intel Cluster-on-Die have this. */ -static void primarily_use_numa_for_topology(void) -{ - set_sched_topology(numa_inside_package_topology); -} +static bool x86_has_numa_in_package; void set_cpu_sibling_map(int cpu) { @@ -558,7 +559,7 @@ void set_cpu_sibling_map(int cpu) c->booted_cores = cpu_data(i).booted_cores; } if (match_die(c, o) && !topology_same_node(c, o)) - primarily_use_numa_for_topology(); + x86_has_numa_in_package = true; } threads = cpumask_weight(topology_sibling_cpumask(cpu)); @@ -690,7 +691,7 @@ wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip) * Give the other CPU some time to accept the IPI. */ udelay(200); - if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { + if (APIC_INTEGRATED(boot_cpu_apic_version)) { maxlvt = lapic_get_maxlvt(); if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ apic_write(APIC_ESR, 0); @@ -717,7 +718,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) /* * Be paranoid about clearing APIC errors. */ - if (APIC_INTEGRATED(apic_version[phys_apicid])) { + if (APIC_INTEGRATED(boot_cpu_apic_version)) { if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ apic_write(APIC_ESR, 0); apic_read(APIC_ESR); @@ -756,7 +757,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) * Determine this based on the APIC version. * If we don't have an integrated APIC, don't send the STARTUP IPIs. */ - if (APIC_INTEGRATED(apic_version[phys_apicid])) + if (APIC_INTEGRATED(boot_cpu_apic_version)) num_starts = 2; else num_starts = 0; @@ -942,7 +943,6 @@ void common_cpu_up(unsigned int cpu, struct task_struct *idle) per_cpu(cpu_current_top_of_stack, cpu) = (unsigned long)task_stack_page(idle) + THREAD_SIZE; #else - clear_tsk_thread_flag(idle, TIF_FORK); initial_gs = per_cpu_offset(cpu); #endif } @@ -969,7 +969,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle) early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); initial_code = (unsigned long)start_secondary; - stack_start = idle->thread.sp; + initial_stack = idle->thread.sp; /* * Enable the espfix hack for this CPU @@ -994,7 +994,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle) /* * Be paranoid about clearing APIC errors. */ - if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { + if (APIC_INTEGRATED(boot_cpu_apic_version)) { apic_write(APIC_ESR, 0); apic_read(APIC_ESR); } @@ -1115,17 +1115,8 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle) common_cpu_up(cpu, tidle); - /* - * We have to walk the irq descriptors to setup the vector - * space for the cpu which comes online. Prevent irq - * alloc/free across the bringup. - */ - irq_lock_sparse(); - err = do_boot_cpu(apicid, cpu, tidle); - if (err) { - irq_unlock_sparse(); pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu); return -EIO; } @@ -1143,8 +1134,6 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle) touch_nmi_watchdog(); } - irq_unlock_sparse(); - return 0; } @@ -1249,7 +1238,7 @@ static int __init smp_sanity_check(unsigned max_cpus) /* * If we couldn't find a local APIC, then get out of here now! */ - if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && + if (APIC_INTEGRATED(boot_cpu_apic_version) && !boot_cpu_has(X86_FEATURE_APIC)) { if (!disable_apic) { pr_err("BIOS bug, local APIC #%d not detected!...\n", @@ -1304,6 +1293,16 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL); zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL); } + + /* + * Set 'default' x86 topology, this matches default_topology() in that + * it has NUMA nodes as a topology level. See also + * native_smp_cpus_done(). + * + * Must be done before set_cpus_sibling_map() is ran. + */ + set_sched_topology(x86_topology); + set_cpu_sibling_map(0); switch (smp_sanity_check(max_cpus)) { @@ -1323,14 +1322,13 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) break; } - default_setup_apic_routing(); - if (read_apic_id() != boot_cpu_physical_apicid) { panic("Boot APIC ID in local APIC unexpected (%d vs %d)", read_apic_id(), boot_cpu_physical_apicid); /* Or can we switch back to PIC here? */ } + default_setup_apic_routing(); cpu0_logical_apicid = apic_bsp_setup(false); pr_info("CPU%d: ", 0); @@ -1370,6 +1368,9 @@ void __init native_smp_cpus_done(unsigned int max_cpus) { pr_debug("Boot done\n"); + if (x86_has_numa_in_package) + set_sched_topology(x86_numa_in_package_topology); + nmi_selftest(); impress_friends(); setup_ioapic_dest(); diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index 4738f5e0f2ab..0653788026e2 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -8,80 +8,69 @@ #include <linux/export.h> #include <linux/uaccess.h> #include <asm/stacktrace.h> +#include <asm/unwind.h> -static int save_stack_stack(void *data, char *name) +static int save_stack_address(struct stack_trace *trace, unsigned long addr, + bool nosched) { - return 0; -} - -static int -__save_stack_address(void *data, unsigned long addr, bool reliable, bool nosched) -{ - struct stack_trace *trace = data; -#ifdef CONFIG_FRAME_POINTER - if (!reliable) - return 0; -#endif if (nosched && in_sched_functions(addr)) return 0; + if (trace->skip > 0) { trace->skip--; return 0; } - if (trace->nr_entries < trace->max_entries) { - trace->entries[trace->nr_entries++] = addr; - return 0; - } else { - return -1; /* no more room, stop walking the stack */ - } -} -static int save_stack_address(void *data, unsigned long addr, int reliable) -{ - return __save_stack_address(data, addr, reliable, false); + if (trace->nr_entries >= trace->max_entries) + return -1; + + trace->entries[trace->nr_entries++] = addr; + return 0; } -static int -save_stack_address_nosched(void *data, unsigned long addr, int reliable) +static void __save_stack_trace(struct stack_trace *trace, + struct task_struct *task, struct pt_regs *regs, + bool nosched) { - return __save_stack_address(data, addr, reliable, true); -} + struct unwind_state state; + unsigned long addr; -static const struct stacktrace_ops save_stack_ops = { - .stack = save_stack_stack, - .address = save_stack_address, - .walk_stack = print_context_stack, -}; + if (regs) + save_stack_address(trace, regs->ip, nosched); -static const struct stacktrace_ops save_stack_ops_nosched = { - .stack = save_stack_stack, - .address = save_stack_address_nosched, - .walk_stack = print_context_stack, -}; + for (unwind_start(&state, task, regs, NULL); !unwind_done(&state); + unwind_next_frame(&state)) { + addr = unwind_get_return_address(&state); + if (!addr || save_stack_address(trace, addr, nosched)) + break; + } + + if (trace->nr_entries < trace->max_entries) + trace->entries[trace->nr_entries++] = ULONG_MAX; +} /* * Save stack-backtrace addresses into a stack_trace buffer. */ void save_stack_trace(struct stack_trace *trace) { - dump_trace(current, NULL, NULL, 0, &save_stack_ops, trace); - if (trace->nr_entries < trace->max_entries) - trace->entries[trace->nr_entries++] = ULONG_MAX; + __save_stack_trace(trace, current, NULL, false); } EXPORT_SYMBOL_GPL(save_stack_trace); void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace) { - dump_trace(current, regs, NULL, 0, &save_stack_ops, trace); - if (trace->nr_entries < trace->max_entries) - trace->entries[trace->nr_entries++] = ULONG_MAX; + __save_stack_trace(trace, current, regs, false); } void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) { - dump_trace(tsk, NULL, NULL, 0, &save_stack_ops_nosched, trace); - if (trace->nr_entries < trace->max_entries) - trace->entries[trace->nr_entries++] = ULONG_MAX; + if (!try_get_task_stack(tsk)) + return; + + __save_stack_trace(trace, tsk, NULL, true); + + put_task_stack(tsk); } EXPORT_SYMBOL_GPL(save_stack_trace_tsk); diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index 654f6c66fe45..8402907825b0 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c @@ -188,12 +188,12 @@ static int tboot_setup_sleep(void) tboot->num_mac_regions = 0; - for (i = 0; i < e820.nr_map; i++) { - if ((e820.map[i].type != E820_RAM) - && (e820.map[i].type != E820_RESERVED_KERN)) + for (i = 0; i < e820->nr_map; i++) { + if ((e820->map[i].type != E820_RAM) + && (e820->map[i].type != E820_RESERVED_KERN)) continue; - add_mac_region(e820.map[i].addr, e820.map[i].size); + add_mac_region(e820->map[i].addr, e820->map[i].size); } tboot->acpi_sinfo.kernel_s3_resume_vector = diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index b70ca12dd389..bd4e3d4d3625 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -292,12 +292,30 @@ DO_ERROR(X86_TRAP_NP, SIGBUS, "segment not present", segment_not_present) DO_ERROR(X86_TRAP_SS, SIGBUS, "stack segment", stack_segment) DO_ERROR(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check) +#ifdef CONFIG_VMAP_STACK +__visible void __noreturn handle_stack_overflow(const char *message, + struct pt_regs *regs, + unsigned long fault_address) +{ + printk(KERN_EMERG "BUG: stack guard page was hit at %p (stack is %p..%p)\n", + (void *)fault_address, current->stack, + (char *)current->stack + THREAD_SIZE - 1); + die(message, regs, 0); + + /* Be absolutely certain we don't return. */ + panic(message); +} +#endif + #ifdef CONFIG_X86_64 /* Runs on IST stack */ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) { static const char str[] = "double fault"; struct task_struct *tsk = current; +#ifdef CONFIG_VMAP_STACK + unsigned long cr2; +#endif #ifdef CONFIG_X86_ESPFIX64 extern unsigned char native_irq_return_iret[]; @@ -332,6 +350,49 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) tsk->thread.error_code = error_code; tsk->thread.trap_nr = X86_TRAP_DF; +#ifdef CONFIG_VMAP_STACK + /* + * If we overflow the stack into a guard page, the CPU will fail + * to deliver #PF and will send #DF instead. Similarly, if we + * take any non-IST exception while too close to the bottom of + * the stack, the processor will get a page fault while + * delivering the exception and will generate a double fault. + * + * According to the SDM (footnote in 6.15 under "Interrupt 14 - + * Page-Fault Exception (#PF): + * + * Processors update CR2 whenever a page fault is detected. If a + * second page fault occurs while an earlier page fault is being + * deliv- ered, the faulting linear address of the second fault will + * overwrite the contents of CR2 (replacing the previous + * address). These updates to CR2 occur even if the page fault + * results in a double fault or occurs during the delivery of a + * double fault. + * + * The logic below has a small possibility of incorrectly diagnosing + * some errors as stack overflows. For example, if the IDT or GDT + * gets corrupted such that #GP delivery fails due to a bad descriptor + * causing #GP and we hit this condition while CR2 coincidentally + * points to the stack guard page, we'll think we overflowed the + * stack. Given that we're going to panic one way or another + * if this happens, this isn't necessarily worth fixing. + * + * If necessary, we could improve the test by only diagnosing + * a stack overflow if the saved RSP points within 47 bytes of + * the bottom of the stack: if RSP == tsk_stack + 48 and we + * take an exception, the stack is already aligned and there + * will be enough room SS, RSP, RFLAGS, CS, RIP, and a + * possible error code, so a stack overflow would *not* double + * fault. With any less space left, exception delivery could + * fail, and, as a practical matter, we've overflowed the + * stack even if the actual trigger for the double fault was + * something else. + */ + cr2 = read_cr2(); + if ((unsigned long)task_stack_page(tsk) - 1 - cr2 < PAGE_SIZE) + handle_stack_overflow("kernel stack overflow (double-fault)", regs, cr2); +#endif + #ifdef CONFIG_DOUBLEFAULT df_debug(regs, error_code); #endif diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 78b9cb5a26af..46b2f41f8b05 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -23,6 +23,7 @@ #include <asm/x86_init.h> #include <asm/geode.h> #include <asm/apic.h> +#include <asm/intel-family.h> unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */ EXPORT_SYMBOL(cpu_khz); @@ -686,11 +687,16 @@ unsigned long native_calibrate_tsc(void) if (crystal_khz == 0) { switch (boot_cpu_data.x86_model) { - case 0x4E: /* SKL */ - case 0x5E: /* SKL */ + case INTEL_FAM6_SKYLAKE_MOBILE: + case INTEL_FAM6_SKYLAKE_DESKTOP: + case INTEL_FAM6_KABYLAKE_MOBILE: + case INTEL_FAM6_KABYLAKE_DESKTOP: crystal_khz = 24000; /* 24.0 MHz */ break; - case 0x5C: /* BXT */ + case INTEL_FAM6_SKYLAKE_X: + crystal_khz = 25000; /* 25.0 MHz */ + break; + case INTEL_FAM6_ATOM_GOLDMONT: crystal_khz = 19200; /* 19.2 MHz */ break; } diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c new file mode 100644 index 000000000000..a2456d4d286a --- /dev/null +++ b/arch/x86/kernel/unwind_frame.c @@ -0,0 +1,93 @@ +#include <linux/sched.h> +#include <asm/ptrace.h> +#include <asm/bitops.h> +#include <asm/stacktrace.h> +#include <asm/unwind.h> + +#define FRAME_HEADER_SIZE (sizeof(long) * 2) + +unsigned long unwind_get_return_address(struct unwind_state *state) +{ + unsigned long addr; + unsigned long *addr_p = unwind_get_return_address_ptr(state); + + if (unwind_done(state)) + return 0; + + addr = ftrace_graph_ret_addr(state->task, &state->graph_idx, *addr_p, + addr_p); + + return __kernel_text_address(addr) ? addr : 0; +} +EXPORT_SYMBOL_GPL(unwind_get_return_address); + +static bool update_stack_state(struct unwind_state *state, void *addr, + size_t len) +{ + struct stack_info *info = &state->stack_info; + + /* + * If addr isn't on the current stack, switch to the next one. + * + * We may have to traverse multiple stacks to deal with the possibility + * that 'info->next_sp' could point to an empty stack and 'addr' could + * be on a subsequent stack. + */ + while (!on_stack(info, addr, len)) + if (get_stack_info(info->next_sp, state->task, info, + &state->stack_mask)) + return false; + + return true; +} + +bool unwind_next_frame(struct unwind_state *state) +{ + unsigned long *next_bp; + + if (unwind_done(state)) + return false; + + next_bp = (unsigned long *)*state->bp; + + /* make sure the next frame's data is accessible */ + if (!update_stack_state(state, next_bp, FRAME_HEADER_SIZE)) + return false; + + /* move to the next frame */ + state->bp = next_bp; + return true; +} +EXPORT_SYMBOL_GPL(unwind_next_frame); + +void __unwind_start(struct unwind_state *state, struct task_struct *task, + struct pt_regs *regs, unsigned long *first_frame) +{ + memset(state, 0, sizeof(*state)); + state->task = task; + + /* don't even attempt to start from user mode regs */ + if (regs && user_mode(regs)) { + state->stack_info.type = STACK_TYPE_UNKNOWN; + return; + } + + /* set up the starting stack frame */ + state->bp = get_frame_pointer(task, regs); + + /* initialize stack info and make sure the frame data is accessible */ + get_stack_info(state->bp, state->task, &state->stack_info, + &state->stack_mask); + update_stack_state(state, state->bp, FRAME_HEADER_SIZE); + + /* + * The caller can provide the address of the first frame directly + * (first_frame) or indirectly (regs->sp) to indicate which stack frame + * to start unwinding at. Skip ahead until we reach it. + */ + while (!unwind_done(state) && + (!on_stack(&state->stack_info, first_frame, sizeof(long)) || + state->bp < first_frame)) + unwind_next_frame(state); +} +EXPORT_SYMBOL_GPL(__unwind_start); diff --git a/arch/x86/kernel/unwind_guess.c b/arch/x86/kernel/unwind_guess.c new file mode 100644 index 000000000000..b5a834c93065 --- /dev/null +++ b/arch/x86/kernel/unwind_guess.c @@ -0,0 +1,43 @@ +#include <linux/sched.h> +#include <linux/ftrace.h> +#include <asm/ptrace.h> +#include <asm/bitops.h> +#include <asm/stacktrace.h> +#include <asm/unwind.h> + +bool unwind_next_frame(struct unwind_state *state) +{ + struct stack_info *info = &state->stack_info; + + if (unwind_done(state)) + return false; + + do { + for (state->sp++; state->sp < info->end; state->sp++) + if (__kernel_text_address(*state->sp)) + return true; + + state->sp = info->next_sp; + + } while (!get_stack_info(state->sp, state->task, info, + &state->stack_mask)); + + return false; +} +EXPORT_SYMBOL_GPL(unwind_next_frame); + +void __unwind_start(struct unwind_state *state, struct task_struct *task, + struct pt_regs *regs, unsigned long *first_frame) +{ + memset(state, 0, sizeof(*state)); + + state->task = task; + state->sp = first_frame; + + get_stack_info(first_frame, state->task, &state->stack_info, + &state->stack_mask); + + if (!__kernel_text_address(*first_frame)) + unwind_next_frame(state); +} +EXPORT_SYMBOL_GPL(__unwind_start); diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index 95e49f6e4fc3..b2cee3d19477 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -38,7 +38,7 @@ EXPORT_SYMBOL(__copy_user_nocache); EXPORT_SYMBOL(_copy_from_user); EXPORT_SYMBOL(_copy_to_user); -EXPORT_SYMBOL_GPL(memcpy_mcsafe); +EXPORT_SYMBOL_GPL(memcpy_mcsafe_unrolled); EXPORT_SYMBOL(copy_page); EXPORT_SYMBOL(clear_page); diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 76c5e52436c4..0bd9f1287f39 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -91,7 +91,7 @@ struct x86_cpuinit_ops x86_cpuinit = { static void default_nmi_init(void) { }; static int default_i8042_detect(void) { return 1; }; -struct x86_platform_ops x86_platform = { +struct x86_platform_ops x86_platform __ro_after_init = { .calibrate_cpu = native_calibrate_cpu, .calibrate_tsc = native_calibrate_tsc, .get_wallclock = mach_get_cmos_time, @@ -108,7 +108,7 @@ struct x86_platform_ops x86_platform = { EXPORT_SYMBOL_GPL(x86_platform); #if defined(CONFIG_PCI_MSI) -struct x86_msi_ops x86_msi = { +struct x86_msi_ops x86_msi __ro_after_init = { .setup_msi_irqs = native_setup_msi_irqs, .teardown_msi_irq = native_teardown_msi_irq, .teardown_msi_irqs = default_teardown_msi_irqs, @@ -137,7 +137,7 @@ void arch_restore_msi_irqs(struct pci_dev *dev) } #endif -struct x86_io_apic_ops x86_io_apic_ops = { +struct x86_io_apic_ops x86_io_apic_ops __ro_after_init = { .read = native_io_apic_read, .disable = native_disable_io_apic, }; diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 5f42d038fcb4..c7220ba94aa7 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -109,6 +109,7 @@ static void __rtc_irq_eoi_tracking_restore_one(struct kvm_vcpu *vcpu) { bool new_val, old_val; struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; + struct dest_map *dest_map = &ioapic->rtc_status.dest_map; union kvm_ioapic_redirect_entry *e; e = &ioapic->redirtbl[RTC_GSI]; @@ -117,16 +118,17 @@ static void __rtc_irq_eoi_tracking_restore_one(struct kvm_vcpu *vcpu) return; new_val = kvm_apic_pending_eoi(vcpu, e->fields.vector); - old_val = test_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map.map); + old_val = test_bit(vcpu->vcpu_id, dest_map->map); if (new_val == old_val) return; if (new_val) { - __set_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map.map); + __set_bit(vcpu->vcpu_id, dest_map->map); + dest_map->vectors[vcpu->vcpu_id] = e->fields.vector; ioapic->rtc_status.pending_eoi++; } else { - __clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map.map); + __clear_bit(vcpu->vcpu_id, dest_map->map); ioapic->rtc_status.pending_eoi--; rtc_status_pending_eoi_check_valid(ioapic); } diff --git a/arch/x86/kvm/pmu_amd.c b/arch/x86/kvm/pmu_amd.c index 39b91127ef07..cd944435dfbd 100644 --- a/arch/x86/kvm/pmu_amd.c +++ b/arch/x86/kvm/pmu_amd.c @@ -23,8 +23,8 @@ static struct kvm_event_hw_type_mapping amd_event_mapping[] = { [0] = { 0x76, 0x00, PERF_COUNT_HW_CPU_CYCLES }, [1] = { 0xc0, 0x00, PERF_COUNT_HW_INSTRUCTIONS }, - [2] = { 0x80, 0x00, PERF_COUNT_HW_CACHE_REFERENCES }, - [3] = { 0x81, 0x00, PERF_COUNT_HW_CACHE_MISSES }, + [2] = { 0x7d, 0x07, PERF_COUNT_HW_CACHE_REFERENCES }, + [3] = { 0x7e, 0x07, PERF_COUNT_HW_CACHE_MISSES }, [4] = { 0xc2, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, [5] = { 0xc3, 0x00, PERF_COUNT_HW_BRANCH_MISSES }, [6] = { 0xd0, 0x00, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }, diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index af523d84d102..1e6b84b96ea6 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -4961,7 +4961,7 @@ static inline void avic_post_state_restore(struct kvm_vcpu *vcpu) avic_handle_ldr_update(vcpu); } -static struct kvm_x86_ops svm_x86_ops = { +static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .cpu_has_kvm_support = has_svm, .disabled_by_bios = is_disabled, .hardware_setup = svm_hardware_setup, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 5cede40e2552..121fdf6e9ed0 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -11177,7 +11177,7 @@ static void vmx_setup_mce(struct kvm_vcpu *vcpu) ~FEATURE_CONTROL_LMCE; } -static struct kvm_x86_ops vmx_x86_ops = { +static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .cpu_has_kvm_support = cpu_has_kvm_support, .disabled_by_bios = vmx_disabled_by_bios, .hardware_setup = hardware_setup, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 19f9f9e05c2a..699f8726539a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2743,16 +2743,16 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (tsc_delta < 0) mark_tsc_unstable("KVM discovered backwards TSC"); - if (kvm_lapic_hv_timer_in_use(vcpu) && - kvm_x86_ops->set_hv_timer(vcpu, - kvm_get_lapic_tscdeadline_msr(vcpu))) - kvm_lapic_switch_to_sw_timer(vcpu); if (check_tsc_unstable()) { u64 offset = kvm_compute_tsc_offset(vcpu, vcpu->arch.last_guest_tsc); kvm_x86_ops->write_tsc_offset(vcpu, offset); vcpu->arch.tsc_catchup = 1; } + if (kvm_lapic_hv_timer_in_use(vcpu) && + kvm_x86_ops->set_hv_timer(vcpu, + kvm_get_lapic_tscdeadline_msr(vcpu))) + kvm_lapic_switch_to_sw_timer(vcpu); /* * On a host with synchronized TSC, there is no need to update * kvmclock on vcpu->cpu migration diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 2ec0b0abbfaa..49e6ebac7e73 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -181,11 +181,11 @@ ENDPROC(memcpy_orig) #ifndef CONFIG_UML /* - * memcpy_mcsafe - memory copy with machine check exception handling + * memcpy_mcsafe_unrolled - memory copy with machine check exception handling * Note that we only catch machine checks when reading the source addresses. * Writes to target are posted and don't generate machine checks. */ -ENTRY(memcpy_mcsafe) +ENTRY(memcpy_mcsafe_unrolled) cmpl $8, %edx /* Less than 8 bytes? Go to byte copy loop */ jb .L_no_whole_words @@ -273,7 +273,7 @@ ENTRY(memcpy_mcsafe) .L_done_memcpy_trap: xorq %rax, %rax ret -ENDPROC(memcpy_mcsafe) +ENDPROC(memcpy_mcsafe_unrolled) .section .fixup, "ax" /* Return -EFAULT for any failure */ diff --git a/arch/x86/mm/amdtopology.c b/arch/x86/mm/amdtopology.c index ba47524f56e8..d1c7de095808 100644 --- a/arch/x86/mm/amdtopology.c +++ b/arch/x86/mm/amdtopology.c @@ -52,21 +52,6 @@ static __init int find_northbridge(void) return -ENOENT; } -static __init void early_get_boot_cpu_id(void) -{ - /* - * need to get the APIC ID of the BSP so can use that to - * create apicid_to_node in amd_scan_nodes() - */ -#ifdef CONFIG_X86_MPPARSE - /* - * get boot-time SMP configuration: - */ - if (smp_found_config) - early_get_smp_config(); -#endif -} - int __init amd_numa_init(void) { u64 start = PFN_PHYS(0); @@ -180,8 +165,11 @@ int __init amd_numa_init(void) cores = 1 << bits; apicid_base = 0; - /* get the APIC ID of the BSP early for systems with apicid lifting */ - early_get_boot_cpu_id(); + /* + * get boot-time SMP configuration: + */ + early_get_smp_config(); + if (boot_cpu_physical_apicid > 0) { pr_info("BSP APIC ID: %02x\n", boot_cpu_physical_apicid); apicid_base = boot_cpu_physical_apicid; diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 832b98f822be..79ae939970d3 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -1,4 +1,4 @@ -#include <linux/module.h> +#include <linux/extable.h> #include <asm/uaccess.h> #include <asm/traps.h> #include <asm/kdebug.h> diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index dc8023060456..1e525122cbe4 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -5,7 +5,7 @@ */ #include <linux/sched.h> /* test_thread_flag(), ... */ #include <linux/kdebug.h> /* oops_begin/end, ... */ -#include <linux/module.h> /* search_exception_table */ +#include <linux/extable.h> /* search_exception_table */ #include <linux/bootmem.h> /* max_low_pfn */ #include <linux/kprobes.h> /* NOKPROBE_SYMBOL, ... */ #include <linux/mmiotrace.h> /* kmmio_handler, ... */ @@ -753,6 +753,38 @@ no_context(struct pt_regs *regs, unsigned long error_code, return; } +#ifdef CONFIG_VMAP_STACK + /* + * Stack overflow? During boot, we can fault near the initial + * stack in the direct map, but that's not an overflow -- check + * that we're in vmalloc space to avoid this. + */ + if (is_vmalloc_addr((void *)address) && + (((unsigned long)tsk->stack - 1 - address < PAGE_SIZE) || + address - ((unsigned long)tsk->stack + THREAD_SIZE) < PAGE_SIZE)) { + register void *__sp asm("rsp"); + unsigned long stack = this_cpu_read(orig_ist.ist[DOUBLEFAULT_STACK]) - sizeof(void *); + /* + * We're likely to be running with very little stack space + * left. It's plausible that we'd hit this condition but + * double-fault even before we get this far, in which case + * we're fine: the double-fault handler will deal with it. + * + * We don't want to make it all the way into the oops code + * and then double-fault, though, because we're likely to + * break the console driver and lose most of the stack dump. + */ + asm volatile ("movq %[stack], %%rsp\n\t" + "call handle_stack_overflow\n\t" + "1: jmp 1b" + : "+r" (__sp) + : "D" ("kernel stack overflow (page fault)"), + "S" (regs), "d" (address), + [stack] "rm" (stack)); + unreachable(); + } +#endif + /* * 32-bit: * diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index d28a2d741f9e..22af912d66d2 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -699,8 +699,10 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end) } } -void free_initmem(void) +void __ref free_initmem(void) { + e820_reallocate_tables(); + free_init_pages("unused kernel", (unsigned long)(&__init_begin), (unsigned long)(&__init_end)); diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index bda8d5eef04d..ddd2661c4502 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -40,17 +40,26 @@ * You need to add an if/def entry if you introduce a new memory region * compatible with KASLR. Your entry must be in logical order with memory * layout. For example, ESPFIX is before EFI because its virtual address is - * before. You also need to add a BUILD_BUG_ON in kernel_randomize_memory to + * before. You also need to add a BUILD_BUG_ON() in kernel_randomize_memory() to * ensure that this order is correct and won't be changed. */ static const unsigned long vaddr_start = __PAGE_OFFSET_BASE; -static const unsigned long vaddr_end = VMEMMAP_START; + +#if defined(CONFIG_X86_ESPFIX64) +static const unsigned long vaddr_end = ESPFIX_BASE_ADDR; +#elif defined(CONFIG_EFI) +static const unsigned long vaddr_end = EFI_VA_START; +#else +static const unsigned long vaddr_end = __START_KERNEL_map; +#endif /* Default values */ unsigned long page_offset_base = __PAGE_OFFSET_BASE; EXPORT_SYMBOL(page_offset_base); unsigned long vmalloc_base = __VMALLOC_BASE; EXPORT_SYMBOL(vmalloc_base); +unsigned long vmemmap_base = __VMEMMAP_BASE; +EXPORT_SYMBOL(vmemmap_base); /* * Memory regions randomized by KASLR (except modules that use a separate logic @@ -63,6 +72,7 @@ static __initdata struct kaslr_memory_region { } kaslr_regions[] = { { &page_offset_base, 64/* Maximum */ }, { &vmalloc_base, VMALLOC_SIZE_TB }, + { &vmemmap_base, 1 }, }; /* Get size in bytes used by the memory region */ @@ -89,6 +99,18 @@ void __init kernel_randomize_memory(void) struct rnd_state rand_state; unsigned long remain_entropy; + /* + * All these BUILD_BUG_ON checks ensures the memory layout is + * consistent with the vaddr_start/vaddr_end variables. + */ + BUILD_BUG_ON(vaddr_start >= vaddr_end); + BUILD_BUG_ON(config_enabled(CONFIG_X86_ESPFIX64) && + vaddr_end >= EFI_VA_START); + BUILD_BUG_ON((config_enabled(CONFIG_X86_ESPFIX64) || + config_enabled(CONFIG_EFI)) && + vaddr_end >= __START_KERNEL_map); + BUILD_BUG_ON(vaddr_end > __START_KERNEL_map); + if (!kaslr_memory_enabled()) return; diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index fb682108f4dc..3f35b48d1d9d 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -722,22 +722,19 @@ void __init x86_numa_init(void) numa_init(dummy_numa_init); } -static __init int find_near_online_node(int node) +static void __init init_memory_less_node(int nid) { - int n, val; - int min_val = INT_MAX; - int best_node = -1; + unsigned long zones_size[MAX_NR_ZONES] = {0}; + unsigned long zholes_size[MAX_NR_ZONES] = {0}; - for_each_online_node(n) { - val = node_distance(node, n); + /* Allocate and initialize node data. Memory-less node is now online.*/ + alloc_node_data(nid); + free_area_init_node(nid, zones_size, 0, zholes_size); - if (val < min_val) { - min_val = val; - best_node = n; - } - } - - return best_node; + /* + * All zonelists will be built later in start_kernel() after per cpu + * areas are initialized. + */ } /* @@ -766,8 +763,10 @@ void __init init_cpu_to_node(void) if (node == NUMA_NO_NODE) continue; + if (!node_online(node)) - node = find_near_online_node(node); + init_memory_less_node(node); + numa_set_node(cpu, node); } } diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 849dc09fa4f0..e3353c97d086 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -917,11 +917,11 @@ static void populate_pte(struct cpa_data *cpa, } } -static int populate_pmd(struct cpa_data *cpa, - unsigned long start, unsigned long end, - unsigned num_pages, pud_t *pud, pgprot_t pgprot) +static long populate_pmd(struct cpa_data *cpa, + unsigned long start, unsigned long end, + unsigned num_pages, pud_t *pud, pgprot_t pgprot) { - unsigned int cur_pages = 0; + long cur_pages = 0; pmd_t *pmd; pgprot_t pmd_pgprot; @@ -991,12 +991,12 @@ static int populate_pmd(struct cpa_data *cpa, return num_pages; } -static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd, - pgprot_t pgprot) +static long populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd, + pgprot_t pgprot) { pud_t *pud; unsigned long end; - int cur_pages = 0; + long cur_pages = 0; pgprot_t pud_pgprot; end = start + (cpa->numpages << PAGE_SHIFT); @@ -1052,7 +1052,7 @@ static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd, /* Map trailing leftover */ if (start < end) { - int tmp; + long tmp; pud = pud_offset(pgd, start); if (pud_none(*pud)) @@ -1078,7 +1078,7 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr) pgprot_t pgprot = __pgprot(_KERNPG_TABLE); pud_t *pud = NULL; /* shut up gcc */ pgd_t *pgd_entry; - int ret; + long ret; pgd_entry = cpa->pgd + pgd_index(addr); @@ -1327,7 +1327,8 @@ static int cpa_process_alias(struct cpa_data *cpa) static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias) { - int ret, numpages = cpa->numpages; + unsigned long numpages = cpa->numpages; + int ret; while (numpages) { /* diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index ecb1b69c1651..170cc4ff057b 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -927,9 +927,10 @@ int track_pfn_copy(struct vm_area_struct *vma) } /* - * prot is passed in as a parameter for the new mapping. If the vma has a - * linear pfn mapping for the entire range reserve the entire vma range with - * single reserve_pfn_range call. + * prot is passed in as a parameter for the new mapping. If the vma has + * a linear pfn mapping for the entire range, or no vma is provided, + * reserve the entire pfn + size range with single reserve_pfn_range + * call. */ int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot, unsigned long pfn, unsigned long addr, unsigned long size) @@ -938,11 +939,12 @@ int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot, enum page_cache_mode pcm; /* reserve the whole chunk starting from paddr */ - if (addr == vma->vm_start && size == (vma->vm_end - vma->vm_start)) { + if (!vma || (addr == vma->vm_start + && size == (vma->vm_end - vma->vm_start))) { int ret; ret = reserve_pfn_range(paddr, size, prot, 0); - if (!ret) + if (ret == 0 && vma) vma->vm_flags |= VM_PAT; return ret; } @@ -997,7 +999,7 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, resource_size_t paddr; unsigned long prot; - if (!(vma->vm_flags & VM_PAT)) + if (vma && !(vma->vm_flags & VM_PAT)) return; /* free the chunk starting from pfn or the whole chunk */ @@ -1011,7 +1013,8 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, size = vma->vm_end - vma->vm_start; } free_pfn_range(paddr, size); - vma->vm_flags &= ~VM_PAT; + if (vma) + vma->vm_flags &= ~VM_PAT; } /* diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c index de391b7bc19a..159b52ccd600 100644 --- a/arch/x86/mm/pat_rbtree.c +++ b/arch/x86/mm/pat_rbtree.c @@ -254,9 +254,7 @@ struct memtype *rbt_memtype_erase(u64 start, u64 end) struct memtype *rbt_memtype_lookup(u64 addr) { - struct memtype *data; - data = memtype_rb_lowest_match(&memtype_rbroot, addr, addr + PAGE_SIZE); - return data; + return memtype_rb_lowest_match(&memtype_rbroot, addr, addr + PAGE_SIZE); } #if defined(CONFIG_DEBUG_FS) diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 4dbe65622810..a7655f6caf7d 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -77,10 +77,25 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, unsigned cpu = smp_processor_id(); if (likely(prev != next)) { + if (IS_ENABLED(CONFIG_VMAP_STACK)) { + /* + * If our current stack is in vmalloc space and isn't + * mapped in the new pgd, we'll double-fault. Forcibly + * map it. + */ + unsigned int stack_pgd_index = pgd_index(current_stack_pointer()); + + pgd_t *pgd = next->pgd + stack_pgd_index; + + if (unlikely(pgd_none(*pgd))) + set_pgd(pgd, init_mm.pgd[stack_pgd_index]); + } + #ifdef CONFIG_SMP this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK); this_cpu_write(cpu_tlbstate.active_mm, next); #endif + cpumask_set_cpu(cpu, mm_cpumask(next)); /* diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c index cb31a4440e58..a2488b6e27d6 100644 --- a/arch/x86/oprofile/backtrace.c +++ b/arch/x86/oprofile/backtrace.c @@ -16,27 +16,7 @@ #include <asm/ptrace.h> #include <asm/stacktrace.h> - -static int backtrace_stack(void *data, char *name) -{ - /* Yes, we want all stacks */ - return 0; -} - -static int backtrace_address(void *data, unsigned long addr, int reliable) -{ - unsigned int *depth = data; - - if ((*depth)--) - oprofile_add_trace(addr); - return 0; -} - -static struct stacktrace_ops backtrace_ops = { - .stack = backtrace_stack, - .address = backtrace_address, - .walk_stack = print_context_stack, -}; +#include <asm/unwind.h> #ifdef CONFIG_COMPAT static struct stack_frame_ia32 * @@ -113,10 +93,29 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth) struct stack_frame *head = (struct stack_frame *)frame_pointer(regs); if (!user_mode(regs)) { - unsigned long stack = kernel_stack_pointer(regs); - if (depth) - dump_trace(NULL, regs, (unsigned long *)stack, 0, - &backtrace_ops, &depth); + struct unwind_state state; + unsigned long addr; + + if (!depth) + return; + + oprofile_add_trace(regs->ip); + + if (!--depth) + return; + + for (unwind_start(&state, current, regs, NULL); + !unwind_done(&state); unwind_next_frame(&state)) { + addr = unwind_get_return_address(&state); + if (!addr) + break; + + oprofile_add_trace(addr); + + if (!--depth) + break; + } + return; } diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index 837ea36a837d..6d52b94f4bb9 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -553,15 +553,21 @@ static void twinhead_reserve_killing_zone(struct pci_dev *dev) DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x27B9, twinhead_reserve_killing_zone); /* - * Broadwell EP Home Agent BARs erroneously return non-zero values when read. + * Device [8086:2fc0] + * Erratum HSE43 + * CONFIG_TDP_NOMINAL CSR Implemented at Incorrect Offset + * http://www.intel.com/content/www/us/en/processors/xeon/xeon-e5-v3-spec-update.html * - * See http://www.intel.com/content/www/us/en/processors/xeon/xeon-e5-v4-spec-update.html - * entry BDF2. + * Devices [8086:6f60,6fa0,6fc0] + * Erratum BDF2 + * PCI BARs in the Home Agent Will Return Non-Zero Values During Enumeration + * http://www.intel.com/content/www/us/en/processors/xeon/xeon-e5-v4-spec-update.html */ -static void pci_bdwep_bar(struct pci_dev *dev) +static void pci_invalid_bar(struct pci_dev *dev) { dev->non_compliant_bars = 1; } -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6f60, pci_bdwep_bar); -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fa0, pci_bdwep_bar); -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, pci_bdwep_bar); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2fc0, pci_invalid_bar); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6f60, pci_invalid_bar); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fa0, pci_invalid_bar); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, pci_invalid_bar); diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c index 9770e55e768f..1d97cea3b3a4 100644 --- a/arch/x86/pci/pcbios.c +++ b/arch/x86/pci/pcbios.c @@ -120,9 +120,12 @@ static unsigned long __init bios32_service(unsigned long service) static struct { unsigned long address; unsigned short segment; -} pci_indirect = { 0, __KERNEL_CS }; +} pci_indirect __ro_after_init = { + .address = 0, + .segment = __KERNEL_CS, +}; -static int pci_bios_present; +static int pci_bios_present __ro_after_init; static int __init check_pcibios(void) { diff --git a/arch/x86/platform/Makefile b/arch/x86/platform/Makefile index 184842ef332e..3c3c19ea94df 100644 --- a/arch/x86/platform/Makefile +++ b/arch/x86/platform/Makefile @@ -8,6 +8,7 @@ obj-y += iris/ obj-y += intel/ obj-y += intel-mid/ obj-y += intel-quark/ +obj-y += mellanox/ obj-y += olpc/ obj-y += scx200/ obj-y += sfi/ diff --git a/arch/x86/platform/atom/punit_atom_debug.c b/arch/x86/platform/atom/punit_atom_debug.c index 8ff7b9355416..d49d3be81953 100644 --- a/arch/x86/platform/atom/punit_atom_debug.c +++ b/arch/x86/platform/atom/punit_atom_debug.c @@ -155,7 +155,7 @@ static void punit_dbgfs_unregister(void) static const struct x86_cpu_id intel_punit_cpu_ids[] = { ICPU(INTEL_FAM6_ATOM_SILVERMONT1, punit_device_byt), - ICPU(INTEL_FAM6_ATOM_MERRIFIELD1, punit_device_tng), + ICPU(INTEL_FAM6_ATOM_MERRIFIELD, punit_device_tng), ICPU(INTEL_FAM6_ATOM_AIRMONT, punit_device_cht), {} }; diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c index 6a2f5691b1ab..6aad870e8962 100644 --- a/arch/x86/platform/efi/efi-bgrt.c +++ b/arch/x86/platform/efi/efi-bgrt.c @@ -82,21 +82,12 @@ void __init efi_bgrt_init(void) } bgrt_image_size = bmp_header.size; - bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL | __GFP_NOWARN); + bgrt_image = memremap(bgrt_tab->image_address, bmp_header.size, MEMREMAP_WB); if (!bgrt_image) { - pr_notice("Ignoring BGRT: failed to allocate memory for image (wanted %zu bytes)\n", - bgrt_image_size); - return; - } - - image = memremap(bgrt_tab->image_address, bmp_header.size, MEMREMAP_WB); - if (!image) { pr_notice("Ignoring BGRT: failed to map image memory\n"); - kfree(bgrt_image); bgrt_image = NULL; return; } - memcpy(bgrt_image, image, bgrt_image_size); - memunmap(image); + efi_mem_reserve(bgrt_tab->image_address, bgrt_image_size); } diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 1fbb408e2e72..bf99aa7005eb 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -166,13 +166,15 @@ static void __init do_add_efi_memmap(void) } e820_add_region(start, size, e820_type); } - sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); + sanitize_e820_map(e820->map, ARRAY_SIZE(e820->map), &e820->nr_map); } int __init efi_memblock_x86_reserve_range(void) { struct efi_info *e = &boot_params.efi_info; + struct efi_memory_map_data data; phys_addr_t pmap; + int rv; if (efi_enabled(EFI_PARAVIRT)) return 0; @@ -187,11 +189,17 @@ int __init efi_memblock_x86_reserve_range(void) #else pmap = (e->efi_memmap | ((__u64)e->efi_memmap_hi << 32)); #endif - efi.memmap.phys_map = pmap; - efi.memmap.nr_map = e->efi_memmap_size / - e->efi_memdesc_size; - efi.memmap.desc_size = e->efi_memdesc_size; - efi.memmap.desc_version = e->efi_memdesc_version; + data.phys_map = pmap; + data.size = e->efi_memmap_size; + data.desc_size = e->efi_memdesc_size; + data.desc_version = e->efi_memdesc_version; + + rv = efi_memmap_init_early(&data); + if (rv) + return rv; + + if (add_efi_memmap) + do_add_efi_memmap(); WARN(efi.memmap.desc_version != 1, "Unexpected EFI_MEMORY_DESCRIPTOR version %ld", @@ -218,19 +226,6 @@ void __init efi_print_memmap(void) } } -void __init efi_unmap_memmap(void) -{ - unsigned long size; - - clear_bit(EFI_MEMMAP, &efi.flags); - - size = efi.memmap.nr_map * efi.memmap.desc_size; - if (efi.memmap.map) { - early_memunmap(efi.memmap.map, size); - efi.memmap.map = NULL; - } -} - static int __init efi_systab_init(void *phys) { if (efi_enabled(EFI_64BIT)) { @@ -414,33 +409,6 @@ static int __init efi_runtime_init(void) return 0; } -static int __init efi_memmap_init(void) -{ - unsigned long addr, size; - - if (efi_enabled(EFI_PARAVIRT)) - return 0; - - /* Map the EFI memory map */ - size = efi.memmap.nr_map * efi.memmap.desc_size; - addr = (unsigned long)efi.memmap.phys_map; - - efi.memmap.map = early_memremap(addr, size); - if (efi.memmap.map == NULL) { - pr_err("Could not map the memory map!\n"); - return -ENOMEM; - } - - efi.memmap.map_end = efi.memmap.map + size; - - if (add_efi_memmap) - do_add_efi_memmap(); - - set_bit(EFI_MEMMAP, &efi.flags); - - return 0; -} - void __init efi_init(void) { efi_char16_t *c16; @@ -498,16 +466,14 @@ void __init efi_init(void) if (!efi_runtime_supported()) pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n"); else { - if (efi_runtime_disabled() || efi_runtime_init()) + if (efi_runtime_disabled() || efi_runtime_init()) { + efi_memmap_unmap(); return; + } } - if (efi_memmap_init()) - return; if (efi_enabled(EFI_DBG)) efi_print_memmap(); - - efi_esrt_init(); } void __init efi_late_init(void) @@ -624,42 +590,6 @@ static void __init get_systab_virt_addr(efi_memory_desc_t *md) } } -static void __init save_runtime_map(void) -{ -#ifdef CONFIG_KEXEC_CORE - unsigned long desc_size; - efi_memory_desc_t *md; - void *tmp, *q = NULL; - int count = 0; - - if (efi_enabled(EFI_OLD_MEMMAP)) - return; - - desc_size = efi.memmap.desc_size; - - for_each_efi_memory_desc(md) { - if (!(md->attribute & EFI_MEMORY_RUNTIME) || - (md->type == EFI_BOOT_SERVICES_CODE) || - (md->type == EFI_BOOT_SERVICES_DATA)) - continue; - tmp = krealloc(q, (count + 1) * desc_size, GFP_KERNEL); - if (!tmp) - goto out; - q = tmp; - - memcpy(q + count * desc_size, md, desc_size); - count++; - } - - efi_runtime_map_setup(q, count, desc_size); - return; - -out: - kfree(q); - pr_err("Error saving runtime map, efi runtime on kexec non-functional!!\n"); -#endif -} - static void *realloc_pages(void *old_memmap, int old_shift) { void *ret; @@ -745,6 +675,46 @@ static void *efi_map_next_entry(void *entry) return entry; } +static bool should_map_region(efi_memory_desc_t *md) +{ + /* + * Runtime regions always require runtime mappings (obviously). + */ + if (md->attribute & EFI_MEMORY_RUNTIME) + return true; + + /* + * 32-bit EFI doesn't suffer from the bug that requires us to + * reserve boot services regions, and mixed mode support + * doesn't exist for 32-bit kernels. + */ + if (IS_ENABLED(CONFIG_X86_32)) + return false; + + /* + * Map all of RAM so that we can access arguments in the 1:1 + * mapping when making EFI runtime calls. + */ + if (IS_ENABLED(CONFIG_EFI_MIXED) && !efi_is_native()) { + if (md->type == EFI_CONVENTIONAL_MEMORY || + md->type == EFI_LOADER_DATA || + md->type == EFI_LOADER_CODE) + return true; + } + + /* + * Map boot services regions as a workaround for buggy + * firmware that accesses them even when they shouldn't. + * + * See efi_{reserve,free}_boot_services(). + */ + if (md->type == EFI_BOOT_SERVICES_CODE || + md->type == EFI_BOOT_SERVICES_DATA) + return true; + + return false; +} + /* * Map the efi memory ranges of the runtime services and update new_mmap with * virtual addresses. @@ -761,13 +731,9 @@ static void * __init efi_map_regions(int *count, int *pg_shift) p = NULL; while ((p = efi_map_next_entry(p))) { md = p; - if (!(md->attribute & EFI_MEMORY_RUNTIME)) { -#ifdef CONFIG_X86_64 - if (md->type != EFI_BOOT_SERVICES_CODE && - md->type != EFI_BOOT_SERVICES_DATA) -#endif - continue; - } + + if (!should_map_region(md)) + continue; efi_map_region(md); get_systab_virt_addr(md); @@ -803,7 +769,7 @@ static void __init kexec_enter_virtual_mode(void) * non-native EFI */ if (!efi_is_native()) { - efi_unmap_memmap(); + efi_memmap_unmap(); clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); return; } @@ -823,7 +789,18 @@ static void __init kexec_enter_virtual_mode(void) get_systab_virt_addr(md); } - save_runtime_map(); + /* + * Unregister the early EFI memmap from efi_init() and install + * the new EFI memory map. + */ + efi_memmap_unmap(); + + if (efi_memmap_init_late(efi.memmap.phys_map, + efi.memmap.desc_size * efi.memmap.nr_map)) { + pr_err("Failed to remap late EFI memory map\n"); + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); + return; + } BUG_ON(!efi.systab); @@ -884,6 +861,7 @@ static void __init __efi_enter_virtual_mode(void) int count = 0, pg_shift = 0; void *new_memmap = NULL; efi_status_t status; + phys_addr_t pa; efi.systab = NULL; @@ -901,11 +879,24 @@ static void __init __efi_enter_virtual_mode(void) return; } - save_runtime_map(); + pa = __pa(new_memmap); + + /* + * Unregister the early EFI memmap from efi_init() and install + * the new EFI memory map that we are about to pass to the + * firmware via SetVirtualAddressMap(). + */ + efi_memmap_unmap(); + + if (efi_memmap_init_late(pa, efi.memmap.desc_size * count)) { + pr_err("Failed to remap late EFI memory map\n"); + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); + return; + } BUG_ON(!efi.systab); - if (efi_setup_page_tables(__pa(new_memmap), 1 << pg_shift)) { + if (efi_setup_page_tables(pa, 1 << pg_shift)) { clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); return; } @@ -917,14 +908,14 @@ static void __init __efi_enter_virtual_mode(void) efi.memmap.desc_size * count, efi.memmap.desc_size, efi.memmap.desc_version, - (efi_memory_desc_t *)__pa(new_memmap)); + (efi_memory_desc_t *)pa); } else { status = efi_thunk_set_virtual_address_map( efi_phys.set_virtual_address_map, efi.memmap.desc_size * count, efi.memmap.desc_size, efi.memmap.desc_version, - (efi_memory_desc_t *)__pa(new_memmap)); + (efi_memory_desc_t *)pa); } if (status != EFI_SUCCESS) { @@ -956,15 +947,6 @@ static void __init __efi_enter_virtual_mode(void) efi_runtime_update_mappings(); efi_dump_pagetable(); - /* - * We mapped the descriptor array into the EFI pagetable above - * but we're not unmapping it here because if we're running in - * EFI mixed mode we need all of memory to be accessible when - * we pass parameters to the EFI runtime services in the - * thunking code. - */ - free_pages((unsigned long)new_memmap, pg_shift); - /* clean DUMMY object */ efi_delete_dummy_variable(); } diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 677e29e29473..58b0f801f66f 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -85,7 +85,7 @@ pgd_t * __init efi_call_phys_prolog(void) early_code_mapping_set_exec(1); n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE); - save_pgd = kmalloc(n_pgds * sizeof(pgd_t), GFP_KERNEL); + save_pgd = kmalloc_array(n_pgds, sizeof(*save_pgd), GFP_KERNEL); for (pgd = 0; pgd < n_pgds; pgd++) { save_pgd[pgd] = *pgd_offset_k(pgd * PGDIR_SIZE); @@ -214,7 +214,6 @@ void efi_sync_low_kernel_mappings(void) int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) { unsigned long pfn, text; - efi_memory_desc_t *md; struct page *page; unsigned npages; pgd_t *pgd; @@ -245,28 +244,9 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) * text and allocate a new stack because we can't rely on the * stack pointer being < 4GB. */ - if (!IS_ENABLED(CONFIG_EFI_MIXED)) + if (!IS_ENABLED(CONFIG_EFI_MIXED) || efi_is_native()) return 0; - /* - * Map all of RAM so that we can access arguments in the 1:1 - * mapping when making EFI runtime calls. - */ - for_each_efi_memory_desc(md) { - if (md->type != EFI_CONVENTIONAL_MEMORY && - md->type != EFI_LOADER_DATA && - md->type != EFI_LOADER_CODE) - continue; - - pfn = md->phys_addr >> PAGE_SHIFT; - npages = md->num_pages; - - if (kernel_map_pages_in_pgd(pgd, pfn, md->phys_addr, npages, _PAGE_RW)) { - pr_err("Failed to map 1:1 memory\n"); - return 1; - } - } - page = alloc_page(GFP_KERNEL|__GFP_DMA32); if (!page) panic("Unable to allocate EFI runtime stack < 4GB\n"); @@ -359,6 +339,7 @@ void __init efi_map_region(efi_memory_desc_t *md) */ void __init efi_map_region_fixed(efi_memory_desc_t *md) { + __map_region(md, md->phys_addr); __map_region(md, md->virt_addr); } diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 89d1146f5a6f..10aca63a50d7 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -164,6 +164,75 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size, EXPORT_SYMBOL_GPL(efi_query_variable_store); /* + * The UEFI specification makes it clear that the operating system is + * free to do whatever it wants with boot services code after + * ExitBootServices() has been called. Ignoring this recommendation a + * significant bunch of EFI implementations continue calling into boot + * services code (SetVirtualAddressMap). In order to work around such + * buggy implementations we reserve boot services region during EFI + * init and make sure it stays executable. Then, after + * SetVirtualAddressMap(), it is discarded. + * + * However, some boot services regions contain data that is required + * by drivers, so we need to track which memory ranges can never be + * freed. This is done by tagging those regions with the + * EFI_MEMORY_RUNTIME attribute. + * + * Any driver that wants to mark a region as reserved must use + * efi_mem_reserve() which will insert a new EFI memory descriptor + * into efi.memmap (splitting existing regions if necessary) and tag + * it with EFI_MEMORY_RUNTIME. + */ +void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size) +{ + phys_addr_t new_phys, new_size; + struct efi_mem_range mr; + efi_memory_desc_t md; + int num_entries; + void *new; + + if (efi_mem_desc_lookup(addr, &md)) { + pr_err("Failed to lookup EFI memory descriptor for %pa\n", &addr); + return; + } + + if (addr + size > md.phys_addr + (md.num_pages << EFI_PAGE_SHIFT)) { + pr_err("Region spans EFI memory descriptors, %pa\n", &addr); + return; + } + + size += addr % EFI_PAGE_SIZE; + size = round_up(size, EFI_PAGE_SIZE); + addr = round_down(addr, EFI_PAGE_SIZE); + + mr.range.start = addr; + mr.range.end = addr + size - 1; + mr.attribute = md.attribute | EFI_MEMORY_RUNTIME; + + num_entries = efi_memmap_split_count(&md, &mr.range); + num_entries += efi.memmap.nr_map; + + new_size = efi.memmap.desc_size * num_entries; + + new_phys = memblock_alloc(new_size, 0); + if (!new_phys) { + pr_err("Could not allocate boot services memmap\n"); + return; + } + + new = early_memremap(new_phys, new_size); + if (!new) { + pr_err("Failed to map new boot services memmap\n"); + return; + } + + efi_memmap_insert(&efi.memmap, new, &mr); + early_memunmap(new, new_size); + + efi_memmap_install(new_phys, num_entries); +} + +/* * Helper function for efi_reserve_boot_services() to figure out if we * can free regions in efi_free_boot_services(). * @@ -184,15 +253,6 @@ static bool can_free_region(u64 start, u64 size) return true; } -/* - * The UEFI specification makes it clear that the operating system is free to do - * whatever it wants with boot services code after ExitBootServices() has been - * called. Ignoring this recommendation a significant bunch of EFI implementations - * continue calling into boot services code (SetVirtualAddressMap). In order to - * work around such buggy implementations we reserve boot services region during - * EFI init and make sure it stays executable. Then, after SetVirtualAddressMap(), it -* is discarded. -*/ void __init efi_reserve_boot_services(void) { efi_memory_desc_t *md; @@ -249,7 +309,10 @@ void __init efi_reserve_boot_services(void) void __init efi_free_boot_services(void) { + phys_addr_t new_phys, new_size; efi_memory_desc_t *md; + int num_entries = 0; + void *new, *new_md; for_each_efi_memory_desc(md) { unsigned long long start = md->phys_addr; @@ -257,12 +320,16 @@ void __init efi_free_boot_services(void) size_t rm_size; if (md->type != EFI_BOOT_SERVICES_CODE && - md->type != EFI_BOOT_SERVICES_DATA) + md->type != EFI_BOOT_SERVICES_DATA) { + num_entries++; continue; + } /* Do not free, someone else owns it: */ - if (md->attribute & EFI_MEMORY_RUNTIME) + if (md->attribute & EFI_MEMORY_RUNTIME) { + num_entries++; continue; + } /* * Nasty quirk: if all sub-1MB memory is used for boot @@ -287,7 +354,41 @@ void __init efi_free_boot_services(void) free_bootmem_late(start, size); } - efi_unmap_memmap(); + new_size = efi.memmap.desc_size * num_entries; + new_phys = memblock_alloc(new_size, 0); + if (!new_phys) { + pr_err("Failed to allocate new EFI memmap\n"); + return; + } + + new = memremap(new_phys, new_size, MEMREMAP_WB); + if (!new) { + pr_err("Failed to map new EFI memmap\n"); + return; + } + + /* + * Build a new EFI memmap that excludes any boot services + * regions that are not tagged EFI_MEMORY_RUNTIME, since those + * regions have now been freed. + */ + new_md = new; + for_each_efi_memory_desc(md) { + if (!(md->attribute & EFI_MEMORY_RUNTIME) && + (md->type == EFI_BOOT_SERVICES_CODE || + md->type == EFI_BOOT_SERVICES_DATA)) + continue; + + memcpy(new_md, md, efi.memmap.desc_size); + new_md += efi.memmap.desc_size; + } + + memunmap(new); + + if (efi_memmap_install(new_phys, num_entries)) { + pr_err("Could not install new EFI memmap\n"); + return; + } } /* @@ -365,7 +466,7 @@ void __init efi_apply_memmap_quirks(void) */ if (!efi_runtime_supported()) { pr_info("Setup done, disabling due to 32/64-bit mismatch\n"); - efi_unmap_memmap(); + efi_memmap_unmap(); } /* UV2+ BIOS has a fix for this issue. UV1 still needs the quirk. */ diff --git a/arch/x86/platform/intel-mid/device_libs/Makefile b/arch/x86/platform/intel-mid/device_libs/Makefile index fc135bf70511..429d08be7848 100644 --- a/arch/x86/platform/intel-mid/device_libs/Makefile +++ b/arch/x86/platform/intel-mid/device_libs/Makefile @@ -1,5 +1,9 @@ # Family-Level Interface Shim (FLIS) obj-$(subst m,y,$(CONFIG_PINCTRL_MERRIFIELD)) += platform_mrfld_pinctrl.o +# SDHCI Devices +obj-$(subst m,y,$(CONFIG_MMC_SDHCI_PCI)) += platform_mrfld_sd.o +# WiFi +obj-$(subst m,y,$(CONFIG_BRCMFMAC_SDIO)) += platform_bcm43xx.o # IPC Devices obj-y += platform_ipc.o obj-$(subst m,y,$(CONFIG_MFD_INTEL_MSIC)) += platform_msic.o diff --git a/arch/x86/platform/intel-mid/device_libs/platform_bcm43xx.c b/arch/x86/platform/intel-mid/device_libs/platform_bcm43xx.c new file mode 100644 index 000000000000..4392c15ed9e0 --- /dev/null +++ b/arch/x86/platform/intel-mid/device_libs/platform_bcm43xx.c @@ -0,0 +1,95 @@ +/* + * platform_bcm43xx.c: bcm43xx platform data initilization file + * + * (C) Copyright 2016 Intel Corporation + * Author: Andy Shevchenko <andriy.shevchenko@linux.intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + +#include <linux/gpio.h> +#include <linux/platform_device.h> +#include <linux/regulator/machine.h> +#include <linux/regulator/fixed.h> +#include <linux/sfi.h> + +#include <asm/intel-mid.h> + +#define WLAN_SFI_GPIO_IRQ_NAME "WLAN-interrupt" +#define WLAN_SFI_GPIO_ENABLE_NAME "WLAN-enable" + +#define WLAN_DEV_NAME "0000:00:01.3" + +static struct regulator_consumer_supply bcm43xx_vmmc_supply = { + .dev_name = WLAN_DEV_NAME, + .supply = "vmmc", +}; + +static struct regulator_init_data bcm43xx_vmmc_data = { + .constraints = { + .valid_ops_mask = REGULATOR_CHANGE_STATUS, + }, + .num_consumer_supplies = 1, + .consumer_supplies = &bcm43xx_vmmc_supply, +}; + +static struct fixed_voltage_config bcm43xx_vmmc = { + .supply_name = "bcm43xx-vmmc-regulator", + /* + * Announce 2.0V here to be compatible with SDIO specification. The + * real voltage and signaling are still 1.8V. + */ + .microvolts = 2000000, /* 1.8V */ + .gpio = -EINVAL, + .startup_delay = 250 * 1000, /* 250ms */ + .enable_high = 1, /* active high */ + .enabled_at_boot = 0, /* disabled at boot */ + .init_data = &bcm43xx_vmmc_data, +}; + +static struct platform_device bcm43xx_vmmc_regulator = { + .name = "reg-fixed-voltage", + .id = PLATFORM_DEVID_AUTO, + .dev = { + .platform_data = &bcm43xx_vmmc, + }, +}; + +static int __init bcm43xx_regulator_register(void) +{ + int ret; + + bcm43xx_vmmc.gpio = get_gpio_by_name(WLAN_SFI_GPIO_ENABLE_NAME); + ret = platform_device_register(&bcm43xx_vmmc_regulator); + if (ret) { + pr_err("%s: vmmc regulator register failed\n", __func__); + return ret; + } + + return 0; +} + +static void __init *bcm43xx_platform_data(void *info) +{ + int ret; + + ret = bcm43xx_regulator_register(); + if (ret) + return NULL; + + pr_info("Using generic wifi platform data\n"); + + /* For now it's empty */ + return NULL; +} + +static const struct devs_id bcm43xx_clk_vmmc_dev_id __initconst = { + .name = "bcm43xx_clk_vmmc", + .type = SFI_DEV_TYPE_SD, + .get_platform_data = &bcm43xx_platform_data, +}; + +sfi_device(bcm43xx_clk_vmmc_dev_id); diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_sd.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_sd.c new file mode 100644 index 000000000000..00c4a034ad93 --- /dev/null +++ b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_sd.c @@ -0,0 +1,47 @@ +/* + * SDHCI platform data initilisation file + * + * (C) Copyright 2016 Intel Corporation + * Author: Andy Shevchenko <andriy.shevchenko@linux.intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + +#include <linux/init.h> +#include <linux/pci.h> + +#include <linux/mmc/sdhci-pci-data.h> + +#include <asm/intel-mid.h> + +#define INTEL_MRFLD_SD 2 +#define INTEL_MRFLD_SD_CD_GPIO 77 + +static struct sdhci_pci_data mrfld_sdhci_pci_data = { + .rst_n_gpio = -EINVAL, + .cd_gpio = INTEL_MRFLD_SD_CD_GPIO, +}; + +static struct sdhci_pci_data * +mrfld_sdhci_pci_get_data(struct pci_dev *pdev, int slotno) +{ + unsigned int func = PCI_FUNC(pdev->devfn); + + if (func == INTEL_MRFLD_SD) + return &mrfld_sdhci_pci_data; + + return NULL; +} + +static int __init mrfld_sd_init(void) +{ + if (intel_mid_identify_cpu() != INTEL_MID_CPU_CHIP_TANGIER) + return -ENODEV; + + sdhci_pci_get_data = mrfld_sdhci_pci_get_data; + return 0; +} +arch_initcall(mrfld_sd_init); diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c index ce119d2ba0d0..7850128f0026 100644 --- a/arch/x86/platform/intel-mid/intel-mid.c +++ b/arch/x86/platform/intel-mid/intel-mid.c @@ -70,6 +70,11 @@ EXPORT_SYMBOL_GPL(__intel_mid_cpu_chip); static void intel_mid_power_off(void) { + /* Shut down South Complex via PWRMU */ + intel_mid_pwr_power_off(); + + /* Only for Tangier, the rest will ignore this command */ + intel_scu_ipc_simple_command(IPCMSG_COLD_OFF, 1); }; static void intel_mid_reboot(void) diff --git a/arch/x86/platform/intel-mid/pwr.c b/arch/x86/platform/intel-mid/pwr.c index c901a3423772..5d3b45ad1c03 100644 --- a/arch/x86/platform/intel-mid/pwr.c +++ b/arch/x86/platform/intel-mid/pwr.c @@ -44,7 +44,19 @@ /* Bits in PM_CMD */ #define PM_CMD_CMD(x) ((x) << 0) #define PM_CMD_IOC (1 << 8) -#define PM_CMD_D3cold (1 << 21) +#define PM_CMD_CM_NOP (0 << 9) +#define PM_CMD_CM_IMMEDIATE (1 << 9) +#define PM_CMD_CM_DELAY (2 << 9) +#define PM_CMD_CM_TRIGGER (3 << 9) + +/* System states */ +#define PM_CMD_SYS_STATE_S5 (5 << 16) + +/* Trigger variants */ +#define PM_CMD_CFG_TRIGGER_NC (3 << 19) + +/* Message to wait for TRIGGER_NC case */ +#define TRIGGER_NC_MSG_2 (2 << 22) /* List of commands */ #define CMD_SET_CFG 0x01 @@ -137,7 +149,7 @@ static int mid_pwr_wait(struct mid_pwr *pwr) static int mid_pwr_wait_for_cmd(struct mid_pwr *pwr, u8 cmd) { - writel(PM_CMD_CMD(cmd), pwr->regs + PM_CMD); + writel(PM_CMD_CMD(cmd) | PM_CMD_CM_IMMEDIATE, pwr->regs + PM_CMD); return mid_pwr_wait(pwr); } @@ -260,6 +272,20 @@ int intel_mid_pci_set_power_state(struct pci_dev *pdev, pci_power_t state) } EXPORT_SYMBOL_GPL(intel_mid_pci_set_power_state); +void intel_mid_pwr_power_off(void) +{ + struct mid_pwr *pwr = midpwr; + u32 cmd = PM_CMD_SYS_STATE_S5 | + PM_CMD_CMD(CMD_SET_CFG) | + PM_CMD_CM_TRIGGER | + PM_CMD_CFG_TRIGGER_NC | + TRIGGER_NC_MSG_2; + + /* Send command to SCU */ + writel(cmd, pwr->regs + PM_CMD); + mid_pwr_wait(pwr); +} + int intel_mid_pwr_get_lss_id(struct pci_dev *pdev) { int vndr; @@ -354,7 +380,7 @@ static int mid_pwr_probe(struct pci_dev *pdev, const struct pci_device_id *id) return 0; } -static int mid_set_initial_state(struct mid_pwr *pwr) +static int mid_set_initial_state(struct mid_pwr *pwr, const u32 *states) { unsigned int i, j; int ret; @@ -379,10 +405,10 @@ static int mid_set_initial_state(struct mid_pwr *pwr) * NOTE: The actual device mapping is provided by a platform at run * time using vendor capability of PCI configuration space. */ - mid_pwr_set_state(pwr, 0, 0xffffffff); - mid_pwr_set_state(pwr, 1, 0xffffffff); - mid_pwr_set_state(pwr, 2, 0xffffffff); - mid_pwr_set_state(pwr, 3, 0xffffffff); + mid_pwr_set_state(pwr, 0, states[0]); + mid_pwr_set_state(pwr, 1, states[1]); + mid_pwr_set_state(pwr, 2, states[2]); + mid_pwr_set_state(pwr, 3, states[3]); /* Send command to SCU */ ret = mid_pwr_wait_for_cmd(pwr, CMD_SET_CFG); @@ -397,13 +423,41 @@ static int mid_set_initial_state(struct mid_pwr *pwr) return 0; } -static const struct mid_pwr_device_info mid_info = { - .set_initial_state = mid_set_initial_state, +static int pnw_set_initial_state(struct mid_pwr *pwr) +{ + /* On Penwell SRAM must stay powered on */ + const u32 states[] = { + 0xf00fffff, /* PM_SSC(0) */ + 0xffffffff, /* PM_SSC(1) */ + 0xffffffff, /* PM_SSC(2) */ + 0xffffffff, /* PM_SSC(3) */ + }; + return mid_set_initial_state(pwr, states); +} + +static int tng_set_initial_state(struct mid_pwr *pwr) +{ + const u32 states[] = { + 0xffffffff, /* PM_SSC(0) */ + 0xffffffff, /* PM_SSC(1) */ + 0xffffffff, /* PM_SSC(2) */ + 0xffffffff, /* PM_SSC(3) */ + }; + return mid_set_initial_state(pwr, states); +} + +static const struct mid_pwr_device_info pnw_info = { + .set_initial_state = pnw_set_initial_state, +}; + +static const struct mid_pwr_device_info tng_info = { + .set_initial_state = tng_set_initial_state, }; +/* This table should be in sync with the one in drivers/pci/pci-mid.c */ static const struct pci_device_id mid_pwr_pci_ids[] = { - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_PENWELL), (kernel_ulong_t)&mid_info }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_TANGIER), (kernel_ulong_t)&mid_info }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_PENWELL), (kernel_ulong_t)&pnw_info }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_TANGIER), (kernel_ulong_t)&tng_info }, {} }; diff --git a/arch/x86/platform/mellanox/Makefile b/arch/x86/platform/mellanox/Makefile new file mode 100644 index 000000000000..f43c93188a1d --- /dev/null +++ b/arch/x86/platform/mellanox/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_MLX_PLATFORM) += mlx-platform.o diff --git a/arch/x86/platform/mellanox/mlx-platform.c b/arch/x86/platform/mellanox/mlx-platform.c new file mode 100644 index 000000000000..7dcfcca97399 --- /dev/null +++ b/arch/x86/platform/mellanox/mlx-platform.c @@ -0,0 +1,266 @@ +/* + * arch/x86/platform/mellanox/mlx-platform.c + * Copyright (c) 2016 Mellanox Technologies. All rights reserved. + * Copyright (c) 2016 Vadim Pasternak <vadimp@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/device.h> +#include <linux/dmi.h> +#include <linux/i2c.h> +#include <linux/i2c-mux.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/platform_data/i2c-mux-reg.h> + +#define MLX_PLAT_DEVICE_NAME "mlxplat" + +/* LPC bus IO offsets */ +#define MLXPLAT_CPLD_LPC_I2C_BASE_ADRR 0x2000 +#define MLXPLAT_CPLD_LPC_REG_BASE_ADRR 0x2500 +#define MLXPLAT_CPLD_LPC_IO_RANGE 0x100 +#define MLXPLAT_CPLD_LPC_I2C_CH1_OFF 0xdb +#define MLXPLAT_CPLD_LPC_I2C_CH2_OFF 0xda +#define MLXPLAT_CPLD_LPC_PIO_OFFSET 0x10000UL +#define MLXPLAT_CPLD_LPC_REG1 ((MLXPLAT_CPLD_LPC_REG_BASE_ADRR + \ + MLXPLAT_CPLD_LPC_I2C_CH1_OFF) | \ + MLXPLAT_CPLD_LPC_PIO_OFFSET) +#define MLXPLAT_CPLD_LPC_REG2 ((MLXPLAT_CPLD_LPC_REG_BASE_ADRR + \ + MLXPLAT_CPLD_LPC_I2C_CH2_OFF) | \ + MLXPLAT_CPLD_LPC_PIO_OFFSET) + +/* Start channel numbers */ +#define MLXPLAT_CPLD_CH1 2 +#define MLXPLAT_CPLD_CH2 10 + +/* Number of LPC attached MUX platform devices */ +#define MLXPLAT_CPLD_LPC_MUX_DEVS 2 + +/* mlxplat_priv - platform private data + * @pdev_i2c - i2c controller platform device + * @pdev_mux - array of mux platform devices + */ +struct mlxplat_priv { + struct platform_device *pdev_i2c; + struct platform_device *pdev_mux[MLXPLAT_CPLD_LPC_MUX_DEVS]; +}; + +/* Regions for LPC I2C controller and LPC base register space */ +static const struct resource mlxplat_lpc_resources[] = { + [0] = DEFINE_RES_NAMED(MLXPLAT_CPLD_LPC_I2C_BASE_ADRR, + MLXPLAT_CPLD_LPC_IO_RANGE, + "mlxplat_cpld_lpc_i2c_ctrl", IORESOURCE_IO), + [1] = DEFINE_RES_NAMED(MLXPLAT_CPLD_LPC_REG_BASE_ADRR, + MLXPLAT_CPLD_LPC_IO_RANGE, + "mlxplat_cpld_lpc_regs", + IORESOURCE_IO), +}; + +/* Platform default channels */ +static const int mlxplat_default_channels[][8] = { + { + MLXPLAT_CPLD_CH1, MLXPLAT_CPLD_CH1 + 1, MLXPLAT_CPLD_CH1 + 2, + MLXPLAT_CPLD_CH1 + 3, MLXPLAT_CPLD_CH1 + 4, MLXPLAT_CPLD_CH1 + + 5, MLXPLAT_CPLD_CH1 + 6, MLXPLAT_CPLD_CH1 + 7 + }, + { + MLXPLAT_CPLD_CH2, MLXPLAT_CPLD_CH2 + 1, MLXPLAT_CPLD_CH2 + 2, + MLXPLAT_CPLD_CH2 + 3, MLXPLAT_CPLD_CH2 + 4, MLXPLAT_CPLD_CH2 + + 5, MLXPLAT_CPLD_CH2 + 6, MLXPLAT_CPLD_CH2 + 7 + }, +}; + +/* Platform channels for MSN21xx system family */ +static const int mlxplat_msn21xx_channels[] = { 1, 2, 3, 4, 5, 6, 7, 8 }; + +/* Platform mux data */ +static struct i2c_mux_reg_platform_data mlxplat_mux_data[] = { + { + .parent = 1, + .base_nr = MLXPLAT_CPLD_CH1, + .write_only = 1, + .reg = (void __iomem *)MLXPLAT_CPLD_LPC_REG1, + .reg_size = 1, + .idle_in_use = 1, + }, + { + .parent = 1, + .base_nr = MLXPLAT_CPLD_CH2, + .write_only = 1, + .reg = (void __iomem *)MLXPLAT_CPLD_LPC_REG2, + .reg_size = 1, + .idle_in_use = 1, + }, + +}; + +static struct platform_device *mlxplat_dev; + +static int __init mlxplat_dmi_default_matched(const struct dmi_system_id *dmi) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(mlxplat_mux_data); i++) { + mlxplat_mux_data[i].values = mlxplat_default_channels[i]; + mlxplat_mux_data[i].n_values = + ARRAY_SIZE(mlxplat_default_channels[i]); + } + + return 1; +}; + +static int __init mlxplat_dmi_msn21xx_matched(const struct dmi_system_id *dmi) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(mlxplat_mux_data); i++) { + mlxplat_mux_data[i].values = mlxplat_msn21xx_channels; + mlxplat_mux_data[i].n_values = + ARRAY_SIZE(mlxplat_msn21xx_channels); + } + + return 1; +}; + +static struct dmi_system_id mlxplat_dmi_table[] __initdata = { + { + .callback = mlxplat_dmi_default_matched, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Mellanox Technologies"), + DMI_MATCH(DMI_PRODUCT_NAME, "MSN24"), + }, + }, + { + .callback = mlxplat_dmi_default_matched, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Mellanox Technologies"), + DMI_MATCH(DMI_PRODUCT_NAME, "MSN27"), + }, + }, + { + .callback = mlxplat_dmi_default_matched, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Mellanox Technologies"), + DMI_MATCH(DMI_PRODUCT_NAME, "MSB"), + }, + }, + { + .callback = mlxplat_dmi_default_matched, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Mellanox Technologies"), + DMI_MATCH(DMI_PRODUCT_NAME, "MSX"), + }, + }, + { + .callback = mlxplat_dmi_msn21xx_matched, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Mellanox Technologies"), + DMI_MATCH(DMI_PRODUCT_NAME, "MSN21"), + }, + }, + { } +}; + +static int __init mlxplat_init(void) +{ + struct mlxplat_priv *priv; + int i, err; + + if (!dmi_check_system(mlxplat_dmi_table)) + return -ENODEV; + + mlxplat_dev = platform_device_register_simple(MLX_PLAT_DEVICE_NAME, -1, + mlxplat_lpc_resources, + ARRAY_SIZE(mlxplat_lpc_resources)); + + if (IS_ERR(mlxplat_dev)) + return PTR_ERR(mlxplat_dev); + + priv = devm_kzalloc(&mlxplat_dev->dev, sizeof(struct mlxplat_priv), + GFP_KERNEL); + if (!priv) { + err = -ENOMEM; + goto fail_alloc; + } + platform_set_drvdata(mlxplat_dev, priv); + + priv->pdev_i2c = platform_device_register_simple("i2c_mlxcpld", -1, + NULL, 0); + if (IS_ERR(priv->pdev_i2c)) { + err = PTR_ERR(priv->pdev_i2c); + goto fail_alloc; + }; + + for (i = 0; i < ARRAY_SIZE(mlxplat_mux_data); i++) { + priv->pdev_mux[i] = platform_device_register_resndata( + &mlxplat_dev->dev, + "i2c-mux-reg", i, NULL, + 0, &mlxplat_mux_data[i], + sizeof(mlxplat_mux_data[i])); + if (IS_ERR(priv->pdev_mux[i])) { + err = PTR_ERR(priv->pdev_mux[i]); + goto fail_platform_mux_register; + } + } + + return 0; + +fail_platform_mux_register: + for (i--; i > 0 ; i--) + platform_device_unregister(priv->pdev_mux[i]); + platform_device_unregister(priv->pdev_i2c); +fail_alloc: + platform_device_unregister(mlxplat_dev); + + return err; +} +module_init(mlxplat_init); + +static void __exit mlxplat_exit(void) +{ + struct mlxplat_priv *priv = platform_get_drvdata(mlxplat_dev); + int i; + + for (i = ARRAY_SIZE(mlxplat_mux_data) - 1; i >= 0 ; i--) + platform_device_unregister(priv->pdev_mux[i]); + + platform_device_unregister(priv->pdev_i2c); + platform_device_unregister(mlxplat_dev); +} +module_exit(mlxplat_exit); + +MODULE_AUTHOR("Vadim Pasternak (vadimp@mellanox.com)"); +MODULE_DESCRIPTION("Mellanox platform driver"); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_ALIAS("dmi:*:*Mellanox*:MSN24*:"); +MODULE_ALIAS("dmi:*:*Mellanox*:MSN27*:"); +MODULE_ALIAS("dmi:*:*Mellanox*:MSB*:"); +MODULE_ALIAS("dmi:*:*Mellanox*:MSX*:"); +MODULE_ALIAS("dmi:*:*Mellanox*:MSN21*:"); diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c index 23f2f3e41c7f..b4d5e95fe4df 100644 --- a/arch/x86/platform/uv/bios_uv.c +++ b/arch/x86/platform/uv/bios_uv.c @@ -149,11 +149,8 @@ EXPORT_SYMBOL_GPL(uv_bios_change_memprotect); s64 uv_bios_reserved_page_pa(u64 buf, u64 *cookie, u64 *addr, u64 *len) { - s64 ret; - - ret = uv_bios_call_irqsave(UV_BIOS_GET_PARTITION_ADDR, (u64)cookie, - (u64)addr, buf, (u64)len, 0); - return ret; + return uv_bios_call_irqsave(UV_BIOS_GET_PARTITION_ADDR, (u64)cookie, + (u64)addr, buf, (u64)len, 0); } EXPORT_SYMBOL_GPL(uv_bios_reserved_page_pa); diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index fdb4d42b4ce5..9e42842e924a 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -24,6 +24,29 @@ #include <asm/irq_vectors.h> #include <asm/timer.h> +static struct bau_operations ops; + +static struct bau_operations uv123_bau_ops = { + .bau_gpa_to_offset = uv_gpa_to_offset, + .read_l_sw_ack = read_mmr_sw_ack, + .read_g_sw_ack = read_gmmr_sw_ack, + .write_l_sw_ack = write_mmr_sw_ack, + .write_g_sw_ack = write_gmmr_sw_ack, + .write_payload_first = write_mmr_payload_first, + .write_payload_last = write_mmr_payload_last, +}; + +static struct bau_operations uv4_bau_ops = { + .bau_gpa_to_offset = uv_gpa_to_soc_phys_ram, + .read_l_sw_ack = read_mmr_proc_sw_ack, + .read_g_sw_ack = read_gmmr_proc_sw_ack, + .write_l_sw_ack = write_mmr_proc_sw_ack, + .write_g_sw_ack = write_gmmr_proc_sw_ack, + .write_payload_first = write_mmr_proc_payload_first, + .write_payload_last = write_mmr_proc_payload_last, +}; + + /* timeouts in nanoseconds (indexed by UVH_AGING_PRESCALE_SEL urgency7 30:28) */ static int timeout_base_ns[] = { 20, @@ -55,16 +78,16 @@ static int congested_reps = CONGESTED_REPS; static int disabled_period = DISABLED_PERIOD; static struct tunables tunables[] = { - {&max_concurr, MAX_BAU_CONCURRENT}, /* must be [0] */ - {&plugged_delay, PLUGGED_DELAY}, - {&plugsb4reset, PLUGSB4RESET}, - {&timeoutsb4reset, TIMEOUTSB4RESET}, - {&ipi_reset_limit, IPI_RESET_LIMIT}, - {&complete_threshold, COMPLETE_THRESHOLD}, - {&congested_respns_us, CONGESTED_RESPONSE_US}, - {&congested_reps, CONGESTED_REPS}, - {&disabled_period, DISABLED_PERIOD}, - {&giveup_limit, GIVEUP_LIMIT} + {&max_concurr, MAX_BAU_CONCURRENT}, /* must be [0] */ + {&plugged_delay, PLUGGED_DELAY}, + {&plugsb4reset, PLUGSB4RESET}, + {&timeoutsb4reset, TIMEOUTSB4RESET}, + {&ipi_reset_limit, IPI_RESET_LIMIT}, + {&complete_threshold, COMPLETE_THRESHOLD}, + {&congested_respns_us, CONGESTED_RESPONSE_US}, + {&congested_reps, CONGESTED_REPS}, + {&disabled_period, DISABLED_PERIOD}, + {&giveup_limit, GIVEUP_LIMIT} }; static struct dentry *tunables_dir; @@ -216,7 +239,7 @@ static void reply_to_message(struct msg_desc *mdp, struct bau_control *bcp, msg = mdp->msg; if (!msg->canceled && do_acknowledge) { dw = (msg->swack_vec << UV_SW_ACK_NPENDING) | msg->swack_vec; - write_mmr_sw_ack(dw); + ops.write_l_sw_ack(dw); } msg->replied_to = 1; msg->swack_vec = 0; @@ -252,7 +275,7 @@ static void bau_process_retry_msg(struct msg_desc *mdp, msg->swack_vec) == 0) && (msg2->sending_cpu == msg->sending_cpu) && (msg2->msg_type != MSG_NOOP)) { - mmr = read_mmr_sw_ack(); + mmr = ops.read_l_sw_ack(); msg_res = msg2->swack_vec; /* * This is a message retry; clear the resources held @@ -270,7 +293,7 @@ static void bau_process_retry_msg(struct msg_desc *mdp, stat->d_canceled++; cancel_count++; mr = (msg_res << UV_SW_ACK_NPENDING) | msg_res; - write_mmr_sw_ack(mr); + ops.write_l_sw_ack(mr); } } } @@ -403,12 +426,12 @@ static void do_reset(void *ptr) /* * only reset the resource if it is still pending */ - mmr = read_mmr_sw_ack(); + mmr = ops.read_l_sw_ack(); msg_res = msg->swack_vec; mr = (msg_res << UV_SW_ACK_NPENDING) | msg_res; if (mmr & msg_res) { stat->d_rcanceled++; - write_mmr_sw_ack(mr); + ops.write_l_sw_ack(mr); } } } @@ -580,11 +603,7 @@ static int uv1_wait_completion(struct bau_desc *bau_desc, */ static unsigned long uv2_3_read_status(unsigned long offset, int rshft, int desc) { - unsigned long descriptor_status; - - descriptor_status = - ((read_lmmr(offset) >> rshft) & UV_ACT_STATUS_MASK) << 1; - return descriptor_status; + return ((read_lmmr(offset) >> rshft) & UV_ACT_STATUS_MASK) << 1; } /* @@ -1202,7 +1221,7 @@ void process_uv2_message(struct msg_desc *mdp, struct bau_control *bcp) struct bau_pq_entry *msg = mdp->msg; struct bau_pq_entry *other_msg; - mmr_image = read_mmr_sw_ack(); + mmr_image = ops.read_l_sw_ack(); swack_vec = msg->swack_vec; if ((swack_vec & mmr_image) == 0) { @@ -1431,7 +1450,7 @@ static int ptc_seq_show(struct seq_file *file, void *data) /* destination side statistics */ seq_printf(file, "%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n", - read_gmmr_sw_ack(uv_cpu_to_pnode(cpu)), + ops.read_g_sw_ack(uv_cpu_to_pnode(cpu)), stat->d_requestee, cycles_2_us(stat->d_time), stat->d_alltlb, stat->d_onetlb, stat->d_multmsg, stat->d_nomsg, stat->d_retries, stat->d_canceled, @@ -1497,16 +1516,16 @@ static ssize_t ptc_proc_write(struct file *file, const char __user *user, } if (kstrtol(optstr, 10, &input_arg) < 0) { - printk(KERN_DEBUG "%s is invalid\n", optstr); + pr_debug("%s is invalid\n", optstr); return -EINVAL; } if (input_arg == 0) { elements = ARRAY_SIZE(stat_description); - printk(KERN_DEBUG "# cpu: cpu number\n"); - printk(KERN_DEBUG "Sender statistics:\n"); + pr_debug("# cpu: cpu number\n"); + pr_debug("Sender statistics:\n"); for (i = 0; i < elements; i++) - printk(KERN_DEBUG "%s\n", stat_description[i]); + pr_debug("%s\n", stat_description[i]); } else if (input_arg == -1) { for_each_present_cpu(cpu) { stat = &per_cpu(ptcstats, cpu); @@ -1554,7 +1573,7 @@ static int parse_tunables_write(struct bau_control *bcp, char *instr, break; } if (cnt != e) { - printk(KERN_INFO "bau tunable error: should be %d values\n", e); + pr_info("bau tunable error: should be %d values\n", e); return -EINVAL; } @@ -1571,7 +1590,7 @@ static int parse_tunables_write(struct bau_control *bcp, char *instr, continue; } if (val < 1 || val > bcp->cpus_in_uvhub) { - printk(KERN_DEBUG + pr_debug( "Error: BAU max concurrent %d is invalid\n", val); return -EINVAL; @@ -1619,17 +1638,17 @@ static ssize_t tunables_write(struct file *file, const char __user *user, for_each_present_cpu(cpu) { bcp = &per_cpu(bau_control, cpu); - bcp->max_concurr = max_concurr; - bcp->max_concurr_const = max_concurr; - bcp->plugged_delay = plugged_delay; - bcp->plugsb4reset = plugsb4reset; - bcp->timeoutsb4reset = timeoutsb4reset; - bcp->ipi_reset_limit = ipi_reset_limit; - bcp->complete_threshold = complete_threshold; - bcp->cong_response_us = congested_respns_us; - bcp->cong_reps = congested_reps; - bcp->disabled_period = sec_2_cycles(disabled_period); - bcp->giveup_limit = giveup_limit; + bcp->max_concurr = max_concurr; + bcp->max_concurr_const = max_concurr; + bcp->plugged_delay = plugged_delay; + bcp->plugsb4reset = plugsb4reset; + bcp->timeoutsb4reset = timeoutsb4reset; + bcp->ipi_reset_limit = ipi_reset_limit; + bcp->complete_threshold = complete_threshold; + bcp->cong_response_us = congested_respns_us; + bcp->cong_reps = congested_reps; + bcp->disabled_period = sec_2_cycles(disabled_period); + bcp->giveup_limit = giveup_limit; } return count; } @@ -1676,21 +1695,21 @@ static int __init uv_ptc_init(void) proc_uv_ptc = proc_create(UV_PTC_BASENAME, 0444, NULL, &proc_uv_ptc_operations); if (!proc_uv_ptc) { - printk(KERN_ERR "unable to create %s proc entry\n", + pr_err("unable to create %s proc entry\n", UV_PTC_BASENAME); return -EINVAL; } tunables_dir = debugfs_create_dir(UV_BAU_TUNABLES_DIR, NULL); if (!tunables_dir) { - printk(KERN_ERR "unable to create debugfs directory %s\n", + pr_err("unable to create debugfs directory %s\n", UV_BAU_TUNABLES_DIR); return -EINVAL; } tunables_file = debugfs_create_file(UV_BAU_TUNABLES_FILE, 0600, tunables_dir, NULL, &tunables_fops); if (!tunables_file) { - printk(KERN_ERR "unable to create debugfs file %s\n", + pr_err("unable to create debugfs file %s\n", UV_BAU_TUNABLES_FILE); return -EINVAL; } @@ -1725,7 +1744,7 @@ static void activation_descriptor_init(int node, int pnode, int base_pnode) gpa = uv_gpa(bau_desc); n = uv_gpa_to_gnode(gpa); - m = uv_gpa_to_offset(gpa); + m = ops.bau_gpa_to_offset(gpa); if (is_uv1_hub()) uv1 = 1; @@ -1740,7 +1759,7 @@ static void activation_descriptor_init(int node, int pnode, int base_pnode) memset(bd2, 0, sizeof(struct bau_desc)); if (uv1) { uv1_hdr = &bd2->header.uv1_hdr; - uv1_hdr->swack_flag = 1; + uv1_hdr->swack_flag = 1; /* * The base_dest_nasid set in the message header * is the nasid of the first uvhub in the partition. @@ -1749,10 +1768,10 @@ static void activation_descriptor_init(int node, int pnode, int base_pnode) * if nasid striding is being used. */ uv1_hdr->base_dest_nasid = - UV_PNODE_TO_NASID(base_pnode); - uv1_hdr->dest_subnodeid = UV_LB_SUBNODEID; - uv1_hdr->command = UV_NET_ENDPOINT_INTD; - uv1_hdr->int_both = 1; + UV_PNODE_TO_NASID(base_pnode); + uv1_hdr->dest_subnodeid = UV_LB_SUBNODEID; + uv1_hdr->command = UV_NET_ENDPOINT_INTD; + uv1_hdr->int_both = 1; /* * all others need to be set to zero: * fairness chaining multilevel count replied_to @@ -1763,11 +1782,11 @@ static void activation_descriptor_init(int node, int pnode, int base_pnode) * uses native mode for selective broadcasts. */ uv2_3_hdr = &bd2->header.uv2_3_hdr; - uv2_3_hdr->swack_flag = 1; + uv2_3_hdr->swack_flag = 1; uv2_3_hdr->base_dest_nasid = - UV_PNODE_TO_NASID(base_pnode); - uv2_3_hdr->dest_subnodeid = UV_LB_SUBNODEID; - uv2_3_hdr->command = UV_NET_ENDPOINT_INTD; + UV_PNODE_TO_NASID(base_pnode); + uv2_3_hdr->dest_subnodeid = UV_LB_SUBNODEID; + uv2_3_hdr->command = UV_NET_ENDPOINT_INTD; } } for_each_present_cpu(cpu) { @@ -1790,10 +1809,7 @@ static void pq_init(int node, int pnode) size_t plsize; char *cp; void *vp; - unsigned long pn; - unsigned long first; - unsigned long pn_first; - unsigned long last; + unsigned long gnode, first, last, tail; struct bau_pq_entry *pqp; struct bau_control *bcp; @@ -1814,17 +1830,25 @@ static void pq_init(int node, int pnode) bcp->bau_msg_head = pqp; bcp->queue_last = pqp + (DEST_Q_SIZE - 1); } + + first = ops.bau_gpa_to_offset(uv_gpa(pqp)); + last = ops.bau_gpa_to_offset(uv_gpa(pqp + (DEST_Q_SIZE - 1))); + /* - * need the gnode of where the memory was really allocated + * Pre UV4, the gnode is required to locate the payload queue + * and the payload queue tail must be maintained by the kernel. */ - pn = uv_gpa_to_gnode(uv_gpa(pqp)); - first = uv_physnodeaddr(pqp); - pn_first = ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) | first; - last = uv_physnodeaddr(pqp + (DEST_Q_SIZE - 1)); - write_mmr_payload_first(pnode, pn_first); - write_mmr_payload_tail(pnode, first); - write_mmr_payload_last(pnode, last); - write_gmmr_sw_ack(pnode, 0xffffUL); + bcp = &per_cpu(bau_control, smp_processor_id()); + if (bcp->uvhub_version <= 3) { + tail = first; + gnode = uv_gpa_to_gnode(uv_gpa(pqp)); + first = (gnode << UV_PAYLOADQ_GNODE_SHIFT) | tail; + write_mmr_payload_tail(pnode, tail); + } + + ops.write_payload_first(pnode, first); + ops.write_payload_last(pnode, last); + ops.write_g_sw_ack(pnode, 0xffffUL); /* in effect, all msg_type's are set to MSG_NOOP */ memset(pqp, 0, sizeof(struct bau_pq_entry) * DEST_Q_SIZE); @@ -1914,8 +1938,8 @@ static void __init init_per_cpu_tunables(void) bcp->complete_threshold = complete_threshold; bcp->cong_response_us = congested_respns_us; bcp->cong_reps = congested_reps; - bcp->disabled_period = sec_2_cycles(disabled_period); - bcp->giveup_limit = giveup_limit; + bcp->disabled_period = sec_2_cycles(disabled_period); + bcp->giveup_limit = giveup_limit; spin_lock_init(&bcp->queue_lock); spin_lock_init(&bcp->uvhub_lock); spin_lock_init(&bcp->disable_lock); @@ -1944,7 +1968,7 @@ static int __init get_cpu_topology(int base_pnode, pnode = uv_cpu_hub_info(cpu)->pnode; if ((pnode - base_pnode) >= UV_DISTRIBUTION_SIZE) { - printk(KERN_EMERG + pr_emerg( "cpu %d pnode %d-%d beyond %d; BAU disabled\n", cpu, pnode, base_pnode, UV_DISTRIBUTION_SIZE); return 1; @@ -1969,7 +1993,7 @@ static int __init get_cpu_topology(int base_pnode, sdp->cpu_number[sdp->num_cpus] = cpu; sdp->num_cpus++; if (sdp->num_cpus > MAX_CPUS_PER_SOCKET) { - printk(KERN_EMERG "%d cpus per socket invalid\n", + pr_emerg("%d cpus per socket invalid\n", sdp->num_cpus); return 1; } @@ -2035,15 +2059,17 @@ static int scan_sock(struct socket_desc *sdp, struct uvhub_desc *bdp, bcp->uvhub_version = 2; else if (is_uv3_hub()) bcp->uvhub_version = 3; + else if (is_uv4_hub()) + bcp->uvhub_version = 4; else { - printk(KERN_EMERG "uvhub version not 1, 2 or 3\n"); + pr_emerg("uvhub version not 1, 2, 3, or 4\n"); return 1; } bcp->uvhub_master = *hmasterp; bcp->uvhub_cpu = uv_cpu_blade_processor_id(cpu); if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) { - printk(KERN_EMERG "%d cpus per uvhub invalid\n", + pr_emerg("%d cpus per uvhub invalid\n", bcp->uvhub_cpu); return 1; } @@ -2098,7 +2124,8 @@ static int __init init_per_cpu(int nuvhubs, int base_part_pnode) void *vp; struct uvhub_desc *uvhub_descs; - timeout_us = calculate_destination_timeout(); + if (is_uv3_hub() || is_uv2_hub() || is_uv1_hub()) + timeout_us = calculate_destination_timeout(); vp = kmalloc(nuvhubs * sizeof(struct uvhub_desc), GFP_KERNEL); uvhub_descs = (struct uvhub_desc *)vp; @@ -2138,6 +2165,15 @@ static int __init uv_bau_init(void) if (!is_uv_system()) return 0; + if (is_uv4_hub()) + ops = uv4_bau_ops; + else if (is_uv3_hub()) + ops = uv123_bau_ops; + else if (is_uv2_hub()) + ops = uv123_bau_ops; + else if (is_uv1_hub()) + ops = uv123_bau_ops; + for_each_possible_cpu(cur_cpu) { mask = &per_cpu(uv_flush_tlb_mask, cur_cpu); zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cur_cpu)); @@ -2153,7 +2189,9 @@ static int __init uv_bau_init(void) uv_base_pnode = uv_blade_to_pnode(uvhub); } - enable_timeouts(); + /* software timeouts are not supported on UV4 */ + if (is_uv3_hub() || is_uv2_hub() || is_uv1_hub()) + enable_timeouts(); if (init_per_cpu(nuvhubs, uv_base_pnode)) { set_bau_off(); diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index b12c26e2e309..53cace2ec0e2 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -130,7 +130,7 @@ static void __save_processor_state(struct saved_context *ctxt) ctxt->cr0 = read_cr0(); ctxt->cr2 = read_cr2(); ctxt->cr3 = read_cr3(); - ctxt->cr4 = __read_cr4_safe(); + ctxt->cr4 = __read_cr4(); #ifdef CONFIG_X86_64 ctxt->cr8 = read_cr8(); #endif diff --git a/arch/x86/ras/mce_amd_inj.c b/arch/x86/ras/mce_amd_inj.c index 1104515d5ad2..1ac76479c266 100644 --- a/arch/x86/ras/mce_amd_inj.c +++ b/arch/x86/ras/mce_amd_inj.c @@ -68,6 +68,7 @@ static int inj_##reg##_set(void *data, u64 val) \ MCE_INJECT_SET(status); MCE_INJECT_SET(misc); MCE_INJECT_SET(addr); +MCE_INJECT_SET(synd); #define MCE_INJECT_GET(reg) \ static int inj_##reg##_get(void *data, u64 *val) \ @@ -81,10 +82,12 @@ static int inj_##reg##_get(void *data, u64 *val) \ MCE_INJECT_GET(status); MCE_INJECT_GET(misc); MCE_INJECT_GET(addr); +MCE_INJECT_GET(synd); DEFINE_SIMPLE_ATTRIBUTE(status_fops, inj_status_get, inj_status_set, "%llx\n"); DEFINE_SIMPLE_ATTRIBUTE(misc_fops, inj_misc_get, inj_misc_set, "%llx\n"); DEFINE_SIMPLE_ATTRIBUTE(addr_fops, inj_addr_get, inj_addr_set, "%llx\n"); +DEFINE_SIMPLE_ATTRIBUTE(synd_fops, inj_synd_get, inj_synd_set, "%llx\n"); /* * Caller needs to be make sure this cpu doesn't disappear @@ -243,27 +246,27 @@ static void toggle_nb_mca_mst_cpu(u16 nid) static void prepare_msrs(void *info) { - struct mce i_mce = *(struct mce *)info; - u8 b = i_mce.bank; + struct mce m = *(struct mce *)info; + u8 b = m.bank; - wrmsrl(MSR_IA32_MCG_STATUS, i_mce.mcgstatus); + wrmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus); if (boot_cpu_has(X86_FEATURE_SMCA)) { - if (i_mce.inject_flags == DFR_INT_INJ) { - wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(b), i_mce.status); - wrmsrl(MSR_AMD64_SMCA_MCx_DEADDR(b), i_mce.addr); + if (m.inject_flags == DFR_INT_INJ) { + wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(b), m.status); + wrmsrl(MSR_AMD64_SMCA_MCx_DEADDR(b), m.addr); } else { - wrmsrl(MSR_AMD64_SMCA_MCx_STATUS(b), i_mce.status); - wrmsrl(MSR_AMD64_SMCA_MCx_ADDR(b), i_mce.addr); + wrmsrl(MSR_AMD64_SMCA_MCx_STATUS(b), m.status); + wrmsrl(MSR_AMD64_SMCA_MCx_ADDR(b), m.addr); } - wrmsrl(MSR_AMD64_SMCA_MCx_MISC(b), i_mce.misc); + wrmsrl(MSR_AMD64_SMCA_MCx_MISC(b), m.misc); + wrmsrl(MSR_AMD64_SMCA_MCx_SYND(b), m.synd); } else { - wrmsrl(MSR_IA32_MCx_STATUS(b), i_mce.status); - wrmsrl(MSR_IA32_MCx_ADDR(b), i_mce.addr); - wrmsrl(MSR_IA32_MCx_MISC(b), i_mce.misc); + wrmsrl(MSR_IA32_MCx_STATUS(b), m.status); + wrmsrl(MSR_IA32_MCx_ADDR(b), m.addr); + wrmsrl(MSR_IA32_MCx_MISC(b), m.misc); } - } static void do_inject(void) @@ -275,6 +278,9 @@ static void do_inject(void) if (i_mce.misc) i_mce.status |= MCI_STATUS_MISCV; + if (i_mce.synd) + i_mce.status |= MCI_STATUS_SYNDV; + if (inj_type == SW_INJ) { mce_inject_log(&i_mce); return; @@ -301,7 +307,9 @@ static void do_inject(void) * only on the node base core. Refer to D18F3x44[NbMcaToMstCpuEn] for * Fam10h and later BKDGs. */ - if (static_cpu_has(X86_FEATURE_AMD_DCM) && b == 4) { + if (static_cpu_has(X86_FEATURE_AMD_DCM) && + b == 4 && + boot_cpu_data.x86 < 0x17) { toggle_nb_mca_mst_cpu(amd_get_nb_id(cpu)); cpu = get_nbc_for_node(amd_get_nb_id(cpu)); } @@ -371,6 +379,9 @@ static const char readme_msg[] = "\t used for error thresholding purposes and its validity is indicated by\n" "\t MCi_STATUS[MiscV].\n" "\n" +"synd:\t Set MCi_SYND: provide syndrome info about the error. Only valid on\n" +"\t Scalable MCA systems, and its validity is indicated by MCi_STATUS[SyndV].\n" +"\n" "addr:\t Error address value to be written to MCi_ADDR. Log address information\n" "\t associated with the error.\n" "\n" @@ -420,6 +431,7 @@ static struct dfs_node { { .name = "status", .fops = &status_fops, .perm = S_IRUSR | S_IWUSR }, { .name = "misc", .fops = &misc_fops, .perm = S_IRUSR | S_IWUSR }, { .name = "addr", .fops = &addr_fops, .perm = S_IRUSR | S_IWUSR }, + { .name = "synd", .fops = &synd_fops, .perm = S_IRUSR | S_IWUSR }, { .name = "bank", .fops = &bank_fops, .perm = S_IRUSR | S_IWUSR }, { .name = "flags", .fops = &flags_fops, .perm = S_IRUSR | S_IWUSR }, { .name = "cpu", .fops = &extcpu_fops, .perm = S_IRUSR | S_IWUSR }, @@ -428,7 +440,7 @@ static struct dfs_node { static int __init init_mce_inject(void) { - int i; + unsigned int i; u64 cap; rdmsrl(MSR_IA32_MCG_CAP, cap); @@ -452,26 +464,22 @@ static int __init init_mce_inject(void) return 0; err_dfs_add: - while (--i >= 0) + while (i-- > 0) debugfs_remove(dfs_fls[i].d); debugfs_remove(dfs_inj); dfs_inj = NULL; - return -ENOMEM; + return -ENODEV; } static void __exit exit_mce_inject(void) { - int i; - for (i = 0; i < ARRAY_SIZE(dfs_fls); i++) - debugfs_remove(dfs_fls[i].d); + debugfs_remove_recursive(dfs_inj); + dfs_inj = NULL; memset(&dfs_fls, 0, sizeof(dfs_fls)); - - debugfs_remove(dfs_inj); - dfs_inj = NULL; } module_init(init_mce_inject); module_exit(exit_mce_inject); diff --git a/arch/x86/um/ptrace_32.c b/arch/x86/um/ptrace_32.c index ebd4dd6ef73b..5766ead6fdb9 100644 --- a/arch/x86/um/ptrace_32.c +++ b/arch/x86/um/ptrace_32.c @@ -84,7 +84,10 @@ int putreg(struct task_struct *child, int regno, unsigned long value) case EAX: case EIP: case UESP: + break; case ORIG_EAX: + /* Update the syscall number. */ + UPT_SYSCALL_NR(&child->thread.regs.regs) = value; break; case FS: if (value && (value & 3) != 3) @@ -191,7 +194,7 @@ int peek_user(struct task_struct *child, long addr, long data) static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *child) { - int err, n, cpu = ((struct thread_info *) child->stack)->cpu; + int err, n, cpu = task_cpu(child); struct user_i387_struct fpregs; err = save_i387_registers(userspace_pid[cpu], @@ -208,7 +211,7 @@ static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *c static int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *child) { - int n, cpu = ((struct thread_info *) child->stack)->cpu; + int n, cpu = task_cpu(child); struct user_i387_struct fpregs; n = copy_from_user(&fpregs, buf, sizeof(fpregs)); @@ -221,7 +224,7 @@ static int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *c static int get_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child) { - int err, n, cpu = ((struct thread_info *) child->stack)->cpu; + int err, n, cpu = task_cpu(child); struct user_fxsr_struct fpregs; err = save_fpx_registers(userspace_pid[cpu], (unsigned long *) &fpregs); @@ -237,7 +240,7 @@ static int get_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct * static int set_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child) { - int n, cpu = ((struct thread_info *) child->stack)->cpu; + int n, cpu = task_cpu(child); struct user_fxsr_struct fpregs; n = copy_from_user(&fpregs, buf, sizeof(fpregs)); diff --git a/arch/x86/um/ptrace_64.c b/arch/x86/um/ptrace_64.c index faab418876ce..0b5c184dd5b3 100644 --- a/arch/x86/um/ptrace_64.c +++ b/arch/x86/um/ptrace_64.c @@ -78,7 +78,11 @@ int putreg(struct task_struct *child, int regno, unsigned long value) case RSI: case RDI: case RBP: + break; + case ORIG_RAX: + /* Update the syscall number. */ + UPT_SYSCALL_NR(&child->thread.regs.regs) = value; break; case FS: diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index b86ebb1a9a7f..f1d2182e071f 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1237,7 +1237,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { .write_cr0 = xen_write_cr0, .read_cr4 = native_read_cr4, - .read_cr4_safe = native_read_cr4_safe, .write_cr4 = xen_write_cr4, #ifdef CONFIG_X86_64 @@ -1925,6 +1924,45 @@ static void xen_set_cpu_features(struct cpuinfo_x86 *c) } } +static void xen_pin_vcpu(int cpu) +{ + static bool disable_pinning; + struct sched_pin_override pin_override; + int ret; + + if (disable_pinning) + return; + + pin_override.pcpu = cpu; + ret = HYPERVISOR_sched_op(SCHEDOP_pin_override, &pin_override); + + /* Ignore errors when removing override. */ + if (cpu < 0) + return; + + switch (ret) { + case -ENOSYS: + pr_warn("Unable to pin on physical cpu %d. In case of problems consider vcpu pinning.\n", + cpu); + disable_pinning = true; + break; + case -EPERM: + WARN(1, "Trying to pin vcpu without having privilege to do so\n"); + disable_pinning = true; + break; + case -EINVAL: + case -EBUSY: + pr_warn("Physical cpu %d not available for pinning. Check Xen cpu configuration.\n", + cpu); + break; + case 0: + break; + default: + WARN(1, "rc %d while trying to pin vcpu\n", ret); + disable_pinning = true; + } +} + const struct hypervisor_x86 x86_hyper_xen = { .name = "Xen", .detect = xen_platform, @@ -1933,6 +1971,7 @@ const struct hypervisor_x86 x86_hyper_xen = { #endif .x2apic_available = xen_x2apic_para_available, .set_cpu_features = xen_set_cpu_features, + .pin_vcpu = xen_pin_vcpu, }; EXPORT_SYMBOL(x86_hyper_xen); diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 176425233e4d..f8960fca0827 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -861,7 +861,7 @@ char * __init xen_memory_setup(void) e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS, E820_RESERVED); - sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); + sanitize_e820_map(e820->map, ARRAY_SIZE(e820->map), &e820->nr_map); /* * Check whether the kernel itself conflicts with the target E820 map. diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index f42e78de1e10..3d6e0064cbfc 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -21,8 +21,6 @@ static DEFINE_PER_CPU(int, lock_kicker_irq) = -1; static DEFINE_PER_CPU(char *, irq_name); static bool xen_pvspin = true; -#ifdef CONFIG_QUEUED_SPINLOCKS - #include <asm/qspinlock.h> static void xen_qlock_kick(int cpu) @@ -71,207 +69,6 @@ static void xen_qlock_wait(u8 *byte, u8 val) xen_poll_irq(irq); } -#else /* CONFIG_QUEUED_SPINLOCKS */ - -enum xen_contention_stat { - TAKEN_SLOW, - TAKEN_SLOW_PICKUP, - TAKEN_SLOW_SPURIOUS, - RELEASED_SLOW, - RELEASED_SLOW_KICKED, - NR_CONTENTION_STATS -}; - - -#ifdef CONFIG_XEN_DEBUG_FS -#define HISTO_BUCKETS 30 -static struct xen_spinlock_stats -{ - u32 contention_stats[NR_CONTENTION_STATS]; - u32 histo_spin_blocked[HISTO_BUCKETS+1]; - u64 time_blocked; -} spinlock_stats; - -static u8 zero_stats; - -static inline void check_zero(void) -{ - u8 ret; - u8 old = READ_ONCE(zero_stats); - if (unlikely(old)) { - ret = cmpxchg(&zero_stats, old, 0); - /* This ensures only one fellow resets the stat */ - if (ret == old) - memset(&spinlock_stats, 0, sizeof(spinlock_stats)); - } -} - -static inline void add_stats(enum xen_contention_stat var, u32 val) -{ - check_zero(); - spinlock_stats.contention_stats[var] += val; -} - -static inline u64 spin_time_start(void) -{ - return xen_clocksource_read(); -} - -static void __spin_time_accum(u64 delta, u32 *array) -{ - unsigned index = ilog2(delta); - - check_zero(); - - if (index < HISTO_BUCKETS) - array[index]++; - else - array[HISTO_BUCKETS]++; -} - -static inline void spin_time_accum_blocked(u64 start) -{ - u32 delta = xen_clocksource_read() - start; - - __spin_time_accum(delta, spinlock_stats.histo_spin_blocked); - spinlock_stats.time_blocked += delta; -} -#else /* !CONFIG_XEN_DEBUG_FS */ -static inline void add_stats(enum xen_contention_stat var, u32 val) -{ -} - -static inline u64 spin_time_start(void) -{ - return 0; -} - -static inline void spin_time_accum_blocked(u64 start) -{ -} -#endif /* CONFIG_XEN_DEBUG_FS */ - -struct xen_lock_waiting { - struct arch_spinlock *lock; - __ticket_t want; -}; - -static DEFINE_PER_CPU(struct xen_lock_waiting, lock_waiting); -static cpumask_t waiting_cpus; - -__visible void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want) -{ - int irq = __this_cpu_read(lock_kicker_irq); - struct xen_lock_waiting *w = this_cpu_ptr(&lock_waiting); - int cpu = smp_processor_id(); - u64 start; - __ticket_t head; - unsigned long flags; - - /* If kicker interrupts not initialized yet, just spin */ - if (irq == -1) - return; - - start = spin_time_start(); - - /* - * Make sure an interrupt handler can't upset things in a - * partially setup state. - */ - local_irq_save(flags); - /* - * We don't really care if we're overwriting some other - * (lock,want) pair, as that would mean that we're currently - * in an interrupt context, and the outer context had - * interrupts enabled. That has already kicked the VCPU out - * of xen_poll_irq(), so it will just return spuriously and - * retry with newly setup (lock,want). - * - * The ordering protocol on this is that the "lock" pointer - * may only be set non-NULL if the "want" ticket is correct. - * If we're updating "want", we must first clear "lock". - */ - w->lock = NULL; - smp_wmb(); - w->want = want; - smp_wmb(); - w->lock = lock; - - /* This uses set_bit, which atomic and therefore a barrier */ - cpumask_set_cpu(cpu, &waiting_cpus); - add_stats(TAKEN_SLOW, 1); - - /* clear pending */ - xen_clear_irq_pending(irq); - - /* Only check lock once pending cleared */ - barrier(); - - /* - * Mark entry to slowpath before doing the pickup test to make - * sure we don't deadlock with an unlocker. - */ - __ticket_enter_slowpath(lock); - - /* make sure enter_slowpath, which is atomic does not cross the read */ - smp_mb__after_atomic(); - - /* - * check again make sure it didn't become free while - * we weren't looking - */ - head = READ_ONCE(lock->tickets.head); - if (__tickets_equal(head, want)) { - add_stats(TAKEN_SLOW_PICKUP, 1); - goto out; - } - - /* Allow interrupts while blocked */ - local_irq_restore(flags); - - /* - * If an interrupt happens here, it will leave the wakeup irq - * pending, which will cause xen_poll_irq() to return - * immediately. - */ - - /* Block until irq becomes pending (or perhaps a spurious wakeup) */ - xen_poll_irq(irq); - add_stats(TAKEN_SLOW_SPURIOUS, !xen_test_irq_pending(irq)); - - local_irq_save(flags); - - kstat_incr_irq_this_cpu(irq); -out: - cpumask_clear_cpu(cpu, &waiting_cpus); - w->lock = NULL; - - local_irq_restore(flags); - - spin_time_accum_blocked(start); -} -PV_CALLEE_SAVE_REGS_THUNK(xen_lock_spinning); - -static void xen_unlock_kick(struct arch_spinlock *lock, __ticket_t next) -{ - int cpu; - - add_stats(RELEASED_SLOW, 1); - - for_each_cpu(cpu, &waiting_cpus) { - const struct xen_lock_waiting *w = &per_cpu(lock_waiting, cpu); - - /* Make sure we read lock before want */ - if (READ_ONCE(w->lock) == lock && - READ_ONCE(w->want) == next) { - add_stats(RELEASED_SLOW_KICKED, 1); - xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR); - break; - } - } -} -#endif /* CONFIG_QUEUED_SPINLOCKS */ - static irqreturn_t dummy_handler(int irq, void *dev_id) { BUG(); @@ -334,16 +131,12 @@ void __init xen_init_spinlocks(void) return; } printk(KERN_DEBUG "xen: PV spinlocks enabled\n"); -#ifdef CONFIG_QUEUED_SPINLOCKS + __pv_init_lock_hash(); pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath; pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock); pv_lock_ops.wait = xen_qlock_wait; pv_lock_ops.kick = xen_qlock_kick; -#else - pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(xen_lock_spinning); - pv_lock_ops.unlock_kick = xen_unlock_kick; -#endif } /* @@ -372,44 +165,3 @@ static __init int xen_parse_nopvspin(char *arg) } early_param("xen_nopvspin", xen_parse_nopvspin); -#if defined(CONFIG_XEN_DEBUG_FS) && !defined(CONFIG_QUEUED_SPINLOCKS) - -static struct dentry *d_spin_debug; - -static int __init xen_spinlock_debugfs(void) -{ - struct dentry *d_xen = xen_init_debugfs(); - - if (d_xen == NULL) - return -ENOMEM; - - if (!xen_pvspin) - return 0; - - d_spin_debug = debugfs_create_dir("spinlocks", d_xen); - - debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats); - - debugfs_create_u32("taken_slow", 0444, d_spin_debug, - &spinlock_stats.contention_stats[TAKEN_SLOW]); - debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug, - &spinlock_stats.contention_stats[TAKEN_SLOW_PICKUP]); - debugfs_create_u32("taken_slow_spurious", 0444, d_spin_debug, - &spinlock_stats.contention_stats[TAKEN_SLOW_SPURIOUS]); - - debugfs_create_u32("released_slow", 0444, d_spin_debug, - &spinlock_stats.contention_stats[RELEASED_SLOW]); - debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug, - &spinlock_stats.contention_stats[RELEASED_SLOW_KICKED]); - - debugfs_create_u64("time_blocked", 0444, d_spin_debug, - &spinlock_stats.time_blocked); - - debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug, - spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1); - - return 0; -} -fs_initcall(xen_spinlock_debugfs); - -#endif /* CONFIG_XEN_DEBUG_FS */ |