diff options
Diffstat (limited to 'arch')
136 files changed, 995 insertions, 597 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index bd8056b5b246..e9c9334507dd 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -461,6 +461,15 @@ config CC_STACKPROTECTOR_STRONG endchoice +config HAVE_ARCH_WITHIN_STACK_FRAMES + bool + help + An architecture should select this if it can walk the kernel stack + frames to determine if an object is part of either the arguments + or local variables (i.e. that it excludes saved return addresses, + and similar) by implementing an inline arch_within_stack_frames(), + which is used by CONFIG_HARDENED_USERCOPY. + config HAVE_CONTEXT_TRACKING bool help diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 2d601d769a1c..a9c4e48bb7ec 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -35,6 +35,7 @@ config ARM select HARDIRQS_SW_RESEND select HAVE_ARCH_AUDITSYSCALL if (AEABI && !OABI_COMPAT) select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6 + select HAVE_ARCH_HARDENED_USERCOPY select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU select HAVE_ARCH_MMAP_RND_BITS if MMU diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 56ea5c60b318..61f6ccc19cfa 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -260,12 +260,14 @@ machdirs := $(patsubst %,arch/arm/mach-%/,$(machine-y)) platdirs := $(patsubst %,arch/arm/plat-%/,$(sort $(plat-y))) ifneq ($(CONFIG_ARCH_MULTIPLATFORM),y) +ifneq ($(CONFIG_ARM_SINGLE_ARMV7M),y) ifeq ($(KBUILD_SRC),) KBUILD_CPPFLAGS += $(patsubst %,-I%include,$(machdirs) $(platdirs)) else KBUILD_CPPFLAGS += $(patsubst %,-I$(srctree)/%include,$(machdirs) $(platdirs)) endif endif +endif export TEXT_OFFSET GZFLAGS MMUEXT diff --git a/arch/arm/boot/dts/arm-realview-pbx-a9.dts b/arch/arm/boot/dts/arm-realview-pbx-a9.dts index db808f92dd79..90d00b407f85 100644 --- a/arch/arm/boot/dts/arm-realview-pbx-a9.dts +++ b/arch/arm/boot/dts/arm-realview-pbx-a9.dts @@ -70,13 +70,12 @@ * associativity as these may be erroneously set * up by boot loader(s). */ - cache-size = <1048576>; // 1MB - cache-sets = <4096>; + cache-size = <131072>; // 128KB + cache-sets = <512>; cache-line-size = <32>; arm,parity-disable; - arm,tag-latency = <1>; - arm,data-latency = <1 1>; - arm,dirty-latency = <1>; + arm,tag-latency = <1 1 1>; + arm,data-latency = <1 1 1>; }; scu: scu@1f000000 { diff --git a/arch/arm/boot/dts/integratorap.dts b/arch/arm/boot/dts/integratorap.dts index cf06e32ee108..4b34b54e09a1 100644 --- a/arch/arm/boot/dts/integratorap.dts +++ b/arch/arm/boot/dts/integratorap.dts @@ -42,7 +42,7 @@ }; syscon { - compatible = "arm,integrator-ap-syscon"; + compatible = "arm,integrator-ap-syscon", "syscon"; reg = <0x11000000 0x100>; interrupt-parent = <&pic>; /* These are the logical module IRQs */ diff --git a/arch/arm/boot/dts/integratorcp.dts b/arch/arm/boot/dts/integratorcp.dts index d43f15b4f79a..79430fbfec3b 100644 --- a/arch/arm/boot/dts/integratorcp.dts +++ b/arch/arm/boot/dts/integratorcp.dts @@ -94,7 +94,7 @@ }; syscon { - compatible = "arm,integrator-cp-syscon"; + compatible = "arm,integrator-cp-syscon", "syscon"; reg = <0xcb000000 0x100>; }; diff --git a/arch/arm/boot/dts/keystone.dtsi b/arch/arm/boot/dts/keystone.dtsi index 00cb314d5e4d..e23f46d15c80 100644 --- a/arch/arm/boot/dts/keystone.dtsi +++ b/arch/arm/boot/dts/keystone.dtsi @@ -70,14 +70,6 @@ cpu_on = <0x84000003>; }; - psci { - compatible = "arm,psci"; - method = "smc"; - cpu_suspend = <0x84000001>; - cpu_off = <0x84000002>; - cpu_on = <0x84000003>; - }; - soc { #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm/boot/dts/tegra124-jetson-tk1.dts b/arch/arm/boot/dts/tegra124-jetson-tk1.dts index e52b82449a79..6403e0de540e 100644 --- a/arch/arm/boot/dts/tegra124-jetson-tk1.dts +++ b/arch/arm/boot/dts/tegra124-jetson-tk1.dts @@ -1382,7 +1382,7 @@ * Pin 41: BR_UART1_TXD * Pin 44: BR_UART1_RXD */ - serial@70006000 { + serial@0,70006000 { compatible = "nvidia,tegra124-hsuart", "nvidia,tegra30-hsuart"; status = "okay"; }; @@ -1394,7 +1394,7 @@ * Pin 71: UART2_CTS_L * Pin 74: UART2_RTS_L */ - serial@70006040 { + serial@0,70006040 { compatible = "nvidia,tegra124-hsuart", "nvidia,tegra30-hsuart"; status = "okay"; }; diff --git a/arch/arm/configs/aspeed_g4_defconfig b/arch/arm/configs/aspeed_g4_defconfig index b6e54ee9bdbd..ca39c04fec6b 100644 --- a/arch/arm/configs/aspeed_g4_defconfig +++ b/arch/arm/configs/aspeed_g4_defconfig @@ -58,7 +58,7 @@ CONFIG_SERIAL_OF_PLATFORM=y # CONFIG_IOMMU_SUPPORT is not set CONFIG_FIRMWARE_MEMMAP=y CONFIG_FANOTIFY=y -CONFIG_PRINTK_TIME=1 +CONFIG_PRINTK_TIME=y CONFIG_DYNAMIC_DEBUG=y CONFIG_STRIP_ASM_SYMS=y CONFIG_PAGE_POISONING=y diff --git a/arch/arm/configs/aspeed_g5_defconfig b/arch/arm/configs/aspeed_g5_defconfig index 892605167357..4f366b0370e9 100644 --- a/arch/arm/configs/aspeed_g5_defconfig +++ b/arch/arm/configs/aspeed_g5_defconfig @@ -59,7 +59,7 @@ CONFIG_SERIAL_OF_PLATFORM=y # CONFIG_IOMMU_SUPPORT is not set CONFIG_FIRMWARE_MEMMAP=y CONFIG_FANOTIFY=y -CONFIG_PRINTK_TIME=1 +CONFIG_PRINTK_TIME=y CONFIG_DYNAMIC_DEBUG=y CONFIG_STRIP_ASM_SYMS=y CONFIG_PAGE_POISONING=y diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 62a6f65029e6..a93c0f99acf7 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -480,7 +480,10 @@ arm_copy_from_user(void *to, const void __user *from, unsigned long n); static inline unsigned long __must_check __copy_from_user(void *to, const void __user *from, unsigned long n) { - unsigned int __ua_flags = uaccess_save_and_enable(); + unsigned int __ua_flags; + + check_object_size(to, n, false); + __ua_flags = uaccess_save_and_enable(); n = arm_copy_from_user(to, from, n); uaccess_restore(__ua_flags); return n; @@ -495,11 +498,15 @@ static inline unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n) { #ifndef CONFIG_UACCESS_WITH_MEMCPY - unsigned int __ua_flags = uaccess_save_and_enable(); + unsigned int __ua_flags; + + check_object_size(from, n, true); + __ua_flags = uaccess_save_and_enable(); n = arm_copy_to_user(to, from, n); uaccess_restore(__ua_flags); return n; #else + check_object_size(from, n, true); return arm_copy_to_user(to, from, n); #endif } diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index bc5f50799d75..9f157e7c51e7 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -295,6 +295,7 @@ __und_svc_fault: bl __und_fault __und_svc_finish: + get_thread_info tsk ldr r5, [sp, #S_PSR] @ Get SVC cpsr svc_exit r5 @ return from exception UNWIND(.fnend ) diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c index 087acb569b63..5f221acd21ae 100644 --- a/arch/arm/kernel/sys_oabi-compat.c +++ b/arch/arm/kernel/sys_oabi-compat.c @@ -279,8 +279,12 @@ asmlinkage long sys_oabi_epoll_wait(int epfd, mm_segment_t fs; long ret, err, i; - if (maxevents <= 0 || maxevents > (INT_MAX/sizeof(struct epoll_event))) + if (maxevents <= 0 || + maxevents > (INT_MAX/sizeof(*kbuf)) || + maxevents > (INT_MAX/sizeof(*events))) return -EINVAL; + if (!access_ok(VERIFY_WRITE, events, sizeof(*events) * maxevents)) + return -EFAULT; kbuf = kmalloc(sizeof(*kbuf) * maxevents, GFP_KERNEL); if (!kbuf) return -ENOMEM; @@ -317,6 +321,8 @@ asmlinkage long sys_oabi_semtimedop(int semid, if (nsops < 1 || nsops > SEMOPM) return -EINVAL; + if (!access_ok(VERIFY_READ, tsops, sizeof(*tsops) * nsops)) + return -EFAULT; sops = kmalloc(sizeof(*sops) * nsops, GFP_KERNEL); if (!sops) return -ENOMEM; diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index d94bb9093ead..75f130ef6504 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -1009,9 +1009,13 @@ long kvm_arch_vm_ioctl(struct file *filp, switch (ioctl) { case KVM_CREATE_IRQCHIP: { + int ret; if (!vgic_present) return -ENXIO; - return kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); + mutex_lock(&kvm->lock); + ret = kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); + mutex_unlock(&kvm->lock); + return ret; } case KVM_ARM_SET_DEVICE_ADDR: { struct kvm_arm_device_addr dev_addr; diff --git a/arch/arm/mach-clps711x/Kconfig b/arch/arm/mach-clps711x/Kconfig index dc7c6edeab39..61284b9389cf 100644 --- a/arch/arm/mach-clps711x/Kconfig +++ b/arch/arm/mach-clps711x/Kconfig @@ -1,13 +1,13 @@ menuconfig ARCH_CLPS711X bool "Cirrus Logic EP721x/EP731x-based" depends on ARCH_MULTI_V4T - select ARCH_REQUIRE_GPIOLIB select AUTO_ZRELADDR select CLKSRC_OF select CLPS711X_TIMER select COMMON_CLK select CPU_ARM720T select GENERIC_CLOCKEVENTS + select GPIOLIB select MFD_SYSCON select OF_IRQ select USE_OF diff --git a/arch/arm/mach-imx/gpc.c b/arch/arm/mach-imx/gpc.c index fd8720532471..0df062d8b2c9 100644 --- a/arch/arm/mach-imx/gpc.c +++ b/arch/arm/mach-imx/gpc.c @@ -271,6 +271,12 @@ static int __init imx_gpc_init(struct device_node *node, for (i = 0; i < IMR_NUM; i++) writel_relaxed(~0, gpc_base + GPC_IMR1 + i * 4); + /* + * Clear the OF_POPULATED flag set in of_irq_init so that + * later the GPC power domain driver will not be skipped. + */ + of_node_clear_flag(node, OF_POPULATED); + return 0; } IRQCHIP_DECLARE(imx_gpc, "fsl,imx6q-gpc", imx_gpc_init); diff --git a/arch/arm/mach-mvebu/Makefile b/arch/arm/mach-mvebu/Makefile index e53c6cfcab51..6c6497e80a7b 100644 --- a/arch/arm/mach-mvebu/Makefile +++ b/arch/arm/mach-mvebu/Makefile @@ -1,5 +1,4 @@ -ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/$(src)/include \ - -I$(srctree)/arch/arm/plat-orion/include +ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/arch/arm/plat-orion/include AFLAGS_coherency_ll.o := -Wa,-march=armv7-a CFLAGS_pmsu.o := -march=armv7-a diff --git a/arch/arm/mach-oxnas/Kconfig b/arch/arm/mach-oxnas/Kconfig index 567496bd250a..29100beb2e7f 100644 --- a/arch/arm/mach-oxnas/Kconfig +++ b/arch/arm/mach-oxnas/Kconfig @@ -11,11 +11,13 @@ if ARCH_OXNAS config MACH_OX810SE bool "Support OX810SE Based Products" + select ARCH_HAS_RESET_CONTROLLER select COMMON_CLK_OXNAS select CPU_ARM926T select MFD_SYSCON select OXNAS_RPS_TIMER select PINCTRL_OXNAS + select RESET_CONTROLLER select RESET_OXNAS select VERSATILE_FPGA_IRQ help diff --git a/arch/arm/mach-pxa/corgi.c b/arch/arm/mach-pxa/corgi.c index dc109dc3a622..10bfdb169366 100644 --- a/arch/arm/mach-pxa/corgi.c +++ b/arch/arm/mach-pxa/corgi.c @@ -13,6 +13,7 @@ */ #include <linux/kernel.h> +#include <linux/module.h> /* symbol_get ; symbol_put */ #include <linux/init.h> #include <linux/platform_device.h> #include <linux/major.h> diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c index 1080580b1343..2c150bfc0cd5 100644 --- a/arch/arm/mach-pxa/spitz.c +++ b/arch/arm/mach-pxa/spitz.c @@ -13,6 +13,7 @@ */ #include <linux/kernel.h> +#include <linux/module.h> /* symbol_get ; symbol_put */ #include <linux/platform_device.h> #include <linux/delay.h> #include <linux/gpio_keys.h> diff --git a/arch/arm/mach-realview/Makefile b/arch/arm/mach-realview/Makefile index dae8d86ef4cc..404882130956 100644 --- a/arch/arm/mach-realview/Makefile +++ b/arch/arm/mach-realview/Makefile @@ -1,8 +1,7 @@ # # Makefile for the linux kernel. # -ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/$(src)/include \ - -I$(srctree)/arch/arm/plat-versatile/include +ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/arch/arm/plat-versatile/include obj-y := core.o obj-$(CONFIG_REALVIEW_DT) += realview-dt.o diff --git a/arch/arm/mach-s5pv210/Makefile b/arch/arm/mach-s5pv210/Makefile index 72b9e9671507..fa7fb716e388 100644 --- a/arch/arm/mach-s5pv210/Makefile +++ b/arch/arm/mach-s5pv210/Makefile @@ -5,7 +5,7 @@ # # Licensed under GPLv2 -ccflags-$(CONFIG_ARCH_MULTIPLATFORM) += -I$(srctree)/$(src)/include -I$(srctree)/arch/arm/plat-samsung/include +ccflags-$(CONFIG_ARCH_MULTIPLATFORM) += -I$(srctree)/arch/arm/plat-samsung/include # Core diff --git a/arch/arm/mach-shmobile/platsmp.c b/arch/arm/mach-shmobile/platsmp.c index f3dba6f356e2..02e21bceb085 100644 --- a/arch/arm/mach-shmobile/platsmp.c +++ b/arch/arm/mach-shmobile/platsmp.c @@ -40,5 +40,8 @@ bool shmobile_smp_cpu_can_disable(unsigned int cpu) bool __init shmobile_smp_init_fallback_ops(void) { /* fallback on PSCI/smp_ops if no other DT based method is detected */ + if (!IS_ENABLED(CONFIG_SMP)) + return false; + return platform_can_secondary_boot() ? true : false; } diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 62f4d01941f7..6344913f0804 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -728,7 +728,8 @@ static void *__init late_alloc(unsigned long sz) { void *ptr = (void *)__get_free_pages(PGALLOC_GFP, get_order(sz)); - BUG_ON(!ptr); + if (!ptr || !pgtable_page_ctor(virt_to_page(ptr))) + BUG(); return ptr; } @@ -1155,10 +1156,19 @@ void __init sanity_check_meminfo(void) { phys_addr_t memblock_limit = 0; int highmem = 0; - phys_addr_t vmalloc_limit = __pa(vmalloc_min - 1) + 1; + u64 vmalloc_limit; struct memblock_region *reg; bool should_use_highmem = false; + /* + * Let's use our own (unoptimized) equivalent of __pa() that is + * not affected by wrap-arounds when sizeof(phys_addr_t) == 4. + * The result is used as the upper bound on physical memory address + * and may itself be outside the valid range for which phys_addr_t + * and therefore __pa() is defined. + */ + vmalloc_limit = (u64)(uintptr_t)vmalloc_min - PAGE_OFFSET + PHYS_OFFSET; + for_each_memblock(memory, reg) { phys_addr_t block_start = reg->base; phys_addr_t block_end = reg->base + reg->size; @@ -1183,10 +1193,11 @@ void __init sanity_check_meminfo(void) if (reg->size > size_limit) { phys_addr_t overlap_size = reg->size - size_limit; - pr_notice("Truncating RAM at %pa-%pa to -%pa", - &block_start, &block_end, &vmalloc_limit); - memblock_remove(vmalloc_limit, overlap_size); + pr_notice("Truncating RAM at %pa-%pa", + &block_start, &block_end); block_end = vmalloc_limit; + pr_cont(" to -%pa", &block_end); + memblock_remove(vmalloc_limit, overlap_size); should_use_highmem = true; } } diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 69c8787bec7d..bc3f00f586f1 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -54,6 +54,7 @@ config ARM64 select HAVE_ALIGNED_STRUCT_PAGE if SLUB select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_BITREVERSE + select HAVE_ARCH_HARDENED_USERCOPY select HAVE_ARCH_HUGE_VMAP select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_KASAN if SPARSEMEM_VMEMMAP && !(ARM64_16K_PAGES && ARM64_VA_BITS_48) diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms index bb2616b16157..be5d824ebdba 100644 --- a/arch/arm64/Kconfig.platforms +++ b/arch/arm64/Kconfig.platforms @@ -8,7 +8,7 @@ config ARCH_SUNXI config ARCH_ALPINE bool "Annapurna Labs Alpine platform" - select ALPINE_MSI + select ALPINE_MSI if PCI help This enables support for the Annapurna Labs Alpine Soc family. @@ -66,7 +66,7 @@ config ARCH_LG1K config ARCH_HISI bool "Hisilicon SoC Family" select ARM_TIMER_SP804 - select HISILICON_IRQ_MBIGEN + select HISILICON_IRQ_MBIGEN if PCI help This enables support for Hisilicon ARMv8 SoC family diff --git a/arch/arm64/boot/dts/exynos/exynos7-espresso.dts b/arch/arm64/boot/dts/exynos/exynos7-espresso.dts index 299f3ce969ab..c528dd52ba2d 100644 --- a/arch/arm64/boot/dts/exynos/exynos7-espresso.dts +++ b/arch/arm64/boot/dts/exynos/exynos7-espresso.dts @@ -12,6 +12,7 @@ /dts-v1/; #include "exynos7.dtsi" #include <dt-bindings/interrupt-controller/irq.h> +#include <dt-bindings/clock/samsung,s2mps11.h> / { model = "Samsung Exynos7 Espresso board based on EXYNOS7"; @@ -43,6 +44,8 @@ &rtc { status = "okay"; + clocks = <&clock_ccore PCLK_RTC>, <&s2mps15_osc S2MPS11_CLK_AP>; + clock-names = "rtc", "rtc_src"; }; &watchdog { diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 0555b7caaf2c..eadf4855ad2d 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -1,4 +1,3 @@ -# CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_AUDIT=y @@ -15,10 +14,14 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_MEMCG=y CONFIG_MEMCG_SWAP=y +CONFIG_BLK_CGROUP=y +CONFIG_CGROUP_PIDS=y CONFIG_CGROUP_HUGETLB=y -# CONFIG_UTS_NS is not set -# CONFIG_IPC_NS is not set -# CONFIG_NET_NS is not set +CONFIG_CPUSETS=y +CONFIG_CGROUP_DEVICE=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_PERF=y +CONFIG_USER_NS=y CONFIG_SCHED_AUTOGROUP=y CONFIG_BLK_DEV_INITRD=y CONFIG_KALLSYMS_ALL=y @@ -71,6 +74,7 @@ CONFIG_PREEMPT=y CONFIG_KSM=y CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_CMA=y +CONFIG_SECCOMP=y CONFIG_XEN=y CONFIG_KEXEC=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set @@ -84,10 +88,37 @@ CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_INET=y +CONFIG_IP_MULTICAST=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y -# CONFIG_IPV6 is not set +CONFIG_IPV6=m +CONFIG_NETFILTER=y +CONFIG_NF_CONNTRACK=m +CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m +CONFIG_NETFILTER_XT_TARGET_LOG=m +CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m +CONFIG_NF_CONNTRACK_IPV4=m +CONFIG_IP_NF_IPTABLES=m +CONFIG_IP_NF_FILTER=m +CONFIG_IP_NF_TARGET_REJECT=m +CONFIG_IP_NF_NAT=m +CONFIG_IP_NF_TARGET_MASQUERADE=m +CONFIG_IP_NF_MANGLE=m +CONFIG_NF_CONNTRACK_IPV6=m +CONFIG_IP6_NF_IPTABLES=m +CONFIG_IP6_NF_FILTER=m +CONFIG_IP6_NF_TARGET_REJECT=m +CONFIG_IP6_NF_MANGLE=m +CONFIG_IP6_NF_NAT=m +CONFIG_IP6_NF_TARGET_MASQUERADE=m +CONFIG_BRIDGE=m +CONFIG_BRIDGE_VLAN_FILTERING=y +CONFIG_VLAN_8021Q=m +CONFIG_VLAN_8021Q_GVRP=y +CONFIG_VLAN_8021Q_MVRP=y CONFIG_BPF_JIT=y CONFIG_CFG80211=m CONFIG_MAC80211=m @@ -103,6 +134,7 @@ CONFIG_MTD=y CONFIG_MTD_M25P80=y CONFIG_MTD_SPI_NOR=y CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_NBD=m CONFIG_VIRTIO_BLK=y CONFIG_SRAM=y # CONFIG_SCSI_PROC_FS is not set @@ -120,7 +152,10 @@ CONFIG_SATA_SIL24=y CONFIG_PATA_PLATFORM=y CONFIG_PATA_OF_PLATFORM=y CONFIG_NETDEVICES=y +CONFIG_MACVLAN=m +CONFIG_MACVTAP=m CONFIG_TUN=y +CONFIG_VETH=m CONFIG_VIRTIO_NET=y CONFIG_AMD_XGBE=y CONFIG_NET_XGENE=y @@ -350,12 +385,16 @@ CONFIG_EXYNOS_ADC=y CONFIG_PWM_SAMSUNG=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_BTRFS_FS=m +CONFIG_BTRFS_FS_POSIX_ACL=y CONFIG_FANOTIFY=y CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y CONFIG_QUOTA=y CONFIG_AUTOFS4_FS=y -CONFIG_FUSE_FS=y -CONFIG_CUSE=y +CONFIG_FUSE_FS=m +CONFIG_CUSE=m +CONFIG_OVERLAY_FS=m CONFIG_VFAT_FS=y CONFIG_TMPFS=y CONFIG_HUGETLBFS=y diff --git a/arch/arm64/include/asm/kprobes.h b/arch/arm64/include/asm/kprobes.h index 61b49150dfa3..1737aecfcc5e 100644 --- a/arch/arm64/include/asm/kprobes.h +++ b/arch/arm64/include/asm/kprobes.h @@ -22,7 +22,6 @@ #define __ARCH_WANT_KPROBES_INSN_SLOT #define MAX_INSN_SIZE 1 -#define MAX_STACK_SIZE 128 #define flush_insn_slot(p) do { } while (0) #define kretprobe_blacklist_size 0 @@ -47,7 +46,6 @@ struct kprobe_ctlblk { struct prev_kprobe prev_kprobe; struct kprobe_step_ctx ss_ctx; struct pt_regs jprobe_saved_regs; - char jprobes_stack[MAX_STACK_SIZE]; }; void arch_remove_kprobe(struct kprobe *); diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 5e834d10b291..c47257c91b77 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -265,22 +265,25 @@ extern unsigned long __must_check __clear_user(void __user *addr, unsigned long static inline unsigned long __must_check __copy_from_user(void *to, const void __user *from, unsigned long n) { kasan_check_write(to, n); - return __arch_copy_from_user(to, from, n); + check_object_size(to, n, false); + return __arch_copy_from_user(to, from, n); } static inline unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n) { kasan_check_read(from, n); - return __arch_copy_to_user(to, from, n); + check_object_size(from, n, true); + return __arch_copy_to_user(to, from, n); } static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) { kasan_check_write(to, n); - if (access_ok(VERIFY_READ, from, n)) + if (access_ok(VERIFY_READ, from, n)) { + check_object_size(to, n, false); n = __arch_copy_from_user(to, from, n); - else /* security hole - plug it */ + } else /* security hole - plug it */ memset(to, 0, n); return n; } @@ -289,8 +292,10 @@ static inline unsigned long __must_check copy_to_user(void __user *to, const voi { kasan_check_read(from, n); - if (access_ok(VERIFY_WRITE, to, n)) + if (access_ok(VERIFY_WRITE, to, n)) { + check_object_size(from, n, true); n = __arch_copy_to_user(to, from, n); + } return n; } diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 96e4a2b64cc1..441420ca7d08 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -353,6 +353,8 @@ el1_sync: lsr x24, x1, #ESR_ELx_EC_SHIFT // exception class cmp x24, #ESR_ELx_EC_DABT_CUR // data abort in EL1 b.eq el1_da + cmp x24, #ESR_ELx_EC_IABT_CUR // instruction abort in EL1 + b.eq el1_ia cmp x24, #ESR_ELx_EC_SYS64 // configurable trap b.eq el1_undef cmp x24, #ESR_ELx_EC_SP_ALIGN // stack alignment exception @@ -364,6 +366,11 @@ el1_sync: cmp x24, #ESR_ELx_EC_BREAKPT_CUR // debug exception in EL1 b.ge el1_dbg b el1_inv + +el1_ia: + /* + * Fall through to the Data abort case + */ el1_da: /* * Data abort handling diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index 21ab5df9fa76..65d81f965e74 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -35,6 +35,7 @@ #include <asm/sections.h> #include <asm/smp.h> #include <asm/suspend.h> +#include <asm/sysreg.h> #include <asm/virt.h> /* @@ -217,12 +218,22 @@ static int create_safe_exec_page(void *src_start, size_t length, set_pte(pte, __pte(virt_to_phys((void *)dst) | pgprot_val(PAGE_KERNEL_EXEC))); - /* Load our new page tables */ - asm volatile("msr ttbr0_el1, %0;" - "isb;" - "tlbi vmalle1is;" - "dsb ish;" - "isb" : : "r"(virt_to_phys(pgd))); + /* + * Load our new page tables. A strict BBM approach requires that we + * ensure that TLBs are free of any entries that may overlap with the + * global mappings we are about to install. + * + * For a real hibernate/resume cycle TTBR0 currently points to a zero + * page, but TLBs may contain stale ASID-tagged entries (e.g. for EFI + * runtime services), while for a userspace-driven test_resume cycle it + * points to userspace page tables (and we must point it at a zero page + * ourselves). Elsewhere we only (un)install the idmap with preemption + * disabled, so T0SZ should be as required regardless. + */ + cpu_set_reserved_ttbr0(); + local_flush_tlb_all(); + write_sysreg(virt_to_phys(pgd), ttbr0_el1); + isb(); *phys_dst_addr = virt_to_phys((void *)dst); @@ -394,6 +405,38 @@ int swsusp_arch_resume(void) void *, phys_addr_t, phys_addr_t); /* + * Restoring the memory image will overwrite the ttbr1 page tables. + * Create a second copy of just the linear map, and use this when + * restoring. + */ + tmp_pg_dir = (pgd_t *)get_safe_page(GFP_ATOMIC); + if (!tmp_pg_dir) { + pr_err("Failed to allocate memory for temporary page tables."); + rc = -ENOMEM; + goto out; + } + rc = copy_page_tables(tmp_pg_dir, PAGE_OFFSET, 0); + if (rc) + goto out; + + /* + * Since we only copied the linear map, we need to find restore_pblist's + * linear map address. + */ + lm_restore_pblist = LMADDR(restore_pblist); + + /* + * We need a zero page that is zero before & after resume in order to + * to break before make on the ttbr1 page tables. + */ + zero_page = (void *)get_safe_page(GFP_ATOMIC); + if (!zero_page) { + pr_err("Failed to allocate zero page."); + rc = -ENOMEM; + goto out; + } + + /* * Locate the exit code in the bottom-but-one page, so that *NULL * still has disastrous affects. */ @@ -419,27 +462,6 @@ int swsusp_arch_resume(void) __flush_dcache_area(hibernate_exit, exit_size); /* - * Restoring the memory image will overwrite the ttbr1 page tables. - * Create a second copy of just the linear map, and use this when - * restoring. - */ - tmp_pg_dir = (pgd_t *)get_safe_page(GFP_ATOMIC); - if (!tmp_pg_dir) { - pr_err("Failed to allocate memory for temporary page tables."); - rc = -ENOMEM; - goto out; - } - rc = copy_page_tables(tmp_pg_dir, PAGE_OFFSET, 0); - if (rc) - goto out; - - /* - * Since we only copied the linear map, we need to find restore_pblist's - * linear map address. - */ - lm_restore_pblist = LMADDR(restore_pblist); - - /* * KASLR will cause the el2 vectors to be in a different location in * the resumed kernel. Load hibernate's temporary copy into el2. * @@ -453,12 +475,6 @@ int swsusp_arch_resume(void) __hyp_set_vectors(el2_vectors); } - /* - * We need a zero page that is zero before & after resume in order to - * to break before make on the ttbr1 page tables. - */ - zero_page = (void *)get_safe_page(GFP_ATOMIC); - hibernate_exit(virt_to_phys(tmp_pg_dir), resume_hdr.ttbr1_el1, resume_hdr.reenter_kernel, lm_restore_pblist, resume_hdr.__hyp_stub_vectors, virt_to_phys(zero_page)); diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index bf9768588288..c6b0f40620d8 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -41,18 +41,6 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); static void __kprobes post_kprobe_handler(struct kprobe_ctlblk *, struct pt_regs *); -static inline unsigned long min_stack_size(unsigned long addr) -{ - unsigned long size; - - if (on_irq_stack(addr, raw_smp_processor_id())) - size = IRQ_STACK_PTR(raw_smp_processor_id()) - addr; - else - size = (unsigned long)current_thread_info() + THREAD_START_SP - addr; - - return min(size, FIELD_SIZEOF(struct kprobe_ctlblk, jprobes_stack)); -} - static void __kprobes arch_prepare_ss_slot(struct kprobe *p) { /* prepare insn slot */ @@ -489,20 +477,15 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) { struct jprobe *jp = container_of(p, struct jprobe, kp); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - long stack_ptr = kernel_stack_pointer(regs); kcb->jprobe_saved_regs = *regs; /* - * As Linus pointed out, gcc assumes that the callee - * owns the argument space and could overwrite it, e.g. - * tailcall optimization. So, to be absolutely safe - * we also save and restore enough stack bytes to cover - * the argument area. + * Since we can't be sure where in the stack frame "stacked" + * pass-by-value arguments are stored we just don't try to + * duplicate any of the stack. Do not use jprobes on functions that + * use more than 64 bytes (after padding each to an 8 byte boundary) + * of arguments, or pass individual arguments larger than 16 bytes. */ - kasan_disable_current(); - memcpy(kcb->jprobes_stack, (void *)stack_ptr, - min_stack_size(stack_ptr)); - kasan_enable_current(); instruction_pointer_set(regs, (unsigned long) jp->entry); preempt_disable(); @@ -554,10 +537,6 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) } unpause_graph_tracing(); *regs = kcb->jprobe_saved_regs; - kasan_disable_current(); - memcpy((void *)stack_addr, kcb->jprobes_stack, - min_stack_size(stack_addr)); - kasan_enable_current(); preempt_enable_no_resched(); return 1; } diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index 9a3aec97ac09..ccf79d849e0a 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -101,12 +101,20 @@ ENTRY(cpu_resume) bl el2_setup // if in EL2 drop to EL1 cleanly /* enable the MMU early - so we can access sleep_save_stash by va */ adr_l lr, __enable_mmu /* __cpu_setup will return here */ - ldr x27, =_cpu_resume /* __enable_mmu will branch here */ + adr_l x27, _resume_switched /* __enable_mmu will branch here */ adrp x25, idmap_pg_dir adrp x26, swapper_pg_dir b __cpu_setup ENDPROC(cpu_resume) + .pushsection ".idmap.text", "ax" +_resume_switched: + ldr x8, =_cpu_resume + br x8 +ENDPROC(_resume_switched) + .ltorg + .popsection + ENTRY(_cpu_resume) mrs x1, mpidr_el1 adrp x8, mpidr_hash diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 76a6d9263908..d93d43352504 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -661,9 +661,9 @@ void __init smp_init_cpus(void) acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT, acpi_parse_gic_cpu_interface, 0); - if (cpu_count > NR_CPUS) - pr_warn("no. of cores (%d) greater than configured maximum of %d - clipping\n", - cpu_count, NR_CPUS); + if (cpu_count > nr_cpu_ids) + pr_warn("Number of cores (%d) exceeds configured maximum of %d - clipping\n", + cpu_count, nr_cpu_ids); if (!bootcpu_valid) { pr_err("missing boot CPU MPIDR, not enabling secondaries\n"); @@ -677,7 +677,7 @@ void __init smp_init_cpus(void) * with entries in cpu_logical_map while initializing the cpus. * If the cpu set-up fails, invalidate the cpu_logical_map entry. */ - for (i = 1; i < NR_CPUS; i++) { + for (i = 1; i < nr_cpu_ids; i++) { if (cpu_logical_map(i) != INVALID_HWID) { if (smp_cpu_setup(i)) cpu_logical_map(i) = INVALID_HWID; diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c index f94b80eb295d..9c3e75df2180 100644 --- a/arch/arm64/mm/dump.c +++ b/arch/arm64/mm/dump.c @@ -242,7 +242,7 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level, static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start) { - pte_t *pte = pte_offset_kernel(pmd, 0); + pte_t *pte = pte_offset_kernel(pmd, 0UL); unsigned long addr; unsigned i; @@ -254,7 +254,7 @@ static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start) static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) { - pmd_t *pmd = pmd_offset(pud, 0); + pmd_t *pmd = pmd_offset(pud, 0UL); unsigned long addr; unsigned i; @@ -271,7 +271,7 @@ static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) { - pud_t *pud = pud_offset(pgd, 0); + pud_t *pud = pud_offset(pgd, 0UL); unsigned long addr; unsigned i; diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index c8beaa0da7df..05d2bd776c69 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -153,6 +153,11 @@ int ptep_set_access_flags(struct vm_area_struct *vma, } #endif +static bool is_el1_instruction_abort(unsigned int esr) +{ + return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_CUR; +} + /* * The kernel tried to access some page that wasn't present. */ @@ -161,8 +166,9 @@ static void __do_kernel_fault(struct mm_struct *mm, unsigned long addr, { /* * Are we prepared to handle this kernel fault? + * We are almost certainly not prepared to handle instruction faults. */ - if (fixup_exception(regs)) + if (!is_el1_instruction_abort(esr) && fixup_exception(regs)) return; /* @@ -267,7 +273,8 @@ static inline bool is_permission_fault(unsigned int esr) unsigned int ec = ESR_ELx_EC(esr); unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE; - return (ec == ESR_ELx_EC_DABT_CUR && fsc_type == ESR_ELx_FSC_PERM); + return (ec == ESR_ELx_EC_DABT_CUR && fsc_type == ESR_ELx_FSC_PERM) || + (ec == ESR_ELx_EC_IABT_CUR && fsc_type == ESR_ELx_FSC_PERM); } static bool is_el0_instruction_abort(unsigned int esr) @@ -312,6 +319,9 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, if (regs->orig_addr_limit == KERNEL_DS) die("Accessing user space memory with fs=KERNEL_DS", regs, esr); + if (is_el1_instruction_abort(esr)) + die("Attempting to execute userspace memory", regs, esr); + if (!search_exception_tables(regs->pc)) die("Accessing user space memory outside uaccess.h routines", regs, esr); } diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index c7fe3ec70774..5bb15eab6f00 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -23,6 +23,8 @@ #include <linux/module.h> #include <linux/of.h> +#include <asm/acpi.h> + struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; EXPORT_SYMBOL(node_data); nodemask_t numa_nodes_parsed __initdata; diff --git a/arch/h8300/include/asm/io.h b/arch/h8300/include/asm/io.h index 2e221c5f0203..f86918aed9e1 100644 --- a/arch/h8300/include/asm/io.h +++ b/arch/h8300/include/asm/io.h @@ -3,6 +3,8 @@ #ifdef __KERNEL__ +#include <linux/types.h> + /* H8/300 internal I/O functions */ #define __raw_readb __raw_readb diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 6a15083cc366..18ca6a9ce566 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -52,6 +52,7 @@ config IA64 select MODULES_USE_ELF_RELA select ARCH_USE_CMPXCHG_LOCKREF select HAVE_ARCH_AUDITSYSCALL + select HAVE_ARCH_HARDENED_USERCOPY default y help The Itanium Processor Family is Intel's 64-bit successor to diff --git a/arch/ia64/include/asm/uaccess.h b/arch/ia64/include/asm/uaccess.h index 2189d5ddc1ee..465c70982f40 100644 --- a/arch/ia64/include/asm/uaccess.h +++ b/arch/ia64/include/asm/uaccess.h @@ -241,12 +241,18 @@ extern unsigned long __must_check __copy_user (void __user *to, const void __use static inline unsigned long __copy_to_user (void __user *to, const void *from, unsigned long count) { + if (!__builtin_constant_p(count)) + check_object_size(from, count, true); + return __copy_user(to, (__force void __user *) from, count); } static inline unsigned long __copy_from_user (void *to, const void __user *from, unsigned long count) { + if (!__builtin_constant_p(count)) + check_object_size(to, count, false); + return __copy_user((__force void __user *) to, from, count); } @@ -258,8 +264,11 @@ __copy_from_user (void *to, const void __user *from, unsigned long count) const void *__cu_from = (from); \ long __cu_len = (n); \ \ - if (__access_ok(__cu_to, __cu_len, get_fs())) \ - __cu_len = __copy_user(__cu_to, (__force void __user *) __cu_from, __cu_len); \ + if (__access_ok(__cu_to, __cu_len, get_fs())) { \ + if (!__builtin_constant_p(n)) \ + check_object_size(__cu_from, __cu_len, true); \ + __cu_len = __copy_user(__cu_to, (__force void __user *) __cu_from, __cu_len); \ + } \ __cu_len; \ }) @@ -270,8 +279,11 @@ __copy_from_user (void *to, const void __user *from, unsigned long count) long __cu_len = (n); \ \ __chk_user_ptr(__cu_from); \ - if (__access_ok(__cu_from, __cu_len, get_fs())) \ + if (__access_ok(__cu_from, __cu_len, get_fs())) { \ + if (!__builtin_constant_p(n)) \ + check_object_size(__cu_to, __cu_len, false); \ __cu_len = __copy_user((__force void __user *) __cu_to, __cu_from, __cu_len); \ + } \ __cu_len; \ }) diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c index 2dcee3a88867..9202f82dfce6 100644 --- a/arch/m68k/kernel/signal.c +++ b/arch/m68k/kernel/signal.c @@ -213,7 +213,6 @@ static inline int frame_extra_sizes(int f) static inline void adjustformat(struct pt_regs *regs) { - ((struct switch_stack *)regs - 1)->a5 = current->mm->start_data; /* * set format byte to make stack appear modulo 4, which it will * be when doing the rte diff --git a/arch/metag/mm/init.c b/arch/metag/mm/init.c index 11fa51c89617..c0ec116b3993 100644 --- a/arch/metag/mm/init.c +++ b/arch/metag/mm/init.c @@ -390,7 +390,6 @@ void __init mem_init(void) free_all_bootmem(); mem_init_print_info(NULL); - show_mem(0); } void free_initmem(void) diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c index 6eb52b9c9818..e788515f766b 100644 --- a/arch/mips/kvm/emulate.c +++ b/arch/mips/kvm/emulate.c @@ -1642,8 +1642,14 @@ enum emulation_result kvm_mips_emulate_cache(union mips_instruction inst, preempt_disable(); if (KVM_GUEST_KSEGX(va) == KVM_GUEST_KSEG0) { - if (kvm_mips_host_tlb_lookup(vcpu, va) < 0) - kvm_mips_handle_kseg0_tlb_fault(va, vcpu); + if (kvm_mips_host_tlb_lookup(vcpu, va) < 0 && + kvm_mips_handle_kseg0_tlb_fault(va, vcpu)) { + kvm_err("%s: handling mapped kseg0 tlb fault for %lx, vcpu: %p, ASID: %#lx\n", + __func__, va, vcpu, read_c0_entryhi()); + er = EMULATE_FAIL; + preempt_enable(); + goto done; + } } else if ((KVM_GUEST_KSEGX(va) < KVM_GUEST_KSEG0) || KVM_GUEST_KSEGX(va) == KVM_GUEST_KSEG23) { int index; @@ -1680,12 +1686,18 @@ enum emulation_result kvm_mips_emulate_cache(union mips_instruction inst, run, vcpu); preempt_enable(); goto dont_update_pc; - } else { - /* - * We fault an entry from the guest tlb to the - * shadow host TLB - */ - kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb); + } + /* + * We fault an entry from the guest tlb to the + * shadow host TLB + */ + if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb)) { + kvm_err("%s: handling mapped seg tlb fault for %lx, index: %u, vcpu: %p, ASID: %#lx\n", + __func__, va, index, vcpu, + read_c0_entryhi()); + er = EMULATE_FAIL; + preempt_enable(); + goto done; } } } else { @@ -2659,7 +2671,12 @@ enum emulation_result kvm_mips_handle_tlbmiss(u32 cause, * OK we have a Guest TLB entry, now inject it into the * shadow host TLB */ - kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb); + if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb)) { + kvm_err("%s: handling mapped seg tlb fault for %lx, index: %u, vcpu: %p, ASID: %#lx\n", + __func__, va, index, vcpu, + read_c0_entryhi()); + er = EMULATE_FAIL; + } } } diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c index 57319ee57c4f..6cfdcf55572d 100644 --- a/arch/mips/kvm/mmu.c +++ b/arch/mips/kvm/mmu.c @@ -99,7 +99,7 @@ int kvm_mips_handle_kseg0_tlb_fault(unsigned long badvaddr, } gfn = (KVM_GUEST_CPHYSADDR(badvaddr) >> PAGE_SHIFT); - if (gfn >= kvm->arch.guest_pmap_npages) { + if ((gfn | 1) >= kvm->arch.guest_pmap_npages) { kvm_err("%s: Invalid gfn: %#llx, BadVaddr: %#lx\n", __func__, gfn, badvaddr); kvm_mips_dump_host_tlbs(); @@ -138,35 +138,49 @@ int kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu, unsigned long entryhi = 0, entrylo0 = 0, entrylo1 = 0; struct kvm *kvm = vcpu->kvm; kvm_pfn_t pfn0, pfn1; + gfn_t gfn0, gfn1; + long tlb_lo[2]; int ret; - if ((tlb->tlb_hi & VPN2_MASK) == 0) { - pfn0 = 0; - pfn1 = 0; - } else { - if (kvm_mips_map_page(kvm, mips3_tlbpfn_to_paddr(tlb->tlb_lo[0]) - >> PAGE_SHIFT) < 0) - return -1; - - if (kvm_mips_map_page(kvm, mips3_tlbpfn_to_paddr(tlb->tlb_lo[1]) - >> PAGE_SHIFT) < 0) - return -1; - - pfn0 = kvm->arch.guest_pmap[ - mips3_tlbpfn_to_paddr(tlb->tlb_lo[0]) >> PAGE_SHIFT]; - pfn1 = kvm->arch.guest_pmap[ - mips3_tlbpfn_to_paddr(tlb->tlb_lo[1]) >> PAGE_SHIFT]; + tlb_lo[0] = tlb->tlb_lo[0]; + tlb_lo[1] = tlb->tlb_lo[1]; + + /* + * The commpage address must not be mapped to anything else if the guest + * TLB contains entries nearby, or commpage accesses will break. + */ + if (!((tlb->tlb_hi ^ KVM_GUEST_COMMPAGE_ADDR) & + VPN2_MASK & (PAGE_MASK << 1))) + tlb_lo[(KVM_GUEST_COMMPAGE_ADDR >> PAGE_SHIFT) & 1] = 0; + + gfn0 = mips3_tlbpfn_to_paddr(tlb_lo[0]) >> PAGE_SHIFT; + gfn1 = mips3_tlbpfn_to_paddr(tlb_lo[1]) >> PAGE_SHIFT; + if (gfn0 >= kvm->arch.guest_pmap_npages || + gfn1 >= kvm->arch.guest_pmap_npages) { + kvm_err("%s: Invalid gfn: [%#llx, %#llx], EHi: %#lx\n", + __func__, gfn0, gfn1, tlb->tlb_hi); + kvm_mips_dump_guest_tlbs(vcpu); + return -1; } + if (kvm_mips_map_page(kvm, gfn0) < 0) + return -1; + + if (kvm_mips_map_page(kvm, gfn1) < 0) + return -1; + + pfn0 = kvm->arch.guest_pmap[gfn0]; + pfn1 = kvm->arch.guest_pmap[gfn1]; + /* Get attributes from the Guest TLB */ entrylo0 = mips3_paddr_to_tlbpfn(pfn0 << PAGE_SHIFT) | ((_page_cachable_default >> _CACHE_SHIFT) << ENTRYLO_C_SHIFT) | - (tlb->tlb_lo[0] & ENTRYLO_D) | - (tlb->tlb_lo[0] & ENTRYLO_V); + (tlb_lo[0] & ENTRYLO_D) | + (tlb_lo[0] & ENTRYLO_V); entrylo1 = mips3_paddr_to_tlbpfn(pfn1 << PAGE_SHIFT) | ((_page_cachable_default >> _CACHE_SHIFT) << ENTRYLO_C_SHIFT) | - (tlb->tlb_lo[1] & ENTRYLO_D) | - (tlb->tlb_lo[1] & ENTRYLO_V); + (tlb_lo[1] & ENTRYLO_D) | + (tlb_lo[1] & ENTRYLO_V); kvm_debug("@ %#lx tlb_lo0: 0x%08lx tlb_lo1: 0x%08lx\n", vcpu->arch.pc, tlb->tlb_lo[0], tlb->tlb_lo[1]); @@ -354,9 +368,15 @@ u32 kvm_get_inst(u32 *opc, struct kvm_vcpu *vcpu) local_irq_restore(flags); return KVM_INVALID_INST; } - kvm_mips_handle_mapped_seg_tlb_fault(vcpu, - &vcpu->arch. - guest_tlb[index]); + if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu, + &vcpu->arch.guest_tlb[index])) { + kvm_err("%s: handling mapped seg tlb fault failed for %p, index: %u, vcpu: %p, ASID: %#lx\n", + __func__, opc, index, vcpu, + read_c0_entryhi()); + kvm_mips_dump_guest_tlbs(vcpu); + local_irq_restore(flags); + return KVM_INVALID_INST; + } inst = *(opc); } local_irq_restore(flags); diff --git a/arch/parisc/include/uapi/asm/errno.h b/arch/parisc/include/uapi/asm/errno.h index c0ae62520d15..274d5bc6ecce 100644 --- a/arch/parisc/include/uapi/asm/errno.h +++ b/arch/parisc/include/uapi/asm/errno.h @@ -97,10 +97,10 @@ #define ENOTCONN 235 /* Transport endpoint is not connected */ #define ESHUTDOWN 236 /* Cannot send after transport endpoint shutdown */ #define ETOOMANYREFS 237 /* Too many references: cannot splice */ -#define EREFUSED ECONNREFUSED /* for HP's NFS apparently */ #define ETIMEDOUT 238 /* Connection timed out */ #define ECONNREFUSED 239 /* Connection refused */ -#define EREMOTERELEASE 240 /* Remote peer released connection */ +#define EREFUSED ECONNREFUSED /* for HP's NFS apparently */ +#define EREMOTERELEASE 240 /* Remote peer released connection */ #define EHOSTDOWN 241 /* Host is down */ #define EHOSTUNREACH 242 /* No route to host */ diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c index 5adc339eb7c8..0c2a94a0f751 100644 --- a/arch/parisc/kernel/processor.c +++ b/arch/parisc/kernel/processor.c @@ -51,8 +51,6 @@ EXPORT_SYMBOL(_parisc_requires_coherency); DEFINE_PER_CPU(struct cpuinfo_parisc, cpu_data); -extern int update_cr16_clocksource(void); /* from time.c */ - /* ** PARISC CPU driver - claim "device" and initialize CPU data structures. ** @@ -228,12 +226,6 @@ static int processor_probe(struct parisc_device *dev) } #endif - /* If we've registered more than one cpu, - * we'll use the jiffies clocksource since cr16 - * is not synchronized between CPUs. - */ - update_cr16_clocksource(); - return 0; } diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c index 505cf1ac5af2..4b0b963d52a7 100644 --- a/arch/parisc/kernel/time.c +++ b/arch/parisc/kernel/time.c @@ -221,18 +221,6 @@ static struct clocksource clocksource_cr16 = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; -int update_cr16_clocksource(void) -{ - /* since the cr16 cycle counters are not synchronized across CPUs, - we'll check if we should switch to a safe clocksource: */ - if (clocksource_cr16.rating != 0 && num_online_cpus() > 1) { - clocksource_change_rating(&clocksource_cr16, 0); - return 1; - } - - return 0; -} - void __init start_cpu_itimer(void) { unsigned int cpu = smp_processor_id(); diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index ec4047e170a0..927d2ab2ce08 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -166,6 +166,7 @@ config PPC select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS select GENERIC_CPU_AUTOPROBE select HAVE_VIRT_CPU_ACCOUNTING + select HAVE_ARCH_HARDENED_USERCOPY config GENERIC_CSUM def_bool CPU_LITTLE_ENDIAN diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index ca254546cd05..1934707bf321 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -66,29 +66,28 @@ endif UTS_MACHINE := $(OLDARCH) ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y) -override CC += -mlittle-endian -ifneq ($(cc-name),clang) -override CC += -mno-strict-align -endif -override AS += -mlittle-endian override LD += -EL -override CROSS32CC += -mlittle-endian override CROSS32AS += -mlittle-endian LDEMULATION := lppc GNUTARGET := powerpcle MULTIPLEWORD := -mno-multiple KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-save-toc-indirect) else -ifeq ($(call cc-option-yn,-mbig-endian),y) -override CC += -mbig-endian -override AS += -mbig-endian -endif override LD += -EB LDEMULATION := ppc GNUTARGET := powerpc MULTIPLEWORD := -mmultiple endif +cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mbig-endian) +cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mlittle-endian +ifneq ($(cc-name),clang) + cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mno-strict-align +endif + +aflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mbig-endian) +aflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mlittle-endian + ifeq ($(HAS_BIARCH),y) override AS += -a$(CONFIG_WORD_SIZE) override LD += -m elf$(CONFIG_WORD_SIZE)$(LDEMULATION) @@ -232,6 +231,9 @@ cpu-as-$(CONFIG_E200) += -Wa,-me200 KBUILD_AFLAGS += $(cpu-as-y) KBUILD_CFLAGS += $(cpu-as-y) +KBUILD_AFLAGS += $(aflags-y) +KBUILD_CFLAGS += $(cflags-y) + head-y := arch/powerpc/kernel/head_$(CONFIG_WORD_SIZE).o head-$(CONFIG_8xx) := arch/powerpc/kernel/head_8xx.o head-$(CONFIG_40x) := arch/powerpc/kernel/head_40x.o diff --git a/arch/powerpc/crypto/crc32c-vpmsum_glue.c b/arch/powerpc/crypto/crc32c-vpmsum_glue.c index bfe3d37a24ef..9fa046d56eba 100644 --- a/arch/powerpc/crypto/crc32c-vpmsum_glue.c +++ b/arch/powerpc/crypto/crc32c-vpmsum_glue.c @@ -4,6 +4,7 @@ #include <linux/module.h> #include <linux/string.h> #include <linux/kernel.h> +#include <linux/cpufeature.h> #include <asm/switch_to.h> #define CHKSUM_BLOCK_SIZE 1 @@ -157,7 +158,7 @@ static void __exit crc32c_vpmsum_mod_fini(void) crypto_unregister_shash(&alg); } -module_init(crc32c_vpmsum_mod_init); +module_cpu_feature_match(PPC_MODULE_FEATURE_VEC_CRYPTO, crc32c_vpmsum_mod_init); module_exit(crc32c_vpmsum_mod_fini); MODULE_AUTHOR("Anton Blanchard <anton@samba.org>"); diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h index 3d7fc06532a1..01b8a13f0224 100644 --- a/arch/powerpc/include/asm/cpuidle.h +++ b/arch/powerpc/include/asm/cpuidle.h @@ -19,4 +19,17 @@ extern u64 pnv_first_deep_stop_state; #endif +/* Idle state entry routines */ +#ifdef CONFIG_PPC_P7_NAP +#define IDLE_STATE_ENTER_SEQ(IDLE_INST) \ + /* Magic NAP/SLEEP/WINKLE mode enter sequence */ \ + std r0,0(r1); \ + ptesync; \ + ld r0,0(r1); \ +1: cmp cr0,r0,r0; \ + bne 1b; \ + IDLE_INST; \ + b . +#endif /* CONFIG_PPC_P7_NAP */ + #endif diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h index 57fec8ac7b92..ddf54f5bbdd1 100644 --- a/arch/powerpc/include/asm/feature-fixups.h +++ b/arch/powerpc/include/asm/feature-fixups.h @@ -186,6 +186,7 @@ label##3: \ #ifndef __ASSEMBLY__ void apply_feature_fixups(void); +void setup_feature_keys(void); #endif #endif /* __ASM_POWERPC_FEATURE_FIXUPS_H */ diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h index 0a74ebe934e1..17c8380673a6 100644 --- a/arch/powerpc/include/asm/switch_to.h +++ b/arch/powerpc/include/asm/switch_to.h @@ -75,14 +75,6 @@ static inline void disable_kernel_spe(void) static inline void __giveup_spe(struct task_struct *t) { } #endif -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM -extern void flush_tmregs_to_thread(struct task_struct *); -#else -static inline void flush_tmregs_to_thread(struct task_struct *t) -{ -} -#endif - static inline void clear_task_ebb(struct task_struct *t) { #ifdef CONFIG_PPC_BOOK3S_64 diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index b7c20f0b8fbe..c1dc6c14deb8 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -310,10 +310,15 @@ static inline unsigned long copy_from_user(void *to, { unsigned long over; - if (access_ok(VERIFY_READ, from, n)) + if (access_ok(VERIFY_READ, from, n)) { + if (!__builtin_constant_p(n)) + check_object_size(to, n, false); return __copy_tofrom_user((__force void __user *)to, from, n); + } if ((unsigned long)from < TASK_SIZE) { over = (unsigned long)from + n - TASK_SIZE; + if (!__builtin_constant_p(n - over)) + check_object_size(to, n - over, false); return __copy_tofrom_user((__force void __user *)to, from, n - over) + over; } @@ -325,10 +330,15 @@ static inline unsigned long copy_to_user(void __user *to, { unsigned long over; - if (access_ok(VERIFY_WRITE, to, n)) + if (access_ok(VERIFY_WRITE, to, n)) { + if (!__builtin_constant_p(n)) + check_object_size(from, n, true); return __copy_tofrom_user(to, (__force void __user *)from, n); + } if ((unsigned long)to < TASK_SIZE) { over = (unsigned long)to + n - TASK_SIZE; + if (!__builtin_constant_p(n)) + check_object_size(from, n - over, true); return __copy_tofrom_user(to, (__force void __user *)from, n - over) + over; } @@ -372,6 +382,10 @@ static inline unsigned long __copy_from_user_inatomic(void *to, if (ret == 0) return 0; } + + if (!__builtin_constant_p(n)) + check_object_size(to, n, false); + return __copy_tofrom_user((__force void __user *)to, from, n); } @@ -398,6 +412,9 @@ static inline unsigned long __copy_to_user_inatomic(void __user *to, if (ret == 0) return 0; } + if (!__builtin_constant_p(n)) + check_object_size(from, n, true); + return __copy_tofrom_user(to, (__force const void __user *)from, n); } diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h index f5f729c11578..f0b238516e9b 100644 --- a/arch/powerpc/include/asm/xics.h +++ b/arch/powerpc/include/asm/xics.h @@ -159,6 +159,8 @@ extern void xics_teardown_cpu(void); extern void xics_kexec_teardown_cpu(int secondary); extern void xics_migrate_irqs_away(void); extern void icp_native_eoi(struct irq_data *d); +extern int xics_set_irq_type(struct irq_data *d, unsigned int flow_type); +extern int xics_retrigger(struct irq_data *data); #ifdef CONFIG_SMP extern int xics_get_irq_server(unsigned int virq, const struct cpumask *cpumask, unsigned int strict_check); diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index c9bc78e9c610..7429556eb8df 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -168,10 +168,10 @@ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len) int n = 0, l = 0; char buffer[128]; - n += scnprintf(buf+n, len-n, "%04x:%02x:%02x:%01x\n", + n += scnprintf(buf+n, len-n, "%04x:%02x:%02x.%01x\n", edev->phb->global_number, pdn->busno, PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); - pr_warn("EEH: of node=%04x:%02x:%02x:%01x\n", + pr_warn("EEH: of node=%04x:%02x:%02x.%01x\n", edev->phb->global_number, pdn->busno, PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 41091fdf9bd8..df6d45eb4115 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -144,29 +144,14 @@ machine_check_pSeries_1: * vector */ SET_SCRATCH0(r13) /* save r13 */ -#ifdef CONFIG_PPC_P7_NAP -BEGIN_FTR_SECTION - /* Running native on arch 2.06 or later, check if we are - * waking up from nap. We only handle no state loss and - * supervisor state loss. We do -not- handle hypervisor - * state loss at this time. + /* + * Running native on arch 2.06 or later, we may wakeup from winkle + * inside machine check. If yes, then last bit of HSPGR0 would be set + * to 1. Hence clear it unconditionally. */ - mfspr r13,SPRN_SRR1 - rlwinm. r13,r13,47-31,30,31 - OPT_GET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR) - beq 9f - - mfspr r13,SPRN_SRR1 - rlwinm. r13,r13,47-31,30,31 - /* waking up from powersave (nap) state */ - cmpwi cr1,r13,2 - /* Total loss of HV state is fatal. let's just stay stuck here */ - OPT_GET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR) - bgt cr1,. -9: - OPT_SET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR) -END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) -#endif /* CONFIG_PPC_P7_NAP */ + GET_PACA(r13) + clrrdi r13,r13,1 + SET_PACA(r13) EXCEPTION_PROLOG_0(PACA_EXMC) BEGIN_FTR_SECTION b machine_check_powernv_early @@ -1273,25 +1258,51 @@ machine_check_handle_early: * Check if thread was in power saving mode. We come here when any * of the following is true: * a. thread wasn't in power saving mode - * b. thread was in power saving mode with no state loss or - * supervisor state loss + * b. thread was in power saving mode with no state loss, + * supervisor state loss or hypervisor state loss. * - * Go back to nap again if (b) is true. + * Go back to nap/sleep/winkle mode again if (b) is true. */ rlwinm. r11,r12,47-31,30,31 /* Was it in power saving mode? */ beq 4f /* No, it wasn;t */ /* Thread was in power saving mode. Go back to nap again. */ cmpwi r11,2 - bne 3f - /* Supervisor state loss */ + blt 3f + /* Supervisor/Hypervisor state loss */ li r0,1 stb r0,PACA_NAPSTATELOST(r13) 3: bl machine_check_queue_event MACHINE_CHECK_HANDLER_WINDUP GET_PACA(r13) ld r1,PACAR1(r13) - li r3,PNV_THREAD_NAP - b pnv_enter_arch207_idle_mode + /* + * Check what idle state this CPU was in and go back to same mode + * again. + */ + lbz r3,PACA_THREAD_IDLE_STATE(r13) + cmpwi r3,PNV_THREAD_NAP + bgt 10f + IDLE_STATE_ENTER_SEQ(PPC_NAP) + /* No return */ +10: + cmpwi r3,PNV_THREAD_SLEEP + bgt 2f + IDLE_STATE_ENTER_SEQ(PPC_SLEEP) + /* No return */ + +2: + /* + * Go back to winkle. Please note that this thread was woken up in + * machine check from winkle and have not restored the per-subcore + * state. Hence before going back to winkle, set last bit of HSPGR0 + * to 1. This will make sure that if this thread gets woken up + * again at reset vector 0x100 then it will get chance to restore + * the subcore state. + */ + ori r13,r13,1 + SET_PACA(r13) + IDLE_STATE_ENTER_SEQ(PPC_WINKLE) + /* No return */ 4: #endif /* diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index ba79d15f4ddd..2265c6398a17 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -44,18 +44,6 @@ PSSCR_PSLL_MASK | PSSCR_TR_MASK | \ PSSCR_MTL_MASK -/* Idle state entry routines */ - -#define IDLE_STATE_ENTER_SEQ(IDLE_INST) \ - /* Magic NAP/SLEEP/WINKLE mode enter sequence */ \ - std r0,0(r1); \ - ptesync; \ - ld r0,0(r1); \ -1: cmp cr0,r0,r0; \ - bne 1b; \ - IDLE_INST; \ - b . - .text /* @@ -363,8 +351,8 @@ _GLOBAL(power9_idle_stop) * cr3 - set to gt if waking up with partial/complete hypervisor state loss */ _GLOBAL(pnv_restore_hyp_resource) - ld r2,PACATOC(r13); BEGIN_FTR_SECTION + ld r2,PACATOC(r13); /* * POWER ISA 3. Use PSSCR to determine if we * are waking up from deep idle state @@ -395,6 +383,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) */ clrldi r5,r13,63 clrrdi r13,r13,1 + + /* Now that we are sure r13 is corrected, load TOC */ + ld r2,PACATOC(r13); cmpwi cr4,r5,1 mtspr SPRN_HSPRG0,r13 diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index ef267fd9dd22..5e7ece0fda9f 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -92,7 +92,8 @@ void save_mce_event(struct pt_regs *regs, long handled, mce->in_use = 1; mce->initiator = MCE_INITIATOR_CPU; - if (handled) + /* Mark it recovered if we have handled it and MSR(RI=1). */ + if (handled && (regs->msr & MSR_RI)) mce->disposition = MCE_DISPOSITION_RECOVERED; else mce->disposition = MCE_DISPOSITION_NOT_RECOVERED; diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index a5c0153ede37..7fdf324d5b51 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -78,6 +78,7 @@ EXPORT_SYMBOL(get_pci_dma_ops); static int get_phb_number(struct device_node *dn) { int ret, phb_id = -1; + u32 prop_32; u64 prop; /* @@ -86,8 +87,10 @@ static int get_phb_number(struct device_node *dn) * reading "ibm,opal-phbid", only present in OPAL environment. */ ret = of_property_read_u64(dn, "ibm,opal-phbid", &prop); - if (ret) - ret = of_property_read_u32_index(dn, "reg", 1, (u32 *)&prop); + if (ret) { + ret = of_property_read_u32_index(dn, "reg", 1, &prop_32); + prop = prop_32; + } if (!ret) phb_id = (int)(prop & (MAX_PHBS - 1)); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 58ccf86415b4..9ee2623e0f67 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1074,26 +1074,6 @@ static inline void restore_sprs(struct thread_struct *old_thread, #endif } -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM -void flush_tmregs_to_thread(struct task_struct *tsk) -{ - /* - * Process self tracing is not yet supported through - * ptrace interface. Ptrace generic code should have - * prevented this from happening in the first place. - * Warn once here with the message, if some how it - * is attempted. - */ - WARN_ONCE(tsk == current, - "Not expecting ptrace on self: TM regs may be incorrect\n"); - - /* - * If task is not current, it should have been flushed - * already to it's thread_struct during __switch_to(). - */ -} -#endif - struct task_struct *__switch_to(struct task_struct *prev, struct task_struct *new) { diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 6ee4b72cda42..4e74fc588a3f 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -2940,7 +2940,7 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, /* Don't print anything after quiesce under OPAL, it crashes OFW */ if (of_platform != PLATFORM_OPAL) { - prom_printf("Booting Linux via __start() ...\n"); + prom_printf("Booting Linux via __start() @ 0x%lx ...\n", kbase); prom_debug("->dt_header_start=0x%x\n", hdr); } diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 4f3c5756cc09..bf91658a8a40 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -38,6 +38,7 @@ #include <asm/page.h> #include <asm/pgtable.h> #include <asm/switch_to.h> +#include <asm/tm.h> #define CREATE_TRACE_POINTS #include <trace/events/syscalls.h> @@ -118,6 +119,24 @@ static const struct pt_regs_offset regoffset_table[] = { REG_OFFSET_END, }; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +static void flush_tmregs_to_thread(struct task_struct *tsk) +{ + /* + * If task is not current, it will have been flushed already to + * it's thread_struct during __switch_to(). + * + * A reclaim flushes ALL the state. + */ + + if (tsk == current && MSR_TM_SUSPENDED(mfmsr())) + tm_reclaim_current(TM_CAUSE_SIGNAL); + +} +#else +static inline void flush_tmregs_to_thread(struct task_struct *tsk) { } +#endif + /** * regs_query_register_offset() - query register offset from its name * @name: the name of a register diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index c3e861df4b20..24ec3ea4b3a2 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -93,15 +93,16 @@ notrace unsigned long __init early_init(unsigned long dt_ptr) * and we are running with enough of the MMU enabled to have our * proper kernel virtual addresses * - * Find out what kind of machine we're on and save any data we need - * from the early boot process (devtree is copied on pmac by prom_init()). - * This is called very early on the boot process, after a minimal - * MMU environment has been set up but before MMU_init is called. + * We do the initial parsing of the flat device-tree and prepares + * for the MMU to be fully initialized. */ extern unsigned int memset_nocache_branch; /* Insn to be replaced by NOP */ notrace void __init machine_init(u64 dt_ptr) { + /* Configure static keys first, now that we're relocated. */ + setup_feature_keys(); + /* Enable early debugging if any specified (see udbg.h) */ udbg_early_init(); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index eafb9a79e011..7ac8e6eaab5b 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -300,6 +300,7 @@ void __init early_setup(unsigned long dt_ptr) /* Apply all the dynamic patching */ apply_feature_fixups(); + setup_feature_keys(); /* Initialize the hash table or TLB handling */ early_init_mmu(); diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 6767605ea8da..4111d30badfa 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -22,6 +22,7 @@ #include <linux/security.h> #include <linux/memblock.h> +#include <asm/cpu_has_feature.h> #include <asm/pgtable.h> #include <asm/processor.h> #include <asm/mmu.h> diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile index cbabd143acae..78a7449bf489 100644 --- a/arch/powerpc/kernel/vdso32/Makefile +++ b/arch/powerpc/kernel/vdso32/Makefile @@ -30,7 +30,7 @@ CPPFLAGS_vdso32.lds += -P -C -Upowerpc $(obj)/vdso32_wrapper.o : $(obj)/vdso32.so # link rule for the .so file, .lds has to be first -$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) +$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) FORCE $(call if_changed,vdso32ld) # strip rule for the .so file @@ -39,12 +39,12 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE $(call if_changed,objcopy) # assembly rules for the .S files -$(obj-vdso32): %.o: %.S +$(obj-vdso32): %.o: %.S FORCE $(call if_changed_dep,vdso32as) # actual build commands quiet_cmd_vdso32ld = VDSO32L $@ - cmd_vdso32ld = $(CROSS32CC) $(c_flags) -Wl,-T $^ -o $@ + cmd_vdso32ld = $(CROSS32CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) quiet_cmd_vdso32as = VDSO32A $@ cmd_vdso32as = $(CROSS32CC) $(a_flags) -c -o $@ $< diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile index c710802b8fb6..366ae09b14c1 100644 --- a/arch/powerpc/kernel/vdso64/Makefile +++ b/arch/powerpc/kernel/vdso64/Makefile @@ -23,7 +23,7 @@ CPPFLAGS_vdso64.lds += -P -C -U$(ARCH) $(obj)/vdso64_wrapper.o : $(obj)/vdso64.so # link rule for the .so file, .lds has to be first -$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) +$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) FORCE $(call if_changed,vdso64ld) # strip rule for the .so file @@ -32,12 +32,12 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE $(call if_changed,objcopy) # assembly rules for the .S files -$(obj-vdso64): %.o: %.S +$(obj-vdso64): %.o: %.S FORCE $(call if_changed_dep,vdso64as) # actual build commands quiet_cmd_vdso64ld = VDSO64L $@ - cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $^ -o $@ + cmd_vdso64ld = $(CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) quiet_cmd_vdso64as = VDSO64A $@ cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $< diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index a75ba38a2d81..05aa11399a78 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -1329,20 +1329,16 @@ static int kvmppc_xics_create(struct kvm_device *dev, u32 type) xics->kvm = kvm; /* Already there ? */ - mutex_lock(&kvm->lock); if (kvm->arch.xics) ret = -EEXIST; else kvm->arch.xics = xics; - mutex_unlock(&kvm->lock); if (ret) { kfree(xics); return ret; } - xics_debugfs_init(xics); - #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE if (cpu_has_feature(CPU_FTR_ARCH_206)) { /* Enable real mode support */ @@ -1354,9 +1350,17 @@ static int kvmppc_xics_create(struct kvm_device *dev, u32 type) return 0; } +static void kvmppc_xics_init(struct kvm_device *dev) +{ + struct kvmppc_xics *xics = (struct kvmppc_xics *)dev->private; + + xics_debugfs_init(xics); +} + struct kvm_device_ops kvm_xics_ops = { .name = "kvm-xics", .create = kvmppc_xics_create, + .init = kvmppc_xics_init, .destroy = kvmppc_xics_free, .set_attr = xics_set_attr, .get_attr = xics_get_attr, diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S index d90870a66b60..0a57fe6d49cc 100644 --- a/arch/powerpc/lib/checksum_32.S +++ b/arch/powerpc/lib/checksum_32.S @@ -127,8 +127,9 @@ _GLOBAL(csum_partial_copy_generic) stw r7,12(r1) stw r8,8(r1) - andi. r0,r4,1 /* is destination address even ? */ - cmplwi cr7,r0,0 + rlwinm r0,r4,3,0x8 + rlwnm r6,r6,r0,0,31 /* odd destination address: rotate one byte */ + cmplwi cr7,r0,0 /* is destination address even ? */ addic r12,r6,0 addi r6,r4,-4 neg r0,r4 @@ -237,7 +238,7 @@ _GLOBAL(csum_partial_copy_generic) 66: addze r3,r12 addi r1,r1,16 beqlr+ cr7 - rlwinm r3,r3,8,0,31 /* swap bytes for odd destination */ + rlwinm r3,r3,8,0,31 /* odd destination address: rotate one byte */ blr /* read fault */ diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 74145f02ad41..043415f0bdb1 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -188,7 +188,10 @@ void __init apply_feature_fixups(void) &__start___fw_ftr_fixup, &__stop___fw_ftr_fixup); #endif do_final_fixups(); +} +void __init setup_feature_keys(void) +{ /* * Initialise jump label. This causes all the cpu/mmu_has_feature() * checks to take on their correct polarity based on the current set of diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 5be15cff758d..2975754c65ea 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -496,8 +496,10 @@ spufs_mkgang(struct inode *dir, struct dentry *dentry, umode_t mode) gang = alloc_spu_gang(); SPUFS_I(inode)->i_ctx = NULL; SPUFS_I(inode)->i_gang = gang; - if (!gang) + if (!gang) { + ret = -ENOMEM; goto out_iput; + } inode->i_op = &simple_dir_inode_operations; inode->i_fop = &simple_dir_operations; diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c index 309d9ccccd50..c61667e8bb06 100644 --- a/arch/powerpc/platforms/pasemi/iommu.c +++ b/arch/powerpc/platforms/pasemi/iommu.c @@ -187,6 +187,11 @@ static void pci_dma_dev_setup_pasemi(struct pci_dev *dev) if (dev->vendor == 0x1959 && dev->device == 0xa007 && !firmware_has_feature(FW_FEATURE_LPAR)) { dev->dev.archdata.dma_ops = &dma_direct_ops; + /* + * Set the coherent DMA mask to prevent the iommu + * being used unnecessarily + */ + dev->dev.coherent_dma_mask = DMA_BIT_MASK(44); return; } #endif diff --git a/arch/powerpc/platforms/powernv/opal-irqchip.c b/arch/powerpc/platforms/powernv/opal-irqchip.c index e505223b4ec5..ed8bba68a162 100644 --- a/arch/powerpc/platforms/powernv/opal-irqchip.c +++ b/arch/powerpc/platforms/powernv/opal-irqchip.c @@ -228,7 +228,8 @@ int __init opal_event_init(void) } /* Install interrupt handler */ - rc = request_irq(virq, opal_interrupt, 0, "opal", NULL); + rc = request_irq(virq, opal_interrupt, IRQF_TRIGGER_LOW, + "opal", NULL); if (rc) { irq_dispose_mapping(virq); pr_warn("Error %d requesting irq %d (0x%x)\n", diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 8b4fc68cebcb..6c9a65b52e63 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -399,6 +399,7 @@ static int opal_recover_mce(struct pt_regs *regs, if (!(regs->msr & MSR_RI)) { /* If MSR_RI isn't set, we cannot recover */ + pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n"); recovered = 0; } else if (evt->disposition == MCE_DISPOSITION_RECOVERED) { /* Platform corrected itself */ diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 6b9528307f62..fd9444f9fb0c 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -111,10 +111,17 @@ static int __init iommu_setup(char *str) } early_param("iommu", iommu_setup); -static inline bool pnv_pci_is_mem_pref_64(unsigned long flags) +static inline bool pnv_pci_is_m64(struct pnv_phb *phb, struct resource *r) { - return ((flags & (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH)) == - (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH)); + /* + * WARNING: We cannot rely on the resource flags. The Linux PCI + * allocation code sometimes decides to put a 64-bit prefetchable + * BAR in the 32-bit window, so we have to compare the addresses. + * + * For simplicity we only test resource start. + */ + return (r->start >= phb->ioda.m64_base && + r->start < (phb->ioda.m64_base + phb->ioda.m64_size)); } static struct pnv_ioda_pe *pnv_ioda_init_pe(struct pnv_phb *phb, int pe_no) @@ -229,7 +236,7 @@ static void pnv_ioda_reserve_dev_m64_pe(struct pci_dev *pdev, sgsz = phb->ioda.m64_segsize; for (i = 0; i <= PCI_ROM_RESOURCE; i++) { r = &pdev->resource[i]; - if (!r->parent || !pnv_pci_is_mem_pref_64(r->flags)) + if (!r->parent || !pnv_pci_is_m64(phb, r)) continue; start = _ALIGN_DOWN(r->start - base, sgsz); @@ -1877,7 +1884,7 @@ static void pnv_pci_phb3_tce_invalidate(struct pnv_ioda_pe *pe, bool rm, unsigned shift, unsigned long index, unsigned long npages) { - __be64 __iomem *invalidate = pnv_ioda_get_inval_reg(pe->phb, false); + __be64 __iomem *invalidate = pnv_ioda_get_inval_reg(pe->phb, rm); unsigned long start, end, inc; /* We'll invalidate DMA address in PE scope */ @@ -2863,7 +2870,7 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev) res = &pdev->resource[i + PCI_IOV_RESOURCES]; if (!res->flags || res->parent) continue; - if (!pnv_pci_is_mem_pref_64(res->flags)) { + if (!pnv_pci_is_m64(phb, res)) { dev_warn(&pdev->dev, "Don't support SR-IOV with" " non M64 VF BAR%d: %pR. \n", i, res); @@ -2958,7 +2965,7 @@ static void pnv_ioda_setup_pe_res(struct pnv_ioda_pe *pe, index++; } } else if ((res->flags & IORESOURCE_MEM) && - !pnv_pci_is_mem_pref_64(res->flags)) { + !pnv_pci_is_m64(phb, res)) { region.start = res->start - phb->hose->mem_offset[0] - phb->ioda.m32_pci_base; @@ -3083,9 +3090,12 @@ static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus, bridge = bridge->bus->self; } - /* We fail back to M32 if M64 isn't supported */ - if (phb->ioda.m64_segsize && - pnv_pci_is_mem_pref_64(type)) + /* + * We fall back to M32 if M64 isn't supported. We enforce the M64 + * alignment for any 64-bit resource, PCIe doesn't care and + * bridges only do 64-bit prefetchable anyway. + */ + if (phb->ioda.m64_segsize && (type & IORESOURCE_MEM_64)) return phb->ioda.m64_segsize; if (type & IORESOURCE_MEM) return phb->ioda.m32_segsize; @@ -3125,7 +3135,7 @@ static void pnv_pci_fixup_bridge_resources(struct pci_bus *bus, w = NULL; if (r->flags & type & IORESOURCE_IO) w = &hose->io_resource; - else if (pnv_pci_is_mem_pref_64(r->flags) && + else if (pnv_pci_is_m64(phb, r) && (type & IORESOURCE_PREFETCH) && phb->ioda.m64_segsize) w = &hose->mem_resources[1]; diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 43f7beb2902d..76ec104e88be 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -320,19 +320,6 @@ static int dlpar_remove_device_tree_lmb(struct of_drconf_cell *lmb) return dlpar_update_device_tree_lmb(lmb); } -static struct memory_block *lmb_to_memblock(struct of_drconf_cell *lmb) -{ - unsigned long section_nr; - struct mem_section *mem_sect; - struct memory_block *mem_block; - - section_nr = pfn_to_section_nr(PFN_DOWN(lmb->base_addr)); - mem_sect = __nr_to_section(section_nr); - - mem_block = find_memory_block(mem_sect); - return mem_block; -} - #ifdef CONFIG_MEMORY_HOTREMOVE static int pseries_remove_memblock(unsigned long base, unsigned int memblock_size) { @@ -420,6 +407,19 @@ static bool lmb_is_removable(struct of_drconf_cell *lmb) static int dlpar_add_lmb(struct of_drconf_cell *); +static struct memory_block *lmb_to_memblock(struct of_drconf_cell *lmb) +{ + unsigned long section_nr; + struct mem_section *mem_sect; + struct memory_block *mem_block; + + section_nr = pfn_to_section_nr(PFN_DOWN(lmb->base_addr)); + mem_sect = __nr_to_section(section_nr); + + mem_block = find_memory_block(mem_sect); + return mem_block; +} + static int dlpar_remove_lmb(struct of_drconf_cell *lmb) { struct memory_block *mem_block; diff --git a/arch/powerpc/sysdev/xics/Kconfig b/arch/powerpc/sysdev/xics/Kconfig index 0031eda320c3..385e7aa9e273 100644 --- a/arch/powerpc/sysdev/xics/Kconfig +++ b/arch/powerpc/sysdev/xics/Kconfig @@ -1,6 +1,7 @@ config PPC_XICS def_bool n select PPC_SMP_MUXED_IPI + select HARDIRQS_SW_RESEND config PPC_ICP_NATIVE def_bool n diff --git a/arch/powerpc/sysdev/xics/ics-opal.c b/arch/powerpc/sysdev/xics/ics-opal.c index 27c936c080a6..1c6bf4b66f56 100644 --- a/arch/powerpc/sysdev/xics/ics-opal.c +++ b/arch/powerpc/sysdev/xics/ics-opal.c @@ -156,7 +156,9 @@ static struct irq_chip ics_opal_irq_chip = { .irq_mask = ics_opal_mask_irq, .irq_unmask = ics_opal_unmask_irq, .irq_eoi = NULL, /* Patched at init time */ - .irq_set_affinity = ics_opal_set_affinity + .irq_set_affinity = ics_opal_set_affinity, + .irq_set_type = xics_set_irq_type, + .irq_retrigger = xics_retrigger, }; static int ics_opal_map(struct ics *ics, unsigned int virq); diff --git a/arch/powerpc/sysdev/xics/ics-rtas.c b/arch/powerpc/sysdev/xics/ics-rtas.c index 3854dd41558d..78ee5c778ef8 100644 --- a/arch/powerpc/sysdev/xics/ics-rtas.c +++ b/arch/powerpc/sysdev/xics/ics-rtas.c @@ -163,7 +163,9 @@ static struct irq_chip ics_rtas_irq_chip = { .irq_mask = ics_rtas_mask_irq, .irq_unmask = ics_rtas_unmask_irq, .irq_eoi = NULL, /* Patched at init time */ - .irq_set_affinity = ics_rtas_set_affinity + .irq_set_affinity = ics_rtas_set_affinity, + .irq_set_type = xics_set_irq_type, + .irq_retrigger = xics_retrigger, }; static int ics_rtas_map(struct ics *ics, unsigned int virq) diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c index a795a5f0301c..9d530f479588 100644 --- a/arch/powerpc/sysdev/xics/xics-common.c +++ b/arch/powerpc/sysdev/xics/xics-common.c @@ -328,8 +328,12 @@ static int xics_host_map(struct irq_domain *h, unsigned int virq, pr_devel("xics: map virq %d, hwirq 0x%lx\n", virq, hw); - /* They aren't all level sensitive but we just don't really know */ - irq_set_status_flags(virq, IRQ_LEVEL); + /* + * Mark interrupts as edge sensitive by default so that resend + * actually works. The device-tree parsing will turn the LSIs + * back to level. + */ + irq_clear_status_flags(virq, IRQ_LEVEL); /* Don't call into ICS for IPIs */ if (hw == XICS_IPI) { @@ -351,13 +355,54 @@ static int xics_host_xlate(struct irq_domain *h, struct device_node *ct, irq_hw_number_t *out_hwirq, unsigned int *out_flags) { - /* Current xics implementation translates everything - * to level. It is not technically right for MSIs but this - * is irrelevant at this point. We might get smarter in the future - */ *out_hwirq = intspec[0]; - *out_flags = IRQ_TYPE_LEVEL_LOW; + /* + * If intsize is at least 2, we look for the type in the second cell, + * we assume the LSB indicates a level interrupt. + */ + if (intsize > 1) { + if (intspec[1] & 1) + *out_flags = IRQ_TYPE_LEVEL_LOW; + else + *out_flags = IRQ_TYPE_EDGE_RISING; + } else + *out_flags = IRQ_TYPE_LEVEL_LOW; + + return 0; +} + +int xics_set_irq_type(struct irq_data *d, unsigned int flow_type) +{ + /* + * We only support these. This has really no effect other than setting + * the corresponding descriptor bits mind you but those will in turn + * affect the resend function when re-enabling an edge interrupt. + * + * Set set the default to edge as explained in map(). + */ + if (flow_type == IRQ_TYPE_DEFAULT || flow_type == IRQ_TYPE_NONE) + flow_type = IRQ_TYPE_EDGE_RISING; + + if (flow_type != IRQ_TYPE_EDGE_RISING && + flow_type != IRQ_TYPE_LEVEL_LOW) + return -EINVAL; + + irqd_set_trigger_type(d, flow_type); + + return IRQ_SET_MASK_OK_NOCOPY; +} + +int xics_retrigger(struct irq_data *data) +{ + /* + * We need to push a dummy CPPR when retriggering, since the subsequent + * EOI will try to pop it. Passing 0 works, as the function hard codes + * the priority value anyway. + */ + xics_push_cppr(0); + + /* Tell the core to do a soft retrigger */ return 0; } diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 9e607bf2d640..e751fe25d6ab 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -123,6 +123,7 @@ config S390 select HAVE_ALIGNED_STRUCT_PAGE if SLUB select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_EARLY_PFN_TO_NID + select HAVE_ARCH_HARDENED_USERCOPY select HAVE_ARCH_JUMP_LABEL select CPU_NO_EFFICIENT_FFS if !HAVE_MARCH_Z9_109_FEATURES select HAVE_ARCH_SECCOMP_FILTER @@ -871,4 +872,17 @@ config S390_GUEST Select this option if you want to run the kernel as a guest under the KVM hypervisor. +config S390_GUEST_OLD_TRANSPORT + def_bool y + prompt "Guest support for old s390 virtio transport (DEPRECATED)" + depends on S390_GUEST + help + Enable this option to add support for the old s390-virtio + transport (i.e. virtio devices NOT based on virtio-ccw). This + type of virtio devices is only available on the experimental + kuli userspace or with old (< 2.6) qemu. If you are running + with a modern version of qemu (which supports virtio-ccw since + 1.4 and uses it by default since version 2.4), you probably won't + need this. + endmenu diff --git a/arch/s390/boot/compressed/head.S b/arch/s390/boot/compressed/head.S index f86a4eef28a9..28c4f96a2d9c 100644 --- a/arch/s390/boot/compressed/head.S +++ b/arch/s390/boot/compressed/head.S @@ -21,16 +21,21 @@ ENTRY(startup_continue) lg %r15,.Lstack-.LPG1(%r13) aghi %r15,-160 brasl %r14,decompress_kernel - # setup registers for memory mover & branch to target + # Set up registers for memory mover. We move the decompressed image to + # 0x11000, starting at offset 0x11000 in the decompressed image so + # that code living at 0x11000 in the image will end up at 0x11000 in + # memory. lgr %r4,%r2 lg %r2,.Loffset-.LPG1(%r13) la %r4,0(%r2,%r4) lg %r3,.Lmvsize-.LPG1(%r13) lgr %r5,%r3 - # move the memory mover someplace safe + # Move the memory mover someplace safe so it doesn't overwrite itself. la %r1,0x200 mvc 0(mover_end-mover,%r1),mover-.LPG1(%r13) - # decompress image is started at 0x11000 + # When the memory mover is done we pass control to + # arch/s390/kernel/head64.S:startup_continue which lives at 0x11000 in + # the decompressed image. lgr %r6,%r2 br %r1 mover: diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig index 889ea3450210..26e0c7f08814 100644 --- a/arch/s390/configs/default_defconfig +++ b/arch/s390/configs/default_defconfig @@ -678,7 +678,7 @@ CONFIG_CRYPTO_SHA512_S390=m CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_GHASH_S390=m -CONFIG_CRYPTO_CRC32_S390=m +CONFIG_CRYPTO_CRC32_S390=y CONFIG_ASYMMETRIC_KEY_TYPE=y CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m CONFIG_X509_CERTIFICATE_PARSER=m diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig index 1bcfd764910a..24879dab47bc 100644 --- a/arch/s390/configs/gcov_defconfig +++ b/arch/s390/configs/gcov_defconfig @@ -616,7 +616,7 @@ CONFIG_CRYPTO_SHA512_S390=m CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_GHASH_S390=m -CONFIG_CRYPTO_CRC32_S390=m +CONFIG_CRYPTO_CRC32_S390=y CONFIG_ASYMMETRIC_KEY_TYPE=y CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m CONFIG_X509_CERTIFICATE_PARSER=m diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig index 13ff090139c8..a5c1e5f2a0ca 100644 --- a/arch/s390/configs/performance_defconfig +++ b/arch/s390/configs/performance_defconfig @@ -615,7 +615,7 @@ CONFIG_CRYPTO_SHA512_S390=m CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_GHASH_S390=m -CONFIG_CRYPTO_CRC32_S390=m +CONFIG_CRYPTO_CRC32_S390=y CONFIG_ASYMMETRIC_KEY_TYPE=y CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m CONFIG_X509_CERTIFICATE_PARSER=m diff --git a/arch/s390/crypto/crc32-vx.c b/arch/s390/crypto/crc32-vx.c index 577ae1d4ae89..2bad9d837029 100644 --- a/arch/s390/crypto/crc32-vx.c +++ b/arch/s390/crypto/crc32-vx.c @@ -51,6 +51,9 @@ u32 crc32c_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size); struct kernel_fpu vxstate; \ unsigned long prealign, aligned, remaining; \ \ + if (datalen < VX_MIN_LEN + VX_ALIGN_MASK) \ + return ___crc32_sw(crc, data, datalen); \ + \ if ((unsigned long)data & VX_ALIGN_MASK) { \ prealign = VX_ALIGNMENT - \ ((unsigned long)data & VX_ALIGN_MASK); \ @@ -59,9 +62,6 @@ u32 crc32c_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size); data = (void *)((unsigned long)data + prealign); \ } \ \ - if (datalen < VX_MIN_LEN) \ - return ___crc32_sw(crc, data, datalen); \ - \ aligned = datalen & ~VX_ALIGN_MASK; \ remaining = datalen & VX_ALIGN_MASK; \ \ diff --git a/arch/s390/defconfig b/arch/s390/defconfig index ccccebeeaaf6..73610f2e3b4f 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -234,7 +234,7 @@ CONFIG_CRYPTO_SHA256_S390=m CONFIG_CRYPTO_SHA512_S390=m CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_AES_S390=m -CONFIG_CRYPTO_CRC32_S390=m +CONFIG_CRYPTO_CRC32_S390=y CONFIG_CRC7=m # CONFIG_XZ_DEC_X86 is not set # CONFIG_XZ_DEC_POWERPC is not set diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S index 56e4d8234ef2..4431905f8cfa 100644 --- a/arch/s390/kernel/head.S +++ b/arch/s390/kernel/head.S @@ -309,7 +309,9 @@ ENTRY(startup_kdump) l %r15,.Lstack-.LPG0(%r13) ahi %r15,-STACK_FRAME_OVERHEAD brasl %r14,verify_facilities - /* Continue with startup code in head64.S */ +# For uncompressed images, continue in +# arch/s390/kernel/head64.S. For compressed images, continue in +# arch/s390/boot/compressed/head.S. jg startup_continue .Lstack: diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 3f3ae4865d57..f142215ed30d 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1672,6 +1672,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) KVM_SYNC_CRS | KVM_SYNC_ARCH0 | KVM_SYNC_PFAULT; + kvm_s390_set_prefix(vcpu, 0); if (test_kvm_facility(vcpu->kvm, 64)) vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB; /* fprs can be synchronized via vrs, even if the guest has no vx. With @@ -2361,8 +2362,10 @@ retry: rc = gmap_mprotect_notify(vcpu->arch.gmap, kvm_s390_get_prefix(vcpu), PAGE_SIZE * 2, PROT_WRITE); - if (rc) + if (rc) { + kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu); return rc; + } goto retry; } diff --git a/arch/s390/lib/string.c b/arch/s390/lib/string.c index e390bbb16443..48352bffbc92 100644 --- a/arch/s390/lib/string.c +++ b/arch/s390/lib/string.c @@ -237,11 +237,10 @@ char * strrchr(const char * s, int c) EXPORT_SYMBOL(strrchr); static inline int clcle(const char *s1, unsigned long l1, - const char *s2, unsigned long l2, - int *diff) + const char *s2, unsigned long l2) { register unsigned long r2 asm("2") = (unsigned long) s1; - register unsigned long r3 asm("3") = (unsigned long) l2; + register unsigned long r3 asm("3") = (unsigned long) l1; register unsigned long r4 asm("4") = (unsigned long) s2; register unsigned long r5 asm("5") = (unsigned long) l2; int cc; @@ -252,7 +251,6 @@ static inline int clcle(const char *s1, unsigned long l1, " srl %0,28" : "=&d" (cc), "+a" (r2), "+a" (r3), "+a" (r4), "+a" (r5) : : "cc"); - *diff = *(char *)r2 - *(char *)r4; return cc; } @@ -270,9 +268,9 @@ char * strstr(const char * s1,const char * s2) return (char *) s1; l1 = __strend(s1) - s1; while (l1-- >= l2) { - int cc, dummy; + int cc; - cc = clcle(s1, l1, s2, l2, &dummy); + cc = clcle(s1, l2, s2, l2); if (!cc) return (char *) s1; s1++; @@ -313,11 +311,11 @@ EXPORT_SYMBOL(memchr); */ int memcmp(const void *cs, const void *ct, size_t n) { - int ret, diff; + int ret; - ret = clcle(cs, n, ct, n, &diff); + ret = clcle(cs, n, ct, n); if (ret) - ret = diff; + ret = ret == 1 ? -1 : 1; return ret; } EXPORT_SYMBOL(memcmp); diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index d96596128e9f..f481fcde067b 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -104,6 +104,7 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr, unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n) { + check_object_size(to, n, false); if (static_branch_likely(&have_mvcos)) return copy_from_user_mvcos(to, from, n); return copy_from_user_mvcp(to, from, n); @@ -177,6 +178,7 @@ static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x, unsigned long __copy_to_user(void __user *to, const void *from, unsigned long n) { + check_object_size(from, n, true); if (static_branch_likely(&have_mvcos)) return copy_to_user_mvcos(to, from, n); return copy_to_user_mvcs(to, from, n); diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index 7104ffb5a67f..af7cf28cf97e 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -252,6 +252,8 @@ static int change_page_attr(unsigned long addr, unsigned long end, int rc = -EINVAL; pgd_t *pgdp; + if (addr == end) + return 0; if (end >= MODULES_END) return -EINVAL; mutex_lock(&cpa_mutex); diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 546293d9e6c5..59b09600dd32 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -43,6 +43,7 @@ config SPARC select OLD_SIGSUSPEND select ARCH_HAS_SG_CHAIN select CPU_NO_EFFICIENT_FFS + select HAVE_ARCH_HARDENED_USERCOPY config SPARC32 def_bool !64BIT diff --git a/arch/sparc/include/asm/uaccess_32.h b/arch/sparc/include/asm/uaccess_32.h index 57aca2792d29..341a5a133f48 100644 --- a/arch/sparc/include/asm/uaccess_32.h +++ b/arch/sparc/include/asm/uaccess_32.h @@ -248,22 +248,28 @@ unsigned long __copy_user(void __user *to, const void __user *from, unsigned lon static inline unsigned long copy_to_user(void __user *to, const void *from, unsigned long n) { - if (n && __access_ok((unsigned long) to, n)) + if (n && __access_ok((unsigned long) to, n)) { + if (!__builtin_constant_p(n)) + check_object_size(from, n, true); return __copy_user(to, (__force void __user *) from, n); - else + } else return n; } static inline unsigned long __copy_to_user(void __user *to, const void *from, unsigned long n) { + if (!__builtin_constant_p(n)) + check_object_size(from, n, true); return __copy_user(to, (__force void __user *) from, n); } static inline unsigned long copy_from_user(void *to, const void __user *from, unsigned long n) { - if (n && __access_ok((unsigned long) from, n)) + if (n && __access_ok((unsigned long) from, n)) { + if (!__builtin_constant_p(n)) + check_object_size(to, n, false); return __copy_user((__force void __user *) to, from, n); - else + } else return n; } diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h index e9a51d64974d..8bda94fab8e8 100644 --- a/arch/sparc/include/asm/uaccess_64.h +++ b/arch/sparc/include/asm/uaccess_64.h @@ -210,8 +210,12 @@ unsigned long copy_from_user_fixup(void *to, const void __user *from, static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long size) { - unsigned long ret = ___copy_from_user(to, from, size); + unsigned long ret; + if (!__builtin_constant_p(size)) + check_object_size(to, size, false); + + ret = ___copy_from_user(to, from, size); if (unlikely(ret)) ret = copy_from_user_fixup(to, from, size); @@ -227,8 +231,11 @@ unsigned long copy_to_user_fixup(void __user *to, const void *from, static inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long size) { - unsigned long ret = ___copy_to_user(to, from, size); + unsigned long ret; + if (!__builtin_constant_p(size)) + check_object_size(from, size, true); + ret = ___copy_to_user(to, from, size); if (unlikely(ret)) ret = copy_to_user_fixup(to, from, size); return ret; diff --git a/arch/unicore32/include/asm/mmu_context.h b/arch/unicore32/include/asm/mmu_context.h index e35632ef23c7..62dfc644c908 100644 --- a/arch/unicore32/include/asm/mmu_context.h +++ b/arch/unicore32/include/asm/mmu_context.h @@ -98,7 +98,7 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm, } static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, - bool write, bool foreign) + bool write, bool execute, bool foreign) { /* by default, allow everything */ return true; diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5c6e7471b732..c580d8c33562 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -80,6 +80,7 @@ config X86 select HAVE_ALIGNED_STRUCT_PAGE if SLUB select HAVE_AOUT if X86_32 select HAVE_ARCH_AUDITSYSCALL + select HAVE_ARCH_HARDENED_USERCOPY select HAVE_ARCH_HUGE_VMAP if X86_64 || X86_PAE select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_KASAN if X86_64 && SPARSEMEM_VMEMMAP @@ -91,6 +92,7 @@ config X86 select HAVE_ARCH_SOFT_DIRTY if X86_64 select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE + select HAVE_ARCH_WITHIN_STACK_FRAMES select HAVE_EBPF_JIT if X86_64 select HAVE_CC_STACKPROTECTOR select HAVE_CMPXCHG_DOUBLE diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile index fe91c25092da..77f28ce9c646 100644 --- a/arch/x86/entry/Makefile +++ b/arch/x86/entry/Makefile @@ -5,6 +5,8 @@ OBJECT_FILES_NON_STANDARD_entry_$(BITS).o := y OBJECT_FILES_NON_STANDARD_entry_64_compat.o := y +CFLAGS_syscall_64.o += -Wno-override-init +CFLAGS_syscall_32.o += -Wno-override-init obj-y := entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o obj-y += common.o diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index b846875aeea6..d172c619c449 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -288,11 +288,15 @@ return_from_SYSCALL_64: jne opportunistic_sysret_failed /* - * SYSRET can't restore RF. SYSRET can restore TF, but unlike IRET, - * restoring TF results in a trap from userspace immediately after - * SYSRET. This would cause an infinite loop whenever #DB happens - * with register state that satisfies the opportunistic SYSRET - * conditions. For example, single-stepping this user code: + * SYSCALL clears RF when it saves RFLAGS in R11 and SYSRET cannot + * restore RF properly. If the slowpath sets it for whatever reason, we + * need to restore it correctly. + * + * SYSRET can restore TF, but unlike IRET, restoring TF results in a + * trap from userspace immediately after SYSRET. This would cause an + * infinite loop whenever #DB happens with register state that satisfies + * the opportunistic SYSRET conditions. For example, single-stepping + * this user code: * * movq $stuck_here, %rcx * pushfq @@ -601,9 +605,20 @@ apicinterrupt3 \num trace(\sym) smp_trace(\sym) .endm #endif +/* Make sure APIC interrupt handlers end up in the irqentry section: */ +#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN) +# define PUSH_SECTION_IRQENTRY .pushsection .irqentry.text, "ax" +# define POP_SECTION_IRQENTRY .popsection +#else +# define PUSH_SECTION_IRQENTRY +# define POP_SECTION_IRQENTRY +#endif + .macro apicinterrupt num sym do_sym +PUSH_SECTION_IRQENTRY apicinterrupt3 \num \sym \do_sym trace_apicinterrupt \num \sym +POP_SECTION_IRQENTRY .endm #ifdef CONFIG_SMP diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index 97a69dbba649..9d35ec0cb8fc 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -100,6 +100,12 @@ static void snb_uncore_msr_init_box(struct intel_uncore_box *box) } } +static void snb_uncore_msr_enable_box(struct intel_uncore_box *box) +{ + wrmsrl(SNB_UNC_PERF_GLOBAL_CTL, + SNB_UNC_GLOBAL_CTL_EN | SNB_UNC_GLOBAL_CTL_CORE_ALL); +} + static void snb_uncore_msr_exit_box(struct intel_uncore_box *box) { if (box->pmu->pmu_idx == 0) @@ -127,6 +133,7 @@ static struct attribute_group snb_uncore_format_group = { static struct intel_uncore_ops snb_uncore_msr_ops = { .init_box = snb_uncore_msr_init_box, + .enable_box = snb_uncore_msr_enable_box, .exit_box = snb_uncore_msr_exit_box, .disable_event = snb_uncore_msr_disable_event, .enable_event = snb_uncore_msr_enable_event, @@ -192,6 +199,12 @@ static void skl_uncore_msr_init_box(struct intel_uncore_box *box) } } +static void skl_uncore_msr_enable_box(struct intel_uncore_box *box) +{ + wrmsrl(SKL_UNC_PERF_GLOBAL_CTL, + SNB_UNC_GLOBAL_CTL_EN | SKL_UNC_GLOBAL_CTL_CORE_ALL); +} + static void skl_uncore_msr_exit_box(struct intel_uncore_box *box) { if (box->pmu->pmu_idx == 0) @@ -200,6 +213,7 @@ static void skl_uncore_msr_exit_box(struct intel_uncore_box *box) static struct intel_uncore_ops skl_uncore_msr_ops = { .init_box = skl_uncore_msr_init_box, + .enable_box = skl_uncore_msr_enable_box, .exit_box = skl_uncore_msr_exit_box, .disable_event = snb_uncore_msr_disable_event, .enable_event = snb_uncore_msr_enable_event, diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 824e54086e07..8aee83bcf71f 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -2626,7 +2626,7 @@ void hswep_uncore_cpu_init(void) static struct intel_uncore_type hswep_uncore_ha = { .name = "ha", - .num_counters = 5, + .num_counters = 4, .num_boxes = 2, .perf_ctr_bits = 48, SNBEP_UNCORE_PCI_COMMON_INIT(), @@ -2645,7 +2645,7 @@ static struct uncore_event_desc hswep_uncore_imc_events[] = { static struct intel_uncore_type hswep_uncore_imc = { .name = "imc", - .num_counters = 5, + .num_counters = 4, .num_boxes = 8, .perf_ctr_bits = 48, .fixed_ctr_bits = 48, @@ -2691,7 +2691,7 @@ static struct intel_uncore_type hswep_uncore_irp = { static struct intel_uncore_type hswep_uncore_qpi = { .name = "qpi", - .num_counters = 5, + .num_counters = 4, .num_boxes = 3, .perf_ctr_bits = 48, .perf_ctr = SNBEP_PCI_PMON_CTR0, @@ -2773,7 +2773,7 @@ static struct event_constraint hswep_uncore_r3qpi_constraints[] = { static struct intel_uncore_type hswep_uncore_r3qpi = { .name = "r3qpi", - .num_counters = 4, + .num_counters = 3, .num_boxes = 3, .perf_ctr_bits = 44, .constraints = hswep_uncore_r3qpi_constraints, @@ -2972,7 +2972,7 @@ static struct intel_uncore_type bdx_uncore_ha = { static struct intel_uncore_type bdx_uncore_imc = { .name = "imc", - .num_counters = 5, + .num_counters = 4, .num_boxes = 8, .perf_ctr_bits = 48, .fixed_ctr_bits = 48, diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index f5befd4945f2..124357773ffa 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -135,6 +135,7 @@ extern void init_apic_mappings(void); void register_lapic_address(unsigned long address); extern void setup_boot_APIC_clock(void); extern void setup_secondary_APIC_clock(void); +extern void lapic_update_tsc_freq(void); extern int APIC_init_uniprocessor(void); #ifdef CONFIG_X86_64 @@ -170,6 +171,7 @@ static inline void init_apic_mappings(void) { } static inline void disable_local_APIC(void) { } # define setup_boot_APIC_clock x86_init_noop # define setup_secondary_APIC_clock x86_init_noop +static inline void lapic_update_tsc_freq(void) { } #endif /* !CONFIG_X86_LOCAL_APIC */ #ifdef CONFIG_X86_X2APIC diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 7178043b0e1d..59405a248fc2 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -22,10 +22,6 @@ typedef struct { #ifdef CONFIG_SMP unsigned int irq_resched_count; unsigned int irq_call_count; - /* - * irq_tlb_count is double-counted in irq_call_count, so it must be - * subtracted from irq_call_count when displaying irq_call_count - */ unsigned int irq_tlb_count; #endif #ifdef CONFIG_X86_THERMAL_VECTOR diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h index 223042086f4e..737da62bfeb0 100644 --- a/arch/x86/include/asm/init.h +++ b/arch/x86/include/asm/init.h @@ -5,10 +5,10 @@ struct x86_mapping_info { void *(*alloc_pgt_page)(void *); /* allocate buf for page table */ void *context; /* context for alloc_pgt_page */ unsigned long pmd_flag; /* page flag for PMD entry */ - bool kernel_mapping; /* kernel mapping or ident mapping */ + unsigned long offset; /* ident mapping offset */ }; int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page, - unsigned long addr, unsigned long end); + unsigned long pstart, unsigned long pend); #endif /* _ASM_X86_INIT_H */ diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 7e8ec7ae10fa..1cc82ece9ac1 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -145,7 +145,7 @@ static inline int pgd_large(pgd_t pgd) { return 0; } * * | ... | 11| 10| 9|8|7|6|5| 4| 3|2|1|0| <- bit number * | ... |SW3|SW2|SW1|G|L|D|A|CD|WT|U|W|P| <- bit names - * | OFFSET (14->63) | TYPE (10-13) |0|X|X|X| X| X|X|X|0| <- swp entry + * | OFFSET (14->63) | TYPE (9-13) |0|X|X|X| X| X|X|X|0| <- swp entry * * G (8) is aliased and used as a PROT_NONE indicator for * !present ptes. We need to start storing swap entries above @@ -156,7 +156,7 @@ static inline int pgd_large(pgd_t pgd) { return 0; } #define SWP_TYPE_FIRST_BIT (_PAGE_BIT_PROTNONE + 1) #define SWP_TYPE_BITS 5 /* Place the offset above the type: */ -#define SWP_OFFSET_FIRST_BIT (SWP_TYPE_FIRST_BIT + SWP_TYPE_BITS + 1) +#define SWP_OFFSET_FIRST_BIT (SWP_TYPE_FIRST_BIT + SWP_TYPE_BITS) #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS) diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h index 9c6b890d5e7a..b2988c0ed829 100644 --- a/arch/x86/include/asm/realmode.h +++ b/arch/x86/include/asm/realmode.h @@ -58,7 +58,15 @@ extern unsigned char boot_gdt[]; extern unsigned char secondary_startup_64[]; #endif +static inline size_t real_mode_size_needed(void) +{ + if (real_mode_header) + return 0; /* already allocated. */ + + return ALIGN(real_mode_blob_end - real_mode_blob, PAGE_SIZE); +} + +void set_real_mode_mem(phys_addr_t mem, size_t size); void reserve_real_mode(void); -void setup_real_mode(void); #endif /* _ARCH_X86_REALMODE_H */ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 84b59846154a..8b7c8d8e0852 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -176,6 +176,50 @@ static inline unsigned long current_stack_pointer(void) return sp; } +/* + * Walks up the stack frames to make sure that the specified object is + * entirely contained by a single stack frame. + * + * Returns: + * 1 if within a frame + * -1 if placed across a frame boundary (or outside stack) + * 0 unable to determine (no frame pointers, etc) + */ +static inline int arch_within_stack_frames(const void * const stack, + const void * const stackend, + const void *obj, unsigned long len) +{ +#if defined(CONFIG_FRAME_POINTER) + const void *frame = NULL; + const void *oldframe; + + oldframe = __builtin_frame_address(1); + if (oldframe) + frame = __builtin_frame_address(2); + /* + * low ----------------------------------------------> high + * [saved bp][saved ip][args][local vars][saved bp][saved ip] + * ^----------------^ + * allow copies only within here + */ + while (stack <= frame && frame < stackend) { + /* + * If obj + len extends past the last frame, this + * check won't pass and the next frame will be 0, + * causing us to bail out and correctly report + * the copy as invalid. + */ + if (obj + len <= frame) + return obj >= oldframe + 2 * sizeof(void *) ? 1 : -1; + oldframe = frame; + frame = *(const void * const *)frame; + } + return -1; +#else + return 0; +#endif +} + #else /* !__ASSEMBLY__ */ #ifdef CONFIG_X86_64 diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 4e5be94e079a..6fa85944af83 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -135,7 +135,14 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask) static inline void __native_flush_tlb(void) { + /* + * If current->mm == NULL then we borrow a mm which may change during a + * task switch and therefore we must not be preempted while we write CR3 + * back: + */ + preempt_disable(); native_write_cr3(native_read_cr3()); + preempt_enable(); } static inline void __native_flush_tlb_global_irq_disabled(void) diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index c03bfb68c503..a0ae610b9280 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -761,9 +761,10 @@ copy_from_user(void *to, const void __user *from, unsigned long n) * case, and do only runtime checking for non-constant sizes. */ - if (likely(sz < 0 || sz >= n)) + if (likely(sz < 0 || sz >= n)) { + check_object_size(to, n, false); n = _copy_from_user(to, from, n); - else if(__builtin_constant_p(n)) + } else if (__builtin_constant_p(n)) copy_from_user_overflow(); else __copy_from_user_overflow(sz, n); @@ -781,9 +782,10 @@ copy_to_user(void __user *to, const void *from, unsigned long n) might_fault(); /* See the comment in copy_from_user() above. */ - if (likely(sz < 0 || sz >= n)) + if (likely(sz < 0 || sz >= n)) { + check_object_size(from, n, true); n = _copy_to_user(to, from, n); - else if(__builtin_constant_p(n)) + } else if (__builtin_constant_p(n)) copy_to_user_overflow(); else __copy_to_user_overflow(sz, n); @@ -812,21 +814,21 @@ copy_to_user(void __user *to, const void *from, unsigned long n) #define user_access_begin() __uaccess_begin() #define user_access_end() __uaccess_end() -#define unsafe_put_user(x, ptr) \ -({ \ +#define unsafe_put_user(x, ptr, err_label) \ +do { \ int __pu_err; \ __put_user_size((x), (ptr), sizeof(*(ptr)), __pu_err, -EFAULT); \ - __builtin_expect(__pu_err, 0); \ -}) + if (unlikely(__pu_err)) goto err_label; \ +} while (0) -#define unsafe_get_user(x, ptr) \ -({ \ +#define unsafe_get_user(x, ptr, err_label) \ +do { \ int __gu_err; \ unsigned long __gu_val; \ __get_user_size(__gu_val, (ptr), sizeof(*(ptr)), __gu_err, -EFAULT); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ - __builtin_expect(__gu_err, 0); \ -}) + if (unlikely(__gu_err)) goto err_label; \ +} while (0) #endif /* _ASM_X86_UACCESS_H */ diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 4b32da24faaf..7d3bdd1ed697 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -37,6 +37,7 @@ unsigned long __must_check __copy_from_user_ll_nocache_nozero static __always_inline unsigned long __must_check __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n) { + check_object_size(from, n, true); return __copy_to_user_ll(to, from, n); } @@ -95,6 +96,7 @@ static __always_inline unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n) { might_fault(); + check_object_size(to, n, false); if (__builtin_constant_p(n)) { unsigned long ret; diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 2eac2aa3e37f..673059a109fe 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -54,6 +54,7 @@ int __copy_from_user_nocheck(void *dst, const void __user *src, unsigned size) { int ret = 0; + check_object_size(dst, size, false); if (!__builtin_constant_p(size)) return copy_user_generic(dst, (__force void *)src, size); switch (size) { @@ -119,6 +120,7 @@ int __copy_to_user_nocheck(void __user *dst, const void *src, unsigned size) { int ret = 0; + check_object_size(src, size, true); if (!__builtin_constant_p(size)) return copy_user_generic((__force void *)dst, src, size); switch (size) { diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h index c852590254d5..e652a7cc6186 100644 --- a/arch/x86/include/asm/uv/bios.h +++ b/arch/x86/include/asm/uv/bios.h @@ -79,7 +79,7 @@ struct uv_gam_range_entry { u16 nasid; /* HNasid */ u16 sockid; /* Socket ID, high bits of APIC ID */ u16 pnode; /* Index to MMR and GRU spaces */ - u32 pxm; /* ACPI proximity domain number */ + u32 unused2; u32 limit; /* PA bits 56:26 (UV_GAM_RANGE_SHFT) */ }; @@ -88,7 +88,8 @@ struct uv_gam_range_entry { #define UV_SYSTAB_VERSION_UV4 0x400 /* UV4 BIOS base version */ #define UV_SYSTAB_VERSION_UV4_1 0x401 /* + gpa_shift */ #define UV_SYSTAB_VERSION_UV4_2 0x402 /* + TYPE_NVRAM/WINDOW/MBOX */ -#define UV_SYSTAB_VERSION_UV4_LATEST UV_SYSTAB_VERSION_UV4_2 +#define UV_SYSTAB_VERSION_UV4_3 0x403 /* - GAM Range PXM Value */ +#define UV_SYSTAB_VERSION_UV4_LATEST UV_SYSTAB_VERSION_UV4_3 #define UV_SYSTAB_TYPE_UNUSED 0 /* End of table (offset == 0) */ #define UV_SYSTAB_TYPE_GAM_PARAMS 1 /* GAM PARAM conversions */ diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 20abd912f0e4..cea4fc19e844 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -313,7 +313,7 @@ int lapic_get_maxlvt(void) /* Clock divisor */ #define APIC_DIVISOR 16 -#define TSC_DIVISOR 32 +#define TSC_DIVISOR 8 /* * This function sets up the local APIC timer, with a timeout of @@ -565,13 +565,37 @@ static void setup_APIC_timer(void) CLOCK_EVT_FEAT_DUMMY); levt->set_next_event = lapic_next_deadline; clockevents_config_and_register(levt, - (tsc_khz / TSC_DIVISOR) * 1000, + tsc_khz * (1000 / TSC_DIVISOR), 0xF, ~0UL); } else clockevents_register_device(levt); } /* + * Install the updated TSC frequency from recalibration at the TSC + * deadline clockevent devices. + */ +static void __lapic_update_tsc_freq(void *info) +{ + struct clock_event_device *levt = this_cpu_ptr(&lapic_events); + + if (!this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) + return; + + clockevents_update_freq(levt, tsc_khz * (1000 / TSC_DIVISOR)); +} + +void lapic_update_tsc_freq(void) +{ + /* + * The clockevent device's ->mult and ->shift can both be + * changed. In order to avoid races, schedule the frequency + * update code on each CPU. + */ + on_each_cpu(__lapic_update_tsc_freq, NULL, 0); +} + +/* * In this functions we calibrate APIC bus clocks to the external timer. * * We want to do the calibration only once since we want to have local timer diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 6368fa69d2af..54f35d988025 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -155,7 +155,7 @@ static void init_x2apic_ldr(void) /* * At CPU state changes, update the x2apic cluster sibling info. */ -int x2apic_prepare_cpu(unsigned int cpu) +static int x2apic_prepare_cpu(unsigned int cpu) { if (!zalloc_cpumask_var(&per_cpu(cpus_in_cluster, cpu), GFP_KERNEL)) return -ENOMEM; @@ -168,7 +168,7 @@ int x2apic_prepare_cpu(unsigned int cpu) return 0; } -int x2apic_dead_cpu(unsigned int this_cpu) +static int x2apic_dead_cpu(unsigned int this_cpu) { int cpu; @@ -186,13 +186,18 @@ int x2apic_dead_cpu(unsigned int this_cpu) static int x2apic_cluster_probe(void) { int cpu = smp_processor_id(); + int ret; if (!x2apic_mode) return 0; + ret = cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "X2APIC_PREPARE", + x2apic_prepare_cpu, x2apic_dead_cpu); + if (ret < 0) { + pr_err("Failed to register X2APIC_PREPARE\n"); + return 0; + } cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, cpu)); - cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "X2APIC_PREPARE", - x2apic_prepare_cpu, x2apic_dead_cpu); return 1; } diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 09b59adaea3f..cb0673c1e940 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -223,6 +223,11 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) if (strncmp(oem_id, "SGI", 3) != 0) return 0; + if (numa_off) { + pr_err("UV: NUMA is off, disabling UV support\n"); + return 0; + } + /* Setup early hub type field in uv_hub_info for Node 0 */ uv_cpu_info->p_uv_hub_info = &uv_hub_info_node0; @@ -325,7 +330,7 @@ static __init void build_uv_gr_table(void) struct uv_gam_range_entry *gre = uv_gre_table; struct uv_gam_range_s *grt; unsigned long last_limit = 0, ram_limit = 0; - int bytes, i, sid, lsid = -1; + int bytes, i, sid, lsid = -1, indx = 0, lindx = -1; if (!gre) return; @@ -356,11 +361,12 @@ static __init void build_uv_gr_table(void) } sid = gre->sockid - _min_socket; if (lsid < sid) { /* new range */ - grt = &_gr_table[sid]; - grt->base = lsid; + grt = &_gr_table[indx]; + grt->base = lindx; grt->nasid = gre->nasid; grt->limit = last_limit = gre->limit; lsid = sid; + lindx = indx++; continue; } if (lsid == sid && !ram_limit) { /* update range */ @@ -371,7 +377,7 @@ static __init void build_uv_gr_table(void) } if (!ram_limit) { /* non-contiguous ram range */ grt++; - grt->base = sid - 1; + grt->base = lindx; grt->nasid = gre->nasid; grt->limit = last_limit = gre->limit; continue; @@ -1155,19 +1161,18 @@ static void __init decode_gam_rng_tbl(unsigned long ptr) for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) { if (!index) { pr_info("UV: GAM Range Table...\n"); - pr_info("UV: # %20s %14s %5s %4s %5s %3s %2s %3s\n", + pr_info("UV: # %20s %14s %5s %4s %5s %3s %2s\n", "Range", "", "Size", "Type", "NASID", - "SID", "PN", "PXM"); + "SID", "PN"); } pr_info( - "UV: %2d: 0x%014lx-0x%014lx %5luG %3d %04x %02x %02x %3d\n", + "UV: %2d: 0x%014lx-0x%014lx %5luG %3d %04x %02x %02x\n", index++, (unsigned long)lgre << UV_GAM_RANGE_SHFT, (unsigned long)gre->limit << UV_GAM_RANGE_SHFT, ((unsigned long)(gre->limit - lgre)) >> (30 - UV_GAM_RANGE_SHFT), /* 64M -> 1G */ - gre->type, gre->nasid, gre->sockid, - gre->pnode, gre->pxm); + gre->type, gre->nasid, gre->sockid, gre->pnode); lgre = gre->limit; if (sock_min > gre->sockid) @@ -1286,7 +1291,7 @@ static void __init build_socket_tables(void) _pnode_to_socket[i] = SOCK_EMPTY; /* fill in pnode/node/addr conversion list values */ - pr_info("UV: GAM Building socket/pnode/pxm conversion tables\n"); + pr_info("UV: GAM Building socket/pnode conversion tables\n"); for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) { if (gre->type == UV_GAM_RANGE_TYPE_HOLE) continue; @@ -1294,20 +1299,18 @@ static void __init build_socket_tables(void) if (_socket_to_pnode[i] != SOCK_EMPTY) continue; /* duplicate */ _socket_to_pnode[i] = gre->pnode; - _socket_to_node[i] = gre->pxm; i = gre->pnode - minpnode; _pnode_to_socket[i] = gre->sockid; pr_info( - "UV: sid:%02x type:%d nasid:%04x pn:%02x pxm:%2d pn2s:%2x\n", + "UV: sid:%02x type:%d nasid:%04x pn:%02x pn2s:%2x\n", gre->sockid, gre->type, gre->nasid, _socket_to_pnode[gre->sockid - minsock], - _socket_to_node[gre->sockid - minsock], _pnode_to_socket[gre->pnode - minpnode]); } - /* check socket -> node values */ + /* Set socket -> node values */ lnid = -1; for_each_present_cpu(cpu) { int nid = cpu_to_node(cpu); @@ -1318,14 +1321,9 @@ static void __init build_socket_tables(void) lnid = nid; apicid = per_cpu(x86_cpu_to_apicid, cpu); sockid = apicid >> uv_cpuid.socketid_shift; - i = sockid - minsock; - - if (nid != _socket_to_node[i]) { - pr_warn( - "UV: %02x: type:%d socket:%02x PXM:%02x != node:%2d\n", - i, sockid, gre->type, _socket_to_node[i], nid); - _socket_to_node[i] = nid; - } + _socket_to_node[sockid - minsock] = nid; + pr_info("UV: sid:%02x: apicid:%04x node:%2d\n", + sockid, apicid, nid); } /* Setup physical blade to pnode translation from GAM Range Table */ diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 27a0228c9cae..b816971f5da4 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -355,6 +355,7 @@ void load_ucode_amd_ap(void) unsigned int cpu = smp_processor_id(); struct equiv_cpu_entry *eq; struct microcode_amd *mc; + u8 *cont = container; u32 rev, eax; u16 eq_id; @@ -371,8 +372,11 @@ void load_ucode_amd_ap(void) if (check_current_patch_level(&rev, false)) return; + /* Add CONFIG_RANDOMIZE_MEMORY offset. */ + cont += PAGE_OFFSET - __PAGE_OFFSET_BASE; + eax = cpuid_eax(0x00000001); - eq = (struct equiv_cpu_entry *)(container + CONTAINER_HDR_SZ); + eq = (struct equiv_cpu_entry *)(cont + CONTAINER_HDR_SZ); eq_id = find_equiv_id(eq, eax); if (!eq_id) @@ -434,6 +438,9 @@ int __init save_microcode_in_initrd_amd(void) else container = cont_va; + /* Add CONFIG_RANDOMIZE_MEMORY offset. */ + container += PAGE_OFFSET - __PAGE_OFFSET_BASE; + eax = cpuid_eax(0x00000001); eax = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff); diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 680049aa4593..01567aa87503 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -866,105 +866,17 @@ const void *get_xsave_field_ptr(int xsave_state) return get_xsave_addr(&fpu->state.xsave, xsave_state); } - -/* - * Set xfeatures (aka XSTATE_BV) bit for a feature that we want - * to take out of its "init state". This will ensure that an - * XRSTOR actually restores the state. - */ -static void fpu__xfeature_set_non_init(struct xregs_state *xsave, - int xstate_feature_mask) -{ - xsave->header.xfeatures |= xstate_feature_mask; -} - -/* - * This function is safe to call whether the FPU is in use or not. - * - * Note that this only works on the current task. - * - * Inputs: - * @xsave_state: state which is defined in xsave.h (e.g. XFEATURE_MASK_FP, - * XFEATURE_MASK_SSE, etc...) - * @xsave_state_ptr: a pointer to a copy of the state that you would - * like written in to the current task's FPU xsave state. This pointer - * must not be located in the current tasks's xsave area. - * Output: - * address of the state in the xsave area or NULL if the state - * is not present or is in its 'init state'. - */ -static void fpu__xfeature_set_state(int xstate_feature_mask, - void *xstate_feature_src, size_t len) -{ - struct xregs_state *xsave = ¤t->thread.fpu.state.xsave; - struct fpu *fpu = ¤t->thread.fpu; - void *dst; - - if (!boot_cpu_has(X86_FEATURE_XSAVE)) { - WARN_ONCE(1, "%s() attempted with no xsave support", __func__); - return; - } - - /* - * Tell the FPU code that we need the FPU state to be in - * 'fpu' (not in the registers), and that we need it to - * be stable while we write to it. - */ - fpu__current_fpstate_write_begin(); - - /* - * This method *WILL* *NOT* work for compact-format - * buffers. If the 'xstate_feature_mask' is unset in - * xcomp_bv then we may need to move other feature state - * "up" in the buffer. - */ - if (xsave->header.xcomp_bv & xstate_feature_mask) { - WARN_ON_ONCE(1); - goto out; - } - - /* find the location in the xsave buffer of the desired state */ - dst = __raw_xsave_addr(&fpu->state.xsave, xstate_feature_mask); - - /* - * Make sure that the pointer being passed in did not - * come from the xsave buffer itself. - */ - WARN_ONCE(xstate_feature_src == dst, "set from xsave buffer itself"); - - /* put the caller-provided data in the location */ - memcpy(dst, xstate_feature_src, len); - - /* - * Mark the xfeature so that the CPU knows there is state - * in the buffer now. - */ - fpu__xfeature_set_non_init(xsave, xstate_feature_mask); -out: - /* - * We are done writing to the 'fpu'. Reenable preeption - * and (possibly) move the fpstate back in to the fpregs. - */ - fpu__current_fpstate_write_end(); -} - #define NR_VALID_PKRU_BITS (CONFIG_NR_PROTECTION_KEYS * 2) #define PKRU_VALID_MASK (NR_VALID_PKRU_BITS - 1) /* - * This will go out and modify the XSAVE buffer so that PKRU is - * set to a particular state for access to 'pkey'. - * - * PKRU state does affect kernel access to user memory. We do - * not modfiy PKRU *itself* here, only the XSAVE state that will - * be restored in to PKRU when we return back to userspace. + * This will go out and modify PKRU register to set the access + * rights for @pkey to @init_val. */ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, unsigned long init_val) { - struct xregs_state *xsave = &tsk->thread.fpu.state.xsave; - struct pkru_state *old_pkru_state; - struct pkru_state new_pkru_state; + u32 old_pkru; int pkey_shift = (pkey * PKRU_BITS_PER_PKEY); u32 new_pkru_bits = 0; @@ -974,6 +886,15 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, */ if (!boot_cpu_has(X86_FEATURE_OSPKE)) return -EINVAL; + /* + * For most XSAVE components, this would be an arduous task: + * brining fpstate up to date with fpregs, updating fpstate, + * then re-populating fpregs. But, for components that are + * never lazily managed, we can just access the fpregs + * directly. PKRU is never managed lazily, so we can just + * manipulate it directly. Make sure it stays that way. + */ + WARN_ON_ONCE(!use_eager_fpu()); /* Set the bits we need in PKRU: */ if (init_val & PKEY_DISABLE_ACCESS) @@ -984,37 +905,12 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, /* Shift the bits in to the correct place in PKRU for pkey: */ new_pkru_bits <<= pkey_shift; - /* Locate old copy of the state in the xsave buffer: */ - old_pkru_state = get_xsave_addr(xsave, XFEATURE_MASK_PKRU); - - /* - * When state is not in the buffer, it is in the init - * state, set it manually. Otherwise, copy out the old - * state. - */ - if (!old_pkru_state) - new_pkru_state.pkru = 0; - else - new_pkru_state.pkru = old_pkru_state->pkru; - - /* Mask off any old bits in place: */ - new_pkru_state.pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift); - - /* Set the newly-requested bits: */ - new_pkru_state.pkru |= new_pkru_bits; - - /* - * We could theoretically live without zeroing pkru.pad. - * The current XSAVE feature state definition says that - * only bytes 0->3 are used. But we do not want to - * chance leaking kernel stack out to userspace in case a - * memcpy() of the whole xsave buffer was done. - * - * They're in the same cacheline anyway. - */ - new_pkru_state.pad = 0; + /* Get old PKRU and mask off any old bits in place: */ + old_pkru = read_pkru(); + old_pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift); - fpu__xfeature_set_state(XFEATURE_MASK_PKRU, &new_pkru_state, sizeof(new_pkru_state)); + /* Write old part along with new part: */ + write_pkru(old_pkru | new_pkru_bits); return 0; } diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index 2dda0bc4576e..f16c55bfc090 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -25,8 +25,6 @@ static void __init i386_default_early_setup(void) /* Initialize 32bit specific setup functions */ x86_init.resources.reserve_resources = i386_reserve_resources; x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc; - - reserve_bios_regions(); } asmlinkage __visible void __init i386_start_kernel(void) diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 99d48e7d2974..54a2372f5dbb 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -183,7 +183,6 @@ void __init x86_64_start_reservations(char *real_mode_data) copy_bootdata(__va(real_mode_data)); x86_early_init_platform_quirks(); - reserve_bios_regions(); switch (boot_params.hdr.hardware_subarch) { case X86_SUBARCH_INTEL_MID: diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index ed16e58658a4..c6dfd801df97 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -1242,7 +1242,7 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id) memset(&curr_time, 0, sizeof(struct rtc_time)); if (hpet_rtc_flags & (RTC_UIE | RTC_AIE)) - mc146818_set_time(&curr_time); + mc146818_get_time(&curr_time); if (hpet_rtc_flags & RTC_UIE && curr_time.tm_sec != hpet_prev_update_sec) { diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 61521dc19c10..9f669fdd2010 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -102,8 +102,7 @@ int arch_show_interrupts(struct seq_file *p, int prec) seq_puts(p, " Rescheduling interrupts\n"); seq_printf(p, "%*s: ", prec, "CAL"); for_each_online_cpu(j) - seq_printf(p, "%10u ", irq_stats(j)->irq_call_count - - irq_stats(j)->irq_tlb_count); + seq_printf(p, "%10u ", irq_stats(j)->irq_call_count); seq_puts(p, " Function call interrupts\n"); seq_printf(p, "%*s: ", prec, "TLB"); for_each_online_cpu(j) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 991b77986d57..0fa60f5f5a16 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -936,8 +936,6 @@ void __init setup_arch(char **cmdline_p) x86_init.oem.arch_setup(); - kernel_randomize_memory(); - iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1; setup_memory_map(); parse_setup_data(); @@ -1055,6 +1053,12 @@ void __init setup_arch(char **cmdline_p) max_possible_pfn = max_pfn; + /* + * Define random base addresses for memory sections after max_pfn is + * defined and before each memory section base is used. + */ + kernel_randomize_memory(); + #ifdef CONFIG_X86_32 /* max_low_pfn get updated here */ find_low_pfn_range(); @@ -1097,6 +1101,8 @@ void __init setup_arch(char **cmdline_p) efi_find_mirror(); } + reserve_bios_regions(); + /* * The EFI specification says that boot service code won't be called * after ExitBootServices(). This is, in fact, a lie. @@ -1125,7 +1131,15 @@ void __init setup_arch(char **cmdline_p) early_trap_pf_init(); - setup_real_mode(); + /* + * Update mmu_cr4_features (and, indirectly, trampoline_cr4_features) + * with the current CR4 value. This may not be necessary, but + * auditing all the early-boot CR4 manipulation would be needed to + * rule it out. + */ + if (boot_cpu_data.cpuid_level >= 0) + /* A CPU has %cr4 if and only if it has CPUID. */ + mmu_cr4_features = __read_cr4(); memblock_set_current_limit(get_max_mapped()); @@ -1174,13 +1188,6 @@ void __init setup_arch(char **cmdline_p) kasan_init(); - if (boot_cpu_data.cpuid_level >= 0) { - /* A CPU has %cr4 if and only if it has CPUID */ - mmu_cr4_features = __read_cr4(); - if (trampoline_cr4_features) - *trampoline_cr4_features = mmu_cr4_features; - } - #ifdef CONFIG_X86_32 /* sync back kernel address range */ clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY, diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 2a6e84a30a54..4296beb8fdd3 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -100,10 +100,11 @@ EXPORT_PER_CPU_SYMBOL(cpu_info); /* Logical package management. We might want to allocate that dynamically */ static int *physical_to_logical_pkg __read_mostly; static unsigned long *physical_package_map __read_mostly;; -static unsigned long *logical_package_map __read_mostly; static unsigned int max_physical_pkg_id __read_mostly; unsigned int __max_logical_packages __read_mostly; EXPORT_SYMBOL(__max_logical_packages); +static unsigned int logical_packages __read_mostly; +static bool logical_packages_frozen __read_mostly; /* Maximum number of SMT threads on any online core */ int __max_smt_threads __read_mostly; @@ -277,14 +278,14 @@ int topology_update_package_map(unsigned int apicid, unsigned int cpu) if (test_and_set_bit(pkg, physical_package_map)) goto found; - new = find_first_zero_bit(logical_package_map, __max_logical_packages); - if (new >= __max_logical_packages) { + if (logical_packages_frozen) { physical_to_logical_pkg[pkg] = -1; - pr_warn("APIC(%x) Package %u exceeds logical package map\n", + pr_warn("APIC(%x) Package %u exceeds logical package max\n", apicid, pkg); return -ENOSPC; } - set_bit(new, logical_package_map); + + new = logical_packages++; pr_info("APIC(%x) Converting physical %u to logical package %u\n", apicid, pkg, new); physical_to_logical_pkg[pkg] = new; @@ -341,6 +342,7 @@ static void __init smp_init_package_map(void) } __max_logical_packages = DIV_ROUND_UP(total_cpus, ncpus); + logical_packages = 0; /* * Possibly larger than what we need as the number of apic ids per @@ -352,10 +354,6 @@ static void __init smp_init_package_map(void) memset(physical_to_logical_pkg, 0xff, size); size = BITS_TO_LONGS(max_physical_pkg_id) * sizeof(unsigned long); physical_package_map = kzalloc(size, GFP_KERNEL); - size = BITS_TO_LONGS(__max_logical_packages) * sizeof(unsigned long); - logical_package_map = kzalloc(size, GFP_KERNEL); - - pr_info("Max logical packages: %u\n", __max_logical_packages); for_each_present_cpu(cpu) { unsigned int apicid = apic->cpu_present_to_apicid(cpu); @@ -369,6 +367,15 @@ static void __init smp_init_package_map(void) set_cpu_possible(cpu, false); set_cpu_present(cpu, false); } + + if (logical_packages > __max_logical_packages) { + pr_warn("Detected more packages (%u), then computed by BIOS data (%u).\n", + logical_packages, __max_logical_packages); + logical_packages_frozen = true; + __max_logical_packages = logical_packages; + } + + pr_info("Max logical packages: %u\n", __max_logical_packages); } void __init smp_store_boot_cpu_info(void) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 1ef87e887051..78b9cb5a26af 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -22,6 +22,7 @@ #include <asm/nmi.h> #include <asm/x86_init.h> #include <asm/geode.h> +#include <asm/apic.h> unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */ EXPORT_SYMBOL(cpu_khz); @@ -1249,6 +1250,9 @@ static void tsc_refine_calibration_work(struct work_struct *work) (unsigned long)tsc_khz / 1000, (unsigned long)tsc_khz % 1000); + /* Inform the TSC deadline clockevent devices about the recalibration */ + lapic_update_tsc_freq(); + out: if (boot_cpu_has(X86_FEATURE_ART)) art_related_clocksource = &clocksource_tsc; diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 6c1ff31d99ff..495c776de4b4 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -357,20 +357,22 @@ static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn) *cursor &= 0xfe; } /* - * Similar treatment for VEX3 prefix. - * TODO: add XOP/EVEX treatment when insn decoder supports them + * Similar treatment for VEX3/EVEX prefix. + * TODO: add XOP treatment when insn decoder supports them */ - if (insn->vex_prefix.nbytes == 3) { + if (insn->vex_prefix.nbytes >= 3) { /* * vex2: c5 rvvvvLpp (has no b bit) * vex3/xop: c4/8f rxbmmmmm wvvvvLpp * evex: 62 rxbR00mm wvvvv1pp zllBVaaa - * (evex will need setting of both b and x since - * in non-sib encoding evex.x is 4th bit of MODRM.rm) - * Setting VEX3.b (setting because it has inverted meaning): + * Setting VEX3.b (setting because it has inverted meaning). + * Setting EVEX.x since (in non-SIB encoding) EVEX.x + * is the 4th bit of MODRM.rm, and needs the same treatment. + * For VEX3-encoded insns, VEX3.x value has no effect in + * non-SIB encoding, the change is superfluous but harmless. */ cursor = auprobe->insn + insn_offset_vex_prefix(insn) + 1; - *cursor |= 0x20; + *cursor |= 0x60; } /* @@ -415,12 +417,10 @@ static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn) reg = MODRM_REG(insn); /* Fetch modrm.reg */ reg2 = 0xff; /* Fetch vex.vvvv */ - if (insn->vex_prefix.nbytes == 2) - reg2 = insn->vex_prefix.bytes[1]; - else if (insn->vex_prefix.nbytes == 3) + if (insn->vex_prefix.nbytes) reg2 = insn->vex_prefix.bytes[2]; /* - * TODO: add XOP, EXEV vvvv reading. + * TODO: add XOP vvvv reading. * * vex.vvvv field is in bits 6-3, bits are inverted. * But in 32-bit mode, high-order bit may be ignored. diff --git a/arch/x86/lib/hweight.S b/arch/x86/lib/hweight.S index 02de3d74d2c5..8a602a1e404a 100644 --- a/arch/x86/lib/hweight.S +++ b/arch/x86/lib/hweight.S @@ -35,6 +35,7 @@ ENDPROC(__sw_hweight32) ENTRY(__sw_hweight64) #ifdef CONFIG_X86_64 + pushq %rdi pushq %rdx movq %rdi, %rdx # w -> t @@ -60,6 +61,7 @@ ENTRY(__sw_hweight64) shrq $56, %rax # w = w_tmp >> 56 popq %rdx + popq %rdi ret #else /* CONFIG_X86_32 */ /* We're getting an u64 arg in (%eax,%edx): unsigned long hweight64(__u64 w) */ diff --git a/arch/x86/lib/kaslr.c b/arch/x86/lib/kaslr.c index f7dfeda83e5c..121f59c6ee54 100644 --- a/arch/x86/lib/kaslr.c +++ b/arch/x86/lib/kaslr.c @@ -19,7 +19,7 @@ #include <asm/cpufeature.h> #include <asm/setup.h> -#define debug_putstr(v) early_printk(v) +#define debug_putstr(v) early_printk("%s", v) #define has_cpuflag(f) boot_cpu_has(f) #define get_boot_seed() kaslr_offset() #endif diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c index ec21796ac5fd..4473cb4f8b90 100644 --- a/arch/x86/mm/ident_map.c +++ b/arch/x86/mm/ident_map.c @@ -3,15 +3,17 @@ * included by both the compressed kernel and the regular kernel. */ -static void ident_pmd_init(unsigned long pmd_flag, pmd_t *pmd_page, +static void ident_pmd_init(struct x86_mapping_info *info, pmd_t *pmd_page, unsigned long addr, unsigned long end) { addr &= PMD_MASK; for (; addr < end; addr += PMD_SIZE) { pmd_t *pmd = pmd_page + pmd_index(addr); - if (!pmd_present(*pmd)) - set_pmd(pmd, __pmd(addr | pmd_flag)); + if (pmd_present(*pmd)) + continue; + + set_pmd(pmd, __pmd((addr - info->offset) | info->pmd_flag)); } } @@ -30,13 +32,13 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page, if (pud_present(*pud)) { pmd = pmd_offset(pud, 0); - ident_pmd_init(info->pmd_flag, pmd, addr, next); + ident_pmd_init(info, pmd, addr, next); continue; } pmd = (pmd_t *)info->alloc_pgt_page(info->context); if (!pmd) return -ENOMEM; - ident_pmd_init(info->pmd_flag, pmd, addr, next); + ident_pmd_init(info, pmd, addr, next); set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); } @@ -44,14 +46,15 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page, } int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page, - unsigned long addr, unsigned long end) + unsigned long pstart, unsigned long pend) { + unsigned long addr = pstart + info->offset; + unsigned long end = pend + info->offset; unsigned long next; int result; - int off = info->kernel_mapping ? pgd_index(__PAGE_OFFSET) : 0; for (; addr < end; addr = next) { - pgd_t *pgd = pgd_page + pgd_index(addr) + off; + pgd_t *pgd = pgd_page + pgd_index(addr); pud_t *pud; next = (addr & PGDIR_MASK) + PGDIR_SIZE; diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 620928903be3..d28a2d741f9e 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -122,8 +122,18 @@ __ref void *alloc_low_pages(unsigned int num) return __va(pfn << PAGE_SHIFT); } -/* need 3 4k for initial PMD_SIZE, 3 4k for 0-ISA_END_ADDRESS */ -#define INIT_PGT_BUF_SIZE (6 * PAGE_SIZE) +/* + * By default need 3 4k for initial PMD_SIZE, 3 4k for 0-ISA_END_ADDRESS. + * With KASLR memory randomization, depending on the machine e820 memory + * and the PUD alignment. We may need twice more pages when KASLR memory + * randomization is enabled. + */ +#ifndef CONFIG_RANDOMIZE_MEMORY +#define INIT_PGD_PAGE_COUNT 6 +#else +#define INIT_PGD_PAGE_COUNT 12 +#endif +#define INIT_PGT_BUF_SIZE (INIT_PGD_PAGE_COUNT * PAGE_SIZE) RESERVE_BRK(early_pgt_alloc, INIT_PGT_BUF_SIZE); void __init early_alloc_pgt_buf(void) { diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index 26dccd6c0df1..ec8654f117d8 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -97,7 +97,7 @@ void __init kernel_randomize_memory(void) * add padding if needed (especially for memory hotplug support). */ BUG_ON(kaslr_regions[0].base != &page_offset_base); - memory_tb = ((max_pfn << PAGE_SHIFT) >> TB_SHIFT) + + memory_tb = DIV_ROUND_UP(max_pfn << PAGE_SHIFT, 1UL << TB_SHIFT) + CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING; /* Adapt phyiscal memory region size based on available memory */ diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 4480c06cade7..89d1146f5a6f 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -254,6 +254,7 @@ void __init efi_free_boot_services(void) for_each_efi_memory_desc(md) { unsigned long long start = md->phys_addr; unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; + size_t rm_size; if (md->type != EFI_BOOT_SERVICES_CODE && md->type != EFI_BOOT_SERVICES_DATA) @@ -263,6 +264,26 @@ void __init efi_free_boot_services(void) if (md->attribute & EFI_MEMORY_RUNTIME) continue; + /* + * Nasty quirk: if all sub-1MB memory is used for boot + * services, we can get here without having allocated the + * real mode trampoline. It's too late to hand boot services + * memory back to the memblock allocator, so instead + * try to manually allocate the trampoline if needed. + * + * I've seen this on a Dell XPS 13 9350 with firmware + * 1.4.4 with SGX enabled booting Linux via Fedora 24's + * grub2-efi on a hard disk. (And no, I don't know why + * this happened, but Linux should still try to boot rather + * panicing early.) + */ + rm_size = real_mode_size_needed(); + if (rm_size && (start + rm_size) < (1<<20) && size >= rm_size) { + set_real_mode_mem(start, rm_size); + start += rm_size; + size -= rm_size; + } + free_bootmem_late(start, size); } diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c index 66b2166ea4a1..23f2f3e41c7f 100644 --- a/arch/x86/platform/uv/bios_uv.c +++ b/arch/x86/platform/uv/bios_uv.c @@ -187,7 +187,8 @@ EXPORT_SYMBOL_GPL(uv_bios_set_legacy_vga_target); void uv_bios_init(void) { uv_systab = NULL; - if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) || !efi.uv_systab) { + if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) || + !efi.uv_systab || efi_runtime_disabled()) { pr_crit("UV: UVsystab: missing\n"); return; } @@ -199,12 +200,14 @@ void uv_bios_init(void) return; } + /* Starting with UV4 the UV systab size is variable */ if (uv_systab->revision >= UV_SYSTAB_VERSION_UV4) { + int size = uv_systab->size; + iounmap(uv_systab); - uv_systab = ioremap(efi.uv_systab, uv_systab->size); + uv_systab = ioremap(efi.uv_systab, size); if (!uv_systab) { - pr_err("UV: UVsystab: ioremap(%d) failed!\n", - uv_systab->size); + pr_err("UV: UVsystab: ioremap(%d) failed!\n", size); return; } } diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c index f0b5f2d402af..9634557a5444 100644 --- a/arch/x86/power/hibernate_64.c +++ b/arch/x86/power/hibernate_64.c @@ -87,7 +87,7 @@ static int set_up_temporary_mappings(void) struct x86_mapping_info info = { .alloc_pgt_page = alloc_pgt_page, .pmd_flag = __PAGE_KERNEL_LARGE_EXEC, - .kernel_mapping = true, + .offset = __PAGE_OFFSET, }; unsigned long mstart, mend; pgd_t *pgd; @@ -113,7 +113,7 @@ static int set_up_temporary_mappings(void) return result; } - temp_level4_pgt = (unsigned long)pgd - __PAGE_OFFSET; + temp_level4_pgt = __pa(pgd); return 0; } diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c index 705e3fffb4a1..5db706f14111 100644 --- a/arch/x86/realmode/init.c +++ b/arch/x86/realmode/init.c @@ -1,9 +1,11 @@ #include <linux/io.h> +#include <linux/slab.h> #include <linux/memblock.h> #include <asm/cacheflush.h> #include <asm/pgtable.h> #include <asm/realmode.h> +#include <asm/tlbflush.h> struct real_mode_header *real_mode_header; u32 *trampoline_cr4_features; @@ -11,25 +13,37 @@ u32 *trampoline_cr4_features; /* Hold the pgd entry used on booting additional CPUs */ pgd_t trampoline_pgd_entry; +void __init set_real_mode_mem(phys_addr_t mem, size_t size) +{ + void *base = __va(mem); + + real_mode_header = (struct real_mode_header *) base; + printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n", + base, (unsigned long long)mem, size); +} + void __init reserve_real_mode(void) { phys_addr_t mem; - unsigned char *base; - size_t size = PAGE_ALIGN(real_mode_blob_end - real_mode_blob); + size_t size = real_mode_size_needed(); + + if (!size) + return; + + WARN_ON(slab_is_available()); /* Has to be under 1M so we can execute real-mode AP code. */ mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE); - if (!mem) - panic("Cannot allocate trampoline\n"); + if (!mem) { + pr_info("No sub-1M memory is available for the trampoline\n"); + return; + } - base = __va(mem); memblock_reserve(mem, size); - real_mode_header = (struct real_mode_header *) base; - printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n", - base, (unsigned long long)mem, size); + set_real_mode_mem(mem, size); } -void __init setup_real_mode(void) +static void __init setup_real_mode(void) { u16 real_mode_seg; const u32 *rel; @@ -84,7 +98,7 @@ void __init setup_real_mode(void) trampoline_header->start = (u64) secondary_startup_64; trampoline_cr4_features = &trampoline_header->cr4; - *trampoline_cr4_features = __read_cr4(); + *trampoline_cr4_features = mmu_cr4_features; trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd); trampoline_pgd[0] = trampoline_pgd_entry.pgd; @@ -100,7 +114,7 @@ void __init setup_real_mode(void) * need to mark it executable at do_pre_smp_initcalls() at least, * thus run it as a early_initcall(). */ -static int __init set_real_mode_permissions(void) +static void __init set_real_mode_permissions(void) { unsigned char *base = (unsigned char *) real_mode_header; size_t size = PAGE_ALIGN(real_mode_blob_end - real_mode_blob); @@ -119,7 +133,16 @@ static int __init set_real_mode_permissions(void) set_memory_nx((unsigned long) base, size >> PAGE_SHIFT); set_memory_ro((unsigned long) base, ro_size >> PAGE_SHIFT); set_memory_x((unsigned long) text_start, text_size >> PAGE_SHIFT); +} + +static int __init init_real_mode(void) +{ + if (!real_mode_header) + panic("Real mode trampoline was not allocated"); + + setup_real_mode(); + set_real_mode_permissions(); return 0; } -early_initcall(set_real_mode_permissions); +early_initcall(init_real_mode); |